[
  {
    "path": ".agent/rules/ANTIGRAVITY_INSTRUCTIONS.md",
    "content": "# Antigravity Agent Instructions for Dragonfly\n\n**READ [AGENTS.md](../../AGENTS.md)**\n\nAll project information, workflows, patterns, and guidelines are in `AGENTS.md`.\n"
  },
  {
    "path": ".circleci/config.yml",
    "content": "version: 2.1\n\nmachine: true\n\njobs:\n  build-ubuntu:\n      docker: \n        - image: ghcr.io/romange/ubuntu-dev:22\n      steps:\n        - checkout\n        - run:\n            name: Set up environment\n            environment:\n              BUILD_TYPE: Debug            \n            command: | \n              git submodule update --init --recursive\n              cmake -B build -DCMAKE_BUILD_TYPE=${BUILD_TYPE} -GNinja -DCMAKE_CXX_COMPILER_LAUNCHER=ccache              \n        - run:\n            name: Build & Test\n            command: | \n              cd build && pwd \n              ninja -j4 src/all\n              ctest -V -L DFLY\n          \n\n        \n# Orchestrate our job run sequence\nworkflows:\n  build_and_test:\n    jobs:\n      - build-ubuntu\n"
  },
  {
    "path": ".clang-format",
    "content": "# ---\n# We'll use defaults from the Google style, but with 2-space indentation.\nBasedOnStyle: Google\nIndentWidth: 2\nColumnLimit: 100\n---\nLanguage: Cpp\nAllowShortLoopsOnASingleLine: false\nAllowShortFunctionsOnASingleLine: false\nAllowShortIfStatementsOnASingleLine: false\nAlwaysBreakTemplateDeclarations: false\nPackConstructorInitializers: NextLine\nDerivePointerAlignment: false\nPointerAlignment: Left\nBasedOnStyle: Google\nColumnLimit: 100\n---\nLanguage: Proto\nBasedOnStyle: Google\n"
  },
  {
    "path": ".clang-tidy",
    "content": "---\n\nChecks: >\n  -abseil-no-namespace,\n  bugprone*,\n  # Sadly narrowing conversions is too noisy\n  -bugprone-narrowing-conversions,\n  -bugprone-easily-swappable-parameters,\n  -bugprone-branch-clone,\n  -bugprone-implicit-widening-of-multiplication-result,\n  -bugprone-too-small-loop-variable,\n  -bugprone-reserved-identifier,\n  boost-use-to-string,\n  performance*,\n  -cert-err58-cpp,\n  -cert-dcl58-cpp,  # Ignore std changes\n  -cert-dcl51-cpp,  # bugprone-reserved-identifier\n  # Doesn't work with abseil flags\n  clang-analyzer*,\n  google-*,\n  -google-runtime-int,\n  -google-readability-*,\n  -google-build-using-namespace,\n  misc-definitions-in-headers,\n  misc-misleading*,\n  misc-misplaced-const,\n  misc-new-delete-overloads,\n  misc-non-copyable-objects,\n  misc-redundant-expression,\n  misc-static-assert,\n  misc-throw-by-value-catch-by-reference,\n  misc-unconventional-assign-operator,\n  misc-uniqueptr-reset-release,\n  misc-unused-alias-decls,\n  misc-unused-using-decls,\n  modernize-deprecated-headers,\n  modernize-macro-to-enum,\n  modernize-make-shared,\n  modernize-make-unique,\n  modernize-pass-by-value,\n  modernize-raw-string-literal,\n  modernize-redundant-void-arg,\n  modernize-replace-disallow-copy-and-assign-macro,\n  modernize-return-braced-init-list,\n  modernize-shrink-to-fit,\n  modernize-unary-static-assert,\n  modernize-use-emplace,\n  modernize-use-equals-delete,\n  modernize-use-noexcept,\n  modernize-use-transparent-functors,\n  modernize-use-uncaught-exceptions,\n  modernize-use-using,\n  readability-avoid-const-params-in-decls,\n  readability-const-return-type,\n  readability-container-contains,\n  readability-container-size-empty,\n  readability-delete-null-pointer,\n  readability-duplicate-include,\n  readability-function-size,\n  readability-identifier-naming,\n  -readability-inconsistent-declaration-parameter-name,\n  readability-make-member-function-const,\n  readability-misplaced-array-index,\n  
readability-named-parameter,\n  readability-non-const-parameter,\n  readability-redundant-access-specifiers,\n  readability-redundant-control-flow,\n  readability-redundant-declaration,\n  readability-redundant-function-ptr-dereference,\n  readability-redundant-member-init,\n  readability-redundant-preprocessor,\n  readability-redundant-smartptr-get,\n  readability-redundant-string-cstr,\n  readability-redundant-string-init,\n  readability-simplify-subscript-expr,\n  readability-static-definition-in-anonymous-namespace,\n  readability-string-compare,\n  readability-suspicious-call-argument,\n  readability-uniqueptr-delete-release,\n  readability-use-anyofallof\n\n\n# Disabled because they're currently too disruptive, but one day might be nice to have:\n# modernize-use-nullptr,\n# modernize-use-equals-default,\n# readability-qualified-auto,\n\nCheckOptions:\n  - key: bugprone-narrowing-conversions.WarnOnIntegerNarrowingConversion\n    value: false\n  - key: bugprone-narrowing-conversions.WarnOnEquivalentBitWidth\n    value: false\n"
  },
  {
    "path": ".clangd",
    "content": "Diagnostics:\n  UnusedIncludes: None\n  MissingIncludes: None\n  Includes:\n    IgnoreHeader: base/*.h\n\nCompileFlags:\n  CompilationDatabase: build-dbg/       # Search for compile_commands.json\n"
  },
  {
    "path": ".claude/hooks/format-after-edit.sh",
    "content": "#!/bin/bash\n# Hook to automatically format files after Edit/Write operations\n# Filters out src/redis directory from formatting\n\n# Read JSON input from stdin\nINPUT=$(cat)\nFILE_PATH=$(echo \"$INPUT\" | jq -r '.tool_input.file_path // empty')\n\n# Skip if no file path\nif [ -z \"$FILE_PATH\" ]; then\n  exit 0\nfi\n\n# Skip if file is in src/redis directory\nif [[ \"$FILE_PATH\" == */src/redis/* ]]; then\n  echo \"Skipping formatting for src/redis file: $FILE_PATH\" >&2\n  exit 0\nfi\n\n# Skip if file doesn't exist\nif [ ! -f \"$FILE_PATH\" ]; then\n  exit 0\nfi\n\n# Run pre-commit on the file\npre-commit run --files \"$FILE_PATH\" 2>&1\n\n# Always exit 0 to not block the operation even if formatting fails\nexit 0\n"
  },
  {
    "path": ".claude/settings.json",
    "content": "{\n  \"permissions\": {\n    \"allow\": [\n      \"Read($CLAUDE_PROJECT_DIR/**)\",\n      \"Edit($CLAUDE_PROJECT_DIR/**)\",\n      \"Write($CLAUDE_PROJECT_DIR/**)\",\n      \"Bash(./*_test:*)\",\n      \"Bash(ninja:*)\",\n      \"Bash(git add:*)\",\n      \"Bash(git reset:*)\",\n      \"Bash(gh issue view:*)\",\n      \"Bash(git log:*)\",\n      \"Bash(git show:*)\",\n      \"WebSearch\",\n      \"Bash(grep:*)\",\n      \"Bash(pre-commit run:*)\",\n      \"Bash(clang-format:*)\",\n      \"Bash(git checkout:*)\",\n      \"Bash(tee:*)\",\n      \"Bash(sort:*)\",\n      \"Bash(git patch-id:*)\"\n    ]\n  },\n  \"hooks\": {\n    \"PostToolUse\": [\n      {\n        \"matcher\": \"Edit|Write\",\n        \"hooks\": [\n          {\n            \"type\": \"command\",\n            \"command\": \"\\\"$CLAUDE_PROJECT_DIR\\\"/.claude/hooks/format-after-edit.sh\",\n            \"timeout\": 30,\n            \"statusMessage\": \"Formatting code...\"\n          }\n        ]\n      }\n    ]\n  }\n}\n"
  },
  {
    "path": ".claude/skills/reproduce-fuzz-crash/SKILL.md",
    "content": "---\nname: reproduce-fuzz-crash\ndescription: >\n  Reproduce AFL++ fuzz crashes from GitHub Actions. Use when user provides a\n  GitHub Actions fuzz run URL and wants to reproduce and analyze the crash locally.\nargument-hint: <github-actions-run-url>\nallowed-tools: Bash, Read, Grep, Glob, Write\n---\n\n# Reproduce Fuzz Crash\n\nGiven a GitHub Actions fuzz run URL, download crash artifacts, triage them\nwith `fuzz/triage_crashes.sh`, and produce a crash analysis report.\n\n**Input**: `$ARGUMENTS` — a GitHub Actions run URL like:\n`https://github.com/dragonflydb/dragonfly/actions/runs/22906484769`\nor with query params like `?pr=6855`.\n\n## Workflow\n\n### Step 1: Parse the URL\n\nExtract `owner/repo` and `run_id` from the URL.\n\n```\nhttps://github.com/{owner}/{repo}/actions/runs/{run_id}[?...]\n```\n\nStrip any query parameters from `run_id`.\n\n### Step 2: Download artifacts\n\nList crash artifacts via the GitHub API, then download each as a `.zip` directly:\n\n**IMPORTANT**: Run each command as a separate Bash tool call (no `&&` chaining)\nto ensure auto-approval works with the user's permission patterns.\n\n```bash\n# List artifacts — filter for names containing \"crash\"\ngh api repos/{owner}/{repo}/actions/runs/{run_id}/artifacts\n\n# Create output directory\nmkdir -p /tmp/fuzz-repro-{run_id}\n\n# Download each crash artifact by ID (separate command)\ngh api repos/{owner}/{repo}/actions/artifacts/{artifact_id}/zip > /tmp/fuzz-repro-{run_id}/<artifact-name>.zip\n```\n\nThis gives real `.zip` files that the triage script can consume directly.\n\nIf no crash artifacts are found, report that the run has no crash artifacts and stop.\n\nNote: there may be duplicate artifact names (same name, different IDs) from\nretried jobs. 
Download the **most recent** one (highest artifact ID).\n\n### Step 3: Determine mode\n\nInfer the protocol mode from the artifact name:\n- Contains \"memcache\" → `memcache`\n- Otherwise → `resp`\n\n### Step 4: Check Dragonfly binary\n\nCheck if the debug binary already exists and runs:\n\n```bash\n./build-dbg/dragonfly --version\n```\n\nOnly build if the binary doesn't exist or fails to run:\n\n```bash\nninja -C build-dbg dragonfly\n```\n\nIf `build-dbg` doesn't exist, run `./helio/blaze.sh` first.\n\n### Step 5: Run triage_crashes.sh\n\nFor each zip file, run:\n\n```bash\n./fuzz/triage_crashes.sh ./build-dbg/dragonfly <mode> /tmp/fuzz-repro-{run_id}/<artifact-name>.zip\n```\n\nCapture the full output.\n\n### Step 6: Analyze and report\n\nParse the triage output for confirmed crashes. For each confirmed crash:\n\n1. **Read the source** at the crash location — use the stack trace to identify\n   the source file and line number, then read that code.\n2. **Provide analysis**: likely root cause, what to investigate.\n\nPrint a structured report:\n\n```\n## Fuzz Crash Report\n\n**Run**: {url}\n**Artifacts**: {number} crash(es) found\n\n---\n\n### Crash NNNNNN\n\n**Reproduced**: Yes / No (false positive)\n**Signal**: SIGABRT (6) / SIGSEGV (11) / etc.\n\n**Stack trace**:\n\\```\n<stack trace from triage output>\n\\```\n\n**Analysis**:\n<1-3 sentences explaining the likely root cause based on the stack trace,\nthe assertion message, and the crash input. Identify the source file and\nline number. Suggest what to investigate.>\n```\n\n## Important Notes\n\n- The triage script uses port **6379** (resp) or **11211** (memcache).\n  Ensure no other Dragonfly or Redis instance is using these ports.\n- The script adds `--rename_command` flags to avoid false positives from\n  commands like DEBUG SLEEP that the fuzzer might generate.\n- Some crashes are non-deterministic (thread timing). 
The script reports\n  these as \"FALSE POSITIVE\" — note this clearly, it doesn't mean the bug\n  is invalid, just that it didn't reproduce on this run.\n- The script handles its own cleanup of Dragonfly processes.\n- Do NOT delete `/tmp/fuzz-repro-{run_id}/` — the user may want to inspect it.\n- If a `gh api` call fails with a permissions error, suggest the user\n  authenticate with `gh auth login`.\n"
  },
  {
    "path": ".ct.yaml",
    "content": "# See https://github.com/helm/chart-testing#configuration\nremote: origin\ntarget-branch: main\nchart-dirs:\n  - contrib/charts\nhelm-extra-args: --debug --timeout 60s\ncheck-version-increment: false\nvalidate-maintainers: false\n"
  },
  {
    "path": ".cursorrules",
    "content": "# Cursor AI Rules for Dragonfly\n\n**READ `AGENTS.md`**\n\nAll project information, workflows, patterns, and guidelines are in `AGENTS.md`.\n"
  },
  {
    "path": ".devcontainer/alpine/devcontainer.json",
    "content": "{\n  \"name\": \"alpine-dev\",\n  \"image\": \"ghcr.io/romange/alpine-dev\",\n  \"customizations\": {\n    \"vscode\": {\n      \"extensions\": [\n        \"ms-vscode.cpptools\",\n        \"ms-vscode.cmake-tools\",\n        \"ms-vscode.cpptools-themes\",\n        \"twxs.cmake\"\n      ],\n      \"settings\": {\n        \"cmake.buildDirectory\": \"/build\",\n        \"extensions.ignoreRecommendations\": true,\n        \"cmake.configureArgs\": []\n      }\n    }\n  },\n  \"mounts\": [\n    \"source=alpine-vol,target=/build,type=volume\"\n  ],\n  \"postCreateCommand\": \".devcontainer/alpine/post-create.sh ${containerWorkspaceFolder}\"\n}\n"
  },
  {
    "path": ".devcontainer/alpine/post-create.sh",
    "content": "#!/bin/bash\n\ncontainerWorkspaceFolder=$1\ngit config --global --add safe.directory ${containerWorkspaceFolder}\ngit config --global --add safe.directory ${containerWorkspaceFolder}/helio\nmkdir -p /root/.local/share/CMakeTools\n"
  },
  {
    "path": ".devcontainer/fedora/devcontainer.json",
    "content": "{\n  \"name\": \"fedora30\",\n  \"image\": \"ghcr.io/romange/fedora:30\",\n  \"customizations\": {\n    \"vscode\": {\n      \"extensions\": [\n        \"ms-vscode.cpptools\",\n        \"ms-vscode.cmake-tools\",\n        \"ms-vscode.cpptools-themes\",\n        \"twxs.cmake\"\n      ],\n      \"settings\": {\n        \"cmake.buildDirectory\": \"/build\",\n        \"extensions.ignoreRecommendations\": true\n      }\n    }\n  },\n  \"mounts\": [\n    \"source=fedora-vol,target=/build,type=volume\"\n  ]\n}\n"
  },
  {
    "path": ".devcontainer/fedora41/devcontainer.json",
    "content": "{\n  \"name\": \"fedora41\",\n  \"image\": \"ghcr.io/romange/fedora:41\",\n  \"customizations\": {\n    \"vscode\": {\n      \"extensions\": [\n        \"ms-vscode.cpptools\",\n        \"ms-vscode.cmake-tools\",\n        \"ms-vscode.cpptools-themes\",\n        \"twxs.cmake\"\n      ],\n      \"settings\": {\n        \"cmake.buildDirectory\": \"/build\",\n        \"extensions.ignoreRecommendations\": true\n      }\n    }\n  },\n  \"mounts\": [\n    \"source=fedora41-vol,target=/build,type=volume\"\n  ]\n}\n"
  },
  {
    "path": ".devcontainer/ubuntu20/cmake-tools-kits.json",
    "content": "[\n  {\n    \"name\": \"GCC x86_64-linux-gnu\",\n    \"compilers\": {\n      \"C\": \"gcc\",\n      \"CXX\": \"g++\"\n    },\n    \"isTrusted\": true\n  }\n]\n"
  },
  {
    "path": ".devcontainer/ubuntu20/devcontainer.json",
    "content": "{\n  \"name\": \"ubuntu20\",\n  \"image\": \"ghcr.io/romange/ubuntu-dev:20\",\n  \"customizations\": {\n    \"vscode\": {\n      \"extensions\": [\n        \"ms-vscode.cpptools\",\n        \"ms-vscode.cmake-tools\",\n        \"ms-vscode.cpptools-themes\",\n        \"twxs.cmake\"\n      ],\n      \"settings\": {\n        \"cmake.buildDirectory\": \"/build\",\n        \"extensions.ignoreRecommendations\": true\n      }\n    }\n  },\n  \"mounts\": [\n    \"source=ubuntu20-vol,target=/build,type=volume\"\n  ],\n  \"postCreateCommand\": \".devcontainer/ubuntu20/post-create.sh ${containerWorkspaceFolder}\"\n}\n"
  },
  {
    "path": ".devcontainer/ubuntu20/post-create.sh",
    "content": "#!/bin/bash\n\ncontainerWorkspaceFolder=$1\ngit config --global --add safe.directory '*'\nmkdir -p /root/.local/share/CMakeTools\ncp ${containerWorkspaceFolder}/.devcontainer/ubuntu20/cmake-tools-kits.json /root/.local/share/CMakeTools/\n"
  },
  {
    "path": ".devcontainer/ubuntu20-gcc14/devcontainer.json",
    "content": "{\n  \"name\": \"ubuntu20-gcc14\",\n  \"image\": \"ghcr.io/romange/ubuntu-dev:20-gcc14\",\n  \"customizations\": {\n    \"vscode\": {\n      \"extensions\": [\n        \"ms-vscode.cpptools\",\n        \"ms-vscode.cmake-tools\",\n        \"ms-vscode.cpptools-themes\",\n        \"twxs.cmake\",\n        \"mk12.better-git-line-blame\"\n      ],\n      \"settings\": {\n        \"cmake.buildDirectory\": \"/build\",\n        \"cmake.configureArgs\": [\n          \"-DWITH_AWS=OFF\",\n          \"-DWITH_GCP=OFF\",\n          \"-DWITH_GPERF=OFF\"\n        ],\n        \"extensions.ignoreRecommendations\": true\n      }\n    }\n  },\n  \"mounts\": [\n    \"source=ubuntu20-gcc14-vol,target=/build,type=volume\"\n  ],\n  \"postCreateCommand\": \".devcontainer/ubuntu20/post-create.sh ${containerWorkspaceFolder}\"\n}\n"
  },
  {
    "path": ".devcontainer/ubuntu22/devcontainer.json",
    "content": "{\n  \"name\": \"ubuntu22\",\n  \"image\": \"ghcr.io/romange/ubuntu-dev:22\",\n  \"customizations\": {\n    \"vscode\": {\n      \"extensions\": [\n        \"ms-vscode.cpptools\",\n        \"ms-vscode.cmake-tools\",\n        \"ms-vscode.cpptools-themes\",\n        \"twxs.cmake\"\n      ],\n      \"settings\": {\n        \"cmake.buildDirectory\": \"/build\",\n        \"extensions.ignoreRecommendations\": true\n      }\n    }\n  },\n  \"mounts\": [\n    \"source=ubuntu22-vol,target=/build,type=volume\"\n  ],\n  \"postCreateCommand\": \".devcontainer/ubuntu22/post-create.sh ${containerWorkspaceFolder}\"\n}\n"
  },
  {
    "path": ".devcontainer/ubuntu22/post-create.sh",
    "content": "#!/bin/bash\n\ncontainerWorkspaceFolder=$1\ngit config --global --add safe.directory ${containerWorkspaceFolder}\ngit config --global --add safe.directory ${containerWorkspaceFolder}/helio\nmkdir -p /root/.local/share/CMakeTools\n"
  },
  {
    "path": ".devcontainer/ubuntu24/devcontainer.json",
    "content": "{\n  \"name\": \"ubuntu24\",\n  \"image\": \"ghcr.io/romange/ubuntu-dev:24\",\n  \"customizations\": {\n    \"vscode\": {\n      \"extensions\": [\n        \"ms-vscode.cpptools\",\n        \"ms-vscode.cmake-tools\",\n        \"ms-vscode.cpptools-themes\",\n        \"twxs.cmake\"\n      ],\n      \"settings\": {\n        \"cmake.buildDirectory\": \"/build\",\n        \"extensions.ignoreRecommendations\": true\n      }\n    }\n  },\n  \"mounts\": [\n    \"source=ubuntu24-vol,target=/build,type=volume\"\n  ],\n  \"postCreateCommand\": \".devcontainer/ubuntu24/post-create.sh ${containerWorkspaceFolder}\"\n}\n"
  },
  {
    "path": ".dockerignore",
    "content": "_deps/*\nbuild-*\ntools/packaging/*\n.github/*"
  },
  {
    "path": ".github/ISSUE_TEMPLATE/bug_report.md",
    "content": "---\nname: Bug report\nabout: Create a report to help Dragonfly DB improve\ntitle: ''\nlabels: 'bug'\nassignees: ''\n\n---\n\n**Describe the bug**\nA clear and concise description of what the bug is.\n\n**To Reproduce**\nSteps to reproduce the behavior:\n1. Insert records using `command`\n2. Query records using `command`\n3. Scroll down to '....'\n4. See error\n\n**Expected behavior**\nA clear and concise description of what you expected to happen.\n\n**Screenshots**\nIf applicable, add screenshots to help explain your problem.\n\n**Environment (please complete the following information):**\n - OS: [ubuntu 20.04]\n - Kernel: # Command: `uname -a`\n - Containerized?: [Bare Metal, Docker, Docker Compose, Docker Swarm, Kubernetes, Other]\n - Dragonfly Version: [e.g. 0.3.0]\n\n**Reproducible Code Snippet**\n```\n# Minimal code snippet to reproduce this bug\n```\n\n**Additional context**\nAdd any other context about the problem here.\n"
  },
  {
    "path": ".github/ISSUE_TEMPLATE/config.yml",
    "content": "blank_issues_enabled: true\n\ncontact_links:\n  - name: Dragonfly DB Discord Channel\n    url: https://discord.gg/HsPjXGVH85\n    about: Get help! Ask questions, get support, and share ideas.\n\n  - name: GitHub Discussions\n    url: https://github.com/dragonflydb/dragonfly/discussions\n    about: Ask questions. Benchmark questions belong here.\n\n  - name: Twitter\n    url: https://twitter.com/romanger\n    about: Follow Roman on Twitter\n"
  },
  {
    "path": ".github/ISSUE_TEMPLATE/feature_request.md",
    "content": "---\nname: Feature request\nabout: Suggest an idea for Dragonfly DB\ntitle: ''\nlabels: 'feature request'\nassignees: ''\n\n---\n**Did you search GitHub Issues and GitHub Discussions First?**\nMany users may find their feature is already being discussed. Help us keep duplicates to a minimum by searching for your feature first to see if it is already in progress.\n\n**Is your feature request related to a problem? Please describe.**\nA clear and concise description of what the problem is. Ex. I'm always frustrated when [...]\n\n**Describe the solution you'd like**\nA clear and concise description of what you want to happen.\n\n**Describe alternatives you've considered**\nA clear and concise description of any alternative solutions or features you've considered.\n\n**Additional context**\nAdd any other context or screenshots about the feature request here.\n"
  },
  {
    "path": ".github/PULL_REQUEST_TEMPLATE.md",
    "content": "<!--\n**Commits Must Be Signed and Your PR title must conform to the conventional commit spec**\n  * See: https://github.com/dragonflydb/dragonfly/blob/main/CONTRIBUTING.md\n  * Please follow the section on `pre-commit hooks`, a linter will validate before you push\n\n  Example PR Title: <type>(<scope>)!: <description>\n\n  * `type` = bug, chore, feat, fix, docs, build, style, refactor, perf, test\n  * `!` = OPTIONAL: signals a breaking change\n  * `scope` = Optional when `type` is \"chore\" or \"docs\"\n  * `description` = short description of the change\n\nExamples:\n\n  * chore(examples): Clarify `docker` usage #120\n  * docs(readme): Fix Example Links #121\n  * feat(ingest)!: Add new ingest #122\n  * fix(ingest): Refactor for loop to list comprehension #123\n-->\n"
  },
  {
    "path": ".github/actions/builder/action.yml",
    "content": "name: Build Dragonfly\ndescription: \"Build Dragonfly with configurable CMake options\"\n\ninputs:\n  build-type:\n    description: \"CMake build type (Debug or Release)\"\n    required: false\n    default: 'Debug'\n    type: string\n  build-dir:\n    description: \"Build directory name (relative to workspace root)\"\n    required: false\n    default: 'build'\n    type: string\n  c-compiler:\n    description: \"C compiler to use\"\n    required: false\n    default: ''\n    type: string\n  cxx-compiler:\n    description: \"C++ compiler to use\"\n    required: false\n    default: ''\n    type: string\n  cxx-flags:\n    description: \"C++ compiler flags\"\n    required: false\n    default: '-no-pie'\n    type: string\n  sanitizers:\n    description: \"Enable sanitizers (NoSanitizers or Sanitizers)\"\n    required: false\n    default: 'NoSanitizers'\n    type: string\n  with-aws:\n    description: \"Build with AWS support\"\n    required: false\n    default: 'ON'\n    type: string\n  targets:\n    description: \"Build targets to compile\"\n    required: false\n    default: 'src/all'\n    type: string\n\nruns:\n  using: \"composite\"\n  steps:\n    - name: Configure CMake\n      shell: bash\n      run: |\n        # Set sanitizer flags\n        ASAN=\"OFF\"\n        USAN=\"OFF\"\n        if [ '${{ inputs.sanitizers }}' = 'Sanitizers' ]; then\n          echo \"Enabling ASAN/USAN\"\n          ASAN=\"ON\"\n          USAN=\"ON\"\n        fi\n\n        # Build cmake command array\n        CMAKE_CMD=(cmake\n          -B \"${GITHUB_WORKSPACE}/${{ inputs.build-dir }}\"\n          -DCMAKE_BUILD_TYPE=\"${{ inputs.build-type }}\"\n          -GNinja\n        )\n\n        # Add optional compiler flags\n        if [ -n \"${{ inputs.c-compiler }}\" ]; then\n          CMAKE_CMD+=(-DCMAKE_C_COMPILER=\"${{ inputs.c-compiler }}\")\n        fi\n        if [ -n \"${{ inputs.cxx-compiler }}\" ]; then\n          CMAKE_CMD+=(-DCMAKE_CXX_COMPILER=\"${{ inputs.cxx-compiler }}\")\n 
       fi\n        if [ -n \"${{ inputs.cxx-flags }}\" ]; then\n          CMAKE_CMD+=(-DCMAKE_CXX_FLAGS=\"${{ inputs.cxx-flags }}\")\n        fi\n\n        # Add fixed options\n        CMAKE_CMD+=(\n          -DPRINT_STACKTRACES_ON_SIGNAL=ON\n          -DWITH_AWS=\"${{ inputs.with-aws }}\"\n          -DWITH_GCP=OFF\n          -DWITH_UNWIND=OFF\n          -DWITH_GPERF=OFF\n          -DWITH_ASAN=\"${ASAN}\"\n          -DWITH_USAN=\"${USAN}\"\n        )\n\n        # Execute CMake\n        echo \"Running: ${CMAKE_CMD[@]}\"\n        \"${CMAKE_CMD[@]}\"\n\n    - name: Build\n      shell: bash\n      run: |\n        cd ${GITHUB_WORKSPACE}/${{ inputs.build-dir }}\n        echo \"Building target: ${{ inputs.targets }}\"\n        ninja ${{ inputs.targets }}\n"
  },
  {
    "path": ".github/actions/fuzzing/action.yml",
    "content": "name: Run AFL++ Fuzzing\ndescription: \"Run AFL++ fuzzing campaign with configurable parameters\"\n\ninputs:\n  mode:\n    description: \"Fuzzing mode: 'smoke' (stop on first crash) or 'long' (collect all crashes)\"\n    required: true\n    type: string\n  target:\n    description: \"Fuzz target: 'resp' or 'memcache'\"\n    required: false\n    default: 'resp'\n    type: string\n  duration-minutes:\n    description: \"Fuzzing duration in minutes\"\n    required: true\n    type: string\n  run-number:\n    description: \"GitHub run number for artifact naming\"\n    required: true\n    type: string\n  extra-seeds-dir:\n    description: \"Directory with additional seed files (initial fuzzer inputs) to merge into the corpus\"\n    required: false\n    default: ''\n  focus-commands:\n    description: \"JSON list of command names for the mutator to prefer (~70% selection weight)\"\n    required: false\n    default: ''\n  build:\n    description: \"Build the binary before fuzzing. 
Set to 'false' when reusing a binary built by a previous action call in the same job — fails if the binary is missing.\"\n    required: false\n    default: 'true'\n\noutputs:\n  hang_count:\n    description: \"Number of unique hangs found during fuzzing\"\n    value: ${{ steps.analyze.outputs.hang_count }}\n  crash_count:\n    description: \"Number of unique crashes found during fuzzing\"\n    value: ${{ steps.analyze.outputs.crash_count }}\n\nruns:\n  using: \"composite\"\n  steps:\n    - name: Verify AFL++ installation\n      shell: bash\n      run: |\n        echo \"Verifying AFL++ installation...\"\n        afl-fuzz -h | head -5 || true\n\n        # Verify AFL++ compilers are available\n        which afl-clang-fast\n        which afl-clang-fast++\n        afl-clang-fast --version\n\n    - name: Configure system for fuzzing\n      shell: bash\n      run: |\n        echo \"Configuring system for AFL++ fuzzing...\"\n        afl-system-config || true\n        echo core > /proc/sys/kernel/core_pattern || echo \"Warning: Could not set core_pattern\"\n        echo \"System configured\"\n\n    - name: Build Dragonfly with AFL++\n      shell: bash\n      run: |\n        if [ \"${{ inputs.build }}\" = \"false\" ]; then\n          if [ ! 
-f \"./build-dbg/dragonfly\" ]; then\n            echo \"::error::build=false but binary not found at ./build-dbg/dragonfly\"\n            exit 1\n          fi\n          echo \"Skipping build, reusing existing binary\"\n          ls -lh ./build-dbg/dragonfly\n        else\n          echo \"Building Dragonfly with AFL++ instrumentation...\"\n          ./helio/blaze.sh -DUSE_AFL:BOOL=ON\n          cd ./build-dbg && ninja dragonfly && cd ..\n          echo \"Build complete\"\n          ls -lh ./build-dbg/dragonfly\n        fi\n\n    - name: Merge targeted seeds\n      shell: bash\n      if: ${{ inputs.extra-seeds-dir != '' }}\n      run: |\n        EXTRA_DIR=\"${{ inputs.extra-seeds-dir }}\"\n        SEEDS_DIR=\"fuzz/seeds/${{ inputs.target }}\"\n\n        # Copy only seed files, skip metadata like focus_commands.json\n        COUNT=$(find \"$EXTRA_DIR\" -maxdepth 1 -type f ! -name '*.json' 2>/dev/null | wc -l)\n        if [ \"$COUNT\" -gt 0 ]; then\n          echo \"Merging ${COUNT} targeted seeds into corpus\"\n          find \"$EXTRA_DIR\" -maxdepth 1 -type f ! 
-name '*.json' -exec cp -t \"$SEEDS_DIR/\" {} +\n        else\n          echo \"No targeted seed files to merge\"\n        fi\n\n    - name: Run AFL++ fuzzing\n      shell: bash\n      run: |\n        MODE=\"${{ inputs.mode }}\"\n        DURATION_MINUTES=\"${{ inputs.duration-minutes }}\"\n\n        echo \"Starting AFL++ fuzzing...\"\n        echo \"Configuration:\"\n        echo \"  Target: ${{ inputs.target }}\"\n        echo \"  Mode: ${MODE}\"\n        echo \"  Duration: ${DURATION_MINUTES} minutes\"\n\n        cd fuzz\n        export BUILD_DIR=\"${GITHUB_WORKSPACE}/build-dbg\"\n\n        # Run fuzzer with timeout\n        timeout ${DURATION_MINUTES}m ./run_fuzzer.sh \"${{ inputs.target }}\" || EXIT_CODE=$?\n\n        # timeout returns 124 if it timed out (expected), 0 if finished naturally\n        if [ \"${EXIT_CODE:-0}\" -eq 124 ]; then\n          echo \"Fuzzing completed (timeout reached)\"\n        elif [ \"${EXIT_CODE:-0}\" -eq 0 ]; then\n          echo \"Fuzzing completed normally\"\n        else\n          echo \"::error::Fuzzer failed with exit code ${EXIT_CODE}\"\n          exit 1\n        fi\n      env:\n        # Mode-specific environment variables\n        AFL_BENCH_UNTIL_CRASH: ${{ inputs.mode == 'smoke' && '1' || '' }}\n        AFL_NO_UI: 1\n        AFL_AUTORESUME: 1\n        AFL_I_DONT_CARE_ABOUT_MISSING_CRASHES: 1\n        AFL_TESTCACHE_SIZE: ${{ inputs.mode == 'smoke' && '50' || '500' }}\n        AFL_SKIP_CPUFREQ: 1\n        AFL_FAST_CAL: ${{ inputs.mode == 'long' && '1' || '' }}\n        AFL_PERSISTENT_RECORD: 1000\n        AFL_CUSTOM_MUTATOR_ONLY: 1\n        FUZZ_FOCUS_COMMANDS: ${{ inputs.focus-commands }}\n\n    - name: Analyze fuzzing results\n      shell: bash\n      if: always()\n      id: analyze\n      run: |\n        echo \"Analyzing fuzzing results...\"\n\n        TARGET=\"${{ inputs.target }}\"\n        CRASHES_DIR=\"fuzz/artifacts/${TARGET}/default/crashes\"\n        HANGS_DIR=\"fuzz/artifacts/${TARGET}/default/hangs\"\n        
QUEUE_DIR=\"fuzz/artifacts/${TARGET}/default/queue\"\n\n        # Count results\n        CRASH_COUNT=0\n        HANG_COUNT=0\n        CORPUS_SIZE=0\n\n        if [ -d \"$CRASHES_DIR\" ]; then\n          CRASH_COUNT=$(find \"$CRASHES_DIR\" -maxdepth 1 -type f -name 'id:*' 2>/dev/null | wc -l)\n        fi\n\n        if [ -d \"$HANGS_DIR\" ]; then\n          HANG_COUNT=$(find \"$HANGS_DIR\" -maxdepth 1 -type f -name 'id:*' ! -name 'RECORD:*' 2>/dev/null | wc -l)\n        fi\n\n        if [ -d \"$QUEUE_DIR\" ]; then\n          CORPUS_SIZE=$(find \"$QUEUE_DIR\" -type f ! -name \".state\" 2>/dev/null | wc -l)\n        fi\n\n        echo \"Fuzzing Results:\"\n        echo \"   Crashes: $CRASH_COUNT\"\n        echo \"   Hangs: $HANG_COUNT\"\n        echo \"   Corpus size: $CORPUS_SIZE\"\n\n        # Show statistics for long mode\n        if [ \"${{ inputs.mode }}\" = \"long\" ]; then\n          STATS_FILE=\"fuzz/artifacts/${TARGET}/default/fuzzer_stats\"\n          if [ -f \"$STATS_FILE\" ]; then\n            echo \"\"\n            echo \"Key Statistics:\"\n            grep -E \"execs_done|execs_per_sec|paths_total|corpus_count|unique_crashes|unique_hangs|last_crash|last_hang\" \"$STATS_FILE\" || true\n          fi\n        fi\n\n        echo \"hang_count=${HANG_COUNT}\" >> \"$GITHUB_OUTPUT\"\n        echo \"crash_count=${CRASH_COUNT}\" >> \"$GITHUB_OUTPUT\"\n\n        # Fail the job if crashes or hangs were found\n        if [ \"$CRASH_COUNT\" -gt 0 ]; then\n          echo \"::error::Found $CRASH_COUNT crash(es)!\"\n          echo \"\"\n          echo \"Crash input files (excluding RECORD):\"\n          find \"$CRASHES_DIR\" -maxdepth 1 -name 'id:*' ! -name 'RECORD:*' -type f | sort || true\n          exit 1\n        fi\n\n        if [ \"$HANG_COUNT\" -gt 0 ]; then\n          echo \"::error::Found $HANG_COUNT hang(s)!\"\n          echo \"\"\n          echo \"Hang input files (excluding RECORD):\"\n          find \"$HANGS_DIR\" -maxdepth 1 -name 'id:*' ! 
-name 'RECORD:*' -type f | sort || true\n          exit 1\n        fi\n\n        if [ \"$CORPUS_SIZE\" -gt 0 ]; then\n          echo \"No crashes found - fuzzing test passed!\"\n        else\n          echo \"No fuzzing artifacts found (fuzzer may not have started)\"\n        fi\n\n    - name: Package crash artifacts\n      shell: bash\n      if: failure() && steps.analyze.outputs.crash_count > 0\n      run: |\n        CRASHES_DIR=\"$(pwd)/fuzz/artifacts/${{ inputs.target }}/default/crashes\"\n\n        if [ ! -d \"$CRASHES_DIR\" ] || [ -z \"$(ls -A \"$CRASHES_DIR\" 2>/dev/null)\" ]; then\n          echo \"No crash artifacts to package\"\n          exit 0\n        fi\n\n        echo \"Raw crash directory contents:\"\n        ls -la \"$CRASHES_DIR\"\n\n        mkdir -p fuzz/packaged\n\n        # Find crash input files (not RECORD files)\n        find \"$CRASHES_DIR\" -maxdepth 1 -name 'id:*' ! -name 'RECORD:*' -type f | while read -r f; do\n          CRASH_ID=$(basename \"$f\" | sed 's/^id:\\([0-9]*\\),.*/\\1/')\n          echo \"Packaging crash ${CRASH_ID}...\"\n          if ( cd fuzz && ./package_crash.sh \"$CRASH_ID\" \"$CRASHES_DIR\" ); then\n            mv \"fuzz/crash-${CRASH_ID}.tar.gz\" fuzz/packaged/ 2>/dev/null || true\n          else\n            echo \"Warning: failed to package crash ${CRASH_ID}, continuing...\"\n          fi\n        done\n\n        echo \"Packaged crashes:\"\n        ls -lh fuzz/packaged/ 2>/dev/null || echo \"  (none)\"\n\n    - name: Upload crash artifacts\n      if: failure() && steps.analyze.outputs.crash_count > 0\n      uses: actions/upload-artifact@v4\n      with:\n        name: fuzz-${{ inputs.mode }}-${{ inputs.target }}-crashes-${{ inputs.run-number }}\n        path: |\n          fuzz/packaged/*.tar.gz\n          fuzz/artifacts/${{ inputs.target }}/default/fuzzer_stats\n        retention-days: 10\n        if-no-files-found: ignore\n\n    - name: Package hang artifacts\n      shell: bash\n      if: failure() && 
steps.analyze.outputs.hang_count > 0\n      run: |\n        HANGS_DIR=\"fuzz/artifacts/${{ inputs.target }}/default/hangs\"\n\n        if [ ! -d \"$HANGS_DIR\" ] || [ -z \"$(ls -A \"$HANGS_DIR\" 2>/dev/null)\" ]; then\n          echo \"No hang artifacts to package\"\n          exit 0\n        fi\n\n        mkdir -p fuzz/packaged_hangs\n        tar -czf \"fuzz/packaged_hangs/hangs-${{ inputs.target }}.tar.gz\" \\\n          -C \"$(dirname \"$HANGS_DIR\")\" hangs/\n\n        echo \"Packaged hangs:\"\n        ls -lh fuzz/packaged_hangs/\n\n    - name: Upload hang artifacts\n      if: failure() && steps.analyze.outputs.hang_count > 0\n      uses: actions/upload-artifact@v4\n      with:\n        name: fuzz-${{ inputs.mode }}-${{ inputs.target }}-hangs-${{ inputs.run-number }}\n        path: |\n          fuzz/packaged_hangs/*.tar.gz\n          fuzz/artifacts/${{ inputs.target }}/default/fuzzer_stats\n        retention-days: 10\n        if-no-files-found: ignore\n"
  },
  {
    "path": ".github/actions/lint-test-chart/action.yml",
    "content": "name: Lint test chart\ndescription: \"Run lint test chart\"\n\nruns:\n  using: \"composite\"\n  steps:\n      - name: Checkout\n        uses: actions/checkout@v4\n        with:\n          fetch-depth: 0\n\n      - name: Set up Helm\n        uses: azure/setup-helm@v4\n\n      - uses: actions/setup-python@v5\n        with:\n          python-version: \"3.9\"\n          check-latest: true\n\n      - name: Chart Rendering Tests\n        shell: bash\n        run: |\n          go test -v ./contrib/charts/dragonfly/...\n\n      - name: Set up chart-testing\n        uses: helm/chart-testing-action@v2.6.1\n\n      - name: Run chart-testing (list-changed)\n        id: list-changed\n        shell: bash\n        run: |\n          changed=$(ct list-changed --config .ct.yaml)\n          if [[ -n \"$changed\" ]]; then\n            echo \"changed=true\" >> $GITHUB_OUTPUT\n          fi\n\n      - name: Run chart-testing (lint)\n        shell: bash\n        run: |\n          ct \\\n            lint \\\n            --config .ct.yaml \\\n            ${{github.event_name == 'workflow_dispatch' && '--all'}} ;\n\n      - name: Create kind cluster\n        uses: helm/kind-action@v1\n\n      - name: Install Dependencies\n        shell: bash\n        run: |\n          curl -sL https://github.com/prometheus-operator/prometheus-operator/releases/download/v0.73.0/bundle.yaml | kubectl create -f -\n\n      - name: Getting cluster ready\n        shell: bash\n        run: |\n          kubectl label nodes chart-testing-control-plane key/node-kind=high-memory\n\n      - name: Run chart-testing (install)\n        shell: bash\n        run: |\n          ct \\\n            install \\\n            --config .ct.yaml \\\n            --debug \\\n            --helm-extra-set-args \"--set=image.repository=ghcr.io/${{ github.repository }},probes=null\" \\\n            ${{github.event_name == 'workflow_dispatch' && '--all'}} ;\n"
  },
  {
    "path": ".github/actions/multi-registry-docker-login/action.yml",
    "content": "name: 'Multi-Registry Docker Login'\ndescription: 'Authenticate with both GHCR and Google Artifact Registry'\ninputs:\n  GITHUB_TOKEN:\n    description: 'GitHub token for GHCR'\n    required: true\n  GCP_SA_KEY:\n    description: 'Google Service Account JSON key'\n    required: true\n\nruns:\n  using: \"composite\"\n  steps:\n    - name: Login to GHCR\n      uses: docker/login-action@v3\n      with:\n        registry: ghcr.io\n        username: ${{ github.repository_owner }}\n        password: ${{ inputs.GITHUB_TOKEN }}\n\n    - name: Login to Google Artifact Registry\n      uses: docker/login-action@v3\n      with:\n        registry: us-central1-docker.pkg.dev\n        username: _json_key\n        password: ${{ inputs.GCP_SA_KEY }}\n"
  },
  {
    "path": ".github/actions/regression-tests/action.yml",
    "content": "name: Regression Tests\ndescription: \"Run regression tests\"\n\ninputs:\n  dfly-executable:\n    required: true\n    type: string\n  gspace-secret:\n    required: false\n    type: string\n  run-only-on-ubuntu-latest:\n    # 'true' or 'false' cause boolean\n    # is not supported in composite actions\n    required: true\n    type: string\n  build-folder-name:\n    required: true\n    type: string\n  filter:\n    required: false\n    type: string\n  aws-access-key-id:\n    required: false\n    type: string\n    description: \"AWS access key ID (optional if using OIDC - credentials set by workflow)\"\n  aws-secret-access-key:\n    required: false\n    type: string\n    description: \"AWS secret access key (optional if using OIDC - credentials set by workflow)\"\n  s3-bucket:\n    required: true\n    type: string\n  epoll:\n    required: false\n    type: string\n\nruns:\n  using: \"composite\"\n  # bring back timeouts once composite actions start supporting them\n  # timeout-minutes: 20\n  steps:\n    - name: Sync valkey-search tests\n      uses: ./.github/actions/sync-valkey-tests\n\n    - name: Free disk space\n      if: contains(runner.labels, 'self-hosted') == false\n      shell: bash\n      run: |\n        echo \"===================Before freeing up space ============================================\"\n        df -h\n        rm -rf /hostroot/usr/share/dotnet\n        rm -rf /hostroot/usr/local/share/boost\n        rm -rf /hostroot/usr/local/lib/android\n        rm -rf /hostroot/opt/ghc\n        echo \"===================After freeing up space ============================================\"\n        df -h\n\n    - name: Install Python test requirements\n      shell: bash\n      run: |\n        cd ${GITHUB_WORKSPACE}/tests\n        # https://peps.python.org/pep-0668/#keep-the-marker-file-in-container-images\n        if compgen -G '/usr/lib/python3.*/EXTERNALLY-MANAGED' > /dev/null; then\n          pip3 install --break-system-packages -r 
dragonfly/requirements.txt\n        else\n          pip3 install -r dragonfly/requirements.txt\n        fi\n\n    - name: Run S3 snapshot tests with MinIO\n      if: inputs.s3-bucket != ''\n      shell: bash\n      run: |\n        echo \"=== Running S3 snapshot tests with local MinIO ===\"\n        cd ${GITHUB_WORKSPACE}/tests\n\n        export DRAGONFLY_PATH=\"${GITHUB_WORKSPACE}/${{inputs.build-folder-name}}/${{inputs.dfly-executable}}\"\n\n        # MinIO binary is downloaded and started by conftest.py when MINIO_S3_ENDPOINT is set\n        MINIO_S3_ENDPOINT=http://localhost:9000 timeout 10m pytest -k \"s3\" --timeout=300 --color=yes dragonfly/snapshot_test.py --log-cli-level=INFO -v\n\n    - name: Run PyTests\n      id: main\n      shell: bash\n      run: |\n        ls -l ${GITHUB_WORKSPACE}/\n        cd ${GITHUB_WORKSPACE}/tests\n        echo \"Current commit is ${{github.sha}}\"\n        # used by PyTests\n        export DRAGONFLY_PATH=\"${GITHUB_WORKSPACE}/${{inputs.build-folder-name}}/${{inputs.dfly-executable}}\"\n        export ROOT_DIR=\"${GITHUB_WORKSPACE}/tests/dragonfly/valkey_search\"\n        export UBSAN_OPTIONS=print_stacktrace=1:halt_on_error=1 # to crash on errors\n        export FILTER=\"${{inputs.filter}}\"\n\n        # Exclude large tests unless explicitly requested\n        if [[ \"$FILTER\" == \"large\" ]]; then\n          : # keep as-is, run only large tests\n        elif [[ -n \"$FILTER\" ]]; then\n          FILTER=\"(not large) and ($FILTER)\"\n        else\n          FILTER=\"not large\"\n        fi\n\n        if [[ \"${{inputs.epoll}}\" == 'epoll' ]]; then\n          FILTER=\"$FILTER and not exclude_epoll\"\n          # Run only replication tests with epoll\n          timeout 80m pytest -m \"$FILTER\" --durations=10 --timeout=300 --color=yes --json-report --json-report-file=report.json dragonfly --df force_epoll=true --log-cli-level=INFO || code=$?\n        else\n          # Run only replication tests with iouring\n          timeout 
80m pytest -m \"$FILTER\" --durations=10 --timeout=300 --color=yes --json-report --json-report-file=report.json dragonfly --log-cli-level=INFO || code=$?\n        fi\n\n        # timeout returns 124 if we exceeded the timeout duration\n        if [[ $code -eq 124 ]]; then\n          # Add an extra new line here because when tests timeout the first line below continues from the test failure name\n          echo \"\\n\"\n          echo \"🛑 🛑 🛑 🛑 🛑 🛑 🛑 🛑 🛑 🛑 🛑 🛑 🛑 🛑 🛑 🛑 🛑 🛑 🛑 🛑 🛑 🛑 🛑 🛑 🛑\"\n          echo \"🛑 🛑 🛑 🛑 🛑 🛑 🛑 🛑 🛑 🛑 TESTS TIMEDOUT 🛑 🛑 🛑 🛑 🛑 🛑 🛑 🛑 🛑 🛑\"\n          echo \"🛑 🛑 🛑 🛑 🛑 🛑 🛑 🛑 🛑 🛑 🛑 🛑 🛑 🛑 🛑 🛑 🛑 🛑 🛑 🛑 🛑 🛑 🛑 🛑 🛑\"\n          # Copy the last log file because we timedout and pytest did not copy it over\n          # the /tmp/failed/ folder\n          cat /tmp/last_test_log_dir.txt | xargs -I {} mv {}/ /tmp/failed/\n          exit 1\n        fi\n\n        # when a test fails in pytest it returns 1 but there are other return codes as well so we just check if the code is non zero\n        if [[ $code -ne 0 ]]; then\n          exit 1\n        fi\n      env:\n        # Add environment variables to enable the S3 snapshot test.\n        # AWS credentials: if inputs provided, use them; otherwise rely on workflow OIDC auth\n        DRAGONFLY_S3_BUCKET: ${{ inputs.s3-bucket }}\n        AWS_ACCESS_KEY_ID: ${{ inputs.aws-access-key-id || env.AWS_ACCESS_KEY_ID }}\n        AWS_SECRET_ACCESS_KEY: ${{ inputs.aws-secret-access-key || env.AWS_SECRET_ACCESS_KEY }}\n        AWS_SESSION_TOKEN: ${{ env.AWS_SESSION_TOKEN }}\n        AWS_REGION: ${{ env.AWS_REGION || 'us-east-1' }}\n\n    - name: Send notification on failure\n      if: failure() && github.ref == 'refs/heads/main'\n      shell: bash\n      run: |\n        get_failed_tests() {\n          local report_file=$1\n          echo $(jq -r '.tests[] | select(.outcome == \"failed\") | .nodeid' \"$report_file\")\n        }\n        cd ${GITHUB_WORKSPACE}/tests\n        failed_tests=\"\"\n        if [ -f report.json ]; 
then\n          failed_tests=$(get_failed_tests report.json)\n        fi\n\n        KIND=\"iouring\"\n        if [[ \"${{inputs.epoll}}\" == 'epoll' ]]; then\n          KIND=\"epoll\"\n        fi\n\n        job_link=\"${GITHUB_SERVER_URL}/${GITHUB_REPOSITORY}/actions/runs/${GITHUB_RUN_ID}\"\n        message=\"Regression $KIND tests failed.\\\\n The commit is: ${{github.sha}}.\\\\n $failed_tests \\\\n Job Link: ${job_link}\\\\n\"\n\n        curl -s \\\n          -X POST \\\n          -H 'Content-Type: application/json' \\\n          '${{ inputs.gspace-secret }}' \\\n          -d '{\"text\": \"'\"${message}\"'\"}'\n    - name: Copy binary on a self hosted runner\n      if: failure() && contains(runner.labels, 'self-hosted')\n      shell: bash\n      run: |\n        cd ${GITHUB_WORKSPACE}/build\n        timestamp=$(date +%Y-%m-%d_%H:%M:%S)\n        mv ./dragonfly /var/crash/dragonfly_${timestamp}\n"
  },
  {
    "path": ".github/actions/repeat/action.yml",
    "content": "name: Run Tests On Repeat\ndescription: \"Repeat specific tests\"\n\ninputs:\n  dfly-executable:\n    required: true\n    type: string\n  run-only-on-ubuntu-latest:\n    required: true\n    type: string\n  build-folder-name:\n    required: true\n    type: string\n  expression:\n    required: false\n    type: string\n  aws-access-key-id:\n    required: false\n    type: string\n    description: \"AWS access key ID (optional if using OIDC - credentials set by workflow)\"\n  aws-secret-access-key:\n    required: false\n    type: string\n    description: \"AWS secret access key (optional if using OIDC - credentials set by workflow)\"\n  s3-bucket:\n    required: true\n    type: string\n  count:\n    required: true\n    type: number\n  timeout:\n    required: true\n    type: string\n  epoll:\n    required: true\n    type: string\n  vmodule_expression:\n    required: true\n    type: string\n\nruns:\n  using: \"composite\"\n  steps:\n    - name: Repeat pytests\n      id: main\n      shell: bash\n      run: |\n        ls -l ${GITHUB_WORKSPACE}/\n        cd ${GITHUB_WORKSPACE}/tests\n        echo \"Current commit is ${{github.sha}}\"\n        pip3 install -r dragonfly/requirements.txt\n        # used by PyTests\n        export DRAGONFLY_PATH=\"${GITHUB_WORKSPACE}/${{inputs.build-folder-name}}/${{inputs.dfly-executable}}\"\n        export UBSAN_OPTIONS=print_stacktrace=1:halt_on_error=1 # to crash on errors\n        if [[ \"${{ inputs.epoll }}\" == \"epoll\" ]]; then\n          FORCE_EPOLL=\"--df force_epoll=true\"\n        else\n          FORCE_EPOLL=\"\"\n        fi\n        if [[ $\"{{ inputs.vmodule_expression }}\" != \"\" ]]; then\n          VMOD=\"--df vmodule=${{ inputs.vmodule_expression }}\"\n        else\n          VMOD=\"\"\n        fi\n        echo Running command: timeout ${{ inputs.timeout }} pytest ${{ inputs.expression }} --drop-data-after-each-test ${FORCE_EPOLL} ${VMOD} --color=yes --json-report --json-report-file=report.json 
--log-cli-level=DEBUG --count=${{ inputs.count }}\n        timeout ${{ inputs.timeout }} pytest ${{ inputs.expression }} --drop-data-after-each-test ${FORCE_EPOLL} ${VMOD} --color=yes --json-report --json-report-file=report.json --log-cli-level=DEBUG --count=${{ inputs.count }} || code=$?\n        # timeout returns 124 if we exceeded the timeout duration\n        if [[ $code -eq 124 ]]; then\n          # Add an extra new line here because when tests timeout the first line below continues from the test failure name\n          echo \"\\n\"\n          echo \"🛑 🛑 🛑 🛑 🛑 🛑 🛑 🛑 🛑 🛑 🛑 🛑 🛑 🛑 🛑 🛑 🛑 🛑 🛑 🛑 🛑 🛑 🛑 🛑 🛑\"\n          echo \"🛑 🛑 🛑 🛑 🛑 🛑 🛑 🛑 🛑 🛑 TESTS TIMEDOUT 🛑 🛑 🛑 🛑 🛑 🛑 🛑 🛑 🛑 🛑\"\n          echo \"🛑 🛑 🛑 🛑 🛑 🛑 🛑 🛑 🛑 🛑 🛑 🛑 🛑 🛑 🛑 🛑 🛑 🛑 🛑 🛑 🛑 🛑 🛑 🛑 🛑\"\n          # Copy the last log file because we timedout and pytest did not copy it over\n          # the /tmp/failed/ folder\n          cat /tmp/last_test_log_dir.txt | xargs -I {} mv {}/ /tmp/failed/\n          exit 1\n        fi\n\n        # when a test fails in pytest it returns 1 but there are other return codes as well so we just check if the code is non zero\n        if [[ $code -ne 0 ]]; then\n          exit 1\n        fi\n      env:\n        # Add environment variables to enable the S3 snapshot test.\n        # AWS credentials: if inputs provided, use them; otherwise rely on workflow OIDC auth\n        DRAGONFLY_S3_BUCKET: ${{ inputs.s3-bucket }}\n        AWS_ACCESS_KEY_ID: ${{ inputs.aws-access-key-id || env.AWS_ACCESS_KEY_ID }}\n        AWS_SECRET_ACCESS_KEY: ${{ inputs.aws-secret-access-key || env.AWS_SECRET_ACCESS_KEY }}\n        AWS_SESSION_TOKEN: ${{ env.AWS_SESSION_TOKEN }}\n        AWS_REGION: ${{ env.AWS_REGION || 'us-east-1' }}\n"
  },
  {
    "path": ".github/actions/sync-valkey-tests/action.yml",
    "content": "name: Sync valkey-search tests\ndescription: \"Synchronizes valkey-search tests using a fixed revision\"\n\nruns:\n  using: composite\n  steps:\n    - name: Sync valkey-search tests\n      shell: bash\n      run: |\n        cd ${GITHUB_WORKSPACE}/tests/dragonfly/valkey_search\n        # main branch revision\n        ./sync-valkey-search-tests.sh 90124dc91756b24cb2e58e5c4eea5b8d53004ea6\n"
  },
  {
    "path": ".github/actions/test-docker/action.yml",
    "content": "name: Test Docker Image\n\ninputs:\n  image_id:\n    required: true\n    type: string\n  name:\n    required: true\n    type: string\n\nruns:\n  using: \"composite\"\n  steps:\n    - name: Test Image\n      shell: bash\n      run: |\n        echo \"Testing ${{ inputs.name }} image\"\n        docker pull ${{inputs.image_id}}\n        docker image inspect ${{inputs.image_id}}\n\n        # docker run with port-forwarding\n        docker run  --name test -d -p 6379:6379 ${{inputs.image_id}}\n        until [ \"`docker inspect -f {{.State.Health.Status}} test`\"==\"healthy\" ]; do\n          sleep 0.1;\n        done;\n"
  },
  {
    "path": ".github/bullmq-skipped-tests.txt",
    "content": "# BullMQ tests excluded from CI runs against Dragonfly\n#\n# Format: one pattern per line (used as JS regex in mocha --grep --invert)\n# Categories:\n#   DRAGONFLY_BUG  - Dragonfly does not support this behaviour yet\n#   FLAKY          - Test has race conditions / timing issues unrelated to Dragonfly\n\n# ── DRAGONFLY BUG ────────────────────────────────────────────────────────────\n# BullMQ Lua scripts access keys that are not declared in KEYS[].\n# Dragonfly enforces strict Lua key declaration; allow-undeclared-keys causes\n# global transaction mode and breaks other tests.\nhandle errors.*for flows\nFlows - addBulk.*handle errors\n\n# Job.finished: job hash persists after removeOnComplete instead of being deleted.\nrejects with missing key for job message\n\n# ── FLAKY ─────────────────────────────────────────────────────────────────────\n# deduplication key removal races with the 'deduplicated' QueueEvents listener.\n# XREAD from '$' is noted as unstable in upstream BullMQ code.\nremoves deduplication key\n\n# QueueEvents 'waiting' event: XREAD from '$' is unstable on CI.\n# Upstream comment: \"additional delay since XREAD from '$' is unstable\"\nemits waiting when a job has been added\n\n# getWorkers: race between worker 'ready' event and assertion.\ngets all workers for this queue only\n\n# getWorkers (shared connection): upstream test file has comment\n# \"Test is very flaky on CI, so we skip it for now.\"\ngets all workers for a given queue\n\n# Job Scheduler monthly repeat: sinon fake-timer races with real Redis async ops.\n# The worker loop does not advance in time before the 200 s timeout expires.\nshould repeat 7:th day every month at 9:25\n"
  },
  {
    "path": ".github/copilot-instructions.md",
    "content": "---\ndescription: 'Code review guidelines for GitHub copilot in this project'\napplyTo: '**'\nexcludeAgent: [\"coding-agent\"]\n---\n\n# Code Review Instructions\n\nKeep reviews high-signal and minimal. Only comment on real bugs with high confidence.\n\n## Comment Only When\n- The issue is a correctness, security, concurrency, or architecture problem.\n- The impact is clear and non-trivial.\n- You can point to concrete evidence in the diff (not speculation).\n\n## Avoid\n- Style, formatting, naming, or minor performance nits.\n- Optional refactors or “nice to have” suggestions.\n- Praise, restating the code, or long explanations.\n- Duplicate comments for the same root cause.\n\n## Review Style\n- Be terse: 1-2 sentences per issue.\n- Include file and line references when possible.\n- If no issues are found, say “No issues found.”\n- Provide concrete suggestions for fixes when possible, or examples to illustrate the problem.\n"
  },
  {
    "path": ".github/dependabot.yml",
    "content": "version: 2\nupdates:\n  - package-ecosystem: \"github-actions\"\n    directory: \"/\"\n    schedule:\n      interval: \"weekly\"\n\n    open-pull-requests-limit: 1\n    groups:\n      actions:\n        patterns:\n          - \"*\"\n\n\n  - package-ecosystem: \"gomod\"\n    directories:\n      - \"/contrib/charts/dragonfly\"\n      - \"/tools/replay\"\n    schedule:\n      interval: \"weekly\"\n\n    open-pull-requests-limit: 1\n    #uncomment it to group dependency updates\n    #groups:\n      #go-mod:\n        #patterns:\n          #- \"*\"\n    ignore:\n      # Disable all updates except security updates\n      #remove an item from ignore list to get dependency updates of that kind\n      - dependency-name: \"*\"\n        update-types:\n          - \"version-update:semver-major\"\n          - \"version-update:semver-minor\"\n          - \"version-update:semver-patch\"\n\n\n\n  - package-ecosystem: \"pip\"\n    directories:\n      - \"/tests/dragonfly\"\n      - \"/tools\"\n    schedule:\n      interval: \"weekly\"\n\n    #uncomment it to group dependency updates\n    #groups:\n      #py-dep:\n        #patterns:\n          #- \"*\"\n    ignore:\n      # Disable all updates except security updates\n      #remove an item from ignore list to get dependency updates of that kind\n      - dependency-name: \"*\"\n        update-types:\n          - \"version-update:semver-major\"\n          - \"version-update:semver-minor\"\n          - \"version-update:semver-patch\"\n"
  },
  {
    "path": ".github/instructions/code-review.instructions.md",
    "content": "---\ndescription: 'Code review instructions for Dragonfly'\napplyTo: '**'\nexcludeAgent: [\"coding-agent\"]\n---\n\n# Dragonfly Code Review Instructions\n\nDragonfly is a high-performance, Redis-compatible in-memory data store written in C++20 with a unique shared-nothing, fiber-based architecture. Code reviews must prioritize correctness, security, and architectural compliance specific to this threading model.\n\n## Review Priorities\n\n### 🔴 CRITICAL (Block merge immediately)\n\n**Threading Model Violations** (causes deadlocks/crashes):\n- ❌ **NEVER** use `std::thread`, `std::mutex`, `std::condition_variable`, or standard library threading primitives\n- ✅ **ALWAYS** use fiber-aware equivalents: `util::fb2::Mutex`, `util::fb2::Fiber`, `util::fb2::CondVar` from `util/fibers/`\n\n**Architecture Violations**:\n- ❌ Cross-shard data access without proper synchronization\n- ✅ Per-shard operations only (see `src/server/db_slice.cc` for patterns)\n\n**Security Vulnerabilities**:\n- Authentication/authorization bypass in ACL code (`src/server/acl/`)\n- Exposed secrets, credentials in code or logs\n- Buffer overflows, use-after-free, memory safety issues\n\n**Correctness Issues**:\n- Race conditions in fiber scheduling\n- Logic errors in transaction handling (`src/server/transaction.cc`)\n- Data corruption risks in DashTable operations (`src/core/dash.h`)\n\n### 🟡 IMPORTANT (Requires discussion)\n\n**Code Quality**:\n- Missing error handling (should return `OpStatus` from `facade/op_status.h`)\n- Obvious memory leaks (check ASAN reports)\n- Performance bottlenecks in hot paths (unnecessary allocations, N+1 patterns)\n\n**Test Coverage**:\n- New features without tests (both C++ unit tests and Python integration tests)\n- Changes to critical paths (transactions, replication, cluster) without test coverage\n- Modified code that fails existing tests\n\n**Style Violations** (severe only):\n- Not following naming conventions: `snake_case` variables, `PascalCase` 
functions, `kPascalCase` constants\n- Code that won't pass pre-commit hooks (clang-format, 100 char limit)\n\n### 🟢 SUGGESTIONS (Non-blocking, comment only if obvious)\n\n- Over-engineering: adding abstraction layers, feature flags, or configurability not requested\n- Missing comments on complex fiber synchronization logic\n- Premature optimization without profiling\n\n## Dragonfly-Specific Patterns\n\n### ✅ DO: Correct Patterns\n\n**Threading & Synchronization**:\n```cpp\n// ✅ CORRECT: Fiber-aware mutex\nutil::fb2::Mutex mutex_;\nstd::lock_guard<util::fb2::Mutex> lock(mutex_);\n\n// ✅ CORRECT: Fiber-aware operations\nutil::fb2::Fiber fb = util::fb2::Fiber(\"name\", [&] { /* work */ });\n```\n\n\n**Per-Shard Design**:\n```cpp\n// ✅ CORRECT: Operate on shard-local data\nvoid DbSlice::SomeOperation() {\n  // Access only this shard's data\n  auto& db_slice = cntx->ns->GetCurrentDbSlice();\n}\n```\n\n### ❌ DON'T: Anti-Patterns\n\n**Threading**:\n```cpp\n// ❌ WRONG: Standard library threading (causes deadlocks!)\nstd::mutex mutex_;\nstd::thread worker;\nstd::condition_variable cv_;\n```\n\n**Global State**:\n```cpp\n// ❌ WRONG: Global mutable state (breaks shared-nothing architecture)\nstatic std::unordered_map<string, int> global_cache;\n```\n\n**Build Commands**:\n- ❌ Don't suggest `./tools/docker/build.sh` or `make` for incremental builds\n- ✅ Use `cd build-dbg && ninja <target>` instead\n\n## Code Review Checklist\n\nWhen reviewing Dragonfly code, verify:\n\n1. **Architecture Compliance**:\n   - [ ] No standard library threading primitives (`std::thread`, `std::mutex`)\n   - [ ] No global mutable state\n   - [ ] Fiber-aware synchronization used correctly\n   - [ ] Follows per-shard, shared-nothing design\n\n2. **Security**:\n   - [ ] No OWASP vulnerabilities (injection, XSS, auth bypass)\n   - [ ] No hardcoded secrets or credentials\n   - [ ] Input validation on command arguments\n   - [ ] Safe memory operations (no buffer overflows)\n\n3. 
**Testing**:\n   - [ ] New functionality has test coverage\n   - [ ] Tests build and pass: `cd build-dbg && ninja <test> && ./<test>`\n   - [ ] No test regressions\n\n4. **Style & Formatting**:\n   - [ ] Follows naming conventions (snake_case vars, PascalCase functions)\n   - [ ] Will pass pre-commit checks (clang-format, 100 char limit)\n   - [ ] Code compiles without warnings (CI uses `-Werror`)\n\n5. **Helio Submodule**:\n   - [ ] No direct edits to `helio/` directory (it's a git submodule)\n\n## Common False Positives to Ignore\n\nThese are **NOT** issues in Dragonfly's design. Do not comment on:\n\n1. **Single-threaded-looking code**: Per-shard operations intentionally avoid locks\n2. **Custom allocators**: mimalloc is used intentionally for performance\n3. **Manual memory management**: Required for performance-critical paths\n4. **Complex template metaprogramming**: DashTable uses advanced C++20 features\n5. **Missing const**: Not always applicable in high-performance code\n\n## Review Style Guidelines\n\n1. **Be specific**: Reference file:line, explain WHY it's wrong\n2. **Show examples**: Demonstrate the correct pattern with code\n3. **Prioritize**: Security and correctness over style\n4. **Link to docs**: Reference `docs/df-share-nothing.md`, `docs/transaction.md`, etc.\n5. **Be concise**: Dragonfly team values focused, actionable feedback\n\n## Example Review Comments\n\n**❌ BAD - Too noisy**:\n> \"Consider using auto here for type inference\"\n\n**✅ GOOD - Actionable and specific**:\n> \"🔴 CRITICAL: Line 42 uses `std::mutex`. This will cause fiber deadlocks. Replace with `util::fb2::Mutex` from helio/util/fibers/. See src/server/set_family.cc:123 for correct pattern.\"\n\n**✅ GOOD - Security focused**:\n> \"🔴 SECURITY: Line 58 doesn't validate `user_input` before passing to eval(). Vulnerable to command injection. Add validation or use SafeEval().\"\n\n**✅ GOOD - Architecture violation**:\n> \"🟡 ARCHITECTURE: Line 91 accesses global `cache_map`. 
Dragonfly uses shared-nothing design - each shard must have its own cache. See docs/df-share-nothing.md\"\n\n---\n\n**Key Files Reference**: See AGENTS.md for complete codebase structure, build commands, and testing procedures.\n"
  },
  {
    "path": ".github/workflows/benchmark.yml",
    "content": "name: benchmark-tests\n\non:\n  schedule:\n    - cron: \"0 9 * * *\" # run at 6 AM UTC\n  workflow_dispatch:\n\npermissions:\n  contents: read\n\njobs:\n  benchmark:\n    if: github.repository == 'dragonflydb/dragonfly'\n    strategy:\n      matrix:\n        config:\n          - operator:\n              apiVersion: \"dragonflydb.io/v1alpha1\"\n              kind: \"Dragonfly\"\n              metadata:\n                labels:\n                  app.kubernetes.io/name: \"dragonfly\"\n                  app.kubernetes.io/instance: \"dragonfly-sample\"\n                  app.kubernetes.io/part-of: \"dragonfly-operator\"\n                  app.kubernetes.io/managed-by: \"kustomize\"\n                  app.kubernetes.io/created-by: \"dragonfly-operator\"\n                name: \"dragonfly-sample\"\n              spec:\n                image: \"ghcr.io/dragonflydb/dragonfly:latest\"\n                args: [\"--cache_mode\"]\n                replicas: 2\n                resources:\n                  requests:\n                    cpu: \"2\"\n                    memory: \"2000Mi\"\n                  limits:\n                    cpu: \"2\"\n                    memory: \"2000Mi\"\n\n    runs-on: ubuntu-latest\n\n    container:\n      image: ghcr.io/romange/benchmark-dev:latest\n      options: --security-opt seccomp=unconfined\n\n    permissions:\n      id-token: write\n\n    steps:\n      - name: Setup namespace name\n        id: setup\n        run: echo \"namespace=benchmark-$(date +\"%Y-%m-%d-%s\")\" >> $GITHUB_OUTPUT\n\n      - uses: actions/checkout@v6\n        with:\n          submodules: true\n\n      - name: Configure AWS credentials\n        uses: aws-actions/configure-aws-credentials@v5\n        with:\n          role-to-assume: ${{ secrets.AWS_ROLE_TO_ASSUME }}\n          aws-region: ${{ vars.AWS_REGION }}\n\n      - name: Update kube config\n        run: aws eks update-kubeconfig --name \"$EKS_CLUSTER_NAME\" --region \"$AWS_REGION\"\n        env:\n   
       AWS_REGION: ${{ vars.AWS_REGION }}\n          EKS_CLUSTER_NAME: dev\n\n      - name: Scale up\n        run: |\n          set -x\n          aws autoscaling set-desired-capacity --auto-scaling-group-name \"$AUTOSCALING_GROUP\" --desired-capacity \"$DESIRED_CAPACITY\"\n        env:\n          AUTOSCALING_GROUP: ${{ vars.DEV_EKS_AS_GROUP }}\n          DESIRED_CAPACITY: 1\n\n      - name: Install the CRD and Operator\n        run: |\n          # Install the CRD and Operator\n          kubectl apply -f https://raw.githubusercontent.com/dragonflydb/dragonfly-operator/main/manifests/dragonfly-operator.yaml\n\n      - name: Apply Configuration\n        run: |\n          set -x\n          kubectl create namespace ${{ steps.setup.outputs.namespace }} || true\n          echo '${{ toJson(matrix.config.operator) }}' | kubectl apply -n ${{ steps.setup.outputs.namespace }} -f -\n\n      - name: Wait For Service\n        run: |\n          set -x\n          kubectl wait -n ${{ steps.setup.outputs.namespace }} dragonfly/dragonfly-sample --for=jsonpath='{.status.phase}'=ready --timeout=180s\n          kubectl wait -n ${{ steps.setup.outputs.namespace }} pods --selector app=dragonfly-sample --for condition=Ready --timeout=120s\n          kubectl describe -n ${{ steps.setup.outputs.namespace }} pod dragonfly-sample-0\n\n      - name: Run Memtier Benchmark\n        shell: bash\n        run: |\n          kubectl apply -n ${{ steps.setup.outputs.namespace }} -f tools/benchmark/k8s-benchmark-job.yaml\n\n      - name: Version upgrade\n        shell: bash\n        run: |\n          # benchmark is running, wait for 30 seconds before version upgrade\n          sleep 30\n          kubectl patch dragonfly dragonfly-sample -n ${{ steps.setup.outputs.namespace }}  --type merge -p '{\"spec\":{\"image\":\"ghcr.io/dragonflydb/dragonfly-weekly:latest\"}}'\n\n      - name: Wait for Memtier Benchmark fail\n        shell: bash\n        run: |\n          # Memtier benchmark run will fail at some 
point because old master shutdown on version upgrade\n          kubectl wait --for=condition=failed --timeout=120s -n ${{ steps.setup.outputs.namespace }} jobs/memtier-benchmark 2>/dev/null\n          kubectl logs -n ${{ steps.setup.outputs.namespace }} -f jobs/memtier-benchmark\n          kubectl delete -n ${{ steps.setup.outputs.namespace }} jobs/memtier-benchmark\n\n      - name: Run Memtier Benchmark again\n        shell: bash\n        run: |\n          kubectl apply -n ${{ steps.setup.outputs.namespace }} -f tools/benchmark/k8s-benchmark-job.yaml\n\n          while true; do\n            if kubectl wait --for=condition=complete --timeout=0 -n ${{ steps.setup.outputs.namespace }} jobs/memtier-benchmark 2>/dev/null; then\n              job_result=0\n              break\n            fi\n\n            if kubectl wait --for=condition=failed --timeout=0 -n ${{ steps.setup.outputs.namespace }} jobs/memtier-benchmark 2>/dev/null; then\n              job_result=1\n              break\n            fi\n\n            sleep 3\n          done\n\n          kubectl logs -n ${{ steps.setup.outputs.namespace }} -f jobs/memtier-benchmark\n          if [[ $job_result -eq 1 ]]; then\n              exit 1\n          fi\n\n      - name: Server checks\n        run: |\n          nohup kubectl port-forward -n ${{ steps.setup.outputs.namespace }} service/dragonfly-sample 6379:6379 &\n          pip install -r tools/requirements.txt\n          python3 tools/benchmark/post_run_checks.py\n\n      - name: Get Dragonfly logs\n        uses: nick-fields/retry@v3\n        if: always()\n        with:\n          timeout_minutes: 1\n          max_attempts: 3\n          command: |\n            kubectl logs -n ${{ steps.setup.outputs.namespace }} dragonfly-sample-0\n\n      - name: Get Dragonfly replica logs\n        uses: nick-fields/retry@v3\n        if: always()\n        with:\n          timeout_minutes: 1\n          max_attempts: 3\n          command: |\n            kubectl logs -n ${{ 
steps.setup.outputs.namespace }} dragonfly-sample-1\n\n      - name: Describe dragonflydb object\n        uses: nick-fields/retry@v3\n        if: always()\n        with:\n          timeout_minutes: 1\n          max_attempts: 3\n          command: |\n            kubectl describe dragonflies.dragonflydb.io -n ${{ steps.setup.outputs.namespace }} dragonfly-sample\n\n      - name: Scale down to zero\n        if: always()\n        run: |\n          set -x\n          aws autoscaling set-desired-capacity --auto-scaling-group-name \"$AUTOSCALING_GROUP\" --desired-capacity 0\n        env:\n          AUTOSCALING_GROUP: ${{ vars.DEV_EKS_AS_GROUP }}\n\n      - name: Cleanup\n        if: always()\n        run: |\n          set -x\n          kubectl delete namespace ${{ steps.setup.outputs.namespace }}\n          kubectl delete namespace dragonfly-operator-system\n\n      - name: Send notification on failure\n        if: failure() && github.ref == 'refs/heads/main'\n        shell: bash\n        run: |\n          job_link=\"${GITHUB_SERVER_URL}/${GITHUB_REPOSITORY}/actions/runs/${GITHUB_RUN_ID}\"\n          message=\"Benchmark tests failed.\\\\n Job Link: ${job_link}\\\\n\"\n\n          curl -s \\\n            -X POST \\\n            -H 'Content-Type: application/json' \\\n            '${{ secrets.GSPACES_BOT_DF_BUILD }}' \\\n            -d '{\"text\": \"'\"${message}\"'\"}'\n"
  },
  {
    "path": ".github/workflows/bullmq-tests.yml",
    "content": "name: bullmq-tests\n\non:\n  schedule:\n    - cron: '0 7 * * *' # run at 7 AM daily\n  workflow_dispatch:\n\npermissions:\n  contents: read\n\nenv:\n  NODE_VERSION: \"22.12.0\"\n\njobs:\n  build:\n    if: github.repository == 'dragonflydb/dragonfly'\n    runs-on: ubuntu-latest\n    name: Build\n    timeout-minutes: 60\n\n    container:\n      image: ghcr.io/romange/ubuntu-dev:20-gcc14\n      options: --security-opt seccomp=unconfined\n      credentials:\n        username: ${{ github.repository_owner }}\n        password: ${{ secrets.GITHUB_TOKEN }}\n\n    steps:\n      - uses: actions/checkout@v6\n        with:\n          submodules: true\n\n      - name: Build Dragonfly\n        run: |\n          cmake -B ${GITHUB_WORKSPACE}/build \\\n            -DCMAKE_BUILD_TYPE=Release \\\n            -DWITH_AWS=OFF \\\n            -DWITH_GCP=OFF \\\n            -DWITH_UNWIND=OFF \\\n            -DWITH_GPERF=OFF \\\n            -GNinja \\\n            -L\n          cd ${GITHUB_WORKSPACE}/build && ninja dragonfly\n\n      - name: Install Node.js\n        run: |\n          wget -q https://unofficial-builds.nodejs.org/download/release/v${NODE_VERSION}/node-v${NODE_VERSION}-linux-x64-glibc-217.tar.xz\n          tar -xf node-v${NODE_VERSION}-linux-x64-glibc-217.tar.xz\n          cp -r node-v${NODE_VERSION}-linux-x64-glibc-217/* /usr/local/\n          apt-get update && apt-get install -y jq redis-tools\n          npm install -g yarn\n          node --version\n          yarn --version\n\n      - name: Start Dragonfly\n        run: |\n          ${GITHUB_WORKSPACE}/build/dragonfly \\\n            --alsologtostderr \\\n            --cluster_mode=emulated \\\n            --lock_on_hashtags \\\n            --dbfilename= \\\n            --port 6379 &\n          timeout 15s bash -c 'until redis-cli -p 6379 PING 2>/dev/null | grep -q PONG; do sleep 0.1; done'\n\n      - name: Build BullMQ\n        run: |\n          cd ${GITHUB_WORKSPACE}\n          git clone 
https://github.com/dragonflydb/bullmq\n          cd bullmq\n          yarn install\n          yarn build\n\n      - name: Run BullMQ tests\n        run: |\n          cd ${GITHUB_WORKSPACE}/bullmq\n          SKIP_PATTERN=$(grep -v '^#' ${GITHUB_WORKSPACE}/.github/bullmq-skipped-tests.txt | grep -v '^[[:space:]]*$' | paste -sd '|' || true)\n          if [ -n \"${SKIP_PATTERN}\" ]; then\n            BULLMQ_TEST_PREFIX={b} yarn test --grep \"${SKIP_PATTERN}\" --invert\n          else\n            BULLMQ_TEST_PREFIX={b} yarn test\n          fi\n\n      - name: Upload logs on failure\n        if: failure()\n        uses: actions/upload-artifact@v6\n        with:\n          name: unit_logs\n          path: /tmp/dragonfly.*\n\n      - name: Send notification on failure\n        if: failure() && github.ref == 'refs/heads/main'\n        run: |\n          job_link=\"${GITHUB_SERVER_URL}/${GITHUB_REPOSITORY}/actions/runs/${GITHUB_RUN_ID}\"\n          message=\"BullMQ tests failed.\\\\n Commit: ${{github.sha}}\\\\n Job Link: ${job_link}\\\\n\"\n\n          curl -s \\\n            -X POST \\\n            -H 'Content-Type: application/json' \\\n            '${{ secrets.GSPACES_BOT_DF_BUILD }}' \\\n            -d '{\"text\": \"'\"${message}\"'\"}'\n"
  },
  {
    "path": ".github/workflows/ci.yml",
    "content": "name: ci-tests\n\non:\n  # push:\n  # branches: [ main ]\n  pull_request:\n    branches: [main]\n  workflow_dispatch:\n\nconcurrency:\n  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}\n  cancel-in-progress: true\n\njobs:\n  pre-commit:\n    if: github.event_name == 'pull_request'\n    runs-on: ubuntu-latest\n    steps:\n      - uses: actions/checkout@v6\n        with:\n          fetch-depth: 0\n      - uses: actions/setup-python@v6\n        with:\n          python-version: '3.12'\n          cache: 'pip'\n      - uses: actions/cache@v4\n        with:\n          path: ~/.cache/pre-commit\n          key: pre-commit-${{ runner.os }}-${{ hashFiles('.pre-commit-config.yaml') }}\n      - uses: pre-commit/action@v3.0.1\n        with:\n          extra_args: >-\n            --show-diff-on-failure --color=always\n            --from-ref ${{ github.event.pull_request.base.sha }}\n            --to-ref ${{ github.event.pull_request.head.sha }}\n  build:\n    strategy:\n      matrix:\n        # Test of these containers\n        container: [\"ubuntu-dev:24\", \"alpine-dev:latest\"]\n        build-type: [Debug, Release]\n        compiler: [{ cxx: g++, c: gcc }]\n        # -no-pie to disable address randomization so we could symbolize stacktraces\n        cxx_flags: [\"-Werror -no-pie\"]\n        sanitizers: [\"NoSanitizers\"]\n        include:\n          - container: \"alpine-dev:latest\"\n            build-type: Debug\n            compiler: { cxx: clang++, c: clang }\n            cxx_flags: \"\"\n            sanitizers: \"NoSanitizers\"\n          - container: \"ubuntu-dev:24\"\n            build-type: Debug\n            compiler: { cxx: clang++, c: clang }\n            # https://maskray.me/blog/2023-08-25-clang-wunused-command-line-argument (search for compiler-rt)\n            cxx_flags: \"-Wno-error=unused-command-line-argument\"\n            sanitizers: \"Sanitizers\"\n\n    runs-on: ubuntu-latest\n    container:\n      
image: ghcr.io/romange/${{ matrix.container }}\n      # Seems that docker by default prohibits running iouring syscalls\n      options: --security-opt seccomp=unconfined --sysctl \"net.ipv6.conf.all.disable_ipv6=0\"\n      volumes:\n        - /:/hostroot\n        - /mnt:/mnt\n      credentials:\n        username: ${{ github.repository_owner }}\n        password: ${{ secrets.GITHUB_TOKEN }}\n\n    steps:\n      - uses: actions/checkout@v6\n        with:\n          submodules: true\n\n      - name: Prepare Environment\n        run: |\n          uname -a\n          cmake --version\n          mkdir -p ${GITHUB_WORKSPACE}/build\n          mount\n\n          echo \"===================Before freeing up space ============================================\"\n          df -h\n          rm -rf /hostroot/usr/share/dotnet\n          rm -rf /hostroot/usr/local/share/boost\n          rm -rf /hostroot/usr/local/lib/android\n          rm -rf /hostroot/opt/ghc\n          echo \"===================After freeing up space ============================================\"\n          df -h\n          touch /mnt/foo\n          ls -la /mnt/foo\n\n      - name: System diagnostics\n        run: |\n          echo \"ulimit is\"\n          ulimit -s\n          echo \"-----------------------------\"\n          echo \"disk space is:\"\n          df -h\n          echo \"-----------------------------\"\n\n      - name: Build Dragonfly\n        uses: ./.github/actions/builder\n        with:\n          build-type: ${{matrix.build-type}}\n          c-compiler: ${{matrix.compiler.c}}\n          cxx-compiler: ${{matrix.compiler.cxx}}\n          cxx-flags: ${{matrix.cxx_flags}}\n          sanitizers: ${{matrix.sanitizers}}\n          with-aws: 'OFF'\n\n      - name: PostFail\n        if: failure()\n        run: |\n          echo \"disk space is:\"\n          df -h\n\n      - name: C++ Unit Tests - IoUring\n        run: |\n          cd ${GITHUB_WORKSPACE}/build\n          echo Run ctest -V -L DFLY\n\n         
 GLOG_alsologtostderr=1 GLOG_vmodule=rdb_load=1,rdb_save=1,snapshot=1,op_manager=1,op_manager_test=1 \\\n          FLAGS_fiber_safety_margin=4096 timeout 20m ctest -V -L DFLY -E allocation_tracker_test\n\n          # Run allocation tracker test separately without alsologtostderr because it generates a TON of logs.\n          FLAGS_fiber_safety_margin=4096 timeout 5m ./allocation_tracker_test\n\n          timeout 5m ./dragonfly_test\n          timeout 5m ./json_family_test --jsonpathv2=false\n          timeout 5m ./tiered_storage_test --vmodule=db_slice=2 --logtostderr\n          timeout 5m ./search_test --use_numeric_range_tree=false\n          timeout 5m ./search_family_test --use_numeric_range_tree=false\n\n\n      - name: C++ Unit Tests - Epoll\n        run: |\n          cd ${GITHUB_WORKSPACE}/build\n\n          # Create a rule that automatically prints stacktrace upon segfault\n          cat > ./init.gdb <<EOF\n          catch signal SIGSEGV\n          command\n          bt\n          end\n          EOF\n\n          gdb -ix ./init.gdb --batch -ex r --args ./dragonfly_test --force_epoll\n          GLOG_alsologtostderr=1 FLAGS_fiber_safety_margin=4096 FLAGS_force_epoll=true GLOG_vmodule=rdb_load=1,rdb_save=1,snapshot=1 \\\n          timeout 20m ctest -V -L DFLY -E allocation_tracker_test\n\n          FLAGS_fiber_safety_margin=4096 FLAGS_force_epoll=true timeout 5m ./allocation_tracker_test\n\n      - name: C++ Unit Tests - IoUring with cluster mode\n        run: |\n          cd ${GITHUB_WORKSPACE}/build\n          FLAGS_fiber_safety_margin=4096 FLAGS_cluster_mode=emulated timeout 20m ctest -V -L DFLY\n\n      - name: C++ Unit Tests - IoUring with cluster mode and FLAGS_lock_on_hashtags\n        run: |\n          cd ${GITHUB_WORKSPACE}/build\n          FLAGS_fiber_safety_margin=4096 FLAGS_cluster_mode=emulated FLAGS_lock_on_hashtags=true timeout 20m ctest -V -L DFLY\n\n      - name: Upload unit logs on failure\n        if: failure()\n        uses: 
actions/upload-artifact@v6\n        with:\n          name: unit_logs\n          path: /tmp/*INFO*\n\n      - name: Run regression tests\n        if: matrix.container == 'ubuntu-dev:24' && matrix.sanitizers == 'NoSanitizers'\n        uses: ./.github/actions/regression-tests\n        with:\n          dfly-executable: dragonfly\n          run-only-on-ubuntu-latest: true\n          build-folder-name: build\n          # Non-release build will not run tests marked as opt_only\n          # \"not empty\" string is needed for release build because pytest command can not get empty string for filter\n          filter: ${{ matrix.build-type == 'Release' && 'not debug_only' || 'not opt_only' }}\n\n      - name: Upload regression logs on failure\n        if: failure()\n        uses: actions/upload-artifact@v6\n        with:\n          name: regression_logs\n          path: /tmp/failed/*\n\n  lint-test-chart:\n    runs-on: ubuntu-latest\n    needs: [build]\n    steps:\n      - uses: actions/checkout@v6\n      - uses: ./.github/actions/lint-test-chart\n\n  large-tests-arm:\n    runs-on: CI-LARGE-ARM\n\n    permissions:\n      id-token: write\n      contents: read\n\n    container:\n      image: ghcr.io/romange/ubuntu-dev:24\n      options: --security-opt seccomp=unconfined --sysctl \"net.ipv6.conf.all.disable_ipv6=0\"\n      volumes:\n        - /var/crash:/var/crash\n        - /:/hostroot\n        - /mnt:/mnt\n\n    steps:\n      - uses: actions/checkout@v6\n        with:\n          submodules: true\n\n      - name: Print environment info\n        run: |\n          cat /proc/cpuinfo\n          ulimit -a\n          env\n          lsblk -l\n\n      - name: Build Dragonfly\n        uses: ./.github/actions/builder\n        with:\n          build-type: Release\n          targets: 'dragonfly'\n\n      - name: Authenticate to AWS\n        # Runs if it's NOT a PR, OR if the PR originates from the same repository (not a fork)\n        if: github.event_name != 'pull_request' || 
github.event.pull_request.head.repo.full_name == github.repository\n        uses: aws-actions/configure-aws-credentials@v5\n        with:\n          role-to-assume: ${{ secrets.AWS_CI_S3_ROLE_ARN }}\n          aws-region: us-east-1\n\n      - name: Run large tests on ARM\n        uses: ./.github/actions/regression-tests\n        with:\n          dfly-executable: dragonfly\n          gspace-secret: ${{ secrets.GSPACES_BOT_DF_BUILD }}\n          build-folder-name: build\n          run-only-on-ubuntu-latest: true\n          filter: large\n          s3-bucket: ${{ secrets.S3_REGTEST_BUCKET }}\n\n      - name: Upload logs on failure\n        if: failure()\n        uses: actions/upload-artifact@v6\n        with:\n          name: large-tests-arm-logs\n          path: /tmp/failed/*\n"
  },
  {
    "path": ".github/workflows/copilot-setup-steps.yml",
    "content": "# Copilot Build Environment Setup Steps\n# This file contains steps to configure the Dragonfly build environment\n# with AWS, GCP, and GPERF disabled for faster development builds\n\nname: Copilot setup steps\n\n# Automatically run the setup steps when they are changed to allow for easy validation, and\n# allow manual testing through the repository's \"Actions\" tab\non:\n  workflow_dispatch:\n  push:\n    paths:\n      - .github/workflows/copilot-setup-steps.yml\n  pull_request:\n    paths:\n      - .github/workflows/copilot-setup-steps.yml\n\njobs:\n  # The job MUST be called `copilot-setup-steps` or it will not be picked up by Copilot.\n  copilot-setup-steps:\n    runs-on: ubuntu-latest\n    permissions:\n      contents: read\n\n    steps:\n      - uses: actions/checkout@v6\n        with:\n          submodules: true\n\n      - name: Install required system dependencies\n        run: |\n          sudo apt-get update\n          sudo apt-get install -y libboost-context-dev\n\n      - name: Configure CMake build (Debug, no AWS/GCP)\n        run: ./helio/blaze.sh -DWITH_AWS=OFF -DWITH_GCP=OFF -DWITH_GPERF=OFF\n\n      - name: Install pre-commit\n        run: pip3 install pre-commit\n\n# Notes:\n# - The build directory will be created at build-dbg/\n# - Disabling AWS/GCP significantly speeds up compilation\n# - WITH_GPERF=OFF disables Google Performance Tools\n# - Use ninja for faster parallel builds\n"
  },
  {
    "path": ".github/workflows/cov.yml",
    "content": "name: Daily Coverage\n\non:\n    schedule:\n      - cron: '0 6 * * *' # run at 6 AM UTC\n    workflow_dispatch:\n\njobs:\n  build:\n    if: github.repository == 'dragonflydb/dragonfly'\n    # The CMake configure and build commands are platform agnostic and should work equally\n    # well on Windows or Mac.  You can convert this to a matrix build if you need\n    # cross-platform coverage.\n    # See: https://docs.github.com/en/free-pro-team@latest/actions/learn-github-actions/managing-complex-workflows#using-a-build-matrix\n    runs-on: ubuntu-latest\n    strategy:\n      matrix:\n        include:\n          - container: \"ubuntu-dev:24\"\n            build-type: Debug\n            compiler: {cxx: g++, c: gcc}\n            cxx_flags: \"-fprofile-arcs -ftest-coverage\"\n    timeout-minutes: 300\n    container:\n      image: ghcr.io/romange/${{ matrix.container }}\n      options: --security-opt seccomp=unconfined --sysctl \"net.ipv6.conf.all.disable_ipv6=0\"\n      volumes:\n        - /:/hostroot\n        - /mnt:/mnt\n      credentials:\n        username: ${{ github.repository_owner }}\n        password: ${{ secrets.GITHUB_TOKEN }}\n\n    steps:\n      - uses: actions/checkout@v6\n        with:\n          submodules: true\n      - name: Delete Space\n        run: |\n          df -h\n          rm -rf /hostroot/usr/share/dotnet\n          rm -rf /hostroot/usr/local/share/boost\n          rm -rf /hostroot/usr/local/lib/android\n          rm -rf /hostroot/opt/ghc\n          echo \"***************After Deletion***************************\"\n          df -h\n      - name: Install dependencies\n        run: |\n          uname -a\n          cmake --version\n          mkdir -p ${{github.workspace}}/build\n          apt update && apt install -y lcov pip\n      - name: Cache build deps\n        id: cache-deps\n        uses: actions/cache@v5\n        with:\n          path: |\n            ~/.ccache\n            ${{github.workspace}}/build/_deps\n          key: ${{ 
runner.os }}-deps-${{ github.base_ref }}-${{ github.sha }}\n          restore-keys: |\n            ${{ runner.os }}-deps-${{ github.base_ref }}-\n\n      - name: Configure CMake\n        run: |\n          pip install -r tests/dragonfly/requirements.txt\n          cmake -B build \\\n            -DCMAKE_BUILD_TYPE=${{matrix.build-type}} \\\n            -GNinja \\\n            -DCMAKE_C_COMPILER=\"${{matrix.compiler.c}}\" \\\n            -DCMAKE_CXX_COMPILER=\"${{matrix.compiler.cxx}}\" \\\n            -DCMAKE_CXX_COMPILER_LAUNCHER=ccache \\\n            -DCMAKE_CXX_FLAGS=\"${{matrix.cxx_flags}}\" \\\n            -L\n          pwd\n          cd build && pwd\n      - name: Build\n        run: |\n          cd $GITHUB_WORKSPACE/build\n          echo \"-----------------------------\"\n          ninja src/all\n\n      - name: Run C++ Unit Tests\n        run: |\n          cd $GITHUB_WORKSPACE/build\n          ctest -V -L DFLY\n\n      - name: Run Python Integration Tests\n        run: |\n          cd $GITHUB_WORKSPACE/build\n          export DRAGONFLY_PATH=`pwd`/dragonfly\n          pytest ../tests/dragonfly/ --durations=10 --timeout=300 --color=yes --log-cli-level=INFO\n\n      - name: Generate Coverage Report\n        run: |\n          cd $GITHUB_WORKSPACE/build\n          lcov -c -d . 
-o main_coverage.info\n          lcov --remove main_coverage.info -o main_coverage.info '/usr/*' '*/_deps/*' '*/third_party/*'\n          genhtml main_coverage.info --ignore-errors source --output-directory covout -p $GITHUB_WORKSPACE\n          ls ./\n          echo ls covout\n          ls covout/\n      - name: Upload coverage to Codecov\n        uses: codecov/codecov-action@v4\n        with:\n          files: build/main_coverage.info\n          fail_ci_if_error: true\n          token: ${{ secrets.CODECOV_TOKEN }}\n      - name: Upload coverage\n        uses: actions/upload-artifact@v6\n        with:\n          name: coverage-report\n          path: build/covout/\n          if-no-files-found: error\n"
  },
  {
    "path": ".github/workflows/daily-builds.yml",
    "content": "name: daily-builds\n\non:\n  schedule:\n    - cron: '0 6 * * *' # run at 6 AM UTC\n  workflow_dispatch:\n\njobs:\n  build:\n    if: github.repository == 'dragonflydb/dragonfly'\n    # The CMake configure and build commands are platform agnostic and should work equally\n    # well on Windows or Mac.  You can convert this to a matrix build if you need\n    # cross-platform coverage.\n    # See: https://docs.github.com/en/free-pro-team@latest/actions/learn-github-actions/managing-complex-workflows#using-a-build-matrix\n    runs-on: ubuntu-latest\n    name: Build ${{ matrix.name }}\n    strategy:\n      matrix:\n        include:\n          # Build with these flags\n          - name: generic\n            container: alpine-dev\n            flags: \"-DMARCH_OPT=-march=x86-64\"\n          - name: fedora\n            container: fedora:30-gcc14\n\n    timeout-minutes: 45\n\n    container:\n      image: ghcr.io/romange/${{ matrix.container }}\n      options: --security-opt seccomp=unconfined\n      credentials:\n        username: ${{ github.repository_owner }}\n        password: ${{ secrets.GITHUB_TOKEN }}\n\n    steps:\n      - uses: actions/checkout@v6\n        with:\n          submodules: true\n\n      - name: Show compiler version\n        run: |\n          echo \"=== Compiler Version ===\"\n          ${CXX:-g++} --version\n          echo \"=== CMake Version ===\"\n          cmake --version\n          echo \"=== glibc Version ===\"\n          ldd --version | head -1 || true\n          mkdir -p $GITHUB_WORKSPACE/build\n\n      - name: Configure & Build\n        run: |\n          cd $GITHUB_WORKSPACE/build\n          cmake .. 
-DCMAKE_BUILD_TYPE=Debug -GNinja ${{ matrix.flags }}\n          ninja src/all\n      - name: Test\n        run: |\n            cd $GITHUB_WORKSPACE/build\n            ctest -V -L DFLY\n\n      - name: Send notification on failure\n        if: failure() && github.ref == 'refs/heads/main'\n        run: |\n          job_link=\"${GITHUB_SERVER_URL}/${GITHUB_REPOSITORY}/actions/runs/${GITHUB_RUN_ID}\"\n          message=\"Daily build (${{ matrix.name }}) failed.\\\\n Commit: ${{github.sha}}\\\\n Job Link: ${job_link}\\\\n\"\n\n          curl -s \\\n            -X POST \\\n            -H 'Content-Type: application/json' \\\n            '${{ secrets.GSPACES_BOT_DF_BUILD }}' \\\n            -d '{\"text\": \"'\"${message}\"'\"}'\n\n  build-macos:\n    if: github.repository == 'dragonflydb/dragonfly'\n    runs-on: macos-15\n    timeout-minutes: 45\n    steps:\n      - uses: actions/checkout@v6\n        with:\n          submodules: true\n\n      - name: Install dependencies\n        run: |\n\n          # Remove Python3 symlinks in /usr/local/bin as workaround to brew update issues\n          # https://github.com/actions/setup-python/issues/577\n          rm /usr/local/bin/2to3* || :\n          rm /usr/local/bin/idle3* || :\n          rm /usr/local/bin/pydoc* || :\n          rm /usr/local/bin/python3* || :\n          brew update && brew install ninja boost automake zstd bison autoconf libtool\n\n          mkdir -p $GITHUB_WORKSPACE/build\n\n      - name: Configure & Build\n        run: |\n          cd $GITHUB_WORKSPACE/build\n\n          export PATH=/opt/homebrew/bin:$PATH\n          export PATH=/opt/homebrew/opt/bison/bin/:$PATH\n\n          which bison\n          bison --version\n\n          # Check system clang version\n          clang --version\n          clang++ --version\n\n          # Verify current macOS SDK\n          xcrun --show-sdk-path\n\n          autoconf --help\n          autoreconf --help\n\n          echo \"*************************** START BUILDING 
**************************************\"\n          # Configure for using current macOS SDK\n          export SDKROOT=$(xcrun --sdk macosx --show-sdk-path)\n          echo \"Using SDK: $SDKROOT\"\n\n          # Use system clang/clang++ with macOS SDK\n          cmake .. -DCMAKE_BUILD_TYPE=Debug -GNinja \\\n            -DCMAKE_C_COMPILER=clang \\\n            -DCMAKE_CXX_COMPILER=clang++ \\\n            -DCMAKE_OSX_SYSROOT=\"$SDKROOT\" \\\n            -DCMAKE_OSX_DEPLOYMENT_TARGET=15.0\n\n          ninja src/all\n\n      - name: Test\n        run: |\n            cd $GITHUB_WORKSPACE/build\n            ctest -V -L DFLY\n\n      - name: Send notification on failure\n        if: failure() && github.ref == 'refs/heads/main'\n        run: |\n          job_link=\"${GITHUB_SERVER_URL}/${GITHUB_REPOSITORY}/actions/runs/${GITHUB_RUN_ID}\"\n          message=\"Daily build (macOS) failed.\\\\n Commit: ${{github.sha}}\\\\n Job Link: ${job_link}\\\\n\"\n\n          curl -s \\\n            -X POST \\\n            -H 'Content-Type: application/json' \\\n            '${{ secrets.GSPACES_BOT_DF_BUILD }}' \\\n            -d '{\"text\": \"'\"${message}\"'\"}'\n"
  },
  {
    "path": ".github/workflows/docker-dev-release.yml",
    "content": "name: Development Docker Build\n\non:\n  schedule:\n    - cron: '15 0 * * *'\n  workflow_dispatch:\n\nconcurrency:\n  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}\n  cancel-in-progress: true\n\nenv:\n  image: ghcr.io/dragonflydb/dragonfly-dev\n  GCS_IMAGE: us-central1-docker.pkg.dev/dragonflydb-public/dragonfly-registry/dragonfly-dev\n\njobs:\n  build_and_tag:\n    if: github.repository == 'dragonflydb/dragonfly'\n    name: Build and Push ${{matrix.flavor}} ${{ matrix.os.arch }} image\n    strategy:\n      matrix:\n        flavor: [alpine,ubuntu]\n        os:\n          - image: ubuntu-24.04\n            arch: amd64\n          - image: ubuntu-24.04-arm\n            arch: arm64\n\n    runs-on: ${{ matrix.os.image }}\n    permissions:\n      contents: read\n      packages: write\n      id-token: write\n    steps:\n      - name: checkout\n        uses: actions/checkout@v6\n        with:\n          fetch-depth: 1\n          submodules: true\n\n      - name: Set up Docker Buildx\n        uses: docker/setup-buildx-action@v3\n\n      - name: Login to Registries\n        uses: ./.github/actions/multi-registry-docker-login\n        with:\n          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}\n          GCP_SA_KEY: ${{ secrets.GCP_SA_KEY }}\n\n      - name: Authenticate to Google Cloud\n        uses: google-github-actions/auth@v3\n        with:\n          credentials_json: ${{ secrets.GCP_SA_KEY }}\n\n      - name: Configure AWS Credentials\n        uses: aws-actions/configure-aws-credentials@v5\n        with:\n          role-to-assume: ${{ secrets.AWS_CI_S3_ROLE_ARN }}\n          aws-region: us-east-1\n\n      - name: Get Build Information\n        id: build_info\n        run: |\n          echo \"short_sha=$(git rev-parse --short HEAD)\" >> $GITHUB_OUTPUT\n\n      - name: Docker meta\n        id: metadata\n        uses: docker/metadata-action@v5\n        with:\n          images: |\n            ${{ env.image }}\n           
 ${{ env.GCS_IMAGE }}\n          tags: |\n            type=sha,enable=true,prefix=${{ matrix.flavor}}-,suffix=-${{ matrix.os.arch }},format=short\n          labels: |\n            org.opencontainers.image.vendor=DragonflyDB LTD\n            org.opencontainers.image.title=Dragonfly Development Image\n            org.opencontainers.image.description=The fastest in-memory store\n      - name: Build image\n        id: build\n        uses: docker/build-push-action@v6\n        with:\n          context: .\n          push: true\n          provenance: false  # Prevent pushing a docker manifest\n          tags: |\n            ${{ steps.metadata.outputs.tags }}\n          labels: ${{ steps.metadata.outputs.labels }}\n          file: tools/packaging/Dockerfile.${{ matrix.flavor }}-dev\n          cache-from: type=gha,scope=tagged${{ matrix.flavor }}\n          cache-to: type=gha,scope=tagged${{ matrix.flavor }},mode=max\n          load: true  # Load the build images into the local docker.\n      - name: Test Image\n        run: |\n          echo ${{ steps.build.outputs.digest }}\n          image_tags=(${{ steps.metadata.outputs.tags }})\n\n          # install redis-tools\n          sudo apt-get install redis-tools -y\n\n          for image_tag in \"${image_tags[@]}\"; do\n            echo \"Testing image: ${image_tag}\"\n            docker image inspect ${image_tag}\n            echo \"Testing ${{ matrix.flavor }} image\"\n\n            # docker run with port-forwarding\n            docker run -d -p 6379:6379 ${image_tag}\n            sleep 5\n            redis-cli -h localhost ping | grep -q \"PONG\" || exit 1\n            docker stop $(docker ps -q --filter ancestor=${image_tag})\n          done\n\n      - name: Extract and Upload Binaries\n        if: matrix.flavor == 'ubuntu'  # Only run once per flavor\n        run: |\n          # Get the image tag\n          image_tags=(${{ steps.metadata.outputs.tags }})\n          image_tag=${image_tags[0]}\n\n          # Extract 
version from the image\n          echo \"Extracting version from image...\"\n          VERSION=$(docker run --rm ${image_tag} dragonfly --version | sed -r \"s/\\x1B\\[([0-9]{1,3}(;[0-9]{1,2})?)?[mGK]//g\" | head -n1 | cut -d' ' -f2 | cut -d'-' -f1)\n          # Check if version starts with a release version (v*.*.*)\n          if [[ ! $VERSION =~ ^v[0-9]+\\.[0-9]+\\.[0-9]+ ]]; then\n            # Get the latest release version to use as prefix\n            LATEST_RELEASE=$(curl -s https://api.github.com/repos/dragonflydb/dragonfly/releases/latest | jq -r .tag_name)\n            VERSION=\"${LATEST_RELEASE}+${VERSION}\"\n          fi\n          echo \"Dragonfly version: $VERSION\"\n\n          echo \"Extracting binary from ${image_tag} for ${{ matrix.os.arch }}\"\n\n          # Create a temporary container and copy the binary\n          container_id=$(docker create ${image_tag})\n          docker cp ${container_id}:/usr/local/bin/dragonfly ./dragonfly\n          docker rm ${container_id}\n\n          # Create a tar archive\n          if [[ \"${{ matrix.os.arch }}\" == \"arm64\" ]]; then\n            arch_name=\"aarch64\"\n          else\n            arch_name=\"x86_64\"\n          fi\n          tar_name=\"dragonfly-${arch_name}-dbgsym.tar.gz\"\n          tar czf ${tar_name} dragonfly\n\n          # Upload to GCS\n          echo \"Uploading ${tar_name} to GCS\"\n          gcloud storage cp \"$tar_name\" \"gs://${{ secrets.STAGING_BINARY_BUCKET }}/dragonfly/$VERSION/$tar_name\"\n\n          # Upload to AWS\n          echo \"Uploading ${tar_name} to AWS\"\n          aws s3 cp \"$tar_name\" \"s3://${{ secrets.STAGING_BINARY_BUCKET }}/dragonfly/$VERSION/$tar_name\"\n\n          # Cleanup\n          rm -f dragonfly ${tar_name}\n\n    outputs:\n      # matrix jobs outputs override each other, but we use the same sha\n      # for all images, so we can use the same output name.\n      sha: ${{ steps.build_info.outputs.short_sha }}\n\n  merge_manifest:\n    if: 
github.repository == 'dragonflydb/dragonfly'\n    needs: [build_and_tag]\n    runs-on: ubuntu-latest\n    strategy:\n      matrix:\n        flavor: [alpine,ubuntu]\n    steps:\n      - name: checkout\n        uses: actions/checkout@v6\n\n      - name: Login to Registries\n        uses: ./.github/actions/multi-registry-docker-login\n        with:\n          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}\n          GCP_SA_KEY: ${{ secrets.GCP_SA_KEY }}\n\n      - name: Merge and Push\n        run: |\n            # Function to create and push manifests for a given registry\n            create_and_push_manifests() {\n              local registry=$1\n              local flavor=$2\n              local sha=$3\n\n              # Create and push the manifest like dragonfly-dev:alpine-<sha>\n              local sha_tag=\"${registry}:${flavor}-${sha}\"\n              docker manifest create ${sha_tag} --amend ${sha_tag}-amd64 --amend ${sha_tag}-arm64\n              docker manifest push ${sha_tag}\n\n              # Create and push the manifest like dragonfly-dev:alpine\n              local flavor_tag=\"${registry}:${flavor}\"\n              docker manifest create ${flavor_tag} --amend ${sha_tag}-amd64 --amend ${sha_tag}-arm64\n              docker manifest push ${flavor_tag}\n            }\n\n            # GitHub Container Registry manifests\n            create_and_push_manifests \"${{ env.image }}\" \"${{ matrix.flavor }}\" \"${{ needs.build_and_tag.outputs.sha }}\"\n\n            # Google Artifact Registry manifests\n            create_and_push_manifests \"${{ env.GCS_IMAGE }}\" \"${{ matrix.flavor }}\" \"${{ needs.build_and_tag.outputs.sha }}\"\n"
  },
  {
    "path": ".github/workflows/docker-release2.yml",
    "content": "name: Docker Release-v2\n\non:\n  workflow_dispatch:\n    inputs:\n      TAG_NAME:\n        description: 'Tag name that the major tag will point to'\n        required: true\n      PRERELEASE:\n        description: 'Whether this is a prerelease'\n        type: boolean\n        required: true\n\n  release:\n    types: [published]\n\nconcurrency:\n  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}\n  cancel-in-progress: true\n\nenv:\n  TAG_NAME: ${{ github.event.inputs.TAG_NAME || github.event.release.tag_name }}\n  IS_PRERELEASE: ${{ github.event.release.prerelease || github.event.inputs.PRERELEASE }}\n  IMAGE: ghcr.io/dragonflydb/dragonfly\n  GCS_IMAGE: us-central1-docker.pkg.dev/dragonflydb-public/dragonfly-registry/dragonfly\n\njobs:\n  build_and_tag:\n    name: Build and Push ${{matrix.flavor}} ${{ matrix.os.arch }} image\n    strategy:\n      matrix:\n        flavor: [ubuntu]\n        os:\n          - image: ubuntu-24.04\n            arch: amd64\n          - image: ubuntu-24.04-arm\n            arch: arm64\n\n    runs-on: ${{ matrix.os.image }}\n    permissions:\n      contents: read\n      packages: write\n      id-token: write\n\n    steps:\n      - name: checkout\n        uses: actions/checkout@v6\n        with:\n          fetch-depth: 0\n          submodules: true\n      - name: Set up Docker Build\n        uses: docker/setup-buildx-action@v3\n\n      - name: Login to Registries\n        uses: ./.github/actions/multi-registry-docker-login\n        with:\n          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}\n          GCP_SA_KEY: ${{ secrets.GCP_SA_KEY }}\n\n      - name: Fetch release asset\n        uses: dsaltares/fetch-gh-release-asset@1.1.2\n        with:\n          version: \"tags/${{ env.TAG_NAME }}\"\n          regex: true\n          file: \"dragonfly-.*\\\\.tar\\\\.gz\"\n          target: 'releases/'\n          token: ${{ secrets.GITHUB_TOKEN }}\n\n      - name: Extract artifacts\n        run: |\n      
    echo \"Event prerelease ${{ github.event.release.prerelease }}\"\n          echo \"Input prerelease ${{ github.event.inputs.PRERELEASE }}\"\n          ls -l\n          ls -l releases\n          for f in releases/*.tar.gz; do tar xvfz $f -C releases; done\n          rm releases/*.tar.gz\n\n      - name: Docker meta\n        id: metadata\n        uses: docker/metadata-action@v5\n        with:\n          images: |\n            ${{ env.IMAGE }}\n            ${{ env.GCS_IMAGE }}\n          flavor: |\n            latest=false\n            prefix=${{ matrix.flavor}}-\n            suffix=-${{ matrix.os.arch }}\n          tags: |\n            type=semver,pattern={{version}},enable=true,value=${{ env.TAG_NAME }}\n            type=semver,pattern={{raw}},enable=true,value=${{ env.TAG_NAME }}\n            type=ref,event=pr\n          labels: |\n            org.opencontainers.image.vendor=DragonflyDB LTD\n            org.opencontainers.image.title=Dragonfly Production Image\n            org.opencontainers.image.description=The fastest in-memory store\n            org.opencontainers.image.version=${{ env.TAG_NAME }}\n\n      - name: Build image\n        id: build\n        uses: docker/build-push-action@v6\n        with:\n          context: .\n          push: true\n          provenance: false  # Prevent pushing a docker manifest\n          tags: |\n            ${{ steps.metadata.outputs.tags }}\n          labels: ${{ steps.metadata.outputs.labels }}\n          file: tools/packaging/Dockerfile.${{ matrix.flavor }}-prod\n          cache-from: type=gha,scope=prod-${{ matrix.flavor }}\n          cache-to: type=gha,scope=prod-${{ matrix.flavor }},mode=max\n          load: true  # Load the build images into the local docker.\n\n      - name: Test Image\n        uses: ./.github/actions/test-docker\n        timeout-minutes: 1\n        with:\n          image_id: ${{ env.IMAGE }}@${{ steps.build.outputs.digest }}\n          name: ${{ matrix.flavor }}-${{ matrix.os.arch }}\n\n      - id: 
output-sha\n        run: |\n          echo \"sha_${{ matrix.os.arch }}=${{ steps.build.outputs.digest }}\" >> $GITHUB_OUTPUT\n    outputs:\n      sha_amd: ${{ steps.output-sha.outputs.sha_amd64 }}\n      sha_arm: ${{ steps.output-sha.outputs.sha_arm64 }}\n\n  merge_manifest:\n    needs: [build_and_tag]\n    runs-on: ubuntu-latest\n    strategy:\n      matrix:\n        flavor: [ubuntu]\n    steps:\n      - name: checkout\n        uses: actions/checkout@v6\n\n      - name: Login to Registries\n        uses: ./.github/actions/multi-registry-docker-login\n        with:\n          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}\n          GCP_SA_KEY: ${{ secrets.GCP_SA_KEY }}\n\n      - name: Merge and Push\n        run: |\n            # Function to create and push manifests for a given registry\n            create_and_push_manifests() {\n              local registry=$1\n              local sha_amd=$2\n              local sha_arm=$3\n              local flavor=$4\n              local tag_name=$5\n              local is_prerelease=$6\n\n              # Function for semantic version comparison\n              # Returns true if current_version >= latest_version\n              semver_cmp() {\n                local current_version=$1\n                local latest_version=$2\n                local should_update=true\n\n                # Extract major.minor.patch components\n                IFS='.' read -ra CURRENT_PARTS <<< \"$current_version\"\n                IFS='.' 
read -ra LATEST_PARTS <<< \"$latest_version\"\n\n                # Pad arrays to same length for comparison\n                while [ ${#CURRENT_PARTS[@]} -lt 3 ]; do CURRENT_PARTS+=(0); done\n                while [ ${#LATEST_PARTS[@]} -lt 3 ]; do LATEST_PARTS+=(0); done\n\n                # Compare major.minor.patch numerically\n                if (( 10#${CURRENT_PARTS[0]} < 10#${LATEST_PARTS[0]} )); then\n                  should_update=false\n                elif (( 10#${CURRENT_PARTS[0]} == 10#${LATEST_PARTS[0]} )) && (( 10#${CURRENT_PARTS[1]} < 10#${LATEST_PARTS[1]} )); then\n                  should_update=false\n                elif (( 10#${CURRENT_PARTS[0]} == 10#${LATEST_PARTS[0]} )) && (( 10#${CURRENT_PARTS[1]} == 10#${LATEST_PARTS[1]} )) && (( 10#${CURRENT_PARTS[2]} < 10#${LATEST_PARTS[2]} )); then\n                  should_update=false\n                fi\n\n                # Log debug info to stderr instead of stdout\n                echo \"Version comparison: current=${CURRENT_PARTS[0]}.${CURRENT_PARTS[1]}.${CURRENT_PARTS[2]} vs latest=${LATEST_PARTS[0]}.${LATEST_PARTS[1]}.${LATEST_PARTS[2]}\" >&2\n\n                # Return only the result\n                echo $should_update\n              }\n\n              if [[ \"$is_prerelease\" == 'true' ]]; then\n                # Create and push the manifest like dragonfly:alpha-ubuntu\n                tag=\"${registry}:alpha-${flavor}\"\n                docker manifest create ${tag} --amend ${sha_amd} --amend ${sha_arm}\n                docker manifest push ${tag}\n              elif [[ \"$flavor\" == 'ubuntu' ]]; then\n                # Checking if this version should be tagged as latest\n                echo \"Checking if ${tag_name} should be tagged as latest...\"\n\n                # Remove 'v' prefix if present for semantic comparison\n                current_version=${tag_name#v}\n\n                # Get the current latest version by running the latest image\n                latest_version=\"\"\n       
         if docker pull ${registry}:latest &>/dev/null; then\n                  echo \"Found latest tag, checking its version...\"\n\n                  # First try to get version from image labels using docker inspect\n                  echo \"Method 1: Trying to get version from image labels...\"\n                  label_version=$(docker image inspect --format '{{ index .Config.Labels \"org.opencontainers.image.version\" }}' ${registry}:latest 2>/dev/null || echo \"\")\n\n                  if [[ -n \"$label_version\" ]]; then\n                    echo \"Found version from image labels: $label_version\"\n\n                    # Extract version from format like \"ubuntu-1.28.1-arm64\"\n                    if [[ $label_version == ubuntu-*-* ]]; then\n                      # Extract the middle part (version) from ubuntu-VERSION-arch\n                      latest_full_version=$(echo \"$label_version\" | cut -d'-' -f2)\n                    else\n                      # Use the label as is\n                      latest_full_version=$label_version\n                    fi\n\n                    echo \"Extracted version: $latest_full_version\"\n                  else\n                    # Fallback to running the container if label inspect failed\n                    echo \"Method 2: Falling back to container execution...\"\n                    latest_full_version=$(docker run --rm --entrypoint /bin/sh ${registry}:latest -c \"dragonfly --version | cut -d' ' -f2 | head -n 1\")\n                  fi\n\n                  echo \"Latest full version: ${latest_full_version}\"\n\n                  # Extract only the semantic version part (before any dash)\n                  latest_version=$(echo \"${latest_full_version}\" | cut -d'-' -f1)\n                  # Remove 'v' prefix if present\n                  latest_version=${latest_version#v}\n                  echo \"Current latest version: ${latest_version}\"\n                else\n                  echo \"No latest tag found yet 
or couldn't pull it\"\n                fi\n\n                # Compare versions only if we have a latest version\n                should_update_latest=true\n                if [[ -n \"$latest_version\" ]]; then\n                  # Call our semver comparison function\n                  should_update_latest=$(semver_cmp \"$current_version\" \"$latest_version\")\n                fi\n\n                if [[ \"$should_update_latest\" == true ]]; then\n                  echo \"Version ${tag_name} is newer than or equal to current latest, updating latest tag\"\n                  tag=\"${registry}:latest\"\n                  # Create and push the manifest like dragonfly:latest\n                  docker manifest create ${tag} --amend ${sha_amd} --amend ${sha_arm}\n                  docker manifest push ${tag}\n                else\n                  echo \"Version ${tag_name} is older than current latest (${latest_version}), NOT updating latest tag\"\n                fi\n              fi\n\n              # Create and push the manifest like dragonfly:v1.26.4\n              tag=\"${registry}:${tag_name}\"\n              docker manifest create ${tag} --amend ${sha_amd} --amend ${sha_arm}\n              docker manifest push ${tag}\n            }\n\n            # GitHub Container Registry manifests\n            ghcr_sha_amd=${{ env.IMAGE }}@${{ needs.build_and_tag.outputs.sha_amd }}\n            ghcr_sha_arm=${{ env.IMAGE }}@${{ needs.build_and_tag.outputs.sha_arm }}\n            create_and_push_manifests \"${{ env.IMAGE }}\" \"$ghcr_sha_amd\" \"$ghcr_sha_arm\" \"${{ matrix.flavor }}\" \"${{ env.TAG_NAME }}\" \"${{ env.IS_PRERELEASE }}\"\n\n            # Google Artifact Registry manifests\n            gar_sha_amd=${{ env.GCS_IMAGE }}@${{ needs.build_and_tag.outputs.sha_amd }}\n            gar_sha_arm=${{ env.GCS_IMAGE }}@${{ needs.build_and_tag.outputs.sha_arm }}\n            create_and_push_manifests \"${{ env.GCS_IMAGE }}\" \"$gar_sha_amd\" \"$gar_sha_arm\" \"${{ 
matrix.flavor }}\" \"${{ env.TAG_NAME }}\" \"${{ env.IS_PRERELEASE }}\"\n\n  release_helm_and_notify:\n    needs: [merge_manifest]\n    runs-on: ubuntu-latest\n    permissions:\n      contents: write\n      packages: write\n      pull-requests: write\n    steps:\n    - name: print_env\n      run: env\n\n    - name: checkout\n      uses: actions/checkout@v6\n      with:\n        token: ${{ secrets.DRAGONFLY_TOKEN }}  # PAT to push to main\n        fetch-depth: 0\n\n    - name: Install helm\n      uses: azure/setup-helm@v4\n\n    - name: Setup Go\n      uses: actions/setup-go@v6\n\n    - name: Configure Git\n      if: env.IS_PRERELEASE != 'true'\n      run: |\n        git config user.name \"$GITHUB_ACTOR\"\n        git config user.email \"$GITHUB_ACTOR@users.noreply.github.com\"\n\n    - name: Update helm chart\n      if: env.IS_PRERELEASE != 'true'\n      run: |\n        git checkout -b helm-chart-update/${{ env.TAG_NAME }} origin/main\n        sed -Ei \\\n            -e 's/^(version\\:) .*/\\1 '${{ env.TAG_NAME }}'/g' \\\n            -e 's/^(appVersion\\:) .*/\\1 \"'${{ env.TAG_NAME }}'\"/g' \\\n            contrib/charts/dragonfly/Chart.yaml\n\n        go test ./contrib/charts/dragonfly/... 
-update\n\n        git commit \\\n          -m 'chore(helm-chart): update to ${{ env.TAG_NAME }}' \\\n          contrib/charts/dragonfly/Chart.yaml \\\n          contrib/charts/dragonfly/ci || true\n\n    - name: Push Helm chart as OCI to Github\n      if: env.IS_PRERELEASE != 'true'\n      run: |\n        echo \"${{ secrets.GITHUB_TOKEN }}\" | \\\n          helm registry login -u ${{ github.actor }} --password-stdin ghcr.io\n\n        helm package contrib/charts/dragonfly\n\n        helm push dragonfly-${{ env.TAG_NAME }}.tgz oci://ghcr.io/${{ github.repository }}/helm\n\n    - name: Discord notification\n      env:\n        DISCORD_WEBHOOK: ${{ secrets.DISCORD_WEBHOOK }}\n      uses: Ilshidur/action-discord@d2594079a10f1d6739ee50a2471f0ca57418b554\n      with:\n        args: 'DragonflyDB version [${{ env.TAG_NAME }}](https://github.com/dragonflydb/dragonfly/releases/tag/${{ env.TAG_NAME }}) has been released 🎉'\n\n    - name: Re-build Docs\n      if: env.IS_PRERELEASE != 'true'\n      run: |\n        curl -s -X POST '${{ secrets.VERCEL_DOCS_WEBHOOK }}'\n\n    - name: Create Helm Chart PR\n      if: env.IS_PRERELEASE != 'true'\n      env:\n        GH_TOKEN: ${{ secrets.DRAGONFLY_TOKEN }}\n      run: |\n        git push origin helm-chart-update/${{ env.TAG_NAME }}\n        gh pr create \\\n          --base main \\\n          --head helm-chart-update/${{ env.TAG_NAME }} \\\n          --title 'chore(helm-chart): update to ${{ env.TAG_NAME }}' \\\n          --body 'Automated Helm chart version bump to ${{ env.TAG_NAME }}.' \\\n          --reviewer vyavdoshenko\n"
  },
  {
    "path": ".github/workflows/epoll-regression-tests.yml",
    "content": "name: Epoll Regression Tests\n\non:\n  schedule:\n    - cron: \"0 0/3 * * *\"\n  workflow_dispatch:\n\njobs:\n  build:\n    if: github.repository == 'dragonflydb/dragonfly'\n    strategy:\n      matrix:\n        # Test of these containers\n        container: [\"ubuntu-dev:24\"]\n        proactor: [Epoll]\n        build-type: [Debug]\n        runner: [ubuntu-latest, [self-hosted, linux, ARM64]]\n\n    runs-on: ${{ matrix.runner }}\n\n    permissions:\n      id-token: write\n      contents: read\n\n    container:\n      image: ghcr.io/romange/${{ matrix.container }}\n      options: --security-opt seccomp=unconfined --sysctl \"net.ipv6.conf.all.disable_ipv6=0\"\n      volumes:\n        - /var/crash:/var/crash\n        - /:/hostroot\n        - /mnt:/mnt\n    steps:\n      - uses: actions/checkout@v6\n        with:\n          submodules: true\n\n      - name: Print environment info\n        run: |\n          cat /proc/cpuinfo\n          ulimit -a\n          env\n\n      - name: Build Dragonfly\n        uses: ./.github/actions/builder\n        with:\n          build-type: ${{matrix.build-type}}\n          targets: 'dragonfly'\n\n      - name: Authenticate to AWS\n        uses: aws-actions/configure-aws-credentials@v5\n        with:\n          role-to-assume: ${{ secrets.AWS_CI_S3_ROLE_ARN }}\n          aws-region: us-east-1\n\n      - name: Run regression tests action\n        uses: ./.github/actions/regression-tests\n        with:\n          dfly-executable: dragonfly\n          gspace-secret: ${{ secrets.GSPACES_BOT_DF_BUILD }}\n          build-folder-name: build\n          filter: ${{ matrix.build-type == 'Release' && 'not empty' || 'not opt_only' }}\n          s3-bucket: ${{ secrets.S3_REGTEST_BUCKET }}\n          # Chain ternary oprator of the form (which can be nested)\n          # (expression == condition && <true expression> || <false expression>)\n          epoll: ${{ matrix.proactor == 'Epoll' && 'epoll' || 'iouring' }}\n\n      - name: Upload 
logs on failure\n        if: failure()\n        uses: actions/upload-artifact@v6\n        with:\n          name: logs\n          path: /tmp/failed/*\n\n  lint-test-chart:\n    if: github.repository == 'dragonflydb/dragonfly'\n    runs-on: ubuntu-latest\n    steps:\n      - uses: actions/checkout@v6\n      - uses: ./.github/actions/lint-test-chart\n"
  },
  {
    "path": ".github/workflows/fuzz-long.yml",
    "content": "name: AFL++ Long Fuzzing Campaign\r\n\r\non:\r\n  schedule:\r\n    # Run nightly at 2 AM UTC\r\n    - cron: '0 2 * * *'\r\n  workflow_dispatch:\r\n    inputs:\r\n      resp_duration:\r\n        description: 'RESP fuzzing duration in minutes'\r\n        required: false\r\n        default: '60'\r\n        type: string\r\n      memcache_duration:\r\n        description: 'Memcache fuzzing duration in minutes'\r\n        required: false\r\n        default: '30'\r\n        type: string\r\n\r\nconcurrency:\r\n  group: ${{ github.workflow }}\r\n  cancel-in-progress: true\r\n\r\njobs:\r\n  fuzz-long:\r\n    if: github.repository == 'dragonflydb/dragonfly'\r\n    runs-on: CI-LARGE-86\r\n    timeout-minutes: 120\r\n\r\n    strategy:\r\n      fail-fast: false\r\n      matrix:\r\n        include:\r\n          - target: resp\r\n            duration: '60'\r\n          - target: memcache\r\n            duration: '30'\r\n\r\n    container:\r\n      image: ghcr.io/romange/ubuntu-dev:24-afl\r\n      options: --security-opt seccomp=unconfined --sysctl \"net.ipv6.conf.all.disable_ipv6=0\"\r\n      credentials:\r\n        username: ${{ github.repository_owner }}\r\n        password: ${{ secrets.GITHUB_TOKEN }}\r\n\r\n    steps:\r\n      - name: Checkout code\r\n        uses: actions/checkout@v6\r\n        with:\r\n          submodules: true\r\n\r\n      - name: Run AFL++ long fuzzing campaign (${{ matrix.target }})\r\n        uses: ./.github/actions/fuzzing\r\n        with:\r\n          mode: long\r\n          target: ${{ matrix.target }}\r\n          duration-minutes: ${{ matrix.target == 'resp' && (github.event.inputs.resp_duration || matrix.duration) || (github.event.inputs.memcache_duration || matrix.duration) }}\r\n          run-number: ${{ github.run_number }}\r\n\r\n      - name: Send notification on failure\r\n        if: failure() && github.ref == 'refs/heads/main'\r\n        run: |\r\n          
job_link=\"${GITHUB_SERVER_URL}/${GITHUB_REPOSITORY}/actions/runs/${GITHUB_RUN_ID}\"\r\n          message=\"AFL++ ${{ matrix.target }} fuzzing found crashes.\\\\n Commit: ${{github.sha}}\\\\n Job Link: ${job_link}\\\\n\"\r\n\r\n          curl -s \\\r\n            -X POST \\\r\n            -H 'Content-Type: application/json' \\\r\n            '${{ secrets.GSPACES_BOT_DF_BUILD }}' \\\r\n            -d '{\"text\": \"'\"${message}\"'\"}'\r\n"
  },
  {
    "path": ".github/workflows/fuzz-pr.yml",
    "content": "# Run AFL++ fuzzing on PRs that touch C++ code.\n#\n# For each PR, an LLM analyzes the diff and generates:\n#   1. Targeted seed files — initial inputs crafted to exercise the changed code paths.\n#      (A \"seed\" is a RESP-encoded sequence of Redis commands that the fuzzer starts from\n#       and mutates; see fuzz/seeds/resp/*.resp for the existing seed corpus.)\n#   2. Focus command list — commands the mutator should prefer (~70% of the time),\n#      so mutations concentrate on the affected code instead of spreading randomly.\n#\n# The fuzzer then runs for 15 minutes in \"smoke\" mode (stop on first crash).\n# When ANTHROPIC_API_KEY is unavailable (e.g. fork PRs), seed generation is skipped\n# and the fuzzer uses the existing seed corpus as-is.\n#\n# Additionally, if the PR touches memcache-related code (memcache_parser, mc_family,\n# fuzz/memcache_mutator.py, or fuzz/seeds/memcache/), a focused memcache fuzzing step\n# runs automatically after RESP fuzzing passes, reusing the already-built binary.\nname: AFL++ PR Fuzzing\n\non:\n  pull_request:\n    branches: [main]\n    paths:\n      - 'src/**/*.cc'\n      - 'src/**/*.h'\n      - 'helio/**/*.cc'\n      - 'helio/**/*.h'\n      - 'fuzz/**'\n      - '.github/workflows/fuzz-pr.yml'\n      - '.github/actions/fuzzing/**'\n  workflow_dispatch:\n    inputs:\n      duration:\n        description: 'Fuzzing duration in minutes'\n        required: false\n        default: '15'\n        type: string\n      memcache-duration:\n        description: 'Memcache fuzzing duration in minutes'\n        required: false\n        default: '10'\n        type: string\n\nconcurrency:\n  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}\n  cancel-in-progress: true\n\njobs:\n  fuzz-pr:\n    runs-on: CI-LARGE-86\n    timeout-minutes: 60\n\n    container:\n      image: ghcr.io/romange/ubuntu-dev:24-afl\n      options: --security-opt seccomp=unconfined --sysctl 
\"net.ipv6.conf.all.disable_ipv6=0\"\n      credentials:\n        username: ${{ github.repository_owner }}\n        password: ${{ secrets.GITHUB_TOKEN }}\n\n    steps:\n      - name: Checkout code\n        uses: actions/checkout@v6\n        with:\n          submodules: true\n          fetch-depth: 0\n\n      - name: Generate PR diff\n        id: diff\n        run: |\n          if [ \"${{ github.event_name }}\" = \"pull_request\" ]; then\n            git config --global --add safe.directory \"$GITHUB_WORKSPACE\"\n            BASE=${{ github.event.pull_request.base.sha }}\n            HEAD_SHA=${{ github.event.pull_request.head.sha }}\n            MERGE_BASE=$(git merge-base \"$BASE\" \"$HEAD_SHA\")\n            git diff \"$MERGE_BASE\"..\"$HEAD_SHA\" > /tmp/pr_diff.txt\n          else\n            echo \"\" > /tmp/pr_diff.txt\n          fi\n\n          DIFF_LINES=$(wc -l < /tmp/pr_diff.txt)\n          echo \"diff_lines=${DIFF_LINES}\" >> \"$GITHUB_OUTPUT\"\n\n          echo \"::group::PR diff summary\"\n          echo \"C++ diff lines: ${DIFF_LINES}\"\n          if [ \"$DIFF_LINES\" -gt 0 ]; then\n            echo \"Changed files:\"\n            grep '^diff --git' /tmp/pr_diff.txt | sed 's|diff --git a/.* b/|  |' || true\n          else\n            echo \"No C++ file changes in this PR — seed generation will be skipped\"\n          fi\n          echo \"::endgroup::\"\n\n      - name: Generate targeted seeds\n        id: seeds\n        run: |\n          pip install 'anthropic>=0.39,<1' 2>/dev/null || pip install --break-system-packages 'anthropic>=0.39,<1' 2>/dev/null || true\n\n          SEEDS_DIR=\"${GITHUB_WORKSPACE}/fuzz/seeds/pr_targeted\"\n          mkdir -p \"$SEEDS_DIR\"\n\n          python3 fuzz/generate_targeted_seeds.py \\\n            --output-dir \"$SEEDS_DIR\" \\\n            < /tmp/pr_diff.txt\n\n          FOCUS=\"\"\n          if [ -f \"$SEEDS_DIR/focus_commands.json\" ]; then\n            FOCUS=$(cat \"$SEEDS_DIR/focus_commands.json\")\n          
fi\n          echo \"focus_commands=${FOCUS}\" >> \"$GITHUB_OUTPUT\"\n          echo \"seeds_dir=${SEEDS_DIR}\" >> \"$GITHUB_OUTPUT\"\n\n          SEED_COUNT=$(ls \"$SEEDS_DIR\"/*.resp 2>/dev/null | wc -l || echo 0)\n\n          echo \"::group::Seed generation results\"\n          echo \"Seeds generated: ${SEED_COUNT}\"\n          echo \"Focus commands: ${FOCUS:-none}\"\n          if [ \"$SEED_COUNT\" -gt 0 ]; then\n            ls -la \"$SEEDS_DIR\"/*.resp\n          fi\n          echo \"::endgroup::\"\n\n          # Job summary\n          {\n            echo \"### Fuzzing Seed Generation\"\n            echo \"\"\n            if [ \"$SEED_COUNT\" -gt 0 ]; then\n              echo \"- **Seeds generated:** ${SEED_COUNT}\"\n              echo \"- **Focus commands:** \\`${FOCUS}\\`\"\n            elif [ \"$(wc -l < /tmp/pr_diff.txt)\" -eq 0 ]; then\n              echo \"- No C++ changes in PR — using default seed corpus\"\n            elif [ -z \"$ANTHROPIC_API_KEY\" ]; then\n              echo \"- No API key — using default seed corpus\"\n            else\n              echo \"- LLM did not produce usable seeds — using default seed corpus\"\n            fi\n          } >> \"$GITHUB_STEP_SUMMARY\"\n        env:\n          ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}\n\n      - name: Run AFL++ PR fuzzing\n        uses: ./.github/actions/fuzzing\n        with:\n          mode: smoke\n          duration-minutes: ${{ github.event.inputs.duration || '15' }}\n          run-number: ${{ github.run_number }}\n          extra-seeds-dir: ${{ steps.seeds.outputs.seeds_dir }}\n          focus-commands: ${{ steps.seeds.outputs.focus_commands }}\n\n      # Reuses the binary built by the RESP step above (build: false).\n      # Only runs when RESP fuzzing passed (default success() condition) and memcache\n      # code was actually touched in this PR.\n      - name: Check if memcache-related files changed\n        id: memcache-check\n        run: |\n          if [ \"${{ 
github.event_name }}\" = \"pull_request\" ]; then\n            CHANGED=$(grep -E '^diff --git a/(src/(facade/memcache|server/mc_family)|fuzz/(memcache_mutator|seeds/memcache))' /tmp/pr_diff.txt || true)\n            if [ -n \"$CHANGED\" ]; then\n              echo \"run=true\" >> \"$GITHUB_OUTPUT\"\n              echo \"Memcache-related files changed — will run memcache fuzzing:\"\n              echo \"$CHANGED\" | sed 's|diff --git a/.* b/|  |'\n            else\n              echo \"run=false\" >> \"$GITHUB_OUTPUT\"\n              echo \"No memcache-related files changed — skipping memcache fuzzing\"\n            fi\n          else\n            echo \"run=true\" >> \"$GITHUB_OUTPUT\"\n            echo \"Manual trigger — running memcache fuzzing\"\n          fi\n\n      - name: Run AFL++ memcache fuzzing\n        if: success() && steps.memcache-check.outputs.run == 'true'\n        uses: ./.github/actions/fuzzing\n        with:\n          mode: smoke\n          target: memcache\n          build: 'false'\n          duration-minutes: ${{ github.event.inputs['memcache-duration'] || '10' }}\n          run-number: ${{ github.run_number }}\n"
  },
  {
    "path": ".github/workflows/generate-osrepo-site.yml",
    "content": "name: generate-site\non:\n  workflow_dispatch:\n  release:\n    types: [published]\n\njobs:\n  gen-site:\n    runs-on: ubuntu-latest\n    env:\n      SiteRoot: _site\n\n    name: Generate index and site assets\n    steps:\n      - name: Checkout Repository\n        uses: actions/checkout@v6\n\n      - name: Install packaging tools\n        # RPM tools are available on ubuntu\n        run: sudo apt install -y rpm gpg createrepo-c dpkg-dev reprepro\n\n      - name: Setup requirements\n        working-directory: tools/packaging/osrepos\n        run: pip install -r requirements.txt\n\n      - name: Download packages\n        working-directory: tools/packaging/osrepos\n        run: python scripts/fetch-releases.py $SiteRoot\n\n      - name: Import GPG key\n        id: gpg-import\n        uses: crazy-max/ghaction-import-gpg@v6\n        with:\n          gpg_private_key: ${{ secrets.GPG_PRIVATE_KEY }}\n\n      - name: Sign RPMs\n        shell: sh\n        working-directory: tools/packaging/osrepos\n        run: sh scripts/sign-rpms.sh ${{ steps.gpg-import.outputs.fingerprint }}\n\n      - name: Create YUM repository\n        # Creates metadata for YUM/DNF repository, the files were copied in the download step\n        shell: sh\n        working-directory: tools/packaging/osrepos\n        run: createrepo_c -v $SiteRoot/rpm\n\n      - name: Sign YUM repository\n        shell: sh\n        working-directory: tools/packaging/osrepos\n        run: gpg --armor --detach-sign $SiteRoot/rpm/repodata/repomd.xml\n\n      - name: Create APT repository\n        # The configuration for apt repo is in tools/packaging/osrepos/reprepro-config,\n        # which ensures the same GPG key used elsewhere in this action is used to sign\n        # the repository\n        shell: sh\n        working-directory: tools/packaging/osrepos\n        run: sh -x scripts/generate-apt-repo.sh\n\n      - name: Prepare assets\n        working-directory: tools/packaging/osrepos\n        run: |\n   
       cp -aRv dragonfly.repo pgp-key.public dragonfly.sources $SiteRoot/\n          rm -rf $SiteRoot/deb/conf\n\n      - name: Generate Directory Listings\n        working-directory: tools/packaging/osrepos\n        run: python scripts/generate-index.py $SiteRoot\n\n      - name: Authenticate\n        uses: 'google-github-actions/auth@v3'\n        with:\n          project_id: 'dragonflydb'\n          credentials_json: ${{ secrets.GCP_BUCKET_CREDENTIALS }}\n\n      - name: GCloud setup\n        uses: 'google-github-actions/setup-gcloud@v3'\n\n      - name: Deploy site\n        working-directory: tools/packaging/osrepos\n        run: |\n          gcloud storage rm ${{ secrets.GCP_PACKAGES_BUCKET }}/**\n          gcloud storage rsync $SiteRoot ${{ secrets.GCP_PACKAGES_BUCKET }} --recursive --delete-unmatched-destination-objects\n\n      - name: Notify on failure\n        if: failure()\n        run: |\n          job_link=\"${GITHUB_SERVER_URL}/${GITHUB_REPOSITORY}/actions/runs/${GITHUB_RUN_ID}\"\n          message=\"Package repo generation failed.\\nCommit: ${{ github.sha }}\\nJob: ${job_link}\"\n          curl -sSf -X POST -H 'Content-Type: application/json' '${{ secrets.GSPACES_BOT_DF_BUILD }}' -d '{\"text\": \"'\"${message}\"'\"}'\n"
  },
  {
    "path": ".github/workflows/heavy-tests.yml",
    "content": "name: Heavy Tests\n\non:\n  schedule:\n    - cron: \"0 0/6 * * *\"\n  workflow_dispatch:\n\njobs:\n  build:\n    if: github.repository == 'dragonflydb/dragonfly'\n    strategy:\n      matrix:\n        # Test of these containers\n        container: [\"ubuntu-dev:24\"]\n        proactor: [Uring]\n        build-type: [Release]\n        runner: [CI-LARGE-86, CI-LARGE-ARM]\n\n    runs-on: ${{ matrix.runner }}\n\n    permissions:\n      id-token: write\n      contents: read\n\n    container:\n      image: ghcr.io/romange/${{ matrix.container }}\n      options: --security-opt seccomp=unconfined --sysctl \"net.ipv6.conf.all.disable_ipv6=0\"\n      volumes:\n        - /var/crash:/var/crash\n        - /:/hostroot\n        - /mnt:/mnt\n    steps:\n      - uses: actions/checkout@v6\n        with:\n          submodules: true\n\n      - name: Print environment info\n        run: |\n          cat /proc/cpuinfo\n          ulimit -a\n          env\n          lsblk -l\n\n      - name: Build Dragonfly\n        uses: ./.github/actions/builder\n        with:\n          build-type: ${{matrix.build-type}}\n          targets: 'dragonfly'\n\n      - name: Authenticate to AWS\n        uses: aws-actions/configure-aws-credentials@v5\n        with:\n          role-to-assume: ${{ secrets.AWS_CI_S3_ROLE_ARN }}\n          aws-region: us-east-1\n\n      - name: Run heavy tests\n        uses: ./.github/actions/regression-tests\n        with:\n          dfly-executable: dragonfly\n          gspace-secret: ${{ secrets.GSPACES_BOT_DF_BUILD }}\n          build-folder-name: build\n          filter: large\n          s3-bucket: ${{ secrets.S3_REGTEST_BUCKET }}\n\n      - name: Upload logs on failure\n        if: failure()\n        uses: actions/upload-artifact@v6\n        with:\n          name: logs-${{ matrix.runner }}\n          path: /tmp/failed/*\n"
  },
  {
    "path": ".github/workflows/ioloop-v2-regtests.yml",
    "content": "name: RegTests IoLoopV2\n\n# Manually triggered only\non:\n  workflow_dispatch:\n\njobs:\n  build:\n    strategy:\n      matrix:\n        # Test of these containers\n        container: [\"ubuntu-dev:20-gcc14\"]\n        proactor: [Uring]\n        build-type: [Debug, Release]\n        runner: [ubuntu-latest, [self-hosted, linux, ARM64]]\n\n    runs-on: ${{ matrix.runner }}\n\n    permissions:\n      id-token: write\n      contents: read\n\n    container:\n      image: ghcr.io/romange/${{ matrix.container }}\n      options: --security-opt seccomp=unconfined --sysctl \"net.ipv6.conf.all.disable_ipv6=0\"\n      volumes:\n        - /var/crash:/var/crash\n        - /:/hostroot\n        - /mnt:/mnt\n\n    steps:\n      - uses: actions/checkout@v6\n        with:\n          submodules: true\n\n      - name: Print environment info\n        run: |\n          cat /proc/cpuinfo\n          ulimit -a\n          env\n\n      - name: Build Dragonfly\n        uses: ./.github/actions/builder\n        with:\n          build-type: ${{matrix.build-type}}\n          targets: 'dragonfly'\n\n      - name: Authenticate to AWS\n        uses: aws-actions/configure-aws-credentials@v5\n        with:\n          role-to-assume: ${{ secrets.AWS_CI_S3_ROLE_ARN }}\n          aws-region: us-east-1\n\n      - name: Run regression tests action\n        uses: ./.github/actions/regression-tests\n        with:\n          dfly-executable: dragonfly\n          gspace-secret: ${{ secrets.GSPACES_BOT_DF_BUILD }}\n          build-folder-name: build\n          filter: ${{ matrix.build-type == 'Release' && 'not debug_only and not tls' || 'not opt_only and not tls' }}\n          s3-bucket: ${{ secrets.S3_REGTEST_BUCKET }}\n\n      - name: Upload logs on failure\n        if: failure()\n        uses: actions/upload-artifact@v6\n        with:\n          name: logs\n          path: /tmp/failed/*\n\n  lint-test-chart:\n    runs-on: ubuntu-latest\n    steps:\n      - uses: actions/checkout@v6\n      - 
uses: ./.github/actions/lint-test-chart\n"
  },
  {
    "path": ".github/workflows/mastodon-ruby-tests.yml",
    "content": "name: Mastodon ruby tests\non:\n  schedule:\n    - cron: '0 6 * * *' # run at 6 AM UTC\n  workflow_dispatch:\n\n\njobs:\n  build-and-test:\n    if: github.repository == 'dragonflydb/dragonfly'\n    runs-on: ubuntu-latest\n    name: Build and run tests\n\n    services:\n      postgres:\n        image: postgres:14-alpine\n        env:\n          POSTGRES_PASSWORD: postgres\n          POSTGRES_USER: postgres\n        options: >-\n          --health-cmd pg_isready\n          --health-interval 10ms\n          --health-timeout 3s\n          --health-retries 50\n        ports:\n          - 5432:5432\n\n      redis:\n        image: docker.dragonflydb.io/dragonflydb/dragonfly:latest\n        options: >-\n          --health-cmd \"redis-cli ping\"\n          --health-interval 10ms\n          --health-timeout 3s\n          --health-retries 50\n        ports:\n          - 6379:6379\n\n    env:\n      DB_HOST: localhost\n      DB_USER: postgres\n      DB_PASS: postgres\n      RAILS_ENV: test\n      ALLOW_NOPAM: true\n      PAM_ENABLED: true\n      PAM_DEFAULT_SERVICE: pam_test\n      PAM_CONTROLLED_SERVICE: pam_test_controlled\n      OIDC_ENABLED: true\n      OIDC_SCOPE: read\n      SAML_ENABLED: true\n      CAS_ENABLED: true\n      BUNDLE_WITH: 'pam_authentication test'\n      GITHUB_RSPEC: false\n\n    steps:\n      - name: Checkout mastodon\n        uses: actions/checkout@v6\n        with:\n          repository: mastodon/mastodon\n      - name: Install pre-requisites\n        run: |\n          sudo apt update\n          sudo apt install -y libicu-dev libidn11-dev libvips42 ffmpeg imagemagick libpam-dev\n      - name: Set up Ruby\n        uses: ruby/setup-ruby@v1\n        with:\n          ruby-version: 3.4\n          bundler-cache: true\n      - name: Enable corepack\n        shell: bash\n        run: corepack enable\n      - name: Install all production yarn packages\n        shell: bash\n        run: yarn workspaces focus --production\n      - name: Set up 
Node.js\n        uses: actions/setup-node@v6\n        with:\n          node-version-file: '.nvmrc'\n      - name: Precompile assets\n        run: |-\n          bin/rails assets:precompile\n      - name: Load database schema\n        run: |\n          bin/rails db:setup\n          bin/flatware fan bin/rails db:test:prepare\n      - name: Run tests\n        env:\n          SPEC_OPTS: '--exclude-pattern \"**/self_destruct_scheduler_spec.rb\"'\n        run: |\n          unset COVERAGE\n          bin/flatware rspec -r ./spec/flatware_helper.rb\n      - name: Notify on failures\n        if: failure()\n        shell: bash\n        run: |\n          job_link=\"${GITHUB_SERVER_URL}/${GITHUB_REPOSITORY}/actions/runs/${GITHUB_RUN_ID}\"\n          message=\"Mastodon ruby tests failed.\\\\n The commit is: ${{github.sha}}.\\\\n Job Link: ${job_link}\\\\n\"\n          curl -s \\\n            -X POST \\\n            -H 'Content-Type: application/json' \\\n            '${{ secrets.GSPACES_BOT_DF_BUILD }}' \\\n            -d '{\"text\": \"'\"${message}\"'\"}'\n"
  },
  {
    "path": ".github/workflows/package-install.yml",
    "content": "name: package-install-tests\n\non:\n  schedule:\n    - cron: '0 6 * * *'\n  workflow_dispatch:\n  workflow_run:\n    workflows: [\"generate-site\"]\n    types: [completed]\n\njobs:\n  test-rpm:\n    runs-on: ubuntu-latest\n    if: github.repository == 'dragonflydb/dragonfly' && (github.event_name != 'workflow_run' || github.event.workflow_run.conclusion == 'success')\n    container:\n      image: ghcr.io/romange/fedora:30\n    steps:\n      - name: Install on fedora\n        run: |\n          curl -Lo /etc/yum.repos.d/dragonfly.repo https://packages.dragonflydb.io/dragonfly.repo\n          dnf clean all\n          dnf makecache\n          dnf -y install dragonfly\n          dragonfly --version\n\n  test-deb-ubuntu:\n    runs-on: ubuntu-latest\n    if: github.repository == 'dragonflydb/dragonfly' && (github.event_name != 'workflow_run' || github.event.workflow_run.conclusion == 'success')\n    container:\n      image: ghcr.io/romange/ubuntu:noble\n    steps:\n      - name: Install on ubuntu\n        run: |\n          apt update\n          apt install -y curl\n          curl -Lo /usr/share/keyrings/dragonfly-keyring.public https://packages.dragonflydb.io/pgp-key.public\n          curl -Lo /etc/apt/sources.list.d/dragonfly.sources https://packages.dragonflydb.io/dragonfly.sources\n          apt update\n          apt install -y dragonfly\n          dragonfly --version\n\n  notify-on-failure:\n    runs-on: ubuntu-latest\n    needs: [test-rpm, test-deb-ubuntu]\n    if: github.repository == 'dragonflydb/dragonfly' && always() && (needs.test-rpm.result == 'failure' || needs.test-deb-ubuntu.result == 'failure')\n    steps:\n      - name: Notify on failure\n        run: |\n          job_link=\"${GITHUB_SERVER_URL}/${GITHUB_REPOSITORY}/actions/runs/${GITHUB_RUN_ID}\"\n          message=\"Package install tests failed.\\nCommit: ${{ github.sha }}\\nJob: ${job_link}\"\n          curl -sSf -X POST -H 'Content-Type: application/json' '${{ 
secrets.GSPACES_BOT_DF_BUILD }}' -d '{\"text\": \"'\"${message}\"'\"}'\n"
  },
  {
    "path": ".github/workflows/regression-tests.yml",
    "content": "name: Regression Tests\n\non:\n  schedule:\n    - cron: \"0 0/3 * * *\"\n  workflow_dispatch:\n\njobs:\n  build:\n    if: github.repository == 'dragonflydb/dragonfly'\n    strategy:\n      matrix:\n        # Test of these containers\n        container: [\"ubuntu-dev:24\"]\n        proactor: [Uring]\n        build-type: [Debug, Release]\n        runner: [ubuntu-latest, [self-hosted, linux, ARM64]]\n\n    runs-on: ${{ matrix.runner }}\n\n    permissions:\n      id-token: write\n      contents: read\n\n    container:\n      image: ghcr.io/romange/${{ matrix.container }}\n      options: --security-opt seccomp=unconfined --sysctl \"net.ipv6.conf.all.disable_ipv6=0\"\n      volumes:\n        - /var/crash:/var/crash\n        - /:/hostroot\n        - /mnt:/mnt\n    steps:\n      - uses: actions/checkout@v6\n        with:\n          submodules: true\n\n      - name: Print environment info\n        run: |\n          cat /proc/cpuinfo\n          ulimit -a\n          env\n          lsblk -l\n\n      - name: Build Dragonfly\n        uses: ./.github/actions/builder\n        with:\n          build-type: ${{matrix.build-type}}\n          targets: 'dragonfly'\n\n      - name: Authenticate to AWS\n        uses: aws-actions/configure-aws-credentials@v5\n        with:\n          role-to-assume: ${{ secrets.AWS_CI_S3_ROLE_ARN }}\n          aws-region: us-east-1\n\n      - name: Run regression tests action\n        uses: ./.github/actions/regression-tests\n        with:\n          dfly-executable: dragonfly\n          gspace-secret: ${{ secrets.GSPACES_BOT_DF_BUILD }}\n          build-folder-name: build\n          filter: ${{ matrix.build-type == 'Release' && 'not debug_only' || 'not opt_only' }}\n          s3-bucket: ${{ secrets.S3_REGTEST_BUCKET }}\n\n      - name: Upload logs on failure\n        if: failure()\n        uses: actions/upload-artifact@v6\n        with:\n          name: logs\n          path: /tmp/failed/*\n\n  lint-test-chart:\n    if: github.repository == 
'dragonflydb/dragonfly'\n    runs-on: ubuntu-latest\n    steps:\n      - uses: actions/checkout@v6\n      - uses: ./.github/actions/lint-test-chart\n"
  },
  {
    "path": ".github/workflows/release.yml",
    "content": "name: Version Release\n\non:\n  push:\n    tags:\n    - 'v*'\n\npermissions:\n  contents: write\n\nenv:\n  RELEASE_DIR: build-release\n\njobs:\n  create-release:\n    runs-on: ubuntu-latest\n    steps:\n      - name: Create Release\n        uses: ncipollo/release-action@v1\n        with:\n          allowUpdates: true\n          omitBody: true\n          prerelease: true\n          draft: true\n          token: ${{ secrets.GITHUB_TOKEN }}\n\n  build-arm:\n    runs-on: ubuntu-24.04-arm\n    name: Build arm64 on ubuntu-24.04-arm\n    needs: create-release\n    container:\n      image: ghcr.io/romange/ubuntu-dev:20-gcc14\n      options: --security-opt seccomp=unconfined --sysctl \"net.ipv6.conf.all.disable_ipv6=0\"\n    steps:\n    - uses: actions/checkout@v6\n      with:\n        submodules: true\n    - name: Build artifacts\n      run: |\n            # Work around https://github.com/actions/checkout/issues/766\n            git config --global --add safe.directory \"$GITHUB_WORKSPACE\"\n            git describe --always --tags ${{ github.sha }}\n            ./tools/release.sh\n            ./tools/packaging/generate_debian_package.sh ${{ env.RELEASE_DIR }}/dragonfly-aarch64\n            mv dragonfly_*.deb ${{ env.RELEASE_DIR }}/\n\n    - name: Upload\n      uses: actions/upload-artifact@v6\n      with:\n        name: dragonfly-aarch64\n        path: |\n          ${{ env.RELEASE_DIR }}/dragonfly-*tar.gz\n          ${{ env.RELEASE_DIR }}/dragonfly_*.deb\n          ${{ env.RELEASE_DIR }}/dfly_bench-*tar.gz\n\n  build-native:\n    runs-on: ubuntu-latest\n    needs: create-release\n    strategy:\n      matrix:\n        include:\n          # Build with these flags\n          - name: debian\n            container: ubuntu-dev:20-gcc14\n          - name: rpm\n            container: fedora:30-gcc14\n    container:\n      image: ghcr.io/romange/${{ matrix.container }}\n      options: --security-opt seccomp=unconfined --sysctl \"net.ipv6.conf.all.disable_ipv6=0\"\n 
     # Some tests which launch their own containers need a mounted volume to write through files\n      # into child containers\n      volumes:\n        - /mnt:/mnt\n\n    steps:\n    - uses: actions/checkout@v6\n      with:\n        submodules: true\n    - name: Configure\n      run: |\n          if [ -f /etc/redhat-release ]; then\n            dnf install -y rpm-build libstdc++-static\n          fi\n    - name: Build artifacts\n      timeout-minutes: 25\n      run: |\n          # Work around https://github.com/actions/checkout/issues/766\n          git config --global --add safe.directory \"$GITHUB_WORKSPACE\"\n          git describe --always --tags ${{ github.sha }}\n\n          # set WITH_SIMSIMD=OFF for fedora:30\n          if [ \"${{ matrix.name }}\" == 'rpm' ]; then\n            export WITH_SIMSIMD=\"OFF\"\n          fi\n          ./tools/release.sh\n\n          # once the build is over, we want to generate a Debian package\n          if [ -f /etc/debian_version ]; then\n            ./tools/packaging/generate_debian_package.sh ${{ env.RELEASE_DIR }}/dragonfly-x86_64\n          else\n            echo \"Creating package for ${{github.ref_name}}\"\n            ./tools/packaging/rpm/build_rpm.sh ${{ env.RELEASE_DIR }}/dragonfly-x86_64.tar.gz ${{github.ref_name}}\n          fi\n\n    - name: Save artifacts\n      run: |\n          # place all artifacts at the same location\n          set -eu\n          mkdir -p results-artifacts\n          if [ -f /etc/debian_version ]; then\n            mv ${{ env.RELEASE_DIR }}/dragonfly-*tar.gz results-artifacts\n            mv dragonfly_*.deb results-artifacts\n            mv ${{ env.RELEASE_DIR }}/dfly_bench-*tar.gz results-artifacts\n          else\n            ls -l *.rpm\n            mv ./*.rpm ./results-artifacts/\n          fi\n\n    - name: Upload\n      uses: actions/upload-artifact@v6\n      with:\n        name: dragonfly-amd64-${{ matrix.name }}\n        path: results-artifacts/*\n\n  test-regression:\n    needs: 
[build-native, build-arm]\n    runs-on: ${{ matrix.runner }}\n    strategy:\n      matrix:\n        include:\n          - name: amd64\n            runner: ubuntu-latest\n            artifact: dragonfly-amd64-debian\n            binary: dragonfly-x86_64\n          - name: arm64\n            runner: ubuntu-24.04-arm\n            artifact: dragonfly-aarch64\n            binary: dragonfly-aarch64\n    container:\n      image: ghcr.io/romange/ubuntu-dev:24\n      options: --security-opt seccomp=unconfined --sysctl \"net.ipv6.conf.all.disable_ipv6=0\"\n      volumes:\n        - /mnt:/mnt\n    steps:\n    - uses: actions/checkout@v6\n      with:\n        submodules: true\n    - name: Download artifacts\n      uses: actions/download-artifact@v7\n      with:\n        name: ${{ matrix.artifact }}\n        path: results-artifacts\n    - name: Extract artifacts\n      run: |\n        set -eu\n        mkdir -p ${{ env.RELEASE_DIR }}\n        tar -xzf results-artifacts/dragonfly-*dbgsym.tar.gz -C ${{ env.RELEASE_DIR }}\n    - name: Run regression tests\n      uses: ./.github/actions/regression-tests\n      with:\n        dfly-executable: ${{ matrix.binary }}\n        gspace-secret: ${{ secrets.GSPACES_BOT_DF_BUILD }}\n        build-folder-name: ${{ env.RELEASE_DIR }}\n        filter: 'not debug_only'\n\n  publish_release:\n    runs-on: ubuntu-latest\n    needs: test-regression\n    steps:\n      - uses: actions/download-artifact@v7\n        name: Download files\n        with:\n          path: artifacts\n      - name: See all the artifacts\n        run: |\n          ls -lR artifacts/\n      - uses: ncipollo/release-action@v1\n        with:\n          artifacts: \"artifacts/dragonfly-*/*\"\n          allowUpdates: true\n          draft: true\n          prerelease: true\n          omitNameDuringUpdate: true\n          token: ${{ secrets.GITHUB_TOKEN }}\n"
  },
  {
    "path": ".github/workflows/repeat-tests.yml",
    "content": "name: Repeat Tests\n\non:\n  workflow_dispatch:\n    inputs:\n      branch:\n        description: \"The branch on which tests will be repeated\"\n        type: string\n        required: false\n      commit:\n        description: \"A specific commit SHA to test (takes precedence over branch)\"\n        type: string\n        required: false\n      count:\n        description: \"The number of times the tests will be repeated\"\n        type: number\n        required: false\n        default: 1\n      expression:\n        description: \"A pytest expression which will filter the tests\"\n        required: true\n        type: string\n      timeout:\n        description: \"Overall timeout for all test runs\"\n        required: false\n        type: string\n        default: \"60m\"\n      epoll:\n        description: \"Force epoll mode in test\"\n        required: false\n        type: string\n        default: \"no\"\n      use_release:\n        description: \"Use latest release instead of building dragonfly\"\n        required: false\n        type: string\n        default: \"no\"\n      vmodule_expression:\n        description: \"Emit verbose dragonfly logs for modules, eg x=2,y=3\"\n        required: false\n        type: string\n        default: \"\"\n      build_type:\n        description: \"Build type: Debug or Release\"\n        required: false\n        type: choice\n        options:\n          - Debug\n          - Release\n        default: \"Debug\"\n\njobs:\n  build:\n    strategy:\n      matrix:\n        container: [\"ubuntu-dev:24\"]\n        proactor: [Uring]\n        build-type: [\"${{ inputs.build_type || 'Debug' }}\"]\n        runner: [ubuntu-latest]\n\n    runs-on: ${{ matrix.runner }}\n\n    permissions:\n      id-token: write\n      contents: read\n\n    container:\n      image: ghcr.io/romange/${{ matrix.container }}\n      options: --security-opt seccomp=unconfined --sysctl \"net.ipv6.conf.all.disable_ipv6=0\"\n      volumes:\n        - 
/var/crash:/var/crash\n\n    steps:\n      - uses: actions/checkout@v6\n        with:\n          submodules: true\n          ref: ${{ inputs.commit || inputs.branch }}\n\n      - name: Print environment info\n        run: |\n          cat /proc/cpuinfo\n          ulimit -a\n          env\n\n      - name: Fetch release\n        shell: bash\n        if: ${{ inputs.use_release == 'yes' }}\n        run: |\n          mkdir \"${GITHUB_WORKSPACE}\"/build\n          cd \"${GITHUB_WORKSPACE}\"/build\n          wget -q https://github.com/dragonflydb/dragonfly/releases/latest/download/dragonfly-x86_64.tar.gz\n          tar xf dragonfly-x86_64.tar.gz\n          mv dragonfly-x86_64 dragonfly\n          ls -l\n\n      - name: Build Dragonfly\n        if: ${{ inputs.use_release != 'yes' }}\n        uses: ./.github/actions/builder\n        with:\n          build-type: ${{matrix.build-type}}\n          targets: 'dragonfly'\n\n      - name: Sync valkey tests\n        uses: ./.github/actions/sync-valkey-tests\n      - name: Authenticate to AWS\n        uses: aws-actions/configure-aws-credentials@v5\n        with:\n          role-to-assume: ${{ secrets.AWS_CI_S3_ROLE_ARN }}\n          aws-region: us-east-1\n\n      - name: Run tests on repeat\n        uses: ./.github/actions/repeat\n        with:\n          run-only-on-ubuntu-latest: true\n          dfly-executable: dragonfly\n          build-folder-name: build\n          s3-bucket: ${{ secrets.S3_REGTEST_BUCKET }}\n          expression: ${{ inputs.expression }}\n          count: ${{ inputs.count }}\n          timeout: ${{ inputs.timeout }}\n          epoll: ${{ inputs.epoll }}\n          vmodule_expression: ${{ inputs.vmodule_expression }}\n\n      - name: Upload logs on failure\n        if: failure()\n        uses: actions/upload-artifact@v6\n        with:\n          name: logs\n          path: /tmp/failed/*\n\n      - name: Copy binary on a self hosted runner\n        if: failure()\n        run: |\n          # We must use sh 
syntax.\n          if [ \"$RUNNER_ENVIRONMENT\" = \"self-hosted\" ]; then\n            cd ${GITHUB_WORKSPACE}/build\n            timestamp=$(date +%Y-%m-%d_%H:%M:%S)\n            mv ./dragonfly /var/crash/dragonfy_${timestamp}\n          fi\n"
  },
  {
    "path": ".github/workflows/test-fakeredis.yml",
    "content": "---\nname: Test Dragonfly/Fakeredis\n\non:\n  workflow_dispatch:\n  pull_request:\n\npermissions:\n  contents: read\n  checks: write\n\nconcurrency:\n  group: dragonfly-${{ github.workflow }}-${{ github.ref }}\n  cancel-in-progress: true\n\njobs:\n  test:\n    runs-on: ubuntu-latest\n    container:\n      image: ghcr.io/romange/ubuntu-dev:22\n      options: --security-opt seccomp=unconfined --sysctl \"net.ipv6.conf.all.disable_ipv6=0\"\n    strategy:\n      fail-fast: false\n    name: \"Run tests: \"\n    permissions:\n      pull-requests: write\n      checks: read\n\n    steps:\n      - uses: actions/checkout@v6\n        with:\n          submodules: true\n\n      - name: Install dependencies\n        env:\n          PYTHON_KEYRING_BACKEND: keyring.backends.null.Keyring\n        shell: bash\n        working-directory: tests/fakeredis\n        run: |\n          pip install poetry\n          echo \"$HOME/.poetry/bin\" >> $GITHUB_PATH\n          poetry install\n      - name: Configure CMake\n        run: |\n          cmake -B ${GITHUB_WORKSPACE}/build \\\n            -DCMAKE_BUILD_TYPE=Debug -DWITH_AWS:BOOL=OFF -DWITH_GCP:BOOL=OFF -DWITH_GPERF:BOOL=OFF \\\n            -GNinja -L\n          cd ${GITHUB_WORKSPACE}/build && pwd\n\n      - name: Build\n        run: |\n          cd ${GITHUB_WORKSPACE}/build\n          ninja dragonfly\n          echo \"-----------------------------\"\n\n          # The order of redirect is important\n          ./dragonfly --proactor_threads=4  --noversion_check --port=6380  \\\n           --lua_resp2_legacy_float 1> /tmp/dragonfly.log 2>&1 &\n\n      - name: Run tests\n        working-directory: tests/fakeredis\n        run: |\n          # Some tests are pending on #5383\n          poetry run pytest test/ \\\n          --ignore test/test_hypothesis/test_transaction.py \\\n          --ignore test/test_hypothesis/test_zset.py \\\n          --ignore test/test_hypotesis_joint/test_joint.py \\\n          
--junit-xml=results-tests.xml  --html=report-tests.html -v\n        continue-on-error: false  # Fail the job if tests fail\n\n      - name: Show Dragonfly stats\n        if: always()\n        run: |\n          redis-cli -p 6380 INFO ALL\n      - name: Upload Tests Result xml\n        if: always()\n        uses: actions/upload-artifact@v6\n        with:\n          name: tests-result-logs\n          path: |\n            /tmp/dragonfly.*\n\n      - name: Upload Tests Result html\n        if: always()\n        uses: actions/upload-artifact@v6\n        with:\n          name: report-tests.html\n          path: tests/fakeredis/report-tests.html\n\n      - name: Publish Test Report\n        if: ${{ github.event_name == 'pull_request' }}\n        uses: mikepenz/action-junit-report@v6\n        with:\n          report_paths: tests/fakeredis/results-tests.xml\n          # Do not create a check run\n          # annotate_only: true\n\n  publish-html-results:\n    name: Publish HTML Test Results to GitHub Pages\n    needs: test\n    if: ${{ github.ref == 'refs/heads/main' }}\n    runs-on: ubuntu-latest\n    permissions:\n      pages: write      # to deploy to Pages\n      id-token: write   # to verify the deployment originates from an appropriate source\n    environment:\n      name: github-pages\n      url: ${{ steps.deployment.outputs.page_url }}\n    steps:\n      - name: Bundle Tests Result to one artifact\n        uses: actions/upload-artifact/merge@v6\n        with:\n          delete-merged: true\n          name: test-results-html\n          pattern: '*.html'\n\n      - name: Download html pages\n        uses: actions/download-artifact@v7\n        with:\n          name: test-results-html\n          path: results/\n\n      - uses: actions/setup-python@v6\n        with:\n          cache-dependency-path: tests/fakeredis/poetry.lock\n          python-version: \"3.10\"\n\n      - name: Merge html results\n        run: |\n          pip install pytest-html-merger && mkdir merged\n 
         pytest_html_merger -i results/ -o merged/index.html\n\n      - name: Publish to GitHub Pages\n        uses: actions/upload-pages-artifact@v4\n        with:\n          path: merged/\n      - name: Deploy to GitHub Pages\n        id: deployment\n        uses: actions/deploy-pages@v4\n        with:\n          token: '${{ secrets.GITHUB_TOKEN }}'\n"
  },
  {
    "path": ".gitignore",
    "content": "build/*\nbuild-*\nclang/*\nclang-*\n.vscode/*.db\n.vscode/settings.json\n.vscode/launch.json\nthird_party\ngenfiles/*\n*.sublime-*\n*.orig\n.tags\n!third_party/include/*\n*.pyc\n/CMakeLists.txt.user\n_deps\nreleases\n.DS_Store\n.idea/*\n.hypothesis\n.secrets\ncmake-build-debug\n.venv/\nfuzz/artifacts/\nfuzz/corpus/\ntools/replay/traffic-replay\n\n# Valkey-search integration tests (synced from external repo)\ntests/dragonfly/valkey_search/integration/\n_codeql_build_dir/\n"
  },
  {
    "path": ".gitmodules",
    "content": "[submodule \"helio\"]\n\tpath = helio\n\turl = https://github.com/romange/helio.git\n"
  },
  {
    "path": ".gitorderfile",
    "content": "*.py\n*.md\n*.in\n*.txt\n*.sh\n*.yml\n*.h\n*.cc\n*.lua\n*.go\n*\n"
  },
  {
    "path": ".nvmrc",
    "content": "22.19\n"
  },
  {
    "path": ".pre-commit-config.yaml",
    "content": "default_stages: [pre-commit]\nexclude: |\n    (?x)(\n      src/redis/.* |\n      src/huff/.* |\n      contrib/charts/dragonfly/ci/.* |\n      patches/.*\n    )\nrepos:\n  - repo: local\n    hooks:\n      - id: conventional-commits\n        name: Conventional Commit Minder\n        entry: contrib/scripts/conventional-commits\n        language: script\n        stages: [commit-msg]\n      - id: signed-commit\n        name: Signed Commit Enforcer\n        entry: contrib/scripts/signed-commit\n        language: script\n        stages: [commit-msg]\n\n  - repo: https://github.com/pre-commit/pre-commit-hooks\n    rev: v4.3.0\n    hooks:\n      - id: trailing-whitespace\n      - id: end-of-file-fixer\n\n  - repo: https://github.com/pre-commit/mirrors-clang-format\n    rev: v14.0.6\n    hooks:\n      - id: clang-format\n        name: Clang formatting\n\n  - repo: https://github.com/psf/black\n    rev: 25.1.0\n    hooks:\n      - id: black\n"
  },
  {
    "path": ".pre-commit-hooks.yaml",
    "content": "- id: conventional-commits\n  name: Conventional Commits Minder\n  entry: contrib/scripts/conventional-commits\n  language: script\n  description: Conventional Commits Enforcement at the `git commit` client-side level\n  always_run: true\n  stages: [commit-msg]\n\n- id: signed-commit\n  name: Signed Commit Enforcer\n  entry: contrib/scripts/signed-commit\n  language: script\n  description: Ensures all commits contain a Signed-off-by line\n  always_run: true\n  stages: [commit-msg]\n"
  },
  {
    "path": ".snyk",
    "content": "# Snyk (https://snyk.io) policy file\nexclude:\n global:\n   - tests/integration/**\n   - contrib/charts/**\n"
  },
  {
    "path": ".vscode/c_cpp_properties.json",
    "content": "{\n  \"configurations\": [\n    {\n      \"name\": \"Linux\",\n      \"includePath\": [\n        \"${default}\"\n      ],\n      \"cStandard\": \"c17\",\n      \"cppStandard\": \"c++17\",\n      \"intelliSenseMode\": \"${default}\",\n      \"compileCommands\": \"${workspaceFolder}/build-dbg/compile_commands.json\",\n      \"configurationProvider\": \"ms-vscode.cmake-tools\"\n    }\n  ],\n  \"version\": 4\n}\n"
  },
  {
    "path": "AGENTS.md",
    "content": "# Dragonfly Development Guide\n\n> **Essential reference for working with the Dragonfly codebase**\n> Architecture, build system, testing infrastructure, and development workflows.\n\n---\n\n## Table of Contents\n\n1. [Critical Workflow Rules](#critical-workflow-rules)\n2. [Quick Command Reference](#quick-command-reference)\n3. [Project Overview](#project-overview)\n4. [Repository Structure](#repository-structure)\n5. [Build Instructions](#build-instructions)\n6. [Testing](#testing)\n7. [CI/CD Pipeline](#cicd-pipeline)\n8. [Code Style & Pre-commit Hooks](#code-style--pre-commit-hooks)\n9. [Third-Party Dependencies](#third-party-dependencies)\n10. [Platform Support](#platform-support)\n11. [CMake Build Options](#cmake-build-options)\n12. [Key Files Reference](#key-files-reference)\n13. [Common Pitfalls](#common-pitfalls)\n14. [Debugging Tips](#debugging-tips)\n15. [Validation Checklist](#validation-checklist)\n\n---\n\n## Critical Workflow Rules\n\n**MANDATORY - Always Follow This Order:**\n\n1. ✅ **Read Before Edit** - Always read files before modifying\n2. ✅ **Use Correct Build Commands** - See [Quick Command Reference](#quick-command-reference) below\n3. ✅ **Test After Changes** - Build and run a relevant unit test -\n   `ninja <unit_test> && ./unit_test`\n4. ✅ **Format Code** - `pre-commit run --files <files>`\n5. 
✅ **Follow Architecture** - See [Architecture Patterns](#architecture-patterns) below\n\n### Pull Request Guidelines\n\n**Conciseness is Key**: PR descriptions should be short, focused, and easy to scan.\n- **Title**: Imperative, descriptive (e.g., \"Fix fiber stack overflow in test_reply_guard_oom\")\n- **Summary**: 1-2 sentences explaining *what* changed and *why*\n- **Changes**: Bullet points for key changes\n- **Fixes**: Link issues (e.g., \"Fixes #123\")\n- **Commit messages**: Keep every line (subject and body) <= 100 characters; wrap long descriptions\n\n---\n\n## Quick Command Reference\n\n**CRITICAL: Read the full sections below for context. These are shortcuts only.**\n\n### Building (see [Build Instructions](#build-instructions) for details)\n\n```bash\n# Debug build (for development)\n./helio/blaze.sh\ncd build-dbg && ninja dragonfly              # Build main binary\ncd build-dbg && ninja generic_family_test    # Build specific test\n\n# Release build (for production/benchmarking)\n./helio/blaze.sh -release\ncd build-opt && ninja dragonfly\n```\n\n### Testing (see [Testing](#testing) for details)\n\n```bash\n# C++ Unit Tests\ncd build-dbg\nctest -V -L DFLY                                    # Run all tests\n./generic_family_test                               # Run specific test binary\n./generic_family_test --gtest_filter=\"Set.*\"        # Run specific test case\n```\n\n### Code Formatting\n\n```bash\n# Setup (once)\npipx install pre-commit clang-format black\npre-commit install\n\n# Format code\npre-commit run --files <files>              # Format specific files\npre-commit run --all-files                  # Format all files\n```\n\n### Common Operations\n\n```bash\n# Check git status\ngit status\n\n# Check current branch\ngit branch\n\n# View recent commits\ngit log --oneline -10\n```\n\n---\n\n## Architecture Patterns\n\n**Code Style**: [.clang-format](.clang-format) - snake_case vars, PascalCase functions, kPascalCase constants\n\n**DO ✅**:\n- 
Fiber-aware: `util::fb2::Mutex`, `util::fb2::Fiber` → [helio/util/fibers/](helio/util/fibers/)\n- Per-shard ops (no global state) → [docs/df-share-nothing.md](docs/df-share-nothing.md)\n- Command pattern → [src/server/set_family.cc](src/server/set_family.cc)\n- Error handling: `OpStatus` → [src/server/common.h](src/server/common.h)\n- Test patterns → [tests/dragonfly/conftest.py](tests/dragonfly/conftest.py)\n\n**DON'T ❌**:\n- `std::thread`, `std::mutex` (deadlocks!)\n- Global mutable state\n- Edit without reading\n- Skip tests\n- Use `./tools/docker/build.sh` for local development (use `ninja` instead)\n- Use `make` for incremental builds (use `ninja` instead)\n\n---\n\n## Project Overview\n\n**Dragonfly** is a high-performance, Redis and Memcached compatible in-memory data store written in C++20. It delivers significantly higher throughput than traditional single-threaded Redis implementations through innovative architectural choices.\n\n### Key Characteristics\n\n- **Language**: C++20 (Google C++ Style Guide 2020 version)\n- **Architecture**: Shared-nothing multi-threaded design (via `helio` library)\n- **Performance**: Uses io_uring (Linux 5.11+) for high-performance async I/O, with epoll fallback\n- **Threading Model**: Fiber-based cooperative multitasking with lock-free data structures\n- **Build System**: CMake + Ninja via `helio/blaze.sh` wrapper script\n- **Target Platform**: Linux (kernel 5.11+ recommended), FreeBSD support available\n- **Protocols**: Redis RESP2/RESP3, Memcached binary protocol\n- **Compatibility**: Drop-in replacement for Redis API coverage\n\n### Architectural Highlights\n\n**For detailed architecture documentation, see [docs/df-share-nothing.md](docs/df-share-nothing.md)**\n\n1. **Shared-Nothing Design**: Each thread operates independently with its own data structures, minimizing lock contention\n2. **Helio Framework**: Custom I/O and threading library built on io_uring/epoll with fiber support\n3. 
**DashTable**: Novel hash table implementation optimized for multi-core systems - see [docs/dashtable.md](docs/dashtable.md)\n4. **Transaction Model**: Non-blocking optimistic transactions - see [docs/transaction.md](docs/transaction.md)\n5. **Tiering Support**: Optional disk-backed storage for large datasets\n6. **Search Module**: Full-text search capabilities (when enabled with WITH_SEARCH)\n\n---\n\n## Repository Structure\n\n```\ndragonfly/\n├── src/                      # Main C++ source code\n│   ├── server/               # Core server implementation\n│   │   ├── dfly_main.cc      # Main entry point\n│   │   ├── main_service.cc   # Service lifecycle & command routing\n│   │   ├── db_slice.cc       # Per-thread database shard\n│   │   ├── engine_shard_set.cc # Shard management\n│   │   ├── cluster/          # Cluster mode implementation\n│   │   ├── journal/          # Replication journal\n│   │   ├── tiering/          # Tiered storage\n│   │   ├── search/           # Search module\n│   │   └── acl/              # Access control lists\n│   ├── core/                 # Core data structures\n│   │   ├── dash.h            # DashTable hash table\n│   │   ├── dense_set.h       # Compact set implementation\n│   │   ├── string_map.h      # Optimized string-keyed maps\n│   │   ├── search/           # Search core algorithms\n│   │   └── json/             # JSON support\n│   ├── facade/               # Network & command handling\n│   │   ├── dragonfly_connection.cc # Connection management\n│   │   ├── redis_parser.cc   # RESP protocol parser\n│   │   └── memcache_parser.cc # Memcached protocol\n│   └── redis/                # Redis-specific implementations\n│       └── lua/              # Lua scripting support\n│\n├── helio/                    # Git submodule: I/O and threading library\n│   │                         # ** DO NOT EDIT unless contributing to helio **\n│   ├── util/                 # Utilities: fibers, I/O, synchronization\n│   ├── io/                   # 
io_uring & epoll abstraction\n│   └── blaze.sh              # Build configuration wrapper\n│\n├── tests/                    # Test suite\n│   ├── dragonfly/            # Python pytest integration/regression tests\n│   │   ├── conftest.py       # Pytest fixtures & configuration\n│   │   ├── requirements.txt  # Python test dependencies\n│   │   └── *.py              # Test files\n│   └── pytest.ini            # Pytest configuration & markers\n│\n├── docs/                     # Documentation\n│   ├── build-from-source.md  # Build instructions\n│   ├── dashtable.md          # DashTable internals\n│   ├── transaction.md        # Transaction model\n│   ├── df-share-nothing.md   # Shared-nothing architecture\n│   └── differences.md        # Differences from Redis\n│\n├── contrib/                  # Utilities\n│   ├── docker/               # Docker configurations\n│   └── charts/dragonfly/     # Helm chart for Kubernetes\n│\n├── tools/                    # Benchmarking & utility tools\n│   └── packaging/            # Packaging scripts\n│\n├── CMakeLists.txt            # Root CMake configuration\n├── .clang-format             # C++ formatting rules (clang-format v14.0.6)\n├── .pre-commit-config.yaml   # Pre-commit hooks configuration\n├── pyproject.toml            # Python formatting (Black, 100 chars)\n└── CONTRIBUTING.md           # Contribution guidelines\n```\n\n### Critical Paths to Remember\n\n- **Main entry**: `src/server/dfly_main.cc`\n- **Command dispatch**: `src/server/main_service.cc`\n- **Data storage**: `src/server/db_slice.cc`\n- **Networking**: `src/facade/dragonfly_connection.cc`\n- **Helio library**: `helio/` (I/O and threading library)\n\n---\n\n## Build Instructions\n\n**For complete build instructions, see [docs/build-from-source.md](docs/build-from-source.md)**\n\n### Quick Start\n\n**Debug build** (for development):\n```bash\n./helio/blaze.sh\ncd build-dbg && ninja dragonfly\n./dragonfly --alsologtostderr\n```\n\n**Release build** (for 
production/benchmarking):\n```bash\n./helio/blaze.sh -release\ncd build-opt && ninja dragonfly\n```\n\n**Production release build** (static linking, optimized):\n```bash\nmake release           # Configure + build\nmake package           # Create release packages with debug symbols\n```\n\nThe [Makefile](Makefile) builds production releases with:\n- Static linking: libstdc++, libgcc, Boost, OpenSSL\n- Architecture optimizations (x86_64: `-march=core2 -msse4.1 -mtune=skylake`)\n- Debug symbols (compressed)\n- Output: `build-release/dragonfly-{arch}.tar.gz`\n\n**Common build options**:\n- See [docs/build-from-source.md](docs/build-from-source.md) for all options\n\n---\n\n## Testing\n\n**For complete testing documentation, see [tests/README.md](tests/README.md)**\n\n### Quick Reference\n\n**C++ Unit Tests**:\n```bash\ncd build-dbg\nctest -V -L DFLY                                    # Run all tests\n./generic_family_test                               # Run specific test binary\n./generic_family_test --gtest_filter=\"Set.*\"        # Run specific test case\n```\n\n---\n\n## CI/CD Pipeline\n\n**For complete CI configuration, see [.github/workflows/ci.yml](.github/workflows/ci.yml)**\n\nThe CI workflow runs on all PRs and includes:\n- **Pre-commit checks**: clang-format, black formatters\n- **Build matrix**: Multiple OS/compiler/sanitizer combinations (Ubuntu 20/24, Alpine, GCC/Clang, ASAN/UBSAN)\n- **Test execution**: C++ unit tests, Python integration tests, cluster mode tests\n- **Additional validations**: Helm charts, Docker image builds\n\n---\n\n## Code Style & Pre-commit Hooks\n\n**For complete contribution guidelines, see [CONTRIBUTING.md](CONTRIBUTING.md)**\n\n**Code style configuration files**:\n- **C++**: [.clang-format](.clang-format) - Google C++ Style Guide (2020), clang-format v14.0.6, 100 char limit\n- **Python**: [pyproject.toml](pyproject.toml) - Black formatter, 100 char limit, PEP 8 compliant\n- **Pre-commit hooks**: 
[.pre-commit-config.yaml](.pre-commit-config.yaml) - Automated formatting checks\n\n**Quick setup**:\n```bash\npipx install pre-commit clang-format black\npre-commit install\npre-commit run --all-files                          # Run all formatters\n```\n\n---\n\n## Third-Party Dependencies\n\n**Key Libraries**: Abseil (strings/flags), Boost 1.71+ (context/intrusive), mimalloc (allocator), jsoncons (JSON), OpenSSL (TLS), libunwind (traces)\n\n**Build artifacts**: `build-dbg/third_party/` - DO NOT edit\n\n**For complete dependency info, see [docs/build-from-source.md](docs/build-from-source.md)**\n\n---\n\n## Platform Support\n\n**Linux**: Primary platform. Kernel 5.11+ (io_uring), 5.1+ (basic), < 5.1 (epoll fallback)\n- Check: `uname -r`\n- Force epoll: `--proactor_type=epoll`\n- Docker: `--security-opt seccomp=unconfined`\n\n**FreeBSD**: Supported (kqueue backend)\n\n**macOS**: Not supported for production (use Docker/Linux)\n\n**For complete platform info, see [docs/build-from-source.md](docs/build-from-source.md)**\n\n---\n\n## CMake Build Options\n\n**For complete list of build options, see [docs/build-from-source.md](docs/build-from-source.md)**\n\n### Common Options\n\nPass options to `helio/blaze.sh` with `-D` prefix:\n\n```bash\n./helio/blaze.sh -DWITH_SEARCH=OFF -DWITH_AWS=ON\n```\n\n**Most useful options**:\n- `WITH_ASAN=ON` / `WITH_USAN=ON` - Enable sanitizers for debugging\n- `WITH_SEARCH=OFF` - Disable search module for faster builds\n- `WITH_AWS=OFF` / `WITH_GCP=OFF` - Disable cloud libraries\n- `WITH_TIERING=OFF` - Disable disk storage\n- `USE_MOLD=ON` - Faster linking with LTO (production builds)\n\n**Quick configurations**:\n```bash\n# Minimal build (fast compilation)\n./helio/blaze.sh -DWITH_GPERF=OFF -DWITH_AWS=OFF -DWITH_GCP=OFF -DWITH_TIERING=OFF -DWITH_SEARCH=OFF\n\n# Full-featured (all options ON by default)\n./helio/blaze.sh\n\n# Production optimized\n./helio/blaze.sh -release -DUSE_MOLD=ON\n```\n\n---\n\n## Key Files Reference\n\nQuick 
reference to the most important files in the codebase.\n\n| Purpose | File Path |\n|---------|-----------|\n| **Entry Points & Core** | |\n| Main entry point | `src/server/dfly_main.cc` |\n| Server lifecycle & command routing | `src/server/main_service.cc` |\n| Per-thread database shard | `src/server/db_slice.cc` |\n| Shard management | `src/server/engine_shard_set.cc` |\n| **Data Structures** | |\n| DashTable hash table | `src/core/dash.h` |\n| Dense set implementation | `src/core/dense_set.h` |\n| String map | `src/core/string_map.h` |\n| **Networking** | |\n| Connection handling | `src/facade/dragonfly_connection.cc` |\n| Redis protocol parser | `src/facade/redis_parser.cc` |\n| Memcached protocol parser | `src/facade/memcache_parser.cc` |\n| **Build System** | |\n| Root CMake config | `CMakeLists.txt` |\n| Build script wrapper | `helio/blaze.sh` |\n| Server CMake config | `src/server/CMakeLists.txt` |\n| **CI/CD** | |\n| Main CI workflow | `.github/workflows/ci.yml` |\n| Pre-commit config | `.pre-commit-config.yaml` |\n| **Code Style** | |\n| C++ formatting | `.clang-format` |\n| Python formatting | `pyproject.toml` |\n| **Testing** | |\n| Pytest configuration | `tests/pytest.ini` |\n| Pytest fixtures | `tests/dragonfly/conftest.py` |\n| Test requirements | `tests/dragonfly/requirements.txt` |\n| **Documentation** | |\n| Build instructions | `docs/build-from-source.md` |\n| Architecture overview | `docs/df-share-nothing.md` |\n| DashTable internals | `docs/dashtable.md` |\n| Transaction model | `docs/transaction.md` |\n| **Configuration** | |\n| Contributing guide | `CONTRIBUTING.md` |\n| CLA agreement | `CLA.txt` |\n\n---\n\n## Common Pitfalls\n\n1. **Pre-commit not installed**: `pipx install pre-commit clang-format black && pre-commit install`\n2. **Wrong binary**: Debug: `build-dbg/dragonfly`, Release: `build-opt/dragonfly`\n3. **Wrong build command**: Use `cd build-dbg && ninja <target>`, NOT `./tools/docker/build.sh`\n4. 
**Test timeouts**: `timeout 20m ctest -V -L DFLY`\n5. **ASAN leaks**: Check CI, suppress in `helio/util/asan_suppressions.txt`\n6. **Helio modifications**: DON'T edit `helio/` (it's a git submodule - changes go upstream)\n7. **CodeQL checks**: DON'T run codeql_checker when testing changes - it's slow and unnecessary for development\n\n---\n\n## Debugging Tips\n\n**Logging**: `--alsologtostderr --v=1 --vmodule=module=2`\n\n**ASAN**: `ASAN_OPTIONS=detect_leaks=1:symbolize=1`, suppressions: `helio/util/asan_suppressions.txt`\n\n**CI reproduction**: See [.github/workflows/ci.yml](.github/workflows/ci.yml)\n\n**Troubleshooting**: Check fiber deadlocks (use `util::fb2` not `std::mutex`), timeout issues (`--test_timeout`), ASAN reports\n\n---\n\n## Validation Checklist\n\nBefore claiming a task is complete, verify:\n\n### Code Quality\n\n- [ ] Code compiles without errors: `cd build-dbg && ninja dragonfly`\n- [ ] Code compiles without warnings (CI uses `-Werror`)\n- [ ] Code follows Google C++ Style Guide (run `clang-format`)\n- [ ] No new ASAN/UBSAN violations\n\n### Testing\n\n- [ ] All existing C++ unit tests pass: `ctest -V -L DFLY`\n- [ ] New feature has corresponding test coverage\n- [ ] Tests pass in both Debug and Release builds\n- [ ] Tests pass with ASAN/UBSAN enabled (if applicable)\n- [ ] **DO NOT run codeql_checker** - it's slow and unnecessary for development testing\n\n### Pre-commit & Style\n\n- [ ] Pre-commit hooks installed: `pre-commit install`\n- [ ] Code formatted with clang-format (C++) and black (Python)\n\n### Documentation\n\n- [ ] Public APIs have comments explaining purpose\n- [ ] Complex algorithms have explanatory comments\n- [ ] README or docs updated if behavior changes\n- [ ] No commented-out code left in final commit\n\n### Performance\n\n- [ ] No obvious performance regressions (run benchmarks if needed)\n- [ ] No unnecessary allocations in hot paths\n- [ ] Lock-free data structures used where appropriate\n"
  },
  {
    "path": "CLA.txt",
    "content": "Thanks for your interest in contributing to Dragonfly™. By contributing to this project\nin any way, form or media you grant DragonflyDB Ltd. and its affiliates a perpetual, worldwide, non-exclusive, free of charge, royalty-free, irrevocable license to use,\nmodify, make available, reproduce, make derivatives, publicly display and perform, sublicense, sell, and distribute your contributions and any derivatives thereof as part of Dragonfly™.\nYou represent that You are legally entitled to grant the above license. You acknowledge that DragonflyDB currently distributes Dragonfly™ under the Business Source License 1.1 (BSL-1.1) license, and agree that your contribution may be distributed under BSL-1.1 as part of Dragonfly™. You also represent that your contributions are your original work and that neither the content contributed, nor making the contribution to Dragonfly™ violates any third party’s rights. If you are making this contribution while being engaged by any other company or entity, please make sure you have the necessary permissions required to do so.\n"
  },
  {
    "path": "CMakeLists.txt",
    "content": "cmake_minimum_required(VERSION 3.15 FATAL_ERROR)\nset(PROJECT_CONTACT romange@gmail.com)\n\ninclude(CheckCXXCompilerFlag)\n\nenable_testing()\n\nset(CMAKE_EXPORT_COMPILE_COMMANDS 1)\n\n# AFL++ fuzzing support - must be set BEFORE project() command\noption(USE_AFL \"Enable AFL++ fuzzing\" OFF)\nif(USE_AFL)\n  # Automatically set AFL++ compilers if not already set\n  if(NOT CMAKE_C_COMPILER MATCHES \"afl-\" AND NOT CMAKE_CXX_COMPILER MATCHES \"afl-\")\n    find_program(AFL_CC afl-clang-fast)\n    find_program(AFL_CXX afl-clang-fast++)\n\n    if(AFL_CC AND AFL_CXX)\n      message(STATUS \"AFL++ fuzzing enabled - setting compilers\")\n      set(CMAKE_C_COMPILER ${AFL_CC})\n      set(CMAKE_CXX_COMPILER ${AFL_CXX})\n    else()\n      message(FATAL_ERROR \"USE_AFL=ON but AFL++ compilers not found!\\n\"\n              \"Please install AFL++: apt install afl++ or build from source\\n\"\n              \"https://github.com/AFLplusplus/AFLplusplus\")\n    endif()\n  endif()\nendif()\n\n# Set targets in folders\nset_property(GLOBAL PROPERTY USE_FOLDERS ON)\nproject(DRAGONFLY C CXX)\nset(CMAKE_CXX_STANDARD 20)\n\n# Disabled because it has false positives with ref-counted intrusive pointers.\nCHECK_CXX_COMPILER_FLAG(\"-Wuse-after-free\" HAS_USE_AFTER_FREE_WARN)\nif (HAS_USE_AFTER_FREE_WARN)\n    set(CMAKE_CXX_FLAGS \"-Wno-use-after-free ${CMAKE_CXX_FLAGS}\")\nendif()\n\nif (CMAKE_CXX_COMPILER_ID STREQUAL \"Clang\")\n    set(CMAKE_CXX_FLAGS \"-Wthread-safety -Werror=thread-safety ${CMAKE_CXX_FLAGS}\")\nendif()\n\n# We can not use here CHECK_CXX_COMPILER_FLAG because systems that do not support sanitizers\n# fail during linking time.\nset(CMAKE_REQUIRED_FLAGS \"-fsanitize=address\")\ncheck_cxx_source_compiles(\"int main() { return 0; }\" SUPPORT_ASAN)\n\nset(CMAKE_REQUIRED_FLAGS \"-fsanitize=undefined\")\ncheck_cxx_source_compiles(\"int main() { return 0; }\" SUPPORT_USAN)\nset(CMAKE_REQUIRED_FLAGS \"\")\n\n# We must define all the required variables from the root 
cmakefile, otherwise\n# they just disappear.\nset(CMAKE_MODULE_PATH \"${CMAKE_CURRENT_SOURCE_DIR}/helio/cmake\" ${CMAKE_MODULE_PATH})\noption(BUILD_SHARED_LIBS \"Build shared libraries\" OFF)\noption(DF_USE_SSL \"Provide support for SSL connections\" ON)\n\nfind_package(OpenSSL)\n\n# AFL++ configuration - must be before sanitizer checks\nif(USE_AFL)\n  message(STATUS \"AFL++ fuzzing mode active\")\n  message(STATUS \"  C compiler: ${CMAKE_C_COMPILER}\")\n  message(STATUS \"  C++ compiler: ${CMAKE_CXX_COMPILER}\")\n\n  # Add USE_AFL as compile definition so #ifdef USE_AFL works in code\n  add_compile_definitions(USE_AFL)\n\n  # AFL++ requires specific compiler flags for coverage instrumentation\n  set(CMAKE_CXX_FLAGS \"${CMAKE_CXX_FLAGS} -g\")\n  set(CMAKE_C_FLAGS \"${CMAKE_C_FLAGS} -g\")\n\n  # Force disable sanitizers when fuzzing (AFL++ incompatible with ASAN/UBSAN)\n  message(STATUS \"Disabling sanitizers (incompatible with AFL++ fuzzing)\")\n  set(WITH_ASAN OFF CACHE BOOL \"Disable ASAN for fuzzing\" FORCE)\n  set(WITH_USAN OFF CACHE BOOL \"Disable UBSAN for fuzzing\" FORCE)\n\n  # Disable AWS and GCP for fuzzing builds (not needed, reduces build time)\n  message(STATUS \"Disabling AWS and GCP integrations for fuzzing\")\n  set(WITH_AWS OFF CACHE BOOL \"Disable AWS for fuzzing\" FORCE)\n  set(WITH_GCP OFF CACHE BOOL \"Disable GCP for fuzzing\" FORCE)\nendif()\n\noption(WITH_ASAN \"Enable -fsanitize=address\" OFF)\nif (SUPPORT_ASAN AND WITH_ASAN)\n  message(STATUS \"address sanitizer enabled\")\n  set(CMAKE_CXX_FLAGS_DEBUG \"${CMAKE_CXX_FLAGS_DEBUG} -fsanitize=address\")\nendif()\n\noption(WITH_USAN \"Enable -fsanitize=undefined\" OFF)\nif (SUPPORT_USAN AND WITH_USAN)\n  message(STATUS \"ub sanitizer enabled\")\n  set(CMAKE_CXX_FLAGS_DEBUG \"${CMAKE_CXX_FLAGS_DEBUG} -fsanitize=undefined\")\nendif()\n\ninclude(third_party)\ninclude(internal)\n\ninclude_directories(src)\ninclude_directories(helio)\n\nadd_subdirectory(helio)\nadd_subdirectory(src)\n"
  },
  {
    "path": "CODE_OF_CONDUCT.md",
    "content": "# Contributor Covenant Code of Conduct\n\n## Our Pledge\n\nWe as members, contributors, and leaders pledge to make participation in our\ncommunity a harassment-free experience for everyone, regardless of age, body\nsize, visible or invisible disability, ethnicity, sex characteristics, gender\nidentity and expression, level of experience, education, socio-economic status,\nnationality, personal appearance, race, religion, or sexual identity\nand orientation.\n\nWe pledge to act and interact in ways that contribute to an open, welcoming,\ndiverse, inclusive, and healthy community.\n\n## Our Standards\n\nExamples of behavior that contributes to a positive environment for our\ncommunity include:\n\n* Demonstrating empathy and kindness toward other people\n* Being respectful of differing opinions, viewpoints, and experiences\n* Giving and gracefully accepting constructive feedback\n* Accepting responsibility and apologizing to those affected by our mistakes,\n  and learning from the experience\n* Focusing on what is best not just for us as individuals, but for the\n  overall community\n\nExamples of unacceptable behavior include:\n\n* The use of sexualized language or imagery, and sexual attention or\n  advances of any kind\n* Trolling, insulting or derogatory comments, and personal or political attacks\n* Public or private harassment\n* Publishing others' private information, such as a physical or email\n  address, without their explicit permission\n* Other conduct which could reasonably be considered inappropriate in a\n  professional setting\n\n## Enforcement Responsibilities\n\nCommunity leaders are responsible for clarifying and enforcing our standards of\nacceptable behavior and will take appropriate and fair corrective action in\nresponse to any behavior that they deem inappropriate, threatening, offensive,\nor harmful.\n\nCommunity leaders have the right and responsibility to remove, edit, or reject\ncomments, commits, code, wiki edits, issues, 
and other contributions that are\nnot aligned to this Code of Conduct, and will communicate reasons for moderation\ndecisions when appropriate.\n\n## Scope\n\nThis Code of Conduct applies within all community spaces, and also applies when\nan individual is officially representing the community in public spaces.\nExamples of representing our community include using an official e-mail address,\nposting via an official social media account, or acting as an appointed\nrepresentative at an online or offline event.\n\n## Enforcement\n\nInstances of abusive, harassing, or otherwise unacceptable behavior may be\nreported to the community leaders responsible for enforcement at\nhttps://github.com/dragonflydb/dragonfly/discussions.\nAll complaints will be reviewed and investigated promptly and fairly.\n\nAll community leaders are obligated to respect the privacy and security of the\nreporter of any incident.\n\n## Enforcement Guidelines\n\nCommunity leaders will follow these Community Impact Guidelines in determining\nthe consequences for any action they deem in violation of this Code of Conduct:\n\n### 1. Correction\n\n**Community Impact**: Use of inappropriate language or other behavior deemed\nunprofessional or unwelcome in the community.\n\n**Consequence**: A private, written warning from community leaders, providing\nclarity around the nature of the violation and an explanation of why the\nbehavior was inappropriate. A public apology may be requested.\n\n### 2. Warning\n\n**Community Impact**: A violation through a single incident or series\nof actions.\n\n**Consequence**: A warning with consequences for continued behavior. No\ninteraction with the people involved, including unsolicited interaction with\nthose enforcing the Code of Conduct, for a specified period of time. This\nincludes avoiding interactions in community spaces as well as external channels\nlike social media. Violating these terms may lead to a temporary or\npermanent ban.\n\n### 3. 
Temporary Ban\n\n**Community Impact**: A serious violation of community standards, including\nsustained inappropriate behavior.\n\n**Consequence**: A temporary ban from any sort of interaction or public\ncommunication with the community for a specified period of time. No public or\nprivate interaction with the people involved, including unsolicited interaction\nwith those enforcing the Code of Conduct, is allowed during this period.\nViolating these terms may lead to a permanent ban.\n\n### 4. Permanent Ban\n\n**Community Impact**: Demonstrating a pattern of violation of community\nstandards, including sustained inappropriate behavior,  harassment of an\nindividual, or aggression toward or disparagement of classes of individuals.\n\n**Consequence**: A permanent ban from any sort of public interaction within\nthe community.\n\n## Attribution\n\nThis Code of Conduct is adapted from the [Contributor Covenant][homepage],\nversion 2.0, available at\nhttps://www.contributor-covenant.org/version/2/0/code_of_conduct.html.\n\nCommunity Impact Guidelines were inspired by [Mozilla's code of conduct\nenforcement ladder](https://github.com/mozilla/diversity).\n\n[homepage]: https://www.contributor-covenant.org\n\nFor answers to common questions about this code of conduct, see the FAQ at\nhttps://www.contributor-covenant.org/faq. Translations are available at\nhttps://www.contributor-covenant.org/translations.\n"
  },
  {
    "path": "CONTRIBUTING.md",
    "content": "# Contributing to Dragonfly DB\n\nThank you for your interest in Dragonfly DB.\n\nFeel free to browse our [Discussions](https://github.com/dragonflydb/dragonfly/discussions) and [Issues](https://github.com/dragonflydb/dragonfly/issues)\n\n## Build from source\n\nSee [building from source](./docs/build-from-source.md)\n\nPlease note that to build a development/debug version,\nit's better to alter the configure and build steps above with:\n\n```sh\n./helio/blaze.sh   # without '-release' flag. Creates build-dbg subfolder\ncd build-dbg && ninja dragonfly\n```\n\n## Before you make your changes\n\n```sh\ncd dragonfly   # project root\n\n# Make sure you have 'pre-commit', 'clang-format' and black is installed\npipx install pre-commit clang-format\npipx install pre-commit black\n\n# IMPORTANT! Enable our pre-commit message hooks\n# This will ensure your commits match our formatting requirements\npre-commit install\n```\n\nThis step must be done on each machine you wish to develop and contribute from to activate the `commit-msg` and `commit` hooks client-side.\n\nOnce you have done these things, we look forward to adding your contributions and improvements to the Dragonfly DB project.\n\n## Unit testing\n\n```\n# Build a specific test\ncd build-dbg && ninja [test_name]\n# e.g cd build-dbg && ninja generic_family_test\n\n# Run\n./[test_name]\n# e.g ./generic_family_test\n```\n\n## Rendering Helm golden files\n\nA Golang golden test is included in the dragonfly helm chart. This test will render the chart and compare the output to a golden file. If the output has changed, the test will fail and the golden file will need to be updated. This can be done by running:\n\n```bash\ncd contrib/charts/dragonfly\ngo test -v ./... 
-update\n```\n\nThis makes it easy to see the changes in the rendered output without having to manually run the `helm template` and diff the output.\n\n## Signoff Commits\n\nAll community submissions must include a signoff.\n\n```bash\ngit commit -s -m '...'\n```\n\n## Squash Commits\n\nPlease squash all commits for a change into a single commit (this can be done using \"git rebase -i\"). Do your best to have a well-formed commit message for the change.\n\n## Use Conventional Commits\n\nThis repo uses [Conventional Commits](https://www.conventionalcommits.org/en/v1.0.0/)\n\nThe Conventional Commits specification is a lightweight convention on top of commit messages.\nIt provides an easy set of rules for creating an explicit commit history;\nwhich makes it easier to write automated tools on top of.\nThis convention dovetails with [SemVer](http://semver.org),\nby describing the features, fixes, and breaking changes made in commit messages.\n\nThe commit message should be structured as follows:\n\n---\n\n```\n<type>[optional scope]: <description>\n\n[optional body]\n\n[optional footer(s)]\n```\n\n---\n\nThis repo uses automated tools to standardize the formatting of code, text files, and commits.\n\n- [Pre-commit hooks](#pre-commit-hooks) validate and automatically apply code\n   formatting rules.\n\n## `pre-commit` hooks\n\nThe Dragonfly DB team has agreed to systematically use several pre-commit hooks to\nnormalize the formatting of code. You need to install and enable pre-commit to have these used\nwhen you do your commits.\n\n## Codebase guidelines\n\nThis repo conforms to the Google's C++ Style Guide. 
Keep in mind we use an older version of the\nstyle guide which can be found [here](https://github.com/google/styleguide/blob/505ba68c74eb97e6966f60907ce893001bedc706/cppguide.html).\n\nAny exceptions to the rules specified in the style guide will be documented here.\n\n## License terms for contributions\n\nPlease see our [CLA agreement](./CLA.txt)\n\n## THANK YOU FOR YOUR CONTRIBUTIONS\n"
  },
  {
    "path": "CONTRIBUTORS.md",
    "content": "# Contributors (alphabetical by surname)\n\n* **[Amir Alperin](https://github.com/iko1)**\n* **[Philipp Born](https://github.com/tamcore)**\n  * Helm Chart\n* **[Meng Chen](https://github.com/matchyc)**\n* **[Yuxuan Chen](https://github.com/YuxuanChen98)**\n* **[Pawel Kaplinski](https://github.com/pawelKapl)**\n* **[Redha Lhimeur](https://github.com/redhal)**\n* **[Braydn Moore](https://github.com/braydnm)**\n* **[Logan Raarup](https://github.com/logandk)**\n* **[Ryan Russell](https://github.com/ryanrussell)**\n  * Docs & Code Readability\n* **[Ali-Akber Saifee](https://github.com/alisaifee)**\n* **[Elle Y](https://github.com/inohime)**\n* **[ATM SALEH](https://github.com/ATM-SALEH)**\n* **[Shohei Shiraki](https://github.com/highpon)**\n* **[Leonardo Mello](https://github.com/lsvmello)**\n* **[Nico Coetzee](https://github.com/nicc777)**\n"
  },
  {
    "path": "LICENSE.md",
    "content": "# Dragonfly Business Source License 1.1\n\n<u>License</u>: Business Source License 1.1 [BSL 1.1](https://spdx.org/licenses/BUSL-1.1.html)\n\n<u>Licensor</u>: DragonflyDB, Ltd.\n\n<u>Licensed Work</u>: Dragonfly including the software components, or any portion of them, and any modification.\n\n<u>Change Date</u>: March 1, 2029\n\n<u>Change License</u>: [Apache License, Version\n2.0](https://www.apache.org/licenses/LICENSE-2.0), as published by the\nApache Foundation.\n\n<u>Additional Use Grant</u>: You may make use of the Licensed Work (i) only as part of your own product or service, provided it is not an in-memory data store product or service; and (ii) provided that you do not use, provide, distribute, or make available the Licensed Work as a Service.\nA “Service” is a commercial offering, product, hosted, or managed service, that allows third parties (other than your own employees and contractors acting on your behalf) to access and/or use the Licensed Work or a substantial set of the features or functionality of the Licensed Work to third parties as a software-as-a-service, platform-as-a-service, infrastructure-as-a-service or other similar services that compete with Licensor products or services.\n\nText of BSL 1.1\n\nThe Licensor hereby grants you the right to copy, modify, create\nderivative works, redistribute, and make non-production use of the\nLicensed Work. 
The Licensor may make an Additional Use Grant, above,\npermitting limited production use.\n\nEffective on the Change Date, or the fourth anniversary of the first\npublicly available distribution of a specific version of the Licensed\nWork under this License, whichever comes first, the Licensor hereby\ngrants you rights under the terms of the Change License, and the rights\ngranted in the paragraph above terminate.\n\nIf your use of the Licensed Work does not comply with the requirements\ncurrently in effect as described in this License, you must purchase a\ncommercial license from the Licensor, its affiliated entities, or\nauthorized resellers, or you must refrain from using the Licensed Work.\n\nAll copies of the original and modified Licensed Work, and derivative\nworks of the Licensed Work, are subject to this License. This License\napplies separately for each version of the Licensed Work and the Change\nDate may vary for each version of the Licensed Work released by\nLicensor.\n\nYou must conspicuously display this License on each original or modified\ncopy of the Licensed Work. If you receive the Licensed Work in original\nor modified form from a third party, the terms and conditions set forth\nin this License apply to your use of that work.\n\nAny use of the Licensed Work in violation of this License will\nautomatically terminate your rights under this License for the current\nand all other versions of the Licensed Work.\n\nThis License does not grant you any right in any trademark or logo of\nLicensor or its affiliates (provided that you may use a trademark or\nlogo of Licensor as expressly required by this License).\n\nTO THE EXTENT PERMITTED BY APPLICABLE LAW, THE LICENSED WORK IS PROVIDED\nON AN “AS IS” BASIS. LICENSOR HEREBY DISCLAIMS ALL WARRANTIES AND\nCONDITIONS, EXPRESS OR IMPLIED, INCLUDING (WITHOUT LIMITATION)\nWARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE,\nNON-INFRINGEMENT, AND TITLE.\n"
  },
  {
    "path": "Makefile",
    "content": "BUILD_ARCH := $(shell uname -m)\nRELEASE_NAME := \"dragonfly-${BUILD_ARCH}\"\nHELIO_RELEASE_FLAGS = -DHELIO_RELEASE_FLAGS=\"-g\"\nHELIO_USE_STATIC_LIBS = ON\nHELIO_OPENSSL_USE_STATIC_LIBS = ON\nHELIO_ENABLE_GIT_VERSION = ON\nHELIO_WITH_UNWIND ?= OFF\nRELEASE_DIR=build-release\nWITH_SIMSIMD ?= ON\n\n# Some distributions (old fedora) have incorrect dependencies for crypto\n# so we add -lz for them.\nLINKER_FLAGS=-lz\n\n# equivalent to: if $(uname_m) == x86_64 || $(uname_m) == amd64\n# Override HELIO_MARCH_OPT via environment: make HELIO_MARCH_OPT=\"-march=native\"\nifneq (, $(filter $(BUILD_ARCH),x86_64 amd64))\nHELIO_MARCH_OPT ?= -march=core2 -msse4.1 -mpopcnt -mtune=skylake\nendif\n\n# For release builds we link statically libstdc++ and libgcc. Currently,\n# all the release builds are performed by gcc.\nLINKER_FLAGS += -static-libstdc++ -static-libgcc\n\n# Optional ASAN support: make ASAN=1 release\nifdef ASAN\nSANITIZE_COMPILE_FLAGS = -fsanitize=address -Wno-maybe-uninitialized\nSANITIZE_LINK_FLAGS = -fsanitize=address\nendif\n\nHELIO_FLAGS = -DHELIO_RELEASE_FLAGS=\"-g\" \\\n\t\t\t  -DCMAKE_CXX_FLAGS=\"$(SANITIZE_COMPILE_FLAGS)\" \\\n\t\t\t  -DCMAKE_EXE_LINKER_FLAGS=\"$(LINKER_FLAGS) $(SANITIZE_LINK_FLAGS)\" \\\n              -DBoost_USE_STATIC_LIBS=$(HELIO_USE_STATIC_LIBS) \\\n              -DOPENSSL_USE_STATIC_LIBS=$(HELIO_OPENSSL_USE_STATIC_LIBS) \\\n              -DENABLE_GIT_VERSION=$(HELIO_ENABLE_GIT_VERSION) \\\n              -DWITH_SIMSIMD=$(WITH_SIMSIMD) \\\n              -DWITH_UNWIND=$(HELIO_WITH_UNWIND) -DMARCH_OPT=\"$(HELIO_MARCH_OPT)\"\n\n.PHONY: default\n\nconfigure:\n\tcmake -L -B $(RELEASE_DIR) -DCMAKE_BUILD_TYPE=Release -GNinja $(HELIO_FLAGS)\n\nbuild:\n\tcd $(RELEASE_DIR); \\\n\tninja dfly_bench dragonfly && ldd dragonfly\n\npackage:\n\tcd $(RELEASE_DIR); \\\n\ttar cvfz $(RELEASE_NAME)-dbgsym.tar.gz dragonfly ../LICENSE.md; \\\n\tobjcopy \\\n\t\t--remove-section=\".debug_*\" \\\n\t\t--remove-section=\"!.debug_line\" 
\\\n\t\t--compress-debug-sections \\\n\t\tdragonfly \\\n\t\t$(RELEASE_NAME); \\\n\ttar cvfz $(RELEASE_NAME).tar.gz $(RELEASE_NAME) ../LICENSE.md; \\\n\tobjcopy \\\n\t\t--remove-section=\".debug_*\" \\\n\t\t--remove-section=\"!.debug_line\" \\\n\t\t--compress-debug-sections \\\n\t\tdfly_bench \\\n\t\tdfly_bench-$(BUILD_ARCH); \\\n\ttar cvfz dfly_bench-$(BUILD_ARCH).tar.gz dfly_bench-$(BUILD_ARCH)\n\nrelease: configure build\n\ndefault: release\n"
  },
  {
    "path": "README.ja-JP.md",
    "content": "<p align=\"center\">\n  <a href=\"https://dragonflydb.io\">\n    <img  src=\"/.github/images/logo-full.svg\"\n      width=\"284\" border=\"0\" alt=\"Dragonfly\">\n  </a>\n</p>\n\n[![ci-tests](https://github.com/dragonflydb/dragonfly/actions/workflows/ci.yml/badge.svg)](https://github.com/dragonflydb/dragonfly/actions/workflows/ci.yml) [![Twitter URL](https://img.shields.io/twitter/follow/dragonflydbio?style=social)](https://twitter.com/dragonflydbio)\n\nその他の言語:  [English](README.md) [简体中文](README.zh-CN.md) [한국어](README.ko-KR.md) [Português](README.pt-BR.md)\n\n[Web サイト](https://www.dragonflydb.io/) • [ドキュメント](https://dragonflydb.io/docs) • [クイックスタート](https://www.dragonflydb.io/docs/getting-started) • [コミュニティ Discord](https://discord.gg/HsPjXGVH85) • [Dragonfly Forum](https://dragonfly.discourse.group/) • [Join the Dragonfly Community](https://www.dragonflydb.io/community)\n\n[GitHub Discussions](https://github.com/dragonflydb/dragonfly/discussions) • [GitHub Issues](https://github.com/dragonflydb/dragonfly/issues) • [コントリビュート](https://github.com/dragonflydb/dragonfly/blob/main/CONTRIBUTING.md)\n\n## 世界最速のインメモリデータストア\n\nDragonfly は最新のアプリケーションワークロードのために構築されたインメモリデータストアです。\n\nRedis や Memcached の API と完全に互換性があるため、Dragonfly を採用するためにコードを変更する必要はありません。従来のインメモリデータストアと比較して、Dragonfly は 25 倍のスループット、より低いテールレイテンシでより高いキャッシュヒット率、そして容易な垂直スケーラビリティを提供します。\n\n## コンテンツ\n\n- [ベンチマーク](#ベンチマーク)\n- [クイックスタート](https://github.com/dragonflydb/dragonfly/tree/main/docs/quick-start)\n- [コンフィグ](#コンフィグ)\n- [ロードマップとステータス](#ロードマップとステータス)\n- [デザイン決定](#デザイン決定)\n- [バックグラウンド](#バックグラウンド)\n\n## <a name=\"ベンチマーク\"><a/>ベンチマーク\n\n<img src=\"http://static.dragonflydb.io/repo-assets/aws-throughput.svg\" width=\"80%\" border=\"0\"/>\n\nベンチマークでは、Dragonfly は Redis と比較して 25 倍のスループットを示し、c6gn.16xlarge で 3.8M QPS を超えました。\n\nDragonfly のピークスループットにおける 99 パーセンタイルのレイテンシ指標:\n\n| op    | r6g   | c6gn  | c7g   |\n|-------|-------|-------|-------|\n| set   | 0.8ms | 1ms   | 1ms   |\n| get   | 0.9ms | 0.9ms | 
0.8ms |\n| setex | 0.9ms | 1.1ms | 1.3ms |\n\n*すべてのベンチマークは `memtier_benchmark` (下記参照) を使い、スレッド数はサーバーとインスタンスタイプごとに調整しました。`memtier` は別の c6gn.16xlarge マシンで実行した。SETEX ベンチマークの有効期限は 500 に設定し、テストが終了しても有効であることを確認しました。*\n\n```bash\n  memtier_benchmark --ratio ... -t <threads> -c 30 -n 200000 --distinct-client-seed -d 256 \\\n     --expiry-range=...\n```\n\nパイプラインモード `--pipeline=30` では、Dragonfly は SET 操作で **10M QPS**、GET 操作で **15M QPS** に達する。\n\n### Dragonfly vs. Memcached\n\nAWS 上の c6gn.16xlarge インスタンスで Dragonfly と Memcached を比較した。\n\n同程度のレイテンシで、Dragonfly のスループットは Memcached のスループットを書き込みと読み込みの両方のワークロードで上回った。Dragonfly は、[Memcached の書き込みパス](docs/memcached_benchmark.md)での競合により、書き込みワークロードでより優れたレイテンシを示しました。\n\n#### SET ベンチマーク\n\n| Server    | QPS(thousands qps) | latency 99% | 99.9%   |\n|:---------:|:------------------:|:-----------:|:-------:|\n| Dragonfly |  🟩 3844           |🟩 0.9ms     | 🟩 2.4ms |\n| Memcached |   806              |   1.6ms     | 3.2ms    |\n\n#### GET ベンチマーク\n\n| Server    | QPS(thousands qps) | latency 99% | 99.9%   |\n|-----------|:------------------:|:-----------:|:-------:|\n| Dragonfly | 🟩 3717            |   1ms       | 2.4ms   |\n| Memcached |   2100             |  🟩 0.34ms  | 🟩 0.6ms |\n\n\nMemcached は読み取りベンチマークでより低いレイテンシを示したが、スループットも低かった。\n\n### メモリ効率\n\nメモリ効率をテストするために、`debug populate 5000000 key 1024` コマンドを使用して Dragonfly と Redis に ~5GB のデータを入れ、`memtier` コマンドで更新トラフィックを送信し、`bgsave` コマンドでスナップショットを開始しました。\n\nこの図は、各サーバがメモリ効率の面でどのような挙動を示したかを示している。\n\n<img src=\"http://static.dragonflydb.io/repo-assets/bgsave-memusage.svg\" width=\"70%\" border=\"0\"/>\n\nDragonfly はアイドル状態では Redis よりも 30% メモリ効率が高く、スナップショットフェーズではメモリ使用量の目に見える増加は見られなかった。ピーク時には Redis のメモリ使用量は Dragonfly の 3 倍近くまで増加しました。\n\nDragonfly はスナップショットをより早く、数秒以内に終了させました。\n\nDragonfly のメモリ効率の詳細については、[Dashtable ドキュメント](/docs/dashtable.md)を参照してください。\n\n\n\n## <a name=\"コンフィグ\"><a/>コンフィグ\n\nDragonfly は一般的な Redis の引数をサポートしています。例えば `dragonfly --requirepass=foo --bind localhost`。\n\nDragonfly は現在、以下の Redis 
固有の引数をサポートしています:\n * `port`： Redis 接続ポート (`default: 6379`).\n * `bind`： ローカルホストからの接続のみを許可する場合は `localhost` を、**その IP** アドレスへの接続 (つまり外部からの接続) を許可する場合はパブリック IP アドレスを指定する。\n * `requirepass`： AUTH 認証用のパスワード (`default: \"\"`)。\n * `maxmemory`： データベースが使用するメモリの上限 (人間が読めるバイト数) (`default: 0`)。 `maxmemory` に `0` を指定すると、プログラムが自動的に最大メモリ使用量を決定する。\n * `dir`： Dragonfly Docker はデフォルトで `/data` フォルダをスナップショットに使用し、CLI は `\"\"` を使用する。`-v` の Docker オプションでホストフォルダにマッピングできる。\n * `dbfilename`： データベースを保存・ロードするファイル名 (`default: dump`).\n\nDragonfly 特有の引数もある:\n * `memcached_port`: Memcached 互換 API を有効にするポート (`default: disabled`)。\n * `keys_output_limit`: `keys` コマンドで返されるキーの最大数（`default: 8192`）。`keys` は危険なコマンドであることに注意してください。あまりに多くのキーを取得するとメモリ使用量が増大するため、結果を切り捨てています。\n * `dbnum`: `select` でサポートされるデータベースの最大数。\n * `cache_mode`: 以下の[斬新なキャッシュデザイン](#斬新なキャッシュデザイン)のセクションを参照してください。\n * `hz`: キーの有効期限評価頻度 (`default: 100`)。この頻度が低いと、アイドル時の CPU 使用量が少なくなるが、その分古くなったキーをクリアする速度が遅くなる。\n * `primary_port_http_enabled`: もし `true` (`default: true`) なら、メイン TCP ポートで HTTP コンソールにアクセスできるようにする。\n * `admin_port`: 割り当てられたポートのコンソールへの管理者アクセスを有効にする(`default: disabled`)。HTTP と RESP プロトコルの両方をサポートする。\n * `admin_bind`: 管理コンソールの TCP 接続を指定されたアドレスにバインドする(`default: any`)。HTTP と RESP の両方のプロトコルをサポートする。\n * `admin_nopass`: 割り当てられたポートで、認証トークンなしでコンソールへのオープン管理アクセスを有効にする (`default: false`)。HTTP と RESP の両方のプロトコルをサポートする。\n * `cluster_mode`: サポートするクラスターモード (`default: \"\"`)。現在は `emulated` のみをサポートしている。\n * `cluster_announce_ip`: クラスタコマンドがクライアントにアナウンスする IP。\n\n### 一般的なオプションを使用した開始スクリプトの例:\n\n```bash\n./dragonfly-x86_64 --logtostderr --requirepass=youshallnotpass --cache_mode=true -dbnum 1 --bind localhost --port 6379  --maxmemory=12gb --keys_output_limit=12288 --dbfilename dump.rdb\n```\n\nまた、`dragonfly --flagfile <filename>` を実行することで、設定ファイルから引数を指定することもできる。ファイルには 1 行に 1 つのフラグを記述し、キーと値のフラグには空白の代わりに等号を記述します。\n\nログの管理や TLS のサポートなど、その他のオプションについては `dragonfly --help` を実行してください。\n\n## <a name=\"ロードマップとステータス\"><a/>ロードマップとステータス\n\nDragonfly は現在、~185 個の Redis 
コマンドと、`cas` 以外のすべての Memcached コマンドをサポートしている。ほぼ Redis 5 API と同等ですが、Dragonfly の次のマイルストーンは基本的な機能を安定させ、レプリケーション API を実装することです。まだ実装されていないコマンドで必要なものがあれば、issue を開いてください。\n\nDragonfly ネイティブのレプリケーションについては、桁違いに高速な分散ログフォーマットを設計中です。\n\nレプリケーション機能に続いて、Redis バージョン 3-6 の API に不足しているコマンドを追加していく予定です。\n\n現在 Dragonfly がサポートしているコマンドについては、[コマンドリファレンス](https://dragonflydb.io/docs/category/command-reference)をご覧ください。\n\n## <a name=\"デザイン決定\"><a/> デザイン決定\n\n### 斬新なキャッシュデザイン\n\nDragonfly には、シンプルでメモリ効率の良い、単一の統一された適応型キャッシュアルゴリズムがあります。\n\n`cache_mode=true` フラグを渡すことでキャッシュモードを有効にすることができます。このモードをオンにすると、Dragonfly は将来つまずく可能性が最も低いアイテムを退避させますが、`maxmemory` の限界に近づいたときのみ退避させます。\n\n### 比較的正確な有効期限\n\n有効期限は 8 年以内。\n\nミリ秒精度の有効期限（PEXPIRE、PSETEX など）は、**2^28ms** を超える期限については、最も近い秒に丸められます。この誤差は 0.001% 以下であり、大きな範囲であれば許容範囲となります。\n\nDragonfly の期限と Redis の実装の詳細な違いについては、[こちら](docs/differences.md)を参照してください。\n\n### ネイティブ HTTP コンソールと Prometheus 互換メトリクス\n\nデフォルトでは、Dragonfly はメイン TCP ポート(6379)経由での HTTP アクセスを許可しています。その通り、Redis プロトコル経由でも HTTP プロトコル経由でも Dragonfly に接続することができます。ブラウザで試してみてください。HTTP アクセスには現在あまり情報がありませんが、将来的にはデバッグや管理に役立つ情報が含まれるようになる予定です。\n\nPrometheus 互換のメトリクスを見るには、URL `:6379/metrics` にアクセスしてください。\n\nPrometheus からエクスポートされたメトリクスは Grafana ダッシュボードと互換性があります[こちらを参照](tools/local/monitoring/grafana/provisioning/dashboards/dashboard.json)。\n\n\n重要です！HTTP コンソールは安全なネットワーク内でアクセスすることを想定しています。Dragonfly の TCP ポートを外部に公開する場合は、`--http_admin_console=false` または `--nohttp_admin_console` でコンソールを無効にすることをお勧めします。\n\n\n## <a name=\"バックグラウンド\"><a/>バックグラウンド\n\nDragonfly は、インメモリデータストアを 2022 年に設計したらどのようになるかという実験から始まりました。メモリストアのユーザーとして、またクラウド企業で働いたエンジニアとしての経験から学んだ教訓をもとに、Dragonfly では 2 つの重要な特性を維持する必要があると考えました: それは、すべてのオペレーションにおける原子性の保証と、非常に高いスループットにおけるミリ秒以下の低レイテンシーです。\n\n私たちの最初の課題は、パブリッククラウドで現在利用可能なサーバーを使用して、CPU、メモリー、I/O リソースをフルに活用する方法でした。これを解決するために、私たちは[シェアードナッシングアーキテクチャ](https://en.wikipedia.org/wiki/Shared-nothing_architecture)を使用しています。このアーキテクチャでは、各スレッドが辞書データのスライスを独自に管理できるように、スレッド間でメモリストアの鍵空間を分割することができます。これらのスライスを \"shards\" 
と呼ぶ。シェアードナッシングアーキテクチャのスレッドと I/O 管理のためのライブラリは、[こちら](https://github.com/romange/helio)でオープンソースで提供されています。\n\n複数キー操作に対する原子性保証を提供するために、我々は最近の学術研究の進歩を利用している。Dragonfly のトランザクションフレームワークの開発には、論文 [\"VLL: a lock manager redesign for main memory database systems\"](https://www.cs.umd.edu/~abadi/papers/vldbj-vll.pdf) を選びました。シェアードナッシングアーキテクチャと VLL の選択により、ミューテックスやスピンロックを使用せずにアトミックなマルチキー操作を構成することができました。これは我々の PoC にとって大きなマイルストーンであり、その性能は他の商用やオープンソースのソリューションよりも際立っていました。\n\n私たちの第二の課題は、新しいストアのために、より効率的なデータ構造を設計することだった。この目標を達成するために、我々は論文 [\"Dash: Scalable Hashing on Persistent Memory\"](https://arxiv.org/pdf/2003.07302.pdf) に基づいたハッシュテーブル構造を核とした。この論文自体は、永続メモリ領域を中心にしており、メインメモリストアとは直接関係ありませんが、それでも私たちの問題に最も当てはまります。この論文で提案されているハッシュテーブル設計により、Redis の辞書に存在する 2 つの特別な特性を維持することができました: それは、データストアの成長中にハッシュをインクリメンタルする機能と、ステートレススキャン操作を使って変更中の辞書をトラバースする機能です。これら2つの特性に加え、Dash は CPU とメモリの使用効率が高い。Dash の設計を活用することで、私たちは以下のような機能をさらに革新することができました:\n * TTL レコードの効率的なレコード期限切れ。\n * LRU や LFU のような他のキャッシュ戦略よりも高いヒット率を、**ゼロメモリオーバーヘッド** で達成する新しいキャッシュエビクションアルゴリズム。\n * 新しい **フォークレス** スナップショットアルゴリズム。\n\nDragonfly の基盤を構築し、[そのパフォーマンスに満足したら](#ベンチマーク)、Redis と Memcached の機能を実装していきました。現在までに 185 個の Redis コマンド（Redis 5.0 API とほぼ同等）と 13 個の Memcached コマンドを実装しました。\n\nそして最後に、<br>\n<em>私たちの使命は、最新のハードウェアの進歩を活用した、クラウドワークロード向けの、優れた設計、超高速、コスト効率の良いインメモリデータストアを構築することです。現在のソリューションの API と提案を維持しながら、その問題点を解決するつもりです。</em>\n"
  },
  {
    "path": "README.ko-KR.md",
    "content": "<p align=\"center\">\n  <a href=\"https://dragonflydb.io\">\n    <img  src=\"/.github/images/logo-full.svg\"\n      width=\"284\" border=\"0\" alt=\"Dragonfly\">\n  </a>\n</p>\n\n[![ci-tests](https://github.com/dragonflydb/dragonfly/actions/workflows/ci.yml/badge.svg)](https://github.com/dragonflydb/dragonfly/actions/workflows/ci.yml) [![Twitter URL](https://img.shields.io/twitter/follow/dragonflydbio?style=social)](https://twitter.com/dragonflydbio)\n\n다른 언어 번역본:  [English](README.md) [简体中文](README.zh-CN.md) [日本語](README.ja-JP.md) [Português](README.pt-BR.md)\n\n[Website](https://www.dragonflydb.io/) • [Docs](https://dragonflydb.io/docs) • [Quick Start](https://www.dragonflydb.io/docs/getting-started) • [Community Discord](https://discord.gg/HsPjXGVH85) • [Dragonfly Forum](https://dragonfly.discourse.group/) • [Join the Dragonfly Community](https://www.dragonflydb.io/community)\n\n[GitHub Discussions](https://github.com/dragonflydb/dragonfly/discussions) • [GitHub Issues](https://github.com/dragonflydb/dragonfly/issues) • [Contributing](https://github.com/dragonflydb/dragonfly/blob/main/CONTRIBUTING.md) • [Dragonfly Cloud](https://www.dragonflydb.io/cloud)\n\n## 세상에서 가장 빠른 인-메모리 스토어\n\nDragonfly는 현대 애플리케이션 작업을 위한 인-메모리 데이터스토어입니다.\n\nDragonfly는 Redis와 Memcached API와 완벽하게 호환되며, 이를 적용하기 위한 코드 변경을 필요로 하지 않습니다. 
Dragonfly는 기존 레거시 인-메모리 데이터스토어와 비교하여 25배 이상의 높은 처리량과 캐시 히트율, 낮은 꼬리 지연시간을 갖고있으며 간편한 수직 확장성을 지니고 있습니다.\n\n## 콘텐츠\n\n- [벤치마크](#benchmarks)\n- [빠른 시작](https://github.com/dragonflydb/dragonfly/tree/main/docs/quick-start)\n- [설정](#configuration)\n- [로드맵과 상태](#roadmap-status)\n- [설계 의사결정](#design-decisions)\n- [개발 배경](#background)\n\n## <a name=\"benchmarks\"><a/>벤치마크\n\n<img src=\"http://static.dragonflydb.io/repo-assets/aws-throughput.svg\" width=\"80%\" border=\"0\"/>\n\n벤치마크에 따르면, Dragonfly는 레디스와 비교하여 처리량이 25배이상 증가하였고, c6gn.16xlarge 인스턴스에서 3.8M QPS를 돌파하였음을 보여줍니다.\n\nDragonfly의 피크 처리량에서의 99퍼센트 지연 시간 지표:\n\n| op    | r6g   | c6gn  | c7g   |\n|-------|-------|-------|-------|\n| set   | 0.8ms | 1ms   | 1ms   |\n| get   | 0.9ms | 0.9ms | 0.8ms |\n| setex | 0.9ms | 1.1ms | 1.3ms |\n\n*모든 벤치마크는 서버 및 인스턴스 유형별로 조정된 스레드 수를 사용하여 `memtier_benchmark`(아래를 참고) 수행되었습니다. `memtier`는 별도의 c6gn.16xlarge 머신에서 실행되었습니다. 저희는 테스트 종료 이후에도 유효하게 유지되도록 보장하기 위해 SETEX 벤치마크의 만료 시간을 500으로 설정하였습니다.*\n\n```bash\n  memtier_benchmark --ratio ... -t <threads> -c 30 -n 200000 --distinct-client-seed -d 256 \\\n     --expiry-range=...\n```\n\n파이프라인 모드에서 `--pipeline=30`은 Dragonfly가 SET 연산으로 **10M QPS**, GET 연산으로 **15M QPS**에 도달할 수 있음을 나타냅니다.\n\n### Dragonfly vs. Memcached\n\n저희는 AWS의 c6gn.16xlarge 인스턴스에서 Dragonfly와 Memcached를 비교하는 작업을 수행했습니다.\n\n비슷한 지연시간을 가진 상황에서, Dragonfly의 처리량은 쓰기 및 읽기 작업 모두에서 Memcached보다 성능이 뛰어났습니다. 
쓰기 작업에서는 [Memcached의 쓰기 경로](docs/memcached_benchmark.md)에서의 경합으로 인하여 Dragonfly가 보다 적은 지연시간을 보였다는 점이 입증되었습니다.\n\n#### SET 벤치마크\n\n| Server    | QPS(thousands qps) | latency 99% | 99.9%   |\n|:---------:|:------------------:|:-----------:|:-------:|\n| Dragonfly |  🟩 3844           |🟩 0.9ms     | 🟩 2.4ms |\n| Memcached |   806              |   1.6ms     | 3.2ms    |\n\n#### GET 벤치마크\n\n| Server    | QPS(thousands qps) | latency 99% | 99.9%   |\n|-----------|:------------------:|:-----------:|:-------:|\n| Dragonfly | 🟩 3717            |   1ms       | 2.4ms   |\n| Memcached |   2100             |  🟩 0.34ms  | 🟩 0.6ms |\n\nMemcached는 읽기 벤치마크의 지연 시간은 적었지만, 처리량도 낮았습니다.\n\n### 메모리 효율\n\n메모리 효율을 테스트하기 위해서, 저희는 `debug populate 5000000 key 1024` 명령어를 활용하여 Dragonfly와 Redis에 ~5GB 정도의 데이터를 채운 후, `memtier` 를 통하여 업데이트 트래픽을 전송한 후, `bgsave` 명령을 통하여 스냅샷을 시작했습니다.\n\n이 그림은 메모리 효율 측면에서 각 서버가 어떻게 동작했는지 보여줍니다.\n\n<img src=\"http://static.dragonflydb.io/repo-assets/bgsave-memusage.svg\" width=\"70%\" border=\"0\"/>\n\nDragonfly는 유휴 상태에서 Redis보다 메모리 효율이 30% 더 좋았으며, 스냅샷 단계에서 메모리 사용량이 눈에 띄게 증가하지 않았습니다. Redis는 고점에서 Dragonfly에 비해 메모리 사용량이 약 3배 증가하였습니다.\n\nDragonfly는 스냅샷 단계를 몇 초안에 더 빨리 마쳤습니다.\n\nDragonfly의 메모리 효율에 대한 정보가 더 필요하시다면, 저희의 [Dashtable 문서](/docs/dashtable.md)를 참고하시기 바랍니다.\n\n\n## <a name=\"configuration\"><a/>설정\n\nDragonfly는 적용 가능한 Redis 인수를 지원합니다. 예를 들면, `dragonfly --requirepass=foo --bind localhost`와 같은 명령어를 사용할 수 있습니다.\n\nDragonfly는 현재 아래와 같은 Redis 인수들을 지원합니다 :\n  * `port`: Redis 연결 포트 (`기본값: 6379`).\n  * `bind`: `localhost`를 사용하여 로컬호스트 연결만 허용하거나 공용 IP 주소를 사용하여 해당 IP 주소에 연결을 허용합니다.(즉, 외부에서도 가능)\n  * `requirepass`: AUTH 인증을 위한 패스워드 (`기본값: \"\"`).\n  * `maxmemory`: 데이터베이스에서 사용하는 최대 메모리 제한(사람이 읽을 수 있는 바이트 단위) (`기본값: 0`). `maxmemory` 의 값이 `0` 이면 프로그램이 최대 메모리 사용량을 자동으로 결정합니다.\n  * `dir`: Dragonfly Docker는 스냅샷을 위해 기본적으로 `/data` 폴더를 사용하고, CLI은 `\"\"`을 사용합니다. 
Docker 옵션인 `-v` 을 통해서 호스트 폴더에 매핑할 수 있습니다.\n  * `dbfilename`: 저장하고 불러올 데이터베이스 파일 이름 (`기본값: dump`).\n\n아래는 Dragonfly 전용 인수 입니다 :\n  * `memcached_port`: Memcached 호환 API를 위한 포트 (`기본값: disabled`).\n  * `keys_output_limit`: `keys` 명령을 통해 반환 되는 최대 키의 수 (`기본값: 8192`). `keys` 명령은 위험하기 때문에, 너무 많은 키를 가져올 때 메모리 사용량이 급증하지 않도록 결과를 해당 인수만큼 잘라냅니다.\n  * `dbnum`: `select` 명령에 대해 지원되는 최대 데이터베이스 수.\n  * `cache_mode`: 아래의 섹션 [새로운 캐시 설계](#새로운-캐시-설계)을 참고해주시기 바랍니다.\n  * `hz`: 키가 만료되었는지를 판단하는 빈도(`기본값: 100`). 낮은 빈도는 키 방출이 느려지는 대신, 유휴 상태일 때 CPU 사용량을 줄입니다.\n  * `primary_port_http_enabled`: `true` 인 경우 HTTP 콘솔로 메인 TCP 포트 접근을 허용합니다. (`기본값: true`).\n  * `admin_port`: 할당된 포트에서 관리자 콘솔 접근을 활성화합니다. (`기본값: disabled`). HTTP와 RESP 프로토콜 모두를 지원합니다.\n  * `admin_bind`: 주어진 주소에 관리자 콘솔 TCP 연결을 바인딩합니다. (`기본값: any`). HTTP와 RESP 프로토콜 모두를 지원합니다.\n  * `admin_nopass`: 할당된 포트에 대해서 인증 토큰 없이 관리자 콘솔 접근을 활성화합니다. (`기본값: false`). HTTP와 RESP 프로토콜 모두를 지원합니다.\n  * `cluster_mode`: 클러스터 모드가 지원됩니다. (`기본값: \"\"`). 현재는 `emulated` 만 지원합니다.\n  * `cluster_announce_ip`: 클러스터 명령을 클라이언트에게 알리는 IP 주소.\n\n\n### 주요 옵션을 활용한 실행 스크립트 예시:\n\n```bash\n./dragonfly-x86_64 --logtostderr --requirepass=youshallnotpass --cache_mode=true -dbnum 1 --bind localhost --port 6379  --maxmemory=12gb --keys_output_limit=12288 --dbfilename dump.rdb\n```\n\n인수들은 `dragonfly --flagfile <filename>`을 실행하여 설정 파일을 통해서도 전달할 수 있습니다. 전달될 파일은 각 줄에 키-값 형태의 플래그 나열 하기위해 등호를 사용합니다.\n\n로그 관리나 TLS 지원과 같은 추가 옵션을 확인하고 싶다면, `dragonfly --help` 를 실행해보시길 바랍니다.\n\n## <a name=\"roadmap-status\"><a/>로드맵과 상태\n\nDragonfly는 현재 ~185개의 Redis 명령어들과 `cas`를 제외한 모든 Memcached 명령어를 지원합니다. 이는 거의 Redis 5 API와 동등하며, Dragonfly의 다음 마일스톤은 기본 기능을 안정화하고 복제 API를 구현하는 것입니다. 
아직 구현되지 않은 필요한 명령어가 있다면, 이슈를 오픈해주세요.\n\nDragonfly 고유 복제기능을 위해, 저희는 몇 배 높은 속도를 지원할 수 있는 분산 로그 형식을 설계하고 있습니다.\n\n복제 기능을 추가한 뒤에 저희는 Redis 3-6 API에 해당되는 누락 명령어들을 계속 추가할 예정입니다.\n\nDragonfly에 의해 현재 지원되는 명령어를 확인하기 위해서 [명령어 레퍼런스](https://dragonflydb.io/docs/category/command-reference)를 참고해주시기 바랍니다.\n\n## <a name=\"design-decisions\"><a/>설계 의사결정\n\n### 새로운 캐시 설계\n\nDragonfly는 단순하고 메모리 효율적인 단일, 통합, 적응형 캐싱 알고리즘을 제공합니다.\n\n`--cache_mode=true` 플래그를 전달하여 캐싱 모드를 활성화할 수 있습니다. 이 모드가 활성화되면, Dragonfly는 `maxmemory` 한도에 가까워질 때만, 미래에 재사용 될 가능성이 가장 낮은 항목을 방출합니다.\n\n### 상대적인 정확성을 가진 만료 기한\n\n만료 범위는 약 ~8년으로 제한됩니다.\n\n밀리초 단위의 정밀한 만료 기한(PEXPIRE, PSETEX, 등)은 **2^28ms보다 큰 기한에 대해** 가장 가까운 초로 반올림됩니다. 이는 0.001% 미만의 오차를 가지며, 큰 범위에 대해 적용될 때는 수용 가능한 수준입니다. 만약 이런 방식이 사용사례에 적합하지 않다면, 문의를 주시거나 해당 사용사례를 설명하는 이슈를 오픈해주세요.\n\nDragonfly와 Redis의 만료 기한에 대한 구현의 차이는 [여기서 확인하실 수 있습니다](docs/differences.md).\n\n### 네이티브 HTTP 콘솔과 Prometheus 호환 매트릭\n\n기본적으로, Dragonfly는 메인 TCP 포트(6379)에 HTTP 접근을 허용합니다. 즉, Redis 프로토콜과 HTTP 프로토콜 모두를 통해 Dragonfly에 연결할 수 있습니다. - 서버는 연결 초기화 과정에서 프로토콜을 자동으로 인식합니다. 웹 브라우저를 통하여 시도해보시기 바랍니다. 현재 HTTP 접근은 많은 정보를 제공하지 않지만, 유용한 디버깅 및 관리 정보를 향후 추가할 예정입니다.\n\n`:6379/metrics` 에 접근하게 되면, Prometheus 호환 매트릭을 확인할 수 있습니다.\n\nPrometheus에서 내보내는 매트릭들은 Grafana 대시보드와 호환됩니다. 자세한 내용은 [여기](tools/local/monitoring/grafana/provisioning/dashboards/dashboard.json)를 참조해주세요.\n\n중요! HTTP 콘솔은 안전한 네트워크 내에서 접근하도록 설계되었습니다. Dragonfly의 TCP 포트를 외부로 노출한다면, `--http_admin_console=false` 혹은 `--nohttp_admin_console`과 같은 인수를 활용하여 콘솔을 비활성화하는 것을 조언해드립니다.\n\n\n## <a name=\"background\"><a/>개발배경\n\nDragonfly는 2022년에 인-메모리 데이터스토어를 설계한다면 어땠을까에 대한 실험으로 시작되었습니다. 클라우드 회사에서 근무한 엔지니어 및 메모리 스토어 사용자의 경험을 바탕으로, 저희는 Dragonfly에 두 가지 핵심 특성을 보존해야함을 알았습니다: 모든 작업에 대한 원자성 보장과 매우 높은 처리량에 대한 밀리초 이하의 낮은 지연 시간을 보장하는 것이었습니다.\n\n첫 번째 문제는 오늘날 퍼블릭 클라우드 환경에서 사용 가능한 서버를 사용하여 CPU, 메모리 및 I/O 자원을 어떻게 최대한 활용할 수 있을지였습니다. 이 문제를 해결하기 위해 저희는 [비공유 아키텍처(Shared Nothing Architecture)](https://en.wikipedia.org/wiki/Shared-nothing_architecture)를 사용했습니다. 
이는 저희가 메모리 스토어의 각 스레드 사이의 키 공간을 분할할 수 있게하였습니다. 이를 통해 각 스레드들은 그들의 딕셔너리 데이터들의 조각을 관리할 수 있게 되었습니다. 저희는 이 조각들을 \"샤드(shards)\"라 불렀습니다. 비공유 아키텍처에 대한 스레드 및 I/O 관리를 위한 라이브러리는 [여기](https://github.com/romange/helio)에서 오픈소스로 제공됩니다.\n\n멀티-키 작업에 대한 원자성 보장을 위해, Dragonfly의 트랜잭션 프레임워크를 개발하기 위해 저희는 최근 학계의 연구 발전을 활용했고 [\"VLL: a lock manager redesign for main memory database systems\"](https://www.cs.umd.edu/~abadi/papers/vldbj-vll.pdf) 논문을 채택했습니다. 비공유 아키텍처와 VLL의 선택은 우리가 뮤텍스나 스핀락을 사용하지 않고도 원자적 멀티-키 작업을 구성할 수 있게 했습니다.\n이것은 저희의 PoC에 있어서 주요한 마일스톤이었고, 그 성능은 다른 상용 및 오픈소스 솔루션보다 성능이 뛰어났습니다.\n\n두 번째 문제는 새로운 저장소를 위하여 더 효율적인 데이터 구조를 설계하는 것이었습니다. 이 목표를 달성하기 위해서 저희는 핵심 해시테이블 구조를 [\"Dash: Scalable Hashing on Persistent Memory\"](https://arxiv.org/pdf/2003.07302.pdf) 논문을 기반으로 작업했습니다. 이 논문은 영속적인 메모리 도메인을 중심으로 다루며, 이는 메인-메모리 저장소와 직접적인 연관관계는 없었습니다. 하지만 여전히 저희 문제를 해결하기 위해서 가장 적합했습니다. 해당 논문의 제안된 해시테이블 설계는 저희가 레디스 딕셔너리에 표현된 두 가지 특별한 특성을 유지 가능하게 해줬습니다: 데이터스토어 확장 중 증분 해싱 기능과 상태 없는 스캔 작업을 사용하여 변화하는 딕셔너리를 순회하는 능력이었습니다. 이 두 가지 속성 외에도 Dash는 CPU와 메모리 사용에서 더 효율적입니다. 저희는 다음과 같은 기능들로 더욱 혁신할 수 있었습니다:\n * TTL 레코드에 대한 효율적인 만료 처리\n * LRU와 LFU 같은 다른 캐시 전략보다 더 높은 히트율을 달성하는 새로운 캐시 방출 알고리즘과 **제로 메모리 오버헤드**.\n * 새로운 **fork-less** 스냅샷 알고리즘.\n\n저희는 Dragonfly의 기반을 구축하고 성능에 만족하게 되었을 때, Redis와 Memcached의 기능을 구현하기 시작했습니다. 저희는 약 185개의 Redis 명령(대략적으로 Redis 5.0 API와 동등)과 13개의 Memcached 명령을 구현했습니다.\n\n마지막으로, <br>\n<em>저희의 임무는 최신 하드웨어 발전을 활용하는 클라우드 작업을 위한 멋진 설계와 초고속 처리량 그리고 비용효율적인 인-메모리 데이터스토어를 만드는 것입니다. 저희는 현재 솔루션의 제품 API들이나 제안을 유지하면서 당면 과제를 해결하고자 합니다.</em>\n"
  },
  {
    "path": "README.md",
    "content": "<p align=\"center\">\n  <a href=\"https://dragonflydb.io\">\n    <img  src=\"/.github/images/logo-full.svg\"\n      width=\"284\" border=\"0\" alt=\"Dragonfly\">\n  </a>\n</p>\n\n[![ci-tests](https://github.com/dragonflydb/dragonfly/actions/workflows/ci.yml/badge.svg)](https://github.com/dragonflydb/dragonfly/actions/workflows/ci.yml) [![Twitter URL](https://img.shields.io/twitter/follow/dragonflydbio?style=social)](https://twitter.com/dragonflydbio)\n\n> Before moving on, please consider giving us a GitHub star ⭐️. Thank you!\n\nOther languages:  [简体中文](README.zh-CN.md) [日本語](README.ja-JP.md) [한국어](README.ko-KR.md) [Português](README.pt-BR.md)\n\n[Website](https://www.dragonflydb.io/) • [Docs](https://dragonflydb.io/docs) • [Quick Start](https://www.dragonflydb.io/docs/getting-started) • [Community Discord](https://discord.gg/HsPjXGVH85) • [Dragonfly Forum](https://dragonfly.discourse.group/) • [Join the Dragonfly Community](https://www.dragonflydb.io/community)\n\n[GitHub Discussions](https://github.com/dragonflydb/dragonfly/discussions) • [GitHub Issues](https://github.com/dragonflydb/dragonfly/issues) • [Contributing](https://github.com/dragonflydb/dragonfly/blob/main/CONTRIBUTING.md) • [AI Agents Guide](AGENTS.md) • [Dragonfly Cloud](https://www.dragonflydb.io/cloud)\n\n## The world's most efficient in-memory data store\n\nDragonfly is an in-memory data store built for modern application workloads.\n\nFully compatible with Redis and Memcached APIs, Dragonfly requires no code changes to adopt. 
Compared to legacy in-memory datastores, Dragonfly delivers 25X more throughput, higher cache hit rates with lower tail latency, and can run on up to 80% less resources for the same sized workload.\n\n## Contents\n\n- [Benchmarks](#benchmarks)\n- [Quick start](https://github.com/dragonflydb/dragonfly/tree/main/docs/quick-start)\n- [Configuration](#configuration)\n- [Roadmap and status](#roadmap-status)\n- [Design decisions](#design-decisions)\n- [Background](#background)\n- [Build from source](./docs/build-from-source.md)\n\n## <a name=\"benchmarks\"><a/>Benchmarks\n\nWe first compare Dragonfly with Redis on `m5.large` instance which is commonly used to run Redis\ndue to its single-threaded architecture. The benchmark program runs from another\nload-test instance (c5n) in the same AZ using `memtier_benchmark  -c 20 --test-time 100 -t 4 -d 256 --distinct-client-seed`\n\nDragonfly shows a comparable performance:\n\n1. SETs (`--ratio 1:0`):\n\n|  Redis                                   |      DF                                |\n| -----------------------------------------|----------------------------------------|\n| QPS: 159K, P99.9: 1.16ms, P99: 0.82ms    | QPS:173K, P99.9: 1.26ms, P99: 0.9ms    |\n|                                          |                                        |\n\n2. GETs (`--ratio 0:1`):\n\n|  Redis                                  |      DF                                |\n| ----------------------------------------|----------------------------------------|\n| QPS: 194K, P99.9: 0.8ms, P99: 0.65ms    | QPS: 191K, P99.9: 0.95ms, P99: 0.8ms   |\n\nThe benchmark above shows that the algorithmic layer inside DF that allows it to scale vertically\ndoes not take a large toll when running single-threaded.\n\nHowever, if we take a bit stronger instance (m5.xlarge), the gap between DF and Redis starts growing.\n(`memtier_benchmark  -c 20 --test-time 100 -t 6 -d 256 --distinct-client-seed`):\n1. 
SETs (`--ratio 1:0`):\n\n|  Redis                                  |      DF                                |\n| ----------------------------------------|----------------------------------------|\n| QPS: 190K, P99.9: 2.45ms, P99: 0.97ms   |  QPS: 279K , P99.9: 1.95ms, P99: 1.48ms|\n\n2. GETs (`--ratio 0:1`):\n\n|  Redis                                  |      DF                                |\n| ----------------------------------------|----------------------------------------|\n| QPS: 220K, P99.9: 0.98ms , P99: 0.8ms   |  QPS: 305K, P99.9: 1.03ms, P99: 0.87ms |\n\n\nDragonfly throughput capacity continues to grow with instance size,\nwhile single-threaded Redis is bottlenecked on CPU and reaches local maxima in terms of performance.\n\n<img src=\"http://static.dragonflydb.io/repo-assets/aws-throughput.svg\" width=\"80%\" border=\"0\"/>\n\nIf we compare Dragonfly and Redis on the most network-capable instance c6gn.16xlarge,\nDragonfly showed a 25X increase in throughput compared to Redis single process, crossing 3.8M QPS.\n\nDragonfly's 99th percentile latency metrics at its peak throughput:\n\n| op    | r6g   | c6gn  | c7g   |\n|-------|-------|-------|-------|\n| set   | 0.8ms | 1ms   | 1ms   |\n| get   | 0.9ms | 0.9ms | 0.8ms |\n| setex | 0.9ms | 1.1ms | 1.3ms |\n\n*All benchmarks were performed using `memtier_benchmark` (see below) with number of threads tuned per server and instance type. `memtier` was run on a separate c6gn.16xlarge machine. We set the expiry time to 500 for the SETEX benchmark to ensure it would survive the end of the test.*\n\n```bash\n  memtier_benchmark --ratio ... -t <threads> -c 30 -n 200000 --distinct-client-seed -d 256 \\\n     --expiry-range=...\n```\n\nIn pipeline mode `--pipeline=30`, Dragonfly reaches **10M QPS** for SET and **15M QPS** for GET operations.\n\n### Dragonfly vs. 
Memcached\n\nWe compared Dragonfly with Memcached on a c6gn.16xlarge instance on AWS.\n\nWith a comparable latency, Dragonfly throughput outperformed Memcached throughput in both write and read workloads. Dragonfly demonstrated better latency in write workloads due to contention on the [write path in Memcached](docs/memcached_benchmark.md).\n\n#### SET benchmark\n\n| Server    | QPS(thousands qps) | latency 99% | 99.9%   |\n|:---------:|:------------------:|:-----------:|:-------:|\n| Dragonfly |  🟩 3844           |🟩 0.9ms     | 🟩 2.4ms |\n| Memcached |   806              |   1.6ms     | 3.2ms    |\n\n#### GET benchmark\n\n| Server    | QPS(thousands qps) | latency 99% | 99.9%   |\n|-----------|:------------------:|:-----------:|:-------:|\n| Dragonfly | 🟩 3717            |   1ms       | 2.4ms   |\n| Memcached |   2100             |  🟩 0.34ms  | 🟩 0.6ms |\n\n\nMemcached exhibited lower latency for the read benchmark, but also lower throughput.\n\n### Memory efficiency\n\nTo test memory efficiency, we filled Dragonfly and Redis with ~5GB of data using the `debug populate 5000000 key 1024` command, sent update traffic with `memtier`, and kicked off the snapshotting with the `bgsave` command.\n\nThis figure demonstrates how each server behaved in terms of memory efficiency.\n\n<img src=\"http://static.dragonflydb.io/repo-assets/bgsave-memusage.svg\" width=\"70%\" border=\"0\"/>\n\nDragonfly was 30% more memory efficient than Redis in the idle state and did not show any visible increase in memory use during the snapshot phase. At peak, Redis memory use increased to almost 3X that of Dragonfly.\n\nDragonfly finished the snapshot faster, within a few seconds.\n\nFor more info about memory efficiency in Dragonfly, see our [Dashtable doc](/docs/dashtable.md).\n\n\n\n## <a name=\"configuration\"><a/>Configuration\n\nDragonfly supports common Redis arguments where applicable. 
For example, you can run: `dragonfly --requirepass=foo --bind localhost`.\n\nDragonfly currently supports the following Redis-specific arguments:\n * `port`: Redis connection port (`default: 6379`).\n * `bind`: Use `localhost` to only allow localhost connections or a public IP address to allow connections **to that IP** address (i.e. from outside too). Use `0.0.0.0` to allow all IPv4.\n * `requirepass`: The password for AUTH authentication (`default: \"\"`).\n * `maxmemory`: Limit on maximum memory (in human-readable bytes) used by the database (`default: 0`). A `maxmemory` value of `0` means the program will automatically determine its maximum memory usage.\n * `dir`: Dragonfly Docker uses the `/data` folder for snapshotting by default, the CLI uses `\"\"`. You can use the `-v` Docker option to map it to your host folder.\n * `dbfilename`: The filename to save and load the database (`default: dump`).\n\nThere are also some Dragonfly-specific arguments:\n * `memcached_port`: The port to enable Memcached-compatible API on (`default: disabled`).\n * `keys_output_limit`: Maximum number of returned keys in `keys` command (`default: 8192`). Note that `keys` is a dangerous command. We truncate its result to avoid a blowup in memory use when fetching too many keys.\n * `dbnum`: Maximum number of supported databases for `select`.\n * `cache_mode`: See the [novel cache design](#novel-cache-design) section below.\n * `hz`: Key expiry evaluation frequency (`default: 100`). 
Lower frequency uses less CPU when idle at the expense of a slower eviction rate.\n * `snapshot_cron`: Cron schedule expression for automatic backup snapshots using standard cron syntax with the granularity of minutes (`default: \"\"`).\n   Here are some cron schedule expression examples below, and feel free to read more about this argument in our [documentation](https://www.dragonflydb.io/docs/managing-dragonfly/backups#the-snapshot_cron-flag).\n\n   | Cron Schedule Expression | Description                                |\n   |--------------------------|--------------------------------------------|\n   | `* * * * *`              | At every minute                            |\n   | `*/5 * * * *`            | At every 5th minute                        |\n   | `5 */2 * * *`            | At minute 5 past every 2nd hour            |\n   | `0 0 * * *`              | At 00:00 (midnight) every day              |\n   | `0 6 * * 1-5`            | At 06:00 (dawn) from Monday through Friday |\n\n * `primary_port_http_enabled`: Allows accessing HTTP console on main TCP port if `true` (`default: true`).\n * `admin_port`: To enable admin access to the console on the assigned port (`default: disabled`). Supports both HTTP and RESP protocols.\n * `admin_bind`: To bind the admin console TCP connection to a given address (`default: any`). Supports both HTTP and RESP protocols.\n * `admin_nopass`: To enable open admin access to console on the assigned port, without auth token needed (`default: false`). Supports both HTTP and RESP protocols.\n * `cluster_mode`: Cluster mode supported (`default: \"\"`). 
Currently supports only `emulated`.\n * `cluster_announce_ip`: The IP that cluster commands announce to the client.\n * `announce_port`: The port that cluster commands announce to the client, and to replication master.\n\n### Example start script with popular options:\n\n```bash\n./dragonfly-x86_64 --logtostderr --requirepass=youshallnotpass --cache_mode=true -dbnum 1 --bind localhost --port 6379 --maxmemory=12gb --keys_output_limit=12288 --dbfilename dump.rdb\n```\n\nArguments can be also provided via:\n * `--flagfile <filename>`: The file should list one flag per line, with equal signs instead of spaces for key-value flags. No quotes are needed for flag values.\n * Setting environment variables. Set `DFLY_x`, where `x` is the exact name of the flag, case sensitive.\n\nFor more options like logs management or TLS support, run `dragonfly --help`.\n\n## <a name=\"roadmap-status\"><a/>Roadmap and status\n\nDragonfly currently supports ~185 Redis commands and all Memcached commands besides `cas`. Almost on par with the Redis 5 API, Dragonfly's next milestone will be to stabilize basic functionality and implement the replication API. If there is a command you need that is not implemented yet, please open an issue.\n\nFor Dragonfly-native replication, we are designing a distributed log format that will support order-of-magnitude higher speeds.\n\nFollowing the replication feature, we will continue adding missing commands for Redis versions 3-6 APIs.\n\nPlease see our [Command Reference](https://dragonflydb.io/docs/category/command-reference) for the current commands supported by Dragonfly.\n\n## <a name=\"design-decisions\"><a/> Design decisions\n\n### Novel cache design\n\nDragonfly has a single, unified, adaptive caching algorithm that is simple and memory efficient.\n\nYou can enable caching mode by passing the `--cache_mode=true` flag. 
Once this mode is on, Dragonfly will evict items least likely to be stumbled upon in the future but only when it is near the `maxmemory` limit.\n\n### Expiration deadlines with relative accuracy\n\nExpiration ranges are limited to ~8 years.\n\nExpiration deadlines with millisecond precision (PEXPIRE, PSETEX, etc.) are rounded to the closest second **for deadlines greater than 2^28ms**, which has less than 0.001% error and should be acceptable for large ranges. If this is not suitable for your use case, get in touch or open an issue explaining your case.\n\nFor more detailed differences between Dragonfly expiration deadlines and Redis implementations, [see here](docs/differences.md).\n\n### Native HTTP console and Prometheus-compatible metrics\n\nBy default, Dragonfly allows HTTP access via its main TCP port (6379). That's right, you can connect to Dragonfly via Redis protocol and via HTTP protocol — the server recognizes the protocol automatically during the connection initiation. Go ahead and try it with your browser. HTTP access currently does not have much info but will include useful debugging and management info in the future.\n\nGo to the URL `:6379/metrics` to view Prometheus-compatible metrics.\n\nThe Prometheus exported metrics are compatible with the Grafana dashboard, [see here](tools/local/monitoring/grafana/provisioning/dashboards/dashboard.json).\n\n\nImportant! The HTTP console is meant to be accessed within a safe network. If you expose Dragonfly's TCP port externally, we advise you to disable the console with `--http_admin_console=false` or `--nohttp_admin_console`.\n\n\n## <a name=\"background\"><a/>Background\n\nDragonfly started as an experiment to see how an in-memory datastore could look if it was designed in 2022. 
Based on lessons learned from our experience as users of memory stores and engineers who worked for cloud companies, we knew that we need to preserve two key properties for Dragonfly: Atomicity guarantees for all operations and low, sub-millisecond latency over very high throughput.\n\nOur first challenge was how to fully utilize CPU, memory, and I/O resources using servers that are available today in public clouds. To solve this, we use [shared-nothing architecture](https://en.wikipedia.org/wiki/Shared-nothing_architecture), which allows us to partition the keyspace of the memory store between threads so that each thread can manage its own slice of dictionary data. We call these slices \"shards\". The library that powers thread and I/O management for shared-nothing architecture is open-sourced [here](https://github.com/romange/helio).\n\nTo provide atomicity guarantees for multi-key operations, we use the advancements from recent academic research. We chose the paper [\"VLL: a lock manager redesign for main memory database systems”](https://www.cs.umd.edu/~abadi/papers/vldbj-vll.pdf) to develop the transactional framework for Dragonfly. The choice of shared-nothing architecture and VLL allowed us to compose atomic multi-key operations without using mutexes or spinlocks. This was a major milestone for our PoC and its performance stood out from other commercial and open-source solutions.\n\nOur second challenge was to engineer more efficient data structures for the new store. To achieve this goal, we based our core hashtable structure on the paper [\"Dash: Scalable Hashing on Persistent Memory\"](https://arxiv.org/pdf/2003.07302.pdf). The paper itself is centered around the persistent memory domain and is not directly related to main-memory stores, but it's still most applicable to our problem. 
The hashtable design suggested in the paper allowed us to maintain two special properties that are present in the Redis dictionary: The incremental hashing ability during datastore growth and the ability to traverse the dictionary under changes using a stateless scan operation. In addition to these two properties, Dash is more efficient in CPU and memory use. By leveraging Dash's design, we were able to innovate further with the following features:\n * Efficient record expiry for TTL records.\n * A novel cache eviction algorithm that achieves higher hit rates than other caching strategies like LRU and LFU with **zero memory overhead**.\n * A novel **fork-less** snapshotting algorithm.\n\nOnce we had built the foundation for Dragonfly and [we were happy with its performance](#benchmarks), we went on to implement the Redis and Memcached functionality. We have to date implemented ~185 Redis commands (roughly equivalent to Redis 5.0 API) and 13 Memcached commands.\n\nAnd finally, <br>\n<em>Our mission is to build a well-designed, ultra-fast, cost-efficient in-memory datastore for cloud workloads that takes advantage of the latest hardware advancements. We intend to address the pain points of current solutions while preserving their product APIs and propositions.</em>\n"
  },
  {
    "path": "README.pt-BR.md",
    "content": "<p align=\"center\">\n  <a href=\"https://dragonflydb.io\">\n    <img  src=\"/.github/images/logo-full.svg\"\n      width=\"284\" border=\"0\" alt=\"Dragonfly\">\n  </a>\n</p>\n\n[![ci-tests](https://github.com/dragonflydb/dragonfly/actions/workflows/ci.yml/badge.svg)](https://github.com/dragonflydb/dragonfly/actions/workflows/ci.yml) [![Twitter URL](https://img.shields.io/twitter/follow/dragonflydbio?style=social)](https://twitter.com/dragonflydbio)\n\n> Antes de continuar, considere deixar uma estrela no nosso repositório ⭐️. Obrigado!\n\nOutros idiomas: [简体中文](README.zh-CN.md) [日本語](README.ja-JP.md) [한국어](README.ko-KR.md) [English](README.md)\n\n[Site oficial](https://www.dragonflydb.io/) • [Documentação](https://dragonflydb.io/docs) • [Guia Rápido](https://www.dragonflydb.io/docs/getting-started) • [Discord da Comunidade](https://discord.gg/HsPjXGVH85) • [Fórum Dragonfly](https://dragonfly.discourse.group/) • [Participe da Comunidade](https://www.dragonflydb.io/community)\n\n[Discussões no GitHub](https://github.com/dragonflydb/dragonfly/discussions) • [Issues no GitHub](https://github.com/dragonflydb/dragonfly/issues) • [Contribuindo](https://github.com/dragonflydb/dragonfly/blob/main/CONTRIBUTING.md) • [Dragonfly Cloud](https://www.dragonflydb.io/cloud)\n\n## O armazenamento de dados em memória mais eficiente do mundo\n\nDragonfly é um armazenamento de dados em memória projetado para cargas de trabalho modernas.\n\nTotalmente compatível com as APIs do Redis e Memcached, o Dragonfly não requer alterações de código para adoção. 
Em comparação com armazenamentos legados, o Dragonfly oferece 25x mais throughput, maiores taxas de acerto em cache com menor latência de cauda e pode operar com até 80% menos recursos para a mesma carga.\n\n## Conteúdo\n\n- [Benchmarks](#benchmarks)\n- [Guia rápido](https://github.com/dragonflydb/dragonfly/tree/main/docs/quick-start)\n- [Configuração](#configuration)\n- [Roteiro e status](#roadmap-status)\n- [Decisões de design](#design-decisions)\n- [Contexto](#background)\n- [Compilação a partir do código-fonte](./docs/build-from-source.md)\n\n## <a name=\"benchmarks\"><a/>Benchmarks\n\nPrimeiro comparamos o Dragonfly com o Redis em uma instância `m5.large`, frequentemente usada para rodar Redis devido à sua arquitetura single-threaded. O benchmark roda de outra instância de carga (c5n) na mesma AZ usando `memtier_benchmark  -c 20 --test-time 100 -t 4 -d 256 --distinct-client-seed`.\n\nO Dragonfly mostra desempenho comparável:\n\n1. SETs (`--ratio 1:0`):\n\n| Redis                                 | DF                                   |\n| ------------------------------------- | ------------------------------------ |\n| QPS: 159K, P99.9: 1.16ms, P99: 0.82ms | QPS: 173K, P99.9: 1.26ms, P99: 0.9ms |\n\n2. GETs (`--ratio 0:1`):\n\n| Redis                                | DF                                   |\n| ------------------------------------ | ------------------------------------ |\n| QPS: 194K, P99.9: 0.8ms, P99: 0.65ms | QPS: 191K, P99.9: 0.95ms, P99: 0.8ms |\n\nO benchmark mostra que a camada algorítmica do DF, que permite escalabilidade vertical, não gera sobrecarga significativa em execução single-thread.\n\nCom uma instância mais forte (m5.xlarge), a diferença entre DF e Redis cresce.\n(`memtier_benchmark  -c 20 --test-time 100 -t 6 -d 256 --distinct-client-seed`):\n\n1. 
SETs (`--ratio 1:0`):\n\n| Redis                                 | DF                                    |\n| ------------------------------------- | ------------------------------------- |\n| QPS: 190K, P99.9: 2.45ms, P99: 0.97ms | QPS: 279K, P99.9: 1.95ms, P99: 1.48ms |\n\n2. GETs (`--ratio 0:1`):\n\n| Redis                                | DF                                    |\n| ------------------------------------ | ------------------------------------- |\n| QPS: 220K, P99.9: 0.98ms, P99: 0.8ms | QPS: 305K, P99.9: 1.03ms, P99: 0.87ms |\n\nA capacidade de throughput do Dragonfly cresce com o tamanho da instância, enquanto o Redis single-thread atinge o limite de CPU.\n\n<img src=\"http://static.dragonflydb.io/repo-assets/aws-throughput.svg\" width=\"80%\" border=\"0\"/>\n\nNa instância c6gn.16xlarge (maior capacidade de rede), o Dragonfly atinge 25x mais throughput que o Redis, superando 3.8M QPS.\n\nLatência de 99% no pico de throughput do Dragonfly:\n\n| op    | r6g   | c6gn  | c7g   |\n| ----- | ----- | ----- | ----- |\n| set   | 0.8ms | 1ms   | 1ms   |\n| get   | 0.9ms | 0.9ms | 0.8ms |\n| setex | 0.9ms | 1.1ms | 1.3ms |\n\n_Todos os benchmarks foram realizados com `memtier_benchmark`, ajustando o número de threads conforme a instância. O `memtier` rodava em uma c6gn.16xlarge separada. No benchmark SETEX, foi definido tempo de expiração de 500 para garantir sobrevivência até o final do teste._\n\n```bash\nmemtier_benchmark --ratio ... -t <threads> -c 30 -n 200000 --distinct-client-seed -d 256 \\\n   --expiry-range=...\n```\n\nEm modo pipeline `--pipeline=30`, o Dragonfly alcança **10M QPS** em SET e **15M QPS** em GET.\n\n### Dragonfly vs. Memcached\n\nComparamos Dragonfly e Memcached em uma c6gn.16xlarge na AWS.\n\nCom latência comparável, o throughput do Dragonfly superou o do Memcached tanto em leitura quanto escrita. 
Em escrita, a latência do Dragonfly foi melhor devido à contenção no [caminho de escrita do Memcached](docs/memcached_benchmark.md).\n\n#### Benchmark de SET\n\n| Servidor  | QPS (milhares) | latência 99% |  99.9%   |\n| :-------: | :------------: | :----------: | :------: |\n| Dragonfly |    🟩 3844     |   🟩 0.9ms   | 🟩 2.4ms |\n| Memcached |      806       |    1.6ms     |  3.2ms   |\n\n#### Benchmark de GET\n\n| Servidor  | QPS (milhares) | latência 99% |  99.9%   |\n| --------- | :------------: | :----------: | :------: |\n| Dragonfly |    🟩 3717     |     1ms      |  2.4ms   |\n| Memcached |      2100      |  🟩 0.34ms   | 🟩 0.6ms |\n\nMemcached teve menor latência em leitura, mas também menor throughput.\n\n### Eficiência de memória\n\nPara testar a eficiência de memória, preenchemos o Dragonfly e o Redis com \\~5GB de dados usando o comando `debug populate 5000000 key 1024`, enviamos tráfego de atualização com `memtier` e iniciamos o snapshot com o comando `bgsave`.\n\nA figura abaixo demonstra como cada servidor se comportou em termos de eficiência de memória.\n\n<img src=\"http://static.dragonflydb.io/repo-assets/bgsave-memusage.svg\" width=\"70%\" border=\"0\"/>\n\nO Dragonfly foi 30% mais eficiente em memória que o Redis em estado ocioso e não apresentou aumento visível no uso de memória durante a fase de snapshot. No pico, o uso de memória do Redis aumentou para quase 3 vezes o do Dragonfly.\n\nO Dragonfly concluiu o snapshot mais rápido, em poucos segundos.\n\nPara mais informações sobre eficiência de memória no Dragonfly, veja nosso [documento sobre Dashtable](/docs/dashtable.md).\n\n## <a name=\"configuration\"><a/>Configuração\n\nO Dragonfly suporta argumentos comuns do Redis quando aplicável. 
Por exemplo, você pode executar: `dragonfly --requirepass=foo --bind localhost`.\n\nAtualmente, o Dragonfly suporta os seguintes argumentos específicos do Redis:\n\n- `port`: Porta de conexão Redis (`padrão: 6379`).\n- `bind`: Use `localhost` para permitir conexões apenas locais ou um IP público para permitir conexões **para esse IP** (ou seja, externas também). Use `0.0.0.0` para permitir todas as conexões IPv4.\n- `requirepass`: Senha para autenticação AUTH (`padrão: \"\"`).\n- `maxmemory`: Limite de memória máxima (em bytes legíveis) usada pelo banco (`padrão: 0`). Um valor `0` significa que o programa determinará automaticamente o uso máximo de memória.\n- `dir`: O Docker do Dragonfly usa a pasta `/data` para snapshots por padrão, o CLI usa `\"\"`. Você pode usar a opção `-v` do Docker para mapear para uma pasta do host.\n- `dbfilename`: Nome do arquivo para salvar/carregar o banco de dados (`padrão: dump`).\n\nTambém há argumentos específicos do Dragonfly:\n\n- `memcached_port`: Porta para habilitar API compatível com Memcached (`padrão: desabilitado`).\n\n- `keys_output_limit`: Número máximo de chaves retornadas no comando `keys` (`padrão: 8192`). Note que `keys` é um comando perigoso. Limitamos o resultado para evitar explosão de uso de memória ao buscar muitas chaves.\n\n- `dbnum`: Número máximo de bancos de dados suportados para `select`.\n\n- `cache_mode`: Veja a seção sobre [design de cache inovador](#novel-cache-design).\n\n- `hz`: Frequência de avaliação de expiração de chave (`padrão: 100`). 
Frequências menores usam menos CPU em idle, mas têm menor taxa de remoção.\n\n- `snapshot_cron`: Expressão cron para snapshots automáticos usando sintaxe cron padrão, com granularidade de minutos (`padrão: \"\"`).\n\n  Exemplos:\n\n  | Expressão Cron | Descrição                           |\n  | -------------- | ----------------------------------- |\n  | `* * * * *`    | A cada minuto                       |\n  | `*/5 * * * *`  | A cada 5 minutos                    |\n  | `5 */2 * * *`  | No minuto 5 de cada 2 horas         |\n  | `0 0 * * *`    | Às 00:00 (meia-noite) todos os dias |\n  | `0 6 * * 1-5`  | Às 06:00 (manhã) de segunda a sexta |\n\n- `primary_port_http_enabled`: Permite acesso ao console HTTP na porta TCP principal se `true` (`padrão: true`).\n\n- `admin_port`: Habilita acesso admin ao console na porta atribuída (`padrão: desabilitado`). Suporta protocolos HTTP e RESP.\n\n- `admin_bind`: Define o IP de binding do console admin (`padrão: qualquer`). Suporta HTTP e RESP.\n\n- `admin_nopass`: Habilita acesso admin sem autenticação (`padrão: false`). Suporta HTTP e RESP.\n\n- `cluster_mode`: Modo cluster suportado (`padrão: \"\"`). Atualmente só `emulated`.\n\n- `cluster_announce_ip`: IP que os comandos de cluster anunciam ao cliente.\n\n- `announce_port`: Porta que os comandos de cluster anunciam ao cliente e ao master de replicação.\n\n### Exemplo de script de inicialização com opções populares:\n\n```bash\n./dragonfly-x86_64 --logtostderr --requirepass=youshallnotpass --cache_mode=true -dbnum 1 --bind localhost --port 6379 --maxmemory=12gb --keys_output_limit=12288 --dbfilename dump.rdb\n```\n\nArgumentos também podem ser passados via:\n\n- `--flagfile <arquivo>`: O arquivo deve conter um flag por linha, com `=` em vez de espaços para flags com valor. Não usar aspas.\n- Variáveis de ambiente. 
Use `DFLY_x`, onde `x` é o nome exato do flag (case sensitive).\n\nPara mais opções como logs ou suporte a TLS, execute `dragonfly --help`.\n\n## <a name=\"roadmap-status\"><a/>Roadmap e status\n\nAtualmente o Dragonfly suporta \\~185 comandos Redis e todos os comandos Memcached exceto `cas`. Já quase no nível da API do Redis 5, o próximo marco é estabilizar as funcionalidades básicas e implementar a API de replicação. Caso precise de um comando ainda não implementado, abra uma issue.\n\nPara replicação nativa do Dragonfly, estamos projetando um formato de log distribuído que suportará velocidades ordens de magnitude maiores.\n\nApós a replicação, continuaremos adicionando comandos faltantes das versões 3 a 6 do Redis.\n\nConsulte nossa [Referência de Comandos](https://dragonflydb.io/docs/category/command-reference) para a lista atual.\n\n## <a name=\"design-decisions\"><a/>Decisões de design\n\n### Design de cache inovador\n\nO Dragonfly tem um algoritmo de cache adaptativo, unificado e simples, eficiente em memória.\n\nVocê pode habilitar o modo cache com o flag `--cache_mode=true`. Esse modo remove itens menos prováveis de serem acessados no futuro, mas **somente** próximo ao limite de `maxmemory`.\n\n### Expiração com precisão relativa\n\nIntervalos de expiração são limitados a \\~8 anos.\n\nDeadlines com precisão de milissegundos (PEXPIRE, PSETEX etc.) são arredondadas para o segundo mais próximo **quando superiores a 2^28ms**, com erro menor que 0.001%. Se isso for inadequado, entre em contato ou abra uma issue explicando o caso.\n\nPara mais diferenças entre os deadlines do Dragonfly e do Redis, [clique aqui](docs/differences.md).\n\n### Console HTTP nativo e métricas compatíveis com Prometheus\n\nPor padrão, o Dragonfly permite acesso HTTP via porta TCP principal (6379). Ou seja, você pode conectar via protocolo Redis ou HTTP — o servidor reconhece automaticamente o protocolo ao conectar. Acesse com o navegador. 
Hoje o console HTTP tem pouca informação, mas no futuro incluirá debug e info de gerenciamento.\n\nAcesse `:6379/metrics` para ver métricas Prometheus-compatíveis.\n\nAs métricas são compatíveis com o dashboard do Grafana, [veja aqui](tools/local/monitoring/grafana/provisioning/dashboards/dashboard.json).\n\nImportante: o console HTTP deve ser acessado em rede segura. Se expor a porta TCP do Dragonfly externamente, desabilite o console com `--http_admin_console=false` ou `--nohttp_admin_console`.\n\n## <a name=\"background\"><a/>Contexto\n\nO Dragonfly começou como um experimento para repensar um datastore in-memory em 2022. Baseado em lições como usuários e engenheiros de cloud, sabíamos que dois princípios deveriam ser preservados: garantias de atomicidade e latência sub-millisecond sob alto throughput.\n\nDesafio 1: Utilizar ao máximo CPU, memória e I/O em servidores modernos. A solução foi adotar [arquitetura shared-nothing](https://en.wikipedia.org/wiki/Shared-nothing_architecture), particionando o keyspace entre threads. Chamamos os slices de “shards”. A biblioteca que gerencia threads e I/O foi open-sourceada [aqui](https://github.com/romange/helio).\n\nPara garantir atomicidade em operações multi-key, usamos avanços recentes da pesquisa acadêmica. Escolhemos o paper [\"VLL: a lock manager redesign for main memory database systems\"](https://www.cs.umd.edu/~abadi/papers/vldbj-vll.pdf) como base para o framework transacional. A combinação VLL + shared-nothing permitiu compor operações atômicas multi-key **sem mutex ou spinlock**. O resultado foi um PoC com performance superior a outras soluções.\n\nDesafio 2: Estruturas de dados mais eficientes. Baseamos o hashtable no paper [\"Dash: Scalable Hashing on Persistent Memory\"](https://arxiv.org/pdf/2003.07302.pdf). Mesmo voltado à memória persistente, foi aplicável. 
O design permitiu manter:\n\n- Hash incremental durante crescimento.\n- Scan stateless mesmo com mudanças.\n\nAlém disso, o Dash é mais eficiente em uso de CPU/memória. Com esse design, inovamos ainda com:\n\n- Expiração eficiente para registros TTL.\n- Algoritmo de cache com mais hits que LRU/LFU com **zero overhead**.\n- Algoritmo de snapshot **sem fork**.\n\nCom essa base pronta e [performance satisfatória](#benchmarks), implementamos as APIs Redis e Memcached (\\~185 comandos Redis, equivalente ao Redis 5.0, e 13 do Memcached).\n\nPor fim, <br> <em>Nossa missão é construir um datastore in-memory rápido, eficiente e bem projetado para cargas em nuvem, aproveitando o hardware moderno. Queremos resolver as dores das soluções atuais mantendo APIs e propostas de valor.</em>\n"
  },
  {
    "path": "README.zh-CN.md",
    "content": "<p align=\"center\">\n  <a href=\"https://dragonflydb.io\">\n    <img  src=\"/.github/images/logo-full.svg\"\n      width=\"284\" border=\"0\" alt=\"Dragonfly\">\n  </a>\n</p>\n\n\n\n[![ci-tests](https://github.com/dragonflydb/dragonfly/actions/workflows/ci.yml/badge.svg)](https://github.com/dragonflydb/dragonfly/actions/workflows/ci.yml) [![Twitter URL](https://img.shields.io/twitter/follow/dragonflydbio?style=social)](https://twitter.com/dragonflydbio)\n\n> 在您继续之前，请考虑给我们一个 GitHub 星标 ⭐️。谢谢！\n\n其他语言:  [English](README.md) [日本語](README.ja-JP.md) [한국어](README.ko-KR.md) [Português](README.pt-BR.md)\n\n[主页](https://dragonflydb.io/) • [快速入门](https://github.com/dragonflydb/dragonfly/tree/main/docs/quick-start) • [社区 Discord](https://discord.gg/HsPjXGVH85) • [Dragonfly 论坛](https://dragonfly.discourse.group/) • [加入 Dragonfly 社区](https://www.dragonflydb.io/community)\n\n[GitHub Discussions](https://github.com/dragonflydb/dragonfly/discussions) • [GitHub Issues](https://github.com/dragonflydb/dragonfly/issues) • [贡献指南](https://github.com/dragonflydb/dragonfly/blob/main/CONTRIBUTING.md)\n\n## 全世界最快的内存数据库\n\nDragonfly是一种针对现代应用程序负荷需求而构建的内存数据库，完全兼容Redis和Memcached的 API，迁移时无需修改任何代码。相比于这些传统的内存数据库，Dragonfly提供了其25倍的吞吐量，高缓存命中率和低尾延迟，并且对于相同大小的工作负载运行资源最多可减少80%。\n\n## 目录\n\n- [基准测试](#基准测试)\n- [快速入门](https://github.com/dragonflydb/dragonfly/tree/main/docs/quick-start)\n- [配置方法](#配置方法)\n- [开发路线和开发现状](#开发路线和开发现状)\n- [设计决策](#设计决策)\n- [开发背景](#开发背景)\n\n## <a name=\"基准测试\"><a/> 基准测试\n\n<img src=\"http://static.dragonflydb.io/repo-assets/aws-throughput.svg\" width=\"80%\" border=\"0\"/>\n\nDragonfly在c6gn.16xlarge上达到了每秒380万个查询（QPS），相比于Redis，吞吐量提高了25倍。\n\n在Dragonfly的峰值吞吐量下，P99延迟如下：\n\n| op    | r6g   | c6gn  | c7g   |\n| ----- | ----- | ----- | ----- |\n| set   | 0.8ms | 1ms   | 1ms   |\n| get   | 0.9ms | 0.9ms | 0.8ms |\n| setex | 0.9ms | 1.1ms | 1.3ms 
|\n\n*所有基准测试均使用`memtier_benchmark`（见下文），根据服务器类型和实例类型调整线程数。`memtier`运行在独立的c6gn.16xlarge机器上。对于setex基准测试，我们使用了500的到期范围，以便其能够存活直到测试结束。*\n\n```bash\n  memtier_benchmark --ratio ... -t <threads> -c 30 -n 200000 --distinct-client-seed -d 256 \\\n     --expiry-range=...\n```\n\n当以管道模式运行，并设置参数`--pipeline=30`时，Dragonfly可以实现**10M qps**的SET操作和 **15M qps**的GET操作。\n\n### Memcached / Dragonfly\n\n我们在 AWS 的 `c6gn.16xlarge` 实例上比较了 memcached 和 Dragonfly。如下图所示，与 memcached 相比，Dragonfly 的吞吐量在读写两方面上都占据了优势，并且在延迟方面也还不错。对于写入工作，Dragonfly 的延迟更低，这是由于在 memcached 的写入路径上存在竞争（请参见[此处](docs/memcached_benchmark.md)）。\n\n#### SET benchmark\n\n|  Server   | QPS(thousands qps) | latency 99% |  99.9%  |\n| :-------: | :----------------: | :---------: | :-----: |\n| Dragonfly |       🟩 3844       |   🟩 0.9ms   | 🟩 2.4ms |\n| Memcached |        806         |    1.6ms    |  3.2ms  |\n\n#### GET benchmark\n\n| Server    | QPS(thousands qps) | latency 99% |  99.9%  |\n| --------- | :----------------: | :---------: | :-----: |\n| Dragonfly |       🟩 3717       |     1ms     |  2.4ms  |\n| Memcached |        2100        |  🟩 0.34ms   | 🟩 0.6ms |\n\n\n对于读取基准测试，Memcached 表现出了更低的延迟，但在吞吐量方面比不上Dragonfly。\n\n### 内存效率\n\n在接下来的测试中，我们使用 `debug populate 5000000 key 1024` 命令向 Dragonfly 和 Redis 分别写入了约 5GB 的数据。然后我们使用 `memtier` 发送更新流量并使用 `bgsave` 命令启动快照。下图清楚地展示了这两个服务器在内存效率方面的表现。\n\n<img src=\"http://static.dragonflydb.io/repo-assets/bgsave-memusage.svg\" width=\"70%\" border=\"0\"/>\n\n在空闲状态下，Dragonfly 比 Redis 节省约 30% 的内存。\n在快照阶段，Dragonfly 也没有显示出任何明显的内存增加。\n但同时，Redis 在峰值时的内存几乎达到了 Dragonfly 的 3 倍。\nDragonfly 完成快照也很快，仅在启动后几秒钟内就完成了。\n有关 Dragonfly 内存效率的更多信息，请参见 [dashtable 文档](/docs/dashtable.md)。\n\n\n\n## <a name=\"开发路线和开发现状\"><a/>配置方法\n\nDragonfly 支持 Redis 的常见参数。\n例如，您可以运行：`dragonfly --requirepass=foo --bind localhost`。\n\n目前，Dragonfly 支持以下 Redis 特定参数：\n\n* `port`：Redis 连接端口，默认为 `6379`。\n* `bind`：使用本地主机名仅允许本地连接，使用公共 IP 地址允许外部连接到**该 IP 地址**。\n* `requirepass`：AUTH 认证密码，默认为空 `\"\"`。\n* `maxmemory`：限制数据库使用的最大内存（以字节为单位）。`0` 
表示程序将自动确定其最大内存使用量。默认为 `0`。\n* `dir`：默认情况下，dragonfly docker 使用 `/data` 文件夹进行快照。CLI 使用的是 `\"\"`。你可以使用 `-v` docker 选项将其映射到主机文件夹。\n* `dbfilename`：保存/加载数据库的文件名。默认为 `dump`；\n\n此外，还有 Dragonfly 特定的参数选项：\n\n* `memcached_port`：在此端口上启用 memcached 兼容的 API。默认禁用。\n\n* `keys_output_limit`：在`keys` 命令中返回的最大键数。默认为 `8192`。\n\n  `keys` 命令是危险命令。我们会截断结果以避免在获取太多键时内存溢出。\n\n* `dbnum`：`select` 支持的最大数据库数。\n\n* `cache_mode`：请参见下面的 [缓存](#全新的缓存设计) 部分。\n\n* `hz`：键到期评估频率。默认为 `100`。空闲时，使用较低的频率可以占用较少的 CPU资源，但这会导致清理过期键的速度下降。\n\n* `snapshot_cron`：定时自动备份快照的 cron 表达式，使用标准的、精确到分钟的 cron 语法。默认为空 `\"\"`。\n\n  下面是一些 cron 表达式的示例，更多关于此参数的细节请参见[文档](https://www.dragonflydb.io/docs/managing-dragonfly/backups#the-snapshot_cron-flag)。\n\n  | Cron 表达式      | 描述                               |\n  |---------------|----------------------------------|\n  | `* * * * *`   | 每分钟                              |\n  | `*/5 * * * *` | 每隔 5 分钟 (00:00, 00:05, 00:10...) |\n  | `5 */2 * * *` | 每隔 2 小时的第 5 分钟                   |\n  | `0 0 * * *`   | 每天的 00:00 午夜                     |\n  | `0 6 * * 1-5` | 从星期一到星期五的每天 06:00 黎明             |\n* `primary_port_http_enabled`：如果为 true，则允许在主 TCP 端口上访问 HTTP 控制台。默认为 `true`。\n\n* `admin_port`：如果设置，将在指定的端口上启用对控制台的管理访问。支持 HTTP 和 RESP 协议。默认禁用。\n\n* `admin_bind`：如果设置，将管理控制台 TCP 连接绑定到给定地址。支持 HTTP 和 RESP 协议。默认为 `any`。\n\n* `admin_nopass`: 如果设置，允许在不提供任何认证令牌的情况下，通过指定的端口访问管理控制台。同时支持 HTTP 和 RESP 协议。 默认为 `false`。\n\n* `cluster_mode`：支持集群模式。目前仅支持 `emulated`。默认为空 `\"\"`。\n\n* `cluster_announce_ip`：集群模式下向客户端公开的 IP。\n\n### 启动脚本示例，包含常用选项：\n\n```bash\n./dragonfly-x86_64 --logtostderr --requirepass=youshallnotpass --cache_mode=true -dbnum 1 --bind localhost --port 6379 --maxmemory=12gb --keys_output_limit=12288 --dbfilename dump.rdb\n```\n还可以通过运行 `dragonfly --flagfile <filename>` 从配置文件中获取参数，配置文件的每行应该列出一个参数，并用等号代替键值参数的空格。\n\n要获取更多选项，如日志管理或TLS支持，请运行 `dragonfly --help`。\n\n## <a name=\"开发路线和开发现状\"><a/>开发路线和开发现状\n\n目前，Dragonfly支持约185个Redis命令以及除 `cas` 之外的所有 Memcached 命令。\n我们几乎达到了Redis 5 
API的水平。我们的下一个里程碑更新将会稳定基本功能并实现复刻API。\n如果您发现您需要的命令尚未实现，请提出一个Issue。\n\n对于dragonfly-native复制技术，我们正在设计一种分布式日志格式，该格式将支持更高的速度。\n\n在实现复制功能之后，我们将继续实现API 3-6中其他缺失的Redis命令。\n\n请参见[命令参考](https://dragonflydb.io/docs/category/command-reference)以了解Dragonfly当前支持的命令。\n\n## <a name=\"设计决策\"><a/> 设计决策\n\n### 全新的缓存设计\n\nDragonfly采用单一的自适应缓存算法，该算法非常简单且具备高内存效率。\n你可以通过使用 `--cache_mode=true` 参数来启用缓存模式。一旦启用了此模式，Dragonfly将会删除最低概率可能被使用的内容，但这只会在接近最大内存限制时发生。\n\n### 相对准确的过期期限\n\n过期范围限制最高为约8年。此外，**对于大于2^28ms的到期期限**，毫秒精度级别（PEXPIRE/PSETEX等）会被简化到秒级。\n这种舍入的误差小于0.001％，我希望这在长时间范围情况下是可以接受的。\n如果这不符合你的使用需求，请与我联系或提出一个Issue，并解释您的情况。\n\n关于与Redis实现之间的更多差异，请参见[此处](docs/differences.md)。\n\n### 原生HTTP控制台和兼容Prometheus的标准\n\n默认情况下，Dragonfly允许通过其主TCP端口（6379）进行HTTP访问。没错，您可以通过Redis协议或HTTP协议连接到Dragonfly - 服务器会在连接初始化期间自动识别协议。 不妨在你自己的浏览器中尝试一下。现在HTTP访问没有太多信息可供参考，但在将来，我们计划添加有用的调试和管理信息。如果您转到`: 6379/metrics` URL，您将看到一些兼容Prometheus的标准。\n\nPrometheus导出的标准与Grafana仪表盘兼容，[请参见此处](tools/local/monitoring/grafana/provisioning/dashboards/dashboard.json)。\n\n重要！HTTP控制台仅应在安全网络内访问。如果您将Dragonfly的TCP端口暴露在外部，则建议使用`--http_admin_console=false`或`--nohttp_admin_console`禁用控制台。\n\n\n## <a name=\"开发背景\"><a/>开发背景\n\nDragonfly始于一项实验，旨在探索如果在2022年重新设计内存数据库，它会是什么样子。基于我们作为内存存储的用户以及作为云服务公司的工程师的经验教训，我们得知需要保留Dragonfly的两个关键属性：a) 为其所有操作提供原子性保证，b) 保证在非常高的吞吐量下实现低于毫秒的延迟。\n\n我们面临的首要挑战是如何充分利用当今云服务器的CPU、内存和I/O资源。为了解决这个问题，我们使用了 [无共享式架构（shared-nothing architecture）](https://en.wikipedia.org/wiki/Shared-nothing_architecture)，它允许我们在不同的线程之间分割内存存储的空间，使得每个线程可以管理自己的字典数据切片。我们称这些切片为“分片（shards）”。为无共享式架构提供线程和I/O管理功能的库在[这里](https://github.com/romange/helio)开源。\n\n为了提供对多键并发操作的原子性保证，我们使用了最近学术研究的进展。我们选择了论文 [\"VLL: a lock manager redesign for main memory database systems”](https://www.cs.umd.edu/~abadi/papers/vldbj-vll.pdf) 来开发Dragonfly的事务框架。无共享式架构和VLL的选择使我们能够在不使用互斥锁或自旋锁的情况下组合原子的多键操作。这是我们 PoC 的一个重要里程碑，它的性能在商业和开源解决方案中脱颖而出。\n\n我们面临的第二个挑战是为新存储设计更高效的数据结构。为了实现这个目标，我们基于论文[\"Dash: Scalable Hashing on Persistent 
Memory\"](https://arxiv.org/pdf/2003.07302.pdf)构建了核心哈希表结构。这篇论文本身是以持久性内存为中心的，与主存没有直接相关性。\n\n然而，它非常适用于我们的问题。它提出了一种哈希表设计，允许我们维护Redis字典中存在的两个特殊属性：a) 数据存储增长时的渐进式哈希能力；b) 使用无状态扫描操作时，遍历变化的字典的能力。除了这两个属性之外，Dash在CPU和内存方面都更加高效。通过利用Dash的设计，我们能够进一步创新，实现以下功能：\n\n- 针对TTL的高效记录过期功能。\n- 一种新颖的缓存驱逐算法，具有比其他缓存策略（如LRU和LFU）更高的命中率，同时**零内存开销**。\n- 一种新颖的无fork快照算法。\n\n在我们为Dragonfly打下基础并满意其[性能](#基准测试)后，我们开始实现Redis和Memcached功能。\n目前，我们已经实现了约185个Redis命令（大致相当于Redis 5.0 API）和13个Memcached命令。\n\n最后，<br>\n<em>我们的使命是构建一个设计良好、超高速、成本效益高的云工作负载内存数据存储系统，利用最新的硬件技术。我们旨在解决当前解决方案的痛点，同时保留其产品API和优势。 </em>\n"
  },
  {
    "path": "TODO.md",
    "content": "1. To move lua_project to dragonfly from helio (DONE)\n2. To limit lua stack to something reasonable like 4096.\n3. To inject our own allocator to lua to track its memory.\n\n\n## Object lifecycle and thread-safety.\n\nCurrently our transactional and locking model is based on an assumption that any READ or WRITE\naccess to objects must be performed in a shard where they belong.\n\nHowever, this assumption can be relaxed to get significant gains for read-only queries.\n\n### Explanation\nOur transactional framework prevents from READ-locked objects to be mutated. It does not prevent from their PrimaryTable to grow or change, of course. These objects can move to different entries inside the table. However, our CompactObject maintains the following property - its reference CompactObject.AsRef() is valid no matter where the master object moves and it's valid and safe for reading even from other threads. The exception regarding thread safety is SmallString which uses translation table for its pointers.\n\nIf we change the SmallString translation table to be global and thread-safe (it should not have lots of write contention anyway) we may access primetable keys and values from another thread and write them directly to sockets.\n\nUse-case: large strings that need to be copied. Sets that need to be serialized for SMEMBERS/HGETALL commands etc. Additional complexity - we will need to lock those variables even for single hop transactions and unlock them afterwards. The unlocking hop does not need to increase user-visible latency since it can be done after we send reply to the socket."
  },
  {
    "path": "contrib/charts/dragonfly/.helmignore",
    "content": "# Patterns to ignore when building packages.\n# This supports shell glob matching, relative path matching, and\n# negation (prefixed with !). Only one pattern per line.\n.DS_Store\n# Common VCS dirs\n.git/\n.gitignore\n.bzr/\n.bzrignore\n.hg/\n.hgignore\n.svn/\n# Common backup files\n*.swp\n*.bak\n*.tmp\n*.orig\n*~\n# Various IDEs\n.project\n.idea/\n*.tmproj\n.vscode/\nci/\n*.go\ngo.mod\ngo.sum\n"
  },
  {
    "path": "contrib/charts/dragonfly/Chart.yaml",
    "content": "apiVersion: v2\nname: dragonfly\ndescription: Dragonfly is a modern in-memory datastore, fully compatible with Redis and Memcached APIs.\n\n# A chart can be either an 'application' or a 'library' chart.\n#\n# Application charts are a collection of templates that can be packaged into versioned archives\n# to be deployed.\n#\n# Library charts provide useful utilities or functions for the chart developer. They're included as\n# a dependency of application charts to inject those utilities and functions into the rendering\n# pipeline. Library charts do not define any templates and therefore cannot be deployed.\ntype: application\n\n# This is the chart version. This version number should be incremented each time you make changes\n# to the chart and its templates, including the app version.\n# Versions are expected to follow Semantic Versioning (https://semver.org/)\nversion: v1.37.0\n\n# This is the version number of the application being deployed. This version number should be\n# incremented each time you make changes to the application. Versions are not expected to\n# follow Semantic Versioning. They should reflect the version the application is using.\n# It is recommended to use it with quotes.\nappVersion: \"v1.37.0\"\n\nhome: https://dragonflydb.io/\n\nkeywords:\n  - database\n  - keyvalue\n  - cache\n\nsources:\n  - https://github.com/dragonflydb/dragonfly\n\nkubeVersion: \">=1.23.0-0\"\n"
  },
  {
    "path": "contrib/charts/dragonfly/README.md",
    "content": "# dragonfly\n\n![Version: v0.12.0](https://img.shields.io/badge/Version-v0.12.0-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square) ![AppVersion: v0.12.0](https://img.shields.io/badge/AppVersion-v0.12.0-informational?style=flat-square)\n\nDragonfly is a modern in-memory datastore, fully compatible with Redis and Memcached APIs.\n\n**Homepage:** <https://dragonflydb.io/>\n\n## Source Code\n\n* <https://github.com/dragonflydb/dragonfly>\n\n## Requirements\n\nKubernetes: `>=1.23.0-0`\n\n\n## Installing from a pre-packaged OCI\n\nPick a version from https://github.com/dragonflydb/dragonfly/pkgs/container/dragonfly%2Fhelm%2Fdragonfly\n\nExample:\n\n```shell\nVERSION=v1.12.1\nhelm upgrade --install dragonfly oci://ghcr.io/dragonflydb/dragonfly/helm/dragonfly --version $VERSION\n```\n\n## Values\n\n| Key | Type | Default | Description |\n|-----|------|---------|-------------|\n| affinity | object | `{}` | Affinity for pod assignment |\n| command | list | `[]` | Allow overriding the container's command |\n| commonLabels | object | `{}` | Common labels to add to all K8s resources |\n| extraArgs | list | `[]` | Extra arguments to pass to the dragonfly binary |\n| extraContainers | list | `[]` | Additional sidecar containers |\n| extraObjects | list | `[]` | extra K8s manifests to deploy |\n| extraVolumeMounts | list | `[]` | Extra volume mounts corresponding to the volumes mounted above |\n| extraVolumes | list | `[]` | Extra volumes to mount into the pods |\n| fullnameOverride | string | `\"\"` | String to fully override dragonfly.fullname |\n| image.pullPolicy | string | `\"IfNotPresent\"` | Dragonfly image pull policy |\n| image.repository | string | `\"docker.dragonflydb.io/dragonflydb/dragonfly\"` | Container Image Registry to pull the image from |\n| image.tag | string | `\"\"` | Overrides the image tag whose default is the chart appVersion. 
|\n| imagePullSecrets | list | `[]` | Container Registry Secret names in an array |\n| initContainers | list | `[]` | A list of initContainers to run before each pod starts |\n| nameOverride | string | `\"\"` | String to partially override dragonfly.fullname |\n| nodeSelector | object | `{}` | Node labels for pod assignment |\n| podAnnotations | object | `{}` | Annotations for pods |\n| podSecurityContext | object | `{}` | Set securityContext for pod itself |\n| probes.livenessProbe.exec.command[0] | string | `\"/bin/sh\"` |  |\n| probes.livenessProbe.exec.command[1] | string | `\"/usr/local/bin/healthcheck.sh\"` |  |\n| probes.livenessProbe.failureThreshold | int | `3` |  |\n| probes.livenessProbe.initialDelaySeconds | int | `10` |  |\n| probes.livenessProbe.periodSeconds | int | `10` |  |\n| probes.livenessProbe.successThreshold | int | `1` |  |\n| probes.livenessProbe.timeoutSeconds | int | `5` |  |\n| probes.readinessProbe.exec.command[0] | string | `\"/bin/sh\"` |  |\n| probes.readinessProbe.exec.command[1] | string | `\"/usr/local/bin/healthcheck.sh\"` |  |\n| probes.readinessProbe.failureThreshold | int | `3` |  |\n| probes.readinessProbe.initialDelaySeconds | int | `10` |  |\n| probes.readinessProbe.periodSeconds | int | `10` |  |\n| probes.readinessProbe.successThreshold | int | `1` |  |\n| probes.readinessProbe.timeoutSeconds | int | `5` |  |\n| prometheusRule.enabled | bool | `false` | Deploy a PrometheusRule |\n| prometheusRule.spec | list | `[]` | PrometheusRule.Spec https://awesome-prometheus-alerts.grep.to/rules |\n| replicaCount | int | `1` | Number of replicas to deploy |\n| resources.limits | object | `{}` | The resource limits for the containers |\n| resources.requests | object | `{}` | The requested resources for the containers |\n| env | list | `[]` | Extra environment variables |\n| envFrom | list | `[]` | Extra environment variables from K8s objects |\n| securityContext | object | `{}` | Set securityContext for containers |\n| 
service.annotations | object | `{}` | Extra annotations for the service |\n| service.labels | object | `{}` | Extra labels for the service |\n| service.metrics.portName | string | `\"metrics\"` | name for the metrics port |\n| service.metrics.serviceType | string | `\"ClusterIP\"` | serviceType for the metrics service |\n| service.port | int | `6379` | Dragonfly service port |\n| service.type | string | `\"ClusterIP\"` | Service type to provision. Can be NodePort, ClusterIP or LoadBalancer |\n| serviceAccount.annotations | object | `{}` | Annotations to add to the service account |\n| serviceAccount.create | bool | `true` | Specifies whether a service account should be created |\n| serviceAccount.name | string | `\"\"` | The name of the service account to use. If not set and create is true, a name is generated using the fullname template |\n| serviceMonitor.annotations | object | `{}` | additional annotations to apply to the metrics |\n| serviceMonitor.enabled | bool | `false` | If true, a ServiceMonitor CRD is created for a prometheus operator |\n| serviceMonitor.interval | string | `\"10s\"` | scrape interval |\n| serviceMonitor.labels | object | `{}` | additional labels to apply to the metrics |\n| serviceMonitor.namespace | string | `\"\"` | namespace in which to deploy the ServiceMonitor CR. defaults to the application namespace |\n| serviceMonitor.scrapeTimeout | string | `\"10s\"` | scrape timeout |\n| storage.enabled | bool | `false` | If /data should persist. This will provision a StatefulSet instead. 
|\n| storage.requests | string | `\"128Mi\"` | Volume size to request for the PVC |\n| storage.storageClassName | string | `\"\"` | Global StorageClass for Persistent Volume(s) |\n| tls.cert | string | `\"\"` | TLS certificate |\n| tls.createCerts | bool | `false` | use cert-manager to automatically create the certificate |\n| tls.duration | string | `\"87600h0m0s\"` | duration or ttl of the validity of the created certificate |\n| tls.enabled | bool | `false` | enable TLS |\n| tls.existing_secret | string | `\"\"` | use TLS certificates from existing secret |\n| tls.issuer.kind | string | `\"ClusterIssuer\"` | cert-manager issuer kind. Usually Issuer or ClusterIssuer |\n| tls.issuer.name | string | `\"selfsigned\"` | name of the referenced issuer |\n| tls.key | string | `\"\"` | TLS private key |\n| tolerations | list | `[]` | Tolerations for pod assignment |\n\n----------------------------------------------\nAutogenerated from chart metadata using [helm-docs v1.11.0](https://github.com/norwoodj/helm-docs/releases/v1.11.0)\n"
  },
  {
    "path": "contrib/charts/dragonfly/ci/affinity-values.golden.yaml",
    "content": "---\n# Source: dragonfly/templates/serviceaccount.yaml\napiVersion: v1\nkind: ServiceAccount\nmetadata:\n  name: test-dragonfly\n  namespace: default\n  labels:\n    app.kubernetes.io/name: dragonfly\n    app.kubernetes.io/instance: test\n    app.kubernetes.io/version: \"v1.37.0\"\n    app.kubernetes.io/managed-by: Helm\n---\n# Source: dragonfly/templates/service.yaml\napiVersion: v1\nkind: Service\nmetadata:\n  name: test-dragonfly\n  namespace: default\n  labels:\n    app.kubernetes.io/name: dragonfly\n    app.kubernetes.io/instance: test\n    app.kubernetes.io/version: \"v1.37.0\"\n    app.kubernetes.io/managed-by: Helm\nspec:\n  type: ClusterIP\n  ports:\n    - port: 6379\n      targetPort: dragonfly\n      protocol: TCP\n      name: dragonfly\n  selector:\n    app.kubernetes.io/name: dragonfly\n    app.kubernetes.io/instance: test\n---\n# Source: dragonfly/templates/deployment.yaml\napiVersion: apps/v1\nkind: Deployment\nmetadata:\n  name: test-dragonfly\n  namespace: default\n  labels:\n    app.kubernetes.io/name: dragonfly\n    app.kubernetes.io/instance: test\n    app.kubernetes.io/version: \"v1.37.0\"\n    app.kubernetes.io/managed-by: Helm\nspec:\n  replicas: 1\n  selector:\n    matchLabels:\n      app.kubernetes.io/name: dragonfly\n      app.kubernetes.io/instance: test\n  template:\n    metadata:\n      annotations:\n      labels:\n        app.kubernetes.io/name: dragonfly\n        app.kubernetes.io/instance: test\n    spec:\n      affinity:\n        podAntiAffinity:\n          preferredDuringSchedulingIgnoredDuringExecution:\n          - podAffinityTerm:\n              labelSelector:\n                matchExpressions:\n                - key: app.kubernetes.io/name\n                  operator: In\n                  values:\n                  - dragonfly\n              topologyKey: kubernetes.io/hostname\n            weight: 100\n      serviceAccountName: test-dragonfly\n      containers:\n        - name: dragonfly\n          image: 
\"docker.dragonflydb.io/dragonflydb/dragonfly:v1.37.0\"\n          imagePullPolicy: IfNotPresent\n          ports:\n            - name: dragonfly\n              containerPort: 6379\n              protocol: TCP\n          livenessProbe:\n            exec:\n              command:\n              - /bin/sh\n              - /usr/local/bin/healthcheck.sh\n            failureThreshold: 3\n            initialDelaySeconds: 10\n            periodSeconds: 10\n            successThreshold: 1\n            timeoutSeconds: 5\n          readinessProbe:\n            exec:\n              command:\n              - /bin/sh\n              - /usr/local/bin/healthcheck.sh\n            failureThreshold: 3\n            initialDelaySeconds: 10\n            periodSeconds: 10\n            successThreshold: 1\n            timeoutSeconds: 5\n          args:\n            - \"--alsologtostderr\"\n          resources:\n            limits: {}\n            requests: {}\n"
  },
  {
    "path": "contrib/charts/dragonfly/ci/affinity-values.yaml",
    "content": "affinity:\n  podAntiAffinity:\n    preferredDuringSchedulingIgnoredDuringExecution:\n    - podAffinityTerm:\n        labelSelector:\n          matchExpressions:\n          - key: app.kubernetes.io/name\n            operator: In\n            values:\n            - dragonfly\n        topologyKey: kubernetes.io/hostname\n      weight: 100\n"
  },
  {
    "path": "contrib/charts/dragonfly/ci/command_extraargs-values.golden.yaml",
    "content": "---\n# Source: dragonfly/templates/serviceaccount.yaml\napiVersion: v1\nkind: ServiceAccount\nmetadata:\n  name: test-dragonfly\n  namespace: default\n  labels:\n    app.kubernetes.io/name: dragonfly\n    app.kubernetes.io/instance: test\n    app.kubernetes.io/version: \"v1.37.0\"\n    app.kubernetes.io/managed-by: Helm\n---\n# Source: dragonfly/templates/service.yaml\napiVersion: v1\nkind: Service\nmetadata:\n  name: test-dragonfly\n  namespace: default\n  labels:\n    app.kubernetes.io/name: dragonfly\n    app.kubernetes.io/instance: test\n    app.kubernetes.io/version: \"v1.37.0\"\n    app.kubernetes.io/managed-by: Helm\nspec:\n  type: ClusterIP\n  ports:\n    - port: 6379\n      targetPort: dragonfly\n      protocol: TCP\n      name: dragonfly\n  selector:\n    app.kubernetes.io/name: dragonfly\n    app.kubernetes.io/instance: test\n---\n# Source: dragonfly/templates/deployment.yaml\napiVersion: apps/v1\nkind: Deployment\nmetadata:\n  name: test-dragonfly\n  namespace: default\n  labels:\n    app.kubernetes.io/name: dragonfly\n    app.kubernetes.io/instance: test\n    app.kubernetes.io/version: \"v1.37.0\"\n    app.kubernetes.io/managed-by: Helm\nspec:\n  replicas: 1\n  selector:\n    matchLabels:\n      app.kubernetes.io/name: dragonfly\n      app.kubernetes.io/instance: test\n  template:\n    metadata:\n      annotations:\n      labels:\n        app.kubernetes.io/name: dragonfly\n        app.kubernetes.io/instance: test\n    spec:\n      serviceAccountName: test-dragonfly\n      containers:\n        - name: dragonfly\n          image: \"docker.dragonflydb.io/dragonflydb/dragonfly:v1.37.0\"\n          imagePullPolicy: IfNotPresent\n          ports:\n            - name: dragonfly\n              containerPort: 6379\n              protocol: TCP\n          livenessProbe:\n            exec:\n              command:\n              - /bin/sh\n              - /usr/local/bin/healthcheck.sh\n            failureThreshold: 3\n            
initialDelaySeconds: 10\n            periodSeconds: 10\n            successThreshold: 1\n            timeoutSeconds: 5\n          readinessProbe:\n            exec:\n              command:\n              - /bin/sh\n              - /usr/local/bin/healthcheck.sh\n            failureThreshold: 3\n            initialDelaySeconds: 10\n            periodSeconds: 10\n            successThreshold: 1\n            timeoutSeconds: 5\n          command:\n            - /usr/local/bin/dragonfly\n            - --logtostderr\n          args:\n            - \"--alsologtostderr\"\n            - --cache_mode=true\n          resources:\n            limits: {}\n            requests: {}\n"
  },
  {
    "path": "contrib/charts/dragonfly/ci/command_extraargs-values.yaml",
    "content": "command:\n  - /usr/local/bin/dragonfly\n  - --logtostderr\n\nextraArgs:\n  - --cache_mode=true\n"
  },
  {
    "path": "contrib/charts/dragonfly/ci/commonlabels-values.golden.yaml",
    "content": "---\n# Source: dragonfly/templates/serviceaccount.yaml\napiVersion: v1\nkind: ServiceAccount\nmetadata:\n  name: test-dragonfly\n  namespace: default\n  labels:\n    app.kubernetes.io/name: dragonfly\n    app.kubernetes.io/instance: test\n    app.kubernetes.io/version: \"v1.37.0\"\n    app.kubernetes.io/managed-by: Helm\n    project: cache-infrastructure\n    team: platform\n---\n# Source: dragonfly/templates/service.yaml\napiVersion: v1\nkind: Service\nmetadata:\n  name: test-dragonfly\n  namespace: default\n  labels:\n    app.kubernetes.io/name: dragonfly\n    app.kubernetes.io/instance: test\n    app.kubernetes.io/version: \"v1.37.0\"\n    app.kubernetes.io/managed-by: Helm\n    project: cache-infrastructure\n    team: platform\nspec:\n  type: ClusterIP\n  ports:\n    - port: 6379\n      targetPort: dragonfly\n      protocol: TCP\n      name: dragonfly\n  selector:\n    app.kubernetes.io/name: dragonfly\n    app.kubernetes.io/instance: test\n---\n# Source: dragonfly/templates/deployment.yaml\napiVersion: apps/v1\nkind: Deployment\nmetadata:\n  name: test-dragonfly\n  namespace: default\n  labels:\n    app.kubernetes.io/name: dragonfly\n    app.kubernetes.io/instance: test\n    app.kubernetes.io/version: \"v1.37.0\"\n    app.kubernetes.io/managed-by: Helm\n    project: cache-infrastructure\n    team: platform\nspec:\n  replicas: 1\n  selector:\n    matchLabels:\n      app.kubernetes.io/name: dragonfly\n      app.kubernetes.io/instance: test\n  template:\n    metadata:\n      annotations:\n      labels:\n        app.kubernetes.io/name: dragonfly\n        app.kubernetes.io/instance: test\n        project: cache-infrastructure\n        team: platform\n    spec:\n      serviceAccountName: test-dragonfly\n      containers:\n        - name: dragonfly\n          image: \"docker.dragonflydb.io/dragonflydb/dragonfly:v1.37.0\"\n          imagePullPolicy: IfNotPresent\n          ports:\n            - name: dragonfly\n              containerPort: 6379\n       
       protocol: TCP\n          livenessProbe:\n            exec:\n              command:\n              - /bin/sh\n              - /usr/local/bin/healthcheck.sh\n            failureThreshold: 3\n            initialDelaySeconds: 10\n            periodSeconds: 10\n            successThreshold: 1\n            timeoutSeconds: 5\n          readinessProbe:\n            exec:\n              command:\n              - /bin/sh\n              - /usr/local/bin/healthcheck.sh\n            failureThreshold: 3\n            initialDelaySeconds: 10\n            periodSeconds: 10\n            successThreshold: 1\n            timeoutSeconds: 5\n          args:\n            - \"--alsologtostderr\"\n          resources:\n            limits: {}\n            requests: {}\n"
  },
  {
    "path": "contrib/charts/dragonfly/ci/commonlabels-values.yaml",
    "content": "commonLabels:\n  team: platform\n  project: cache-infrastructure\n"
  },
  {
    "path": "contrib/charts/dragonfly/ci/extracontainer-string-values.golden.yaml",
    "content": "---\n# Source: dragonfly/templates/serviceaccount.yaml\napiVersion: v1\nkind: ServiceAccount\nmetadata:\n  name: test-dragonfly\n  namespace: default\n  labels:\n    app.kubernetes.io/name: dragonfly\n    app.kubernetes.io/instance: test\n    app.kubernetes.io/version: \"v1.37.0\"\n    app.kubernetes.io/managed-by: Helm\n---\n# Source: dragonfly/templates/service.yaml\napiVersion: v1\nkind: Service\nmetadata:\n  name: test-dragonfly\n  namespace: default\n  labels:\n    app.kubernetes.io/name: dragonfly\n    app.kubernetes.io/instance: test\n    app.kubernetes.io/version: \"v1.37.0\"\n    app.kubernetes.io/managed-by: Helm\nspec:\n  type: ClusterIP\n  ports:\n    - port: 6379\n      targetPort: dragonfly\n      protocol: TCP\n      name: dragonfly\n  selector:\n    app.kubernetes.io/name: dragonfly\n    app.kubernetes.io/instance: test\n---\n# Source: dragonfly/templates/deployment.yaml\napiVersion: apps/v1\nkind: Deployment\nmetadata:\n  name: test-dragonfly\n  namespace: default\n  labels:\n    app.kubernetes.io/name: dragonfly\n    app.kubernetes.io/instance: test\n    app.kubernetes.io/version: \"v1.37.0\"\n    app.kubernetes.io/managed-by: Helm\nspec:\n  replicas: 1\n  selector:\n    matchLabels:\n      app.kubernetes.io/name: dragonfly\n      app.kubernetes.io/instance: test\n  template:\n    metadata:\n      annotations:\n      labels:\n        app.kubernetes.io/name: dragonfly\n        app.kubernetes.io/instance: test\n    spec:\n      serviceAccountName: test-dragonfly\n      containers:\n        - args:\n          - -c\n          - date; sleep 3600;\n          command:\n          - /bin/sh\n          image: busybox:latest\n          name: sidecar-string\n        - name: dragonfly\n          image: \"docker.dragonflydb.io/dragonflydb/dragonfly:v1.37.0\"\n          imagePullPolicy: IfNotPresent\n          ports:\n            - name: dragonfly\n              containerPort: 6379\n              protocol: TCP\n          livenessProbe:\n          
  exec:\n              command:\n              - /bin/sh\n              - /usr/local/bin/healthcheck.sh\n            failureThreshold: 3\n            initialDelaySeconds: 10\n            periodSeconds: 10\n            successThreshold: 1\n            timeoutSeconds: 5\n          readinessProbe:\n            exec:\n              command:\n              - /bin/sh\n              - /usr/local/bin/healthcheck.sh\n            failureThreshold: 3\n            initialDelaySeconds: 10\n            periodSeconds: 10\n            successThreshold: 1\n            timeoutSeconds: 5\n          args:\n            - \"--alsologtostderr\"\n          resources:\n            limits: {}\n            requests: {}\n"
  },
  {
    "path": "contrib/charts/dragonfly/ci/extracontainer-string-values.yaml",
    "content": "extraContainers:\n  - name: sidecar-string\n    image: busybox:latest\n    command: [\"/bin/sh\"]\n    args: [\"-c\", \"date; sleep 3600;\"]\n"
  },
  {
    "path": "contrib/charts/dragonfly/ci/extracontainer-tpl-values.golden.yaml",
    "content": "---\n# Source: dragonfly/templates/serviceaccount.yaml\napiVersion: v1\nkind: ServiceAccount\nmetadata:\n  name: test-dragonfly\n  namespace: default\n  labels:\n    app.kubernetes.io/name: dragonfly\n    app.kubernetes.io/instance: test\n    app.kubernetes.io/version: \"v1.37.0\"\n    app.kubernetes.io/managed-by: Helm\n---\n# Source: dragonfly/templates/service.yaml\napiVersion: v1\nkind: Service\nmetadata:\n  name: test-dragonfly\n  namespace: default\n  labels:\n    app.kubernetes.io/name: dragonfly\n    app.kubernetes.io/instance: test\n    app.kubernetes.io/version: \"v1.37.0\"\n    app.kubernetes.io/managed-by: Helm\nspec:\n  type: ClusterIP\n  ports:\n    - port: 6379\n      targetPort: dragonfly\n      protocol: TCP\n      name: dragonfly\n  selector:\n    app.kubernetes.io/name: dragonfly\n    app.kubernetes.io/instance: test\n---\n# Source: dragonfly/templates/deployment.yaml\napiVersion: apps/v1\nkind: Deployment\nmetadata:\n  name: test-dragonfly\n  namespace: default\n  labels:\n    app.kubernetes.io/name: dragonfly\n    app.kubernetes.io/instance: test\n    app.kubernetes.io/version: \"v1.37.0\"\n    app.kubernetes.io/managed-by: Helm\nspec:\n  replicas: 1\n  selector:\n    matchLabels:\n      app.kubernetes.io/name: dragonfly\n      app.kubernetes.io/instance: test\n  template:\n    metadata:\n      annotations:\n      labels:\n        app.kubernetes.io/name: dragonfly\n        app.kubernetes.io/instance: test\n    spec:\n      serviceAccountName: test-dragonfly\n      containers:\n        - name: sidecar-tpl\n          image: docker.dragonflydb.io/dragonflydb/dragonfly:latest\n          command: [\"/bin/sh\"]\n          args: [\"-c\", \"date; sleep 3600;\"]\n        - name: dragonfly\n          image: \"docker.dragonflydb.io/dragonflydb/dragonfly:v1.37.0\"\n          imagePullPolicy: IfNotPresent\n          ports:\n            - name: dragonfly\n              containerPort: 6379\n              protocol: TCP\n          
livenessProbe:\n            exec:\n              command:\n              - /bin/sh\n              - /usr/local/bin/healthcheck.sh\n            failureThreshold: 3\n            initialDelaySeconds: 10\n            periodSeconds: 10\n            successThreshold: 1\n            timeoutSeconds: 5\n          readinessProbe:\n            exec:\n              command:\n              - /bin/sh\n              - /usr/local/bin/healthcheck.sh\n            failureThreshold: 3\n            initialDelaySeconds: 10\n            periodSeconds: 10\n            successThreshold: 1\n            timeoutSeconds: 5\n          args:\n            - \"--alsologtostderr\"\n          resources:\n            limits: {}\n            requests: {}\n"
  },
  {
    "path": "contrib/charts/dragonfly/ci/extracontainer-tpl-values.yaml",
    "content": "extraContainers: |\n  - name: sidecar-tpl\n    image: {{ .Values.image.repository }}:latest\n    command: [\"/bin/sh\"]\n    args: [\"-c\", \"date; sleep 3600;\"]\n"
  },
  {
    "path": "contrib/charts/dragonfly/ci/extraenv-and-passwordSecret-values.golden.yaml",
    "content": "---\n# Source: dragonfly/templates/serviceaccount.yaml\napiVersion: v1\nkind: ServiceAccount\nmetadata:\n  name: test-dragonfly\n  namespace: default\n  labels:\n    app.kubernetes.io/name: dragonfly\n    app.kubernetes.io/instance: test\n    app.kubernetes.io/version: \"v1.37.0\"\n    app.kubernetes.io/managed-by: Helm\n---\n# Source: dragonfly/templates/extra-manifests.yaml\napiVersion: v1\nkind: Secret\nmetadata:\n  name: dfly-password\nstringData:\n  password: foobar\n---\n# Source: dragonfly/templates/extra-manifests.yaml\napiVersion: v1\nkind: Secret\nmetadata:\n  name: my-secret\nstringData:\n  password: password\n  username: username\ntype: Opaque\n---\n# Source: dragonfly/templates/extra-manifests.yaml\napiVersion: v1\ndata:\n  configKey1: configValue1\n  configKey2: configValue2\nkind: ConfigMap\nmetadata:\n  name: my-configmap\n---\n# Source: dragonfly/templates/service.yaml\napiVersion: v1\nkind: Service\nmetadata:\n  name: test-dragonfly\n  namespace: default\n  labels:\n    app.kubernetes.io/name: dragonfly\n    app.kubernetes.io/instance: test\n    app.kubernetes.io/version: \"v1.37.0\"\n    app.kubernetes.io/managed-by: Helm\nspec:\n  type: ClusterIP\n  ports:\n    - port: 6379\n      targetPort: dragonfly\n      protocol: TCP\n      name: dragonfly\n  selector:\n    app.kubernetes.io/name: dragonfly\n    app.kubernetes.io/instance: test\n---\n# Source: dragonfly/templates/deployment.yaml\napiVersion: apps/v1\nkind: Deployment\nmetadata:\n  name: test-dragonfly\n  namespace: default\n  labels:\n    app.kubernetes.io/name: dragonfly\n    app.kubernetes.io/instance: test\n    app.kubernetes.io/version: \"v1.37.0\"\n    app.kubernetes.io/managed-by: Helm\nspec:\n  replicas: 1\n  selector:\n    matchLabels:\n      app.kubernetes.io/name: dragonfly\n      app.kubernetes.io/instance: test\n  template:\n    metadata:\n      annotations:\n      labels:\n        app.kubernetes.io/name: dragonfly\n        app.kubernetes.io/instance: test\n    
spec:\n      serviceAccountName: test-dragonfly\n      containers:\n        - name: dragonfly\n          image: \"docker.dragonflydb.io/dragonflydb/dragonfly:v1.37.0\"\n          imagePullPolicy: IfNotPresent\n          ports:\n            - name: dragonfly\n              containerPort: 6379\n              protocol: TCP\n          livenessProbe:\n            exec:\n              command:\n              - /bin/sh\n              - /usr/local/bin/healthcheck.sh\n            failureThreshold: 3\n            initialDelaySeconds: 10\n            periodSeconds: 10\n            successThreshold: 1\n            timeoutSeconds: 5\n          readinessProbe:\n            exec:\n              command:\n              - /bin/sh\n              - /usr/local/bin/healthcheck.sh\n            failureThreshold: 3\n            initialDelaySeconds: 10\n            periodSeconds: 10\n            successThreshold: 1\n            timeoutSeconds: 5\n          args:\n            - \"--alsologtostderr\"\n          resources:\n            limits: {}\n            requests: {}\n          \n          env:\n            - name: DFLY_requirepass\n              valueFrom:\n                secretKeyRef:\n                  name: dfly-password\n                  key: password\n            - name: ENV_VAR43\n              value: value1\n            - name: ENV_VAR323\n              value: value2\n          envFrom:\n            - configMapRef:\n                name: my-configmap\n            - secretRef:\n                name: my-secret\n"
  },
  {
    "path": "contrib/charts/dragonfly/ci/extraenv-and-passwordSecret-values.yaml",
    "content": "extraObjects:\n- apiVersion: v1\n  kind: Secret\n  metadata:\n    name: dfly-password\n  stringData:\n    password: foobar\n- apiVersion: v1\n  kind: ConfigMap\n  metadata:\n    name: my-configmap\n  data:\n    configKey1: configValue1\n    configKey2: configValue2\n- apiVersion: v1\n  kind: Secret\n  metadata:\n    name: my-secret\n  type: Opaque\n  stringData:\n    username: username\n    password: password\n\nenv:\n  - name: ENV_VAR43\n    value: value1\n  - name: ENV_VAR323\n    value: value2\n\nenvFrom:\n  - configMapRef:\n      name: my-configmap\n  - secretRef:\n      name: my-secret\n\npasswordFromSecret:\n  enable: true\n  existingSecret:\n    name: dfly-password\n    key: password\n"
  },
  {
    "path": "contrib/charts/dragonfly/ci/extraenv-values.golden.yaml",
    "content": "---\n# Source: dragonfly/templates/serviceaccount.yaml\napiVersion: v1\nkind: ServiceAccount\nmetadata:\n  name: test-dragonfly\n  namespace: default\n  labels:\n    app.kubernetes.io/name: dragonfly\n    app.kubernetes.io/instance: test\n    app.kubernetes.io/version: \"v1.37.0\"\n    app.kubernetes.io/managed-by: Helm\n---\n# Source: dragonfly/templates/extra-manifests.yaml\napiVersion: v1\nkind: Secret\nmetadata:\n  name: my-secret\nstringData:\n  password: password\n  username: username\ntype: Opaque\n---\n# Source: dragonfly/templates/extra-manifests.yaml\napiVersion: v1\ndata:\n  configKey1: configValue1\n  configKey2: configValue2\nkind: ConfigMap\nmetadata:\n  name: my-configmap\n---\n# Source: dragonfly/templates/service.yaml\napiVersion: v1\nkind: Service\nmetadata:\n  name: test-dragonfly\n  namespace: default\n  labels:\n    app.kubernetes.io/name: dragonfly\n    app.kubernetes.io/instance: test\n    app.kubernetes.io/version: \"v1.37.0\"\n    app.kubernetes.io/managed-by: Helm\nspec:\n  type: ClusterIP\n  ports:\n    - port: 6379\n      targetPort: dragonfly\n      protocol: TCP\n      name: dragonfly\n  selector:\n    app.kubernetes.io/name: dragonfly\n    app.kubernetes.io/instance: test\n---\n# Source: dragonfly/templates/deployment.yaml\napiVersion: apps/v1\nkind: Deployment\nmetadata:\n  name: test-dragonfly\n  namespace: default\n  labels:\n    app.kubernetes.io/name: dragonfly\n    app.kubernetes.io/instance: test\n    app.kubernetes.io/version: \"v1.37.0\"\n    app.kubernetes.io/managed-by: Helm\nspec:\n  replicas: 1\n  selector:\n    matchLabels:\n      app.kubernetes.io/name: dragonfly\n      app.kubernetes.io/instance: test\n  template:\n    metadata:\n      annotations:\n      labels:\n        app.kubernetes.io/name: dragonfly\n        app.kubernetes.io/instance: test\n    spec:\n      serviceAccountName: test-dragonfly\n      containers:\n        - name: dragonfly\n          image: 
\"docker.dragonflydb.io/dragonflydb/dragonfly:v1.37.0\"\n          imagePullPolicy: IfNotPresent\n          ports:\n            - name: dragonfly\n              containerPort: 6379\n              protocol: TCP\n          livenessProbe:\n            exec:\n              command:\n              - /bin/sh\n              - /usr/local/bin/healthcheck.sh\n            failureThreshold: 3\n            initialDelaySeconds: 10\n            periodSeconds: 10\n            successThreshold: 1\n            timeoutSeconds: 5\n          readinessProbe:\n            exec:\n              command:\n              - /bin/sh\n              - /usr/local/bin/healthcheck.sh\n            failureThreshold: 3\n            initialDelaySeconds: 10\n            periodSeconds: 10\n            successThreshold: 1\n            timeoutSeconds: 5\n          args:\n            - \"--alsologtostderr\"\n          resources:\n            limits: {}\n            requests: {}\n          \n          env:\n            - name: ENV_VAR43\n              value: value1\n            - name: ENV_VAR323\n              value: value2\n          envFrom:\n            - configMapRef:\n                name: my-configmap\n            - secretRef:\n                name: my-secret\n"
  },
  {
    "path": "contrib/charts/dragonfly/ci/extraenv-values.yaml",
    "content": "extraObjects:\n- apiVersion: v1\n  kind: ConfigMap\n  metadata:\n    name: my-configmap\n  data:\n    configKey1: configValue1\n    configKey2: configValue2\n- apiVersion: v1\n  kind: Secret\n  metadata:\n    name: my-secret\n  type: Opaque\n  stringData:\n    username: username\n    password: password\n\nenv:\n  - name: ENV_VAR43\n    value: value1\n  - name: ENV_VAR323\n    value: value2\n\nenvFrom:\n  - configMapRef:\n      name: my-configmap\n  - secretRef:\n      name: my-secret\n"
  },
  {
    "path": "contrib/charts/dragonfly/ci/extravolumes-values.golden.yaml",
    "content": "---\n# Source: dragonfly/templates/serviceaccount.yaml\napiVersion: v1\nkind: ServiceAccount\nmetadata:\n  name: test-dragonfly\n  namespace: default\n  labels:\n    app.kubernetes.io/name: dragonfly\n    app.kubernetes.io/instance: test\n    app.kubernetes.io/version: \"v1.37.0\"\n    app.kubernetes.io/managed-by: Helm\n---\n# Source: dragonfly/templates/service.yaml\napiVersion: v1\nkind: Service\nmetadata:\n  name: test-dragonfly\n  namespace: default\n  labels:\n    app.kubernetes.io/name: dragonfly\n    app.kubernetes.io/instance: test\n    app.kubernetes.io/version: \"v1.37.0\"\n    app.kubernetes.io/managed-by: Helm\nspec:\n  type: ClusterIP\n  ports:\n    - port: 6379\n      targetPort: dragonfly\n      protocol: TCP\n      name: dragonfly\n  selector:\n    app.kubernetes.io/name: dragonfly\n    app.kubernetes.io/instance: test\n---\n# Source: dragonfly/templates/deployment.yaml\napiVersion: apps/v1\nkind: Deployment\nmetadata:\n  name: test-dragonfly\n  namespace: default\n  labels:\n    app.kubernetes.io/name: dragonfly\n    app.kubernetes.io/instance: test\n    app.kubernetes.io/version: \"v1.37.0\"\n    app.kubernetes.io/managed-by: Helm\nspec:\n  replicas: 1\n  selector:\n    matchLabels:\n      app.kubernetes.io/name: dragonfly\n      app.kubernetes.io/instance: test\n  template:\n    metadata:\n      annotations:\n      labels:\n        app.kubernetes.io/name: dragonfly\n        app.kubernetes.io/instance: test\n    spec:\n      serviceAccountName: test-dragonfly\n      containers:\n        - name: dragonfly\n          image: \"docker.dragonflydb.io/dragonflydb/dragonfly:v1.37.0\"\n          imagePullPolicy: IfNotPresent\n          ports:\n            - name: dragonfly\n              containerPort: 6379\n              protocol: TCP\n          livenessProbe:\n            exec:\n              command:\n              - /bin/sh\n              - /usr/local/bin/healthcheck.sh\n            failureThreshold: 3\n            
initialDelaySeconds: 10\n            periodSeconds: 10\n            successThreshold: 1\n            timeoutSeconds: 5\n          readinessProbe:\n            exec:\n              command:\n              - /bin/sh\n              - /usr/local/bin/healthcheck.sh\n            failureThreshold: 3\n            initialDelaySeconds: 10\n            periodSeconds: 10\n            successThreshold: 1\n            timeoutSeconds: 5\n          args:\n            - \"--alsologtostderr\"\n          resources:\n            limits: {}\n            requests: {}\n          volumeMounts:\n            - mountPath: /tmp\n              name: tmp\n      volumes:\n        - emptyDir:\n            sizeLimit: 500Mi\n          name: tmp\n"
  },
  {
    "path": "contrib/charts/dragonfly/ci/extravolumes-values.yaml",
    "content": "extraVolumes:\n  - name: tmp\n    emptyDir:\n      sizeLimit: 500Mi\n\nextraVolumeMounts:\n  - mountPath: /tmp\n    name: tmp\n"
  },
  {
    "path": "contrib/charts/dragonfly/ci/initcontainer-string-values.golden.yaml",
    "content": "---\n# Source: dragonfly/templates/serviceaccount.yaml\napiVersion: v1\nkind: ServiceAccount\nmetadata:\n  name: test-dragonfly\n  namespace: default\n  labels:\n    app.kubernetes.io/name: dragonfly\n    app.kubernetes.io/instance: test\n    app.kubernetes.io/version: \"v1.37.0\"\n    app.kubernetes.io/managed-by: Helm\n---\n# Source: dragonfly/templates/service.yaml\napiVersion: v1\nkind: Service\nmetadata:\n  name: test-dragonfly\n  namespace: default\n  labels:\n    app.kubernetes.io/name: dragonfly\n    app.kubernetes.io/instance: test\n    app.kubernetes.io/version: \"v1.37.0\"\n    app.kubernetes.io/managed-by: Helm\nspec:\n  type: ClusterIP\n  ports:\n    - port: 6379\n      targetPort: dragonfly\n      protocol: TCP\n      name: dragonfly\n  selector:\n    app.kubernetes.io/name: dragonfly\n    app.kubernetes.io/instance: test\n---\n# Source: dragonfly/templates/deployment.yaml\napiVersion: apps/v1\nkind: Deployment\nmetadata:\n  name: test-dragonfly\n  namespace: default\n  labels:\n    app.kubernetes.io/name: dragonfly\n    app.kubernetes.io/instance: test\n    app.kubernetes.io/version: \"v1.37.0\"\n    app.kubernetes.io/managed-by: Helm\nspec:\n  replicas: 1\n  selector:\n    matchLabels:\n      app.kubernetes.io/name: dragonfly\n      app.kubernetes.io/instance: test\n  template:\n    metadata:\n      annotations:\n      labels:\n        app.kubernetes.io/name: dragonfly\n        app.kubernetes.io/instance: test\n    spec:\n      serviceAccountName: test-dragonfly\n      initContainers:\n        - args:\n          - -c\n          - date; sleep 1;\n          command:\n          - /bin/sh\n          image: busybox:1.28\n          name: initcontainer-string\n      containers:\n        - name: dragonfly\n          image: \"docker.dragonflydb.io/dragonflydb/dragonfly:v1.37.0\"\n          imagePullPolicy: IfNotPresent\n          ports:\n            - name: dragonfly\n              containerPort: 6379\n              protocol: TCP\n          
livenessProbe:\n            exec:\n              command:\n              - /bin/sh\n              - /usr/local/bin/healthcheck.sh\n            failureThreshold: 3\n            initialDelaySeconds: 10\n            periodSeconds: 10\n            successThreshold: 1\n            timeoutSeconds: 5\n          readinessProbe:\n            exec:\n              command:\n              - /bin/sh\n              - /usr/local/bin/healthcheck.sh\n            failureThreshold: 3\n            initialDelaySeconds: 10\n            periodSeconds: 10\n            successThreshold: 1\n            timeoutSeconds: 5\n          args:\n            - \"--alsologtostderr\"\n          resources:\n            limits: {}\n            requests: {}\n"
  },
  {
    "path": "contrib/charts/dragonfly/ci/initcontainer-string-values.yaml",
    "content": "initContainers:\n  - name: initcontainer-string\n    image: busybox:1.28\n    command: [\"/bin/sh\"]\n    args: [\"-c\", \"date; sleep 1;\"]\n"
  },
  {
    "path": "contrib/charts/dragonfly/ci/initcontainer-tpl-values.golden.yaml",
    "content": "---\n# Source: dragonfly/templates/serviceaccount.yaml\napiVersion: v1\nkind: ServiceAccount\nmetadata:\n  name: test-dragonfly\n  namespace: default\n  labels:\n    app.kubernetes.io/name: dragonfly\n    app.kubernetes.io/instance: test\n    app.kubernetes.io/version: \"v1.37.0\"\n    app.kubernetes.io/managed-by: Helm\n---\n# Source: dragonfly/templates/service.yaml\napiVersion: v1\nkind: Service\nmetadata:\n  name: test-dragonfly\n  namespace: default\n  labels:\n    app.kubernetes.io/name: dragonfly\n    app.kubernetes.io/instance: test\n    app.kubernetes.io/version: \"v1.37.0\"\n    app.kubernetes.io/managed-by: Helm\nspec:\n  type: ClusterIP\n  ports:\n    - port: 6379\n      targetPort: dragonfly\n      protocol: TCP\n      name: dragonfly\n  selector:\n    app.kubernetes.io/name: dragonfly\n    app.kubernetes.io/instance: test\n---\n# Source: dragonfly/templates/deployment.yaml\napiVersion: apps/v1\nkind: Deployment\nmetadata:\n  name: test-dragonfly\n  namespace: default\n  labels:\n    app.kubernetes.io/name: dragonfly\n    app.kubernetes.io/instance: test\n    app.kubernetes.io/version: \"v1.37.0\"\n    app.kubernetes.io/managed-by: Helm\nspec:\n  replicas: 1\n  selector:\n    matchLabels:\n      app.kubernetes.io/name: dragonfly\n      app.kubernetes.io/instance: test\n  template:\n    metadata:\n      annotations:\n      labels:\n        app.kubernetes.io/name: dragonfly\n        app.kubernetes.io/instance: test\n    spec:\n      serviceAccountName: test-dragonfly\n      initContainers:\n        - name: initcontainer-tpl\n          image: docker.dragonflydb.io/dragonflydb/dragonfly:latest\n          command: [\"/bin/sh\"]\n          args: [\"-c\", \"date; sleep 1;\"]\n      containers:\n        - name: dragonfly\n          image: \"docker.dragonflydb.io/dragonflydb/dragonfly:v1.37.0\"\n          imagePullPolicy: IfNotPresent\n          ports:\n            - name: dragonfly\n              containerPort: 6379\n              protocol: 
TCP\n          livenessProbe:\n            exec:\n              command:\n              - /bin/sh\n              - /usr/local/bin/healthcheck.sh\n            failureThreshold: 3\n            initialDelaySeconds: 10\n            periodSeconds: 10\n            successThreshold: 1\n            timeoutSeconds: 5\n          readinessProbe:\n            exec:\n              command:\n              - /bin/sh\n              - /usr/local/bin/healthcheck.sh\n            failureThreshold: 3\n            initialDelaySeconds: 10\n            periodSeconds: 10\n            successThreshold: 1\n            timeoutSeconds: 5\n          args:\n            - \"--alsologtostderr\"\n          resources:\n            limits: {}\n            requests: {}\n"
  },
  {
    "path": "contrib/charts/dragonfly/ci/initcontainer-tpl-values.yaml",
    "content": "initContainers: |\n  - name: initcontainer-tpl\n    image: {{ .Values.image.repository }}:latest\n    command: [\"/bin/sh\"]\n    args: [\"-c\", \"date; sleep 1;\"]\n"
  },
  {
    "path": "contrib/charts/dragonfly/ci/password-old-env-values.golden.yaml",
    "content": "---\n# Source: dragonfly/templates/serviceaccount.yaml\napiVersion: v1\nkind: ServiceAccount\nmetadata:\n  name: test-dragonfly\n  namespace: default\n  labels:\n    app.kubernetes.io/name: dragonfly\n    app.kubernetes.io/instance: test\n    app.kubernetes.io/version: \"v1.37.0\"\n    app.kubernetes.io/managed-by: Helm\n---\n# Source: dragonfly/templates/extra-manifests.yaml\napiVersion: v1\nkind: Secret\nmetadata:\n  name: dfly-password\nstringData:\n  password: foobar\n---\n# Source: dragonfly/templates/service.yaml\napiVersion: v1\nkind: Service\nmetadata:\n  name: test-dragonfly\n  namespace: default\n  labels:\n    app.kubernetes.io/name: dragonfly\n    app.kubernetes.io/instance: test\n    app.kubernetes.io/version: \"v1.37.0\"\n    app.kubernetes.io/managed-by: Helm\nspec:\n  type: ClusterIP\n  ports:\n    - port: 6379\n      targetPort: dragonfly\n      protocol: TCP\n      name: dragonfly\n  selector:\n    app.kubernetes.io/name: dragonfly\n    app.kubernetes.io/instance: test\n---\n# Source: dragonfly/templates/deployment.yaml\napiVersion: apps/v1\nkind: Deployment\nmetadata:\n  name: test-dragonfly\n  namespace: default\n  labels:\n    app.kubernetes.io/name: dragonfly\n    app.kubernetes.io/instance: test\n    app.kubernetes.io/version: \"v1.37.0\"\n    app.kubernetes.io/managed-by: Helm\nspec:\n  replicas: 1\n  selector:\n    matchLabels:\n      app.kubernetes.io/name: dragonfly\n      app.kubernetes.io/instance: test\n  template:\n    metadata:\n      annotations:\n      labels:\n        app.kubernetes.io/name: dragonfly\n        app.kubernetes.io/instance: test\n    spec:\n      serviceAccountName: test-dragonfly\n      containers:\n        - name: dragonfly\n          image: \"docker.dragonflydb.io/dragonflydb/dragonfly:v1.13.0\"\n          imagePullPolicy: IfNotPresent\n          ports:\n            - name: dragonfly\n              containerPort: 6379\n              protocol: TCP\n          livenessProbe:\n            exec:\n       
       command:\n              - /bin/sh\n              - /usr/local/bin/healthcheck.sh\n            failureThreshold: 3\n            initialDelaySeconds: 10\n            periodSeconds: 10\n            successThreshold: 1\n            timeoutSeconds: 5\n          readinessProbe:\n            exec:\n              command:\n              - /bin/sh\n              - /usr/local/bin/healthcheck.sh\n            failureThreshold: 3\n            initialDelaySeconds: 10\n            periodSeconds: 10\n            successThreshold: 1\n            timeoutSeconds: 5\n          args:\n            - \"--alsologtostderr\"\n          resources:\n            limits: {}\n            requests: {}\n          \n          env:\n            - name: DFLY_PASSWORD\n              valueFrom:\n                secretKeyRef:\n                  name: dfly-password\n                  key: password\n"
  },
  {
    "path": "contrib/charts/dragonfly/ci/password-old-env-values.yaml",
    "content": "image:\n  tag: \"v1.13.0\"\n\nextraObjects:\n  - apiVersion: v1\n    kind: Secret\n    metadata:\n      name: dfly-password\n    stringData:\n      password: foobar\n\npasswordFromSecret:\n  enable: true\n  existingSecret:\n    name: dfly-password\n    key: password\n"
  },
  {
    "path": "contrib/charts/dragonfly/ci/passwordsecret-values.golden.yaml",
    "content": "---\n# Source: dragonfly/templates/serviceaccount.yaml\napiVersion: v1\nkind: ServiceAccount\nmetadata:\n  name: test-dragonfly\n  namespace: default\n  labels:\n    app.kubernetes.io/name: dragonfly\n    app.kubernetes.io/instance: test\n    app.kubernetes.io/version: \"v1.37.0\"\n    app.kubernetes.io/managed-by: Helm\n---\n# Source: dragonfly/templates/extra-manifests.yaml\napiVersion: v1\nkind: Secret\nmetadata:\n  name: dfly-password\nstringData:\n  password: foobar\n---\n# Source: dragonfly/templates/service.yaml\napiVersion: v1\nkind: Service\nmetadata:\n  name: test-dragonfly\n  namespace: default\n  labels:\n    app.kubernetes.io/name: dragonfly\n    app.kubernetes.io/instance: test\n    app.kubernetes.io/version: \"v1.37.0\"\n    app.kubernetes.io/managed-by: Helm\nspec:\n  type: ClusterIP\n  ports:\n    - port: 6379\n      targetPort: dragonfly\n      protocol: TCP\n      name: dragonfly\n  selector:\n    app.kubernetes.io/name: dragonfly\n    app.kubernetes.io/instance: test\n---\n# Source: dragonfly/templates/deployment.yaml\napiVersion: apps/v1\nkind: Deployment\nmetadata:\n  name: test-dragonfly\n  namespace: default\n  labels:\n    app.kubernetes.io/name: dragonfly\n    app.kubernetes.io/instance: test\n    app.kubernetes.io/version: \"v1.37.0\"\n    app.kubernetes.io/managed-by: Helm\nspec:\n  replicas: 1\n  selector:\n    matchLabels:\n      app.kubernetes.io/name: dragonfly\n      app.kubernetes.io/instance: test\n  template:\n    metadata:\n      annotations:\n      labels:\n        app.kubernetes.io/name: dragonfly\n        app.kubernetes.io/instance: test\n    spec:\n      serviceAccountName: test-dragonfly\n      containers:\n        - name: dragonfly\n          image: \"docker.dragonflydb.io/dragonflydb/dragonfly:v1.37.0\"\n          imagePullPolicy: IfNotPresent\n          ports:\n            - name: dragonfly\n              containerPort: 6379\n              protocol: TCP\n          livenessProbe:\n            exec:\n       
       command:\n              - /bin/sh\n              - /usr/local/bin/healthcheck.sh\n            failureThreshold: 3\n            initialDelaySeconds: 10\n            periodSeconds: 10\n            successThreshold: 1\n            timeoutSeconds: 5\n          readinessProbe:\n            exec:\n              command:\n              - /bin/sh\n              - /usr/local/bin/healthcheck.sh\n            failureThreshold: 3\n            initialDelaySeconds: 10\n            periodSeconds: 10\n            successThreshold: 1\n            timeoutSeconds: 5\n          args:\n            - \"--alsologtostderr\"\n          resources:\n            limits: {}\n            requests: {}\n          \n          env:\n            - name: DFLY_requirepass\n              valueFrom:\n                secretKeyRef:\n                  name: dfly-password\n                  key: password\n"
  },
  {
    "path": "contrib/charts/dragonfly/ci/passwordsecret-values.tpl.golden.yaml",
    "content": "---\n# Source: dragonfly/templates/serviceaccount.yaml\napiVersion: v1\nkind: ServiceAccount\nmetadata:\n  name: test-dragonfly\n  namespace: default\n  labels:\n    app.kubernetes.io/name: dragonfly\n    app.kubernetes.io/instance: test\n    app.kubernetes.io/version: \"v1.37.0\"\n    app.kubernetes.io/managed-by: Helm\n---\n# Source: dragonfly/templates/extra-manifests.yaml\napiVersion: v1\nkind: Secret\nmetadata:\n  name: dragonfly-password\nstringData:\n  password: foobar\n---\n# Source: dragonfly/templates/service.yaml\napiVersion: v1\nkind: Service\nmetadata:\n  name: test-dragonfly\n  namespace: default\n  labels:\n    app.kubernetes.io/name: dragonfly\n    app.kubernetes.io/instance: test\n    app.kubernetes.io/version: \"v1.37.0\"\n    app.kubernetes.io/managed-by: Helm\nspec:\n  type: ClusterIP\n  ports:\n    - port: 6379\n      targetPort: dragonfly\n      protocol: TCP\n      name: dragonfly\n  selector:\n    app.kubernetes.io/name: dragonfly\n    app.kubernetes.io/instance: test\n---\n# Source: dragonfly/templates/deployment.yaml\napiVersion: apps/v1\nkind: Deployment\nmetadata:\n  name: test-dragonfly\n  namespace: default\n  labels:\n    app.kubernetes.io/name: dragonfly\n    app.kubernetes.io/instance: test\n    app.kubernetes.io/version: \"v1.37.0\"\n    app.kubernetes.io/managed-by: Helm\nspec:\n  replicas: 1\n  selector:\n    matchLabels:\n      app.kubernetes.io/name: dragonfly\n      app.kubernetes.io/instance: test\n  template:\n    metadata:\n      annotations:\n      labels:\n        app.kubernetes.io/name: dragonfly\n        app.kubernetes.io/instance: test\n    spec:\n      serviceAccountName: test-dragonfly\n      containers:\n        - name: dragonfly\n          image: \"docker.dragonflydb.io/dragonflydb/dragonfly:v1.37.0\"\n          imagePullPolicy: IfNotPresent\n          ports:\n            - name: dragonfly\n              containerPort: 6379\n              protocol: TCP\n          livenessProbe:\n            exec:\n  
            command:\n              - /bin/sh\n              - /usr/local/bin/healthcheck.sh\n            failureThreshold: 3\n            initialDelaySeconds: 10\n            periodSeconds: 10\n            successThreshold: 1\n            timeoutSeconds: 5\n          readinessProbe:\n            exec:\n              command:\n              - /bin/sh\n              - /usr/local/bin/healthcheck.sh\n            failureThreshold: 3\n            initialDelaySeconds: 10\n            periodSeconds: 10\n            successThreshold: 1\n            timeoutSeconds: 5\n          args:\n            - \"--alsologtostderr\"\n          resources:\n            limits: {}\n            requests: {}\n          \n          env:\n            - name: DFLY_requirepass\n              valueFrom:\n                secretKeyRef:\n                  name: dragonfly-password\n                  key: password\n"
  },
  {
    "path": "contrib/charts/dragonfly/ci/passwordsecret-values.tpl.yaml",
    "content": "extraObjects:\n- apiVersion: v1\n  kind: Secret\n  metadata:\n    name: dragonfly-password\n  stringData:\n    password: foobar\n\npasswordFromSecret:\n  enable: true\n  existingSecret:\n    name: '{{ include \"dragonfly.name\" $ }}-password'\n    key: password\n"
  },
  {
    "path": "contrib/charts/dragonfly/ci/passwordsecret-values.yaml",
    "content": "extraObjects:\n- apiVersion: v1\n  kind: Secret\n  metadata:\n    name: dfly-password\n  stringData:\n    password: foobar\n\npasswordFromSecret:\n  enable: true\n  existingSecret:\n    name: dfly-password\n    key: password\n"
  },
  {
    "path": "contrib/charts/dragonfly/ci/persistence-and-existing-secret.golden.yaml",
    "content": "---\n# Source: dragonfly/templates/serviceaccount.yaml\napiVersion: v1\nkind: ServiceAccount\nmetadata:\n  name: test-dragonfly\n  namespace: default\n  labels:\n    app.kubernetes.io/name: dragonfly\n    app.kubernetes.io/instance: test\n    app.kubernetes.io/version: \"v1.37.0\"\n    app.kubernetes.io/managed-by: Helm\n---\n# Source: dragonfly/templates/extra-manifests.yaml\napiVersion: v1\nkind: Secret\nmetadata:\n  name: dfly-password\nstringData:\n  password: foobar\n---\n# Source: dragonfly/templates/service.yaml\napiVersion: v1\nkind: Service\nmetadata:\n  name: test-dragonfly\n  namespace: default\n  labels:\n    app.kubernetes.io/name: dragonfly\n    app.kubernetes.io/instance: test\n    app.kubernetes.io/version: \"v1.37.0\"\n    app.kubernetes.io/managed-by: Helm\nspec:\n  type: ClusterIP\n  ports:\n    - port: 6379\n      targetPort: dragonfly\n      protocol: TCP\n      name: dragonfly\n  selector:\n    app.kubernetes.io/name: dragonfly\n    app.kubernetes.io/instance: test\n---\n# Source: dragonfly/templates/statefulset.yaml\napiVersion: apps/v1\nkind: StatefulSet\nmetadata:\n  name: test-dragonfly\n  namespace: default\n  labels:\n    app.kubernetes.io/name: dragonfly\n    app.kubernetes.io/instance: test\n    app.kubernetes.io/version: \"v1.37.0\"\n    app.kubernetes.io/managed-by: Helm\nspec:\n  serviceName: test\n  replicas: 1\n  selector:\n    matchLabels:\n      app.kubernetes.io/name: dragonfly\n      app.kubernetes.io/instance: test\n  template:\n    metadata:\n      annotations:\n      labels:\n        app.kubernetes.io/name: dragonfly\n        app.kubernetes.io/instance: test\n    spec:\n      serviceAccountName: test-dragonfly\n      containers:\n        - name: dragonfly\n          image: \"docker.dragonflydb.io/dragonflydb/dragonfly:v1.37.0\"\n          imagePullPolicy: IfNotPresent\n          ports:\n            - name: dragonfly\n              containerPort: 6379\n              protocol: TCP\n          livenessProbe:\n   
         exec:\n              command:\n              - /bin/sh\n              - /usr/local/bin/healthcheck.sh\n            failureThreshold: 3\n            initialDelaySeconds: 10\n            periodSeconds: 10\n            successThreshold: 1\n            timeoutSeconds: 5\n          readinessProbe:\n            exec:\n              command:\n              - /bin/sh\n              - /usr/local/bin/healthcheck.sh\n            failureThreshold: 3\n            initialDelaySeconds: 10\n            periodSeconds: 10\n            successThreshold: 1\n            timeoutSeconds: 5\n          args:\n            - \"--alsologtostderr\"\n          resources:\n            limits: {}\n            requests: {}\n          volumeMounts:\n            - mountPath: /data\n              name: \"test-data\"\n          env:\n            - name: DFLY_requirepass\n              valueFrom:\n                secretKeyRef:\n                  name: dfly-password\n                  key: password\n  volumeClaimTemplates:\n    - metadata:\n        name: \"test-data\"\n      spec:\n        accessModes: [ \"ReadWriteOnce\" ]\n        storageClassName: standard\n        resources:\n          requests:\n            storage: 128Mi\n"
  },
  {
    "path": "contrib/charts/dragonfly/ci/persistence-and-existing-secret.yaml",
    "content": "storage:\n  enabled: true\n  storageClassName: \"standard\"\n  requests: 128Mi\n\nextraObjects:\n- apiVersion: v1\n  kind: Secret\n  metadata:\n    name: dfly-password\n  stringData:\n    password: foobar\n\npasswordFromSecret:\n  enable: true\n  existingSecret:\n    name: dfly-password\n    key: password\n"
  },
  {
    "path": "contrib/charts/dragonfly/ci/persistent-values.golden.yaml",
    "content": "---\n# Source: dragonfly/templates/serviceaccount.yaml\napiVersion: v1\nkind: ServiceAccount\nmetadata:\n  name: test-dragonfly\n  namespace: default\n  labels:\n    app.kubernetes.io/name: dragonfly\n    app.kubernetes.io/instance: test\n    app.kubernetes.io/version: \"v1.37.0\"\n    app.kubernetes.io/managed-by: Helm\n---\n# Source: dragonfly/templates/service.yaml\napiVersion: v1\nkind: Service\nmetadata:\n  name: test-dragonfly\n  namespace: default\n  labels:\n    app.kubernetes.io/name: dragonfly\n    app.kubernetes.io/instance: test\n    app.kubernetes.io/version: \"v1.37.0\"\n    app.kubernetes.io/managed-by: Helm\nspec:\n  type: ClusterIP\n  ports:\n    - port: 6379\n      targetPort: dragonfly\n      protocol: TCP\n      name: dragonfly\n  selector:\n    app.kubernetes.io/name: dragonfly\n    app.kubernetes.io/instance: test\n---\n# Source: dragonfly/templates/statefulset.yaml\napiVersion: apps/v1\nkind: StatefulSet\nmetadata:\n  name: test-dragonfly\n  namespace: default\n  labels:\n    app.kubernetes.io/name: dragonfly\n    app.kubernetes.io/instance: test\n    app.kubernetes.io/version: \"v1.37.0\"\n    app.kubernetes.io/managed-by: Helm\nspec:\n  serviceName: test\n  replicas: 1\n  selector:\n    matchLabels:\n      app.kubernetes.io/name: dragonfly\n      app.kubernetes.io/instance: test\n  template:\n    metadata:\n      annotations:\n      labels:\n        app.kubernetes.io/name: dragonfly\n        app.kubernetes.io/instance: test\n    spec:\n      serviceAccountName: test-dragonfly\n      containers:\n        - name: dragonfly\n          image: \"docker.dragonflydb.io/dragonflydb/dragonfly:v1.37.0\"\n          imagePullPolicy: IfNotPresent\n          ports:\n            - name: dragonfly\n              containerPort: 6379\n              protocol: TCP\n          livenessProbe:\n            exec:\n              command:\n              - /bin/sh\n              - /usr/local/bin/healthcheck.sh\n            failureThreshold: 3\n         
   initialDelaySeconds: 10\n            periodSeconds: 10\n            successThreshold: 1\n            timeoutSeconds: 5\n          readinessProbe:\n            exec:\n              command:\n              - /bin/sh\n              - /usr/local/bin/healthcheck.sh\n            failureThreshold: 3\n            initialDelaySeconds: 10\n            periodSeconds: 10\n            successThreshold: 1\n            timeoutSeconds: 5\n          args:\n            - \"--alsologtostderr\"\n          resources:\n            limits: {}\n            requests: {}\n          volumeMounts:\n            - mountPath: /data\n              name: \"test-data\"\n  volumeClaimTemplates:\n    - metadata:\n        name: \"test-data\"\n      spec:\n        accessModes: [ \"ReadWriteOnce\" ]\n        storageClassName: standard\n        resources:\n          requests:\n            storage: 128Mi\n"
  },
  {
    "path": "contrib/charts/dragonfly/ci/persistent-values.yaml",
    "content": "storage:\n  enabled: true\n  storageClassName: \"standard\"\n  requests: 128Mi\n"
  },
  {
    "path": "contrib/charts/dragonfly/ci/priorityclassname-values.golden.yaml",
    "content": "---\n# Source: dragonfly/templates/extra-manifests.yaml\napiVersion: scheduling.k8s.io/v1\ndescription: This priority class should be used only for tests.\nglobalDefault: false\nkind: PriorityClass\nmetadata:\n  name: high-priority\nvalue: 1000000\n---\n# Source: dragonfly/templates/serviceaccount.yaml\napiVersion: v1\nkind: ServiceAccount\nmetadata:\n  name: test-dragonfly\n  namespace: default\n  labels:\n    app.kubernetes.io/name: dragonfly\n    app.kubernetes.io/instance: test\n    app.kubernetes.io/version: \"v1.37.0\"\n    app.kubernetes.io/managed-by: Helm\n---\n# Source: dragonfly/templates/service.yaml\napiVersion: v1\nkind: Service\nmetadata:\n  name: test-dragonfly\n  namespace: default\n  labels:\n    app.kubernetes.io/name: dragonfly\n    app.kubernetes.io/instance: test\n    app.kubernetes.io/version: \"v1.37.0\"\n    app.kubernetes.io/managed-by: Helm\nspec:\n  type: ClusterIP\n  ports:\n    - port: 6379\n      targetPort: dragonfly\n      protocol: TCP\n      name: dragonfly\n  selector:\n    app.kubernetes.io/name: dragonfly\n    app.kubernetes.io/instance: test\n---\n# Source: dragonfly/templates/deployment.yaml\napiVersion: apps/v1\nkind: Deployment\nmetadata:\n  name: test-dragonfly\n  namespace: default\n  labels:\n    app.kubernetes.io/name: dragonfly\n    app.kubernetes.io/instance: test\n    app.kubernetes.io/version: \"v1.37.0\"\n    app.kubernetes.io/managed-by: Helm\nspec:\n  replicas: 1\n  selector:\n    matchLabels:\n      app.kubernetes.io/name: dragonfly\n      app.kubernetes.io/instance: test\n  template:\n    metadata:\n      annotations:\n      labels:\n        app.kubernetes.io/name: dragonfly\n        app.kubernetes.io/instance: test\n    spec:\n      priorityClassName: high-priority\n      serviceAccountName: test-dragonfly\n      containers:\n        - name: dragonfly\n          image: \"docker.dragonflydb.io/dragonflydb/dragonfly:v1.37.0\"\n          imagePullPolicy: IfNotPresent\n          ports:\n            
- name: dragonfly\n              containerPort: 6379\n              protocol: TCP\n          livenessProbe:\n            exec:\n              command:\n              - /bin/sh\n              - /usr/local/bin/healthcheck.sh\n            failureThreshold: 3\n            initialDelaySeconds: 10\n            periodSeconds: 10\n            successThreshold: 1\n            timeoutSeconds: 5\n          readinessProbe:\n            exec:\n              command:\n              - /bin/sh\n              - /usr/local/bin/healthcheck.sh\n            failureThreshold: 3\n            initialDelaySeconds: 10\n            periodSeconds: 10\n            successThreshold: 1\n            timeoutSeconds: 5\n          args:\n            - \"--alsologtostderr\"\n          resources:\n            limits: {}\n            requests: {}\n"
  },
  {
    "path": "contrib/charts/dragonfly/ci/priorityclassname-values.yaml",
    "content": "priorityClassName: \"high-priority\"\n\nextraObjects:\n  - apiVersion: scheduling.k8s.io/v1\n    kind: PriorityClass\n    metadata:\n      name: high-priority\n    value: 1000000\n    globalDefault: false\n    description: \"This priority class should be used only for tests.\"\n"
  },
  {
    "path": "contrib/charts/dragonfly/ci/prometheusrules-values.golden.yaml",
    "content": "---\n# Source: dragonfly/templates/serviceaccount.yaml\napiVersion: v1\nkind: ServiceAccount\nmetadata:\n  name: test-dragonfly\n  namespace: default\n  labels:\n    app.kubernetes.io/name: dragonfly\n    app.kubernetes.io/instance: test\n    app.kubernetes.io/version: \"v1.37.0\"\n    app.kubernetes.io/managed-by: Helm\n---\n# Source: dragonfly/templates/metrics-service.yaml\napiVersion: v1\nkind: Service\nmetadata:\n  name: test-dragonfly-metrics\n  namespace: default\n  labels:\n    app.kubernetes.io/name: dragonfly\n    app.kubernetes.io/instance: test\n    app.kubernetes.io/version: \"v1.37.0\"\n    app.kubernetes.io/managed-by: Helm\n    type: metrics\nspec:\n  type: ClusterIP\n  ports:\n    - name: metrics\n      port: 6379\n      targetPort: 6379\n      protocol: TCP\n  selector:\n    app.kubernetes.io/name: dragonfly\n    app.kubernetes.io/instance: test\n---\n# Source: dragonfly/templates/service.yaml\napiVersion: v1\nkind: Service\nmetadata:\n  name: test-dragonfly\n  namespace: default\n  labels:\n    app.kubernetes.io/name: dragonfly\n    app.kubernetes.io/instance: test\n    app.kubernetes.io/version: \"v1.37.0\"\n    app.kubernetes.io/managed-by: Helm\nspec:\n  type: ClusterIP\n  ports:\n    - port: 6379\n      targetPort: dragonfly\n      protocol: TCP\n      name: dragonfly\n  selector:\n    app.kubernetes.io/name: dragonfly\n    app.kubernetes.io/instance: test\n---\n# Source: dragonfly/templates/deployment.yaml\napiVersion: apps/v1\nkind: Deployment\nmetadata:\n  name: test-dragonfly\n  namespace: default\n  labels:\n    app.kubernetes.io/name: dragonfly\n    app.kubernetes.io/instance: test\n    app.kubernetes.io/version: \"v1.37.0\"\n    app.kubernetes.io/managed-by: Helm\nspec:\n  replicas: 1\n  selector:\n    matchLabels:\n      app.kubernetes.io/name: dragonfly\n      app.kubernetes.io/instance: test\n  template:\n    metadata:\n      annotations:\n      labels:\n        app.kubernetes.io/name: dragonfly\n        
app.kubernetes.io/instance: test\n    spec:\n      serviceAccountName: test-dragonfly\n      containers:\n        - name: dragonfly\n          image: \"docker.dragonflydb.io/dragonflydb/dragonfly:v1.37.0\"\n          imagePullPolicy: IfNotPresent\n          ports:\n            - name: dragonfly\n              containerPort: 6379\n              protocol: TCP\n          livenessProbe:\n            exec:\n              command:\n              - /bin/sh\n              - /usr/local/bin/healthcheck.sh\n            failureThreshold: 3\n            initialDelaySeconds: 10\n            periodSeconds: 10\n            successThreshold: 1\n            timeoutSeconds: 5\n          readinessProbe:\n            exec:\n              command:\n              - /bin/sh\n              - /usr/local/bin/healthcheck.sh\n            failureThreshold: 3\n            initialDelaySeconds: 10\n            periodSeconds: 10\n            successThreshold: 1\n            timeoutSeconds: 5\n          args:\n            - \"--alsologtostderr\"\n          resources:\n            limits: {}\n            requests: {}\n---\n# Source: dragonfly/templates/servicemonitor.yaml\napiVersion: monitoring.coreos.com/v1\nkind: ServiceMonitor\nmetadata:\n  name: test-dragonfly-metrics\n  namespace: default\n  labels:\n    app.kubernetes.io/name: dragonfly\n    app.kubernetes.io/instance: test\n    app.kubernetes.io/version: \"v1.37.0\"\n    app.kubernetes.io/managed-by: Helm\nspec:\n  endpoints:\n    - interval: 10s\n      scrapeTimeout: 10s\n      honorLabels: true\n      port: metrics\n      path: /metrics\n      scheme: http\n  jobLabel: \"test\"\n  selector:\n    matchLabels:\n      app.kubernetes.io/name: dragonfly\n      app.kubernetes.io/instance: test\n      type: metrics\n  namespaceSelector:\n    matchNames:\n      - default\n"
  },
  {
    "path": "contrib/charts/dragonfly/ci/prometheusrules-values.yaml",
    "content": "serviceMonitor:\n  enabled: true\nprometheusRule:\n  enabled: true\n  namespace: default\n  spec:\n    - alert: RedisDown\n      expr: absent(dragonfly_master > 0)\n      for: 0m\n      labels:\n        severity: critical\n      annotations:\n        summary: Redis instance is down\n        description: >\n          \"Redis instance is down\"\n        runbook_url: \"https://octopus.com/docs/runbooks/runbook-examples\"\n"
  },
  {
    "path": "contrib/charts/dragonfly/ci/resources-values.golden.yaml",
    "content": "---\n# Source: dragonfly/templates/serviceaccount.yaml\napiVersion: v1\nkind: ServiceAccount\nmetadata:\n  name: test-dragonfly\n  namespace: default\n  labels:\n    app.kubernetes.io/name: dragonfly\n    app.kubernetes.io/instance: test\n    app.kubernetes.io/version: \"v1.37.0\"\n    app.kubernetes.io/managed-by: Helm\n---\n# Source: dragonfly/templates/service.yaml\napiVersion: v1\nkind: Service\nmetadata:\n  name: test-dragonfly\n  namespace: default\n  labels:\n    app.kubernetes.io/name: dragonfly\n    app.kubernetes.io/instance: test\n    app.kubernetes.io/version: \"v1.37.0\"\n    app.kubernetes.io/managed-by: Helm\nspec:\n  type: ClusterIP\n  ports:\n    - port: 6379\n      targetPort: dragonfly\n      protocol: TCP\n      name: dragonfly\n  selector:\n    app.kubernetes.io/name: dragonfly\n    app.kubernetes.io/instance: test\n---\n# Source: dragonfly/templates/deployment.yaml\napiVersion: apps/v1\nkind: Deployment\nmetadata:\n  name: test-dragonfly\n  namespace: default\n  labels:\n    app.kubernetes.io/name: dragonfly\n    app.kubernetes.io/instance: test\n    app.kubernetes.io/version: \"v1.37.0\"\n    app.kubernetes.io/managed-by: Helm\nspec:\n  replicas: 1\n  selector:\n    matchLabels:\n      app.kubernetes.io/name: dragonfly\n      app.kubernetes.io/instance: test\n  template:\n    metadata:\n      annotations:\n      labels:\n        app.kubernetes.io/name: dragonfly\n        app.kubernetes.io/instance: test\n    spec:\n      serviceAccountName: test-dragonfly\n      containers:\n        - name: dragonfly\n          image: \"docker.dragonflydb.io/dragonflydb/dragonfly:v1.37.0\"\n          imagePullPolicy: IfNotPresent\n          ports:\n            - name: dragonfly\n              containerPort: 6379\n              protocol: TCP\n          livenessProbe:\n            exec:\n              command:\n              - /bin/sh\n              - /usr/local/bin/healthcheck.sh\n            failureThreshold: 3\n            
initialDelaySeconds: 10\n            periodSeconds: 10\n            successThreshold: 1\n            timeoutSeconds: 5\n          readinessProbe:\n            exec:\n              command:\n              - /bin/sh\n              - /usr/local/bin/healthcheck.sh\n            failureThreshold: 3\n            initialDelaySeconds: 10\n            periodSeconds: 10\n            successThreshold: 1\n            timeoutSeconds: 5\n          args:\n            - \"--alsologtostderr\"\n          resources:\n            limits:\n              cpu: 100m\n              memory: 400Mi\n            requests:\n              cpu: 100m\n              memory: 300Mi\n"
  },
  {
    "path": "contrib/charts/dragonfly/ci/resources-values.yaml",
    "content": "resources:\n  requests:\n    cpu: 100m\n    memory: 300Mi\n  limits:\n    cpu: 100m\n    memory: 400Mi\n"
  },
  {
    "path": "contrib/charts/dragonfly/ci/securitycontext-values.golden.yaml",
    "content": "---\n# Source: dragonfly/templates/serviceaccount.yaml\napiVersion: v1\nkind: ServiceAccount\nmetadata:\n  name: test-dragonfly\n  namespace: default\n  labels:\n    app.kubernetes.io/name: dragonfly\n    app.kubernetes.io/instance: test\n    app.kubernetes.io/version: \"v1.37.0\"\n    app.kubernetes.io/managed-by: Helm\n---\n# Source: dragonfly/templates/service.yaml\napiVersion: v1\nkind: Service\nmetadata:\n  name: test-dragonfly\n  namespace: default\n  labels:\n    app.kubernetes.io/name: dragonfly\n    app.kubernetes.io/instance: test\n    app.kubernetes.io/version: \"v1.37.0\"\n    app.kubernetes.io/managed-by: Helm\nspec:\n  type: ClusterIP\n  ports:\n    - port: 6379\n      targetPort: dragonfly\n      protocol: TCP\n      name: dragonfly\n  selector:\n    app.kubernetes.io/name: dragonfly\n    app.kubernetes.io/instance: test\n---\n# Source: dragonfly/templates/deployment.yaml\napiVersion: apps/v1\nkind: Deployment\nmetadata:\n  name: test-dragonfly\n  namespace: default\n  labels:\n    app.kubernetes.io/name: dragonfly\n    app.kubernetes.io/instance: test\n    app.kubernetes.io/version: \"v1.37.0\"\n    app.kubernetes.io/managed-by: Helm\nspec:\n  replicas: 1\n  selector:\n    matchLabels:\n      app.kubernetes.io/name: dragonfly\n      app.kubernetes.io/instance: test\n  template:\n    metadata:\n      annotations:\n      labels:\n        app.kubernetes.io/name: dragonfly\n        app.kubernetes.io/instance: test\n    spec:\n      serviceAccountName: test-dragonfly\n      containers:\n        - name: dragonfly\n          securityContext:\n            allowPrivilegeEscalation: false\n            readOnlyRootFilesystem: true\n          image: \"docker.dragonflydb.io/dragonflydb/dragonfly:v1.37.0\"\n          imagePullPolicy: IfNotPresent\n          ports:\n            - name: dragonfly\n              containerPort: 6379\n              protocol: TCP\n          livenessProbe:\n            exec:\n              command:\n              - 
/bin/sh\n              - /usr/local/bin/healthcheck.sh\n            failureThreshold: 3\n            initialDelaySeconds: 10\n            periodSeconds: 10\n            successThreshold: 1\n            timeoutSeconds: 5\n          readinessProbe:\n            exec:\n              command:\n              - /bin/sh\n              - /usr/local/bin/healthcheck.sh\n            failureThreshold: 3\n            initialDelaySeconds: 10\n            periodSeconds: 10\n            successThreshold: 1\n            timeoutSeconds: 5\n          args:\n            - \"--alsologtostderr\"\n          resources:\n            limits: {}\n            requests: {}\n"
  },
  {
    "path": "contrib/charts/dragonfly/ci/securitycontext-values.yaml",
    "content": "podSecurityContext: {}\n\nsecurityContext:\n  allowPrivilegeEscalation: false\n  readOnlyRootFilesystem: true\n"
  },
  {
    "path": "contrib/charts/dragonfly/ci/service-loadbalancer-ip.golden.yaml",
    "content": "---\n# Source: dragonfly/templates/serviceaccount.yaml\napiVersion: v1\nkind: ServiceAccount\nmetadata:\n  name: test-dragonfly\n  namespace: default\n  labels:\n    app.kubernetes.io/name: dragonfly\n    app.kubernetes.io/instance: test\n    app.kubernetes.io/version: \"v1.37.0\"\n    app.kubernetes.io/managed-by: Helm\n---\n# Source: dragonfly/templates/service.yaml\napiVersion: v1\nkind: Service\nmetadata:\n  name: test-dragonfly\n  namespace: default\n  labels:\n    app.kubernetes.io/name: dragonfly\n    app.kubernetes.io/instance: test\n    app.kubernetes.io/version: \"v1.37.0\"\n    app.kubernetes.io/managed-by: Helm\nspec:\n  type: LoadBalancer\n  loadBalancerIP: 127.0.0.1\n  ports:\n    - port: 6379\n      targetPort: dragonfly\n      protocol: TCP\n      name: dragonfly\n  selector:\n    app.kubernetes.io/name: dragonfly\n    app.kubernetes.io/instance: test\n---\n# Source: dragonfly/templates/deployment.yaml\napiVersion: apps/v1\nkind: Deployment\nmetadata:\n  name: test-dragonfly\n  namespace: default\n  labels:\n    app.kubernetes.io/name: dragonfly\n    app.kubernetes.io/instance: test\n    app.kubernetes.io/version: \"v1.37.0\"\n    app.kubernetes.io/managed-by: Helm\nspec:\n  replicas: 1\n  selector:\n    matchLabels:\n      app.kubernetes.io/name: dragonfly\n      app.kubernetes.io/instance: test\n  template:\n    metadata:\n      annotations:\n      labels:\n        app.kubernetes.io/name: dragonfly\n        app.kubernetes.io/instance: test\n    spec:\n      serviceAccountName: test-dragonfly\n      containers:\n        - name: dragonfly\n          image: \"docker.dragonflydb.io/dragonflydb/dragonfly:v1.37.0\"\n          imagePullPolicy: IfNotPresent\n          ports:\n            - name: dragonfly\n              containerPort: 6379\n              protocol: TCP\n          livenessProbe:\n            exec:\n              command:\n              - /bin/sh\n              - /usr/local/bin/healthcheck.sh\n            failureThreshold: 
3\n            initialDelaySeconds: 10\n            periodSeconds: 10\n            successThreshold: 1\n            timeoutSeconds: 5\n          readinessProbe:\n            exec:\n              command:\n              - /bin/sh\n              - /usr/local/bin/healthcheck.sh\n            failureThreshold: 3\n            initialDelaySeconds: 10\n            periodSeconds: 10\n            successThreshold: 1\n            timeoutSeconds: 5\n          args:\n            - \"--alsologtostderr\"\n          resources:\n            limits: {}\n            requests: {}\n"
  },
  {
    "path": "contrib/charts/dragonfly/ci/service-loadbalancer-ip.yaml",
    "content": "service:\n  type: LoadBalancer\n  loadBalancerIP: \"127.0.0.1\""
  },
  {
    "path": "contrib/charts/dragonfly/ci/service-monitor-values.golden.yaml",
    "content": "---\n# Source: dragonfly/templates/serviceaccount.yaml\napiVersion: v1\nkind: ServiceAccount\nmetadata:\n  name: test-dragonfly\n  namespace: default\n  labels:\n    app.kubernetes.io/name: dragonfly\n    app.kubernetes.io/instance: test\n    app.kubernetes.io/version: \"v1.37.0\"\n    app.kubernetes.io/managed-by: Helm\n---\n# Source: dragonfly/templates/metrics-service.yaml\napiVersion: v1\nkind: Service\nmetadata:\n  name: test-dragonfly-metrics\n  namespace: default\n  labels:\n    app.kubernetes.io/name: dragonfly\n    app.kubernetes.io/instance: test\n    app.kubernetes.io/version: \"v1.37.0\"\n    app.kubernetes.io/managed-by: Helm\n    type: metrics\nspec:\n  type: ClusterIP\n  ports:\n    - name: metrics\n      port: 6379\n      targetPort: 6379\n      protocol: TCP\n  selector:\n    app.kubernetes.io/name: dragonfly\n    app.kubernetes.io/instance: test\n---\n# Source: dragonfly/templates/service.yaml\napiVersion: v1\nkind: Service\nmetadata:\n  name: test-dragonfly\n  namespace: default\n  labels:\n    app.kubernetes.io/name: dragonfly\n    app.kubernetes.io/instance: test\n    app.kubernetes.io/version: \"v1.37.0\"\n    app.kubernetes.io/managed-by: Helm\nspec:\n  type: ClusterIP\n  ports:\n    - port: 6379\n      targetPort: dragonfly\n      protocol: TCP\n      name: dragonfly\n  selector:\n    app.kubernetes.io/name: dragonfly\n    app.kubernetes.io/instance: test\n---\n# Source: dragonfly/templates/deployment.yaml\napiVersion: apps/v1\nkind: Deployment\nmetadata:\n  name: test-dragonfly\n  namespace: default\n  labels:\n    app.kubernetes.io/name: dragonfly\n    app.kubernetes.io/instance: test\n    app.kubernetes.io/version: \"v1.37.0\"\n    app.kubernetes.io/managed-by: Helm\nspec:\n  replicas: 1\n  selector:\n    matchLabels:\n      app.kubernetes.io/name: dragonfly\n      app.kubernetes.io/instance: test\n  template:\n    metadata:\n      annotations:\n      labels:\n        app.kubernetes.io/name: dragonfly\n        
app.kubernetes.io/instance: test\n    spec:\n      serviceAccountName: test-dragonfly\n      containers:\n        - name: dragonfly\n          image: \"docker.dragonflydb.io/dragonflydb/dragonfly:v1.37.0\"\n          imagePullPolicy: IfNotPresent\n          ports:\n            - name: dragonfly\n              containerPort: 6379\n              protocol: TCP\n          livenessProbe:\n            exec:\n              command:\n              - /bin/sh\n              - /usr/local/bin/healthcheck.sh\n            failureThreshold: 3\n            initialDelaySeconds: 10\n            periodSeconds: 10\n            successThreshold: 1\n            timeoutSeconds: 5\n          readinessProbe:\n            exec:\n              command:\n              - /bin/sh\n              - /usr/local/bin/healthcheck.sh\n            failureThreshold: 3\n            initialDelaySeconds: 10\n            periodSeconds: 10\n            successThreshold: 1\n            timeoutSeconds: 5\n          args:\n            - \"--alsologtostderr\"\n          resources:\n            limits: {}\n            requests: {}\n---\n# Source: dragonfly/templates/servicemonitor.yaml\napiVersion: monitoring.coreos.com/v1\nkind: ServiceMonitor\nmetadata:\n  name: test-dragonfly-metrics\n  namespace: default\n  labels:\n    release: prometheus-stack\n    app.kubernetes.io/name: dragonfly\n    app.kubernetes.io/instance: test\n    app.kubernetes.io/version: \"v1.37.0\"\n    app.kubernetes.io/managed-by: Helm\nspec:\n  endpoints:\n    - interval: 10s\n      scrapeTimeout: 10s\n      honorLabels: true\n      port: metrics\n      path: /metrics\n      scheme: http\n  jobLabel: \"test\"\n  selector:\n    matchLabels:\n      app.kubernetes.io/name: dragonfly\n      app.kubernetes.io/instance: test\n      type: metrics\n  namespaceSelector:\n    matchNames:\n      - default\n"
  },
  {
    "path": "contrib/charts/dragonfly/ci/service-monitor-values.yaml",
    "content": "serviceMonitor:\n  enabled: true\n  namespace: \"\"\n  labels:\n    release: prometheus-stack\n  annotations: {}\n  interval: 10s\n  scrapeTimeout: 10s\n"
  },
  {
    "path": "contrib/charts/dragonfly/ci/taints-tolerations-values.golden.yaml",
    "content": "---\n# Source: dragonfly/templates/serviceaccount.yaml\napiVersion: v1\nkind: ServiceAccount\nmetadata:\n  name: test-dragonfly\n  namespace: default\n  labels:\n    app.kubernetes.io/name: dragonfly\n    app.kubernetes.io/instance: test\n    app.kubernetes.io/version: \"v1.37.0\"\n    app.kubernetes.io/managed-by: Helm\n---\n# Source: dragonfly/templates/service.yaml\napiVersion: v1\nkind: Service\nmetadata:\n  name: test-dragonfly\n  namespace: default\n  labels:\n    app.kubernetes.io/name: dragonfly\n    app.kubernetes.io/instance: test\n    app.kubernetes.io/version: \"v1.37.0\"\n    app.kubernetes.io/managed-by: Helm\nspec:\n  type: ClusterIP\n  ports:\n    - port: 6379\n      targetPort: dragonfly\n      protocol: TCP\n      name: dragonfly\n  selector:\n    app.kubernetes.io/name: dragonfly\n    app.kubernetes.io/instance: test\n---\n# Source: dragonfly/templates/deployment.yaml\napiVersion: apps/v1\nkind: Deployment\nmetadata:\n  name: test-dragonfly\n  namespace: default\n  labels:\n    app.kubernetes.io/name: dragonfly\n    app.kubernetes.io/instance: test\n    app.kubernetes.io/version: \"v1.37.0\"\n    app.kubernetes.io/managed-by: Helm\nspec:\n  replicas: 1\n  selector:\n    matchLabels:\n      app.kubernetes.io/name: dragonfly\n      app.kubernetes.io/instance: test\n  template:\n    metadata:\n      annotations:\n      labels:\n        app.kubernetes.io/name: dragonfly\n        app.kubernetes.io/instance: test\n    spec:\n      tolerations:\n        - effect: NoSchedule\n          key: key/high-memory\n          operator: Equal\n          value: \"true\"\n        - effect: PreferNoSchedule\n          key: key/high-memory\n          operator: Equal\n          value: \"true\"\n      affinity:\n        nodeAffinity:\n          requiredDuringSchedulingIgnoredDuringExecution:\n            nodeSelectorTerms:\n            - matchExpressions:\n              - key: key/node-kind\n                operator: In\n                values:\n         
       - high-memory\n      serviceAccountName: test-dragonfly\n      containers:\n        - name: dragonfly\n          image: \"docker.dragonflydb.io/dragonflydb/dragonfly:v1.37.0\"\n          imagePullPolicy: IfNotPresent\n          ports:\n            - name: dragonfly\n              containerPort: 6379\n              protocol: TCP\n          livenessProbe:\n            exec:\n              command:\n              - /bin/sh\n              - /usr/local/bin/healthcheck.sh\n            failureThreshold: 3\n            initialDelaySeconds: 10\n            periodSeconds: 10\n            successThreshold: 1\n            timeoutSeconds: 5\n          readinessProbe:\n            exec:\n              command:\n              - /bin/sh\n              - /usr/local/bin/healthcheck.sh\n            failureThreshold: 3\n            initialDelaySeconds: 10\n            periodSeconds: 10\n            successThreshold: 1\n            timeoutSeconds: 5\n          args:\n            - \"--alsologtostderr\"\n          resources:\n            limits: {}\n            requests: {}\n"
  },
  {
    "path": "contrib/charts/dragonfly/ci/taints-tolerations-values.yaml",
    "content": "tolerations:\n  - key: key/high-memory\n    operator: \"Equal\"\n    value: \"true\"\n    effect: \"NoSchedule\"\n  - key: key/high-memory\n    operator: \"Equal\"\n    value: \"true\"\n    effect: \"PreferNoSchedule\"\naffinity:\n  nodeAffinity:\n    requiredDuringSchedulingIgnoredDuringExecution:\n      nodeSelectorTerms:\n        - matchExpressions:\n            - key: key/node-kind\n              operator: In\n              values:\n                - high-memory\n"
  },
  {
    "path": "contrib/charts/dragonfly/ci/tls-values.golden.yaml",
    "content": "---\n# Source: dragonfly/templates/serviceaccount.yaml\napiVersion: v1\nkind: ServiceAccount\nmetadata:\n  name: test-dragonfly\n  namespace: default\n  labels:\n    app.kubernetes.io/name: dragonfly\n    app.kubernetes.io/instance: test\n    app.kubernetes.io/version: \"v1.37.0\"\n    app.kubernetes.io/managed-by: Helm\n---\n# Source: dragonfly/templates/extra-manifests.yaml\napiVersion: v1\nkind: Secret\nmetadata:\n  name: dfly-password\nstringData:\n  password: foobar\n---\n# Source: dragonfly/templates/tls-secret.yaml\napiVersion: v1\nkind: Secret\nmetadata:\n  name: test-dragonfly-tls\n  namespace: default\n  labels:\n    app.kubernetes.io/name: dragonfly\n    app.kubernetes.io/instance: test\n    app.kubernetes.io/version: \"v1.37.0\"\n    app.kubernetes.io/managed-by: Helm\ntype: kubernetes.io/tls\ndata:\n  tls.crt: \"LS0tLS1CRUdJTiBDRVJUSUZJQ0FURS0tLS0tCk1JSUI4ekNDQVpxZ0F3SUJBZ0lFYmIyWjJqQUtCZ2dxaGtqT1BRUURBekJaTVFzd0NRWURWUVFHRXdKR1R6RWcKTUI0R0ExVUVBd3dYWkhKaFoyOXVabXg1TG1SeVlXZHZibVpzZVM1emRtTXhEREFLQmdOVkJBZ01BMlp2YnpFTQpNQW9HQTFVRUJ3d0RabTl2TVF3d0NnWURWUVFLREFObWIyOHdIaGNOTWpJeE1qSTVNVEl3TXpJM1doY05Nekl4Ck1qSTJNVEl3TXpJM1dqQlpNUXN3Q1FZRFZRUUdFd0pHVHpFZ01CNEdBMVVFQXd3WFpISmhaMjl1Wm14NUxtUnkKWVdkdmJtWnNlUzV6ZG1NeEREQUtCZ05WQkFnTUEyWnZiekVNTUFvR0ExVUVCd3dEWm05dk1Rd3dDZ1lEVlFRSwpEQU5tYjI4d1dUQVRCZ2NxaGtqT1BRSUJCZ2dxaGtqT1BRTUJCd05DQUFRV05mVHVOamhQRWk3aDFjaUNTMEl0CmZLZ2lCaHhMR2xGM010amxGVGpDcnpreW5TU0FCb010TmxqY0RFMGhtL2l6YlJVb2dBY0RGY3ZrbnZDaHp4YXEKbzFBd1RqQWRCZ05WSFE0RUZnUVVTTjZGYnNKWjJFVWZYM2JlQ2g1Y0VvNmNrdFF3SHdZRFZSMGpCQmd3Rm9BVQpTTjZGYnNKWjJFVWZYM2JlQ2g1Y0VvNmNrdFF3REFZRFZSMFRCQVV3QXdFQi96QUtCZ2dxaGtqT1BRUURBd05ICkFEQkVBaUI2dEc1eHp5ajRpVC9lMHdwQ01SSE92bFFLUWV4QnloeU5QQWhybzlaQ1JnSWdhRGNkOXZNOHJDYmIKSlBSeXptMGlOOU9XTS9BMjRubW0zaXRuM0k0cmNEMD0KLS0tLS1FTkQgQ0VSVElGSUNBVEUtLS0tLQo=\"\n  tls.key: 
\"LS0tLS1CRUdJTiBFQyBQUklWQVRFIEtFWS0tLS0tCk1IY0NBUUVFSU5oNmVNRHJCbEFpVDY4VDhvdnpHbjZKWmJKZXZVZWZZa0lJWU5Xd3c1NXlvQW9HQ0NxR1NNNDkKQXdFSG9VUURRZ0FFRmpYMDdqWTRUeEl1NGRYSWdrdENMWHlvSWdZY1N4cFJkekxZNVJVNHdxODVNcDBrZ0FhRApMVFpZM0F4TkladjRzMjBWS0lBSEF4WEw1Sjd3b2M4V3FnPT0KLS0tLS1FTkQgRUMgUFJJVkFURSBLRVktLS0tLQo=\"\n---\n# Source: dragonfly/templates/service.yaml\napiVersion: v1\nkind: Service\nmetadata:\n  name: test-dragonfly\n  namespace: default\n  labels:\n    app.kubernetes.io/name: dragonfly\n    app.kubernetes.io/instance: test\n    app.kubernetes.io/version: \"v1.37.0\"\n    app.kubernetes.io/managed-by: Helm\nspec:\n  type: ClusterIP\n  ports:\n    - port: 6379\n      targetPort: dragonfly\n      protocol: TCP\n      name: dragonfly\n  selector:\n    app.kubernetes.io/name: dragonfly\n    app.kubernetes.io/instance: test\n---\n# Source: dragonfly/templates/deployment.yaml\napiVersion: apps/v1\nkind: Deployment\nmetadata:\n  name: test-dragonfly\n  namespace: default\n  labels:\n    app.kubernetes.io/name: dragonfly\n    app.kubernetes.io/instance: test\n    app.kubernetes.io/version: \"v1.37.0\"\n    app.kubernetes.io/managed-by: Helm\nspec:\n  replicas: 1\n  selector:\n    matchLabels:\n      app.kubernetes.io/name: dragonfly\n      app.kubernetes.io/instance: test\n  template:\n    metadata:\n      annotations:\n        checksum/tls-secret: b97190b6585f160d4f709b965d275564bb51cd19202c6e014e1d42a972446a5c\n      labels:\n        app.kubernetes.io/name: dragonfly\n        app.kubernetes.io/instance: test\n    spec:\n      serviceAccountName: test-dragonfly\n      containers:\n        - name: dragonfly\n          image: \"docker.dragonflydb.io/dragonflydb/dragonfly:v1.37.0\"\n          imagePullPolicy: IfNotPresent\n          ports:\n            - name: dragonfly\n              containerPort: 6379\n              protocol: TCP\n          livenessProbe:\n            exec:\n              command:\n              - /bin/sh\n              - 
/usr/local/bin/healthcheck.sh\n            failureThreshold: 3\n            initialDelaySeconds: 10\n            periodSeconds: 10\n            successThreshold: 1\n            timeoutSeconds: 5\n          readinessProbe:\n            exec:\n              command:\n              - /bin/sh\n              - /usr/local/bin/healthcheck.sh\n            failureThreshold: 3\n            initialDelaySeconds: 10\n            periodSeconds: 10\n            successThreshold: 1\n            timeoutSeconds: 5\n          args:\n            - \"--alsologtostderr\"\n            - \"--tls\"\n            - \"--tls_cert_file=/etc/dragonfly/tls/tls.crt\"\n            - \"--tls_key_file=/etc/dragonfly/tls/tls.key\"\n          resources:\n            limits: {}\n            requests: {}\n          volumeMounts:\n            - mountPath: /etc/dragonfly/tls\n              name: tls\n          env:\n            - name: DFLY_requirepass\n              valueFrom:\n                secretKeyRef:\n                  name: dfly-password\n                  key: password\n      volumes:\n        - name: tls\n          secret:\n            secretName: test-dragonfly-tls\n"
  },
  {
    "path": "contrib/charts/dragonfly/ci/tls-values.yaml",
    "content": "tls:\n  enabled: true\n  existing_secret: \"\"\n  cert: |\n    -----BEGIN CERTIFICATE-----\n    MIIB8zCCAZqgAwIBAgIEbb2Z2jAKBggqhkjOPQQDAzBZMQswCQYDVQQGEwJGTzEg\n    MB4GA1UEAwwXZHJhZ29uZmx5LmRyYWdvbmZseS5zdmMxDDAKBgNVBAgMA2ZvbzEM\n    MAoGA1UEBwwDZm9vMQwwCgYDVQQKDANmb28wHhcNMjIxMjI5MTIwMzI3WhcNMzIx\n    MjI2MTIwMzI3WjBZMQswCQYDVQQGEwJGTzEgMB4GA1UEAwwXZHJhZ29uZmx5LmRy\n    YWdvbmZseS5zdmMxDDAKBgNVBAgMA2ZvbzEMMAoGA1UEBwwDZm9vMQwwCgYDVQQK\n    DANmb28wWTATBgcqhkjOPQIBBggqhkjOPQMBBwNCAAQWNfTuNjhPEi7h1ciCS0It\n    fKgiBhxLGlF3MtjlFTjCrzkynSSABoMtNljcDE0hm/izbRUogAcDFcvknvChzxaq\n    o1AwTjAdBgNVHQ4EFgQUSN6FbsJZ2EUfX3beCh5cEo6cktQwHwYDVR0jBBgwFoAU\n    SN6FbsJZ2EUfX3beCh5cEo6cktQwDAYDVR0TBAUwAwEB/zAKBggqhkjOPQQDAwNH\n    ADBEAiB6tG5xzyj4iT/e0wpCMRHOvlQKQexByhyNPAhro9ZCRgIgaDcd9vM8rCbb\n    JPRyzm0iN9OWM/A24nmm3itn3I4rcD0=\n    -----END CERTIFICATE-----\n\n  key: |\n    -----BEGIN EC PRIVATE KEY-----\n    MHcCAQEEINh6eMDrBlAiT68T8ovzGn6JZbJevUefYkIIYNWww55yoAoGCCqGSM49\n    AwEHoUQDQgAEFjX07jY4TxIu4dXIgktCLXyoIgYcSxpRdzLY5RU4wq85Mp0kgAaD\n    LTZY3AxNIZv4s20VKIAHAxXL5J7woc8Wqg==\n    -----END EC PRIVATE KEY-----\n\nextraObjects:\n- apiVersion: v1\n  kind: Secret\n  metadata:\n    name: dfly-password\n  stringData:\n    password: foobar\n\npasswordFromSecret:\n  enable: true\n  existingSecret:\n    name: dfly-password\n    key: password\n"
  },
  {
    "path": "contrib/charts/dragonfly/ci/tolerations-values.golden.yaml",
    "content": "---\n# Source: dragonfly/templates/serviceaccount.yaml\napiVersion: v1\nkind: ServiceAccount\nmetadata:\n  name: test-dragonfly\n  namespace: default\n  labels:\n    app.kubernetes.io/name: dragonfly\n    app.kubernetes.io/instance: test\n    app.kubernetes.io/version: \"v1.37.0\"\n    app.kubernetes.io/managed-by: Helm\n---\n# Source: dragonfly/templates/service.yaml\napiVersion: v1\nkind: Service\nmetadata:\n  name: test-dragonfly\n  namespace: default\n  labels:\n    app.kubernetes.io/name: dragonfly\n    app.kubernetes.io/instance: test\n    app.kubernetes.io/version: \"v1.37.0\"\n    app.kubernetes.io/managed-by: Helm\nspec:\n  type: ClusterIP\n  ports:\n    - port: 6379\n      targetPort: dragonfly\n      protocol: TCP\n      name: dragonfly\n  selector:\n    app.kubernetes.io/name: dragonfly\n    app.kubernetes.io/instance: test\n---\n# Source: dragonfly/templates/deployment.yaml\napiVersion: apps/v1\nkind: Deployment\nmetadata:\n  name: test-dragonfly\n  namespace: default\n  labels:\n    app.kubernetes.io/name: dragonfly\n    app.kubernetes.io/instance: test\n    app.kubernetes.io/version: \"v1.37.0\"\n    app.kubernetes.io/managed-by: Helm\nspec:\n  replicas: 1\n  selector:\n    matchLabels:\n      app.kubernetes.io/name: dragonfly\n      app.kubernetes.io/instance: test\n  template:\n    metadata:\n      annotations:\n      labels:\n        app.kubernetes.io/name: dragonfly\n        app.kubernetes.io/instance: test\n    spec:\n      tolerations:\n        - effect: NoSchedule\n          operator: Exists\n      serviceAccountName: test-dragonfly\n      containers:\n        - name: dragonfly\n          image: \"docker.dragonflydb.io/dragonflydb/dragonfly:v1.37.0\"\n          imagePullPolicy: IfNotPresent\n          ports:\n            - name: dragonfly\n              containerPort: 6379\n              protocol: TCP\n          livenessProbe:\n            exec:\n              command:\n              - /bin/sh\n              - 
/usr/local/bin/healthcheck.sh\n            failureThreshold: 3\n            initialDelaySeconds: 10\n            periodSeconds: 10\n            successThreshold: 1\n            timeoutSeconds: 5\n          readinessProbe:\n            exec:\n              command:\n              - /bin/sh\n              - /usr/local/bin/healthcheck.sh\n            failureThreshold: 3\n            initialDelaySeconds: 10\n            periodSeconds: 10\n            successThreshold: 1\n            timeoutSeconds: 5\n          args:\n            - \"--alsologtostderr\"\n          resources:\n            limits: {}\n            requests: {}\n"
  },
  {
    "path": "contrib/charts/dragonfly/ci/tolerations-values.yaml",
    "content": "tolerations:\n  - effect: NoSchedule\n    operator: Exists\n"
  },
  {
    "path": "contrib/charts/dragonfly/go.mod",
    "content": "module dragonfly\n\ngo 1.24.0\n\ntoolchain go1.24.7\n\nrequire github.com/gruntwork-io/terratest v0.51.0\n\nrequire (\n\tfilippo.io/edwards25519 v1.1.0 // indirect\n\tgithub.com/BurntSushi/toml v1.5.0 // indirect\n\tgithub.com/aws/aws-sdk-go-v2 v1.39.1 // indirect\n\tgithub.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.7.1 // indirect\n\tgithub.com/aws/aws-sdk-go-v2/config v1.31.10 // indirect\n\tgithub.com/aws/aws-sdk-go-v2/credentials v1.18.14 // indirect\n\tgithub.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.18.8 // indirect\n\tgithub.com/aws/aws-sdk-go-v2/feature/s3/manager v1.19.8 // indirect\n\tgithub.com/aws/aws-sdk-go-v2/internal/configsources v1.4.8 // indirect\n\tgithub.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.7.8 // indirect\n\tgithub.com/aws/aws-sdk-go-v2/internal/ini v1.8.3 // indirect\n\tgithub.com/aws/aws-sdk-go-v2/internal/v4a v1.4.8 // indirect\n\tgithub.com/aws/aws-sdk-go-v2/service/acm v1.37.5 // indirect\n\tgithub.com/aws/aws-sdk-go-v2/service/autoscaling v1.59.2 // indirect\n\tgithub.com/aws/aws-sdk-go-v2/service/cloudwatchlogs v1.58.1 // indirect\n\tgithub.com/aws/aws-sdk-go-v2/service/dynamodb v1.50.4 // indirect\n\tgithub.com/aws/aws-sdk-go-v2/service/ec2 v1.254.0 // indirect\n\tgithub.com/aws/aws-sdk-go-v2/service/ecr v1.50.4 // indirect\n\tgithub.com/aws/aws-sdk-go-v2/service/ecs v1.64.1 // indirect\n\tgithub.com/aws/aws-sdk-go-v2/service/iam v1.47.6 // indirect\n\tgithub.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.13.1 // indirect\n\tgithub.com/aws/aws-sdk-go-v2/service/internal/checksum v1.8.8 // indirect\n\tgithub.com/aws/aws-sdk-go-v2/service/internal/endpoint-discovery v1.11.8 // indirect\n\tgithub.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.13.8 // indirect\n\tgithub.com/aws/aws-sdk-go-v2/service/internal/s3shared v1.19.8 // indirect\n\tgithub.com/aws/aws-sdk-go-v2/service/kms v1.45.5 // indirect\n\tgithub.com/aws/aws-sdk-go-v2/service/lambda v1.77.5 // 
indirect\n\tgithub.com/aws/aws-sdk-go-v2/service/rds v1.107.1 // indirect\n\tgithub.com/aws/aws-sdk-go-v2/service/route53 v1.58.3 // indirect\n\tgithub.com/aws/aws-sdk-go-v2/service/s3 v1.88.2 // indirect\n\tgithub.com/aws/aws-sdk-go-v2/service/secretsmanager v1.39.5 // indirect\n\tgithub.com/aws/aws-sdk-go-v2/service/sns v1.38.4 // indirect\n\tgithub.com/aws/aws-sdk-go-v2/service/sqs v1.42.7 // indirect\n\tgithub.com/aws/aws-sdk-go-v2/service/ssm v1.65.0 // indirect\n\tgithub.com/aws/aws-sdk-go-v2/service/sso v1.29.4 // indirect\n\tgithub.com/aws/aws-sdk-go-v2/service/ssooidc v1.35.0 // indirect\n\tgithub.com/aws/aws-sdk-go-v2/service/sts v1.38.5 // indirect\n\tgithub.com/aws/smithy-go v1.23.0 // indirect\n\tgithub.com/boombuler/barcode v1.1.0 // indirect\n\tgithub.com/cpuguy83/go-md2man/v2 v2.0.7 // indirect\n\tgithub.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect\n\tgithub.com/emicklei/go-restful/v3 v3.13.0 // indirect\n\tgithub.com/fxamacker/cbor/v2 v2.9.0 // indirect\n\tgithub.com/go-errors/errors v1.5.1 // indirect\n\tgithub.com/go-logr/logr v1.4.3 // indirect\n\tgithub.com/go-openapi/jsonpointer v0.22.0 // indirect\n\tgithub.com/go-openapi/jsonreference v0.21.1 // indirect\n\tgithub.com/go-openapi/swag v0.25.0 // indirect\n\tgithub.com/go-openapi/swag/cmdutils v0.25.0 // indirect\n\tgithub.com/go-openapi/swag/conv v0.25.0 // indirect\n\tgithub.com/go-openapi/swag/fileutils v0.25.0 // indirect\n\tgithub.com/go-openapi/swag/jsonname v0.25.0 // indirect\n\tgithub.com/go-openapi/swag/jsonutils v0.25.0 // indirect\n\tgithub.com/go-openapi/swag/loading v0.25.0 // indirect\n\tgithub.com/go-openapi/swag/mangling v0.25.0 // indirect\n\tgithub.com/go-openapi/swag/netutils v0.25.0 // indirect\n\tgithub.com/go-openapi/swag/stringutils v0.25.0 // indirect\n\tgithub.com/go-openapi/swag/typeutils v0.25.0 // indirect\n\tgithub.com/go-openapi/swag/yamlutils v0.25.0 // indirect\n\tgithub.com/go-sql-driver/mysql v1.9.3 // 
indirect\n\tgithub.com/gogo/protobuf v1.3.2 // indirect\n\tgithub.com/gonvenience/bunt v1.4.2 // indirect\n\tgithub.com/gonvenience/idem v0.0.2 // indirect\n\tgithub.com/gonvenience/neat v1.3.16 // indirect\n\tgithub.com/gonvenience/term v1.0.4 // indirect\n\tgithub.com/gonvenience/text v1.0.9 // indirect\n\tgithub.com/gonvenience/ytbx v1.4.7 // indirect\n\tgithub.com/google/gnostic-models v0.7.0 // indirect\n\tgithub.com/google/uuid v1.6.0 // indirect\n\tgithub.com/gorilla/websocket v1.5.4-0.20250319132907-e064f32e3674 // indirect\n\tgithub.com/gruntwork-io/go-commons v0.17.2 // indirect\n\tgithub.com/hashicorp/errwrap v1.1.0 // indirect\n\tgithub.com/hashicorp/go-multierror v1.1.1 // indirect\n\tgithub.com/homeport/dyff v1.10.2 // indirect\n\tgithub.com/jackc/pgpassfile v1.0.0 // indirect\n\tgithub.com/jackc/pgservicefile v0.0.0-20240606120523-5a60cdf6a761 // indirect\n\tgithub.com/jackc/pgx/v5 v5.7.6 // indirect\n\tgithub.com/jackc/puddle/v2 v2.2.2 // indirect\n\tgithub.com/json-iterator/go v1.1.12 // indirect\n\tgithub.com/lucasb-eyer/go-colorful v1.3.0 // indirect\n\tgithub.com/mattn/go-ciede2000 v0.0.0-20170301095244-782e8c62fec3 // indirect\n\tgithub.com/mattn/go-isatty v0.0.20 // indirect\n\tgithub.com/mattn/go-zglob v0.0.6 // indirect\n\tgithub.com/mitchellh/go-homedir v1.1.0 // indirect\n\tgithub.com/mitchellh/go-ps v1.0.0 // indirect\n\tgithub.com/mitchellh/hashstructure v1.1.0 // indirect\n\tgithub.com/moby/spdystream v0.5.0 // indirect\n\tgithub.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect\n\tgithub.com/modern-go/reflect2 v1.0.3-0.20250322232337-35a7c28c31ee // indirect\n\tgithub.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect\n\tgithub.com/mxk/go-flowrate v0.0.0-20140419014527-cca7078d478f // indirect\n\tgithub.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect\n\tgithub.com/pquerna/otp v1.5.0 // indirect\n\tgithub.com/russross/blackfriday/v2 v2.1.0 // 
indirect\n\tgithub.com/sergi/go-diff v1.4.0 // indirect\n\tgithub.com/spf13/pflag v1.0.10 // indirect\n\tgithub.com/stretchr/testify v1.11.1 // indirect\n\tgithub.com/texttheater/golang-levenshtein v1.0.1 // indirect\n\tgithub.com/urfave/cli/v2 v2.27.7 // indirect\n\tgithub.com/virtuald/go-ordered-json v0.0.0-20170621173500-b18e6e673d74 // indirect\n\tgithub.com/x448/float16 v0.8.4 // indirect\n\tgithub.com/xrash/smetrics v0.0.0-20250705151800-55b8f293f342 // indirect\n\tgo.yaml.in/yaml/v2 v2.4.3 // indirect\n\tgo.yaml.in/yaml/v3 v3.0.4 // indirect\n\tgolang.org/x/crypto v0.42.0 // indirect\n\tgolang.org/x/exp v0.0.0-20250911091902-df9299821621 // indirect\n\tgolang.org/x/net v0.44.0 // indirect\n\tgolang.org/x/oauth2 v0.31.0 // indirect\n\tgolang.org/x/sync v0.17.0 // indirect\n\tgolang.org/x/sys v0.36.0 // indirect\n\tgolang.org/x/term v0.35.0 // indirect\n\tgolang.org/x/text v0.29.0 // indirect\n\tgolang.org/x/time v0.13.0 // indirect\n\tgoogle.golang.org/protobuf v1.36.9 // indirect\n\tgopkg.in/evanphx/json-patch.v4 v4.13.0 // indirect\n\tgopkg.in/inf.v0 v0.9.1 // indirect\n\tgopkg.in/yaml.v2 v2.4.0 // indirect\n\tgopkg.in/yaml.v3 v3.0.1 // indirect\n\tk8s.io/api v0.34.1 // indirect\n\tk8s.io/apimachinery v0.34.1 // indirect\n\tk8s.io/client-go v0.34.1 // indirect\n\tk8s.io/klog/v2 v2.130.1 // indirect\n\tk8s.io/kube-openapi v0.0.0-20250910181357-589584f1c912 // indirect\n\tk8s.io/utils v0.0.0-20250820121507-0af2bda4dd1d // indirect\n\tsigs.k8s.io/json v0.0.0-20250730193827-2d320260d730 // indirect\n\tsigs.k8s.io/randfill v1.0.0 // indirect\n\tsigs.k8s.io/structured-merge-diff/v6 v6.3.0 // indirect\n\tsigs.k8s.io/yaml v1.6.0 // indirect\n)\n"
  },
  {
    "path": "contrib/charts/dragonfly/go.sum",
    "content": "filippo.io/edwards25519 v1.1.0 h1:FNf4tywRC1HmFuKW5xopWpigGjJKiJSV0Cqo0cJWDaA=\nfilippo.io/edwards25519 v1.1.0/go.mod h1:BxyFTGdWcka3PhytdK4V28tE5sGfRvvvRV7EaN4VDT4=\ngithub.com/BurntSushi/toml v1.5.0 h1:W5quZX/G/csjUnuI8SUYlsHs9M38FC7znL0lIO+DvMg=\ngithub.com/BurntSushi/toml v1.5.0/go.mod h1:ukJfTF/6rtPPRCnwkur4qwRxa8vTRFBF0uk2lLoLwho=\ngithub.com/armon/go-socks5 v0.0.0-20160902184237-e75332964ef5 h1:0CwZNZbxp69SHPdPJAN/hZIm0C4OItdklCFmMRWYpio=\ngithub.com/armon/go-socks5 v0.0.0-20160902184237-e75332964ef5/go.mod h1:wHh0iHkYZB8zMSxRWpUBQtwG5a7fFgvEO+odwuTv2gs=\ngithub.com/aws/aws-sdk-go-v2 v1.39.1 h1:fWZhGAwVRK/fAN2tmt7ilH4PPAE11rDj7HytrmbZ2FE=\ngithub.com/aws/aws-sdk-go-v2 v1.39.1/go.mod h1:sDioUELIUO9Znk23YVmIk86/9DOpkbyyVb1i/gUNFXY=\ngithub.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.7.1 h1:i8p8P4diljCr60PpJp6qZXNlgX4m2yQFpYk+9ZT+J4E=\ngithub.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.7.1/go.mod h1:ddqbooRZYNoJ2dsTwOty16rM+/Aqmk/GOXrK8cg7V00=\ngithub.com/aws/aws-sdk-go-v2/config v1.31.10 h1:7LllDZAegXU3yk41mwM6KcPu0wmjKGQB1bg99bNdQm4=\ngithub.com/aws/aws-sdk-go-v2/config v1.31.10/go.mod h1:Ge6gzXPjqu4v0oHvgAwvGzYcK921GU0hQM25WF/Kl+8=\ngithub.com/aws/aws-sdk-go-v2/credentials v1.18.14 h1:TxkI7QI+sFkTItN/6cJuMZEIVMFXeu2dI1ZffkXngKI=\ngithub.com/aws/aws-sdk-go-v2/credentials v1.18.14/go.mod h1:12x4Uw/vijC11XkctTjy92TNCQ+UnNJkT7fzX0Yd93E=\ngithub.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.18.8 h1:gLD09eaJUdiszm7vd1btiQUYE0Hj+0I2b8AS+75z9AY=\ngithub.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.18.8/go.mod h1:4RW3oMPt1POR74qVOC4SbubxAwdP4pCT0nSw3jycOU4=\ngithub.com/aws/aws-sdk-go-v2/feature/s3/manager v1.19.8 h1:QcAh/TNGM3MWe95ilMWwnieXWXsyM33Mb/RuTGlWLm4=\ngithub.com/aws/aws-sdk-go-v2/feature/s3/manager v1.19.8/go.mod h1:72m/ZCCgYpXJzsgI8uJFYMnXEjtZ4kkaolL9NRXLSnU=\ngithub.com/aws/aws-sdk-go-v2/internal/configsources v1.4.8 h1:6bgAZgRyT4RoFWhxS+aoGMFyE0cD1bSzFnEEi4bFPGI=\ngithub.com/aws/aws-sdk-go-v2/internal/configsources v1.4.8/go.mod 
h1:KcGkXFVU8U28qS4KvLEcPxytPZPBcRawaH2Pf/0jptE=\ngithub.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.7.8 h1:HhJYoES3zOz34yWEpGENqJvRVPqpmJyR3+AFg9ybhdY=\ngithub.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.7.8/go.mod h1:JnA+hPWeYAVbDssp83tv+ysAG8lTfLVXvSsyKg/7xNA=\ngithub.com/aws/aws-sdk-go-v2/internal/ini v1.8.3 h1:bIqFDwgGXXN1Kpp99pDOdKMTTb5d2KyU5X/BZxjOkRo=\ngithub.com/aws/aws-sdk-go-v2/internal/ini v1.8.3/go.mod h1:H5O/EsxDWyU+LP/V8i5sm8cxoZgc2fdNR9bxlOFrQTo=\ngithub.com/aws/aws-sdk-go-v2/internal/v4a v1.4.8 h1:1/bT9kDdLQzfZ1e6J6hpW+SfNDd6xrV8F3M2CuGyUz8=\ngithub.com/aws/aws-sdk-go-v2/internal/v4a v1.4.8/go.mod h1:RbdwTONAIi59ej/+1H+QzZORt5bcyAtbrS7FQb2pvz0=\ngithub.com/aws/aws-sdk-go-v2/service/acm v1.37.5 h1:vTmyvkmMJEKZgyhSuaEv8gZCJJlgNpSpYy/4CExjHoA=\ngithub.com/aws/aws-sdk-go-v2/service/acm v1.37.5/go.mod h1:TmyW/AiLmFEXwFsm5hh2T86BpgFbcB1icshuzFu8LgY=\ngithub.com/aws/aws-sdk-go-v2/service/autoscaling v1.59.2 h1:YOWVoIjUoiwAVIRVU3PG2yNldh9dQT5OegnO99RO4ls=\ngithub.com/aws/aws-sdk-go-v2/service/autoscaling v1.59.2/go.mod h1:t08UbddtoRQcKiIW2ZTfxX5x6vRaTj6KrKcf1R0I4tw=\ngithub.com/aws/aws-sdk-go-v2/service/cloudwatchlogs v1.58.1 h1:JMYpgsJ31l0wjJCerJtIBo39HznZJ/ENJJzOSTcJh68=\ngithub.com/aws/aws-sdk-go-v2/service/cloudwatchlogs v1.58.1/go.mod h1:zqtpx8Y/EydPCFy5MA9AJJBfJ+mCQz8BNHj2CvDvaYA=\ngithub.com/aws/aws-sdk-go-v2/service/dynamodb v1.50.4 h1:3EE5TTeBHPTKQNNeIHdXcJ6ENDsN7c2rCQUtbdolwV8=\ngithub.com/aws/aws-sdk-go-v2/service/dynamodb v1.50.4/go.mod h1:8rWv4Lq/jrlspgd/wpdFeKrxLByJlfpFEk9g0Tw5iOw=\ngithub.com/aws/aws-sdk-go-v2/service/ec2 v1.254.0 h1:fTLR6dLDTGChAjecRPlVrKeznT0rVdzR4yn9Z68MTGk=\ngithub.com/aws/aws-sdk-go-v2/service/ec2 v1.254.0/go.mod h1:V0jbRy1/IPapnkqgXSwVOFB+u5pnCwd9S+R3pKWULC4=\ngithub.com/aws/aws-sdk-go-v2/service/ecr v1.50.4 h1:kPe1ZLqERYZxxDi6ysoX4oYavSJ6lkGaadsN1ogg3I8=\ngithub.com/aws/aws-sdk-go-v2/service/ecr v1.50.4/go.mod h1:cAJR/1pLXISKFSSJsrsTZPw05PLL5xOIpbbzxM7GLiI=\ngithub.com/aws/aws-sdk-go-v2/service/ecs v1.64.1 
h1:kAzHjjqQnu3ET5/cX1N5tKPqtExYk97wpD6MpRadq/A=\ngithub.com/aws/aws-sdk-go-v2/service/ecs v1.64.1/go.mod h1:HIaZTpBD7+mgQEIv2wMzXYJw2T23sMFVNp2Mkw/ODFk=\ngithub.com/aws/aws-sdk-go-v2/service/iam v1.47.6 h1:EWehQXACWr+6hzfZPwZChlfoVhiUCfLHE0Xh3kAfzWQ=\ngithub.com/aws/aws-sdk-go-v2/service/iam v1.47.6/go.mod h1:qRXgEBWPIltrWHQwU+HkyBvwh1QgeigFcaCGCIVrWk0=\ngithub.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.13.1 h1:oegbebPEMA/1Jny7kvwejowCaHz1FWZAQ94WXFNCyTM=\ngithub.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.13.1/go.mod h1:kemo5Myr9ac0U9JfSjMo9yHLtw+pECEHsFtJ9tqCEI8=\ngithub.com/aws/aws-sdk-go-v2/service/internal/checksum v1.8.8 h1:tIN8MFT1z5STK5kTdOT1TCfMN/bn5fSEnlKsTL8qBOU=\ngithub.com/aws/aws-sdk-go-v2/service/internal/checksum v1.8.8/go.mod h1:VKS56txtNWjKI8FqD/hliL0BcshyF4ZaLBa1rm2Y+5s=\ngithub.com/aws/aws-sdk-go-v2/service/internal/endpoint-discovery v1.11.8 h1:0lJ7+zL81zesTu1nd1ocKpEoYi6BqDppjoAJLn18Vr0=\ngithub.com/aws/aws-sdk-go-v2/service/internal/endpoint-discovery v1.11.8/go.mod h1:5t+iImUczd3RYSVnc20t/ohBrmrkpdcy89pm62BSDQo=\ngithub.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.13.8 h1:M6JI2aGFEzYxsF6CXIuRBnkge9Wf9a2xU39rNeXgu10=\ngithub.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.13.8/go.mod h1:Fw+MyTwlwjFsSTE31mH211Np+CUslml8mzc0AFEG09s=\ngithub.com/aws/aws-sdk-go-v2/service/internal/s3shared v1.19.8 h1:AgYCo1Rb8XChJXA871BXHDNxNWOTAr6V5YdsRIBbgv0=\ngithub.com/aws/aws-sdk-go-v2/service/internal/s3shared v1.19.8/go.mod h1:Au9dvIGm1Hbqnt29d3VakOCQuN9l0WrkDDTRq8biWS4=\ngithub.com/aws/aws-sdk-go-v2/service/kms v1.45.5 h1:5AsmehPcxIp+Y8GVRa91UKpu3AO1gxhdckippth6bnA=\ngithub.com/aws/aws-sdk-go-v2/service/kms v1.45.5/go.mod h1:ooAdc5n3rjgEznIXncCYY6V9+YQDcJAYyZDJ4TwLSDM=\ngithub.com/aws/aws-sdk-go-v2/service/lambda v1.77.5 h1:rKc5Ad3PJlXGo5pigWii+m/hSPgxbNJtOicEP5nbV2E=\ngithub.com/aws/aws-sdk-go-v2/service/lambda v1.77.5/go.mod 
h1:fPYDox6U6puh6xhMyWpUWd19QIIqMlcQ6iCdC1jk2cE=\ngithub.com/aws/aws-sdk-go-v2/service/rds v1.107.1 h1:j7GQZWF0CbHCObPEZUK6QuP3yUQwjBJmlaojHPRZ6f8=\ngithub.com/aws/aws-sdk-go-v2/service/rds v1.107.1/go.mod h1:OW/mwGWAs6l1HnZpJupatcUFt1V0y6OiUMUp+Wd0DEc=\ngithub.com/aws/aws-sdk-go-v2/service/route53 v1.58.3 h1:jQzRC+0eI/l5mFXVoPTyyolrqyZtKIYaKHSuKJoIJKs=\ngithub.com/aws/aws-sdk-go-v2/service/route53 v1.58.3/go.mod h1:1GNaojT/gG4Ru9tT39ton6kRZ3FvptJ/QRKBoqUOVX4=\ngithub.com/aws/aws-sdk-go-v2/service/s3 v1.88.2 h1:T7b3qniouutV5Wwa9B1q7gW+Y8s1B3g9RE9qa7zLBIM=\ngithub.com/aws/aws-sdk-go-v2/service/s3 v1.88.2/go.mod h1:tW9TsLb6t1eaTdBE6LITyJW1m/+DjQPU78Q/jT2FJu8=\ngithub.com/aws/aws-sdk-go-v2/service/secretsmanager v1.39.5 h1:ssRo1z8FdFaoZc1AWz1R6/amdsxy56akVPql15/AYSs=\ngithub.com/aws/aws-sdk-go-v2/service/secretsmanager v1.39.5/go.mod h1:ut4ISJEOb5t2M1DNfx1787tF3UJGlwF3Q97uEulV/lU=\ngithub.com/aws/aws-sdk-go-v2/service/sns v1.38.4 h1:MkaMcZGwW9vt0cW+N2i5JSF/zkxKyDqpGCP1VWip3YM=\ngithub.com/aws/aws-sdk-go-v2/service/sns v1.38.4/go.mod h1:S0rwG+VHP1/jKoT6xJDe8f8Apz9HO42dUI8DmnOzYYU=\ngithub.com/aws/aws-sdk-go-v2/service/sqs v1.42.7 h1:KZldI+77SMG8vHDE55HYSjPcKSeOy2WIRo+HtIz2IY8=\ngithub.com/aws/aws-sdk-go-v2/service/sqs v1.42.7/go.mod h1:wbgNsM9psd+xQtLSDUAICjFCT/HXNZIgx3qyjqQNt88=\ngithub.com/aws/aws-sdk-go-v2/service/ssm v1.65.0 h1:6bPuMpky+qG4L7VQ1RyYVkBrEix1JRC/JPweTRfRDko=\ngithub.com/aws/aws-sdk-go-v2/service/ssm v1.65.0/go.mod h1:mbnkxOJSgkV4YHA5dWSlLolvC1EuxNcaGfn0Gf4e9UU=\ngithub.com/aws/aws-sdk-go-v2/service/sso v1.29.4 h1:FTdEN9dtWPB0EOURNtDPmwGp6GGvMqRJCAihkSl/1No=\ngithub.com/aws/aws-sdk-go-v2/service/sso v1.29.4/go.mod h1:mYubxV9Ff42fZH4kexj43gFPhgc/LyC7KqvUKt1watc=\ngithub.com/aws/aws-sdk-go-v2/service/ssooidc v1.35.0 h1:I7ghctfGXrscr7r1Ga/mDqSJKm7Fkpl5Mwq79Z+rZqU=\ngithub.com/aws/aws-sdk-go-v2/service/ssooidc v1.35.0/go.mod h1:Zo9id81XP6jbayIFWNuDpA6lMBWhsVy+3ou2jLa4JnA=\ngithub.com/aws/aws-sdk-go-v2/service/sts v1.38.5 
h1:+LVB0xBqEgjQoqr9bGZbRzvg212B0f17JdflleJRNR4=\ngithub.com/aws/aws-sdk-go-v2/service/sts v1.38.5/go.mod h1:xoaxeqnnUaZjPjaICgIy5B+MHCSb/ZSOn4MvkFNOUA0=\ngithub.com/aws/smithy-go v1.23.0 h1:8n6I3gXzWJB2DxBDnfxgBaSX6oe0d/t10qGz7OKqMCE=\ngithub.com/aws/smithy-go v1.23.0/go.mod h1:t1ufH5HMublsJYulve2RKmHDC15xu1f26kHCp/HgceI=\ngithub.com/boombuler/barcode v1.0.1-0.20190219062509-6c824513bacc/go.mod h1:paBWMcWSl3LHKBqUq+rly7CNSldXjb2rDl3JlRe0mD8=\ngithub.com/boombuler/barcode v1.1.0 h1:ChaYjBR63fr4LFyGn8E8nt7dBSt3MiU3zMOZqFvVkHo=\ngithub.com/boombuler/barcode v1.1.0/go.mod h1:paBWMcWSl3LHKBqUq+rly7CNSldXjb2rDl3JlRe0mD8=\ngithub.com/cpuguy83/go-md2man/v2 v2.0.7 h1:zbFlGlXEAKlwXpmvle3d8Oe3YnkKIK4xSRTd3sHPnBo=\ngithub.com/cpuguy83/go-md2man/v2 v2.0.7/go.mod h1:oOW0eioCTA6cOiMLiUPZOpcVxMig6NIQQ7OS05n1F4g=\ngithub.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=\ngithub.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=\ngithub.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc h1:U9qPSI2PIWSS1VwoXQT9A3Wy9MM3WgvqSxFWenqJduM=\ngithub.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=\ngithub.com/emicklei/go-restful/v3 v3.13.0 h1:C4Bl2xDndpU6nJ4bc1jXd+uTmYPVUwkD6bFY/oTyCes=\ngithub.com/emicklei/go-restful/v3 v3.13.0/go.mod h1:6n3XBCmQQb25CM2LCACGz8ukIrRry+4bhvbpWn3mrbc=\ngithub.com/fxamacker/cbor/v2 v2.9.0 h1:NpKPmjDBgUfBms6tr6JZkTHtfFGcMKsw3eGcmD/sapM=\ngithub.com/fxamacker/cbor/v2 v2.9.0/go.mod h1:vM4b+DJCtHn+zz7h3FFp/hDAI9WNWCsZj23V5ytsSxQ=\ngithub.com/go-errors/errors v1.5.1 h1:ZwEMSLRCapFLflTpT7NKaAc7ukJ8ZPEjzlxt8rPN8bk=\ngithub.com/go-errors/errors v1.5.1/go.mod h1:sIVyrIiJhuEF+Pj9Ebtd6P/rEYROXFi3BopGUQ5a5Og=\ngithub.com/go-logr/logr v1.4.3 h1:CjnDlHq8ikf6E492q6eKboGOC0T8CDaOvkHCIg8idEI=\ngithub.com/go-logr/logr v1.4.3/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY=\ngithub.com/go-openapi/jsonpointer v0.22.0 
h1:TmMhghgNef9YXxTu1tOopo+0BGEytxA+okbry0HjZsM=\ngithub.com/go-openapi/jsonpointer v0.22.0/go.mod h1:xt3jV88UtExdIkkL7NloURjRQjbeUgcxFblMjq2iaiU=\ngithub.com/go-openapi/jsonreference v0.21.1 h1:bSKrcl8819zKiOgxkbVNRUBIr6Wwj9KYrDbMjRs0cDA=\ngithub.com/go-openapi/jsonreference v0.21.1/go.mod h1:PWs8rO4xxTUqKGu+lEvvCxD5k2X7QYkKAepJyCmSTT8=\ngithub.com/go-openapi/swag v0.25.0 h1:xyZhlgInBg6wOtyTD5b+pzwVqHSOliAvgvKW+POFUts=\ngithub.com/go-openapi/swag v0.25.0/go.mod h1:yhsa7GJvO1JBFZccLq9uh/MawsC0PQd8sNz88VBXQlU=\ngithub.com/go-openapi/swag/cmdutils v0.25.0 h1:iYZ24DEGPEk6L1jO09vw39KfpxbG7KhS+WeQexS8U5A=\ngithub.com/go-openapi/swag/cmdutils v0.25.0/go.mod h1:pdae/AFo6WxLl5L0rq87eRzVPm/XRHM3MoYgRMvG4A0=\ngithub.com/go-openapi/swag/conv v0.25.0 h1:5K+e44HkOgCVE0IJTbivurzHahT62DPr2DEJqR/+4pA=\ngithub.com/go-openapi/swag/conv v0.25.0/go.mod h1:oa1ZZnb1jubNdZlD1iAhGXt6Ic4hHtuO23MwTgAXR88=\ngithub.com/go-openapi/swag/fileutils v0.25.0 h1:t7aQRuRfsP29dY4vfrNvDZv7RurwRHuyjUedtYVDmYY=\ngithub.com/go-openapi/swag/fileutils v0.25.0/go.mod h1:+NXtt5xNZZqmpIpjqcujqojGFek9/w55b3ecmOdtg8M=\ngithub.com/go-openapi/swag/jsonname v0.25.0 h1:+fuNs9gdkb2w10hgsgOBx9jtx0pvtUaDRYxD91BEpEQ=\ngithub.com/go-openapi/swag/jsonname v0.25.0/go.mod h1:71Tekow6UOLBD3wS7XhdT98g5J5GR13NOTQ9/6Q11Zo=\ngithub.com/go-openapi/swag/jsonutils v0.25.0 h1:ELKpJT29T4N/AvmDqMeDFLx2QRZQOYFthzctbIX30+A=\ngithub.com/go-openapi/swag/jsonutils v0.25.0/go.mod h1:KYL8GyGoi6tek9ajpvn0le4BWmKoUVVv8yPxklViIMo=\ngithub.com/go-openapi/swag/jsonutils/fixtures_test v0.25.0 h1:ca9vKxLnJegL2bzqXRWNabKdqVGxBzrnO8/UZnr5W0Y=\ngithub.com/go-openapi/swag/jsonutils/fixtures_test v0.25.0/go.mod h1:kjmweouyPwRUEYMSrbAidoLMGeJ5p6zdHi9BgZiqmsg=\ngithub.com/go-openapi/swag/loading v0.25.0 h1:e9mjE5fJeaK0LTepHMtG0Ief+9ETXLFhWCx7ZfiI6LI=\ngithub.com/go-openapi/swag/loading v0.25.0/go.mod h1:2ZCWXwVY1XYuoue8Bdjbn5GJK4/ufXbCfcvoSPFQJqM=\ngithub.com/go-openapi/swag/mangling v0.25.0 
h1:VdTfDWX5lS3yURxYHF5SK7kYelSK69Lv2xEAeudTzM8=\ngithub.com/go-openapi/swag/mangling v0.25.0/go.mod h1:CdiMQ6pnfAgyQGSOIYnZkXvqhnnwOn997uXZMAd/7mQ=\ngithub.com/go-openapi/swag/netutils v0.25.0 h1:/e1LPmXfF9fcOYbbaP3+SQgon1fRwe5EZ0FjpR4vAjs=\ngithub.com/go-openapi/swag/netutils v0.25.0/go.mod h1:CAkkvqnUJX8NV96tNhEQvKz8SQo2KF0f7LleiJwIeRE=\ngithub.com/go-openapi/swag/stringutils v0.25.0 h1:iYfCF45GUeI/1Yrh8rQtTFCp5K1ToqWhUdzJZwvXvv8=\ngithub.com/go-openapi/swag/stringutils v0.25.0/go.mod h1:JLdSAq5169HaiDUbTvArA2yQxmgn4D6h4A+4HqVvAYg=\ngithub.com/go-openapi/swag/typeutils v0.25.0 h1:iUTsxu3F3h9v6CBzVFGXKPSBQt6d8XXgYy1YAlu+HJ8=\ngithub.com/go-openapi/swag/typeutils v0.25.0/go.mod h1:9McMC/oCdS4BKwk2shEB7x17P6HmMmA6dQRtAkSnNb8=\ngithub.com/go-openapi/swag/yamlutils v0.25.0 h1:apgy77seWLEM9HKDcieIgW8bG9aSZgH6nQ9THlHYgHA=\ngithub.com/go-openapi/swag/yamlutils v0.25.0/go.mod h1:0JvBRtc0mR02IqHURUeGgS9cG+Dfms4FCGXCnsgnt7c=\ngithub.com/go-sql-driver/mysql v1.9.3 h1:U/N249h2WzJ3Ukj8SowVFjdtZKfu9vlLZxjPXV1aweo=\ngithub.com/go-sql-driver/mysql v1.9.3/go.mod h1:qn46aNg1333BRMNU69Lq93t8du/dwxI64Gl8i5p1WMU=\ngithub.com/go-task/slim-sprig/v3 v3.0.0 h1:sUs3vkvUymDpBKi3qH1YSqBQk9+9D/8M2mN1vB6EwHI=\ngithub.com/go-task/slim-sprig/v3 v3.0.0/go.mod h1:W848ghGpv3Qj3dhTPRyJypKRiqCdHZiAzKg9hl15HA8=\ngithub.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q=\ngithub.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q=\ngithub.com/gonvenience/bunt v1.4.2 h1:nTgkFZsw38SIJKABhLj8aXj2rqion9Zo1so/EBkbFBY=\ngithub.com/gonvenience/bunt v1.4.2/go.mod h1:WjyEO2rSYR+OLZg67Ucl+gjdXPs8GpFl63SCA02XDyI=\ngithub.com/gonvenience/idem v0.0.2 h1:jWHknjPfSbiWgYKre9wB2FhMgVLd1RWXCXzVq+7VIWg=\ngithub.com/gonvenience/idem v0.0.2/go.mod h1:0Xv1MpnNL40+dsyOxaJFa7L8ekeTRr63WaWXpiWLFFM=\ngithub.com/gonvenience/neat v1.3.16 h1:Vb0iCkSHGWaA+ry69RY3HpQ6Ooo6o/g2wjI80db8DjI=\ngithub.com/gonvenience/neat v1.3.16/go.mod 
h1:sLxdQNNluxbpROxTTHs3XBSJX8fwFX5toEULUy74ODA=\ngithub.com/gonvenience/term v1.0.4 h1:qkCGfmUtpzs9W4jWgNijaGF6dg3oSIh+kZCzT5cPNZY=\ngithub.com/gonvenience/term v1.0.4/go.mod h1:OzNdQC5NVBou9AifaHd1QG6EP8iDdpaT7GFm1bVgslg=\ngithub.com/gonvenience/text v1.0.9 h1:U29BxT3NZnNPcfiEnAwt6yHXe38fQs2Q+WTqs1X+atI=\ngithub.com/gonvenience/text v1.0.9/go.mod h1:JQF1ifXNRaa66jnPLqoITA+y8WATlG0eJzFC9ElJS3s=\ngithub.com/gonvenience/ytbx v1.4.7 h1:3wJ7EOfdv3Lg+h0mzKo7f8d1zMY1EJtVzzYrA3UhjHQ=\ngithub.com/gonvenience/ytbx v1.4.7/go.mod h1:ZmAU727eOTYeC4aUJuqyb9vogNAN7NiSKfw6Aoxbqys=\ngithub.com/google/gnostic-models v0.7.0 h1:qwTtogB15McXDaNqTZdzPJRHvaVJlAl+HVQnLmJEJxo=\ngithub.com/google/gnostic-models v0.7.0/go.mod h1:whL5G0m6dmc5cPxKc5bdKdEN3UjI7OUGxBlw57miDrQ=\ngithub.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8=\ngithub.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU=\ngithub.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg=\ngithub.com/google/pprof v0.0.0-20250403155104-27863c87afa6 h1:BHT72Gu3keYf3ZEu2J0b1vyeLSOYI8bm5wbJM/8yDe8=\ngithub.com/google/pprof v0.0.0-20250403155104-27863c87afa6/go.mod h1:boTsfXsheKC2y+lKOCMpSfarhxDeIzfZG1jqGcPl3cA=\ngithub.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=\ngithub.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=\ngithub.com/gorilla/mux v1.8.1 h1:TuBL49tXwgrFYWhqrNgrUNEY92u81SPhu7sTdzQEiWY=\ngithub.com/gorilla/mux v1.8.1/go.mod h1:AKf9I4AEqPTmMytcMc0KkNouC66V3BtZ4qD5fmWSiMQ=\ngithub.com/gorilla/websocket v1.5.4-0.20250319132907-e064f32e3674 h1:JeSE6pjso5THxAzdVpqr6/geYxZytqFMBCOtn/ujyeo=\ngithub.com/gorilla/websocket v1.5.4-0.20250319132907-e064f32e3674/go.mod h1:r4w70xmWCQKmi1ONH4KIaBptdivuRPyosB9RmPlGEwA=\ngithub.com/gruntwork-io/go-commons v0.17.2 h1:14dsCJ7M5Vv2X3BIPKeG9Kdy6vTMGhM8L4WZazxfTuY=\ngithub.com/gruntwork-io/go-commons v0.17.2/go.mod 
h1:zs7Q2AbUKuTarBPy19CIxJVUX/rBamfW8IwuWKniWkE=\ngithub.com/gruntwork-io/terratest v0.51.0 h1:RCXlCwWlHqhUoxgF6n3hvywvbvrsTXqoqt34BrnLekw=\ngithub.com/gruntwork-io/terratest v0.51.0/go.mod h1:evZHXb8VWDgv5O5zEEwfkwMhkx9I53QR/RB11cISrpg=\ngithub.com/hashicorp/errwrap v1.0.0/go.mod h1:YH+1FKiLXxHSkmPseP+kNlulaMuP3n2brvKWEqk/Jc4=\ngithub.com/hashicorp/errwrap v1.1.0 h1:OxrOeh75EUXMY8TBjag2fzXGZ40LB6IKw45YeGUDY2I=\ngithub.com/hashicorp/errwrap v1.1.0/go.mod h1:YH+1FKiLXxHSkmPseP+kNlulaMuP3n2brvKWEqk/Jc4=\ngithub.com/hashicorp/go-multierror v1.1.1 h1:H5DkEtf6CXdFp0N0Em5UCwQpXMWke8IA0+lD48awMYo=\ngithub.com/hashicorp/go-multierror v1.1.1/go.mod h1:iw975J/qwKPdAO1clOe2L8331t/9/fmwbPZ6JB6eMoM=\ngithub.com/homeport/dyff v1.10.2 h1:XyB+D0KVwjbUFTZYIkvPtsImwkfh+ObH2CEdEHTqdr4=\ngithub.com/homeport/dyff v1.10.2/go.mod h1:0kIjL/JOGaXigzrLY6kcl5esSStbAa99r6GzEvr7lrs=\ngithub.com/jackc/pgpassfile v1.0.0 h1:/6Hmqy13Ss2zCq62VdNG8tM1wchn8zjSGOBJ6icpsIM=\ngithub.com/jackc/pgpassfile v1.0.0/go.mod h1:CEx0iS5ambNFdcRtxPj5JhEz+xB6uRky5eyVu/W2HEg=\ngithub.com/jackc/pgservicefile v0.0.0-20240606120523-5a60cdf6a761 h1:iCEnooe7UlwOQYpKFhBabPMi4aNAfoODPEFNiAnClxo=\ngithub.com/jackc/pgservicefile v0.0.0-20240606120523-5a60cdf6a761/go.mod h1:5TJZWKEWniPve33vlWYSoGYefn3gLQRzjfDlhSJ9ZKM=\ngithub.com/jackc/pgx/v5 v5.7.6 h1:rWQc5FwZSPX58r1OQmkuaNicxdmExaEz5A2DO2hUuTk=\ngithub.com/jackc/pgx/v5 v5.7.6/go.mod h1:aruU7o91Tc2q2cFp5h4uP3f6ztExVpyVv88Xl/8Vl8M=\ngithub.com/jackc/puddle/v2 v2.2.2 h1:PR8nw+E/1w0GLuRFSmiioY6UooMp6KJv0/61nB7icHo=\ngithub.com/jackc/puddle/v2 v2.2.2/go.mod h1:vriiEXHvEE654aYKXXjOvZM39qJ0q+azkZFrfEOc3H4=\ngithub.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM=\ngithub.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo=\ngithub.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8=\ngithub.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck=\ngithub.com/kr/pretty 
v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo=\ngithub.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE=\ngithub.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk=\ngithub.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=\ngithub.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI=\ngithub.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=\ngithub.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=\ngithub.com/lucasb-eyer/go-colorful v1.3.0 h1:2/yBRLdWBZKrf7gB40FoiKfAWYQ0lqNcbuQwVHXptag=\ngithub.com/lucasb-eyer/go-colorful v1.3.0/go.mod h1:R4dSotOR9KMtayYi1e77YzuveK+i7ruzyGqttikkLy0=\ngithub.com/mattn/go-ciede2000 v0.0.0-20170301095244-782e8c62fec3 h1:BXxTozrOU8zgC5dkpn3J6NTRdoP+hjok/e+ACr4Hibk=\ngithub.com/mattn/go-ciede2000 v0.0.0-20170301095244-782e8c62fec3/go.mod h1:x1uk6vxTiVuNt6S5R2UYgdhpj3oKojXvOXauHZ7dEnI=\ngithub.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY=\ngithub.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y=\ngithub.com/mattn/go-zglob v0.0.6 h1:mP8RnmCgho4oaUYDIDn6GNxYk+qJGUs8fJLn+twYj2A=\ngithub.com/mattn/go-zglob v0.0.6/go.mod h1:MxxjyoXXnMxfIpxTK2GAkw1w8glPsQILx3N5wrKakiY=\ngithub.com/mitchellh/go-homedir v1.1.0 h1:lukF9ziXFxDFPkA1vsr5zpc1XuPDn/wFntq5mG+4E0Y=\ngithub.com/mitchellh/go-homedir v1.1.0/go.mod h1:SfyaCUpYCn1Vlf4IUYiD9fPX4A5wJrkLzIz1N1q0pr0=\ngithub.com/mitchellh/go-ps v1.0.0 h1:i6ampVEEF4wQFF+bkYfwYgY+F/uYJDktmvLPf7qIgjc=\ngithub.com/mitchellh/go-ps v1.0.0/go.mod h1:J4lOc8z8yJs6vUwklHw2XEIiT4z4C40KtWVN3nvg8Pg=\ngithub.com/mitchellh/hashstructure v1.1.0 h1:P6P1hdjqAAknpY/M1CGipelZgp+4y9ja9kmUZPXP+H0=\ngithub.com/mitchellh/hashstructure v1.1.0/go.mod h1:xUDAozZz0Wmdiufv0uyhnHkUTN6/6d8ulp4AwfLKrmA=\ngithub.com/moby/spdystream v0.5.0 h1:7r0J1Si3QO/kjRitvSLVVFUjxMEb/YLj6S9FF62JBCU=\ngithub.com/moby/spdystream v0.5.0/go.mod 
h1:xBAYlnt/ay+11ShkdFKNAG7LsyK/tmNBVvVOwrfMgdI=\ngithub.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=\ngithub.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg=\ngithub.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=\ngithub.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk=\ngithub.com/modern-go/reflect2 v1.0.3-0.20250322232337-35a7c28c31ee h1:W5t00kpgFdJifH4BDsTlE89Zl93FEloxaWZfGcifgq8=\ngithub.com/modern-go/reflect2 v1.0.3-0.20250322232337-35a7c28c31ee/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk=\ngithub.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq1c1nUAm88MOHcQC9l5mIlSMApZMrHA=\ngithub.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ=\ngithub.com/mxk/go-flowrate v0.0.0-20140419014527-cca7078d478f h1:y5//uYreIhSUg3J1GEMiLbxo1LJaP8RfCpH6pymGZus=\ngithub.com/mxk/go-flowrate v0.0.0-20140419014527-cca7078d478f/go.mod h1:ZdcZmHo+o7JKHSa8/e818NopupXU1YMK5fe1lsApnBw=\ngithub.com/onsi/ginkgo/v2 v2.23.4 h1:ktYTpKJAVZnDT4VjxSbiBenUjmlL/5QkBEocaWXiQus=\ngithub.com/onsi/ginkgo/v2 v2.23.4/go.mod h1:Bt66ApGPBFzHyR+JO10Zbt0Gsp4uWxu5mIOTusL46e8=\ngithub.com/onsi/gomega v1.38.0 h1:c/WX+w8SLAinvuKKQFh77WEucCnPk4j2OTUr7lt7BeY=\ngithub.com/onsi/gomega v1.38.0/go.mod h1:OcXcwId0b9QsE7Y49u+BTrL4IdKOBOKnD6VQNTJEB6o=\ngithub.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4=\ngithub.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=\ngithub.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=\ngithub.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 h1:Jamvg5psRIccs7FGNTlIRMkT8wgtp5eCXdBlqhYGL6U=\ngithub.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2/go.mod 
h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=\ngithub.com/pquerna/otp v1.5.0 h1:NMMR+WrmaqXU4EzdGJEE1aUUI0AMRzsp96fFFWNPwxs=\ngithub.com/pquerna/otp v1.5.0/go.mod h1:dkJfzwRKNiegxyNb54X/3fLwhCynbMspSyWKnvi1AEg=\ngithub.com/rogpeppe/go-internal v1.13.1 h1:KvO1DLK/DRN07sQ1LQKScxyZJuNnedQ5/wKSR38lUII=\ngithub.com/rogpeppe/go-internal v1.13.1/go.mod h1:uMEvuHeurkdAXX61udpOXGD/AzZDWNMNyH2VO9fmH0o=\ngithub.com/russross/blackfriday/v2 v2.1.0 h1:JIOH55/0cWyOuilr9/qlrm0BSXldqnqwMsf35Ld67mk=\ngithub.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM=\ngithub.com/sergi/go-diff v1.4.0 h1:n/SP9D5ad1fORl+llWyN+D6qoUETXNZARKjyY2/KVCw=\ngithub.com/sergi/go-diff v1.4.0/go.mod h1:A0bzQcvG0E7Rwjx0REVgAGH58e96+X0MeOfepqsbeW4=\ngithub.com/spf13/pflag v1.0.10 h1:4EBh2KAYBwaONj6b2Ye1GiHfwjqyROoF4RwYO+vPwFk=\ngithub.com/spf13/pflag v1.0.10/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg=\ngithub.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=\ngithub.com/stretchr/objx v0.5.2 h1:xuMeJ0Sdp5ZMRXx/aWO6RZxdr3beISkG5/G/aIRr3pY=\ngithub.com/stretchr/objx v0.5.2/go.mod h1:FRsXN1f5AsAjCGJKqEizvkpNtU+EGNCLh3NxZ/8L+MA=\ngithub.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=\ngithub.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4=\ngithub.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=\ngithub.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U=\ngithub.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U=\ngithub.com/texttheater/golang-levenshtein v1.0.1 h1:+cRNoVrfiwufQPhoMzB6N0Yf/Mqajr6t1lOv8GyGE2U=\ngithub.com/texttheater/golang-levenshtein v1.0.1/go.mod h1:PYAKrbF5sAiq9wd+H82hs7gNaen0CplQ9uvm6+enD/8=\ngithub.com/urfave/cli/v2 v2.27.7 h1:bH59vdhbjLv3LAvIu6gd0usJHgoTTPhCFib8qqOwXYU=\ngithub.com/urfave/cli/v2 v2.27.7/go.mod 
h1:CyNAG/xg+iAOg0N4MPGZqVmv2rCoP267496AOXUZjA4=\ngithub.com/virtuald/go-ordered-json v0.0.0-20170621173500-b18e6e673d74 h1:JwtAtbp7r/7QSyGz8mKUbYJBg2+6Cd7OjM8o/GNOcVo=\ngithub.com/virtuald/go-ordered-json v0.0.0-20170621173500-b18e6e673d74/go.mod h1:RmMWU37GKR2s6pgrIEB4ixgpVCt/cf7dnJv3fuH1J1c=\ngithub.com/x448/float16 v0.8.4 h1:qLwI1I70+NjRFUR3zs1JPUCgaCXSh3SW62uAKT1mSBM=\ngithub.com/x448/float16 v0.8.4/go.mod h1:14CWIYCyZA/cWjXOioeEpHeN/83MdbZDRQHoFcYsOfg=\ngithub.com/xrash/smetrics v0.0.0-20250705151800-55b8f293f342 h1:FnBeRrxr7OU4VvAzt5X7s6266i6cSVkkFPS0TuXWbIg=\ngithub.com/xrash/smetrics v0.0.0-20250705151800-55b8f293f342/go.mod h1:Ohn+xnUBiLI6FVj/9LpzZWtj1/D6lUovWYBkxHVV3aM=\ngithub.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=\ngithub.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=\ngo.uber.org/automaxprocs v1.6.0 h1:O3y2/QNTOdbF+e/dpXNNW7Rx2hZ4sTIPyybbxyNqTUs=\ngo.uber.org/automaxprocs v1.6.0/go.mod h1:ifeIMSnPZuznNm6jmdzmU3/bfk01Fe2fotchwEFJ8r8=\ngo.yaml.in/yaml/v2 v2.4.3 h1:6gvOSjQoTB3vt1l+CU+tSyi/HOjfOjRLJ4YwYZGwRO0=\ngo.yaml.in/yaml/v2 v2.4.3/go.mod h1:zSxWcmIDjOzPXpjlTTbAsKokqkDNAVtZO0WOMiT90s8=\ngo.yaml.in/yaml/v3 v3.0.4 h1:tfq32ie2Jv2UxXFdLJdh3jXuOzWiL1fo0bu/FbuKpbc=\ngo.yaml.in/yaml/v3 v3.0.4/go.mod h1:DhzuOOF2ATzADvBadXxruRBLzYTpT36CKvDb3+aBEFg=\ngolang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=\ngolang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=\ngolang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=\ngolang.org/x/crypto v0.42.0 h1:chiH31gIWm57EkTXpwnqf8qeuMUi0yekh6mT2AvFlqI=\ngolang.org/x/crypto v0.42.0/go.mod h1:4+rDnOTJhQCx2q7/j6rAN5XDw8kPjeaXEUR2eL94ix8=\ngolang.org/x/exp v0.0.0-20250911091902-df9299821621 h1:2id6c1/gto0kaHYyrixvknJ8tUK/Qs5IsmBtrc+FtgU=\ngolang.org/x/exp 
v0.0.0-20250911091902-df9299821621/go.mod h1:TwQYMMnGpvZyc+JpB/UAuTNIsVJifOlSkrZkhcvpVUk=\ngolang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=\ngolang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=\ngolang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=\ngolang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=\ngolang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=\ngolang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU=\ngolang.org/x/net v0.44.0 h1:evd8IRDyfNBMBTTY5XRF1vaZlD+EmWx6x8PkhR04H/I=\ngolang.org/x/net v0.44.0/go.mod h1:ECOoLqd5U3Lhyeyo/QDCEVQ4sNgYsqvCZ722XogGieY=\ngolang.org/x/oauth2 v0.31.0 h1:8Fq0yVZLh4j4YA47vHKFTa9Ew5XIrCP8LC6UeNZnLxo=\ngolang.org/x/oauth2 v0.31.0/go.mod h1:lzm5WQJQwKZ3nwavOZ3IS5Aulzxi68dUSgRHujetwEA=\ngolang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=\ngolang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=\ngolang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=\ngolang.org/x/sync v0.17.0 h1:l60nONMj9l5drqw6jlhIELNv9I0A4OFgRsG9k2oT9Ug=\ngolang.org/x/sync v0.17.0/go.mod h1:9KTHXmSnoGruLpwFjVSX0lNNA75CykiMECbovNTZqGI=\ngolang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=\ngolang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=\ngolang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=\ngolang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=\ngolang.org/x/sys v0.36.0 h1:KVRy2GtZBrk1cBYA7MKu5bEZFxQk4NIDV6RLVcC8o0k=\ngolang.org/x/sys v0.36.0/go.mod 
h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks=\ngolang.org/x/term v0.35.0 h1:bZBVKBudEyhRcajGcNc3jIfWPqV4y/Kt2XcoigOWtDQ=\ngolang.org/x/term v0.35.0/go.mod h1:TPGtkTLesOwf2DE8CgVYiZinHAOuy5AYUYT1lENIZnA=\ngolang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=\ngolang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=\ngolang.org/x/text v0.29.0 h1:1neNs90w9YzJ9BocxfsQNHKuAT4pkghyXc4nhZ6sJvk=\ngolang.org/x/text v0.29.0/go.mod h1:7MhJOA9CD2qZyOKYazxdYMF85OwPdEr9jTtBpO7ydH4=\ngolang.org/x/time v0.13.0 h1:eUlYslOIt32DgYD6utsuUeHs4d7AsEYLuIAdg7FlYgI=\ngolang.org/x/time v0.13.0/go.mod h1:eL/Oa2bBBK0TkX57Fyni+NgnyQQN4LitPmob2Hjnqw4=\ngolang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=\ngolang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=\ngolang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE=\ngolang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA=\ngolang.org/x/tools v0.37.0 h1:DVSRzp7FwePZW356yEAChSdNcQo6Nsp+fex1SUW09lE=\ngolang.org/x/tools v0.37.0/go.mod h1:MBN5QPQtLMHVdvsbtarmTNukZDdgwdwlO5qGacAzF0w=\ngolang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=\ngolang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=\ngolang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=\ngolang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=\ngoogle.golang.org/protobuf v1.36.9 h1:w2gp2mA27hUeUzj9Ex9FBjsBm40zfaDtEWow293U7Iw=\ngoogle.golang.org/protobuf v1.36.9/go.mod h1:fuxRtAxBytpl4zzqUh6/eyUujkJdNiuEkXntxiD/uRU=\ngopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod 
h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=\ngopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=\ngopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk=\ngopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q=\ngopkg.in/evanphx/json-patch.v4 v4.13.0 h1:czT3CmqEaQ1aanPc5SdlgQrrEIb8w/wwCvWWnfEbYzo=\ngopkg.in/evanphx/json-patch.v4 v4.13.0/go.mod h1:p8EYWUEYMpynmqDbY58zCKCFZw8pRWMG4EsWvDvM72M=\ngopkg.in/inf.v0 v0.9.1 h1:73M5CoZyi3ZLMOyDlQh031Cx6N9NDJ2Vvfl76EDAgDc=\ngopkg.in/inf.v0 v0.9.1/go.mod h1:cWUDdTG/fYaXco+Dcufb5Vnc6Gp2YChqWtbxRZE0mXw=\ngopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=\ngopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY=\ngopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ=\ngopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=\ngopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=\ngopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=\nk8s.io/api v0.34.1 h1:jC+153630BMdlFukegoEL8E/yT7aLyQkIVuwhmwDgJM=\nk8s.io/api v0.34.1/go.mod h1:SB80FxFtXn5/gwzCoN6QCtPD7Vbu5w2n1S0J5gFfTYk=\nk8s.io/apimachinery v0.34.1 h1:dTlxFls/eikpJxmAC7MVE8oOeP1zryV7iRyIjB0gky4=\nk8s.io/apimachinery v0.34.1/go.mod h1:/GwIlEcWuTX9zKIg2mbw0LRFIsXwrfoVxn+ef0X13lw=\nk8s.io/client-go v0.34.1 h1:ZUPJKgXsnKwVwmKKdPfw4tB58+7/Ik3CrjOEhsiZ7mY=\nk8s.io/client-go v0.34.1/go.mod h1:kA8v0FP+tk6sZA0yKLRG67LWjqufAoSHA2xVGKw9Of8=\nk8s.io/klog/v2 v2.130.1 h1:n9Xl7H1Xvksem4KFG4PYbdQCQxqc/tTUyrgXaOhHSzk=\nk8s.io/klog/v2 v2.130.1/go.mod h1:3Jpz1GvMt720eyJH1ckRHK1EDfpxISzJ7I9OYgaDtPE=\nk8s.io/kube-openapi v0.0.0-20250910181357-589584f1c912 h1:Y3gxNAuB0OBLImH611+UDZcmKS3g6CthxToOb37KgwE=\nk8s.io/kube-openapi v0.0.0-20250910181357-589584f1c912/go.mod 
h1:kdmbQkyfwUagLfXIad1y2TdrjPFWp2Q89B3qkRwf/pQ=\nk8s.io/utils v0.0.0-20250820121507-0af2bda4dd1d h1:wAhiDyZ4Tdtt7e46e9M5ZSAJ/MnPGPs+Ki1gHw4w1R0=\nk8s.io/utils v0.0.0-20250820121507-0af2bda4dd1d/go.mod h1:OLgZIPagt7ERELqWJFomSt595RzquPNLL48iOWgYOg0=\nsigs.k8s.io/json v0.0.0-20250730193827-2d320260d730 h1:IpInykpT6ceI+QxKBbEflcR5EXP7sU1kvOlxwZh5txg=\nsigs.k8s.io/json v0.0.0-20250730193827-2d320260d730/go.mod h1:mdzfpAEoE6DHQEN0uh9ZbOCuHbLK5wOm7dK4ctXE9Tg=\nsigs.k8s.io/randfill v1.0.0 h1:JfjMILfT8A6RbawdsK2JXGBR5AQVfd+9TbzrlneTyrU=\nsigs.k8s.io/randfill v1.0.0/go.mod h1:XeLlZ/jmk4i1HRopwe7/aU3H5n1zNUcX6TM94b3QxOY=\nsigs.k8s.io/structured-merge-diff/v6 v6.3.0 h1:jTijUJbW353oVOd9oTlifJqOGEkUw2jB/fXCbTiQEco=\nsigs.k8s.io/structured-merge-diff/v6 v6.3.0/go.mod h1:M3W8sfWvn2HhQDIbGWj3S099YozAsymCo/wrT5ohRUE=\nsigs.k8s.io/yaml v1.6.0 h1:G8fkbMSAFqgEFgh4b1wmtzDnioxFCUgTZhlbj5P9QYs=\nsigs.k8s.io/yaml v1.6.0/go.mod h1:796bPqUfzR/0jLAl6XjHl3Ck7MiyVv8dbTdyT3/pMf4=\n"
  },
  {
    "path": "contrib/charts/dragonfly/golden_test.go",
    "content": "package golden\n\nimport (\n\t\"flag\"\n\t\"fmt\"\n\t\"os\"\n\t\"path/filepath\"\n\t\"regexp\"\n\t\"strings\"\n\t\"testing\"\n\n\t\"github.com/gruntwork-io/terratest/modules/helm\"\n)\n\nvar update = flag.Bool(\"update\", false, \"update golden test output files\")\n\nfunc TestHelmRender(t *testing.T) {\n\tfiles, err := os.ReadDir(\"./ci\")\n\tif err != nil {\n\t\tt.Fatal(err)\n\t}\n\n\tfor _, f := range files {\n\t\tif !f.IsDir() && strings.HasSuffix(f.Name(), \".yaml\") && !strings.HasSuffix(f.Name(), \".golden.yaml\") {\n\t\t\t// Render this values.yaml file\n\t\t\toutput := helm.RenderTemplate(t,\n\t\t\t\t&helm.Options{\n\t\t\t\t\tValuesFiles: []string{\"ci/\" + f.Name()},\n\t\t\t\t},\n\t\t\t\t\"../dragonfly\",\n\t\t\t\t\"test\",\n\t\t\t\tnil,\n\t\t\t)\n\n\t\t\tgoldenFile := \"ci/\" + strings.TrimSuffix(f.Name(), filepath.Ext(\".yaml\")) + \".golden.yaml\"\n\t\t\tregex := regexp.MustCompile(`\\s+helm.sh/chart:\\s+.*`)\n\t\t\tbytes := regex.ReplaceAll([]byte(output), []byte(\"\"))\n\n\t\t\toutput = fmt.Sprintf(\"%s\\n\", string(bytes))\n\n\t\t\tif *update {\n\t\t\t\terr := os.WriteFile(goldenFile, []byte(output), 0644)\n\t\t\t\tif err != nil {\n\t\t\t\t\tt.Fatal(err)\n\t\t\t\t}\n\t\t\t}\n\n\t\t\texpected, err := os.ReadFile(goldenFile)\n\t\t\tif err != nil {\n\t\t\t\tt.Fatal(err)\n\t\t\t}\n\n\t\t\tif string(expected) != output {\n\t\t\t\tt.Fatalf(\"Expected %s, but got %s\\n. Update golden files by running `go test -v ./... -update`\", string(expected), output)\n\t\t\t}\n\t\t}\n\t}\n}\n"
  },
  {
    "path": "contrib/charts/dragonfly/templates/NOTES.txt",
    "content": "1. Get the application URL by running these commands:\n\n{{- if contains \"NodePort\" .Values.service.type }}\n  export NODE_PORT=$(kubectl get --namespace {{ .Release.Namespace }} -o jsonpath=\"{.spec.ports[0].nodePort}\" services {{ include \"dragonfly.fullname\" . }})\n  export NODE_IP=$(kubectl get nodes --namespace {{ .Release.Namespace }} -o jsonpath=\"{.items[0].status.addresses[0].address}\")\n  echo http://$NODE_IP:$NODE_PORT\n{{- else if contains \"LoadBalancer\" .Values.service.type }}\n     NOTE: It may take a few minutes for the LoadBalancer IP to be available.\n           You can watch its status by running 'kubectl get --namespace {{ .Release.Namespace }} svc -w {{ include \"dragonfly.fullname\" . }}'\n  export SERVICE_IP=$(kubectl get svc --namespace {{ .Release.Namespace }} {{ include \"dragonfly.fullname\" . }} --template \"{{\"{{ range (index .status.loadBalancer.ingress 0) }}{{.}}{{ end }}\"}}\")\n  echo http://$SERVICE_IP:{{ .Values.service.port }}\n{{- else if contains \"ClusterIP\" .Values.service.type }}\n  export POD_NAME=$(kubectl get pods --namespace {{ .Release.Namespace }} -l \"app.kubernetes.io/name={{ include \"dragonfly.name\" . }},app.kubernetes.io/instance={{ .Release.Name }}\" -o jsonpath=\"{.items[0].metadata.name}\")\n  export CONTAINER_PORT=$(kubectl get pod --namespace {{ .Release.Namespace }} $POD_NAME -o jsonpath=\"{.spec.containers[0].ports[0].containerPort}\")\n  echo \"You can use redis-cli to connect against localhost:6379\"\n  kubectl --namespace {{ .Release.Namespace }} port-forward $POD_NAME 6379:$CONTAINER_PORT\n{{- end }}"
  },
  {
    "path": "contrib/charts/dragonfly/templates/_helpers.tpl",
    "content": "{{/*\nExpand the name of the chart.\n*/}}\n{{- define \"dragonfly.name\" -}}\n{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix \"-\" }}\n{{- end }}\n\n{{/*\nCreate a default fully qualified app name.\nWe truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec).\nIf release name contains chart name it will be used as a full name.\n*/}}\n{{- define \"dragonfly.fullname\" -}}\n{{- if .Values.fullnameOverride }}\n{{- .Values.fullnameOverride | trunc 63 | trimSuffix \"-\" }}\n{{- else }}\n{{- $name := default .Chart.Name .Values.nameOverride }}\n{{- if contains $name .Release.Name }}\n{{- .Release.Name | trunc 63 | trimSuffix \"-\" }}\n{{- else }}\n{{- printf \"%s-%s\" .Release.Name $name | trunc 63 | trimSuffix \"-\" }}\n{{- end }}\n{{- end }}\n{{- end }}\n\n{{/*\nCreate chart name and version as used by the chart label.\n*/}}\n{{- define \"dragonfly.chart\" -}}\n{{- printf \"%s-%s\" .Chart.Name .Chart.Version | replace \"+\" \"_\" | trunc 63 | trimSuffix \"-\" }}\n{{- end }}\n\n{{/*\nCommon labels\n*/}}\n{{- define \"dragonfly.labels\" -}}\nhelm.sh/chart: {{ include \"dragonfly.chart\" . }}\n{{ include \"dragonfly.selectorLabels\" . }}\n{{- if .Chart.AppVersion }}\napp.kubernetes.io/version: {{ .Chart.AppVersion | quote }}\n{{- end }}\napp.kubernetes.io/managed-by: {{ .Release.Service }}\n{{- include \"dragonfly.commonLabels\" . }}\n{{- end }}\n\n{{/*\nUser-defined common labels\n*/}}\n{{- define \"dragonfly.commonLabels\" -}}\n{{- if .Values.commonLabels }}\n{{- range $key, $value := .Values.commonLabels }}\n{{ $key }}: {{ $value }}\n{{- end }}\n{{- end }}\n{{- end }}\n\n{{/*\nSelector labels\n*/}}\n{{- define \"dragonfly.selectorLabels\" -}}\napp.kubernetes.io/name: {{ include \"dragonfly.name\" . 
}}\napp.kubernetes.io/instance: {{ .Release.Name }}\n{{- end }}\n\n{{/*\nCreate the name of the service account to use\n*/}}\n{{- define \"dragonfly.serviceAccountName\" -}}\n{{- if .Values.serviceAccount.create }}\n{{- default (include \"dragonfly.fullname\" .) .Values.serviceAccount.name }}\n{{- else }}\n{{- default \"default\" .Values.serviceAccount.name }}\n{{- end }}\n{{- end }}\n"
  },
  {
    "path": "contrib/charts/dragonfly/templates/_pod.tpl",
    "content": "{{- define \"dragonfly.volumemounts\" -}}\n{{- if or (.Values.storage.enabled) (.Values.extraVolumeMounts) (.Values.tls.enabled) }}\nvolumeMounts:\n  {{- if .Values.storage.enabled }}\n  - mountPath: /data\n    name: \"{{ .Release.Name }}-data\"\n  {{- end }}\n  {{- if and .Values.tls .Values.tls.enabled }}\n  - mountPath: /etc/dragonfly/tls\n    name: tls\n  {{- end }}\n  {{- with .Values.extraVolumeMounts }}\n    {{- toYaml . | trim | nindent 2 }}\n  {{- end }}\n{{- end }}\n{{- end }}\n\n{{- define \"dragonfly.pod\" -}}\n{{- if ne .Values.priorityClassName \"\" }}\npriorityClassName: {{ .Values.priorityClassName }}\n{{- end }}\n{{- with .Values.tolerations }}\ntolerations:\n  {{- toYaml . | trim | nindent 2 -}}\n{{- end }}\n{{- with .Values.nodeSelector }}\nnodeSelector:\n  {{- toYaml . | trim | nindent 2 -}}\n{{- end }}\n{{- with .Values.affinity }}\naffinity:\n  {{- toYaml . | trim | nindent 2 -}}\n{{- end }}\nserviceAccountName: {{ include \"dragonfly.serviceAccountName\" . }}\n{{- with .Values.imagePullSecrets }}\nimagePullSecrets:\n  {{- toYaml . | trim | nindent 2 }}\n{{- end }}\n{{- with .Values.podSecurityContext }}\nsecurityContext:\n  {{- toYaml . | trim | nindent 2 }}\n{{- end }}\n{{- if and (eq (typeOf .Values.hostNetwork) \"bool\") .Values.hostNetwork }}\nhostNetwork: true\n{{- end }}\n{{- with .Values.topologySpreadConstraints }}\ntopologySpreadConstraints:\n  {{- toYaml . | trim | nindent 2 }}\n{{- end }}\n{{- with .Values.initContainers }}\ninitContainers:\n  {{- if eq (typeOf .) \"string\" }}\n  {{- tpl . $ | trim | nindent 2 }}\n  {{- else }}\n  {{- toYaml . | trim | nindent 2 }}\n  {{- end }}\n{{- end }}\ncontainers:\n  {{- with .Values.extraContainers }}\n  {{- if eq (typeOf .) \"string\" -}}\n  {{- tpl . $ | trim | nindent 2 }}\n  {{- else }}\n  {{- toYaml . | trim | nindent 2 }}\n  {{- end }}\n  {{- end }}\n  - name: {{ .Chart.Name }}\n    {{- with .Values.securityContext }}\n    securityContext:\n      {{- toYaml . 
| trim | nindent 6 }}\n    {{- end }}\n    image: \"{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}\"\n    imagePullPolicy: {{ .Values.image.pullPolicy }}\n    ports:\n      - name: dragonfly\n        containerPort: 6379\n        protocol: TCP\n    {{- with .Values.probes }}\n    {{- toYaml . | trim | nindent 4 }}\n    {{- end }}\n    {{- with .Values.command }}\n    command:\n      {{- toYaml . | trim | nindent 6 }}\n    {{- end }}\n    args:\n      - \"--alsologtostderr\"\n    {{- with .Values.extraArgs }}\n      {{- toYaml . | trim | nindent 6 }}\n    {{- end }}\n    {{- if .Values.tls.enabled }}\n      - \"--tls\"\n      - \"--tls_cert_file=/etc/dragonfly/tls/tls.crt\"\n      - \"--tls_key_file=/etc/dragonfly/tls/tls.key\"\n    {{- end }}\n    {{- with .Values.resources }}\n    resources:\n      {{- toYaml . | trim | nindent 6 }}\n    {{- end }}\n    {{- include \"dragonfly.volumemounts\" . | trim | nindent 4 }}\n    {{- if or .Values.passwordFromSecret.enable .Values.env }}\n    env:\n    {{- if .Values.passwordFromSecret.enable }}\n    {{- $appVersion := .Chart.AppVersion | trimPrefix \"v\" }}\n    {{- $imageTag := .Values.image.tag | trimPrefix \"v\" }}\n    {{- $effectiveVersion := $appVersion }}\n    {{- if and $imageTag (ne $imageTag \"\") }}\n      {{- $effectiveVersion = $imageTag }}\n    {{- end }}\n    {{- if semverCompare \">=1.14.0\" $effectiveVersion }}\n      - name: DFLY_requirepass\n    {{- else }}\n      - name: DFLY_PASSWORD\n    {{- end }}\n        valueFrom:\n          secretKeyRef:\n            name: {{ tpl .Values.passwordFromSecret.existingSecret.name $ }}\n            key: {{ .Values.passwordFromSecret.existingSecret.key }}\n    {{- end }}\n    {{- with .Values.env }}\n      {{- toYaml . | trim | nindent 6 }}\n    {{- end }}\n    {{- end }}\n    {{- with .Values.envFrom }}\n    envFrom:\n      {{- toYaml . 
| trim | nindent 6 }}\n    {{- end }}\n\n{{- if or (.Values.tls.enabled) (.Values.extraVolumes) }}\nvolumes:\n{{- if and .Values.tls .Values.tls.enabled }}\n  {{- if .Values.tls.existing_secret }}\n  - name: tls\n    secret:\n      secretName: {{ .Values.tls.existing_secret }}\n  {{- else if .Values.tls.createCerts }}\n  - name: tls\n    secret:\n      secretName: '{{ include \"dragonfly.fullname\" . }}-server-tls'\n  {{- else }}\n  - name: tls\n    secret:\n      secretName: {{ include \"dragonfly.fullname\" . }}-tls\n  {{- end }}\n{{- end }}\n{{- with .Values.extraVolumes }}\n  {{- toYaml . | trim | nindent 2 }}\n{{- end }}\n{{- end }}\n{{- end }}\n"
  },
  {
    "path": "contrib/charts/dragonfly/templates/certificate.yaml",
    "content": "{{- if and .Values.tls.enabled .Values.tls.createCerts }}\napiVersion: cert-manager.io/v1\nkind: Certificate\nmetadata:\n  name: {{ include \"dragonfly.fullname\" . }}\n  namespace: {{ .Release.Namespace }}\n  labels:\n    {{- include \"dragonfly.labels\" . | nindent 4 }}\nspec:\n  commonName: '{{ include \"dragonfly.fullname\" . }}'\n  dnsNames:\n  - '*.{{ include \"dragonfly.fullname\" . }}.{{ .Release.Namespace }}.svc.cluster.local'\n  - '{{ include \"dragonfly.fullname\" . }}.{{ .Release.Namespace }}.svc.cluster.local'\n  - '{{ include \"dragonfly.fullname\" . }}.{{ .Release.Namespace }}.svc'\n  - '{{ include \"dragonfly.fullname\" . }}.{{ .Release.Namespace }}'\n  - '{{ include \"dragonfly.fullname\" . }}'\n  - localhost\n  duration: {{ required \"tls.duration is required, if createCerts is enabled\" .Values.tls.duration }}\n  ipAddresses:\n  - 127.0.0.1\n  issuerRef:\n    kind: {{ required \"tls.issuer.kind is required, if createCerts is enabled\" .Values.tls.issuer.kind }}\n    name: {{ required \"tls.issuer.name is required, if createCerts is enabled\" .Values.tls.issuer.name }}\n    group: {{ .Values.tls.issuer.group }}\n  secretName: '{{ include \"dragonfly.fullname\" . }}-server-tls'\n  usages:\n  - client auth\n  - server auth\n  - signing\n  - key encipherment\n{{- end }}\n"
  },
  {
    "path": "contrib/charts/dragonfly/templates/deployment.yaml",
    "content": "{{- if not .Values.storage.enabled }}\napiVersion: apps/v1\nkind: Deployment\nmetadata:\n  name: {{ include \"dragonfly.fullname\" . }}\n  namespace: {{ .Release.Namespace }}\n  labels:\n    {{- include \"dragonfly.labels\" . | nindent 4 }}\nspec:\n  replicas: {{ .Values.replicaCount }}\n  selector:\n    matchLabels:\n      {{- include \"dragonfly.selectorLabels\" . | nindent 6 }}\n  template:\n    metadata:\n      annotations:\n        {{- if and (.Values.tls.enabled) (not .Values.tls.existing_secret) }}\n        checksum/tls-secret: {{ include (print $.Template.BasePath \"/tls-secret.yaml\") . | sha256sum }}\n        {{- end }}\n        {{- with .Values.podAnnotations }}\n        {{- toYaml . | nindent 8 }}\n        {{- end }}\n      labels:\n        {{- include \"dragonfly.selectorLabels\" . | nindent 8 }}\n        {{- if .Values.commonLabels }}\n        {{- include \"dragonfly.commonLabels\" . | trim | nindent 8 }}\n        {{- end }}\n    spec:\n      {{- include \"dragonfly.pod\" . | trim | nindent 6 }}\n{{- end }}\n"
  },
  {
    "path": "contrib/charts/dragonfly/templates/extra-manifests.yaml",
    "content": "{{ range .Values.extraObjects }}\n---\n{{ tpl (toYaml .) $ }}\n{{ end }}\n"
  },
  {
    "path": "contrib/charts/dragonfly/templates/metrics-service.yaml",
    "content": "{{- if .Values.serviceMonitor.enabled }}\napiVersion: v1\nkind: Service\nmetadata:\n  name: {{ include \"dragonfly.fullname\" . }}-metrics\n  namespace: {{ .Release.Namespace }}\n  labels:\n    {{- include \"dragonfly.labels\" . | nindent 4 }}\n    type: metrics\nspec:\n  type: {{ .Values.service.metrics.serviceType }}\n  ports:\n    - name: {{ .Values.service.metrics.portName }}\n      port: {{ .Values.service.port }}\n      targetPort: {{ .Values.service.port }}\n      protocol: TCP\n  selector:\n    {{- include \"dragonfly.selectorLabels\" . | nindent 4 }}\n{{- end }}\n"
  },
  {
    "path": "contrib/charts/dragonfly/templates/prometheusrule.yaml",
    "content": "{{- if and ( .Capabilities.APIVersions.Has \"monitoring.coreos.com/v1\" ) .Values.serviceMonitor.enabled .Values.prometheusRule.enabled }}\napiVersion: monitoring.coreos.com/v1\nkind: PrometheusRule\nmetadata:\n  name: {{ template \"dragonfly.fullname\" . }}-metrics\n  namespace: {{ .Values.prometheusRule.namespace | default .Release.Namespace }}\n  labels:\n    {{- include \"dragonfly.labels\" . | nindent 4 }}\nspec:\n  groups:\n  - name: {{ template \"dragonfly.name\" . }}\n    rules:\n      {{- toYaml .Values.prometheusRule.spec | nindent 6 }}\n{{- end }}\n"
  },
  {
    "path": "contrib/charts/dragonfly/templates/service.yaml",
    "content": "apiVersion: v1\nkind: Service\nmetadata:\n  name: {{ include \"dragonfly.fullname\" . }}\n  namespace: {{ .Release.Namespace }}\n  {{- with .Values.service.annotations }}\n  annotations:\n    {{- toYaml . | nindent 4 }}\n  {{- end }}\n  labels:\n    {{- with .Values.service.labels }}\n    {{- toYaml . | nindent 4 }}\n    {{- end }}\n    {{- include \"dragonfly.labels\" . | nindent 4 }}\nspec:\n  type: {{ .Values.service.type }}\n  {{- if and (eq .Values.service.type \"LoadBalancer\") (ne .Values.service.loadBalancerIP \"\") }}\n  loadBalancerIP: {{ .Values.service.loadBalancerIP }}\n  {{- end }}\n  {{- if and (eq .Values.service.type \"ClusterIP\") (ne .Values.service.clusterIP \"\") }}\n  clusterIP: {{ .Values.service.clusterIP }}\n  {{- end }}\n  ports:\n    - port: {{ .Values.service.port }}\n      targetPort: dragonfly\n      protocol: TCP\n      name: dragonfly\n  selector:\n    {{- include \"dragonfly.selectorLabels\" . | nindent 4 }}\n"
  },
  {
    "path": "contrib/charts/dragonfly/templates/serviceaccount.yaml",
    "content": "{{- if .Values.serviceAccount.create -}}\napiVersion: v1\nkind: ServiceAccount\nmetadata:\n  name: {{ include \"dragonfly.serviceAccountName\" . }}\n  namespace: {{ .Release.Namespace }}\n  {{- with .Values.serviceAccount.annotations }}\n  annotations:\n    {{- toYaml . | nindent 4 }}\n  {{- end }}\n  labels:\n    {{- include \"dragonfly.labels\" . | nindent 4 }}\n{{- end }}\n"
  },
  {
    "path": "contrib/charts/dragonfly/templates/servicemonitor.yaml",
    "content": "{{- if .Values.serviceMonitor.enabled }}\napiVersion: monitoring.coreos.com/v1\nkind: ServiceMonitor\nmetadata:\n  name: {{ template \"dragonfly.fullname\" . }}-metrics\n  {{- if .Values.serviceMonitor.namespace }}\n  namespace: {{ .Values.serviceMonitor.namespace }}\n  {{- else }}\n  namespace: {{ .Release.Namespace }}\n  {{- end }}\n  {{- with .Values.serviceMonitor.annotations }}\n  annotations:\n    {{- toYaml . | nindent 4 }}\n  {{- end }}\n  labels:\n    {{- with .Values.serviceMonitor.labels }}\n    {{- toYaml . | nindent 4 }}\n    {{- end }}\n    {{- include \"dragonfly.labels\" . | nindent 4 }}\nspec:\n  endpoints:\n    - interval: {{ .Values.serviceMonitor.interval }}\n      {{- with .Values.serviceMonitor.scrapeTimeout }}\n      scrapeTimeout: {{ . }}\n      {{- end }}\n      honorLabels: true\n      port: {{ default \"metrics\" .Values.service.metrics.portName }}\n      path: /metrics\n      {{- if .Values.tls.enabled }}\n      scheme: https\n      tlsConfig:\n        insecureSkipVerify: true\n      {{- else }}\n      scheme: http\n      {{- end }}\n  jobLabel: \"{{ .Release.Name }}\"\n  selector:\n    matchLabels:\n      {{- include \"dragonfly.selectorLabels\" . | nindent 6 }}\n      type: metrics\n  namespaceSelector:\n    matchNames:\n      - {{ .Release.Namespace }}\n{{- end }}\n"
  },
  {
    "path": "contrib/charts/dragonfly/templates/statefulset.yaml",
    "content": "{{- if .Values.storage.enabled }}\napiVersion: apps/v1\nkind: StatefulSet\nmetadata:\n  name: {{ include \"dragonfly.fullname\" . }}\n  namespace: {{ .Release.Namespace }}\n  labels:\n    {{- include \"dragonfly.labels\" . | nindent 4 }}\nspec:\n  serviceName: {{ .Release.Name }}\n  replicas: {{ .Values.replicaCount }}\n  selector:\n    matchLabels:\n      {{- include \"dragonfly.selectorLabels\" . | nindent 6 }}\n  template:\n    metadata:\n      annotations:\n        {{- if and (.Values.tls.enabled) (not .Values.tls.existing_secret) }}\n        checksum/tls-secret: {{ include (print $.Template.BasePath \"/tls-secret.yaml\") . | sha256sum }}\n        {{- end }}\n        {{- with .Values.podAnnotations }}\n        {{- toYaml . | nindent 8 }}\n        {{- end }}\n      labels:\n        {{- include \"dragonfly.selectorLabels\" . | nindent 8 }}\n        {{- if .Values.commonLabels }}\n        {{- include \"dragonfly.commonLabels\" . | trim | nindent 8 }}\n        {{- end }}\n    spec:\n      {{- include \"dragonfly.pod\" . | trim | nindent 6 }}\n  volumeClaimTemplates:\n    - metadata:\n        name: \"{{ .Release.Name }}-data\"\n      spec:\n        accessModes: [ \"ReadWriteOnce\" ]\n        storageClassName: {{ .Values.storage.storageClassName }}\n        resources:\n          requests:\n            storage: {{ .Values.storage.requests }}\n{{- end }}\n"
  },
  {
    "path": "contrib/charts/dragonfly/templates/tls-secret.yaml",
    "content": "{{- if and (.Values.tls.enabled) (.Values.tls.cert) (.Values.tls.key) (not .Values.tls.existing_secret) }}\napiVersion: v1\nkind: Secret\nmetadata:\n  name: {{ include \"dragonfly.fullname\" . }}-tls\n  namespace: {{ .Release.Namespace }}\n  labels:\n    {{- include \"dragonfly.labels\" . | nindent 4 }}\ntype: kubernetes.io/tls\ndata:\n  tls.crt: {{ default \"\" .Values.tls.cert | b64enc | quote }}\n  tls.key: {{ default \"\" .Values.tls.key | b64enc | quote }}\n{{- end }}\n"
  },
  {
    "path": "contrib/charts/dragonfly/values.yaml",
    "content": "# Default values for dragonfly.\n# This is a YAML-formatted file.\n# Declare variables to be passed into your templates.\n\n# -- Number of replicas to deploy\nreplicaCount: 1\n\nimage:\n  # -- Container Image Registry to pull the image from\n  repository: docker.dragonflydb.io/dragonflydb/dragonfly\n  # -- Dragonfly image pull policy\n  pullPolicy: IfNotPresent\n  # -- Overrides the image tag whose default is the chart appVersion.\n  tag: \"\"\n\n# -- Container Registry Secret names in an array\nimagePullSecrets: []\n\n# -- String to partially override dragonfly.fullname\nnameOverride: \"\"\n\n# -- String to fully override dragonfly.fullname\nfullnameOverride: \"\"\n\n# -- Common labels to add to all resources\ncommonLabels: {}\n\nserviceAccount:\n  # -- Specifies whether a service account should be created\n  create: true\n  # -- Annotations to add to the service account\n  annotations: {}\n  # -- The name of the service account to use.\n  # If not set and create is true, a name is generated using the fullname template\n  name: \"\"\n\n# -- Annotations for pods\npodAnnotations: {}\n\n# -- Set securityContext for pod itself\npodSecurityContext: {}\n  # fsGroup: 2000\n\n# -- Set securityContext for containers\nsecurityContext: {}\n  # capabilities:\n  #   drop:\n  #   - ALL\n  # readOnlyRootFilesystem: true\n  # runAsNonRoot: true\n  # runAsUser: 1000\n\n# -- Set hostNetwork for pod\nhostNetwork: false\n\nservice:\n  # -- Service type to provision. Can be NodePort, ClusterIP or LoadBalancer\n  type: ClusterIP\n  # -- Load balancer static ip to use when service type is set to LoadBalancer\n  loadBalancerIP: \"\"\n  # -- Cluster IP address to assign to the service. 
Leave empty to auto-allocate\n  clusterIP: \"\"\n  # -- Dragonfly service port\n  port: 6379\n  # -- Extra annotations for the service\n  annotations: {}\n  # -- Extra labels for the service\n  labels: {}\n  metrics:\n    # -- name for the metrics port\n    portName: metrics\n    # -- serviceType for the metrics service\n    serviceType: ClusterIP\n\nserviceMonitor:\n  # -- If true, a ServiceMonitor CRD is created for a prometheus operator\n  enabled: false\n  # -- namespace in which to deploy the ServiceMonitor CR. defaults to the application namespace\n  namespace: \"\"\n  # -- additional labels to apply to the metrics\n  labels: {}\n  # -- additional annotations to apply to the metrics\n  annotations: {}\n  # -- scrape interval\n  interval: 10s\n  # -- scrape timeout\n  scrapeTimeout: 10s\n\nprometheusRule:\n  # -- Deploy a PrometheusRule\n  enabled: false\n  # -- PrometheusRule.Spec\n  # https://awesome-prometheus-alerts.grep.to/rules\n  spec: []\n\nstorage:\n  # -- If /data should persist. This will provision a StatefulSet instead.\n  enabled: false\n  # -- Global StorageClass for Persistent Volume(s)\n  storageClassName: \"\"\n  # -- Volume size to request for the PVC\n  requests: 128Mi\n\ntls:\n  # -- enable TLS\n  enabled: false\n  # -- use cert-manager to automatically create the certificate\n  createCerts: false\n  # -- duration or ttl of the validity of the created certificate\n  duration: 87600h0m0s\n  issuer:\n    # -- cert-manager issuer kind. 
Usually Issuer or ClusterIssuer\n    kind: ClusterIssuer\n    # -- name of the referenced issuer\n    name: selfsigned\n    # -- group of the referenced issuer\n    # if you are using an external issuer, change this to that issuer group.\n    group: cert-manager.io\n  # -- use TLS certificates from existing secret\n  existing_secret: \"\"\n  # -- TLS certificate\n  cert: \"\"\n  # cert: |\n  #   -----BEGIN CERTIFICATE-----\n  #   MIIDazCCAlOgAwIBAgIUfV3ygaaVW3+yzK5Dq6Aw6TsZ494wDQYJKoZIhvcNAQEL\n  #   ...\n  #   BQAwRTELMAkGA1UEBhMCQVUxEzARBgNVBAgMClNvbWUtU3RhdGUxITAfBgNVBAoM\n  #   zJAL4hNw4Tr6E52fqdmX\n  #   -----END CERTIFICATE-----\n  # -- TLS private key\n  key: \"\"\n  # key: |\n  #   -----BEGIN RSA PRIVATE KEY-----\n  #   MIIEpAIBAAKCAQEAxeD5iQGQpCUlksFvjzzAxPTw6DMJd3MpifV+HoBY4LiTyDer\n  #   ...\n  #   HLunol88AeTOcKfD6hBYGvcRfu5NV29jJxZCOBfbFQXjnNlnrhRCag==\n  #   -----END RSA PRIVATE KEY-----\n\n# If enabled will set DFLY_PASSWORD environment variable with the specified existing secret value\n# Note that if enabled and the secret does not exist pods will not start\npasswordFromSecret:\n  enable: false\n  existingSecret:\n    name: \"\"\n    key: \"\"\n\n\nprobes:\n  livenessProbe:\n    exec:\n      command:\n        - /bin/sh\n        - /usr/local/bin/healthcheck.sh\n    initialDelaySeconds: 10\n    periodSeconds: 10\n    timeoutSeconds: 5\n    failureThreshold: 3\n    successThreshold: 1\n  readinessProbe:\n    exec:\n      command:\n        - /bin/sh\n        - /usr/local/bin/healthcheck.sh\n    initialDelaySeconds: 10\n    periodSeconds: 10\n    timeoutSeconds: 5\n    failureThreshold: 3\n    successThreshold: 1\n\n# -- Allow overriding the container's command\ncommand: []\n\n# -- Extra arguments to pass to the dragonfly binary\nextraArgs: []\n\n# -- Extra volumes to mount into the pods\nextraVolumes: []\n\n# -- Extra volume mounts corresponding to the volumes mounted above\nextraVolumeMounts: []\n\n# -- A list of initContainers to run before each pod 
starts\ninitContainers: []\n\n# -- Additional sidecar containers\nextraContainers: []\n\n# -- extra K8s manifests to deploy\nextraObjects: []\n  # - apiVersion: cert-manager.io/v1\n  #   kind: ClusterIssuer\n  #   metadata:\n  #     name: selfsigned\n  #   spec:\n  #     selfSigned: {}\n\nresources:\n  # -- The requested resources for the containers\n  requests: {}\n  #   cpu: 100m\n  #   memory: 128Mi\n  # -- The resource limits for the containers\n  limits: {}\n  #   cpu: 100m\n  #   memory: 128Mi\n\n# -- extra environment variables\nenv: []\n\n# -- extra environment variables from K8s objects\nenvFrom: []\n\n# -- Priority class name for pod assignment\npriorityClassName: \"\"\n\n# -- Node labels for pod assignment\nnodeSelector: {}\n\n# -- Tolerations for pod assignment\ntolerations: []\n\n# -- Affinity for pod assignment\naffinity: {}\n\n# -- Topology Spread Constraints for pod assignment\ntopologySpreadConstraints: []\n"
  },
  {
    "path": "contrib/docker/README.md",
    "content": "<p align=\"center\">\n  <a href=\"https://dragonflydb.io\">\n    <img src=\"https://raw.githubusercontent.com/dragonflydb/dragonfly/main/.github/images/logo-full.svg\"\n      width=\"284\" border=\"0\" alt=\"Dragonfly\">\n  </a>\n</p>\n\n\n\n# Dragonfly DB with Docker Compose\n\nThis guide will have you up running DragonflyDB with `docker-compose` in just a few minutes.\n\n| This guide assumes you have `docker` and `docker-compose` installed on your machine. If not, [Install Docker](https://docs.docker.com/get-docker/) and [Install Docker Compose](https://docs.docker.com/compose/install/) before continuing.\n\n## Step 1\n\n```bash\n# Download Official Dragonfly DB Docker Compose File\nwget https://raw.githubusercontent.com/dragonflydb/dragonfly/main/contrib/docker/docker-compose.yml\n\n# Launch the Dragonfly DB Instance\ndocker-compose up -d\n\n# Confirm image is up\ndocker ps | grep dragonfly\n# ac94b5ba30a0   docker.dragonflydb.io/dragonflydb/dragonfly   \"entrypoint.sh drago…\"   45 seconds ago   Up 31 seconds         0.0.0.0:6379->6379/tcp, :::6379->6379/tcp   docker_dragonfly_1\n\n# Log follow the dragonfly container\ndocker logs -f docker_dragonfly_1\n```\n\nDragonfly DB will answer to both `http` and `redis` requests out of the box!\n\nYou can use `redis-cli` to connect to `localhost:6379` or open a browser and visit `http://localhost:6379`\n\n## Step 2\n\nConnect with a redis client.\n\nFrom a new terminal:\n\n```bash\nredis-cli\n127.0.0.1:6379> set hello world\nOK\n127.0.0.1:6379> keys *\n1) \"hello\"\n127.0.0.1:6379> get hello\n\"world\"\n127.0.0.1:6379> \n```\n\n## Step 3\n\nContinue being great and build your app with the power of DragonflyDB!  \n\n## Tuning Dragonfly DB\nIf you are attempting to tune Dragonfly DB for performance, consider `NAT` performance costs associated with containerization.  
\n> ## Performance Tuning\n> ---\n> In `docker-compose`, there is a meaningful difference between an `overlay` network(which relies on docker `NAT` traversal on every request) and using the `host` network(see [`docker-compose.yml`](https://github.com/dragonflydb/dragonfly/blob/main/contrib/docker/docker-compose.yml)).  \n> &nbsp;  \n> For more information, see the [official docker-compose network_mode Docs](https://docs.docker.com/compose/compose-file/compose-file-v3/#network_mode)  \n> &nbsp;  \n\n### More Build Options\n- [Docker Quick Start](/docs/quick-start/)\n- [Kubernetes Deployment with Helm Chart](/contrib/charts/dragonfly/)\n- [Build From Source](/docs/build-from-source.md)"
  },
  {
    "path": "contrib/docker/docker-compose.yml",
    "content": "services:\n  dragonfly:\n    image: 'docker.dragonflydb.io/dragonflydb/dragonfly'\n    ulimits:\n      memlock: -1\n    ports:\n      - \"6379:6379\"\n    # For better performance, consider `host` mode instead `port` to avoid docker NAT.\n    # `host` mode is NOT currently supported in Swarm Mode.\n    # https://docs.docker.com/compose/compose-file/compose-file-v3/#network_mode\n    # network_mode: \"host\"\n    volumes:\n      - dragonflydata:/data\nvolumes:\n  dragonflydata:\n"
  },
  {
    "path": "contrib/scripts/conventional-commits",
    "content": "#!/usr/bin/env bash\n\n# list of Conventional Commits types\ncc_types=(\"feat\" \"fix\")\ndefault_types=(\"build\" \"chore\" \"ci\" \"docs\" \"${cc_types[@]}\" \"perf\" \"refactor\" \"revert\" \"style\" \"test\")\ntypes=( \"${cc_types[@]}\" )\n\nif [ $# -eq 1 ]; then\n    types=( \"${default_types[@]}\" )\nelse\n    while [ $# -gt 1 ]; do\n        types+=( \"$1\" )\n        shift\n    done\nfi\n\nmsg_file=\"$1\"\n\nr_types=\"($(IFS='|'; echo \"${types[*]}\"))\"\nr_scope=\"(\\([[:alnum:] \\/-]+\\))?\"\nr_delim='!?:'\nr_subject=\" [[:print:]].+\"\npattern=\"^$r_types$r_scope$r_delim$r_subject$\"\n\nif grep -Eq \"$pattern\" \"$msg_file\"; then\n    exit 0\nfi\n\necho \"[Commit message] $( cat \"$msg_file\" )\"\necho \"\nThank you for your interest in Dragonfly DB. \n\nTo keep things clean, we ask all commits to meet the following criteria:\n  - Be Signed (git commit -s -m ...)\n  - Valid Conventional Commit https://www.conventionalcommits.org/\n  \n  Special Commit Words are correlated to versioning. Specifically \\\"fix\\\" and \\\"feat\\\"\n  - fix: a commit of the type fix patches a bug in your codebase (this correlates with PATCH in Semantic Versioning).\n  - feat: a commit of the type feat introduces a new feature to the codebase (this correlates with MINOR in Semantic Versioning).\n  - Breaking changes have a ! 
before the \\\":\\\"\n\n  Finally, If there is an Issue for this Commit, Please add it to the end of the commit message.\n  - Reference Issue Number at End of Commit Message (Optional)\n\nThank you for helping us label a \\`fix\\` and \\`feat\\` properly so that our commits, issues and semantic versioning are all aligned!\n\nA Signed Conventional Commit with Issue Number look like: \n\n    git commit -s -m \\\"type(scope): description #112\\\"\n\nValid types:\n\n    $(IFS=' '; echo \"${types[*]}\")\n\nExample Document Change:\n\n    docs(readme): Fix Example Links #121\n\nExample Breaking New Feature\n    feat(ingest)!: Add new ingest # 122\n\nThis is an example of a fix with an Issue #\n\n    fix(ingest): Refactor for loop to list comprehension #123\n\nThank you for your contribution!\n\nSincerely,\nThe Dragonfly DB Contributors\n\"\nexit 1\n"
  },
  {
    "path": "contrib/scripts/signed-commit",
    "content": "#!/usr/bin/env bash\n\nif [[ -z \"$1\" ]] || [[ ! -f \"$1\" ]]; then\n  echo \"ERROR: Commit message file not provided or does not exist.\"\n  exit 1\nfi\n\n# Check if signed-off-by line is present (automatically added using -s flag)\nif ! grep -q 'Signed-off-by:' \"$1\"; then\n  echo \"ERROR: Commit message must contain a Signed-off-by line.\"\n  echo \"\"\n  echo \"To sign your commits, use the -s flag:\"\n  echo \"  git commit -s -m \\\"your commit message\\\"\"\n  exit 1\nfi\n\nexit 0\n"
  },
  {
    "path": "docs/README.md",
    "content": "<p align=\"center\">\n  <a href=\"https://dragonflydb.io\">\n    <img  src=\"https://raw.githubusercontent.com/dragonflydb/dragonfly/main/.github/images/logo-full.svg\"\n      width=\"284\" border=\"0\" alt=\"Dragonfly\">\n  </a>\n</p>\n\n\n# Quick Start\n\nThe easiest way to get started with Dragonfly is with Docker.\n\n## Deployment Method\n\nFirst, choose a deployment method.\n\nIf you are new to Dragonfly, we recommend the [DragonflyDB Docker Quick Start Guide](/docs/quick-start/)\n\nOther options:\n\n### - [Docker Compose](/contrib/docker/)\n\n### - [Helm Chart for Kubernetes](/contrib/charts/dragonfly/)\n\n\n# Learn About DragonflyDB\n## [FAQ](/docs/faq.md)\n\n## [Differences Between DragonflyDB and Redis](/docs/differences.md)\n\n## [API Commands Reference](https://dragonflydb.io/docs/category/command-reference)\n"
  },
  {
    "path": "docs/async-tiering.md",
    "content": "# Async Tiering Design Document\n\n## Background\n\nOur current tiered storage component performs disk operations inline as part of executing shard-local operations. This approach introduces latency when processing commands, impacting both the system's throughput and overall command latency. The following document discusses a potential redesign that addresses this issue and enables the execution of operations without I/O blocking.\n\n```mermaid\ngraph LR\n    %% Left Side: No Tiering\n    subgraph S1 [Shard queue no tiering]\n        direction TB\n        A1[get] --- B1[set]\n        B1 --- C1[get]\n        C1 --- D1[\"&nbsp;\"]\n    end\n\n    %% Spacing and Arrows\n    S1 --- Space1[ ]\n    Space1 -.-> Space2[ ]\n    Space2 --- S2\n\n    %% Right Side: With Tiering\n    subgraph S2 [Shard queue with tiering]\n        direction TB\n        A2[\"get<br/>I/O read\"] --- B2[\"set<br/>I/O write\"]\n        B2 --- C2[\"get<br/>I/O read\"]\n        C2 --- D2[\"&nbsp;\"]\n    end\n\n    %% Styling\n    style S1 fill:#fff,stroke:#ffcc00,stroke-width:2px\n    style S2 fill:#fff,stroke:#ffcc00,stroke-width:2px\n    style A1 fill:#fff,stroke:#ffcc00\n    style B1 fill:#fff,stroke:#ffcc00\n    style C1 fill:#fff,stroke:#ffcc00\n    style D1 fill:none,stroke:none\n    style A2 fill:#fff,stroke:#ffcc00\n    style B2 fill:#fff,stroke:#ffcc00\n    style C2 fill:#fff,stroke:#ffcc00\n    style D2 fill:none,stroke:none\n\n    %% Hide the spacer nodes\n    style Space1 fill:none,stroke:none\n    style Space2 fill:none,stroke:none\n```\n\n## High level design\n\nThe core goal is to perform tiered I/O operations concurrently while maintaining transparency for the transaction framework designed for instant RAM operations.\n\nTransactions issue asynchronous requests to the tiered storage, returning futures that the coordinating fiber awaits. 
Operations on the same key execute strictly in order, relying on the transactional framework for correctness, while operations on different keys can be interleaved for efficiency.\n\n### The following diagram depicts a simplified flow for a GET operation:\n\n```mermaid\nsequenceDiagram\n    participant Coordinator\n    participant Shard\n    participant Disk\n\n    Coordinator->>Shard: Get\n    Shard->>Disk: IO_Read\n    Shard-->>Coordinator: ResultFuture\n    Disk-->>Shard: ReadCallback\n    Shard-->>Coordinator: ResultFulfilled\n```\n\nThe coordinator fiber schedules a command on a shard thread. The command performs initial work, issues an asynchronous read, and returns a `ResultFuture` to the coordinator. The coordinator waits for fulfillment before replying. This parallelism hides most I/O latency (assuming non-saturated SSDs).\n\nFor complex operations like `APPEND` that require reading the value and modifying it, a post-read handler runs on the shard thread. Since in-place disk modification isn't supported, `APPEND` becomes an IO-READ followed by a handler that modifies the value in memory. The result is returned to the coordinator and the modified value is uploaded to memory and is deleted on disk.\n\nIt is important to note that only a single read is issued for all pending asynchronous commands for a given key. Once the read finishes, all callbacks are executed consecutively and atomically. This guarantees a correct operation order, including as seen by outside observers. This execution loop is aided by specialized Decoder classes that keep an intermediary value in-between modifications or avoid creating it at all for read-only sequences.\n\nUnlike the previous design where `DbSlice::Find(...)` handled tiering transparently, command implementations handling offloaded values must now use callbacks or futures (e.g., via `TieredStorage::Read` or `Modify`).\n\n### Tiered Storage Component\n\nThe `TieredStorage` component manages the lifecycle of offloaded items. 
Externalized blobs are immutable on disk; operations involve stashing new blobs, reading existing ones, or marking them for deletion.\n\n#### Upstream API (TieredStorage)\n\nThe primary interface used by commands includes:\n\n1.  `Read(DbIndex, Key, Value) -> Future<string>`: Asynchronously fetch an offloaded value.\n2.  `Modify(DbIndex, Key, Value, ModFunc) -> Future<Result>`: Fetch, modify in memory (via callback), and update.\n3.  `TryStash(DbIndex, Key, Value) -> Future<bool>`: Schedule a value for offloading.\n4.  `Delete(DbIndex, Value)`: Remove offloaded value.\n5.  `CancelStash(DbIndex, Key, Value)`: Start cancelling a pending stash operation.\n\n#### Downstream API (DiskStorage)\n\n`DiskStorage` handles file management and async I/O:\n\n1.  `Read(DiskSegment, ReadCb)`: Read a segment from the backing file.\n2.  `PrepareStash(Length) -> Result<pair<Offset, UringBuf>>`: Allocate a segment and prepare a buffer.\n3.  `Stash(DiskSegment, UringBuf, StashCb)`: Write the buffer to the allocated segment.\n4.  
`MarkAsFree(DiskSegment)`: Mark a segment for reuse.\n\n`DiskStorage` manages the underlying file growth and page allocation via an `ExternalAllocator`.\n\n\n```mermaid\ngraph TB\n    subgraph Commands[\"called by commands or db_slice\"]\n        READ[READ]\n        REMOVE[REMOVE]\n        STASH[STASH]\n    end\n\n    subgraph TieredStorage[\"TieredStorage\"]\n        %% Invisible node to act as a landing point for the box\n        TS_TOP[ ]:::invisible\n\n        PR[pending reads<br/>+ remove?<br/>offset -> futures]\n        PS[pending stashes<br/>key -> version]\n\n        TS_BOTTOM[ ]:::invisible\n    end\n\n    subgraph DiskStorage[\"DiskStorage\"]\n        DS_TOP[ ]:::invisible\n        EA[external<br/>allocator]\n        IM[io manager]\n    end\n\n    %% Interactions between Commands and TieredStorage\n    READ -.-> |\"Future&lt;string&gt;\"| TS_TOP\n    TS_TOP -.-> READ\n    REMOVE -.-> TS_TOP\n    STASH -.-> TS_TOP\n\n    %% Interactions between TieredStorage and DiskStorage\n    TS_BOTTOM -.-> |\"callback based i/o operations\"| DS_TOP\n    DS_TOP -.-> TS_BOTTOM\n\n    %% Notes\n    Note1[pending reads for a specific<br/>offset are tracked to avoid<br/>duplicate reads and removal<br/>of segments still in use]\n    Note2[pending stashes use incremental<br/>versions to discard results of<br/>outdated operations]\n\n    Note1 -.-> TieredStorage\n    Note2 -.-> TieredStorage\n\n    %% Styling to make landing nodes invisible\n    classDef invisible fill:none,stroke:none,color:none,width:0px,height:0px;\n```\n\nConsider, for example, two high level `Read` operations for two different keys K1 and K2 residing on the same page.\nFor K1, we issue a page read from `DiskStorage` tracked by its offset. For K2, if we check and find an active operation fetching that offset, we link the K2 callback to the K1 completion, avoiding duplicate I/O.\n\nConsider issuing a `Read` request for a key (e.g., during `GET`). This triggers a disk read for the corresponding page. 
If `Delete` is called for the same key (e.g., via `DEL` or `SET` overwriting the key) while the read is in progress, we must be careful. Immediately calling `DiskStorage::MarkAsFree` could allow a subsequent `Stash` to overwrite the page while it's being read. To prevent this race condition, `MarkAsFree` calls are queued until concurrent reads on the affected segment complete.\n\nThese problems do not exist for `Stash` operations because they write to newly allocated pages that no other actor references yet.\n\n## API->Ops translation table\n\nOperations that require I/O are shown in **bold**.\n\n| API Sequence | I/O Ops Sequence | Explanation |\n|---|---|---|\n| `SET` (overwrite) | `Delete` | We remove the reference to the blob stashed on disk. No overwrite of existing entry. |\n| `GET` | **`Read`**, `Delete` (optional) | Reads the value. Depending on policy, we might then remove the blob from storage and keep it in RAM (\"warm up\"). |\n| `DEL`, `GET` | `Delete` | `DEL` removes the entry. Subsequent `GET` won't find it in TieredStorage. |\n| `APPEND` | **`Read`**, `Delete` | Modify not done in place. Read to memory, append, then remove old disk entry. |\n| `GET`, `SET` | **`Read`**, `Delete` | `GET` triggers `Read`. `SET` triggers `Delete`. If `Read` is in-flight, `DiskStorage::MarkAsFree` is delayed until `Read` completes to avoid reusing the page prematurely. |\n| `SET`, `DEL` | **`TryStash`**, `Delete` | `SET` may be followed by `TryStash` in case we decide to offload an in-memory entry. In case `DEL` is processed when stash is still in flight, `CancelStash()` will be called. Otherwise, `MarkAsFree` will be called to mark the page as available. |\n"
  },
  {
    "path": "docs/cluster-node-health.md",
    "content": "# Cluster Node Health\n\n**Node health is passive metadata provided by the cluster manager (control plane) via the\n`DFLYCLUSTER CONFIG` command.** Dragonfly nodes do not actively determine their own health status;\ninstead, the cluster orchestrator monitors node states and communicates health information to each\nnode through the cluster configuration.\n\nDragonfly supports node health status reporting for cluster configurations, providing\nValkey-compatible behavior for cluster management commands. This feature allows the cluster\nmanager to track the health state of each node and communicate it to clients through various\ncluster commands.\n\n## Overview\n\nThe node health feature was introduced in [PR #4758](https://github.com/dragonflydb/dragonfly/pull/4758)\nand [PR #4767](https://github.com/dragonflydb/dragonfly/pull/4767) to address\n[issue #4741](https://github.com/dragonflydb/dragonfly/issues/4741).\n\nThe health status is part of the cluster configuration and can be set for both master and replica\nnodes. 
Different cluster commands use this information to filter or display nodes based on their\nhealth state.\n\n## Health States\n\nDragonfly supports four health states for cluster nodes:\n\n| State     | Description                                                                               | Visible in Commands |\n|-----------|-------------------------------------------------------------------------------------------|---------------------|\n| `online`  | Node is fully operational and ready to serve requests                                    | All commands        |\n| `loading` | Node is still loading data (e.g., during initial sync or restart)                       | `CLUSTER SHARDS`, `CLUSTER NODES` |\n| `fail`    | Node has failed or is unreachable                                                        | `CLUSTER SHARDS`, `CLUSTER NODES` |\n| `hidden`  | Replica exists but should not be exposed to clients (internal use by cluster manager)   | Masters: all commands; Replicas: none |\n\n### Default State\n\nWhen no health status is specified in the configuration, nodes default to the `online` state.\n\n## Configuration\n\nNode health is specified in the cluster configuration JSON that is passed via the\n`DFLYCLUSTER CONFIG` command. 
The health status is set using the `health` field for each node.\n\n### Configuration Format\n\n```json\n[\n  {\n    \"slot_ranges\": [\n      { \"start\": 0, \"end\": 16383 }\n    ],\n    \"master\": {\n      \"id\": \"node-master-1\",\n      \"ip\": \"10.0.0.1\",\n      \"port\": 7000,\n      \"health\": \"online\"\n    },\n    \"replicas\": [\n      {\n        \"id\": \"node-replica-1\",\n        \"ip\": \"10.0.0.2\",\n        \"port\": 7001,\n        \"health\": \"online\"\n      },\n      {\n        \"id\": \"node-replica-2\",\n        \"ip\": \"10.0.0.3\",\n        \"port\": 7002,\n        \"health\": \"loading\"\n      },\n      {\n        \"id\": \"node-replica-3\",\n        \"ip\": \"10.0.0.4\",\n        \"port\": 7003,\n        \"health\": \"fail\"\n      },\n      {\n        \"id\": \"node-replica-4\",\n        \"ip\": \"10.0.0.5\",\n        \"port\": 7004,\n        \"health\": \"hidden\"\n      }\n    ]\n  }\n]\n```\n\n### Setting Configuration\n\nUse the `DFLYCLUSTER CONFIG` command to set the cluster configuration with health information:\n\n```bash\nDFLYCLUSTER CONFIG <json_config>\n```\n\nThe health field is optional and case-insensitive. 
Valid values are: `online`, `loading`, `fail`,\nand `hidden`.\n\n## Command Behavior\n\nDifferent cluster commands handle node health status in different ways:\n\n### CLUSTER SHARDS\n\nThe `CLUSTER SHARDS` command returns detailed information about cluster shards, including the\nhealth status of all nodes except those marked as `hidden`.\n\n**Example:**\n\n```bash\n127.0.0.1:6379> CLUSTER SHARDS\n1) 1) \"slots\"\n   2) 1) (integer) 0\n      2) (integer) 16383\n   3) \"nodes\"\n   4) 1) 1) \"id\"\n         2) \"node-master-1\"\n         3) \"endpoint\"\n         4) \"10.0.0.1\"\n         5) \"ip\"\n         6) \"10.0.0.1\"\n         7) \"port\"\n         8) (integer) 7000\n         9) \"role\"\n        10) \"master\"\n        11) \"replication-offset\"\n        12) (integer) 0\n        13) \"health\"\n        14) \"online\"\n      2) 1) \"id\"\n         2) \"node-replica-1\"\n         3) \"endpoint\"\n         4) \"10.0.0.2\"\n         5) \"ip\"\n         6) \"10.0.0.2\"\n         7) \"port\"\n         8) (integer) 7001\n         9) \"role\"\n        10) \"replica\"\n        11) \"replication-offset\"\n        12) (integer) 0\n        13) \"health\"\n        14) \"online\"\n      3) 1) \"id\"\n         2) \"node-replica-2\"\n         3) \"endpoint\"\n         4) \"10.0.0.3\"\n         5) \"ip\"\n         6) \"10.0.0.3\"\n         7) \"port\"\n         8) (integer) 7002\n         9) \"role\"\n        10) \"replica\"\n        11) \"replication-offset\"\n        12) (integer) 0\n        13) \"health\"\n        14) \"loading\"\n      4) 1) \"id\"\n         2) \"node-replica-3\"\n         3) \"endpoint\"\n         4) \"10.0.0.4\"\n         5) \"ip\"\n         6) \"10.0.0.4\"\n         7) \"port\"\n         8) (integer) 7003\n         9) \"role\"\n        10) \"replica\"\n        11) \"replication-offset\"\n        12) (integer) 0\n        13) \"health\"\n        14) \"fail\"\n```\n\n**Note:** Nodes with `hidden` health status are filtered out and do not appear in the 
output.\n\n### CLUSTER SLOTS\n\nThe `CLUSTER SLOTS` command returns slot distribution information. This command filters out\nreplicas that are not ready to serve requests.\n\n**Filtering behavior:**\n- Includes replicas with `online` health status\n- Excludes replicas with `loading`, `fail`, or `hidden` health status\n\n**Example:**\n\n```bash\n127.0.0.1:6379> CLUSTER SLOTS\n1) 1) (integer) 0\n   2) (integer) 16383\n   3) 1) \"10.0.0.1\"\n      2) (integer) 7000\n      3) \"node-master-1\"\n   4) 1) \"10.0.0.2\"\n      2) (integer) 7001\n      3) \"node-replica-1\"\n```\n\nIn this example, only the master and the `online` replica (`node-replica-1`) are shown. Replicas\nwith `loading`, `fail`, or `hidden` status are not included.\n\n### CLUSTER NODES\n\nThe `CLUSTER NODES` command returns a list of all cluster nodes in a space-separated format. This\ncommand shows nodes with most health states but excludes `hidden` replicas (a master marked\n`hidden` is still listed).\n\n**Connection state mapping:**\n- `online` and `loading` nodes: shown as `connected`\n- `fail` nodes: shown as `disconnected`\n- `hidden` replicas: not shown in output\n\n**Example:**\n\n```bash\n127.0.0.1:6379> CLUSTER NODES\nnode-master-1 10.0.0.1:7000@7000 master - 0 0 0 connected 0-16383\nnode-replica-1 10.0.0.2:7001@7001 slave node-master-1 0 0 0 connected\nnode-replica-2 10.0.0.3:7002@7002 slave node-master-1 0 0 0 connected\nnode-replica-3 10.0.0.4:7003@7003 slave node-master-1 0 0 0 disconnected\n```\n\n**Note:**\n- `node-replica-1` (online): appears as `connected`\n- `node-replica-2` (loading): appears as `connected`\n- `node-replica-3` (fail): appears as `disconnected`\n- `node-replica-4` (hidden): not shown in output\n\n## Use Cases\n\n### 1. Gradual Node Addition\n\nWhen adding a new replica to a cluster, you can set its health status to `loading` while it's\nsyncing data. This allows the cluster manager to track the node but prevents clients from\nredirecting read requests to it via `CLUSTER SLOTS`.\n\n### 2. 
Failed Node Handling\n\nWhen a node fails or becomes unreachable, the cluster manager can mark it as `fail`. This\nprovides visibility in `CLUSTER SHARDS` and `CLUSTER NODES` while excluding it from\n`CLUSTER SLOTS` responses.\n\n### 3. Internal Replicas\n\nThe `hidden` health status is useful for replica nodes that are managed internally by the cluster\norchestrator but should not be visible to external clients. Hidden replicas are filtered out from\nall cluster commands (`CLUSTER SHARDS`, `CLUSTER SLOTS`, and `CLUSTER NODES`). Note that masters\nmarked as `hidden` are still visible in all commands; the filtering only applies to replicas.\n\n### 4. Valkey Compatibility\n\nThis feature provides Valkey-compatible behavior for cluster client APIs:\n- `CLUSTER SHARDS` returns the health status of replica nodes\n- `CLUSTER SLOTS` does not return replicas that have not finished loading\n\n## Implementation Details\n\nFor developers interested in the implementation:\n\n1. **Data Structure**: The `NodeHealth` enum is defined in `src/server/cluster/cluster_defs.h`\n   with four values: `FAIL`, `LOADING`, `ONLINE`, and `HIDDEN`.\n\n2. **Configuration Parsing**: Health status is parsed from JSON in\n   `src/server/cluster/cluster_config.cc` in the `ParseClusterNode` function.\n\n3. **Command Handlers**: The cluster commands in `src/server/cluster/cluster_family.cc` implement\n   filtering logic based on health status:\n   - `ClusterShards`: Filters out replicas with `HIDDEN` health before calling `ClusterShardsImpl`\n     (masters are still included even if marked `HIDDEN`)\n   - `ClusterSlotsImpl`: Filters out `HIDDEN`, `FAIL`, and `LOADING` replicas (masters are always\n     included)\n   - `ClusterNodesImpl`: Filters out replicas with `HIDDEN` health when listing replicas (masters\n     with `HIDDEN` health are still included) and maps health to connection state\n\n4. 
**Default Value**: When not specified in configuration, nodes default to `ONLINE` state as\n   defined in `ClusterExtendedNodeInfo`.\n\n## See Also\n\n- [Dragonfly Cluster Mode Documentation](https://www.dragonflydb.io/docs/cluster)\n- [CLUSTER SHARDS Command](https://redis.io/commands/cluster-shards/)\n- [CLUSTER SLOTS Command](https://redis.io/commands/cluster-slots/)\n- [CLUSTER NODES Command](https://redis.io/commands/cluster-nodes/)\n"
  },
  {
    "path": "docs/coordinator.excalidraw",
    "content": "{\n  \"type\": \"excalidraw\",\n  \"version\": 2,\n  \"source\": \"https://excalidraw.com\",\n  \"elements\": [\n    {\n      \"type\": \"rectangle\",\n      \"version\": 498,\n      \"versionNonce\": 987480120,\n      \"isDeleted\": false,\n      \"id\": \"jPwIU_a9_nxvuDFAcbzxM\",\n      \"fillStyle\": \"solid\",\n      \"strokeWidth\": 1,\n      \"strokeStyle\": \"dotted\",\n      \"roughness\": 1,\n      \"opacity\": 100,\n      \"angle\": 0,\n      \"x\": 712.375,\n      \"y\": 510.2500000000001,\n      \"strokeColor\": \"#000000\",\n      \"backgroundColor\": \"#15aabf\",\n      \"width\": 307,\n      \"height\": 30,\n      \"seed\": 1029717964,\n      \"groupIds\": [],\n      \"strokeSharpness\": \"sharp\",\n      \"boundElements\": [\n        {\n          \"type\": \"text\",\n          \"id\": \"U2-I9a2X4amHnB7NZFWGv\"\n        },\n        {\n          \"id\": \"MJoeQ6ylkFi5Z7UCzD-r-\",\n          \"type\": \"arrow\"\n        },\n        {\n          \"id\": \"KpIRIBeGsT3yzCPp6jbEN\",\n          \"type\": \"arrow\"\n        },\n        {\n          \"id\": \"Qnatw_Uix7cMFwAuW1DkJ\",\n          \"type\": \"arrow\"\n        },\n        {\n          \"id\": \"TLS6mZEI7BXyUdiiYHdrg\",\n          \"type\": \"arrow\"\n        },\n        {\n          \"id\": \"h_hyKP8N7nmD1NiZNa3ez\",\n          \"type\": \"arrow\"\n        },\n        {\n          \"id\": \"CrT6zZ8CKm_MSDw-CmcPG\",\n          \"type\": \"arrow\"\n        }\n      ],\n      \"updated\": 1660733356396,\n      \"link\": null,\n      \"locked\": false\n    },\n    {\n      \"type\": \"text\",\n      \"version\": 389,\n      \"versionNonce\": 1321365816,\n      \"isDeleted\": false,\n      \"id\": \"U2-I9a2X4amHnB7NZFWGv\",\n      \"fillStyle\": \"hachure\",\n      \"strokeWidth\": 1,\n      \"strokeStyle\": \"solid\",\n      \"roughness\": 1,\n      \"opacity\": 100,\n      \"angle\": 0,\n      \"x\": 717.375,\n      \"y\": 515.2500000000001,\n      \"strokeColor\": \"#000000\",\n     
 \"backgroundColor\": \"transparent\",\n      \"width\": 297,\n      \"height\": 20,\n      \"seed\": 1592449524,\n      \"groupIds\": [],\n      \"strokeSharpness\": \"sharp\",\n      \"boundElements\": [],\n      \"updated\": 1660733269433,\n      \"link\": null,\n      \"locked\": false,\n      \"fontSize\": 16,\n      \"fontFamily\": 1,\n      \"text\": \"coordinator\",\n      \"baseline\": 14,\n      \"textAlign\": \"center\",\n      \"verticalAlign\": \"middle\",\n      \"containerId\": \"jPwIU_a9_nxvuDFAcbzxM\",\n      \"originalText\": \"coordinator\"\n    },\n    {\n      \"type\": \"rectangle\",\n      \"version\": 469,\n      \"versionNonce\": 684925752,\n      \"isDeleted\": false,\n      \"id\": \"BY5OdEEKT0Y_DTy9Zgr9C\",\n      \"fillStyle\": \"hachure\",\n      \"strokeWidth\": 1,\n      \"strokeStyle\": \"solid\",\n      \"roughness\": 1,\n      \"opacity\": 100,\n      \"angle\": 0,\n      \"x\": 714.375,\n      \"y\": 217.41666666666669,\n      \"strokeColor\": \"#000000\",\n      \"backgroundColor\": \"#fd7e14\",\n      \"width\": 77,\n      \"height\": 192,\n      \"seed\": 1621471436,\n      \"groupIds\": [],\n      \"strokeSharpness\": \"sharp\",\n      \"boundElements\": [\n        {\n          \"id\": \"MJoeQ6ylkFi5Z7UCzD-r-\",\n          \"type\": \"arrow\"\n        },\n        {\n          \"id\": \"KpIRIBeGsT3yzCPp6jbEN\",\n          \"type\": \"arrow\"\n        }\n      ],\n      \"updated\": 1660733316757,\n      \"link\": null,\n      \"locked\": false\n    },\n    {\n      \"type\": \"text\",\n      \"version\": 113,\n      \"versionNonce\": 2140069448,\n      \"isDeleted\": false,\n      \"id\": \"45U617mr0L9ob4mc7Xozt\",\n      \"fillStyle\": \"hachure\",\n      \"strokeWidth\": 1,\n      \"strokeStyle\": \"solid\",\n      \"roughness\": 1,\n      \"opacity\": 100,\n      \"angle\": 0,\n      \"x\": 724.875,\n      \"y\": 171.0865384615385,\n      \"strokeColor\": \"#000000\",\n      \"backgroundColor\": \"#fab005\",\n      
\"width\": 56,\n      \"height\": 40,\n      \"seed\": 1285924468,\n      \"groupIds\": [],\n      \"strokeSharpness\": \"sharp\",\n      \"boundElements\": [],\n      \"updated\": 1660733195706,\n      \"link\": null,\n      \"locked\": false,\n      \"fontSize\": 16,\n      \"fontFamily\": 1,\n      \"text\": \"shard 1\\n\",\n      \"baseline\": 34,\n      \"textAlign\": \"center\",\n      \"verticalAlign\": \"top\",\n      \"containerId\": null,\n      \"originalText\": \"shard 1\\n\"\n    },\n    {\n      \"type\": \"text\",\n      \"version\": 123,\n      \"versionNonce\": 738921016,\n      \"isDeleted\": false,\n      \"id\": \"vY-LnNlhD3qWMEtRPoU0t\",\n      \"fillStyle\": \"hachure\",\n      \"strokeWidth\": 1,\n      \"strokeStyle\": \"solid\",\n      \"roughness\": 1,\n      \"opacity\": 100,\n      \"angle\": 0,\n      \"x\": 840.4375,\n      \"y\": 171.0865384615385,\n      \"strokeColor\": \"#000000\",\n      \"backgroundColor\": \"#fab005\",\n      \"width\": 64,\n      \"height\": 20,\n      \"seed\": 817296972,\n      \"groupIds\": [],\n      \"strokeSharpness\": \"sharp\",\n      \"boundElements\": [],\n      \"updated\": 1660733195706,\n      \"link\": null,\n      \"locked\": false,\n      \"fontSize\": 16,\n      \"fontFamily\": 1,\n      \"text\": \"shard 2\",\n      \"baseline\": 14,\n      \"textAlign\": \"center\",\n      \"verticalAlign\": \"top\",\n      \"containerId\": null,\n      \"originalText\": \"shard 2\"\n    },\n    {\n      \"type\": \"rectangle\",\n      \"version\": 499,\n      \"versionNonce\": 1256651064,\n      \"isDeleted\": false,\n      \"id\": \"xvkm28eoejETjF3M78jpN\",\n      \"fillStyle\": \"hachure\",\n      \"strokeWidth\": 1,\n      \"strokeStyle\": \"solid\",\n      \"roughness\": 1,\n      \"opacity\": 100,\n      \"angle\": 0,\n      \"x\": 943.125,\n      \"y\": 221.875,\n      \"strokeColor\": \"#000000\",\n      \"backgroundColor\": \"#fd7e14\",\n      \"width\": 77,\n      \"height\": 187,\n      \"seed\": 
1482008524,\n      \"groupIds\": [],\n      \"strokeSharpness\": \"sharp\",\n      \"boundElements\": [\n        {\n          \"id\": \"h_hyKP8N7nmD1NiZNa3ez\",\n          \"type\": \"arrow\"\n        },\n        {\n          \"id\": \"CrT6zZ8CKm_MSDw-CmcPG\",\n          \"type\": \"arrow\"\n        }\n      ],\n      \"updated\": 1660733356396,\n      \"link\": null,\n      \"locked\": false\n    },\n    {\n      \"type\": \"text\",\n      \"version\": 193,\n      \"versionNonce\": 731710264,\n      \"isDeleted\": false,\n      \"id\": \"H72xWL9unzb1mQiLvx7L4\",\n      \"fillStyle\": \"hachure\",\n      \"strokeWidth\": 1,\n      \"strokeStyle\": \"solid\",\n      \"roughness\": 1,\n      \"opacity\": 100,\n      \"angle\": 0,\n      \"x\": 950.125,\n      \"y\": 176.7115384615385,\n      \"strokeColor\": \"#000000\",\n      \"backgroundColor\": \"#fab005\",\n      \"width\": 63,\n      \"height\": 20,\n      \"seed\": 1704611020,\n      \"groupIds\": [],\n      \"strokeSharpness\": \"sharp\",\n      \"boundElements\": [],\n      \"updated\": 1660733195706,\n      \"link\": null,\n      \"locked\": false,\n      \"fontSize\": 16,\n      \"fontFamily\": 1,\n      \"text\": \"shard 3\",\n      \"baseline\": 14,\n      \"textAlign\": \"center\",\n      \"verticalAlign\": \"top\",\n      \"containerId\": null,\n      \"originalText\": \"shard 3\"\n    },\n    {\n      \"type\": \"rectangle\",\n      \"version\": 547,\n      \"versionNonce\": 1963108408,\n      \"isDeleted\": false,\n      \"id\": \"jj-MVcNrzcH0DbFFo9noF\",\n      \"fillStyle\": \"hachure\",\n      \"strokeWidth\": 1,\n      \"strokeStyle\": \"solid\",\n      \"roughness\": 1,\n      \"opacity\": 100,\n      \"angle\": 0,\n      \"x\": 833.9375,\n      \"y\": 221.16666666666669,\n      \"strokeColor\": \"#000000\",\n      \"backgroundColor\": \"#fd7e14\",\n      \"width\": 77,\n      \"height\": 193,\n      \"seed\": 1374694167,\n      \"groupIds\": [],\n      \"strokeSharpness\": \"sharp\",\n      
\"boundElements\": [\n        {\n          \"id\": \"Qnatw_Uix7cMFwAuW1DkJ\",\n          \"type\": \"arrow\"\n        },\n        {\n          \"id\": \"TLS6mZEI7BXyUdiiYHdrg\",\n          \"type\": \"arrow\"\n        }\n      ],\n      \"updated\": 1660733333008,\n      \"link\": null,\n      \"locked\": false\n    },\n    {\n      \"id\": \"MJoeQ6ylkFi5Z7UCzD-r-\",\n      \"type\": \"arrow\",\n      \"x\": 717.875,\n      \"y\": 501.1682692307693,\n      \"width\": 24,\n      \"height\": 87,\n      \"angle\": 0,\n      \"strokeColor\": \"#000000\",\n      \"backgroundColor\": \"#15aabf\",\n      \"fillStyle\": \"solid\",\n      \"strokeWidth\": 1,\n      \"strokeStyle\": \"solid\",\n      \"roughness\": 1,\n      \"opacity\": 100,\n      \"groupIds\": [],\n      \"strokeSharpness\": \"round\",\n      \"seed\": 6593352,\n      \"version\": 99,\n      \"versionNonce\": 1021163848,\n      \"isDeleted\": false,\n      \"boundElements\": null,\n      \"updated\": 1660733308793,\n      \"link\": null,\n      \"locked\": false,\n      \"points\": [\n        [\n          0,\n          0\n        ],\n        [\n          -24,\n          -44\n        ],\n        [\n          -3,\n          -87\n        ]\n      ],\n      \"lastCommittedPoint\": null,\n      \"startBinding\": {\n        \"elementId\": \"jPwIU_a9_nxvuDFAcbzxM\",\n        \"focus\": -0.8341352911917994,\n        \"gap\": 9.08173076923083\n      },\n      \"endBinding\": {\n        \"elementId\": \"BY5OdEEKT0Y_DTy9Zgr9C\",\n        \"focus\": -0.13122256675640864,\n        \"gap\": 4.751602564102598\n      },\n      \"startArrowhead\": null,\n      \"endArrowhead\": \"arrow\"\n    },\n    {\n      \"id\": \"KpIRIBeGsT3yzCPp6jbEN\",\n      \"type\": \"arrow\",\n      \"x\": 752.875,\n      \"y\": 419.1682692307693,\n      \"width\": 16,\n      \"height\": 90,\n      \"angle\": 0,\n      \"strokeColor\": \"#000000\",\n      \"backgroundColor\": \"#15aabf\",\n      \"fillStyle\": \"solid\",\n      
\"strokeWidth\": 1,\n      \"strokeStyle\": \"solid\",\n      \"roughness\": 1,\n      \"opacity\": 100,\n      \"groupIds\": [],\n      \"strokeSharpness\": \"round\",\n      \"seed\": 1407934264,\n      \"version\": 74,\n      \"versionNonce\": 1205666632,\n      \"isDeleted\": false,\n      \"boundElements\": null,\n      \"updated\": 1660733316764,\n      \"link\": null,\n      \"locked\": false,\n      \"points\": [\n        [\n          0,\n          0\n        ],\n        [\n          7,\n          42\n        ],\n        [\n          -9,\n          90\n        ]\n      ],\n      \"lastCommittedPoint\": null,\n      \"startBinding\": {\n        \"elementId\": \"BY5OdEEKT0Y_DTy9Zgr9C\",\n        \"focus\": 0.3233993962204972,\n        \"gap\": 9.751602564102598\n      },\n      \"endBinding\": {\n        \"elementId\": \"jPwIU_a9_nxvuDFAcbzxM\",\n        \"focus\": -0.8035367629216211,\n        \"gap\": 1.0817307692308304\n      },\n      \"startArrowhead\": null,\n      \"endArrowhead\": \"arrow\"\n    },\n    {\n      \"id\": \"Qnatw_Uix7cMFwAuW1DkJ\",\n      \"type\": \"arrow\",\n      \"x\": 837.875,\n      \"y\": 506.1682692307693,\n      \"width\": 7,\n      \"height\": 83,\n      \"angle\": 0,\n      \"strokeColor\": \"#000000\",\n      \"backgroundColor\": \"#15aabf\",\n      \"fillStyle\": \"solid\",\n      \"strokeWidth\": 1,\n      \"strokeStyle\": \"solid\",\n      \"roughness\": 1,\n      \"opacity\": 100,\n      \"groupIds\": [],\n      \"strokeSharpness\": \"round\",\n      \"seed\": 1927132472,\n      \"version\": 74,\n      \"versionNonce\": 1840565576,\n      \"isDeleted\": false,\n      \"boundElements\": null,\n      \"updated\": 1660733325799,\n      \"link\": null,\n      \"locked\": false,\n      \"points\": [\n        [\n          0,\n          0\n        ],\n        [\n          7,\n          -83\n        ]\n      ],\n      \"lastCommittedPoint\": null,\n      \"startBinding\": {\n        \"elementId\": \"jPwIU_a9_nxvuDFAcbzxM\",\n    
    \"focus\": -0.191317746711659,\n        \"gap\": 4.0817307692308304\n      },\n      \"endBinding\": {\n        \"elementId\": \"jj-MVcNrzcH0DbFFo9noF\",\n        \"focus\": 0.4002005378587657,\n        \"gap\": 9.001602564102598\n      },\n      \"startArrowhead\": null,\n      \"endArrowhead\": \"arrow\"\n    },\n    {\n      \"id\": \"TLS6mZEI7BXyUdiiYHdrg\",\n      \"type\": \"arrow\",\n      \"x\": 872.875,\n      \"y\": 423.1682692307693,\n      \"width\": 13,\n      \"height\": 82,\n      \"angle\": 0,\n      \"strokeColor\": \"#000000\",\n      \"backgroundColor\": \"#15aabf\",\n      \"fillStyle\": \"solid\",\n      \"strokeWidth\": 1,\n      \"strokeStyle\": \"solid\",\n      \"roughness\": 1,\n      \"opacity\": 100,\n      \"groupIds\": [],\n      \"strokeSharpness\": \"round\",\n      \"seed\": 247434040,\n      \"version\": 76,\n      \"versionNonce\": 1827860040,\n      \"isDeleted\": false,\n      \"boundElements\": null,\n      \"updated\": 1660733333013,\n      \"link\": null,\n      \"locked\": false,\n      \"points\": [\n        [\n          0,\n          0\n        ],\n        [\n          9,\n          41\n        ],\n        [\n          -4,\n          82\n        ]\n      ],\n      \"lastCommittedPoint\": null,\n      \"startBinding\": {\n        \"elementId\": \"jj-MVcNrzcH0DbFFo9noF\",\n        \"focus\": 0.38070164408537926,\n        \"gap\": 9.001602564102598\n      },\n      \"endBinding\": {\n        \"elementId\": \"jPwIU_a9_nxvuDFAcbzxM\",\n        \"focus\": -0.02127803036140877,\n        \"gap\": 5.0817307692308304\n      },\n      \"startArrowhead\": null,\n      \"endArrowhead\": \"arrow\"\n    },\n    {\n      \"id\": \"h_hyKP8N7nmD1NiZNa3ez\",\n      \"type\": \"arrow\",\n      \"x\": 995.875,\n      \"y\": 418.1682692307693,\n      \"width\": 13,\n      \"height\": 90,\n      \"angle\": 0,\n      \"strokeColor\": \"#000000\",\n      \"backgroundColor\": \"#15aabf\",\n      \"fillStyle\": \"solid\",\n      \"strokeWidth\": 
1,\n      \"strokeStyle\": \"solid\",\n      \"roughness\": 1,\n      \"opacity\": 100,\n      \"groupIds\": [],\n      \"strokeSharpness\": \"round\",\n      \"seed\": 2138692424,\n      \"version\": 57,\n      \"versionNonce\": 178091592,\n      \"isDeleted\": false,\n      \"boundElements\": null,\n      \"updated\": 1660733348048,\n      \"link\": null,\n      \"locked\": false,\n      \"points\": [\n        [\n          0,\n          0\n        ],\n        [\n          12,\n          47\n        ],\n        [\n          -1,\n          90\n        ]\n      ],\n      \"lastCommittedPoint\": null,\n      \"startBinding\": {\n        \"elementId\": \"xvkm28eoejETjF3M78jpN\",\n        \"focus\": 0.19231425235177602,\n        \"gap\": 9.293269230769283\n      },\n      \"endBinding\": {\n        \"elementId\": \"jPwIU_a9_nxvuDFAcbzxM\",\n        \"focus\": 0.7835976013538369,\n        \"gap\": 2.0817307692308304\n      },\n      \"startArrowhead\": null,\n      \"endArrowhead\": \"arrow\"\n    },\n    {\n      \"id\": \"CrT6zZ8CKm_MSDw-CmcPG\",\n      \"type\": \"arrow\",\n      \"x\": 957.875,\n      \"y\": 502.1682692307693,\n      \"width\": 18,\n      \"height\": 91,\n      \"angle\": 0,\n      \"strokeColor\": \"#000000\",\n      \"backgroundColor\": \"#15aabf\",\n      \"fillStyle\": \"solid\",\n      \"strokeWidth\": 1,\n      \"strokeStyle\": \"solid\",\n      \"roughness\": 1,\n      \"opacity\": 100,\n      \"groupIds\": [],\n      \"strokeSharpness\": \"round\",\n      \"seed\": 1991558200,\n      \"version\": 58,\n      \"versionNonce\": 1980388936,\n      \"isDeleted\": false,\n      \"boundElements\": null,\n      \"updated\": 1660733356402,\n      \"link\": null,\n      \"locked\": false,\n      \"points\": [\n        [\n          0,\n          0\n        ],\n        [\n          -11,\n          -39\n        ],\n        [\n          7,\n          -91\n        ]\n      ],\n      \"lastCommittedPoint\": null,\n      \"startBinding\": {\n        
\"elementId\": \"jPwIU_a9_nxvuDFAcbzxM\",\n        \"focus\": 0.6245467021802061,\n        \"gap\": 8.08173076923083\n      },\n      \"endBinding\": {\n        \"elementId\": \"xvkm28eoejETjF3M78jpN\",\n        \"focus\": -0.23155463939046053,\n        \"gap\": 2.2932692307692832\n      },\n      \"startArrowhead\": null,\n      \"endArrowhead\": \"arrow\"\n    }\n  ],\n  \"appState\": {\n    \"gridSize\": null,\n    \"viewBackgroundColor\": \"#ffffff\"\n  },\n  \"files\": {}\n}\n"
  },
  {
    "path": "docs/dashtable.md",
    "content": "\n# Dashtable in Dragonfly\n\nDashtable is a very important data structure in Dragonfly. This document explains\nhow it fits inside the engine.\n\nEach selectable database holds a primary dashtable that contains all its entries. Another instance of Dashtable holds an optional expiry information, for keys that have TTL expiry on them. Dashtable is equivalent to Redis dictionary but have some wonderful properties that make Dragonfly memory efficient in various situations.\n\n![Database Overview](./db.svg)\n\n## Redis dictionary\n\n*“All problems in computer science can be solved by another level of indirection”*\n\nThis section is a brief refresher of how redis dictionary (RD) is implemented.\nWe shamelessly \"borrowed\" a diagram from [this blogpost](https://codeburst.io/a-closer-look-at-redis-dictionary-implementation-internals-3fd815aae535), so if you want a deep-dive, you can read the original article.\n\nEach `RD` is in fact two hash-tables (see `ht` field in the diagram below). The second instance is used for incremental resizes of the dictionary.\nEach hash-table `dictht` is implemented as a [classic hashtable with separate chaining](https://en.wikipedia.org/wiki/Hash_table#Separate_chaining). `dictEntry` is the link-list entry that wraps each key/value pair inside the table. Each dictEntry has three pointers and takes up 24 bytes of space. The bucket array of `dictht` is resized at powers of two, so usually its utilization is in [50, 100] range.\n\n![RD structure](https://miro.medium.com/max/1400/1*gNc8VzCknWRxXTBP9cVEHQ.png)\n\n<br>\n\nLet's estimate the overhead of `dictht` table inside RD.\n\n*Case 1*: it has `N` items at 100% load factor, in other words, buckets count equals to number of items. Each bucket holds a pointer to dictEntry, i.e. it's 8 bytes. In total we need: $8N + 24N = 32N$ bytes per record. <br>\n*Case 2*: `N` items at 75% load factor, in other words, the number of buckets is 1.33 higher than number of items. 
In total we need: $N\\*1.33\\*8 + 24N \\approx 34N$ bytes per record. <br>\n*Case 3*: `N` items at 50% load factor, say right after table growth. Number of buckets is twice the number of items, hence we need $N\\*2\\*8 + 24N = 40N$ bytes per record.\n\nIn the best possible case we need at least 16 bytes to store a key/value pair in the table, therefore\nthe overhead of `dictht` is on average about 16-24 bytes per item.\n\nNow let's take incremental growth into account. When `ht[0]` is full (i.e. RD needs to migrate data to a bigger table), it will instantiate a second temporary instance `ht[1]` that will hold additional 2*N buckets. Both instances will live in parallel until all data is migrated to `ht[1]` and then `ht[0]` bucket array will be deleted. All this complexity is hidden from the user by the well-engineered API of RD. Let's combine case 3 and case 1 to analyze the memory spike at this point: `ht[0]` holds `N` items and it is fully utilized. `ht[1]` is allocated with `2N` buckets.\nOverall, the memory needed during the spike is $32N + 16N=48N$ bytes.\n\nTo summarize, RD requires between **16-32 bytes overhead**.\n\n## Dash table\n\n[Dashtable](https://arxiv.org/abs/2003.07302) is an evolution of an algorithm from 1979 called [extendible hashing](https://en.wikipedia.org/wiki/Extendible_hashing).\n\nSimilarly to a classic hashtable, dashtable (DT) also holds an array of pointers at front. However, unlike with classic tables, it points to `segments` and not to linked lists of items. Each `segment` is, in fact, a mini-hashtable of constant size. The front array of pointers to segments is called `directory`. Similarly to a classic table, when an item is inserted into a DT, it first determines the destination segment based on item's hashvalue. The segment is implemented as a hashtable with open-addressed hashing scheme and as I said - constant in size. Once the segment is determined, the item is inserted into one of its buckets. 
If an item was successfully inserted, we are finished; otherwise, the segment is \"full\" and needs splitting. The DT splits the contents of a full segment in two segments, and the additional segment is added to the directory. Then it tries to insert the item again. To summarize, the classic chaining hash-table is built upon a dynamic array of linked-lists while dashtable is more like a dynamic array of flat hash-tables of constant size.\n\n![Dashtable Diagram](./dashtable.svg)\n\nIn the diagram above you can see what a dashtable looks like. Each segment is comprised of `K` buckets. For example, in our implementation a dashtable has 60 buckets per segment (it's a compile-time parameter that can be configured).\n\n### Segment zoom-in\n\nBelow you can see the diagram of a segment. It is comprised of regular buckets and stash buckets. Each bucket has `k` slots and each slot can host a key-value record.\n\n![Segment](./dashsegment.svg)\n\nIn our implementation, each segment has 56 regular buckets, 4 stash buckets and each bucket contains 14 slots. Overall, each dashtable segment has capacity to host 840 records. When an item is inserted into a segment, DT first determines its home bucket based on item's hash value. The home bucket is one of 56 regular buckets that reside in the table. Each bucket has 14 available slots and the item can reside in any free slot. If the home bucket is full,\nthen DT tries to insert to the regular bucket on the right. And if that bucket is also full,\nit tries to insert into one of 4 stash buckets. These are kept deliberately aside to gather\nspillovers from the regular buckets. The segment is \"full\" when the insertion fails, i.e. the home bucket and the neighbour bucket and all 4 stash buckets are full. Please note that the segment is not necessarily at full capacity: it can be that other buckets are not yet full, but unfortunately, that item can go only into these 6 buckets,\nso the segment contents must be split. 
In case of split event, DT creates a new segment,\nadds it to the directory and the items from the old segment are partly moved to the new one,\n and partly rebalanced within the old one. Only two segments are touched during the split event.\n\nNow we can explain why seemingly similar data-structure has an advantage over a classic hashtable\nin terms of memory and cpu.\n\n 1. Memory: we need `~N/840` entries or `8N/840` bytes in dashtable directory to host N items on average.\n Basically, the overhead of directory almost disappears in DT. Say for 1M items we will\n need ~1200 segments or 9600 bytes for the main array. That's in contrast to RD where\n we will need a solid `8N` bucket array overhead - no matter what.\n For 1M items, it will obviously be 8MB. In addition, dash segments use open addressing collision\n scheme with probing, that means that they do not need anything like `dictEntry`.\n Dashtable uses lots of tricks to make its own metadata small. In our implementation,\n the average `tax` per entry is short of 20 bits compared to 64 bits in RD (dictEntry.next).\n In addition, DT incremental resize does not allocate a bigger table - instead\n it adds a single segment per split event. Assuming that key/value entry is two 8\n byte pointers like in RD, then DT requires $16N + (8N/840) + 2.5N + O(1) \\approx 19N$\n bytes at 100% utilization. This number is very close to the optimum of 16 bytes.\n In the unlikely case when all segments just doubled in size, i.e.\n DT is at 50% of utilization we may need $38N$ bytes per item.\n In practice, each segment grows independently from others,\n so the table has smooth memory usage of 22-32 bytes per item or **6-16 bytes overhead**.\n\n 1. Speed: RD requires an allocation for dictEntry per insertion and deallocation per deletion. In addition, RD uses chaining, which is cache unfriendly on modern hardware. 
There is a consensus in engineering and research communities that classic chaining schemes are slower than open addressing alternatives.\n Having said that, DT also needs to go through a single level of indirection when\n fetching a segment pointer. However, DT's directory size is relatively small:\n in the example above, all 9K could reside in L1 cache. Once the segment is determined,\n the rest of the insertion, however, is very fast and mostly operates on 1-3 memory cache lines.\n Finally, during resizes, RD needs to allocate a bucket array of size `2N`.\n That could be time consuming - imagine an allocation of 100M buckets for example.\n DT on the other hand requires an allocation of constant size per new segment. DT is faster\n and what's more important - its incremental ability is better. It eliminates latency spikes\n and reduces tail latency of the operations above.\n\nPlease note that with all efficiency of Dashtable, it can not decrease drastically the\noverall memory usage. Its primary goal is to reduce waste around dictionary management.\n\nHaving said that, by reducing metadata waste we could insert dragonfly-specific attributes\ninto a table's metadata in order to implement other intelligent algorithms like forkless save. This is where some of the Dragonfly's disrupting qualities [can be seen](#forkless-save).\n\n## Benchmarks\n\nThere are many other improvements in dragonfly that save memory besides DT. I will not be\nable to cover them all here. The results below show the final result as of May 2022.\n\n### Populate single-threaded\n\nTo compare RD vs DT I often use an internal debugging command \"debug populate\" that quickly fills both datastores with data. 
It just saves time and gives more consistent results compared to memtier_benchmark.\nIt also shows the raw speed at which each dictionary gets filled without intermediary factors like networking, parsing etc.\nI deliberately fill datasets with small data to show how overhead of metadata differs between two data structures.\n\nI run \"debug populate 20000000\" (20M) on both engines on my home machine \"AMD Ryzen 5 3400G with 8 cores\".\n\n|             | Dragonfly | Redis 6 |\n|-------------|-----------|---------|\n| Time        |   10.8s   |  16.0s  |\n| Memory used |    1GB    |  1.73G  |\n\nWhen looking at Redis6 \"info memory\" stats, you can see that `used_memory_overhead` field equals\nto `1.0GB`. That means that out of 1.73GB allocated, a whopping 1.0GB is used for\nthe metadata. For small data use-cases the cost of metadata in Redis is larger than the data itself.\n\n### Populate multi-threaded\n\nNow I run Dragonfly on all 8 cores. Redis has the same results, of course.\n\n|             | Dragonfly | Redis 6 |\n|-------------|-----------|---------|\n| Time        |   2.43s   |  16.0s  |\n| Memory used |    896MB  |  1.73G  |\n\nDue to shared-nothing architecture, Dragonfly maintains a dashtable per thread with its own slice of data. Each thread fills 1/8th of 20M range it owns - and it is much faster, almost 8 times faster. You can see that the total usage is even smaller, because now we maintain\nsmaller tables in each\nthread (it's not always the case though - we could get slightly worse memory usage than with\nsingle-threaded case, depends where we stand compared to hash table utilization).\n\n### Forkless Save\n\nThis example shows how much memory Dragonfly uses during BGSAVE under load compared to Redis. Btw, BGSAVE and SAVE in Dragonfly is the same procedure because it's implemented using fully asynchronous algorithm that maintains point-in-time snapshot guarantees.\n\nThis test consists of 3 steps:\n\n1. 
Execute `debug populate 5000000 key 1024` command on both servers to quickly fill them up\n   with ~5GB of data.\n2. Run `memtier_benchmark --ratio 1:0 -n 600000 --threads=2 -c 20 --distinct-client-seed  --key-prefix=\"key:\"  --hide-histogram  --key-maximum=5000000 -d 1024` command in order to send constant update traffic. This traffic should not affect substantially the memory usage of both servers.\n3. Finally, run `bgsave` on both servers while measuring their memory.\n\nIt's technically very hard to measure exact memory usage of Redis during BGSAVE because it creates a child process that shares its parent memory in-part. We chose `cgroupsv2` as a tool to measure the memory. We put each server into a separate cgroup and we sampled `memory.current` attribute for each cgroup. Since a forked Redis process inherits the cgroup of the parent, we get an accurate estimation of their total memory usage. Although we did not need this for Dragonfly we applied the same approach for consistency.\n\n![BGSAVE](./bgsave_memusage.svg)\n\nAs you can see on the graph, Redis uses 50% more memory even before BGSAVE starts. Around second 14, BGSAVE kicks off on both servers. Visually you can not see this event on Dragonfly graph, but it's seen very well on Redis graph. It took just a few seconds for Dragonfly to finish its snapshot (again, not possible to see) and around second 20 Dragonfly is already behind BGSAVE. You can see a distinguishable cliff at second 39\nwhere Redis finishes its snapshot, reaching almost 3 times more memory usage at peak.\n\n### Expiry of items during writes\n\nEfficient Expiry is very important for many scenarios. See, for example,\n[Pelikan paper'21](https://pelikan.io/2021/segcache.html). Twitter team says\nthat their memory footprint could be reduced by as much as 60% by employing better expiry methodology. 
The authors of the post above show pros and cons of expiration methods in the table below:\n\n<img src=\"https://pelikan.io/assets/img/segcache/expiration.svg\" width=\"400\">\n\nThey argue that proactive expiration is very important for timely deletion of expired items.\nDragonfly employs its own intelligent garbage collection procedure. By leveraging DashTable\ncompartmentalized structure it can actually employ a very efficient passive expiry algorithm with low CPU overhead. Our passive procedure is complemented with proactive gradual scanning of the table in background.\n\nThe procedure is as follows:\nA dashtable grows when its segment becomes full during the insertion and needs to be split.\nThis is a convenient point to perform garbage collection, but only for that segment.\nWe scan its buckets for the expired items. If we delete some of them, we may avoid growing the table altogether! The cost of scanning the segment before potential split is no more than the\nsplit itself, so it can be estimated as `O(1)`.\n\nWe use `memtier_benchmark` for the experiment to demonstrate Dragonfly vs Redis expiry efficiency.\nWe run locally the following command:\n\n```bash\nmemtier_benchmark --ratio 1:0 -n 600000 --threads=2 -c 20 --distinct-client-seed \\\n   --key-prefix=\"key:\"  --hide-histogram --expiry-range=30-30 --key-maximum=100000000 -d 256\n```\n\nWe load larger values (256 bytes) to reduce the impact of metadata savings\nof Dragonfly.\n\n|                      | Dragonfly | Redis 6 |\n|----------------------|-----------|---------|\n| Memory peak usage    | 1.45GB    |  1.95GB |\n| Avg SET qps          | 131K      | 100K    |\n\nPlease note that Redis could sustain 30% less qps. 
That means that the optimal working sets for Dragonfly and Redis are different - the former needed to host at least `20s*131k` items\nat any point of time and the latter only needed to keep `20s*100K` items.\nSo for `30%` bigger working set Dragonfly needed `25%` less memory at peak.\n\n<em>*Please ignore the performance advantage of Dragonfly over Redis in this test - it has no meaning.\nI run it locally on my machine and it does not represent a real throughput benchmark. </em>\n\n<br>\n\n*All diagrams in this doc are created in [drawio app](https://app.diagrams.net/).*\n"
  },
  {
    "path": "docs/dense_set.excalidraw",
    "content": "{\n  \"type\": \"excalidraw\",\n  \"version\": 2,\n  \"source\": \"https://excalidraw.com\",\n  \"elements\": [\n    {\n      \"id\": \"LdnS4utc0Co8ZQl0k_99q\",\n      \"type\": \"rectangle\",\n      \"x\": 278.57142857142867,\n      \"y\": 767.857142857143,\n      \"width\": 157,\n      \"height\": 42,\n      \"angle\": 0,\n      \"strokeColor\": \"#364fc7\",\n      \"backgroundColor\": \"#4c6ef5\",\n      \"fillStyle\": \"hachure\",\n      \"strokeWidth\": 1,\n      \"strokeStyle\": \"solid\",\n      \"roughness\": 1,\n      \"opacity\": 100,\n      \"groupIds\": [],\n      \"strokeSharpness\": \"sharp\",\n      \"seed\": 34309611,\n      \"version\": 379,\n      \"versionNonce\": 490192843,\n      \"isDeleted\": false,\n      \"boundElements\": [\n        {\n          \"id\": \"wIo5IjqjKx5agDWM2U6y9\",\n          \"type\": \"arrow\"\n        },\n        {\n          \"type\": \"text\",\n          \"id\": \"BV0b6Du7Nu_TpcyHxOq9M\"\n        }\n      ],\n      \"updated\": 1662257477282,\n      \"link\": null,\n      \"locked\": false\n    },\n    {\n      \"id\": \"6iemTDX54UBvWAow6YZUm\",\n      \"type\": \"ellipse\",\n      \"x\": 785.5714285714287,\n      \"y\": 670.857142857143,\n      \"width\": 151,\n      \"height\": 65,\n      \"angle\": 0,\n      \"strokeColor\": \"#000000\",\n      \"backgroundColor\": \"transparent\",\n      \"fillStyle\": \"hachure\",\n      \"strokeWidth\": 1,\n      \"strokeStyle\": \"solid\",\n      \"roughness\": 1,\n      \"opacity\": 100,\n      \"groupIds\": [],\n      \"strokeSharpness\": \"sharp\",\n      \"seed\": 2110505739,\n      \"version\": 615,\n      \"versionNonce\": 1697849797,\n      \"isDeleted\": false,\n      \"boundElements\": [\n        {\n          \"type\": \"text\",\n          \"id\": \"CsENpV2URO6_T9J1e_EWv\"\n        },\n        {\n          \"id\": \"h4EkHYMe6b4cxIpFk2aJ1\",\n          \"type\": \"arrow\"\n        }\n      ],\n      \"updated\": 1662257477283,\n      \"link\": null,\n      
\"locked\": false\n    },\n    {\n      \"id\": \"CsENpV2URO6_T9J1e_EWv\",\n      \"type\": \"text\",\n      \"x\": 790.5714285714287,\n      \"y\": 689.357142857143,\n      \"width\": 141,\n      \"height\": 28,\n      \"angle\": 0,\n      \"strokeColor\": \"#000000\",\n      \"backgroundColor\": \"transparent\",\n      \"fillStyle\": \"hachure\",\n      \"strokeWidth\": 1,\n      \"strokeStyle\": \"solid\",\n      \"roughness\": 1,\n      \"opacity\": 100,\n      \"groupIds\": [],\n      \"strokeSharpness\": \"sharp\",\n      \"seed\": 1128884549,\n      \"version\": 556,\n      \"versionNonce\": 341608715,\n      \"isDeleted\": false,\n      \"boundElements\": null,\n      \"updated\": 1662257477283,\n      \"link\": null,\n      \"locked\": false,\n      \"text\": \"\\\"abcd...\\\"\",\n      \"fontSize\": 20,\n      \"fontFamily\": 1,\n      \"textAlign\": \"center\",\n      \"verticalAlign\": \"middle\",\n      \"baseline\": 19,\n      \"containerId\": \"6iemTDX54UBvWAow6YZUm\",\n      \"originalText\": \"\\\"abcd...\\\"\"\n    },\n    {\n      \"id\": \"wIo5IjqjKx5agDWM2U6y9\",\n      \"type\": \"arrow\",\n      \"x\": 436.80362915161936,\n      \"y\": 789.7627222797395,\n      \"width\": 81.53559883961861,\n      \"height\": 0.030478424363479917,\n      \"angle\": 0,\n      \"strokeColor\": \"#000000\",\n      \"backgroundColor\": \"transparent\",\n      \"fillStyle\": \"hachure\",\n      \"strokeWidth\": 1,\n      \"strokeStyle\": \"solid\",\n      \"roughness\": 1,\n      \"opacity\": 100,\n      \"groupIds\": [],\n      \"strokeSharpness\": \"round\",\n      \"seed\": 1941199909,\n      \"version\": 1319,\n      \"versionNonce\": 319409605,\n      \"isDeleted\": false,\n      \"boundElements\": null,\n      \"updated\": 1662257477403,\n      \"link\": null,\n      \"locked\": false,\n      \"points\": [\n        [\n          0,\n          0\n        ],\n        [\n          81.53559883961861,\n          0.030478424363479917\n        ]\n      ],\n      
\"lastCommittedPoint\": null,\n      \"startBinding\": {\n        \"elementId\": \"LdnS4utc0Co8ZQl0k_99q\",\n        \"focus\": 0.041645385141281355,\n        \"gap\": 1.2322005801906926\n      },\n      \"endBinding\": {\n        \"elementId\": \"9mWjCy5sUe-mID6u6k7Ll\",\n        \"focus\": -0.08136851610313917,\n        \"gap\": 1.2322005801906926\n      },\n      \"startArrowhead\": null,\n      \"endArrowhead\": \"arrow\"\n    },\n    {\n      \"type\": \"ellipse\",\n      \"version\": 1317,\n      \"versionNonce\": 365900933,\n      \"isDeleted\": false,\n      \"id\": \"tbWakWx-QT3DCK-_FZhx-\",\n      \"fillStyle\": \"hachure\",\n      \"strokeWidth\": 1,\n      \"strokeStyle\": \"solid\",\n      \"roughness\": 1,\n      \"opacity\": 100,\n      \"angle\": 0,\n      \"x\": 1687.5714285714287,\n      \"y\": 681.857142857143,\n      \"strokeColor\": \"#000000\",\n      \"backgroundColor\": \"transparent\",\n      \"width\": 151,\n      \"height\": 65,\n      \"seed\": 429183979,\n      \"groupIds\": [],\n      \"strokeSharpness\": \"sharp\",\n      \"boundElements\": [\n        {\n          \"id\": \"RqWMMUkOMQWtnqqIo_0RK\",\n          \"type\": \"text\"\n        },\n        {\n          \"id\": \"ZD_EGEh1PSlEhdhmPUGm3\",\n          \"type\": \"arrow\"\n        },\n        {\n          \"type\": \"text\",\n          \"id\": \"RqWMMUkOMQWtnqqIo_0RK\"\n        }\n      ],\n      \"updated\": 1662257477283,\n      \"link\": null,\n      \"locked\": false\n    },\n    {\n      \"type\": \"text\",\n      \"version\": 1258,\n      \"versionNonce\": 1498415691,\n      \"isDeleted\": false,\n      \"id\": \"RqWMMUkOMQWtnqqIo_0RK\",\n      \"fillStyle\": \"hachure\",\n      \"strokeWidth\": 1,\n      \"strokeStyle\": \"solid\",\n      \"roughness\": 1,\n      \"opacity\": 100,\n      \"angle\": 0,\n      \"x\": 1692.5714285714287,\n      \"y\": 700.357142857143,\n      \"strokeColor\": \"#000000\",\n      \"backgroundColor\": \"transparent\",\n      \"width\": 141,\n    
  \"height\": 28,\n      \"seed\": 365098053,\n      \"groupIds\": [],\n      \"strokeSharpness\": \"sharp\",\n      \"boundElements\": [],\n      \"updated\": 1662257477283,\n      \"link\": null,\n      \"locked\": false,\n      \"fontSize\": 20,\n      \"fontFamily\": 1,\n      \"text\": \"\\\"abcd...\\\"\",\n      \"baseline\": 19,\n      \"textAlign\": \"center\",\n      \"verticalAlign\": \"middle\",\n      \"containerId\": \"tbWakWx-QT3DCK-_FZhx-\",\n      \"originalText\": \"\\\"abcd...\\\"\"\n    },\n    {\n      \"type\": \"arrow\",\n      \"version\": 3623,\n      \"versionNonce\": 1786105125,\n      \"isDeleted\": false,\n      \"id\": \"ZD_EGEh1PSlEhdhmPUGm3\",\n      \"fillStyle\": \"hachure\",\n      \"strokeWidth\": 1,\n      \"strokeStyle\": \"solid\",\n      \"roughness\": 1,\n      \"opacity\": 100,\n      \"angle\": 0,\n      \"x\": 1593.5714285714287,\n      \"y\": 767.857142857143,\n      \"strokeColor\": \"#000000\",\n      \"backgroundColor\": \"transparent\",\n      \"width\": 98.02649903619977,\n      \"height\": 41.55714530998347,\n      \"seed\": 1874017893,\n      \"groupIds\": [],\n      \"strokeSharpness\": \"round\",\n      \"boundElements\": [],\n      \"updated\": 1662257477405,\n      \"link\": null,\n      \"locked\": false,\n      \"startBinding\": {\n        \"elementId\": \"RyzbgdtiyAgDl_Gg-xKD6\",\n        \"focus\": 0.44304364520670675,\n        \"gap\": 2\n      },\n      \"endBinding\": {\n        \"elementId\": \"tbWakWx-QT3DCK-_FZhx-\",\n        \"focus\": 0.6558676754700489,\n        \"gap\": 1\n      },\n      \"lastCommittedPoint\": null,\n      \"startArrowhead\": null,\n      \"endArrowhead\": \"arrow\",\n      \"points\": [\n        [\n          0,\n          0\n        ],\n        [\n          57.5,\n          -9.5\n        ],\n        [\n          98.02649903619977,\n          -41.55714530998347\n        ]\n      ]\n    },\n    {\n      \"type\": \"ellipse\",\n      \"version\": 1594,\n      \"versionNonce\": 
722835269,\n      \"isDeleted\": false,\n      \"id\": \"hls1kkVvTEbIVUoHV9YjB\",\n      \"fillStyle\": \"hachure\",\n      \"strokeWidth\": 1,\n      \"strokeStyle\": \"solid\",\n      \"roughness\": 1,\n      \"opacity\": 100,\n      \"angle\": 0,\n      \"x\": 1688.0714285714287,\n      \"y\": 848.357142857143,\n      \"strokeColor\": \"#000000\",\n      \"backgroundColor\": \"transparent\",\n      \"width\": 151,\n      \"height\": 65,\n      \"seed\": 464754437,\n      \"groupIds\": [],\n      \"strokeSharpness\": \"sharp\",\n      \"boundElements\": [\n        {\n          \"id\": \"7OALlCUSo8C4wRunATj7i\",\n          \"type\": \"text\"\n        },\n        {\n          \"id\": \"ZD_EGEh1PSlEhdhmPUGm3\",\n          \"type\": \"arrow\"\n        },\n        {\n          \"id\": \"7OALlCUSo8C4wRunATj7i\",\n          \"type\": \"text\"\n        },\n        {\n          \"type\": \"text\",\n          \"id\": \"7OALlCUSo8C4wRunATj7i\"\n        },\n        {\n          \"id\": \"PtndVbqi061kx-2QVmX9B\",\n          \"type\": \"arrow\"\n        }\n      ],\n      \"updated\": 1662257477283,\n      \"link\": null,\n      \"locked\": false\n    },\n    {\n      \"type\": \"text\",\n      \"version\": 1533,\n      \"versionNonce\": 1814440843,\n      \"isDeleted\": false,\n      \"id\": \"7OALlCUSo8C4wRunATj7i\",\n      \"fillStyle\": \"hachure\",\n      \"strokeWidth\": 1,\n      \"strokeStyle\": \"solid\",\n      \"roughness\": 1,\n      \"opacity\": 100,\n      \"angle\": 0,\n      \"x\": 1693.0714285714287,\n      \"y\": 866.857142857143,\n      \"strokeColor\": \"#000000\",\n      \"backgroundColor\": \"transparent\",\n      \"width\": 141,\n      \"height\": 28,\n      \"seed\": 1547241419,\n      \"groupIds\": [],\n      \"strokeSharpness\": \"sharp\",\n      \"boundElements\": [],\n      \"updated\": 1662257477283,\n      \"link\": null,\n      \"locked\": false,\n      \"fontSize\": 20,\n      \"fontFamily\": 1,\n      \"text\": \"\\\"abcd...\\\"\",\n      
\"baseline\": 19,\n      \"textAlign\": \"center\",\n      \"verticalAlign\": \"middle\",\n      \"containerId\": \"hls1kkVvTEbIVUoHV9YjB\",\n      \"originalText\": \"\\\"abcd...\\\"\"\n    },\n    {\n      \"id\": \"PtndVbqi061kx-2QVmX9B\",\n      \"type\": \"arrow\",\n      \"x\": 1595.5714285714287,\n      \"y\": 818.857142857143,\n      \"width\": 128.1422939788249,\n      \"height\": 32.825682301479674,\n      \"angle\": 0,\n      \"strokeColor\": \"#000000\",\n      \"backgroundColor\": \"transparent\",\n      \"fillStyle\": \"hachure\",\n      \"strokeWidth\": 1,\n      \"strokeStyle\": \"solid\",\n      \"roughness\": 1,\n      \"opacity\": 100,\n      \"groupIds\": [],\n      \"strokeSharpness\": \"round\",\n      \"seed\": 310414827,\n      \"version\": 1513,\n      \"versionNonce\": 652927109,\n      \"isDeleted\": false,\n      \"boundElements\": null,\n      \"updated\": 1662257477406,\n      \"link\": null,\n      \"locked\": false,\n      \"points\": [\n        [\n          0,\n          0\n        ],\n        [\n          49.5,\n          -8\n        ],\n        [\n          128.1422939788249,\n          24.825682301479674\n        ]\n      ],\n      \"lastCommittedPoint\": null,\n      \"startBinding\": {\n        \"elementId\": \"LUhivcEGaeW_fHoMkT5PY\",\n        \"focus\": 0.2744377811094453,\n        \"gap\": 5\n      },\n      \"endBinding\": {\n        \"elementId\": \"hls1kkVvTEbIVUoHV9YjB\",\n        \"focus\": 0.453665660258198,\n        \"gap\": 9.270374749825422\n      },\n      \"startArrowhead\": null,\n      \"endArrowhead\": \"arrow\"\n    },\n    {\n      \"type\": \"rectangle\",\n      \"version\": 539,\n      \"versionNonce\": 447891973,\n      \"isDeleted\": false,\n      \"id\": \"BmVwp90EOf01pxoCqayka\",\n      \"fillStyle\": \"hachure\",\n      \"strokeWidth\": 1,\n      \"strokeStyle\": \"solid\",\n      \"roughness\": 1,\n      \"opacity\": 100,\n      \"angle\": 0,\n      \"x\": 779.0714285714287,\n      \"y\": 
273.8571428571429,\n      \"strokeColor\": \"#000000\",\n      \"backgroundColor\": \"transparent\",\n      \"width\": 157,\n      \"height\": 42,\n      \"seed\": 817120651,\n      \"groupIds\": [],\n      \"strokeSharpness\": \"sharp\",\n      \"boundElements\": [\n        {\n          \"id\": \"7-os26TSlkxMhDb-ALHK8\",\n          \"type\": \"text\"\n        },\n        {\n          \"id\": \"wIo5IjqjKx5agDWM2U6y9\",\n          \"type\": \"arrow\"\n        },\n        {\n          \"type\": \"text\",\n          \"id\": \"7-os26TSlkxMhDb-ALHK8\"\n        },\n        {\n          \"id\": \"2-4BatkaFqKxOF9ikfE9M\",\n          \"type\": \"arrow\"\n        }\n      ],\n      \"updated\": 1662257477283,\n      \"link\": null,\n      \"locked\": false\n    },\n    {\n      \"type\": \"text\",\n      \"version\": 496,\n      \"versionNonce\": 1475544267,\n      \"isDeleted\": false,\n      \"id\": \"7-os26TSlkxMhDb-ALHK8\",\n      \"fillStyle\": \"hachure\",\n      \"strokeWidth\": 1,\n      \"strokeStyle\": \"solid\",\n      \"roughness\": 1,\n      \"opacity\": 100,\n      \"angle\": 0,\n      \"x\": 784.0714285714287,\n      \"y\": 280.8571428571429,\n      \"strokeColor\": \"#000000\",\n      \"backgroundColor\": \"transparent\",\n      \"width\": 147,\n      \"height\": 28,\n      \"seed\": 398781605,\n      \"groupIds\": [],\n      \"strokeSharpness\": \"sharp\",\n      \"boundElements\": [],\n      \"updated\": 1662257477283,\n      \"link\": null,\n      \"locked\": false,\n      \"fontSize\": 20,\n      \"fontFamily\": 1,\n      \"text\": \"DensePtr\",\n      \"baseline\": 19,\n      \"textAlign\": \"center\",\n      \"verticalAlign\": \"middle\",\n      \"containerId\": \"BmVwp90EOf01pxoCqayka\",\n      \"originalText\": \"DensePtr\"\n    },\n    {\n      \"id\": \"BV0b6Du7Nu_TpcyHxOq9M\",\n      \"type\": \"text\",\n      \"x\": 283.57142857142867,\n      \"y\": 774.857142857143,\n      \"width\": 147,\n      \"height\": 28,\n      \"angle\": 0,\n      
\"strokeColor\": \"#000000\",\n      \"backgroundColor\": \"transparent\",\n      \"fillStyle\": \"hachure\",\n      \"strokeWidth\": 1,\n      \"strokeStyle\": \"solid\",\n      \"roughness\": 1,\n      \"opacity\": 100,\n      \"groupIds\": [],\n      \"strokeSharpness\": \"sharp\",\n      \"seed\": 84057963,\n      \"version\": 301,\n      \"versionNonce\": 1123890789,\n      \"isDeleted\": false,\n      \"boundElements\": null,\n      \"updated\": 1662257477283,\n      \"link\": null,\n      \"locked\": false,\n      \"text\": \"DenseLinkKey\",\n      \"fontSize\": 20,\n      \"fontFamily\": 1,\n      \"textAlign\": \"center\",\n      \"verticalAlign\": \"middle\",\n      \"baseline\": 19,\n      \"containerId\": \"LdnS4utc0Co8ZQl0k_99q\",\n      \"originalText\": \"DenseLinkKey\"\n    },\n    {\n      \"type\": \"ellipse\",\n      \"version\": 1392,\n      \"versionNonce\": 208548715,\n      \"isDeleted\": false,\n      \"id\": \"oUfTPCoNOMOVUScypl9ov\",\n      \"fillStyle\": \"hachure\",\n      \"strokeWidth\": 1,\n      \"strokeStyle\": \"solid\",\n      \"roughness\": 1,\n      \"opacity\": 100,\n      \"angle\": 0,\n      \"x\": 1038.0714285714287,\n      \"y\": 262.3571428571429,\n      \"strokeColor\": \"#000000\",\n      \"backgroundColor\": \"transparent\",\n      \"width\": 151,\n      \"height\": 65,\n      \"seed\": 1274116613,\n      \"groupIds\": [],\n      \"strokeSharpness\": \"sharp\",\n      \"boundElements\": [\n        {\n          \"id\": \"_ZWQLsSXL62nm9Vxybs_T\",\n          \"type\": \"text\"\n        },\n        {\n          \"id\": \"ZD_EGEh1PSlEhdhmPUGm3\",\n          \"type\": \"arrow\"\n        },\n        {\n          \"id\": \"_ZWQLsSXL62nm9Vxybs_T\",\n          \"type\": \"text\"\n        },\n        {\n          \"id\": \"_ZWQLsSXL62nm9Vxybs_T\",\n          \"type\": \"text\"\n        },\n        {\n          \"id\": \"PtndVbqi061kx-2QVmX9B\",\n          \"type\": \"arrow\"\n        },\n        {\n          \"type\": \"text\",\n  
        \"id\": \"_ZWQLsSXL62nm9Vxybs_T\"\n        },\n        {\n          \"id\": \"2-4BatkaFqKxOF9ikfE9M\",\n          \"type\": \"arrow\"\n        }\n      ],\n      \"updated\": 1662257477283,\n      \"link\": null,\n      \"locked\": false\n    },\n    {\n      \"type\": \"text\",\n      \"version\": 1329,\n      \"versionNonce\": 1599163589,\n      \"isDeleted\": false,\n      \"id\": \"_ZWQLsSXL62nm9Vxybs_T\",\n      \"fillStyle\": \"hachure\",\n      \"strokeWidth\": 1,\n      \"strokeStyle\": \"solid\",\n      \"roughness\": 1,\n      \"opacity\": 100,\n      \"angle\": 0,\n      \"x\": 1043.0714285714287,\n      \"y\": 280.8571428571429,\n      \"strokeColor\": \"#000000\",\n      \"backgroundColor\": \"transparent\",\n      \"width\": 141,\n      \"height\": 28,\n      \"seed\": 1098831051,\n      \"groupIds\": [],\n      \"strokeSharpness\": \"sharp\",\n      \"boundElements\": [],\n      \"updated\": 1662257477283,\n      \"link\": null,\n      \"locked\": false,\n      \"fontSize\": 20,\n      \"fontFamily\": 1,\n      \"text\": \"\\\"abcd...\\\"\",\n      \"baseline\": 19,\n      \"textAlign\": \"center\",\n      \"verticalAlign\": \"middle\",\n      \"containerId\": \"oUfTPCoNOMOVUScypl9ov\",\n      \"originalText\": \"\\\"abcd...\\\"\"\n    },\n    {\n      \"id\": \"2-4BatkaFqKxOF9ikfE9M\",\n      \"type\": \"arrow\",\n      \"x\": 937.5714285714287,\n      \"y\": 296.8571428571429,\n      \"width\": 97,\n      \"height\": 2,\n      \"angle\": 0,\n      \"strokeColor\": \"#000000\",\n      \"backgroundColor\": \"transparent\",\n      \"fillStyle\": \"hachure\",\n      \"strokeWidth\": 1,\n      \"strokeStyle\": \"solid\",\n      \"roughness\": 1,\n      \"opacity\": 100,\n      \"groupIds\": [],\n      \"strokeSharpness\": \"round\",\n      \"seed\": 2010023531,\n      \"version\": 543,\n      \"versionNonce\": 1848018917,\n      \"isDeleted\": false,\n      \"boundElements\": null,\n      \"updated\": 1662257477407,\n      \"link\": null,\n      
\"locked\": false,\n      \"points\": [\n        [\n          0,\n          0\n        ],\n        [\n          41,\n          1\n        ],\n        [\n          97,\n          -1\n        ]\n      ],\n      \"lastCommittedPoint\": null,\n      \"startBinding\": {\n        \"elementId\": \"BmVwp90EOf01pxoCqayka\",\n        \"focus\": 0.0021287919105907396,\n        \"gap\": 1.5\n      },\n      \"endBinding\": {\n        \"elementId\": \"oUfTPCoNOMOVUScypl9ov\",\n        \"focus\": 0.05585205610314286,\n        \"gap\": 3.5285491921035828\n      },\n      \"startArrowhead\": null,\n      \"endArrowhead\": \"arrow\"\n    },\n    {\n      \"type\": \"rectangle\",\n      \"version\": 751,\n      \"versionNonce\": 235050155,\n      \"isDeleted\": false,\n      \"id\": \"Suj1TA3n75lniv8ZthhOy\",\n      \"fillStyle\": \"hachure\",\n      \"strokeWidth\": 1,\n      \"strokeStyle\": \"solid\",\n      \"roughness\": 1,\n      \"opacity\": 100,\n      \"angle\": 0,\n      \"x\": 780.5714285714287,\n      \"y\": 481.857142857143,\n      \"strokeColor\": \"#2b8a3e\",\n      \"backgroundColor\": \"#12b886\",\n      \"width\": 157,\n      \"height\": 42,\n      \"seed\": 1337311947,\n      \"groupIds\": [],\n      \"strokeSharpness\": \"sharp\",\n      \"boundElements\": [\n        {\n          \"id\": \"5l8sQoeycml7y43c3H6j4\",\n          \"type\": \"text\"\n        },\n        {\n          \"id\": \"wIo5IjqjKx5agDWM2U6y9\",\n          \"type\": \"arrow\"\n        },\n        {\n          \"id\": \"5l8sQoeycml7y43c3H6j4\",\n          \"type\": \"text\"\n        },\n        {\n          \"id\": \"XNzXS4nhlngVv4LqrpGWH\",\n          \"type\": \"arrow\"\n        },\n        {\n          \"type\": \"text\",\n          \"id\": \"5l8sQoeycml7y43c3H6j4\"\n        }\n      ],\n      \"updated\": 1662257477283,\n      \"link\": null,\n      \"locked\": false\n    },\n    {\n      \"type\": \"text\",\n      \"version\": 707,\n      \"versionNonce\": 949919621,\n      \"isDeleted\": 
false,\n      \"id\": \"5l8sQoeycml7y43c3H6j4\",\n      \"fillStyle\": \"hachure\",\n      \"strokeWidth\": 1,\n      \"strokeStyle\": \"solid\",\n      \"roughness\": 1,\n      \"opacity\": 100,\n      \"angle\": 0,\n      \"x\": 785.5714285714287,\n      \"y\": 488.8571428571431,\n      \"strokeColor\": \"#000000\",\n      \"backgroundColor\": \"transparent\",\n      \"width\": 147,\n      \"height\": 28,\n      \"seed\": 26534757,\n      \"groupIds\": [],\n      \"strokeSharpness\": \"sharp\",\n      \"boundElements\": [],\n      \"updated\": 1662257477283,\n      \"link\": null,\n      \"locked\": false,\n      \"fontSize\": 20,\n      \"fontFamily\": 1,\n      \"text\": \"DensePtr\",\n      \"baseline\": 19,\n      \"textAlign\": \"center\",\n      \"verticalAlign\": \"middle\",\n      \"containerId\": \"Suj1TA3n75lniv8ZthhOy\",\n      \"originalText\": \"DensePtr\"\n    },\n    {\n      \"type\": \"ellipse\",\n      \"version\": 1601,\n      \"versionNonce\": 822765285,\n      \"isDeleted\": false,\n      \"id\": \"e0Z3-_Eg_DtzWKAJ00uZx\",\n      \"fillStyle\": \"hachure\",\n      \"strokeWidth\": 1,\n      \"strokeStyle\": \"solid\",\n      \"roughness\": 1,\n      \"opacity\": 100,\n      \"angle\": 0,\n      \"x\": 1039.5714285714287,\n      \"y\": 470.357142857143,\n      \"strokeColor\": \"#000000\",\n      \"backgroundColor\": \"transparent\",\n      \"width\": 151,\n      \"height\": 65,\n      \"seed\": 959000939,\n      \"groupIds\": [],\n      \"strokeSharpness\": \"sharp\",\n      \"boundElements\": [\n        {\n          \"id\": \"_Bita6RwDhub4HiG4-vAe\",\n          \"type\": \"text\"\n        },\n        {\n          \"id\": \"ZD_EGEh1PSlEhdhmPUGm3\",\n          \"type\": \"arrow\"\n        },\n        {\n          \"id\": \"_Bita6RwDhub4HiG4-vAe\",\n          \"type\": \"text\"\n        },\n        {\n          \"id\": \"_Bita6RwDhub4HiG4-vAe\",\n          \"type\": \"text\"\n        },\n        {\n          \"id\": \"PtndVbqi061kx-2QVmX9B\",\n 
         \"type\": \"arrow\"\n        },\n        {\n          \"id\": \"_Bita6RwDhub4HiG4-vAe\",\n          \"type\": \"text\"\n        },\n        {\n          \"id\": \"XNzXS4nhlngVv4LqrpGWH\",\n          \"type\": \"arrow\"\n        },\n        {\n          \"type\": \"text\",\n          \"id\": \"_Bita6RwDhub4HiG4-vAe\"\n        }\n      ],\n      \"updated\": 1662257477283,\n      \"link\": null,\n      \"locked\": false\n    },\n    {\n      \"type\": \"text\",\n      \"version\": 1537,\n      \"versionNonce\": 140486123,\n      \"isDeleted\": false,\n      \"id\": \"_Bita6RwDhub4HiG4-vAe\",\n      \"fillStyle\": \"hachure\",\n      \"strokeWidth\": 1,\n      \"strokeStyle\": \"solid\",\n      \"roughness\": 1,\n      \"opacity\": 100,\n      \"angle\": 0,\n      \"x\": 1044.5714285714287,\n      \"y\": 488.8571428571431,\n      \"strokeColor\": \"#000000\",\n      \"backgroundColor\": \"transparent\",\n      \"width\": 141,\n      \"height\": 28,\n      \"seed\": 776810181,\n      \"groupIds\": [],\n      \"strokeSharpness\": \"sharp\",\n      \"boundElements\": [],\n      \"updated\": 1662257477283,\n      \"link\": null,\n      \"locked\": false,\n      \"fontSize\": 20,\n      \"fontFamily\": 1,\n      \"text\": \"\\\"abcd...\\\"\",\n      \"baseline\": 19,\n      \"textAlign\": \"center\",\n      \"verticalAlign\": \"middle\",\n      \"containerId\": \"e0Z3-_Eg_DtzWKAJ00uZx\",\n      \"originalText\": \"\\\"abcd...\\\"\"\n    },\n    {\n      \"type\": \"arrow\",\n      \"version\": 1219,\n      \"versionNonce\": 1379287877,\n      \"isDeleted\": false,\n      \"id\": \"XNzXS4nhlngVv4LqrpGWH\",\n      \"fillStyle\": \"hachure\",\n      \"strokeWidth\": 1,\n      \"strokeStyle\": \"solid\",\n      \"roughness\": 1,\n      \"opacity\": 100,\n      \"angle\": 0,\n      \"x\": 939.0714285714287,\n      \"y\": 504.8571428571431,\n      \"strokeColor\": \"#000000\",\n      \"backgroundColor\": \"transparent\",\n      \"width\": 97,\n      \"height\": 2,\n     
 \"seed\": 192269323,\n      \"groupIds\": [],\n      \"strokeSharpness\": \"round\",\n      \"boundElements\": [],\n      \"updated\": 1662257477408,\n      \"link\": null,\n      \"locked\": false,\n      \"startBinding\": {\n        \"elementId\": \"Suj1TA3n75lniv8ZthhOy\",\n        \"focus\": 0.002128791910595701,\n        \"gap\": 1.5\n      },\n      \"endBinding\": {\n        \"elementId\": \"e0Z3-_Eg_DtzWKAJ00uZx\",\n        \"focus\": 0.055852056103139376,\n        \"gap\": 3.5285491921035685\n      },\n      \"lastCommittedPoint\": null,\n      \"startArrowhead\": null,\n      \"endArrowhead\": \"arrow\",\n      \"points\": [\n        [\n          0,\n          0\n        ],\n        [\n          41,\n          1\n        ],\n        [\n          97,\n          -1\n        ]\n      ]\n    },\n    {\n      \"id\": \"RGj3Y6CtyijvehUeHVywF\",\n      \"type\": \"text\",\n      \"x\": 749.5714285714287,\n      \"y\": 560.857142857143,\n      \"width\": 474,\n      \"height\": 46,\n      \"angle\": 0,\n      \"strokeColor\": \"#000000\",\n      \"backgroundColor\": \"#12b886\",\n      \"fillStyle\": \"hachure\",\n      \"strokeWidth\": 1,\n      \"strokeStyle\": \"solid\",\n      \"roughness\": 1,\n      \"opacity\": 100,\n      \"groupIds\": [],\n      \"strokeSharpness\": \"sharp\",\n      \"seed\": 130163083,\n      \"version\": 363,\n      \"versionNonce\": 1546629541,\n      \"isDeleted\": false,\n      \"boundElements\": null,\n      \"updated\": 1662257477283,\n      \"link\": null,\n      \"locked\": false,\n      \"text\": \"Chain With Multiple Entries\",\n      \"fontSize\": 36,\n      \"fontFamily\": 1,\n      \"textAlign\": \"center\",\n      \"verticalAlign\": \"top\",\n      \"baseline\": 32,\n      \"containerId\": null,\n      \"originalText\": \"Chain With Multiple Entries\"\n    },\n    {\n      \"type\": \"rectangle\",\n      \"version\": 696,\n      \"versionNonce\": 85550891,\n      \"isDeleted\": false,\n      \"id\": 
\"1S3pVzBUuYFr-RbDX1FXv\",\n      \"fillStyle\": \"hachure\",\n      \"strokeWidth\": 1,\n      \"strokeStyle\": \"solid\",\n      \"roughness\": 1,\n      \"opacity\": 100,\n      \"angle\": 0,\n      \"x\": 536.0714285714287,\n      \"y\": 747.857142857143,\n      \"strokeColor\": \"#000000\",\n      \"backgroundColor\": \"transparent\",\n      \"width\": 157,\n      \"height\": 42,\n      \"seed\": 715872619,\n      \"groupIds\": [\n        \"kaOwSxozJCF4g6QcHxA1q\"\n      ],\n      \"strokeSharpness\": \"sharp\",\n      \"boundElements\": [\n        {\n          \"type\": \"text\",\n          \"id\": \"P0w2r72h8lTF4KC0S8iK0\"\n        },\n        {\n          \"id\": \"h4EkHYMe6b4cxIpFk2aJ1\",\n          \"type\": \"arrow\"\n        }\n      ],\n      \"updated\": 1662257477283,\n      \"link\": null,\n      \"locked\": false\n    },\n    {\n      \"type\": \"rectangle\",\n      \"version\": 695,\n      \"versionNonce\": 1599687115,\n      \"isDeleted\": false,\n      \"id\": \"CdJtzp6w0n0rveWC1BWuQ\",\n      \"fillStyle\": \"hachure\",\n      \"strokeWidth\": 1,\n      \"strokeStyle\": \"solid\",\n      \"roughness\": 1,\n      \"opacity\": 100,\n      \"angle\": 0,\n      \"x\": 535.0714285714287,\n      \"y\": 804.857142857143,\n      \"strokeColor\": \"#364fc7\",\n      \"backgroundColor\": \"#4c6ef5\",\n      \"width\": 157,\n      \"height\": 42,\n      \"seed\": 582081413,\n      \"groupIds\": [\n        \"kaOwSxozJCF4g6QcHxA1q\"\n      ],\n      \"strokeSharpness\": \"sharp\",\n      \"boundElements\": [\n        {\n          \"type\": \"text\",\n          \"id\": \"LFc4k25ArlLXtoygEGUU6\"\n        },\n        {\n          \"id\": \"iKBu85WHY4IL_69QEvdyT\",\n          \"type\": \"arrow\"\n        }\n      ],\n      \"updated\": 1662257477283,\n      \"link\": null,\n      \"locked\": false\n    },\n    {\n      \"id\": \"9mWjCy5sUe-mID6u6k7Ll\",\n      \"type\": \"rectangle\",\n      \"x\": 519.5714285714287,\n      \"y\": 703.857142857143,\n      
\"width\": 188,\n      \"height\": 159,\n      \"angle\": 0,\n      \"strokeColor\": \"#000000\",\n      \"backgroundColor\": \"transparent\",\n      \"fillStyle\": \"hachure\",\n      \"strokeWidth\": 1,\n      \"strokeStyle\": \"solid\",\n      \"roughness\": 1,\n      \"opacity\": 100,\n      \"groupIds\": [\n        \"kaOwSxozJCF4g6QcHxA1q\"\n      ],\n      \"strokeSharpness\": \"sharp\",\n      \"seed\": 1146093355,\n      \"version\": 582,\n      \"versionNonce\": 952359019,\n      \"isDeleted\": false,\n      \"boundElements\": [\n        {\n          \"id\": \"wIo5IjqjKx5agDWM2U6y9\",\n          \"type\": \"arrow\"\n        },\n        {\n          \"id\": \"5d6SPvHw2keIDl-5kNmEb\",\n          \"type\": \"arrow\"\n        },\n        {\n          \"type\": \"text\",\n          \"id\": \"2HOa22It8IfsktBdjpTwo\"\n        }\n      ],\n      \"updated\": 1662257477283,\n      \"link\": null,\n      \"locked\": false\n    },\n    {\n      \"id\": \"LFc4k25ArlLXtoygEGUU6\",\n      \"type\": \"text\",\n      \"x\": 540.0714285714287,\n      \"y\": 811.857142857143,\n      \"width\": 147,\n      \"height\": 28,\n      \"angle\": 0,\n      \"strokeColor\": \"#000000\",\n      \"backgroundColor\": \"transparent\",\n      \"fillStyle\": \"hachure\",\n      \"strokeWidth\": 1,\n      \"strokeStyle\": \"solid\",\n      \"roughness\": 1,\n      \"opacity\": 100,\n      \"groupIds\": [\n        \"kaOwSxozJCF4g6QcHxA1q\"\n      ],\n      \"strokeSharpness\": \"sharp\",\n      \"seed\": 1252566219,\n      \"version\": 555,\n      \"versionNonce\": 790860555,\n      \"isDeleted\": false,\n      \"boundElements\": null,\n      \"updated\": 1662257477283,\n      \"link\": null,\n      \"locked\": false,\n      \"text\": \"DenseLinkKey\",\n      \"fontSize\": 20,\n      \"fontFamily\": 1,\n      \"textAlign\": \"center\",\n      \"verticalAlign\": \"middle\",\n      \"baseline\": 19,\n      \"containerId\": \"CdJtzp6w0n0rveWC1BWuQ\",\n      \"originalText\": \"DenseLinkKey\"\n 
   },\n    {\n      \"id\": \"P0w2r72h8lTF4KC0S8iK0\",\n      \"type\": \"text\",\n      \"x\": 541.0714285714287,\n      \"y\": 754.857142857143,\n      \"width\": 147,\n      \"height\": 28,\n      \"angle\": 0,\n      \"strokeColor\": \"#000000\",\n      \"backgroundColor\": \"transparent\",\n      \"fillStyle\": \"hachure\",\n      \"strokeWidth\": 1,\n      \"strokeStyle\": \"solid\",\n      \"roughness\": 1,\n      \"opacity\": 100,\n      \"groupIds\": [\n        \"kaOwSxozJCF4g6QcHxA1q\"\n      ],\n      \"strokeSharpness\": \"sharp\",\n      \"seed\": 1221832197,\n      \"version\": 556,\n      \"versionNonce\": 1960523557,\n      \"isDeleted\": false,\n      \"boundElements\": null,\n      \"updated\": 1662257477283,\n      \"link\": null,\n      \"locked\": false,\n      \"text\": \"DensePtr\",\n      \"fontSize\": 20,\n      \"fontFamily\": 1,\n      \"textAlign\": \"center\",\n      \"verticalAlign\": \"middle\",\n      \"baseline\": 19,\n      \"containerId\": \"1S3pVzBUuYFr-RbDX1FXv\",\n      \"originalText\": \"DensePtr\"\n    },\n    {\n      \"id\": \"2HOa22It8IfsktBdjpTwo\",\n      \"type\": \"text\",\n      \"x\": 524.5714285714287,\n      \"y\": 708.857142857143,\n      \"width\": 178,\n      \"height\": 28,\n      \"angle\": 0,\n      \"strokeColor\": \"#a61e4d\",\n      \"backgroundColor\": \"#12b886\",\n      \"fillStyle\": \"hachure\",\n      \"strokeWidth\": 1,\n      \"strokeStyle\": \"solid\",\n      \"roughness\": 1,\n      \"opacity\": 100,\n      \"groupIds\": [\n        \"kaOwSxozJCF4g6QcHxA1q\"\n      ],\n      \"strokeSharpness\": \"sharp\",\n      \"seed\": 971803051,\n      \"version\": 325,\n      \"versionNonce\": 1123219883,\n      \"isDeleted\": false,\n      \"boundElements\": null,\n      \"updated\": 1662257477283,\n      \"link\": null,\n      \"locked\": false,\n      \"text\": \"DenseLinkKey\",\n      \"fontSize\": 20,\n      \"fontFamily\": 1,\n      \"textAlign\": \"center\",\n      \"verticalAlign\": \"top\",\n      
\"baseline\": 19,\n      \"containerId\": \"9mWjCy5sUe-mID6u6k7Ll\",\n      \"originalText\": \"DenseLinkKey\"\n    },\n    {\n      \"type\": \"rectangle\",\n      \"version\": 877,\n      \"versionNonce\": 1427311237,\n      \"isDeleted\": false,\n      \"id\": \"RyzbgdtiyAgDl_Gg-xKD6\",\n      \"fillStyle\": \"hachure\",\n      \"strokeWidth\": 1,\n      \"strokeStyle\": \"solid\",\n      \"roughness\": 1,\n      \"opacity\": 100,\n      \"angle\": 0,\n      \"x\": 1434.5714285714287,\n      \"y\": 745.107142857143,\n      \"strokeColor\": \"#000000\",\n      \"backgroundColor\": \"transparent\",\n      \"width\": 157,\n      \"height\": 42,\n      \"seed\": 1200981765,\n      \"groupIds\": [],\n      \"strokeSharpness\": \"sharp\",\n      \"boundElements\": [\n        {\n          \"id\": \"K8fVzXRPoMTnOm4BIQdpC\",\n          \"type\": \"text\"\n        },\n        {\n          \"type\": \"text\",\n          \"id\": \"K8fVzXRPoMTnOm4BIQdpC\"\n        },\n        {\n          \"id\": \"ZD_EGEh1PSlEhdhmPUGm3\",\n          \"type\": \"arrow\"\n        }\n      ],\n      \"updated\": 1662257477283,\n      \"link\": null,\n      \"locked\": false\n    },\n    {\n      \"type\": \"rectangle\",\n      \"version\": 876,\n      \"versionNonce\": 1849346795,\n      \"isDeleted\": false,\n      \"id\": \"LUhivcEGaeW_fHoMkT5PY\",\n      \"fillStyle\": \"hachure\",\n      \"strokeWidth\": 1,\n      \"strokeStyle\": \"solid\",\n      \"roughness\": 1,\n      \"opacity\": 100,\n      \"angle\": 0,\n      \"x\": 1433.5714285714287,\n      \"y\": 802.107142857143,\n      \"strokeColor\": \"#000000\",\n      \"backgroundColor\": \"transparent\",\n      \"width\": 157,\n      \"height\": 42,\n      \"seed\": 1269700555,\n      \"groupIds\": [],\n      \"strokeSharpness\": \"sharp\",\n      \"boundElements\": [\n        {\n          \"id\": \"d68fKPrkXvutq5CIgpKu0\",\n          \"type\": \"text\"\n        },\n        {\n          \"type\": \"text\",\n          \"id\": 
\"d68fKPrkXvutq5CIgpKu0\"\n        },\n        {\n          \"id\": \"PtndVbqi061kx-2QVmX9B\",\n          \"type\": \"arrow\"\n        }\n      ],\n      \"updated\": 1662257477283,\n      \"link\": null,\n      \"locked\": false\n    },\n    {\n      \"type\": \"rectangle\",\n      \"version\": 767,\n      \"versionNonce\": 114818213,\n      \"isDeleted\": false,\n      \"id\": \"nqXx_jG0SMox2AHT2L5F2\",\n      \"fillStyle\": \"hachure\",\n      \"strokeWidth\": 1,\n      \"strokeStyle\": \"solid\",\n      \"roughness\": 1,\n      \"opacity\": 100,\n      \"angle\": 0,\n      \"x\": 1418.0714285714287,\n      \"y\": 701.107142857143,\n      \"strokeColor\": \"#000000\",\n      \"backgroundColor\": \"transparent\",\n      \"width\": 188,\n      \"height\": 159,\n      \"seed\": 532176485,\n      \"groupIds\": [],\n      \"strokeSharpness\": \"sharp\",\n      \"boundElements\": [\n        {\n          \"id\": \"wIo5IjqjKx5agDWM2U6y9\",\n          \"type\": \"arrow\"\n        },\n        {\n          \"id\": \"5d6SPvHw2keIDl-5kNmEb\",\n          \"type\": \"arrow\"\n        },\n        {\n          \"id\": \"P46vozsH8hY3lX5pMtPk8\",\n          \"type\": \"text\"\n        },\n        {\n          \"type\": \"text\",\n          \"id\": \"P46vozsH8hY3lX5pMtPk8\"\n        },\n        {\n          \"id\": \"CVx1AqnNI76hVX9-ObrtA\",\n          \"type\": \"arrow\"\n        }\n      ],\n      \"updated\": 1662257477283,\n      \"link\": null,\n      \"locked\": false\n    },\n    {\n      \"type\": \"text\",\n      \"version\": 745,\n      \"versionNonce\": 1071377733,\n      \"isDeleted\": false,\n      \"id\": \"d68fKPrkXvutq5CIgpKu0\",\n      \"fillStyle\": \"hachure\",\n      \"strokeWidth\": 1,\n      \"strokeStyle\": \"solid\",\n      \"roughness\": 1,\n      \"opacity\": 100,\n      \"angle\": 0,\n      \"x\": 1438.5714285714287,\n      \"y\": 809.107142857143,\n      \"strokeColor\": \"#000000\",\n      \"backgroundColor\": \"transparent\",\n      \"width\": 147,\n   
   \"height\": 28,\n      \"seed\": 491710059,\n      \"groupIds\": [],\n      \"strokeSharpness\": \"sharp\",\n      \"boundElements\": [],\n      \"updated\": 1662257477283,\n      \"link\": null,\n      \"locked\": false,\n      \"fontSize\": 20,\n      \"fontFamily\": 1,\n      \"text\": \"DensePtr\",\n      \"baseline\": 19,\n      \"textAlign\": \"center\",\n      \"verticalAlign\": \"middle\",\n      \"containerId\": \"LUhivcEGaeW_fHoMkT5PY\",\n      \"originalText\": \"DensePtr\"\n    },\n    {\n      \"type\": \"text\",\n      \"version\": 736,\n      \"versionNonce\": 22842443,\n      \"isDeleted\": false,\n      \"id\": \"K8fVzXRPoMTnOm4BIQdpC\",\n      \"fillStyle\": \"hachure\",\n      \"strokeWidth\": 1,\n      \"strokeStyle\": \"solid\",\n      \"roughness\": 1,\n      \"opacity\": 100,\n      \"angle\": 0,\n      \"x\": 1439.5714285714287,\n      \"y\": 752.107142857143,\n      \"strokeColor\": \"#000000\",\n      \"backgroundColor\": \"transparent\",\n      \"width\": 147,\n      \"height\": 28,\n      \"seed\": 1346980293,\n      \"groupIds\": [],\n      \"strokeSharpness\": \"sharp\",\n      \"boundElements\": [],\n      \"updated\": 1662257477283,\n      \"link\": null,\n      \"locked\": false,\n      \"fontSize\": 20,\n      \"fontFamily\": 1,\n      \"text\": \"DensePtr\",\n      \"baseline\": 19,\n      \"textAlign\": \"center\",\n      \"verticalAlign\": \"middle\",\n      \"containerId\": \"RyzbgdtiyAgDl_Gg-xKD6\",\n      \"originalText\": \"DensePtr\"\n    },\n    {\n      \"type\": \"text\",\n      \"version\": 505,\n      \"versionNonce\": 1882147883,\n      \"isDeleted\": false,\n      \"id\": \"P46vozsH8hY3lX5pMtPk8\",\n      \"fillStyle\": \"hachure\",\n      \"strokeWidth\": 1,\n      \"strokeStyle\": \"solid\",\n      \"roughness\": 1,\n      \"opacity\": 100,\n      \"angle\": 0,\n      \"x\": 1423.0714285714287,\n      \"y\": 706.107142857143,\n      \"strokeColor\": \"#a61e4d\",\n      \"backgroundColor\": \"#12b886\",\n      
\"width\": 178,\n      \"height\": 28,\n      \"seed\": 723312907,\n      \"groupIds\": [],\n      \"strokeSharpness\": \"sharp\",\n      \"boundElements\": [],\n      \"updated\": 1662257477283,\n      \"link\": null,\n      \"locked\": false,\n      \"fontSize\": 20,\n      \"fontFamily\": 1,\n      \"text\": \"DenseLinkKey\",\n      \"baseline\": 19,\n      \"textAlign\": \"center\",\n      \"verticalAlign\": \"top\",\n      \"containerId\": \"nqXx_jG0SMox2AHT2L5F2\",\n      \"originalText\": \"DenseLinkKey\"\n    },\n    {\n      \"id\": \"h4EkHYMe6b4cxIpFk2aJ1\",\n      \"type\": \"arrow\",\n      \"x\": 698.5714285714287,\n      \"y\": 769.857142857143,\n      \"width\": 108,\n      \"height\": 43,\n      \"angle\": 0,\n      \"strokeColor\": \"#000000\",\n      \"backgroundColor\": \"transparent\",\n      \"fillStyle\": \"hachure\",\n      \"strokeWidth\": 1,\n      \"strokeStyle\": \"solid\",\n      \"roughness\": 1,\n      \"opacity\": 100,\n      \"groupIds\": [],\n      \"strokeSharpness\": \"round\",\n      \"seed\": 313455205,\n      \"version\": 995,\n      \"versionNonce\": 1199018661,\n      \"isDeleted\": false,\n      \"boundElements\": null,\n      \"updated\": 1662257477410,\n      \"link\": null,\n      \"locked\": false,\n      \"points\": [\n        [\n          0,\n          0\n        ],\n        [\n          67,\n          -2.5\n        ],\n        [\n          108,\n          -43\n        ]\n      ],\n      \"lastCommittedPoint\": null,\n      \"startBinding\": {\n        \"elementId\": \"1S3pVzBUuYFr-RbDX1FXv\",\n        \"focus\": 0.17277405270544205,\n        \"gap\": 5.5\n      },\n      \"endBinding\": {\n        \"elementId\": \"6iemTDX54UBvWAow6YZUm\",\n        \"focus\": 0.37288545736724105,\n        \"gap\": 1\n      },\n      \"startArrowhead\": null,\n      \"endArrowhead\": \"arrow\"\n    },\n    {\n      \"type\": \"text\",\n      \"version\": 735,\n      \"versionNonce\": 1684199269,\n      \"isDeleted\": false,\n      
\"id\": \"C55jJitM19fp12H5lRwCI\",\n      \"fillStyle\": \"hachure\",\n      \"strokeWidth\": 1,\n      \"strokeStyle\": \"solid\",\n      \"roughness\": 1,\n      \"opacity\": 100,\n      \"angle\": 0,\n      \"x\": 798.5714285714287,\n      \"y\": 170.8571428571429,\n      \"strokeColor\": \"#000000\",\n      \"backgroundColor\": \"#12b886\",\n      \"width\": 374,\n      \"height\": 46,\n      \"seed\": 22775717,\n      \"groupIds\": [],\n      \"strokeSharpness\": \"sharp\",\n      \"boundElements\": [],\n      \"updated\": 1662257477283,\n      \"link\": null,\n      \"locked\": false,\n      \"fontSize\": 36,\n      \"fontFamily\": 1,\n      \"text\": \"Chain With One Entry\",\n      \"baseline\": 32,\n      \"textAlign\": \"center\",\n      \"verticalAlign\": \"top\",\n      \"containerId\": null,\n      \"originalText\": \"Chain With One Entry\"\n    },\n    {\n      \"type\": \"text\",\n      \"version\": 957,\n      \"versionNonce\": 694451563,\n      \"isDeleted\": false,\n      \"id\": \"hGFgRua4wpyTtp4D694Ud\",\n      \"fillStyle\": \"hachure\",\n      \"strokeWidth\": 1,\n      \"strokeStyle\": \"solid\",\n      \"roughness\": 1,\n      \"opacity\": 100,\n      \"angle\": 0,\n      \"x\": 726.5714285714287,\n      \"y\": 392.8571428571429,\n      \"strokeColor\": \"#000000\",\n      \"backgroundColor\": \"#12b886\",\n      \"width\": 518,\n      \"height\": 46,\n      \"seed\": 840351749,\n      \"groupIds\": [],\n      \"strokeSharpness\": \"sharp\",\n      \"boundElements\": [],\n      \"updated\": 1662257477283,\n      \"link\": null,\n      \"locked\": false,\n      \"fontSize\": 36,\n      \"fontFamily\": 1,\n      \"text\": \"Chain With a Displaced Entry\",\n      \"baseline\": 32,\n      \"textAlign\": \"center\",\n      \"verticalAlign\": \"top\",\n      \"containerId\": null,\n      \"originalText\": \"Chain With a Displaced Entry\"\n    },\n    {\n      \"type\": \"ellipse\",\n      \"version\": 766,\n      \"versionNonce\": 1799135941,\n    
  \"isDeleted\": false,\n      \"id\": \"LGXZp6X5oRRKg9gzSJIcd\",\n      \"fillStyle\": \"hachure\",\n      \"strokeWidth\": 1,\n      \"strokeStyle\": \"solid\",\n      \"roughness\": 1,\n      \"opacity\": 100,\n      \"angle\": 0,\n      \"x\": 1229.8214285714287,\n      \"y\": 669.982142857143,\n      \"strokeColor\": \"#000000\",\n      \"backgroundColor\": \"transparent\",\n      \"width\": 151,\n      \"height\": 65,\n      \"seed\": 1902833605,\n      \"groupIds\": [],\n      \"strokeSharpness\": \"sharp\",\n      \"boundElements\": [\n        {\n          \"id\": \"uxrkX0MrXRysMixOCLf86\",\n          \"type\": \"text\"\n        },\n        {\n          \"id\": \"sGH1mRBDDfdaZOORzbU1h\",\n          \"type\": \"arrow\"\n        },\n        {\n          \"type\": \"text\",\n          \"id\": \"uxrkX0MrXRysMixOCLf86\"\n        }\n      ],\n      \"updated\": 1662257477283,\n      \"link\": null,\n      \"locked\": false\n    },\n    {\n      \"type\": \"text\",\n      \"version\": 706,\n      \"versionNonce\": 1852282891,\n      \"isDeleted\": false,\n      \"id\": \"uxrkX0MrXRysMixOCLf86\",\n      \"fillStyle\": \"hachure\",\n      \"strokeWidth\": 1,\n      \"strokeStyle\": \"solid\",\n      \"roughness\": 1,\n      \"opacity\": 100,\n      \"angle\": 0,\n      \"x\": 1234.8214285714287,\n      \"y\": 688.482142857143,\n      \"strokeColor\": \"#000000\",\n      \"backgroundColor\": \"transparent\",\n      \"width\": 141,\n      \"height\": 28,\n      \"seed\": 1657838347,\n      \"groupIds\": [],\n      \"strokeSharpness\": \"sharp\",\n      \"boundElements\": [],\n      \"updated\": 1662257477283,\n      \"link\": null,\n      \"locked\": false,\n      \"fontSize\": 20,\n      \"fontFamily\": 1,\n      \"text\": \"\\\"abcd...\\\"\",\n      \"baseline\": 19,\n      \"textAlign\": \"center\",\n      \"verticalAlign\": \"middle\",\n      \"containerId\": \"LGXZp6X5oRRKg9gzSJIcd\",\n      \"originalText\": \"\\\"abcd...\\\"\"\n    },\n    {\n      \"type\": 
\"rectangle\",\n      \"version\": 848,\n      \"versionNonce\": 1701732011,\n      \"isDeleted\": false,\n      \"id\": \"6SF7SEj50JLrJxpeJopdp\",\n      \"fillStyle\": \"hachure\",\n      \"strokeWidth\": 1,\n      \"strokeStyle\": \"solid\",\n      \"roughness\": 1,\n      \"opacity\": 100,\n      \"angle\": 0,\n      \"x\": 980.3214285714287,\n      \"y\": 746.982142857143,\n      \"strokeColor\": \"#000000\",\n      \"backgroundColor\": \"transparent\",\n      \"width\": 157,\n      \"height\": 42,\n      \"seed\": 368070501,\n      \"groupIds\": [\n        \"7648kMiz63bJLV7GO8sve\"\n      ],\n      \"strokeSharpness\": \"sharp\",\n      \"boundElements\": [\n        {\n          \"id\": \"rmScJhxvevICKMmx6PYQF\",\n          \"type\": \"text\"\n        },\n        {\n          \"id\": \"sGH1mRBDDfdaZOORzbU1h\",\n          \"type\": \"arrow\"\n        },\n        {\n          \"type\": \"text\",\n          \"id\": \"rmScJhxvevICKMmx6PYQF\"\n        },\n        {\n          \"id\": \"iKBu85WHY4IL_69QEvdyT\",\n          \"type\": \"arrow\"\n        }\n      ],\n      \"updated\": 1662257477283,\n      \"link\": null,\n      \"locked\": false\n    },\n    {\n      \"type\": \"rectangle\",\n      \"version\": 844,\n      \"versionNonce\": 1763922251,\n      \"isDeleted\": false,\n      \"id\": \"Nz45mnUTSGpaOgsVNIEr-\",\n      \"fillStyle\": \"hachure\",\n      \"strokeWidth\": 1,\n      \"strokeStyle\": \"solid\",\n      \"roughness\": 1,\n      \"opacity\": 100,\n      \"angle\": 0,\n      \"x\": 979.3214285714287,\n      \"y\": 803.982142857143,\n      \"strokeColor\": \"#364fc7\",\n      \"backgroundColor\": \"#4c6ef5\",\n      \"width\": 157,\n      \"height\": 42,\n      \"seed\": 998933867,\n      \"groupIds\": [\n        \"7648kMiz63bJLV7GO8sve\"\n      ],\n      \"strokeSharpness\": \"sharp\",\n      \"boundElements\": [\n        {\n          \"id\": \"dkP-6jzTOX9bdt8GCvOJw\",\n          \"type\": \"text\"\n        },\n        {\n          \"id\": 
\"CVx1AqnNI76hVX9-ObrtA\",\n          \"type\": \"arrow\"\n        },\n        {\n          \"type\": \"text\",\n          \"id\": \"dkP-6jzTOX9bdt8GCvOJw\"\n        }\n      ],\n      \"updated\": 1662257477283,\n      \"link\": null,\n      \"locked\": false\n    },\n    {\n      \"type\": \"rectangle\",\n      \"version\": 734,\n      \"versionNonce\": 450949099,\n      \"isDeleted\": false,\n      \"id\": \"QvUMauaFoUm7amxqdJy2z\",\n      \"fillStyle\": \"hachure\",\n      \"strokeWidth\": 1,\n      \"strokeStyle\": \"solid\",\n      \"roughness\": 1,\n      \"opacity\": 100,\n      \"angle\": 0,\n      \"x\": 963.8214285714287,\n      \"y\": 702.982142857143,\n      \"strokeColor\": \"#000000\",\n      \"backgroundColor\": \"transparent\",\n      \"width\": 188,\n      \"height\": 159,\n      \"seed\": 2086345413,\n      \"groupIds\": [\n        \"7648kMiz63bJLV7GO8sve\"\n      ],\n      \"strokeSharpness\": \"sharp\",\n      \"boundElements\": [\n        {\n          \"id\": \"wIo5IjqjKx5agDWM2U6y9\",\n          \"type\": \"arrow\"\n        },\n        {\n          \"id\": \"5d6SPvHw2keIDl-5kNmEb\",\n          \"type\": \"arrow\"\n        },\n        {\n          \"id\": \"sEfRctJpRk7foZK9c0IAH\",\n          \"type\": \"text\"\n        },\n        {\n          \"type\": \"text\",\n          \"id\": \"sEfRctJpRk7foZK9c0IAH\"\n        },\n        {\n          \"id\": \"iKBu85WHY4IL_69QEvdyT\",\n          \"type\": \"arrow\"\n        }\n      ],\n      \"updated\": 1662257477283,\n      \"link\": null,\n      \"locked\": false\n    },\n    {\n      \"type\": \"text\",\n      \"version\": 705,\n      \"versionNonce\": 212974219,\n      \"isDeleted\": false,\n      \"id\": \"dkP-6jzTOX9bdt8GCvOJw\",\n      \"fillStyle\": \"hachure\",\n      \"strokeWidth\": 1,\n      \"strokeStyle\": \"solid\",\n      \"roughness\": 1,\n      \"opacity\": 100,\n      \"angle\": 0,\n      \"x\": 984.3214285714287,\n      \"y\": 810.982142857143,\n      \"strokeColor\": 
\"#000000\",\n      \"backgroundColor\": \"transparent\",\n      \"width\": 147,\n      \"height\": 28,\n      \"seed\": 1586274315,\n      \"groupIds\": [\n        \"7648kMiz63bJLV7GO8sve\"\n      ],\n      \"strokeSharpness\": \"sharp\",\n      \"boundElements\": [],\n      \"updated\": 1662257477283,\n      \"link\": null,\n      \"locked\": false,\n      \"fontSize\": 20,\n      \"fontFamily\": 1,\n      \"text\": \"DenseLinkKey\",\n      \"baseline\": 19,\n      \"textAlign\": \"center\",\n      \"verticalAlign\": \"middle\",\n      \"containerId\": \"Nz45mnUTSGpaOgsVNIEr-\",\n      \"originalText\": \"DenseLinkKey\"\n    },\n    {\n      \"type\": \"text\",\n      \"version\": 706,\n      \"versionNonce\": 474302373,\n      \"isDeleted\": false,\n      \"id\": \"rmScJhxvevICKMmx6PYQF\",\n      \"fillStyle\": \"hachure\",\n      \"strokeWidth\": 1,\n      \"strokeStyle\": \"solid\",\n      \"roughness\": 1,\n      \"opacity\": 100,\n      \"angle\": 0,\n      \"x\": 985.3214285714287,\n      \"y\": 753.982142857143,\n      \"strokeColor\": \"#000000\",\n      \"backgroundColor\": \"transparent\",\n      \"width\": 147,\n      \"height\": 28,\n      \"seed\": 370506277,\n      \"groupIds\": [\n        \"7648kMiz63bJLV7GO8sve\"\n      ],\n      \"strokeSharpness\": \"sharp\",\n      \"boundElements\": [],\n      \"updated\": 1662257477283,\n      \"link\": null,\n      \"locked\": false,\n      \"fontSize\": 20,\n      \"fontFamily\": 1,\n      \"text\": \"DensePtr\",\n      \"baseline\": 19,\n      \"textAlign\": \"center\",\n      \"verticalAlign\": \"middle\",\n      \"containerId\": \"6SF7SEj50JLrJxpeJopdp\",\n      \"originalText\": \"DensePtr\"\n    },\n    {\n      \"type\": \"text\",\n      \"version\": 475,\n      \"versionNonce\": 726675755,\n      \"isDeleted\": false,\n      \"id\": \"sEfRctJpRk7foZK9c0IAH\",\n      \"fillStyle\": \"hachure\",\n      \"strokeWidth\": 1,\n      \"strokeStyle\": \"solid\",\n      \"roughness\": 1,\n      \"opacity\": 
100,\n      \"angle\": 0,\n      \"x\": 968.8214285714287,\n      \"y\": 707.982142857143,\n      \"strokeColor\": \"#a61e4d\",\n      \"backgroundColor\": \"#12b886\",\n      \"width\": 178,\n      \"height\": 28,\n      \"seed\": 464676523,\n      \"groupIds\": [\n        \"7648kMiz63bJLV7GO8sve\"\n      ],\n      \"strokeSharpness\": \"sharp\",\n      \"boundElements\": [],\n      \"updated\": 1662257477283,\n      \"link\": null,\n      \"locked\": false,\n      \"fontSize\": 20,\n      \"fontFamily\": 1,\n      \"text\": \"DenseLinkKey\",\n      \"baseline\": 19,\n      \"textAlign\": \"center\",\n      \"verticalAlign\": \"top\",\n      \"containerId\": \"QvUMauaFoUm7amxqdJy2z\",\n      \"originalText\": \"DenseLinkKey\"\n    },\n    {\n      \"type\": \"arrow\",\n      \"version\": 1465,\n      \"versionNonce\": 1519908357,\n      \"isDeleted\": false,\n      \"id\": \"sGH1mRBDDfdaZOORzbU1h\",\n      \"fillStyle\": \"hachure\",\n      \"strokeWidth\": 1,\n      \"strokeStyle\": \"solid\",\n      \"roughness\": 1,\n      \"opacity\": 100,\n      \"angle\": 0,\n      \"x\": 1142.8214285714287,\n      \"y\": 768.982142857143,\n      \"strokeColor\": \"#000000\",\n      \"backgroundColor\": \"transparent\",\n      \"width\": 108,\n      \"height\": 43,\n      \"seed\": 1144014277,\n      \"groupIds\": [],\n      \"strokeSharpness\": \"round\",\n      \"boundElements\": [],\n      \"updated\": 1662257477411,\n      \"link\": null,\n      \"locked\": false,\n      \"startBinding\": {\n        \"elementId\": \"6SF7SEj50JLrJxpeJopdp\",\n        \"focus\": 0.17277405270544205,\n        \"gap\": 5.5\n      },\n      \"endBinding\": {\n        \"elementId\": \"LGXZp6X5oRRKg9gzSJIcd\",\n        \"focus\": 0.37288545736724105,\n        \"gap\": 1\n      },\n      \"lastCommittedPoint\": null,\n      \"startArrowhead\": null,\n      \"endArrowhead\": \"arrow\",\n      \"points\": [\n        [\n          0,\n          0\n        ],\n        [\n          67,\n          
-2.5\n        ],\n        [\n          108,\n          -43\n        ]\n      ]\n    },\n    {\n      \"type\": \"arrow\",\n      \"version\": 1414,\n      \"versionNonce\": 647294309,\n      \"isDeleted\": false,\n      \"id\": \"CVx1AqnNI76hVX9-ObrtA\",\n      \"fillStyle\": \"hachure\",\n      \"strokeWidth\": 1,\n      \"strokeStyle\": \"solid\",\n      \"roughness\": 1,\n      \"opacity\": 100,\n      \"angle\": 0,\n      \"x\": 1141.8214285714287,\n      \"y\": 826.982142857143,\n      \"strokeColor\": \"#000000\",\n      \"backgroundColor\": \"transparent\",\n      \"width\": 272,\n      \"height\": 42,\n      \"seed\": 171156747,\n      \"groupIds\": [],\n      \"strokeSharpness\": \"round\",\n      \"boundElements\": [],\n      \"updated\": 1662257477411,\n      \"link\": null,\n      \"locked\": false,\n      \"startBinding\": {\n        \"elementId\": \"Nz45mnUTSGpaOgsVNIEr-\",\n        \"focus\": 0.5583475858439679,\n        \"gap\": 5.5\n      },\n      \"endBinding\": {\n        \"elementId\": \"nqXx_jG0SMox2AHT2L5F2\",\n        \"focus\": 0.06888696200536025,\n        \"gap\": 4.25\n      },\n      \"lastCommittedPoint\": null,\n      \"startArrowhead\": null,\n      \"endArrowhead\": \"arrow\",\n      \"points\": [\n        [\n          0,\n          0\n        ],\n        [\n          95,\n          -23\n        ],\n        [\n          272,\n          -42\n        ]\n      ]\n    },\n    {\n      \"type\": \"arrow\",\n      \"version\": 1469,\n      \"versionNonce\": 180091077,\n      \"isDeleted\": false,\n      \"id\": \"iKBu85WHY4IL_69QEvdyT\",\n      \"fillStyle\": \"hachure\",\n      \"strokeWidth\": 1,\n      \"strokeStyle\": \"solid\",\n      \"roughness\": 1,\n      \"opacity\": 100,\n      \"angle\": 0,\n      \"x\": 693.5714285714287,\n      \"y\": 826.857142857143,\n      \"strokeColor\": \"#000000\",\n      \"backgroundColor\": \"transparent\",\n      \"width\": 269,\n      \"height\": 43,\n      \"seed\": 1191246795,\n      
\"groupIds\": [],\n      \"strokeSharpness\": \"round\",\n      \"boundElements\": [],\n      \"updated\": 1662257477412,\n      \"link\": null,\n      \"locked\": false,\n      \"startBinding\": {\n        \"elementId\": \"CdJtzp6w0n0rveWC1BWuQ\",\n        \"focus\": 0.5091435337455598,\n        \"gap\": 1.5\n      },\n      \"endBinding\": {\n        \"elementId\": \"QvUMauaFoUm7amxqdJy2z\",\n        \"focus\": 0.10601094635015593,\n        \"gap\": 1.25\n      },\n      \"lastCommittedPoint\": null,\n      \"startArrowhead\": null,\n      \"endArrowhead\": \"arrow\",\n      \"points\": [\n        [\n          0,\n          0\n        ],\n        [\n          95,\n          -23\n        ],\n        [\n          269,\n          -43\n        ]\n      ]\n    }\n  ],\n  \"appState\": {\n    \"gridSize\": null,\n    \"viewBackgroundColor\": \"#ffffff\"\n  },\n  \"files\": {}\n}"
  },
  {
    "path": "docs/dense_set.md",
    "content": "# DenseSet in Dragonfly\n\n`DenseSet` uses [classic hashtable with separate chaining](https://en.wikipedia.org/wiki/Hash_table#Separate_chaining) similar to the Redis dictionary for lookup of items within the set.\n\nThe main optimization present in `DenseSet` is the ability for a pointer to **point to either an object or a link key**, removing the need to allocate a set entry for every entry. This is accomplished by using [pointer tagging](https://en.wikipedia.org/wiki/Tagged_pointer) exploiting the fact that the top 12 bits of any userspace address are not used and can be set to indicate if the current pointer points to nothing, a link key, or an object.\n\nThe following is what each bit in a pointer is used for\n\n| Bit Index (from LSB) | Meaning |\n| -------------------- |-------- |\n|       0 - 52         | Memory address of data in the userspace |\n|          53          | Indicates if this `DensePtr` points to data stored in the `DenseSet` or the next link in a chain |\n|          54          | Displacement bit. Indicates if the current entry is in the correct list defined by the data's hash |\n|          55          | Direction displaced, this only has meaning if the Displacement bit is set. 0 indicates the entry is to the left of its correct list, 1 indicates it is to the right of the correct list. |\n|       56 - 63        | Unused |\n\nFurther, to reduce collisions items may be inserted into neighbors of the home chain (the chain determined by the hash) that are empty to reduce the number of unused spaces. These entries are then marked as displaced using pointer tagging.\n\nAn example of possible bucket configurations can be seen below.\n\n![Dense Set Visualization](./dense_set.svg) *Created using [excalidraw](https://excalidraw.com)*\n\n### Insertion\nTo insert an entry a `DenseSet` will take the following steps:\n\n1. Check if the entry already exists in the set, if so return false\n2. 
If the entry does not exist look for an empty chain at the hash index ± 1, prioritizing the home chain. If an empty entry is found the item will be inserted and return true\n3. If step 2 fails and the growth prerequisites are met, increase the number of buckets in the table and repeat step 2\n4. If step 3 fails, attempt to insert the entry in the home chain.\n    - If the home chain is not occupied by a displaced entry insert the new entry in the front of the list\n    - If the home chain is occupied by a displaced entry move the displaced entry to its home chain. This may cause a domino effect if the home chain of the displaced entry is occupied by a second displaced entry, resulting in up to `O(N)` \"fixes\"\n\n### Searching\nTo find an entry in a `DenseSet`:\n\n1. Check the first entry in the home and neighbour cells for matching entries\n2. If step 1 fails iterate the home chain of the searched entry and check for equality\n\n### Pending Improvements\nSome further improvements to `DenseSet` include allowing entries to be inserted in their home chain without having to perform the current `O(N)` steps to fix displaced entries. By inserting an entry in their home chain after the displaced entry instead of fixing up displaced entries, searching incurs minimal added overhead and there is no domino effect in inserting a new entry. 
To eventually move a displaced entry to its home chain, multiple heuristics may be implemented, including:\n\n- When an entry is erased if the chain becomes empty and there is a displaced entry in the neighbor chains move it to the now empty home chain\n- If a displaced entry is found as a result of a search and is the root of a chain with multiple entries, the displaced node should be moved to its home bucket\n\n\n## Benchmarks\n\nAt 100% utilization the Redis dictionary implementation uses approximately 32 bytes per record ([read the breakdown for more information](./dashtable.md#redis-dictionary))\n\nIn comparison using the neighbour cell optimization, `DenseSet` has ~21% of spaces unused at full utilization resulting in $N\\*8 + 0.2\\*16N \\approx 11.2N$ or ~12 bytes per record, yielding ~20 byte savings. The number of bytes per record saved grows as utilization decreases.\n\nCommand `memtier_benchmark -p 6379 --command \"sadd __key__ __data__\"   -n 10000000 --threads=1 -c 1 --command-key-pattern=R   --data-size=10     --key-prefix=\"key:\"  --hide-histogram --random-data --key-maximum=1 --randomize --pipeline 20`\nproduces two set entries with lots of small records in them.\n\nThis is what memory usage looks like with DenseSet:\n\n| Server                | Memory (RSS) |\n|:---------------------:|:------:      |\n| Dragonfly/DenseSet    |  323MB 🟩    |\n| Redis                 |  586MB       |\n| Dragonfly/RedisDict   |  663MB       |\n"
  },
  {
    "path": "docs/df-share-nothing.md",
    "content": "# Dragonfly Architecture\n\nDragonfly is a modern replacement for memory stores like Redis and Memcached. It scales vertically on a single instance to support millions of requests per second. It is more memory efficient, has been designed with reliability in mind, and includes a better caching design.\n\n## Threading model\n\nDragonfly uses a single process with a multiple-thread architecture. Each Dragonfly thread is indirectly assigned several responsibilities via fibers.\n\nOne such responsibility is handling incoming connections. Once a socket listener accepts a client connection, the connection spends its entire lifetime bound to a single thread inside a fiber. Dragonfly is written to be 100% non-blocking; it uses fibers to provide asynchronicity in each thread. One of the essential properties of asynchronicity is that a thread cannot be blocked as long as it has pending CPU tasks. Dragonfly preserves this property by wrapping each unit of execution context in a fiber; we wrap units of execution that can potentially be blocked on I/O. For example, a connection loop runs within a fiber; a function that writes a snapshot runs inside a fiber, and so on.\n\nAs a side comment - asynchronicity and parallelism are different terms. Nodejs, for example, provides asynchronous execution but is single-threaded. Similarly, each Dragonfly thread is asynchronous on its own; therefore, Dragonfly is responsive to incoming events even when it handles long-running commands like saving to disk or running Lua scripts.\n\n\n### Thread actors in DF\n\nThe DF in-memory database is sharded into `N` parts, where `N` is less or equal to the number of threads in the system. 
Each database shard is owned and accessed by a single thread.\nThe same thread can handle TCP connections and simultaneously host a database shard.\nSee the diagram below.\n\n\n<br>\n<img src=\"http://static.dragonflydb.io/repo-assets/thread-per-core.svg\" border=\"0\"/>\n\nHere, our DF process spawns 4 threads, where threads 1 through 3 handle I/O (i.e., manage client connections) and threads 2 through 4 manage DB shards. Thread 2, for example, divides its CPU time between handling incoming requests and processing DB operations on the shard it owns.\n\nSo when we say that thread 1 is an I/O thread, we mean that Dragonfly can pin fibers that manage client connections to thread 1. In general, any thread can have many responsibilities that require CPU time; database management and connection handling are only two of those responsibilities.\n\n\n## Fibers\n\nI suggest reading my [intro post](https://www.romange.com/2018/12/15/introduction-to-fibers-in-c-/) about `Boost.Fibers` to learn more about fibers.\n\nBy the way, I want to compliment `Boost.Fibers` library–it has been exceptionally well designed:\nit's unintrusive, lightweight, and efficient. Moreover, its default scheduler can be overridden. In the case of `helio`, the I/O library that powers Dragonfly, we overrode the `Boost.Fibers` scheduler to support shared-nothing architecture and integrate it with the I/O polling loop.\n\nImportantly, fibers require bottom-up support in the application layer to preserve their asynchronicity. For example, in the snippet below, a blocking write into `fd` won't magically allow a fiber to preempt and switch to another fiber. No, the whole thread will be blocked.\n\n\n```cpp\n...\nwrite(fd, buf, 1000000);\n\n...\npthread_mutex_lock(...);\n\n```\n\nSimilarly, with a `pthread_mutex_lock` call, the whole thread might be blocked, wasting precious CPU time.. Therefore, the Dragonfly code uses *fiber-friendly* primitives for I/O, communication, and coordination. 
These primitives are supplied by the `helio` and `Boost.Fibers` libraries.\n\n## Life of a command request\n\nThis section explains how Dragonfly handles a command in the context of shared-nothing architecture. In most architectures used today, multi-threaded servers use mutex locks to protect their data structures, but Dragonfly does not. Why is this?\n\nInter-thread interactions in Dragonfly occur only via passing messages from thread to thread. For example, consider the following sequence diagram of handling a SET request:\n\n\n```uml\n@startuml\n\nactor       User       as A1\nboundary    connection  as B1\nentity      \"Shard K\"   as E1\nA1 ->  B1 : SET KEY VAL\nB1 -> E1 : SET KEY VAL / k = HASH(KEY) % N\nE1 -> B1 : OK\nB1 -> A1 : Response\n\n@enduml\n```\n\n<img src=\"https://www.plantuml.com/plantuml/svg/NOn12m8X48Nl_eh7Gb272Az1WGl2Wb6G5NGqLsW9PaBjqBzlL-lId6Q-zxvnFdD4dNCAlzKbA2bk_ABUnJS0U2OAFWzC9Msb29I7N3AWiNSNUvYckbeA9R7SOknX3QjFCFgAYzg9jd3zXx720njqodRp4IqmmrxegLe_7CnNLDDr3Ed9bC87\"/>\n\nHere, a connection fiber resides in a thread different from one that handles the `KEY` entity. We use hashing to decide which shard owns which key.\n\nAnother way to think of this flow is that a connection fiber serves as a coordinator for issuing transactional commands to other threads. In this simple example, the external \"SET\" command requires a single message passed from the coordinator to the destination shard thread. When we think of the Dragonfly model in the context of a single command request, I prefer to use the following diagram instead of the [one above](#thread-actors-in-df).\n\n<br>\n<img src=\"http://static.dragonflydb.io/repo-assets/coordinator.svg\" border=\"0\"/>\n\nHere, a coordinator (or connection fiber) might even reside on one of the threads that coincidently owns one of the shards. 
However, it is easier to think of it as a separate entity that never directly accesses any shard data.\n\nThe coordinator serves as a virtualization layer that hides all the complexity of talking to multiple shards. It employs state-of-the-art algorithms to provide atomicity (and strict serializability) semantics for multi-key commands like \"mset, mget, and blpop.\" It also offers strict serializability for Lua scripts and multi-command transactions.\n\nHiding such complexity is valuable to the end customer, but it comes with some CPU and latency costs. We believe the trade-off is worthwhile given the value that Dragonfly provides.\n\nIf you want to deep dive into Dragonfly architecture without the complexities of transactional code, it's worth checking [Midi Redis](https://github.com/romange/midi-redis/),\nwhich implements a toy backend supporting `PING`, `SET`, and `GET` [commands](https://github.com/romange/midi-redis/blob/main/server/main_service.cc#L239).\n\nIn fact, Dragonfly grew from that project; they share a common commit history.\n\nBy the way, to learn how to build even simpler TCP backends than `midi-redis`, the `helio` library provides sample backends like these: [echo_server](https://github.com/romange/helio/blob/master/examples/echo_server.cc) and [ping_iouring_server.cc](https://github.com/romange/helio/blob/master/examples/pingserver/ping_iouring_server.cc). These backends reach millions of QPS on multi-core servers much like Dragonfly and midi-redis do.\n"
  },
  {
    "path": "docs/differences.md",
    "content": "# Differences with Redis\n\n## String lengths, indices.\n\nString sizes are limited to 256MB.\nIndices (say in GETRANGE and SETRANGE commands) should be signed 32 bit integers in range\n[-2147483647, 2147483648].\n\n### String handling.\n\nSORT does not take any locale into account.\n\n## Expiry ranges.\nExpirations are limited to 8 years. For commands with millisecond precision like PEXPIRE or PSETEX,\nexpirations greater than 2^28ms are quietly rounded to the nearest second losing precision of less than 0.001%.\n\n## Lua\nWe use lua 5.4.4 that has been released in 2022.\nThat means we also support [lua integers](https://github.com/redis/redis/issues/5261).\n"
  },
  {
    "path": "docs/faq.md",
    "content": "# Dragonfly Frequently Asked Questions\n\n- [Dragonfly Frequently Asked Questions](#dragonfly-frequently-asked-questions)\n  - [What is the license model of Dragonfly? Is it an open source?](#what-is-the-license-model-of-dragonfly-is-it-an-open-source)\n  - [Can I use dragonfly in production?](#can-i-use-dragonfly-in-production)\n  - [We benchmarked Dragonfly and we have not reached 4M qps throughput as you advertised.](#we-benchmarked-dragonfly-and-we-have-not-reached-4m-qps-throughput-as-you-advertised)\n  - [Dragonfly provides vertical scale, but we can achieve similar throughput with X nodes in a Redis cluster.](#dragonfly-provides-vertical-scale-but-we-can-achieve-similar-throughput-with-x-nodes-in-a-redis-cluster)\n  - [If only Dragonfly had this command I would use it for sure](#if-only-dragonfly-had-this-command-i-would-use-it-for-sure)\n\n\n## What is the license model of Dragonfly? Is it an open source?\nDragonfly is released under [BSL 1.1](../LICENSE.md) (Business Source License).\nBSL 1.1 is considered to be \"source available\" license and it's not strictly open-source license.\nWe believe that a [BSL 1.1](https://spdx.org/licenses/BUSL-1.1.html) license is more permissive\nthan licenses like AGPL, and it will allow us to\nprovide a competitive commercial service using our technology. 
In general terms,\nit means that Dragonfly's code is free to use and free to change as long as you do not sell services directly related to\nDragonfly or in-memory datastores.\nWe followed the trend of other technological companies like Elastic, Redis, MongoDB, Cockroach labs,\nRedpanda Data to protect our rights to provide service and support for the software we are building.\n\n## Can I use dragonfly in production?\nLicense wise you are free to use dragonfly in your production as long as you do not provide Dragonfly as a managed service.\nFrom a code maturity point of view, Dragonfly's code is covered with unit testing and the regression tests.\nHowever as with any new software there are use cases that are hard to test and predict.\nWe advise you to run your own particular use case on dragonfly for a few days before considering production usage.\n\n## We benchmarked Dragonfly and we have not reached 4M qps throughput as you advertised.\nWe conducted our experiments using a load-test generator called `memtier_benchmark`,\nand we run benchmarks on AWS network-enhanced instance `c6gn.16xlarge` on recent Linux kernel versions.\nDragonfly might reach smaller throughput on other instances, but we would\nstill expect to reach around 1M+ qps on instances with 16-32 vCPUs.\n\n## Dragonfly provides vertical scale, but we can achieve similar throughput with X nodes in a Redis cluster.\nDragonfly optimizes the use of underlying hardware, allowing it to run efficiently on instances as small as 8GB,\n and scale vertically to large 2TB machines with 128 cores. This versatility significantly\n reduces the complexity of running cluster workloads on a single node, saving hardware resources and costs.\n More importantly, it diminishes the total cost\n of ownership associated with managing multi-node clusters. 
In contrast, Redis in cluster\n mode imposes limitations on multi-key and transactional operations, whereas Dragonfly maintains\n the same semantics as a single-node Redis system.\n Furthermore, scaling out horizontally with small instances can lead to instability\n in production environments.\n We believe that large-scale deployments of in-memory stores require both vertical and horizontal scaling,\n which is not efficiently achievable with an in-memory store like Redis.\n\n## If only Dragonfly had this command I would use it for sure\nDragonfly implements ~190 Redis commands which we think represent a good coverage of the market.\nHowever, this is not based on empirical data. Having said that, if you have commands that are not covered,\nplease feel free to open an issue for that or vote for an existing issue.\nWe will do our best to prioritise those commands according to their popularity.\n"
  },
  {
    "path": "docs/memcached_benchmark.md",
    "content": "Contention in memcached under the high write throughput.\n\n<img src=\"http://static.dragonflydb.io/repo-assets/memcached_perf_top.png\" width=\"100%\" border=\"0\"/>\n\nOverall CPU usage of memcached when performing SETS benchmark:\n\n<img src=\"http://static.dragonflydb.io/repo-assets/memcached_cpu_usage.png\" width=\"100%\" border=\"0\"/>\n\n"
  },
  {
    "path": "docs/memory_bgsave.tsv",
    "content": "Time\tDragonfly\tRedis\n4\t4738531328\t6819917824\n5\t4738637824\t6819917824\n6\t4738658304\t6819913728\n7\t4738777088\t6820589568\n8\t4738781184\t6820638720\n9\t4738768896\t6820769792\n10\t4738494464\t6820777984\n11\t4738756608\t6820683776\n12\t4740325376\t6820687872\n13\t4740243456\t6820691968\n14\t4740194304\t6820687872\n15\t4740194304\t7429746688\n16\t4740734976\t7942115328\n17\t4740370432\t8400957440\n18\t4740366336\t8863305728\n19\t4740390912\t9302515712\n20\t4740399104\t9697935360\n21\t4740423680\t10074103808\n22\t4748312576\t10362601472\n23\t4750438400\t10649939968\n24\t4750315520\t10926985216\n25\t4750426112\t11195555840\n26\t4750180352\t11444666368\n27\t4750417920\t11665764352\n28\t4750131200\t11872944128\n29\t4750233600\t12060946432\n30\t4750475264\t12232212480\n31\t\t12379299840\n32\t\t12521598976\n33\t\t12647915520\n34\t\t12756508672\n35\t\t12848570368\n36\t\t12944240640\n37\t\t13025046528\n38\t\t13105799168\n39\t\t13181427712\n40\t\t8000053248\n41\t\t7048486912\n42\t\t7048507392"
  },
  {
    "path": "docs/namespaces.md",
    "content": "# Namespaces in Dragonfly\n\nDragonfly added an _experimental_ feature, allowing complete separation of data by different users.\nWe call this feature _namespaces_, and it allows using a single Dragonfly server with multiple\ntenants, each using their own data, without being able to mix them together.\n\nNote that this feature can alternatively be achieved by having each user `SELECT` a different\n(numeric) database, or by asking that each user uses a unique prefix for their keys. This approach\nhas several disadvantages, like users forgetting to `SELECT` / use their prefix, accessing data\nlogically belonging to other users.\n\nThe advantage of using Namespaces is that data is completely isolated, and users cannot accidentally\nuse data they do not own. A user must authenticate in order to access the namespace it was assigned.\nAnd as a bonus, each namespace can have multiple databases, switched via `SELECT` like any regular\ndata store.\n\nHowever, before using this feature, please note that it is experimental. This means that:\n\n* Some features are not supported for non-default namespaces, such as replication and save to RDB\n* Some tools are missing, like breakdown of memory / load per namespace\n* We do not yet consider this production ready, and it might still have some uncovered bugs\n\nSo kindly use it at your own risk.\n\n## Usage\n\nThis section describes how, as a Dragonfly user / administrator, you could use namespaces.\n\nA namespace is identified by a unique string id, defined by the user / admin. Each Dragonfly user\nis associated with a single namespace. If not set explicitly, then the default namespace is used,\nwhich is the empty string id.\n\nMultiple users can use the same namespace if they are all assigned the same namespace id. 
This can\nallow, for example, creating a read-only user as well as a mutating user over the same data.\n\nTo associate user `user1` with the namespace `namespace1`, use the `ACL` command with the\n`NAMESPACE:namespace1` flag:\n\n```\nACL SETUSER user1 NAMESPACE:namespace1 ON >user_pass +@all ~*\n```\n\nThis sets / creates user `user1`, using password `user_pass`, using namespace `namespace1`.\n\nFor more examples check out `tests/dragonfly/acl_family_test.py` - specifically the\n`test_namespaces` function.\n\n## Technical Details\n\nThis section describes how we _implemented_ namespaces in Dragonfly. It is meant to be used by those\nwho wish to contribute pull requests to Dragonfly.\n\nPrior to adding namespaces to Dragonfly, each _shard_ had a single `DbSlice` that it owned. They\nwere thread-local, global-scope instances.\n\nTo support namespaces, we created a `Namespace` class (see `src/server/namespaces.h`) which contains\na `vector<DbSlice>`, with a `DbSlice` per shard. When first used, a `Namespace` calls the engine\nshard set to initialize the array of `DbSlice`s.\n\nTo access all `Namespace`s, we also added a registry with the original name `Namespaces`. It is a\nglobal, thread-safe class that allows accessing all registered namespaces, and registering new ones\non the fly. Note that, while it is thread-safe, it shouldn't be a bottleneck because it is supposed\nto only be used during the authentication of a connection (or when adding new namespaces).\n\nWhen a new connection is authenticated with Dragonfly, we look up (and create, if needed) the\nnamespace it is associated with. We then save a `Namespace* ns` inside the `dfly::ConnectionContext`\nclass to associate the user with the namespaces. Because we removed the global `DbSlice` objects,\nthis is now the only way to access namespaces, which protects users from accessing unowned data.\n\nCurrently, we do not have any support for removing namespaces, so they hang in memory until the\nserver exits.\n"
  },
  {
    "path": "docs/quick-start/README.md",
    "content": "<p align=\"center\">\n  <a href=\"https://dragonflydb.io\">\n    <img src=\"https://raw.githubusercontent.com/dragonflydb/dragonfly/main/.github/images/logo-full.svg\"\n      width=\"284\" border=\"0\" alt=\"Dragonfly\">\n  </a>\n</p>\n\n\n# Quick Start\n\nStarting with `docker run` is the simplest way to get up and running with DragonflyDB.\n\nIf you do not have docker on your machine, [Install Docker](https://docs.docker.com/get-docker/) before continuing.\n\n## Step 1\n\n### On linux\n\n```bash\ndocker run --network=host --ulimit memlock=-1 docker.dragonflydb.io/dragonflydb/dragonfly\n```\n\n### On macOS\n\n_`network=host` doesn't work well on macOS, see [this issue](https://github.com/docker/for-mac/issues/1031)_\n\n```bash\ndocker run -p 6379:6379 --ulimit memlock=-1 docker.dragonflydb.io/dragonflydb/dragonfly\n```\n\nDragonfly DB will answer to both `http` and `redis` requests out of the box!\n\nYou can use `redis-cli` to connect to `localhost:6379` or open a browser and visit `http://localhost:6379`\n\n**NOTE**: On some configurations, running with the `docker run --privileged ...` flag can fix some\ninitialization errors.\n\n## Step 2\n\nConnect with a redis client\n\n```bash\nredis-cli\n127.0.0.1:6379> set hello world\nOK\n127.0.0.1:6379> keys *\n1) \"hello\"\n127.0.0.1:6379> get hello\n\"world\"\n127.0.0.1:6379>\n```\n\n## Step 3\n\nContinue being great and build your app with the power of DragonflyDB!\n\n## Known issues\n\n\n## More Build Options\n- [Docker Compose Deployment](/contrib/docker/)\n- [Kubernetes Deployment with Helm Chart](/contrib/charts/dragonfly/)\n- [Build From Source](/docs/build-from-source.md)\n"
  },
  {
    "path": "docs/rdbsave.excalidraw",
    "content": "{\n  \"type\": \"excalidraw\",\n  \"version\": 2,\n  \"source\": \"https://excalidraw.com\",\n  \"elements\": [\n    {\n      \"type\": \"rectangle\",\n      \"version\": 586,\n      \"versionNonce\": 345912761,\n      \"isDeleted\": false,\n      \"id\": \"BY5OdEEKT0Y_DTy9Zgr9C\",\n      \"fillStyle\": \"hachure\",\n      \"strokeWidth\": 1,\n      \"strokeStyle\": \"solid\",\n      \"roughness\": 1,\n      \"opacity\": 100,\n      \"angle\": 0,\n      \"x\": 654.7020016982203,\n      \"y\": 187.24519230769243,\n      \"strokeColor\": \"#000000\",\n      \"backgroundColor\": \"#fd7e14\",\n      \"width\": 165,\n      \"height\": 199,\n      \"seed\": 1621471436,\n      \"groupIds\": [],\n      \"strokeSharpness\": \"sharp\",\n      \"boundElements\": [\n        {\n          \"id\": \"sIrssFTnnb9f1o26g1j88\",\n          \"type\": \"text\"\n        },\n        {\n          \"type\": \"text\",\n          \"id\": \"sIrssFTnnb9f1o26g1j88\"\n        },\n        {\n          \"id\": \"1cq4mAkO92nzlk-wjAy0a\",\n          \"type\": \"arrow\"\n        }\n      ],\n      \"updated\": 1661620421120,\n      \"link\": null,\n      \"locked\": false\n    },\n    {\n      \"type\": \"text\",\n      \"version\": 514,\n      \"versionNonce\": 869523031,\n      \"isDeleted\": false,\n      \"id\": \"sIrssFTnnb9f1o26g1j88\",\n      \"fillStyle\": \"hachure\",\n      \"strokeWidth\": 1,\n      \"strokeStyle\": \"solid\",\n      \"roughness\": 1,\n      \"opacity\": 100,\n      \"angle\": 0,\n      \"x\": 659.7020016982203,\n      \"y\": 261.74519230769243,\n      \"strokeColor\": \"#000000\",\n      \"backgroundColor\": \"#fd7e14\",\n      \"width\": 155,\n      \"height\": 50,\n      \"seed\": 711168500,\n      \"groupIds\": [],\n      \"strokeSharpness\": \"sharp\",\n      \"boundElements\": [],\n      \"updated\": 1661620421121,\n      \"link\": null,\n      \"locked\": false,\n      \"fontSize\": 20,\n      \"fontFamily\": 1,\n      \"text\": 
\"Thread-local\\nSnapshot 1\",\n      \"baseline\": 43,\n      \"textAlign\": \"center\",\n      \"verticalAlign\": \"middle\",\n      \"containerId\": \"BY5OdEEKT0Y_DTy9Zgr9C\",\n      \"originalText\": \"Thread-local\\nSnapshot 1\"\n    },\n    {\n      \"type\": \"rectangle\",\n      \"version\": 622,\n      \"versionNonce\": 1016232663,\n      \"isDeleted\": false,\n      \"id\": \"OiDY20ES-4wBxFVAzHkHt\",\n      \"fillStyle\": \"hachure\",\n      \"strokeWidth\": 1,\n      \"strokeStyle\": \"solid\",\n      \"roughness\": 1,\n      \"opacity\": 100,\n      \"angle\": 0,\n      \"x\": 866.0673076923077,\n      \"y\": 187.24519230769243,\n      \"strokeColor\": \"#000000\",\n      \"backgroundColor\": \"#fd7e14\",\n      \"width\": 165,\n      \"height\": 199,\n      \"seed\": 1937655639,\n      \"groupIds\": [],\n      \"strokeSharpness\": \"sharp\",\n      \"boundElements\": [\n        {\n          \"id\": \"nTSFevnRPYnvrSc57ZrgV\",\n          \"type\": \"text\"\n        },\n        {\n          \"id\": \"nTSFevnRPYnvrSc57ZrgV\",\n          \"type\": \"text\"\n        },\n        {\n          \"type\": \"text\",\n          \"id\": \"nTSFevnRPYnvrSc57ZrgV\"\n        },\n        {\n          \"id\": \"NGMUGV32wJmpMyvB3YQTx\",\n          \"type\": \"arrow\"\n        }\n      ],\n      \"updated\": 1661620421121,\n      \"link\": null,\n      \"locked\": false\n    },\n    {\n      \"type\": \"text\",\n      \"version\": 539,\n      \"versionNonce\": 941214039,\n      \"isDeleted\": false,\n      \"id\": \"nTSFevnRPYnvrSc57ZrgV\",\n      \"fillStyle\": \"hachure\",\n      \"strokeWidth\": 1,\n      \"strokeStyle\": \"solid\",\n      \"roughness\": 1,\n      \"opacity\": 100,\n      \"angle\": 0,\n      \"x\": 871.0673076923077,\n      \"y\": 256.74519230769243,\n      \"strokeColor\": \"#000000\",\n      \"backgroundColor\": \"#fd7e14\",\n      \"width\": 155,\n      \"height\": 60,\n      \"seed\": 1072545177,\n      \"groupIds\": [],\n      \"strokeSharpness\": 
\"sharp\",\n      \"boundElements\": [],\n      \"updated\": 1661620424002,\n      \"link\": null,\n      \"locked\": false,\n      \"fontSize\": 23.932285237126536,\n      \"fontFamily\": 1,\n      \"text\": \"Thread-local\\nSnapshot 2\",\n      \"baseline\": 51,\n      \"textAlign\": \"center\",\n      \"verticalAlign\": \"middle\",\n      \"containerId\": \"OiDY20ES-4wBxFVAzHkHt\",\n      \"originalText\": \"Thread-local\\nSnapshot 2\"\n    },\n    {\n      \"type\": \"rectangle\",\n      \"version\": 608,\n      \"versionNonce\": 1548421111,\n      \"isDeleted\": false,\n      \"id\": \"0DuGwtSiWQDXGbVDx_Yq4\",\n      \"fillStyle\": \"hachure\",\n      \"strokeWidth\": 1,\n      \"strokeStyle\": \"solid\",\n      \"roughness\": 1,\n      \"opacity\": 100,\n      \"angle\": 0,\n      \"x\": 1085.2980769230767,\n      \"y\": 187.24519230769243,\n      \"strokeColor\": \"#000000\",\n      \"backgroundColor\": \"#fd7e14\",\n      \"width\": 165,\n      \"height\": 199,\n      \"seed\": 1695403735,\n      \"groupIds\": [],\n      \"strokeSharpness\": \"sharp\",\n      \"boundElements\": [\n        {\n          \"id\": \"dcrIif4WgKLztfzWXXskR\",\n          \"type\": \"text\"\n        },\n        {\n          \"id\": \"dcrIif4WgKLztfzWXXskR\",\n          \"type\": \"text\"\n        },\n        {\n          \"type\": \"text\",\n          \"id\": \"dcrIif4WgKLztfzWXXskR\"\n        },\n        {\n          \"id\": \"hgq3HgiDoEU1A13Sax2A5\",\n          \"type\": \"arrow\"\n        }\n      ],\n      \"updated\": 1661620421121,\n      \"link\": null,\n      \"locked\": false\n    },\n    {\n      \"type\": \"text\",\n      \"version\": 530,\n      \"versionNonce\": 667080441,\n      \"isDeleted\": false,\n      \"id\": \"dcrIif4WgKLztfzWXXskR\",\n      \"fillStyle\": \"hachure\",\n      \"strokeWidth\": 1,\n      \"strokeStyle\": \"solid\",\n      \"roughness\": 1,\n      \"opacity\": 100,\n      \"angle\": 0,\n      \"x\": 1090.2980769230767,\n      \"y\": 
256.74519230769243,\n      \"strokeColor\": \"#000000\",\n      \"backgroundColor\": \"#fd7e14\",\n      \"width\": 155,\n      \"height\": 60,\n      \"seed\": 379350553,\n      \"groupIds\": [],\n      \"strokeSharpness\": \"sharp\",\n      \"boundElements\": [],\n      \"updated\": 1661620421122,\n      \"link\": null,\n      \"locked\": false,\n      \"fontSize\": 23.932285237126536,\n      \"fontFamily\": 1,\n      \"text\": \"Thread-local\\nSnapshot 3\",\n      \"baseline\": 51,\n      \"textAlign\": \"center\",\n      \"verticalAlign\": \"middle\",\n      \"containerId\": \"0DuGwtSiWQDXGbVDx_Yq4\",\n      \"originalText\": \"Thread-local\\nSnapshot 3\"\n    },\n    {\n      \"id\": \"577abnzpQuxk_hrNgIMkV\",\n      \"type\": \"diamond\",\n      \"x\": 689.3365384615385,\n      \"y\": 437.86057692307713,\n      \"width\": 92,\n      \"height\": 157,\n      \"angle\": 0,\n      \"strokeColor\": \"#000000\",\n      \"backgroundColor\": \"#12b886\",\n      \"fillStyle\": \"hachure\",\n      \"strokeWidth\": 1,\n      \"strokeStyle\": \"solid\",\n      \"roughness\": 1,\n      \"opacity\": 100,\n      \"groupIds\": [],\n      \"strokeSharpness\": \"sharp\",\n      \"seed\": 520181305,\n      \"version\": 125,\n      \"versionNonce\": 1270149399,\n      \"isDeleted\": false,\n      \"boundElements\": [\n        {\n          \"id\": \"1cq4mAkO92nzlk-wjAy0a\",\n          \"type\": \"arrow\"\n        },\n        {\n          \"type\": \"text\",\n          \"id\": \"YWzMoutOj3POKIhzoAb6q\"\n        },\n        {\n          \"id\": \"HjlV2QEoKO1Najg9D1xnm\",\n          \"type\": \"arrow\"\n        }\n      ],\n      \"updated\": 1661620421122,\n      \"link\": null,\n      \"locked\": false\n    },\n    {\n      \"id\": \"1cq4mAkO92nzlk-wjAy0a\",\n      \"type\": \"arrow\",\n      \"x\": 728.5673076923077,\n      \"y\": 395.9759615384616,\n      \"width\": 32.307692307692264,\n      \"height\": 36.04730445962048,\n      \"angle\": 0,\n      \"strokeColor\": 
\"#000000\",\n      \"backgroundColor\": \"transparent\",\n      \"fillStyle\": \"hachure\",\n      \"strokeWidth\": 1,\n      \"strokeStyle\": \"solid\",\n      \"roughness\": 1,\n      \"opacity\": 100,\n      \"groupIds\": [],\n      \"strokeSharpness\": \"round\",\n      \"seed\": 2032795417,\n      \"version\": 139,\n      \"versionNonce\": 1145353783,\n      \"isDeleted\": false,\n      \"boundElements\": null,\n      \"updated\": 1661620421122,\n      \"link\": null,\n      \"locked\": false,\n      \"points\": [\n        [\n          0,\n          0\n        ],\n        [\n          32.307692307692264,\n          11.538461538461547\n        ],\n        [\n          9.869210911479854,\n          36.04730445962048\n        ]\n      ],\n      \"lastCommittedPoint\": null,\n      \"startBinding\": {\n        \"elementId\": \"BY5OdEEKT0Y_DTy9Zgr9C\",\n        \"focus\": 0.8708968370314767,\n        \"gap\": 9.73076923076917\n      },\n      \"endBinding\": {\n        \"elementId\": \"577abnzpQuxk_hrNgIMkV\",\n        \"focus\": -1.6111525113388454,\n        \"gap\": 5.625821498015291\n      },\n      \"startArrowhead\": null,\n      \"endArrowhead\": \"arrow\"\n    },\n    {\n      \"id\": \"YWzMoutOj3POKIhzoAb6q\",\n      \"type\": \"text\",\n      \"x\": 694.3365384615385,\n      \"y\": 498.36057692307713,\n      \"width\": 82,\n      \"height\": 36,\n      \"angle\": 0,\n      \"strokeColor\": \"#000000\",\n      \"backgroundColor\": \"transparent\",\n      \"fillStyle\": \"hachure\",\n      \"strokeWidth\": 1,\n      \"strokeStyle\": \"solid\",\n      \"roughness\": 1,\n      \"opacity\": 100,\n      \"groupIds\": [],\n      \"strokeSharpness\": \"sharp\",\n      \"seed\": 381921847,\n      \"version\": 39,\n      \"versionNonce\": 405941433,\n      \"isDeleted\": false,\n      \"boundElements\": null,\n      \"updated\": 1661620421122,\n      \"link\": null,\n      \"locked\": false,\n      \"text\": \"Rdb\\nSerializer\",\n      \"fontSize\": 16,\n      
\"fontFamily\": 2,\n      \"textAlign\": \"center\",\n      \"verticalAlign\": \"middle\",\n      \"baseline\": 32,\n      \"containerId\": \"577abnzpQuxk_hrNgIMkV\",\n      \"originalText\": \"Rdb\\nSerializer\"\n    },\n    {\n      \"id\": \"Ig1qNk-AOw_VTS_xlELs5\",\n      \"type\": \"rectangle\",\n      \"x\": 717.798076923077,\n      \"y\": 641.3605769230771,\n      \"width\": 477,\n      \"height\": 67,\n      \"angle\": 0,\n      \"strokeColor\": \"#000000\",\n      \"backgroundColor\": \"#fa5252\",\n      \"fillStyle\": \"solid\",\n      \"strokeWidth\": 1,\n      \"strokeStyle\": \"solid\",\n      \"roughness\": 1,\n      \"opacity\": 100,\n      \"groupIds\": [],\n      \"strokeSharpness\": \"sharp\",\n      \"seed\": 1664303159,\n      \"version\": 124,\n      \"versionNonce\": 111029657,\n      \"isDeleted\": false,\n      \"boundElements\": [\n        {\n          \"type\": \"text\",\n          \"id\": \"jE5wNvo8TFk1wC4v8bQ6s\"\n        },\n        {\n          \"id\": \"HjlV2QEoKO1Najg9D1xnm\",\n          \"type\": \"arrow\"\n        },\n        {\n          \"id\": \"hLcR_BUncIusv-IFL2ucM\",\n          \"type\": \"arrow\"\n        },\n        {\n          \"id\": \"WHRznFJAFjpXbmv35tCsY\",\n          \"type\": \"arrow\"\n        },\n        {\n          \"id\": \"yVBhfXkyFmu2rg16oRlxu\",\n          \"type\": \"arrow\"\n        }\n      ],\n      \"updated\": 1661620421122,\n      \"link\": null,\n      \"locked\": false\n    },\n    {\n      \"type\": \"diamond\",\n      \"version\": 140,\n      \"versionNonce\": 1301746297,\n      \"isDeleted\": false,\n      \"id\": \"MclWY93u6fXaKcMyYF-Jy\",\n      \"fillStyle\": \"hachure\",\n      \"strokeWidth\": 1,\n      \"strokeStyle\": \"solid\",\n      \"roughness\": 1,\n      \"opacity\": 100,\n      \"angle\": 0,\n      \"x\": 896.4134615384614,\n      \"y\": 437.8605769230771,\n      \"strokeColor\": \"#000000\",\n      \"backgroundColor\": \"#12b886\",\n      \"width\": 92,\n      \"height\": 157,\n    
  \"seed\": 755813689,\n      \"groupIds\": [],\n      \"strokeSharpness\": \"sharp\",\n      \"boundElements\": [\n        {\n          \"id\": \"NGMUGV32wJmpMyvB3YQTx\",\n          \"type\": \"arrow\"\n        },\n        {\n          \"id\": \"_xhHeDkg3dVxrIbXlln8Z\",\n          \"type\": \"text\"\n        },\n        {\n          \"type\": \"text\",\n          \"id\": \"_xhHeDkg3dVxrIbXlln8Z\"\n        },\n        {\n          \"id\": \"hLcR_BUncIusv-IFL2ucM\",\n          \"type\": \"arrow\"\n        }\n      ],\n      \"updated\": 1661620421122,\n      \"link\": null,\n      \"locked\": false\n    },\n    {\n      \"type\": \"arrow\",\n      \"version\": 167,\n      \"versionNonce\": 1223962007,\n      \"isDeleted\": false,\n      \"id\": \"NGMUGV32wJmpMyvB3YQTx\",\n      \"fillStyle\": \"hachure\",\n      \"strokeWidth\": 1,\n      \"strokeStyle\": \"solid\",\n      \"roughness\": 1,\n      \"opacity\": 100,\n      \"angle\": 0,\n      \"x\": 944.8750000000002,\n      \"y\": 387.86057692307696,\n      \"strokeColor\": \"#000000\",\n      \"backgroundColor\": \"transparent\",\n      \"width\": 15.10726263633046,\n      \"height\": 47.58370911007313,\n      \"seed\": 282885847,\n      \"groupIds\": [],\n      \"strokeSharpness\": \"round\",\n      \"boundElements\": [],\n      \"updated\": 1661620421122,\n      \"link\": null,\n      \"locked\": false,\n      \"startBinding\": {\n        \"elementId\": \"OiDY20ES-4wBxFVAzHkHt\",\n        \"focus\": 0.48198474540576314,\n        \"gap\": 1.615384615384528\n      },\n      \"endBinding\": {\n        \"elementId\": \"MclWY93u6fXaKcMyYF-Jy\",\n        \"focus\": -0.9774990043807243,\n        \"gap\": 2.921009509018951\n      },\n      \"lastCommittedPoint\": null,\n      \"startArrowhead\": null,\n      \"endArrowhead\": \"arrow\",\n      \"points\": [\n        [\n          0,\n          0\n        ],\n        [\n          14.615384615384528,\n          21.538461538461547\n        ],\n        [\n          
-0.4918780209459328,\n          47.58370911007313\n        ]\n      ]\n    },\n    {\n      \"type\": \"text\",\n      \"version\": 51,\n      \"versionNonce\": 299916121,\n      \"isDeleted\": false,\n      \"id\": \"_xhHeDkg3dVxrIbXlln8Z\",\n      \"fillStyle\": \"hachure\",\n      \"strokeWidth\": 1,\n      \"strokeStyle\": \"solid\",\n      \"roughness\": 1,\n      \"opacity\": 100,\n      \"angle\": 0,\n      \"x\": 901.4134615384614,\n      \"y\": 498.3605769230771,\n      \"strokeColor\": \"#000000\",\n      \"backgroundColor\": \"transparent\",\n      \"width\": 82,\n      \"height\": 36,\n      \"seed\": 1481686553,\n      \"groupIds\": [],\n      \"strokeSharpness\": \"sharp\",\n      \"boundElements\": [],\n      \"updated\": 1661620421122,\n      \"link\": null,\n      \"locked\": false,\n      \"fontSize\": 16,\n      \"fontFamily\": 2,\n      \"text\": \"Rdb\\nSerializer\",\n      \"baseline\": 32,\n      \"textAlign\": \"center\",\n      \"verticalAlign\": \"middle\",\n      \"containerId\": \"MclWY93u6fXaKcMyYF-Jy\",\n      \"originalText\": \"Rdb\\nSerializer\"\n    },\n    {\n      \"type\": \"diamond\",\n      \"version\": 225,\n      \"versionNonce\": 1063805623,\n      \"isDeleted\": false,\n      \"id\": \"jGf5xxZ5eve-AtPae7Yly\",\n      \"fillStyle\": \"hachure\",\n      \"strokeWidth\": 1,\n      \"strokeStyle\": \"solid\",\n      \"roughness\": 1,\n      \"opacity\": 100,\n      \"angle\": 0,\n      \"x\": 1119.4903846153848,\n      \"y\": 437.8605769230772,\n      \"strokeColor\": \"#000000\",\n      \"backgroundColor\": \"#12b886\",\n      \"width\": 92,\n      \"height\": 157,\n      \"seed\": 538175673,\n      \"groupIds\": [],\n      \"strokeSharpness\": \"sharp\",\n      \"boundElements\": [\n        {\n          \"id\": \"hgq3HgiDoEU1A13Sax2A5\",\n          \"type\": \"arrow\"\n        },\n        {\n          \"id\": \"WQcx4-r2uMVAquWROfq1l\",\n          \"type\": \"text\"\n        },\n        {\n          \"type\": \"text\",\n      
    \"id\": \"WQcx4-r2uMVAquWROfq1l\"\n        },\n        {\n          \"id\": \"WHRznFJAFjpXbmv35tCsY\",\n          \"type\": \"arrow\"\n        }\n      ],\n      \"updated\": 1661620421122,\n      \"link\": null,\n      \"locked\": false\n    },\n    {\n      \"type\": \"arrow\",\n      \"version\": 390,\n      \"versionNonce\": 332236857,\n      \"isDeleted\": false,\n      \"id\": \"hgq3HgiDoEU1A13Sax2A5\",\n      \"fillStyle\": \"hachure\",\n      \"strokeWidth\": 1,\n      \"strokeStyle\": \"solid\",\n      \"roughness\": 1,\n      \"opacity\": 100,\n      \"angle\": 0,\n      \"x\": 1141.6872098880729,\n      \"y\": 392.47596153846166,\n      \"strokeColor\": \"#000000\",\n      \"backgroundColor\": \"transparent\",\n      \"width\": 70.88009780423477,\n      \"height\": 61.500951281640766,\n      \"seed\": 168221527,\n      \"groupIds\": [],\n      \"strokeSharpness\": \"round\",\n      \"boundElements\": [],\n      \"updated\": 1661620421122,\n      \"link\": null,\n      \"locked\": false,\n      \"startBinding\": {\n        \"elementId\": \"0DuGwtSiWQDXGbVDx_Yq4\",\n        \"focus\": 0.9791425008071145,\n        \"gap\": 6.230769230769226\n      },\n      \"endBinding\": {\n        \"elementId\": \"jGf5xxZ5eve-AtPae7Yly\",\n        \"focus\": -0.5445868784908863,\n        \"gap\": 4.55886494843503\n      },\n      \"lastCommittedPoint\": null,\n      \"startArrowhead\": null,\n      \"endArrowhead\": \"arrow\",\n      \"points\": [\n        [\n          0,\n          0\n        ],\n        [\n          70.88009780423477,\n          10.76923076923083\n        ],\n        [\n          38.5310635413573,\n          61.500951281640766\n        ]\n      ]\n    },\n    {\n      \"type\": \"text\",\n      \"version\": 138,\n      \"versionNonce\": 2144924631,\n      \"isDeleted\": false,\n      \"id\": \"WQcx4-r2uMVAquWROfq1l\",\n      \"fillStyle\": \"hachure\",\n      \"strokeWidth\": 1,\n      \"strokeStyle\": \"solid\",\n      \"roughness\": 1,\n      
\"opacity\": 100,\n      \"angle\": 0,\n      \"x\": 1124.4903846153848,\n      \"y\": 498.3605769230772,\n      \"strokeColor\": \"#000000\",\n      \"backgroundColor\": \"transparent\",\n      \"width\": 82,\n      \"height\": 36,\n      \"seed\": 585656729,\n      \"groupIds\": [],\n      \"strokeSharpness\": \"sharp\",\n      \"boundElements\": [],\n      \"updated\": 1661620421122,\n      \"link\": null,\n      \"locked\": false,\n      \"fontSize\": 16,\n      \"fontFamily\": 2,\n      \"text\": \"Rdb\\nSerializer\",\n      \"baseline\": 32,\n      \"textAlign\": \"center\",\n      \"verticalAlign\": \"middle\",\n      \"containerId\": \"jGf5xxZ5eve-AtPae7Yly\",\n      \"originalText\": \"Rdb\\nSerializer\"\n    },\n    {\n      \"id\": \"jE5wNvo8TFk1wC4v8bQ6s\",\n      \"type\": \"text\",\n      \"x\": 722.798076923077,\n      \"y\": 656.8605769230771,\n      \"width\": 467,\n      \"height\": 36,\n      \"angle\": 0,\n      \"strokeColor\": \"#000000\",\n      \"backgroundColor\": \"#12b886\",\n      \"fillStyle\": \"hachure\",\n      \"strokeWidth\": 1,\n      \"strokeStyle\": \"solid\",\n      \"roughness\": 1,\n      \"opacity\": 100,\n      \"groupIds\": [],\n      \"strokeSharpness\": \"sharp\",\n      \"seed\": 320154873,\n      \"version\": 98,\n      \"versionNonce\": 1177598807,\n      \"isDeleted\": false,\n      \"boundElements\": null,\n      \"updated\": 1661620459622,\n      \"link\": null,\n      \"locked\": false,\n      \"text\": \"Blob Channel (SliceSnapshot::RecordChannel)\\nBucket-level granularity\",\n      \"fontSize\": 16,\n      \"fontFamily\": 2,\n      \"textAlign\": \"center\",\n      \"verticalAlign\": \"middle\",\n      \"baseline\": 32,\n      \"containerId\": \"Ig1qNk-AOw_VTS_xlELs5\",\n      \"originalText\": \"Blob Channel (SliceSnapshot::RecordChannel)\\nBucket-level granularity\"\n    },\n    {\n      \"id\": \"HjlV2QEoKO1Najg9D1xnm\",\n      \"type\": \"arrow\",\n      \"x\": 741.2581209970564,\n      \"y\": 
588.5811776062717,\n      \"width\": 31.351415988958138,\n      \"height\": 44.98870164238667,\n      \"angle\": 0,\n      \"strokeColor\": \"#000000\",\n      \"backgroundColor\": \"#12b886\",\n      \"fillStyle\": \"hachure\",\n      \"strokeWidth\": 1,\n      \"strokeStyle\": \"solid\",\n      \"roughness\": 1,\n      \"opacity\": 100,\n      \"groupIds\": [],\n      \"strokeSharpness\": \"round\",\n      \"seed\": 1489149785,\n      \"version\": 105,\n      \"versionNonce\": 1873907193,\n      \"isDeleted\": false,\n      \"boundElements\": null,\n      \"updated\": 1661620421122,\n      \"link\": null,\n      \"locked\": false,\n      \"points\": [\n        [\n          0,\n          0\n        ],\n        [\n          31.351415988958138,\n          44.98870164238667\n        ]\n      ],\n      \"lastCommittedPoint\": null,\n      \"startBinding\": {\n        \"elementId\": \"577abnzpQuxk_hrNgIMkV\",\n        \"gap\": 1.9342976914014673,\n        \"focus\": 0.8117909371106269\n      },\n      \"endBinding\": {\n        \"elementId\": \"Ig1qNk-AOw_VTS_xlELs5\",\n        \"gap\": 7.790697674418787,\n        \"focus\": -0.593178549414425\n      },\n      \"startArrowhead\": null,\n      \"endArrowhead\": \"arrow\"\n    },\n    {\n      \"id\": \"hLcR_BUncIusv-IFL2ucM\",\n      \"type\": \"arrow\",\n      \"x\": 919.3365384615385,\n      \"y\": 574.4375,\n      \"width\": 31.736196893864076,\n      \"height\": 60.69051878354196,\n      \"angle\": 0,\n      \"strokeColor\": \"#000000\",\n      \"backgroundColor\": \"#12b886\",\n      \"fillStyle\": \"hachure\",\n      \"strokeWidth\": 1,\n      \"strokeStyle\": \"solid\",\n      \"roughness\": 1,\n      \"opacity\": 100,\n      \"groupIds\": [],\n      \"strokeSharpness\": \"round\",\n      \"seed\": 328800759,\n      \"version\": 85,\n      \"versionNonce\": 304047833,\n      \"isDeleted\": false,\n      \"boundElements\": null,\n      \"updated\": 1661620421122,\n      \"link\": null,\n      \"locked\": false,\n  
    \"points\": [\n        [\n          0,\n          0\n        ],\n        [\n          -14.615384615384642,\n          25.384615384615472\n        ],\n        [\n          17.120812278479434,\n          60.69051878354196\n        ]\n      ],\n      \"lastCommittedPoint\": null,\n      \"startBinding\": {\n        \"elementId\": \"MclWY93u6fXaKcMyYF-Jy\",\n        \"focus\": -0.22524576872402804,\n        \"gap\": 9.584854518692971\n      },\n      \"endBinding\": {\n        \"elementId\": \"Ig1qNk-AOw_VTS_xlELs5\",\n        \"gap\": 6.232558139535168,\n        \"focus\": 0.05517004727771827\n      },\n      \"startArrowhead\": null,\n      \"endArrowhead\": \"arrow\"\n    },\n    {\n      \"id\": \"WHRznFJAFjpXbmv35tCsY\",\n      \"type\": \"arrow\",\n      \"x\": 1123.951923076923,\n      \"y\": 553.6682692307693,\n      \"width\": 32.30769230769238,\n      \"height\": 81.53846153846143,\n      \"angle\": 0,\n      \"strokeColor\": \"#000000\",\n      \"backgroundColor\": \"#12b886\",\n      \"fillStyle\": \"hachure\",\n      \"strokeWidth\": 1,\n      \"strokeStyle\": \"solid\",\n      \"roughness\": 1,\n      \"opacity\": 100,\n      \"groupIds\": [],\n      \"strokeSharpness\": \"round\",\n      \"seed\": 971531865,\n      \"version\": 66,\n      \"versionNonce\": 789696311,\n      \"isDeleted\": false,\n      \"boundElements\": null,\n      \"updated\": 1661620421122,\n      \"link\": null,\n      \"locked\": false,\n      \"points\": [\n        [\n          0,\n          0\n        ],\n        [\n          -32.30769230769238,\n          38.46153846153834\n        ],\n        [\n          -23.84615384615404,\n          81.53846153846143\n        ]\n      ],\n      \"lastCommittedPoint\": null,\n      \"startBinding\": {\n        \"elementId\": \"jGf5xxZ5eve-AtPae7Yly\",\n        \"focus\": 0.2217391304347844,\n        \"gap\": 15.012636648887266\n      },\n      \"endBinding\": {\n        \"elementId\": \"Ig1qNk-AOw_VTS_xlELs5\",\n        \"focus\": 
0.6185597345566728,\n        \"gap\": 6.153846153846416\n      },\n      \"startArrowhead\": null,\n      \"endArrowhead\": \"arrow\"\n    },\n    {\n      \"id\": \"yVBhfXkyFmu2rg16oRlxu\",\n      \"type\": \"arrow\",\n      \"x\": 864.7211538461538,\n      \"y\": 717.5144230769231,\n      \"width\": 67.97279116285586,\n      \"height\": 64.8374913674163,\n      \"angle\": 0,\n      \"strokeColor\": \"#000000\",\n      \"backgroundColor\": \"#228be6\",\n      \"fillStyle\": \"hachure\",\n      \"strokeWidth\": 1,\n      \"strokeStyle\": \"solid\",\n      \"roughness\": 1,\n      \"opacity\": 100,\n      \"groupIds\": [],\n      \"strokeSharpness\": \"round\",\n      \"seed\": 651147575,\n      \"version\": 635,\n      \"versionNonce\": 116567415,\n      \"isDeleted\": false,\n      \"boundElements\": null,\n      \"updated\": 1661620421122,\n      \"link\": null,\n      \"locked\": false,\n      \"points\": [\n        [\n          0,\n          0\n        ],\n        [\n          -42.30769230769215,\n          16.923076923076792\n        ],\n        [\n          -67.97279116285586,\n          64.8374913674163\n        ]\n      ],\n      \"lastCommittedPoint\": null,\n      \"startBinding\": {\n        \"elementId\": \"Ig1qNk-AOw_VTS_xlELs5\",\n        \"focus\": -0.04672674106343535,\n        \"gap\": 9.153846153845961\n      },\n      \"endBinding\": {\n        \"elementId\": \"HK8F6p6Adyxvgasi9uzJo\",\n        \"focus\": -0.17323237259147364,\n        \"gap\": 5.1625086325837515\n      },\n      \"startArrowhead\": null,\n      \"endArrowhead\": \"arrow\"\n    },\n    {\n      \"id\": \"HK8F6p6Adyxvgasi9uzJo\",\n      \"type\": \"rectangle\",\n      \"x\": 707.7980769230769,\n      \"y\": 784.4375,\n      \"width\": 155.84615384615387,\n      \"height\": 98.27507912481072,\n      \"angle\": 0,\n      \"strokeColor\": \"#000000\",\n      \"backgroundColor\": \"#4c6ef5\",\n      \"fillStyle\": \"solid\",\n      \"strokeWidth\": 1,\n      \"strokeStyle\": 
\"solid\",\n      \"roughness\": 1,\n      \"opacity\": 100,\n      \"groupIds\": [],\n      \"strokeSharpness\": \"sharp\",\n      \"seed\": 2031988567,\n      \"version\": 164,\n      \"versionNonce\": 418531705,\n      \"isDeleted\": false,\n      \"boundElements\": [\n        {\n          \"id\": \"yVBhfXkyFmu2rg16oRlxu\",\n          \"type\": \"arrow\"\n        },\n        {\n          \"type\": \"text\",\n          \"id\": \"fB6sqnJqDlolUIDrydMk5\"\n        },\n        {\n          \"id\": \"YVK4Nv0Onos-JNSI9I5YI\",\n          \"type\": \"arrow\"\n        }\n      ],\n      \"updated\": 1661620421122,\n      \"link\": null,\n      \"locked\": false\n    },\n    {\n      \"id\": \"fB6sqnJqDlolUIDrydMk5\",\n      \"type\": \"text\",\n      \"x\": 712.7980769230769,\n      \"y\": 825.5750395624053,\n      \"width\": 145.84615384615387,\n      \"height\": 16,\n      \"angle\": 0,\n      \"strokeColor\": \"#000000\",\n      \"backgroundColor\": \"#4c6ef5\",\n      \"fillStyle\": \"solid\",\n      \"strokeWidth\": 1,\n      \"strokeStyle\": \"solid\",\n      \"roughness\": 1,\n      \"opacity\": 100,\n      \"groupIds\": [],\n      \"strokeSharpness\": \"sharp\",\n      \"seed\": 1340401175,\n      \"version\": 194,\n      \"versionNonce\": 1565255319,\n      \"isDeleted\": false,\n      \"boundElements\": null,\n      \"updated\": 1661620421123,\n      \"link\": null,\n      \"locked\": false,\n      \"text\": \"SaveBody\",\n      \"fontSize\": 14.404558404558403,\n      \"fontFamily\": 2,\n      \"textAlign\": \"center\",\n      \"verticalAlign\": \"middle\",\n      \"baseline\": 13,\n      \"containerId\": \"HK8F6p6Adyxvgasi9uzJo\",\n      \"originalText\": \"SaveBody\"\n    },\n    {\n      \"type\": \"rectangle\",\n      \"version\": 216,\n      \"versionNonce\": 1292304185,\n      \"isDeleted\": false,\n      \"id\": \"w6yJKrh_ucB0qKWLRrPA1\",\n      \"fillStyle\": \"solid\",\n      \"strokeWidth\": 1,\n      \"strokeStyle\": \"solid\",\n      \"roughness\": 
1,\n      \"opacity\": 100,\n      \"angle\": 0,\n      \"x\": 921.4134615384612,\n      \"y\": 785.2230373606715,\n      \"strokeColor\": \"#000000\",\n      \"backgroundColor\": \"#15aabf\",\n      \"width\": 156,\n      \"height\": 98.27507912481072,\n      \"seed\": 1894727609,\n      \"groupIds\": [],\n      \"strokeSharpness\": \"sharp\",\n      \"boundElements\": [\n        {\n          \"id\": \"yVBhfXkyFmu2rg16oRlxu\",\n          \"type\": \"arrow\"\n        },\n        {\n          \"id\": \"JClqLh6OUtndfrUc-BbHt\",\n          \"type\": \"text\"\n        },\n        {\n          \"type\": \"text\",\n          \"id\": \"JClqLh6OUtndfrUc-BbHt\"\n        },\n        {\n          \"id\": \"XiGmqFegyOE2IKWoIo40s\",\n          \"type\": \"arrow\"\n        }\n      ],\n      \"updated\": 1661620421123,\n      \"link\": null,\n      \"locked\": false\n    },\n    {\n      \"type\": \"text\",\n      \"version\": 259,\n      \"versionNonce\": 710307031,\n      \"isDeleted\": false,\n      \"id\": \"JClqLh6OUtndfrUc-BbHt\",\n      \"fillStyle\": \"solid\",\n      \"strokeWidth\": 1,\n      \"strokeStyle\": \"solid\",\n      \"roughness\": 1,\n      \"opacity\": 100,\n      \"angle\": 0,\n      \"x\": 926.4134615384612,\n      \"y\": 826.3605769230768,\n      \"strokeColor\": \"#000000\",\n      \"backgroundColor\": \"#4c6ef5\",\n      \"width\": 146,\n      \"height\": 16,\n      \"seed\": 1215329367,\n      \"groupIds\": [],\n      \"strokeSharpness\": \"sharp\",\n      \"boundElements\": [],\n      \"updated\": 1661620421123,\n      \"link\": null,\n      \"locked\": false,\n      \"fontSize\": 14.404558404558403,\n      \"fontFamily\": 2,\n      \"text\": \"AlignedBuffer\",\n      \"baseline\": 13,\n      \"textAlign\": \"center\",\n      \"verticalAlign\": \"middle\",\n      \"containerId\": \"w6yJKrh_ucB0qKWLRrPA1\",\n      \"originalText\": \"AlignedBuffer\"\n    },\n    {\n      \"id\": \"YVK4Nv0Onos-JNSI9I5YI\",\n      \"type\": \"arrow\",\n      \"x\": 
867.7980769230768,\n      \"y\": 836.7451923076923,\n      \"width\": 55.38461538461536,\n      \"height\": 0,\n      \"angle\": 0,\n      \"strokeColor\": \"#000000\",\n      \"backgroundColor\": \"#15aabf\",\n      \"fillStyle\": \"solid\",\n      \"strokeWidth\": 1,\n      \"strokeStyle\": \"solid\",\n      \"roughness\": 1,\n      \"opacity\": 100,\n      \"groupIds\": [],\n      \"strokeSharpness\": \"round\",\n      \"seed\": 2028321497,\n      \"version\": 86,\n      \"versionNonce\": 506769433,\n      \"isDeleted\": false,\n      \"boundElements\": null,\n      \"updated\": 1661620421123,\n      \"link\": null,\n      \"locked\": false,\n      \"points\": [\n        [\n          0,\n          0\n        ],\n        [\n          55.38461538461536,\n          0\n        ]\n      ],\n      \"lastCommittedPoint\": null,\n      \"startBinding\": {\n        \"elementId\": \"HK8F6p6Adyxvgasi9uzJo\",\n        \"focus\": 0.0018973206471872748,\n        \"gap\": 4.153846153846075\n      },\n      \"endBinding\": null,\n      \"startArrowhead\": null,\n      \"endArrowhead\": \"arrow\"\n    },\n    {\n      \"id\": \"cqCQRIsxqHSsV_j5V6fMA\",\n      \"type\": \"ellipse\",\n      \"x\": 1165.490384615384,\n      \"y\": 781.3605769230769,\n      \"width\": 128,\n      \"height\": 106,\n      \"angle\": 0,\n      \"strokeColor\": \"#000000\",\n      \"backgroundColor\": \"#e64980\",\n      \"fillStyle\": \"solid\",\n      \"strokeWidth\": 1,\n      \"strokeStyle\": \"solid\",\n      \"roughness\": 1,\n      \"opacity\": 100,\n      \"groupIds\": [],\n      \"strokeSharpness\": \"sharp\",\n      \"seed\": 1621295255,\n      \"version\": 67,\n      \"versionNonce\": 281065975,\n      \"isDeleted\": false,\n      \"boundElements\": [\n        {\n          \"type\": \"text\",\n          \"id\": \"6N8Vr1qw1YKDs9h0ze2LI\"\n        },\n        {\n          \"id\": \"XiGmqFegyOE2IKWoIo40s\",\n          \"type\": \"arrow\"\n        }\n      ],\n      \"updated\": 1661620421123,\n  
    \"link\": null,\n      \"locked\": false\n    },\n    {\n      \"id\": \"6N8Vr1qw1YKDs9h0ze2LI\",\n      \"type\": \"text\",\n      \"x\": 1170.490384615384,\n      \"y\": 816.3605769230769,\n      \"width\": 118,\n      \"height\": 36,\n      \"angle\": 0,\n      \"strokeColor\": \"#000000\",\n      \"backgroundColor\": \"#e64980\",\n      \"fillStyle\": \"solid\",\n      \"strokeWidth\": 1,\n      \"strokeStyle\": \"solid\",\n      \"roughness\": 1,\n      \"opacity\": 100,\n      \"groupIds\": [],\n      \"strokeSharpness\": \"sharp\",\n      \"seed\": 1910738841,\n      \"version\": 45,\n      \"versionNonce\": 1681474809,\n      \"isDeleted\": false,\n      \"boundElements\": null,\n      \"updated\": 1661620421123,\n      \"link\": null,\n      \"locked\": false,\n      \"text\": \"Direct I/O\\nFile\",\n      \"fontSize\": 16,\n      \"fontFamily\": 2,\n      \"textAlign\": \"center\",\n      \"verticalAlign\": \"middle\",\n      \"baseline\": 32,\n      \"containerId\": \"cqCQRIsxqHSsV_j5V6fMA\",\n      \"originalText\": \"Direct I/O\\nFile\"\n    },\n    {\n      \"id\": \"XiGmqFegyOE2IKWoIo40s\",\n      \"type\": \"arrow\",\n      \"x\": 1082.4134615384614,\n      \"y\": 834.4375,\n      \"width\": 69.23076923076928,\n      \"height\": 0.7692307692308304,\n      \"angle\": 0,\n      \"strokeColor\": \"#000000\",\n      \"backgroundColor\": \"#e64980\",\n      \"fillStyle\": \"solid\",\n      \"strokeWidth\": 1,\n      \"strokeStyle\": \"solid\",\n      \"roughness\": 1,\n      \"opacity\": 100,\n      \"groupIds\": [],\n      \"strokeSharpness\": \"round\",\n      \"seed\": 1724070359,\n      \"version\": 21,\n      \"versionNonce\": 178545431,\n      \"isDeleted\": false,\n      \"boundElements\": null,\n      \"updated\": 1661620421123,\n      \"link\": null,\n      \"locked\": false,\n      \"points\": [\n        [\n          0,\n          0\n        ],\n        [\n          69.23076923076928,\n          -0.7692307692308304\n        ]\n      ],\n    
  \"lastCommittedPoint\": null,\n      \"startBinding\": {\n        \"elementId\": \"w6yJKrh_ucB0qKWLRrPA1\",\n        \"focus\": 0.01998122507071207,\n        \"gap\": 5.000000000000227\n      },\n      \"endBinding\": {\n        \"elementId\": \"cqCQRIsxqHSsV_j5V6fMA\",\n        \"focus\": 0.029379713320443476,\n        \"gap\": 13.85030430804018\n      },\n      \"startArrowhead\": null,\n      \"endArrowhead\": \"arrow\"\n    }\n  ],\n  \"appState\": {\n    \"gridSize\": null,\n    \"viewBackgroundColor\": \"#ffffff\"\n  },\n  \"files\": {}\n}\n"
  },
  {
    "path": "docs/rdbsave.md",
    "content": "# RDB Snapshot design\n\nThe following document describes Dragonfly's point-in-time, forkless snapshotting procedure,\nincluding all its configurations.\n\n## Redis-compatible RDB snapshot\n\nThis snapshot is serialized into a single file or into a network socket.\nThis configuration is used to create redis-compatible backup snapshots.\n\nThe algorithm utilizes the shared-nothing architecture of Dragonfly and makes sure that each shard-thread serializes only its own data. Below is the high-level description of the flow.\n\n<img src=\"http://static.dragonflydb.io/repo-assets/rdbsave.svg\" width=\"80%\" border=\"0\"/>\n\n\n1. The `RdbSave` class instantiates a single blocking channel (in red).\n   Its purpose is to gather all the blobs from all the shards.\n2. In addition, it creates thread-local snapshot instances in each DF shard.\nTODO: to rename them in the codebase to another name (SnapshotShard?) since the word `snapshot` creates ambiguity here.\n3. Each SnapshotShard instantiates its own RdbSerializer that is used to serialize each K/V entry into a binary representation according to the Redis format spec. SnapshotShards combine multiple blobs from the same Dash bucket into a single blob. They always send blob data at bucket granularity, i.e. they never send a blob into the channel that only partially covers the bucket. This is needed in order to guarantee snapshot isolation.\n4. The RdbSerializer uses `io::Sink` to emit binary data. The SnapshotShard instance passes into it a `StringFile` which is just a memory-only based sink that wraps a `std::string` object. Once the `StringFile` instance becomes large, it's flushed into the channel (as long as it follows the rules above).\n5. RdbSave also creates a fiber (SaveBody) that pulls all the blobs from the channel. Blobs might come in unspecified order though it's guaranteed that each blob is self-sufficient by itself.\n6. 
DF uses direct I/O to improve I/O throughput, which, in turn, requires properly aligned memory buffers to work. Unfortunately, blobs that come from the rdb channel come in different sizes and they are not aligned by OS page granularity. Therefore, DF passes all the data from rdb channel through AlignedBuffer transformation. The purpose of this class is to copy the incoming data into a properly aligned buffer. Once it accumulates enough data, it flushes it into the output file.\n\nTo summarize, this configuration employs a single sink to create one file or one stream of data that represents the whole database.\n\n## Dragonfly Snapshot (TBD)\n\nRequired for replication. Creates multiple files, one file per SnapshotShard. Does not require a central sink. Each SnapshotShard still uses RdbSerializer together with StringFile to guarantee bucket level granularity. We still need AlignedBuffer if we want to use direct I/O.\nFor a DF process with N shards, it will create N files. Will probably require an additional metadata file to provide file-level consistency, but for now we can assume that only N files are created,\nsince our use-case will be network based replication.\n\nHow is it going to be used? Replica (slave) will hand-shake with the master and find out how many shards it has.\nThen it will open `N` sockets and each one of them will pull shard data. First, they will pull snapshot data,\nand replay it by distributing entries among `K` replica shards. After all the snapshot data is replayed,\nthey will continue with replaying the change log (stable state replication), which is out of scope\nof this document.\n\n## Relaxed point-in-time (TBD)\nWhen DF saves its snapshot file on disk, it maintains snapshot isolation by applying a virtual cut\nthrough all the process shards. Snapshotting may take time, during which, DF may process many write requests.\nThese mutations won't be part of the snapshot, because the cut captures data up to the point\n**it has started**. 
This is perfect for backups. I call this variation - conservative snapshotting.\n\nHowever, when we perform snapshotting for replication, we would like to produce a snapshot\nthat includes all the data up to the point in time when the snapshotting **finishes**. I call\nthis *relaxed snapshotting*. The reason for relaxed snapshotting is to avoid keeping the changelog\nof all mutations during the snapshot creation.\n\nAs a side comment - we could, in theory, support the same (relaxed)\nsemantics for file snapshots, but it's not necessary since it might increase the snapshot sizes.\n\nThe snapshotting phase (full-sync) can take up lots of time, which adds lots of memory pressure on the system.\nKeeping the change-log aside during the full-sync phase will only add more pressure.\nWe achieve relaxed snapshotting by pushing the changes into the replication sockets without saving them aside.\nOf course, we would still need point-in-time consistency,\nin order to know when the snapshotting finished and the stable state replication started.\n\n## Conservative and relaxed snapshotting variations\n\nBoth algorithms maintain a scanning process (fiber) that iteratively goes over the main dictionary\nand serializes its data. 
Before starting the process, the SnapshotShard captures\nthe change epoch of its shard (this epoch is increased with each write request).\n\n```cpp\nSnapshotShard.epoch = shard.epoch++;\n```\n\nFor the sake of simplicity, we can assume that each entry in the shard maintains its own version counter.\nBy capturing the epoch number we establish a cut: all entries with `version <= SnapshotShard.epoch`\nhave not been serialized yet and were not modified by the concurrent writes.\n\nThe DashTable iteration algorithm guarantees convergence and coverage (\"at least once\"),\nbut it does not guarantee that each entry is visited *exactly once*.\nTherefore, we use entry versions for two things: 1) to avoid serialization of the same entry multiple times,\nand 2) to correctly serialize entries that need to change due to concurrent writes.\n\nSerialization Fiber:\n\n```cpp\n for (entry : table) {\n    if (entry.version <= cut.epoch) {\n      entry.version = cut.epoch + 1;\n      SendToSerializationSink(entry);\n    }\n }\n```\n\nTo allow concurrent writes during the snapshotting phase, we set up a hook that is triggered on each\nentry mutation in the table:\n\nOnWriteHook:\n```cpp\n....\nif (entry.version <= cut.version) {\n  SendToSerializationSink(entry);\n}\n...\nentry = new_entry;\nentry.version = shard.epoch++;  // guaranteed to become > cut.version\n```\n\nPlease note that this hook maintains point-in-time semantics for the conservative variation by pushing\nthe previous value of the entry into the sink before changing it.\n\nHowever, for the relaxed point-in-time, we do not have to store the old value.\nTherefore, we can do the following:\n\nOnWriteHook:\n\n```cpp\nif (entry.version <= cut.version) {\n  SendToSerializationSink(new_entry);  // do not have to send the old value\n} else {\n  // Keep sending the changes.\n  SendToSerializationSink(IncrementalDiff(entry, new_entry));\n}\n\nentry = new_entry;\nentry.version = shard.epoch++;\n```\n\nThe change data is sent along with 
the rest of the contents, and it requires extending\nthe existing rdb format to support differential operations (like hset, append, etc.).\nThe Serialization Fiber loop is the same for this variation.\n"
  },
  {
    "path": "docs/shard-serialization.md",
    "content": "# Shard Serialization\n\nThis document describes how Dragonfly serializes a single shard's data via `SliceSnapshot`. It\ncovers both point-in-time (PIT) and non-PIT serialization modes, their correctness guarantees,\nand the mechanisms used to coordinate concurrent mutations with the serialization process.\n\n## Overview\n\nShard serialization is used for two purposes:\n\n1. **Backups (RDB save)** — Must produce a consistent point-in-time snapshot. Always uses PIT mode.\n2. **Replication (full sync)** — Serializes baseline data and then streams journal changes. Can\n   use either PIT or non-PIT mode, controlled by the `--point_in_time_snapshot` flag (default: true).\n\nBoth modes share the same traversal infrastructure (`IterateBucketsFb` → `BucketSaveCb` →\n`SerializeBucket` → `SerializeEntry`) and the same flushing/backpressure machinery\n(`HandleFlushData` → `consumer_->ConsumeData`). They differ in **how they handle concurrent\nmutations** during the traversal.\n\n| | PIT mode | Non-PIT mode |\n|---|----------|-------------|\n| Flag | `use_snapshot_version_ == true` | `use_snapshot_version_ == false` |\n| Used for | Backups and replication | Replication only |\n| Consistency | Exact point-in-time snapshot | Eventual consistency (baseline + journal) |\n| `OnDbChange` | Serializes bucket before mutation | Barrier only (no serialization) |\n| `OnMoved` | Not registered | Handles DashTable item reshuffling |\n| Bucket versioning | Yes — skip already-serialized buckets | No — serialize every bucket visited |\n| Throughput | Lower (mutation path does serialization work) | Higher (mutation path only acquires mutex) |\n\n## Core Types\n\n| Type | Location | Role |\n|------|----------|------|\n| `SliceSnapshot` | `src/server/snapshot.h` | Orchestrates shard serialization |\n| `RdbSerializer` | `src/server/rdb_save.h` | Serializes entries into RDB-format buffers |\n| `SnapshotDataConsumerInterface` | `src/server/snapshot.h` | Downstream sink interface 
|\n| `RdbSaver::Impl` | `src/server/rdb_save.cc` | Consumer impl: writes to socket or channel |\n| `ThreadLocalMutex` | `src/server/synchronization.h` | Fiber-aware mutex for atomicity barrier |\n| `ChangeReq` | `src/server/table.h` | Describes a table mutation (update or insert) |\n\n## Data Flow Overview\n\n```mermaid\nflowchart TD\n  subgraph ShardThread[Shard thread / fibers]\n    MUT[DB mutation] -->|change callback| ODC[OnDbChange]\n    ODC -->|lock big_value_mu_| SB1[\"SerializeBucket<br/>(PIT only)\"]\n    SB1 --> SE1[SerializeEntry]\n    SE1 --> SAVE1[RdbSerializer::SaveEntry]\n\n    TRAV[Snapshot fiber: IterateBucketsFb] --> BSCB[BucketSaveCb]\n    BSCB -->|lock big_value_mu_ + GetLatch| SB2[SerializeBucket]\n    SB2 --> SE2[SerializeEntry]\n    SE2 --> SAVE2[RdbSerializer::SaveEntry]\n\n    MOV[DashTable move] -->|non-PIT only| OMV[OnMoved]\n    OMV -->|lock big_value_mu_| SB3[\"SerializeBucket<br/>(if moved across cursor)\"]\n\n    EXP[\"Expiry / Eviction<br/>(heartbeat, inline, lazy)\"] -->|\"RecordDelete<br/>(no OnDbChange)\"| JRN_DIRECT[\"journal::RecordEntry<br/>(DEL)\"]\n    JRN_DIRECT --> CJC\n\n    JRN[Journal change] --> CJC[ConsumeJournalChange]\n    CJC -->|lock big_value_mu_| WJE[serializer_->WriteJournalEntry]\n  end\n\n  SAVE1 -->|consume_fun_ if buffer > threshold| HFD[HandleFlushData]\n  SAVE2 -->|consume_fun_ if buffer > threshold| HFD\n\n  TRAV -->|between buckets| PS[PushSerialized]\n  PS --> FS[FlushSerialized]\n  FS --> HFD\n\n  HFD --> SEQ[seq_cond_.wait - ordering gate]\n  SEQ --> CD[consumer_->ConsumeData]\n  CD --> SINK[(Replica socket / sink)]\n```\n\n## PIT Mode (Point-in-Time Snapshot)\n\nPIT mode captures an exact snapshot of the shard at the logical moment `snapshot_version_` was\nassigned. It is the default for both backups and replication.\n\n### Bucket Versioning\n\nDragonfly's `DashTable` ([dashtable.md](dashtable.md)) maintains a version counter per physical\nbucket. 
The snapshot must serialize all buckets with version `< snapshot_version_`.\n\n- `SerializeBucket` sets the bucket version to `snapshot_version_`, ensuring each bucket is\n  serialized exactly once.\n- Mutations bump bucket versions, so buckets mutated after the snapshot started will have\n  version `>= snapshot_version_` and are skipped by the traversal.\n- Buckets not yet traversed but about to be mutated require **serialize-before-mutate**,\n  enforced by `OnDbChange()`.\n\n### Ordering Invariant\n\n> For any key, the replica must receive the baseline value **strictly before** any journal entry\n> that mutates that key.\n\nWe will use two terms for journal changes:\n- **Self-contained**: the journal entry fully determines the resulting logical state and can be\n  replayed without the prior value (for example `SET`, `DEL`).\n- **Baseline-dependent**: the journal entry describes a mutation of an existing value and requires\n  the baseline state to be reconstructed first (for example `HSET`, `LPUSH`).\n\nFor **transaction-driven mutations** this is guaranteed because:\n1. `OnDbChange` runs before the mutation commits and serializes the bucket if needed.\n2. `OnDbChange` unconditionally acquires `big_value_mu_` first, so the mutation and its\n  subsequent journal emission cannot overtake an in-progress bucket serialization.\n\n**Important caveat:** not all journal entries follow the\n`OnDbChange` → mutation → `RecordJournal` → `ConsumeJournalChange` sequence. Several code\npaths emit journal entries via `journal::RecordEntry` directly, bypassing `PreUpdateBlocking`\nand `OnDbChange` entirely. See [Journal Entries Without `OnDbChange`](#journal-entries-without-ondbchange)\nbelow.\n\n### Journal Entries Without `OnDbChange`\n\nNot all journal entries follow the transaction-driven\n`PreUpdateBlocking` → `OnDbChange` → mutation → `RecordJournal` → `ConsumeJournalChange`\nsequence. 
Several code paths call `journal::RecordEntry` directly (→\n`JournalSlice::AddLogRecord` → `ConsumeJournalChange`), bypassing `OnDbChange` entirely:\n\n| Source | Journal command | Trigger |\n|--------|----------------|---------|\n| `ExpireIfNeeded` (`db_slice.cc`) | `DEL` | Lazy expiry during key lookup, active expiry sweep (`DeleteExpiredStep`), heartbeat-driven eviction (`FreeMemWithEvictionStepAtomic`) |\n| `PrimeEvictionPolicy::Evict` (`db_slice.cc`) | `DEL` | Inline eviction when a DashTable bucket overflows during insert |\n| `generic_family.cc` (SCAN-based deletion) | `DEL` | `RecordDelete` after `DbSlice::Del` in the RM command |\n| `dflycmd.cc`, `replica.cc`, `cluster_family.cc` | `PING` / `DFLYCLUSTER` | Control signals: takeover sync, PING propagation, cluster config |\n\nAll data-mutating entries above are self-contained `DEL` commands. The non-mutating entries\n(`PING`, `DFLYCLUSTER`) carry no key-level semantics.\n\n**Why this matters for `ConsumeJournalChange` and `big_value_mu_`:** these journal entries\nstill flow through `ConsumeJournalChange`, which acquires `big_value_mu_`. Today the mutex\nserves two purposes on these paths:\n\n1. **Serializer buffer exclusivity** — preventing a journal write from interleaving with an\n   in-progress `SerializeBucket` call that shares the same `serializer_` instance.\n2. **Baseline-before-journal ordering** — a `DEL K` must not reach the output stream (or a\n   separate journal stream) while K's baseline is still being serialized. Even with separate\n   serializer buffers and tagged-chunk interleaving, the consumer could process `DEL K` before\n   receiving the full baseline, violating the ordering invariant. The mutex prevents this today\n   by blocking the journal write until `SerializeBucket` completes.\n\nThe lock is *not* needed for transaction-style ordering against `OnDbChange` (these paths\nbypass it entirely), but it is needed for both concerns above. 
Removing it requires (a) separate\nserializer buffers (Phase 2, item 7) **and** (b) a mechanism to defer the `DEL` until the\nbucket's baseline is fully emitted (Phase 1, item 6 — deferred deletion queue).\n\n**Could these paths call `OnDbChange` before deleting?** Not safely:\n\n- **`ExpireIfNeeded`:** `SerializeBucket` (called from `OnDbChange`) can preempt, but\n  `ExpireIfNeeded` must not — `ExpireAllIfNeeded` calls `serialization_latch_.Wait()` and\n  lazy expiry in `FindInternal` relies on cooperative scheduling.\n- **`PrimeEvictionPolicy::Evict`:** `Evict` runs inside DashTable's insert path while the\n  table is mid-structural-mutation. `OnDbChange` calls `SerializeBucket` (iterates the\n  bucket) and `CVCUponInsert` (probes the table) — both unsafe here. Re-entrancy risk.\n- **`FreeMemWithEvictionStepAtomic`:** runs from heartbeat with `serialization_latch_` held;\n  `OnDbChange` per evicted key would add overhead and preemption points inside the loop.\n\nThe ordering issue is twofold: byte-stream integrity\n([§1](#1-shard-wide-stall-under-big_value_mu_)) and baseline-before-journal correctness — a\n`DEL` must not be emitted (even to a separate stream) while the same key's baseline is still\nbeing serialized. Roadmap item 6 proposes a **deferred deletion queue** to address this\nwithout blocking or re-entrancy.\n\n### Mutation Path: `OnDbChange` (PIT)\n\n```\nOnDbChange(db_index, req)\n  lock(big_value_mu_)\n  if req is update (existing bucket):\n    bit = *req.update()\n    if !bit.is_done() && bit.GetVersion() < snapshot_version_:\n      -> SerializeBucket(db_index, *bit)\n  else (insert, new key):\n    key = get<string_view>(req.change)\n    -> table->CVCUponInsert(snapshot_version_, key, callback)\n         callback(bucket_iterator):\n           -> SerializeBucket(db_index, it)\n  unlock(big_value_mu_)\n```\n\nFor updates, `ChangeReq::update()` returns a `PrimeTable::bucket_iterator`. 
If the bucket has not\nbeen serialized yet (version `< snapshot_version_`), it is serialized now.\n\nFor inserts, `CVCUponInsert` (`src/core/dash.h`) simulates the insert to identify which buckets'\nversions would change, and serializes each one with version `< snapshot_version_` via the callback.\n\n### Traversal Path: `BucketSaveCb` (PIT)\n\n```\nBucketSaveCb(db_index, bucket_iterator)\n  lock(big_value_mu_)\n  if bucket version >= snapshot_version_:\n    skip (already serialized by OnDbChange or a previous visit)\n  FlushChangeToEarlierCallbacks(...)\n  lock(*db_slice_->GetLatch())\n  -> SerializeBucket(db_index, bucket_iterator)\n       set bucket version = snapshot_version_\n       for each occupied slot:\n         -> SerializeEntry -> SaveEntry -> PushToConsumerIfNeeded\n```\n\nThe version check is the key optimization: buckets already serialized by `OnDbChange` are skipped.\n\n## Non-PIT Mode (Eventual Consistency)\n\nNon-PIT mode is available **only for replication** (`stream_journal == true`) and is enabled by\nsetting `--point_in_time_snapshot=false`. It improves server throughput during full sync by\neliminating serialization work from the mutation path.\n\n### Design Rationale\n\nA replica does not need an exact point-in-time snapshot. It needs to reach eventual consistency:\nafter the full sync baseline is delivered and the journal stream catches up, the replica's state\nmust match the master's current state. This weaker guarantee allows the snapshot to be \"fuzzy\" —\nit may include some mutations that happened after the snapshot started and miss others, as long as\nthe journal stream fills in the gaps.\n\n### How It Differs from PIT\n\n**`OnDbChange` does no serialization.** In non-PIT mode, the `if (use_snapshot_version_)` block\nis skipped entirely. `OnDbChange` only acquires `big_value_mu_` and returns immediately. 
This\nserves as a **barrier** — it prevents mutations from modifying a bucket while it is being\nserialized by the traversal fiber — but it does not serialize anything itself.\n\n**No bucket version tracking.** `SerializeBucket` does not set the bucket version. `BucketSaveCb`\ndoes not check or skip based on version. Every bucket visited by the traversal is serialized\nunconditionally.\n\n**`OnMoved` handles DashTable reshuffling.** When items are inserted into DashTable, existing items\nmay be moved between buckets (due to hash table splitting/merging). In PIT mode this is handled by\n`OnDbChange` + bucket versioning. In non-PIT mode, since `OnDbChange` does no serialization, a\nseparate `OnMoved` callback is needed to catch items that \"jump\" across the traversal cursor:\n\n```\nOnMoved(db_index, items)\n  lock(big_value_mu_)\n  for each (source_cursor, dest_cursor) in items:\n    if IsPositionSerialized(dest_cursor) && !IsPositionSerialized(source_cursor):\n      -> SerializeBucket(db_index, CursorToBucketIt(dest))\n```\n\nAn item needs re-serialization when it moves **from** a not-yet-visited bucket **to** an\nalready-visited bucket. Without this, the item would be missed entirely: the traversal already\npassed the destination, and the source bucket no longer contains the item.\n\n**`CVCUponInsert` is not used.** In PIT mode, `OnDbChange` calls `CVCUponInsert` for inserts\nto proactively serialize *all* buckets the insert would touch (home, neighbor, stash — or the\nentire segment on a split) **before** the insert commits. This is necessary because PIT must\ncapture the pre-mutation state of every affected bucket. Non-PIT has no such requirement.\nInstead, the insert proceeds, and `OnMoved` reactively handles any items that were displaced\nacross the traversal cursor. 
For truly new keys (not displaced existing items), non-PIT relies on\nthe cursor visiting the key's bucket later, or on the journal stream capturing the insert.\n\n### `IsPositionSerialized` — Cursor-Based Position Tracking\n\n```cpp\nbool IsPositionSerialized(DbIndex id, PrimeTable::Cursor cursor) {\n  uint8_t depth = db_slice_->GetTables(id).first->depth();\n  return id < snapshot_db_index_ ||\n         (id == snapshot_db_index_ &&\n          (cursor.bucket_id() < snapshot_cursor_.bucket_id() ||\n           (cursor.bucket_id() == snapshot_cursor_.bucket_id() &&\n            cursor.segment_id(depth) < snapshot_cursor_.segment_id(depth))));\n}\n```\n\nCompares a cursor position against the current traversal position (`snapshot_cursor_`,\n`snapshot_db_index_`). A position is \"serialized\" if it is behind the cursor — i.e., the\ntraversal has already visited it.\n\n### Traversal Path: `BucketSaveCb` (Non-PIT)\n\n```\nBucketSaveCb(db_index, bucket_iterator)\n  lock(big_value_mu_)\n  // no version check — serialize every bucket unconditionally\n  lock(*db_slice_->GetLatch())\n  -> SerializeBucket(db_index, bucket_iterator)\n       // no version update\n       for each occupied slot:\n         -> SerializeEntry -> SaveEntry -> PushToConsumerIfNeeded\n```\n\n### Correctness in Non-PIT Mode\n\nNon-PIT mode guarantees:\n- Every key that existed when the traversal started and was not deleted before being visited will\n  be serialized at least once (by the traversal or by `OnMoved`).\n- Keys inserted after the traversal started will appear in the journal stream.\n- Keys may be serialized in a state newer than the snapshot start (since mutations are not blocked\n  by `OnDbChange` serialization, only by the mutex barrier).\n- The journal stream, combined with the baseline, produces an eventually consistent replica.\n\nWhat it does **not** guarantee:\n- Point-in-time consistency. 
The serialized baseline is a \"fuzzy\" view spanning the traversal\n  duration.\n\n## Shared Infrastructure\n\nThe following sections apply to both PIT and non-PIT modes.\n\n### Traversal: `IterateBucketsFb`\n\n```\nIterateBucketsFb(send_full_sync_cut)\n  for each database:\n    for each logical bucket via PrimeTable::TraverseBuckets():\n      -> BucketSaveCb(db_index, bucket_iterator)\n      PushSerialized(false)  // explicit flush between buckets\n      yield if CPU time > ~15us\n    PushSerialized(true)     // force-flush after each database\n  if send_full_sync_cut:\n    serializer_->SendFullSyncCut()\n    PushSerialized(true)\n```\n\n### Serialization: `SerializeBucket` and `SerializeEntry`\n\n`SerializeBucket` iterates all occupied slots in a physical bucket and calls `SerializeEntry` for\neach. `SerializeEntry` looks up expiry and memcache flags, then calls\n`serializer_->SaveEntry(pk, pv, expire_time, mc_flags, db_index)`.\n\n### Journal Path: `ConsumeJournalChange`\n\n```\nConsumeJournalChange(item)\n  lock(big_value_mu_)\n  serializer_->WriteJournalEntry(item.journal_item.data)\n  unlock(big_value_mu_)\n```\n\nActive in both modes when `stream_journal == true`. Acquires `big_value_mu_` to ensure journal\nentries are not interleaved with bucket serialization. Does **not** flush data — only appends to\nthe serializer buffer. Flushing happens later via `ThrottleIfNeeded` → `PushSerialized(false)`,\ncalled from `JournalSlice` after the journal callback returns.\n\n### Flushing and Backpressure\n\n#### `HandleFlushData(std::string data)` — Common Blocking Sink\n\nAll serialized data ultimately flows through `HandleFlushData`:\n\n1. Assigns monotonically increasing record ID (`rec_id_++`).\n2. Optionally yields (background mode).\n3. **Blocks** on `seq_cond_.wait` until `id == last_pushed_id_ + 1` (sequential ordering).\n4. **Blocks** on `consumer_->ConsumeData(data, cntx_)` (downstream write).\n5. 
Updates `last_pushed_id_`, notifies waiters via `seq_cond_.notify_all()`.\n6. Optionally sleeps to throttle CPU (non-background mode, up to 2ms proportional to CPU spent).\n\n#### `FlushSerialized(RdbSerializer* serializer)`\n\nCalls `serializer->Flush(kFlushEndEntry)` to extract and optionally compress the buffer, then\npasses the result to `HandleFlushData`. Uses the main `serializer_` if no argument is given.\n\n#### `PushSerialized(bool force)`\n\nSkips if `!force` and `serializer_->SerializedLen() < kMinBlobSize` (8KB). Otherwise calls\n`FlushSerialized()` to drain the main serializer buffer.\n\n#### `RdbSerializer::PushToConsumerIfNeeded(FlushState flush_state)`\n\n```cpp\nvoid RdbSerializer::PushToConsumerIfNeeded(SerializerBase::FlushState flush_state) {\n  if (consume_fun_ && SerializedLen() > flush_threshold_) {\n    string blob = Flush(flush_state);\n    consume_fun_(std::move(blob));  // synchronous!\n  }\n}\n```\n\nOnly fires when `consume_fun_` is set **and** the buffer exceeds `flush_threshold_`. When it\nfires, it **synchronously** invokes the callback, which for `SliceSnapshot` is `HandleFlushData`.\n\n## All Code Paths That Acquire `big_value_mu_`\n\nCurrently there are **five** call sites in `snapshot.cc` that lock `big_value_mu_`. 
The diagrams\nbelow show the complete call chain from lock acquisition to potential blocking points.\n\n### Path 1: `BucketSaveCb` (traversal fiber, both modes)\n\n```mermaid\nflowchart LR\n  A[IterateBucketsFb] --> B[\"BucketSaveCb<br/><b>lock big_value_mu_</b><br/>lock GetLatch()\"]\n  B --> C[SerializeBucket]\n  C --> D[SerializeEntry]\n  D --> E[SaveEntry]\n  E -->|\"if buffer > threshold\"| F[\"consume_fun_()<br/>= HandleFlushData\"]\n  F --> G[\"seq_cond_.wait<br/>consumer_->ConsumeData<br/><b>BLOCKS</b>\"]\n\n  classDef lock fill:#FFF3E0,stroke:#EF6C00;\n  classDef block fill:#FFEBEE,stroke:#C62828;\n  class B lock;\n  class G block;\n```\n\n### Path 2: `OnDbChange` (mutation fiber, both modes; serializes in PIT only)\n\n```mermaid\nflowchart LR\n  A[DB mutation] --> B[\"OnDbChange<br/><b>lock big_value_mu_</b>\"]\n  B -->|PIT| C[SerializeBucket]\n  C --> D[SerializeEntry]\n  D --> E[SaveEntry]\n  E -->|\"if buffer > threshold\"| F[\"consume_fun_()<br/>= HandleFlushData\"]\n  F --> G[\"seq_cond_.wait<br/>consumer_->ConsumeData<br/><b>BLOCKS</b>\"]\n  B -->|non-PIT| H[\"return<br/>(barrier only)\"]\n\n  classDef lock fill:#FFF3E0,stroke:#EF6C00;\n  classDef block fill:#FFEBEE,stroke:#C62828;\n  classDef safe fill:#E8F5E9,stroke:#2E7D32;\n  class B lock;\n  class G block;\n  class H safe;\n```\n\n### Path 3: `OnMoved` (non-PIT only)\n\n```mermaid\nflowchart LR\n  A[DashTable move] --> B[\"OnMoved<br/><b>lock big_value_mu_</b>\"]\n  B -->|\"moved across cursor\"| C[SerializeBucket]\n  C --> D[SerializeEntry]\n  D --> E[SaveEntry]\n  E -->|\"if buffer > threshold\"| F[\"consume_fun_()<br/>= HandleFlushData\"]\n  F --> G[\"seq_cond_.wait<br/>consumer_->ConsumeData<br/><b>BLOCKS</b>\"]\n  B -->|\"same side of cursor\"| H[skip]\n\n  classDef lock fill:#FFF3E0,stroke:#EF6C00;\n  classDef block fill:#FFEBEE,stroke:#C62828;\n  class B lock;\n  class G block;\n```\n\n### Path 4: `ConsumeJournalChange` (journal callback, both modes)\n\n```mermaid\nflowchart LR\n  A[Journal change] --> 
B[\"ConsumeJournalChange<br/><b>lock big_value_mu_</b>\"]\n  B --> C[\"serializer_->WriteJournalEntry<br/>(buffer append only)\"]\n  C --> D[returns]\n\n  classDef lock fill:#FFF3E0,stroke:#EF6C00;\n  class B lock;\n```\n\nThis path does **not** reach `HandleFlushData`. It only appends to the serializer buffer.\n\n## All Code Paths That Reach `HandleFlushData`\n\n```mermaid\nflowchart TD\n  subgraph HAZARD[\"Under big_value_mu_ (HAZARD)\"]\n    A1[\"OnDbChange — PIT only<br/>lock big_value_mu_\"] --> SB1[\"SerializeBucket → SerializeEntry → SaveEntry\"]\n    A2[\"BucketSaveCb — both modes<br/>lock big_value_mu_ + GetLatch()\"] --> SB2[\"SerializeBucket → SerializeEntry → SaveEntry\"]\n    A3[\"OnMoved — non-PIT only<br/>lock big_value_mu_\"] --> SB3[\"SerializeBucket → SerializeEntry → SaveEntry\"]\n    SB1 --> CF[\"PushToConsumerIfNeeded<br/>consume_fun_()\"]\n    SB2 --> CF\n    SB3 --> CF\n    CF --> HFD1[HandleFlushData]\n  end\n\n  subgraph SAFE[\"Outside big_value_mu_ (SAFE)\"]\n    B1[\"IterateBucketsFb loop<br/>(between buckets)\"] --> PS1[\"PushSerialized(false)\"]\n    B2[\"IterateBucketsFb<br/>(end of database)\"] --> PS2[\"PushSerialized(true)\"]\n    B3[\"IterateBucketsFb<br/>(full sync cut)\"] --> PS3[\"PushSerialized(true)\"]\n    B4[FinalizeJournalStream] --> PS4[\"PushSerialized(true)\"]\n    B5[\"ThrottleIfNeeded<br/>(from JournalSlice)\"] --> PS5[\"PushSerialized(false)\"]\n    PS1 --> FS[FlushSerialized]\n    PS2 --> FS\n    PS3 --> FS\n    PS4 --> FS\n    PS5 --> FS\n    FS --> HFD2[HandleFlushData]\n  end\n\n  HFD1 --> BLOCK[\"seq_cond_.wait<br/>consumer_->ConsumeData<br/>(BLOCKING)\"]\n  HFD2 --> BLOCK\n\n  classDef hazard fill:#FFEBEE,stroke:#C62828,stroke-width:2px,color:#B71C1C;\n  classDef safe fill:#E8F5E9,stroke:#2E7D32,color:#1B5E20;\n  classDef block fill:#FFF3E0,stroke:#EF6C00;\n  class A1,A2,A3,CF,HFD1 hazard;\n  class B1,B2,B3,B4,B5,PS1,PS2,PS3,PS4,PS5,FS,HFD2 safe;\n  class BLOCK block;\n```\n\n## Delayed Serialization of tiered 
entities\n\nTiered string values are not read synchronously under `big_value_mu_`. Instead,\n`SerializeExternal` pushes a `TieredDelayedEntry` into `delayed_entries_`; the actual read and\nserialization happen later in `PushSerialized()`, outside the bucket-serialization critical\nsection. The current implementation is fragile: delayed entries live in a global side queue\nrather than being associated with their originating bucket. This can corrupt the output\nstream: a delayed tiered value may be emitted after a journal entry for the same key,\nviolating baseline-before-journal ordering (see PR #6824).\n\nNote: `RestoreStreamer` (used for slot migration) has its own delayed-entry mechanism via\n`CmdSerializer`, which uses a keyed `flat_hash_map` rather than a plain deque. The analysis\nbelow focuses on `SliceSnapshot`; the `RestoreStreamer` path has analogous concerns but a\ndifferent data structure.\n\nDelayed serialization creates two distinct notions of \"bucket finished\":\n\n1. **Traversal finished** — `SerializeBucket` has iterated every entry and returned.\n2. **Baseline fully emitted** — all delayed tiered entries from that bucket have also been\n   read, serialized, and flushed.\n\nFor in-memory values these coincide; for tiered values they do not.\n\nThe ordering invariant (`baseline(K)` before `journal(K)`) still applies. Because the baseline\nfor a tiered key `K` may only materialize when `PushSerialized()` drains `delayed_entries_`,\na bucket's completion point extends from \"finished iterating\" to \"all delayed values serialized\nand flushed\".\n\n## Locking and Synchronization\n\n### `big_value_mu_` (ThreadLocalMutex)\n\nA `ThreadLocalMutex` (`src/server/synchronization.cc`) serving as the primary synchronization\nbarrier.\n\n**Important:** `ThreadLocalMutex::lock()` and `unlock()` are **no-ops** when\n`serialization_max_chunk_size == 0`. This means `big_value_mu_` only provides actual\nsynchronization when big-value streaming is enabled. 
When it is disabled, all `lock_guard`\ncalls on this mutex are effectively free, and the system relies on cooperative scheduling\n(no preemption during serialization) for correctness.\n\nIts role differs by mode:\n\n**PIT mode:** Prevents mutations from modifying a bucket while it is being serialized, and\nprevents journal entries from being written during bucket serialization. This enforces both\nserialize-before-mutate and the ordering invariant.\n\n**Non-PIT mode:** Prevents mutations from modifying a bucket while `BucketSaveCb` is serializing\nit (data consistency within a single bucket). Also serves as a barrier for `ConsumeJournalChange`\nand `OnMoved`.\n\n| Path | Mode | Lock held | Additional locks |\n|------|------|-----------|-----------------|\n| `BucketSaveCb` | Both | `big_value_mu_` | `GetLatch()` |\n| `OnDbChange` | Both | `big_value_mu_` | none |\n| `OnMoved` | Non-PIT | `big_value_mu_` | none |\n| `ConsumeJournalChange` | Both | `big_value_mu_` | none |\n\n### `GetLatch()` (LocalLatch)\n\nAcquired by `BucketSaveCb` in addition to `big_value_mu_`. This is a non-preempting latch\n(`src/server/synchronization.h`) that increments a blocking counter, preventing `Heartbeat()`\nfrom running if `SerializeBucket` preempts (e.g., during large value serialization).\n\n### `seq_cond_` (CondVarAny)\n\nCondition variable used in `HandleFlushData` to ensure records are pushed to the consumer\nin sequential order of their `rec_id_`. If fiber A has `id=5` and fiber B has `id=6`, B waits\nuntil A finishes pushing and updates `last_pushed_id_` to 5.\nThis is needed because fibers are awakened in arbitrary order and reordering flushed chunks breaks\nthe wire protocol.\n\n\n## Inefficiencies and Improvement Goals\n\nThis section identifies concrete problems in the current serialization design and the\nimprovements that address them. 
The [Technical Roadmap](#technical-roadmap) maps these into an ordered execution\nplan.\n\n**Hard constraints** (apply to all improvements):\n- **Backpressure must be maintained.** A slow consumer must slow down the producer; we cannot\n  buffer unboundedly.\n- **Bounded serialization memory.** Intermediate buffers must not grow proportionally to the\n  dataset size.\n\n\n### 1. Shard-wide stall under `big_value_mu_`\n\n**Problem.** `big_value_mu_` is a single shard-wide mutex that guards three distinct concerns simultaneously:\n\n1. **Bucket atomicity** — the bucket must not be mutated while `SerializeBucket` iterates it.\n2. **Serializer buffer exclusivity** — `serializer_` must not be written to by two fibers.\n3. **Journal ordering** — journal entries must not interleave with bucket serialization.\n\nWhen `consume_fun_` fires under the lock (large value → `PushToConsumerIfNeeded` →\n`HandleFlushData`), the mutex is held across blocking I/O (`seq_cond_.wait`,\n`consumer_->ConsumeData`). This stalls the entire shard: traversal, mutations, journal writes,\nand `OnMoved` all contend on the same lock.\n\n**Why the mutex is needed in `ConsumeJournalChange`.**\nTransaction paths are already ordered by `OnDbChange` (it acquires `big_value_mu_` first, so\n`ConsumeJournalChange` on the same fiber cannot start while traversal holds the lock). The\nmutex matters for [paths that bypass `OnDbChange`](#journal-entries-without-ondbchange) —\ninline eviction and heartbeat-driven deletions. Without it, inline eviction could produce:\n\n**Counter-example without the `ConsumeJournalChange` mutex — inline eviction via `PrimeEvictionPolicy::Evict`:**\n1. Traversal calls `SerializeBucket(B)` and begins iterating it; the bucket contains key `K`\n   (a large hash, serialized element-by-element). The traversal preempts mid-entry via\n   `consume_fun_`.\n2. While the traversal is preempted, a client command triggers a DashTable insert on a different\n   bucket. 
The insert finds no free slot in its home bucket and calls\n   `PrimeEvictionPolicy::Evict`, which selects `K` as the victim.\n3. `Evict` removes `K` from the table and — still on the same fiber, inside the DashTable\n  insert — calls `journal::RecordEntry(DEL K)` directly, bypassing `OnDbChange`.\n4. `ConsumeJournalChange` appends `DEL K` to the shared serializer buffer immediately, even\n  though traversal has already emitted only a prefix of `K`'s baseline.\n5. Traversal resumes and appends the remaining bytes of `K`'s baseline.\n\nResult: the replica's byte stream contains `[partial baseline of K] [DEL K] [rest of baseline\nof K]`. The RDB decoder sees a truncated entry followed by an unexpected journal opcode, or\nparses garbage if the lengths happen to align. Even if the `DEL` is parsed out-of-band, the\nsubsequent baseline bytes reconstruct `K` on the replica, reversing the deletion.\n\n**Goal.** Separate the three concerns so that:\n- bucket atomicity uses bucket-level mechanisms (versioning + bucket completion state);\n- buffer exclusivity uses per-serializer isolation (each producer owns its buffer);\n- journal ordering uses bucket completion state and deferred deletion queues;\n- no code path blocks on downstream I/O while holding a shard-wide lock.\n\n**Approach.** See [§5 summary table](#5-summary-mutex-roles-and-their-replacements) for the\nfull mapping. Key mechanisms: bucket completion state ([§2](#2-imprecise-bucket-completion-tracking)),\nseparate serializer instances ([§3](#3-shared-serializer-buffer-and-wire-format-coupling)),\nand non-preempting chunk production. See Roadmap items 6, 7, 8, 9.\n\n### 2. 
Imprecise bucket completion tracking\n\n**Problem.** The system has no explicit notion of when a bucket's baseline is *fully emitted*\n(see [Delayed Serialization of tiered entities](#delayed-serialization-of-tiered-entities)\nfor details on how tiered values extend bucket completion beyond `SerializeBucket`'s return).\nThis creates two issues:\n\n- A journal entry for key K can reach the output buffer (via `ConsumeJournalChange`) before\n  K's delayed tiered baseline is drained — violating the\n  [ordering invariant](#ordering-invariant) (see PR #6824).\n- [Non-transaction journal entries](#journal-entries-without-ondbchange) (expiry, eviction)\n  bypass `OnDbChange` entirely. Since there is no bucket completion state to consult, `DEL`\n  entries can interleave mid-serialization of the deleted key's baseline.\n\n**Goal.** Make \"baseline fully emitted\" precise for every bucket — including tiered values —\nso that ordering decisions can be expressed through per-bucket state rather than shard-wide mutex exclusion.\n\n**Approach.**\n- Introduce a per snapshot instance/bucket state machine:\n  `NotVisited` → `Serializing` → `DelayedPending` → `Covered`.\n  Each bucket is identified by a stable `BucketIdentity`. 
A bucket must remain in the\n  tracking map (`currently_serialized_: map<BucketIdentity, State>`) until all work completes; otherwise `version >= snapshot_version_` + absent-from-map would falsely read as `Covered`.\n  State encoding:\n\n  | State | Encoding | Meaning |\n  |-------|----------|---------|\n  | **NotVisited** | `version < snapshot_version_`, not in map | Traversal has not reached this bucket |\n  | **Serializing** | `version >= snapshot_version_`, in map as `Serializing` | Traversal is iterating this bucket |\n  | **DelayedPending** | `version >= snapshot_version_`, in map as `DelayedPending` | Iteration done, tiered entries still pending |\n  | **Covered** | `version >= snapshot_version_`, not in map | Baseline fully emitted |\n\n- Associate delayed tiered entries with their originating bucket instead of the global queue.\n  Transition to `Covered` only after all delayed entries are flushed.\n- **Transaction-driven mutations:** `OnDbChange` blocks (fiber-aware wait) on\n  `Serializing`/`DelayedPending` buckets; proceeds immediately on `NotVisited` (serialize\n  now) or `Covered` (baseline already emitted). Since `OnDbChange` → mutation →\n  `RecordJournal` → `ConsumeJournalChange` is sequential on the mutation fiber, blocking\n  `OnDbChange` guarantees baseline-before-journal.\n- **Non-transaction deletions (expiry, eviction):** `OnDbChange` is\n  [infeasible on these paths](#journal-entries-without-ondbchange). Instead, use a **deferred\n  deletion queue**: enqueue the key when the bucket is `Serializing`/`DelayedPending`; drain\n  (emit `DEL`) when the bucket transitions to `Covered`. See roadmap item 6 for details.\n- **Latency tradeoff:** blocking `OnDbChange` on `DelayedPending` means a mutation fiber can\n  stall for the duration of a tiered disk read (see roadmap item 6 for mitigation).\n\nSee Roadmap items 3, 5, 6.\n\n### 3. 
Shared serializer buffer and wire-format coupling\n\n**Problem.** `ConsumeJournalChange` and `SerializeBucket` write to the same `serializer_`\nbuffer (the \"buffer exclusivity\" role from [§1](#1-shard-wide-stall-under-big_value_mu_)).\nEven with separate buffers, interleaved output from two serializers cannot be demuxed by the\nconsumer without a framing protocol — a journal entry injected mid-RDB-entry produces an\nunparseable byte stream (see the [eviction counter-example](#1-shard-wide-stall-under-big_value_mu_)\nfor a concrete scenario).\n\n**Goal.** Decouple journal and bucket serialization so they can produce data independently,\nwithout sharing a buffer or requiring a shard-wide lock for output integrity.\n\n**Approach.**\n- **Tagged-chunk wire format.** Extend the serialization format with tagged chunks: each\n  mid-entry flush produces a chunk tagged with a stream ID. The consumer reassembles same-ID\n  chunks before decoding. Small values (single chunk) use the existing format unchanged —\n  no overhead. Controlled by a master-side flag (`--serialization_tagged_chunks`).\n- **Separate `RdbSerializer` per producer.** Give journal entries and bucket serialization\n  their own serializer instances. Each produces tagged chunks independently. With separate\n  buffers, `ConsumeJournalChange` no longer needs `big_value_mu_` for buffer exclusivity.\n- **Flushing strategy:** small values serialize the entire bucket without preemption; large\n  values release the lock between chunks and apply backpressure outside the critical section.\n  Bucket contents remain stable across the gap because PIT versioning prevents re-serialization and `OnDbChange` blocking (§2) prevents mutation.\n\nSee Roadmap items 4, 7.\n\n### 4. Non-PIT redundant journal traffic\n\n**Problem.** Non-PIT mode (eventual consistency for replication) emits every journal entry regardless of whether the snapshot traversal will cover the mutation. 
For self-contained entries (`SET`, `DEL`) this is redundant but harmless. For baseline-dependent entries (`HSET`, `LPUSH`, etc.) the system emits both the baseline value and the journal entry for\nevery mutation, even when the traversal has not yet reached the bucket and will serialize the\npost-mutation value.\n\n**Goal.** In non-PIT mode, reduce journal traffic by skipping entries that are guaranteed to\nbe covered by the traversal, without compromising eventual consistency.\n\n**Approach.** Use the bucket completion state machine (§2) to classify mutations:\n\n- **Self-contained entries** (`SET`, `DEL`, `EXPIRE`): skip for `NotVisited` buckets (traversal will see post-mutation value); emit for `Covered` buckets; emit conservatively for\n  `Serializing`/`DelayedPending`. Classification is by **emitted journal command form**, not\n  the user-facing command — commands like `JSON.SET` may be self-contained or not depending\n  on arguments and must be validated individually.\n\n- **Baseline-dependent entries** (`HSET`, `LPUSH`, `SADD`, `ZADD`, `XADD`, `APPEND`, etc.):\n  **SkipBoth** — suppress both baseline serialization and journal entry — when the bucket is\n  `NotVisited`/`Serializing`, the mutation is a single-key in-memory update (no delete, no\n  rehash, no insert), and no delayed tiered entry is in flight. Otherwise fall back to emitting\n  the journal entry only or keeping both. Each `SliceSnapshot` instance marks suppressed mutations locally;\n  `ConsumeJournalChange` skips them without cross-instance coordination.\n\nSee Roadmap items 10–15.\n\n### 5. 
Summary: mutex roles and their replacements\n\nThe previous subsections identify `big_value_mu_`'s three roles and the mechanisms that\nreplace each:\n\n| Mutex role | Replacement | Source |\n|-----------|-------------|--------|\n| Journal ordering | Bucket completion state + deferred deletion queue | §1 |\n| Buffer exclusivity | Separate `RdbSerializer` per producer + tagged chunks | §3 |\n| Bucket atomicity (PIT) | Bucket versioning + `OnDbChange` blocking | §1, §2 |\n| Bucket atomicity (non-PIT) | Non-preempting chunk production | §2, §3 |\n\nOnce all replacements are in place and validated, the mutex can be narrowed per mode and path,\nand eventually removed entirely. The roadmap structures this as a sequence of incremental\nsteps (Phases 0–4), each validated before the next begins.\n\n## Technical Roadmap\n\nThe improvements identified above are interdependent. The safest path is to split them into\nsmall, verifiable steps that first improve observability and correctness scaffolding, then\nimprove PIT and PIT+tiered correctness/robustness, and only after that tackle non-PIT\noptimizations and deeper serializer / lock-removal changes. Some of the groundwork —\nespecially bucket-level completion state — is shared and should be laid early even if the\nfirst consumers are PIT-oriented. Because non-PIT is currently experimental and unused, the\nroadmap below does **not** treat current non-PIT behavior as a compatibility constraint. Later\nnon-PIT phases may simplify, replace, or remove experimental behavior rather than preserving it.\n\n### Phase 0 — Baseline and guardrails\n\n1. **Document current invariants in code comments and tests.**\n   - Make the key ordering rules explicit near `SliceSnapshot::OnDbChange`,\n     `SliceSnapshot::ConsumeJournalChange`, `RestoreStreamer::OnDbChange`, and\n     `DbSlice::FlushChangeToEarlierCallbacks`.\n   - Prefer focused replication tests over purely end-to-end hash comparisons. 
The current\n     broad replication suite is useful, but Phase 0 needs tests that fail specifically when an\n     ordering invariant is broken.\n   - Add focused tests for:\n     - PIT: baseline-before-journal for baseline-dependent mutations.\n     - tiered values: delayed serialization still preserves baseline-before-journal.\n   - Suggested test strategy:\n     - **PIT ordering guardrail:** add a test in `tests/dragonfly/replication_test.py` that\n       starts full sync with `point_in_time_snapshot=true`, performs a small controlled set of\n       baseline-dependent updates during full sync (`HSET`, `LPUSH`, `APPEND`, `XADD`), waits for\n       stable sync, and then asserts exact key/value equality for only those keys. The intent is\n       to make a baseline-before-journal violation fail on a tiny, debuggable workload.\n     - **tiered delayed-entry guardrail:** rehabilitate the currently skipped tiered replication\n       test in `tests/dragonfly/tiering_test.py` and make it assert not just final equivalence,\n       but that concurrent writes to tiered keys during full sync do not lose updates.\n   - Suggested assertions:\n     - assert exact values for a small curated key set, not just whole-dataset hashes;\n     - assert replica reaches stable sync and catches up via `check_all_replicas_finished`;\n     - assert path-activation counters from logs where available (`side_saved`, `moved_saved`);\n     - for tricky cases, prefer deterministic key-level checks over probabilistic stress-only\n       validation.\n   - Suggested scope split:\n     - keep the existing large/stress replication tests as coarse regression coverage;\n     - add a handful of small, deterministic Phase 0 tests whose only purpose is to guard the\n       invariants this roadmap depends on.\n   - Goal: freeze the current correctness contract before changing behavior.\n\n2. 
**Add lightweight observability for snapshot/journal interleavings.**\n   - Count how often `ConsumeJournalChange` runs while a bucket is being serialized.\n   - Count flushes triggered under `big_value_mu_` versus outside it.\n   - Suggested locations for counters / debug stats:\n     - increment a counter when `ConsumeJournalChange` acquires the barrier while\n       `serialize_bucket_running_` is true;\n     - increment separate counters for `HandleFlushData` reached from under `big_value_mu_`\n       versus from `PushSerialized` outside the critical section;\n   - Suggested exposure:\n     - start with log lines in the existing `Exit SnapshotSerializer` / replication progress logs;\n     - if the signals become broadly useful, promote them to INFO/stats fields later.\n   - Suggested rollout rule:\n     - add observability before optimization, and require each new fast path to demonstrate that\n       the expected path was actually exercised in tests.\n   - Goal: validate which paths are actually hot and which optimizations are worth the risk.\n\n### Phase 1 — PIT and PIT+tiered foundation\n\n3. **Introduce explicit bucket-level completion state.**\n   - **Prerequisites:** Phase 0.1–0.2.\n   - Implement the per-snapshot-instance state machine described in\n     [§1](#1-imprecise-bucket-completion-tracking): `NotVisited` → `Serializing` →\n     `DelayedPending` → `Covered`, keyed by `BucketIdentity`.\n   - Keep this state entirely instance-local to `SliceSnapshot` / `RestoreStreamer`.\n   - Goal: replace vague \"bucket iteration finished\" reasoning with an explicit state machine\n     that will later serve both PIT+tiered correctness and non-PIT decisions.\n\n4. **Extend the wire format with tagged chunks.**\n   - **Prerequisites:** none.\n   - Implements the tagged-chunk format described in\n     [§3](#3-shared-serializer-buffer-and-wire-format-coupling). 
Entries that may be split\n     across preemption points are wrapped in a per-stream-tag envelope; single-chunk entries\n     use the existing format unchanged (no overhead).\n   - **Wire format:** `RDB_OPCODE_DF_MASK`-style flag bit (`DF_MASK_FLAG_CHUNKED`). When set,\n     payload is `stream_tag: uint32, payload_length: uint32, payload: bytes`. Entries without\n     the flag are unchanged.\n   - **Enablement:** master-side flag (`--serialization_tagged_chunks`), not `DflyVersion`\n     (which doesn't apply to DFS backups). The loader detects tagged chunks by the flag bit\n     and reassembles transparently.\n   - Pure format + loader-side work — no changes to serialization logic or locking. Can be\n     developed independently of Phases 0–1.\n   - **Scope:** replication and DFS backups. Only legacy `.rdb` format does not need tagged\n     chunks (`SnapshotFlush::kDisallow`, no concurrent bucket serialization).\n   - Why early: Phase 2 (item 7) needs separate serializers whose interleaved output requires\n     tagged chunks for demuxing.\n   - Goal: have the wire-format infrastructure ready before Phase 2 needs it.\n\n5. **Associate delayed tiered serialization with bucket state.**\n   - **Prerequisites:** 1.3.\n   - Address the [tiered completion gap](#delayed-serialization-of-tiered-entities): associate\n     `delayed_entries_` with their originating bucket instead of the global queue.\n   - Only transition a bucket to `Covered` once its delayed tiered entries are emitted.\n   - Goal: make \"baseline fully emitted\" precise, not just \"bucket iteration finished\".\n\n6. **Use bucket completion state to harden PIT ordering guarantees.**\n   - **Prerequisites:** 1.3 and 1.5.\n   - Re-express the PIT ordering rule in terms of bucket completion state, not just mutex\n     exclusion and `bucket.version`.\n   - For in-memory values, PIT ordering is already sound by construction (sequential\n     `OnDbChange` → mutation → `ConsumeJournalChange` on the same fiber). 
The real gap is\n     **tiered delayed entries** (see\n     [Delayed Serialization](#delayed-serialization-of-tiered-entities)): a journal entry\n     can reach the buffer before the delayed baseline is drained.\n   - **`OnDbChange` blocking:** block (fiber-aware wait) when the bucket is `Serializing` or\n     `DelayedPending`; proceed on `NotVisited` (serialize now → `Covered`) or `Covered`\n     (baseline already emitted). Because `OnDbChange` → mutation → `RecordJournal` →\n     `ConsumeJournalChange` is sequential on the mutation fiber, blocking `OnDbChange`\n     guarantees baseline-before-journal for all transaction-driven mutations.\n   - **Deferred deletion queue** for\n     [non-transaction journal paths](#journal-entries-without-ondbchange) (expiry, eviction —\n     where `OnDbChange` is infeasible). When a deletion encounters a bucket in\n     `Serializing`/`DelayedPending`, enqueue the key into a per-bucket\n     `pending_deletions: vector<string>` (bounded by bucket capacity, typically 12–14 slots).\n     The traversal fiber drains the queue — emitting deferred `DEL` entries — when\n     transitioning the bucket to `Covered`. For `NotVisited`/`Covered` buckets, `DEL` is\n     emitted immediately as today. Properties:\n     - no blocking, re-entrancy, or preemption on the deletion fiber;\n     - baseline-before-journal ordering preserved by construction.\n   - After this item, `big_value_mu_` is no longer needed for journal ordering, but is still\n     needed for [buffer exclusivity](#3-shared-serializer-buffer-and-wire-format-coupling)\n     (items 7–8).\n   - **Latency tradeoff:** blocking `OnDbChange` on `DelayedPending` can stall a mutation\n     fiber for the duration of a tiered disk read (`Future<io::Result<string>>`). 
Acceptable\n     for correctness; monitor and consider `KeepBoth` fallback if latency is excessive.\n   - Use Phase 0 tests to validate PIT+tiered behavior under preemption and backpressure.\n   - Goal: make the existing production path easier to reason about before adding new behavior.\n\n### Phase 2 — Reduce PIT blocking and serializer fragility\n\n7. **Give journal and bucket serialization separate `RdbSerializer` instances.**\n   - **Prerequisites:** 1.4 and 1.6.\n   - NOTE: this item may be unnecessary if we rely on 1.4 (tagged chunks) alone.\n   - Addresses the [shared buffer problem](#3-shared-serializer-buffer-and-wire-format-coupling)\n     and the primary [shard-wide stall hazard](#blocking-under-big_value_mu_).\n   - The fix: give journal entries their own `RdbSerializer` instance. Bucket serialization\n     and journal serialization never share a buffer. Each produces tagged chunks (item 4)\n     that the consumer (replica or DFS loader) reassembles by stream tag.\n   - The same separation is needed for **DFS backups** (no journal, but still PIT): once\n     per-bucket locks (item 6) replace the shard-wide `big_value_mu_`, two concurrent\n     `SerializeBucket` calls can run on different buckets (traversal fiber on bucket A\n     preempts mid-entry via `consume_fun_`, `OnDbChange` serializes bucket B). Each call\n     needs its own buffer; tagged chunks allow their interleaved output to be reassembled.\n   - With separate serializers, `big_value_mu_` is no longer needed for buffer exclusivity.\n     `ConsumeJournalChange` writes to its own serializer without acquiring `big_value_mu_`\n     at all (journal ordering is already guaranteed by bucket completion state from item 6).\n   - The flushing strategy depends on value size:\n     - **Small values (typical case):** `consume_fun_` is disabled (or made a no-op) while\n       the lock is held. 
`SerializeBucket` serializes the entire bucket into the bucket\n       serializer's buffer without preempting — the buffer grows but stays bounded because\n       most buckets contain only small entries. After `SerializeBucket` returns and the lock\n       is released, the accumulated buffer is flushed as a tagged chunk outside the lock.\n     - **Large values (e.g., a 1 GB set):** the existing `kFlushMidEntry` boundaries become\n       lock-release points. After serializing a bounded batch of elements, the lock is\n       released, the accumulated chunk is flushed (with backpressure) outside the lock, and\n       the lock is re-acquired for the next batch. Bucket contents remain stable across the\n       gap because (a) PIT versioning prevents re-serialization and (b) `OnDbChange` blocking\n       (item 6) prevents the mutation from committing. Both are required: (a) alone prevents\n       double-serialization but not mid-value mutation; (b) alone prevents mutation but not\n       concurrent `SerializeBucket` entry.\n   - Goal: eliminate blocking under `big_value_mu_` by removing the shared-buffer reason for\n     holding it, rather than by restructuring the lock/unlock pattern around the same buffer.\n\n8. **Simplify `rec_id_` / `seq_cond_` ordering once tagged-chunk delivery is proven.**\n   - **Prerequisites:** 2.7, 1.4.\n   - With tagged chunks support, we may not need a consistent global order between different\n     fibers. In that case `rec_id_` / `seq_cond_.wait` become redundant.\n   - Remove `rec_id_` / `seq_cond_` only after demonstrating (via tests and observability)\n     that we do not corrupt the replication stream.\n   - Goal: avoid removing an ordering mechanism before its replacement is demonstrably sound.\n\n9. 
**Narrow `big_value_mu_` for PIT only after the above is proven.**\n   - **Prerequisites:** 2.7–2.8.\n   - Keep serialize-before-mutate semantics intact.\n   - Remove or narrow mutex roles only where bucket state, serializer isolation, and\n     tagged-chunk delivery already provide an equivalent correctness guarantee.\n   - Goal: simplify the active production path incrementally, not speculatively.\n\n### Phase 3 — Bring non-PIT onto the new foundation\n\n10. **Add non-PIT-specific guardrails before changing non-PIT behavior.**\n    - **Prerequisites:** 1.3 and 1.5.\n    - Add focused tests for:\n      - self-contained journal entries produce correct final state when baseline is fully\n        emitted before or after the journal entry (no mid-entry interleaving);\n      - moved items that cross the cursor are not lost;\n      - any first non-PIT bucket-state redesign still converges under concurrent full-sync writes.\n    - Suggested test strategy:\n      - add a dedicated test with `point_in_time_snapshot=false` that mutates only with\n        self-contained emitted commands (`SET`, `DEL`, `BITOP` rewritten to `SET`/`DEL`);\n      - rehabilitate the currently skipped `test_replication_onmove_flow` instead of replacing\n        it; if it is too flaky for CI, first reduce it to a smaller deterministic reproducer that\n        still asserts both replica equality and `moved_saved > 0` from snapshot logs;\n      - add non-PIT-specific observability such as counting how often `OnMoved` actually\n        serializes a bucket and optionally classifying self-contained vs baseline-dependent\n        journal entries by emitted command.\n    - Goal: avoid touching experimental non-PIT behavior without dedicated guardrails.\n\n11. 
**Stamp bucket version in non-PIT mode behind a feature flag.**\n    - **Prerequisites:** 1.3 and 1.5 and 3.10.\n    - Teach non-PIT `SerializeBucket` to call `SetVersion(snapshot_version_)`.\n    - Since non-PIT is experimental, prefer the simplest implementation that matches the new\n      bucket-state model rather than preserving legacy bookkeeping.\n    - Validate that traversal, `OnMoved`, and any remaining bucket-version assumptions remain\n      correct under the new design.\n    - Goal: align non-PIT with the new foundation, not preserve its old implementation details.\n\n12. **Implement self-contained journal classification in `ConsumeJournalChange`.**\n    - **Prerequisites:** 3.11.\n    - Classify emitted journal commands as self-contained vs baseline-dependent.\n    - Initially use a conservative allowlist (`SET`, `DEL`, rewritten `BITOP`).\n    - Skip `big_value_mu_` only for self-contained entries in non-PIT mode.\n    - Goal: harvest the simplest safe non-PIT redesign win first, on top of the PIT-hardened\n      foundation.\n\n13. **Add instance-local suppression state for `SkipBoth`.**\n    - **Prerequisites:** 1.3 and 1.5 and 3.11.\n    - Let `OnDbChange` record a local suppression decision for mutations whose effects will be\n      covered by future traversal.\n    - Let the same snapshot instance's `ConsumeJournalChange` consult and clear that state.\n    - Do not introduce shard-wide or cross-instance aggregation.\n    - Goal: keep the redesign entirely within the existing per-instance callback pair, without\n      carrying forward unnecessary experimental structure.\n\n14. 
**Implement `SkipBoth` for the narrowest safe mutation subset.**\n    - **Prerequisites:** 3.13.\n    - Start with single-key, single-bucket, in-memory updates only.\n    - Exclude inserts, deletes, rehash-triggering operations, and tiered cases.\n    - Require bucket state to be `NotVisited` or `Serializing`.\n    - Goal: prove the mechanism on a subset where correctness is easy to reason about.\n\n15. **Expand `SkipBoth` eligibility only after targeted validation.**\n    - **Prerequisites:** 3.14.\n    - Re-evaluate `DelayedPending` once delayed-entry ownership is explicit.\n    - Re-evaluate inserts only if bucket-touch coverage can be proven cheaply.\n    - Re-evaluate tiered keys only if suppression can be tied to delayed-entry completion.\n    - Goal: expand cautiously instead of generalizing the hard cases upfront.\n\n### Phase 4 — Reassess `big_value_mu_` globally\n\n16. **Narrow the lock's role by mode and path.**\n    - **Prerequisites:** 2.9 for PIT changes; 3.12–3.15 for non-PIT changes.\n    - PIT: keep only what is still required for serialize-before-mutate correctness.\n    - non-PIT: remove it from self-contained journal entries first; then reconsider `OnMoved`\n      and traversal interactions once serialization becomes non-preempting.\n    - Goal: shrink the lock surface incrementally instead of attempting full removal at once;\n      for non-PIT there is no obligation to preserve locking structure that exists only because\n      of the experimental implementation.\n\n17. 
**Attempt full `big_value_mu_` removal only after all prerequisites are in place.**\n    - **Prerequisites:** 4.16.\n    - Preconditions:\n      - non-preempting bounded serialization chunks,\n      - precise bucket coverage state,\n      - delayed tiered ownership tracked to completion,\n      - journal ordering independent of the mutex,\n      - tests covering PIT, non-PIT, `OnMoved`, and tiered cases.\n    - Goal: ensure lock removal is the final simplification step, not the first risky rewrite.\n"
  },
  {
    "path": "docs/thread-per-core.excalidraw",
    "content": "{\n  \"type\": \"excalidraw\",\n  \"version\": 2,\n  \"source\": \"https://excalidraw.com\",\n  \"elements\": [\n    {\n      \"type\": \"text\",\n      \"version\": 158,\n      \"versionNonce\": 1897755639,\n      \"isDeleted\": false,\n      \"id\": \"N2nJ6OaFNRqcFW23SO0u2\",\n      \"fillStyle\": \"hachure\",\n      \"strokeWidth\": 1,\n      \"strokeStyle\": \"solid\",\n      \"roughness\": 1,\n      \"opacity\": 100,\n      \"angle\": 0,\n      \"x\": 714.625,\n      \"y\": 507.5390625000001,\n      \"strokeColor\": \"#000000\",\n      \"backgroundColor\": \"#fab005\",\n      \"width\": 90,\n      \"height\": 20,\n      \"seed\": 1339600844,\n      \"groupIds\": [],\n      \"strokeSharpness\": \"sharp\",\n      \"boundElements\": [],\n      \"updated\": 1658676475959,\n      \"link\": null,\n      \"locked\": false,\n      \"fontSize\": 16,\n      \"fontFamily\": 1,\n      \"text\": \"I/O thread\",\n      \"baseline\": 14,\n      \"textAlign\": \"center\",\n      \"verticalAlign\": \"top\",\n      \"containerId\": null,\n      \"originalText\": \"I/O thread\"\n    },\n    {\n      \"type\": \"text\",\n      \"version\": 212,\n      \"versionNonce\": 1838113753,\n      \"isDeleted\": false,\n      \"id\": \"pZs66qxoJlWQcWuBsvAxk\",\n      \"fillStyle\": \"hachure\",\n      \"strokeWidth\": 1,\n      \"strokeStyle\": \"solid\",\n      \"roughness\": 1,\n      \"opacity\": 100,\n      \"angle\": 0,\n      \"x\": 829.125,\n      \"y\": 509.4140625000001,\n      \"strokeColor\": \"#000000\",\n      \"backgroundColor\": \"#fd7e14\",\n      \"width\": 90,\n      \"height\": 20,\n      \"seed\": 1172993740,\n      \"groupIds\": [],\n      \"strokeSharpness\": \"sharp\",\n      \"boundElements\": [],\n      \"updated\": 1658676475959,\n      \"link\": null,\n      \"locked\": false,\n      \"fontSize\": 16,\n      \"fontFamily\": 1,\n      \"text\": \"I/O thread\",\n      \"baseline\": 14,\n      \"textAlign\": \"center\",\n      \"verticalAlign\": 
\"top\",\n      \"containerId\": null,\n      \"originalText\": \"I/O thread\"\n    },\n    {\n      \"type\": \"text\",\n      \"version\": 223,\n      \"versionNonce\": 1421110391,\n      \"isDeleted\": false,\n      \"id\": \"qhrDskacRkr-tNl2Q3atR\",\n      \"fillStyle\": \"hachure\",\n      \"strokeWidth\": 1,\n      \"strokeStyle\": \"solid\",\n      \"roughness\": 1,\n      \"opacity\": 100,\n      \"angle\": 0,\n      \"x\": 948.6875,\n      \"y\": 508.02455357142867,\n      \"strokeColor\": \"#000000\",\n      \"backgroundColor\": \"#fd7e14\",\n      \"width\": 90,\n      \"height\": 20,\n      \"seed\": 1936794996,\n      \"groupIds\": [],\n      \"strokeSharpness\": \"sharp\",\n      \"boundElements\": [],\n      \"updated\": 1658676504307,\n      \"link\": null,\n      \"locked\": false,\n      \"fontSize\": 16,\n      \"fontFamily\": 1,\n      \"text\": \"I/O thread\",\n      \"baseline\": 14,\n      \"textAlign\": \"center\",\n      \"verticalAlign\": \"top\",\n      \"containerId\": null,\n      \"originalText\": \"I/O thread\"\n    },\n    {\n      \"type\": \"rectangle\",\n      \"version\": 344,\n      \"versionNonce\": 1641244985,\n      \"isDeleted\": false,\n      \"id\": \"jPwIU_a9_nxvuDFAcbzxM\",\n      \"fillStyle\": \"cross-hatch\",\n      \"strokeWidth\": 1,\n      \"strokeStyle\": \"dotted\",\n      \"roughness\": 1,\n      \"opacity\": 100,\n      \"angle\": 0,\n      \"x\": 712.375,\n      \"y\": 537.2500000000001,\n      \"strokeColor\": \"#000000\",\n      \"backgroundColor\": \"transparent\",\n      \"width\": 431,\n      \"height\": 30,\n      \"seed\": 1029717964,\n      \"groupIds\": [],\n      \"strokeSharpness\": \"sharp\",\n      \"boundElements\": [\n        {\n          \"type\": \"text\",\n          \"id\": \"U2-I9a2X4amHnB7NZFWGv\"\n        }\n      ],\n      \"updated\": 1658676541606,\n      \"link\": null,\n      \"locked\": false\n    },\n    {\n      \"type\": \"text\",\n      \"version\": 239,\n      \"versionNonce\": 
1717412567,\n      \"isDeleted\": false,\n      \"id\": \"U2-I9a2X4amHnB7NZFWGv\",\n      \"fillStyle\": \"hachure\",\n      \"strokeWidth\": 1,\n      \"strokeStyle\": \"solid\",\n      \"roughness\": 1,\n      \"opacity\": 100,\n      \"angle\": 0,\n      \"x\": 717.375,\n      \"y\": 542.2500000000001,\n      \"strokeColor\": \"#000000\",\n      \"backgroundColor\": \"transparent\",\n      \"width\": 421,\n      \"height\": 20,\n      \"seed\": 1592449524,\n      \"groupIds\": [],\n      \"strokeSharpness\": \"sharp\",\n      \"boundElements\": [],\n      \"updated\": 1658676541606,\n      \"link\": null,\n      \"locked\": false,\n      \"fontSize\": 16,\n      \"fontFamily\": 1,\n      \"text\": \"message bus\",\n      \"baseline\": 14,\n      \"textAlign\": \"center\",\n      \"verticalAlign\": \"middle\",\n      \"containerId\": \"jPwIU_a9_nxvuDFAcbzxM\",\n      \"originalText\": \"message bus\"\n    },\n    {\n      \"type\": \"rectangle\",\n      \"version\": 315,\n      \"versionNonce\": 208875257,\n      \"isDeleted\": false,\n      \"id\": \"mBFE2wiT175ZxMSdmWcvQ\",\n      \"fillStyle\": \"hachure\",\n      \"strokeWidth\": 1,\n      \"strokeStyle\": \"solid\",\n      \"roughness\": 1,\n      \"opacity\": 100,\n      \"angle\": 0,\n      \"x\": 712.375,\n      \"y\": 305.7916666666667,\n      \"strokeColor\": \"#000000\",\n      \"backgroundColor\": \"#fab005\",\n      \"width\": 77,\n      \"height\": 192,\n      \"seed\": 352036980,\n      \"groupIds\": [\n        \"DYa5vdmfX68EvWPAq2Beo\"\n      ],\n      \"strokeSharpness\": \"sharp\",\n      \"boundElements\": [\n        {\n          \"type\": \"text\",\n          \"id\": \"tK1EcrkpG35slJ07z1dTT\"\n        }\n      ],\n      \"updated\": 1658676546251,\n      \"link\": null,\n      \"locked\": false\n    },\n    {\n      \"type\": \"text\",\n      \"version\": 194,\n      \"versionNonce\": 181803287,\n      \"isDeleted\": false,\n      \"id\": \"tK1EcrkpG35slJ07z1dTT\",\n      \"fillStyle\": 
\"hachure\",\n      \"strokeWidth\": 1,\n      \"strokeStyle\": \"solid\",\n      \"roughness\": 1,\n      \"opacity\": 100,\n      \"angle\": 0,\n      \"x\": 717.375,\n      \"y\": 376.7916666666667,\n      \"strokeColor\": \"#000000\",\n      \"backgroundColor\": \"#fab005\",\n      \"width\": 67,\n      \"height\": 50,\n      \"seed\": 1251432308,\n      \"groupIds\": [\n        \"DYa5vdmfX68EvWPAq2Beo\"\n      ],\n      \"strokeSharpness\": \"sharp\",\n      \"boundElements\": [],\n      \"updated\": 1658676546251,\n      \"link\": null,\n      \"locked\": false,\n      \"fontSize\": 20,\n      \"fontFamily\": 1,\n      \"text\": \"thread\\n1\",\n      \"baseline\": 43,\n      \"textAlign\": \"center\",\n      \"verticalAlign\": \"middle\",\n      \"containerId\": \"mBFE2wiT175ZxMSdmWcvQ\",\n      \"originalText\": \"thread\\n1\"\n    },\n    {\n      \"type\": \"rectangle\",\n      \"version\": 430,\n      \"versionNonce\": 1426120247,\n      \"isDeleted\": false,\n      \"id\": \"BY5OdEEKT0Y_DTy9Zgr9C\",\n      \"fillStyle\": \"hachure\",\n      \"strokeWidth\": 1,\n      \"strokeStyle\": \"solid\",\n      \"roughness\": 1,\n      \"opacity\": 100,\n      \"angle\": 0,\n      \"x\": 833.375,\n      \"y\": 306.4166666666667,\n      \"strokeColor\": \"#000000\",\n      \"backgroundColor\": \"#fd7e14\",\n      \"width\": 77,\n      \"height\": 192,\n      \"seed\": 1621471436,\n      \"groupIds\": [\n        \"DYa5vdmfX68EvWPAq2Beo\"\n      ],\n      \"strokeSharpness\": \"sharp\",\n      \"boundElements\": [\n        {\n          \"id\": \"sIrssFTnnb9f1o26g1j88\",\n          \"type\": \"text\"\n        },\n        {\n          \"type\": \"text\",\n          \"id\": \"sIrssFTnnb9f1o26g1j88\"\n        }\n      ],\n      \"updated\": 1658676546251,\n      \"link\": null,\n      \"locked\": false\n    },\n    {\n      \"type\": \"text\",\n      \"version\": 310,\n      \"versionNonce\": 514622649,\n      \"isDeleted\": false,\n      \"id\": 
\"sIrssFTnnb9f1o26g1j88\",\n      \"fillStyle\": \"hachure\",\n      \"strokeWidth\": 1,\n      \"strokeStyle\": \"solid\",\n      \"roughness\": 1,\n      \"opacity\": 100,\n      \"angle\": 0,\n      \"x\": 838.375,\n      \"y\": 377.4166666666667,\n      \"strokeColor\": \"#000000\",\n      \"backgroundColor\": \"#fd7e14\",\n      \"width\": 67,\n      \"height\": 50,\n      \"seed\": 711168500,\n      \"groupIds\": [\n        \"DYa5vdmfX68EvWPAq2Beo\"\n      ],\n      \"strokeSharpness\": \"sharp\",\n      \"boundElements\": [],\n      \"updated\": 1658676546251,\n      \"link\": null,\n      \"locked\": false,\n      \"fontSize\": 20,\n      \"fontFamily\": 1,\n      \"text\": \"thread\\n2\",\n      \"baseline\": 43,\n      \"textAlign\": \"center\",\n      \"verticalAlign\": \"middle\",\n      \"containerId\": \"BY5OdEEKT0Y_DTy9Zgr9C\",\n      \"originalText\": \"thread\\n2\"\n    },\n    {\n      \"type\": \"text\",\n      \"version\": 76,\n      \"versionNonce\": 1406533463,\n      \"isDeleted\": false,\n      \"id\": \"45U617mr0L9ob4mc7Xozt\",\n      \"fillStyle\": \"hachure\",\n      \"strokeWidth\": 1,\n      \"strokeStyle\": \"solid\",\n      \"roughness\": 1,\n      \"opacity\": 100,\n      \"angle\": 0,\n      \"x\": 845.375,\n      \"y\": 260.0865384615385,\n      \"strokeColor\": \"#000000\",\n      \"backgroundColor\": \"#fab005\",\n      \"width\": 53,\n      \"height\": 40,\n      \"seed\": 1285924468,\n      \"groupIds\": [\n        \"DYa5vdmfX68EvWPAq2Beo\"\n      ],\n      \"strokeSharpness\": \"sharp\",\n      \"boundElements\": [],\n      \"updated\": 1658676546251,\n      \"link\": null,\n      \"locked\": false,\n      \"fontSize\": 16,\n      \"fontFamily\": 1,\n      \"text\": \"shard\\nthread\",\n      \"baseline\": 34,\n      \"textAlign\": \"center\",\n      \"verticalAlign\": \"top\",\n      \"containerId\": null,\n      \"originalText\": \"shard\\nthread\"\n    },\n    {\n      \"type\": \"text\",\n      \"version\": 85,\n      
\"versionNonce\": 2081260953,\n      \"isDeleted\": false,\n      \"id\": \"vY-LnNlhD3qWMEtRPoU0t\",\n      \"fillStyle\": \"hachure\",\n      \"strokeWidth\": 1,\n      \"strokeStyle\": \"solid\",\n      \"roughness\": 1,\n      \"opacity\": 100,\n      \"angle\": 0,\n      \"x\": 964.9375,\n      \"y\": 260.0865384615385,\n      \"strokeColor\": \"#000000\",\n      \"backgroundColor\": \"#fab005\",\n      \"width\": 53,\n      \"height\": 40,\n      \"seed\": 817296972,\n      \"groupIds\": [\n        \"DYa5vdmfX68EvWPAq2Beo\"\n      ],\n      \"strokeSharpness\": \"sharp\",\n      \"boundElements\": [],\n      \"updated\": 1658676546251,\n      \"link\": null,\n      \"locked\": false,\n      \"fontSize\": 16,\n      \"fontFamily\": 1,\n      \"text\": \"shard\\nthread\",\n      \"baseline\": 34,\n      \"textAlign\": \"center\",\n      \"verticalAlign\": \"top\",\n      \"containerId\": null,\n      \"originalText\": \"shard\\nthread\"\n    },\n    {\n      \"type\": \"rectangle\",\n      \"version\": 458,\n      \"versionNonce\": 190540409,\n      \"isDeleted\": false,\n      \"id\": \"xvkm28eoejETjF3M78jpN\",\n      \"fillStyle\": \"hachure\",\n      \"strokeWidth\": 1,\n      \"strokeStyle\": \"solid\",\n      \"roughness\": 1,\n      \"opacity\": 100,\n      \"angle\": 0,\n      \"x\": 1062.125,\n      \"y\": 310.875,\n      \"strokeColor\": \"#000000\",\n      \"backgroundColor\": \"#fa5252\",\n      \"width\": 77,\n      \"height\": 187,\n      \"seed\": 1482008524,\n      \"groupIds\": [\n        \"DYa5vdmfX68EvWPAq2Beo\"\n      ],\n      \"strokeSharpness\": \"sharp\",\n      \"boundElements\": [\n        {\n          \"id\": \"nSQOBHdmN0bLo5OeoOD0P\",\n          \"type\": \"text\"\n        },\n        {\n          \"type\": \"text\",\n          \"id\": \"nSQOBHdmN0bLo5OeoOD0P\"\n        }\n      ],\n      \"updated\": 1658676546251,\n      \"link\": null,\n      \"locked\": false\n    },\n    {\n      \"type\": \"text\",\n      \"version\": 337,\n      
\"versionNonce\": 2051102103,\n      \"isDeleted\": false,\n      \"id\": \"nSQOBHdmN0bLo5OeoOD0P\",\n      \"fillStyle\": \"hachure\",\n      \"strokeWidth\": 1,\n      \"strokeStyle\": \"solid\",\n      \"roughness\": 1,\n      \"opacity\": 100,\n      \"angle\": 0,\n      \"x\": 1067.125,\n      \"y\": 379.375,\n      \"strokeColor\": \"#000000\",\n      \"backgroundColor\": \"#fab005\",\n      \"width\": 67,\n      \"height\": 50,\n      \"seed\": 1058179828,\n      \"groupIds\": [\n        \"DYa5vdmfX68EvWPAq2Beo\"\n      ],\n      \"strokeSharpness\": \"sharp\",\n      \"boundElements\": [],\n      \"updated\": 1658676546251,\n      \"link\": null,\n      \"locked\": false,\n      \"fontSize\": 20,\n      \"fontFamily\": 1,\n      \"text\": \"thread\\n4\",\n      \"baseline\": 43,\n      \"textAlign\": \"center\",\n      \"verticalAlign\": \"middle\",\n      \"containerId\": \"xvkm28eoejETjF3M78jpN\",\n      \"originalText\": \"thread\\n4\"\n    },\n    {\n      \"type\": \"text\",\n      \"version\": 156,\n      \"versionNonce\": 1163506521,\n      \"isDeleted\": false,\n      \"id\": \"H72xWL9unzb1mQiLvx7L4\",\n      \"fillStyle\": \"hachure\",\n      \"strokeWidth\": 1,\n      \"strokeStyle\": \"solid\",\n      \"roughness\": 1,\n      \"opacity\": 100,\n      \"angle\": 0,\n      \"x\": 1074.125,\n      \"y\": 265.7115384615385,\n      \"strokeColor\": \"#000000\",\n      \"backgroundColor\": \"#fab005\",\n      \"width\": 53,\n      \"height\": 40,\n      \"seed\": 1704611020,\n      \"groupIds\": [\n        \"DYa5vdmfX68EvWPAq2Beo\"\n      ],\n      \"strokeSharpness\": \"sharp\",\n      \"boundElements\": [],\n      \"updated\": 1658676546251,\n      \"link\": null,\n      \"locked\": false,\n      \"fontSize\": 16,\n      \"fontFamily\": 1,\n      \"text\": \"shard\\nthread\",\n      \"baseline\": 34,\n      \"textAlign\": \"center\",\n      \"verticalAlign\": \"top\",\n      \"containerId\": null,\n      \"originalText\": \"shard\\nthread\"\n    },\n 
   {\n      \"type\": \"rectangle\",\n      \"version\": 510,\n      \"versionNonce\": 1046208569,\n      \"isDeleted\": false,\n      \"id\": \"jj-MVcNrzcH0DbFFo9noF\",\n      \"fillStyle\": \"hachure\",\n      \"strokeWidth\": 1,\n      \"strokeStyle\": \"solid\",\n      \"roughness\": 1,\n      \"opacity\": 100,\n      \"angle\": 0,\n      \"x\": 952.9375,\n      \"y\": 310.1666666666667,\n      \"strokeColor\": \"#000000\",\n      \"backgroundColor\": \"#fd7e14\",\n      \"width\": 77,\n      \"height\": 193,\n      \"seed\": 1374694167,\n      \"groupIds\": [\n        \"DYa5vdmfX68EvWPAq2Beo\"\n      ],\n      \"strokeSharpness\": \"sharp\",\n      \"boundElements\": [\n        {\n          \"id\": \"NxhycN5eOsL0I52k0H-lh\",\n          \"type\": \"text\"\n        },\n        {\n          \"id\": \"NxhycN5eOsL0I52k0H-lh\",\n          \"type\": \"text\"\n        },\n        {\n          \"type\": \"text\",\n          \"id\": \"NxhycN5eOsL0I52k0H-lh\"\n        }\n      ],\n      \"updated\": 1658676546251,\n      \"link\": null,\n      \"locked\": false\n    },\n    {\n      \"type\": \"text\",\n      \"version\": 391,\n      \"versionNonce\": 1308367831,\n      \"isDeleted\": false,\n      \"id\": \"NxhycN5eOsL0I52k0H-lh\",\n      \"fillStyle\": \"hachure\",\n      \"strokeWidth\": 1,\n      \"strokeStyle\": \"solid\",\n      \"roughness\": 1,\n      \"opacity\": 100,\n      \"angle\": 0,\n      \"x\": 957.9375,\n      \"y\": 381.6666666666667,\n      \"strokeColor\": \"#000000\",\n      \"backgroundColor\": \"#fd7e14\",\n      \"width\": 67,\n      \"height\": 50,\n      \"seed\": 617412057,\n      \"groupIds\": [\n        \"DYa5vdmfX68EvWPAq2Beo\"\n      ],\n      \"strokeSharpness\": \"sharp\",\n      \"boundElements\": [],\n      \"updated\": 1658676546251,\n      \"link\": null,\n      \"locked\": false,\n      \"fontSize\": 20,\n      \"fontFamily\": 1,\n      \"text\": \"thread\\n3\",\n      \"baseline\": 43,\n      \"textAlign\": \"center\",\n      
\"verticalAlign\": \"middle\",\n      \"containerId\": \"jj-MVcNrzcH0DbFFo9noF\",\n      \"originalText\": \"thread\\n3\"\n    }\n  ],\n  \"appState\": {\n    \"gridSize\": null,\n    \"viewBackgroundColor\": \"#ffffff\"\n  },\n  \"files\": {}\n}\n"
  },
  {
    "path": "docs/transaction.md",
    "content": "# Life of a transaction\n\nThis document describes how Dragonfly transactions provide atomicity and serializability for its multi-key and multi-command operations.\n\n## Definitions\n\n### Serializability\n\nSerializability is an isolation level for database transactions. Serializability describes multiple transactions, where a transaction is usually composed of multiple operations on multiple objects.\n\nA database can execute transactions in parallel (and the operations in parallel). Serializability guarantees that the result is the same as if the transactions were executed one by one, i.e. the system behaves as if they were executed in a serial order.\n\nSerializability doesn’t guarantee the resulting serial order respects recency. I.e. the serial order can be different from the order in which transactions were actually executed. E.g. Tx1 begins earlier than Tx2, but the result behaves as if Tx2 executed before Tx1. That is also to say, to satisfy the same Serializability, there can be more than one possible execution schedule.\n\n### Strict Serializability\n\nStrict serializability means that operations appear to have occurred in some order, consistent with the real-time ordering of those operations; e.g. if operation A completes before operation B begins, then A should appear to precede B in the serialization order.\n\nStrict serializability implies atomicity, meaning a transaction’s sub-operations do not appear to interleave with sub-operations from other transactions. 
It also implies serializability\nby definition (appear in some order...).\n\nNote that simple, single-key operations in Dragonfly are already strictly serializable because in a shared-nothing architecture each shard-thread performs operations on its keys sequentially.\nThe complexity rises when we need to provide strict-serializability (aka serializability and linearizability) for operations spanning multiple keys.\n\n## Transactions high level overview\nTransactions in Dragonfly are orchestrated by an abstract entity, called coordination layer.\nIn reality, a client connection instance takes on itself the role of a coordinator: it coordinates a transaction every time it drives a redis or memcached command to completion. The algorithm behind Dragonfly transactions is based on the [VLL paper](https://www.cs.umd.edu/~abadi/papers/vldbj-vll.pdf).\n\nEvery step within a coordinator is done sequentially. Therefore, it's easier to describe the flow using a sequence diagram. Below is a sequence diagram of a generic transaction consisting of multiple execution steps. In this diagram, the operation it executes touches keys in two different shards: `Shard1` and `Shard2`.\n\n```mermaid\n%%{init: {'theme':'base'}}%%\nsequenceDiagram\n    participant C as Coordinator\n    participant S1 as Data Shard 1\n    participant S2 as Data Shard 2\n\n    par hop1\n    C->>+S1: Schedule\n    and\n    C->>+S2: Schedule\n    S1--)C: Ack\n    S2--)C: Ack\n    end\n\n    par hop2\n    C->>S1: Exec1\n    and\n    C->>S2: Exec1\n    S1--)C: Ack\n    S2--)C: Ack\n    end\n    par hop N+1\n    C->>S1: Exec N+Fin\n    and\n    C->>S2: Exec N+Fin\n    S1--)-C: Ack\n    S2--)-C: Ack\n    end\n```\n\nThe shared-nothing architecture of Dragonfly does not allow accessing each shard's data directly from a coordinator fiber. Instead, the coordinator sends messages to the shards and instructs them what to do at each step. Every time the coordinator sends a message, it blocks until it gets an answer. 
We call such interaction a *message hop* or a *hop* in short.\n\nThe flow consists of two different phases: *scheduling* a transaction, and *executing* it. The execution phase may consist of one or more hops, depending on the complexity of the operation we model.\n\n*Note, that only the coordinator fiber is blocked. Its thread can still execute other fibers - like processing requests on other connections or handling operations for the shard it owns. This is the advantage of adopting fibers - they allow us to separate the execution context from OS threads.*\n\n## Scheduling a transaction\n\nThe transaction initiates with a scheduling hop, during which the coordinator sends to each shard the keys that shard handles. The coordinator sends messages to multiple shards asynchronously but it waits until all shards ack and confirm that the scheduling succeeded before it proceeds to the next steps.\n\nWhen the scheduling message is processed by a data shard, it adds the transaction to its local transaction queue (tx-queue). In order to provide serializability, i.e. to make sure that all shards order their scheduled transactions in the same order, Dragonfly maintains a global sequence counter that is used to induce a total order for all its transactions.\n\nThis global counter is shared by all coordinator entities and is represented by an atomic integer. *This counter may be a source of contention - it breaks the shared nothing model, after all. However, in practice, we have not observed a significant impact on Dragonfly performance due to other optimizations we added. These will be detailed in the [Optimizations](#optimizations) section below.*\n\nTransactions in tx-queue in each shard are arranged by their sequence counter.\n\nAs shown in the snippet below, a shard thread may receive transactions in a different sequence, so a transaction with a smaller id can be added to the tx-queue after a transaction with a larger id. 
If the scheduling algorithm running on the data shard cannot reorder the last added transaction, it fails the scheduling request. In that case, the coordinator reverts the scheduling operation by removing the tx from the shards, and retries the whole hop again by allocating a new sequence number. In reality the fail-rate of a scheduling attempt is low and the retries are rare (subject to contention on the keys). Note, inconsistent reordering happens when two coordinators try to schedule multi-shard transactions concurrently:\n\n```\nC1: enqueue msg to Shard1 to schedule T1\nC2: enqueue msg to Shard1 to schedule T2  # enqueued earlier than C1\n\nC1: enqueue msg to Shard2 to schedule T1\nC2: enqueue msg to Shard2 to schedule T2 # enqueued later than C1\n\nshard1: pull T2, add it to TxQueue, pull T1, add it to TxQueue\nshard2: pull T1, add it to TxQueue, pull T2, add it to TxQueue\n\nTxQueue1: T2, T1  # wrong order\nTxQueue2: T1, T2\n```\n\n\nOnce the transaction is added to the tx-queue, the shard also marks the tx-keys using the *intent* locks. Those locks do not block the flow of the underlying operation but merely express the intent to touch or modify the key. In reality, they are represented by a map: `lock:str->counter`. If `lock[key] == 2` it means the tx-queue has 2 pending transactions that plan to modify `key`. These intent locks are used for optimizations detailed below and are not required to implement the naive version of the VLL algorithm.\n\nOnce the scheduling hop converges, it means that the transaction entered the execution phase, in which it never rolls back or retries. Once it's been scheduled, VLL guarantees the progress of subsequent execution operations while providing strict-serializability guarantees.\n\nIt's important to note that a scheduled transaction does not hold exclusivity on its keys. 
There could be other transactions that still mutate the keys it touches - these transactions were scheduled earlier and have not finished running yet, or have not even started running.\n\n## Executing a transaction\n\nOnce the transaction is scheduled, the coordinator starts sending the execution messages. We break each command into one or more micro-ops and each operation corresponds to a single message hop.\n\nFor example, \"MSET\" corresponds to a single micro-op \"mset\" that has the same semantics, but runs in parallel on all the involved shards.\n\nHowever, \"RENAME\" requires two micro-ops: fetching the data from two keys, and then the second hop - deleting/writing a key (depending on whether the key is a source or a destination).\n\nOnce a coordinator sends the micro-op request to all the shards, it waits for an answer. Only when all shards have executed the micro-op and returned the result, the coordinator is unblocked and it can proceed to the next hop. The coordinator is allowed to process the intermediary responses from the previous hops in order to define the next execution request.\n\nWhen a coordinator sends an execution request to data shards, it also specifies whether\nthis execution is the last hop for that command. This is necessary, so that shards could do clean-up operations when running the last execution request: unlocking the keys and removing the transaction from the tx-queue.\n\nThe shards always execute transactions at the head of the tx-queue. When the last execution hop for that transaction is executed the transaction is removed from the queue and the next one can be executed. This way we maintain the ordering guarantees specified by the scheduling order of the transactions and we maintain\nthe serializability of operations across multiple shards.\n\n## Multi-op transactions (Redis transactions)\n\nRedis transactions (MULTI/EXEC sequences) and commands produced by Lua scripts are modelled as consecutive commands within a Dragonfly transaction. 
In order to avoid ambiguity with terms, we call a Redis transaction - a multi-transaction in Dragonfly.\n\nThe multi feature of the transactional framework allows running consecutive commands without rescheduling the transaction for each command as if they are part of one single transaction. This feature is transparent to the commands themselves, so no changes are required for them to be used in a multi-transaction.\n\nThere are three modes called \"multi modes\" in which a multi transaction can be executed, each with its own benefits and drawbacks.\n\n__1. Global mode__\n\nThe transaction is equivalent to a global transaction with multiple hops. It is scheduled globally and the commands are executed as a series of consecutive hops. This mode is required for global commands (like MOVE) and for accessing undeclared keys in Lua scripts. Otherwise, it should be avoided, because it prevents Dragonfly from running concurrently and thus greatly decreases throughput.\n\n__2. Lock ahead mode__\n\nThe transaction is equivalent to a regular transaction with multiple hops. It is scheduled on all keys used by the commands in the transaction block, or Lua script, and the commands are executed as a series of consecutive hops.\n\n__3. Non atomic mode__\n\nAll commands are executed as separate transactions making the multi-transaction not atomic. It vastly improves the throughput with contended keys, as locks are acquired only for single commands. This mode is useful for Lua scripts without atomicity requirements.\n\n## Multi-op command squashing\n\nThere are two fundamental problems to executing a series of consecutive commands on Dragonfly:\n* each command invocation requires an expensive hop\n* executing commands sequentially makes no use of our multi-threaded architecture\n\nLuckily we can make one important observation about command sequences. 
Given a sequence of commands _where each command needs to access only a single shard_, we can conclude that as long as they are part of one atomic transaction:\n* each command needs to preserve its order only relative to other commands accessing the same shard\n* commands accessing different shards can run in parallel\n\nThe basic idea behind command squashing is identifying consecutive series of single-shard commands and separating them by shards, while maintaining their relative order within each shard. Once the commands are separated, we can execute a single hop on all relevant shards. Within each shard the hop callback will execute one by one only those commands that are assigned to its respective shard. Because all commands are already placed on their relevant threads, no further hops are required and all command callbacks are executed inline.\n\nReviewing our initial problems, command squashing:\n* Allows executing many commands with only one hop\n* Allows executing commands in parallel\n\n## Optimizations\nOut of order transactions - TBD\n\n## Blocking commands (BLPOP)\n\nRedis has a rich api with around 200 commands. A few of those commands provide blocking semantics, which allow using Redis as publisher/subscriber broker.\n\nRedis (when running as a single node) is famously single threaded, and all its operations are strictly serializable. In order to build a multi-threaded memory store with the equivalent semantics as Redis, we had to design an algorithm that can parallelize potentially blocking operations and still provide strict serializability guarantees. This section focuses mainly on how to solve this challenge for BLPOP (BRPOP) command since it involves coordinating multiple keys and is considered the more complicated case. Other blocking commands can benefit from the same principles.\n\n\n### BLPOP spec\n\nBLPOP key1 key2 key3 0\n\n*BLPOP is a blocking list pop primitive. 
It is the blocking version of LPOP because it blocks the client connection when there are no elements to pop from any of the given lists. An element is popped from the head of the first list that is non-empty, with the given keys being checked in the order that they are given.*\n\n### Non-blocking behavior of BLPOP\nWhen BLPOP is called, if at least one of the specified keys contains a non-empty list, an element is popped from the head of the list and returned to the caller together with the key it was popped from. Keys are checked in the order that they are given. Let's say that the key1 doesn't exist and key2 and key3 hold non-empty lists. Therefore, in the example above, BLPOP returns the element from list2.\n\n### Blocking behavior\nIf none of the specified keys exist, BLPOP blocks the connection until another client performs a LPUSH or RPUSH operation against one of the keys. Once new data is present on one of the lists, the client returns with the name of the key unblocking it and the popped value.\n\n### Ordering semantics\nIf a client tries to wait on multiple keys, but at least one key contains elements, the returned key / element pair is the first key from left to right that has one or more elements. In this case the client will not be blocked. So for instance, BLPOP key1 key2 key3 key4 0, assuming that both key2 and key4 are non-empty, will always return an element from key2.\n\nIf multiple clients are blocked for the same key, the first client to be served is the one that was waiting longer (the first that was blocked for the key). 
Once a client is unblocked it does not retain any priority; when it blocks again with the next call to BLPOP, it will be served according to the queue order of clients already waiting for the same key.\n\nWhen a client is blocking on multiple keys at the same time, and elements are becoming available at the same time in multiple keys (because of a transaction), the client will be unblocked with the first key on the left that received data via push operation (assuming it has enough elements to serve our client, as there could be earlier clients waiting for this key as well).\n\n### BLPOP and transactions\nIf multiple elements are pushed either via a transaction or via variadic arguments of LPUSH command then BLPOP is woken only after that transaction or command has completely finished. Specifically, when a client performs\n`LPUSH listkey a b c`, then `BLPOP listkey 0` will pop `c`, because `lpush` pushes first `a`, then `b` and then `c` which will be the first one on the left.\n\nIf a client executes a transaction that first pushes into a list and then pops from it atomically, then another client blocked on `BLPOP` won’t pop anything, because it waits for the transaction to finish. When BLPOP itself is run in a transaction its blocking behavior is disabled and it returns the “timed-out” response if there is no element to pop.\n\n### Complexity of implementing BLPOP in Dragonfly\nThe ordering semantics of BLPOP assume total order of the underlying operations. BLPOP must “observe” multiple keys simultaneously in order to determine which one is non-empty in left-to-right order. If there are no keys with items, BLPOP blocks, waits, and “observes” which key is being filled first.\n\nFor the single-threaded Redis the order is determined by following the natural execution of operations inside the main execution thread.  However, for a multi-threaded, shared-nothing execution, there is no concept of total order or a global synchronized timeline. 
For the non-blocking scenario, \"observing\" keys is atomic because we lock the keys when executing a command in Dragonfly.\n\nHowever, with the blocking scenario for BLPOP, we do not have a built-in mechanism to determine which key was filled earlier - since, as stated, the concept of total order does not exist for multiple shards.\n\n### Interesting examples to consider:\n\n**Ex1:**\n```\nclient1: blpop X, Y  // blocks\nclient2: lpush X A\nclient3: exist X Y\n```\n\nClient3 should always return 0.\n\n**Ex2:**\n\n```\nclient1: BLPOP X Y Z\nclient2: RPUSH X A\nclient3: RPUSH X B;  RPUSH Y B\n```\n\n**Ex3:**\n\n```\nclient1: BLPOP X Y Z\nclient2: RPUSH Z C\nclient3: RPUSH X A\nclient4: RPUSH X B; RPUSH Y B\n```\n\n### BLPOP Ramblings\nThere are two cases of how a key can appear and wake a blocking `BLPOP`:\n\na. with lpush/rpush/rename commands.\nb. via multi-transaction.\n\n`(a)` is actually easy to reason about, because those commands operate on a single key and single key operations are strictly serializable in shared-nothing architecture.\n\nWith `(b)` we need to consider the case where we have \"BLPOP X Y 0\" and then a multi-transaction fills both `y` and `x` using multiple \"lpush\" commands. Luckily, a multi-transaction in Dragonfly introduces a global barrier across all its shards, and it does not allow any other transactions to run as long as it does not finish. So the blocking \"blpop\" won't be awakened until the multi-transaction finishes its run. 
By that time the state of the keys will be well defined and \"blpop\" will be able to choose the first non empty key to pop from.\n\n\n## Background reading:\n\n### Strict Serializability\nHere is a [very nice diagram](https://jepsen.io/consistency) showing how various consistency models relate.\n\nSingle node Redis is strictly serializable because all its operations are executed sequentially\nand atomically in a single thread.\n\nMore formally: following the definition from https://jepsen.io/consistency/models/strict-serializable - due to the single threaded design of Redis, its transactions are executed in a global order, which is consistent with the main thread clock, hence it’s strictly serializable.\n\nSerializability is a global property that given a transaction log, there is an order with which transactions are consistent (the log order is not relevant).\n\nExample of serializable but not linearizable transaction: https://gist.github.com/pbailis/8279494\n\nMore material to read:\n* [Fauna Serializability vs Linearizability](https://fauna.com/blog/serializability-vs-strict-serializability-the-dirty-secret-of-database-isolation-levels)\n* [Jepsen consistency diagrams](https://jepsen.io/consistency)\n* [Strict Serializability definition](https://jepsen.io/consistency/models/strict-serializable)\n* [Example of serializable but not linearizable schedule](https://gist.github.com/pbailis/8279494)\n* [Atomic clocks and distributed databases](https://www.cockroachlabs.com/blog/living-without-atomic-clocks/)\n* [Another cockroach article about consistency](https://www.cockroachlabs.com/blog/consistency-model/)\n* [Abadi blog](http://dbmsmusings.blogspot.com/)\n* [Peter Bailis blog](http://www.bailis.org/blog) (both wrote lots of material on the subject)\n"
  },
  {
    "path": "fuzz/FUZZING.md",
    "content": "# AFL++ Fuzzing for Dragonfly\n\n## Install AFL++\n\nAFL++ must be built from source with `AFL_PERSISTENT_RECORD` enabled for crash replay.\n\n```bash\nsudo apt update\nsudo apt install llvm-18-dev clang-18 lld-18 gcc-13-plugin-dev\n\ngit clone --depth=1 --branch v4.34c https://github.com/AFLplusplus/AFLplusplus.git\ncd AFLplusplus\n\n# Enable AFL_PERSISTENT_RECORD (required for stateful crash replay)\nsed -i 's|// #define AFL_PERSISTENT_RECORD|#define AFL_PERSISTENT_RECORD|' include/config.h\n\nmake distrib\nsudo make install\n```\n\n## Prepare System\n\n```bash\nsudo afl-system-config\n```\n\n`run_fuzzer.sh` also runs these checks automatically (core_pattern, CPU governor).\n\n## Build Dragonfly\n\n```bash\ncmake -B build-dbg -DUSE_AFL=ON -DCMAKE_BUILD_TYPE=Debug -GNinja\nninja -C build-dbg dragonfly\n```\n\n## Run Fuzzer\n\n```bash\ncd fuzz\n./run_fuzzer.sh              # RESP protocol (default)\n./run_fuzzer.sh memcache     # Memcache text protocol\n```\n\nConfiguration via environment variables:\n\n| Variable | Default | Description |\n|----------|---------|-------------|\n| `AFL_PROACTOR_THREADS` | `1` | Server threads (1 = most stable coverage) |\n| `AFL_LOOP_LIMIT` | `10000` | Iterations before server restart (= `AFL_PERSISTENT_RECORD`) |\n| `BUILD_DIR` | `build-dbg` | Path to build directory |\n\n## Custom Mutators\n\nEach target has a custom AFL++ mutator that operates at the protocol level.\nInstead of flipping random bytes (which mostly breaks protocol framing and\ngets rejected by the parser), they:\n\n- Parse input into a list of commands\n- Mutate at the command/argument level (replace command, change argument,\n  insert/remove commands, swap order)\n- Serialize back to valid protocol format\n\n| Target | Mutator | Details |\n|--------|---------|---------|\n| `resp` | `resp_mutator.py` | 150+ Redis commands, wraps in MULTI/EXEC |\n| `memcache` | `memcache_mutator.py` | Store/get/meta commands, noreply toggle |\n\nMutators are loaded 
automatically by `run_fuzzer.sh`. AFL++'s built-in\nbyte-level mutations also run alongside them (useful for parser edge cases).\n\nTo use only the custom mutator: `export AFL_CUSTOM_MUTATOR_ONLY=1`.\n\n## Crash Replay\n\nDragonfly uses AFL++ persistent mode — the server accumulates state across\niterations. A crash at iteration N depends on state built by inputs 1..N-1.\n\n`run_fuzzer.sh` syncs `AFL_PERSISTENT_RECORD` with `afl_loop_limit`\nso the full state history is always available on crash.\n\nWhen a crash occurs, AFL++ saves:\n```\ncrashes/id:000000,sig:06,...           # the crashing input\ncrashes/RECORD:000000,cnt:000000      # first input after server start\ncrashes/RECORD:000000,cnt:000001      # second input\n...\ncrashes/RECORD:000000,cnt:NNNNNN      # input before the crash\n```\n\n### Replay (RESP)\n\n```bash\n./build/dragonfly --port 6379 --logtostderr --proactor_threads 1 --dbfilename=\"\"\n\npython3 fuzz/replay_crash.py fuzz/artifacts/resp/default/crashes 000000\n```\n\n### Replay (memcache)\n\n```bash\n./build/dragonfly --port 6379 --memcached_port=11211 --logtostderr --proactor_threads 1 --dbfilename=\"\"\n\npython3 fuzz/replay_crash.py fuzz/artifacts/memcache/default/crashes 000000 127.0.0.1 11211\n```\n\n### Package crash for sharing\n\n```bash\ncd fuzz\n# RESP\n./package_crash.sh 000000\n# Memcache\n./package_crash.sh 000000 fuzz/artifacts/memcache/default/crashes\n```\n\nCreates `crash-000000.tar.gz` containing crash data and `replay_crash.py`.\nThe recipient runs:\n\n```bash\n# RESP\n./build/dragonfly --port 6379 --logtostderr --proactor_threads 1 --dbfilename=\"\"\npython3 replay_crash.py crashes 000000\n\n# Memcache\n./build/dragonfly --port 6379 --memcached_port=11211 --logtostderr --proactor_threads 1 --dbfilename=\"\"\npython3 replay_crash.py crashes 000000 127.0.0.1 11211\n```\n\n## Seed Corpus\n\n| Target | Directory | Seeds | Coverage |\n|--------|-----------|-------|----------|\n| `resp` | `seeds/resp/` | 79 | string, list, hash, 
set, zset, stream, JSON, search, bloom, geo, HLL, bitops, scripting, ACL, pub/sub, transactions, server ops |\n| `memcache` | `seeds/memcache/` | 15 | set/get, add/replace, append/prepend, cas, incr/decr, delete, multiget, gat, noreply, meta commands, flush, stats |\n\nTo add a new RESP seed:\n```\n*3\n$3\nSET\n$3\nkey\n$5\nvalue\n```\n\nTo add a new memcache seed:\n```\nset mykey 0 0 5\nhello\nget mykey\n```\n"
  },
  {
    "path": "fuzz/dict/memcache.dict",
    "content": "# Memcache text protocol dictionary for AFL++\n\n# Store commands\n\"set\"\n\"add\"\n\"replace\"\n\"append\"\n\"prepend\"\n\"cas\"\n\n# Retrieval commands\n\"get\"\n\"gets\"\n\"gat\"\n\"gats\"\n\n# Utility commands\n\"delete\"\n\"incr\"\n\"decr\"\n\"flush_all\"\n\"stats\"\n\"version\"\n\"quit\"\n\n# Meta commands\n\"ms\"\n\"mg\"\n\"md\"\n\"ma\"\n\"mn\"\n\"me\"\n\n# Flags/options\n\"noreply\"\n\n# Common keys\n\"key\"\n\"mykey\"\n\"k1\"\n\"k2\"\n\"k3\"\n\"counter\"\n\n# Numbers\n\"0\"\n\"1\"\n\"5\"\n\"10\"\n\"100\"\n\"1000\"\n\"65535\"\n\"4294967295\"\n\"99999999999\"\n\n# Expiry values\n\"0\"\n\"30\"\n\"3600\"\n\"9999999\"\n\n# Line endings\n\"\\x0d\\x0a\"\n\n# Partial commands for edge cases\n\"set \"\n\"get \"\n\"delete \"\n\"incr \"\n\"decr \"\n\"cas \"\n\"gat \"\n\n# Malformed patterns\n\"\\x0d\"\n\"\\x0a\"\n\"\\x00\"\n\"\\xff\"\n\" \"\n\"  \"\n\"\"\n"
  },
  {
    "path": "fuzz/dict/resp.dict",
    "content": "# AFL++ dictionary for RESP protocol\n# Dragonfly command keywords and common patterns\n\n# RESP protocol markers\n\"*\"\n\"$\"\n\"+\"\n\"-\"\n\":\"\n\"\\x0d\\x0a\"\n\n# Common commands - String operations\n\"GET\"\n\"SET\"\n\"MGET\"\n\"MSET\"\n\"INCR\"\n\"DECR\"\n\"APPEND\"\n\"STRLEN\"\n\"SETEX\"\n\"SETNX\"\n\"GETSET\"\n\"GETRANGE\"\n\"SETRANGE\"\n\n# List operations\n\"LPUSH\"\n\"RPUSH\"\n\"LPOP\"\n\"RPOP\"\n\"LLEN\"\n\"LRANGE\"\n\"LINDEX\"\n\"LSET\"\n\"LTRIM\"\n\n# Hash operations\n\"HSET\"\n\"HGET\"\n\"HMSET\"\n\"HMGET\"\n\"HGETALL\"\n\"HDEL\"\n\"HEXISTS\"\n\"HLEN\"\n\"HKEYS\"\n\"HVALS\"\n\"HINCRBY\"\n\n# Set operations\n\"SADD\"\n\"SREM\"\n\"SMEMBERS\"\n\"SISMEMBER\"\n\"SCARD\"\n\"SINTER\"\n\"SUNION\"\n\"SDIFF\"\n\"SPOP\"\n\n# Sorted set operations\n\"ZADD\"\n\"ZREM\"\n\"ZRANGE\"\n\"ZRANGEBYSCORE\"\n\"ZRANK\"\n\"ZSCORE\"\n\"ZCARD\"\n\"ZCOUNT\"\n\"ZINCRBY\"\n\n# Key operations\n\"DEL\"\n\"EXISTS\"\n\"EXPIRE\"\n\"TTL\"\n\"PERSIST\"\n\"KEYS\"\n\"SCAN\"\n\"TYPE\"\n\"RENAME\"\n\"RENAMENX\"\n\n# Transaction commands\n\"MULTI\"\n\"EXEC\"\n\"DISCARD\"\n\"WATCH\"\n\"UNWATCH\"\n\n# Pub/Sub commands\n\"PUBLISH\"\n\"SUBSCRIBE\"\n\"UNSUBSCRIBE\"\n\"PSUBSCRIBE\"\n\"PUNSUBSCRIBE\"\n\n# Stream commands\n\"XADD\"\n\"XREAD\"\n\"XRANGE\"\n\"XLEN\"\n\"XDEL\"\n\"XTRIM\"\n\"XGROUP\"\n\"XREADGROUP\"\n\n# JSON commands\n\"JSON.SET\"\n\"JSON.GET\"\n\"JSON.DEL\"\n\"JSON.TYPE\"\n\"JSON.NUMINCRBY\"\n\"JSON.ARRAPPEND\"\n\"JSON.ARRLEN\"\n\n# Bloom filter commands\n\"BF.ADD\"\n\"BF.EXISTS\"\n\"BF.RESERVE\"\n\"BF.MADD\"\n\"BF.MEXISTS\"\n\n# HyperLogLog commands\n\"PFADD\"\n\"PFCOUNT\"\n\"PFMERGE\"\n\n# Geo commands\n\"GEOADD\"\n\"GEODIST\"\n\"GEORADIUS\"\n\"GEOSEARCH\"\n\n# Server commands\n\"PING\"\n\"ECHO\"\n\"INFO\"\n\"DBSIZE\"\n\"SELECT\"\n\n# Cluster commands\n\"CLUSTER\"\n\"READONLY\"\n\"READWRITE\"\n\n# Common keys for testing\n\"key\"\n\"mykey\"\n\"key1\"\n\"key2\"\n\"test\"\n\"foo\"\n\"bar\"\n\"user:1\"\n\"session:123\"\n\n# Common 
values\n\"value\"\n\"hello\"\n\"world\"\n\"123\"\n\"0\"\n\"1\"\n\"-1\"\n\n# Number patterns (0, 1, -1 already above)\n\"100\"\n\"1000\"\n\"-100\"\n\n# Special arguments\n\"NX\"\n\"XX\"\n\"EX\"\n\"PX\"\n\"GT\"\n\"LT\"\n\"WITHSCORES\"\n\"LIMIT\"\n\"COUNT\"\n\"MATCH\"\n\n# Small RESP framing patterns (larger patterns removed — AFL++ warned about >33B tokens)\n\"*1\\x0d\\x0a$\"\n\"*2\\x0d\\x0a$\"\n\"*3\\x0d\\x0a$\"\n\n# Scripting commands\n\"EVAL\"\n\"EVALSHA\"\n\"EVAL_RO\"\n\"EVALSHA_RO\"\n\"SCRIPT\"\n\n# Bitfield commands\n\"BITFIELD\"\n\"BITFIELD_RO\"\n\"BITOP\"\n\"BITCOUNT\"\n\"BITPOS\"\n\"GETBIT\"\n\"SETBIT\"\n\n# More sorted set operations\n\"ZINTER\"\n\"ZUNION\"\n\"ZINTERSTORE\"\n\"ZUNIONSTORE\"\n\"ZPOPMIN\"\n\"ZPOPMAX\"\n\"ZMPOP\"\n\n# Edge case numbers\n\"9223372036854775807\"\n\"-9223372036854775808\"\n\"2147483647\"\n\"-2147483648\"\n\"0.0\"\n\"-0.0\"\n\"inf\"\n\"-inf\"\n\"+inf\"\n\"nan\"\n\n# Stream IDs and patterns\n\"0-0\"\n\"0-*\"\n\"$\"\n\">\"\n\"*\"\n\"MAXLEN\"\n\"MINID\"\n\n# JSON paths\n\"$..\"\n\"$[*]\"\n\"$[-1]\"\n\"$.name\"\n\"$..name\"\n\n# RESP protocol edge cases\n\"*-1\\x0d\\x0a\"\n\"$-1\\x0d\\x0a\"\n\"*0\\x0d\\x0a\"\n\"$0\\x0d\\x0a\\x0d\\x0a\"\n\n# Lua scripting patterns\n\"return redis.call\"\n\"redis.pcall\"\n\"KEYS[1]\"\n\"ARGV[1]\"\n\n# Bitfield subcommands\n\"OVERFLOW\"\n\"WRAP\"\n\"SAT\"\n\"FAIL\"\n\n# Aggregate options\n\"AGGREGATE\"\n\"SUM\"\n\"MIN\"\n\"MAX\"\n\"WEIGHTS\"\n\n# Binary edge cases\n\"\\x00\"\n\"\\xff\"\n\"\\x00\\x00\\x00\\x00\"\n\n# --- Additional commands for broader coverage ---\n\n# Missing key operations\n\"COPY\"\n\"SORT\"\n\"SORT_RO\"\n\"UNLINK\"\n\"TOUCH\"\n\"OBJECT\"\n\"RANDOMKEY\"\n\"DUMP\"\n\"RESTORE\"\n\"WAIT\"\n\"EXPIREAT\"\n\"PEXPIRE\"\n\"PEXPIREAT\"\n\"PEXPIRETIME\"\n\"EXPIRETIME\"\n\"PTTL\"\n\n# String commands\n\"GETDEL\"\n\"GETEX\"\n\"INCRBYFLOAT\"\n\"DECRBY\"\n\"INCRBY\"\n\"MSETNX\"\n\"PSETEX\"\n\"SUBSTR\"\n\n# List 
commands\n\"LPOS\"\n\"LMPOP\"\n\"LMOVE\"\n\"BLMOVE\"\n\"BLMPOP\"\n\"BLPOP\"\n\"BRPOP\"\n\"LPUSHX\"\n\"RPUSHX\"\n\"RPOPLPUSH\"\n\n# Set commands\n\"SRANDMEMBER\"\n\"SMOVE\"\n\"SMISMEMBER\"\n\"SINTERCARD\"\n\"SDIFFSTORE\"\n\"SINTERSTORE\"\n\"SUNIONSTORE\"\n\n# Sorted set commands\n\"ZDIFF\"\n\"ZDIFFSTORE\"\n\"ZLEXCOUNT\"\n\"ZRANGEBYLEX\"\n\"ZRANGESTORE\"\n\"ZRANDMEMBER\"\n\"ZREVRANGE\"\n\"ZREVRANGEBYLEX\"\n\"ZREVRANGEBYSCORE\"\n\"ZREVRANK\"\n\"ZMSCORE\"\n\"ZREMRANGEBYLEX\"\n\"ZREMRANGEBYRANK\"\n\"ZREMRANGEBYSCORE\"\n\"BZMPOP\"\n\"BZPOPMIN\"\n\"BZPOPMAX\"\n\n# Hash commands\n\"HRANDFIELD\"\n\"HSCAN\"\n\"HSETEX\"\n\"HSETNX\"\n\"HSTRLEN\"\n\"HINCRBYFLOAT\"\n\"HEXPIRE\"\n\n# Server/client commands\n\"CLIENT\"\n\"CONFIG\"\n\"MEMORY\"\n\"ACL\"\n\"HELLO\"\n\"COMMAND\"\n\"LATENCY\"\n\"SLOWLOG\"\n\"BGSAVE\"\n\"LASTSAVE\"\n\"ROLE\"\n\n# Subcommands\n\"OBJECT ENCODING\"\n\"OBJECT HELP\"\n\"OBJECT FREQ\"\n\"OBJECT IDLETIME\"\n\"CLIENT SETNAME\"\n\"CLIENT GETNAME\"\n\"CLIENT LIST\"\n\"CLIENT ID\"\n\"CLIENT INFO\"\n\"CONFIG GET\"\n\"CONFIG SET\"\n\"MEMORY USAGE\"\n\"MEMORY DOCTOR\"\n\"ACL LIST\"\n\"ACL WHOAMI\"\n\"ACL SETUSER\"\n\"COMMAND COUNT\"\n\"COMMAND INFO\"\n\n# Scan operations (HSCAN already above)\n\"SSCAN\"\n\"ZSCAN\"\n\n# Function/script commands\n\"FUNCTION\"\n\"FUNCTION LOAD\"\n\"FUNCTION LIST\"\n\"FUNCTION DELETE\"\n\n# More JSON commands\n\"JSON.ARRINSERT\"\n\"JSON.ARRTRIM\"\n\"JSON.ARRPOP\"\n\"JSON.ARRINDEX\"\n\"JSON.OBJKEYS\"\n\"JSON.OBJLEN\"\n\"JSON.STRAPPEND\"\n\"JSON.STRLEN\"\n\"JSON.TOGGLE\"\n\"JSON.CLEAR\"\n\"JSON.MERGE\"\n\"JSON.MGET\"\n\"JSON.MSET\"\n\"JSON.DEBUG\"\n\"JSON.RESP\"\n\n# More Geo commands\n\"GEOPOS\"\n\"GEOHASH\"\n\"GEOSEARCHSTORE\"\n\"GEORADIUSBYMEMBER\"\n\n# Search commands\n\"FT.CREATE\"\n\"FT.SEARCH\"\n\"FT.DROPINDEX\"\n\"FT.INFO\"\n\"FT.ALTER\"\n\n# Additional 
arguments\n\"REPLACE\"\n\"ABSTTL\"\n\"IDLETIME\"\n\"FREQ\"\n\"LEFT\"\n\"RIGHT\"\n\"BEFORE\"\n\"AFTER\"\n\"BY\"\n\"ASC\"\n\"DESC\"\n\"ALPHA\"\n\"STORE\"\n\"REV\"\n\"BYSCORE\"\n\"BYLEX\"\n\"CH\"\n\"KEEPTTL\"\n\"EXAT\"\n\"PXAT\"\n\"ENCODING\"\n\"REFCOUNT\"\n\n# Malformed RESP for edge-case testing\n\"*-2\\x0d\\x0a\"\n\"*999999\\x0d\\x0a\"\n\"$-2\\x0d\\x0a\"\n\"$999999999\\x0d\\x0a\"\n\"*\\x0d\\x0a\"\n\"$\\x0d\\x0a\"\n\"+\\x0d\\x0a\"\n\"-\\x0d\\x0a\"\n\":\\x0d\\x0a\"\n\n# Inline commands (no RESP framing)\n\"PING\\x0d\\x0a\"\n\"PING\\x0a\"\n\"SET key value\\x0d\\x0a\"\n\"GET key\\x0a\"\n\"QUIT\\x0d\\x0a\"\n\n# More binary patterns\n\"\\xfe\\xff\\x00\\x01\"\n\"\\x0d\\x0a\\x0d\\x0a\"\n\"\\x0d\\x0d\\x0a\\x0a\"\n\"\\x00\\x01\\x02\\x03\"\n\n# RESP edge cases (small fragments only)\n\"$0\\x0d\\x0a\\x0d\\x0a\"\n\"$-1\\x0d\\x0a\"\n"
  },
  {
    "path": "fuzz/generate_targeted_seeds.py",
    "content": "#!/usr/bin/env python3\n\"\"\"Generate PR-targeted fuzzing inputs from a code diff using an LLM.\n\nFuzzing terminology used in this file:\n  - Seed:  An initial input file for the fuzzer. Each seed is a sequence of\n           commands encoded in RESP wire format (see fuzz/seeds/resp/*.resp for examples).\n           The fuzzer starts from these seeds and mutates them to explore code paths.\n  - Targeted seed:  A seed crafted specifically to exercise code paths changed in a PR.\n           We send the PR diff + all existing seeds to an LLM, and it generates new seeds\n           that target the changed code.\n  - Focus commands:  A list of command names (e.g. [\"SET\", \"GET\"]) that the\n           AFL++ mutator should prefer. When set, the mutator picks these commands ~70%\n           of the time instead of choosing uniformly from all known commands.\n\nFlow:\n  1. Read unified diff from stdin, extract changed C++ file paths.\n  2. Load all existing seed files so the LLM knows what's already covered.\n  3. Call Claude API: send the diff + seeds, get back JSON with command arrays + focus commands.\n  4. Encode commands as RESP wire format, write to output dir.\n\nThe LLM returns commands as plain arrays (e.g. [\"SET\", \"key\", \"value\"]) and we handle\nRESP encoding ourselves — this avoids JSON escaping issues and byte-count mismatches.\n\nWhen ANTHROPIC_API_KEY is not available (e.g. 
fork PRs), exits with no output and\nthe fuzzer runs with the existing seed corpus as-is.\n\nUsage:\n    git diff base..HEAD | python3 fuzz/generate_targeted_seeds.py --output-dir /tmp/seeds\n\"\"\"\n\nimport argparse\nimport glob\nimport json\nimport os\nimport re\nimport sys\n\n# Max diff lines to send to the LLM (Haiku handles ~200K tokens, so this is generous)\nMAX_DIFF_LINES = 20000\n\nLLM_SYSTEM_PROMPT = \"\"\"\\\nYou are a fuzzing expert for Dragonfly, a Redis-compatible in-memory database written in C++.\n\nYour job: given a code diff and existing seed files, generate NEW fuzzing seeds that \\\ntarget the changed code paths. You also return a list of Redis commands to focus on.\n\n## Dragonfly architecture (for context)\n- src/server/*_family.cc — command implementations (e.g. string_family.cc has GET/SET/INCR)\n- src/server/main_service.cc — command dispatch, MULTI/EXEC\n- src/server/db_slice.cc — per-shard key-value storage\n- src/facade/redis_parser.cc — RESP protocol parsing\n- src/facade/dragonfly_connection.cc — connection handling\n- src/core/ — data structures (dash table, dense_set, compact_object, etc.)\n- src/server/journal/ — replication journal\n- src/server/cluster/ — cluster mode\n- src/server/search/ — search module (FT.* commands)\n- src/server/tiering/ — SSD tiering\n\n## What to generate\nBased on the diff, figure out:\n1. What commands are affected (new, modified, or impacted by infrastructure changes)\n2. What edge cases the changes introduce (boundary values, empty inputs, error paths)\n3. What command sequences would stress the changed code\n\n## Output format\nReturn valid JSON (no markdown, no explanation):\n{\n  \"focus_commands\": [\"CMD1\", \"CMD2\", ...],\n  \"seeds\": [\n    {\n      \"name\": \"pr_something.resp\",\n      \"commands\": [\n        [\"SET\", \"mykey\", \"myvalue\"],\n        [\"GET\", \"mykey\"]\n      ]\n    }\n  ]\n}\n\nEach \"commands\" entry is a list of Redis commands. 
Each command is a list of strings \\\n(command name + arguments). We handle RESP wire encoding — just give plain strings.\n\nCRITICAL: Output must be valid JSON. Do NOT use code expressions like \"x\" * 1024 or \\\nstring concatenation. For long values write actual repeated characters inline, e.g. \\\n\"xxxxxxxxxx\" (just the literal string). Keep values short (under 100 chars) — \\\nthe fuzzer will mutate and grow them.\n\nRules for seeds:\n- 3-10 commands per seed, forming a logical sequence\n- Include setup commands before queries (e.g. SET before GET)\n- Test edge cases from the diff: boundary values, empty/huge inputs, type mismatches\n- Include at least one seed wrapping commands in MULTI/EXEC\n- Generate 3-8 seeds total\n- Prefix all names with \"pr_\"\n\"\"\"\n\n\ndef extract_changed_files(diff_text):\n    \"\"\"Extract C++/header file paths from a unified diff.\"\"\"\n    files = []\n    for match in re.finditer(r\"^diff --git a/(.+?) b/(.+?)$\", diff_text, re.MULTILINE):\n        path = match.group(2)\n        if re.search(r\"\\.(cc|h)$\", path):\n            files.append(path)\n    return sorted(set(files))\n\n\ndef load_example_seeds(seeds_dir):\n    \"\"\"Load ALL existing seed files to show the LLM what's already covered.\n\n    We send every seed so the LLM has full context about existing coverage\n    and can generate complementary seeds for new/changed code paths.\n    \"\"\"\n    examples = []\n    for path in sorted(glob.glob(os.path.join(seeds_dir, \"*.resp\"))):\n        name = os.path.basename(path)\n        with open(path) as f:\n            examples.append({\"name\": name, \"content\": f.read()})\n    return examples\n\n\ndef truncate_diff(diff_text, max_lines=MAX_DIFF_LINES):\n    \"\"\"Truncate diff to max_lines.\"\"\"\n    lines = diff_text.splitlines(True)\n    if len(lines) <= max_lines:\n        return diff_text, len(lines)\n    return \"\".join(lines[:max_lines]), max_lines\n\n\ndef encode_resp(commands):\n    \"\"\"Encode a list of 
commands as RESP wire format.\n\n    Each command is a list of string arguments, e.g. [\"SET\", \"key\", \"value\"].\n    Returns bytes in RESP format: *N\\\\r\\\\n$len\\\\r\\\\narg\\\\r\\\\n...\n    \"\"\"\n    result = bytearray()\n    for cmd in commands:\n        if not cmd:\n            continue\n        result.extend(b\"*%d\\r\\n\" % len(cmd))\n        for arg in cmd:\n            arg_bytes = arg.encode() if isinstance(arg, str) else arg\n            result.extend(b\"$%d\\r\\n%s\\r\\n\" % (len(arg_bytes), arg_bytes))\n    return bytes(result)\n\n\ndef call_llm(diff_text, changed_files, example_seeds, api_key, model):\n    \"\"\"Call Claude API to generate targeted seeds from the diff.\"\"\"\n    try:\n        import anthropic\n    except ImportError:\n        print(\"anthropic package not available\", file=sys.stderr)\n        return None\n\n    truncated, num_lines = truncate_diff(diff_text)\n\n    # Build examples section — show existing seeds so the LLM knows what's covered\n    examples_text = \"\"\n    for ex in example_seeds:\n        examples_text += \"--- %s ---\\n%s\\n\\n\" % (ex[\"name\"], ex[\"content\"].rstrip())\n\n    prompt = (\n        \"Here are ALL existing seed files (RESP wire format) so you know what's already covered:\\n\\n\"\n        \"%s\\n\"\n        \"Now analyze this diff and generate targeted fuzzing seeds.\\n\\n\"\n        \"Changed files: %s\\n\\n\"\n        \"Diff (%d lines):\\n```\\n%s\\n```\\n\\n\"\n        \"Respond with valid JSON only.\"\n    ) % (examples_text, \", \".join(changed_files), num_lines, truncated)\n\n    client = anthropic.Anthropic(api_key=api_key)\n    response = client.messages.create(\n        model=model,\n        max_tokens=16384,\n        system=LLM_SYSTEM_PROMPT,\n        messages=[{\"role\": \"user\", \"content\": prompt}],\n    )\n\n    text = response.content[0].text.strip()\n\n    # Try to extract JSON from the response (LLMs sometimes wrap in markdown)\n    json_match = 
re.search(r\"```(?:json)?\\s*\\n(.*?)\\n```\", text, re.DOTALL)\n    if json_match:\n        text = json_match.group(1)\n\n    try:\n        return json.loads(text)\n    except json.JSONDecodeError:\n        pass\n\n    # Try to find the outermost { ... } and parse that\n    brace_match = re.search(r\"\\{.*\\}\", text, re.DOTALL)\n    if brace_match:\n        try:\n            return json.loads(brace_match.group(0))\n        except json.JSONDecodeError:\n            pass\n\n    # Log raw response for debugging and raise\n    print(\"Raw LLM response (first 2000 chars):\\n%s\" % text[:2000], file=sys.stderr)\n    raise ValueError(\"Could not parse LLM response as JSON\")\n\n\ndef write_output(output_dir, focus_commands, seeds):\n    \"\"\"Write seed files and focus_commands.json to output directory.\"\"\"\n    os.makedirs(output_dir, exist_ok=True)\n\n    focus_path = os.path.join(output_dir, \"focus_commands.json\")\n    with open(focus_path, \"w\") as f:\n        json.dump(focus_commands, f)\n    print(\"Wrote %d focus commands to %s\" % (len(focus_commands), focus_path), file=sys.stderr)\n\n    written = 0\n    for seed in seeds:\n        name = seed.get(\"name\") or \"pr_seed_%d.resp\" % written\n        if not name.endswith(\".resp\"):\n            name += \".resp\"\n        path = os.path.join(output_dir, name)\n        with open(path, \"wb\") as f:\n            f.write(seed[\"content\"])\n        written += 1\n\n    print(\"Wrote %d seed files to %s\" % (written, output_dir), file=sys.stderr)\n\n\ndef main():\n    parser = argparse.ArgumentParser(description=\"Generate targeted fuzzing seeds from a PR diff\")\n    parser.add_argument(\n        \"--output-dir\",\n        default=\"fuzz/seeds/pr_targeted\",\n        help=\"Directory to write seeds and focus_commands.json\",\n    )\n    parser.add_argument(\n        \"--seeds-dir\",\n        default=None,\n        help=\"Directory with existing seed files (auto-detected if not set)\",\n    )\n    
parser.add_argument(\n        \"--api-key\", default=None, help=\"Anthropic API key (or set ANTHROPIC_API_KEY env var)\"\n    )\n    parser.add_argument(\"--model\", default=\"claude-haiku-4-5-20251001\", help=\"Claude model to use\")\n    args = parser.parse_args()\n\n    api_key = args.api_key or os.environ.get(\"ANTHROPIC_API_KEY\")\n    if not api_key:\n        print(\"No ANTHROPIC_API_KEY set, skipping seed generation\", file=sys.stderr)\n        return\n\n    diff_text = sys.stdin.read()\n    if not diff_text.strip():\n        print(\"No diff provided, skipping\", file=sys.stderr)\n        return\n\n    changed_files = extract_changed_files(diff_text)\n    if not changed_files:\n        print(\"No C++ files in diff, skipping\", file=sys.stderr)\n        return\n\n    print(\"Changed C++ files: %s\" % \", \".join(changed_files), file=sys.stderr)\n\n    # Find seeds directory\n    seeds_dir = args.seeds_dir\n    if not seeds_dir:\n        script_dir = os.path.dirname(os.path.abspath(__file__))\n        seeds_dir = os.path.join(script_dir, \"seeds\", \"resp\")\n\n    example_seeds = load_example_seeds(seeds_dir)\n    print(\"Loaded %d existing seeds\" % len(example_seeds), file=sys.stderr)\n\n    try:\n        result = call_llm(diff_text, changed_files, example_seeds, api_key, args.model)\n    except Exception as e:\n        print(\"LLM call failed: %s\" % e, file=sys.stderr)\n        return\n\n    if not result:\n        return\n\n    # Extract focus commands\n    focus_commands = result.get(\"focus_commands\", [])\n    if not isinstance(focus_commands, list):\n        focus_commands = []\n\n    # Encode command arrays as RESP and collect valid seeds\n    valid_seeds = []\n    for s in result.get(\"seeds\", []):\n        if not isinstance(s, dict) or \"commands\" not in s:\n            continue\n        commands = s[\"commands\"]\n        if not isinstance(commands, list) or not commands:\n            continue\n        # Filter out non-list entries and ensure 
all args are strings\n        clean_commands = []\n        for cmd in commands:\n            if isinstance(cmd, list) and cmd:\n                clean_commands.append([str(arg) for arg in cmd])\n        if not clean_commands:\n            continue\n        content = encode_resp(clean_commands)\n        if content:\n            valid_seeds.append({\"name\": s.get(\"name\") or \"\", \"content\": content})\n        else:\n            print(\"Discarding empty seed: %s\" % s.get(\"name\", \"?\"), file=sys.stderr)\n\n    if not valid_seeds and not focus_commands:\n        print(\"LLM returned no usable output\", file=sys.stderr)\n        return\n\n    print(\n        \"Generated %d seeds, %d focus commands\" % (len(valid_seeds), len(focus_commands)),\n        file=sys.stderr,\n    )\n    write_output(args.output_dir, focus_commands, valid_seeds)\n\n\nif __name__ == \"__main__\":\n    main()\n"
  },
  {
    "path": "fuzz/memcache_mutator.py",
    "content": "\"\"\"AFL++ custom mutator for memcache text protocol.\n\nMutates at the command level instead of random bytes,\nkeeping memcache protocol framing valid.\n\nUsage:\n    export PYTHONPATH=/path/to/dragonfly/fuzz\n    export AFL_PYTHON_MODULE=memcache_mutator\n    afl-fuzz ...\n\"\"\"\n\nimport random\n\n# fmt: off\n# (command, type, min_extra_args, max_extra_args)\n# type: \"store\" = key flags exptime bytes [noreply]\\r\\ndata\\r\\n\n#       \"cas\"   = key flags exptime bytes cas_unique [noreply]\\r\\ndata\\r\\n\n#       \"get\"   = key [key ...]\\r\\n\n#       \"gat\"   = exptime key [key ...]\\r\\n\n#       \"delta\" = key delta [noreply]\\r\\n\n#       \"del\"   = key [noreply]\\r\\n\n#       \"bare\"  = \\r\\n (no args)\n#       \"meta_store\" = key datalen [flags...]\\r\\ndata\\r\\n\n#       \"meta\"  = key [flags...]\\r\\n\n\nCOMMANDS = [\n    # Store commands\n    (\"set\",     \"store\"),\n    (\"add\",     \"store\"),\n    (\"replace\", \"store\"),\n    (\"append\",  \"store\"),\n    (\"prepend\", \"store\"),\n    (\"cas\",     \"cas\"),\n    # Retrieval\n    (\"get\",     \"get\"),\n    (\"gets\",    \"get\"),\n    (\"gat\",     \"gat\"),\n    (\"gats\",    \"gat\"),\n    # Delete / arithmetic\n    (\"delete\",  \"del\"),\n    (\"incr\",    \"delta\"),\n    (\"decr\",    \"delta\"),\n    # Utility\n    (\"flush_all\", \"bare\"),\n    (\"stats\",     \"bare\"),\n    (\"version\",   \"bare\"),\n    (\"quit\",      \"bare\"),\n    # Meta commands\n    (\"ms\",      \"meta_store\"),\n    (\"mg\",      \"meta\"),\n    (\"md\",      \"meta\"),\n    (\"ma\",      \"meta\"),\n    (\"mn\",      \"bare\"),\n    (\"me\",      \"meta\"),\n]\n# fmt: on\n\nKEYS = [b\"k\", b\"key\", b\"k1\", b\"k2\", b\"k3\", b\"mykey\", b\"counter\", b\"buf\"]\nVALUES = [b\"abc\", b\"hello\", b\"x\", b\"\", b\"0\", b\"12345\", b\"\\x00\\xff\", b\"a\" * 100]\nEXPIRY = [b\"0\", b\"10\", b\"100\", b\"3600\", b\"9999999\"]\nFLAGS = [b\"0\", b\"1\", b\"255\", b\"65535\", 
b\"4294967295\"]\nDELTAS = [b\"1\", b\"5\", b\"10\", b\"100\", b\"0\", b\"99999999999\"]\nMETA_FLAGS = [b\"T30\", b\"N10\", b\"R\", b\"v\", b\"h\", b\"l\", b\"t\", b\"c\", b\"f1\", b\"q\", b\"k\"]\nFUZZ_VALUES = [b\"\\x00\", b\"\\xff\" * 4, b\"\\r\\n\", b\"A\" * 256, b\"-1\", b\"NaN\"]\n\n\ndef init(seed):\n    random.seed(seed)\n\n\ndef _random_key():\n    if random.random() < 0.8:\n        return random.choice(KEYS)\n    return random.choice(FUZZ_VALUES)\n\n\ndef _random_value():\n    if random.random() < 0.7:\n        return random.choice(VALUES)\n    return random.choice(FUZZ_VALUES)\n\n\ndef _random_command():\n    \"\"\"Generate a single random memcache command.\"\"\"\n    cmd_name, cmd_type = random.choice(COMMANDS)\n    cmd = cmd_name.encode() if isinstance(cmd_name, str) else cmd_name\n\n    if cmd_type == \"store\":\n        key = _random_key()\n        flags = random.choice(FLAGS)\n        expiry = random.choice(EXPIRY)\n        value = _random_value()\n        noreply = b\" noreply\" if random.random() < 0.3 else b\"\"\n        return (\n            cmd\n            + b\" \"\n            + key\n            + b\" \"\n            + flags\n            + b\" \"\n            + expiry\n            + b\" \"\n            + str(len(value)).encode()\n            + noreply\n            + b\"\\r\\n\"\n            + value\n            + b\"\\r\\n\"\n        )\n\n    elif cmd_type == \"cas\":\n        key = _random_key()\n        flags = random.choice(FLAGS)\n        expiry = random.choice(EXPIRY)\n        value = _random_value()\n        cas_id = str(random.randint(0, 99999)).encode()\n        noreply = b\" noreply\" if random.random() < 0.3 else b\"\"\n        return (\n            cmd\n            + b\" \"\n            + key\n            + b\" \"\n            + flags\n            + b\" \"\n            + expiry\n            + b\" \"\n            + str(len(value)).encode()\n            + b\" \"\n            + cas_id\n            + noreply\n            + 
b\"\\r\\n\"\n            + value\n            + b\"\\r\\n\"\n        )\n\n    elif cmd_type == \"get\":\n        nkeys = random.randint(1, 4)\n        keys = b\" \".join(_random_key() for _ in range(nkeys))\n        return cmd + b\" \" + keys + b\"\\r\\n\"\n\n    elif cmd_type == \"gat\":\n        expiry = random.choice(EXPIRY)\n        nkeys = random.randint(1, 3)\n        keys = b\" \".join(_random_key() for _ in range(nkeys))\n        return cmd + b\" \" + expiry + b\" \" + keys + b\"\\r\\n\"\n\n    elif cmd_type == \"delta\":\n        key = _random_key()\n        delta = random.choice(DELTAS)\n        noreply = b\" noreply\" if random.random() < 0.3 else b\"\"\n        return cmd + b\" \" + key + b\" \" + delta + noreply + b\"\\r\\n\"\n\n    elif cmd_type == \"del\":\n        key = _random_key()\n        noreply = b\" noreply\" if random.random() < 0.3 else b\"\"\n        return cmd + b\" \" + key + noreply + b\"\\r\\n\"\n\n    elif cmd_type == \"meta_store\":\n        key = _random_key()\n        value = _random_value()\n        meta_flags = b\" \".join(random.sample(META_FLAGS, random.randint(0, 3)))\n        extra = (b\" \" + meta_flags) if meta_flags else b\"\"\n        return (\n            cmd + b\" \" + key + b\" \" + str(len(value)).encode() + extra + b\"\\r\\n\" + value + b\"\\r\\n\"\n        )\n\n    elif cmd_type == \"meta\":\n        key = _random_key()\n        meta_flags = b\" \".join(random.sample(META_FLAGS, random.randint(0, 3)))\n        extra = (b\" \" + meta_flags) if meta_flags else b\"\"\n        return cmd + b\" \" + key + extra + b\"\\r\\n\"\n\n    else:  # bare\n        return cmd + b\"\\r\\n\"\n\n\ndef _parse_mc_commands(buf):\n    \"\"\"Best-effort parse of memcache text protocol into list of raw command lines.\n    Returns (commands, success) where commands is a list of bytes.\"\"\"\n    commands = []\n    data = bytes(buf)\n    pos = 0\n\n    while pos < len(data):\n        end = data.find(b\"\\r\\n\", pos)\n        if end < 0:\n    
        break\n\n        line = data[pos:end]\n        pos = end + 2\n\n        # Check if this is a store command that has a data block\n        parts = line.split(b\" \")\n        if len(parts) >= 5 and parts[0].lower() in (\n            b\"set\",\n            b\"add\",\n            b\"replace\",\n            b\"append\",\n            b\"prepend\",\n            b\"cas\",\n        ):\n            try:\n                nbytes = int(parts[4])\n                if pos + nbytes + 2 <= len(data):\n                    value = data[pos : pos + nbytes]\n                    pos += nbytes + 2  # skip value + \\r\\n\n                    commands.append((line, value))\n                    continue\n            except (ValueError, IndexError):\n                pass\n        elif len(parts) >= 3 and parts[0].lower() == b\"ms\":\n            try:\n                nbytes = int(parts[2]) if len(parts) > 2 else int(parts[1])\n                if pos + nbytes + 2 <= len(data):\n                    value = data[pos : pos + nbytes]\n                    pos += nbytes + 2\n                    commands.append((line, value))\n                    continue\n            except (ValueError, IndexError):\n                pass\n\n        commands.append((line, None))\n\n    return (commands, len(commands) > 0)\n\n\ndef _commands_to_bytes(commands):\n    \"\"\"Serialize parsed commands back to memcache protocol bytes.\"\"\"\n    parts = []\n    for line, value in commands:\n        parts.append(line + b\"\\r\\n\")\n        if value is not None:\n            parts.append(value + b\"\\r\\n\")\n    return b\"\".join(parts)\n\n\ndef _mutate_commands(commands):\n    \"\"\"Apply random mutations to parsed memcache commands.\"\"\"\n    result = list(commands)\n\n    mutation = random.random()\n\n    if mutation < 0.25 and len(result) > 0:\n        # Replace a command entirely\n        idx = random.randint(0, len(result) - 1)\n        new_cmd = _random_command()\n        # Parse the generated command 
back\n        parsed, _ = _parse_mc_commands(new_cmd)\n        if parsed:\n            result[idx] = parsed[0]\n\n    elif mutation < 0.45 and len(result) > 0:\n        # Mutate a key or value in a command\n        idx = random.randint(0, len(result) - 1)\n        line, value = result[idx]\n        parts = line.split(b\" \")\n        if len(parts) >= 2:\n            cmd = parts[0].lower()\n            # Mutate the correct key index depending on command\n            if cmd in (b\"gat\", b\"gats\") and len(parts) >= 3:\n                key_idx = random.randint(2, len(parts) - 1)\n                parts[key_idx] = _random_key()\n            else:\n                parts[1] = _random_key()\n            if value is not None:\n                new_value = _random_value()\n                # Update byte count in the header\n                length_idx = None\n                if cmd == b\"ms\" and len(parts) >= 3:\n                    length_idx = 2\n                elif len(parts) >= 5:\n                    length_idx = 4\n                if length_idx is not None:\n                    try:\n                        int(parts[length_idx])\n                        parts[length_idx] = str(len(new_value)).encode()\n                    except ValueError:\n                        pass\n                value = new_value\n            result[idx] = (b\" \".join(parts), value)\n\n    elif mutation < 0.6:\n        # Insert a new random command\n        new_cmd = _random_command()\n        parsed, _ = _parse_mc_commands(new_cmd)\n        if parsed:\n            pos = random.randint(0, len(result))\n            result.insert(pos, parsed[0])\n\n    elif mutation < 0.7 and len(result) > 1:\n        # Remove a command\n        idx = random.randint(0, len(result) - 1)\n        result.pop(idx)\n\n    elif mutation < 0.8 and len(result) >= 2:\n        # Swap two commands\n        i, j = random.sample(range(len(result)), 2)\n        result[i], result[j] = result[j], result[i]\n\n    elif mutation 
< 0.9 and len(result) > 0:\n        # Duplicate a command\n        idx = random.randint(0, len(result) - 1)\n        result.insert(idx + 1, result[idx])\n\n    else:\n        # Toggle noreply on a command\n        if len(result) > 0:\n            idx = random.randint(0, len(result) - 1)\n            line, value = result[idx]\n            if line.endswith(b\" noreply\"):\n                line = line[:-8]\n            else:\n                line = line + b\" noreply\"\n            result[idx] = (line, value)\n\n    return result\n\n\ndef fuzz(buf, add_buf, max_size):\n    \"\"\"Main mutation function called by AFL++.\"\"\"\n    commands, ok = _parse_mc_commands(buf)\n\n    if ok and commands:\n        mutated = _mutate_commands(commands)\n        result = _commands_to_bytes(mutated)\n    else:\n        n = random.randint(1, 5)\n        result = b\"\".join(_random_command() for _ in range(n))\n\n    if len(result) > max_size:\n        result = result[:max_size]\n    return bytearray(result)\n\n\ndef havoc_mutation(buf, max_size):\n    \"\"\"Called during havoc stage.\"\"\"\n    commands, ok = _parse_mc_commands(buf)\n    if not ok or not commands:\n        return bytearray(_random_command()[:max_size])\n\n    mutated = _mutate_commands(commands)\n    result = _commands_to_bytes(mutated)\n    if len(result) > max_size:\n        result = result[:max_size]\n    return bytearray(result)\n\n\ndef havoc_mutation_probability():\n    return 50\n"
  },
  {
    "path": "fuzz/package_crash.sh",
    "content": "#!/usr/bin/env bash\n\nset -e\n\nGREEN='\\033[0;32m'\nRED='\\033[0;31m'\nNC='\\033[0m'\n\nprint_info() { echo -e \"${GREEN}[INFO]${NC} $1\"; }\nprint_error() { echo -e \"${RED}[ERROR]${NC} $1\"; }\n\nusage() {\n    echo \"Usage: $0 <crash_id> [crashes_dir]\"\n    echo \"\"\n    echo \"Packages a crash and its RECORD files into a self-contained archive\"\n    echo \"that can be sent to another developer for reproduction.\"\n    echo \"\"\n    echo \"Arguments:\"\n    echo \"  crash_id      Crash ID (e.g. 000000)\"\n    echo \"  crashes_dir   Path to crashes directory (default: fuzz/artifacts/resp/default/crashes)\"\n    echo \"\"\n    echo \"Example:\"\n    echo \"  $0 000000\"\n    echo \"  $0 000001 /path/to/crashes\"\n    exit 1\n}\n\nif [[ $# -lt 1 ]]; then\n    usage\nfi\n\nCRASH_ID=\"$1\"\nSCRIPT_DIR=\"$(cd \"$(dirname \"${BASH_SOURCE[0]}\")\" && pwd)\"\nCRASHES_DIR=\"${2:-$SCRIPT_DIR/artifacts/resp/default/crashes}\"\n\nif [[ ! -d \"$CRASHES_DIR\" ]]; then\n    print_error \"Crashes directory not found: $CRASHES_DIR\"\n    exit 1\nfi\n\n# Find the crash input file\nCRASH_FILE=$(find \"$CRASHES_DIR\" -maxdepth 1 -name \"id:${CRASH_ID},*\" ! 
-name \"RECORD:*\" | head -1)\nif [[ -z \"$CRASH_FILE\" ]]; then\n    print_error \"Crash input not found for id:${CRASH_ID} in $CRASHES_DIR\"\n    exit 1\nfi\n\n# Count RECORD files\nRECORD_COUNT=$(find \"$CRASHES_DIR\" -maxdepth 1 -name \"RECORD:${CRASH_ID},cnt:*\" | wc -l)\n\nARCHIVE_NAME=\"crash-${CRASH_ID}\"\nTMPDIR=$(mktemp -d)\nDEST=\"$TMPDIR/$ARCHIVE_NAME\"\nmkdir -p \"$DEST/crashes\"\n\nprint_info \"Packaging crash ${CRASH_ID}...\"\nprint_info \"Crash input: $(basename \"$CRASH_FILE\")\"\nprint_info \"RECORD files: ${RECORD_COUNT}\"\n\n# Copy crash input and RECORD files into crashes/ subdirectory\ncp \"$CRASH_FILE\" \"$DEST/crashes/\"\nif [[ $RECORD_COUNT -gt 0 ]]; then\n    find \"$CRASHES_DIR\" -maxdepth 1 -name \"RECORD:${CRASH_ID},cnt:*\" -exec cp {} \"$DEST/crashes/\" \\;\nfi\n\n# Copy replay_crash.py\ncp \"$SCRIPT_DIR/replay_crash.py\" \"$DEST/\"\n\n# Create archive\nOUTPUT=\"$(pwd)/${ARCHIVE_NAME}.tar.gz\"\ntar -czf \"$OUTPUT\" -C \"$TMPDIR\" \"$ARCHIVE_NAME\"\nrm -rf \"$TMPDIR\"\n\nSIZE=$(du -h \"$OUTPUT\" | cut -f1)\nprint_info \"Archive created: ${OUTPUT} (${SIZE})\"\necho \"\"\n# Detect target from directory structure: artifacts/<target>/default/crashes\nTARGET_NAME=$(basename \"$(dirname \"$(dirname \"$CRASHES_DIR\")\")\")\nIS_MEMCACHE=false\nif [[ \"$TARGET_NAME\" == \"memcache\" ]]; then\n    IS_MEMCACHE=true\nfi\n\necho \"To reproduce:\"\necho \"  1. Start dragonfly:\"\nif [[ \"$IS_MEMCACHE\" == true ]]; then\n    echo \"     ./build/dragonfly --port 6379 --memcached_port=11211 --logtostderr --proactor_threads 1 --dbfilename=\\\"\\\"\"\nelse\n    echo \"     ./build/dragonfly --port 6379 --logtostderr --proactor_threads 1 --dbfilename=\\\"\\\"\"\nfi\necho \"  2. 
Extract and replay:\"\necho \"     tar xzf ${ARCHIVE_NAME}.tar.gz\"\necho \"     cd ${ARCHIVE_NAME}\"\nif [[ \"$IS_MEMCACHE\" == true ]]; then\n    echo \"     python3 replay_crash.py crashes ${CRASH_ID} 127.0.0.1 11211\"\nelse\n    echo \"     python3 replay_crash.py crashes ${CRASH_ID}\"\nfi\n"
  },
  {
    "path": "fuzz/replay_crash.py",
    "content": "#!/usr/bin/env python3\r\n\"\"\"Replays a crash from AFL++ persistent mode RECORD files.\r\n\r\nIn persistent mode, a crash depends on accumulated server state from all\r\nprevious iterations. AFL_PERSISTENT_RECORD saves these as RECORD files.\r\nThis script replays them in order against a running Dragonfly instance.\r\n\r\nUsage:\r\n    # Start dragonfly in another terminal:\r\n    ./build-dbg/dragonfly --port 6379 --logtostderr --proactor_threads 1\r\n\r\n    # Replay crash:\r\n    python3 fuzz/replay_crash.py fuzz/artifacts/resp/default/crashes 000000\r\n\"\"\"\r\n\r\nimport glob\r\nimport os\r\nimport socket\r\nimport sys\r\n\r\n\r\ndef send_input(host, port, data):\r\n    \"\"\"Send data over TCP. Mirrors SendFuzzInputToServer.\"\"\"\r\n    try:\r\n        s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)\r\n        s.settimeout(0.2)\r\n        s.connect((host, port))\r\n    except ConnectionRefusedError:\r\n        print(\"\\033[0;31m[ERROR]\\033[0m Connection refused — is Dragonfly running?\")\r\n        sys.exit(1)\r\n\r\n    try:\r\n        s.sendall(data)\r\n    except Exception:\r\n        pass\r\n\r\n    try:\r\n        s.recv(4096)\r\n    except Exception:\r\n        pass\r\n    s.close()\r\n\r\n\r\ndef main():\r\n    if len(sys.argv) < 3:\r\n        print(f\"Usage: {sys.argv[0]} <crash_dir> <crash_id> [host] [port]\")\r\n        sys.exit(1)\r\n\r\n    crash_dir = sys.argv[1]\r\n    crash_id = sys.argv[2]\r\n    host = sys.argv[3] if len(sys.argv) > 3 else \"127.0.0.1\"\r\n    port = int(sys.argv[4]) if len(sys.argv) > 4 else 6379\r\n\r\n    # Find RECORD files sorted by cnt\r\n    pattern = os.path.join(crash_dir, f\"RECORD:{crash_id},cnt:*\")\r\n    records = sorted(glob.glob(pattern))\r\n\r\n    # Find crash input file\r\n    crash_files = [\r\n        f\r\n        for f in glob.glob(os.path.join(crash_dir, f\"id:{crash_id},*\"))\r\n        if not os.path.basename(f).startswith(\"RECORD:\")\r\n    ]\r\n    if not crash_files:\r\n 
       print(f\"\\033[0;31m[ERROR]\\033[0m Crash input not found for id:{crash_id}\")\r\n        sys.exit(1)\r\n\r\n    crash_file = crash_files[0]\r\n\r\n    print(f\"\\033[0;32m[INFO]\\033[0m Replaying crash {crash_id} against {host}:{port}\")\r\n    print(f\"\\033[0;32m[INFO]\\033[0m RECORD files: {len(records)}\")\r\n    print(f\"\\033[0;32m[INFO]\\033[0m Crash file: {crash_file}\")\r\n    print()\r\n\r\n    # Replay all RECORD inputs\r\n    for i, rec in enumerate(records):\r\n        if i % 1000 == 0:\r\n            print(f\"\\033[1;33m[REPLAY]\\033[0m Progress: {i} / {len(records)}\")\r\n        with open(rec, \"rb\") as f:\r\n            data = f.read()\r\n        send_input(host, port, data)\r\n\r\n    # Send the crash input\r\n    print(f\"\\033[1;33m[REPLAY]\\033[0m Sending crash input: {os.path.basename(crash_file)}\")\r\n    with open(crash_file, \"rb\") as f:\r\n        data = f.read()\r\n    send_input(host, port, data)\r\n\r\n    print()\r\n    print(\"\\033[0;32m[INFO]\\033[0m Replay complete. Check if the Dragonfly process crashed.\")\r\n    print(\r\n        \"\\033[0;32m[INFO]\\033[0m If not, the bug may depend on thread timing (non-deterministic).\"\r\n    )\r\n\r\n\r\nif __name__ == \"__main__\":\r\n    main()\r\n"
  },
  {
    "path": "fuzz/resp_mutator.py",
    "content": "\"\"\"AFL++ custom mutator for RESP protocol.\n\nInstead of random byte-level mutations (which would break protocol framing and get\nrejected by the parser), this mutator operates at the command level: it parses\nthe input into commands, then randomly replaces/inserts/removes/reorders commands and\narguments while keeping RESP encoding valid. This ensures mutated inputs actually\nreach command execution code paths.\n\nFocus commands (optional, set via FUZZ_FOCUS_COMMANDS env var):\n    When running PR-targeted fuzzing, generate_targeted_seeds.py produces a list of\n    command names affected by the code change. This mutator reads that list and\n    picks those commands ~70% of the time, concentrating mutations on the changed code.\n    Commands not already in the COMMANDS table are auto-registered with default arity.\n\nUsage:\n    export PYTHONPATH=/path/to/dragonfly/fuzz\n    export AFL_PYTHON_MODULE=resp_mutator\n    export AFL_CUSTOM_MUTATOR_ONLY=1\n    afl-fuzz ...\n\"\"\"\n\nimport json\nimport os\nimport random\nimport struct\n\n# fmt: off\n# Commands grouped by arity pattern: (name, min_args, max_args)\n# min/max are argument counts AFTER the command name itself.\nCOMMANDS = [\n    # String\n    (b\"GET\", 1, 1), (b\"SET\", 2, 6), (b\"MGET\", 1, 5), (b\"MSET\", 2, 10),\n    (b\"SETNX\", 2, 2), (b\"SETEX\", 3, 3), (b\"PSETEX\", 3, 3),\n    (b\"INCR\", 1, 1), (b\"DECR\", 1, 1), (b\"INCRBY\", 2, 2), (b\"DECRBY\", 2, 2),\n    (b\"INCRBYFLOAT\", 2, 2), (b\"APPEND\", 2, 2), (b\"STRLEN\", 1, 1),\n    (b\"GETRANGE\", 3, 3), (b\"SETRANGE\", 3, 3), (b\"GETSET\", 2, 2),\n    (b\"GETDEL\", 1, 1), (b\"GETEX\", 1, 3), (b\"SUBSTR\", 3, 3),\n    (b\"MSETNX\", 2, 10),\n    # Key\n    (b\"DEL\", 1, 5), (b\"UNLINK\", 1, 5), (b\"EXISTS\", 1, 5),\n    (b\"EXPIRE\", 2, 3), (b\"EXPIREAT\", 2, 3), (b\"PEXPIRE\", 2, 3),\n    (b\"PEXPIREAT\", 2, 3), (b\"PERSIST\", 1, 1),\n    (b\"TTL\", 1, 1), (b\"PTTL\", 1, 1), (b\"EXPIRETIME\", 1, 1), (b\"PEXPIRETIME\", 1, 1),\n    
(b\"TYPE\", 1, 1), (b\"RENAME\", 2, 2), (b\"RENAMENX\", 2, 2),\n    (b\"COPY\", 2, 4), (b\"DUMP\", 1, 1), (b\"TOUCH\", 1, 5),\n    (b\"OBJECT\", 2, 2), (b\"RANDOMKEY\", 0, 0), (b\"KEYS\", 1, 1),\n    (b\"SCAN\", 1, 5), (b\"SORT\", 1, 7), (b\"SORT_RO\", 1, 7),\n    # List\n    (b\"LPUSH\", 2, 5), (b\"RPUSH\", 2, 5), (b\"LPOP\", 1, 2), (b\"RPOP\", 1, 2),\n    (b\"LLEN\", 1, 1), (b\"LINDEX\", 2, 2), (b\"LSET\", 3, 3),\n    (b\"LRANGE\", 3, 3), (b\"LTRIM\", 3, 3), (b\"LREM\", 3, 3),\n    (b\"LPOS\", 2, 6), (b\"LMOVE\", 4, 4), (b\"LMPOP\", 2, 4),\n    (b\"LPUSHX\", 2, 5), (b\"RPUSHX\", 2, 5), (b\"RPOPLPUSH\", 2, 2),\n    (b\"BLPOP\", 2, 5), (b\"BRPOP\", 2, 5), (b\"BLMOVE\", 5, 5), (b\"BLMPOP\", 3, 5),\n    # Hash\n    (b\"HSET\", 3, 9), (b\"HGET\", 2, 2), (b\"HDEL\", 2, 5),\n    (b\"HEXISTS\", 2, 2), (b\"HLEN\", 1, 1), (b\"HKEYS\", 1, 1),\n    (b\"HVALS\", 1, 1), (b\"HGETALL\", 1, 1), (b\"HINCRBY\", 3, 3),\n    (b\"HINCRBYFLOAT\", 3, 3), (b\"HMSET\", 3, 9), (b\"HMGET\", 2, 5),\n    (b\"HSETNX\", 3, 3), (b\"HSTRLEN\", 2, 2), (b\"HRANDFIELD\", 1, 3),\n    (b\"HSCAN\", 2, 6),\n    # Set\n    (b\"SADD\", 2, 5), (b\"SREM\", 2, 5), (b\"SMEMBERS\", 1, 1),\n    (b\"SISMEMBER\", 2, 2), (b\"SMISMEMBER\", 2, 5), (b\"SCARD\", 1, 1),\n    (b\"SPOP\", 1, 2), (b\"SRANDMEMBER\", 1, 2), (b\"SMOVE\", 3, 3),\n    (b\"SDIFF\", 1, 3), (b\"SINTER\", 1, 3), (b\"SUNION\", 1, 3),\n    (b\"SDIFFSTORE\", 2, 4), (b\"SINTERSTORE\", 2, 4), (b\"SUNIONSTORE\", 2, 4),\n    (b\"SINTERCARD\", 2, 5), (b\"SSCAN\", 2, 6),\n    # Sorted set\n    (b\"ZADD\", 3, 9), (b\"ZREM\", 2, 5), (b\"ZSCORE\", 2, 2), (b\"ZMSCORE\", 2, 5),\n    (b\"ZRANK\", 2, 2), (b\"ZREVRANK\", 2, 2), (b\"ZCARD\", 1, 1),\n    (b\"ZCOUNT\", 3, 3), (b\"ZLEXCOUNT\", 3, 3),\n    (b\"ZRANGE\", 3, 7), (b\"ZRANGEBYLEX\", 3, 7), (b\"ZRANGEBYSCORE\", 3, 7),\n    (b\"ZREVRANGE\", 3, 5), (b\"ZREVRANGEBYLEX\", 3, 7), (b\"ZREVRANGEBYSCORE\", 3, 7),\n    (b\"ZRANGESTORE\", 4, 8),\n    (b\"ZINCRBY\", 3, 3), (b\"ZRANDMEMBER\", 1, 3),\n    (b\"ZPOPMIN\", 
1, 2), (b\"ZPOPMAX\", 1, 2),\n    (b\"BZPOPMIN\", 2, 4), (b\"BZPOPMAX\", 2, 4),\n    (b\"ZDIFF\", 2, 5), (b\"ZDIFFSTORE\", 3, 5),\n    (b\"ZMPOP\", 2, 4), (b\"BZMPOP\", 3, 5),\n    (b\"ZREMRANGEBYRANK\", 3, 3), (b\"ZREMRANGEBYSCORE\", 3, 3),\n    (b\"ZREMRANGEBYLEX\", 3, 3),\n    (b\"ZSCAN\", 2, 6),\n    # Stream\n    (b\"XADD\", 3, 9), (b\"XLEN\", 1, 1), (b\"XRANGE\", 3, 5),\n    (b\"XREVRANGE\", 3, 5), (b\"XREAD\", 3, 7), (b\"XTRIM\", 2, 4),\n    (b\"XDEL\", 2, 5), (b\"XINFO\", 2, 3), (b\"XACK\", 3, 5),\n    (b\"XGROUP\", 3, 6), (b\"XREADGROUP\", 5, 9),\n    (b\"XAUTOCLAIM\", 4, 6), (b\"XCLAIM\", 4, 8),\n    # HyperLogLog\n    (b\"PFADD\", 1, 5), (b\"PFCOUNT\", 1, 3), (b\"PFMERGE\", 2, 4),\n    # Geo\n    (b\"GEOADD\", 4, 10), (b\"GEODIST\", 3, 4), (b\"GEOPOS\", 2, 5),\n    (b\"GEOHASH\", 2, 5), (b\"GEOSEARCH\", 4, 10), (b\"GEOSEARCHSTORE\", 5, 11),\n    # Pub/Sub\n    (b\"SUBSCRIBE\", 1, 3), (b\"PUBLISH\", 2, 2), (b\"PSUBSCRIBE\", 1, 3),\n    # Transaction\n    (b\"MULTI\", 0, 0), (b\"EXEC\", 0, 0), (b\"DISCARD\", 0, 0),\n    (b\"WATCH\", 1, 3), (b\"UNWATCH\", 0, 0),\n    # Script\n    (b\"EVAL\", 2, 6), (b\"EVALSHA\", 2, 6), (b\"EVALRO\", 2, 6),\n    # JSON\n    (b\"JSON.SET\", 3, 4), (b\"JSON.GET\", 1, 4), (b\"JSON.DEL\", 1, 2),\n    (b\"JSON.TYPE\", 1, 2), (b\"JSON.NUMINCRBY\", 3, 3),\n    (b\"JSON.ARRAPPEND\", 3, 6), (b\"JSON.ARRLEN\", 1, 2),\n    (b\"JSON.ARRINSERT\", 4, 6), (b\"JSON.ARRTRIM\", 4, 4),\n    (b\"JSON.ARRPOP\", 1, 3), (b\"JSON.ARRINDEX\", 3, 5),\n    (b\"JSON.OBJKEYS\", 1, 2), (b\"JSON.OBJLEN\", 1, 2),\n    (b\"JSON.STRAPPEND\", 2, 3), (b\"JSON.STRLEN\", 1, 2),\n    (b\"JSON.TOGGLE\", 2, 2), (b\"JSON.CLEAR\", 1, 2),\n    (b\"JSON.MERGE\", 3, 3), (b\"JSON.MGET\", 2, 5),\n    # Bloom filter\n    (b\"BF.ADD\", 2, 2), (b\"BF.EXISTS\", 2, 2), (b\"BF.MADD\", 2, 5),\n    (b\"BF.MEXISTS\", 2, 5), (b\"BF.RESERVE\", 3, 5),\n    # Server\n    (b\"PING\", 0, 1), (b\"ECHO\", 1, 1), (b\"SELECT\", 1, 1),\n    (b\"DBSIZE\", 0, 0), (b\"INFO\", 0, 1),\n    
(b\"CONFIG\", 2, 3), (b\"CLIENT\", 1, 3), (b\"COMMAND\", 0, 2),\n    (b\"MEMORY\", 1, 2), (b\"ACL\", 1, 5),\n    (b\"MONITOR\", 0, 0), (b\"RESET\", 0, 0), (b\"HELLO\", 0, 5),\n    (b\"WAIT\", 2, 2), (b\"BGSAVE\", 0, 1),\n    (b\"OBJECT\", 2, 2), (b\"LATENCY\", 1, 2), (b\"SLOWLOG\", 1, 2),\n    # Bitops\n    (b\"SETBIT\", 3, 3), (b\"GETBIT\", 2, 2), (b\"BITCOUNT\", 1, 4),\n    (b\"BITOP\", 3, 5), (b\"BITPOS\", 2, 5), (b\"BITFIELD\", 2, 8),\n    # Search\n    (b\"FT.CREATE\", 3, 15), (b\"FT.SEARCH\", 2, 10), (b\"FT.DROPINDEX\", 1, 2),\n    (b\"FT.INFO\", 1, 1), (b\"FT.ALTER\", 3, 8),\n    # Throttle\n    (b\"CL.THROTTLE\", 5, 5),\n]\n# fmt: on\n\nKEYS = [b\"k\", b\"key\", b\"k1\", b\"k2\", b\"k3\", b\"src\", b\"dst\", b\"mylist\", b\"myset\", b\"myhash\"]\nVALUES = [b\"v\", b\"val\", b\"hello\", b\"0\", b\"1\", b\"-1\", b\"100\", b\"3.14\", b\"\", b\"a b\"]\nSPECIAL = [b\"*\", b\"?\", b\"[\", b\"NX\", b\"XX\", b\"EX\", b\"PX\", b\"GT\", b\"LT\", b\"KEEPTTL\"]\nJSON_VALUES = [b'{\"a\":1}', b\"[1,2,3]\", b'\"str\"', b\"42\", b\"null\", b\"true\"]\nJSON_PATHS = [b\"$\", b\"$.a\", b\"$.*\", b\"$.arr[0]\", b\".\"]\nSCORE_VALUES = [b\"0\", b\"1\", b\"-inf\", b\"+inf\", b\"(1\", b\"(5\", b\"3.14\"]\nSTREAM_IDS = [b\"*\", b\"0-0\", b\"1-1\", b\"$\", b\">\"]\n\n# Fuzzy values: binary junk, edge cases\nFUZZ_VALUES = [\n    b\"\\x00\",\n    b\"\\xff\" * 4,\n    b\"\\r\\n\",\n    b\"$-1\\r\\n\",\n    b\"*0\\r\\n\",\n    b\"A\" * 256,\n    b\"-1\",\n    b\"99999999999\",\n    b\"NaN\",\n    b\"inf\",\n]\n\n# Focus commands: when set via FUZZ_FOCUS_COMMANDS env var (JSON list of command names),\n# the mutator will prefer these commands ~70% of the time. 
Used by PR fuzzing to\n# concentrate mutations on commands affected by the code change.\n_FOCUS_COMMANDS = []\n_FOCUS_WEIGHT = 0.7\n\n_focus_env = os.environ.get(\"FUZZ_FOCUS_COMMANDS\", \"\")\nif _focus_env:\n    try:\n        raw = json.loads(_focus_env)\n        if isinstance(raw, str):\n            raw = [raw]\n        if isinstance(raw, list):\n            _focus_names = {s.strip().upper() for s in raw if isinstance(s, str) and s.strip()}\n        else:\n            _focus_names = set()\n        _FOCUS_COMMANDS = [c for c in COMMANDS if c[0].decode().upper() in _focus_names]\n        # Add unknown commands (e.g. newly added in a PR) with default arity\n        _known = {c[0].decode().upper() for c in COMMANDS}\n        for name in _focus_names - _known:\n            entry = (name.encode(), 1, 3)\n            COMMANDS.append(entry)\n            _FOCUS_COMMANDS.append(entry)\n    except (json.JSONDecodeError, TypeError, ValueError):\n        pass\n\n\ndef _pick_command():\n    \"\"\"Pick a command tuple, preferring focus commands when available.\"\"\"\n    if _FOCUS_COMMANDS and random.random() < _FOCUS_WEIGHT:\n        return random.choice(_FOCUS_COMMANDS)\n    return random.choice(COMMANDS)\n\n\ndef init(seed):\n    random.seed(seed)\n\n\ndef _encode_resp(*args):\n    \"\"\"Encode a list of args into RESP array.\"\"\"\n    parts = [b\"*%d\\r\\n\" % len(args)]\n    for a in args:\n        if not isinstance(a, bytes):\n            a = str(a).encode()\n        parts.append(b\"$%d\\r\\n%s\\r\\n\" % (len(a), a))\n    return b\"\".join(parts)\n\n\ndef _random_arg():\n    \"\"\"Generate a random argument value.\"\"\"\n    r = random.random()\n    if r < 0.3:\n        return random.choice(KEYS)\n    if r < 0.55:\n        return random.choice(VALUES)\n    if r < 0.7:\n        return random.choice(SPECIAL)\n    if r < 0.8:\n        return random.choice(FUZZ_VALUES)\n    if r < 0.85:\n        return random.choice(JSON_VALUES)\n    if r < 0.9:\n        return 
random.choice(JSON_PATHS)\n    if r < 0.95:\n        return random.choice(SCORE_VALUES)\n    return random.choice(STREAM_IDS)\n\n\ndef _random_command():\n    \"\"\"Generate a single random RESP command.\"\"\"\n    cmd_name, min_args, max_args = _pick_command()\n    nargs = random.randint(min_args, max_args)\n    args = [cmd_name] + [_random_arg() for _ in range(nargs)]\n    return _encode_resp(*args)\n\n\ndef _parse_resp_commands(buf):\n    \"\"\"Best-effort parse of RESP buffer into list of commands (each is list of bytes).\n    Returns (commands, success). On parse failure returns ([], False).\"\"\"\n    commands = []\n    pos = 0\n    data = bytes(buf)\n\n    while pos < len(data):\n        # Skip whitespace/newlines\n        while pos < len(data) and data[pos : pos + 1] in (b\"\\r\", b\"\\n\", b\" \"):\n            pos += 1\n        if pos >= len(data):\n            break\n\n        if data[pos : pos + 1] != b\"*\":\n            return ([], False)\n\n        # Parse *N\\r\\n\n        end = data.find(b\"\\r\\n\", pos)\n        if end < 0:\n            return ([], False)\n        try:\n            nargs = int(data[pos + 1 : end])\n        except ValueError:\n            return ([], False)\n        pos = end + 2\n\n        args = []\n        for _ in range(nargs):\n            if pos >= len(data) or data[pos : pos + 1] != b\"$\":\n                return ([], False)\n            end = data.find(b\"\\r\\n\", pos)\n            if end < 0:\n                return ([], False)\n            try:\n                slen = int(data[pos + 1 : end])\n            except ValueError:\n                return ([], False)\n            pos = end + 2\n            if slen < 0:\n                args.append(b\"\")\n                continue\n            if pos + slen + 2 > len(data):\n                return ([], False)\n            args.append(data[pos : pos + slen])\n            pos += slen + 2\n\n        if args:\n            commands.append(args)\n\n    return (commands, 
True)\n\n\ndef _mutate_commands(commands):\n    \"\"\"Apply random mutations to a list of parsed commands.\"\"\"\n    result = list(commands)\n\n    mutation = random.random()\n\n    if mutation < 0.2 and len(result) > 0:\n        # Replace a random command entirely\n        idx = random.randint(0, len(result) - 1)\n        cmd_name, min_args, max_args = _pick_command()\n        nargs = random.randint(min_args, max_args)\n        result[idx] = [cmd_name] + [_random_arg() for _ in range(nargs)]\n\n    elif mutation < 0.4 and len(result) > 0:\n        # Mutate an argument of a random command\n        idx = random.randint(0, len(result) - 1)\n        cmd = list(result[idx])\n        if len(cmd) > 1:\n            arg_idx = random.randint(1, len(cmd) - 1)\n            cmd[arg_idx] = _random_arg()\n            result[idx] = cmd\n\n    elif mutation < 0.55:\n        # Insert a new random command\n        pos = random.randint(0, len(result))\n        cmd_name, min_args, max_args = _pick_command()\n        nargs = random.randint(min_args, max_args)\n        result.insert(pos, [cmd_name] + [_random_arg() for _ in range(nargs)])\n\n    elif mutation < 0.65 and len(result) > 1:\n        # Remove a random command\n        idx = random.randint(0, len(result) - 1)\n        result.pop(idx)\n\n    elif mutation < 0.75 and len(result) >= 2:\n        # Swap two commands\n        i, j = random.sample(range(len(result)), 2)\n        result[i], result[j] = result[j], result[i]\n\n    elif mutation < 0.85 and len(result) > 0:\n        # Duplicate a command\n        idx = random.randint(0, len(result) - 1)\n        result.insert(idx + 1, list(result[idx]))\n\n    elif mutation < 0.92 and len(result) > 0:\n        # Wrap some commands in MULTI/EXEC\n        start = random.randint(0, len(result) - 1)\n        end = random.randint(start + 1, min(start + 5, len(result)))\n        result.insert(start, [b\"MULTI\"])\n        result.insert(end + 1, [b\"EXEC\"])\n\n    else:\n        # Add extra 
argument to a random command\n        if len(result) > 0:\n            idx = random.randint(0, len(result) - 1)\n            result[idx] = list(result[idx]) + [_random_arg()]\n\n    return result\n\n\ndef _commands_to_resp(commands):\n    \"\"\"Serialize list of commands back to RESP bytes.\"\"\"\n    parts = []\n    for cmd in commands:\n        parts.append(_encode_resp(*cmd))\n    return b\"\".join(parts)\n\n\ndef fuzz(buf, add_buf, max_size):\n    \"\"\"Main mutation function called by AFL++.\"\"\"\n    # Try to parse the input as RESP\n    commands, ok = _parse_resp_commands(buf)\n\n    if ok and commands:\n        # Parsed successfully — mutate at command level\n        mutated = _mutate_commands(commands)\n        result = _commands_to_resp(mutated)\n    else:\n        # Could not parse — generate random commands from scratch\n        n = random.randint(1, 5)\n        result = b\"\".join(_random_command() for _ in range(n))\n\n    if len(result) > max_size:\n        result = result[:max_size]\n\n    return bytearray(result)\n\n\ndef havoc_mutation(buf, max_size):\n    \"\"\"Called during havoc stage — single small mutation.\"\"\"\n    commands, ok = _parse_resp_commands(buf)\n    if not ok or not commands:\n        return bytearray(_random_command()[:max_size])\n\n    # Single small mutation\n    mutated = _mutate_commands(commands)\n    result = _commands_to_resp(mutated)\n    if len(result) > max_size:\n        result = result[:max_size]\n    return bytearray(result)\n\n\ndef havoc_mutation_probability():\n    \"\"\"How often our havoc_mutation is called vs AFL++'s built-in mutations.\"\"\"\n    return 50\n"
  },
  {
    "path": "fuzz/run_fuzzer.sh",
    "content": "#!/usr/bin/env bash\n\nset -e\n\nGREEN='\\033[0;32m'\nBLUE='\\033[0;34m'\nYELLOW='\\033[1;33m'\nNC='\\033[0m'\n\nSCRIPT_DIR=\"$(cd \"$(dirname \"${BASH_SOURCE[0]}\")\" && pwd)\"\nPROJECT_ROOT=\"$(cd \"$SCRIPT_DIR/..\" && pwd)\"\n\n# Target: \"resp\" (default) or \"memcache\"\nTARGET=\"${1:-resp}\"\nBUILD_DIR=\"${BUILD_DIR:-$PROJECT_ROOT/build-dbg}\"\nFUZZ_DIR=\"$SCRIPT_DIR\"\nOUTPUT_DIR=\"${OUTPUT_DIR:-$FUZZ_DIR/artifacts/$TARGET}\"\nCORPUS_DIR=\"${CORPUS_DIR:-$FUZZ_DIR/corpus/$TARGET}\"\nSEEDS_DIR=\"${SEEDS_DIR:-$FUZZ_DIR/seeds/$TARGET}\"\nDICT_FILE=\"${DICT_FILE:-$FUZZ_DIR/dict/$TARGET.dict}\"\nTIMEOUT=\"5000\"\nFUZZ_TARGET=\"$BUILD_DIR/dragonfly\"\nAFL_PROACTOR_THREADS=\"${AFL_PROACTOR_THREADS:-1}\"\n\n# Persistent record: restart server every N iterations and record the last N inputs.\n# This ensures that on crash, ALL inputs that built the current server state are available\n# for replay. Without this, state from earlier iterations is lost and crashes become\n# non-reproducible. Max recommended by AFL++: 10000.\nAFL_LOOP_LIMIT=\"${AFL_LOOP_LIMIT:-10000}\"\n\nprint_info() {\n    echo -e \"${GREEN}[INFO]${NC} $1\"\n}\n\nprint_note() {\n    echo -e \"${BLUE}[NOTE]${NC} $1\"\n}\n\nprint_warning() {\n    echo -e \"${YELLOW}[WARNING]${NC} $1\"\n}\n\ncheck_requirements() {\n    if [[ ! 
-f \"${FUZZ_TARGET}\" ]]; then\n        print_warning \"Dragonfly not found at ${FUZZ_TARGET}\"\n        print_warning \"Build with: -DUSE_AFL=ON\"\n        exit 1\n    fi\n\n    if [[ \"$TARGET\" != \"resp\" && \"$TARGET\" != \"memcache\" ]]; then\n        print_warning \"Unknown target: $TARGET (use 'resp' or 'memcache')\"\n        exit 1\n    fi\n}\n\nsetup_directories() {\n    print_info \"Setting up directories...\"\n    mkdir -p \"${OUTPUT_DIR}\"\n    mkdir -p \"${CORPUS_DIR}\"\n\n    if [[ -z \"$(ls -A \"$CORPUS_DIR\" 2>/dev/null)\" ]]; then\n        if [[ -d \"${SEEDS_DIR}\" ]] && [[ -n \"$(ls -A \"${SEEDS_DIR}\" 2>/dev/null)\" ]]; then\n            print_info \"Copying seeds to corpus...\"\n            cp \"${SEEDS_DIR}\"/* \"${CORPUS_DIR}/\" 2>/dev/null || true\n        else\n            print_warning \"No seeds found, creating minimal seed\"\n            if [[ \"$TARGET\" == \"memcache\" ]]; then\n                printf 'version\\r\\n' > \"${CORPUS_DIR}/version\"\n            else\n                echo -e '*1\\r\\n$4\\r\\nPING\\r\\n' > \"${CORPUS_DIR}/ping\"\n            fi\n        fi\n    fi\n}\n\nshow_config() {\n    echo \"\"\n    print_info \"AFL++ Persistent Mode Configuration:\"\n    echo \"  Target:           ${TARGET}\"\n    echo \"  Binary:           ${FUZZ_TARGET}\"\n    echo \"  Corpus:           ${CORPUS_DIR}\"\n    echo \"  Output:           ${OUTPUT_DIR}\"\n    echo \"  Dictionary:       ${DICT_FILE}\"\n    echo \"  Timeout:          ${TIMEOUT}ms\"\n    echo \"  Proactor threads: ${AFL_PROACTOR_THREADS}\"\n    echo \"  Loop limit:      ${AFL_LOOP_LIMIT} (= AFL_PERSISTENT_RECORD)\"\n    echo \"\"\n    print_note \"Fuzzing integrated in dragonfly (USE_AFL + persistent mode)\"\n    print_note \"Usage: ./run_fuzzer.sh [resp|memcache]\"\n    print_note \"To change proactor threads: export AFL_PROACTOR_THREADS=N (default: 1)\"\n    print_note \"To change loop limit: export AFL_LOOP_LIMIT=N (default: 10000)\"\n    echo \"\"\n}\n\nrun_fuzzer() {\n 
   print_info \"Starting AFL++ persistent mode fuzzing (target: $TARGET)...\"\n    print_info \"Press Ctrl+C to stop\"\n    echo \"\"\n\n    AFL_CMD=(\n        afl-fuzz\n        -o \"${OUTPUT_DIR}\"\n        -t \"${TIMEOUT}\"\n        -m 4096\n        -i \"${CORPUS_DIR}\"\n    )\n\n    if [[ -f \"${DICT_FILE}\" ]]; then\n        AFL_CMD+=(-x \"${DICT_FILE}\")\n    fi\n\n    AFL_CMD+=(\n        --\n        \"${FUZZ_TARGET}\"\n        --port=6379\n        --logtostderr\n        --proactor_threads=${AFL_PROACTOR_THREADS}\n        --afl_loop_limit=${AFL_LOOP_LIMIT}\n        --bind=0.0.0.0\n        --bind=::\n        --dbfilename=\"\"\n        --omit_basic_usage\n        --rename_command=SHUTDOWN=\n        --rename_command=DEBUG=\n        --rename_command=FLUSHALL=\n        --rename_command=FLUSHDB=\n        --max_bulk_len=1048576\n    )\n\n    if [[ \"$TARGET\" == \"memcache\" ]]; then\n        AFL_CMD+=(--memcached_port=11211 --afl_target_port=11211)\n    fi\n\n    print_info \"Running: ${AFL_CMD[*]}\"\n    echo \"\"\n\n    cd \"${OUTPUT_DIR}\"\n\n    # Run AFL++ - fuzzing integrated in dragonfly via USE_AFL\n    # AFL_HANG_TMOUT: Only consider it a hang if no response for 60 seconds\n    # This prevents false positives from slow but legitimate operations\n    export AFL_HANG_TMOUT=60000\n\n    # Dragonfly has ~350K edges, default AFL++ bitmap is 64KB (massive collisions).\n    # Use 512KB bitmap to reduce hash collisions and improve stability.\n    export AFL_MAP_SIZE=524288\n\n    # Record the last N inputs before a crash for replay.\n    # Synced with afl_loop_limit so the full server state history is always captured.\n    export AFL_PERSISTENT_RECORD=${AFL_LOOP_LIMIT}\n\n    # Even with 1 proactor thread, some coverage instability is expected.\n    # Tell AFL++ to continue despite unstable coverage — don't bail on flaky edges.\n    export AFL_IGNORE_PROBLEMS=1\n\n    # More aggressive havoc mutations from the start — don't wait for deterministic\n    # stages to 
finish. Useful for protocol fuzzing where random mutations find new paths.\n    export AFL_EXPAND_HAVOC_NOW=1\n\n    # Custom protocol mutator — mutates at command/argument level\n    # instead of random bytes, keeping protocol framing valid.\n    export PYTHONPATH=\"$FUZZ_DIR\"\n    if [[ \"$TARGET\" == \"memcache\" ]]; then\n        export AFL_PYTHON_MODULE=memcache_mutator\n    else\n        export AFL_PYTHON_MODULE=resp_mutator\n    fi\n\n    exec \"${AFL_CMD[@]}\"\n}\n\nmain() {\n    check_requirements\n    setup_directories\n    show_config\n    run_fuzzer\n}\n\nmain \"$@\"\n"
  },
  {
    "path": "fuzz/seeds/memcache/add_replace.mc",
    "content": "set key1 0 0 3\r\nabc\r\nadd key2 0 0 3\r\ndef\r\nreplace key1 0 0 3\r\nxyz\r\n"
  },
  {
    "path": "fuzz/seeds/memcache/append_prepend.mc",
    "content": "set buf 0 0 5\r\nhello\r\nappend buf 0 0 6\r\n world\r\nprepend buf 0 0 4\r\nsay\r\nget buf\r\n"
  },
  {
    "path": "fuzz/seeds/memcache/cas.mc",
    "content": "set mykey 0 0 3\r\nabc\r\ngets mykey\r\ncas mykey 0 0 3 1\r\nxyz\r\n"
  },
  {
    "path": "fuzz/seeds/memcache/delete.mc",
    "content": "set key1 0 0 1\r\na\r\nset key2 0 0 1\r\nb\r\ndelete key1\r\ndelete key2 noreply\r\nget key1\r\n"
  },
  {
    "path": "fuzz/seeds/memcache/expiry.mc",
    "content": "set exp1 0 10 3\r\nabc\r\nset exp2 0 0 3\r\ndef\r\nset exp3 0 9999999 3\r\nghi\r\nget exp1 exp2 exp3\r\n"
  },
  {
    "path": "fuzz/seeds/memcache/flags.mc",
    "content": "set f1 0 0 3\r\nabc\r\nset f2 1 0 3\r\ndef\r\nset f3 65535 0 3\r\nghi\r\nset f4 4294967295 0 3\r\njkl\r\ngets f1 f2 f3 f4\r\n"
  },
  {
    "path": "fuzz/seeds/memcache/flush.mc",
    "content": "set a 0 0 1\r\nx\r\nset b 0 0 1\r\ny\r\nflush_all\r\nget a b\r\n"
  },
  {
    "path": "fuzz/seeds/memcache/gat.mc",
    "content": "set mykey 0 100 5\r\nhello\r\ngat 200 mykey\r\ngats 300 mykey\r\n"
  },
  {
    "path": "fuzz/seeds/memcache/incr_decr.mc",
    "content": "set counter 0 0 1\r\n0\r\nincr counter 1\r\nincr counter 10\r\ndecr counter 5\r\nget counter\r\n"
  },
  {
    "path": "fuzz/seeds/memcache/large_value.mc",
    "content": "set big 0 0 100\r\naaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\r\nget big\r\n"
  },
  {
    "path": "fuzz/seeds/memcache/meta_commands.mc",
    "content": "ms mykey 5\r\nhello\r\nmg mykey\r\nmd mykey\r\nma counter\r\nmn\r\n"
  },
  {
    "path": "fuzz/seeds/memcache/multiget.mc",
    "content": "set k1 0 0 1\r\na\r\nset k2 0 0 1\r\nb\r\nset k3 0 0 1\r\nc\r\nget k1 k2 k3\r\ngets k1 k2 k3\r\n"
  },
  {
    "path": "fuzz/seeds/memcache/noreply.mc",
    "content": "set key1 0 0 3 noreply\r\nabc\r\nadd key2 0 0 3 noreply\r\ndef\r\nreplace key1 0 0 3 noreply\r\nxyz\r\nincr counter 1 noreply\r\ndelete key2 noreply\r\n"
  },
  {
    "path": "fuzz/seeds/memcache/set_get.mc",
    "content": "set mykey 0 0 5\r\nhello\r\nget mykey\r\n"
  },
  {
    "path": "fuzz/seeds/memcache/stats_version.mc",
    "content": "stats\r\nversion\r\nquit\r\n"
  },
  {
    "path": "fuzz/seeds/resp/acl.resp",
    "content": "*2\r\n$3\r\nACL\r\n$6\r\nWHOAMI\r\n*2\r\n$3\r\nACL\r\n$4\r\nLIST\r\n"
  },
  {
    "path": "fuzz/seeds/resp/acl_ops.resp",
    "content": "*2\n$3\nACL\n$6\nWHOAMI\n*2\n$3\nACL\n$4\nLIST\n*2\n$3\nACL\n$5\nUSERS\n*3\n$3\nACL\n$3\nCAT\n*2\n$3\nACL\n$7\nGENPASS\n*2\n$7\nCOMMAND\n$5\nCOUNT\n*2\n$7\nCOMMAND\n$4\nDOCS\n"
  },
  {
    "path": "fuzz/seeds/resp/acl_ops2.resp",
    "content": "*2\n$3\nACL\n$6\nWHOAMI\n*2\n$3\nACL\n$4\nLIST\n*2\n$3\nACL\n$5\nUSERS\n*3\n$3\nACL\n$3\nCAT\n$6\nstring\n*2\n$3\nACL\n$7\nGENPASS\n*4\n$3\nACL\n$7\nSETUSER\n$8\ntestuser\n$2\non\n*3\n$3\nACL\n$7\nGETUSER\n$8\ntestuser\n*4\n$3\nACL\n$6\nDRYRUN\n$8\ntestuser\n$3\nGET\n$3\nkey\n*2\n$3\nACL\n$4\nHELP\n*3\n$3\nACL\n$3\nLOG\n$5\nRESET\n*3\n$3\nACL\n$7\nDELUSER\n$8\ntestuser\n*2\n$3\nACL\n$4\nSAVE\n*2\n$3\nACL\n$4\nLOAD\n*3\n$4\nAUTH\n$8\ntestuser\n$8\npassword\n*2\n$7\nCOMMAND\n$5\nCOUNT\n*3\n$7\nCOMMAND\n$4\nINFO\n$3\nGET\n*2\n$7\nCOMMAND\n$4\nDOCS\n$3\nSET\n*2\n$7\nCOMMAND\n$4\nLIST\n"
  },
  {
    "path": "fuzz/seeds/resp/bf_add.resp",
    "content": "*4\n$10\nBF.RESERVE\n$7\nmybloom\n$4\n0.01\n$4\n1000\n*3\n$6\nBF.ADD\n$7\nmybloom\n$5\nitem1\n*3\n$9\nBF.EXISTS\n$7\nmybloom\n$5\nitem1\n"
  },
  {
    "path": "fuzz/seeds/resp/bitfield.resp",
    "content": "*6\n$8\nBITFIELD\n$3\nkey\n$3\nGET\n$2\nu8\n$1\n0\n"
  },
  {
    "path": "fuzz/seeds/resp/bitfield_ops.resp",
    "content": "*8\n$8\nBITFIELD\n$2\nbk\n$3\nSET\n$3\nu8\n$1\n0\n$3\n200\n$3\nGET\n$3\nu8\n$1\n0\n*5\n$8\nBITFIELD\n$2\nbk\n$6\nINCRBY\n$3\nu8\n$1\n0\n$2\n10\n*6\n$8\nBITFIELD\n$2\nbk\n$8\nOVERFLOW\n$3\nSAT\n$6\nINCRBY\n$3\nu8\n$1\n0\n$3\n100\n*5\n$5\nBITOP\n$3\nAND\n$4\ndest\n$2\nbk\n$2\nbk\n*3\n$6\nBITPOS\n$2\nbk\n$1\n1\n"
  },
  {
    "path": "fuzz/seeds/resp/bitops.resp",
    "content": "*4\n$6\nSETBIT\n$2\nbk\n$1\n7\n$1\n1\n*3\n$6\nGETBIT\n$2\nbk\n$1\n0\n*2\n$8\nBITCOUNT\n$2\nbk\n"
  },
  {
    "path": "fuzz/seeds/resp/bloom_ops.resp",
    "content": "*4\n$10\nBF.RESERVE\n$2\nbf\n$4\n0.01\n$4\n1000\n*3\n$6\nBF.ADD\n$2\nbf\n$5\nitem1\n*3\n$9\nBF.EXISTS\n$2\nbf\n$5\nitem1\n*5\n$7\nBF.MADD\n$2\nbf\n$5\nitem2\n$5\nitem3\n$5\nitem4\n*5\n$10\nBF.MEXISTS\n$2\nbf\n$5\nitem1\n$5\nitem2\n$5\nitemX\n"
  },
  {
    "path": "fuzz/seeds/resp/client.resp",
    "content": "*3\n$6\nCLIENT\n$7\nSETNAME\n$10\ntestclient\n*2\n$6\nCLIENT\n$7\nGETNAME\n*2\n$6\nCLIENT\n$4\nLIST\n"
  },
  {
    "path": "fuzz/seeds/resp/config.resp",
    "content": "*3\n$6\nCONFIG\n$3\nGET\n$9\nmaxmemory\n"
  },
  {
    "path": "fuzz/seeds/resp/copy.resp",
    "content": "*3\r\n$3\r\nSET\r\n$3\r\nsrc\r\n$5\r\nhello\r\n*3\r\n$4\r\nCOPY\r\n$3\r\nsrc\r\n$3\r\ndst\r\n"
  },
  {
    "path": "fuzz/seeds/resp/del.resp",
    "content": "*2\r\n$3\r\nDEL\r\n$3\r\nkey\r\n"
  },
  {
    "path": "fuzz/seeds/resp/eval.resp",
    "content": "*3\n$4\nEVAL\n$26\nreturn redis.call(\"PING\")\n$0\n"
  },
  {
    "path": "fuzz/seeds/resp/expire_ops.resp",
    "content": "*3\n$3\nSET\n$2\nek\n$3\nval\n*3\n$6\nEXPIRE\n$2\nek\n$3\n300\n*2\n$3\nTTL\n$2\nek\n*2\n$4\nPTTL\n$2\nek\n*2\n$10\nEXPIRETIME\n$2\nek\n*2\n$11\nPEXPIRETIME\n$2\nek\n*3\n$8\nEXPIREAT\n$2\nek\n$10\n9999999999\n*2\n$7\nPERSIST\n$2\nek\n*3\n$7\nPEXPIRE\n$2\nek\n$6\n300000\n*3\n$9\nPEXPIREAT\n$2\nek\n$13\n9999999999000\n*2\n$5\nTOUCH\n$2\nek\n"
  },
  {
    "path": "fuzz/seeds/resp/function.resp",
    "content": "*3\r\n$8\r\nFUNCTION\r\n$4\r\nLOAD\r\n$56\r\n#!lua name=mylib\r\nredis.register_function('myfunc', function() return 1 end)\r\n*2\r\n$8\r\nFUNCTION\r\n$4\r\nLIST\r\n"
  },
  {
    "path": "fuzz/seeds/resp/function_ops.resp",
    "content": "*3\n$8\nFUNCTION\n$4\nLOAD\n$56\n#!lua name=mylib\nredis.register_function('myfunc', function() return 1 end)\n*2\n$8\nFUNCTION\n$4\nLIST\n*3\n$8\nFUNCTION\n$6\nDELETE\n$5\nmylib\n"
  },
  {
    "path": "fuzz/seeds/resp/generic_ops.resp",
    "content": "*3\n$3\nSET\n$2\ngk\n$3\nval\n*2\n$4\nTYPE\n$2\ngk\n*2\n$6\nEXISTS\n$2\ngk\n*3\n$6\nEXPIRE\n$2\ngk\n$3\n300\n*2\n$3\nTTL\n$2\ngk\n*2\n$4\nPTTL\n$2\ngk\n*2\n$10\nEXPIRETIME\n$2\ngk\n*3\n$7\nPEXPIRE\n$2\ngk\n$6\n300000\n*2\n$11\nPEXPIRETIME\n$2\ngk\n*2\n$7\nPERSIST\n$2\ngk\n*3\n$4\nCOPY\n$2\ngk\n$3\ngk2\n*3\n$6\nRENAME\n$3\ngk2\n$3\ngk3\n*2\n$4\nDUMP\n$3\ngk3\n*2\n$6\nUNLINK\n$3\ngk3\n*2\n$4\nKEYS\n$1\n*\n*3\n$4\nSCAN\n$1\n0\n$5\nCOUNT\n$1\n5\n*2\n$9\nRANDOMKEY\n*2\n$6\nDBSIZE\n*2\n$4\nTIME\n*3\n$6\nSELECT\n$1\n0\n*5\n$4\nSORT\n$2\ngk\n$2\nBY\n$6\nnosort\n$5\nALPHA\n"
  },
  {
    "path": "fuzz/seeds/resp/generic_ops2.resp",
    "content": "*3\n$3\nSET\n$3\ngk1\n$3\nval\n*3\n$3\nSET\n$3\ngk2\n$3\nval\n*2\n$2\nDEL\n$3\ngk1\n*2\n$3\nGET\n$3\ngk2\n*2\n$3\nTTL\n$3\ngk2\n*3\n$8\nRENAMENX\n$3\ngk2\n$3\ngk3\n*2\n$4\nECHO\n$5\nhello\n*3\n$5\nSTICK\n$3\ngk3\n*2\n$5\nTOUCH\n$3\ngk3\n*2\n$4\nTYPE\n$3\ngk3\n*3\n$4\nMOVE\n$3\ngk3\n$1\n1\n*2\n$7\nSORT_RO\n$3\ngk3\n*3\n$3\nSET\n$3\ngk4\n$3\nval\n*4\n$7\nRESTORE\n$3\ngk5\n$1\n0\n$5\ndummy\n"
  },
  {
    "path": "fuzz/seeds/resp/geo_ops.resp",
    "content": "*8\n$6\nGEOADD\n$2\ngk\n$9\n13.361389\n$9\n38.115556\n$7\nPalermo\n$9\n15.087269\n$9\n37.502669\n$7\nCatania\n*5\n$7\nGEODIST\n$2\ngk\n$7\nPalermo\n$7\nCatania\n$2\nkm\n*3\n$7\nGEOHASH\n$2\ngk\n$7\nPalermo\n*3\n$6\nGEOPOS\n$2\ngk\n$7\nPalermo\n*7\n$9\nGEOSEARCH\n$2\ngk\n$9\nFROMLONLAT\n$2\n15\n$2\n37\n$6\nBYRADIUS\n$3\n200\n$2\nkm\n*6\n$10\nGEORADIUS\n$2\ngk\n$2\n15\n$2\n37\n$3\n200\n$2\nkm\n"
  },
  {
    "path": "fuzz/seeds/resp/geo_ops2.resp",
    "content": "*11\n$6\nGEOADD\n$3\ngx1\n$9\n13.361389\n$9\n38.115556\n$7\nPalermo\n$9\n15.087269\n$9\n37.502669\n$7\nCatania\n$9\n2.349014\n$9\n48.864716\n$5\nParis\n*7\n$10\nGEORADIUS\n$3\ngx1\n$2\n15\n$2\n37\n$3\n200\n$2\nkm\n*6\n$19\nGEORADIUSBYMEMBER\n$3\ngx1\n$7\nPalermo\n$3\n200\n$2\nkm\n*7\n$13\nGEORADIUS_RO\n$3\ngx1\n$2\n15\n$2\n37\n$3\n200\n$2\nkm\n*6\n$22\nGEORADIUSBYMEMBER_RO\n$3\ngx1\n$7\nPalermo\n$3\n200\n$2\nkm\n*9\n$9\nGEOSEARCH\n$3\ngx1\n$10\nFROMLONLAT\n$2\n15\n$2\n37\n$6\nBYRADIUS\n$3\n200\n$2\nkm\n$3\nASC\n*10\n$14\nGEOSEARCHSTORE\n$4\ngdst\n$3\ngx1\n$10\nFROMLONLAT\n$2\n15\n$2\n37\n$6\nBYRADIUS\n$3\n200\n$2\nkm\n"
  },
  {
    "path": "fuzz/seeds/resp/geoadd.resp",
    "content": "*5\n$6\nGEOADD\n$5\nmygeo\n$9\n13.361389\n$9\n38.115556\n$7\nPalermo\n*5\n$7\nGEODIST\n$5\nmygeo\n$7\nPalermo\n$7\nCatania\n$2\nkm\n"
  },
  {
    "path": "fuzz/seeds/resp/get.resp",
    "content": "*2\r\n$3\r\nGET\r\n$3\r\nkey\r\n"
  },
  {
    "path": "fuzz/seeds/resp/getdel.resp",
    "content": "*3\n$3\nSET\n$1\nk\n$1\nv\n*2\n$6\nGETDEL\n$1\nk\n"
  },
  {
    "path": "fuzz/seeds/resp/hash_ops.resp",
    "content": "*8\n$4\nHSET\n$2\nhh\n$2\nf1\n$2\nv1\n$2\nf2\n$2\nv2\n$2\nf3\n$2\n10\n*3\n$4\nHGET\n$2\nhh\n$2\nf1\n*4\n$5\nHMGET\n$2\nhh\n$2\nf1\n$2\nf2\n*2\n$7\nHGETALL\n$2\nhh\n*2\n$5\nHKEYS\n$2\nhh\n*2\n$5\nHVALS\n$2\nhh\n*2\n$4\nHLEN\n$2\nhh\n*3\n$7\nHEXISTS\n$2\nhh\n$2\nf1\n*3\n$7\nHSTRLEN\n$2\nhh\n$2\nf1\n*4\n$7\nHINCRBY\n$2\nhh\n$2\nf3\n$1\n5\n*4\n$12\nHINCRBYFLOAT\n$2\nhh\n$2\nf3\n$3\n1.5\n*3\n$10\nHRANDFIELD\n$2\nhh\n$1\n2\n*4\n$6\nHSETNX\n$2\nhh\n$4\nnewf\n$4\nnewv\n*3\n$4\nHDEL\n$2\nhh\n$2\nf2\n*3\n$5\nHSCAN\n$2\nhh\n$1\n0\n"
  },
  {
    "path": "fuzz/seeds/resp/hash_ops2.resp",
    "content": "*6\n$4\nHSET\n$3\nhx1\n$2\nf1\n$2\nv1\n$2\nf2\n$2\nv2\n*4\n$5\nHMSET\n$3\nhx1\n$2\nf3\n$2\nv3\n*4\n$6\nHSETNX\n$3\nhx1\n$6\nnewkey\n$5\nnewvl\n*4\n$7\nHSTRLEN\n$3\nhx1\n$2\nf1\n*3\n$12\nHINCRBYFLOAT\n$3\nhx1\n$2\nf1\n$3\n1.5\n*3\n$9\nHRANDFIELD\n$3\nhx1\n$1\n2\n*5\n$6\nHSETEX\n$3\nhx1\n$3\n300\n$2\nf4\n$2\nv4\n*4\n$7\nHEXPIRE\n$3\nhx1\n$3\n300\n$2\nf4\n"
  },
  {
    "path": "fuzz/seeds/resp/hll_ops.resp",
    "content": "*5\n$5\nPFADD\n$4\nhll1\n$1\na\n$1\nb\n$1\nc\n*4\n$5\nPFADD\n$4\nhll2\n$1\nc\n$1\nd\n$1\ne\n*2\n$7\nPFCOUNT\n$4\nhll1\n*3\n$7\nPFCOUNT\n$4\nhll1\n$4\nhll2\n*4\n$7\nPFMERGE\n$4\nhll3\n$4\nhll1\n$4\nhll2\n"
  },
  {
    "path": "fuzz/seeds/resp/hset.resp",
    "content": "*4\n$4\nHSET\n$4\nhash\n$5\nfield\n$5\nvalue\n"
  },
  {
    "path": "fuzz/seeds/resp/json.resp",
    "content": "*4\n$8\nJSON.SET\n$3\ndoc\n$1\n$\n$15\n{\"name\":\"test\"}\n"
  },
  {
    "path": "fuzz/seeds/resp/json_ops.resp",
    "content": "*4\n$8\nJSON.SET\n$2\njk\n$1\n$\n$52\n{\"name\":\"test\",\"age\":30,\"tags\":[\"a\",\"b\"],\"nested\":{\"x\":1}}\n*3\n$8\nJSON.GET\n$2\njk\n$1\n$\n*3\n$9\nJSON.TYPE\n$2\njk\n$1\n$\n*3\n$10\nJSON.STRLEN\n$2\njk\n$6\n$.name\n*3\n$11\nJSON.OBJLEN\n$2\njk\n$1\n$\n*3\n$11\nJSON.OBJKEYS\n$2\njk\n$1\n$\n*3\n$10\nJSON.ARRLEN\n$2\njk\n$6\n$.tags\n*4\n$13\nJSON.ARRAPPEND\n$2\njk\n$6\n$.tags\n$3\n\"c\"\n*5\n$13\nJSON.ARRINSERT\n$2\njk\n$6\n$.tags\n$1\n0\n$3\n\"z\"\n*4\n$11\nJSON.ARRPOP\n$2\njk\n$6\n$.tags\n$2\n-1\n*5\n$12\nJSON.ARRTRIM\n$2\njk\n$6\n$.tags\n$1\n0\n$1\n2\n*4\n$12\nJSON.ARRINDEX\n$2\njk\n$6\n$.tags\n$3\n\"a\"\n*3\n$14\nJSON.NUMINCRBY\n$2\njk\n$5\n$.age\n$1\n1\n*3\n$14\nJSON.NUMMULTBY\n$2\njk\n$5\n$.age\n$1\n2\n*4\n$12\nJSON.STRAPPEND\n$2\njk\n$6\n$.name\n$4\n\"_x\"\n*3\n$11\nJSON.TOGGLE\n$2\njk\n$6\n$.tags\n*3\n$10\nJSON.CLEAR\n$2\njk\n$6\n$.tags\n*3\n$8\nJSON.DEL\n$2\njk\n$8\n$.nested\n*3\n$9\nJSON.RESP\n$2\njk\n$1\n$\n"
  },
  {
    "path": "fuzz/seeds/resp/json_ops2.resp",
    "content": "*4\n$8\nJSON.SET\n$3\njm1\n$1\n$\n$13\n{\"a\":1,\"b\":2}\n*4\n$8\nJSON.SET\n$3\njm2\n$1\n$\n$13\n{\"a\":3,\"c\":4}\n*3\n$9\nJSON.MGET\n$3\njm1\n$3\njm2\n$1\n$\n*4\n$9\nJSON.MSET\n$3\njm1\n$3\n$.a\n$1\n9\n*4\n$10\nJSON.MERGE\n$3\njm1\n$1\n$\n$9\n{\"d\":\"new\"}\n*3\n$10\nJSON.DEBUG\n$6\nMEMORY\n$3\njm1\n$1\n$\n*3\n$10\nJSON.FORGET\n$3\njm2\n$3\n$.c\n"
  },
  {
    "path": "fuzz/seeds/resp/list_blocking.resp",
    "content": "*5\n$5\nRPUSH\n$3\nlb1\n$1\na\n$1\nb\n$1\nc\n*5\n$5\nRPUSH\n$3\nlb2\n$1\nx\n$1\ny\n$1\nz\n*3\n$10\nRPOPLPUSH\n$3\nlb1\n$3\nlb2\n*5\n$5\nLMOVE\n$3\nlb1\n$3\nlb2\n$4\nLEFT\n$5\nRIGHT\n*4\n$5\nLMPOP\n$1\n2\n$3\nlb1\n$3\nlb2\n$4\nLEFT\n*4\n$5\nLPUSH\n$3\nbq1\n$1\n1\n$1\n2\n*3\n$5\nBLPOP\n$3\nbq1\n$1\n1\n*3\n$5\nBRPOP\n$3\nbq1\n$1\n1\n*5\n$6\nBLMOVE\n$3\nlb1\n$3\nlb2\n$4\nLEFT\n$5\nRIGHT\n$1\n1\n*5\n$6\nBLMPOP\n$1\n1\n$1\n1\n$3\nlb1\n$4\nLEFT\n"
  },
  {
    "path": "fuzz/seeds/resp/list_ops.resp",
    "content": "*5\n$5\nRPUSH\n$2\nll\n$1\na\n$1\nb\n$1\nc\n*3\n$6\nLPUSHX\n$2\nll\n$1\nx\n*3\n$6\nRPUSHX\n$2\nll\n$1\nz\n*2\n$4\nLLEN\n$2\nll\n*4\n$6\nLRANGE\n$2\nll\n$1\n0\n$2\n-1\n*3\n$6\nLINDEX\n$2\nll\n$1\n2\n*5\n$7\nLINSERT\n$2\nll\n$6\nBEFORE\n$1\nb\n$4\nnew1\n*4\n$4\nLSET\n$2\nll\n$1\n0\n$4\nhead\n*4\n$5\nLTRIM\n$2\nll\n$1\n0\n$1\n4\n*4\n$4\nLREM\n$2\nll\n$1\n1\n$1\na\n*2\n$4\nLPOP\n$2\nll\n*2\n$4\nRPOP\n$2\nll\n*5\n$5\nRPUSH\n$2\nl2\n$1\n1\n$1\n2\n$1\n3\n*4\n$5\nLMOVE\n$2\nll\n$2\nl2\n$4\nLEFT\n"
  },
  {
    "path": "fuzz/seeds/resp/lmpop.resp",
    "content": "*5\n$5\nRPUSH\n$6\nmylist\n$1\na\n$1\nb\n$1\nc\n*4\n$5\nLMPOP\n$1\n1\n$6\nmylist\n$4\nLEFT\n"
  },
  {
    "path": "fuzz/seeds/resp/lpos.resp",
    "content": "*7\n$5\nRPUSH\n$6\nmylist\n$1\na\n$1\nb\n$1\nc\n$1\na\n$1\nd\n*3\n$4\nLPOS\n$6\nmylist\n$1\na\n"
  },
  {
    "path": "fuzz/seeds/resp/lpush.resp",
    "content": "*3\r\n$5\r\nLPUSH\r\n$4\r\nlist\r\n$4\r\nitem\r\n"
  },
  {
    "path": "fuzz/seeds/resp/memory.resp",
    "content": "*3\n$3\nSET\n$5\nmykey\n$9\nsomevalue\n*3\n$6\nMEMORY\n$5\nUSAGE\n$5\nmykey\n"
  },
  {
    "path": "fuzz/seeds/resp/monitor.resp",
    "content": "*1\n$7\nMONITOR\n"
  },
  {
    "path": "fuzz/seeds/resp/mset.resp",
    "content": "*5\r\n$4\r\nMSET\r\n$1\r\na\r\n$1\r\n1\r\n$1\r\nb\r\n$1\r\n2\r\n*3\r\n$4\r\nMGET\r\n$1\r\na\r\n$1\r\nb\r\n"
  },
  {
    "path": "fuzz/seeds/resp/multi_type_pipeline.resp",
    "content": "*3\n$3\nSET\n$2\npk\n$5\nhello\n*5\n$5\nRPUSH\n$2\npl\n$1\na\n$1\nb\n$1\nc\n*4\n$4\nHSET\n$2\nph\n$1\nf\n$1\nv\n*4\n$4\nSADD\n$2\nps\n$1\nx\n$1\ny\n*6\n$4\nZADD\n$2\npz\n$1\n1\n$1\na\n$1\n2\n$1\nb\n*5\n$4\nXADD\n$2\npx\n$1\n*\n$1\nk\n$1\nv\n*4\n$8\nJSON.SET\n$2\npj\n$1\n$\n$13\n{\"a\":1,\"b\":2}\n*2\n$4\nTYPE\n$2\npk\n*2\n$4\nTYPE\n$2\npl\n*2\n$4\nTYPE\n$2\nph\n*2\n$4\nTYPE\n$2\nps\n*2\n$4\nTYPE\n$2\npz\n*2\n$4\nTYPE\n$2\npx\n*8\n$3\nDEL\n$2\npk\n$2\npl\n$2\nph\n$2\nps\n$2\npz\n$2\npx\n$2\npj\n"
  },
  {
    "path": "fuzz/seeds/resp/object.resp",
    "content": "*3\n$3\nSET\n$5\nmykey\n$3\nval\n*3\n$6\nOBJECT\n$8\nENCODING\n$5\nmykey\n"
  },
  {
    "path": "fuzz/seeds/resp/pfadd.resp",
    "content": "*5\n$5\nPFADD\n$4\nhll1\n$1\na\n$1\nb\n$1\nc\n*2\n$7\nPFCOUNT\n$4\nhll1\n"
  },
  {
    "path": "fuzz/seeds/resp/ping.resp",
    "content": "*1\r\n$4\r\nPING\r\n"
  },
  {
    "path": "fuzz/seeds/resp/pipeline.resp",
    "content": "*1\n$4\nPING\n*3\n$3\nSET\n$1\na\n$1\n1\n*2\n$4\nINCR\n$1\na\n*2\n$3\nGET\n$1\na\n*2\n$3\nDEL\n$1\na\n"
  },
  {
    "path": "fuzz/seeds/resp/pubsub_ops.resp",
    "content": "*3\n$7\nPUBLISH\n$4\nchan\n$5\nhello\n*2\n$6\nPUBSUB\n$8\nCHANNELS\n*3\n$6\nPUBSUB\n$6\nNUMSUB\n$4\nchan\n"
  },
  {
    "path": "fuzz/seeds/resp/pubsub_ops2.resp",
    "content": "*3\n$7\nPUBLISH\n$5\nchan1\n$3\nmsg\n*3\n$7\nPUBLISH\n$5\nchan2\n$4\nmsg2\n*2\n$6\nPUBSUB\n$8\nCHANNELS\n*3\n$6\nPUBSUB\n$6\nNUMSUB\n$5\nchan1\n*2\n$6\nPUBSUB\n$8\nNUMPAT\n*2\n$9\nSUBSCRIBE\n$5\nchan1\n*2\n$11\nUNSUBSCRIBE\n$5\nchan1\n*2\n$10\nPSUBSCRIBE\n$5\nchan*\n*2\n$12\nPUNSUBSCRIBE\n$5\nchan*\n*2\n$10\nSSUBSCRIBE\n$5\nchan1\n*3\n$8\nSPUBLISH\n$5\nchan1\n$4\nsmsg\n"
  },
  {
    "path": "fuzz/seeds/resp/rename.resp",
    "content": "*3\r\n$3\r\nSET\r\n$3\r\nfoo\r\n$5\r\nhello\r\n*3\r\n$6\r\nRENAME\r\n$3\r\nfoo\r\n$3\r\nbar\r\n"
  },
  {
    "path": "fuzz/seeds/resp/rpoplpush.resp",
    "content": "*3\r\n$5\r\nLPUSH\r\n$3\r\nsrc\r\n$1\r\na\r\n*3\r\n$5\r\nLPUSH\r\n$3\r\nsrc\r\n$1\r\nb\r\n*3\r\n$9\r\nRPOPLPUSH\r\n$3\r\nsrc\r\n$3\r\ndst\r\n"
  },
  {
    "path": "fuzz/seeds/resp/sadd.resp",
    "content": "*3\n$4\nSADD\n$3\nset\n$6\nmember\n"
  },
  {
    "path": "fuzz/seeds/resp/scan_hscan.resp",
    "content": "*6\n$4\nHSET\n$1\nh\n$2\nf1\n$2\nv1\n$2\nf2\n$2\nv2\n*3\n$5\nHSCAN\n$1\nh\n$1\n0\n"
  },
  {
    "path": "fuzz/seeds/resp/script_ops.resp",
    "content": "*3\n$4\nEVAL\n$28\nreturn redis.call('PING')\n$1\n0\n*4\n$4\nEVAL\n$44\nreturn redis.call('SET', KEYS[1], ARGV[1])\n$1\n1\n$2\nek\n$2\nev\n*4\n$7\nEVAL_RO\n$37\nreturn redis.call('GET', KEYS[1])\n$1\n1\n$2\nek\n*2\n$6\nSCRIPT\n$5\nFLUSH\n"
  },
  {
    "path": "fuzz/seeds/resp/script_ops2.resp",
    "content": "*4\n$4\nEVAL\n$44\nreturn redis.call('SET', KEYS[1], ARGV[1])\n$1\n1\n$3\nesk\n$5\nesval\n*4\n$7\nEVAL_RO\n$37\nreturn redis.call('GET', KEYS[1])\n$1\n1\n$3\nesk\n*3\n$6\nSCRIPT\n$5\nFLUSH\n$5\nASYNC\n*3\n$6\nSCRIPT\n$6\nEXISTS\n$40\ne0e1f9fabfc9d4800c877a703b823ac0578ff831\n*4\n$8\nEVALSHA\n$40\ne0e1f9fabfc9d4800c877a703b823ac0578ff831\n$1\n0\n*4\n$11\nEVALSHA_RO\n$40\ne0e1f9fabfc9d4800c877a703b823ac0578ff831\n$1\n0\n"
  },
  {
    "path": "fuzz/seeds/resp/sdiffstore.resp",
    "content": "*4\r\n$4\r\nSADD\r\n$2\r\ns1\r\n$1\r\na\r\n$1\r\nb\r\n*3\r\n$4\r\nSADD\r\n$2\r\ns2\r\n$1\r\nb\r\n*4\r\n$10\r\nSDIFFSTORE\r\n$3\r\ndst\r\n$2\r\ns1\r\n$2\r\ns2\r\n"
  },
  {
    "path": "fuzz/seeds/resp/search_ops.resp",
    "content": "*8\n$9\nFT.CREATE\n$5\nmyidx\n$2\nON\n$4\nHASH\n$6\nSCHEMA\n$5\ntitle\n$4\nTEXT\n$5\nscore\n$7\nNUMERIC\n*3\n$7\nFT.INFO\n$5\nmyidx\n*8\n$4\nHSET\n$4\ndoc1\n$5\ntitle\n$5\nhello\n$5\nscore\n$1\n1\n*8\n$4\nHSET\n$4\ndoc2\n$5\ntitle\n$5\nworld\n$5\nscore\n$1\n2\n*3\n$9\nFT.SEARCH\n$5\nmyidx\n$5\nhello\n*5\n$9\nFT.SEARCH\n$5\nmyidx\n$1\n*\n$5\nLIMIT\n$1\n0\n$1\n5\n*2\n$8\nFT._LIST\n*3\n$12\nFT.DROPINDEX\n$5\nmyidx\n"
  },
  {
    "path": "fuzz/seeds/resp/search_ops2.resp",
    "content": "*8\n$9\nFT.CREATE\n$5\nidx2\n$2\nON\n$4\nHASH\n$6\nPREFIX\n$1\n1\n$4\ndoc:\n$6\nSCHEMA\n$5\ntitle\n$4\nTEXT\n$5\nscore\n$7\nNUMERIC\n*4\n$4\nHSET\n$5\ndoc:1\n$5\ntitle\n$5\nhello\n$5\nscore\n$1\n1\n*4\n$4\nHSET\n$5\ndoc:2\n$5\ntitle\n$5\nworld\n$5\nscore\n$1\n2\n*3\n$9\nFT.SEARCH\n$5\nidx2\n$5\nhello\n*7\n$9\nFT.SEARCH\n$5\nidx2\n$1\n*\n$6\nSORTBY\n$5\nscore\n$5\nLIMIT\n$1\n0\n$1\n1\n*2\n$7\nFT.INFO\n$5\nidx2\n*5\n$8\nFT.ALTER\n$5\nidx2\n$6\nSCHEMA\n$3\nADD\n$3\ntag\n$3\nTAG\n*3\n$9\nFT.CONFIG\n$3\nGET\n$1\n*\n*3\n$9\nFT.CONFIG\n$3\nSET\n$14\nMAXSEARCHRESULTS\n$5\n10000\n*6\n$12\nFT.SYNUPDATE\n$5\nidx2\n$2\ng1\n$5\nhello\n$2\nhi\n$3\nhey\n*2\n$10\nFT.SYNDUMP\n$5\nidx2\n*3\n$12\nFT.AGGREGATE\n$5\nidx2\n$1\n*\n*2\n$10\nFT.TAGVALS\n$5\nidx2\n$3\ntag\n*2\n$12\nFT.DROPINDEX\n$5\nidx2\n"
  },
  {
    "path": "fuzz/seeds/resp/server_ops.resp",
    "content": "*2\n$4\nINFO\n$6\nserver\n*2\n$4\nINFO\n$6\nmemory\n*2\n$4\nINFO\n$11\nreplication\n*1\n$6\nDBSIZE\n*3\n$6\nCLIENT\n$7\nSETNAME\n$4\nfuzz\n*2\n$6\nCLIENT\n$7\nGETNAME\n*2\n$6\nCLIENT\n$2\nID\n*2\n$6\nCLIENT\n$4\nINFO\n*3\n$6\nCONFIG\n$3\nGET\n$9\nmaxmemory\n*2\n$4\nROLE\n*2\n$7\nLASTSAVE\n*3\n$6\nMEMORY\n$5\nUSAGE\n$4\nnokey\n*2\n$7\nSLOWLOG\n$3\nLEN\n*2\n$7\nLATENCY\n$6\nLATEST\n*3\n$5\nHELLO\n$1\n2\n"
  },
  {
    "path": "fuzz/seeds/resp/server_ops2.resp",
    "content": "*2\n$4\nINFO\n$3\nall\n*2\n$6\nCLIENT\n$4\nLIST\n*3\n$6\nCLIENT\n$4\nINFO\n*2\n$7\nCLUSTER\n$4\nINFO\n*2\n$7\nCLUSTER\n$5\nMYID\n*2\n$7\nCLUSTER\n$5\nSLOTS\n*1\n$8\nREADONLY\n*1\n$9\nREADWRITE\n*2\n$7\nSLOWLOG\n$3\nGET\n*2\n$7\nLATENCY\n$7\nHISTORY\n$5\nevent\n*2\n$6\nMEMORY\n$6\nDOCTOR\n*2\n$6\nMEMORY\n$5\nSTATS\n*3\n$5\nHELLO\n$1\n3\n*4\n$4\nDFLY\n$7\nCLUSTER\n$6\nCONFIG\n$2\n{}\n*2\n$1\nQUIT\n"
  },
  {
    "path": "fuzz/seeds/resp/set.resp",
    "content": "*3\r\n$3\r\nSET\r\n$3\r\nkey\r\n$5\r\nvalue\r\n"
  },
  {
    "path": "fuzz/seeds/resp/set_ops.resp",
    "content": "*6\n$4\nSADD\n$2\ns1\n$1\na\n$1\nb\n$1\nc\n$1\nd\n*5\n$4\nSADD\n$2\ns2\n$1\nc\n$1\nd\n$1\ne\n*2\n$5\nSCARD\n$2\ns1\n*2\n$8\nSMEMBERS\n$2\ns1\n*3\n$9\nSISMEMBER\n$2\ns1\n$1\na\n*4\n$10\nSMISMEMBER\n$2\ns1\n$1\na\n$1\nz\n*3\n$4\nSREM\n$2\ns1\n$1\nd\n*3\n$5\nSMOVE\n$2\ns1\n$2\ns2\n$1\na\n*3\n$6\nSUNION\n$2\ns1\n$2\ns2\n*3\n$5\nSINTER\n$2\ns1\n$2\ns2\n*3\n$5\nSDIFF\n$2\ns1\n$2\ns2\n*4\n$11\nSUNIONSTORE\n$4\nsdst\n$2\ns1\n$2\ns2\n*4\n$11\nSINTERSTORE\n$4\nidst\n$2\ns1\n$2\ns2\n*4\n$10\nSDIFFSTORE\n$4\nddst\n$2\ns1\n$2\ns2\n*4\n$10\nSINTERCARD\n$1\n2\n$2\ns1\n$2\ns2\n*3\n$4\nSPOP\n$2\ns1\n$1\n1\n*3\n$5\nSSCAN\n$2\ns2\n$1\n0\n"
  },
  {
    "path": "fuzz/seeds/resp/set_ops2.resp",
    "content": "*4\n$4\nSADD\n$3\nsx1\n$1\na\n$1\nb\n*4\n$6\nSADDEX\n$3\nsx1\n$3\n300\n$1\nc\n"
  },
  {
    "path": "fuzz/seeds/resp/smove.resp",
    "content": "*3\r\n$4\r\nSADD\r\n$3\r\nsrc\r\n$1\r\na\r\n*3\r\n$4\r\nSADD\r\n$3\r\ndst\r\n$1\r\nb\r\n*4\r\n$5\r\nSMOVE\r\n$3\r\nsrc\r\n$3\r\ndst\r\n$1\r\na\r\n"
  },
  {
    "path": "fuzz/seeds/resp/sort.resp",
    "content": "*4\r\n$5\r\nLPUSH\r\n$4\r\nlist\r\n$1\r\n3\r\n$1\r\n1\r\n*3\r\n$5\r\nLPUSH\r\n$4\r\nlist\r\n$1\r\n2\r\n*4\r\n$4\r\nSORT\r\n$4\r\nlist\r\n$5\r\nSTORE\r\n$6\r\nsorted\r\n"
  },
  {
    "path": "fuzz/seeds/resp/srandmember.resp",
    "content": "*7\n$4\nSADD\n$5\nmyset\n$1\na\n$1\nb\n$1\nc\n$1\nd\n$1\ne\n*3\n$11\nSRANDMEMBER\n$5\nmyset\n$1\n3\n"
  },
  {
    "path": "fuzz/seeds/resp/stream_ops.resp",
    "content": "*5\n$4\nXADD\n$2\nst\n$1\n*\n$1\nk\n$1\nv\n*5\n$4\nXADD\n$2\nst\n$1\n*\n$1\nk\n$2\nv2\n*5\n$4\nXADD\n$2\nst\n$1\n*\n$1\nk\n$2\nv3\n*2\n$4\nXLEN\n$2\nst\n*4\n$6\nXRANGE\n$2\nst\n$1\n-\n$1\n+\n*4\n$9\nXREVRANGE\n$2\nst\n$1\n+\n$1\n-\n*4\n$5\nXTRIM\n$2\nst\n$6\nMAXLEN\n$1\n2\n*4\n$6\nXGROUP\n$6\nCREATE\n$2\nst\n$2\ng1\n$1\n0\n*7\n$10\nXREADGROUP\n$5\nGROUP\n$2\ng1\n$2\nc1\n$7\nSTREAMS\n$2\nst\n$1\n>\n*4\n$4\nXACK\n$2\nst\n$2\ng1\n$3\n0-1\n*4\n$8\nXPENDING\n$2\nst\n$2\ng1\n$1\n-\n$1\n+\n$2\n10\n*4\n$5\nXINFO\n$6\nSTREAM\n$2\nst\n*3\n$6\nXSETID\n$2\nst\n$3\n0-5\n"
  },
  {
    "path": "fuzz/seeds/resp/stream_ops2.resp",
    "content": "*5\n$4\nXADD\n$3\nsx1\n$1\n*\n$1\nk\n$2\nv1\n*5\n$4\nXADD\n$3\nsx1\n$1\n*\n$1\nk\n$2\nv2\n*5\n$4\nXADD\n$3\nsx1\n$1\n*\n$1\nk\n$2\nv3\n*4\n$6\nXGROUP\n$6\nCREATE\n$3\nsx1\n$3\nsg1\n$1\n0\n*7\n$10\nXREADGROUP\n$5\nGROUP\n$3\nsg1\n$2\nc1\n$7\nSTREAMS\n$3\nsx1\n$1\n>\n*5\n$6\nXCLAIM\n$3\nsx1\n$3\nsg1\n$2\nc1\n$1\n0\n$3\n0-1\n*6\n$10\nXAUTOCLAIM\n$3\nsx1\n$3\nsg1\n$2\nc1\n$1\n0\n$3\n0-0\n*3\n$4\nXDEL\n$3\nsx1\n$3\n0-1\n"
  },
  {
    "path": "fuzz/seeds/resp/string_ops.resp",
    "content": "*3\n$3\nSET\n$2\nsk\n$5\nhello\n*3\n$6\nAPPEND\n$2\nsk\n$6\n_world\n*2\n$6\nSTRLEN\n$2\nsk\n*4\n$8\nGETRANGE\n$2\nsk\n$1\n0\n$1\n4\n*4\n$8\nSETRANGE\n$2\nsk\n$1\n6\n$3\nfoo\n*3\n$5\nSETEX\n$3\nsk2\n$2\n60\n$4\ntemp\n*3\n$6\nPSETEX\n$3\nsk3\n$5\n60000\n$4\ntemp\n*3\n$5\nSETNX\n$3\nsk4\n$3\nnew\n*3\n$6\nGETSET\n$2\nsk\n$3\nold\n*6\n$4\nMSET\n$2\nm1\n$2\nv1\n$2\nm2\n$2\nv2\n*3\n$4\nMGET\n$2\nm1\n$2\nm2\n*3\n$3\nSET\n$2\nci\n$1\n0\n*2\n$4\nINCR\n$2\nci\n*2\n$4\nDECR\n$2\nci\n*3\n$6\nINCRBY\n$2\nci\n$2\n10\n*3\n$6\nDECRBY\n$2\nci\n$1\n5\n*3\n$12\nINCRBYFLOAT\n$2\nci\n$3\n1.5\n*2\n$6\nGETDEL\n$2\nm2\n*4\n$5\nGETEX\n$2\nm1\n$2\nEX\n$2\n60\n"
  },
  {
    "path": "fuzz/seeds/resp/string_ops2.resp",
    "content": "*4\n$5\nMSETNX\n$2\nnx1\n$2\nv1\n$2\nnx2\n$2\nv2\n*3\n$7\nPREPEND\n$2\nnx1\n$3\npre\n*3\n$6\nSUBSTR\n$2\nnx1\n$1\n0\n$1\n3\n*2\n$6\nDIGEST\n$2\nnx1\n*4\n$5\nSETEX\n$2\nsx\n$1\n3\n$3\nval\n*4\n$6\nPSETEX\n$2\npx\n$4\n3000\n$3\nval\n*3\n$5\nGETEX\n$2\nsx\n$2\nEX\n$1\n5\n*3\n$6\nAPPEND\n$2\nsx\n$4\n_end\n*3\n$8\nSETRANGE\n$2\nsx\n$1\n0\n$3\nNEW\n*2\n$6\nGETDEL\n$2\npx\n"
  },
  {
    "path": "fuzz/seeds/resp/subscribe.resp",
    "content": "*2\n$9\nSUBSCRIBE\n$9\nmychannel\n"
  },
  {
    "path": "fuzz/seeds/resp/throttle.resp",
    "content": "*6\n$11\nCL.THROTTLE\n$6\nmyrate\n$2\n10\n$2\n30\n$2\n60\n$1\n1\n"
  },
  {
    "path": "fuzz/seeds/resp/transaction.resp",
    "content": "*1\r\n$5\r\nMULTI\r\n*3\r\n$3\r\nSET\r\n$1\r\na\r\n$1\r\n1\r\n*1\r\n$4\r\nEXEC\r\n"
  },
  {
    "path": "fuzz/seeds/resp/transaction_ops2.resp",
    "content": "*3\n$3\nSET\n$2\ntk\n$3\nval\n*1\n$5\nWATCH\n$2\ntk\n*1\n$5\nMULTI\n*3\n$3\nSET\n$2\ntk\n$4\nnew1\n*1\n$7\nDISCARD\n*1\n$7\nUNWATCH\n*1\n$5\nMULTI\n*3\n$3\nSET\n$2\ntk\n$4\nnew2\n*1\n$4\nEXEC\n"
  },
  {
    "path": "fuzz/seeds/resp/watch.resp",
    "content": "*2\r\n$5\r\nWATCH\r\n$1\r\na\r\n*1\r\n$5\r\nMULTI\r\n*3\r\n$3\r\nSET\r\n$1\r\na\r\n$1\r\n1\r\n*1\r\n$4\r\nEXEC\r\n"
  },
  {
    "path": "fuzz/seeds/resp/watch_multi.resp",
    "content": "*2\n$5\nWATCH\n$1\nk\n*1\n$5\nMULTI\n*3\n$3\nSET\n$1\nk\n$1\n1\n*1\n$4\nEXEC\n"
  },
  {
    "path": "fuzz/seeds/resp/xadd.resp",
    "content": "*5\n$4\nXADD\n$6\nstream\n$1\n*\n$5\nfield\n$5\nvalue\n"
  },
  {
    "path": "fuzz/seeds/resp/xread.resp",
    "content": "*5\n$5\nXREAD\n$5\nCOUNT\n$1\n1\n$7\nSTREAMS\n$6\nstream\n$1\n0\n"
  },
  {
    "path": "fuzz/seeds/resp/zadd.resp",
    "content": "*5\n$4\nZADD\n$4\nzset\n$1\n1\n$6\nmember\n"
  },
  {
    "path": "fuzz/seeds/resp/zmpop.resp",
    "content": "*8\n$4\nZADD\n$5\nmyzst\n$1\n1\n$1\na\n$1\n2\n$1\nb\n$1\n3\n$1\nc\n*4\n$5\nZMPOP\n$1\n1\n$5\nmyzst\n$3\nMIN\n"
  },
  {
    "path": "fuzz/seeds/resp/zrangebyscore.resp",
    "content": "*5\n$13\nZRANGEBYSCORE\n$4\nzset\n$4\n-inf\n$4\n+inf\n$10\nWITHSCORES\n"
  },
  {
    "path": "fuzz/seeds/resp/zset_ops.resp",
    "content": "*8\n$4\nZADD\n$2\nz1\n$1\n1\n$1\na\n$1\n2\n$1\nb\n$1\n3\n$1\nc\n*6\n$4\nZADD\n$2\nz2\n$1\n2\n$1\nb\n$1\n4\n$1\nd\n*3\n$7\nZINCRBY\n$2\nz1\n$1\n5\n$1\na\n*3\n$5\nZSCORE\n$2\nz1\n$1\na\n*4\n$7\nZMSCORE\n$2\nz1\n$1\na\n$1\nc\n*2\n$5\nZCARD\n$2\nz1\n*4\n$6\nZCOUNT\n$2\nz1\n$4\n-inf\n$4\n+inf\n*3\n$5\nZRANK\n$2\nz1\n$1\nb\n*3\n$8\nZREVRANK\n$2\nz1\n$1\nb\n*4\n$6\nZRANGE\n$2\nz1\n$1\n0\n$2\n-1\n*4\n$9\nZREVRANGE\n$2\nz1\n$1\n0\n$2\n-1\n*4\n$12\nZRANGEBYLEX\n$2\nz1\n$1\n-\n$1\n+\n*5\n$12\nZRANGEBYSCORE\n$2\nz1\n$1\n1\n$1\n3\n$10\nWITHSCORES\n*4\n$15\nZREMRANGEBYRANK\n$2\nz2\n$1\n0\n$1\n0\n*4\n$16\nZREMRANGEBYSCORE\n$2\nz2\n$1\n0\n$1\n2\n*3\n$7\nZPOPMIN\n$2\nz1\n$1\n1\n*3\n$7\nZPOPMAX\n$2\nz1\n$1\n1\n*3\n$6\nZUNION\n$1\n2\n$2\nz1\n$2\nz2\n*4\n$11\nZUNIONSTORE\n$4\nzdst\n$1\n2\n$2\nz1\n*3\n$5\nZSCAN\n$2\nz1\n$1\n0\n*3\n$11\nZRANDMEMBER\n$2\nz1\n$1\n2\n"
  },
  {
    "path": "fuzz/seeds/resp/zset_ops2.resp",
    "content": "*8\n$4\nZADD\n$3\nza1\n$1\n1\n$1\na\n$1\n2\n$1\nb\n$1\n3\n$1\nc\n*8\n$4\nZADD\n$3\nza2\n$1\n2\n$1\nb\n$1\n4\n$1\nd\n$1\n5\n$1\ne\n*4\n$6\nZINTER\n$1\n2\n$3\nza1\n$3\nza2\n*4\n$11\nZINTERSTORE\n$4\nzint\n$1\n2\n$3\nza1\n$3\nza2\n*5\n$10\nZINTERCARD\n$1\n2\n$3\nza1\n$3\nza2\n*4\n$5\nZDIFF\n$1\n2\n$3\nza1\n$3\nza2\n*4\n$10\nZDIFFSTORE\n$5\nzdiff\n$1\n2\n$3\nza1\n*3\n$4\nZREM\n$3\nza2\n$1\nd\n*4\n$14\nZREMRANGEBYLEX\n$3\nza1\n$3\n[a]\n$3\n[b]\n*6\n$11\nZRANGESTORE\n$5\nzrngs\n$3\nza1\n$1\n0\n$2\n-1\n$7\nBYSCORE\n*4\n$9\nZLEXCOUNT\n$3\nza1\n$1\n-\n$1\n+\n*6\n$15\nZREVRANGEBYSCORE\n$3\nza1\n$4\n+inf\n$4\n-inf\n$10\nWITHSCORES\n$5\nLIMIT\n$1\n0\n$1\n2\n*4\n$13\nZREVRANGEBYLEX\n$3\nza1\n$1\n+\n$1\n-\n"
  },
  {
    "path": "fuzz/triage_crashes.sh",
    "content": "#!/usr/bin/env bash\n# Triage AFL++ crash artifacts: replay each crash against a fresh Dragonfly\n# instance and report whether it's confirmed or a false positive.\n#\n# Usage:\n#   ./fuzz/triage_crashes.sh <dragonfly_binary> <mode> <crashes.zip>\n#\n#   dragonfly_binary  Path to Dragonfly binary\n#   mode              Protocol: 'resp' or 'memcache'\n#   crashes.zip       .zip downloaded from CI artifacts (contains crash-*.tar.gz files)\n#\n# Examples:\n#   ./fuzz/triage_crashes.sh ./build-dbg/dragonfly resp fuzz-long-resp-crashes-35.zip\n#   ./fuzz/triage_crashes.sh ./build-dbg/dragonfly memcache fuzz-long-memcache-crashes-35.zip\n\nset -euo pipefail\n\n# ─── Colors ───────────────────────────────────────────────────────────────────\nRED='\\033[0;31m'\nGREEN='\\033[0;32m'\nYELLOW='\\033[1;33m'\nCYAN='\\033[0;36m'\nBOLD='\\033[1m'\nNC='\\033[0m'\n\n# ─── Config ───────────────────────────────────────────────────────────────────\nRESP_PORT=6379\nMC_PORT=11211\nSTARTUP_TIMEOUT=5   # seconds to wait for Dragonfly to accept connections\nPOST_REPLAY_WAIT=3  # seconds to wait after replay for Dragonfly to crash\n\nprint_info()  { echo -e \"${GREEN}[INFO]${NC}  $1\"; }\nprint_error() { echo -e \"${RED}[ERROR]${NC} $1\"; }\nprint_warn()  { echo -e \"${YELLOW}[WARN]${NC}  $1\"; }\n\nusage() {\n    echo -e \"${BOLD}Usage:${NC} $0 <dragonfly_binary> <mode> <crashes.zip>\"\n    echo \"\"\n    echo \"  dragonfly_binary  Path to Dragonfly binary\"\n    echo \"  mode              Protocol: 'resp' or 'memcache'\"\n    echo \"  crashes.zip       .zip downloaded from CI artifacts\"\n    echo \"\"\n    echo \"Examples:\"\n    echo \"  $0 ./build-dbg/dragonfly resp fuzz-long-resp-crashes-35.zip\"\n    echo \"  $0 ./build-dbg/dragonfly memcache fuzz-long-memcache-crashes-35.zip\"\n    exit 1\n}\n\n# ─── Args ─────────────────────────────────────────────────────────────────────\nif [[ $# -lt 3 ]]; then\n    usage\nfi\n\nDRAGONFLY_BIN=\"$(realpath 
\"$1\")\"\nMODE=\"$2\"\nCRASHES_ZIP=\"$(realpath \"$3\")\"\n\nif [[ ! -f \"$DRAGONFLY_BIN\" ]]; then\n    print_error \"Dragonfly binary not found: $DRAGONFLY_BIN\"\n    exit 1\nfi\nif [[ \"$MODE\" != \"resp\" && \"$MODE\" != \"memcache\" ]]; then\n    print_error \"Mode must be 'resp' or 'memcache', got: $MODE\"\n    exit 1\nfi\nif [[ ! -f \"$CRASHES_ZIP\" ]]; then\n    print_error \"Crashes zip not found: $CRASHES_ZIP\"\n    exit 1\nfi\nif [[ \"$CRASHES_ZIP\" != *.zip ]]; then\n    print_error \"Expected a .zip file (CI artifact), got: $CRASHES_ZIP\"\n    exit 1\nfi\n\n# ─── Working directory ────────────────────────────────────────────────────────\nWORK_DIR=$(mktemp -d /tmp/triage_XXXXXX)\nDF_PID=\"\"\ncleanup() {\n    [[ -n \"$DF_PID\" ]] && kill -9 \"$DF_PID\" 2>/dev/null || true\n    rm -rf \"$WORK_DIR\"\n}\ntrap cleanup EXIT INT TERM\n\n# ─── Extract zip ──────────────────────────────────────────────────────────────\nprint_info \"Extracting $(basename \"$CRASHES_ZIP\")...\"\nunzip -q \"$CRASHES_ZIP\" -d \"$WORK_DIR/input\"\nCRASHES_DIR=\"$WORK_DIR/input\"\n\n# ─── Find crash archives ──────────────────────────────────────────────────────\nmapfile -t CRASH_ARCHIVES < <(find \"$CRASHES_DIR\" -name 'crash-*.tar.gz' | sort)\nTOTAL=${#CRASH_ARCHIVES[@]}\n\nif [[ $TOTAL -eq 0 ]]; then\n    print_error \"No crash-*.tar.gz files found in: $CRASHES_DIR\"\n    exit 1\nfi\n\nprint_info \"Found $TOTAL crash archive(s)  mode=$MODE  binary=$DRAGONFLY_BIN\"\necho \"\"\n\n# ─── Locate replay_crash.py ───────────────────────────────────────────────────\nSCRIPT_DIR=\"$(cd \"$(dirname \"${BASH_SOURCE[0]}\")\" && pwd)\"\nREPLAY_SCRIPT=\"$SCRIPT_DIR/replay_crash.py\"\nif [[ ! 
-f \"$REPLAY_SCRIPT\" ]]; then\n    print_error \"replay_crash.py not found at: $REPLAY_SCRIPT\"\n    print_error \"Run this script from the repository root or fuzz/ directory.\"\n    exit 1\nfi\n\n# ─── Helpers ──────────────────────────────────────────────────────────────────\n# Wait until a TCP port accepts connections\nwait_for_port() {\n    local host=\"$1\" port=\"$2\" timeout_sec=\"$3\"\n    local deadline=$((SECONDS + timeout_sec))\n    while [[ $SECONDS -lt $deadline ]]; do\n        if (>/dev/tcp/\"$host\"/\"$port\") 2>/dev/null; then\n            return 0\n        fi\n        sleep 0.2\n    done\n    return 1\n}\n\n# Wait until a TCP port stops accepting connections\nwait_port_free() {\n    local port=\"$1\" timeout_sec=\"${2:-5}\"\n    local deadline=$((SECONDS + timeout_sec))\n    while [[ $SECONDS -lt $deadline ]]; do\n        if ! (>/dev/tcp/127.0.0.1/\"$port\") 2>/dev/null; then\n            return 0\n        fi\n        sleep 0.2\n    done\n    return 1\n}\n\n# Show crash info from glog log directory.\nshow_crash_log() {\n    local log_dir=\"$1\"\n    local fatal_link=\"$log_dir/dragonfly.FATAL\"\n\n    if [[ -f \"$fatal_link\" ]]; then\n        # Skip the 4-line glog file header, show crash message + stack trace\n        sed -n '5,$p' \"$fatal_link\" | head -40 | sed 's/^/    /'\n        return\n    fi\n\n    # No FATAL file — fall back to tail of INFO log\n    local info_log\n    info_log=$(ls -t \"$log_dir\"/dragonfly.*.log.INFO.* 2>/dev/null | head -1 || true)\n    if [[ -n \"$info_log\" ]]; then\n        echo \"    (no FATAL log — last INFO log lines:)\"\n        tail -20 \"$info_log\" | sed 's/^/    /'\n    else\n        echo \"    (no log files found in $log_dir)\"\n    fi\n}\n\n# ─── Main loop ────────────────────────────────────────────────────────────────\nCONFIRMED=0\nFALSE_POSITIVE=0\nFAILED=0\n\nfor CRASH_ARCHIVE in \"${CRASH_ARCHIVES[@]}\"; do\n    CRASH_NAME=$(basename \"$CRASH_ARCHIVE\" .tar.gz)   # crash-000000\n    
CRASH_ID=\"${CRASH_NAME#crash-}\"                    # 000000\n    IDX=$((CONFIRMED + FALSE_POSITIVE + FAILED + 1))\n\n    echo -e \"${CYAN}${BOLD}─── [$IDX/$TOTAL] Crash ${CRASH_ID} ───${NC}\"\n\n    # Extract this crash archive\n    EXTRACT_DIR=\"$WORK_DIR/current_crash\"\n    rm -rf \"$EXTRACT_DIR\"\n    mkdir -p \"$EXTRACT_DIR\"\n    tar -xzf \"$CRASH_ARCHIVE\" -C \"$EXTRACT_DIR\"\n\n    CRASH_DATA_DIR=\"$EXTRACT_DIR/${CRASH_NAME}/crashes\"\n    if [[ ! -d \"$CRASH_DATA_DIR\" ]]; then\n        print_warn \"Expected directory not found: $CRASH_DATA_DIR — skipping\"\n        FAILED=$((FAILED + 1))\n        echo \"\"\n        continue\n    fi\n\n    # If the port is still in use (e.g. by an instance from a previous iteration),\n    # wait up to 5s for it to be released. Nothing is killed here; if the port\n    # stays blocked, this crash is counted as failed and skipped.\n    if (>/dev/tcp/127.0.0.1/\"$RESP_PORT\") 2>/dev/null; then\n        print_warn \"Port $RESP_PORT still in use — waiting...\"\n        wait_port_free \"$RESP_PORT\" 5 || {\n            print_error \"Port $RESP_PORT still blocked after 5s — cannot start Dragonfly\"\n            FAILED=$((FAILED + 1))\n            echo \"\"\n            continue\n        }\n    fi\n\n    # Start Dragonfly — use --log_dir so glog writes to separate per-level files\n    # (dragonfly.FATAL symlink is created on crash and contains the fatal message)\n    LOG_DIR=\"$WORK_DIR/logs_${CRASH_ID}\"\n    mkdir -p \"$LOG_DIR\"\n\n    # Mirror the exact flags used by run_fuzzer.sh so replay runs in the same\n    # server configuration as when the crash was found.\n    # Missing rename_command flags are the most common cause of false positives:\n    # if FLUSHALL/FLUSHDB/SHUTDOWN are not disabled, they execute during replay,\n    # wiping state or shutting down the server before the crash can trigger.\n    DF_ARGS=(\n        --port \"$RESP_PORT\"\n        --log_dir=\"$LOG_DIR\"\n        --proactor_threads 1\n        --dbfilename=\"\"\n        --omit_basic_usage\n        --rename_command=SHUTDOWN=\n        --rename_command=DEBUG=\n        --rename_command=FLUSHALL=\n        
--rename_command=FLUSHDB=\n        --max_bulk_len=1048576\n    )\n    [[ \"$MODE\" == \"memcache\" ]] && DF_ARGS+=(--memcached_port=\"$MC_PORT\")\n\n    \"$DRAGONFLY_BIN\" \"${DF_ARGS[@]}\" >/dev/null 2>&1 &\n    DF_PID=$!\n\n    if ! wait_for_port 127.0.0.1 \"$RESP_PORT\" \"$STARTUP_TIMEOUT\"; then\n        print_error \"Dragonfly did not start within ${STARTUP_TIMEOUT}s (crash $CRASH_ID)\"\n        kill -9 \"$DF_PID\" 2>/dev/null || true\n        wait \"$DF_PID\" 2>/dev/null && true || true\n        DF_PID=\"\"\n        FAILED=$((FAILED + 1))\n        echo \"\"\n        continue\n    fi\n    # In memcache mode also verify the memcache listener is up before replaying\n    if [[ \"$MODE\" == \"memcache\" ]] && ! wait_for_port 127.0.0.1 \"$MC_PORT\" 3; then\n        print_error \"Memcache port $MC_PORT not ready (crash $CRASH_ID)\"\n        kill -9 \"$DF_PID\" 2>/dev/null || true\n        wait \"$DF_PID\" 2>/dev/null && true || true\n        DF_PID=\"\"\n        FAILED=$((FAILED + 1))\n        echo \"\"\n        continue\n    fi\n\n    # Replay the crash\n    REPLAY_PORT=\"$RESP_PORT\"\n    [[ \"$MODE\" == \"memcache\" ]] && REPLAY_PORT=\"$MC_PORT\"\n\n    if ! python3 \"$REPLAY_SCRIPT\" \\\n            \"$CRASH_DATA_DIR\" \"$CRASH_ID\" 127.0.0.1 \"$REPLAY_PORT\" \\\n            >/dev/null 2>&1; then\n        print_warn \"Replay script failed for crash $CRASH_ID — skipping\"\n        kill -9 \"$DF_PID\" 2>/dev/null || true\n        wait \"$DF_PID\" 2>/dev/null && true || true\n        DF_PID=\"\"\n        FAILED=$((FAILED + 1))\n        echo \"\"\n        continue\n    fi\n\n    # Wait for Dragonfly to die (poll every 100ms)\n    DIED=false\n    for _ in $(seq 1 $((POST_REPLAY_WAIT * 10))); do\n        if ! kill -0 \"$DF_PID\" 2>/dev/null; then\n            DIED=true\n            break\n        fi\n        sleep 0.1\n    done\n\n    if ! 
$DIED; then\n        echo -e \"  ${YELLOW}FALSE POSITIVE${NC} — Dragonfly alive after replay\"\n        FALSE_POSITIVE=$((FALSE_POSITIVE + 1))\n        kill -9 \"$DF_PID\" 2>/dev/null || true\n        wait \"$DF_PID\" 2>/dev/null && true || true\n        DF_PID=\"\"\n    else\n        # Capture signal without triggering set -e (assignment always exits 0)\n        wait \"$DF_PID\" 2>/dev/null && EXIT_CODE=0 || EXIT_CODE=$?\n        DF_PID=\"\"\n        # Sanity check: exit code > 128 means killed by signal; otherwise not a signal death\n        if [[ $EXIT_CODE -le 128 ]]; then\n            echo -e \"  ${YELLOW}FALSE POSITIVE${NC} — Dragonfly exited cleanly (code $EXIT_CODE)\"\n            FALSE_POSITIVE=$((FALSE_POSITIVE + 1))\n            echo \"\"\n            continue\n        fi\n        SIGNAL=$((EXIT_CODE - 128))\n        CONFIRMED=$((CONFIRMED + 1))\n\n        if [[ $SIGNAL -eq 6 ]]; then\n            echo -e \"  ${RED}CONFIRMED${NC} — SIGABRT (signal 6) — assertion / LOG(FATAL)\"\n            show_crash_log \"$LOG_DIR\"\n        elif [[ $SIGNAL -eq 11 ]]; then\n            echo -e \"  ${RED}CONFIRMED${NC} — SIGSEGV (signal 11) — segmentation fault\"\n            show_crash_log \"$LOG_DIR\"\n        else\n            echo -e \"  ${RED}CONFIRMED${NC} — signal $SIGNAL (exit code $EXIT_CODE)\"\n            show_crash_log \"$LOG_DIR\"\n        fi\n    fi\n    echo \"\"\ndone\n\n# ─── Summary ──────────────────────────────────────────────────────────────────\necho -e \"${CYAN}${BOLD}═══ Triage Summary ═══${NC}\"\nprintf \"  %-18s %d\\n\" \"Total:\" \"$TOTAL\"\nprintf \"  ${RED}%-18s %d${NC}\\n\" \"Confirmed:\" \"$CONFIRMED\"\nprintf \"  ${YELLOW}%-18s %d${NC}\\n\" \"False positive:\" \"$FALSE_POSITIVE\"\n[[ $FAILED -gt 0 ]] && printf \"  ${RED}%-18s %d${NC}\\n\" \"Failed/skipped:\" \"$FAILED\"\n\n# Exit 1 if any confirmed crashes found\n[[ $CONFIRMED -gt 0 ]] && exit 1\nexit 0\n"
  },
  {
    "path": "go.work",
    "content": "go 1.24.0\n\ntoolchain go1.24.7\n\nuse (\n\t./contrib/charts/dragonfly\n\t./tools/replay\n)\n"
  },
  {
    "path": "go.work.sum",
    "content": "cel.dev/expr v0.16.1/go.mod h1:AsGA5zb3WruAEQeQng1RZdGEXmBj0jvMWh6l5SnNuC8=\ncloud.google.com/go v0.116.0/go.mod h1:cEPSRWPzZEswwdr9BxE6ChEn01dWlTaF05LiC2Xs70U=\ncloud.google.com/go/auth v0.10.2/go.mod h1:xxA5AqpDrvS+Gkmo9RqrGGRh6WSNKKOXhY3zNOr38tI=\ncloud.google.com/go/auth/oauth2adapt v0.2.5/go.mod h1:AlmsELtlEBnaNTL7jCj8VQFLy6mbZv0s4Q7NGBeQ5E8=\ncloud.google.com/go/cloudbuild v1.19.0/go.mod h1:ZGRqbNMrVGhknIIjwASa6MqoRTOpXIVMSI+Ew5DMPuY=\ncloud.google.com/go/compute v1.19.1/go.mod h1:6ylj3a05WF8leseCdIf77NK0g1ey+nj5IKd5/kvShxE=\ncloud.google.com/go/compute/metadata v0.5.2/go.mod h1:C66sj2AluDcIqakBq/M8lw8/ybHgOZqin2obFxa/E5k=\ncloud.google.com/go/iam v1.2.2/go.mod h1:0Ys8ccaZHdI1dEUilwzqng/6ps2YB6vRsjIe00/+6JY=\ncloud.google.com/go/longrunning v0.6.2/go.mod h1:k/vIs83RN4bE3YCswdXC5PFfWVILjm3hpEUlSko4PiI=\ncloud.google.com/go/monitoring v1.21.2/go.mod h1:hS3pXvaG8KgWTSz+dAdyzPrGUYmi2Q+WFX8g2hqVEZU=\ncloud.google.com/go/storage v1.47.0/go.mod h1:Ks0vP374w0PW6jOUameJbapbQKXqkjGd/OJRp2fb9IQ=\ngithub.com/Azure/azure-sdk-for-go v51.0.0+incompatible/go.mod h1:9XXNKU+eRnpl9moKnB4QOLf1HestfXbmab5FXxiDBjc=\ngithub.com/Azure/azure-sdk-for-go/sdk/azcore v1.17.0/go.mod h1:XCW7KnZet0Opnr7HccfUw1PLc4CjHqpcaxW8DHklNkQ=\ngithub.com/Azure/azure-sdk-for-go/sdk/azidentity v1.7.0/go.mod h1:9kIvujWAA58nmPmWB1m23fyWic1kYZMxD9CxaWn4Qpg=\ngithub.com/Azure/azure-sdk-for-go/sdk/internal v1.10.0/go.mod h1:iZDifYGJTIgIIkYRNWPENUnqx6bJ2xnSDFI2tjwZNuY=\ngithub.com/Azure/azure-sdk-for-go/sdk/resourcemanager/appcontainers/armappcontainers/v3 v3.0.0/go.mod h1:LDN3sr8FJ36sY6ZmMes6Q2vHJ+5r1aFsE3wEo7VbXJg=\ngithub.com/Azure/azure-sdk-for-go/sdk/resourcemanager/resources/armresources v1.2.0/go.mod h1:5kakwfW5CjC9KK+Q4wjXAg+ShuIm2mBMua0ZFj2C8PE=\ngithub.com/Azure/go-autorest v14.2.0+incompatible/go.mod h1:r+4oMnoxhatjLLJ6zxSWATqVooLgysK6ZNox3g/xq24=\ngithub.com/Azure/go-autorest/autorest v0.11.20/go.mod 
h1:o3tqFY+QR40VOlk+pV4d77mORO64jOXSgEnPQgLK6JY=\ngithub.com/Azure/go-autorest/autorest/adal v0.9.13/go.mod h1:W/MM4U6nLxnIskrw4UwWzlHfGjwUS50aOsc/I3yuU8M=\ngithub.com/Azure/go-autorest/autorest/azure/auth v0.5.8/go.mod h1:kxyKZTSfKh8OVFWPAgOgQ/frrJgeYQJPyR5fLFmXko4=\ngithub.com/Azure/go-autorest/autorest/azure/cli v0.4.2/go.mod h1:7qkJkT+j6b+hIpzMOwPChJhTqS8VbsqqgULzMNRugoM=\ngithub.com/Azure/go-autorest/autorest/date v0.3.0/go.mod h1:BI0uouVdmngYNUzGWeSYnokU+TrmwEsOqdt8Y6sso74=\ngithub.com/Azure/go-autorest/autorest/to v0.4.0/go.mod h1:fE8iZBn7LQR7zH/9XU2NcPR4o9jEImooCeWJcYV/zLE=\ngithub.com/Azure/go-autorest/autorest/validation v0.3.1/go.mod h1:yhLgjC0Wda5DYXl6JAsWyUe4KVNffhoDhG0zVzUMo3E=\ngithub.com/Azure/go-autorest/logger v0.2.1/go.mod h1:T9E3cAhj2VqvPOtCYAvby9aBXkZmbF5NWuPV8+WeEW8=\ngithub.com/Azure/go-autorest/tracing v0.6.0/go.mod h1:+vhtPC754Xsa23ID7GlGsrdKBpUA79WCAKPPZVC2DeU=\ngithub.com/AzureAD/microsoft-authentication-library-for-go v1.2.2/go.mod h1:wP83P5OoQ5p6ip3ScPr0BAq0BvuPAvacpEuSzyouqAI=\ngithub.com/GoogleCloudPlatform/opentelemetry-operations-go/detectors/gcp v1.24.1/go.mod h1:itPGVDKf9cC/ov4MdvJ2QZ0khw4bfoo9jzwTJlaxy2k=\ngithub.com/GoogleCloudPlatform/opentelemetry-operations-go/exporter/metric v0.48.1/go.mod h1:jyqM3eLpJ3IbIFDTKVz2rF9T/xWGW0rIriGwnz8l9Tk=\ngithub.com/GoogleCloudPlatform/opentelemetry-operations-go/internal/resourcemapping v0.48.1/go.mod h1:viRWSEhtMZqz1rhwmOVKkWl6SwmVowfL9O2YR5gI2PE=\ngithub.com/NYTimes/gziphandler v1.1.1/go.mod h1:n/CVRwUEOgIxrgPvAQhUUr9oeUtvrhMomdKFjzJNB0c=\ngithub.com/agext/levenshtein v1.2.3/go.mod h1:JEDfjyjHDjOF/1e4FlBE/PkbqA9OfWu2ki2W0IB5558=\ngithub.com/apparentlymart/go-textseg/v13 v13.0.0/go.mod h1:ZK2fH7c4NqDTLtiYLvIkEghdlcqw7yxLeM89kiTRPUo=\ngithub.com/apparentlymart/go-textseg/v15 v15.0.0/go.mod h1:K8XmNZdhEBkdlyDdvbmmsvpAG721bKi0joRfFdHIWJ4=\ngithub.com/aws/aws-lambda-go v1.47.0/go.mod h1:dpMpZgvWx5vuQJfBt0zqBha60q7Dd7RfgJv23DymV8A=\ngithub.com/aws/aws-sdk-go v1.44.122/go.mod 
h1:y4AeaBuwd2Lk+GepC1E9v0qOiTws0MIWAX4oIKwKHZo=\ngithub.com/bgentry/go-netrc v0.0.0-20140422174119-9fd32a8b3d3d/go.mod h1:6QX/PXZ00z/TKoufEY6K/a0k6AhaJrQKdFe6OfVXsa4=\ngithub.com/bgentry/speakeasy v0.1.0/go.mod h1:+zsyZBPWlz7T6j88CTgSN5bM796AkVf0kBD4zp0CCIs=\ngithub.com/bradleyfalzon/ghinstallation v1.1.1/go.mod h1:vyCmHTciHx/uuyN82Zc3rXN3X2KTK8nUTCrTMwAhcug=\ngithub.com/bsm/ginkgo/v2 v2.12.0/go.mod h1:SwYbGRRDovPVboqFv0tPTcG1sN61LM1Z4ARdbAV9g4c=\ngithub.com/bsm/gomega v1.27.10/go.mod h1:JyEr/xRbxbtgWNi8tIEVPUYZ5Dzef52k01W3YH0H+O0=\ngithub.com/census-instrumentation/opencensus-proto v0.4.1/go.mod h1:4T9NM4+4Vw91VeyqjLS6ao50K5bOcLKN6Q42XnYaRYw=\ngithub.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs=\ngithub.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=\ngithub.com/cncf/xds/go v0.0.0-20240905190251-b4127c9b8d78/go.mod h1:W+zGtBO5Y1IgJhy4+A9GOqVhqLpfZi+vwmdNXUehLA8=\ngithub.com/containerd/stargz-snapshotter/estargz v0.14.3/go.mod h1:KY//uOCIkSuNAHhJogcZtrNHdKrA99/FCCRjE3HD36o=\ngithub.com/denisenkom/go-mssqldb v0.12.3/go.mod h1:k0mtMFOnU+AihqFxPMiF05rtiDrorD1Vrm1KEz5hxDo=\ngithub.com/dgrijalva/jwt-go v3.2.0+incompatible/go.mod h1:E3ru+11k8xSBh+hMPgOLZmtrrCbhqsmaPHjLKYnJCaQ=\ngithub.com/dimchansky/utfbom v1.1.1/go.mod h1:SxdoEBH5qIqFocHMyGOXVAybYJdr71b1Q/j0mACtrfE=\ngithub.com/docker/cli v27.1.1+incompatible/go.mod h1:JLrzqnKDaYBop7H2jaqPtU4hHvMKP+vjCwu2uszcLI8=\ngithub.com/docker/distribution v2.8.2+incompatible/go.mod h1:J2gT2udsDAN96Uj4KfcMRqY0/ypR+oyYUYmja8H+y+w=\ngithub.com/docker/docker-credential-helpers v0.7.0/go.mod h1:rETQfLdHNT3foU5kuNkFR1R1V12OJRRO5lzt2D1b5X0=\ngithub.com/envoyproxy/go-control-plane v0.13.0/go.mod h1:GRaKG3dwvFoTg4nj7aXdZnvMg4d7nvT/wl9WgVXn3Q8=\ngithub.com/envoyproxy/protoc-gen-validate v1.1.0/go.mod h1:sXRDRVmzEbkM7CVcM06s9shE/m23dg3wzjl0UWqJ2q4=\ngithub.com/fatih/color v1.13.0/go.mod h1:kLAiJbzzSOZDVNGyDpeOxJ47H46qBXwg5ILebYFFOfk=\ngithub.com/felixge/httpsnoop 
v1.0.4/go.mod h1:m8KPJKqk1gH5J9DgRY2ASl2lWCfGKXixSwevea8zH2U=\ngithub.com/form3tech-oss/jwt-go v3.2.2+incompatible/go.mod h1:pbq4aXjuKjdthFRnoDwaVPLA+WlJuPGy+QneDUgJi2k=\ngithub.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE=\ngithub.com/golang-jwt/jwt/v5 v5.2.2/go.mod h1:pqrtFR0X4osieyHYxtmOUWsAWrfe1Q5UVIyoH402zdk=\ngithub.com/golang-sql/civil v0.0.0-20190719163853-cb61b32ac6fe/go.mod h1:8vg3r2VgvsThLBIFL93Qb5yWzgyZWhEmBwUJWevAkK0=\ngithub.com/golang-sql/sqlexp v0.1.0/go.mod h1:J4ad9Vo8ZCWQ2GMrC4UCQy1JpCbwU9m3EOqtpKwwwHI=\ngithub.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc=\ngithub.com/golang/protobuf v1.5.3/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiuN0vRsmY=\ngithub.com/gonvenience/wrap v1.1.2/go.mod h1:GiryBSXoI3BAAhbWD1cZVj7RZmtiu0ERi/6R6eJfslI=\ngithub.com/google/btree v1.1.3/go.mod h1:qOPhT0dTNdNzV6Z/lhRX0YXUafgPLFUh+gZMl761Gm4=\ngithub.com/google/go-containerregistry v0.20.2/go.mod h1:z38EKdKh4h7IP2gSfUUqEvalZBqs6AoLeWfUy34nQC8=\ngithub.com/google/go-github/v29 v29.0.2/go.mod h1:CHKiKKPHJ0REzfwc14QMklvtHwCveD0PxlMjLlzAM5E=\ngithub.com/google/go-github/v44 v44.1.0/go.mod h1:iWn00mWcP6PRWHhXm0zuFJ8wbEjE5AGO5D5HXYM4zgw=\ngithub.com/google/go-querystring v1.1.0/go.mod h1:Kcdr2DB4koayq7X8pmAG4sNG59So17icRSOU623lUBU=\ngithub.com/google/gofuzz v1.2.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg=\ngithub.com/google/s2a-go v0.1.8/go.mod h1:6iNWHTpQ+nfNRN5E00MSdfDwVesa8hhS32PhPO8deJA=\ngithub.com/googleapis/enterprise-certificate-proxy v0.3.4/go.mod h1:YKe7cfqYXjKGpGvmSg28/fFvhNzinZQm8DGnaburhGA=\ngithub.com/googleapis/gax-go/v2 v2.14.0/go.mod h1:lhBCnjdLrWRaPvLWhmc8IS24m9mr07qSYnHncrgo+zk=\ngithub.com/gregjones/httpcache v0.0.0-20190611155906-901d90724c79/go.mod h1:FecbI9+v66THATjSRHfNgh1IVFe/9kFxbXtjV0ctIMA=\ngithub.com/hashicorp/go-cleanhttp v0.5.2/go.mod h1:kO/YDlP8L1346E6Sodw+PrpBSV4/SoxCXGY6BqNFT48=\ngithub.com/hashicorp/go-getter v1.7.5/go.mod 
h1:W7TalhMmbPmsSMdNjD0ZskARur/9GJ17cfHTRtXV744=\ngithub.com/hashicorp/go-getter/v2 v2.2.3/go.mod h1:hp5Yy0GMQvwWVUmwLs3ygivz1JSLI323hdIE9J9m7TY=\ngithub.com/hashicorp/go-safetemp v1.0.0/go.mod h1:oaerMy3BhqiTbVye6QuFhFtIceqFoDHxNAB65b+Rj1I=\ngithub.com/hashicorp/go-version v1.7.0/go.mod h1:fltr4n8CU8Ke44wwGCBoEymUuxUHl09ZGVZPK5anwXA=\ngithub.com/hashicorp/hcl/v2 v2.22.0/go.mod h1:62ZYHrXgPoX8xBnzl8QzbWq4dyDsDtfCRgIq1rbJEvA=\ngithub.com/hashicorp/terraform-json v0.23.0/go.mod h1:MHdXbBAbSg0GvzuWazEGKAn/cyNfIB7mN6y7KJN6y2c=\ngithub.com/imdario/mergo v0.3.11/go.mod h1:jmQim1M+e3UYxmgPu/WyfjB3N3VflVyUjjjwH0dnCYA=\ngithub.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw=\ngithub.com/jinzhu/copier v0.0.0-20190924061706-b57f9002281a/go.mod h1:yL958EeXv8Ylng6IfnvG4oflryUi3vgA3xPs9hmII1s=\ngithub.com/jmespath/go-jmespath v0.4.0/go.mod h1:T8mJZnbsbmF+m6zOOFylbeCJqk5+pHWvzYPziyZiYoo=\ngithub.com/josharian/intern v1.0.0/go.mod h1:5DoeVV0s6jJacbCEi61lwdGj/aVlrQvzHFFd8Hwg//Y=\ngithub.com/jstemmer/go-junit-report v1.0.0/go.mod h1:Brl9GWCQeLvo8nXZwPNNblvFj/XSXhF0NWZEnDohbsk=\ngithub.com/klauspost/compress v1.16.5/go.mod h1:ntbaceVETuRiXiv4DpjP66DpAtAGkEQskQzEyD//IeE=\ngithub.com/kylelemons/godebug v1.1.0/go.mod h1:9/0rRGxNHcop5bhtWyNeEfOS8JIWk580+fNqagV/RAw=\ngithub.com/lib/pq v1.10.9/go.mod h1:AlVN5x4E4T544tWzH6hKfbfQvm3HdbOxrmggDNAPY9o=\ngithub.com/magiconair/properties v1.8.7/go.mod h1:Dhd985XPs7jluiymwWYZ0G4Z61jb3vdS329zhj2hYo0=\ngithub.com/mailru/easyjson v0.7.7/go.mod h1:xzfreul335JAWq5oZzymOObrkdz5UnU4kGfJJLY9Nlc=\ngithub.com/mattn/go-colorable v0.1.9/go.mod h1:u6P/XSegPjTcexA+o6vUJrdnUu04hMope9wVRipJSqc=\ngithub.com/miekg/dns v1.1.62/go.mod h1:mvDlcItzm+br7MToIKqkglaGhlFMHJ9DTNNWONWXbNQ=\ngithub.com/mitchellh/go-testing-interface v1.14.1/go.mod h1:gfgS7OtZj6MA4U1UrDRp04twqAjfvlZyCfX3sDjEym8=\ngithub.com/mitchellh/go-wordwrap v1.0.1/go.mod h1:R62XHJLzvMFRBbcrT7m7WgmE1eOyTSsCt+hzestvNj0=\ngithub.com/opencontainers/go-digest 
v1.0.0/go.mod h1:0JzlMkj0TRzQZfJkVvzbP0HBR3IKzErnv2BNG4W4MAM=\ngithub.com/opencontainers/image-spec v1.1.0-rc3/go.mod h1:X4pATf0uXsnn3g5aiGIsVnJBR4mxhKzfwmvK/B2NTm8=\ngithub.com/oracle/oci-go-sdk v7.1.0+incompatible/go.mod h1:VQb79nF8Z2cwLkLS35ukwStZIg5F66tcBccjip/j888=\ngithub.com/peterbourgon/diskv v2.0.1+incompatible/go.mod h1:uqqh8zWWbv1HBMNONnaR/tNboyR3/BZd58JJSHlUSCU=\ngithub.com/pkg/browser v0.0.0-20240102092130-5ac0b6a4141c/go.mod h1:7rwL4CYBLnjLxUqIJNnCWiEdr3bn6IUYi15bNlnbCCU=\ngithub.com/pkg/diff v0.0.0-20210226163009-20ebb0f2a09e/go.mod h1:pJLUxLENpZxwdsKMEsNbx1VGcRFpLqf3715MtcvvzbA=\ngithub.com/planetscale/vtprotobuf v0.6.1-0.20240319094008-0393e58bdf10/go.mod h1:t/avpk3KcrXxUnYOhZhMXJlSEyie6gQbtLq5NM3loB8=\ngithub.com/rogpeppe/go-internal v1.9.0/go.mod h1:WtVeX8xhTBvf0smdhujwtBcq4Qrzq/fJaraNFVN+nFs=\ngithub.com/rogpeppe/go-internal v1.10.0/go.mod h1:UQnix2H7Ngw/k4C5ijL5+65zddjncjaFoBhdsK/akog=\ngithub.com/satori/go.uuid v1.2.0/go.mod h1:dA0hQrYB0VpLJoorglMZABFdXlWrHn1NEOzdhQKdks0=\ngithub.com/sirupsen/logrus v1.9.3/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ=\ngithub.com/slack-go/slack v0.15.0/go.mod h1:hlGi5oXA+Gt+yWTPP0plCdRKmjsDxecdHxYQdlMQKOw=\ngithub.com/spf13/cobra v1.9.1/go.mod h1:nDyEzZ8ogv936Cinf6g1RU9MRY64Ir93oCnqb9wxYW0=\ngithub.com/tmccombs/hcl2json v0.6.4/go.mod h1:+ppKlIW3H5nsAsZddXPy2iMyvld3SHxyjswOZhavRDk=\ngithub.com/ulikunitz/xz v0.5.10/go.mod h1:nbz6k7qbPmH4IRqmfOplQw/tblSgqTqBwxkY0oWt/14=\ngithub.com/urfave/cli v1.22.16 h1:MH0k6uJxdwdeWQTwhSO42Pwr4YLrNLwBtg1MRgTqPdQ=\ngithub.com/urfave/cli v1.22.16/go.mod h1:EeJR6BKodywf4zciqrdw6hpCPk68JO9z5LazXZMn5Po=\ngithub.com/vbatts/tar-split v0.11.3/go.mod h1:9QlHN18E+fEH7RdG+QAJJcuya3rqT7eXSTY7wGrAokY=\ngithub.com/zclconf/go-cty v1.15.0/go.mod h1:VvMs5i0vgZdhYawQNq5kePSpLAoz8u1xvZgrPIxfnZE=\ngo.opencensus.io v0.24.0/go.mod h1:vNK8G9p7aAivkbmorf4v+7Hgx+Zs0yY+0fOtgBfjQKo=\ngo.opentelemetry.io/contrib/detectors/gcp v1.29.0/go.mod 
h1:GW2aWZNwR2ZxDLdv8OyC2G8zkRoQBuURgV7RPQgcPoU=\ngo.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.54.0/go.mod h1:B9yO6b04uB80CzjedvewuqDhxJxi11s7/GtiGa8bAjI=\ngo.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.54.0/go.mod h1:L7UH0GbB0p47T4Rri3uHjbpCFYrVrwc1I25QhNPiGK8=\ngo.opentelemetry.io/otel v1.29.0/go.mod h1:N/WtXPs1CNCUEx+Agz5uouwCba+i+bJGFicT8SR4NP8=\ngo.opentelemetry.io/otel/metric v1.29.0/go.mod h1:auu/QWieFVWx+DmQOUMgj0F8LHWdgalxXqvp7BII/W8=\ngo.opentelemetry.io/otel/sdk v1.29.0/go.mod h1:pM8Dx5WKnvxLCb+8lG1PRNIDxu9g9b9g59Qr7hfAAok=\ngo.opentelemetry.io/otel/sdk/metric v1.29.0/go.mod h1:6zZLdCl2fkauYoZIOn/soQIDSWFmNSRcICarHfuhNJQ=\ngo.opentelemetry.io/otel/trace v1.29.0/go.mod h1:eHl3w0sp3paPkYstJOmAimxhiFXPg+MMTlEh3nsQgWQ=\ngo.uber.org/goleak v1.3.0/go.mod h1:CoHD4mav9JJNrW/WLlf7HGZPjdw8EucARQHekz1X6bE=\ngolang.org/x/mod v0.28.0/go.mod h1:yfB/L0NOf/kmEbXjzCPOx1iK1fRutOydrCMsqRhEBxI=\ngolang.org/x/tools/go/expect v0.1.1-deprecated/go.mod h1:eihoPOH+FgIqa3FpoTwguz/bVUSGBlGQU67vpBeOrBY=\ngolang.org/x/tools/go/packages/packagestest v0.1.1-deprecated/go.mod h1:RVAQXBGNv1ib0J382/DPCRS/BPnsGebyM1Gj5VSDpG8=\ngolang.org/x/xerrors v0.0.0-20220907171357-04be3eba64a2/go.mod h1:K8+ghG5WaK9qNqU5K3HdILfMLy1f3aNYFI/wnl100a8=\ngoogle.golang.org/api v0.206.0/go.mod h1:BtB8bfjTYIrai3d8UyvPmV9REGgox7coh+ZRwm0b+W8=\ngoogle.golang.org/appengine v1.6.7/go.mod h1:8WjMMxjGQR8xUklV/ARdw2HLXBOI7O7uCIDZVag1xfc=\ngoogle.golang.org/genproto v0.0.0-20241113202542-65e8d215514f/go.mod h1:Q5m6g8b5KaFFzsQFIGdJkSJDGeJiybVenoYFMMa3ohI=\ngoogle.golang.org/genproto/googleapis/api v0.0.0-20241104194629-dd2ea8efbc28/go.mod h1:dguCy7UOdZhTvLzDyt15+rOrawrpM4q7DD9dQ1P11P4=\ngoogle.golang.org/genproto/googleapis/rpc v0.0.0-20241104194629-dd2ea8efbc28/go.mod h1:GX3210XPVPUjJbTUbvwI8f2IpZDMZuPJWDzDuebbviI=\ngoogle.golang.org/grpc v1.67.1/go.mod h1:1gLDyUQU7CTLJI90u3nXZ9ekeghjeM7pTDZlqFNg2AA=\ngoogle.golang.org/grpc/stats/opentelemetry 
v0.0.0-20240907200651-3ffb98b2c93a/go.mod h1:9i1T9n4ZinTUZGgzENMi8MDDgbGC5mqTS75JAv6xN3A=\ngotest.tools/v3 v3.5.1/go.mod h1:isy3WKz7GK6uNw/sbHzfKBLvlvXwUyV06n6brMxxopU=\nk8s.io/gengo/v2 v2.0.0-20250604051438-85fd79dbfd9f/go.mod h1:EJykeLsmFC60UQbYJezXkEsG2FLrt0GPNkU5iK5GWxU=\nsigs.k8s.io/structured-merge-diff/v4 v4.2.3/go.mod h1:qjx8mGObPmV2aSZepjQjbmb2ihdVs8cGKBraizNC69E=\n"
  },
  {
    "path": "patches/mimalloc-v2.2.4/0_base.patch",
    "content": "diff --git a/CMakeLists.txt b/CMakeLists.txt\nindex 5ce084f6..00eba70c 100644\n--- a/CMakeLists.txt\n+++ b/CMakeLists.txt\n@@ -1,4 +1,4 @@\n-cmake_minimum_required(VERSION 3.18)\n+cmake_minimum_required(VERSION 3.16)\n project(libmimalloc C CXX)\n \n set(CMAKE_C_STANDARD 11)\n@@ -44,7 +44,38 @@ option(MI_WIN_USE_FLS       \"Use Fiber local storage on Windows to detect thread\n option(MI_CHECK_FULL        \"Use full internal invariant checking in DEBUG mode (deprecated, use MI_DEBUG_FULL instead)\" OFF)\n option(MI_USE_LIBATOMIC     \"Explicitly link with -latomic (on older systems) (deprecated and detected automatically)\" OFF)\n \n-include(CheckLinkerFlag)    # requires cmake 3.18\n+function(CHECK_LINKER_FLAG _lang _flag _var)\n+  get_property (_supported_languages GLOBAL PROPERTY ENABLED_LANGUAGES)\n+  if (NOT _lang IN_LIST _supported_languages)\n+    message (SEND_ERROR \"check_linker_flag: ${_lang}: unknown language.\")\n+    return()\n+  endif()\n+  include (Check${_lang}SourceCompiles)\n+  set(CMAKE_REQUIRED_LINK_OPTIONS \"${_flag}\")\n+  # Normalize locale during test compilation.\n+  set(_locale_vars LC_ALL LC_MESSAGES LANG)\n+  foreach(v IN LISTS _locale_vars)\n+    set(_locale_vars_saved_${v} \"$ENV{${v}}\")\n+    set(ENV{${v}} C)\n+  endforeach()\n+  if (_lang MATCHES \"^(C|CXX)$\")\n+    set (_source \"int main() { return 0; }\")\n+  elseif (_lang STREQUAL \"Fortran\")\n+    set (_source \"       program test\\n       stop\\n       end program\")\n+  elseif (_lang MATCHES \"^(OBJC|OBJCXX)$\")\n+    set (_source \"#ifndef __OBJC__\\n#  error \\\"Not an Objective-C++ compiler\\\"\\n#endif\\nint main(void) { return 0; }\")\n+  else()\n+    message (SEND_ERROR \"check_linker_flag: ${_lang}: unsupported language.\")\n+    return()\n+  endif()\n+  set(_common_patterns \"\")\n+  check_c_source_compiles(\"${_source}\" ${_var} ${_common_patterns})\n+  foreach(v IN LISTS _locale_vars)\n+    set(ENV{${v}} ${_locale_vars_saved_${v}})\n+  
endforeach()\n+  set(${_var} \"${${_var}}\" PARENT_SCOPE)\n+endfunction()\n+\n include(CheckIncludeFiles)\n include(GNUInstallDirs)\n include(\"cmake/mimalloc-config-version.cmake\")\ndiff --git a/src/alloc.c b/src/alloc.c\nindex 0fed5e75..870f8d10 100644\n--- a/src/alloc.c\n+++ b/src/alloc.c\n@@ -670,6 +670,24 @@ mi_decl_restrict void* _mi_heap_malloc_guarded(mi_heap_t* heap, size_t size, boo\n }\n #endif\n \n+bool mi_heap_page_is_underutilized(mi_heap_t* heap, void* p, float ratio) mi_attr_noexcept {\n+  mi_page_t* page = _mi_ptr_page(p);   // get the page that this belongs to\n+\n+  mi_heap_t* page_heap = (mi_heap_t*)(mi_atomic_load_acquire(&(page)->xheap));\n+\n+  // the heap id matches and it is not a full page\n+  if (mi_likely(page_heap == heap && page->flags.x.in_full == 0)) {\n+    // first in the list, meaning it's the head of page queue, thus being used for malloc\n+    if (page->prev == NULL)\n+      return false;\n+\n+    // this page belong to this heap and is not first in the page queue. Lets check its\n+    // utilization.\n+    return page->used <= (unsigned)(page->capacity * ratio);\n+  }\n+  return false;\n+}\n+\n // ------------------------------------------------------\n // ensure explicit external inline definitions are emitted!\n // ------------------------------------------------------\n"
  },
  {
    "path": "patches/mimalloc-v2.2.4/1_add_stat_type.patch",
    "content": "diff --git a/include/mimalloc/types.h b/include/mimalloc/types.h\nindex a15d9cba..ee822ca9 100644\n--- a/include/mimalloc/types.h\n+++ b/include/mimalloc/types.h\n@@ -682,4 +682,23 @@ void _mi_stat_counter_increase(mi_stat_counter_t* stat, size_t amount);\n #define mi_heap_stat_decrease(heap,stat,amount)  mi_stat_decrease( (heap)->tld->stats.stat, amount)\n #define mi_heap_stat_adjust_decrease(heap,stat,amount)  mi_stat_adjust_decrease( (heap)->tld->stats.stat, amount)\n \n+#define MI_DFLY_PAGE_BELOW_THRESHOLD 1\n+#define MI_DFLY_PAGE_FULL 2\n+#define MI_DFLY_HEAP_MISMATCH 4\n+#define MI_DFLY_PAGE_USED_FOR_MALLOC 8\n+\n+typedef struct mi_page_usage_stats_s {\n+  uintptr_t page_address;\n+  size_t block_size;\n+  uint16_t capacity;\n+  uint16_t reserved;\n+  uint16_t used;\n+  // Collects the current state of page as returned by mi_heap_page_is_underutilized\n+  // 0th bit set: page usage is below threshold: MI_DFLY_PAGE_BELOW_THRESHOLD\n+  // 1st bit set: the page is full: MI_DFLY_PAGE_FULL\n+  // 2nd bit set: the page heap did not match the heap requested: MI_DFLY_HEAP_MISMATCH\n+  // 3rd bit set: that the page is currently used for malloc operations: MI_DFLY_PAGE_USED_FOR_MALLOC\n+  uint8_t flags;\n+} mi_page_usage_stats_t;\n+\n #endif\n"
  },
  {
    "path": "patches/mimalloc-v2.2.4/2_return_stat.patch",
    "content": "diff --git a/src/alloc.c b/src/alloc.c\nindex 893f3094..88318d0e 100644\n--- a/src/alloc.c\n+++ b/src/alloc.c\n@@ -676,22 +676,45 @@ mi_decl_restrict void* _mi_heap_malloc_guarded(mi_heap_t* heap, size_t size, boo\n }\n #endif\n \n-bool mi_heap_page_is_underutilized(mi_heap_t* heap, void* p, float ratio) mi_attr_noexcept {\n-  mi_page_t* page = _mi_ptr_page(p);   // get the page that this belongs to\n+mi_page_usage_stats_t mi_heap_page_is_underutilized(mi_heap_t *heap, void *p, float ratio,\n+                                                    bool return_detailed_stats) mi_attr_noexcept {\n+  mi_page_t *page = _mi_ptr_page(p); // get the page that this belongs to\n+  mi_heap_t *page_heap = (mi_heap_t *) (mi_atomic_load_acquire(&(page)->xheap));\n+\n+  if (!return_detailed_stats) {\n+    mi_page_usage_stats_t result = {.flags = 0};\n+    if (mi_likely(page_heap == heap && page->flags.x.in_full == 0)) {\n+      if (page->prev != NULL && page->used <= (unsigned) (page->capacity * ratio))\n+        result.flags = MI_DFLY_PAGE_BELOW_THRESHOLD;\n+    }\n+    return result;\n+  }\n+\n+  mi_page_usage_stats_t result = {\n+    .page_address = (uintptr_t) page,\n+    .block_size = page->block_size,\n+    .capacity = page->capacity,\n+    .reserved = page->reserved,\n+    .used = page->used,\n+    .flags = 0,\n+  };\n\n-  mi_heap_t* page_heap = (mi_heap_t*)(mi_atomic_load_acquire(&(page)->xheap));\n+  if (page->flags.x.in_full == 1) {\n+    result.flags |= MI_DFLY_PAGE_FULL;\n+  }\n+\n+  if (page_heap != heap) {\n+    result.flags |= MI_DFLY_HEAP_MISMATCH;\n+  }\n\n-  // the heap id matches and it is not a full page\n-  if (mi_likely(page_heap == heap && page->flags.x.in_full == 0)) {\n-    // first in the list, meaning it's the head of page queue, thus being used for malloc\n-    if (page->prev == NULL)\n-      return false;\n+  if (page->prev == NULL) {\n+    result.flags |= MI_DFLY_PAGE_USED_FOR_MALLOC;\n+  }\n\n-    // this page belong to this heap and is 
not first in the page queue. Lets check its\n-    // utilization.\n-    return page->used <= (unsigned)(page->capacity * ratio);\n+  if (result.flags == 0 && result.used <= (unsigned) (result.capacity * ratio)) {\n+    result.flags = MI_DFLY_PAGE_BELOW_THRESHOLD;\n   }\n-  return false;\n+  return result;\n }\n \n // ------------------------------------------------------\n"
  },
  {
    "path": "patches/mimalloc-v2.2.4/3_track_full_size.patch",
    "content": "commit e0cda4eb4a54cfcd33afcd5fbd7ecd86510ac4f9\nAuthor: Roman Gershman <romange@gmail.com>\nDate:   Wed Sep 3 23:30:34 2025 +0300\n\n    chore: track comitted size of full pages in a heap\n    \n    Signed-off-by: Roman Gershman <romange@gmail.com>\n\ndiff --git a/include/mimalloc/types.h b/include/mimalloc/types.h\nindex a15d9cba..34d99a94 100644\n--- a/include/mimalloc/types.h\n+++ b/include/mimalloc/types.h\n@@ -559,9 +559,10 @@ struct mi_heap_s {\n   uintptr_t             cookie;                              // random cookie to verify pointers (see `_mi_ptr_cookie`)\n   uintptr_t             keys[2];                             // two random keys used to encode the `thread_delayed_free` list\n   mi_random_ctx_t       random;                              // random number context used for secure allocation\n-  size_t                page_count;                          // total number of pages in the `pages` queues.\n-  size_t                page_retired_min;                    // smallest retired index (retired pages are fully free, but still in the page queues)\n-  size_t                page_retired_max;                    // largest retired index into the `pages` array.\n+  uint32_t              page_count;                          // total number of pages in the `pages` queues.\n+  uint16_t              page_retired_min;                    // smallest retired index (retired pages are fully free, but still in the page queues)\n+  uint16_t              page_retired_max;                    // largest retired index into the `pages` array.\n+  size_t                full_page_size;                      // total size of pages residing in MI_BIN_FULL bin.\n   long                  generic_count;                       // how often is `_mi_malloc_generic` called?\n   long                  generic_collect_count;               // how often is `_mi_malloc_generic` called without collecting?\n   mi_heap_t*            next;                                // 
list of heaps per thread\ndiff --git a/src/init.c b/src/init.c\nindex 3fc8b033..61ee4c76 100644\n--- a/src/init.c\n+++ b/src/init.c\n@@ -118,6 +118,7 @@ mi_decl_cache_align const mi_heap_t _mi_heap_empty = {\n   { {0}, {0}, 0, true }, // random\n   0,                // page count\n   MI_BIN_FULL, 0,   // page retired min/max\n+  0,                // full page size\n   0, 0,             // generic count\n   NULL,             // next\n   false,            // can reclaim\n@@ -167,6 +168,7 @@ mi_decl_cache_align mi_heap_t _mi_heap_main = {\n   { {0x846ca68b}, {0}, 0, true },  // random\n   0,                // page count\n   MI_BIN_FULL, 0,   // page retired min/max\n+  0,                // full page size\n   0, 0,             // generic count\n   NULL,             // next heap\n   false,            // can reclaim\ndiff --git a/src/page-queue.c b/src/page-queue.c\nindex c719b626..524b09d8 100644\n--- a/src/page-queue.c\n+++ b/src/page-queue.c\n@@ -232,6 +232,10 @@ static void mi_page_queue_remove(mi_page_queue_t* queue, mi_page_t* page) {\n   page->next = NULL;\n   page->prev = NULL;\n   // mi_atomic_store_ptr_release(mi_atomic_cast(void*, &page->heap), NULL);\n+  if (mi_page_queue_is_full(queue)) {\n+    mi_assert_internal(heap->full_page_size >= mi_page_block_size(page) * page->capacity);\n+    heap->full_page_size -= mi_page_block_size(page) * page->capacity;\n+  }\n   mi_page_set_in_full(page,false);\n }\n \n@@ -246,6 +250,9 @@ static void mi_page_queue_push(mi_heap_t* heap, mi_page_queue_t* queue, mi_page_\n                       (mi_page_is_large_or_huge(page) && mi_page_queue_is_huge(queue)) ||\n                         (mi_page_is_in_full(page) && mi_page_queue_is_full(queue)));\n \n+  if (mi_page_queue_is_full(queue)) {\n+    heap->full_page_size += mi_page_block_size(page) * page->capacity;\n+  }\n   mi_page_set_in_full(page, mi_page_queue_is_full(queue));\n   // mi_atomic_store_ptr_release(mi_atomic_cast(void*, &page->heap), heap);\n   page->next = 
queue->first;\n@@ -339,6 +346,12 @@ static void mi_page_queue_enqueue_from_ex(mi_page_queue_t* to, mi_page_queue_t*\n     }\n   }\n \n+  if (mi_page_queue_is_full(to)) {\n+    heap->full_page_size += mi_page_block_size(page) * page->capacity;\n+  } else if (mi_page_queue_is_full(from)) {\n+    mi_assert_internal(heap->full_page_size >= mi_page_block_size(page) * page->capacity);\n+    heap->full_page_size -= mi_page_block_size(page) * page->capacity;\n+  }\n   mi_page_set_in_full(page, mi_page_queue_is_full(to));\n }\n \n"
  },
  {
    "path": "patches/mimalloc-v2.2.4/4_fix_heap_collect.patch",
    "content": "diff --git a/src/heap.c b/src/heap.c\nindex f96e60d0..5cb7c1ff 100644\n--- a/src/heap.c\n+++ b/src/heap.c\n@@ -24,7 +24,7 @@ terms of the MIT license. A copy of the license can be found in the file\n typedef bool (heap_page_visitor_fun)(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_t* page, void* arg1, void* arg2);\n \n // Visit all pages in a heap; returns `false` if break was called.\n-static bool mi_heap_visit_pages(mi_heap_t* heap, heap_page_visitor_fun* fn, void* arg1, void* arg2)\n+static bool mi_heap_visit_pages(mi_heap_t* heap, size_t max_q_id,  heap_page_visitor_fun* fn, void* arg1, void* arg2)\n {\n   if (heap==NULL || heap->page_count==0) return 0;\n \n@@ -34,7 +34,7 @@ static bool mi_heap_visit_pages(mi_heap_t* heap, heap_page_visitor_fun* fn, void\n   size_t count = 0;\n   #endif\n \n-  for (size_t i = 0; i <= MI_BIN_FULL; i++) {\n+  for (size_t i = 0; i <= max_q_id; i++) {\n     mi_page_queue_t* pq = &heap->pages[i];\n     mi_page_t* page = pq->first;\n     while(page != NULL) {\n@@ -47,7 +47,6 @@ static bool mi_heap_visit_pages(mi_heap_t* heap, heap_page_visitor_fun* fn, void\n       page = next; // and continue\n     }\n   }\n-  mi_assert_internal(count == total);\n   return true;\n }\n \n@@ -67,7 +66,7 @@ static bool mi_heap_page_is_valid(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_\n #if MI_DEBUG>=3\n static bool mi_heap_is_valid(mi_heap_t* heap) {\n   mi_assert_internal(heap!=NULL);\n-  mi_heap_visit_pages(heap, &mi_heap_page_is_valid, NULL, NULL);\n+  mi_heap_visit_pages(heap, MI_BIN_FULL, &mi_heap_page_is_valid, NULL, NULL);\n   return true;\n }\n #endif\n@@ -149,7 +148,7 @@ static void mi_heap_collect_ex(mi_heap_t* heap, mi_collect_t collect)\n \n   // if abandoning, mark all pages to no longer add to delayed_free\n   if (collect == MI_ABANDON) {\n-    mi_heap_visit_pages(heap, &mi_heap_page_never_delayed_free, NULL, NULL);\n+    mi_heap_visit_pages(heap, MI_BIN_FULL, &mi_heap_page_never_delayed_free, NULL, NULL);\n   }\n \n  
 // free all current thread delayed blocks.\n@@ -160,7 +159,7 @@ static void mi_heap_collect_ex(mi_heap_t* heap, mi_collect_t collect)\n   _mi_heap_collect_retired(heap, force);\n\n   // collect all pages owned by this thread\n-  mi_heap_visit_pages(heap, &mi_heap_page_collect, &collect, NULL);\n+  mi_heap_visit_pages(heap, collect == MI_NORMAL ? MI_BIN_HUGE : MI_BIN_FULL, &mi_heap_page_collect, &collect, NULL);\n   mi_assert_internal( collect != MI_ABANDON || mi_atomic_load_ptr_acquire(mi_block_t,&heap->thread_delayed_free) == NULL );\n \n   // collect abandoned segments (in particular, purge expired parts of segments in the abandoned segment list)\n@@ -368,7 +367,7 @@ static bool _mi_heap_page_destroy(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_\n }\n \n void _mi_heap_destroy_pages(mi_heap_t* heap) {\n-  mi_heap_visit_pages(heap, &_mi_heap_page_destroy, NULL, NULL);\n+  mi_heap_visit_pages(heap, MI_BIN_FULL, &_mi_heap_page_destroy, NULL, NULL);\n   mi_heap_reset_pages(heap);\n }\n \n@@ -539,7 +538,7 @@ bool mi_heap_check_owned(mi_heap_t* heap, const void* p) {\n   if (heap==NULL || !mi_heap_is_initialized(heap)) return false;\n   if (((uintptr_t)p & (MI_INTPTR_SIZE - 1)) != 0) return false;  // only aligned pointers\n   bool found = false;\n-  mi_heap_visit_pages(heap, &mi_heap_page_check_owned, (void*)p, &found);\n+  mi_heap_visit_pages(heap, MI_BIN_FULL, &mi_heap_page_check_owned, (void*)p, &found);\n   return found;\n }\n \n@@ -705,7 +704,7 @@ static bool mi_heap_visit_areas_page(mi_heap_t* heap, mi_page_queue_t* pq, mi_pa\n // Visit all heap pages as areas\n static bool mi_heap_visit_areas(const mi_heap_t* heap, mi_heap_area_visit_fun* visitor, void* arg) {\n   if (visitor == NULL) return false;\n-  return mi_heap_visit_pages((mi_heap_t*)heap, &mi_heap_visit_areas_page, (void*)(visitor), arg); // note: function pointer to void* :-{\n+  return mi_heap_visit_pages((mi_heap_t*)heap, MI_BIN_FULL, &mi_heap_visit_areas_page, (void*)(visitor), arg); // note: 
function pointer to void* :-{\n }\n \n // Just to pass arguments\n"
  },
  {
    "path": "pyproject.toml",
    "content": "[tool.black]\nline-length = 100\ninclude = '\\.py$'\nextend-exclude = '''\n/(\n    | .git\n    | .__pycache__\n    | build-dbg\n    | build-opt\n    | helio\n)/\n'''\n"
  },
  {
    "path": "src/.gitignore",
    "content": "server/version.cc"
  },
  {
    "path": "src/CMakeLists.txt",
    "content": "option(ENABLE_GIT_VERSION \"Build with Git metadata\" OFF)\n\noption(WITH_SIMSIMD \"Enable SimSIMD vector optimizations\" OFF)\noption(SIMSIMD_NATIVE_F16 \"Enable native float16 support in SimSIMD\" OFF)\noption(WITH_SEARCH \"Enable compilation of search module\" ON)\n\nif (\"${CMAKE_SYSTEM_NAME}\" STREQUAL \"FreeBSD\")\n  set(DFLY_TOOLS_MAKE \"gmake\")\nelse()\n  set(DFLY_TOOLS_MAKE \"make\")\nendif()\n\nfunction(cur_gen_dir out_dir)\n  file(RELATIVE_PATH _rel_folder \"${PROJECT_SOURCE_DIR}\" \"${CMAKE_CURRENT_SOURCE_DIR}\")\n\n  set(_tmp_dir ${ROOT_GEN_DIR}/${_rel_folder})\n  set(${out_dir} ${_tmp_dir} PARENT_SCOPE)\n  file(MAKE_DIRECTORY ${_tmp_dir})\nendfunction()\n\nset(ROOT_GEN_DIR ${CMAKE_SOURCE_DIR}/genfiles)\nfile(MAKE_DIRECTORY ${ROOT_GEN_DIR})\ninclude_directories(${ROOT_GEN_DIR}/src)\n\nfunction(gen_bison name)\n  GET_FILENAME_COMPONENT(_in ${name}.y ABSOLUTE)\n  cur_gen_dir(gen_dir)\n  # add_library(${lib_name} ${gen_dir}/${name}.cc)\n  set(full_path_cc ${gen_dir}/${name}.cc ${gen_dir}/${name}.hh)\n\n  ADD_CUSTOM_COMMAND(\n           OUTPUT ${full_path_cc}\n           COMMAND mkdir -p ${gen_dir}\n           COMMAND bison --language=c++ -o ${gen_dir}/${name}.cc ${name}.y -Wconflicts-sr\n           DEPENDS ${_in}\n           WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}\n           COMMENT \"Generating parser from ${name}.y\" VERBATIM)\n set_source_files_properties(${name}.cc ${name}_base.h PROPERTIES GENERATED TRUE)\nendfunction()\n\n\nMessage(STATUS \"THIRD_PARTY_LIB_DIR ${THIRD_PARTY_LIB_DIR}\")\n\ninclude(external_libs.cmake)\n\nif(ENABLE_GIT_VERSION)\n    include(GetGitRevisionDescription.cmake)\n    get_git_head_revision(GIT_REFSPEC GIT_SHA1)\n    git_local_changes(GIT_CLEAN_DIRTY)\n    if(\"${GIT_CLEAN_DIRTY}\" STREQUAL \"DIRTY\")\n        set(GIT_CLEAN_DIRTY \"-dirty\")\n        else()\n        set(GIT_CLEAN_DIRTY \"\")\n    endif()\n    Message(STATUS \"GIT_SHA1 ${GIT_SHA1}\")\n    git_describe(GIT_VER --always)\n    
Message(STATUS \"GIT_VER ${GIT_VER}\")\n    string(TIMESTAMP PRJ_BUILD_TIME \"%Y-%m-%d %H:%M:%S\" UTC)\nelse(ENABLE_GIT_VERSION)\n    set(GIT_VER \"dev\")\n    set(GIT_SHA1 \"0000000\")\n    set(GIT_CLEAN_DIRTY \"-dev\")\n    set(PRJ_BUILD_TIME \"bigbang\")\nendif(ENABLE_GIT_VERSION)\n\n\nfunction(gen_flex name)\n  GET_FILENAME_COMPONENT(_in ${name}.lex ABSOLUTE)\n  cur_gen_dir(gen_dir)\n\n  ADD_CUSTOM_COMMAND(\n           OUTPUT ${gen_dir}/${name}.cc ${gen_dir}/${name}.h\n           COMMAND mkdir -p ${gen_dir}\n\n           COMMAND ${REFLEX} -o ${gen_dir}/${name}.cc  --unicode --header-file=${gen_dir}/${name}.h\n                             --bison-complete  --bison-locations  ${_in}\n           DEPENDS ${_in} reflex_project\n           WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}\n           COMMENT \"Generating lexer from ${name}.lex\" VERBATIM)\n\n  set_source_files_properties(${gen_dir}/${name}.h ${gen_dir}/${name}.cc\n                              PROPERTIES GENERATED TRUE)\nendfunction()\n\n# The generated version.cc is written into the source tree (ignored via src/.gitignore), not the build directory.\nconfigure_file(server/version.cc.in \"${CMAKE_CURRENT_SOURCE_DIR}/server/version.cc\" @ONLY)\n\nadd_subdirectory(redis)\nadd_subdirectory(core)\nadd_subdirectory(facade)\nadd_subdirectory(server)\n"
  },
  {
    "path": "src/GetGitRevisionDescription.cmake",
    "content": "# - Returns a version string from Git\n#\n# These functions force a re-configure on each git commit so that you can\n# trust the values of the variables in your build system.\n#\n#  get_git_head_revision(<refspecvar> <hashvar> [ALLOW_LOOKING_ABOVE_CMAKE_SOURCE_DIR])\n#\n# Returns the refspec and sha hash of the current head revision\n#\n#  git_describe(<var> [<additional arguments to git describe> ...])\n#\n# Returns the results of git describe on the source tree, and adjusting\n# the output so that it tests false if an error occurs.\n#\n#  git_describe_working_tree(<var> [<additional arguments to git describe> ...])\n#\n# Returns the results of git describe on the working tree (--dirty option),\n# and adjusting the output so that it tests false if an error occurs.\n#\n#  git_get_exact_tag(<var> [<additional arguments to git describe> ...])\n#\n# Returns the results of git describe --exact-match on the source tree,\n# and adjusting the output so that it tests false if there was no exact\n# matching tag.\n#\n#  git_local_changes(<var>)\n#\n# Returns either \"CLEAN\" or \"DIRTY\" with respect to uncommitted changes.\n# Uses the return code of \"git diff-index --quiet HEAD --\".\n# Does not regard untracked files.\n#\n# Requires CMake 2.6 or newer (uses the 'function' command)\n#\n# Original Author:\n# 2009-2020 Ryan Pavlik <ryan.pavlik@gmail.com> <abiryan@ryand.net>\n# http://academic.cleardefinition.com\n#\n# Copyright 2009-2013, Iowa State University.\n# Copyright 2013-2020, Ryan Pavlik\n# Copyright 2013-2020, Contributors\n# SPDX-License-Identifier: BSL-1.0\n# Distributed under the Boost Software License, Version 1.0.\n# (See accompanying file LICENSE_1_0.txt or copy at\n# http://www.boost.org/LICENSE_1_0.txt)\n\nif(__get_git_revision_description)\n    return()\nendif()\nset(__get_git_revision_description YES)\n\n# We must run the following at \"include\" time, not at function call time,\n# to find the path to this module rather than the path to a 
calling list file\nget_filename_component(_gitdescmoddir ${CMAKE_CURRENT_LIST_FILE} PATH)\n\n# Function _git_find_closest_git_dir finds the next closest .git directory\n# that is part of any directory in the path defined by _start_dir.\n# The result is returned in the parent scope variable whose name is passed\n# as variable _git_dir_var. If no .git directory can be found, the\n# function returns an empty string via _git_dir_var.\n#\n# Example: Given a path C:/bla/foo/bar and assuming C:/bla/.git exists and\n# neither foo nor bar contain a file/directory .git. This wil return\n# C:/bla/.git\n#\nfunction(_git_find_closest_git_dir _start_dir _git_dir_var)\n    set(cur_dir \"${_start_dir}\")\n    set(git_dir \"${_start_dir}/.git\")\n    while(NOT EXISTS \"${git_dir}\")\n        # .git dir not found, search parent directories\n        set(git_previous_parent \"${cur_dir}\")\n        get_filename_component(cur_dir \"${cur_dir}\" DIRECTORY)\n        if(cur_dir STREQUAL git_previous_parent)\n            # We have reached the root directory, we are not in git\n            set(${_git_dir_var}\n                \"\"\n                PARENT_SCOPE)\n            return()\n        endif()\n        set(git_dir \"${cur_dir}/.git\")\n    endwhile()\n    set(${_git_dir_var}\n        \"${git_dir}\"\n        PARENT_SCOPE)\nendfunction()\n\nfunction(get_git_head_revision _refspecvar _hashvar)\n    _git_find_closest_git_dir(\"${CMAKE_CURRENT_SOURCE_DIR}\" GIT_DIR)\n\n    if(\"${ARGN}\" STREQUAL \"ALLOW_LOOKING_ABOVE_CMAKE_SOURCE_DIR\")\n        set(ALLOW_LOOKING_ABOVE_CMAKE_SOURCE_DIR TRUE)\n    else()\n        set(ALLOW_LOOKING_ABOVE_CMAKE_SOURCE_DIR FALSE)\n    endif()\n    if(NOT \"${GIT_DIR}\" STREQUAL \"\")\n        file(RELATIVE_PATH _relative_to_source_dir \"${CMAKE_SOURCE_DIR}\"\n             \"${GIT_DIR}\")\n        if(\"${_relative_to_source_dir}\" MATCHES \"[.][.]\" AND NOT ALLOW_LOOKING_ABOVE_CMAKE_SOURCE_DIR)\n            # We've gone above the CMake root dir.\n            
set(GIT_DIR \"\")\n        endif()\n    endif()\n    if(\"${GIT_DIR}\" STREQUAL \"\")\n        set(${_refspecvar}\n            \"GITDIR-NOTFOUND\"\n            PARENT_SCOPE)\n        set(${_hashvar}\n            \"GITDIR-NOTFOUND\"\n            PARENT_SCOPE)\n        return()\n    endif()\n\n    # Check if the current source dir is a git submodule or a worktree.\n    # In both cases .git is a file instead of a directory.\n    #\n    if(NOT IS_DIRECTORY ${GIT_DIR})\n        # The following git command will return a non empty string that\n        # points to the super project working tree if the current\n        # source dir is inside a git submodule.\n        # Otherwise the command will return an empty string.\n        #\n        execute_process(\n            COMMAND \"${GIT_EXECUTABLE}\" rev-parse\n                    --show-superproject-working-tree\n            WORKING_DIRECTORY \"${CMAKE_CURRENT_SOURCE_DIR}\"\n            OUTPUT_VARIABLE out\n            ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE)\n        if(NOT \"${out}\" STREQUAL \"\")\n            # If out is empty, GIT_DIR/CMAKE_CURRENT_SOURCE_DIR is in a submodule\n            file(READ ${GIT_DIR} submodule)\n            string(REGEX REPLACE \"gitdir: (.*)$\" \"\\\\1\" GIT_DIR_RELATIVE\n                                 ${submodule})\n            string(STRIP ${GIT_DIR_RELATIVE} GIT_DIR_RELATIVE)\n            get_filename_component(SUBMODULE_DIR ${GIT_DIR} PATH)\n            get_filename_component(GIT_DIR ${SUBMODULE_DIR}/${GIT_DIR_RELATIVE}\n                                   ABSOLUTE)\n            set(HEAD_SOURCE_FILE \"${GIT_DIR}/HEAD\")\n        else()\n            # GIT_DIR/CMAKE_CURRENT_SOURCE_DIR is in a worktree\n            file(READ ${GIT_DIR} worktree_ref)\n            # The .git directory contains a path to the worktree information directory\n            # inside the parent git repo of the worktree.\n            #\n            string(REGEX REPLACE \"gitdir: (.*)$\" \"\\\\1\" 
git_worktree_dir\n                                 ${worktree_ref})\n            string(STRIP ${git_worktree_dir} git_worktree_dir)\n            _git_find_closest_git_dir(\"${git_worktree_dir}\" GIT_DIR)\n            set(HEAD_SOURCE_FILE \"${git_worktree_dir}/HEAD\")\n        endif()\n    else()\n        set(HEAD_SOURCE_FILE \"${GIT_DIR}/HEAD\")\n    endif()\n    set(GIT_DATA \"${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/git-data\")\n    if(NOT EXISTS \"${GIT_DATA}\")\n        file(MAKE_DIRECTORY \"${GIT_DATA}\")\n    endif()\n\n    if(NOT EXISTS \"${HEAD_SOURCE_FILE}\")\n        return()\n    endif()\n    set(HEAD_FILE \"${GIT_DATA}/HEAD\")\n    configure_file(\"${HEAD_SOURCE_FILE}\" \"${HEAD_FILE}\" COPYONLY)\n\n    configure_file(\"${_gitdescmoddir}/GetGitRevisionDescription.cmake.in\"\n                   \"${GIT_DATA}/grabRef.cmake\" @ONLY)\n    include(\"${GIT_DATA}/grabRef.cmake\")\n\n    set(${_refspecvar}\n        \"${HEAD_REF}\"\n        PARENT_SCOPE)\n    set(${_hashvar}\n        \"${HEAD_HASH}\"\n        PARENT_SCOPE)\nendfunction()\n\nfunction(git_describe _var)\n    if(NOT GIT_FOUND)\n        find_package(Git QUIET)\n    endif()\n    get_git_head_revision(refspec hash)\n    if(NOT GIT_FOUND)\n        set(${_var}\n            \"GIT-NOTFOUND\"\n            PARENT_SCOPE)\n        return()\n    endif()\n    if(NOT hash)\n        set(${_var}\n            \"HEAD-HASH-NOTFOUND\"\n            PARENT_SCOPE)\n        return()\n    endif()\n\n    # TODO sanitize\n    #if((${ARGN}\" MATCHES \"&&\") OR\n    #\t(ARGN MATCHES \"||\") OR\n    #\t(ARGN MATCHES \"\\\\;\"))\n    #\tmessage(\"Please report the following error to the project!\")\n    #\tmessage(FATAL_ERROR \"Looks like someone's doing something nefarious with git_describe! 
Passed arguments ${ARGN}\")\n    #endif()\n\n    #message(STATUS \"Arguments to execute_process: ${ARGN}\")\n\n    execute_process(\n        COMMAND \"${GIT_EXECUTABLE}\" describe --tags --always ${hash} ${ARGN}\n        WORKING_DIRECTORY \"${CMAKE_CURRENT_SOURCE_DIR}\"\n        RESULT_VARIABLE res\n        OUTPUT_VARIABLE out\n        ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE)\n    if(NOT res EQUAL 0)\n        set(out \"${out}-${res}-NOTFOUND\")\n    endif()\n\n    set(${_var}\n        \"${out}\"\n        PARENT_SCOPE)\nendfunction()\n\nfunction(git_describe_working_tree _var)\n    if(NOT GIT_FOUND)\n        find_package(Git QUIET)\n    endif()\n    if(NOT GIT_FOUND)\n        set(${_var}\n            \"GIT-NOTFOUND\"\n            PARENT_SCOPE)\n        return()\n    endif()\n\n    execute_process(\n        COMMAND \"${GIT_EXECUTABLE}\" describe --dirty ${ARGN}\n        WORKING_DIRECTORY \"${CMAKE_CURRENT_SOURCE_DIR}\"\n        RESULT_VARIABLE res\n        OUTPUT_VARIABLE out\n        ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE)\n    if(NOT res EQUAL 0)\n        set(out \"${out}-${res}-NOTFOUND\")\n    endif()\n\n    set(${_var}\n        \"${out}\"\n        PARENT_SCOPE)\nendfunction()\n\nfunction(git_get_exact_tag _var)\n    git_describe(out --exact-match ${ARGN})\n    set(${_var}\n        \"${out}\"\n        PARENT_SCOPE)\nendfunction()\n\nfunction(git_local_changes _var)\n    if(NOT GIT_FOUND)\n        find_package(Git QUIET)\n    endif()\n    get_git_head_revision(refspec hash)\n    if(NOT GIT_FOUND)\n        set(${_var}\n            \"GIT-NOTFOUND\"\n            PARENT_SCOPE)\n        return()\n    endif()\n    if(NOT hash)\n        set(${_var}\n            \"HEAD-HASH-NOTFOUND\"\n            PARENT_SCOPE)\n        return()\n    endif()\n\n    execute_process(\n        COMMAND \"${GIT_EXECUTABLE}\" diff-index --quiet HEAD --\n        WORKING_DIRECTORY \"${CMAKE_CURRENT_SOURCE_DIR}\"\n        RESULT_VARIABLE res\n        OUTPUT_VARIABLE out\n        
ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE)\n    if(res EQUAL 0)\n        set(${_var}\n            \"CLEAN\"\n            PARENT_SCOPE)\n    else()\n        set(${_var}\n            \"DIRTY\"\n            PARENT_SCOPE)\n    endif()\nendfunction()\n"
  },
  {
    "path": "src/GetGitRevisionDescription.cmake.in",
    "content": "#\n# Internal file for GetGitRevisionDescription.cmake\n#\n# Requires CMake 2.6 or newer (uses the 'function' command)\n#\n# Original Author:\n# 2009-2010 Ryan Pavlik <rpavlik@iastate.edu> <abiryan@ryand.net>\n# http://academic.cleardefinition.com\n# Iowa State University HCI Graduate Program/VRAC\n#\n# Copyright 2009-2012, Iowa State University\n# Copyright 2011-2015, Contributors\n# Distributed under the Boost Software License, Version 1.0.\n# (See accompanying file LICENSE_1_0.txt or copy at\n# http://www.boost.org/LICENSE_1_0.txt)\n# SPDX-License-Identifier: BSL-1.0\n\nset(HEAD_HASH)\n\nfile(READ \"@HEAD_FILE@\" HEAD_CONTENTS LIMIT 1024)\n\nstring(STRIP \"${HEAD_CONTENTS}\" HEAD_CONTENTS)\nif(HEAD_CONTENTS MATCHES \"ref\")\n\t# named branch\n\tstring(REPLACE \"ref: \" \"\" HEAD_REF \"${HEAD_CONTENTS}\")\n\tif(EXISTS \"@GIT_DIR@/${HEAD_REF}\")\n\t\tconfigure_file(\"@GIT_DIR@/${HEAD_REF}\" \"@GIT_DATA@/head-ref\" COPYONLY)\n\telse()\n\t\tconfigure_file(\"@GIT_DIR@/packed-refs\" \"@GIT_DATA@/packed-refs\" COPYONLY)\n\t\tfile(READ \"@GIT_DATA@/packed-refs\" PACKED_REFS)\n\t\tif(${PACKED_REFS} MATCHES \"([0-9a-z]*) ${HEAD_REF}\")\n\t\t\tset(HEAD_HASH \"${CMAKE_MATCH_1}\")\n\t\tendif()\n\tendif()\nelse()\n\t# detached HEAD\n\tconfigure_file(\"@GIT_DIR@/HEAD\" \"@GIT_DATA@/head-ref\" COPYONLY)\nendif()\n\nif(NOT HEAD_HASH)\n\tfile(READ \"@GIT_DATA@/head-ref\" HEAD_HASH LIMIT 1024)\n\tstring(STRIP \"${HEAD_HASH}\" HEAD_HASH)\nendif()\n"
  },
  {
    "path": "src/common/arg_range.h",
    "content": "// Copyright 2025, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#pragma once\n\n#include <absl/types/span.h>\n\n#include <string_view>\n#include <variant>\n\n#include \"base/iterator.h\"\n\nnamespace cmn {\n\nusing ArgSlice = absl::Span<const std::string_view>;\nusing OwnedArgSlice = absl::Span<const std::string>;\n\ninline std::string_view ToSV(std::string_view slice) {\n  return slice;\n}\n\ninline std::string_view ToSV(const std::string& slice) {\n  return slice;\n}\n\ninline std::string_view ToSV(std::string&& slice) = delete;\n\nconstexpr auto kToSV = [](auto&& v) { return ToSV(std::forward<decltype(v)>(v)); };\n\nstruct ArgRange {\n  ArgRange(ArgRange&&) = default;\n  ArgRange(const ArgRange&) = default;\n  ArgRange(ArgRange& range) : ArgRange((const ArgRange&)range) {\n  }\n\n  template <typename T, std::enable_if_t<!std::is_same_v<ArgRange, T>, bool> = true>\n  ArgRange(T&& span) : span(std::forward<T>(span)) {  // NOLINT(google-explicit-constructor)\n  }\n\n  size_t Size() const {\n    return std::visit([](const auto& span) { return span.size(); }, span);\n  }\n\n  auto Range() const {\n    return base::it::Wrap(kToSV, span);\n  }\n\n  auto begin() const {\n    return Range().first;\n  }\n\n  auto end() const {\n    return Range().second;\n  }\n\n  std::string_view operator[](size_t idx) const {\n    return std::visit([idx](const auto& span) -> std::string_view { return span[idx]; }, span);\n  }\n\n  std::variant<ArgSlice, OwnedArgSlice> span;\n};\n\n}  // namespace cmn\n"
  },
  {
    "path": "src/common/backed_args.h",
    "content": "// Copyright 2025, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#pragma once\n\n#include <absl/container/inlined_vector.h>\n\n#include <cstdint>\n#include <string_view>\n\nnamespace cmn {\n\nclass BackedArguments {\n  constexpr static size_t kLenCap = 5;\n  constexpr static size_t kStorageCap = 88;\n\n public:\n  using value_type = std::string_view;\n\n  BackedArguments() {\n  }\n\n  class iterator {\n   public:\n    using iterator_category = std::random_access_iterator_tag;\n    using value_type = std::string_view;\n    using difference_type = std::ptrdiff_t;\n    using pointer = const std::string_view*;\n    using reference = std::string_view;\n\n    iterator(const BackedArguments* ba, size_t index) : ba_(ba), index_(index) {\n    }\n\n    iterator& operator++() {\n      ++index_;\n      return *this;\n    }\n\n    iterator& operator--() {\n      --index_;\n      return *this;\n    }\n\n    iterator& operator+=(int delta) {\n      index_ += delta;\n      return *this;\n    }\n\n    iterator operator+(int delta) const {\n      iterator res(*this);\n      res += delta;\n      return res;\n    }\n\n    ptrdiff_t operator-(iterator other) const {\n      return ptrdiff_t(index_) - ptrdiff_t(other.index_);\n    }\n\n    bool operator==(const iterator& other) const {\n      return index_ == other.index_ && ba_ == other.ba_;\n    }\n\n    bool operator!=(const iterator& other) const {\n      return !(*this == other);\n    }\n\n    std::string_view operator*() const {\n      return ba_->at(index_);\n    }\n\n   private:\n    const BackedArguments* ba_;\n    size_t index_;\n  };\n\n  // Construct the arguments from iterator range.\n  // TODO: In general we could get away without the len argument,\n  // but that would require fixing base::it::CompoundIterator to support subtraction.\n  // Similarly, I wish that CompoundIterator supported the -> operator.\n  template <typename I> BackedArguments(I begin, I end, size_t 
len) {\n    Assign(begin, end, len);\n  }\n\n  template <typename I> void Assign(I begin, I end, size_t len);\n\n  void Reserve(size_t arg_cnt, size_t total_size) {\n    offsets_.reserve(arg_cnt);\n    storage_.reserve(total_size);\n  }\n\n  size_t HeapMemory() const {\n    size_t s1 = offsets_.capacity() <= kLenCap ? 0 : offsets_.capacity() * sizeof(uint32_t);\n    size_t s2 = storage_.capacity() <= kStorageCap ? 0 : storage_.capacity();\n    return s1 + s2;\n  }\n\n  void SwapArgs(cmn::BackedArguments& other) {\n    offsets_.swap(other.offsets_);\n    storage_.swap(other.storage_);\n  }\n\n  // The capacity is chosen so that we allocate a fully utilized (128 bytes) block.\n  using StorageType = absl::InlinedVector<char, kStorageCap>;\n\n  std::string_view Front() const {\n    return std::string_view{storage_.data(), elem_len(0)};\n  }\n\n  size_t size() const {\n    return offsets_.size();\n  }\n\n  bool empty() const {\n    return offsets_.empty();\n  }\n\n  size_t elem_len(size_t i) const {\n    return elem_capacity(i) - 1;\n  }\n\n  size_t elem_capacity(size_t i) const {\n    uint32_t next_offs = i + 1 >= offsets_.size() ? storage_.size() : offsets_[i + 1];\n    return next_offs - offsets_[i];\n  }\n\n  std::string_view at(uint32_t index) const {\n    uint32_t offset = offsets_[index];\n    return std::string_view{storage_.data() + offset, elem_len(index)};\n  }\n\n  char* data(uint32_t index) {\n    uint32_t offset = offsets_[index];\n    return storage_.data() + offset;\n  }\n\n  std::string_view operator[](uint32_t index) const {\n    return at(index);\n  }\n\n  iterator begin() const {\n    return {this, 0};\n  }\n\n  iterator end() const {\n    return {this, offsets_.size()};\n  }\n\n  void clear() {\n    // Clear the contents without deallocating memory. 
clear() deallocates inlined_vector.\n    offsets_.resize(0);\n    storage_.resize(0);\n  }\n\n  std::string_view back() const {\n    assert(size() > 0);\n    return at(size() - 1);\n  }\n\n  // Reserves space for additional argument of given length at the end.\n  void PushArg(size_t len) {\n    size_t old_size = storage_.size();\n    offsets_.push_back(old_size);\n    storage_.resize(old_size + len + 1);\n  }\n\n  void PushArg(std::string_view arg) {\n    PushArg(arg.size());\n    char* dest = storage_.data() + offsets_.back();\n    if (arg.size() > 0)\n      memcpy(dest, arg.data(), arg.size());\n    dest[arg.size()] = '\\0';\n  }\n\n  void PopArg() {\n    uint32_t last_offs = offsets_.back();\n    offsets_.pop_back();\n    storage_.resize(last_offs);\n  }\n\n protected:\n  absl::InlinedVector<uint32_t, kLenCap> offsets_;\n  StorageType storage_;\n};\n\nstatic_assert(sizeof(BackedArguments) == 128);\n\ntemplate <typename I> void BackedArguments::Assign(I begin, I end, size_t len) {\n  offsets_.resize(len);\n  size_t total_size = 0;\n  unsigned idx = 0;\n  for (auto it = begin; it != end; ++it) {\n    offsets_[idx++] = total_size;\n    total_size += (*it).size() + 1;  // +1 for '\\0'\n  }\n  storage_.resize(total_size);\n\n  // Reclaim memory if we have too much allocated.\n  if (storage_.capacity() > kStorageCap && total_size < storage_.capacity() / 2)\n    storage_.shrink_to_fit();\n\n  char* next = storage_.data();\n  for (auto it = begin; it != end; ++it) {\n    size_t sz = (*it).size();\n    if (sz > 0) {\n      memcpy(next, (*it).data(), sz);\n    }\n    next[sz] = '\\0';\n    next += sz + 1;\n  }\n}\n\n}  // namespace cmn\n"
  },
  {
    "path": "src/common/heap_size.h",
    "content": "// Copyright 2023, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n// This file provides utilities to *estimate* heap memory usage of classes.\n// The main function exposed here is HeapSize() (with various overloads).\n// It supports simple structs (returns 0), std::string (returns capacity if it's larger than SSO)\n// and common containers, such as std::vector, std::deque, absl::flat_hash_map and unique_ptr.\n//\n// Example usage:\n// absl::flat_hash_map<std::string, std::vector<std::unique_ptr<int>>> m;\n// ...\n// size_t size = HeapSize(m);\n\n#pragma once\n\n#include <absl/container/flat_hash_map.h>\n#include <absl/container/flat_hash_set.h>\n#include <absl/container/inlined_vector.h>\n#include <absl/types/span.h>\n\n#include <deque>\n#include <string>\n#include <string_view>\n#include <type_traits>\n#include <vector>\n\nnamespace cmn {\n\nnamespace heap_size_detail {\n\ntemplate <class, class = void> struct has_marked_stackonly : std::false_type {};\n\ntemplate <class T>\nstruct has_marked_stackonly<T, std::void_t<typename T::is_stackonly>> : std::true_type {};\n\ntemplate <typename T> constexpr bool StackOnlyType() {\n  return std::is_trivial_v<T> || std::is_same_v<T, std::string_view> ||\n         has_marked_stackonly<T>::value;\n}\n\ntemplate <typename T, typename = void> struct has_used_mem : std::false_type {};\n\ntemplate <typename T>\nstruct has_used_mem<T, std::void_t<decltype(&T::UsedMemory)>> : std::true_type {};\n\ntemplate <typename Container> size_t AccumulateContainer(const Container& c);\n}  // namespace heap_size_detail\n\ninline size_t HeapSize(const std::string& s) {\n  constexpr size_t kSmallStringOptSize = 15;\n  return s.capacity() > kSmallStringOptSize ? 
s.capacity() : 0UL;\n}\n\ntemplate <typename T, std::enable_if_t<heap_size_detail::has_used_mem<T>::value, bool> = true>\nsize_t HeapSize(const T& t) {\n  return t.UsedMemory();\n}\n\ntemplate <typename T, std::enable_if_t<heap_size_detail::StackOnlyType<T>(), bool> = true>\nsize_t HeapSize(const T& t) {\n  return 0;\n}\n\ntemplate <typename T> size_t HeapSize(absl::Span<T>) {\n  return 0;\n}\n\n// Declare first, so that we can use these \"recursively\"\ntemplate <typename T> size_t HeapSize(const std::vector<T>& v);\ntemplate <typename T> size_t HeapSize(const std::unique_ptr<T>& t);\ntemplate <typename T> size_t HeapSize(const std::deque<T>& d);\ntemplate <typename T1, typename T2> size_t HeapSize(const std::pair<T1, T2>& p);\ntemplate <typename T, size_t N> size_t HeapSize(const absl::InlinedVector<T, N>& v);\ntemplate <typename K, typename V> size_t HeapSize(const absl::flat_hash_map<K, V>& m);\ntemplate <typename K> size_t HeapSize(const absl::flat_hash_set<K>& s);\n\ntemplate <typename T> size_t HeapSize(const std::unique_ptr<T>& t) {\n  if (t == nullptr) {\n    return 0;\n  } else {\n    return sizeof(T) + HeapSize(*t);\n  }\n}\n\ntemplate <typename T> size_t HeapSize(const std::vector<T>& v) {\n  return (v.capacity() * sizeof(T)) + heap_size_detail::AccumulateContainer(v);\n}\n\ntemplate <typename T> size_t HeapSize(const std::deque<T>& d) {\n  return (d.size() * sizeof(T)) + heap_size_detail::AccumulateContainer(d);\n}\n\ntemplate <typename T1, typename T2> size_t HeapSize(const std::pair<T1, T2>& p) {\n  return HeapSize(p.first) + HeapSize(p.second);\n}\n\ntemplate <typename T, size_t N> size_t HeapSize(const absl::InlinedVector<T, N>& v) {\n  size_t size = 0;\n  if (v.capacity() > N) {\n    size += v.capacity() * sizeof(T);\n  }\n  size += heap_size_detail::AccumulateContainer(v);\n  return size;\n}\n\ntemplate <typename K, typename V> size_t HeapSize(const absl::flat_hash_map<K, V>& m) {\n  size_t size = m.capacity() * sizeof(typename 
absl::flat_hash_map<K, V>::value_type);\n\n  if constexpr (!heap_size_detail::StackOnlyType<K>() || !heap_size_detail::StackOnlyType<V>()) {\n    for (const auto& kv : m) {\n      size += HeapSize(kv);\n    }\n  }\n\n  return size;\n}\n\ntemplate <typename K> size_t HeapSize(const absl::flat_hash_set<K>& s) {\n  size_t size = s.capacity() * sizeof(typename absl::flat_hash_set<K>::value_type);\n\n  if constexpr (!heap_size_detail::StackOnlyType<K>()) {\n    for (const auto& k : s) {\n      size += HeapSize(k);\n    }\n  }\n\n  return size;\n}\n\nnamespace heap_size_detail {\ntemplate <typename Container> size_t AccumulateContainer(const Container& c) {\n  size_t size = 0;\n\n  if constexpr (!heap_size_detail::StackOnlyType<typename Container::value_type>()) {\n    for (const auto& e : c) {\n      size += HeapSize(e);\n    }\n  }\n\n  return size;\n}\n}  // namespace heap_size_detail\n\n}  // namespace cmn\n"
  },
  {
    "path": "src/common/string_or_view.h",
    "content": "// Copyright 2025, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#pragma once\n\n#include <string>\n#include <string_view>\n#include <variant>\n\nnamespace cmn {\n\nclass StringOrView {\n public:\n  static StringOrView FromString(std::string s) {\n    StringOrView sov;\n    sov.val_ = std::move(s);\n    return sov;\n  }\n\n  static StringOrView FromView(std::string_view sv) {\n    StringOrView sov;\n    sov.val_ = sv;\n    return sov;\n  }\n\n  StringOrView() = default;\n  StringOrView(const StringOrView& o) = default;\n  StringOrView(StringOrView&& o) = default;\n  StringOrView& operator=(const StringOrView& o) = default;\n  StringOrView& operator=(StringOrView&& o) = default;\n\n  bool operator==(const StringOrView& o) const {\n    return *this == o.view();\n  }\n\n  bool operator==(std::string_view o) const {\n    return view() == o;\n  }\n\n  bool operator!=(const StringOrView& o) const {\n    return *this != o.view();\n  }\n\n  bool operator!=(std::string_view o) const {\n    return !(*this == o);\n  }\n\n  std::string_view view() const {\n    return visit([](const auto& s) -> std::string_view { return s; }, val_);\n  }\n\n  friend std::ostream& operator<<(std::ostream& o, const StringOrView& key) {\n    return o << key.view();\n  }\n\n  // Make hashable\n  template <typename H> friend H AbslHashValue(H h, const StringOrView& c) {\n    return H::combine(std::move(h), c.view());\n  }\n\n  // If the key is backed by a string_view, replace it with a string with the same value\n  void MakeOwned() {\n    if (std::holds_alternative<std::string_view>(val_))\n      val_ = std::string{std::get<std::string_view>(val_)};\n  }\n\n  // Move out of value as string\n  std::string Take() && {\n    MakeOwned();\n    return std::move(std::get<std::string>(val_));\n  }\n\n  std::string* GetMutable() {\n    MakeOwned();\n    return &std::get<std::string>(val_);\n  }\n\n  bool empty() const {\n    return visit([](const auto& 
s) { return s.empty(); }, val_);\n  }\n\n private:\n  std::variant<std::string_view, std::string> val_;\n};\n\n}  // namespace cmn\n"
  },
  {
    "path": "src/core/CMakeLists.txt",
    "content": "find_library(LIB_PCRE2 NAMES pcre2-8)\nif(LIB_PCRE2)\n  set(PCRE2_LIB ${LIB_PCRE2})\nelse()\n  message(STATUS \"pcre2-8 not found. Building without PCRE2 support.\")\n  set(PCRE2_LIB \"\")\nendif()\n\nfind_library(LIB_RE2 NAMES re2)\nif(LIB_RE2)\n  set(RE2_LIB ${LIB_RE2})\nelse()\n  message(STATUS \"re2 not found. Building without RE2 support.\")\n  set(RE2_LIB \"\")\nendif()\n\nif (WITH_SEARCH)\n  add_subdirectory(search)\nelse()\n  add_library(dfly_search_core INTERFACE)\nendif()\n\nadd_subdirectory(json)\nadd_subdirectory(page_usage)\n\nadd_library(dfly_core allocation_tracker.cc bloom.cc topk.cc compact_object.cc cms.cc dense_set.cc\n    dragonfly_core.cc extent_tree.cc huff_coder.cc\n    interpreter.cc glob_matcher.cc mi_memory_resource.cc qlist.cc dict_builder.cc sds_utils.cc\n    segment_allocator.cc score_map.cc small_string.cc sorted_map.cc task_queue.cc\n    tx_queue.cc string_set.cc string_map.cc tiering_types.cc top_keys.cc\n    detail/bitpacking.cc detail/listpack_wrap.cc detail/listpack.cc\n    oah_entry.cc)\n\ncxx_link(dfly_core base dfly_search_core dfly_page_usage fibers2 jsonpath\n    absl::flat_hash_map absl::str_format absl::random_random redis_lib\n    TRDP::lua lua_modules\n    OpenSSL::Crypto TRDP::dconv TRDP::lz4 TRDP::hdr_histogram)\n\nadd_executable(dash_bench dash_bench.cc)\ncxx_link(dash_bench dfly_core redis_test_lib)\n\nhelio_cxx_test(dfly_core_test dfly_core TRDP::fast_float ${PCRE2_LIB} ${RE2_LIB} LABELS DFLY)\nhelio_cxx_test(compact_object_test dfly_core LABELS DFLY)\nhelio_cxx_test(extent_tree_test dfly_core LABELS DFLY)\nhelio_cxx_test(dash_test dfly_core file redis_test_lib DATA testdata/ids.txt.zst LABELS DFLY)\nhelio_cxx_test(interpreter_test dfly_core LABELS DFLY)\n\nhelio_cxx_test(string_set_test dfly_core LABELS DFLY)\nhelio_cxx_test(string_map_test dfly_core LABELS DFLY)\nhelio_cxx_test(oah_set_test dfly_core LABELS DFLY)\nhelio_cxx_test(sorted_map_test dfly_core redis_test_lib LABELS 
DFLY)\nhelio_cxx_test(bptree_set_test dfly_core LABELS DFLY)\nhelio_cxx_test(linear_search_map_test dfly_core LABELS DFLY)\nhelio_cxx_test(score_map_test dfly_core LABELS DFLY)\nhelio_cxx_test(flatbuffers_test dfly_core TRDP::flatbuffers LABELS DFLY)\nhelio_cxx_test(bloom_test dfly_core LABELS DFLY)\nhelio_cxx_test(allocation_tracker_test dfly_core absl::random_random LABELS DFLY)\nhelio_cxx_test(qlist_test dfly_core DATA testdata/list.txt.zst LABELS DFLY)\nhelio_cxx_test(listpack_test dfly_core redis_lib LABELS DFLY)\nhelio_cxx_test(zstd_test dfly_core TRDP::zstd LABELS DFLY)\nhelio_cxx_test(dict_builder_test dfly_core LABELS DFLY)\nhelio_cxx_test(top_keys_test dfly_core LABELS DFLY)\nhelio_cxx_test(topk_test dfly_core LABELS DFLY)\nhelio_cxx_test(page_usage_stats_test dfly_core LABELS DFLY)\nhelio_cxx_test(cms_test dfly_core LABELS DFLY)\nhelio_cxx_test(memory_test TRDP::mimalloc2 LABELS DFLY)\n\nif(LIB_PCRE2)\n  target_compile_definitions(dfly_core_test PRIVATE USE_PCRE2=1)\n  # target_compile_definitions(dfly_core PUBLIC USE_PCRE2=1)\nendif()\n\nif(LIB_RE2)\n  target_compile_definitions(dfly_core_test PRIVATE USE_RE2)\nendif()\n"
  },
  {
    "path": "src/core/allocation_tracker.cc",
    "content": "// Copyright 2024, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#include \"core/allocation_tracker.h\"\n\n#include \"absl/random/random.h\"\n#include \"base/logging.h\"\n#include \"util/fibers/stacktrace.h\"\n\nnamespace dfly {\nnamespace {\nthread_local AllocationTracker g_tracker;\nthread_local absl::InsecureBitGen g_bitgen;\n\nbool CanCallVlog(std::string_view trace) {\n  // GLOG fails when logging while flushing the current log under a mutex\n  return trace.find(\"LogMessage::Flush\") == std::string::npos;\n}\n\n}  // namespace\n\nAllocationTracker& AllocationTracker::Get() {\n  return g_tracker;\n}\n\nbool AllocationTracker::Add(const TrackingInfo& info) {\n  if (tracking_.size() >= tracking_.capacity()) {\n    return false;\n  }\n\n  tracking_.push_back(info);\n\n  UpdateAbsSizes();\n\n  return true;\n}\n\nbool AllocationTracker::Remove(size_t lower_bound, size_t upper_bound) {\n  size_t before_size = tracking_.size();\n\n  tracking_.erase(std::remove_if(tracking_.begin(), tracking_.end(),\n                                 [&](const TrackingInfo& info) {\n                                   return info.lower_bound == lower_bound &&\n                                          info.upper_bound == upper_bound;\n                                 }),\n                  tracking_.end());\n\n  UpdateAbsSizes();\n\n  return before_size != tracking_.size();\n}\n\nvoid AllocationTracker::Clear() {\n  tracking_.clear();\n}\n\nabsl::Span<const AllocationTracker::TrackingInfo> AllocationTracker::GetRanges() const {\n  return absl::MakeConstSpan(tracking_);\n}\n\nvoid AllocationTracker::ProcessNew(void* ptr, size_t size) {\n  if (size < abs_min_size_ || size > abs_max_size_) {\n    return;\n  }\n\n  if (inside_tracker_) {\n    return;\n  }\n\n  // Prevent endless recursion, in case logging allocates memory\n  inside_tracker_ = true;\n  for (const auto& band : tracking_) {\n    if (size > band.upper_bound || size < 
band.lower_bound) {\n      continue;\n    }\n\n    // Micro optimization: in case sample_odds == 1.0 - do not draw a random number\n    if (band.sample_odds != 1.0 && absl::Uniform(g_bitgen, 0.0, 1.0) >= band.sample_odds) {\n      continue;\n    }\n\n    size_t usable = mi_usable_size(ptr);\n    std::string trace = util::fb2::GetStacktrace();\n\n    if (CanCallVlog(trace)) {\n      DCHECK_GE(usable, size);\n      LOG(INFO) << \"Allocating \" << usable << \" bytes (\" << ptr << \"). Stack: \" << trace;\n    }\n\n    break;\n  }\n  inside_tracker_ = false;\n}\n\nvoid AllocationTracker::ProcessDelete(void* ptr) {\n  if (inside_tracker_) {\n    return;\n  }\n\n  inside_tracker_ = true;\n  // we partially handle deletes, specifically when specifying a single range with\n  // 100% sampling rate.\n  if (tracking_.size() == 1 && tracking_.front().sample_odds == 1) {\n    size_t usable = mi_usable_size(ptr);\n    if (usable <= tracking_.front().upper_bound && usable >= tracking_.front().lower_bound) {\n      std::string trace = util::fb2::GetStacktrace();\n      LOG_IF(INFO, CanCallVlog(trace)) << \"Deallocating \" << usable << \" bytes (\" << ptr << \")\\n\"\n                                       << trace;\n    }\n  }\n  inside_tracker_ = false;\n}\n\nvoid AllocationTracker::UpdateAbsSizes() {\n  abs_min_size_ = 0;\n  abs_max_size_ = 0;\n  for (const auto& tracker : tracking_) {\n    abs_min_size_ = std::min(abs_min_size_, tracker.lower_bound);\n    abs_max_size_ = std::max(abs_max_size_, tracker.upper_bound);\n  }\n}\n\n}  // namespace dfly\n"
  },
  {
    "path": "src/core/allocation_tracker.h",
    "content": "// Copyright 2024, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n#pragma once\n\n#include <absl/container/inlined_vector.h>\n#include <mimalloc.h>\n\n#include <cstddef>\n\nnamespace dfly {\n\n// Allows \"tracking\" of memory allocations by size bands. Tracking is naive in that it only prints\n// the stack trace of the memory allocation, if matched by size & sampling criteria.\n// Supports up to 4 different bands in parallel.\n//\n// Thread-local. Must be configured in all relevant threads separately.\n//\n// #define INJECT_ALLOCATION_TRACKER before #include exactly once to override new/delete\nclass AllocationTracker {\n public:\n  struct TrackingInfo {\n    size_t lower_bound = 0;\n    size_t upper_bound = 0;\n    double sample_odds = 0.0;\n  };\n\n  // Returns a thread-local reference.\n  static AllocationTracker& Get();\n\n  // Will track memory allocations in range [lower, upper]. Sample odds must be between [0, 1],\n  // where 1 means all allocations are tracked and 0 means none.\n  bool Add(const TrackingInfo& info);\n\n  // Removes all tracking exactly matching lower_bound and upper_bound.\n  // Returns true if the tracking range [lower_bound, upper_bound] was removed\n  // and false, otherwise.\n  bool Remove(size_t lower_bound, size_t upper_bound);\n\n  // Clears *all* tracking.\n  void Clear();\n\n  absl::Span<const TrackingInfo> GetRanges() const;\n\n  void ProcessNew(void* ptr, size_t size);\n  void ProcessDelete(void* ptr);\n\n private:\n  void UpdateAbsSizes();\n\n  absl::InlinedVector<TrackingInfo, 4> tracking_;\n  bool inside_tracker_ = false;\n  size_t abs_min_size_ = 0;\n  size_t abs_max_size_ = 0;\n};\n\n}  // namespace dfly\n\n#ifdef INJECT_ALLOCATION_TRACKER\n// Code here is copied from mimalloc-new-delete, and modified to add tracking\nvoid operator delete(void* p) noexcept {\n  dfly::AllocationTracker::Get().ProcessDelete(p);\n  mi_free(p);\n};\nvoid operator delete[](void* p) noexcept 
{\n  dfly::AllocationTracker::Get().ProcessDelete(p);\n  mi_free(p);\n};\n\nvoid operator delete(void* p, const std::nothrow_t&) noexcept {\n  dfly::AllocationTracker::Get().ProcessDelete(p);\n  mi_free(p);\n}\nvoid operator delete[](void* p, const std::nothrow_t&) noexcept {\n  dfly::AllocationTracker::Get().ProcessDelete(p);\n  mi_free(p);\n}\n\nvoid* operator new(std::size_t n) noexcept(false) {\n  auto v = mi_new(n);\n  dfly::AllocationTracker::Get().ProcessNew(v, n);\n  return v;\n}\nvoid* operator new[](std::size_t n) noexcept(false) {\n  auto v = mi_new(n);\n  dfly::AllocationTracker::Get().ProcessNew(v, n);\n  return v;\n}\n\nvoid* operator new(std::size_t n, const std::nothrow_t& tag) noexcept {\n  (void)(tag);\n  auto v = mi_new_nothrow(n);\n  dfly::AllocationTracker::Get().ProcessNew(v, n);\n  return v;\n}\nvoid* operator new[](std::size_t n, const std::nothrow_t& tag) noexcept {\n  (void)(tag);\n  auto v = mi_new_nothrow(n);\n  dfly::AllocationTracker::Get().ProcessNew(v, n);\n  return v;\n}\n\n#if (__cplusplus >= 201402L || _MSC_VER >= 1916)\nvoid operator delete(void* p, std::size_t n) noexcept {\n  dfly::AllocationTracker::Get().ProcessDelete(p);\n  mi_free_size(p, n);\n};\nvoid operator delete[](void* p, std::size_t n) noexcept {\n  dfly::AllocationTracker::Get().ProcessDelete(p);\n  mi_free_size(p, n);\n};\n#endif\n\n#if (__cplusplus > 201402L || defined(__cpp_aligned_new))\nvoid operator delete(void* p, std::align_val_t al) noexcept {\n  dfly::AllocationTracker::Get().ProcessDelete(p);\n  mi_free_aligned(p, static_cast<size_t>(al));\n}\nvoid operator delete[](void* p, std::align_val_t al) noexcept {\n  dfly::AllocationTracker::Get().ProcessDelete(p);\n  mi_free_aligned(p, static_cast<size_t>(al));\n}\nvoid operator delete(void* p, std::size_t n, std::align_val_t al) noexcept {\n  dfly::AllocationTracker::Get().ProcessDelete(p);\n  mi_free_size_aligned(p, n, static_cast<size_t>(al));\n};\nvoid operator delete[](void* p, std::size_t n, 
std::align_val_t al) noexcept {\n  dfly::AllocationTracker::Get().ProcessDelete(p);\n  mi_free_size_aligned(p, n, static_cast<size_t>(al));\n};\nvoid operator delete(void* p, std::align_val_t al, const std::nothrow_t&) noexcept {\n  dfly::AllocationTracker::Get().ProcessDelete(p);\n  mi_free_aligned(p, static_cast<size_t>(al));\n}\nvoid operator delete[](void* p, std::align_val_t al, const std::nothrow_t&) noexcept {\n  dfly::AllocationTracker::Get().ProcessDelete(p);\n  mi_free_aligned(p, static_cast<size_t>(al));\n}\n\nvoid* operator new(std::size_t n, std::align_val_t al) noexcept(false) {\n  auto v = mi_new_aligned(n, static_cast<size_t>(al));\n  dfly::AllocationTracker::Get().ProcessNew(v, n);\n  return v;\n}\nvoid* operator new[](std::size_t n, std::align_val_t al) noexcept(false) {\n  auto v = mi_new_aligned(n, static_cast<size_t>(al));\n  dfly::AllocationTracker::Get().ProcessNew(v, n);\n  return v;\n}\nvoid* operator new(std::size_t n, std::align_val_t al, const std::nothrow_t&) noexcept {\n  auto v = mi_new_aligned_nothrow(n, static_cast<size_t>(al));\n  dfly::AllocationTracker::Get().ProcessNew(v, n);\n  return v;\n}\nvoid* operator new[](std::size_t n, std::align_val_t al, const std::nothrow_t&) noexcept {\n  auto v = mi_new_aligned_nothrow(n, static_cast<size_t>(al));\n  dfly::AllocationTracker::Get().ProcessNew(v, n);\n  return v;\n}\n#endif\n#endif  // INJECT_ALLOCATION_TRACKER\n"
  },
  {
    "path": "src/core/allocation_tracker_test.cc",
    "content": "#include <absl/strings/match.h>\n#include <gmock/gmock.h>\n#include <gtest/gtest.h>\n\n#include <string>\n#include <vector>\n\n#include \"base/gtest.h\"\n#include \"base/logging.h\"\n\n#define INJECT_ALLOCATION_TRACKER\n#include \"core/allocation_tracker.h\"\n\nnamespace dfly {\nnamespace {\nusing namespace std;\nusing namespace testing;\n\nclass LogSink : public google::LogSink {\n public:\n  void send(google::LogSeverity severity, const char* full_filename, const char* base_filename,\n            int line, const struct tm* tm_time, const char* message, size_t message_len) override {\n    logs_.push_back(string(message, message_len));\n  }\n\n  const vector<string>& GetLogs() const {\n    return logs_;\n  }\n\n  void Clear() {\n    logs_.clear();\n  }\n\n private:\n  vector<string> logs_;\n};\n\nclass AllocationTrackerTest : public Test {\n protected:\n  AllocationTrackerTest() {\n    google::AddLogSink(&log_sink_);\n  }\n\n  ~AllocationTrackerTest() {\n    google::RemoveLogSink(&log_sink_);\n    AllocationTracker::Get().Clear();\n  }\n\n  vector<string> GetLogsDelta() {\n    auto logs = log_sink_.GetLogs();\n    log_sink_.Clear();\n    return logs;\n  }\n\n  void Allocate(size_t s) {\n    CHECK(buffer_.empty());\n    buffer_.resize(s);  // allocate 1mb before setting up tracking\n  }\n\n  void Deallocate() {\n    buffer_.clear();\n    // Force deallocation\n    buffer_.shrink_to_fit();\n  }\n\n private:\n  LogSink log_sink_;\n  string buffer_;\n};\n\nTEST_F(AllocationTrackerTest, UnusedTracker) {\n  Allocate(1'000'000);  // allocate 1mb before setting up tracking\n  EXPECT_THAT(GetLogsDelta(), Not(Contains(HasSubstr(\"Allocating\"))));\n  Deallocate();\n}\n\nTEST_F(AllocationTrackerTest, UsedTracker) {\n  AllocationTracker::Get().Add(\n      {.lower_bound = 1'000'000, .upper_bound = 2'000'000, .sample_odds = 1.0});\n  Allocate(1'000'000);  // allocate 1mb before setting up tracking\n  EXPECT_THAT(GetLogsDelta(), 
Contains(HasSubstr(\"Allocating\")));\n  EXPECT_THAT(GetLogsDelta(), Not(Contains(HasSubstr(\"Deallocating\"))));\n  Deallocate();\n  EXPECT_THAT(GetLogsDelta(), Contains(HasSubstr(\"Deallocating\")));\n\n  // Allocate below threshold\n  Allocate(100'000);\n  EXPECT_THAT(GetLogsDelta(), Not(Contains(HasSubstr(\"Allocating\"))));\n  Deallocate();\n  EXPECT_THAT(GetLogsDelta(), Not(Contains(HasSubstr(\"Deallocating\"))));\n\n  // Allocate above threshold\n  Allocate(10'000'000);\n  EXPECT_THAT(GetLogsDelta(), Not(Contains(HasSubstr(\"Allocating\"))));\n  Deallocate();\n  EXPECT_THAT(GetLogsDelta(), Not(Contains(HasSubstr(\"Deallocating\"))));\n\n  // Remove allocator - stops logging\n  EXPECT_TRUE(AllocationTracker::Get().Remove(1'000'000, 2'000'000));\n  Allocate(1'000'000);  // allocate 1mb before setting up tracking\n  EXPECT_THAT(GetLogsDelta(), Not(Contains(HasSubstr(\"Allocating\"))));\n  Deallocate();\n  EXPECT_THAT(GetLogsDelta(), Not(Contains(HasSubstr(\"Deallocating\"))));\n}\n\nTEST_F(AllocationTrackerTest, MultipleRanges) {\n  AllocationTracker::Get().Add(\n      {.lower_bound = 1'000'000, .upper_bound = 2'000'000, .sample_odds = 1.0});\n  AllocationTracker::Get().Add(\n      {.lower_bound = 100'000'000, .upper_bound = 200'000'000, .sample_odds = 1.0});\n\n  // Below all ranges\n  Allocate(100'000);\n  EXPECT_THAT(GetLogsDelta(), Not(Contains(HasSubstr(\"Allocating\"))));\n  Deallocate();\n\n  // Between ranges\n  Allocate(10'000'000);\n  EXPECT_THAT(GetLogsDelta(), Not(Contains(HasSubstr(\"Allocating\"))));\n  Deallocate();\n\n  // Above all ranges\n  Allocate(500'000'000);\n  EXPECT_THAT(GetLogsDelta(), Not(Contains(HasSubstr(\"Allocating\"))));\n  Deallocate();\n\n  // First range\n  Allocate(1'000'000);\n  EXPECT_THAT(GetLogsDelta(), Contains(HasSubstr(\"Allocating\")));\n  Deallocate();\n\n  // Second range\n  Allocate(100'000'000);\n  EXPECT_THAT(GetLogsDelta(), Contains(HasSubstr(\"Allocating\")));\n  
Deallocate();\n}\n\nTEST_F(AllocationTrackerTest, Sampling) {\n  // Statistically, 80% of logs should be logged\n  AllocationTracker::Get().Add(\n      {.lower_bound = 1'000'000, .upper_bound = 2'000'000, .sample_odds = 0.8});\n\n  const int kIterations = 10'000;\n  for (int i = 0; i < kIterations; ++i) {\n    Allocate(1'000'000);\n    Deallocate();\n  }\n\n  int allocations = 0;\n  int deallocations = 0;\n  for (const string& s : GetLogsDelta()) {\n    if (absl::StrContains(s, \"Allocating\")) {\n      ++allocations;\n    }\n    if (absl::StrContains(s, \"Deallocating\")) {\n      ++deallocations;\n    }\n  }\n\n  EXPECT_GE(allocations, kIterations * 0.7);\n  EXPECT_LE(allocations, kIterations * 0.9);\n  EXPECT_EQ(deallocations, 0);  // we only track deletions when sample_odds == 1.0\n}\n\n}  // namespace\n}  // namespace dfly\n"
  },
  {
    "path": "src/core/bloom.cc",
    "content": "// Copyright 2024, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#include \"core/bloom.h\"\n\n#include <absl/base/internal/endian.h>\n#include <absl/numeric/bits.h>\n#include <xxhash.h>\n\n#include <algorithm>\n#include <cmath>\n\n#include \"base/logging.h\"\n\nnamespace dfly {\n\nusing namespace std;\n\nnamespace {\n\nXXH128_hash_t Hash(string_view str) {\n  return XXH3_128bits_withSeed(str.data(), str.size(), 0xc6a4a7935bd1e995ULL);  // murmur2 seed\n}\n\nuint64_t GetMask(unsigned log) {\n  return (1ULL << log) - 1;\n}\n\nuint64_t BitIndex(uint64_t low, uint64_t hi, unsigned i, uint64_t mask) {\n  return (low + hi * i) & mask;\n}\n\nconstexpr double kDenom = M_LN2 * M_LN2;\nconstexpr double kSBFErrorFactor = 0.5;\n\ndouble BPE(double fp_prob) {\n  return -log(fp_prob) / kDenom;\n}\n\n}  // namespace\n\nBloom::~Bloom() {\n  CHECK(bf_ == nullptr);\n}\n\nBloom::Bloom(Bloom&& o) noexcept : hash_cnt_(o.hash_cnt_), bit_log_(o.bit_log_), bf_(o.bf_) {\n  o.bf_ = nullptr;\n}\n\nvoid Bloom::Init(uint64_t entries, double fp_prob, PMR_NS::memory_resource* heap) {\n  CHECK(bf_ == nullptr);\n  CHECK(fp_prob > 0 && fp_prob < 1);\n\n  if (fp_prob > 0.5)\n    fp_prob = 0.5;\n  double bpe = BPE(fp_prob);\n\n  hash_cnt_ = ceil(M_LN2 * bpe);\n\n  uint64_t bits = uint64_t(ceil(entries * bpe));\n  if (bits < 512) {\n    bits = 512;\n  }\n  bits = absl::bit_ceil(bits);  // make it power of 2.\n\n  uint64_t length = bits / 8;\n  bf_ = (uint8_t*)heap->allocate(length);\n  memset(bf_, 0, length);\n  bit_log_ = absl::countr_zero(bits);\n}\n\nvoid Bloom::Init(uint8_t* blob, size_t len, unsigned hash_cnt) {\n  DCHECK_EQ(len * 8, absl::bit_ceil(len * 8));  // must be power of two.\n  CHECK(bf_ == nullptr);\n  hash_cnt_ = hash_cnt;\n  bf_ = blob;\n  bit_log_ = absl::countr_zero(len * 8);\n}\n\nvoid Bloom::Destroy(PMR_NS::memory_resource* resource) {\n  resource->deallocate(CHECK_NOTNULL(bf_), bitlen() / 8);\n  bf_ = nullptr;\n}\n\nbool 
Bloom::Exists(std::string_view str) const {\n  XXH128_hash_t hash = Hash(str);\n  uint64_t fp[2] = {hash.low64, hash.high64};\n\n  return Exists(fp);\n}\n\nbool Bloom::Exists(const uint64_t fp[2]) const {\n  uint64_t mask = GetMask(bit_log_);\n  for (unsigned i = 0; i < hash_cnt_; ++i) {\n    uint64_t index = BitIndex(fp[0], fp[1], i, mask);\n    if (!IsSet(index))\n      return false;\n  }\n  return true;\n}\n\nbool Bloom::Add(std::string_view str) {\n  XXH128_hash_t hash = Hash(str);\n  uint64_t fp[2] = {hash.low64, hash.high64};\n  return Add(fp);\n}\n\nbool Bloom::Add(const uint64_t fp[2]) {\n  uint64_t mask = GetMask(bit_log_);\n\n  unsigned changes = 0;\n  for (uint64_t i = 0; i < hash_cnt_; i++) {\n    uint64_t index = BitIndex(fp[0], fp[1], i, mask);\n    changes += Set(index);\n  }\n\n  return changes != 0;\n}\n\nsize_t Bloom::Capacity(double fp_prob) const {\n  if (fp_prob > 0.5)\n    fp_prob = 0.5;\n  double bpe = BPE(fp_prob);\n  return floor(bitlen() / bpe);\n}\n\ninline bool Bloom::IsSet(size_t bit_idx) const {\n  uint64_t byte_idx = bit_idx / 8;\n  bit_idx %= 8;  // index within the byte\n  uint8_t b = bf_[byte_idx];\n  return (b & (1 << bit_idx)) != 0;\n}\n\ninline bool Bloom::Set(size_t bit_idx) {\n  uint64_t byte_idx = bit_idx / 8;\n  bit_idx %= 8;\n\n  uint8_t b = bf_[byte_idx];\n  bf_[byte_idx] |= (1 << bit_idx);\n  return bf_[byte_idx] != b;\n}\n\n///////////////////////////////////////////////////////////////////////////////\n// SBF implementation\n///////////////////////////////////////////////////////////////////////////////\nSBF::SBF(uint64_t initial_capacity, double fp_prob, double grow_factor, PMR_NS::memory_resource* mr)\n    : filters_(1, mr), grow_factor_(grow_factor), fp_prob_(fp_prob * kSBFErrorFactor) {\n  filters_.front().Init(initial_capacity, fp_prob_, mr);\n  max_capacity_ = filters_.front().Capacity(fp_prob_);\n}\n\nSBF::SBF(double grow_factor, double fp_prob, size_t max_capacity, size_t prev_size,\n         size_t 
current_size, PMR_NS::memory_resource* mr)\n    : filters_(mr),\n      grow_factor_(grow_factor),\n      fp_prob_(fp_prob),\n      prev_size_(prev_size),\n      current_size_(current_size),\n      max_capacity_(max_capacity) {\n}\n\nSBF::~SBF() {\n  PMR_NS::memory_resource* mr = filters_.get_allocator().resource();\n  for (auto& f : filters_)\n    f.Destroy(mr);\n}\n\nSBF& SBF::operator=(SBF&& src) noexcept {\n  filters_.clear();\n  filters_.swap(src.filters_);\n  grow_factor_ = src.grow_factor_;\n  fp_prob_ = src.fp_prob_;\n  current_size_ = src.current_size_;\n  max_capacity_ = src.max_capacity_;\n\n  return *this;\n}\n\nvoid SBF::AddFilter(const std::string& blob, unsigned hash_cnt) {\n  PMR_NS::memory_resource* mr = filters_.get_allocator().resource();\n  uint8_t* ptr = (uint8_t*)mr->allocate(blob.size(), 1);\n  memcpy(ptr, blob.data(), blob.size());\n  filters_.emplace_back().Init(ptr, blob.size(), hash_cnt);\n}\n\nbool SBF::Add(std::string_view str) {\n  DCHECK_LT(current_size_, max_capacity_);\n\n  XXH128_hash_t hash = Hash(str);\n  uint64_t fp[2] = {hash.low64, hash.high64};\n\n  auto exists = [fp](const Bloom& b) { return b.Exists(fp); };\n\n  // Check for all the previous filters whether the item exists.\n  if (any_of(next(filters_.crbegin()), filters_.crend(), exists)) {\n    return false;\n  }\n\n  if (!filters_.back().Add(fp))\n    return false;\n\n  ++current_size_;\n\n  // Based on the paper, the optimal fill ratio for SBF is 50%.\n  // Lets add a new slice if we reach it.\n  if (current_size_ >= max_capacity_) {\n    fp_prob_ *= kSBFErrorFactor;\n    filters_.emplace_back().Init(max_capacity_ * grow_factor_, fp_prob_,\n                                 filters_.get_allocator().resource());\n    current_size_ = 0;\n    max_capacity_ = filters_.back().Capacity(fp_prob_);\n  }\n\n  return true;\n}\n\nbool SBF::Exists(std::string_view str) const {\n  XXH128_hash_t hash = Hash(str);\n  uint64_t fp[2] = {hash.low64, hash.high64};\n\n  auto exists = 
[fp](const Bloom& b) { return b.Exists(fp); };\n\n  return any_of(filters_.crbegin(), filters_.crend(), exists);\n}\n\nsize_t SBF::MallocUsed() const {\n  size_t res = filters_.capacity() * sizeof(Bloom);\n  for (const auto& b : filters_) {\n    res += (b.bitlen() / 8);\n  }\n  res += sizeof(SBF);\n\n  return res;\n}\n\n}  // namespace dfly\n"
  },
  {
    "path": "src/core/bloom.h",
    "content": "// Copyright 2024, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#pragma once\n\n#include <cstdint>\n#include <string_view>\n#include <vector>\n\n#include \"base/pmr/memory_resource.h\"\n\nnamespace dfly {\n\n/// Bloom filter based on the design of https://github.com/jvirkki/libbloom\nclass Bloom {\n public:\n  Bloom() = default;\n  Bloom(const Bloom&) = delete;\n  Bloom& operator=(const Bloom&) = delete;\n\n  // Note, that Destroy() must be called before calling the d'tor\n  ~Bloom();\n\n  // Initializes a new Bloom object\n  // entries - entries are silently rounded up to the minimum capacity.\n  // fp_prob - False-positive probability of collision. Must be in (0, 1) range.\n  // heap\n  void Init(uint64_t entries, double fp_prob, PMR_NS::memory_resource* resource);\n\n  // Direct initializer. len*8 must be power of 2.\n  void Init(uint8_t* blob, size_t len, unsigned hash_cnt);\n\n  // Destroys the object, must be called before destructing the object.\n  // resource - resource with which the object was initialized.\n  void Destroy(PMR_NS::memory_resource* resource);\n\n  Bloom(Bloom&& o) noexcept;\n\n  bool Exists(std::string_view str) const;\n\n  // Equivalent to the Exist above but accepts two fingerprints of the item.\n  bool Exists(const uint64_t fp[2]) const;\n\n  // Adds an item to the bloom filter.\n  // Returns true if element was not present and was added,\n  // false - if element (or a collision) had already been added previously.\n  bool Add(std::string_view str);\n  bool Add(const uint64_t fp[2]);\n\n  size_t bitlen() const {\n    return 1ULL << bit_log_;\n  }\n\n  // Max element capacity for this bloom filter.\n  // Note that capacity is floor(bit_len / bpe), where bpe (bits per element) is\n  // derived from fp_prob.\n  size_t Capacity(double fp_prob) const;\n\n  std::string_view data() const {\n    return std::string_view{reinterpret_cast<const char*>(bf_), bitlen() / 8};\n  }\n\n  unsigned 
hash_cnt() const {\n    return hash_cnt_;\n  }\n\n private:\n  bool IsSet(size_t index) const;\n  bool Set(size_t index);  // return true if bit was set (i.e was 0 before)\n\n  uint8_t hash_cnt_ = 0;\n  uint8_t bit_log_ = 0;    // log of bit length of the filter. bit length is always power of 2.\n  uint8_t* bf_ = nullptr;  // pointer to the blob.\n};\n\n/**\n * @brief Scalable bloom filter.\n * Based on https://gsd.di.uminho.pt/members/cbm/ps/dbloom.pdf\n * Please note that for SBF, the original paper assumes partitioning of bit space into K\n * disjoint segments where K is number of hash functions. This is done to reduce index collisions.\n * We do not do this, because we use power of 2 bit lengths.\n * TODO: to test the actual rate of this filter.\n */\nclass SBF {\n public:\n  SBF(uint64_t initial_capacity, double fp_prob, double grow_factor, PMR_NS::memory_resource* mr);\n  SBF(const SBF&) = delete;\n\n  // C'tor used for loading persisted filters into SBF.\n  // Should be followed by AddFilter.\n  SBF(double grow_factor, double fp_prob, size_t max_capacity, size_t prev_size,\n      size_t current_size, PMR_NS::memory_resource* mr);\n  ~SBF();\n\n  SBF& operator=(SBF&& src) noexcept;\n\n  void AddFilter(const std::string& blob, unsigned hash_cnt);\n\n  bool Add(std::string_view str);\n  bool Exists(std::string_view str) const;\n\n  size_t current_size() const {\n    return current_size_;\n  }\n\n  size_t prev_size() const {\n    return prev_size_;\n  }\n\n  double grow_factor() const {\n    return grow_factor_;\n  }\n\n  // expected fp probability for the current filter.\n  double fp_probability() const {\n    return fp_prob_;\n  }\n\n  uint32_t num_filters() const {\n    return filters_.size();\n  }\n\n  std::string_view data(size_t idx) const {\n    return filters_[idx].data();\n  }\n\n  unsigned hashfunc_cnt(size_t idx) const {\n    return filters_[idx].hash_cnt();\n  }\n\n  // max capacity of the current filter.\n  size_t max_capacity() const {\n    return 
max_capacity_;\n  }\n\n  size_t MallocUsed() const;\n\n private:\n  // multiple filters from the smallest to the largest.\n  std::vector<Bloom, PMR_NS::polymorphic_allocator<Bloom>> filters_;\n  double grow_factor_;\n  double fp_prob_;\n  size_t prev_size_ = 0;\n  size_t current_size_ = 0;\n  size_t max_capacity_;\n};\n\n}  // namespace dfly\n"
  },
  {
    "path": "src/core/bloom_test.cc",
    "content": "// Copyright 2024, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#include \"core/bloom.h\"\n\n#include <absl/strings/str_cat.h>\n#include <gmock/gmock.h>\n\n#include \"base/gtest.h\"\n\nnamespace dfly {\n\nusing namespace std;\n\nclass BloomTest : public ::testing::Test {\n protected:\n  BloomTest() {\n    bloom_.Init(1000, 0.001, PMR_NS::get_default_resource());\n  }\n\n  ~BloomTest() {\n    bloom_.Destroy(PMR_NS::get_default_resource());\n  }\n\n  Bloom bloom_;\n};\n\nTEST_F(BloomTest, Basic) {\n  EXPECT_FALSE(bloom_.Exists(string_view{}));\n  EXPECT_TRUE(bloom_.Add(string_view{}));\n  EXPECT_TRUE(bloom_.Exists(string_view{}));\n  EXPECT_FALSE(bloom_.Add(string_view{}));\n\n  vector<string> values;\n  for (unsigned i = 0; i < 100; ++i) {\n    values.push_back(absl::StrCat(\"val\", i));\n  }\n\n  for (const auto& val : values) {\n    EXPECT_FALSE(bloom_.Exists(val));\n    EXPECT_TRUE(bloom_.Add(val));\n    EXPECT_TRUE(bloom_.Exists(val));\n    EXPECT_FALSE(bloom_.Add(val));\n  }\n}\n\nTEST_F(BloomTest, ErrorBound) {\n  size_t max_capacity = bloom_.Capacity(0.001);\n  for (unsigned i = 0; i < max_capacity; ++i) {\n    ASSERT_FALSE(bloom_.Exists(absl::StrCat(\"item\", i)));\n  }\n\n  unsigned collisions = 0;\n  for (unsigned i = 0; i < max_capacity; ++i) {\n    if (!bloom_.Add(absl::StrCat(\"item\", i))) {\n      ++collisions;\n    }\n  }\n\n  EXPECT_EQ(collisions, 0) << max_capacity;\n}\n\nTEST_F(BloomTest, Extreme) {\n  Bloom b2;\n\n  // Init with unreasonable large error probability.\n  b2.Init(10, 0.999, PMR_NS::get_default_resource());\n\n  EXPECT_EQ(512, b2.bitlen());  // minimal bit length, even though requested smaller capacity.\n  EXPECT_LT(b2.Capacity(0.999), 512);  // make sure our element capacity is smaller.\n  b2.Destroy(PMR_NS::get_default_resource());\n}\n\nTEST_F(BloomTest, SBF) {\n  SBF sbf(10, 0.001, 2, PMR_NS::get_default_resource());\n\n  unsigned collisions = 0;\n  constexpr unsigned 
kNumElems = 2000000;\n  for (unsigned i = 0; i < kNumElems; ++i) {\n    if (!sbf.Add(absl::StrCat(\"item\", i))) {\n      ++collisions;\n    }\n  }\n\n  // TODO: to revisit the math for deriving number of hash functions for each filter\n  // according to the SBF paper.\n  EXPECT_LE(collisions, kNumElems * 0.008);\n}\n\nstatic void BM_BloomExist(benchmark::State& state) {\n  constexpr size_t kCapacity = 1U << 22;\n  Bloom bloom;\n  bloom.Init(kCapacity, 0.001, PMR_NS::get_default_resource());\n  for (size_t i = 0; i < kCapacity * 0.8; ++i) {\n    bloom.Add(absl::StrCat(\"val\", i));\n  }\n  unsigned i = 0;\n  char buf[32];\n  memset(buf, 'x', sizeof(buf));\n  string_view sv{buf, sizeof(buf)};\n  while (state.KeepRunning()) {\n    absl::numbers_internal::FastIntToBuffer(i, buf);\n    bloom.Exists(sv);\n  }\n  bloom.Destroy(PMR_NS::get_default_resource());\n}\nBENCHMARK(BM_BloomExist);\n\n}  // namespace dfly\n"
  },
  {
    "path": "src/core/bptree_set.h",
    "content": "// Copyright 2023, Roman Gershman.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#pragma once\n\n#include <functional>\n#include <optional>\n\n#include \"core/detail/bptree_internal.h\"\n#include \"core/detail/stateless_allocator.h\"\n\nnamespace dfly {\n\ntemplate <typename T> struct DefaultCompareTo {\n  int operator()(const T& a, const T& b) const {\n    std::less<T> cmp;\n    return cmp(a, b) ? -1 : (cmp(b, a) ? 1 : 0);\n  }\n};\n\ntemplate <typename T> struct BPTreePolicy {\n  using KeyT = T;\n\n  // The three way comparator that should accept a query ( or key) on the left, and the key\n  // on the right.\n  using KeyCompareTo = DefaultCompareTo<T>;\n};\n\ntemplate <typename T, typename Policy = BPTreePolicy<T>> class BPTree {\n  BPTree(const BPTree&) = delete;\n  BPTree& operator=(const BPTree&) = delete;\n\n  using BPTreeNode = detail::BPTreeNode<T>;\n  using BPTreePath = detail::BPTreePath<T>;\n\n public:\n  using KeyT = typename Policy::KeyT;\n\n  BPTree(PMR_NS::memory_resource* mr = PMR_NS::get_default_resource()) : mr_(mr) {\n  }\n\n  ~BPTree() {\n    Clear();\n  }\n\n  // true if inserted, false if skipped.\n  bool Insert(KeyT item);\n\n  bool Contains(KeyT item) const;\n\n  bool Delete(KeyT item);\n\n  std::optional<uint32_t> GetRank(KeyT item, bool reverse = false) const;\n\n  size_t Height() const {\n    return height_;\n  }\n\n  size_t Size() const {\n    return count_;  // number of items in the tree\n  }\n\n  bool Empty() const {\n    return count_ == 0;\n  }\n\n  size_t NodeCount() const {\n    // number of nodes in the tree (usually, order of magnitude smaller than Size()).\n    return num_nodes_;\n  }\n\n  void Clear();\n\n  const BPTreeNode* DEBUG_root() const {\n    return root_;\n  }\n\n  BPTreePath FromRank(uint32_t rank) const {\n    BPTreePath path;\n    ToRank(rank, &path);\n    return path;\n  }\n\n  /// @brief Iterates over all items in the range [rank_start, rank_end] by rank.\n  /// @param 
rank_start\n  /// @param rank_end - inclusive.\n  /// @param cb - callback to be called for each item in the range.\n  ///             Should return false to stop iteration.\n  bool Iterate(uint32_t rank_start, uint32_t rank_end, std::function<bool(KeyT)> cb) const;\n\n  /// @brief Iterates over all items in the range [rank_start, rank_end] by rank in reverse order.\n  /// @param rank_start\n  /// @param rank_end\n  /// @param cb - callback to be called for each item in the range.\n  ///             Should return false to stop iteration.\n  bool IterateReverse(uint32_t rank_start, uint32_t rank_end, std::function<bool(KeyT)> cb) const;\n\n  /// @brief Returns the path to the first item in the tree for which comp(q, key) >= 0.\n  /// @param item\n  /// @return the path if such item exists, empty path otherwise.\n  template <typename Q> BPTreePath GEQ(Q&& query) const;\n\n  /// @brief Returns the path to the largest item in the tree such that comp(q, key) <= 0.\n  /// @param key\n  /// @return the path if such item exists, empty path otherwise.\n  template <typename Q> BPTreePath LEQ(Q&& query) const;\n\n  /// @brief Deletes the element pointed by path.\n  /// @param path\n  void Delete(BPTreePath path);\n\n  /// @brief Forces an update to the key. Assumes key has the same value.\n  /// Replaces old with new_obj.\n  void ForceUpdate(KeyT old, KeyT new_obj);\n\n private:\n  BPTreeNode* CreateNode(bool leaf);\n\n  void DestroyNode(BPTreeNode* node);\n\n  void InsertToFullLeaf(KeyT item, const BPTreePath& path);\n\n  // Returns true if insertion was handled by rebalancing.\n  bool RebalanceLeafAndInsert(const BPTreePath& path, unsigned parent_depth, KeyT item,\n                              unsigned insert_pos);\n\n  void IncreaseSubtreeCounts(const BPTreePath& path, unsigned depth, int32_t delta);\n\n  // Charts the path towards key. 
Returns true if key is found.\n  // In that case comp(q, path->Last().first->Key(path->Last().second)) == 0.\n  // Fills the tree path not including the key itself. In case key was not found,\n  // returns the path to the item that is greater than the key.\n  template <typename Q> bool Locate(Q&& q, BPTreePath* path) const;\n\n  // Sets the tree path to item at specified rank. Rank is 0-based and must be less than Size().\n  // returns the index of the key in the last node of the path.\n  void ToRank(uint32_t rank, BPTreePath* path) const;\n\n  BPTreeNode* root_ = nullptr;  // root node or NULL if empty tree\n  uint32_t count_ = 0;          // number of items in tree\n  uint32_t height_ = 0;         // height of tree from root to leaf\n  uint32_t num_nodes_ = 0;      // number of nodes in tree\n  PMR_NS::memory_resource* mr_;\n};\n\ntemplate <typename T, typename Policy> bool BPTree<T, Policy>::Contains(KeyT item) const {\n  BPTreePath path;\n  bool found = Locate(item, &path);\n  return found;\n}\n\ntemplate <typename T, typename Policy> void BPTree<T, Policy>::Clear() {\n  if (!root_)\n    return;\n\n  BPTreePath path;\n  BPTreeNode* node = root_;\n\n  auto deep_left = [&](unsigned pos) {\n    do {\n      path.Push(node, pos);\n      node = node->Child(pos);\n      pos = 0;\n    } while (!node->IsLeaf());\n  };\n\n  if (!root_->IsLeaf())\n    deep_left(0);\n\n  while (true) {\n    DestroyNode(node);\n\n    if (path.Depth() == 0) {\n      break;\n    }\n    node = path.Last().first;\n    unsigned pos = path.Last().second;\n    path.Pop();\n    if (pos < node->NumItems()) {\n      deep_left(pos + 1);\n    }\n  }\n  root_ = nullptr;\n  height_ = count_ = 0;\n}\n\ntemplate <typename T, typename Policy> bool BPTree<T, Policy>::Insert(KeyT item) {\n  if (!root_) {\n    root_ = CreateNode(true);\n    root_->InitSingle(item);\n    count_ = height_ = 1;\n\n    return true;\n  }\n\n  BPTreePath path;\n  bool found = Locate(item, &path);\n\n  if (found) {\n    return 
false;\n  }\n\n  assert(path.Depth() > 0u);\n\n  BPTreeNode* leaf = path.Last().first;\n  assert(leaf->IsLeaf());\n\n  if (leaf->NumItems() == detail::BPNodeLayout<T>::kMaxLeafKeys) {\n    InsertToFullLeaf(item, path);\n  } else {\n    unsigned pos = path.Last().second;\n    leaf->LeafInsert(pos, item);\n    if (path.Depth() > 1)\n      IncreaseSubtreeCounts(path, path.Depth() - 2, 1);\n  }\n  count_++;\n  return true;\n}\n\ntemplate <typename T, typename Policy> bool BPTree<T, Policy>::Delete(KeyT item) {\n  if (!root_)\n    return false;\n\n  BPTreePath path;\n  bool found = Locate(item, &path);\n  if (!found)\n    return false;\n\n  Delete(path);\n  return true;\n}\n\ntemplate <typename T, typename Policy>\nstd::optional<uint32_t> BPTree<T, Policy>::GetRank(KeyT item, bool reverse) const {\n  if (!root_)\n    return std::nullopt;\n\n  BPTreePath path;\n  bool found = Locate(item, &path);\n  if (!found)\n    return std::nullopt;\n\n  if (reverse) {\n    return count_ - path.Rank() - 1;\n  }\n\n  return path.Rank();\n}\n\ntemplate <typename T, typename Policy>\ntemplate <typename Q>\nbool BPTree<T, Policy>::Locate(Q&& q, BPTreePath* path) const {\n  assert(root_);\n  BPTreeNode* node = root_;\n  typename Policy::KeyCompareTo cmp;\n  auto cmp_cb = [&](const KeyT& key) { return cmp(q, key); };\n\n  while (true) {\n    typename BPTreeNode::SearchResult res = node->BSearch(cmp_cb);\n    path->Push(node, res.index);\n    if (res.found) {\n      return true;\n    }\n    assert(res.index <= node->NumItems());\n\n    if (node->IsLeaf()) {\n      break;\n    }\n    node = node->Child(res.index);\n  }\n  return false;\n}\n\ntemplate <typename T, typename Policy>\nvoid BPTree<T, Policy>::InsertToFullLeaf(KeyT item, const BPTreePath& path) {\n  using Layout = detail::BPNodeLayout<T>;\n  using Comp [[maybe_unused]] = typename Policy::KeyCompareTo;\n\n  assert(path.Depth() > 0u);\n\n  BPTreeNode* node = path.Last().first;\n  assert(node->IsLeaf() && node->AvailableSlotCount() 
== 0);\n\n  unsigned insert_pos = path.Last().second;\n  unsigned level = path.Depth() - 1;\n  if (level > 0 && RebalanceLeafAndInsert(path, level - 1, item, insert_pos)) {\n    // Update the tree count of the ascendants.\n    IncreaseSubtreeCounts(path, level - 1, 1);\n    return;\n  }\n\n  KeyT median;\n  BPTreeNode* right = CreateNode(true);\n  node->Split(right, &median);\n\n  assert(node->NumItems() < Layout::kMaxLeafKeys);\n\n  if (insert_pos <= node->NumItems()) {\n    assert(Comp()(item, median) < 0);\n    node->LeafInsert(insert_pos, item);\n  } else {\n    assert(Comp()(item, median) > 0);\n    right->LeafInsert(insert_pos - node->NumItems() - 1, item);\n  }\n\n  // we must add the newly created `right` to the parent and update its tree count.\n  while (level > 0) {\n    --level;\n    // level up, now node is parent.\n    node = path.Node(level);\n    unsigned pos = path.Position(level);  // position of the child node in parent.\n\n    assert(!node->IsLeaf() && pos <= node->NumItems());\n    assert(right);\n\n    // Terminal case: Node is not full so we can just add `right` to it.\n    if (node->NumItems() < Layout::kMaxInnerKeys) {\n      // We do not update the subtree count of the node here because the surplus of another item\n      // resulted in the additional key in this node.\n      node->InnerInsert(pos, median, right);\n      node->IncreaseTreeCount(1);\n      right = nullptr;\n      break;\n    }\n\n    // We need to insert right into a node at position pos. Node is full so we must handle it\n    // either via rebalancing \"node\" or via its splitting. 
Rebalancing is a better case, we try\n    // it first.\n    if (level > 0) {\n      // see if we can rebalance node (right's parent) via node's parent.\n      BPTreeNode* parent = path.Node(level - 1);\n      unsigned parent_pos = path.Position(level - 1);\n      assert(parent->Child(parent_pos) == node);\n\n      auto [new_node, inner_pos] = parent->RebalanceChild(parent_pos, pos);\n      if (new_node) {\n        // we rebalanced inner_full so we can insert (median, right) and stop propagating.\n        new_node->InnerInsert(inner_pos, median, right);\n\n        if (new_node != node) {\n          // Fix subtree counts if right was migrated to the sibling.\n          node->IncreaseTreeCount(-right->TreeCount());\n          new_node->IncreaseTreeCount(right->TreeCount() + 1);\n        } else {\n          node->IncreaseTreeCount(1);\n        }\n        right = nullptr;\n        break;\n      }\n    }\n\n    // node is not rebalanced, so we need to split it.\n    BPTreeNode* next_right = CreateNode(false);\n    KeyT next_median;\n    node->Split(next_right, &next_median);\n    assert(node->NumItems() < Layout::kMaxInnerKeys);\n\n    if (pos <= node->NumItems()) {\n      assert(Comp()(median, next_median) < 0);\n\n      node->InnerInsert(pos, median, right);\n      node->IncreaseTreeCount(1);\n    } else {\n      assert(Comp()(median, next_median) > 0);\n\n      next_right->InnerInsert(pos - node->NumItems() - 1, median, right);\n\n      // Fix tree counts.\n      node->IncreaseTreeCount(-right->TreeCount());\n      next_right->IncreaseTreeCount(right->TreeCount() + 1);\n    }\n    right = next_right;\n    median = next_median;\n  }\n\n  if (right) {\n    assert(level == 0);\n    BPTreeNode* new_root = CreateNode(false);\n    new_root->InitSingle(median);\n    new_root->SetChild(0, root_);\n    new_root->SetChild(1, right);\n    new_root->SetTreeCount(root_->TreeCount() + right->TreeCount() + 1);\n    root_ = new_root;\n    height_++;\n  } else {\n    if (level > 0) 
{\n      IncreaseSubtreeCounts(path, level - 1, 1);\n    }\n  }\n}\n\ntemplate <typename T, typename Policy>\nbool BPTree<T, Policy>::RebalanceLeafAndInsert(const BPTreePath& path, unsigned parent_depth,\n                                               KeyT item, unsigned insert_pos) {\n  BPTreeNode* parent = path.Node(parent_depth);\n  unsigned pos = path.Position(parent_depth);\n\n  std::pair<BPTreeNode*, unsigned> rebalance_res = parent->RebalanceChild(pos, insert_pos);\n  if (rebalance_res.first) {\n    rebalance_res.first->LeafInsert(rebalance_res.second, item);\n    return true;\n  }\n  return false;\n}\n\ntemplate <typename T, typename Policy>\nvoid BPTree<T, Policy>::IncreaseSubtreeCounts(const BPTreePath& path, unsigned depth,\n                                              int32_t delta) {\n  for (int i = depth; i >= 0; --i) {\n    BPTreeNode* node = path.Node(i);\n    node->IncreaseTreeCount(delta);\n  }\n}\n\ntemplate <typename T, typename Policy>\nbool BPTree<T, Policy>::Iterate(uint32_t rank_start, uint32_t rank_end,\n                                std::function<bool(KeyT)> cb) const {\n  if (rank_start >= Size())\n    return true;\n\n  assert(rank_start <= rank_end);\n\n  BPTreePath path;\n  ToRank(rank_start, &path);\n  for (uint32_t i = rank_start; i <= rank_end; ++i) {\n    if (!cb(path.Terminal()))\n      return false;\n\n    if (!path.Next())\n      return true;\n  }\n  return true;\n}\n\ntemplate <typename T, typename Policy>\nbool BPTree<T, Policy>::IterateReverse(uint32_t rank_start, uint32_t rank_end,\n                                       std::function<bool(KeyT)> cb) const {\n  assert(rank_start <= rank_end && rank_end < count_);\n\n  BPTreePath path;\n  ToRank(count_ - 1 - rank_start, &path);\n  for (uint32_t i = rank_start; i <= rank_end; ++i) {\n    if (!cb(path.Terminal()))\n      return false;\n\n    path.Prev();\n  }\n  return true;\n}\n\ntemplate <typename T, typename Policy>\nvoid BPTree<T, Policy>::ToRank(uint32_t rank, 
BPTreePath* path) const {\n  assert(root_ && rank < count_);\n  BPTreeNode* node = root_;\n\n  if (rank + 1 == count_) {\n    // Corner case where we search for the node on the right.\n    while (!node->IsLeaf()) {\n      path->Push(node, node->NumItems());\n      node = node->Child(node->NumItems());\n    }\n    path->Push(node, node->NumItems() - 1);\n    return;\n  }\n\n  while (!node->IsLeaf()) {\n    // handle common corner case of search of left-most node, and avoid counting sub-tree count.\n    if (rank == 0) {\n      path->Push(node, 0);\n      node = node->Child(0);\n      continue;\n    }\n\n    for (unsigned i = 0; i <= node->NumItems(); ++i) {\n      uint32_t subtree_cnt = node->GetChildTreeCount(i);\n      if (subtree_cnt > rank) {\n        path->Push(node, i);\n        node = node->Child(i);\n        break;\n      }\n      assert(i < node->NumItems());\n      rank -= subtree_cnt;\n      if (rank == 0) {\n        path->Push(node, i);\n        return;\n      }\n      --rank;\n    }\n  }\n\n  assert(node->IsLeaf());\n  assert(rank < node->NumItems());\n  path->Push(node, rank);\n}\n\ntemplate <typename T, typename Policy>\ntemplate <typename Q>\nauto BPTree<T, Policy>::GEQ(Q&& query) const -> BPTreePath {\n  BPTreePath path;\n\n  bool res = Locate(query, &path);\n\n  // if we did not find the item and the path does not lead to any key in the node,\n  // adjust the path to point to the next key in the tree.\n  // In case we are past all items in the tree, Next() will collapse to the empty path.\n  if (!res && path.Last().second >= path.Last().first->NumItems()) {\n    path.Next();\n  }\n\n  return path;\n}\n\ntemplate <typename T, typename Policy>\ntemplate <typename Q>\nauto BPTree<T, Policy>::LEQ(Q&& query) const -> BPTreePath {\n  BPTreePath path;\n  bool res = Locate(query, &path);\n\n  if (!res) {  // fix the result in case the path leads to key greater than item.\n    path.Prev();\n  }\n\n  return path;\n}\n\ntemplate <typename T, typename 
Policy>\ndetail::BPTreeNode<T>* BPTree<T, Policy>::CreateNode(bool leaf) {\n  num_nodes_++;\n  void* ptr = mr_->allocate(detail::kBPNodeSize, 8);\n  BPTreeNode* node = new (ptr) BPTreeNode(leaf);\n\n  return node;\n}\n\ntemplate <typename T, typename Policy> void BPTree<T, Policy>::Delete(BPTreePath path) {\n  using Comp [[maybe_unused]] = typename Policy::KeyCompareTo;\n\n  BPTreeNode* node = path.Last().first;\n  unsigned key_pos = path.Last().second;\n\n  // Remove the key from the node.\n  if (node->IsLeaf()) {\n    node->ShiftLeft(key_pos);  // shift left everything after key_pos.\n  } else {\n    // We can not remove the item from the inner node because it also serves as a separator.\n    // Therefore, we swap it with the rightmost key in the left subtree and pop from there instead.\n    path.DigRight();\n\n    BPTreeNode* leaf = path.Last().first;\n    assert(Comp()(leaf->Key(leaf->NumItems() - 1), node->Key(key_pos)) < 0);\n\n    // set a new separator.\n    node->SetKey(key_pos, leaf->Key(leaf->NumItems() - 1));\n    leaf->LeafEraseRight();  // pop the rightmost key from the leaf.\n    node = leaf;\n  }\n  count_--;\n\n  assert(node->IsLeaf());\n\n  // go up the tree and rebalance if number of items in the node is less\n  // than low limit. We either merge or rebalance nodes.\n  while (node->NumItems() < node->MinItems()) {\n    if (node == root_) {\n      if (node->NumItems() == 0) {\n        // terminal case, we reached the root - and it has either a single child (0 delimiters)\n        // or no children at all (leaf). The former is more common case: the tree can only shrink\n        // through the root.\n        if (node->IsLeaf()) {\n          assert(count_ == 0u);\n          root_ = nullptr;\n        } else {\n          root_ = root_->Child(0);\n        }\n        --height_;\n        DestroyNode(node);\n      }\n      return;\n    }\n\n    // The node has a parent. 
Pop the node from the path and try to rebalance it via its parent.\n    assert(path.Depth() > 0u);\n    path.Pop();\n\n    BPTreeNode* parent = path.Last().first;\n    unsigned pos = path.Last().second;\n    assert(parent->Child(pos) == node);\n    node = parent->MergeOrRebalanceChild(pos);\n\n    parent->IncreaseTreeCount(-1);\n\n    if (node == nullptr)  // succeeded to merge/rebalance without the need to propagate.\n      break;\n\n    DestroyNode(node);\n\n    // assert(parent->TreeCount() == parent->DEBUG_TreeCount());\n    node = parent;\n  }\n\n  if (path.Depth() >= 2) {\n    IncreaseSubtreeCounts(path, path.Depth() - 2, -1);\n  }\n}\n\ntemplate <typename T, typename Policy> void BPTree<T, Policy>::DestroyNode(BPTreeNode* node) {\n  void* ptr = node;\n  mr_->deallocate(ptr, detail::kBPNodeSize, 8);\n  num_nodes_--;\n}\n\ntemplate <typename T, typename Policy> void BPTree<T, Policy>::ForceUpdate(KeyT old, KeyT new_obj) {\n  BPTreePath path;\n  [[maybe_unused]] bool found = Locate(old, &path);\n\n  assert(path.Depth() > 0u);\n  assert(found);\n\n  BPTreeNode* node = path.Last().first;\n  node->SetKey(path.Last().second, new_obj);\n}\n\n}  // namespace dfly\n"
  },
  {
    "path": "src/core/bptree_set_test.cc",
    "content": "// Copyright 2023, Roman Gershman.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n#include \"core/bptree_set.h\"\n\n#include <absl/container/btree_set.h>\n#include <gmock/gmock.h>\n#include <mimalloc.h>\n\n#include <random>\n\nextern \"C\" {\n#include \"redis/sds.h\"\n#include \"redis/zmalloc.h\"\n}\n\n#include \"base/gtest.h\"\n#include \"base/init.h\"\n#include \"base/logging.h\"\n#include \"core/mi_memory_resource.h\"\n\nusing namespace std;\n\nnamespace dfly {\n\nnamespace {\n\ntemplate <typename Node, typename Policy>\nbool ValidateNode(const Node* node, typename Node::KeyT ubound) {\n  typename Policy::KeyCompareTo cmp;\n\n  for (unsigned i = 1; i < node->NumItems(); ++i) {\n    if (cmp(node->Key(i - 1), node->Key(i)) > -1)\n      return false;\n  }\n\n  if (!node->IsLeaf()) {\n    unsigned mask = 0;\n    uint32_t subtree_cnt = node->NumItems();\n    for (unsigned i = 0; i <= node->NumItems(); ++i) {\n      mask |= (1 << node->Child(i)->IsLeaf());\n      DCHECK_EQ(node->Child(i)->DEBUG_TreeCount(), node->Child(i)->TreeCount());\n      subtree_cnt += node->Child(i)->TreeCount();\n    }\n    if (mask == 3)\n      return false;\n\n    if (subtree_cnt != node->TreeCount()) {\n      LOG(ERROR) << \"Expected \" << subtree_cnt << \" got \" << node->TreeCount();\n      return false;\n    }\n  }\n\n  return cmp(node->Key(node->NumItems() - 1), ubound) == -1;\n}\n\nstruct ZsetPolicy {\n  struct KeyT {\n    double d;\n    sds s;\n  };\n\n  struct KeyCompareTo {\n    int operator()(const KeyT& left, const KeyT& right) {\n      if (left.d < right.d)\n        return -1;\n      if (left.d > right.d)\n        return 1;\n\n      // Note that sdscmp can return values outside of [-1, 1] range.\n      return sdscmp(left.s, right.s);\n    }\n  };\n};\n\nusing SDSTree = BPTree<ZsetPolicy::KeyT, ZsetPolicy>;\n\n}  // namespace\n\nclass BPTreeSetTest : public ::testing::Test {\n  using Node = detail::BPTreeNode<uint64_t>;\n\n protected:\n  static 
constexpr size_t kNumElems = 7000;\n\n  BPTreeSetTest() : mi_alloc_(mi_heap_get_backing()), bptree_(&mi_alloc_) {\n  }\n  static void SetUpTestSuite() {\n  }\n\n  void FillTree(unsigned start, unsigned factor) {\n    for (unsigned i = start; i < kNumElems; ++i) {\n      bptree_.Insert(i * factor);\n    }\n  }\n\n  void FillTree(unsigned factor = 1) {\n    FillTree(0, factor);\n  }\n\n  bool Validate();\n\n  MiMemoryResource mi_alloc_;\n  BPTree<uint64_t> bptree_;\n  mt19937 generator_{1};\n};\n\nbool BPTreeSetTest::Validate() {\n  auto* root = bptree_.DEBUG_root();\n  if (!root)\n    return true;\n\n  // node, upper bound\n  vector<pair<const Node*, uint64_t>> stack;\n\n  stack.emplace_back(root, UINT64_MAX);\n\n  while (!stack.empty()) {\n    const Node* node = stack.back().first;\n    uint64_t ubound = stack.back().second;\n    stack.pop_back();\n\n    if (!ValidateNode<Node, BPTreePolicy<uint64_t>>(node, ubound))\n      return false;\n\n    if (!node->IsLeaf()) {\n      for (unsigned i = 0; i < node->NumItems(); ++i) {\n        stack.emplace_back(node->Child(i), node->Key(i));\n      }\n      stack.emplace_back(node->Child(node->NumItems()), ubound);\n    }\n  }\n  return true;\n}\n\nTEST_F(BPTreeSetTest, BPtreeInsert) {\n  for (unsigned i = 1; i < 7000; ++i) {\n    ASSERT_TRUE(bptree_.Insert(i));\n    ASSERT_EQ(i, bptree_.Size());\n    ASSERT_EQ(i - 1, bptree_.GetRank(i));\n    // ASSERT_TRUE(Validate()) << i;\n  }\n  ASSERT_TRUE(Validate());\n\n  ASSERT_GT(mi_alloc_.used(), 56000u);\n  ASSERT_LT(mi_alloc_.used(), 66000u);\n\n  for (unsigned i = 1; i < 7000; ++i) {\n    ASSERT_TRUE(bptree_.Contains(i));\n  }\n\n  bptree_.Clear();\n  ASSERT_EQ(mi_alloc_.used(), 0u);\n\n  uniform_int_distribution<uint64_t> dist(0, 100000);\n  for (unsigned i = 0; i < 20000; ++i) {\n    bptree_.Insert(dist(generator_));\n    // ASSERT_TRUE(Validate()) << i;\n  }\n  ASSERT_TRUE(Validate());\n  ASSERT_GT(mi_alloc_.used(), 10000u);\n  LOG(INFO) << bptree_.Height() << \" \" << 
bptree_.Size();\n\n  bptree_.Clear();\n  ASSERT_EQ(mi_alloc_.used(), 0u);\n\n  for (unsigned i = 20000; i > 1; --i) {\n    bptree_.Insert(i);\n  }\n  ASSERT_TRUE(Validate());\n  for (unsigned i = 2; i <= 20000; ++i) {\n    ASSERT_EQ(i - 2, bptree_.GetRank(i));\n  }\n\n  LOG(INFO) << bptree_.Height() << \" \" << bptree_.Size();\n  ASSERT_GT(mi_alloc_.used(), 20000 * 8);\n  ASSERT_LT(mi_alloc_.used(), 20000 * 10);\n  bptree_.Clear();\n  ASSERT_EQ(mi_alloc_.used(), 0u);\n}\n\nTEST_F(BPTreeSetTest, Delete) {\n  for (unsigned i = 31; i > 10; --i) {\n    bptree_.Insert(i);\n  }\n\n  for (unsigned i = 1; i < 10; ++i) {\n    ASSERT_FALSE(bptree_.Delete(i));\n  }\n\n  for (unsigned i = 11; i < 32; ++i) {\n    ASSERT_TRUE(bptree_.Delete(i));\n  }\n  ASSERT_EQ(mi_alloc_.used(), 0u);\n  ASSERT_EQ(bptree_.Size(), 0u);\n\n  FillTree();\n\n  ASSERT_GT(bptree_.NodeCount(), 2u);\n  unsigned sz = bptree_.Size();\n  for (unsigned i = 0; i < kNumElems; ++i) {\n    --sz;\n    ASSERT_EQ(bptree_.GetRank(kNumElems - 1), sz);\n\n    ASSERT_TRUE(bptree_.Delete(i));\n    ASSERT_EQ(bptree_.Size(), sz);\n    // ASSERT_TRUE(Validate()) << i;\n  }\n\n  ASSERT_EQ(mi_alloc_.used(), 0u);\n  ASSERT_EQ(bptree_.Size(), 0u);\n  ASSERT_EQ(bptree_.Height(), 0u);\n  ASSERT_EQ(bptree_.NodeCount(), 0u);\n\n  FillTree(2);\n  for (unsigned i = 0; i < 20000; ++i) {\n    unsigned val = generator_() % 15000;\n    bool res = bptree_.Delete(val);\n\n    if (val % 2 == 1) {\n      ASSERT_FALSE(res);\n    }\n    if (res) {\n      ASSERT_TRUE(Validate());\n    }\n  }\n}\n\nTEST_F(BPTreeSetTest, Iterate) {\n  FillTree(2);\n\n  unsigned cnt = 0;\n  bool res = bptree_.Iterate(31, 543, [&](uint64_t val) {\n    if ((31 + cnt) * 2 != val)\n      return false;\n    ++cnt;\n    return true;\n  });\n  ASSERT_EQ(543 - 31 + 1, cnt);\n  ASSERT_TRUE(res);\n\n  for (unsigned j = 0; j < 10; ++j) {\n    cnt = 0;\n    unsigned from = generator_() % kNumElems;\n    unsigned to = from + generator_() % (kNumElems - from);\n    res = 
bptree_.Iterate(from, to, [&](uint64_t val) {\n      if ((from + cnt) * 2 != val)\n        return false;\n      ++cnt;\n      return true;\n    });\n\n    ASSERT_EQ(to - from + 1, cnt);\n    ASSERT_TRUE(res);\n  }\n}\n\nTEST_F(BPTreeSetTest, Ranges) {\n  FillTree(2);\n\n  auto path = bptree_.GEQ(31);\n  EXPECT_EQ(32, path.Terminal());\n\n  path = bptree_.GEQ(32);\n  EXPECT_EQ(32, path.Terminal());\n\n  path = bptree_.GEQ(13998);\n  EXPECT_EQ(13998, path.Terminal());\n\n  path = bptree_.LEQ(14000);\n  EXPECT_EQ(13998, path.Terminal());\n\n  path = bptree_.GEQ(14000);\n  EXPECT_EQ(0, path.Depth());\n\n  ASSERT_TRUE(bptree_.Delete(0));\n  path = bptree_.GEQ(0);\n  EXPECT_EQ(2, path.Terminal());\n\n  path = bptree_.LEQ(1);\n  EXPECT_TRUE(path.Empty());\n}\n\nTEST_F(BPTreeSetTest, HalfRanges) {\n  FillTree(1, 3);  // 3, 6, 9 ...\n  auto path = bptree_.FromRank(bptree_.Size() - 1);\n  uint64_t val = path.Terminal();\n  for (unsigned i = 0; i <= val; ++i) {\n    path = bptree_.GEQ(i);\n    ASSERT_FALSE(path.Empty()) << i;\n  }\n  path = bptree_.GEQ(val + 1);\n  ASSERT_TRUE(path.Empty());\n\n  for (unsigned i = 3; i <= val + 10; ++i) {\n    path = bptree_.LEQ(i);\n    ASSERT_FALSE(path.Empty()) << i;\n  }\n  path = bptree_.LEQ(2);\n  ASSERT_TRUE(path.Empty());\n}\n\n#if 0\nTEST_F(BPTreeSetTest, MemoryUsage) {\n  zskiplist* zsl = zslCreate();\n  std::vector<sds> sds_vec;\n\n  constexpr size_t kLength = 3000;\n  for (size_t i = 0; i < kLength; ++i) {\n    sds_vec.push_back(sdsnew(\"f\"));\n  }\n  size_t sz_before = zmalloc_used_memory_tl;\n  LOG(INFO) << \"zskiplist before: \" << sz_before << \" bytes\";\n\n  for (size_t i = 0; i < sds_vec.size(); ++i) {\n    zslInsert(zsl, i, sds_vec[i]);\n  }\n  LOG(INFO) << \"zskiplist takes: \" << double(zmalloc_used_memory_tl - sz_before) / sds_vec.size()\n            << \" bytes per entry\";\n  zslFree(zsl);\n\n  sds_vec.clear();\n  for (size_t i = 0; i < kLength; ++i) {\n    sds_vec.push_back(sdsnew(\"f\"));\n  }\n\n  MiMemoryResource 
mi_alloc(mi_heap_get_backing());\n  using AllocType = PMR_NS::polymorphic_allocator<std::pair<double, sds>>;\n  AllocType alloc(&mi_alloc);\n  absl::btree_set<pair<double, sds>, std::greater<pair<double, sds>>, AllocType> btree(alloc);\n\n  ASSERT_EQ(0, mi_alloc.used());\n  for (size_t i = 0; i < sds_vec.size(); ++i) {\n    btree.emplace(i, sds_vec[i]);\n  }\n  ASSERT_GT(mi_alloc.used(), 0u);\n  LOG(INFO) << \"abseil btree: \" << double(mi_alloc.used()) / sds_vec.size() << \" bytes per entry\";\n  btree.clear();\n\n  ASSERT_EQ(0, mi_alloc.used());\n  SDSTree df_tree(&mi_alloc);\n  for (size_t i = 0; i < sds_vec.size(); ++i) {\n    btree.emplace(i, sds_vec[i]);\n    VLOG(1) << \"df btree: \" << i << \" \" << double(mi_alloc.used()) / btree.size()\n            << \" bytes per entry\";\n  }\n  ASSERT_GT(mi_alloc.used(), 0u);\n  LOG(INFO) << \"df btree: \" << double(mi_alloc.used()) / sds_vec.size() << \" bytes per entry\";\n}\n#endif\n\nTEST_F(BPTreeSetTest, InsertSDS) {\n  vector<ZsetPolicy::KeyT> vals;\n  for (unsigned i = 0; i < 256; ++i) {\n    sds s = sdsempty();\n\n    s = sdscatfmt(s, \"a%u\", i);\n    vals.emplace_back(ZsetPolicy::KeyT{.d = 1000, .s = s});\n  }\n\n  SDSTree tree(&mi_alloc_);\n  for (size_t i = 0; i < vals.size(); ++i) {\n    ASSERT_TRUE(tree.Insert(vals[i]));\n  }\n\n  for (auto v : vals) {\n    sdsfree(v.s);\n  }\n}\n\nTEST_F(BPTreeSetTest, ReverseIterate) {\n  vector<ZsetPolicy::KeyT> vals;\n  for (int i = -1000; i < 1000; ++i) {\n    sds s = sdsempty();\n\n    s = sdscatfmt(s, \"a%u\", i);\n    vals.emplace_back(ZsetPolicy::KeyT{.d = (double)i, .s = s});\n  }\n\n  SDSTree tree(&mi_alloc_);\n  for (auto v : vals) {\n    ASSERT_TRUE(tree.Insert(v));\n    {\n      double score = 0;\n      tree.IterateReverse(0, 0, [&score](auto i) {\n        score = i.d;\n        return false;\n      });\n      EXPECT_EQ(score, v.d);\n    }\n    {\n      double score = 0;\n      tree.Iterate(0, 0, [&score](auto i) {\n        score = i.d;\n        return 
false;\n      });\n      EXPECT_EQ(score, vals[0].d);\n    }\n  }\n\n  vector<int> res;\n  tree.IterateReverse(0, 1, [&](auto i) {\n    res.push_back(i.d);\n    return true;\n  });\n  EXPECT_THAT(res, testing::ElementsAre(999, 998));\n\n  for (auto v : vals) {\n    sdsfree(v.s);\n  }\n}\n\nstatic string RandomString(mt19937& rand, unsigned len) {\n  const string_view alpanum = \"1234567890abcdefghijklmnopqrstuvwxyz\";\n  string ret;\n  ret.reserve(len);\n\n  for (size_t i = 0; i < len; ++i) {\n    ret += alpanum[rand() % alpanum.size()];\n  }\n\n  return ret;\n}\n\nstd::vector<ZsetPolicy::KeyT> GenerateRandomPairs(unsigned len) {\n  mt19937 dre(10);\n  std::vector<ZsetPolicy::KeyT> vals(len, ZsetPolicy::KeyT{});\n  for (unsigned i = 0; i < len; ++i) {\n    vals[i].d = dre();\n    vals[i].s = sdsnew(RandomString(dre, 10).c_str());\n  }\n  return vals;\n}\n\nstatic void BM_FindRandomBPTree(benchmark::State& state) {\n  unsigned iters = state.range(0);\n  std::vector<ZsetPolicy::KeyT> vals = GenerateRandomPairs(iters);\n  SDSTree bptree;\n  for (unsigned i = 0; i < iters; ++i) {\n    bptree.Insert(vals[i]);\n  }\n\n  unsigned i = 0;\n  while (state.KeepRunningBatch(10)) {\n    for (unsigned j = 0; j < 10; ++j) {\n      benchmark::DoNotOptimize(bptree.GEQ(vals[i]));\n      ++i;\n      if (vals.size() == i)\n        i = 0;\n    }\n  }\n  for (const auto v : vals) {\n    sdsfree(v.s);\n  }\n}\nBENCHMARK(BM_FindRandomBPTree)->Arg(1024)->Arg(1 << 16)->Arg(1 << 20);\n\n#if 0\nstatic void BM_FindRandomZSL(benchmark::State& state) {\n  zskiplist* zsl = zslCreate();\n  unsigned iters = state.range(0);\n  std::vector<ZsetPolicy::KeyT> vals = GenerateRandomPairs(iters);\n  for (unsigned i = 0; i < iters; ++i) {\n    zslInsert(zsl, vals[i].d, sdsdup(vals[i].s));\n  }\n\n  zrangespec spec;\n  spec.maxex = 0;\n  spec.minex = 0;\n\n  unsigned i = 0;\n  while (state.KeepRunningBatch(10)) {\n    for (unsigned j = 0; j < 10; ++j) {\n      spec.min = vals[i].d;\n      spec.max = 
spec.min;\n      benchmark::DoNotOptimize(zslFirstInRange(zsl, &spec));\n\n      ++i;\n      if (vals.size() == i)\n        i = 0;\n    }\n  }\n\n  zslFree(zsl);\n\n  for (const auto v : vals) {\n    sdsfree(v.s);\n  }\n}\nBENCHMARK(BM_FindRandomZSL)->Arg(1024)->Arg(1 << 16)->Arg(1 << 20);\n#endif\n\nvoid RegisterBPTreeBench() {\n  auto* tlh = mi_heap_get_backing();\n  init_zmalloc_threadlocal(tlh);\n};\n\nREGISTER_MODULE_INITIALIZER(Bptree, RegisterBPTreeBench());\n\nTEST_F(BPTreeSetTest, ForceUpdate) {\n  struct Policy {\n    // Similar to how it's used in SortedMap just a little simpler.\n    using KeyT = int*;\n\n    struct KeyCompareTo {\n      int operator()(KeyT a, KeyT b) const {\n        if (*a < *b)\n          return -1;\n        if (*a > *b)\n          return 1;\n        return 0;\n      }\n    };\n  };\n\n  auto gen_vector = []() {\n    std::vector<std::unique_ptr<int>> tmp;\n    for (size_t i = 0; i < 1000; ++i) {\n      tmp.push_back(std::make_unique<int>(i));\n    }\n    return tmp;\n  };\n\n  std::vector<std::unique_ptr<int>> original = gen_vector();\n  std::vector<std::unique_ptr<int>> modified = gen_vector();\n\n  BPTree<int*, Policy> bptree;\n  for (auto& item : original) {\n    bptree.Insert(item.get());\n  }\n\n  for (auto& item : modified) {\n    bptree.ForceUpdate(item.get(), item.get());\n  }\n\n  original.clear();\n  size_t index = 0;\n  bptree.Iterate(0, 1000, [&](int* ptr) {\n    EXPECT_EQ(modified[index].get(), ptr);\n    ++index;\n    return true;\n  });\n}\n\n}  // namespace dfly\n"
  },
  {
    "path": "src/core/cms.cc",
    "content": "// Copyright 2026, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#include \"core/cms.h\"\n\n#include <xxhash.h>\n\n#include <algorithm>\n#include <cmath>\n#include <limits>\n\n#include \"base/logging.h\"\n\nnamespace dfly {\nnamespace {\n\nuint32_t Offset(uint64_t h1, uint64_t h2, uint32_t row, uint32_t width) {\n  uint32_t idx = static_cast<uint32_t>((h1 + (row * h2)) % width);\n  return row * width + idx;\n}\n\n}  // namespace\n\nCMS::CMS(uint32_t width, uint32_t depth, PMR_NS::memory_resource* mr)\n    : width_(width), depth_(depth), mr_(mr) {\n  size_t len = NumCounters();\n  counters_ = static_cast<int64_t*>(mr_->allocate(len * sizeof(int64_t), alignof(int64_t)));\n  std::fill_n(counters_, len, 0);\n}\n\nCMS::~CMS() {\n  if (counters_) {\n    mr_->deallocate(counters_, NumCounters() * sizeof(int64_t), alignof(int64_t));\n  }\n}\n\nCMS::CMS(CMS&& other) noexcept\n    : width_(other.width_),\n      depth_(other.depth_),\n      mr_(other.mr_),\n      count_(other.count_),\n      counters_(other.counters_) {\n  other.width_ = 0;\n  other.depth_ = 0;\n  other.count_ = 0;\n  other.counters_ = nullptr;\n}\n\nCMS& CMS::operator=(CMS&& other) noexcept {\n  if (this != &other) {\n    if (counters_) {\n      mr_->deallocate(counters_, NumCounters() * sizeof(int64_t), alignof(int64_t));\n    }\n    width_ = other.width_;\n    depth_ = other.depth_;\n    mr_ = other.mr_;\n    count_ = other.count_;\n    counters_ = other.counters_;\n    other.width_ = 0;\n    other.depth_ = 0;\n    other.count_ = 0;\n    other.counters_ = nullptr;\n  }\n  return *this;\n}\n\nCMS::CMS(ErrorRateTag /*tag*/, double error, double probability, PMR_NS::memory_resource* mr)\n    : CMS(static_cast<uint32_t>(std::ceil(M_E / error)),\n          static_cast<uint32_t>(std::ceil(std::log(1.0 / probability))), mr) {\n}\n\nint64_t CMS::IncrBy(std::string_view item, int64_t increment) {\n  count_ += increment;\n\n  int64_t min_count = 
std::numeric_limits<int64_t>::max();\n  XXH128_hash_t hash = XXH3_128bits(item.data(), item.size());\n  uint64_t h1 = hash.low64;\n  uint64_t h2 = hash.high64;\n\n  for (uint32_t row = 0; row < depth_; ++row) {\n    uint32_t offset = Offset(h1, h2, row, width_);\n    counters_[offset] += increment;\n    min_count = std::min(min_count, counters_[offset]);\n  }\n\n  return min_count;\n}\n\nint64_t CMS::Query(std::string_view item) const {\n  XXH128_hash_t hash = XXH3_128bits(item.data(), item.size());\n  uint64_t h1 = hash.low64;\n  uint64_t h2 = hash.high64;\n\n  int64_t min_count = std::numeric_limits<int64_t>::max();\n  for (uint32_t row = 0; row < depth_; ++row) {\n    uint32_t offset = Offset(h1, h2, row, width_);\n    min_count = std::min(min_count, counters_[offset]);\n  }\n\n  return min_count;\n}\n\nbool CMS::MergeFrom(const CMS& other, int64_t weight) {\n  if (width_ != other.width_ || depth_ != other.depth_) {\n    return false;\n  }\n\n  for (size_t i = 0; i < NumCounters(); ++i) {\n    counters_[i] += other.counters_[i] * weight;\n  }\n\n  count_ += other.count_ * weight;\n  return true;\n}\n\nvoid CMS::Reset() {\n  std::fill_n(counters_, NumCounters(), 0);\n  count_ = 0;\n}\n\nvoid CMS::Load(int64_t total_incr_count, const int64_t* data) {\n  count_ = total_incr_count;\n  std::copy_n(data, NumCounters(), counters_);\n}\n\n}  // namespace dfly\n"
  },
  {
    "path": "src/core/cms.h",
    "content": "// Copyright 2026, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#pragma once\n\n#include <cstdint>\n#include <string_view>\n\n#include \"base/pmr/memory_resource.h\"\n\nnamespace dfly {\n\n/// Count-Min Sketch implementation compatible with Redis CMS commands.\nclass CMS {\n public:\n  // Create a CMS with given width and depth dimensions.\n  // width: number of counters per row\n  // depth: number of rows (hash functions)\n  CMS(uint32_t width, uint32_t depth, PMR_NS::memory_resource* mr);\n\n  CMS(const CMS&) = delete;\n  CMS& operator=(const CMS&) = delete;\n\n  CMS(CMS&& other) noexcept;\n  CMS& operator=(CMS&& other) noexcept;\n\n  ~CMS();\n\n  // Tag type to disambiguate CMS construction by error rate and probability.\n  struct ErrorRateTag {};\n\n  // Create a CMS from error rate and probability parameters.\n  // error: relative error (e.g. 0.01 for 1%), must be in (0, 1).\n  // probability: probability of exceeding the error, must be in (0, 1).\n  // width = ceil(e / error), depth = ceil(ln(1 / probability)).\n  CMS(ErrorRateTag, double error, double probability, PMR_NS::memory_resource* mr);\n\n  // Increment the count for an item by the given value.\n  // Returns the new estimated count for the item.\n  int64_t IncrBy(std::string_view item, int64_t increment);\n\n  // Query the estimated count for an item.\n  int64_t Query(std::string_view item) const;\n\n  // Merge another CMS into this one with the given weight.\n  // The other CMS must have the same dimensions.\n  // Returns false if dimensions don't match.\n  bool MergeFrom(const CMS& other, int64_t weight = 1);\n\n  // Reset all counters and total count to zero.\n  void Reset();\n\n  // Load serialized counter state. 
data must have exactly NumCounters() elements.\n  void Load(int64_t total_incr_count, const int64_t* data);\n\n  // Accessors for CMS properties\n  uint32_t width() const {\n    return width_;\n  }\n\n  uint32_t depth() const {\n    return depth_;\n  }\n\n  // Total count of all IncrBy operations (used by CMS.INFO).\n  int64_t total_count() const {\n    return count_;\n  }\n\n  // Memory usage in bytes\n  size_t MallocUsed() const {\n    return NumCounters() * sizeof(int64_t);\n  }\n\n  size_t NumCounters() const {\n    return static_cast<size_t>(width_) * depth_;\n  }\n\n  const int64_t* Data() const {\n    return counters_;\n  }\n\n private:\n  uint32_t width_;\n  uint32_t depth_;\n  PMR_NS::memory_resource* mr_ = nullptr;\n  int64_t count_ = 0;  // Total count of all IncrBy operations\n  int64_t* counters_ = nullptr;\n};\n\n}  // namespace dfly\n"
  },
  {
    "path": "src/core/cms_test.cc",
    "content": "// Copyright 2026, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#include \"core/cms.h\"\n\n#include <absl/strings/str_cat.h>\n\n#include <cmath>\n\n#include \"base/gtest.h\"\n\nnamespace dfly {\n\nusing namespace std;\n\nclass CMSTest : public ::testing::Test {\n protected:\n  CMSTest() : cms_(CMS(1000, 5, PMR_NS::get_default_resource())) {\n  }\n\n  CMS cms_;\n};\n\n// A freshly created CMS must return 0 for any item.\nTEST_F(CMSTest, InitialCountIsZero) {\n  EXPECT_EQ(cms_.Query(\"nonexistent\"), 0);\n  EXPECT_EQ(cms_.Query(\"\"), 0);\n  EXPECT_EQ(cms_.Query(\"anything\"), 0);\n}\n\n// Use width=1 so every item maps to column 0, exercising all counters.\n// This catches initialization bugs (e.g. counters not zeroed).\nTEST(CMSBasic, InitialCountIsZeroSmall) {\n  CMS cms(1, 1, PMR_NS::get_default_resource());\n  EXPECT_EQ(cms.Query(\"x\"), 0);\n  EXPECT_EQ(cms.Query(\"y\"), 0);\n}\n\nTEST(CMSBasic, IncrBySmall) {\n  CMS cms(1, 1, PMR_NS::get_default_resource());\n  EXPECT_EQ(cms.IncrBy(\"a\", 3), 3);\n  // width=1 means all items collide; \"b\" should also return 3.\n  EXPECT_EQ(cms.Query(\"b\"), 3);\n}\n\n// Inspired by fakeredis test_cms_create: initbyprob computes correct dimensions.\nTEST(CMSBasic, InitByProb) {\n  CMS cms(CMS::ErrorRateTag{}, 0.01, 0.01, PMR_NS::get_default_resource());\n\n  // width = ceil(e / 0.01) = ceil(271.8..) = 272\n  EXPECT_EQ(cms.width(), static_cast<uint32_t>(std::ceil(M_E / 0.01)));\n  // depth = ceil(ln(1/0.01)) = ceil(4.605..) 
= 5\n  EXPECT_EQ(cms.depth(), static_cast<uint32_t>(std::ceil(std::log(100.0))));\n  EXPECT_EQ(cms.Query(\"anything\"), 0);\n}\n\n// Inspired by fakeredis test_cms_incrby: multiple items, incremental updates.\nTEST_F(CMSTest, IncrByMultipleItems) {\n  EXPECT_EQ(cms_.IncrBy(\"foo\", 3), 3);\n  cms_.IncrBy(\"foo\", 4);\n  cms_.IncrBy(\"bar\", 1);\n\n  EXPECT_GE(cms_.Query(\"foo\"), 7);\n  EXPECT_GE(cms_.Query(\"bar\"), 1);\n  EXPECT_EQ(cms_.Query(\"noexist\"), 0);\n}\n\nTEST_F(CMSTest, BasicIncrBy) {\n  int64_t count = cms_.IncrBy(\"foo\", 5);\n  EXPECT_EQ(count, 5);\n\n  count = cms_.IncrBy(\"foo\", 3);\n  EXPECT_EQ(count, 8);\n\n  EXPECT_EQ(cms_.Query(\"foo\"), 8);\n}\n\nTEST_F(CMSTest, QueryReturnsMinimum) {\n  cms_.IncrBy(\"a\", 10);\n  cms_.IncrBy(\"b\", 20);\n\n  // CMS can overestimate, but never underestimate.\n  EXPECT_GE(cms_.Query(\"a\"), 10);\n  EXPECT_GE(cms_.Query(\"b\"), 20);\n}\n\nTEST_F(CMSTest, NeverUnderestimates) {\n  for (int i = 0; i < 500; ++i) {\n    string key = absl::StrCat(\"item\", i);\n    cms_.IncrBy(key, i + 1);\n  }\n\n  for (int i = 0; i < 500; ++i) {\n    string key = absl::StrCat(\"item\", i);\n    EXPECT_GE(cms_.Query(key), i + 1) << \"Underestimate for \" << key;\n  }\n}\n\nTEST_F(CMSTest, UnseenItemIsZero) {\n  cms_.IncrBy(\"known\", 100);\n  // With width=1000 and depth=5 and only one item inserted, collisions are unlikely.\n  EXPECT_LE(cms_.Query(\"unknown\"), 5);\n}\n\nTEST_F(CMSTest, Dimensions) {\n  EXPECT_EQ(cms_.width(), 1000u);\n  EXPECT_EQ(cms_.depth(), 5u);\n}\n\nTEST_F(CMSTest, MallocUsed) {\n  EXPECT_EQ(cms_.MallocUsed(), 1000u * 5 * sizeof(int64_t));\n}\n\n// Inspired by fakeredis test_cms_merge: basic merge of two sketches.\nTEST_F(CMSTest, MergeFrom) {\n  CMS other(1000, 5, PMR_NS::get_default_resource());\n  cms_.IncrBy(\"foo\", 3);\n  other.IncrBy(\"foo\", 4);\n  other.IncrBy(\"bar\", 1);\n\n  EXPECT_TRUE(cms_.MergeFrom(other));\n  EXPECT_GE(cms_.Query(\"foo\"), 7);\n  EXPECT_GE(cms_.Query(\"bar\"), 
1);\n}\n\nTEST_F(CMSTest, MergeFromWithWeight) {\n  CMS other(1000, 5, PMR_NS::get_default_resource());\n  other.IncrBy(\"x\", 5);\n\n  cms_.IncrBy(\"x\", 10);\n  EXPECT_TRUE(cms_.MergeFrom(other, 3));\n  // 10 + 5*3 = 25\n  EXPECT_GE(cms_.Query(\"x\"), 25);\n}\n\nTEST_F(CMSTest, MergeDimensionMismatch) {\n  CMS other(500, 5, PMR_NS::get_default_resource());\n  EXPECT_FALSE(cms_.MergeFrom(other));\n\n  CMS other2(1000, 3, PMR_NS::get_default_resource());\n  EXPECT_FALSE(cms_.MergeFrom(other2));\n}\n\n// Inspired by fakeredis test_cms_info: merge multiple sources with weights, verify counts.\n// Mirrors the exact sequence: C=A+B, C+=A*1+B*2, C+=A*2+B*3, then check info.count.\nTEST(CMSBasic, MergeMultipleWithWeights) {\n  auto* mr = PMR_NS::get_default_resource();\n  CMS a(1000, 5, mr);\n  CMS b(1000, 5, mr);\n  CMS c(1000, 5, mr);\n\n  a.IncrBy(\"foo\", 5);\n  a.IncrBy(\"bar\", 3);\n  a.IncrBy(\"baz\", 9);\n\n  b.IncrBy(\"foo\", 2);\n  b.IncrBy(\"bar\", 3);\n  b.IncrBy(\"baz\", 1);\n\n  EXPECT_EQ(a.Query(\"foo\"), 5);\n  EXPECT_EQ(a.Query(\"bar\"), 3);\n  EXPECT_EQ(a.Query(\"baz\"), 9);\n  EXPECT_EQ(b.Query(\"foo\"), 2);\n  EXPECT_EQ(b.Query(\"bar\"), 3);\n  EXPECT_EQ(b.Query(\"baz\"), 1);\n\n  // C = A*1 + B*1\n  EXPECT_TRUE(c.MergeFrom(a));\n  EXPECT_TRUE(c.MergeFrom(b));\n  EXPECT_EQ(c.Query(\"foo\"), 7);\n  EXPECT_EQ(c.Query(\"bar\"), 6);\n  EXPECT_EQ(c.Query(\"baz\"), 10);\n\n  // C += A*1 + B*2\n  EXPECT_TRUE(c.MergeFrom(a, 1));\n  EXPECT_TRUE(c.MergeFrom(b, 2));\n  EXPECT_EQ(c.Query(\"foo\"), 16);\n  EXPECT_EQ(c.Query(\"bar\"), 15);\n  EXPECT_EQ(c.Query(\"baz\"), 21);\n\n  // C += A*2 + B*3\n  EXPECT_TRUE(c.MergeFrom(a, 2));\n  EXPECT_TRUE(c.MergeFrom(b, 3));\n  EXPECT_EQ(c.Query(\"foo\"), 32);\n  EXPECT_EQ(c.Query(\"bar\"), 30);\n  EXPECT_EQ(c.Query(\"baz\"), 42);\n}\n\n// Inspired by fakeredis test_cms_info: verify count tracks total of all IncrBy operations.\nTEST(CMSBasic, CountTracking) {\n  auto* mr = PMR_NS::get_default_resource();\n  CMS a(1000, 5, 
mr);\n\n  EXPECT_EQ(a.total_count(), 0);\n\n  a.IncrBy(\"foo\", 5);\n  a.IncrBy(\"bar\", 3);\n  a.IncrBy(\"baz\", 9);\n  // total_count = 5 + 3 + 9 = 17 (matches fakeredis test_cms_info assertion)\n  EXPECT_EQ(a.total_count(), 17);\n}\n\n// Inspired by fakeredis test_cms_info: count is updated by MergeFrom.\nTEST(CMSBasic, CountAfterMerge) {\n  auto* mr = PMR_NS::get_default_resource();\n  CMS a(1000, 5, mr);\n  CMS b(1000, 5, mr);\n  CMS c(1000, 5, mr);\n\n  a.IncrBy(\"foo\", 5);\n  a.IncrBy(\"bar\", 3);\n  a.IncrBy(\"baz\", 9);\n  EXPECT_EQ(a.total_count(), 17);\n\n  b.IncrBy(\"foo\", 2);\n  b.IncrBy(\"bar\", 3);\n  b.IncrBy(\"baz\", 1);\n  EXPECT_EQ(b.total_count(), 6);\n\n  // C = A + B -> total_count = 17 + 6 = 23\n  c.MergeFrom(a);\n  c.MergeFrom(b);\n  EXPECT_EQ(c.total_count(), 23);\n\n  // C += A*1 + B*2 -> total_count = 23 + 17*1 + 6*2 = 52\n  // (matches fakeredis test_cms_merge_fail assertion: count == 52)\n  c.MergeFrom(a, 1);\n  c.MergeFrom(b, 2);\n  EXPECT_EQ(c.total_count(), 52);\n}\n\nTEST_F(CMSTest, MoveConstruct) {\n  cms_.IncrBy(\"foo\", 42);\n  CMS moved(std::move(cms_));\n\n  EXPECT_EQ(moved.Query(\"foo\"), 42);\n  EXPECT_EQ(moved.width(), 1000u);\n  EXPECT_EQ(moved.depth(), 5u);\n}\n\nTEST_F(CMSTest, MoveAssign) {\n  cms_.IncrBy(\"foo\", 42);\n  CMS other(500, 3, PMR_NS::get_default_resource());\n  other = std::move(cms_);\n\n  EXPECT_EQ(other.Query(\"foo\"), 42);\n  EXPECT_EQ(other.width(), 1000u);\n  EXPECT_EQ(other.depth(), 5u);\n}\n\n}  // namespace dfly\n"
  },
  {
    "path": "src/core/collection_entry.h",
    "content": "// Copyright 2026, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#pragma once\n\n#include <absl/strings/str_cat.h>\n\n#include <cstddef>\n#include <string>\n#include <string_view>\n\nnamespace dfly {\n\n// Stores either:\n// - A single long long value (longval) when value = nullptr\n// - A single char* (value) when value != nullptr\nstruct CollectionEntry {\n  CollectionEntry(const char* value, size_t length) : value_{value}, length_{length} {\n  }\n  explicit CollectionEntry(long long longval) : value_{nullptr}, longval_{longval} {\n  }\n\n  CollectionEntry(const CollectionEntry&) = default;\n  CollectionEntry& operator=(const CollectionEntry&) = default;\n\n  std::string ToString() const {\n    if (value_)\n      return {value_, length_};\n    else\n      return absl::StrCat(longval_);\n  }\n\n  bool IsString() const {\n    return value_ != nullptr;\n  }\n\n  bool is_int() const {\n    return value_ == nullptr;\n  }\n\n  const char* data() const {\n    return value_;\n  }\n\n  size_t size() const {\n    return length_;\n  }\n\n  long long as_long() const {\n    return longval_;\n  }\n\n  // Assumes value is not null.\n  std::string_view view() const {\n    return {value_, length_};\n  }\n\n  // compatibility method\n  std::string to_string() const {\n    return ToString();\n  }\n\n  // compatibility method\n  long long ival() const {\n    return longval_;\n  }\n\n  bool operator==(std::string_view sv) const;\n  friend bool operator==(std::string_view sv, const CollectionEntry& entry) {\n    return entry == sv;\n  }\n\n private:\n  const char* value_;\n  union {\n    size_t length_;\n    long long longval_;\n  };\n};\n\ninline bool CollectionEntry::operator==(std::string_view sv) const {\n  if (value_ == nullptr) {\n    char buf[absl::numbers_internal::kFastToBufferSize];\n    char* end = absl::numbers_internal::FastIntToBuffer(longval_, buf);\n    return sv == std::string_view(buf, end - buf);\n  }\n  
return view() == sv;\n}\n\n}  // namespace dfly\n"
  },
  {
    "path": "src/core/compact_object.cc",
    "content": "// Copyright 2022, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#include \"core/compact_object.h\"\n\n// #define XXH_INLINE_ALL\n#include <xxhash.h>\n\n#include <array>\n\nextern \"C\" {\n#include \"redis/intset.h\"\n#include \"redis/listpack.h\"\n#include \"redis/redis_aux.h\"\n#include \"redis/sds.h\"\n#include \"redis/stream.h\"\n#include \"redis/util.h\"\n#include \"redis/zmalloc.h\"  // for non-string objects.\n}\n#include <absl/strings/str_cat.h>\n#include <absl/strings/strip.h>\n\n#include \"base/flags.h\"\n#include \"base/logging.h\"\n#include \"base/pod_array.h\"\n#include \"core/bloom.h\"\n#include \"core/cms.h\"\n#include \"core/detail/bitpacking.h\"\n#include \"core/huff_coder.h\"\n#include \"core/page_usage/page_usage_stats.h\"\n#include \"core/qlist.h\"\n#include \"core/sorted_map.h\"\n#include \"core/string_map.h\"\n#include \"core/string_set.h\"\n#include \"core/tiering_types.h\"\n#include \"core/topk.h\"\n\nABSL_FLAG(bool, experimental_flat_json, false, \"If true uses flat json implementation.\");\nABSL_FLAG(bool, disable_json_defragmentation, false, \"If true disable json object defragmentation\");\n\nnamespace dfly {\nusing namespace std;\nusing detail::ascii_len;\nusing detail::binpacked_len;\nusing MemoryResource = detail::RobjWrapper::MemoryResource;\n\nnamespace {\n\nconstexpr XXH64_hash_t kHashSeed = 24061983;\nconstexpr size_t kAlignSize = 8u;\n\nsize_t UpdateSize(size_t size, int64_t update) {\n  int64_t result = static_cast<int64_t>(size) + update;\n  if (result < 0) {\n    DCHECK(false) << \"Can't decrease \" << size << \" from \" << -update;\n    LOG_EVERY_T(ERROR, 30) << \"Can't decrease \" << size << \" from \" << -update;\n  }\n  return result;\n}\n\ninline void FreeObjSet(unsigned encoding, void* ptr, MemoryResource* mr) {\n  switch (encoding) {\n    case kEncodingStrMap2: {\n      CompactObj::DeleteMR<StringSet>(ptr);\n      break;\n    }\n\n    case kEncodingIntSet:\n      
zfree((void*)ptr);\n      break;\n    default:\n      LOG(FATAL) << \"Unknown set encoding type\";\n  }\n}\n\nvoid FreeList(unsigned encoding, void* ptr, MemoryResource* mr) {\n  if (encoding == kEncodingListPack) {\n    lpFree((uint8_t*)ptr);\n    return;\n  }\n  CHECK_EQ(encoding, kEncodingQL2);\n  CompactObj::DeleteMR<QList>(ptr);\n}\n\nsize_t MallocUsedSet(unsigned encoding, void* ptr) {\n  switch (encoding) {\n    case kEncodingStrMap2: {\n      StringSet* ss = (StringSet*)ptr;\n      return ss->ObjMallocUsed() + ss->SetMallocUsed() + zmalloc_usable_size(ptr);\n    }\n    case kEncodingIntSet:\n      return intsetBlobLen((intset*)ptr);\n  }\n\n  LOG(DFATAL) << \"Unknown set encoding type \" << encoding;\n  return 0;\n}\n\nsize_t MallocUsedHSet(unsigned encoding, void* ptr) {\n  switch (encoding) {\n    case kEncodingListPack:\n      return zmalloc_usable_size(reinterpret_cast<uint8_t*>(ptr));\n    case kEncodingStrMap2: {\n      StringMap* sm = (StringMap*)ptr;\n      return sm->ObjMallocUsed() + sm->SetMallocUsed() + zmalloc_usable_size(ptr);\n    }\n  }\n  LOG(DFATAL) << \"Unknown set encoding type \" << encoding;\n  return 0;\n}\n\nsize_t MallocUsedZSet(unsigned encoding, void* ptr) {\n  switch (encoding) {\n    case OBJ_ENCODING_LISTPACK:\n      return zmalloc_usable_size(reinterpret_cast<uint8_t*>(ptr));\n    case OBJ_ENCODING_SKIPLIST: {\n      detail::SortedMap* ss = (detail::SortedMap*)ptr;\n      return ss->MallocSize() + zmalloc_usable_size(ptr);  // DictMallocSize(zs->dict);\n    }\n  }\n  LOG(DFATAL) << \"Unknown set encoding type \" << encoding;\n  return 0;\n}\n\n/* This is a helper function with the goal of estimating the memory\n * size of a radix tree that is used to store Stream IDs.\n *\n * Note: to guess the size of the radix tree is not trivial, so we\n * approximate it considering 16 bytes of data overhead for each\n * key (the ID), and then adding the number of bare nodes, plus some\n * overhead due by the data and child pointers. 
This secret recipe\n * was obtained by checking the average radix tree created by real\n * workloads, and then adjusting the constants to get numbers that\n * more or less match the real memory usage.\n *\n * Actually the number of nodes and keys may be different depending\n * on the insertion speed and thus the ability of the radix tree\n * to compress prefixes. */\nsize_t streamRadixTreeMemoryUsage(rax* rax) {\n  size_t size = sizeof(*rax);\n  size += rax->numele * sizeof(streamID);\n  size += rax->numnodes * sizeof(raxNode);\n  /* Add a fixed overhead due to the aux data pointer, children, ... */\n  size += rax->numnodes * sizeof(long) * 30;\n  return size;\n}\n\nsize_t MallocUsedStream(stream* s) {\n  size_t asize = sizeof(*s);\n  asize += streamRadixTreeMemoryUsage(s->rax);\n\n  /* Now we have to add the listpacks. The last listpack is often non\n   * complete, so we estimate the size of the first N listpacks, and\n   * use the average to compute the size of the first N-1 listpacks, and\n   * finally add the real size of the last node. */\n  raxIterator ri;\n  raxStart(&ri, s->rax);\n  raxSeek(&ri, \"^\", NULL, 0);\n  size_t lpsize = 0, samples = 0;\n  while (raxNext(&ri)) {\n    uint8_t* lp = (uint8_t*)ri.data;\n    /* Use the allocated size, since we overprovision the node initially. */\n    lpsize += zmalloc_size(lp);\n    samples++;\n  }\n  if (s->rax->numele <= samples) {\n    asize += lpsize;\n  } else {\n    if (samples)\n      lpsize /= samples; /* Compute the average. */\n    asize += lpsize * (s->rax->numele - 1);\n    /* No need to check if seek succeeded, we enter this branch only\n     * if there are a few elements in the radix tree. */\n    raxSeek(&ri, \"$\", NULL, 0);\n    raxNext(&ri);\n    /* Use the allocated size, since we overprovision the node initially. 
*/\n    asize += zmalloc_size(ri.data);\n  }\n  raxStop(&ri);\n\n  /* Consumer groups also have a non trivial memory overhead if there\n   * are many consumers and many groups, let's count at least the\n   * overhead of the pending entries in the groups and consumers\n   * PELs. */\n  if (s->cgroups) {\n    raxStart(&ri, s->cgroups);\n    raxSeek(&ri, \"^\", NULL, 0);\n    while (raxNext(&ri)) {\n      streamCG* cg = (streamCG*)ri.data;\n      asize += sizeof(*cg);\n      asize += streamRadixTreeMemoryUsage(cg->pel);\n      asize += sizeof(streamNACK) * raxSize(cg->pel);\n\n      /* For each consumer we also need to add the basic data\n       * structures and the PEL memory usage. */\n      raxIterator cri;\n      raxStart(&cri, cg->consumers);\n      raxSeek(&cri, \"^\", NULL, 0);\n      while (raxNext(&cri)) {\n        const streamConsumer* consumer = (const streamConsumer*)cri.data;\n        asize += sizeof(*consumer);\n        asize += sdslen(consumer->name);\n        asize += streamRadixTreeMemoryUsage(consumer->pel);\n        /* Don't count NACKs again, they are shared with the\n         * consumer group PEL. 
*/\n      }\n      raxStop(&cri);\n    }\n    raxStop(&ri);\n  }\n  return asize;\n}\n\ninline void FreeObjHash(unsigned encoding, void* ptr) {\n  switch (encoding) {\n    case kEncodingStrMap2:\n      CompactObj::DeleteMR<StringMap>(ptr);\n      break;\n    case kEncodingListPack:\n      lpFree((uint8_t*)ptr);\n      break;\n    default:\n      LOG(FATAL) << \"Unknown hset encoding type \" << encoding;\n  }\n}\n\ninline void FreeObjZset(unsigned encoding, void* ptr) {\n  switch (encoding) {\n    case OBJ_ENCODING_SKIPLIST:\n      CompactObj::DeleteMR<detail::SortedMap>(ptr);\n      break;\n    case OBJ_ENCODING_LISTPACK:\n      zfree(ptr);\n      break;\n    default:\n      LOG(FATAL) << \"Unknown sorted set encoding\" << encoding;\n  }\n}\n\npair<void*, bool> DefragStrMap2(StringMap* sm, PageUsage* page_usage) {\n  bool realloced = false;\n\n  for (auto it = sm->begin(); it != sm->end(); ++it)\n    realloced |= it.ReallocIfNeeded(page_usage);\n\n  return {sm, realloced};\n}\n\npair<void*, bool> DefragListPack(uint8_t* lp, PageUsage* page_usage) {\n  if (!page_usage->IsPageForObjectUnderUtilized(lp))\n    return {lp, false};\n\n  size_t lp_bytes = lpBytes(lp);\n  uint8_t* replacement = lpNew(lpBytes(lp));\n  memcpy(replacement, lp, lp_bytes);\n  lpFree(lp);\n\n  return {replacement, true};\n}\n\npair<void*, bool> DefragIntSet(intset* is, PageUsage* page_usage) {\n  if (!page_usage->IsPageForObjectUnderUtilized(is))\n    return {is, false};\n\n  const size_t blob_len = intsetBlobLen(is);\n  intset* replacement = (intset*)zmalloc(blob_len);\n  memcpy(replacement, is, blob_len);\n\n  zfree(is);\n  return {replacement, true};\n}\n\npair<void*, bool> DefragSortedMap(detail::SortedMap* sm, PageUsage* page_usage) {\n  const bool reallocated = sm->DefragIfNeeded(page_usage);\n  return {sm, reallocated};\n}\n\npair<void*, bool> DefragStrSet(StringSet* ss, PageUsage* page_usage) {\n  bool realloced = false;\n\n  for (auto it = ss->begin(); it != ss->end(); ++it)\n    
realloced |= it.ReallocIfNeeded(page_usage);\n\n  return {ss, realloced};\n}\n\n// Iterates over allocations of internal hash data structures and re-allocates\n// them if their pages are underutilized.\n// Returns pointer to new object ptr and whether any re-allocations happened.\npair<void*, bool> DefragHash(unsigned encoding, void* ptr, PageUsage* page_usage) {\n  switch (encoding) {\n    // Listpack is stored as a single contiguous array\n    case kEncodingListPack: {\n      return DefragListPack((uint8_t*)ptr, page_usage);\n    }\n\n    // StringMap supports re-allocation of its internal nodes\n    case kEncodingStrMap2: {\n      return DefragStrMap2((StringMap*)ptr, page_usage);\n    }\n\n    default:\n      ABSL_UNREACHABLE();\n  }\n}\n\npair<void*, bool> DefragSet(unsigned encoding, void* ptr, PageUsage* page_usage) {\n  switch (encoding) {\n    // Int sets have flat storage\n    case kEncodingIntSet: {\n      return DefragIntSet((intset*)ptr, page_usage);\n    }\n\n    case kEncodingStrMap2: {\n      return DefragStrSet((StringSet*)ptr, page_usage);\n    }\n\n    default:\n      ABSL_UNREACHABLE();\n  }\n}\n\npair<void*, bool> DefragZSet(unsigned encoding, void* ptr, PageUsage* page_usage) {\n  switch (encoding) {\n    // Listpack is stored as a single contiguous array\n    case OBJ_ENCODING_LISTPACK: {\n      return DefragListPack((uint8_t*)ptr, page_usage);\n    }\n\n    // SKIPLIST really means SortedMap\n    case OBJ_ENCODING_SKIPLIST: {\n      return DefragSortedMap((detail::SortedMap*)ptr, page_usage);\n    }\n\n    default:\n      ABSL_UNREACHABLE();\n  }\n}\n\npair<void*, bool> DefragList(unsigned encoding, void* ptr, PageUsage* page_usage) {\n  if (encoding == kEncodingListPack) {\n    return DefragListPack((uint8_t*)ptr, page_usage);\n  }\n  auto* qlist_ptr = static_cast<QList*>(ptr);\n  bool reallocated = qlist_ptr->DefragIfNeeded(page_usage);\n  return {ptr, reallocated};\n}\n\ninline void FreeObjStream(void* ptr) {\n  
freeStream((stream*)ptr);\n}\n\ninline const uint8_t* to_byte(const void* s) {\n  return reinterpret_cast<const uint8_t*>(s);\n}\n\nstatic_assert(binpacked_len(7) == 7);\nstatic_assert(binpacked_len(8) == 7);\nstatic_assert(binpacked_len(15) == 14);\nstatic_assert(binpacked_len(16) == 14);\nstatic_assert(binpacked_len(17) == 15);\nstatic_assert(binpacked_len(18) == 16);\nstatic_assert(binpacked_len(19) == 17);\nstatic_assert(binpacked_len(20) == 18);\nstatic_assert(ascii_len(14) == 16);\nstatic_assert(ascii_len(15) == 17);\nstatic_assert(ascii_len(16) == 18);\nstatic_assert(ascii_len(17) == 19);\n\nstruct Huffman {\n  HuffmanEncoder encoder;\n  HuffmanDecoder decoder;\n};\n\nstruct TL {\n  MemoryResource* local_mr = PMR_NS::get_default_resource();\n  base::PODArray<uint8_t> tmp_buf;\n  string tmp_str;\n  size_t small_str_bytes;\n  Huffman huff_keys, huff_string_values;\n  uint64_t huff_encode_total = 0, huff_encode_success = 0;  // success/total metrics.\n\n  const HuffmanDecoder& GetHuffmanDecoder(uint8_t huffman_domain) const {\n    return huffman_domain == CompactObj::HUFF_KEYS ? huff_keys.decoder : huff_string_values.decoder;\n  }\n};\n\nthread_local TL tl;\n\nconstexpr bool kUseAsciiEncoding = true;\n\n}  // namespace\n\nstatic_assert(sizeof(CompactObj) == 18);\n\nnamespace detail {\n\nsize_t RobjWrapper::MallocUsed(bool slow) const {\n  if (!inner_obj_)\n    return 0;\n\n  switch (type_) {\n    case OBJ_STRING:\n      CHECK_EQ(OBJ_ENCODING_RAW, encoding_);\n      return InnerObjMallocUsed();\n    case OBJ_LIST:\n      if (encoding_ == kEncodingListPack) {\n        return zmalloc_usable_size(inner_obj_);\n      }\n      return ((QList*)inner_obj_)->MallocUsed(slow);\n    case OBJ_SET:\n      return MallocUsedSet(encoding_, inner_obj_);\n    case OBJ_HASH:\n      return MallocUsedHSet(encoding_, inner_obj_);\n    case OBJ_ZSET:\n      return MallocUsedZSet(encoding_, inner_obj_);\n    case OBJ_STREAM:\n      return slow ? 
MallocUsedStream((stream*)inner_obj_) : sz_;\n\n    default:\n      LOG(FATAL) << \"Not supported \" << type_;\n  }\n\n  return 0;\n}\n\nsize_t RobjWrapper::Size() const {\n  switch (type_) {\n    case OBJ_STRING:\n      DCHECK_EQ(OBJ_ENCODING_RAW, encoding_);\n      return sz_;\n    case OBJ_LIST:\n      if (encoding_ == kEncodingListPack) {\n        return lpLength((uint8_t*)inner_obj_);\n      }\n      return ((QList*)inner_obj_)->Size();\n    case OBJ_ZSET: {\n      switch (encoding_) {\n        case OBJ_ENCODING_SKIPLIST: {\n          SortedMap* ss = (SortedMap*)inner_obj_;\n          return ss->Size();\n        }\n        case OBJ_ENCODING_LISTPACK:\n          return lpLength((uint8_t*)inner_obj_) / 2;\n        default:\n          LOG(FATAL) << \"Unknown sorted set encoding\" << encoding_;\n      }\n    }\n    case OBJ_SET:\n      switch (encoding_) {\n        case kEncodingIntSet: {\n          intset* is = (intset*)inner_obj_;\n          return intsetLen(is);\n        }\n        case kEncodingStrMap2: {\n          StringSet* ss = (StringSet*)inner_obj_;\n          return ss->UpperBoundSize();\n        }\n        default:\n          LOG(FATAL) << \"Unexpected encoding \" << encoding_;\n      };\n    case OBJ_HASH:\n      switch (encoding_) {\n        case kEncodingListPack: {\n          uint8_t* lp = (uint8_t*)inner_obj_;\n          return lpLength(lp) / 2;\n        } break;\n\n        case kEncodingStrMap2: {\n          StringMap* sm = (StringMap*)inner_obj_;\n          return sm->UpperBoundSize();\n        }\n        default:\n          LOG(FATAL) << \"Unexpected encoding \" << encoding_;\n      }\n    case OBJ_STREAM:\n      // Size mean malloc bytes for streams\n      return sz_;\n    default:;\n  }\n  return 0;\n}\n\nvoid RobjWrapper::Free(MemoryResource* mr) {\n  if (!inner_obj_)\n    return;\n  DVLOG(1) << \"RobjWrapper::Free \" << inner_obj_;\n\n  switch (type_) {\n    case OBJ_STRING:\n      DVLOG(2) << \"Freeing string object\";\n      
DCHECK_EQ(OBJ_ENCODING_RAW, encoding_);\n      mr->deallocate(inner_obj_, 0, 8);  // we do not keep the allocated size.\n      break;\n    case OBJ_LIST:\n      FreeList(encoding_, inner_obj_, mr);\n      break;\n    case OBJ_SET:\n      FreeObjSet(encoding_, inner_obj_, mr);\n      break;\n    case OBJ_ZSET:\n      FreeObjZset(encoding_, inner_obj_);\n      break;\n    case OBJ_HASH:\n      FreeObjHash(encoding_, inner_obj_);\n      break;\n    case OBJ_MODULE:\n      LOG(FATAL) << \"Unsupported OBJ_MODULE type\";\n      break;\n    case OBJ_STREAM:\n      FreeObjStream(inner_obj_);\n      break;\n    default:\n      LOG(FATAL) << \"Unknown object type\";\n      break;\n  }\n  Set(nullptr, 0);\n}\n\nuint64_t RobjWrapper::HashCode() const {\n  switch (type_) {\n    case OBJ_STRING:\n      DCHECK_EQ(OBJ_ENCODING_RAW, encoding());\n      {\n        auto str = AsView();\n        return XXH3_64bits_withSeed(str.data(), str.size(), kHashSeed);\n      }\n      break;\n    default:\n      LOG(FATAL) << \"Unsupported type for hashcode \" << type_;\n  }\n  return 0;\n}\n\nbool RobjWrapper::Equal(const RobjWrapper& ow) const {\n  if (ow.type_ != type_ || ow.encoding_ != encoding_)\n    return false;\n\n  if (type_ == OBJ_STRING) {\n    DCHECK_EQ(OBJ_ENCODING_RAW, encoding());\n    return AsView() == ow.AsView();\n  }\n  LOG(FATAL) << \"Unsupported type \" << type_;\n  return false;\n}\n\nbool RobjWrapper::Equal(string_view sv) const {\n  if (type() != OBJ_STRING)\n    return false;\n\n  DCHECK_EQ(OBJ_ENCODING_RAW, encoding());\n  return AsView() == sv;\n}\n\nvoid RobjWrapper::SetString(string_view s, MemoryResource* mr) {\n  type_ = OBJ_STRING;\n  encoding_ = OBJ_ENCODING_RAW;\n\n  if (s.size() > sz_) {\n    size_t cur_cap = InnerObjMallocUsed();\n    if (s.size() > cur_cap) {\n      MakeInnerRoom(cur_cap, s.size(), mr);\n    }\n    memcpy(inner_obj_, s.data(), s.size());\n    sz_ = s.size();\n  }\n}\n\nvoid RobjWrapper::ReserveString(size_t size, MemoryResource* mr) {\n  
CHECK_EQ(inner_obj_, nullptr);\n  type_ = OBJ_STRING;\n  encoding_ = OBJ_ENCODING_RAW;\n  MakeInnerRoom(0, size, mr);\n}\n\nvoid RobjWrapper::AppendString(string_view s, MemoryResource* mr) {\n  size_t cur_cap = InnerObjMallocUsed();\n  CHECK(cur_cap >= sz_ + s.size()) << cur_cap << \" \" << sz_ << \" \" << s.size();\n  memcpy(reinterpret_cast<uint8_t*>(inner_obj_) + sz_, s.data(), s.size());\n  sz_ += s.size();\n}\n\nvoid RobjWrapper::SetSize(uint64_t size) {\n  sz_ = size;\n}\n\nbool RobjWrapper::DefragIfNeeded(PageUsage* page_usage) {\n  auto do_defrag = [this, &page_usage](auto defrag_fun) mutable {\n    auto [new_ptr, realloced] = defrag_fun(encoding_, inner_obj_, page_usage);\n    inner_obj_ = new_ptr;\n    return realloced;\n  };\n\n  if (type() == OBJ_STRING) {\n    if (page_usage->IsPageForObjectUnderUtilized(inner_obj())) {\n      ReallocateString(tl.local_mr);\n      return true;\n    }\n  } else if (type() == OBJ_HASH) {\n    return do_defrag(DefragHash);\n  } else if (type() == OBJ_SET) {\n    return do_defrag(DefragSet);\n  } else if (type() == OBJ_ZSET) {\n    return do_defrag(DefragZSet);\n  } else if (type() == OBJ_LIST) {\n    return do_defrag(DefragList);\n  }\n\n  page_usage->RecordNotSupported();\n  return false;\n}\n\nvoid RobjWrapper::ReallocateString(MemoryResource* mr) {\n  DCHECK_EQ(type(), OBJ_STRING);\n  void* old_ptr = inner_obj_;\n  inner_obj_ = mr->allocate(sz_, kAlignSize);\n  memcpy(inner_obj_, old_ptr, sz_);\n  mr->deallocate(old_ptr, 0, kAlignSize);\n}\n\nvoid RobjWrapper::Init(unsigned type, unsigned encoding, void* inner) {\n  type_ = type;\n  encoding_ = encoding;\n  Set(inner, 0);\n}\n\ninline size_t RobjWrapper::InnerObjMallocUsed() const {\n  return zmalloc_size(inner_obj_);\n}\n\nvoid RobjWrapper::MakeInnerRoom(size_t current_cap, size_t desired, MemoryResource* mr) {\n  if (current_cap * 2 > desired) {\n    if (desired < SDS_MAX_PREALLOC)\n      desired *= 2;\n    else\n      desired += SDS_MAX_PREALLOC;\n  }\n\n  void* 
newp = mr->allocate(desired, kAlignSize);\n  if (sz_) {\n    memcpy(newp, inner_obj_, sz_);\n  }\n\n  if (current_cap) {\n    mr->deallocate(inner_obj_, current_cap, kAlignSize);\n  }\n  inner_obj_ = newp;\n}\n\n}  // namespace detail\n\nuint32_t JsonEnconding() {\n  thread_local uint32_t json_enc =\n      absl::GetFlag(FLAGS_experimental_flat_json) ? kEncodingJsonFlat : kEncodingJsonCons;\n  return json_enc;\n}\n\nusing namespace std;\n\nauto CompactObj::GetStatsThreadLocal() -> Stats {\n  Stats res;\n  res.small_string_bytes = tl.small_str_bytes;\n  res.huff_encode_total = tl.huff_encode_total;\n  res.huff_encode_success = tl.huff_encode_success;\n  return res;\n}\n\nvoid CompactObj::InitThreadLocal(MemoryResource* mr) {\n  tl.local_mr = mr;\n  tl.tmp_buf = base::PODArray<uint8_t>{mr};\n}\n\nbool CompactObj::InitHuffmanThreadLocal(HuffmanDomain domain, std::string_view hufftable) {\n  string err_msg;\n\n  Huffman* huffman = nullptr;\n  switch (domain) {\n    case HUFF_KEYS:\n      huffman = &tl.huff_keys;\n      break;\n    case HUFF_STRING_VALUES:\n      huffman = &tl.huff_string_values;\n      break;\n  }\n\n  // We do not allow overriding the existing huffman table once it is set.\n  if (huffman->encoder.valid()) {\n    return false;\n  }\n\n  if (!huffman->encoder.Load(hufftable, &err_msg)) {\n    LOG(DFATAL) << \"Failed to load huffman table: \" << err_msg;\n    return false;\n  }\n\n  if (!huffman->decoder.Load(hufftable, &err_msg)) {\n    LOG(DFATAL) << \"Failed to load huffman table: \" << err_msg;\n    return false;\n  }\n  return true;\n}\n\nCompactObj::~CompactObj() {\n  if (HasAllocated()) {\n    Free();\n  }\n}\n\nCompactObj& CompactObj::operator=(CompactObj&& o) noexcept {\n  DCHECK(&o != this);\n  DCHECK_EQ(is_key_, o.is_key_);\n\n  SetMeta(o.taglen_, o.mask_);  // frees own previous resources\n  encoding_ = o.encoding_;\n  memcpy(&u_, &o.u_, sizeof(u_));\n\n  o.taglen_ = 0;  // forget all data\n  o.encoding_ = 0;\n  o.mask_ = 0;\n  return 
*this;\n}\n\nsize_t CompactObj::Size() const {\n  auto decoded_str_size = [this](size_t raw_size, uint8_t first_byte) {\n    DCHECK_EQ(ObjType(), OBJ_STRING);\n    return GetStrEncoding().DecodedSize(raw_size, first_byte);\n  };\n\n  if (IsInline())\n    return decoded_str_size(taglen_, u_.inline_str[0]);\n\n  switch (taglen_) {\n    case SMALL_TAG:\n      return decoded_str_size(u_.small_str.size(), u_.small_str.first_byte());\n    case EXTERNAL_TAG:\n      if (ObjType() == OBJ_STRING)\n        return decoded_str_size(u_.ext_ptr.serialized_size, GetFirstByte());\n      else\n        return u_.ext_ptr.serialized_size;\n    case ROBJ_TAG:\n      if (size_t size = u_.r_obj.Size(); u_.r_obj.type() != OBJ_STRING)\n        return size;\n      else\n        return decoded_str_size(size, *(uint8_t*)u_.r_obj.inner_obj());\n    case INT_TAG:\n      return absl::AlphaNum(u_.ival).size();\n    case SDS_TTL_TAG:\n      return decoded_str_size(sdslen(u_.sds_ttl.sds_ptr), u_.sds_ttl.sds_ptr[0]);\n    case JSON_TAG:\n      if (JsonEnconding() == kEncodingJsonFlat)\n        return u_.json_obj.flat.json_len;\n      else\n        return u_.json_obj.cons.json_ptr->size();\n    case SBF_TAG:\n      return u_.sbf->current_size();\n    case CMS_TAG:\n      return 0;\n    case TOPK_TAG:\n      return u_.topk->Size();\n    default:\n      LOG(DFATAL) << \"Should not reach \" << int(taglen_);\n      return 0;\n  }\n}\n\nuint64_t CompactObj::HashCode() const {\n  DCHECK(taglen_ != JSON_TAG) << \"JSON type cannot be used for keys!\";\n\n  if (encoding_ == NONE_ENC) {\n    if (IsInline()) {\n      return XXH3_64bits_withSeed(u_.inline_str, taglen_, kHashSeed);\n    }\n\n    switch (taglen_) {\n      case SMALL_TAG:\n        return u_.small_str.HashCode();\n      case ROBJ_TAG:\n        return u_.r_obj.HashCode();\n      case INT_TAG: {\n        absl::AlphaNum an(u_.ival);\n        return XXH3_64bits_withSeed(an.data(), an.size(), kHashSeed);\n      }\n      case SDS_TTL_TAG:\n        return 
XXH3_64bits_withSeed(u_.sds_ttl.sds_ptr, sdslen(u_.sds_ttl.sds_ptr), kHashSeed);\n    }\n  }\n\n  DCHECK(encoding_);\n\n  if (IsInline()) {\n    // Buffer must accommodate maximum decompressed size from inline storage\n    // Highly compressible data can achieve ~8x compression (e.g., repeated character)\n    // kInlineLen (16 bytes) compressed -> up to 128 bytes decompressed\n    char buf[kInlineLen * 8];\n    size_t decoded_len = GetStrEncoding().Decode(string_view{u_.inline_str, taglen_}, buf);\n    return XXH3_64bits_withSeed(buf, decoded_len, kHashSeed);\n  }\n\n  string_view sv = GetSlice(&tl.tmp_str);\n  return XXH3_64bits_withSeed(sv.data(), sv.size(), kHashSeed);\n}\n\nuint64_t CompactObj::HashCode(string_view str) {\n  return XXH3_64bits_withSeed(str.data(), str.size(), kHashSeed);\n}\n\nCompactObjType CompactObj::ObjType() const {\n  if (IsInline() || taglen_ == INT_TAG || taglen_ == SMALL_TAG || taglen_ == SDS_TTL_TAG)\n    return OBJ_STRING;\n\n  if (taglen_ == EXTERNAL_TAG) {\n    switch (static_cast<ExternalRep>(u_.ext_ptr.representation)) {\n      case ExternalRep::STRING:\n        return OBJ_STRING;\n      case ExternalRep::SERIALIZED_MAP:\n        return OBJ_HASH;\n    };\n  }\n\n  if (taglen_ == ROBJ_TAG)\n    return u_.r_obj.type();\n\n  if (taglen_ == JSON_TAG) {\n    return OBJ_JSON;\n  }\n\n  if (taglen_ == SBF_TAG) {\n    return OBJ_SBF;\n  }\n\n  if (taglen_ == CMS_TAG) {\n    return OBJ_CMS;\n  }\n\n  if (taglen_ == TOPK_TAG) {\n    return OBJ_TOPK;\n  }\n\n  LOG(FATAL) << \"TBD \" << int(taglen_);\n  return kInvalidCompactObjType;\n}\n\nunsigned CompactObj::Encoding() const {\n  switch (taglen_) {\n    case ROBJ_TAG:\n      return u_.r_obj.encoding();\n    case INT_TAG:\n      return OBJ_ENCODING_INT;\n    default:\n      return OBJ_ENCODING_RAW;\n  }\n}\n\nvoid CompactObj::InitRobj(CompactObjType type, unsigned encoding, void* obj) {\n  DCHECK_NE(type, OBJ_STRING);\n  SetMeta(ROBJ_TAG, mask_);\n  u_.r_obj.Init(type, encoding, 
obj);\n}\n\nvoid CompactObj::SetInt(int64_t val) {\n  DCHECK(!IsExternal());\n\n  if (INT_TAG != taglen_) {\n    SetMeta(INT_TAG, mask_);\n    encoding_ = NONE_ENC;\n  }\n\n  u_.ival = val;\n}\n\nstd::optional<int64_t> CompactObj::TryGetInt() const {\n  if (taglen_ != INT_TAG)\n    return std::nullopt;\n  int64_t val = u_.ival;\n  return val;\n}\n\nauto CompactObj::GetJson() const -> JsonType* {\n  if (ObjType() == OBJ_JSON) {\n    DCHECK_EQ(JsonEnconding(), kEncodingJsonCons);\n    return u_.json_obj.cons.json_ptr;\n  }\n  return nullptr;\n}\n\nvoid CompactObj::SetJson(JsonType&& j) {\n  if (taglen_ == JSON_TAG && JsonEnconding() == kEncodingJsonCons) {\n    DCHECK(u_.json_obj.cons.json_ptr != nullptr);  // must be allocated\n    u_.json_obj.cons.json_ptr->swap(j);\n    DCHECK(jsoncons::is_trivial_storage(u_.json_obj.cons.json_ptr->storage_kind()) ||\n           u_.json_obj.cons.json_ptr->get_allocator().resource() == tl.local_mr);\n\n    // We do not set bytes_used as this is needed. Consider the two following cases:\n    // 1. old json contains 50 bytes. The delta for new one is 50, so the total bytes\n    // the new json occupies is 100.\n    // 2. old json contains 100 bytes. The delta for new one is -50, so the total bytes\n    // the new json occupies is 50.\n    // Both of the cases are covered in SetJsonSize and JsonMemTracker. 
See below.\n    return;\n  }\n\n  SetMeta(JSON_TAG);\n  u_.json_obj.cons.json_ptr = AllocateMR<JsonType>(std::move(j));\n\n  // With trivial storage json_ptr->get_allocator() throws an exception.\n  DCHECK(jsoncons::is_trivial_storage(u_.json_obj.cons.json_ptr->storage_kind()) ||\n         u_.json_obj.cons.json_ptr->get_allocator().resource() == tl.local_mr);\n  u_.json_obj.cons.bytes_used = 0;\n}\n\nvoid CompactObj::SetJsonSize(int64_t size) {\n  if (taglen_ == JSON_TAG && JsonEnconding() == kEncodingJsonCons) {\n    // JSON.SET or if mem hasn't changed from a JSON op then we just update.\n    int64_t result = static_cast<int64_t>(u_.json_obj.cons.bytes_used) + size;\n    if (result < 1) {\n      LOG_EVERY_T(ERROR, 20) << \"JSON size underflow: \" << u_.json_obj.cons.bytes_used << \" + \"\n                             << size << \" = \" << result;\n      u_.json_obj.cons.bytes_used = 1;\n    } else {\n      u_.json_obj.cons.bytes_used = static_cast<size_t>(result);\n    }\n  }\n}\n\nvoid CompactObj::AddStreamSize(int64_t size) {\n  if (size < 0) {\n    // We might have a negative size. For example, if we remove a consumer,\n    // the tracker will report a negative net (since we deallocated),\n    // so the object now consumes less memory than it did before. 
This DCHECK\n    // is for sanity and to catch any potential issues with our tracking approach.\n    DCHECK(static_cast<int64_t>(u_.r_obj.Size()) >= -size);\n  }\n  u_.r_obj.SetSize((u_.r_obj.Size() + size));\n}\n\nvoid CompactObj::SetJson(const uint8_t* buf, size_t len) {\n  SetMeta(JSON_TAG);\n  u_.json_obj.flat.flat_ptr = (uint8_t*)tl.local_mr->allocate(len, kAlignSize);\n  memcpy(u_.json_obj.flat.flat_ptr, buf, len);\n  u_.json_obj.flat.json_len = len;\n}\n\nvoid CompactObj::SetSBF(uint64_t initial_capacity, double fp_prob, double grow_factor) {\n  if (taglen_ == SBF_TAG) {  // already an SBF\n    *u_.sbf = SBF(initial_capacity, fp_prob, grow_factor, tl.local_mr);\n  } else {\n    SetMeta(SBF_TAG);\n    u_.sbf = AllocateMR<SBF>(initial_capacity, fp_prob, grow_factor, tl.local_mr);\n  }\n}\n\nSBF* CompactObj::GetSBF() const {\n  DCHECK_EQ(SBF_TAG, taglen_);\n  return u_.sbf;\n}\n\nvoid CompactObj::SetCMS(uint32_t width, uint32_t depth) {\n  if (taglen_ == CMS_TAG) {\n    *u_.cms = CMS(width, depth, tl.local_mr);\n  } else {\n    SetMeta(CMS_TAG);\n    u_.cms = AllocateMR<CMS>(width, depth, tl.local_mr);\n  }\n}\n\nCMS* CompactObj::GetCMS() const {\n  DCHECK_EQ(CMS_TAG, taglen_);\n  return u_.cms;\n}\n\nvoid CompactObj::SetTOPK(uint32_t k, uint32_t width, uint32_t depth, double decay) {\n  if (taglen_ == TOPK_TAG) {\n    *u_.topk = TOPK(memory_resource(), k, width, depth, decay);\n  } else {\n    SetMeta(TOPK_TAG);\n    u_.topk = AllocateMR<TOPK>(memory_resource(), k, width, depth, decay);\n  }\n}\n\nTOPK* CompactObj::GetTOPK() const {\n  DCHECK_EQ(TOPK_TAG, taglen_);\n  return u_.topk;\n}\n\nvoid CompactObj::SetString(std::string_view str) {\n  CHECK(!IsExternal());\n  encoding_ = NONE_ENC;\n\n  // Trying auto-detection heuristics first.\n  if (str.size() <= 20) {\n    long long ival;\n    static_assert(sizeof(long long) == 8);\n\n    // We use redis string2ll to be compatible with Redis.\n    if (string2ll(str.data(), str.size(), &ival)) {\n      SetMeta(INT_TAG, 
mask_);\n      u_.ival = ival;\n\n      return;\n    }\n\n    if (str.size() <= kInlineLen) {\n      SetMeta(str.size(), mask_);\n      if (!str.empty())\n        memcpy(u_.inline_str, str.data(), str.size());\n      return;\n    }\n  }\n\n  EncodeString(str);\n}\n\nvoid CompactObj::ReserveString(size_t size) {\n  encoding_ = NONE_ENC;\n  SetMeta(ROBJ_TAG, mask_);\n\n  u_.r_obj.ReserveString(size, tl.local_mr);\n}\n\nvoid CompactObj::AppendString(std::string_view str) {\n  u_.r_obj.AppendString(str, tl.local_mr);\n}\n\nstring_view CompactObj::GetSlice(string* scratch) const {\n  CHECK(!IsExternal());\n\n  if (encoding_) {\n    GetString(scratch);\n    return *scratch;\n  }\n\n  if (IsInline()) {\n    return string_view{u_.inline_str, taglen_};\n  }\n\n  if (taglen_ == INT_TAG) {\n    absl::AlphaNum an(u_.ival);\n    scratch->assign(an.Piece());\n\n    return *scratch;\n  }\n\n  // no encoding.\n  if (taglen_ == ROBJ_TAG) {\n    CHECK_EQ(OBJ_STRING, u_.r_obj.type());\n    DCHECK_EQ(OBJ_ENCODING_RAW, u_.r_obj.encoding());\n    return u_.r_obj.AsView();\n  }\n\n  if (taglen_ == SMALL_TAG) {\n    u_.small_str.Get(scratch);\n    return *scratch;\n  }\n\n  if (taglen_ == SDS_TTL_TAG) {\n    return u_.sds_ttl.view();\n  }\n\n  LOG(FATAL) << \"Bad tag \" << int(taglen_);\n\n  return string_view{};\n}\n\nbool CompactObj::DefragIfNeeded(PageUsage* page_usage) {\n  static const bool disable_json_defragmentation =\n      absl::GetFlag(FLAGS_disable_json_defragmentation);\n\n  if (OmitDefrag()) {\n    page_usage->RecordNotRequired();\n    return false;\n  }\n\n  switch (taglen_) {\n    case ROBJ_TAG:\n      // currently only these object types are supported for this operation\n      if (u_.r_obj.inner_obj() != nullptr) {\n        return u_.r_obj.DefragIfNeeded(page_usage);\n      }\n      return false;\n    case SMALL_TAG:\n      return u_.small_str.DefragIfNeeded(page_usage);\n    case JSON_TAG:\n      if (disable_json_defragmentation) {\n        return false;\n      }\n      
return u_.json_obj.DefragIfNeeded(page_usage);\n    case SDS_TTL_TAG:\n      if (page_usage->IsPageForObjectUnderUtilized(u_.sds_ttl.sds_ptr)) {\n        size_t len = sdslen(u_.sds_ttl.sds_ptr);\n        char* new_sds = sdsnewlen(u_.sds_ttl.sds_ptr, len);\n        sdsfree(u_.sds_ttl.sds_ptr);\n        u_.sds_ttl.sds_ptr = new_sds;\n        return true;\n      }\n      return false;\n    case INT_TAG:\n      page_usage->RecordNotRequired();\n      // this is not relevant in this case\n      return false;\n    case EXTERNAL_TAG:\n      page_usage->RecordNotRequired();\n      return false;\n    default:\n      page_usage->RecordNotRequired();\n      // This is the case when the object is at inline_str\n      return false;\n  }\n}\n\nbool CompactObj::HasAllocated() const {\n  if (IsRef() || taglen_ == INT_TAG || IsInline() || taglen_ == EXTERNAL_TAG ||\n      (taglen_ == ROBJ_TAG && u_.r_obj.inner_obj() == nullptr))\n    return false;\n\n  DCHECK(taglen_ == ROBJ_TAG || taglen_ == SMALL_TAG || taglen_ == JSON_TAG || taglen_ == SBF_TAG ||\n         taglen_ == CMS_TAG || taglen_ == SDS_TTL_TAG || taglen_ == TOPK_TAG);\n  return true;\n}\n\nbool CompactObj::TagAllowsEmptyValue() const {\n  const auto type = ObjType();\n  return type == OBJ_JSON || type == OBJ_STREAM || type == OBJ_STRING || type == OBJ_SBF ||\n         type == OBJ_CMS || type == OBJ_TOPK || type == OBJ_SET;\n}\n\nvoid __attribute__((noinline)) CompactObj::GetString(string* res) const {\n  res->resize(Size());\n  GetString(res->data());\n}\n\nvoid CompactObj::GetString(char* dest) const {\n  CHECK(!IsExternal());\n\n  if (IsInline()) {\n    GetStrEncoding().Decode({u_.inline_str, taglen_}, dest);\n    return;\n  }\n\n  if (taglen_ == INT_TAG) {\n    absl::AlphaNum an(u_.ival);\n    memcpy(dest, an.data(), an.size());\n    return;\n  }\n\n  if (encoding_) {\n    StrEncoding str_encoding = GetStrEncoding();\n    string_view decode_blob = GetEncodedBlob(str_encoding, dest);\n\n    
str_encoding.Decode(decode_blob, dest);\n    return;\n  }\n\n  // no encoding.\n  if (taglen_ == ROBJ_TAG) {\n    CHECK_EQ(OBJ_STRING, u_.r_obj.type());\n    DCHECK_EQ(OBJ_ENCODING_RAW, u_.r_obj.encoding());\n    memcpy(dest, u_.r_obj.inner_obj(), u_.r_obj.Size());\n    return;\n  }\n\n  if (taglen_ == SDS_TTL_TAG) {\n    memcpy(dest, u_.sds_ttl.sds_ptr, sdslen(u_.sds_ttl.sds_ptr));\n    return;\n  }\n\n  if (taglen_ == SMALL_TAG)\n    return u_.small_str.Get(dest);\n\n  LOG(FATAL) << \"Bad tag \" << int(taglen_);\n}\n\nvoid CompactObj::SetExternal(size_t offset, uint32_t sz, ExternalRep rep) {\n  uint8_t first_byte = 0;\n  if (encoding_ == HUFFMAN_ENC) {\n    CHECK(rep == ExternalRep::STRING);\n    first_byte = GetFirstByte();\n  }\n  SetMeta(EXTERNAL_TAG, mask_);\n\n  u_.ext_ptr.is_cool = 0;\n  u_.ext_ptr.representation = static_cast<uint8_t>(rep);\n  u_.ext_ptr.first_byte = first_byte;\n  u_.ext_ptr.page_offset = offset % 4096;\n  u_.ext_ptr.serialized_size = sz;\n  u_.ext_ptr.offload.page_index = offset / 4096;\n}\n\nCompactObj::ExternalRep CompactObj::GetExternalRep() const {\n  DCHECK(IsExternal());\n  return static_cast<CompactObj::ExternalRep>(u_.ext_ptr.representation);\n}\n\nvoid CompactObj::SetCool(size_t offset, uint32_t sz, ExternalRep rep,\n                         tiering::TieredCoolRecord* record) {\n  encoding_ = record->value.encoding_;\n  SetMeta(EXTERNAL_TAG, record->value.mask_);\n\n  u_.ext_ptr.is_cool = 1;\n  u_.ext_ptr.representation = static_cast<uint8_t>(rep);\n  u_.ext_ptr.page_offset = offset % 4096;\n  u_.ext_ptr.serialized_size = sz;\n  u_.ext_ptr.cool_record = record;\n}\n\nauto CompactObj::GetCool() const -> CoolItem {\n  DCHECK(IsExternal() && u_.ext_ptr.is_cool);\n\n  CoolItem res;\n  res.page_offset = u_.ext_ptr.page_offset;\n  res.serialized_size = u_.ext_ptr.serialized_size;\n  res.record = u_.ext_ptr.cool_record;\n  return res;\n}\n\nvoid CompactObj::Freeze(size_t offset, size_t sz) {\n  SetExternal(offset, sz, 
GetExternalRep());\n}\n\nstd::pair<size_t, size_t> CompactObj::GetExternalSlice() const {\n  DCHECK_EQ(EXTERNAL_TAG, taglen_);\n  auto& ext = u_.ext_ptr;\n  size_t offset = ext.page_offset;\n  offset += size_t(ext.is_cool ? ext.cool_record->page_index : ext.offload.page_index) * 4096;\n  return {offset, size_t(u_.ext_ptr.serialized_size)};\n}\n\nstring_view CompactObj::GetEncodedBlob(StrEncoding str_encoding, char* opt_dest) const {\n  if (taglen_ == ROBJ_TAG) {\n    CHECK_EQ(OBJ_STRING, u_.r_obj.type());\n    DCHECK_EQ(OBJ_ENCODING_RAW, u_.r_obj.encoding());\n    return u_.r_obj.AsView();\n  } else if (IsInline()) {\n    return {u_.inline_str, taglen_};\n  } else if (taglen_ == SDS_TTL_TAG) {\n    return u_.sds_ttl.view();\n  }\n\n  CHECK_EQ(taglen_, SMALL_TAG);\n  auto& ss = u_.small_str;\n  char* copy_dest = nullptr;\n  if (opt_dest && str_encoding.enc_ != HUFFMAN_ENC) {\n    // Write to rightmost location of dest buffer to leave some bytes for inline unpacking\n    size_t decoded_len = str_encoding.DecodedSize(ss.size(), ss.first_byte());\n    copy_dest = opt_dest + (decoded_len - ss.size());\n  } else {\n    tl.tmp_buf.resize(ss.size());\n    copy_dest = reinterpret_cast<char*>(tl.tmp_buf.data());\n  }\n  ss.Get(copy_dest);\n  return {copy_dest, ss.size()};\n}\n\nvoid CompactObj::Materialize(std::string_view blob, bool is_raw) {\n  CHECK(IsExternal()) << int(taglen_);\n  DCHECK_EQ(u_.ext_ptr.representation, static_cast<uint8_t>(ExternalRep::STRING));\n  DCHECK_GT(blob.size(), kInlineLen);  // There are no mutable commands that shrink strings\n\n  if (is_raw) {\n    if (SmallString::CanAllocate(blob.size())) {\n      SetMeta(SMALL_TAG, mask_);\n      tl.small_str_bytes += u_.small_str.Assign(blob);\n    } else {\n      SetMeta(ROBJ_TAG, mask_);\n      u_.r_obj.SetString(blob, tl.local_mr);\n    }\n  } else {\n    encoding_ = NONE_ENC;  // reset encoding\n    EncodeString(blob);\n  }\n}\n\nvoid CompactObj::Reset() {\n  if (HasAllocated()) {\n    Free();\n  }\n  
taglen_ = 0;\n  encoding_ = 0;\n  mask_ = 0;\n}\n\nuint8_t CompactObj::GetFirstByte() const {\n  DCHECK_EQ(ObjType(), OBJ_STRING);\n\n  if (IsInline()) {\n    return u_.inline_str[0];\n  }\n\n  if (taglen_ == ROBJ_TAG) {\n    CHECK_EQ(OBJ_STRING, u_.r_obj.type());\n    DCHECK_EQ(OBJ_ENCODING_RAW, u_.r_obj.encoding());\n    return *(uint8_t*)u_.r_obj.inner_obj();\n  }\n\n  if (taglen_ == SMALL_TAG) {\n    return u_.small_str.first_byte();\n  }\n\n  if (taglen_ == SDS_TTL_TAG) {\n    return u_.sds_ttl.sds_ptr[0];\n  }\n\n  if (taglen_ == EXTERNAL_TAG) {\n    if (u_.ext_ptr.is_cool) {\n      const CompactObj& cooled_obj = u_.ext_ptr.cool_record->value;\n      return cooled_obj.GetFirstByte();\n    }\n    return u_.ext_ptr.first_byte;\n  }\n\n  LOG(DFATAL) << \"Bad tag \" << int(taglen_);\n  return 0;\n}\n\nbool CompactObj::GetByteAtIndex(size_t idx, uint8_t* res) const {\n  CHECK(!IsExternal());\n  DCHECK_EQ(ObjType(), OBJ_STRING);\n\n  if (encoding_) {\n    StrEncoding str_encoding = GetStrEncoding();\n    string_view decode_blob = GetEncodedBlob(str_encoding, nullptr);\n\n    if (!str_encoding.DecodeByte(decode_blob, idx, res)) {\n      VLOG(1) << \"Offset out of bounds for encoded string: \" << idx\n              << \" >= \" << str_encoding.DecodedSize(decode_blob.size(), decode_blob[0]);\n      *res = 0;\n      return false;\n    }\n    return true;\n  }\n\n  // No encoding, we can directly access the byte at index.\n  string_view sv = GetSlice(&tl.tmp_str);\n  if (idx >= sv.size()) {\n    VLOG(1) << \"Offset out of bounds: \" << idx << \" >= \" << sv.size();\n    *res = 0;\n    return false;\n  }\n  *res = sv[idx];\n  return true;\n}\n\nstd::pair<bool, bool> CompactObj::SetByteAtIndex(size_t idx, uint8_t val) {\n  CHECK(!IsExternal());\n  DCHECK_EQ(ObjType(), OBJ_STRING);\n\n  // Inline string without encoding: modify directly.\n  if (IsInline() && !encoding_) {\n    if (idx >= taglen_) {\n      VLOG(1) << \"Offset out of bounds for inline string: \" << idx << \" 
>= \" << int(taglen_);\n      return {false, false};\n    }\n    u_.inline_str[idx] = val;\n    return {true, true};\n  }\n\n  // SDS_TTL_TAG raw string without encoding: modify directly.\n  if (taglen_ == SDS_TTL_TAG && !encoding_) {\n    size_t len = sdslen(u_.sds_ttl.sds_ptr);\n    if (idx >= len) {\n      return {false, false};\n    }\n    u_.sds_ttl.sds_ptr[idx] = val;\n    return {true, true};\n  }\n\n  // ROBJ_TAG raw string without encoding: modify the underlying buffer directly.\n  if (taglen_ == ROBJ_TAG && !encoding_) {\n    CHECK_EQ(OBJ_STRING, u_.r_obj.type());\n    DCHECK_EQ(OBJ_ENCODING_RAW, u_.r_obj.encoding());\n    if (idx >= u_.r_obj.Size()) {\n      VLOG(1) << \"Offset out of bounds for raw string: \" << idx << \" >= \" << u_.r_obj.Size();\n      return {false, false};\n    }\n    reinterpret_cast<char*>(u_.r_obj.inner_obj())[idx] = val;\n    return {true, true};\n  }\n\n  // For ASCII encoded ROBJ strings we can modify the underlying buffer directly.\n  if (encoding_ && (encoding_ == ASCII1_ENC || encoding_ == ASCII2_ENC) && taglen_ == ROBJ_TAG &&\n      absl::ascii_isascii(val)) {\n    DCHECK_EQ(OBJ_ENCODING_RAW, u_.r_obj.encoding());\n    auto* buf = reinterpret_cast<uint8_t*>(u_.r_obj.inner_obj());\n    size_t decoded_len = GetStrEncoding().DecodedSize(u_.r_obj.Size(), buf[0]);\n    if (idx >= decoded_len) {\n      VLOG(1) << \"Offset out of bounds for ASCII encoded string: \" << idx << \" >= \" << decoded_len;\n      return {false, false};\n    }\n    detail::ascii_pack_byte(buf, decoded_len, idx, val);\n    return {true, true};\n  }\n\n  // For other encoded strings, INT_TAG, SMALL_TAG we need to decode, modify, and re-encode.\n  string str;\n  GetString(&str);\n  if (idx >= str.size()) {\n    VLOG(1) << \"Offset out of bounds: \" << idx << \" >= \" << str.size();\n    return {false, false};\n  }\n  str[idx] = val;\n  SetString(str);\n  return {true, false};\n}\n\n// Frees all resources if owns.\nvoid CompactObj::Free() {\n  
DCHECK(HasAllocated());\n\n  if (taglen_ == ROBJ_TAG) {\n    u_.r_obj.Free(tl.local_mr);\n  } else if (taglen_ == SMALL_TAG) {\n    tl.small_str_bytes -= u_.small_str.MallocUsed();\n    u_.small_str.Free();\n  } else if (taglen_ == JSON_TAG) {\n    DVLOG(1) << \"Freeing JSON object\";\n    if (JsonEnconding() == kEncodingJsonCons) {\n      DeleteMR<JsonType>(u_.json_obj.cons.json_ptr);\n    } else {\n      tl.local_mr->deallocate(u_.json_obj.flat.flat_ptr, u_.json_obj.flat.json_len, kAlignSize);\n    }\n  } else if (taglen_ == SBF_TAG) {\n    DeleteMR<SBF>(u_.sbf);\n  } else if (taglen_ == TOPK_TAG) {\n    DeleteMR<TOPK>(u_.topk);\n  } else if (taglen_ == CMS_TAG) {\n    DeleteMR<CMS>(u_.cms);\n  } else if (taglen_ == SDS_TTL_TAG) {\n    sdsfree(u_.sds_ttl.sds_ptr);\n  } else {\n    LOG(FATAL) << \"Unsupported tag \" << int(taglen_);\n  }\n\n  memset(u_.inline_str, 0, kInlineLen);\n}\n\nsize_t CompactObj::MallocUsed(bool slow) const {\n  if (!HasAllocated())\n    return 0;\n\n  if (taglen_ == ROBJ_TAG) {\n    return u_.r_obj.MallocUsed(slow);\n  }\n\n  if (taglen_ == JSON_TAG) {\n    // TODO fix this once we fully support flat json\n    // This is here because accessing a union field that is not active\n    // is UB.\n    if (JsonEnconding() == kEncodingJsonFlat) {\n      return 0;\n    }\n    return u_.json_obj.cons.bytes_used;\n  }\n\n  if (taglen_ == SMALL_TAG) {\n    return u_.small_str.MallocUsed();\n  }\n\n  if (taglen_ == SBF_TAG) {\n    return u_.sbf->MallocUsed();\n  }\n\n  if (taglen_ == CMS_TAG) {\n    return u_.cms->MallocUsed();\n  }\n\n  if (taglen_ == SDS_TTL_TAG) {\n    return sdsAllocSize(u_.sds_ttl.sds_ptr);\n  }\n\n  if (taglen_ == TOPK_TAG) {\n    return u_.topk->MallocUsed();\n  }\n\n  LOG(DFATAL) << \"should not reach\";\n  return 0;\n}\n\n// TODO: we need this operator ONLY because we search in prime-table based on the ExpireKey\n// which is a reference to the CompactKey. 
Therefore operator== currently works\n// specifically for this particular use-case.\n// So once we remove the expire table, we can remove this operator too.\n// In addition - we MUST remove AsRef/IsRef api as well as it will break\n// once we start using SetExpireTime/ClearExpireTime methods.\n// All in all, we will free up two additional bits.\nbool CompactKey::operator==(const CompactKey& o) const {\n  DCHECK(taglen_ != JSON_TAG && o.taglen_ != JSON_TAG) << \"cannot use JSON type to check equal\";\n\n  // Cross-tag/encoding comparison: fall back to decoded string comparison for OBJ_STRING.\n  // This handles e.g. SDS_TTL_TAG vs ROBJ_TAG/inline/INT_TAG with same logical content.\n  if (taglen_ != o.taglen_ || encoding_ != o.encoding_) {\n    if (ObjType() == OBJ_STRING && o.ObjType() == OBJ_STRING) {\n      std::string tmp;\n      return *this == o.GetSlice(&tmp);\n    }\n    return false;\n  }\n\n  if (taglen_ == ROBJ_TAG)\n    return u_.r_obj.Equal(o.u_.r_obj);\n\n  if (taglen_ == INT_TAG)\n    return u_.ival == o.u_.ival;\n\n  if (taglen_ == SMALL_TAG)\n    return u_.small_str.Equal(o.u_.small_str);\n\n  if (taglen_ == SDS_TTL_TAG)\n    return u_.sds_ttl.view() == o.u_.sds_ttl.view();\n\n  DCHECK(IsInline() && o.IsInline());\n\n  return memcmp(u_.inline_str, o.u_.inline_str, taglen_) == 0;\n}\n\nbool CompactObj::CmpNonInline(std::string_view sv) const {\n  DCHECK_GT(taglen_, kInlineLen);\n  switch (taglen_) {\n    case INT_TAG:\n      return absl::AlphaNum(u_.ival).Piece() == sv;\n    case ROBJ_TAG:\n      return u_.r_obj.Equal(sv);\n    case SMALL_TAG:\n      return u_.small_str.Equal(sv);\n    case SDS_TTL_TAG:\n      return u_.sds_ttl.view() == sv;\n    default:\n      break;\n  }\n  return false;\n}\n\nbool CompactObj::CmpEncoded(string_view sv) const {\n  DCHECK(encoding_);\n\n  if (encoding_ == HUFFMAN_ENC) {\n    size_t sz = Size();\n    if (sv.size() != sz)\n      return false;\n\n    if (IsInline()) {\n      // Buffer must accommodate maximum 
decompressed size from inline storage (~8x compression)\n      constexpr size_t kMaxHuffLen = kInlineLen * 8;\n      if (sz <= kMaxHuffLen) {\n        char buf[kMaxHuffLen];\n        auto domain = is_key_ ? HUFF_KEYS : HUFF_STRING_VALUES;\n        const auto& decoder = tl.GetHuffmanDecoder(domain);\n        CHECK(decoder.Decode({u_.inline_str + 1, size_t(taglen_ - 1)}, sz, buf));\n        return sv == string_view(buf, sz);\n      }\n    }\n    tl.tmp_str.resize(sz);\n    GetString(tl.tmp_str.data());\n    return sv == tl.tmp_str;\n  }\n\n  size_t encode_len = binpacked_len(sv.size());\n  if (IsInline()) {\n    if (encode_len != taglen_)\n      return false;\n\n    char buf[kInlineLen * 2];\n    detail::ascii_unpack(to_byte(u_.inline_str), sv.size(), buf);\n\n    return sv == string_view(buf, sv.size());\n  }\n\n  if (taglen_ == ROBJ_TAG) {\n    if (u_.r_obj.type() != OBJ_STRING)\n      return false;\n\n    if (u_.r_obj.Size() != encode_len)\n      return false;\n\n    if (!detail::validate_ascii_fast(sv.data(), sv.size()))\n      return false;\n\n    return detail::compare_packed(to_byte(u_.r_obj.inner_obj()), sv.data(), sv.size());\n  }\n\n  if (taglen_ == SDS_TTL_TAG) {\n    size_t sds_len = sdslen(u_.sds_ttl.sds_ptr);\n    if (sds_len != encode_len)\n      return false;\n\n    if (!detail::validate_ascii_fast(sv.data(), sv.size()))\n      return false;\n\n    return detail::compare_packed(to_byte(u_.sds_ttl.sds_ptr), sv.data(), sv.size());\n  }\n\n  if (taglen_ == JSON_TAG) {\n    return false;  // cannot compare json with string\n  }\n\n  if (taglen_ == SMALL_TAG) {\n    if (u_.small_str.size() != encode_len)\n      return false;\n\n    if (!detail::validate_ascii_fast(sv.data(), sv.size()))\n      return false;\n\n    // We need to compare an unpacked sv with 2 packed parts.\n    // To compare easily ascii with binary we would need to split ascii at 8 bytes boundaries\n    // so that we could pack it into complete binary bytes (8 ascii chars produce 7 
bytes).\n    // I choose a minimal 16 byte prefix:\n    // 1. sv must be longer than 16 if we are here (at least 18 actually).\n    // 2. 16 chars produce 14 byte blob that should cover the first slice (10 bytes) and 4 bytes\n    //    of the second slice.\n    // 3. I assume that the first slice is less than 14 bytes which is correct since small string\n    //    has only 9-10 bytes in its inline prefix storage.\n    DCHECK_GT(sv.size(), 16u);  // we would not be in SMALL_TAG, otherwise.\n\n    auto slice = u_.small_str.Get();\n    DCHECK_LT(slice[0].size(), 14u);\n\n    uint8_t tmpbuf[14];\n    detail::ascii_pack(sv.data(), 16, tmpbuf);\n\n    // Compare the first slice.\n    if (memcmp(slice[0].data(), tmpbuf, slice[0].size()) != 0)\n      return false;\n\n    // Compare the prefix of the second slice.\n    size_t pref_len = 14 - slice[0].size();\n\n    if (memcmp(slice[1].data(), tmpbuf + slice[0].size(), pref_len) != 0)\n      return false;\n\n    // We verified that the first 16 chars (or 14 bytes) are equal.\n    // Lets verify the rest - suffix of the second slice and the suffix of sv.\n    return detail::compare_packed(to_byte(slice[1].data() + pref_len), sv.data() + 16,\n                                  sv.size() - 16);\n  }\n  LOG(FATAL) << \"Unsupported tag \" << int(taglen_);\n  return false;\n}\n\nvoid CompactObj::EncodeString(string_view str) {\n  DCHECK_GT(str.size(), kInlineLen);\n  DCHECK_EQ(NONE_ENC, encoding_);\n\n  string_view encoded = str;\n  bool huff_encoded = false;\n\n  // We chose such length that we can store the decoded length delta into 1 byte.\n  // The maximum huffman compression is 1/8, so 288 / 8 = 36.\n  // 288 - 36 = 252, which is smaller than 256.\n  // TODO: introduce variable length huffman length.\n  constexpr unsigned kMaxHuffLen = 288;\n\n  // For sizes 17, 18 we would like to test ascii encoding first as it's more efficient.\n  // And if it succeeds we can squash into the inline buffer.\n  bool is_ascii =\n      
kUseAsciiEncoding && str.size() < 19 && detail::validate_ascii_fast(str.data(), str.size());\n\n  // if !is_ascii, we try huffman encoding next.\n  if (!is_ascii && str.size() <= kMaxHuffLen) {\n    auto& huffman = is_key_ ? tl.huff_keys : tl.huff_string_values;\n    if (huffman.encoder.valid()) {\n      unsigned dest_len = huffman.encoder.CompressedBound(str.size());\n      // 1 byte for storing the size delta.\n      tl.tmp_buf.resize(1 + dest_len);\n      string err_msg;\n      ++tl.huff_encode_total;\n      bool res = huffman.encoder.Encode(str, tl.tmp_buf.data() + 1, &dest_len, &err_msg);\n      if (res) {\n        // we accept huffman encoding only if it is:\n        // 1. smaller than the original string by 20%\n        // 2. allows us to store the encoded string in the inline buffer\n        if (dest_len && (dest_len < kInlineLen || (dest_len + dest_len / 5) < str.size())) {\n          huff_encoded = true;\n          tl.huff_encode_success++;\n          encoded = string_view{reinterpret_cast<char*>(tl.tmp_buf.data()), dest_len + 1};\n          unsigned delta = str.size() - dest_len;\n          DCHECK_LT(delta, 256u);\n          tl.tmp_buf[0] = static_cast<uint8_t>(delta);\n          encoding_ = HUFFMAN_ENC;\n          if (encoded.size() <= kInlineLen) {\n            SetMeta(encoded.size(), mask_);\n            memcpy(u_.inline_str, tl.tmp_buf.data(), encoded.size());\n            return;\n          }\n        }\n      } else {\n        // Should not happen, means we have an internal buf.\n        LOG(DFATAL) << \"Failed to encode string with huffman: \" << err_msg;\n      }\n    }\n  }\n\n  // Finally we try ascii encoding for longer strings if we have not encoded them with huffman.\n  if (kUseAsciiEncoding && !is_ascii && str.size() >= 19 && !huff_encoded) {\n    is_ascii = detail::validate_ascii_fast(str.data(), str.size());\n  }\n\n  if (is_ascii) {\n    size_t encode_len = binpacked_len(str.size());\n    size_t rev_len = ascii_len(encode_len);\n\n    if 
(rev_len == str.size()) {\n      encoding_ = ASCII2_ENC;  // str hits its highest bound.\n    } else {\n      CHECK_EQ(str.size(), rev_len - 1) << \"Bad ascii encoding for len \" << str.size();\n      encoding_ = ASCII1_ENC;  // str is shorter than its highest bound.\n    }\n\n    tl.tmp_buf.resize(encode_len);\n    detail::ascii_pack_simd2(str.data(), str.size(), tl.tmp_buf.data());\n    encoded = string_view{reinterpret_cast<char*>(tl.tmp_buf.data()), encode_len};\n\n    if (encoded.size() <= kInlineLen) {\n      SetMeta(encoded.size(), mask_);\n      detail::ascii_pack(str.data(), str.size(), reinterpret_cast<uint8_t*>(u_.inline_str));\n\n      return;\n    }\n  }\n\n  DCHECK_GT(encoded.size(), kInlineLen);\n\n  if (SmallString::CanAllocate(encoded.size())) {\n    if (taglen_ == SMALL_TAG)\n      tl.small_str_bytes -= u_.small_str.MallocUsed();\n    else\n      SetMeta(SMALL_TAG, mask_);\n\n    tl.small_str_bytes += u_.small_str.Assign(encoded);\n    return;\n  }\n\n  SetMeta(ROBJ_TAG, mask_);\n  u_.r_obj.SetString(encoded, tl.local_mr);\n}\n\nstd::array<std::string_view, 2> CompactObj::GetRawString() const {\n  DCHECK(!IsExternal());\n\n  if (taglen_ == ROBJ_TAG) {\n    CHECK_EQ(OBJ_STRING, u_.r_obj.type());\n    DCHECK_EQ(OBJ_ENCODING_RAW, u_.r_obj.encoding());\n    return {u_.r_obj.AsView(), {}};\n  }\n\n  if (taglen_ == SMALL_TAG) {\n    return u_.small_str.Get();\n  }\n\n  if (taglen_ == SDS_TTL_TAG) {\n    return {u_.sds_ttl.view(), {}};\n  }\n\n  LOG(FATAL) << \"Unsupported tag for GetRawString(): \" << int(taglen_);\n  return {};\n}\n\nMemoryResource* CompactObj::memory_resource() {\n  return tl.local_mr;\n}\n\nstring_view CompactObj::SdsTtlString::view() const {\n  return string_view{sds_ptr, sdslen(sds_ptr)};\n}\n\nbool CompactObj::JsonConsT::DefragIfNeeded(PageUsage* page_usage) {\n  const MiMemoryResource* mr = static_cast<MiMemoryResource*>(memory_resource());\n\n  const int64_t before = static_cast<int64_t>(mr->used());\n  DCHECK_GE(before, 0) << 
\"Memory usage is more than int64_t max value\";\n\n  bool did_defragment = Defragment(*json_ptr, page_usage);\n\n  const int64_t after = static_cast<int64_t>(mr->used());\n  DCHECK_GE(after, 0) << \"Memory usage is more than int64_t max value\";\n\n  if (const int64_t delta = after - before; delta != 0) {\n    bytes_used = UpdateSize(bytes_used, delta);\n  }\n\n  return did_defragment;\n}\n\nbool CompactObj::FlatJsonT::DefragIfNeeded(PageUsage* page_usage) {\n  if (uint8_t* old = flat_ptr; page_usage->IsPageForObjectUnderUtilized(old)) {\n    const uint32_t size = json_len;\n    flat_ptr = static_cast<uint8_t*>(tl.local_mr->allocate(size, kAlignSize));\n    memcpy(flat_ptr, old, size);\n    tl.local_mr->deallocate(old, size, kAlignSize);\n    return true;\n  }\n\n  return false;\n}\n\nbool CompactObj::JsonWrapper::DefragIfNeeded(PageUsage* page_usage) {\n  if (JsonEnconding() == kEncodingJsonCons) {\n    return cons.DefragIfNeeded(page_usage);\n  }\n\n  return flat.DefragIfNeeded(page_usage);\n}\n\nconstexpr std::pair<CompactObjType, std::string_view> kObjTypeToString[] = {\n    {OBJ_STRING, \"string\"sv},  {OBJ_LIST, \"list\"sv},     {OBJ_SET, \"set\"sv},\n    {OBJ_ZSET, \"zset\"sv},      {OBJ_HASH, \"hash\"sv},     {OBJ_STREAM, \"stream\"sv},\n    {OBJ_KEY, \"key\"sv},  // pseudo-type used for memory tracking\n    {OBJ_JSON, \"ReJSON-RL\"sv}, {OBJ_SBF, \"MBbloom--\"sv}, {OBJ_CMS, \"CMSk-TYPE\"sv},\n    {OBJ_TOPK, \"TopK-TYPE\"sv}};\n\nstd::string_view ObjTypeToString(CompactObjType type) {\n  for (auto& p : kObjTypeToString) {\n    if (type == p.first) {\n      return p.second;\n    }\n  }\n\n  LOG(DFATAL) << \"Unsupported type \" << type;\n  return \"Invalid type\"sv;\n}\n\nCompactObjType ObjTypeFromString(std::string_view sv) {\n  for (auto& p : kObjTypeToString) {\n    if (absl::EqualsIgnoreCase(sv, p.second)) {\n      return p.first;\n    }\n  }\n  return kInvalidCompactObjType;\n}\n\nvoid CompactKey::SetExpireTime(uint64_t abs_ms) {\n  DCHECK(!IsRef() && 
!IsExternal());\n\n  // Already SDS_TTL_TAG — update TTL in place.\n  if (taglen_ == SDS_TTL_TAG) {\n    u_.sds_ttl.exp_ms = abs_ms;\n    return;\n  }\n\n  char* new_sds = nullptr;\n\n  if (IsInline()) {\n    new_sds = sdsnewlen(u_.inline_str, taglen_);\n    // encoding_ preserved as-is.\n  } else if (taglen_ == INT_TAG) {\n    absl::AlphaNum an(u_.ival);\n    new_sds = sdsnewlen(an.data(), an.size());\n    encoding_ = NONE_ENC;\n  } else if (taglen_ == SMALL_TAG) {\n    size_t total = u_.small_str.size();\n    new_sds = sdsnewlen(nullptr, total);\n    u_.small_str.Get(new_sds);\n    tl.small_str_bytes -= u_.small_str.MallocUsed();\n    u_.small_str.Free();\n  } else if (taglen_ == ROBJ_TAG) {\n    CHECK_EQ(OBJ_STRING, u_.r_obj.type());\n    auto view = u_.r_obj.AsView();\n    new_sds = sdsnewlen(view.data(), view.size());\n    u_.r_obj.Free(tl.local_mr);\n  } else {\n    LOG(FATAL) << \"Unexpected tag for SetExpireTime: \" << int(taglen_);\n  }\n\n  u_.sds_ttl.sds_ptr = new_sds;\n  u_.sds_ttl.exp_ms = abs_ms;\n  taglen_ = SDS_TTL_TAG;\n  mask_bits_.expire = 1;\n}\n\nbool CompactKey::ClearExpireTime() {\n  if (taglen_ != SDS_TTL_TAG)\n    return false;\n  DCHECK(!IsRef() && !IsExternal());\n\n  string decoded;\n  GetString(&decoded);\n  SetMeta(0, mask_);\n  encoding_ = NONE_ENC;\n  mask_bits_.expire = 0;\n\n  SetString(decoded);\n  return true;\n}\n\nuint64_t CompactKey::GetExpireTime() const {\n  if (taglen_ != SDS_TTL_TAG)\n    return 0;\n  DCHECK(!IsRef() && !IsExternal());\n  return u_.sds_ttl.exp_ms;\n}\n\nsize_t CompactObj::StrEncoding::DecodedSize(string_view blob) const {\n  return DecodedSize(blob.size(), blob[0]);\n}\n\nsize_t CompactObj::StrEncoding::DecodedSize(size_t blob_size, uint8_t first_byte) const {\n  switch (enc_) {\n    case NONE_ENC:\n      return blob_size;\n    case ASCII1_ENC:\n    case ASCII2_ENC:\n      return ascii_len(blob_size) - (enc_ == ASCII1_ENC);\n    case HUFFMAN_ENC:\n      return blob_size + int(first_byte) - 1;\n  };\n  
return 0;\n}\n\nsize_t CompactObj::StrEncoding::Decode(std::string_view blob, char* dest) const {\n  if (blob.empty())\n    return 0;\n  size_t decoded_len = DecodedSize(blob);\n  switch (enc_) {\n    case NONE_ENC:\n      memcpy(dest, blob.data(), blob.size());\n      break;\n    case ASCII1_ENC:\n    case ASCII2_ENC:\n      detail::ascii_unpack(reinterpret_cast<const uint8_t*>(blob.data()), decoded_len, dest);\n      break;\n    case HUFFMAN_ENC: {\n      auto domain = is_key_ ? HUFF_KEYS : HUFF_STRING_VALUES;\n      const auto& decoder = tl.GetHuffmanDecoder(domain);\n      decoder.Decode(blob.substr(1), decoded_len, dest);\n      break;\n    }\n  };\n  return decoded_len;\n}\n\nbool CompactObj::StrEncoding::DecodeByte(std::string_view blob, size_t idx, uint8_t* dest) const {\n  if (blob.empty()) {\n    return false;\n  }\n  size_t decoded_len = DecodedSize(blob);\n  if (idx >= decoded_len) {\n    return false;\n  }\n  switch (enc_) {\n    case NONE_ENC:\n      *dest = blob[idx];\n      break;\n    case ASCII1_ENC:\n    case ASCII2_ENC:\n      *dest = detail::ascii_unpack_byte(reinterpret_cast<const uint8_t*>(blob.data()), decoded_len,\n                                        idx);\n      break;\n    case HUFFMAN_ENC: {\n      std::string decoded_huff_string(decoded_len, 0);\n      auto domain = is_key_ ? HUFF_KEYS : HUFF_STRING_VALUES;\n      const auto& decoder = tl.GetHuffmanDecoder(domain);\n      decoder.Decode(blob.substr(1), decoded_len, decoded_huff_string.data());\n      *dest = decoded_huff_string[idx];\n      break;\n    }\n  };\n  return true;\n}\n\nStringOrView CompactObj::StrEncoding::Decode(std::string_view blob) const {\n  switch (enc_) {\n    case NONE_ENC:\n      return StringOrView::FromView(blob);\n    default: {\n      string out;\n      out.resize(DecodedSize(blob));\n      Decode(blob, out.data());\n      return StringOrView::FromString(std::move(out));\n    }\n  }\n  return {};\n}\n\n/* Create a new stream data structure. 
*/\nstream* streamNew() {\n  stream* s = (stream*)zmalloc(sizeof(stream));\n  s->rax = raxNew();\n  s->length = 0;\n  s->first_id.ms = 0;\n  s->first_id.seq = 0;\n  s->last_id.ms = 0;\n  s->last_id.seq = 0;\n  s->max_deleted_entry_id.seq = 0;\n  s->max_deleted_entry_id.ms = 0;\n  s->entries_added = 0;\n  s->cgroups = NULL; /* Created on demand to save memory when not used. */\n  return s;\n}\n\n/* Free a consumer and associated data structures. Note that this function\n * will not reassign the pending messages associated with this consumer\n * nor will delete them from the stream, so when this function is called\n * to delete a consumer, and not when the whole stream is destroyed, the caller\n * should do some work before. */\nstatic void streamFreeConsumer(streamConsumer* sc) {\n  raxFree(sc->pel); /* No value free callback: the PEL entries are shared\n                       between the consumer and the main stream PEL. */\n  sdsfree(sc->name);\n  zfree(sc);\n}\n\n/* Used for generic free functions. */\nstatic void streamFreeConsumerVoid(void* sc) {\n  streamFreeConsumer((streamConsumer*)sc);\n}\n\n/* Used for generic free functions. */\nstatic void streamFreeCGVoid(void* cg_) {\n  streamCG* cg = (streamCG*)cg_;\n  raxFreeWithCallback(cg->pel, zfree);\n  raxFreeWithCallback(cg->consumers, streamFreeConsumerVoid);\n  zfree(cg);\n}\n\nstatic void lpFreeVoid(void* lp) {\n  lpFree((uint8_t*)lp);\n}\n\n/* Free a stream, including the listpacks stored inside the radix tree. */\nvoid freeStream(stream* s) {\n  raxFreeWithCallback(s->rax, lpFreeVoid);\n  if (s->cgroups)\n    raxFreeWithCallback(s->cgroups, streamFreeCGVoid);\n  zfree(s);\n}\n\n}  // namespace dfly\n"
  },
  {
    "path": "src/core/compact_object.h",
    "content": "// Copyright 2024, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#pragma once\n\n#include <absl/base/internal/endian.h>\n\n#include <optional>\n#include <type_traits>\n\n#include \"base/pmr/memory_resource.h\"\n#include \"common/string_or_view.h\"\n#include \"core/json/json_object.h\"\n#include \"core/mi_memory_resource.h\"\n#include \"core/small_string.h\"\n\ntypedef struct stream stream;\n\nnamespace dfly {\n\nnamespace tiering {\nstruct TieredCoolRecord;\n}\n\nconstexpr unsigned kEncodingIntSet = 0;\nconstexpr unsigned kEncodingStrMap2 = 2;  // for set/map encodings of strings using DenseSet\nconstexpr unsigned kEncodingQL2 = 1;\nconstexpr unsigned kEncodingListPack = 3;\nconstexpr unsigned kEncodingJsonCons = 0;\nconstexpr unsigned kEncodingJsonFlat = 1;\n\nclass SBF;\nclass TOPK;\nclass CMS;\nclass PageUsage;\n\nusing cmn::StringOrView;\nnamespace detail {\n\n// redis objects or blobs of upto 4GB size.\nclass RobjWrapper {\n public:\n  using MemoryResource = PMR_NS::memory_resource;\n\n  RobjWrapper() : sz_(0), type_(0), encoding_(0) {\n  }\n\n  size_t MallocUsed(bool slow) const;\n\n  uint64_t HashCode() const;\n  bool Equal(const RobjWrapper& ow) const;\n  bool Equal(std::string_view sv) const;\n  size_t Size() const;\n  void Free(MemoryResource* mr);\n\n  void SetString(std::string_view s, MemoryResource* mr);\n  void ReserveString(size_t size, MemoryResource* mr);\n  void AppendString(std::string_view s, MemoryResource* mr);\n  // Used when sz_ is used to denote memory usage\n  void SetSize(uint64_t size);\n  void Init(unsigned type, unsigned encoding, void* inner);\n\n  unsigned type() const {\n    return type_;\n  }\n  unsigned encoding() const {\n    return encoding_;\n  }\n  void* inner_obj() const {\n    return inner_obj_;\n  }\n\n  void set_inner_obj(void* ptr) {\n    inner_obj_ = ptr;\n  }\n\n  std::string_view AsView() const {\n    return std::string_view{reinterpret_cast<char*>(inner_obj_), 
sz_};\n  }\n\n  // Try reducing memory fragmentation by re-allocating values from underutilized pages.\n  // Returns true if re-allocated.\n  bool DefragIfNeeded(PageUsage* page_usage);\n\n private:\n  void ReallocateString(MemoryResource* mr);\n\n  size_t InnerObjMallocUsed() const;\n  void MakeInnerRoom(size_t current_cap, size_t desired, MemoryResource* mr);\n\n  void Set(void* p, size_t s) {\n    inner_obj_ = p;\n    sz_ = s;\n  }\n\n  void* inner_obj_ = nullptr;\n\n  // semantics depend on the type. For OBJ_STRING it's string length.\n  uint64_t sz_ : 56;\n\n  uint64_t type_ : 4;\n  uint64_t encoding_ : 4;\n} __attribute__((packed));\n\nstatic_assert(sizeof(RobjWrapper) == 16);\n\n}  // namespace detail\n\nusing CompactObjType = unsigned;\n\nconstexpr CompactObjType kInvalidCompactObjType = std::numeric_limits<CompactObjType>::max();\n\nuint32_t JsonEnconding();\n\nclass CompactObj {\n  static constexpr unsigned kInlineLen = 16;\n\n  void operator=(const CompactObj&) = delete;\n  CompactObj(const CompactObj&) = delete;\n\n protected:\n  // 0-16 is reserved for inline lengths of string type.\n  enum TagEnum : uint8_t {\n    INT_TAG = 17,\n    SMALL_TAG = 18,\n    ROBJ_TAG = 19,\n    EXTERNAL_TAG = 20,\n    JSON_TAG = 21,\n    SBF_TAG = 22,\n    CMS_TAG = 23,\n    SDS_TTL_TAG = 24,\n    TOPK_TAG = 25,\n  };\n\n  // String encoding types.\n  // With ascii compression it compresses 8 bytes to 7 but also 7 to 7.\n  // Therefore, in order to know the original length we introduce 2 states that\n  // correct the length upon decoding. ASCII1_ENC rounds down the decoded length,\n  // while ASCII2_ENC rounds it up. 
See DecodedLen implementation for more info.\n  enum EncodingEnum : uint8_t {\n    NONE_ENC = 0,\n    ASCII1_ENC = 1,\n    ASCII2_ENC = 2,\n    HUFFMAN_ENC = 3,\n  };\n\n public:\n  // Utility class for working with different string encodings (ascii, huffman, etc)\n  struct StrEncoding {\n    size_t DecodedSize(std::string_view blob) const;         // Size of decoded blob\n    size_t Decode(std::string_view blob, char* dest) const;  // Decode into dest, return size\n    StringOrView Decode(std::string_view blob) const;\n    // Decode a byte at offset into dest. Return true if decoded successfully,\n    // false if idx is out of bounds.\n    bool DecodeByte(std::string_view blob, size_t idx, uint8_t* dest) const;\n\n   private:\n    friend class CompactObj;\n    explicit StrEncoding(uint8_t enc, bool is_key)\n        : enc_(static_cast<EncodingEnum>(enc)), is_key_(is_key) {\n    }\n\n    size_t DecodedSize(size_t compr_size, uint8_t first_byte) const;\n\n    EncodingEnum enc_;\n    bool is_key_;\n  };\n\n  using MemoryResource = detail::RobjWrapper::MemoryResource;\n\n  // Different representations of external values\n  enum class ExternalRep : uint8_t {\n    STRING,         // OBJ_STRING, Basic representation with various string encodings\n    SERIALIZED_MAP  // OBJ_HASH, Serialized map\n  };\n\n  explicit CompactObj(bool is_key)\n      : is_key_{is_key}, taglen_{0}, encoding_{0} {  // default - empty string\n  }\n\n  CompactObj(std::string_view str, bool is_key) : CompactObj(is_key) {\n    SetString(str);\n  }\n\n  CompactObj(CompactObj&& cs) noexcept : CompactObj(cs.is_key_) {\n    operator=(std::move(cs));\n  };\n\n  ~CompactObj();\n\n  CompactObj& operator=(CompactObj&& o) noexcept;\n\n  // Returns object size depending on the semantics.\n  // For strings - returns the length of the string.\n  // For containers - returns number of elements in the container.\n  size_t Size() const;\n\n  bool IsRef() const {\n    return mask_bits_.ref;\n  }\n\n  std::string_view 
GetSlice(std::string* scratch) const;\n\n  std::string ToString() const {\n    std::string res;\n    GetString(&res);\n    return res;\n  }\n\n  uint64_t HashCode() const;\n  static uint64_t HashCode(std::string_view str);\n\n  bool HasFlag() const {\n    return mask_bits_.mc_flag;\n  }\n\n  void SetFlag(bool e) {\n    mask_bits_.mc_flag = e;\n  }\n\n  bool WasTouched() const {\n    return mask_bits_.touched;\n  }\n\n  void SetTouched(bool e) {\n    mask_bits_.touched = e;\n  }\n\n  bool DefragIfNeeded(PageUsage* page_usage);\n\n  void SetOmitDefrag(bool v) {\n    mask_bits_.omit_defrag = v;\n  }\n\n  bool OmitDefrag() const {\n    return mask_bits_.omit_defrag;\n  }\n\n  bool HasStashPending() const {\n    return mask_bits_.io_pending;\n  }\n\n  void SetStashPending(bool b) {\n    mask_bits_.io_pending = b;\n  }\n\n  bool IsSticky() const {\n    return mask_bits_.sticky;\n  }\n\n  void SetSticky(bool e) {\n    mask_bits_.sticky = e;\n  }\n\n  unsigned Encoding() const;\n  CompactObjType ObjType() const;\n\n  void* RObjPtr() const {\n    return u_.r_obj.inner_obj();\n  }\n\n  void SetRObjPtr(void* ptr) {\n    u_.r_obj.Init(u_.r_obj.type(), u_.r_obj.encoding(), ptr);\n  }\n\n  // takes ownership over obj_inner.\n  // type should not be OBJ_STRING.\n  void InitRobj(CompactObjType type, unsigned encoding, void* obj_inner);\n\n  // For STR object.\n  void SetInt(int64_t val);\n  std::optional<int64_t> TryGetInt() const;\n\n  void GetString(std::string* res) const;\n\n  void SetString(std::string_view str);\n  void ReserveString(size_t size);\n  void AppendString(std::string_view str);\n\n  // Will set this to hold OBJ_JSON, after that it is safe to call GetJson\n  // NOTE: in order to avid copy which can be expensive in this case,\n  // you need to move an object that created with the function JsonFromString\n  // into here, no copying is allowed!\n  void SetJson(JsonType&& j);\n  void SetJson(const uint8_t* buf, size_t len);\n  // Adjusts the size used by json\n  void 
SetJsonSize(int64_t size);\n  // Adjusts the size used by a stream\n  void AddStreamSize(int64_t size);\n\n  // pre condition - the type here is OBJ_JSON and was set with SetJson\n  JsonType* GetJson() const;\n\n  void SetSBF(SBF* sbf) {\n    SetMeta(SBF_TAG);\n    u_.sbf = sbf;\n  }\n\n  void SetSBF(uint64_t initial_capacity, double fp_prob, double grow_factor);\n  SBF* GetSBF() const;\n\n  void SetTOPK(TOPK* topk) {\n    SetMeta(TOPK_TAG);\n    u_.topk = topk;\n  }\n\n  void SetTOPK(uint32_t k, uint32_t width, uint32_t depth, double decay);\n  TOPK* GetTOPK() const;\n\n  void SetCMS(CMS* cms) {\n    SetMeta(CMS_TAG);\n    u_.cms = cms;\n  }\n\n  void SetCMS(uint32_t width, uint32_t depth);\n  CMS* GetCMS() const;\n\n  // dest must have at least Size() bytes available\n  void GetString(char* dest) const;\n\n  bool IsExternal() const {\n    return taglen_ == EXTERNAL_TAG;\n  }\n\n  // returns true if the value is stored in the cooling storage. Cooling storage has an item both\n  // on disk and in memory.\n  bool IsCool() const {\n    assert(IsExternal());\n    return u_.ext_ptr.is_cool;\n  }\n\n  void SetExternal(size_t offset, uint32_t sz, ExternalRep rep);\n  ExternalRep GetExternalRep() const;\n\n  // Switches to empty, non-external string.\n  // Preserves all the attributes.\n  void RemoveExternal() {\n    encoding_ = NONE_ENC;\n    SetMeta(0, mask_);\n  }\n\n  // Assigns a cooling record to the object together with its external slice.\n  void SetCool(size_t offset, uint32_t serialized_size, ExternalRep rep,\n               tiering::TieredCoolRecord* record);\n\n  struct CoolItem {\n    uint16_t page_offset;\n    size_t serialized_size;\n    tiering::TieredCoolRecord* record;\n  };\n\n  // Prerequisite: IsCool() is true.\n  // Returns the external data of the object incuding its ColdRecord.\n  CoolItem GetCool() const;\n\n  // Prequisite: IsCool() is true.\n  // Keeps cool record only as external value and discard in-memory part.\n  void Freeze(size_t offset, 
size_t sz);\n\n  std::pair<size_t, size_t> GetExternalSlice() const;\n\n  // Injects either the the raw string (extracted with GetRawString()) or the usual string\n  // back to the compact object. In the latter case, encoding is performed.\n  // Precondition: The object must be in the EXTERNAL state.\n  // Postcondition: The object is an in-memory string.\n  void Materialize(std::string_view str, bool is_raw);\n\n  // Returns the approximation of memory used by the object.\n  // If slow is true, may use more expensive methods to calculate the precise size.\n  size_t MallocUsed(bool slow = false) const;\n\n  // Resets the object to empty state (string).\n  void Reset();\n\n  bool IsInline() const {\n    return taglen_ <= kInlineLen;\n  }\n\n  uint8_t GetFirstByte() const;\n  // Returns true if the byte was decoded successfully, false if idx is out of bounds.\n  bool GetByteAtIndex(size_t idx, uint8_t* res) const;\n  // Returns a pair of booleans: {success, in_place}. success is false if offset is out of bounds\n  // in_place is true if the byte was set without needing to rewrite the string.\n  std::pair<bool, bool> SetByteAtIndex(size_t idx, uint8_t val);\n\n  struct Stats {\n    size_t small_string_bytes = 0;\n    uint64_t huff_encode_total = 0, huff_encode_success = 0;\n  };\n\n  static Stats GetStatsThreadLocal();\n  static void InitThreadLocal(MemoryResource* mr);\n\n  enum HuffmanDomain : uint8_t {\n    HUFF_KEYS = 0,\n    HUFF_STRING_VALUES = 1,\n    // TODO: add more domains.\n  };\n\n  static bool InitHuffmanThreadLocal(HuffmanDomain domain, std::string_view hufftable);\n  static MemoryResource* memory_resource();  // thread-local.\n\n  template <typename T, typename... Args> static T* AllocateMR(Args&&... 
args) {\n    void* ptr = memory_resource()->allocate(sizeof(T), alignof(T));\n    if constexpr (std::is_constructible_v<T, decltype(memory_resource())> && sizeof...(args) == 0)\n      return new (ptr) T{memory_resource()};\n    else\n      return new (ptr) T{std::forward<Args>(args)...};\n  }\n\n  template <typename T> static void DeleteMR(void* ptr) {\n    T* t = (T*)ptr;\n    t->~T();\n    memory_resource()->deallocate(ptr, sizeof(T), alignof(T));\n  }\n\n  // Return raw (non-decoded) string as two views. First is guaranteed to be non-empty.\n  // Precondition: the object is a non-inline string.\n  std::array<std::string_view, 2> GetRawString() const;\n\n  StrEncoding GetStrEncoding() const {\n    return StrEncoding{encoding_, is_key_};\n  }\n\n  bool HasAllocated() const;\n\n  bool TagAllowsEmptyValue() const;\n\n  uint8_t Tag() const {\n    return taglen_;\n  }\n\n private:\n  // Returns a string_view corresponding to the serialized encoded blob.\n  // If opt_dest is provided, it may be used to decode directly into the destination buffer.\n  std::string_view GetEncodedBlob(StrEncoding str_encoding, char* opt_dest) const;\n\n protected:\n  void EncodeString(std::string_view str);\n\n  // Requires: HasAllocated() - true.\n  void Free();\n\n  bool CmpEncoded(std::string_view sv) const;\n  bool CmpNonInline(std::string_view sv) const;\n\n  void SetMeta(uint8_t taglen, uint8_t mask = 0) {\n    if (HasAllocated()) {\n      Free();\n    } else {\n      memset(u_.inline_str, 0, kInlineLen);\n    }\n    taglen_ = taglen;\n    mask_ = mask;\n  }\n\n  struct ExternalPtr {\n    uint32_t serialized_size;\n    uint16_t page_offset;  // 0 for multi-page blobs. != 0 for small blobs.\n    uint8_t is_cool : 1;\n    uint8_t representation : 2;  // See ExternalRep\n    uint8_t is_reserved : 5;\n    uint8_t first_byte;\n\n    // We do not have enough space in the common area to store page_index together with\n    // cool_record pointer. 
Therefore, we moved this field into TieredCoolRecord itself.\n    struct Offload {\n      uint32_t page_index;\n      uint32_t reserved;\n    };\n\n    union {\n      Offload offload;\n      tiering::TieredCoolRecord* cool_record;\n    };\n  } __attribute__((packed));\n  static_assert(sizeof(ExternalPtr) == 16);\n\n  struct SdsTtlString {\n    char* sds_ptr;    // SDS string (length via sdslen)\n    uint64_t exp_ms;  // absolute expiry time in ms\n\n    std::string_view view() const;\n  } __attribute__((packed));\n\n  struct JsonConsT {\n    JsonType* json_ptr;\n    size_t bytes_used;\n\n    bool DefragIfNeeded(PageUsage* page_usage);\n  };\n\n  struct FlatJsonT {\n    uint32_t json_len;\n    uint8_t* flat_ptr;\n\n    bool DefragIfNeeded(PageUsage* page_usage);\n  };\n\n  struct JsonWrapper {\n    union {\n      JsonConsT cons;\n      FlatJsonT flat;\n    };\n\n    bool DefragIfNeeded(PageUsage* page_usage);\n  };\n\n  // Union of different representations\n  union U {\n    char inline_str[kInlineLen];\n\n    SmallString small_str;\n    detail::RobjWrapper r_obj;\n\n    // using 'packed' to reduce alignment of U to 1.\n    JsonWrapper json_obj __attribute__((packed));\n    SBF* sbf __attribute__((packed));\n    TOPK* topk __attribute__((packed));\n    CMS* cms __attribute__((packed));\n    int64_t ival __attribute__((packed));\n    ExternalPtr ext_ptr;\n    SdsTtlString sds_ttl;\n\n    U() : r_obj() {\n    }\n  } u_;\n\n  static_assert(sizeof(u_) == 16);\n\n  union {\n    uint8_t mask_ = 0;\n    struct {\n      uint8_t ref : 1;      // Mark objects that don't own their allocation.\n      uint8_t expire : 1;   // Mark objects that have expiry timestamp assigned.\n      uint8_t mc_flag : 1;  // Marks keys that have memcache flags assigned.\n\n      // IO_PENDING is set when the tiered storage has issued an i/o request to save the value.\n      // It is cleared when the io request finishes or is cancelled.\n      uint8_t io_pending : 1;\n      uint8_t sticky : 1;\n\n  
    // TOUCHED used to determin which items are hot/cold.\n      // by checking if the item was touched from the last time we\n      // reached this item while travering the database to set items as cold.\n      // https://junchengyang.com/publication/nsdi24-SIEVE.pdf\n      uint8_t touched : 1;  // used to mark keys that were accessed.\n\n      uint8_t omit_defrag : 1;  // mark object to skip defragmentation.\n    } mask_bits_;\n  };\n\n  // TODO: use c++20 bitfield initializers\n  const bool is_key_ : 1;\n  uint8_t taglen_ : 5;    // Either length of inline string or tag of type\n  uint8_t encoding_ : 2;  // Encoding of string values\n};\n\nstruct CompactKey : public CompactObj {\n  CompactKey() : CompactObj(true) {\n  }\n\n  explicit CompactKey(std::string_view str) : CompactObj{str, true} {\n  }\n\n  CompactKey AsRef() const {\n    CompactKey res;\n    memcpy(&res.u_, &u_, sizeof(u_));\n    res.encoding_ = encoding_;\n    res.taglen_ = taglen_;\n    res.mask_ = mask_;\n    res.mask_bits_.ref = 1;\n\n    return res;\n  }\n\n  bool HasExpire() const {\n    return mask_bits_.expire;\n  }\n\n  void SetExpire(bool e) {\n    mask_bits_.expire = e;\n  }\n\n  // Embed expire time directly in the key by converting to SDS_TTL_TAG.\n  void SetExpireTime(uint64_t abs_ms);\n\n  // Remove embedded expire time and convert back to optimal string form.\n  bool ClearExpireTime();\n\n  // Read the embedded expire time.\n  // Returns 0 if there is no embedded expire time, otherwise\n  // returns the absolute expire time in ms.\n  uint64_t GetExpireTime() const;\n\n  CompactKey& operator=(std::string_view sv) noexcept {\n    SetString(sv);\n    return *this;\n  }\n\n  bool operator==(const CompactKey& o) const;\n\n  bool operator==(std::string_view sl) const;\n\n  bool operator!=(std::string_view sl) const {\n    return !(*this == sl);\n  }\n\n  friend bool operator!=(const CompactKey& lhs, const CompactKey& rhs) {\n    return !(lhs == rhs);\n  }\n\n  friend bool 
operator==(std::string_view sl, const CompactKey& o) {\n    return o.operator==(sl);\n  }\n};\n\ninline bool CompactKey::operator==(std::string_view sv) const {\n  if (encoding_)\n    return CmpEncoded(sv);\n\n  if (IsInline()) {\n    return std::string_view{u_.inline_str, taglen_} == sv;\n  }\n  return CmpNonInline(sv);\n}\n\nstruct CompactValue : public CompactObj {\n  CompactValue() : CompactObj(false) {\n  }\n\n  explicit CompactValue(std::string_view str) : CompactObj{str, false} {\n  }\n};\n\nstd::string_view ObjTypeToString(CompactObjType type);\n\n// Returns kInvalidCompactObjType if sv is not a valid type.\nCompactObjType ObjTypeFromString(std::string_view sv);\n\nstream* streamNew();\nvoid freeStream(stream* s);\n\n}  // namespace dfly\n"
  },
  {
    "path": "src/core/compact_object_test.cc",
    "content": "// Copyright 2022, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n#include \"core/compact_object.h\"\n\n#include <absl/functional/overload.h>\n#include <absl/strings/str_cat.h>\n#include <gtest/gtest.h>\n#include <mimalloc.h>\n#include <xxhash.h>\n\n#include <cstddef>\n#include <random>\n\n#include \"base/gtest.h\"\n#include \"base/logging.h\"\n#include \"core/detail/bitpacking.h\"\n#include \"core/huff_coder.h\"\n#include \"core/mi_memory_resource.h\"\n#include \"core/page_usage/page_usage_stats.h\"\n#include \"core/string_map.h\"\n#include \"core/string_set.h\"\n\nextern \"C\" {\n#include \"redis/intset.h\"\n#include \"redis/redis_aux.h\"\n#include \"redis/stream.h\"\n#include \"redis/zmalloc.h\"\n}\n\nnamespace dfly {\n\nXXH64_hash_t kSeed = 24061983;\nconstexpr size_t kRandomStartIndex = 24;\nconstexpr size_t kRandomStep = 26;\nconstexpr float kUnderUtilizedRatio = 1.0f;  // ensure that we would detect\nusing namespace std;\nusing namespace jsoncons;\nusing namespace jsoncons::jsonpath;\n\nvoid PrintTo(const CompactObj& cobj, std::ostream* os) {\n  if (cobj.ObjType() == OBJ_STRING) {\n    *os << \"'\" << cobj.ToString() << \"' \";\n    return;\n  }\n  *os << \"cobj: [\" << cobj.ObjType() << \"]\";\n}\n\n// This is for the mimalloc test - being able to find an address in memory\n// where we have memory underutilzation\n// see issue number 448 (https://github.com/dragonflydb/dragonfly/issues/448)\nstd::vector<void*> AllocateForTest(int size, std::size_t allocate_size, int factor1 = 1,\n                                   int factor2 = 1) {\n  const int kAllocRandomChangeSize = 13;  // just some random value\n  std::vector<void*> ptrs;\n  for (int index = 0; index < size; index++) {\n    auto alloc_size =\n        index % kAllocRandomChangeSize == 0 ? 
allocate_size * factor1 : allocate_size * factor2;\n    auto heap_alloc = mi_heap_get_backing();\n    void* ptr = mi_heap_malloc(heap_alloc, alloc_size);\n    ptrs.push_back(ptr);\n  }\n  return ptrs;\n}\n\nbool HasUnderutilizedMemory(const std::vector<void*>& ptrs, float ratio) {\n  PageUsage page_usage{CollectPageStats::NO, ratio};\n  auto it = std::find_if(ptrs.begin(), ptrs.end(), [&](auto p) {\n    int r = p && page_usage.IsPageForObjectUnderUtilized(p);\n    return r > 0;\n  });\n  return it != ptrs.end();\n}\n\n// Go over ptrs vector and free memory at locations every \"steps\".\n// This is so that we will trigger the under utilization - some\n// pages will have \"holes\" in them and we are expecting to find these pages.\nvoid DeallocateAtRandom(size_t steps, std::vector<void*>* ptrs) {\n  for (size_t i = kRandomStartIndex; i < ptrs->size(); i += steps) {\n    mi_free(ptrs->at(i));\n    ptrs->at(i) = nullptr;\n  }\n}\n\nstatic void InitThreadStructs() {\n  auto* tlh = mi_heap_get_backing();\n  init_zmalloc_threadlocal(tlh);\n  SmallString::InitThreadLocal(tlh);\n  thread_local MiMemoryResource mi_resource(tlh);\n  CompactObj::InitThreadLocal(&mi_resource);\n  InitTLStatelessAllocMR(&mi_resource);\n};\n\nstatic void CheckEverythingDeallocated() {\n  mi_heap_collect(mi_heap_get_backing(), true);\n\n  auto cb_visit = [](const mi_heap_t* heap, const mi_heap_area_t* area, void* block,\n                     size_t block_size, void* arg) {\n    LOG(ERROR) << \"Unfreed allocations: block_size \" << block_size\n               << \", allocated: \" << area->used * block_size;\n    return true;\n  };\n\n  mi_heap_visit_blocks(mi_heap_get_backing(), false /* do not visit all blocks*/, cb_visit,\n                       nullptr);\n}\n\nclass CompactObjectTest : public ::testing::Test {\n protected:\n  static void SetUpTestSuite() {\n    InitRedisTables();  // to initialize server struct.\n\n    InitThreadStructs();\n  }\n\n  static void TearDownTestSuite() {\n    
CheckEverythingDeallocated();\n    CleanupStatelessAllocMR();\n  }\n\n  CompactValue cobj_;\n  CompactKey ckey_;\n  string tmp_;\n};\n\nTEST_F(CompactObjectTest, WastedMemoryDetection) {\n  size_t allocated = 0, commited = 0, wasted = 0;\n  // By setting the threshold to high value we are expecting\n  // To find locations where we have wasted memory\n  float ratio = 0.8;\n  zmalloc_get_allocator_wasted_blocks(ratio, &allocated, &commited, &wasted);\n  EXPECT_EQ(allocated, 0);\n  EXPECT_EQ(commited, 0);\n  EXPECT_EQ(wasted, (commited - allocated));\n\n  std::size_t allocated_mem = 64;\n  auto* myheap = mi_heap_get_backing();\n\n  void* p1 = mi_heap_malloc(myheap, 64);\n\n  void* ptrs_end[50];\n  for (size_t i = 0; i < 50; ++i) {\n    ptrs_end[i] = mi_heap_malloc(myheap, 128);\n    allocated_mem += 128;\n  }\n\n  allocated = commited = wasted = 0;\n  zmalloc_get_allocator_wasted_blocks(ratio, &allocated, &commited, &wasted);\n  EXPECT_EQ(allocated, allocated_mem);\n  EXPECT_GT(commited, allocated_mem);\n  EXPECT_EQ(wasted, (commited - allocated));\n  void* ptr[50];\n  // allocate 50\n  for (size_t i = 0; i < 50; ++i) {\n    ptr[i] = mi_heap_malloc(myheap, 256);\n    allocated_mem += 256;\n  }\n\n  // At this point all the blocks has committed > 0 and used > 0\n  // and since we expecting to find these locations, the size of\n  // wasted == commited memory - allocated memory.\n  allocated = commited = wasted = 0;\n  zmalloc_get_allocator_wasted_blocks(ratio, &allocated, &commited, &wasted);\n  EXPECT_EQ(allocated, allocated_mem);\n  EXPECT_GT(commited, allocated_mem);\n  EXPECT_EQ(wasted, (commited - allocated));\n\n  // free 50/50 -\n  for (size_t i = 0; i < 50; ++i) {\n    mi_free(ptr[i]);\n    allocated_mem -= 256;\n  }\n\n  // After all the memory at block size 256 is free, we would have commited there\n  // but the used is expected to be 0, so the number now is different from the\n  // case above\n  allocated = commited = wasted = 0;\n  
zmalloc_get_allocator_wasted_blocks(ratio, &allocated, &commited, &wasted);\n  EXPECT_EQ(allocated, allocated_mem);\n  EXPECT_GT(commited, allocated_mem);\n  // since we release all 256 memory block, it should not be counted\n  EXPECT_EQ(wasted, (commited - allocated));\n  for (size_t i = 0; i < 50; ++i) {\n    mi_free(ptrs_end[i]);\n  }\n  mi_free(p1);\n\n  // Now that its all freed, we are not expecting to have any wasted memory any more\n  allocated = commited = wasted = 0;\n  zmalloc_get_allocator_wasted_blocks(ratio, &allocated, &commited, &wasted);\n  EXPECT_EQ(allocated, 0);\n  EXPECT_GT(commited, allocated);\n  EXPECT_EQ(wasted, (commited - allocated));\n\n  mi_collect(false);\n}\n\nTEST_F(CompactObjectTest, WastedMemoryDontCount) {\n  // The commited memory per blocks are:\n  // 64bit => 4K\n  // 128bit => 8k\n  // 256 => 16k\n  // and so on, which mean every n * sizeof(ptr) ^ 2 == 2^11*2*(n-1) (where n starts with 1)\n  constexpr std::size_t kExpectedFor256MemWasted = 0x4000;  // memory block 256\n  auto* myheap = mi_heap_get_backing();\n\n  size_t allocated = 0, commited = 0, wasted = 0;\n  // By setting the threshold to a very low number\n  // we don't expect to find and locations where memory is wasted\n  float ratio = 0.01;\n  zmalloc_get_allocator_wasted_blocks(ratio, &allocated, &commited, &wasted);\n  EXPECT_EQ(allocated, 0);\n  EXPECT_EQ(commited, 0);\n  EXPECT_EQ(wasted, (commited - allocated));\n\n  std::size_t allocated_mem = 64;\n\n  void* p1 = mi_heap_malloc(myheap, 64);\n\n  void* ptrs_end[50];\n  for (size_t i = 0; i < 50; ++i) {\n    ptrs_end[i] = mi_heap_malloc(myheap, 128);\n    (void)p1;\n    allocated_mem += 128;\n  }\n\n  void* ptr[50];\n\n  // allocate 50\n  for (size_t i = 0; i < 50; ++i) {\n    ptr[i] = mi_heap_malloc(myheap, 256);\n    allocated_mem += 256;\n  }\n  allocated = commited = wasted = 0;\n  zmalloc_get_allocator_wasted_blocks(ratio, &allocated, &commited, &wasted);\n  // Threshold is low so we are not expecting any 
wasted memory to be found.\n  EXPECT_EQ(allocated, allocated_mem);\n  EXPECT_GT(commited, allocated_mem);\n  EXPECT_EQ(wasted, 0);\n\n  // free 50/50 -\n  for (size_t i = 0; i < 50; ++i) {\n    mi_free(ptr[i]);\n    allocated_mem -= 256;\n  }\n  allocated = commited = wasted = 0;\n  zmalloc_get_allocator_wasted_blocks(ratio, &allocated, &commited, &wasted);\n\n  EXPECT_EQ(allocated, allocated_mem);\n  EXPECT_GT(commited, allocated_mem);\n  // We will detect only wasted memory for block size of\n  // 256 - and all of it is wasted.\n  EXPECT_EQ(wasted, kExpectedFor256MemWasted);\n  // Threshold is low so we are not expecting any wasted memory to be found.\n  for (size_t i = 0; i < 50; ++i) {\n    mi_free(ptrs_end[i]);\n  }\n  mi_free(p1);\n\n  mi_collect(false);\n}\n\nTEST_F(CompactObjectTest, NonInline) {\n  string s(22, 'a');\n  CompactKey obj{s};\n\n  uint64_t expected_val = XXH3_64bits_withSeed(s.data(), s.size(), kSeed);\n  EXPECT_EQ(18261733907982517826UL, expected_val);\n  EXPECT_EQ(expected_val, obj.HashCode());\n  EXPECT_EQ(s, obj);\n\n  s.assign(25, 'b');\n  obj.SetString(s);\n  EXPECT_EQ(s, obj);\n  EXPECT_EQ(s.size(), obj.Size());\n}\n\nTEST_F(CompactObjectTest, InlineAsciiEncoded) {\n  string s = \"key:0000000000000\";\n  uint64_t expected_val = XXH3_64bits_withSeed(s.data(), s.size(), kSeed);\n  CompactValue obj{s};\n  EXPECT_EQ(expected_val, obj.HashCode());\n  EXPECT_EQ(s.size(), obj.Size());\n}\n\nTEST_F(CompactObjectTest, Int) {\n  ckey_.SetString(\"0\");\n  EXPECT_EQ(0, ckey_.TryGetInt());\n  EXPECT_EQ(1, ckey_.Size());\n  EXPECT_EQ(ckey_, \"0\");\n  EXPECT_EQ(\"0\", ckey_.GetSlice(&tmp_));\n  EXPECT_EQ(OBJ_STRING, ckey_.ObjType());\n}\n\nTEST_F(CompactObjectTest, Expire) {\n  CompactKey key;\n  key.SetExpire(true);\n  key.SetString(\"42\");\n  EXPECT_EQ(8181779779123079347, key.HashCode());\n  EXPECT_EQ(OBJ_ENCODING_INT, key.Encoding());\n  EXPECT_EQ(2, key.Size());\n  EXPECT_TRUE(key.HasExpire());\n}\n\nTEST_F(CompactObjectTest, SdsTtlTag) {\n  
// 1. Inline key + SetTtl\n  {\n    CompactKey key(\"hello\");\n    ASSERT_TRUE(key.IsInline());\n    uint64_t hash_before = key.HashCode();\n\n    key.SetExpireTime(1000);\n    EXPECT_TRUE(key.HasExpire());\n    EXPECT_EQ(1000, key.GetExpireTime());\n    EXPECT_EQ(hash_before, key.HashCode());\n    EXPECT_TRUE(key == string_view(\"hello\"));\n    EXPECT_EQ(5, key.Size());\n    EXPECT_EQ(OBJ_STRING, key.ObjType());\n\n    string slice;\n    EXPECT_EQ(\"hello\", key.GetSlice(&slice));\n    EXPECT_GT(key.MallocUsed(), 0u);\n  }\n\n  // 2. INT_TAG key + SetTtl\n  {\n    CompactKey key(\"42\");\n    ASSERT_TRUE(key.TryGetInt().has_value());\n    uint64_t hash_before = key.HashCode();\n\n    key.SetExpireTime(2000);\n    EXPECT_TRUE(key.HasExpire());\n    EXPECT_EQ(2000, key.GetExpireTime());\n    EXPECT_TRUE(key == string_view(\"42\"));\n    EXPECT_EQ(hash_before, key.HashCode());\n    // No longer INT_TAG — TryGetInt should return nullopt.\n    EXPECT_FALSE(key.TryGetInt().has_value());\n  }\n\n  // 3. SMALL_TAG key + SetTtl\n  {\n    string s(64, 'x');\n    for (size_t i = 0; i < s.size(); ++i)\n      s[i] = 'a' + (i % 26);\n    CompactKey key(s);\n    uint64_t hash_before = key.HashCode();\n\n    key.SetExpireTime(3000);\n    EXPECT_TRUE(key.HasExpire());\n    EXPECT_EQ(3000, key.GetExpireTime());\n    EXPECT_TRUE(key == string_view(s));\n    EXPECT_EQ(hash_before, key.HashCode());\n    EXPECT_EQ(s.size(), key.Size());\n  }\n\n  // 4. ROBJ_TAG key + SetExpireTime\n  {\n    string s(512, 'z');\n    for (size_t i = 0; i < s.size(); ++i)\n      s[i] = static_cast<char>(128 + (i % 128));\n    CompactKey key(s);\n    uint64_t hash_before = key.HashCode();\n\n    key.SetExpireTime(4000);\n    EXPECT_TRUE(key.HasExpire());\n    EXPECT_EQ(4000, key.GetExpireTime());\n    EXPECT_TRUE(key == string_view(s));\n    EXPECT_EQ(hash_before, key.HashCode());\n    EXPECT_EQ(s.size(), key.Size());\n  }\n\n  // 5. 
ExpireTime update in-place\n  {\n    CompactKey key(\"hello\");\n    key.SetExpireTime(1000);\n    EXPECT_EQ(1000, key.GetExpireTime());\n\n    key.SetExpireTime(2000);\n    EXPECT_EQ(2000, key.GetExpireTime());\n    EXPECT_TRUE(key == string_view(\"hello\"));\n  }\n\n  // 6. ClearTtl (inline recovery)\n  {\n    CompactKey key(\"hello\");\n    key.SetExpireTime(1000);\n    EXPECT_TRUE(key.ClearExpireTime());\n\n    EXPECT_FALSE(key.HasExpire());\n    EXPECT_TRUE(key.IsInline());\n    EXPECT_TRUE(key == string_view(\"hello\"));\n  }\n\n  // 7. ClearTtl (INT recovery)\n  {\n    CompactKey key(\"42\");\n    key.SetExpireTime(1000);\n    EXPECT_TRUE(key.ClearExpireTime());\n    EXPECT_FALSE(key.HasExpire());\n    EXPECT_TRUE(key.TryGetInt().has_value());\n    EXPECT_EQ(42, key.TryGetInt().value());\n  }\n\n  // 8. ClearTtl (SMALL recovery)\n  {\n    string s(64, 'x');\n    for (size_t i = 0; i < s.size(); ++i)\n      s[i] = 'a' + (i % 26);\n    CompactKey key(s);\n    key.SetExpireTime(1000);\n    EXPECT_TRUE(key.ClearExpireTime());\n    EXPECT_FALSE(key.HasExpire());\n    EXPECT_TRUE(key == string_view(s));\n  }\n\n  // 9. Move semantics\n  {\n    CompactKey a(\"test\");\n    a.SetExpireTime(100);\n    CompactKey b(std::move(a));\n    EXPECT_TRUE(b.HasExpire());\n    EXPECT_EQ(100, b.GetExpireTime());\n    EXPECT_TRUE(b == string_view(\"test\"));\n  }\n\n  // 10. Free/destructor — just verify no leaks (TearDown catches them).\n  {\n    CompactKey key(\"hello\");\n    key.SetExpireTime(5000);\n  }\n\n  // 11. 
Cross-tag operator== (SDS_TTL_TAG vs inline/INT_TAG).\n  {\n    CompactKey a(\"hello\");\n    CompactKey b(\"hello\");\n    b.SetExpireTime(999);\n    // b is SDS_TTL_TAG, a is inline — must compare equal as OBJ_STRING.\n    EXPECT_TRUE(a == b);\n    EXPECT_TRUE(b == a);\n\n    CompactKey c(\"42\");\n    CompactKey d(\"42\");\n    d.SetExpireTime(1);\n    EXPECT_TRUE(c == d);\n    EXPECT_TRUE(d == c);\n\n    // Different content must not compare equal.\n    CompactKey e(\"world\");\n    e.SetExpireTime(1);\n    EXPECT_FALSE(a == e);\n  }\n}\n\nTEST_F(CompactObjectTest, MediumString) {\n  string tmp(511, 'b');\n\n  cobj_.SetString(tmp);\n  EXPECT_EQ(tmp.size(), cobj_.Size());\n\n  cobj_.SetString(tmp);\n  EXPECT_EQ(tmp.size(), cobj_.Size());\n  cobj_.Reset();\n\n  tmp.assign(27463, 'c');\n  cobj_.SetString(tmp);\n  EXPECT_EQ(27463, cobj_.Size());\n}\n\nTEST_F(CompactObjectTest, AsciiUtil) {\n  std::string_view data{\"aaaaaabb\"};\n  uint8_t buf[32];\n\n  char outbuf[32] = \"xxxxxxxxxxxxxx\";\n  detail::ascii_pack_simd(data.data(), 7, buf);\n  detail::ascii_unpack_simd(buf, 7, outbuf);\n\n  ASSERT_EQ('x', outbuf[7]) << outbuf;\n  std::string_view actual{outbuf, 7};\n  ASSERT_EQ(data.substr(0, 7), actual);\n\n  string data3;\n  for (unsigned i = 0; i < 13; ++i) {\n    data3.append(\"12345678910\");\n  }\n  string act_str(data3.size(), 'y');\n  std::vector<uint8_t> binvec(detail::binpacked_len(data3.size()));\n  detail::ascii_pack_simd2(data3.data(), data3.size(), binvec.data());\n  detail::ascii_unpack_simd(binvec.data(), data3.size(), act_str.data());\n\n  ASSERT_EQ(data3, act_str);\n}\n\nTEST_F(CompactObjectTest, AsciiPackByte) {\n  // Test ascii_pack_byte and ascii_unpack_byte for correctness.\n  for (size_t len : {8, 16, 24, 31, 32, 33, 64, 100}) {\n    string original(len, 'a');\n    for (size_t i = 0; i < len; ++i)\n      original[i] = 'A' + (i % 26);\n\n    size_t packed_len = detail::binpacked_len(len);\n    vector<uint8_t> packed(packed_len);\n    
detail::ascii_pack(original.data(), len, packed.data());\n\n    // Verify initial pack/unpack round-trip at byte level.\n    for (size_t i = 0; i < len; ++i) {\n      uint8_t got = detail::ascii_unpack_byte(packed.data(), len, i);\n      ASSERT_EQ(static_cast<uint8_t>(original[i]), got) << \"len=\" << len << \" offset=\" << i;\n    }\n\n    // Now set each byte to a different value via ascii_pack_byte, verify round-trip.\n    for (size_t i = 0; i < len; ++i) {\n      uint8_t new_val = 'a' + ((i + 3) % 26);\n\n      // Pack the full string, then modify one byte.\n      vector<uint8_t> modified(packed);\n      detail::ascii_pack_byte(modified.data(), len, i, new_val);\n\n      // The modified byte should read back correctly.\n      uint8_t got = detail::ascii_unpack_byte(modified.data(), len, i);\n      EXPECT_EQ(new_val, got) << \"len=\" << len << \" set offset=\" << i;\n\n      // All other bytes should be unchanged.\n      for (size_t j = 0; j < len; ++j) {\n        if (j == i)\n          continue;\n        uint8_t other = detail::ascii_unpack_byte(modified.data(), len, j);\n        EXPECT_EQ(static_cast<uint8_t>(original[j]), other)\n            << \"len=\" << len << \" set offset=\" << i << \" check offset=\" << j;\n      }\n    }\n\n    // Test setting all bytes to zero (edge case: clearing bits).\n    {\n      vector<uint8_t> zeroed(packed);\n      string expected = original;\n      for (size_t i = 0; i < len; ++i) {\n        detail::ascii_pack_byte(zeroed.data(), len, i, 0);\n        expected[i] = '\\0';\n      }\n      for (size_t i = 0; i < len; ++i) {\n        uint8_t got = detail::ascii_unpack_byte(zeroed.data(), len, i);\n        EXPECT_EQ(0, got) << \"len=\" << len << \" zero check offset=\" << i;\n      }\n    }\n\n    // Test setting all bytes to 0x7F (all bits set in 7-bit ASCII).\n    {\n      vector<uint8_t> maxed(packed);\n      for (size_t i = 0; i < len; ++i) {\n        detail::ascii_pack_byte(maxed.data(), len, i, 0x7F);\n      }\n      for 
(size_t i = 0; i < len; ++i) {\n        uint8_t got = detail::ascii_unpack_byte(maxed.data(), len, i);\n        EXPECT_EQ(0x7F, got) << \"len=\" << len << \" max check offset=\" << i;\n      }\n    }\n  }\n}\n\nTEST_F(CompactObjectTest, IntSet) {\n  intset* is = intsetNew();\n  cobj_.InitRobj(OBJ_SET, kEncodingIntSet, is);\n\n  EXPECT_EQ(0, cobj_.Size());\n  is = (intset*)cobj_.RObjPtr();\n  uint8_t success = 0;\n\n  is = intsetAdd(is, 10, &success);\n  EXPECT_EQ(1, success);\n  is = intsetAdd(is, 10, &success);\n  EXPECT_EQ(0, success);\n  cobj_.SetRObjPtr(is);\n\n  EXPECT_GT(cobj_.MallocUsed(), 0);\n}\n\nTEST_F(CompactObjectTest, ZSet) {\n  // unrelated, checking that sds static encoding works.\n  // it is used in zset special strings.\n  char kMinStrData[] =\n      \"\\110\"\n      \"minstring\";\n  EXPECT_EQ(9, sdslen(kMinStrData + 1));\n\n  cobj_.InitRobj(OBJ_ZSET, OBJ_ENCODING_LISTPACK, lpNew(0));\n\n  EXPECT_EQ(OBJ_ZSET, cobj_.ObjType());\n  EXPECT_EQ(OBJ_ENCODING_LISTPACK, cobj_.Encoding());\n}\n\nTEST_F(CompactObjectTest, Hash) {\n  uint8_t* lp = lpNew(0);\n  lp = lpAppend(lp, reinterpret_cast<const uint8_t*>(\"foo\"), 3);\n  lp = lpAppend(lp, reinterpret_cast<const uint8_t*>(\"barrr\"), 5);\n  cobj_.InitRobj(OBJ_HASH, kEncodingListPack, lp);\n  EXPECT_EQ(OBJ_HASH, cobj_.ObjType());\n  EXPECT_EQ(1, cobj_.Size());\n}\n\nTEST_F(CompactObjectTest, SBF) {\n  cobj_.SetSBF(1000, 0.001, 2);\n  EXPECT_EQ(cobj_.ObjType(), OBJ_SBF);\n  EXPECT_GT(cobj_.MallocUsed(), 0);\n}\n\nTEST_F(CompactObjectTest, MimallocUnderutilzation) {\n  // We are testing with the same object size allocation here\n  // This test is for https://github.com/dragonflydb/dragonfly/issues/448\n  size_t allocation_size = 94;\n  int count = 2000;\n  std::vector<void*> ptrs = AllocateForTest(count, allocation_size);\n  bool found = HasUnderutilizedMemory(ptrs, kUnderUtilizedRatio);\n  ASSERT_FALSE(found);\n  DeallocateAtRandom(kRandomStep, &ptrs);\n  found = HasUnderutilizedMemory(ptrs, 
kUnderUtilizedRatio);\n  ASSERT_TRUE(found);\n  for (auto* ptr : ptrs) {\n    mi_free(ptr);\n  }\n}\n\nTEST_F(CompactObjectTest, MimallocUnderutilzationDifferentSizes) {\n  // This test uses different objects sizes to cover more use cases\n  // related to issue https://github.com/dragonflydb/dragonfly/issues/448\n  size_t allocation_size = 97;\n  int count = 2000;\n  int mem_factor_1 = 3;\n  int mem_factor_2 = 2;\n  std::vector<void*> ptrs = AllocateForTest(count, allocation_size, mem_factor_1, mem_factor_2);\n  bool found = HasUnderutilizedMemory(ptrs, kUnderUtilizedRatio);\n  ASSERT_FALSE(found);\n  DeallocateAtRandom(kRandomStep, &ptrs);\n  found = HasUnderutilizedMemory(ptrs, kUnderUtilizedRatio);\n  ASSERT_TRUE(found);\n  for (auto* ptr : ptrs) {\n    mi_free(ptr);\n  }\n}\n\nTEST_F(CompactObjectTest, MimallocUnderutilzationWithRealloc) {\n  // This test is checking underutilzation with reallocation as well as deallocation\n  // of the memory - see issue https://github.com/dragonflydb/dragonfly/issues/448\n  size_t allocation_size = 102;\n  int count = 2000;\n  int mem_factor_1 = 4;\n  int mem_factor_2 = 1;\n\n  std::vector<void*> ptrs = AllocateForTest(count, allocation_size, mem_factor_1, mem_factor_2);\n  bool found = HasUnderutilizedMemory(ptrs, kUnderUtilizedRatio);\n  ASSERT_FALSE(found);\n  DeallocateAtRandom(kRandomStep, &ptrs);\n\n  //  This is another case, where we are filling the \"gaps\" by doing re-allocations\n  //  in this case, since we are not setting all the values back it should still have\n  //  places that are not used. 
Plus since we are not looking at the first page\n  //  other pages should be underutilized.\n  for (size_t i = kRandomStartIndex; i < ptrs.size(); i += kRandomStep) {\n    if (!ptrs[i]) {\n      ptrs[i] = mi_heap_malloc(mi_heap_get_backing(), allocation_size);\n    }\n  }\n  found = HasUnderutilizedMemory(ptrs, kUnderUtilizedRatio);\n  ASSERT_TRUE(found);\n  for (auto* ptr : ptrs) {\n    mi_free(ptr);\n  }\n}\n\nTEST_F(CompactObjectTest, JsonTypeTest) {\n  using namespace jsoncons;\n  // This test verify that we can set a json type\n  // and that we \"know\", it JSON and not a string\n  std::string_view json_str = R\"(\n    {\"firstName\":\"John\",\"lastName\":\"Smith\",\"age\":27,\"weight\":135.25,\"isAlive\":true,\n    \"address\":{\"street\":\"21 2nd Street\",\"city\":\"New York\",\"state\":\"NY\",\"zipcode\":\"10021-3100\"},\n    \"phoneNumbers\":[{\"type\":\"home\",\"number\":\"212 555-1234\"},{\"type\":\"office\",\"number\":\"646 555-4567\"}],\n    \"children\":[],\"spouse\":null}\n  )\";\n  std::optional<JsonType> json_option2 =\n      ParseJsonUsingShardHeap(R\"({\"a\":{}, \"b\":{\"a\":1}, \"c\":{\"a\":1, \"b\":2}})\");\n\n  cobj_.SetString(json_str);\n  ASSERT_TRUE(cobj_.ObjType() == OBJ_STRING);  // we set this as a string\n  JsonType* failed_json = cobj_.GetJson();\n  ASSERT_TRUE(failed_json == nullptr);\n  ASSERT_TRUE(cobj_.ObjType() == OBJ_STRING);\n  std::optional<JsonType> json_option = ParseJsonUsingShardHeap(json_str);\n  ASSERT_TRUE(json_option.has_value());\n  cobj_.SetJson(std::move(json_option.value()));\n  ASSERT_TRUE(cobj_.ObjType() == OBJ_JSON);  // and now this is a JSON type\n  JsonType* json = cobj_.GetJson();\n  ASSERT_TRUE(json != nullptr);\n  ASSERT_TRUE(json->contains(\"firstName\"));\n  // set second object make sure that we don't have any memory issue\n  ASSERT_TRUE(json_option2.has_value());\n  cobj_.SetJson(std::move(json_option2.value()));\n  ASSERT_TRUE(cobj_.ObjType() == OBJ_JSON);  // still is a JSON type\n  json = 
cobj_.GetJson();\n  ASSERT_TRUE(json != nullptr);\n  ASSERT_TRUE(json->contains(\"b\"));\n  ASSERT_FALSE(json->contains(\"firstName\"));\n  std::optional<JsonType> set_array = ParseJsonUsingShardHeap(\"\");\n  // now set it to string again\n  cobj_.SetString(R\"({\"a\":{}, \"b\":{\"a\":1}, \"c\":{\"a\":1, \"b\":2}})\");\n  ASSERT_TRUE(cobj_.ObjType() == OBJ_STRING);  // we set this as a string\n  failed_json = cobj_.GetJson();\n  ASSERT_TRUE(failed_json == nullptr);\n}\n\nTEST_F(CompactObjectTest, JsonTypeWithPathTest) {\n  std::string_view books_json =\n      R\"({\"books\":[{\n            \"category\": \"fiction\",\n            \"title\" : \"A Wild Sheep Chase\",\n            \"author\" : \"Haruki Murakami\"\n        },{\n            \"category\": \"fiction\",\n            \"title\" : \"The Night Watch\",\n            \"author\" : \"Sergei Lukyanenko\"\n        },{\n            \"category\": \"fiction\",\n            \"title\" : \"The Comedians\",\n            \"author\" : \"Graham Greene\"\n        },{\n            \"category\": \"memoir\",\n            \"title\" : \"The Night Watch\",\n            \"author\" : \"Phillips, David Atlee\"\n        }]})\";\n  std::optional<JsonType> json_array = ParseJsonUsingShardHeap(books_json);\n  ASSERT_TRUE(json_array.has_value());\n  cobj_.SetJson(std::move(json_array.value()));\n  ASSERT_TRUE(cobj_.ObjType() == OBJ_JSON);  // and now this is a JSON type\n  auto f = [](const auto& /*path*/, JsonType& book) {\n    if (book.at(\"category\") == \"memoir\" && !book.contains(\"price\")) {\n      book.try_emplace(\"price\", 140.0);\n    }\n  };\n  JsonType* json = cobj_.GetJson();\n  ASSERT_TRUE(json != nullptr);\n  auto allocator_set = jsoncons::combine_allocators(json->get_allocator());\n  jsonpath::json_replace(allocator_set, *json, \"$.books[*]\"sv, f);\n\n  // Check whether we've changed the entry for json in place\n  // we should have prices only for memoir books\n  JsonType* json2 = cobj_.GetJson();\n  ASSERT_TRUE(json != 
nullptr);\n  ASSERT_TRUE(json->contains(\"books\"));\n  for (auto&& book : (*json2)[\"books\"].array_range()) {\n    // make sure that we add prices only to \"memoir\"\n    if (book.at(\"category\") == \"memoir\") {\n      ASSERT_TRUE(book.contains(\"price\"));\n    } else {\n      ASSERT_FALSE(book.contains(\"price\"));\n    }\n  }\n}\n\n// Test listpack defragmentation.\n// StringMap has built-in defragmentation that is tested in its own test suite.\nTEST_F(CompactObjectTest, DefragHash) {\n  auto build_str = [](size_t i) { return string(111, 'v') + to_string(i); };\n\n  vector<uint8_t*> lps(10'00);\n\n  for (size_t i = 0; i < lps.size(); i++) {\n    uint8_t* lp = lpNew(100);\n    for (size_t j = 0; j < 100; j++) {\n      auto s = build_str(j);\n      lp = lpAppend(lp, reinterpret_cast<const unsigned char*>(s.data()), s.length());\n    }\n    DCHECK_EQ(lpLength(lp), 100u);\n    lps[i] = lp;\n  }\n\n  for (size_t i = 0; i < lps.size(); i++) {\n    if (i % 10 == 0)\n      continue;\n    lpFree(lps[i]);\n  }\n\n  // Find a listpack that is located on an underutilized page\n  uint8_t* target_lp = nullptr;\n  PageUsage page_usage{CollectPageStats::NO, 0.8};\n  for (size_t i = 0; i < lps.size(); i += 10) {\n    if (page_usage.IsPageForObjectUnderUtilized(lps[i]))\n      target_lp = lps[i];\n  }\n  CHECK_NE(target_lp, nullptr);\n\n  // Trigger re-allocation\n  cobj_.InitRobj(OBJ_HASH, kEncodingListPack, target_lp);\n  ASSERT_TRUE(cobj_.DefragIfNeeded(&page_usage));\n\n  // Check the pointer changes as the listpack needed defragmentation\n  auto lp = (uint8_t*)cobj_.RObjPtr();\n  EXPECT_NE(lp, target_lp) << \"must have changed due to realloc\";\n\n  uint8_t* fptr = lpFirst(lp);\n  for (size_t i = 0; i < 100; i++) {\n    int64_t len;\n    auto* s = lpGet(fptr, &len, nullptr);\n\n    string_view sv{reinterpret_cast<const char*>(s), static_cast<uint64_t>(len)};\n    EXPECT_EQ(sv, build_str(i));\n\n    fptr = lpNext(lp, fptr);\n  }\n\n  for (size_t i = 0; i < lps.size(); i += 
10) {\n    if (lps[i] != target_lp)\n      lpFree(lps[i]);\n  }\n}\n\nTEST_F(CompactObjectTest, DefragSet) {\n  // This is still not implemented\n  StringSet* s = CompactObj::AllocateMR<StringSet>();\n  s->Add(\"str\");\n  cobj_.InitRobj(OBJ_SET, kEncodingStrMap2, s);\n  PageUsage page_usage{CollectPageStats::NO, 0.8};\n  ASSERT_FALSE(cobj_.DefragIfNeeded(&page_usage));\n}\n\nTEST_F(CompactObjectTest, StrEncodingAndMaterialize) {\n  for (bool ascii : {true, false}) {\n    for (size_t len : {64, 128, 256, 512, 1024}) {\n      string test_str(len, 'a');\n      for (size_t i = 0; i < len; i++)\n        test_str[i] = char('a' + (i % 10));\n      if (!ascii)\n        test_str.push_back(char(200));  // non-ascii\n\n      CompactValue obj;\n      obj.SetString(test_str);\n\n      // Test StrEncoding helper\n      auto strs = obj.GetRawString();\n      string raw_str = string{strs[0]} + string{strs[1]};\n      CompactObj::StrEncoding enc = obj.GetStrEncoding();\n      EXPECT_EQ(test_str, enc.Decode(raw_str).Take());\n\n      // Test Materialize\n      obj.SetExternal(0, 0, CompactObj::ExternalRep::STRING);  // dummy values\n      obj.Materialize(raw_str, true);\n      EXPECT_EQ(test_str, obj.ToString());\n\n      // Restore from external again, but not as a raw value\n      obj.SetExternal(0, 0, CompactObj::ExternalRep::STRING);\n      auto test_str2 = test_str + \"updated\";\n      obj.Materialize(test_str2, false);\n      EXPECT_EQ(obj.ToString(), test_str2);\n    }\n  }\n}\n\nTEST_F(CompactObjectTest, ExternalRepresentation) {\n  {\n    CompactValue obj;\n    obj.SetString(\"test\");\n    obj.SetExternal(0, 4, CompactObj::ExternalRep::STRING);\n    EXPECT_EQ(obj.ObjType(), OBJ_STRING);\n  }\n  {\n    StringMap sm{};\n    CompactValue obj;\n    obj.SetRObjPtr(&sm);\n    obj.SetExternal(0, 4, CompactObj::ExternalRep::SERIALIZED_MAP);\n    EXPECT_EQ(obj.ObjType(), OBJ_HASH);\n  }\n}\n\nTEST_F(CompactObjectTest, AsanTriggerReadOverflow) {\n  cobj_.SetString(string(32, 
'a'));\n  auto dest = make_unique<char[]>(32);\n  cobj_.GetString(dest.get());\n}\n\nTEST_F(CompactObjectTest, lpGetInteger) {\n  int64_t val = -1;\n  uint8_t* lp = lpNew(0);\n  for (int j = 0; j < 60; ++j) {\n    lp = lpAppendInteger(lp, val);\n    val *= 2;\n  }\n  val = 1;\n  for (int j = 0; j < 600; ++j) {\n    string str(j * 500, 'a');\n    lp = lpAppend(lp, reinterpret_cast<const uint8_t*>(str.data()), str.size());\n  }\n  uint8_t* ptr = lpFirst(lp);\n  while (ptr) {\n    int64_t len1, len2;\n    uint8_t* val1 = lpGet(ptr, &len1, nullptr);\n    int res = lpGetInteger(ptr, &len2);\n    if (res) {\n      ASSERT_EQ(len1, len2);\n      ASSERT_TRUE(val1 == NULL);\n    } else {\n      ASSERT_TRUE(val1 != NULL);\n    }\n    ptr = lpNext(lp, ptr);\n  }\n  lpFree(lp);\n}\n\nstatic void BuildEncoderAB(HuffmanEncoder* encoder) {\n  array<unsigned, 256> hist;\n  hist.fill(1);\n  hist['a'] = 100;\n  hist['b'] = 50;\n  CHECK(encoder->Build(hist.data(), hist.size() - 1, nullptr));\n}\n\nTEST_F(CompactObjectTest, Huffman) {\n  HuffmanEncoder encoder;\n  BuildEncoderAB(&encoder);\n  string bindata = encoder.Export();\n\n  for (CompactObj::HuffmanDomain domain : {CompactObj::HUFF_KEYS, CompactObj::HUFF_STRING_VALUES}) {\n    ASSERT_TRUE(CompactObj::InitHuffmanThreadLocal(domain, bindata));\n    for (unsigned i = 30; i < 2048; i += 10) {\n      string data(i, 'a');\n\n      variant<CompactKey, CompactValue> obj_backing;\n      if (domain)\n        obj_backing = CompactValue{};\n      auto& cobj = visit([&](auto& co) -> CompactObj& { return co; }, obj_backing);\n\n      visit([&](auto& co) { co.SetString(data); }, obj_backing);\n      bool malloc_used = i >= 60;\n      ASSERT_EQ(malloc_used, cobj.MallocUsed() > 0) << i;\n      ASSERT_EQ(data.size(), cobj.Size());\n      ASSERT_EQ(CompactObj::HashCode(data), cobj.HashCode());\n\n      string actual;\n      cobj.GetString(&actual);\n      EXPECT_EQ(data, actual);\n      visit(absl::Overload{[&](CompactKey& co) { EXPECT_EQ(co, 
data); }, [&](CompactValue& co) {}},\n            obj_backing);\n    }\n  }\n}\n\nTEST_F(CompactObjectTest, GetByteAtOffset) {\n  // Inline string (INLINE_TAG)\n  {\n    string s = \"hello\";\n    cobj_.SetString(s);\n    for (size_t i = 0; i < s.size(); ++i) {\n      uint8_t res = 0;\n      EXPECT_TRUE(cobj_.GetByteAtIndex(i, &res));\n      EXPECT_EQ(s[i], res) << \"inline offset \" << i;\n    }\n  }\n\n  // Integer-encoded string (INT_TAG)\n  {\n    cobj_.SetString(\"12345\");\n    string expected = \"12345\";\n    for (size_t i = 0; i < expected.size(); ++i) {\n      uint8_t res = 0;\n      EXPECT_TRUE(cobj_.GetByteAtIndex(i, &res));\n      EXPECT_EQ(expected[i], res) << \"int offset \" << i;\n    }\n  }\n\n  //  ASCII string with SMALL_TAG\n  {\n    string s(64, 'x');\n    for (size_t i = 0; i < s.size(); ++i)\n      s[i] = 'a' + (i % 26);\n    cobj_.SetString(s);\n    for (size_t i = 0; i < s.size(); ++i) {\n      uint8_t res = 0;\n      EXPECT_TRUE(cobj_.GetByteAtIndex(i, &res));\n      EXPECT_EQ(static_cast<uint8_t>(s[i]), res) << \"long ascii offset \" << i;\n    }\n  }\n\n  // Non-ASCII string with SMALL_TAG\n  {\n    string s(64, '\\xC0');\n    for (size_t i = 0; i < s.size(); ++i)\n      s[i] = static_cast<char>(128 + (i % 128));\n    cobj_.SetString(s);\n    for (size_t i = 0; i < s.size(); ++i) {\n      uint8_t res = 0;\n      EXPECT_TRUE(cobj_.GetByteAtIndex(i, &res));\n      EXPECT_EQ(static_cast<uint8_t>(s[i]), res) << \"non-ascii offset \" << i;\n    }\n  }\n\n  // ASCII string ROBJ_TAG\n  {\n    string s(512, 'z');\n    for (size_t i = 0; i < s.size(); ++i)\n      s[i] = 'A' + (i % 26);\n    cobj_.SetString(s);\n    for (size_t i = 0; i < s.size(); ++i) {\n      uint8_t res = 0;\n      EXPECT_TRUE(cobj_.GetByteAtIndex(i, &res));\n      EXPECT_EQ(static_cast<uint8_t>(s[i]), res) << \"medium offset \" << i;\n    }\n  }\n\n  // Non-ASCII string ROBJ_TAG\n  {\n    string s(512, 'z');\n    for (size_t i = 0; i < s.size(); ++i)\n      s[i] = 
static_cast<char>(128 + (i % 128));\n    cobj_.SetString(s);\n    for (size_t i = 0; i < s.size(); ++i) {\n      uint8_t res = 0;\n      EXPECT_TRUE(cobj_.GetByteAtIndex(i, &res));\n      EXPECT_EQ(static_cast<uint8_t>(s[i]), res) << \"medium offset \" << i;\n    }\n  }\n\n  cobj_.Reset();\n}\n\nTEST_F(CompactObjectTest, SetByteAtOffset) {\n  // Inline string (INLINE_TAG)\n  {\n    string s = \"abcde\";\n    cobj_.SetString(s);\n    for (size_t i = 0; i < s.size(); ++i) {\n      std::pair<bool, bool> res_set_byte = cobj_.SetByteAtIndex(i, 'Z');\n      EXPECT_TRUE(res_set_byte.first);\n      EXPECT_TRUE(res_set_byte.second);\n      uint8_t res = 0;\n      EXPECT_TRUE(cobj_.GetByteAtIndex(i, &res));\n      EXPECT_EQ('Z', res) << \"inline set offset \" << i;\n    }\n    // All bytes should now be 'Z'\n    string result;\n    cobj_.GetString(&result);\n    EXPECT_EQ(string(5, 'Z'), result);\n  }\n\n  // Integer-encoded string (INT_TAG)\n  {\n    cobj_.SetString(\"999\");\n    std::pair<bool, bool> res_set_byte = cobj_.SetByteAtIndex(0, 'x');\n    EXPECT_TRUE(res_set_byte.first);\n    // We didn't modify in-place, SetString is called\n    EXPECT_FALSE(res_set_byte.second);\n    string result;\n    cobj_.GetString(&result);\n    EXPECT_EQ(\"x99\", result);\n  }\n\n  // ASCII string with SMALL_TAG\n  {\n    string s(64, 'a');\n    for (size_t i = 0; i < s.size(); ++i)\n      s[i] = 'a' + (i % 26);\n    cobj_.SetString(s);\n\n    // Modify every 10th byte\n    for (size_t i = 0; i < s.size(); i += 10) {\n      std::pair<bool, bool> res_set_byte = cobj_.SetByteAtIndex(i, '!');\n      EXPECT_TRUE(res_set_byte.first);\n      EXPECT_FALSE(res_set_byte.second);\n      s[i] = '!';\n    }\n\n    // Verify all bytes\n    for (size_t i = 0; i < s.size(); ++i) {\n      uint8_t res = 0;\n      EXPECT_TRUE(cobj_.GetByteAtIndex(i, &res));\n      EXPECT_EQ(static_cast<uint8_t>(s[i]), res) << \"long ascii set offset \" << i;\n    }\n  }\n\n  // Non-ASCII string with SMALL_TAG\n  {\n    
string s(64, '\\x80');\n    for (size_t i = 0; i < s.size(); ++i)\n      s[i] = static_cast<char>(128 + (i % 128));\n    cobj_.SetString(s);\n\n    std::pair<bool, bool> res_set_byte = cobj_.SetByteAtIndex(63, 0xFF);\n    EXPECT_TRUE(res_set_byte.first);\n    EXPECT_FALSE(res_set_byte.second);\n    s[63] = '\\xFF';\n\n    for (size_t i = 0; i < s.size(); ++i) {\n      uint8_t res = 0;\n      EXPECT_TRUE(cobj_.GetByteAtIndex(i, &res));\n      EXPECT_EQ(static_cast<uint8_t>(s[i]), res) << \"non-ascii set offset \" << i;\n    }\n  }\n\n  // ASCII string with ROBJ_TAG\n  {\n    string s(512, 'a');\n    for (size_t i = 0; i < s.size(); ++i)\n      s[i] = 'a' + (i % 26);\n    cobj_.SetString(s);\n\n    // Modify every 10th byte\n    for (size_t i = 0; i < s.size(); i += 10) {\n      std::pair<bool, bool> res_set_byte = cobj_.SetByteAtIndex(i, '!');\n      EXPECT_TRUE(res_set_byte.first);\n      EXPECT_TRUE(res_set_byte.second);\n      s[i] = '!';\n    }\n\n    // Verify all bytes\n    for (size_t i = 0; i < s.size(); ++i) {\n      uint8_t res = 0;\n      EXPECT_TRUE(cobj_.GetByteAtIndex(i, &res));\n      EXPECT_EQ(static_cast<uint8_t>(s[i]), res) << \"long ascii set offset \" << i;\n    }\n  }\n\n  // ASCII string with ROBJ_TAG modified to non-ASCII\n  {\n    string s(512, 'a');\n    for (size_t i = 0; i < s.size(); ++i)\n      s[i] = 'a' + (i % 26);\n    cobj_.SetString(s);\n\n    // Modify in-place ascii packed string\n    std::pair<bool, bool> res_set_byte = cobj_.SetByteAtIndex(0, 'A');\n    EXPECT_TRUE(res_set_byte.first);\n    EXPECT_TRUE(res_set_byte.second);\n\n    // Adding non-ascii byte modification should still succeed, but not in-place\n    res_set_byte = cobj_.SetByteAtIndex(255, 0xFF);\n    EXPECT_TRUE(res_set_byte.first);\n    EXPECT_FALSE(res_set_byte.second);\n\n    // Modification of non-ascii ROBJ string should succeed and in-place\n    res_set_byte = cobj_.SetByteAtIndex(511, 'C');\n    EXPECT_TRUE(res_set_byte.first);\n    
EXPECT_TRUE(res_set_byte.second);\n\n    uint8_t res;\n    EXPECT_TRUE(cobj_.GetByteAtIndex(0, &res));\n    EXPECT_EQ('A', res);\n    EXPECT_TRUE(cobj_.GetByteAtIndex(255, &res));\n    EXPECT_EQ(0xFF, res);\n    EXPECT_TRUE(cobj_.GetByteAtIndex(511, &res));\n    EXPECT_EQ('C', res);\n  }\n\n  // Out-of-bounds access should be handled gracefully.\n  {\n    string s = \"abc\";\n    cobj_.SetString(s);\n    // SetByteAtIndex: index equal to size() is out-of-bounds.\n    auto res_pair = cobj_.SetByteAtIndex(s.size(), 'X');\n    EXPECT_FALSE(res_pair.first);\n    EXPECT_FALSE(res_pair.second);\n    // GetByteAtIndex: out-of-bounds should set result to 0.\n    uint8_t res = 123;  // sentinel non-zero value\n    EXPECT_FALSE(cobj_.GetByteAtIndex(s.size(), &res));\n    EXPECT_EQ(0u, res);\n  }\n\n  cobj_.Reset();\n}\n\nstatic void ascii_pack_naive(const char* ascii, size_t len, uint8_t* bin) {\n  const char* end = ascii + len;\n\n  unsigned i = 0;\n  while (ascii + 8 <= end) {\n    for (i = 0; i < 7; ++i) {\n      *bin++ = (ascii[0] >> i) | (ascii[1] << (7 - i));\n      ++ascii;\n    }\n    ++ascii;\n  }\n\n  // epilog - we do not pack since we have less than 8 bytes.\n  while (ascii < end) {\n    *bin++ = *ascii++;\n  }\n}\n\nstatic void BM_PackNaive(benchmark::State& state) {\n  string val(1024, 'a');\n  uint8_t buf[1024];\n\n  while (state.KeepRunning()) {\n    ascii_pack_naive(val.data(), val.size(), buf);\n  }\n}\nBENCHMARK(BM_PackNaive);\n\nstatic void BM_Pack(benchmark::State& state) {\n  string val(1024, 'a');\n  uint8_t buf[1024];\n\n  while (state.KeepRunning()) {\n    detail::ascii_pack(val.data(), val.size(), buf);\n  }\n}\nBENCHMARK(BM_Pack);\n\nstatic void BM_PackSimd(benchmark::State& state) {\n  string val(1024, 'a');\n  uint8_t buf[1024];\n\n  while (state.KeepRunning()) {\n    detail::ascii_pack_simd(val.data(), val.size(), buf);\n  }\n}\nBENCHMARK(BM_PackSimd);\n\nstatic void BM_PackSimd2(benchmark::State& state) {\n  string val(1024, 'a');\n  uint8_t 
buf[1024];\n\n  while (state.KeepRunning()) {\n    detail::ascii_pack_simd2(val.data(), val.size(), buf);\n  }\n}\nBENCHMARK(BM_PackSimd2);\n\nstatic void BM_Unpack(benchmark::State& state) {\n  string val(1024, 'a');\n  uint8_t buf[1024];\n\n  detail::ascii_pack(val.data(), val.size(), buf);\n\n  while (state.KeepRunning()) {\n    detail::ascii_unpack(buf, val.size(), val.data());\n  }\n}\nBENCHMARK(BM_Unpack);\n\nstatic void BM_UnpackSimd(benchmark::State& state) {\n  string val(1024, 'a');\n  uint8_t buf[1024];\n\n  detail::ascii_pack(val.data(), val.size(), buf);\n\n  while (state.KeepRunning()) {\n    detail::ascii_unpack_simd(buf, val.size(), val.data());\n  }\n}\nBENCHMARK(BM_UnpackSimd);\n\nstatic void BM_LpCompare(benchmark::State& state) {\n  std::mt19937_64 rd;\n  uint8_t* lp = lpNew(0);\n  for (unsigned i = 0; i < 100; ++i) {\n    lp = lpAppendInteger(lp, rd() % (1ULL << 48));\n  }\n\n  string val = absl::StrCat(1ULL << 49);\n  while (state.KeepRunning()) {\n    uint8_t* elem = lpLast(lp);\n    while (elem) {\n      lpCompare(elem, reinterpret_cast<const uint8_t*>(val.data()), val.size());\n      elem = lpPrev(lp, elem);\n    }\n  }\n  lpFree(lp);\n}\nBENCHMARK(BM_LpCompare);\n\nstatic void BM_LpCompareInt(benchmark::State& state) {\n  std::mt19937_64 rd;\n  uint8_t* lp = lpNew(0);\n  for (unsigned i = 0; i < 100; ++i) {\n    lp = lpAppendInteger(lp, rd() % (1ULL << 48));\n  }\n\n  int64_t val = 1ULL << 49;\n  while (state.KeepRunning()) {\n    uint8_t* elem = lpLast(lp);\n    int64_t sz;\n    while (elem) {\n      DCHECK_NE(0xFF, *elem);\n      lpGetInteger(elem, &sz);\n      int res = sz == val;\n      benchmark::DoNotOptimize(res);\n      elem = lpPrev(lp, elem);\n    }\n  }\n  lpFree(lp);\n}\nBENCHMARK(BM_LpCompareInt);\n\nstatic void BM_LpGet(benchmark::State& state) {\n  unsigned version = state.range(0);\n  uint8_t* lp = lpNew(0);\n  int64_t val = -1;\n  for (unsigned i = 0; i < 60; ++i) {\n    lp = lpAppendInteger(lp, val);\n    val *= 2;\n  
}\n\n  while (state.KeepRunning()) {\n    uint8_t* elem = lpLast(lp);\n    int64_t ival;\n    if (version == 1) {\n      while (elem) {\n        unsigned char* value = lpGet(elem, &ival, NULL);\n        benchmark::DoNotOptimize(value);\n        elem = lpPrev(lp, elem);\n      }\n    } else {\n      while (elem) {\n        int res = lpGetInteger(elem, &ival);\n        benchmark::DoNotOptimize(res);\n        elem = lpPrev(lp, elem);\n      }\n    }\n  }\n  lpFree(lp);\n}\nBENCHMARK(BM_LpGet)->Arg(1)->Arg(2);\n\nextern \"C\" int lpStringToInt64(const char* s, unsigned long slen, int64_t* value);\n\nstatic void BM_LpString2Int(benchmark::State& state) {\n  int version = state.range(0);\n  std::mt19937_64 rd;\n  vector<string> values;\n  for (unsigned i = 0; i < 1000; ++i) {\n    int64_t val = rd();\n    values.push_back(absl::StrCat(val));\n  }\n\n  int64_t ival = 0;\n  while (state.KeepRunning()) {\n    for (const auto& val : values) {\n      int res = version == 1 ? lpStringToInt64(val.data(), val.size(), &ival)\n                             : absl::SimpleAtoi(val, &ival);\n      benchmark::DoNotOptimize(res);\n    }\n  }\n}\nBENCHMARK(BM_LpString2Int)->Arg(1)->Arg(2);\n\n}  // namespace dfly\n"
  },
  {
    "path": "src/core/dash.h",
    "content": "// Copyright 2022, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n#pragma once\n\n#include <vector>\n\n#include \"absl/random/random.h\"\n#include \"base/pmr/memory_resource.h\"\n#include \"core/dash_internal.h\"\n\nnamespace dfly {\n\n// DASH: Dynamic And Scalable Hashing.\n\ntemplate <typename _Key, typename _Value, typename Policy>\nclass DashTable : public detail::DashTableBase {\n  DashTable(const DashTable&) = delete;\n  DashTable& operator=(const DashTable&) = delete;\n\n  using Base = detail::DashTableBase;\n  using SegmentType = detail::Segment<_Key, _Value, Policy>;\n  using SegmentIterator = typename SegmentType::Iterator;\n\n public:\n  using Key_t = _Key;\n  using Value_t = _Value;\n  using Segment_t = SegmentType;\n\n  //! Total number of buckets in a segment (including stash).\n  static constexpr double kTaxAmount = SegmentType::kTaxSize;\n  static constexpr size_t kSegBytes = sizeof(SegmentType);\n\n  // How many bytes the non-stash part is taking.\n  static constexpr size_t kSegRegularBytes =\n      kSegBytes - (SegmentType::kStashBucketNum * SegmentType::kBucketSz);\n\n  static constexpr size_t kSegCapacity = SegmentType::capacity();\n  static constexpr size_t kSlotNum = SegmentType::kSlotNum;\n  static constexpr size_t kBucketNum = SegmentType::kBucketNum;\n\n  // if IsSingleBucket is true - iterates only over a single bucket.\n  template <bool IsConst, bool IsSingleBucket = false> class Iterator;\n\n  using const_iterator = Iterator<true>;\n  using iterator = Iterator<false>;\n\n  using const_bucket_iterator = Iterator<true, true>;\n  using bucket_iterator = Iterator<false, true>;\n  using Cursor = detail::DashCursor;\n\n  struct HotBuckets {\n    static constexpr size_t kRegularBuckets = 4;\n    static constexpr size_t kNumBuckets = kRegularBuckets + SegmentType::kStashBucketNum;\n\n    struct ByType {\n      bucket_iterator regular_buckets[kRegularBuckets];\n      bucket_iterator 
stash_buckets[SegmentType::kStashBucketNum];\n    };\n\n    union Probes {\n      ByType by_type;\n      bucket_iterator arr[kNumBuckets];\n\n      Probes() : arr() {\n      }\n    } probes;\n\n    // id must be in the range [0, kNumBuckets).\n    bucket_iterator at(unsigned id) const {\n      return probes.arr[id];\n    }\n\n    unsigned num_buckets;\n    // key_hash of a key that we try to insert.\n    // I use it as pseudo-random number in my gc/eviction heuristics.\n    uint64_t key_hash;\n  };\n\n  struct DefaultEvictionPolicy {\n    static constexpr bool can_gc = false;\n    static constexpr bool can_evict = false;\n\n    bool CanGrow(const DashTable&) {\n      return true;\n    }\n\n    void OnMove(Cursor source, Cursor dest) {\n    }\n\n    void RecordSplit(SegmentType* segment) {\n    }\n    /*\n       /// Required interface in case can_gc is true\n       // Returns number of garbage collected items deleted. 0 - means nothing has been\n       // deleted.\n       unsigned GarbageCollect(const EvictionBuckets& eb, DashTable* me) const {\n         return 0;\n       }\n\n       // Required interface in case can_gc is true\n       // returns number of items evicted from the table.\n       // 0 means - nothing has been evicted.\n       unsigned Evict(const EvictionBuckets& eb, DashTable* me) {\n         return 0;\n       }\n   */\n  };\n\n  DashTable(size_t capacity_log = 1, const Policy& policy = Policy{},\n            PMR_NS::memory_resource* mr = PMR_NS::get_default_resource());\n  ~DashTable();\n\n  void Reserve(size_t size);\n\n  // false for duplicate, true if inserted.\n  template <typename U, typename V> std::pair<iterator, bool> Insert(U&& key, V&& value) {\n    DefaultEvictionPolicy policy;\n    return InsertInternal(std::forward<U>(key), std::forward<V>(value), policy,\n                          InsertMode::kInsertIfNotFound);\n  }\n\n  template <typename U, typename V, typename EvictionPolicy>\n  std::pair<iterator, bool> Insert(U&& key, V&& value, 
EvictionPolicy& ev) {\n    return InsertInternal(std::forward<U>(key), std::forward<V>(value), ev,\n                          InsertMode::kInsertIfNotFound);\n  }\n\n  template <typename U, typename V> iterator InsertNew(U&& key, V&& value) {\n    DefaultEvictionPolicy policy;\n    return InsertNew(std::forward<U>(key), std::forward<V>(value), policy);\n  }\n\n  template <typename U, typename V, typename EvictionPolicy>\n  iterator InsertNew(U&& key, V&& value, EvictionPolicy& ev) {\n    return InsertInternal(std::forward<U>(key), std::forward<V>(value), ev,\n                          InsertMode::kForceInsert)\n        .first;\n  }\n\n  template <typename U> const_iterator Find(U&& key) const;\n  template <typename U> iterator Find(U&& key);\n\n  // Prefetches the memory where the key would reside into the cache.\n  template <typename U> void Prefetch(U&& key) const;\n\n  // Find first entry with given key hash that evaluates to true on pred.\n  // Pred accepts either (const key&) or (const key&, const value&)\n  template <typename Pred> iterator FindFirst(uint64_t key_hash, Pred&& pred);\n\n  // it must be valid.\n  void Erase(iterator it);\n\n  size_t Erase(const Key_t& k);\n\n  iterator begin() {\n    iterator it{this, 0, 0, 0};\n    it.Seek2Occupied();\n    return it;\n  }\n\n  const_iterator cbegin() const {\n    const_iterator it{this, 0, 0, 0};\n    it.Seek2Occupied();\n    return it;\n  }\n\n  iterator end() const {\n    return iterator{};\n  }\n  const_iterator cend() const {\n    return const_iterator{};\n  }\n\n  using Base::depth;\n  using Base::Empty;\n  using Base::size;\n  using Base::unique_segments;\n\n  // Direct access to the segment for debugging purposes.\n  Segment_t* GetSegment(unsigned segment_id) {\n    return segment_[segment_id];\n  }\n\n  // - If there is no buddy for segment_id return segment_id.\n  //   Otherwise, return buddy_id.\n  // - A buddy is a sibling segment that was created from the\n  //   same parent during split and can be 
merged back together.\n  //   It's the adjacent subtree of the same depth.\n  unsigned FindBuddyId(unsigned segment_id) {\n    auto* seg = GetSegment(segment_id);\n    uint8_t depth = seg->local_depth();\n\n    if (depth <= 1) {\n      return segment_id;\n    }\n\n    const size_t bit_pos = global_depth_ - depth;\n    const size_t buddy_idx = segment_id ^ (1u << bit_pos);\n    assert(buddy_idx < segment_.size());\n\n    auto* buddy = GetSegment(buddy_idx);\n    // There is no adjacent subtree of the same depth\n    if (buddy->local_depth() != depth) {\n      return segment_id;\n    }\n\n    return buddy_idx;\n  }\n\n  // - Moves all items from `buddy_id` to `keep_id` (merges the two segments).\n  //   After merge completes, `buddy_id` segment is deleted.\n  // - Return true if the two segments merged successfully.\n  // - If an insertion fails we rollback and abort the merge (return false).\n  // - Merge can run only if there are no active snapshots.\n  // - Prefer calling this function only when the combined size of both segments\n  //   is less than x * segment_capacity. 
With x: 0 < x < 0.25 as statistically this won't\n  //   trigger rollbacks.\n  bool Merge(unsigned keep_id, unsigned buddy_id) {\n    auto* keep = GetSegment(keep_id);\n    auto* buddy = GetSegment(buddy_id);\n\n    assert((keep->local_depth() == buddy->local_depth()));\n    // assert((keep->SlowSize() + buddy->SlowSize() < (0.25 * buddy->capacity())));\n    assert(keep->local_depth() != 1);\n    assert(keep != buddy);\n    assert(keep_id < buddy_id);  // Callers must iterate low to high to ensure correct orientation\n\n    // Don't merge below initial_depth to maintain Clear() invariant\n    // After merge, keep will have depth-1, which determines unique_segments\n    uint8_t depth_after_merge = keep->local_depth() - 1;\n    if (depth_after_merge < initial_depth_) {\n      return false;\n    }\n\n    bool should_rollback = false;\n\n    // Decrease depth (merge back to parent)\n    keep->set_local_depth(keep->local_depth() - 1);\n\n    // Move all items from buddy to keep\n    buddy->TraverseAll([&](const auto& it) {\n      if (should_rollback) {\n        return;\n      }\n\n      uint64_t hash = DoHash(buddy->Key(it.index, it.slot));\n\n      auto& src_bucket = buddy->GetBucket(it.index);\n      auto res =\n          keep->InsertUniq(std::move(src_bucket.key[it.slot]), std::move(src_bucket.value[it.slot]),\n                           hash, false, [](auto&&...) {});\n\n      if (!res.found()) {\n        should_rollback = true;\n        return;\n      }\n\n      // Clear the slot in buddy so rollback can reuse the space\n      src_bucket.Delete(it.slot);\n    });\n\n    if (should_rollback) {\n      auto hash_fn = [this](const auto& k) { return policy_.HashFn(k); };\n      keep->Split(hash_fn, buddy, [](auto&&...) 
{});\n\n      return false;\n    }\n\n    // Same as Split()\n    uint32_t buddy_chunk_size = 1u << (global_depth_ - buddy->local_depth());\n    uint32_t buddy_start = buddy_id & ~(buddy_chunk_size - 1u);\n    for (size_t i = buddy_start; i < buddy_start + buddy_chunk_size; ++i) {\n      segment_[i] = keep;\n    }\n\n    // Free buddy segment\n    PMR_NS::polymorphic_allocator<SegmentType> pa(segment_.get_allocator());\n    using alloc_traits = std::allocator_traits<decltype(pa)>;\n    alloc_traits::destroy(pa, buddy);\n    alloc_traits::deallocate(pa, buddy, 1);\n\n    // Decrement unique segment counter\n    --unique_segments_;\n    bucket_count_ -= keep->num_buckets();\n\n    return true;\n  }\n\n  size_t GetSegmentCount() const {\n    return segment_.size();\n  }\n\n  size_t NextSeg(size_t sid) const {\n    size_t delta = (1u << (global_depth_ - segment_[sid]->local_depth()));\n    return sid + delta;\n  }\n\n  template <typename U> uint64_t DoHash(const U& k) const {\n    return policy_.HashFn(k);\n  }\n\n  // Flat memory usage (allocated) of the table, not including the memory allocated\n  // by the hosted objects.\n  size_t mem_usage() const {\n    return segment_.capacity() * sizeof(void*) + sizeof(SegmentType) * unique_segments_;\n  }\n\n  // Returns the total number of buckets in the table, in contrast to capacity() which\n  // returns the total number of slots.\n  size_t bucket_count() const {\n    return bucket_count_;\n  }\n\n  // Overall capacity of the table (including stash buckets) in number of keys.\n  size_t capacity() const {\n    return bucket_count() * kSlotNum;\n  }\n\n  double load_factor() const {\n    return double(size()) / capacity();\n  }\n\n  static constexpr unsigned LargestBucketId() {\n    return SegmentType::kBucketNum + SegmentType::kStashBucketNum - 1;\n  }\n\n  // Gets a random cursor based on the available segments and buckets.\n  // Returns: cursor with a random position\n  Cursor GetRandomCursor(absl::BitGen* bitgen);\n\n 
 // Traverses over a single logical bucket in table and calls cb(iterator) 0 or more\n  // times. if cursor=0 starts traversing from the beginning, otherwise continues from where it\n  // stopped. returns 0 if the supplied cursor reached end of traversal. Traverse iterates at bucket\n  // logical granularity, which means for each non-empty bucket it calls cb per each entry in the\n  // logical bucket before returning. Unlike begin/end interface, traverse is stable during table\n  // mutations. It guarantees that if key exists (1)at the beginning of traversal, (2) stays in the\n  // table during the traversal, then Traverse() will eventually reach it even when the table\n  // shrinks or grows. Returns: cursor that is guaranteed to be less than 2^40.\n  template <typename Cb> Cursor Traverse(Cursor curs, Cb&& cb);\n\n  // Traverses over physical buckets. It calls cb once for each bucket by passing a bucket iterator.\n  // if cursor=0 starts traversing from the beginning, otherwise continues from where\n  // it stopped. returns 0 if the supplied cursor reached end of traversal.\n  // Unlike Traverse, TraverseBuckets calls cb once on bucket iterator and not on each entry in\n  // bucket. TraverseBuckets is stable during table mutations. It guarantees traversing all buckets\n  // that existed at the beginning of traversal.\n  template <typename Cb> Cursor TraverseBuckets(Cursor curs, Cb&& cb);\n\n  // Traverses over a single bucket in table and calls cb(iterator). 
The traverse order will be\n  // segment by segment over physical backets.\n  // traverse by segment order does not guarantees coverage if the table grows/shrinks, it is useful\n  // when formal full coverage is not critically important.\n  template <typename Cb> Cursor TraverseBySegmentOrder(Cursor curs, Cb&& cb);\n\n  // Discards slots information.\n  static const_bucket_iterator BucketIt(const_iterator it) {\n    return const_bucket_iterator{it.owner_, it.seg_id_, it.bucket_id_, 0};\n  }\n\n  // Seeks to the first occupied slot if exists in the bucket.\n  const_bucket_iterator BucketIt(unsigned segment_id, unsigned bucket_id) const {\n    return const_bucket_iterator{this, segment_id, uint8_t(bucket_id)};\n  }\n\n  bucket_iterator BucketIt(unsigned segment_id, unsigned bucket_id) {\n    return bucket_iterator{this, segment_id, uint8_t(bucket_id)};\n  }\n\n  iterator GetIterator(unsigned segment_id, unsigned bucket_id, unsigned slot_id) {\n    return iterator{this, segment_id, uint8_t(bucket_id), uint8_t(slot_id)};\n  }\n\n  const_bucket_iterator CursorToBucketIt(Cursor c) const {\n    return const_bucket_iterator{this, c.segment_id(global_depth_), c.bucket_id(), 0};\n  }\n  bucket_iterator CursorToBucketIt(Cursor c) {\n    return bucket_iterator{this, c.segment_id(global_depth_), c.bucket_id(), 0};\n  }\n\n  // Capture Version Change. Runs cb(it) on every bucket! (not entry) in the table whose version\n  // would potentially change upon insertion of 'k'.\n  // In practice traversal is limited to a single segment. The operation is read-only and\n  // simulates insertion process. 
'cb' must accept bucket_iterator.\n  // Note: the interface is a bit hacky.\n  // The functions call cb on physical buckets with version smaller than ver_threshold that\n  // due to entry movements might update its version to version greater than ver_threshold.\n  //\n  // These are not const functions because they send non-const iterators that allow\n  // updating contents/versions of the passed iterators.\n  template <typename U, typename Cb>\n  void CVCUponInsert(uint64_t ver_threshold, const U& key, Cb&& cb);\n\n  template <typename Cb> void CVCUponBump(uint64_t ver_threshold, const_iterator it, Cb&& cb);\n\n  void Clear();\n\n  // Returns true if an element was deleted i.e. the rightmost slot was busy.\n  bool ShiftRight(bucket_iterator it);\n\n  template <typename BumpPolicy> iterator BumpUp(iterator it, BumpPolicy& bp) {\n    SegmentIterator seg_it = segment_[it.seg_id_]->BumpUp(\n        it.bucket_id_, it.slot_id_, DoHash(it->first), bp,\n        [&](uint32_t segment_id, detail::PhysicalBid from, detail::PhysicalBid to) {\n          // OnMove is used to notify policy about the items moves across buckets.\n          bp.OnMove(Cursor{global_depth_, segment_id, from}, Cursor{global_depth_, segment_id, to});\n        });\n\n    return iterator{this, it.seg_id_, seg_it.index, seg_it.slot};\n  }\n\n  uint64_t garbage_collected() const {\n    return garbage_collected_;\n  }\n\n  uint64_t stash_unloaded() const {\n    return stash_unloaded_;\n  }\n\n private:\n  enum class InsertMode {\n    kInsertIfNotFound,\n    kForceInsert,\n  };\n\n  Cursor AdvanceCursorBucketOrder(Cursor cursor);\n\n  template <typename U, typename V, typename EvictionPolicy>\n  std::pair<iterator, bool> InsertInternal(U&& key, V&& value, EvictionPolicy& policy,\n                                           InsertMode mode);\n\n  void IncreaseDepth(unsigned new_depth);\n  template <typename EvictionPolicy> void Split(uint32_t seg_id, EvictionPolicy& ev);\n\n  // Segment directory contains multiple 
segment pointers, some of them pointing to\n  // the same object. IterateDistinct goes over all distinct segments in the table.\n  template <typename Cb> void IterateDistinct(Cb&& cb);\n\n  template <typename K> auto EqPred(const K& key) const {\n    return [p = &policy_, &key](const auto& probe) -> bool { return p->Equal(probe, key); };\n  }\n\n  SegmentType* ConstructSegment(uint8_t depth, uint32_t id) {\n    auto* mr = segment_.get_allocator().resource();\n    PMR_NS::polymorphic_allocator<SegmentType> pa(mr);\n    SegmentType* res = pa.allocate(1);\n    pa.construct(res, depth, id, mr);  //   new SegmentType(depth);\n    bucket_count_ += res->num_buckets();\n    return res;\n  }\n\n  Policy policy_;\n  std::vector<SegmentType*, PMR_NS::polymorphic_allocator<SegmentType*>> segment_;\n\n  uint64_t garbage_collected_ = 0;\n  uint64_t stash_unloaded_ = 0;\n};  // DashTable\n\ntemplate <typename _Key, typename _Value, typename Policy>\ntemplate <bool IsConst, bool IsSingleBucket>\nclass DashTable<_Key, _Value, Policy>::Iterator {\n  using Owner = std::conditional_t<IsConst, const DashTable, DashTable>;\n\n  Owner* owner_;\n  uint32_t seg_id_;\n  detail::PhysicalBid bucket_id_;\n  uint8_t slot_id_;\n\n  friend class DashTable;\n\n  Iterator(Owner* me, uint32_t seg_id, detail::PhysicalBid bid, uint8_t sid)\n      : owner_(me), seg_id_(seg_id), bucket_id_(bid), slot_id_(sid) {\n  }\n\n  Iterator(Owner* me, uint32_t seg_id, detail::PhysicalBid bid)\n      : owner_(me), seg_id_(seg_id), bucket_id_(bid), slot_id_(0) {\n    Seek2Occupied();\n  }\n\n public:\n  using iterator_category = std::forward_iterator_tag;\n  using difference_type = std::ptrdiff_t;\n  using IteratorPairType =\n      std::conditional_t<IsConst, detail::IteratorPair<const Key_t, const Value_t>,\n                         detail::IteratorPair<Key_t, Value_t>>;\n\n  // Copy constructor from iterator to const_iterator.\n  template <bool TIsConst = IsConst, bool TIsSingleB,\n            typename 
std::enable_if<TIsConst>::type* = nullptr>\n  Iterator(const Iterator<!TIsConst, TIsSingleB>& other) noexcept\n      : owner_(other.owner_),\n        seg_id_(other.seg_id_),\n        bucket_id_(other.bucket_id_),\n        slot_id_(other.slot_id_) {\n  }\n\n  // Copy constructor from iterator to bucket_iterator and vice versa.\n  template <bool TIsSingle>\n  Iterator(const Iterator<IsConst, TIsSingle>& other) noexcept\n      : owner_(other.owner_),\n        seg_id_(other.seg_id_),\n        bucket_id_(other.bucket_id_),\n        slot_id_(IsSingleBucket ? 0 : other.slot_id_) {\n    // if this - is a bucket_iterator - we reset slot_id to the first occupied space.\n    if constexpr (IsSingleBucket) {\n      Seek2Occupied();\n    }\n  }\n\n  Iterator() : owner_(nullptr), seg_id_(0), bucket_id_(0), slot_id_(0) {\n  }\n\n  Iterator(const Iterator& other) = default;\n\n  Iterator(Iterator&& other) = default;\n\n  Iterator& operator=(const Iterator& other) = default;\n  Iterator& operator=(Iterator&& other) = default;\n\n  // pre\n  Iterator& operator++() {\n    ++slot_id_;\n    Seek2Occupied();\n    return *this;\n  }\n\n  Iterator& operator+=(int delta) {\n    slot_id_ += delta;\n    Seek2Occupied();\n    return *this;\n  }\n\n  Iterator& AdvanceIfNotOccupied() {\n    if (!IsOccupied()) {\n      this->operator++();\n    }\n    return *this;\n  }\n\n  IteratorPairType operator->() const {\n    auto* seg = owner_->segment_[seg_id_];\n    return {seg->Key(bucket_id_, slot_id_), seg->Value(bucket_id_, slot_id_)};\n  }\n\n  // Make it self-contained. 
Does not need container::end().\n  bool is_done() const {\n    return owner_ == nullptr;\n  }\n\n  bool IsOccupied() const {\n    return (seg_id_ < owner_->segment_.size()) &&\n           ((owner_->segment_[seg_id_]->IsBusy(bucket_id_, slot_id_)));\n  }\n\n  Owner& owner() const {\n    return *owner_;\n  }\n\n  template <bool B = Policy::kUseVersion> std::enable_if_t<B, uint64_t> GetVersion() const {\n    assert(owner_ && seg_id_ < owner_->segment_.size());\n    return owner_->segment_[seg_id_]->GetVersion(bucket_id_);\n  }\n\n  template <bool B = Policy::kUseVersion> std::enable_if_t<B> SetVersion(uint64_t v) {\n    return owner_->segment_[seg_id_]->SetVersion(bucket_id_, v);\n  }\n\n  friend bool operator==(const Iterator& lhs, const Iterator& rhs) {\n    if (lhs.owner_ == nullptr && rhs.owner_ == nullptr)\n      return true;\n    return lhs.owner_ == rhs.owner_ && lhs.seg_id_ == rhs.seg_id_ &&\n           lhs.bucket_id_ == rhs.bucket_id_ && lhs.slot_id_ == rhs.slot_id_;\n  }\n\n  friend bool operator!=(const Iterator& lhs, const Iterator& rhs) {\n    return !(lhs == rhs);\n  }\n\n  // Bucket resolution cursor that is safe to use with insertions/removals.\n  // Serves as a hint really to the placement of the original item, i.e. 
the item\n  // could have moved.\n  detail::DashCursor bucket_cursor() const {\n    return detail::DashCursor(owner_->global_depth_, seg_id_, bucket_id_);\n  }\n\n  detail::PhysicalBid bucket_id() const {\n    return bucket_id_;\n  }\n\n  // Returns the unique address of the physical bucket as an integer.\n  // Stable for the lifetime of a serialization (mutations that could trigger\n  // segment splits are blocked while a snapshot version is registered).\n  uintptr_t bucket_address() const {\n    assert(owner_ && seg_id_ < owner_->segment_.size());\n    return reinterpret_cast<uintptr_t>(&owner_->segment_[seg_id_]->GetBucket(bucket_id_));\n  }\n\n  unsigned slot_id() const {\n    return slot_id_;\n  }\n\n  unsigned segment_id() const {\n    return seg_id_;\n  }\n\n private:\n  void Seek2Occupied();\n};  // Iterator\n\n/**\n  _____                 _                           _        _   _\n |_   _|               | |                         | |      | | (_)\n   | |  _ __ ___  _ __ | | ___ _ __ ___   ___ _ __ | |_ __ _| |_ _  ___  _ __\n   | | | '_ ` _ \\| '_ \\| |/ _ \\ '_ ` _ \\ / _ \\ '_ \\| __/ _` | __| |/ _ \\| '_ \\\n  _| |_| | | | | | |_) | |  __/ | | | | |  __/ | | | || (_| | |_| | (_) | | | |\n |_____|_| |_| |_| .__/|_|\\___|_| |_| |_|\\___|_| |_|\\__\\__,_|\\__|_|\\___/|_| |_|\n                 | |\n                 |_|\n\n**/\n\ntemplate <typename _Key, typename _Value, typename Policy>\ntemplate <bool IsConst, bool IsSingleBucket>\nvoid DashTable<_Key, _Value, Policy>::Iterator<IsConst, IsSingleBucket>::Seek2Occupied() {\n  if (owner_ == nullptr)\n    return;\n  assert(seg_id_ < owner_->segment_.size());\n\n  if constexpr (IsSingleBucket) {\n    const auto& b = owner_->segment_[seg_id_]->GetBucket(bucket_id_);\n    uint32_t mask = b.GetBusy() >> slot_id_;\n    if (mask) {\n      int slot = __builtin_ctz(mask);\n      slot_id_ += slot;\n      return;\n    }\n  } else {\n    while (seg_id_ < owner_->segment_.size()) {\n      auto seg_it = 
owner_->segment_[seg_id_]->FindValidStartingFrom(bucket_id_, slot_id_);\n      if (seg_it.found()) {\n        bucket_id_ = seg_it.index;\n        slot_id_ = seg_it.slot;\n        return;\n      }\n      seg_id_ = owner_->NextSeg(seg_id_);\n      bucket_id_ = slot_id_ = 0;\n    }\n  }\n  owner_ = nullptr;\n}\n\ntemplate <typename _Key, typename _Value, typename Policy>\nDashTable<_Key, _Value, Policy>::DashTable(size_t capacity_log, const Policy& policy,\n                                           PMR_NS::memory_resource* mr)\n    : Base(capacity_log), policy_(policy), segment_(mr) {\n  segment_.resize(unique_segments_);\n\n  // I assume we have enough memory to create the initial table and do not check allocations.\n  for (uint32_t i = 0; i < segment_.size(); ++i) {\n    segment_[i] = ConstructSegment(global_depth_, i);  //   new SegmentType(global_depth_);\n  }\n}\n\ntemplate <typename _Key, typename _Value, typename Policy>\nDashTable<_Key, _Value, Policy>::~DashTable() {\n  Clear();\n  auto* resource = segment_.get_allocator().resource();\n  PMR_NS::polymorphic_allocator<SegmentType> pa(resource);\n  using alloc_traits = std::allocator_traits<decltype(pa)>;\n\n  IterateDistinct([&](SegmentType* seg) {\n    alloc_traits::destroy(pa, seg);\n    alloc_traits::deallocate(pa, seg, 1);\n    return false;\n  });\n}\n\ntemplate <typename _Key, typename _Value, typename Policy>\ntemplate <typename U, typename Cb>\nvoid DashTable<_Key, _Value, Policy>::CVCUponInsert(uint64_t ver_threshold, const U& key, Cb&& cb) {\n  uint64_t key_hash = DoHash(key);\n  uint32_t seg_id = SegmentId(key_hash);\n  assert(seg_id < segment_.size());\n  const SegmentType* target = segment_[seg_id];\n\n  uint8_t bids[2];\n  unsigned num_touched = target->CVCOnInsert(ver_threshold, key_hash, bids);\n  if (num_touched < UINT16_MAX) {\n    for (unsigned i = 0; i < num_touched; ++i) {\n      cb(bucket_iterator{this, seg_id, bids[i]});\n    }\n    return;\n  }\n\n  // Segment is full, we need to 
return the whole segment, because it can be split\n  // and its entries can be reshuffled into different buckets.\n  for (uint8_t i = 0; i < target->num_buckets(); ++i) {\n    if (target->GetVersion(i) < ver_threshold && !target->GetBucket(i).IsEmpty()) {\n      cb(bucket_iterator{this, seg_id, i});\n    }\n  }\n}\n\ntemplate <typename _Key, typename _Value, typename Policy>\ntemplate <typename Cb>\nvoid DashTable<_Key, _Value, Policy>::CVCUponBump(uint64_t ver_upperbound, const_iterator it,\n                                                  Cb&& cb) {\n  uint64_t key_hash = DoHash(it->first);\n  uint32_t seg_id = it.segment_id();\n  assert(seg_id < segment_.size());\n  const SegmentType* target = segment_[seg_id];\n\n  uint8_t bids[3];\n  unsigned num_touched =\n      target->CVCOnBump(ver_upperbound, it.bucket_id(), it.slot_id(), key_hash, bids);\n\n  for (unsigned i = 0; i < num_touched; ++i) {\n    cb(bucket_iterator{this, seg_id, bids[i]});\n  }\n}\n\ntemplate <typename _Key, typename _Value, typename Policy>\nvoid DashTable<_Key, _Value, Policy>::Clear() {\n  auto cb = [this](SegmentType* seg) {\n    seg->TraverseAll([this, seg](const SegmentIterator& it) {\n      policy_.DestroyKey(seg->Key(it.index, it.slot));\n      policy_.DestroyValue(seg->Value(it.index, it.slot));\n    });\n    seg->Clear();\n    return false;\n  };\n\n  IterateDistinct(cb);\n  size_ = 0;\n\n  // Consider the following case: table with 8 segments overall, 4 distinct.\n  // S1, S1, S1, S1, S2, S3, S4, S4\n  /* This corresponds to the tree:\n            R\n          /  \\\n        S1   /\\\n            /\\ S4\n           S2 S3\n     We want to collapse this tree into, say, 2 segment directory.\n     That means we need to keep S1, S2 but delete S3, S4.\n     That means, we need to move representative segments until we reached the desired size\n     and then erase all other distinct segments.\n  **********/\n  if (global_depth_ > initial_depth_) {\n    
PMR_NS::polymorphic_allocator<SegmentType> pa(segment_.get_allocator());\n    using alloc_traits = std::allocator_traits<decltype(pa)>;\n\n    size_t dest = 0, src = 0;\n    size_t new_size = (1 << initial_depth_);\n    bucket_count_ = 0;\n    while (src < segment_.size()) {\n      auto* seg = segment_[src];\n      size_t next_src = NextSeg(src);  // must do before because NextSeg is dependent on seg.\n      if (dest < new_size) {\n        seg->set_local_depth(initial_depth_);\n        bucket_count_ += seg->num_buckets();\n        segment_[dest++] = seg;\n      } else {\n        alloc_traits::destroy(pa, seg);\n        alloc_traits::deallocate(pa, seg, 1);\n      }\n\n      src = next_src;\n    }\n\n    global_depth_ = initial_depth_;\n    unique_segments_ = new_size;\n    segment_.resize(new_size);\n  }\n}\n\ntemplate <typename _Key, typename _Value, typename Policy>\nbool DashTable<_Key, _Value, Policy>::ShiftRight(bucket_iterator it) {\n  auto* seg = segment_[it.seg_id_];\n\n  typename Segment_t::Hash_t hash_val = 0;\n  auto& bucket = seg->GetBucket(it.bucket_id_);\n\n  if (bucket.GetBusy() & (1 << (kSlotNum - 1))) {\n    it.slot_id_ = kSlotNum - 1;\n    hash_val = DoHash(it->first);\n    policy_.DestroyKey(it->first);\n    policy_.DestroyValue(it->second);\n  }\n\n  bool deleted = seg->ShiftRight(it.bucket_id_, hash_val);\n  size_ -= unsigned(deleted);\n\n  return deleted;\n}\n\ntemplate <typename _Key, typename _Value, typename Policy>\ntemplate <typename Cb>\nvoid DashTable<_Key, _Value, Policy>::IterateDistinct(Cb&& cb) {\n  size_t i = 0;\n  while (i < segment_.size()) {\n    auto* seg = segment_[i];\n    size_t next_id = NextSeg(i);\n    if (cb(seg))\n      break;\n    i = next_id;\n  }\n}\n\ntemplate <typename _Key, typename _Value, typename Policy>\ntemplate <typename U>\nauto DashTable<_Key, _Value, Policy>::Find(U&& key) const -> const_iterator {\n  uint64_t key_hash = DoHash(key);\n  uint32_t seg_id = SegmentId(key_hash);  // seg_id takes up 
global_depth_ high bits.\n\n  // Hash structure is like this: [SSUUUUBF], where S is segment id, U - unused,\n  // B - bucket id and F is a fingerprint. Segment id is needed to identify the correct segment.\n  // Once identified, the segment instance uses the lower part of hash to locate the key.\n  // It uses 8 least significant bits for a fingerprint and few more bits for bucket id.\n  if (auto seg_it = segment_[seg_id]->FindIt(key_hash, EqPred(key)); seg_it.found()) {\n    return {this, seg_id, seg_it.index, seg_it.slot};\n  }\n  return {};\n}\n\ntemplate <typename _Key, typename _Value, typename Policy>\ntemplate <typename U>\nauto DashTable<_Key, _Value, Policy>::Find(U&& key) -> iterator {\n  return FindFirst(DoHash(key), EqPred(key));\n}\n\ntemplate <typename _Key, typename _Value, typename Policy>\ntemplate <typename U>\nvoid DashTable<_Key, _Value, Policy>::Prefetch(U&& key) const {\n  uint64_t key_hash = DoHash(key);\n  uint32_t seg_id = SegmentId(key_hash);\n  segment_[seg_id]->Prefetch(key_hash);\n}\n\ntemplate <typename _Key, typename _Value, typename Policy>\ntemplate <typename Pred>\nauto DashTable<_Key, _Value, Policy>::FindFirst(uint64_t key_hash, Pred&& pred) -> iterator {\n  uint32_t seg_id = SegmentId(key_hash);\n  if (auto seg_it = segment_[seg_id]->FindIt(key_hash, pred); seg_it.found()) {\n    return {this, seg_id, seg_it.index, seg_it.slot};\n  }\n  return {};\n}\n\ntemplate <typename _Key, typename _Value, typename Policy>\nsize_t DashTable<_Key, _Value, Policy>::Erase(const Key_t& key) {\n  uint64_t key_hash = DoHash(key);\n  size_t x = SegmentId(key_hash);\n  auto* target = segment_[x];\n  auto it = target->FindIt(key_hash, EqPred(key));\n  if (!it.found())\n    return 0;\n\n  policy_.DestroyKey(target->Key(it.index, it.slot));\n  policy_.DestroyValue(target->Value(it.index, it.slot));\n  target->Delete(it, key_hash);\n  --size_;\n\n  return 1;\n}\n\ntemplate <typename _Key, typename _Value, typename Policy>\nvoid DashTable<_Key, _Value, 
Policy>::Erase(iterator it) {\n  auto* target = segment_[it.seg_id_];\n  uint64_t key_hash = DoHash(it->first);\n  SegmentIterator sit{it.bucket_id_, it.slot_id_};\n\n  policy_.DestroyKey(it->first);\n  policy_.DestroyValue(it->second);\n\n  target->Delete(sit, key_hash);\n  --size_;\n}\n\ntemplate <typename _Key, typename _Value, typename Policy>\nvoid DashTable<_Key, _Value, Policy>::Reserve(size_t size) {\n  if (size <= capacity())\n    return;\n\n  size_t sg_floor = (size - 1) / SegmentType::capacity();\n  if (sg_floor < segment_.size()) {\n    return;\n  }\n  assert(sg_floor > 1u);\n  unsigned new_depth = 1 + (63 ^ __builtin_clzll(sg_floor));\n\n  IncreaseDepth(new_depth);\n}\n\ntemplate <typename _Key, typename _Value, typename Policy>\ntemplate <typename U, typename V, typename EvictionPolicy>\nauto DashTable<_Key, _Value, Policy>::InsertInternal(U&& key, V&& value, EvictionPolicy& ev,\n                                                     InsertMode mode) -> std::pair<iterator, bool> {\n  uint64_t key_hash = DoHash(key);\n  uint32_t target_seg_id = SegmentId(key_hash);\n\n  while (true) {\n    // Keep last global_depth_ msb bits of the hash.\n    assert(target_seg_id < segment_.size());\n    SegmentType* target = segment_[target_seg_id];\n\n    // Load heap allocated segment data - to avoid TLB miss when accessing the bucket.\n    __builtin_prefetch(target, 0, 1);\n\n    typename SegmentType::Iterator it;\n    bool res = true;\n    unsigned num_buckets = target->num_buckets();\n\n    auto move_cb = [&](uint32_t segment_id, detail::PhysicalBid from, detail::PhysicalBid to) {\n      // OnMove is used to notify policy about the move of items across buckets.\n      ev.OnMove(Cursor{global_depth_, segment_id, from}, Cursor{global_depth_, segment_id, to});\n    };\n\n    if (mode == InsertMode::kForceInsert) {\n      it =\n          target->InsertUniq(std::forward<U>(key), std::forward<V>(value), key_hash, true, move_cb);\n      res = it.found();\n    } else {\n   
   std::tie(it, res) = target->Insert(std::forward<U>(key), std::forward<V>(value), key_hash,\n                                         EqPred(key), move_cb);\n    }\n\n    if (res) {  // success\n      // in case segment bucket count changed, we need to update total bucket count.\n      bucket_count_ += (target->num_buckets() - num_buckets);\n      ++size_;\n      return std::make_pair(iterator{this, target_seg_id, it.index, it.slot}, true);\n    }\n\n    /*duplicate insert, insertion failure*/\n    if (it.found()) {\n      return std::make_pair(iterator{this, target_seg_id, it.index, it.slot}, false);\n    }\n\n    bool consider_throw = true;\n\n    // At this point we must split the segment.\n    // try garbage collect or evict.\n    if constexpr (EvictionPolicy::can_evict || EvictionPolicy::can_gc) {\n      // Try gc.\n      uint8_t bid[HotBuckets::kRegularBuckets];\n      SegmentType::FillProbeArray(key_hash, bid);\n      HotBuckets hotspot;\n      hotspot.key_hash = key_hash;\n\n      for (unsigned j = 0; j < HotBuckets::kRegularBuckets; ++j) {\n        hotspot.probes.by_type.regular_buckets[j] = bucket_iterator{this, target_seg_id, bid[j]};\n      }\n\n      for (unsigned i = 0; i < SegmentType::kStashBucketNum; ++i) {\n        hotspot.probes.by_type.stash_buckets[i] =\n            bucket_iterator{this, target_seg_id, uint8_t(Policy::kBucketNum + i), 0};\n      }\n      hotspot.num_buckets = HotBuckets::kNumBuckets;\n\n      // The difference between gc and eviction is that gc can be applied even if\n      // the table can grow since we throw away logically deleted items.\n      // For eviction to be applied we should reach the growth limit.\n      if constexpr (EvictionPolicy::can_gc) {\n        unsigned res = ev.GarbageCollect(hotspot, this);\n        garbage_collected_ += res;\n        if (res) {\n          // We succeeded to gc. 
Lets continue with the momentum.\n          // In terms of API abuse it's an awful hack, just to see if it works.\n          /*unsigned start = (bid[HotBuckets::kNumBuckets - 1] + 1) % kLogicalBucketNum;\n          for (unsigned i = 0; i < HotBuckets::kNumBuckets; ++i) {\n            uint8_t id = (start + i) % kLogicalBucketNum;\n            buckets.probes.arr[i] = bucket_iterator{this, target_seg_id, id};\n          }\n          garbage_collected_ += ev.GarbageCollect(buckets, this);\n          */\n          continue;\n        }\n      }\n\n      auto hash_fn = [this](const auto& k) { return policy_.HashFn(k); };\n      unsigned moved = target->UnloadStash(hash_fn, move_cb);\n      if (moved > 0) {\n        stash_unloaded_ += moved;\n        continue;\n      }\n\n      // We evict only if our policy says we can not grow\n      if constexpr (EvictionPolicy::can_evict) {\n        bool can_grow = ev.CanGrow(*this);\n        if (can_grow) {\n          consider_throw = false;\n        } else {\n          unsigned res = ev.Evict(hotspot, this);\n          if (res)\n            continue;\n        }\n      }\n    }\n\n    if (consider_throw && !ev.CanGrow(*this)) {\n      throw std::bad_alloc{};\n    }\n\n    // Split the segment.\n    if (target->local_depth() == global_depth_) {\n      IncreaseDepth(global_depth_ + 1);\n\n      target_seg_id = SegmentId(key_hash);\n      assert(target_seg_id < segment_.size() && segment_[target_seg_id] == target);\n    }\n\n    ev.RecordSplit(target);\n    Split(target_seg_id, ev);\n  }\n\n  return std::make_pair(iterator{}, false);\n}\n\ntemplate <typename _Key, typename _Value, typename Policy>\nvoid DashTable<_Key, _Value, Policy>::IncreaseDepth(unsigned new_depth) {\n  assert(!segment_.empty());\n  assert(new_depth > global_depth_);\n  size_t prev_sz = segment_.size();\n  size_t repl_cnt = 1ul << (new_depth - global_depth_);\n  segment_.resize(1ul << new_depth);\n\n  for (int i = prev_sz - 1; i >= 0; --i) {\n    size_t offs = i * 
repl_cnt;\n    std::fill(segment_.begin() + offs, segment_.begin() + offs + repl_cnt, segment_[i]);\n    segment_[i]->set_segment_id(offs);  // update segment id.\n  }\n  global_depth_ = new_depth;\n}\n\ntemplate <typename _Key, typename _Value, typename Policy>\ntemplate <typename EvictionPolicy>\nvoid DashTable<_Key, _Value, Policy>::Split(uint32_t seg_id, EvictionPolicy& ev) {\n  SegmentType* source = segment_[seg_id];\n\n  uint32_t chunk_size = 1u << (global_depth_ - source->local_depth());\n  uint32_t start_idx = seg_id & (~(chunk_size - 1));\n  assert(segment_[start_idx] == source && segment_[start_idx + chunk_size - 1] == source);\n  uint32_t target_id = start_idx + chunk_size / 2;\n  SegmentType* target = ConstructSegment(source->local_depth() + 1, target_id);\n\n  auto hash_fn = [this](const auto& k) { return policy_.HashFn(k); };\n\n  // remove current segment bucket count.\n  bucket_count_ -= (source->num_buckets() + target->num_buckets());\n\n  source->Split(\n      std::move(hash_fn), target,\n      [&](uint32_t segment_from, detail::PhysicalBid from, uint32_t segment_to,\n          detail::PhysicalBid to) {\n        // OnMove is used to notify eviction policy about the moves across\n        // buckets/segments during the split.\n        ev.OnMove(Cursor{global_depth_, segment_from, from}, Cursor{global_depth_, segment_to, to});\n      });\n\n  // add back the updated bucket count.\n  bucket_count_ += (target->num_buckets() + source->num_buckets());\n  ++unique_segments_;\n\n  for (size_t i = target_id; i < start_idx + chunk_size; ++i) {\n    segment_[i] = target;\n  }\n}\n\ntemplate <typename _Key, typename _Value, typename Policy>\ntemplate <typename Cb>\nauto DashTable<_Key, _Value, Policy>::TraverseBySegmentOrder(Cursor curs, Cb&& cb) -> Cursor {\n  uint32_t sid = curs.segment_id(global_depth_);\n  assert(sid < segment_.size());\n  SegmentType* s = segment_[sid];\n  assert(s);\n  uint8_t bid = curs.bucket_id();\n\n  auto dt_cb = [&](const 
SegmentIterator& it) { cb(iterator{this, sid, it.index, it.slot}); };\n  s->TraverseBucket(bid, std::move(dt_cb));\n\n  ++bid;\n  if (SegmentType::OutOfRange(bid)) {\n    sid = NextSeg(sid);\n    if (sid >= segment_.size()) {\n      return Cursor::end();\n    }\n    bid = 0;\n  }\n\n  return Cursor{global_depth_, sid, bid};\n}\n\ntemplate <typename _Key, typename _Value, typename Policy>\nauto DashTable<_Key, _Value, Policy>::GetRandomCursor(absl::BitGen* bitgen) -> Cursor {\n  uint32_t sid = absl::Uniform<uint32_t>(*bitgen, 0, segment_.size());\n  uint8_t bid = absl::Uniform<uint8_t>(*bitgen, 0, Policy::kBucketNum);\n\n  return Cursor{global_depth_, sid, bid};\n}\n\ntemplate <typename _Key, typename _Value, typename Policy>\ntemplate <typename Cb>\nauto DashTable<_Key, _Value, Policy>::Traverse(Cursor curs, Cb&& cb) -> Cursor {\n  uint32_t sid = curs.segment_id(global_depth_);\n  uint8_t bid = curs.bucket_id();\n\n  // Test validity of the cursor.\n  if (bid >= Policy::kBucketNum || sid >= segment_.size())\n    return Cursor::end();\n\n  auto hash_fun = [this](const auto& k) { return policy_.HashFn(k); };\n\n  bool fetched = false;\n\n  // We fix bid and go over all segments. Once we reach the end we increase bid and repeat.\n  do {\n    SegmentType* s = segment_[sid];\n    assert(s);\n\n    auto dt_cb = [&](const SegmentIterator& it) { cb(iterator{this, sid, it.index, it.slot}); };\n\n    fetched = s->TraverseLogicalBucket(bid, hash_fun, std::move(dt_cb));\n    sid = NextSeg(sid);\n    if (sid >= segment_.size()) {\n      sid = 0;\n      ++bid;\n\n      if (bid >= Policy::kBucketNum)\n        return Cursor::end();\n    }\n  } while (!fetched);\n\n  return Cursor{global_depth_, sid, bid};\n}\n\ntemplate <typename _Key, typename _Value, typename Policy>\nauto DashTable<_Key, _Value, Policy>::AdvanceCursorBucketOrder(Cursor cursor) -> Cursor {\n  // We fix bid and go over all segments. 
Once we reach the end we increase bid and repeat.\n  uint32_t sid = cursor.segment_id(global_depth_);\n  uint8_t bid = cursor.bucket_id();\n  sid = NextSeg(sid);\n  if (sid >= segment_.size()) {\n    sid = 0;\n    ++bid;\n\n    if (SegmentType::OutOfRange(bid))\n      return Cursor::end();\n  }\n  return Cursor{global_depth_, sid, bid};\n}\n\ntemplate <typename _Key, typename _Value, typename Policy>\ntemplate <typename Cb>\nauto DashTable<_Key, _Value, Policy>::TraverseBuckets(Cursor cursor, Cb&& cb) -> Cursor {\n  if (SegmentType::OutOfRange(cursor.bucket_id()))  // sanity.\n    return Cursor::end();\n\n  constexpr uint32_t kMaxIterations = 8;\n  bool invoked = false;\n\n  for (uint32_t i = 0; i < kMaxIterations; ++i) {\n    uint32_t sid = cursor.segment_id(global_depth_);\n    uint8_t bid = cursor.bucket_id();\n    SegmentType* s = segment_[sid];\n    assert(s);\n    if (bid < s->num_buckets()) {\n      const auto& bucket = s->GetBucket(bid);\n      if (bucket.GetBusy()) {  // Invoke callback only if bucket has elements.\n        cb(BucketIt(sid, bid));\n        invoked = true;\n      }\n    }\n    cursor = AdvanceCursorBucketOrder(cursor);\n    if (invoked || !cursor)  // Break end of traversal or callback invoked.\n      return cursor;\n  }\n  return cursor;\n}\n\n}  // namespace dfly\n"
  },
  {
    "path": "src/core/dash_bench.cc",
    "content": "// Copyright 2022, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#include <absl/base/internal/cycleclock.h>\n#include <absl/container/flat_hash_map.h>\n#include <mimalloc.h>\n\n#include \"base/hash.h\"\n#include \"base/histogram.h\"\n#include \"base/init.h\"\n#include \"core/dash.h\"\n\nextern \"C\" {\n#include \"redis/dict.h\"\n#include \"redis/sds.h\"\n#include \"redis/zmalloc.h\"\n}\n\nusing namespace std;\n\nABSL_FLAG(uint32_t, n, 100000, \"num items\");\nABSL_FLAG(string, type, \"dash\", \"\");\nABSL_FLAG(bool, sds, false, \"If true, uses sds as primary key\");\n\nnamespace dfly {\n\nstatic uint64_t dictSdsHash(const void* key) {\n  return dictGenHashFunction((unsigned char*)key, sdslen((char*)key));\n}\n\nstatic int dictSdsKeyCompare(dict*, const void* key1, const void* key2) {\n  int l1, l2;\n\n  l1 = sdslen((sds)key1);\n  l2 = sdslen((sds)key2);\n  if (l1 != l2)\n    return 0;\n  return memcmp(key1, key2, l1) == 0;\n}\n\nstatic dictType SdsDict = {\n    dictSdsHash,       /* hash function */\n    NULL,              /* key dup */\n    NULL,              /* val dup */\n    dictSdsKeyCompare, /* key compare */\n    NULL,\n    // dictSdsDestructor, /* key destructor */\n    NULL, /* val destructor */\n    NULL,\n};\n\nstruct UInt64Policy {\n  enum { kSlotNum = 12, kBucketNum = 64, kStashBucketNum = 2 };\n  static constexpr bool kUseVersion = false;\n\n  static uint64_t HashFn(uint64_t v) {\n    return XXH3_64bits(&v, sizeof(v));\n  }\n\n  template <typename U> static void DestroyValue(const U&) {\n  }\n  template <typename U> static void DestroyKey(const U&) {\n  }\n\n  template <typename U, typename V> static bool Equal(U&& u, V&& v) {\n    return u == v;\n  }\n};\n\nstruct SdsDashPolicy {\n  enum { kSlotNum = 14, kBucketNum = 56, kStashBucketNum = 4 };\n  static constexpr bool kUseVersion = false;\n\n  static uint64_t HashFn(sds u) {\n    return XXH3_64bits(reinterpret_cast<const uint8_t*>(u), 
sdslen(u));\n  }\n\n  static uint64_t HashFn(std::string_view u) {\n    return XXH3_64bits(u.data(), u.size());\n  }\n\n  static void DestroyKey(sds s) {\n    sdsfree(s);\n  }\n\n  static void DestroyValue(uint64_t) {\n  }\n\n  static bool Equal(sds u1, sds u2) {\n    return dictSdsKeyCompare(nullptr, u1, u2) == 0;\n  }\n\n  static bool Equal(sds u1, std::string_view u2) {\n    return u2 == std::string_view{u1, sdslen(u1)};\n  }\n};\n\nusing Dash64 = DashTable<uint64_t, uint64_t, UInt64Policy>;\nusing DashSds = DashTable<sds, uint64_t, SdsDashPolicy>;\n\nusing absl::GetFlag;\n\ninline void Sample(int64_t start, int64_t end, base::Histogram* hist) {\n  hist->Add((end - start) / 100);\n}\n\nDash64 udt;\nDashSds sds_dt;\nbase::Histogram hist;\n\n#define USE_TIME 1\n\nint64_t GetNow() {\n#if USE_TIME\n  return absl::GetCurrentTimeNanos();\n#else\n  return absl::base_internal::CycleClock::Now();\n#endif\n}\n\n#if defined(__i386__) || defined(__amd64__)\n#define LFENCE __asm__ __volatile__(\"lfence\")\n#else\n#define LFENCE __asm__ __volatile__(\"ISB\")\n#endif\n\nabsl::flat_hash_map<uint64_t, uint64_t> mymap;\n\nvoid BenchFlat(uint64_t num) {\n  for (uint64_t i = 0; i < num; ++i) {\n    time_t start = GetNow();\n    mymap.emplace(i, 0);\n    LFENCE;\n\n    time_t end = GetNow();\n    Sample(start, end, &hist);\n  }\n}\n\nvoid BenchDash(uint64_t num) {\n  for (uint64_t i = 0; i < num; ++i) {\n    time_t start = GetNow();\n    udt.Insert(i, 0);\n    LFENCE;\n\n    time_t end = GetNow();\n    Sample(start, end, &hist);\n  }\n}\n\ninline sds Prefix() {\n  return sdsnew(\"xxxxxxxxxxxxxxxxxxxxxxx\");\n}\n\nvoid BenchDashSds(uint64_t num) {\n  sds key = sdscatsds(Prefix(), sdsfromlonglong(0));\n  for (uint64_t i = 0; i < num; ++i) {\n    time_t start = GetNow();\n    sds_dt.Insert(key, 0);\n    time_t end = GetNow();\n    Sample(start, end, &hist);\n\n    key = sdscatsds(Prefix(), sdsfromlonglong(i + 1));\n  }\n}\n\nstatic uint64_t callbackHash(const void* key) {\n  return 
XXH64(&key, sizeof(key), 0);\n}\n\nstatic dictType IntDict = {callbackHash, NULL, NULL, NULL, NULL, NULL, NULL};\n\ndict* redis_dict = nullptr;\n\nvoid BenchDict(uint64_t num) {\n  redis_dict = dictCreate(&IntDict);\n\n  for (uint64_t i = 0; i < num; ++i) {\n    time_t start = GetNow();\n    dictAdd(redis_dict, (void*)i, nullptr);\n    LFENCE;\n    time_t end = GetNow();\n    Sample(start, end, &hist);\n  }\n}\n\nvoid BenchDictSds() {\n  uint64_t num = GetFlag(FLAGS_n);\n\n  sds key = sdscat(Prefix(), sdsfromlonglong(0));\n  redis_dict = dictCreate(&SdsDict);\n\n  for (uint64_t i = 0; i < num; ++i) {\n    time_t start = GetNow();\n    dictAdd(redis_dict, key, nullptr);\n    time_t end = GetNow();\n    Sample(start, end, &hist);\n\n    key = sdscatsds(Prefix(), sdsfromlonglong(i + 1));\n  }\n}\n\n}  // namespace dfly\n\nusing namespace dfly;\n\nint main(int argc, char* argv[]) {\n  MainInitGuard guard(&argc, &argv);\n\n  init_zmalloc_threadlocal(mi_heap_get_backing());\n\n  string table_type = GetFlag(FLAGS_type);\n\n  bool is_sds = GetFlag(FLAGS_sds);\n  uint64_t start = absl::GetCurrentTimeNanos();\n  uint64_t num = GetFlag(FLAGS_n);\n\n  if (table_type == \"dash\") {\n    if (is_sds) {\n      BenchDashSds(num);\n    } else {\n      BenchDash(num);\n    }\n  } else if (table_type == \"dict\") {\n    if (is_sds) {\n      BenchDictSds();\n    } else {\n      BenchDict(num);\n    }\n  } else if (table_type == \"flat\") {\n    BenchFlat(num);\n  } else {\n    LOG(FATAL) << \"Unknown type \" << table_type;\n  }\n\n  CONSOLE_INFO << \"latencies histogram (jiffies, 100ns):\\n\" << hist.ToString();\n  uint64_t delta = (absl::GetCurrentTimeNanos() - start) / 1000000;\n  CONSOLE_INFO << \"Took \" << delta << \" ms\";\n\n  return 0;\n}\n"
  },
  {
    "path": "src/core/dash_internal.h",
    "content": "// Copyright 2024, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#pragma once\n\n#include <absl/base/internal/endian.h>\n\n#include <array>\n#include <cassert>\n#include <cstdint>\n#include <cstdio>\n#include <cstring>\n#include <type_traits>\n\n#include \"base/pmr/memory_resource.h\"\n#include \"core/sse_port.h\"\n\nnamespace dfly {\nnamespace detail {\n\ntemplate <unsigned NUM_SLOTS> class SlotBitmap {\n  static_assert(NUM_SLOTS > 0 && NUM_SLOTS <= 28);\n  static constexpr bool SINGLE = NUM_SLOTS <= 14;\n  static constexpr unsigned kLen = SINGLE ? 1 : 2;\n  static constexpr unsigned kAllocMask = (1u << NUM_SLOTS) - 1;\n  static constexpr unsigned kBitmapLenMask = (1 << 4) - 1;\n\n public:\n  // probe - true means the entry is probing, i.e. not owning.\n  // probe=true GetProbe returns index of probing entries, i.e. hosted but not owned by this bucket.\n  // probe=false - mask of owning entries\n  uint32_t GetProbe(bool probe) const {\n    if constexpr (SINGLE)\n      return ((val_[0].d >> 4) & kAllocMask) ^ ((!probe) * kAllocMask);\n    else\n      return (val_[1].d & kAllocMask) ^ ((!probe) * kAllocMask);\n  }\n\n  // GetBusy returns the busy mask.\n  uint32_t GetBusy() const {\n    return SINGLE ? val_[0].d >> 18 : val_[0].d;\n  }\n\n  bool IsFull() const {\n    return Size() == NUM_SLOTS;\n  }\n\n  unsigned Size() const {\n    return SINGLE ? (val_[0].d & kBitmapLenMask) : __builtin_popcount(val_[0].d);\n  }\n\n  // Precondition: Must have empty slot\n  // returns result in [0, NUM_SLOTS) range.\n  int FindEmptySlot() const {\n    uint32_t mask = ~(GetBusy());\n\n    // returns the index for first set bit (FindLSBSetNonZero). 
mask must be non-zero.\n    int slot = __builtin_ctz(mask);\n    assert(slot < int(NUM_SLOTS));\n    return slot;\n  }\n\n  // mask is NUM_SLOTS bits saying which slots needs to be freed (1 - should clear).\n  void ClearSlots(uint32_t mask);\n\n  void Clear() {\n    if (SINGLE) {\n      val_[0].d = 0;\n    } else {\n      val_[0].d = val_[1].d = 0;\n    }\n  }\n\n  void ClearSlot(unsigned index);\n  void SetSlot(unsigned index, bool probe);\n\n  // cell 0 corresponds to first lsb bit in the busy mask, hence we need to shift left\n  // the bitmap in order to shift right the cell-array.\n  // Returns true if discarded the last slot (i.e. it was busy).\n  bool ShiftLeft();\n\n  void Swap(unsigned slot_a, unsigned slot_b);\n\n private:\n  // SINGLE:\n  //   val_[0] is [14 bit- busy][14bit-probing, whether the key does not belong to this\n  //   bucket][4bit-count]\n  // kLen == 2:\n  //  val_[0] is 28 bit busy\n  //  val_[1] is 28 bit probing\n  //  count is implemented via popcount of val_[0].\n  struct Unaligned {\n    // Apparently with wrapping struct we can persuade compiler to declare an unaligned int.\n    // https://stackoverflow.com/questions/19915303/packed-qualifier-ignored\n    uint32_t d __attribute__((packed, aligned(1)));\n\n    Unaligned() : d(0) {\n    }\n  };\n\n  Unaligned val_[kLen];\n};  // SlotBitmap\n\ntemplate <unsigned NUM_SLOTS> class BucketBase {\n  // We can not allow more than 4 stash fps because we hold stash positions in single byte\n  // stash_pos_ variable that uses 2 bits per stash bucket to point which bucket holds that fp.\n  // Hence we can point at most from 4 fps to 4 stash buckets.\n  // If any of those limits need to be raised we should increase stash_pos_ similarly to how we did\n  // with SlotBitmap.\n  static constexpr unsigned kStashFpLen = 4;\n  static constexpr unsigned kStashPresentBit = 1 << 4;\n\n  using FpArray = std::array<uint8_t, NUM_SLOTS>;\n  using StashFpArray = std::array<uint8_t, kStashFpLen>;\n\n public:\n  
using SlotId = uint8_t;\n  static constexpr SlotId kNanSlot = 255;\n\n  bool IsFull() const {\n    return Size() == NUM_SLOTS;\n  }\n\n  bool IsEmpty() const {\n    return GetBusy() == 0;\n  }\n\n  unsigned Size() const {\n    return slotb_.Size();\n  }\n\n  void Delete(SlotId sid) {\n    slotb_.ClearSlot(sid);\n  }\n\n  unsigned Find(uint8_t fp_hash, bool probe) const {\n    unsigned mask = CompareFP(fp_hash) & GetBusy();\n    return mask & GetProbe(probe);\n  }\n\n  uint8_t Fp(unsigned i) const {\n    assert(i < finger_arr_.size());\n    return finger_arr_[i];\n  }\n\n  void SetStashPtr(unsigned stash_pos, uint8_t meta_hash, BucketBase* next);\n\n  // returns 0 if stash was cleared from this bucket, 1 if it was cleared from next bucket.\n  unsigned UnsetStashPtr(uint8_t fp_hash, unsigned stash_pos, BucketBase* next);\n\n  // probe - true means the entry is probing, i.e. not owning.\n  // probe=true GetProbe returns index of probing entries, i.e. hosted but not owned by this bucket.\n  // probe=false - mask of owning entries\n  uint32_t GetProbe(bool probe) const {\n    return slotb_.GetProbe(probe);\n  }\n\n  // GetBusy returns the busy mask.\n  uint32_t GetBusy() const {\n    return slotb_.GetBusy();\n  }\n\n  bool IsBusy(unsigned slot) const {\n    return (GetBusy() & (1u << slot)) != 0;\n  }\n\n  // mask is saying which slots needs to be freed (1 - should clear).\n  void ClearSlots(uint32_t mask) {\n    slotb_.ClearSlots(mask);\n  }\n\n  void Clear() {\n    slotb_.Clear();\n  }\n\n  void ClearStashPtrs() {\n    stash_busy_ = 0;\n    stash_pos_ = 0;\n    stash_probe_mask_ = 0;\n    overflow_count_ = 0;\n  }\n\n  bool HasStash() const {\n    return stash_busy_ & kStashPresentBit;\n  }\n\n  void SetHash(unsigned slot_id, uint8_t meta_hash, bool probe);\n\n  bool HasStashOverflow() const {\n    return overflow_count_ > 0;\n  }\n\n  // func accepts an fp_index in range [0, kStashFpLen) and\n  // stash position [0, STASH_BUCKET_NUM) that with fingerprint=fp. 
func must return\n  // a slot id if it found whatever it searched for when iterating or kNanSlot to continue.\n  // IterateStash returns: first - stash position [0, STASH_BUCKET_NUM), second - slot id\n  // pointing to that stash.\n  template <typename F>\n  std::pair<unsigned, SlotId> IterateStash(uint8_t fp, bool is_probe, F&& func) const;\n\n  void Swap(unsigned slot_a, unsigned slot_b) {\n    slotb_.Swap(slot_a, slot_b);\n    std::swap(finger_arr_[slot_a], finger_arr_[slot_b]);\n  }\n\n protected:\n  uint32_t CompareFP(uint8_t fp) const;\n  bool ShiftRight();\n\n  // Returns true if stash_pos was stored, false overwise\n  bool SetStash(uint8_t fp, unsigned stash_pos, bool probe);\n  bool ClearStash(uint8_t fp, unsigned stash_pos, bool probe);\n\n  SlotBitmap<NUM_SLOTS> slotb_;  // allocation bitmap + pointer bitmap + counter\n\n  /*only use the first 14 bytes, can be accelerated by\n    SSE instruction,0-13 for finger, 14-17 for overflowed*/\n  FpArray finger_arr_;\n  StashFpArray stash_arr_;\n\n  uint8_t stash_busy_ = 0;  // kStashFpLen+1 bits are used\n  uint8_t stash_pos_ = 0;   // 4x2 bits for pointing to stash bucket.\n\n  // stash_probe_mask_ indicates whether the overflow fingerprint is for the neighbour (1)\n  // or for this bucket (0). kStashFpLen bits are used.\n  uint8_t stash_probe_mask_ = 0;\n\n  // number of overflowed items stored in stash buckets that do not have fp hashes.\n  uint8_t overflow_count_ = 0;\n};  // BucketBase\n\nstatic_assert(sizeof(BucketBase<12>) == 24);\nstatic_assert(alignof(BucketBase<14>) == 1);\nstatic_assert(alignof(BucketBase<12>) == 1);\n\n// Optional version support as part of DashTable.\n// This works like this: each slot has 2 bytes for version and a bucket has another 6.\n// therefore all slots in the bucket shared the same 6 high bytes of 8-byte version.\n// In order to achieve this we store high6(max{version(entry)}) for every entry.\n// Hence our version control may have false positives, i.e. 
signal that an entry has changed\n// when in practice its neighbour incremented the high6 part of its bucket.\ntemplate <unsigned NUM_SLOTS> class VersionedBB : public BucketBase<NUM_SLOTS> {\n  using Base = BucketBase<NUM_SLOTS>;\n\n public:\n  // one common version per bucket.\n  void SetVersion(uint64_t version);\n\n  uint64_t GetVersion() const {\n    uint64_t c = absl::little_endian::Load64(version_);\n    // c |= low_[slot_id];\n    return c;\n  }\n\n  void UpdateVersion(uint64_t version) {\n    uint64_t c = std::max(GetVersion(), version);\n    absl::little_endian::Store64(version_, c);\n  }\n\n  void Clear() {\n    Base::Clear();\n    // low_.fill(0);\n    memset(version_, 0, sizeof(version_));\n  }\n\n  bool ShiftRight() {\n    bool res = Base::ShiftRight();\n    return res;\n  }\n\n  void Swap(unsigned slot_a, unsigned slot_b) {\n    Base::Swap(slot_a, slot_b);\n  }\n\n private:\n  uint8_t version_[8] = {0};\n};\n\nstatic_assert(alignof(VersionedBB<14>) == 1);\nstatic_assert(sizeof(VersionedBB<12>) == 12 * 2 + 8);\nstatic_assert(sizeof(VersionedBB<14>) <= 14 * 2 + 8);\n\n// Segment - static-hashtable of size kSlotNum*(kBucketNum + kStashBucketNum).\nstruct DefaultSegmentPolicy {\n  static constexpr unsigned kSlotNum = 12;\n  static constexpr unsigned kBucketNum = 64;\n  static constexpr bool kUseVersion = true;\n};\n\nusing PhysicalBid = uint8_t;\nusing LogicalBid = uint8_t;\n\ntemplate <typename KeyType, typename ValueType, typename Policy = DefaultSegmentPolicy>\nclass Segment {\n public:\n  static constexpr unsigned kSlotNum = Policy::kSlotNum;\n  static constexpr unsigned kBucketNum = Policy::kBucketNum;\n  static constexpr unsigned kStashBucketNum = 4;\n  static constexpr bool kUseVersion = Policy::kUseVersion;\n\n private:\n  static_assert(kBucketNum + kStashBucketNum < 255);\n  static constexpr unsigned kFingerBits = 8;\n\n  using BucketType = std::conditional_t<kUseVersion, VersionedBB<kSlotNum>, BucketBase<kSlotNum>>;\n\n  struct Bucket : public 
BucketType {\n    using BucketType::kNanSlot;\n    using typename BucketType::SlotId;\n\n    KeyType key[kSlotNum];\n    ValueType value[kSlotNum];\n\n    template <typename U, typename V>\n    void Insert(uint8_t slot, U&& u, V&& v, uint8_t meta_hash, bool probe) {\n      assert(slot < kSlotNum);\n\n      key[slot] = std::forward<U>(u);\n      value[slot] = std::forward<V>(v);\n\n      this->SetHash(slot, meta_hash, probe);\n    }\n\n    // Returns slot id if insertion is successful, -1 if no free slots are found.\n    template <typename U, typename V>\n    int TryInsertToBucket(U&& key, V&& value, uint8_t meta_hash, bool probe) {\n      if (this->IsFull()) {\n        return -1;  // no free space in the bucket.\n      }\n\n      int slot = this->slotb_.FindEmptySlot();\n      assert(slot >= 0);\n      Insert(slot, std::forward<U>(key), std::forward<V>(value), meta_hash, probe);\n      return slot;\n    }\n\n    template <typename Pred> SlotId FindByFp(uint8_t fp_hash, bool probe, Pred&& pred) const;\n\n    bool ShiftRight();\n\n    void Swap(unsigned slot_a, unsigned slot_b) {\n      BucketType::Swap(slot_a, slot_b);\n      std::swap(key[slot_a], key[slot_b]);\n      std::swap(value[slot_a], value[slot_b]);\n    }\n\n    template <typename This, typename Cb> void ForEachSlotImpl(This obj, Cb&& cb) const {\n      uint32_t mask = this->GetBusy();\n      uint32_t probe_mask = this->GetProbe(true);\n\n      for (unsigned j = 0; j < kSlotNum; ++j) {\n        if (mask & 1) {\n          cb(obj, j, probe_mask & 1);\n        }\n        mask >>= 1;\n        probe_mask >>= 1;\n      }\n    }\n\n    // calls for each busy slot: cb(iterator, probe)\n    template <typename Cb> void ForEachSlot(Cb&& cb) const {\n      ForEachSlotImpl(this, std::forward<Cb&&>(cb));\n    }\n\n    // calls for each busy slot: cb(iterator, probe)\n    template <typename Cb> void ForEachSlot(Cb&& cb) {\n      ForEachSlotImpl(this, std::forward<Cb&&>(cb));\n    }\n  };  // class Bucket\n\n  static 
constexpr PhysicalBid kNanBid = 0xFF;\n  using SlotId = typename BucketType::SlotId;\n\n public:\n  struct Iterator {\n    PhysicalBid index;  // bucket index\n    uint8_t slot;\n\n    Iterator() : index(kNanBid), slot(BucketType::kNanSlot) {\n    }\n\n    Iterator(PhysicalBid bi, uint8_t sid) : index(bi), slot(sid) {\n    }\n\n    bool found() const {\n      return index != kNanBid;\n    }\n  };\n\n  struct Stats {\n    size_t neighbour_probes = 0;\n    size_t stash_probes = 0;\n    size_t stash_overflow_probes = 0;\n  };\n\n  static constexpr size_t kFpMask = (1 << kFingerBits) - 1;\n\n  using Value_t = ValueType;\n  using Key_t = KeyType;\n  using Hash_t = uint64_t;\n\n  explicit Segment(size_t depth, uint32_t id, PMR_NS::memory_resource* mr)\n      : local_depth_(depth), segment_id_(id), mr_(mr) {\n  }\n\n  ~Segment() {\n    Clear();\n  }\n\n  Segment(const Segment&) = delete;\n  Segment& operator=(const Segment&) = delete;\n\n  // Returns (iterator, true) if insert succeeds,\n  // (iterator, false) for duplicate and (invalid-iterator, false) if it's full\n  template <typename K, typename V, typename Pred, typename OnMoveCb>\n  std::pair<Iterator, bool> Insert(K&& key, V&& value, Hash_t key_hash, Pred&& pred,\n                                   OnMoveCb&& on_move_cb);\n\n  template <typename HashFn, typename OnMoveCb>\n  void Split(HashFn&& hfunc, Segment* dest, OnMoveCb&& on_move_cb);\n\n  void Delete(const Iterator& it, Hash_t key_hash);\n\n  void Clear();  // clears the segment.\n\n  size_t SlowSize() const;\n\n  static constexpr size_t capacity() {\n    return kMaxSize;\n  }\n\n  static constexpr bool OutOfRange(PhysicalBid bid) {\n    return bid >= kBucketNum + kStashBucketNum;\n  }\n\n  size_t local_depth() const {\n    return local_depth_;\n  }\n\n  void set_local_depth(uint32_t depth) {\n    local_depth_ = depth;\n  }\n\n  template <bool UV = kUseVersion>\n  std::enable_if_t<UV, uint64_t> GetVersion(PhysicalBid bid) const {\n    return 
GetBucket(bid).GetVersion();\n  }\n\n  template <bool UV = kUseVersion> std::enable_if_t<UV> SetVersion(PhysicalBid bid, uint64_t v) {\n    return GetBucket(bid).SetVersion(v);\n  }\n\n  // Traverses over Segment's bucket bid and calls cb(const Iterator& it) 0 or more times\n  // for each slot in the bucket. returns false if bucket is empty.\n  // Please note that `it` will not necessary point to bid due to probing and stash buckets\n  // containing items that should have been resided in bid.\n  template <typename Cb, typename HashFn>\n  bool TraverseLogicalBucket(LogicalBid bid, HashFn&& hfun, Cb&& cb) const;\n\n  // Cb  accepts (const Iterator&).\n  template <typename Cb> void TraverseAll(Cb&& cb) const;\n\n  // Traverses over Segment's bucket bid and calls cb(Iterator& it)\n  // for each slot in the bucket. The iteration goes over a physical bucket.\n  template <typename Cb> void TraverseBucket(PhysicalBid bid, Cb&& cb);\n\n  // Used in test.\n  unsigned NumProbingBuckets() const {\n    unsigned res = 0;\n    for (PhysicalBid i = 0; i < kBucketNum; ++i) {\n      res += (bucket_[i].GetProbe(true) != 0);\n    }\n    return res;\n  };\n\n  const Bucket& GetBucket(PhysicalBid i) const {\n    return bucket_[i];\n  }\n\n  Bucket& GetBucket(PhysicalBid i) {\n    return bucket_[i];\n  }\n\n  bool IsBusy(PhysicalBid bid, unsigned slot) const {\n    return GetBucket(bid).GetBusy() & (1U << slot);\n  }\n\n  Key_t& Key(PhysicalBid bid, unsigned slot) {\n    assert(IsBusy(bid, slot));\n    return GetBucket(bid).key[slot];\n  }\n\n  const Key_t& Key(PhysicalBid bid, unsigned slot) const {\n    assert(IsBusy(bid, slot));\n    return GetBucket(bid).key[slot];\n  }\n\n  Value_t& Value(PhysicalBid bid, unsigned slot) {\n    assert(IsBusy(bid, slot));\n    return GetBucket(bid).value[slot];\n  }\n\n  const Value_t& Value(PhysicalBid bid, unsigned slot) const {\n    assert(IsBusy(bid, slot));\n    return GetBucket(bid).value[slot];\n  }\n\n  // fill bucket ids that may be used 
probing for this key_hash.\n  // The order is: exact, neighbour buckets.\n  static void FillProbeArray(Hash_t key_hash, uint8_t dest[4]) {\n    dest[1] = HomeIndex(key_hash);\n    dest[0] = PrevBid(dest[1]);\n    dest[2] = NextBid(dest[1]);\n    dest[3] = NextBid(dest[2]);\n  }\n\n  // Find item with given key hash and truthy predicate\n  template <typename Pred> Iterator FindIt(Hash_t key_hash, Pred&& pred) const;\n  void Prefetch(Hash_t key_hash) const;\n\n  // Returns valid iterator if succeeded or invalid if not (it's full).\n  // Requires: key should be not present in the segment.\n  // if spread is true, tries to spread the load between neighbour and home buckets,\n  // otherwise chooses home bucket first.\n  // TODO: I am actually not sure if spread optimization is helpful. Worth checking\n  // whether we get higher occupancy rates when using it.\n  template <typename U, typename V, typename OnMoveCb>\n  Iterator InsertUniq(U&& key, V&& value, Hash_t key_hash, bool spread, OnMoveCb&& on_move_cb);\n\n  // capture version change in case of insert.\n  // Returns ids of buckets whose version would cross ver_threshold upon insertion of key_hash\n  // into the segment.\n  // Returns UINT16_MAX if segment is full. Otherwise, returns number of touched bucket ids (1 or 2)\n  // if the insertion would happen. 
The ids are put into bid array that should have at least 2\n  // spaces.\n  template <bool UV = kUseVersion>\n  std::enable_if_t<UV, unsigned> CVCOnInsert(uint64_t ver_threshold, Hash_t key_hash,\n                                             PhysicalBid bid[2]) const;\n\n  // Returns bucket ids whose versions will change as a result of bumping up the item\n  // Can return upto 3 buckets.\n  template <bool UV = kUseVersion>\n  std::enable_if_t<UV, unsigned> CVCOnBump(uint64_t ver_threshold, unsigned bid, unsigned slot,\n                                           Hash_t hash, PhysicalBid result_bid[3]) const;\n\n  // Finds a valid entry going from specified indices up.\n  Iterator FindValidStartingFrom(PhysicalBid bid, unsigned slot) const;\n\n  // Shifts all slots in the bucket right.\n  // Returns true if the last slot was busy and the entry has been deleted.\n  bool ShiftRight(PhysicalBid bid, Hash_t right_hashval) {\n    if (bid >= kBucketNum) {  // Stash\n      constexpr auto kLastSlotMask = 1u << (kSlotNum - 1);\n      if (GetBucket(bid).GetBusy() & kLastSlotMask)\n        RemoveStashReference(bid - kBucketNum, right_hashval);\n    }\n\n    return bucket_[bid].ShiftRight();\n  }\n\n  // Bumps up this entry making it more \"important\" for the eviction policy.\n  template <typename BumpPolicy, typename OnMoveCb>\n  Iterator BumpUp(PhysicalBid bid, SlotId slot, Hash_t key_hash, const BumpPolicy& ev,\n                  OnMoveCb&& cb);\n\n  // Tries to move stash entries back to their normal buckets (exact or neighbour).\n  // Returns number of entries that succeeded to unload.\n  // Important! 
Affects versions of the moved items and the items in the destination\n  // buckets.\n  template <typename HFunc, typename OnMoveCb> unsigned UnloadStash(HFunc&& hfunc, OnMoveCb&& cb);\n\n  unsigned num_buckets() const {\n    return kBucketNum + kStashBucketNum;\n  }\n\n  uint32_t segment_id() const {\n    return segment_id_;\n  }\n\n  // needed only when DashTable grows its segment table.\n  void set_segment_id(uint32_t new_id) {\n    segment_id_ = new_id;\n  }\n\n private:\n  static_assert(sizeof(Iterator) == 2);\n\n  static LogicalBid HomeIndex(Hash_t hash) {\n    return (hash >> kFingerBits) % kBucketNum;\n  }\n\n  static LogicalBid NextBid(LogicalBid bid) {\n    return bid < kBucketNum - 1 ? bid + 1 : 0;\n  }\n\n  static LogicalBid PrevBid(LogicalBid bid) {\n    return bid ? bid - 1 : kBucketNum - 1;\n  }\n\n  // if own_items is true it means we try to move owned item to probing bucket.\n  // if own_items false it means we try to move non-owned item from probing bucket back to its host.\n  int MoveToOther(bool own_items, unsigned from, unsigned to);\n\n  // dry-run version of MoveToOther.\n  bool CheckIfMovesToOther(bool own_items, unsigned from, unsigned to) const;\n\n  /*both clear this bucket and its neighbor bucket*/\n  void RemoveStashReference(unsigned stash_pos, Hash_t key_hash);\n\n  // returns a valid iterator if succeeded.\n  Iterator TryMoveFromStash(unsigned stash_id, unsigned stash_slot_id, Hash_t key_hash);\n\n  const static unsigned kTotalBuckets = kBucketNum + kStashBucketNum;\n  static_assert(kTotalBuckets < 0xFF);\n\n  Bucket bucket_[kTotalBuckets];\n  uint8_t local_depth_;\n  uint32_t segment_id_;  // segment id in the table.\n  PMR_NS::memory_resource* mr_ = nullptr;\n\n public:\n  static constexpr size_t kBucketSz = sizeof(Bucket);\n  static constexpr size_t kMaxSize = (kBucketNum + kStashBucketNum) * kSlotNum;\n  static constexpr double kTaxSize =\n      (double(sizeof(Segment)) / kMaxSize) - sizeof(Key_t) - sizeof(Value_t);\n\n#ifdef 
ENABLE_DASH_STATS\n  mutable Stats stats;\n#endif\n};  // Segment\n\nclass DashTableBase {\n public:\n  explicit DashTableBase(uint32_t gd)\n      : unique_segments_(1 << gd), initial_depth_(gd), global_depth_(gd) {\n  }\n\n  DashTableBase(const DashTableBase&) = delete;\n  DashTableBase& operator=(const DashTableBase&) = delete;\n\n  uint32_t unique_segments() const {\n    return unique_segments_;\n  }\n\n  uint16_t depth() const {\n    return global_depth_;\n  }\n\n  size_t size() const {\n    return size_;\n  }\n\n  size_t Empty() const {\n    return size_ == 0;\n  }\n\n protected:\n  uint32_t SegmentId(size_t hash) const {\n    if (global_depth_) {\n      return hash >> (64 - global_depth_);\n    }\n\n    return 0;\n  }\n\n  size_t size_ = 0;\n  uint32_t unique_segments_ = 0, bucket_count_ = 0;\n  uint8_t initial_depth_;\n  uint8_t global_depth_;\n};  // DashTableBase\n\ntemplate <typename KeyType, typename ValueType> class IteratorPair {\n public:\n  IteratorPair(KeyType& k, ValueType& v) : first(k), second(v) {\n  }\n\n  IteratorPair* operator->() {\n    return this;\n  }\n\n  const IteratorPair* operator->() const {\n    return this;\n  }\n\n  KeyType& first;\n  ValueType& second;\n};\n\n// Represents a cursor that points to a bucket in dash table.\n// One major difference with iterator is that the cursor survives dash table resizes and\n// will always point to the most appropriate segment with the same bucket.\n// It uses 40 lsb bits out of 64 assuming that number of segments does not cross 4B.\n// It's a reasonable assumption in shared nothing architecture when we usually have no more than\n// 32GB per CPU. 
Each segment spawns hundreds of entries so we can not grow segment table\n// to billions.\nclass DashCursor {\n public:\n  explicit DashCursor(uint64_t token = 0) : val_(token) {\n  }\n\n  DashCursor(uint8_t depth, uint32_t seg_id, PhysicalBid bid)\n      : val_((uint64_t(seg_id) << (40 - depth)) | bid) {\n  }\n\n  static DashCursor end() {\n    return DashCursor{};\n  }\n\n  PhysicalBid bucket_id() const {\n    return val_ & 0xFF;\n  }\n\n  // segment_id is padded to the left of 32 bit region:\n  // | segment_id......| bucket_id\n  // 40                8          0\n  // By using depth we take most significant bits of segment_id if depth has decreased\n  // since the cursor has been created, or extend the least significant bits with zeros,\n  // if depth was increased.\n  uint32_t segment_id(uint8_t depth) const {\n    return val_ >> (40 - depth);\n  }\n\n  uint64_t token() const {\n    return val_;\n  }\n\n  explicit operator bool() const {\n    return val_ != 0;\n  }\n\n private:\n  uint64_t val_;\n};\n\n/***********************************************************\n * Implementation section.\n */\n\ntemplate <unsigned NUM_SLOTS> void SlotBitmap<NUM_SLOTS>::SetSlot(unsigned index, bool probe) {\n  if constexpr (SINGLE) {\n    assert(((val_[0].d >> (index + 18)) & 1) == 0);\n    val_[0].d |= (1 << (index + 18));\n    val_[0].d |= (unsigned(probe) << (index + 4));\n\n    assert((val_[0].d & kBitmapLenMask) < NUM_SLOTS);\n    ++val_[0].d;\n    assert(__builtin_popcount(val_[0].d >> 18) == (val_[0].d & kBitmapLenMask));\n  } else {\n    assert(((val_[0].d >> index) & 1) == 0);\n    val_[0].d |= (1u << index);\n    val_[1].d |= (unsigned(probe) << index);\n  }\n}\n\ntemplate <unsigned NUM_SLOTS> void SlotBitmap<NUM_SLOTS>::ClearSlot(unsigned index) {\n  assert(Size() > 0);\n  if constexpr (SINGLE) {\n    uint32_t new_bitmap = val_[0].d & (~(1u << (index + 18))) & (~(1u << (index + 4)));\n    new_bitmap -= 1;\n    val_[0].d = new_bitmap;\n  } else {\n    uint32_t mask 
= 1u << index;\n    val_[0].d &= ~mask;\n    val_[1].d &= ~mask;\n  }\n}\n\ntemplate <unsigned NUM_SLOTS> bool SlotBitmap<NUM_SLOTS>::ShiftLeft() {\n  constexpr uint32_t kBusyLastSlot = (kAllocMask >> 1) + 1;\n  bool res;\n  if constexpr (SINGLE) {\n    constexpr uint32_t kShlMask = kAllocMask - 1;  // reset lsb\n    res = (val_[0].d & (kBusyLastSlot << 18)) != 0;\n    uint32_t l = (val_[0].d << 1) & (kShlMask << 4);\n    uint32_t p = (val_[0].d << 1) & (kShlMask << 18);\n    val_[0].d = __builtin_popcount(p) | l | p;\n  } else {\n    res = (val_[0].d & kBusyLastSlot) != 0;\n    val_[0].d <<= 1;\n    val_[0].d &= kAllocMask;\n    val_[1].d <<= 1;\n    val_[1].d &= kAllocMask;\n  }\n  return res;\n}\n\ntemplate <unsigned NUM_SLOTS> void SlotBitmap<NUM_SLOTS>::ClearSlots(uint32_t mask) {\n  if (SINGLE) {\n    uint32_t count = __builtin_popcount(mask);\n    assert(count <= (val_[0].d & 0xFF));\n    mask = (mask << 4) | (mask << 18);\n    val_[0].d &= ~mask;\n    val_[0].d -= count;\n  } else {\n    val_[0].d &= ~mask;\n    val_[1].d &= ~mask;\n  }\n}\n\ntemplate <unsigned NUM_SLOTS> void SlotBitmap<NUM_SLOTS>::Swap(unsigned slot_a, unsigned slot_b) {\n  if (slot_a > slot_b)\n    std::swap(slot_a, slot_b);\n\n  if constexpr (SINGLE) {\n    uint32_t a = (val_[0].d << (slot_b - slot_a)) ^ val_[0].d;\n    uint32_t bm = (1 << (slot_b + 4)) | (1 << (slot_b + 18));\n    a &= bm;\n    a |= (a >> (slot_b - slot_a));\n    val_[0].d ^= a;\n  } else {\n    uint32_t a = (val_[0].d << (slot_b - slot_a)) ^ val_[0].d;\n    a &= (1 << slot_b);\n    a |= (a >> (slot_b - slot_a));\n    val_[0].d ^= a;\n\n    a = (val_[1].d << (slot_b - slot_a)) ^ val_[1].d;\n    a &= (1 << slot_b);\n    a |= (a >> (slot_b - slot_a));\n    val_[1].d ^= a;\n  }\n}\n\n/*\n___  _  _ ____ _  _ ____ ___    ___  ____ ____ ____\n|__] |  | |    |_/  |___  |     |__] |__| [__  |___\n|__] |__| |___ | \\_ |___  |     |__] |  | ___] |___\n\n*/\n\ntemplate <unsigned NUM_SLOTS>\nbool 
BucketBase<NUM_SLOTS>::ClearStash(uint8_t fp, unsigned stash_pos, bool probe) {\n  auto cb = [stash_pos, this](unsigned i, unsigned pos) -> SlotId {\n    if (pos == stash_pos) {\n      stash_busy_ &= (~(1u << i));\n      stash_probe_mask_ &= (~(1u << i));\n      stash_pos_ &= (~(3u << (i * 2)));\n\n      assert(0u == ((stash_pos_ >> (i * 2)) & 3));\n      return 0;\n    }\n    return kNanSlot;\n  };\n\n  std::pair<unsigned, SlotId> res = IterateStash(fp, probe, std::move(cb));\n  return res.second != kNanSlot;\n}\n\ntemplate <unsigned NUM_SLOTS>\nvoid BucketBase<NUM_SLOTS>::SetHash(unsigned slot_id, uint8_t meta_hash, bool probe) {\n  assert(slot_id < finger_arr_.size());\n\n  finger_arr_[slot_id] = meta_hash;\n  slotb_.SetSlot(slot_id, probe);\n}\n\ntemplate <unsigned NUM_SLOTS>\nbool BucketBase<NUM_SLOTS>::SetStash(uint8_t fp, unsigned stash_pos, bool probe) {\n  // stash_busy_ is never 0xFFFFF so it's safe to run __builtin_ctz below.\n  unsigned free_slot = __builtin_ctz(~stash_busy_);\n  if (free_slot >= kStashFpLen)\n    return false;\n\n  stash_arr_[free_slot] = fp;\n  stash_busy_ |= (1u << free_slot);  // set the overflow slot\n\n  // stash_probe_mask_ specifies which records relate to other bucket.\n  stash_probe_mask_ |= (unsigned(probe) << free_slot);\n\n  // 2 bits denote the bucket index.\n  free_slot *= 2;\n  stash_pos_ &= (~(3 << free_slot));       // clear (can be removed?)\n  stash_pos_ |= (stash_pos << free_slot);  // and set\n  return true;\n}\n\ntemplate <unsigned NUM_SLOTS>\nvoid BucketBase<NUM_SLOTS>::SetStashPtr(unsigned stash_pos, uint8_t meta_hash, BucketBase* next) {\n  assert(stash_pos < 4);\n\n  // we use only kStashFpLen fp slots for handling stash buckets,\n  // therefore if all those slots are used we try neighbor (probing bucket) as a fallback to point\n  // to stash buckets. 
otherwise we increment overflow count.\n  // if overflow is incremented we will need to check all the stash buckets when looking for a key,\n  //  otherwise we can use overflow_index_ to find the the stash bucket efficiently.\n  if (!SetStash(meta_hash, stash_pos, false)) {\n    if (!next->SetStash(meta_hash, stash_pos, true)) {\n      overflow_count_++;\n    }\n  }\n  stash_busy_ |= kStashPresentBit;\n}\n\ntemplate <unsigned NUM_SLOTS>\nunsigned BucketBase<NUM_SLOTS>::UnsetStashPtr(uint8_t fp_hash, unsigned stash_pos,\n                                              BucketBase* next) {\n  /*also needs to ensure that this meta_hash must belongs to other bucket*/\n  bool clear_success = ClearStash(fp_hash, stash_pos, false);\n  unsigned res = 0;\n\n  if (!clear_success) {\n    clear_success = next->ClearStash(fp_hash, stash_pos, true);\n    res += clear_success;\n  }\n\n  if (!clear_success) {\n    assert(overflow_count_ > 0);\n    overflow_count_--;\n  }\n\n  // kStashPresentBit helps with summarizing all the stash states into a single binary flag.\n  // We need it because of the next, though if we make sure to move stash pointers upon split/delete\n  // towards the owner we should not reach the state where mask1 == 0 but mask2 &\n  // next->stash_probe_mask_ != 0.\n  unsigned mask1 = stash_busy_ & (kStashPresentBit - 1);\n  unsigned mask2 = next->stash_busy_ & (kStashPresentBit - 1);\n\n  if (((mask1 & (~stash_probe_mask_)) == 0) && (overflow_count_ == 0) &&\n      ((mask2 & next->stash_probe_mask_) == 0)) {\n    stash_busy_ &= ~kStashPresentBit;\n  }\n\n  return res;\n}\n\n#ifdef __s390x__\ntemplate <unsigned NUM_SLOTS> uint32_t BucketBase<NUM_SLOTS>::CompareFP(uint8_t fp) const {\n  static_assert(FpArray{}.size() <= 16);\n  vector unsigned char v1;\n\n  // Replicate 16 times fp to key_data.\n  for (int i = 0; i < 16; i++) {\n    v1[i] = fp;\n  }\n\n  // Loads 16 bytes of src into seg_data.\n  vector unsigned char v2 = vec_load_len(finger_arr_.data(), 16);\n\n  // 
compare 1-byte vectors seg_data and key_data, dst[i] := ( a[i] == b[i] ) ? 0xFF : 0.\n  vector bool char rv_mask = vec_cmpeq(v1, v2);\n\n  // collapses 16 msb bits from each byte in rv_mask into mask.\n  int mask = 0;\n  for (int i = 0; i < 16; i++) {\n    if (rv_mask[i]) {\n      mask |= 1 << i;\n    }\n  }\n\n  return mask;\n}\n#else\ntemplate <unsigned NUM_SLOTS> uint32_t BucketBase<NUM_SLOTS>::CompareFP(uint8_t fp) const {\n  static_assert(FpArray{}.size() <= 16);\n\n  // Replicate 16 times fp to key_data.\n  const __m128i key_data = _mm_set1_epi8(fp);\n\n  // Loads 16 bytes of src into seg_data.\n  __m128i seg_data = mm_loadu_si128(reinterpret_cast<const __m128i*>(finger_arr_.data()));\n\n  // compare 16-byte vectors seg_data and key_data, dst[i] := ( a[i] == b[i] ) ? 0xFF : 0.\n  __m128i rv_mask = _mm_cmpeq_epi8(seg_data, key_data);\n\n  // collapses 16 msb bits from each byte in rv_mask into mask.\n  int mask = _mm_movemask_epi8(rv_mask);\n\n  // Note: Last 2 operations can be combined in skylake with _mm_cmpeq_epi8_mask.\n  return mask;\n}\n#endif\n\n// Bucket slot array goes from left to right: [x, x, ...]\n// Shift right vacates the first slot on the left by shifting all the elements right and\n// possibly deleting the last one on the right.\ntemplate <unsigned NUM_SLOTS> bool BucketBase<NUM_SLOTS>::ShiftRight() {\n  for (int i = NUM_SLOTS - 1; i > 0; --i) {\n    finger_arr_[i] = finger_arr_[i - 1];\n  }\n\n  // confusing but correct - slot bit mask LSB corresponds to left part of slot array.\n  // therefore, we shift left slot mask.\n  bool res = slotb_.ShiftLeft();\n  assert(slotb_.FindEmptySlot() == 0);\n  return res;\n}\n\ntemplate <unsigned NUM_SLOTS>\ntemplate <typename F>\nauto BucketBase<NUM_SLOTS>::IterateStash(uint8_t fp, bool is_probe, F&& func) const\n    -> ::std::pair<unsigned, SlotId> {\n  unsigned om = is_probe ? 
stash_probe_mask_ : ~stash_probe_mask_;\n  unsigned ob = stash_busy_;\n\n  for (unsigned i = 0; i < kStashFpLen; ++i) {\n    if ((ob & 1) && (stash_arr_[i] == fp) && (om & 1)) {\n      unsigned pos = (stash_pos_ >> (i * 2)) & 3;\n      auto sid = func(i, pos);\n      if (sid != BucketBase::kNanSlot) {\n        return std::pair<unsigned, SlotId>(pos, sid);\n      }\n    }\n    ob >>= 1;\n    om >>= 1;\n  }\n  return {0, BucketBase::kNanSlot};\n}\n\ntemplate <unsigned NUM_SLOTS> void VersionedBB<NUM_SLOTS>::SetVersion(uint64_t version) {\n  absl::little_endian::Store64(version_, version);\n}\n\n/*\n____ ____ ____ _  _ ____ _  _ ___\n[__  |___ | __ |\\/| |___ |\\ |  |\n___] |___ |__] |  | |___ | \\|  |\n\n*/\n\n// for clang ignore -Wunused-lambda-capture\n#ifdef __clang__\n#pragma clang diagnostic ignored \"-Wunused-lambda-capture\"\n#endif\n\ntemplate <typename Key, typename Value, typename Policy>\ntemplate <typename Pred>\nauto Segment<Key, Value, Policy>::Bucket::FindByFp(uint8_t fp_hash, bool probe, Pred&& pred) const\n    -> SlotId {\n  unsigned mask = this->Find(fp_hash, probe);\n  if (!mask)\n    return kNanSlot;\n\n  unsigned delta = __builtin_ctz(mask);\n  mask >>= delta;\n  for (unsigned i = delta; i < kSlotNum; ++i) {\n    // Filterable just by key\n    if constexpr (std::is_invocable_v<Pred, const Key_t&>) {\n      if ((mask & 1) && pred(key[i]))\n        return i;\n    }\n\n    // Filterable by key and value\n    if constexpr (std::is_invocable_v<Pred, const Key_t&, const Value_t&>) {\n      if ((mask & 1) && pred(key[i], value[i]))\n        return i;\n    }\n\n    mask >>= 1;\n  };\n\n  return kNanSlot;\n}\n\ntemplate <typename Key, typename Value, typename Policy>\nbool Segment<Key, Value, Policy>::Bucket::ShiftRight() {\n  bool res = BucketType::ShiftRight();\n  for (int i = kSlotNum - 1; i > 0; i--) {\n    std::swap(key[i], key[i - 1]);\n    std::swap(value[i], value[i - 1]);\n  }\n  return res;\n}\n\n// stash_pos is index of the stash bucket, in the 
range of [0, STASH_BUCKET_NUM).\ntemplate <typename Key, typename Value, typename Policy>\nvoid Segment<Key, Value, Policy>::RemoveStashReference(unsigned stash_pos, Hash_t key_hash) {\n  LogicalBid y = HomeIndex(key_hash);\n  uint8_t fp_hash = key_hash & kFpMask;\n  auto* target = &bucket_[y];\n  auto* next = &bucket_[NextBid(y)];\n\n  target->UnsetStashPtr(fp_hash, stash_pos, next);\n}\n\ntemplate <typename Key, typename Value, typename Policy>\nauto Segment<Key, Value, Policy>::TryMoveFromStash(unsigned stash_id, unsigned stash_slot_id,\n                                                   Hash_t key_hash) -> Iterator {\n  LogicalBid bid = HomeIndex(key_hash);\n  uint8_t hash_fp = key_hash & kFpMask;\n  PhysicalBid stash_bid = kBucketNum + stash_id;\n  auto& key = Key(stash_bid, stash_slot_id);\n  auto& value = Value(stash_bid, stash_slot_id);\n\n  int reg_slot = bucket_[bid].TryInsertToBucket(std::forward<Key_t>(key),\n                                                std::forward<Value_t>(value), hash_fp, false);\n\n  if (reg_slot < 0) {\n    bid = NextBid(bid);\n    reg_slot = bucket_[bid].TryInsertToBucket(std::forward<Key_t>(key),\n                                              std::forward<Value_t>(value), hash_fp, true);\n  }\n\n  if (reg_slot >= 0) {\n    if constexpr (kUseVersion) {\n      // We maintain the invariant for the physical bucket by updating the version when\n      // the entries move between buckets.\n      uint64_t ver = bucket_[stash_bid].GetVersion();\n      bucket_[bid].UpdateVersion(ver);\n    }\n    RemoveStashReference(stash_id, key_hash);\n    return Iterator{bid, SlotId(reg_slot)};\n  }\n\n  return Iterator{};\n}\n\ntemplate <typename Key, typename Value, typename Policy>\ntemplate <typename U, typename V, typename Pred, typename OnMoveCb>\nauto Segment<Key, Value, Policy>::Insert(U&& key, V&& value, Hash_t key_hash, Pred&& pred,\n                                         OnMoveCb&& on_move_cb) -> std::pair<Iterator, bool> {\n  Iterator 
it = FindIt(key_hash, pred);\n  if (it.found()) {\n    return std::make_pair(it, false); /* duplicate insert*/\n  }\n\n  it = InsertUniq(std::forward<U>(key), std::forward<V>(value), key_hash, true,\n                  std::forward<OnMoveCb>(on_move_cb));\n\n  return std::make_pair(it, it.found());\n}\n\ntemplate <typename Key, typename Value, typename Policy>\ntemplate <typename Pred>\nauto Segment<Key, Value, Policy>::FindIt(Hash_t key_hash, Pred&& pred) const -> Iterator {\n  LogicalBid bidx = HomeIndex(key_hash);\n  const Bucket& target = bucket_[bidx];\n\n  // It helps a bit (10% on my home machine) and more importantly, it does not hurt\n  // since we are going to access this memory in a bit.\n  __builtin_prefetch(&target);\n\n  uint8_t fp_hash = key_hash & kFpMask;\n  SlotId sid = target.FindByFp(fp_hash, false, pred);\n  if (sid != BucketType::kNanSlot) {\n    return Iterator{bidx, sid};\n  }\n\n  LogicalBid nid = NextBid(bidx);\n  const Bucket& probe = GetBucket(nid);\n\n  sid = probe.FindByFp(fp_hash, true, pred);\n\n#ifdef ENABLE_DASH_STATS\n  stats.neighbour_probes++;\n#endif\n\n  if (sid != BucketType::kNanSlot) {\n    return Iterator{nid, sid};\n  }\n\n  if (!target.HasStash()) {\n    return Iterator{};\n  }\n\n  auto stash_cb = [&](unsigned overflow_index, PhysicalBid pos) -> SlotId {\n    assert(pos < kStashBucketNum);\n\n    pos += kBucketNum;\n    const Bucket& bucket = bucket_[pos];\n    return bucket.FindByFp(fp_hash, false, pred);\n  };\n\n  if (target.HasStashOverflow()) {\n#ifdef ENABLE_DASH_STATS\n    stats.stash_overflow_probes++;\n#endif\n\n    for (unsigned i = 0; i < kStashBucketNum; ++i) {\n      auto sid = stash_cb(0, i);\n      if (sid != BucketType::kNanSlot) {\n        return Iterator{PhysicalBid(kBucketNum + i), sid};\n      }\n    }\n\n    // We exit because we searched through all stash buckets anyway, no need to use overflow fps.\n    return Iterator{};\n  }\n\n#ifdef ENABLE_DASH_STATS\n  stats.stash_probes++;\n#endif\n\n  auto 
stash_res = target.IterateStash(fp_hash, false, stash_cb);\n  if (stash_res.second != BucketType::kNanSlot) {\n    return Iterator{PhysicalBid(kBucketNum + stash_res.first), stash_res.second};\n  }\n\n  stash_res = probe.IterateStash(fp_hash, true, stash_cb);\n  if (stash_res.second != BucketType::kNanSlot) {\n    return Iterator{PhysicalBid(kBucketNum + stash_res.first), stash_res.second};\n  }\n  return Iterator{};\n}\n\ntemplate <typename Key, typename Value, typename Policy>\nvoid Segment<Key, Value, Policy>::Prefetch(Hash_t key_hash) const {\n  LogicalBid bidx = HomeIndex(key_hash);\n  const Bucket& target = bucket_[bidx];\n\n  // Prefetch the home bucket that might hold the key with high probability.\n  __builtin_prefetch(&target, 0, 1);\n}\n\ntemplate <typename Key, typename Value, typename Policy>\ntemplate <typename Cb>\nvoid Segment<Key, Value, Policy>::TraverseAll(Cb&& cb) const {\n  for (uint8_t i = 0; i < kTotalBuckets; ++i) {\n    bucket_[i].ForEachSlot([&](auto*, SlotId slot, bool) { cb(Iterator{i, slot}); });\n  }\n}\n\ntemplate <typename Key, typename Value, typename Policy> void Segment<Key, Value, Policy>::Clear() {\n  for (unsigned i = 0; i < kTotalBuckets; ++i) {\n    bucket_[i].Clear();\n    bucket_[i].ClearStashPtrs();\n  }\n}\n\ntemplate <typename Key, typename Value, typename Policy>\nvoid Segment<Key, Value, Policy>::Delete(const Iterator& it, Hash_t key_hash) {\n  assert(it.found());\n\n  auto& b = bucket_[it.index];\n\n  if (it.index >= kBucketNum) {\n    RemoveStashReference(it.index - kBucketNum, key_hash);\n  }\n\n  b.Delete(it.slot);\n}\n\n// Split items from the left segment to the right during the growth phase.\n// right segment will have all the items with lsb at local_depth ==1 .\ntemplate <typename Key, typename Value, typename Policy>\ntemplate <typename HFunc, typename MoveCb>\nvoid Segment<Key, Value, Policy>::Split(HFunc&& hfn, Segment* dest_right, MoveCb&& on_move_cb) {\n  ++local_depth_;\n  dest_right->local_depth_ = 
local_depth_;\n\n  // versioning does not work when entries move across buckets.\n  // we need to setup rules on how we do that\n  // do_versioning();\n  auto is_mine = [this](Hash_t hash) { return (hash >> (64 - local_depth_) & 1) == 0; };\n\n  auto update_version = [dest_right](const Bucket& src, PhysicalBid dest_id) {\n    (void)dest_id;\n    if constexpr (kUseVersion) {\n      // Maintaining consistent versioning.\n      uint64_t ver = src.GetVersion();\n      dest_right->bucket_[dest_id].UpdateVersion(ver);\n    }\n  };\n\n  for (unsigned i = 0; i < kBucketNum; ++i) {\n    uint32_t invalid_mask = 0;\n\n    auto cb = [&](auto* bucket, unsigned slot, bool probe) {\n      auto& key = bucket->key[slot];\n      Hash_t hash = hfn(key);\n\n      // we extract local_depth bits from the left part of the hash. Since we extended local_depth,\n      // we added an additional bit to the right, therefore we need to look at lsb of the extract.\n      if (is_mine(hash))\n        return;  // keep this key in the source\n\n      invalid_mask |= (1u << slot);\n\n      // We pass dummy callback because we are not interested to track movements in the newly\n      // created segment.\n      Iterator it = dest_right->InsertUniq(std::forward<Key_t>(bucket->key[slot]),\n                                           std::forward<Value_t>(bucket->value[slot]), hash, false,\n                                           [](auto&&...) {});\n\n      // we move items residing in a regular bucket to a new segment.\n      // Note 1: in case we are somehow attacked with items that after the split\n      // will go into the same segment, we may have a problem.\n      // It is highly unlikely that this happens with real world data.\n      // Note 2: Dragonfly replication is in fact is such unlikely attack. 
Since we go over\n      // the source table in a special order (go over all the segments for bucket 0,\n      // then for all the segments for bucket 1 etc), what happens is that the rdb stream is full\n      // of items with the same bucket id, say 0. Lots of items will go to the initial segment\n      // into bucket 0, which will become full, then bucket 1 will get full,\n      // and then the 4 stash buckets in the segment. Then the segment will have to split even\n      // though only 6 buckets are used just because of this\n      // extreme skewness of keys distribution. When a segment splits, we will still\n      // have items going into bucket 0 in the new segment. To alleviate this effect we usually\n      // reserve dash table to have enough segments during full sync to avoid handling those\n      // ill-formed splits.\n      // TODO: To protect ourselves again such situations we should use random seed\n      // for our dash hash function, thus avoiding the case where someone, on purpose or due to\n      // selective bias will be able to hit our dashtable with items with the same bucket id.\n      assert(it.found());\n      update_version(*bucket, it.index);\n      on_move_cb(segment_id_, i, dest_right->segment_id_, it.index);\n    };\n\n    bucket_[i].ForEachSlot(std::move(cb));\n    bucket_[i].ClearSlots(invalid_mask);\n  }\n\n  for (unsigned i = 0; i < kStashBucketNum; ++i) {\n    uint32_t invalid_mask = 0;\n    PhysicalBid bid = kBucketNum + i;\n    Bucket& stash = bucket_[bid];\n\n    auto cb = [&](auto* bucket, unsigned slot, bool probe) {\n      auto& key = bucket->key[slot];\n      Hash_t hash = hfn(key);\n\n      if (is_mine(hash)) {\n        // If the entry stays in the same segment we try to unload it back to the regular bucket.\n        Iterator it = TryMoveFromStash(i, slot, hash);\n        if (it.found()) {\n          invalid_mask |= (1u << slot);\n          on_move_cb(segment_id_, i, segment_id_, it.index);\n        }\n\n        return;\n    
  }\n\n      invalid_mask |= (1u << slot);\n      auto it = dest_right->InsertUniq(std::forward<Key_t>(bucket->key[slot]),\n                                       std::forward<Value_t>(bucket->value[slot]), hash, false,\n                                       /* not interested in these movements */ [](auto&&...) {});\n      (void)it;\n      assert(it.index != kNanBid);\n      update_version(*bucket, it.index);\n      on_move_cb(segment_id_, i, dest_right->segment_id_, it.index);\n\n      // Remove stash reference pointing to stash bucket i.\n      RemoveStashReference(i, hash);\n    };\n\n    stash.ForEachSlot(std::move(cb));\n    stash.ClearSlots(invalid_mask);\n  }\n}\n\ntemplate <typename Key, typename Value, typename Policy>\nint Segment<Key, Value, Policy>::MoveToOther(bool own_items, unsigned from_bid, unsigned to_bid) {\n  assert(from_bid < kBucketNum && to_bid < kBucketNum);\n  auto& src = bucket_[from_bid];\n  uint32_t mask = src.GetProbe(!own_items);\n  if (mask == 0) {\n    return -1;\n  }\n\n  int src_slot = __builtin_ctz(mask);\n  int dst_slot = bucket_[to_bid].TryInsertToBucket(std::forward<Key_t>(src.key[src_slot]),\n                                                   std::forward<Value_t>(src.value[src_slot]),\n                                                   src.Fp(src_slot), own_items);\n  if (dst_slot < 0)\n    return -1;\n\n  // We never decrease the version of the entry.\n  if constexpr (kUseVersion) {\n    auto& dst = bucket_[to_bid];\n    dst.UpdateVersion(src.GetVersion());\n  }\n\n  src.Delete(src_slot);\n\n  return src_slot;\n}\n\ntemplate <typename Key, typename Value, typename Policy>\nbool Segment<Key, Value, Policy>::CheckIfMovesToOther(bool own_items, unsigned from,\n                                                      unsigned to) const {\n  const auto& src = GetBucket(from);\n  uint32_t mask = src.GetProbe(!own_items);\n  if (mask == 0) {\n    return false;\n  }\n\n  const auto& dest = GetBucket(to);\n  return dest.IsFull() ? 
false : true;\n}\n\ntemplate <typename Key, typename Value, typename Policy>\ntemplate <typename U, typename V, typename OnMoveCb>\nauto Segment<Key, Value, Policy>::InsertUniq(U&& key, V&& value, Hash_t key_hash, bool spread,\n                                             OnMoveCb&& on_move_cb) -> Iterator {\n  const uint8_t bid = HomeIndex(key_hash);\n  const uint8_t nid = NextBid(bid);\n\n  Bucket& target = bucket_[bid];\n  Bucket& neighbor = bucket_[nid];\n  Bucket* insert_first = &target;\n\n  uint8_t meta_hash = key_hash & kFpMask;\n  unsigned ts = target.Size(), ns = neighbor.Size();\n  bool probe = false;\n\n  if (spread && ts > ns) {\n    insert_first = &neighbor;\n    probe = true;\n  }\n\n  int slot = insert_first->TryInsertToBucket(std::forward<U>(key), std::forward<V>(value),\n                                             meta_hash, probe);\n\n  if (slot >= 0) {\n    return Iterator{PhysicalBid(insert_first - bucket_), uint8_t(slot)};\n  }\n\n  if (!spread) {\n    int slot =\n        neighbor.TryInsertToBucket(std::forward<U>(key), std::forward<V>(value), meta_hash, true);\n    if (slot >= 0) {\n      return Iterator{nid, uint8_t(slot)};\n    }\n  }\n\n  int displace_index = MoveToOther(true, nid, NextBid(nid));\n  if (displace_index >= 0) {\n    neighbor.Insert(displace_index, std::forward<U>(key), std::forward<V>(value), meta_hash, true);\n    on_move_cb(segment_id_, nid, NextBid(nid));\n    return Iterator{nid, uint8_t(displace_index)};\n  }\n\n  unsigned prev_idx = PrevBid(bid);\n  displace_index = MoveToOther(false, bid, prev_idx);\n  if (displace_index >= 0) {\n    target.Insert(displace_index, std::forward<U>(key), std::forward<V>(value), meta_hash, false);\n    on_move_cb(segment_id_, bid, prev_idx);\n    return Iterator{bid, uint8_t(displace_index)};\n  }\n\n  // we balance stash fill rate  by starting from y % STASH_BUCKET_NUM.\n  for (unsigned i = 0; i < kStashBucketNum; ++i) {\n    unsigned stash_pos = (bid + i) % kStashBucketNum;\n\n    int 
stash_slot = bucket_[kBucketNum + stash_pos].TryInsertToBucket(\n        std::forward<U>(key), std::forward<V>(value), meta_hash, false);\n    if (stash_slot >= 0) {\n      target.SetStashPtr(stash_pos, meta_hash, &neighbor);\n      return Iterator{PhysicalBid(kBucketNum + stash_pos), uint8_t(stash_slot)};\n    }\n  }\n\n  return Iterator{};\n}\n\ntemplate <typename Key, typename Value, typename Policy>\ntemplate <bool UV>\nstd::enable_if_t<UV, unsigned> Segment<Key, Value, Policy>::CVCOnInsert(uint64_t ver_threshold,\n                                                                        Hash_t key_hash,\n                                                                        uint8_t bid_res[2]) const {\n  const LogicalBid bid = HomeIndex(key_hash);\n  const LogicalBid nid = NextBid(bid);\n\n  const Bucket& target = GetBucket(bid);\n  const Bucket& neighbor = GetBucket(nid);\n  uint8_t first = target.Size() > neighbor.Size() ? nid : bid;\n\n  const Bucket& bfirst = bucket_[first];\n  if (!bfirst.IsFull()) {\n    unsigned cnt = 0;\n    if (!bfirst.IsEmpty() && bfirst.GetVersion() < ver_threshold) {\n      bid_res[cnt++] = first;\n    }\n    return cnt;\n  }\n\n  // both nid and bid are full.\n  const LogicalBid after_next = NextBid(nid);\n\n  auto do_fun = [this, ver_threshold, &bid_res](auto bid, auto nid) {\n    unsigned cnt = 0;\n    // We could tighten the checks here and below because\n    // if nid is less than ver_threshold, then nid won't be affected and won't cross\n    // ver_threshold as well.\n    if (GetBucket(bid).GetVersion() < ver_threshold)\n      bid_res[cnt++] = bid;\n\n    if (!GetBucket(nid).IsEmpty() && GetBucket(nid).GetVersion() < ver_threshold)\n      bid_res[cnt++] = nid;\n    return cnt;\n  };\n\n  if (CheckIfMovesToOther(true, nid, after_next)) {\n    return do_fun(nid, after_next);\n  }\n\n  const uint8_t prev_bid = PrevBid(bid);\n  if (CheckIfMovesToOther(false, bid, prev_bid)) {\n    return do_fun(bid, prev_bid);\n  }\n\n  // 
Important to repeat exactly the insertion logic of InsertUnique.\n  for (unsigned i = 0; i < kStashBucketNum; ++i) {\n    PhysicalBid stash_bid = kBucketNum + ((bid + i) % kStashBucketNum);\n    const Bucket& stash = GetBucket(stash_bid);\n    if (!stash.IsFull()) {\n      unsigned cnt = 0;\n      if (!stash.IsEmpty() && stash.GetVersion() < ver_threshold)\n        bid_res[cnt++] = stash_bid;\n\n      return cnt;\n    }\n  }\n\n  return UINT16_MAX;\n}\n\ntemplate <typename Key, typename Value, typename Policy>\ntemplate <bool UV>\nstd::enable_if_t<UV, unsigned> Segment<Key, Value, Policy>::CVCOnBump(uint64_t ver_threshold,\n                                                                      unsigned bid, unsigned slot,\n                                                                      Hash_t hash,\n                                                                      uint8_t result_bid[3]) const {\n  if (bid < kBucketNum) {\n    // Right now we do not migrate entries from nid to bid, only from stash to normal buckets.\n    // The reason for this is that CVCOnBump implementation swaps the slots of the same bucket\n    // so there is no further action needed.\n    return 0;\n  }\n\n  // Stash case.\n  // There are three actors (interesting buckets). The stash bucket, the target bucket and its\n  // adjacent bucket (probe). To understand the code below consider the cases in CVCOnBump:\n  // 1. If the bid is not a stash bucket, then just swap the slots of the target.\n  // 2. If there is empty space in target or probe bucket insert the slot there and remove\n  //    it from the stash bucket.\n  // 3. If there is no empty space then we need to swap slots with either the target or the probe\n  //    bucket. Furthermore, if the target or the probe have one of their stash bits reference the\n  //    stash, then the stash bit entry is cleared. In total 2 buckets are modified.\n  // Case 1 is handled by the if statement above and cases 2 and 3 below. 
We should return via\n  // result_bid all the buckets(with version less than threshold) that CVCOnBump will modify.\n  // Note, that for case 2 & 3 we might return an extra bucket id even though this bucket was not\n  // changed. An example of that is TryMoveFromStash which will first try to insert on the target\n  // bucket and if that fails it will retry with the probe bucket. Since we don't really know\n  // which of the two we insert to we are pessimistic and assume that both of them got modified. I\n  // suspect we could optimize this out by looking at the fingerprints but for now I care about\n  // correctness and returning the correct modified buckets. Besides, we are on a path of updating\n  // the version anyway which will assert that the bucket won't be sent again during snapshotting.\n  unsigned result = 0;\n  if (bucket_[bid].GetVersion() < ver_threshold) {\n    result_bid[result++] = bid;\n  }\n  const uint8_t target_bid = HomeIndex(hash);\n  result_bid[result++] = target_bid;\n  const uint8_t probing_bid = NextBid(target_bid);\n  result_bid[result++] = probing_bid;\n\n  return result;\n}\n\ntemplate <typename Key, typename Value, typename Policy>\ntemplate <typename Cb>\nvoid Segment<Key, Value, Policy>::TraverseBucket(PhysicalBid bid, Cb&& cb) {\n  assert(bid < kTotalBuckets);\n\n  const Bucket& b = GetBucket(bid);\n  b.ForEachSlot([&](auto* bucket, uint8_t slot, bool probe) { cb(Iterator{bid, slot}); });\n}\n\ntemplate <typename Key, typename Value, typename Policy>\ntemplate <typename Cb, typename HashFn>\nbool Segment<Key, Value, Policy>::TraverseLogicalBucket(LogicalBid bid, HashFn&& hfun,\n                                                        Cb&& cb) const {\n  assert(bid < kBucketNum);\n\n  const Bucket& b = bucket_[bid];\n  bool found = false;\n  if (b.GetProbe(false)) {  // Check items that this bucket owns.\n    b.ForEachSlot([&](auto* bucket, SlotId slot, bool probe) {\n      if (!probe) {\n        found = true;\n        cb(Iterator{bid, 
slot});\n      }\n    });\n  }\n\n  uint8_t nid = NextBid(bid);\n  const Bucket& next = GetBucket(nid);\n\n  // check for probing entries in the next bucket, i.e. those that should reside in b.\n  if (next.GetProbe(true)) {\n    next.ForEachSlot([&](auto* bucket, SlotId slot, bool probe) {\n      if (probe) {\n        found = true;\n        assert(HomeIndex(hfun(bucket->key[slot])) == bid);\n        cb(Iterator{nid, slot});\n      }\n    });\n  }\n\n  // Finally go over stash buckets and find those entries that belong to b.\n  if (b.HasStash()) {\n    // do not bother with overflow fps. Just go over all the stash buckets.\n    for (uint8_t j = kBucketNum; j < kTotalBuckets; ++j) {\n      const auto& stashb = bucket_[j];\n      stashb.ForEachSlot([&](auto* bucket, SlotId slot, bool probe) {\n        if (HomeIndex(hfun(bucket->key[slot])) == bid) {\n          found = true;\n          cb(Iterator{j, slot});\n        }\n      });\n    }\n  }\n\n  return found;\n}\n\ntemplate <typename Key, typename Value, typename Policy>\nsize_t Segment<Key, Value, Policy>::SlowSize() const {\n  size_t res = 0;\n  for (unsigned i = 0; i < kTotalBuckets; ++i) {\n    res += bucket_[i].Size();\n  }\n  return res;\n}\n\ntemplate <typename Key, typename Value, typename Policy>\nauto Segment<Key, Value, Policy>::FindValidStartingFrom(PhysicalBid bid, unsigned slot) const\n    -> Iterator {\n  while (bid < kTotalBuckets) {\n    uint32_t mask = bucket_[bid].GetBusy();\n    mask >>= slot;\n    if (mask) {\n      return Iterator(bid, slot + __builtin_ctz(mask));\n    }\n    ++bid;\n    slot = 0;\n  }\n  return Iterator{};\n}\n\ntemplate <typename Key, typename Value, typename Policy>\ntemplate <typename BumpPolicy, typename OnMoveCb>\nauto Segment<Key, Value, Policy>::BumpUp(uint8_t bid, SlotId slot, Hash_t key_hash,\n                                         const BumpPolicy& bp, OnMoveCb&& on_move_cb) -> Iterator {\n  auto& from = GetBucket(bid);\n\n  if (!bp.CanBump(from.key[slot])) {\n    
return Iterator{bid, slot};\n  }\n\n  if (bid < kBucketNum) {\n    // non stash case.\n    if (slot > 0 && bp.CanBump(from.key[slot - 1])) {\n      from.Swap(slot - 1, slot);\n      return Iterator{bid, uint8_t(slot - 1)};\n    }\n    // TODO: We could promote further, by swapping probing bucket with its previous one.\n    return Iterator{bid, slot};\n  }\n\n  // stash bucket\n  // We swap the item with the item in the \"normal\" bucket in the last slot.\n  unsigned stash_pos = bid - kBucketNum;\n\n  // If we have an empty space for some reason just unload the stash entry.\n  if (Iterator it = TryMoveFromStash(stash_pos, slot, key_hash); it.found()) {\n    // TryMoveFromStash handles versions internally.\n    from.Delete(slot);\n    on_move_cb(segment_id_, bid, it.index);\n    return it;\n  }\n\n  uint8_t target_bid = HomeIndex(key_hash);\n  uint8_t nid = NextBid(target_bid);\n  uint8_t fp_hash = key_hash & kFpMask;\n  assert(fp_hash == from.Fp(slot));\n\n  // determine which bucket we are going to swap with.\n  // we swap with the bucket that references the stash entry, not necessarily its owning\n  // bucket.\n  auto& target = bucket_[target_bid];\n  auto& next = bucket_[nid];\n\n  // bucket_offs - 0 if exact bucket, 1 if neighbour\n  unsigned bucket_offs = target.UnsetStashPtr(fp_hash, stash_pos, &next);\n  uint8_t swap_bid = (target_bid + bucket_offs) % kBucketNum;\n  auto& swapb = bucket_[swap_bid];\n\n  constexpr unsigned kLastSlot = kSlotNum - 1;\n  assert(swapb.GetBusy() & (1 << kLastSlot));\n\n  // Don't move sticky items back to the stash because they're not evictable\n  // TODO: search for first swappable item\n  if (!bp.CanBump(swapb.key[kLastSlot])) {\n    target.SetStashPtr(stash_pos, fp_hash, &next);\n    return Iterator{bid, slot};\n  }\n\n  uint8_t swap_fp = swapb.Fp(kLastSlot);\n\n  // is_probing for the existing entry in swapb. It's unrelated to bucket_offs,\n  // i.e. 
it could be true even if bucket_offs is 0.\n  bool is_probing = swapb.GetProbe(true) & (1 << kLastSlot);\n\n  // swap keys, values and fps. update slots meta.\n  std::swap(from.key[slot], swapb.key[kLastSlot]);\n  std::swap(from.value[slot], swapb.value[kLastSlot]);\n  from.Delete(slot);\n  from.SetHash(slot, swap_fp, false);\n\n  swapb.Delete(kLastSlot);\n  swapb.SetHash(kLastSlot, fp_hash, bucket_offs == 1);\n\n  // update versions.\n  if constexpr (kUseVersion) {\n    uint64_t from_ver = from.GetVersion();\n    uint64_t swap_ver = swapb.GetVersion();\n    if (from_ver < swap_ver) {\n      from.SetVersion(swap_ver);\n    } else {\n      swapb.SetVersion(from_ver);\n    }\n  }\n\n  // update ptr for swapped items\n  if (is_probing) {\n    LogicalBid prev_bid = PrevBid(swap_bid);\n    auto& prevb = bucket_[prev_bid];\n    prevb.SetStashPtr(stash_pos, swap_fp, &swapb);\n  } else {\n    // stash_ptr resides in the current or the next bucket.\n    LogicalBid next_bid = NextBid(swap_bid);\n    swapb.SetStashPtr(stash_pos, swap_fp, bucket_ + next_bid);\n  }\n\n  on_move_cb(segment_id_, bid, swap_bid);\n  on_move_cb(segment_id_, swap_bid, bid);\n  return Iterator{swap_bid, kLastSlot};\n}\n\ntemplate <typename Key, typename Value, typename Policy>\ntemplate <typename HFunc, typename OnMoveCb>\nunsigned Segment<Key, Value, Policy>::UnloadStash(HFunc&& hfunc, OnMoveCb&& on_move_cb) {\n  unsigned moved = 0;\n\n  for (unsigned i = 0; i < kStashBucketNum; ++i) {\n    unsigned bid = kBucketNum + i;\n    Bucket& stash = bucket_[bid];\n    uint32_t invalid_mask = 0;\n\n    auto cb = [&](auto* bucket, unsigned slot, bool probe) {\n      auto& key = bucket->key[slot];\n      Hash_t hash = hfunc(key);\n      Iterator res = TryMoveFromStash(i, slot, hash);\n      if (res.found()) {\n        ++moved;\n        invalid_mask |= (1u << slot);\n        on_move_cb(segment_id_, i, res.index);\n      }\n    };\n\n    stash.ForEachSlot(cb);\n    stash.ClearSlots(invalid_mask);\n  }\n\n  return 
moved;\n}\n\n}  // namespace detail\n}  // namespace dfly\n"
  },
  {
    "path": "src/core/dash_test.cc",
    "content": "// Copyright 2022, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#include <cstdint>\n#define ENABLE_DASH_STATS\n\n#include <absl/container/flat_hash_map.h>\n#include <absl/strings/str_cat.h>\n#include <mimalloc.h>\n\n#include <functional>\n#include <set>\n\n#include \"base/gtest.h\"\n#include \"base/hash.h\"\n#include \"base/logging.h\"\n#include \"base/zipf_gen.h\"\n#include \"core/dash.h\"\n#include \"io/file.h\"\n#include \"io/line_reader.h\"\n\nextern \"C\" {\n#include \"redis/dict.h\"\n#include \"redis/sds.h\"\n#include \"redis/zmalloc.h\"\n}\n\n#if defined(__clang__)\n#pragma clang diagnostic ignored \"-Wunused-const-variable\"\n#endif\n\nnamespace dfly {\n\nstatic uint64_t callbackHash(const void* key) {\n  return XXH64(&key, sizeof(key), 0);\n}\n\ntemplate <typename K> auto EqTo(const K& key) {\n  return [&key](const auto& probe) { return key == probe; };\n}\n\nstatic dictType IntDict = {callbackHash, NULL, NULL, NULL, NULL, NULL, NULL};\n\nstatic uint64_t dictSdsHash(const void* key) {\n  return dictGenHashFunction((unsigned char*)key, sdslen((char*)key));\n}\n\nstatic int dictSdsKeyCompare(dict*, const void* key1, const void* key2) {\n  int l1, l2;\n\n  l1 = sdslen((sds)key1);\n  l2 = sdslen((sds)key2);\n  if (l1 != l2)\n    return 0;\n  return memcmp(key1, key2, l1) == 0;\n}\n\nstatic dictType SdsDict = {\n    dictSdsHash,       /* hash function */\n    NULL,              /* key dup */\n    NULL,              /* val dup */\n    dictSdsKeyCompare, /* key compare */\n    NULL,\n    // dictSdsDestructor, /* key destructor */\n    NULL, /* val destructor */\n    NULL,\n};\n\nusing namespace std;\nstruct Buf24 {\n  char buf[20];\n  uint32_t index;\n\n  Buf24(uint32_t i = 0) : index(i) {\n  }\n};\n\nstruct BasicDashPolicy {\n  enum { kSlotNum = 12, kBucketNum = 64 };\n  static constexpr bool kUseVersion = false;\n\n  template <typename U> static void DestroyValue(const U&) {\n  }\n  template <typename U> 
static void DestroyKey(const U&) {\n  }\n\n  template <typename U, typename V> static bool Equal(U&& u, V&& v) {\n    return u == v;\n  }\n};\nstruct UInt64Policy : public BasicDashPolicy {\n  static uint64_t HashFn(uint64_t v) {\n    return XXH3_64bits(&v, sizeof(v));\n  }\n};\n\nclass CappedResource final : public PMR_NS::memory_resource {\n public:\n  explicit CappedResource(size_t cap) : cap_(cap) {\n  }\n\n  size_t used() const {\n    return used_;\n  }\n\n private:\n  void* do_allocate(std::size_t size, std::size_t align) {\n    if (used_ + size > cap_)\n      throw std::bad_alloc{};\n\n    void* res = PMR_NS::get_default_resource()->allocate(size, align);\n    used_ += size;\n\n    return res;\n  }\n\n  void do_deallocate(void* ptr, std::size_t size, std::size_t align) {\n    used_ -= size;\n    PMR_NS::get_default_resource()->deallocate(ptr, size, align);\n  }\n\n  bool do_is_equal(const PMR_NS::memory_resource& o) const noexcept {\n    return this == &o;\n  }\n\n  size_t cap_;\n  size_t used_ = 0;\n};\n\nusing Segment = detail::Segment<uint64_t, Buf24>;\nusing Dash64 = DashTable<uint64_t, uint64_t, UInt64Policy>;\n\nstruct RelaxedBumpPolicy {\n  bool CanBump(uint64_t key) const {\n    return true;\n  }\n  void OnMove(Dash64::Cursor source, Dash64::Cursor dest) {\n  }\n};\n\nconstexpr auto kSegTax = Segment::kTaxSize;\nconstexpr size_t kMaxSize = Segment::kMaxSize;\nconstexpr size_t kSegSize = sizeof(Segment);\n\nclass DashTest : public testing::Test {\n protected:\n  static void SetUpTestSuite() {\n    init_zmalloc_threadlocal(mi_heap_get_backing());\n  }\n\n  DashTest() : segment_(1, 0, PMR_NS::get_default_resource()) {\n  }\n\n  bool Find(Segment::Key_t key, Segment::Value_t* val) const {\n    uint64_t hash = dt_.DoHash(key);\n\n    auto it = segment_.FindIt(hash, EqTo(key));\n    if (!it.found())\n      return false;\n    *val = segment_.Value(it.index, it.slot);\n    return true;\n  }\n\n  bool Contains(Segment::Key_t key) const {\n    uint64_t hash = 
dt_.DoHash(key);\n    auto it = segment_.FindIt(hash, EqTo(key));\n    return it.found();\n  }\n\n  set<Segment::Key_t> FillSegment(unsigned bid);\n\n  Segment segment_;\n  Dash64 dt_;\n};\n\nset<Segment::Key_t> DashTest::FillSegment(unsigned bid) {\n  std::set<Segment::Key_t> keys;\n  for (Segment::Key_t key = 0; key < 1000000u; ++key) {\n    uint64_t hash = dt_.DoHash(key);\n    unsigned bi = (hash >> 8) % Segment::kBucketNum;\n    if (bi != bid)\n      continue;\n    uint8_t fp = hash & 0xFF;\n    if (fp > 2)  // limit fps considerably to find interesting cases.\n      continue;\n    auto [it, success] = segment_.Insert(key, 0, hash, EqTo(key), [](auto&&...) {});\n    if (!success) {\n      LOG(INFO) << \"Stopped at \" << key;\n      break;\n    }\n    CHECK(it.found());\n    keys.insert(key);\n  }\n\n  return keys;\n}\n\nTEST_F(DashTest, Hash) {\n  for (uint64_t i = 0; i < 100; ++i) {\n    uint64_t hash = dt_.DoHash(i);\n    if (hash >> 63) {\n      VLOG(1) << \"i \" << i << \", Hash \" << hash;\n    }\n  }\n}\n\nTEST_F(DashTest, SlotBitmap) {\n  detail::SlotBitmap<14> slot;\n  slot.SetSlot(1, true);\n  slot.SetSlot(5, false);\n  EXPECT_EQ(34, slot.GetBusy());\n  EXPECT_EQ(2, slot.GetProbe(true));\n}\n\nTEST_F(DashTest, Basic) {\n  Segment::Key_t key = 0;\n  Segment::Value_t val = 0;\n  uint64_t hash = dt_.DoHash(key);\n\n  EXPECT_TRUE(segment_.Insert(key, val, hash, EqTo(key), [](auto&&...) {}).second);\n  auto [it, res] = segment_.Insert(key, val, hash, EqTo(key), [](auto&&...) 
{});\n  EXPECT_TRUE(!res && it.found());\n\n  EXPECT_TRUE(Find(key, &val));\n  EXPECT_EQ(0, val.index);\n\n  EXPECT_FALSE(Find(1, &val));\n  EXPECT_EQ(1, segment_.SlowSize());\n\n  unsigned has_called = 0;\n  auto cb = [&](const auto& it) { ++has_called; };\n\n  auto hfun = &UInt64Policy::HashFn;\n\n  auto cursor = segment_.TraverseLogicalBucket((hash >> 8) % Segment::kBucketNum, hfun, cb);\n  ASSERT_EQ(1, has_called);\n  ASSERT_EQ(0, segment_.TraverseLogicalBucket(cursor, hfun, cb));\n  ASSERT_EQ(1, has_called);\n  EXPECT_EQ(0, segment_.GetVersion(0));\n}\n\nTEST_F(DashTest, Segment) {\n  std::unique_ptr<Segment> seg(new Segment(1, 0, PMR_NS::get_default_resource()));\n\n#ifndef __APPLE__\n  LOG(INFO) << \"Segment size \" << sizeof(Segment)\n            << \" malloc size: \" << malloc_usable_size(seg.get());\n#endif\n\n  set<Segment::Key_t> keys = FillSegment(0);\n\n  EXPECT_TRUE(segment_.GetBucket(0).IsFull() && segment_.GetBucket(1).IsFull());\n  for (size_t i = 2; i < Segment::kBucketNum; ++i) {\n    EXPECT_EQ(0, segment_.GetBucket(i).Size());\n  }\n  EXPECT_EQ(6 * Segment::kSlotNum, keys.size());\n  EXPECT_EQ(6 * Segment::kSlotNum, segment_.SlowSize());\n\n  auto hfun = &UInt64Policy::HashFn;\n  unsigned has_called = 0;\n\n  auto cb = [&](const Segment::Iterator& it) {\n    ++has_called;\n    ASSERT_EQ(1, keys.count(segment_.Key(it.index, it.slot)));\n  };\n\n  segment_.TraverseAll(cb);\n  ASSERT_EQ(keys.size(), has_called);\n\n  ASSERT_TRUE(segment_.GetBucket(Segment::kBucketNum).IsFull());\n  std::array<uint64_t, Segment::kSlotNum * 2> arr;\n  uint64_t* next = arr.begin();\n  for (unsigned i = Segment::kBucketNum; i < Segment::kBucketNum + 2; ++i) {\n    const auto* k = &segment_.Key(i, 0);\n    next = std::copy(k, k + Segment::kSlotNum, next);\n  }\n\n  for (auto k : arr) {\n    auto hash = hfun(k);\n    auto it = segment_.FindIt(hash, [&k](const auto& probe) { return k == probe; });\n    ASSERT_TRUE(it.found());\n    segment_.Delete(it, hash);\n  }\n  
EXPECT_EQ(4 * Segment::kSlotNum, segment_.SlowSize());\n  ASSERT_FALSE(Contains(arr.front()));\n}\n\nTEST_F(DashTest, SegmentFull) {\n  std::equal_to<> eq;\n  for (Segment::Key_t key = 8000; key < 15000u; ++key) {\n    uint64_t hash = dt_.DoHash(key);\n    bool res = segment_.Insert(key, 0, hash, eq, [](auto&&...) {}).second;\n    if (!res) {\n      LOG(INFO) << \"Stopped at \" << key;\n      break;\n    }\n  }\n  EXPECT_GT(segment_.SlowSize(), Segment::capacity() * 0.85);\n\n  LOG(INFO) << \"Utilization \" << double(segment_.SlowSize()) / Segment::capacity()\n            << \" num probing buckets: \" << segment_.NumProbingBuckets();\n\n  LOG(INFO) << \"NB: \" << segment_.stats.neighbour_probes << \" SP: \" << segment_.stats.stash_probes\n            << \" SOP: \" << segment_.stats.stash_overflow_probes;\n  segment_.stats.neighbour_probes = segment_.stats.stash_overflow_probes =\n      segment_.stats.stash_probes = 0;\n  for (Segment::Key_t key = 0; key < 10000u; ++key) {\n    Contains(key);\n  }\n  LOG(INFO) << segment_.stats.neighbour_probes << \" \" << segment_.stats.stash_probes << \" \"\n            << segment_.stats.stash_overflow_probes;\n\n  uint32_t busy = segment_.GetBucket(0).GetBusy();\n  uint32_t probe = segment_.GetBucket(0).GetProbe(true);\n\n  EXPECT_EQ((1 << 12) - 1, busy);  // Size 12\n  EXPECT_EQ(539, probe);           // verified by running since the test is deterministic.\n\n  unsigned keys[12] = {8045, 8085, 8217, 8330, 8337, 8381, 8432, 8506, 8587, 8605, 8612, 8725};\n  for (unsigned i = 0; i < 12; ++i) {\n    ASSERT_EQ(keys[i], segment_.Key(0, i));\n  }\n}\n\nTEST_F(DashTest, FirstStash) {\n  constexpr unsigned kRegularCapacity = Segment::kBucketNum * Segment::kSlotNum;\n  unsigned less_seventy = 0;\n  for (unsigned j = 0; j < 100; ++j) {\n    unsigned num_items = 0;\n    for (unsigned i = 0; i < 1000; ++i) {\n      uint64_t key = i + j * 2000;\n      uint64_t hash = dt_.DoHash(key);\n      auto [it, inserted] = segment_.Insert(key, 0, hash, 
equal_to<>{}, [](auto&&...) {});\n      ASSERT_TRUE(inserted);\n      if (it.index >= Segment::kBucketNum) {  // stash iterator\n        break;\n      }\n      ++num_items;\n    }\n    segment_.Clear();\n\n    // With high probability, we can expect 66% of the keys added without stashes.\n    ASSERT_GT(num_items, kRegularCapacity * 0.66);\n    if (num_items < kRegularCapacity * 0.7) {\n      ++less_seventy;\n    }\n  }\n  LOG(INFO) << \"Less than 70% of keys in regular buckets: \" << less_seventy;\n}\n\nTEST_F(DashTest, Split) {\n  // fills segment with maximum keys that must reside in bucket id 0.\n  set<Segment::Key_t> keys = FillSegment(0);\n  Segment::Value_t val;\n  Segment s2{2, 0, PMR_NS::get_default_resource()};  // segment with local depth 2.\n\n  segment_.Split(&UInt64Policy::HashFn, &s2, [](auto&...) {});\n  unsigned sum[2] = {0};\n  for (auto key : keys) {\n    auto eq = [key](const auto& probe) { return key == probe; };\n    auto it1 = segment_.FindIt(dt_.DoHash(key), eq);\n    auto it2 = s2.FindIt(dt_.DoHash(key), eq);\n    ASSERT_NE(it1.found(), it2.found()) << key;\n\n    sum[0] += it1.found();\n    sum[1] += it2.found();\n  }\n\n  ASSERT_EQ(segment_.SlowSize(), sum[0]);\n  EXPECT_EQ(s2.SlowSize(), sum[1]);\n  EXPECT_EQ(keys.size(), sum[0] + sum[1]);\n  EXPECT_EQ(6 * Segment::kSlotNum, keys.size());\n}\n\nTEST_F(DashTest, Merge) {\n  constexpr size_t kNumItems = 4000;\n  std::vector<uint64_t> keys;\n\n  for (uint64_t i = 0; i < kNumItems; ++i) {\n    auto [it, inserted] = dt_.Insert(i, i);\n    if (inserted) {\n      keys.push_back(i);\n    }\n  }\n\n  EXPECT_EQ(dt_.depth(), 3);\n\n  // keep only ~5%\n  size_t keys_to_keep = keys.size() * 0.05;\n\n  for (size_t i = keys_to_keep; i < keys.size(); ++i) {\n    dt_.Erase(keys[i]);\n  }\n\n  keys.resize(keys_to_keep);\n\n  EXPECT_EQ(dt_.unique_segments(), 8);\n  size_t dir_size = dt_.GetSegmentCount();\n\n  // Iteratively merge segments until all reach depth 1\n  // Use multiple passes since merging 
changes buddy relationships\n  while (true) {\n    bool merged_any = false;\n\n    for (size_t seg_id = 0; seg_id < dir_size; seg_id++) {\n      auto* seg = dt_.GetSegment(seg_id);\n\n      size_t local_depth = seg->local_depth();\n      if (local_depth == 1)\n        continue;\n\n      size_t buddy_id = dt_.FindBuddyId(seg_id);\n      if (buddy_id == seg_id)\n        continue;\n\n      // Skip if seg_id > buddy_id to avoid processing the same pair twice\n      // (FindBuddyId is symmetric, so we see each pair from both directions)\n      if (seg_id > buddy_id)\n        continue;\n\n      auto* buddy = dt_.GetSegment(buddy_id);\n\n      // Preconditions to merge: (< 25% of capacity)\n      size_t combined_size = seg->SlowSize() + buddy->SlowSize();\n      size_t safe_threshold = static_cast<size_t>(0.25 * seg->capacity());\n\n      if (combined_size <= safe_threshold) {\n        dt_.Merge(seg_id, buddy_id);\n        merged_any = true;\n      }\n    }\n\n    if (!merged_any)\n      break;\n  }\n  EXPECT_EQ(dt_.unique_segments(), 2);\n  for (size_t seg_id = 0; seg_id < dir_size; seg_id++) {\n    auto* seg = dt_.GetSegment(seg_id);\n    EXPECT_EQ(seg->local_depth(), 1);\n  }\n\n  for (size_t key : keys) {\n    EXPECT_EQ(dt_.Find(key).is_done(), false);\n  }\n  EXPECT_EQ(dt_.bucket_count(), (Segment::kBucketNum + Segment::kStashBucketNum) * 2);\n}\n\nTEST_F(DashTest, MergeFailureRollback) {\n  std::vector<uint64_t> all_keys;\n  std::vector<uint64_t> keep_keys;\n  std::vector<uint64_t> buddy_keys;\n\n  // Insert enough items to create 4 segments (depth 2) and fill them more\n  for (uint64_t i = 0; i < 5000; ++i) {\n    auto [it, inserted] = dt_.Insert(i, i);\n    if (inserted) {\n      all_keys.push_back(i);\n    }\n  }\n\n  EXPECT_GE(dt_.depth(), 2);\n\n  unsigned sid = 0;\n  size_t buddy_id = dt_.FindBuddyId(sid);\n  EXPECT_NE(buddy_id, sid);\n\n  auto* src = dt_.GetSegment(sid);\n  auto* buddy = dt_.GetSegment(buddy_id);\n\n  for (uint64_t key : all_keys) {\n    auto 
it = dt_.Find(key);\n    if (!it.is_done()) {\n      uint64_t hash = dt_.DoHash(key);\n      uint32_t seg_id = hash >> (64 - dt_.depth());\n\n      if (seg_id == 0) {\n        keep_keys.push_back(key);\n      } else if (seg_id == buddy_id) {\n        buddy_keys.push_back(key);\n      }\n    }\n  }\n\n  size_t total_size_before = dt_.size();\n\n  bool merge_succeeded = dt_.Merge(sid, buddy_id);\n\n  EXPECT_EQ(dt_.size(), total_size_before);\n\n  // Bucket layout might change after rollback. We only get data parity, not\n  // a complete layout rollback.\n  // For example, InsertUniq can displace existing items in the keep segment\n  // to make room for items being moved from buddy.\n  // After rollback, src and buddy pointers should still be valid\n  for (auto key : keep_keys) {\n    uint64_t hash = dt_.DoHash(key);\n    auto it = src->FindIt(hash, EqTo(key));\n    EXPECT_TRUE(it.found());\n  }\n\n  for (auto key : buddy_keys) {\n    uint64_t hash = dt_.DoHash(key);\n    auto it = buddy->FindIt(hash, EqTo(key));\n    EXPECT_TRUE(it.found());\n  }\n\n  EXPECT_FALSE(merge_succeeded);\n}\n\n// Verify that FindBuddyId is symmetric: if FindBuddyId(x) = y, then FindBuddyId(y) = x.\nTEST_F(DashTest, FindBuddySymmetry) {\n  for (uint64_t i = 0; i < 4000; ++i) {\n    dt_.Insert(i, i);\n  }\n\n  EXPECT_GE(dt_.depth(), 3);\n  size_t dir_size = dt_.GetSegmentCount();\n\n  for (size_t seg_id = 0; seg_id < dir_size; seg_id++) {\n    auto* seg = dt_.GetSegment(seg_id);\n    if (seg->local_depth() == 1)\n      continue;\n\n    size_t buddy_id = dt_.FindBuddyId(seg_id);\n    if (buddy_id == seg_id)\n      continue;\n\n    // Symmetry check\n    size_t reverse_buddy_id = dt_.FindBuddyId(buddy_id);\n    EXPECT_EQ(reverse_buddy_id, seg_id)\n        << \"FindBuddyId not symmetric: FindBuddyId(\" << seg_id << \")=\" << buddy_id\n        << \" but FindBuddyId(\" << buddy_id << \")=\" << reverse_buddy_id;\n  }\n}\n\n// Verify dt_.size() is unchanged after merge (items moved, not 
deleted).\nTEST_F(DashTest, MergePreservesSize) {\n  for (uint64_t i = 0; i < 4000; ++i) {\n    dt_.Insert(i, i);\n  }\n\n  // Delete most keys to make merge feasible\n  for (uint64_t i = 200; i < 4000; ++i) {\n    dt_.Erase(i);\n  }\n\n  size_t size_before = dt_.size();\n  size_t dir_size = dt_.GetSegmentCount();\n\n  // Do one merge pass\n  for (size_t seg_id = 0; seg_id < dir_size; seg_id++) {\n    auto* seg = dt_.GetSegment(seg_id);\n    if (seg->local_depth() == 1)\n      continue;\n\n    size_t buddy_id = dt_.FindBuddyId(seg_id);\n    if (buddy_id == seg_id || seg_id > buddy_id)\n      continue;\n\n    auto* buddy = dt_.GetSegment(buddy_id);\n    size_t combined_size = seg->SlowSize() + buddy->SlowSize();\n    if (combined_size <= static_cast<size_t>(0.25 * seg->capacity())) {\n      bool merged = dt_.Merge(seg_id, buddy_id);\n      if (merged) {\n        // Size must be unchanged after each merge\n        EXPECT_EQ(dt_.size(), size_before)\n            << \"size changed after merging seg_id=\" << seg_id << \" buddy_id=\" << buddy_id;\n      }\n    }\n  }\n}\n\n// After merging, verify all remaining keys are still findable via dt_.Find().\n// This tests that directory routing is correct after merge.\nTEST_F(DashTest, MergeKeyLookupConsistency) {\n  constexpr size_t kNumItems = 4000;\n  std::vector<uint64_t> all_keys;\n\n  for (uint64_t i = 0; i < kNumItems; ++i) {\n    auto [it, inserted] = dt_.Insert(i, i);\n    if (inserted)\n      all_keys.push_back(i);\n  }\n\n  // Keep only ~10% of keys\n  size_t keep_count = all_keys.size() / 10;\n  for (size_t i = keep_count; i < all_keys.size(); ++i) {\n    dt_.Erase(all_keys[i]);\n  }\n  all_keys.resize(keep_count);\n\n  size_t dir_size = dt_.GetSegmentCount();\n\n  // Merge all eligible pairs\n  bool merged_any = true;\n  while (merged_any) {\n    merged_any = false;\n    for (size_t seg_id = 0; seg_id < dir_size; seg_id++) {\n      auto* seg = dt_.GetSegment(seg_id);\n      if (seg->local_depth() == 1)\n        
continue;\n\n      size_t buddy_id = dt_.FindBuddyId(seg_id);\n      if (buddy_id == seg_id || seg_id > buddy_id)\n        continue;\n\n      auto* buddy = dt_.GetSegment(buddy_id);\n      size_t combined_size = seg->SlowSize() + buddy->SlowSize();\n      if (combined_size <= static_cast<size_t>(0.25 * seg->capacity())) {\n        if (dt_.Merge(seg_id, buddy_id)) {\n          merged_any = true;\n        }\n      }\n    }\n  }\n\n  // All remaining keys must be findable via the table-level Find\n  for (uint64_t key : all_keys) {\n    auto it = dt_.Find(key);\n    EXPECT_FALSE(it.is_done()) << \"Key \" << key << \" not found after merge\";\n  }\n}\n\n// Test that after merging to depth 1, inserting more keys works correctly —\n// the table can split again and all data remains intact.\nTEST_F(DashTest, MergeAndGrow) {\n  constexpr size_t kPhase1 = 4000;\n  std::vector<uint64_t> surviving_keys;\n\n  for (uint64_t i = 0; i < kPhase1; ++i) {\n    dt_.Insert(i, i);\n  }\n\n  // Delete enough to enable merge\n  size_t keep_count = kPhase1 / 20;  // ~5%\n  for (uint64_t i = keep_count; i < kPhase1; ++i) {\n    dt_.Erase(i);\n  }\n  for (uint64_t i = 0; i < keep_count; ++i) {\n    surviving_keys.push_back(i);\n  }\n\n  size_t dir_size = dt_.GetSegmentCount();\n  bool merged_any = true;\n  while (merged_any) {\n    merged_any = false;\n    for (size_t seg_id = 0; seg_id < dir_size; seg_id++) {\n      auto* seg = dt_.GetSegment(seg_id);\n      if (seg->local_depth() == 1)\n        continue;\n\n      size_t buddy_id = dt_.FindBuddyId(seg_id);\n      if (buddy_id == seg_id || seg_id > buddy_id)\n        continue;\n\n      auto* buddy = dt_.GetSegment(buddy_id);\n      size_t combined = seg->SlowSize() + buddy->SlowSize();\n      if (combined <= static_cast<size_t>(0.25 * seg->capacity())) {\n        dt_.Merge(seg_id, buddy_id);\n        merged_any = true;\n      }\n    }\n  }\n\n  EXPECT_EQ(dt_.unique_segments(), 2);\n\n  // Now insert a new batch — the table should grow (split) 
again\n  constexpr size_t kPhase2 = 3000;\n  for (uint64_t i = kPhase1; i < kPhase1 + kPhase2; ++i) {\n    auto [it, inserted] = dt_.Insert(i, i);\n    if (inserted)\n      surviving_keys.push_back(i);\n  }\n\n  EXPECT_GT(dt_.depth(), 1);\n\n  // ALL surviving keys must be findable after growth\n  for (uint64_t key : surviving_keys) {\n    auto it = dt_.Find(key);\n    EXPECT_FALSE(it.is_done()) << \"Key \" << key << \" lost after merge+grow\";\n  }\n}\n\n// Verify that after merging, all directory entries that span the merged\n// segment range point to the same segment object (the kept one).\nTEST_F(DashTest, MergeDirectoryConsistency) {\n  // Insert enough for depth 2 (4 segments)\n  for (uint64_t i = 0; i < 2000; ++i) {\n    dt_.Insert(i, i);\n  }\n\n  EXPECT_GE(dt_.depth(), 2);\n\n  // Delete most items to enable merge\n  for (uint64_t i = 50; i < 2000; ++i) {\n    dt_.Erase(i);\n  }\n\n  unsigned keep_id = 0;\n  unsigned buddy_id = dt_.FindBuddyId(0);\n\n  if (buddy_id == 0) {\n    // No buddy for segment 0 - try segment 2\n    keep_id = 2;\n    buddy_id = dt_.FindBuddyId(2);\n  }\n\n  // Only proceed if we found a mergeable buddy pair\n  if (buddy_id != keep_id) {\n    auto* keep = dt_.GetSegment(keep_id);\n    auto* buddy = dt_.GetSegment(buddy_id);\n\n    if (keep->local_depth() == buddy->local_depth() && keep->local_depth() > 1 &&\n        keep_id < buddy_id) {\n      uint8_t depth = keep->local_depth();\n      size_t combined = keep->SlowSize() + buddy->SlowSize();\n\n      if (combined <= static_cast<size_t>(0.25 * keep->capacity())) {\n        bool merged = dt_.Merge(keep_id, buddy_id);\n        ASSERT_TRUE(merged);\n\n        // After merge, all dir entries that covered buddy must now point to keep\n        auto* kept_seg = dt_.GetSegment(keep_id);\n        uint32_t chunk_size = 1u << (dt_.depth() - (depth - 1));\n        uint32_t start = keep_id & ~(chunk_size - 1u);\n\n        for (size_t i = start; i < start + chunk_size; ++i) {\n          
EXPECT_EQ(dt_.GetSegment(i), kept_seg)\n              << \"Directory entry \" << i << \" does not point to merged segment\";\n        }\n      }\n    }\n  }\n}\n\n// Test merging a table with global_depth > local_depth (aliased directory entries).\n// When a segment at depth D < global_depth is merged with its buddy,\n// the merged segment at depth D-1 should span the correct directory range.\nTEST_F(DashTest, MergeWithAliasedEntries) {\n  // Create depth-3 table (8 dir entries), then merge two depth-3 pairs to get depth-2 segments\n  // alongside other depth-3 segments. This creates aliased entries.\n  for (uint64_t i = 0; i < 4000; ++i) {\n    dt_.Insert(i, i);\n  }\n\n  EXPECT_EQ(dt_.depth(), 3);\n\n  // Delete most items\n  for (uint64_t i = 200; i < 4000; ++i) {\n    dt_.Erase(i);\n  }\n\n  // Merge segments 0 and 1 (both at depth 3) -> depth 2 segment spanning entries {0,1}\n  auto* seg0 = dt_.GetSegment(0);\n  auto* seg1 = dt_.GetSegment(1);\n\n  if (seg0->local_depth() == 3 && seg1->local_depth() == 3) {\n    size_t combined = seg0->SlowSize() + seg1->SlowSize();\n    size_t threshold = static_cast<size_t>(0.25 * seg0->capacity());\n\n    if (combined <= threshold) {\n      bool ok = dt_.Merge(0, 1);\n      ASSERT_TRUE(ok);\n\n      // Now segment at entries 0 and 1 is the same depth-2 object\n      EXPECT_EQ(dt_.GetSegment(0), dt_.GetSegment(1));\n      EXPECT_EQ(dt_.GetSegment(0)->local_depth(), 2);\n\n      // global_depth should still be 3\n      EXPECT_EQ(dt_.depth(), 3);\n\n      // Entries 2 and 3 should still be distinct depth-3 segments\n      EXPECT_NE(dt_.GetSegment(2), dt_.GetSegment(3));\n\n      // Since entries 2 and 3 are still at depth 3 (not yet merged into a depth-2 segment),\n      // the true buddy of the depth-2 segment {0,1} does NOT yet exist.\n      // FindBuddyId computes: bit_pos = global_depth(3) - local_depth(2) = 1\n      //   FindBuddyId(0) -> buddy_idx = 0^2 = 2, GetSegment(2)->local_depth() = 3 != 2 -> returns 0\n      //   
FindBuddyId(1) -> buddy_idx = 1^2 = 3, GetSegment(3)->local_depth() = 3 != 2 -> returns 1\n      // Both aliased entries correctly report \"no buddy\" (returning themselves).\n      EXPECT_EQ(dt_.FindBuddyId(0), 0u)\n          << \"No buddy exists for depth-2 segment when entries 2,3 are still depth-3\";\n      EXPECT_EQ(dt_.FindBuddyId(1), 1u)\n          << \"Aliased entry 1 of same depth-2 segment also finds no buddy\";\n\n      // Now merge entries 2 and 3 to create a second depth-2 segment covering {2,3}\n      auto* seg2 = dt_.GetSegment(2);\n      auto* seg3 = dt_.GetSegment(3);\n      if (seg2 != seg3) {\n        size_t combined23 = seg2->SlowSize() + seg3->SlowSize();\n        if (combined23 <= static_cast<size_t>(0.25 * seg2->capacity())) {\n          bool ok23 = dt_.Merge(2, 3);\n          if (ok23) {\n            // Now both {0,1} and {2,3} are depth-2 segments — they ARE buddies\n            // FindBuddyId(0): bit_pos=1, buddy_idx=0^2=2, GetSegment(2)->local_depth()=2 == 2 -> 2\n            // FindBuddyId(2): bit_pos=1, buddy_idx=2^2=0, GetSegment(0)->local_depth()=2 == 2 -> 0\n            EXPECT_EQ(dt_.FindBuddyId(0), 2u)\n                << \"After both pairs merged to depth-2, FindBuddyId(0)=2\";\n            EXPECT_EQ(dt_.FindBuddyId(2), 0u) << \"FindBuddyId(2) should return 0 (symmetric)\";\n            // Aliased entry 1 looks for buddy at 1^2=3\n            EXPECT_EQ(dt_.FindBuddyId(1), 3u) << \"FindBuddyId(1) returns 3 (alias buddy)\";\n          }\n        }\n      }\n    }\n  }\n}\n\n// Test that FindBuddyId resolves to the same buddy *instance* for all alias ids in a stripe.\n//\n// When global_depth > local_depth a segment is referenced by a contiguous \"stripe\" of\n// stripe_size = 2^(global_depth - local_depth) directory entries that all point to the\n// same segment object.\n// The canonical id is the stripe's first entry (lowest index).\n//\n// FindBuddyId(alias) computes:\n//   depth    = GetSegment(alias)->local_depth()    // reads 
from the instance, same for all\n//   bit_pos  = global_depth - depth                // same for every alias in the stripe\n//   buddy_ix = alias ^ (1 << bit_pos)              // XOR differs per alias\n//\n// For a stripe starting at canonical id C (i.e. C is a multiple of stripe_size):\n//   alias k = C + k  (0 <= k < stripe_size)\n//   buddy_ix(k) = (C + k) ^ (1 << bit_pos)\n//              = C ^ (1 << bit_pos) + k    (because k < stripe_size = 1<<bit_pos, so k\n//                                           does not interfere with bit bit_pos)\n//\n// buddy_ix(k) and buddy_ix(0) differ by k, which is still within the buddy stripe\n// (a stripe of the same size starting at C ^ (1<<bit_pos)).  Therefore\n// GetSegment(buddy_ix(k)) returns the same buddy instance for all k.\n//\n// In other words: FindBuddyId returns *different id values* for different alias ids,\n// but all those ids are aliases of the *same buddy segment instance*.\nTEST_F(DashTest, FindBuddyIdCanonicalForStripe) {\n  // Fill enough to force global_depth >= 3, giving segments at local_depth 3.\n  for (uint64_t i = 0; i < 8000; ++i) {\n    dt_.Insert(i, i);\n  }\n  ASSERT_GE(dt_.depth(), 3u);\n\n  // Erase most items so segments are sparse enough to merge.\n  for (uint64_t i = 100; i < 8000; ++i) {\n    dt_.Erase(i);\n  }\n\n  // To get a real buddy we must merge TWO adjacent pairs at the same depth.\n  // After merging pair A (keep_a, buddy_a) the kept segment drops to depth d-1,\n  // but its buddy stripe still has the old depth d, so FindBuddyId returns self.\n  // Only after merging the adjacent pair B (keep_b, buddy_b) to d-1 as well do\n  // the two resulting stripes become buddies of each other.\n  //\n  // We find four consecutive canonical segments at the same depth d > 2 and merge\n  // pairs (0,1) and (2,3) within that group.\n  unsigned keep_a = UINT_MAX, bud_a = UINT_MAX, keep_b = UINT_MAX, bud_b = UINT_MAX;\n  for (size_t i = 0; i < dt_.GetSegmentCount();) {\n    auto* s0 = 
dt_.GetSegment(i);\n    uint8_t d = s0->local_depth();\n    if (d <= 2) {\n      i = dt_.NextSeg(i);\n      continue;\n    }\n    size_t i1 = dt_.NextSeg(i);\n    if (i1 >= dt_.GetSegmentCount())\n      break;\n    size_t i2 = dt_.NextSeg(i1);\n    if (i2 >= dt_.GetSegmentCount())\n      break;\n    size_t i3 = dt_.NextSeg(i2);\n    if (i3 >= dt_.GetSegmentCount())\n      break;\n\n    auto* s1 = dt_.GetSegment(i1);\n    auto* s2 = dt_.GetSegment(i2);\n    auto* s3 = dt_.GetSegment(i3);\n    size_t cap = s0->capacity();\n    if (s1->local_depth() == d && s2->local_depth() == d && s3->local_depth() == d &&\n        s0->SlowSize() + s1->SlowSize() <= static_cast<size_t>(0.25 * cap) &&\n        s2->SlowSize() + s3->SlowSize() <= static_cast<size_t>(0.25 * cap)) {\n      keep_a = static_cast<unsigned>(i);\n      bud_a = static_cast<unsigned>(i1);\n      keep_b = static_cast<unsigned>(i2);\n      bud_b = static_cast<unsigned>(i3);\n      break;\n    }\n    i = dt_.NextSeg(i);\n  }\n\n  ASSERT_NE(keep_a, UINT_MAX);\n  ASSERT_TRUE(dt_.Merge(keep_a, bud_a));\n  ASSERT_TRUE(dt_.Merge(keep_b, bud_b));\n\n  // After both merges:\n  //   - segment at keep_a has local_depth = d-1, aliased by stripe {keep_a, keep_a+1}\n  //   - segment at keep_b has local_depth = d-1, aliased by stripe {keep_b, keep_b+1}\n  //   - The two stripes are buddies of each other (same depth, adjacent subtrees).\n  auto* seg_a = dt_.GetSegment(keep_a);\n  uint8_t new_depth = seg_a->local_depth();\n  ASSERT_GE(new_depth, 2u);  // depth<=1 guard in FindBuddyId must not fire\n\n  size_t stripe_size = 1u << (dt_.depth() - new_depth);\n  size_t stripe_start = keep_a & ~(stripe_size - 1);\n\n  // FindBuddyId from the canonical id of stripe A must resolve to seg_b.\n  auto* seg_b = dt_.GetSegment(keep_b);\n  unsigned canonical_bid = dt_.FindBuddyId(static_cast<unsigned>(stripe_start));\n  ASSERT_EQ(dt_.GetSegment(canonical_bid), seg_b)\n      << \"FindBuddyId from canonical id must resolve to the buddy 
segment\";\n\n  EXPECT_EQ(stripe_size, 2);\n  for (size_t k = 0; k < stripe_size; ++k) {\n    size_t alias = stripe_start + k;\n    EXPECT_EQ(dt_.GetSegment(alias), seg_a) << \"Directory entry \" << alias << \" must alias seg_a\";\n\n    unsigned bid = dt_.FindBuddyId(static_cast<unsigned>(alias));\n    // Different alias -> different buddy id value, but same buddy instance.\n    EXPECT_EQ(bid, canonical_bid + k)\n        << \"FindBuddyId(\" << alias << \") should equal canonical_bid + \" << k;\n    EXPECT_EQ(dt_.GetSegment(bid), seg_b)\n        << \"FindBuddyId(\" << alias << \") must resolve to seg_b for all aliases\";\n    // Stripe B is at higher indices than stripe A (Merge requires keep_id < buddy_id).\n    EXPECT_GT(bid, alias);\n  }\n}\n\n// Test that NextSeg is correct when called with the canonical (first) id of a stripe,\n// and documents the expected behavior for non-canonical (middle-of-stripe) ids.\n//\n// NextSeg(sid) computes:\n//   delta = 1 << (global_depth - segment_[sid]->local_depth())\n//   return sid + delta\n//\n// For the canonical (first) id of a stripe, sid is already aligned to a multiple of\n// delta, so sid + delta is exactly the first id of the next stripe — correct.\n//\n// For a non-canonical id sid = canonical + k  (0 < k < delta), the result is\n//   (canonical + k) + delta\n// which lands k positions into the next stripe, not at its start.\nTEST_F(DashTest, NextSegCanonicalBehavior) {\n  // Build a table large enough for global_depth >= 2.\n  for (uint64_t i = 0; i < 2000; ++i) {\n    dt_.Insert(i, i);\n  }\n  ASSERT_GE(dt_.depth(), 2u);\n\n  // NextSeg from id 0 always uses canonical ids (0 is always canonical).\n  // Verify it visits every distinct segment exactly once by comparing against\n  // unique_segments() which is maintained as a counter by Insert/Merge.\n  size_t visited = 0;\n  for (size_t i = 0; i < dt_.GetSegmentCount(); i = dt_.NextSeg(i)) {\n    ++visited;\n  }\n  EXPECT_EQ(visited, dt_.unique_segments())\n      
<< \"NextSeg traversal from id 0 (canonical) must visit each unique segment once\";\n\n  // Erase most entries and merge to create a stripe (local_depth < global_depth).\n  for (uint64_t i = 100; i < 2000; ++i) {\n    dt_.Erase(i);\n  }\n\n  // Find and perform a merge to produce a stripe.\n  for (size_t i = 0; i < dt_.GetSegmentCount(); i = dt_.NextSeg(i)) {\n    auto* seg = dt_.GetSegment(i);\n    if (seg->local_depth() <= 1)\n      continue;\n    size_t next = dt_.NextSeg(i);\n    if (next >= dt_.GetSegmentCount())\n      break;\n    auto* buddy = dt_.GetSegment(next);\n    if (buddy->local_depth() == seg->local_depth() &&\n        seg->SlowSize() + buddy->SlowSize() <= static_cast<size_t>(0.25 * seg->capacity())) {\n      bool ok = dt_.Merge(static_cast<unsigned>(i), static_cast<unsigned>(next));\n      if (ok)\n        break;\n    }\n  }\n\n  // After a potential merge, re-verify that canonical traversal is consistent.\n  size_t manual2 = 0;\n  for (size_t i = 0; i < dt_.GetSegmentCount(); i = dt_.NextSeg(i)) {\n    ++manual2;\n  }\n  EXPECT_EQ(manual2, dt_.unique_segments())\n      << \"After merge, canonical NextSeg traversal must still match unique_segments()\";\n\n  // Show the non-canonical case: for any stripe of size > 1, NextSeg from a non-first\n  // alias does NOT land on the start of the next stripe.\n  for (size_t i = 0; i < dt_.GetSegmentCount(); i = dt_.NextSeg(i)) {\n    auto* seg = dt_.GetSegment(i);\n    size_t delta = 1u << (dt_.depth() - seg->local_depth());\n    if (delta <= 1)\n      continue;  // no stripe aliases for this segment\n\n    // i is canonical; i+1 is a non-canonical alias of the same segment.\n    size_t non_canonical = i + 1;\n    ASSERT_LT(non_canonical, i + delta) << \"non_canonical must still be within the stripe\";\n\n    // NextSeg from the non-canonical id lands at (non_canonical + delta), which is\n    // one position past the start of the next stripe — demonstrating the offset.\n    size_t next_from_canonical = 
dt_.NextSeg(i);          // i + delta  (correct)\n    size_t next_from_alias = dt_.NextSeg(non_canonical);  // i+1+delta  (offset by 1)\n    EXPECT_EQ(next_from_alias, next_from_canonical + 1)\n        << \"NextSeg from a non-canonical alias is offset by the same amount as the alias \"\n           \"itself; callers must always use canonical (stripe-start) ids\";\n    break;  // one example is sufficient to document the behavior\n  }\n}\n\nTEST_F(DashTest, BumpUp) {\n  set<Segment::Key_t> keys = FillSegment(0);\n  constexpr unsigned kFirstStashId = Segment::kBucketNum;\n  constexpr unsigned kSecondStashId = Segment::kBucketNum + 1;\n  constexpr unsigned kSlotNum = Segment::kSlotNum;\n\n  EXPECT_TRUE(segment_.GetBucket(0).IsFull());\n  EXPECT_TRUE(segment_.GetBucket(1).IsFull());\n  EXPECT_TRUE(segment_.GetBucket(kFirstStashId).IsFull());\n  EXPECT_TRUE(segment_.GetBucket(kSecondStashId).IsFull());\n\n  // Segment::Iterator it{kFirstStashId, 1};\n  Segment::Key_t key = segment_.Key(1, 2);  // key at bucket 1, slot 2\n  uint8_t touched_bid[3];\n\n  uint64_t hash = dt_.DoHash(key);\n\n  segment_.Delete(Segment::Iterator{1, 2}, hash);\n  EXPECT_FALSE(segment_.GetBucket(1).IsFull());\n\n  segment_.SetVersion(kFirstStashId, 1);\n  key = segment_.Key(kFirstStashId, 5);\n  hash = dt_.DoHash(key);\n\n  EXPECT_EQ(2, segment_.CVCOnBump(1, kFirstStashId, 5, hash, touched_bid));\n  EXPECT_EQ(touched_bid[0], 0);\n  EXPECT_EQ(touched_bid[1], 1);\n\n  // Bump up\n  std::vector<std::pair<uint8_t, uint8_t>> moved_buckets;\n  auto move_cb = [&moved_buckets](uint32_t /* segment_id */, uint8_t a, uint8_t b) {\n    moved_buckets.emplace_back(a, b);\n  };\n  segment_.BumpUp(kFirstStashId, 5, hash, RelaxedBumpPolicy{}, move_cb);\n\n  // expect the key to move\n  EXPECT_TRUE(segment_.GetBucket(1).IsFull());\n  EXPECT_FALSE(segment_.GetBucket(kFirstStashId).IsFull());\n  EXPECT_EQ(segment_.Key(1, 2), key);\n  EXPECT_EQ(moved_buckets.size(), 1);\n  EXPECT_EQ(moved_buckets.at(0).first, 
kFirstStashId);\n  EXPECT_EQ(moved_buckets.at(0).second, 1);\n  moved_buckets.clear();\n\n  EXPECT_TRUE(Contains(key));\n\n  // 9 is just a random slot id.\n  key = segment_.Key(kSecondStashId, 9);\n  hash = dt_.DoHash(key);\n\n  EXPECT_EQ(3, segment_.CVCOnBump(2, kSecondStashId, 9, hash, touched_bid));\n  EXPECT_EQ(touched_bid[0], kSecondStashId);\n  // Bumpup will move the key to either its original bucket or a probing bucket.\n  // Since we can't determine the exact bucket before calling bumpup, CVCOnBump\n  // returns both the original bucket and the probing bucket.\n  EXPECT_EQ(touched_bid[1], 0);\n  EXPECT_EQ(touched_bid[2], 1);\n\n  auto it = segment_.BumpUp(kSecondStashId, 9, hash, RelaxedBumpPolicy{}, move_cb);\n  ASSERT_TRUE(key == segment_.Key(0, kSlotNum - 1) || key == segment_.Key(1, kSlotNum - 1));\n  EXPECT_TRUE(segment_.GetBucket(kSecondStashId).IsFull());\n  EXPECT_TRUE(Contains(key));\n  EXPECT_TRUE(segment_.Key(kSecondStashId, 9));\n  EXPECT_EQ(moved_buckets.size(), 2);\n  EXPECT_EQ(moved_buckets.at(0).first, kSecondStashId);\n  EXPECT_EQ(moved_buckets.at(0).second, it.index);\n  EXPECT_EQ(moved_buckets.at(1).first, it.index);\n  EXPECT_EQ(moved_buckets.at(1).second, kSecondStashId);\n}\n\nTEST_F(DashTest, BumpPolicy) {\n  struct RestrictedBumpPolicy {\n    bool CanBump(uint64_t key) const {\n      return false;\n    }\n    void OnMove(Dash64::Cursor source, Dash64::Cursor dest) {\n    }\n  };\n\n  set<Segment::Key_t> keys = FillSegment(0);\n  constexpr unsigned kFirstStashId = Segment::kBucketNum;\n\n  EXPECT_TRUE(segment_.GetBucket(0).IsFull());\n  EXPECT_TRUE(segment_.GetBucket(1).IsFull());\n  EXPECT_TRUE(segment_.GetBucket(kFirstStashId).IsFull());\n\n  // check items are immovable in bucket\n  Segment::Key_t key = segment_.Key(1, 2);\n  uint64_t hash = dt_.DoHash(key);\n  segment_.BumpUp(1, 2, hash, RestrictedBumpPolicy{}, [](auto&&...) 
{});\n  EXPECT_EQ(key, segment_.Key(1, 2));\n\n  // check items don't swap from stash\n  key = segment_.Key(kFirstStashId, 2);\n  hash = dt_.DoHash(key);\n  segment_.BumpUp(kFirstStashId, 2, hash, RestrictedBumpPolicy{}, [](auto&&...) {});\n  EXPECT_EQ(key, segment_.Key(kFirstStashId, 2));\n}\n\nTEST_F(DashTest, Insert2) {\n  uint64_t k = 1191;\n  ASSERT_EQ(2019837007031366716, UInt64Policy::HashFn(k));\n\n  Dash64 dt;\n  for (unsigned i = 0; i < 2000; ++i) {\n    dt.Insert(i, 0);\n  }\n}\n\nTEST_F(DashTest, InsertOOM) {\n  CappedResource resource(1 << 15);\n  Dash64 dt{1, UInt64Policy{}, &resource};\n\n  ASSERT_THROW(\n      {\n        for (size_t i = 0; i < (1 << 14); ++i) {\n          dt.Insert(i, 0);\n        }\n      },\n      bad_alloc);\n}\n\nstruct Item {\n  char buf[24];\n};\n\nconstexpr size_t ItemAlign = alignof(Item);\n\nstruct MyBucket : public detail::BucketBase<16> {\n  Item key[14];\n};\n\nconstexpr size_t kMySz = sizeof(MyBucket);\nconstexpr size_t kBBSz = sizeof(detail::BucketBase<16>);\n\nTEST_F(DashTest, Custom) {\n  using ItemSegment = detail::Segment<Item, uint64_t>;\n  constexpr double kTax = ItemSegment::kTaxSize;\n  constexpr size_t kMaxSize = ItemSegment::kMaxSize;\n  constexpr size_t kSegSize = sizeof(ItemSegment);\n  constexpr size_t kBuckSz = ItemSegment::kBucketSz;\n  (void)kTax;\n  (void)kMaxSize;\n  (void)kSegSize;\n  (void)kBuckSz;\n\n  ItemSegment seg{2, 0, PMR_NS::get_default_resource()};\n\n  auto eq = [v = Item{1, 1}](auto u) { return v.buf[0] == u.buf[0] && v.buf[1] == u.buf[1]; };\n  auto it = seg.FindIt(42, eq);\n  ASSERT_FALSE(it.found());\n}\n\nTEST_F(DashTest, FindByValue) {\n  using ItemSegment = detail::Segment<Item, uint64_t>;\n  auto no_op_cb = [](auto&&...) 
{};\n\n  // Insert three different values with the same hash\n  ItemSegment segment{2, 0, PMR_NS::get_default_resource()};\n  segment.Insert(\n      Item{1}, 1, 42, [](const auto& pred) { return pred.buf[0] == 1; }, no_op_cb);\n  segment.Insert(\n      Item{2}, 2, 42, [](const auto& pred) { return pred.buf[0] == 2; }, no_op_cb);\n  segment.Insert(\n      Item{3}, 3, 42, [](const auto& pred) { return pred.buf[0] == 3; }, no_op_cb);\n\n  // We should be able to find the middle one by value\n  auto it = segment.FindIt(42, [](const auto& key, const auto& value) { return value == 2; });\n  EXPECT_TRUE(it.found());\n  EXPECT_EQ(segment.Value(it.index, it.slot), 2);\n}\n\nTEST_F(DashTest, Reserve) {\n  unsigned bc = dt_.capacity();\n  for (unsigned i = 0; i <= bc * 2; ++i) {\n    dt_.Reserve(i);\n    ASSERT_GE((1 << dt_.depth()) * Dash64::kSegCapacity, i);\n  }\n}\n\nTEST_F(DashTest, Insert) {\n  constexpr size_t kNumItems = 10000;\n  double sum = 0;\n  for (size_t i = 0; i < kNumItems; ++i) {\n    dt_.Insert(i, i);\n    double u = (dt_.size() * 100.0) / (dt_.unique_segments() * Segment::capacity());\n\n    sum += u;\n    VLOG(1) << \"Num items \" << dt_.size() << \", load factor \" << u << \", size per entry \"\n            << double(dt_.mem_usage()) / dt_.size();\n  }\n  EXPECT_EQ(kNumItems, dt_.size());\n  LOG(INFO) << \"Average load factor is \" << sum / kNumItems;\n\n  for (size_t i = 0; i < kNumItems; ++i) {\n    Dash64::const_iterator it = dt_.Find(i);\n    ASSERT_TRUE(it != dt_.end());\n\n    ASSERT_EQ(it->second, i);\n    ASSERT_LE(dt_.load_factor(), 1) << i;\n  }\n\n  for (size_t i = kNumItems; i < kNumItems * 10; ++i) {\n    Dash64::const_iterator it = dt_.Find(i);\n    ASSERT_TRUE(it == dt_.end());\n  }\n\n  EXPECT_EQ(kNumItems, dt_.size());\n  EXPECT_EQ(1, dt_.Erase(0));\n  EXPECT_EQ(0, dt_.Erase(0));\n  EXPECT_EQ(kNumItems - 1, dt_.size());\n\n  auto it = dt_.begin();\n  ASSERT_FALSE(it.is_done());\n  auto some_val = it->second;\n  dt_.Erase(it);\n  
ASSERT_TRUE(dt_.Find(some_val).is_done());\n}\n\nTEST_F(DashTest, Traverse) {\n  constexpr auto kNumItems = 50;\n  for (size_t i = 0; i < kNumItems; ++i) {\n    dt_.Insert(i, i);\n  }\n\n  Dash64::Cursor cursor;\n  vector<unsigned> nums;\n  auto tr_cb = [&](Dash64::iterator it) {\n    nums.push_back(it->first);\n    VLOG(1) << it.bucket_id() << \" \" << it.slot_id() << \" \" << it->first;\n  };\n\n  do {\n    cursor = dt_.Traverse(cursor, tr_cb);\n  } while (cursor);\n  sort(nums.begin(), nums.end());\n  nums.resize(unique(nums.begin(), nums.end()) - nums.begin());\n  ASSERT_EQ(kNumItems, nums.size());\n  EXPECT_EQ(0, nums[0]);\n  EXPECT_EQ(kNumItems - 1, nums.back());\n}\n\nTEST_F(DashTest, TraverseSegmentOrder) {\n  constexpr auto kNumItems = 50;\n  for (size_t i = 0; i < kNumItems; ++i) {\n    dt_.Insert(i, i);\n  }\n\n  vector<unsigned> nums;\n  auto tr_cb = [&](Dash64::iterator it) {\n    nums.push_back(it->first);\n    VLOG(1) << it.bucket_id() << \" \" << it.slot_id() << \" \" << it->first;\n  };\n\n  Dash64::Cursor cursor;\n  do {\n    cursor = dt_.TraverseBySegmentOrder(cursor, tr_cb);\n  } while (cursor);\n\n  sort(nums.begin(), nums.end());\n  nums.resize(unique(nums.begin(), nums.end()) - nums.begin());\n  ASSERT_EQ(kNumItems, nums.size());\n  EXPECT_EQ(0, nums[0]);\n  EXPECT_EQ(kNumItems - 1, nums.back());\n}\n\nTEST_F(DashTest, TraverseBucketOrder) {\n  constexpr auto kNumItems = 18000;\n  for (size_t i = 0; i < kNumItems; ++i) {\n    dt_.Insert(i, i);\n  }\n  for (size_t i = 0; i < kNumItems; ++i) {\n    dt_.Erase(i);\n  }\n  constexpr auto kSparseItems = kNumItems / 50;\n  for (size_t i = 0; i < kSparseItems; ++i) {  // create sparse table\n    dt_.Insert(i, i);\n  }\n\n  vector<unsigned> nums;\n  auto tr_cb = [&](Dash64::bucket_iterator it) {\n    VLOG(1) << \"call cb\";\n    while (!it.is_done()) {\n      nums.push_back(it->first);\n      VLOG(1) << it.bucket_id() << \" \" << it.slot_id() << \" \" << it->first;\n      ++it;\n    }\n  };\n\n  
Dash64::Cursor cursor;\n  do {\n    cursor = dt_.TraverseBuckets(cursor, tr_cb);\n  } while (cursor);\n\n  sort(nums.begin(), nums.end());\n  nums.resize(unique(nums.begin(), nums.end()) - nums.begin());\n  ASSERT_EQ(kSparseItems, nums.size());\n  EXPECT_EQ(0, nums[0]);\n  EXPECT_EQ(kSparseItems - 1, nums.back());\n}\n\nstruct TestEvictionPolicy {\n  static constexpr bool can_evict = true;\n  static constexpr bool can_gc = false;\n\n  explicit TestEvictionPolicy(unsigned max_cap) : max_capacity(max_cap) {\n  }\n\n  bool CanGrow(const Dash64& tbl) const {\n    return tbl.capacity() < max_capacity;\n  }\n  void OnMove(Dash64::Cursor source, Dash64::Cursor dest) {\n  }\n\n  void RecordSplit(Dash64::Segment_t*) {\n  }\n\n  unsigned Evict(const Dash64::HotBuckets& hotb, Dash64* me) const {\n    if (!evict_enabled)\n      return 0;\n\n    auto it = hotb.probes.by_type.regular_buckets[0];\n    unsigned res = 0;\n    for (; !it.is_done(); ++it) {\n      LOG(INFO) << \"Deleting \" << it->first;\n      me->Erase(it);\n      ++res;\n    }\n\n    return res;\n  }\n\n  bool evict_enabled = false;\n  unsigned max_capacity;\n};\n\nTEST_F(DashTest, Eviction) {\n  TestEvictionPolicy ev(1540);\n\n  size_t num = 0;\n  auto loop = [&] {\n    for (; num < 5000; ++num) {\n      dt_.Insert(num, 0, ev);\n    }\n  };\n\n  ASSERT_THROW(loop(), bad_alloc);\n  ASSERT_LT(num, 5000);\n  ASSERT_EQ(2, dt_.unique_segments());\n  EXPECT_LT(dt_.size(), ev.max_capacity);\n  LOG(INFO) << \"size is \" << dt_.size();\n\n  set<uint64_t> keys;\n  Dash64::bucket_iterator bit = dt_.begin();\n  unsigned last_slot = 0;\n  while (!bit.is_done()) {\n    keys.insert(bit->first);\n    last_slot = bit.slot_id();\n    ++bit;\n  }\n  ASSERT_LT(last_slot, Dash64::kSlotNum);\n\n  bit = dt_.begin();\n  dt_.ShiftRight(bit);\n  bit = dt_.begin();\n  size_t sz = 0;\n  while (!bit.is_done()) {\n    EXPECT_EQ(1, keys.count(bit->first));\n    ++sz;\n    ++bit;\n  }\n  EXPECT_EQ(sz, keys.size());\n\n  while 
(!dt_.GetSegment(0)->GetBucket(0).IsFull()) {\n    try {\n      dt_.Insert(num++, 0, ev);\n    } catch (bad_alloc&) {\n    }\n  }\n\n  // Now the bucket is full.\n  keys.clear();\n  uint64_t last_key = dt_.GetSegment(0)->Key(0, Dash64::kSlotNum - 1);\n  for (Dash64::bucket_iterator bit = dt_.begin(); !bit.is_done(); ++bit) {\n    keys.insert(bit->first);\n  }\n\n  bit = dt_.begin();\n  dt_.ShiftRight(bit);\n  bit = dt_.begin();\n  sz = 0;\n\n  while (!bit.is_done()) {\n    EXPECT_NE(last_key, bit->first);\n    EXPECT_EQ(1, keys.count(bit->first));\n    ++sz;\n    ++bit;\n  }\n  EXPECT_EQ(sz + 1, keys.size());\n\n  ev.evict_enabled = true;\n  unsigned bucket_cnt = dt_.bucket_count();\n  auto [it, res] = dt_.Insert(num, 0, ev);\n  EXPECT_TRUE(res);\n  EXPECT_EQ(bucket_cnt, dt_.bucket_count());\n}\n\nstruct VersionPolicy : public BasicDashPolicy {\n  static constexpr bool kUseVersion = true;\n\n  static uint64_t HashFn(int v) {\n    return XXH3_64bits(&v, sizeof(v));\n  }\n};\n\nusing VersionDT = DashTable<int, int, VersionPolicy>;\nTEST_F(DashTest, Version) {\n  VersionDT dt;\n  auto [it, inserted] = dt.Insert(1, 1);\n\n  EXPECT_EQ(0, it.GetVersion());\n  it.SetVersion(5);\n  EXPECT_EQ(5, it.GetVersion());\n\n  dt.Clear();\n  ASSERT_EQ(0, dt.size());\n  ASSERT_EQ(2, dt.unique_segments());\n  ASSERT_EQ(136, dt.bucket_count());\n  constexpr int kNum = 68000;\n  for (int i = 0; i < kNum; ++i) {\n    auto it = dt.Insert(i, 0).first;\n    it.SetVersion(i + 65000);\n    if (i) {\n      auto p = dt.Find(i - 1);\n      ASSERT_GE(p.GetVersion(), i - 1 + 65000) << i;\n    }\n  }\n\n  unsigned items = 0;\n  for (auto it = dt.begin(); it != dt.end(); ++it) {\n    ASSERT_FALSE(it.is_done());\n    ASSERT_GE(it.GetVersion(), it->first + 65000)\n        << it.segment_id() << \" \" << it.bucket_id() << \" \" << it.slot_id();\n    ++items;\n  }\n  ASSERT_EQ(kNum, items);\n}\n\nTEST_F(DashTest, CVCUponInsert) {\n  VersionDT dt;\n  auto [it, added] = dt.Insert(10, 20);  // added to slot 
0\n  ASSERT_TRUE(added);\n\n  int i = 11;\n  while (true) {\n    auto [it2, added] = dt.Insert(i, 30);\n    if (it2.bucket_id() == it.bucket_id() && it2.segment_id() == it.segment_id()) {\n      ASSERT_EQ(1, it2.slot_id());\n\n      break;\n    }\n    ++i;\n  }\n\n  // freed slot 0 but the bucket still has i at slot 1.\n  dt.Erase(10);\n\n  auto cb = [](VersionDT::bucket_iterator bit) {\n    LOG(INFO) << \"sid: \" << bit.segment_id() << \" \" << bit.bucket_id();\n    while (!bit.is_done()) {\n      LOG(INFO) << \"key: \" << bit->first;\n      ++bit;\n    }\n  };\n  dt.CVCUponInsert(1, i, cb);\n}\n\nTEST_F(DashTest, CVCUponInsertStress) {\n  VersionDT dt;\n  for (int i = 0; i < 5000; ++i) {\n    dt.CVCUponInsert(1, i, [](VersionDT::bucket_iterator) {\n      // empty callback\n    });\n    dt.Insert(i, 0);\n  }\n}\n\nstruct A {\n  int a = 0;\n  unsigned moved = 0;\n\n  A(int i = 0) : a(i) {\n  }\n  A(const A&) = delete;\n  A(A&& o) : a(o.a), moved(o.moved + 1) {\n    o.a = -1;\n  }\n\n  A& operator=(const A&) = delete;\n  A& operator=(A&& o) noexcept {\n    o.moved = o.moved + 1;\n    a = o.a;\n    o.a = -1;\n    return *this;\n  }\n\n  bool operator==(const A& o) const {\n    return o.a == a;\n  }\n};\n\nstruct ADashPolicy : public BasicDashPolicy {\n  static uint64_t HashFn(const A& a) {\n    auto val = XXH3_64bits(&a.a, sizeof(a.a));\n    return val;\n  }\n};\n\nTEST_F(DashTest, Moveable) {\n  using DType = DashTable<A, A, ADashPolicy>;\n\n  DType table{1};\n  ASSERT_TRUE(table.Insert(A{1}, A{2}).second);\n  ASSERT_FALSE(table.Insert(A{1}, A{3}).second);\n  EXPECT_EQ(1, table.size());\n  table.Clear();\n  EXPECT_EQ(0, table.size());\n}\n\nstruct SdsDashPolicy {\n  enum { kSlotNum = 12, kBucketNum = 64, kStashBucketNum = 2 };\n  static constexpr bool kUseVersion = false;\n\n  static uint64_t HashFn(sds u) {\n    return XXH3_64bits(reinterpret_cast<const uint8_t*>(u), sdslen(u));\n  }\n\n  static uint64_t HashFn(std::string_view u) {\n    return 
XXH3_64bits(u.data(), u.size());\n  }\n\n  static void DestroyValue(uint64_t) {\n  }\n  static void DestroyKey(sds s) {\n    sdsfree(s);\n  }\n\n  static bool Equal(sds u1, sds u2) {\n    return dictSdsKeyCompare(nullptr, u1, u2) == 0;\n  }\n\n  static bool Equal(sds u1, std::string_view u2) {\n    return u2 == std::string_view{u1, sdslen(u1)};\n  }\n};\n\nTEST_F(DashTest, Sds) {\n  DashTable<sds, uint64_t, SdsDashPolicy> dt;\n\n  sds foo = sdscatlen(sdsempty(), \"foo\", 3);\n  dt.Insert(foo, 0);\n  // dt.Insert(std::string_view{\"bar\"}, 1);\n}\n\nstruct BlankPolicy : public BasicDashPolicy {\n  static uint64_t HashFn(uint64_t v) {\n    return v;\n  }\n};\n\n// The bug was that for very rare cases when during segment splitting we move all the items\n// into a new segment, not every item finds a place.\nTEST_F(DashTest, SplitBug) {\n  DashTable<uint64_t, uint64_t, BlankPolicy> table;\n  string path = base::ProgramRunfile(\"testdata/ids.txt.zst\");\n  io::Result<io::Source*> src = io::OpenUncompressed(path);\n  ASSERT_TRUE(src) << src.error();\n\n  io::LineReader lr(*src, TAKE_OWNERSHIP);\n  string_view line;\n  uint64_t val;\n  while (lr.Next(&line)) {\n    CHECK(absl::SimpleHexAtoi(line, &val)) << line;\n    table.Insert(val, 0);\n  }\n  EXPECT_EQ(746, table.size());\n}\n\n/**\n ______     _      _   _               _______        _\n|  ____|   (_)    | | (_)             |__   __|      | |\n| |____   ___  ___| |_ _  ___  _ __      | | ___  ___| |_ ___\n|  __\\ \\ / / |/ __| __| |/ _ \\| '_ \\     | |/ _ \\/ __| __/ __|\n| |___\\ V /| | (__| |_| | (_) | | | |    | |  __/\\__ \\ |_\\__ \\\n|______\\_/ |_|\\___|\\__|_|\\___/|_| |_|    |_|\\___||___/\\__|___/\n *\n */\nstruct EvictParams {\n  bool use_bumpups;\n  double zipf_param;\n\n  string PrintTo() const {\n    string name = absl::StrCat(use_bumpups ? 
\"\" : \"no\", \"bumps\");\n    absl::StrAppend(&name, unsigned(zipf_param * 1000));\n\n    return name;\n  }\n};\n\nstring PrintParams(const testing::TestParamInfo<EvictParams>& info) {\n  return info.param.PrintTo();\n}\n\nstruct U64DashPolicy {\n  enum { kSlotNum = 14, kBucketNum = 64, kStashBucketNum = 4 };\n  static constexpr bool kUseVersion = false;\n\n  static void DestroyValue(uint64_t) {\n  }\n  static void DestroyKey(uint64_t) {\n  }\n\n  static bool Equal(uint64_t u, uint64_t v) {\n    return u == v;\n  }\n\n  static uint64_t HashFn(uint64_t v) {\n    return XXH3_64bits(&v, sizeof(v));\n  }\n};\n\nusing U64Dash = DashTable<uint64_t, unsigned, U64DashPolicy>;\n\nstruct SimpleEvictPolicy {\n  static constexpr bool can_gc = false;\n  static constexpr bool can_evict = true;\n\n  bool CanGrow(const U64Dash& tbl) {\n    return tbl.capacity() + U64Dash::kSegCapacity < max_capacity;\n  }\n\n  void OnMove(U64Dash::Cursor source, U64Dash::Cursor dest) {\n  }\n\n  void RecordSplit(U64Dash::Segment_t* segment) {\n  }\n\n  // Required interface in case can_gc is true\n  // returns number of items evicted from the table.\n  // 0 means - nothing has been evicted.\n  unsigned Evict(const U64Dash::HotBuckets& hotb, U64Dash* me) {\n    constexpr unsigned kBucketNum = U64Dash::HotBuckets::kNumBuckets;\n\n    uint32_t bid = hotb.key_hash % kBucketNum;\n\n    unsigned slot_index = (hotb.key_hash >> 32) % U64Dash::kSlotNum;\n\n    for (unsigned i = 0; i < kBucketNum; ++i) {\n      auto it = hotb.at((bid + i) % kBucketNum);\n      it += slot_index;\n\n      if (it.is_done())\n        continue;\n\n      me->Erase(it);\n      ++evicted;\n\n      return 1;\n    }\n    return 0;\n  }\n\n  size_t max_capacity = SIZE_MAX;\n  unsigned evicted = 0;\n  // default_random_engine rand_eng_{42};\n};\n\nstruct ShiftRightPolicy {\n  absl::flat_hash_map<uint64_t, unsigned> evicted;\n  size_t max_capacity = SIZE_MAX;\n  unsigned evicted_sum = 0;\n\n  static constexpr bool can_gc = false;\n  
static constexpr bool can_evict = true;\n\n  bool CanGrow(const U64Dash& tbl) {\n    return tbl.capacity() + U64Dash::kSegCapacity < max_capacity;\n  }\n\n  void RecordSplit(U64Dash::Segment_t* segment) {\n  }\n\n  void OnMove(U64Dash::Cursor source, U64Dash::Cursor dest) {\n  }\n\n  unsigned Evict(const U64Dash::HotBuckets& hotb, U64Dash* me) {\n    constexpr unsigned kNumStashBuckets = ABSL_ARRAYSIZE(hotb.probes.by_type.stash_buckets);\n\n    unsigned stash_pos = hotb.key_hash % kNumStashBuckets;\n    auto stash_it = hotb.probes.by_type.stash_buckets[stash_pos];\n    stash_it += (U64Dash::kSlotNum - 1);  // go to the last slot.\n\n    uint64_t k = stash_it->first;\n    DVLOG(1) << \"Deleting key \" << k << \" from \" << unsigned(stash_it.bucket_id()) << \"/\"\n             << stash_it.slot_id();\n    evicted[k]++;\n\n    CHECK(me->ShiftRight(stash_it));\n    ++evicted_sum;\n\n    return 1;\n  };\n};\n\nclass EvictionPolicyTest : public testing::TestWithParam<EvictParams> {\n protected:\n  template <typename Policy> void FillUniform(unsigned max_range, Policy& policy);\n\n  uint64_t Rand() {\n    return zipf_ ? 
zipf_->Next(rand_eng_) : udist_(rand_eng_);\n  }\n\n  void SetUp() final {\n    if (GetParam().zipf_param > 0)\n      zipf_.emplace(0, 15000, GetParam().zipf_param);\n    else {\n      uniform_int_distribution<uint64_t>::param_type p{0, 15000};\n      udist_.param(p);\n    }\n  }\n\n  default_random_engine rand_eng_{42};\n  U64Dash dt_;\n  std::optional<base::ZipfianGenerator> zipf_;\n  uniform_int_distribution<uint64_t> udist_;\n};\n\ntemplate <typename Policy>\nvoid EvictionPolicyTest::FillUniform(unsigned max_range, Policy& policy) {\n  std::uniform_int_distribution<uint64_t> dist(0, max_range - 1);\n  for (unsigned i = 0; i < 100000; ++i) {\n    auto [it, res] = dt_.Insert(dist(rand_eng_), 0, policy);\n    if (!res && it.is_done())  // filled up till the capacity limit\n      break;\n  }\n  LOG(INFO) << dt_.size();\n}\n\nTEST_P(EvictionPolicyTest, HitRate) {\n  CHECK_LT(GetParam().zipf_param, 1);\n  SimpleEvictPolicy ev_policy;\n  ev_policy.max_capacity = 3000;\n  FillUniform(15000, ev_policy);\n\n  unsigned hits = 0;\n  for (unsigned i = 0; i < 150000; ++i) {\n    auto [it, res] = dt_.Insert(Rand(), 0, ev_policy);\n    CHECK(!it.is_done());\n    if (!res) {\n      ++hits;\n    }\n  }\n  LOG(INFO) << \"Zipf: \" << GetParam().zipf_param << \", hits \" << hits << \" evictions \"\n            << ev_policy.evicted;\n}\n\nTEST_P(EvictionPolicyTest, HitRateZipf) {\n  base::ZipfianGenerator gen(1, 15000, 0.9);\n  SimpleEvictPolicy ev_policy;\n  ev_policy.max_capacity = 3000;\n\n  FillUniform(15000, ev_policy);\n\n  bool use_bumps = GetParam().use_bumpups;\n\n  unsigned hits = 0;\n  for (unsigned i = 0; i < 150000; ++i) {\n    uint64_t key = Rand();\n    auto [it, res] = dt_.Insert(key, 0, ev_policy);\n    CHECK(!it.is_done());\n    if (res) {\n      DVLOG(1) << \"Inserted new key \" << key << \" to bucket \" << it.bucket_id() << \" slot \"\n               << it.slot_id();\n    } else {\n      if (use_bumps) {\n        RelaxedBumpPolicy policy;\n        dt_.BumpUp(it, 
policy);\n      }\n\n      ++hits;\n    }\n  }\n  LOG(INFO) << \"Zipf: \" << GetParam().PrintTo() << \" hits \" << hits << \" evictions \"\n            << ev_policy.evicted;\n}\n\nTEST_P(EvictionPolicyTest, HitRateZipfShr) {\n  ShiftRightPolicy ev_policy;\n  ev_policy.max_capacity = 3000;\n\n  FillUniform(15000, ev_policy);\n\n  unsigned hits = 0;\n  unsigned inserted_evicted = 0;\n  bool use_bumps = GetParam().use_bumpups;\n  for (unsigned i = 0; i < 150000; ++i) {\n    unsigned key = Rand();\n\n    auto [it, res] = dt_.Insert(key, 0, ev_policy);\n    if (!it.is_done()) {\n      if (res) {\n        DVLOG(1) << \"Inserted new key \" << key << \" to bucket \" << it.bucket_id() << \" slot \"\n                 << it.slot_id();\n        if (ev_policy.evicted.contains(key)) {\n          ++inserted_evicted;\n        }\n      } else {\n        if (use_bumps) {\n          RelaxedBumpPolicy policy;\n          dt_.BumpUp(it, policy);\n          DVLOG(1) << \"Bump up key \" << key << \" \" << it.bucket_id() << \" slot \" << it.slot_id();\n        } else {\n          DVLOG(1) << \"Hit on key \" << key;\n        }\n        ++hits;\n      }\n    }\n  }\n\n  vector<pair<unsigned, uint64_t>> freq_evicted;\n  for (const auto& k_v : ev_policy.evicted) {\n    freq_evicted.emplace_back(k_v.second, k_v.first);\n  }\n  sort(freq_evicted.rbegin(), freq_evicted.rend());\n\n  LOG(INFO) << \"Params \" << GetParam().PrintTo() << \" hits \" << hits << \" evictions \"\n            << ev_policy.evicted_sum << \" \"\n            << \"reinserted \" << inserted_evicted;\n  unsigned num_outs = 0;\n  for (const auto& k_v : freq_evicted) {\n    LOG(INFO) << \"Evicted \" << k_v.first << \" : \" << k_v.second;\n    if (++num_outs > 100 || k_v.first < 5)\n      break;\n  }\n}\n\nINSTANTIATE_TEST_SUITE_P(Eviction, EvictionPolicyTest,\n                         testing::Values(EvictParams{false, 0}, EvictParams{false, 0.9},\n                                         EvictParams{true, 0.9}),\n                
         PrintParams);\n\n// Benchmarks\nstatic void BM_Insert(benchmark::State& state) {\n  unsigned count = state.range(0);\n\n  size_t next = 0;\n  while (state.KeepRunning()) {\n    Dash64 dt;\n\n    for (unsigned i = 0; i < count; ++i) {\n      dt.Insert(next++, 0);\n    }\n  }\n}\nBENCHMARK(BM_Insert)->Arg(10000)->Arg(100000)->Arg(1000000);\n\nstruct NoDestroySdsPolicy : public SdsDashPolicy {\n  static void DestroyKey(sds s) {\n  }\n};\n\nstatic void BM_StringInsert(benchmark::State& state) {\n  unsigned count = state.range(0);\n\n  std::vector<sds> strs(count);\n  for (unsigned i = 0; i < count; ++i) {\n    strs[i] = sdscatprintf(sdsempty(), \"key__%x\", 100 + i);\n  }\n\n  while (state.KeepRunning()) {\n    DashTable<sds, uint64_t, NoDestroySdsPolicy> dt;\n\n    for (unsigned i = 0; i < count; ++i) {\n      dt.Insert(strs[i], 0);\n    }\n  }\n\n  for (sds s : strs) {\n    sdsfree(s);\n  }\n}\nBENCHMARK(BM_StringInsert)->Arg(1000)->Arg(10000)->Arg(100000);\n\nstatic void BM_FindExisting(benchmark::State& state) {\n  unsigned count = state.range(0);\n\n  Dash64 dt;\n  for (unsigned i = 0; i < count; ++i) {\n    dt.Insert(i, 0);\n  }\n\n  size_t next = 0;\n  while (state.KeepRunning()) {\n    for (unsigned i = 0; i < 100; ++i) {\n      dt.Find(next++);\n    }\n  }\n}\nBENCHMARK(BM_FindExisting)->Arg(1000000)->Arg(2000000);\n\n// dict memory usage is in [32*n + 8*n, 32*n + 16*n], or\n// per entry usage is [40, 48].\nstatic void BM_RedisDictFind(benchmark::State& state) {\n  unsigned count = state.range(0);\n  dict* d = dictCreate(&IntDict);\n\n  for (unsigned i = 0; i < count; ++i) {\n    size_t key = i;\n    dictAdd(d, (void*)key, nullptr);\n  }\n\n  size_t next = 0;\n  while (state.KeepRunning()) {\n    for (size_t i = 0; i < 100; ++i) {\n      size_t k = next++;\n      dictFind(d, (void*)k);\n    }\n  }\n  dictRelease(d);\n}\nBENCHMARK(BM_RedisDictFind)->Arg(1000000)->Arg(2000000);\n\n// dict memory usage is in [32*n + 8*n, 32*n + 16*n], or\n// per entry 
usage is [40, 48].\nstatic void BM_RedisDictInsert(benchmark::State& state) {\n  unsigned count = state.range(0);\n  size_t next = 0;\n  while (state.KeepRunning()) {\n    dict* d = dictCreate(&IntDict);\n    for (unsigned i = 0; i < count; ++i) {\n      dictAdd(d, (void*)next, nullptr);\n      ++next;\n    }\n    dictRelease(d);\n  }\n}\nBENCHMARK(BM_RedisDictInsert)->Arg(10000)->Arg(100000)->Arg(1000000);\n\nstatic void BM_RedisStringInsert(benchmark::State& state) {\n  unsigned count = state.range(0);\n  std::vector<sds> strs(count);\n  for (unsigned i = 0; i < count; ++i) {\n    strs[i] = sdscatprintf(sdsempty(), \"key__%x\", 100 + i);\n  }\n\n  while (state.KeepRunning()) {\n    dict* d = dictCreate(&SdsDict);\n    for (unsigned i = 0; i < count; ++i) {\n      dictAdd(d, strs[i], nullptr);\n    }\n    dictRelease(d);\n  }\n\n  for (sds s : strs) {\n    sdsfree(s);\n  }\n}\nBENCHMARK(BM_RedisStringInsert)->Arg(1000)->Arg(10000)->Arg(100000);\n\n}  // namespace dfly\n"
  },
  {
    "path": "src/core/dense_set.cc",
    "content": "// Copyright 2022, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#include \"core/dense_set.h\"\n\n#include <absl/numeric/bits.h>\n\n#include <cstddef>\n#include <cstdint>\n#include <stack>\n#include <type_traits>\n#include <vector>\n\n#include \"absl/random/distributions.h\"\n#include \"absl/random/random.h\"\n#include \"base/logging.h\"\n\nextern \"C\" {\n#include \"redis/zmalloc.h\"\n}\n\nnamespace dfly {\nusing namespace std;\n\nconstexpr size_t kMinSizeShift = 2;\nconstexpr size_t kMinSize = 1 << kMinSizeShift;\nconstexpr bool kAllowDisplacements = true;\n\nthread_local absl::InsecureBitGen tl_bit_gen;\n\n#define PREFETCH_READ(x) __builtin_prefetch(x, 0, 1)\n\nDenseSet::IteratorBase::IteratorBase(const DenseSet* owner, bool is_end)\n    : owner_(const_cast<DenseSet*>(owner)), curr_entry_(nullptr) {\n  curr_list_ = is_end ? owner_->entries_.end() : owner_->entries_.begin();\n\n  // Even if `is_end` is `false`, the list can be empty.\n  if (curr_list_ == owner->entries_.end()) {\n    curr_entry_ = nullptr;\n    owner_ = nullptr;\n  } else {\n    curr_entry_ = &(*curr_list_);\n    owner->ExpireIfNeeded(nullptr, curr_entry_);\n\n    // find the first non null entry\n    if (curr_entry_->IsEmpty()) {\n      Advance();\n    }\n  }\n}\n\nvoid DenseSet::IteratorBase::SetExpiryTime(uint32_t ttl_sec) {\n  DensePtr* ptr = curr_entry_->IsLink() ? 
curr_entry_->AsLink() : curr_entry_;\n  void* src = ptr->GetObject();\n  if (!HasExpiry()) {\n    const size_t old_size = owner_->ObjectAllocSize(ptr->Raw());\n    void* new_obj = owner_->ObjectClone(src, false, true);\n    ptr->SetObject(new_obj);\n\n    const size_t new_size = owner_->ObjectAllocSize(ptr->Raw());\n\n    // Important: we set the ttl bit on the wrapping pointer.\n    curr_entry_->SetTtl(true);\n    owner_->ObjDelete(src, false);\n    src = new_obj;\n\n    // Because setting TTL requires an extra 4 bytes for the key, the allocated size may push the\n    // object into a different mi-malloc page category (e.g. 16 byte page -> 32 byte page). This\n    // results in increased reporting in ObjAllocSize.\n    //\n    // If this size increase is not accounted for, it will cause an overflow in\n    // DenseSet::AddOrReplaceObj due to subtracting larger size from smaller and the type of\n    // obj_malloc_used_ being size_t.\n    if (old_size != new_size) {\n      owner_->DecreaseMallocUsed(old_size);\n      owner_->IncreaseMallocUsed(new_size);\n    }\n  }\n  owner_->ObjUpdateExpireTime(src, ttl_sec);\n}\n\nvoid DenseSet::IteratorBase::Advance() {\n  bool step_link = false;\n  DCHECK(curr_entry_);\n\n  if (curr_entry_->IsLink()) {\n    DenseLinkKey* plink = curr_entry_->AsLink();\n    if (!owner_->ExpireIfNeeded(curr_entry_, &plink->next) || curr_entry_->IsLink()) {\n      curr_entry_ = &plink->next;\n      step_link = true;\n    }\n  }\n\n  if (!step_link) {\n    DCHECK(curr_list_ != owner_->entries_.end());\n    do {\n      ++curr_list_;\n      if (curr_list_ == owner_->entries_.end()) {\n        curr_entry_ = nullptr;\n        owner_ = nullptr;\n        return;\n      }\n      owner_->ExpireIfNeeded(nullptr, &(*curr_list_));\n    } while (curr_list_->IsEmpty());\n    DCHECK(curr_list_ != owner_->entries_.end());\n    curr_entry_ = &(*curr_list_);\n  }\n  DCHECK(!curr_entry_->IsEmpty());\n}\n\nDenseSet::DenseSet() {\n  static_assert(sizeof(entries_) == 
24);\n}\n\nDenseSet::~DenseSet() {\n  // We can not call Clear from the base class because it internally calls ObjDelete which is\n  // a virtual function. Therefore, destructor of the derived classes must clean up the table.\n  CHECK(entries_.empty());\n}\n\nsize_t DenseSet::PushFront(DenseSet::ChainVectorIterator it, void* data, bool has_ttl) {\n  // if this is an empty list assign the value to the empty placeholder pointer\n  DCHECK(!it->IsDisplaced());\n  if (it->IsEmpty()) {\n    it->SetObject(data);\n  } else {\n    // otherwise make a new link and connect it to the front of the list\n    it->SetLink(NewLink(data, *it));\n  }\n\n  if (has_ttl) {\n    it->SetTtl(true);\n    expiration_used_ = true;\n  }\n  return ObjectAllocSize(data);\n}\n\nvoid DenseSet::PushFront(DenseSet::ChainVectorIterator it, DenseSet::DensePtr ptr) {\n  DVLOG(2) << \"PushFront to \" << distance(entries_.begin(), it) << \", \"\n           << ObjectAllocSize(ptr.GetObject());\n  DCHECK(!it->IsDisplaced());\n\n  if (it->IsEmpty()) {\n    it->SetObject(ptr.GetObject());\n    if (ptr.HasTtl()) {\n      it->SetTtl(true);\n      expiration_used_ = true;\n    }\n    if (ptr.IsLink()) {\n      FreeLink(ptr.AsLink());\n    }\n  } else if (ptr.IsLink()) {\n    // if the pointer is already a link then no allocation needed.\n    *ptr.Next() = *it;\n    *it = ptr;\n    DCHECK(!it->AsLink()->next.IsEmpty());\n  } else {\n    DCHECK(ptr.IsObject());\n\n    // allocate a new link if needed and copy the pointer to the new link\n    it->SetLink(NewLink(ptr.Raw(), *it));\n    if (ptr.HasTtl()) {\n      it->SetTtl(true);\n      expiration_used_ = true;\n    }\n    DCHECK(!it->AsLink()->next.IsEmpty());\n  }\n}\n\nauto DenseSet::PopPtrFront(DenseSet::ChainVectorIterator it) -> DensePtr {\n  if (it->IsEmpty()) {\n    return DensePtr{};\n  }\n\n  DensePtr front = *it;\n\n  // if this is an object, then it's also the only record in this chain.\n  // therefore, we should just reset DensePtr.\n  if 
(it->IsObject()) {\n    it->Reset();\n  } else {\n    DCHECK(it->IsLink());\n    DenseLinkKey* link = it->AsLink();\n    *it = link->next;\n  }\n\n  return front;\n}\n\nuint32_t DenseSet::ClearStep(uint32_t start, uint32_t count) {\n  constexpr unsigned kArrLen = 32;\n  ClearItem arr[kArrLen];\n  unsigned len = 0;\n\n  size_t end = min<size_t>(entries_.size(), start + count);\n  for (size_t i = start; i < end; ++i) {\n    DensePtr& ptr = entries_[i];\n    if (ptr.IsEmpty())\n      continue;\n\n    auto& dest = arr[len++];\n    dest.has_ttl = ptr.HasTtl();\n\n    PREFETCH_READ(ptr.Raw());\n    if (ptr.IsObject()) {\n      dest.obj = ptr.Raw();\n      dest.ptr.Reset();\n    } else {\n      dest.ptr = ptr;\n      dest.obj = nullptr;\n    }\n    ptr.Reset();\n    if (len == kArrLen) {\n      ClearBatch(kArrLen, arr);\n      len = 0;\n    }\n  }\n\n  ClearBatch(len, arr);\n\n  if (size_ == 0) {\n    entries_.clear();\n    num_links_ = 0;\n    obj_malloc_used_ = 0;\n    expiration_used_ = false;\n  }\n  return end;\n}\n\nbool DenseSet::Equal(DensePtr dptr, const void* ptr, uint32_t cookie) const {\n  if (dptr.IsEmpty()) {\n    return false;\n  }\n\n  return ObjEqual(dptr.GetObject(), ptr, cookie);\n}\n\nvoid DenseSet::CloneBatch(unsigned len, CloneItem* items, DenseSet* other) const {\n  // We handle a batch of items to minimize data dependencies when accessing memory for a single\n  // item. 
We prefetch the memory for entire batch before actually reading data from any of the\n  // elements.\n\n  auto clone = [this](void* obj, bool has_ttl, DenseSet* other) {\n    // The majority of the CPU is spent in this block.\n    void* new_obj = other->ObjectClone(obj, has_ttl, false);\n    uint64_t hash = this->Hash(obj, 0);\n    other->AddUnique(new_obj, has_ttl, hash);\n  };\n\n  while (len) {\n    unsigned dest_id = 0;\n    // we walk \"len\" linked lists in parallel, and prefetch their next, obj pointers\n    // before actually processing them.\n    for (unsigned i = 0; i < len; ++i) {\n      auto& src = items[i];\n      if (src.obj) {\n        clone(src.obj, src.has_ttl, other);\n        src.obj = nullptr;\n      }\n\n      if (src.ptr.IsEmpty()) {\n        continue;\n      }\n\n      if (src.ptr.IsObject()) {\n        clone(src.ptr.Raw(), src.has_ttl, other);\n      } else {\n        auto& dest = items[dest_id++];\n        DenseLinkKey* link = src.ptr.AsLink();\n        dest.obj = link->Raw();\n        DCHECK(!link->HasTtl());\n\n        // ttl is attached to the wrapping pointer.\n        dest.has_ttl = src.ptr.HasTtl();\n        dest.ptr = link->next;\n        PREFETCH_READ(dest.ptr.Raw());\n        PREFETCH_READ(dest.obj);\n      }\n    }\n\n    // update the length of the batch for the next iteration.\n    len = dest_id;\n  }\n}\n\nvoid DenseSet::ClearBatch(unsigned len, ClearItem* items) {\n  while (len) {\n    unsigned dest_id = 0;\n    // we walk \"len\" linked lists in parallel, and prefetch their next, obj pointers\n    // before actually processing them.\n    for (unsigned i = 0; i < len; ++i) {\n      auto& src = items[i];\n      if (src.obj) {\n        ObjDelete(src.obj, src.has_ttl);\n        --size_;\n        src.obj = nullptr;\n      }\n\n      if (src.ptr.IsEmpty())\n        continue;\n\n      if (src.ptr.IsObject()) {\n        ObjDelete(src.ptr.Raw(), src.has_ttl);\n        --size_;\n      } else {\n        auto& dest = items[dest_id++];\n  
      DenseLinkKey* link = src.ptr.AsLink();\n        DCHECK(!link->HasTtl());\n        dest.obj = link->Raw();\n        dest.has_ttl = src.ptr.HasTtl();\n        dest.ptr = link->next;\n        PREFETCH_READ(dest.ptr.Raw());\n        PREFETCH_READ(dest.obj);\n        FreeLink(link);\n      }\n    }\n\n    // update the length of the batch for the next iteration.\n    len = dest_id;\n  }\n}\nbool DenseSet::NoItemBelongsBucket(uint32_t bid) const {\n  auto& entries = const_cast<DenseSet*>(this)->entries_;\n  DensePtr* curr = &entries[bid];\n  ExpireIfNeeded(nullptr, curr);\n  if (!curr->IsEmpty() && !curr->IsDisplaced()) {\n    return false;\n  }\n\n  if (bid + 1 < entries_.size()) {\n    DensePtr* right_bucket = &entries[bid + 1];\n    ExpireIfNeeded(nullptr, right_bucket);\n    if (!right_bucket->IsEmpty() && right_bucket->IsDisplaced() &&\n        right_bucket->GetDisplacedDirection() == 1)\n      return false;\n  }\n\n  if (bid > 0) {\n    DensePtr* left_bucket = &entries[bid - 1];\n    ExpireIfNeeded(nullptr, left_bucket);\n    if (!left_bucket->IsEmpty() && left_bucket->IsDisplaced() &&\n        left_bucket->GetDisplacedDirection() == -1)\n      return false;\n  }\n  return true;\n}\n\nauto DenseSet::FindEmptyAround(uint32_t bid) -> ChainVectorIterator {\n  ExpireIfNeeded(nullptr, &entries_[bid]);\n\n  if (entries_[bid].IsEmpty()) {\n    return entries_.begin() + bid;\n  }\n\n  if (!kAllowDisplacements) {\n    return entries_.end();\n  }\n\n  if (bid + 1 < entries_.size()) {\n    auto it = next(entries_.begin(), bid + 1);\n    ExpireIfNeeded(nullptr, &(*it));\n    if (it->IsEmpty())\n      return it;\n  }\n\n  if (bid) {\n    auto it = next(entries_.begin(), bid - 1);\n    ExpireIfNeeded(nullptr, &(*it));\n    if (it->IsEmpty())\n      return it;\n  }\n\n  return entries_.end();\n}\n\nvoid DenseSet::Reserve(size_t sz) {\n  sz = std::max<size_t>(sz, kMinSize);\n\n  sz = absl::bit_ceil(sz);\n  if (sz > entries_.size()) {\n    size_t prev_size = 
entries_.size();\n    entries_.resize(sz);\n    capacity_log_ = absl::bit_width(sz) - 1;\n    Grow(prev_size);\n  }\n}\n\nvoid DenseSet::ShrinkBucket(size_t bucket_idx) {\n  // Take the entire bucket to avoid infinite loop when new_bid == bucket_idx\n  DensePtr bucket = entries_[bucket_idx];\n  entries_[bucket_idx].Reset();\n\n  // Process the taken bucket chain\n  while (!bucket.IsEmpty()) {\n    // Pop front from local chain\n    DensePtr dptr = bucket;\n    bucket = bucket.IsObject() ? DensePtr{} : bucket.AsLink()->next;\n\n    void* obj = dptr.GetObject();\n    bool has_ttl = dptr.HasTtl();\n\n    // Free link unconditionally - PushFront will create new one if needed\n    if (dptr.IsLink()) {\n      FreeLink(dptr.AsLink());\n    }\n\n    if (has_ttl && ObjExpireTime(obj) <= time_now_) {\n      ObjDelete(obj, true);\n      --size_;\n      continue;\n    }\n\n    uint32_t new_bid = BucketId(obj, 0);\n    DVLOG(2) << \" Shrink: Moving from \" << bucket_idx << \" to \" << new_bid;\n    PushFront(entries_.begin() + new_bid, obj, has_ttl);\n  }\n}\n\nvoid DenseSet::Shrink(size_t new_size) {\n  DCHECK(absl::has_single_bit(new_size));\n  DCHECK_GE(new_size, kMinSize);\n  DCHECK_LT(new_size, entries_.size());\n\n  size_t prev_size = entries_.size();\n  capacity_log_ = absl::bit_width(new_size) - 1;\n\n  // Process from low to high (opposite of Grow).\n  // This prevents double-processing: when moving elements from bucket i to bucket j < i,\n  // bucket j has already been processed, so the element won't be processed again.\n  for (size_t i = 0; i < prev_size; ++i) {\n    ShrinkBucket(i);\n  }\n\n  entries_.resize(new_size);\n}\n\nvoid DenseSet::Fill(DenseSet* other) const {\n  DCHECK(other->entries_.empty());\n\n  other->Reserve(UpperBoundSize());\n\n  constexpr unsigned kArrLen = 32;\n  CloneItem arr[kArrLen];\n  unsigned len = 0;\n\n  for (auto it = entries_.begin(); it != entries_.end(); ++it) {\n    DensePtr ptr = *it;\n\n    if (ptr.IsEmpty())\n      continue;\n\n   
 auto& item = arr[len++];\n    item.has_ttl = ptr.HasTtl();\n\n    if (ptr.IsObject()) {\n      item.ptr.Reset();\n      item.obj = ptr.Raw();\n      PREFETCH_READ(item.obj);\n    } else {\n      item.ptr = ptr;\n      item.obj = nullptr;\n      PREFETCH_READ(item.ptr.Raw());\n    }\n\n    if (len == kArrLen) {\n      CloneBatch(kArrLen, arr, other);\n      len = 0;\n    }\n  }\n  CloneBatch(len, arr, other);\n}\n\nvoid DenseSet::Grow(size_t prev_size) {\n  DensePtr first;\n\n  // Corner case. Usually elements are moved to higher buckets during rehashing.\n  // By moving upper elements first we make sure that there are no displaced elements\n  // when we move the lower elements.\n  // However the (displaced) elements at bucket_id=1 can move to bucket 0, and\n  // bucket 0 can host displaced elements from bucket 1. To avoid this situation, we\n  // stash the displaced element from bucket 0 and move it to the correct bucket at the end.\n  if (entries_.front().IsDisplaced()) {\n    first = PopPtrFront(entries_.begin());\n  }\n\n  // perform rehashing of items in the array, chain by chain.\n  for (long i = prev_size - 1; i >= 0; --i) {\n    DensePtr* curr = &entries_[i];\n    DensePtr* prev = nullptr;\n\n    do {\n      if (ExpireIfNeeded(prev, curr)) {\n        // if curr has disappeared due to expiry and prev was converted from Link to a\n        // regular DensePtr\n        if (prev && !prev->IsLink())\n          break;\n      }\n\n      if (curr->IsEmpty())\n        break;\n      void* ptr = curr->GetObject();\n\n      DCHECK(ptr != nullptr && ObjectAllocSize(ptr));\n\n      uint32_t bid = BucketId(ptr, 0);\n\n      // if the item does not move from the current chain, ensure\n      // it is not marked as displaced and move to the next item in the chain\n      if (bid == i) {\n        curr->ClearDisplaced();\n        prev = curr;\n        curr = curr->Next();\n        if (curr == nullptr)\n          break;\n      } else {\n        // if the entry is in the wrong 
chain remove it and\n        // add it to the correct chain. This will also correct\n        // displaced entries\n        auto dest = entries_.begin() + bid;\n        DensePtr dptr = *curr;\n\n        if (curr->IsObject()) {\n          if (prev) {\n            DCHECK(prev->IsLink());\n\n            DenseLinkKey* plink = prev->AsLink();\n            DCHECK(&plink->next == curr);\n\n            // we want to make *prev a DensePtr instead of DenseLink and we\n            // want to deallocate the link.\n            DensePtr tmp = DensePtr::From(plink);\n\n            // Important to transfer the ttl flag.\n            tmp.SetTtl(prev->HasTtl());\n            DCHECK(ObjectAllocSize(tmp.GetObject()));\n\n            FreeLink(plink);\n            // we deallocated the link, curr is invalid now.\n            curr = nullptr;\n            *prev = tmp;\n          } else {\n            // prev == nullptr\n            curr->Reset();  // reset the root placeholder.\n          }\n        } else {\n          // !curr.IsObject\n          *curr = *dptr.Next();\n          DCHECK(!curr->IsEmpty());\n        }\n\n        DVLOG(2) << \" Pushing to \" << bid << \" \" << dptr.GetObject();\n        DCHECK_EQ(BucketId(dptr.GetObject(), 0), bid);\n        PushFront(dest, dptr);\n      }\n    } while (curr);\n  }\n  if (!first.IsEmpty()) {\n    uint32_t bid = BucketId(first.GetObject(), 0);\n    PushFront(entries_.begin() + bid, first);\n  }\n}\n\n// Assumes that the object does not exist in the set.\nvoid DenseSet::AddUnique(void* obj, bool has_ttl, uint64_t hashcode) {\n  if (entries_.empty()) {\n    capacity_log_ = kMinSizeShift;\n    entries_.resize(kMinSize);\n  }\n\n  uint32_t bucket_id = BucketId(hashcode);\n\n  DCHECK_LT(bucket_id, entries_.size());\n\n  // Try insert into flat surface first. 
Also handle the grow case\n  // if utilization is too high.\n  for (unsigned j = 0; j < 2; ++j) {\n    ChainVectorIterator list = FindEmptyAround(bucket_id);\n    if (list != entries_.end()) {\n      obj_malloc_used_ += PushFront(list, obj, has_ttl);\n      if (std::distance(entries_.begin(), list) != bucket_id) {\n        list->SetDisplaced(std::distance(entries_.begin() + bucket_id, list));\n      }\n      ++size_;\n      return;\n    }\n\n    if (size_ < entries_.size()) {\n      break;\n    }\n\n    size_t prev_size = entries_.size();\n    entries_.resize(prev_size * 2);\n    ++capacity_log_;\n\n    Grow(prev_size);\n    bucket_id = BucketId(hashcode);\n  }\n\n  DCHECK(!entries_[bucket_id].IsEmpty());\n\n  /**\n   * Since the current entry is not empty, it is either a valid chain\n   * or there is a displaced node here. In the latter case it is best to\n   * move the displaced node to its correct bucket. However there could be\n   * a displaced node there and so forth. To avoid having to keep a stack\n   * of displacements, we can keep track of the current displaced node, add it\n   * to the correct chain, and if the correct chain contains a displaced node\n   * unlink it and repeat the steps.\n   */\n\n  DensePtr to_insert(obj);\n  if (has_ttl) {\n    to_insert.SetTtl(true);\n    expiration_used_ = true;\n  }\n\n  while (!entries_[bucket_id].IsEmpty() && entries_[bucket_id].IsDisplaced()) {\n    DensePtr unlinked = PopPtrFront(entries_.begin() + bucket_id);\n\n    PushFront(entries_.begin() + bucket_id, to_insert);\n    to_insert = unlinked;\n    bucket_id -= unlinked.GetDisplacedDirection();\n  }\n\n  DCHECK_EQ(BucketId(to_insert.GetObject(), 0), bucket_id);\n  ChainVectorIterator list = entries_.begin() + bucket_id;\n  PushFront(list, to_insert);\n  obj_malloc_used_ += ObjectAllocSize(obj);\n  DCHECK(!entries_[bucket_id].IsDisplaced());\n\n  ++size_;\n}\n\nvoid DenseSet::Prefetch(uint64_t hash) {\n  uint32_t bid = BucketId(hash);\n  
PREFETCH_READ(&entries_[bid]);\n}\n\nauto DenseSet::Find2(const void* ptr, uint32_t bid, uint32_t cookie)\n    -> tuple<size_t, DensePtr*, DensePtr*> {\n  DCHECK_LT(bid, entries_.size());\n\n  DensePtr* curr = &entries_[bid];\n  ExpireIfNeeded(nullptr, curr);\n\n  if (Equal(*curr, ptr, cookie)) {\n    return {bid, nullptr, curr};\n  }\n\n  // first look for displaced nodes since this is quicker than iterating a potential long chain\n  if (bid > 0) {\n    curr = &entries_[bid - 1];\n    if (curr->IsDisplaced() && curr->GetDisplacedDirection() == -1) {\n      ExpireIfNeeded(nullptr, curr);\n\n      if (Equal(*curr, ptr, cookie)) {\n        return {bid - 1, nullptr, curr};\n      }\n    }\n  }\n\n  if (bid + 1 < entries_.size()) {\n    curr = &entries_[bid + 1];\n    if (curr->IsDisplaced() && curr->GetDisplacedDirection() == 1) {\n      ExpireIfNeeded(nullptr, curr);\n\n      if (Equal(*curr, ptr, cookie)) {\n        return {bid + 1, nullptr, curr};\n      }\n    }\n  }\n\n  // if the node is not displaced, search the correct chain\n  DensePtr* prev = &entries_[bid];\n  curr = prev->Next();\n  while (curr != nullptr) {\n    ExpireIfNeeded(prev, curr);\n\n    if (Equal(*curr, ptr, cookie)) {\n      return {bid, prev, curr};\n    }\n    prev = curr;\n    curr = curr->Next();\n  }\n\n  // not in the Set\n  return {0, nullptr, nullptr};\n}\n\nvoid* DenseSet::Delete(DensePtr* prev, DensePtr* ptr, bool detach) {\n  void* obj = nullptr;\n\n  if (ptr->IsObject()) {\n    obj = ptr->Raw();\n    ptr->Reset();\n    if (prev) {\n      DCHECK(prev->IsLink());\n\n      DenseLinkKey* plink = prev->AsLink();\n      DensePtr tmp = DensePtr::From(plink);\n      // Transfer TTL flag\n      tmp.SetTtl(prev->HasTtl());\n      DCHECK(ObjectAllocSize(tmp.GetObject()));\n\n      FreeLink(plink);\n      *prev = tmp;\n      DCHECK(!prev->IsLink());\n    }\n  } else {\n    DCHECK(ptr->IsLink());\n\n    DenseLinkKey* link = ptr->AsLink();\n    obj = link->Raw();\n    *ptr = link->next;\n    
FreeLink(link);\n  }\n\n  obj_malloc_used_ -= ObjectAllocSize(obj);\n  --size_;\n\n  if (detach) {\n    return obj;\n  }\n  ObjDelete(obj, false);\n  return nullptr;\n}\n\nDenseSet::ChainVectorIterator DenseSet::GetRandomChain() {\n  if (entries_.empty() || size_ == 0) {\n    return entries_.end();\n  }\n\n  size_t offset = absl::Uniform<size_t>(tl_bit_gen, 0u, entries_.size());\n\n  // Start at random position and scan linearly with wrap-around\n  auto it = entries_.begin() + offset;\n  for (size_t n = 0; n < entries_.size(); n++) {\n    // Check IsEmpty first to avoid ExpireIfNeeded overhead on empty buckets\n    if (!it->IsEmpty()) {\n      ExpireIfNeeded(nullptr, &*it);\n      if (!it->IsEmpty()) {\n        return it;\n      }\n    }\n\n    if (++it == entries_.end()) {\n      it = entries_.begin();\n    }\n  }\n\n  return entries_.end();\n}\n\nDenseSet::IteratorBase DenseSet::GetRandomIterator() {\n  ChainVectorIterator chain_it = GetRandomChain();\n  if (chain_it == entries_.end())\n    return IteratorBase{};\n\n  DensePtr* ptr = &*chain_it;\n  while (ptr->IsLink() && absl::Bernoulli(tl_bit_gen, 0.5)) {\n    DensePtr* next = ptr->Next();\n    if (ExpireIfNeeded(ptr, next))  // stop if we break the chain with expiration\n      break;\n    ptr = next;\n  }\n\n  return IteratorBase{(DenseSet*)this, chain_it, ptr};\n}\n\nvoid* DenseSet::PopInternal() {\n  auto bucket_iter = GetRandomChain();  // Find first non empty chain\n  if (bucket_iter == entries_.end())\n    return nullptr;\n\n  // unlink the first node in the first non-empty chain\n  obj_malloc_used_ -= ObjectAllocSize(bucket_iter->GetObject());\n\n  DensePtr front = PopPtrFront(bucket_iter);\n  void* ret = front.GetObject();\n\n  if (front.IsLink()) {\n    FreeLink(front.AsLink());\n  }\n\n  --size_;\n  return ret;\n}\n\nvoid* DenseSet::AddOrReplaceObj(void* obj, bool has_ttl) {\n  uint64_t hc = Hash(obj, 0);\n\n  DensePtr* dptr = entries_.empty() ? 
nullptr : Find(obj, BucketId(hc), 0).second;\n  if (dptr) {  // replace existing object.\n    // A bit confusing design: ttl bit is located on the wrapping pointer,\n    // therefore we must set ttl bit before unwrapping below.\n    dptr->SetTtl(has_ttl);\n\n    if (dptr->IsLink())  // unwrap the pointer.\n      dptr = dptr->AsLink();\n\n    void* res = dptr->Raw();\n    const size_t res_sz = ObjectAllocSize(res);\n    DCHECK_GE(obj_malloc_used_, res_sz);\n    obj_malloc_used_ -= res_sz;\n    obj_malloc_used_ += ObjectAllocSize(obj);\n\n    dptr->SetObject(obj);\n\n    return res;\n  }\n\n  AddUnique(obj, has_ttl, hc);\n  return nullptr;\n}\n\n/**\n * stable scanning api. has the same guarantees as redis scan command.\n * we avoid doing bit-reverse by using a different function to derive a bucket id\n * from hash values. By using msb part of hash we make it \"stable\" with respect to\n * rehashes. For example, with table log size 4 (size 16), entries in bucket id\n * 1110 come from hashes 1110XXXXX.... When a table grows to log size 5,\n * these entries can move either to 11100 or 11101. So if we traversed with our cursor\n * range [0000-1110], it's guaranteed that in grown table we do not need to cover again\n * [00000-11100]. Similarly with shrinkage, if a table is shrunk to log size 3,\n * keys from 1110 and 1111 will move to bucket 111. 
Again, it's guaranteed that we\n * covered the range [000-111] (all keys in that case).\n * Returns: next cursor or 0 if reached the end of scan.\n * cursor = 0 - initiates a new scan.\n */\n\nuint32_t DenseSet::Scan(uint32_t cursor, const ItemCb& cb) const {\n  // empty set\n  if (capacity_log_ == 0) {\n    return 0;\n  }\n\n  uint32_t entries_idx = cursor >> (32 - capacity_log_);\n\n  auto& entries = const_cast<DenseSet*>(this)->entries_;\n\n  // First find the bucket to scan, skip empty buckets.\n  // A bucket is empty if the current index is empty and the data is not displaced\n  // to the right or to the left.\n  while (entries_idx < entries_.size() && NoItemBelongsBucket(entries_idx)) {\n    ++entries_idx;\n  }\n\n  if (entries_idx == entries_.size()) {\n    return 0;\n  }\n\n  DensePtr* curr = &entries[entries_idx];\n\n  // Check home bucket\n  if (!curr->IsEmpty() && !curr->IsDisplaced()) {\n    // scanning add all entries in a given chain\n    while (true) {\n      cb(curr->GetObject());\n      if (!curr->IsLink())\n        break;\n\n      DensePtr* mcurr = const_cast<DensePtr*>(curr);\n\n      if (ExpireIfNeeded(mcurr, &mcurr->AsLink()->next) && !mcurr->IsLink()) {\n        break;\n      }\n      curr = &curr->AsLink()->next;\n    }\n  }\n\n  // Check if the bucket on the left belongs to the home bucket.\n  if (entries_idx > 0) {\n    DensePtr* left_bucket = &entries[entries_idx - 1];\n    ExpireIfNeeded(nullptr, left_bucket);\n\n    if (left_bucket->IsDisplaced() &&\n        left_bucket->GetDisplacedDirection() == -1) {  // left of the home bucket\n      cb(left_bucket->GetObject());\n    }\n  }\n\n  // move to the next index for the next scan and check if we are done\n  ++entries_idx;\n  if (entries_idx >= entries_.size()) {\n    return 0;\n  }\n\n  // Check if the bucket on the right belongs to the home bucket.\n  DensePtr* right_bucket = &entries[entries_idx];\n  ExpireIfNeeded(nullptr, right_bucket);\n\n  if (right_bucket->IsDisplaced() &&\n      
right_bucket->GetDisplacedDirection() == 1) {  // right of the home bucket\n    cb(right_bucket->GetObject());\n  }\n\n  return entries_idx << (32 - capacity_log_);\n}\n\nauto DenseSet::NewLink(void* data, DensePtr next) -> DenseLinkKey* {\n  using LinkAllocator = StatelessAllocator<DenseLinkKey>;\n\n  LinkAllocator la;\n  DenseLinkKey* lk = la.allocate(1);\n  la.construct(lk);\n\n  lk->next = next;\n  lk->SetObject(data);\n  ++num_links_;\n\n  return lk;\n}\n\nbool DenseSet::ExpireIfNeededInternal(DensePtr* prev, DensePtr* node) const {\n  DCHECK(node != nullptr);\n  DCHECK(node->HasTtl());\n\n  bool deleted = false;\n  do {\n    uint32_t obj_time = ObjExpireTime(node->GetObject());\n    if (obj_time > time_now_) {\n      break;\n    }\n\n    // updates the *node to next item if relevant or resets it to empty.\n    const_cast<DenseSet*>(this)->Delete(prev, node);\n    deleted = true;\n  } while (node->HasTtl());\n\n  return deleted;\n}\n\nvoid DenseSet::CollectExpired() {\n  // Simply iterating over all items will remove expired\n  auto it = IteratorBase(this, false);\n  while (it.curr_entry_ != nullptr) {\n    it.Advance();\n  }\n}\n\nsize_t DenseSet::SizeSlow() {\n  CollectExpired();\n  return size_;\n}\n\n}  // namespace dfly\n"
  },
  {
    "path": "src/core/dense_set.h",
    "content": "// Copyright 2022, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n#pragma once\n\n#include <cassert>\n#include <cstddef>\n#include <cstdint>\n#include <functional>\n#include <type_traits>\n#include <vector>\n\n#include \"core/detail/stateless_allocator.h\"\n\nnamespace dfly {\n\n// DenseSet is a nice but over-optimized data-structure. Probably is not worth it in the first\n// place but sometimes the OCD kicks in and one can not resist.\n// The advantage of it over redis-dict is smaller meta-data waste.\n// dictEntry is 24 bytes, i.e it uses at least 32N bytes where N is the expected length.\n// dict requires to allocate dictEntry per each addition in addition to the supplied key.\n// It also wastes space in case of a set because it stores a value pointer inside dictEntry.\n// To summarize:\n// 100% utilized dict uses N*24 + N*8 = 32N bytes not including the key space.\n// for 75% utilization (1/0.75 buckets): N*1.33*8 + N*24 = 35N\n//\n// This class uses 8 bytes per bucket (similarly to dictEntry*) but it used it for both\n// links and keys. For most cases, we remove the need for another redirection layer\n// and just store the key, so no \"dictEntry\" allocations occur.\n// For those cells that require chaining, the bucket is\n// changed in run-time to represent a linked chain.\n// Additional feature - in order to to reduce collisions, we insert items into\n// neighbour cells but only if they are empty (not chains). This way we reduce the number of\n// empty (unused) spaces at full utilization from 36% to ~21%.\n// 100% utilized table requires: N*8 + 0.2N*16 = 11.2N bytes or ~20 bytes savings.\n// 75% utilization: N*1.33*8 + 0.12N*16 = 13N or ~22 bytes savings per record.\n// with potential replacements of hset/zset data structures.\n// static_assert(sizeof(dictEntry) == 24);\n\nclass DenseSet {\n  struct DenseLinkKey;\n  // we can assume that high 12 bits of user address space\n  // can be used for tagging. 
At most 52 bits of address are reserved for\n  // some configurations, and usually it's 48 bits.\n  // https://docs.kernel.org/arch/arm64/memory.html\n  static constexpr size_t kLinkBit = 1ULL << 52;\n  static constexpr size_t kDisplaceBit = 1ULL << 53;\n  static constexpr size_t kDisplaceDirectionBit = 1ULL << 54;\n  static constexpr size_t kTtlBit = 1ULL << 55;\n  static constexpr size_t kTagMask = 4095ULL << 52;  // we reserve 12 high bits.\n\n  class DensePtr {\n   public:\n    explicit DensePtr(void* p = nullptr) : ptr_(p) {\n    }\n\n    // Imports the object with its metadata except the link bit that is reset.\n    static DensePtr From(DenseLinkKey* o) {\n      DensePtr res;\n      res.ptr_ = (void*)(o->uptr() & (~kLinkBit));\n      return res;\n    }\n\n    uint64_t uptr() const {\n      return uint64_t(ptr_);\n    }\n\n    bool IsObject() const {\n      return (uptr() & kLinkBit) == 0;\n    }\n\n    bool IsLink() const {\n      return (uptr() & kLinkBit) != 0;\n    }\n\n    bool HasTtl() const {\n      return (uptr() & kTtlBit) != 0;\n    }\n\n    bool IsEmpty() const {\n      return ptr_ == nullptr;\n    }\n\n    void* Raw() const {\n      return (void*)(uptr() & ~kTagMask);\n    }\n\n    bool IsDisplaced() const {\n      return (uptr() & kDisplaceBit) == kDisplaceBit;\n    }\n\n    void SetLink(DenseLinkKey* lk) {\n      ptr_ = (void*)(uintptr_t(lk) | kLinkBit);\n    }\n\n    void SetDisplaced(int direction) {\n      ptr_ = (void*)(uptr() | kDisplaceBit);\n      if (direction == 1) {\n        ptr_ = (void*)(uptr() | kDisplaceDirectionBit);\n      }\n    }\n\n    void ClearDisplaced() {\n      ptr_ = (void*)(uptr() & ~(kDisplaceBit | kDisplaceDirectionBit));\n    }\n\n    // returns 1 if the displaced node is right of the correct bucket and -1 if it is left\n    int GetDisplacedDirection() const {\n      return (uptr() & kDisplaceDirectionBit) == kDisplaceDirectionBit ? 
1 : -1;\n    }\n\n    void SetTtl(bool b) {\n      if (b)\n        ptr_ = (void*)(uptr() | kTtlBit);\n      else\n        ptr_ = (void*)(uptr() & (~kTtlBit));\n    }\n\n    void Reset() {\n      ptr_ = nullptr;\n    }\n\n    void* GetObject() const {\n      if (IsObject()) {\n        return Raw();\n      }\n\n      return AsLink()->Raw();\n    }\n\n    // Sets pointer but preserves tagging info\n    void SetObject(void* obj) {\n      assert(IsObject());\n      ptr_ = (void*)((uptr() & kTagMask) | (uintptr_t(obj) & ~kTagMask));\n    }\n\n    DenseLinkKey* AsLink() {\n      return (DenseLinkKey*)Raw();\n    }\n\n    const DenseLinkKey* AsLink() const {\n      return (const DenseLinkKey*)Raw();\n    }\n\n    DensePtr* Next() {\n      if (!IsLink()) {\n        return nullptr;\n      }\n\n      return &AsLink()->next;\n    }\n\n    const DensePtr* Next() const {\n      if (!IsLink()) {\n        return nullptr;\n      }\n\n      return &AsLink()->next;\n    }\n\n   private:\n    void* ptr_ = nullptr;\n  };\n\n  struct DenseLinkKey : public DensePtr {\n    DensePtr next;  // could be LinkKey* or Object *.\n  };\n\n  static_assert(sizeof(DensePtr) == sizeof(uintptr_t));\n  static_assert(sizeof(DenseLinkKey) == 2 * sizeof(uintptr_t));\n\n protected:\n  using DensePtrAllocator = StatelessAllocator<DensePtr>;\n  using ChainVectorIterator = std::vector<DensePtr, DensePtrAllocator>::iterator;\n  using ChainVectorConstIterator = std::vector<DensePtr, DensePtrAllocator>::const_iterator;\n\n  class IteratorBase {\n    friend class DenseSet;\n\n   public:\n    IteratorBase(DenseSet* owner, ChainVectorIterator list_it, DensePtr* e)\n        : owner_(owner), curr_list_(list_it), curr_entry_(e) {\n    }\n\n    // returns the expiry time of the current entry or UINT32_MAX if no ttl is set.\n    uint32_t ExpiryTime() const {\n      return curr_entry_->HasTtl() ? 
owner_->ObjExpireTime(curr_entry_->GetObject()) : UINT32_MAX;\n    }\n\n    void SetExpiryTime(uint32_t ttl_sec);\n\n    bool HasExpiry() const {\n      return curr_entry_->HasTtl();\n    }\n\n   protected:\n    IteratorBase() : owner_(nullptr), curr_entry_(nullptr) {\n    }\n\n    IteratorBase(const DenseSet* owner, bool is_end);\n\n    void Advance();\n\n    DenseSet* owner_;\n    ChainVectorIterator curr_list_;\n    DensePtr* curr_entry_;\n  };\n\n public:\n  static constexpr uint32_t kMaxBatchLen = 32;\n\n  explicit DenseSet();\n  virtual ~DenseSet();\n\n  void Clear() {\n    ClearStep(0, entries_.size());\n  }\n\n  // Returns the next bucket index that should be cleared.\n  // Returns BucketCount when all objects are erased.\n  uint32_t ClearStep(uint32_t start, uint32_t count);\n\n  // Returns the number of elements in the map. Note that it might be that some of these elements\n  // have expired and can't be accessed.\n  size_t UpperBoundSize() const {\n    return size_;\n  }\n\n  // Returns an accurate size, post-expiration. O(n).\n  size_t SizeSlow();\n\n  bool Empty() const {\n    return size_ == 0;\n  }\n\n  size_t BucketCount() const {\n    return entries_.size();\n  }\n\n  size_t ObjMallocUsed() const {\n    return obj_malloc_used_;\n  }\n\n  size_t SetMallocUsed() const {\n    return entries_.capacity() * sizeof(DensePtr) + num_links_ * sizeof(DenseLinkKey);\n  }\n\n  using ItemCb = std::function<void(const void*)>;\n\n  uint32_t Scan(uint32_t cursor, const ItemCb& cb) const;\n  void Reserve(size_t sz);\n\n  // Shrinks the table to the specified size. 
The size must be a power of 2,\n  // >= kMinSize, and >= current number of elements.\n  // This method should be called explicitly when memory reclamation is needed.\n  void Shrink(size_t new_size);\n\n  void Fill(DenseSet* other) const;\n\n  // set an abstract time that allows expiry.\n  void set_time(uint32_t val) {\n    time_now_ = val;\n  }\n\n  uint32_t time_now() const {\n    return time_now_;\n  }\n\n  bool ExpirationUsed() const {\n    return expiration_used_;\n  }\n\n protected:\n  // Virtual functions to be implemented for generic data\n  virtual uint64_t Hash(const void* obj, uint32_t cookie) const = 0;\n  virtual bool ObjEqual(const void* left, const void* right, uint32_t right_cookie) const = 0;\n  virtual size_t ObjectAllocSize(const void* obj) const = 0;\n  virtual uint32_t ObjExpireTime(const void* obj) const = 0;\n  virtual void ObjUpdateExpireTime(const void* obj, uint32_t ttl_sec) = 0;\n  virtual void ObjDelete(void* obj, bool has_ttl) const = 0;\n  virtual void* ObjectClone(const void* obj, bool has_ttl, bool add_ttl) const = 0;\n\n  void CollectExpired();\n\n  bool EraseInternal(void* obj, uint32_t cookie) {\n    auto [prev, found] = Find(obj, BucketId(obj, cookie), cookie);\n    if (found) {\n      Delete(prev, found);\n      return true;\n    }\n    return false;\n  }\n\n  // Like EraseInternal but returns the detached object instead of deleting it.\n  // Returns nullptr if the object was not found.\n  void* DetachInternal(void* obj, uint32_t cookie) {\n    auto [prev, found] = Find(obj, BucketId(obj, cookie), cookie);\n    if (found) {\n      return Delete(prev, found, true);\n    }\n    return nullptr;\n  }\n\n  void* FindInternal(const void* obj, uint64_t hashcode, uint32_t cookie) const;\n\n  IteratorBase FindIt(const void* ptr, uint32_t cookie) {\n    if (Empty())\n      return IteratorBase{};\n\n    auto [bid, _, curr] = Find2(ptr, BucketId(ptr, cookie), cookie);\n    if (curr) {\n      return IteratorBase(this, entries_.begin() + bid, 
curr);\n    }\n    return IteratorBase{};\n  }\n\n  // Get iterator to start of random non-empty chain (bucket)\n  ChainVectorIterator GetRandomChain();\n\n  // Wrap RandomChain() into iterator and advance with reservoir sampling\n  IteratorBase GetRandomIterator();\n\n  void* PopInternal();\n\n  void IncreaseMallocUsed(size_t delta) {\n    obj_malloc_used_ += delta;\n  }\n\n  void DecreaseMallocUsed(size_t delta) {\n    obj_malloc_used_ -= delta;\n  }\n\n  // Returns the previous object if it has been replaced.\n  // nullptr, if obj was added.\n  void* AddOrReplaceObj(void* obj, bool has_ttl);\n\n  // Assumes that the object does not exist in the set.\n  void AddUnique(void* obj, bool has_ttl, uint64_t hashcode);\n\n  void Prefetch(uint64_t hash);\n\n private:\n  DenseSet(const DenseSet&) = delete;\n  DenseSet& operator=(DenseSet&) = delete;\n\n  bool Equal(DensePtr dptr, const void* ptr, uint32_t cookie) const;\n\n  struct CloneItem {\n    DensePtr ptr;\n    void* obj = nullptr;\n    bool has_ttl = false;\n  };\n\n  void CloneBatch(unsigned len, CloneItem* items, DenseSet* other) const;\n\n  using ClearItem = CloneItem;\n  void ClearBatch(unsigned len, ClearItem* items);\n\n  uint32_t BucketId(uint64_t hash) const {\n    assert(capacity_log_ > 0);\n    return hash >> (64 - capacity_log_);\n  }\n\n  uint32_t BucketId(const void* ptr, uint32_t cookie) const {\n    return BucketId(Hash(ptr, cookie));\n  }\n\n  // return a ChainVectorIterator (a.k.a iterator) or end if there is an empty chain found\n  ChainVectorIterator FindEmptyAround(uint32_t bid);\n\n  // Return if bucket has no item which is not displaced and right/left bucket has no displaced item\n  // belong to given bid\n  bool NoItemBelongsBucket(uint32_t bid) const;\n  void Grow(size_t prev_size);\n\n  // ============ Pseudo Linked List Functions for interacting with Chains ==================\n  size_t PushFront(ChainVectorIterator, void* obj, bool has_ttl);\n  void PushFront(ChainVectorIterator, 
DensePtr);\n\n  DensePtr PopPtrFront(ChainVectorIterator);\n\n  // ============ Pseudo Linked List in DenseSet end ==================\n\n  // returns (prev, item) pair. If item is root, then prev is null.\n  std::pair<DensePtr*, DensePtr*> Find(const void* ptr, uint32_t bid, uint32_t cookie) {\n    auto [_, p, c] = Find2(ptr, bid, cookie);\n    return {p, c};\n  }\n\n  // returns bid and (prev, item) pair. If item is root, then prev is null.\n  std::tuple<size_t, DensePtr*, DensePtr*> Find2(const void* ptr, uint32_t bid, uint32_t cookie);\n\n  DenseLinkKey* NewLink(void* data, DensePtr next);\n\n  inline void FreeLink(DenseLinkKey* plink) {\n    // deallocate the link if it is no longer a link as it is now in an empty list\n    DensePtrAllocator::resource()->deallocate(plink, sizeof(DenseLinkKey), alignof(DenseLinkKey));\n    --num_links_;\n  }\n\n  // Returns true if *node was deleted.\n  bool ExpireIfNeeded(DensePtr* prev, DensePtr* node) const {\n    if (node->HasTtl()) {\n      return ExpireIfNeededInternal(prev, node);\n    }\n    return false;\n  }\n\n  bool ExpireIfNeededInternal(DensePtr* prev, DensePtr* node) const;\n\n  // Deletes the object pointed by ptr and removes it from the set.\n  // If ptr is a link then it will be deleted internally.\n  // If detach is true, returns the raw object instead of calling ObjDelete.\n  void* Delete(DensePtr* prev, DensePtr* ptr, bool detach = false);\n\n  // Processes a single bucket during Shrink, relocating elements as needed.\n  void ShrinkBucket(size_t bucket_idx);\n\n  std::vector<DensePtr, DensePtrAllocator> entries_;\n\n  mutable size_t obj_malloc_used_ = 0;\n  mutable uint32_t size_ = 0;       // number of elements in the set.\n  mutable uint32_t num_links_ = 0;  // number of links in the set.\n  unsigned capacity_log_ = 0;\n\n  uint32_t time_now_ = 0;\n\n  mutable bool expiration_used_ = false;\n};\n\ninline void* DenseSet::FindInternal(const void* obj, uint64_t hashcode, uint32_t cookie) const {\n  if 
(entries_.empty())\n    return nullptr;\n\n  uint32_t bid = BucketId(hashcode);\n  DensePtr* ptr = const_cast<DenseSet*>(this)->Find(obj, bid, cookie).second;\n  return ptr ? ptr->GetObject() : nullptr;\n}\n\n}  // namespace dfly\n"
  },
  {
    "path": "src/core/detail/bitpacking.cc",
    "content": "// Copyright 2022, Roman Gershman.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#include \"src/core/detail/bitpacking.h\"\n\n#include <absl/base/internal/endian.h>\n\n#include \"base/logging.h\"\n#include \"core/sse_port.h\"\n\nusing namespace std;\n\nnamespace dfly {\n\nnamespace detail {\n\n#if defined(__GNUC__) && !defined(__clang__)\n#pragma GCC push_options\n#pragma GCC optimize(\"Ofast\")\n#endif\n\nstatic inline uint64_t Compress8x7bit(uint64_t x) {\n  x = ((x & 0x7F007F007F007F00) >> 1) | (x & 0x007F007F007F007F);\n  x = ((x & 0x3FFF00003FFF0000) >> 2) | (x & 0x00003FFF00003FFF);\n  x = ((x & 0x0FFFFFFF00000000) >> 4) | (x & 0x000000000FFFFFFF);\n\n  return x;\n}\n\n#if defined(__SSE3__) || defined(__aarch64__)\nstatic inline pair<const char*, uint8_t*> simd_variant1_pack(const char* ascii, const char* end,\n                                                             uint8_t* bin) {\n  __m128i val, rpart, lpart;\n\n  // Skips 8th byte (indexc 7) in the lower 8-byte part.\n  const __m128i control = _mm_set_epi8(-1, -1, 14, 13, 12, 11, 10, 9, 8, 6, 5, 4, 3, 2, 1, 0);\n\n  // Based on the question I asked here: https://stackoverflow.com/q/74831843/2280111\n  while (ascii <= end) {\n    val = mm_loadu_si128(reinterpret_cast<const __m128i*>(ascii));\n\n    /*\n    x = ((x & 0x7F007F007F007F00) >> 1) | (x & 0x007F007F007F007F);\n    x = ((x & 0x3FFF00003FFF0000) >> 2) | (x & 0x00003FFF00003FFF);\n    x = ((x & 0x0FFFFFFF00000000) >> 4) | (x & 0x000000000FFFFFFF);\n    */\n\n    rpart = _mm_and_si128(val, _mm_set1_epi64x(0x007F007F007F007F));\n    lpart = _mm_and_si128(val, _mm_set1_epi64x(0x7F007F007F007F00));\n    val = _mm_or_si128(_mm_srli_epi64(lpart, 1), rpart);\n\n    rpart = _mm_and_si128(val, _mm_set1_epi64x(0x00003FFF00003FFF));\n    lpart = _mm_and_si128(val, _mm_set1_epi64x(0x3FFF00003FFF0000));\n    val = _mm_or_si128(_mm_srli_epi64(lpart, 2), rpart);\n\n    rpart = _mm_and_si128(val, 
_mm_set1_epi64x(0x000000000FFFFFFF));\n    lpart = _mm_and_si128(val, _mm_set1_epi64x(0x0FFFFFFF00000000));\n    val = _mm_or_si128(_mm_srli_epi64(lpart, 4), rpart);\n\n    val = _mm_shuffle_epi8(val, control);\n    _mm_storeu_si128(reinterpret_cast<__m128i*>(bin), val);\n    bin += 14;\n    ascii += 16;\n  }\n\n  return make_pair(ascii, bin);\n}\n\nstatic inline pair<const char*, uint8_t*> simd_variant2_pack(const char* ascii, const char* end,\n                                                             uint8_t* bin) {\n  // Skips 8th byte (indexc 7) in the lower 8-byte part.\n  const __m128i control = _mm_set_epi8(-1, -1, 14, 13, 12, 11, 10, 9, 8, 6, 5, 4, 3, 2, 1, 0);\n\n  __m128i val, rpart, lpart;\n\n  // Based on the question I asked here: https://stackoverflow.com/q/74831843/2280111\n  while (ascii <= end) {\n    val = mm_loadu_si128(reinterpret_cast<const __m128i*>(ascii));\n\n    /*\n    x = ((x & 0x7F007F007F007F00) >> 1) | (x & 0x007F007F007F007F);\n    x = ((x & 0x3FFF00003FFF0000) >> 2) | (x & 0x00003FFF00003FFF);\n    x = ((x & 0x0FFFFFFF00000000) >> 4) | (x & 0x000000000FFFFFFF);\n    */\n    val = _mm_maddubs_epi16(_mm_set1_epi16(0x8001), val);\n    val = _mm_madd_epi16(_mm_set1_epi32(0x40000001), val);\n\n    rpart = _mm_and_si128(val, _mm_set1_epi64x(0x000000000FFFFFFF));\n    lpart = _mm_and_si128(val, _mm_set1_epi64x(0x0FFFFFFF00000000));\n    val = _mm_or_si128(_mm_srli_epi64(lpart, 4), rpart);\n\n    val = _mm_shuffle_epi8(val, control);\n    _mm_storeu_si128(reinterpret_cast<__m128i*>(bin), val);\n    bin += 14;\n    ascii += 16;\n  }\n  return make_pair(ascii, bin);\n}\n\n#endif\n\n// Daniel Lemire's function validate_ascii_fast() - under Apache/MIT license.\n// See https://github.com/lemire/fastvalidate-utf-8/\n// The function returns true (1) if all chars passed in src are\n// 7-bit values (0x00..0x7F). 
Otherwise, it returns false (0).\n#ifdef __s390x__\nbool validate_ascii_fast(const char* src, size_t len) {\n  size_t i = 0;\n\n  // Initialize a vector in which all the elements are set to zero.\n  vector unsigned char has_error = vec_splat_s8(0);\n  if (len >= 16) {\n    for (; i <= len - 16; i += 16) {\n      // Load 16 bytes from buffer into a vector.\n      vector unsigned char current_bytes = vec_load_len((signed char*)(src + i), 16);\n      // Perform a bitwise OR operation between the current and the previously loaded contents.\n      has_error = vec_orc(has_error, current_bytes);\n    }\n  }\n\n  // Initialize a vector in which all the elements are set to an invalid ASCII value.\n  vector unsigned char rep_invalid_values = vec_splat_s8(0x80);\n\n  // Perform bitwise AND-complement operation between two vectors.\n  vector unsigned char andc_result = vec_andc(rep_invalid_values, has_error);\n\n  // Tests whether any of corresponding elements of the given vectors are not equal.\n  // After the bitwise operation, both vectors should be equal if ASCII values.\n  if (!vec_all_eq(rep_invalid_values, andc_result)) {\n    return false;\n  }\n\n  for (; i < len; i++) {\n    if (src[i] & 0x80) {\n      return false;\n    }\n  }\n\n  return true;\n}\n#else\nbool validate_ascii_fast(const char* src, size_t len) {\n  size_t i = 0;\n  __m128i has_error = _mm_setzero_si128();\n  if (len >= 16) {\n    for (; i <= len - 16; i += 16) {\n      __m128i current_bytes = mm_loadu_si128((const __m128i*)(src + i));\n      has_error = _mm_or_si128(has_error, current_bytes);\n    }\n  }\n  int error_mask = _mm_movemask_epi8(has_error);\n\n  char tail_has_error = 0;\n  for (; i < len; i++) {\n    tail_has_error |= src[i];\n  }\n  error_mask |= (tail_has_error & 0x80);\n\n  return !error_mask;\n}\n#endif\n\n// len must be at least 16\nvoid ascii_pack(const char* ascii, size_t len, uint8_t* bin) {\n  uint64_t val;\n  const char* end = ascii + len;\n\n  while (ascii + 8 <= end) {\n    
val = absl::little_endian::Load64(ascii);\n    uint64_t dest = (val & 0xFF);\n    for (unsigned i = 1; i <= 7; ++i) {\n      val >>= 1;\n      dest |= (val & (0x7FUL << 7 * i));\n    }\n    memcpy(bin, &dest, 7);\n    bin += 7;\n    ascii += 8;\n  }\n\n  // epilog - we do not pack since we have less than 8 bytes.\n  while (ascii < end) {\n    *bin++ = *ascii++;\n  }\n}\n\nvoid ascii_pack2(const char* ascii, size_t len, uint8_t* bin) {\n  uint64_t val;\n  const char* end = ascii + len;\n\n  while (ascii + 8 <= end) {\n    val = absl::little_endian::Load64(ascii);\n    val = Compress8x7bit(val);\n    memcpy(bin, &val, 7);\n    bin += 7;\n    ascii += 8;\n  }\n\n  // epilog - we do not pack since we have less than 8 bytes.\n  while (ascii < end) {\n    *bin++ = *ascii++;\n  }\n}\n\n// The algo - do in parallel what ascii_pack does on two uint64_t integers\nvoid ascii_pack_simd(const char* ascii, size_t len, uint8_t* bin) {\n#if defined(__SSE3__) || defined(__aarch64__)\n  // I leave out 16 bytes in addition to 16 that we load in the loop\n  // because we store into bin full 16 bytes instead of 14. To prevent data\n  // overwrite we finish loop one iteration earlier.\n  const char* end = ascii + len - 32;\n\n  tie(ascii, bin) = simd_variant1_pack(ascii, end, bin);\n\n  end += 32;  // Bring back end.\n  DCHECK(ascii < end);\n  ascii_pack(ascii, end - ascii, bin);\n#else\n  ascii_pack(ascii, len, bin);\n#endif\n}\n\nvoid ascii_pack_simd2(const char* ascii, size_t len, uint8_t* bin) {\n#if defined(__SSE3__) || defined(__aarch64__)\n  // I leave out 16 bytes in addition to 16 that we load in the loop\n  // because we store into bin full 16 bytes instead of 14. 
To prevent data\n  // overwrite we finish loop one iteration earlier.\n  const char* end = ascii + len - 32;\n\n  // on arm var\n#if defined(__aarch64__)\n  tie(ascii, bin) = simd_variant1_pack(ascii, end, bin);\n#else\n  tie(ascii, bin) = simd_variant2_pack(ascii, end, bin);\n#endif\n\n  end += 32;  // Bring back end.\n  DCHECK(ascii < end);\n  ascii_pack(ascii, end - ascii, bin);\n#else\n  ascii_pack(ascii, len, bin);\n#endif\n}\n\n// unpacks 8->7 encoded blob back to ascii.\n// generally, we can not unpack inplace because ascii (dest) buffer is 8/7 bigger than\n// the source buffer.\n// however, if binary data is positioned on the right of the ascii buffer with empty space on the\n// left than we can unpack inplace.\nvoid ascii_unpack(const uint8_t* bin, size_t ascii_len, char* ascii) {\n  constexpr uint8_t kM = 0x7F;\n  uint8_t p = 0;\n  unsigned i = 0;\n\n  while (ascii_len >= 8) {\n    for (i = 0; i < 7; ++i) {\n      uint8_t src = *bin;  // keep on stack in case we unpack inplace.\n      *ascii++ = (p >> (8 - i)) | ((src << i) & kM);\n      p = src;\n      ++bin;\n    }\n\n    ascii_len -= 8;\n    *ascii++ = p >> 1;\n  }\n\n  DCHECK_LT(ascii_len, 8u);\n  for (i = 0; i < ascii_len; ++i) {\n    *ascii++ = *bin++;\n  }\n}\n\nuint8_t ascii_unpack_byte(const uint8_t* bin, size_t ascii_len, size_t idx) {\n  DCHECK(idx < ascii_len) << \"Index oob for ascii byte unpacking: \" << idx << \" >= \" << ascii_len;\n  const size_t packed_groups = ascii_len / 8;\n  const size_t group = idx / 8;\n  const size_t idx_in_group = idx % 8;\n\n  // Tail bytes (after the last full 8-char group) are stored unpacked.\n  if (group >= packed_groups) {\n    return bin[packed_groups * 7 + idx_in_group];\n  }\n\n  // Unpack ascii group and return byte at idx.\n  char buf[8];\n  ascii_unpack(bin + group * 7, 8, buf);\n  return buf[idx_in_group];\n}\n\nvoid ascii_pack_byte(uint8_t* bin, size_t ascii_len, size_t idx, uint8_t val) {\n  DCHECK(idx < ascii_len) << \"Index oob for ascii byte 
packing: \" << idx << \" >= \" << ascii_len;\n  DCHECK_LT(val, 128u) << \"Only 7-bit ASCII values can be packed\";\n\n  const size_t packed_groups = ascii_len / 8;\n  const size_t group = idx / 8;\n  const size_t idx_in_group = idx % 8;\n\n  // Tail bytes (after the last full 8-char group) are stored unpacked.\n  if (group >= packed_groups) {\n    bin[packed_groups * 7 + idx_in_group] = val;\n    return;\n  }\n\n  // Unpack ascii group and return, modify byte at idx and pack back.\n  uint8_t* group_bin = bin + group * 7;\n  char buf[8];\n  ascii_unpack(group_bin, 8, buf);\n  buf[idx_in_group] = val;\n  ascii_pack(buf, 8, group_bin);\n}\n\n// See CompactObjectTest.AsanTriggerReadOverflow for more details.\nvoid ascii_unpack_simd(const uint8_t* bin, size_t ascii_len, char* ascii) {\n#if defined(__SSE3__) || defined(__aarch64__)\n\n  if (ascii_len < 18) {  // ascii_len >=18 means bin length >=16.\n    ascii_unpack(bin, ascii_len, ascii);\n    return;\n  }\n\n  __m128i val, rpart, lpart;\n\n  // we read 16 bytes from bin even when we need only 14 bytes.\n  // So for last iteration we may access 2 bytes outside of the bin buffer.\n  // To prevent this we need to round down the length of the bin buffer but since we\n  // limit by ascii_len we reduce the ascii_len by two before computing number of iterations.\n  size_t simd_len = ((ascii_len - 2) / 16) * 16;\n  const char* end = ascii + simd_len;\n\n  // shifts the second 7-byte blob to the left.\n  const __m128i control = _mm_set_epi8(14, 13, 12, 11, 10, 9, 8, 7, -1, 6, 5, 4, 3, 2, 1, 0);\n\n  while (ascii < end) {\n    val = mm_loadu_si128(reinterpret_cast<const __m128i*>(bin));\n    val = _mm_shuffle_epi8(val, control);\n\n    rpart = _mm_and_si128(val, _mm_set1_epi64x(0x000000000FFFFFFF));\n    lpart = _mm_and_si128(val, _mm_set1_epi64x(0x00FFFFFFF0000000));\n    val = _mm_or_si128(_mm_slli_epi64(lpart, 4), rpart);\n\n    rpart = _mm_and_si128(val, _mm_set1_epi64x(0x00003FFF00003FFF));\n    lpart = _mm_and_si128(val, 
_mm_set1_epi64x(0xFFFFC000FFFFC000));\n    val = _mm_or_si128(_mm_slli_epi64(lpart, 2), rpart);\n\n    rpart = _mm_and_si128(val, _mm_set1_epi64x(0x007F007F007F007F));\n    lpart = _mm_and_si128(val, _mm_set1_epi64x(0x7F807F807F807F80));\n    val = _mm_or_si128(_mm_slli_epi64(lpart, 1), rpart);\n\n    _mm_storeu_si128(reinterpret_cast<__m128i*>(ascii), val);\n    ascii += 16;\n    bin += 14;\n  }\n\n  ascii_len -= simd_len;\n  if (ascii_len)\n    ascii_unpack(bin, ascii_len, ascii);\n#else\n  ascii_unpack(bin, ascii_len, ascii);\n#endif\n}\n\n// compares packed and unpacked strings. packed must be of length = binpacked_len(ascii_len).\nbool compare_packed(const uint8_t* packed, const char* ascii, size_t ascii_len) {\n  unsigned i = 0;\n  bool res = true;\n  const char* end = ascii + ascii_len;\n\n  while (ascii + 8 <= end) {\n    for (i = 0; i < 7; ++i) {\n      uint8_t conv = (ascii[0] >> i) | (ascii[1] << (7 - i));\n      res &= (conv == *packed);\n      ++ascii;\n      ++packed;\n    }\n\n    if (!res)\n      return false;\n\n    ++ascii;\n  }\n\n  while (ascii < end) {\n    if (*ascii++ != *packed++) {\n      return false;\n    }\n  }\n\n  return true;\n}\n\n#if defined(__GNUC__) && !defined(__clang__)\n#pragma GCC pop_options\n#endif\n\n}  // namespace detail\n\n}  // namespace dfly\n"
  },
  {
    "path": "src/core/detail/bitpacking.h",
    "content": "// Copyright 2022, Roman Gershman.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#pragma once\n\n#include <cstddef>\n#include <cstdint>\n\nnamespace dfly {\n\nnamespace detail {\n\nbool validate_ascii_fast(const char* src, size_t len);\n\n// unpacks 8->7 encoded blob back to ascii.\n// generally, we can not unpack inplace because ascii (dest) buffer is 8/7 bigger than\n// the source buffer.\n// however, if binary data is positioned on the right of the ascii buffer with empty space on the\n// left than we can unpack inplace.\nvoid ascii_unpack(const uint8_t* bin, size_t ascii_len, char* ascii);\nvoid ascii_unpack_simd(const uint8_t* bin, size_t ascii_len, char* ascii);\n\n// Access a single byte in a 7-bit ASCII-packed string without unpacking the entire buffer.\n// These helpers read/write the ASCII byte at logical position `idx` in the unpacked string\n// directly from/into the packed `bin` representation.\n// It's up to caller to verify:\n// `1. idx` must be less than `ascii_len` to avoid out-of-bounds access.\n// 2. `ascii` must be less than 128 (7-bit ASCII) for packing.\nuint8_t ascii_unpack_byte(const uint8_t* bin, size_t ascii_len, size_t idx);\nvoid ascii_pack_byte(uint8_t* bin, size_t ascii_len, size_t idx, uint8_t ascii);\n\n// packs ascii string (does not verify) into binary form saving 1 bit per byte on average (12.5%).\nvoid ascii_pack(const char* ascii, size_t len, uint8_t* bin);\nvoid ascii_pack2(const char* ascii, size_t len, uint8_t* bin);\n\n// SIMD implementation 1 of ascii_pack.\nvoid ascii_pack_simd(const char* ascii, size_t len, uint8_t* bin);\n\n// SIMD implementation 2 of ascii_pack.\nvoid ascii_pack_simd2(const char* ascii, size_t len, uint8_t* bin);\n\nbool compare_packed(const uint8_t* packed, const char* ascii, size_t ascii_len);\n\n// maps ascii len to 7-bit packed length. 
Each 8 bytes are converted to 7 bytes.\ninline constexpr size_t binpacked_len(size_t ascii_len) {\n  return (ascii_len * 7 + 7) / 8; /* rounded up */\n}\n\n// converts 7-bit packed length back to ascii length. Note that this conversion\n// is not accurate since it maps 7 bytes to 8 bytes (rounds up), while we may have\n// 7 byte strings converted to 7 byte as well.\ninline constexpr size_t ascii_len(size_t bin_len) {\n  return (bin_len * 8) / 7;\n}\n\n}  // namespace detail\n}  // namespace dfly\n"
  },
  {
    "path": "src/core/detail/bptree_internal.h",
    "content": "// Copyright 2023, Roman Gershman.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#pragma once\n\n#include <array>\n#include <cassert>\n#include <cstdint>\n#include <cstring>\n\nnamespace dfly {\n\ntemplate <typename T, typename Policy> class BPTree;\n\nnamespace detail {\n\n// Internal classes related to B+tree implementation. The design is largely based on the\n// implementation of absl::bPtree_map/set.\n// The motivation for replacing zskiplist - significant size reduction:\n//   we reduce the metadata overhead per record from 45 bytes in zskiplist to just a\n//   few bytes with b-tree. The trick is using significantly large nodes (256 bytes) so that\n//   their overhead is negligible compared to the items they store.\n//   Why not use absl::bPtree_set? We must support Rank tree functionality that\n//   absl does not supply.\n//   Hacking into absl is not a simple task, implementing our own tree is easier.\n// Below some design decisions:\n// 1. We use predefined node size of 256 bytes and derive number of items in each node from it.\n//    Inner nodes have less items than leaf nodes because they also need to store child pointers.\n// 2. BPTreeNode does not predeclare fields besides the 8 bytes metadata - everything else is\n//    calculated at run-time and has dedicated accessors (similarly to absl). This allows\n//    dense and efficient representation of tree nodes.\n// 3. We assume that we store small items (8, 16 bytes) which will have a large branching\n//    factor (248/16), meaning the tree will stay shallow even for sizes reaching billion nodes.\n// 4. We do not store parent pointer like in absl tree. Instead we use BPTreePath to store\n//    hierarchy of parent nodes. That should reduce our overhead even further by few bits per item.\n// 5. We assume we store trivially copyable types - this reduces the\n//    complexity of the generics in the code.\n// 6. We support pmr memory resource. 
This allows us to use pluggable heaps.\n//\n// TODO: (all the ideas taken from absl implementation)\n//       1. to introduce slices when removing items from the tree (avoid shifts).\n//       2. to avoid merging/rebalancing when removing max/min items from the tree.\n//       3. Small tree optimization: when the tree is small with a single root node, we can\n//          allocate less than 256 bytes (special case) to avoid relative blowups in memory for\n//          small trees.\n\nconstexpr uint16_t kBPNodeSize = 256;\n\n/**\n * @brief The BPNodeLayout class is a helper class that defines the layout of the B+tree node.\n *        The inner node looks like this:\n *        | 4 bytes metadata | keys ... | 4 bytes tree-count | children nodes |\n *        The leaf node looks like this:\n *        | 4 bytes metadata | keys ... |\n *\n * @tparam T\n */\ntemplate <typename T> class BPNodeLayout {\n  static_assert(std::is_trivially_copyable<T>::value, \"KeyT must be triviall copyable\");\n\n  static constexpr uint16_t kKeyOffset = 4;                  // 4 bytes for metadata\n  static constexpr uint16_t kSubTreeLen = sizeof(uint32_t);  // 4 bytes for count.\n public:\n  static constexpr uint16_t kKeySize = sizeof(T);\n  static constexpr uint16_t kMaxLeafKeys = (kBPNodeSize - kKeyOffset) / kKeySize;\n  static constexpr uint16_t kMinLeafKeys = kMaxLeafKeys / 2;\n\n  // internal node:\n  // x slots, (x+1) children: x * kKeySize + (x+1) * sizeof(BPTreeNode*) = x * (kKeySize + 8) + 8\n  // x = (kBPNodeSize - kInnerKeyOffset - 8) / (kKeySize + 8)\n  static constexpr uint16_t kMaxInnerKeys =\n      (kBPNodeSize - sizeof(void*) - kKeyOffset - kSubTreeLen) / (kKeySize + sizeof(void*));\n  static constexpr uint16_t kMinInnerKeys = kMaxInnerKeys / 2;\n\n  using KeyT = T;\n\n  // The class is constructed inside a block of memory of size kBPNodeSize.\n  // Only BPTree can create it, hence it can access the memory outside its fields.\n  static uint8_t* KeyPtr(unsigned index, void* node) 
{\n    return reinterpret_cast<uint8_t*>(node) + kKeyOffset + kKeySize * index;\n  }\n\n  static const uint8_t* KeyPtr(unsigned index, const void* node) {\n    return reinterpret_cast<const uint8_t*>(node) + kKeyOffset + kKeySize * index;\n  }\n\n  static uint8_t* TreeCountPtr(void* node) {\n    return reinterpret_cast<uint8_t*>(node) + kKeyOffset + kKeySize * kMaxInnerKeys;\n  }\n\n  static const uint8_t* TreeCountPtr(const void* node) {\n    return reinterpret_cast<const uint8_t*>(node) + kKeyOffset + kKeySize * kMaxInnerKeys;\n  }\n\n  static uint8_t* ChildrenStart(void* node) {\n    return TreeCountPtr(node) + kSubTreeLen;\n  }\n\n  static const uint8_t* ChildrenStart(const void* node) {\n    return TreeCountPtr(node) + kSubTreeLen;\n  }\n\n  static_assert(kMaxLeafKeys < 128);\n};\n\ntemplate <typename T> class BPTreeNode {\n  template <typename K, typename Policy> friend class ::dfly::BPTree;\n\n  BPTreeNode(const BPTreeNode&) = delete;\n  BPTreeNode& operator=(const BPTreeNode&) = delete;\n\n  BPTreeNode(bool leaf) : num_items_(0), leaf_(leaf) {\n  }\n\n  using Layout = BPNodeLayout<T>;\n\n public:\n  using KeyT = T;\n\n  void InitSingle(T key) {\n    SetKey(0, key);\n    num_items_ = 1;\n  }\n\n  KeyT Key(unsigned index) const {\n    KeyT res;\n    memcpy(&res, Layout::KeyPtr(index, this), sizeof(KeyT));\n    return res;\n  }\n\n  void SetKey(size_t index, KeyT item) {\n    uint8_t* slot = Layout::KeyPtr(index, this);\n    memcpy(slot, &item, sizeof(KeyT));\n  }\n\n  bool IsLeaf() const {\n    return leaf_;\n  }\n\n  struct SearchResult {\n    uint16_t index;\n    bool found;\n  };\n\n  // Searches for key in the node using binary search.\n  // Returns SearchResult with index of the smallest key for which comp(key) >=0.\n  // comp: is a three way comparator.\n  template <typename Comp> SearchResult BSearch(Comp&& comp) const;\n\n  void Split(BPTreeNode* right, KeyT* median);\n\n  unsigned NumItems() const {\n    return num_items_;\n  }\n\n  unsigned 
AvailableSlotCount() const {\n    return MaxItems() - num_items_;\n  }\n\n  unsigned MaxItems() const {\n    return IsLeaf() ? Layout::kMaxLeafKeys : Layout::kMaxInnerKeys;\n  }\n\n  unsigned MinItems() const {\n    return IsLeaf() ? Layout::kMinLeafKeys : Layout::kMinInnerKeys;\n  }\n\n  // Returns the overall number of items for a subtree rooted at this node.\n  // Equal to NumItems() for leaf nodes and GetInnerTreeCount() for inner nodes.\n  uint32_t TreeCount() const {\n    return IsLeaf() ? NumItems() : GetInnerTreeCount();\n  }\n\n  void ShiftRight(unsigned index);\n  void ShiftLeft(unsigned index, bool child_step_right = false);\n\n  void LeafEraseRight() {\n    assert(IsLeaf() && num_items_ > 0);\n    --num_items_;\n  }\n\n  // Inserts item into a leaf node.\n  // Assumes: the node is IsLeaf() and has some space.\n  void LeafInsert(unsigned index, KeyT item) {\n    assert(IsLeaf() && NumItems() < MaxItems());\n    InsertItem(index, item);\n  }\n\n  void Validate(KeyT upper_bound) const;\n\n  //\n  // Below is the inner node API\n  //\n\n  BPTreeNode* Child(unsigned i) {\n    BPTreeNode* res;\n    memcpy(&res, Layout::ChildrenStart(this) + sizeof(BPTreeNode*) * i, sizeof(BPTreeNode*));\n    return res;\n  }\n\n  const BPTreeNode* Child(unsigned i) const {\n    BPTreeNode* res;\n    memcpy(&res, Layout::ChildrenStart(this) + sizeof(BPTreeNode*) * i, sizeof(BPTreeNode*));\n    return res;\n  }\n\n  void SetChild(unsigned i, BPTreeNode* child) {\n    memcpy(Layout::ChildrenStart(this) + sizeof(BPTreeNode*) * i, &child, sizeof(BPTreeNode*));\n  }\n\n  // TODO: instead of storing counts at nodes we could keep at parent level\n  //       along the children array. 
Unfortunately, this complicates implementation of the tree,\n  //       so we will do it after the whole functionality is completed.\n  uint32_t GetChildTreeCount(unsigned i) {\n    return Child(i)->TreeCount();\n  }\n\n  void SetChildTreeCount(unsigned i, uint32_t cnt) {\n    Child(i)->SetTreeCount(cnt);\n  }\n\n  void IncreaseTreeCount(int32_t delta) {\n    uint32_t cnt = GetInnerTreeCount();\n    cnt += delta;\n    memcpy(Layout::TreeCountPtr(this), &cnt, sizeof(uint32_t));\n  }\n\n  // Rebalance a full child at position pos, at which we tried to insert at insert_pos.\n  // Returns the node and the position to insert into if rebalancing succeeded.\n  // Returns {nullptr, 0} if rebalancing did not succeed.\n  std::pair<BPTreeNode*, unsigned> RebalanceChild(unsigned pos, unsigned insert_pos);\n\n  // We do not update tree count and it is done on the caller side.\n  // Inserts item into an inner node at position index and adds `child` at position index+1.\n  void InnerInsert(unsigned index, KeyT item, BPTreeNode* child) {\n    InsertItem(index, item);\n    SetChild(index + 1, child);\n  }\n\n  // Tries to merge the child at position pos with its sibling.\n  // If we did not succeed to merge, we try to rebalance.\n  // Returns retired BPTreeNode* if children got merged and this parent node's children\n  // count decreased, otherwise, we return nullptr (rebalanced).\n  BPTreeNode* MergeOrRebalanceChild(unsigned pos);\n\n  uint32_t DEBUG_TreeCount() const {\n    uint32_t res = NumItems();\n    if (!IsLeaf()) {\n      for (unsigned i = 0; i <= NumItems(); ++i) {\n        res += Child(i)->DEBUG_TreeCount();\n      }\n    }\n    return res;\n  }\n\n private:\n  void SetTreeCount(uint32_t cnt) {\n    assert(!IsLeaf());\n    memcpy(Layout::TreeCountPtr(this), &cnt, sizeof(uint32_t));\n  }\n\n  void RebalanceChildToLeft(unsigned child_pos, unsigned count);\n  void RebalanceChildToRight(unsigned child_pos, unsigned count);\n\n  void MergeFromRight(KeyT key, BPTreeNode* right);\n\n  
void InsertItem(unsigned index, KeyT item) {\n    assert(index <= num_items_);\n\n    ShiftRight(index);\n    SetKey(index, item);\n  }\n\n  uint32_t GetInnerTreeCount() const {\n    assert(!IsLeaf());\n    uint32_t res;\n    memcpy(&res, Layout::TreeCountPtr(this), sizeof(uint32_t));\n    return res;\n  }\n\n  struct {\n    uint32_t num_items_ : 7;\n    uint32_t leaf_ : 1;\n    uint32_t : 24;\n  };\n};\n\n// Contains parent/index pairs. Meaning that node0->Child(index0) == node1.\ntemplate <typename T> class BPTreePath {\n  static constexpr unsigned kMaxDepth = 16;\n\n public:\n  void Push(BPTreeNode<T>* node, unsigned pos) {\n    assert(depth_ < kMaxDepth);\n    assert(depth_ == 0 || !record_[depth_ - 1].node->IsLeaf());\n    record_[depth_].node = node;\n    record_[depth_].pos = pos;\n    depth_++;\n  }\n\n  unsigned Depth() const {\n    return depth_;\n  }\n\n  void Clear() {\n    depth_ = 0;\n  }\n\n  bool Empty() const {\n    return depth_ == 0;\n  }\n\n  std::pair<BPTreeNode<T>*, unsigned> Last() const {\n    assert(depth_ > 0u);\n    return {record_[depth_ - 1].node, record_[depth_ - 1].pos};\n  }\n\n  BPTreeNode<T>* Node(unsigned i) const {\n    assert(i < depth_);\n    return record_[i].node;\n  }\n\n  unsigned Position(unsigned i) const {\n    assert(i < depth_);\n    return record_[i].pos;\n  }\n\n  void Pop() {\n    assert(depth_ > 0u);\n    depth_--;\n  }\n\n  bool HasValidTerminal() const {\n    return depth_ > 0u && Last().second < Last().first->NumItems();\n  }\n\n  T Terminal() const {\n    assert(Last().second < Last().first->NumItems());\n    return Last().first->Key(Last().second);\n  }\n\n  /// @brief Returns the rank of the path's terminal item.\n  /// Requires that the path is valid and has a terminal item.\n  uint32_t Rank() const;\n\n  /// @brief Advances the path to the next item.\n  /// @return true if succeeded, false if reached the end.\n  bool Next();\n\n  /// @brief Advances the path to the previous item.\n  /// @return true if 
succeeded, false if reached the end.\n  bool Prev();\n\n  // Extend the path to the leaf by always taking the rightmost child.\n  void DigRight();\n\n private:\n  struct Record {\n    BPTreeNode<T>* node;\n    unsigned pos;\n  };\n\n  std::array<Record, kMaxDepth> record_;\n  unsigned depth_ = 0;\n};\n\n// Returns the position of the first item whose key is greater or equal than key.\n// if all items are smaller than key, returns num_items_.\ntemplate <typename T>\ntemplate <typename Comp>\nauto BPTreeNode<T>::BSearch(Comp&& cmp_op) const -> SearchResult {\n  uint16_t lo = 0;\n  uint16_t hi = num_items_;\n  assert(hi > 0);\n\n  // optimization: check the last item first.\n  int cmp_res = cmp_op(Key(hi - 1));\n  if (cmp_res >= 0) {\n    return cmp_res > 0 ? SearchResult{.index = hi, .found = false}\n                       : SearchResult{.index = uint16_t(hi - 1), .found = true};\n  }\n\n  // key < Key(hi - 1)\n\n  --hi;\n  while (lo < hi) {\n    uint16_t mid = (lo + hi) >> 1;\n    assert(mid < hi);\n\n    KeyT item = Key(mid);\n\n    int cmp_res = cmp_op(item);\n    if (cmp_res == 0) {\n      return SearchResult{.index = mid, .found = true};\n    }\n\n    if (cmp_res < 0) {\n      hi = mid;\n    } else {\n      lo = mid + 1;  // we never return indices upto mid because they are strictly less than key.\n    }\n  }\n  assert(lo == hi);\n\n  return {.index = hi, .found = false};\n}\n\ntemplate <typename T> void BPTreeNode<T>::ShiftRight(unsigned index) {\n  unsigned num_items_to_shift = num_items_ - index;\n  if (num_items_to_shift > 0) {\n    uint8_t* ptr = Layout::KeyPtr(index, this);\n    memmove(ptr + Layout::kKeySize, ptr, num_items_to_shift * Layout::kKeySize);\n\n    if (!IsLeaf()) {\n      uint8_t* src = Layout::ChildrenStart(this) + index * sizeof(BPTreeNode*);\n      uint8_t* dest = src + sizeof(BPTreeNode*);\n      memmove(dest, src, (num_items_to_shift + 1) * sizeof(BPTreeNode*));\n    }\n  }\n  num_items_++;\n}\n\ntemplate <typename T> void 
BPTreeNode<T>::ShiftLeft(unsigned index, bool child_step_right) {\n  assert(index < num_items_);\n\n  unsigned num_items_to_shift = num_items_ - index - 1;\n  if (num_items_to_shift > 0) {\n    memmove(Layout::KeyPtr(index, this), Layout::KeyPtr(index + 1, this),\n            num_items_to_shift * Layout::kKeySize);\n    if (!leaf_) {\n      index += unsigned(child_step_right);\n      num_items_to_shift = num_items_ - index;\n      if (num_items_to_shift > 0) {\n        uint8_t* dest = Layout::ChildrenStart(this) + index * sizeof(BPTreeNode*);\n        uint8_t* src = dest + sizeof(BPTreeNode*);\n        memmove(dest, src, num_items_to_shift * sizeof(BPTreeNode*));\n      }\n    }\n  }\n  num_items_--;\n}\n\n/***\n *  Rebalances the (full) child at position pos with its sibling. `this` node is an inner node.\n *  It first tried to rebalance (move items) from the full child to its left sibling. If the left\n *  sibling does not have enough space, it tries to rebalance to the right sibling. The caller\n *  passes the original position of the item it tried to insert into the full child. In case the\n *  rebalance succeeds the function returns the new node and the position to insert into. Otherwise,\n *  it returns result.first == nullptr.\n */\ntemplate <typename T>\nstd::pair<BPTreeNode<T>*, unsigned> BPTreeNode<T>::RebalanceChild(unsigned pos,\n                                                                  unsigned insert_pos) {\n  unsigned to_move = 0;\n  BPTreeNode* node = Child(pos);\n\n  if (pos > 0) {\n    BPTreeNode* left = Child(pos - 1);\n    unsigned dest_free = left->AvailableSlotCount();\n    if (dest_free > 0) {\n      // We bias rebalancing based on the position being inserted. 
If we're\n      // inserting at the end of the right node then we bias rebalancing to\n      // fill up the left node.\n      if (insert_pos == node->NumItems()) {\n        to_move = dest_free;\n        assert(to_move < node->NumItems());\n      } else if (dest_free > 1) {\n        // we move less than left free capacity which leaves us some space in the node.\n        to_move = dest_free / 2;\n      }\n\n      if (to_move) {\n        unsigned dest_old_count = left->NumItems();\n        RebalanceChildToLeft(pos, to_move);\n        assert(node->AvailableSlotCount() == to_move);\n        if (insert_pos < to_move) {\n          assert(left->AvailableSlotCount() > 0u);       // we did not fill up the left node.\n          insert_pos = dest_old_count + insert_pos + 1;  // +1 because we moved the separator.\n          node = left;\n        } else {\n          insert_pos -= to_move;\n        }\n\n        return {node, insert_pos};\n      }\n    }\n  }\n\n  if (pos < NumItems()) {\n    BPTreeNode* right = Child(pos + 1);\n    unsigned dest_free = right->AvailableSlotCount();\n    if (dest_free > 0) {\n      if (insert_pos == 0) {\n        to_move = dest_free;\n        assert(to_move < node->NumItems());\n      } else if (dest_free > 1) {\n        to_move = dest_free / 2;\n      }\n\n      if (to_move) {\n        RebalanceChildToRight(pos, to_move);\n        if (insert_pos > node->NumItems()) {\n          insert_pos -= (node->NumItems() + 1);\n          node = right;\n        }\n        return {node, insert_pos};\n      }\n    }\n  }\n  return {nullptr, 0};\n}\n\ntemplate <typename T> void BPTreeNode<T>::RebalanceChildToLeft(unsigned child_pos, unsigned count) {\n  assert(child_pos > 0u);\n  BPTreeNode* src = Child(child_pos);\n  BPTreeNode* dest = Child(child_pos - 1);\n  assert(src->NumItems() >= count);\n  assert(count >= 1u);\n  assert(dest->AvailableSlotCount() >= count);\n\n  unsigned dest_items = dest->NumItems();\n\n  // Move the delimiting value to the left node.\n  
dest->SetKey(dest_items, Key(child_pos - 1));\n\n  // Copy src keys [0, count-1] to dest keys [dest_items+1, dest_items+count].\n  for (unsigned i = 1; i < count; ++i) {\n    dest->SetKey(dest_items + i, src->Key(i - 1));\n  }\n\n  SetKey(child_pos - 1, src->Key(count - 1));\n\n  // Shift the values in the right node to their correct position.\n  for (unsigned i = count; i < src->NumItems(); ++i) {\n    src->SetKey(i - count, src->Key(i));\n  }\n\n  if (!src->IsLeaf()) {\n    // Move the child pointers from the right to the left node.\n    uint32_t src_move_count = 0;\n    for (unsigned i = 0; i < count; ++i) {\n      src_move_count += src->GetChildTreeCount(i);\n      dest->SetChild(1 + dest->NumItems() + i, src->Child(i));\n    }\n\n    uint32_t dest_tree_count = GetChildTreeCount(child_pos - 1);\n    uint32_t src_tree_count = GetChildTreeCount(child_pos);\n    SetChildTreeCount(child_pos - 1, dest_tree_count + src_move_count + count);\n    SetChildTreeCount(child_pos, src_tree_count - src_move_count - count);\n\n    for (unsigned i = count; i <= src->NumItems(); ++i) {\n      src->SetChild(i - count, src->Child(i));\n      src->SetChild(i, NULL);\n    }\n  }\n\n  // Fixup the counts on the src and dest nodes.\n  dest->num_items_ += count;\n  src->num_items_ -= count;\n}\n\ntemplate <typename T>\nvoid BPTreeNode<T>::RebalanceChildToRight(unsigned child_pos, unsigned count) {\n  assert(child_pos < NumItems());\n  BPTreeNode* src = Child(child_pos);\n  BPTreeNode* dest = Child(child_pos + 1);\n\n  assert(src->NumItems() >= count);\n  assert(count >= 1u);\n  assert(dest->AvailableSlotCount() >= count);\n\n  unsigned dest_items = dest->NumItems();\n\n  assert(dest_items > 0u);\n\n  // Shift the values in the right node to their correct position.\n  for (int i = dest_items - 1; i >= 0; --i) {\n    dest->SetKey(i + count, dest->Key(i));\n  }\n\n  // Move the delimiting value to the left node and the new delimiting value\n  // from the right node.\n  KeyT new_delim = 
src->Key(src->NumItems() - count);\n  for (unsigned i = 1; i < count; ++i) {\n    unsigned src_id = src->NumItems() - count + i;\n    dest->SetKey(i - 1, src->Key(src_id));\n  }\n  // Move parent's delimiter to destination and update it with new delimiter.\n  dest->SetKey(count - 1, Key(child_pos));\n  SetKey(child_pos, new_delim);\n\n  if (!src->IsLeaf()) {\n    // Shift child pointers in the right node to their correct position.\n    for (int i = dest_items; i >= 0; --i) {\n      dest->SetChild(i + count, dest->Child(i));\n    }\n\n    // Move child pointers from the left node to the right.\n    uint32_t src_move_count = 0;\n    for (unsigned i = 0; i < count; ++i) {\n      unsigned src_id = src->NumItems() - (count - 1) + i;\n      src_move_count += src->Child(src_id)->TreeCount();\n      dest->SetChild(i, src->Child(src_id));\n      src->SetChild(src_id, NULL);\n    }\n\n    uint32_t dest_tree_count = GetChildTreeCount(child_pos + 1);\n    uint32_t src_tree_count = GetChildTreeCount(child_pos);\n    SetChildTreeCount(child_pos + 1, dest_tree_count + src_move_count + count);\n    SetChildTreeCount(child_pos, src_tree_count - src_move_count - count);\n  }\n\n  // Fixup the counts on the src and dest nodes.\n  dest->num_items_ += count;\n  src->num_items_ -= count;\n}\n\ntemplate <typename T> BPTreeNode<T>* BPTreeNode<T>::MergeOrRebalanceChild(unsigned pos) {\n  BPTreeNode* node = Child(pos);\n  BPTreeNode* left = nullptr;\n\n  assert(NumItems() >= 1u);\n  assert(node->NumItems() < node->MinItems());\n\n  if (pos > 0) {\n    left = Child(pos - 1);\n    if (left->NumItems() + 1 + node->NumItems() <= left->MaxItems()) {\n      left->MergeFromRight(Key(pos - 1), node);\n      ShiftLeft(pos - 1, true);\n      return node;\n    }\n  }\n\n  if (pos < NumItems()) {\n    BPTreeNode* right = Child(pos + 1);\n    if (node->NumItems() + 1 + right->NumItems() <= right->MaxItems()) {\n      node->MergeFromRight(Key(pos), right);\n      ShiftLeft(pos, true);\n      return 
right;\n    }\n\n    // Try rebalancing with our right sibling.\n    // TODO: don't perform rebalancing if\n    // we deleted the first element from node and the node is not\n    // empty. This is a small optimization for the common pattern of deleting\n    // from the front of the tree.\n    if (true) {\n      unsigned to_move = (right->NumItems() - node->NumItems()) / 2;\n      assert(to_move < right->NumItems());\n\n      RebalanceChildToLeft(pos + 1, to_move);\n      return nullptr;\n    }\n  }\n\n  assert(left);\n\n  if (left) {\n    // Try rebalancing with our left sibling.\n    // TODO: don't perform rebalancing if we deleted the last element from node and the\n    // node is not empty. This is a small optimization for the common pattern of deleting\n    // from the back of the tree.\n    if (true) {\n      unsigned to_move = (left->NumItems() - node->NumItems()) / 2;\n      assert(to_move < left->NumItems());\n      RebalanceChildToRight(pos - 1, to_move);\n      return nullptr;\n    }\n  }\n  return nullptr;\n}\n\n// splits the node into two nodes. The left node is the current node and the right node\n// is filled with the right half of the items. 
The median key is returned in *median.\ntemplate <typename T> void BPTreeNode<T>::Split(BPTreeNode<T>* right, T* median) {\n  unsigned mid = num_items_ / 2;\n  *median = Key(mid);\n  right->leaf_ = leaf_;\n  right->num_items_ = num_items_ - (mid + 1);\n  memmove(Layout::KeyPtr(0, right), Layout::KeyPtr(mid + 1, this),\n          right->num_items_ * Layout::kKeySize);\n  if (!IsLeaf()) {\n    uint32_t right_subtree_count = right->num_items_;\n    for (size_t i = 0; i <= right->num_items_; i++) {\n      BPTreeNode* child = Child(mid + 1 + i);\n      right_subtree_count += child->TreeCount();\n      right->SetChild(i, child);\n    }\n    right->SetTreeCount(right_subtree_count);\n    IncreaseTreeCount(-(right_subtree_count + 1));\n  }\n  num_items_ = mid;\n}\n\ntemplate <typename T> void BPTreeNode<T>::MergeFromRight(KeyT key, BPTreeNode<T>* right) {\n  assert(NumItems() + 1 + right->NumItems() <= MaxItems());\n\n  unsigned dest_items = NumItems();\n  SetKey(dest_items, key);\n  for (unsigned i = 0; i < right->NumItems(); ++i) {\n    SetKey(dest_items + 1 + i, right->Key(i));\n  }\n\n  if (!IsLeaf()) {\n    for (unsigned i = 0; i <= right->NumItems(); ++i) {\n      SetChild(dest_items + 1 + i, right->Child(i));\n    }\n    IncreaseTreeCount(right->TreeCount() + 1);\n  }\n  num_items_ += 1 + right->NumItems();\n  right->num_items_ = 0;\n}\n\ntemplate <typename T> uint32_t BPTreePath<T>::Rank() const {\n  uint32_t rank = 0;\n  unsigned bound = Depth();\n\n  for (unsigned i = 0; i < bound; ++i) {\n    auto* node = Node(i);\n    unsigned pos = Position(i);\n    if (!node->IsLeaf()) {\n      unsigned delta = (i == bound - 1) ? 
1 : 0;\n      for (unsigned j = 0; j < pos + delta; ++j) {\n        rank += node->Child(j)->TreeCount();\n      }\n    }\n    rank += pos;\n  }\n\n  return rank;\n}\n\ntemplate <typename T> bool BPTreePath<T>::Next() {\n  assert(depth_ > 0);\n  BPTreeNode<T>* node = Last().first;\n\n  // The data in BPTree is stored in both the leaf nodes and the inner nodes.\n  if (node->IsLeaf()) {\n    ++record_[depth_ - 1].pos;\n    if (record_[depth_ - 1].pos < node->NumItems()) {\n      return true;\n    }\n\n    // Advance to the next item, which is Key(i) in some ancestor of the subtree with\n    // root Child(i). i in that case must be less than NumItems().\n    // Note, that subtree Child(i) in an inner node is located before Key(i).\n    do {\n      Pop();\n    } while (depth_ > 0 && Position(depth_ - 1) == Node(depth_ - 1)->NumItems());\n\n    // we either point now on separator Key(i) in the parent node or we finished the tree.\n    return depth_ > 0;\n  }\n\n  // We are in the inner node after the ascent from the leaf node. 
We need to advance to the next\n  // Child and dig left.\n  assert(!node->IsLeaf());\n  assert(record_[depth_ - 1].pos < node->NumItems());\n\n  // we are in the inner node pointing to the separator.\n  // now we need to advance to the next child and dig to the leftmost leaf.\n  record_[depth_ - 1].pos++;\n  do {\n    node = node->Child(record_[depth_ - 1].pos);\n    Push(node, 0);\n  } while (!node->IsLeaf());\n\n  return true;\n}\n\ntemplate <typename T> bool BPTreePath<T>::Prev() {\n  assert(depth_ > 0);\n\n  auto* node = record_[depth_ - 1].node;\n  if (node->IsLeaf()) {\n    /*\n        node\n        / \\\n       l   r\n\n       We must go left (decrement pos), and if there is no left, we must go up until we can\n       go left.\n    */\n    while (record_[depth_ - 1].pos == 0) {\n      Pop();\n      if (depth_ == 0) {\n        return false;\n      }\n    }\n    assert(depth_ > 0 && record_[depth_ - 1].pos > 0);\n\n    // we finished backtracking from child(i+1) or stayed in the leaf.\n    // either way stop at the next key on the left.\n    --record_[depth_ - 1].pos;\n    return true;\n  }\n\n  DigRight();\n  return true;\n}\n\ntemplate <typename T> void BPTreePath<T>::DigRight() {\n  assert(depth_ > 0);\n  BPTreeNode<T>* node = Last().first;\n\n  assert(!node->IsLeaf());\n\n  // we are in the inner node pointing to the separator.\n  // we now must explore the left subtree which is located under the same index as the separator.\n  // we go far-right in the left subtree.\n  do {\n    node = node->Child(record_[depth_ - 1].pos);\n    Push(node, node->NumItems());\n  } while (!node->IsLeaf());\n\n  // we reached the leaf node, fix the position to point to the last key.\n  assert(record_[depth_ - 1].node->IsLeaf());\n  --record_[depth_ - 1].pos;\n}\n\n}  // namespace detail\n}  // namespace dfly\n"
  },
  {
    "path": "src/core/detail/gen_utils.h",
    "content": "// Copyright 2026, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#pragma once\n\n#include <absl/random/random.h>\n#include <absl/strings/str_cat.h>\n\n#include <string>\n\nnamespace dfly {\n\ninline std::string GetRandomHex(absl::InsecureBitGen& gen, size_t len, size_t len_deviation = 0) {\n  static_assert(std::is_same<uint64_t, decltype(gen())>::value);\n  if (len_deviation) {\n    len += (gen() % len_deviation);\n  }\n\n  std::string res(len, '\\0');\n  size_t indx = 0;\n\n  for (size_t i = 0; i < len / 16; ++i) {  // 2 chars per byte\n    absl::numbers_internal::FastHexToBufferZeroPad16(gen(), res.data() + indx);\n    indx += 16;\n  }\n\n  if (indx < res.size()) {\n    char buf[32];\n    absl::numbers_internal::FastHexToBufferZeroPad16(gen(), buf);\n\n    for (unsigned j = 0; indx < res.size(); indx++, j++) {\n      res[indx] = buf[j];\n    }\n  }\n\n  return res;\n}\n\n}  // namespace dfly\n"
  },
  {
    "path": "src/core/detail/listpack.cc",
    "content": "// Copyright 2026, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#include \"core/detail/listpack.h\"\n\n#include \"base/logging.h\"\n\nnamespace dfly {\nnamespace detail {\n\nusing namespace std;\n\nQList::Entry ListPack::GetEntry(uint8_t* pos) {\n  unsigned int slen;\n  long long lval;\n  uint8_t* vstr = lpGetValue(pos, &slen, &lval);\n  return vstr ? QList::Entry(reinterpret_cast<char*>(vstr), slen) : QList::Entry(lval);\n}\n\nstring ListPack::Pop(QList::Where where) {\n  uint8_t* pos = GetFirst(where);\n  DCHECK(pos);\n\n  string res = GetEntry(pos).to_string();\n  lp_ = lpDelete(lp_, pos, nullptr);\n  return res;\n}\n\nvoid ListPack::Push(string_view value, QList::Where where) {\n  if (where == QList::HEAD) {\n    lp_ = lpPrepend(lp_, (unsigned char*)value.data(), value.size());\n  } else {\n    lp_ = lpAppend(lp_, (unsigned char*)value.data(), value.size());\n  }\n}\n\nstring ListPack::First(QList::Where where) const {\n  uint8_t* pos = GetFirst(where);\n  DCHECK(pos);\n\n  return GetEntry(pos).to_string();\n}\n\nstd::optional<string> ListPack::At(long index) const {\n  uint8_t* pos = lpSeek(lp_, index);\n  if (!pos)\n    return nullopt;\n\n  return GetEntry(pos).to_string();\n}\n\nvector<uint32_t> ListPack::Pos(string_view element, uint32_t rank, uint32_t count, uint32_t max_len,\n                               QList::Where where) const {\n  DCHECK_GT(rank, 0u);\n  vector<uint32_t> matches;\n\n  uint8_t* p = GetFirst(where);\n  unsigned index = 0;\n  while (p && (max_len == 0 || index < max_len)) {\n    if (GetEntry(p) == element) {\n      if (rank == 1) {\n        size_t sz = lpLength(lp_);\n        auto k = (where == QList::HEAD) ? index : sz - index - 1;\n        matches.push_back(k);\n        if (count && matches.size() >= count)\n          break;\n      } else {\n        rank--;\n      }\n    }\n    index++;\n    p = (where == QList::HEAD) ? 
lpNext(lp_, p) : lpPrev(lp_, p);\n  }\n  return matches;\n}\n\nuint8_t* ListPack::Find(std::string_view elem) const {\n  uint8_t* p = lpFirst(lp_);\n  while (p) {\n    if (GetEntry(p) == elem) {\n      return p;\n    }\n    p = lpNext(lp_, p);\n  }\n  return nullptr;\n}\n\nunsigned ListPack::Remove(const CollectionEntry& elem, unsigned count, QList::Where where) {\n  unsigned removed = 0;\n\n  auto is_match = [&](const QList::Entry& entry) {\n    return elem.is_int() ? entry.is_int() && entry.ival() == elem.ival() : entry == elem.view();\n  };\n\n  uint8_t* p = GetFirst(where);\n\n  while (p) {\n    if (is_match(GetEntry(p))) {\n      // lpDelete returns pointer to the element AFTER the deleted one (toward tail)\n      lp_ = lpDelete(lp_, p, &p);\n\n      if (where == QList::TAIL) {\n        // Iterating backward (from TAIL): need to get the previous element\n        if (p) {\n          p = lpPrev(lp_, p);\n        } else {\n          // Deleted the tail element, lpDelete returned nullptr (no element after tail).\n          // We need to continue from the new tail to keep moving towards HEAD.\n          p = lpLast(lp_);\n        }\n      }\n      // For HEAD direction, 'p' already points to the next element to check\n\n      removed++;\n      if (count && removed == count)\n        break;\n      continue;\n    }\n\n    p = (where == QList::HEAD) ? lpNext(lp_, p) : lpPrev(lp_, p);\n  }\n\n  return removed;\n}\n\n}  // namespace detail\n}  // namespace dfly\n"
  },
  {
    "path": "src/core/detail/listpack.h",
    "content": "// Copyright 2026, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#pragma once\n\n#include <optional>\n#include <string>\n#include <string_view>\n\n#include \"core/qlist.h\"\n\nextern \"C\" {\n#include \"redis/listpack.h\"\n}\n\nnamespace dfly {\nnamespace detail {\n\n// A listpack wrapper that provides basic list operations.\n// Unfortunately, we already have a listpack wrapper in core/detail/listpack_wrap.h but\n// it's more map oriented and doesn't provide the basic list operations we need here.\n// TODO: to unify both wrappers into one.\nclass ListPack {\n public:\n  explicit ListPack(uint8_t* lp = nullptr) : lp_(lp) {\n  }\n\n  size_t Size() const {\n    return lpLength(lp_);\n  }\n\n  // Removes and returns an element from the specified end (HEAD or TAIL).\n  std::string Pop(QList::Where where);\n\n  // Adds an element to the specified end (HEAD or TAIL).\n  void Push(std::string_view value, QList::Where where);\n\n  // Returns the first element from the specified end without removing it.\n  std::string First(QList::Where where) const;\n\n  // Returns the element at the specified index, or std::nullopt if out of bounds.\n  std::optional<std::string> At(long index) const;\n\n  // Finds positions of an element matching the given criteria.\n  std::vector<uint32_t> Pos(std::string_view element, uint32_t rank, uint32_t count,\n                            uint32_t max_len, QList::Where where) const;\n\n  uint8_t* Find(std::string_view elem) const;\n\n  uint8_t* Seek(long index) const {\n    return lpSeek(lp_, index);\n  }\n\n  // Inserts an element before or after the specified pivot element.\n  void Insert(uint8_t* pivot, std::string_view elem, QList::InsertOpt insert_opt) {\n    int where = (insert_opt == QList::BEFORE) ? 
LP_BEFORE : LP_AFTER;\n    lp_ = lpInsertString(lp_, (unsigned char*)elem.data(), elem.size(), pivot, where, nullptr);\n  }\n\n  // Removes up to count occurrences of elem from the specified direction.\n  unsigned Remove(const CollectionEntry& elem, unsigned count, QList::Where where);\n\n  // Replaces the element at the specified position (pos) with a new value.\n  void Replace(uint8_t* pos, std::string_view elem) {\n    lp_ = lpReplace(lp_, &pos, (unsigned char*)elem.data(), elem.size());\n  }\n\n  // Removes count elements starting from the specified index.\n  void Erase(long start, long count) {\n    lp_ = lpDeleteRange(lp_, start, count);\n  }\n\n  // Returns the raw listpack pointer.\n  uint8_t* GetPointer() const {\n    return lp_;\n  }\n\n  size_t BytesSize() const {\n    return lpBytes(lp_);\n  }\n\n private:\n  static CollectionEntry GetEntry(uint8_t* pos);\n\n  uint8_t* GetFirst(QList::Where where) const {\n    return (where == QList::HEAD) ? lpFirst(lp_) : lpLast(lp_);\n  }\n\n  uint8_t* lp_;\n};\n\n}  // namespace detail\n}  // namespace dfly\n"
  },
  {
    "path": "src/core/detail/listpack_wrap.cc",
    "content": "// Copyright 2025, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n#include \"core/detail/listpack_wrap.h\"\n\n#include \"base/logging.h\"\n\nextern \"C\" {\n#include \"redis/listpack.h\"\n}\n\nnamespace dfly::detail {\n\nListpackWrap::Iterator::Iterator(uint8_t* lp, uint8_t* ptr, IntBuf& intbuf)\n    : lp_{lp}, ptr_{ptr}, next_ptr_{nullptr}, intbuf_(intbuf) {\n  static_assert(sizeof(intbuf_[0]) >= LP_INTBUF_SIZE);  // to avoid header dependency\n  Read();\n}\n\nListpackWrap::Iterator& ListpackWrap::Iterator::operator++() {\n  ptr_ = next_ptr_;\n  Read();\n  return *this;\n}\n\nvoid ListpackWrap::Iterator::Read() {\n  if (!ptr_)\n    return;\n\n  key_v_ = GetView(ptr_, intbuf_[0]);\n  next_ptr_ = lpNext(lp_, ptr_);\n  value_v_ = GetView(next_ptr_, intbuf_[1]);\n  next_ptr_ = lpNext(lp_, next_ptr_);\n}\n\nListpackWrap::~ListpackWrap() {\n  DCHECK(!dirty_);\n}\n\nListpackWrap ListpackWrap::WithCapacity(size_t capacity) {\n  return ListpackWrap{lpNew(capacity)};\n}\n\nuint8_t* ListpackWrap::GetPointer() {\n  dirty_ = false;\n  return lp_;\n}\n\nListpackWrap::Iterator ListpackWrap::Find(std::string_view key) const {\n  if (size() == 0)\n    return end();\n\n  uint8_t* ptr = lpFind(lp_, lpFirst(lp_), (unsigned char*)key.data(), key.size(), 1);\n  return Iterator{lp_, ptr, intbuf_};\n}\n\nbool ListpackWrap::Delete(std::string_view key) {\n  if (size() == 0)\n    return false;\n\n  uint8_t* ptr = lpFind(lp_, lpFirst(lp_), (unsigned char*)key.data(), key.size(), 1);\n  if (ptr == nullptr)\n    return false;\n\n  lp_ = lpDeleteRangeWithEntry(lp_, &ptr, 2);\n  dirty_ = true;\n  return true;\n}\n\nbool ListpackWrap::Insert(std::string_view key, std::string_view value, bool skip_exists) {\n  uint8_t* vptr;\n  uint8_t* fptr = lpFirst(lp_);\n  uint8_t* fsrc = key.empty() ? 
lp_ : (uint8_t*)key.data();\n  // if vsrc is NULL then lpReplace will delete the element, which is not what we want.\n  // therefore, for an empty val we set it to some other valid address so that lpReplace\n  // will do the right thing and encode empty string instead of deleting the element.\n  uint8_t* vsrc = value.empty() ? lp_ : (uint8_t*)value.data();\n\n  bool updated = false;\n  if (fptr) {\n    fptr = lpFind(lp_, fptr, fsrc, key.size(), 1);\n    if (fptr) {\n      if (skip_exists)\n        return false;\n\n      // Grab pointer to the value (fptr points to the field)\n      vptr = lpNext(lp_, fptr);\n\n      // Replace value\n      lp_ = lpReplace(lp_, &vptr, vsrc, value.size());\n      DCHECK_EQ(0u, lpLength(lp_) % 2);\n\n      dirty_ = true;\n      updated = true;\n    }\n  }\n\n  if (!updated) {\n    // Push new field/value pair onto the tail of the listpack.\n    // TODO: we should at least allocate once for both elements\n    lp_ = lpAppend(lp_, fsrc, key.size());\n    lp_ = lpAppend(lp_, vsrc, value.size());\n    dirty_ = true;\n  }\n\n  return !updated;\n}\n\nsize_t ListpackWrap::size() const {\n  return lpLength(lp_) / 2;\n}\n\nListpackWrap::Iterator ListpackWrap::begin() const {\n  return Iterator{lp_, lpFirst(lp_), intbuf_};\n}\n\nListpackWrap::Iterator ListpackWrap::end() const {\n  return Iterator{lp_, nullptr, intbuf_};\n}\n\nsize_t ListpackWrap::UsedBytes() const {\n  return lpBytes(lp_);\n}\n\nstd::string_view ListpackWrap::GetView(uint8_t* lp_it, uint8_t int_buf[]) {\n  int64_t ele_len = 0;\n  uint8_t* elem = lpGet(lp_it, &ele_len, int_buf);\n  DCHECK(elem);\n  return std::string_view{reinterpret_cast<char*>(elem), size_t(ele_len)};\n}\n\nbool ListpackWrap::Iterator::operator==(const Iterator& other) const {\n  return lp_ == other.lp_ && ptr_ == other.ptr_;\n}\n}  // namespace dfly::detail\n"
  },
  {
    "path": "src/core/detail/listpack_wrap.h",
    "content": "// Copyright 2025, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n#pragma once\n\n#include <cstdint>\n#include <cstdio>\n#include <string_view>\n\nnamespace dfly::detail {\n\n// Wrapper around map data structure based on listpack\nstruct ListpackWrap {\n private:\n  using IntBuf = uint8_t[2][24];\n\n public:\n  ~ListpackWrap();\n\n  struct Iterator {\n    using iterator_category = std::forward_iterator_tag;\n    using difference_type = std::ptrdiff_t;\n    using value_type = std::pair<std::string_view, std::string_view>;\n    using reference = value_type;\n    using pointer = value_type*;\n\n    Iterator(uint8_t* lp, uint8_t* ptr, IntBuf& intbuf);\n    Iterator& operator++();\n\n    value_type operator*() const {\n      return {key_v_, value_v_};\n    }\n\n    bool operator==(const Iterator& other) const;\n\n    bool operator!=(const Iterator& other) const {\n      return !(operator==(other));\n    }\n\n   private:\n    void Read();  // Read next entry at ptr and determine next_ptr\n\n    uint8_t *lp_ = nullptr, *ptr_ = nullptr, *next_ptr_ = nullptr;\n    std::string_view key_v_, value_v_;\n    IntBuf& intbuf_;\n  };\n\n  explicit ListpackWrap(uint8_t* lp) : lp_{lp} {\n  }\n\n  // Create listpack with capacity\n  static ListpackWrap WithCapacity(size_t capacity);\n\n  uint8_t* GetPointer();                      // Get new updated pointer\n  Iterator Find(std::string_view key) const;  // Linear search\n  bool Delete(std::string_view key);\n  bool Insert(std::string_view key, std::string_view value, bool skip_exists);\n\n  Iterator begin() const;\n  Iterator end() const;\n  size_t size() const;  // number of entries\n  size_t UsedBytes() const;\n\n  // Get view from raw listpack iterator\n  static std::string_view GetView(uint8_t* lp_it, uint8_t int_buf[]);\n\n private:\n  uint8_t* lp_;            // the listpack itself\n  mutable IntBuf intbuf_;  // buffer for integers decoded to strings\n  bool dirty_ = false;   
  // whether lp_ was updated, but never retrieved with GetPointer\n};\n\n}  // namespace dfly::detail\n"
  },
  {
    "path": "src/core/detail/stateless_allocator.h",
    "content": "// Copyright 2025, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n\n#pragma once\n\n#include <cassert>\n\n#include \"base/pmr/memory_resource.h\"\n\nnamespace dfly {\n\nnamespace detail {\ninline thread_local PMR_NS::memory_resource* tl_mr = nullptr;\n}  // namespace detail\n\ntemplate <typename T, typename Impl> class StatelessAllocatorBase {\n public:\n  using value_type = T;\n  using size_type = std::size_t;\n  using difference_type = std::ptrdiff_t;\n  using is_always_equal = std::true_type;\n\n  template <typename U, typename... _Args> void construct(U* __p, _Args&&... __args) {\n    ::new (static_cast<void*>(__p)) U(std::forward<_Args>(__args)...);\n  }\n\n  static value_type* allocate(size_type n) {\n    static_assert(\n        std::is_empty_v<Impl>,\n        \"StatelessAllocator must not contain state, so it can use empty base optimization\");\n\n    void* ptr = Impl::resource()->allocate(n * sizeof(value_type), alignof(value_type));\n    return static_cast<value_type*>(ptr);\n  }\n\n  static void deallocate(value_type* ptr, size_type n) noexcept {\n    Impl::resource()->deallocate(ptr, n * sizeof(value_type), alignof(value_type));\n  }\n};\n\ntemplate <typename T>\nclass StatelessAllocator : public StatelessAllocatorBase<T, StatelessAllocator<T>> {\n public:\n  StatelessAllocator() noexcept {\n    assert(detail::tl_mr != nullptr);\n  }\n\n  template <typename U> StatelessAllocator(const StatelessAllocator<U>&) noexcept {  // NOLINT\n  }\n\n  static PMR_NS::memory_resource* resource() {\n    return detail::tl_mr;\n  }\n};\n\ntemplate <typename T, typename U>\nbool operator==(const StatelessAllocator<T>&, const StatelessAllocator<U>&) noexcept {\n  return true;\n}\n\ntemplate <typename T, typename U>\nbool operator!=(const StatelessAllocator<T>&, const StatelessAllocator<U>&) noexcept {\n  return false;\n}\n\ninline void InitTLStatelessAllocMR(PMR_NS::memory_resource* mr) {\n  detail::tl_mr = mr;\n}\n\ninline 
void CleanupStatelessAllocMR() {\n  detail::tl_mr = nullptr;\n}\n\n}  // namespace dfly\n"
  },
  {
    "path": "src/core/dfly_core_test.cc",
    "content": "// Copyright 2022, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#include <absl/strings/charconv.h>\n#include <absl/strings/numbers.h>\n#include <fast_float/fast_float.h>\n\n#ifdef USE_PCRE2\n#define PCRE2_CODE_UNIT_WIDTH 8\n#include <pcre2.h>\n#endif\n\n#ifdef USE_RE2\n#include <re2/re2.h>\n#endif\n\n#include <reflex/matcher.h>\n\n#include <random>\n#include <regex>\n\n#include \"base/gtest.h\"\n#include \"base/logging.h\"\n#include \"core/glob_matcher.h\"\n#include \"core/huff_coder.h\"\n#include \"core/intent_lock.h\"\n#include \"core/tx_queue.h\"\n\nnamespace dfly {\n\nusing namespace std;\n\nstd::random_device rd;\n\nstatic string GetRandomHex(size_t len) {\n  std::string res(len, '\\0');\n  size_t indx = 0;\n\n  for (; indx < len; indx += 16) {  // 2 chars per byte\n    absl::numbers_internal::FastHexToBufferZeroPad16(rd(), res.data() + indx);\n  }\n\n  if (indx < len) {\n    char buf[24];\n    absl::numbers_internal::FastHexToBufferZeroPad16(rd(), buf);\n\n    for (unsigned j = 0; indx < len; indx++, j++) {\n      res[indx] = buf[j];\n    }\n  }\n\n  return res;\n}\n\nextern int stringmatchlen(const char* pattern, int patternLen, const char* string, int stringLen,\n                          int nocase);\n\nclass TxQueueTest : public ::testing::Test {\n protected:\n  TxQueueTest() {\n  }\n\n  uint64_t Pop() {\n    if (pq_.Empty())\n      return uint64_t(-1);\n    TxQueue::ValueType val = pq_.Front();\n    pq_.PopFront();\n\n    return std::get<uint64_t>(val);\n  }\n\n  TxQueue pq_;\n};\n\nTEST_F(TxQueueTest, Basic) {\n  pq_.Insert(4);\n  pq_.Insert(3);\n  pq_.Insert(2);\n\n  unsigned cnt = 0;\n  auto head = pq_.Head();\n  auto it = head;\n  do {\n    ++cnt;\n    it = pq_.Next(it);\n  } while (it != head);\n  EXPECT_EQ(3, cnt);\n\n  ASSERT_EQ(2, Pop());\n  ASSERT_EQ(3, Pop());\n  ASSERT_EQ(4, Pop());\n  ASSERT_TRUE(pq_.Empty());\n\n  EXPECT_EQ(TxQueue::kEnd, pq_.Head());\n\n  pq_.Insert(10);\n  
ASSERT_EQ(10, Pop());\n}\n\nclass IntentLockTest : public ::testing::Test {\n protected:\n  IntentLock lk_;\n};\n\nTEST_F(IntentLockTest, Basic) {\n  ASSERT_TRUE(lk_.Acquire(IntentLock::SHARED));\n  ASSERT_FALSE(lk_.Acquire(IntentLock::EXCLUSIVE));\n  lk_.Release(IntentLock::EXCLUSIVE);\n\n  ASSERT_FALSE(lk_.Check(IntentLock::EXCLUSIVE));\n  lk_.Release(IntentLock::SHARED);\n  ASSERT_TRUE(lk_.Check(IntentLock::EXCLUSIVE));\n}\n\nclass StringMatchTest : public ::testing::Test {\n protected:\n  // wrapper around stringmatchlen with stringview arguments\n  bool MatchLen(string_view pattern, string_view str, bool nocase) {\n    GlobMatcher matcher(pattern, !nocase);\n    return matcher.Matches(str);\n  }\n};\n\nTEST_F(StringMatchTest, Glob2Regex) {\n  EXPECT_EQ(GlobMatcher::Glob2Regex(\"\"), \"\");\n  EXPECT_EQ(GlobMatcher::Glob2Regex(\"*\"), \".*\");\n  EXPECT_EQ(GlobMatcher::Glob2Regex(\"\\\\*\"), \"\\\\*\");\n  EXPECT_EQ(GlobMatcher::Glob2Regex(\"\\\\?\"), \"\\\\?\");\n  EXPECT_EQ(GlobMatcher::Glob2Regex(\"[abc]\"), \"[abc]\");\n  EXPECT_EQ(GlobMatcher::Glob2Regex(\"[^abc]\"), \"[^abc]\");\n  EXPECT_EQ(GlobMatcher::Glob2Regex(\"h\\\\[^|\"), \"h\\\\[\\\\^\\\\|\");\n  EXPECT_EQ(GlobMatcher::Glob2Regex(\"[$?^]a\"), \"[$?^]a\");\n  EXPECT_EQ(GlobMatcher::Glob2Regex(\"[^]a\"), \".a\");\n  EXPECT_EQ(GlobMatcher::Glob2Regex(\"[]a\"), \"[]a\");\n  EXPECT_EQ(GlobMatcher::Glob2Regex(\"\\\\d\"), \"d\");\n  EXPECT_EQ(GlobMatcher::Glob2Regex(\"[\\\\d]\"), \"[\\\\\\\\d]\");\n  EXPECT_EQ(GlobMatcher::Glob2Regex(\"abc\\\\\"), \"abc\\\\\\\\\");\n  EXPECT_EQ(GlobMatcher::Glob2Regex(\"[\\\\]]\"), \"[\\\\]]\");\n  reflex::Matcher matcher(\"abc[\\\\\\\\d]e\");\n  matcher.input(\"abcde\");\n  ASSERT_TRUE(matcher.find());\n}\n\nTEST_F(StringMatchTest, Basic) {\n  EXPECT_EQ(MatchLen(\"\", \"\", 0), 1);\n\n  EXPECT_EQ(MatchLen(\"*\", \"\", 0), 0);\n  EXPECT_EQ(MatchLen(\"*\", \"\", 1), 0);\n  EXPECT_EQ(MatchLen(\"\\\\\\\\\", \"\\\\\", 0), 1);\n  EXPECT_EQ(MatchLen(\"h\\\\\\\\llo\", 
\"h\\\\llo\", 0), 1);\n  EXPECT_EQ(MatchLen(\"a\\\\bc\", \"ABC\", 1), 1);\n\n  // ExactMatch\n  EXPECT_EQ(MatchLen(\"hello\", \"hello\", 0), 1);\n  EXPECT_EQ(MatchLen(\"hello\", \"world\", 0), 0);\n\n  // Wildcards\n  EXPECT_EQ(MatchLen(\"*\", \"hello\", 0), 1);\n  EXPECT_EQ(MatchLen(\"*\", \"1234567890123456\", 0), 1);\n  EXPECT_EQ(MatchLen(\"h*\", \"hello\", 0), 1);\n  EXPECT_EQ(MatchLen(\"h*\", \"abc\", 0), 0);\n  EXPECT_EQ(MatchLen(\"h*o\", \"hello\", 0), 1);\n  EXPECT_EQ(MatchLen(\"hel*o*\", \"hello*\", 0), 1);\n  EXPECT_EQ(MatchLen(\"h\\\\*llo\", \"h*llo\", 0), 1);\n\n  // Single character wildcard\n  EXPECT_EQ(MatchLen(\"h[aeiou]llo\", \"hello\", 0), 1);\n  EXPECT_EQ(MatchLen(\"h[aeiou]llo\", \"hallo\", 0), 1);\n  EXPECT_EQ(MatchLen(\"h[^aeiou]llo\", \"hallo\", 0), 0);\n  EXPECT_EQ(MatchLen(\"h[a-z]llo\", \"hello\", 0), 1);\n  EXPECT_EQ(MatchLen(\"h[A-Z]llo\", \"HeLLO\", 1), 1);\n  EXPECT_EQ(MatchLen(\"[[]\", \"[\", 0), 1);\n  EXPECT_EQ(MatchLen(\"[^]a\", \"xa\", 0), 1);\n\n  // ?\n  EXPECT_EQ(MatchLen(\"h?llo\", \"hello\", 0), 1);\n  EXPECT_EQ(MatchLen(\"h??llo\", \"ha llo\", 0), 1);\n  EXPECT_EQ(MatchLen(\"h??llo\", \"hallo\", 0), 0);\n  EXPECT_EQ(MatchLen(\"h\\\\?llo\", \"hallo\", 0), 0);\n  EXPECT_EQ(MatchLen(\"h\\\\?llo\", \"h?llo\", 0), 1);\n  EXPECT_EQ(MatchLen(\"abc?\", \"abc\\n\", 0), 1);\n}\n\n#define TEST_STRINGMATCH(pattern, str, case_res, nocase_res) \\\n  {                                                          \\\n    EXPECT_EQ(int(MatchLen(pattern, str, 0)), case_res);     \\\n    EXPECT_EQ(int(MatchLen(pattern, str, 1)), nocase_res);   \\\n  }\n\nTEST_F(StringMatchTest, Special) {\n  EXPECT_TRUE(MatchLen(\"h\\\\[^|\", \"h[^|\", 0));\n  EXPECT_FALSE(MatchLen(\"[^\", \"[^\", 0));\n  EXPECT_TRUE(MatchLen(\"[$?^]a\", \"?a\", 0));\n  EXPECT_TRUE(MatchLen(\"abc[\\\\d]e\", \"abcde\", 0));\n  EXPECT_TRUE(MatchLen(\"foo\\\\\", \"foo\\\\\", 0));\n\n  /* Case sensitivity: */\n  TEST_STRINGMATCH(\"a\", \"a\", 1, 1);\n  TEST_STRINGMATCH(\"a\", \"A\", 
0, 1);\n  TEST_STRINGMATCH(\"A\", \"A\", 1, 1);\n  TEST_STRINGMATCH(\"A\", \"a\", 0, 1);\n  TEST_STRINGMATCH(\"\\\\a\", \"a\", 1, 1);\n  TEST_STRINGMATCH(\"\\\\a\", \"A\", 0, 1);\n  TEST_STRINGMATCH(\"\\\\A\", \"A\", 1, 1);\n  TEST_STRINGMATCH(\"\\\\A\", \"a\", 0, 1);\n  TEST_STRINGMATCH(\"[\\\\a]\", \"a\", 1, 1);\n\n  // TODO: to fix this: TEST_STRINGMATCH(\"[\\\\a]\", \"A\", 0, 1);\n  TEST_STRINGMATCH(\"[\\\\A]\", \"A\", 1, 1);\n  // TODO: to fix this: TEST_STRINGMATCH(\"[\\\\A]\", \"a\", 0, 1);\n\n  /* Escaped metacharacters: */\n  TEST_STRINGMATCH(\"\\\\*\", \"*\", 1, 1);\n  TEST_STRINGMATCH(\"\\\\?\", \"?\", 1, 1);\n  TEST_STRINGMATCH(\"\\\\\\\\\", \"\\\\\", 1, 1);\n  TEST_STRINGMATCH(\"\\\\[\", \"[\", 1, 1);\n  TEST_STRINGMATCH(\"\\\\]\", \"]\", 1, 1);\n  TEST_STRINGMATCH(\"\\\\^\", \"^\", 1, 1);\n  TEST_STRINGMATCH(\"\\\\-\", \"-\", 1, 1);\n  TEST_STRINGMATCH(\"[\\\\*]\", \"*\", 1, 1);\n  TEST_STRINGMATCH(\"[\\\\?]\", \"?\", 1, 1);\n  TEST_STRINGMATCH(\"[\\\\\\\\]\", \"\\\\\", 1, 1);\n  TEST_STRINGMATCH(\"[\\\\[]\", \"[\", 1, 1);\n  TEST_STRINGMATCH(\"[\\\\]]\", \"]\", 1, 1);\n  TEST_STRINGMATCH(\"[\\\\^]\", \"^\", 1, 1);\n  TEST_STRINGMATCH(\"[\\\\-]\", \"-\", 1, 1);\n\n  /* Not special outside character classes: */\n  TEST_STRINGMATCH(\"]\", \"]\", 1, 1);\n  TEST_STRINGMATCH(\"^\", \"^\", 1, 1);\n  TEST_STRINGMATCH(\"-\", \"-\", 1, 1);\n  /* Not special inside character classes: */\n  TEST_STRINGMATCH(\"[*]\", \"*\", 1, 1);\n  TEST_STRINGMATCH(\"[?]\", \"?\", 1, 1);\n  TEST_STRINGMATCH(\"[[]\", \"[\", 1, 1);\n  /* Not special as the first character in a character class: */\n  TEST_STRINGMATCH(\"[-]\", \"-\", 1, 1);\n\n  /* Not special as range end (undocumented): */\n  TEST_STRINGMATCH(\"[+-]]\", \"*\", 0, 0); /*   but not * (below) */\n  TEST_STRINGMATCH(\"[+-]]\", \"^\", 0, 0); /*   or ^ (above) */\n  TEST_STRINGMATCH(\"[+--]\", \",\", 1, 1); /* ASCII range + to - includes , */\n  TEST_STRINGMATCH(\"[+--]\", \"*\", 0, 0); /*   but not * (below) */\n  
TEST_STRINGMATCH(\"[+--]\", \".\", 0, 0); /*   or . (above) */\n\n  /* And the same, but unclosed: */\n  TEST_STRINGMATCH(\"[+-]\", \"*\", 0, 0);\n  TEST_STRINGMATCH(\"[+-]\", \"^\", 0, 0);\n  TEST_STRINGMATCH(\"[+--\", \",\", 1, 1);\n  TEST_STRINGMATCH(\"[+--\", \"*\", 0, 0);\n  TEST_STRINGMATCH(\"[+--\", \".\", 0, 0);\n\n  /* Escaped ] alone is literal: */\n  TEST_STRINGMATCH(\"[\\\\]a]\", \"]\", 1, 1);\n  TEST_STRINGMATCH(\"[\\\\]a]\", \"a\", 1, 1);\n\n  /* Escapes at range end: */\n  TEST_STRINGMATCH(\"[+-\\\\\\\\]\", \",\", 1, 1); /* ASCII range + to \\ includes , */\n  TEST_STRINGMATCH(\"[+-\\\\\\\\]\", \"*\", 0, 0); /*   but not * (below) */\n  TEST_STRINGMATCH(\"[+-\\\\]]\", \"*\", 0, 0);  /*   but not * (below) */\n  TEST_STRINGMATCH(\"[+-\\\\]]\", \"^\", 0, 0);  /*   or ^ (above) */\n\n  /* Unclosed is the same: */\n  TEST_STRINGMATCH(\"[+-\\\\\\\\\", \",\", 1, 1);\n  TEST_STRINGMATCH(\"[+-\\\\\\\\\", \"*\", 0, 0);\n  TEST_STRINGMATCH(\"[+-\\\\\\\\\", \"]\", 0, 0);\n  TEST_STRINGMATCH(\"[+-\\\\]\", \",\", 1, 1);\n  TEST_STRINGMATCH(\"[+-\\\\]\", \"*\", 0, 0);\n  TEST_STRINGMATCH(\"[+-\\\\]\", \"^\", 0, 0);\n  /* An incomplete escape is treated as literal backslash: */\n  TEST_STRINGMATCH(\"[+-\\\\\", \",\", 1, 1);\n  TEST_STRINGMATCH(\"[+-\\\\\", \"*\", 0, 0);\n  TEST_STRINGMATCH(\"[+-\\\\\", \"]\", 0, 0);\n\n  /* Empty character class matches nothing: */\n  TEST_STRINGMATCH(\"[]\", \"\", 0, 0);\n  TEST_STRINGMATCH(\"[]\", \"a\", 0, 0);\n  TEST_STRINGMATCH(\"[\", \"\", 0, 0); /* Unclosed is the same */\n  TEST_STRINGMATCH(\"[\", \"a\", 0, 0);\n\n  /* Empty negated character class is equivalent to pattern \"?\": */\n  TEST_STRINGMATCH(\"[^]\", \"\", 0, 0);\n  TEST_STRINGMATCH(\"[^]\", \"a\", 1, 1);\n  TEST_STRINGMATCH(\"[^]\", \"ab\", 0, 0);\n  TEST_STRINGMATCH(\"[^\", \"\", 0, 0); /* Unclosed is the same */\n  TEST_STRINGMATCH(\"[^\", \"a\", 1, 1);\n  TEST_STRINGMATCH(\"[^\", \"ab\", 0, 0);\n\n  /* Unclosed character classes are not an error 
(undocumented): */\n  TEST_STRINGMATCH(\"[A-\", \"B\", 0, 0);\n}\n\nclass HuffCoderTest : public ::testing::Test {\n protected:\n  HuffmanEncoder encoder_;\n  HuffmanDecoder decoder_;\n  string error_msg_;\n  const string_view good_table_{\n      \"\\x1b\\x10\\xd8\\n\\n\\x19\\xc6\\x0c\\xc3\\x30\\x0c\\x43\\x1e\\x93\\xe4\\x11roB\\xf6\\xde\\xbb\\x18V\\xc2Zk\\x03\"sv};\n};\n\nTEST_F(HuffCoderTest, Load) {\n  string data(\"bad\");\n\n  ASSERT_FALSE(encoder_.Load(data, &error_msg_));\n\n  data = good_table_;\n  ASSERT_TRUE(encoder_.Load(data, &error_msg_)) << error_msg_;\n\n  data.append(\"foo\");\n  encoder_.Reset();\n  ASSERT_FALSE(encoder_.Load(data, &error_msg_));\n}\n\nTEST_F(HuffCoderTest, Encode) {\n  ASSERT_TRUE(encoder_.Load(good_table_, &error_msg_)) << error_msg_;\n\n  EXPECT_EQ(1, encoder_.GetNBits('x'));\n  EXPECT_EQ(3, encoder_.GetNBits(':'));\n  EXPECT_EQ(5, encoder_.GetNBits('2'));\n  EXPECT_EQ(5, encoder_.GetNBits('3'));\n\n  string data(\"x:23xx\");\n\n  array<uint8_t, 100> dest;\n  uint32_t dest_size = dest.size();\n  ASSERT_TRUE(encoder_.Encode(data, dest.data(), &dest_size, &error_msg_));\n  ASSERT_EQ(3, dest_size);\n\n  // testing small destination buffer.\n  data = \"3333333333333333333\";\n  dest_size = 16;\n  EXPECT_TRUE(encoder_.Encode(data, dest.data(), &dest_size, &error_msg_));\n\n  // destination too small\n  ASSERT_EQ(0, dest_size);\n  ASSERT_EQ(\"\", error_msg_);\n}\n\nTEST_F(HuffCoderTest, Decode) {\n  array<unsigned, 256> hist;\n  hist.fill(1);\n  hist['a'] = 100;\n  hist['b'] = 50;\n\n  ASSERT_TRUE(encoder_.Build(hist.data(), hist.size() - 1, &error_msg_));\n  string data(\"aab\");\n\n  array<uint8_t, 100> encoded{0};\n  uint32_t encoded_size = encoded.size();\n  ASSERT_TRUE(encoder_.Encode(data, encoded.data(), &encoded_size, &error_msg_));\n  ASSERT_EQ(1, encoded_size);\n\n  EXPECT_EQ(2, encoder_.GetNBits('a'));\n  EXPECT_EQ(3, encoder_.GetNBits('b'));\n\n  string bindata = encoder_.Export();\n  ASSERT_TRUE(decoder_.Load(bindata, 
&error_msg_)) << error_msg_;\n\n  const char* src_ptr = reinterpret_cast<const char*>(encoded.data());\n  array<char, 100> decode_dest{0};\n  size_t decoded_size = data.size();\n  ASSERT_TRUE(decoder_.Decode({src_ptr, encoded_size}, decoded_size, decode_dest.data()));\n  ASSERT_EQ(\"aab\", string_view(decode_dest.data(), decoded_size));\n}\n\nTEST_F(HuffCoderTest, HugeHistogram) {\n  array<unsigned, 256> hist{\n      1,         1,         1,         1,         1,         1,         1,         1,\n      5,         26,        543,       1,         1,         1,         1,         1,\n      4,         1,         1,         1,         1,         1,         1,         1,\n      1,         1,         1,         1,         1,         1,         1,         1,\n      114012534, 12081,     13038,     1596,      1334,      83320,     706165,    475568,\n      2779,      2548,      998,       29249967,  53961,     13175485,  99000,     69726435,\n      69422967,  182172009, 123544533, 76493373,  96341977,  64601914,  48105392,  60215630,\n      69253599,  48811529,  818580990, 1226,      69,        922,       140,       720,\n      230,       333714212, 95995178,  65692203,  50995122,  52156728,  44187793,  32988519,\n      46978428,  49648957,  43769567,  68958857,  56765240,  80721594,  51577447,  70298692,\n      56957407,  93372706,  47400672,  70912347,  78241282,  49291723,  69807896,  48372387,\n      39312015,  58020704,  60084247,  1378,      2471,      1584,      14,        37880886,\n      117,       184273430, 80952783,  135676228, 101229664, 230479318, 70652028,  137836653,\n      70943805,  154072333, 29316298,  58302725,  109445030, 117306062, 129270567, 166048852,\n      103000639, 54174517,  174819705, 166323524, 124543976, 80215452,  49650895,  101281709,\n      49817574,  56668585,  50459552,  273352049, 166,       273352009, 16,        1,\n      57668,     1724,      1886,      3668,      3960,      1963,      1124,      945,\n      1836,      1882,      
1709,      2389,      921,       2154,      1020,      1792,\n      3747,      6750,      1318,      3100,      4506,      1175,      1514,      1430,\n      3474,      44548,     3179,      1149,      2410,      9689,      727,       2348,\n      2148,      1785,      5025,      1040,      3246,      1699,      505,       1034,\n      9995,      24776,     3345,      1897,      1019,      1614,      35349,     988,\n      2469,      5759,      2043,      7976,      1229,      896,       2692,      962,\n      3341,      2490,      2648,      1162,      4812,      8404,      949,       3132,\n      1,         1,         34754,     58694,     3400,      561,       6,         5,\n      3,         47,        41,        19,        292,       24,        17,        12,\n      626,       382,       6,         1,         1,         9,         1,         433,\n      879,       743,       7,         9,         1,         1,         1,         60,\n      746,       224,       54115,     4566,      5463,      10917,     5446,      7960,\n      5382,      2204,      281,       649,       761,       188,       1,         2630,\n      6680,      1,         1,         1,         1,         1,         1,         1,\n      1,         1,         1,         1,         1,         1,         1,         1};\n\n  // for huge values we need to scale down the histogram because the Huffman algorithm\n  // implementation crashes otherwise.\n  // The bug is in the following code in huf_compress.c:\n  // huffNode0[0].count = (U32)(1U<<31);  /* fake entry, strong barrier */\n  // where it uses the count as a sentinel assuming that no other counts can be larger than 2^31.\n  // this may not be true for histograms with huge counts, so we need to make sure that sum of all\n  // counts is smaller than 2^31.\n  uint64_t sum = 0;\n  for (unsigned i = 0; i < hist.size(); ++i) {\n    sum += hist[i];\n    hist[i] /= 4;  // Without this the algorithm causes a data race and crash.\n  }\n  LOG(INFO) << 
\"Total sum: \" << sum << \" reduced sum: \" << sum / 4;\n  ASSERT_TRUE(encoder_.Build(hist.data(), hist.size() - 1, &error_msg_)) << error_msg_;\n\n  string bindata = encoder_.Export();\n  encoder_.Reset();\n  ASSERT_TRUE(encoder_.Load(bindata, &error_msg_)) << error_msg_;\n}\n\nusing benchmark::DoNotOptimize;\n\n// Parse Double benchmarks\nstatic void BM_ParseFastFloat(benchmark::State& state) {\n  std::vector<std::string> args(100);\n  std::random_device rd;\n\n  for (auto& arg : args) {\n    arg = std::to_string(std::uniform_real_distribution<double>(0, 1e5)(rd));\n  }\n  double res;\n  while (state.KeepRunning()) {\n    for (const auto& arg : args) {\n      fast_float::from_chars(arg.data(), arg.data() + arg.size(), res);\n    }\n  }\n}\nBENCHMARK(BM_ParseFastFloat);\n\nstatic void BM_ParseDoubleAbsl(benchmark::State& state) {\n  std::vector<std::string> args(100);\n\n  for (auto& arg : args) {\n    arg = std::to_string(std::uniform_real_distribution<double>(0, 1e5)(rd));\n  }\n\n  double res;\n  while (state.KeepRunning()) {\n    for (const auto& arg : args) {\n      absl::from_chars(arg.data(), arg.data() + arg.size(), res);\n    }\n  }\n}\nBENCHMARK(BM_ParseDoubleAbsl);\n\ntemplate <clockid_t cid> void BM_ClockType(benchmark::State& state) {\n  timespec ts;\n  while (state.KeepRunning()) {\n    DoNotOptimize(clock_gettime(cid, &ts));\n  }\n}\n\nBENCHMARK_TEMPLATE(BM_ClockType, CLOCK_REALTIME);\nBENCHMARK_TEMPLATE(BM_ClockType, CLOCK_MONOTONIC);\nBENCHMARK_TEMPLATE(BM_ClockType, CLOCK_PROCESS_CPUTIME_ID);\nBENCHMARK_TEMPLATE(BM_ClockType, CLOCK_THREAD_CPUTIME_ID);\n\n// These clocks are not available on apple platform\n#if !defined(__APPLE__)\nBENCHMARK_TEMPLATE(BM_ClockType, CLOCK_REALTIME_COARSE);\nBENCHMARK_TEMPLATE(BM_ClockType, CLOCK_MONOTONIC_COARSE);\nBENCHMARK_TEMPLATE(BM_ClockType, CLOCK_BOOTTIME);\nBENCHMARK_TEMPLATE(BM_ClockType, CLOCK_BOOTTIME_ALARM);\n#endif\n\nstatic void BM_MatchGlob(benchmark::State& state) {\n  string random_val = 
GetRandomHex(state.range(0));\n  GlobMatcher matcher(\"*foobar*\", true);\n  while (state.KeepRunning()) {\n    DoNotOptimize(matcher.Matches(random_val));\n  }\n}\nBENCHMARK(BM_MatchGlob)->Arg(32)->Arg(1000)->Arg(10000);\n\nstatic void BM_MatchGlob2(benchmark::State& state) {\n  string random_val = GetRandomHex(state.range(0));\n  GlobMatcher matcher(\"bull:*:meta\", true);\n  while (state.KeepRunning()) {\n    DoNotOptimize(matcher.Matches(random_val));\n  }\n}\nBENCHMARK(BM_MatchGlob2)->Arg(32)->Arg(1000)->Arg(10000);\n\n// See https://nvd.nist.gov/vuln/detail/cve-2022-36021\nstatic void BM_MatchGlobExp(benchmark::State& state) {\n  GlobMatcher matcher(\"a*a*a*a*a*a*a*a*a*a*a*a*a*a*a*a*a*a*a*a*b\", true);\n  while (state.KeepRunning()) {\n    DoNotOptimize(matcher.Matches(\"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\"));\n  }\n}\nBENCHMARK(BM_MatchGlobExp);\n\nstatic void BM_MatchFindSubstr(benchmark::State& state) {\n  string random_val = GetRandomHex(state.range(0));\n\n  while (state.KeepRunning()) {\n    DoNotOptimize(random_val.find(\"foobar\"));\n  }\n}\nBENCHMARK(BM_MatchFindSubstr)->Arg(1000)->Arg(10000);\n\nstatic void BM_MatchReflexFind(benchmark::State& state) {\n  string random_val = GetRandomHex(state.range(0));\n  reflex::Matcher matcher(\"foobar\");\n  while (state.KeepRunning()) {\n    matcher.input(random_val);\n    DoNotOptimize(matcher.find());\n  }\n}\nBENCHMARK(BM_MatchReflexFind)->Arg(1000)->Arg(10000);\n\nstatic void BM_MatchReflexFindStar(benchmark::State& state) {\n  string random_val = GetRandomHex(state.range(0));\n  reflex::Matcher matcher(\".*foobar\");\n\n  while (state.KeepRunning()) {\n    matcher.input(random_val);\n    DoNotOptimize(matcher.find());\n  }\n}\nBENCHMARK(BM_MatchReflexFindStar)->Arg(1000)->Arg(10000);\n\nstatic void BM_MatchStd(benchmark::State& state) {\n  string random_val = GetRandomHex(state.range(0));\n  std::regex regex(\".*foobar\");\n  std::match_results<std::string::const_iterator> results;\n  while 
(state.KeepRunning()) {\n    std::regex_match(random_val, results, regex);\n  }\n}\nBENCHMARK(BM_MatchStd)->Arg(1000)->Arg(10000);\n\nstatic void BM_MatchRedisGlob(benchmark::State& state) {\n  string random_val = GetRandomHex(state.range(0));\n  const char* pattern = \"*foobar*\";\n  while (state.KeepRunning()) {\n    DoNotOptimize(\n        stringmatchlen(pattern, strlen(pattern), random_val.c_str(), random_val.size(), 0));\n  }\n}\nBENCHMARK(BM_MatchRedisGlob)->Arg(1000)->Arg(10000);\n\nstatic void BM_MatchRedisGlob2(benchmark::State& state) {\n  string random_val = GetRandomHex(state.range(0));\n  const char* pattern = \"bull:*:meta\";\n  while (state.KeepRunning()) {\n    DoNotOptimize(\n        stringmatchlen(pattern, strlen(pattern), random_val.c_str(), random_val.size(), 0));\n  }\n}\nBENCHMARK(BM_MatchRedisGlob2)->Arg(32)->Arg(1000)->Arg(10000);\n\nstatic void BM_MatchData(benchmark::State& state) {\n  vector<string> keys(5000);\n  for (unsigned i = 0; i < keys.size(); ++i) {\n    keys[i] = GetRandomHex(80);\n  }\n  string_view pattern =\n      \"*2addb1c3-eae5-5265-ac8e-9fc9106dda8d*77de68daecd823babbb58edb1c8e14d7106e83bb\"sv;\n  if (state.range(0) == 1) {\n    GlobMatcher matcher(pattern, true);\n    while (state.KeepRunning()) {\n      for (const auto& key : keys) {\n        DoNotOptimize(matcher.Matches(key));\n      }\n    }\n  } else {\n    while (state.KeepRunning()) {\n      for (const auto& key : keys) {\n        DoNotOptimize(stringmatchlen(pattern.data(), pattern.size(), key.c_str(), key.size(), 0));\n      }\n    }\n  }\n}\nBENCHMARK(BM_MatchData)->ArgName(\"algo\")->Arg(0)->Arg(1);\n\n#ifdef USE_RE2\nstatic void BM_MatchRe2(benchmark::State& state) {\n  string random_val = GetRandomHex(state.range(0));\n  re2::RE2 re(\".*foobar.*\", re2::RE2::Latin1);\n  CHECK(re.ok());\n\n  while (state.KeepRunning()) {\n    DoNotOptimize(re2::RE2::FullMatch(random_val, re));\n  }\n}\nBENCHMARK(BM_MatchRe2)->Arg(1000)->Arg(10000);\n#endif\n\n#ifdef 
USE_PCRE2\n\npair<pcre2_code*, pcre2_match_data*> create_pcre2(const char* pattern) {\n  int errnum;\n  PCRE2_SIZE erroffset;\n  pcre2_code* re =\n      pcre2_compile((PCRE2_SPTR)pattern, PCRE2_ZERO_TERMINATED, 0, &errnum, &erroffset, nullptr);\n  CHECK(re);\n  CHECK_EQ(0, pcre2_jit_compile(re, PCRE2_JIT_COMPLETE));\n\n  pcre2_match_data* match_data = pcre2_match_data_create_from_pattern(re, NULL);\n  return {re, match_data};\n}\n\nint pcre2_do_match(string_view str, pcre2_code* re, pcre2_match_data* match_data) {\n  int rc = pcre2_jit_match(re, (PCRE2_SPTR)str.data(), str.size(), 0,\n                           PCRE2_ANCHORED | PCRE2_ENDANCHORED, match_data, NULL);\n  return rc;\n}\n\nstatic void BM_MatchPcre2Jit(benchmark::State& state) {\n  string random_val = GetRandomHex(state.range(0));\n  auto [re, match_data] = create_pcre2(\".*foobar.*\");\n  const char sample[] = \"aaaaaaaaaaaaafoobar\";\n  int rc = pcre2_do_match(sample, re, match_data);\n  CHECK_EQ(1, rc);\n\n  while (state.KeepRunning()) {\n    rc = pcre2_do_match(random_val, re, match_data);\n    CHECK_EQ(PCRE2_ERROR_NOMATCH, rc);\n  }\n  pcre2_match_data_free(match_data);\n  pcre2_code_free(re);\n}\nBENCHMARK(BM_MatchPcre2Jit)->Arg(32)->Arg(1000)->Arg(10000);\n\nstatic void BM_MatchPcre2Jit2(benchmark::State& state) {\n  string random_val = GetRandomHex(state.range(0));\n  auto [re, match_data] = create_pcre2(\"foo.*bar\");\n\n  while (state.KeepRunning()) {\n    int rc = pcre2_do_match(random_val, re, match_data);\n    CHECK_EQ(PCRE2_ERROR_NOMATCH, rc);\n  }\n  pcre2_match_data_free(match_data);\n  pcre2_code_free(re);\n}\nBENCHMARK(BM_MatchPcre2Jit2)->Arg(32)->Arg(1000)->Arg(10000);\n\nstatic void BM_MatchPcre2JitExp(benchmark::State& state) {\n  string exponent_pattern = \"a*a*a*a*a*a*a*a*a*a*a*a*a*a*a*a*a*a*a*a*b\";\n  string str = \"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\";\n  auto [re, match_data] = create_pcre2(exponent_pattern.c_str());\n  while (state.KeepRunning()) {\n    int rc = 
pcre2_do_match(str, re, match_data);\n    CHECK_EQ(PCRE2_ERROR_NOMATCH, rc);\n  }\n  pcre2_match_data_free(match_data);\n  pcre2_code_free(re);\n}\nBENCHMARK(BM_MatchPcre2JitExp);\n\n#endif\n\nstatic void BM_MatchGlobSlow(benchmark::State& state) {\n  GlobMatcher matcher(\"a*a*a*a*a*.pt\", false);\n  while (state.KeepRunning()) {\n    DoNotOptimize(GlobMatcher(\"a*a*a*a*a*.pt\", false));\n  }\n}\nBENCHMARK(BM_MatchGlobSlow);\n}  // namespace dfly\n"
  },
  {
    "path": "src/core/dict_builder.cc",
    "content": "// Copyright 2025, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#include \"core/dict_builder.h\"\n\n#include <algorithm>\n#include <bit>\n#include <cmath>\n#include <cstring>\n#include <memory>\n#include <vector>\n\n#include \"base/logging.h\"\n\nnamespace dfly {\n\nusing namespace std;\nnamespace {\n\nconstexpr unsigned kDmerLength = 6;\n\n// Fast hash for 6-byte d-mers. Uses a simple multiplicative hash.\ninline uint32_t HashDmer(const uint8_t* data) {\n  uint64_t val = 0;\n  memcpy(&val, data, 6);\n\n  // ZSTD_hash6 algorithm\n  constexpr uint64_t kPrime6Bytes = 227718039650203ULL;\n  uint64_t hash64 = ((val << 16) * kPrime6Bytes) >> 32;\n  return static_cast<uint32_t>(hash64);\n}\n\nconstexpr unsigned kRegisterLen = 1024;\nconstexpr uint32_t kRegisterMask = kRegisterLen - 1;\nconstexpr unsigned kRegisterBits = 10;\nconstexpr unsigned kRankBits = 32 - kRegisterBits;\n\ninline void UpdateHllRegister(uint32_t h, uint8_t* registers) {\n  uint32_t index = h & kRegisterMask;\n  // Use upper bits for rank calculation, ensuring it's never zero\n  uint32_t w = (h >> kRegisterBits) | (1u << kRankBits);\n  uint8_t rank = countr_zero(w) + 1;\n  registers[index] = std::max(registers[index], rank);\n}\n\ndouble EstimateHllCardinality(const uint8_t* registers) {\n  double sum = 0.0;\n  int zero_registers = 0;\n  for (unsigned i = 0; i < kRegisterLen; ++i) {\n    if (registers[i] == 0) {\n      zero_registers++;\n    }\n    sum += 1.0 / (1 << registers[i]);\n  }\n\n  // alpha_m * m^2 where m = kRegisterLen\n  // Constants from original HyperLogLog paper (Flajolet et al.)\n  constexpr double kAlphaInf = 0.7213;\n  constexpr double kAlphaCorrection = 1.079;\n  constexpr double kM = static_cast<double>(kRegisterLen);\n  constexpr double kAlphaM2 = (kAlphaInf / (1.0 + kAlphaCorrection / kM)) * (kM * kM);\n  double estimate = kAlphaM2 / sum;\n\n  // Small range correction\n  constexpr double kSmallRangeThreshold = 2.5 * 
kM;\n  if (estimate <= kSmallRangeThreshold && zero_registers > 0) {\n    estimate = kM * std::log(kM / zero_registers);\n  }\n  return estimate;\n}\n\nuint32_t CalculateFreqTableSize(absl::Span<const std::pair<const uint8_t*, size_t>> data_pieces) {\n  size_t total_input_size = 0;\n  for (const auto& [data, sz] : data_pieces) {\n    total_input_size += sz;\n  }\n  size_t target_size = std::max<size_t>(1024, total_input_size);\n  return std::bit_ceil(static_cast<uint32_t>(std::min<size_t>(target_size, 1u << 24)));\n}\n\n// Scans all provided data pieces to compute a histogram of 6-byte sequence (d-mer) hashes.\nvoid PopulateFrequencyTable(absl::Span<const std::pair<const uint8_t*, size_t>> data_pieces,\n                            uint16_t* freq, uint32_t freq_table_mask) {\n  for (const auto& [data, sz] : data_pieces) {\n    if (sz < kDmerLength)\n      continue;\n\n    size_t limit = sz - kDmerLength + 1;\n    for (size_t i = 0; i < limit; ++i) {\n      uint32_t idx = HashDmer(data + i) & freq_table_mask;\n      if (freq[idx] < UINT16_MAX) {\n        ++freq[idx];\n      }\n    }\n  }\n}\n\nstruct BestSegmentResult {\n  std::pair<const uint8_t*, size_t> data_piece{nullptr, 0};\n  uint64_t score = 0;\n};\n\n// Iterates across all data pieces to find a contiguous byte window of `segment_size`\n// that maximizes the sum of previously computed sequence frequencies.\nBestSegmentResult FindBestSegment(absl::Span<const std::pair<const uint8_t*, size_t>> data_pieces,\n                                  size_t segment_size, const uint16_t* freq,\n                                  uint32_t freq_table_mask) {\n  BestSegmentResult best;\n\n  for (const auto& [data, sz] : data_pieces) {\n    if (sz < segment_size)\n      continue;\n\n    size_t window_dmers = segment_size - kDmerLength + 1;\n    uint64_t score = 0;\n\n    // Compute initial window score\n    for (size_t j = 0; j < window_dmers; ++j) {\n      score += freq[HashDmer(data + j) & freq_table_mask];\n    }\n\n    if 
(score > best.score) {\n      best.score = score;\n      best.data_piece = {data, segment_size};\n    }\n\n    // Slide the window\n    size_t limit = sz - segment_size;\n    for (size_t i = 1; i <= limit; ++i) {\n      score -= freq[HashDmer(data + i - 1) & freq_table_mask];\n      score += freq[HashDmer(data + i + window_dmers - 1) & freq_table_mask];\n\n      if (score > best.score) {\n        best.score = score;\n        best.data_piece = {data + i, segment_size};\n      }\n    }\n  }\n\n  return best;\n}\n\nvoid ZeroOutFrequencies(std::pair<const uint8_t*, size_t> data_piece, uint16_t* freq,\n                        uint32_t freq_table_mask) {\n  if (data_piece.second < kDmerLength)\n    return;\n  size_t seg_dmers = data_piece.second - kDmerLength + 1;\n  for (size_t j = 0; j < seg_dmers; ++j) {\n    freq[HashDmer(data_piece.first + j) & freq_table_mask] = 0;\n  }\n}\n\n}  // namespace\n\n// Estimates dictionary compressibility by observing the cardinality\n// of unique 6-byte substrings via a simplified internal HyperLogLog.\ndouble EstimateCompressibility(absl::Span<const std::pair<const uint8_t*, size_t>> data_pieces,\n                               unsigned step) {\n  DCHECK_GT(step, 0u);\n\n  unique_ptr<uint8_t[]> registers(new uint8_t[kRegisterLen]());\n  uint64_t total_dmers = 0;\n\n  for (const auto& [data, sz] : data_pieces) {\n    if (sz < kDmerLength)\n      continue;\n    size_t limit = sz - kDmerLength + 1;\n    for (size_t i = 0; i < limit; i += step) {\n      UpdateHllRegister(HashDmer(data + i), registers.get());\n      ++total_dmers;\n    }\n  }\n\n  if (total_dmers == 0) {\n    return 1.0;  // No d-mers - we consider it incompressible\n  }\n\n  double estimate = EstimateHllCardinality(registers.get());\n  double ratio = estimate / static_cast<double>(total_dmers);\n  return std::min(ratio, 1.0);\n}\n\n// Trains a dictionary using FastCover-style iterative segment selection.\n// 1. Builds a frequency table of 6-byte d-mer hashes.\n// 2. 
For each data piece (epoch), selects the segment of segment_size bytes\n//    that maximizes the sum of d-mer frequencies.\n// 3. Appends selected segment to dictionary, zeros out its d-mer frequencies.\n// Returns raw dictionary bytes of approximately dict_size.\nstring TrainDictionary(absl::Span<const pair<const uint8_t*, size_t>> data_pieces, size_t dict_size,\n                       size_t segment_size) {\n  DCHECK_GT(dict_size, 0u);\n  DCHECK_GT(segment_size, kDmerLength);\n\n  uint32_t freq_table_size = CalculateFreqTableSize(data_pieces);\n  uint32_t freq_table_mask = freq_table_size - 1;\n\n  unique_ptr<uint16_t[]> freq(new uint16_t[freq_table_size]());\n  PopulateFrequencyTable(data_pieces, freq.get(), freq_table_mask);\n\n  std::string dictionary;\n  dictionary.reserve(dict_size);\n\n  while (dictionary.size() < dict_size) {\n    auto best = FindBestSegment(data_pieces, segment_size, freq.get(), freq_table_mask);\n\n    if (!best.data_piece.first || best.score == 0) {\n      break;  // No useful segments left.\n    }\n\n    size_t append_size = std::min(best.data_piece.second, dict_size - dictionary.size());\n    dictionary.append(reinterpret_cast<const char*>(best.data_piece.first), append_size);\n\n    ZeroOutFrequencies(best.data_piece, freq.get(), freq_table_mask);\n  }\n\n  return dictionary;\n}\n\n}  // namespace dfly\n"
  },
  {
    "path": "src/core/dict_builder.h",
    "content": "// Copyright 2025, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#pragma once\n\n#include <absl/types/span.h>\n\n#include <cstddef>\n#include <cstdint>\n#include <string>\n#include <utility>\n\nnamespace dfly {\n\n// Estimates compressibility by counting unique 6-byte d-mers using HyperLogLog.\n// data_pieces: spans of raw data (e.g., one per QList node).\n// step: sampling stride (1 = every offset, higher = faster but less accurate).\n// Returns a value in [0, 1] where 0 means very compressible, and 1 means incompressible.\ndouble EstimateCompressibility(absl::Span<const std::pair<const uint8_t*, size_t>> data_pieces,\n                               unsigned step);\n\n// Trains a compression dictionary from a collection of sample data.\n//\n// Arguments:\n//   data_pieces:  Input data sources (spans of bytes) to extract dictionary segments from.\n//   dict_size:    The maximum target size of the resulting dictionary in bytes.\n//   segment_size: The size of continuous byte segments chosen and appended per iteration.\n//\n// Returns a raw string containing the trained dictionary up to `dict_size` bytes.\nstd::string TrainDictionary(absl::Span<const std::pair<const uint8_t*, size_t>> data_pieces,\n                            size_t dict_size = 4096, size_t segment_size = 256);\n\n}  // namespace dfly\n"
  },
  {
    "path": "src/core/dict_builder_test.cc",
    "content": "// Copyright 2025, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#include \"core/dict_builder.h\"\n\n#include <gmock/gmock.h>\n#include <zstd.h>\n\n#include <random>\n#include <string>\n#include <vector>\n\n#include \"base/logging.h\"\n\nnamespace dfly {\n\nusing namespace std;\n\nclass DictBuilderTest : public ::testing::Test {\n protected:\n  using DataPiece = pair<const uint8_t*, size_t>;\n\n  // Generate Celery-like JSON entries with small variations.\n  vector<string> GenerateCeleryEntries(unsigned count) {\n    vector<string> entries;\n    entries.reserve(count);\n    for (unsigned i = 0; i < count; ++i) {\n      string id = to_string(100000 + i);\n      string entry =\n          \"{\\\"body\\\": \\\"W10=\\\", \\\"content-encoding\\\": \\\"utf-8\\\", \"\n          \"\\\"content-type\\\": \\\"application/json\\\", \"\n          \"\\\"headers\\\": {\\\"lang\\\": \\\"py\\\", \\\"task\\\": \\\"process_job\\\", \"\n          \"\\\"id\\\": \\\"b3e4b923-8a77-4053-aff0-\" +\n          id +\n          \"\\\", \\\"shadow\\\": null, \\\"eta\\\": null, \"\n          \"\\\"expires\\\": null, \\\"group\\\": null, \\\"retries\\\": 0, \"\n          \"\\\"timelimit\\\": [null, null], \"\n          \"\\\"root_id\\\": \\\"b3e4b923-8a77-4053-aff0-\" +\n          id +\n          \"\\\", \\\"parent_id\\\": null, \"\n          \"\\\"argsrepr\\\": \\\"('job\" +\n          to_string(i) +\n          \"',)\\\", \\\"kwargsrepr\\\": \\\"{}\\\", \"\n          \"\\\"origin\\\": \\\"gen917779@hut\\\"}, \"\n          \"\\\"properties\\\": {\\\"correlation_id\\\": \\\"b3e4b923\\\", \"\n          \"\\\"reply_to\\\": \\\"9933040c\\\", \\\"delivery_mode\\\": 2, \"\n          \"\\\"delivery_info\\\": {\\\"exchange\\\": \\\"\\\", \\\"routing_key\\\": \\\"my_queue\\\"}, \"\n          \"\\\"priority\\\": 0}}\";\n      entries.push_back(std::move(entry));\n    }\n    return entries;\n  }\n\n  vector<DataPiece> ToPieces(const vector<string>& 
entries) {\n    vector<DataPiece> pieces;\n    pieces.reserve(entries.size());\n    for (const auto& e : entries) {\n      pieces.emplace_back(reinterpret_cast<const uint8_t*>(e.data()), e.size());\n    }\n    return pieces;\n  }\n\n  // Generate random binary data.\n  vector<string> GenerateRandomEntries(unsigned count, size_t entry_size) {\n    vector<string> entries;\n    entries.reserve(count);\n    mt19937 rng(42);\n    for (unsigned i = 0; i < count; ++i) {\n      string entry(entry_size, '\\0');\n      for (auto& c : entry) {\n        c = static_cast<char>(rng() & 0xFF);\n      }\n      entries.push_back(std::move(entry));\n    }\n    return entries;\n  }\n};\n\nTEST_F(DictBuilderTest, RepetitiveDataIsCompressible) {\n  auto entries = GenerateCeleryEntries(200);\n  auto pieces = ToPieces(entries);\n\n  double ratio = EstimateCompressibility(pieces, 1);\n  LOG(INFO) << \"Celery data uniqueness ratio: \" << ratio;\n  EXPECT_LT(ratio, 0.5f);\n}\n\nTEST_F(DictBuilderTest, RandomDataIsIncompressible) {\n  auto entries = GenerateRandomEntries(200, 400);\n  auto pieces = ToPieces(entries);\n\n  double ratio = EstimateCompressibility(pieces, 1);\n  LOG(INFO) << \"Random data uniqueness ratio: \" << ratio;\n  EXPECT_FALSE(ratio < 0.85);\n}\n\nTEST_F(DictBuilderTest, TrainDictionaryProducesOutput) {\n  auto entries = GenerateCeleryEntries(200);\n  auto pieces = ToPieces(entries);\n\n  string dict = TrainDictionary(pieces, 4096, 256);\n  LOG(INFO) << \"Trained dictionary size: \" << dict.size() << \" bytes\";\n  EXPECT_GT(dict.size(), 0u);\n  EXPECT_LE(dict.size(), 4096u);\n}\n\nTEST_F(DictBuilderTest, TrainDictionaryEmptyForTinyData) {\n  // Single small entry - not enough for segment selection.\n  string tiny = \"hello\";\n  vector<DataPiece> pieces = {{reinterpret_cast<const uint8_t*>(tiny.data()), tiny.size()}};\n\n  string dict = TrainDictionary(pieces, 4096, 256);\n  EXPECT_TRUE(dict.empty());\n}\n\nTEST_F(DictBuilderTest, ZstdCompressionWithTrainedDict) {\n  
auto entries = GenerateCeleryEntries(200);\n  auto pieces = ToPieces(entries);\n\n  string dict = TrainDictionary(pieces, 4096, 256);\n  ASSERT_GT(dict.size(), 0u);\n\n  // Create ZSTD CDict/DDict from trained dictionary.\n  ZSTD_CDict* cdict = ZSTD_createCDict(dict.data(), dict.size(), 1);\n  ASSERT_TRUE(cdict);\n  ZSTD_DDict* ddict = ZSTD_createDDict(dict.data(), dict.size());\n  ASSERT_TRUE(ddict);\n\n  ZSTD_CCtx* cctx = ZSTD_createCCtx();\n  ZSTD_DCtx* dctx = ZSTD_createDCtx();\n\n  size_t total_raw = 0;\n  size_t total_compressed_dict = 0;\n  size_t total_compressed_nodict = 0;\n\n  for (const auto& entry : entries) {\n    total_raw += entry.size();\n\n    // Compress with dictionary.\n    size_t bound = ZSTD_compressBound(entry.size());\n    string compressed(bound, '\\0');\n    size_t csz =\n        ZSTD_compress_usingCDict(cctx, compressed.data(), bound, entry.data(), entry.size(), cdict);\n    ASSERT_FALSE(ZSTD_isError(csz)) << ZSTD_getErrorName(csz);\n    compressed.resize(csz);\n    total_compressed_dict += csz;\n\n    // Compress without dictionary for comparison.\n    string compressed_nodict(bound, '\\0');\n    size_t csz_nodict =\n        ZSTD_compressCCtx(cctx, compressed_nodict.data(), bound, entry.data(), entry.size(), 1);\n    ASSERT_FALSE(ZSTD_isError(csz_nodict));\n    total_compressed_nodict += csz_nodict;\n\n    // Verify roundtrip.\n    string decompressed(entry.size(), '\\0');\n    size_t dsz = ZSTD_decompress_usingDDict(dctx, decompressed.data(), entry.size(),\n                                            compressed.data(), csz, ddict);\n    ASSERT_FALSE(ZSTD_isError(dsz)) << ZSTD_getErrorName(dsz);\n    ASSERT_EQ(dsz, entry.size());\n    EXPECT_EQ(decompressed, entry);\n  }\n\n  double ratio_dict = double(total_raw) / double(total_compressed_dict);\n  double ratio_nodict = double(total_raw) / double(total_compressed_nodict);\n  LOG(INFO) << \"Total raw: \" << total_raw << \" bytes\";\n  LOG(INFO) << \"With dict: \" << total_compressed_dict 
<< \" bytes (ratio \" << ratio_dict << \"x)\";\n  LOG(INFO) << \"No dict:   \" << total_compressed_nodict << \" bytes (ratio \" << ratio_nodict << \"x)\";\n  LOG(INFO) << \"Dict advantage: \" << ratio_dict / ratio_nodict << \"x better\";\n\n  // Dictionary compression should be significantly better for repetitive data.\n  EXPECT_GT(ratio_dict, ratio_nodict);\n  EXPECT_GT(ratio_dict, 3.0f);  // Expect at least 3x compression with dict.\n\n  ZSTD_freeCCtx(cctx);\n  ZSTD_freeDCtx(dctx);\n  ZSTD_freeCDict(cdict);\n  ZSTD_freeDDict(ddict);\n}\n\nTEST_F(DictBuilderTest, StepParameterWorks) {\n  auto entries = GenerateCeleryEntries(200);\n  auto pieces = ToPieces(entries);\n\n  double step1_ratio = EstimateCompressibility(pieces, 1);\n  double step4_ratio = EstimateCompressibility(pieces, 4);\n\n  // Both should detect compressibility, though with slightly different ratios.\n  EXPECT_TRUE(step1_ratio < 0.85);\n  EXPECT_TRUE(step4_ratio < 0.85);\n  LOG(INFO) << \"Step=1 ratio: \" << step1_ratio << \", Step=4 ratio: \" << step4_ratio;\n}\n\n}  // namespace dfly\n"
  },
  {
    "path": "src/core/dragonfly_core.cc",
    "content": "// Copyright 2022, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#include <absl/base/macros.h>\n\n#include \"base/logging.h\"\n#include \"core/intent_lock.h\"\n\nnamespace dfly {\n\nconst char* IntentLock::ModeName(Mode m) {\n  switch (m) {\n    case IntentLock::SHARED:\n      return \"SHARED\";\n    case IntentLock::EXCLUSIVE:\n      return \"EXCLUSIVE\";\n  }\n\n  ABSL_UNREACHABLE();\n}\n\nvoid IntentLock::VerifyDebug() {\n  constexpr uint32_t kMsb = 1ULL << (sizeof(cnt_[0]) * 8 - 1);\n  DCHECK_EQ(0u, cnt_[0] & kMsb);\n  DCHECK_EQ(0u, cnt_[1] & kMsb);\n}\n\n}  // namespace dfly\n"
  },
  {
    "path": "src/core/expire_period.h",
    "content": "// Copyright 2022, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#pragma once\n\n#include <cstdint>\n\nnamespace dfly {\n\nclass ExpirePeriod {\n public:\n  static constexpr size_t kMaxGenId = 15;\n\n  ExpirePeriod() : val_(0), gen_(0), precision_(0) {\n    static_assert(sizeof(ExpirePeriod) == 8);  // TODO\n  }\n\n  explicit ExpirePeriod(uint64_t ms, unsigned gen = 0) : ExpirePeriod() {\n    Set(ms);\n  }\n\n  // always returns milliseconds value.\n  uint64_t duration_ms() const {\n    return precision_ ? uint64_t(val_) * 1000 : val_;\n  }\n\n  // generation id for the base of this duration.\n  // when we update the generation, we need to update the value as well according to this\n  // logic:\n  // new_val = (old_val + old_base) - new_base.\n  unsigned generation_id() const {\n    return gen_;\n  }\n\n  void Set(uint64_t ms);\n\n  bool is_second_precision() { return precision_ == 1;}\n\n private:\n  uint64_t val_ : 59;\n  uint64_t gen_ : 4;\n  uint64_t precision_ : 1;  // 0 - ms, 1 - sec.\n};\n\ninline void ExpirePeriod::Set(uint64_t ms) {\n  constexpr uint64_t kBarrier = (1ULL << 48);\n\n  if (ms < kBarrier) {\n    val_ = ms;\n    precision_ = 0;   // ms\n    return;\n  }\n\n  precision_ = 1;\n  if (ms < kBarrier << 10) {\n    ms = (ms + 500) / 1000;   // seconds\n  }\n  val_ = ms >= kBarrier ? kBarrier - 1 : ms;\n}\n\n}  // namespace dfly\n"
  },
  {
    "path": "src/core/extent_tree.cc",
    "content": "// Copyright 2022, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#include \"core/extent_tree.h\"\n\n#include \"base/logging.h\"\n\nnamespace dfly {\n\nusing namespace std;\n\n// offset, len must be multiplies of 256MB.\nvoid ExtentTree::Add(size_t start, size_t len) {\n  DCHECK_GT(len, 0u);\n  DCHECK_EQ(len_extents_.size(), extents_.size());\n\n  auto it = extents_.lower_bound(start);\n  optional<size_t> prev_extent_key;\n\n  if (it != extents_.begin()) {\n    auto prev = it;\n    --prev;\n\n    DCHECK_LE(prev->second, start);\n    if (prev->second == start) {  // combine with the previous extent\n      size_t prev_len = prev->second - prev->first;\n      CHECK_EQ(1u, len_extents_.erase(pair{prev_len, prev->first}));\n      prev->second += len;\n      start = prev->first;\n      len += prev_len;\n      prev_extent_key = prev->first;\n    }\n  }\n\n  if (it != extents_.end()) {\n    DCHECK_GE(it->first, start + len);\n    if (start + len == it->first) {  // merge with the next extent\n      size_t it_len = it->second - it->first;\n      CHECK_EQ(1u, len_extents_.erase(pair{it_len, it->first}));\n      extents_.erase(it);\n      len += it_len;\n    }\n  }\n\n  len_extents_.emplace(len, start);\n  if (prev_extent_key) {\n    DCHECK(extents_.find(*prev_extent_key) != extents_.end());\n    extents_[*prev_extent_key] = start + len;\n  } else {\n    extents_.emplace(start, start + len);\n  }\n}\n\noptional<pair<size_t, size_t>> ExtentTree::GetRange(size_t len, size_t align) {\n  DCHECK_GT(align, 0u);\n  DCHECK_EQ(0u, align & (align - 1));\n  DCHECK_EQ(0u, len & (align - 1));\n\n  auto it = len_extents_.lower_bound(pair{len, 0});\n  if (it == len_extents_.end())\n    return nullopt;\n\n  size_t amask = align - 1;\n  size_t aligned_start = it->second;\n  size_t extent_end = it->first + it->second;\n\n  while (true) {\n    if ((aligned_start & amask) == 0)  // aligned\n      break;\n\n    // round up to the next 
aligned address\n    aligned_start = (aligned_start + amask) & (~amask);\n\n    if (aligned_start + len <= extent_end)  // check if we still inside the extent\n      break;\n    ++it;\n\n    if (it == len_extents_.end())\n      return nullopt;\n\n    aligned_start = it->second;\n    extent_end = it->first + it->second;\n  }\n\n  DCHECK_GE(aligned_start, it->second);\n\n  // if we are here - we found the range starting at aligned_start.\n  // now we need to possibly break the existing extent to several parts or completely\n  // delete it.\n  auto eit = extents_.find(it->second);\n  DCHECK(eit != extents_.end());\n  size_t range_end = aligned_start + len;\n\n  len_extents_.erase(it);\n\n  // we break the extent [eit->first, eit->second] to either 0, 1 or 2 intervals.\n  if (aligned_start > eit->first) {  // do we have prefix?\n    eit->second = aligned_start;\n    len_extents_.emplace(eit->second - eit->first, eit->first);\n  } else {\n    extents_.erase(eit);\n  }\n\n  if (range_end < extent_end) {  // do we have suffix?\n    extents_.emplace(range_end, extent_end);\n    len_extents_.emplace(extent_end - range_end, range_end);\n  }\n\n  DCHECK_EQ(range_end - aligned_start, len);\n\n  return pair{aligned_start, range_end};\n}\n\n}  // namespace dfly\n"
  },
  {
    "path": "src/core/extent_tree.h",
    "content": "// Copyright 2022, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#pragma once\n\n#include <absl/container/btree_map.h>\n#include <absl/container/btree_set.h>\n\n#include <optional>\n\nnamespace dfly {\n\n// represents a tree of disjoint extents.\n// check-fails if overlapping ranges are added.\n// automatically handles union of the consequent ranges that are added to the tree.\nclass ExtentTree {\n public:\n  void Add(size_t start, size_t len);\n\n  // in case of success, returns (start, end) pair, where (end-start) >= len and\n  // start is aligned by align.\n  std::optional<std::pair<size_t, size_t>> GetRange(size_t len, size_t align);\n\n private:\n  absl::btree_map<size_t, size_t> extents_;                 // start -> end).\n  absl::btree_set<std::pair<size_t, size_t>> len_extents_;  // (length, start)\n};\n\n}  // namespace dfly\n"
  },
  {
    "path": "src/core/extent_tree_test.cc",
    "content": "// Copyright 2022, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#include \"core/extent_tree.h\"\n\n#include <gmock/gmock.h>\n\n#include \"base/gtest.h\"\n#include \"base/logging.h\"\n\nnamespace dfly {\n\nusing namespace std;\n\nclass ExtentTreeTest : public ::testing::Test {\n protected:\n  static void SetUpTestSuite() {\n  }\n\n  static void TearDownTestSuite() {\n  }\n\n  ExtentTree tree_;\n};\n\nTEST_F(ExtentTreeTest, Basic) {\n  tree_.Add(0, 256);\n  auto op = tree_.GetRange(64, 16);\n  EXPECT_TRUE(op);\n  EXPECT_THAT(*op, testing::Pair(0, 64));  // [64, 256)\n\n  tree_.Add(56, 8);\n  op = tree_.GetRange(64, 16);\n  EXPECT_TRUE(op);\n  EXPECT_THAT(*op, testing::Pair(64, 128));  // {[56, 64), [128, 256)}\n\n  op = tree_.GetRange(18, 2);\n  EXPECT_TRUE(op);\n  EXPECT_THAT(*op, testing::Pair(128, 146));  // {[56, 64), [146, 256)}\n\n  op = tree_.GetRange(80, 16);\n  EXPECT_TRUE(op);\n  EXPECT_THAT(*op, testing::Pair(160, 240));  // {[56, 64), [146, 160), [240, 256)}\n\n  op = tree_.GetRange(4, 1);\n  EXPECT_TRUE(op);\n  EXPECT_THAT(*op, testing::Pair(56, 60));  // {[60, 64), [146, 160), [240, 256)}\n\n  op = tree_.GetRange(32, 1);\n  EXPECT_FALSE(op);\n  tree_.Add(64, 146 - 64);\n  op = tree_.GetRange(32, 4);\n  EXPECT_TRUE(op);\n  EXPECT_THAT(*op, testing::Pair(60, 92));\n}\n\nTEST_F(ExtentTreeTest, Union) {\n  tree_.Add(0, 16);\n  tree_.Add(16, 16);\n  auto range = tree_.GetRange(32, 1);\n  ASSERT_TRUE(range);\n  EXPECT_THAT(*range, testing::Pair(0, 32));\n}\n\n}  // namespace dfly\n"
  },
  {
    "path": "src/core/flatbuffers.h",
    "content": "// Copyright 2024, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#pragma once\n\n#ifndef __USE_GNU  // needed to flatbuffers to compile with musl libc.\n#define FLATBUFFERS_LOCALE_INDEPENDENT 0\n#endif\n\n#include <flatbuffers/flatbuffers.h>\n#include <flatbuffers/flexbuffers.h>\n#include <flatbuffers/idl.h>\n\nnamespace dfly {\nusing FlatJson = flexbuffers::Reference;\n}  // namespace dfly\n"
  },
  {
    "path": "src/core/flatbuffers_test.cc",
    "content": "// Copyright 2023, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#include \"core/flatbuffers.h\"\n\n#include <absl/strings/escaping.h>\n\n#include \"base/gtest.h\"\n#include \"base/logging.h\"\n\nusing namespace std;\n\nnamespace dfly {\nclass FlatBuffersTest : public ::testing::Test {\n protected:\n};\n\nTEST_F(FlatBuffersTest, Basic) {\n  flexbuffers::Builder fbb;\n  fbb.Map([&] {\n    fbb.String(\"foo\", \"bar\");\n    fbb.Double(\"bar\", 1.5);\n    fbb.Vector(\"strs\", [&] {\n      fbb.String(\"hello\");\n      fbb.String(\"world\");\n    });\n  });\n\n  fbb.Finish();\n  auto buffer = fbb.GetBuffer();\n  flexbuffers::Reference ref = flexbuffers::GetRoot(buffer);\n  auto map = ref.AsMap();\n  EXPECT_EQ(\"bar\", map[\"foo\"].AsString().str());\n}\n\nTEST_F(FlatBuffersTest, FlexiParser) {\n  flatbuffers::Parser parser;\n  const char* json = R\"(\n    {\n      \"foo\": \"bar\",\n      \"bar\": 1.5,\n      \"strs\": [\"hello\", \"world\"]\n    }\n  )\";\n  flexbuffers::Builder fbb;\n  ASSERT_TRUE(parser.ParseFlexBuffer(json, nullptr, &fbb));\n  fbb.Finish();\n  const auto& buffer = fbb.GetBuffer();\n  string_view buf_view{reinterpret_cast<const char*>(buffer.data()), buffer.size()};\n  LOG(INFO) << \"Binary buffer: \" << absl::CHexEscape(buf_view);\n  flexbuffers::Reference root = flexbuffers::GetRoot(buffer);\n  auto map = root.AsMap();\n  EXPECT_EQ(\"bar\", map[\"foo\"].AsString().str());\n}\n\nTEST_F(FlatBuffersTest, ParseJson) {\n  const char* schema = R\"(\n    namespace dfly;\n    table Foo {\n      foo: string;\n      bar: double;\n      strs: [string];\n    }\n    root_type Foo;\n  )\";\n\n  flatbuffers::Parser parser;\n  ASSERT_TRUE(parser.Parse(schema));\n  parser.Serialize();\n  flatbuffers::DetachedBuffer bsb = parser.builder_.Release();\n\n  // This schema will always reference bsb.\n  auto* fbs_schema = reflection::GetSchema(bsb.data());\n\n  flatbuffers::Verifier verifier(bsb.data(), 
bsb.size());\n  ASSERT_TRUE(fbs_schema->Verify(verifier));\n\n  auto* root_table = fbs_schema->root_table();\n  auto* fields = root_table->fields();\n  auto* field_foo = fields->LookupByKey(\"foo\");\n  ASSERT_EQ(field_foo->type()->base_type(), reflection::String);\n\n  const char* json = R\"(\n    {\n      \"foo\": \"value\",\n      \"bar\": 1.5,\n      \"strs\": [\"hello\", \"world\"]\n    }\n  )\";\n\n  ASSERT_TRUE(parser.Parse(json));\n  size_t buf_size = parser.builder_.GetSize();\n\n  ASSERT_TRUE(\n      flatbuffers::Verify(*fbs_schema, *root_table, parser.builder_.GetBufferPointer(), buf_size));\n  auto* root_obj = flatbuffers::GetAnyRoot(parser.builder_.GetBufferPointer());\n\n  const flatbuffers::String* value = flatbuffers::GetFieldS(*root_obj, *field_foo);\n  EXPECT_EQ(\"value\", value->str());\n\n  // wrong type.\n  ASSERT_FALSE(parser.Parse(R\"({\"foo\": 1})\"));\n}\n\n}  // namespace dfly\n"
  },
  {
    "path": "src/core/generate_bin_sizes.py",
    "content": "#!/usr/bin/env python3\n\nimport argparse\nimport random\nfrom array import array\n\n# We print in 64 bit words.\nALIGN = 1 << 10  # 1KB alignment\n\n\ndef print_small_bins():\n    prev_val = 0\n    for i in range(56, 1, -1):\n        len = (4096 - i*8)  # reduce by size of hashes\n        len = (len // 8)*8  # make it 8 bytes aligned\n        if len != prev_val:\n            print(i, len)\n            prev_val = len\n    print()\n\n\ndef main():\n    parser = argparse.ArgumentParser(description='')\n    parser.add_argument('-n', type=int, dest='num',\n                        help='number of quadruplets', default=9)\n    parser.add_argument('-small', action='store_true')\n\n    args = parser.parse_args()\n    if args.small:\n        print(\"small\")\n        print_small_bins()\n        return\n\n    size = 512*4\n    print ('{512, 512*2, 512*3, ', end=' ')\n    # print ('{', end=' ')\n    for i in range(args.num):\n        incr = size // 4\n        for j in range(4):\n            assert size % 512 == 0, size\n            print (f'{size}, ', end=' ')\n            size += incr\n        if i % 2 == 1:\n            print('')\n    print('};')\n\nif __name__ == \"__main__\":\n    main()\n"
  },
  {
    "path": "src/core/glob_matcher.cc",
    "content": "// Copyright 2025, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#include \"core/glob_matcher.h\"\n\n#include <absl/strings/ascii.h>\n\n#include \"base/logging.h\"\n\nnamespace dfly {\nusing namespace std;\n\n/* Glob-style pattern matching taken from Valkey. */\nstatic int stringmatchlen_impl(const char* pattern, int patternLen, const char* string,\n                               int stringLen, int nocase, int* skipLongerMatches, int nesting) {\n  /* Protection against abusive patterns. */\n  if (nesting > 1000)\n    return 0;\n\n  while (patternLen && stringLen) {\n    switch (pattern[0]) {\n      case '*':\n        while (patternLen && pattern[1] == '*') {\n          pattern++;\n          patternLen--;\n        }\n        if (patternLen == 1)\n          return 1; /* match */\n        while (stringLen) {\n          if (stringmatchlen_impl(pattern + 1, patternLen - 1, string, stringLen, nocase,\n                                  skipLongerMatches, nesting + 1))\n            return 1; /* match */\n          if (*skipLongerMatches)\n            return 0; /* no match */\n          string++;\n          stringLen--;\n        }\n        /* There was no match for the rest of the pattern starting\n         * from anywhere in the rest of the string. If there were\n         * any '*' earlier in the pattern, we can terminate the\n         * search early without trying to match them to longer\n         * substrings. This is because a longer match for the\n         * earlier part of the pattern would require the rest of the\n         * pattern to match starting later in the string, and we\n         * have just determined that there is no match for the rest\n         * of the pattern starting from anywhere in the current\n         * string. 
*/\n        *skipLongerMatches = 1;\n        return 0; /* no match */\n        break;\n      case '?':\n        string++;\n        stringLen--;\n        break;\n      case '[': {\n        int not_op, match;\n\n        pattern++;\n        patternLen--;\n        not_op = patternLen && pattern[0] == '^';\n        if (not_op) {\n          pattern++;\n          patternLen--;\n        }\n        match = 0;\n        while (1) {\n          if (patternLen >= 2 && pattern[0] == '\\\\') {\n            pattern++;\n            patternLen--;\n            if (pattern[0] == string[0])\n              match = 1;\n          } else if (patternLen == 0) {\n            pattern--;\n            patternLen++;\n            break;\n          } else if (pattern[0] == ']') {\n            break;\n          } else if (patternLen >= 3 && pattern[1] == '-') {\n            int start = pattern[0];\n            int end = pattern[2];\n            int c = string[0];\n            if (start > end) {\n              int t = start;\n              start = end;\n              end = t;\n            }\n            if (nocase) {\n              start = tolower(start);\n              end = tolower(end);\n              c = tolower(c);\n            }\n            pattern += 2;\n            patternLen -= 2;\n            if (c >= start && c <= end)\n              match = 1;\n          } else {\n            if (!nocase) {\n              if (pattern[0] == string[0])\n                match = 1;\n            } else {\n              if (tolower((int)pattern[0]) == tolower((int)string[0]))\n                match = 1;\n            }\n          }\n          pattern++;\n          patternLen--;\n        }\n        if (not_op)\n          match = !match;\n        if (!match)\n          return 0; /* no match */\n        string++;\n        stringLen--;\n        break;\n      }\n      case '\\\\':\n        if (patternLen >= 2) {\n          pattern++;\n          patternLen--;\n        }\n        /* fall through */\n      default:\n   
     if (!nocase) {\n          if (pattern[0] != string[0])\n            return 0; /* no match */\n        } else {\n          if (tolower((int)pattern[0]) != tolower((int)string[0]))\n            return 0; /* no match */\n        }\n        string++;\n        stringLen--;\n        break;\n    }\n    pattern++;\n    patternLen--;\n    if (stringLen == 0) {\n      while (patternLen && *pattern == '*') {\n        pattern++;\n        patternLen--;\n      }\n      break;\n    }\n  }\n  if (patternLen == 0 && stringLen == 0)\n    return 1;\n  return 0;\n}\n\nint stringmatchlen(const char* pattern, int patternLen, const char* string, int stringLen,\n                   int nocase) {\n  int skipLongerMatches = 0;\n  return stringmatchlen_impl(pattern, patternLen, string, stringLen, nocase, &skipLongerMatches, 0);\n}\n\nstring GlobMatcher::Glob2Regex(string_view glob) {\n  string regex;\n  regex.reserve(glob.size());\n  size_t in_group = 0;\n\n  for (size_t i = 0; i < glob.size(); i++) {\n    char c = glob[i];\n    if (in_group > 0) {\n      if (c == ']') {\n        if (i == in_group + 1) {\n          if (glob[in_group] == '^') {  // [^\n            regex.pop_back();\n            regex.back() = '.';\n            in_group = 0;\n            continue;\n          }\n        }\n        in_group = 0;\n      }\n      regex.push_back(c);\n      if (c == '\\\\') {\n        if (i + 1 < glob.size() && glob[i + 1] == ']') {\n          ++i;\n          regex.push_back(']');\n        } else {\n          regex.push_back('\\\\');  // escape the backslash\n        }\n      }\n      continue;\n    }\n\n    switch (c) {\n      case '*':\n        regex.append(\".*\");\n        break;\n      case '?':\n        regex.append(\".\");\n        break;\n      case '.':\n      case '(':\n      case ')':\n      case '{':\n      case '}':\n      case '^':\n      case '$':\n      case '+':\n      case '|':\n        regex.push_back('\\\\');\n        regex.push_back(c);\n        break;\n      case '\\\\':\n 
       if (i + 1 < glob.size()) {\n          ++i;\n        }\n        if (absl::ascii_ispunct(glob[i])) {\n          regex.push_back('\\\\');\n        }\n        regex.push_back(glob[i]);\n        break;\n      case '[':\n        regex.push_back('[');\n        if (i + 1 < glob.size()) {\n          in_group = i + 1;\n        }\n        break;\n      default:\n        regex.push_back(c);\n        break;\n    }\n  }\n  return regex;\n}\n\nGlobMatcher::GlobMatcher(string_view pattern, bool case_sensitive)\n    : glob_(pattern), case_sensitive_(case_sensitive) {\n#ifdef REFLEX_PERFORMANCE\n  if (!pattern.empty()) {\n    starts_with_star_ = pattern.front() == '*';\n    pattern.remove_prefix(starts_with_star_);\n\n    if (!pattern.empty()) {\n      ends_with_star_ =\n          (pattern.back() == '*') && (pattern.size() == 1 || pattern[pattern.size() - 2] != '\\\\');\n      pattern.remove_suffix(ends_with_star_);\n    }\n  }\n\n  string regex(\"(?s\");  // dotall mode\n  if (!case_sensitive) {\n    regex.push_back('i');\n  }\n  regex.push_back(')');\n  if (pattern.empty()) {\n    regex.append(Glob2Regex(\"*\"));\n  } else {\n    regex.append(Glob2Regex(pattern));\n  }\n  matcher_.pattern(regex);\n#elif defined(USE_PCRE2)\n  string regex(\"(?s\");  // dotall mode\n  if (!case_sensitive) {\n    regex.push_back('i');\n  }\n  regex.push_back(')');\n  regex.append(Glob2Regex(pattern));\n\n  int errnum;\n  PCRE2_SIZE erroffset;\n  re_ = pcre2_compile((PCRE2_SPTR)regex.c_str(), regex.size(), 0, &errnum, &erroffset, nullptr);\n  if (re_) {\n    CHECK_EQ(0, pcre2_jit_compile(re_, PCRE2_JIT_COMPLETE));\n    match_data_ = pcre2_match_data_create_from_pattern(re_, NULL);\n  }\n#endif\n}\n\nbool GlobMatcher::Matches(std::string_view str) const {\n#ifdef REFLEX_PERFORMANCE\n  if (str.size() < 16) {\n    return stringmatchlen(glob_.data(), glob_.size(), str.data(), str.size(), !case_sensitive_);\n  }\n  if (glob_.empty()) {\n    return true;\n  }\n\n  
DCHECK(!matcher_.pattern().empty());\n\n  matcher_.input(reflex::Input(str.data(), str.size()));\n\n  bool use_find = starts_with_star_ || ends_with_star_;\n  if (!use_find) {\n    return matcher_.matches() > 0;\n  }\n\n  bool found = matcher_.find() > 0;\n  if (!found) {\n    return false;\n  }\n\n  if (!ends_with_star_ && matcher_.last() != str.size()) {\n    return false;\n  }\n  if (!starts_with_star_ && matcher_.first() != 0) {\n    return false;\n  }\n\n  return true;\n#elif defined(USE_PCRE2)\n  if (!re_ || str.size() < 16) {\n    return stringmatchlen(glob_.data(), glob_.size(), str.data(), str.size(), !case_sensitive_);\n  }\n\n  if (glob_.empty()) {\n    return true;\n  }\n\n  int rc = pcre2_jit_match(re_, (PCRE2_SPTR)str.data(), str.size(), 0, 0, match_data_, NULL);\n  return rc > 0;\n\n#else\n  return stringmatchlen(glob_.data(), glob_.size(), str.data(), str.size(), !case_sensitive_);\n#endif\n}\n\nGlobMatcher::~GlobMatcher() {\n#ifdef REFLEX_PERFORMANCE\n#elif defined(USE_PCRE2)\n  if (re_) {\n    pcre2_code_free(re_);\n    pcre2_match_data_free(match_data_);\n  }\n#endif\n}\n\n}  // namespace dfly\n"
  },
  {
    "path": "src/core/glob_matcher.h",
    "content": "// Copyright 2025, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n#pragma once\n\n#include <reflex/matcher.h>\n\n#include <string>\n#include <string_view>\n\n// We opt for using Reflex library for glob matching.\n// While I find PCRE2 faster, it's not substantially faster to justify the shared lib dependency.\n\n// For some regex, Reflex (and pcre2) have extremely slow compile times(70+ms).\n// This latency is significant for the hot path and therefore both are disabled\n// and we fall back to the plain old stringmatchlen. For more info, refer to #5547 on gh.\n//#define REFLEX_PERFORMANCE\n\n#ifndef REFLEX_PERFORMANCE\n#ifdef USE_PCRE2\n#define PCRE2_CODE_UNIT_WIDTH 8\n#include <pcre2.h>\n#endif\n#endif\n\nnamespace dfly {\n\nclass GlobMatcher {\n  GlobMatcher(const GlobMatcher&) = delete;\n  GlobMatcher& operator=(const GlobMatcher&) = delete;\n\n public:\n  explicit GlobMatcher(std::string_view pattern, bool case_sensitive);\n  ~GlobMatcher();\n\n  bool Matches(std::string_view str) const;\n\n  // Exposed for testing purposes.\n  static std::string Glob2Regex(std::string_view glob);\n\n private:\n  // TODO: we fix the problem of stringmatchlen being much\n  // faster when the result is immediately known to be false, for example: \"a*\" vs \"bxxxxx\".\n  // The goal is to demonstrate on-par performance for the following case:\n  // > debug populate 5000000 keys 32 RAND\n  // > while true; do time valkey-cli scan 0 match 'foo*bar'; done\n  // Also demonstrate that the \"improved\" performance via SCAN command and not only via\n  // micro-benchmark.\n  // The performance of naive algorithm becomes worse in cases where string is long enough,\n  // and the pattern has a star at the start (or it matches at first).\n#ifdef REFLEX_PERFORMANCE\n  mutable reflex::Matcher matcher_;\n\n  bool starts_with_star_ = false;\n  bool ends_with_star_ = false;\n#elif defined(USE_PCRE2)\n  pcre2_code_8* re_ = nullptr;\n  
pcre2_match_data_8* match_data_ = nullptr;\n#endif\n  std::string_view glob_;\n  bool case_sensitive_;\n};\n\n}  // namespace dfly\n"
  },
  {
    "path": "src/core/huff_coder.cc",
    "content": "// Copyright 2025, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#include \"core/huff_coder.h\"\n\n#include \"base/logging.h\"\n\nextern \"C\" {\n#include \"huff/huf.h\"\n}\n\nusing namespace std;\n\nnamespace dfly {\n\nconstexpr size_t kWspSize = HUF_CTABLE_WORKSPACE_SIZE;\n\nbool HuffmanEncoder::Load(std::string_view binary_data, std::string* error_msg) {\n  CHECK(!huf_ctable_);\n\n  huf_ctable_.reset(new HUF_CElt[HUF_CTABLE_SIZE_ST(255)]);\n  table_max_symbol_ = 255;\n\n  unsigned has_zero_weights = 0;\n  size_t read_size = HUF_readCTable(huf_ctable_.get(), &table_max_symbol_, binary_data.data(),\n                                    binary_data.size(), &has_zero_weights);\n\n  if (HUF_isError(read_size)) {\n    huf_ctable_.reset();\n    *error_msg = HUF_getErrorName(read_size);\n    return false;\n  }\n  if (read_size != binary_data.size()) {\n    *error_msg = \"Corrupted data\";\n    huf_ctable_.reset();\n    return false;\n  }\n  HUF_CTableHeader header = HUF_readCTableHeader(huf_ctable_.get());\n  num_bits_ = header.tableLog;\n  table_max_symbol_ = header.maxSymbolValue;\n\n  return true;\n}\n\nbool HuffmanEncoder::Build(const unsigned hist[], unsigned max_symbol, std::string* error_msg) {\n  CHECK(!huf_ctable_);\n  huf_ctable_.reset(new HUF_CElt[HUF_CTABLE_SIZE_ST(max_symbol)]);\n\n  unique_ptr<uint32_t[]> wrkspace(new uint32_t[HUF_CTABLE_WORKSPACE_SIZE_U32]);\n\n  size_t num_bits =\n      HUF_buildCTable_wksp(huf_ctable_.get(), hist, max_symbol, 0, wrkspace.get(), kWspSize);\n  if (HUF_isError(num_bits)) {\n    *error_msg = HUF_getErrorName(num_bits);\n    huf_ctable_.reset();\n    return false;\n  }\n  num_bits_ = static_cast<uint8_t>(num_bits);\n  table_max_symbol_ = max_symbol;\n  return true;\n}\n\nvoid HuffmanEncoder::Reset() {\n  huf_ctable_.reset();\n  table_max_symbol_ = 0;\n}\n\nbool HuffmanEncoder::Encode(std::string_view data, uint8_t* dest, uint32_t* dest_size,\n                            
std::string* error_msg) const {\n  DCHECK(huf_ctable_);\n\n  size_t res =\n      HUF_compress1X_usingCTable(dest, *dest_size, data.data(), data.size(), huf_ctable_.get(), 0);\n\n  if (HUF_isError(res)) {\n    *error_msg = HUF_getErrorName(res);\n    return false;\n  }\n  *dest_size = static_cast<uint32_t>(res);\n  return true;\n}\n\nunsigned HuffmanEncoder::GetNBits(uint8_t symbol) const {\n  DCHECK(huf_ctable_);\n  return HUF_getNbBitsFromCTable(huf_ctable_.get(), symbol);\n}\n\nsize_t HuffmanEncoder::EstimateCompressedSize(const unsigned hist[], unsigned max_symbol) const {\n  DCHECK(huf_ctable_);\n  size_t res = HUF_estimateCompressedSize(huf_ctable_.get(), hist, max_symbol);\n  return res;\n}\n\nstring HuffmanEncoder::Export() const {\n  DCHECK(huf_ctable_);\n\n  // Reverse engineered: (maxSymbolValue + 1) / 2 + 1.\n  constexpr unsigned kMaxTableSize = 130;\n  string res;\n  res.resize(kMaxTableSize);\n\n  unique_ptr<uint32_t[]> wrkspace(new uint32_t[HUF_CTABLE_WORKSPACE_SIZE_U32]);\n\n  // Seems we can reuse the same workspace, its capacity is enough.\n  size_t size = HUF_writeCTable_wksp(res.data(), res.size(), huf_ctable_.get(), table_max_symbol_,\n                                     num_bits_, wrkspace.get(), kWspSize);\n  CHECK(!HUF_isError(size));\n  res.resize(size);\n  return res;\n}\n\n// Copied from HUF_tightCompressBound.\nsize_t HuffmanEncoder::CompressedBound(size_t src_size) const {\n  return ((src_size * num_bits_) >> 3) + 8;\n}\n\nbool HuffmanDecoder::Load(std::string_view binary_data, std::string* error_msg) {\n  DCHECK(!huf_dtable_);\n  huf_dtable_.reset(new HUF_DTable[HUF_DTABLE_SIZE(HUF_TABLELOG_MAX)]);\n  huf_dtable_[0] = (HUF_TABLELOG_MAX - 1) * 0x01000001;  // some sort of magic number\n\n  constexpr size_t kWspSize = HUF_DECOMPRESS_WORKSPACE_SIZE;\n  unique_ptr<uint8_t[]> wrksp(new uint8_t[kWspSize]);\n\n  size_t res = HUF_readDTableX1_wksp(huf_dtable_.get(), binary_data.data(), binary_data.size(),\n                                     
wrksp.get(), kWspSize, 0);\n  if (HUF_isError(res)) {\n    *error_msg = HUF_getErrorName(res);\n    huf_dtable_.reset();\n    return false;\n  }\n  if (res != binary_data.size()) {\n    *error_msg = \"Corrupted data\";\n    huf_dtable_.reset();\n    return false;\n  }\n  return true;\n}\n\nbool HuffmanDecoder::Decode(std::string_view src, size_t dest_size, char* dest) const {\n  DCHECK(huf_dtable_);\n  size_t res =\n      HUF_decompress1X_usingDTable(dest, dest_size, src.data(), src.size(), huf_dtable_.get(), 1);\n\n  if (HUF_isError(res)) {\n    LOG(DFATAL) << \"Failed to decompress: \" << HUF_getErrorName(res);\n    return false;\n  }\n  return true;\n}\n\n}  // namespace dfly\n"
  },
  {
    "path": "src/core/huff_coder.h",
    "content": "// Copyright 2025, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#pragma once\n\n#include <cstdint>\n#include <memory>\n#include <string_view>\n\nnamespace dfly {\n\nclass HuffmanEncoder {\n public:\n  bool Build(const unsigned hist[], unsigned max_symbol, std::string* error_msg);\n\n  bool Encode(std::string_view data, uint8_t* dest, uint32_t* dest_size,\n              std::string* error_msg) const;\n\n  size_t EstimateCompressedSize(const unsigned hist[], unsigned max_symbol) const;\n\n  void Reset();\n\n  // Load using the serialized data produced by Export().\n  bool Load(std::string_view binary_data, std::string* error_msg);\n\n  // Exports a binary representation of the table, that can be loaded using Load().\n  std::string Export() const;\n\n  uint8_t num_bits() const {\n    return num_bits_;\n  }\n\n  bool valid() const {\n    return bool(huf_ctable_);\n  }\n\n  unsigned max_symbol() const {\n    return table_max_symbol_;\n  }\n\n  unsigned GetNBits(uint8_t symbol) const;\n\n  // Estimation of the size of the destination buffer needed to store the compressed data.\n  // destination of this size must be passed to Encode().\n  size_t CompressedBound(size_t src_size) const;\n\n private:\n  using HUF_CElt = size_t;\n  std::unique_ptr<HUF_CElt[]> huf_ctable_;\n  unsigned table_max_symbol_ = 0;\n  uint8_t num_bits_ = 0;\n};\n\nclass HuffmanDecoder {\n public:\n  bool Load(std::string_view binary_data, std::string* error_msg);\n  bool valid() const {\n    return bool(huf_dtable_);\n  }\n\n  // decoded_size should be the *precise* size of the decoded data, otherwise the function will\n  // fail. 
dest should point to a buffer of at least decoded_size bytes.\n  // Returns true if decompression was successful, false if the data is corrupted.\n  bool Decode(std::string_view src, size_t decoded_size, char* dest) const;\n\n private:\n  using HUF_DTable = uint32_t;\n  std::unique_ptr<HUF_DTable[]> huf_dtable_;\n};\n\n}  // namespace dfly\n"
  },
  {
    "path": "src/core/intent_lock.h",
    "content": "// Copyright 2022, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n#include <assert.h>\n\n#include <ostream>\n\n#pragma once\n\nnamespace dfly {\n\n// SHARED - can be acquired multiple times as long as other intents are absent.\n// EXCLUSIVE - is acquired only if it's the only lock recorded.\n// Transactions at the head of tx-queue are considered to be the ones that acquired the lock\nclass IntentLock {\n public:\n  enum Mode { SHARED = 0, EXCLUSIVE = 1 };\n\n  // Returns true if lock was acquired. In any case, the intent is recorded.\n  bool Acquire(Mode m) {\n    ++cnt_[m];\n\n    if (cnt_[1 ^ int(m)])\n      return false;\n    return m == SHARED || cnt_[EXCLUSIVE] == 1;\n  }\n\n  // Returns true if lock can be acquired using `m` mode.\n  bool Check(Mode m) const {\n    unsigned s = cnt_[EXCLUSIVE];\n    if (s)\n      return false;\n\n    return (m == SHARED) ? true : cnt_[SHARED] == 0;\n  }\n\n  // Returns true if this lock would block transactions from running unless they are at the head\n  // of the transaction queue (first ones)\n  bool IsContended() const {\n    return (cnt_[EXCLUSIVE] > 1) || (cnt_[EXCLUSIVE] == 1 && cnt_[SHARED] > 0);\n  }\n\n  // A heuristic function to estimate the contention amount with a single score.\n  unsigned ContentionScore() const {\n    return cnt_[EXCLUSIVE] * 256 + cnt_[SHARED];\n  }\n\n  void Release(Mode m, unsigned val = 1) {\n    assert(cnt_[m] >= val);\n\n    cnt_[m] -= val;\n    // return cnt_[m] == 0 ? cnt_[1 ^ int(m)] : 0;\n  }\n\n  bool IsFree() const {\n    return (cnt_[0] | cnt_[1]) == 0;\n  }\n\n  static const char* ModeName(Mode m);\n\n  void VerifyDebug();\n\n  friend std::ostream& operator<<(std::ostream& o, const IntentLock& lock) {\n    return o << \"{SHARED: \" << lock.cnt_[0] << \", EXCLUSIVE: \" << lock.cnt_[1] << \"}\";\n  }\n\n private:\n  unsigned cnt_[2] = {0, 0};\n};\n\n}  // namespace dfly\n"
  },
  {
    "path": "src/core/interpreter.cc",
    "content": "// Copyright 2022, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#include \"core/interpreter.h\"\n\n#include <absl/base/casts.h>\n#include <absl/container/fixed_array.h>\n#include <absl/strings/str_cat.h>\n#include <absl/strings/str_split.h>\n#include <absl/time/clock.h>\n#include <mimalloc.h>\n#include <openssl/evp.h>\n#include <xxhash.h>\n\n#include <cstring>\n#include <optional>\n#include <regex>\n#include <set>\n#include <variant>\n\n#include \"base/flags.h\"\n#include \"core/interpreter_polyfill.h\"\n#include \"overloaded.h\"\n\nextern \"C\" {\n#include <lauxlib.h>\n#include <lua.h>\n#include <lualib.h>\n\n#include \"redis/sds.h\"\n#include \"redis/util.h\"\n\nLUALIB_API int(luaopen_cjson)(lua_State* L);\nLUALIB_API int(luaopen_struct)(lua_State* L);\nLUALIB_API int(luaopen_cmsgpack)(lua_State* L);\nLUALIB_API int(luaopen_bit)(lua_State* L);\n}\n\n#include <absl/strings/str_format.h>\n\n#include \"base/logging.h\"\n\nstruct LuaGcGen {\n  int minormul = 20;\n  int majormul = 100;\n};\nstruct LuaGcInc {\n  int pause = 200;\n  int stepmul = 100;\n  int stepsize = 13;\n};\n\nusing LuaGcFlag = std::variant<std::monostate, LuaGcGen, LuaGcInc>;\n\nABSL_FLAG(LuaGcFlag, luagc, {},\n          \"Specifies Lua garabage collector preferences. By default used default lua GC parameters.\"\n          \"Format should be 'inc/200/100/13' or 'gen/20/100' where 'inc' and 'gen' are types of \"\n          \"GC, numbers are parameters.\"\n          \"For more information check https://www.lua.org/manual/5.4/manual.html#2.5\");\n\nABSL_FLAG(uint64_t, lua_mem_gc_threshold, 10000000,\n          \"Specifies Lua interpreter's per thread memory limit in bytes after which the GC will be \"\n          \"called forcefully. 
0 value remove forced GC calls\");\n\nABSL_FLAG(bool, lua_enable_redis_log, false, \"Enable redis.log to write logs from lua script.\");\n\nstatic bool AbslParseFlag(std::string_view in, LuaGcFlag* flag, std::string* err) {\n  if (in.empty()) {\n    *flag = LuaGcFlag{};\n    return true;\n  }\n  std::vector<std::string_view> parts = absl::StrSplit(in, '/');\n  if (parts.size() == 3) {\n    if (parts[0] == \"gen\") {\n      LuaGcGen args;\n      if (absl::SimpleAtoi(parts[1], &args.minormul) &&\n          absl::SimpleAtoi(parts[2], &args.majormul)) {\n        *flag = args;\n        return true;\n      }\n    }\n  } else if (parts.size() == 4) {\n    if (parts[0] == \"inc\") {\n      LuaGcInc args;\n      if (absl::SimpleAtoi(parts[1], &args.pause) && absl::SimpleAtoi(parts[2], &args.stepmul) &&\n          absl::SimpleAtoi(parts[3], &args.stepsize)) {\n        *flag = LuaGcFlag{args};\n        return true;\n      }\n    }\n  }\n  *err = absl::StrCat(\"Invalid luagc flag parameters\");\n  return false;\n}\n\nstatic std::string AbslUnparseFlag(const LuaGcFlag& flag) {\n  return std::visit(dfly::Overloaded{\n                        [](std::monostate) { return std::string(); },\n                        [](const LuaGcGen& gen) {\n                          return absl::StrCat(\"gen\", \"/\", gen.minormul, \"/\", gen.majormul);\n                        },\n                        [](const LuaGcInc& inc) {\n                          return absl::StrCat(\"inc\", \"/\", inc.pause, \"/\", inc.stepmul, \"/\",\n                                              inc.stepsize);\n                        },\n                    },\n                    flag);\n}\n\nnamespace dfly {\nusing namespace std;\n\nnamespace {\n\n// EVP_Q_digest is not present in the older versions of OpenSSL.\nint EVPDigest(const void* data, size_t datalen, unsigned char* md, size_t* mdlen) {\n  unsigned int temp = 0;\n  int ret = EVP_Digest(data, datalen, md, &temp, EVP_sha1(), NULL);\n\n  if (mdlen != NULL)\n    
*mdlen = temp;\n  return ret;\n}\n\n/* This function is used in order to push an error on the Lua stack in the\n * format used by redis.pcall to return errors, which is a lua table\n * with a single \"err\" field set to the error string. Note that this\n * table is never a valid reply by proper commands, since the returned\n * tables are otherwise always indexed by integers, never by strings. */\nvoid PushError(lua_State* lua, string_view error, bool trace = true) {\n  lua_Debug dbg;\n\n  lua_newtable(lua);\n  lua_pushstring(lua, \"err\");\n\n  /* Attempt to figure out where this function was called, if possible */\n  if (trace && lua_getstack(lua, 1, &dbg) && lua_getinfo(lua, \"nSl\", &dbg)) {\n    string msg = absl::StrCat(dbg.source, \": \", dbg.currentline, \": \", error);\n    lua_pushlstring(lua, msg.c_str(), msg.size());\n  } else {\n    lua_pushlstring(lua, error.data(), error.size());\n  }\n  lua_settable(lua, -3);\n}\n\n// Custom object explorer that collects all values into string array\nstruct StringCollectorTranslator : public ObjectExplorer {\n  void OnString(std::string_view str) final {\n    values.emplace_back(str);\n  }\n  void OnArrayStart(unsigned len) final {\n    // if values is n't empty it means we can not predict the needed size so reserve can\n    // significantly decrease performance\n    if (values.empty()) {\n      values.reserve(len);\n    }\n  }\n  void OnArrayEnd() final {\n  }\n  void OnBool(bool b) final {\n    OnString(absl::AlphaNum(b).Piece());\n  }\n  void OnDouble(double d) final {\n    OnString(absl::AlphaNum(d).Piece());\n  }\n  void OnInt(int64_t val) final {\n    OnString(absl::AlphaNum(val).Piece());\n  }\n  void OnNil() final {\n    OnString(\"\");\n  }\n  void OnStatus(std::string_view str) final {\n    OnString(str);\n  }\n  void OnError(std::string_view str) final {\n    LOG(ERROR) << str;\n  }\n\n  vector<string> values;\n};\n\nclass RedisTranslator : public ObjectExplorer {\n public:\n  RedisTranslator(lua_State* 
lua) : lua_(lua) {\n  }\n  void OnBool(bool b) final;\n  void OnString(std::string_view str) final;\n  void OnDouble(double d) final;\n  void OnInt(int64_t val) final;\n  void OnArrayStart(unsigned len) final;\n  void OnArrayEnd() final;\n  void OnNil() final;\n  void OnStatus(std::string_view str) final;\n  void OnError(std::string_view str) final;\n\n  bool HasError();\n\n private:\n  void ArrayPre() {\n  }\n\n  void ArrayPost() {\n    if (!array_index_.empty()) {\n      lua_rawseti(lua_, -2, array_index_.back()++); /* set table at key `i' */\n    }\n  }\n\n  lua_State* lua_;\n  bool has_error_{false};\n  vector<unsigned> array_index_{};\n};\n\nvoid RedisTranslator::OnBool(bool b) {\n  CHECK(!b) << \"Only false (nil) supported\";\n  ArrayPre();\n  lua_pushboolean(lua_, 0);\n  ArrayPost();\n}\n\nvoid RedisTranslator::OnString(std::string_view str) {\n  ArrayPre();\n  lua_pushlstring(lua_, str.data(), str.size());\n  ArrayPost();\n}\n\nvoid RedisTranslator::OnDouble(double d) {\n  const double kConvertEps = std::numeric_limits<double>::epsilon();\n\n  double fractpart, intpart;\n  fractpart = modf(d, &intpart);\n\n  ArrayPre();\n\n  // Convert to integer when possible to allow converting to string without trailing zeros.\n  if (abs(fractpart) < kConvertEps && intpart < double(std::numeric_limits<lua_Integer>::max()) &&\n      intpart > std::numeric_limits<lua_Integer>::min())\n    lua_pushinteger(lua_, static_cast<lua_Integer>(d));\n  else\n    lua_pushnumber(lua_, d);\n  ArrayPost();\n}\n\nvoid RedisTranslator::OnInt(int64_t val) {\n  ArrayPre();\n  lua_pushinteger(lua_, val);\n  ArrayPost();\n}\n\nvoid RedisTranslator::OnNil() {\n  ArrayPre();\n  lua_pushboolean(lua_, 0);\n  ArrayPost();\n}\n\nvoid RedisTranslator::OnStatus(std::string_view str) {\n  CHECK(array_index_.empty()) << \"unexpected status\";\n  lua_createtable(lua_, 0, 1);\n  lua_pushstring(lua_, \"ok\");\n  lua_pushlstring(lua_, str.data(), str.size());\n  lua_settable(lua_, -3);\n}\n\nvoid 
RedisTranslator::OnError(std::string_view str) {\n  has_error_ = true;\n  PushError(lua_, str, false);\n}\n\nvoid RedisTranslator::OnArrayStart(unsigned len) {\n  ArrayPre();\n  lua_createtable(lua_, len, 0);\n  array_index_.push_back(1);\n}\n\nvoid RedisTranslator::OnArrayEnd() {\n  CHECK(!array_index_.empty());\n  DCHECK(lua_istable(lua_, -1));\n\n  array_index_.pop_back();\n  ArrayPost();\n}\n\nbool RedisTranslator::HasError() {\n  return has_error_;\n}\n\nvoid RunSafe(lua_State* lua, string_view buf, const char* name) {\n  CHECK_EQ(0, luaL_loadbuffer(lua, buf.data(), buf.size(), name));\n  int err = lua_pcall(lua, 0, 0, 0);\n  if (err) {\n    const char* errstr = lua_tostring(lua, -1);\n    LOG(FATAL) << \"Error running \" << name << \" \" << errstr;\n  }\n}\n\nvoid Require(lua_State* lua, const char* name, lua_CFunction openf) {\n  luaL_requiref(lua, name, openf, 1);\n  lua_pop(lua, 1); /* remove lib */\n}\n\nstring_view TopSv(lua_State* lua) {\n  return string_view{lua_tostring(lua, -1), lua_rawlen(lua, -1)};\n}\n\noptional<int> FetchKey(lua_State* lua, const char* key) {\n  lua_pushcfunction(lua, [](lua_State* lua) -> int {\n    lua_gettable(lua, -3);\n    return 1;\n  });\n  lua_pushstring(lua, key);\n  int status = lua_pcall(lua, 1, 1, 0);\n  if (status != LUA_OK) {\n    lua_pop(lua, 1);\n    return nullopt;\n  }\n  int type = lua_type(lua, -1);\n  if (type == LUA_TNIL) {\n    lua_pop(lua, 1);\n    return nullopt;\n  }\n  return type;\n}\n\nvoid SetGlobalArrayInternal(lua_State* lua, const char* name, Interpreter::SliceSpan args) {\n  lua_createtable(lua, args.size(), 0);\n  for (size_t j = 0; j < args.size(); j++) {\n    lua_pushlstring(lua, args[j].data(), args[j].size());\n    lua_rawseti(lua, -2, j + 1);\n  }\n  lua_setglobal(lua, name);\n}\n\n/* In case the error set into the Lua stack by PushError() was generated\n * by the non-error-trapping version of redis.pcall(), which is redis.call(),\n * this function will raise the Lua error so that the 
execution of the\n * script will be halted.\n * This function never returns, it unwinds the Lua call stack until an error handler is found or the\n * script exits */\nint RaiseErrorAndAbort(lua_State* lua) {\n  lua_pushstring(lua, \"err\");\n  lua_gettable(lua, -2);\n  return lua_error(lua);\n}\n\nvoid LoadLibrary(lua_State* lua, const char* libname, lua_CFunction luafunc) {\n  lua_pushcfunction(lua, luafunc);\n  lua_pushstring(lua, libname);\n  lua_call(lua, 1, 0);\n}\n\nvoid InitLua(lua_State* lua) {\n  Require(lua, \"\", luaopen_base);\n  Require(lua, LUA_TABLIBNAME, luaopen_table);\n  Require(lua, LUA_STRLIBNAME, luaopen_string);\n  Require(lua, LUA_MATHLIBNAME, luaopen_math);\n  Require(lua, LUA_DBLIBNAME, luaopen_debug);\n\n  LoadLibrary(lua, \"cjson\", luaopen_cjson);\n  LoadLibrary(lua, \"struct\", luaopen_struct);\n  LoadLibrary(lua, \"cmsgpack\", luaopen_cmsgpack);\n  LoadLibrary(lua, \"bit\", luaopen_bit);\n\n  /* Add a helper function we use for pcall error reporting.\n   * Note that when the error is in the C function we want to report the\n   * information about the caller, that's what makes sense from the point\n   * of view of the user debugging a script. */\n  {\n    const char errh_func[] =\n        \"local dbg = debug\\n\"\n        \"function __redis__err__handler(err)\\n\"\n        \"  local i = dbg.getinfo(2,'nSl')\\n\"\n        \"  if i and i.what == 'C' then\\n\"\n        \"    i = dbg.getinfo(3,'nSl')\\n\"\n        \"  end\\n\"\n        \"  if i then\\n\"\n        \"    return i.source .. ':' .. i.currentline .. ': ' .. 
err\\n\"\n        \"  else\\n\"\n        \"    return err\\n\"\n        \"  end\\n\"\n        \"end\\n\";\n    RunSafe(lua, errh_func, \"@err_handler_def\");\n  }\n\n  {\n    const char code[] = R\"(\nlocal dbg=debug\nlocal mt = {}\n\nsetmetatable(_G, mt)\nmt.__newindex = function (t, n, v)\n  if dbg.getinfo(2) then\n    local w = dbg.getinfo(2, \"S\").what\n    if w ~= \"main\" and w ~= \"C\" then\n      error(\"Script attempted to create global variable '\"..tostring(n)..\"'\", 2)\n    end\n  end\n  rawset(t, n, v)\nend\nmt.__index = function (t, n)\n  if dbg.getinfo(2) and dbg.getinfo(2, \"S\").what ~= \"C\" then\n    error(\"Script attempted to access nonexistent global variable '\"..tostring(n)..\"'\", 2)\n  end\n  return rawget(t, n)\nend\ndebug = nil\n)\";\n    RunSafe(lua, code, \"@enable_strict_lua\");\n  }\n\n  lua_pushnil(lua);\n  lua_setglobal(lua, \"loadfile\");\n  lua_pushnil(lua);\n  lua_setglobal(lua, \"dofile\");\n\n  // Register deprecated or removed functions to maintain compatibility with 5.1\n  register_polyfills(lua);\n}\n\n// dest must have at least 41 chars.\nvoid ToHex(const uint8_t* src, char* dest) {\n  const char cset[] = \"0123456789abcdef\";\n  for (size_t j = 0; j < 20; j++) {\n    dest[j * 2] = cset[((src[j] & 0xF0) >> 4)];\n    dest[j * 2 + 1] = cset[(src[j] & 0xF)];\n  }\n  dest[40] = '\\0';\n}\n\nint DragonflyHashCommand(lua_State* lua) {\n  XXH64_hash_t hash = absl::bit_cast<XXH64_hash_t>(lua_tointeger(lua, 1));\n  bool requires_sort = lua_toboolean(lua, 2);\n\n  // Pop first two arguments to call RedisGenericCommand from this function with tail\n  lua_remove(lua, 1);\n  lua_remove(lua, 1);\n\n  // Compute key hash; for MGET hash all key arguments, otherwise just the first\n  {\n    size_t cmd_len;\n    const char* cmd = lua_tolstring(lua, 1, &cmd_len);\n    int top = lua_gettop(lua);\n    int key_end = absl::EqualsIgnoreCase(absl::string_view(cmd, cmd_len), \"mget\") ? 
top : 2;\n    for (int i = 2; i <= key_end; ++i) {\n      size_t len;\n      const char* key = lua_tolstring(lua, i, &len);\n      hash = XXH64(key, len, hash);\n    }\n  }\n\n  // Collect output into custom string collector\n  StringCollectorTranslator translator;\n  void** ptr = static_cast<void**>(lua_getextraspace(lua));\n  reinterpret_cast<Interpreter*>(*ptr)->RedisGenericCommand(false, false, &translator);\n\n  if (requires_sort)\n    sort(translator.values.begin(), translator.values.end());\n\n  // Compute new hash and return it\n  for (string_view str : translator.values)\n    hash = XXH64(str.data(), str.size(), hash);\n\n  lua_pushinteger(lua, absl::bit_cast<lua_Integer>(hash));\n  return 1;\n}\n\nint DragonflyRandstrCommand(lua_State* state) {\n  int argc = lua_gettop(state);\n  lua_Integer dsize = lua_tonumber(state, 1);\n  lua_remove(state, 1);\n\n  std::string buf(dsize, ' ');\n\n  auto push_str = [dsize, state, &buf]() {\n    static const char alphanum[] =\n        \"0123456789\"\n        \"ABCDEFGHIJKLMNOPQRSTUVWXYZ\"\n        \"abcdefghijklmnopqrstuvwxyz\";\n\n    static const char pattern[] = \"DRAGONFLY\";\n    constexpr int pattern_len = sizeof(pattern) - 1;\n    constexpr int pattern_interval = 53;\n    for (int i = 0; i < dsize; ++i) {\n      if (i % pattern_interval == 0 && i + pattern_len <= dsize) {\n        // Insert the repeating pattern for better compression of random string.\n        buf.replace(i, pattern_len, pattern, pattern_len);\n        i += pattern_len - 1;  // Adjust index to skip the pattern\n      } else {\n        // Fill the rest with semi-random characters for variation\n        buf[i] = alphanum[rand() % (sizeof(alphanum) - 1)];\n      }\n    }\n    lua_pushlstring(state, buf.c_str(), buf.length());\n  };\n\n  if (argc == 1) {\n    push_str();\n  } else {\n    lua_Integer num = lua_tonumber(state, 1);\n    lua_createtable(state, num, 0);\n    for (int i = 1; i <= num; i++) {\n      push_str();\n      lua_rawseti(state, 
-2, i);\n    }\n  }\n\n  return 1;\n}\n\nint RedisSha1Command(lua_State* lua) {\n  int argc = lua_gettop(lua);\n  if (argc != 1) {\n    lua_pushstring(lua, \"wrong number of arguments\");\n    return lua_error(lua);\n  }\n\n  size_t len;\n  const char* s = lua_tolstring(lua, 1, &len);\n\n  uint8_t digest[EVP_MAX_MD_SIZE];\n  EVPDigest(s, len, digest, NULL);\n\n  char hex[41];\n  ToHex(digest, hex);\n\n  lua_pushstring(lua, hex);\n  return 1;\n}\n\n/* Returns a table with a single field 'field' set to the string value\n * passed as argument. This helper function is handy when returning\n * a Redis Protocol error or status reply from Lua:\n *\n * return redis.error_reply(\"ERR Some Error\")\n * return redis.status_reply(\"ERR Some Error\")\n */\nint SingleFieldTable(lua_State* lua, const char* field) {\n  if (lua_gettop(lua) != 1 || lua_type(lua, -1) != LUA_TSTRING) {\n    PushError(lua, \"wrong number or type of arguments\");\n    return 1;\n  }\n\n  lua_newtable(lua);\n  lua_pushstring(lua, field);\n  lua_pushvalue(lua, -3);\n  lua_settable(lua, -3);\n  return 1;\n}\n\nint RedisErrorReplyCommand(lua_State* lua) {\n  return SingleFieldTable(lua, \"err\");\n}\n\nint RedisStatusReplyCommand(lua_State* lua) {\n  return SingleFieldTable(lua, \"ok\");\n}\n\n// no-op\nint RedisReplicateCommands(lua_State* lua) {\n  lua_pushinteger(lua, 1);\n  // number of results (the number of elements pushed to the lua stack\n  return 1;\n}\n\nint RedisLogCommand(lua_State* lua) {\n  int j, argc = lua_gettop(lua);\n  sds log;\n\n  if (argc < 2) {\n    PushError(lua, \"redis.log() requires two arguments or more.\");\n    return RaiseErrorAndAbort(lua);\n  } else if (!lua_isnumber(lua, -argc)) {\n    PushError(lua, \"First argument must be a number (log level).\");\n    return RaiseErrorAndAbort(lua);\n  }\n\n  if (absl::GetFlag(FLAGS_lua_enable_redis_log)) {\n    int level = lua_tonumber(lua, -argc);\n    if (level < LL_DEBUG || level > LL_WARNING) {\n      PushError(lua, \"Invalid log 
level.\");\n      return RaiseErrorAndAbort(lua);\n    }\n\n    /* Glue together all the arguments */\n    log = sdsempty();\n    for (j = 1; j < argc; j++) {\n      size_t len;\n      char* s;\n\n      s = (char*)lua_tolstring(lua, (-argc) + j, &len);\n      if (s) {\n        if (j != 1)\n          log = sdscatlen(log, \" \", 1);\n        log = sdscatlen(log, s, len);\n      }\n    }\n\n    switch (level) {\n      case LL_DEBUG:\n      case LL_VERBOSE:\n        VLOG(1) << log;\n        break;\n      case LL_NOTICE:\n        LOG(INFO) << log;\n        break;\n      case LL_WARNING:\n        LOG(WARNING) << log;\n      default:\n        break;\n    }\n    sdsfree(log);\n  }\n\n  return 0;\n}\n\n// See https://www.lua.org/manual/5.3/manual.html#lua_Alloc\nvoid* mimalloc_glue(void* ud, void* ptr, size_t osize, size_t nsize) {\n  int64_t& used_bytes = *static_cast<int64_t*>(ud);\n\n  if (nsize == 0) {\n    used_bytes -= mi_usable_size(ptr);\n    mi_free_size(ptr, osize);\n    return nullptr;\n  } else if (ptr == nullptr) {\n    ptr = mi_malloc(nsize);\n    used_bytes += mi_usable_size(ptr);\n    return ptr;\n  } else {\n    const auto old_size = mi_usable_size(ptr);\n    ptr = mi_realloc(ptr, nsize);\n    if (ptr) {\n      used_bytes -= old_size;\n      used_bytes += mi_usable_size(ptr);\n    }\n\n    return ptr;\n  }\n}\n\n}  // namespace\n\nInterpreter::Interpreter() {\n  InterpreterManager::tl_stats().interpreter_cnt++;\n\n  // interpreter can be runnned in different threads so we need to calculate\n  // used memory via &used_bytes_ additional parameter\n  lua_ = lua_newstate(mimalloc_glue, &used_bytes_);\n  InitLua(lua_);\n  void** ptr = static_cast<void**>(lua_getextraspace(lua_));\n  *ptr = this;\n  // SaveOnRegistry(lua_, kInstanceKey, this);\n\n  /* Register the dragonfly commands table and fields */\n  lua_newtable(lua_);\n\n  /* dragonfly.ihash - compute quick integer hash of command result */\n  lua_pushstring(lua_, \"ihash\");\n  lua_pushcfunction(lua_, 
DragonflyHashCommand);\n  lua_settable(lua_, -3);\n\n  /* dragonfly.randstr - generate random string or table of random strings */\n  lua_pushstring(lua_, \"randstr\");\n  lua_pushcfunction(lua_, DragonflyRandstrCommand);\n  lua_settable(lua_, -3);\n\n  /* Finally set the table as 'dragonfly' global var. */\n  lua_setglobal(lua_, \"dragonfly\");\n  CHECK(lua_checkstack(lua_, 64));\n\n  /* Register the redis commands table and fields */\n  lua_newtable(lua_);\n\n  /* redis.call */\n  lua_pushstring(lua_, \"call\");\n  lua_pushcfunction(lua_, RedisCallCommand);\n  lua_settable(lua_, -3);\n\n  /* redis.pcall */\n  lua_pushstring(lua_, \"pcall\");\n  lua_pushcfunction(lua_, RedisPCallCommand);\n  lua_settable(lua_, -3);\n\n  /* redis.acall */\n  lua_pushstring(lua_, \"acall\");\n  lua_pushcfunction(lua_, RedisACallCommand);\n  lua_settable(lua_, -3);\n\n  /* redis.apcall */\n  lua_pushstring(lua_, \"apcall\");\n  lua_pushcfunction(lua_, RedisAPCallCommand);\n  lua_settable(lua_, -3);\n\n  lua_pushstring(lua_, \"sha1hex\");\n  lua_pushcfunction(lua_, RedisSha1Command);\n  lua_settable(lua_, -3);\n\n  /* redis.error_reply and redis.status_reply */\n  lua_pushstring(lua_, \"error_reply\");\n  lua_pushcfunction(lua_, RedisErrorReplyCommand);\n  lua_settable(lua_, -3);\n  lua_pushstring(lua_, \"status_reply\");\n  lua_pushcfunction(lua_, RedisStatusReplyCommand);\n  lua_settable(lua_, -3);\n\n  /* no-op functions */\n\n  /* redis.replicate_commands*/\n  lua_pushstring(lua_, \"replicate_commands\");\n  lua_pushcfunction(lua_, RedisReplicateCommands);\n  lua_settable(lua_, -3);\n\n  /* redis.log*/\n  lua_pushstring(lua_, \"log\");\n  lua_pushcfunction(lua_, RedisLogCommand);\n  lua_settable(lua_, -3);\n\n  lua_pushinteger(lua_, LL_DEBUG);\n  lua_setfield(lua_, -2, \"LOG_DEBUG\");\n\n  lua_pushinteger(lua_, LL_VERBOSE);\n  lua_setfield(lua_, -2, \"LOG_VERBOSE\");\n\n  lua_pushinteger(lua_, LL_NOTICE);\n  lua_setfield(lua_, -2, \"LOG_NOTICE\");\n\n  lua_pushinteger(lua_, 
LL_WARNING);\n  lua_setfield(lua_, -2, \"LOG_WARNING\");\n\n  /* Finally set the table as 'redis' global var. */\n  lua_setglobal(lua_, \"redis\");\n  CHECK(lua_checkstack(lua_, 64));\n\n  UpdateGCParameters();\n}\n\nInterpreter::~Interpreter() {\n  InterpreterManager::tl_stats().interpreter_cnt--;\n\n  lua_close(lua_);\n}\n\nvoid Interpreter::FuncSha1(string_view body, char* fp) {\n  uint8_t digest[EVP_MAX_MD_SIZE];\n  EVPDigest(body.data(), body.size(), digest, NULL);\n\n  ToHex(digest, fp);\n}\n\nauto Interpreter::AddFunction(string_view sha, string_view body, string* result) -> AddResult {\n  char funcname[43];\n  funcname[0] = 'f';\n  funcname[1] = '_';\n  DCHECK(sha.size() == 40);\n  memcpy(funcname + 2, sha.data(), sha.size());\n  funcname[42] = '\\0';\n\n  int type = lua_getglobal(lua_, funcname);\n  lua_pop(lua_, 1);\n\n  if (type == LUA_TNIL && !AddInternal(funcname, body, result))\n    return COMPILE_ERR;\n\n  return type == LUA_TNIL ? ADD_OK : ALREADY_EXISTS;\n}\n\nbool Interpreter::Exists(string_view sha) const {\n  DCHECK(lua_);\n\n  if (sha.size() != 40)\n    return false;\n\n  char fname[43];\n  fname[0] = 'f';\n  fname[1] = '_';\n  fname[42] = '\\0';\n  memcpy(fname + 2, sha.data(), 40);\n\n  int type = lua_getglobal(lua_, fname);\n  lua_pop(lua_, 1);\n\n  return type == LUA_TFUNCTION;\n}\n\nauto Interpreter::RunFunction(string_view sha, std::string* error) -> RunResult {\n  DVLOG(2) << \"RunFunction \" << sha << \" \" << lua_gettop(lua_);\n\n  DCHECK_EQ(40u, sha.size());\n\n  lua_getglobal(lua_, \"__redis__err__handler\");\n  char fname[43];\n  fname[0] = 'f';\n  fname[1] = '_';\n  memcpy(fname + 2, sha.data(), 40);\n  fname[42] = '\\0';\n\n  int type = lua_getglobal(lua_, fname);\n  if (type != LUA_TFUNCTION) {\n    lua_pop(lua_, 2);\n\n    return NOT_EXISTS;\n  }\n\n  // At this point lua stack has 2 globals.\n\n  /* We have zero arguments and expect\n   * a single return value. 
*/\n  int err = lua_pcall(lua_, 0, 1, -2);\n\n  if (err) {\n    *error = lua_tostring(lua_, -1);\n  }\n\n  return err == 0 ? RUN_OK : RUN_ERR;\n}\n\nvoid Interpreter::SetGlobalArray(const char* name, SliceSpan args) {\n  SetGlobalArrayInternal(lua_, name, args);\n}\n\noptional<string> Interpreter::DetectPossibleAsyncCalls(string_view body_sv) {\n  // We want to detect `redis.call` expressions with unused return values, i.e. they are a\n  // standalone statement, not part of a expression, condition, function call or assignment.\n  //\n  // We search for all `redis.(p)call` statements, that are preceeded on the same line by\n  // - `do` or `then` -> first statement in a new block, certainly unused value\n  // - no tokens      -> we need to check the previous line, if its part of a multi-line expression.\n  //\n  // If we need to check the previous line, we search for the last word (before comments, if it has\n  // one).\n  static const regex kRegex{\"(?:(\\\\S+)(\\\\s*--.*?)*\\\\s*\\n|(then)|(do)|(^))\\\\s*redis\\\\.(p*call)\"};\n\n  // Taken from https://www.lua.org/manual/5.4/manual.html - 3.1 - Lexical conventions\n\n  // If a line ends with it, then most likely the next line belongs to it as well\n  static const set<string_view> kContOperators = {\n      \"+\",  \"-\",  \"*\",  \"/\", \"%\", \"^\", \"#\", \"&\", \"~\", \"|\",  \"<<\", \">>\", \"//\", \"==\",\n      \"~=\", \"<=\", \">=\", \"<\", \">\", \"=\", \"(\", \"{\", \"[\", \"::\", \":\",  \",\",  \".\",  \"..\"};\n\n  // If a line ends with it, then most likely the next line belongs to it as well\n  static const set<string_view> kContTokens = {\"and\",    \"else\",   \"elseif\", \"for\",  \"goto\",\n                                               \"if\",     \"in\",     \"local\",  \"not\",  \"or\",\n                                               \"repeat\", \"return\", \"until\",  \"while\"};\n\n  auto last_n = [](const string& s, size_t n) {\n    return s.size() < n ? 
s : s.substr(s.size() - n, n);\n  };\n\n  smatch sm;\n  string body{body_sv};\n  vector<size_t> targets;\n\n  // We don't handle comment blocks yet.\n  if (body.find(\"--[[\") != string::npos)\n    return {};\n\n  sregex_iterator it{body.begin(), body.end(), kRegex};\n  sregex_iterator end{};\n\n  for (; it != end; it++) {\n    auto last_word = it->str(1);\n\n    if (kContOperators.count(last_n(last_word, 2)) > 0 ||\n        kContOperators.count(last_n(last_word, 1)) > 0)\n      continue;\n\n    if (kContTokens.count(last_word) > 0)\n      continue;\n\n    targets.push_back(it->position(it->size() - 1));\n  }\n\n  if (targets.empty())\n    return nullopt;\n\n  // Insert 'a' before 'call' and 'pcall'. Reverse order to preserve positions\n  reverse(targets.begin(), targets.end());\n  body.reserve(body.size() + targets.size());\n  for (auto pos : targets)\n    body.insert(pos, \"a\");\n\n  VLOG(1) << \"Detected \" << targets.size() << \" aync calls in script\";\n\n  return body;\n}\n\nbool Interpreter::IsResultSafe() const {\n  int top = lua_gettop(lua_);\n  if (top >= 128)\n    return false;\n\n  int t = lua_type(lua_, -1);\n  if (t != LUA_TTABLE)\n    return true;\n\n  bool res = IsTableSafe();\n\n  // Stack can contain intermediate unwindings that were not clean up.\n  DCHECK_GE(lua_gettop(lua_), top);\n  lua_settop(lua_, top);  // restore to the original setting.\n\n  return res;\n}\n\nbool Interpreter::AddInternal(const char* f_id, string_view body, string* error) {\n  string script = absl::StrCat(\"function \", f_id, \"() \\n\");\n  absl::StrAppend(&script, body, \"\\nend\");\n\n  int res = luaL_loadbuffer(lua_, script.data(), script.size(), \"@user_script\");\n  if (res == 0) {\n    res = lua_pcall(lua_, 0, 0, 0);  // run func definition code\n  }\n\n  if (res) {\n    error->assign(lua_tostring(lua_, -1));\n    lua_pop(lua_, 1);  // Remove the error.\n\n    return false;\n  }\n\n  return true;\n}\n\n// Stack is cleaned for us, we can leave it dirty\nbool 
Interpreter::IsTableSafe() const {\n  auto fres = FetchKey(lua_, \"err\");\n  if (fres && *fres == LUA_TSTRING) {\n    return true;\n  }\n\n  fres = FetchKey(lua_, \"ok\");\n  if (fres && *fres == LUA_TSTRING) {\n    return true;\n  }\n\n  // Copy root table because we remove it upon finishing traversal\n  lua_pushnil(lua_);\n  lua_copy(lua_, -2, -1);\n\n  int depth = 1;\n  lua_pushnil(lua_);\n\n  // DFS based on lua stack: [parent-table] [parent-key] [parent-value = table] [key]\n  while (depth > 0) {\n    if (lua_checkstack(lua_, 3) == 0 || depth > 128)\n      return false;\n\n    bool descending = false;\n    for (; lua_next(lua_, -2) != 0; lua_pop(lua_, 1)) {\n      if (lua_type(lua_, -1) != LUA_TTABLE)\n        continue;\n\n      // If we descend, keep value as new table and push nil for start key\n      depth++;\n      lua_pushnil(lua_);\n      descending = true;\n      break;\n    }\n\n    if (!descending) {\n      lua_pop(lua_, 1);\n      depth--;\n    }\n  }\n\n  return true;\n}\n\nvoid Interpreter::SerializeResult(ObjectExplorer* serializer) {\n  int t = lua_type(lua_, -1);\n\n  switch (t) {\n    case LUA_TSTRING:\n      serializer->OnString(TopSv(lua_));\n      break;\n    case LUA_TBOOLEAN:\n      serializer->OnBool(lua_toboolean(lua_, -1));\n      break;\n    case LUA_TNUMBER:\n      if (lua_isinteger(lua_, -1)) {\n        serializer->OnInt(lua_tointeger(lua_, -1));\n      } else {\n        serializer->OnDouble(lua_tonumber(lua_, -1));\n      }\n      break;\n    case LUA_TTABLE: {\n      auto fres = FetchKey(lua_, \"err\");\n      if (fres && *fres == LUA_TSTRING) {\n        serializer->OnError(TopSv(lua_));\n        lua_pop(lua_, 1);\n        break;\n      }\n\n      fres = FetchKey(lua_, \"ok\");\n      if (fres && *fres == LUA_TSTRING) {\n        serializer->OnStatus(TopSv(lua_));\n        lua_pop(lua_, 1);\n        break;\n      }\n\n      fres = FetchKey(lua_, \"map\");\n      if (fres && *fres == LUA_TTABLE) {\n        // Calculate length of map 
part, there is sadly no other way\n        unsigned len = 0;\n        for (lua_pushnil(lua_); lua_next(lua_, -2) != 0; lua_pop(lua_, 1))\n          len++;\n\n        serializer->OnMapStart(len);\n        for (lua_pushnil(lua_); lua_next(lua_, -2) != 0;) {\n          // Push key to stack top: key value key\n          lua_pushnil(lua_);\n          lua_copy(lua_, -3, -1);\n          SerializeResult(serializer);  // pops key\n          SerializeResult(serializer);  // pop value\n        }\n        serializer->OnMapEnd();\n\n        lua_pop(lua_, 2);\n        break;\n      }\n\n      unsigned len = lua_rawlen(lua_, -1);\n\n      serializer->OnArrayStart(len);\n      for (unsigned i = 0; i < len; ++i) {\n        t = lua_rawgeti(lua_, -1, i + 1);  // push table element\n\n        // TODO: we should make sure that we have enough stack space\n        // to traverse each object. This can be done as a dry-run before doing real serialization.\n        // Once we are sure we are safe we can simplify the serialization flow and\n        // remove the error factor.\n        SerializeResult(serializer);  // pops the element\n      }\n      serializer->OnArrayEnd();\n      break;\n    }\n    case LUA_TNIL:\n      serializer->OnNil();\n      break;\n    default:\n      LOG(ERROR) << \"Unsupported type \" << lua_typename(lua_, t);\n      serializer->OnNil();\n  }\n\n  lua_pop(lua_, 1);\n}\n\nvoid Interpreter::ResetStack() {\n  lua_settop(lua_, 0);\n}\n\nint64_t Interpreter::RunGC() {\n  int64_t before_kb = lua_gc(lua_, LUA_GCCOUNT);\n  lua_gc(lua_, LUA_GCCOLLECT);\n  int64_t after_kb = lua_gc(lua_, LUA_GCCOUNT);\n  LOG_IF(DFATAL, after_kb > before_kb) << \"LUA_GCCOLLECT increase memory consumption from \"\n                                       << before_kb << \"kB to \" << after_kb << \"kB\";\n  int64_t res = (before_kb - after_kb) * 1024;\n  return std::max(int64_t(0), res);\n}\n\nvoid Interpreter::UpdateGCParameters() {\n  auto gc = absl::GetFlag(FLAGS_luagc);\n\n  
std::visit(dfly::Overloaded{\n                 [](std::monostate) {},\n                 [&](const LuaGcGen& gen) { lua_gc(lua_, LUA_GCGEN, gen.minormul, gen.majormul); },\n                 [&](const LuaGcInc& inc) {\n                   lua_gc(lua_, LUA_GCINC, inc.pause, inc.stepmul, inc.stepsize);\n                 },\n             },\n             gc);\n}\n\nstd::optional<absl::FixedArray<std::string_view, 4>> Interpreter::PrepareArgs() {\n  int argc = lua_gettop(lua_);\n  /* Require at least one argument */\n  if (argc == 0) {\n    PushError(lua_, \"Please specify at least one argument for redis.call()\");\n    return std::nullopt;\n  }\n\n  size_t blob_len = 0;\n  char tmpbuf[64];\n\n  // Determine size required for backing storage for all args.\n  // Skip command name (idx=1), as its stored in a separate buffer.\n  for (int idx = 2; idx <= argc; idx++) {\n    switch (lua_type(lua_, idx)) {\n      case LUA_TNUMBER:\n        if (lua_isinteger(lua_, idx)) {\n          blob_len += absl::AlphaNum(lua_tointeger(lua_, idx)).size();\n        } else {\n          int fmt_len = absl::SNPrintF(tmpbuf, sizeof(tmpbuf), \"%.17g\", lua_tonumber(lua_, idx));\n          CHECK_GT(fmt_len, 0);\n          blob_len += fmt_len;\n        }\n        continue;\n      case LUA_TSTRING:\n        blob_len += lua_rawlen(lua_, idx) + 1;\n        continue;\n      default:\n        PushError(lua_, \"Lua redis() command arguments must be strings or integers\");\n        return std::nullopt;\n    }\n  }\n\n  absl::FixedArray<string_view, 4> args(argc);\n\n  // Copy command name to name_buffer and set it as first arg.\n  unsigned name_len = lua_rawlen(lua_, 1);\n  if (name_len >= sizeof(name_buffer_)) {\n    PushError(lua_, \"Lua redis() command name too long\");\n    return std::nullopt;\n  }\n\n  memcpy(name_buffer_, lua_tostring(lua_, 1), name_len);\n  args[0] = {name_buffer_, name_len};\n  buffer_.resize(blob_len + 4, '\\0');  // backing storage for args\n\n  char* cur = buffer_.data();\n  
char* end = cur + blob_len;\n  for (int idx = 2; idx <= argc; idx++) {\n    size_t len = 0;\n    switch (lua_type(lua_, idx)) {\n      case LUA_TNUMBER:\n        if (lua_isinteger(lua_, idx)) {\n          char* next = absl::numbers_internal::FastIntToBuffer(lua_tointeger(lua_, idx), cur);\n          len = next - cur;\n        } else if (lua_isnumber(lua_, idx)) {\n          // we pass `end - cur + 1` because we do not want to skip the last character\n          // if it's the last argument.\n          int fmt_len = absl::SNPrintF(cur, end - cur + 1, \"%.17g\", lua_tonumber(lua_, idx));\n          CHECK_GT(fmt_len, 0);\n          len = fmt_len;\n        }\n        break;\n      case LUA_TSTRING:\n        len = lua_rawlen(lua_, idx);\n        memcpy(cur, lua_tostring(lua_, idx), len + 1);  // + 1 for null terminator\n    };\n\n    args[idx - 1] = {cur, len};\n    cur += len;\n  }\n\n  /* Pop all arguments from the stack, we do not need them anymore\n   * and this way we guaranty we will have room on the stack for the result. 
*/\n  lua_pop(lua_, argc);\n  return args;\n}\n\n// Calls redis function\n// Returns false if error needs to be raised.\nbool Interpreter::CallRedisFunction(bool raise_error, bool async, ObjectExplorer* explorer,\n                                    SliceSpan args) {\n  // Calling with custom explorer is not supported with errors or async\n  DCHECK(explorer == nullptr || (!raise_error && !async));\n\n  // If no custom explorer is set, use default translator\n  optional<RedisTranslator> translator;\n  if (explorer == nullptr) {\n    translator.emplace(lua_);\n    explorer = &*translator;\n  }\n  cmd_depth_++;\n  redis_func_(CallArgs{args, &buffer_, explorer, async, raise_error, &raise_error});\n  cmd_depth_--;\n\n  // Shrink reusable buffer if it's too big.\n  if (buffer_.capacity() > 128) {\n    buffer_.clear();\n    buffer_.shrink_to_fit();\n  }\n\n  if (!translator)\n    return true;\n\n  // Raise error for regular 'call' command if needed.\n  if (raise_error && translator->HasError()) {\n    // error is already on top of stack\n    return false;\n  }\n\n  if (!async)\n    DCHECK_EQ(1, lua_gettop(lua_));\n\n  return true;\n}\n\n// Returns number of results, which is always 1 in this case.\n// Please note that lua resets the stack once the function returns so no need\n// to unwind the stack manually in the function (though lua allows doing this).\nint Interpreter::RedisGenericCommand(bool raise_error, bool async, ObjectExplorer* explorer) {\n  /* By using Lua debug hooks it is possible to trigger a recursive call\n   * to luaRedisGenericCommand(), which normally should never happen.\n   * To make this function reentrant is futile and makes it slower, but\n   * we should at least detect such a misuse, and abort. */\n  if (cmd_depth_) {\n    const char* recursion_warning =\n        \"luaRedisGenericCommand() recursive call detected. 
\"\n        \"Are you doing funny stuff with Lua debug hooks?\";\n    PushError(lua_, recursion_warning);\n    return 1;\n  }\n\n  if (!redis_func_) {\n    PushError(lua_, \"internal error - redis function not defined\");\n    if (raise_error) {\n      return RaiseErrorAndAbort(lua_);\n    }\n    return 1;\n  }\n\n  // IMPORTANT! all allocations within this funciton must be freed\n  // BEFORE calling RaiseErrorAndAbort in case of script error. RaiseErrorAndAbort\n  // uses longjmp which bypasses stack unwinding and skips the destruction of objects.\n  {\n    std::optional<absl::FixedArray<std::string_view, 4>> args = PrepareArgs();\n    if (args.has_value()) {\n      raise_error = !CallRedisFunction(raise_error, async, explorer, SliceSpan{*args});\n    }\n  }\n  if (!raise_error) {\n    return 1;\n  }\n  return RaiseErrorAndAbort(lua_);  // this function never returns, it unwinds the Lua call stack\n}\n\nint Interpreter::RedisCallCommand(lua_State* lua) {\n  void** ptr = static_cast<void**>(lua_getextraspace(lua));\n  return reinterpret_cast<Interpreter*>(*ptr)->RedisGenericCommand(true, false);\n}\n\nint Interpreter::RedisPCallCommand(lua_State* lua) {\n  void** ptr = static_cast<void**>(lua_getextraspace(lua));\n  return reinterpret_cast<Interpreter*>(*ptr)->RedisGenericCommand(false, false);\n}\n\nint Interpreter::RedisACallCommand(lua_State* lua) {\n  void** ptr = static_cast<void**>(lua_getextraspace(lua));\n  return reinterpret_cast<Interpreter*>(*ptr)->RedisGenericCommand(true, true);\n}\n\nint Interpreter::RedisAPCallCommand(lua_State* lua) {\n  void** ptr = static_cast<void**>(lua_getextraspace(lua));\n  return reinterpret_cast<Interpreter*>(*ptr)->RedisGenericCommand(false, true);\n}\n\nInterpreterManager::Stats& InterpreterManager::Stats::operator+=(const Stats& other) {\n  this->used_bytes += other.used_bytes;\n  this->interpreter_cnt += other.interpreter_cnt;\n  this->blocked_cnt += other.blocked_cnt;\n\n  this->force_gc_calls += 
other.force_gc_calls;\n  this->gc_duration_ns += other.gc_duration_ns;\n  this->interpreter_return += other.interpreter_return;\n  this->gc_freed_memory += other.gc_freed_memory;\n\n  return *this;\n}\n\nInterpreterManager::Stats& InterpreterManager::tl_stats() {\n  static thread_local Stats stats;\n  return stats;\n}\n\nInterpreter* InterpreterManager::Get() {\n  // Grow if none is available and we have unused capacity left.\n  if (available_.empty() && storage_.size() < storage_.capacity()) {\n    storage_.emplace_back();\n    return &storage_.back();\n  }\n\n  bool blocked = waker_.await([this]() { return !available_.empty(); });\n  tl_stats().blocked_cnt += (uint64_t)blocked;\n\n  Interpreter* ir = available_.back();\n  available_.pop_back();\n  return ir;\n}\n\nvoid InterpreterManager::Return(Interpreter* ir) {\n  const uint64_t max_memory_usage = absl::GetFlag(FLAGS_lua_mem_gc_threshold);\n  using namespace chrono;\n  ++tl_stats().interpreter_return;\n  tl_stats().used_bytes += ir->TakeUsedBytes();\n  if (max_memory_usage != 0 && tl_stats().used_bytes > max_memory_usage) {\n    ++tl_stats().force_gc_calls;\n    auto before = steady_clock::now();\n    tl_stats().gc_freed_memory += ir->RunGC();\n\n    VLOG(2) << \"stats_used_bytes: \" << tl_stats().used_bytes\n            << \" lua_mem_gc_threshold: \" << max_memory_usage\n            << \" force_gc_calls: \" << tl_stats().force_gc_calls\n            << \" freed_mem: \" << tl_stats().gc_freed_memory;\n\n    auto after = steady_clock::now();\n    tl_stats().gc_duration_ns += duration_cast<nanoseconds>(after - before).count();\n  }\n  if (ir >= storage_.data() && ir < storage_.data() + storage_.size()) {\n    available_.push_back(ir);\n    waker_.notify();\n  } else if (return_untracked_ > 0) {\n    return_untracked_--;\n    if (return_untracked_ == 0) {\n      reset_ec_.notify();\n    }\n  } else {\n    LOG(DFATAL) << \"Returning untracked interpreter\";\n  }\n}\n\nvoid InterpreterManager::Reset() {\n  
lock_guard guard{reset_mu_};\n\n  // we perform double buffer swapping with storage and wait for the old interpreters to be\n  // returned.\n  return_untracked_ = storage_.size() - available_.size();\n\n  std::vector<Interpreter> next_storage;\n  next_storage.reserve(storage_.capacity());\n  next_storage.resize(storage_.size());\n  next_storage.swap(storage_);\n\n  available_.clear();\n  for (auto& ir : storage_) {\n    available_.push_back(&ir);\n  }\n\n  reset_ec_.await([this]() { return return_untracked_ == 0; });\n  VLOG(1) << \"InterpreterManager::Reset ended\";\n}\n\nvoid InterpreterManager::Alter(std::function<void(Interpreter*)> modf) {\n  vector<Interpreter*> taken;\n  swap(taken, available_);  // swap data because modf can preempt\n\n  for (Interpreter* ir : taken) {\n    modf(ir);\n    Return(ir);\n  }\n}\n\n}  // namespace dfly\n"
  },
  {
    "path": "src/core/interpreter.h",
    "content": "// Copyright 2022, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#pragma once\n\n#include <absl/container/fixed_array.h>\n#include <absl/types/span.h>\n\n#include <functional>\n#include <optional>\n#include <string_view>\n\n#include \"util/fibers/synchronization.h\"\n\ntypedef struct lua_State lua_State;\n\nnamespace dfly {\n\nclass ObjectExplorer {\n public:\n  virtual ~ObjectExplorer() = default;\n\n  virtual void OnBool(bool b) = 0;\n  virtual void OnString(std::string_view str) = 0;\n  virtual void OnDouble(double d) = 0;\n  virtual void OnInt(int64_t val) = 0;\n  virtual void OnArrayStart(unsigned len) = 0;\n  virtual void OnArrayEnd() = 0;\n  virtual void OnNil() = 0;\n  virtual void OnStatus(std::string_view str) = 0;\n  virtual void OnError(std::string_view str) = 0;\n\n  virtual void OnMapStart(unsigned len) {\n    OnArrayStart(len * 2);\n  }\n\n  virtual void OnMapEnd() {\n    OnArrayEnd();\n  }\n};\n\nclass Interpreter {\n public:\n  using SliceSpan = absl::Span<const std::string_view>;\n\n  // Arguments received from redis.call\n  struct CallArgs {\n    // Full arguments, including cmd name.\n    SliceSpan args;\n\n    // Pointer to backing storage for args (excluding cmd name).\n    // Moving can invalidate arg slice pointers. 
Moved by async to re-use buffer.\n    std::string* buffer;\n\n    ObjectExplorer* translator;\n\n    bool async;        // async by acall\n    bool error_abort;  // abort on errors (not pcall)\n\n    // The function can request an abort due to an error, even if error_abort is false.\n    // It happens when async cmds are flushed and result in an uncatched error.\n    bool* requested_abort;\n  };\n\n  using RedisFunc = std::function<void(CallArgs)>;\n\n  Interpreter();\n  ~Interpreter();\n\n  Interpreter(const Interpreter&) = delete;\n  void operator=(const Interpreter&) = delete;\n\n  Interpreter(Interpreter&&) = default;\n  Interpreter& operator=(Interpreter&&) = default;\n\n  // Note: We leak the state for now.\n  // Production code should not access this method.\n  lua_State* lua() {\n    return lua_;\n  }\n\n  enum AddResult {\n    ADD_OK = 0,\n    ALREADY_EXISTS = 1,\n    COMPILE_ERR = 2,\n  };\n\n  // Add function with sha and body to interpreter.\n  AddResult AddFunction(std::string_view sha, std::string_view body, std::string* error);\n\n  int64_t TakeUsedBytes() {\n    return std::exchange(used_bytes_, 0);\n  }\n\n  bool Exists(std::string_view sha) const;\n\n  enum RunResult {\n    RUN_OK = 0,\n    NOT_EXISTS = 1,\n    RUN_ERR = 2,\n  };\n\n  void SetGlobalArray(const char* name, SliceSpan args);\n\n  // Runs already added function sha returned by a successful call to AddFunction().\n  // Returns: true if the call succeeded, otherwise fills error and returns false.\n  // sha must be 40 char length.\n  RunResult RunFunction(std::string_view sha, std::string* err);\n\n  // Checks whether the result is safe to serialize.\n  // Should fit 2 conditions:\n  // 1. Be the only value on the stack.\n  // 2. 
Should have depth of no more than 128.\n  bool IsResultSafe() const;\n\n  void SerializeResult(ObjectExplorer* serializer);\n\n  void ResetStack();\n\n  // run gc and returns size of freed memory in bytes\n  int64_t RunGC();\n\n  void UpdateGCParameters();\n\n  // fp must point to buffer with at least 41 chars.\n  // fp[40] will be set to '\\0'.\n  static void FuncSha1(std::string_view body, char* fp);\n\n  static std::optional<std::string> DetectPossibleAsyncCalls(std::string_view body);\n\n  template <typename U> void SetRedisFunc(U&& u) {\n    redis_func_ = std::forward<U>(u);\n  }\n\n  // Invoke command with arguments from lua stack, given options and possibly custom explorer\n  int RedisGenericCommand(bool raise_error, bool async, ObjectExplorer* explorer = nullptr);\n\n private:\n  // Returns true if function was successfully added,\n  // otherwise returns false and sets the error.\n  bool AddInternal(const char* f_id, std::string_view body, std::string* error);\n  bool IsTableSafe() const;\n\n  static int RedisCallCommand(lua_State* lua);\n  static int RedisPCallCommand(lua_State* lua);\n  static int RedisACallCommand(lua_State* lua);\n  static int RedisAPCallCommand(lua_State* lua);\n\n  std::optional<absl::FixedArray<std::string_view, 4>> PrepareArgs();\n  bool CallRedisFunction(bool raise_error, bool async, ObjectExplorer* explorer, SliceSpan args);\n\n  lua_State* lua_;\n  unsigned cmd_depth_ = 0;\n  RedisFunc redis_func_;\n  std::string buffer_;\n  int64_t used_bytes_ = 0;\n  char name_buffer_[32];  // backing storage for cmd name\n};\n\n// Manages an internal interpreter pool. 
This allows multiple connections residing on the same\n// thread to run multiple lua scripts in parallel.\nclass InterpreterManager {\n public:\n  struct Stats {\n    Stats& operator+=(const Stats& other);\n\n    uint64_t used_bytes = 0;\n    uint64_t interpreter_cnt = 0;\n    uint64_t blocked_cnt = 0;\n    uint64_t force_gc_calls = 0;\n    uint64_t gc_duration_ns = 0;\n    uint64_t interpreter_return = 0;\n    int64_t gc_freed_memory = 0;\n  };\n\n public:\n  InterpreterManager(unsigned num) : waker_{}, available_{}, storage_{} {\n    // We pre-allocate the backing storage during initialization and\n    // start storing pointers to slots in the available vector.\n    storage_.reserve(num);\n  }\n\n  // Borrow interpreter. Always return it after usage.\n  Interpreter* Get();\n  void Return(Interpreter*);\n\n  // Clear all interpreters, keeps capacity. Waits until all are returned.\n  void Reset();\n\n  // Run on all unused interpreters. Those are marked as used at once, so the callback can preempt\n  void Alter(std::function<void(Interpreter*)> modf);\n\n  static Stats& tl_stats();\n\n private:\n  util::fb2::EventCount waker_, reset_ec_;\n  std::vector<Interpreter*> available_;\n  std::vector<Interpreter> storage_;\n\n  util::fb2::Mutex reset_mu_;  // Acts as a singleton.\n\n  unsigned return_untracked_ = 0;  // Number of returned interpreters during reset.\n};\n\n}  // namespace dfly\n"
  },
  {
    "path": "src/core/interpreter_polyfill.h",
    "content": "// Copyright 2022, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n// This header contains implementations of deprecated, removed or renamed lua functions.\n\n#pragma once\n\nextern \"C\" {\n#include <lauxlib.h>\n#include <lua.h>\n#include <lualib.h>\n\n// TODO: Fix checktab\n#define aux_getn(L, n, w) (luaL_len(L, n))\n\nLUA_API void lua_len(lua_State* L, int idx);\n\nstatic int polyfill_table_getn(lua_State* L) {\n  lua_len(L, 1);\n  return 1;\n}\n\nstatic int polyfill_table_setn(lua_State* L) {\n  // From Lua 5.1, ltablib.c\n  luaL_checktype(L, 1, LUA_TTABLE);\n  luaL_error(L, \"setn is obsolete\");\n  lua_pushvalue(L, 1);\n  return 1;\n}\n\nstatic int polyfill_table_foreach(lua_State* L) {\n  // From Lua 5.1, ltablib.c\n  luaL_checktype(L, 1, LUA_TTABLE);\n  luaL_checktype(L, 2, LUA_TFUNCTION);\n  lua_pushnil(L); /* first key */\n  while (lua_next(L, 1)) {\n    lua_pushvalue(L, 2);  /* function */\n    lua_pushvalue(L, -3); /* key */\n    lua_pushvalue(L, -3); /* value */\n    lua_call(L, 2, 1);\n    if (!lua_isnil(L, -1))\n      return 1;\n    lua_pop(L, 2); /* remove value and result */\n  }\n  return 0;\n}\n\nstatic int polyfill_table_foreachi(lua_State* L) {\n  luaL_checktype(L, 1, LUA_TTABLE);  // Check type here because aux_getn is stripped\n  // From Lua 5.1, ltablib.c\n  int i;\n  int n = aux_getn(L, 1, 0b11);\n  luaL_checktype(L, 2, LUA_TFUNCTION);\n  for (i = 1; i <= n; i++) {\n    lua_pushvalue(L, 2);   /* function */\n    lua_pushinteger(L, i); /* 1st argument */\n    lua_rawgeti(L, 1, i);  /* 2nd argument */\n    lua_call(L, 2, 1);\n    if (!lua_isnil(L, -1))\n      return 1;\n    lua_pop(L, 1); /* remove nil result */\n  }\n  return 0;\n}\n\nstatic void register_polyfills(lua_State* lua) {\n  lua_getglobal(lua, \"table\");\n\n  // unpack was a global function until Lua 5.2\n  lua_getfield(lua, -1, \"unpack\");\n  lua_setglobal(lua, \"unpack\");\n\n  // table.getn - removed, length operator # 
should be used instead\n  lua_pushcfunction(lua, polyfill_table_getn);\n  lua_setfield(lua, -2, \"getn\");\n\n  // table.setn - removed, freely resizing a table is no longer possible\n  lua_pushcfunction(lua, polyfill_table_setn);\n  lua_setfield(lua, -2, \"setn\");\n\n  // table.foreach - removed, for loops should be used instead\n  lua_pushcfunction(lua, polyfill_table_foreach);\n  lua_setfield(lua, -2, \"foreach\");\n\n  // table.foreachi - removed, for loops should be used instead\n  lua_pushcfunction(lua, polyfill_table_foreachi);\n  lua_setfield(lua, -2, \"foreachi\");\n\n  lua_remove(lua, -1);\n}\n}\n"
  },
  {
    "path": "src/core/interpreter_test.cc",
    "content": "// Copyright 2022, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#include \"core/interpreter.h\"\n\nextern \"C\" {\n#include <lauxlib.h>\n#include <lua.h>\n}\n\n#include <absl/strings/str_cat.h>\n#include <absl/strings/str_replace.h>\n#include <gmock/gmock.h>\n#include <mimalloc.h>\n\n#include <thread>\n\n#include \"base/gtest.h\"\n#include \"base/logging.h\"\n\nextern \"C\" {\n#include \"redis/zmalloc.h\"\n}\n\nnamespace dfly {\nusing namespace std;\n\nclass TestSerializer : public ObjectExplorer {\n public:\n  string res;\n\n  void OnBool(bool b) final {\n    absl::StrAppend(&res, \"bool(\", b, \") \");\n  }\n\n  void OnString(std::string_view str) final {\n    absl::StrAppend(&res, \"str(\", str, \") \");\n  }\n\n  void OnDouble(double d) final {\n    absl::StrAppend(&res, \"d(\", d, \") \");\n  }\n\n  void OnInt(int64_t val) final {\n    absl::StrAppend(&res, \"i(\", val, \") \");\n  }\n\n  void OnArrayStart(unsigned len) final {\n    absl::StrAppend(&res, \"[\");\n  }\n\n  void OnArrayEnd() final {\n    if (res.back() == ' ')\n      res.pop_back();\n\n    absl::StrAppend(&res, \"] \");\n  }\n\n  void OnNil() final {\n    absl::StrAppend(&res, \"nil \");\n  }\n\n  void OnMapStart(unsigned len) final {\n    absl::StrAppend(&res, \"{\");\n  }\n\n  void OnMapEnd() final {\n    if (res.back() == ' ')\n      res.pop_back();\n    absl::StrAppend(&res, \"} \");\n  }\n\n  void OnStatus(std::string_view str) {\n    absl::StrAppend(&res, \"status(\", str, \") \");\n  }\n\n  void OnError(std::string_view str) {\n    absl::StrAppend(&res, \"err(\", str, \") \");\n  }\n};\n\nusing SliceSpan = Interpreter::SliceSpan;\nclass InterpreterTest : public ::testing::Test {\n protected:\n  InterpreterTest() {\n    // configure redis lib zmalloc which requires mimalloc heap to work.\n    auto* tlh = mi_heap_get_backing();\n    init_zmalloc_threadlocal(tlh);\n  }\n\n  lua_State* lua() {\n    return intptr_.lua();\n  }\n\n  void 
RunInline(string_view buf, const char* name, unsigned num_results = 0) {\n    CHECK_EQ(0, luaL_loadbuffer(lua(), buf.data(), buf.size(), name));\n    CHECK_EQ(0, lua_pcall(lua(), 0, num_results, 0));\n  }\n\n  void SetGlobalArray(const char* name, const vector<string_view>& vec);\n\n  // returns true if script run successfully.\n  bool Execute(string_view script);\n\n  Interpreter intptr_;\n  TestSerializer ser_;\n  string error_;\n  vector<unique_ptr<string>> strings_;\n};\n\nvoid InterpreterTest::SetGlobalArray(const char* name, const vector<string_view>& vec) {\n  vector<string_view> slices(vec.size());\n  for (size_t i = 0; i < vec.size(); ++i) {\n    strings_.emplace_back(new string(vec[i]));\n    slices[i] = string_view{*strings_.back()};\n  }\n  intptr_.SetGlobalArray(name, SliceSpan{slices});\n}\n\nbool InterpreterTest::Execute(string_view script) {\n  char sha_buf[64];\n  Interpreter::FuncSha1(script, sha_buf);\n  string_view sha{sha_buf, std::strlen(sha_buf)};\n\n  string result;\n  Interpreter::AddResult add_res = intptr_.AddFunction(sha, script, &result);\n  if (add_res == Interpreter::COMPILE_ERR) {\n    error_ = result;\n    return false;\n  }\n\n  Interpreter::RunResult run_res = intptr_.RunFunction(sha, &error_);\n  if (run_res != Interpreter::RUN_OK) {\n    return false;\n  }\n\n  ser_.res.clear();\n  intptr_.SerializeResult(&ser_);\n  ser_.res.pop_back();\n\n  return true;\n}\n\nTEST_F(InterpreterTest, Basic) {\n  RunInline(R\"(\n    function foo(n)\n      return n,n+1\n    end)\",\n            \"code1\");\n\n  int type = lua_getglobal(lua(), \"foo\");\n  ASSERT_EQ(LUA_TFUNCTION, type);\n  lua_pushnumber(lua(), 42);\n  lua_pcall(lua(), 1, 2, 0);\n  int val1 = lua_tointeger(lua(), -1);\n  int val2 = lua_tointeger(lua(), -2);\n  lua_pop(lua(), 2);\n\n  EXPECT_EQ(43, val1);\n  EXPECT_EQ(42, val2);\n  EXPECT_EQ(0, lua_gettop(lua()));\n\n  lua_pushstring(lua(), \"foo\");\n  EXPECT_EQ(3, lua_rawlen(lua(), 1));\n  lua_pop(lua(), 1);\n\n  
RunInline(\"return {nil, 'b'}\", \"code2\", 1);\n  ASSERT_EQ(1, lua_gettop(lua()));\n  LOG(INFO) << lua_typename(lua(), lua_type(lua(), -1));\n\n  ASSERT_TRUE(lua_istable(lua(), -1));\n  ASSERT_EQ(2, lua_rawlen(lua(), -1));\n  lua_len(lua(), -1);\n  ASSERT_EQ(2, lua_tointeger(lua(), -1));\n  lua_pop(lua(), 1);\n\n  lua_pushnil(lua());\n  while (lua_next(lua(), -2)) {\n    /* uses 'key' (at index -2) and 'value' (at index -1) */\n    int kt = lua_type(lua(), -2);\n    int vt = lua_type(lua(), -1);\n    LOG(INFO) << \"k/v : \" << lua_typename(lua(), kt) << \"/\" << lua_tonumber(lua(), -2) << \" \"\n              << lua_typename(lua(), vt);\n    lua_pop(lua(), 1);\n  }\n}\n\nTEST_F(InterpreterTest, UnknownFunc) {\n  string_view code(R\"(\n    function foo(n)\n      return myunknownfunc(1, n)\n    end)\");\n\n  CHECK_EQ(0, luaL_loadbuffer(lua(), code.data(), code.size(), \"code1\"));\n  CHECK_EQ(0, lua_pcall(lua(), 0, 0, 0));\n  int type = lua_getglobal(lua(), \"myunknownfunc\");\n  ASSERT_EQ(LUA_TNIL, type);\n}\n\nTEST_F(InterpreterTest, Stack) {\n  RunInline(R\"(\nlocal x = {}\nfor i=1,127 do\n   x = {x}\nend\nreturn x\n)\",\n            \"code1\", 1);\n\n  ASSERT_EQ(1, lua_gettop(lua()));\n  ASSERT_TRUE(intptr_.IsResultSafe());\n  lua_pop(lua(), 1);\n\n  RunInline(R\"(\nlocal x = {}\nfor i=1,128 do\n   x = {x}\nend\nreturn x\n)\",\n            \"code1\", 1);\n\n  ASSERT_EQ(1, lua_gettop(lua()));\n  ASSERT_FALSE(intptr_.IsResultSafe());\n}\n\nTEST_F(InterpreterTest, Add) {\n  const char* s1 = \"return 0\";\n  const char* s2 = \"foobar\";\n\n  char sha_buf1[64], sha_buf2[64];\n  Interpreter::FuncSha1(s1, sha_buf1);\n  Interpreter::FuncSha1(s2, sha_buf2);\n  string_view sha1{sha_buf1, std::strlen(sha_buf1)};\n  string_view sha2{sha_buf2, std::strlen(sha_buf2)};\n\n  string err;\n\n  EXPECT_EQ(Interpreter::ADD_OK, intptr_.AddFunction(sha1, \"return 0\", &err));\n  EXPECT_EQ(0, lua_gettop(lua()));\n\n  EXPECT_EQ(Interpreter::COMPILE_ERR, intptr_.AddFunction(sha2, 
\"foobar\", &err));\n  EXPECT_THAT(err, testing::HasSubstr(\"syntax error\"));\n  EXPECT_EQ(0, lua_gettop(lua()));\n\n  EXPECT_TRUE(intptr_.Exists(sha1));\n}\n\n// Test cases taken from scripting.tcl\nTEST_F(InterpreterTest, Execute) {\n  ASSERT_TRUE(Execute(\"return 42\"));\n  EXPECT_EQ(\"i(42)\", ser_.res);\n\n  EXPECT_TRUE(Execute(\"return 'hello'\"));\n  EXPECT_EQ(\"str(hello)\", ser_.res);\n\n  // Breaks compatibility.\n  EXPECT_TRUE(Execute(\"return 100.5\"));\n  EXPECT_EQ(\"d(100.5)\", ser_.res);\n\n  EXPECT_TRUE(Execute(\"return true\"));\n  EXPECT_EQ(\"bool(1)\", ser_.res);\n\n  EXPECT_TRUE(Execute(\"return false\"));\n  EXPECT_EQ(\"bool(0)\", ser_.res);\n\n  EXPECT_TRUE(Execute(\"return {ok='fine'}\"));\n  EXPECT_EQ(\"status(fine)\", ser_.res);\n\n  EXPECT_TRUE(Execute(\"return {err= 'bla'}\"));\n  EXPECT_EQ(\"err(bla)\", ser_.res);\n\n  EXPECT_TRUE(Execute(\"return {1, 2, nil, 3}\"));\n  EXPECT_EQ(\"[i(1) i(2) nil i(3)]\", ser_.res);\n\n  EXPECT_TRUE(Execute(\"return {1,2,3,'ciao', {1,2}}\"));\n  EXPECT_EQ(\"[i(1) i(2) i(3) str(ciao) [i(1) i(2)]]\", ser_.res);\n\n  EXPECT_TRUE(Execute(\"return {map={a=1,b=2}}\"));\n  EXPECT_THAT(ser_.res, testing::AnyOf(\"{str(a) i(1) str(b) i(2)}\", \"{str(b) i(2) str(a) i(1)}\"));\n}\n\nTEST_F(InterpreterTest, Call) {\n  auto cb = [](auto ca) {\n    auto* reply = ca.translator;\n    auto span = ca.args;\n    CHECK_GE(span.size(), 1u);\n    string_view cmd{span[0].data(), span[0].size()};\n    if (cmd == \"string\") {\n      reply->OnString(\"foo\");\n    } else if (cmd == \"double\") {\n      reply->OnDouble(3.1415);\n    } else if (cmd == \"int\") {\n      reply->OnInt(42);\n    } else if (cmd == \"err\") {\n      reply->OnError(\"myerr\");\n    } else if (cmd == \"status\") {\n      reply->OnStatus(\"mystatus\");\n    } else {\n      LOG(FATAL) << \"Invalid param\";\n    }\n  };\n\n  intptr_.SetRedisFunc(cb);\n  ASSERT_TRUE(Execute(\"local var = redis.pcall('string'); return {type(var), var}\"));\n  
EXPECT_EQ(\"[str(string) str(foo)]\", ser_.res);\n\n  EXPECT_TRUE(Execute(\"local var = redis.pcall('double'); return {type(var), var}\"));\n  EXPECT_EQ(\"[str(number) d(3.1415)]\", ser_.res);\n\n  EXPECT_TRUE(Execute(\"local var = redis.pcall('int'); return {type(var), var}\"));\n  EXPECT_EQ(\"[str(number) i(42)]\", ser_.res);\n\n  EXPECT_TRUE(Execute(\"local var = redis.pcall('err'); return {type(var), var}\"));\n  EXPECT_EQ(\"[str(table) err(myerr)]\", ser_.res);\n\n  EXPECT_TRUE(Execute(\"local var = redis.pcall('status'); return {type(var), var}\"));\n  EXPECT_EQ(\"[str(table) status(mystatus)]\", ser_.res);\n}\n\nTEST_F(InterpreterTest, CallArray) {\n  auto cb = [](auto ca) {\n    auto* reply = ca.translator;\n    reply->OnArrayStart(2);\n    reply->OnArrayStart(1);\n    reply->OnArrayStart(2);\n    reply->OnNil();\n    reply->OnString(\"s2\");\n    reply->OnArrayEnd();\n    reply->OnArrayEnd();\n    reply->OnInt(42);\n    reply->OnArrayEnd();\n  };\n\n  intptr_.SetRedisFunc(cb);\n  EXPECT_TRUE(Execute(\"local var = redis.call(''); return {type(var), var}\"));\n  EXPECT_EQ(\"[str(table) [[[bool(0) str(s2)]] i(42)]]\", ser_.res);\n}\n\nTEST_F(InterpreterTest, ArgKeys) {\n  vector<string> vec_arr{};\n  vector<string_view> slices;\n  SetGlobalArray(\"ARGV\", {\"foo\", \"bar\"});\n  SetGlobalArray(\"KEYS\", {\"key1\", \"key2\"});\n  EXPECT_TRUE(Execute(\"return {ARGV[1], KEYS[1], KEYS[2]}\"));\n  EXPECT_EQ(\"[str(foo) str(key1) str(key2)]\", ser_.res);\n\n  SetGlobalArray(\"INTKEYS\", {\"123456\", \"1\"});\n  EXPECT_TRUE(Execute(\"return INTKEYS[1] + 0\")) << error_;\n  EXPECT_EQ(\"i(123456)\", ser_.res);\n}\n\nTEST_F(InterpreterTest, Modules) {\n  // cjson module\n  EXPECT_TRUE(Execute(\"return cjson.encode({1, 2, 3})\"));\n  EXPECT_EQ(\"str([1,2,3])\", ser_.res);\n  EXPECT_TRUE(Execute(\"return cjson.decode('{\\\"a\\\": 1}')['a']\"));\n  EXPECT_EQ(\"i(1)\", ser_.res);\n\n  // cmsgpack module\n  EXPECT_TRUE(Execute(\"return cmsgpack.pack('ok', true)\"));\n  
EXPECT_EQ(\"str(\\xA2ok\\xC3)\", ser_.res);\n\n  // bit module\n  EXPECT_TRUE(Execute(\"return bit.bor(8, 4, 5)\"));\n  EXPECT_EQ(\"i(13)\", ser_.res);\n\n  // struct module\n  EXPECT_TRUE(Execute(\"return struct.pack('bbc4', 1, 2, 'test')\"));\n  EXPECT_EQ(\"str(\\x1\\x2test)\", ser_.res);\n}\n\n// Check compatibility with Lua 5.1\nTEST_F(InterpreterTest, Compatibility) {\n  // unpack is no longer global\n  EXPECT_TRUE(Execute(\"return unpack{1,2,3}\"));\n  EXPECT_EQ(\"i(1)\", ser_.res);\n\n  string_view test_foreach_template =\n      \"local t = {1,'two',3;four='yes'}; local out = {};\"\n      \"table.{TESTF} (t, function(k, v) table.insert(out, {k, v}) end); \"\n      \"return out; \";\n\n  // table.foreach was removed\n  string test_foreach = absl::StrReplaceAll(test_foreach_template, {{\"{TESTF}\", \"foreach\"}});\n  EXPECT_TRUE(Execute(test_foreach));\n  EXPECT_EQ(\"[[i(1) i(1)] [i(2) str(two)] [i(3) i(3)] [str(four) str(yes)]]\", ser_.res);\n\n  // table.foreachi was removed\n  string test_foreachi = absl::StrReplaceAll(test_foreach_template, {{\"{TESTF}\", \"foreachi\"}});\n  EXPECT_TRUE(Execute(test_foreachi));\n  EXPECT_EQ(\"[[i(1) i(1)] [i(2) str(two)] [i(3) i(3)]]\", ser_.res);\n\n  EXPECT_FALSE(Execute(\"table.foreachi('not-a-table', print);\"));  // check invalid args\n\n  // table.getn was replaced with length operator\n  EXPECT_TRUE(Execute(\"return table.getn{1, 2, 3};\"));\n  EXPECT_EQ(\"i(3)\", ser_.res);\n\n  // table.setn was removed, resizing is no longer needed, it thows an error\n  EXPECT_FALSE(Execute(\"local t = {}; local a = 1; table.setn(t, 100); return a+123;\"));\n}\n\nTEST_F(InterpreterTest, AsyncReplacement) {\n  const string_view kCases[] = {\n      R\"(\n      redis.[A]call('INCR', 'A')\n      redis.[A]call('INCR', 'A')\n    )\",\n      R\"(\n      function test()\n        redis.[A]call('INCR', 'A')\n      end\n    )\",\n      R\"(\n      local b = redis.call('GET', 'A') + redis.call('GET', 'B')\n    )\",\n      R\"(\n      if 
redis.call('EXISTS', 'A') then redis.[A]call('SET', 'B', 1) end\n    )\",\n      R\"(\n      while redis.call('EXISTS', 'A') do redis.[A]call('SET', 'B', 1) end\n    )\",\n      R\"(\n      while\n      redis.call('EXISTS', 'A') do\n        print(\"OK\")\n      end\n    )\",\n      R\"(\n      print(redis.call('GET', 'A'))\n    )\",\n      R\"(\n      local table = {\n        redis.call('GET', 'A')\n      }\n    )\",\n      R\"(\n      while true do\n        redis.[A]call('INCR', 'A')\n      end\n    )\",\n      R\"(\n      if 1 + -- now this is a tricky comment\n        redis.call('GET', 'A')\n        > 0\n      then end\n    )\",\n      R\"(\n      print('Output'\n      ..\n      redis.call('GET', 'A')\n      )\n    )\",\n      R\"(\n      while\n      0 < -- we have a comment here unfortunately\n      redis.call('GET', 'A')\n      then end\n    )\",\n      R\"(\n    while\n    -- we have\n    -- a tricky\n    -- multiline comment\n    redis.call('EXISTS')\n    do end\n    )\",\n      R\"(\n    --[[ WE SKIP COMMENT BLOCKS FOR NOW ]]\n    redis.call('ECHO', 'TEST')\n    )\"};\n\n  for (auto test : kCases) {\n    auto expected = absl::StrReplaceAll(test, {{\"[A]\", \"a\"}});\n    auto input = absl::StrReplaceAll(test, {{\"[A]\", \"\"}});\n\n    auto result = Interpreter::DetectPossibleAsyncCalls(input);\n    string_view output = result ? 
*result : input;\n\n    EXPECT_EQ(expected, output);\n  }\n}\n\nTEST_F(InterpreterTest, ReplicateCommands) {\n  EXPECT_TRUE(Execute(\"return redis.replicate_commands()\"));\n  EXPECT_EQ(\"i(1)\", ser_.res);\n  EXPECT_TRUE(Execute(\"redis.replicate_commands()\"));\n  EXPECT_EQ(\"nil\", ser_.res);\n}\n\nTEST_F(InterpreterTest, Log) {\n  EXPECT_FALSE(Execute(R\"(redis.log('nonsense', 'nonsense'))\"));\n  EXPECT_THAT(error_, testing::HasSubstr(\"First argument must be a number (log level).\"));\n  EXPECT_TRUE(Execute(R\"(redis.log(redis.LOG_WARNING, 'warn'))\"));\n  EXPECT_EQ(\"nil\", ser_.res);\n  EXPECT_FALSE(Execute(R\"(redis.log(4))\"));\n  EXPECT_THAT(error_, testing::HasSubstr(\"requires two arguments or more\"));\n}\n\nTEST_F(InterpreterTest, Robust) {\n  EXPECT_FALSE(Execute(R\"(eval \"local a = {}\n      setmetatable(a,{__index=function() foo() end})\n      return a\")\"));\n  EXPECT_EQ(\"\", ser_.res);\n}\n\nTEST_F(InterpreterTest, Unpack) {\n  auto cb = [](Interpreter::CallArgs ca) {\n    auto* reply = ca.translator;\n    reply->OnInt(1);\n  };\n  intptr_.SetRedisFunc(cb);\n  ASSERT_TRUE(lua_checkstack(lua(), 7000));\n  bool res = Execute(R\"(\nlocal N = 7000\n\nlocal stringTable = {}\nfor i = 1, N do\n    stringTable[i] = \"String \" .. 
i\nend\n  return redis.pcall('func', unpack(stringTable))\n)\");\n\n  ASSERT_TRUE(res) << error_;\n  EXPECT_EQ(\"i(1)\", ser_.res);\n}\n\nTEST_F(InterpreterTest, AvoidIntOverflow) {\n  EXPECT_TRUE(Execute(\"return bit.tohex(65535, -2147483648)\"));\n  EXPECT_EQ(\"str(0000FFFF)\", ser_.res);\n}\n\nTEST_F(InterpreterTest, LuaIntOverflow) {\n  EXPECT_FALSE(Execute(\"EVAL \\\"struct.pack('>I2147483648', '10')\\\" 0\"));\n}\n\nTEST_F(InterpreterTest, LuaGcStatistic) {\n  InterpreterManager im(1);\n  auto* interpreter = im.Get();\n\n  std::string_view keys[] = {\"key1\", \"key2\", \"key3\", \"key4\", \"key5\", \"key6\", \"key7\"};\n  interpreter->SetGlobalArray(\"KEYS\", SliceSpan{keys});\n\n  auto cb = [](Interpreter::CallArgs ca) {\n    auto* reply = ca.translator;\n    reply->OnInt(1);\n  };\n  interpreter->SetRedisFunc(cb);\n  // next script generate several big values and set them to the keys\n  // after the script is finished, GM isn't called for all values and\n  // in the most cases we have more than 300k allocated memory\n  // that will be cleaned later in the separate thread\n  std::string script = R\"(\n        for i = 1, 7 do\n          local str = string.rep(i, 1024 * 100)\n          redis.call('SET', KEYS[1], str .. 
str)\n        end\n       )\";\n\n  char sha_buf[64];\n  Interpreter::FuncSha1(script, sha_buf);\n  string_view sha{sha_buf, std::strlen(sha_buf)};\n\n  string result;\n  Interpreter::AddResult add_res = interpreter->AddFunction(sha, script, &result);\n  EXPECT_EQ(Interpreter::ADD_OK, add_res);\n\n  // In most cases, when the script is executed, not all memory is deallocated\n  // immediately and can be deallocated later\n  Interpreter::RunResult run_res = interpreter->RunFunction(sha, &error_);\n  EXPECT_EQ(Interpreter::RUN_OK, run_res);\n\n  // check that after the script has finished not all of the memory was deallocated\n  uint64_t used_bytes = InterpreterManager::tl_stats().used_bytes;\n  EXPECT_GE(used_bytes, 0);\n\n  auto force_gc_calls = InterpreterManager::tl_stats().force_gc_calls;\n  // we need to return the interpreter to update the statistics;\n  // a forced GC shouldn't be triggered\n  im.Return(interpreter);\n  EXPECT_EQ(force_gc_calls, InterpreterManager::tl_stats().force_gc_calls);\n  EXPECT_LE(used_bytes, InterpreterManager::tl_stats().used_bytes);\n\n  used_bytes = InterpreterManager::tl_stats().used_bytes;\n\n  // we get the same interpreter again to call GC in a separate thread\n  auto* new_interpreter = im.Get();\n  EXPECT_EQ(interpreter, new_interpreter);\n\n  // check that even if memory is deallocated in a separate thread our statistics are correct\n  std::thread t([&] {\n    interpreter->RunGC();\n    EXPECT_EQ(InterpreterManager::tl_stats().used_bytes, 0);\n  });\n  t.join();\n\n  im.Return(interpreter);\n  EXPECT_GE(used_bytes, InterpreterManager::tl_stats().used_bytes);\n}\n\n}  // namespace dfly\n"
  },
  {
    "path": "src/core/json/CMakeLists.txt",
    "content": "gen_flex(jsonpath_lexer)\ngen_bison(jsonpath_grammar)\n\ncur_gen_dir(gen_dir)\n\nadd_library(jsonpath lexer_impl.cc driver.cc path.cc\n            ${gen_dir}/jsonpath_lexer.cc ${gen_dir}/jsonpath_grammar.cc json_object.cc\n            detail/jsoncons_dfs.cc detail/flat_dfs.cc\n            detail/interned_blob.cc\n            detail/interned_string.cc)\ntarget_link_libraries(jsonpath base absl::strings TRDP::reflex TRDP::jsoncons TRDP::flatbuffers dfly_page_usage)\n\nhelio_cxx_test(jsonpath_test jsonpath dfly_core LABELS DFLY)\nhelio_cxx_test(json_test jsonpath TRDP::jsoncons LABELS DFLY)\nhelio_cxx_test(interned_blob_test dfly_core TRDP::mimalloc2 LABELS DFLY)\n"
  },
  {
    "path": "src/core/json/detail/common.h",
    "content": "// Copyright 2024, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#pragma once\n\nnamespace dfly::json::detail {\nenum MatchStatus {\n  OUT_OF_BOUNDS,\n  MISMATCH,\n};\n\n}\n"
  },
  {
    "path": "src/core/json/detail/flat_dfs.cc",
    "content": "// Copyright 2024, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#include \"core/json/detail/flat_dfs.h\"\n\n#include \"base/logging.h\"\n\nnamespace dfly::json::detail {\n\nusing namespace std;\nusing nonstd::make_unexpected;\n\ninline bool IsRecursive(flexbuffers::Type type) {\n  return type == flexbuffers::FBT_MAP || type == flexbuffers::FBT_VECTOR;\n}\n\n// Binary search of a key, returns UINT_MAX if not found.\nunsigned FindByKey(const flexbuffers::TypedVector& keys, const char* elem) {\n  unsigned s = 0, end = keys.size();\n  while (s < end) {\n    unsigned mid = (s + end) / 2;\n    flexbuffers::String mid_elem = keys[mid].AsString();\n    int res = strcmp(elem, mid_elem.c_str());\n    if (res < 0) {\n      end = mid;\n    } else if (res > 0) {\n      s = mid + 1;\n    } else {\n      return mid;\n    }\n  }\n  return UINT_MAX;\n}\n\nauto FlatDfsItem::Init(const PathSegment& segment) -> AdvanceResult {\n  switch (segment.type()) {\n    case SegmentType::IDENTIFIER: {\n      if (obj().IsMap()) {\n        auto map = obj().AsMap();\n        flexbuffers::TypedVector keys = map.Keys();\n        unsigned index = FindByKey(keys, segment.identifier().c_str());\n        if (index == UINT_MAX) {\n          return Exhausted();\n        }\n        state_.emplace(index, index);\n        return DepthState{obj().AsVector()[index], depth_state_.second + 1};\n      }\n      break;\n    }\n    case SegmentType::INDEX: {\n      auto vec = obj().AsVector();\n      IndexExpr index = segment.index().Normalize(vec.size());\n      if (index.Empty()) {\n        return make_unexpected(OUT_OF_BOUNDS);\n      }\n\n      state_ = index;\n      return Next(vec[index.first]);\n      break;\n    }\n\n    case SegmentType::DESCENT:\n      if (segment_step_ == 1) {\n        // first time, branching to return the same object but with the next segment,\n        // exploring the path of ignoring the DESCENT operator.\n        // Also, 
shift the state (segment_step) to bypass this branch next time.\n        segment_step_ = 0;\n        return DepthState{depth_state_.first, depth_state_.second + 1};\n      }\n\n      // Now traverse all the children but do not progress with segment path.\n      // This is why segment_step_ is set to 0.\n      [[fallthrough]];\n    case SegmentType::WILDCARD: {\n      auto vec = obj().AsVector();\n      if (vec.size() == 0) {\n        return Exhausted();\n      }\n      state_ = IndexExpr::All();\n      return Next(vec[0]);\n    } break;\n\n    default:\n      LOG(DFATAL) << \"Unknown segment \" << SegmentName(segment.type());\n  }  // end switch\n\n  return nonstd::make_unexpected(MISMATCH);\n}\n\nauto FlatDfsItem::Advance(const PathSegment& segment) -> AdvanceResult {\n  if (!state_) {\n    return Init(segment);\n  }\n\n  ++state_->first;\n  if (state_->Empty())\n    return Exhausted();\n  auto vec = obj().AsVector();\n\n  return Next(vec[state_->first]);\n}\n\nFlatDfs FlatDfs::Traverse(absl::Span<const PathSegment> path, const flexbuffers::Reference root,\n                          const PathFlatCallback& callback) {\n  DCHECK(!path.empty());\n  FlatDfs dfs;\n\n  if (path.size() == 1) {\n    dfs.PerformStep(path[0], root, callback);\n    return dfs;\n  }\n\n  using ConstItem = FlatDfsItem;\n  vector<ConstItem> stack;\n  stack.emplace_back(root);\n\n  do {\n    unsigned segment_index = stack.back().segment_idx();\n    const auto& path_segment = path[segment_index];\n\n    // init or advance the current object\n    ConstItem::AdvanceResult res = stack.back().Advance(path_segment);\n    if (res && !res->first.IsNull()) {\n      const flexbuffers::Reference next = res->first;\n      DVLOG(2) << \"Handling now \" << next.GetType() << \" \" << next.ToString();\n\n      // We descent only if next is object or an array.\n      if (IsRecursive(next.GetType())) {\n        unsigned next_seg_id = res->second;\n\n        if (next_seg_id + 1 < path.size()) {\n          
stack.emplace_back(next, next_seg_id);\n        } else {\n          // terminal step\n          // TODO: to take into account MatchStatus\n          // for `json.set foo $.a[10]` or for `json.set foo $.*.b`\n          dfs.PerformStep(path[next_seg_id], next, callback);\n        }\n      }\n    } else {\n      stack.pop_back();\n    }\n  } while (!stack.empty());\n\n  return dfs;\n}\n\nauto FlatDfs::PerformStep(const PathSegment& segment, const flexbuffers::Reference node,\n                          const PathFlatCallback& callback) -> nonstd::expected<void, MatchStatus> {\n  switch (segment.type()) {\n    case SegmentType::IDENTIFIER: {\n      if (!node.IsMap())\n        return make_unexpected(MISMATCH);\n      auto map = node.AsMap();\n      flexbuffers::Reference value = map[segment.identifier().c_str()];\n      if (!value.IsNull()) {\n        DoCall(callback, string_view{segment.identifier()}, value);\n      }\n    } break;\n    case SegmentType::INDEX: {\n      if (!node.IsUntypedVector())\n        return make_unexpected(MISMATCH);\n      auto vec = node.AsVector();\n      IndexExpr index = segment.index().Normalize(vec.size());\n      if (index.Empty()) {\n        return make_unexpected(OUT_OF_BOUNDS);\n      }\n      for (; index.first <= index.second; ++index.first)\n        DoCall(callback, nullopt, vec[index.first]);\n    } break;\n\n    case SegmentType::DESCENT:\n    case SegmentType::WILDCARD: {\n      auto vec = node.AsVector();       // always succeeds\n      auto keys = node.AsMap().Keys();  // always succeeds\n      string str;\n      for (size_t i = 0; i < vec.size(); ++i) {\n        flexbuffers::Reference key = keys[i];\n        optional<string_view> opt_key;\n        if (key.IsString()) {\n          str = key.ToString();\n          opt_key = str;\n        }\n        DoCall(callback, opt_key, vec[i]);\n      }\n    } break;\n    default:\n      LOG(DFATAL) << \"Unknown segment \" << SegmentName(segment.type());\n  }\n  return {};\n}\n\n}  // 
namespace dfly::json::detail\n"
  },
  {
    "path": "src/core/json/detail/flat_dfs.h",
    "content": "// Copyright 2024, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#pragma once\n\n#include <absl/types/span.h>\n\n#include <nonstd/expected.hpp>\n#include <variant>\n\n#include \"core/flatbuffers.h\"\n#include \"core/json/detail/common.h\"\n#include \"core/json/path.h\"\n\nnamespace dfly::json::detail {\n\nclass FlatDfsItem {\n public:\n  using ValueType = flexbuffers::Reference;\n  using DepthState = std::pair<ValueType, unsigned>;  // object, segment_idx pair\n  using AdvanceResult = nonstd::expected<DepthState, MatchStatus>;\n\n  FlatDfsItem(ValueType val, unsigned idx = 0) : depth_state_(val, idx) {\n  }\n\n  // Returns the next object to traverse\n  // or null if traverse was exhausted or the segment does not match.\n  AdvanceResult Advance(const PathSegment& segment);\n\n  unsigned segment_idx() const {\n    return depth_state_.second;\n  }\n\n private:\n  ValueType obj() const {\n    return depth_state_.first;\n  }\n\n  DepthState Next(ValueType obj) const {\n    return {obj, depth_state_.second + segment_step_};\n  }\n\n  DepthState Exhausted() const {\n    return {ValueType(), 0};\n  }\n\n  AdvanceResult Init(const PathSegment& segment);\n\n  // For most operations we advance the path segment by 1 when we descent into the children.\n  unsigned segment_step_ = 1;\n\n  DepthState depth_state_;\n  std::optional<IndexExpr> state_;\n};\n\n// Traverses a json object according to the given path and calls the callback for each matching\n// field. 
With DESCENT segments it will match 0 or more fields in depth.\n// MATCH(node, DESCENT|SUFFIX) = MATCH(node, SUFFIX) ||\n// { MATCH(node->child, DESCENT/SUFFIX) for each child of node }\n\nclass FlatDfs {\n public:\n  // TODO: for some operations we need to know the type of mismatches.\n  static FlatDfs Traverse(absl::Span<const PathSegment> path, const flexbuffers::Reference root,\n                          const PathFlatCallback& callback);\n  unsigned matches() const {\n    return matches_;\n  }\n\n private:\n  bool TraverseImpl(absl::Span<const PathSegment> path, const PathFlatCallback& callback);\n\n  nonstd::expected<void, MatchStatus> PerformStep(const PathSegment& segment,\n                                                  const flexbuffers::Reference node,\n                                                  const PathFlatCallback& callback);\n\n  void DoCall(const PathFlatCallback& callback, std::optional<std::string_view> key,\n              const flexbuffers::Reference node) {\n    ++matches_;\n    callback(key, node);\n  }\n\n  unsigned matches_ = 0;\n};\n\n}  // namespace dfly::json::detail\n"
  },
  {
    "path": "src/core/json/detail/interned_blob.cc",
    "content": "// Copyright 2026, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n\n#include \"core/json/detail/interned_blob.h\"\n\n#include <glog/logging.h>\n#include <mimalloc.h>\n\n#include \"core/detail/stateless_allocator.h\"\n\nnamespace {\nconstexpr size_t kUint32Size = sizeof(uint32_t);\nconstexpr size_t kHeaderSize = sizeof(uint32_t) * 2;\n}  // namespace\n\nnamespace dfly::detail {\n\nInternedBlobHandle InternedBlobHandle::Create(std::string_view sv) {\n  if (sv.empty()) {\n    return InternedBlobHandle{nullptr};\n  }\n\n  constexpr uint32_t ref_count = 1;\n  DCHECK_LE(sv.size(), std::numeric_limits<uint32_t>::max());\n\n  const uint32_t str_len = sv.size();\n\n  // We need +1 byte for \\0 because jsoncons expects c_str() and data() style accessors on keys\n  BlobPtr blob = StatelessAllocator<char>{}.allocate(kHeaderSize + str_len + 1);\n\n  std::memcpy(blob, &str_len, kUint32Size);\n  std::memcpy(blob + kUint32Size, &ref_count, kUint32Size);\n\n  std::memcpy(blob + kHeaderSize, sv.data(), str_len);\n\n  // null terminate so jsoncons can directly access the char* as string\n  blob[kHeaderSize + str_len] = '\\0';\n  return InternedBlobHandle{blob + kHeaderSize};\n}\n\nuint32_t InternedBlobHandle::Size() const {\n  if (!blob_)\n    return 0;\n  uint32_t size;\n  std::memcpy(&size, blob_ - kHeaderSize, kUint32Size);\n  return size;\n}\n\nuint32_t InternedBlobHandle::RefCount() const {\n  DCHECK(blob_) << \"Called RefCount() on empty blob\";\n  uint32_t ref_count;\n  std::memcpy(&ref_count, blob_ - kUint32Size, kUint32Size);\n  return ref_count;\n}\n\nvoid InternedBlobHandle::IncrRefCount() {  // NOLINT - non-const, mutates via ptr\n  const uint32_t ref_count = RefCount();\n  DCHECK_LT(ref_count, std::numeric_limits<uint32_t>::max()) << \"Attempt to increase max refcount\";\n  const uint32_t updated_count = ref_count + 1;\n  std::memcpy(blob_ - kUint32Size, &updated_count, kUint32Size);\n}\n\nvoid 
InternedBlobHandle::DecrRefCount() {  // NOLINT - non-const, mutates via ptr\n  const uint32_t ref_count = RefCount();\n  DCHECK_GE(ref_count, 1ul) << \"Attempt to decrease zero refcount\";\n  const uint32_t updated_count = ref_count - 1;\n  std::memcpy(blob_ - kUint32Size, &updated_count, kUint32Size);\n}\n\nsize_t InternedBlobHandle::MemUsed() const {\n  return blob_ ? mi_usable_size(blob_ - kHeaderSize) : 0;\n}\n\nvoid InternedBlobHandle::Destroy(InternedBlobHandle& handle) {\n  if (handle.blob_) {\n    const size_t to_destroy = kHeaderSize + handle.Size() + 1;\n    StatelessAllocator<char>{}.deallocate(handle.blob_ - kHeaderSize, to_destroy);\n    handle.blob_ = nullptr;\n  }\n}\n\n}  // namespace dfly::detail\n"
  },
  {
    "path": "src/core/json/detail/interned_blob.h",
    "content": "// Copyright 2026, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n\n#pragma once\n\n#include <absl/container/flat_hash_set.h>\n\n#include <string_view>\n\nnamespace dfly::detail {\n\n// Layout is: 4 bytes size, 4 bytes refcount, char data, followed by nul-char.\n// The trailing nul-char is required because jsoncons needs to access c_str/data without a\n// size. The blob_ itself points directly to the data, so that callers do not have to perform\n// pointer arithmetic for c_str() and data() calls:\n//     [size:4] [refcount:4] [string] [\\0]\n//     ^-8      ^- 4         ^blob_\nusing BlobPtr = char*;\n\n// A lightweight handle around a blob pointer, used to wrap the blob data when storing it in hashset\n// and also within interned strings. Does not handle lifetime of the data. Only provides convenience\n// methods to change state inside the blob and \"view\" style methods to access the string inside the\n// blob. Multiple handles can point to the same blob.\nclass InternedBlobHandle {\n public:\n  InternedBlobHandle() = default;\n\n  [[nodiscard]] static InternedBlobHandle Create(std::string_view sv);\n\n  uint32_t Size() const;\n\n  uint32_t RefCount() const;\n\n  const char* Data() const {\n    return blob_;\n  }\n\n  // The refcount methods are explicitly part of the public API and not tied to the handle lifetime\n  // to keep control over exactly when we modify data in the blob ptr. We do not want to increase\n  // ref count on each handle creation and conversely decrease it when a handle is destroyed, eg on\n  // every hash table lookup etc. The ref count is only increased or decreased at the InternedString\n  // API level, when a new string is created, and when a string is destroyed. 
This allows us to\n  // avoid writing to memory unless absolutely necessary, making the handle cheap.\n\n  // Increment ref count, asserts if count grows over type max limit\n  void IncrRefCount();\n\n  // Decrement ref count, asserts if count falls below 0\n  void DecrRefCount();\n\n  // Returns bytes used, including string, header and trailing byte\n  size_t MemUsed() const;\n\n  // Convenience method to deallocate storage. Not for use in destructor.\n  static void Destroy(InternedBlobHandle& handle);\n\n  operator std::string_view() const {  // NOLINT (non-explicit operator for easier comparisons)\n    return blob_ ? std::string_view{blob_, Size()} : \"\";\n  }\n  auto operator<=>(const InternedBlobHandle& other) const = default;\n  bool operator==(const InternedBlobHandle& other) const = default;\n\n  explicit operator bool() const {\n    return blob_;\n  }\n\n private:\n  explicit InternedBlobHandle(BlobPtr blob) : blob_{blob} {\n  }\n\n  BlobPtr blob_{nullptr};\n};\n\nstruct BlobHash {\n  using is_transparent = void;\n  size_t operator()(std::string_view sv) const {\n    return std::hash<std::string_view>{}(sv);\n  }\n};\n\nstruct BlobEq {\n  using is_transparent = void;\n  bool operator()(const InternedBlobHandle& a, const InternedBlobHandle& b) const {\n    return a.Data() == b.Data();\n  }\n\n  bool operator()(std::string_view a, std::string_view b) const {\n    return a == b;\n  }\n};\n\n// This pool holds blob handles and is used by InternedString to manage string access. It would be\n// nice to keep this on the mimalloc heap by using StatelessAllocator. However, JSON memory usage is\n// estimated by comparing mimalloc usage before and after creating an object. If we keep this pool\n// on mimalloc, it can introduce variations such as resizing of its internal store when adding a new\n// object. This results in non-deterministic memory usage, which introduces incorrectness in tests\n// and the memory usage command. 
To keep memory estimation per object accurate, the pool is\n// allocated on the default heap.\nusing InternedBlobPool = absl::flat_hash_set<InternedBlobHandle, BlobHash, BlobEq>;\nstatic_assert(sizeof(InternedBlobHandle) == sizeof(char*));\n\n}  // namespace dfly::detail\n"
  },
  {
    "path": "src/core/json/detail/interned_string.cc",
    "content": "// Copyright 2026, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n\n#include \"core/json/detail/interned_string.h\"\n\nnamespace {\nconstexpr auto kLoadFactorToShrinkPool = 0.2;\n\nthread_local dfly::InternedStringStats tl_stats;\n\n}  // namespace\n\nnamespace dfly::detail {\n\nInternedString& InternedString::operator=(InternedString other) {\n  swap(other);\n  return *this;\n}\n\nvoid InternedString::ResetPool() {\n  InternedBlobPool& pool = GetPoolRef();\n  for (InternedBlobHandle handle : pool) {\n    InternedBlobHandle::Destroy(handle);\n  }\n  pool.clear();\n\n  // Pool hits and misses are not reset, they are monotonically increasing counters\n  // TODO reset these two fields in config resetstats\n  tl_stats.pool_bytes = 0;\n  tl_stats.pool_entries = 0;\n  tl_stats.pool_table_bytes = 0;\n  tl_stats.live_references = 0;\n}\n\nInternedBlobHandle InternedString::Intern(const std::string_view sv) {\n  if (sv.empty())\n    return {};\n\n  tl_stats.live_references += 1;\n  InternedBlobPool& pool_ref = GetPoolRef();\n  if (const auto it = pool_ref.find(sv); it != pool_ref.end()) {\n    tl_stats.hits++;\n    InternedBlobHandle blob = *it;\n    blob.IncrRefCount();\n    return blob;\n  }\n\n  InternedBlobHandle handle = InternedBlobHandle::Create(sv);\n  pool_ref.emplace(handle);\n  tl_stats.pool_entries++;\n  tl_stats.pool_bytes += handle.MemUsed();\n  tl_stats.misses++;\n  return handle;\n}\n\nvoid InternedString::Acquire() {  // NOLINT\n  if (!entry_)\n    return;\n\n  tl_stats.live_references += 1;\n  entry_.IncrRefCount();\n}\n\nvoid InternedString::Release() {\n  if (!entry_)\n    return;\n\n  entry_.DecrRefCount();\n  tl_stats.live_references -= 1;\n\n  if (entry_.RefCount() == 0) {\n    InternedBlobPool& pool_ref = GetPoolRef();\n    pool_ref.erase(entry_);\n    tl_stats.pool_entries--;\n    tl_stats.pool_bytes -= entry_.MemUsed();\n    InternedBlobHandle::Destroy(entry_);\n\n    // When pool is underutilized, 
shrink it by swapping.\n    if (const auto load_factor = pool_ref.load_factor();\n        ABSL_PREDICT_FALSE(load_factor > 0 && load_factor < kLoadFactorToShrinkPool)) {\n      // The LHS of swap is a new pool constructed from the original pool reference. The RHS is the\n      // original pool. After the swap, the temporary is destroyed. Note that this is not a strict\n      // shrink. The new pool internally allocates enough capacity so that the load factor is around\n      // 0.8. So the capacity after swap is still larger than size, but the load factor is improved.\n      InternedBlobPool(pool_ref).swap(pool_ref);\n    }\n  }\n}\n\nInternedBlobPool& InternedString::GetPoolRef() {\n  // Note on lifetimes: this pool is thread local and depends on the thread local memory resource\n  // defined in the stateless allocator in src/core/detail/stateless_allocator.h. Since there is no\n  // well-defined order of destruction, this pool must be manually reset before the memory resource\n  // destruction.\n  thread_local InternedBlobPool pool;\n  return pool;\n}\n\n}  // namespace dfly::detail\n\nnamespace dfly {\n\nInternedStringStats& InternedStringStats::operator+=(const InternedStringStats& other) {\n  pool_entries += other.pool_entries;\n  pool_bytes += other.pool_bytes;\n  hits += other.hits;\n  misses += other.misses;\n  pool_table_bytes += other.pool_table_bytes;\n  live_references += other.live_references;\n  return *this;\n}\n\nInternedStringStats GetInternedStringStats() {\n  tl_stats.pool_table_bytes =\n      detail::InternedString::GetPoolRef().capacity() * (sizeof(detail::InternedBlobHandle) + 1);\n  return tl_stats;\n}\n\n}  // namespace dfly\n"
  },
  {
    "path": "src/core/json/detail/interned_string.h",
    "content": "// Copyright 2026, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n\n#pragma once\n\n#include \"core/detail/stateless_allocator.h\"\n#include \"core/json/detail/interned_blob.h\"\n\nnamespace dfly::detail {\n\n// InternedString handles incrementing and decrementing reference counts of the blobs tied to its\n// own lifecycle. It deletes the blob from a shard local pool when refcount is 0.\n// TODO examine cross shard json object interactions. Can a pool end up access from another shard?\nclass InternedString {\n public:\n  using allocator_type = StatelessAllocator<char>;\n\n  InternedString() = default;\n\n  explicit InternedString(const std::string_view sv) : entry_(Intern(sv)) {\n  }\n\n  // The following constructors and members are added because they are required by jsoncons for\n  // keys. Each of these is added in response to compiler errors and should not be removed, even if\n  // they are seemingly a no-op or duplicated.\n\n  // jsoncons sometimes creates empty obj with custom allocator. If it creates an object with any\n  // other allocator, we should fail during compilation.\n  template <typename T> explicit InternedString(StatelessAllocator<T> /*unused*/) {\n  }\n\n  template <typename Alloc> InternedString(const char* data, size_t size, Alloc alloc);\n\n  template <std::contiguous_iterator It> InternedString(It begin, It end);\n\n  InternedString(const InternedString& other) : entry_{other.entry_} {\n    Acquire();\n  }\n\n  InternedString(InternedString&& other) noexcept : entry_{other.entry_} {\n    other.entry_ = {};\n  }\n\n  InternedString& operator=(InternedString other);\n\n  ~InternedString() {\n    Release();\n  }\n\n  operator std::string_view() const {\n    return entry_;\n  }\n\n  const char* data() const {\n    return entry_ ? 
entry_.Data() : \"\";\n  }\n\n  const char* c_str() const {\n    return data();\n  }\n\n  void swap(InternedString& other) noexcept {\n    std::swap(entry_, other.entry_);\n  }\n\n  size_t length() const {\n    return size();\n  }\n\n  size_t size() const {\n    return entry_.Size();\n  }\n\n  int compare(const InternedString& other) const {\n    return std::string_view{*this}.compare(other);\n  }\n\n  int compare(std::string_view other) const {\n    return std::string_view{*this}.compare(other);\n  }\n\n  // lex. comparison\n  auto operator<=>(const InternedString& other) const {\n    return std::string_view{*this} <=> std::string_view{other};\n  }\n\n  bool operator==(const InternedString& other) const = default;\n\n  void shrink_to_fit() {  // NOLINT (must be non-const to align with jsoncons usage)\n  }\n\n  // Destroys all strings in the pool. Must be called on process shutdown before the backing memory\n  // resource is destroyed.\n  static void ResetPool();\n  static InternedBlobPool& GetPoolRef();\n\n  size_t MemUsed() const {\n    return entry_.MemUsed();\n  }\n\n private:\n  // If a string exists in the pool, increments its refcount. If not, adds the string to the pool.\n  // Returns a handle wrapping the string.\n  static InternedBlobHandle Intern(std::string_view sv);\n\n  // Increments the refcount if the entry is not null\n  void Acquire();\n\n  // Decrements the refcount, removes entry from the pool if necessary, destroying the interned\n  // blob. 
A side effect may be shrinking the pool if the load factor is suboptimal (see\n  // kLoadFactorToShrinkPool in the implementation)\n  void Release();\n\n  // Wraps a null pointer by default\n  InternedBlobHandle entry_;\n};\n\ntemplate <typename Alloc>\nInternedString::InternedString(const char* data, size_t size, Alloc /*unused*/)\n    : InternedString(std::string_view{data, size}) {\n}\n\ntemplate <std::contiguous_iterator It> InternedString::InternedString(It begin, It end) {\n  if (begin == end) {\n    return;\n  }\n\n  const auto size = std::distance(begin, end);\n  const auto data_ptr = &*begin;\n  entry_ = Intern(std::string_view(data_ptr, size));\n}\n\n}  // namespace dfly::detail\n\nnamespace dfly {\n\nstruct InternedStringStats {\n  size_t pool_entries = 0;\n  size_t pool_bytes = 0;\n  size_t hits = 0;\n  size_t misses = 0;\n  size_t pool_table_bytes = 0;\n  size_t live_references = 0;\n\n  InternedStringStats& operator+=(const InternedStringStats& other);\n};\n\nInternedStringStats GetInternedStringStats();\n\n}  // namespace dfly\n"
  },
  {
    "path": "src/core/json/detail/jsoncons_dfs.cc",
    "content": "// Copyright 2024, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n// clang-format off\n#include <glog/logging.h>\n// clang-format on\n\n#include \"core/json/detail/jsoncons_dfs.h\"\n\nnamespace dfly::json::detail {\n\nusing namespace std;\nusing nonstd::make_unexpected;\n\nostream& operator<<(ostream& os, const PathSegment& ps) {\n  os << SegmentName(ps.type());\n  return os;\n}\n\ninline bool IsRecursive(jsoncons::json_type type) {\n  return type == jsoncons::json_type::object_value || type == jsoncons::json_type::array_value;\n}\n\nDfs Dfs::Traverse(absl::Span<const PathSegment> path, const JsonType& root, const Cb& callback) {\n  DCHECK(!path.empty());\n\n  Dfs dfs;\n\n  if (path.size() == 1) {\n    dfs.PerformStep(path[0], root, callback);\n    return dfs;\n  }\n\n  using ConstItem = JsonconsDfsItem<true>;\n  vector<ConstItem> stack;\n  stack.emplace_back(&root);\n\n  do {\n    unsigned segment_index = stack.back().segment_idx();\n    const auto& path_segment = path[segment_index];\n\n    // init or advance the current object\n    DVLOG(2) << \"Advance segment [\" << segment_index << \"] \" << path_segment;\n    ConstItem::AdvanceResult res = stack.back().Advance(path_segment);\n    if (res && res->first != nullptr) {\n      const JsonType* next = res->first;\n\n      // We descent only if next is object or an array.\n      if (IsRecursive(next->type())) {\n        unsigned next_seg_id = res->second;\n\n        if (next_seg_id + 1 < path.size()) {\n          DVLOG(2) << \"Exploring node[\" << stack.size() << \"] \" << next->type() << \" \"\n                   << next->to_string();\n          stack.emplace_back(next, next_seg_id);\n        } else {\n          DVLOG(2) << \"Terminal node[\" << stack.size() << \"] \" << next->type() << \" \"\n                   << next->to_string() << \", segment:\" << path[next_seg_id];\n          // terminal step\n          // TODO: to take into account MatchStatus\n        
  // for `json.set foo $.a[10]` or for `json.set foo $.*.b`\n          dfs.PerformStep(path[next_seg_id], *next, callback);\n        }\n      }\n    } else {\n      stack.pop_back();\n    }\n  } while (!stack.empty());\n\n  return dfs;\n}\n\nDfs Dfs::Mutate(absl::Span<const PathSegment> path, const MutateCallback& callback,\n                JsonType* json) {\n  DCHECK(!path.empty());\n\n  Dfs dfs;\n\n  if (path.size() == 1) {\n    dfs.MutateStep(path[0], callback, json);\n    return dfs;\n  }\n\n  // Use vector to maintain order\n  std::vector<JsonType*> nodes_to_mutate;\n\n  using Item = detail::JsonconsDfsItem<false>;\n  vector<Item> stack;\n  stack.emplace_back(json);\n\n  do {\n    unsigned segment_index = stack.back().segment_idx();\n    const auto& path_segment = path[segment_index];\n\n    // init or advance the current object\n    Item::AdvanceResult res = stack.back().Advance(path_segment);\n    if (res && res->first != nullptr) {\n      JsonType* next = res->first;\n      DVLOG(2) << \"Handling now \" << next->type() << \" \" << next->to_string();\n\n      // We descend only if next is an object or an array.\n      if (IsRecursive(next->type())) {\n        unsigned next_seg_id = res->second;\n\n        if (next_seg_id + 1 < path.size()) {\n          stack.emplace_back(next, next_seg_id);\n        } else {\n          // Terminal step: collect node for mutation\n          nodes_to_mutate.push_back(next);\n        }\n      }\n    } else {\n      // If Advance failed (e.g., MISMATCH or OUT_OF_BOUNDS), the current node itself\n      // might still be a terminal match because of the previous DESCENT segment.\n      // Instead of mutating immediately (which could break ordering guarantees),\n      // collect the node and defer mutation until after traversal.\n      if (!res && segment_index > 0 && path[segment_index - 1].type() == SegmentType::DESCENT &&\n          stack.back().get_segment_step() == 0) {\n        if (segment_index + 1 == path.size()) {\n          
// Terminal node discovered via DESCENT – store for later processing.\n          nodes_to_mutate.push_back(stack.back().obj_ptr());\n        }\n      }\n      stack.pop_back();\n    }\n  } while (!stack.empty());\n\n  // Apply mutations after DFS traversal is complete\n  const PathSegment& terminal_segment = path.back();\n\n  for (auto it = nodes_to_mutate.begin(); it != nodes_to_mutate.end(); ++it) {\n    dfs.MutateStep(terminal_segment, callback, *it);\n  }\n\n  return dfs;\n}\n\nDfs Dfs::Delete(absl::Span<const PathSegment> path, JsonType* json) {\n  DCHECK(!path.empty());\n\n  Dfs dfs;\n\n  if (path.size() == 1) {\n    dfs.DeleteStep(path[0], json);\n    return dfs;\n  }\n\n  using Item = detail::JsonconsDfsItem<false>;\n  vector<Item> stack;\n  stack.emplace_back(json);\n\n  do {\n    unsigned segment_index = stack.back().segment_idx();\n    const auto& path_segment = path[segment_index];\n\n    Item::AdvanceResult res = stack.back().Advance(path_segment);\n    if (res && res->first != nullptr) {\n      JsonType* next = res->first;\n\n      if (IsRecursive(next->type())) {\n        unsigned next_seg_id = res->second;\n\n        if (next_seg_id + 1 < path.size()) {\n          stack.emplace_back(next, next_seg_id);\n        } else {\n          // Terminal step: perform deletion immediately\n          // At this point we're in the deepest level, so safe to delete\n          dfs.DeleteStep(path[next_seg_id], next);\n        }\n      }\n    } else {\n      if (!res && segment_index > 0 && path[segment_index - 1].type() == SegmentType::DESCENT &&\n          stack.back().get_segment_step() == 0) {\n        if (segment_index + 1 == path.size()) {\n          // Terminal node discovered via DESCENT - safe to delete immediately\n          // as we're backtracking\n          dfs.DeleteStep(path[segment_index], stack.back().obj_ptr());\n        }\n      }\n      stack.pop_back();\n    }\n  } while (!stack.empty());\n\n  return dfs;\n}\n\nauto Dfs::PerformStep(const 
PathSegment& segment, const JsonType& node, const Cb& callback)\n    -> nonstd::expected<void, MatchStatus> {\n  switch (segment.type()) {\n    case SegmentType::IDENTIFIER: {\n      if (!node.is_object())\n        return make_unexpected(MISMATCH);\n\n      auto it = node.find(segment.identifier());\n      if (it != node.object_range().end()) {\n        DoCall(callback, it->key(), it->value());\n      }\n    } break;\n    case SegmentType::INDEX: {\n      if (!node.is_array())\n        return make_unexpected(MISMATCH);\n      IndexExpr index = segment.index().Normalize(node.size());\n      if (index.Empty()) {\n        return make_unexpected(OUT_OF_BOUNDS);\n      }\n      for (; index.first <= index.second; ++index.first) {\n        DoCall(callback, nullopt, node[index.first]);\n      }\n    } break;\n\n    case SegmentType::DESCENT:\n    case SegmentType::WILDCARD: {\n      if (node.is_object()) {\n        for (const auto& k_v : node.object_range()) {\n          DoCall(callback, k_v.key(), k_v.value());\n        }\n      } else if (node.is_array()) {\n        for (const auto& item : node.array_range()) {\n          DoCall(callback, nullopt, item);\n        }\n      }\n    } break;\n    default:\n      LOG(DFATAL) << \"Unknown segment \" << SegmentName(segment.type());\n  }\n  return {};\n}\n\nauto Dfs::MutateStep(const PathSegment& segment, const MutateCallback& cb, JsonType* node)\n    -> nonstd::expected<void, MatchStatus> {\n  switch (segment.type()) {\n    case SegmentType::IDENTIFIER: {\n      if (!node->is_object())\n        return make_unexpected(MISMATCH);\n\n      auto it = node->find(segment.identifier());\n      if (it != node->object_range().end()) {\n        cb(it->key(), &it->value());\n      }\n    } break;\n    case SegmentType::INDEX: {\n      if (!node->is_array())\n        return make_unexpected(MISMATCH);\n      IndexExpr index = segment.index().Normalize(node->size());\n      if (index.Empty()) {\n        return 
make_unexpected(OUT_OF_BOUNDS);\n      }\n\n      while (index.first <= index.second) {\n        auto it = node->array_range().begin() + index.first;\n        cb(nullopt, &*it);\n        ++index.first;\n      }\n    } break;\n\n    case SegmentType::DESCENT:\n    case SegmentType::WILDCARD: {\n      if (node->is_object()) {\n        auto it = node->object_range().begin();\n        while (it != node->object_range().end()) {\n          cb(it->key(), &it->value());\n          ++it;\n        }\n      } else if (node->is_array()) {\n        auto it = node->array_range().begin();\n        while (it != node->array_range().end()) {\n          cb(nullopt, &*it);\n          ++it;\n        }\n      }\n    } break;\n    case SegmentType::FUNCTION:\n      LOG(DFATAL) << \"Function segment is not supported for mutation\";\n      break;\n  }\n  return {};\n}\n\nauto Dfs::DeleteStep(const PathSegment& segment, JsonType* node)\n    -> nonstd::expected<void, MatchStatus> {\n  switch (segment.type()) {\n    case SegmentType::IDENTIFIER: {\n      if (!node->is_object())\n        return make_unexpected(MISMATCH);\n\n      auto it = node->find(segment.identifier());\n      if (it != node->object_range().end()) {\n        node->erase(it);\n        ++matches_;\n      }\n    } break;\n    case SegmentType::INDEX: {\n      if (!node->is_array())\n        return make_unexpected(MISMATCH);\n      IndexExpr index = segment.index().Normalize(node->size());\n      if (index.Empty()) {\n        return make_unexpected(OUT_OF_BOUNDS);\n      }\n\n      // Delete from end to beginning to maintain indices\n      for (int i = index.second; i >= index.first; --i) {\n        auto it = node->array_range().begin() + i;\n        node->erase(it);\n        ++matches_;\n      }\n    } break;\n\n    case SegmentType::DESCENT:\n    case SegmentType::WILDCARD: {\n      size_t initial_size = node->size();\n      node->clear();\n      matches_ += initial_size;\n    } break;\n    case SegmentType::FUNCTION:\n      
LOG(DFATAL) << \"Function segment is not supported for deletion\";\n      break;\n  }\n  return {};\n}\n\n}  // namespace dfly::json::detail\n"
  },
  {
    "path": "src/core/json/detail/jsoncons_dfs.h",
    "content": "// Copyright 2024, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#pragma once\n\n#include <absl/types/span.h>\n\n#include <nonstd/expected.hpp>\n#include <variant>\n\n#include \"core/json/detail/common.h\"\n#include \"core/json/json_object.h\"\n#include \"core/json/path.h\"\n#include \"core/overloaded.h\"\n\nnamespace dfly::json::detail {\n\n// Describes the current state of the DFS traversal for a single node inside json hierarchy.\n// Specifically it holds the parent object (can be a either a real object or an array),\n// and the iterator to one of its children that is currently being traversed.\ntemplate <bool IsConst> class JsonconsDfsItem {\n public:\n  using ValueType = std::conditional_t<IsConst, const JsonType, JsonType>;\n  using Ptr = ValueType*;\n  using Ref = ValueType&;\n  using ObjIterator =\n      std::conditional_t<IsConst, JsonType::const_object_iterator, JsonType::object_iterator>;\n  using ArrayIterator =\n      std::conditional_t<IsConst, JsonType::const_array_iterator, JsonType::array_iterator>;\n\n  using DepthState = std::pair<Ptr, unsigned>;  // object, segment_idx pair\n  using AdvanceResult = nonstd::expected<DepthState, MatchStatus>;\n\n  JsonconsDfsItem(Ptr o, unsigned idx = 0) : depth_state_(o, idx) {\n  }\n\n  // Returns the next object to traverse\n  // or null if traverse was exhausted or the segment does not match.\n  AdvanceResult Advance(const PathSegment& segment);\n\n  unsigned segment_idx() const {\n    return depth_state_.second;\n  }\n\n  Ptr obj_ptr() const {\n    return depth_state_.first;\n  }\n\n  unsigned get_segment_step() const {\n    return segment_step_;\n  }\n\n private:\n  static bool ShouldIterateAll(SegmentType type) {\n    return type == SegmentType::WILDCARD || type == SegmentType::DESCENT;\n  }\n\n  ObjIterator Begin() const {\n    if constexpr (IsConst) {\n      return obj().object_range().cbegin();\n    } else {\n      return 
obj().object_range().begin();\n    }\n  }\n\n  ArrayIterator ArrBegin() const {\n    if constexpr (IsConst) {\n      return obj().array_range().cbegin();\n    } else {\n      return obj().array_range().begin();\n    }\n  }\n\n  ArrayIterator ArrEnd() const {\n    if constexpr (IsConst) {\n      return obj().array_range().cend();\n    } else {\n      return obj().array_range().end();\n    }\n  }\n\n  Ref obj() const {\n    return *depth_state_.first;\n  }\n\n  DepthState Next(Ref obj) const {\n    return {&obj, depth_state_.second + segment_step_};\n  }\n\n  DepthState Exhausted() const {\n    return {nullptr, 0};\n  }\n\n  AdvanceResult Init(const PathSegment& segment);\n\n  // For most operations we advance the path segment by 1 when we descend into the children.\n  unsigned segment_step_ = 1;\n\n  DepthState depth_state_;\n  std::variant<std::monostate, ObjIterator, std::pair<ArrayIterator, ArrayIterator>> state_;\n};\n\n// Traverses a json object according to the given path and calls the callback for each matching\n// field. 
With DESCENT segments it will match 0 or more fields in depth.\n// MATCH(node, DESCENT|SUFFIX) = MATCH(node, SUFFIX) ||\n// { MATCH(node->child, DESCENT/SUFFIX) for each child of node }\n\nclass Dfs {\n public:\n  using Cb = PathCallback;\n\n  // TODO: for some operations we need to know the type of mismatches.\n  static Dfs Traverse(absl::Span<const PathSegment> path, const JsonType& json, const Cb& callback);\n  static Dfs Mutate(absl::Span<const PathSegment> path, const MutateCallback& callback,\n                    JsonType* json);\n\n  // Simplified deletion without callback - more efficient for deletion operations\n  static Dfs Delete(absl::Span<const PathSegment> path, JsonType* json);\n\n  unsigned matches() const {\n    return matches_;\n  }\n\n private:\n  bool TraverseImpl(absl::Span<const PathSegment> path, const Cb& callback);\n\n  nonstd::expected<void, MatchStatus> PerformStep(const PathSegment& segment, const JsonType& node,\n                                                  const Cb& callback);\n\n  nonstd::expected<void, MatchStatus> MutateStep(const PathSegment& segment,\n                                                 const MutateCallback& cb, JsonType* node);\n\n  nonstd::expected<void, MatchStatus> DeleteStep(const PathSegment& segment, JsonType* node);\n\n  void DoCall(const Cb& callback, std::optional<std::string_view> key, const JsonType& node) {\n    ++matches_;\n    callback(key, node);\n  }\n\n  unsigned matches_ = 0;\n};\n\ntemplate <bool IsConst>\nauto JsonconsDfsItem<IsConst>::Advance(const PathSegment& segment) -> AdvanceResult {\n  AdvanceResult result = std::visit(  // line break\n      Overloaded{\n          [&](std::monostate) { return Init(segment); },  // Init state\n          [&](ObjIterator& it) -> AdvanceResult {\n            if (!ShouldIterateAll(segment.type()))\n              return Exhausted();\n\n            ++it;\n            return it == obj().object_range().end() ? 
Exhausted() : Next(it->value());\n          },\n          [&](std::pair<ArrayIterator, ArrayIterator>& pair) -> AdvanceResult {\n            if (pair.first == pair.second)\n              return Exhausted();\n            ++pair.first;\n            return Next(*pair.first);\n          },\n      },\n      state_);\n  return result;\n}\n\ntemplate <bool IsConst>\nauto JsonconsDfsItem<IsConst>::Init(const PathSegment& segment) -> AdvanceResult {\n  switch (segment.type()) {\n    case SegmentType::IDENTIFIER: {\n      if (obj().is_object()) {\n        auto it = obj().find(segment.identifier());\n        if (it != obj().object_range().end()) {\n          state_ = it;\n          return DepthState{&it->value(), depth_state_.second + 1};\n        } else {\n          return Exhausted();\n        }\n      }\n      break;\n    }\n    case SegmentType::INDEX:\n      if (obj().is_array()) {\n        IndexExpr index = segment.index().Normalize(obj().size());\n        if (index.Empty()) {\n          return nonstd::make_unexpected(OUT_OF_BOUNDS);\n        }\n\n        auto start = ArrBegin() + index.first, end = ArrBegin() + index.second;\n        state_ = std::make_pair(start, end);\n        return Next(*start);\n      }\n      break;\n    case SegmentType::DESCENT:\n      if (segment_step_ == 1) {\n        // first time, branching to return the same object but with the next segment,\n        // exploring the path of ignoring the DESCENT operator.\n        // Also, shift the state (segment_step) to bypass this branch next time.\n        segment_step_ = 0;\n        return DepthState{depth_state_.first, depth_state_.second + 1};\n      }\n\n      // Now traverse all the children but do not progress with segment path.\n      // This is why segment_step_ is set to 0.\n      [[fallthrough]];\n    case SegmentType::WILDCARD: {\n      if (obj().is_object()) {\n        jsoncons::range rng = obj().object_range();\n        if (rng.cbegin() == rng.cend()) {\n          return Exhausted();\n    
    }\n        state_ = Begin();\n        return Next(Begin()->value());\n      }\n\n      if (obj().is_array()) {\n        auto start = ArrBegin(), end = ArrEnd();\n        if (start == end) {\n          return Exhausted();\n        }\n        state_ = std::make_pair(start, end - 1);  // end is inclusive\n        return Next(*start);\n      }\n      break;\n    }\n    default:\n      LOG(DFATAL) << \"Unknown segment \" << SegmentName(segment.type());\n  }  // end switch\n\n  return nonstd::make_unexpected(MISMATCH);\n}\n\n}  // namespace dfly::json::detail\n"
  },
  {
    "path": "src/core/json/driver.cc",
    "content": "// Copyright 2024, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#include \"src/core/json/driver.h\"\n\n#include <absl/strings/str_cat.h>\n\n#include \"base/logging.h\"\n#include \"src/core/json/lexer_impl.h\"\n#include \"src/core/overloaded.h\"\n\nusing namespace std;\n\nnamespace dfly::json {\n\nnamespace {\n\nclass SingleValueImpl : public AggFunction {\n  Result GetResultImpl() const final {\n    return val_;\n  }\n\n protected:\n  void Init(const JsonType& src) {\n    if (src.is_double()) {\n      val_.emplace<double>(src.as_double());\n    } else {\n      val_.emplace<int64_t>(src.as<int64_t>());\n    }\n  }\n\n  void Init(const flexbuffers::Reference src) {\n    if (src.IsFloat()) {\n      val_.emplace<double>(src.AsDouble());\n    } else {\n      val_.emplace<int64_t>(src.AsInt64());\n    }\n  }\n\n  Result val_;\n};\n\nclass MaxImpl : public SingleValueImpl {\n  bool ApplyImpl(const JsonType& src) final {\n    if (!src.is_number()) {\n      return false;\n    }\n\n    visit(Overloaded{\n              [&](monostate) { Init(src); },\n              [&](double d) { val_ = max(d, src.as_double()); },\n              [&](int64_t i) {\n                if (src.is_double())\n                  val_ = max(double(i), src.as_double());\n                else\n                  val_ = max(i, src.as<int64_t>());\n              },\n          },\n          val_);\n\n    return true;\n  }\n\n  bool ApplyImpl(flexbuffers::Reference src) final {\n    if (!src.IsNumeric()) {\n      return false;\n    }\n\n    visit(Overloaded{\n              [&](monostate) { Init(src); },\n              [&](double d) { val_ = max(d, src.AsDouble()); },\n              [&](int64_t i) {\n                if (src.IsFloat())\n                  val_ = max(double(i), src.AsDouble());\n                else\n                  val_ = max(i, src.AsInt64());\n              },\n          },\n          val_);\n    return true;\n  }\n};\n\nclass MinImpl : 
public SingleValueImpl {\n private:\n  bool ApplyImpl(const JsonType& src) final {\n    if (!src.is_number()) {\n      return false;\n    }\n\n    visit(Overloaded{\n              [&](monostate) { Init(src); },\n              [&](double d) { val_ = min(d, src.as_double()); },\n              [&](int64_t i) {\n                if (src.is_double())\n                  val_ = min(double(i), src.as_double());\n                else\n                  val_ = min(i, src.as<int64_t>());\n              },\n          },\n          val_);\n\n    return true;\n  }\n\n  bool ApplyImpl(flexbuffers::Reference src) final {\n    if (!src.IsNumeric()) {\n      return false;\n    }\n\n    visit(Overloaded{\n              [&](monostate) { Init(src); },\n              [&](double d) { val_ = min(d, src.AsDouble()); },\n              [&](int64_t i) {\n                if (src.IsFloat())\n                  val_ = min(double(i), src.AsDouble());\n                else\n                  val_ = min(i, src.AsInt64());\n              },\n          },\n          val_);\n    return true;\n  }\n};\n\nclass AvgImpl : public AggFunction {\n private:\n  bool ApplyImpl(const JsonType& src) final {\n    if (!src.is_number()) {\n      return false;\n    }\n    sum_ += src.as_double();\n    count_++;\n\n    return true;\n  }\n\n  bool ApplyImpl(flexbuffers::Reference src) final {\n    if (!src.IsNumeric()) {\n      return false;\n    }\n    sum_ += src.AsDouble();\n    count_++;\n\n    return true;\n  }\n\n  Result GetResultImpl() const final {\n    DCHECK_GT(count_, 0u);  // AggFunction guarantees that\n    return Result(double(sum_ / count_));\n  }\n\n  double sum_ = 0;\n  uint64_t count_ = 0;\n};\n\n}  // namespace\n\nDriver::Driver() : lexer_(make_unique<Lexer>()) {\n}\n\nDriver::~Driver() {\n}\n\nvoid Driver::SetInput(string str) {\n  cur_str_ = std::move(str);\n  lexer_->in(cur_str_);\n  path_.clear();\n}\n\nvoid Driver::ResetScanner() {\n  lexer_ = make_unique<Lexer>();\n}\n\nvoid 
Driver::AddFunction(string_view fname) {\n  if (!path_.empty()) {\n    throw Parser::syntax_error(lexer_->location(),\n                               \"function can be only at the beginning of the path\");\n  }\n\n  shared_ptr<AggFunction> func;\n  if (fname == \"max\") {\n    func = make_shared<MaxImpl>();\n  } else if (fname == \"min\") {\n    func = make_shared<MinImpl>();\n  } else if (fname == \"avg\") {\n    func = make_shared<AvgImpl>();\n  } else {\n    throw Parser::syntax_error(lexer_->location(), absl::StrCat(\"Unknown function: \", fname));\n  }\n  path_.emplace_back(std::move(func));\n}\n\n}  // namespace dfly::json\n"
  },
  {
    "path": "src/core/json/driver.h",
    "content": "// Copyright 2024, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#pragma once\n\n#include <memory>\n#include <string>\n\n#include \"src/core/json/path.h\"\n\nnamespace dfly {\nnamespace json {\n\nclass Lexer;\nclass location;  // from jsonpath_grammar.hh\n\nclass Driver {\n public:\n  Driver();\n  virtual ~Driver();\n\n  Lexer* lexer() {\n    return lexer_.get();\n  }\n\n  void SetInput(std::string str);\n  void ResetScanner();\n  virtual void Error(const location& l, const std::string& msg) = 0;\n\n  void AddIdentifier(const std::string& identifier) {\n    AddSegment(PathSegment(SegmentType::IDENTIFIER, identifier));\n  }\n\n  void AddFunction(std::string_view fname);\n\n  void AddWildcard() {\n    AddSegment(PathSegment(SegmentType::WILDCARD));\n  }\n\n  void AddSegment(PathSegment segment) {\n    path_.push_back(std::move(segment));\n  }\n\n  Path TakePath() {\n    return std::move(path_);\n  }\n\n private:\n  Path path_;\n  std::string cur_str_;\n  std::unique_ptr<Lexer> lexer_;\n};\n\n}  // namespace json\n}  // namespace dfly\n"
  },
  {
    "path": "src/core/json/interned_blob_test.cc",
    "content": "// Copyright 2026, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n\n#include \"base/gtest.h\"\n#include \"core/detail/stateless_allocator.h\"\n#include \"core/json/detail/interned_string.h\"\n#include \"core/mi_memory_resource.h\"\n\nusing namespace std::literals;\nusing namespace dfly;\n\nnamespace {\n\nMiMemoryResource* MemoryResource() {\n  thread_local mi_heap_t* heap = mi_heap_new();\n  thread_local MiMemoryResource memory_resource{heap};\n  return &memory_resource;\n}\n\n}  // namespace\n\nclass InternedBlobTest : public testing::Test {\n protected:\n  void SetUp() override {\n    InitTLStatelessAllocMR(MemoryResource());\n  }\n\n  void TearDown() override {\n    CleanupStatelessAllocMR();\n  }\n};\n\nusing detail::BlobPtr;\nusing detail::InternedBlobHandle;\n\nTEST_F(InternedBlobTest, MemoryUsage) {\n  const auto* mr = MemoryResource();\n  const auto usage_before = mr->used();\n  InternedBlobHandle blob = InternedBlobHandle::Create(\"1234567\");\n  const auto usage_after = mr->used();\n  const auto expected_delta = blob.MemUsed();\n  EXPECT_EQ(usage_before + expected_delta, usage_after);\n  InternedBlobHandle::Destroy(blob);\n  EXPECT_EQ(usage_before, mr->used());\n}\n\nvoid CheckBlob(InternedBlobHandle& blob, std::string_view expected, uint32_t ref_cnt = 1) {\n  EXPECT_EQ(blob, expected);\n  EXPECT_EQ(blob.Size(), expected.size());\n  EXPECT_EQ(blob.RefCount(), ref_cnt);\n}\n\nTEST_F(InternedBlobTest, Ctors) {\n  auto blob = InternedBlobHandle::Create(\"\");\n  EXPECT_EQ(blob.Size(), 0);\n  EXPECT_FALSE(blob);\n  InternedBlobHandle::Destroy(blob);\n\n  InternedBlobHandle src = InternedBlobHandle::Create(\"foobar\");\n  InternedBlobHandle dest{src};\n  CheckBlob(dest, \"foobar\");\n  CheckBlob(src, \"foobar\");\n  InternedBlobHandle::Destroy(dest);\n}\n\nTEST_F(InternedBlobTest, Comparison) {\n  auto blob = InternedBlobHandle::Create(\"foobar\");\n  constexpr detail::BlobEq blob_eq;\n\n  
EXPECT_TRUE(blob_eq(blob, \"foobar\"));\n  EXPECT_TRUE(blob_eq(\"foobar\", blob));\n\n  InternedBlobHandle second = blob;\n  second.IncrRefCount();\n\n  EXPECT_TRUE(blob_eq(blob, second));\n  InternedBlobHandle::Destroy(blob);\n}\n\nTEST_F(InternedBlobTest, RefCounts) {\n  auto blob = InternedBlobHandle::Create(\"1234567\");\n  EXPECT_EQ(blob.RefCount(), 1);\n  blob.DecrRefCount();\n  EXPECT_DEBUG_DEATH(blob.DecrRefCount(), \"Attempt to decrease zero refcount\");\n  InternedBlobHandle::Destroy(blob);\n}\n\nTEST_F(InternedBlobTest, Pool) {\n  detail::InternedBlobPool pool{};\n  InternedBlobHandle b1 = InternedBlobHandle::Create(\"foo\");\n  pool.emplace(b1);\n\n  // search by string view\n  EXPECT_TRUE(pool.contains(\"foo\"));\n\n  // increment the refcount. The blob is still found because the hasher only looks at the string\n  b1.IncrRefCount();\n  EXPECT_TRUE(pool.contains(\"foo\"));\n  InternedBlobHandle::Destroy(b1);\n}\n\nusing detail::InternedString;\n\nnamespace {\n\nvoid StringCheck(const InternedString& s, const char* ptr) {\n  std::string_view sv{ptr};\n\n  EXPECT_STREQ(s.data(), ptr);\n  EXPECT_STREQ(s.c_str(), ptr);\n\n  EXPECT_EQ(s.size(), sv.size());\n  EXPECT_EQ(s.length(), sv.size());\n\n  EXPECT_EQ(std::string_view(s), sv);\n  EXPECT_EQ(std::string_view(s.data(), s.size()), sv);\n  EXPECT_EQ(std::string_view(s.c_str(), s.size()), sv);\n}\n\n}  // namespace\n\nTEST_F(InternedBlobTest, StringPool) {\n  size_t hits = GetInternedStringStats().hits;\n  size_t misses = GetInternedStringStats().misses;\n  const auto& pool = InternedString::GetPoolRef();\n  EXPECT_TRUE(pool.empty());\n  {\n    const InternedString s1{\"foobar\"};\n    StringCheck(s1, \"foobar\");\n    EXPECT_EQ(pool.size(), 1);\n    misses += 1;\n    EXPECT_EQ(GetInternedStringStats().misses, misses);\n    EXPECT_EQ(GetInternedStringStats().pool_entries, 1);\n    {\n      const InternedString s2{\"foobar\"};\n      StringCheck(s2, \"foobar\");\n      EXPECT_EQ(pool.size(), 1);\n      
EXPECT_EQ(GetInternedStringStats().misses, misses);\n      EXPECT_EQ(GetInternedStringStats().pool_entries, 1);\n      hits += 1;\n      EXPECT_EQ(GetInternedStringStats().hits, hits);\n    }\n    EXPECT_EQ(pool.size(), 1);\n  }\n  EXPECT_TRUE(pool.empty());\n  EXPECT_EQ(GetInternedStringStats().misses, misses);\n  EXPECT_EQ(GetInternedStringStats().pool_entries, 0);\n  EXPECT_EQ(GetInternedStringStats().pool_bytes, 0);\n  EXPECT_EQ(GetInternedStringStats().hits, hits);\n\n  std::vector<InternedString> strings;\n  for (auto i = 0; i < 1000; ++i) {\n    strings.emplace_back(std::to_string(i));\n  }\n\n  EXPECT_EQ(pool.size(), 1000);\n  EXPECT_EQ(GetInternedStringStats().pool_entries, 1000);\n  misses += 1000;\n  EXPECT_EQ(GetInternedStringStats().misses, misses);\n  strings.clear();\n  EXPECT_TRUE(pool.empty());\n  EXPECT_EQ(GetInternedStringStats().pool_entries, 0);\n  EXPECT_EQ(GetInternedStringStats().pool_bytes, 0);\n\n  for (auto i = 0; i < 1000; ++i) {\n    strings.emplace_back(\"zyx\");\n  }\n  EXPECT_EQ(pool.size(), 1);\n  EXPECT_EQ(GetInternedStringStats().pool_entries, 1);\n  hits += 999;\n  EXPECT_EQ(GetInternedStringStats().hits, hits);\n  strings.clear();\n  EXPECT_TRUE(pool.empty());\n\n  InternedString empty;\n  EXPECT_TRUE(pool.empty());\n}\n\nTEST_F(InternedBlobTest, StringApi) {\n  InternedString s1{\"foobar\"};\n  EXPECT_EQ(std::string_view{s1}, \"foobar\"sv);\n  StringCheck(s1, \"foobar\");\n\n  const auto& pool = InternedString::GetPoolRef();\n  InternedString s2{\"psi\"};\n  StringCheck(s2, \"psi\");\n\n  EXPECT_EQ(pool.size(), 2);\n\n  // swap pointers into the pool\n  s1.swap(s2);\n\n  EXPECT_EQ(pool.size(), 2);\n\n  StringCheck(s1, \"psi\");\n  StringCheck(s2, \"foobar\");\n\n  EXPECT_NE(s1, s2);\n  EXPECT_EQ(s1, s1);\n  // foobar < psi lexicographically\n  EXPECT_LT(s2, s1);\n}\n\nTEST_F(InternedBlobTest, StringCtors) {\n  const auto& pool = InternedString::GetPoolRef();\n  InternedString s1{\"foobar\"};\n  EXPECT_EQ(pool.size(), 1);\n\n  
// move ctor\n  auto to = std::move(s1);\n  EXPECT_EQ(pool.size(), 1);\n\n  StringCheck(to, \"foobar\");\n  StringCheck(s1, \"\");\n\n  // These tests exercise self-move and self-copy behavior. This causes errors on newer GCC when\n  // warnings are treated as errors (on CI). We need to version gate this because on older GCC this\n  // check is not present.\n#if defined(__GNUC__) && !defined(__clang__) && __GNUC__ >= 13\n#pragma GCC diagnostic push\n#pragma GCC diagnostic ignored \"-Wself-move\"\n#endif\n  to = std::move(to);\n  StringCheck(to, \"foobar\");\n\n  auto copied = to;\n  EXPECT_EQ(pool.size(), 1);\n\n  StringCheck(to, \"foobar\");\n  StringCheck(copied, \"foobar\");\n\n  copied = copied;\n#if defined(__GNUC__) && !defined(__clang__) && __GNUC__ >= 13\n#pragma GCC diagnostic pop\n#endif\n  StringCheck(copied, \"foobar\");\n  EXPECT_EQ(pool.size(), 1);\n\n  const auto* mr = MemoryResource();\n  const auto before = mr->used();\n\n  std::string_view sv{\".......\"};\n  // ptr and size with some allocator, allocator will be ignored\n  InternedString x{sv.data(), sv.size(), std::allocator<char>{}};\n  StringCheck(x, \".......\");\n  EXPECT_EQ(pool.size(), 2);\n\n  EXPECT_GE(mr->used(), before + x.MemUsed());\n\n  InternedString k{sv.begin(), sv.end()};\n  StringCheck(k, \".......\");\n  EXPECT_EQ(pool.size(), 2);\n}\n\nTEST_F(InternedBlobTest, PoolShrink) {\n  InternedString::ResetPool();\n  std::vector<InternedString> v;\n  const auto& ref = InternedString::GetPoolRef();\n  for (const auto i : std::views::iota(0, 1000))\n    v.emplace_back(std::to_string(i));\n\n  std::vector<size_t> caps;\n\n  constexpr auto jitter = std::views::iota(0, 6);\n\n  while (!v.empty()) {\n    constexpr auto step = 20;\n    const auto from = v.end() - std::min<size_t>(step, v.size());\n    v.erase(from, v.end());\n    // Interleaving inserts right after a possible resize, to ensure we don't have to increase\n    // capacity right after a shrink. 
The caps vector should remain monotonically non-increasing.\n    for (const auto j : jitter)\n      v.emplace_back(std::to_string(10000 + j));\n    caps.push_back(ref.capacity());\n    for (size_t i = 0; i < jitter.size(); ++i)\n      v.pop_back();\n  }\n\n  EXPECT_EQ(ref.load_factor(), 0);\n  EXPECT_TRUE(std::ranges::is_sorted(caps, std::ranges::greater{}));\n\n  // Check that capacity changes very infrequently\n  size_t cap_trans = 0;\n  for (size_t i = 1; i < caps.size(); ++i) {\n    if (caps[i] != caps[i - 1])\n      ++cap_trans;\n  }\n  EXPECT_LT(cap_trans, caps.size() / 2);\n}\n"
  },
  {
    "path": "src/core/json/json_object.cc",
    "content": "// Copyright 2023, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#include \"core/json/json_object.h\"\n\n#include <stack>\n\n#include \"base/logging.h\"\n#include \"core/page_usage/page_usage_stats.h\"\n\nusing namespace jsoncons;\n\nnamespace {\n\ntemplate <typename T>\nstd::optional<T> ParseWithDecoder(std::string_view input, json_decoder<T>&& decoder) {\n  std::error_code ec;\n  auto JsonErrorHandler = [](json_errc ec, const ser_context&) {\n    VLOG(1) << \"Error while decode JSON: \" << make_error_code(ec).message();\n    return false;\n  };\n\n  // The maximum allowed JSON nesting depth is 64.\n  // The limit was reduced from 256 to 64. This change is reasonable, as most documents contain\n  // no more than 20-30 levels of nesting. In the test case, over 128 levels were used, causing\n  // the parser to enter a long stall due to excessive resource consumption. Even a limit of 128\n  // does not mitigate the issue. A limit of 64 is a sensible compromise.\n  // See https://github.com/dragonflydb/dragonfly/issues/5028\n  const uint32_t json_nesting_depth_limit = 64;\n\n  /* The maximum possible JSON nesting depth is either the specified json_nesting_depth_limit or\n     half of the input size. Since nesting a JSON object requires at least 2 characters. */\n  auto parser_options = json_options{}.max_nesting_depth(\n      std::min(json_nesting_depth_limit, uint32_t(input.size() / 2)));\n\n  json_parser parser(parser_options, JsonErrorHandler);\n\n  parser.update(input);\n  parser.finish_parse(decoder, ec);\n\n  if (!ec && decoder.is_valid()) {\n    return decoder.get_result();\n  }\n  return std::nullopt;\n}\n\nusing namespace dfly;\n\n// The following two functions allocate a string-based object by copying data to a fresh memory\n// page. 
Then the move-assignment operator swaps it with the input node (swap_l_r in jsoncons), and\n// the temporary is destroyed at the end of the scope.\nbool DefragmentByteString(JsonType& j, PageUsage* page_usage) {\n  const auto& byte_storage = j.cast<JsonType::byte_string_storage>();\n  if (byte_storage.length() == 0 ||\n      !page_usage->IsPageForObjectUnderUtilized(const_cast<uint8_t*>(byte_storage.data())))\n    return false;\n\n  const byte_string_view bsv{byte_storage.data(), byte_storage.length()};\n  if (j.tag() == semantic_tag::ext) {\n    j = JsonType(byte_string_arg, bsv, j.ext_tag(), byte_storage.get_allocator());\n    return true;\n  }\n\n  j = JsonType(byte_string_arg, bsv, j.tag(), byte_storage.get_allocator());\n  return true;\n}\n\nbool DefragmentLongString(JsonType& j, PageUsage* page_usage) {\n  const auto& str_storage = j.cast<JsonType::long_string_storage>();\n  if (str_storage.length() == 0 ||\n      !page_usage->IsPageForObjectUnderUtilized(const_cast<char*>(str_storage.data())))\n    return false;\n\n  JsonType::string_view_type svt{str_storage.data(), str_storage.length()};\n  j = JsonType(svt, j.tag(), str_storage.get_allocator());\n  return true;\n}\n\n// Allocates a new json object of type json_object_arg, with fresh memory allocation for its\n// contained vector of key value pairs. Then moves members from j to this new object. 
Finally j is\n// swapped with the new object.\nbool DefragmentJsonObject(JsonType& j, PageUsage* page_usage) {\n  auto& object = j.cast<JsonType::object_storage>().value();\n  if (object.empty() || !page_usage->IsPageForObjectUnderUtilized(&*object.begin()))\n    return false;\n\n  // Creates a fresh object and reserves space for the underlying vector.\n  JsonType new_node{json_object_arg, j.tag(), object.get_allocator()};\n  new_node.reserve(object.size());\n\n  for (auto& member : object) {\n    // The member values are JsonType themselves, they just wrap pointers to actual storage.\n    // Their move invokes the move ctor in jsoncons, which will move the value wrappers to new_node,\n    // and leave the original in `j` holding references to `null_storage` type, see\n    // `uninitialized_move_a` in jsoncons. The member key (a string) is not moved but copied into\n    // new_node members.\n    new_node.try_emplace(member.key(), std::move(member.value()));\n  }\n\n  // Invokes move assignment. A swap is performed, and new_node now holds null_storage\n  // references instead of `j`. It will be destroyed on leaving scope, cleaning up its memory.\n  j = std::move(new_node);\n  return true;\n}\n\n// Same as DefragmentJsonObject except uses an array object. 
The contained members are moved\n// similarly, and on exit the old node is destroyed.\nbool DefragmentJsonArray(JsonType& j, PageUsage* page_usage) {\n  auto& array = j.cast<JsonType::array_storage>().value();\n  if (array.empty() || !page_usage->IsPageForObjectUnderUtilized(&*array.begin()))\n    return false;\n\n  JsonType new_node{json_array_arg, j.tag(), array.get_allocator()};\n  new_node.reserve(array.size());\n\n  for (JsonType& member : array) {\n    new_node.push_back(std::move(member));\n  }\n\n  j = std::move(new_node);\n  return true;\n}\n\n}  // namespace\n\nnamespace dfly {\n\nstd::optional<TmpJson> JsonFromString(std::string_view input) {\n  return ParseWithDecoder(input, json_decoder<TmpJson>{});\n}\n\noptional<JsonType> ParseJsonUsingShardHeap(string_view input) {\n  return ParseWithDecoder(input, json_decoder<JsonType>{StatelessAllocator<char>{}});\n}\n\nbool Defragment(JsonType& j, PageUsage* page_usage) {\n  bool did_defragment = false;\n  // stack-based traversal inspired from jsoncons::basic_json::compute_memory_size\n  std::stack<JsonType*> stack;\n  stack.push(&j);\n\n  while (!stack.empty()) {\n    JsonType* current = stack.top();\n    stack.pop();\n\n    const json_storage_kind storage_kind = current->storage_kind();\n    switch (storage_kind) {\n      case json_storage_kind::byte_str:\n        did_defragment |= DefragmentByteString(*current, page_usage);\n        break;\n      case json_storage_kind::long_str:\n        did_defragment |= DefragmentLongString(*current, page_usage);\n        break;\n      case json_storage_kind::object: {\n        did_defragment |= DefragmentJsonObject(*current, page_usage);\n        auto& object = current->cast<JsonType::object_storage>().value();\n        for (auto& member : object) {\n          stack.push(&member.value());\n        }\n        break;\n      }\n      case json_storage_kind::array: {\n        did_defragment |= DefragmentJsonArray(*current, page_usage);\n        auto& array = 
current->cast<JsonType::array_storage>().value();\n        for (auto& member : array) {\n          stack.push(&member);\n        }\n        break;\n      }\n      default:\n        DCHECK(is_trivial_storage(storage_kind))\n            << \"unexpected non trivial storage type:\" << storage_kind;\n        break;\n    }\n  }\n  return did_defragment;\n}\n\nsize_t ComputeMemorySize(const JsonType& j) {\n  std::stack<const JsonType*> stack;\n  stack.push(&j);\n\n  size_t total = 0;\n  auto add_used_memory = [&total](const auto* data) {\n    if (data)\n      total += mi_usable_size(data);\n  };\n\n  using enum json_storage_kind;\n  while (!stack.empty()) {\n    const auto* current = stack.top();\n    stack.pop();\n\n    const auto storage = current->storage_kind();\n    if (is_trivial_storage(storage))\n      continue;\n\n    switch (storage) {\n      case object: {\n        const auto& object_storage = current->cast<JsonType::object_storage>().value();\n        if (!object_storage.empty())\n          add_used_memory(&*object_storage.begin());\n        for (const auto& member : object_storage) {\n          total += member.key().MemUsed();\n          const auto& value = member.value();\n          if (!is_trivial_storage(value.storage_kind()))\n            stack.push(&value);\n        }\n      } break;\n      case array: {\n        const auto& arr = current->cast<JsonType::array_storage>().value();\n        if (!arr.empty())\n          add_used_memory(&arr[0]);\n        for (const auto& elem : arr)\n          if (!is_trivial_storage(elem.storage_kind()))\n            stack.push(&elem);\n      } break;\n      case long_str:\n        add_used_memory(current->cast<JsonType::long_string_storage>().data());\n        break;\n      case byte_str:\n        add_used_memory(current->cast<JsonType::byte_string_storage>().data());\n        break;\n      default:\n        DCHECK(false) << \"unexpected non trivial storage type:\" << storage;\n    }\n  }\n  return total;\n}\n\n}  // 
namespace dfly\n"
  },
  {
    "path": "src/core/json/json_object.h",
    "content": "// Copyright 2022, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#pragma once\n\n#include <version>  // for __cpp_lib_to_chars macro.\n\n#include \"core/detail/stateless_allocator.h\"\n#include \"core/json/detail/interned_string.h\"\n\n// std::from_chars is available in C++17 if __cpp_lib_to_chars is defined.\n#if __cpp_lib_to_chars >= 201611L\n#define JSONCONS_HAS_STD_FROM_CHARS 1\n#endif\n\n#include <jsoncons/json.hpp>\n#include <jsoncons_ext/jsonpath/jsonpath.hpp>\n#include <memory>\n#include <optional>\n#include <string_view>\n\nnamespace dfly {\nclass PageUsage;\n\nusing TmpJson = jsoncons::json;\n\nstruct InternedStringPolicy : jsoncons::sorted_policy {\n  template <typename, typename, typename> using member_key = detail::InternedString;\n};\n\nusing JsonType = jsoncons::basic_json<char, InternedStringPolicy, StatelessAllocator<char>>;\n\n// A helper type to use in template functions which are expected to work with both TmpJson\n// and JsonType\ntemplate <typename Allocator>\nusing JsonWithAllocator = jsoncons::basic_json<char, jsoncons::sorted_policy, Allocator>;\n\n// Parses string into JSON. Any allocatons are done using the std allocator. This method should be\n// used for generic JSON parsing, in particular, it should not be used to parse objects which will\n// be stored in the db, as the backing storage is not managed by mimalloc.\nstd::optional<TmpJson> JsonFromString(std::string_view input);\n\n// Parses string into JSON, using mimalloc heap for allocations. This method should only be used on\n// shards where mimalloc heap is initialized.\nstd::optional<JsonType> ParseJsonUsingShardHeap(std::string_view input);\n\n// Defragments the given json object by traversing its tree structure non-recursively, examining\n// nodes and defragmenting as needed. 
Returns true if any object within the node was reallocated\nbool Defragment(JsonType& j, PageUsage* page_usage);\n\ntemplate <typename Json = JsonType>\nauto MakeJsonPathExpr(std::string_view path, std::error_code& ec)\n    -> jsoncons::jsonpath::jsonpath_expression<Json> {\n  using ResultAllocT = typename Json::allocator_type;\n  using TmpAllocT = std::allocator<char>;\n  using AllocSetT = jsoncons::allocator_set<ResultAllocT, TmpAllocT>;\n  return jsoncons::jsonpath::make_expression<Json, TmpAllocT>(AllocSetT(), path, ec);\n}\n\nsize_t ComputeMemorySize(const JsonType& j);\n\n}  // namespace dfly\n"
  },
  {
    "path": "src/core/json/json_test.cc",
    "content": "// Copyright 2023, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#include <gmock/gmock.h>\n\n#include <jsoncons/json.hpp>\n#include <jsoncons_ext/jsonpath/jsonpath.hpp>\n#include <memory_resource>\n\n#include \"base/gtest.h\"\n#include \"base/logging.h\"\n\nnamespace dfly {\nusing namespace jsoncons;\nusing namespace jsoncons::literals;\nusing namespace testing;\n\nclass JsonTest : public ::testing::Test {\n protected:\n  JsonTest() {\n  }\n};\n\nTEST_F(JsonTest, Basic) {\n  std::string data = R\"(\n    {\n       \"application\": \"hiking\",\n       \"reputons\": [\n       {\n           \"rater\": \"HikingAsylum\",\n           \"assertion\": \"advanced\",\n           \"rated\": \"Marilyn C\",\n           \"rating\": 0.90,\n           \"confidence\": 0.99\n         }\n       ]\n    }\n)\";\n\n  pmr::json j = pmr::json::parse(data);\n  EXPECT_TRUE(j.contains(\"reputons\"));\n  jsonpath::json_replace(j, \"$.reputons[*].rating\", 1.1);\n  EXPECT_EQ(1.1, j[\"reputons\"][0][\"rating\"].as_double());\n}\n\nTEST_F(JsonTest, SetEmpty) {\n  pmr::json dest{json_object_arg};  // crashes on UB without the tag.\n  dest[\"bar\"] = \"foo\";\n}\n\nTEST_F(JsonTest, Query) {\n  json j = R\"(\n{\"a\":{}, \"b\":{\"a\":1}, \"c\":{\"a\":1, \"b\":2}}\n)\"_json;\n\n  json out = jsonpath::json_query(j, \"$..*\");\n  EXPECT_EQ(R\"([{},{\"a\":1},{\"a\":1,\"b\":2},1,1,2])\"_json, out);\n\n  json j2 = R\"(\n    {\"firstName\":\"John\",\"lastName\":\"Smith\",\"age\":27,\"weight\":135.25,\"isAlive\":true,\"address\":{\"street\":\"21 2nd Street\",\"city\":\"New York\",\"state\":\"NY\",\"zipcode\":\"10021-3100\"},\"phoneNumbers\":[{\"type\":\"home\",\"number\":\"212 555-1234\"},{\"type\":\"office\",\"number\":\"646 555-4567\"}],\"children\":[],\"spouse\":null}\n  )\"_json;\n\n  // json_query always returns arrays.\n  // See here: https://github.com/danielaparker/jsoncons/issues/82\n  // Therefore we are going to only support the \"extended\" 
semantics\n  // of json API (as they are called in AWS documentation).\n  out = jsonpath::json_query(j2, \"$.address\");\n  EXPECT_EQ(R\"([{\"street\":\"21 2nd Street\",\"city\":\"New York\",\n      \"state\":\"NY\",\"zipcode\":\"10021-3100\"}])\"_json,\n            out);\n}\n\nTEST_F(JsonTest, Errors) {\n  auto cb = [](json_errc, const ser_context&) { return false; };\n\n  json_decoder<json> decoder;\n  basic_json_parser<char> parser(basic_json_decode_options<char>{}, cb);\n\n  std::string_view input{\"\\000bla\"};\n  parser.update(input.data(), input.size());\n\n  std::error_code ec;\n  parser.parse_some(decoder, ec);\n\n  EXPECT_TRUE(ec);\n\n  EXPECT_EQ(ec, json_errc::unexpected_eof);\n  EXPECT_FALSE(decoder.is_valid());\n}\n\nTEST_F(JsonTest, Path) {\n  std::error_code ec;\n  json j1 = R\"({\"field\" : 1, \"field-dash\": 2})\"_json;\n\n  auto expr = jsonpath::make_expression<json>(\"$.field\", ec);\n  EXPECT_FALSE(ec);\n\n  expr.evaluate(j1, [](const std::string& path, const json& val) {\n    ASSERT_EQ(\"$['field']\", path);\n    ASSERT_EQ(1, val.as<int>());\n  });\n\n  expr = jsonpath::make_expression<json>(\"$.field-dash\", ec);\n  ASSERT_FALSE(ec);  // parses '-'\n\n  expr.evaluate(j1, [](const std::string& path, const json& val) {\n    ASSERT_EQ(\"$['field-dash']\", path);\n    ASSERT_EQ(2, val.as<int>());\n  });\n\n  int called = 0;\n  jsonpath::json_query(j1, \"max($.*)\", [&](const std::string& path, const json& val) {\n    EXPECT_EQ(\"$\", path);\n    ASSERT_EQ(2, val.as<int>());\n    ++called;\n  });\n  EXPECT_EQ(1, called);\n\n  auto res = jsonpath::json_query(j1, \"max($.*)\");\n  ASSERT_TRUE(res.is_array() && res.size() == 1);\n  EXPECT_EQ(2, res[0].as<int>());\n\n  called = 0;\n  json j2 = R\"({\"field\" : [1, 2, 3, 4, 5]})\"_json;\n  jsonpath::json_query(j2, \"$.field[1:2]\", [&](const std::string& path, const json& val) {\n    EXPECT_EQ(\"$['field'][1]\", path);\n    ASSERT_EQ(2, val.as<int>());\n    ++called;\n  });\n  EXPECT_EQ(1, called);\n\n  
std::vector<int> vals;\n  jsonpath::json_query(j2, \"$.field[1:]\", [&](const std::string& path, const json& val) {\n    vals.push_back(val.as<int>());\n  });\n  EXPECT_THAT(vals, ElementsAre(2, 3, 4, 5));\n\n  jsonpath::json_query(j2, \"$.field[-1]\", [&](const std::string& path, const json& val) {\n    EXPECT_EQ(5, val.as<int>());\n  });\n\n  jsonpath::json_query(j2, \"$.field[-6:1]\", [&](const std::string& path, const json& val) {\n    EXPECT_EQ(1, val.as<int>());\n  });\n}\n\nTEST_F(JsonTest, Delete) {\n  json j1 = R\"({\"c\":{\"a\":1, \"b\":2}, \"d\":{\"a\":1, \"b\":2, \"c\":3}, \"e\": [1,2]})\"_json;\n\n  auto deleter = [](const json::string_view_type& path, json& val) {\n    LOG(INFO) << \"path: \" << path;\n    // val.evaluate();\n    // if (val.is_object())\n    //   val.erase(val.object_range().begin(), val.object_range().end());\n  };\n  jsonpath::json_replace(j1, \"$.d.*\", deleter);\n\n  auto expr = jsonpath::make_expression<json>(\"$.d.*\");\n\n  auto callback = [](const std::string& path, const json& val) {\n    LOG(INFO) << path << \": \" << val << \"\\n\";\n  };\n  expr.evaluate(j1, callback, jsonpath::result_options::path);\n  auto it = j1.find(\"d\");\n  ASSERT_TRUE(it != j1.object_range().end());\n\n  it->value().erase(\"a\");\n  EXPECT_EQ(R\"({\"c\":{\"a\":1, \"b\":2}, \"d\":{\"b\":2, \"c\":3}, \"e\": [1,2]})\"_json, j1);\n}\n\nTEST_F(JsonTest, JsonWithPolymorhicAllocator) {\n  char buffer[1024] = {};\n  std::pmr::monotonic_buffer_resource pool{std::data(buffer), std::size(buffer)};\n  std::pmr::polymorphic_allocator<char> alloc(&pool);\n\n  std::string input = R\"(\n{ \"store\": {\n    \"book\": [\n      { \"category\": \"Roman\",\n        \"author\": \"Felix Lobrecht\",\n        \"title\": \"Sonne und Beton\",\n        \"price\": 12.99\n      },\n      { \"category\": \"Roman\",\n        \"author\": \"Thomas F. 
Schneider\",\n        \"title\": \"Im Westen nichts Neues\",\n        \"price\": 10.00\n      }\n    ]\n  }\n}\n)\";\n\n  auto j1 = pmr::json::parse(combine_allocators(alloc), input, json_options{});\n  EXPECT_EQ(\"Roman\", j1[\"store\"][\"book\"][0][\"category\"].as_string());\n  EXPECT_EQ(\"Felix Lobrecht\", j1[\"store\"][\"book\"][0][\"author\"].as_string());\n  EXPECT_EQ(12.99, j1[\"store\"][\"book\"][0][\"price\"].as_double());\n\n  EXPECT_EQ(\"Roman\", j1[\"store\"][\"book\"][1][\"category\"].as_string());\n  EXPECT_EQ(\"Im Westen nichts Neues\", j1[\"store\"][\"book\"][1][\"title\"].as_string());\n  EXPECT_EQ(10.00, j1[\"store\"][\"book\"][1][\"price\"].as_double());\n}\n}  // namespace dfly\n"
  },
  {
    "path": "src/core/json/jsonpath_grammar.y",
    "content": "%skeleton \"lalr1.cc\" // -*- C++ -*-\n%require \"3.5\"  // fedora 32 has this one.\n\n%defines  // %header starts from 3.8.1\n\n%define api.namespace {dfly::json}\n%define api.token.raw\n%define api.token.constructor\n%define api.value.type variant\n%define api.parser.class {Parser}\n%define parse.assert\n\n// Added to header file before parser declaration.\n%code requires {\n  #include \"src/core/json/path.h\"\n  namespace dfly {\n  namespace json {\n    class Driver;\n  }\n  }\n}\n\n// Added to cc file\n%code {\n\n#include \"src/core/json/lexer_impl.h\"\n#include \"src/core/json/driver.h\"\n#include <absl/strings/numbers.h>\n#include \"base/logging.h\"\n\n// GCC 13+ yields spurious warnings about uninitialized variant members in bison-generated code\n#if !defined(__clang__) && __GNUC__ >= 13\n#pragma GCC diagnostic ignored \"-Wmaybe-uninitialized\"\n#endif\n\n#define yylex driver->lexer()->Lex\n\nusing namespace std;\n\nstatic int unsafe_stoi(std::string_view s) {\n  int value;\n  bool success = absl::SimpleAtoi(s, &value);\n  DCHECK(success);\n  return value;\n}\n}\n\n%parse-param { Driver *driver  }\n\n%locations\n\n%define parse.trace\n%define parse.error verbose  // detailed\n%define parse.lac full\n%define api.token.prefix {TOK_}\n\n%token\n  LBRACKET \"[\"\n  RBRACKET \"]\"\n  COLON    \":\"\n  LPARENT  \"(\"\n  RPARENT  \")\"\n  ROOT \"$\"\n  DOT  \".\"\n  WILDCARD \"*\"\n  DESCENT \"..\"\n  SINGLE_QUOTE \"'\"\n  DOUBLE_QUOTE \"\\\"\"\n\n// Needed 0 at the end to satisfy bison 3.5.1\n%token YYEOF 0\n%token <std::string> UNQ_STR \"unquoted string\"\n%token <std::string> INT \"integer\"\n\n%nterm <std::string> identifier\n%nterm <PathSegment> bracket_index\n%nterm <std::string> single_quoted_string\n%nterm <std::string> double_quoted_string\n%nterm <std::string> quoted_content\n\n\n%%\n// Based on the following specification:\n// https://danielaparker.github.io/JsonCons.Net/articles/JsonPath/Specification.html\n\njsonpath: ROOT { /* skip 
adding root */ } opt_relative_location\n         | function_expr opt_relative_location\n\nopt_relative_location:\n        | relative_location\n\nrelative_location: DOT relative_path\n        | DESCENT { driver->AddSegment(PathSegment{SegmentType::DESCENT}); } relative_path\n        | bracket_expr\n\nrelative_path: identifier { driver->AddIdentifier($1); } opt_relative_location\n        | WILDCARD { driver->AddWildcard(); } opt_relative_location\n        | bracket_expr\n\nidentifier: UNQ_STR\n        | INT\n\nbracket_expr: LBRACKET bracket_index RBRACKET { driver->AddSegment($2); } opt_relative_location\n\nbracket_index: single_quoted_string { $$ = PathSegment(SegmentType::IDENTIFIER, $1); }\n              | double_quoted_string { $$ = PathSegment(SegmentType::IDENTIFIER, $1); }\n              | WILDCARD { $$ = PathSegment{SegmentType::INDEX, IndexExpr::All()}; }\n              | INT { int tmp_idx = unsafe_stoi($1);\n                      $$ = PathSegment(SegmentType::INDEX, IndexExpr(tmp_idx, tmp_idx)); }\n              | INT COLON INT { $$ = PathSegment(SegmentType::INDEX, IndexExpr::HalfOpen(\n                unsafe_stoi($1), unsafe_stoi($3))); }\n              | INT COLON { $$ = PathSegment(SegmentType::INDEX, IndexExpr(unsafe_stoi($1), INT_MAX)); }\n              | COLON INT { $$ = PathSegment(SegmentType::INDEX, IndexExpr::HalfOpen(0, unsafe_stoi($2))); }\n\nsingle_quoted_string: SINGLE_QUOTE quoted_content SINGLE_QUOTE { $$ = $2; }\n\ndouble_quoted_string: DOUBLE_QUOTE quoted_content DOUBLE_QUOTE { $$ = $2; }\n\nquoted_content: UNQ_STR { $$ = $1; }\n              | INT { $$ = $1; }\n              | quoted_content DOT UNQ_STR { $$ = $1 + \".\" + $3; }\n              | quoted_content DOT INT { $$ = $1 + \".\" + $3; }\n\nfunction_expr: UNQ_STR { driver->AddFunction($1); } LPARENT ROOT relative_location RPARENT\n%%\n\n\nvoid dfly::json::Parser::error(const location_type& l, const string& m)\n{\n  driver->Error(l, m);\n}\n"
  },
  {
    "path": "src/core/json/jsonpath_lexer.lex",
    "content": "%top{\n  // generated in the header file.\n  #include \"core/json/jsonpath_grammar.hh\"\n}\n\n\n%o bison-cc-namespace=\"dfly.json\" bison-cc-parser=\"Parser\"\n%o namespace=\"dfly.json\"\n\n// Generated class and main function\n%o lexer=\"AbstractLexer\" lex=\"Lex\"\n\n// our derived class from AbstractLexer\n%o class=\"Lexer\"\n\n/* nodefault removes default echo rule */\n%o nodefault batch\n%option unicode\n\n/* Declarations before lexer implementation.  */\n%{\n    #define DFLY_LEXER_CC 1\n    #include \"src/core/json/lexer_impl.h\"\n    #undef DFLY_LEXER_CC\n%}\n\n\n%{\n  // Code run each time a pattern is matched.\n%}\n\n%%\n\n%{\n  // Code run each time lex() is called.\n%}\n\n[[:space:]]+     ; // skip white space\n\n\"$\"         return Parser::make_ROOT(loc());\n\"..\"        return Parser::make_DESCENT(loc());\n\".\"         return Parser::make_DOT(loc());\n\":\"         return Parser::make_COLON(loc());\n\"[\"         return Parser::make_LBRACKET(loc());\n\"]\"         return Parser::make_RBRACKET(loc());\n\"*\"         return Parser::make_WILDCARD(loc());\n\"(\"         return Parser::make_LPARENT(loc());\n\")\"         return Parser::make_RPARENT(loc());\n\"'\"         return Parser::make_SINGLE_QUOTE(loc());\n\"\\\"\"        return Parser::make_DOUBLE_QUOTE(loc());\n-?[0-9]{1,9} return Parser::make_INT(str(), loc());\n\n[\\w_\\-]+    return Parser::make_UNQ_STR(str(), loc());\n<<EOF>>     return Parser::make_YYEOF(loc());\n.           throw Parser::syntax_error(loc(), UnknownTokenMsg());\n%%\n\n// Function definitions\n"
  },
  {
    "path": "src/core/json/jsonpath_test.cc",
    "content": "// Copyright 2024, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#include <gmock/gmock.h>\n\n#include \"base/gtest.h\"\n#include \"base/logging.h\"\n#include \"core/json/driver.h\"\n#include \"core/json/lexer_impl.h\"\n#include \"core/mi_memory_resource.h\"\n\nnamespace flexbuffers {\nbool operator==(const Reference left, const Reference right) {\n  return left.ToString() == right.ToString();\n}\n}  // namespace flexbuffers\n\nnamespace dfly::json {\n\nusing namespace std;\n\nusing testing::ElementsAre;\n\nMATCHER_P(SegType, value, \"\") {\n  return ExplainMatchResult(testing::Property(&PathSegment::type, value), arg, result_listener);\n}\n\nvoid PrintTo(SegmentType st, std::ostream* os) {\n  *os << \" segment(\" << SegmentName(st) << \")\";\n}\n\nclass TestDriver : public Driver {\n public:\n  void Error(const location& l, const std::string& msg) final {\n    LOG(INFO) << \"Error at \" << l << \": \" << msg;\n  }\n};\n\ntemplate <typename JSON> JSON ValidJson(string_view str);\n\ntemplate <> JsonType ValidJson<JsonType>(string_view str) {\n  auto res = ParseJsonUsingShardHeap(str);\n  CHECK(res) << \"Failed to parse json: \" << str;\n  return *res;\n}\n\ntemplate <> FlatJson ValidJson<FlatJson>(string_view str) {\n  static flexbuffers::Builder fbb;\n  flatbuffers::Parser parser;\n\n  fbb.Clear();\n  CHECK(parser.ParseFlexBuffer(str.data(), nullptr, &fbb));\n  fbb.Finish();\n  const auto& buffer = fbb.GetBuffer();\n  return flexbuffers::GetRoot(buffer);\n}\n\nbool is_int(const JsonType& val) {\n  return val.is<int>();\n}\n\nint to_int(const JsonType& val) {\n  return val.as<int>();\n}\n\nbool is_object(const JsonType& val) {\n  return val.is_object();\n}\n\nbool is_array(const JsonType& val) {\n  return val.is_array();\n}\n\nint is_int(FlatJson ref) {\n  return ref.IsInt();\n}\n\nint to_int(FlatJson ref) {\n  return ref.AsInt32();\n}\n\nbool is_object(FlatJson ref) {\n  return ref.IsMap();\n}\n\nbool 
is_array(FlatJson ref) {\n  return ref.IsUntypedVector();\n}\n\nclass ScannerTest : public ::testing::Test {\n protected:\n  void SetUp() override {\n    Test::SetUp();\n    InitTLStatelessAllocMR(&m_);\n  }\n\n  ScannerTest() : m_(mi_heap_get_backing()) {\n    driver_.lexer()->set_debug(1);\n  }\n\n  void SetInput(const std::string& str) {\n    driver_.SetInput(str);\n  }\n\n  Parser::symbol_type Lex() {\n    try {\n      return driver_.lexer()->Lex();\n    } catch (const Parser::syntax_error& e) {\n      LOG(INFO) << \"Caught exception: \" << e.what();\n\n      // with later bison versions we can return make_YYerror\n      return Parser::make_YYEOF(e.location);\n    }\n  }\n\n  MiMemoryResource m_;\n  TestDriver driver_;\n};\n\ntemplate <typename JSON> class JsonPathTest : public ScannerTest {\n protected:\n  int Parse(const std::string& str) {\n    driver_.ResetScanner();\n    driver_.SetInput(str);\n\n    return Parser(&driver_)();\n  }\n};\nusing MyTypes = ::testing::Types<JsonType, FlatJson>;\nTYPED_TEST_SUITE(JsonPathTest, MyTypes);\n\n#define NEXT_TOK(tok_enum)                                    \\\n  {                                                           \\\n    auto tok = Lex();                                         \\\n    ASSERT_EQ(Parser::token::TOK_##tok_enum, tok.type_get()); \\\n  }\n\n#define NEXT_EQ(tok_enum, type, val)                          \\\n  {                                                           \\\n    auto tok = Lex();                                         \\\n    ASSERT_EQ(Parser::token::TOK_##tok_enum, tok.type_get()); \\\n    EXPECT_EQ(val, tok.value.as<type>());                     \\\n  }\n\nTEST_F(ScannerTest, Basic) {\n  SetInput(\"$.мага-зин2.book[0].*\");\n  NEXT_TOK(ROOT);\n  NEXT_TOK(DOT);\n  NEXT_EQ(UNQ_STR, string, \"мага-зин2\");\n  NEXT_TOK(DOT);\n  NEXT_EQ(UNQ_STR, string, \"book\");\n  NEXT_TOK(LBRACKET);\n  NEXT_EQ(INT, string, \"0\");\n  NEXT_TOK(RBRACKET);\n  NEXT_TOK(DOT);\n  NEXT_TOK(WILDCARD);\n\n  
SetInput(\"|\");\n  NEXT_TOK(YYEOF);\n\n  SetInput(\"$..*\");\n  NEXT_TOK(ROOT);\n  NEXT_TOK(DESCENT);\n  NEXT_TOK(WILDCARD);\n}\n\nTEST_F(ScannerTest, FlatToJson) {\n  flatbuffers::Parser parser;\n  const char* json = R\"(\n    {\n      \"foo\": \"bar\",\n      \"bar\": 1.5,\n      \"strs\": [\"hello\", \"world\"]\n    }\n  )\";\n  flexbuffers::Builder fbb;\n  ASSERT_TRUE(parser.ParseFlexBuffer(json, nullptr, &fbb));\n  fbb.Finish();\n\n  flexbuffers::Reference root = flexbuffers::GetRoot(fbb.GetBuffer());\n  JsonType res = FromFlat(root);\n  EXPECT_EQ(res, JsonType::parse(json));\n  fbb.Clear();\n  FromJsonType(res, &fbb);\n  fbb.Finish();\n  string actual;\n  flexbuffers::GetRoot(fbb.GetBuffer()).ToString(false, true, actual);\n  EXPECT_EQ(res, JsonType::parse(actual));\n}\n\nTYPED_TEST(JsonPathTest, Parser) {\n  EXPECT_NE(0, this->Parse(\"foo\"));\n  EXPECT_NE(0, this->Parse(\"$foo\"));\n  EXPECT_NE(0, this->Parse(\"$|foo\"));\n\n  EXPECT_EQ(0, this->Parse(\"$.foo.bar\"));\n  Path path = this->driver_.TakePath();\n\n  // TODO: to improve the UX with gmock/c++ magic.\n  ASSERT_EQ(2, path.size());\n  EXPECT_THAT(path[0], SegType(SegmentType::IDENTIFIER));\n  EXPECT_THAT(path[1], SegType(SegmentType::IDENTIFIER));\n  EXPECT_EQ(\"foo\", path[0].identifier());\n  EXPECT_EQ(\"bar\", path[1].identifier());\n\n  EXPECT_EQ(0, this->Parse(\"$.*.bar[1]\"));\n  path = this->driver_.TakePath();\n  ASSERT_EQ(3, path.size());\n  EXPECT_THAT(path[0], SegType(SegmentType::WILDCARD));\n  EXPECT_THAT(path[1], SegType(SegmentType::IDENTIFIER));\n  EXPECT_THAT(path[2], SegType(SegmentType::INDEX));\n  EXPECT_EQ(\"bar\", path[1].identifier());\n  EXPECT_EQ(IndexExpr(1, 1), path[2].index());\n\n  EXPECT_EQ(0, this->Parse(\"$.plays[*].game\"));\n  EXPECT_EQ(0, this->Parse(\"$.bar[ -1]\"));\n  path = this->driver_.TakePath();\n  EXPECT_THAT(path[1], SegType(SegmentType::INDEX));\n  EXPECT_EQ(IndexExpr(-1, -1), path[1].index());\n}\n\nTYPED_TEST(JsonPathTest, Root) {\n  TypeParam json = 
ValidJson<TypeParam>(R\"({\"foo\" : 1, \"bar\": \"str\" })\");\n  ASSERT_EQ(0, this->Parse(\"$\"));\n  Path path = this->driver_.TakePath();\n  int called = 0;\n  EvaluatePath(path, json, [&](optional<string_view>, const TypeParam& val) {\n    ++called;\n    ASSERT_TRUE(is_object(val));\n    ASSERT_EQ(json, val);\n  });\n  ASSERT_EQ(1, called);\n}\n\nTYPED_TEST(JsonPathTest, Functions) {\n  ASSERT_EQ(0, this->Parse(\"max($.plays[*].score)\"));\n  Path path = this->driver_.TakePath();\n  ASSERT_EQ(4, path.size());\n  EXPECT_THAT(path[0], SegType(SegmentType::FUNCTION));\n  EXPECT_THAT(path[1], SegType(SegmentType::IDENTIFIER));\n  EXPECT_THAT(path[2], SegType(SegmentType::INDEX));\n  EXPECT_THAT(path[3], SegType(SegmentType::IDENTIFIER));\n  EXPECT_EQ(IndexExpr::All(), path[2].index());\n\n  TypeParam json = ValidJson<TypeParam>(R\"({\"plays\": [{\"score\": 1}, {\"score\": 2}]})\");\n  int called = 0;\n  EvaluatePath(path, json, [&](auto, const TypeParam& val) {\n    ++called;\n    ASSERT_TRUE(is_int(val));\n    ASSERT_EQ(2, to_int(val));\n  });\n  ASSERT_EQ(1, called);\n}\n\nTYPED_TEST(JsonPathTest, Descent) {\n  EXPECT_EQ(0, this->Parse(\"$..foo\"));\n  Path path = this->driver_.TakePath();\n  ASSERT_EQ(2, path.size());\n  EXPECT_THAT(path[0], SegType(SegmentType::DESCENT));\n  EXPECT_THAT(path[1], SegType(SegmentType::IDENTIFIER));\n  EXPECT_EQ(\"foo\", path[1].identifier());\n\n  EXPECT_EQ(0, this->Parse(\"$..*\"));\n  ASSERT_EQ(2, path.size());\n  path = this->driver_.TakePath();\n  EXPECT_THAT(path[0], SegType(SegmentType::DESCENT));\n  EXPECT_THAT(path[1], SegType(SegmentType::WILDCARD));\n\n  EXPECT_NE(0, this->Parse(\"$..\"));\n  EXPECT_NE(0, this->Parse(\"$...foo\"));\n}\n\nTYPED_TEST(JsonPathTest, QuotedStrings) {\n  EXPECT_EQ(0, this->Parse(\"$[\\\"foo\\\"]\"));\n  Path path = this->driver_.TakePath();\n\n  ASSERT_EQ(1, path.size());\n  EXPECT_THAT(path[0], SegType(SegmentType::IDENTIFIER));\n  EXPECT_EQ(\"foo\", path[0].identifier());\n\n  EXPECT_EQ(0, 
this->Parse(\"$['foo']\"));  // single quoted string\n  path = this->driver_.TakePath();\n\n  ASSERT_EQ(1, path.size());\n  EXPECT_THAT(path[0], SegType(SegmentType::IDENTIFIER));\n  EXPECT_EQ(\"foo\", path[0].identifier());\n\n  EXPECT_EQ(0, this->Parse(\"$.[\\\"foo\\\"]\"));\n  path = this->driver_.TakePath();\n\n  ASSERT_EQ(1, path.size());\n  EXPECT_THAT(path[0], SegType(SegmentType::IDENTIFIER));\n  EXPECT_EQ(\"foo\", path[0].identifier());\n\n  EXPECT_EQ(0, this->Parse(\"$..[\\\"foo\\\"]\"));\n  path = this->driver_.TakePath();\n\n  ASSERT_EQ(2, path.size());\n  EXPECT_THAT(path[0], SegType(SegmentType::DESCENT));\n  EXPECT_THAT(path[1], SegType(SegmentType::IDENTIFIER));\n  EXPECT_EQ(\"foo\", path[1].identifier());\n\n  EXPECT_NE(0, this->Parse(\"\\\"a\\\"\"));\n  EXPECT_NE(0, this->Parse(\"$\\\"a\\\"\"));\n  EXPECT_NE(0, this->Parse(\"$.\\\"a\\\"\"));\n  EXPECT_NE(0, this->Parse(\"$..\\\"a\\\"\"));\n\n  // Single quoted string\n  EXPECT_NE(0, this->Parse(\"'a'\"));\n  EXPECT_NE(0, this->Parse(\"$'a'\"));\n  EXPECT_NE(0, this->Parse(\"$.'a'\"));\n  EXPECT_NE(0, this->Parse(\"$..'a'\"));\n}\n\nTYPED_TEST(JsonPathTest, Path) {\n  Path path;\n  TypeParam json = ValidJson<TypeParam>(R\"({\"v11\":{ \"f\" : 1, \"a2\": [0]}, \"v12\": {\"f\": 2, \"a2\": [1]},\n      \"v13\": 3\n      })\");\n  int called = 0;\n\n  // Empty path\n  EvaluatePath(path, json, [&](optional<string_view>, const TypeParam& val) { ++called; });\n  ASSERT_EQ(1, called);\n  called = 0;\n\n  path.emplace_back(SegmentType::IDENTIFIER, \"v13\");\n  EvaluatePath(path, json, [&](optional<string_view> key, const TypeParam& val) {\n    ++called;\n    ASSERT_EQ(3, to_int(val));\n    EXPECT_EQ(\"v13\", key);\n  });\n  ASSERT_EQ(1, called);\n\n  path.clear();\n  path.emplace_back(SegmentType::IDENTIFIER, \"v11\");\n  path.emplace_back(SegmentType::IDENTIFIER, \"f\");\n  called = 0;\n  EvaluatePath(path, json, [&](optional<string_view> key, const TypeParam& val) {\n    ++called;\n    ASSERT_EQ(1, 
to_int(val));\n    EXPECT_EQ(\"f\", key);\n  });\n  ASSERT_EQ(1, called);\n\n  path.clear();\n  path.emplace_back(SegmentType::WILDCARD);\n  path.emplace_back(SegmentType::IDENTIFIER, \"f\");\n  called = 0;\n  EvaluatePath(path, json, [&](optional<string_view> key, const TypeParam& val) {\n    ++called;\n    ASSERT_TRUE(is_int(val));\n    EXPECT_EQ(\"f\", key);\n  });\n  ASSERT_EQ(2, called);\n}\n\nTYPED_TEST(JsonPathTest, EvalDescent) {\n  TypeParam json = ValidJson<TypeParam>(R\"(\n    {\"v11\":{ \"f\" : 1, \"a2\": [0]}, \"v12\": {\"f\": 2, \"v21\": {\"f\": 3, \"a2\": [1]}},\n      \"v13\": { \"a2\" : { \"b\" : {\"f\" : 4}}}\n      })\");\n\n  Path path;\n\n  int called_arr = 0, called_obj = 0;\n\n  path.emplace_back(SegmentType::DESCENT);\n  path.emplace_back(SegmentType::IDENTIFIER, \"a2\");\n  EvaluatePath(path, json, [&](optional<string_view> key, const TypeParam& val) {\n    EXPECT_EQ(\"a2\", key);\n    if (is_array(val)) {\n      ++called_arr;\n    } else if (is_object(val)) {\n      ++called_obj;\n    } else {\n      FAIL() << \"Unexpected type\";\n    }\n  });\n  ASSERT_EQ(2, called_arr);\n  ASSERT_EQ(1, called_obj);\n\n  path.pop_back();\n  path.emplace_back(SegmentType::IDENTIFIER, \"f\");\n  int called = 0;\n  EvaluatePath(path, json, [&](optional<string_view> key, const TypeParam& val) {\n    ASSERT_TRUE(is_int(val));\n    ASSERT_EQ(\"f\", key);\n    ++called;\n  });\n  ASSERT_EQ(4, called);\n\n  json = ValidJson<TypeParam>(R\"(\n    {\"a\":[7], \"inner\": {\"a\": {\"b\": 2, \"c\": 1337}}}\n  )\");\n  path.pop_back();\n  path.emplace_back(SegmentType::IDENTIFIER, \"a\");\n\n  vector<char> arr;\n  auto gettype = [](const TypeParam& p) {\n    if (is_array(p))\n      return 'a';\n    return is_object(p) ? 
'o' : 'u';\n  };\n\n  EvaluatePath(path, json, [&](optional<string_view> key, const TypeParam& val) {\n    arr.push_back(gettype(val));\n    ASSERT_EQ(\"a\", key);\n  });\n  ASSERT_THAT(arr, ElementsAre('a', 'o'));\n}\n\nTYPED_TEST(JsonPathTest, EvalDescent2) {\n  TypeParam json = ValidJson<TypeParam>(R\"(\n    {\"a\":[{\"val\": 1}, {\"val\": 2}, {\"val\": 3}]}\n  )\");\n\n  ASSERT_EQ(0, this->Parse(\"$..val\"));\n  Path path = this->driver_.TakePath();\n  vector<int> arr;\n  EvaluatePath(path, json, [&](optional<string_view> key, const TypeParam& val) {\n    arr.push_back(to_int(val));\n  });\n  ASSERT_THAT(arr, ElementsAre(1, 2, 3));\n\n  int called = 0;\n  ASSERT_EQ(0, this->Parse(\"$..*\"));\n  path = this->driver_.TakePath();\n  EvaluatePath(path, json, [&](optional<string_view> key, const TypeParam& val) { ++called; });\n  EXPECT_EQ(7, called);\n\n  called = 0;\n  json = ValidJson<TypeParam>(R\"(\n    {\n       \"store\": {\n        \"nums\": [\n         5\n       ]\n      }\n    }\n    )\");\n  EvaluatePath(path, json, [&](optional<string_view> key, const TypeParam& val) { ++called; });\n  EXPECT_EQ(3, called);\n}\n\nTYPED_TEST(JsonPathTest, Wildcard) {\n  ASSERT_EQ(0, this->Parse(\"$.arr[*]\"));\n  Path path = this->driver_.TakePath();\n  ASSERT_EQ(2, path.size());\n  EXPECT_THAT(path[1], SegType(SegmentType::INDEX));\n\n  TypeParam json = ValidJson<TypeParam>(R\"({\"arr\": [1, 2, 3], \"i\":1})\");\n  vector<int> arr;\n  EvaluatePath(path, json, [&](optional<string_view> key, const TypeParam& val) {\n    ASSERT_FALSE(key);\n    arr.push_back(to_int(val));\n  });\n  ASSERT_THAT(arr, ElementsAre(1, 2, 3));\n\n  ASSERT_EQ(0, this->Parse(\"$.i[*]\"));\n  path = this->driver_.TakePath();\n  arr.clear();\n  EvaluatePath(path, json, [&](optional<string_view> key, const TypeParam& val) {\n    arr.push_back(to_int(val));\n  });\n  ASSERT_THAT(arr, ElementsAre());\n}\n\nTYPED_TEST(JsonPathTest, Mutate) {\n  ASSERT_EQ(0, this->Parse(\"$[*]\"));\n  Path path = 
this->driver_.TakePath();\n\n  TypeParam json = ValidJson<TypeParam>(R\"([1, 2, 3, 5, 6])\");\n  auto cb = [](optional<string_view>, JsonType* val) {\n    int intval = val->as<int>();\n    *val = intval + 1;\n  };\n\n  vector<int> arr;\n\n  if constexpr (std::is_same_v<TypeParam, JsonType>) {\n    MutatePath(path, cb, &json);\n\n    for (JsonType& el : json.array_range()) {\n      arr.push_back(to_int(el));\n    }\n  } else {\n    flexbuffers::Builder fbb;\n    MutatePath(path, cb, json, &fbb);\n    FlatJson fj = flexbuffers::GetRoot(fbb.GetBuffer());\n    auto vec = fj.AsVector();\n    for (unsigned i = 0; i < vec.size(); ++i) {\n      arr.push_back(to_int(vec[i]));\n    }\n  }\n  ASSERT_THAT(arr, ElementsAre(2, 3, 4, 6, 7));\n\n  json = ValidJson<TypeParam>(R\"(\n    {\"a\":[7], \"inner\": {\"a\": {\"bool\": true, \"c\": 42}}}\n  )\");\n  ASSERT_EQ(0, this->Parse(\"$..a.*\"));\n  path = this->driver_.TakePath();\n\n  auto cb2 = [](optional<string_view> key, JsonType* val) {\n    if (val->is_int64() && !key) {  // array element\n      *val = 42;\n    }\n    if (val->is_bool()) {\n      *val = false;\n    }\n  };\n\n  auto expected = ValidJson<JsonType>(R\"({\"a\":[42],\"inner\":{\"a\":{\"bool\":false,\"c\":42}}})\");\n  if constexpr (std::is_same_v<TypeParam, JsonType>) {\n    MutatePath(path, cb2, &json);\n\n    ASSERT_EQ(expected, json);\n  } else {\n    flexbuffers::Builder fbb;\n    MutatePath(path, cb2, json, &fbb);\n    FlatJson fj = flexbuffers::GetRoot(fbb.GetBuffer());\n    ASSERT_EQ(expected, FromFlat(fj));\n  }\n}\n\nTYPED_TEST(JsonPathTest, MutateRecursiveDescentKey) {\n  ASSERT_EQ(0, this->Parse(\"$..value\"));\n  Path path = this->driver_.TakePath();\n\n  JsonType json = ValidJson<JsonType>(R\"({\"data\":{\"value\":10,\"subdata\":{\"value\":20}}})\");\n  JsonType replacement = ValidJson<JsonType>(R\"({\"value\": 30})\");\n\n  auto cb = [&](optional<string_view> key, JsonType* val) {\n    if (key && key.value() == \"value\" && (val->is_int64() || 
val->is_double())) {\n      *val = replacement;\n    }\n  };\n\n  unsigned reported_matches = MutatePath(path, cb, &json);\n\n  JsonType expected =\n      ValidJson<JsonType>(R\"({\"data\":{\"subdata\":{\"value\":{\"value\":30}},\"value\":{\"value\":30}}})\");\n\n  EXPECT_EQ(expected, json);\n  EXPECT_EQ(0, reported_matches);\n}\n\nTYPED_TEST(JsonPathTest, SubRange) {\n  TypeParam json = ValidJson<TypeParam>(R\"({\"arr\": [1, 2, 3, 4, 5]})\");\n  ASSERT_EQ(0, this->Parse(\"$.arr[1:2]\"));\n  Path path = this->driver_.TakePath();\n  ASSERT_EQ(2, path.size());\n  EXPECT_THAT(path[1], SegType(SegmentType::INDEX));\n\n  vector<int> arr;\n  auto cb = [&arr](optional<string_view> key, const TypeParam& val) {\n    ASSERT_FALSE(key);\n    arr.push_back(to_int(val));\n  };\n\n  EvaluatePath(path, json, cb);\n  ASSERT_THAT(arr, ElementsAre(2));\n  arr.clear();\n\n  ASSERT_EQ(0, this->Parse(\"$.arr[0:2]\"));\n  path = this->driver_.TakePath();\n  EvaluatePath(path, json, cb);\n  ASSERT_THAT(arr, ElementsAre(1, 2));\n  arr.clear();\n\n  ASSERT_EQ(0, this->Parse(\"$.arr[2:-1]\"));\n  path = this->driver_.TakePath();\n  EvaluatePath(path, json, cb);\n  ASSERT_THAT(arr, ElementsAre(3, 4));\n  arr.clear();\n\n  ASSERT_EQ(0, this->Parse(\"$.arr[-2:-1]\"));\n  path = this->driver_.TakePath();\n  EvaluatePath(path, json, cb);\n  ASSERT_THAT(arr, ElementsAre(4));\n  arr.clear();\n\n  ASSERT_EQ(0, this->Parse(\"$.arr[-2:-2]\"));\n  path = this->driver_.TakePath();\n  EvaluatePath(path, json, cb);\n  ASSERT_THAT(arr, ElementsAre());\n  arr.clear();\n\n  ASSERT_EQ(0, this->Parse(\"$.arr[:2]\"));\n  path = this->driver_.TakePath();\n  EvaluatePath(path, json, cb);\n  ASSERT_THAT(arr, ElementsAre(1, 2));\n  arr.clear();\n\n  ASSERT_EQ(0, this->Parse(\"$.arr[2:]\"));\n  path = this->driver_.TakePath();\n  EvaluatePath(path, json, cb);\n  ASSERT_THAT(arr, ElementsAre(3, 4, 5));\n  arr.clear();\n}\n\nTYPED_TEST(JsonPathTest, DeleteNestedWithSameKey) {\n  // Test for deleting nested elements 
with the same key using \"$..a\"\n  // Corresponds to command: JSON.DEL doc1 \"$..a\"\n  ASSERT_EQ(0, this->Parse(\"$..a\"));\n  Path path = this->driver_.TakePath();\n\n  TypeParam json = ValidJson<TypeParam>(R\"({\"a\": 1, \"nested\": {\"a\": 2, \"b\": 3}})\");\n\n  if constexpr (std::is_same_v<TypeParam, JsonType>) {\n    unsigned reported_matches = DeletePath(path, &json);\n    EXPECT_EQ(2, reported_matches);\n\n    auto expected = ValidJson<JsonType>(R\"({\"nested\": {\"b\": 3}})\");\n    EXPECT_EQ(expected, json);\n  } else {\n    flexbuffers::Builder fbb;\n    unsigned reported_matches = DeletePath(path, json, &fbb);\n\n    EXPECT_EQ(2, reported_matches);\n\n    FlatJson result = flexbuffers::GetRoot(fbb.GetBuffer());\n    auto expected = ValidJson<JsonType>(R\"({\"nested\": {\"b\": 3}})\");\n    EXPECT_EQ(expected, FromFlat(result));\n  }\n}\n\nTYPED_TEST(JsonPathTest, DeleteRecursiveWithKeysAndArrayValues) {\n  ASSERT_EQ(0, this->Parse(\"$..a\"));\n  Path path = this->driver_.TakePath();\n\n  TypeParam json = ValidJson<TypeParam>(\n      R\"({\"a\": {\"a\": 2, \"b\": 3}, \"b\": [\"a\", \"b\"], \"nested\": {\"b\": [true, \"a\", \"b\"]}})\");\n\n  if constexpr (std::is_same_v<TypeParam, JsonType>) {\n    unsigned reported_matches = DeletePath(path, &json);\n    EXPECT_EQ(1, reported_matches);\n\n    auto expected = ValidJson<JsonType>(R\"({\"b\": [\"a\", \"b\"], \"nested\": {\"b\": [true, \"a\", \"b\"]}})\");\n    EXPECT_EQ(expected, json);\n  } else {\n    flexbuffers::Builder fbb;\n    unsigned reported_matches = DeletePath(path, json, &fbb);\n    EXPECT_EQ(1, reported_matches);\n\n    FlatJson result = flexbuffers::GetRoot(fbb.GetBuffer());\n    auto expected = ValidJson<JsonType>(R\"({\"b\": [\"a\", \"b\"], \"nested\": {\"b\": [true, \"a\", \"b\"]}})\");\n    EXPECT_EQ(expected, FromFlat(result));\n  }\n}\n\n}  // namespace dfly::json\n"
  },
  {
    "path": "src/core/json/lexer_impl.cc",
    "content": "// Copyright 2024, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#include \"src/core/json/lexer_impl.h\"\n\n#include <absl/strings/str_cat.h>\n\nnamespace dfly::json {\n\nLexer::Lexer() {\n}\n\nLexer::~Lexer() {\n}\n\nstd::string Lexer::UnknownTokenMsg() const {\n  std::string res = absl::StrCat(\"Unknown token '\", text(), \"'\");\n  return res;\n}\n\n}  // namespace dfly::json\n"
  },
  {
    "path": "src/core/json/lexer_impl.h",
    "content": "// Copyright 2024, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#pragma once\n\n// We should not include lexer.h when compiling from lexer.cc file because it already\n// includes lexer.h\n#ifndef DFLY_LEXER_CC\n#include \"src/core/json/jsonpath_lexer.h\"\n#endif\n\n#include \"src/core/json/jsonpath_grammar.hh\"\n\nnamespace dfly {\nnamespace json {\n\nclass Lexer : public AbstractLexer {\n public:\n  Lexer();\n  ~Lexer();\n\n  Parser::symbol_type Lex() final;\n\n private:\n  dfly::json::location loc() {\n    return location();\n  }\n\n  std::string UnknownTokenMsg() const;\n};\n\n}  // namespace json\n}  // namespace dfly\n"
  },
  {
    "path": "src/core/json/path.cc",
    "content": "// Copyright 2024, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#include \"src/core/json/path.h\"\n\n#include <absl/strings/str_cat.h>\n#include <absl/types/span.h>\n\n#include \"base/logging.h\"\n#include \"core/json/detail/flat_dfs.h\"\n#include \"core/json/detail/jsoncons_dfs.h\"\n#include \"core/json/jsonpath_grammar.hh\"\n#include \"src/core/json/driver.h\"\n#include \"src/core/overloaded.h\"\n\nusing namespace std;\nusing nonstd::make_unexpected;\n\nnamespace dfly::json {\n\nusing detail::Dfs;\nusing detail::FlatDfs;\n\nnamespace {\n\nclass JsonPathDriver : public json::Driver {\n public:\n  string msg;\n  void Error(const json::location& l, const std::string& msg) final {\n    this->msg = absl::StrCat(\"Error: \", msg);\n  }\n};\n\n}  // namespace\n\nconst char* SegmentName(SegmentType type) {\n  switch (type) {\n    case SegmentType::IDENTIFIER:\n      return \"IDENTIFIER\";\n    case SegmentType::INDEX:\n      return \"INDEX\";\n    case SegmentType::WILDCARD:\n      return \"WILDCARD\";\n    case SegmentType::DESCENT:\n      return \"DESCENT\";\n    case SegmentType::FUNCTION:\n      return \"FUNCTION\";\n  }\n  return nullptr;\n}\n\nIndexExpr IndexExpr::Normalize(size_t array_len) const {\n  if (array_len == 0)\n    return IndexExpr(1, 0);  // empty range.\n\n  IndexExpr res = *this;\n  auto wrap = [array_len](int negative) {\n    unsigned positive = -negative;\n    return positive > array_len ? 
0 : array_len - positive;\n  };\n\n  if (res.second >= int(array_len)) {\n    res.second = array_len - 1;\n  } else if (res.second < 0) {\n    res.second = wrap(res.second);\n    DCHECK_GE(res.second, 0);\n  }\n  if (res.first < 0) {\n    res.first = wrap(res.first);\n    DCHECK_GE(res.first, 0);\n  }\n  return res;\n}\n\nvoid PathSegment::Evaluate(const JsonType& json) const {\n  CHECK(type() == SegmentType::FUNCTION);\n  AggFunction* func = std::get<shared_ptr<AggFunction>>(value_).get();\n  CHECK(func);\n  func->Apply(json);\n}\n\nvoid PathSegment::Evaluate(FlatJson json) const {\n  CHECK(type() == SegmentType::FUNCTION);\n  AggFunction* func = std::get<shared_ptr<AggFunction>>(value_).get();\n  CHECK(func);\n  func->Apply(json);\n}\n\nAggFunction::Result PathSegment::GetResult() const {\n  CHECK(type() == SegmentType::FUNCTION);\n  const auto& func = std::get<shared_ptr<AggFunction>>(value_).get();\n  CHECK(func);\n  return func->GetResult();\n}\n\nvoid EvaluatePath(const Path& path, const JsonType& json, PathCallback callback) {\n  if (path.empty()) {  // root node\n    callback(nullopt, json);\n    return;\n  }\n\n  if (path.front().type() != SegmentType::FUNCTION) {\n    Dfs::Traverse(path, json, std::move(callback));\n    return;\n  }\n\n  // Handling the case of `func($.somepath)`\n  // We pass our own callback to gather all the results and then call the function.\n  JsonType result(JsonType::null());\n  absl::Span<const PathSegment> path_tail(path.data() + 1, path.size() - 1);\n\n  const PathSegment& func_segment = path.front();\n\n  if (path_tail.empty()) {\n    LOG(DFATAL) << \"Invalid path\";  // parser should not allow this.\n  } else {\n    Dfs::Traverse(path_tail, json, [&](auto, const JsonType& val) { func_segment.Evaluate(val); });\n  }\n\n  AggFunction::Result res = func_segment.GetResult();\n  JsonType val = visit(  // Transform the result to JsonType.\n      Overloaded{\n          [](monostate) { return JsonType::null(); },\n          
[&](double d) { return JsonType(d); },\n\n          [&](int64_t i) { return JsonType(i); },\n      },\n      res);\n  callback(nullopt, val);\n}\n\nnonstd::expected<json::Path, string> ParsePath(string_view path) {\n  if (path.size() > 8192)\n    return nonstd::make_unexpected(\"Path too long\");\n\n  VLOG(2) << \"Parsing path: \" << path;\n\n  JsonPathDriver driver;\n  Parser parser(&driver);\n\n  driver.SetInput(string(path));\n  int res = parser();\n  if (res != 0) {\n    return nonstd::make_unexpected(driver.msg);\n  }\n\n  return driver.TakePath();\n}\n\nunsigned MutatePath(const Path& path, MutateCallback callback, JsonType* json) {\n  if (path.empty()) {\n    callback(nullopt, json);\n    return 1;\n  }\n\n  Dfs dfs = Dfs::Mutate(path, callback, json);\n  return dfs.matches();\n}\n\nunsigned DeletePath(const Path& path, JsonType* json) {\n  if (path.empty()) {\n    // For empty path, we cannot delete the root JSON itself within this function\n    // as it would require modifying the pointer itself. 
Return 0 for no deletion.\n    return 0;\n  }\n\n  Dfs dfs = Dfs::Delete(path, json);\n  return dfs.matches();\n}\n\n// Flat json path evaluation\nvoid EvaluatePath(const Path& path, FlatJson json, PathFlatCallback callback) {\n  if (path.empty()) {  // root node\n    callback(nullopt, json);\n    return;\n  }\n\n  if (path.front().type() != SegmentType::FUNCTION) {\n    FlatDfs::Traverse(path, json, std::move(callback));\n    return;\n  }\n\n  // Handling the case of `func($.somepath)`\n  // We pass our own callback to gather all the results and then call the function.\n  FlatJson result;\n  absl::Span<const PathSegment> path_tail(path.data() + 1, path.size() - 1);\n\n  const PathSegment& func_segment = path.front();\n\n  if (path_tail.empty()) {\n    LOG(DFATAL) << \"Invalid path\";  // parser should not allow this.\n  } else {\n    FlatDfs::Traverse(path_tail, json, [&](auto, FlatJson val) { func_segment.Evaluate(val); });\n  }\n  AggFunction::Result res = func_segment.GetResult();\n  flexbuffers::Builder fbb;\n  FlatJson val = visit(  // Transform the result to a flexbuffer reference.\n      Overloaded{\n          [](monostate) { return FlatJson{}; },\n          [&](double d) {\n            fbb.Double(d);\n            fbb.Finish();\n            return flexbuffers::GetRoot(fbb.GetBuffer());\n          },\n\n          [&](int64_t i) {\n            fbb.Int(i);\n            fbb.Finish();\n            return flexbuffers::GetRoot(fbb.GetBuffer());\n          },\n      },\n      res);\n\n  callback(nullopt, val);\n}\n\nJsonType FromFlat(FlatJson src) {\n  if (src.IsNull()) {\n    return JsonType::null();\n  }\n\n  if (src.IsBool()) {\n    return JsonType(src.AsBool());\n  }\n\n  if (src.IsInt()) {\n    return JsonType(src.AsInt64());\n  }\n\n  if (src.IsFloat()) {\n    return JsonType(src.AsDouble());\n  }\n  if (src.IsString()) {\n    flexbuffers::String str = src.AsString();\n    return JsonType(string_view{str.c_str(), str.size()});\n  }\n\n  
CHECK(src.IsVector());\n  auto vec = src.AsVector();\n  JsonType js =\n      src.IsMap() ? JsonType{jsoncons::json_object_arg} : JsonType{jsoncons::json_array_arg};\n  auto keys = src.AsMap().Keys();\n  for (unsigned i = 0; i < vec.size(); ++i) {\n    JsonType value = FromFlat(vec[i]);\n    if (src.IsMap()) {\n      js[keys[i].AsKey()] = std::move(value);\n    } else {\n      js.push_back(std::move(value));\n    }\n  }\n  return js;\n}\n\nvoid FromJsonType(const JsonType& src, flexbuffers::Builder* fbb) {\n  if (src.is_null()) {\n    return fbb->Null();\n  }\n\n  if (src.is_bool()) {\n    return fbb->Bool(src.as_bool());\n  }\n\n  if (src.is_int64()) {\n    return fbb->Int(src.as<int64_t>());\n  }\n\n  if (src.is_double()) {\n    return fbb->Double(src.as_double());\n  }\n\n  if (src.is_string()) {\n    string_view sv = src.as_string_view();\n    fbb->String(sv.data(), sv.size());\n    return;\n  }\n\n  if (src.is_object()) {\n    auto range = src.object_range();\n    size_t start = fbb->StartMap();\n    for (auto it = range.cbegin(); it != range.cend(); ++it) {\n      fbb->Key(it->key().c_str(), it->key().size());\n      FromJsonType(it->value(), fbb);\n    }\n    fbb->EndMap(start);\n    return;\n  }\n\n  CHECK(src.is_array());\n  auto range = src.array_range();\n  size_t start = fbb->StartVector();\n  for (auto it = range.cbegin(); it != range.cend(); ++it) {\n    FromJsonType(*it, fbb);\n  }\n  fbb->EndVector(start, false, false);\n}\n\nunsigned MutatePath(const Path& path, MutateCallback callback, FlatJson json,\n                    flexbuffers::Builder* fbb) {\n  JsonType mut_json = FromFlat(json);\n  unsigned res = MutatePath(path, std::move(callback), &mut_json);\n\n  // Populate the output builder 'fbb' with the resulting JSON state\n  // (mutated or original if res == 0) and finalize it.\n  // The builder MUST be finished before returning so that the caller\n  // can safely access the resulting flatbuffer data (e.g., via GetBuffer()).\n  // Skipping 
Finish() would leave the builder in an invalid, unusable state.\n  FromJsonType(mut_json, fbb);  // Always convert (changed or not) JSON\n  fbb->Finish();                // Always finish the builder\n\n  // Return the number of actual mutations that occurred.\n  return res;\n}\n\nunsigned DeletePath(const Path& path, FlatJson json, flexbuffers::Builder* fbb) {\n  JsonType mut_json = FromFlat(json);\n  unsigned res = DeletePath(path, &mut_json);\n\n  FromJsonType(mut_json, fbb);\n  fbb->Finish();\n  return res;\n}\n\n}  // namespace dfly::json\n"
  },
  {
    "path": "src/core/json/path.h",
    "content": "// Copyright 2024, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#pragma once\n\n#include <absl/functional/function_ref.h>\n\n#include <nonstd/expected.hpp>\n#include <string>\n#include <variant>\n#include <vector>\n\n#include \"core/flatbuffers.h\"\n#include \"core/json/json_object.h\"\n\nnamespace dfly::json {\n\nenum class SegmentType {\n  IDENTIFIER = 1,  // $.identifier\n  INDEX = 2,       // $.array[index_expr]\n  WILDCARD = 3,    // $.*\n  DESCENT = 4,     // $..identifier\n  FUNCTION = 5,    // max($.prices[*])\n};\n\nconst char* SegmentName(SegmentType type);\n\nclass AggFunction {\n public:\n  using Result = std::variant<std::monostate, double, int64_t>;\n  virtual ~AggFunction() {\n  }\n\n  void Apply(const JsonType& src) {\n    if (valid_ != 0)\n      valid_ = ApplyImpl(src);\n  }\n\n  void Apply(FlatJson src) {\n    if (valid_ != 0)\n      valid_ = ApplyImpl(src);\n  }\n\n  // returns null if Apply was not called or ApplyImpl failed.\n  Result GetResult() const {\n    return valid_ == 1 ? GetResultImpl() : Result{};\n  }\n\n protected:\n  virtual bool ApplyImpl(const JsonType& src) = 0;\n  virtual bool ApplyImpl(FlatJson src) = 0;\n  virtual Result GetResultImpl() const = 0;\n\n  int valid_ = -1;\n};\n\n// Bracket index representation, IndexExpr is a closed range, i.e. 
both ends are inclusive.\n// Single index is: <I, I>, wildcard: <0, INT_MAX>,\n// [begin:end): <begin, end - 1>\n// IndexExpr is 0-based, with negative indices referring to the array size of the applied object.\nstruct IndexExpr : public std::pair<int, int> {\n  bool Empty() const {\n    return first > second;\n  }\n\n  static IndexExpr All() {\n    return IndexExpr{0, INT_MAX};\n  }\n\n  using pair::pair;\n\n  // Returns subrange with length `array_len`.\n  IndexExpr Normalize(size_t array_len) const;\n\n  // Returns IndexExpr representing [left_closed, right_open) range.\n  static IndexExpr HalfOpen(int left_closed, int right_open) {\n    return IndexExpr(left_closed, right_open - 1);\n  }\n};\n\nclass PathSegment {\n public:\n  PathSegment() : PathSegment(SegmentType::IDENTIFIER) {\n  }\n\n  PathSegment(SegmentType type, std::string identifier = std::string())\n      : type_(type), value_(std::move(identifier)) {\n  }\n\n  PathSegment(SegmentType type, IndexExpr index) : type_(type), value_(index) {\n  }\n\n  explicit PathSegment(std::shared_ptr<AggFunction> func)\n      : type_(SegmentType::FUNCTION), value_(std::move(func)) {\n  }\n\n  SegmentType type() const {\n    return type_;\n  }\n\n  const std::string& identifier() const {\n    return std::get<std::string>(value_);\n  }\n\n  IndexExpr index() const {\n    return std::get<IndexExpr>(value_);\n  }\n\n  void Evaluate(const JsonType& json) const;\n  void Evaluate(FlatJson json) const;\n  AggFunction::Result GetResult() const;\n\n private:\n  SegmentType type_;\n\n  // shared_ptr to preserve copy semantics.\n  std::variant<std::string, IndexExpr, std::shared_ptr<AggFunction>> value_;\n};\n\nusing Path = std::vector<PathSegment>;\n\n// Passes the key name for object fields or nullopt for array elements.\n// The second argument is a json value of either object fields or array elements.\nusing PathCallback = absl::FunctionRef<void(std::optional<std::string_view>, const JsonType&)>;\nusing PathFlatCallback = 
absl::FunctionRef<void(std::optional<std::string_view>, FlatJson)>;\n\n// Invoked for each matched entry with its key (nullopt if the entry has no key, e.g. array\n// elements) and a pointer to the value, which the callback may modify in place.\nusing MutateCallback = absl::FunctionRef<void(std::optional<std::string_view>, JsonType*)>;\n\nvoid EvaluatePath(const Path& path, const JsonType& json, PathCallback callback);\n\n// Same as above but for flatbuffers.\nvoid EvaluatePath(const Path& path, FlatJson json, PathFlatCallback callback);\n\n// returns number of matches found with the given path.\nunsigned MutatePath(const Path& path, MutateCallback callback, JsonType* json);\nunsigned MutatePath(const Path& path, MutateCallback callback, FlatJson json,\n                    flexbuffers::Builder* fbb);\n\n// Simplified deletion operation without callback - more efficient for JSON.DEL operations\nunsigned DeletePath(const Path& path, JsonType* json);\nunsigned DeletePath(const Path& path, FlatJson json, flexbuffers::Builder* fbb);\n\n// utility function to parse a jsonpath. Returns an error message if a parse error was\n// encountered.\nnonstd::expected<Path, std::string> ParsePath(std::string_view path);\n\n// Transforms FlatJson to JsonType.\nJsonType FromFlat(FlatJson src);\n\n// Transforms JsonType to a buffer using flexbuffers::Builder.\n// Does not call flexbuffers::Builder::Finish.\nvoid FromJsonType(const JsonType& src, flexbuffers::Builder* fbb);\n\n}  // namespace dfly::json\n"
  },
  {
    "path": "src/core/linear_search_map.h",
    "content": "// Copyright 2025, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#pragma once\n\n#include <absl/container/inlined_vector.h>\n\n#include \"base/logging.h\"\n\nnamespace dfly {\n\n/* LinearSearchMap is a small key-value map implemented using an inlined vector of (key, value)\n   pairs. It performs key lookup using linear search (O(n)) and is optimized for small maps\n   (typically <32 keys).\n\n   Compared to a hash map, it avoids hashing overhead and has better memory locality and cache\n   performance. Use it when:\n    - The number of keys is small\n    - You care about minimal memory usage\n    - Fast iteration is more important than fast lookup\n\n   NOTE:\n     - Insert() and Emplace() do NOT check for duplicate keys at runtime.\n       Inserting a duplicate key results in undefined behavior.\n     - You must ensure keys are unique when inserting.\n     - This syntax is used to maintain compatibility with absl::InlinedVector. */\ntemplate <typename Key, typename Value, size_t N = 8>\nclass LinearSearchMap : public absl::InlinedVector<std::pair<Key, Value>, N> {\n private:\n  using Base = absl::InlinedVector<std::pair<Key, Value>, N>;\n\n public:\n  using Base::operator[];\n  using Base::erase;\n\n  using iterator = typename Base::iterator;\n  using const_iterator = typename Base::const_iterator;\n\n  // Does not check if key already exists.\n  // If key already exists - undefined behavior.\n  void insert(Key key, Value value);\n  template <typename... Args> void emplace(Key key, Args&&... 
args);\n\n  void erase(const Key& key);\n\n  bool contains(const Key& key) const;\n\n  iterator find(const Key& key);\n  const_iterator find(const Key& key) const;\n  size_t find_index(const Key& key) const;\n\n  Value& operator[](const Key& key);\n  const Value& operator[](const Key& key) const;\n};\n\n// Implementation\n/******************************************************************/\ntemplate <typename Key, typename Value, size_t N>\nvoid LinearSearchMap<Key, Value, N>::insert(Key key, Value value) {\n  DCHECK(!contains(key)) << \"Key already exists: \" << key;\n  this->emplace_back(std::move(key), std::move(value));\n}\n\ntemplate <typename Key, typename Value, size_t N>\ntemplate <typename... Args>\nvoid LinearSearchMap<Key, Value, N>::emplace(Key key, Args&&... args) {\n  DCHECK(!contains(key)) << \"Key already exists: \" << key;\n  this->emplace_back(std::piecewise_construct, std::forward_as_tuple(std::move(key)),\n                     std::forward_as_tuple(std::forward<Args>(args)...));\n}\n\ntemplate <typename Key, typename Value, size_t N>\nvoid LinearSearchMap<Key, Value, N>::erase(const Key& key) {\n  erase(find(key));\n}\n\ntemplate <typename Key, typename Value, size_t N>\nbool LinearSearchMap<Key, Value, N>::contains(const Key& key) const {\n  return find(key) != this->end();\n}\n\ntemplate <typename Key, typename Value, size_t N>\ntypename LinearSearchMap<Key, Value, N>::iterator LinearSearchMap<Key, Value, N>::find(\n    const Key& key) {\n  return std::find_if(this->begin(), this->end(),\n                      [&key](const auto& pair) { return pair.first == key; });\n}\n\ntemplate <typename Key, typename Value, size_t N>\ntypename LinearSearchMap<Key, Value, N>::const_iterator LinearSearchMap<Key, Value, N>::find(\n    const Key& key) const {\n  return std::find_if(this->begin(), this->end(),\n                      [&key](const auto& pair) { return pair.first == key; });\n}\n\ntemplate <typename Key, typename Value, size_t N>\nsize_t 
LinearSearchMap<Key, Value, N>::find_index(const Key& key) const {\n  return std::distance(this->begin(), find(key));\n}\n\ntemplate <typename Key, typename Value, size_t N>\nValue& LinearSearchMap<Key, Value, N>::operator[](const Key& key) {\n  return find(key)->second;\n}\n\ntemplate <typename Key, typename Value, size_t N>\nconst Value& LinearSearchMap<Key, Value, N>::operator[](const Key& key) const {\n  return find(key)->second;\n}\n\n}  // namespace dfly\n"
  },
  {
    "path": "src/core/linear_search_map_test.cc",
    "content": "// Copyright 2025, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#include \"core/linear_search_map.h\"\n\n#include <gmock/gmock.h>\n#include <gtest/gtest.h>\n\n#include <utility>\n\n#include \"base/gtest.h\"\n#include \"base/logging.h\"\n\nnamespace dfly {\n\nclass LinearSearchMapTest : public testing::Test {\n protected:\n};\n\nTEST_F(LinearSearchMapTest, Insert) {\n  LinearSearchMap<int, double> map;\n\n  for (int i = 0; i < 100; ++i) {\n    map.insert(i, i * 1.1);\n  }\n\n  for (int i = 199; i >= 100; --i) {\n    map.insert(i, i * 12.1);\n  }\n\n  for (int i = 0; i < 200; ++i) {\n    auto it = map.find(i);\n    EXPECT_NE(it, map.end());\n    EXPECT_TRUE(map.contains(i));\n\n    EXPECT_EQ(it->second, (i < 100) ? i * 1.1 : i * 12.1);\n  }\n}\n\nTEST_F(LinearSearchMapTest, Emplace) {\n  struct Value {\n    Value(double value_, std::string str_) : value(value_), str(std::move(str_)) {\n    }\n\n    double value;\n    std::string str;\n  };\n\n  LinearSearchMap<int, Value> map;\n\n  for (int i = 0; i < 100; ++i) {\n    map.emplace(i, i * 1.1, \"value_\" + std::to_string(i));\n  }\n\n  for (int i = 199; i >= 100; --i) {\n    map.emplace(i, i * 12.1, \"value_\" + std::to_string(i));\n  }\n\n  for (int i = 0; i < 200; ++i) {\n    auto it = map.find(i);\n    EXPECT_NE(it, map.end());\n    EXPECT_TRUE(map.contains(i));\n\n    EXPECT_EQ(it->second.value, (i < 100) ? 
i * 1.1 : i * 12.1);\n    EXPECT_EQ(it->second.str, \"value_\" + std::to_string(i));\n  }\n}\n\nTEST_F(LinearSearchMapTest, EraseSimple) {\n  LinearSearchMap<int, double> map;\n\n  for (int i = 0; i < 200; ++i) {\n    map.insert(i, i * 1.1);\n  }\n\n  // Erase by iterator\n  for (int i = 0; i < 100; ++i) {\n    auto it = map.find(i);\n    EXPECT_NE(it, map.end());\n    EXPECT_TRUE(map.contains(i));\n\n    map.erase(it);\n    EXPECT_FALSE(map.contains(i));\n  }\n\n  // Erase by key\n  for (int i = 100; i < 200; ++i) {\n    EXPECT_TRUE(map.contains(i));\n    map.erase(i);\n    EXPECT_FALSE(map.contains(i));\n  }\n\n  EXPECT_TRUE(map.empty());\n}\n\nTEST_F(LinearSearchMapTest, Erase) {\n  std::unordered_map<int, double> expected_map;\n  LinearSearchMap<int, double> map;\n\n  // First wave insert / erase\n  for (int i = 0; i < 300; i++) {\n    double value = i * 1.1;\n    map.insert(i, value);\n    expected_map[i] = value;\n  }\n\n  for (int i = 0; i < 300; i += 3) {\n    EXPECT_TRUE(map.contains(i));\n    map.erase(i);\n    expected_map.erase(i);\n    EXPECT_FALSE(map.contains(i));\n  }\n\n  // Second wave insert / erase\n  for (int i = 300; i < 600; i++) {\n    double value = i * 2.2;\n    map.insert(i, value);\n    expected_map[i] = value;\n  }\n\n  for (int i = 300; i < 600; i += 5) {\n    EXPECT_TRUE(map.contains(i));\n    map.erase(i);\n    expected_map.erase(i);\n    EXPECT_FALSE(map.contains(i));\n  }\n\n  // Erase all remaining elements\n  while (!expected_map.empty()) {\n    size_t index = 0;\n    const size_t step = 7;\n\n    for (auto it = expected_map.begin(); it != expected_map.end(); ++index) {\n      auto [i, value] = *it;\n      EXPECT_TRUE(map.contains(i));\n      EXPECT_EQ(map.find(i)->second, value);\n\n      if (index % step == 0) {\n        map.erase(i);\n        it = expected_map.erase(it);\n      } else {\n        ++it;\n      }\n    }\n  }\n\n  EXPECT_TRUE(map.empty());\n}\n\nTEST_F(LinearSearchMapTest, BasicFunctionality) {\n  
LinearSearchMap<double, double> map;\n\n  for (double i = 0; i < 100; ++i) {\n    map.insert(i, i * 1.1);\n  }\n\n  EXPECT_EQ(map.size(), 100);\n\n  // Using indexes\n  for (size_t i = 0; i < map.size(); ++i) {\n    auto [key, value] = map[i];\n    EXPECT_EQ(value, key * 1.1);\n  }\n\n  // Get index by key\n  for (double i = 0; i < 100; ++i) {\n    size_t index = map.find_index(i);\n    auto [key, value] = map[index];\n    EXPECT_EQ(value, key * 1.1);\n  }\n\n  // Get value by key\n  for (double i = 0; i < 100; ++i) {\n    EXPECT_EQ(map[i], i * 1.1);\n  }\n\n  // Iterate through the map\n  for (const auto& [key, value] : map) {\n    EXPECT_EQ(value, key * 1.1);\n  }\n}\n\n}  // namespace dfly\n"
  },
  {
    "path": "src/core/listpack_test.cc",
    "content": "// Copyright 2026, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#include \"core/detail/listpack.h\"\n\n#include <gmock/gmock.h>\n#include <mimalloc.h>\n\n#include \"base/gtest.h\"\n#include \"base/logging.h\"\n\nextern \"C\" {\n#include \"redis/listpack.h\"\n#include \"redis/zmalloc.h\"\n}\n\nnamespace dfly {\nnamespace detail {\n\nusing namespace std;\nusing namespace testing;\n\nclass ListPackTest : public ::testing::Test {\n protected:\n  static void SetUpTestSuite() {\n    init_zmalloc_threadlocal(mi_heap_get_backing());\n  }\n\n  void SetUp() override {\n    ptr_ = lpNew(0);\n    lp_ = ListPack(ptr_);\n  }\n\n  void TearDown() override {\n    ptr_ = lp_.GetPointer();\n    lpFree(ptr_);\n    // Ensure there are no memory leaks after every test\n    EXPECT_EQ(zmalloc_used_memory_tl, 0);\n  }\n\n  unsigned Remove(string_view elem, unsigned count, QList::Where where) {\n    return lp_.Remove(CollectionEntry{elem.data(), elem.size()}, count, where);\n  }\n\n  ListPack lp_;\n  uint8_t* ptr_ = nullptr;\n};\n\nTEST_F(ListPackTest, FindNotFound) {\n  lp_.Push(\"first\", QList::TAIL);\n  lp_.Push(\"third\", QList::TAIL);\n\n  EXPECT_EQ(lp_.Find(\"second\"), nullptr);\n}\n\nTEST_F(ListPackTest, RemoveIntegerFromHead) {\n  lp_.Push(\"1\", QList::TAIL);\n  lp_.Push(\"2\", QList::TAIL);\n  lp_.Push(\"1\", QList::TAIL);\n  lp_.Push(\"3\", QList::TAIL);\n\n  // Remove integer value \"1\" from head\n  unsigned removed = Remove(\"1\", 0, QList::HEAD);\n  EXPECT_EQ(2, removed);\n  EXPECT_EQ(2, lp_.Size());\n\n  EXPECT_EQ(\"2\", lp_.At(0));\n  EXPECT_EQ(\"3\", lp_.At(1));\n}\n\nTEST_F(ListPackTest, RemoveFromTailAll) {\n  // List: a, b, a, c, a\n  lp_.Push(\"a\", QList::TAIL);\n  lp_.Push(\"b\", QList::TAIL);\n  lp_.Push(\"a\", QList::TAIL);\n  lp_.Push(\"c\", QList::TAIL);\n  lp_.Push(\"a\", QList::TAIL);\n\n  // Remove all \"a\" from tail direction\n  unsigned removed = Remove(\"a\", 0, QList::TAIL);\n  EXPECT_EQ(3, 
removed);\n  EXPECT_EQ(2, lp_.Size());\n\n  // Remaining elements: b, c\n  EXPECT_EQ(\"b\", lp_.At(0));\n  EXPECT_EQ(\"c\", lp_.At(1));\n}\n\nTEST_F(ListPackTest, RemoveFromTailWithCount) {\n  // List: a, b, a, c, a\n  lp_.Push(\"a\", QList::TAIL);\n  lp_.Push(\"b\", QList::TAIL);\n  lp_.Push(\"a\", QList::TAIL);\n  lp_.Push(\"c\", QList::TAIL);\n  lp_.Push(\"a\", QList::TAIL);\n\n  // Remove only 2 occurrences of \"a\" from tail (removes indices 4 and 2)\n  unsigned removed = Remove(\"a\", 2, QList::TAIL);\n  EXPECT_EQ(2, removed);\n  EXPECT_EQ(3, lp_.Size());\n\n  // Remaining elements: a, b, c\n  EXPECT_EQ(\"a\", lp_.At(0));\n  EXPECT_EQ(\"b\", lp_.At(1));\n  EXPECT_EQ(\"c\", lp_.At(2));\n}\n\n// Test removing consecutive tail elements - verifies lpLast is called correctly\n// after deleting the tail element to continue finding remaining matches.\nTEST_F(ListPackTest, RemoveFromTailConsecutive) {\n  // List: x, target, target, target - three consecutive at tail\n  lp_.Push(\"x\", QList::TAIL);\n  lp_.Push(\"target\", QList::TAIL);\n  lp_.Push(\"target\", QList::TAIL);\n  lp_.Push(\"target\", QList::TAIL);\n\n  unsigned removed = Remove(\"target\", 0, QList::TAIL);\n  EXPECT_EQ(3, removed);\n  EXPECT_EQ(1, lp_.Size());\n  EXPECT_EQ(\"x\", lp_.At(0));\n}\n\n// Test removing the head element while iterating from TAIL direction.\n// After checking all elements from tail to head and deleting the head,\n// lpDelete returns pointer to element after head, and lpPrev on that returns nullptr,\n// correctly ending iteration.\nTEST_F(ListPackTest, RemoveFromTailDeletesHead) {\n  // List: a, b, c - removing \"a\" (at head) while iterating from tail\n  lp_.Push(\"a\", QList::TAIL);\n  lp_.Push(\"b\", QList::TAIL);\n  lp_.Push(\"c\", QList::TAIL);\n\n  unsigned removed = Remove(\"a\", 0, QList::TAIL);\n  EXPECT_EQ(1, removed);\n  EXPECT_EQ(2, lp_.Size());\n\n  EXPECT_EQ(\"b\", lp_.At(0));\n  EXPECT_EQ(\"c\", lp_.At(1));\n}\n\nTEST_F(ListPackTest, ReplaceAtIndex) {\n  
lp_.Push(\"first\", QList::TAIL);\n  lp_.Push(\"second\", QList::TAIL);\n  lp_.Push(\"third\", QList::TAIL);\n\n  // Replace element at index 1\n  uint8_t* pos = lp_.Seek(1);\n  EXPECT_NE(pos, nullptr);\n  lp_.Replace(pos, \"replaced\");\n  EXPECT_EQ(3, lp_.Size());\n\n  EXPECT_EQ(\"first\", lp_.At(0));\n  EXPECT_EQ(\"replaced\", lp_.At(1));\n  EXPECT_EQ(\"third\", lp_.At(2));\n}\n\nTEST_F(ListPackTest, ReplaceAtNegativeIndex) {\n  lp_.Push(\"first\", QList::TAIL);\n  lp_.Push(\"second\", QList::TAIL);\n  lp_.Push(\"third\", QList::TAIL);\n\n  // Replace element at index -1 (last element)\n  uint8_t* pos = lp_.Seek(-1);\n  EXPECT_NE(pos, nullptr);\n  lp_.Replace(pos, \"new_last\");\n  EXPECT_EQ(3, lp_.Size());\n\n  EXPECT_EQ(\"first\", lp_.At(0));\n  EXPECT_EQ(\"second\", lp_.At(1));\n  EXPECT_EQ(\"new_last\", lp_.At(2));\n}\n\nTEST_F(ListPackTest, ReplaceOutOfBounds) {\n  lp_.Push(\"first\", QList::TAIL);\n  lp_.Push(\"second\", QList::TAIL);\n\n  // Seeking an out-of-bounds index (positive or negative) should return nullptr\n  uint8_t* pos = lp_.Seek(5);\n  EXPECT_EQ(pos, nullptr);\n  pos = lp_.Seek(-5);\n  EXPECT_EQ(pos, nullptr);\n}\n\nTEST_F(ListPackTest, ReplaceWithLargerString) {\n  lp_.Push(\"a\", QList::TAIL);\n  lp_.Push(\"b\", QList::TAIL);\n\n  // Replace with a much larger string\n  string large(500, 'x');\n  uint8_t* pos = lp_.Seek(0);\n  EXPECT_NE(pos, nullptr);\n  lp_.Replace(pos, large);\n  EXPECT_EQ(2, lp_.Size());\n\n  EXPECT_EQ(large, lp_.At(0));\n  EXPECT_EQ(\"b\", lp_.At(1));\n}\n\nTEST_F(ListPackTest, ReplaceWithEmptyString) {\n  lp_.Push(\"first\", QList::TAIL);\n  lp_.Push(\"second\", QList::TAIL);\n\n  // Replace with empty string\n  uint8_t* pos = lp_.Seek(0);\n  EXPECT_NE(pos, nullptr);\n  lp_.Replace(pos, \"\");\n  EXPECT_EQ(2, lp_.Size());\n\n  EXPECT_EQ(\"\", lp_.At(0));\n  EXPECT_EQ(\"second\", lp_.At(1));\n}\n\n}  // namespace detail\n}  // namespace dfly\n"
  },
  {
    "path": "src/core/memory_test.cc",
    "content": "// Copyright 2022, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n// Disable mimalloc internal debug assertions for accessing internal structures\n#define MI_DEBUG 0\n\n#include <mimalloc.h>\n#include <mimalloc/internal.h>\n#include <mimalloc/types.h>\n\n#include <thread>\n#include <vector>\n\n#include \"base/gtest.h\"\n#include \"base/logging.h\"\n\n// Stub out internal mimalloc assertions that aren't exported\n// These are used by inline functions in internal.h\n[[noreturn]] void _mi_assert_fail(const char* assertion, const char* fname, unsigned int line,\n                                  const char* func) noexcept {\n  fprintf(stderr, \"mimalloc assertion failed: %s at %s:%u in %s\\n\", assertion, fname, line, func);\n  abort();\n}\n\nnamespace dfly {\n\nclass MiHeapTest : public ::testing::Test {\n protected:\n  MiHeapTest() {\n  }\n};\n\nTEST_F(MiHeapTest, Basic) {\n  mi_heap_t* heap = mi_heap_get_default();\n  void* ptr = mi_heap_malloc_aligned(heap, 1024 /* size*/, 64 /* alignment*/);\n  ASSERT_TRUE(ptr != nullptr);\n\n  EXPECT_EQ(heap->tld->stats.malloc_normal.current, 1024);\n  EXPECT_EQ(heap->tld->stats.malloc_huge.current, 0);\n\n  void* ptr2 = mi_heap_malloc_aligned(heap, 1024 * 1024 /* size*/, 64 /* alignment*/);\n\n  EXPECT_EQ(heap->tld->stats.malloc_normal.current, 1024);\n  EXPECT_GE(heap->tld->stats.malloc_huge.current, 1024 * 1024);\n\n  mi_free(ptr);\n\n  EXPECT_EQ(heap->tld->stats.malloc_normal.current, 0);\n  EXPECT_GE(heap->tld->stats.malloc_huge.current, 1024 * 1024);\n\n  mi_free(ptr2);\n  EXPECT_EQ(heap->tld->stats.malloc_huge.current, 0);\n}\n\nTEST_F(MiHeapTest, Threaded) {\n  mi_heap_t* heap = mi_heap_get_default();\n\n  void* ptr = mi_heap_malloc_aligned(heap, 1024 /* size*/, 64 /* alignment*/);\n  ASSERT_TRUE(ptr != nullptr);\n\n  // adding ptr to heap->thread_delayed_free\n  std::thread t2([ptr]() {\n    mi_free(ptr);\n    // thread local stats are updated.\n    
EXPECT_EQ(mi_heap_get_default()->tld->stats.malloc_normal.current, -1024);\n  });\n\n  t2.join();\n  EXPECT_EQ(heap->tld->stats.malloc_normal.current, 1024);\n  EXPECT_EQ(heap->generic_collect_count, 0);\n\n  // Force many mallocs to trigger delayed blocks collection.\n  for (unsigned i = 0; i < 200; ++i) {\n    ptr = mi_malloc(16 * i);\n    mi_free(ptr);\n  }\n\n  // delayed collections was triggered\n  EXPECT_GE(heap->generic_collect_count, 1);\n\n  // mi_malloc does not track malloc back sizes back to the original heap threads.\n  EXPECT_EQ(heap->tld->stats.malloc_normal.current, 1024);\n}\n\n// Verify that xthread_free lists are processed correctly during force collection\n// on full pages.\nTEST_F(MiHeapTest, FullPageThreadFreeInternal) {\n  mi_heap_t* heap = mi_heap_get_default();\n  constexpr size_t block_size = 64;\n  std::vector<void*> allocations;\n\n  // Allocate blocks until page is full\n  void* first_ptr = mi_heap_malloc(heap, block_size);\n  ASSERT_TRUE(first_ptr != nullptr);\n  allocations.push_back(first_ptr);\n\n  mi_page_t* page = _mi_ptr_page(first_ptr);\n  ASSERT_TRUE(page != nullptr);\n\n  while (page->used < page->capacity) {\n    void* ptr = mi_heap_malloc(heap, block_size);\n    ASSERT_TRUE(ptr != nullptr);\n    if (_mi_ptr_page(ptr) == page) {\n      allocations.push_back(ptr);\n    } else {\n      mi_free(ptr);\n      break;\n    }\n  }\n\n  EXPECT_EQ(page->used, page->capacity);\n\n  // Free one block from another thread\n  void* cross_thread_ptr = allocations.back();\n  allocations.pop_back();\n\n  std::thread t([cross_thread_ptr]() { mi_free(cross_thread_ptr); });\n  t.join();\n\n  EXPECT_EQ(page->used, page->capacity);\n  EXPECT_NE(mi_atomic_load_relaxed(&page->xthread_free), 0);\n\n  // Force collection should process xthread_free\n  mi_heap_collect(heap, true);\n\n  EXPECT_LT(page->used, page->capacity);\n  EXPECT_EQ(mi_atomic_load_relaxed(&page->xthread_free), 0);\n\n  // New allocation should reuse the freed block\n  void* new_ptr 
= mi_heap_malloc(heap, block_size);\n  EXPECT_EQ(_mi_ptr_page(new_ptr), page);\n\n  // Clean up\n  mi_free(new_ptr);\n  for (void* ptr : allocations) {\n    mi_free(ptr);\n  }\n}\n\n// Verify that MI_BIN_FULL pages are cleared during collection.\nTEST_F(MiHeapTest, FullBinQueueCollection) {\n  mi_heap_t* heap = mi_heap_get_default();\n  constexpr size_t block_size = 64;\n\n  auto count_xthread_free = [&heap]() {\n    size_t count = 0;\n    for (size_t i = 0; i <= MI_BIN_FULL; ++i) {\n      for (mi_page_t* page = heap->pages[i].first; page != nullptr; page = page->next) {\n        if (mi_atomic_load_relaxed(&page->xthread_free) != 0) {\n          count++;\n        }\n      }\n    }\n    return count;\n  };\n\n  // Allocate and cross-thread free to populate xthread_free lists\n  std::vector<void*> allocations(2000);\n  for (size_t i = 0; i < allocations.size(); ++i) {\n    allocations[i] = mi_heap_malloc(heap, block_size);\n    ASSERT_TRUE(allocations[i] != nullptr);\n  }\n\n  std::thread t([&allocations]() {\n    for (size_t i = 0; i < allocations.size() / 2; ++i) {\n      mi_free(allocations[i]);\n    }\n  });\n  t.join();\n\n  size_t xthread_before = count_xthread_free();\n  EXPECT_GT(xthread_before, 0);\n\n  mi_heap_collect(heap, true);\n\n  EXPECT_EQ(count_xthread_free(), 0) << \"All xthread_free lists should be cleared\";\n\n  // Clean up\n  for (size_t i = allocations.size() / 2; i < allocations.size(); ++i) {\n    mi_free(allocations[i]);\n  }\n}\n\n// Test that verifies memory accounting and reclamation behavior when allocations are made in\n// one thread and freed in another after the allocating thread exits. 
This exercises the\n// MI_ABANDON / cross-thread free handling where mimalloc should properly reclaim pages from\n// the abandoned thread heap once collection runs.\n//\n// This test uses the default heap and verifies reclamation by checking its statistics.\nTEST_F(MiHeapTest, AbandonedHeapReclamation) {\n  constexpr size_t block_size = 128;\n  constexpr size_t num_blocks = 2000;\n  std::vector<void*> allocations(num_blocks);\n\n  mi_heap_t* main_heap = mi_heap_get_default();\n\n  // Allocate memory in a separate thread, then exit the thread\n  std::thread allocator_thread([&]() {\n    for (size_t i = 0; i < num_blocks; ++i) {\n      allocations[i] = mi_malloc(block_size);\n      ASSERT_TRUE(allocations[i] != nullptr);\n    }\n  });\n\n  allocator_thread.join();\n\n  // Free all allocations from the main thread (cross-thread free to abandoned heap)\n  for (void* ptr : allocations) {\n    mi_free(ptr);\n  }\n\n  // Force collection to reclaim abandoned segments\n  mi_collect(true);\n\n  // Verify memory and abandoned pages are reclaimed\n  EXPECT_EQ(main_heap->tld->stats.malloc_normal.current, 0);\n}\n\n}  // namespace dfly\n"
  },
  {
    "path": "src/core/mi_memory_resource.cc",
    "content": "// Copyright 2022, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n#include \"core/mi_memory_resource.h\"\n\n#include <sys/mman.h>\n\n#include \"base/logging.h\"\n\nnamespace dfly {\n\nusing namespace std;\n\nvoid* MiMemoryResource::do_allocate(size_t size, size_t align) {\n  DCHECK(align);\n\n  void* res = mi_heap_malloc_aligned(heap_, size, align);\n\n  if (!res)\n    throw bad_alloc{};\n\n  // It seems that mimalloc has a bug with larger allocations that causes\n  // mi_heap_contains_block to lie. See https://github.com/microsoft/mimalloc/issues/587\n  // For now I avoid the check by checking the size. mi_usable_size works though.\n  DCHECK(size > 33554400 || mi_heap_contains_block(heap_, res));\n  size_t delta = mi_usable_size(res);\n\n  used_ += delta;\n  DVLOG(1) << \"do_allocate: \" << heap_ << \" \" << delta;\n\n  return res;\n}\n\nvoid MiMemoryResource::do_deallocate(void* ptr, size_t size, size_t align) {\n  DCHECK(size > 33554400 || mi_heap_contains_block(heap_, ptr));\n\n  size_t usable = mi_usable_size(ptr);\n\n  DVLOG(1) << \"do_deallocate: \" << heap_ << \" \" << usable;\n\n  DCHECK_GE(used_, size);\n  used_ -= usable;\n  mi_free_size_aligned(ptr, size, align);\n}\n\n}  // namespace dfly\n"
  },
  {
    "path": "src/core/mi_memory_resource.h",
    "content": "// Copyright 2022, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#pragma once\n\n#include <mimalloc.h>\n\n#include \"base/pmr/memory_resource.h\"\n\nnamespace dfly {\n\n// Per thread memory resource that uses mimalloc.\nclass MiMemoryResource : public PMR_NS::memory_resource {\n public:\n  explicit MiMemoryResource(mi_heap_t* heap) : heap_(heap) {\n  }\n\n  mi_heap_t* heap() {\n    return heap_;\n  }\n\n  size_t used() const {\n    return used_;\n  }\n\n private:\n  void* do_allocate(std::size_t size, std::size_t align) final;\n\n  void do_deallocate(void* ptr, std::size_t size, std::size_t align) final;\n\n  bool do_is_equal(const PMR_NS::memory_resource& o) const noexcept {\n    return this == &o;\n  }\n\n  mi_heap_t* heap_;\n  size_t used_ = 0;\n};\n\n}  // namespace dfly\n"
  },
  {
    "path": "src/core/oah_entry.cc",
    "content": "// Copyright 2024, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#include \"core/oah_entry.h\"\n\n#include \"base/hash.h\"\n#include \"base/logging.h\"\n\nnamespace dfly {\n\nOAHEntry::OAHEntry(std::string_view key, uint32_t expiry) {\n  uint32_t key_size = key.size();\n\n  uint32_t expiry_size = (expiry != UINT32_MAX) * sizeof(expiry);\n\n  uint32_t key_len_field_size = key_size <= std::numeric_limits<uint8_t>::max() ? 1 : 4;\n\n  auto size = key_len_field_size + key_size + expiry_size;\n\n  auto* expiry_pos = (char*)zmalloc(size);\n  data_ = reinterpret_cast<uint64_t>(expiry_pos);\n  if (expiry_size) {\n    SetExpiryBit(true);\n    std::memcpy(expiry_pos, &expiry, sizeof(expiry));\n  }\n\n  auto* key_size_pos = expiry_pos + expiry_size;\n  if (key_len_field_size == 1) {\n    SetSsoBit();\n    uint8_t sso_key_size = key_size;\n    std::memcpy(key_size_pos, &sso_key_size, key_len_field_size);\n  } else {\n    std::memcpy(key_size_pos, &key_size, key_len_field_size);\n  }\n\n  auto* key_pos = key_size_pos + key_len_field_size;\n  std::memcpy(key_pos, key.data(), key_size);\n}\n\n// returns the expiry time of the current entry or UINT32_MAX if no expiry is set.\nuint32_t OAHEntry::GetExpiry() const {\n  std::uint32_t res = UINT32_MAX;\n  if (HasExpiry()) {\n    assert(!IsVector());\n    std::memcpy(&res, Raw(), sizeof(res));\n  }\n  return res;\n}\n\nbool OAHEntry::CheckNoCollisions(const uint64_t ext_hash) {\n  auto stored_hash = GetHash();\n  return ((stored_hash != ext_hash) & (stored_hash != 0)) | (Empty());\n}\n\nvoid OAHEntry::SetExtHash(uint64_t ext_hash) {\n  assert(data_);\n  assert(!IsVector());\n  data_ = (data_ & ~kExtHashShiftedMask) | (ext_hash << kExtHashShift);\n}\n\nvoid OAHEntry::SetExpiry(uint32_t at_sec) {\n  assert(!IsVector());\n  if (HasExpiry()) {\n    auto* expiry_pos = Raw();\n    std::memcpy(expiry_pos, &at_sec, sizeof(at_sec));\n  } else {\n    *this = OAHEntry(Key(), at_sec);\n  
}\n}\n\nvoid OAHEntry::ExpireIfNeeded(uint32_t time_now, uint32_t* set_size, size_t* alloc_used) {\n  assert(!IsVector());\n  if (GetExpiry() <= time_now) {\n    *alloc_used -= AllocSize();\n    Clear();\n    --*set_size;\n  }\n}\n\n// TODO refactor, because it's inefficient\nsize_t OAHEntry::Insert(OAHEntry&& e) {\n  if (Empty()) {\n    *this = std::move(e);\n    return 0;\n  } else if (!IsVector()) {\n    OAHEntry tmp(PtrVector<OAHEntry>::FromLogSize(1));\n    auto& arr = tmp.AsVector();\n    arr[0] = std::move(*this);\n    arr[1] = std::move(e);\n    auto res = arr.AllocSize();\n    *this = std::move(tmp);\n    return res;\n  } else {\n    auto& arr = AsVector();\n    size_t i = 0;\n    for (; i < arr.Size(); ++i) {\n      if (!arr[i]) {\n        arr[i] = std::move(e);\n        return 0;\n      }\n    }\n    size_t prev_alloc_size = arr.AllocSize();\n    auto new_pos = arr.Size();\n    arr.ResizeLog(arr.LogSize() + 1);\n    arr[new_pos] = (std::move(e));\n    return arr.AllocSize() - prev_alloc_size;\n  }\n}\n\nuint32_t OAHEntry::ElementsNum() {\n  if (Empty()) {\n    return 0;\n  } else if (!IsVector()) {\n    return 1;\n  }\n  return AsVector().Size();\n}\n\n// TODO remove, it is inefficient\nOAHEntry& OAHEntry::operator[](uint32_t pos) {\n  assert(!Empty());\n  if (!IsVector()) {\n    assert(pos == 0);\n    return *this;\n  } else {\n    auto& arr = AsVector();\n    assert(pos < arr.Size());\n    return arr[pos];\n  }\n}\n\nOAHEntry OAHEntry::Remove(uint32_t pos) {\n  if (Empty()) {\n    // I'm not sure that this scenario should be check at all\n    assert(pos == 0);\n    return OAHEntry();\n  } else if (!IsVector()) {\n    assert(pos == 0);\n    return std::move(*this);\n  } else {\n    auto& arr = AsVector();\n    assert(pos < arr.Size());\n    return std::move(arr[pos]);\n  }\n}\n\nOAHEntry OAHEntry::Pop() {\n  if (IsVector()) {\n    auto& arr = AsVector();\n    for (auto& e : arr) {\n      if (e)\n        return std::move(e);\n    }\n    return {};\n  }\n 
 return std::move(*this);\n}\n\nvoid OAHEntry::Clear() {\n  // TODO add optimization to avoid destructor calls during vector allocator\n  if (!data_)\n    return;\n\n  if (IsVector()) {\n    AsVector().~PtrVector<OAHEntry>();\n  } else {\n    zfree(Raw());\n  }\n  data_ = 0;\n}\n\nuint32_t OAHEntry::GetKeySize() const {\n  if (HasSso()) {\n    uint8_t size = 0;\n    std::memcpy(&size, Raw() + GetExpirySize(), sizeof(size));\n    return size;\n  }\n  uint32_t size = 0;\n  std::memcpy(&size, Raw() + GetExpirySize(), sizeof(size));\n  return size;\n}\n\nvoid OAHEntry::SetExpiryBit(bool b) {\n  if (b)\n    data_ |= kExpiryBit;\n  else\n    data_ &= ~kExpiryBit;\n}\n\nsize_t OAHEntry::Size() {\n  size_t key_field_size = HasSso() ? 1 : 4;\n  size_t expiry_field_size = HasExpiry() ? 4 : 0;\n  return expiry_field_size + key_field_size + GetKeySize();\n}\n\n}  // namespace dfly\n"
  },
  {
    "path": "src/core/oah_entry.h",
    "content": "// Copyright 2024, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#pragma once\n\n#include <cassert>\n#include <cstring>\n#include <string_view>\n\n#include \"base/hash.h\"\n\nextern \"C\" {\n#include \"redis/zmalloc.h\"\n}\n\nnamespace dfly {\n\n#define PREFETCH_READ(x) __builtin_prefetch(x, 0, 1)\n#define FORCE_INLINE __attribute__((always_inline))\n\n// TODO add allocator support\ntemplate <class T> class PtrVector {\n  static constexpr size_t kVectorBit = 1ULL << 0;          // first 3 bits aren't used by pointer\n  static constexpr size_t kTagMask = (4095ULL << 52) | 7;  // we reserve 12 high bits and 3 low bits\n\n  static constexpr size_t kLogSizeShift = 56;\n  static constexpr size_t kLogSizeMask = 0xFFULL;\n  static constexpr size_t kLogSizeShiftedMask = kLogSizeMask << kLogSizeShift;\n\n public:\n  static PtrVector FromLogSize(uint64_t log_size) {\n    return PtrVector(log_size);\n  }\n\n  T* begin() const {\n    return &Raw()[0];\n  }\n\n  T* end() const {\n    return &Raw()[Size()];\n  }\n\n  PtrVector(PtrVector&& other) {\n    uptr_ = other.uptr_;\n    other.uptr_ = 0;\n  }\n\n  ~PtrVector() {\n    Clear();\n  }\n\n  size_t LogSize() const {\n    return (uptr_ >> kLogSizeShift) & kLogSizeMask;\n  }\n\n  size_t Size() const {\n    return 1 << LogSize();\n  }\n\n  uint64_t Release() {\n    uint64_t res = uptr_;\n    uptr_ = 0;\n    return res;\n  }\n\n  bool Empty() const {\n    if (uptr_ == 0)\n      return true;\n\n    for (auto& el : *this) {\n      if (el)\n        return false;\n    }\n    return true;\n  }\n\n  void ResizeLog(uint64_t new_log_size) {\n    auto new_ptr = reinterpret_cast<T*>(zmalloc(sizeof(T) << new_log_size));\n    size_t new_size = 1 << new_log_size;\n    const size_t size = std::min(Size(), new_size);\n    for (size_t i = 0; i < size; ++i) {\n      new (new_ptr + i) T(std::move(Raw()[i]));\n    }\n    for (size_t i = size; i < new_size; ++i) {\n      new (new_ptr + i) T();\n 
   }\n    Clear();\n    uptr_ = reinterpret_cast<uint64_t>(new_ptr);\n    SetLogSize(new_log_size);\n  }\n\n  T& operator[](size_t idx) {\n    return Raw()[idx];\n  }\n\n  const T& operator[](size_t idx) const {\n    return Raw()[idx];\n  }\n\n  T* Raw() const {\n    return (T*)(uptr_ & ~kTagMask);\n  }\n\n  size_t AllocSize() const {\n    return Size() * sizeof(T);\n  }\n\n private:\n  void Clear() {\n    const size_t size = Size();\n    T* raw = Raw();\n    if (!raw)\n      return;\n    for (size_t i = 0; i < size; ++i) {\n      if (raw[i])\n        raw[i].~T();\n    }\n\n    zfree(Raw());\n    uptr_ = 0;\n  }\n  // because of log_size I prefer to hide it\n  PtrVector(uint64_t log_size) {\n    assert(log_size <= 32);\n    uptr_ = reinterpret_cast<uint64_t>(zmalloc(sizeof(T) << log_size));\n    const uint64_t size = 1 << log_size;\n    for (uint64_t i = 0; i < size; ++i) {\n      new (reinterpret_cast<T*>(uptr_) + i) T();\n    }\n    SetLogSize(log_size);\n  }\n\n  void SetLogSize(uint64_t log_size) {\n    uptr_ = (uptr_ & ~kLogSizeShiftedMask) | kVectorBit | (uint64_t(log_size) << kLogSizeShift);\n  }\n\n  uint64_t uptr_ = 0;\n};\n\n// doesn't possess memory, it should be created and release manually\nclass OAHEntry {\n public:\n  // we can assume that high 12 bits of user address space\n  // can be used for tagging. 
At most 52 bits of address are reserved for\n  // some configurations, and usually it's 48 bits.\n  // https://docs.kernel.org/arch/arm64/memory.html\n  // first 3 bits aren't used by pointer\n  static constexpr size_t kVectorBit = 1ULL << 0;\n  static constexpr size_t kExpiryBit = 1ULL << 1;\n  // if bit is set the string length field is 1 byte instead of 4\n  static constexpr size_t kSsoBit = 1ULL << 2;\n\n  // extended hash allows us to reduce keys comparisons\n  static constexpr size_t kExtHashShift = 52;\n  static constexpr uint32_t kExtHashSize = 12;\n  static constexpr size_t kExtHashMask = 0xFFFULL;\n  static constexpr size_t kExtHashShiftedMask = kExtHashMask << kExtHashShift;\n\n  static constexpr size_t kTagMask = (4095ULL << 52) | 7;  // we reserve 12 high bits and 3 low.\n\n  OAHEntry() = default;\n\n  OAHEntry(std::string_view key, uint32_t expiry = UINT32_MAX);\n\n  // TODO add initializer list constructor\n  OAHEntry(PtrVector<OAHEntry>&& vec) {\n    data_ = vec.Release() | kVectorBit;\n  }\n\n  OAHEntry(const OAHEntry& e) = delete;\n  OAHEntry(OAHEntry&& e) {\n    data_ = e.data_;\n    e.data_ = 0;\n  }\n\n  // consider manual removing, we waste a lot of time to check nullptr\n  ~OAHEntry() {\n    Clear();\n  }\n\n  OAHEntry& operator=(const OAHEntry& e) = delete;\n  OAHEntry& operator=(OAHEntry&& e) {\n    std::swap(data_, e.data_);\n    return *this;\n  }\n\n  bool Empty() const {\n    return data_ == 0;\n  }\n\n  operator bool() const {\n    return !Empty();\n  }\n\n  bool IsVector() const {\n    return (data_ & kVectorBit) != 0;\n  }\n\n  bool IsEntry() const {\n    return (data_ != 0) & !(data_ & kVectorBit);\n  }\n\n  size_t AllocSize() const {\n    return zmalloc_usable_size(Raw());\n  }\n\n  PtrVector<OAHEntry>& AsVector() {\n    static_assert(sizeof(PtrVector<OAHEntry>) == sizeof(uint64_t));\n    return *reinterpret_cast<PtrVector<OAHEntry>*>(&data_);\n  }\n\n  std::string_view Key() const {\n    assert(!IsVector());\n    return 
{GetKeyData(), GetKeySize()};\n  }\n\n  bool HasExpiry() const {\n    return (data_ & kExpiryBit) != 0;\n  }\n\n  // returns the expiry time of the current entry or UINT32_MAX if no expiry is set.\n  uint32_t GetExpiry() const;\n\n  // TODO consider another option to implement iterator\n  OAHEntry* operator->() {\n    return this;\n  }\n\n  uint64_t GetHash() const {\n    return (data_ & kExtHashShiftedMask) >> kExtHashShift;\n  }\n\n  bool CheckNoCollisions(const uint64_t ext_hash);\n\n  void SetExtHash(uint64_t ext_hash);\n\n  void ClearHash() {\n    data_ &= ~kExtHashShiftedMask;\n  }\n\n  void SetExpiry(uint32_t at_sec);\n\n  void ExpireIfNeeded(uint32_t time_now, uint32_t* set_size, size_t* alloc_used);\n\n  // TODO refactor, because it's inefficient\n  // Returns additional allocation size of ptrVector\n  [[nodiscard]] size_t Insert(OAHEntry&& e);\n\n  uint32_t ElementsNum();\n\n  // TODO remove, it is inefficient\n  OAHEntry& operator[](uint32_t pos);\n\n  OAHEntry Remove(uint32_t pos);\n\n  OAHEntry Pop();\n\n  char* Raw() const {\n    return (char*)(data_ & ~kTagMask);\n  }\n\n protected:\n  void Clear();\n\n  const char* GetKeyData() const {\n    uint32_t key_field_size = HasSso() ? 1 : 4;\n    return Raw() + GetExpirySize() + key_field_size;\n  }\n\n  uint32_t GetKeySize() const;\n\n  void SetExpiryBit(bool b);\n\n  void SetVectorBit() {\n    data_ |= kVectorBit;\n  }\n\n  void SetSsoBit() {\n    data_ |= kSsoBit;\n  }\n\n  bool HasSso() const {\n    return (data_ & kSsoBit) != 0;\n  }\n\n  size_t Size();\n\n  std::uint32_t GetExpirySize() const {\n    return HasExpiry() ? sizeof(std::uint32_t) : 0;\n  }\n\n  // memory daya layout [Expiry, key_size, key]\n  uint64_t data_ = 0;\n};\n\n}  // namespace dfly\n"
  },
  {
    "path": "src/core/oah_set.h",
    "content": "// Copyright 2024, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#pragma once\n\n#include <absl/numeric/bits.h>\n#include <absl/types/span.h>\n\n#include <vector>\n\n#include \"core/detail/stateless_allocator.h\"\n#include \"oah_entry.h\"\n\nnamespace dfly {\n\n// TODO add template parameter instead of OAHEntry\nclass OAHSet {  // Open Addressing Hash Set\n  using OAHEntryAllocator = StatelessAllocator<OAHEntry>;\n  using Buckets = std::vector<OAHEntry, OAHEntryAllocator>;\n\n public:\n  class iterator {\n   public:\n    using iterator_category = std::forward_iterator_tag;\n    using difference_type = std::ptrdiff_t;\n    using value_type = OAHEntry;\n    using pointer = OAHEntry*;\n    using reference = OAHEntry&;\n\n    iterator(OAHSet* owner, uint32_t bucket_id, uint32_t pos_in_bucket)\n        : owner_(owner), bucket_(bucket_id), pos_(pos_in_bucket) {\n    }\n\n    void SetExpiryTime(uint32_t ttl_sec) {\n      auto& entry = owner_->entries_[bucket_][pos_];\n      owner_->obj_alloc_used_ -= entry.AllocSize();\n      owner_->entries_[bucket_][pos_].SetExpiry(owner_->EntryTTL(ttl_sec));\n      owner_->obj_alloc_used_ += entry.AllocSize();\n    }\n\n    iterator& operator++() {\n      ++pos_;\n      SetEntryIt();\n      return *this;\n    }\n\n    bool operator==(const iterator& r) const {\n      if (owner_ == nullptr || r.owner_ == nullptr) {\n        return owner_ == r.owner_;\n      }\n      assert(owner_ == r.owner_);\n      return bucket_ == r.bucket_ && pos_ == r.pos_;\n    }\n\n    bool operator!=(const iterator& r) const {\n      return !operator==(r);\n    }\n\n    reference operator*() {\n      return owner_->entries_[bucket_][pos_];\n    }\n\n    reference operator->() {\n      return owner_->entries_[bucket_][pos_];\n    }\n\n    bool HasExpiry() {\n      return owner_->entries_[bucket_][pos_].HasExpiry();\n    }\n\n    uint32_t ExpiryTime() {\n      return 
owner_->entries_[bucket_][pos_].GetExpiry();\n    }\n\n    uint32_t bucket_id() const {\n      return bucket_;\n    }\n\n    operator bool() const {\n      return owner_;\n    }\n\n    // find valid entry_ iterator starting from buckets_it_ and set it\n    void SetEntryIt() {\n      if (!owner_)\n        return;\n      for (auto num_entries = owner_->entries_.size(); bucket_ < num_entries; ++bucket_) {\n        auto& bucket = owner_->entries_[bucket_];\n        for (uint32_t bucket_size = bucket.ElementsNum(); pos_ < bucket_size; ++pos_) {\n          if (bucket[pos_])\n            return;\n        }\n        pos_ = 0;\n      }\n      owner_ = nullptr;\n    }\n\n   private:\n    OAHSet* owner_ = nullptr;\n    uint32_t bucket_ = 0;\n    uint32_t pos_ = 0;\n  };\n\n  iterator begin() {\n    iterator res(this, 0, 0);\n    res.SetEntryIt();\n    return res;\n  }\n\n  iterator end() {\n    return iterator(nullptr, 0, 0);\n  }\n\n  explicit OAHSet() = default;\n\n  bool Add(std::string_view str, uint32_t ttl_sec = UINT32_MAX) {\n    uint64_t hash = Hash(str);\n    auto bucket_id = BucketId(hash, capacity_log_);\n    PREFETCH_READ(entries_.data() + bucket_id);\n    PREFETCH_READ(entries_.data() + bucket_id + 8);\n\n    if (size_ >= entries_.size()) {\n      Reserve(BucketCount() * 2);\n      bucket_id = BucketId(hash, capacity_log_);\n    }\n\n    uint32_t at = EntryTTL(ttl_sec);\n    // TODO maybe we should split memory allocation and copying for the case when we can't add it\n    // into set\n    OAHEntry entry(str, at);\n    SetEntryHash(entry, hash);\n\n    if (FastCheck(bucket_id, str, hash)) {\n      return false;\n    }\n\n    obj_alloc_used_ += entry.AllocSize();\n    AddUnique(std::move(entry), bucket_id, ttl_sec);\n    return true;\n  }\n\n  void Reserve(size_t sz) {\n    sz = absl::bit_ceil(sz);\n    if (sz > entries_.size()) {\n      auto prev_capacity_log = capacity_log_;\n      capacity_log_ = std::max(kMinCapacityLog, uint32_t(absl::bit_width(sz) - 1));\n    
  size_t prev_size = entries_.size();\n      entries_.resize(Capacity());\n      Rehash(prev_capacity_log, prev_size);\n    }\n    assert(entries_.size() >= kDisplacementSize);\n  }\n\n  // Shrinks the table to the specified size. The new_size must be a power of 2,\n  // >= kMinCapacity (which is 1 << kMinCapacityLog), and >= current number of elements.\n  // This method should be called explicitly when memory reclamation is needed.\n  void Shrink(size_t new_size) {\n    assert(absl::has_single_bit(new_size));\n    assert(new_size >= (1u << kMinCapacityLog));\n    assert(new_size < entries_.size());\n\n    size_t prev_size = entries_.size();\n    capacity_log_ = absl::bit_width(new_size) - 1;\n\n    // Process from low to high (opposite of Grow/Rehash).\n    for (size_t i = 0; i < prev_size; ++i) {\n      ShrinkBucket(i);\n    }\n\n    entries_.resize(Capacity());\n    entries_.shrink_to_fit();\n  }\n\n  void Clear() {\n    capacity_log_ = 0;\n    entries_.resize(0);\n    size_ = 0;\n    obj_alloc_used_ = 0;\n    ptr_vectors_alloc_used_ = 0;\n  }\n\n  // TODO should be removed, inefficient\n  void AddUnique(OAHEntry&& e, uint32_t bid, uint32_t ttl_sec = UINT32_MAX) {\n    ++size_;\n    assert(Capacity() >= kDisplacementSize);\n    for (uint32_t i = 0; i < kDisplacementSize; i++) {\n      const uint32_t bucket_id = bid + i;\n      if (entries_[bucket_id].Empty()) {\n        entries_[bucket_id] = std::move(e);\n        return;\n      }\n\n      // TODO add expiration logic\n    }\n\n    bid = GetExtensionPoint(bid);\n    assert(bid < entries_.size());\n\n    ptr_vectors_alloc_used_ += entries_[bid].Insert(std::move(e));\n  }\n\n  unsigned AddMany(absl::Span<std::string_view> span, uint32_t ttl_sec = UINT32_MAX) {\n    Reserve(span.size());\n    unsigned res = 0;\n    for (auto& s : span) {\n      if (Add(s, ttl_sec) != end()) {\n        res++;\n      }\n    }\n    return res;\n  }\n\n  // TODO: Consider using chunks for this as in StringSet\n  void Fill(OAHSet* 
other) {\n    assert(other->entries_.empty());\n    other->Reserve(UpperBoundSize());\n    other->set_time(time_now());\n    for (auto it = begin(), it_end = end(); it != it_end; ++it) {\n      other->Add(it->Key(), it.HasExpiry() ? it.ExpiryTime() - time_now() : UINT32_MAX);\n    }\n  }\n\n  /**\n   * stable scanning api. has the same guarantees as redis scan command.\n   * we avoid doing bit-reverse by using a different function to derive a bucket id\n   * from hash values. By using msb part of hash we make it \"stable\" with respect to\n   * rehashes. For example, with table log size 4 (size 16), entries in bucket id\n   * 1110 come from hashes 1110XXXXX.... When a table grows to log size 5,\n   * these entries can move either to 11100 or 11101. So if we traversed with our cursor\n   * range [0000-1110], it's guaranteed that in grown table we do not need to cover again\n   * [00000-11100]. Similarly with shrinkage, if a table is shrunk to log size 3,\n   * keys from 1110 and 1111 will move to bucket 111. 
Again, it's guaranteed that we\n   * covered the range [000-111] (all keys in that case).\n   * Returns: next cursor or 0 if reached the end of scan.\n   * cursor = 0 - initiates a new scan.\n   */\n\n  using ItemCb = std::function<void(std::string_view)>;\n\n  uint32_t Scan(uint32_t cursor, const ItemCb& cb) {\n    if (entries_.empty())\n      return 0;\n\n    uint32_t bucket_id = cursor >> (32 - capacity_log_);\n\n    // First find the bucket to scan, skip empty buckets.\n    for (; bucket_id < BucketCount(); ++bucket_id) {\n      bool res = false;\n      for (uint32_t i = 0; i < kDisplacementSize; i++) {\n        const uint32_t shifted_bid = bucket_id + i;\n        res |= ScanBucket(entries_[shifted_bid], cb, bucket_id);\n      }\n      if (res)\n        break;\n    }\n\n    if (++bucket_id >= BucketCount()) {\n      return 0;\n    }\n\n    return bucket_id << (32 - capacity_log_);\n  }\n\n  OAHEntry Pop() {\n    for (auto& bucket : entries_) {\n      if (auto res = bucket.Pop(); !res.Empty()) {\n        assert(!res.IsVector());\n        --size_;\n        obj_alloc_used_ -= res.AllocSize();\n        if (bucket.IsVector()) {\n          if (bucket.AsVector().Empty()) {\n            ptr_vectors_alloc_used_ -= bucket.AsVector().AllocSize();\n            bucket = OAHEntry();\n          }\n        }\n        return res;\n      }\n    }\n    return {};\n  }\n\n  bool Erase(std::string_view str) {\n    if (entries_.empty())\n      return false;\n\n    uint64_t hash = Hash(str);\n    auto bucket_id = BucketId(hash, capacity_log_);\n    auto item = FindInternal(bucket_id, str, hash);\n    if (item != end()) {\n      --size_;\n      obj_alloc_used_ -= item->AllocSize();\n      *item = OAHEntry();\n      uint32_t erase_bucket = item.bucket_id();\n      if (entries_[erase_bucket].IsVector()) {\n        if (entries_[erase_bucket].AsVector().Empty()) {\n          ptr_vectors_alloc_used_ -= entries_[erase_bucket].AsVector().AllocSize();\n          entries_[erase_bucket] = 
OAHEntry();\n        }\n      }\n      return true;\n    }\n    return false;\n  }\n\n  iterator Find(std::string_view member) {\n    if (entries_.empty())\n      return end();\n\n    uint64_t hash = Hash(member);\n    auto bucket_id = BucketId(hash, capacity_log_);\n\n    const auto ext_hash = CalcExtHash(hash, capacity_log_);\n\n    // fast check\n    for (uint32_t i = 0; i < kDisplacementSize; i++) {\n      const uint32_t bid = bucket_id + i;\n      if ((entries_[bid].GetHash() == ext_hash) && entries_[bid].IsEntry()) {\n        if (entries_[bid].Key() == member) {\n          entries_[bid].ExpireIfNeeded(time_now_, &size_, &obj_alloc_used_);\n          return !entries_[bid].Empty() ? iterator{this, bid, 0} : end();\n        }\n      }\n    }\n\n    auto res = FindInternal(bucket_id, member, hash);\n    return res;\n  }\n\n  bool Contains(std::string_view member) {\n    return Find(member) != end();\n  }\n\n  // Returns the number of elements in the map. Note that it might be that some of these elements\n  // have expired and can't be accessed.\n  size_t UpperBoundSize() const {\n    return size_;\n  }\n\n  bool Empty() const {\n    return size_ == 0;\n  }\n\n  std::uint32_t BucketCount() const {\n    return entries_.empty() ? 
0 : (1 << capacity_log_);\n  }\n\n  std::uint32_t Capacity() const {\n    return (1 << capacity_log_) + kDisplacementSize - 1;\n  }\n\n  // set an abstract time that allows expiry.\n  void set_time(uint32_t val) {\n    time_now_ = val;\n  }\n\n  uint32_t time_now() const {\n    return time_now_;\n  }\n\n  size_t ObjAllocUsed() const {\n    return obj_alloc_used_;\n  }\n\n  size_t SetAllocUsed() const {\n    return entries_.capacity() * sizeof(OAHEntry) + ptr_vectors_alloc_used_;\n  }\n\n  bool ExpirationUsed() const {\n    // TODO\n    assert(false);\n    return true;\n  }\n\n  size_t SizeSlow() {\n    // TODO\n    assert(false);\n    // CollectExpired();\n    return size_;\n  }\n\n private:\n  static uint64_t Hash(std::string_view str) {\n    constexpr XXH64_hash_t kHashSeed = 24061983;\n    return XXH3_64bits_withSeed(str.data(), str.size(), kHashSeed);\n  }\n\n  static uint32_t BucketId(uint64_t hash, uint32_t capacity_log) {\n    return hash >> (64 - capacity_log);\n  }\n  // was Grow in StringSet\n  void Rehash(uint32_t prev_capacity_log, uint32_t prev_size) {\n    if (prev_size == 0) {\n      return;\n    }\n    // we should prevent moving elements before current possition to avoid double processing\n    constexpr size_t mix_size = (2 << kShiftLog) - 1;\n    std::array<OAHEntry, mix_size> old_buckets{};\n    for (size_t i = 0; i < mix_size; ++i) {\n      old_buckets[i] = std::move(entries_[i]);\n    }\n\n    for (size_t bucket_id = prev_size - 1; bucket_id >= mix_size; --bucket_id) {\n      auto bucket = std::move(entries_[bucket_id]);\n      for (uint32_t pos = 0, size = bucket.ElementsNum(); pos < size; ++pos) {\n        if (bucket[pos]) {\n          auto new_bucket_id = RehashEntry(bucket[pos], bucket_id, prev_capacity_log);\n          new_bucket_id = FindEmptyAround(new_bucket_id);\n          ptr_vectors_alloc_used_ += entries_[new_bucket_id].Insert(std::move(bucket[pos]));\n        }\n      }\n      if (bucket.IsVector())\n        ptr_vectors_alloc_used_ 
-= bucket.AsVector().AllocSize();\n    }\n\n    for (size_t bucket_id = 0; bucket_id < mix_size; ++bucket_id) {\n      auto& bucket = old_buckets[bucket_id];\n      for (uint32_t pos = 0, size = bucket.ElementsNum(); pos < size; ++pos) {\n        if (bucket[pos]) {\n          auto new_bucket_id = RehashEntry(bucket[pos], bucket_id, prev_capacity_log);\n          new_bucket_id = FindEmptyAround(new_bucket_id);\n          ptr_vectors_alloc_used_ += entries_[new_bucket_id].Insert(std::move(bucket[pos]));\n        }\n      }\n      if (bucket.IsVector())\n        ptr_vectors_alloc_used_ -= bucket.AsVector().AllocSize();\n    }\n  }\n\n  // it is inefficient for now,\n  // TODO predict new position by current position and extended hash\n  void ShrinkBucket(uint32_t bucket_id) {\n    auto bucket = std::move(entries_[bucket_id]);\n    if (bucket.Empty())\n      return;\n\n    for (uint32_t pos = 0, size = bucket.ElementsNum(); pos < size; ++pos) {\n      if (bucket[pos]) {\n        // Check for TTL expiration during shrink - skip expired elements\n        if (bucket[pos].HasExpiry() && bucket[pos].GetExpiry() <= time_now_) {\n          obj_alloc_used_ -= bucket[pos].AllocSize();\n          --size_;\n          continue;\n        }\n\n        auto hash = Hash(bucket[pos].Key());\n        auto new_bucket_id = BucketId(hash, capacity_log_);\n        SetEntryHash(bucket[pos], hash);\n        new_bucket_id = FindEmptyAround(new_bucket_id);\n        ptr_vectors_alloc_used_ += entries_[new_bucket_id].Insert(std::move(bucket[pos]));\n      }\n    }\n\n    if (bucket.IsVector()) {\n      ptr_vectors_alloc_used_ -= bucket.AsVector().AllocSize();\n    }\n  }\n\n  uint32_t GetExtensionPoint(const uint32_t bid) const {\n    constexpr uint32_t extension_point_shift = kDisplacementSize - 1;\n    return bid | extension_point_shift;\n  }\n\n  bool FastCheck(const uint32_t bid, std::string_view str, uint64_t hash) {\n    const auto ext_hash = CalcExtHash(hash, capacity_log_);\n    const 
auto ext_bid = GetExtensionPoint(bid);\n\n    bool res = true;\n    for (uint32_t i = 0; i < kDisplacementSize; i++) {\n      const uint32_t bucket_id = bid + i;\n      res &= entries_[bucket_id].CheckNoCollisions(ext_hash);\n    }\n\n    if (res) {\n      if (entries_[ext_bid].IsVector()) {\n        auto& vec = entries_[ext_bid].AsVector();\n        auto raw_arr = vec.Raw();\n        for (size_t i = 0, size = vec.Size(); i < size; ++i) {\n          res &= raw_arr[i].CheckNoCollisions(ext_hash);\n        }\n      }\n      if (!res) {\n        auto pos = FindInBucket(entries_[ext_bid], str, ext_hash);\n        if (pos) {\n          return true;\n        }\n      }\n    } else {\n      return FindInternal(bid, str, hash);\n    }\n    return false;\n  }\n\n  template <class T, std::enable_if_t<std::is_invocable_v<T, std::string_view>>* = nullptr>\n  bool ScanBucket(OAHEntry& entry, const T& cb, uint32_t bucket_id) {\n    if (!entry.IsVector()) {\n      entry.ExpireIfNeeded(time_now_, &size_, &obj_alloc_used_);\n      if (CheckBucketAffiliation(entry, bucket_id)) {\n        cb(entry.Key());\n        return true;\n      }\n    } else {\n      auto& arr = entry.AsVector();\n      bool result = false;\n      for (auto& el : arr) {\n        el.ExpireIfNeeded(time_now_, &size_, &obj_alloc_used_);\n        if (CheckBucketAffiliation(el, bucket_id)) {\n          cb(el.Key());\n          result = true;\n        }\n      }\n      return result;\n    }\n    return false;\n  }\n\n  uint32_t EntryTTL(uint32_t ttl_sec) const {\n    return ttl_sec == UINT32_MAX ? 
ttl_sec : time_now_ + ttl_sec;\n  }\n\n  uint32_t FindEmptyAround(uint32_t bid) {\n    for (uint32_t i = 0; i < kDisplacementSize; i++) {\n      const uint32_t bucket_id = bid + i;\n      if (entries_[bucket_id].Empty())\n        return bucket_id;\n      // TODO add expiration logic\n    }\n\n    bid = GetExtensionPoint(bid);\n    assert(bid < entries_.size());\n    return bid;\n  }\n\n  // Searches for a string within a bucket entry (which may be a single entry or a vector).\n  // Returns the position within the bucket if found, or std::nullopt if not found.\n  std::optional<uint32_t> FindInBucket(OAHEntry& bucket, std::string_view str, uint64_t ext_hash) {\n    if (bucket.IsEntry()) {\n      bucket.ExpireIfNeeded(time_now_, &size_, &obj_alloc_used_);\n      return CheckExtendedHash(bucket, ext_hash) && bucket.Key() == str ? 0\n                                                                        : std::optional<uint32_t>();\n    }\n    if (bucket.IsVector()) {\n      auto& vec = bucket.AsVector();\n      auto raw_arr = vec.Raw();\n      for (size_t i = 0, size = vec.Size(); i < size; ++i) {\n        raw_arr[i].ExpireIfNeeded(time_now_, &size_, &obj_alloc_used_);\n        if (CheckExtendedHash(raw_arr[i], ext_hash) && raw_arr[i].Key() == str) {\n          return i;\n        }\n      }\n    }\n    return std::nullopt;\n  }\n\n  // return bucket_id and position otherwise max\n  iterator FindInternal(uint32_t bid, std::string_view str, uint64_t hash) {\n    const auto ext_hash = CalcExtHash(hash, capacity_log_);\n    for (uint32_t i = 0; i < kDisplacementSize; i++) {\n      const uint32_t bucket_id = bid + i;\n      auto pos = FindInBucket(entries_[bucket_id], str, ext_hash);\n      if (pos) {\n        return iterator{this, bucket_id, *pos};\n      }\n    }\n    return end();\n  }\n\n private:\n  static constexpr std::uint32_t kShiftLog = 2;                         // TODO make template\n  static constexpr std::uint32_t kMinCapacityLog = kShiftLog;           // 
should be >= ShiftLog\n  static constexpr std::uint32_t kDisplacementSize = (1 << kShiftLog);  // TODO check\n\n  static uint64_t CalcExtHash(uint64_t hash, uint32_t capacity_log) {\n    const uint32_t start_hash_bit = capacity_log > kShiftLog ? capacity_log - kShiftLog : 0;\n    const uint32_t ext_hash_shift = 64 - start_hash_bit - OAHEntry::kExtHashSize;\n    return (hash >> ext_hash_shift) & OAHEntry::kExtHashMask;\n  }\n\n  uint64_t SetEntryHash(OAHEntry& entry, uint64_t hash) {\n    uint64_t ext_hash = CalcExtHash(hash, capacity_log_);\n    entry.SetExtHash(ext_hash);\n    return ext_hash;\n  }\n\n  bool CheckBucketAffiliation(OAHEntry& entry, uint32_t bucket_id) {\n    assert(!entry.IsVector());\n    if (entry.Empty())\n      return false;\n    uint32_t bucket_id_hash_part = capacity_log_ > kShiftLog ? kShiftLog : capacity_log_;\n    uint32_t bucket_mask = (1 << bucket_id_hash_part) - 1;\n    bucket_id &= bucket_mask;\n    auto stored_hash = entry.GetHash();\n    if (!stored_hash) {\n      stored_hash = SetEntryHash(entry, Hash(entry.Key()));\n    }\n    uint32_t stored_bucket_id = stored_hash >> (OAHEntry::kExtHashSize - bucket_id_hash_part);\n    return bucket_id == stored_bucket_id;\n  }\n\n  bool CheckExtendedHash(OAHEntry& entry, uint64_t ext_hash) {\n    auto stored_hash = entry.GetHash();\n    if (!stored_hash) {\n      if (entry.IsEntry()) {\n        stored_hash = SetEntryHash(entry, Hash(entry.Key()));\n      } else {\n        return false;\n      }\n    }\n    return stored_hash == ext_hash;\n  }\n\n  // return new bucket_id\n  uint32_t RehashEntry(OAHEntry& entry, uint32_t current_bucket_id, uint32_t prev_capacity_log) {\n    assert(!entry.IsVector());\n    auto stored_hash = entry.GetHash();\n\n    const uint32_t logs_diff = capacity_log_ - prev_capacity_log;\n    const uint32_t prev_significant_bits =\n        prev_capacity_log > kShiftLog ? 
kShiftLog : prev_capacity_log;\n    const uint32_t needed_hash_bits = prev_significant_bits + logs_diff;\n\n    if (!stored_hash || needed_hash_bits > OAHEntry::kExtHashSize) {\n      auto hash = Hash(entry.Key());\n      SetEntryHash(entry, hash);\n      return BucketId(hash, capacity_log_);\n    }\n\n    const uint32_t real_bucket_end =\n        stored_hash >> (OAHEntry::kExtHashSize - prev_significant_bits);\n    const uint32_t prev_shift_mask = (1 << prev_significant_bits) - 1;\n    const uint32_t curr_shift = (current_bucket_id - real_bucket_end) & prev_shift_mask;\n    const uint32_t prev_bucket_mask = (1 << prev_capacity_log) - 1;\n    const uint32_t base_bucket_id = (current_bucket_id - curr_shift) & prev_bucket_mask;\n\n    const uint32_t last_bits_mask = (1 << logs_diff) - 1;\n    const uint32_t stored_hash_shift = OAHEntry::kExtHashSize - needed_hash_bits;\n    const uint32_t last_bits = (stored_hash >> stored_hash_shift) & last_bits_mask;\n    const uint32_t new_bucket_id = (base_bucket_id << logs_diff) | last_bits;\n\n    entry.ClearHash();  // the cache is invalid after rehash operation\n\n    assert(BucketId(Hash(entry.Key()), capacity_log_) == new_bucket_id);\n\n    return new_bucket_id;\n  }\n\n  mutable size_t obj_alloc_used_ = 0;\n  mutable size_t ptr_vectors_alloc_used_ = 0;\n\n  std::uint32_t capacity_log_ = 0;\n  std::uint32_t size_ = 0;  // number of elements in the set.\n  std::uint32_t time_now_ = 0;\n  Buckets entries_;\n};\n\n}  // namespace dfly\n"
  },
  {
    "path": "src/core/oah_set_test.cc",
    "content": "// Copyright 2022, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#include \"core/oah_set.h\"\n\n#include <absl/strings/match.h>\n#include <absl/strings/str_cat.h>\n#include <mimalloc.h>\n\n#include <random>\n#include <set>\n#include <unordered_set>\n\n#include \"base/gtest.h\"\n#include \"core/mi_memory_resource.h\"\n#include \"glog/logging.h\"\n\nextern \"C\" {\n#include \"redis/zmalloc.h\"\n}\n\nnamespace dfly {\n\nusing namespace std;\n\nclass OAHSetTest : public ::testing::Test {\n protected:\n  static void SetUpTestSuite() {\n    auto* tlh = mi_heap_get_backing();\n    init_zmalloc_threadlocal(tlh);\n    InitTLStatelessAllocMR(PMR_NS::get_default_resource());\n  }\n\n  static void TearDownTestSuite() {\n  }\n\n  void SetUp() override {\n    ss_ = new OAHSet;\n    generator_.seed(0);\n  }\n\n  void TearDown() override {\n    delete ss_;\n\n    // ensure there are no memory leaks after every test\n    EXPECT_EQ(zmalloc_used_memory_tl, 0);\n  }\n\n  OAHSet* ss_;\n  mt19937 generator_;\n};\n\nstatic string random_string(mt19937& rand, unsigned len) {\n  const string_view alpanum = \"1234567890abcdefghijklmnopqrstuvwxyz\";\n  string ret;\n  ret.reserve(len);\n\n  for (size_t i = 0; i < len; ++i) {\n    ret += alpanum[rand() % alpanum.size()];\n  }\n\n  return ret;\n}\n\nTEST_F(OAHSetTest, PtrVectorTest) {\n  PtrVector<int> vp(PtrVector<int>::FromLogSize(3));\n  EXPECT_EQ(vp.Size(), 8);\n  EXPECT_EQ(vp.LogSize(), 3);\n  size_t i = 0;\n  for (; i < vp.Size(); ++i) {\n    EXPECT_EQ(vp[i], 0);\n    vp[i] = i + 1;\n  }\n  vp.ResizeLog(4);\n\n  for (; i < vp.Size(); ++i) {\n    EXPECT_EQ(vp[i], 0);\n    vp[i] = i + 1;\n  }\n  EXPECT_EQ(vp.Size(), 16);\n  EXPECT_EQ(vp.LogSize(), 4);\n  for (size_t i = 0; i < vp.Size(); ++i) {\n    EXPECT_EQ(vp[i], i + 1);\n  }\n}\n\nTEST_F(OAHSetTest, OAHEntryTest) {\n  OAHEntry test(\"0123456789\", 2);\n\n  EXPECT_EQ(test.Key(), \"0123456789\"sv);\n  EXPECT_EQ(test.GetExpiry(), 
2);\n\n  OAHEntry first(\"123456789\");\n\n  EXPECT_EQ(test.Insert(std::move(first)), 16);\n\n  EXPECT_EQ(test.Insert(OAHEntry(\"23456789\")), 16);\n\n  EXPECT_TRUE(test.Remove(0));\n  EXPECT_FALSE(test.Remove(0));\n\n  EXPECT_EQ(test.Remove(2).Key(), \"23456789\");\n  EXPECT_EQ(test.Pop().Key(), \"123456789\");\n}\n\nTEST_F(OAHSetTest, OAHSetAddFindTest) {\n  OAHSet ss;\n  std::set<std::string> test_set;\n\n  for (int i = 0; i < 10000; ++i) {\n    test_set.insert(base::RandStr(20));\n  }\n\n  for (const auto& s : test_set) {\n    EXPECT_TRUE(ss.Add(s));\n  }\n\n  for (const auto& s : test_set) {\n    auto e = ss.Find(s);\n    EXPECT_EQ(e->Key(), s);\n  }\n\n  EXPECT_EQ(ss.BucketCount(), 16384);\n}\n\nTEST_F(OAHSetTest, Basic) {\n  EXPECT_TRUE(ss_->Add(\"foo\"sv));\n  EXPECT_TRUE(ss_->Add(\"bar\"sv));\n  uint32_t size = ss_->UpperBoundSize();\n  EXPECT_FALSE(ss_->Add(\"foo\"sv));\n  EXPECT_FALSE(ss_->Add(\"bar\"sv));\n  EXPECT_EQ(ss_->UpperBoundSize(), size);\n  EXPECT_TRUE(ss_->Contains(\"foo\"sv));\n  EXPECT_TRUE(ss_->Contains(\"bar\"sv));\n  EXPECT_EQ(2, ss_->UpperBoundSize());\n}\n\nTEST_F(OAHSetTest, StandardAddErase) {\n  EXPECT_TRUE(ss_->Add(\"@@@@@@@@@@@@@@@@\") != ss_->end());\n  EXPECT_TRUE(ss_->Add(\"A@@@@@@@@@@@@@@@\") != ss_->end());\n  EXPECT_TRUE(ss_->Add(\"AA@@@@@@@@@@@@@@\") != ss_->end());\n  EXPECT_TRUE(ss_->Add(\"AAA@@@@@@@@@@@@@\") != ss_->end());\n  EXPECT_TRUE(ss_->Add(\"AAAAAAAAA@@@@@@@\") != ss_->end());\n  EXPECT_TRUE(ss_->Add(\"AAAAAAAAAA@@@@@@\") != ss_->end());\n  EXPECT_TRUE(ss_->Add(\"AAAAAAAAAAAAAAA@\") != ss_->end());\n  EXPECT_TRUE(ss_->Add(\"AAAAAAAAAAAAAAAA\") != ss_->end());\n  EXPECT_TRUE(ss_->Add(\"AAAAAAAAAAAAAAAD\") != ss_->end());\n  EXPECT_TRUE(ss_->Add(\"BBBBBAAAAAAAAAAA\") != ss_->end());\n  EXPECT_TRUE(ss_->Add(\"BBBBBBBBAAAAAAAA\") != ss_->end());\n  EXPECT_TRUE(ss_->Add(\"CCCCCBBBBBBBBBBB\") != ss_->end());\n\n  // Remove link in the middle of chain\n  EXPECT_TRUE(ss_->Erase(\"BBBBBBBBAAAAAAAA\"));\n  // Remove start 
of a chain\n  EXPECT_TRUE(ss_->Erase(\"CCCCCBBBBBBBBBBB\"));\n  // Remove end of link\n  EXPECT_TRUE(ss_->Erase(\"AAA@@@@@@@@@@@@@\"));\n  // Remove only item in chain\n  EXPECT_TRUE(ss_->Erase(\"AA@@@@@@@@@@@@@@\"));\n  EXPECT_TRUE(ss_->Erase(\"AAAAAAAAA@@@@@@@\"));\n  EXPECT_TRUE(ss_->Erase(\"AAAAAAAAAA@@@@@@\"));\n  EXPECT_TRUE(ss_->Erase(\"AAAAAAAAAAAAAAA@\"));\n}\n\nTEST_F(OAHSetTest, DisplacedBug) {\n  string_view vals[] = {\"imY\", \"OVl\", \"NhH\", \"BCe\", \"YDL\", \"lpb\",\n                        \"nhF\", \"xod\", \"zYR\", \"PSa\", \"hce\", \"cTR\"};\n  ss_->AddMany(absl::MakeSpan(vals), UINT32_MAX);\n\n  ss_->Add(\"fIc\");\n  ss_->Erase(\"YDL\");\n  ss_->Add(\"fYs\");\n  ss_->Erase(\"hce\");\n  ss_->Erase(\"nhF\");\n  ss_->Add(\"dye\");\n  ss_->Add(\"xZT\");\n  ss_->Add(\"LVK\");\n  ss_->Erase(\"zYR\");\n  ss_->Erase(\"fYs\");\n  ss_->Add(\"ueB\");\n  ss_->Erase(\"PSa\");\n  ss_->Erase(\"OVl\");\n  ss_->Add(\"cga\");\n  ss_->Add(\"too\");\n  ss_->Erase(\"ueB\");\n  ss_->Add(\"HZe\");\n  ss_->Add(\"oQn\");\n  ss_->Erase(\"too\");\n  ss_->Erase(\"HZe\");\n  ss_->Erase(\"xZT\");\n  ss_->Erase(\"cga\");\n  ss_->Erase(\"cTR\");\n  ss_->Erase(\"BCe\");\n  ss_->Add(\"eua\");\n  ss_->Erase(\"lpb\");\n  ss_->Add(\"OXK\");\n  ss_->Add(\"QmO\");\n  ss_->Add(\"SzV\");\n  ss_->Erase(\"QmO\");\n  ss_->Add(\"jbe\");\n  ss_->Add(\"BPN\");\n  ss_->Add(\"OfH\");\n  ss_->Add(\"Muf\");\n  ss_->Add(\"CwP\");\n  ss_->Erase(\"Muf\");\n  ss_->Erase(\"xod\");\n  ss_->Add(\"Cis\");\n  ss_->Add(\"Xvd\");\n  ss_->Erase(\"SzV\");\n  ss_->Erase(\"eua\");\n  ss_->Add(\"DGb\");\n  ss_->Add(\"leD\");\n  ss_->Add(\"MVX\");\n  ss_->Add(\"HPq\");\n}\n\nTEST_F(OAHSetTest, Resizing) {\n  constexpr size_t num_strs = 4096;\n  unordered_set<string> strs;\n  while (strs.size() != num_strs) {\n    auto str = random_string(generator_, 10);\n    strs.insert(str);\n  }\n\n  unsigned size = 0;\n  for (auto it = strs.begin(); it != strs.end(); ++it) {\n    const auto& str = *it;\n    
EXPECT_TRUE(ss_->Add(str, 1));\n    EXPECT_EQ(ss_->UpperBoundSize(), size + 1);\n\n    // make sure we haven't lost any items after a grow\n    // which happens every power of 2\n    if ((size & (size - 1)) == 0) {\n      for (auto j = strs.begin(); j != it; ++j) {\n        const auto& str = *j;\n        auto it = ss_->Find(str);\n        ASSERT_NE(it, ss_->end());\n        EXPECT_TRUE(it.HasExpiry());\n        EXPECT_EQ(it.ExpiryTime(), ss_->time_now() + 1);\n      }\n    }\n    ++size;\n  }\n}\n\nTEST_F(OAHSetTest, SimpleScan) {\n  unordered_set<string_view> info = {\"foo\", \"bar\"};\n  unordered_set<string_view> seen;\n\n  for (auto str : info) {\n    EXPECT_TRUE(ss_->Add(str));\n  }\n\n  uint32_t cursor = 0;\n  do {\n    cursor = ss_->Scan(cursor, [&](std::string_view str) {\n      EXPECT_TRUE(info.count(str));\n      seen.insert(str);\n    });\n  } while (cursor != 0);\n\n  EXPECT_EQ(seen.size(), info.size());\n  EXPECT_TRUE(equal(seen.begin(), seen.end(), info.begin()));\n}\n\n// // Ensure REDIS scan guarantees are met\nTEST_F(OAHSetTest, ScanGuarantees) {\n  unordered_set<string_view> to_be_seen = {\"foo\", \"bar\"};\n  unordered_set<string_view> not_be_seen = {\"AAA\", \"BBB\"};\n  unordered_set<string_view> maybe_seen = {\"AA@@@@@@@@@@@@@@\", \"AAA@@@@@@@@@@@@@\",\n                                           \"AAAAAAAAA@@@@@@@\", \"AAAAAAAAAA@@@@@@\"};\n  unordered_set<string_view> seen;\n\n  auto scan_callback = [&](std::string_view str) {\n    EXPECT_TRUE(to_be_seen.count(str) || maybe_seen.count(str));\n    EXPECT_FALSE(not_be_seen.count(str));\n    if (to_be_seen.count(str)) {\n      seen.insert(str);\n    }\n  };\n\n  EXPECT_EQ(ss_->Scan(0, scan_callback), 0);\n\n  for (auto str : not_be_seen) {\n    EXPECT_TRUE(ss_->Add(str));\n  }\n\n  for (auto str : not_be_seen) {\n    EXPECT_TRUE(ss_->Erase(str));\n  }\n\n  for (auto str : to_be_seen) {\n    EXPECT_TRUE(ss_->Add(str));\n  }\n\n  // should reach at least the first item in the set\n  uint32_t 
cursor = ss_->Scan(0, scan_callback);\n\n  for (auto str : maybe_seen) {\n    EXPECT_TRUE(ss_->Add(str));\n  }\n\n  while (cursor != 0) {\n    cursor = ss_->Scan(cursor, scan_callback);\n  }\n\n  EXPECT_TRUE(seen.size() == to_be_seen.size());\n}\n\nTEST_F(OAHSetTest, IntOnly) {\n  constexpr size_t num_ints = 8192;\n  unordered_set<unsigned int> numbers;\n  for (size_t i = 0; i < num_ints; ++i) {\n    numbers.insert(i);\n    EXPECT_TRUE(ss_->Add(to_string(i)));\n  }\n  EXPECT_EQ(ss_->UpperBoundSize(), num_ints);\n\n  for (size_t i = 0; i < num_ints; ++i) {\n    ASSERT_FALSE(ss_->Add(to_string(i)));\n  }\n  EXPECT_EQ(ss_->UpperBoundSize(), num_ints);\n\n  size_t num_remove = generator_() % 4096;\n  unordered_set<string> removed;\n\n  for (size_t i = 0; i < num_remove; ++i) {\n    auto remove_int = generator_() % num_ints;\n    auto remove = to_string(remove_int);\n    if (numbers.count(remove_int)) {\n      ASSERT_TRUE(ss_->Contains(remove)) << remove_int;\n      EXPECT_TRUE(ss_->Erase(remove));\n      numbers.erase(remove_int);\n    } else {\n      EXPECT_FALSE(ss_->Erase(remove));\n    }\n\n    EXPECT_FALSE(ss_->Contains(remove));\n    removed.insert(remove);\n  }\n\n  size_t expected_seen = 0;\n  auto scan_callback = [&](std::string_view str_v) {\n    std::string str(str_v);\n    EXPECT_FALSE(removed.count(str));\n\n    if (numbers.count(std::atoi(str.data()))) {\n      ++expected_seen;\n    }\n  };\n\n  uint32_t cursor = 0;\n  do {\n    cursor = ss_->Scan(cursor, scan_callback);\n    // randomly throw in some new numbers\n    uint32_t val = generator_();\n    ss_->Add(to_string(val));\n  } while (cursor != 0);\n\n  EXPECT_GE(expected_seen + removed.size(), num_ints);\n}\n\nTEST_F(OAHSetTest, XtremeScanGrow) {\n  unordered_set<string> to_see, force_grow, seen;\n\n  while (to_see.size() != 8) {\n    to_see.insert(random_string(generator_, 10));\n  }\n\n  while (force_grow.size() != 8192) {\n    string str = random_string(generator_, 10);\n\n    if 
(to_see.count(str)) {\n      continue;\n    }\n\n    force_grow.insert(random_string(generator_, 10));\n  }\n\n  for (auto& str : to_see) {\n    EXPECT_TRUE(ss_->Add(str));\n  }\n\n  auto scan_callback = [&](string_view strv) {\n    std::string str(strv);\n    if (to_see.count(str)) {\n      seen.insert(str);\n    }\n  };\n\n  uint32_t cursor = ss_->Scan(0, scan_callback);\n\n  // force approx 10 grows\n  for (auto& s : force_grow) {\n    EXPECT_TRUE(ss_->Add(s));\n  }\n\n  while (cursor != 0) {\n    cursor = ss_->Scan(cursor, scan_callback);\n  }\n\n  EXPECT_EQ(seen.size(), to_see.size());\n}\n\nTEST_F(OAHSetTest, Pop) {\n  constexpr size_t num_items = 8;\n  unordered_set<string> to_insert;\n\n  while (to_insert.size() != num_items) {\n    auto str = random_string(generator_, 10);\n    if (to_insert.count(str)) {\n      continue;\n    }\n\n    to_insert.insert(str);\n    EXPECT_TRUE(ss_->Add(str));\n  }\n\n  while (!ss_->Empty()) {\n    size_t size = ss_->UpperBoundSize();\n    auto str = ss_->Pop();\n    DCHECK(ss_->UpperBoundSize() == to_insert.size() - 1);\n    DCHECK(str);\n    DCHECK(to_insert.count(std::string(str.Key())));\n    DCHECK_EQ(ss_->UpperBoundSize(), size - 1);\n    to_insert.erase(std::string(str.Key()));\n  }\n\n  DCHECK(ss_->Empty());\n  DCHECK(to_insert.empty());\n}\n\nTEST_F(OAHSetTest, Iteration) {\n  ss_->Add(\"foo\");\n  for (const auto& ptr : *ss_) {\n    LOG(INFO) << ptr;\n  }\n  ss_->Clear();\n  constexpr size_t num_items = 8192;\n  unordered_set<string> to_insert;\n\n  while (to_insert.size() != num_items) {\n    auto str = random_string(generator_, 10);\n    if (to_insert.count(str)) {\n      continue;\n    }\n\n    to_insert.insert(str);\n    EXPECT_TRUE(ss_->Add(str));\n  }\n\n  for (const auto& ptr : *ss_) {\n    std::string str(ptr.Key());\n    EXPECT_TRUE(to_insert.count(str));\n    to_insert.erase(str);\n  }\n\n  EXPECT_EQ(to_insert.size(), 0);\n}\n\nTEST_F(OAHSetTest, SetFieldExpireHasExpiry) {\n  EXPECT_TRUE(ss_->Add(\"k1\", 
100));\n  auto k = ss_->Find(\"k1\");\n  EXPECT_TRUE(k.HasExpiry());\n  EXPECT_EQ(k.ExpiryTime(), 100);\n  k.SetExpiryTime(1);\n  EXPECT_TRUE(k.HasExpiry());\n  EXPECT_EQ(k.ExpiryTime(), 1);\n}\n\nTEST_F(OAHSetTest, SetFieldExpireNoHasExpiry) {\n  EXPECT_TRUE(ss_->Add(\"k1\"));\n  auto k = ss_->Find(\"k1\");\n  EXPECT_FALSE(k.HasExpiry());\n  k.SetExpiryTime(10);\n  EXPECT_TRUE(k.HasExpiry());\n  EXPECT_EQ(k.ExpiryTime(), 10);\n}\n\nTEST_F(OAHSetTest, Ttl) {\n  EXPECT_TRUE(ss_->Add(\"bla\"sv, 1));\n  EXPECT_FALSE(ss_->Add(\"bla\"sv, 1));\n  auto it = ss_->Find(\"bla\"sv);\n  EXPECT_EQ(1u, it.ExpiryTime());\n\n  ss_->set_time(1);\n  EXPECT_TRUE(ss_->Add(\"bla\"sv, 1));\n  EXPECT_EQ(1u, ss_->UpperBoundSize());\n\n  for (unsigned i = 0; i < 100; ++i) {\n    EXPECT_TRUE(ss_->Add(absl::StrCat(\"foo\", i), 1));\n  }\n  EXPECT_EQ(101u, ss_->UpperBoundSize());\n  it = ss_->Find(\"foo50\");\n  EXPECT_EQ(\"foo50\"sv, it->Key());\n  EXPECT_EQ(2u, it.ExpiryTime());\n\n  ss_->set_time(2);\n  // Cleanup all `foo` entries\n  uint32_t cursor = 0;\n  do {\n    cursor = ss_->Scan(cursor, [&](std::string_view) {});\n  } while (cursor != 0);\n\n  for (unsigned i = 0; i < 100; ++i) {\n    EXPECT_TRUE(ss_->Add(absl::StrCat(\"bar\", i)));\n  }\n  EXPECT_EQ(100u, ss_->UpperBoundSize());\n  it = ss_->Find(\"bar50\");\n  EXPECT_FALSE(it.HasExpiry());\n\n  for (auto it = ss_->begin(); it != ss_->end(); ++it) {\n    ASSERT_TRUE(absl::StartsWith(it->Key(), \"bar\")) << it->Key();\n    string str(it->Key());\n    VLOG(1) << *it;\n  }\n}\n\nTEST_F(OAHSetTest, Grow) {\n  for (size_t j = 0; j < 10; ++j) {\n    for (size_t i = 0; i < 4098; ++i) {\n      ss_->Reserve(generator_() % 256);\n      auto str = random_string(generator_, 3);\n      ss_->Add(str);\n    }\n    ss_->Clear();\n  }\n}\n\nTEST_F(OAHSetTest, Reserve) {\n  vector<string> strs;\n\n  for (size_t i = 0; i < 10; ++i) {\n    strs.push_back(random_string(generator_, 10));\n    ss_->Add(strs.back());\n  }\n\n  for (size_t j = 2; j < 20; 
j += 3) {\n    ss_->Reserve(j * 20);\n    for (size_t i = 0; i < 10; ++i) {\n      ASSERT_TRUE(ss_->Contains(strs[i]));\n    }\n  }\n}\n\nTEST_F(OAHSetTest, Fill) {\n  for (size_t i = 0; i < 100; ++i) {\n    ss_->Add(random_string(generator_, 10));\n  }\n  OAHSet s2;\n  ss_->Fill(&s2);\n  EXPECT_EQ(s2.UpperBoundSize(), ss_->UpperBoundSize());\n  for (const auto& s : *ss_) {\n    EXPECT_TRUE(s2.Contains(s.Key()));\n  }\n}\n\nTEST_F(OAHSetTest, IterateEmpty) {\n  for (const auto& s : *ss_) {\n    // We're iterating to make sure there is no crash. However, if we got here, it's a bug\n    CHECK(false) << \"Found entry \" << s << \" in empty set\";\n  }\n}\n\nstatic size_t MemUsed(OAHSet& obj) {\n  return obj.ObjAllocUsed() + obj.SetAllocUsed();\n}\n\nvoid BM_Clone(benchmark::State& state) {\n  vector<string> strs;\n  mt19937 generator(0);\n  OAHSet ss1, ss2;\n  unsigned elems = state.range(0);\n  for (size_t i = 0; i < elems; ++i) {\n    string str = random_string(generator, 10);\n    ss1.Add(str);\n  }\n  ss2.Reserve(ss1.UpperBoundSize());\n  while (state.KeepRunning()) {\n    for (auto& src : ss1) {\n      ss2.Add(src.Key());\n    }\n    state.PauseTiming();\n    ss2.Clear();\n    ss2.Reserve(ss1.UpperBoundSize());\n    state.ResumeTiming();\n  }\n}\nBENCHMARK(BM_Clone)->ArgName(\"elements\")->Arg(32000);\n\nvoid BM_Fill(benchmark::State& state) {\n  unsigned elems = state.range(0);\n  vector<string> strs;\n  mt19937 generator(0);\n  OAHSet ss1, ss2;\n  for (size_t i = 0; i < elems; ++i) {\n    string str = random_string(generator, 10);\n    ss1.Add(str);\n  }\n\n  while (state.KeepRunning()) {\n    ss1.Fill(&ss2);\n    state.PauseTiming();\n    ss2.Clear();\n    state.ResumeTiming();\n  }\n}\nBENCHMARK(BM_Fill)->ArgName(\"elements\")->Arg(32000);\n\nvoid BM_Clear(benchmark::State& state) {\n  unsigned elems = state.range(0);\n  mt19937 generator(0);\n  OAHSet ss;\n  while (state.KeepRunning()) {\n    state.PauseTiming();\n    for (size_t i = 0; i < elems; ++i) {\n   
   string str = random_string(generator, 16);\n      ss.Add(str);\n    }\n    state.ResumeTiming();\n    ss.Clear();\n  }\n}\nBENCHMARK(BM_Clear)->ArgName(\"elements\")->Arg(32000);\n\nvoid BM_Add(benchmark::State& state) {\n  vector<string> strs;\n  mt19937 generator(0);\n  OAHSet ss;\n  unsigned elems = state.range(0);\n  unsigned keySize = state.range(1);\n  for (size_t i = 0; i < elems; ++i) {\n    string str = random_string(generator, keySize);\n    strs.push_back(str);\n  }\n  ss.Reserve(elems);\n  size_t mem_used = 0;\n  while (state.KeepRunning()) {\n    for (auto& str : strs)\n      ss.Add(str);\n    state.PauseTiming();\n    mem_used += MemUsed(ss);\n    ss.Clear();\n    ss.Reserve(elems);\n    state.ResumeTiming();\n  }\n  state.counters[\"Memory_Used\"] = mem_used / state.iterations();\n}\nBENCHMARK(BM_Add)\n    ->ArgNames({\"elements\", \"KeySize\"})\n    ->ArgsProduct({{1000, 10000, 100000}, {10, 100, 1000}});\n\nvoid BM_AddMany(benchmark::State& state) {\n  vector<string> strs;\n  mt19937 generator(0);\n  OAHSet ss;\n  unsigned elems = state.range(0);\n  unsigned keySize = state.range(1);\n  for (size_t i = 0; i < elems; ++i) {\n    string str = random_string(generator, keySize);\n    strs.push_back(str);\n  }\n  ss.Reserve(elems);\n  vector<string_view> svs;\n  size_t mem_used = 0;\n  for (const auto& str : strs) {\n    svs.push_back(str);\n  }\n  while (state.KeepRunning()) {\n    ss.AddMany(absl::MakeSpan(svs));\n    state.PauseTiming();\n    CHECK_EQ(ss.UpperBoundSize(), elems);\n    mem_used += MemUsed(ss);\n    ss.Clear();\n    ss.Reserve(elems);\n    state.ResumeTiming();\n  }\n  state.counters[\"Memory_Used\"] = mem_used / state.iterations();\n}\nBENCHMARK(BM_AddMany)\n    ->ArgNames({\"elements\", \"KeySize\"})\n    ->ArgsProduct({{1000, 10000, 100000}, {10, 100, 1000}});\n\nvoid BM_Erase(benchmark::State& state) {\n  std::vector<std::string> strs;\n  mt19937 generator(0);\n  OAHSet ss;\n  auto elems = state.range(0);\n  auto keySize = 
state.range(1);\n  for (long int i = 0; i < elems; ++i) {\n    std::string str = random_string(generator, keySize);\n    strs.push_back(str);\n    ss.Add(str);\n  }\n  state.counters[\"Memory_Before_Erase\"] = MemUsed(ss);\n  size_t mem_used = 0;\n  while (state.KeepRunning()) {\n    for (auto& str : strs) {\n      ss.Erase(str);\n    }\n    state.PauseTiming();\n    mem_used += MemUsed(ss);\n    for (auto& str : strs) {\n      ss.Add(str);\n    }\n    state.ResumeTiming();\n  }\n  state.counters[\"Memory_After_Erase\"] = mem_used / state.iterations();\n}\nBENCHMARK(BM_Erase)\n    ->ArgNames({\"elements\", \"KeySize\"})\n    ->ArgsProduct({{1000, 10000, 100000}, {10, 100, 1000}});\n\nvoid BM_Get(benchmark::State& state) {\n  std::vector<std::string> strs;\n  mt19937 generator(0);\n  OAHSet ss;\n  auto elems = state.range(0);\n  auto keySize = state.range(1);\n  for (long int i = 0; i < elems; ++i) {\n    std::string str = random_string(generator, keySize);\n    strs.push_back(str);\n    ss.Add(str);\n  }\n  while (state.KeepRunning()) {\n    for (auto& str : strs) {\n      ss.Find(str);\n    }\n  }\n}\nBENCHMARK(BM_Get)\n    ->ArgNames({\"elements\", \"KeySize\"})\n    ->ArgsProduct({{1000, 10000, 100000}, {10, 100, 1000}});\n\nvoid BM_Grow(benchmark::State& state) {\n  vector<string> strs;\n  mt19937 generator(0);\n  OAHSet src;\n  unsigned elems = 1 << 18;\n  for (size_t i = 0; i < elems; ++i) {\n    src.Add(random_string(generator, 16), UINT32_MAX);\n    strs.push_back(random_string(generator, 16));\n  }\n\n  while (state.KeepRunning()) {\n    state.PauseTiming();\n    OAHSet tmp;\n    src.Fill(&tmp);\n    CHECK_EQ(tmp.BucketCount(), elems);\n    state.ResumeTiming();\n    for (const auto& str : strs) {\n      tmp.Add(str);\n      if (tmp.BucketCount() > elems) {\n        break;  // we grew\n      }\n    }\n\n    CHECK_GT(tmp.BucketCount(), elems);\n  }\n}\nBENCHMARK(BM_Grow);\n\n// unsigned total_wasted_memory = 0;\n\n// TEST_F(OAHSetTest, ReallocIfNeeded) 
{\n//   auto build_str = [](size_t i) { return to_string(i) + string(131, 'a'); };\n\n//   auto count_waste = [](const mi_heap_t* heap, const mi_heap_area_t* area, void* block,\n//                         size_t block_size, void* arg) {\n//     size_t used = block_size * area->used;\n//     total_wasted_memory += area->committed - used;\n//     return true;\n//   };\n\n//   for (size_t i = 0; i < 10'000; i++)\n//     ss_->Add(build_str(i));\n\n//   for (size_t i = 0; i < 10'000; i++) {\n//     if (i % 10 == 0)\n//       continue;\n//     ss_->Erase(build_str(i));\n//   }\n\n//   mi_heap_collect(mi_heap_get_backing(), true);\n//   mi_heap_visit_blocks(mi_heap_get_backing(), false, count_waste, nullptr);\n//   size_t wasted_before = total_wasted_memory;\n\n//   size_t underutilized = 0;\n//   for (auto it = ss_->begin(); it != ss_->end(); ++it) {\n//     underutilized += zmalloc_page_is_underutilized(*it, 0.9);\n//     it.ReallocIfNeeded(0.9);\n//   }\n//   // Check there are underutilized pages\n//   CHECK_GT(underutilized, 0u);\n\n//   total_wasted_memory = 0;\n//   mi_heap_collect(mi_heap_get_backing(), true);\n//   mi_heap_visit_blocks(mi_heap_get_backing(), false, count_waste, nullptr);\n//   size_t wasted_after = total_wasted_memory;\n\n//   // Check we waste significanlty less now\n//   EXPECT_GT(wasted_before, wasted_after * 2);\n\n//   EXPECT_EQ(ss_->UpperBoundSize(), 1000);\n//   for (size_t i = 0; i < 1000; i++)\n//     EXPECT_EQ(*ss_->Find(build_str(i * 10)), build_str(i * 10));\n// }\n\nclass ShrinkTest : public OAHSetTest, public ::testing::WithParamInterface<size_t> {};\n\nTEST_P(ShrinkTest, BasicShrink) {\n  constexpr size_t num_strs = 1000000;\n  size_t shrink_to = GetParam();\n\n  vector<string> strs;\n  for (size_t i = 0; i < num_strs; ++i) {\n    strs.push_back(random_string(generator_, 10));\n    EXPECT_TRUE(ss_->Add(strs.back()));\n  }\n\n  // Grow to a larger size\n  ss_->Reserve(1 << 22);\n  size_t original_bucket_count = ss_->BucketCount();\n 
 EXPECT_EQ(original_bucket_count, 1u << 22);\n\n  // Shrink to the parameterized size\n  ss_->Shrink(shrink_to);\n\n  EXPECT_EQ(ss_->BucketCount(), shrink_to);\n  EXPECT_EQ(ss_->UpperBoundSize(), num_strs);\n\n  // Verify all elements are still accessible\n  for (const auto& str : strs) {\n    EXPECT_TRUE(ss_->Contains(str)) << \"Missing: \" << str;\n  }\n}\n\nINSTANTIATE_TEST_SUITE_P(ShrinkSizes, ShrinkTest,\n                         ::testing::Values(1u << 21,   // 2M buckets (sparse)\n                                           1u << 20,   // 1M buckets (~1 per bucket)\n                                           1u << 19),  // 512K buckets (~2 per bucket)\n                         [](const auto& info) { return absl::StrCat(\"buckets_\", info.param); });\n\nTEST_F(OAHSetTest, ShrinkWithTTL) {\n  constexpr size_t num_strs = 1000000;\n\n  // Track elements by their TTL category\n  vector<string> expired_strs;    // TTL 1-50, will expire\n  vector<string> surviving_strs;  // TTL 51-100, will survive\n  vector<string> no_ttl_strs;     // No TTL, will survive\n\n  for (size_t i = 0; i < num_strs; ++i) {\n    string str = random_string(generator_, 10);\n    if (i % 3 == 0) {\n      // No TTL\n      EXPECT_TRUE(ss_->Add(str));\n      no_ttl_strs.push_back(str);\n    } else if (i % 3 == 1) {\n      // TTL 1-50 (will expire when time=50)\n      uint32_t ttl = (i % 50) + 1;\n      EXPECT_TRUE(ss_->Add(str, ttl));\n      expired_strs.push_back(str);\n    } else {\n      // TTL 51-100 (will survive when time=50)\n      uint32_t ttl = (i % 50) + 51;\n      EXPECT_TRUE(ss_->Add(str, ttl));\n      surviving_strs.push_back(str);\n    }\n  }\n\n  // Grow to larger size\n  ss_->Reserve(1 << 22);\n\n  // Set time to 50 - this will expire elements with TTL <= 50\n  ss_->set_time(50);\n\n  // Shrink\n  ss_->Shrink(1 << 21);\n  EXPECT_EQ(ss_->BucketCount(), 1u << 21);\n\n  // Verify expired elements are gone\n  for (const auto& str : expired_strs) {\n    EXPECT_EQ(ss_->Find(str), 
ss_->end()) << \"Should be expired: \" << str;\n  }\n\n  // Verify surviving TTL elements are still accessible with correct TTL\n  for (const auto& str : surviving_strs) {\n    auto it = ss_->Find(str);\n    ASSERT_NE(it, ss_->end()) << \"Missing surviving TTL element: \" << str;\n    EXPECT_TRUE(it.HasExpiry());\n    EXPECT_GT(it.ExpiryTime(), 50u);\n  }\n\n  // Verify no-TTL elements are still accessible\n  for (const auto& str : no_ttl_strs) {\n    auto it = ss_->Find(str);\n    ASSERT_NE(it, ss_->end()) << \"Missing no-TTL element: \" << str;\n    EXPECT_FALSE(it.HasExpiry());\n  }\n}\n\nTEST_F(OAHSetTest, ScanWithShrinkBetweenCalls) {\n  // Test that cursor-based scanning works correctly when Grow and Shrink happen between Scan calls\n  // This verifies SCAN guarantees: elements present at start and end of scan must be seen\n  constexpr size_t num_strs = 1000000;\n  vector<string> strs;\n  unordered_set<string> must_see;\n\n  // Add elements and track them\n  for (size_t i = 0; i < num_strs; ++i) {\n    strs.push_back(random_string(generator_, 10));\n    EXPECT_TRUE(ss_->Add(strs.back()));\n    must_see.insert(strs.back());\n  }\n\n  // Note initial bucket count (will be ~1M after adding 1M elements)\n  size_t initial_bucket_count = ss_->BucketCount();\n\n  unordered_set<string> seen;\n  auto scan_callback = [&](const string_view str) { seen.emplace(str); };\n\n  // Start scanning BEFORE Grow\n  uint32_t cursor = ss_->Scan(0, scan_callback);\n  EXPECT_NE(cursor, 0u) << \"Should not finish in one iteration\";\n\n  // Grow to large size in the middle of scanning\n  ss_->Reserve(1 << 22);\n  EXPECT_EQ(ss_->BucketCount(), 1u << 22);\n  EXPECT_GT(ss_->BucketCount(), initial_bucket_count);\n\n  // Continue scanning a bit after Grow\n  cursor = ss_->Scan(cursor, scan_callback);\n\n  // Now Shrink in the middle of scanning - this is the key test\n  // Elements that existed at scan start must still be visible\n  ss_->Shrink(1 << 21);\n  EXPECT_EQ(ss_->BucketCount(), 1u 
<< 21);\n\n  // Continue scanning with the same cursor\n  constexpr int max_iterations = 1 << 22;\n  int iterations = 0;\n  while (cursor != 0 && iterations < max_iterations) {\n    cursor = ss_->Scan(cursor, scan_callback);\n    iterations++;\n  }\n  EXPECT_LT(iterations, max_iterations) << \"Hit iteration limit\";\n  EXPECT_EQ(cursor, 0u) << \"Scan should complete\";\n\n  // Verify all original elements were seen\n  for (const auto& str : must_see) {\n    ASSERT_TRUE(seen.count(str)) << \"Missing element after shrink: \" << str;\n  }\n  EXPECT_EQ(seen.size(), must_see.size()) << \"Should see exactly all original elements\";\n}\n\n}  // namespace dfly\n"
  },
  {
    "path": "src/core/overloaded.h",
    "content": "// Copyright 2022, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n//\n\n#pragma once\n\nnamespace dfly {\ntemplate <class... Ts> struct Overloaded : Ts... { using Ts::operator()...; };\n\ntemplate <class... Ts> Overloaded(Ts...) -> Overloaded<Ts...>;\n\n}  // namespace dfly\n"
  },
  {
    "path": "src/core/page_usage/CMakeLists.txt",
    "content": "add_library(dfly_page_usage page_usage_stats.cc)\ntarget_link_libraries(dfly_page_usage base TRDP::hdr_histogram redis_lib absl::strings)\n"
  },
  {
    "path": "src/core/page_usage/page_usage_stats.cc",
    "content": "// Copyright 2025, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#include \"core/page_usage/page_usage_stats.h\"\n\n#include <absl/container/flat_hash_set.h>\n#include <absl/strings/ascii.h>\n#include <absl/strings/str_join.h>\n#include <glog/logging.h>\n#include <hdr/hdr_histogram.h>\n\n#include <string>\n\n#include \"base/cycle_clock.h\"\n\nextern \"C\" {\n#include <unistd.h>\n\n#include \"redis/zmalloc.h\"\nmi_page_usage_stats_t mi_heap_page_is_underutilized(mi_heap_t* heap, void* p, float ratio,\n                                                    bool collect_stats);\n}\n\nnamespace dfly {\n\nusing absl::StrAppend;\nusing absl::StrFormat;\nusing absl::StripTrailingAsciiWhitespace;\n\nnamespace {\nconstexpr auto kUsageHistPoints = std::array{50, 90, 99};\nconstexpr auto kHistSignificantFigures = 3;\n\nHllBufferPtr InitHllPtr() {\n  HllBufferPtr p;\n  p.size = getDenseHllSize();\n  p.hll = new uint8_t[p.size];\n  CHECK_EQ(0, createDenseHll(p));\n  return p;\n}\n\n}  // namespace\n\nCycleQuota::CycleQuota(const uint64_t quota_usec)\n    : CycleQuota(base::CycleClock::FromUsec(quota_usec), true) {\n}\n\nvoid CycleQuota::Arm() {\n  start_cycles_ = base::CycleClock::Now();\n}\n\nbool CycleQuota::Depleted() const {\n  if (quota_cycles_ == kMaxQuota)\n    return false;\n  return UsedCycles() >= quota_cycles_;\n}\n\nuint64_t CycleQuota::UsedCycles() const {\n  return base::CycleClock::Now() - start_cycles_;\n}\n\nCycleQuota CycleQuota::Unlimited() {\n  return CycleQuota(kMaxQuota, true);\n}\n\nvoid CycleQuota::Extend(const uint64_t quota_usec) {\n  if (quota_cycles_ == kMaxQuota)\n    return;\n\n  quota_cycles_ += base::CycleClock::FromUsec(quota_usec);\n}\n\nCycleQuota::CycleQuota(const uint64_t quota_cycles, bool /*tag*/) : quota_cycles_{quota_cycles} {\n  Arm();\n}\n\nvoid CollectedPageStats::Merge(CollectedPageStats&& other, uint16_t shard_id) {\n  this->pages_scanned += other.pages_scanned;\n  
this->pages_marked_for_realloc += other.pages_marked_for_realloc;\n  this->pages_full += other.pages_full;\n  this->pages_reserved_for_malloc += other.pages_reserved_for_malloc;\n  this->pages_with_heap_mismatch += other.pages_with_heap_mismatch;\n  this->pages_above_threshold += other.pages_above_threshold;\n  this->objects_skipped_not_required += other.objects_skipped_not_required;\n  this->objects_skipped_not_supported += other.objects_skipped_not_supported;\n  shard_wide_summary.emplace(std::make_pair(shard_id, std::move(other.page_usage_hist)));\n}\n\nCollectedPageStats CollectedPageStats::Merge(std::vector<CollectedPageStats>&& stats,\n                                             const float threshold) {\n  CollectedPageStats result;\n  result.threshold = threshold;\n\n  size_t shard_index = 0;\n  for (CollectedPageStats& stat : stats) {\n    result.Merge(std::move(stat), shard_index++);\n  }\n  return result;\n}\n\nstd::string CollectedPageStats::ToString() const {\n  std::string response;\n  StrAppend(&response, \"Page usage threshold: \", threshold * 100, \"\\n\");\n  StrAppend(&response, \"Pages scanned: \", pages_scanned, \"\\n\");\n  StrAppend(&response, \"Pages marked for reallocation: \", pages_marked_for_realloc, \"\\n\");\n  StrAppend(&response, \"Pages full: \", pages_full, \"\\n\");\n  StrAppend(&response, \"Pages reserved for malloc: \", pages_reserved_for_malloc, \"\\n\");\n  StrAppend(&response, \"Pages skipped due to heap mismatch: \", pages_with_heap_mismatch, \"\\n\");\n  StrAppend(&response, \"Pages with usage above threshold: \", pages_above_threshold, \"\\n\");\n  StrAppend(&response,\n            \"Objects skipped (do not require defragmentation): \", objects_skipped_not_required,\n            \"\\n\");\n  StrAppend(&response,\n            \"Objects skipped (do not support defragmentation): \", objects_skipped_not_supported,\n            \"\\n\");\n  for (const auto& [shard_id, usage] : shard_wide_summary) {\n    StrAppend(&response, 
\"[Shard \", shard_id, \"]\\n\");\n    for (const auto& [percentage, count] : usage) {\n      StrAppend(&response,\n                StrFormat(\" %d%% pages are below %d%% block usage\\n\", percentage, count));\n    }\n  }\n  StripTrailingAsciiWhitespace(&response);\n  return response;\n}\n\nPageUsage::UniquePages::UniquePages()\n    : pages_scanned{InitHllPtr()},\n      pages_marked_for_realloc{InitHllPtr()},\n      pages_full{InitHllPtr()},\n      pages_reserved_for_malloc{InitHllPtr()},\n      pages_with_heap_mismatch{InitHllPtr()},\n      pages_above_threshold{InitHllPtr()} {\n  hdr_histogram* h = nullptr;\n  const auto init_result = hdr_init(1, 100, kHistSignificantFigures, &h);\n  CHECK_EQ(0, init_result) << \"failed to initialize histogram\";\n  page_usage_hist = h;\n}\n\nPageUsage::UniquePages::~UniquePages() {\n  delete[] pages_scanned.hll;\n  delete[] pages_marked_for_realloc.hll;\n  delete[] pages_full.hll;\n  delete[] pages_reserved_for_malloc.hll;\n  delete[] pages_with_heap_mismatch.hll;\n  delete[] pages_above_threshold.hll;\n  hdr_close(page_usage_hist);\n}\n\nvoid PageUsage::UniquePages::AddStat(mi_page_usage_stats_t stat) {  // NOLINT should not be const\n  const auto data = reinterpret_cast<const unsigned char*>(&stat.page_address);\n\n  auto record = [&data](HllBufferPtr ctr) { pfadd_dense(ctr, data, sizeof(stat.page_address)); };\n\n  record(pages_scanned);\n\n  if (stat.flags & MI_DFLY_PAGE_BELOW_THRESHOLD) {\n    record(pages_marked_for_realloc);\n  }\n  if (stat.flags & MI_DFLY_PAGE_FULL) {\n    record(pages_full);\n  }\n  if (stat.flags & MI_DFLY_HEAP_MISMATCH) {\n    record(pages_with_heap_mismatch);\n  }\n  if (stat.flags & MI_DFLY_PAGE_USED_FOR_MALLOC) {\n    record(pages_reserved_for_malloc);\n  }\n  if (stat.flags == 0) {\n    // No special flags means the page is above the threshold but not full - record usage for\n    // histogram. 
This allows tuning the threshold for future commands.\n    record(pages_above_threshold);\n    hdr_record_value(page_usage_hist, 100.0 * stat.used / stat.capacity);\n  }\n}\n\nCollectedPageStats PageUsage::UniquePages::CollectedStats() const {\n  CollectedPageStats::ShardUsageSummary usage;\n  for (const auto p : kUsageHistPoints) {\n    usage[p] = hdr_value_at_percentile(page_usage_hist, p);\n  }\n\n  return CollectedPageStats{\n      .pages_scanned = static_cast<uint64_t>(pfcountSingle(pages_scanned)),\n      .pages_marked_for_realloc = static_cast<uint64_t>(pfcountSingle(pages_marked_for_realloc)),\n      .pages_full = static_cast<uint64_t>(pfcountSingle(pages_full)),\n      .pages_reserved_for_malloc = static_cast<uint64_t>(pfcountSingle(pages_reserved_for_malloc)),\n      .pages_with_heap_mismatch = static_cast<uint64_t>(pfcountSingle(pages_with_heap_mismatch)),\n      .pages_above_threshold = static_cast<uint64_t>(pfcountSingle(pages_above_threshold)),\n      .objects_skipped_not_required = objects_skipped_not_required,\n      .objects_skipped_not_supported = objects_skipped_not_supported,\n      .page_usage_hist = std::move(usage),\n      .shard_wide_summary = {}};\n}\n\nPageUsage::PageUsage(CollectPageStats collect_stats, float threshold, CycleQuota quota)\n    : collect_stats_{collect_stats}, threshold_{threshold}, quota_{quota} {\n}\n\nvoid PageUsage::ArmQuotaTimer() {\n  quota_.Arm();\n}\n\nuint64_t PageUsage::UsedQuotaCycles() const {\n  return quota_.UsedCycles();\n}\n\nbool PageUsage::IsPageForObjectUnderUtilized(void* object) {\n  mi_page_usage_stats_t stat;\n  zmalloc_page_is_underutilized(object, threshold_, collect_stats_ == CollectPageStats::YES, &stat);\n  return ConsumePageStats(stat);\n}\n\nbool PageUsage::IsPageForObjectUnderUtilized(mi_heap_t* heap, void* object) {\n  return ConsumePageStats(mi_heap_page_is_underutilized(heap, object, threshold_,\n                                                        collect_stats_ == 
CollectPageStats::YES));\n}\n\nbool PageUsage::ConsumePageStats(mi_page_usage_stats_t stat) {\n  const bool should_reallocate = stat.flags == MI_DFLY_PAGE_BELOW_THRESHOLD;\n  if (collect_stats_ == CollectPageStats::YES) {\n    unique_pages_.AddStat(stat);\n  }\n  return force_reallocate_ || should_reallocate;\n}\n\nbool PageUsage::QuotaDepleted() const {\n  return quota_.Depleted();\n}\n\nvoid PageUsage::ExtendQuota(uint64_t quota_usec) {\n  quota_.Extend(quota_usec);\n}\n\n}  // namespace dfly\n"
  },
  {
    "path": "src/core/page_usage/page_usage_stats.h",
    "content": "// Copyright 2025, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#pragma once\n\n#include <absl/container/btree_map.h>\n\n#define MI_BUILD_RELEASE 1\n#include <mimalloc/types.h>\n\nextern \"C\" {\n#include \"redis/hyperloglog.h\"\n}\n\nstruct hdr_histogram;\n\nnamespace dfly {\n\nclass CycleQuota {\n public:\n  static constexpr uint64_t kMaxQuota = std::numeric_limits<uint64_t>::max();\n  static constexpr uint64_t kDefaultDefragQuota = 150;\n\n  explicit CycleQuota(uint64_t quota_usec);\n\n  // Sets the starting point for the quota to be counted from. Can be called multiple times to reset\n  // the quota counter.\n  void Arm();\n\n  bool Depleted() const;\n\n  uint64_t UsedCycles() const;\n\n  static CycleQuota Unlimited();\n\n  // Extends the quota by the given amount. If any quota was already left over, it is also retained\n  // on top of the newly added quota. For example, if 80 usec was left, and we extend by 50 usec,\n  // the task now has 130 usec before the quota will be depleted.\n  void Extend(uint64_t quota_usec);\n\n private:\n  explicit CycleQuota(uint64_t quota_cycles, bool /*tag*/);\n\n  uint64_t quota_cycles_;\n  uint64_t start_cycles_{0};\n};\n\nenum class CollectPageStats : uint8_t { YES, NO };\n\nstruct CollectedPageStats {\n  double threshold{0.0};\n  uint64_t pages_scanned{0};\n  uint64_t pages_marked_for_realloc{0};\n  uint64_t pages_full{0};\n  uint64_t pages_reserved_for_malloc{0};\n  uint64_t pages_with_heap_mismatch{0};\n  uint64_t pages_above_threshold{0};\n  uint64_t objects_skipped_not_required{0};\n  uint64_t objects_skipped_not_supported{0};\n\n  using ShardUsageSummary = absl::btree_map<uint8_t, uint64_t>;\n  ShardUsageSummary page_usage_hist;\n  absl::btree_map<uint16_t, ShardUsageSummary> shard_wide_summary;\n\n  void Merge(CollectedPageStats&& other, uint16_t shard_id);\n  static CollectedPageStats Merge(std::vector<CollectedPageStats>&& stats, float threshold);\n\n  
std::string ToString() const;\n};\n\nclass PageUsage {\n public:\n  PageUsage(CollectPageStats collect_stats, float threshold,\n            CycleQuota quota = CycleQuota::Unlimited());\n\n  virtual ~PageUsage() = default;\n\n  // Resets the quota timer to split defragmentation into different groups with separate quotas.\n  // For example, first defragment objects with a quota and then defragment search indices with the\n  // same quota independently.\n  void ArmQuotaTimer();\n\n  uint64_t UsedQuotaCycles() const;\n\n  virtual bool IsPageForObjectUnderUtilized(void* object);\n\n  bool IsPageForObjectUnderUtilized(mi_heap_t* heap, void* object);\n\n  CollectedPageStats CollectedStats() const {\n    return unique_pages_.CollectedStats();\n  }\n\n  bool ConsumePageStats(mi_page_usage_stats_t stats);\n\n  void RecordNotRequired() {\n    unique_pages_.objects_skipped_not_required += 1;\n  }\n\n  void RecordNotSupported() {\n    unique_pages_.objects_skipped_not_supported += 1;\n  }\n\n  void SetForceReallocate(bool force_reallocate) {\n    force_reallocate_ = force_reallocate;\n  }\n\n  bool QuotaDepleted() const;\n\n  void ExtendQuota(uint64_t quota_usec);\n\n private:\n  CollectPageStats collect_stats_{CollectPageStats::NO};\n  float threshold_;\n\n  struct UniquePages {\n    HllBufferPtr pages_scanned;\n    HllBufferPtr pages_marked_for_realloc;\n    HllBufferPtr pages_full;\n    HllBufferPtr pages_reserved_for_malloc;\n    HllBufferPtr pages_with_heap_mismatch;\n    HllBufferPtr pages_above_threshold;\n    hdr_histogram* page_usage_hist{};\n\n    uint64_t objects_skipped_not_required{0};\n    uint64_t objects_skipped_not_supported{0};\n\n    explicit UniquePages();\n    ~UniquePages();\n\n    void AddStat(mi_page_usage_stats_t stat);\n    CollectedPageStats CollectedStats() const;\n  };\n\n  UniquePages unique_pages_;\n\n  CycleQuota quota_;\n\n  // For use in testing, forces reallocate check to always return true\n  bool force_reallocate_{false};\n};\n\n}  // 
namespace dfly\n"
  },
  {
    "path": "src/core/page_usage_stats_test.cc",
    "content": "// Copyright 2025, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#include \"core/page_usage/page_usage_stats.h\"\n\n#include <absl/flags/reflection.h>\n#include <gmock/gmock-matchers.h>\n\n#include <random>\n\n#include \"base/gtest.h\"\n#include \"base/logging.h\"\n#include \"core/compact_object.h\"\n#include \"core/qlist.h\"\n#include \"core/score_map.h\"\n#include \"core/search/block_list.h\"\n#include \"core/search/search.h\"\n#include \"core/small_string.h\"\n#include \"core/sorted_map.h\"\n#include \"core/string_map.h\"\n#include \"core/string_set.h\"\n#include \"redis/redis_aux.h\"\n#include \"util/fibers/fibers.h\"\n\nextern \"C\" {\n#include \"redis/zmalloc.h\"\n}\n\nABSL_DECLARE_FLAG(bool, experimental_flat_json);\n\nusing namespace dfly;\nusing namespace std::chrono_literals;\n\nnamespace {\n\nstd::string GenerateTestJSON(size_t num_objects) {\n  std::string data = R\"({\"contents\":[)\";\n  for (size_t i = 0; i < num_objects; ++i) {\n    const auto si = std::to_string(i);\n    data += R\"({\"id\":)\" + si + R\"(,\"class\":\"v___)\" + si + R\"(\",\"value\":)\" + si + R\"(})\";\n    if (i < num_objects - 1) {\n      data += \",\";\n    }\n  }\n  data += R\"(], \"data\": \"some\", \"count\": 1, \"checked\": false})\";\n  return data;\n}\n\n// Helper to defragment only if a randomly generated value is less than preset probability. 
For\n// benchmarking realistic situations, where some nodes are fragmented and others are not\nclass SelectiveDefragment : public PageUsage {\n public:\n  explicit SelectiveDefragment(const double fragmentation_probability)\n      : PageUsage(CollectPageStats::NO, 0), frag_prob_{fragmentation_probability} {\n  }\n\n  bool IsPageForObjectUnderUtilized(void*) override {\n    return dist_(rng_) < frag_prob_;\n  }\n\n private:\n  double frag_prob_;\n  std::mt19937 rng_{99};\n  std::uniform_real_distribution<double> dist_{0.0, 1.0};\n};\n\nstruct MemStats {\n  size_t total_reserved{0};\n  size_t total_committed{0};\n  size_t total_used{0};\n  size_t total_wasted{0};\n  size_t num_pages{0};\n};\n\nMemStats LogMemStats(const mi_heap_t* heap) {\n  MemStats stats;\n  mi_heap_visit_blocks(\n      heap, false,\n      [](const mi_heap_t* /*h*/, const mi_heap_area_t* area, void* /*block*/, size_t block_size,\n         void* arg) {\n        const size_t committed = area->committed;\n        const size_t used = area->used * block_size;\n\n        const auto s = static_cast<MemStats*>(arg);\n        s->num_pages++;\n        s->total_committed += committed;\n        s->total_reserved += area->reserved;\n        s->total_used += used;\n        s->total_wasted += committed - used;\n\n        return true;\n      },\n      &stats);\n\n  LOG(INFO) << \"Pages: \" << stats.num_pages;\n  LOG(INFO) << \"Reserved : \" << stats.total_reserved << \" bytes\";\n  LOG(INFO) << \"Committed: \" << stats.total_committed << \" bytes\";\n  LOG(INFO) << \"Used: \" << stats.total_used << \" bytes\";\n  LOG(INFO) << \"Wasted: \" << stats.total_wasted << \" bytes\";\n  if (stats.total_committed) {\n    LOG(INFO) << \"Wasted%: \"\n              << static_cast<double>(stats.total_wasted) / stats.total_committed * 100.0;\n    LOG(INFO) << \"Utilization%: \"\n              << static_cast<double>(stats.total_used) / stats.total_committed * 100.0;\n  }\n\n  return stats;\n}\n\n}  // namespace\n\nclass 
PageUsageStatsTest : public ::testing::Test {\n protected:\n  static void SetUpTestSuite() {\n    init_zmalloc_threadlocal(mi_heap_get_backing());\n  }\n\n  static void TearDownTestSuite() {\n    mi_heap_collect(mi_heap_get_backing(), true);\n    mi_heap_visit_blocks(\n        mi_heap_get_backing(), false,\n        [](auto*, auto* a, void*, size_t block_sz, void*) {\n          LOG(ERROR) << \"Unfreed allocations: block_size \" << block_sz\n                     << \", allocated: \" << a->used * block_sz;\n          return true;\n        },\n        nullptr);\n  }\n\n  PageUsageStatsTest() : m_(mi_heap_get_backing()) {\n    InitTLStatelessAllocMR(&m_);\n  }\n\n  void SetUp() override {\n    CompactObj::InitThreadLocal(&m_);\n\n    score_map_ = std::make_unique<ScoreMap>();\n    sorted_map_ = std::make_unique<detail::SortedMap>();\n    string_set_ = std::make_unique<StringSet>();\n    string_map_ = std::make_unique<StringMap>();\n    SmallString::InitThreadLocal(m_.heap());\n    qlist_ = std::make_unique<QList>(2, 2);\n  }\n\n  void TearDown() override {\n    score_map_.reset();\n    sorted_map_.reset();\n    string_set_.reset();\n    string_map_.reset();\n    small_string_.Free();\n    qlist_->Clear();\n    EXPECT_EQ(zmalloc_used_memory_tl, 0);\n    c_obj_.Reset();\n    CleanupStatelessAllocMR();\n  }\n\n  MiMemoryResource m_;\n  std::unique_ptr<ScoreMap> score_map_;\n  std::unique_ptr<detail::SortedMap> sorted_map_;\n  std::unique_ptr<StringSet> string_set_;\n  std::unique_ptr<StringMap> string_map_;\n  SmallString small_string_{};\n  std::unique_ptr<QList> qlist_;\n  CompactValue c_obj_{};\n};\n\nTEST_F(PageUsageStatsTest, Defrag) {\n  score_map_->AddOrUpdate(\"test\", 0.1);\n  sorted_map_->InsertNew(0.1, \"x\");\n  string_set_->Add(\"a\");\n  string_map_->AddOrUpdate(\"key\", \"value\");\n  small_string_.Assign(\"small-string\");\n\n  // INT_TAG, defrag will be skipped\n  c_obj_.SetString(\"1\");\n\n  qlist_->Push(\"xxxx\", QList::HEAD);\n\n  {\n    PageUsage 
p{CollectPageStats::YES, 0.1};\n    score_map_->begin().ReallocIfNeeded(&p);\n    sorted_map_->DefragIfNeeded(&p);\n    string_set_->begin().ReallocIfNeeded(&p);\n    string_map_->begin().ReallocIfNeeded(&p);\n    small_string_.DefragIfNeeded(&p);\n    c_obj_.DefragIfNeeded(&p);\n    qlist_->DefragIfNeeded(&p);\n\n    const auto stats = p.CollectedStats();\n    EXPECT_GT(stats.pages_scanned, 0);\n    EXPECT_EQ(stats.objects_skipped_not_required, 1);\n  }\n\n  {\n    PageUsage p{CollectPageStats::NO, 0.1};\n    score_map_->begin().ReallocIfNeeded(&p);\n    sorted_map_->DefragIfNeeded(&p);\n    string_set_->begin().ReallocIfNeeded(&p);\n    string_map_->begin().ReallocIfNeeded(&p);\n    small_string_.DefragIfNeeded(&p);\n    qlist_->DefragIfNeeded(&p);\n    EXPECT_EQ(p.CollectedStats().pages_scanned, 0);\n  }\n}\n\nTEST_F(PageUsageStatsTest, StatCollection) {\n  constexpr auto threshold = 0.5;\n  PageUsage p{CollectPageStats::YES, threshold};\n  for (size_t i = 0; i < 10000; ++i) {\n    p.ConsumePageStats({.page_address = uintptr_t{100000 + i},\n                        .block_size = 1,\n                        .capacity = 100,\n                        .reserved = 100,\n                        .used = 65,\n                        .flags = 0});\n  }\n\n  for (size_t i = 0; i < 2000; ++i) {\n    p.ConsumePageStats({.page_address = uintptr_t{200000 + i},\n                        .block_size = 1,\n                        .capacity = 100,\n                        .reserved = 100,\n                        .used = 85,\n                        .flags = 0});\n  }\n\n  for (size_t i = 0; i < 1000; ++i) {\n    p.ConsumePageStats({.page_address = uintptr_t{300000 + i},\n                        .block_size = 1,\n                        .capacity = 100,\n                        .reserved = 100,\n                        .used = 89,\n                        .flags = 0});\n  }\n\n  constexpr auto page_count_per_flag = 150;\n\n  auto start = 0;\n  for (const uint8_t flag : 
{MI_DFLY_PAGE_FULL, MI_DFLY_PAGE_USED_FOR_MALLOC, MI_DFLY_HEAP_MISMATCH,\n                             MI_DFLY_PAGE_BELOW_THRESHOLD}) {\n    for (size_t i = 0; i < page_count_per_flag; ++i) {\n      p.ConsumePageStats({.page_address = uintptr_t{start + i},\n                          .block_size = 1,\n                          .capacity = 100,\n                          .reserved = 100,\n                          .used = 100,\n                          .flags = flag});\n    }\n    start += page_count_per_flag;\n  }\n\n  CollectedPageStats st;\n  st.Merge(p.CollectedStats(), 1);\n\n  EXPECT_GT(st.pages_scanned, 12000);\n\n  // Expect a small error margin due to HLL\n  EXPECT_NEAR(st.pages_full, page_count_per_flag, 5);\n  EXPECT_NEAR(st.pages_reserved_for_malloc, page_count_per_flag, 5);\n  EXPECT_NEAR(st.pages_marked_for_realloc, page_count_per_flag, 5);\n\n  const auto usage = st.shard_wide_summary;\n\n  EXPECT_EQ(usage.size(), 1);\n  EXPECT_TRUE(usage.contains(1));\n\n  const CollectedPageStats::ShardUsageSummary expected{{50, 65}, {90, 85}, {99, 89}};\n  EXPECT_EQ(usage.at(1), expected);\n}\n\nTEST_F(PageUsageStatsTest, JSONCons) {\n  // Because of the static encoding it is not possible to easily test the flat encoding. Once the\n  // encoding flag is set, it is not re-read. If friend class is used to access the compact object\n  // inner fields and call `DefragIfNeeded` directly on the flat variant of the union, the test will\n  // still fail. This is because freeing the compact object code path takes the wrong branch based\n  // on encoding. 
The flat encoding was tested manually adjusting this same test with changed\n  // encoding.\n  std::string data = GenerateTestJSON(1000);\n\n  auto* mr = static_cast<MiMemoryResource*>(CompactObj::memory_resource());\n  size_t before = mr->used();\n\n  auto parsed = ParseJsonUsingShardHeap(data);\n  EXPECT_TRUE(parsed.has_value());\n\n  c_obj_.SetJson(std::move(parsed.value()));\n  c_obj_.SetJsonSize(mr->used() - before);\n  EXPECT_GT(c_obj_.MallocUsed(), 0);\n\n  PageUsage p{CollectPageStats::YES, 0.1};\n  p.SetForceReallocate(true);\n\n  c_obj_.DefragIfNeeded(&p);\n  EXPECT_GT(c_obj_.MallocUsed(), 0);\n\n  const auto stats = p.CollectedStats();\n  EXPECT_GT(stats.pages_scanned, 0);\n  EXPECT_EQ(stats.objects_skipped_not_required, 0);\n\n  EXPECT_EQ(c_obj_.ObjType(), OBJ_JSON);\n\n  auto json_obj = c_obj_.GetJson();\n  EXPECT_EQ(json_obj->at(\"data\").as_string_view(), \"some\");\n  EXPECT_EQ(json_obj->at(\"count\").as_integer<uint8_t>(), 1);\n  EXPECT_EQ(json_obj->at(\"checked\").as_bool(), false);\n}\n\nTEST_F(PageUsageStatsTest, JsonDefragEmpty) {\n  auto parsed = ParseJsonUsingShardHeap(R\"({})\");\n  EXPECT_TRUE(parsed.has_value());\n\n  PageUsage p{CollectPageStats::NO, 0};\n  p.SetForceReallocate(true);\n\n  Defragment(parsed.value(), &p);\n  EXPECT_TRUE(parsed->empty());\n}\n\nTEST_F(PageUsageStatsTest, JsonDefragNested) {\n  constexpr auto data = R\"({\"a\":{\"b\":{\"c\":{\"d\":\"value\"}}}})\";\n  auto parsed = ParseJsonUsingShardHeap(data);\n  EXPECT_TRUE(parsed.has_value());\n\n  PageUsage p{CollectPageStats::NO, 0};\n  p.SetForceReallocate(true);\n\n  Defragment(parsed.value(), &p);\n  EXPECT_EQ(parsed->at(\"a\").at(\"b\").at(\"c\").at(\"d\").as_string_view(), \"value\");\n}\n\nTEST_F(PageUsageStatsTest, JsonDefragRemainsInSameHeap) {\n  // This is a brute force test that defragmentation does not erroneously move data to the default\n  // heap. Comparing allocators before/after defragmentation is not useful as stateless allocators\n  // are all equal. 
It might be possible to compare the allocator type, but this approach checks\n  // that the pointers in a JSON object belong to the same heap as they did before defragmentation.\n\n  const std::string data = R\"({\n    \"data\": {\"sub-data\": \"attr1\"},\n    \"values\": [true, false, 1.11, 2],\n    \"secretkey\": \")\" + std::string(1024, '.') +\n                           \"\\\"}\";\n\n  auto json = ParseJsonUsingShardHeap(data);\n  EXPECT_TRUE(json.has_value());\n\n  auto key_before = json->at(\"secretkey\").as_string_view();\n  auto sub_before = json->at(\"data\").at(\"sub-data\").as_string_view();\n  auto values_before = &*json->at(\"values\").array_range().begin();\n\n  EXPECT_TRUE(mi_heap_contains_block(m_.heap(), key_before.data()));\n  EXPECT_TRUE(mi_heap_contains_block(m_.heap(), sub_before.data()));\n  EXPECT_TRUE(mi_heap_contains_block(m_.heap(), values_before));\n\n  PageUsage p{CollectPageStats::NO, 0};\n  p.SetForceReallocate(true);\n\n  Defragment(json.value(), &p);\n\n  auto key_after = json->at(\"secretkey\").as_string_view();\n  auto sub_after = json->at(\"data\").at(\"sub-data\").as_string_view();\n  auto values_after = &*json->at(\"values\").array_range().begin();\n\n  // Data still managed by the same heap.\n  EXPECT_TRUE(mi_heap_contains_block(m_.heap(), key_after.data()));\n  EXPECT_TRUE(mi_heap_contains_block(m_.heap(), sub_after.data()));\n  EXPECT_TRUE(mi_heap_contains_block(m_.heap(), values_after));\n\n  // Defragment actually changed addresses\n  EXPECT_NE(key_after.data(), key_before.data());\n  EXPECT_NE(sub_after.data(), sub_before.data());\n  EXPECT_NE(values_after, values_before);\n}\n\nTEST_F(PageUsageStatsTest, QuotaChecks) {\n  {\n    PageUsage p{CollectPageStats::NO, 0};\n    EXPECT_FALSE(p.QuotaDepleted());\n  }\n  {\n    PageUsage p{CollectPageStats::NO, 0, CycleQuota{4}};\n    util::ThisFiber::SleepFor(5us);\n    EXPECT_TRUE(p.QuotaDepleted());\n  }\n}\n\nTEST_F(PageUsageStatsTest, BlockList) {\n  
search::BlockList<search::SortedVector<search::DocId>> bl{&m_, 20};\n  PageUsage p{CollectPageStats::NO, 0.1};\n  p.SetForceReallocate(true);\n\n  // empty list\n  auto result = bl.Defragment(&p);\n  EXPECT_FALSE(result.quota_depleted);\n  EXPECT_EQ(result.objects_moved, 0);\n\n  // single item will move twice, once for the blocklist and once for the sorted vector\n  bl.Insert(1);\n  result = bl.Defragment(&p);\n  EXPECT_FALSE(result.quota_depleted);\n  EXPECT_EQ(result.objects_moved, 2);\n\n  // quota depleted without defragmentation\n  PageUsage p_zero{CollectPageStats::NO, 0.1, CycleQuota{0}};\n  p_zero.SetForceReallocate(true);\n  result = bl.Defragment(&p_zero);\n  EXPECT_TRUE(result.quota_depleted);\n  EXPECT_EQ(result.objects_moved, 0);\n}\n\nTEST_F(PageUsageStatsTest, BlockListDefragmentResumes) {\n  search::BlockList<search::SortedVector<search::DocId>> bl{&m_, 20};\n  PageUsage p{CollectPageStats::NO, 0.1};\n  p.SetForceReallocate(true);\n\n  for (size_t i = 0; i < 1000; ++i) {\n    bl.Insert(i);\n  }\n\n  PageUsage p_small_quota{CollectPageStats::NO, 0.1, CycleQuota{10}};\n  p_small_quota.SetForceReallocate(true);\n  util::ThisFiber::SleepFor(10us);\n  auto result = bl.Defragment(&p_small_quota);\n  EXPECT_TRUE(result.quota_depleted);\n  EXPECT_GE(result.objects_moved, 0);\n\n  result = bl.Defragment(&p);\n  EXPECT_FALSE(result.quota_depleted);\n  EXPECT_GT(result.objects_moved, 0);\n}\n\nTEST_F(PageUsageStatsTest, BlockListWithPairs) {\n  search::BlockList<search::SortedVector<std::pair<search::DocId, double>>> bl{&m_, 20};\n  PageUsage p{CollectPageStats::NO, 0.1};\n  p.SetForceReallocate(true);\n\n  for (size_t i = 0; i < 100; ++i) {\n    bl.Insert({i, i * 1.1});\n  }\n\n  const auto result = bl.Defragment(&p);\n  EXPECT_FALSE(result.quota_depleted);\n  EXPECT_GT(result.objects_moved, 0);\n}\n\nTEST_F(PageUsageStatsTest, BlockListWithNonDefragmentableContainer) {\n  search::BlockList<search::CompressedSortedSet> bl{&m_, 20};\n  PageUsage 
p{CollectPageStats::NO, 0.1};\n  p.SetForceReallocate(true);\n\n  // empty list\n  auto result = bl.Defragment(&p);\n  EXPECT_FALSE(result.quota_depleted);\n  EXPECT_EQ(result.objects_moved, 0);\n\n  // will reallocate once for the blocklist, the inner sorted set will be skipped\n  bl.Insert(1);\n  result = bl.Defragment(&p);\n  EXPECT_FALSE(result.quota_depleted);\n  EXPECT_EQ(result.objects_moved, 1);\n}\n\nclass MockDocument final : public search::DocumentAccessor {\n public:\n  MockDocument() {\n    words.reserve(1000);\n    for (size_t i = 0; i < 1000; ++i) {\n      words.push_back(absl::StrFormat(\"word-%d\", i));\n    }\n  }\n\n  std::optional<StringList> GetStrings(std::string_view active_field) const override {\n    return {{words[absl::GetCurrentTimeNanos() % words.size()]}};\n  }\n  std::optional<VectorInfo> GetVector(std::string_view active_field, size_t dim) const override {\n    return std::nullopt;\n  }\n  std::optional<NumsList> GetNumbers(std::string_view active_field) const override {\n    return {{1, 2, 3, 4}};\n  }\n  std::optional<StringList> GetTags(std::string_view active_field) const override {\n    return {{words[absl::GetCurrentTimeNanos() % words.size()]}};\n  }\n\n  std::vector<std::string> words;\n};\n\nTEST_F(PageUsageStatsTest, DefragmentTagIndex) {\n  search::Schema schema;\n  schema.fields[\"field_name\"] =\n      search::SchemaField{search::SchemaField::TAG, 0, \"fn\", search::SchemaField::TagParams{}};\n  search::FieldIndices index{schema, {}, &m_, nullptr};\n\n  PageUsage p{CollectPageStats::NO, 0.1};\n  p.SetForceReallocate(true);\n\n  // Empty index\n  search::DefragmentResult result = index.Defragment(&p);\n  EXPECT_FALSE(result.quota_depleted);\n  EXPECT_EQ(result.objects_moved, 0);\n\n  const MockDocument md;\n  index.Add(1, md);\n\n  result = index.Defragment(&p);\n  EXPECT_FALSE(result.quota_depleted);\n  // single doc with single term returned by `GetTags` should result in two reallocations.\n  
EXPECT_EQ(result.objects_moved, 2);\n\n  PageUsage p_zero{CollectPageStats::NO, 0.1, CycleQuota{0}};\n  p_zero.SetForceReallocate(true);\n  result = index.Defragment(&p_zero);\n  EXPECT_TRUE(result.quota_depleted);\n  EXPECT_EQ(result.objects_moved, 0);\n}\n\nTEST_F(PageUsageStatsTest, TagIndexDefragResumeWithChanges) {\n  search::Schema schema;\n  schema.fields[\"field_name\"] =\n      search::SchemaField{search::SchemaField::TAG, 0, \"fn\", search::SchemaField::TagParams{}};\n  search::FieldIndices index{schema, {}, &m_, nullptr};\n\n  PageUsage p{CollectPageStats::NO, 0.1};\n  p.SetForceReallocate(true);\n\n  const MockDocument md;\n  for (size_t i = 0; i < 100; ++i) {\n    index.Add(i, md);\n  }\n\n  PageUsage p_small_quota{CollectPageStats::NO, 0.1, CycleQuota{10}};\n  p_small_quota.SetForceReallocate(true);\n  util::ThisFiber::SleepFor(10us);\n  search::DefragmentResult result = index.Defragment(&p_small_quota);\n  EXPECT_TRUE(result.quota_depleted);\n  EXPECT_GE(result.objects_moved, 0);\n\n  index.Remove(99, md);\n\n  for (size_t i = 200; i < 300; ++i) {\n    index.Add(i, md);\n  }\n\n  result = index.Defragment(&p);\n  EXPECT_FALSE(result.quota_depleted);\n  EXPECT_GT(result.objects_moved, 0);\n}\n\nTEST_F(PageUsageStatsTest, DefragmentIndexWithNonDefragmentableFields) {\n  search::Schema schema;\n  schema.fields[\"text\"] =\n      search::SchemaField{search::SchemaField::TEXT, 0, \"fn\", search::SchemaField::TextParams{}};\n  schema.fields[\"num\"] = search::SchemaField{search::SchemaField::NUMERIC, 0, \"fn\",\n                                             search::SchemaField::NumericParams{}};\n  search::IndicesOptions options{{}};\n  search::FieldIndices index{schema, options, &m_, nullptr};\n\n  PageUsage p{CollectPageStats::NO, 0.1};\n  p.SetForceReallocate(true);\n\n  const MockDocument md;\n  index.Add(1, md);\n\n  // Unsupported index types will skip defragmenting themselves\n  const search::DefragmentResult result = index.Defragment(&p);\n  
EXPECT_FALSE(result.quota_depleted);\n  EXPECT_EQ(result.objects_moved, 0);\n}\n\nTEST_F(PageUsageStatsTest, DefragReducesWaste) {\n  // This test works with actual defragmentation, by deleting every other json object which creates\n  // holes in pages which cannot be directly freed. The test asserts that wasted memory goes down as\n  // well as committed memory after defragmentation.\n\n  std::vector<std::optional<JsonType>> all_objects;\n\n  constexpr auto total_json = 100;\n  all_objects.reserve(total_json);\n\n  for (auto i = 0; i < total_json; ++i) {\n    auto parsed = ParseJsonUsingShardHeap(GenerateTestJSON(500));\n    EXPECT_TRUE(parsed.has_value());\n    all_objects.emplace_back(std::move(parsed.value()));\n  }\n\n  // Delete every other object to create gaps, so that the pages are partially used.\n  for (size_t i = 0; i < all_objects.size(); i += 2) {\n    all_objects[i].reset();\n  }\n\n  // Allow mimalloc to free any completely empty pages, if any\n  mi_heap_collect(m_.heap(), true);\n\n  // Collects stats using mi_visit.. 
also logs, to see logs run the test with:\n  // --vmodule=page_usage_stats_test=1 --logtostderr\n  const auto before = LogMemStats(m_.heap());\n\n  PageUsage p{CollectPageStats::NO, 0.8};\n  for (auto& j : all_objects) {\n    if (j.has_value()) {\n      Defragment(j.value(), &p);\n    }\n  }\n\n  mi_heap_collect(m_.heap(), true);\n  const auto after = LogMemStats(m_.heap());\n\n  EXPECT_LT(after.total_wasted, before.total_wasted);\n  EXPECT_LT(after.total_committed, before.total_committed);\n}\n\nTEST_F(PageUsageStatsTest, MixedFlagHandling) {\n  PageUsage p{CollectPageStats::YES, 0.0};\n  auto add_pages = [&](size_t count, uintptr_t start_address, uint8_t flags) {\n    for (const size_t i : std::views::iota(0UL, count)) {\n      p.ConsumePageStats({.page_address = uintptr_t{start_address + i},\n                          .block_size = 100,\n                          .capacity = 1000,\n                          .reserved = 100,\n                          .used = 99,\n                          .flags = flags});\n    }\n  };\n\n  add_pages(2000, 10, MI_DFLY_PAGE_FULL | MI_DFLY_PAGE_USED_FOR_MALLOC | MI_DFLY_HEAP_MISMATCH);\n  add_pages(500, 50000, MI_DFLY_PAGE_BELOW_THRESHOLD);\n\n  const auto stats = p.CollectedStats();\n\n  constexpr auto tolerance = 60;\n  EXPECT_NEAR(stats.pages_full, 2000, tolerance);\n  EXPECT_NEAR(stats.pages_reserved_for_malloc, 2000, tolerance);\n  EXPECT_NEAR(stats.pages_with_heap_mismatch, 2000, tolerance);\n  EXPECT_EQ(stats.pages_full, stats.pages_reserved_for_malloc);\n  EXPECT_EQ(stats.pages_full, stats.pages_with_heap_mismatch);\n\n  EXPECT_NEAR(stats.pages_marked_for_realloc, 500, 15);\n}\n\nnamespace {\n\nvoid InitBenchMemRes() {\n  static bool initialized = false;\n  if (!initialized) {\n    auto* tlh = mi_heap_get_backing();\n    init_zmalloc_threadlocal(tlh);\n    static MiMemoryResource m{tlh};\n    InitTLStatelessAllocMR(&m);\n    CompactObj::InitThreadLocal(&m);\n    initialized = true;\n  }\n}\n\n}  // namespace\n\nvoid 
BM_JSONDefragSelective(benchmark::State& state) {\n  InitBenchMemRes();\n\n  std::string json_data = GenerateTestJSON(state.range(0));\n\n  for (auto _ : state) {\n    state.PauseTiming();\n    auto parsed = ParseJsonUsingShardHeap(json_data);\n    DCHECK(parsed.has_value());\n    SelectiveDefragment p{state.range(1) / 100.0};\n    state.ResumeTiming();\n\n    Defragment(parsed.value(), &p);\n\n    benchmark::DoNotOptimize(parsed);\n  }\n}\n\nBENCHMARK(BM_JSONDefragSelective)\n    ->ArgNames({\"objects_per_json\", \"fragmentation_probability\"})\n    ->Args({250, 0})\n    ->Args({250, 30})\n    ->Args({250, 70})\n    ->Args({250, 100})\n    ->Args({1000, 0})\n    ->Args({1000, 30})\n    ->Args({1000, 70})\n    ->Args({1000, 100})\n    ->Args({4000, 0})\n    ->Args({4000, 30})\n    ->Args({4000, 70})\n    ->Args({4000, 100});\n"
  },
  {
    "path": "src/core/qlist.cc",
    "content": "// Copyright 2024, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#include \"core/qlist.h\"\n\nextern \"C\" {\n#include \"redis/listpack.h\"\n#include \"redis/lzfP.h\"\n#include \"redis/zmalloc.h\"\n}\n\n#include <absl/base/macros.h>\n#include <absl/base/optimization.h>\n#include <absl/strings/escaping.h>\n#include <absl/strings/str_cat.h>\n#include <lz4frame.h>\n\n#include \"base/logging.h\"\n#include \"core/page_usage/page_usage_stats.h\"\n\nusing namespace std;\n\n/* Maximum size in bytes of any multi-element listpack.\n * Larger values will live in their own isolated listpacks.\n * This is used only if we're limited by record count. when we're limited by\n * size, the maximum limit is bigger, but still safe.\n * 8k is a recommended / default size limit */\n#define SIZE_SAFETY_LIMIT 8192\n\n/* Maximum estimate of the listpack entry overhead.\n * Although in the worst case(sz < 64), we will waste 6 bytes in one\n * quicklistNode, but can avoid memory waste due to internal fragmentation\n * when the listpack exceeds the size limit by a few bytes (e.g. being 16388). */\n#define SIZE_ESTIMATE_OVERHEAD 8\n\n/* Minimum listpack size in bytes for attempting compression. */\n#define MIN_COMPRESS_BYTES 256\n\n/* Minimum size reduction in bytes to store compressed quicklistNode data.\n * This also prevents us from storing compression if the compression\n * resulted in a larger size than the original data. */\n#define MIN_COMPRESS_IMPROVE 32\n\n#define QL_NODE_IS_PLAIN(node) ((node)->container == QUICKLIST_NODE_CONTAINER_PLAIN)\n\nnamespace dfly {\n\nnamespace {\n\nstatic_assert(sizeof(QList) == 48);\nstatic_assert(sizeof(QList::Node) == 40);\n\nenum IterDir : uint8_t { FWD = 1, REV = 0 };\n\n/* This is for test suite development purposes only, 0 means disabled. 
*/\nsize_t packed_threshold = 0;\n\n/* Optimization levels for size-based filling.\n * Note that the largest possible limit is 64k, so even if each record takes\n * just one byte, it still won't overflow the 16 bit count field. */\nconst size_t kOptLevel[] = {4096, 8192, 16384, 32768, 65536};\n\n/* Calculate the size limit of the quicklist node based on negative 'fill'. */\nsize_t NodeNegFillLimit(int fill) {\n  DCHECK_LT(fill, 0);\n\n  size_t offset = (-fill) - 1;\n  constexpr size_t max_level = ABSL_ARRAYSIZE(kOptLevel);\n  if (offset >= max_level)\n    offset = max_level - 1;\n  return kOptLevel[offset];\n}\n\nconst uint8_t* uint_ptr(string_view sv) {\n  static uint8_t empty = 0;\n  return sv.empty() ? &empty : reinterpret_cast<const uint8_t*>(sv.data());\n}\n\nbool IsLargeElement(size_t sz, int fill) {\n  if (ABSL_PREDICT_FALSE(packed_threshold != 0))\n    return sz >= packed_threshold;\n  if (fill >= 0)\n    return sz > SIZE_SAFETY_LIMIT;\n  else\n    return sz > NodeNegFillLimit(fill);\n}\n\n/* Calculate the size limit or length limit of the quicklist node\n * based on 'fill', and is also used to limit list listpack. */\nvoid quicklistNodeLimit(int fill, size_t* size, unsigned int* count) {\n  *size = SIZE_MAX;\n  *count = UINT_MAX;\n\n  if (fill >= 0) {\n    /* Ensure that one node have at least one entry */\n    *count = (fill == 0) ? 1 : fill;\n  } else {\n    *size = NodeNegFillLimit(fill);\n  }\n}\n\n#define sizeMeetsSafetyLimit(sz) ((sz) <= SIZE_SAFETY_LIMIT)\n\n/* Check if the limit of the quicklist node has been reached to determine if\n * insertions, merges or other operations that would increase the size of\n * the node can be performed.\n * Return 1 if exceeds the limit, otherwise 0. 
*/\nint quicklistNodeExceedsLimit(int fill, size_t new_sz, unsigned int new_count) {\n  size_t sz_limit;\n  unsigned int count_limit;\n  quicklistNodeLimit(fill, &sz_limit, &count_limit);\n\n  if (ABSL_PREDICT_TRUE(sz_limit != SIZE_MAX)) {\n    return new_sz > sz_limit;\n  } else if (count_limit != UINT_MAX) {\n    /* when we reach here we know that the limit is a count limit, so the size must\n     * additionally meet the safety limit (see comments next to kOptLevel and SIZE_SAFETY_LIMIT) */\n    if (!sizeMeetsSafetyLimit(new_sz))\n      return 1;\n    return new_count > count_limit;\n  }\n\n  ABSL_UNREACHABLE();\n}\n\nbool NodeAllowInsert(const QList::Node* node, const int fill, const size_t sz) {\n  if (ABSL_PREDICT_FALSE(!node))\n    return false;\n\n  if (ABSL_PREDICT_FALSE(QL_NODE_IS_PLAIN(node) || IsLargeElement(sz, fill)))\n    return false;\n\n  /* Estimate how many bytes will be added to the listpack by this one entry.\n   * We prefer an overestimation, which would at worse lead to a few bytes\n   * below the lowest limit of 4k (see optimization_level).\n   * Note: No need to check for overflow below since both `node->sz` and\n   * `sz` are to be less than 1GB after the plain/large element check above. 
*/\n  size_t new_sz = node->sz + sz + SIZE_ESTIMATE_OVERHEAD;\n  return !quicklistNodeExceedsLimit(fill, new_sz, node->count + 1);\n}\n\nbool NodeAllowMerge(const QList::Node* a, const QList::Node* b, const int fill) {\n  if (!a || !b)\n    return false;\n\n  if (ABSL_PREDICT_FALSE(QL_NODE_IS_PLAIN(a) || QL_NODE_IS_PLAIN(b)))\n    return false;\n\n  /* approximate merged listpack size (- 7 to remove one listpack\n   * header/trailer, see LP_HDR_SIZE and LP_EOF) */\n  unsigned int merge_sz = a->sz + b->sz - 7;\n\n  // Allow merge if new node will not exceed the limit.\n  return !quicklistNodeExceedsLimit(fill, merge_sz, a->count + b->count);\n}\n\n// the owner over entry is passed to the node.\nQList::Node* CreateRAW(int container, uint8_t* entry, size_t sz) {\n  QList::Node* node = (QList::Node*)zmalloc(sizeof(*node));\n  node->entry = entry;\n  node->count = 1;\n  node->sz = sz;\n  node->next = node->prev = NULL;\n  node->encoding = QUICKLIST_NODE_ENCODING_RAW;\n  node->container = container;\n  node->recompress = 0;\n  node->dont_compress = 0;\n  node->offloaded = 0;\n\n  return node;\n}\n\nuint8_t* LP_Insert(uint8_t* lp, string_view elem, uint8_t* pos, int lp_where) {\n  DCHECK(pos);\n  return lpInsertString(lp, uint_ptr(elem), elem.size(), pos, lp_where, NULL);\n}\n\nuint8_t* LP_Append(uint8_t* lp, string_view elem) {\n  return lpAppend(lp, uint_ptr(elem), elem.size());\n}\n\nuint8_t* LP_Prepend(uint8_t* lp, string_view elem) {\n  return lpPrepend(lp, uint_ptr(elem), elem.size());\n}\n\nQList::Node* CreateFromSV(int container, string_view value) {\n  uint8_t* entry = nullptr;\n  size_t sz = 0;\n  if (container == QUICKLIST_NODE_CONTAINER_PLAIN) {\n    DCHECK(!value.empty());\n    sz = value.size();\n    entry = (uint8_t*)zmalloc(sz);\n    memcpy(entry, value.data(), sz);\n  } else {\n    entry = LP_Append(lpNew(0), value);\n    sz = lpBytes(entry);\n  }\n\n  return CreateRAW(container, entry, sz);\n}\n\n// Returns the relative increase in size.\ninline ssize_t 
NodeSetEntry(QList::Node* node, uint8_t* entry) {\n  node->entry = entry;\n  size_t new_sz = lpBytes(node->entry);\n  ssize_t diff = new_sz - node->sz;\n  node->sz = new_sz;\n  return diff;\n}\n\n/* quicklistLZF is a 8+N byte struct holding 'sz' followed by 'compressed'.\n * 'sz' is byte length of 'compressed' field.\n * 'compressed' is LZF data with total (compressed) length 'sz'\n * NOTE: uncompressed length is stored in quicklistNode->sz.\n * When quicklistNode->entry is compressed, node->entry points to a quicklistLZF */\nusing quicklistLZF = struct quicklistLZF {\n  size_t sz; /* LZF size in bytes*/\n  char compressed[];\n};\n\ninline quicklistLZF* GetLzf(QList::Node* node) {\n  DCHECK(node->encoding == QUICKLIST_NODE_ENCODING_LZF ||\n         node->encoding == QLIST_NODE_ENCODING_LZ4);\n  return (quicklistLZF*)node->entry;\n}\n\nbool CompressLZF(QList::Node* node) {\n  // We allocate LZF_STATE on heap, piggy-backing on the existing allocation.\n  char* uptr = (char*)zmalloc(sizeof(quicklistLZF) + node->sz + sizeof(LZF_STATE));\n  quicklistLZF* lzf = (quicklistLZF*)uptr;\n  LZF_HSLOT* sdata = (LZF_HSLOT*)(uptr + sizeof(quicklistLZF) + node->sz);\n\n  /* Cancel if compression fails or doesn't compress small enough */\n  if (((lzf->sz = lzf_compress(node->entry, node->sz, lzf->compressed, node->sz, sdata)) == 0) ||\n      lzf->sz + MIN_COMPRESS_IMPROVE >= node->sz) {\n    /* lzf_compress aborts/rejects compression if value not compressible. 
*/\n    DVLOG(2) << \"Uncompressable \" << node->sz << \" vs \" << lzf->sz;\n    zfree(lzf);\n    QList::stats.bad_compression_attempts++;\n    return false;\n  }\n  DVLOG(2) << \"Compressed \" << node->sz << \" to \" << lzf->sz;\n  QList::stats.compressed_bytes += lzf->sz;\n  QList::stats.raw_compressed_bytes += node->sz;\n\n  lzf = (quicklistLZF*)zrealloc(lzf, sizeof(*lzf) + lzf->sz);\n  zfree(node->entry);\n  node->entry = (unsigned char*)lzf;\n  node->encoding = QUICKLIST_NODE_ENCODING_LZF;\n  return true;\n}\n\nbool CompressLZ4(QList::Node* node) {\n  LZ4F_cctx* cntx;\n  LZ4F_errorCode_t code = LZ4F_createCompressionContext(&cntx, LZ4F_VERSION);\n  CHECK(!LZ4F_isError(code));\n\n  LZ4F_preferences_t lz4_pref = LZ4F_INIT_PREFERENCES;\n  lz4_pref.compressionLevel = -1;\n  lz4_pref.frameInfo.contentSize = node->sz;\n  size_t buf_size = LZ4F_compressFrameBound(node->sz, &lz4_pref);\n\n  // We reuse quicklistLZF struct for LZ4 metadata.\n  quicklistLZF* dest = (quicklistLZF*)zmalloc(sizeof(quicklistLZF) + buf_size);\n  size_t compr_sz = LZ4F_compressFrame_usingCDict(cntx, dest->compressed, buf_size, node->entry,\n                                                  node->sz, nullptr /* dict */, &lz4_pref);\n  CHECK(!LZ4F_isError(compr_sz));\n\n  code = LZ4F_freeCompressionContext(cntx);\n  CHECK(!LZ4F_isError(code));\n\n  if (compr_sz + MIN_COMPRESS_IMPROVE >= node->sz) {\n    QList::stats.bad_compression_attempts++;\n    zfree(dest);\n    return false;\n  }\n\n  dest->sz = compr_sz;\n  dest = (quicklistLZF*)zrealloc(dest, sizeof(quicklistLZF) + compr_sz);\n  QList::stats.compressed_bytes += compr_sz;\n  QList::stats.raw_compressed_bytes += node->sz;\n\n  zfree(node->entry);\n  node->entry = (unsigned char*)dest;\n  node->encoding = QLIST_NODE_ENCODING_LZ4;\n  return true;\n}\n\n/* Compress the listpack in 'node' and update encoding details.\n * Returns true if listpack compressed successfully.\n * Returns false if compression failed or if listpack too small to 
compress. */\nbool CompressRaw(QList::Node* node, unsigned method) {\n  DCHECK(node->encoding == QUICKLIST_NODE_ENCODING_RAW);\n  DCHECK(!node->dont_compress);\n\n  /* validate that the node is neither\n   * tail nor head (it has prev and next)*/\n  DCHECK(node->prev && node->next);\n\n  node->recompress = 0;\n  /* Don't bother compressing small values */\n  if (node->sz < MIN_COMPRESS_BYTES)\n    return false;\n\n  QList::stats.compression_attempts++;\n  if (method == static_cast<unsigned>(QList::LZF)) {\n    return CompressLZF(node);\n  }\n\n  return CompressLZ4(node);\n}\n\nssize_t TryCompress(QList::Node* node, unsigned method) {\n  DCHECK(node);\n  if (node->encoding == QUICKLIST_NODE_ENCODING_RAW) {\n    node->attempted_compress = 1;\n    if (!node->dont_compress) {\n      if (CompressRaw(node, method))\n        return ssize_t(GetLzf(node)->sz) - node->sz;\n    }\n  }\n  return 0;\n}\n\n/* Uncompress the listpack in 'node' and update encoding details.\n * Returns 1 on successful decode, 0 on failure to decode. 
*/\nbool DecompressRaw(bool recompress, QList::Node* node) {\n  DCHECK(node->encoding == QUICKLIST_NODE_ENCODING_LZF ||\n         node->encoding == QLIST_NODE_ENCODING_LZ4);\n\n  node->recompress = int(recompress);\n\n  void* decompressed = zmalloc(node->sz);\n  quicklistLZF* lzf = GetLzf(node);\n  QList::stats.decompression_calls++;\n  QList::stats.compressed_bytes -= lzf->sz;\n  QList::stats.raw_compressed_bytes -= node->sz;\n\n  if (node->encoding == QLIST_NODE_ENCODING_LZ4) {\n    LZ4F_dctx* dctx = nullptr;\n    LZ4F_errorCode_t code = LZ4F_createDecompressionContext(&dctx, LZ4F_VERSION);\n    CHECK(!LZ4F_isError(code));\n    size_t decompressed_sz = node->sz;\n    size_t left =\n        LZ4F_decompress(dctx, decompressed, &decompressed_sz, lzf->compressed, &lzf->sz, nullptr);\n    CHECK_EQ(left, 0u);\n    CHECK_EQ(decompressed_sz, node->sz);\n    LZ4F_freeDecompressionContext(dctx);\n  } else {\n    if (lzf_decompress(lzf->compressed, lzf->sz, decompressed, node->sz) == 0) {\n      LOG(DFATAL) << \"Invalid LZF compressed data\";\n      /* Someone requested decompress, but we can't decompress.  Not good. 
*/\n      zfree(decompressed);\n      return false;\n    }\n  }\n  zfree(lzf);\n  node->entry = (uint8_t*)decompressed;\n  node->encoding = QUICKLIST_NODE_ENCODING_RAW;\n  return true;\n}\n\n/* Decompress only compressed nodes.\n   recompress: if true, the node will be marked for recompression after decompression.\n   returns by how much the size of the node has increased.\n*/\nssize_t TryDecompressInternal(bool recompress, QList::Node* node) {\n  if (node->encoding != QUICKLIST_NODE_ENCODING_RAW) {\n    size_t compressed_sz = GetLzf(node)->sz;\n    if (DecompressRaw(recompress, node)) {\n      return node->sz - compressed_sz;\n    }\n  }\n  return 0;\n}\n\nssize_t RecompressOnly(QList::Node* node, unsigned method) {\n  if (node->recompress && !node->dont_compress) {\n    if (CompressRaw(node, method))\n      return (GetLzf(node))->sz - node->sz;\n  }\n  return 0;\n}\n\n// If after is true, returns a new node with elements in [offset+1, inf), otherwise\n// returns [0, offset-1].\nQList::Node* SplitNode(QList::Node* node, int offset, bool after, ssize_t* diff) {\n  DCHECK(node->container == QUICKLIST_NODE_CONTAINER_PACKED);\n  size_t zl_sz = node->sz;\n  uint8_t* entry = (uint8_t*)zmalloc(zl_sz);\n\n  memcpy(entry, node->entry, zl_sz);\n\n  /* Need positive offset for calculating extent below. */\n  if (offset < 0)\n    offset = node->count + offset;\n\n  /* Ranges to be trimmed: -1 here means \"continue deleting until the list ends\" */\n  int orig_start = after ? offset + 1 : 0;\n  int orig_extent = after ? -1 : offset;\n  int new_start = after ? 0 : offset;\n  int new_extent = after ? 
offset + 1 : -1;\n\n  ssize_t diff_existing = NodeSetEntry(node, lpDeleteRange(node->entry, orig_start, orig_extent));\n  node->count = lpLength(node->entry);\n\n  entry = lpDeleteRange(entry, new_start, new_extent);\n  QList::Node* new_node = CreateRAW(QUICKLIST_NODE_CONTAINER_PACKED, entry, lpBytes(entry));\n  new_node->count = lpLength(new_node->entry);\n  *diff = diff_existing;\n\n  return new_node;\n}\n\n}  // namespace\n\n__thread QList::Stats QList::stats;\n\nQList::Stats& QList::Stats::operator+=(const Stats& other) {\n#define ADD_FIELD(field) this->field += other.field;\n\n  ADD_FIELD(compression_attempts);\n  ADD_FIELD(bad_compression_attempts);\n  ADD_FIELD(decompression_calls);\n  ADD_FIELD(compressed_bytes);\n  ADD_FIELD(raw_compressed_bytes);\n  ADD_FIELD(interior_node_reads);\n  ADD_FIELD(total_node_reads);\n  ADD_FIELD(offload_requests);\n  ADD_FIELD(onload_requests);\n\n#undef ADD_FIELD\n\n  return *this;\n}\n\nsize_t QList::Node::GetLZF(void** data) const {\n  DCHECK(encoding == QUICKLIST_NODE_ENCODING_LZF || encoding == QLIST_NODE_ENCODING_LZ4);\n  quicklistLZF* lzf = (quicklistLZF*)entry;\n  *data = lzf->compressed;\n  return lzf->sz;\n}\n\nvoid QList::SetPackedThreshold(unsigned threshold) {\n  packed_threshold = threshold;\n}\n\nsize_t QList::DefragIfNeeded(PageUsage* page_usage) {\n  size_t reallocated = 0;\n\n  for (Node* curr = head_; curr; curr = curr->next) {\n    if (!page_usage->IsPageForObjectUnderUtilized(curr->entry)) {\n      continue;\n    }\n\n    // Data pointed to by the nodes is reallocated. The nodes themselves are not reallocated because\n    // of their constant (and relatively small, ~40 bytes per object) size. 
Defragmentation fixes\n    // fragmented memory allocation, which usually happens when variable-sized blocks of data are\n    // allocated and deallocated, which is not expected with nodes.\n    uint8_t* new_entry = static_cast<uint8_t*>(zmalloc(curr->sz));\n    memcpy(new_entry, curr->entry, curr->sz);\n\n    uint8_t* old_entry = curr->entry;\n    curr->entry = new_entry;\n\n    zfree(old_entry);\n    ++reallocated;\n  }\n  return reallocated;\n}\n\nvoid QList::SetTieringParams(const TieringParams& params) {\n  tiering_params_ = make_unique<TieringParams>(params);\n}\n\nQList::QList(int fill, int compress) : fill_(fill), compress_(compress), bookmark_count_(0) {\n  compr_method_ = 0;\n}\n\nQList::QList(QList&& other) noexcept\n    : head_(other.head_),\n      count_(other.count_),\n      len_(other.len_),\n      fill_(other.fill_),\n      compress_(other.compress_),\n      bookmark_count_(other.bookmark_count_) {\n  other.head_ = nullptr;\n  other.len_ = other.count_ = 0;\n}\n\nQList::~QList() {\n  Clear();\n}\n\nQList& QList::operator=(QList&& other) noexcept {\n  if (this != &other) {\n    Clear();\n    head_ = other.head_;\n    len_ = other.len_;\n    count_ = other.count_;\n    fill_ = other.fill_;\n    compress_ = other.compress_;\n    bookmark_count_ = other.bookmark_count_;\n    tiering_params_ = std::move(other.tiering_params_);\n    num_offloaded_nodes_ = other.num_offloaded_nodes_;\n    other.head_ = nullptr;\n    other.len_ = other.count_ = other.num_offloaded_nodes_ = 0;\n  }\n  return *this;\n}\n\nvoid QList::Clear() noexcept {\n  Node* current = head_;\n\n  while (len_) {\n    Node* next = current->next;\n    if (current->encoding != QUICKLIST_NODE_ENCODING_RAW) {\n      quicklistLZF* lzf = (quicklistLZF*)current->entry;\n      stats.compressed_bytes -= lzf->sz;\n      stats.raw_compressed_bytes -= current->sz;\n    }\n    zfree(current->entry);\n    zfree(current);\n\n    len_--;\n    current = next;\n  }\n  head_ = nullptr;\n  count_ = 0;\n  
malloc_size_ = 0;\n  num_offloaded_nodes_ = 0;\n}\n\nvoid QList::Push(string_view value, Where where) {\n  DVLOG(3) << \"Push \" << absl::CHexEscape(value) << \" \" << (where == HEAD ? \"HEAD\" : \"TAIL\");\n\n  /* The head and tail should never be compressed (we don't attempt to decompress them) */\n  if (head_) {\n    DCHECK(head_->encoding != QUICKLIST_NODE_ENCODING_LZF);\n    DCHECK(head_->prev->encoding != QUICKLIST_NODE_ENCODING_LZF);\n  }\n\n  Node* orig = head_;\n  uint32_t orig_id = 0;\n  if (where == TAIL && orig) {\n    orig = orig->prev;\n    orig_id = len_ - 1;\n  }\n\n  InsertOpt opt = where == HEAD ? BEFORE : AFTER;\n\n  size_t sz = value.size();\n  if (ABSL_PREDICT_FALSE(IsLargeElement(sz, fill_))) {\n    InsertPlainNode(orig, value, orig_id, opt);\n    return;\n  }\n\n  count_++;\n\n  if (ABSL_PREDICT_TRUE(NodeAllowInsert(orig, fill_, sz))) {\n    auto func = (where == HEAD) ? LP_Prepend : LP_Append;\n    malloc_size_ += NodeSetEntry(orig, func(orig->entry, value));\n    orig->count++;\n    if (len_ == 1) {  // sanity check\n      DCHECK_EQ(malloc_size_, orig->sz);\n    }\n    DCHECK(head_->prev->next == nullptr);\n    return;\n  }\n\n  Node* node = CreateFromSV(QUICKLIST_NODE_CONTAINER_PACKED, value);\n  InsertNode(orig, node, orig_id, opt);\n  DCHECK(head_->prev->next == nullptr);\n}\n\nstring QList::Pop(Where where) {\n  DCHECK_GT(count_, 0u);\n  Node* node = head_;\n  if (where == TAIL) {\n    node = head_->prev;\n  }\n\n  /* The head and tail should never be compressed */\n  DCHECK(node->encoding != QUICKLIST_NODE_ENCODING_LZF);\n  DCHECK(head_->prev->next == nullptr);\n\n  string res;\n  if (ABSL_PREDICT_FALSE(QL_NODE_IS_PLAIN(node))) {\n    // TODO: We could avoid this copy by returning the pointer of the plain node.\n    // But the higher level APIs should support this.\n    res.assign(reinterpret_cast<char*>(node->entry), node->sz);\n    DelNode(node);\n  } else {\n    uint8_t* pos = where == HEAD ? 
lpFirst(node->entry) : lpLast(node->entry);\n    unsigned int vlen;\n    long long vlong;\n    uint8_t* vstr = lpGetValue(pos, &vlen, &vlong);\n    if (vstr) {\n      res.assign(reinterpret_cast<char*>(vstr), vlen);\n    } else {\n      res = absl::StrCat(vlong);\n    }\n    DelPackedIndex(node, pos);\n  }\n  DCHECK(head_ == nullptr || head_->prev->next == nullptr);\n  return res;\n}\n\nvoid QList::AppendListpack(unsigned char* zl) {\n  Node* node = CreateRAW(QUICKLIST_NODE_CONTAINER_PACKED, zl, lpBytes(zl));\n  node->count = lpLength(node->entry);\n\n  InsertNode(_Tail(), node, len_ ? len_ - 1 : 0, AFTER);\n  count_ += node->count;\n}\n\nvoid QList::AppendPlain(unsigned char* data, size_t sz) {\n  Node* node = CreateRAW(QUICKLIST_NODE_CONTAINER_PLAIN, data, sz);\n  InsertNode(_Tail(), node, len_ ? len_ - 1 : 0, AFTER);\n  ++count_;\n}\n\nbool QList::Insert(std::string_view pivot, std::string_view elem, InsertOpt opt) {\n  Iterator it = GetIterator(HEAD);\n\n  if (it.Valid()) {\n    do {\n      if (it.Get() == pivot) {\n        Insert(it, elem, opt);\n        return true;\n      }\n    } while (it.Next());\n  }\n\n  return false;\n}\n\nbool QList::Replace(long index, std::string_view elem) {\n  Iterator it = GetIterator(index);\n  if (it.Valid()) {\n    Replace(it, elem);\n    return true;\n  }\n  return false;\n}\n\nsize_t QList::MallocUsed(bool slow) const {\n  size_t node_size = len_ * sizeof(Node) + znallocx(sizeof(QList));\n  if (slow) {\n    for (Node* node = head_; node; node = node->next) {\n      node_size += zmalloc_usable_size(node->entry);\n    }\n    return node_size;\n  }\n\n  return node_size + malloc_size_;\n}\n\nvoid QList::Iterate(IterateFunc cb, long start, long end) const {\n  long llen = Size();\n  if (llen == 0)\n    return;\n\n  if (end < 0 || end >= long(Size()))\n    end = Size() - 1;\n  Iterator it = GetIterator(start);\n  if (it.Valid()) {\n    do {\n      if (start > end || !cb(it.Get()))\n        break;\n      start++;\n    } while 
(it.Next());\n  }\n}\n\nauto QList::InsertPlainNode(Node* old_node, string_view value, uint32_t old_node_id,\n                            InsertOpt insert_opt) -> Node* {\n  Node* new_node = CreateFromSV(QUICKLIST_NODE_CONTAINER_PLAIN, value);\n  InsertNode(old_node, new_node, old_node_id, insert_opt);\n  count_++;\n  return new_node;\n}\n\nvoid QList::InsertNode(Node* old_node, Node* new_node, uint32_t old_node_id, InsertOpt insert_opt) {\n  if (insert_opt == AFTER) {\n    new_node->prev = old_node;\n    if (old_node) {\n      new_node->next = old_node->next;\n      if (old_node->next)\n        old_node->next->prev = new_node;\n      old_node->next = new_node;\n      if (head_->prev == old_node)  // if old_node is tail, update the tail to the new node.\n        head_->prev = new_node;\n    }\n  } else {  // BEFORE\n    new_node->next = old_node;\n    if (old_node) {\n      new_node->prev = old_node->prev;\n      // if old_node is not head, link its prev to the new node.\n      // head->prev is tail, so we don't need to update it.\n      if (old_node != head_)\n        old_node->prev->next = new_node;\n      old_node->prev = new_node;\n    }\n    if (head_ == old_node)\n      head_ = new_node;\n  }\n\n  /* If this insert creates the only element so far, initialize head/tail. 
*/\n  if (len_ == 0) {\n    head_ = new_node;\n    head_->prev = new_node;\n  }\n\n  /* Update len first, so in Compress we know exactly len */\n  len_++;\n  malloc_size_ += new_node->sz;\n\n  // Calculate final positions AFTER all linkage and len_ updates are complete.\n  uint32_t new_node_id;\n  if (insert_opt == AFTER && old_node) {\n    new_node_id = old_node_id + 1;  // new_node inserted after, old_node position unchanged\n  } else {\n    new_node_id = old_node_id;  // new_node takes old_node's position\n    old_node_id++;              // old_node shifts one position forward\n  }\n\n  if (old_node)\n    CoolOff(old_node, old_node_id);\n\n  CoolOff(new_node, new_node_id);\n}\n\nvoid QList::Insert(Iterator it, std::string_view elem, InsertOpt insert_opt) {\n  DCHECK(it.current_);\n  DCHECK(it.zi_);\n\n  int full = 0, at_tail = 0, at_head = 0, avail_next = 0, avail_prev = 0;\n  Node* node = it.current_;\n  size_t sz = elem.size();\n  bool after = insert_opt == AFTER;\n\n  /* Populate accounting flags for easier boolean checks later */\n  if (!NodeAllowInsert(node, fill_, sz)) {\n    full = 1;\n  }\n\n  if (after && (it.offset_ == node->count - 1 || it.offset_ == -1)) {\n    at_tail = 1;\n    if (NodeAllowInsert(node->next, fill_, sz)) {\n      avail_next = 1;\n    }\n  }\n\n  if (!after && (it.offset_ == 0 || it.offset_ == -(node->count))) {\n    at_head = 1;\n    if (NodeAllowInsert(node->prev, fill_, sz)) {\n      avail_prev = 1;\n    }\n  }\n  uint32_t node_id = it.node_id_;\n  if (ABSL_PREDICT_FALSE(IsLargeElement(sz, fill_))) {\n    if (QL_NODE_IS_PLAIN(node) || (at_tail && after) || (at_head && !after)) {\n      InsertPlainNode(node, elem, node_id, insert_opt);\n    } else {\n      AccessForReads(true, node);\n      ssize_t diff_existing = 0;\n      // if after == true, the order will be node, entry_node, new_node\n      // otherwise: new_node, entry_node, node.\n      Node* new_node = SplitNode(node, it.offset_, after, &diff_existing);\n      Node* 
entry_node = InsertPlainNode(node, elem, node_id, insert_opt);\n      uint32_t entry_node_id = after ? node_id + 1 : node_id;\n      InsertNode(entry_node, new_node, entry_node_id, insert_opt);\n      malloc_size_ += diff_existing;\n    }\n    return;\n  }\n\n  /* Now determine where and how to insert the new element */\n  if (!full) {\n    AccessForReads(true, node);\n    uint8_t* new_entry = LP_Insert(node->entry, elem, it.zi_, after ? LP_AFTER : LP_BEFORE);\n    malloc_size_ += NodeSetEntry(node, new_entry);\n    node->count++;\n    malloc_size_ += RecompressOnly(node, compr_method_);\n  } else {\n    bool insert_tail = at_tail && after;\n    bool insert_head = at_head && !after;\n    if (insert_tail && avail_next) {\n      /* If we are: at tail, next has free space, and inserting after:\n       *   - insert entry at head of next node. */\n      auto* new_node = node->next;\n      AccessForReads(true, new_node);\n      malloc_size_ += NodeSetEntry(new_node, LP_Prepend(new_node->entry, elem));\n      new_node->count++;\n      malloc_size_ += RecompressOnly(new_node, compr_method_);\n      malloc_size_ += RecompressOnly(node, compr_method_);\n    } else if (insert_head && avail_prev) {\n      /* If we are: at head, previous has free space, and inserting before:\n       *   - insert entry at tail of previous node. 
*/\n      auto* new_node = node->prev;\n      AccessForReads(true, new_node);\n      malloc_size_ += NodeSetEntry(new_node, LP_Append(new_node->entry, elem));\n      new_node->count++;\n      malloc_size_ += RecompressOnly(new_node, compr_method_);\n      malloc_size_ += RecompressOnly(node, compr_method_);\n    } else if (insert_tail || insert_head) {\n      /* If we are: full, and our prev/next has no available space, then:\n       *   - create new node and attach to qlist */\n      auto* new_node = CreateFromSV(QUICKLIST_NODE_CONTAINER_PACKED, elem);\n      InsertNode(node, new_node, node_id, insert_opt);\n    } else {\n      /* else, node is full we need to split it. */\n      /* covers both after and !after cases */\n      AccessForReads(true, node);\n      ssize_t diff_existing = 0;\n      auto* new_node = SplitNode(node, it.offset_, after, &diff_existing);\n      auto func = after ? LP_Prepend : LP_Append;\n      malloc_size_ += NodeSetEntry(new_node, func(new_node->entry, elem));\n      new_node->count++;\n      InsertNode(node, new_node, node_id, insert_opt);\n      MergeNodes(node);\n      malloc_size_ += diff_existing;\n    }\n  }\n  count_++;\n}\n\nvoid QList::Replace(Iterator it, std::string_view elem) {\n  Node* node = it.current_;\n  uint8_t* newentry = nullptr;\n  size_t sz = elem.size();\n  uint32_t node_id = it.node_id_;\n  if (ABSL_PREDICT_TRUE(!QL_NODE_IS_PLAIN(node) && !IsLargeElement(sz, fill_) &&\n                        (newentry = lpReplace(node->entry, &it.zi_, uint_ptr(elem), sz)) != NULL)) {\n    malloc_size_ += NodeSetEntry(node, newentry);\n    CoolOff(node, node_id);\n  } else if (QL_NODE_IS_PLAIN(node)) {\n    if (IsLargeElement(sz, fill_)) {\n      zfree(node->entry);\n      uint8_t* new_entry = (uint8_t*)zmalloc(sz);\n      memcpy(new_entry, elem.data(), sz);\n      malloc_size_ += NodeSetEntry(node, new_entry);\n      CoolOff(node, node_id);\n    } else {\n      Insert(it, elem, AFTER);\n      DelNode(node);\n    }\n  } else { /* 
The node is full or data is a large element */\n    Node *split_node = NULL, *new_node;\n    node->dont_compress = 1; /* Prevent compression in InsertNode() */\n\n    /* If the entry is not at the tail, split the node at the entry's offset. */\n    if (it.offset_ != node->count - 1 && it.offset_ != -1) {\n      ssize_t diff_existing = 0;\n      split_node = SplitNode(node, it.offset_, 1, &diff_existing);\n      malloc_size_ += diff_existing;\n    }\n\n    /* Create a new node and insert it after the original node.\n     * If the original node was split, insert the split node after the new node. */\n    new_node = CreateFromSV(IsLargeElement(sz, fill_) ? QUICKLIST_NODE_CONTAINER_PLAIN\n                                                      : QUICKLIST_NODE_CONTAINER_PACKED,\n                            elem);\n    // The order is: node, new_node, split_node.\n    InsertNode(node, new_node, node_id, AFTER);\n    if (split_node)\n      InsertNode(new_node, split_node, node_id + 1, AFTER);\n    count_++;\n\n    /* Delete the replaced element. */\n    if (node->count == 1) {\n      DelNode(node);\n    } else {\n      unsigned char* p = lpSeek(node->entry, -1);\n      DelPackedIndex(node, p);\n      node->dont_compress = 0; /* Re-enable compression */\n      new_node = MergeNodes(new_node);\n\n      /* We can't know if the current node and its sibling nodes are correctly compressed,\n       * and we don't know if they are within the range of compress depth, so we need to\n       * use UpdateCompression() for compression, which checks if node is within compress\n       * depth before compressing. */\n      // TODO: node_id might be off after merges.\n      CoolOff(new_node, node_id + 1);\n      CoolOff(new_node->prev, node_id);\n      if (new_node->next)\n        CoolOff(new_node->next, node_id + 2);\n    }\n  }\n}\n\nvoid QList::CoolOff(Node* node, uint32_t node_id) {\n  if (tiering_params_) {\n    // Dry run for offloading decision.\n    // a. 
Node id is within the offloadable depth - offload it if not already offloaded.\n    // b. Node id is outside the offloadable depth - but we have too many nodes that are not\n    //    offloaded - take the O(n) route to traverse and offload them. The reason for having such\n    //    nodes is because (a) handles nodes that we touch during operations.\n    //    if for example we just perform lpush, then we won't touch any interior nodes, and they\n    //    will never get offloaded. The good news is that once interior nodes are offloaded,\n    //    we won't need to traverse them again for \"trivial\" access patterns unless they\n    //    get accessed again. Another reason for missing offloaded nodes is that node_id can be\n    //    off due to merges (can be improved in future).\n    if (node_id >= tiering_params_->node_depth_threshold &&\n        node_id + tiering_params_->node_depth_threshold < len_) {\n      if (!node->offloaded) {\n        OffloadNode(node);\n      }\n    } else if (num_offloaded_nodes_ * 2 + tiering_params_->node_depth_threshold * 2 < len_) {\n      // We check `num_offloaded_nodes_ * 2` above to avoid frequent traversals.\n      // So only when the gap between offloaded and non-offloaded nodes is large enough,\n      // we do a traversal to offload more nodes.\n      auto* fw = head_;\n      auto* rev = head_->prev;\n      uint32_t traverse_node_id = 0;\n\n      // Traverse from both ends towards the middle as we expect more offloads towards the ends\n      // due to usual access patterns of adding items via lpush/rpush.\n      while (traverse_node_id <= len_ / 2 &&\n             (num_offloaded_nodes_ + 2 * tiering_params_->node_depth_threshold) < len_) {\n        if (traverse_node_id >= tiering_params_->node_depth_threshold) {\n          if (fw->offloaded == 0) {\n            OffloadNode(fw);\n          }\n\n          // Avoid offloading the same node twice when fw and rev meet in the middle.\n          if (rev != fw && rev->offloaded == 0) 
{\n            OffloadNode(rev);\n          }\n        }\n        fw = fw->next;\n        rev = rev->prev;\n        traverse_node_id++;\n      }\n    }\n  }\n\n  /* Force 'quicklist' to meet compression guidelines set by compress depth.\n   * The only way to guarantee interior nodes get compressed is to iterate\n   * to our \"interior\" compress depth then compress the next node we find.\n   * If compress depth is larger than the entire list, we return immediately. */\n\n  if (node->recompress)\n    CompressRaw(node, this->compr_method_);\n  else\n    this->CompressByDepth(node);\n}\n\nvoid QList::CompressByDepth(Node* node) {\n  if (len_ == 0)\n    return;\n\n  /* The head and tail should never be compressed (we should not attempt to recompress them) */\n  DCHECK(head_->recompress == 0 && head_->prev->recompress == 0);\n\n  /* If length is less than our compress depth (from both sides),\n   * we can't compress anything. */\n  if (!AllowCompression() || len_ < (unsigned int)(compress_ * 2))\n    return;\n\n  /* Iterate until we reach compress depth for both sides of the list.\n   * Note: because we do length checks at the *top* of this function,\n   *       we can skip explicit null checks below. Everything exists. */\n  Node* forward = head_;\n  Node* reverse = head_->prev;\n  int depth = 0;\n  int in_depth = 0;\n  while (depth++ < compress_) {\n    malloc_size_ += TryDecompressInternal(false, forward);\n    malloc_size_ += TryDecompressInternal(false, reverse);\n\n    if (forward == node || reverse == node)\n      in_depth = 1;\n\n    /* We passed into compress depth of opposite side of the quicklist\n     * so there's no need to compress anything and we can exit. 
*/\n    if (forward == reverse || forward->next == reverse)\n      return;\n\n    forward = forward->next;\n    reverse = reverse->prev;\n  }\n\n  if (!in_depth && node) {\n    malloc_size_ += TryCompress(node, this->compr_method_);\n  }\n  /* At this point, forward and reverse are one node beyond depth */\n  malloc_size_ += TryCompress(forward, this->compr_method_);\n  malloc_size_ += TryCompress(reverse, this->compr_method_);\n}\n\nvoid QList::AccessForReads(bool recompress, Node* node) {\n  DCHECK(node);\n  stats.total_node_reads++;\n  if (node->offloaded) {\n    DCHECK(tiering_params_);\n    stats.onload_requests++;\n    num_offloaded_nodes_--;\n    node->offloaded = 0;\n  }\n  if (len_ > 2 && node != head_ && node->next != nullptr) {\n    stats.interior_node_reads++;\n  }\n  ssize_t res = TryDecompressInternal(recompress, node);\n  malloc_size_ += res;\n}\n\n/* Attempt to merge listpacks within two nodes on either side of 'center'.\n *\n * We attempt to merge:\n *   - (center->prev->prev, center->prev)\n *   - (center->next, center->next->next)\n *   - (center->prev, center)\n *   - (center, center->next)\n *\n * Returns the new 'center' after merging.\n */\nauto QList::MergeNodes(Node* center) -> Node* {\n  Node *prev = NULL, *prev_prev = NULL, *next = NULL;\n  Node *next_next = NULL, *target = NULL;\n\n  if (center->prev) {\n    prev = center->prev;\n    if (center->prev->prev)\n      prev_prev = center->prev->prev;\n  }\n\n  if (center->next) {\n    next = center->next;\n    if (center->next->next)\n      next_next = center->next->next;\n  }\n\n  /* Try to merge prev_prev and prev */\n  if (NodeAllowMerge(prev, prev_prev, fill_)) {\n    ListpackMerge(prev_prev, prev);\n    prev_prev = prev = NULL; /* they could have moved, invalidate them. */\n  }\n\n  /* Try to merge next and next_next */\n  if (NodeAllowMerge(next, next_next, fill_)) {\n    ListpackMerge(next, next_next);\n    next = next_next = NULL; /* they could have moved, invalidate them. 
*/\n  }\n\n  /* Try to merge center node and previous node */\n  if (NodeAllowMerge(center, center->prev, fill_)) {\n    target = ListpackMerge(center->prev, center);\n    center = NULL; /* center could have been deleted, invalidate it. */\n  } else {\n    /* else, we didn't merge here, but target needs to be valid below. */\n    target = center;\n  }\n\n  /* Use result of center merge (or original) to merge with next node. */\n  if (NodeAllowMerge(target, target->next, fill_)) {\n    target = ListpackMerge(target, target->next);\n  }\n  return target;\n}\n\n/* Given two nodes, try to merge their listpacks.\n *\n * This helps us not have a quicklist with 3 element listpacks if\n * our fill factor can handle much higher levels.\n *\n * Note: 'a' must be to the LEFT of 'b'.\n *\n * After calling this function, both 'a' and 'b' should be considered\n * unusable.  The return value from this function must be used\n * instead of re-using any of the quicklistNode input arguments.\n *\n * Returns the input node picked to merge against or NULL if\n * merging was not possible. */\nauto QList::ListpackMerge(Node* a, Node* b) -> Node* {\n  AccessForReads(false, a);\n  AccessForReads(false, b);\n  if ((lpMerge(&a->entry, &b->entry))) {\n    /* We merged listpacks! Now remove the unused Node. */\n    Node *keep = NULL, *nokeep = NULL;\n    if (!a->entry) {\n      nokeep = a;\n      keep = b;\n    } else if (!b->entry) {\n      nokeep = b;\n      keep = a;\n    }\n    keep->count = lpLength(keep->entry);\n    malloc_size_ += NodeSetEntry(keep, keep->entry);\n\n    keep->recompress = 0; /* Prevent 'keep' from being recompressed if\n                           * it becomes head or tail after merging. */\n\n    nokeep->count = 0;\n    DelNode(nokeep);\n    CoolOff(keep, 0);  // TODO: node_id is unknown here, so just pass 0.\n    return keep;\n  }\n\n  /* else, the merge returned NULL and nothing changed. 
*/\n  return NULL;\n}\n\nvoid QList::DelNode(Node* node) {\n  if (node->next)\n    node->next->prev = node->prev;\n\n  if (node == head_) {\n    head_ = node->next;\n  } else {\n    // for non-head nodes, update prev->next to point to node->next\n    // (If node==head, prev is tail and should always point to NULL).\n    node->prev->next = node->next;\n    if (node == head_->prev)  // tail\n      head_->prev = node->prev;\n  }\n\n  /* Update len first, so in CompressByDepth we know exactly len */\n  len_--;\n  count_ -= node->count;\n  malloc_size_ -= node->sz;\n  if (node->offloaded) {\n    num_offloaded_nodes_--;\n  }\n\n  /* If we deleted a node within our compress depth, we\n   * now have compressed nodes needing to be decompressed. */\n  CompressByDepth(NULL);\n\n  zfree(node->entry);\n  zfree(node);\n}\n\n/* Delete one entry from list given the node for the entry and a pointer\n * to the entry in the node.\n *\n * Note: DelPackedIndex() *requires* uncompressed nodes because you\n *       already had to get *p from an uncompressed node somewhere.\n *\n * Returns true if the entire node was deleted, false if node still exists.\n * 'p' is taken by value and is not updated by this function. 
*/\nbool QList::DelPackedIndex(Node* node, uint8_t* p) {\n  DCHECK(!QL_NODE_IS_PLAIN(node));\n\n  if (node->count == 1) {\n    DelNode(node);\n    return true;\n  }\n\n  malloc_size_ += NodeSetEntry(node, lpDelete(node->entry, p, NULL));\n  node->count--;\n  count_--;\n\n  return false;\n}\n\nvoid QList::OffloadNode(Node* node) {\n  DCHECK(tiering_params_ && node->offloaded == 0);\n  num_offloaded_nodes_++;\n  stats.offload_requests++;\n  node->offloaded = 1;\n}\n\nvoid QList::InitIteratorEntry(Iterator* it) const {\n  DCHECK(it->current_);\n  const_cast<QList*>(this)->AccessForReads(true, it->current_);\n  if (QL_NODE_IS_PLAIN(it->current_)) {\n    it->zi_ = it->current_->entry;\n  } else {\n    it->zi_ = lpSeek(it->current_->entry, it->offset_);\n  }\n}\n\nauto QList::GetIterator(Where where) const -> Iterator {\n  Iterator it;\n  it.owner_ = this;\n  it.zi_ = NULL;\n  if (where == HEAD) {\n    it.current_ = head_;\n    it.offset_ = 0;\n    it.direction_ = FWD;\n    it.node_id_ = 0;\n  } else {\n    it.current_ = _Tail();\n    it.offset_ = -1;\n    it.direction_ = REV;\n    it.node_id_ = len_ - 1;\n  }\n\n  if (it.current_) {\n    InitIteratorEntry(&it);\n  }\n\n  return it;\n}\n\nauto QList::GetIterator(long idx) const -> Iterator {\n  unsigned long long accum = 0;\n  int forward = idx < 0 ? 0 : 1; /* < 0 -> reverse, 0+ -> forward */\n  uint64_t index = forward ? idx : (-idx) - 1;\n  if (index >= count_)\n    return {};\n\n  DCHECK(head_);\n\n  /* Seek in the other direction if that way is shorter. */\n  int seek_forward = forward;\n  unsigned long long seek_index = index;\n  if (index > (count_ - 1) / 2) {\n    seek_forward = !forward;\n    seek_index = count_ - 1 - index;\n  }\n\n  Node* n = seek_forward ? head_ : head_->prev;\n  unsigned node_cnt = 0;\n  while (ABSL_PREDICT_TRUE(n)) {\n    if ((accum + n->count) > seek_index) {\n      break;\n    } else {\n      accum += n->count;\n      n = seek_forward ? 
n->next : n->prev;\n      node_cnt++;\n    }\n  }\n  DCHECK(n);\n  if (!n)\n    return {};\n\n  /* Fix accum so it looks like we seeked in the other direction. */\n  if (seek_forward != forward)\n    accum = count_ - n->count - accum;\n\n  Iterator iter;\n  iter.owner_ = this;\n  iter.direction_ = forward ? FWD : REV;\n  iter.current_ = n;\n  iter.node_id_ = seek_forward ? node_cnt : (len_ - 1 - node_cnt);\n  if (forward) {\n    /* forward = normal head-to-tail offset. */\n    iter.offset_ = index - accum;\n  } else {\n    /* reverse = need negative offset for tail-to-head, so undo\n     * the result of the original index = (-idx) - 1 above. */\n    iter.offset_ = (-index) - 1 + accum;\n  }\n\n  InitIteratorEntry(&iter);\n\n  return iter;\n}\n\nauto QList::Erase(Iterator it) -> Iterator {\n  DCHECK(it.current_);\n\n  Node* node = it.current_;\n  Node* prev = node->prev;\n  Node* next = node->next;\n\n  bool deleted_node = false;\n  if (QL_NODE_IS_PLAIN(node)) {\n    DelNode(node);\n    deleted_node = true;\n  } else {\n    deleted_node = DelPackedIndex(node, it.zi_);\n  }\n\n  it.zi_ = NULL;  // Reset current entry pointer\n\n  // If current node is deleted, we must update iterator node and offset.\n  if (deleted_node) {\n    if (it.direction_ == FWD) {\n      it.current_ = next;\n      it.offset_ = 0;\n      it.node_id_++;\n    } else if (it.direction_ == REV) {\n      it.current_ = len_ ? prev : nullptr;\n      it.offset_ = -1;\n      it.node_id_ = it.node_id_ ? 
it.node_id_ - 1 : len_ - 1;\n    }\n  }\n\n  if (it.current_) {\n    InitIteratorEntry(&it);\n  }\n\n  // Sanity, should be noop in release mode.\n  if (len_ == 1) {\n    DCHECK_EQ(count_, head_->count);\n    DCHECK_EQ(malloc_size_, head_->sz);\n  }\n\n  /* else if (!deleted_node), no changes needed.\n   * we already reset iter->zi above, and the existing iter->offset\n   * doesn't move again because:\n   *   - [1, 2, 3] => delete offset 1 => [1, 3]: next element still offset 1\n   *   - [1, 2, 3] => delete offset 0 => [2, 3]: next element still offset 0\n   *  if we deleted the last element at offset N and now\n   *  length of this listpack is N-1, the next call into\n   *  quicklistNext() will jump to the next node. */\n  return it;\n}\n\nbool QList::Erase(const long start, unsigned count) {\n  if (count == 0)\n    return false;\n\n  unsigned extent = count; /* range is inclusive of start position */\n\n  if (start >= 0 && extent > (count_ - start)) {\n    /* if requesting delete more elements than exist, limit to list size. */\n    extent = count_ - start;\n  } else if (start < 0 && extent > (unsigned long)(-start)) {\n    /* else, if at negative offset, limit max size to rest of list. */\n    extent = -start; /* c.f. LREM -29 29; just delete until end. */\n  }\n\n  Iterator it = GetIterator(start);\n  Node* node = it.current_;\n  long offset = it.offset_;\n\n  /* iterate over next nodes until everything is deleted. */\n  while (extent) {\n    Node* next = node->next;\n\n    unsigned long del;\n    int delete_entire_node = 0;\n    if (offset == 0 && extent >= node->count) {\n      /* If we are deleting more than the count of this node, we\n       * can just delete the entire node without listpack math. */\n      delete_entire_node = 1;\n      del = node->count;\n    } else if (offset >= 0 && extent + offset >= node->count) {\n      /* If deleting more nodes after this one, calculate delete based\n       * on size of current node. 
*/\n      del = node->count - offset;\n    } else if (offset < 0) {\n      /* If offset is negative, we are in the first run of this loop\n       * and we are deleting the entire range\n       * from this start offset to end of list.  Since the Negative\n       * offset is the number of elements until the tail of the list,\n       * just use it directly as the deletion count. */\n      del = -offset;\n\n      /* If the positive offset is greater than the remaining extent,\n       * we only delete the remaining extent, not the entire offset.\n       */\n      if (del > extent)\n        del = extent;\n    } else {\n      /* else, we are deleting less than the extent of this node, so\n       * use extent directly. */\n      del = extent;\n    }\n\n    if (delete_entire_node || QL_NODE_IS_PLAIN(node)) {\n      DelNode(node);\n    } else {\n      AccessForReads(true, node);\n      malloc_size_ += NodeSetEntry(node, lpDeleteRange(node->entry, offset, del));\n      node->count -= del;\n      count_ -= del;\n      if (node->count == 0) {\n        DelNode(node);\n      } else {\n        malloc_size_ += RecompressOnly(node, compr_method_);\n      }\n    }\n\n    extent -= del;\n    node = next;\n    offset = 0;\n  }\n  return true;\n}\n\nuint8_t* QList::TryExtractListpack() {\n  if (len_ != 1 || QL_NODE_IS_PLAIN(head_) || !ShouldStoreAsListPack(head_->sz) ||\n      head_->IsCompressed()) {\n    return nullptr;\n  }\n\n  uint8_t* res = std::exchange(head_->entry, nullptr);\n  DelNode(head_);\n\n  return res;\n}\n\nbool QList::Iterator::Next() {\n  if (!current_)\n    return false;\n\n  int plain = QL_NODE_IS_PLAIN(current_);\n\n  // Advance to the next element in the current node.\n  if (ABSL_PREDICT_FALSE(plain)) {\n    zi_ = NULL;\n  } else {\n    unsigned char* (*nextFn)(unsigned char*, unsigned char*) = lpNext;\n    int offset_update = 1;\n\n    if (direction_ == REV) {\n      DCHECK_EQ(REV, direction_);\n      nextFn = lpPrev;\n      offset_update = -1;\n    }\n    zi_ = 
nextFn(current_->entry, zi_);\n    offset_ += offset_update;\n  }\n\n  if (zi_)\n    return true;\n\n  // Move to the next node.\n  const_cast<QList*>(owner_)->CompressByDepth(current_);\n\n  if (direction_ == FWD) {\n    /* Forward traversal, Jumping to start of next node */\n    current_ = current_->next;\n    offset_ = 0;\n    node_id_++;\n  } else {\n    /* Reverse traversal, Jumping to end of previous node */\n    DCHECK_EQ(REV, direction_);\n    offset_ = -1;\n    current_ = (current_ == owner_->head_) ? nullptr : current_->prev;\n    node_id_--;\n  }\n\n  if (!current_)\n    return false;\n\n  owner_->InitIteratorEntry(this);\n  return zi_ != nullptr;\n}\n\nauto QList::Iterator::Get() const -> Entry {\n  int plain = QL_NODE_IS_PLAIN(current_);\n  if (ABSL_PREDICT_FALSE(plain)) {\n    char* str = reinterpret_cast<char*>(current_->entry);\n    return Entry(str, current_->sz);\n  }\n\n  DCHECK(zi_);\n\n  /* Populate value from existing listpack position */\n  unsigned int sz = 0;\n  long long val;\n  uint8_t* ptr = lpGetValue(zi_, &sz, &val);\n\n  return ptr ? Entry(reinterpret_cast<char*>(ptr), sz) : Entry(val);\n}\n\n}  // namespace dfly\n"
  },
  {
    "path": "src/core/qlist.h",
    "content": "// Copyright 2024, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#pragma once\n\n#include <absl/functional/function_ref.h>\n\n#include <cstdint>\n#include <memory>\n#include <string>\n\n#include \"core/collection_entry.h\"\n\n#define QL_COMP_BITS 16\n#define QL_BM_BITS 4\n\n/* quicklist node encodings */\n#define QUICKLIST_NODE_ENCODING_RAW 1\n#define QUICKLIST_NODE_ENCODING_LZF 2\n#define QLIST_NODE_ENCODING_LZ4 3\n\n/* quicklist node container formats */\n#define QUICKLIST_NODE_CONTAINER_PLAIN 1\n#define QUICKLIST_NODE_CONTAINER_PACKED 2\n\nnamespace dfly {\n\nclass PageUsage;\n\n// Heuristic: for values smaller than 2 KiB we prefer the compact listpack\n// representation. 2048 was chosen as a conservative threshold that matches\n// common quicklist usage patterns and avoids creating very large listpacks\n// that are costly to reallocate or compress.\ninline bool ShouldStoreAsListPack(size_t size) {\n  return size < 2048;\n}\n\nclass QList {\n public:\n  enum Where : uint8_t { TAIL, HEAD };\n  enum COMPR_METHOD : uint8_t { LZF = 0, LZ4 = 1 };\n\n  /* Node is a 40 byte struct describing a listpack for a quicklist.\n   * We use bit fields keep the Node at 40 bytes.\n   * count: 16 bits, max 65536 (max lp bytes is 65k, so max count actually < 32k).\n   * encoding: 2 bits, RAW=1, LZF=2.\n   * container: 2 bits, PLAIN=1 (a single item as char array), PACKED=2 (listpack with multiple\n   * items). 
recompress: 1 bit, bool, true if node is temporary decompressed for usage.\n   * attempted_compress: 1 bit, boolean, used for verifying during testing.\n   * dont_compress: 1 bit, boolean, used for preventing compression of entry.\n   * */\n\n  struct Node {\n    Node* prev;\n    Node* next;\n    unsigned char* entry;\n    size_t sz : 48;    /* entry size in bytes */\n    size_t count : 16; /* count of items in listpack */\n\n    uint16_t encoding : 2;           /* RAW==1 or LZF==2 */\n    uint16_t container : 2;          /* PLAIN==1 or PACKED==2 */\n    uint16_t recompress : 1;         /* was this node previous compressed? */\n    uint16_t attempted_compress : 1; /* node can't compress; too small */\n    uint16_t dont_compress : 1;      /* prevent compression of entry that will be used later */\n    uint16_t offloaded : 1;          /* node is offloaded to colder storage */\n    uint16_t reserved1 : 8;          /* reserved for future use */\n\n    uint16_t reserved2; /* more bits to steal for future usage */\n    uint32_t reserved3; /* more bits to steal for future usage */\n\n    bool IsCompressed() const {\n      return encoding != QUICKLIST_NODE_ENCODING_RAW;\n    }\n\n    size_t GetLZF(void** data) const;\n  };\n\n  using Entry = CollectionEntry;\n  class Iterator {\n   public:\n    // Returns true if the iterator is valid (points to an element).\n    bool Valid() const {\n      return zi_ != nullptr;\n    }\n\n    Entry Get() const;\n\n    // Advances to the next/prev element. 
Returns false if no more entries.\n    bool Next();\n\n   private:\n    const QList* owner_ = nullptr;\n    Node* current_ = nullptr;\n    unsigned char* zi_ = nullptr; /* points to the current element */\n    int32_t offset_ = 0;          /* offset in current listpack */\n    int32_t node_id_ = 0;         /* node index in the list, 0 is head */\n    uint8_t direction_ = 1;\n\n    friend class QList;\n  };\n\n  using IterateFunc = absl::FunctionRef<bool(Entry)>;\n  enum InsertOpt : uint8_t { BEFORE, AFTER };\n\n  struct TieringParams {\n    // TODO: hook functions and params that allow qlist offloading nodes to colder storage.\n    uint32_t node_depth_threshold = 2;\n  };\n\n  /**\n   * fill: The number of entries allowed per internal list node can be specified\n   * as a fixed maximum size or a maximum number of elements.\n   * For a fixed maximum size, use -5 through -1, meaning:\n   * -5: max size: 64 Kb  <-- not recommended for normal workloads\n   * -4: max size: 32 Kb  <-- not recommended\n   * -3: max size: 16 Kb  <-- probably not recommended\n   * -2: max size: 8 Kb   <-- good\n   * -1: max size: 4 Kb   <-- good\n   * Positive numbers mean store up to _exactly_ that number of elements\n   * per list node.\n   * The highest performing option is usually -2 (8 Kb size) or -1 (4 Kb size),\n   * but if your use case is unique, adjust the settings as necessary.\n   *\n   *\n   * Lists may also be compressed.\n   * \"compress\" is the number of quicklist listpack nodes from *each* side of\n   * the list to *exclude* from compression.  The head and tail of the list\n   * are always uncompressed for fast push/pop operations.  
Settings are:\n   * 0: disable all list compression\n   * 1: depth 1 means \"don't start compressing until after 1 node into the list,\n   *    going from either the head or tail\"\n   *    So: [head]->node->node->...->node->[tail]\n   *    [head], [tail] will always be uncompressed; inner nodes will compress.\n   * 2: [head]->[next]->node->node->...->node->[prev]->[tail]\n   *    2 here means: don't compress head or head->next or tail->prev or tail,\n   *    but compress all nodes between them.\n   * 3: [head]->[next]->[next]->node->node->...->node->[prev]->[prev]->[tail]\n   * etc.\n   *\n   */\n  explicit QList(int fill = -2, int compress = 0);\n\n  QList(QList&&) noexcept;\n  QList(const QList&) = delete;\n  ~QList();\n\n  QList& operator=(const QList&) = delete;\n  QList& operator=(QList&&) noexcept;\n\n  size_t Size() const {\n    return count_;\n  }\n\n  void Clear() noexcept;\n\n  void Push(std::string_view value, Where where);\n\n  // Returns the popped value. Precondition: list is not empty.\n  std::string Pop(Where where);\n\n  void AppendListpack(uint8_t* zl);\n  void AppendPlain(uint8_t* zl, size_t sz);\n\n  // Returns true if pivot found and elem inserted, false otherwise.\n  bool Insert(std::string_view pivot, std::string_view elem, InsertOpt opt);\n\n  void Insert(Iterator it, std::string_view elem, InsertOpt opt);\n\n  // Returns true if item was replaced, false if index is out of range.\n  bool Replace(long index, std::string_view elem);\n\n  size_t MallocUsed(bool slow) const;\n\n  // Iterates over entries from start to end (inclusive).\n  void Iterate(IterateFunc cb, long start, long end) const;\n\n  // Returns an iterator to tail or the head of the list.\n  // result.Valid() is true if the list is not empty.\n  Iterator GetIterator(Where where) const;\n\n  // Returns an iterator at a specific index 'idx',\n  // or Invalid iterator if index is out of range.\n  // negative index - means counting from the tail.\n  // result.Valid() is true if the 
index is within range.\n  Iterator GetIterator(long idx) const;\n\n  uint32_t node_count() const {\n    return len_;\n  }\n\n  unsigned compress_param() const {\n    return compress_;\n  }\n\n  Iterator Erase(Iterator it);\n\n  // Returns true if elements were deleted, false if list has not changed.\n  // Negative start index is allowed.\n  bool Erase(long start, unsigned count);\n\n  // Needed by tests and the rdb code.\n  const Node* Head() const {\n    return head_;\n  }\n\n  const Node* Tail() const {\n    return _Tail();\n  }\n\n  // Returns nullptr if quicklist does not fit the necessary requirements\n  // to be converted to listpack, and listpack otherwise. The ownership over the listpack\n  // blob is moved to the caller.\n  uint8_t* TryExtractListpack();\n\n  void set_fill(int fill) {\n    fill_ = fill;\n  }\n\n  void set_compr_method(COMPR_METHOD cm) {\n    compr_method_ = static_cast<unsigned>(cm);\n  }\n\n  static void SetPackedThreshold(unsigned threshold);\n\n  // Moves nodes away from underused pages by reallocating if the underlying page usage is low.\n  // Returns count of nodes reallocated to help in testing.\n  size_t DefragIfNeeded(PageUsage* page_usage);\n\n  void SetTieringParams(const TieringParams& params);\n\n  struct Stats {\n    uint64_t compression_attempts = 0;\n\n    // compression attempts with compression ratio that was not good enough to keep.\n    // Subset of compression_attempts.\n    uint64_t bad_compression_attempts = 0;\n\n    uint64_t decompression_calls = 0;\n\n    // How many bytes we currently keep compressed.\n    size_t compressed_bytes = 0;\n\n    // how many bytes we compressed from.\n    // Compressed savings are calculated as raw_compressed_bytes - compressed_bytes.\n    size_t raw_compressed_bytes = 0;\n    uint64_t interior_node_reads = 0;\n    uint64_t total_node_reads = 0;\n    uint64_t offload_requests = 0;\n    uint64_t onload_requests = 0;\n\n    Stats& operator+=(const Stats& other);\n  };\n  static __thread 
Stats stats;\n\n private:\n  bool AllowCompression() const {\n    return compress_ != 0;\n  }\n\n  Node* _Tail() const {\n    return head_ ? head_->prev : nullptr;\n  }\n\n  // Returns newly created plain node.\n  Node* InsertPlainNode(Node* old_node, std::string_view elem, uint32_t old_node_id,\n                        InsertOpt insert_opt);\n  void InsertNode(Node* old_node, Node* new_node, uint32_t old_node_id, InsertOpt insert_opt);\n\n  // Reduces the \"warmth\" of the node. Current implementation can decide on\n  // compressing the node based on its position in the list.\n  void CoolOff(Node* node, uint32_t node_id);\n\n  void Replace(Iterator it, std::string_view elem);\n  void CompressByDepth(Node* node);\n\n  // Prepares the node for read access.\n  void AccessForReads(bool recompress, Node* node);\n\n  Node* MergeNodes(Node* node);\n\n  // Deletes one of the nodes and returns the other.\n  Node* ListpackMerge(Node* a, Node* b);\n\n  void DelNode(Node* node);\n  bool DelPackedIndex(Node* node, uint8_t* p);\n  void OffloadNode(Node* node);\n\n  // Initializes iterator's zi_ to point to the element at offset_.\n  // Decompresses the node if needed. Assumes current_ is not null.\n  void InitIteratorEntry(Iterator* it) const;\n\n  Node* head_ = nullptr;\n  size_t malloc_size_ = 0;    // size of the quicklist struct\n  uint32_t count_ = 0;        /* total count of all entries in all listpacks */\n  uint32_t len_ = 0;          /* number of quicklistNodes */\n  int16_t fill_;              /* fill factor for individual nodes */\n  int16_t compr_method_ : 2;  // 0 - lzf, 1 - lz4\n  int16_t reserved1_ : 14;\n  unsigned compress_ : QL_COMP_BITS; /* depth of end nodes not to compress;0=off */\n  unsigned bookmark_count_ : QL_BM_BITS;\n  unsigned reserved2_ : 12;\n  uint32_t num_offloaded_nodes_ = 0;\n  std::unique_ptr<TieringParams> tiering_params_;\n};\n\n}  // namespace dfly\n"
  },
  {
    "path": "src/core/qlist_test.cc",
    "content": "// Copyright 2024, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#include \"core/qlist.h\"\n\n#include <absl/strings/match.h>\n#include <absl/strings/str_cat.h>\n#include <absl/strings/str_format.h>\n#include <gmock/gmock.h>\n#include <mimalloc.h>\n\n#include \"base/gtest.h\"\n#include \"base/logging.h\"\n#include \"core/mi_memory_resource.h\"\n#include \"core/page_usage/page_usage_stats.h\"\n#include \"io/file.h\"\n#include \"io/line_reader.h\"\n\nextern \"C\" {\n#include \"redis/listpack.h\"\n#include \"redis/zmalloc.h\"\n}\n\n/* quicklist compression disable */\n#define QUICKLIST_NOCOMPRESS 0\n\nnamespace dfly {\n\nusing namespace std;\nusing namespace testing;\nusing absl::StrCat;\n\nstatic int ql_verify_compress(const QList& ql) {\n  int errors = 0;\n  unsigned compress_param = ql.compress_param();\n  if (compress_param > 0) {\n    const auto* node = ql.Head();\n    unsigned int low_raw = compress_param;\n    unsigned int high_raw = ql.node_count() - compress_param;\n\n    for (unsigned int at = 0; at < ql.node_count(); at++, node = node->next) {\n      if (node && (at < low_raw || at >= high_raw)) {\n        if (node->encoding != QUICKLIST_NODE_ENCODING_RAW) {\n          LOG(ERROR) << \"Incorrect compression: node \" << at << \" is compressed at depth \"\n                     << compress_param << \" ((\" << low_raw << \",\" << high_raw\n                     << \" total nodes: \" << ql.node_count() << \"; size: \" << node->sz\n                     << \"; recompress: \" << node->recompress;\n          errors++;\n        }\n      } else {\n        if (node->encoding != QUICKLIST_NODE_ENCODING_LZF && !node->attempted_compress) {\n          LOG(ERROR) << absl::StrFormat(\n              \"Incorrect non-compression: node %d is NOT \"\n              \"compressed at depth %d ((%u, %u); total \"\n              \"nodes: %lu; size: %zu; recompress: %d; attempted: %d)\",\n              at, compress_param, low_raw, 
high_raw, ql.node_count(), node->sz, node->recompress,\n              node->attempted_compress);\n          errors++;\n        }\n      }\n    }\n  }\n  return errors;\n}\n\n/* Verify list metadata matches physical list contents. */\nstatic int ql_verify(const QList& ql, uint32_t nc, uint32_t count, uint32_t head_count,\n                     uint32_t tail_count) {\n  int errors = 0;\n\n  if (nc != ql.node_count()) {\n    LOG(ERROR) << \"quicklist length wrong: expected \" << nc << \" got \" << ql.node_count();\n    errors++;\n  }\n\n  if (count != ql.Size()) {\n    LOG(ERROR) << \"quicklist count wrong: expected \" << count << \" got \" << ql.Size();\n    errors++;\n  }\n\n  auto* node = ql.Head();\n  size_t node_size = 0;\n  while (node) {\n    node_size += node->count;\n    node = node->next;\n    CHECK(node != ql.Head());\n  }\n\n  if (node_size != ql.Size()) {\n    LOG(ERROR) << \"quicklist cached count not match actual count: expected \" << ql.Size() << \" got \"\n               << node_size;\n    errors++;\n  }\n\n  node = ql.Tail();\n  node_size = 0;\n  while (node) {\n    node_size += node->count;\n    node = (node == ql.Head()) ? nullptr : node->prev;\n  }\n  if (node_size != ql.Size()) {\n    LOG(ERROR) << \"has different forward count than reverse count!  \"\n                  \"Forward count is \"\n               << ql.Size() << \", reverse count is \" << node_size;\n    errors++;\n  }\n\n  if (ql.node_count() == 0 && errors == 0) {\n    return 0;\n  }\n\n  if (head_count != ql.Head()->count && head_count != lpLength(ql.Head()->entry)) {\n    LOG(ERROR) << absl::StrFormat(\"head count wrong: expected %u got cached %u vs. 
actual %lu\",\n                                  head_count, ql.Head()->count, lpLength(ql.Head()->entry));\n    errors++;\n  }\n\n  if (tail_count != ql.Tail()->count && tail_count != lpLength(ql.Tail()->entry)) {\n    LOG(ERROR) << \"tail count wrong: expected \" << tail_count << \"got cached \" << ql.Tail()->count\n               << \" vs. actual \" << lpLength(ql.Tail()->entry);\n    errors++;\n  }\n\n  errors += ql_verify_compress(ql);\n  return errors;\n}\n\nstatic void SetupMalloc() {\n  // configure redis lib zmalloc which requires mimalloc heap to work.\n  auto* tlh = mi_heap_get_backing();\n  init_zmalloc_threadlocal(tlh);\n  mi_option_set(mi_option_purge_delay, -1);  // disable purging of segments (affects benchmarks)\n}\n\nclass QListTest : public ::testing::Test {\n protected:\n  QListTest() : mr_(mi_heap_get_backing()) {\n  }\n\n  static void SetUpTestSuite() {\n    SetupMalloc();\n  }\n\n  static void TearDownTestSuite() {\n    mi_heap_collect(mi_heap_get_backing(), true);\n\n    auto cb_visit = [](const mi_heap_t* heap, const mi_heap_area_t* area, void* block,\n                       size_t block_size, void* arg) {\n      LOG(ERROR) << \"Unfreed allocations: block_size \" << block_size\n                 << \", allocated: \" << area->used * block_size;\n      return true;\n    };\n\n    mi_heap_visit_blocks(mi_heap_get_backing(), false /* do not visit all blocks*/, cb_visit,\n                         nullptr);\n  }\n\n  vector<string> ToItems() const;\n\n  MiMemoryResource mr_;\n  QList ql_;\n};\n\nvector<string> QListTest::ToItems() const {\n  vector<string> res;\n  auto cb = [&](const QList::Entry& e) {\n    res.push_back(e.to_string());\n    return true;\n  };\n\n  ql_.Iterate(cb, 0, ql_.Size());\n  return res;\n}\n\nTEST_F(QListTest, Basic) {\n  EXPECT_EQ(0, ql_.Size());\n  ql_.Push(\"abc\", QList::HEAD);\n  EXPECT_EQ(1, ql_.Size());\n  EXPECT_TRUE(ql_.Tail() == ql_.Head());\n  EXPECT_LE(ql_.MallocUsed(false), ql_.MallocUsed(true));\n\n  auto it 
= ql_.GetIterator(QList::HEAD);\n  ASSERT_TRUE(it.Valid());  // Iterator is valid immediately.\n\n  EXPECT_EQ(\"abc\", it.Get().view());\n\n  ASSERT_FALSE(it.Next());\n\n  ql_.Push(\"def\", QList::TAIL);\n  EXPECT_EQ(2, ql_.Size());\n  EXPECT_LE(ql_.MallocUsed(false), ql_.MallocUsed(true));\n\n  it = ql_.GetIterator(QList::TAIL);\n  ASSERT_TRUE(it.Valid());\n  EXPECT_EQ(\"def\", it.Get().view());\n\n  ASSERT_TRUE(it.Next());\n  EXPECT_EQ(\"abc\", it.Get().view());\n  ASSERT_FALSE(it.Next());\n\n  it = ql_.GetIterator(0);\n  ASSERT_TRUE(it.Valid());\n  EXPECT_EQ(\"abc\", it.Get().view());\n  it = ql_.GetIterator(-1);\n  ASSERT_TRUE(it.Valid());\n  EXPECT_EQ(\"def\", it.Get().view());\n\n  vector<string> items = ToItems();\n\n  EXPECT_THAT(items, ElementsAre(\"abc\", \"def\"));\n  EXPECT_GT(ql_.MallocUsed(false), ql_.MallocUsed(true) * 0.8);\n}\n\nTEST_F(QListTest, ListPack) {\n  string_view sv = \"abcded\"sv;\n  uint8_t* lp1 = lpPrepend(lpNew(0), (uint8_t*)sv.data(), sv.size());\n  uint8_t* lp2 = lpAppend(lpNew(0), (uint8_t*)sv.data(), sv.size());\n  ASSERT_EQ(lpBytes(lp1), lpBytes(lp2));\n  ASSERT_EQ(0, memcmp(lp1, lp2, lpBytes(lp1)));\n  lpFree(lp1);\n  lpFree(lp2);\n}\n\nTEST_F(QListTest, InsertDelete) {\n  EXPECT_FALSE(ql_.Insert(\"abc\", \"def\", QList::BEFORE));\n  ql_.Push(\"abc\", QList::HEAD);\n  EXPECT_TRUE(ql_.Insert(\"abc\", \"def\", QList::BEFORE));\n  auto items = ToItems();\n  EXPECT_THAT(items, ElementsAre(\"def\", \"abc\"));\n  EXPECT_TRUE(ql_.Insert(\"abc\", \"123456\", QList::AFTER));\n  items = ToItems();\n  EXPECT_THAT(items, ElementsAre(\"def\", \"abc\", \"123456\"));\n\n  auto it = ql_.GetIterator(QList::HEAD);\n  ASSERT_TRUE(it.Valid());\n\n  // Erase the items one by one.\n  it = ql_.Erase(it);\n  items = ToItems();\n  EXPECT_THAT(items, ElementsAre(\"abc\", \"123456\"));\n  ASSERT_TRUE(it.Valid());\n  ASSERT_EQ(\"abc\", it.Get().view());\n\n  it = ql_.Erase(it);\n  items = ToItems();\n  EXPECT_THAT(items, ElementsAre(\"123456\"));\n  
ASSERT_TRUE(it.Valid());\n  ASSERT_EQ(123456, it.Get().ival());\n\n  it = ql_.Erase(it);\n  items = ToItems();\n  EXPECT_THAT(items, ElementsAre());\n  ASSERT_FALSE(it.Valid());\n  EXPECT_EQ(0, ql_.Size());\n}\n\nTEST_F(QListTest, EraseLastElementInNodeAdvancesToNextNode) {\n  // Regression test for iterator semantics: when erasing the last element\n  // within a multi-entry node and another node follows, the iterator should\n  // correctly advance to the first element of the next node.\n\n  // Create a QList with fill=2 to ensure max 2 elements per node\n  ql_ = QList(2, QUICKLIST_NOCOMPRESS);\n\n  // Push 3 elements: this creates 2 nodes (first with 2 elements, second with 1)\n  ql_.Push(\"first\", QList::HEAD);   // Will be at index 2 after all pushes\n  ql_.Push(\"second\", QList::HEAD);  // Will be at index 1 after all pushes\n  ql_.Push(\"third\", QList::HEAD);   // Will be at index 0 after all pushes\n\n  // Verify we have 2 nodes as expected\n  ASSERT_EQ(2, ql_.node_count());\n  ASSERT_EQ(3, ql_.Size());\n\n  // Node structure should be:\n  // Node 1: [\"third\", \"second\"]\n  // Node 2: [\"first\"]\n\n  auto items = ToItems();\n  EXPECT_THAT(items, ElementsAre(\"third\", \"second\", \"first\"));\n\n  // Get iterator to \"second\" (last element in first node)\n  auto it = ql_.GetIterator(1);\n  ASSERT_TRUE(it.Valid());\n  ASSERT_EQ(\"second\", it.Get().view());\n\n  // Erase \"second\" - this is the last element in the first node\n  it = ql_.Erase(it);\n\n  // Iterator should now point to \"first\" (first element of the second node)\n  ASSERT_TRUE(it.Valid());\n  EXPECT_EQ(\"first\", it.Get().view());\n\n  // Verify the list is correct\n  items = ToItems();\n  EXPECT_THAT(items, ElementsAre(\"third\", \"first\"));\n  EXPECT_EQ(2, ql_.Size());\n}\n\nTEST_F(QListTest, PushPlain) {\n  // push a value large enough to trigger plain node insertion.\n  string val(9000, 'a');\n  ql_.Push(val, QList::HEAD);\n  auto items = ToItems();\n  EXPECT_THAT(items, 
ElementsAre(val));\n}\n\nTEST_F(QListTest, GetNum) {\n  ql_.Push(\"1251977\", QList::HEAD);\n  QList::Iterator it = ql_.GetIterator(QList::HEAD);\n  ASSERT_TRUE(it.Valid());\n  EXPECT_EQ(1251977, it.Get().ival());\n}\n\nTEST_F(QListTest, CompressionPlain) {\n  char buf[256];\n  QList::SetPackedThreshold(1);\n  ql_ = QList(-2, 1);\n\n  for (int i = 0; i < 500; i++) {\n    /* Set to 256 to allow the node to be triggered to compress,\n     * if it is less than 48(nocompress), the test will be successful. */\n    snprintf(buf, sizeof(buf), \"hello%d\", i);\n    ql_.Push(string_view{buf, sizeof(buf)}, QList::HEAD);\n  }\n  QList::SetPackedThreshold(0);\n\n  QList::Iterator it = ql_.GetIterator(QList::TAIL);\n  int i = 0;\n  ASSERT_TRUE(it.Valid());\n  do {\n    string_view sv = it.Get().view();\n    ASSERT_EQ(sizeof(buf), sv.size());\n    ASSERT_TRUE(absl::StartsWith(sv, StrCat(\"hello\", i)));\n    i++;\n  } while (it.Next());\n  EXPECT_EQ(500, i);\n}\n\nTEST_F(QListTest, LargeValues) {\n  string val(100000, 'a');\n  ql_.Push(val, QList::HEAD);\n  ql_.Push(val, QList::HEAD);\n  ql_.Pop(QList::HEAD);\n  auto items = ToItems();\n  EXPECT_THAT(items, ElementsAre(val));\n}\n\nTEST_F(QListTest, RemoveListpack) {\n  ql_.Push(\"ABC\", QList::TAIL);\n  ql_.Push(\"DEF\", QList::TAIL);\n  auto it = ql_.GetIterator(QList::TAIL);\n  ASSERT_TRUE(it.Valid());  // Iterator is valid immediately.\n  ql_.Erase(it);\n  it = ql_.GetIterator(QList::TAIL);\n  ASSERT_TRUE(it.Valid());\n  it = ql_.Erase(it);\n  ASSERT_FALSE(it.Valid());\n}\n\nTEST_F(QListTest, DefragListpackRaw) {\n  PageUsage page_usage{CollectPageStats::YES, 100.0};\n  page_usage.SetForceReallocate(true);\n\n  ql_.Push(\"first\", QList::TAIL);\n  ql_.Push(\"second\", QList::TAIL);\n\n  ASSERT_EQ(ql_.DefragIfNeeded(&page_usage), 1);\n  EXPECT_THAT(ToItems(), ElementsAre(\"first\", \"second\"));\n  ql_.Clear();\n}\n\nTEST_F(QListTest, DefragPlainTextRaw) {\n  PageUsage page_usage{CollectPageStats::YES, 100.0};\n  
page_usage.SetForceReallocate(true);\n  string big(100000, 'x');\n  ql_.Push(big, QList::HEAD);\n  ASSERT_EQ(ql_.DefragIfNeeded(&page_usage), 1);\n  EXPECT_THAT(ToItems(), ElementsAre(big));\n  ql_.Clear();\n}\n\nTEST_F(QListTest, DefragmentListpackCompressed) {\n  PageUsage page_usage{CollectPageStats::YES, 100.0};\n  page_usage.SetForceReallocate(true);\n\n  // MIN_COMPRESS_BYTES = 256\n  char buf[256];\n  constexpr auto items_per_list = 4;\n  constexpr auto total_items = 20;\n  ql_ = QList{items_per_list, 1};\n\n  for (auto i = 0; i < total_items; ++i) {\n    absl::SNPrintF(buf, 256, \"test__%d\", i);\n    ql_.Push(string_view{buf, 256}, QList::TAIL);\n  }\n\n  ASSERT_EQ(total_items / items_per_list, ql_.DefragIfNeeded(&page_usage));\n\n  auto i = 0;\n  auto it = ql_.GetIterator(QList::HEAD);\n  ASSERT_TRUE(it.Valid());\n  do {\n    auto v = it.Get().view();\n    ASSERT_EQ(v.size(), 256);\n    ASSERT_TRUE(absl::StartsWith(v, StrCat(\"test__\", i)));\n    ++i;\n  } while (it.Next());\n  ASSERT_EQ(i, total_items);\n}\n\nTEST_F(QListTest, Tiering) {\n  QList::stats.offload_requests = 0;\n  ql_.SetTieringParams(QList::TieringParams{.node_depth_threshold = 1});\n  for (int i = 0; i < 8000; i++) {\n    ql_.Push(absl::StrCat(\"value\", i), QList::TAIL);\n  }\n  EXPECT_EQ(QList::stats.offload_requests, 9);\n}\n\nusing FillCompress = tuple<int, unsigned, QList::COMPR_METHOD>;\n\nclass PrintToFillCompress {\n public:\n  std::string operator()(const TestParamInfo<FillCompress>& info) const {\n    int fill = get<0>(info.param);\n    int compress = get<1>(info.param);\n    QList::COMPR_METHOD method = get<2>(info.param);\n    string fill_str = fill >= 0 ? absl::StrCat(\"f\", fill) : absl::StrCat(\"fminus\", -fill);\n    string method_str = method == QList::LZF ? 
\"lzf\" : \"lz4\";\n    return absl::StrCat(fill_str, \"compr\", compress, method_str);\n  }\n};\n\nclass OptionsTest : public QListTest, public WithParamInterface<FillCompress> {};\n\nINSTANTIATE_TEST_SUITE_P(Matrix, OptionsTest,\n                         Combine(Values(-5, -4, -3, -2, -1, 0, 1, 2, 32, 66, 128, 999),\n                                 Values(0, 1, 2, 3, 4, 5, 6, 10), Values(QList::LZF, QList::LZ4)),\n                         PrintToFillCompress());\n\nTEST_P(OptionsTest, Numbers) {\n  auto [fill, compress, method] = GetParam();\n  ql_ = QList(fill, compress);\n  ql_.set_compr_method(method);\n  array<int64_t, 5000> nums;\n\n  for (unsigned i = 0; i < nums.size(); i++) {\n    nums[i] = -5157318210846258176 + i;\n    string val = absl::StrCat(nums[i]);\n    ql_.Push(val, QList::TAIL);\n  }\n  ql_.Push(\"xxxxxxxxxxxxxxxxxxxx\", QList::TAIL);\n\n  for (unsigned i = 0; i < nums.size(); i++) {\n    auto it = ql_.GetIterator(i);\n    ASSERT_TRUE(it.Valid());\n    ASSERT_EQ(nums[i], it.Get().ival()) << i;\n  }\n\n  auto it = ql_.GetIterator(nums.size());\n  ASSERT_TRUE(it.Valid());\n  EXPECT_EQ(\"xxxxxxxxxxxxxxxxxxxx\", it.Get().view());\n}\n\nTEST_P(OptionsTest, NumbersIndex) {\n  auto [fill, compress, method] = GetParam();\n  ql_ = QList(fill, compress);\n  ql_.set_compr_method(method);\n\n  long long nums[5000];\n  for (int i = 0; i < 760; i++) {\n    nums[i] = -5157318210846258176 + i;\n    ql_.Push(absl::StrCat(nums[i]), QList::TAIL);\n  }\n\n  unsigned i = 437;\n  QList::Iterator it = ql_.GetIterator(i);\n  ASSERT_TRUE(it.Valid());\n  do {\n    ASSERT_EQ(nums[i], it.Get().ival());\n    i++;\n  } while (it.Next());\n  ASSERT_EQ(760, i);\n}\n\nTEST_P(OptionsTest, DelRangeA) {\n  auto [fill, compress, method] = GetParam();\n  ql_ = QList(fill, compress);\n  ql_.set_compr_method(method);\n  long long nums[5000];\n  for (int i = 0; i < 33; i++) {\n    nums[i] = -5157318210846258176 + i;\n    ql_.Push(absl::StrCat(nums[i]), QList::TAIL);\n  }\n\n  if (fill 
== 32) {\n    ASSERT_EQ(0, ql_verify(ql_, 2, 33, 32, 1));\n  }\n\n  /* ltrim 3 3 (keep [3,3] inclusive = 1 remaining) */\n  ql_.Erase(0, 3);\n  ql_.Erase(-29, 4000); /* make sure not loop forever */\n  if (fill == 32) {\n    ASSERT_EQ(0, ql_verify(ql_, 1, 1, 1, 1));\n  }\n  auto it = ql_.GetIterator(0);\n  ASSERT_TRUE(it.Valid());\n  EXPECT_EQ(-5157318210846258173, it.Get().ival());\n}\n\nTEST_P(OptionsTest, DelRangeB) {\n  auto [fill, _, method] = GetParam();\n  ql_ = QList(fill, QUICKLIST_NOCOMPRESS);  // ignore compress parameter\n  ql_.set_compr_method(method);\n\n  long long nums[5000];\n  for (int i = 0; i < 33; i++) {\n    nums[i] = i;\n    ql_.Push(absl::StrCat(nums[i]), QList::TAIL);\n  }\n  if (fill == 32) {\n    ASSERT_EQ(0, ql_verify(ql_, 2, 33, 32, 1));\n  }\n  /* ltrim 5 16 (keep [5,16] inclusive = 12 remaining) */\n  ql_.Erase(0, 5);\n  ql_.Erase(-16, 16);\n  if (fill == 32) {\n    ASSERT_EQ(0, ql_verify(ql_, 1, 12, 12, 12));\n  }\n\n  auto it = ql_.GetIterator(0);\n  ASSERT_TRUE(it.Valid());\n  EXPECT_EQ(5, it.Get().ival());\n\n  it = ql_.GetIterator(-1);\n  ASSERT_TRUE(it.Valid());\n  EXPECT_EQ(16, it.Get().ival());\n\n  ql_.Push(\"bobobob\", QList::TAIL);\n  it = ql_.GetIterator(-1);\n  ASSERT_TRUE(it.Valid());\n  EXPECT_EQ(\"bobobob\", it.Get().view());\n\n  for (int i = 0; i < 12; i++) {\n    it = ql_.GetIterator(i);\n    ASSERT_TRUE(it.Valid());\n    EXPECT_EQ(i + 5, it.Get().ival());\n  }\n}\n\nTEST_P(OptionsTest, DelRangeC) {\n  auto [fill, compress, method] = GetParam();\n  ql_ = QList(fill, compress);\n  ql_.set_compr_method(method);\n\n  long long nums[5000];\n  for (int i = 0; i < 33; i++) {\n    nums[i] = -5157318210846258176 + i;\n    ql_.Push(absl::StrCat(nums[i]), QList::TAIL);\n  }\n  if (fill == 32) {\n    ASSERT_EQ(0, ql_verify(ql_, 2, 33, 32, 1));\n  }\n\n  /* ltrim 3 3 (keep [3,3] inclusive = 1 remaining) */\n  ql_.Erase(0, 3);\n  ql_.Erase(-29, 4000); /* make sure not loop forever */\n  if (fill == 32) {\n    ASSERT_EQ(0, 
ql_verify(ql_, 1, 1, 1, 1));\n  }\n  auto it = ql_.GetIterator(0);\n  ASSERT_TRUE(it.Valid());\n  ASSERT_EQ(-5157318210846258173, it.Get().ival());\n}\n\nTEST_P(OptionsTest, DelRangeD) {\n  auto [fill, compress, method] = GetParam();\n  ql_ = QList(fill, compress);\n  ql_.set_compr_method(method);\n\n  long long nums[5000];\n  for (int i = 0; i < 33; i++) {\n    nums[i] = -5157318210846258176 + i;\n    ql_.Push(absl::StrCat(nums[i]), QList::TAIL);\n  }\n  if (fill == 32) {\n    ASSERT_EQ(0, ql_verify(ql_, 2, 33, 32, 1));\n  }\n  ql_.Erase(-12, 3);\n\n  ASSERT_EQ(30, ql_.Size());\n}\n\nTEST_P(OptionsTest, DelRangeNode) {\n  auto [_, compress, method] = GetParam();\n  ql_ = QList(-2, compress);\n  ql_.set_compr_method(method);\n\n  for (int i = 0; i < 32; i++)\n    ql_.Push(StrCat(\"hello\", i), QList::HEAD);\n\n  ASSERT_EQ(0, ql_verify(ql_, 1, 32, 32, 32));\n  ql_.Erase(0, 32);\n  ASSERT_EQ(0, ql_verify(ql_, 0, 0, 0, 0));\n}\n\nTEST_P(OptionsTest, DelRangeNodeOverflow) {\n  auto [_, compress, method] = GetParam();\n  ql_ = QList(-2, compress);\n  ql_.set_compr_method(method);\n\n  for (int i = 0; i < 32; i++)\n    ql_.Push(StrCat(\"hello\", i), QList::HEAD);\n  ASSERT_EQ(0, ql_verify(ql_, 1, 32, 32, 32));\n  ql_.Erase(0, 128);\n  ASSERT_EQ(0, ql_verify(ql_, 0, 0, 0, 0));\n}\n\nTEST_P(OptionsTest, DelRangeMiddle100of500) {\n  auto [_, compress, method] = GetParam();\n  ql_ = QList(32, compress);\n\n  for (int i = 0; i < 500; i++)\n    ql_.Push(StrCat(\"hello\", i + 1), QList::TAIL);\n\n  ASSERT_EQ(0, ql_verify(ql_, 16, 500, 32, 20));\n  ql_.Erase(200, 100);\n  ASSERT_EQ(0, ql_verify(ql_, 14, 400, 32, 20));\n}\n\nTEST_P(OptionsTest, DelLessFillAcrossNodes) {\n  auto [_, compress, method] = GetParam();\n  ql_ = QList(32, compress);\n\n  for (int i = 0; i < 500; i++)\n    ql_.Push(StrCat(\"hello\", i + 1), QList::TAIL);\n  ASSERT_EQ(0, ql_verify(ql_, 16, 500, 32, 20));\n  ql_.Erase(60, 10);\n  ASSERT_EQ(0, ql_verify(ql_, 16, 490, 32, 20));\n}\n\nTEST_P(OptionsTest, 
DelNegOne) {\n  auto [_, compress, method] = GetParam();\n  ql_ = QList(32, compress);\n  for (int i = 0; i < 500; i++)\n    ql_.Push(StrCat(\"hello\", i + 1), QList::TAIL);\n  ASSERT_EQ(0, ql_verify(ql_, 16, 500, 32, 20));\n  ql_.Erase(-1, 1);\n  ASSERT_EQ(0, ql_verify(ql_, 16, 499, 32, 19));\n}\n\nTEST_P(OptionsTest, DelNegOneOverflow) {\n  auto [_, compress, method] = GetParam();\n  ql_ = QList(32, compress);\n  for (int i = 0; i < 500; i++)\n    ql_.Push(StrCat(\"hello\", i + 1), QList::TAIL);\n\n  ASSERT_EQ(0, ql_verify(ql_, 16, 500, 32, 20));\n  ql_.Erase(-1, 128);\n\n  ASSERT_EQ(0, ql_verify(ql_, 16, 499, 32, 19));\n}\n\nTEST_P(OptionsTest, DelNeg100From500) {\n  auto [_, compress, method] = GetParam();\n  ql_ = QList(32, compress);\n  for (int i = 0; i < 500; i++)\n    ql_.Push(StrCat(\"hello\", i + 1), QList::TAIL);\n  ql_.Erase(-100, 100);\n\n  QList::Iterator it = ql_.GetIterator(QList::TAIL);\n  ASSERT_TRUE(it.Valid());\n  ASSERT_EQ(\"hello400\", it.Get());\n  ASSERT_EQ(0, ql_verify(ql_, 13, 400, 32, 16));\n}\n\nTEST_P(OptionsTest, DelMin10_5_from50) {\n  auto [_, compress, method] = GetParam();\n  ql_ = QList(32, compress);\n\n  for (int i = 0; i < 50; i++)\n    ql_.Push(StrCat(\"hello\", i + 1), QList::TAIL);\n  ASSERT_EQ(0, ql_verify(ql_, 2, 50, 32, 18));\n  ql_.Erase(-10, 5);\n  ASSERT_EQ(0, ql_verify(ql_, 2, 45, 32, 13));\n}\n\nTEST_P(OptionsTest, DelElems) {\n  auto [fill, compress, method] = GetParam();\n  ql_ = QList(fill, compress);\n\n  const char* words[] = {\"abc\", \"foo\", \"bar\", \"foobar\", \"foobared\", \"zap\", \"bar\", \"test\", \"foo\"};\n  const char* result[] = {\"abc\", \"foo\", \"foobar\", \"foobared\", \"zap\", \"test\", \"foo\"};\n  const char* resultB[] = {\"abc\", \"foo\", \"foobar\", \"foobared\", \"zap\", \"test\"};\n\n  for (int i = 0; i < 9; i++)\n    ql_.Push(words[i], QList::TAIL);\n\n  /* lrem 0 bar */\n  auto iter = ql_.GetIterator(QList::HEAD);\n  while (iter.Valid()) {\n    if (iter.Get() == \"bar\") {\n      iter 
= ql_.Erase(iter);\n      // iter now points to next element, don't call Next()\n    } else {\n      if (!iter.Next())\n        break;\n    }\n  }\n  EXPECT_THAT(ToItems(), ElementsAreArray(result));\n\n  ql_.Push(\"foo\", QList::TAIL);\n\n  /* lrem -2 foo */\n  iter = ql_.GetIterator(QList::TAIL);\n  int del = 2;\n  while (iter.Valid()) {\n    if (iter.Get() == \"foo\") {\n      iter = ql_.Erase(iter);\n      del--;\n      if (del == 0)\n        break;\n      // iter now points to next element, don't call Next()\n    } else {\n      if (!iter.Next())\n        break;\n    }\n  }\n\n  /* check result of lrem -2 foo */\n  /* (we're ignoring the '2' part and still deleting all foo\n   * because we only have two foo) */\n  EXPECT_THAT(ToItems(), ElementsAreArray(resultB));\n}\n\nTEST_P(OptionsTest, IterateReverse) {\n  auto [_, compress, method] = GetParam();\n  ql_ = QList(32, compress);\n\n  for (int i = 0; i < 500; i++)\n    ql_.Push(StrCat(\"hello\", i), QList::HEAD);\n  QList::Iterator it = ql_.GetIterator(QList::TAIL);\n  int i = 0;\n  ASSERT_TRUE(it.Valid());\n  do {\n    ASSERT_EQ(StrCat(\"hello\", i), it.Get());\n    i++;\n  } while (it.Next());\n  ASSERT_EQ(500, i);\n  ASSERT_EQ(0, ql_verify(ql_, 16, 500, 20, 32));\n}\n\nTEST_P(OptionsTest, Iterate500) {\n  auto [_, compress, method] = GetParam();\n  ql_ = QList(32, compress);\n  for (int i = 0; i < 500; i++)\n    ql_.Push(StrCat(\"hello\", i), QList::HEAD);\n\n  QList::Iterator it = ql_.GetIterator(QList::HEAD);\n  int i = 499, count = 0;\n  ASSERT_TRUE(it.Valid());\n  do {\n    QList::Entry entry = it.Get();\n    ASSERT_EQ(StrCat(\"hello\", i), entry);\n    i--;\n    count++;\n  } while (it.Next());\n  EXPECT_EQ(500, count);\n  ASSERT_EQ(0, ql_verify(ql_, 16, 500, 20, 32));\n\n  it = ql_.GetIterator(QList::TAIL);\n  i = 0;\n  ASSERT_TRUE(it.Valid());\n  do {\n    ASSERT_EQ(StrCat(\"hello\", i), it.Get());\n    i++;\n  } while (it.Next());\n  EXPECT_EQ(500, i);\n}\n\nTEST_P(OptionsTest, IterateAfterOne) {\n  
auto [_, compress, method] = GetParam();\n  ql_ = QList(-2, compress);\n  ql_.Push(\"hello\", QList::HEAD);\n\n  QList::Iterator it = ql_.GetIterator(0);\n  ASSERT_TRUE(it.Valid());\n  ql_.Insert(it, \"abc\", QList::AFTER);\n\n  ASSERT_EQ(0, ql_verify(ql_, 1, 2, 2, 2));\n\n  /* verify results */\n  it = ql_.GetIterator(0);\n  ASSERT_TRUE(it.Valid());\n  ASSERT_EQ(\"hello\", it.Get());\n\n  it = ql_.GetIterator(1);\n  ASSERT_TRUE(it.Valid());\n  ASSERT_EQ(\"abc\", it.Get());\n}\n\nTEST_P(OptionsTest, IterateDelete) {\n  auto [fill, compress, method] = GetParam();\n  ql_ = QList(fill, compress);\n\n  ql_.Push(\"abc\", QList::TAIL);\n  ql_.Push(\"def\", QList::TAIL);\n  ql_.Push(\"hij\", QList::TAIL);\n  ql_.Push(\"jkl\", QList::TAIL);\n  ql_.Push(\"oop\", QList::TAIL);\n\n  QList::Iterator it = ql_.GetIterator(QList::HEAD);\n  while (it.Valid()) {\n    if (it.Get() == \"hij\") {\n      it = ql_.Erase(it);\n    } else {\n      it.Next();\n    }\n  }\n\n  ASSERT_THAT(ToItems(), ElementsAre(\"abc\", \"def\", \"jkl\", \"oop\"));\n}\n\nTEST_P(OptionsTest, InsertBeforeOne) {\n  auto [_, compress, method] = GetParam();\n  ql_ = QList(-2, compress);\n\n  ql_.Push(\"hello\", QList::HEAD);\n  QList::Iterator it = ql_.GetIterator(0);\n  ASSERT_TRUE(it.Valid());\n  ql_.Insert(it, \"abc\", QList::BEFORE);\n  ql_verify(ql_, 1, 2, 2, 2);\n\n  /* verify results */\n  it = ql_.GetIterator(0);\n  ASSERT_TRUE(it.Valid());\n  ASSERT_EQ(\"abc\", it.Get());\n\n  it = ql_.GetIterator(1);\n  ASSERT_TRUE(it.Valid());\n  ASSERT_EQ(\"hello\", it.Get());\n}\n\nTEST_P(OptionsTest, InsertWithHeadFull) {\n  auto [_, compress, method] = GetParam();\n  ql_ = QList(4, compress);\n\n  for (int i = 0; i < 10; i++)\n    ql_.Push(StrCat(\"hello\", i), QList::TAIL);\n\n  ql_.set_fill(-1);\n  QList::Iterator it = ql_.GetIterator(-10);\n  ASSERT_TRUE(it.Valid());\n\n  char buf[4096] = {0};\n  ql_.Insert(it, string_view{buf, sizeof(buf)}, QList::BEFORE);\n  ql_verify(ql_, 4, 11, 1, 
2);\n}\n\nTEST_P(OptionsTest, InsertWithTailFull) {\n  auto [_, compress, method] = GetParam();\n  ql_ = QList(4, compress);\n  for (int i = 0; i < 10; i++)\n    ql_.Push(StrCat(\"hello\", i), QList::HEAD);\n\n  ql_.set_fill(-1);\n  QList::Iterator it = ql_.GetIterator(-1);\n  ASSERT_TRUE(it.Valid());\n\n  char buf[4096] = {0};\n  ql_.Insert(it, string_view{buf, sizeof(buf)}, QList::AFTER);\n  ql_verify(ql_, 4, 11, 2, 1);\n}\n\nTEST_P(OptionsTest, InsertOnceWhileIterating) {\n  auto [fill, compress, method] = GetParam();\n  ql_ = QList(fill, compress);\n\n  ql_.Push(\"abc\", QList::TAIL);\n  ql_.set_fill(1);\n\n  ql_.Push(\"def\", QList::TAIL);\n  ql_.set_fill(fill);\n  ql_.Push(\"bob\", QList::TAIL);\n  ql_.Push(\"foo\", QList::TAIL);\n  ql_.Push(\"zoo\", QList::TAIL);\n\n  /* insert \"bar\" before \"bob\" while iterating over list. */\n  QList::Iterator it = ql_.GetIterator(QList::HEAD);\n  if (it.Valid()) {\n    do {\n      if (it.Get() == \"bob\") {\n        ql_.Insert(it, \"bar\", QList::BEFORE);\n        break; /* didn't we fix insert-while-iterating? 
*/\n      }\n    } while (it.Next());\n  }\n  EXPECT_THAT(ToItems(), ElementsAre(\"abc\", \"def\", \"bar\", \"bob\", \"foo\", \"zoo\"));\n}\n\nTEST_P(OptionsTest, InsertBefore250NewInMiddleOf500Elements) {\n  auto [fill, compress, method] = GetParam();\n  ql_ = QList(fill, compress);\n  for (int i = 0; i < 500; i++) {\n    string val = StrCat(\"hello\", i);\n    val.resize(32);\n    ql_.Push(val, QList::TAIL);\n  }\n\n  for (int i = 0; i < 250; i++) {\n    QList::Iterator it = ql_.GetIterator(250);\n    ASSERT_TRUE(it.Valid());\n    ql_.Insert(it, StrCat(\"abc\", i), QList::BEFORE);\n  }\n\n  if (fill == 32) {\n    ASSERT_EQ(0, ql_verify(ql_, 25, 750, 32, 20));\n  }\n}\n\nTEST_P(OptionsTest, InsertAfter250NewInMiddleOf500Elements) {\n  auto [fill, compress, method] = GetParam();\n  ql_ = QList(fill, compress);\n  for (int i = 0; i < 500; i++)\n    ql_.Push(StrCat(\"hello\", i), QList::HEAD);\n\n  for (int i = 0; i < 250; i++) {\n    QList::Iterator it = ql_.GetIterator(250);\n    ASSERT_TRUE(it.Valid());\n    ql_.Insert(it, StrCat(\"abc\", i), QList::AFTER);\n  }\n\n  ASSERT_EQ(750, ql_.Size());\n\n  if (fill == 32) {\n    ASSERT_EQ(0, ql_verify(ql_, 26, 750, 20, 32));\n  }\n}\n\nTEST_P(OptionsTest, NextPlain) {\n  auto [_, compress, method] = GetParam();\n  ql_ = QList(-2, compress);\n\n  QList::SetPackedThreshold(3);\n\n  const char* strings[] = {\"hello1\", \"hello2\", \"h3\", \"h4\", \"hello5\"};\n\n  for (int i = 0; i < 5; ++i)\n    ql_.Push(strings[i], QList::HEAD);\n\n  QList::Iterator it = ql_.GetIterator(QList::TAIL);\n  int j = 0;\n\n  ASSERT_TRUE(it.Valid());\n  do {\n    ASSERT_EQ(strings[j], it.Get());\n    j++;\n  } while (it.Next());\n}\n\nTEST_P(OptionsTest, IndexFrom500) {\n  auto [fill, compress, method] = GetParam();\n  ql_ = QList(fill, compress);\n  for (int i = 0; i < 500; i++)\n    ql_.Push(StrCat(\"hello\", i + 1), QList::TAIL);\n\n  QList::Iterator it = ql_.GetIterator(1);\n  ASSERT_TRUE(it.Valid());\n  ASSERT_EQ(\"hello2\", it.Get());\n  
it = ql_.GetIterator(200);\n  ASSERT_TRUE(it.Valid());\n  ASSERT_EQ(\"hello201\", it.Get());\n\n  it = ql_.GetIterator(-1);\n  ASSERT_TRUE(it.Valid());\n  ASSERT_EQ(\"hello500\", it.Get());\n\n  it = ql_.GetIterator(-2);\n  ASSERT_TRUE(it.Valid());\n  ASSERT_EQ(\"hello499\", it.Get());\n\n  it = ql_.GetIterator(-100);\n  ASSERT_TRUE(it.Valid());\n  ASSERT_EQ(\"hello401\", it.Get());\n\n  it = ql_.GetIterator(500);\n  ASSERT_FALSE(it.Valid());\n}\n\nstatic void BM_QListCompress(benchmark::State& state) {\n  SetupMalloc();\n\n  string path = base::ProgramRunfile(\"testdata/list.txt.zst\");\n  io::Result<io::Source*> src = io::OpenUncompressed(path);\n  CHECK(src) << src.error();\n  io::LineReader lr(*src, TAKE_OWNERSHIP);\n  string_view line;\n  vector<string> lines;\n  while (lr.Next(&line)) {\n    lines.push_back(string(line));\n  }\n\n  VLOG(1) << \"Read \" << lines.size() << \" lines \" << state.range(0);\n  while (state.KeepRunning()) {\n    QList ql(-2, state.range(0));  // uses differrent compression modes, see below.\n    ql.set_compr_method(state.range(1) == 0 ? QList::LZF : QList::LZ4);\n\n    for (const string& l : lines) {\n      ql.Push(l, QList::TAIL);\n    }\n    DVLOG(1) << ql.node_count() << \", \" << ql.MallocUsed(true);\n  }\n  CHECK_EQ(0, zmalloc_used_memory_tl);\n}\nBENCHMARK(BM_QListCompress)\n    ->ArgsProduct({{1, 4, 0}, {0, 1}});  // x - compression depth, y compression method.\n                                         // x = 0 no compression, 1 - compress all nodes but edges,\n                                         // 4 - compress all but 4 nodes from edges.\n\nstatic void BM_QListUncompress(benchmark::State& state) {\n  SetupMalloc();\n\n  string path = base::ProgramRunfile(\"testdata/list.txt.zst\");\n  io::Result<io::Source*> src = io::OpenUncompressed(path);\n  CHECK(src) << src.error();\n  io::LineReader lr(*src, TAKE_OWNERSHIP);\n  string_view line;\n  QList ql(-2, state.range(0));\n  ql.set_compr_method(state.range(1) == 0 ? 
QList::LZF : QList::LZ4);\n  QList::stats.compression_attempts = 0;\n\n  CHECK_EQ(QList::stats.compressed_bytes, 0u);\n  CHECK_EQ(QList::stats.raw_compressed_bytes, 0u);\n\n  size_t line_len = 0;\n  while (lr.Next(&line)) {\n    ql.Push(line, QList::TAIL);\n    line_len += line.size();\n  }\n\n  if (ql.compress_param() > 0) {\n    CHECK_GT(QList::stats.compression_attempts, 0u);\n    CHECK_GT(QList::stats.compressed_bytes, 0u);\n    CHECK_GT(QList::stats.raw_compressed_bytes, QList::stats.compressed_bytes);\n  }\n\n  LOG(INFO) << \"MallocUsed \" << ql.compress_param() << \": \" << ql.MallocUsed(true) << \", \"\n            << ql.MallocUsed(false);\n  size_t exp_count = ql.Size();\n\n  while (state.KeepRunning()) {\n    unsigned actual_count = 0, actual_len = 0;\n    ql.Iterate(\n        [&](const QList::Entry& e) {\n          actual_len += e.view().size();\n          ++actual_count;\n          return true;\n        },\n        0, -1);\n    CHECK_EQ(exp_count, actual_count);\n    CHECK_EQ(line_len, actual_len);\n  }\n}\nBENCHMARK(BM_QListUncompress)->ArgsProduct({{1, 4, 0}, {0, 1}});\n\n}  // namespace dfly\n"
  },
  {
    "path": "src/core/score_map.cc",
    "content": "// Copyright 2023, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#include \"core/score_map.h\"\n\n#include \"base/endian.h\"\n#include \"base/logging.h\"\n#include \"core/compact_object.h\"\n#include \"core/page_usage/page_usage_stats.h\"\n#include \"core/sds_utils.h\"\n\nextern \"C\" {\n#include \"redis/zmalloc.h\"\n}\n\nusing namespace std;\n\nnamespace dfly {\n\nnamespace {\n\ninline double GetValue(sds key) {\n  char* valptr = key + sdslen(key) + 1;\n  return absl::bit_cast<double>(absl::little_endian::Load64(valptr));\n}\n\nvoid* AllocateScored(string_view field, double value) {\n  size_t meta_offset = field.size() + 1;\n\n  // The layout is:\n  // key, '\\0', 8-byte double value\n  sds newkey = AllocSdsWithSpace(field.size(), 8);\n\n  if (!field.empty()) {\n    memcpy(newkey, field.data(), field.size());\n  }\n\n  absl::little_endian::Store64(newkey + meta_offset, absl::bit_cast<uint64_t>(value));\n\n  return newkey;\n}\n\n}  // namespace\n\nScoreMap::~ScoreMap() {\n  Clear();\n}\n\npair<void*, bool> ScoreMap::AddOrUpdate(string_view field, double value) {\n  void* newkey = AllocateScored(field, value);\n\n  // Replace the whole entry.\n  sds prev_entry = (sds)AddOrReplaceObj(newkey, false);\n  if (prev_entry) {\n    ObjDelete(prev_entry, false);\n    return {newkey, false};\n  }\n\n  return {newkey, true};\n}\n\nstd::pair<void*, bool> ScoreMap::AddOrSkip(std::string_view field, double value) {\n  uint64_t hashcode = Hash(&field, 1);\n  void* obj = FindInternal(&field, hashcode, 1);  // 1 - string_view\n\n  if (obj)\n    return {obj, false};\n\n  void* newkey = AllocateScored(field, value);\n  DenseSet::AddUnique(newkey, false, hashcode);\n  return {newkey, true};\n}\n\nvoid* ScoreMap::AddUnique(std::string_view field, double value) {\n  void* newkey = AllocateScored(field, value);\n  DenseSet::AddUnique(newkey, false, Hash(&field, 1));\n  return newkey;\n}\n\nstd::optional<double> 
ScoreMap::Find(std::string_view field) {\n  uint64_t hashcode = Hash(&field, 1);\n  sds str = (sds)FindInternal(&field, hashcode, 1);\n  if (!str)\n    return nullopt;\n\n  return GetValue(str);\n}\n\nuint64_t ScoreMap::Hash(const void* obj, uint32_t cookie) const {\n  DCHECK_LT(cookie, 2u);\n\n  if (cookie == 0) {\n    sds s = (sds)obj;\n    return CompactObj::HashCode(string_view{s, sdslen(s)});\n  }\n\n  const string_view* sv = (const string_view*)obj;\n  return CompactObj::HashCode(*sv);\n}\n\nbool ScoreMap::ObjEqual(const void* left, const void* right, uint32_t right_cookie) const {\n  DCHECK_LT(right_cookie, 2u);\n\n  sds s1 = (sds)left;\n  if (right_cookie == 0) {\n    sds s2 = (sds)right;\n\n    if (sdslen(s1) != sdslen(s2)) {\n      return false;\n    }\n\n    return sdslen(s1) == 0 || memcmp(s1, s2, sdslen(s1)) == 0;\n  }\n\n  const string_view* right_sv = (const string_view*)right;\n  string_view left_sv{s1, sdslen(s1)};\n  return left_sv == (*right_sv);\n}\n\nsize_t ScoreMap::ObjectAllocSize(const void* obj) const {\n  sds s1 = (sds)obj;\n  size_t res = zmalloc_usable_size(sdsAllocPtr(s1));\n  return res;\n}\n\nuint32_t ScoreMap::ObjExpireTime(const void* obj) const {\n  // Should not reach.\n  return UINT32_MAX;\n}\n\nvoid ScoreMap::ObjUpdateExpireTime(const void* obj, uint32_t ttl_sec) {\n  // Should not reach.\n}\n\nvoid ScoreMap::ObjDelete(void* obj, bool has_ttl) const {\n  sds s1 = (sds)obj;\n  sdsfree(s1);\n}\n\nvoid* ScoreMap::ObjectClone(const void* obj, bool has_ttl, bool add_ttl) const {\n  return nullptr;\n}\n\ndetail::SdsScorePair ScoreMap::iterator::BreakToPair(void* obj) {\n  sds f = (sds)obj;\n  return detail::SdsScorePair(f, GetValue(f));\n}\n\nnamespace {\n// Does not Release obj. 
Callers must do so explicitly if a `Reallocation` happened\npair<sds, bool> DuplicateEntryIfFragmented(void* obj, PageUsage* page_usage) {\n  sds key = (sds)obj;\n  size_t key_len = sdslen(key);\n\n  if (!page_usage->IsPageForObjectUnderUtilized(key))\n    return {key, false};\n\n  sds newkey = AllocSdsWithSpace(key_len, 8);\n  memcpy(newkey, key, key_len + 8 + 1);\n\n  return {newkey, true};\n}\n\n}  // namespace\n\nbool ScoreMap::iterator::ReallocIfNeeded(PageUsage* page_usage, std::function<void(sds, sds)> cb) {\n  auto* ptr = curr_entry_;\n\n  if (ptr->IsLink()) {\n    ptr = ptr->AsLink();\n  }\n\n  DCHECK(!ptr->IsEmpty());\n  DCHECK(ptr->IsObject());\n\n  auto* obj = ptr->GetObject();\n  auto [new_obj, realloced] = DuplicateEntryIfFragmented(obj, page_usage);\n  if (realloced) {\n    if (cb) {\n      cb((sds)obj, (sds)new_obj);\n    }\n    sdsfree((sds)obj);\n    ptr->SetObject(new_obj);\n  }\n  return realloced;\n}\n\n}  // namespace dfly\n"
  },
  {
    "path": "src/core/score_map.h",
    "content": "// Copyright 2023, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#pragma once\n\n#include <optional>\n#include <string_view>\n\n#include \"core/dense_set.h\"\n\nextern \"C\" {\n#include \"redis/sds.h\"\n}\n\nnamespace dfly {\n\nclass PageUsage;\n\nnamespace detail {\n\nclass SdsScorePair {\n public:\n  SdsScorePair(sds k, double v) : first(k), second(v) {\n  }\n\n  SdsScorePair* operator->() {\n    return this;\n  }\n\n  const SdsScorePair* operator->() const {\n    return this;\n  }\n\n  const sds first;\n  const double second;\n};\n\n};  // namespace detail\n\nclass ScoreMap : public DenseSet {\n public:\n  ScoreMap() {\n  }\n\n  ~ScoreMap();\n\n  class iterator : private DenseSet::IteratorBase {\n    static detail::SdsScorePair BreakToPair(void* obj);\n\n   public:\n    iterator() : IteratorBase() {\n    }\n\n    iterator(DenseSet* owner, bool is_end) : IteratorBase(owner, is_end) {\n    }\n\n    detail::SdsScorePair operator->() const {\n      void* ptr = curr_entry_->GetObject();\n      return BreakToPair(ptr);\n    }\n\n    detail::SdsScorePair operator*() const {\n      void* ptr = curr_entry_->GetObject();\n      return BreakToPair(ptr);\n    }\n\n    // Try reducing memory fragmentation of the value by re-allocating. Returns true if\n    // re-allocation happened.\n    // If function is set, we call it with the old and the new sds. This is used for data\n    // structures that hold multiple storages that need to be update simultaneously. For example,\n    // SortedMap contains both a B+ tree and a ScoreMap with the former, containing pointers\n    // to the later. Therefore, we need to update those. 
This is handled by the cb below.\n    bool ReallocIfNeeded(PageUsage* page_usage, std::function<void(sds, sds)> = {});\n\n    iterator& operator++() {\n      Advance();\n      return *this;\n    }\n\n    bool operator==(const iterator& b) const {\n      return curr_list_ == b.curr_list_;\n    }\n\n    bool operator!=(const iterator& b) const {\n      return !(*this == b);\n    }\n  };\n\n  // Returns pointer to the internal object and the insertion result.\n  // i.e. true if field was added, otherwise updates its value and returns false.\n  std::pair<void*, bool> AddOrUpdate(std::string_view field, double value);\n\n  // Returns true if field was added\n  // false, if already exists. In that case no update is done.\n  std::pair<void*, bool> AddOrSkip(std::string_view field, double value);\n\n  void* AddUnique(std::string_view field, double value);\n\n  bool Erase(std::string_view field) {\n    return EraseInternal(&field, 1);\n  }\n\n  bool Erase(sds field) {\n    return EraseInternal(field, 0);\n  }\n\n  /// @brief  Returns value of the key or std::nullopt if key not found.\n  /// @param key\n  /// @return the score stored for the key, or std::nullopt\n  std::optional<double> Find(std::string_view key);\n\n  void* FindObj(std::string_view sv) {\n    return FindInternal(&sv, Hash(&sv, 1), 1);\n  }\n\n  iterator begin() {\n    return iterator{this, false};\n  }\n\n  iterator end() {\n    return iterator{this, true};\n  }\n\n private:\n  uint64_t Hash(const void* obj, uint32_t cookie) const final;\n  bool ObjEqual(const void* left, const void* right, uint32_t right_cookie) const final;\n  size_t ObjectAllocSize(const void* obj) const final;\n  uint32_t ObjExpireTime(const void* obj) const final;\n  void ObjUpdateExpireTime(const void* obj, uint32_t ttl_sec) override;\n  void ObjDelete(void* obj, bool has_ttl) const override;\n  void* ObjectClone(const void* obj, bool has_ttl, bool add_ttl) const final;\n};\n\n}  // namespace dfly\n"
  },
  {
    "path": "src/core/score_map_test.cc",
    "content": "// Copyright 2023, Roman Gershman.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#include \"core/score_map.h\"\n\n#include <mimalloc.h>\n\n#include \"base/gtest.h\"\n#include \"base/logging.h\"\n#include \"core/mi_memory_resource.h\"\n#include \"core/page_usage/page_usage_stats.h\"\n\nextern \"C\" {\n#include \"redis/zmalloc.h\"\n}\n\nusing namespace std;\n\nnamespace dfly {\n\nclass ScoreMapTest : public ::testing::Test {\n protected:\n  static void SetUpTestSuite() {\n    auto* tlh = mi_heap_get_backing();\n    init_zmalloc_threadlocal(tlh);\n    InitTLStatelessAllocMR(PMR_NS::get_default_resource());\n  }\n\n  static void TearDownTestSuite() {\n    mi_heap_collect(mi_heap_get_backing(), true);\n\n    auto cb_visit = [](const mi_heap_t* heap, const mi_heap_area_t* area, void* block,\n                       size_t block_size, void* arg) {\n      LOG(ERROR) << \"Unfreed allocations: block_size \" << block_size\n                 << \", allocated: \" << area->used * block_size;\n      return true;\n    };\n\n    mi_heap_visit_blocks(mi_heap_get_backing(), false /* do not visit all blocks*/, cb_visit,\n                         nullptr);\n  }\n\n  ScoreMapTest() : mi_alloc_(mi_heap_get_backing()) {\n  }\n\n  void SetUp() override {\n    sm_.reset(new ScoreMap());\n  }\n\n  void TearDown() override {\n    sm_.reset();\n    EXPECT_EQ(zmalloc_used_memory_tl, 0);\n  }\n\n  MiMemoryResource mi_alloc_;\n  std::unique_ptr<ScoreMap> sm_;\n};\n\nTEST_F(ScoreMapTest, Basic) {\n  EXPECT_TRUE(sm_->AddOrUpdate(\"foo\", 5).second);\n  EXPECT_EQ(5, sm_->Find(\"foo\"));\n\n  auto it = sm_->begin();\n  EXPECT_STREQ(\"foo\", it->first);\n  EXPECT_EQ(5, it->second);\n  ++it;\n\n  EXPECT_TRUE(it == sm_->end());\n\n  for (const auto& k_v : *sm_) {\n    EXPECT_STREQ(\"foo\", k_v.first);\n    EXPECT_EQ(5, k_v.second);\n  }\n\n  size_t sz = sm_->ObjMallocUsed();\n  EXPECT_FALSE(sm_->AddOrUpdate(\"foo\", 17).second);\n  EXPECT_EQ(sm_->ObjMallocUsed(), 
sz);\n\n  it = sm_->begin();\n  EXPECT_EQ(17, it->second);\n\n  EXPECT_FALSE(sm_->AddOrSkip(\"foo\", 31).second);\n  EXPECT_EQ(17, it->second);\n}\n\nTEST_F(ScoreMapTest, EmptyFind) {\n  EXPECT_EQ(nullopt, sm_->Find(\"bar\"));\n}\n\nuint64_t total_wasted_memory = 0;\n\nTEST_F(ScoreMapTest, ReallocIfNeeded) {\n  auto build_str = [](size_t i) { return to_string(i) + string(131, 'a'); };\n\n  auto count_waste = [](const mi_heap_t* heap, const mi_heap_area_t* area, void* block,\n                        size_t block_size, void* arg) {\n    size_t used = block_size * area->used;\n    total_wasted_memory += area->committed - used;\n    return true;\n  };\n\n  for (size_t i = 0; i < 10'000; i++) {\n    sm_->AddOrUpdate(build_str(i), i);\n  }\n\n  for (size_t i = 0; i < 10'000; i++) {\n    if (i % 10 == 0)\n      continue;\n    sm_->Erase(build_str(i));\n  }\n\n  mi_heap_collect(mi_heap_get_backing(), true);\n  mi_heap_visit_blocks(mi_heap_get_backing(), false, count_waste, nullptr);\n  size_t wasted_before = total_wasted_memory;\n\n  size_t underutilized = 0;\n  PageUsage page_usage{CollectPageStats::NO, 0.9};\n  for (auto it = sm_->begin(); it != sm_->end(); ++it) {\n    underutilized += page_usage.IsPageForObjectUnderUtilized(it->first);\n    it.ReallocIfNeeded(&page_usage);\n  }\n  // Check there are underutilized pages\n  CHECK_GT(underutilized, 0u);\n\n  total_wasted_memory = 0;\n  mi_heap_collect(mi_heap_get_backing(), true);\n  mi_heap_visit_blocks(mi_heap_get_backing(), false, count_waste, nullptr);\n  size_t wasted_after = total_wasted_memory;\n\n  // Check we waste significantly less now\n  EXPECT_GT(wasted_before, wasted_after * 2);\n\n  ASSERT_EQ(sm_->UpperBoundSize(), 1000);\n  for (size_t i = 0; i < 1000; i++) {\n    auto res = sm_->Find(build_str(i * 10));\n    ASSERT_EQ(res.has_value(), true);\n    ASSERT_EQ((size_t)*res, i * 10);\n  }\n}\n\n}  // namespace dfly\n"
  },
  {
    "path": "src/core/sds_utils.cc",
    "content": "// Copyright 2022, Roman Gershman.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#include \"core/sds_utils.h\"\n\n#include \"base/endian.h\"\n\nextern \"C\" {\n#include \"redis/sds.h\"\n#include \"redis/zmalloc.h\"\n}\n\nnamespace dfly {\n\nnamespace {\n\ninline char SdsReqType(size_t string_size) {\n  if (string_size < 1 << 5)\n    return SDS_TYPE_5;\n  if (string_size < 1 << 8)\n    return SDS_TYPE_8;\n  if (string_size < 1 << 16)\n    return SDS_TYPE_16;\n  if (string_size < 1ll << 32)\n    return SDS_TYPE_32;\n  return SDS_TYPE_64;\n}\n\ninline int SdsHdrSize(char type) {\n  switch (type & SDS_TYPE_MASK) {\n    case SDS_TYPE_5:\n      return sizeof(struct sdshdr5);\n    case SDS_TYPE_8:\n      return sizeof(struct sdshdr8);\n    case SDS_TYPE_16:\n      return sizeof(struct sdshdr16);\n    case SDS_TYPE_32:\n      return sizeof(struct sdshdr32);\n    case SDS_TYPE_64:\n      return sizeof(struct sdshdr64);\n  }\n  return 0;\n}\n\n}  // namespace\n\nvoid SdsUpdateExpireTime(const void* obj, uint32_t time_at, uint32_t offset) {\n  sds str = (sds)obj;\n  char* valptr = str + sdslen(str) + 1;\n  absl::little_endian::Store32(valptr + offset, time_at);\n}\n\nchar* AllocSdsWithSpace(uint32_t strlen, uint32_t space) {\n  size_t usable;\n  char type = SdsReqType(strlen);\n  int hdrlen = SdsHdrSize(type);\n\n  char* ptr = (char*)zmalloc_usable(hdrlen + strlen + 1 + space, &usable);\n  char* s = ptr + hdrlen;\n  char* fp = s - 1;\n\n  switch (type) {\n    case SDS_TYPE_5: {\n      *fp = type | (strlen << SDS_TYPE_BITS);\n      break;\n    }\n\n    case SDS_TYPE_8: {\n      SDS_HDR_VAR(8, s);\n      sh->len = strlen;\n      sh->alloc = strlen;\n      *fp = type;\n      break;\n    }\n\n    case SDS_TYPE_16: {\n      SDS_HDR_VAR(16, s);\n      sh->len = strlen;\n      sh->alloc = strlen;\n      *fp = type;\n      break;\n    }\n\n    case SDS_TYPE_32: {\n      SDS_HDR_VAR(32, s);\n      sh->len = strlen;\n      sh->alloc = strlen;\n      
*fp = type;\n      break;\n    }\n    case SDS_TYPE_64: {\n      SDS_HDR_VAR(64, s);\n      sh->len = strlen;\n      sh->alloc = strlen;\n      *fp = type;\n      break;\n    }\n  }\n\n  s[strlen] = '\\0';\n  return s;\n}\n\n}  // namespace dfly\n"
  },
  {
    "path": "src/core/sds_utils.h",
    "content": "// Copyright 2022, Roman Gershman.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#pragma once\n\n#include <cstdint>\n\nnamespace dfly {\n\n// Allocates an sds string that has an additional space at the end that\n// sds does is not aware of. Useful when you need to allocate immutable\n// sds string (keys) with metadata attached to them.\nchar* AllocSdsWithSpace(uint32_t strlen, uint32_t space);\n\n// Updates the expire time of the sds object. The offset is the number of bytes\nvoid SdsUpdateExpireTime(const void* obj, uint32_t time_at, uint32_t offset);\n\n}  // namespace dfly\n"
  },
  {
    "path": "src/core/search/CMakeLists.txt",
    "content": "gen_flex(lexer)\ngen_bison(parser)\n\ncur_gen_dir(gen_dir)\n\nset_source_files_properties(${gen_dir}/parser.cc PROPERTIES\n                            COMPILE_FLAGS \"-Wno-maybe-uninitialized\")\nadd_library(dfly_search_core ast_expr.cc base.cc hnsw_index.cc query_driver.cc search.cc\n            indices.cc sort_indices.cc vector_utils.cc compressed_sorted_set.cc block_list.cc\n            renewable_quota.cc range_tree.cc synonyms.cc\n            ${gen_dir}/parser.cc ${gen_dir}/lexer.cc)\n\ntarget_link_libraries(dfly_search_core dfly_page_usage base fibers2 redis_lib absl::strings\n  TRDP::reflex TRDP::uni-algo TRDP::hnswlib Boost::headers)\n\nif(WITH_SIMSIMD)\n  target_link_libraries(dfly_search_core TRDP::simsimd)\n  target_compile_definitions(dfly_search_core PRIVATE\n    WITH_SIMSIMD=1\n    SIMSIMD_DYNAMIC_DISPATCH=1\n    SIMSIMD_NATIVE_F16=$<IF:$<BOOL:${SIMSIMD_NATIVE_F16}>,1,0>\n    SIMSIMD_NATIVE_BF16=$<IF:$<BOOL:${SIMSIMD_NATIVE_F16}>,1,0>)\nendif()\n\nhelio_cxx_test(compressed_sorted_set_test dfly_search_core LABELS DFLY)\nhelio_cxx_test(block_list_test dfly_search_core LABELS DFLY)\nhelio_cxx_test(range_tree_test dfly_search_core absl::random_random LABELS DFLY)\nhelio_cxx_test(rax_tree_test redis_test_lib LABELS DFLY)\nhelio_cxx_test(search_parser_test dfly_search_core LABELS DFLY)\nhelio_cxx_test(search_test redis_test_lib dfly_search_core LABELS DFLY)\nhelio_cxx_test(mrmw_mutex_test redis_test_lib dfly_search_core fibers2 LABELS DFLY)\n\nif(WITH_SIMSIMD)\n  target_link_libraries(search_test TRDP::simsimd)\n  target_compile_definitions(search_test PRIVATE\n    WITH_SIMSIMD=1\n    SIMSIMD_DYNAMIC_DISPATCH=1\n    SIMSIMD_NATIVE_F16=$<IF:$<BOOL:${SIMSIMD_NATIVE_F16}>,1,0>\n    SIMSIMD_NATIVE_BF16=$<IF:$<BOOL:${SIMSIMD_NATIVE_F16}>,1,0>)\nendif()\n"
  },
  {
    "path": "src/core/search/ast_expr.cc",
    "content": "// Copyright 2023, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#include \"core/search/ast_expr.h\"\n\n#include <absl/strings/numbers.h>\n\n#include <algorithm>\n#include <cmath>\n#include <regex>\n\n#include \"base/logging.h\"\n\nusing namespace std;\n\nnamespace dfly::search {\n\nAstRangeNode::AstRangeNode(double lo, bool lo_excl, double hi, bool hi_excl)\n    : lo{lo_excl ? nextafter(lo, hi) : lo}, hi{hi_excl ? nextafter(hi, lo) : hi} {\n}\n\nAstGeoNode::AstGeoNode(double lon, double lat, double radius, std::string unit)\n    : lon(lon), lat(lat), radius(radius), unit(std::move(unit)) {\n}\n\nAstNegateNode::AstNegateNode(AstNode&& node) : node{make_unique<AstNode>(std::move(node))} {\n}\n\nAstLogicalNode::AstLogicalNode(AstNode&& l, AstNode&& r, LogicOp op) : op{op}, nodes{} {\n  // If either node is already a logical node with the same op,\n  // we can re-use it, as logical ops are associative.\n  for (auto* node : {&l, &r}) {\n    if (auto* ln = get_if<AstLogicalNode>(node); ln && ln->op == op) {\n      *this = std::move(*ln);\n      nodes.emplace_back(std::move(*(node == &l ? 
&r : &l)));\n      return;\n    }\n  }\n\n  nodes.emplace_back(std::move(l));\n  nodes.emplace_back(std::move(r));\n}\n\nAstFieldNode::AstFieldNode(string field, AstNode&& node)\n    : field{field.substr(1)}, node{make_unique<AstNode>(std::move(node))} {\n}\n\nAstTagsNode::AstTagsNode(TagValue tag) {\n  tags = {std::move(tag)};\n}\n\nAstTagsNode::AstTagsNode(AstExpr&& l, TagValue tag) {\n  DCHECK(holds_alternative<AstTagsNode>(l));\n  auto& tags_node = get<AstTagsNode>(l);\n\n  tags = std::move(tags_node.tags);\n  tags.push_back(std::move(tag));\n}\n\nAstKnnNode::AstKnnNode(uint32_t limit, std::string_view field, OwnedFtVector vec,\n                       std::string_view score_alias, std::optional<size_t> ef_runtime)\n    : filter{nullptr},\n      limit{limit},\n      field{field.substr(1)},\n      vec{std::move(vec)},\n      score_alias{score_alias},\n      ef_runtime{ef_runtime} {\n}\n\nAstKnnNode::AstKnnNode(AstNode&& filter, AstKnnNode&& self) {\n  *this = std::move(self);\n  this->filter = make_unique<AstNode>(std::move(filter));\n}\n\nAstVectorRangeNode::AstVectorRangeNode(std::string field, double radius, OwnedFtVector vec,\n                                       std::string score_alias)\n    : field{field.substr(1)},\n      radius{radius},\n      vec{std::move(vec)},\n      score_alias{std::move(score_alias)} {\n}\n\nbool AstKnnNode::HasPreFilter() const {\n  // If we have pre filter knn query should not hold filter variable. It will be\n  // moved to SearchAlgorithm::query_ variable.\n  return filter == nullptr;\n}\n\n}  // namespace dfly::search\n\nnamespace std {\nostream& operator<<(ostream& os, optional<size_t> o) {\n  return os;\n}\n\nostream& operator<<(ostream& os, dfly::search::AstTagsNode::TagValueProxy o) {\n  return os;\n}\n}  // namespace std\n"
  },
  {
    "path": "src/core/search/ast_expr.h",
    "content": "// Copyright 2023, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#pragma once\n\n#include <algorithm>\n#include <iostream>\n#include <memory>\n#include <ostream>\n#include <variant>\n#include <vector>\n\n#include \"core/search/base.h\"\n#include \"core/search/tag_types.h\"\n\nnamespace dfly {\n\nnamespace search {\n\nstruct AstNode;\n\n// Matches all documents\nstruct AstStarNode {};\n\n// Matches all documents where this field has a non-null value\nstruct AstStarFieldNode {};\n\ntemplate <TagType T> struct AstAffixNode {\n  explicit AstAffixNode(std::string affix) : affix{std::move(affix)} {\n  }\n\n  std::string affix;\n};\n\nusing AstTermNode = AstAffixNode<TagType::REGULAR>;\nusing AstPrefixNode = AstAffixNode<TagType::PREFIX>;\nusing AstSuffixNode = AstAffixNode<TagType::SUFFIX>;\nusing AstInfixNode = AstAffixNode<TagType::INFIX>;\n\n// Matches numeric range\nstruct AstRangeNode {\n  AstRangeNode(double lo, bool lo_excl, double hi, bool hi_excl);\n\n  double lo, hi;\n};\n\nstruct AstGeoNode {\n  AstGeoNode(double lon, double lat, double radius, std::string unit);\n  double lon, lat;\n  double radius;\n  std::string unit;\n};\n\n// Negates subtree\nstruct AstNegateNode {\n  AstNegateNode(AstNode&& node);\n\n  AstNegateNode(const AstNegateNode&) = delete;\n  AstNegateNode& operator=(const AstNegateNode&) = delete;\n\n  AstNegateNode(AstNegateNode&&) noexcept = default;\n  AstNegateNode& operator=(AstNegateNode&&) noexcept = default;\n\n  std::unique_ptr<AstNode> node;\n};\n\n// Applies logical operation to results of all sub-nodes\nstruct AstLogicalNode {\n  enum LogicOp { AND, OR };\n\n  // If either node is already a logical node with the same op, it'll be re-used.\n  AstLogicalNode(AstNode&& l, AstNode&& r, LogicOp op);\n\n  AstLogicalNode(const AstLogicalNode&) = delete;\n  AstLogicalNode& operator=(const AstLogicalNode&) = delete;\n\n  AstLogicalNode(AstLogicalNode&&) noexcept = default;\n  
AstLogicalNode& operator=(AstLogicalNode&&) noexcept = default;\n\n  LogicOp op;\n  std::vector<AstNode> nodes;\n};\n\n// Selects specific field for subtree\nstruct AstFieldNode {\n  AstFieldNode(std::string field, AstNode&& node);\n\n  AstFieldNode(const AstFieldNode&) = delete;\n  AstFieldNode& operator=(const AstFieldNode&) = delete;\n\n  AstFieldNode(AstFieldNode&&) noexcept = default;\n  AstFieldNode& operator=(AstFieldNode&&) noexcept = default;\n\n  std::string field;\n  std::unique_ptr<AstNode> node;\n};\n\n// Stores a list of tags for a tag query\nstruct AstTagsNode {\n  using TagValue = std::variant<AstTermNode, AstPrefixNode, AstSuffixNode, AstInfixNode>;\n\n  struct TagValueProxy\n      : public AstTagsNode::TagValue {  // bison needs it to be default constructible\n    TagValueProxy() : AstTagsNode::TagValue(AstTermNode(\"\")) {\n    }\n    template <TagType T> TagValueProxy(AstAffixNode<T> tv) : AstTagsNode::TagValue(std::move(tv)) {\n    }\n  };\n\n  AstTagsNode(TagValue);\n  AstTagsNode(AstNode&& l, TagValue);\n\n  std::vector<TagValue> tags;\n};\n\n// Applies nearest neighbor search to the final result set\nstruct AstKnnNode {\n  AstKnnNode() = default;\n  AstKnnNode(uint32_t limit, std::string_view field, OwnedFtVector vec,\n             std::string_view score_alias, std::optional<size_t> ef_runtime);\n\n  AstKnnNode(AstNode&& sub, AstKnnNode&& self);\n\n  AstKnnNode(const AstKnnNode&) = delete;\n  AstKnnNode& operator=(const AstKnnNode&) = delete;\n\n  AstKnnNode(AstKnnNode&&) noexcept = default;\n  AstKnnNode& operator=(AstKnnNode&&) noexcept = default;\n\n  friend std::ostream& operator<<(std::ostream& stream, const AstKnnNode& matrix) {\n    return stream;\n  }\n\n  std::unique_ptr<AstNode> filter;\n  size_t limit;\n  std::string field;\n  OwnedFtVector vec;\n  std::string score_alias;\n  std::optional<float> ef_runtime;\n\n  bool HasPreFilter() const;\n};\n\n// Applies vector range search: returns all docs with distance(vec, doc_vec) <= 
radius\nstruct AstVectorRangeNode {\n  AstVectorRangeNode() = default;\n  AstVectorRangeNode(std::string field, double radius, OwnedFtVector vec, std::string score_alias);\n\n  AstVectorRangeNode(const AstVectorRangeNode&) = delete;\n  AstVectorRangeNode& operator=(const AstVectorRangeNode&) = delete;\n\n  AstVectorRangeNode(AstVectorRangeNode&&) noexcept = default;\n  AstVectorRangeNode& operator=(AstVectorRangeNode&&) noexcept = default;\n\n  friend std::ostream& operator<<(std::ostream& stream, const AstVectorRangeNode& /*node*/) {\n    return stream;\n  }\n\n  std::string field;\n  double radius;\n  OwnedFtVector vec;\n  std::string score_alias;\n};\n\nusing NodeVariants =\n    std::variant<std::monostate, AstStarNode, AstStarFieldNode, AstTermNode, AstPrefixNode,\n                 AstSuffixNode, AstInfixNode, AstRangeNode, AstNegateNode, AstLogicalNode,\n                 AstFieldNode, AstTagsNode, AstKnnNode, AstGeoNode, AstVectorRangeNode>;\n\nstruct AstNode : public NodeVariants {\n  using variant::variant;\n\n  AstNode(const AstNode&) = delete;\n  AstNode& operator=(const AstNode&) = delete;\n\n  AstNode(AstNode&&) noexcept = default;\n  AstNode& operator=(AstNode&&) noexcept = default;\n\n  friend std::ostream& operator<<(std::ostream& stream, const AstNode& matrix) {\n    return stream;\n  }\n\n  const NodeVariants& Variant() const& {\n    return *this;\n  }\n};\n\nusing AstExpr = AstNode;\n\n}  // namespace search\n}  // namespace dfly\n\nnamespace std {\nostream& operator<<(ostream& os, optional<size_t> o);\nostream& operator<<(ostream& os, dfly::search::AstTagsNode::TagValueProxy o);\n}  // namespace std\n"
  },
  {
    "path": "src/core/search/base.cc",
    "content": "// Copyright 2023, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#include \"core/search/base.h\"\n\n#include <absl/strings/numbers.h>\n\nnamespace dfly::search {\n\nstd::string_view QueryParams::operator[](std::string_view name) const {\n  if (auto it = params.find(name); it != params.end())\n    return it->second;\n  return \"\";\n}\n\nstd::string& QueryParams::operator[](std::string_view k) {\n  return params[k];\n}\n\nstd::optional<double> ParseNumericField(std::string_view value) {\n  double value_as_double;\n  if (absl::SimpleAtod(value, &value_as_double) && std::isfinite(value_as_double))\n    return value_as_double;\n  return std::nullopt;\n}\n\nDefragmentResult& DefragmentResult::Merge(DefragmentResult&& other) {\n  quota_depleted |= other.quota_depleted;\n  objects_moved += other.objects_moved;\n  return *this;\n}\n\n}  // namespace dfly::search\n"
  },
  {
    "path": "src/core/search/base.h",
    "content": "// Copyright 2023, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#pragma once\n\n#include <absl/container/flat_hash_map.h>\n#include <absl/container/flat_hash_set.h>\n#include <absl/container/inlined_vector.h>\n\n#include <cstdint>\n#include <memory>\n#include <string>\n#include <string_view>\n#include <vector>\n\nnamespace dfly {\nclass PageUsage;\n}\n\nnamespace dfly::search {\n\nstruct DefragmentResult {\n  bool quota_depleted{false};\n  size_t objects_moved{0};\n  DefragmentResult& Merge(DefragmentResult&& other);\n};\n\nusing DocId = uint32_t;\nusing GlobalDocId = uint64_t;\nusing ShardId = uint16_t;\n\ninline GlobalDocId CreateGlobalDocId(ShardId shard_id, DocId local_doc_id) {\n  return ((uint64_t)shard_id << 32) | local_doc_id;\n}\n\ninline std::pair<ShardId, DocId> DecomposeGlobalDocId(GlobalDocId id) {\n  return {(id >> 32), (id)&0xFFFFFFFF};\n}\n\nenum class VectorSimilarity { L2, IP, COSINE };\n\nusing OwnedFtVector = std::pair<std::unique_ptr<float[]>, size_t /* dimension (size) */>;\nusing BorrowedFtVector = const char*;\n\n// Query params represent named parameters for queries supplied via PARAMS.\nstruct QueryParams {\n  std::string_view operator[](std::string_view name) const;\n  std::string& operator[](std::string_view k);\n\n  size_t Size() const {\n    return params.size();\n  }\n\n private:\n  absl::flat_hash_map<std::string, std::string> params;\n};\n\n// Base class for optional search filters\n\nstruct AstNode;\n\nstruct OptionalFilterBase {\n  virtual bool IsEmpty() const = 0;\n  virtual AstNode Node(std::string field) = 0;\n  virtual ~OptionalFilterBase() = default;\n};\n\nusing OptionalFilters =\n    absl::flat_hash_map<std::string /*field*/, std::unique_ptr<OptionalFilterBase> /* filter */>;\n\n// Values are either sortable as doubles or strings, or not sortable at all.\nusing SortableValue = std::variant<std::monostate, double, std::string>;\n\n// Interface for accessing document 
values with different data structures underneath.\nstruct DocumentAccessor {\n  using VectorInfo = std::variant<search::OwnedFtVector, search::BorrowedFtVector>;\n  using StringList = absl::InlinedVector<std::string_view, 1>;\n  using NumsList = absl::InlinedVector<double, 1>;\n\n  virtual ~DocumentAccessor() = default;\n\n  /* Returns nullopt if the specified field is not a list of strings */\n  virtual std::optional<StringList> GetStrings(std::string_view active_field) const = 0;\n\n  /* Returns nullopt if the specified field is not a vector */\n  virtual std::optional<VectorInfo> GetVector(std::string_view active_field, size_t dim) const = 0;\n\n  /* Returns nullopt if the specified field is not a list of doubles */\n  virtual std::optional<NumsList> GetNumbers(std::string_view active_field) const = 0;\n\n  /* Same as GetStrings, but also supports boolean values */\n  virtual std::optional<StringList> GetTags(std::string_view active_field) const = 0;\n};\n\n// Base class for type-specific indices.\n//\n// Queries should be done directly on subclasses with their distinct\n// query functions. All results for all index types should be sorted.\nstruct BaseIndex {\n  virtual ~BaseIndex() = default;\n\n  // Returns true if the document was added / indexed\n  virtual bool Add(DocId id, const DocumentAccessor& doc, std::string_view field) = 0;\n  virtual void Remove(DocId id, const DocumentAccessor& doc, std::string_view field) = 0;\n\n  // Returns documents that have non-null values for this field (used for @field:* queries)\n  // Result must be sorted\n  virtual std::vector<DocId> GetAllDocsWithNonNullValues() const = 0;\n\n  /* Called at the end of indexes rebuilding after all initial Add calls are done.\n     Some indices may need to finalize internal structures. See RangeTree for example. 
*/\n  virtual void FinalizeInitialization() {\n  }\n\n  // Defragments the index by moving objects in underutilized pages to the current malloc page.\n  virtual DefragmentResult Defragment(PageUsage* page_usage) {\n    return DefragmentResult{.quota_depleted = false, .objects_moved = 0};\n  }\n};\n\n// Base class for type-specific sorting indices.\nstruct BaseSortIndex : BaseIndex {\n  virtual SortableValue Lookup(DocId doc) const = 0;\n  virtual std::vector<SortableValue> Sort(std::vector<DocId>* ids, size_t limit,\n                                          bool desc) const = 0;\n};\n\n/* Used in iterators of inverse indices.\n   It is used to mark iterators that can be seeked to doc id that is greater than or equal to\n   the specified value (method name is SeekGE(DocId min_doc_id)).\n   This is used to optimize merging of results from different indices.\n   See index_result.h for more details. */\nstruct SeekableTag {};\n\ntemplate <typename Iterator> void BasicSeekGE(DocId min_doc_id, const Iterator& end, Iterator* it);\n\n/* Used for converting field values to double. Returns std::nullopt if the conversion fails */\nstd::optional<double> ParseNumericField(std::string_view value);\n\n/* Temporary method to create an empty std::optional<InlinedVector> in DocumentAccessor::GetStrings\n   and DocumentAccessor::GetNumbers methods. The problem is that due to internal implementation\n   details of absl::InlinedVector, we are getting a -Wmaybe-uninitialized compiler warning. To\n   suppress this false warning, we temporarily disable it around this block of code using GCC\n   diagnostic directives. 
*/\ntemplate <typename InlinedVector> std::optional<InlinedVector> EmptyAccessResult() {\n#if !defined(__clang__)\n  // GCC 13.1 throws spurious warnings around this code.\n#pragma GCC diagnostic push\n#pragma GCC diagnostic ignored \"-Wmaybe-uninitialized\"\n#endif\n\n  return InlinedVector{};\n\n#if !defined(__clang__)\n#pragma GCC diagnostic pop\n#endif\n}\n\n// Implementation\n/******************************************************************/\nnamespace details {\ninline size_t GetHighestPowerOfTwo(size_t n) {\n  static constexpr size_t kBitsNumber = sizeof(size_t) * 8;\n  return size_t(1) << (kBitsNumber - 1 - __builtin_clzl(n));\n}\n}  // namespace details\n\ntemplate <typename Iterator> void BasicSeekGE(DocId min_doc_id, const Iterator& end, Iterator* it) {\n  using Category = typename std::iterator_traits<Iterator>::iterator_category;\n\n  auto extract_doc_id = [](const auto& value) {\n    using T = std::decay_t<decltype(value)>;\n    if constexpr (std::is_same_v<T, DocId>) {\n      return value;\n    } else {\n      return value.first;\n    }\n  };\n\n  if constexpr (std::is_base_of_v<std::random_access_iterator_tag, Category>) {\n    size_t length = std::distance(*it, end);\n    for (size_t step = details::GetHighestPowerOfTwo(length); step > 0; step >>= 1) {\n      if (step < length) {\n        auto next_it = *it + step;\n        if (extract_doc_id(*next_it) < min_doc_id) {\n          *it = next_it;\n          length -= step;\n        }\n      }\n    }\n  }\n\n  while (*it != end && extract_doc_id(**it) < min_doc_id) {\n    ++(*it);\n  }\n}\n\n}  // namespace dfly::search\n"
  },
  {
    "path": "src/core/search/block_list.cc",
    "content": "#include \"core/search/block_list.h\"\n\n#include \"core/page_usage/page_usage_stats.h\"\n\nnamespace {\n\ntemplate <typename T> bool DefragmentVector(PMR_NS::vector<T>& vec, dfly::PageUsage* page_usage) {\n  if (vec.empty() || !page_usage->IsPageForObjectUnderUtilized(vec.data())) {\n    return false;\n  }\n\n  PMR_NS::vector<T> new_vec(vec.get_allocator());\n  new_vec.reserve(vec.size());\n  for (auto&& element : vec) {\n    new_vec.push_back(std::move(element));\n  }\n  vec = std::move(new_vec);\n  return true;\n}\n\n}  // namespace\n\nnamespace dfly::search {\n\nusing namespace std;\n\nSplitResult Split(BlockList<SortedVector<std::pair<DocId, double>>>&& block_list) {\n  using Entry = std::pair<DocId, double>;\n  DCHECK(!block_list.Empty());\n\n  const size_t elements_count = block_list.Size();\n\n  // Extract values to find median\n  std::vector<double> entries_values(elements_count);\n  size_t index = 0;\n  for (const Entry& entry : block_list) {\n    entries_values[index++] = entry.second;\n  }\n\n  // Find median value\n  std::nth_element(entries_values.begin(), entries_values.begin() + elements_count / 2,\n                   entries_values.end());\n  double median_value = entries_values[elements_count / 2];\n\n  /* Now we need to split entries into two parts, left and right, so that:\n   1) left has values < median_value\n   2) right has values >= median_value\n   3) both parts have approximately the same number of elements\n\n   To achieve this, we first split entries into three parts: < median_value (left blocklist), ==\n   median_value (median_entries), > median_value (righ blocklist). Then we add == median_value part\n   to the smaller of the two parts (< or >). 
This guarantees that both parts have approximately the\n   same number of elements */\n  BlockList<SortedVector<Entry>> left(block_list.blocks_.get_allocator().resource(),\n                                      block_list.block_size_);\n  BlockList<SortedVector<Entry>> right(block_list.blocks_.get_allocator().resource(),\n                                       block_list.block_size_);\n  absl::InlinedVector<Entry, 1> median_entries;\n\n  left.ReserveBlocks(block_list.blocks_.size() / 2 + 1);\n  right.ReserveBlocks(block_list.blocks_.size() / 2 + 1);\n\n  double lmin = std::numeric_limits<double>::infinity(), rmin = lmin;\n  double lmax = -std::numeric_limits<double>::infinity(), rmax = lmax;\n\n  for (const Entry& entry : block_list) {\n    if (entry.second < median_value) {\n      left.PushBack(entry);\n      lmin = std::min(lmin, entry.second);\n      lmax = std::max(lmax, entry.second);\n    } else if (entry.second > median_value) {\n      right.PushBack(entry);\n      rmin = std::min(rmin, entry.second);\n      rmax = std::max(rmax, entry.second);\n    } else {\n      median_entries.push_back(entry);\n    }\n  }\n  block_list.Clear();\n\n  if (left.Size() < right.Size()) {\n    // If left is smaller, we can add median entries to it\n    // We need to change median value to the right part and update lmax\n    lmax = median_value;\n    lmin = std::min(lmin, median_value);\n    median_value = rmin;\n    for (const auto& entry : median_entries) {\n      left.Insert(entry);\n    }\n  } else {\n    // If right part is smaller, we can add median entries to it\n    // Median value is still the same\n    rmax = std::max(rmax, median_value);\n    for (const auto& entry : median_entries) {\n      right.Insert(entry);\n    }\n  }\n\n  return {std::move(left), std::move(right), median_value, lmin, lmax, rmax};\n}\n\ntemplate <typename C> bool BlockList<C>::Insert(ElementType t) {\n  auto block = FindBlock(t);\n  if (block == blocks_.end())\n    block = 
blocks_.insert(blocks_.end(), C{blocks_.get_allocator().resource()});\n\n  if (!block->Insert(std::move(t)))\n    return false;\n\n  size_++;\n  TrySplit(block);\n  return true;\n}\n\ntemplate <typename C> bool BlockList<C>::PushBack(ElementType t) {\n  // If the last block is full, after insert we will need to split it\n  // So we can prevent split by creating a new block and inserting there\n  if (blocks_.empty() || ShouldSplit(blocks_.back().Size() + 1)) {\n    blocks_.insert(blocks_.end(), C{blocks_.get_allocator().resource()});\n  }\n\n  if (!blocks_.back().Insert(std::move(t)))\n    return false;\n\n  size_++;\n  return true;\n}\n\ntemplate <typename C> bool BlockList<C>::Remove(ElementType t) {\n  if (auto block = FindBlock(t); block != blocks_.end() && block->Remove(std::move(t))) {\n    size_--;\n    TryMerge(block);\n    return true;\n  }\n\n  return false;\n}\n\ntemplate <typename Container>\nDefragmentResult BlockList<Container>::Defragment(PageUsage* page_usage) {\n  if (page_usage->QuotaDepleted()) {\n    return DefragmentResult{.quota_depleted = true, .objects_moved = 0};\n  }\n\n  DefragmentResult result;\n  if (DefragmentVector(blocks_, page_usage)) {\n    result.objects_moved += 1;\n  }\n\n  for (Container& block : blocks_) {\n    if (result.Merge(block.Defragment(page_usage)).quota_depleted) {\n      break;\n    }\n  }\n  return result;\n}\n\ntemplate <typename C> typename BlockList<C>::BlockIt BlockList<C>::FindBlock(const ElementType& t) {\n  DCHECK(blocks_.empty() || !blocks_.back().Empty());\n\n  if (!blocks_.empty() && t >= *blocks_.back().begin())\n    return --blocks_.end();\n\n  // Find first block that can't contain t\n  auto it = std::upper_bound(blocks_.begin(), blocks_.end(), t,\n                             [](const ElementType& t, const C& l) { return *l.begin() > t; });\n\n  // Move to previous if possible\n  if (it != blocks_.begin())\n    --it;\n\n  DCHECK(it == blocks_.begin() || it->Size() * 2 >= block_size_);\n  DCHECK(it == 
blocks_.end() || it->Size() <= 2 * block_size_);\n  return it;\n}\n\ntemplate <typename C> bool BlockList<C>::ShouldSplit(size_t block_size) const {\n  return block_size >= block_size_ * 2;\n}\n\ntemplate <typename C> void BlockList<C>::TryMerge(BlockIt block) {\n  if (block->Size() == 0) {\n    blocks_.erase(block);\n    return;\n  }\n\n  if (block->Size() >= block_size_ / 2 || block == blocks_.begin())\n    return;\n\n  // Merge strictly right with left to benefit from tail insert optimizations\n  size_t idx = std::distance(blocks_.begin(), block);\n  blocks_[idx - 1].Merge(std::move(*block));\n  blocks_.erase(block);\n\n  TrySplit(blocks_.begin() + (idx - 1));  // to not overgrow it\n}\n\ntemplate <typename C> void BlockList<C>::TrySplit(BlockIt block) {\n  if (!ShouldSplit(block->Size() + 1))\n    return;\n\n  auto [left, right] = std::move(*block).Split();\n\n  *block = std::move(right);\n  blocks_.insert(block, std::move(left));\n}\n\ntemplate <typename C> void BlockList<C>::ReserveBlocks(size_t n) {\n  blocks_.reserve(n);\n}\n\ntemplate <typename C>\ntypename BlockList<C>::BlockListIterator& BlockList<C>::BlockListIterator::operator++() {\n  ++block_it;\n  if (block_it == block_end) {\n    ++it;\n    if (it != it_end) {\n      block_it = it->begin();\n      block_end = it->end();\n    } else {\n      block_it = {};\n      block_end = {};\n    }\n  }\n  return *this;\n}\n\ntemplate <typename C> void BlockList<C>::BlockListIterator::SeekGE(DocId min_doc_id) {\n  if (it == it_end) {\n    block_it = {};\n    block_end = {};\n    return;\n  }\n\n  auto extract_doc_id = [](const auto& value) {\n    using T = std::decay_t<decltype(value)>;\n    if constexpr (std::is_same_v<T, DocId>) {\n      return value;\n    } else {\n      return value.first;\n    }\n  };\n\n  auto needed_block = [&](const auto& it) {\n    return it->begin() != it->end() && min_doc_id <= extract_doc_id(it->Back());\n  };\n\n  // Choose the first block that has the last element >= min_doc_id\n  
if (!needed_block(it)) {\n    while (++it != it_end) {\n      if (needed_block(it)) {\n        block_it = it->begin();\n        block_end = it->end();\n        break;\n      }\n    }\n    if (it == it_end) {\n      block_it = {};\n      block_end = {};\n      return;\n    }\n  }\n\n  BasicSeekGE(min_doc_id, block_end, &block_it);\n  DCHECK(block_it != block_end && min_doc_id <= extract_doc_id(*block_it));\n}\n\ntemplate class BlockList<CompressedSortedSet>;\ntemplate class BlockList<SortedVector<DocId>>;\ntemplate class BlockList<SortedVector<std::pair<DocId, double>>>;\n\ntemplate <typename T> bool SortedVector<T>::Insert(T t) {\n  if (entries_.empty() || t > entries_.back()) {\n    entries_.push_back(t);\n    return true;\n  }\n\n  auto it = std::lower_bound(entries_.begin(), entries_.end(), t);\n  if (it != entries_.end() && *it == t)\n    return false;\n\n  entries_.insert(it, t);\n  return true;\n}\n\ntemplate <typename T> bool SortedVector<T>::Remove(T t) {\n  auto it = std::lower_bound(entries_.begin(), entries_.end(), t);\n  if (it != entries_.end() && *it == t) {\n    entries_.erase(it);\n    return true;\n  }\n  return false;\n}\n\ntemplate <typename T> void SortedVector<T>::Merge(SortedVector&& other) {\n  // O(N log N) complexity in theory, but in practice used only to merge with larger values.\n  // Tail insert optimization makes it linear\n  entries_.reserve(entries_.size() + other.entries_.size());\n  for (T& t : other.entries_)\n    Insert(std::move(t));\n}\n\ntemplate <typename T> std::pair<SortedVector<T>, SortedVector<T>> SortedVector<T>::Split() && {\n  PMR_NS::vector<T> tail(entries_.begin() + entries_.size() / 2, entries_.end());\n  entries_.resize(entries_.size() / 2);\n\n  return std::make_pair(std::move(*this), SortedVector<T>{std::move(tail)});\n}\n\ntemplate <typename T> DefragmentResult SortedVector<T>::Defragment(PageUsage* page_usage) {\n  if (DefragmentVector(entries_, page_usage)) {\n    return DefragmentResult{.quota_depleted = false, 
.objects_moved = 1};\n  }\n  return DefragmentResult{};\n}\n\ntemplate class SortedVector<DocId>;\ntemplate class SortedVector<std::pair<DocId, double>>;\n\n}  // namespace dfly::search\n"
  },
  {
    "path": "src/core/search/block_list.h",
    "content": "#pragma once\n\n#include <algorithm>\n#include <cstdint>\n#include <iterator>\n#include <vector>\n\n#include \"core/search/base.h\"\n#include \"core/search/compressed_sorted_set.h\"\n\nnamespace dfly::search {\n\n// Forward declarations\nstruct SplitResult;\ntemplate <typename Container> class BlockList;\ntemplate <typename T> class SortedVector;\n\n/* Split into two blocks, left and right, so that both blocks have approximately the same number\n   of elements. Returns median value of the split. Garantees that median present in the right\n   block and not present in the left block. Does not work for empty BlockList. */\n// TODO: Move to RangeTree logic\nSplitResult Split(BlockList<SortedVector<std::pair<DocId, double>>>&& result);\n\n// BlockList is a container wrapper for CompressedSortedSet / vector<DocId>\n// to divide the full sorted id range into separate blocks. This reduces modification\n// complexity from O(N) to O(logN + K), where K is the max block size.\n//\n// It tries to balance block sizes in the range [block_size / 2, block_size * 2]\n// by splitting or merging nodes when needed.\n// container must have declare ElementType typename\ntemplate <typename Container /* underlying container */> class BlockList {\n private:\n  using BlockIt = typename PMR_NS::vector<Container>::iterator;\n  using ConstBlockIt = typename PMR_NS::vector<Container>::const_iterator;\n  using ElementType = typename Container::ElementType;\n\n public:\n  BlockList(PMR_NS::memory_resource* mr, size_t block_size = 1000)\n      : block_size_{block_size}, blocks_(mr) {\n  }\n\n  BlockList(const BlockList& other) = default;\n\n  BlockList(BlockList&& other) noexcept {\n    // Consider not to do move if block_size_ is different\n    // DCHECK(block_size_ == other.block_size_);\n    // It seams there is bugs in BaseStringIndex\n    // because this check fails for it\n\n    size_ = other.size_;\n    blocks_ = std::move(other.blocks_);\n    other.Clear();\n  }\n\n  
BlockList& operator=(const BlockList& other) = delete;\n  BlockList& operator=(BlockList&& other) = delete;\n\n  ~BlockList() = default;\n\n  // Insert element, returns true if inserted, false if already present.\n  bool Insert(ElementType t);\n  bool PushBack(ElementType t);\n\n  // Remove element, returns true if removed, false if not found.\n  bool Remove(ElementType t);\n\n  size_t Size() const {\n    return size_;\n  }\n\n  size_t size() const {\n    return size_;\n  }\n\n  bool Empty() const {\n    return size_ == 0;\n  }\n\n  void Clear() {\n    size_ = 0;\n    blocks_.clear();\n  }\n\n  struct BlockListIterator : public SeekableTag {\n    // To make it work with std container constructors\n    using iterator_category = std::forward_iterator_tag;\n    using difference_type = std::ptrdiff_t;\n    using value_type = ElementType;\n    using pointer = ElementType*;\n    using reference = ElementType&;\n\n    ElementType operator*() const {\n      return *block_it;\n    }\n\n    BlockListIterator& operator++();\n    void SeekGE(DocId min_doc_id);\n\n    friend class BlockList;\n\n    bool operator==(const BlockListIterator& other) const {\n      return it == other.it && block_it == other.block_it;\n    }\n\n    bool operator!=(const BlockListIterator& other) const {\n      return !operator==(other);\n    }\n\n   private:\n    BlockListIterator(ConstBlockIt begin, ConstBlockIt end) : it(begin), it_end(end) {\n      if (it != it_end) {\n        block_it = it->begin();\n        block_end = it->end();\n      }\n    }\n\n    ConstBlockIt it, it_end;\n    typename Container::iterator block_it, block_end;\n  };\n\n  BlockListIterator begin() const {\n    return BlockListIterator{blocks_.begin(), blocks_.end()};\n  }\n\n  BlockListIterator end() const {\n    return BlockListIterator{blocks_.end(), blocks_.end()};\n  }\n\n  DefragmentResult Defragment(PageUsage* page_usage);\n\n private:\n  // Find block that should contain t. 
Returns end() only if empty\n  BlockIt FindBlock(const ElementType& t);\n\n  bool ShouldSplit(size_t block_size) const;\n\n  void TryMerge(BlockIt block);  // If needed, merge with previous block\n  void TrySplit(BlockIt block);  // If needed, split into two blocks\n\n  void ReserveBlocks(size_t n);\n\n  friend SplitResult Split(BlockList<SortedVector<std::pair<DocId, double>>>&& block_list);\n\n private:\n  const size_t block_size_ = 1000;\n  size_t size_ = 0;\n  PMR_NS::vector<Container> blocks_;\n};\n\n// Supports Insert and Remove operations for keeping a sorted vector internally.\n// Wrapper to use vectors with BlockList\ntemplate <typename T> class SortedVector {\n public:\n  using ElementType = T;\n\n  explicit SortedVector(PMR_NS::memory_resource* mr) : entries_(mr) {\n  }\n\n  bool Insert(T t);\n  bool Remove(T t);\n  void Merge(SortedVector<T>&& other);\n  std::pair<SortedVector<T>, SortedVector<T>> Split() &&;\n\n  T& operator[](size_t idx) {\n    return entries_[idx];\n  }\n\n  const T& operator[](size_t idx) const {\n    return entries_[idx];\n  }\n\n  size_t Size() const {\n    return entries_.size();\n  }\n\n  bool Empty() const {\n    return entries_.empty();\n  }\n\n  void Clear() {\n    entries_.clear();\n  }\n\n  const T& Back() const {\n    return entries_.back();\n  }\n\n  using iterator = typename PMR_NS::vector<T>::const_iterator;\n\n  iterator begin() const {\n    return entries_.cbegin();\n  }\n\n  iterator end() const {\n    return entries_.cend();\n  }\n\n  DefragmentResult Defragment(PageUsage* page_usage);\n\n private:\n  SortedVector(PMR_NS::vector<T>&& v) : entries_{std::move(v)} {\n  }\n\n  PMR_NS::vector<T> entries_;\n};\n\nextern template class SortedVector<DocId>;\nextern template class SortedVector<std::pair<DocId, double>>;\n\nextern template class BlockList<CompressedSortedSet>;\nextern template class BlockList<SortedVector<DocId>>;\nextern template class BlockList<SortedVector<std::pair<DocId, double>>>;\n\n// Used by Split 
method\nstruct SplitResult {\n  using Container = BlockList<SortedVector<std::pair<DocId, double>>>;\n\n  Container left;\n  Container right;\n\n  // Median value of split, used as minimum value of right block\n  double median;\n\n  // Min/max values of left (lmin, lmax) and right (rmin=median, rmax) blocks\n  double lmin, lmax, rmax;\n};\n}  // namespace dfly::search\n"
  },
  {
    "path": "src/core/search/block_list_test.cc",
    "content": "// Copyright 2023, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#include \"core/search/block_list.h\"\n\n#include <absl/container/btree_set.h>\n#include <gmock/gmock.h>\n#include <gtest/gtest.h>\n\n#include <algorithm>\n#include <random>\n#include <set>\n\n#include \"base/gtest.h\"\n#include \"base/logging.h\"\n\nnamespace dfly::search {\n\nusing namespace std;\n\ntemplate <typename C> class TemplatedBlockListTest : public testing::Test {\n private:\n  using NumericType = long long;\n\n public:\n  using ElementType = typename C::ElementType;\n\n  auto Make() {\n    // Create list with small block size to test blocking mechanism more extensively\n    return BlockList<C>{PMR_NS::get_default_resource(), 10};\n  }\n\n  auto AddNewBlockListElement(DocId doc_id) {\n    if constexpr (std::is_same_v<ElementType, DocId>) {\n      return ElementType{doc_id};\n    } else {\n      static_assert(std::is_same_v<ElementType, std::pair<DocId, double>>,\n                    \"Unsupported ElementType for BlockListTest\");\n\n      const NumericType number = dist_(rnd_);\n      id_to_values_[doc_id].push_back(number);\n      return ElementType{doc_id, static_cast<double>(number)};\n    }\n  }\n\n  auto RemoveBlockListElement(DocId doc_id) {\n    if constexpr (std::is_same_v<ElementType, DocId>) {\n      return ElementType{doc_id};\n    } else {\n      static_assert(std::is_same_v<ElementType, std::pair<DocId, double>>,\n                    \"Unsupported ElementType for BlockListTest\");\n\n      const NumericType number = id_to_values_[doc_id].back();\n      id_to_values_[doc_id].pop_back();\n      return ElementType{doc_id, static_cast<double>(number)};\n    }\n  }\n\n  DocId GetDocId(const ElementType& element) {\n    if constexpr (std::is_same_v<ElementType, DocId>) {\n      return element;\n    } else {\n      static_assert(std::is_same_v<ElementType, std::pair<DocId, double>>,\n                    \"Unsupported 
ElementType for GetDocId\");\n      return element.first;\n    }\n  }\n\n private:\n  // Used to save doubles for std::pair<DocId, double>\n  std::unordered_map<DocId, std::vector<NumericType>> id_to_values_;\n\n  // Used to generate random numbers for std::pair<DocId, double>\n  default_random_engine rnd_;\n  uniform_int_distribution<NumericType> dist_{std::numeric_limits<NumericType>::min(),\n                                              std::numeric_limits<NumericType>::max()};\n};\n\nusing ContainerTypes = ::testing::Types<CompressedSortedSet, SortedVector<DocId>,\n                                        SortedVector<std::pair<DocId, double>>>;\nTYPED_TEST_SUITE(TemplatedBlockListTest, ContainerTypes);\n\nTYPED_TEST(TemplatedBlockListTest, LoopMidInsertErase) {\n  using ElementType = typename TypeParam::ElementType;\n\n  const size_t kNumElements = 50;\n  auto list = this->Make();\n\n  for (size_t i = 0; i < kNumElements / 2; i++) {\n    list.Insert(this->AddNewBlockListElement(i));\n    list.Insert(this->AddNewBlockListElement(i + kNumElements / 2));\n  }\n\n  vector<ElementType> out(list.begin(), list.end());\n  ASSERT_EQ(list.Size(), kNumElements);\n  ASSERT_EQ(out.size(), kNumElements);\n  for (size_t i = 0; i < kNumElements; i++)\n    ASSERT_EQ(this->GetDocId(out[i]), i);\n\n  for (size_t i = 0; i < kNumElements / 2; i++) {\n    list.Remove(this->RemoveBlockListElement(i));\n    list.Remove(this->RemoveBlockListElement(i + kNumElements / 2));\n  }\n\n  out = {list.begin(), list.end()};\n  EXPECT_EQ(out.size(), 0u);\n}\n\nTYPED_TEST(TemplatedBlockListTest, InsertReverseRemoveSteps) {\n  using ElementType = typename TypeParam::ElementType;\n\n  const size_t kNumElements = 1000;\n  auto list = this->Make();\n\n  for (size_t i = 0; i < kNumElements; i++) {\n    list.Insert(this->AddNewBlockListElement(kNumElements - i - 1));\n  }\n\n  for (size_t deleted_pref = 0; deleted_pref < 10; deleted_pref++) {\n    vector<ElementType> out{list.begin(), list.end()};\n    
reverse(out.begin(), out.end());\n\n    EXPECT_EQ(out.size(), kNumElements / 10 * (10 - deleted_pref));\n    for (size_t i = 0; i < kNumElements; i++) {\n      if (i % 10 >= deleted_pref) {\n        EXPECT_EQ(this->GetDocId(out.back()), DocId(i));\n        out.pop_back();\n      }\n    }\n\n    for (size_t i = 0; i < kNumElements; i++) {\n      if (i % 10 == deleted_pref)\n        list.Remove(this->RemoveBlockListElement(i));\n    }\n  }\n\n  EXPECT_EQ(list.Size(), 0u);\n}\n\nTYPED_TEST(TemplatedBlockListTest, RandomNumbers) {\n  using ElementType = typename TypeParam::ElementType;\n\n  const size_t kNumIterations = 1'000;\n  auto list = this->Make();\n  std::set<ElementType> list_copy;\n\n  for (size_t i = 0; i < kNumIterations; i++) {\n    if (list_copy.size() > 100 && rand() % 5 == 0) {\n      auto it = list_copy.begin();\n      std::advance(it, rand() % list_copy.size());\n      list.Remove(*it);\n      list_copy.erase(it);\n    } else {\n      const ElementType t = this->AddNewBlockListElement(rand() % 1'000'000);\n      list.Insert(t);\n      list_copy.insert(t);\n    }\n\n    ASSERT_TRUE(std::equal(list.begin(), list.end(), list_copy.begin(), list_copy.end()));\n  }\n}\n\nclass BlockListTest : public testing::Test {\n protected:\n};\n\nTEST_F(BlockListTest, Split) {\n  BlockList<SortedVector<std::pair<DocId, double>>> bl{PMR_NS::get_default_resource(), 20};\n\n  const size_t max_value = 100.0;\n  const size_t step = 23.0;\n  size_t value = max_value;\n  for (size_t i = 0; i < 100; i++) {\n    bl.Insert({i, static_cast<double>(value)});\n    value = (max_value + value - step) % max_value;\n  }\n\n  auto split_result = Split(std::move(bl));\n  auto& left = split_result.left;\n  auto& right = split_result.right;\n\n  EXPECT_EQ(left.Size(), 50);\n  EXPECT_EQ(right.Size(), 50);\n\n  // Test that all values in the left part are less than or equal to every value in the right part\n  for (const auto& [_, left_value] : left) {\n    for (const auto& [__, right_value] : right) {\n      
EXPECT_LE(left_value, right_value);\n    }\n  }\n\n  double median = split_result.median;\n\n  // Test that left part values do not have this median\n  for (const auto& [_, left_value] : left) {\n    EXPECT_NE(left_value, median);\n  }\n\n  // Test that right part values do have this median\n  bool is_median_found = false;\n  for (const auto& [_, right_value] : right) {\n    if (right_value == median) {\n      is_median_found = true;\n      break;\n    }\n  }\n\n  EXPECT_TRUE(is_median_found);\n\n  // Test that doc_ids in both parts are sorted\n  DocId prev_doc_id = std::numeric_limits<DocId>::min();\n  for (const auto& [doc_id, _] : left) {\n    EXPECT_GE(doc_id, prev_doc_id);\n    prev_doc_id = doc_id;\n  }\n\n  prev_doc_id = std::numeric_limits<DocId>::min();\n  for (const auto& [doc_id, _] : right) {\n    EXPECT_GE(doc_id, prev_doc_id);\n    prev_doc_id = doc_id;\n  }\n}\n\nTEST_F(BlockListTest, SplitHard) {\n  // First test 70 values on the left and 30 on the right\n  BlockList<SortedVector<std::pair<DocId, double>>> bl1{PMR_NS::get_default_resource(), 20};\n\n  for (size_t i = 0; i < 70; i++) {\n    bl1.Insert({i, 1.0});\n  }\n  for (size_t i = 70; i < 100; i++) {\n    bl1.Insert({i, 2.0});\n  }\n\n  auto split_result1 = Split(std::move(bl1));\n\n  EXPECT_EQ(split_result1.median, 2.0);\n  EXPECT_EQ(split_result1.left.Size(), 70u);\n  EXPECT_EQ(split_result1.right.Size(), 30u);\n\n  for (const auto& [_, value] : split_result1.left) {\n    EXPECT_EQ(value, 1.0);\n  }\n\n  for (const auto& [_, value] : split_result1.right) {\n    EXPECT_EQ(value, 2.0);\n  }\n\n  // Now test 30 values on the left and 70 on the right\n  BlockList<SortedVector<std::pair<DocId, double>>> bl2{PMR_NS::get_default_resource(), 20};\n  for (size_t i = 0; i < 30; i++) {\n    bl2.Insert({i, 1.0});\n  }\n  for (size_t i = 30; i < 100; i++) {\n    bl2.Insert({i, 2.0});\n  }\n  auto split_result2 = Split(std::move(bl2));\n\n  EXPECT_EQ(split_result2.median, 2.0);\n  
EXPECT_EQ(split_result2.left.Size(), 30u);\n  EXPECT_EQ(split_result2.right.Size(), 70u);\n\n  for (const auto& [_, value] : split_result2.left) {\n    EXPECT_EQ(value, 1.0);\n  }\n\n  for (const auto& [_, value] : split_result2.right) {\n    EXPECT_EQ(value, 2.0);\n  }\n}\n\nTEST_F(BlockListTest, SplitSingleDoubleValue) {\n  BlockList<SortedVector<std::pair<DocId, double>>> bl{PMR_NS::get_default_resource(), 20};\n\n  for (size_t i = 0; i < 100; i++) {\n    bl.Insert({i, 1.0});\n  }\n\n  auto split_result = Split(std::move(bl));\n  auto& left = split_result.left;\n  auto& right = split_result.right;\n\n  EXPECT_EQ(left.Size(), 0u);\n  EXPECT_EQ(right.Size(), 100u);\n  EXPECT_EQ(split_result.median, 1.0);\n}\n\nstatic void BM_Erase90PctTail(benchmark::State& state) {\n  BlockList<CompressedSortedSet> bl{PMR_NS::get_default_resource()};\n\n  unsigned size = state.range(0);\n  for (size_t i = 0; i < size; i++)\n    bl.Insert(i);\n\n  size_t base = size / 10;\n  size_t i = 0;\n  while (state.KeepRunning()) {\n    benchmark::DoNotOptimize(bl.Remove(base + i));\n    i = (i + 1) % (size * 9 / 10);\n  }\n}\n\nBENCHMARK(BM_Erase90PctTail)->Args({100'000});\n\n}  // namespace dfly::search\n"
  },
  {
    "path": "src/core/search/compressed_sorted_set.cc",
    "content": "#include \"core/search/compressed_sorted_set.h\"\n\n#include <array>\n#include <bitset>\n\n#include \"absl/types/span.h\"\n#include \"base/flit.h\"\n#include \"base/logging.h\"\n\nnamespace dfly::search {\n\nusing namespace std;\n\nnamespace {\n\nusing VarintBuffer = array<uint8_t, sizeof(CompressedSortedSet::IntType) * 3>;\n\n}  // namespace\n\nCompressedSortedSet::CompressedSortedSet(PMR_NS::memory_resource* mr) : diffs_{mr} {\n}\n\nCompressedSortedSet::ConstIterator::ConstIterator(const CompressedSortedSet& list)\n    : stash_{}, diffs_{list.diffs_} {\n  ReadNext();\n}\n\nCompressedSortedSet::IntType CompressedSortedSet::ConstIterator::operator*() const {\n  DCHECK(stash_);\n  return *stash_;\n}\n\nCompressedSortedSet::ConstIterator& CompressedSortedSet::ConstIterator::operator++() {\n  ReadNext();\n  return *this;\n}\n\nbool operator==(const CompressedSortedSet::ConstIterator& l,\n                const CompressedSortedSet::ConstIterator& r) {\n  return l.diffs_.data() == r.diffs_.data() && l.diffs_.size() == r.diffs_.size();\n}\n\nbool operator!=(const CompressedSortedSet::ConstIterator& l,\n                const CompressedSortedSet::ConstIterator& r) {\n  return !(l == r);\n}\n\nvoid CompressedSortedSet::ConstIterator::ReadNext() {\n  if (diffs_.empty()) {\n    stash_ = nullopt;\n    last_read_ = {nullptr, 0};\n    diffs_ = {nullptr, 0};\n    return;\n  }\n\n  IntType base = stash_.value_or(0);\n  auto [diff, read] = CompressedSortedSet::ReadVarLen(diffs_);\n\n  stash_ = base + diff;\n  last_read_ = diffs_.subspan(0, read);\n  diffs_.remove_prefix(read);\n}\n\nCompressedSortedSet::ConstIterator CompressedSortedSet::begin() const {\n  return ConstIterator{*this};\n}\n\nCompressedSortedSet::ConstIterator CompressedSortedSet::end() const {\n  return ConstIterator{};\n}\n\n// Simply encode difference and add to end of diffs array\nvoid CompressedSortedSet::PushBackDiff(IntType diff) {\n  size_++;\n\n  VarintBuffer buf;\n  auto diff_span = 
WriteVarLen(diff, absl::MakeSpan(buf));\n  diffs_.insert(diffs_.end(), diff_span.begin(), diff_span.end());\n}\n\n// Do a linear scan by decoding all diffs to find value\nCompressedSortedSet::EntryLocation CompressedSortedSet::LowerBound(IntType value) const {\n  auto it = begin(), prev_it = end(), next_it = end();\n  while (it != end()) {\n    next_it = it;\n    if (*it >= value || ++next_it == end())\n      break;\n    prev_it = it;\n    it = next_it;\n  }\n\n  return EntryLocation{.value = it.stash_.value_or(0),\n                       .prev_value = prev_it.stash_.value_or(0),\n                       .diff_span = it.last_read_};\n}\n\n// Insert has linear complexity. It tries to find between which two entries A and B the new value V\n// needs to be inserted. Then it computes the differences diff1 = V - A and diff2 = B - V that need\n// to be stored to encode the triple A V B. Those are stored where diff0 = B - A was previously\n// stored, possibly extending the vector\nbool CompressedSortedSet::Insert(IntType value) {\n  if (tail_value_ && *tail_value_ == value)\n    return false;\n\n  if (tail_value_ && value > *tail_value_) {\n    PushBackDiff(value - *tail_value_);\n    tail_value_ = value;\n    return true;\n  }\n\n  auto bound = LowerBound(value);\n\n  // At least one element was read and it's equal to value: return to avoid duplicate\n  if (bound.value == value && !bound.diff_span.empty())\n    return false;\n\n  // Value is bigger than any other (or list is empty): append required diff at the end\n  if (value > bound.value || bound.diff_span.empty()) {\n    PushBackDiff(value - bound.value);\n    tail_value_ = value;\n    return true;\n  }\n\n  size_++;\n\n  // Now the list certainly contains the bound B > V and possibly A < V (or 0 by default),\n  // so we need to encode both differences diff1 and diff2\n  DCHECK_GT(bound.value, value);\n  DCHECK_LE(bound.prev_value, value);\n\n  // Compute and encode new diff1 and diff2 into buf1 and buf2 respectively\n  
VarintBuffer buf1, buf2;\n  auto diff1_span = WriteVarLen(value - bound.prev_value, absl::MakeSpan(buf1));\n  auto diff2_span = WriteVarLen(bound.value - value, absl::MakeSpan(buf2));\n\n  // Extend the location where diff0 is stored with optional zeros before overwriting it\n  ptrdiff_t diff_offset = bound.diff_span.data() - diffs_.data();\n  size_t required_len = diff1_span.size() + diff2_span.size();\n  DCHECK_LE(bound.diff_span.size(), required_len);  // It can't shrink for sure\n  diffs_.insert(diffs_.begin() + diff_offset, required_len - bound.diff_span.size(), 0u);\n\n  // Now overwrite diff0 and 0s with the two new differences\n  copy(diff1_span.begin(), diff1_span.end(), diffs_.begin() + diff_offset);\n  copy(diff2_span.begin(), diff2_span.end(), diffs_.begin() + diff_offset + diff1_span.size());\n\n  return true;\n}\n\n// Remove has linear complexity. It tries to find the element V and its neighbors A and B,\n// which are encoded as diff1 = V - A and diff2 = B - V. Adjacently stored diff1 and diff2\n// need to be replaced with diff3 = diff1 + diff2\nbool CompressedSortedSet::Remove(IntType value) {\n  auto bound = LowerBound(value);\n\n  // Nothing was read or the element was not found\n  if (bound.diff_span.empty() || bound.value != value)\n    return false;\n\n  // We're removing below unconditionally\n  size_--;\n\n  // Calculate offset where values diff is stored and determine diffs tail\n  ptrdiff_t diff_offset = bound.diff_span.data() - diffs_.data();\n  auto diffs_tail = absl::MakeSpan(diffs_).subspan(diff_offset + bound.diff_span.size());\n\n  // If it's stored at the end, simply truncate it away\n  if (diffs_tail.empty()) {\n    diffs_.resize(diffs_.size() - bound.diff_span.size());\n    tail_value_ = bound.prev_value;\n    if (diffs_.empty())\n      tail_value_ = nullopt;\n    return true;\n  }\n\n  // Now the list certainly contains a succeeding element B > V and possibly A < V (or 0)\n  // Read diff2 and calculate diff3 = diff1 + diff2\n  
auto [diff2, diff2_read] = ReadVarLen(diffs_tail);\n  IntType diff3 = (bound.value - bound.prev_value) + diff2;\n\n  // Encode diff3\n  VarintBuffer buf;\n  auto diff3_buf = WriteVarLen(diff3, absl::MakeSpan(buf));\n\n  // Shrink vector before overwriting\n  DCHECK_LE(diff3_buf.size(), diff2_read + bound.diff_span.size());\n  size_t to_remove = diff2_read + bound.diff_span.size() - diff3_buf.size();\n  diffs_.erase(diffs_.begin() + diff_offset, diffs_.begin() + diff_offset + to_remove);\n\n  // Overwrite diff1/diff2 with new diff3\n  copy(diff3_buf.begin(), diff3_buf.end(), diffs_.begin() + diff_offset);\n\n  return true;\n}\n\nvoid CompressedSortedSet::Merge(CompressedSortedSet&& other) {\n  // Quadratic complexity in theory, but in practice used only to merge with larger values.\n  // Tail insert optimization makes it linear\n  for (int v : other)\n    Insert(v);\n}\n\nstd::pair<CompressedSortedSet, CompressedSortedSet> CompressedSortedSet::Split() && {\n  DCHECK_GT(Size(), 5u);\n\n  CompressedSortedSet second(diffs_.get_allocator().resource());\n\n  // Move iterator to middle position and save size of diffs tail\n  auto it = begin();\n  std::advance(it, (size_ - 1) / 2);\n\n  // Save last value in the first set\n  tail_value_ = *it;\n  ++it;\n\n  size_t keep_bytes = it.last_read_.data() - diffs_.data();\n\n  // Copy second half into second set\n  for (; it != end(); ++it)\n    second.Insert(*it);\n\n  // Erase diffs tail\n  diffs_.resize(keep_bytes);\n  size_ -= second.Size();\n\n  return std::make_pair(std::move(*this), std::move(second));\n}\n\n// The leftmost three bits of the first byte store the number of additional bytes. 
All following\n// bits store the number itself.\nabsl::Span<uint8_t> CompressedSortedSet::WriteVarLen(IntType value, absl::Span<uint8_t> buf) {\n  // TODO: fix flit encoding of large numbers\n  size_t written = base::flit::EncodeT(static_cast<uint64_t>(value), buf.data());\n  return buf.first(written);\n}\n\nstd::pair<CompressedSortedSet::IntType, size_t> CompressedSortedSet::ReadVarLen(\n    absl::Span<const uint8_t> source) {\n  uint64_t out = 0;\n  size_t read = 0;\n\n  // We need this because ParseT may read 8 bytes even if source can be less than that\n  // due to the encoding and we end up accessing an invalid memory location.\n  // (not really a bug because ParseT ignores the extra bytes it reads).\n  if (source.size() < 8) {\n    VarintBuffer ranged_source{0};\n    memcpy(&ranged_source, source.data(), source.size());\n    read = base::flit::ParseT(ranged_source.data(), &out);\n  } else {\n    read = base::flit::ParseT(source.data(), &out);\n  }\n\n  CHECK_LE(out, numeric_limits<IntType>::max());\n  return {out, read};\n}\n\n}  // namespace dfly::search\n"
  },
  {
    "path": "src/core/search/compressed_sorted_set.h",
    "content": "#pragma once\n\n#include <absl/types/span.h>\n\n#include <cstdint>\n#include <iterator>\n#include <optional>\n#include <vector>\n\n#include \"base/logging.h\"\n#include \"base/pmr/memory_resource.h\"\n#include \"core/search/base.h\"\n\nnamespace dfly::search {\n\n// A list of sorted unique integers with reduced memory usage.\n// Only differences between successive elements are stored\n// in a variable length encoding.\nclass CompressedSortedSet {\n public:\n  using IntType = DocId;\n  using ElementType = IntType;\n\n  // Const access iterator that decodes the compressed list on traversal\n  struct ConstIterator {\n    friend class CompressedSortedSet;\n\n    // To make it work with std container contructors\n    using iterator_category = std::forward_iterator_tag;\n    using difference_type = std::ptrdiff_t;\n    using value_type = IntType;\n    using pointer = IntType*;\n    using reference = IntType&;\n\n    IntType operator*() const;\n    ConstIterator& operator++();\n\n    friend class CompressedSortedSet;\n    friend bool operator==(const ConstIterator& l, const ConstIterator& r);\n    friend bool operator!=(const ConstIterator& l, const ConstIterator& r);\n\n    ConstIterator() = default;\n\n   private:\n    explicit ConstIterator(const CompressedSortedSet& list);\n\n    void ReadNext();  // Decode next value to stash\n\n    std::optional<IntType> stash_{};\n    absl::Span<const uint8_t> last_read_{};\n    absl::Span<const uint8_t> diffs_{};\n  };\n\n  using iterator = ConstIterator;\n\n public:\n  explicit CompressedSortedSet(PMR_NS::memory_resource* mr);\n\n  ConstIterator begin() const;\n  ConstIterator end() const;\n\n  bool Insert(IntType value);  // Insert arbitrary element, needs to scan whole list\n  bool Remove(IntType value);  // Remove arbitrary element, needs to scan whole list\n\n  size_t Size() const {\n    return size_;\n  }\n\n  size_t ByteSize() const {\n    return diffs_.size();\n  }\n\n  bool Empty() const {\n    return 
size_ == 0;\n  }\n\n  void Clear() {\n    size_ = 0;\n    tail_value_.reset();\n    diffs_.clear();\n  }\n\n  // Add all values from other\n  void Merge(CompressedSortedSet&& other);\n\n  // Split into two equally sized halves\n  std::pair<CompressedSortedSet, CompressedSortedSet> Split() &&;\n\n  IntType Back() const {\n    DCHECK(!Empty() && tail_value_.has_value());\n    return tail_value_.value();\n  }\n\n  static DefragmentResult Defragment([[maybe_unused]] PageUsage* page_usage) {\n    return {};\n  }\n\n private:\n  struct EntryLocation {\n    IntType value;                        // Value or 0\n    IntType prev_value;                   // Preceding value or 0\n    absl::Span<const uint8_t> diff_span;  // Location of value encoded diff, empty if none read\n  };\n\n private:\n  // Find EntryLocation of first entry that is not less than value (std::lower_bound)\n  EntryLocation LowerBound(IntType value) const;\n\n  // Push back difference without any decoding. Used only for efficient construction from sorted\n  // list\n  void PushBackDiff(IntType diff);\n\n  // Encode integer with variable length encoding into buf and return written subspan\n  static absl::Span<uint8_t> WriteVarLen(IntType value, absl::Span<uint8_t> buf);\n\n  // Decode integer with variable length encoding from source\n  static std::pair<IntType /*value*/, size_t /*read*/> ReadVarLen(absl::Span<const uint8_t> source);\n\n private:\n  uint32_t size_{0};\n\n  std::optional<IntType> tail_value_{};\n  std::vector<uint8_t, PMR_NS::polymorphic_allocator<uint8_t>> diffs_;\n};\n\n}  // namespace dfly::search\n"
  },
  {
    "path": "src/core/search/compressed_sorted_set_test.cc",
    "content": "// Copyright 2023, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#include \"core/search/compressed_sorted_set.h\"\n\n#include <absl/container/btree_set.h>\n\n#include <algorithm>\n\n#include \"base/gtest.h\"\n#include \"base/logging.h\"\n#include \"core/bptree_set.h\"\n\nnamespace dfly::search {\n\nusing namespace std;\n\nnamespace {\n\nstruct SetInserter {\n  using iterator_category = std::forward_iterator_tag;\n  using difference_type = std::ptrdiff_t;\n  using value_type = CompressedSortedSet::IntType;\n  using pointer = value_type*;\n  using reference = value_type&;\n\n  explicit SetInserter(CompressedSortedSet* set) : set_{set} {};\n\n  SetInserter& operator*() {\n    return *this;\n  }\n  SetInserter& operator++() {\n    return *this;\n  }\n\n  SetInserter& operator=(value_type value) {\n    set_->Insert(value);\n    return *this;\n  }\n\n private:\n  CompressedSortedSet* set_;\n};\n\n}  // namespace\n\nclass CompressedSortedSetTest : public ::testing::Test {\n protected:\n};\n\nusing IdVec = vector<uint32_t>;\n\nTEST_F(CompressedSortedSetTest, BasicInsert) {\n  CompressedSortedSet list{PMR_NS::get_default_resource()};\n  IdVec list_copy;\n\n  auto current = [&list]() { return IdVec{list.begin(), list.end()}; };\n  auto add = [&list, &list_copy](uint32_t value) {\n    list.Insert(value);\n    set<uint32_t> list_copy_set{list_copy.begin(), list_copy.end()};\n    list_copy_set.insert(value);\n    list_copy = IdVec{list_copy_set.begin(), list_copy_set.end()};\n  };\n\n  // Check empty list is empty\n  EXPECT_EQ(current(), list_copy);\n\n  // Insert some numbers in sorted order\n  add(10);\n  EXPECT_EQ(current(), list_copy);\n  add(15);\n  EXPECT_EQ(current(), list_copy);\n  add(22);\n  EXPECT_EQ(current(), list_copy);\n  add(25);\n  add(31);\n  EXPECT_EQ(current(), list_copy);\n\n  // Now insert front\n  add(7);\n  EXPECT_EQ(current(), list_copy);\n  add(2);\n  EXPECT_EQ(current(), list_copy);\n\n  // 
Insert in-between\n  add(13);\n  EXPECT_EQ(current(), list_copy);\n  add(23);\n  add(19);\n  EXPECT_EQ(current(), list_copy);\n  add(30);\n  add(27);\n  EXPECT_EQ(current(), list_copy);\n\n  // Now add some numbers in reverse order\n  add(41);\n  add(40);\n  add(37);\n  add(34);\n  EXPECT_EQ(current(), list_copy);\n\n  // Now add a 0\n  add(0);\n  EXPECT_EQ(current(), list_copy);\n\n  // Make sure all test integers fit into a single byte\n  EXPECT_EQ(list.ByteSize(), list.Size());\n}\n\nTEST_F(CompressedSortedSetTest, BasicInsertLargeValues) {\n  CompressedSortedSet list{PMR_NS::get_default_resource()};\n  IdVec list_copy;\n\n  const uint32_t kBase = 1'000'000'000;\n\n  // Add big integers in reverse order\n  uint32_t base = kBase;\n  while (base > 0) {\n    list.Insert(base);\n    list_copy.insert(list_copy.begin(), base);\n    base /= 10;\n  }\n\n  EXPECT_EQ(IdVec(list.begin(), list.end()), list_copy);\n\n  // Now add neighboring  integers with an offset of one\n  base = kBase;\n  while (base > 0) {\n    list.Insert(base + 1);\n    list_copy.push_back(base + 1);\n    base /= 10;\n  }\n  sort(list_copy.begin(), list_copy.end());\n\n  EXPECT_EQ(IdVec(list.begin(), list.end()), list_copy);\n\n  // Make sure we use at least twice less memory\n  EXPECT_LE(list.ByteSize() * 2, list.Size() * sizeof(uint32_t));\n}\n\nTEST_F(CompressedSortedSetTest, SortedBackInserter) {\n  CompressedSortedSet list{PMR_NS::get_default_resource()};\n\n  vector<uint32_t> v1 = {1, 3, 5};\n  vector<uint32_t> v2 = {2, 4, 6};\n\n  merge(v1.begin(), v1.end(), v2.begin(), v2.end(), SetInserter{&list});\n\n  EXPECT_EQ(IdVec(list.begin(), list.end()), IdVec({1, 2, 3, 4, 5, 6}));\n}\n\nTEST_F(CompressedSortedSetTest, BasicRemove) {\n  CompressedSortedSet list{PMR_NS::get_default_resource()};\n\n  IdVec values = {1, 3, 4, 7, 8, 11, 15, 17, 20, 22, 27};\n  copy(values.begin(), values.end(), SetInserter{&list});\n  EXPECT_EQ(IdVec(list.begin(), list.end()), values);\n\n  auto remove = [&list, 
&values](uint32_t value) {\n    values.erase(find(values.begin(), values.end(), value));\n    list.Remove(value);\n  };\n\n  // Remove back and front\n  remove(27);\n  EXPECT_EQ(IdVec(list.begin(), list.end()), values);\n  remove(1);\n  EXPECT_EQ(IdVec(list.begin(), list.end()), values);\n\n  // Remove from middle\n  remove(11);\n  remove(4);\n  EXPECT_EQ(IdVec(list.begin(), list.end()), values);\n  remove(17);\n  remove(8);\n  EXPECT_EQ(IdVec(list.begin(), list.end()), values);\n\n  // Remove non existing\n  list.Remove(16);\n  EXPECT_EQ(IdVec(list.begin(), list.end()), values);\n}\n\nTEST_F(CompressedSortedSetTest, BasicRemoveLargeValues) {\n  CompressedSortedSet list{PMR_NS::get_default_resource()};\n\n  IdVec values = {1, 12, 123, 123'4, 123'45, 123'456, 1'234'567, 12'345'678};\n  copy(values.begin(), values.end(), SetInserter{&list});\n  EXPECT_EQ(IdVec(list.begin(), list.end()), values);\n\n  auto remove = [&list, &values](uint32_t value) {\n    values.erase(find(values.begin(), values.end(), value));\n    list.Remove(value);\n  };\n\n  // Remove from middle\n  remove(123'45);\n  EXPECT_EQ(IdVec(list.begin(), list.end()), values);\n  remove(12);\n  EXPECT_EQ(IdVec(list.begin(), list.end()), values);\n  remove(1'234'567);\n  EXPECT_EQ(IdVec(list.begin(), list.end()), values);\n\n  // Remove front\n  remove(1);\n  EXPECT_EQ(IdVec(list.begin(), list.end()), values);\n\n  // Remove back\n  remove(12'345'678);\n  EXPECT_EQ(IdVec(list.begin(), list.end()), values);\n}\n\nTEST_F(CompressedSortedSetTest, InsertRemoveLargeValues) {\n  CompressedSortedSet list{PMR_NS::get_default_resource()};\n\n  for (int shift = 3; shift < 30; shift++) {\n    uint32_t value = 1u << shift;\n\n    IdVec values{value + 3, value, value - 5};\n    for (auto v : values)\n      list.Insert(v);\n\n    sort(values.begin(), values.end());\n    EXPECT_EQ(IdVec(list.begin(), list.end()), values);\n\n    for (auto v : values)\n      list.Remove(v);\n\n    EXPECT_EQ(IdVec(list.begin(), 
list.end()), IdVec({}));\n  }\n}\n\n}  // namespace dfly::search\n"
  },
  {
    "path": "src/core/search/hnsw_alg.h",
    "content": "// This file is copied from hnswlib and modified to fit Dragonfly's needs.\n\n#include <hnswlib/hnswalg.h>\n#include <hnswlib/visited_list_pool.h>\n#include <mimalloc.h>\n\n#pragma once\n\nnamespace dfly::search {\n\nenum class HnswErrorStatus : int8_t {\n  SUCCESS = 0,\n  /* markDelete errors */\n  LABEL_NOT_FOUND,\n  ELEMENT_ALREADY_DELETED,\n};\n\ntemplate <typename dist_t> class HierarchicalNSW : public hnswlib::AlgorithmInterface<dist_t> {\n public:\n  using tableint = hnswlib::tableint;\n  using labeltype = hnswlib::labeltype;\n  using linklistsizeint = hnswlib::linklistsizeint;\n  using VisitedListPool = hnswlib::VisitedListPool;\n  using vl_type = hnswlib::vl_type;\n  using VisitedList = hnswlib::VisitedList;\n  using BaseFilterFunctor = hnswlib::BaseFilterFunctor;\n  using BaseSearchStopCondition = hnswlib::BaseSearchStopCondition<dist_t>;\n\n  static const tableint MAX_LABEL_OPERATION_LOCKS = 65536;\n  static const unsigned char DELETE_MARK = 0x01;\n\n  size_t max_elements_{0};\n  mutable std::atomic<size_t> cur_element_count{0};  // current number of elements\n  size_t size_data_per_element_{0};\n  size_t size_links_per_element_{0};\n  mutable std::atomic<size_t> num_deleted_{0};  // number of deleted elements\n  size_t M_{0};\n  size_t maxM_{0};\n  size_t maxM0_{0};\n  size_t ef_construction_{0};\n  size_t ef_{0};\n\n  double mult_{0.0}, revSize_{0.0};\n  int maxlevel_{0};\n\n  std::unique_ptr<VisitedListPool> visited_list_pool_{nullptr};\n\n  // Locks operations with element by label value\n  mutable std::vector<std::mutex> label_op_locks_;\n\n  std::mutex global;\n  std::vector<std::mutex> link_list_locks_;\n\n  tableint enterpoint_node_{0};\n\n  size_t size_links_level0_{0};\n  size_t offsetData_{0}, offsetLevel0_{0}, label_offset_{0};\n\n  char* data_level0_memory_{nullptr};  // Level 0 memory block. 
Contains links + ptr to data + label\n  char* data_vector_memory_{nullptr};  // Memory block for copied vectors\n  char** linkLists_{nullptr};\n  std::vector<int> element_levels_;  // keeps level of each element\n\n  size_t data_size_{0};\n\n  hnswlib::DISTFUNC<dist_t> fstdistfunc_;\n  void* dist_func_param_{nullptr};\n\n  mutable std::mutex label_lookup_lock;  // lock for label_lookup_\n  std::unordered_map<labeltype, tableint> label_lookup_;\n\n  std::default_random_engine level_generator_;\n  std::default_random_engine update_probability_generator_;\n\n  mutable std::atomic<long> metric_distance_computations{0};\n  mutable std::atomic<long> metric_hops{0};\n\n  bool copy_vector_ = true;\n\n  bool allow_replace_deleted_ =\n      false;  // flag to replace deleted elements (marked as deleted) during insertions\n\n  std::mutex deleted_elements_lock;               // lock for deleted_elements\n  std::unordered_set<tableint> deleted_elements;  // contains internal ids of deleted elements\n\n  HierarchicalNSW(hnswlib::SpaceInterface<dist_t>* s) {\n  }\n\n  HierarchicalNSW(hnswlib::SpaceInterface<dist_t>* s, const std::string& location,\n                  bool nmslib = false, size_t max_elements = 0, bool allow_replace_deleted = false)\n      : allow_replace_deleted_(allow_replace_deleted) {\n    loadIndex(location, s, max_elements);\n  }\n\n  HierarchicalNSW(hnswlib::SpaceInterface<dist_t>* s, size_t max_elements, size_t M = 16,\n                  size_t ef_construction = 200, size_t random_seed = 100, bool copy_vector = true,\n                  bool allow_replace_deleted = false)\n      : label_op_locks_(MAX_LABEL_OPERATION_LOCKS),\n        link_list_locks_(max_elements),\n        element_levels_(max_elements),\n        copy_vector_(copy_vector),\n        allow_replace_deleted_(allow_replace_deleted) {\n    max_elements_ = max_elements;\n    num_deleted_ = 0;\n    data_size_ = s->get_data_size();\n    fstdistfunc_ = s->get_dist_func();\n    dist_func_param_ = 
s->get_dist_func_param();\n    if (M <= 10000) {\n      M_ = M;\n    } else {\n      HNSWERR << \"warning: M parameter exceeds 10000 which may lead to adverse effects.\"\n              << std::endl;\n      HNSWERR << \"         Cap to 10000 will be applied for the rest of the processing.\"\n              << std::endl;\n      M_ = 10000;\n    }\n    maxM_ = M_;\n    maxM0_ = M_ * 2;\n    ef_construction_ = std::max(ef_construction, M_);\n    ef_ = 10;\n\n    level_generator_.seed(random_seed);\n    update_probability_generator_.seed(random_seed + 1);\n\n    // If we copy vector we don't use pointer to data\n    size_t vector_ptr_size = copy_vector_ ? 0 : sizeof(char*);\n    size_links_level0_ = maxM0_ * sizeof(tableint) + sizeof(linklistsizeint);\n    size_data_per_element_ = size_links_level0_ + vector_ptr_size + sizeof(labeltype);\n    offsetData_ = size_links_level0_;\n    label_offset_ = size_links_level0_ + vector_ptr_size;\n    offsetLevel0_ = 0;\n\n    data_level0_memory_ = (char*)mi_malloc(max_elements_ * size_data_per_element_);\n    if (data_level0_memory_ == nullptr)\n      throw std::runtime_error(\"Not enough memory\");\n\n    if (copy_vector) {\n      data_vector_memory_ = (char*)mi_malloc(max_elements_ * data_size_);\n      if (data_vector_memory_ == nullptr)\n        throw std::runtime_error(\"Not enough memory\");\n    }\n\n    cur_element_count = 0;\n\n    visited_list_pool_ = std::unique_ptr<VisitedListPool>(new VisitedListPool(1, max_elements));\n\n    // initializations for special treatment of the first node\n    enterpoint_node_ = -1;\n    maxlevel_ = -1;\n\n    linkLists_ = (char**)mi_malloc(sizeof(void*) * max_elements_);\n    if (linkLists_ == nullptr)\n      throw std::runtime_error(\"Not enough memory: HierarchicalNSW failed to allocate linklists\");\n    size_links_per_element_ = maxM_ * sizeof(tableint) + sizeof(linklistsizeint);\n    mult_ = 1 / log(1.0 * M_);\n    revSize_ = 1.0 / mult_;\n  }\n\n  ~HierarchicalNSW() {\n    clear();\n  
}\n\n  void clear() {\n    mi_free(data_level0_memory_);\n    data_level0_memory_ = nullptr;\n    for (tableint i = 0; i < cur_element_count; i++) {\n      if (element_levels_[i] > 0)\n        mi_free(linkLists_[i]);\n    }\n    if (copy_vector_) {\n      mi_free(data_vector_memory_);\n    }\n    mi_free(linkLists_);\n    linkLists_ = nullptr;\n    cur_element_count = 0;\n    visited_list_pool_.reset(nullptr);\n  }\n\n  struct CompareByFirst {\n    constexpr bool operator()(std::pair<dist_t, tableint> const& a,\n                              std::pair<dist_t, tableint> const& b) const noexcept {\n      return a.first < b.first;\n    }\n  };\n\n  void setEf(size_t ef) {\n    ef_ = ef;\n  }\n\n  inline std::mutex& getLabelOpMutex(labeltype label) const {\n    // calculate hash\n    size_t lock_id = label & (MAX_LABEL_OPERATION_LOCKS - 1);\n    return label_op_locks_[lock_id];\n  }\n\n  inline labeltype getExternalLabel(tableint internal_id) const {\n    labeltype return_label;\n    memcpy(&return_label,\n           (data_level0_memory_ + internal_id * size_data_per_element_ + label_offset_),\n           sizeof(labeltype));\n    return return_label;\n  }\n\n  inline void setExternalLabel(tableint internal_id, labeltype label) const {\n    memcpy((data_level0_memory_ + internal_id * size_data_per_element_ + label_offset_), &label,\n           sizeof(labeltype));\n  }\n\n  inline char* getDataPtrByInternalId(tableint internal_id) const {\n    return (data_level0_memory_ + internal_id * size_data_per_element_ + offsetData_);\n  }\n\n  // Return pointer to data by internal id\n  inline char* getDataByInternalId(tableint internal_id) const {\n    if (copy_vector_) {\n      return (data_vector_memory_ + internal_id * data_size_);\n    } else {\n      char* unaligned_data_ptr = (char*)(getDataPtrByInternalId(internal_id));\n      char* data_ptr = nullptr;\n      memcpy(static_cast<void*>(&data_ptr), unaligned_data_ptr, sizeof(void*));\n      return data_ptr;\n    }\n  }\n\n  
int getRandomLevel(double reverse_size) {\n    std::uniform_real_distribution<double> distribution(0.0, 1.0);\n    double r = -log(distribution(level_generator_)) * reverse_size;\n    return (int)r;\n  }\n\n  size_t getMaxElements() {\n    return max_elements_;\n  }\n\n  size_t getCurrentElementCount() {\n    return cur_element_count;\n  }\n\n  size_t getDeletedCount() {\n    return num_deleted_;\n  }\n\n  std::priority_queue<std::pair<dist_t, tableint>, std::vector<std::pair<dist_t, tableint>>,\n                      CompareByFirst>\n  searchBaseLayer(tableint ep_id, const void* data_point, int layer) {\n    VisitedList* vl = visited_list_pool_->getFreeVisitedList();\n    vl_type* visited_array = vl->mass;\n    vl_type visited_array_tag = vl->curV;\n\n    std::priority_queue<std::pair<dist_t, tableint>, std::vector<std::pair<dist_t, tableint>>,\n                        CompareByFirst>\n        top_candidates;\n    std::priority_queue<std::pair<dist_t, tableint>, std::vector<std::pair<dist_t, tableint>>,\n                        CompareByFirst>\n        candidateSet;\n\n    dist_t lowerBound;\n    if (!isMarkedDeleted(ep_id)) {\n      dist_t dist = fstdistfunc_(data_point, getDataByInternalId(ep_id), dist_func_param_);\n      top_candidates.emplace(dist, ep_id);\n      lowerBound = dist;\n      candidateSet.emplace(-dist, ep_id);\n    } else {\n      lowerBound = std::numeric_limits<dist_t>::max();\n      candidateSet.emplace(-lowerBound, ep_id);\n    }\n    visited_array[ep_id] = visited_array_tag;\n\n    while (!candidateSet.empty()) {\n      std::pair<dist_t, tableint> curr_el_pair = candidateSet.top();\n      if ((-curr_el_pair.first) > lowerBound && top_candidates.size() == ef_construction_) {\n        break;\n      }\n      candidateSet.pop();\n\n      tableint curNodeNum = curr_el_pair.second;\n\n      std::unique_lock<std::mutex> lock(link_list_locks_[curNodeNum]);\n\n      int* data;  // = (int *)(linkList0_ + curNodeNum * size_links_per_element0_);\n      
if (layer == 0) {\n        data = (int*)get_linklist0(curNodeNum);\n      } else {\n        data = (int*)get_linklist(curNodeNum, layer);\n        //                    data = (int *) (linkLists_[curNodeNum] + (layer - 1) *\n        //                    size_links_per_element_);\n      }\n      size_t size = getListCount((linklistsizeint*)data);\n      tableint* datal = (tableint*)(data + 1);\n\n      __builtin_prefetch((char*)(visited_array + *(data + 1)), 0, 3);\n      __builtin_prefetch((char*)(visited_array + *(data + 1) + 64), 0, 3);\n      __builtin_prefetch(getDataByInternalId(*datal), 0, 3);\n\n      for (size_t j = 0; j < size; j++) {\n        tableint candidate_id = *(datal + j);\n        //                    if (candidate_id == 0) continue;\n\n        // Request prefetching next vector data memory\n        if (j + 1 < size) {\n          __builtin_prefetch(getDataByInternalId(*(datal + j + 1)), 0, 3);\n        }\n\n        if (visited_array[candidate_id] == visited_array_tag)\n          continue;\n        visited_array[candidate_id] = visited_array_tag;\n        char* currObj1 = (getDataByInternalId(candidate_id));\n\n        dist_t dist1 = fstdistfunc_(data_point, currObj1, dist_func_param_);\n        if (top_candidates.size() < ef_construction_ || lowerBound > dist1) {\n          candidateSet.emplace(-dist1, candidate_id);\n\n          __builtin_prefetch(getDataByInternalId(candidateSet.top().second), 0, 3);\n\n          if (!isMarkedDeleted(candidate_id))\n            top_candidates.emplace(dist1, candidate_id);\n\n          if (top_candidates.size() > ef_construction_)\n            top_candidates.pop();\n\n          if (!top_candidates.empty())\n            lowerBound = top_candidates.top().first;\n        }\n      }\n    }\n    visited_list_pool_->releaseVisitedList(vl);\n\n    return top_candidates;\n  }\n\n  // bare_bone_search means there is no check for deletions and stop condition is ignored in return\n  // of extra performance\n  template 
<bool bare_bone_search = true, bool collect_metrics = false>\n  std::priority_queue<std::pair<dist_t, tableint>, std::vector<std::pair<dist_t, tableint>>,\n                      CompareByFirst>\n  searchBaseLayerST(tableint ep_id, const void* data_point, size_t ef,\n                    BaseFilterFunctor* isIdAllowed = nullptr,\n                    BaseSearchStopCondition* stop_condition = nullptr) const {\n    VisitedList* vl = visited_list_pool_->getFreeVisitedList();\n    vl_type* visited_array = vl->mass;\n    vl_type visited_array_tag = vl->curV;\n\n    std::priority_queue<std::pair<dist_t, tableint>, std::vector<std::pair<dist_t, tableint>>,\n                        CompareByFirst>\n        top_candidates;\n    std::priority_queue<std::pair<dist_t, tableint>, std::vector<std::pair<dist_t, tableint>>,\n                        CompareByFirst>\n        candidate_set;\n\n    dist_t lowerBound;\n    if (bare_bone_search ||\n        (!isMarkedDeleted(ep_id) && ((!isIdAllowed) || (*isIdAllowed)(getExternalLabel(ep_id))))) {\n      char* ep_data = getDataByInternalId(ep_id);\n      dist_t dist = fstdistfunc_(data_point, ep_data, dist_func_param_);\n      lowerBound = dist;\n      top_candidates.emplace(dist, ep_id);\n      if (!bare_bone_search && stop_condition) {\n        stop_condition->add_point_to_result(getExternalLabel(ep_id), ep_data, dist);\n      }\n      candidate_set.emplace(-dist, ep_id);\n    } else {\n      lowerBound = std::numeric_limits<dist_t>::max();\n      candidate_set.emplace(-lowerBound, ep_id);\n    }\n\n    visited_array[ep_id] = visited_array_tag;\n\n    while (!candidate_set.empty()) {\n      std::pair<dist_t, tableint> current_node_pair = candidate_set.top();\n      dist_t candidate_dist = -current_node_pair.first;\n\n      bool flag_stop_search;\n      if (bare_bone_search) {\n        flag_stop_search = candidate_dist > lowerBound;\n      } else {\n        if (stop_condition) {\n          flag_stop_search = 
stop_condition->should_stop_search(candidate_dist, lowerBound);\n        } else {\n          flag_stop_search = candidate_dist > lowerBound && top_candidates.size() == ef;\n        }\n      }\n      if (flag_stop_search) {\n        break;\n      }\n      candidate_set.pop();\n\n      tableint current_node_id = current_node_pair.second;\n      int* data = (int*)get_linklist0(current_node_id);\n      size_t size = getListCount((linklistsizeint*)data);\n      //                bool cur_node_deleted = isMarkedDeleted(current_node_id);\n      if (collect_metrics) {\n        metric_hops++;\n        metric_distance_computations += size;\n      }\n\n      __builtin_prefetch((char*)(visited_array + *(data + 1)), 0, 3);\n      __builtin_prefetch((char*)(visited_array + *(data + 1) + 64), 0, 3);\n      __builtin_prefetch(getDataByInternalId(*(data + 1)), 0, 3);\n      __builtin_prefetch((char*)(data + 2), 0, 3);\n\n      for (size_t j = 1; j <= size; j++) {\n        int candidate_id = *(data + j);\n        //                    if (candidate_id == 0) continue;\n\n        // Request prefetching next vector data memory\n        if (j + 1 < size) {\n          __builtin_prefetch(getDataByInternalId(*(data + j + 1)), 0, 3);\n        }\n\n        if (!(visited_array[candidate_id] == visited_array_tag)) {\n          visited_array[candidate_id] = visited_array_tag;\n\n          char* currObj1 = (getDataByInternalId(candidate_id));\n          dist_t dist = fstdistfunc_(data_point, currObj1, dist_func_param_);\n\n          bool flag_consider_candidate;\n          if (!bare_bone_search && stop_condition) {\n            flag_consider_candidate = stop_condition->should_consider_candidate(dist, lowerBound);\n          } else {\n            flag_consider_candidate = top_candidates.size() < ef || lowerBound > dist;\n          }\n\n          if (flag_consider_candidate) {\n            candidate_set.emplace(-dist, candidate_id);\n\n            __builtin_prefetch(data_level0_memory_ +\n         
                          candidate_set.top().second * size_data_per_element_ +\n                                   offsetLevel0_,  ///////////\n                               0, 3);              ////////////////////////\n\n            if (bare_bone_search ||\n                (!isMarkedDeleted(candidate_id) &&\n                 ((!isIdAllowed) || (*isIdAllowed)(getExternalLabel(candidate_id))))) {\n              top_candidates.emplace(dist, candidate_id);\n              if (!bare_bone_search && stop_condition) {\n                stop_condition->add_point_to_result(getExternalLabel(candidate_id), currObj1, dist);\n              }\n            }\n\n            bool flag_remove_extra = false;\n            if (!bare_bone_search && stop_condition) {\n              flag_remove_extra = stop_condition->should_remove_extra();\n            } else {\n              flag_remove_extra = top_candidates.size() > ef;\n            }\n            while (flag_remove_extra) {\n              tableint id = top_candidates.top().second;\n              top_candidates.pop();\n              if (!bare_bone_search && stop_condition) {\n                stop_condition->remove_point_from_result(getExternalLabel(id),\n                                                         getDataByInternalId(id), dist);\n                flag_remove_extra = stop_condition->should_remove_extra();\n              } else {\n                flag_remove_extra = top_candidates.size() > ef;\n              }\n            }\n\n            if (!top_candidates.empty())\n              lowerBound = top_candidates.top().first;\n          }\n        }\n      }\n    }\n\n    visited_list_pool_->releaseVisitedList(vl);\n    return top_candidates;\n  }\n\n  void getNeighborsByHeuristic2(\n      std::priority_queue<std::pair<dist_t, tableint>, std::vector<std::pair<dist_t, tableint>>,\n                          CompareByFirst>& top_candidates,\n      const size_t M) {\n    if (top_candidates.size() < M) {\n      return;\n    }\n\n    
std::priority_queue<std::pair<dist_t, tableint>> queue_closest;\n    std::vector<std::pair<dist_t, tableint>> return_list;\n    while (top_candidates.size() > 0) {\n      queue_closest.emplace(-top_candidates.top().first, top_candidates.top().second);\n      top_candidates.pop();\n    }\n\n    while (queue_closest.size()) {\n      if (return_list.size() >= M)\n        break;\n      std::pair<dist_t, tableint> curent_pair = queue_closest.top();\n      dist_t dist_to_query = -curent_pair.first;\n      queue_closest.pop();\n      bool good = true;\n\n      for (std::pair<dist_t, tableint> second_pair : return_list) {\n        dist_t curdist = fstdistfunc_(getDataByInternalId(second_pair.second),\n                                      getDataByInternalId(curent_pair.second), dist_func_param_);\n        if (curdist < dist_to_query) {\n          good = false;\n          break;\n        }\n      }\n      if (good) {\n        return_list.push_back(curent_pair);\n      }\n    }\n\n    for (std::pair<dist_t, tableint> curent_pair : return_list) {\n      top_candidates.emplace(-curent_pair.first, curent_pair.second);\n    }\n  }\n\n  linklistsizeint* get_linklist0(tableint internal_id) const {\n    return (linklistsizeint*)(data_level0_memory_ + internal_id * size_data_per_element_ +\n                              offsetLevel0_);\n  }\n\n  linklistsizeint* get_linklist0(tableint internal_id, char* data_level0_memory_) const {\n    return (linklistsizeint*)(data_level0_memory_ + internal_id * size_data_per_element_ +\n                              offsetLevel0_);\n  }\n\n  linklistsizeint* get_linklist(tableint internal_id, int level) const {\n    return (linklistsizeint*)(linkLists_[internal_id] + (level - 1) * size_links_per_element_);\n  }\n\n  linklistsizeint* get_linklist_at_level(tableint internal_id, int level) const {\n    return level == 0 ? 
get_linklist0(internal_id) : get_linklist(internal_id, level);\n  }\n\n  tableint mutuallyConnectNewElement(\n      const void* data_point, tableint cur_c,\n      std::priority_queue<std::pair<dist_t, tableint>, std::vector<std::pair<dist_t, tableint>>,\n                          CompareByFirst>& top_candidates,\n      int level, bool isUpdate) {\n    size_t Mcurmax = level ? maxM_ : maxM0_;\n    getNeighborsByHeuristic2(top_candidates, M_);\n    if (top_candidates.size() > M_)\n      throw std::runtime_error(\n          \"Should be not be more than M_ candidates returned by the heuristic\");\n\n    std::vector<tableint> selectedNeighbors;\n    selectedNeighbors.reserve(M_);\n    while (top_candidates.size() > 0) {\n      selectedNeighbors.push_back(top_candidates.top().second);\n      top_candidates.pop();\n    }\n\n    tableint next_closest_entry_point = selectedNeighbors.back();\n\n    {\n      // lock only during the update\n      // because during the addition the lock for cur_c is already acquired\n      std::unique_lock<std::mutex> lock(link_list_locks_[cur_c], std::defer_lock);\n      if (isUpdate) {\n        lock.lock();\n      }\n      linklistsizeint* ll_cur;\n      if (level == 0)\n        ll_cur = get_linklist0(cur_c);\n      else\n        ll_cur = get_linklist(cur_c, level);\n\n      if (*ll_cur && !isUpdate) {\n        throw std::runtime_error(\"The newly inserted element should have blank link list\");\n      }\n      setListCount(ll_cur, selectedNeighbors.size());\n      tableint* data = (tableint*)(ll_cur + 1);\n      for (size_t idx = 0; idx < selectedNeighbors.size(); idx++) {\n        if (data[idx] && !isUpdate)\n          throw std::runtime_error(\"Possible memory corruption\");\n        if (level > element_levels_[selectedNeighbors[idx]])\n          throw std::runtime_error(\"Trying to make a link on a non-existent level\");\n\n        data[idx] = selectedNeighbors[idx];\n      }\n    }\n\n    for (size_t idx = 0; idx < 
selectedNeighbors.size(); idx++) {\n      std::unique_lock<std::mutex> lock(link_list_locks_[selectedNeighbors[idx]]);\n\n      linklistsizeint* ll_other;\n      if (level == 0)\n        ll_other = get_linklist0(selectedNeighbors[idx]);\n      else\n        ll_other = get_linklist(selectedNeighbors[idx], level);\n\n      size_t sz_link_list_other = getListCount(ll_other);\n\n      if (sz_link_list_other > Mcurmax)\n        throw std::runtime_error(\"Bad value of sz_link_list_other\");\n      if (selectedNeighbors[idx] == cur_c)\n        throw std::runtime_error(\"Trying to connect an element to itself\");\n      if (level > element_levels_[selectedNeighbors[idx]])\n        throw std::runtime_error(\"Trying to make a link on a non-existent level\");\n\n      tableint* data = (tableint*)(ll_other + 1);\n\n      bool is_cur_c_present = false;\n      if (isUpdate) {\n        for (size_t j = 0; j < sz_link_list_other; j++) {\n          if (data[j] == cur_c) {\n            is_cur_c_present = true;\n            break;\n          }\n        }\n      }\n\n      // If cur_c is already present in the neighboring connections of `selectedNeighbors[idx]` then\n      // no need to modify any connections or run the heuristics.\n      if (!is_cur_c_present) {\n        if (sz_link_list_other < Mcurmax) {\n          data[sz_link_list_other] = cur_c;\n          setListCount(ll_other, sz_link_list_other + 1);\n        } else {\n          // finding the \"weakest\" element to replace it with the new one\n          dist_t d_max =\n              fstdistfunc_(getDataByInternalId(cur_c), getDataByInternalId(selectedNeighbors[idx]),\n                           dist_func_param_);\n          // Heuristic:\n          std::priority_queue<std::pair<dist_t, tableint>, std::vector<std::pair<dist_t, tableint>>,\n                              CompareByFirst>\n              candidates;\n          candidates.emplace(d_max, cur_c);\n\n          for (size_t j = 0; j < sz_link_list_other; j++) {\n         
   candidates.emplace(\n                fstdistfunc_(getDataByInternalId(data[j]),\n                             getDataByInternalId(selectedNeighbors[idx]), dist_func_param_),\n                data[j]);\n          }\n\n          getNeighborsByHeuristic2(candidates, Mcurmax);\n\n          int indx = 0;\n          while (candidates.size() > 0) {\n            data[indx] = candidates.top().second;\n            candidates.pop();\n            indx++;\n          }\n\n          setListCount(ll_other, indx);\n          // Nearest K:\n          /*int indx = -1;\n          for (int j = 0; j < sz_link_list_other; j++) {\n              dist_t d = fstdistfunc_(getDataByInternalId(data[j]), getDataByInternalId(rez[idx]),\n          dist_func_param_); if (d > d_max) { indx = j; d_max = d;\n              }\n          }\n          if (indx >= 0) {\n              data[indx] = cur_c;\n          } */\n        }\n      }\n    }\n\n    return next_closest_entry_point;\n  }\n\n  void resizeIndex(size_t new_max_elements) {\n    if (new_max_elements < cur_element_count)\n      throw std::runtime_error(\n          \"Cannot resize, max element is less than the current number of elements\");\n\n    visited_list_pool_.reset(new VisitedListPool(1, new_max_elements));\n\n    element_levels_.resize(new_max_elements);\n\n    std::vector<std::mutex>(new_max_elements).swap(link_list_locks_);\n\n    // Reallocate base layer\n    char* data_level0_memory_new =\n        (char*)mi_realloc(data_level0_memory_, new_max_elements * size_data_per_element_);\n    if (data_level0_memory_new == nullptr)\n      throw std::runtime_error(\"Not enough memory: resizeIndex failed to allocate base layer\");\n    data_level0_memory_ = data_level0_memory_new;\n\n    // If we copy vectors, reallocate also vector data memory\n    if (copy_vector_) {\n      char* data_vector_memory_new =\n          (char*)mi_realloc(data_vector_memory_, new_max_elements * data_size_);\n      if (data_vector_memory_new == nullptr)\n        
throw std::runtime_error(\"Not enough memory: resizeIndex failed to allocate vector memory\");\n      data_vector_memory_ = data_vector_memory_new;\n    }\n\n    // Reallocate all other layers\n    char** linkLists_new = (char**)mi_realloc(linkLists_, sizeof(void*) * new_max_elements);\n    if (linkLists_new == nullptr)\n      throw std::runtime_error(\"Not enough memory: resizeIndex failed to allocate other layers\");\n    linkLists_ = linkLists_new;\n\n    max_elements_ = new_max_elements;\n  }\n\n  size_t indexFileSize() const {\n    size_t size = 0;\n    size += sizeof(offsetLevel0_);\n    size += sizeof(max_elements_);\n    size += sizeof(cur_element_count);\n    size += sizeof(size_data_per_element_);\n    size += sizeof(label_offset_);\n    size += sizeof(offsetData_);\n    size += sizeof(maxlevel_);\n    size += sizeof(enterpoint_node_);\n    size += sizeof(maxM_);\n\n    size += sizeof(maxM0_);\n    size += sizeof(M_);\n    size += sizeof(mult_);\n    size += sizeof(ef_construction_);\n\n    size += cur_element_count * size_data_per_element_;\n\n    for (size_t i = 0; i < cur_element_count; i++) {\n      unsigned int linkListSize =\n          element_levels_[i] > 0 ? 
size_links_per_element_ * element_levels_[i] : 0;\n      size += sizeof(linkListSize);\n      size += linkListSize;\n    }\n    return size;\n  }\n\n  void saveIndex(const std::string& location) {\n#if 0\n        std::ofstream output(location, std::ios::binary);\n        std::streampos position;\n\n        writeBinaryPOD(output, offsetLevel0_);\n        writeBinaryPOD(output, max_elements_);\n        writeBinaryPOD(output, cur_element_count);\n        writeBinaryPOD(output, size_data_per_element_);\n        writeBinaryPOD(output, label_offset_);\n        writeBinaryPOD(output, offsetData_);\n        writeBinaryPOD(output, maxlevel_);\n        writeBinaryPOD(output, enterpoint_node_);\n        writeBinaryPOD(output, maxM_);\n\n        writeBinaryPOD(output, maxM0_);\n        writeBinaryPOD(output, M_);\n        writeBinaryPOD(output, mult_);\n        writeBinaryPOD(output, ef_construction_);\n        writeBinaryPOD(output, copy_vector_);\n\n        output.write(data_level0_memory_, cur_element_count * size_data_per_element_);\n\n        if(copy_vector_) {\n          output.write(data_vector_memory_, cur_element_count * data_size_);\n        }\n\n        for (size_t i = 0; i < cur_element_count; i++) {\n            unsigned int linkListSize = element_levels_[i] > 0 ? 
size_links_per_element_ * element_levels_[i] : 0;\n            writeBinaryPOD(output, linkListSize);\n            if (linkListSize)\n                output.write(linkLists_[i], linkListSize);\n        }\n        output.close();\n#endif\n  }\n\n  void loadIndex(const std::string& location, hnswlib::SpaceInterface<dist_t>* s,\n                 size_t max_elements_i = 0) {\n#if 0\n        std::ifstream input(location, std::ios::binary);\n\n        if (!input.is_open())\n            throw std::runtime_error(\"Cannot open file\");\n\n        clear();\n        // get file size:\n        input.seekg(0, input.end);\n        std::streampos total_filesize = input.tellg();\n        input.seekg(0, input.beg);\n\n        readBinaryPOD(input, offsetLevel0_);\n        readBinaryPOD(input, max_elements_);\n        readBinaryPOD(input, cur_element_count);\n\n        size_t max_elements = max_elements_i;\n        if (max_elements < cur_element_count)\n            max_elements = max_elements_;\n        max_elements_ = max_elements;\n        readBinaryPOD(input, size_data_per_element_);\n        readBinaryPOD(input, label_offset_);\n        readBinaryPOD(input, offsetData_);\n        readBinaryPOD(input, maxlevel_);\n        readBinaryPOD(input, enterpoint_node_);\n\n        readBinaryPOD(input, maxM_);\n        readBinaryPOD(input, maxM0_);\n        readBinaryPOD(input, M_);\n        readBinaryPOD(input, mult_);\n        readBinaryPOD(input, ef_construction_);\n\n        readBinaryPOD(input, copy_vector_);\n\n        data_size_ = s->get_data_size();\n        fstdistfunc_ = s->get_dist_func();\n        dist_func_param_ = s->get_dist_func_param();\n\n        auto pos = input.tellg();\n\n        /// Optional - check if index is ok:\n        input.seekg(cur_element_count * size_data_per_element_, input.cur);\n        for (size_t i = 0; i < cur_element_count; i++) {\n            if (input.tellg() < 0 || input.tellg() >= total_filesize) {\n                throw std::runtime_error(\"Index 
seems to be corrupted or unsupported\");\n            }\n\n            unsigned int linkListSize;\n            readBinaryPOD(input, linkListSize);\n            if (linkListSize != 0) {\n                input.seekg(linkListSize, input.cur);\n            }\n        }\n\n        // throw exception if it either corrupted or old index\n        if (input.tellg() != total_filesize)\n            throw std::runtime_error(\"Index seems to be corrupted or unsupported\");\n\n        input.clear();\n        /// Optional check end\n\n        input.seekg(pos, input.beg);\n\n        data_level0_memory_ = (char *) mi_malloc(max_elements * size_data_per_element_);\n        if (data_level0_memory_ == nullptr)\n            throw std::runtime_error(\"Not enough memory: loadIndex failed to allocate level0\");\n        input.read(data_level0_memory_, cur_element_count * size_data_per_element_);\n\n        if(copy_vector_) {\n          data_vector_memory_ = (char *) mi_malloc(max_elements * data_size_);\n          if (data_vector_memory_ == nullptr)\n              throw std::runtime_error(\"Not enough memory: loadIndex failed to allocate vector memory\");\n          input.read(data_vector_memory_, cur_element_count * data_size_);\n        }\n\n        size_links_per_element_ = maxM_ * sizeof(tableint) + sizeof(linklistsizeint);\n\n        size_links_level0_ = maxM0_ * sizeof(tableint) + sizeof(linklistsizeint);\n        std::vector<std::mutex>(max_elements).swap(link_list_locks_);\n        std::vector<std::mutex>(MAX_LABEL_OPERATION_LOCKS).swap(label_op_locks_);\n\n        visited_list_pool_.reset(new VisitedListPool(1, max_elements));\n\n        linkLists_ = (char **) mi_malloc(sizeof(void *) * max_elements);\n        if (linkLists_ == nullptr)\n            throw std::runtime_error(\"Not enough memory: loadIndex failed to allocate linklists\");\n        element_levels_ = std::vector<int>(max_elements);\n        revSize_ = 1.0 / mult_;\n        ef_ = 10;\n        for (size_t i = 0; i < 
cur_element_count; i++) {\n            label_lookup_[getExternalLabel(i)] = i;\n            unsigned int linkListSize;\n            readBinaryPOD(input, linkListSize);\n            if (linkListSize == 0) {\n                element_levels_[i] = 0;\n                linkLists_[i] = nullptr;\n            } else {\n                element_levels_[i] = linkListSize / size_links_per_element_;\n                linkLists_[i] = (char *) mi_malloc(linkListSize);\n                if (linkLists_[i] == nullptr)\n                    throw std::runtime_error(\"Not enough memory: loadIndex failed to allocate linklist\");\n                input.read(linkLists_[i], linkListSize);\n            }\n        }\n\n        for (size_t i = 0; i < cur_element_count; i++) {\n            if (isMarkedDeleted(i)) {\n                num_deleted_ += 1;\n                if (allow_replace_deleted_) deleted_elements.insert(i);\n            }\n        }\n\n        input.close();\n#endif\n  }\n\n  template <typename data_t> std::vector<data_t> getDataByLabel(labeltype label) const {\n    // lock all operations with element by label\n    std::unique_lock<std::mutex> lock_label(getLabelOpMutex(label));\n\n    std::unique_lock<std::mutex> lock_table(label_lookup_lock);\n    auto search = label_lookup_.find(label);\n    if (search == label_lookup_.end() || isMarkedDeleted(search->second)) {\n      throw std::runtime_error(\"Label not found\");\n    }\n    tableint internalId = search->second;\n    lock_table.unlock();\n\n    char* data_ptrv = getDataByInternalId(internalId);\n    size_t dim = *((size_t*)dist_func_param_);\n    std::vector<data_t> data;\n    data_t* data_ptr = (data_t*)data_ptrv;\n    for (size_t i = 0; i < dim; i++) {\n      data.push_back(*data_ptr);\n      data_ptr += 1;\n    }\n    return data;\n  }\n\n  /*\n   * Marks an element with the given label deleted, does NOT really change the current graph.\n   */\n  HnswErrorStatus markDelete(labeltype label) {\n    // lock all operations with 
element by label\n    std::unique_lock<std::mutex> lock_label(getLabelOpMutex(label));\n\n    std::unique_lock<std::mutex> lock_table(label_lookup_lock);\n    auto search = label_lookup_.find(label);\n    if (search == label_lookup_.end()) {\n      return HnswErrorStatus::LABEL_NOT_FOUND;\n    }\n    tableint internalId = search->second;\n    lock_table.unlock();\n    if (!markDeletedInternal(internalId)) {\n      return HnswErrorStatus::ELEMENT_ALREADY_DELETED;\n    }\n    return HnswErrorStatus::SUCCESS;\n  }\n\n  /*\n   * Uses the last 16 bits of the memory for the linked list size to store the mark,\n   * whereas maxM0_ has to be limited to the lower 16 bits, however, still large enough in almost\n   * all cases.\n   */\n  bool markDeletedInternal(tableint internalId) {\n    assert(internalId < cur_element_count);\n    if (!isMarkedDeleted(internalId)) {\n      unsigned char* ll_cur = ((unsigned char*)get_linklist0(internalId)) + 2;\n      *ll_cur |= DELETE_MARK;\n      num_deleted_ += 1;\n      if (allow_replace_deleted_) {\n        std::unique_lock<std::mutex> lock_deleted_elements(deleted_elements_lock);\n        deleted_elements.insert(internalId);\n      }\n      return true;\n    } else {\n      return false;\n    }\n  }\n\n  /*\n   * Removes the deleted mark of the node, does NOT really change the current graph.\n   *\n   * Note: the method is not safe to use when replacement of deleted elements is enabled,\n   *  because elements marked as deleted can be completely removed by addPoint\n   */\n  void unmarkDelete(labeltype label) {\n    // lock all operations with element by label\n    std::unique_lock<std::mutex> lock_label(getLabelOpMutex(label));\n\n    std::unique_lock<std::mutex> lock_table(label_lookup_lock);\n    auto search = label_lookup_.find(label);\n    if (search == label_lookup_.end()) {\n      throw std::runtime_error(\"Label not found\");\n    }\n    tableint internalId = search->second;\n    lock_table.unlock();\n\n    
unmarkDeletedInternal(internalId);\n  }\n\n  /*\n   * Remove the deleted mark of the node.\n   */\n  void unmarkDeletedInternal(tableint internalId) {\n    assert(internalId < cur_element_count);\n    if (isMarkedDeleted(internalId)) {\n      unsigned char* ll_cur = ((unsigned char*)get_linklist0(internalId)) + 2;\n      *ll_cur &= ~DELETE_MARK;\n      num_deleted_ -= 1;\n      if (allow_replace_deleted_) {\n        std::unique_lock<std::mutex> lock_deleted_elements(deleted_elements_lock);\n        deleted_elements.erase(internalId);\n      }\n    } else {\n      throw std::runtime_error(\"The requested to undelete element is not deleted\");\n    }\n  }\n\n  /*\n   * Checks the first 16 bits of the memory to see if the element is marked deleted.\n   */\n  bool isMarkedDeleted(tableint internalId) const {\n    unsigned char* ll_cur = ((unsigned char*)get_linklist0(internalId)) + 2;\n    return *ll_cur & DELETE_MARK;\n  }\n\n  unsigned short int getListCount(linklistsizeint* ptr) const {\n    return *((unsigned short int*)ptr);\n  }\n\n  void setListCount(linklistsizeint* ptr, unsigned short int size) const {\n    *((unsigned short int*)(ptr)) = *((unsigned short int*)&size);\n  }\n\n  /*\n   * Adds point. 
Updates the point if it is already in the index.\n   * If replacement of deleted elements is enabled: replaces previously deleted point if any,\n   * updating it with new point\n   */\n  void addPoint(const void* data_point, labeltype label, bool replace_deleted = false) {\n    if ((allow_replace_deleted_ == false) && (replace_deleted == true)) {\n      throw std::runtime_error(\"Replacement of deleted elements is disabled in constructor\");\n    }\n\n    // lock all operations with element by label\n    std::unique_lock<std::mutex> lock_label(getLabelOpMutex(label));\n    if (!replace_deleted) {\n      addPoint(data_point, label, -1);\n      return;\n    }\n    // check if there is vacant place\n    tableint internal_id_replaced;\n    std::unique_lock<std::mutex> lock_deleted_elements(deleted_elements_lock);\n    bool is_vacant_place = !deleted_elements.empty();\n    if (is_vacant_place) {\n      internal_id_replaced = *deleted_elements.begin();\n      deleted_elements.erase(internal_id_replaced);\n    }\n    lock_deleted_elements.unlock();\n\n    // if there is no vacant place then add or update point\n    // else add point to vacant place\n    if (!is_vacant_place) {\n      addPoint(data_point, label, -1);\n    } else {\n      // we assume that there are no concurrent operations on deleted element\n      labeltype label_replaced = getExternalLabel(internal_id_replaced);\n      setExternalLabel(internal_id_replaced, label);\n\n      std::unique_lock<std::mutex> lock_table(label_lookup_lock);\n      label_lookup_.erase(label_replaced);\n      label_lookup_[label] = internal_id_replaced;\n      lock_table.unlock();\n\n      unmarkDeletedInternal(internal_id_replaced);\n      updatePoint(data_point, internal_id_replaced, 1.0);\n    }\n  }\n\n  void updatePoint(const void* dataPointIn, tableint internalId, float updateNeighborProbability) {\n    if (copy_vector_) {\n      memcpy(getDataByInternalId(internalId), dataPointIn, data_size_);\n    } else {\n      
memcpy(getDataPtrByInternalId(internalId), &dataPointIn, sizeof(void*));\n    }\n\n    const void* dataPoint = getDataByInternalId(internalId);\n    assert(dataPoint != nullptr);\n\n    int maxLevelCopy = maxlevel_;\n    tableint entryPointCopy = enterpoint_node_;\n    // If point to be updated is entry point and graph just contains single element then just\n    // return.\n    if (entryPointCopy == internalId && cur_element_count == 1)\n      return;\n\n    int elemLevel = element_levels_[internalId];\n    std::uniform_real_distribution<float> distribution(0.0, 1.0);\n    for (int layer = 0; layer <= elemLevel; layer++) {\n      std::unordered_set<tableint> sCand;\n      std::unordered_set<tableint> sNeigh;\n      std::vector<tableint> listOneHop = getConnectionsWithLock(internalId, layer);\n      if (listOneHop.size() == 0)\n        continue;\n\n      sCand.insert(internalId);\n\n      for (auto&& elOneHop : listOneHop) {\n        sCand.insert(elOneHop);\n\n        if (distribution(update_probability_generator_) > updateNeighborProbability)\n          continue;\n\n        sNeigh.insert(elOneHop);\n\n        std::vector<tableint> listTwoHop = getConnectionsWithLock(elOneHop, layer);\n        for (auto&& elTwoHop : listTwoHop) {\n          sCand.insert(elTwoHop);\n        }\n      }\n\n      for (auto&& neigh : sNeigh) {\n        // if (neigh == internalId)\n        //     continue;\n\n        std::priority_queue<std::pair<dist_t, tableint>, std::vector<std::pair<dist_t, tableint>>,\n                            CompareByFirst>\n            candidates;\n        size_t size = sCand.find(neigh) == sCand.end()\n                          ? 
sCand.size()\n                          : sCand.size() - 1;  // sCand guaranteed to have size >= 1\n        size_t elementsToKeep = std::min(ef_construction_, size);\n        for (auto&& cand : sCand) {\n          if (cand == neigh)\n            continue;\n\n          dist_t distance =\n              fstdistfunc_(getDataByInternalId(neigh), getDataByInternalId(cand), dist_func_param_);\n          if (candidates.size() < elementsToKeep) {\n            candidates.emplace(distance, cand);\n          } else {\n            if (distance < candidates.top().first) {\n              candidates.pop();\n              candidates.emplace(distance, cand);\n            }\n          }\n        }\n\n        // Retrieve neighbours using heuristic and set connections.\n        getNeighborsByHeuristic2(candidates, layer == 0 ? maxM0_ : maxM_);\n\n        {\n          std::unique_lock<std::mutex> lock(link_list_locks_[neigh]);\n          linklistsizeint* ll_cur;\n          ll_cur = get_linklist_at_level(neigh, layer);\n          size_t candSize = candidates.size();\n          setListCount(ll_cur, candSize);\n          tableint* data = (tableint*)(ll_cur + 1);\n          for (size_t idx = 0; idx < candSize; idx++) {\n            data[idx] = candidates.top().second;\n            candidates.pop();\n          }\n        }\n      }\n    }\n\n    repairConnectionsForUpdate(dataPoint, entryPointCopy, internalId, elemLevel, maxLevelCopy);\n  }\n\n  void repairConnectionsForUpdate(const void* dataPoint, tableint entryPointInternalId,\n                                  tableint dataPointInternalId, int dataPointLevel, int maxLevel) {\n    tableint currObj = entryPointInternalId;\n    if (dataPointLevel < maxLevel) {\n      dist_t curdist = fstdistfunc_(dataPoint, getDataByInternalId(currObj), dist_func_param_);\n      for (int level = maxLevel; level > dataPointLevel; level--) {\n        bool changed = true;\n        while (changed) {\n          changed = false;\n          unsigned int* data;\n   
       std::unique_lock<std::mutex> lock(link_list_locks_[currObj]);\n          data = get_linklist_at_level(currObj, level);\n          int size = getListCount(data);\n          tableint* datal = (tableint*)(data + 1);\n\n          __builtin_prefetch(getDataByInternalId(*datal), 0, 3);\n\n          for (int i = 0; i < size; i++) {\n            if (i + 1 < size) {\n              __builtin_prefetch(getDataByInternalId(*(datal + i + 1)), 1, 3);\n            }\n\n            tableint cand = datal[i];\n            dist_t d = fstdistfunc_(dataPoint, getDataByInternalId(cand), dist_func_param_);\n            if (d < curdist) {\n              curdist = d;\n              currObj = cand;\n              changed = true;\n            }\n          }\n        }\n      }\n    }\n\n    if (dataPointLevel > maxLevel)\n      throw std::runtime_error(\"Level of item to be updated cannot be bigger than max level\");\n\n    for (int level = dataPointLevel; level >= 0; level--) {\n      std::priority_queue<std::pair<dist_t, tableint>, std::vector<std::pair<dist_t, tableint>>,\n                          CompareByFirst>\n          topCandidates = searchBaseLayer(currObj, dataPoint, level);\n\n      std::priority_queue<std::pair<dist_t, tableint>, std::vector<std::pair<dist_t, tableint>>,\n                          CompareByFirst>\n          filteredTopCandidates;\n      while (topCandidates.size() > 0) {\n        if (topCandidates.top().second != dataPointInternalId)\n          filteredTopCandidates.push(topCandidates.top());\n\n        topCandidates.pop();\n      }\n\n      // Since element_levels_ is being used to get `dataPointLevel`, there could be cases where\n      // `topCandidates` could contain just the entry point itself. 
To prevent self loops, the\n      // `topCandidates` is filtered and thus can be empty.\n      if (filteredTopCandidates.size() > 0) {\n        bool epDeleted = isMarkedDeleted(entryPointInternalId);\n        if (epDeleted) {\n          filteredTopCandidates.emplace(\n              fstdistfunc_(dataPoint, getDataByInternalId(entryPointInternalId), dist_func_param_),\n              entryPointInternalId);\n          if (filteredTopCandidates.size() > ef_construction_)\n            filteredTopCandidates.pop();\n        }\n\n        currObj = mutuallyConnectNewElement(dataPoint, dataPointInternalId, filteredTopCandidates,\n                                            level, true);\n      }\n    }\n  }\n\n  std::vector<tableint> getConnectionsWithLock(tableint internalId, int level) {\n    std::unique_lock<std::mutex> lock(link_list_locks_[internalId]);\n    unsigned int* data = get_linklist_at_level(internalId, level);\n    int size = getListCount(data);\n    std::vector<tableint> result(size);\n    tableint* ll = (tableint*)(data + 1);\n    memcpy(result.data(), ll, size * sizeof(tableint));\n    return result;\n  }\n\n  tableint addPoint(const void* data_point_in, labeltype label, int level) {\n    tableint cur_c = 0;\n    {\n      // Checking if the element with the same label already exists\n      // if so, updating it *instead* of creating a new element.\n      std::unique_lock<std::mutex> lock_table(label_lookup_lock);\n      auto search = label_lookup_.find(label);\n      if (search != label_lookup_.end()) {\n        tableint existingInternalId = search->second;\n        if (allow_replace_deleted_) {\n          if (isMarkedDeleted(existingInternalId)) {\n            throw std::runtime_error(\n                \"Can't use addPoint to update deleted elements if replacement of deleted elements \"\n                \"is enabled.\");\n          }\n        }\n        lock_table.unlock();\n\n        if (isMarkedDeleted(existingInternalId)) {\n          
unmarkDeletedInternal(existingInternalId);\n        }\n        updatePoint(data_point_in, existingInternalId, 1.0);\n\n        return existingInternalId;\n      }\n\n      if (cur_element_count >= max_elements_) {\n        throw std::runtime_error(\"The number of elements exceeds the specified limit\");\n      }\n\n      cur_c = cur_element_count;\n      cur_element_count++;\n      label_lookup_[label] = cur_c;\n    }\n\n    std::unique_lock<std::mutex> lock_el(link_list_locks_[cur_c]);\n    int curlevel = getRandomLevel(mult_);\n    if (level > 0)\n      curlevel = level;\n\n    element_levels_[cur_c] = curlevel;\n\n    std::unique_lock<std::mutex> templock(global);\n    int maxlevelcopy = maxlevel_;\n    if (curlevel <= maxlevelcopy)\n      templock.unlock();\n    tableint currObj = enterpoint_node_;\n    tableint enterpoint_copy = enterpoint_node_;\n\n    memset(data_level0_memory_ + cur_c * size_data_per_element_ + offsetLevel0_, 0,\n           size_data_per_element_);\n\n    if (copy_vector_) {\n      memset(data_vector_memory_ + cur_c * data_size_, 0, data_size_);\n    }\n\n    // Initialisation of the data and label\n    setExternalLabel(cur_c, label);\n\n    if (copy_vector_) {\n      memcpy(getDataByInternalId(cur_c), data_point_in, data_size_);\n    } else {\n      memcpy(getDataPtrByInternalId(cur_c), &data_point_in, sizeof(void*));\n    }\n\n    const void* data_point = getDataByInternalId(cur_c);\n    assert(data_point != nullptr);\n\n    if (curlevel) {\n      linkLists_[cur_c] = (char*)mi_malloc(size_links_per_element_ * curlevel + 1);\n      if (linkLists_[cur_c] == nullptr)\n        throw std::runtime_error(\"Not enough memory: addPoint failed to allocate linklist\");\n      memset(linkLists_[cur_c], 0, size_links_per_element_ * curlevel + 1);\n    }\n\n    if ((signed)currObj != -1) {\n      if (curlevel < maxlevelcopy) {\n        dist_t curdist = fstdistfunc_(data_point, getDataByInternalId(currObj), dist_func_param_);\n        for (int level = 
maxlevelcopy; level > curlevel; level--) {\n          bool changed = true;\n          while (changed) {\n            changed = false;\n            unsigned int* data;\n            std::unique_lock<std::mutex> lock(link_list_locks_[currObj]);\n            data = get_linklist(currObj, level);\n            int size = getListCount(data);\n\n            tableint* datal = (tableint*)(data + 1);\n            for (int i = 0; i < size; i++) {\n              tableint cand = datal[i];\n              if (cand > max_elements_)\n                throw std::runtime_error(\"cand error\");\n              dist_t d = fstdistfunc_(data_point, getDataByInternalId(cand), dist_func_param_);\n              if (d < curdist) {\n                curdist = d;\n                currObj = cand;\n                changed = true;\n              }\n            }\n          }\n        }\n      }\n\n      bool epDeleted = isMarkedDeleted(enterpoint_copy);\n      for (int level = std::min(curlevel, maxlevelcopy); level >= 0; level--) {\n        if (level > maxlevelcopy || level < 0)  // possible?\n          throw std::runtime_error(\"Level error\");\n\n        std::priority_queue<std::pair<dist_t, tableint>, std::vector<std::pair<dist_t, tableint>>,\n                            CompareByFirst>\n            top_candidates = searchBaseLayer(currObj, data_point, level);\n        if (epDeleted) {\n          top_candidates.emplace(\n              fstdistfunc_(data_point, getDataByInternalId(enterpoint_copy), dist_func_param_),\n              enterpoint_copy);\n          if (top_candidates.size() > ef_construction_)\n            top_candidates.pop();\n        }\n        currObj = mutuallyConnectNewElement(data_point, cur_c, top_candidates, level, false);\n      }\n    } else {\n      // Do nothing for the first element\n      enterpoint_node_ = 0;\n      maxlevel_ = curlevel;\n    }\n\n    // Releasing lock for the maximum level\n    if (curlevel > maxlevelcopy) {\n      enterpoint_node_ = cur_c;\n      
maxlevel_ = curlevel;\n    }\n    return cur_c;\n  }\n\n  std::priority_queue<std::pair<dist_t, labeltype>> searchKnn(\n      const void* query_data, size_t k, BaseFilterFunctor* isIdAllowed = nullptr) const {\n    std::priority_queue<std::pair<dist_t, labeltype>> result;\n    if (cur_element_count == 0)\n      return result;\n\n    tableint currObj = enterpoint_node_;\n    dist_t curdist =\n        fstdistfunc_(query_data, getDataByInternalId(enterpoint_node_), dist_func_param_);\n\n    for (int level = maxlevel_; level > 0; level--) {\n      bool changed = true;\n      while (changed) {\n        changed = false;\n        unsigned int* data;\n\n        data = (unsigned int*)get_linklist(currObj, level);\n        int size = getListCount(data);\n        metric_hops++;\n        metric_distance_computations += size;\n\n        tableint* datal = (tableint*)(data + 1);\n        for (int i = 0; i < size; i++) {\n          tableint cand = datal[i];\n          if (cand > max_elements_)\n            throw std::runtime_error(\"cand error\");\n          dist_t d = fstdistfunc_(query_data, getDataByInternalId(cand), dist_func_param_);\n\n          if (d < curdist) {\n            curdist = d;\n            currObj = cand;\n            changed = true;\n          }\n        }\n      }\n    }\n\n    std::priority_queue<std::pair<dist_t, tableint>, std::vector<std::pair<dist_t, tableint>>,\n                        CompareByFirst>\n        top_candidates;\n    bool bare_bone_search = !num_deleted_ && !isIdAllowed;\n    if (bare_bone_search) {\n      top_candidates = searchBaseLayerST<true>(currObj, query_data, std::max(ef_, k), isIdAllowed);\n    } else {\n      top_candidates = searchBaseLayerST<false>(currObj, query_data, std::max(ef_, k), isIdAllowed);\n    }\n\n    while (top_candidates.size() > k) {\n      top_candidates.pop();\n    }\n    while (top_candidates.size() > 0) {\n      std::pair<dist_t, tableint> rez = top_candidates.top();\n      result.push(std::pair<dist_t, 
labeltype>(rez.first, getExternalLabel(rez.second)));\n      top_candidates.pop();\n    }\n    return result;\n  }\n\n  // Brute-force KNN search over a pre-filtered set of label IDs.\n  // Computes distances for all provided IDs and returns the top-k closest, ordered by distance.\n  std::priority_queue<std::pair<dist_t, labeltype>> subsetKnnSearch(\n      const void* query_data, size_t k, const std::vector<labeltype>& ids) const {\n    std::priority_queue<std::pair<dist_t, labeltype>> result;\n\n    if (cur_element_count == 0 || ids.empty() || k == 0)\n      return result;\n\n    for (const auto& label : ids) {\n      auto it = label_lookup_.find(label);\n\n      if (it == label_lookup_.end()) {\n        continue;\n      }\n\n      tableint internal_id = it->second;\n\n      if (isMarkedDeleted(internal_id)) {\n        continue;\n      }\n\n      dist_t dist = fstdistfunc_(query_data, getDataByInternalId(internal_id), dist_func_param_);\n      if (result.size() < k) {\n        result.emplace(dist, label);\n      } else if (dist < result.top().first) {\n        result.pop();\n        result.emplace(dist, label);\n      }\n    }\n\n    return result;\n  }\n\n  std::vector<std::pair<dist_t, labeltype>> searchStopConditionClosest(\n      const void* query_data, BaseSearchStopCondition& stop_condition,\n      BaseFilterFunctor* isIdAllowed = nullptr) const {\n    std::vector<std::pair<dist_t, labeltype>> result;\n    if (cur_element_count == 0)\n      return result;\n\n    tableint currObj = enterpoint_node_;\n    dist_t curdist =\n        fstdistfunc_(query_data, getDataByInternalId(enterpoint_node_), dist_func_param_);\n\n    for (int level = maxlevel_; level > 0; level--) {\n      bool changed = true;\n      while (changed) {\n        changed = false;\n        unsigned int* data;\n\n        data = (unsigned int*)get_linklist(currObj, level);\n        int size = getListCount(data);\n        metric_hops++;\n        metric_distance_computations += size;\n\n        
tableint* datal = (tableint*)(data + 1);\n        for (int i = 0; i < size; i++) {\n          tableint cand = datal[i];\n          if (cand < 0 || cand > max_elements_)\n            throw std::runtime_error(\"cand error\");\n          dist_t d = fstdistfunc_(query_data, getDataByInternalId(cand), dist_func_param_);\n\n          if (d < curdist) {\n            curdist = d;\n            currObj = cand;\n            changed = true;\n          }\n        }\n      }\n    }\n\n    std::priority_queue<std::pair<dist_t, tableint>, std::vector<std::pair<dist_t, tableint>>,\n                        CompareByFirst>\n        top_candidates;\n    top_candidates = searchBaseLayerST<false>(currObj, query_data, 0, isIdAllowed, &stop_condition);\n\n    size_t sz = top_candidates.size();\n    result.resize(sz);\n    while (!top_candidates.empty()) {\n      result[--sz] = top_candidates.top();\n      top_candidates.pop();\n    }\n\n    stop_condition.filter_results(result);\n\n    return result;\n  }\n\n  // Returns all elements within `radius` distance from query_data.\n  // Adapts the HNSW beam search from Malkov & Yashunin (2018), https://arxiv.org/abs/1603.09320:\n  // Phase 1 is the standard greedy descent to find the level-0 entry point; Phase 2 replaces\n  // the top-k heap with a radius threshold, collecting all nodes with dist <= radius.\n  // The dynamic search boundary starts at max(entry_point_distance, radius) and shrinks as\n  // closer out-of-radius candidates are found; `epsilon` controls the overscan factor\n  // (default 0.01) to improve recall near the boundary.\n  std::vector<std::pair<dist_t, labeltype>> searchRange(const void* query_data, dist_t radius,\n                                                        double epsilon = 0.01) const {\n    std::vector<std::pair<dist_t, labeltype>> result;\n    if (cur_element_count == 0)\n      return result;\n\n    // Phase 1: greedy descent from top level to find the best entry point for level 0.\n    tableint currObj = 
enterpoint_node_;\n    dist_t curdist =\n        fstdistfunc_(query_data, getDataByInternalId(enterpoint_node_), dist_func_param_);\n    for (int level = maxlevel_; level > 0; level--) {\n      bool changed = true;\n      while (changed) {\n        changed = false;\n        unsigned int* data = (unsigned int*)get_linklist(currObj, level);\n        int size = getListCount(data);\n        tableint* datal = (tableint*)(data + 1);\n        for (int i = 0; i < size; i++) {\n          tableint cand = datal[i];\n          if (cand >= max_elements_)\n            throw std::runtime_error(\"cand error\");\n          dist_t d = fstdistfunc_(query_data, getDataByInternalId(cand), dist_func_param_);\n          if (d < curdist) {\n            curdist = d;\n            currObj = cand;\n            changed = true;\n          }\n        }\n      }\n    }\n\n    // Phase 2: range search on bottom layer (level 0) with dynamic search boundary.\n    VisitedList* vl = visited_list_pool_->getFreeVisitedList();\n    vl_type* visited_array = vl->mass;\n    vl_type visited_array_tag = vl->curV;\n\n    std::priority_queue<std::pair<dist_t, tableint>, std::vector<std::pair<dist_t, tableint>>,\n                        CompareByFirst>\n        candidate_set;\n\n    // Dynamic range starts at max(entry_point_dist, radius) so we never stop early just\n    // because the entry point is farther than radius.\n    dist_t ep_dist = curdist;\n    dist_t dynamic_range = std::max(ep_dist, radius);\n    dist_t dyn_boundary = static_cast<dist_t>(dynamic_range * (1.0 + epsilon));\n\n    if (!isMarkedDeleted(currObj) && ep_dist <= radius)\n      result.emplace_back(ep_dist, getExternalLabel(currObj));\n\n    candidate_set.emplace(-ep_dist, currObj);\n    visited_array[currObj] = visited_array_tag;\n\n    while (!candidate_set.empty()) {\n      auto curr_pair = candidate_set.top();\n      dist_t curr_dist = -curr_pair.first;\n\n      if (curr_dist > dyn_boundary)\n        break;\n\n      
candidate_set.pop();\n      tableint curr_id = curr_pair.second;\n\n      // Shrink dynamic_range: if candidate is between radius and current range, pull the\n      // boundary down toward radius. If candidate is within radius and dynamic_range is\n      // still above radius (entry point was far), clamp to radius so we stop over-scanning.\n      if (curr_dist < dynamic_range) {\n        if (curr_dist >= radius) {\n          dynamic_range = curr_dist;\n        } else if (dynamic_range > radius) {\n          dynamic_range = radius;\n        }\n        dyn_boundary = static_cast<dist_t>(dynamic_range * (1.0 + epsilon));\n      }\n\n      int* data = (int*)get_linklist0(curr_id);\n      size_t size = getListCount((linklistsizeint*)data);\n\n      for (size_t j = 1; j <= size; j++) {\n        tableint candidate_id = *(data + j);\n        if (candidate_id >= max_elements_)\n          throw std::runtime_error(\"cand error\");\n\n        if (j < size)\n          __builtin_prefetch(getDataByInternalId(*(data + j + 1)), 0, 3);\n\n        if (visited_array[candidate_id] == visited_array_tag)\n          continue;\n        visited_array[candidate_id] = visited_array_tag;\n\n        dist_t d = fstdistfunc_(query_data, getDataByInternalId(candidate_id), dist_func_param_);\n        if (d < dyn_boundary) {\n          candidate_set.emplace(-d, candidate_id);\n          if (!isMarkedDeleted(candidate_id) && d <= radius)\n            result.emplace_back(d, getExternalLabel(candidate_id));\n        }\n      }\n    }\n\n    visited_list_pool_->releaseVisitedList(vl);\n    return result;\n  }\n\n#if 0\n    void checkIntegrity() {\n        int connections_checked = 0;\n        std::vector <int > inbound_connections_num(cur_element_count, 0);\n        for (int i = 0; i < cur_element_count; i++) {\n            for (int l = 0; l <= element_levels_[i]; l++) {\n                linklistsizeint *ll_cur = get_linklist_at_level(i, l);\n                int size = getListCount(ll_cur);\n            
    tableint *data = (tableint *) (ll_cur + 1);\n                std::unordered_set<tableint> s;\n                for (int j = 0; j < size; j++) {\n                    assert(data[j] < cur_element_count);\n                    assert(data[j] != i);\n                    inbound_connections_num[data[j]]++;\n                    s.insert(data[j]);\n                    connections_checked++;\n                }\n                assert(s.size() == size);\n            }\n        }\n        if (cur_element_count > 1) {\n            int min1 = inbound_connections_num[0], max1 = inbound_connections_num[0];\n            for (int i=0; i < cur_element_count; i++) {\n                assert(inbound_connections_num[i] > 0);\n                min1 = std::min(inbound_connections_num[i], min1);\n                max1 = std::max(inbound_connections_num[i], max1);\n            }\n            std::cout << \"Min inbound: \" << min1 << \", Max inbound:\" << max1 << \"\\n\";\n        }\n        std::cout << \"integrity ok, checked \" << connections_checked << \" connections\\n\";\n    }\n#endif\n};\n\n}  // namespace dfly::search\n"
  },
  {
    "path": "src/core/search/hnsw_index.cc",
    "content": "// Copyright 2023, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#include \"core/search/hnsw_index.h\"\n\n#include <absl/container/flat_hash_map.h>\n#include <absl/strings/match.h>\n#include <hnswlib/hnswlib.h>\n#include <hnswlib/space_ip.h>\n#include <hnswlib/space_l2.h>\n\n#include \"base/logging.h\"\n#include \"core/search/hnsw_alg.h\"\n#include \"core/search/mrmw_mutex.h\"\n#include \"core/search/vector_utils.h\"\n\nnamespace dfly::search {\n\nusing namespace std;\n\nnamespace {\n\nclass HnswSpace : public hnswlib::SpaceInterface<float> {\n  unsigned dim_;\n  VectorSimilarity sim_;\n\n  static float L2DistanceStatic(const void* pVect1, const void* pVect2, const void* param) {\n    return L2Distance(static_cast<const float*>(pVect1), static_cast<const float*>(pVect2),\n                      *static_cast<const unsigned*>(param));\n  }\n\n  static float IPDistanceStatic(const void* pVect1, const void* pVect2, const void* param) {\n    return IPDistance(static_cast<const float*>(pVect1), static_cast<const float*>(pVect2),\n                      *static_cast<const unsigned*>(param));\n  }\n\n  static float CosineDistanceStatic(const void* pVect1, const void* pVect2, const void* param) {\n    return CosineDistance(static_cast<const float*>(pVect1), static_cast<const float*>(pVect2),\n                          *static_cast<const unsigned*>(param));\n  }\n\n public:\n  explicit HnswSpace(size_t dim, VectorSimilarity sim) : dim_(dim), sim_(sim) {\n  }\n\n  size_t get_data_size() {\n    return dim_ * sizeof(float);\n  }\n\n  hnswlib::DISTFUNC<float> get_dist_func() {\n    if (sim_ == VectorSimilarity::L2) {\n      return L2DistanceStatic;\n    } else if (sim_ == VectorSimilarity::COSINE) {\n      return CosineDistanceStatic;\n    } else {\n      return IPDistanceStatic;\n    }\n  }\n\n  void* get_dist_func_param() {\n    return &dim_;\n  }\n};\n}  // namespace\n\n// TODO: to replace it and use HierarchicalNSW 
directly.\nstruct HnswlibAdapter {\n  // Default setting of hnswlib/hnswalg\n  constexpr static size_t kDefaultEfRuntime = 10;\n\n  explicit HnswlibAdapter(const SchemaField::VectorParams& params, bool copy_vector)\n      : space_{params.dim, params.sim},\n        world_{&space_,       params.capacity, params.hnsw_m, params.hnsw_ef_construction,\n               100 /* seed*/, copy_vector},\n        copy_vector_{copy_vector},\n        data_size_{params.dim * sizeof(float)} {\n  }\n\n  // Adds a point to the index. If the write lock cannot be acquired (e.g.\n  // serialization holds a read lock), the operation is deferred and will be\n  // replayed by a subsequent write or TryProcessDeferred() call.\n  // When copy_vector_ is false the index stores a raw pointer to external data,\n  // so we must add the point synchronously before the caller's pointer goes out\n  // of scope — use a blocking write lock in that case.\n  void Add(const void* data, GlobalDocId id) {\n    if (copy_vector_) {\n      {\n        MRMWMutexLock lock(&mrmw_mutex_, MRMWMutex::LockMode::kWriteLock, std::try_to_lock);\n        if (lock.locked()) {\n          ProcessDeferred();\n          DoAdd(data, id);\n          return;\n        }\n      }\n      // Could not acquire write lock — defer the operation.\n      AddDeferredOp(id, DeferredOp(true, data, data_size_, /*copy=*/true));\n      TryProcessDeferred();\n    } else {\n      MRMWMutexLock lock(&mrmw_mutex_, MRMWMutex::LockMode::kWriteLock);\n      ProcessDeferred();\n      DoAdd(data, id);\n    }\n  }\n\n  // Removes a point from the index. 
If the write lock cannot be acquired, the\n  // operation is deferred.\n  void Remove(GlobalDocId id) {\n    {\n      MRMWMutexLock lock(&mrmw_mutex_, MRMWMutex::LockMode::kWriteLock, std::try_to_lock);\n      if (lock.locked()) {\n        ProcessDeferred();\n        DoRemove(id);\n        return;\n      }\n    }\n    AddDeferredOp(id, DeferredOp(false, nullptr, 0, false));\n    TryProcessDeferred();\n  }\n\n  vector<pair<float, GlobalDocId>> Knn(float* target, size_t k, std::optional<size_t> ef) {\n    TryProcessDeferred();\n    world_.setEf(ef.value_or(kDefaultEfRuntime));\n    MRMWMutexLock lock(&mrmw_mutex_, MRMWMutex::LockMode::kReadLock);\n    return QueueToVec(world_.searchKnn(target, k));\n  }\n\n  vector<pair<float, GlobalDocId>> Knn(float* target, size_t k, std::optional<size_t> ef,\n                                       const vector<GlobalDocId>& allowed) {\n    struct BinsearchFilter : hnswlib::BaseFilterFunctor {\n      virtual bool operator()(hnswlib::labeltype id) {\n        return binary_search(allowed->begin(), allowed->end(), id);\n      }\n\n      BinsearchFilter(const vector<GlobalDocId>* allowed) : allowed{allowed} {\n      }\n      const vector<GlobalDocId>* allowed;\n    };\n\n    TryProcessDeferred();\n    world_.setEf(ef.value_or(kDefaultEfRuntime));\n    BinsearchFilter filter{&allowed};\n    MRMWMutexLock lock(&mrmw_mutex_, MRMWMutex::LockMode::kReadLock);\n    return QueueToVec(world_.searchKnn(target, k, &filter));\n  }\n\n  // Brute-force KNN search over a specific subset of documents.\n  // Computes distances for all provided document IDs and returns the k nearest neighbors.\n  vector<pair<float, GlobalDocId>> SubsetKnn(float* target, size_t k,\n                                             const vector<GlobalDocId>& docs) {\n    MRMWMutexLock lock(&mrmw_mutex_, MRMWMutex::LockMode::kReadLock);\n    return QueueToVec(world_.subsetKnnSearch(target, k, docs));\n  }\n\n  // Returns all documents within the given radius, with their 
distances.\n  // Uses dynamic-range exploration (searchRange) to correctly handle cases where\n  // the entry point is farther than radius.\n  vector<pair<float, GlobalDocId>> RangeSearch(float* target, float radius) {\n    TryProcessDeferred();\n    MRMWMutexLock lock(&mrmw_mutex_, MRMWMutex::LockMode::kReadLock);\n    return world_.searchRange(target, radius);\n  }\n\n  HnswIndexMetadata GetMetadata() const {\n    MRMWMutexLock lock(&mrmw_mutex_, MRMWMutex::LockMode::kReadLock);\n    HnswIndexMetadata metadata;\n    metadata.max_elements = world_.max_elements_;\n    metadata.cur_element_count = world_.cur_element_count.load();\n    metadata.maxlevel = world_.maxlevel_;\n    metadata.enterpoint_node = world_.enterpoint_node_;\n    return metadata;\n  }\n\n  void SetMetadata(const HnswIndexMetadata& metadata) {\n    MRMWMutexLock lock(&mrmw_mutex_, MRMWMutex::LockMode::kWriteLock);\n    absl::WriterMutexLock resize_lock(&resize_mutex_);\n\n    // SetMetadata is only called during deserialization before the index is used.\n    // Assert the index is empty to ensure no concurrent operations are possible.\n    DCHECK_EQ(world_.cur_element_count.load(), 0u)\n        << \"SetMetadata should only be called on an empty index during deserialization\";\n\n    // Runtime check for release builds to prevent silent corruption\n    if (world_.cur_element_count.load() != 0) {\n      LOG(ERROR) << \"SetMetadata called on non-empty HNSW index with \"\n                 << world_.cur_element_count.load() << \" elements, ignoring\";\n      return;\n    }\n\n    // Pre-allocate capacity based on expected element count, but don't set cur_element_count.\n    // cur_element_count will be set by RestoreFromNodes when the actual nodes are restored.\n    if (world_.max_elements_ < metadata.cur_element_count) {\n      world_.resizeIndex(metadata.cur_element_count);\n    }\n    // Note: Don't set cur_element_count here - RestoreFromNodes will set it after restoring nodes.\n  }\n\n  size_t 
GetNodeCount() const {\n    MRMWMutexLock lock(&mrmw_mutex_, MRMWMutex::LockMode::kReadLock);\n    return world_.cur_element_count.load();\n  }\n\n  std::vector<HnswNodeData> GetNodesRange(size_t start, size_t end) const {\n    DCHECK(mrmw_mutex_.IsReadLocked());\n    size_t count = world_.cur_element_count.load();\n    end = std::min(end, count);\n    start = std::min(start, end);\n\n    std::vector<HnswNodeData> result;\n    result.reserve(end - start);\n\n    for (size_t internal_id = start; internal_id < end; ++internal_id) {\n      HnswNodeData node_data;\n      node_data.internal_id = internal_id;\n      node_data.global_id = world_.getExternalLabel(internal_id);\n      node_data.level = world_.element_levels_[internal_id];\n\n      node_data.levels_links.resize(node_data.level + 1);\n\n      auto* ll0 = world_.get_linklist0(internal_id);\n      unsigned short link_count0 = world_.getListCount(ll0);\n      auto* links0 = reinterpret_cast<uint32_t*>(ll0 + 1);\n      node_data.levels_links[0].assign(links0, links0 + link_count0);\n\n      for (int lvl = 1; lvl <= node_data.level; ++lvl) {\n        auto* ll = world_.get_linklist(internal_id, lvl);\n        unsigned short link_count = world_.getListCount(ll);\n        auto* links = reinterpret_cast<uint32_t*>(ll + 1);\n        node_data.levels_links[lvl].assign(links, links + link_count);\n      }\n\n      result.push_back(std::move(node_data));\n    }\n    return result;\n  }\n\n private:\n  // A single deferred Add or Remove operation.\n  struct DeferredOp {\n    bool is_add;\n    bool owns_data;        // If true, data_ptr was allocated by us and must be freed.\n    const void* data_ptr;  // Pointer to vector data (owned or borrowed).\n\n    DeferredOp(bool is_add, const void* data, size_t data_size, bool copy)\n        : is_add(is_add), owns_data(copy && data != nullptr) {\n      if (owns_data) {\n        void* buf = mi_malloc(data_size);\n        memcpy(buf, data, data_size);\n        data_ptr = buf;\n      
} else {\n        data_ptr = data;\n      }\n    }\n\n    ~DeferredOp() {\n      if (owns_data)\n        mi_free(const_cast<void*>(data_ptr));\n    }\n\n    DeferredOp(DeferredOp&& o) noexcept\n        : is_add(o.is_add), owns_data(o.owns_data), data_ptr(o.data_ptr) {\n      o.owns_data = false;\n      o.data_ptr = nullptr;\n    }\n\n    DeferredOp& operator=(DeferredOp&& o) noexcept {\n      auto lhs = std::tie(is_add, owns_data, data_ptr);\n      auto rhs = std::tie(o.is_add, o.owns_data, o.data_ptr);\n      std::swap(lhs, rhs);\n      return *this;\n    }\n\n    DeferredOp(const DeferredOp&) = delete;\n    DeferredOp& operator=(const DeferredOp&) = delete;\n  };\n\n  // Actually add the point. Must be called while holding mrmw write lock.\n  void DoAdd(const void* data, GlobalDocId id) {\n    while (true) {\n      try {\n        absl::ReaderMutexLock resize_lock(&resize_mutex_);\n        world_.addPoint(data, id);\n        return;\n      } catch (const std::exception& e) {\n        std::string error_msg = e.what();\n        if (absl::StrContains(error_msg, \"The number of elements exceeds the specified limit\")) {\n          ResizeIfFull();\n          continue;\n        }\n        LOG(ERROR) << \"HnswlibAdapter::DoAdd exception: \" << e.what();\n        return;\n      }\n    }\n  }\n\n  void DoRemove(GlobalDocId id) {\n    HnswErrorStatus status = world_.markDelete(id);\n    if (status != HnswErrorStatus::SUCCESS) {\n      VLOG(1) << \"HnswlibAdapter::Remove failed with status: \" << static_cast<int>(status)\n              << \" for global id: \" << id;\n    }\n  }\n\n  // Add a deferred operation, replacing any previous one for the same document.\n  void AddDeferredOp(GlobalDocId id, DeferredOp op) {\n    std::lock_guard g(deferred_mu_);\n    deferred_ops_.insert_or_assign(id, std::move(op));\n  }\n\n  // Take all deferred operations out of the queue.\n  absl::flat_hash_map<GlobalDocId, DeferredOp> TakeDeferredOps() {\n    std::lock_guard g(deferred_mu_);\n    
absl::flat_hash_map<GlobalDocId, DeferredOp> ops;\n    ops.swap(deferred_ops_);\n    return ops;\n  }\n\n  // Drain the deferred operations queue. Must be called while holding the mrmw\n  // write lock.  Only copy_vector_=true adds and removes can be deferred, so\n  // ordering within the queue does not matter.\n  void ProcessDeferred() {\n    auto ops = TakeDeferredOps();\n    for (auto& [id, op] : ops) {\n      if (op.is_add) {\n        DoAdd(op.data_ptr, id);\n      } else {\n        DoRemove(id);\n      }\n    }\n  }\n\n  // Non-blocking attempt to drain the deferred queue.\n  void TryProcessDeferred() {\n    MRMWMutexLock lock(&mrmw_mutex_, MRMWMutex::LockMode::kWriteLock, std::try_to_lock);\n    if (lock.locked()) {\n      ProcessDeferred();\n    }\n  }\n\n  // Function requires that we hold mutex while resizing index. resizeIndex is not thread safe with\n  // insertion (https://github.com/nmslib/hnswlib/issues/267)\n  void ResizeIfFull() {\n    {\n      // First check with reader lock to avoid contention.\n      absl::ReaderMutexLock lock(&resize_mutex_);\n      if (world_.getCurrentElementCount() < world_.getMaxElements() ||\n          (world_.allow_replace_deleted_ && world_.getDeletedCount() > 0)) {\n        return;\n      }\n    }\n    try {\n      // Upgrade to writer lock.\n      absl::WriterMutexLock lock(&resize_mutex_);\n      if (world_.getCurrentElementCount() == world_.getMaxElements() &&\n          (!world_.allow_replace_deleted_ || world_.getDeletedCount() == 0)) {\n        auto max_elements = world_.getMaxElements();\n        world_.resizeIndex(max_elements * 2);\n        VLOG(1) << \"Resizing HNSW Index from \" << max_elements << \" to \" << max_elements * 2;\n      }\n    } catch (const std::exception& e) {\n      LOG(FATAL) << \"HnswlibAdapter::ResizeIfFull exception: \" << e.what();\n    }\n  }\n\n  template <typename Q> static vector<pair<float, GlobalDocId>> QueueToVec(Q queue) {\n    vector<pair<float, GlobalDocId>> out(queue.size());\n 
   size_t idx = out.size();\n    while (!queue.empty()) {\n      out[--idx] = queue.top();\n      queue.pop();\n    }\n    return out;\n  }\n\n public:\n  // Restore HNSW graph structure from serialized nodes with metadata\n  void RestoreFromNodes(const std::vector<HnswNodeData>& nodes, const HnswIndexMetadata& metadata) {\n    MRMWMutexLock lock(&mrmw_mutex_, MRMWMutex::LockMode::kWriteLock);\n    absl::WriterMutexLock resize_lock(&resize_mutex_);\n\n    if (nodes.empty()) {\n      return;\n    }\n\n    // RestoreFromNodes is only called during deserialization on a freshly created index.\n    // Assert the index is empty to prevent memory leaks from double-allocation of linkLists_.\n    DCHECK_EQ(world_.cur_element_count.load(), 0u)\n        << \"RestoreFromNodes should only be called on an empty index during deserialization\";\n\n    // Ensure we have enough capacity.\n    // Metadata may have been captured before the snapshot read-lock, so\n    // cur_element_count can be smaller than actual node internal_ids when\n    // concurrent writes happen.  
Compute the real requirement from nodes.\n    size_t max_internal_id = 0;\n    for (const auto& node : nodes) {\n      max_internal_id = std::max<size_t>(max_internal_id, node.internal_id);\n    }\n    size_t required_capacity = std::max(metadata.cur_element_count, max_internal_id + 1);\n    if (world_.max_elements_ < required_capacity) {\n      world_.resizeIndex(required_capacity);\n    }\n\n    // Restore each node - directly set up memory and fields\n    size_t restored_count = 0;\n\n    for (const auto& node : nodes) {\n      size_t internal_id = node.internal_id;\n\n      // Validate internal_id is within bounds - invalid internal_id indicates corrupted data\n      CHECK(internal_id < world_.max_elements_);\n\n      // Register label in lookup table\n      world_.label_lookup_[node.global_id] = internal_id;\n\n      // Set the level\n      world_.element_levels_[internal_id] = node.level;\n\n      // Clear level 0 memory and set label.\n      // Memory layout: each element occupies size_data_per_element_ bytes starting at\n      // data_level0_memory_ + internal_id * size_data_per_element_.\n      // offsetLevel0_ is always 0, so we clear exactly one element's worth of data.\n      // This matches the pattern in hnswlib's addPoint().\n      memset(world_.data_level0_memory_ + internal_id * world_.size_data_per_element_, 0,\n             world_.size_data_per_element_);\n      world_.setExternalLabel(internal_id, node.global_id);\n\n      // In copy mode, zero the vector memory so distance computations don't use\n      // uninitialized data for nodes that are marked deleted.\n      if (world_.copy_vector_) {\n        char* data_ptr = world_.data_vector_memory_ + internal_id * world_.data_size_;\n        memset(data_ptr, 0, world_.data_size_);\n      }\n\n      // Allocate upper layer links if needed\n      if (node.level > 0) {\n        world_.linkLists_[internal_id] =\n            (char*)mi_malloc(world_.size_links_per_element_ * node.level + 1);\n        
memset(world_.linkLists_[internal_id], 0, world_.size_links_per_element_ * node.level + 1);\n      }\n\n      // Restore links for layer 0\n      if (!node.levels_links.empty()) {\n        auto* ll0 = world_.get_linklist0(internal_id);\n        world_.setListCount(ll0, node.levels_links[0].size());\n        auto* links0 = reinterpret_cast<uint32_t*>(ll0 + 1);\n        std::copy(node.levels_links[0].begin(), node.levels_links[0].end(), links0);\n      }\n\n      // Restore links for upper layers\n      for (int lvl = 1; lvl <= node.level && lvl < static_cast<int>(node.levels_links.size());\n           ++lvl) {\n        auto* ll = world_.get_linklist(internal_id, lvl);\n        world_.setListCount(ll, node.levels_links[lvl].size());\n        auto* links = reinterpret_cast<uint32_t*>(ll + 1);\n        std::copy(node.levels_links[lvl].begin(), node.levels_links[lvl].end(), links);\n      }\n\n      // Track restored count so markDeletedInternal can validate internal_id bounds.\n      world_.cur_element_count.store(++restored_count);\n\n      // Mark node as deleted until UpdateVectorData provides valid vector data.\n      // This prevents crashes from dereferencing uninitialised data pointers\n      // (especially in borrowed-vector mode).\n      world_.markDeletedInternal(internal_id);\n    }\n\n    // Set the metadata for the graph\n    world_.maxlevel_ = metadata.maxlevel;\n    world_.enterpoint_node_ = metadata.enterpoint_node;\n\n    VLOG(1) << \"Restored HNSW index with \" << restored_count\n            << \" nodes, maxlevel=\" << metadata.maxlevel\n            << \", enterpoint=\" << metadata.enterpoint_node;\n  }\n\n  // Update vector data for an existing node (used after RestoreFromNodes).\n  // Returns false if the node doesn't exist in the index.\n  bool UpdateVectorData(GlobalDocId id, const void* data) {\n    TryProcessDeferred();\n    MRMWMutexLock lock(&mrmw_mutex_, MRMWMutex::LockMode::kWriteLock);\n\n    // Find the internal id for this label\n    auto 
it = world_.label_lookup_.find(id);\n    if (it == world_.label_lookup_.end()) {\n      VLOG(1) << \"UpdateVectorData: label \" << id << \" not found in index\";\n      return false;\n    }\n\n    size_t internal_id = it->second;\n\n    // Copy/store the vector data based on copy_vector_ mode\n    if (world_.copy_vector_) {\n      // Owned mode: copy data into world's vector memory\n      char* data_ptr = world_.data_vector_memory_ + internal_id * world_.data_size_;\n      memcpy(data_ptr, data, world_.data_size_);\n    } else {\n      // Borrowed mode: store pointer to external data\n      char* ptr_location = world_.getDataPtrByInternalId(internal_id);\n      memcpy(ptr_location, &data, sizeof(void*));\n    }\n\n    // Unmark deleted so the node participates in KNN searches now that it\n    // has valid vector data. During RestoreFromNodes all nodes are marked\n    // deleted by default to prevent dereferencing uninitialised data.\n    if (world_.isMarkedDeleted(internal_id)) {\n      world_.unmarkDeletedInternal(internal_id);\n    }\n    return true;\n  }\n\n  std::unique_ptr<MRMWMutexLock> GetReadLock() const {\n    return std::make_unique<MRMWMutexLock>(&mrmw_mutex_, MRMWMutex::LockMode::kReadLock);\n  }\n\n private:\n  HnswSpace space_;\n  HierarchicalNSW<float> world_;\n  absl::Mutex resize_mutex_;\n  mutable MRMWMutex mrmw_mutex_;\n\n  bool copy_vector_;                    // Whether vectors are copied into hnswlib.\n  size_t data_size_;                    // Byte size of a single vector.\n  mutable base::SpinLock deferred_mu_;  // Protects deferred_ops_.\n  absl::flat_hash_map<GlobalDocId, DeferredOp> deferred_ops_;  // GUARDED_BY(deferred_mu_)\n};\n\nHnswVectorIndex::HnswVectorIndex(const SchemaField::VectorParams& params, bool copy_vector,\n                                 PMR_NS::memory_resource*)\n    : copy_vector_(copy_vector),\n      dim_{params.dim},\n      adapter_{make_unique<HnswlibAdapter>(params, copy_vector)} {\n  DCHECK(params.use_hnsw);\n  
// TODO: Patch hnsw to use MR\n}\n\nHnswVectorIndex::~HnswVectorIndex() {\n}\n\nbool HnswVectorIndex::Add(GlobalDocId id, const DocumentAccessor& doc, std::string_view field) {\n  auto vector_ptr = doc.GetVector(field, dim_);\n\n  if (!vector_ptr) {\n    return false;\n  }\n\n  const void* data = nullptr;\n  if (std::holds_alternative<OwnedFtVector>(*vector_ptr)) {\n    data = std::get<OwnedFtVector>(*vector_ptr).first.get();\n  } else {\n    data = std::get<BorrowedFtVector>(*vector_ptr);\n  }\n\n  if (!data) {\n    return false;\n  }\n\n  adapter_->Add(data, id);\n  return true;\n}\n\nstd::vector<std::pair<float, GlobalDocId>> HnswVectorIndex::Knn(float* target, size_t k,\n                                                                std::optional<size_t> ef) const {\n  return adapter_->Knn(target, k, ef);\n}\n\nstd::vector<std::pair<float, GlobalDocId>> HnswVectorIndex::Knn(\n    float* target, size_t k, std::optional<size_t> ef,\n    const std::vector<GlobalDocId>& allowed) const {\n  return adapter_->Knn(target, k, ef, allowed);\n}\n\nstd::vector<std::pair<float, GlobalDocId>> HnswVectorIndex::SubsetKnn(\n    float* target, size_t k, const std::vector<GlobalDocId>& docs) const {\n  return adapter_->SubsetKnn(target, k, docs);\n}\n\nstd::vector<std::pair<float, GlobalDocId>> HnswVectorIndex::RangeQuery(float* target,\n                                                                       float radius) const {\n  return adapter_->RangeSearch(target, radius);\n}\n\nvoid HnswVectorIndex::Remove(GlobalDocId id, const DocumentAccessor& doc, string_view field) {\n  adapter_->Remove(id);\n}\n\nvoid HnswVectorIndex::Remove(GlobalDocId id) {\n  adapter_->Remove(id);\n}\n\nHnswIndexMetadata HnswVectorIndex::GetMetadata() const {\n  return adapter_->GetMetadata();\n}\n\nvoid HnswVectorIndex::SetMetadata(const HnswIndexMetadata& metadata) {\n  adapter_->SetMetadata(metadata);\n}\n\nsize_t HnswVectorIndex::GetNodeCount() const {\n  return 
adapter_->GetNodeCount();\n}\n\nstd::vector<HnswNodeData> HnswVectorIndex::GetNodesRange(size_t start, size_t end) const {\n  return adapter_->GetNodesRange(start, end);\n}\n\nvoid HnswVectorIndex::RestoreFromNodes(const std::vector<HnswNodeData>& nodes,\n                                       const HnswIndexMetadata& metadata) {\n  adapter_->RestoreFromNodes(nodes, metadata);\n}\n\nbool HnswVectorIndex::UpdateVectorData(GlobalDocId id, const DocumentAccessor& doc,\n                                       std::string_view field) {\n  auto vector_ptr = doc.GetVector(field, dim_);\n  if (!vector_ptr ||\n      *vector_ptr == search::DocumentAccessor::VectorInfo(search::BorrowedFtVector(nullptr))) {\n    // Document doesn't have the vector field - mark node as deleted to prevent\n    // \"ghost\" nodes with invalid vector data from participating in searches\n    LOG(WARNING) << \"UpdateVectorData: document \" << id\n                 << \" missing vector field, marking node as deleted in HNSW index\";\n    adapter_->Remove(id);\n    return false;\n  }\n\n  const void* data = nullptr;\n  if (std::holds_alternative<OwnedFtVector>(*vector_ptr)) {\n    data = std::get<OwnedFtVector>(*vector_ptr).first.get();\n  } else {\n    data = std::get<BorrowedFtVector>(*vector_ptr);\n  }\n\n  return adapter_->UpdateVectorData(id, data);\n}\n\nstd::unique_ptr<MRMWMutexLock> HnswVectorIndex::GetReadLock() const {\n  return adapter_->GetReadLock();\n}\n\n}  // namespace dfly::search\n"
  },
  {
    "path": "src/core/search/hnsw_index.h",
    "content": "// Copyright 2023, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#pragma once\n\n#include <memory>\n\n#include \"core/search/mrmw_mutex.h\"\n#include \"core/search/search.h\"\n\nnamespace dfly::search {\n\n// Metadata structure for HNSW index serialization\n// Contains the key parameters needed to restore the index state\nstruct HnswIndexMetadata {\n  size_t max_elements = 0;  // Maximum number of elements the index can hold\n  // Note: cur_element_count may be smaller than actual node count during concurrent writes,\n  // so we compute the real requirement from nodes during restoration.\n  // TODO: consider removing it from metadata and rely entirely on node data for restoration.\n  size_t cur_element_count = 0;  // Current number of elements in the index\n  int maxlevel = -1;             // Maximum level of the graph\n  size_t enterpoint_node = 0;    // Entry point node for the graph\n};\n\n// Node data structure for HNSW serialization\nstruct HnswNodeData {\n  uint32_t internal_id;\n  GlobalDocId global_id;\n  int level;\n  std::vector<std::vector<uint32_t>> levels_links;  // Links for each level (0 to level)\n\n  // Returns the total serialized size in bytes.\n  // Format: internal_id(4) + global_id(8) + level(4)\n  //         + for each level: links_num(4) + links(4 each)\n  size_t TotalSize() const {\n    size_t size = 4 + 8 + 4;  // internal_id + global_id + level\n    for (const auto& links : levels_links) {\n      size += 4 + links.size() * 4;  // links_num + links\n    }\n    return size;\n  }\n};\n\nstruct HnswlibAdapter;\nclass HnswVectorIndex {\n public:\n  explicit HnswVectorIndex(const search::SchemaField::VectorParams& params, bool copy_vector,\n                           PMR_NS::memory_resource* mr = PMR_NS::get_default_resource());\n\n  ~HnswVectorIndex();\n\n  bool Add(search::GlobalDocId id, const search::DocumentAccessor& doc, std::string_view field);\n  void Remove(search::GlobalDocId 
id, const search::DocumentAccessor& doc, std::string_view field);\n  void Remove(search::GlobalDocId id);\n\n  bool IsVectorCopied() const {\n    return copy_vector_;\n  }\n\n  std::vector<std::pair<float, GlobalDocId>> Knn(float* target, size_t k,\n                                                 std::optional<size_t> ef) const;\n  std::vector<std::pair<float, GlobalDocId>> Knn(float* target, size_t k, std::optional<size_t> ef,\n                                                 const std::vector<GlobalDocId>& allowed) const;\n  std::vector<std::pair<float, GlobalDocId>> SubsetKnn(float* target, size_t k,\n                                                       const std::vector<GlobalDocId>& docs) const;\n\n  // Returns all documents within radius, with their distances.\n  std::vector<std::pair<float, GlobalDocId>> RangeQuery(float* target, float radius) const;\n\n  size_t GetDim() const {\n    return dim_;\n  }\n\n  // Get metadata for serialization\n  HnswIndexMetadata GetMetadata() const;\n\n  // Set metadata (used during restoration)\n  void SetMetadata(const HnswIndexMetadata& metadata);\n\n  // Get total number of nodes in the index\n  size_t GetNodeCount() const;\n\n  // Get nodes in the specified range [start, end)\n  // Returns vector of node data for serialization\n  std::vector<HnswNodeData> GetNodesRange(size_t start, size_t end) const;\n\n  // Restore graph structure from serialized nodes with metadata\n  // This restores the HNSW graph links but NOT the vector data\n  // Vector data must be populated separately via UpdateVectorData\n  void RestoreFromNodes(const std::vector<HnswNodeData>& nodes, const HnswIndexMetadata& metadata);\n\n  // Update vector data for an existing node (used after RestoreFromNodes)\n  // This populates the vector data for a node that already has graph links\n  bool UpdateVectorData(GlobalDocId id, const DocumentAccessor& doc, std::string_view field);\n\n  // Acquire a read lock on the internal MRMW mutex.\n  // Use this during 
serialization to block concurrent Add/Remove (write) operations.\n  std::unique_ptr<MRMWMutexLock> GetReadLock() const;\n\n private:\n  bool copy_vector_;\n  size_t dim_;\n  std::unique_ptr<HnswlibAdapter> adapter_;\n};\n\n}  // namespace dfly::search\n"
  },
  {
    "path": "src/core/search/index_result.h",
    "content": "// Copyright 2025, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#pragma once\n\n#include <variant>\n#include <vector>\n\n#include \"core/search/ast_expr.h\"\n#include \"core/search/block_list.h\"\n#include \"core/search/range_tree.h\"\n\nnamespace dfly::search {\n\n// Represents an either owned or non-owned result set that can be accessed and merged transparently.\nclass IndexResult {\n private:\n  using DocVec = std::vector<DocId>;\n  using Variant =\n      std::variant<DocVec /*owned*/, const DocVec*, const BlockList<CompressedSortedSet>*,\n                   const BlockList<SortedVector<DocId>>*, RangeResult>;\n\n  template <typename... Ts> using VariantOfConstPtrs = std::variant<const Ts*...>;\n  using BorrowedView =\n      VariantOfConstPtrs<DocVec, BlockList<CompressedSortedSet>, BlockList<SortedVector<DocId>>,\n                         SingleBlockRangeResult, TwoBlocksRangeResult>;\n\n public:\n  IndexResult() = default;\n\n  explicit IndexResult(Variant value);\n\n  template <typename Container> explicit IndexResult(const Container* container = nullptr);\n\n  /* It will return approximate size of the result set.\n     Actual result can be smaller than the size returned by this method. */\n  size_t ApproximateSize() const;\n\n  BorrowedView Borrowed() const;\n\n  // Move out of owned or copy borrowed. 
Take up to `limit` entries and return original size.\n  std::pair<DocVec, size_t /* full size */> Take(size_t limit = std::numeric_limits<size_t>::max());\n\n private:\n  bool IsOwned() const;\n\n  Variant value_;\n};\n\nstd::vector<DocId> MergeIndexResults(const IndexResult& left, const IndexResult& right,\n                                     AstLogicalNode::LogicOp op);\n\n// Implementation\n/******************************************************************/\ninline IndexResult::IndexResult(Variant value) : value_{std::move(value)} {\n}\n\ntemplate <typename Container>\nIndexResult::IndexResult(const Container* container) : value_{container} {\n  if (container == nullptr) {\n    value_ = DocVec{};\n  }\n}\n\ninline size_t IndexResult::ApproximateSize() const {\n  return std::visit([](auto* set) { return set->size(); }, Borrowed());\n}\n\ninline IndexResult::BorrowedView IndexResult::Borrowed() const {\n  auto cb = [](const auto& v) -> BorrowedView {\n    using T = std::decay_t<decltype(v)>;\n    if constexpr (std::is_pointer_v<std::remove_reference_t<decltype(v)>>) {\n      return v;\n    } else if constexpr (std::is_same_v<T, RangeResult>) {\n      auto range_cb = [](const auto& set) -> BorrowedView { return &set; };\n      return std::visit(range_cb, v.GetResult());\n    } else {\n      return &v;\n    }\n  };\n  return std::visit(cb, value_);\n}\n\ninline std::pair<IndexResult::DocVec, size_t> IndexResult::Take(size_t limit) {\n  if (IsOwned()) {\n    auto& vec = std::get<DocVec>(value_);\n    size_t size = vec.size();\n    return {std::move(vec), size};\n  }\n\n  // Numeric ranges need to be filtered and don't know their exact size ahead\n  if (std::holds_alternative<RangeResult>(value_)) {\n    auto cb = [limit](auto* range) -> std::pair<DocVec, size_t> {\n      DocVec out;\n      size_t total = 0;\n      out.reserve(std::min(limit, range->size()));\n      for (auto it = range->begin(); it != range->end(); ++it) {\n        total++;\n        if (out.size() 
< limit)\n          out.push_back(*it);\n      }\n      return {std::move(out), total};\n    };\n    return std::visit(cb, Borrowed());\n  }\n\n  // Generic borrowed results sets don't need to be filtered, so we can tell the result size ahead\n  auto cb = [limit](auto* set) -> std::pair<DocVec, size_t> {\n    DocVec out;\n    out.reserve(std::min(limit, set->size()));\n    for (auto it = set->begin(); it != set->end() && out.size() < limit; ++it)\n      out.push_back(*it);\n    return {std::move(out), set->size()};\n  };\n  return std::visit(cb, Borrowed());\n}\n\ninline bool IndexResult::IsOwned() const {\n  return std::holds_alternative<DocVec>(value_);\n}\n\nnamespace details {\nusing BackInserter = std::back_insert_iterator<std::vector<DocId>>;\n\ntemplate <typename T> constexpr bool IsSeekableIterator = std::is_base_of_v<SeekableTag, T>;\n\ntemplate <typename Iterator> void Seek(DocId min_doc_id, const Iterator& end, Iterator* it) {\n  static constexpr DocId kFastSeekThreshold = 15;\n\n  auto extract_doc_id = [](const auto& value) {\n    using T = std::decay_t<decltype(value)>;\n    if constexpr (std::is_same_v<T, DocId>) {\n      return value;\n    } else {\n      return value.first;\n    }\n  };\n\n  DocId current_value = extract_doc_id(**it);\n  DCHECK(current_value < min_doc_id);\n\n  if (min_doc_id - current_value > kFastSeekThreshold) {  // If the gap is large, use a fast seek\n    if constexpr (IsSeekableIterator<Iterator>) {\n      it->SeekGE(min_doc_id);\n    } else {\n      BasicSeekGE(min_doc_id, end, it);\n    }\n  } else {\n    // If the gap is small, just iterate\n    do {\n      ++(*it);\n    } while (*it != end && extract_doc_id(**it) < min_doc_id);\n  }\n}\n\ntemplate <typename FirstIterator, typename SecondIterator>\nvoid SetIntersection(FirstIterator first_begin, FirstIterator first_end,\n                     SecondIterator second_begin, SecondIterator second_end, BackInserter out) {\n  auto l_it = first_begin;\n  auto r_it = 
second_begin;\n\n  while (l_it != first_end && r_it != second_end) {\n    DocId l_value = *l_it;\n    DocId r_value = *r_it;\n\n    if (l_value == r_value) {\n      *out++ = l_value;\n      ++l_it;\n      if (l_it != first_end) {\n        Seek(*l_it, second_end, &r_it);\n      }\n    } else if (l_value < r_value) {\n      Seek(r_value, first_end, &l_it);\n    } else {\n      DCHECK(l_value > r_value);\n      Seek(l_value, second_end, &r_it);\n    }\n  }\n}\n\n}  // namespace details\n\ninline std::vector<DocId> MergeIndexResults(const IndexResult& left, const IndexResult& right,\n                                            AstLogicalNode::LogicOp op) {\n  std::vector<DocId> result;\n\n  if (op == AstLogicalNode::LogicOp::AND) {\n    result.reserve(std::min(left.ApproximateSize(), right.ApproximateSize()));\n    auto cb = [&result](auto* s1, auto* s2) {\n      details::SetIntersection(s1->begin(), s1->end(), s2->begin(), s2->end(),\n                               std::back_inserter(result));\n    };\n    std::visit(cb, left.Borrowed(), right.Borrowed());\n  } else {\n    result.reserve(std::max(left.ApproximateSize(), right.ApproximateSize()));\n    auto cb = [&result](auto* s1, auto* s2) {\n      std::set_union(s1->begin(), s1->end(), s2->begin(), s2->end(), std::back_inserter(result));\n    };\n    std::visit(cb, left.Borrowed(), right.Borrowed());\n  }\n\n  return result;\n}\n\n}  // namespace dfly::search\n"
  },
  {
    "path": "src/core/search/indices.cc",
    "content": "// Copyright 2023, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#include \"core/search/indices.h\"\n\n#include <absl/container/flat_hash_set.h>\n#include <absl/strings/ascii.h>\n#include <absl/strings/numbers.h>\n#include <absl/strings/str_join.h>\n#include <absl/strings/str_split.h>\n\n#include <boost/iterator/function_output_iterator.hpp>\n#include <string_view>\n\n#define UNI_ALGO_DISABLE_NFKC_NFKD\n\n#include <absl/container/btree_set.h>\n#include <uni_algo/case.h>\n#include <uni_algo/ranges_word.h>\n\n#include <algorithm>\n#include <cctype>\n\n#include \"base/flags.h\"\n\nABSL_FLAG(bool, use_numeric_range_tree, true,\n          \"Use range tree for numeric index. \"\n          \"If false, use a simple implementation with btree_set. \"\n          \"Range tree is more memory efficient and faster for range queries, \"\n          \"but slower for single value queries.\");\n\nnamespace dfly::search {\n\nusing namespace std;\nusing cmn::StringOrView;\n\nnamespace {\n\nbool IsAllAscii(string_view sv) {\n  return all_of(sv.begin(), sv.end(), [](unsigned char c) { return isascii(c); });\n}\n\nstring ToLower(string_view word) {\n  return IsAllAscii(word) ? 
absl::AsciiStrToLower(word) : una::cases::to_lowercase_utf8(word);\n}\n\n// Get all words from text as matched by the ICU library\nabsl::flat_hash_set<std::string> TokenizeWords(std::string_view text,\n                                               const TextIndex::StopWords& stopwords,\n                                               const Synonyms* synonyms) {\n  absl::flat_hash_set<std::string> words;\n  for (std::string_view word : una::views::word_only::utf8(text)) {\n    if (std::string word_lc = una::cases::to_lowercase_utf8(word); !stopwords.contains(word_lc)) {\n      if (synonyms) {\n        if (auto group_id = synonyms->GetGroupToken(word_lc); group_id) {\n          words.insert(*group_id);\n        }\n      }\n\n      words.insert(std::move(word_lc));\n    }\n  }\n  return words;\n}\n\n// Split taglist, remove duplicates and convert all to lowercase\nabsl::flat_hash_set<string> NormalizeTags(string_view taglist, bool case_sensitive,\n                                          char separator) {\n  // Splitting utf8 by ascii character is safe\n  absl::flat_hash_set<string> tags;\n  for (string_view tag : absl::StrSplit(taglist, separator, absl::SkipEmpty())) {\n    string_view str = absl::StripAsciiWhitespace(tag);\n    if (case_sensitive)\n      tags.insert(string{str});\n    else\n      tags.insert(ToLower(str));\n  }\n  return tags;\n}\n\n// Iterate over all suffixes of all words\nvoid IterateAllSuffixes(const absl::flat_hash_set<string>& words,\n                        absl::FunctionRef<void(std::string_view)> cb) {\n  for (string_view word : words) {\n    for (size_t offs = 0; offs < word.length(); offs++) {\n      cb(word.substr(offs));\n    }\n  }\n}\n\n// Haversine with earth radius in meters. 
Used to calculate distance.\nboost::geometry::strategy::distance::haversine haversine_(6372797.560856);\n\ndouble ConvertToRadiusInMeters(size_t radius, std::string_view arg) {\n  const std::string unit = absl::AsciiStrToUpper(arg);\n  if (unit == \"M\") {\n    return radius * 1;\n  } else if (unit == \"KM\") {\n    return radius * 1000;\n  } else if (unit == \"FT\") {\n    return radius * 0.3048;\n  } else if (unit == \"MI\") {\n    return radius * 1609.34;\n  } else {\n    return -1;\n  }\n}\n\n// Verify if geo string is valid and convert to point\nstd::optional<GeoIndex::point> GetGeoPoint(const string_view& geo_string) {\n  // Empty geo string\n  if (geo_string.empty())\n    return nullopt;\n\n  absl::InlinedVector<string_view, 2> coordinates = absl::StrSplit(geo_string, \",\");\n\n  // Invalid coordinate format\n  if (coordinates.size() != 2)\n    return std::nullopt;\n\n  // Convert coordinates to double\n  double lon, lat;\n  if (!absl::SimpleAtod(coordinates[0], &lon) || !absl::SimpleAtod(coordinates[1], &lat))\n    return nullopt;\n\n  // Verify that coordinates are within valid ranges\n  if (lon < -180 || lon > 180 || lat < -90 || lat > 90)\n    return nullopt;\n\n  return GeoIndex::point{lon, lat};\n}\n\n};  // namespace\n\nclass RangeTreeAdapter : public NumericIndex::RangeTreeBase {\n public:\n  explicit RangeTreeAdapter(size_t max_range_block_size, PMR_NS::memory_resource* mr)\n      : range_tree_{mr, max_range_block_size}, builder_{RangeTree::Builder{}} {\n  }\n\n  void Add(DocId id, absl::Span<double> values) override {\n    for (double value : values) {\n      if (builder_)\n        builder_->Add(id, value);\n      else\n        range_tree_.Add(id, value);\n    }\n  }\n\n  void Remove(DocId id, absl::Span<double> values) override {\n    for (double value : values) {\n      if (builder_)\n        builder_->Remove(id, value);\n      else\n        range_tree_.Remove(id, value);\n    }\n  }\n\n  RangeResult Range(double l, double r) const override {\n  
  return range_tree_.Range(l, r);\n  }\n\n  vector<DocId> GetAllDocIds() const override {\n    // TODO: remove take\n    return range_tree_.GetAllDocIds().Take();\n  }\n\n  void FinalizeInitialization() override {\n    builder_->Populate(&range_tree_, {500});\n    builder_.reset();\n  }\n\n private:\n  RangeTree range_tree_;\n  std::optional<RangeTree::Builder> builder_;\n};\n\nclass BtreeSetImpl : public NumericIndex::RangeTreeBase {\n public:\n  explicit BtreeSetImpl(PMR_NS::memory_resource* mr) : entries_(mr) {\n  }\n\n  void Add(DocId id, absl::Span<double> values) override {\n    if (values.size() > 1) {\n      unique_ids_ = false;\n    }\n    for (double value : values) {\n      entries_.insert({value, id});\n    }\n  }\n\n  void Remove(DocId id, absl::Span<double> values) override {\n    for (double value : values) {\n      entries_.erase({value, id});\n    }\n  }\n\n  RangeResult Range(double l, double r) const override {\n    DCHECK(l <= r);\n\n    auto it_l = entries_.lower_bound({l, 0});\n    auto it_r = entries_.lower_bound({r, numeric_limits<DocId>::max()});\n    DCHECK_GE(it_r - it_l, 0);\n\n    vector<DocId> out;\n    for (auto it = it_l; it != it_r; ++it)\n      out.push_back(it->second);\n\n    sort(out.begin(), out.end());\n\n    if (!unique_ids_) {\n      out.erase(unique(out.begin(), out.end()), out.end());\n    }\n    return RangeResult(std::move(out));\n  }\n\n  vector<DocId> GetAllDocIds() const override {\n    std::vector<DocId> result;\n\n    result.reserve(entries_.size());\n\n    if (unique_ids_) {\n      // If unique_ids_ is true, we can just take the second element of each entry\n      for (const auto& [_, doc_id] : entries_) {\n        result.push_back(doc_id);\n      }\n    } else {\n      absl::flat_hash_set<DocId> unique_docs;\n      unique_docs.reserve(entries_.size());\n      for (const auto& [_, doc_id] : entries_) {\n        const auto [__, is_new] = unique_docs.insert(doc_id);\n        if (is_new) {\n          
result.push_back(doc_id);\n        }\n      }\n    }\n\n    std::sort(result.begin(), result.end());\n    return result;\n  }\n\n private:\n  bool unique_ids_ = true;  // If true, docs ids are unique in the index, otherwise they can repeat.\n  using Entry = std::pair<double, DocId>;\n  absl::btree_set<Entry, std::less<Entry>, PMR_NS::polymorphic_allocator<Entry>> entries_;\n};\n\nNumericIndex::NumericIndex(size_t max_range_block_size, PMR_NS::memory_resource* mr) {\n  if (absl::GetFlag(FLAGS_use_numeric_range_tree)) {\n    range_tree_ = make_unique<RangeTreeAdapter>(max_range_block_size, mr);\n  } else {\n    range_tree_ = make_unique<BtreeSetImpl>(mr);\n  }\n}\n\nbool NumericIndex::Add(DocId id, const DocumentAccessor& doc, string_view field) {\n  auto numbers = doc.GetNumbers(field);\n  if (!numbers) {\n    return false;\n  }\n\n  range_tree_->Add(id, absl::MakeSpan(numbers.value()));\n  return true;\n}\n\nvoid NumericIndex::Remove(DocId id, const DocumentAccessor& doc, string_view field) {\n  auto numbers = doc.GetNumbers(field).value();\n  range_tree_->Remove(id, absl::MakeSpan(numbers));\n}\n\nvoid NumericIndex::FinalizeInitialization() {\n  range_tree_->FinalizeInitialization();\n}\n\nRangeResult NumericIndex::Range(double l, double r) const {\n  if (r < l)\n    return {};\n  return range_tree_->Range(l, r);\n}\n\nvector<DocId> NumericIndex::GetAllDocsWithNonNullValues() const {\n  return range_tree_->GetAllDocIds();\n}\n\ntemplate <typename C>\nBaseStringIndex<C>::BaseStringIndex(PMR_NS::memory_resource* mr, bool case_sensitive,\n                                    bool with_suffix)\n    : case_sensitive_{case_sensitive}, entries_{mr} {\n  if (with_suffix)\n    suffix_trie_.emplace(mr);\n}\n\ntemplate <typename C>\nconst typename BaseStringIndex<C>::Container* BaseStringIndex<C>::Matching(\n    string_view word, bool strip_whitespace) const {\n  if (strip_whitespace)\n    word = absl::StripAsciiWhitespace(word);\n\n  auto it = 
entries_.find(NormalizeQueryWord(word).view());\n  return (it != entries_.end()) ? &it->second : nullptr;\n}\n\ntemplate <typename C>\nvoid BaseStringIndex<C>::MatchPrefix(std::string_view prefix,\n                                     absl::FunctionRef<void(const Container*)> cb) const {\n  StringOrView prefix_norm{NormalizeQueryWord(prefix)};\n  prefix = prefix_norm.view();\n\n  // TODO(vlad): Use right iterator to avoid string comparison?\n  for (auto it = entries_.lower_bound(prefix);\n       it != entries_.end() && (*it).first.rfind(prefix, 0) == 0; ++it) {\n    cb(&(*it).second);\n  }\n}\n\ntemplate <typename C>\nvoid BaseStringIndex<C>::MatchSuffix(std::string_view suffix,\n                                     absl::FunctionRef<void(const Container*)> cb) const {\n  StringOrView suffix_norm{NormalizeQueryWord(suffix)};\n  suffix = suffix_norm.view();\n\n  // If we have a suffix trie built, we just need to fetch the relevant suffix\n  if (suffix_trie_) {\n    auto it = suffix_trie_->find(suffix);\n    cb((it != suffix_trie_->end()) ? 
&it->second : nullptr);\n    return;\n  }\n\n  // Otherwise, iterate over all entries and look for the suffix\n  for (const auto& entry : entries_) {\n    int32_t start = entry.first.size() - suffix.size();\n    if (start >= 0 && entry.first.substr(start) == suffix)\n      cb(&entry.second);\n  }\n}\n\ntemplate <typename C>\nvoid BaseStringIndex<C>::MatchInfix(std::string_view infix,\n                                    absl::FunctionRef<void(const Container*)> cb) const {\n  StringOrView infix_norm{NormalizeQueryWord(infix)};\n  infix = infix_norm.view();\n\n  // If we have a suffix trie built, we just need to match the prefix\n  if (suffix_trie_) {\n    for (auto it = suffix_trie_->lower_bound(infix);\n         it != suffix_trie_->end() && (*it).first.rfind(infix, 0) == 0; ++it)\n      cb(&(*it).second);\n    return;\n  }\n\n  // Otherwise, iterate over all entries and check if it contains the entry\n  for (const auto& entry : entries_) {\n    if (entry.first.find(infix) != string::npos)\n      cb(&entry.second);\n  }\n}\n\ntemplate <typename C>\nbool BaseStringIndex<C>::Add(DocId id, const DocumentAccessor& doc, string_view field) {\n  auto strings_list = GetStrings(doc, field);\n  if (!strings_list) {\n    return false;\n  }\n\n  absl::flat_hash_set<std::string> tokens;\n  for (string_view str : strings_list.value())\n    tokens.merge(Tokenize(str));\n\n  if (tokens.size() > 1)\n    unique_ids_ = false;\n  for (string_view token : tokens)\n    GetOrCreate(&entries_, token)->Insert(id);\n\n  if (suffix_trie_)\n    IterateAllSuffixes(tokens,\n                       [&](string_view str) { GetOrCreate(&*suffix_trie_, str)->Insert(id); });\n\n  return true;\n}\n\ntemplate <typename C>\nvoid BaseStringIndex<C>::Remove(DocId id, const DocumentAccessor& doc, string_view field) {\n  auto strings_list = GetStrings(doc, field).value();\n\n  absl::flat_hash_set<std::string> tokens;\n  for (string_view str : strings_list)\n    tokens.merge(Tokenize(str));\n\n  for 
(string_view token : tokens)\n    Remove(&entries_, id, token);\n\n  if (suffix_trie_)\n    IterateAllSuffixes(tokens, [&](string_view str) { Remove(&*suffix_trie_, id, str); });\n}\n\ntemplate <typename C> vector<string> BaseStringIndex<C>::GetTerms() const {\n  vector<string> res;\n  res.reserve(entries_.size());\n  for (const auto& [term, _] : entries_) {\n    res.push_back(string{term});\n  }\n  return res;\n}\n\ntemplate <typename C> vector<DocId> BaseStringIndex<C>::GetAllDocsWithNonNullValues() const {\n  std::vector<DocId> result;\n\n  result.reserve(entries_.size());\n\n  if (unique_ids_) {\n    // If unique_ids_ is true, we can just take the second element of each entry\n    for (const auto& [_, container] : entries_) {\n      for (const auto& doc_id : container) {\n        result.push_back(doc_id);\n      }\n    }\n  } else {\n    absl::flat_hash_set<DocId> unique_docs;\n    unique_docs.reserve(entries_.size());\n\n    for (const auto& [_, container] : entries_) {\n      for (const auto& doc_id : container) {\n        auto [_, is_new] = unique_docs.insert(doc_id);\n        if (is_new) {\n          result.push_back(doc_id);\n        }\n      }\n    }\n  }\n  std::sort(result.begin(), result.end());\n  return result;\n}\n\ntemplate <typename C>\nStringOrView BaseStringIndex<C>::NormalizeQueryWord(std::string_view query) const {\n  if (case_sensitive_)\n    return StringOrView::FromView(query);\n\n  return StringOrView::FromString(ToLower(query));\n}\n\ntemplate <typename C>\ntypename BaseStringIndex<C>::Container* BaseStringIndex<C>::GetOrCreate(\n    search::RaxTreeMap<Container>* map, string_view word) {\n  auto* mr = map->get_allocator().resource();\n  return &map->try_emplace(PMR_NS::string{word, mr}, mr, 1000 /* block size */).first->second;\n}\n\ntemplate <typename C>\nvoid BaseStringIndex<C>::Remove(search::RaxTreeMap<Container>* map, DocId id, string_view word) {\n  auto it = map->find(word);\n  if (it == map->end())\n    return;\n\n  
it->second.Remove(id);\n  if (it->second.Size() == 0)\n    map->erase(it);\n}\n\ntemplate struct BaseStringIndex<CompressedSortedSet>;\ntemplate struct BaseStringIndex<SortedVector<DocId>>;\n\nTextIndex::TextIndex(PMR_NS::memory_resource* mr, const StopWords* stopwords,\n                     const Synonyms* synonyms, bool with_suffixtrie)\n    : BaseStringIndex(mr, false, with_suffixtrie), stopwords_{stopwords}, synonyms_{synonyms} {\n}\n\nstd::optional<DocumentAccessor::StringList> TextIndex::GetStrings(const DocumentAccessor& doc,\n                                                                  std::string_view field) const {\n  return doc.GetStrings(field);\n}\n\nabsl::flat_hash_set<std::string> TextIndex::Tokenize(std::string_view value) const {\n  return TokenizeWords(value, *stopwords_, synonyms_);\n}\n\nDefragmentResult TagIndex::Defragment(PageUsage* page_usage) {\n  auto defrag = [&](auto& tree, string* key) {\n    DefragmentMap dm{tree, key};\n    return dm.Defragment(page_usage);\n  };\n\n  DefragmentResult result = defrag(entries_, &next_defrag_entry_);\n\n  if (suffix_trie_) {\n    result.Merge(defrag(suffix_trie_.value(), &next_defrag_suffix_entry_));\n  }\n\n  return result;\n}\n\nstd::optional<DocumentAccessor::StringList> TagIndex::GetStrings(const DocumentAccessor& doc,\n                                                                 std::string_view field) const {\n  return doc.GetTags(field);\n}\n\nabsl::flat_hash_set<std::string> TagIndex::Tokenize(std::string_view value) const {\n  return NormalizeTags(value, case_sensitive_, separator_);\n}\n\nBaseVectorIndex::BaseVectorIndex(size_t dim, VectorSimilarity sim) : dim_{dim}, sim_{sim} {\n}\n\nstd::pair<size_t /*dim*/, VectorSimilarity> BaseVectorIndex::Info() const {\n  return {dim_, sim_};\n}\n\nbool BaseVectorIndex::Add(DocId id, const DocumentAccessor& doc, std::string_view field) {\n  auto vector = doc.GetVector(field, dim_);\n\n  if (!vector)\n    return false;\n\n  if 
(std::holds_alternative<OwnedFtVector>(*vector)) {\n    const auto& owned_vector = std::get<OwnedFtVector>(*vector);\n    AddVector(id, owned_vector.first.get());\n  } else {\n    const auto& borrowed_vector = std::get<BorrowedFtVector>(*vector);\n    AddVector(id, borrowed_vector);\n  }\n\n  return true;\n}\n\n// Each document occupies (dim_ + 1) floats in entries_: dim_ floats for the vector data,\n// followed by one float as a presence marker (1.0 = present, 0.0 = absent/removed).\n// This avoids the previous heuristic of treating all-zero vectors as null.\nstatic constexpr float kPresent = 1.0f;\nstatic constexpr float kAbsent = 0.0f;\n\nFlatVectorIndex::FlatVectorIndex(const SchemaField::VectorParams& params,\n                                 PMR_NS::memory_resource* mr)\n    : BaseVectorIndex{params.dim, params.sim}, entries_{mr} {\n  DCHECK(!params.use_hnsw);\n  entries_.reserve(params.capacity * (params.dim + 1));\n}\n\nvoid FlatVectorIndex::AddVector(DocId id, const void* vector) {\n  const size_t stride = dim_ + 1;\n  DCHECK_LE(id * stride, entries_.size());\n  if (id * stride == entries_.size())\n    entries_.resize((id + 1) * stride, 0.0f);\n\n  if (vector) {\n    memcpy(&entries_[id * stride], vector, dim_ * sizeof(float));\n    entries_[id * stride + dim_] = kPresent;\n  }\n}\n\nvoid FlatVectorIndex::Remove(DocId id, const DocumentAccessor& doc, string_view field) {\n  const size_t stride = dim_ + 1;\n  if (id * stride + dim_ < entries_.size())\n    entries_[id * stride + dim_] = kAbsent;\n}\n\nconst float* FlatVectorIndex::Get(DocId doc) const {\n  const size_t stride = dim_ + 1;\n  if (doc * stride + dim_ >= entries_.size() || entries_[doc * stride + dim_] != kPresent)\n    return nullptr;\n  return &entries_[doc * stride];\n}\n\nstd::vector<DocId> FlatVectorIndex::GetAllDocsWithNonNullValues() const {\n  const size_t stride = dim_ + 1;\n  size_t num_slots = entries_.size() / stride;\n  std::vector<DocId> result;\n  result.reserve(num_slots);\n  for 
(DocId id = 0; id < num_slots; ++id) {\n    if (entries_[id * stride + dim_] == kPresent)\n      result.push_back(id);\n  }\n  return result;\n}\n\nGeoIndex::GeoIndex(PMR_NS::memory_resource* mr) : rtree_(make_unique<rtree>()) {\n}\n\nGeoIndex::~GeoIndex() {\n}\n\nbool GeoIndex::Add(DocId id, const DocumentAccessor& doc, std::string_view field) {\n  auto geo_string = doc.GetStrings(field);\n\n  if (!geo_string) {\n    return false;\n  }\n\n  // If field doesn't exists don't add to index.\n  if (geo_string->empty()) {\n    return true;\n  }\n\n  std::vector<GeoIndex::point> points;\n  for (string_view str : *geo_string) {\n    auto doc_point = GetGeoPoint(str);\n    if (!doc_point) {\n      return false;\n    }\n    points.emplace_back(*doc_point);\n  }\n  for (point p : points) {\n    rtree_->insert({p, id});\n  }\n\n  return true;\n}\n\nvoid GeoIndex::Remove(DocId id, const DocumentAccessor& doc, string_view field) {\n  auto geo_string = doc.GetStrings(field);\n\n  if (!geo_string || geo_string->empty()) {\n    return;\n  }\n\n  std::vector<GeoIndex::point> points;\n  for (string_view str : *geo_string) {\n    auto doc_point = GetGeoPoint(str);\n    if (!doc_point) {\n      return;\n    }\n    points.emplace_back(*doc_point);\n  }\n  for (point p : points) {\n    rtree_->remove({p, id});\n  }\n}\n\nstd::vector<DocId> GeoIndex::RadiusSearch(double lon, double lat, double radius,\n                                          std::string_view unit) {\n  std::set<DocId> unique_results;\n\n  // Get radius in meters\n  double converted_radius = ConvertToRadiusInMeters(radius, unit);\n\n  // Declare the geographic_point_circle strategy with 4 points\n  boost::geometry::strategy::buffer::geographic_point_circle<> point_strategy(4);\n\n  // Declare the distance strategy in meters around the point\n  boost::geometry::strategy::buffer::distance_symmetric<double> distance_strategy(converted_radius);\n\n  // Declare other necessary strategies, unused for point\n  
boost::geometry::strategy::buffer::join_round join_strategy;\n  boost::geometry::strategy::buffer::end_round end_strategy;\n  boost::geometry::strategy::buffer::side_straight side_strategy;\n\n  point p{lon, lat};\n\n  // Create polygon with 4 points around point\n  boost::geometry::model::multi_polygon<boost::geometry::model::polygon<point>> buffer_polygon;\n\n  boost::geometry::buffer(p, buffer_polygon, distance_strategy, side_strategy, join_strategy,\n                          end_strategy, point_strategy);\n\n  // Create bounding box around polygon to include all possible points\n  boost::geometry::model::box<point> box;\n  boost::geometry::envelope(buffer_polygon, box);\n\n  rtree_->query(boost::geometry::index::within(box),\n                boost::make_function_output_iterator(\n                    [&unique_results, &p, &converted_radius](auto const& val) {\n                      if (haversine_.apply(val.first, p) <= converted_radius) {\n                        unique_results.insert(val.second);\n                      }\n                    }));\n\n  // TODO: we should return sorted results by radius distance\n  return {unique_results.begin(), unique_results.end()};\n}\n\nstd::vector<DocId> GeoIndex::GetAllDocsWithNonNullValues() const {\n  std::set<DocId> unique_results;\n  std::for_each(boost::geometry::index::begin(*rtree_), boost::geometry::index::end(*rtree_),\n                [&unique_results](auto const& val) { unique_results.insert(val.second); });\n  return {unique_results.begin(), unique_results.end()};\n}\n\n}  // namespace dfly::search\n"
  },
  {
    "path": "src/core/search/indices.h",
    "content": "// Copyright 2023, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#pragma once\n\n#include <absl/container/flat_hash_map.h>\n#include <absl/container/flat_hash_set.h>\n\n// Wrong warning reported when geometry.hpp is loaded\n#ifndef __clang__\n#pragma GCC diagnostic push\n#pragma GCC diagnostic ignored \"-Wmaybe-uninitialized\"\n#endif\n#include <boost/geometry.hpp>\n#ifndef __clang__\n#pragma GCC diagnostic pop\n#endif\n\n#include <absl/functional/function_ref.h>\n\n#include <memory>\n#include <optional>\n#include <vector>\n\n#include \"base/pmr/memory_resource.h\"\n#include \"core/page_usage/page_usage_stats.h\"\n#include \"core/search/base.h\"\n#include \"core/search/block_list.h\"\n#include \"core/search/compressed_sorted_set.h\"\n#include \"core/search/range_tree.h\"\n#include \"core/search/rax_tree.h\"\n\n// TODO: move core field definitions out of big header\n#include \"common/string_or_view.h\"\n#include \"core/search/search.h\"\n\nnamespace dfly::search {\n\n// Index for integer fields.\n// Range bounds are queried in logarithmic time, iteration is constant.\nstruct NumericIndex : public BaseIndex {\n  // Temporary base class for range tree.\n  // It is used to use two different range trees depending on the flag use_range_tree.\n  // If the flag is true, RangeTree is used, otherwise a simple implementation with btree_set.\n  struct RangeTreeBase {\n    virtual void Add(DocId id, absl::Span<double> values) = 0;\n    virtual void Remove(DocId id, absl::Span<double> values) = 0;\n\n    // Returns all DocIds that match the range [l, r].\n    virtual RangeResult Range(double l, double r) const = 0;\n\n    // Returns all DocIds that have non-null values in the index.\n    virtual std::vector<DocId> GetAllDocIds() const = 0;\n\n    virtual void FinalizeInitialization(){};\n\n    virtual ~RangeTreeBase() = default;\n  };\n\n  // max_range_block_size is the maximum number of entries in a single range block.\n  
// It is used in RangeTree. Check RangeTree for details.\n  explicit NumericIndex(size_t max_range_block_size, PMR_NS::memory_resource* mr);\n\n  bool Add(DocId id, const DocumentAccessor& doc, std::string_view field) override;\n  void Remove(DocId id, const DocumentAccessor& doc, std::string_view field) override;\n\n  void FinalizeInitialization() override;\n\n  RangeResult Range(double l, double r) const;\n\n  std::vector<DocId> GetAllDocsWithNonNullValues() const override;\n\n private:\n  std::unique_ptr<RangeTreeBase> range_tree_;\n};\n\n// Base index for string based indices.\ntemplate <typename C> struct BaseStringIndex : public BaseIndex {\n  using Container = BlockList<C>;\n  using VecOrPtr = std::variant<std::vector<DocId>, const Container*>;\n\n  BaseStringIndex(PMR_NS::memory_resource* mr, bool case_sensitive, bool with_suffixtrie);\n\n  bool Add(DocId id, const DocumentAccessor& doc, std::string_view field) override;\n  void Remove(DocId id, const DocumentAccessor& doc, std::string_view field) override;\n\n  // Pointer is valid as long as index is not mutated. Nullptr if not found\n  const Container* Matching(std::string_view str, bool strip_whitespace = true) const;\n\n  // Iterate over all nodes matching on prefix.\n  void MatchPrefix(std::string_view prefix, absl::FunctionRef<void(const Container*)> cb) const;\n\n  // Iterate over all nodes matching suffix query. Faster if suffix trie is built.\n  void MatchSuffix(std::string_view suffix, absl::FunctionRef<void(const Container*)> cb) const;\n\n  // Iterate over all nodes matching infix query. 
Faster if suffix trie is built.\n  void MatchInfix(std::string_view prefix, absl::FunctionRef<void(const Container*)> cb) const;\n\n  // Returns all the terms that appear as keys in the reverse index.\n  std::vector<std::string> GetTerms() const;\n\n  std::vector<DocId> GetAllDocsWithNonNullValues() const override;\n\n protected:\n  using StringList = DocumentAccessor::StringList;\n\n  // Used by Add & Remove to get strings from document\n  virtual std::optional<StringList> GetStrings(const DocumentAccessor& doc,\n                                               std::string_view field) const = 0;\n\n  // Used by Add & Remove to tokenize text value\n  virtual absl::flat_hash_set<std::string> Tokenize(std::string_view value) const = 0;\n\n  cmn::StringOrView NormalizeQueryWord(std::string_view word) const;\n  static Container* GetOrCreate(search::RaxTreeMap<Container>* map, std::string_view word);\n  static void Remove(search::RaxTreeMap<Container>* map, DocId id, std::string_view word);\n\n  bool case_sensitive_ = false;\n  bool unique_ids_ = true;  // If true, docs ids are unique in the index, otherwise they can repeat.\n  search::RaxTreeMap<Container> entries_;\n  std::optional<search::RaxTreeMap<Container>> suffix_trie_;\n};\n\n// Index for text fields.\n// Hashmap based lookup per word.\nstruct TextIndex : public BaseStringIndex<CompressedSortedSet> {\n  using StopWords = absl::flat_hash_set<std::string>;\n\n  TextIndex(PMR_NS::memory_resource* mr, const StopWords* stopwords, const Synonyms* synonyms,\n            bool with_suffixtrie);\n\n protected:\n  std::optional<StringList> GetStrings(const DocumentAccessor& doc,\n                                       std::string_view field) const override;\n  absl::flat_hash_set<std::string> Tokenize(std::string_view value) const override;\n\n private:\n  const StopWords* stopwords_;\n  const Synonyms* synonyms_;\n};\n\n// Index for text fields.\n// Hashmap based lookup per word.\nstruct TagIndex : public 
BaseStringIndex<SortedVector<DocId>> {\n  TagIndex(PMR_NS::memory_resource* mr, SchemaField::TagParams params)\n      : BaseStringIndex(mr, params.case_sensitive, params.with_suffixtrie),\n        separator_{params.separator} {\n  }\n\n  DefragmentResult Defragment(PageUsage* page_usage) override;\n\n protected:\n  std::optional<StringList> GetStrings(const DocumentAccessor& doc,\n                                       std::string_view field) const override;\n  absl::flat_hash_set<std::string> Tokenize(std::string_view value) const override;\n\n private:\n  char separator_;\n  std::string next_defrag_entry_;\n  std::string next_defrag_suffix_entry_;\n};\n\nstruct BaseVectorIndex : public BaseIndex {\n  std::pair<size_t /*dim*/, VectorSimilarity> Info() const;\n\n  bool Add(DocId id, const DocumentAccessor& doc, std::string_view field) override final;\n\n protected:\n  BaseVectorIndex(size_t dim, VectorSimilarity sim);\n\n  virtual void AddVector(DocId id, const void* vector) = 0;\n\n  size_t dim_;\n  VectorSimilarity sim_;\n};\n\n// Index for vector fields.\n// Only supports lookup by id.\nstruct FlatVectorIndex : public BaseVectorIndex {\n  FlatVectorIndex(const SchemaField::VectorParams& params, PMR_NS::memory_resource* mr);\n\n  void Remove(DocId id, const DocumentAccessor& doc, std::string_view field) override;\n\n  const float* Get(DocId doc) const;\n\n  // Return all documents that have vectors in this index\n  std::vector<DocId> GetAllDocsWithNonNullValues() const override;\n\n protected:\n  void AddVector(DocId id, const void* vector) override;\n\n private:\n  PMR_NS::vector<float> entries_;\n};\n\nstruct GeoIndex : public BaseIndex {\n  using point =\n      boost::geometry::model::point<double, 2,\n                                    boost::geometry::cs::geographic<boost::geometry::degree>>;\n  using index_entry = std::pair<point, DocId>;\n\n  explicit GeoIndex(PMR_NS::memory_resource* mr);\n  ~GeoIndex();\n\n  bool Add(DocId id, const DocumentAccessor& 
doc, std::string_view field) override;\n  void Remove(DocId id, const DocumentAccessor& doc, std::string_view field) override;\n  std::vector<DocId> RadiusSearch(double lon, double lat, double radius, std::string_view arg);\n  std::vector<DocId> GetAllDocsWithNonNullValues() const override;\n\n private:\n  using rtree = boost::geometry::index::rtree<index_entry, boost::geometry::index::linear<16>>;\n  std::unique_ptr<rtree> rtree_;\n};\n\n// Defragments a map like data structure. The values in the map must have a `Defragment` method.\n// Works with rax tree map and hash based maps\ntemplate <typename Container> struct DefragmentMap {\n  using ValueType = Container::value_type;\n  using Iterator = Container::iterator;\n\n  DefragmentMap(Container& container, std::string* key) : key{key} {\n    if (key->empty()) {\n      it = container.end();\n    } else if constexpr (requires { container.lower_bound(*key); }) {\n      it = container.lower_bound(*key);\n    } else {\n      it = container.find(*key);\n    }\n\n    if (it == container.end()) {\n      it = container.begin();\n    }\n\n    end = container.end();\n  }\n\n  // The key is set if the defragmentation has to stop mid way due to depleted quota\n  DefragmentResult Defragment(PageUsage* page_usage) {\n    if (page_usage->QuotaDepleted()) {\n      return DefragmentResult{.quota_depleted = true, .objects_moved = 0};\n    }\n\n    DefragmentResult result;\n    for (; it != end; ++it) {\n      const auto& [k, map] = *it;\n      if (result.Merge(DefragmentIndex(map, page_usage)).quota_depleted) {\n        *key = k;\n        break;\n      }\n    }\n\n    if (it == end) {\n      key->clear();\n    }\n\n    return result;\n  }\n\n private:\n  template <typename T> static auto DefragmentIndex(T& t, PageUsage* page_usage) {\n    if constexpr (requires { t->Defragment(page_usage); }) {\n      return t->Defragment(page_usage);\n    } else {\n      return t.Defragment(page_usage);\n    }\n  }\n\n  std::string* key;\n  
Iterator it;\n  Iterator end;\n};\n\n}  // namespace dfly::search\n"
  },
  {
    "path": "src/core/search/lexer.lex",
    "content": "%top{\n  // Our lexer need to know about Parser::symbol_type\n  #include \"core/search/parser.hh\"\n  #include \"core/search/tag_types.h\" // Include TagType enum\n}\n\n%{\n  #include <absl/strings/escaping.h>\n  #include <absl/strings/numbers.h>\n\n  #include \"base/logging.h\"\n\n  #define DFLY_LEXER_CC 1\n     #include \"core/search/scanner.h\"\n  #undef DFLY_LEXER_CC\n%}\n\n%o bison-cc-namespace=\"dfly.search\" bison-cc-parser=\"Parser\"\n%o namespace=\"dfly.search\"\n%o class=\"Scanner\" lex=\"Lex\"\n%o nodefault batch case-insensitive\n/* %o debug */\n\n/* Declarations before lexer implementation.  */\n%{\n  // A number symbol corresponding to the value in S.\n  using dfly::search::Parser;\n  using namespace std;\n  using dfly::search::TagType;\n\n  Parser::symbol_type make_StringLit(string_view src, const Parser::location_type& loc);\n  Parser::symbol_type make_Tag(string_view src, TagType type, const Parser::location_type& loc);\n%}\n\ndq         \\\"\nsq         \\'\nesc_chars  ['\"\\?\\\\abfnrtv]\nesc_seq    \\\\{esc_chars}\nterm_ch    \\w\ntag_val_base_ch [^,.<>{}\\[\\]\\\\\\\"\\?':;!@#$%^&*()\\-+=~\\/| ]|\\\\.\ntag_val_ch {tag_val_base_ch}+(:+{tag_val_base_ch}*)*\nastrsk_ch  \\*\n\n\n%{\n  // Code run each time a pattern is matched.\n%}\n\n%%\n\n%{\n  // Code run each time lex() is called.\n%}\n\n[[:space:]]+   // skip white space\n\n\"(\"                  return Parser::make_LPAREN (loc());\n\")\"                  return Parser::make_RPAREN (loc());\n\"*\"                  return Parser::make_STAR (loc());\n\"-\"                  return Parser::make_NOT_OP (loc());\n\":\"                  return Parser::make_COLON (loc());\n\"=>\"                 return Parser::make_ARROW (loc());\n\"[\"                  return Parser::make_LBRACKET (loc());\n\"]\"                  return Parser::make_RBRACKET (loc());\n\"{\"                  return Parser::make_LCURLBR (loc());\n\"}\"                  return Parser::make_RCURLBR (loc());\n\"|\"         
         return Parser::make_OR_OP (loc());\n\",\"                  return Parser::make_COMMA (loc());\n\"KNN\"                return Parser::make_KNN (loc());\n\"AS\"                 return Parser::make_AS (loc());\n\"EF_RUNTIME\"         return Parser::make_EF_RUNTIME (loc());\n\"VECTOR_RANGE\"       return Parser::make_VECTOR_RANGE (loc());\n\"$YIELD_DISTANCE_AS\" return Parser::make_YIELD_DISTANCE_AS (loc());\n\n[0-9]{1,9}                          return Parser::make_UINT32(str(), loc());\n[+-]?(([0-9]*[.])?[0-9]+|inf)       return Parser::make_DOUBLE(str(), loc());\n\n{dq}([^\"]|{esc_seq})*{dq}           return make_StringLit(matched_view(1, 1), loc());\n{sq}([^']|{esc_seq})*{sq}           return make_StringLit(matched_view(1, 1), loc());\n\n\"$\"{term_ch}+                       return ParseParam(str(), loc());\n\"@\"{term_ch}+                       return Parser::make_FIELD(str(), loc());\n{astrsk_ch}{term_ch}+{astrsk_ch}    return Parser::make_INFIX(string{matched_view(1, 1)}, loc());\n{term_ch}+{astrsk_ch}               return Parser::make_PREFIX(string{matched_view(0, 1)}, loc());\n{astrsk_ch}{term_ch}+               return Parser::make_SUFFIX(string{matched_view(1, 0)}, loc());\n\n{term_ch}+                          return Parser::make_TERM(str(), loc());\n{tag_val_ch}+{astrsk_ch}            return make_Tag(str(), TagType::PREFIX, loc());\n{astrsk_ch}{tag_val_ch}+            return make_Tag(str(), TagType::SUFFIX, loc());\n{astrsk_ch}{tag_val_ch}+{astrsk_ch} return make_Tag(str(), TagType::INFIX, loc());\n{tag_val_ch}+                       return make_Tag(str(), TagType::REGULAR, loc());\n\n<<EOF>> return Parser::make_YYEOF(loc());\n%%\n\nParser::symbol_type make_StringLit(string_view src, const Parser::location_type& loc) {\n  string res;\n  if (!absl::CUnescape(src, &res))\n    throw Parser::syntax_error (loc, \"bad escaped string: \" + string(src));\n\n  return Parser::make_TERM(res, loc);\n}\n\nParser::symbol_type make_Tag(string_view src, TagType 
type, const Parser::location_type& loc) {\n  string res;\n  res.reserve(src.size());\n\n  // Determine processing boundaries\n  size_t start = (type == TagType::SUFFIX || type == TagType::INFIX) ? 1 : 0;\n  size_t end = src.size();\n  if (type == TagType::PREFIX || type == TagType::INFIX) {\n    end--; // Skip the last '*' character\n  }\n\n    // Handle escaping\n  bool escaped = false;\n  for (size_t i = start; i < end; ++i) {\n    if (escaped) {\n      escaped = false;\n    } else if (src[i] == '\\\\') {\n      escaped = true;\n      continue;\n    }\n    res.push_back(src[i]);\n  }\n\n  // Return the appropriate token type\n  switch (type) {\n    case TagType::PREFIX:\n      return Parser::make_PREFIX(res, loc);\n    case TagType::SUFFIX:\n      return Parser::make_SUFFIX(res, loc);\n    case TagType::INFIX:\n      return Parser::make_INFIX(res, loc);\n    case TagType::REGULAR:\n    default:\n      return Parser::make_TAG_VAL(res, loc);\n  }\n}\n"
  },
  {
    "path": "src/core/search/mrmw_mutex.h",
    "content": "// Copyright 2025, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#pragma once\n\n#include <condition_variable>\n#include <mutex>\n\n#include \"base/logging.h\"\n#include \"base/spinlock.h\"\n\nnamespace dfly::search {\n\n// Simple implementation of multi-Reader multi-Writer Mutex\n// MRMWMutex supports concurrent reads or concurrent writes but not a mix of\n// concurrent reads and writes at the same time.\n\nclass MRMWMutex {\n public:\n  enum class LockMode : uint8_t { kReadLock, kWriteLock };\n\n  MRMWMutex() : lock_mode_(LockMode::kReadLock) {\n  }\n\n  void Lock(LockMode mode) {\n    std::unique_lock lk(mutex_);\n\n    // If we have any active_runners we need to check lock mode\n    if (active_runners_) {\n      auto& waiters = GetWaiters(mode);\n      waiters++;\n      GetCondVar(mode).wait(lk, [&] { return lock_mode_ == mode; });\n      waiters--;\n    } else {\n      // No active runners so just update to requested lock mode\n      lock_mode_ = mode;\n    }\n    active_runners_++;\n  }\n\n  void Unlock(LockMode mode) {\n    std::lock_guard lk(mutex_);\n    LockMode inverse_mode = GetInverseMode(mode);\n    active_runners_--;\n    // If this was last runner and there are waiters on inverse mode\n    if (!active_runners_ && GetWaiters(inverse_mode) > 0) {\n      lock_mode_ = inverse_mode;\n      GetCondVar(inverse_mode).notify_all();\n    }\n  }\n\n  // Check if the mutex is currently held in read mode with at least one active runner.\n  // For use in DCHECKs only - not thread-safe without external synchronization.\n  bool IsReadLocked() const {\n    return active_runners_ > 0 && lock_mode_ == LockMode::kReadLock;\n  }\n\n  // Non-blocking lock attempt. 
Returns true if the lock was acquired.\n  bool TryLock(LockMode mode) {\n    if (!mutex_.try_lock()) {\n      return false;\n    }\n    if (active_runners_ && lock_mode_ != mode) {\n      mutex_.unlock();\n      return false;\n    }\n    if (!active_runners_) {\n      lock_mode_ = mode;\n    }\n    active_runners_++;\n    mutex_.unlock();\n    return true;\n  }\n\n private:\n  inline size_t& GetWaiters(LockMode target_mode) {\n    return target_mode == LockMode::kReadLock ? reader_waiters_ : writer_waiters_;\n  };\n\n  inline std::condition_variable_any& GetCondVar(LockMode target_mode) {\n    return target_mode == LockMode::kReadLock ? reader_cond_var_ : writer_cond_var_;\n  };\n\n  static inline LockMode GetInverseMode(LockMode mode) {\n    return mode == LockMode::kReadLock ? LockMode::kWriteLock : LockMode::kReadLock;\n  }\n\n  // TODO: use fiber sync primitives in future\n  base::SpinLock mutex_;\n  std::condition_variable_any reader_cond_var_, writer_cond_var_;\n\n  size_t writer_waiters_ = 0, reader_waiters_ = 0;\n  size_t active_runners_ = 0;\n  LockMode lock_mode_;\n};\n\nclass MRMWMutexLock {\n public:\n  // Blocking lock.\n  explicit MRMWMutexLock(MRMWMutex* mutex, MRMWMutex::LockMode mode)\n      : mutex_(mutex), lock_mode_(mode), locked_(true) {\n    mutex->Lock(lock_mode_);\n  }\n\n  // Non-blocking try-lock. 
Check locked() to see if the lock was acquired.\n  MRMWMutexLock(MRMWMutex* mutex, MRMWMutex::LockMode mode, std::try_to_lock_t)\n      : mutex_(mutex), lock_mode_(mode), locked_(mutex->TryLock(mode)) {\n  }\n\n  bool locked() const {\n    return locked_;\n  }\n\n  ~MRMWMutexLock() {\n    if (locked_)\n      mutex_->Unlock(lock_mode_);\n  }\n\n  MRMWMutexLock(const MRMWMutexLock&) = delete;\n  MRMWMutexLock(MRMWMutexLock&&) = delete;\n  MRMWMutexLock& operator=(const MRMWMutexLock&) = delete;\n  MRMWMutexLock& operator=(MRMWMutexLock&&) = delete;\n\n private:\n  MRMWMutex* const mutex_;\n  MRMWMutex::LockMode lock_mode_;\n  bool locked_;\n};\n\n}  // namespace dfly::search\n"
  },
  {
    "path": "src/core/search/mrmw_mutex_test.cc",
    "content": "// Copyright 2025, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#include \"core/search/mrmw_mutex.h\"\n\n#include <random>\n#include <thread>\n\n#include \"absl/flags/flag.h\"\n#include \"base/gtest.h\"\n#include \"base/logging.h\"\n#include \"util/fibers/pool.h\"\n\nABSL_FLAG(bool, force_epoll, false, \"If true, uses epoll api instead iouring to run tests\");\n\nnamespace dfly::search {\n\nnamespace {\n\n// Helper function to simulate reading operation\nvoid ReadTask(MRMWMutex* mutex, std::atomic<size_t>& read_count, size_t sleep_time) {\n  read_count.fetch_add(1, std::memory_order_relaxed);\n  MRMWMutexLock lock(mutex, MRMWMutex::LockMode::kReadLock);\n  util::ThisFiber::SleepFor(std::chrono::milliseconds(sleep_time));\n  read_count.fetch_sub(1, std::memory_order_relaxed);\n}\n\n// Helper function to simulate writing operation\nvoid WriteTask(MRMWMutex* mutex, std::atomic<size_t>& write_count, size_t sleep_time) {\n  write_count.fetch_add(1, std::memory_order_relaxed);\n  MRMWMutexLock lock(mutex, MRMWMutex::LockMode::kWriteLock);\n  util::ThisFiber::SleepFor(std::chrono::milliseconds(sleep_time));\n  write_count.fetch_sub(1, std::memory_order_relaxed);\n}\n\nconstexpr size_t kReadTaskSleepTime = 50;\nconstexpr size_t kWriteTaskSleepTime = 100;\n\n}  // namespace\n\nclass MRMWMutexTest : public ::testing::Test {\n protected:\n  MRMWMutex mutex_;\n  std::mt19937 generator_;\n  void SetUp() override {\n#ifdef __linux__\n    if (absl::GetFlag(FLAGS_force_epoll)) {\n      pp_.reset(util::fb2::Pool::Epoll(2));\n    } else {\n      pp_.reset(util::fb2::Pool::IOUring(16, 2));\n    }\n#else\n    pp_.reset(util::fb2::Pool::Epoll(2));\n#endif\n    pp_->Run();\n  }\n  void TearDown() override {\n    pp_->Stop();\n    pp_.reset();\n  }\n  std::unique_ptr<util::ProactorPool> pp_;\n};\n\n// Test 1: Multiple readers can lock concurrently\nTEST_F(MRMWMutexTest, MultipleReadersConcurrently) {\n  std::atomic<size_t> 
read_count(0);\n  const int num_readers = 5;\n\n  std::vector<util::fb2::Fiber> readers;\n  readers.reserve(num_readers);\n\n  for (int i = 0; i < num_readers; ++i) {\n    readers.emplace_back(pp_->at(0)->LaunchFiber(util::fb2::Launch::post, [&] {\n      ReadTask(&mutex_, std::ref(read_count), kReadTaskSleepTime);\n    }));\n  }\n\n  // Wait for all reader threads to finish\n  for (auto& t : readers) {\n    t.Join();\n  }\n\n  // All readers should have been able to lock the mutex concurrently\n  EXPECT_EQ(read_count.load(), 0);\n}\n\n// Test 2: Writer blocks readers and writer should get the lock exclusively\nTEST_F(MRMWMutexTest, ReadersBlockWriters) {\n  std::atomic<size_t> read_count(0);\n  std::atomic<size_t> write_count(0);\n\n  const int num_readers = 10;\n\n  // Start multiple readers\n  std::vector<util::fb2::Fiber> readers;\n  readers.reserve(num_readers);\n\n  for (int i = 0; i < num_readers; ++i) {\n    readers.emplace_back(pp_->at(0)->LaunchFiber(util::fb2::Launch::post, [&] {\n      ReadTask(&mutex_, std::ref(read_count), kReadTaskSleepTime);\n    }));\n  }\n\n  // Give readers time to acquire the lock\n  util::ThisFiber::SleepFor(std::chrono::milliseconds(10));\n\n  pp_->at(1)\n      ->LaunchFiber(util::fb2::Launch::post,\n                    [&] { WriteTask(&mutex_, std::ref(write_count), kWriteTaskSleepTime); })\n      .Join();\n\n  // Wait for all reader threads to finish\n  for (auto& t : readers) {\n    t.Join();\n  }\n\n  EXPECT_EQ(read_count.load(), 0);\n  EXPECT_EQ(write_count.load(), 0);\n}\n\n// Test 3: Unlock transitions correctly and wakes up waiting threads\nTEST_F(MRMWMutexTest, ReaderAfterWriter) {\n  std::atomic<size_t> write_count(0);\n  std::atomic<size_t> read_count(0);\n\n  // Start a writer thread\n  auto writer = pp_->at(1)->LaunchFiber(util::fb2::Launch::post, [&] {\n    WriteTask(&mutex_, std::ref(write_count), kWriteTaskSleepTime);\n  });\n\n  // Give writer time to acquire the lock\n  
util::ThisFiber::SleepFor(std::chrono::milliseconds(10));\n\n  // Now start a reader task that will block until the writer is done\n  pp_->at(0)\n      ->LaunchFiber(util::fb2::Launch::post,\n                    [&] { ReadTask(&mutex_, std::ref(read_count), kReadTaskSleepTime); })\n      .Join();\n\n  // Ensure that writer has completed\n  writer.Join();\n\n  EXPECT_EQ(read_count.load(), 0);\n  EXPECT_EQ(write_count.load(), 0);\n}\n\n// Test 4: Ensure writer gets the lock after readers finish\nTEST_F(MRMWMutexTest, WriterAfterReaders) {\n  std::atomic<size_t> read_count(0);\n  std::atomic<size_t> write_count(0);\n\n  // Start multiple readers\n  const int num_readers = 10;\n  std::vector<util::fb2::Fiber> readers;\n  readers.reserve(num_readers);\n\n  for (int i = 0; i < num_readers; ++i) {\n    readers.emplace_back(pp_->at(0)->LaunchFiber(util::fb2::Launch::post, [&] {\n      ReadTask(&mutex_, std::ref(read_count), kReadTaskSleepTime);\n    }));\n  }\n\n  // Wait for all readers to acquire and release the lock\n  for (auto& t : readers) {\n    t.Join();\n  }\n\n  // Start the writer after all readers are done\n  pp_->at(1)\n      ->LaunchFiber(util::fb2::Launch::post,\n                    [&] { WriteTask(&mutex_, std::ref(write_count), kWriteTaskSleepTime); })\n      .Join();\n\n  EXPECT_EQ(read_count.load(), 0);\n  EXPECT_EQ(write_count.load(), 0);\n}\n\nTEST_F(MRMWMutexTest, MixWritersReadersOnDifferentFibers) {\n  std::atomic<size_t> read_count(0);\n  std::atomic<size_t> write_count(0);\n\n  // Start multiple readers and writers\n  const int num_threads = 100;\n  std::vector<util::fb2::Fiber> threads;\n  threads.reserve(num_threads);\n\n  for (int i = 0; i < num_threads; ++i) {\n    if (rand() % 3) {\n      threads.emplace_back(pp_->at(0)->LaunchFiber(util::fb2::Launch::post, [&] {\n        ReadTask(&mutex_, std::ref(read_count), kReadTaskSleepTime);\n      }));\n    } else {\n      threads.emplace_back(pp_->at(1)->LaunchFiber(util::fb2::Launch::post, [&] {\n   
     WriteTask(&mutex_, std::ref(write_count), kWriteTaskSleepTime);\n      }));\n    }\n  }\n\n  // Wait for all readers to acquire and release the lock\n  for (auto& t : threads) {\n    t.Join();\n  }\n}\n\n// TODO: Once we have fiber locking we can test scenario where we write/read on same fibers\n// current implementation block thread so it is not possible to test this for now.\n\n// Test 6: Mix of readers and writes on random fibers\n// TEST_F(MRMWMutexTest, MixWritersReadersOnFibers) {\n//   std::atomic<size_t> read_count(0);\n//   std::atomic<size_t> write_count(0);\n\n//   // Start multiple readers and writers\n//   const int num_threads = 100;\n//   std::vector<util::fb2::Fiber> threads;\n//   threads.reserve(num_threads + 1);\n\n//   // Add long read task that will block all write tasks\n//   threads.emplace_back(\n//       pp_->at(0)->LaunchFiber([&] { ReadTask(&mutex_, std::ref(read_count), 2000); }));\n\n//   // Give long writer time to acquire the lock\n//   util::ThisFiber::SleepFor(std::chrono::milliseconds(100));\n\n//   size_t write_threads = 0;\n//   for (int i = 0; i < num_threads; ++i) {\n//     size_t fiber_id = rand() % 2;\n//     if (rand() % 3) {\n//       threads.emplace_back(pp_->at(fiber_id)->LaunchFiber(util::fb2::Launch::post, [&] {\n//         ReadTask(&mutex_, std::ref(read_count), kReadTaskSleepTime);\n//       }));\n//     } else {\n//       write_threads++;\n//       threads.emplace_back(pp_->at(fiber_id)->LaunchFiber(util::fb2::Launch::post, [&] {\n//         WriteTask(&mutex_, std::ref(write_count), kWriteTaskSleepTime);\n//       }));\n//     }\n//   }\n\n//   // All shorter threads should be done and only long one remains\n//   util::ThisFiber::SleepFor(std::chrono::milliseconds(500));\n\n//   EXPECT_EQ(read_count.load(), 1);\n\n//   EXPECT_EQ(write_count.load(), write_threads);\n\n//   // Wait for all readers to acquire and release the lock\n//   for (auto& t : threads) {\n//     t.Join();\n//   }\n// 
}\n\nTEST_F(MRMWMutexTest, IsReadLockedReflectsState) {\n  // Initially no lock is held.\n  EXPECT_FALSE(mutex_.IsReadLocked());\n\n  // Acquire a read lock and verify.\n  mutex_.Lock(MRMWMutex::LockMode::kReadLock);\n  EXPECT_TRUE(mutex_.IsReadLocked());\n\n  // A second concurrent reader should still report read-locked.\n  mutex_.Lock(MRMWMutex::LockMode::kReadLock);\n  EXPECT_TRUE(mutex_.IsReadLocked());\n\n  // Release one reader — still locked by the other.\n  mutex_.Unlock(MRMWMutex::LockMode::kReadLock);\n  EXPECT_TRUE(mutex_.IsReadLocked());\n\n  // Release the last reader.\n  mutex_.Unlock(MRMWMutex::LockMode::kReadLock);\n  EXPECT_FALSE(mutex_.IsReadLocked());\n}\n\nTEST_F(MRMWMutexTest, IsReadLockedFalseUnderWriteLock) {\n  mutex_.Lock(MRMWMutex::LockMode::kWriteLock);\n  EXPECT_FALSE(mutex_.IsReadLocked());\n  mutex_.Unlock(MRMWMutex::LockMode::kWriteLock);\n}\n\nTEST_F(MRMWMutexTest, TryLockSucceedsWhenFree) {\n  // TryLock on a free mutex should succeed for both modes.\n  EXPECT_TRUE(mutex_.TryLock(MRMWMutex::LockMode::kReadLock));\n  mutex_.Unlock(MRMWMutex::LockMode::kReadLock);\n\n  EXPECT_TRUE(mutex_.TryLock(MRMWMutex::LockMode::kWriteLock));\n  mutex_.Unlock(MRMWMutex::LockMode::kWriteLock);\n}\n\nTEST_F(MRMWMutexTest, TryLockFailsOnConflict) {\n  // Hold a read lock, then try-lock for write should fail.\n  mutex_.Lock(MRMWMutex::LockMode::kReadLock);\n  EXPECT_FALSE(mutex_.TryLock(MRMWMutex::LockMode::kWriteLock));\n  mutex_.Unlock(MRMWMutex::LockMode::kReadLock);\n\n  // Hold a write lock, then try-lock for read should fail.\n  mutex_.Lock(MRMWMutex::LockMode::kWriteLock);\n  EXPECT_FALSE(mutex_.TryLock(MRMWMutex::LockMode::kReadLock));\n  mutex_.Unlock(MRMWMutex::LockMode::kWriteLock);\n}\n\nTEST_F(MRMWMutexTest, TryLockSucceedsForSameMode) {\n  // Multiple readers via TryLock should all succeed.\n  mutex_.Lock(MRMWMutex::LockMode::kReadLock);\n  EXPECT_TRUE(mutex_.TryLock(MRMWMutex::LockMode::kReadLock));\n  
mutex_.Unlock(MRMWMutex::LockMode::kReadLock);\n  mutex_.Unlock(MRMWMutex::LockMode::kReadLock);\n\n  // Multiple writers via TryLock should all succeed.\n  mutex_.Lock(MRMWMutex::LockMode::kWriteLock);\n  EXPECT_TRUE(mutex_.TryLock(MRMWMutex::LockMode::kWriteLock));\n  mutex_.Unlock(MRMWMutex::LockMode::kWriteLock);\n  mutex_.Unlock(MRMWMutex::LockMode::kWriteLock);\n}\n\nTEST_F(MRMWMutexTest, MRMWMutexLockTryLockSemantics) {\n  // Hold a read lock, then try a MRMWMutexLock for write — should not be locked.\n  MRMWMutexLock read_lock(&mutex_, MRMWMutex::LockMode::kReadLock);\n  MRMWMutexLock try_write(&mutex_, MRMWMutex::LockMode::kWriteLock, std::try_to_lock);\n  EXPECT_FALSE(try_write.locked());\n\n  // Same-mode try-lock via RAII should succeed.\n  MRMWMutexLock try_read(&mutex_, MRMWMutex::LockMode::kReadLock, std::try_to_lock);\n  EXPECT_TRUE(try_read.locked());\n}\n\n}  // namespace dfly::search\n"
  },
  {
    "path": "src/core/search/parser.y",
    "content": "%skeleton \"lalr1.cc\" // -*- C++ -*-\n%require \"3.5\"  // fedora 32 has this one.\n\n%defines  // %header starts from 3.8.1\n\n%define api.namespace {dfly::search}\n\n%define api.token.raw\n%define api.token.constructor\n%define api.value.type variant\n%define api.parser.class {Parser}\n%define parse.assert\n%define api.value.automove true\n\n// Added to header file before parser declaration.\n%code requires {\n  #include \"core/search/ast_expr.h\"\n\n  namespace dfly {\n  namespace search {\n    class QueryDriver;\n  }\n  }\n}\n\n// Added to cc file\n%code {\n#include <absl/strings/ascii.h>\n#include \"core/search/query_driver.h\"\n#include \"core/search/vector_utils.h\"\n\n#define yylex driver->scanner()->Lex\n\nusing namespace std;\n\nuint32_t toUint32(string_view src);\ndouble toDouble(string_view src);\n\n}\n\n%parse-param { QueryDriver *driver  }\n\n%locations\n\n%define parse.trace\n%define parse.error verbose  // detailed\n%define parse.lac full\n%define api.token.prefix {TOK_}\n\n%token\n  LPAREN      \"(\"\n  RPAREN      \")\"\n  STAR        \"*\"\n  ARROW       \"=>\"\n  COLON       \":\"\n  LBRACKET    \"[\"\n  RBRACKET    \"]\"\n  LCURLBR     \"{\"\n  RCURLBR     \"}\"\n  OR_OP       \"|\"\n  COMMA       \",\"\n  KNN         \"KNN\"\n  AS          \"AS\"\n  EF_RUNTIME  \"EF_RUNTIME\"\n  VECTOR_RANGE      \"VECTOR_RANGE\"\n  YIELD_DISTANCE_AS \"$YIELD_DISTANCE_AS\"\n;\n\n%token AND_OP\n\n// Needed 0 at the end to satisfy bison 3.5.1\n%token YYEOF 0\n%token <std::string> TERM \"term\" TAG_VAL \"tag_val\" PARAM \"param\" FIELD \"field\" PREFIX \"prefix\" SUFFIX \"suffix\" INFIX \"infix\"\n\n%precedence TERM TAG_VAL\n%left OR_OP\n%left AND_OP\n%right NOT_OP\n%precedence LPAREN RPAREN\n\n%token <std::string> DOUBLE \"double\"\n%token <std::string> UINT32 \"uint32\"\n%nterm <AstExpr> final_query filter star_expr search_expr search_unary_expr search_or_expr search_and_expr bracket_filter_expr\n%nterm <AstExpr> field_cond field_cond_expr 
field_unary_expr field_or_expr field_and_expr tag_list\n%nterm <AstTagsNode::TagValueProxy> tag_list_element\n\n%nterm <AstKnnNode> knn_query\n%nterm <std::string> opt_knn_alias\n%nterm <std::string> geounit\n%nterm <std::optional<size_t>> opt_ef_runtime\n%nterm <AstVectorRangeNode> vector_range_query\n%nterm <double> vec_range_radius\n\n%printer { yyo << $$; } <*>;\n\n%%\n\nfinal_query:\n  filter\n      { driver->Set(std::move($1)); }\n  | filter ARROW knn_query\n      { driver->Set(AstKnnNode(std::move($1), std::move($3))); }\n  | vector_range_query\n      { driver->Set(std::move($1)); }\n\nknn_query:\n  LBRACKET KNN UINT32 FIELD TERM opt_ef_runtime opt_knn_alias RBRACKET\n    {\n      // Accept any string as vector - validation happens later during search execution\n      uint32_t knn_count = toUint32($3);\n      auto field = std::move($4);\n      auto alias = std::move($7);\n      auto ef = $6;\n\n      auto vec_result = BytesToFtVectorSafe($5);\n      if (!vec_result) {\n        // Create empty vector for invalid data - will return empty results during search\n        auto empty_vec = std::make_unique<float[]>(0);\n        $$ = AstKnnNode(knn_count, std::move(field), std::make_pair(std::move(empty_vec), size_t{0}), std::move(alias), ef);\n      } else {\n        $$ = AstKnnNode(knn_count, std::move(field), std::move(*vec_result), std::move(alias), ef);\n      }\n    }\n\nopt_knn_alias:\n  AS TERM { $$ = std::move($2); }\n  | { $$ = std::string{}; }\n\nopt_ef_runtime:\n  /* empty */ { $$ = std::nullopt; }\n  | EF_RUNTIME UINT32 { $$ = toUint32($2); }\n\nvector_range_query:\n  FIELD COLON LBRACKET VECTOR_RANGE vec_range_radius TERM RBRACKET ARROW LCURLBR YIELD_DISTANCE_AS COLON TERM RCURLBR\n    {\n      double radius = $5;\n      auto field = std::move($1);\n      auto alias = std::move($12);\n      auto vec_result = BytesToFtVectorSafe($6);\n      if (!vec_result) {\n        auto empty_vec = std::make_unique<float[]>(0);\n        $$ = 
AstVectorRangeNode(std::move(field), radius,\n                                {std::move(empty_vec), size_t{0}}, std::move(alias));\n      } else {\n        $$ = AstVectorRangeNode(std::move(field), radius, std::move(*vec_result),\n                                std::move(alias));\n      }\n    }\n\nvec_range_radius:\n  DOUBLE  { $$ = toDouble($1); }\n  | UINT32 { $$ = static_cast<double>(toUint32($1)); }\n  | TERM   { double v = 0; if (!absl::SimpleAtod($1, &v)) YYABORT; $$ = v; }\n\nfilter:\n  search_expr               { $$ = std::move($1); }\n  | star_expr               { $$ = std::move($1); }\n\nstar_expr:\n  STAR                      { $$ = AstStarNode(); }\n  | LPAREN star_expr RPAREN { $$ = std::move($2); }\n\nsearch_expr:\n  search_unary_expr         { $$ = std::move($1); }\n  | search_and_expr         { $$ = std::move($1); }\n  | search_or_expr          { $$ = std::move($1); }\n\nsearch_and_expr:\n  search_unary_expr search_unary_expr %prec AND_OP { $$ = AstLogicalNode(std::move($1), std::move($2), AstLogicalNode::AND); }\n  | search_and_expr search_unary_expr %prec AND_OP { $$ = AstLogicalNode(std::move($1), std::move($2), AstLogicalNode::AND); }\n\nsearch_or_expr:\n  search_expr OR_OP search_and_expr                { $$ = AstLogicalNode(std::move($1), std::move($3), AstLogicalNode::OR); }\n  | search_expr OR_OP search_unary_expr            { $$ = AstLogicalNode(std::move($1), std::move($3), AstLogicalNode::OR); }\n\nsearch_unary_expr:\n  LPAREN search_expr RPAREN           { $$ = std::move($2);                }\n  | NOT_OP search_unary_expr          { $$ = AstNegateNode(std::move($2)); }\n  | TERM                              { $$ = AstTermNode(std::move($1));   }\n  | PREFIX                            { $$ = AstPrefixNode(std::move($1)); }\n  | SUFFIX                            { $$ = AstSuffixNode(std::move($1)); }\n  | INFIX                             { $$ = AstInfixNode(std::move($1));  }\n  | UINT32                            { $$ = 
AstTermNode(std::move($1));   }\n  | FIELD COLON field_cond            { $$ = AstFieldNode(std::move($1), std::move($3)); }\n\nfield_cond:\n  TERM                                                  { $$ = AstTermNode(std::move($1));   }\n  | UINT32                                              { $$ = AstTermNode(std::move($1));   }\n  | STAR                                                { $$ = AstStarFieldNode();           }\n  | NOT_OP field_cond                                   { $$ = AstNegateNode(std::move($2)); }\n  | LPAREN field_cond_expr RPAREN                       { $$ = std::move($2); }\n  | LBRACKET bracket_filter_expr RBRACKET               { $$ = std::move($2); }\n  | LCURLBR tag_list RCURLBR                            { $$ = std::move($2); }\n  | PREFIX                                              { $$ = AstPrefixNode(std::move($1)); }\n  | SUFFIX                                              { $$ = AstSuffixNode(std::move($1)); }\n  | INFIX                                               { $$ = AstInfixNode(std::move($1));  }\n\nbracket_filter_expr:\n  /* Numeric filter has form [(] UINT32|DOUBLE [COMMA] [(] UINT32|DOUBLE */\n  DOUBLE DOUBLE                                { $$ = AstRangeNode(toDouble($1), false, toDouble($2), false); }\n  | LPAREN DOUBLE DOUBLE                       { $$ = AstRangeNode(toDouble($2), true, toDouble($3), false); }\n  | DOUBLE LPAREN DOUBLE                       { $$ = AstRangeNode(toDouble($1), false, toDouble($3), true); }\n  | LPAREN DOUBLE LPAREN DOUBLE                { $$ = AstRangeNode(toDouble($2), true, toDouble($4), true); }\n  | DOUBLE UINT32                              { $$ = AstRangeNode(toDouble($1), false, toUint32($2), false); }\n  | LPAREN DOUBLE UINT32                       { $$ = AstRangeNode(toDouble($2), true, toUint32($3), false); }\n  | DOUBLE LPAREN UINT32                       { $$ = AstRangeNode(toDouble($1), false, toUint32($3), true); }\n  | LPAREN DOUBLE LPAREN UINT32                { $$ = 
AstRangeNode(toDouble($2), true, toUint32($4), true); }\n  | UINT32 DOUBLE                              { $$ = AstRangeNode(toUint32($1), false, toDouble($2), false); }\n  | LPAREN UINT32 DOUBLE                       { $$ = AstRangeNode(toUint32($2), true, toDouble($3), false); }\n  | UINT32 LPAREN DOUBLE                       { $$ = AstRangeNode(toUint32($1), false, toDouble($3), true); }\n  | LPAREN UINT32 LPAREN DOUBLE                { $$ = AstRangeNode(toUint32($2), true, toDouble($4), true); }\n  | UINT32 UINT32                              { $$ = AstRangeNode(toUint32($1), false, toUint32($2), false); }\n  | LPAREN UINT32 UINT32                       { $$ = AstRangeNode(toUint32($2), true, toUint32($3), false); }\n  | UINT32 LPAREN UINT32                       { $$ = AstRangeNode(toUint32($1), false, toUint32($3), true); }\n  | LPAREN UINT32 LPAREN UINT32                { $$ = AstRangeNode(toUint32($2), true, toUint32($4), true); }\n  | DOUBLE COMMA DOUBLE                        { $$ = AstRangeNode(toDouble($1), false, toDouble($3), false); }\n  | DOUBLE COMMA UINT32                        { $$ = AstRangeNode(toDouble($1), false, toUint32($3), false); }\n  | UINT32 COMMA DOUBLE                        { $$ = AstRangeNode(toUint32($1), false, toDouble($3), false); }\n  | UINT32 COMMA UINT32                        { $$ = AstRangeNode(toUint32($1), false, toUint32($3), false); }\n  | LPAREN DOUBLE COMMA DOUBLE                 { $$ = AstRangeNode(toDouble($2), true, toDouble($4), false); }\n  | DOUBLE COMMA LPAREN DOUBLE                 { $$ = AstRangeNode(toDouble($1), false, toDouble($4), true); }\n  | LPAREN DOUBLE COMMA LPAREN DOUBLE          { $$ = AstRangeNode(toDouble($2), true, toDouble($5), true); }\n  | LPAREN DOUBLE COMMA UINT32                 { $$ = AstRangeNode(toDouble($2), true, toUint32($4), false); }\n  | DOUBLE COMMA LPAREN UINT32                 { $$ = AstRangeNode(toDouble($1), false, toUint32($4), true); }\n  | LPAREN DOUBLE COMMA LPAREN 
UINT32          { $$ = AstRangeNode(toDouble($2), true, toUint32($5), true); }\n  | LPAREN UINT32 COMMA DOUBLE                 { $$ = AstRangeNode(toUint32($2), true, toDouble($4), false); }\n  | UINT32 COMMA LPAREN DOUBLE                 { $$ = AstRangeNode(toUint32($1), false, toDouble($4), true); }\n  | LPAREN UINT32 COMMA LPAREN DOUBLE          { $$ = AstRangeNode(toUint32($2), true, toDouble($5), true); }\n  | LPAREN UINT32 COMMA UINT32                 { $$ = AstRangeNode(toUint32($2), true, toUint32($4), false); }\n  | UINT32 COMMA LPAREN UINT32                 { $$ = AstRangeNode(toUint32($1), false, toUint32($4), true); }\n  | LPAREN UINT32 COMMA LPAREN UINT32          { $$ = AstRangeNode(toUint32($2), true, toUint32($5), true); }\n  /* GEO filter */\n  | DOUBLE DOUBLE UINT32 geounit               { $$ = AstGeoNode(toDouble($1), toDouble($2), toUint32($3), std::move($4)); }\n  | DOUBLE DOUBLE DOUBLE geounit               { $$ = AstGeoNode(toDouble($1), toDouble($2), toDouble($3), std::move($4)); }\n\ngeounit:\n  TERM\n  {\n    std::string unit = $1;\n    absl::AsciiStrToUpper(&unit);\n    if ((unit == \"M\") || (unit == \"KM\") || (unit == \"MI\") || (unit == \"FT\")) {\n        $$ = unit;\n    } else {\n        YYABORT;\n    }\n  }\n\nfield_cond_expr:\n  field_unary_expr { $$ = std::move($1); }\n  | field_and_expr { $$ = std::move($1); }\n  | field_or_expr  { $$ = std::move($1); }\n\nfield_and_expr:\n  field_unary_expr field_unary_expr %prec AND_OP  { $$ = AstLogicalNode(std::move($1), std::move($2), AstLogicalNode::AND); }\n  | field_and_expr field_unary_expr %prec AND_OP  { $$ = AstLogicalNode(std::move($1), std::move($2), AstLogicalNode::AND); }\n\nfield_or_expr:\n  field_cond_expr OR_OP field_unary_expr          { $$ = AstLogicalNode(std::move($1), std::move($3), AstLogicalNode::OR); }\n  | field_cond_expr OR_OP field_and_expr          { $$ = AstLogicalNode(std::move($1), std::move($3), AstLogicalNode::OR); }\n\nfield_unary_expr:\n  LPAREN 
field_cond_expr RPAREN { $$ = std::move($2);                }\n  | NOT_OP field_unary_expr     { $$ = AstNegateNode(std::move($2)); }\n  | TERM                        { $$ = AstTermNode(std::move($1));   }\n  | UINT32                      { $$ = AstTermNode(std::move($1));   }\n\ntag_list:\n  tag_list_element                       { $$ = AstTagsNode(std::move($1));                }\n  | tag_list OR_OP tag_list_element      { $$ = AstTagsNode(std::move($1), std::move($3)); }\n\ntag_list_element:\n  TERM        { $$ = AstTermNode(std::move($1));   }\n  | PREFIX    { $$ = AstPrefixNode(std::move($1)); }\n  | SUFFIX    { $$ = AstSuffixNode(std::move($1)); }\n  | INFIX     { $$ = AstInfixNode(std::move($1));  }\n  | UINT32    { $$ = AstTermNode(std::move($1));   }\n  | DOUBLE    { $$ = AstTermNode(std::move($1));   }\n  | TAG_VAL   { $$ = AstTermNode(std::move($1));   }\n\n\n%%\n\nvoid\ndfly::search::Parser::error(const location_type& l, const string& m)\n{\n  driver->Error(l, m);\n}\n\nstd::uint32_t toUint32(string_view str) {\n  uint32_t val = 0;\n  std::ignore = absl::SimpleAtoi(str, &val); // no need to check the result because str is parsed by regex\n  return val;\n}\n\ndouble toDouble(string_view str) {\n  double val = 0;\n  std::ignore = absl::SimpleAtod(str, &val); // no need to check the result because str is parsed by regex\n  return val;\n}\n"
  },
  {
    "path": "src/core/search/query_driver.cc",
    "content": "// Copyright 2023, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#include \"core/search/query_driver.h\"\n\nnamespace dfly {\nnamespace search {\n\nQueryDriver::QueryDriver() : scanner_(std::make_unique<Scanner>()) {\n}\n\nQueryDriver::~QueryDriver() {\n}\n\nvoid QueryDriver::ResetScanner() {\n  scanner_ = std::make_unique<Scanner>();\n  scanner_->SetParams(params_);\n}\n\nvoid QueryDriver::Error(const Parser::location_type& loc, std::string_view msg) {\n  VLOG(1) << \"Parse error \" << loc << \": \" << msg;\n}\n\nvoid QueryDriver::SetOptionalFilters(const OptionalFilters* filters) {\n  if (filters) {\n    for (auto& [field, filter] : *filters) {\n      expr_ = AstLogicalNode(std::move(expr_), filter->Node(field), AstLogicalNode::AND);\n    }\n  }\n}\n\n}  // namespace search\n\n}  // namespace dfly\n"
  },
  {
    "path": "src/core/search/query_driver.h",
    "content": "// Copyright 2023, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#pragma once\n\n#include <memory>\n\n#include \"core/search/ast_expr.h\"\n#include \"core/search/base.h\"\n#include \"core/search/parser.hh\"\n#include \"core/search/scanner.h\"\n\nnamespace dfly {\n\nnamespace search {\n\nclass QueryDriver {\n public:\n  QueryDriver();\n  ~QueryDriver();\n\n  void SetInput(std::string str) {\n    cur_str_ = std::move(str);\n    scanner()->in(cur_str_);\n  }\n\n  void SetParams(const QueryParams* params) {\n    params_ = params;\n    scanner_->SetParams(params);\n  }\n\n  void SetOptionalFilters(const OptionalFilters* filters);\n\n  Parser::symbol_type Lex() {\n    return scanner()->Lex();\n  }\n\n  void ResetScanner();\n\n  void Set(AstExpr expr) {\n    expr_ = std::move(expr);\n  }\n\n  AstExpr Take() {\n    return std::move(expr_);\n  }\n\n  const QueryParams& GetParams() const {\n    return *params_;\n  }\n\n  Scanner* scanner() {\n    return scanner_.get();\n  }\n\n  void Error(const Parser::location_type& loc, std::string_view msg);\n\n public:\n  Parser::location_type location;\n\n private:\n  const QueryParams* params_;\n  AstExpr expr_;\n\n  std::string cur_str_;\n  std::unique_ptr<Scanner> scanner_;\n};\n\n}  // namespace search\n}  // namespace dfly\n"
  },
  {
    "path": "src/core/search/range_tree.cc",
    "content": "// Copyright 2025, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#include \"core/search/range_tree.h\"\n\nnamespace dfly::search {\n\nnamespace {\n\nstd::vector<DocId> MergeAllResults(absl::Span<const RangeTree::RangeBlock*> blocks, double l,\n                                   double r) {\n  DCHECK(blocks.size() != 1 && blocks.size() != 2);\n\n  // After the benchmarking, it is better to use inlined vector\n  // than std::priority_queue\n  absl::InlinedVector<RangeFilterIterator, 10> heap;\n  heap.reserve(blocks.size());\n\n  size_t doc_ids_count = 0;\n  for (const auto* block : blocks) {\n    auto it = MakeBegin(*block, l, r);\n    if (!it.HasReachedEnd()) {\n      heap.emplace_back(it);\n      doc_ids_count += block->Size();\n    }\n  }\n\n  std::vector<DocId> result;\n  result.reserve(doc_ids_count);\n\n  size_t size = heap.size();\n  while (size) {\n    DCHECK(!heap[0].HasReachedEnd());\n\n    size_t min_doc_id_index = 0;\n    for (size_t i = 1; i < size; ++i) {\n      DCHECK(!heap[i].HasReachedEnd());\n\n      if (*heap[i] < *heap[min_doc_id_index]) {\n        min_doc_id_index = i;\n      }\n    }\n\n    auto& it = heap[min_doc_id_index];\n    result.push_back(*it);\n    ++it;\n\n    if (it.HasReachedEnd()) {\n      // If we reached the end of the current block, remove it from the heap\n      std::swap(heap[min_doc_id_index], heap[size - 1]);\n      --size;\n    }\n  }\n\n  DCHECK(std::is_sorted(result.begin(), result.end()));\n  return result;\n}\n\ntemplate <typename MapT> auto FindRangeBlockImpl(MapT& entries, double value) {\n  DCHECK(!entries.empty());\n\n  auto it = entries.lower_bound(value);\n  if (it != entries.begin() && (it == entries.end() || it->first > value)) {\n    // TODO: remove this, we do log N here\n    // we can use negative left bouding to find the block\n    --it;  // Move to the block that contains the value\n  }\n\n  DCHECK(it != entries.end() && it->first <= value);\n  return 
it;\n}\n\n}  // namespace\n\nRangeTree::RangeTree(PMR_NS::memory_resource* mr, size_t max_range_block_size)\n    : max_range_block_size_(max_range_block_size), entries_(mr) {\n  // The tree has at least always a block with a negative infinity bound, so that any new insertion\n  // goes at least somewhere\n  CreateEmptyBlock(-std::numeric_limits<double>::infinity());\n}\n\nvoid RangeTree::Add(DocId id, double value) {\n  DCHECK(std::isfinite(value));\n\n  auto it = FindRangeBlock(value);\n  auto& [lower_bound, block] = *it;\n\n  // Don't disrupt large monovalue blocks, instead create new nextafter block\n  if (block.Size() >= max_range_block_size_ && lower_bound == block.max_seen /* monovalue */ &&\n      value != lower_bound /* but new value is different*/\n  ) {\n    // We use nextafter as the lower bound to \"catch\" all other possible inserts into the block,\n    // as a decreasing `value` sequence would otherwise create lots of single-value blocks\n    double lb2 = std::nextafter(lower_bound, std::numeric_limits<double>::infinity());\n    CreateEmptyBlock(lb2)->second.Insert({id, value});\n    return;\n  }\n\n  auto insert_result = block.Insert({id, value});\n  LOG_IF(ERROR, !insert_result) << \"RangeTree: Failed to insert id: \" << id << \", value: \" << value;\n\n  // Small block or large monovalue block, not reducable by splitting\n  if (block.Size() <= max_range_block_size_ || lower_bound == block.max_seen)\n    return;\n\n  SplitBlock(it);\n}\n\nvoid RangeTree::Remove(DocId id, double value) {\n  DCHECK(std::isfinite(value));\n\n  auto it = FindRangeBlock(value);\n  RangeBlock& block = it->second;\n\n  auto remove_result = block.Remove({id, value});\n  LOG_IF(ERROR, !remove_result) << \"RangeTree: Failed to remove id: \" << id << \", value: \" << value;\n\n  // Merge with left block if both are relatively small and won't be forced to split soon\n  if (block.size() < max_range_block_size_ / 4 && it != entries_.begin()) {\n    auto lit = it;\n    --lit;\n\n  
  auto& lblock = lit->second;\n    if (block.Size() + lblock.Size() < max_range_block_size_ / 2) {\n      for (auto e : block)\n        lblock.Insert(e);\n      entries_.erase(it);\n      stats_.merges++;\n    }\n  }\n}\n\nRangeResult RangeTree::Range(double l, double r) const {\n  return {RangeBlocks(l, r), l, r};\n}\n\nabsl::InlinedVector<const RangeTree::RangeBlock*, 5> RangeTree::RangeBlocks(double l,\n                                                                            double r) const {\n  DCHECK(l <= r);\n\n  auto it_l = FindRangeBlock(l);\n  auto it_r = FindRangeBlock(r);\n\n  absl::InlinedVector<const RangeBlock*, 5> blocks;\n  for (auto it = it_l;; ++it) {\n    blocks.push_back(&it->second);\n    if (it == it_r) {\n      break;\n    }\n  }\n\n  DCHECK(!blocks.empty());\n  return blocks;\n}\n\nRangeResult RangeTree::GetAllDocIds() const {\n  return RangeResult{GetAllBlocks()};\n}\n\nabsl::InlinedVector<const RangeTree::RangeBlock*, 5> RangeTree::GetAllBlocks() const {\n  absl::InlinedVector<const RangeBlock*, 5> blocks;\n  blocks.reserve(entries_.size());\n\n  for (const auto& entry : entries_) {\n    blocks.push_back(&entry.second);\n  }\n\n  return blocks;\n}\n\nRangeTree::Map::iterator RangeTree::FindRangeBlock(double value) {\n  return FindRangeBlockImpl(entries_, value);\n}\n\nRangeTree::Map::const_iterator RangeTree::FindRangeBlock(double value) const {\n  return FindRangeBlockImpl(entries_, value);\n}\n\nRangeTree::Map::iterator RangeTree::CreateEmptyBlock(double lb) {\n  return entries_\n      .emplace(std::piecewise_construct, std::forward_as_tuple(lb),\n               std::forward_as_tuple(entries_.get_allocator().resource(), max_range_block_size_))\n      .first;\n}\n\n/*\nThere is an edge case in the SplitBlock method:\nIf split_result.left.Size() == 0, it means that all values in the block\nwere equal to the median value.\nBecause split works like this:\n  - at the beginning it does not insert median values into the left or right 
block,\n  - then it checks if left block is smaller than right block, if so, it adds\n    median values to the left block, otherwise it adds it to the right block.\nSo if left block is empty, it means that left.Size() < right.Size() was false,\nwhat means that right.Size() was also zero.\nAfter that all median entries were added to the right block.\n\nThat means that we have equal values in the whole block,\nand their count is greater than max_range_block_size_.\nSo we will do cascade splits of the right block.\nTODO: we can optimize this case by splitting to three blocks:\n - empty left block with range [l, m),\n - middle block with range [m, std::nextafter(m, +inf)),\n - empty right block with range [std::nextafter(m, +inf), r)\n*/\nvoid RangeTree::SplitBlock(Map::iterator it) {\n  double lower_bound = it->first;\n\n  auto split_result = Split(std::move(it->second));\n\n  const double m = split_result.median;\n  DCHECK(!split_result.right.Empty());\n\n  entries_.erase(it);\n  stats_.splits++;\n\n  // Insert left block if it's not empty or if its the first one (negative inf bound)\n  if (!split_result.left.Empty() || std::isinf(lower_bound)) {\n    if (!std::isinf(lower_bound))  // keep negative inf bound\n      lower_bound = split_result.lmin;\n\n    entries_.emplace(std::piecewise_construct, std::forward_as_tuple(lower_bound),\n                     std::forward_as_tuple(std::move(split_result.left), split_result.lmax));\n  }\n\n  entries_.emplace(std::piecewise_construct, std::forward_as_tuple(m),\n                   std::forward_as_tuple(std::move(split_result.right), split_result.rmax));\n\n  DCHECK(TreeIsInCorrectState());\n}\n\nRangeTree::Stats RangeTree::GetStats() const {\n  return Stats{.splits = stats_.splits, .merges = stats_.merges, .block_count = entries_.size()};\n}\n\n// Used for DCHECKs to check that the tree is in a correct state.\n[[maybe_unused]] bool RangeTree::TreeIsInCorrectState() const {\n  if (entries_.empty()) {\n    return false;\n  
}\n\n  double prev_range = entries_.begin()->first;\n  for (auto it = std::next(entries_.begin()); it != entries_.end(); ++it) {\n    const double& current_range = it->first;\n\n    // Check that ranges are non-overlapping and sorted\n    // Also there can not be gaps between ranges\n    if (prev_range >= current_range) {\n      return false;\n    }\n\n    prev_range = current_range;\n  }\n\n  return true;\n}\n\nRangeResult::RangeResult(std::vector<DocId> doc_ids) : result_(std::move(doc_ids)) {\n}\n\nRangeResult::RangeResult(absl::InlinedVector<RangeBlockPointer, 5> blocks)\n    : RangeResult(std::move(blocks), -std::numeric_limits<double>::infinity(),\n                  std::numeric_limits<double>::infinity()) {\n}\n\nRangeResult::RangeResult(absl::InlinedVector<RangeBlockPointer, 5> blocks, double l, double r) {\n  if (blocks.size() == 1) {\n    result_ = SingleBlockRangeResult(blocks[0], l, r);\n  } else if (blocks.size() == 2) {\n    result_ = TwoBlocksRangeResult(blocks[0], blocks[1], l, r);\n  } else {\n    result_ = MergeAllResults(absl::MakeSpan(blocks), l, r);\n  }\n}\n\nstd::vector<DocId> RangeResult::Take() {\n  if (std::holds_alternative<DocsList>(result_)) {\n    DCHECK(std::is_sorted(std::get<DocsList>(result_).begin(), std::get<DocsList>(result_).end()));\n    return std::get<DocsList>(std::move(result_));\n  }\n\n  auto cb = [](const auto& v) {\n    std::vector<DocId> result;\n    result.reserve(v.size());\n    std::copy(v.begin(), v.end(), std::back_inserter(result));\n    DCHECK(std::is_sorted(result.begin(), result.end()));\n    return result;\n  };\n\n  return std::visit(cb, result_);\n}\n\nvoid RangeTree::Builder::Add(DocId id, double value) {\n  bool inserted = updates_.emplace(id, value).second;\n  DCHECK(inserted);\n}\n\nvoid RangeTree::Builder::Remove(DocId id, double value) {\n  if (!updates_.erase({id, value}))\n    delayed_erased_.emplace(id, value);\n}\n\nvoid RangeTree::Builder::Populate(RangeTree* tree, const RenewableQuota& quota) 
{\n  // Sort all elements by value\n  std::vector<Entry> sorted_entries(updates_.begin(), updates_.end());\n  std::ranges::sort(sorted_entries, {}, &Entry::second);\n  updates_.clear();\n\n  quota.Check();  // TODO: sort might take a long time\n\n  // Add sorted elements in batches\n  size_t max_size = tree->max_range_block_size_;\n  RangeBlock* block = &tree->entries_.begin()->second;\n  for (size_t idx = 0; idx < sorted_entries.size();) {\n    // Create new block for each insertion batch (first goes into only first block)\n    if (idx)\n      block = &tree->CreateEmptyBlock(sorted_entries[idx].second)->second;\n\n    // Insert until we filled a block and a new value started (equal value must be in same block)\n    while (idx < sorted_entries.size()) {\n      if (block->Size() >= max_size && sorted_entries[idx - 1].second != sorted_entries[idx].second)\n        break;\n\n      block->Insert(sorted_entries[idx]);\n      idx++;\n\n      // If we filled a new multiple of the block size due to equal entries, check quota\n      if ((block->Size() - 1) / max_size != block->Size() / max_size)\n        quota.Check();\n    }\n\n    quota.Check();  // Yield if needed\n  }\n\n  // Update entries accumulated during yields in batches while respecting quota.\n  // Last loop is atomic (without quota checks) to ensure consistency\n  size_t iterations = 3;\n  while (iterations--) {\n    // Take updates to allow new ones during suspensions\n    auto stolen_erased = std::move(delayed_erased_);\n    auto stolen_updates = std::move(updates_);\n    delayed_erased_.clear();\n    updates_.clear();\n\n    auto check_quota = [&, ops = size_t(0)]() mutable {\n      ops++;\n      if (iterations && ops / max_size != (ops + 1) / max_size)\n        quota.Check();\n    };\n\n    for (auto [id, v] : stolen_erased) {\n      tree->Remove(id, v);\n      check_quota();\n    }\n\n    for (auto [id, v] : stolen_updates) {\n      tree->Add(id, v);\n      check_quota();\n    }\n  }\n\n  // Because last 
iteration was atomic\n  DCHECK(updates_.empty());\n  DCHECK(delayed_erased_.empty());\n}\n\n}  // namespace dfly::search\n"
  },
  {
    "path": "src/core/search/range_tree.h",
    "content": "// Copyright 2025, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#pragma once\n\n#include <absl/container/btree_map.h>\n#include <absl/container/flat_hash_set.h>\n\n#include <memory>\n#include <queue>\n#include <vector>\n\n#include \"base/pmr/memory_resource.h\"\n#include \"core/search/base.h\"\n#include \"core/search/block_list.h\"\n#include \"core/search/renewable_quota.h\"\n\nnamespace dfly::search {\nclass RangeResult;\n\n/* RangeTree is an index structure for numeric fields that allows efficient range queries.\n   It maps disjoint numeric ranges (e.g., [0, 5), [5, 10), [10, 15), ...) to sorted sets of document\n   IDs.\n\n   Internally, it uses absl::btree_map<std::pair<double, double>, RangeBlock>, where each key\n   represents a numeric value range, and the corresponding RangeBlock (similar to std::vector)\n   stores (DocId, value) pairs, sorted by DocId.\n\n   The parameter `max_range_block_size_` defines the maximum number of entries in a single\n   RangeBlock. When a block exceeds this limit, it is split into two to maintain balanced\n   performance.\n*/\nclass RangeTree {\n public:\n  friend class RangeResult;\n  using Entry = std::pair<DocId, double>;\n\n  // More efficient builder for range tree where updates are batched\n  // and then applied in an optimized order inside Populate.\n  struct Builder {\n    void Add(DocId id, double value);\n    void Remove(DocId id, double value);\n\n    // Build tree from batched updates. Accepts new updates during suspensions.\n    void Populate(RangeTree* tree, const RenewableQuota& quota);\n\n   private:\n    absl::flat_hash_set<Entry> updates_, delayed_erased_;\n  };\n\n  // Main node of numeric tree\n  struct RangeBlock : public BlockList<SortedVector<Entry>> {\n    template <typename... Ts>\n    explicit RangeBlock(PMR_NS::memory_resource* mr, Ts... 
ts) : BlockList{mr, ts...} {\n    }\n\n    RangeBlock(BlockList<SortedVector<Entry>>&& bs, double maxv)\n        : BlockList{std::move(bs)}, max_seen{maxv} {\n    }\n\n    bool Insert(Entry e) {\n      max_seen = std::max(max_seen, e.second);\n      return BlockList::Insert(e);\n    }\n\n    // Max value seen, might be not present anymore\n    double max_seen = -std::numeric_limits<double>::infinity();\n  };\n\n  static constexpr size_t kDefaultMaxRangeBlockSize = 10'000;\n\n  explicit RangeTree(PMR_NS::memory_resource* mr,\n                     size_t max_range_block_size = kDefaultMaxRangeBlockSize);\n\n  // Adds a document with a value to the index.\n  void Add(DocId id, double value);\n\n  // Removes a document with a value from the index.\n  void Remove(DocId id, double value);\n\n  // Returns all documents with values in the range [l, r].\n  RangeResult Range(double l, double r) const;\n  // Same as Range, but returns the blocks that contain the results.\n  absl::InlinedVector<const RangeBlock*, 5> RangeBlocks(double l, double r) const;\n\n  RangeResult GetAllDocIds() const;\n  // Returns all blocks in the tree.\n  absl::InlinedVector<const RangeBlock*, 5> GetAllBlocks() const;\n\n  struct Stats {\n    size_t splits = 0;\n    size_t merges = 0;\n    size_t block_count = 0;\n  };\n\n  Stats GetStats() const;\n\n private:\n  using Map = absl::btree_map<double, RangeBlock, std::less<>,\n                              PMR_NS::polymorphic_allocator<std::pair<double, RangeBlock>>>;\n\n  Map::iterator FindRangeBlock(double value);\n  Map::const_iterator FindRangeBlock(double value) const;\n\n  Map::iterator CreateEmptyBlock(double lb);\n  void SplitBlock(Map::iterator it);\n\n  // Used for DCHECKs\n  bool TreeIsInCorrectState() const;\n\n private:\n  // The maximum size of a range block. 
If a block exceeds this size, it will be split\n  size_t max_range_block_size_;\n  Map entries_;\n\n  struct {\n    size_t splits = 0;\n    size_t merges = 0;\n  } stats_;\n};\n\n/* This iterator filters out entries that are not in the range [l, r].\n   It is used to iterate over the RangeBlock and return only the entries\n   that are within the specified range.\n   The iterator is initialized with a range [l, r] and will skip entries\n   that are outside this range. */\nclass RangeFilterIterator : public SeekableTag {\n private:\n  static constexpr DocId kInvalidDocId = std::numeric_limits<DocId>::max();\n\n  using RangeBlock = RangeTree::RangeBlock;\n  using BaseIterator = RangeBlock::BlockListIterator;\n\n public:\n  using iterator_category = BaseIterator::iterator_category;\n  using difference_type = BaseIterator::difference_type;\n  using value_type = DocId;\n  using pointer = value_type*;\n  using reference = value_type&;\n\n  RangeFilterIterator(BaseIterator begin, BaseIterator end, double l, double r);\n\n  value_type operator*() const;\n\n  RangeFilterIterator& operator++();\n\n  void SeekGE(DocId min_doc_id);\n\n  bool operator==(const RangeFilterIterator& other) const;\n  bool operator!=(const RangeFilterIterator& other) const;\n\n  bool HasReachedEnd() const;\n\n private:\n  void SkipInvalidEntries(DocId last_id);\n\n  bool InRange(BaseIterator it) const;\n\n  double l_, r_;\n  BaseIterator current_, end_;\n};\n\nRangeFilterIterator MakeBegin(const RangeTree::RangeBlock& block, double l, double r);\nRangeFilterIterator MakeEnd(const RangeTree::RangeBlock& block, double l, double r);\n\n/* Separate class for merging results from a single RangeBlock.\n   It provides an iterator interface to iterate over the entries in the block\n   that are within the specified range [l, r].\n   This is used when the result of a range query is contained within a single block.\n\n   It is needed to avoid unnecessary complexity in the RangeResult class,\n   which can handle 
both single and multiple blocks.\n   It provides better performance and clarity when dealing with single block results. */\nclass SingleBlockRangeResult {\n public:\n  SingleBlockRangeResult(const RangeTree::RangeBlock* block, double l, double r);\n\n  RangeFilterIterator begin() const;\n  RangeFilterIterator end() const;\n\n  size_t size() const;\n\n private:\n  double l_;\n  double r_;\n  const RangeTree::RangeBlock* block_ = nullptr;\n};\n\n/* Separate class for merging results from two RangeBlocks.\n   It provides an iterator interface to iterate over the entries in both blocks\n   that are within the specified range [l, r].\n   It automatically merges the results from both blocks and provides a unified view.\n   This is used when the result of a range query spans two blocks.\n\n   It provides a more efficient way to handle results that span multiple blocks,\n   avoiding unnecessary complexity in the RangeResult class.\n   TODO: Implement efficient merging for more than two blocks and remove this class. 
*/\nclass TwoBlocksRangeResult {\n public:\n  TwoBlocksRangeResult(const RangeTree::RangeBlock* left_block,\n                       const RangeTree::RangeBlock* right_block, double l, double r);\n\n  size_t size() const;\n\n  class MergingIterator : public SeekableTag {\n   private:\n    static constexpr DocId kInvalidDocId = std::numeric_limits<DocId>::max();\n\n   public:\n    using iterator_category = RangeFilterIterator::iterator_category;\n    using difference_type = RangeFilterIterator::difference_type;\n    using value_type = RangeFilterIterator::value_type;\n    using pointer = RangeFilterIterator::pointer;\n    using reference = RangeFilterIterator::reference;\n\n    MergingIterator(RangeFilterIterator l, RangeFilterIterator r);\n\n    value_type operator*() const;\n\n    MergingIterator& operator++();\n\n    void SeekGE(DocId min_doc_id);\n\n    bool operator==(const MergingIterator& other) const;\n    bool operator!=(const MergingIterator& other) const;\n\n   private:\n    void InitializeMin();\n\n    DocId current_min_ = kInvalidDocId;\n    RangeFilterIterator l_;\n    RangeFilterIterator r_;\n  };\n\n  MergingIterator begin() const;\n  MergingIterator end() const;\n\n private:\n  double l_;\n  double r_;\n  const RangeTree::RangeBlock* left_block_ = nullptr;\n  const RangeTree::RangeBlock* right_block_ = nullptr;\n};\n\n/* Represent the result of a range query on the RangeTree.\n   It can contain results from a single block, two blocks, or several blocks.\n   Several blocks are merged into a single result, which is represented by\n   vector<DocId>.\n\n   TODO: Implement efficient merging for more than two blocks */\nclass RangeResult {\n private:\n  using RangeBlockPointer = const RangeTree::RangeBlock*;\n  using RangeBlockIterator = RangeTree::RangeBlock::BlockListIterator;\n\n  using DocsList = std::vector<DocId>;\n  using Variant = std::variant<DocsList, SingleBlockRangeResult, TwoBlocksRangeResult>;\n\n public:\n  RangeResult() = default;\n\n  
explicit RangeResult(std::vector<DocId> doc_ids);\n  explicit RangeResult(absl::InlinedVector<RangeBlockPointer, 5> blocks);\n  RangeResult(absl::InlinedVector<RangeBlockPointer, 5> blocks, double l, double r);\n\n  std::vector<DocId> Take();\n\n  Variant& GetResult();\n  const Variant& GetResult() const;\n\n private:\n  Variant result_;\n};\n\n// Implementation\n/******************************************************************/\ninline RangeFilterIterator::RangeFilterIterator(BaseIterator begin, BaseIterator end, double l,\n                                                double r)\n    : l_(l), r_(r), current_(begin), end_(end) {\n  SkipInvalidEntries(kInvalidDocId);\n}\n\ninline RangeFilterIterator::value_type RangeFilterIterator::operator*() const {\n  return (*current_).first;\n}\n\ninline RangeFilterIterator& RangeFilterIterator::operator++() {\n  const DocId last_id = (*current_).first;\n  ++current_;\n  SkipInvalidEntries(last_id);\n  return *this;\n}\n\ninline void RangeFilterIterator::SeekGE(DocId min_doc_id) {\n  current_.SeekGE(min_doc_id);\n  while (current_ != end_ && !InRange(current_)) {\n    DCHECK((*current_).first >= min_doc_id);\n    ++current_;\n  }\n}\n\ninline bool RangeFilterIterator::operator==(const RangeFilterIterator& other) const {\n  return current_ == other.current_;\n}\n\ninline bool RangeFilterIterator::operator!=(const RangeFilterIterator& other) const {\n  return current_ != other.current_;\n}\n\ninline bool RangeFilterIterator::HasReachedEnd() const {\n  return current_ == end_;\n}\n\ninline void RangeFilterIterator::SkipInvalidEntries(DocId last_id) {\n  // Faster than using std::find_if\n  while (current_ != end_ && (!InRange(current_) || (*current_).first == last_id)) {\n    ++current_;\n  }\n}\n\ninline bool RangeFilterIterator::InRange(BaseIterator it) const {\n  return l_ <= (*it).second && (*it).second <= r_;\n}\n\ninline RangeFilterIterator MakeBegin(const RangeTree::RangeBlock& block, double l, double r) {\n  return 
{block.begin(), block.end(), l, r};\n}\n\ninline RangeFilterIterator MakeEnd(const RangeTree::RangeBlock& block, double l, double r) {\n  return {block.end(), block.end(), l, r};\n}\n\ninline SingleBlockRangeResult::SingleBlockRangeResult(const RangeTree::RangeBlock* block, double l,\n                                                      double r)\n    : l_(l), r_(r), block_(block) {\n  DCHECK(block_ != nullptr);\n}\n\ninline RangeFilterIterator SingleBlockRangeResult::begin() const {\n  return MakeBegin(*block_, l_, r_);\n}\n\ninline RangeFilterIterator SingleBlockRangeResult::end() const {\n  return MakeEnd(*block_, l_, r_);\n}\n\ninline size_t SingleBlockRangeResult::size() const {\n  return block_->Size();\n}\n\ninline TwoBlocksRangeResult::TwoBlocksRangeResult(const RangeTree::RangeBlock* left_block,\n                                                  const RangeTree::RangeBlock* right_block,\n                                                  double l, double r)\n    : l_(l), r_(r), left_block_(left_block), right_block_(right_block) {\n  DCHECK(left_block_ != nullptr);\n  DCHECK(right_block_ != nullptr);\n}\n\ninline size_t TwoBlocksRangeResult::size() const {\n  return left_block_->Size() + right_block_->Size();\n}\n\ninline TwoBlocksRangeResult::MergingIterator::MergingIterator(RangeFilterIterator l,\n                                                              RangeFilterIterator r)\n    : l_(std::move(l)), r_(std::move(r)) {\n  InitializeMin();\n}\n\ninline TwoBlocksRangeResult::MergingIterator::value_type\nTwoBlocksRangeResult::MergingIterator::operator*() const {\n  return current_min_;\n}\n\ninline TwoBlocksRangeResult::MergingIterator& TwoBlocksRangeResult::MergingIterator::operator++() {\n  auto increase_iterator = [&](RangeFilterIterator& it) {\n    ++it;\n    current_min_ = !it.HasReachedEnd() ? 
*it : std::numeric_limits<DocId>::max();\n  };\n\n  if (l_.HasReachedEnd()) {\n    increase_iterator(r_);\n  } else if (r_.HasReachedEnd()) {\n    increase_iterator(l_);\n  } else {\n    DCHECK(!l_.HasReachedEnd() && !r_.HasReachedEnd());\n    if (*l_ == current_min_) {\n      ++l_;\n    }\n    if (*r_ == current_min_) {\n      ++r_;\n    }\n    InitializeMin();\n  }\n\n  return *this;\n}\n\ninline void TwoBlocksRangeResult::MergingIterator::SeekGE(DocId min_doc_id) {\n  l_.SeekGE(min_doc_id);\n  r_.SeekGE(min_doc_id);\n  InitializeMin();\n}\n\ninline bool TwoBlocksRangeResult::MergingIterator::operator==(\n    const TwoBlocksRangeResult::MergingIterator& other) const {\n  return l_ == other.l_ && r_ == other.r_;\n}\n\ninline bool TwoBlocksRangeResult::MergingIterator::operator!=(\n    const TwoBlocksRangeResult::MergingIterator& other) const {\n  return !(*this == other);\n}\n\ninline void TwoBlocksRangeResult::MergingIterator::InitializeMin() {\n  DocId left_value = !l_.HasReachedEnd() ? *l_ : std::numeric_limits<DocId>::max();\n  DocId right_value = !r_.HasReachedEnd() ? *r_ : std::numeric_limits<DocId>::max();\n  current_min_ = std::min(left_value, right_value);\n}\n\ninline TwoBlocksRangeResult::MergingIterator TwoBlocksRangeResult::begin() const {\n  return MergingIterator{MakeBegin(*left_block_, l_, r_), MakeBegin(*right_block_, l_, r_)};\n}\n\ninline TwoBlocksRangeResult::MergingIterator TwoBlocksRangeResult::end() const {\n  return MergingIterator{MakeEnd(*left_block_, l_, r_), MakeEnd(*right_block_, l_, r_)};\n}\n\ninline RangeResult::Variant& RangeResult::GetResult() {\n  return result_;\n}\n\ninline const RangeResult::Variant& RangeResult::GetResult() const {\n  return result_;\n}\n\n}  // namespace dfly::search\n"
  },
  {
    "path": "src/core/search/range_tree_test.cc",
    "content": "// Copyright 2025, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#include \"core/search/range_tree.h\"\n\n#include <absl/random/random.h>\n#include <benchmark/benchmark.h>\n#include <gmock/gmock.h>\n#include <gtest/gtest.h>\n\n#include <utility>\n\n#include \"base/gtest.h\"\n#include \"base/logging.h\"\n#include \"util/fibers/fibers.h\"\n\nnamespace dfly::search {\n\nclass RangeTreeTest : public testing::Test {\n protected:\n};\n\nstatic constexpr double kMinRangeValue = std::numeric_limits<double>::min();\nstatic constexpr double kMaxRangeValue = std::numeric_limits<double>::max();\n\nusing Entry = std::pair<DocId, double>;\nusing BlocksList = absl::InlinedVector<const RangeTree::RangeBlock*, 5>;\n\nstd::vector<Entry> ExtractDocPairs(const BlocksList& result) {\n  std::vector<Entry> out;\n  for (const auto& block : result) {\n    for (const auto& entry : *block) {\n      out.push_back(entry);\n    }\n  }\n  return out;\n}\n\nstd::vector<std::vector<Entry>> ExtractAllBlocks(const BlocksList& result) {\n  std::vector<std::vector<Entry>> all;\n  for (const auto& block : result) {\n    std::vector<Entry> block_entries;\n    for (const auto& entry : *block) {\n      block_entries.push_back(entry);\n    }\n    all.push_back(std::move(block_entries));\n  }\n  return all;\n}\n\nMATCHER_P(UnorderedElementsAreDocPairsMatcher, expected_matchers, \"\") {\n  return testing::ExplainMatchResult(testing::UnorderedElementsAreArray(expected_matchers),\n                                     ExtractDocPairs(arg), result_listener);\n}\n\nMATCHER_P(BlocksAreMatcher, expected_blocks, \"\") {\n  std::vector<testing::Matcher<std::vector<Entry>>> matchers;\n  for (const auto& expected_entries : expected_blocks) {\n    matchers.push_back(testing::UnorderedElementsAreArray(expected_entries));\n  }\n  return testing::ExplainMatchResult(testing::ElementsAreArray(matchers), ExtractAllBlocks(arg),\n                                     
result_listener);\n}\n\nauto UnorderedElementsAreDocPairs(std::vector<Entry> list) {\n  return UnorderedElementsAreDocPairsMatcher(std::move(list));\n}\n\nauto BlocksAre(std::initializer_list<std::vector<Entry>> blocks) {\n  return BlocksAreMatcher(std::vector<std::vector<Entry>>(blocks));\n}\n\nstd::vector<DocId> ExtractDocIdsFromRange(const std::vector<Entry>& entries, double l, double r) {\n  std::vector<DocId> result;\n  for (const auto& entry : entries) {\n    if (entry.second >= l && entry.second <= r) {\n      result.push_back(entry.first);\n    }\n  }\n\n  std::sort(result.begin(), result.end());\n  result.erase(std::unique(result.begin(), result.end()), result.end());\n  return result;\n}\n\nstd::vector<DocId> MergeTwoBlocksRangeResult(const RangeTree& tree, double l, double r) {\n  auto result = tree.Range(l, r).GetResult();\n  DCHECK(std::holds_alternative<TwoBlocksRangeResult>(result));\n  auto& two_blocks_result = std::get<TwoBlocksRangeResult>(result);\n  return {two_blocks_result.begin(), two_blocks_result.end()};\n}\n\nTEST_F(RangeTreeTest, AddSimple) {\n  RangeTree tree{PMR_NS::get_default_resource()};\n\n  // Add some values\n  tree.Add(1, 10.0);\n  tree.Add(2, 20.0);\n  tree.Add(2, 10.0);\n  tree.Add(3, 30.0);\n  tree.Add(4, 40.0);\n  tree.Add(4, 60.0);\n\n  auto result = tree.GetAllBlocks();\n  EXPECT_THAT(result, UnorderedElementsAreDocPairs(\n                          {{1, 10.0}, {2, 10.0}, {2, 20.0}, {3, 30.0}, {4, 40.0}, {4, 60.0}}));\n}\n\nTEST_F(RangeTreeTest, Add) {\n  RangeTree tree{PMR_NS::get_default_resource(), 2};\n\n  // Add some values\n  tree.Add(1, 10.0);\n  tree.Add(1, 20.0);\n  tree.Add(2, 20.0);\n  tree.Add(3, 20.0);\n  tree.Add(4, 30.0);\n  tree.Add(5, 30.0);\n  tree.Add(6, 30.0);\n\n  auto result = tree.RangeBlocks(10.0, 30.0);\n  EXPECT_THAT(result,\n              UnorderedElementsAreDocPairs(\n                  {{1, 10.0}, {1, 20.0}, {2, 20.0}, {3, 20.0}, {4, 30.0}, {5, 30.0}, {6, 30.0}}));\n\n  // Test that the ranges was 
split correctly\n  result = tree.RangeBlocks(kMinRangeValue, 19.0);\n  EXPECT_THAT(result, UnorderedElementsAreDocPairs({{1, 10.0}}));\n\n  result = tree.RangeBlocks(20.0, 29.0);\n  EXPECT_THAT(result, UnorderedElementsAreDocPairs({{1, 20.0}, {2, 20.0}, {3, 20.0}}));\n\n  result = tree.RangeBlocks(30.0, kMaxRangeValue);\n  EXPECT_THAT(result, UnorderedElementsAreDocPairs({{4, 30.0}, {5, 30.0}, {6, 30.0}}));\n}\n\nTEST_F(RangeTreeTest, RemoveSimple) {\n  RangeTree tree{PMR_NS::get_default_resource(), 2};\n\n  // Add some values\n  tree.Add(1, 10.0);\n  tree.Add(2, 20.0);\n  tree.Add(3, 30.0);\n  tree.Add(4, 40.0);\n\n  // Remove some values\n  tree.Remove(1, 10.0);\n  tree.Remove(2, 20.0);\n\n  auto result = tree.GetAllBlocks();\n  EXPECT_THAT(result, UnorderedElementsAreDocPairs({{3, 30.0}, {4, 40.0}}));\n}\n\nTEST_F(RangeTreeTest, Remove) {\n  using Container = std::vector<Entry>;\n\n  Container expected_values;\n  RangeTree tree{PMR_NS::get_default_resource(), 2};\n\n  const long long max_value = 100;\n  long long step = 23;\n  long long current_value = max_value;\n\n  auto do_add = [&](DocId i) {\n    const double value = static_cast<double>(current_value);\n    auto it = std::find(expected_values.begin(), expected_values.end(), std::make_pair(i, value));\n\n    if (it != expected_values.end()) {\n      // If the value already exists, we do not add it again\n      // The problem is that for now RangeTree does not support duplicates\n      // TODO: fix this\n      return;\n    }\n\n    // Otherwise, we add it to the expected values and to the tree\n    expected_values.emplace_back(i, value);\n    tree.Add(i, value);\n    current_value = (max_value + current_value - step) % max_value;\n  };\n\n  auto add_entries_with_step = [&](size_t step) {\n    for (size_t i = 0; i < 100; i += step) {\n      do_add(i);\n    }\n  };\n\n  auto do_remove = [&](size_t i) {\n    auto pair = expected_values[i];\n    tree.Remove(pair.first, pair.second);\n  };\n\n  auto 
remove_entries_with_step = [&](size_t step) {\n    Container expected_values_copy;\n    for (size_t i = 0; i < expected_values.size(); i++) {\n      if (i % step == 0) {\n        do_remove(i);\n      } else {\n        expected_values_copy.push_back(expected_values[i]);\n      }\n    }\n    expected_values = std::move(expected_values_copy);\n  };\n\n  // First wave of Add and Remove\n  add_entries_with_step(1);\n\n  step = 37;\n  current_value = max_value;\n  add_entries_with_step(3);\n\n  // Remove some values\n  remove_entries_with_step(3);\n\n  auto result = tree.GetAllBlocks();\n  EXPECT_THAT(result, UnorderedElementsAreDocPairs(expected_values));\n\n  // Second wave of Add and Remove\n  step = 31;\n  current_value = max_value;\n  add_entries_with_step(5);\n\n  // Remove a first half of the values\n  remove_entries_with_step(2);\n\n  result = tree.GetAllBlocks();\n  EXPECT_THAT(result, UnorderedElementsAreDocPairs(expected_values));\n\n  // Remove all values\n  remove_entries_with_step(1);\n\n  result = tree.GetAllBlocks();\n  EXPECT_THAT(result, UnorderedElementsAreDocPairs({}));\n}\n\nTEST_F(RangeTreeTest, RangeSimple) {\n  RangeTree tree{PMR_NS::get_default_resource(), 1};\n\n  // Add some values\n  tree.Add(1, 10.0);\n  tree.Add(1, 20.0);\n  tree.Add(2, 20.0);\n  tree.Add(2, 30.0);\n  tree.Add(3, 30.0);\n  tree.Add(3, 40.0);\n  tree.Add(4, 40.0);\n\n  auto result = tree.RangeBlocks(10.0, 10.0);\n  EXPECT_THAT(result, BlocksAre({{{1, 10.0}}}));\n\n  result = tree.RangeBlocks(20.0, 20.0);\n  EXPECT_THAT(result, BlocksAre({{{1, 20.0}, {2, 20.0}}}));\n\n  result = tree.RangeBlocks(30.0, 30.0);\n  EXPECT_THAT(result, BlocksAre({{{2, 30.0}, {3, 30.0}}}));\n\n  result = tree.RangeBlocks(40.0, 40.0);\n  EXPECT_THAT(result, BlocksAre({{{3, 40.0}, {4, 40.0}}}));\n\n  result = tree.RangeBlocks(10.0, 30.0);\n  EXPECT_THAT(result, BlocksAre({{{1, 10.0}}, {{1, 20.0}, {2, 20.0}}, {{2, 30.0}, {3, 30.0}}}));\n\n  result = tree.RangeBlocks(20.0, 40.0);\n  
EXPECT_THAT(result,\n              BlocksAre({{{1, 20.0}, {2, 20.0}}, {{2, 30.0}, {3, 30.0}}, {{3, 40.0}, {4, 40.0}}}));\n\n  result = tree.RangeBlocks(10.0, 40.0);\n  EXPECT_THAT(\n      result,\n      BlocksAre(\n          {{{1, 10.0}}, {{1, 20.0}, {2, 20.0}}, {{2, 30.0}, {3, 30.0}}, {{3, 40.0}, {4, 40.0}}}));\n}\n\nTEST_F(RangeTreeTest, Range) {\n  {\n    RangeTree tree{PMR_NS::get_default_resource(), 4};\n\n    tree.Add(1, 10.0);\n    tree.Add(1, 20.0);\n    tree.Add(2, 20.0);\n    tree.Add(3, 30.0);\n    tree.Add(4, 20.0);\n    tree.Add(4, 30.0);\n\n    auto result = tree.RangeBlocks(10.0, 30.0);\n    EXPECT_THAT(\n        result,\n        BlocksAre({{{1, 10.0}}, {{1, 20.0}, {2, 20.0}, {4, 20.0}}, {{3, 30.0}, {4, 30.0}}}));\n  }\n\n  {\n    RangeTree tree{PMR_NS::get_default_resource(), 4};\n\n    tree.Add(1, 10.0);\n    tree.Add(1, 20.0);\n    tree.Add(2, 20.0);\n    tree.Add(3, 20.0);\n    tree.Add(4, 20.0);\n\n    auto result = tree.RangeBlocks(10.0, 20.0);\n    EXPECT_THAT(result, BlocksAre({{{1, 10.0}}, {{1, 20.0}, {2, 20.0}, {3, 20.0}, {4, 20.0}}}));\n  }\n\n  {\n    RangeTree tree{PMR_NS::get_default_resource(), 4};\n\n    tree.Add(1, 10.0);\n    tree.Add(2, 10.0);\n    tree.Add(3, 10.0);\n    tree.Add(4, 20.0);\n    tree.Add(4, 10.0);\n\n    auto result = tree.RangeBlocks(10.0, 20.0);\n    EXPECT_THAT(result, BlocksAre({{{1, 10.0}, {2, 10.0}, {3, 10.0}, {4, 10.0}}, {{4, 20.0}}}));\n  }\n}\n\n// Don't split single block with same value\nTEST_F(RangeTreeTest, SingleBlockSplit) {\n  RangeTree tree{PMR_NS::get_default_resource(), 4};\n\n  for (DocId id = 1; id <= 16; id++)\n    tree.Add(id, 5.0);\n\n  // One split was made to create an empty leftmost block\n  auto stats = tree.GetStats();\n  EXPECT_EQ(stats.splits, 1u);\n  EXPECT_EQ(stats.block_count, 2u);\n\n  // Add value that causes a new block to be started\n  tree.Add(20, 6.0);\n\n  stats = tree.GetStats();\n  EXPECT_EQ(stats.splits, 1u);       // detected ahead, so no split\n  
EXPECT_EQ(stats.block_count, 3u);  // but new block\n\n  // No more splits with same 5.0\n  tree.Add(17, 5.0);\n  stats = tree.GetStats();\n  EXPECT_EQ(stats.splits, 1u);\n\n  // Verify block sizes\n  auto blocks = tree.GetAllBlocks();\n  EXPECT_EQ(blocks[0]->Size(), 0u);\n  EXPECT_EQ(blocks[1]->Size(), 17u);\n  EXPECT_EQ(blocks[2]->Size(), 1u);\n}\n\n// Make tree split and then delete every nth value to see if blocks merge properly\nTEST_F(RangeTreeTest, BlockMerge) {\n  RangeTree tree{PMR_NS::get_default_resource(), 8};\n  for (DocId id = 1; id <= 64; id++)\n    tree.Add(id, id);\n\n  auto stats = tree.GetStats();\n  uint64_t splits = stats.splits;\n  EXPECT_GT(splits, 8u);\n\n  // Blocks have at least half occupancy\n  EXPECT_GT(stats.block_count, 64 / 8);\n  EXPECT_LT(stats.block_count, 2 * 64 / 8);\n\n  // Delete all except  %8 = 0, should trigger merge\n  std::vector<Entry> expected;\n  for (DocId id = 1; id <= 64; id++) {\n    if (id % 8)\n      tree.Remove(id, id);\n    else\n      expected.emplace_back(id, id);\n  }\n\n  // Only one block left now\n  stats = tree.GetStats();\n  size_t blocks = stats.block_count;\n  EXPECT_LT(blocks, 4u);\n  EXPECT_EQ(stats.merges + blocks - 1, splits);\n\n  // Check the two entries remained\n  auto result = tree.GetAllBlocks();\n  EXPECT_THAT(result, UnorderedElementsAreDocPairs(expected));\n}\n\nTEST_F(RangeTreeTest, BugNotUniqueDoubleValues) {\n  // TODO: fix the bug\n  GTEST_SKIP() << \"Bug not fixed yet\";\n\n  RangeTree tree{PMR_NS::get_default_resource()};\n\n  tree.Add(1, 10.0);\n  tree.Add(1, 10.0);\n  tree.Remove(1, 10.0);\n\n  auto result = tree.GetAllBlocks();\n  EXPECT_THAT(result, BlocksAre({{{1, 10.0}}}));\n}\n\nTEST_F(RangeTreeTest, RangeResultTwoBlocksSimple) {\n  RangeTree tree{PMR_NS::get_default_resource(), 4};\n\n  // First block: [[1, 10.0], [16, 12.0], [12, 15.0], [5, 17.0]]\n  // Second block: [[8, 20.0], [5, 30.0], [12, 50.0], [20, 55.0]]\n  // [10.0, 12.0, 15.0, 17.0] | [20.0, 30.0, 50.0, 55.0]\n  
tree.Add(1, 10.0);   // 1\n  tree.Add(5, 30.0);   // 2\n  tree.Add(20, 55.0);  // 2\n  tree.Add(5, 17.0);   // 1\n  tree.Add(8, 20.0);   // 2\n  tree.Add(16, 12.0);  // 1\n  tree.Add(12, 15.0);  // 1\n  tree.Add(12, 50.0);  // 2\n\n  EXPECT_THAT(tree.RangeBlocks(10.0, 55.0),\n              BlocksAre({{{1, 10.0}, {16, 12.0}, {12, 15.0}, {5, 17.0}},\n                         {{8, 20.0}, {5, 30.0}, {12, 50.0}, {20, 55.0}}}));\n\n  std::vector<Entry> entries = {{1, 10.0}, {16, 12.0}, {12, 15.0}, {5, 17.0},\n                                {8, 20.0}, {5, 30.0},  {12, 50.0}, {20, 55.0}};\n\n  for (size_t i = 0; i < entries.size() / 2; i++) {\n    const double l = entries[i].second;\n    for (size_t j = entries.size() / 2; j < entries.size(); j++) {\n      const double r = entries[j].second;\n      auto range_result = MergeTwoBlocksRangeResult(tree, l, r);\n      EXPECT_THAT(range_result, testing::ElementsAreArray(ExtractDocIdsFromRange(entries, l, r)));\n    }\n  }\n}\n\nTEST_F(RangeTreeTest, RangeResultTwoBlocks) {\n  RangeTree tree{PMR_NS::get_default_resource(), 50};\n\n  const long long max_value = 100;\n  long long step = 23;\n  long long current_value = max_value;\n\n  std::vector<Entry> entries;\n  for (size_t i = 0; i < 20; i++) {\n    const double value = static_cast<double>(current_value);\n    entries.emplace_back(i, value);\n    entries.emplace_back(i, value + 100.0);\n    current_value = (max_value + current_value - step) % max_value;\n  }\n  for (size_t i = 20; i < 80; i++) {\n    const double value = static_cast<double>(current_value);\n    entries.emplace_back(i, value);\n    current_value = (max_value + current_value - step) % max_value;\n  }\n\n  DCHECK(entries.size() == 100);\n\n  std::sort(entries.begin(), entries.end(),\n            [](const Entry& a, const Entry& b) { return a.second < b.second; });\n\n  auto add_entries = [&tree, &entries](size_t start, size_t end) {\n    for (size_t i = start; i < end; i++) {\n      tree.Add(entries[i].first, 
entries[i].second);\n    }\n  };\n\n  add_entries(0, 25);\n  add_entries(50, 76);\n  add_entries(25, 50);\n  add_entries(76, entries.size());\n\n  for (size_t i = 0; i < 50; i++) {\n    const double l = entries[i].second;\n    for (size_t j = 50; j < entries.size(); j++) {\n      const double r = entries[j].second;\n      auto range_result = MergeTwoBlocksRangeResult(tree, l, r);\n      EXPECT_THAT(range_result, testing::ElementsAreArray(ExtractDocIdsFromRange(entries, l, r)));\n    }\n  }\n}\n\nstruct BuilderTest : public RangeTreeTest {\n  static void Shuffle(std::vector<RangeTree::Entry>* entries) {\n    std::random_device rd;\n    std::shuffle(entries->begin(), entries->end(), std::mt19937(rd()));\n  }\n};\n\n// Test if the builder builds the tree correctly\nTEST_F(BuilderTest, Builder) {\n  RangeTree tree{PMR_NS::get_default_resource(), 4};\n  RangeTree::Builder builder;\n\n  // Prepare entries shuffled\n  std::vector<RangeTree::Entry> entries;\n  entries.reserve(100);\n  for (size_t i = 0; i < 120; i++)\n    entries.emplace_back(i, double(i) / 2);\n  Shuffle(&entries);\n\n  // Add fake entries\n  for (auto [id, v] : entries) {\n    builder.Add(id, v * 2);\n  }\n\n  // Add all entries for real\n  for (auto [id, v] : entries) {\n    builder.Remove(id, v * 2);\n    builder.Add(id, v);\n  }\n\n  // Shuffle again\n  Shuffle(&entries);\n\n  // Remove last\n  while (entries.size() > 100) {\n    builder.Remove(entries.back().first, entries.back().second);\n    entries.pop_back();\n  }\n\n  // Build tree\n  builder.Populate(&tree, RenewableQuota::Unlimited());\n\n  // Sort for comparisons\n  std::ranges::sort(entries, {}, &RangeTree::Entry::first);\n  auto entry_ids = entries | std::views::keys;\n\n  // Check correctness of all ids\n  {\n    auto all_values = tree.Range(-1000, +1000);\n    auto got_ids = all_values.Take();\n    EXPECT_TRUE(std::ranges::equal(got_ids, entry_ids));\n  }\n\n  // Check correctness of all values including ids\n  {\n    auto all_pairs = 
ExtractDocPairs(tree.GetAllBlocks());\n    std::sort(all_pairs.begin(), all_pairs.end());\n    EXPECT_EQ(all_pairs, entries);\n  }\n}\n\nTEST_F(BuilderTest, BuilderUpdates) {\n  RangeTree tree{PMR_NS::get_default_resource(), 5};\n  RangeTree::Builder builder;\n\n  // Prepare entries shuffled\n  std::vector<RangeTree::Entry> entries;\n  entries.reserve(1000);\n  for (size_t i = 0; i < 1000; i++) {\n    entries.emplace_back(i, double(i) / 2);\n    entries.emplace_back(i, double(i) / 2 + 0.25);\n  }\n  Shuffle(&entries);\n\n  // Insert entries\n  for (auto entry : entries)\n    builder.Add(entry.first, entry.second);\n\n  // Construct while suspending at every node\n  bool done = false;\n  util::fb2::Fiber populate_fb{[&] {\n    builder.Populate(&tree, {0});  // suspend each time\n    done = true;\n  }};\n\n  // In the meantime insert new entries\n  DocId current = entries.size();\n  bool add = false;\n  size_t added = 0;\n  absl::InsecureBitGen gen;\n  while (!done) {\n    if (add) {\n      entries.emplace_back(current, double(current) / 2);\n      builder.Add(entries.back().first, entries.back().second);\n      current++;\n    } else {\n      size_t idx = absl::Uniform(gen, size_t{0}, entries.size());\n      auto it = entries.begin() + idx;\n      builder.Remove(it->first, it->second);\n\n      // Change our mind with 50% prob and just update\n      if (current % 2 == 0) {\n        it->second += 1;\n        builder.Add(it->first, it->second);\n      } else {\n        entries.erase(it);\n      }\n    }\n    add = !add;\n    added++;\n    util::ThisFiber::Yield();\n  }\n\n  EXPECT_GT(added, 5u);  // At least some updates were performed\n\n  populate_fb.Join();\n\n  // Sort for comparisons\n  std::sort(entries.begin(), entries.end());\n  // auto entry_ids_view = entries | std::views::keys;\n\n  // Check correctness of all ids\n  // TODO: Range tree doesn't filter duplicate ids\n  //{\n  //  auto all_values = tree.Range(-100000, +100000);\n  //  auto got_ids = 
all_values.Take();\n  //\n  //  std::set entry_ids_set(entry_ids_view.begin(), entry_ids_view.end());\n  //  std::vector entry_ids_vec(entry_ids_set.begin(), entry_ids_set.end());\n  //\n  //  EXPECT_EQ(got_ids, entry_ids_vec);\n  //}\n\n  // Check correctness of all values including ids\n  {\n    auto all_pairs = ExtractDocPairs(tree.GetAllBlocks());\n    std::sort(all_pairs.begin(), all_pairs.end());\n    EXPECT_EQ(all_pairs, entries);\n  }\n}\n\n// Test tree doesn't create unnecessary nodes after initialization\nTEST_F(RangeTreeTest, DiscreteIntialization) {\n  RangeTree tree{PMR_NS::get_default_resource(), 4};\n  RangeTree::Builder builder;\n\n  for (size_t i = 0; i < 32; i++) {\n    builder.Add(i, i % 4);\n  }\n  builder.Populate(&tree, RenewableQuota::Unlimited());\n\n  auto result = tree.GetAllBlocks();\n  EXPECT_EQ(result.size(), 4u);\n}\n\n// Benchmark tree insertion performance with set of discrete values\nstatic void BM_DiscreteInsertion(benchmark::State& state) {\n  RangeTree tree{PMR_NS::get_default_resource()};\n\n  absl::InsecureBitGen gen{};\n  size_t variety = state.range(0);\n\n  DocId id = 0;\n  for (auto _ : state) {\n    double v = absl::Uniform(gen, 0u, variety);\n    tree.Add(id++, v);\n  }\n}\n\nBENCHMARK(BM_DiscreteInsertion)->Arg(2)->Arg(12)->Arg(128)->Arg(1024);\n\n}  // namespace dfly::search\n"
  },
  {
    "path": "src/core/search/rax_tree.h",
    "content": "#pragma once\n\n#include <cassert>\n#include <memory>\n#include <optional>\n#include <string>\n#include <string_view>\n#include <utility>\n\n#include \"base/pmr/memory_resource.h\"\n\nextern \"C\" {\n#include \"redis/rax.h\"\n}\n\nnamespace detail {\n\n// Copies an iterators state into another by performing a fresh seek on the source's key. While this\n// is a little more expensive, it is done to avoid deep copying pointers from raxIterator and\n// raxStart while taking care of self-reference links in both structs. The return value is used to\n// decide whether to advance iterator after a successful seek.\ninline bool CopyIteratorState(raxIterator& destination, raxIterator& source) {\n  raxStart(&destination, source.rt);\n  if (!destination.rt)\n    return false;\n\n  if (!raxSeek(&destination, \"=\", source.key, source.key_len)) {\n    // called from constructor, so no error can be returned. but set up the same state as\n    // the SeekIterator constructor, so that it will return true on comparison to RaxTreeMap::end()\n    raxStop(&destination);\n    destination.rt = nullptr;\n    return false;\n  }\n\n  return true;\n}\n\n}  // namespace detail\n\nnamespace dfly::search {\n\n// absl::flat_hash_map/std::unordered_map compatible tree map based on rax tree.\n// Allocates all objects on heap (with custom memory resource) as rax tree operates fully on\n// pointers.\n// TODO: Add full support for polymorphic allocators, including rax trie node allocations\ntemplate <typename V> struct RaxTreeMap {\n  using value_type = V;\n\n  struct FindIterator;\n\n  // Simple seeking iterator\n  struct SeekIterator {\n    SeekIterator() {\n      it_.rt = nullptr;\n    }\n\n    SeekIterator(rax* tree, const char* op, std::string_view key) {\n      raxStart(&it_, tree);\n      if (raxSeek(&it_, op, to_key_ptr(key), key.size())) {  // Successfuly seeked\n        operator++();\n      } else {\n        InvalidateIterator();\n      }\n    }\n\n    explicit 
SeekIterator(rax* tree) : SeekIterator(tree, \"^\", std::string_view{nullptr, 0}) {\n    }\n\n    SeekIterator(SeekIterator&& other) noexcept : it_{} {\n      *this = std::move(other);\n    }\n\n    SeekIterator& operator=(SeekIterator&& other) noexcept {\n      if (this != &other) {\n        if (IsValid()) {\n          InvalidateIterator();\n        }\n        if (::detail::CopyIteratorState(it_, other.it_))\n          operator++();\n        if (other.IsValid())\n          other.InvalidateIterator();\n      }\n      return *this;\n    }\n\n    /* Copy constructor deleted to avoid double iterator invalidation */\n    SeekIterator(const SeekIterator&) = delete;\n    SeekIterator& operator=(const SeekIterator&) = delete;\n\n    ~SeekIterator() {\n      if (IsValid()) {\n        InvalidateIterator();\n      }\n    }\n\n    bool operator==(const SeekIterator& rhs) const {\n      if (!IsValid() || !rhs.IsValid())\n        return !IsValid() && !rhs.IsValid();\n      return it_.node == rhs.it_.node;\n    }\n\n    bool operator!=(const SeekIterator& rhs) const {\n      return !operator==(rhs);\n    }\n\n    SeekIterator& operator++() {\n      int next_result = raxNext(&it_);\n      if (!next_result) {  // OOM or we reached the end of the tree\n        InvalidateIterator();\n      }\n      return *this;\n    }\n\n    /* After operator++() the first value (string_view) is invalid. So make sure you copied it to\n     * a string */\n    std::pair<std::string_view, V&> operator*() const {\n      assert(IsValid() && it_.node && it_.node->iskey && it_.data);\n      return {std::string_view{reinterpret_cast<const char*>(it_.key), it_.key_len},\n              *reinterpret_cast<V*>(it_.data)};\n    }\n\n    bool IsValid() const {\n      return it_.rt;\n    }\n\n   private:\n    void InvalidateIterator() {\n      raxStop(&it_);\n      it_.rt = nullptr;\n    }\n\n    raxIterator it_;\n  };\n  using iterator = SeekIterator;\n\n  // Result of find() call. 
Inherits from optional<pair> to mimic an iterator, not incrementable.\n  struct FindIterator : public std::optional<std::pair<std::string, V&>> {\n    bool operator==(const SeekIterator& rhs) const {\n      if (!this->has_value() || !rhs.IsValid())\n        return !this->has_value() && !rhs.IsValid();\n      return (*this)->first == (*rhs).first;\n    }\n\n    bool operator!=(const SeekIterator& rhs) const {\n      return !operator==(rhs);\n    }\n  };\n\n public:\n  explicit RaxTreeMap(PMR_NS::memory_resource* mr) : tree_(raxNew()), alloc_(mr) {\n  }\n\n  ~RaxTreeMap() {\n    using Allocator = decltype(alloc_);\n\n    auto free_callback = [](void* data, void* context) {\n      Allocator* allocator = static_cast<Allocator*>(context);\n      V* ptr = static_cast<V*>(data);\n      std::allocator_traits<Allocator>::destroy(*allocator, ptr);\n      allocator->deallocate(ptr, 1);\n    };\n\n    raxFreeWithCallbackAndArgument(tree_, free_callback, &alloc_);\n  }\n\n  size_t size() const {\n    return raxSize(tree_);\n  }\n\n  auto begin() const {\n    return SeekIterator{tree_};\n  }\n\n  auto end() const {\n    return SeekIterator{};\n  }\n\n  auto lower_bound(std::string_view key) const {\n    return SeekIterator{tree_, \">=\", key};\n  }\n\n  FindIterator find(std::string_view key) const {\n    if (void* ptr = nullptr; raxFind(tree_, to_key_ptr(key), key.size(), &ptr))\n      return FindIterator{std::pair<std::string, V&>(std::string(key), *reinterpret_cast<V*>(ptr))};\n\n    return FindIterator{std::nullopt};\n  }\n\n  template <typename... Args>\n  std::pair<FindIterator, bool> try_emplace(std::string_view key, Args&&... 
args);\n\n  void erase(FindIterator it) {\n    V* old = nullptr;\n    raxRemove(tree_, to_key_ptr(it->first.data()), it->first.size(),\n              reinterpret_cast<void**>(&old));\n    std::allocator_traits<decltype(alloc_)>::destroy(alloc_, old);\n    alloc_.deallocate(old, 1);\n  }\n\n  auto& get_allocator() const {\n    return alloc_;\n  }\n\n private:\n  static unsigned char* to_key_ptr(std::string_view key) {\n    return reinterpret_cast<unsigned char*>(const_cast<char*>(key.data()));\n  }\n\n  rax* tree_;\n  PMR_NS::polymorphic_allocator<V> alloc_;\n};\n\ntemplate <typename V>\ntemplate <typename... Args>\nstd::pair<typename RaxTreeMap<V>::FindIterator, bool> RaxTreeMap<V>::try_emplace(\n    std::string_view key, Args&&... args) {\n  if (auto it = find(key); it)\n    return {it, false};\n\n  V* ptr = alloc_.allocate(1);\n  std::allocator_traits<decltype(alloc_)>::construct(alloc_, ptr, std::forward<Args>(args)...);\n\n  V* old = nullptr;\n  raxInsert(tree_, to_key_ptr(key), key.size(), ptr, reinterpret_cast<void**>(&old));\n  assert(!old);\n\n  auto it = std::make_optional(std::pair<std::string, V&>(std::string(key), *ptr));\n  return std::make_pair(std::move(FindIterator{it}), true);\n}\n\n}  // namespace dfly::search\n"
  },
  {
    "path": "src/core/search/rax_tree_test.cc",
    "content": "// Copyright 2023, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#include \"core/search/rax_tree.h\"\n\n#include <absl/container/btree_set.h>\n#include <absl/strings/str_cat.h>\n#include <gtest/gtest.h>\n#include <mimalloc.h>\n\n#include <algorithm>\n#include <memory_resource>\n\n#include \"base/gtest.h\"\n#include \"base/iterator.h\"\n#include \"base/logging.h\"\n\nextern \"C\" {\n#include \"redis/zmalloc.h\"\n}\n\nnamespace dfly::search {\n\nusing namespace std;\n\nstruct RaxTreeTest : public ::testing::Test {\n  static void SetUpTestSuite() {\n    auto* tlh = mi_heap_get_backing();\n    init_zmalloc_threadlocal(tlh);\n  }\n};\n\nTEST_F(RaxTreeTest, EmplaceAndIterate) {\n  RaxTreeMap<std::string> map(pmr::get_default_resource());\n\n  vector<pair<string, string>> elements(90);\n  for (int i = 10; i < 100; i++)\n    elements[i - 10] = make_pair(absl::StrCat(\"key-\", i), absl::StrCat(\"value-\", i));\n\n  for (auto& [key, value] : elements) {\n    auto [it, inserted] = map.try_emplace(key, value);\n    EXPECT_TRUE(inserted);\n    EXPECT_EQ(it->first, key);\n    EXPECT_EQ(it->second, value);\n  }\n\n  size_t i = 0;\n  for (auto [key, value] : map) {\n    EXPECT_EQ(elements[i].first, key);\n    EXPECT_EQ(elements[i].second, value);\n    i++;\n  }\n}\n\nTEST_F(RaxTreeTest, LowerBound) {\n  RaxTreeMap<int> map(pmr::get_default_resource());\n  vector<string> keys;\n\n  for (unsigned i = 0; i < 5; i++) {\n    for (unsigned j = 0; j < 5; j++) {\n      keys.emplace_back(absl::StrCat(\"key-\", string(1, 'a' + i), \"-\", j));\n      map.try_emplace(keys.back(), 0);\n    }\n  }\n\n  auto it1 = map.lower_bound(\"key-c-3\");\n  auto it2 = lower_bound(keys.begin(), keys.end(), \"key-c-3\");\n\n  while (it1 != map.end()) {\n    EXPECT_EQ((*it1).first, *it2);\n    ++it1;\n    ++it2;\n  }\n\n  EXPECT_TRUE(it1 == map.end());\n  EXPECT_TRUE(it2 == keys.end());\n\n  // Test lower bound empty string\n  vector<string> keys2;\n  
for (auto it = map.lower_bound(string_view{}); it != map.end(); ++it)\n    keys2.emplace_back((*it).first);\n  EXPECT_EQ(keys, keys2);\n}\n\nTEST_F(RaxTreeTest, Find) {\n  RaxTreeMap<int> map(pmr::get_default_resource());\n  for (unsigned i = 100; i < 999; i += 2)\n    map.try_emplace(absl::StrCat(\"value-\", i), i);\n\n  auto it = map.begin();\n  for (unsigned i = 100; i < 999; i++) {\n    auto fit = map.find(absl::StrCat(\"value-\", i));\n    if (i % 2 == 0) {\n      EXPECT_TRUE(fit == it);\n      EXPECT_EQ(fit->second, i);\n      ++it;\n    } else {\n      EXPECT_TRUE(fit == map.end());\n    }\n  }\n\n  // Test find with empty string\n  EXPECT_TRUE(map.find(string_view{}) == map.end());\n}\n\n/* Run with mimalloc to make sure there is no double free */\nTEST_F(RaxTreeTest, Iterate) {\n  const char* kKeys[] = {\n      \"aaaaaaaaaaaaaaaaaaaa\",\n      \"bbbbbbbbbbbbbbbbbbbbbb\"\n      \"cccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc\",\n      \"dddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddd\"\n      \"eeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeee\",\n  };\n\n  RaxTreeMap<int> map(pmr::get_default_resource());\n  for (const char* key : kKeys) {\n    map.try_emplace(key, 2);\n  }\n\n  for (auto it = map.begin(); it != map.end(); ++it) {\n    EXPECT_EQ((*it).second, 2);\n  }\n\n  for (auto it = map.begin(); it != map.end(); ++it) {\n    EXPECT_EQ((*it).second, 2);\n  }\n}\n\nTEST_F(RaxTreeTest, MoveIterator) {\n  RaxTreeMap<bool> m{pmr::get_default_resource()};\n  RaxTreeMap<bool>::SeekIterator tmp;\n  {\n    // empty map, iterator invalidated on construction\n    tmp = m.begin();\n    const auto it = std::move(tmp);\n    EXPECT_FALSE(tmp.IsValid());\n    EXPECT_FALSE(it.IsValid());\n  }\n\n  {\n    tmp = m.end();\n    const auto it = std::move(tmp);\n    EXPECT_FALSE(tmp.IsValid());\n    EXPECT_FALSE(it.IsValid());\n    EXPECT_EQ(it, 
m.end());\n  }\n\n  m.try_emplace(\"first\", true);\n  m.try_emplace(\"second\", false);\n\n  {\n    tmp = m.begin();\n    RaxTreeMap<bool>::SeekIterator it{std::move(tmp)};\n    EXPECT_FALSE(tmp.IsValid());\n    EXPECT_TRUE(it.IsValid());\n    EXPECT_EQ((*it).first, \"first\");\n    EXPECT_TRUE((*it).second);\n\n    ++it;\n\n    EXPECT_EQ((*it).first, \"second\");\n    EXPECT_FALSE((*it).second);\n\n    ++it;\n    EXPECT_EQ(it, m.end());\n  }\n\n  {\n    // advance before moving, the moved-to iterator should pick where the moved-from left off\n    tmp = m.lower_bound(\"fig\");\n    EXPECT_TRUE(tmp.IsValid());\n\n    ++tmp;\n    EXPECT_EQ((*tmp).first, \"second\");\n\n    auto it = std::move(tmp);\n    EXPECT_FALSE(tmp.IsValid());\n    EXPECT_TRUE(it.IsValid());\n    EXPECT_EQ((*it).first, \"second\");\n\n    ++it;\n    EXPECT_FALSE(it.IsValid());\n    EXPECT_EQ(it, m.end());\n  }\n\n  {\n    // move into valid iterator\n    auto it = m.begin();\n    EXPECT_EQ((*it).first, \"first\");\n\n    tmp = m.lower_bound(\"sea\");\n    EXPECT_EQ((*tmp).first, \"second\");\n\n    it = std::move(tmp);\n    EXPECT_FALSE(tmp.IsValid());\n    EXPECT_TRUE(it.IsValid());\n\n    EXPECT_EQ((*it).first, \"second\");\n    ++it;\n    EXPECT_FALSE(it.IsValid());\n    EXPECT_EQ(it, m.end());\n  }\n\n  {\n    auto it = m.lower_bound(\"sea\");\n    EXPECT_EQ((*it).first, \"second\");\n\n    tmp = m.end();\n    it = std::move(tmp);\n\n    EXPECT_FALSE(it.IsValid());\n  }\n}\n\n}  // namespace dfly::search\n"
  },
  {
    "path": "src/core/search/renewable_quota.cc",
    "content": "// Copyright 2026, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#include \"core/search/renewable_quota.h\"\n\n#include \"base/cycle_clock.h\"\n#include \"base/logging.h\"\n#include \"util/fibers/fibers.h\"\n\nnamespace dfly::search {\n\nRenewableQuota RenewableQuota::Unlimited() {\n  return RenewableQuota{std::numeric_limits<size_t>::max()};\n}\n\n// Quota that yields if the fiber is running for too long\nvoid RenewableQuota::Check(std::source_location location) const {\n  size_t cycles = util::ThisFiber::GetRunningTimeCycles();\n  size_t usec = base::CycleClock::ToUsec(cycles);\n  if (usec >= max_usec) {\n    size_t ms = usec / 1'000;\n    VLOG_IF(1, ms >= 50) << \"Grabbed \" << ms << \"ms for \" << location.file_name() << \":\"\n                         << location.line();\n\n    util::ThisFiber::Yield();\n  }\n}\n}  // namespace dfly::search\n"
  },
  {
    "path": "src/core/search/renewable_quota.h",
    "content": "// Copyright 2026, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#pragma once\n\n#include <cstddef>\n#include <source_location>\n\nnamespace dfly::search {\n\n// Running time quota that can be reset by suspending the fiber\nstruct RenewableQuota {\n  // Create unlimited quota\n  static RenewableQuota Unlimited();\n\n  // Check if quota is remaining and suspend the fiber if it ran out\n  void Check(std::source_location location = std::source_location::current()) const;\n\n  const size_t max_usec;\n};\n\n}  // namespace dfly::search\n"
  },
  {
    "path": "src/core/search/scanner.h",
    "content": "// Copyright 2023, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#pragma once\n\n// We should not include lexer.h when compiling from lexer.cc file because it already\n// includes lexer.h\n#ifndef DFLY_LEXER_CC\n#include \"core/search/lexer.h\"\n#endif\n\n#include <absl/strings/str_cat.h>\n\n#include \"base/logging.h\"\n\nnamespace dfly {\nnamespace search {\n\nclass Scanner : public Lexer {\n public:\n  Scanner() : params_{nullptr} {\n  }\n\n  Parser::symbol_type Lex();\n\n  void SetParams(const QueryParams* params) {\n    params_ = params;\n  }\n\n private:\n  std::string_view matched_view(size_t skip_left = 0, size_t skip_right = 0) const {\n    std::string_view res(matcher().begin() + skip_left, matcher().size() - skip_left - skip_right);\n    return res;\n  }\n\n  dfly::search::location loc() {\n    return location();\n  }\n\n  Parser::symbol_type ParseParam(std::string_view name, const Parser::location_type& loc) {\n    name.remove_prefix(1);  // drop $ symbol\n\n    std::string_view str = (*params_)[name];\n    if (str.empty())\n      throw std::runtime_error(absl::StrCat(\"Query parameter \", name, \" not found\"));\n\n    uint32_t val = 0;\n    if (!absl::SimpleAtoi(str, &val))\n      return Parser::make_TERM(std::string{str}, loc);\n\n    return Parser::make_UINT32(std::string{str}, loc);\n  }\n\n private:\n  const QueryParams* params_;\n};\n\n}  // namespace search\n}  // namespace dfly\n"
  },
  {
    "path": "src/core/search/search.cc",
    "content": "// Copyright 2023, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#include \"core/search/search.h\"\n\n#include <absl/cleanup/cleanup.h>\n#include <absl/container/flat_hash_set.h>\n#include <absl/strings/str_cat.h>\n#include <absl/strings/str_join.h>\n\n#include <chrono>\n#include <type_traits>\n#include <variant>\n\n#include \"base/logging.h\"\n#include \"core/overloaded.h\"\n#include \"core/search/ast_expr.h\"\n#include \"core/search/index_result.h\"\n#include \"core/search/indices.h\"\n#include \"core/search/query_driver.h\"\n#include \"core/search/sort_indices.h\"\n#include \"core/search/tag_types.h\"\n#include \"core/search/vector_utils.h\"\n\nusing namespace std;\n\nnamespace dfly::search {\n\nnamespace {\n\nAstExpr ParseQuery(std::string_view query, const QueryParams* params,\n                   const OptionalFilters* filters) {\n  QueryDriver driver{};\n  driver.ResetScanner();\n  driver.SetParams(params);\n  driver.SetInput(std::string{query});\n  (void)Parser (&driver)();  // can throw\n  driver.SetOptionalFilters(filters);\n  return driver.Take();\n}\n\n// GCC 12 yields a wrong warning in a deeply inlined call in UnifyResults, only ignoring the whole\n// scope solves it\n#ifndef __clang__\n#pragma GCC diagnostic push\n#pragma GCC diagnostic ignored \"-Wmaybe-uninitialized\"\n#endif\n\nstruct ProfileBuilder {\n  struct NodeFormatter {\n    template <TagType T> void operator()(std::string* out, const AstAffixNode<T>& node) const {\n      out->append(node.affix);\n    }\n    void operator()(std::string* out, const AstTagsNode::TagValue& value) const {\n      visit([this, out](const auto& n) { this->operator()(out, n); }, value);\n    }\n  };\n\n  string GetNodeInfo(const AstNode& node) {\n    Overloaded node_info{\n        [](monostate) -> string { return \"\"s; },\n        [](const AstTermNode& n) { return absl::StrCat(\"Term{\", n.affix, \"}\"); },\n        [](const AstPrefixNode& n) { return 
absl::StrCat(\"Prefix{\", n.affix, \"}\"); },\n        [](const AstSuffixNode& n) { return absl::StrCat(\"Suffix{\", n.affix, \"}\"); },\n        [](const AstInfixNode& n) { return absl::StrCat(\"Infix{\", n.affix, \"}\"); },\n        [](const AstRangeNode& n) { return absl::StrCat(\"Range{\", n.lo, \"<>\", n.hi, \"}\"); },\n        [](const AstLogicalNode& n) {\n          auto op = n.op == AstLogicalNode::AND ? \"and\" : \"or\";\n          return absl::StrCat(\"Logical{n=\", n.nodes.size(), \",o=\", op, \"}\");\n        },\n        [](const AstTagsNode& n) {\n          return absl::StrCat(\"Tags{\", absl::StrJoin(n.tags, \",\", NodeFormatter()), \"}\");\n        },\n        [](const AstFieldNode& n) { return absl::StrCat(\"Field{\", n.field, \"}\"); },\n        [](const AstKnnNode& n) { return absl::StrCat(\"KNN{l=\", n.limit, \"}\"); },\n        [](const AstNegateNode& n) { return absl::StrCat(\"Negate{}\"); },\n        [](const AstStarNode& n) { return absl::StrCat(\"Star{}\"); },\n        [](const AstStarFieldNode& n) { return absl::StrCat(\"StarField{}\"); },\n        [](const AstGeoNode& n) {\n          return absl::StrCat(\"Geo{\", n.lat, \" \", n.lon, \" \", n.radius, \" \", n.unit, \"}\");\n        },\n        [](const AstVectorRangeNode& n) { return absl::StrCat(\"VectorRange{r=\", n.radius, \"}\"); },\n    };\n    return visit(node_info, node.Variant());\n  }\n\n  using Tp = std::chrono::steady_clock::time_point;\n\n  Tp Start() {\n    depth_++;\n    return chrono::steady_clock::now();\n  }\n\n  void Finish(Tp start, const AstNode& node, const IndexResult& result) {\n    DCHECK_GE(depth_, 1u);\n    auto took = chrono::steady_clock::now() - start;\n    size_t micros = chrono::duration_cast<chrono::microseconds>(took).count();\n    auto descr = GetNodeInfo(node);\n    profile_.events.push_back({std::move(descr), micros, depth_ - 1, result.ApproximateSize()});\n    depth_--;\n  }\n\n  AlgorithmProfile Take() {\n    reverse(profile_.events.begin(), 
profile_.events.end());\n    return std::move(profile_);\n  }\n\n private:\n  size_t depth_;\n  AlgorithmProfile profile_;\n};\n\nstruct BasicSearch {\n  using LogicOp = AstLogicalNode::LogicOp;\n\n  BasicSearch(const FieldIndices* indices) : indices_{indices} {\n  }\n\n  void EnableProfiling() {\n    profile_builder_ = ProfileBuilder{};\n  }\n\n  BaseIndex* GetBaseIndex(string_view field) {\n    auto index = indices_->GetIndex(field);\n    if (!index) {\n      error_ = absl::StrCat(\"Invalid field: \", field);\n      return nullptr;\n    }\n    return index;\n  }\n\n  // Get casted sub index by field\n  template <typename T> T* GetIndex(string_view field) {\n    static_assert(is_base_of_v<BaseIndex, T>);\n\n    auto base_index = GetBaseIndex(field);\n    if (!base_index) {\n      return nullptr;\n    }\n\n    auto* casted_ptr = dynamic_cast<T*>(base_index);\n    if (!casted_ptr) {\n      error_ = absl::StrCat(\"Wrong access type for field: \", field);\n      return nullptr;\n    }\n\n    return casted_ptr;\n  }\n\n  BaseSortIndex* GetSortIndex(string_view field) {\n    auto index = indices_->GetSortIndex(field);\n    if (!index) {\n      error_ = absl::StrCat(\"Invalid sort field: \", field);\n      return nullptr;\n    }\n\n    return index;\n  }\n\n  // Collect all index results from F(C[i])\n  template <typename C, typename F>\n  vector<IndexResult> GetSubResults(const C& container, const F& f) {\n    vector<IndexResult> sub_results(container.size());\n    for (size_t i = 0; i < container.size(); i++)\n      sub_results[i] = IndexResult{f(container[i])};\n    return sub_results;\n  }\n\n  void Merge(IndexResult matched, IndexResult* current_ptr, LogicOp op) {\n    IndexResult& current = *current_ptr;\n    auto vec = MergeIndexResults(matched, current, op);\n    current = IndexResult{std::move(vec)};\n  }\n\n  // Efficiently unify multiple sub results with specified logical op\n  IndexResult UnifyResults(vector<IndexResult>&& sub_results, LogicOp op) {\n    if 
(sub_results.empty())\n      return IndexResult{};\n\n    // Unifying from smallest to largest is more efficient.\n    // AND: the result only shrinks, so starting with the smallest is most optimal.\n    // OR: unifying smaller sets first reduces the number of element traversals on average.\n    sort(sub_results.begin(), sub_results.end(),\n         [](const auto& l, const auto& r) { return l.ApproximateSize() < r.ApproximateSize(); });\n\n    IndexResult out{std::move(sub_results[0])};\n    for (auto& matched : absl::MakeSpan(sub_results).subspan(1))\n      Merge(std::move(matched), &out, op);\n    return out;\n  }\n\n  template <typename C, typename F>\n  IndexResult CollectMatches(BaseStringIndex<C>* index, std::string_view word, F&& f) {\n    IndexResult result{};\n    invoke(f, *index, word,\n           [&result, this](const auto* c) { Merge(IndexResult{c}, &result, LogicOp::OR); });\n    return result;\n  }\n\n  IndexResult Search(monostate, string_view) {\n    return IndexResult{};\n  }\n\n  IndexResult Search(const AstStarNode& node, string_view active_field) {\n    DCHECK(active_field.empty());\n    return IndexResult{&indices_->GetAllDocs()};\n  }\n\n  IndexResult Search(const AstStarFieldNode& node, string_view active_field) {\n    // Try to get a sort index first, as `@field:*` might imply wanting sortable behavior\n    BaseSortIndex* sort_index = indices_->GetSortIndex(active_field);\n    if (sort_index) {\n      return IndexResult{sort_index->GetAllDocsWithNonNullValues()};\n    }\n\n    // If sort index doesn't exist try regular index\n    BaseIndex* base_index = GetBaseIndex(active_field);\n    return base_index ? 
IndexResult{base_index->GetAllDocsWithNonNullValues()} : IndexResult{};\n  }\n\n  template <TagType T> IndexResult Search(const AstAffixNode<T>& node, string_view active_field) {\n    vector<TextIndex*> indices;\n    if (!active_field.empty()) {\n      if (auto* index = GetIndex<TextIndex>(active_field); index)\n        indices = {index};\n      else\n        return IndexResult{};\n    } else {\n      indices = indices_->GetAllTextIndices();\n    }\n\n    auto mapping = [&node, this](TextIndex* index) {\n      if constexpr (T == TagType::PREFIX)\n        return CollectMatches(index, node.affix, &TextIndex::MatchPrefix);\n      else if constexpr (T == TagType::SUFFIX)\n        return CollectMatches(index, node.affix, &TextIndex::MatchSuffix);\n      else if constexpr (T == TagType::INFIX)\n        return CollectMatches(index, node.affix, &TextIndex::MatchInfix);\n      else\n        return vector<DocId>{};\n    };\n    return UnifyResults(GetSubResults(indices, mapping), LogicOp::OR);\n  }\n\n  // \"term\": access field's text index or unify results from all text indices if no field is set\n  IndexResult Search(const AstAffixNode<TagType::REGULAR> node, string_view active_field) {\n    std::string term = node.affix;\n    bool strip_whitespace = true;\n\n    if (auto synonyms = indices_->GetSynonyms(); synonyms) {\n      if (auto group_id = synonyms->GetGroupToken(term); group_id) {\n        term = *group_id;\n        strip_whitespace = false;\n      }\n    }\n\n    if (!active_field.empty()) {\n      if (auto* index = GetIndex<TextIndex>(active_field); index)\n        return IndexResult{index->Matching(term, strip_whitespace)};\n      return IndexResult{};\n    }\n\n    vector<TextIndex*> selected_indices = indices_->GetAllTextIndices();\n    auto mapping = [&term, strip_whitespace](TextIndex* index) {\n      return index->Matching(term, strip_whitespace);\n    };\n\n    return UnifyResults(GetSubResults(selected_indices, mapping), LogicOp::OR);\n  }\n\n  // 
[range]: access field's numeric index\n  IndexResult Search(const AstRangeNode& node, string_view active_field) {\n    DCHECK(!active_field.empty());\n    if (auto* index = GetIndex<NumericIndex>(active_field); index) {\n      return IndexResult{index->Range(node.lo, node.hi)};\n    }\n    return IndexResult{};\n  }\n\n  IndexResult Search(const AstGeoNode& node, string_view active_field) {\n    DCHECK(!active_field.empty());\n    if (auto* index = GetIndex<GeoIndex>(active_field); index) {\n      return IndexResult{index->RadiusSearch(node.lon, node.lat, node.radius, node.unit)};\n    }\n    return IndexResult{};\n  }\n\n  // negate -(*subquery*): explicitly compute result complement. Needs further optimizations\n  IndexResult Search(const AstNegateNode& node, string_view active_field) {\n    auto matched = SearchGeneric(*node.node, active_field).Take().first;\n    vector<DocId> all = indices_->GetAllDocs();\n\n    // To negate a result, we have to find the complement of matched to all documents,\n    // so we remove all matched documents from the set of all documents.\n    auto pred = [&matched](DocId doc) {\n      return binary_search(matched.begin(), matched.end(), doc);\n    };\n    all.erase(remove_if(all.begin(), all.end(), pred), all.end());\n    return IndexResult{std::move(all)};\n  }\n\n  // logical query: unify all sub results\n  IndexResult Search(const AstLogicalNode& node, string_view active_field) {\n    auto mapping = [&](auto& node) { return SearchGeneric(node, active_field); };\n    return UnifyResults(GetSubResults(node.nodes, mapping), node.op);\n  }\n\n  // @field: set active field for sub tree\n  IndexResult Search(const AstFieldNode& node, string_view active_field) {\n    DCHECK(active_field.empty());\n    DCHECK(node.node);\n    return SearchGeneric(*node.node, node.field);\n  }\n\n  // {tags | ...}: Unify results for all tags\n  IndexResult Search(const AstTagsNode& node, string_view active_field) {\n    auto* tag_index = 
GetIndex<TagIndex>(active_field);\n    if (!tag_index)\n      return IndexResult{};\n\n    Overloaded ov{[tag_index](const AstTermNode& term) -> IndexResult {\n                    return IndexResult{tag_index->Matching(term.affix)};\n                  },\n                  [tag_index, this](const AstPrefixNode& prefix) {\n                    return CollectMatches(tag_index, prefix.affix, &TagIndex::MatchPrefix);\n                  },\n                  [tag_index, this](const AstSuffixNode& suffix) {\n                    return CollectMatches(tag_index, suffix.affix, &TagIndex::MatchSuffix);\n                  },\n                  [tag_index, this](const AstInfixNode& infix) {\n                    return CollectMatches(tag_index, infix.affix, &TagIndex::MatchInfix);\n                  }};\n    auto mapping = [ov](const auto& tag) { return visit(ov, tag); };\n    return UnifyResults(GetSubResults(node.tags, mapping), LogicOp::OR);\n  }\n\n  void SearchKnnFlat(FlatVectorIndex* vec_index, const AstKnnNode& knn, IndexResult&& sub_results) {\n    knn_distances_.reserve(sub_results.ApproximateSize());\n    auto cb = [&](auto* set) {\n      auto [dim, sim] = vec_index->Info();\n      for (DocId matched_doc : *set) {\n        const float* vec = vec_index->Get(matched_doc);\n        if (!vec)\n          continue;\n        float dist = VectorDistance(knn.vec.first.get(), vec, dim, sim);\n        knn_distances_.emplace_back(dist, matched_doc);\n      }\n    };\n    visit(cb, sub_results.Borrowed());\n\n    size_t prefix_size = min(knn.limit, knn_distances_.size());\n    partial_sort(knn_distances_.begin(), knn_distances_.begin() + prefix_size,\n                 knn_distances_.end());\n    knn_distances_.resize(prefix_size);\n  }\n\n  void SearchVectorRangeFlat(FlatVectorIndex* vec_index, const AstVectorRangeNode& node) {\n    const auto& all_docs = indices_->GetAllDocs();\n    auto [dim, sim] = vec_index->Info();\n    for (DocId doc : all_docs) {\n      const float* vec = 
vec_index->Get(doc);\n      if (!vec)\n        continue;\n      float dist = VectorDistance(node.vec.first.get(), vec, dim, sim);\n      if (dist <= static_cast<float>(node.radius)) {\n        knn_scores_.emplace_back(doc, dist);\n      }\n    }\n  }\n\n  // [@field:[VECTOR_RANGE r vec]=>{$YIELD_DISTANCE_AS: alias}]:\n  // Return all docs within distance radius, storing distances in knn_scores_\n  IndexResult Search(const AstVectorRangeNode& node, string_view active_field) {\n    DCHECK(active_field.empty());\n\n    auto* vec_index = GetIndex<BaseVectorIndex>(node.field);\n    if (!vec_index)\n      return IndexResult{};\n\n    if (node.vec.second == 0)\n      return IndexResult{};\n\n    if (node.radius < 0 || std::isnan(node.radius)) {\n      error_ = absl::StrCat(\"VECTOR_RANGE radius must be non-negative, got: \", node.radius);\n      return IndexResult{};\n    }\n\n    if (auto [dim, _] = vec_index->Info(); dim != node.vec.second) {\n      error_ = absl::StrCat(\"Wrong vector index dimensions, got: \", node.vec.second,\n                            \", expected: \", dim);\n      return IndexResult{};\n    }\n\n    knn_scores_.clear();\n\n    // HNSW fields are not stored in FieldIndices::indices_, so GetIndex<BaseVectorIndex> above\n    // returns nullptr for HNSW before we reach this point.\n    // HNSW range search support is planned separately (see hnsw_index.h).\n    if (auto* flat_index = dynamic_cast<FlatVectorIndex*>(vec_index); flat_index)\n      SearchVectorRangeFlat(flat_index, node);\n\n    vector<DocId> out(knn_scores_.size());\n    for (size_t i = 0; i < knn_scores_.size(); i++)\n      out[i] = knn_scores_[i].first;\n    return IndexResult{std::move(out)};\n  }\n\n  // [KNN limit @field vec]: Compute distance from `vec` to all vectors keep closest `limit`\n  IndexResult Search(const AstKnnNode& knn, string_view active_field) {\n    DCHECK(active_field.empty());\n    auto sub_results = SearchGeneric(*knn.filter, active_field);\n\n    auto* vec_index 
= GetIndex<BaseVectorIndex>(knn.field);\n    if (!vec_index)\n      return IndexResult{};\n\n    // If vector dimension is 0, treat as placeholder/invalid - return empty results\n    // This allows tests to use dummy vector values like \"<your_vector_blob>\"\n    if (knn.vec.second == 0)\n      return IndexResult{};\n\n    if (auto [dim, _] = vec_index->Info(); dim != knn.vec.second) {\n      error_ =\n          absl::StrCat(\"Wrong vector index dimensions, got: \", knn.vec.second, \", expected: \", dim);\n      return IndexResult{};\n    }\n\n    knn_scores_.clear();\n\n    if (auto flat_index = dynamic_cast<FlatVectorIndex*>(vec_index); flat_index)\n      SearchKnnFlat(dynamic_cast<FlatVectorIndex*>(vec_index), knn, std::move(sub_results));\n\n    vector<DocId> out(knn_distances_.size());\n    knn_scores_.reserve(knn_distances_.size());\n\n    for (size_t i = 0; i < knn_distances_.size(); i++) {\n      knn_scores_.emplace_back(knn_distances_[i].second, knn_distances_[i].first);\n      out[i] = knn_distances_[i].second;\n    }\n\n    return IndexResult{std::move(out)};\n  }\n\n  // Determine node type and call specific search function\n  IndexResult SearchGeneric(const AstNode& node, string_view active_field, bool top_level = false) {\n    if (!error_.empty())\n      return IndexResult{};\n\n    ProfileBuilder::Tp start = profile_builder_ ? 
profile_builder_->Start() : ProfileBuilder::Tp{};\n\n    auto cb = [this, active_field](const auto& inner) { return Search(inner, active_field); };\n    auto result = visit(cb, node.Variant());\n\n    // Top level results don't need to be sorted, because they will be scored, sorted by fields or\n    // used by knn\n    DCHECK(top_level || holds_alternative<AstKnnNode>(node.Variant()) ||\n           holds_alternative<AstGeoNode>(node.Variant()) ||\n           holds_alternative<AstVectorRangeNode>(node.Variant()) ||\n           visit([](auto* set) { return is_sorted(set->begin(), set->end()); }, result.Borrowed()));\n\n    if (profile_builder_)\n      profile_builder_->Finish(start, node, result);\n\n    return result;\n  }\n\n  SearchResult Search(const AstNode& query, size_t cuttoff_limit) {\n    IndexResult result = SearchGeneric(query, \"\", true);\n\n    // Extract profile if enabled\n    optional<AlgorithmProfile> profile =\n        profile_builder_ ? make_optional(profile_builder_->Take()) : nullopt;\n\n    auto [out, total_size] = result.Take(cuttoff_limit);\n    return SearchResult{total_size, std::move(out), std::move(knn_scores_), std::move(profile),\n                        std::move(error_)};\n  }\n\n  const FieldIndices* indices_;\n\n  string error_;\n  optional<ProfileBuilder> profile_builder_ = ProfileBuilder{};\n\n  std::vector<pair<DocId, float>> knn_scores_;\n  vector<pair<float, DocId>> knn_distances_;\n};\n\n#ifndef __clang__\n#pragma GCC diagnostic pop\n#endif\n\n}  // namespace\n\nAstNode OptionalNumericFilter::Node(std::string field) {\n  return AstFieldNode{\"@\" + field, AstRangeNode(lo_, false, hi_, false)};\n}\n\nstring_view Schema::LookupAlias(string_view alias) const {\n  if (auto it = field_names.find(alias); it != field_names.end())\n    return it->second;\n  return alias;\n}\n\nstring_view Schema::LookupIdentifier(string_view identifier) const {\n  if (auto it = fields.find(identifier); it != fields.end())\n    return 
it->second.short_name;\n  return identifier;\n}\n\nIndicesOptions::IndicesOptions() {\n  static absl::flat_hash_set<std::string> kDefaultStopwords{\n      \"a\",    \"is\",    \"the\",  \"an\",    \"and\",   \"are\",  \"as\",   \"at\", \"be\",  \"but\",  \"by\",\n      \"for\",  \"if\",    \"in\",   \"into\",  \"it\",    \"no\",   \"not\",  \"of\", \"on\",  \"or\",   \"such\",\n      \"that\", \"their\", \"then\", \"there\", \"these\", \"they\", \"this\", \"to\", \"was\", \"will\", \"with\"};\n\n  stopwords = kDefaultStopwords;\n}\n\nFieldIndices::FieldIndices(const Schema& schema, const IndicesOptions& options,\n                           PMR_NS::memory_resource* mr, const Synonyms* synonyms)\n    : schema_{schema}, options_{options}, synonyms_{synonyms} {\n  CreateIndices(mr);\n  CreateSortIndices();\n}\n\nvoid FieldIndices::CreateIndices(PMR_NS::memory_resource* mr) {\n  for (const auto& [field_ident, field_info] : schema_.fields) {\n    if ((field_info.flags & SchemaField::NOINDEX) > 0)\n      continue;\n\n    switch (field_info.type) {\n      case SchemaField::TEXT: {\n        const auto& tparams = std::get<SchemaField::TextParams>(field_info.special_params);\n        indices_[field_ident] =\n            make_unique<TextIndex>(mr, &options_.stopwords, synonyms_, tparams.with_suffixtrie);\n        break;\n      }\n      case SchemaField::NUMERIC: {\n        const auto& nparams = std::get<SchemaField::NumericParams>(field_info.special_params);\n        indices_[field_ident] = make_unique<NumericIndex>(nparams.block_size, mr);\n        break;\n      }\n      case SchemaField::TAG: {\n        const auto& tparams = std::get<SchemaField::TagParams>(field_info.special_params);\n        indices_[field_ident] = make_unique<TagIndex>(mr, tparams);\n        break;\n      }\n      case SchemaField::VECTOR: {\n        unique_ptr<BaseVectorIndex> vector_index;\n\n        DCHECK(holds_alternative<SchemaField::VectorParams>(field_info.special_params));\n        const auto& 
vparams = std::get<SchemaField::VectorParams>(field_info.special_params);\n\n        // Use global HNSW index\n        if (vparams.use_hnsw)\n          break;\n\n        vector_index = make_unique<FlatVectorIndex>(vparams, mr);\n        indices_[field_ident] = std::move(vector_index);\n\n        break;\n      }\n      case SchemaField::GEO: {\n        indices_[field_ident] = make_unique<GeoIndex>(mr);\n        break;\n      }\n    }\n  }\n}\n\nvoid FieldIndices::CreateSortIndices() {\n  for (const auto& [field_ident, field_info] : schema_.fields) {\n    if ((field_info.flags & SchemaField::SORTABLE) == 0)\n      continue;\n\n    switch (field_info.type) {\n      case SchemaField::TAG:\n      case SchemaField::TEXT:\n        sort_indices_[field_ident] = make_unique<StringSortIndex>();\n        break;\n      case SchemaField::NUMERIC:\n        sort_indices_[field_ident] = make_unique<NumericSortIndex>();\n        break;\n      case SchemaField::VECTOR:\n      case SchemaField::GEO:\n        break;\n    }\n  }\n}\n\nbool FieldIndices::Add(DocId doc, const DocumentAccessor& access) {\n  bool was_added = true;\n\n  std::vector<std::pair<std::string_view, BaseIndex*>> successfully_added_indices;\n  successfully_added_indices.reserve(indices_.size() + sort_indices_.size());\n\n  auto try_add = [&](const auto& indices_container) {\n    for (auto& [field, index] : indices_container) {\n      if (index->Add(doc, access, field)) {\n        successfully_added_indices.emplace_back(field, index.get());\n      } else {\n        was_added = false;\n        break;\n      }\n    }\n  };\n\n  try_add(indices_);\n\n  if (was_added) {\n    try_add(sort_indices_);\n  }\n\n  if (!was_added) {\n    for (auto& [field, index] : successfully_added_indices) {\n      index->Remove(doc, access, field);\n    }\n    return false;\n  }\n\n  all_ids_.insert(upper_bound(all_ids_.begin(), all_ids_.end(), doc), doc);\n  return true;\n}\n\nvoid FieldIndices::Remove(DocId doc, const DocumentAccessor& 
access) {\n  for (auto& [field, index] : indices_)\n    index->Remove(doc, access, field);\n  for (auto& [field, sort_index] : sort_indices_)\n    sort_index->Remove(doc, access, field);\n\n  auto it = lower_bound(all_ids_.begin(), all_ids_.end(), doc);\n  DCHECK(it != all_ids_.end() && *it == doc);\n  all_ids_.erase(it);\n}\n\nBaseIndex* FieldIndices::GetIndex(string_view field) const {\n  auto it = indices_.find(schema_.LookupAlias(field));\n  return it != indices_.end() ? it->second.get() : nullptr;\n}\n\nBaseSortIndex* FieldIndices::GetSortIndex(string_view field) const {\n  auto it = sort_indices_.find(schema_.LookupAlias(field));\n  return it != sort_indices_.end() ? it->second.get() : nullptr;\n}\n\nstd::vector<TextIndex*> FieldIndices::GetAllTextIndices() const {\n  vector<TextIndex*> out;\n  for (const auto& [field_name, field_info] : schema_.fields) {\n    if (field_info.type != SchemaField::TEXT || (field_info.flags & SchemaField::NOINDEX) > 0)\n      continue;\n    auto* index = dynamic_cast<TextIndex*>(GetIndex(field_name));\n    DCHECK(index);\n    out.push_back(index);\n  }\n  return out;\n}\n\nconst vector<DocId>& FieldIndices::GetAllDocs() const {\n  return all_ids_;\n}\n\nconst Schema& FieldIndices::GetSchema() const {\n  return schema_;\n}\n\nSortableValue FieldIndices::GetSortIndexValue(DocId doc, std::string_view field_identifier) const {\n  auto it = sort_indices_.find(field_identifier);\n  DCHECK(it != sort_indices_.end());\n  return it->second->Lookup(doc);\n}\n\nvoid FieldIndices::FinalizeInitialization() {\n  for (auto& [field, index] : indices_) {\n    index->FinalizeInitialization();\n  }\n}\n\nDefragmentResult FieldIndices::Defragment(PageUsage* page_usage) {\n  auto defrag = [&](auto& indices, string* key) {\n    DefragmentMap dm{indices, key};\n    return dm.Defragment(page_usage);\n  };\n\n  DefragmentResult result = defrag(indices_, &next_defrag_field_);\n  result.Merge(defrag(sort_indices_, &next_defrag_sort_field_));\n  return 
result;\n}\n\nconst Synonyms* FieldIndices::GetSynonyms() const {\n  return synonyms_;\n}\n\nSearchAlgorithm::SearchAlgorithm() = default;\nSearchAlgorithm::~SearchAlgorithm() = default;\n\nbool SearchAlgorithm::Init(string_view query, const QueryParams* params,\n                           const OptionalFilters* filters) {\n  try {\n    query_ = make_unique<AstExpr>(ParseQuery(query, params, filters));\n  } catch (const Parser::syntax_error& se) {\n    LOG(INFO) << \"Failed to parse query \\\"\" << query << \"\\\":\" << se.what();\n    return false;\n  } catch (...) {\n    LOG_EVERY_T(INFO, 10) << \"Unexpected query parser error \\\"\" << query << \"\\\"\";\n    return false;\n  }\n\n  if (holds_alternative<monostate>(*query_)) {\n    LOG_EVERY_T(INFO, 10) << \"Empty result after parsing query \\\"\" << query << \"\\\"\";\n    return false;\n  }\n\n  return true;\n}\n\nSearchResult SearchAlgorithm::Search(const FieldIndices* index, size_t cuttoff_limit) const {\n  DCHECK(query_);\n\n  auto bs = BasicSearch{index};\n  if (profiling_enabled_)\n    bs.EnableProfiling();\n  return bs.Search(*query_, cuttoff_limit);\n}\n\nstd::optional<KnnScoreSortOption> SearchAlgorithm::GetKnnScoreSortOption() const {\n  // HNSW KNN query\n  if (knn_hnsw_score_sort_option_) {\n    return knn_hnsw_score_sort_option_;\n  }\n\n  // FLAT KNN query\n  if (auto* knn = get_if<AstKnnNode>(query_.get()); knn)\n    return KnnScoreSortOption{string_view{knn->score_alias}, knn->limit};\n\n  return nullopt;\n}\n\nbool SearchAlgorithm::IsKnnQuery() const {\n  DCHECK(query_);\n  return std::holds_alternative<AstKnnNode>(*query_);\n}\n\nAstKnnNode* SearchAlgorithm::GetKnnNode() const {\n  if (auto* knn = get_if<AstKnnNode>(query_.get()); knn) {\n    return knn;\n  }\n  return nullptr;\n}\n\nstd::unique_ptr<AstNode> SearchAlgorithm::PopKnnNode() {\n  if (auto* knn = get_if<AstKnnNode>(query_.get()); knn) {\n    // Save knn score sort option\n    knn_hnsw_score_sort_option_ = 
KnnScoreSortOption{string_view{knn->score_alias}, knn->limit};\n    auto node = std::move(query_);\n    AstKnnNode* moved_knn_node = reinterpret_cast<AstKnnNode*>(node.get());\n    if (!std::holds_alternative<AstStarNode>(*moved_knn_node->filter))\n      query_.swap(moved_knn_node->filter);\n    return node;\n  }\n  LOG(DFATAL) << \"Should not reach here\";\n  return nullptr;\n}\n\nvoid SearchAlgorithm::EnableProfiling() {\n  profiling_enabled_ = true;\n}\n\nconst AstVectorRangeNode* SearchAlgorithm::GetVectorRangeNode() const {\n  return get_if<AstVectorRangeNode>(query_.get());\n}\n\n}  // namespace dfly::search\n"
  },
  {
    "path": "src/core/search/search.h",
    "content": "// Copyright 2023, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#pragma once\n\n#include <absl/container/flat_hash_map.h>\n#include <absl/container/flat_hash_set.h>\n\n#include <memory>\n#include <optional>\n#include <string>\n#include <variant>\n\n#include \"base/pmr/memory_resource.h\"\n#include \"core/search/base.h\"\n#include \"core/search/range_tree.h\"\n#include \"core/search/synonyms.h\"\n\nnamespace dfly::search {\n\nstruct AstNode;\nstruct TextIndex;\nstruct AstKnnNode;\nstruct AstVectorRangeNode;\n\n// Optional FILTER\nstruct OptionalNumericFilter : public OptionalFilterBase {\n  OptionalNumericFilter(size_t lo, size_t hi) : empty_(false), lo_(lo), hi_(hi) {\n  }\n\n  bool IsEmpty() const override {\n    return empty_;\n  }\n\n  AstNode Node(std::string field) override;\n\n  void AddRange(size_t lo, size_t hi) {\n    if (empty_) {\n      return;\n    }\n    if ((hi_ < lo) || (hi < lo_)) {\n      empty_ = true;\n    } else {\n      lo_ = std::max(lo_, lo);\n      hi_ = std::min(hi_, hi);\n    }\n  }\n\n private:\n  bool empty_;\n  size_t lo_;\n  size_t hi_;\n};\n\n// Describes a specific index field\nstruct SchemaField {\n  enum FieldType { TAG, TEXT, NUMERIC, VECTOR, GEO };\n  enum FieldFlags : uint8_t { NOINDEX = 1 << 0, SORTABLE = 1 << 1 };\n\n  struct VectorParams {\n    bool use_hnsw = false;\n\n    size_t dim = 0u;                              // dimension of knn vectors\n    VectorSimilarity sim = VectorSimilarity::L2;  // similarity type\n    size_t capacity = 1000;                       // initial capacity\n    size_t hnsw_ef_construction = 200;\n    size_t hnsw_m = 16;\n  };\n\n  struct TagParams {\n    char separator = ',';\n    bool case_sensitive = false;\n    bool with_suffixtrie = false;  // see TextParams\n  };\n\n  struct TextParams {\n    // if enabled, suffix trie is build for efficient suffix and infix queries\n    bool with_suffixtrie = false;\n  };\n\n  struct NumericParams {\n 
   // Block size of the range tree\n    // Check RangeTree for details.\n    size_t block_size = RangeTree::kDefaultMaxRangeBlockSize;\n  };\n\n  bool IsIndexableHnswField() const {\n    return type == VECTOR && !(flags & NOINDEX) && std::get<VectorParams>(special_params).use_hnsw;\n  }\n\n  using ParamsVariant =\n      std::variant<std::monostate, VectorParams, TagParams, TextParams, NumericParams>;\n\n  FieldType type;\n  uint8_t flags;\n  std::string short_name;  // equal to ident if none provided\n  ParamsVariant special_params{std::monostate{}};\n};\n\n// Describes the fields of an index\nstruct Schema {\n  // List of fields by identifier.\n  absl::flat_hash_map<std::string /*identifier*/, SchemaField> fields;\n\n  // Mapping for short field names (aliases).\n  absl::flat_hash_map<std::string /* short name*/, std::string /*identifier*/> field_names;\n\n  // Return identifier for alias if found, otherwise return passed value\n  std::string_view LookupAlias(std::string_view alias) const;\n\n  // Return alias for identifier if found, otherwise return passed value\n  std::string_view LookupIdentifier(std::string_view identifier) const;\n};\n\nstruct IndicesOptions {\n  IndicesOptions();\n  explicit IndicesOptions(absl::flat_hash_set<std::string> stopwords)\n      : stopwords{std::move(stopwords)} {\n  }\n\n  absl::flat_hash_set<std::string> stopwords;\n};\n\n// Collection of indices for all fields in schema\nclass FieldIndices {\n public:\n  // Create indices based on schema and options. 
Both must outlive the indices\n  FieldIndices(const Schema& schema, const IndicesOptions& options, PMR_NS::memory_resource* mr,\n               const Synonyms* synonyms);\n\n  // Returns true if document was added\n  bool Add(DocId doc, const DocumentAccessor& access);\n  void Remove(DocId doc, const DocumentAccessor& access);\n\n  BaseIndex* GetIndex(std::string_view field) const;\n  BaseSortIndex* GetSortIndex(std::string_view field) const;\n  std::vector<TextIndex*> GetAllTextIndices() const;\n\n  const std::vector<DocId>& GetAllDocs() const;\n  const Schema& GetSchema() const;\n\n  const Synonyms* GetSynonyms() const;\n\n  SortableValue GetSortIndexValue(DocId doc, std::string_view field_identifier) const;\n\n  void FinalizeInitialization();\n\n  DefragmentResult Defragment(PageUsage* page_usage);\n\n private:\n  void CreateIndices(PMR_NS::memory_resource* mr);\n  void CreateSortIndices();\n\n  const Schema& schema_;\n  const IndicesOptions& options_;\n  std::vector<DocId> all_ids_;\n  absl::flat_hash_map<std::string_view, std::unique_ptr<BaseIndex>> indices_;\n  absl::flat_hash_map<std::string_view, std::unique_ptr<BaseSortIndex>> sort_indices_;\n  const Synonyms* synonyms_;\n\n  std::string next_defrag_field_;\n  std::string next_defrag_sort_field_;\n};\n\nstruct AlgorithmProfile {\n  struct ProfileEvent {\n    std::string descr;\n    size_t micros;         // time event took in microseconds\n    size_t depth;          // tree depth of event\n    size_t num_processed;  // number of results processed by the event\n  };\n\n  std::vector<ProfileEvent> events;\n};\n\n// Represents a search result returned from the search algorithm.\nstruct SearchResult {\n  size_t total;  // how many documents were matched in total\n\n  // The ids of the matched documents\n  std::vector<DocId> ids;\n\n  // Contains final scores if an aggregation was present\n  std::vector<std::pair<DocId, float>> knn_scores;\n\n  // If profiling was enabled\n  std::optional<AlgorithmProfile> 
profile;\n\n  // If an error occurred, last recent one\n  std::string error;\n};\n\nstruct KnnScoreSortOption {\n  std::string_view score_field_alias;\n  size_t limit = std::numeric_limits<size_t>::max();\n};\n\n// SearchAlgorithm allows searching field indices with a query\nclass SearchAlgorithm {\n public:\n  SearchAlgorithm();\n  ~SearchAlgorithm();\n\n  // Init with query and optional filters and return true if successful.\n  bool Init(std::string_view query, const QueryParams* params,\n            const OptionalFilters* filters = nullptr);\n\n  // Search on given index with predefined limit for cutting off result ids\n  SearchResult Search(const FieldIndices* index,\n                      size_t cuttoff_limit = std::numeric_limits<size_t>::max()) const;\n\n  std::optional<KnnScoreSortOption> GetKnnScoreSortOption() const;\n\n  bool IsKnnQuery() const;\n\n  AstKnnNode* GetKnnNode() const;\n\n  std::unique_ptr<AstNode> PopKnnNode();\n\n  const AstVectorRangeNode* GetVectorRangeNode() const;\n\n  void EnableProfiling();\n\n private:\n  bool profiling_enabled_ = false;\n  std::unique_ptr<AstNode> query_;\n  std::optional<KnnScoreSortOption> knn_hnsw_score_sort_option_;\n};\n\n}  // namespace dfly::search\n"
  },
  {
    "path": "src/core/search/search_parser_test.cc",
    "content": "// Copyright 2023, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#include \"base/gtest.h\"\n#include \"base/logging.h\"\n#include \"core/search/base.h\"\n#include \"core/search/query_driver.h\"\n#include \"core/search/search.h\"\n\nnamespace dfly::search {\n\nusing namespace std;\n\nclass SearchParserTest : public ::testing::Test {\n protected:\n  SearchParserTest() {\n    query_driver_.scanner()->set_debug(1);\n  }\n\n  void SetInput(const std::string& str) {\n    query_driver_.SetInput(str);\n  }\n\n  Parser::symbol_type Lex() {\n    return query_driver_.Lex();\n  }\n\n  int Parse(const std::string& str) {\n    query_driver_.ResetScanner();\n    query_driver_.SetInput(str);\n\n    return Parser(&query_driver_)();\n  }\n\n  void SetParams(const QueryParams* params) {\n    query_driver_.SetParams(params);\n  }\n\n  QueryDriver query_driver_;\n};\n\n// tokens are not assignable, so we can not reuse them. This macros reduce the boilerplate.\n#define NEXT_EQ(tok_enum, type, val)                    \\\n  {                                                     \\\n    auto tok = Lex();                                   \\\n    ASSERT_EQ(tok.type_get(), Parser::token::tok_enum); \\\n    EXPECT_EQ(val, tok.value.as<type>());               \\\n  }\n\n#define NEXT_TOK(tok_enum)                              \\\n  {                                                     \\\n    auto tok = Lex();                                   \\\n    ASSERT_EQ(tok.type_get(), Parser::token::tok_enum); \\\n  }\n#define NEXT_ERROR()                          \\\n  {                                           \\\n    bool caught = false;                      \\\n    try {                                     \\\n      auto tok = Lex();                       \\\n    } catch (const Parser::syntax_error& e) { \\\n      caught = true;                          \\\n    }                                         \\\n    ASSERT_TRUE(caught);             
         \\\n  }\n\nTEST_F(SearchParserTest, Scanner) {\n  SetInput(\"ab cd\");\n  // 3.5.1 does not have name() method.\n  // EXPECT_STREQ(\"term\", tok.name());\n\n  NEXT_EQ(TOK_TERM, string, \"ab\");\n  NEXT_EQ(TOK_TERM, string, \"cd\");\n  NEXT_TOK(TOK_YYEOF);\n\n  SetInput(\"*\");\n  NEXT_TOK(TOK_STAR);\n\n  SetInput(\"(5a 6) \");\n  NEXT_TOK(TOK_LPAREN);\n  NEXT_EQ(TOK_TERM, string, \"5a\");\n  NEXT_EQ(TOK_UINT32, string, \"6\");\n  NEXT_TOK(TOK_RPAREN);\n\n  SetInput(R\"( \"hello\\\"world\" )\");\n  NEXT_EQ(TOK_TERM, string, R\"(hello\"world)\");\n\n  SetInput(\"@field:hello\");\n  NEXT_EQ(TOK_FIELD, string, \"@field\");\n  NEXT_TOK(TOK_COLON);\n  NEXT_EQ(TOK_TERM, string, \"hello\");\n\n  SetInput(\"@field:{ tag }\");\n  NEXT_EQ(TOK_FIELD, string, \"@field\");\n  NEXT_TOK(TOK_COLON);\n  NEXT_TOK(TOK_LCURLBR);\n  NEXT_EQ(TOK_TERM, string, \"tag\");\n  NEXT_TOK(TOK_RCURLBR);\n\n  SetInput(\"@color:{blue\\\\,1\\\\\\\\\\\\$\\\\+}\");\n  NEXT_EQ(TOK_FIELD, string, \"@color\");\n  NEXT_TOK(TOK_COLON);\n  NEXT_TOK(TOK_LCURLBR);\n  NEXT_EQ(TOK_TAG_VAL, string, R\"(blue,1\\$+)\");\n  NEXT_TOK(TOK_RCURLBR);\n\n  SetInput(\"@color:{blue\\\\.1\\\\\\\"\\\\%\\\\=}\");\n  NEXT_EQ(TOK_FIELD, string, \"@color\");\n  NEXT_TOK(TOK_COLON);\n  NEXT_TOK(TOK_LCURLBR);\n  NEXT_EQ(TOK_TAG_VAL, string, \"blue.1\\\"%=\");\n  NEXT_TOK(TOK_RCURLBR);\n\n  SetInput(\"@color:{blue\\\\<1\\\\'\\\\^\\\\~}\");\n  NEXT_EQ(TOK_FIELD, string, \"@color\");\n  NEXT_TOK(TOK_COLON);\n  NEXT_TOK(TOK_LCURLBR);\n  NEXT_EQ(TOK_TAG_VAL, string, \"blue<1'^~\");\n  NEXT_TOK(TOK_RCURLBR);\n\n  SetInput(\"@color:{blue\\\\>1\\\\:\\\\&\\\\/}\");\n  NEXT_EQ(TOK_FIELD, string, \"@color\");\n  NEXT_TOK(TOK_COLON);\n  NEXT_TOK(TOK_LCURLBR);\n  NEXT_EQ(TOK_TAG_VAL, string, \"blue>1:&/\");\n  NEXT_TOK(TOK_RCURLBR);\n\n  SetInput(\"@color:{blue\\\\{1\\\\;\\\\*\\\\ }\");\n  NEXT_EQ(TOK_FIELD, string, \"@color\");\n  NEXT_TOK(TOK_COLON);\n  NEXT_TOK(TOK_LCURLBR);\n  NEXT_EQ(TOK_TAG_VAL, string, \"blue{1;* \");\n  
NEXT_TOK(TOK_RCURLBR);\n\n  SetInput(\"@color:{blue\\\\}1\\\\!\\\\(}\");\n  NEXT_EQ(TOK_FIELD, string, \"@color\");\n  NEXT_TOK(TOK_COLON);\n  NEXT_TOK(TOK_LCURLBR);\n  NEXT_EQ(TOK_TAG_VAL, string, \"blue}1!(\");\n  NEXT_TOK(TOK_RCURLBR);\n\n  SetInput(\"@color:{blue\\\\[1\\\\@\\\\)}\");\n  NEXT_EQ(TOK_FIELD, string, \"@color\");\n  NEXT_TOK(TOK_COLON);\n  NEXT_TOK(TOK_LCURLBR);\n  NEXT_EQ(TOK_TAG_VAL, string, \"blue[1@)\");\n  NEXT_TOK(TOK_RCURLBR);\n\n  SetInput(\"@color:{blue\\\\]1\\\\#\\\\-}\");\n  NEXT_EQ(TOK_FIELD, string, \"@color\");\n  NEXT_TOK(TOK_COLON);\n  NEXT_TOK(TOK_LCURLBR);\n  NEXT_EQ(TOK_TAG_VAL, string, \"blue]1#-\");\n  NEXT_TOK(TOK_RCURLBR);\n\n  // Colon in tag value (unescaped)\n  SetInput(\"@t:{Tag:value}\");\n  NEXT_EQ(TOK_FIELD, string, \"@t\");\n  NEXT_TOK(TOK_COLON);\n  NEXT_TOK(TOK_LCURLBR);\n  NEXT_EQ(TOK_TAG_VAL, string, \"Tag:value\");\n  NEXT_TOK(TOK_RCURLBR);\n\n  // Prefix simple\n  SetInput(\"pre*\");\n  NEXT_EQ(TOK_PREFIX, string, \"pre\");\n\n  // TODO: uncomment when we support escaped terms\n  // Prefix escaped (redis doesn't support quoted prefix matches)\n  // SetInput(\"pre\\\\**\");\n  // NEXT_EQ(TOK_PREFIX, string, \"pre*\");\n\n  // Prefix in tag\n  SetInput(\"@color:{prefix*}\");\n  NEXT_EQ(TOK_FIELD, string, \"@color\");\n  NEXT_TOK(TOK_COLON);\n  NEXT_TOK(TOK_LCURLBR);\n  NEXT_EQ(TOK_PREFIX, string, \"prefix\");\n  NEXT_TOK(TOK_RCURLBR);\n\n  // Prefix escaped star\n  SetInput(\"@color:{\\\"prefix*\\\"}\");\n  NEXT_EQ(TOK_FIELD, string, \"@color\");\n  NEXT_TOK(TOK_COLON);\n  NEXT_TOK(TOK_LCURLBR);\n  NEXT_EQ(TOK_TERM, string, \"prefix*\");\n  NEXT_TOK(TOK_RCURLBR);\n\n  // Prefix spaced with star\n  SetInput(\"pre *\");\n  NEXT_EQ(TOK_TERM, string, \"pre\");\n  NEXT_TOK(TOK_STAR);\n\n  SetInput(\"почтальон Печкин\");\n  NEXT_EQ(TOK_TERM, string, \"почтальон\");\n  NEXT_EQ(TOK_TERM, string, \"Печкин\");\n\n  SetInput(\"33.3\");\n  NEXT_EQ(TOK_DOUBLE, string, \"33.3\");\n}\n\nTEST_F(SearchParserTest, 
EscapedTagPrefixes) {\n  SetInput(\"@name:{escape\\\\-err*}\");\n  NEXT_EQ(TOK_FIELD, string, \"@name\");\n  NEXT_TOK(TOK_COLON);\n  NEXT_TOK(TOK_LCURLBR);\n  NEXT_EQ(TOK_PREFIX, string, \"escape-err\");\n  NEXT_TOK(TOK_RCURLBR);\n\n  SetInput(\"@name:{escape\\\\+pre*}\");\n  NEXT_EQ(TOK_FIELD, string, \"@name\");\n  NEXT_TOK(TOK_COLON);\n  NEXT_TOK(TOK_LCURLBR);\n  NEXT_EQ(TOK_PREFIX, string, \"escape+pre\");\n  NEXT_TOK(TOK_RCURLBR);\n\n  SetInput(\"@name:{escape\\\\.pre*}\");\n  NEXT_EQ(TOK_FIELD, string, \"@name\");\n  NEXT_TOK(TOK_COLON);\n  NEXT_TOK(TOK_LCURLBR);\n  NEXT_EQ(TOK_PREFIX, string, \"escape.pre\");\n  NEXT_TOK(TOK_RCURLBR);\n\n  SetInput(\"@name:{complex\\\\-escape\\\\+with\\\\.many\\\\*chars*}\");\n  NEXT_EQ(TOK_FIELD, string, \"@name\");\n  NEXT_TOK(TOK_COLON);\n  NEXT_TOK(TOK_LCURLBR);\n  NEXT_EQ(TOK_PREFIX, string, \"complex-escape+with.many*chars\");\n  NEXT_TOK(TOK_RCURLBR);\n}\n\nTEST_F(SearchParserTest, Parse) {\n  EXPECT_EQ(0, Parse(\" foo bar (baz) \"));\n  EXPECT_EQ(0, Parse(\" -(foo) @foo:bar @ss:[1 2]\"));\n  EXPECT_EQ(0, Parse(\"@foo:{ tag1 | tag2 }\"));\n\n  EXPECT_EQ(0, Parse(\"@foo:{1|2}\"));\n  EXPECT_EQ(0, Parse(\"@foo:{1|2.0|4|3.0}\"));\n  EXPECT_EQ(0, Parse(\"@foo:{1|hello|3.0|world|4}\"));\n\n  EXPECT_EQ(0, Parse(\"@name:{escape\\\\-err*}\"));\n\n  // Parenthesized star - used by LangChain for KNN queries (issue #6342)\n  EXPECT_EQ(0, Parse(\"(*)\"));\n  EXPECT_EQ(0, Parse(\"((*))\"));\n  EXPECT_EQ(0, Parse(\"(((*)))\"));\n\n  // Colon in tag value\n  EXPECT_EQ(0, Parse(\"@t:{Tag:value}\"));\n  EXPECT_EQ(0, Parse(\"@t:{Tag:*}\"));\n  EXPECT_EQ(0, Parse(\"@category:{Product:Electronics}\"));\n\n  EXPECT_EQ(1, Parse(\" -(foo \"));\n  EXPECT_EQ(1, Parse(\" foo:bar \"));\n  EXPECT_EQ(1, Parse(\" @foo:@bar \"));\n  EXPECT_EQ(1, Parse(\" @foo: \"));\n\n  EXPECT_EQ(0, Parse(\"*suffix\"));\n  EXPECT_EQ(0, Parse(\"*infix*\"));\n\n  EXPECT_EQ(1, Parse(\"pre***\"));\n\n  // Geo units\n  EXPECT_EQ(0, Parse(\"@t:{km}\"));\n  EXPECT_EQ(0, 
Parse(\"@t:{Km|M}\"));\n  EXPECT_EQ(0, Parse(\"@t:{ft|mi}\"));\n  EXPECT_EQ(0, Parse(\"@location:[0.0 0.0 1 m]\"));\n  EXPECT_EQ(0, Parse(\"@location:[0.0 0.0 1 Km]\"));\n  EXPECT_EQ(1, Parse(\"@location:[0.0 0.0 1 yd]\"));\n}\n\nTEST_F(SearchParserTest, ParseParams) {\n  QueryParams params;\n  params[\"k\"] = \"10\";\n  params[\"name\"] = \"alex\";\n  SetParams(&params);\n\n  SetInput(\"$name $k\");\n  NEXT_EQ(TOK_TERM, string, \"alex\");\n  NEXT_EQ(TOK_UINT32, string, \"10\");\n}\n\nTEST_F(SearchParserTest, Quotes) {\n  SetInput(\" \\\"fir  st\\\"  'sec@o@nd' \\\":third:\\\" 'four\\\\\\\"th' \");\n  NEXT_EQ(TOK_TERM, string, \"fir  st\");\n  NEXT_EQ(TOK_TERM, string, \"sec@o@nd\");\n  NEXT_EQ(TOK_TERM, string, \":third:\");\n  NEXT_EQ(TOK_TERM, string, \"four\\\"th\");\n}\n\nTEST_F(SearchParserTest, Numeric) {\n  SetInput(\"11 123123123123 '22'\");\n  NEXT_EQ(TOK_UINT32, string, \"11\");\n  NEXT_EQ(TOK_DOUBLE, string, \"123123123123\");\n  NEXT_EQ(TOK_TERM, string, \"22\");\n}\n\nTEST_F(SearchParserTest, VectorRange) {\n  // Full vector range query tokenization\n  SetInput(\"@vector:[VECTOR_RANGE $radius $vec]=>{$YIELD_DISTANCE_AS: dist}\");\n  NEXT_EQ(TOK_FIELD, string, \"@vector\");\n  NEXT_TOK(TOK_COLON);\n  NEXT_TOK(TOK_LBRACKET);\n  NEXT_TOK(TOK_VECTOR_RANGE);\n}\n\nTEST_F(SearchParserTest, VectorRangeParse) {\n  QueryParams params;\n  params[\"radius\"] = \"1\";\n  // 4 bytes = one float dimension\n  params[\"vec\"] = std::string(4, '\\0');\n  SetParams(&params);\n\n  // Basic syntax parses without error\n  EXPECT_EQ(0, Parse(\"@f:[VECTOR_RANGE $radius $vec]=>{$YIELD_DISTANCE_AS: dist}\"));\n}\n\nTEST_F(SearchParserTest, KNN) {\n  SetInput(\"*=>[KNN 1 @vector field_vec]\");\n  NEXT_TOK(TOK_STAR);\n  NEXT_TOK(TOK_ARROW);\n  NEXT_TOK(TOK_LBRACKET);\n}\n\nTEST_F(SearchParserTest, KNNfull) {\n  SetInput(\"*=>[Knn 1 @vector field_vec EF_Runtime 15 as vec_sort]\");\n  NEXT_TOK(TOK_STAR);\n  NEXT_TOK(TOK_ARROW);\n  NEXT_TOK(TOK_LBRACKET);\n\n  NEXT_TOK(TOK_KNN);\n 
 NEXT_EQ(TOK_UINT32, string, \"1\");\n  NEXT_TOK(TOK_FIELD);\n  NEXT_TOK(TOK_TERM);\n\n  NEXT_TOK(TOK_EF_RUNTIME);\n  NEXT_EQ(TOK_UINT32, string, \"15\");\n\n  NEXT_TOK(TOK_AS);\n  NEXT_EQ(TOK_TERM, string, \"vec_sort\");\n\n  NEXT_TOK(TOK_RBRACKET);\n}\n\n}  // namespace dfly::search\n"
  },
  {
    "path": "src/core/search/search_test.cc",
    "content": "// Copyright 2023, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#include \"core/search/search.h\"\n\n#include <absl/cleanup/cleanup.h>\n#include <absl/container/flat_hash_map.h>\n#include <absl/strings/escaping.h>\n#include <absl/strings/numbers.h>\n#include <absl/strings/str_split.h>\n#include <benchmark/benchmark.h>\n#include <gmock/gmock.h>\n#include <gtest/gtest.h>\n#include <mimalloc.h>\n\n#include <algorithm>\n#include <cmath>\n#include <memory_resource>\n#include <random>\n\n#include \"absl/base/macros.h\"\n#include \"base/gtest.h\"\n#include \"base/logging.h\"\n#include \"core/search/base.h\"\n#include \"core/search/hnsw_index.h\"\n#include \"core/search/query_driver.h\"\n#include \"core/search/stateless_allocator.h\"\n#include \"core/search/vector_utils.h\"\n\nextern \"C\" {\n#include \"redis/zmalloc.h\"\n}\n\nnamespace dfly {\nnamespace search {\n\nusing namespace std;\n\nusing ::testing::HasSubstr;\n\n// Used for NumericIndex benchmarks.\n// The value is used to determine the maximum size of a range block in the range tree.\nconstexpr size_t kMaxRangeBlockSize = 500000;\n\nstruct MockedDocument : public DocumentAccessor {\n public:\n  using Map = absl::flat_hash_map<std::string, std::string>;\n\n  MockedDocument() = default;\n  MockedDocument(Map map) : fields_{map} {\n  }\n  MockedDocument(std::string test_field) : fields_{{\"field\", test_field}} {\n  }\n\n  std::optional<StringList> GetStrings(string_view field) const override {\n    auto it = fields_.find(field);\n    if (it == fields_.end()) {\n      return EmptyAccessResult<StringList>();\n    }\n    return StringList{string_view{it->second}};\n  }\n\n  std::optional<StringList> GetTags(string_view field) const override {\n    return GetStrings(field);\n  }\n\n  std::optional<VectorInfo> GetVector(string_view field, size_t dim) const override {\n    auto strings_list = GetStrings(field);\n    if (!strings_list)\n      return std::nullopt;\n  
  return !strings_list->empty() ? BytesToFtVectorSafe(strings_list->front()) : OwnedFtVector{};\n  }\n\n  std::optional<NumsList> GetNumbers(std::string_view field) const override {\n    auto strings_list = GetStrings(field);\n    if (!strings_list)\n      return std::nullopt;\n\n    NumsList nums_list;\n    nums_list.reserve(strings_list->size());\n    for (auto str : strings_list.value()) {\n      auto num = ParseNumericField(str);\n      if (!num) {\n        return std::nullopt;\n      }\n      nums_list.push_back(num.value());\n    }\n    return nums_list;\n  }\n\n  string DebugFormat() {\n    string out = \"{\";\n    for (const auto& [field, value] : fields_)\n      absl::StrAppend(&out, field, \"=\", value, \",\");\n    if (out.size() > 1)\n      out.pop_back();\n    out += \"}\";\n    return out;\n  }\n\n  void Set(Map hset) {\n    fields_ = hset;\n  }\n\n private:\n  Map fields_{};\n};\n\nIndicesOptions kEmptyOptions{{}};\n\nstruct SchemaFieldInitializer {\n  SchemaFieldInitializer(std::string_view name, SchemaField::FieldType type)\n      : name{name}, type{type} {\n    switch (type) {\n      case SchemaField::TAG:\n        special_params = SchemaField::TagParams{};\n        break;\n      case SchemaField::TEXT:\n        special_params = SchemaField::TextParams{};\n        break;\n      case SchemaField::NUMERIC:\n        special_params = SchemaField::NumericParams{};\n        break;\n      case SchemaField::VECTOR:\n        special_params = SchemaField::VectorParams{};\n        break;\n      case SchemaField::GEO:\n        break;\n    }\n  }\n\n  SchemaFieldInitializer(std::string_view name, SchemaField::FieldType type,\n                         SchemaField::ParamsVariant special_params)\n      : name{name}, type{type}, special_params{special_params} {\n  }\n\n  std::string_view name;\n  SchemaField::FieldType type;\n  SchemaField::ParamsVariant special_params{std::monostate{}};\n};\n\nSchema MakeSimpleSchema(initializer_list<SchemaFieldInitializer> 
ilist,\n                        bool make_sortable = false) {\n  Schema schema;\n  uint8_t flags = make_sortable ? SchemaField::SORTABLE : 0;\n  for (auto ifield : ilist) {\n    auto& field = schema.fields[ifield.name];\n    field = {ifield.type, flags, string{ifield.name}, ifield.special_params};\n  }\n  return schema;\n}\n\nclass SearchTest : public ::testing::Test {\n protected:\n  static void SetUpTestSuite() {\n    auto* tlh = mi_heap_get_backing();\n    init_zmalloc_threadlocal(tlh);\n    // Initialize SimSIMD runtime for tests that may exercise vector kernels\n    InitSimSIMD();\n  }\n\n  SearchTest() {\n    PrepareSchema({{\"field\", SchemaField::TEXT}});\n  }\n\n  ~SearchTest() {\n    EXPECT_EQ(entries_.size(), 0u) << \"Missing check\";\n  }\n\n  void PrepareSchema(initializer_list<SchemaFieldInitializer> ilist) {\n    schema_ = MakeSimpleSchema(ilist);\n  }\n\n  void PrepareQuery(string_view query) {\n    query_ = query;\n  }\n\n  template <typename... Args> void ExpectAll(Args... args) {\n    (entries_.emplace_back(args, true), ...);\n  }\n\n  template <typename... Args> void ExpectNone(Args... 
args) {\n    (entries_.emplace_back(args, false), ...);\n  }\n\n  bool Check() {\n    absl::Cleanup cl{[this] { entries_.clear(); }};\n\n    FieldIndices index{schema_, kEmptyOptions, PMR_NS::get_default_resource(), nullptr};\n\n    shuffle(entries_.begin(), entries_.end(), default_random_engine{});\n    for (DocId i = 0; i < entries_.size(); i++)\n      index.Add(i, entries_[i].first);\n    index.FinalizeInitialization();\n\n    SearchAlgorithm search_algo{};\n    if (!search_algo.Init(query_, &params_)) {\n      error_ = \"Failed to parse query\";\n      return false;\n    }\n\n    auto matched = search_algo.Search(&index);\n\n    if (!is_sorted(matched.ids.begin(), matched.ids.end()))\n      LOG(FATAL) << \"Search result is not sorted\";\n\n    for (DocId i = 0; i < entries_.size(); i++) {\n      bool doc_matched = binary_search(matched.ids.begin(), matched.ids.end(), i);\n      if (doc_matched != entries_[i].second) {\n        error_ = \"doc: \\\"\" + entries_[i].first.DebugFormat() + \"\\\"\" + \" was expected\" +\n                 (entries_[i].second ? \"\" : \" not\") + \" to match\" + \" query: \\\"\" + query_ + \"\\\"\";\n        return false;\n      }\n    }\n\n    return true;\n  }\n\n  string_view GetError() const {\n    return error_;\n  }\n\n private:\n  using DocEntry = pair<MockedDocument, bool /*should_match*/>;\n\n  QueryParams params_;\n  Schema schema_;\n  vector<DocEntry> entries_;\n  string query_, error_;\n};\n\nTEST_F(SearchTest, MatchTerm) {\n  PrepareQuery(\"foo\");\n\n  // Check basic cases\n  ExpectAll(\"foo\", \"foo bar\", \"more foo bar\");\n  ExpectNone(\"wrong\", \"nomatch\");\n\n  // Check part of sentence + case.\n  ExpectAll(\"Foo is cool.\", \"Where is foo?\", \"One. FOO!. 
More\", \"Foo is foo.\");\n\n  // Check part of word is not matched\n  ExpectNone(\"foocool\", \"veryfoos\", \"ufoo\", \"morefoomore\", \"thefoo\");\n\n  EXPECT_TRUE(Check()) << GetError();\n}\n\nTEST_F(SearchTest, MatchNotTerm) {\n  PrepareQuery(\"-foo\");\n\n  ExpectAll(\"faa\", \"definitielyright\");\n  ExpectNone(\"foo\", \"foo bar\", \"more foo bar\");\n\n  EXPECT_TRUE(Check()) << GetError();\n}\n\nTEST_F(SearchTest, MatchLogicalNode) {\n  {\n    PrepareQuery(\"foo bar\");\n\n    ExpectAll(\"foo bar\", \"bar foo\", \"more bar and foo\");\n    ExpectNone(\"wrong\", \"foo\", \"bar\", \"foob\", \"far\");\n\n    EXPECT_TRUE(Check()) << GetError();\n  }\n\n  {\n    PrepareQuery(\"foo | bar\");\n\n    ExpectAll(\"foo bar\", \"foo\", \"bar\", \"foo and more\", \"or only bar\");\n    ExpectNone(\"wrong\", \"only far\");\n\n    EXPECT_TRUE(Check()) << GetError();\n  }\n\n  {\n    PrepareQuery(\"foo bar baz\");\n\n    ExpectAll(\"baz bar foo\", \"bar and foo and baz\");\n    ExpectNone(\"wrong\", \"foo baz\", \"bar baz\", \"and foo\");\n\n    EXPECT_TRUE(Check()) << GetError();\n  }\n}\n\nTEST_F(SearchTest, MatchParenthesis) {\n  PrepareQuery(\"( foo | oof ) ( bar | rab )\");\n\n  ExpectAll(\"foo bar\", \"oof rab\", \"foo rab\", \"oof bar\", \"foo oof bar rab\");\n  ExpectNone(\"wrong\", \"bar rab\", \"foo oof\");\n\n  EXPECT_TRUE(Check()) << GetError();\n}\n\nTEST_F(SearchTest, CheckNotPriority) {\n  for (auto expr : {\"-bar foo baz\", \"foo -bar baz\", \"foo baz -bar\"}) {\n    PrepareQuery(expr);\n\n    ExpectAll(\"foo baz\", \"foo rab baz\", \"baz rab foo\");\n    ExpectNone(\"wrong\", \"bar\", \"foo bar baz\", \"foo baz bar\");\n\n    EXPECT_TRUE(Check()) << GetError();\n  }\n\n  for (auto expr : {\"-bar | foo\", \"foo | -bar\"}) {\n    PrepareQuery(expr);\n\n    ExpectAll(\"foo\", \"right\", \"foo bar\");\n    ExpectNone(\"bar\", \"bar baz\");\n\n    EXPECT_TRUE(Check()) << GetError();\n  }\n\n  for (auto expr : {\"-bar far|-foo tam\"}) {\n    
PrepareQuery(expr);\n\n    ExpectAll(\"far baz\", \"far foo\", \"bar tam\");\n    ExpectNone(\"bar far\", \"foo tam\", \"bar foo\", \"far bar foo\");\n\n    EXPECT_TRUE(Check()) << GetError();\n  }\n}\n\nTEST_F(SearchTest, CheckParenthesisPriority) {\n  {\n    PrepareQuery(\"foo | -(bar baz)\");\n\n    ExpectAll(\"foo\", \"not b/r and b/z\", \"foo bar baz\", \"single bar\", \"only baz\");\n    ExpectNone(\"bar baz\", \"some more bar and baz\");\n\n    EXPECT_TRUE(Check()) << GetError();\n  }\n  {\n    PrepareQuery(\"( foo (bar | baz) (rab | zab) ) | true\");\n\n    ExpectAll(\"true\", \"foo bar rab\", \"foo baz zab\", \"foo bar zab\");\n    ExpectNone(\"wrong\", \"foo bar baz\", \"foo rab zab\", \"foo bar what\", \"foo rab foo\");\n\n    EXPECT_TRUE(Check()) << GetError();\n  }\n}\n\nTEST_F(SearchTest, CheckPrefix) {\n  {\n    PrepareQuery(\"pre*\");\n\n    ExpectAll(\"pre\", \"prepre\", \"preachers\", \"prepared\", \"pRetty\", \"PRedators\", \"prEcisely!\");\n    ExpectNone(\"pristine\", \"represent\", \"repair\", \"depreciation\");\n\n    EXPECT_TRUE(Check()) << GetError();\n  }\n  {\n    PrepareQuery(\"new*\");\n\n    ExpectAll(\"new\", \"New York\", \"Newham\", \"newbie\", \"news\", \"Welcome to Newark!\");\n    ExpectNone(\"ne\", \"renew\", \"nev\", \"ne-w\", \"notnew\", \"casino in neVada\");\n\n    EXPECT_TRUE(Check()) << GetError();\n  }\n}\n\nusing Map = MockedDocument::Map;\n\nTEST_F(SearchTest, MatchField) {\n  PrepareSchema({{\"f1\", SchemaField::TEXT}, {\"f2\", SchemaField::TEXT}, {\"f3\", SchemaField::TEXT}});\n  PrepareQuery(\"@f1:foo @f2:bar @f3:baz\");\n\n  ExpectAll(Map{{\"f1\", \"foo\"}, {\"f2\", \"bar\"}, {\"f3\", \"baz\"}});\n  ExpectNone(Map{{\"f1\", \"foo\"}, {\"f2\", \"bar\"}, {\"f3\", \"last is wrong\"}},\n             Map{{\"f1\", \"its\"}, {\"f2\", \"totally\"}, {\"f3\", \"wrong\"}},\n             Map{{\"f1\", \"im foo but its only me and\"}, {\"f2\", \"bar\"}});\n\n  EXPECT_TRUE(Check()) << GetError();\n}\n\nTEST_F(SearchTest, 
MatchRange) {\n  PrepareSchema({{\"f1\", SchemaField::NUMERIC}, {\"f2\", SchemaField::NUMERIC}});\n  PrepareQuery(\"@f1:[1 10] @f2:[50 100]\");\n\n  ExpectAll(Map{{\"f1\", \"5\"}, {\"f2\", \"50\"}}, Map{{\"f1\", \"1\"}, {\"f2\", \"100\"}},\n            Map{{\"f1\", \"10\"}, {\"f2\", \"50\"}});\n  ExpectNone(Map{{\"f1\", \"11\"}, {\"f2\", \"49\"}}, Map{{\"f1\", \"0\"}, {\"f2\", \"101\"}});\n\n  EXPECT_TRUE(Check()) << GetError();\n}\n\nTEST_F(SearchTest, MatchDoubleRange) {\n  PrepareSchema({{\"f1\", SchemaField::NUMERIC}});\n\n  {\n    PrepareQuery(\"@f1: [100.03 199.97]\");\n\n    ExpectAll(Map{{\"f1\", \"130\"}}, Map{{\"f1\", \"170\"}}, Map{{\"f1\", \"100.03\"}}, Map{{\"f1\", \"199.97\"}});\n\n    ExpectNone(Map{{\"f1\", \"0\"}}, Map{{\"f1\", \"200\"}}, Map{{\"f1\", \"100.02999\"}},\n               Map{{\"f1\", \"199.9700001\"}});\n\n    EXPECT_TRUE(Check()) << GetError();\n  }\n\n  {\n    PrepareQuery(\"@f1: [(100 (199.9]\");\n\n    ExpectAll(Map{{\"f1\", \"150\"}}, Map{{\"f1\", \"100.00001\"}}, Map{{\"f1\", \"199.8999999\"}});\n\n    ExpectNone(Map{{\"f1\", \"50\"}}, Map{{\"f1\", \"100\"}}, Map{{\"f1\", \"199.9\"}}, Map{{\"f1\", \"200\"}});\n\n    EXPECT_TRUE(Check()) << GetError();\n  }\n}\n\nTEST_F(SearchTest, MatchStar) {\n  PrepareQuery(\"*\");\n  ExpectAll(\"one\", \"two\", \"three\", \"and\", \"all\", \"documents\");\n  EXPECT_TRUE(Check()) << GetError();\n}\n\nTEST_F(SearchTest, CheckExprInField) {\n  PrepareSchema({{\"f1\", SchemaField::TEXT}, {\"f2\", SchemaField::TEXT}, {\"f3\", SchemaField::TEXT}});\n  {\n    PrepareQuery(\"@f1:(a|b) @f2:(c d) @f3:-e\");\n\n    ExpectAll(Map{{\"f1\", \"a\"}, {\"f2\", \"c and d\"}, {\"f3\", \"right\"}},\n              Map{{\"f1\", \"b\"}, {\"f2\", \"d and c\"}, {\"f3\", \"ok\"}});\n    ExpectNone(Map{{\"f1\", \"none\"}, {\"f2\", \"only d\"}, {\"f3\", \"ok\"}},\n               Map{{\"f1\", \"b\"}, {\"f2\", \"d and c\"}, {\"f3\", \"it has an e\"}});\n\n    EXPECT_TRUE(Check()) << GetError();\n  }\n  {\n    
PrepareQuery({\"@f1:(a (b | c) -(d | e)) @f2:-(a|b)\"});\n\n    ExpectAll(Map{{\"f1\", \"a b w\"}, {\"f2\", \"c\"}});\n    ExpectNone(Map{{\"f1\", \"a b d\"}, {\"f2\", \"c\"}}, Map{{\"f1\", \"a b w\"}, {\"f2\", \"a\"}},\n               Map{{\"f1\", \"a w\"}, {\"f2\", \"c\"}});\n\n    EXPECT_TRUE(Check()) << GetError();\n  }\n  {\n    PrepareQuery(\"@f1:(-a c|-b d)\");\n\n    ExpectAll(Map{{\"f1\", \"c\"}}, Map{{\"f1\", \"d\"}});\n    ExpectNone(Map{{\"f1\", \"a\"}}, Map{{\"f1\", \"b\"}});\n\n    EXPECT_TRUE(Check()) << GetError();\n  }\n}\n\nTEST_F(SearchTest, CheckTag) {\n  PrepareSchema({{\"f1\", SchemaField::TAG}, {\"f2\", SchemaField::TAG}});\n\n  PrepareQuery(\"@f1:{red | blue} @f2:{circle | square}\");\n\n  ExpectAll(Map{{\"f1\", \"red\"}, {\"f2\", \"square\"}}, Map{{\"f1\", \"blue\"}, {\"f2\", \"square\"}},\n            Map{{\"f1\", \"red\"}, {\"f2\", \"circle\"}}, Map{{\"f1\", \"red\"}, {\"f2\", \"circle, square\"}},\n            Map{{\"f1\", \"red\"}, {\"f2\", \"triangle, circle\"}},\n            Map{{\"f1\", \"red, green\"}, {\"f2\", \"square\"}},\n            Map{{\"f1\", \"green, blue\"}, {\"f2\", \"circle\"}});\n  ExpectNone(Map{{\"f1\", \"green\"}, {\"f2\", \"square\"}}, Map{{\"f1\", \"green\"}, {\"f2\", \"circle\"}},\n             Map{{\"f1\", \"red\"}, {\"f2\", \"triangle\"}}, Map{{\"f1\", \"blue\"}, {\"f2\", \"line, triangle\"}});\n\n  EXPECT_TRUE(Check()) << GetError();\n}\n\nTEST_F(SearchTest, CheckTagPrefix) {\n  PrepareSchema({{\"color\", SchemaField::TAG}});\n  PrepareQuery(\"@color:{green* | orange | yellow*}\");\n\n  ExpectAll(Map{{\"color\", \"green\"}}, Map{{\"color\", \"yellow\"}}, Map{{\"color\", \"greenish\"}},\n            Map{{\"color\", \"yellowish\"}}, Map{{\"color\", \"green-forestish\"}},\n            Map{{\"color\", \"yellowsunish\"}}, Map{{\"color\", \"orange\"}});\n  ExpectNone(Map{{\"color\", \"red\"}}, Map{{\"color\", \"blue\"}}, Map{{\"color\", \"orangeish\"}},\n             Map{{\"color\", \"darkgreen\"}}, Map{{\"color\", 
\"light-yellow\"}});\n\n  EXPECT_TRUE(Check()) << GetError();\n}\n\nTEST_F(SearchTest, IntegerTerms) {\n  PrepareSchema({{\"status\", SchemaField::TAG}, {\"title\", SchemaField::TEXT}});\n\n  PrepareQuery(\"@status:{1} @title:33\");\n\n  ExpectAll(Map{{\"status\", \"1\"}, {\"title\", \"33 cars on the road\"}});\n  ExpectNone(Map{{\"status\", \"0\"}, {\"title\", \"22 trains on the tracks\"}});\n\n  EXPECT_TRUE(Check()) << GetError();\n}\n\nTEST_F(SearchTest, StopWords) {\n  auto schema = MakeSimpleSchema({{\"title\", SchemaField::TEXT}});\n  IndicesOptions options{{\"some\", \"words\", \"are\", \"left\", \"out\"}};\n\n  FieldIndices indices{schema, options, PMR_NS::get_default_resource(), nullptr};\n  SearchAlgorithm algo{};\n  QueryParams params;\n\n  vector<string> documents = {\"some words left out\",      //\n                              \"some can be found\",        //\n                              \"words are never matched\",  //\n                              \"explicitly found!\"};\n  for (size_t i = 0; i < documents.size(); i++) {\n    MockedDocument doc{{{\"title\", documents[i]}}};\n    indices.Add(i, doc);\n  }\n\n  // words is a stopword\n  algo.Init(\"words\", &params);\n  EXPECT_THAT(algo.Search(&indices).ids, testing::UnorderedElementsAre());\n\n  // some is a stopword\n  algo.Init(\"some\", &params);\n  EXPECT_THAT(algo.Search(&indices).ids, testing::UnorderedElementsAre());\n\n  // found is not a stopword\n  algo.Init(\"found\", &params);\n  EXPECT_THAT(algo.Search(&indices).ids, testing::UnorderedElementsAre(1, 3));\n}\n\nclass SearchRaxTest\n    : public SearchTest,\n      public testing::WithParamInterface<pair<bool /* build suffix trie */, bool /* tag index */>> {\n};\n\nTEST_P(SearchRaxTest, SuffixInfix) {\n  auto [with_trie, use_tag] = GetParam();\n  Schema schema = MakeSimpleSchema({{\"title\", use_tag ? 
SchemaField::TAG : SchemaField::TEXT}});\n  if (use_tag) {\n    schema.fields[\"title\"].special_params = SchemaField::TagParams{.with_suffixtrie = with_trie};\n  } else {\n    schema.fields[\"title\"].special_params = SchemaField::TextParams{.with_suffixtrie = with_trie};\n  }\n\n  FieldIndices indices{schema, kEmptyOptions, PMR_NS::get_default_resource(), nullptr};\n  SearchAlgorithm algo{};\n  QueryParams params;\n\n  vector<string> documents = {\"Berries\",     \"BlueBeRRies\", \"Blackberries\", \"APPLES\",\n                              \"CranbeRRies\", \"Wolfberry\",   \"StraWberry\"};\n  for (size_t i = 0; i < documents.size(); i++) {\n    MockedDocument doc{{{\"title\", documents[i]}}};\n    indices.Add(i, doc);\n  }\n\n  auto prepare = [&, use_tag = use_tag](string q) {\n    if (use_tag)\n      q = \"@title:{\"s + q + \"}\"s;\n    algo.Init(q, &params);\n  };\n\n  // suffix queries\n\n  prepare(\"*Es\");\n  EXPECT_THAT(algo.Search(&indices).ids, testing::UnorderedElementsAre(0, 1, 2, 3, 4));\n\n  prepare(\"*beRRies\");\n  EXPECT_THAT(algo.Search(&indices).ids, testing::UnorderedElementsAre(0, 1, 2, 4));\n\n  prepare(\"*les\");\n  EXPECT_THAT(algo.Search(&indices).ids, testing::UnorderedElementsAre(3));\n\n  prepare(\"*lueBERRies\");\n  EXPECT_THAT(algo.Search(&indices).ids, testing::UnorderedElementsAre(1));\n\n  prepare(\"*berrY\");\n  EXPECT_THAT(algo.Search(&indices).ids, testing::UnorderedElementsAre(5, 6));\n\n  // infix queries\n\n  prepare(\"*berr*\");\n  EXPECT_THAT(algo.Search(&indices).ids, testing::UnorderedElementsAre(0, 1, 2, 4, 5, 6));\n\n  prepare(\"*ANB*\");\n  EXPECT_THAT(algo.Search(&indices).ids, testing::UnorderedElementsAre(4));\n\n  prepare(\"*berries*\");\n  EXPECT_THAT(algo.Search(&indices).ids, testing::UnorderedElementsAre(0, 1, 2, 4));\n\n  prepare(\"*bL*\");\n  EXPECT_THAT(algo.Search(&indices).ids, testing::UnorderedElementsAre(1, 2));\n}\n\nINSTANTIATE_TEST_SUITE_P(NoTrieText, SearchRaxTest, testing::Values(pair{false, 
false}));\nINSTANTIATE_TEST_SUITE_P(WithTrieText, SearchRaxTest, testing::Values(pair{true, false}));\nINSTANTIATE_TEST_SUITE_P(NoTrieTag, SearchRaxTest, testing::Values(pair{false, true}));\nINSTANTIATE_TEST_SUITE_P(WithTrieTag, SearchRaxTest, testing::Values(pair{true, true}));\n\nstd::string ToBytes(absl::Span<const float> vec) {\n  return string{reinterpret_cast<const char*>(vec.data()), sizeof(float) * vec.size()};\n}\n\nTEST_F(SearchTest, Errors) {\n  auto schema = MakeSimpleSchema(\n      {{\"score\", SchemaField::NUMERIC}, {\"even\", SchemaField::TAG}, {\"pos\", SchemaField::VECTOR}});\n  schema.fields[\"pos\"].special_params = SchemaField::VectorParams{false, 1};\n  FieldIndices indices{schema, kEmptyOptions, PMR_NS::get_default_resource(), nullptr};\n\n  SearchAlgorithm algo{};\n  QueryParams params;\n\n  // Non-existent field\n  algo.Init(\"@cantfindme:[1 10]\", &params);\n  EXPECT_THAT(algo.Search(&indices).error, HasSubstr(\"Invalid field\"));\n\n  // Invalid type\n  algo.Init(\"@even:[1 10]\", &params);\n  EXPECT_THAT(algo.Search(&indices).error, HasSubstr(\"Wrong access type\"));\n\n  // Wrong vector index dimensions\n  params[\"vec\"] = ToBytes({1, 2, 3, 4});\n  algo.Init(\"* => [KNN 5 @pos $vec]\", &params);\n  EXPECT_THAT(algo.Search(&indices).error, HasSubstr(\"Wrong vector index dimensions\"));\n}\n\nTEST_F(SearchTest, MatchNumericRangeWithCommas) {\n  PrepareSchema({{\"f1\", SchemaField::NUMERIC}, {\"draw_end\", SchemaField::NUMERIC}});\n\n  // Main tests for point range with identical values and different delimiters\n  {\n    PrepareQuery(\"@draw_end:[1742916180 1742916180]\");\n    ExpectAll(Map{{\"draw_end\", \"1742916180\"}});\n    ExpectNone(Map{{\"draw_end\", \"1742916181\"}}, Map{{\"draw_end\", \"1742916179\"}});\n    EXPECT_TRUE(Check()) << GetError();\n  }\n\n  {\n    PrepareQuery(\"@draw_end:[1742916180, 1742916180]\");\n    ExpectAll(Map{{\"draw_end\", \"1742916180\"}});\n    ExpectNone(Map{{\"draw_end\", \"1742916181\"}}, 
Map{{\"draw_end\", \"1742916179\"}});\n    EXPECT_TRUE(Check()) << GetError();\n  }\n\n  {\n    PrepareQuery(\"@draw_end:[1742916180 ,1742916180]\");\n    ExpectAll(Map{{\"draw_end\", \"1742916180\"}});\n    ExpectNone(Map{{\"draw_end\", \"1742916181\"}}, Map{{\"draw_end\", \"1742916179\"}});\n    EXPECT_TRUE(Check()) << GetError();\n  }\n\n  {\n    PrepareQuery(\"@draw_end:[1742916180   1742916180]\");\n    ExpectAll(Map{{\"draw_end\", \"1742916180\"}});\n    ExpectNone(Map{{\"draw_end\", \"1742916181\"}}, Map{{\"draw_end\", \"1742916179\"}});\n    EXPECT_TRUE(Check()) << GetError();\n  }\n\n  {\n    PrepareQuery(\"@f1:[100   ,     200]\");\n    ExpectAll(Map{{\"f1\", \"100\"}}, Map{{\"f1\", \"150\"}}, Map{{\"f1\", \"200\"}});\n    ExpectNone(Map{{\"f1\", \"99\"}}, Map{{\"f1\", \"201\"}});\n    EXPECT_TRUE(Check()) << GetError();\n  }\n}\n\nclass KnnTest : public SearchTest {};\n\nclass VectorRangeTest : public ::testing::Test {\n protected:\n  static void SetUpTestSuite() {\n    auto* tlh = mi_heap_get_backing();\n    init_zmalloc_threadlocal(tlh);\n    InitSimSIMD();\n  }\n};\n\nTEST_F(VectorRangeTest, FlatRange1D) {\n  auto schema = MakeSimpleSchema({{\"pos\", SchemaField::VECTOR}});\n  schema.fields[\"pos\"].special_params = SchemaField::VectorParams{false, 1};\n  FieldIndices indices{schema, kEmptyOptions, PMR_NS::get_default_resource(), nullptr};\n\n  // Place 10 points on a line: 1, 2, ..., 10 (avoid zero vector for doc 0)\n  for (size_t i = 0; i < 10; i++) {\n    MockedDocument doc{Map{{\"pos\", ToBytes({float(i + 1)})}}};\n    indices.Add(i, doc);\n  }\n\n  SearchAlgorithm algo{};\n  QueryParams params;\n\n  // Query at 5.0 with radius 1.5 → points at pos 4,5,6 → doc ids 3,4,5\n  {\n    params[\"vec\"] = ToBytes({5.0f});\n    algo.Init(\"@pos:[VECTOR_RANGE 1.5 $vec]=>{$YIELD_DISTANCE_AS: dist}\", &params);\n    auto result = algo.Search(&indices);\n    EXPECT_THAT(result.ids, testing::UnorderedElementsAre(3, 4, 5));\n  }\n\n  // Exact match at pos 4.0 
with radius 0 → only doc 3\n  {\n    params[\"vec\"] = ToBytes({4.0f});\n    algo.Init(\"@pos:[VECTOR_RANGE 0 $vec]=>{$YIELD_DISTANCE_AS: dist}\", &params);\n    auto result = algo.Search(&indices);\n    EXPECT_THAT(result.ids, testing::UnorderedElementsAre(3));\n  }\n\n  // Large radius → all 10 points\n  {\n    params[\"vec\"] = ToBytes({5.0f});\n    algo.Init(\"@pos:[VECTOR_RANGE 100 $vec]=>{$YIELD_DISTANCE_AS: dist}\", &params);\n    auto result = algo.Search(&indices);\n    EXPECT_EQ(result.ids.size(), 10u);\n  }\n\n  // Empty result when radius is too small\n  {\n    params[\"vec\"] = ToBytes({5.5f});\n    algo.Init(\"@pos:[VECTOR_RANGE 0.1 $vec]=>{$YIELD_DISTANCE_AS: dist}\", &params);\n    auto result = algo.Search(&indices);\n    EXPECT_TRUE(result.ids.empty());\n  }\n}\n\nTEST_F(VectorRangeTest, FlatRangeDistancesStoredInScores) {\n  auto schema = MakeSimpleSchema({{\"pos\", SchemaField::VECTOR}});\n  schema.fields[\"pos\"].special_params = SchemaField::VectorParams{false, 1};\n  FieldIndices indices{schema, kEmptyOptions, PMR_NS::get_default_resource(), nullptr};\n\n  // Use i+1 so doc positions are 1..5 (query radius 1.5 from pos 2.0 catches docs 0,1,2)\n  for (size_t i = 0; i < 5; i++) {\n    MockedDocument doc{Map{{\"pos\", ToBytes({float(i + 1)})}}};\n    indices.Add(i, doc);\n  }\n\n  SearchAlgorithm algo{};\n  QueryParams params;\n  params[\"vec\"] = ToBytes({2.0f});\n\n  algo.Init(\"@pos:[VECTOR_RANGE 1.5 $vec]=>{$YIELD_DISTANCE_AS: vector_distance}\", &params);\n  ASSERT_NE(nullptr, algo.GetVectorRangeNode());\n  EXPECT_STREQ(\"vector_distance\", algo.GetVectorRangeNode()->score_alias.c_str());\n\n  auto result = algo.Search(&indices);\n  // Positions 1,2,3 (docs 0,1,2) are within L2 distance 1.5 from query pos 2.0\n  EXPECT_THAT(result.ids, testing::UnorderedElementsAre(0, 1, 2));\n  // knn_scores should contain distances for all matched docs\n  EXPECT_EQ(result.knn_scores.size(), 3u);\n}\n\nTEST_F(VectorRangeTest, 
FlatStarQueryZeroVectorIsValid) {\n  // Regression: @field:* on a FLAT vector index uses GetAllDocsWithNonNullValues(), which\n  // incorrectly skips zero vectors. The zero vector [0.0,...,0.0] is a valid embedding.\n  auto schema = MakeSimpleSchema({{\"pos\", SchemaField::VECTOR}});\n  schema.fields[\"pos\"].special_params = SchemaField::VectorParams{false, 2};\n  FieldIndices indices{schema, kEmptyOptions, PMR_NS::get_default_resource(), nullptr};\n\n  // doc 0: zero vector [0.0, 0.0] — valid embedding, must not be skipped\n  indices.Add(0, MockedDocument{Map{{\"pos\", ToBytes({0.0f, 0.0f})}}});\n  // doc 1: non-zero vector [1.0, 0.0]\n  indices.Add(1, MockedDocument{Map{{\"pos\", ToBytes({1.0f, 0.0f})}}});\n\n  SearchAlgorithm algo{};\n  QueryParams params;\n  algo.Init(\"@pos:*\", &params);\n  auto result = algo.Search(&indices);\n  // Both docs must appear — zero vector is NOT null\n  EXPECT_THAT(result.ids, testing::UnorderedElementsAre(0, 1));\n}\n\nTEST_F(VectorRangeTest, FlatStarQueryRemovedDocNotMatched) {\n  // Regression: @field:* on a FLAT vector index uses GetAllDocsWithNonNullValues(), which\n  // iterates entries_ directly and does NOT respect all_ids_. 
After Remove(), the doc's\n  // slot in entries_ is still non-zero, so the removed doc incorrectly appears in results.\n  auto schema = MakeSimpleSchema({{\"pos\", SchemaField::VECTOR}});\n  schema.fields[\"pos\"].special_params = SchemaField::VectorParams{false, 1};\n  FieldIndices indices{schema, kEmptyOptions, PMR_NS::get_default_resource(), nullptr};\n\n  indices.Add(0, MockedDocument{Map{{\"pos\", ToBytes({1.0f})}}});\n  indices.Add(1, MockedDocument{Map{{\"pos\", ToBytes({2.0f})}}});\n  indices.Add(2, MockedDocument{Map{{\"pos\", ToBytes({3.0f})}}});\n\n  // Remove doc 1\n  MockedDocument doc1{Map{{\"pos\", ToBytes({2.0f})}}};\n  indices.Remove(1, doc1);\n\n  SearchAlgorithm algo{};\n  QueryParams params;\n  algo.Init(\"@pos:*\", &params);\n  auto result = algo.Search(&indices);\n  // Doc 1 was removed, only docs 0 and 2 should appear\n  EXPECT_THAT(result.ids, testing::UnorderedElementsAre(0, 2));\n}\n\nTEST_F(KnnTest, Simple1D) {\n  auto schema = MakeSimpleSchema({{\"even\", SchemaField::TAG}, {\"pos\", SchemaField::VECTOR}});\n  schema.fields[\"pos\"].special_params = SchemaField::VectorParams{false, 1};\n  FieldIndices indices{schema, kEmptyOptions, PMR_NS::get_default_resource(), nullptr};\n\n  // Place points on a straight line\n  for (size_t i = 0; i < 100; i++) {\n    Map values{{{\"even\", i % 2 == 0 ? 
\"YES\" : \"NO\"}, {\"pos\", ToBytes({float(i)})}}};\n    MockedDocument doc{values};\n    indices.Add(i, doc);\n  }\n\n  SearchAlgorithm algo{};\n  QueryParams params;\n\n  // Five closest to 50\n  {\n    params[\"vec\"] = ToBytes({50.0});\n    algo.Init(\"*=>[KNN 5 @pos $vec]\", &params);\n    EXPECT_THAT(algo.Search(&indices).ids, testing::UnorderedElementsAre(48, 49, 50, 51, 52));\n  }\n\n  // Five closest to 0\n  {\n    params[\"vec\"] = ToBytes({0.0});\n    algo.Init(\"*=>[KNN 5 @pos $vec]\", &params);\n    EXPECT_THAT(algo.Search(&indices).ids, testing::UnorderedElementsAre(0, 1, 2, 3, 4));\n  }\n\n  // Five closest to 20, all even\n  {\n    params[\"vec\"] = ToBytes({20.0});\n    algo.Init(\"@even:{yes} =>[KNN 5 @pos $vec]\", &params);\n    EXPECT_THAT(algo.Search(&indices).ids, testing::UnorderedElementsAre(16, 18, 20, 22, 24));\n  }\n\n  // Three closest to 31, all odd\n  {\n    params[\"vec\"] = ToBytes({31.0});\n    algo.Init(\"@even:{no} =>[KNN 3 @pos $vec]\", &params);\n    EXPECT_THAT(algo.Search(&indices).ids, testing::UnorderedElementsAre(29, 31, 33));\n  }\n\n  // Two closest to 70.5\n  {\n    params[\"vec\"] = ToBytes({70.5});\n    algo.Init(\"* =>[KNN 2 @pos $vec]\", &params);\n    EXPECT_THAT(algo.Search(&indices).ids, testing::UnorderedElementsAre(70, 71));\n  }\n\n  // Two closest to 70.5\n  {\n    params[\"vec\"] = ToBytes({70.5});\n    algo.Init(\"* =>[KNN 2 @pos $vec as vector_distance]\", &params);\n    EXPECT_EQ(\"vector_distance\", algo.GetKnnScoreSortOption()->score_field_alias);\n    SearchResult result = algo.Search(&indices);\n    EXPECT_THAT(result.ids, testing::UnorderedElementsAre(70, 71));\n  }\n}\n\nTEST_F(KnnTest, Simple2D) {\n  // Square:\n  // 3      2\n  //    4\n  // 0      1\n  const pair<float, float> kTestCoords[] = {{0, 0}, {1, 0}, {1, 1}, {0, 1}, {0.5, 0.5}};\n\n  auto schema = MakeSimpleSchema({{\"pos\", SchemaField::VECTOR}});\n  schema.fields[\"pos\"].special_params = SchemaField::VectorParams{false, 2};\n  
FieldIndices indices{schema, kEmptyOptions, PMR_NS::get_default_resource(), nullptr};\n\n  for (size_t i = 0; i < ABSL_ARRAYSIZE(kTestCoords); i++) {\n    string coords = ToBytes({kTestCoords[i].first, kTestCoords[i].second});\n    MockedDocument doc{Map{{\"pos\", coords}}};\n    indices.Add(i, doc);\n  }\n\n  SearchAlgorithm algo{};\n  QueryParams params;\n\n  // Single center\n  {\n    params[\"vec\"] = ToBytes({0.5, 0.5});\n    algo.Init(\"* =>[KNN 1 @pos $vec]\", &params);\n    EXPECT_THAT(algo.Search(&indices).ids, testing::UnorderedElementsAre(4));\n  }\n\n  // Lower left\n  {\n    params[\"vec\"] = ToBytes({0, 0});\n    algo.Init(\"* =>[KNN 4 @pos $vec]\", &params);\n    EXPECT_THAT(algo.Search(&indices).ids, testing::UnorderedElementsAre(0, 1, 3, 4));\n  }\n\n  // Upper right\n  {\n    params[\"vec\"] = ToBytes({1, 1});\n    algo.Init(\"* =>[KNN 4 @pos $vec]\", &params);\n    EXPECT_THAT(algo.Search(&indices).ids, testing::UnorderedElementsAre(1, 2, 3, 4));\n  }\n\n  // Request more than there is\n  {\n    params[\"vec\"] = ToBytes({0, 0});\n    algo.Init(\"* => [KNN 10 @pos $vec]\", &params);\n    EXPECT_THAT(algo.Search(&indices).ids, testing::UnorderedElementsAre(0, 1, 2, 3, 4));\n  }\n\n  // Test correct order: (0.7, 0.15)\n  {\n    params[\"vec\"] = ToBytes({0.7, 0.15});\n    algo.Init(\"* => [KNN 10 @pos $vec]\", &params);\n    EXPECT_THAT(algo.Search(&indices).ids, testing::ElementsAre(1, 4, 0, 2, 3));\n  }\n\n  // Test correct order: (0.8, 0.9)\n  {\n    params[\"vec\"] = ToBytes({0.8, 0.9});\n    algo.Init(\"* => [KNN 10 @pos $vec]\", &params);\n    EXPECT_THAT(algo.Search(&indices).ids, testing::ElementsAre(2, 4, 3, 1, 0));\n  }\n}\n\nTEST_F(KnnTest, Cosine) {\n  // Four arrows, closest cosine distance will be the closest by angle\n  // 0 🡢 1 🡣 2 🡠 3 🡡\n  const pair<float, float> kTestCoords[] = {{1, 0}, {0, -1}, {-1, 0}, {0, 1}};\n\n  auto schema = MakeSimpleSchema({{\"pos\", SchemaField::VECTOR}});\n  schema.fields[\"pos\"].special_params =\n      
SchemaField::VectorParams{false, 2, VectorSimilarity::COSINE};\n  FieldIndices indices{schema, kEmptyOptions, PMR_NS::get_default_resource(), nullptr};\n\n  for (size_t i = 0; i < ABSL_ARRAYSIZE(kTestCoords); i++) {\n    string coords = ToBytes({kTestCoords[i].first, kTestCoords[i].second});\n    MockedDocument doc{Map{{\"pos\", coords}}};\n    indices.Add(i, doc);\n  }\n\n  SearchAlgorithm algo{};\n  QueryParams params;\n\n  // Point down\n  {\n    params[\"vec\"] = ToBytes({-0.1, -10});\n    algo.Init(\"* =>[KNN 1 @pos $vec]\", &params);\n    EXPECT_THAT(algo.Search(&indices).ids, testing::UnorderedElementsAre(1));\n  }\n\n  // Point left\n  {\n    params[\"vec\"] = ToBytes({-0.1, -0.01});\n    algo.Init(\"* =>[KNN 1 @pos $vec]\", &params);\n    EXPECT_THAT(algo.Search(&indices).ids, testing::UnorderedElementsAre(2));\n  }\n\n  // Point up\n  {\n    params[\"vec\"] = ToBytes({0, 5});\n    algo.Init(\"* =>[KNN 1 @pos $vec]\", &params);\n    EXPECT_THAT(algo.Search(&indices).ids, testing::UnorderedElementsAre(3));\n  }\n\n  // Point right\n  {\n    params[\"vec\"] = ToBytes({0.2, 0.05});\n    algo.Init(\"* =>[KNN 1 @pos $vec]\", &params);\n    EXPECT_THAT(algo.Search(&indices).ids, testing::UnorderedElementsAre(0));\n  }\n}\n\nTEST_F(KnnTest, IP) {\n  // Test with normalized unit vectors for IP distance\n  // Using unit vectors pointing in different directions\n  const pair<float, float> kTestCoords[] = {\n      {1.0f, 0.0f}, {0.0f, 1.0f}, {-1.0f, 0.0f}, {0.0f, -1.0f}};\n\n  auto schema = MakeSimpleSchema({{\"pos\", SchemaField::VECTOR}});\n  schema.fields[\"pos\"].special_params = SchemaField::VectorParams{false, 2, VectorSimilarity::IP};\n  FieldIndices indices{schema, kEmptyOptions, PMR_NS::get_default_resource(), nullptr};\n\n  for (size_t i = 0; i < ABSL_ARRAYSIZE(kTestCoords); i++) {\n    string coords = ToBytes({kTestCoords[i].first, kTestCoords[i].second});\n    MockedDocument doc{Map{{\"pos\", coords}}};\n    indices.Add(i, doc);\n  }\n\n  SearchAlgorithm 
algo{};\n  QueryParams params;\n\n  // Query with vector pointing right - should find exact match (highest dot product)\n  {\n    params[\"vec\"] = ToBytes({1.0f, 0.0f});\n    algo.Init(\"* =>[KNN 1 @pos $vec]\", &params);\n    EXPECT_THAT(algo.Search(&indices).ids, testing::UnorderedElementsAre(0));\n  }\n\n  // Query with vector pointing up - should find exact match (highest dot product)\n  {\n    params[\"vec\"] = ToBytes({0.0f, 1.0f});\n    algo.Init(\"* =>[KNN 1 @pos $vec]\", &params);\n    EXPECT_THAT(algo.Search(&indices).ids, testing::UnorderedElementsAre(1));\n  }\n}\n\nTEST_F(KnnTest, AddRemove) {\n  auto schema = MakeSimpleSchema({{\"pos\", SchemaField::VECTOR}});\n  schema.fields[\"pos\"].special_params = SchemaField::VectorParams{false, 1, VectorSimilarity::L2};\n  FieldIndices indices{schema, kEmptyOptions, PMR_NS::get_default_resource(), nullptr};\n\n  vector<MockedDocument> documents(10);\n  for (size_t i = 0; i < 10; i++) {\n    documents[i] = Map{{\"pos\", ToBytes({float(i)})}};\n    indices.Add(i, documents[i]);\n  }\n\n  SearchAlgorithm algo{};\n  QueryParams params;\n\n  // search leftmost 5\n  {\n    params[\"vec\"] = ToBytes({-1.0});\n    algo.Init(\"* =>[KNN 5 @pos $vec]\", &params);\n    EXPECT_THAT(algo.Search(&indices).ids, testing::ElementsAre(0, 1, 2, 3, 4));\n  }\n\n  // delete leftmost 5\n  for (size_t i = 0; i < 5; i++)\n    indices.Remove(i, documents[i]);\n\n  // search leftmost 5 again\n  {\n    params[\"vec\"] = ToBytes({-1.0});\n    algo.Init(\"* =>[KNN 5 @pos $vec]\", &params);\n    EXPECT_THAT(algo.Search(&indices).ids, testing::ElementsAre(5, 6, 7, 8, 9));\n  }\n\n  // add removed elements\n  for (size_t i = 0; i < 5; i++)\n    indices.Add(i, documents[i]);\n\n  // repeat first search\n  {\n    params[\"vec\"] = ToBytes({-1.0});\n    algo.Init(\"* =>[KNN 5 @pos $vec]\", &params);\n    EXPECT_THAT(algo.Search(&indices).ids, testing::ElementsAre(0, 1, 2, 3, 4));\n  }\n}\n\nTEST_F(KnnTest, AutoResize) {\n  // Make sure index 
resizes automatically even with a small initial capacity\n  const size_t kInitialCapacity = 5;\n\n  auto schema = MakeSimpleSchema({{\"pos\", SchemaField::VECTOR}});\n  schema.fields[\"pos\"].special_params =\n      SchemaField::VectorParams{false, 1, VectorSimilarity::L2, kInitialCapacity};\n  FieldIndices indices{schema, kEmptyOptions, PMR_NS::get_default_resource(), nullptr};\n\n  for (size_t i = 0; i < 100; i++) {\n    MockedDocument doc{Map{{\"pos\", ToBytes({float(i)})}}};\n    indices.Add(i, doc);\n  }\n\n  EXPECT_EQ(indices.GetAllDocs().size(), 100);\n}\n\n// Parameterized HNSW serialization round-trip test.\n// Parameters: {num_elements, dim, similarity}\nstruct HnswSerParam {\n  size_t num_elements;\n  size_t dim;\n  VectorSimilarity sim;\n\n  friend std::ostream& operator<<(std::ostream& os, const HnswSerParam& p) {\n    const char* sim_name[] = {\"L2\", \"IP\", \"COSINE\"};\n    return os << p.num_elements << \"el_\" << p.dim << \"d_\" << sim_name[static_cast<int>(p.sim)];\n  }\n};\n\nclass HnswSerializationTest : public ::testing::TestWithParam<HnswSerParam> {\n protected:\n  void SetUp() override {\n    InitTLSearchMR(PMR_NS::get_default_resource());\n  }\n\n  void TearDown() override {\n    InitTLSearchMR(nullptr);\n  }\n};\n\nTEST_P(HnswSerializationTest, RoundTrip) {\n  const auto [num_elements, dim, sim] = GetParam();\n\n  SchemaField::VectorParams params;\n  params.use_hnsw = true;\n  params.dim = dim;\n  params.sim = sim;\n  params.capacity = std::max<size_t>(num_elements, 10);\n  params.hnsw_m = 16;\n  params.hnsw_ef_construction = 200;\n\n  HnswVectorIndex original(params, /*copy_vector=*/true);\n\n  std::mt19937 rng(42);\n  std::uniform_real_distribution<float> dist(0.0f, 1.0f);\n  vector<MockedDocument> docs(num_elements);\n  for (size_t i = 0; i < num_elements; i++) {\n    vector<float> coords(dim);\n    for (size_t d = 0; d < dim; d++)\n      coords[d] = dist(rng);\n    docs[i] = MockedDocument::Map{{\"vec\", 
ToBytes(absl::MakeConstSpan(coords))}};\n    original.Add(i, docs[i], \"vec\");\n  }\n\n  // Serialize\n  auto metadata = original.GetMetadata();\n  ASSERT_EQ(metadata.cur_element_count, num_elements);\n\n  std::vector<HnswNodeData> nodes;\n  {\n    auto lock = original.GetReadLock();\n    nodes = original.GetNodesRange(0, metadata.cur_element_count);\n  }\n  ASSERT_EQ(nodes.size(), num_elements);\n\n  // Verify node data integrity\n  for (const auto& node : nodes) {\n    EXPECT_EQ(node.levels_links.size(), static_cast<size_t>(node.level + 1));\n    EXPECT_GT(node.TotalSize(), 0u);\n  }\n\n  // Deserialize into a fresh index\n  HnswVectorIndex restored(params, /*copy_vector=*/true);\n  restored.SetMetadata(metadata);\n  restored.RestoreFromNodes(nodes, metadata);\n\n  // Before UpdateVectorData, all nodes must be marked deleted.\n  // KNN should safely return empty results (no crash from nullptr dereference).\n  if (num_elements > 0) {\n    vector<float> probe(dim, 0.5f);\n    auto pre_results = restored.Knn(probe.data(), 10, std::nullopt);\n    EXPECT_TRUE(pre_results.empty()) << \"All nodes should be deleted before UpdateVectorData\";\n  }\n\n  for (size_t i = 0; i < num_elements; i++)\n    restored.UpdateVectorData(i, docs[i], \"vec\");\n\n  // Metadata must match\n  auto rm = restored.GetMetadata();\n  EXPECT_EQ(rm.cur_element_count, metadata.cur_element_count);\n  EXPECT_EQ(rm.maxlevel, metadata.maxlevel);\n  EXPECT_EQ(rm.enterpoint_node, metadata.enterpoint_node);\n\n  // Graph links must be identical\n  std::vector<HnswNodeData> restored_nodes;\n  {\n    auto lock = restored.GetReadLock();\n    restored_nodes = restored.GetNodesRange(0, rm.cur_element_count);\n  }\n  ASSERT_EQ(restored_nodes.size(), nodes.size());\n  for (size_t i = 0; i < nodes.size(); i++) {\n    EXPECT_EQ(restored_nodes[i].internal_id, nodes[i].internal_id);\n    EXPECT_EQ(restored_nodes[i].global_id, nodes[i].global_id);\n    EXPECT_EQ(restored_nodes[i].level, nodes[i].level);\n    
ASSERT_EQ(restored_nodes[i].levels_links.size(), nodes[i].levels_links.size());\n    for (size_t lvl = 0; lvl < nodes[i].levels_links.size(); lvl++)\n      EXPECT_EQ(restored_nodes[i].levels_links[lvl], nodes[i].levels_links[lvl]);\n  }\n\n  if (num_elements == 0)\n    return;\n\n  // KNN results must match for several queries\n  auto compare_knn = [&](vector<float> query, size_t k) {\n    auto orig = original.Knn(query.data(), k, std::nullopt);\n    auto rest = restored.Knn(query.data(), k, std::nullopt);\n    ASSERT_EQ(orig.size(), rest.size());\n    for (size_t j = 0; j < orig.size(); j++) {\n      EXPECT_EQ(orig[j].second, rest[j].second);\n      EXPECT_NEAR(orig[j].first, rest[j].first, 1e-5);\n    }\n  };\n\n  size_t k = std::min<size_t>(num_elements, 10);\n  compare_knn(vector<float>(dim, 0.0f), k);\n  compare_knn(vector<float>(dim, 0.5f), k);\n  compare_knn(vector<float>(dim, 1.0f), k);\n\n  // Filtered KNN must also match\n  vector<GlobalDocId> allowed;\n  for (size_t i = 0; i < num_elements; i += 2)\n    allowed.push_back(i);\n  size_t fk = std::min<size_t>(allowed.size(), 5);\n  vector<float> q(dim, 0.5f);\n  auto orig_f = original.Knn(q.data(), fk, std::nullopt, allowed);\n  auto rest_f = restored.Knn(q.data(), fk, std::nullopt, allowed);\n  ASSERT_EQ(orig_f.size(), rest_f.size());\n  for (size_t i = 0; i < orig_f.size(); i++) {\n    EXPECT_EQ(orig_f[i].second, rest_f[i].second);\n    EXPECT_NEAR(orig_f[i].first, rest_f[i].first, 1e-5);\n  }\n}\n\nINSTANTIATE_TEST_SUITE_P(HnswSer, HnswSerializationTest,\n                         testing::Values(HnswSerParam{0, 2, VectorSimilarity::L2},\n                                         HnswSerParam{10, 2, VectorSimilarity::L2},\n                                         HnswSerParam{1000, 4, VectorSimilarity::L2},\n                                         HnswSerParam{10000, 8, VectorSimilarity::L2},\n                                         HnswSerParam{10, 3, VectorSimilarity::COSINE},\n                         
                HnswSerParam{1000, 4, VectorSimilarity::COSINE},\n                                         HnswSerParam{10, 2, VectorSimilarity::IP},\n                                         HnswSerParam{1000, 4, VectorSimilarity::IP}),\n                         [](const testing::TestParamInfo<HnswSerParam>& info) {\n                           std::ostringstream name;\n                           name << info.param;\n                           return name.str();\n                         });\n\n// Test fixture for HNSW deferred operations.\n// Verifies that Add/Remove called while a read lock is held are properly\n// deferred and replayed once the lock is released.\nclass HnswDeferredOpsTest : public ::testing::Test {\n protected:\n  static constexpr size_t kDim = 4;\n  static constexpr size_t kCapacity = 100;\n\n  void SetUp() override {\n    InitTLSearchMR(PMR_NS::get_default_resource());\n\n    SchemaField::VectorParams params;\n    params.use_hnsw = true;\n    params.dim = kDim;\n    params.sim = VectorSimilarity::L2;\n    params.capacity = kCapacity;\n    params.hnsw_m = 16;\n    params.hnsw_ef_construction = 200;\n    index_ = std::make_unique<HnswVectorIndex>(params, /*copy_vector=*/true);\n  }\n\n  void TearDown() override {\n    index_.reset();\n    InitTLSearchMR(nullptr);\n  }\n\n  MockedDocument MakeDoc(std::initializer_list<float> coords) {\n    return MockedDocument::Map{{\"vec\", ToBytes(coords)}};\n  }\n\n  // Helper: run KNN for the zero vector and return the set of found GlobalDocIds.\n  absl::flat_hash_set<GlobalDocId> KnnIds(size_t k) {\n    vector<float> q(kDim, 0.0f);\n    auto results = index_->Knn(q.data(), k, std::nullopt);\n    absl::flat_hash_set<GlobalDocId> ids;\n    for (auto& [dist, id] : results)\n      ids.insert(id);\n    return ids;\n  }\n\n  std::unique_ptr<HnswVectorIndex> index_;\n};\n\nTEST_F(HnswDeferredOpsTest, AddWhileReadLocked) {\n  // Hold a read lock (simulating serialization), then add elements.\n  auto doc0 = 
MakeDoc({1, 0, 0, 0});\n  auto doc1 = MakeDoc({0, 1, 0, 0});\n\n  {\n    auto lock = index_->GetReadLock();\n\n    // These Adds cannot acquire the write lock and must be deferred.\n    index_->Add(0, doc0, \"vec\");\n    index_->Add(1, doc1, \"vec\");\n\n    // While the read lock is still held, KNN should not find the deferred docs.\n    auto ids = KnnIds(10);\n    EXPECT_TRUE(ids.empty());\n  }\n\n  // After the read lock is released, deferred ops should replay.\n  // The next operation that touches the index triggers ProcessDeferred.\n  auto ids = KnnIds(10);\n  EXPECT_EQ(ids.size(), 2u);\n  EXPECT_TRUE(ids.contains(0));\n  EXPECT_TRUE(ids.contains(1));\n}\n\nTEST_F(HnswDeferredOpsTest, RemoveWhileReadLocked) {\n  // Pre-populate the index.\n  auto doc0 = MakeDoc({1, 0, 0, 0});\n  auto doc1 = MakeDoc({0, 1, 0, 0});\n  auto doc2 = MakeDoc({0, 0, 1, 0});\n  index_->Add(0, doc0, \"vec\");\n  index_->Add(1, doc1, \"vec\");\n  index_->Add(2, doc2, \"vec\");\n\n  {\n    auto lock = index_->GetReadLock();\n\n    // Remove doc1 while read-locked — should be deferred.\n    index_->Remove(1, doc1, \"vec\");\n\n    // doc1 is still visible because the remove is deferred.\n    auto ids = KnnIds(10);\n    EXPECT_EQ(ids.size(), 3u);\n  }\n\n  // After releasing the lock, removal should take effect.\n  auto ids = KnnIds(10);\n  EXPECT_EQ(ids.size(), 2u);\n  EXPECT_TRUE(ids.contains(0));\n  EXPECT_TRUE(ids.contains(2));\n  EXPECT_FALSE(ids.contains(1));\n}\n\nTEST_F(HnswDeferredOpsTest, DuplicateDeferredOpsKeepLatest) {\n  // Pre-populate with doc0.\n  auto doc0 = MakeDoc({1, 0, 0, 0});\n  index_->Add(0, doc0, \"vec\");\n\n  auto doc1 = MakeDoc({0, 1, 0, 0});\n\n  {\n    auto lock = index_->GetReadLock();\n\n    // Add doc1, then remove doc1 — both deferred for the same id.\n    // Only the last operation (remove) should survive.\n    index_->Add(1, doc1, \"vec\");\n    index_->Remove(1, doc1, \"vec\");\n  }\n\n  // After lock release, doc1 should not exist (remove was 
last).\n  auto ids = KnnIds(10);\n  EXPECT_EQ(ids.size(), 1u);\n  EXPECT_TRUE(ids.contains(0));\n  EXPECT_FALSE(ids.contains(1));\n}\n\nTEST_F(HnswDeferredOpsTest, DuplicateDeferredOpsAddOverridesRemove) {\n  // Pre-populate with doc0 and doc1.\n  auto doc0 = MakeDoc({1, 0, 0, 0});\n  auto doc1 = MakeDoc({0, 1, 0, 0});\n  index_->Add(0, doc0, \"vec\");\n  index_->Add(1, doc1, \"vec\");\n\n  auto doc1_new = MakeDoc({0, 0, 1, 0});\n\n  {\n    auto lock = index_->GetReadLock();\n\n    // Remove doc1, then re-add it with new data — the add should win.\n    index_->Remove(1, doc1, \"vec\");\n    index_->Add(1, doc1_new, \"vec\");\n  }\n\n  // After lock release, doc1 should still be present with updated data.\n  auto ids = KnnIds(10);\n  EXPECT_EQ(ids.size(), 2u);\n  EXPECT_TRUE(ids.contains(0));\n  EXPECT_TRUE(ids.contains(1));\n}\n\n// Verify that Remove without a read lock also works correctly.\nTEST_F(HnswDeferredOpsTest, RemoveWithoutReadLock) {\n  auto doc0 = MakeDoc({1, 0, 0, 0});\n  auto doc1 = MakeDoc({0, 1, 0, 0});\n  index_->Add(0, doc0, \"vec\");\n  index_->Add(1, doc1, \"vec\");\n\n  index_->Remove(1, doc1, \"vec\");\n\n  auto ids = KnnIds(10);\n  EXPECT_EQ(ids.size(), 1u);\n  EXPECT_TRUE(ids.contains(0));\n  EXPECT_FALSE(ids.contains(1));\n}\n\nclass HnswSubsetKnnTest : public ::testing::TestWithParam<VectorSimilarity> {\n protected:\n  void SetUp() override {\n    InitTLSearchMR(PMR_NS::get_default_resource());\n  }\n\n  void TearDown() override {\n    InitTLSearchMR(nullptr);\n  }\n\n  // Helper to create a simple index with vectors on a line for easy verification\n  unique_ptr<HnswVectorIndex> CreateSimple1DIndex(size_t num_elements, VectorSimilarity sim) {\n    SchemaField::VectorParams params;\n    params.use_hnsw = true;\n    params.dim = 1;\n    params.sim = sim;\n    params.capacity = std::max<size_t>(num_elements, 10);\n    params.hnsw_m = 16;\n    params.hnsw_ef_construction = 200;\n\n    auto index = make_unique<HnswVectorIndex>(params, 
/*copy_vector=*/true);\n\n    for (size_t i = 0; i < num_elements; i++) {\n      vector<float> coords = {static_cast<float>(i)};\n      auto doc = MockedDocument::Map{{\"vec\", ToBytes(absl::MakeConstSpan(coords))}};\n      index->Add(i, MockedDocument(doc), \"vec\");\n    }\n\n    return index;\n  }\n\n  // Helper to create a 2D index with unit-circle vectors, for COSINE similarity testing.\n  // Vector i is placed at angle i * (2π / num_elements), giving meaningful cosine distances.\n  unique_ptr<HnswVectorIndex> CreateCircle2DIndex(size_t num_elements, VectorSimilarity sim) {\n    SchemaField::VectorParams params;\n    params.use_hnsw = true;\n    params.dim = 2;\n    params.sim = sim;\n    params.capacity = std::max<size_t>(num_elements, 10);\n    params.hnsw_m = 16;\n    params.hnsw_ef_construction = 200;\n\n    auto index = make_unique<HnswVectorIndex>(params, /*copy_vector=*/true);\n\n    const float step = 2.0f * static_cast<float>(acos(-1.0)) / static_cast<float>(num_elements);\n    for (size_t i = 0; i < num_elements; i++) {\n      float angle = step * static_cast<float>(i);\n      vector<float> coords = {cosf(angle), sinf(angle)};\n      auto doc = MockedDocument::Map{{\"vec\", ToBytes(absl::MakeConstSpan(coords))}};\n      index->Add(i, MockedDocument(doc), \"vec\");\n    }\n\n    return index;\n  }\n};\n\nTEST_P(HnswSubsetKnnTest, CorrectResults) {\n  // Test that SubsetKnn returns correct top-k from a subset\n  auto sim = GetParam();\n  auto index = CreateSimple1DIndex(100, sim);\n\n  vector<float> query = {50.0f};\n  vector<GlobalDocId> subset;\n\n  // Create subset: only even numbers from 40 to 60\n  for (size_t i = 40; i <= 60; i += 2) {\n    subset.push_back(i);\n  }\n\n  // Ask for top 5\n  auto results = index->SubsetKnn(query.data(), 5, subset);\n\n  // Should get exactly 5 results\n  ASSERT_EQ(results.size(), 5u);\n\n  // All results should be from the subset\n  for (const auto& [dist, id] : results) {\n    
EXPECT_TRUE(std::find(subset.begin(), subset.end(), id) != subset.end())\n        << \"Result ID \" << id << \" not in subset\";\n  }\n\n  // For L2 similarity, verify the closest point is 50\n  if (sim == VectorSimilarity::L2) {\n    bool found_50 = false;\n    for (const auto& [dist, id] : results) {\n      if (id == 50) {\n        found_50 = true;\n        break;\n      }\n    }\n    EXPECT_TRUE(found_50) << \"For L2, point 50 should be in top 5 closest to query {50}\";\n  }\n}\n\nTEST_P(HnswSubsetKnnTest, EmptySubset) {\n  // Test edge case: empty subset\n  auto sim = GetParam();\n  auto index = CreateSimple1DIndex(10, sim);\n\n  vector<float> query = {5.0f};\n  vector<GlobalDocId> empty_subset;\n\n  auto results = index->SubsetKnn(query.data(), 5, empty_subset);\n  EXPECT_TRUE(results.empty()) << \"SubsetKnn with empty subset should return empty results\";\n}\n\nTEST_P(HnswSubsetKnnTest, KEqualsZero) {\n  // Test edge case: k = 0\n  auto sim = GetParam();\n  auto index = CreateSimple1DIndex(10, sim);\n\n  vector<float> query = {5.0f};\n  vector<GlobalDocId> subset = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9};\n\n  auto results = index->SubsetKnn(query.data(), 0, subset);\n  EXPECT_TRUE(results.empty()) << \"SubsetKnn with k=0 should return empty results\";\n}\n\nTEST_P(HnswSubsetKnnTest, KGreaterThanSubsetSize) {\n  // Test edge case: k > number of valid documents in subset\n  auto sim = GetParam();\n  auto index = CreateSimple1DIndex(10, sim);\n\n  vector<float> query = {5.0f};\n  vector<GlobalDocId> subset = {1, 3, 5};  // Only 3 elements\n\n  auto results = index->SubsetKnn(query.data(), 10, subset);  // Ask for 10\n  EXPECT_EQ(results.size(), 3u) << \"SubsetKnn should return at most subset.size() results\";\n\n  // Verify all 3 are returned\n  vector<GlobalDocId> result_ids;\n  for (const auto& [dist, id] : results) {\n    result_ids.push_back(id);\n  }\n  EXPECT_THAT(result_ids, testing::UnorderedElementsAre(1, 3, 5));\n}\n\nTEST_P(HnswSubsetKnnTest, NonExistentIds) 
{\n  // Test that non-existent IDs in subset are gracefully ignored\n  auto sim = GetParam();\n  auto index = CreateSimple1DIndex(10, sim);\n\n  vector<float> query = {5.0f};\n  // Mix of valid (0-9) and invalid (100-105) IDs\n  vector<GlobalDocId> subset = {100, 4, 101, 5, 102, 6, 103, 104, 105};\n\n  auto results = index->SubsetKnn(query.data(), 3, subset);\n  EXPECT_EQ(results.size(), 3u);\n\n  // Should only return valid IDs: 5, 4, 6 (closest to 5)\n  vector<GlobalDocId> result_ids;\n  for (const auto& [dist, id] : results) {\n    result_ids.push_back(id);\n  }\n  EXPECT_THAT(result_ids, testing::UnorderedElementsAre(4, 5, 6));\n}\n\nTEST_P(HnswSubsetKnnTest, AllDeletedDocuments) {\n  // Test edge case: all documents in subset are marked deleted\n  auto sim = GetParam();\n\n  SchemaField::VectorParams params;\n  params.use_hnsw = true;\n  params.dim = 1;\n  params.sim = sim;\n  params.capacity = 10;\n  params.hnsw_m = 16;\n  params.hnsw_ef_construction = 200;\n\n  HnswVectorIndex index(params, /*copy_vector=*/true);\n\n  // Add and then remove documents\n  vector<MockedDocument> docs;\n  for (size_t i = 0; i < 5; i++) {\n    vector<float> coords = {static_cast<float>(i)};\n    docs.push_back(\n        MockedDocument(MockedDocument::Map{{\"vec\", ToBytes(absl::MakeConstSpan(coords))}}));\n    index.Add(i, docs[i], \"vec\");\n  }\n\n  // Delete all documents\n  for (size_t i = 0; i < 5; i++) {\n    index.Remove(i, docs[i], \"vec\");\n  }\n\n  vector<float> query = {2.5f};\n  vector<GlobalDocId> subset = {0, 1, 2, 3, 4};\n\n  auto results = index.SubsetKnn(query.data(), 3, subset);\n  EXPECT_TRUE(results.empty()) << \"SubsetKnn should return empty when all docs are deleted\";\n}\n\nTEST_P(HnswSubsetKnnTest, MixedDeletedAndValidDocs) {\n  // Test with a mix of deleted and valid documents\n  auto sim = GetParam();\n\n  SchemaField::VectorParams params;\n  params.use_hnsw = true;\n  params.dim = 1;\n  params.sim = sim;\n  params.capacity = 10;\n  params.hnsw_m = 
16;\n  params.hnsw_ef_construction = 200;\n\n  HnswVectorIndex index(params, /*copy_vector=*/true);\n\n  // Add documents\n  vector<MockedDocument> docs;\n  for (size_t i = 0; i < 10; i++) {\n    vector<float> coords = {static_cast<float>(i)};\n    docs.push_back(\n        MockedDocument(MockedDocument::Map{{\"vec\", ToBytes(absl::MakeConstSpan(coords))}}));\n    index.Add(i, docs[i], \"vec\");\n  }\n\n  // Delete even documents\n  for (size_t i = 0; i < 10; i += 2) {\n    index.Remove(i, docs[i], \"vec\");\n  }\n\n  vector<float> query = {5.0f};\n  // Subset includes both deleted (even) and valid (odd) docs\n  vector<GlobalDocId> subset = {2, 3, 4, 5, 6, 7, 8};\n\n  auto results = index.SubsetKnn(query.data(), 3, subset);\n  EXPECT_EQ(results.size(), 3u);\n\n  // Should only return odd (non-deleted) IDs: 5, 3, 7 (closest to 5)\n  vector<GlobalDocId> result_ids;\n  for (const auto& [dist, id] : results) {\n    result_ids.push_back(id);\n  }\n  EXPECT_THAT(result_ids, testing::UnorderedElementsAre(3, 5, 7));\n}\n\nTEST_P(HnswSubsetKnnTest, CompareWithFilteredKnn) {\n  // Integration test: verify SubsetKnn produces similar results to filtered Knn\n  // SubsetKnn uses brute-force exact search, while Knn uses HNSW approximate search\n  // So results may differ slightly, but should have significant overlap\n  constexpr double kMinOverlapRatio = 0.7;  // 70% minimum overlap threshold\n\n  auto sim = GetParam();\n\n  // COSINE similarity is undefined for 1D positive vectors (all share the same direction,\n  // so all cosine distances equal 0). 
Use 2D unit-circle vectors instead, where element i\n  // is at angle i * 2π/100, giving each pair a distinct, meaningful cosine distance.\n  unique_ptr<HnswVectorIndex> index;\n  vector<float> query;\n  if (sim == VectorSimilarity::COSINE) {\n    constexpr size_t kNumElements = 100;\n    index = CreateCircle2DIndex(kNumElements, sim);\n    const float step = 2.0f * static_cast<float>(acos(-1.0)) / static_cast<float>(kNumElements);\n    float angle = step * 50.0f;\n    query = {cosf(angle), sinf(angle)};\n  } else {\n    index = CreateSimple1DIndex(100, sim);\n    query = {50.0f};\n  }\n\n  vector<GlobalDocId> subset;\n\n  // Create a small subset (well below typical 8192 threshold)\n  for (size_t i = 40; i <= 60; i++) {\n    subset.push_back(i);\n  }\n\n  size_t k = 10;\n\n  // Get results from SubsetKnn (exact brute-force)\n  auto subset_results = index->SubsetKnn(query.data(), k, subset);\n\n  // Get results from regular filtered Knn (HNSW approximate)\n  auto knn_results = index->Knn(query.data(), k, std::nullopt, subset);\n\n  // Both should return k results (or fewer if subset is smaller)\n  EXPECT_LE(subset_results.size(), k);\n  EXPECT_LE(knn_results.size(), k);\n\n  // Extract IDs from both\n  std::set<GlobalDocId> subset_ids;\n  for (const auto& [dist, id] : subset_results) {\n    subset_ids.insert(id);\n  }\n\n  std::set<GlobalDocId> knn_ids;\n  for (const auto& [dist, id] : knn_results) {\n    knn_ids.insert(id);\n  }\n\n  // Count overlap - since HNSW is approximate, we expect good but not perfect overlap\n  size_t overlap = 0;\n  for (const auto& id : subset_ids) {\n    if (knn_ids.count(id) > 0) {\n      overlap++;\n    }\n  }\n\n  // Expect at least kMinOverlapRatio overlap (HNSW is approximate, so some difference is expected)\n  size_t min_overlap =\n      static_cast<size_t>(std::min(subset_ids.size(), knn_ids.size()) * kMinOverlapRatio);\n  EXPECT_GE(overlap, min_overlap) << \"Expected at least \" << min_overlap\n                                  
<< \" overlapping results, got \" << overlap;\n}\n\nINSTANTIATE_TEST_SUITE_P(SubsetKnnSimilarities, HnswSubsetKnnTest,\n                         testing::Values(VectorSimilarity::L2, VectorSimilarity::COSINE,\n                                         VectorSimilarity::IP),\n                         [](const testing::TestParamInfo<VectorSimilarity>& info) {\n                           switch (info.param) {\n                             case VectorSimilarity::L2:\n                               return \"L2\";\n                             case VectorSimilarity::COSINE:\n                               return \"COSINE\";\n                             case VectorSimilarity::IP:\n                               return \"IP\";\n                             default:\n                               return \"Unknown\";\n                           }\n                         });\n\n// Tests for HnswVectorIndex::RangeQuery\nclass HnswRangeQueryTest : public ::testing::TestWithParam<VectorSimilarity> {\n protected:\n  void SetUp() override {\n    InitTLSearchMR(PMR_NS::get_default_resource());\n  }\n\n  void TearDown() override {\n    InitTLSearchMR(nullptr);\n  }\n\n  // 1-D index: doc i has vector {float(i)}, GlobalDocId = i\n  unique_ptr<HnswVectorIndex> CreateSimple1DIndex(size_t num_elements) {\n    SchemaField::VectorParams params;\n    params.use_hnsw = true;\n    params.dim = 1;\n    params.sim = VectorSimilarity::L2;\n    params.capacity = std::max<size_t>(num_elements, 10);\n    params.hnsw_m = 16;\n    params.hnsw_ef_construction = 200;\n\n    auto index = make_unique<HnswVectorIndex>(params, /*copy_vector=*/true);\n    for (size_t i = 0; i < num_elements; i++) {\n      vector<float> coords = {static_cast<float>(i)};\n      index->Add(i,\n                 MockedDocument(MockedDocument::Map{{\"vec\", ToBytes(absl::MakeConstSpan(coords))}}),\n                 \"vec\");\n    }\n    return index;\n  }\n};\n\nTEST_P(HnswRangeQueryTest, BasicRange) {\n  // 10 docs at 
positions 0..9. Query at 5.0 with radius 1.5 → docs 4,5,6 (dist 1.0,0.0,1.0)\n  (void)GetParam();  // L2 only for 1-D\n  auto index = CreateSimple1DIndex(10);\n\n  vector<float> query = {5.0f};\n  auto results = index->RangeQuery(query.data(), 1.5f);\n\n  set<GlobalDocId> ids;\n  for (const auto& [dist, id] : results)\n    ids.insert(id);\n\n  EXPECT_THAT(ids, testing::UnorderedElementsAre(4, 5, 6));\n}\n\nTEST_P(HnswRangeQueryTest, ExactMatch) {\n  // Radius 0: only the doc at exact position\n  (void)GetParam();\n  auto index = CreateSimple1DIndex(10);\n\n  vector<float> query = {3.0f};\n  auto results = index->RangeQuery(query.data(), 0.0f);\n\n  ASSERT_EQ(results.size(), 1u);\n  EXPECT_EQ(results[0].second, GlobalDocId{3});\n  EXPECT_FLOAT_EQ(results[0].first, 0.0f);\n}\n\nTEST_P(HnswRangeQueryTest, LargeRadiusReturnsAll) {\n  (void)GetParam();\n  auto index = CreateSimple1DIndex(20);\n\n  vector<float> query = {10.0f};\n  auto results = index->RangeQuery(query.data(), 1000.0f);\n\n  EXPECT_EQ(results.size(), 20u);\n}\n\nTEST_P(HnswRangeQueryTest, EmptyResultOutsideRadius) {\n  (void)GetParam();\n  auto index = CreateSimple1DIndex(10);\n\n  vector<float> query = {5.5f};\n  auto results = index->RangeQuery(query.data(), 0.1f);\n\n  EXPECT_TRUE(results.empty());\n}\n\nTEST_P(HnswRangeQueryTest, EmptyIndex) {\n  (void)GetParam();\n  auto index = CreateSimple1DIndex(0);\n\n  vector<float> query = {0.0f};\n  auto results = index->RangeQuery(query.data(), 100.0f);\n\n  EXPECT_TRUE(results.empty());\n}\n\nTEST_P(HnswRangeQueryTest, DistancesCorrect) {\n  // Verify returned distances match actual L2 distances\n  (void)GetParam();\n  auto index = CreateSimple1DIndex(10);\n\n  vector<float> query = {5.0f};\n  auto results = index->RangeQuery(query.data(), 2.0f);  // docs 3,4,5,6,7\n\n  EXPECT_EQ(results.size(), 5u);\n  for (const auto& [dist, id] : results) {\n    float expected = std::abs(static_cast<float>(id) - 5.0f);\n    // L2Distance returns sqrt(sum of squares); 
for 1-D: sqrt((a-b)²) = |a-b|\n    EXPECT_FLOAT_EQ(dist, expected);\n  }\n}\n\nTEST_P(HnswRangeQueryTest, DeletedDocNotReturned) {\n  (void)GetParam();\n  auto index = CreateSimple1DIndex(10);\n\n  // Remove doc 5 (at position 5.0, distance 0 from query)\n  index->Remove(5);\n\n  vector<float> query = {5.0f};\n  auto results = index->RangeQuery(query.data(), 1.5f);\n\n  set<GlobalDocId> ids;\n  for (const auto& [dist, id] : results)\n    ids.insert(id);\n\n  EXPECT_THAT(ids, testing::UnorderedElementsAre(4, 6));\n  EXPECT_THAT(ids, testing::Not(testing::Contains(GlobalDocId{5})));\n}\n\nTEST_P(HnswRangeQueryTest, ConsistentWithBruteForce) {\n  // Compare RangeQuery results against brute-force SubsetKnn-based check\n  (void)GetParam();\n  const size_t n = 50;\n  auto index = CreateSimple1DIndex(n);\n\n  vector<float> query = {25.0f};\n  float radius = 5.0f;\n\n  auto results = index->RangeQuery(query.data(), radius);\n\n  // Brute force: collect all docs within radius.\n  // L2Distance returns |a-b| for 1-D vectors (actual Euclidean, not squared).\n  set<GlobalDocId> expected;\n  for (size_t i = 0; i < n; i++) {\n    float dist = std::abs(static_cast<float>(i) - 25.0f);\n    if (dist <= radius)\n      expected.insert(i);\n  }\n\n  set<GlobalDocId> got;\n  for (const auto& [dist, id] : results)\n    got.insert(id);\n\n  EXPECT_EQ(got, expected);\n}\n\nINSTANTIATE_TEST_SUITE_P(HnswRangeL2, HnswRangeQueryTest, testing::Values(VectorSimilarity::L2),\n                         [](const testing::TestParamInfo<VectorSimilarity>&) { return \"L2\"; });\n\nTEST_F(SearchTest, GeoSearch) {\n  auto schema = MakeSimpleSchema({{\"name\", SchemaField::TEXT}, {\"location\", SchemaField::GEO}});\n  FieldIndices indices{schema, kEmptyOptions, PMR_NS::get_default_resource(), nullptr};\n\n  indices.Add(0, MockedDocument(Map{{\"name\", \"Mountain View\"}, {\"location\", \"-122.08, 37.386\"}}));\n  indices.Add(1, MockedDocument(Map{{\"name\", \"Palo Alto\"}, {\"location\", \"-122.143, 
37.444\"}}));\n  indices.Add(2, MockedDocument(Map{{\"name\", \"San Jose\"}, {\"location\", \"-121.886, 37.338\"}}));\n  indices.Add(3, MockedDocument(Map{{\"name\", \"San Francisco\"}, {\"location\", \"-122.419, 37.774\"}}));\n\n  SearchAlgorithm algo{};\n  QueryParams params;\n\n  // Search around Mountain View 30 miles - San Francisco not included\n  {\n    algo.Init(\"@location:[-122.083 37.386 30 mi]\", &params);\n    EXPECT_THAT(algo.Search(&indices).ids, testing::UnorderedElementsAre(0, 1, 2));\n  }\n\n  // Search around Mountain View 50 miles - all points included\n  {\n    algo.Init(\"@location:[-122.083 37.386 50 mi]\", &params);\n    EXPECT_THAT(algo.Search(&indices).ids, testing::UnorderedElementsAre(0, 1, 2, 3));\n  }\n\n  // Return all indexes\n  {\n    algo.Init(\"@location:*\", &params);\n    EXPECT_THAT(algo.Search(&indices).ids, testing::UnorderedElementsAre(0, 1, 2, 3));\n  }\n\n  // Search around Mountain View 50 miles - all points included and filter on prefix\n  {\n    algo.Init(\"San* @location:[-122.083 37.386 50 mi]\", &params);\n    EXPECT_THAT(algo.Search(&indices).ids, testing::UnorderedElementsAre(2, 3));\n  }\n\n  // Add duplicate point of San Francisco and search again to include this point also\n  {\n    indices.Add(4,\n                MockedDocument(Map{{\"name\", \"San Francisco\"}, {\"location\", \"-122.419, 37.774\"}}));\n    algo.Init(\"San* @location:[-122.083 37.386 50 mi]\", &params);\n    EXPECT_THAT(algo.Search(&indices).ids, testing::UnorderedElementsAre(2, 3, 4));\n  }\n\n  // Remove first index of San Francisco (id = 3) and search\n  {\n    indices.Remove(\n        3, MockedDocument(Map{{\"name\", \"San Francisco\"}, {\"location\", \"-122.419, 37.774\"}}));\n    algo.Init(\"San* @location:[-122.083 37.386 50 mi]\", &params);\n    EXPECT_THAT(algo.Search(&indices).ids, testing::UnorderedElementsAre(2, 4));\n  }\n}\n\nTEST_F(SearchTest, VectorDistanceBasic) {\n  // Test basic vector distance calculations\n  std::vector<float> vec1 
= {1.0f, 2.0f, 3.0f};\n  std::vector<float> vec2 = {4.0f, 5.0f, 6.0f};\n\n  // Test L2 distance\n  float l2_dist = VectorDistance(vec1.data(), vec2.data(), 3, VectorSimilarity::L2);\n  EXPECT_GT(l2_dist, 0.0f);\n  EXPECT_LT(l2_dist, 10.0f);  // Should be reasonable value\n\n  // Test Cosine distance\n  float cos_dist = VectorDistance(vec1.data(), vec2.data(), 3, VectorSimilarity::COSINE);\n  EXPECT_GE(cos_dist, 0.0f);\n  EXPECT_LE(cos_dist, 2.0f);  // Cosine distance range\n\n  // Test IP distance\n  float ip_dist = VectorDistance(vec1.data(), vec2.data(), 3, VectorSimilarity::IP);\n  // IP distance can be negative for non-normalized vectors\n  EXPECT_NE(ip_dist, 0.0f);  // Should be non-zero for different vectors\n\n  // Test identical vectors\n  float l2_same = VectorDistance(vec1.data(), vec1.data(), 3, VectorSimilarity::L2);\n  EXPECT_NEAR(l2_same, 0.0f, 1e-6);\n\n  float cos_same = VectorDistance(vec1.data(), vec1.data(), 3, VectorSimilarity::COSINE);\n  EXPECT_NEAR(cos_same, 0.0f, 1e-6);\n\n  float ip_same = VectorDistance(vec1.data(), vec1.data(), 3, VectorSimilarity::IP);\n  // For identical vectors: IP = 1 - dot_product(v, v) = 1 - ||v||^2\n  // For vec1 = {1, 2, 3}: ||v||^2 = 1 + 4 + 9 = 14, so IP = 1 - 14 = -13\n  EXPECT_LT(ip_same, 0.0f);  // Should be negative for non-normalized vectors\n}\n\nTEST_F(SearchTest, VectorDistanceConsistency) {\n  // Test that results are consistent across multiple calls\n  std::vector<float> vec1 = {0.1f, 0.2f, 0.3f, 0.4f, 0.5f};\n  std::vector<float> vec2 = {0.6f, 0.7f, 0.8f, 0.9f, 1.0f};\n\n  float l2_dist1 = VectorDistance(vec1.data(), vec2.data(), 5, VectorSimilarity::L2);\n  float l2_dist2 = VectorDistance(vec1.data(), vec2.data(), 5, VectorSimilarity::L2);\n  EXPECT_EQ(l2_dist1, l2_dist2);\n\n  float cos_dist1 = VectorDistance(vec1.data(), vec2.data(), 5, VectorSimilarity::COSINE);\n  float cos_dist2 = VectorDistance(vec1.data(), vec2.data(), 5, VectorSimilarity::COSINE);\n  EXPECT_EQ(cos_dist1, cos_dist2);\n\n  
float ip_dist1 = VectorDistance(vec1.data(), vec2.data(), 5, VectorSimilarity::IP);\n  float ip_dist2 = VectorDistance(vec1.data(), vec2.data(), 5, VectorSimilarity::IP);\n  EXPECT_EQ(ip_dist1, ip_dist2);\n}\n\nstatic void BM_VectorSearch(benchmark::State& state) {\n  // Ensure SimSIMD dynamic dispatch is initialized for the benchmark\n  InitSimSIMD();\n  unsigned ndims = state.range(0);\n  unsigned nvecs = state.range(1);\n\n  auto schema = MakeSimpleSchema({{\"pos\", SchemaField::VECTOR}});\n  schema.fields[\"pos\"].special_params = SchemaField::VectorParams{false, ndims};\n  FieldIndices indices{schema, kEmptyOptions, PMR_NS::get_default_resource(), nullptr};\n\n  auto random_vec = [ndims]() {\n    vector<float> coords;\n    for (size_t j = 0; j < ndims; j++)\n      coords.push_back(static_cast<float>(rand()) / static_cast<float>(RAND_MAX));\n    return coords;\n  };\n\n  for (size_t i = 0; i < nvecs; i++) {\n    auto rv = random_vec();\n    MockedDocument doc{Map{{\"pos\", ToBytes(rv)}}};\n    indices.Add(i, doc);\n  }\n\n  SearchAlgorithm algo{};\n  QueryParams params;\n\n  auto rv = random_vec();\n  params[\"vec\"] = ToBytes(rv);\n  algo.Init(\"* =>[KNN 1 @pos $vec]\", &params);\n\n  while (state.KeepRunningBatch(10)) {\n    for (size_t i = 0; i < 10; i++)\n      benchmark::DoNotOptimize(algo.Search(&indices));\n  }\n}\n\nBENCHMARK(BM_VectorSearch)->Args({120, 10'000});\n\nTEST_F(SearchTest, MatchNonNullField) {\n  PrepareSchema({{\"text_field\", SchemaField::TEXT},\n                 {\"tag_field\", SchemaField::TAG},\n                 {\"num_field\", SchemaField::NUMERIC}});\n\n  {\n    PrepareQuery(\"@text_field:*\");\n\n    ExpectAll(Map{{\"text_field\", \"any value\"}}, Map{{\"text_field\", \"another value\"}},\n              Map{{\"text_field\", \"third\"}, {\"tag_field\", \"tag1\"}});\n\n    ExpectNone(Map{{\"tag_field\", \"wrong field\"}}, Map{{\"num_field\", \"123\"}}, Map{});\n\n    EXPECT_TRUE(Check()) << GetError();\n  }\n\n  {\n    
PrepareQuery(\"@tag_field:*\");\n\n    ExpectAll(Map{{\"tag_field\", \"tag1\"}}, Map{{\"tag_field\", \"tag2\"}},\n              Map{{\"text_field\", \"value\"}, {\"tag_field\", \"tag3\"}});\n\n    ExpectNone(Map{{\"text_field\", \"wrong field\"}}, Map{{\"num_field\", \"456\"}}, Map{});\n\n    EXPECT_TRUE(Check()) << GetError();\n  }\n\n  {\n    PrepareQuery(\"@num_field:*\");\n\n    ExpectAll(Map{{\"num_field\", \"123\"}}, Map{{\"num_field\", \"456\"}},\n              Map{{\"text_field\", \"value\"}, {\"num_field\", \"789\"}});\n\n    ExpectNone(Map{{\"text_field\", \"wrong field\"}}, Map{{\"tag_field\", \"tag1\"}}, Map{});\n\n    EXPECT_TRUE(Check()) << GetError();\n  }\n}\n\nTEST_F(SearchTest, InvalidVectorParameter) {\n  search::Schema schema;\n  schema.fields[\"v\"] = search::SchemaField{\n      search::SchemaField::VECTOR,\n      0,   // flags\n      \"v\"  // short_name\n  };\n\n  search::SchemaField::VectorParams params;\n  params.use_hnsw = true;\n  params.dim = 2;\n  params.sim = search::VectorSimilarity::L2;\n  params.capacity = 10;\n  params.hnsw_m = 16;\n  params.hnsw_ef_construction = 200;\n  schema.fields[\"v\"].special_params = params;\n\n  search::IndicesOptions options;\n  search::FieldIndices indices{schema, options, PMR_NS::get_default_resource(), nullptr};\n\n  search::SearchAlgorithm algo;\n  search::QueryParams query_params;\n\n  query_params[\"b\"] = \"abcdefg\";\n\n  // Parser accepts any string as placeholder\n  // Invalid vectors result in empty vector (dimension 0) which returns empty results\n  ASSERT_TRUE(algo.Init(\"*=>[KNN 2 @v $b]\", &query_params));\n\n  // Search should return empty results for invalid vector\n  auto result = algo.Search(&indices);\n  EXPECT_TRUE(result.ids.empty());\n}\n\nclass SortIndexTest : public testing::Test {\n protected:\n  void SetUp() override {\n    InitTLSearchMR(PMR_NS::get_default_resource());\n  }\n\n  void TearDown() override {\n    InitTLSearchMR(nullptr);\n  }\n};\n\nTEST_F(SortIndexTest, 
StringSort) {\n  constexpr auto field = \"name\";\n  const auto schema = MakeSimpleSchema({{field, SchemaField::TAG}}, true);\n  FieldIndices indices{schema, kEmptyOptions, PMR_NS::get_default_resource(), nullptr};\n\n  indices.Add(0, MockedDocument{Map{{field, \"charlie\"}}});\n  indices.Add(1, MockedDocument{Map{{field, \"alpha\"}}});\n  indices.Add(2, MockedDocument{Map{{field, \"bravo\"}}});\n\n  std::vector<DocId> ids{0, 1, 2};\n  constexpr bool desc = false;\n\n  const auto index = indices.GetSortIndex(field);\n\n  index->Sort(&ids, ids.size(), desc);\n  std::vector<DocId> expected{1, 2, 0};\n  EXPECT_EQ(ids, expected);\n\n  index->Sort(&ids, ids.size(), !desc);\n  expected = {0, 2, 1};\n  EXPECT_EQ(ids, expected);\n\n  // conversion from stateless to normal string\n  auto lookup = index->Lookup(1);\n  EXPECT_TRUE(std::holds_alternative<std::string>(lookup));\n  EXPECT_EQ(std::get<std::string>(lookup), \"alpha\");\n}\n\nTEST_F(SortIndexTest, NumSort) {\n  constexpr auto field = \"cost\";\n  const auto schema = MakeSimpleSchema({{field, SchemaField::NUMERIC}}, true);\n  FieldIndices indices{schema, kEmptyOptions, PMR_NS::get_default_resource(), nullptr};\n\n  indices.Add(0, MockedDocument{Map{{field, \"2999\"}}});\n  indices.Add(1, MockedDocument{Map{{field, \"999\"}}});\n  indices.Add(2, MockedDocument{Map{{field, \"12\"}}});\n\n  std::vector<DocId> ids{0, 1, 2};\n  constexpr bool desc = false;\n\n  auto index = indices.GetSortIndex(field);\n  index->Sort(&ids, ids.size(), desc);\n  std::vector<DocId> expected{2, 1, 0};\n  EXPECT_EQ(ids, expected);\n\n  index->Sort(&ids, ids.size(), !desc);\n  expected = {0, 1, 2};\n  EXPECT_EQ(ids, expected);\n\n  auto lookup = index->Lookup(1);\n  EXPECT_TRUE(std::holds_alternative<double>(lookup));\n  EXPECT_EQ(std::get<double>(lookup), 999);\n}\n\n// Enumeration for different search types\nenum class SearchType { PREFIX = 0, SUFFIX = 1, INFIX = 2 };\n\n// Helper function to generate content with ASCII characters\nstatic 
std::string GenerateWordSequence(size_t word_count, size_t doc_offset = 0) {\n  std::string content;\n  for (size_t i = 0; i < word_count; ++i) {\n    std::string word;\n    char start_char = 'a' + ((doc_offset + i) % 26);\n    size_t word_len = 3 + (i % 5);  // Word length 3-7 chars\n\n    for (size_t j = 0; j < word_len; ++j) {\n      char c = start_char + (j % 26);\n      if (c > 'z')\n        c = 'a' + (c - 'z' - 1);\n      word += c;\n    }\n\n    if (i > 0)\n      content += \" \";\n    content += word;\n  }\n  return content;\n}\n\n// Helper function to generate pattern with variety\nstatic std::string GeneratePattern(SearchType search_type, size_t pattern_len, bool use_uniform) {\n  if (use_uniform) {\n    // Original uniform pattern for comparison\n    switch (search_type) {\n      case SearchType::PREFIX:\n        return std::string(pattern_len, 'p');\n      case SearchType::SUFFIX:\n        return std::string(pattern_len, 's');\n      case SearchType::INFIX:\n        return std::string(pattern_len, 'i');\n    }\n  } else {\n    // Diverse ASCII pattern\n    std::string pattern;\n    char base_char = (search_type == SearchType::PREFIX)   ? 'p'\n                     : (search_type == SearchType::SUFFIX) ? 
's'\n                                                           : 'i';\n\n    for (size_t i = 0; i < pattern_len; ++i) {\n      char c = base_char + (i % 10);  // Use variety of chars\n      if (c > 'z')\n        c = 'a' + (c - 'z' - 1);\n      pattern += c;\n    }\n    return pattern;\n  }\n  return \"\";\n}\n\nstatic void BM_SearchByTypeImpl(benchmark::State& state, bool use_diverse_pattern) {\n  size_t num_docs = state.range(0);\n  size_t pattern_len = state.range(1);\n  SearchType search_type = static_cast<SearchType>(state.range(2));\n\n  auto schema = MakeSimpleSchema({{\"title\", SchemaField::TEXT}});\n  FieldIndices indices{schema, kEmptyOptions, PMR_NS::get_default_resource(), nullptr};\n\n  // Generate pattern\n  std::string pattern = GeneratePattern(search_type, pattern_len, !use_diverse_pattern);\n  std::string search_type_name = (search_type == SearchType::PREFIX)   ? \"prefix\"\n                                 : (search_type == SearchType::SUFFIX) ? \"suffix\"\n                                                                       : \"infix\";\n\n  // Generate test data with more realistic content\n  for (size_t i = 0; i < num_docs; i++) {\n    std::string content;\n    if (i < num_docs / 2) {\n      // Half documents have the pattern in appropriate position\n      std::string base_content = GenerateWordSequence(5 + (i % 5), i);\n\n      switch (search_type) {\n        case SearchType::PREFIX:\n          content = pattern + base_content;\n          break;\n        case SearchType::SUFFIX:\n          content = base_content + pattern;\n          break;\n        case SearchType::INFIX:\n          // Fix: embed pattern inside a word, not as separate word\n          size_t split_pos = base_content.length() / 2;\n          content = base_content.substr(0, split_pos) + pattern + base_content.substr(split_pos);\n          break;\n      }\n    } else {\n      // Half don't have the pattern - generate different content\n      content = GenerateWordSequence(8 + 
(i % 3), i + 1000);\n    }\n    MockedDocument doc{Map{{\"title\", content}}};\n    indices.Add(i, doc);\n  }\n\n  SearchAlgorithm algo{};\n  QueryParams params;\n  std::string query;\n\n  // Generate query based on search type\n  switch (search_type) {\n    case SearchType::PREFIX:\n      query = pattern + \"*\";\n      break;\n    case SearchType::SUFFIX:\n      query = \"*\" + pattern;\n      break;\n    case SearchType::INFIX:\n      query = \"*\" + pattern + \"*\";\n      break;\n  }\n\n  if (!algo.Init(query, &params)) {\n    state.SkipWithError(\"Failed to initialize \" + search_type_name + \" search\");\n    return;\n  }\n\n  while (state.KeepRunning()) {\n    auto result = algo.Search(&indices);\n    benchmark::DoNotOptimize(result);\n\n    // If result has error, skip the benchmark\n    if (!result.error.empty()) {\n      state.SkipWithError(search_type_name + \" search returned error: \" + result.error);\n      return;\n    }\n  }\n\n  // Set counters for analysis\n  state.counters[\"docs_total\"] = num_docs;\n  state.counters[\"pattern_length\"] = pattern_len;\n  state.counters[\"diverse_pattern\"] = use_diverse_pattern ? 1 : 0;\n  state.SetLabel(search_type_name + (use_diverse_pattern ? 
\"_diverse\" : \"_uniform\"));\n}\n\n// Instantiate template functions\nstatic void BM_SearchByType_Uniform(benchmark::State& state) {\n  BM_SearchByTypeImpl(state, false);\n}\n\nstatic void BM_SearchByType_Diverse(benchmark::State& state) {\n  BM_SearchByTypeImpl(state, true);\n}\n\n// Benchmark to compare all search types - removed 100K docs per romange's suggestion\nBENCHMARK(BM_SearchByType_Uniform)\n    // Uniform patterns (original test)\n    ->Args({1000, 3, static_cast<int>(SearchType::PREFIX)})\n    ->Args({1000, 5, static_cast<int>(SearchType::PREFIX)})\n    ->Args({10000, 3, static_cast<int>(SearchType::PREFIX)})\n    ->Args({10000, 5, static_cast<int>(SearchType::PREFIX)})\n    ->Args({1000, 3, static_cast<int>(SearchType::SUFFIX)})\n    ->Args({1000, 5, static_cast<int>(SearchType::SUFFIX)})\n    ->Args({10000, 3, static_cast<int>(SearchType::SUFFIX)})\n    ->Args({10000, 5, static_cast<int>(SearchType::SUFFIX)})\n    ->Args({1000, 3, static_cast<int>(SearchType::INFIX)})\n    ->Args({1000, 5, static_cast<int>(SearchType::INFIX)})\n    ->Args({10000, 3, static_cast<int>(SearchType::INFIX)})\n    ->Args({10000, 5, static_cast<int>(SearchType::INFIX)})\n    ->ArgNames({\"docs\", \"pattern_len\", \"search_type\"})\n    ->Unit(benchmark::kMicrosecond);\n\nBENCHMARK(BM_SearchByType_Diverse)\n    // Diverse patterns (new test with ASCII variety)\n    ->Args({1000, 3, static_cast<int>(SearchType::PREFIX)})\n    ->Args({1000, 5, static_cast<int>(SearchType::PREFIX)})\n    ->Args({10000, 3, static_cast<int>(SearchType::PREFIX)})\n    ->Args({10000, 5, static_cast<int>(SearchType::PREFIX)})\n    ->Args({1000, 3, static_cast<int>(SearchType::SUFFIX)})\n    ->Args({1000, 5, static_cast<int>(SearchType::SUFFIX)})\n    ->Args({10000, 3, static_cast<int>(SearchType::SUFFIX)})\n    ->Args({10000, 5, static_cast<int>(SearchType::SUFFIX)})\n    ->Args({1000, 3, static_cast<int>(SearchType::INFIX)})\n    ->Args({1000, 5, static_cast<int>(SearchType::INFIX)})\n    
->Args({10000, 3, static_cast<int>(SearchType::INFIX)})\n    ->Args({10000, 5, static_cast<int>(SearchType::INFIX)})\n    ->ArgNames({\"docs\", \"pattern_len\", \"search_type\"})\n    ->Unit(benchmark::kMicrosecond);\n\n// Helper function to generate random vector\nstatic std::vector<float> GenerateRandomVector(size_t dims, unsigned seed = 42) {\n  std::mt19937 gen(seed);\n  std::uniform_real_distribution<float> dis(-1.0f, 1.0f);\n\n  std::vector<float> vec(dims);\n  for (size_t i = 0; i < dims; ++i) {\n    vec[i] = dis(gen);\n  }\n  return vec;\n}\n\nstatic void BM_SearchDocIds(benchmark::State& state) {\n  auto schema = MakeSimpleSchema({{\"score\", SchemaField::NUMERIC}, {\"tag\", SchemaField::TAG}});\n  FieldIndices indices{schema, kEmptyOptions, PMR_NS::get_default_resource(), nullptr};\n\n  SearchAlgorithm algo;\n  QueryParams params;\n  default_random_engine rnd;\n  const char* tag_vals[] = {\"test\", \"example\", \"sample\", \"demo\", \"demo2\"};\n  uniform_int_distribution<size_t> tag_dist(0, ABSL_ARRAYSIZE(tag_vals) - 1);\n  uniform_int_distribution<size_t> score_dist(0, 100);\n\n  for (size_t i = 0; i < 1000; i++) {\n    MockedDocument doc{\n        Map{{\"score\", std::to_string(score_dist(rnd))}, {\"tag\", tag_vals[tag_dist(rnd)]}}};\n    indices.Add(i, doc);\n  }\n\n  std::string queries[] = {\"@tag:{test} @score:[10 50]\", \"@tag: *\", \"@score:*\"};\n  size_t query_type = state.range(0);\n  CHECK_LT(query_type, ABSL_ARRAYSIZE(queries));\n  CHECK(algo.Init(queries[query_type], &params));\n  while (state.KeepRunning()) {\n    auto result = algo.Search(&indices);\n    CHECK(result.error.empty());\n  }\n}\nBENCHMARK(BM_SearchDocIds)->Range(0, 2);\n\nstatic void BM_SearchNumericIndexes(benchmark::State& state) {\n  auto schema = MakeSimpleSchema({{\"numeric\", SchemaField::NUMERIC,\n                                   SchemaField::NumericParams{.block_size = kMaxRangeBlockSize}}});\n  FieldIndices indices{schema, kEmptyOptions, 
PMR_NS::get_default_resource(), nullptr};\n\n  SearchAlgorithm algo;\n  QueryParams params;\n  default_random_engine rnd;\n\n  using NumericType = long long;\n  uniform_int_distribution<NumericType> dist(std::numeric_limits<NumericType>::min(),\n                                             std::numeric_limits<NumericType>::max());\n\n  const size_t num_docs = state.range(0);\n  for (size_t i = 0; i < num_docs; i++) {\n    MockedDocument doc{Map{{\"numeric\", std::to_string(dist(rnd))}}};\n    indices.Add(i, doc);\n  }\n\n  std::string queries[] = {\"@numeric:[15 +inf]\", \"@numeric:[-inf 20]\", \"@numeric:[-inf +inf]\",\n                           \"@numeric:[0 100000]\"};\n\n  std::unordered_map<size_t, std::vector<size_t>> expected_results_per_num_docs = {\n      {10000, {4982, 5018, 10000, 0}},\n      {100000, {49885, 50115, 100000, 0}},\n      {1000000, {500853, 499147, 1000000, 0}},\n  };\n\n  while (state.KeepRunning()) {\n    for (size_t i = 0; i < ABSL_ARRAYSIZE(queries); ++i) {\n      const auto& query = queries[i];\n\n      CHECK(algo.Init(query, &params));\n      auto result = algo.Search(&indices);\n      CHECK(result.error.empty());\n\n      const size_t expected_result = expected_results_per_num_docs[num_docs][i];\n      CHECK_EQ(result.total, expected_result);\n      CHECK_EQ(result.ids.size(), expected_result);\n    }\n  }\n}\n\nBENCHMARK(BM_SearchNumericIndexes)->Arg(10000)->Arg(100000)->Arg(1000000)->ArgNames({\"num_docs\"});\n\nstatic void BM_SearchNumericIndexesSmallRanges(benchmark::State& state) {\n  auto schema = MakeSimpleSchema({{\"numeric\", SchemaField::NUMERIC,\n                                   SchemaField::NumericParams{.block_size = kMaxRangeBlockSize}}});\n  FieldIndices indices{schema, kEmptyOptions, PMR_NS::get_default_resource(), nullptr};\n\n  SearchAlgorithm algo;\n  QueryParams params;\n  default_random_engine rnd;\n\n  using NumericType = uint16_t;\n  uniform_int_distribution<NumericType> dist(0, 
std::numeric_limits<NumericType>::max());\n\n  const size_t num_docs = state.range(0);\n  // Insert zero values\n  for (size_t i = 0; i < num_docs / 50; i++) {\n    MockedDocument doc{Map{{\"numeric\", \"0\"}}};\n    indices.Add(i, doc);\n  }\n  for (size_t i = num_docs / 50; i < num_docs; i++) {\n    MockedDocument doc{Map{{\"numeric\", std::to_string(dist(rnd))}}};\n    indices.Add(i, doc);\n  }\n\n  std::string queries[] = {\"@numeric:[0 40000]\", \"@numeric:[-inf +inf]\"};\n\n  std::unordered_map<size_t, std::vector<size_t>> expected_results_per_num_docs = {\n      {100000, {61939, 100000}},\n      {1000000, {618365, 1000000}},\n  };\n\n  while (state.KeepRunning()) {\n    for (size_t i = 0; i < ABSL_ARRAYSIZE(queries); ++i) {\n      const auto& query = queries[i];\n\n      CHECK(algo.Init(query, &params));\n      auto result = algo.Search(&indices);\n      CHECK(result.error.empty());\n\n      const size_t expected_result = expected_results_per_num_docs[num_docs][i];\n      CHECK_EQ(result.total, expected_result);\n      CHECK_EQ(result.ids.size(), expected_result);\n    }\n  }\n}\n\nBENCHMARK(BM_SearchNumericIndexesSmallRanges)\n    ->Arg(100000)   // One block\n    ->Arg(1000000)  // Two blocks\n    ->ArgNames({\"num_docs\"});\n\nstatic void BM_SearchTwoNumericIndexes(benchmark::State& state) {\n  auto schema = MakeSimpleSchema({\n      {\"numeric1\", SchemaField::NUMERIC,\n       SchemaField::NumericParams{.block_size = kMaxRangeBlockSize}},\n      {\"numeric2\", SchemaField::NUMERIC,\n       SchemaField::NumericParams{.block_size = kMaxRangeBlockSize}},\n  });\n\n  FieldIndices indices{schema, kEmptyOptions, PMR_NS::get_default_resource(), nullptr};\n\n  SearchAlgorithm algo;\n  QueryParams params;\n  std::default_random_engine rnd;\n\n  using NumericType = long long;\n  uniform_int_distribution<NumericType> dist1(std::numeric_limits<NumericType>::min(),\n                                              std::numeric_limits<NumericType>::max());\n  
uniform_int_distribution<NumericType> dist2(std::numeric_limits<NumericType>::min(),\n                                              std::numeric_limits<NumericType>::max());\n\n  const size_t num_docs = state.range(0);\n  for (size_t i = 0; i < num_docs; ++i) {\n    MockedDocument doc{Map{\n        {\"numeric1\", std::to_string(dist1(rnd))},\n        {\"numeric2\", std::to_string(dist2(rnd))},\n    }};\n    indices.Add(i, doc);\n  }\n\n  std::string queries[] = {absl::StrCat(\"@numeric1:[15 +inf] @numeric2:[-inf 20]\"),\n                           absl::StrCat(\"@numeric1:[-inf 20] @numeric2:[15 +inf]\"),\n                           absl::StrCat(\"@numeric1:[0 100000] @numeric2:[-100000 0]\"),\n                           absl::StrCat(\"@numeric1:[-100000 0] @numeric2:[0 100000]\")};\n\n  std::unordered_map<size_t, std::vector<size_t>> expected_results_per_num_docs = {\n      {10000, {2508, 2507, 0, 0}},\n      {100000, {25119, 25232, 0, 0}},\n      {1000000, {250623, 250643, 0, 0}},\n  };\n\n  while (state.KeepRunning()) {\n    for (size_t i = 0; i < ABSL_ARRAYSIZE(queries); ++i) {\n      const auto& query = queries[i];\n\n      CHECK(algo.Init(query, &params));\n      auto result = algo.Search(&indices);\n      CHECK(result.error.empty());\n\n      const size_t expected_result = expected_results_per_num_docs[num_docs][i];\n      CHECK_EQ(result.total, expected_result);\n      CHECK_EQ(result.ids.size(), expected_result);\n    }\n  }\n}\n\nBENCHMARK(BM_SearchTwoNumericIndexes)\n    ->Arg(10000)\n    ->Arg(100000)\n    ->Arg(1000000)\n    ->ArgNames({\"num_docs\"});\n\nstatic void BM_SearchNumericAndTagIndexes(benchmark::State& state) {\n  auto schema = MakeSimpleSchema({{\"tag\", SchemaField::TAG},\n                                  {\"numeric\", SchemaField::NUMERIC,\n                                   SchemaField::NumericParams{.block_size = kMaxRangeBlockSize}}});\n  FieldIndices indices{schema, kEmptyOptions, PMR_NS::get_default_resource(), nullptr};\n\n  
SearchAlgorithm algo;\n  QueryParams params;\n  default_random_engine rnd;\n\n  using NumericType = long long;\n  uniform_int_distribution<NumericType> dist(std::numeric_limits<NumericType>::min(),\n                                             std::numeric_limits<NumericType>::max());\n\n  size_t tag_number = 0;\n  const size_t max_tag_number = 1000;\n\n  const size_t num_docs = state.range(0);\n  for (size_t i = 0; i < num_docs; i++) {\n    MockedDocument doc{\n        Map{{\"tag\", absl::StrCat(\"tag\", tag_number)}, {\"numeric\", std::to_string(dist(rnd))}}};\n    indices.Add(i, doc);\n\n    tag_number = (tag_number + 1) % max_tag_number;\n  }\n\n  std::string queries[] = {absl::StrCat(\"@tag:{tag230|tag3|tag942} @numeric:[15 +inf]\"),\n                           absl::StrCat(\"@tag:{tag1|tag829|tag236} @numeric:[-inf 20]\"),\n                           absl::StrCat(\"@tag:{tag0|tag999} @numeric:[-1000000 +inf]\")};\n\n  std::unordered_map<size_t, std::vector<size_t>> expected_results_per_num_docs = {\n      {10000, {19, 16, 8}},\n      {100000, {164, 157, 97}},\n      {1000000, {1528, 1518, 1017}},\n  };\n\n  while (state.KeepRunning()) {\n    for (size_t i = 0; i < ABSL_ARRAYSIZE(queries); ++i) {\n      const auto& query = queries[i];\n\n      CHECK(algo.Init(query, &params));\n      auto result = algo.Search(&indices);\n      CHECK(result.error.empty());\n\n      const size_t expected_result = expected_results_per_num_docs[num_docs][i];\n      CHECK_EQ(result.total, expected_result);\n      CHECK_EQ(result.ids.size(), expected_result);\n    }\n  }\n}\n\nBENCHMARK(BM_SearchNumericAndTagIndexes)\n    ->Arg(10000)\n    ->Arg(100000)\n    ->Arg(1000000)\n    ->ArgNames({\"num_docs\"});\n\nstatic void BM_SearchSeveralNumericAndTagIndexes(benchmark::State& state) {\n  auto schema = MakeSimpleSchema({{\"tag\", SchemaField::TAG},\n                                  {\"numeric1\", SchemaField::NUMERIC,\n                                   
SchemaField::NumericParams{.block_size = kMaxRangeBlockSize}},\n                                  {\"numeric2\", SchemaField::NUMERIC,\n                                   SchemaField::NumericParams{.block_size = kMaxRangeBlockSize}},\n                                  {\"numeric3\", SchemaField::NUMERIC,\n                                   SchemaField::NumericParams{.block_size = kMaxRangeBlockSize}}});\n  FieldIndices indices{schema, kEmptyOptions, PMR_NS::get_default_resource(), nullptr};\n\n  SearchAlgorithm algo;\n  QueryParams params;\n  default_random_engine rnd;\n\n  using NumericType = uint16_t;\n  uniform_int_distribution<NumericType> dist(std::numeric_limits<NumericType>::min(),\n                                             std::numeric_limits<NumericType>::max());\n\n  const size_t num_docs = state.range(0);\n\n  size_t tag_number = 0;\n  const size_t max_tag_number = num_docs / 30;\n\n  for (size_t i = 0; i < num_docs; i++) {\n    MockedDocument doc{Map{{\"tag\", absl::StrCat(\"tag\", tag_number)},\n                           {\"numeric1\", std::to_string(dist(rnd))},\n                           {\"numeric2\", std::to_string(dist(rnd))},\n                           {\"numeric3\", std::to_string(dist(rnd))}}};\n    indices.Add(i, doc);\n\n    tag_number = (tag_number + 1) % max_tag_number;\n  }\n\n  std::string queries[] = {\n      absl::StrCat(\n          \"@tag:{tag230|tag3} @numeric1:[0 10000] @numeric2:[20000 30000] @numeric3:[-1000 +inf]\"),\n      absl::StrCat(\"@tag:{tag829|tag236} @numeric1:[-inf 10000] @numeric2:[40000 +inf] \"\n                   \"@numeric3:[10000 30000]\"),\n      absl::StrCat(\n          \"@tag:{tag0|tag999} @numeric1:[-inf +inf] @numeric2:[20 +inf] @numeric3:[1000 10000]\")};\n\n  std::unordered_map<size_t, std::vector<size_t>> expected_results_per_num_docs = {\n      {10000, {1, 0, 4}},\n      {100000, {1, 1, 10}},\n      {1000000, {0, 1, 9}},\n  };\n\n  while (state.KeepRunning()) {\n    for (size_t i = 0; i < 
ABSL_ARRAYSIZE(queries); ++i) {\n      const auto& query = queries[i];\n\n      CHECK(algo.Init(query, &params));\n      auto result = algo.Search(&indices);\n      CHECK(result.error.empty());\n\n      const size_t expected_result = expected_results_per_num_docs[num_docs][i];\n      CHECK_EQ(result.total, expected_result);\n      CHECK_EQ(result.ids.size(), expected_result);\n    }\n  }\n}\n\nBENCHMARK(BM_SearchSeveralNumericAndTagIndexes)\n    ->Arg(10000)\n    ->Arg(100000)\n    ->Arg(1000000)\n    ->ArgNames({\"num_docs\"});\n\nstatic void BM_SearchMergeEqualSets(benchmark::State& state) {\n  auto schema = MakeSimpleSchema({\n      {\"numeric1\", SchemaField::NUMERIC,\n       SchemaField::NumericParams{.block_size = kMaxRangeBlockSize}},\n      {\"numeric2\", SchemaField::NUMERIC,\n       SchemaField::NumericParams{.block_size = kMaxRangeBlockSize}},\n  });\n\n  FieldIndices indices{schema, kEmptyOptions, PMR_NS::get_default_resource(), nullptr};\n\n  SearchAlgorithm algo;\n  QueryParams params;\n  std::default_random_engine rnd;\n\n  using NumericType = long long;\n  uniform_int_distribution<NumericType> dist1(std::numeric_limits<NumericType>::min(),\n                                              std::numeric_limits<NumericType>::max());\n  uniform_int_distribution<NumericType> dist2(std::numeric_limits<NumericType>::min(),\n                                              std::numeric_limits<NumericType>::max());\n\n  const size_t num_docs = state.range(0);\n  for (size_t i = 0; i < num_docs; ++i) {\n    MockedDocument doc{Map{\n        {\"numeric1\", std::to_string(dist1(rnd))},\n        {\"numeric2\", std::to_string(dist2(rnd))},\n    }};\n    indices.Add(i, doc);\n  }\n\n  std::string query = absl::StrCat(\"@numeric1:[-inf +inf] @numeric2:[-inf +inf]\");\n\n  while (state.KeepRunning()) {\n    CHECK(algo.Init(query, &params));\n    auto result = algo.Search(&indices);\n    CHECK(result.error.empty());\n\n    // All documents should match both conditions, so 
total should equal num_docs\n    CHECK_EQ(result.total, num_docs);\n    CHECK_EQ(result.ids.size(), num_docs);\n  }\n}\n\nBENCHMARK(BM_SearchMergeEqualSets)\n    ->Arg(100)\n    ->Arg(1000)\n    ->Arg(10000)\n    ->Arg(100000)\n    ->Arg(1000000)\n    ->ArgNames({\"num_docs\"});\n\nstatic void BM_SearchRangeTreeSplits(benchmark::State& state) {\n  auto schema = MakeSimpleSchema({\n      {\"num\", SchemaField::NUMERIC, SchemaField::NumericParams{}},\n  });\n\n  FieldIndices indices{schema, kEmptyOptions, PMR_NS::get_default_resource(), nullptr};\n\n  const size_t batch_size = state.range(0);\n  std::default_random_engine rnd;\n\n  using NumericType = long long;\n  uniform_int_distribution<NumericType> dist(0, batch_size + 1);\n\n  size_t doc_index = 0;\n  while (state.KeepRunning()) {\n    for (size_t i = 0; i < batch_size; i++) {\n      MockedDocument doc{Map{{\"num\", std::to_string(dist(rnd))}}};\n      indices.Add(doc_index++, doc);\n    }\n  }\n}\n\nBENCHMARK(BM_SearchRangeTreeSplits)\n    ->Arg(100000)\n    ->Arg(1000000)\n    ->Arg(3000000)\n    ->ArgNames({\"batch_size\"});\n\n// Semantics test for cosine on zero vectors (independent of SimSIMD)\nTEST(CosineDistanceTest, ZeroVectors) {\n  const size_t dims = 128;\n  std::vector<float> zero(dims, 0.0f);\n  float d = VectorDistance(zero.data(), zero.data(), dims, VectorSimilarity::COSINE);\n  EXPECT_EQ(d, 0.0f);\n}\n\n// Unified vector distance benchmarks using VectorDistance function\nstatic void BM_VectorDistance(benchmark::State& state) {\n  // Ensure SimSIMD dynamic dispatch is initialized for the benchmark\n  InitSimSIMD();\n  size_t dims = state.range(0);\n  size_t num_pairs = state.range(1);\n  VectorSimilarity sim = static_cast<VectorSimilarity>(state.range(2));\n\n  std::vector<std::vector<float>> vectors_a, vectors_b;\n  vectors_a.reserve(num_pairs);\n  vectors_b.reserve(num_pairs);\n\n  for (size_t i = 0; i < num_pairs; ++i) {\n    vectors_a.push_back(GenerateRandomVector(dims, i));\n    
vectors_b.push_back(GenerateRandomVector(dims, i + 1000));\n  }\n\n  size_t pair_idx = 0;\n  for (auto _ : state) {\n    float distance =\n        VectorDistance(vectors_a[pair_idx].data(), vectors_b[pair_idx].data(), dims, sim);\n    benchmark::DoNotOptimize(distance);\n    pair_idx = (pair_idx + 1) % num_pairs;\n  }\n\n  state.counters[\"dims\"] = dims;\n  state.counters[\"pairs\"] = num_pairs;\n\n  std::string sim_name = (sim == VectorSimilarity::L2)       ? \"L2\"\n                         : (sim == VectorSimilarity::COSINE) ? \"Cosine\"\n                                                             : \"IP\";\n  state.SetLabel(sim_name);\n}\n\n// Intensive benchmark with batch processing\nstatic void BM_VectorDistance_Intensive(benchmark::State& state) {\n  // Ensure SimSIMD dynamic dispatch is initialized for the benchmark\n  InitSimSIMD();\n  size_t dims = 512;  // Fixed medium size\n  size_t batch_size = 1000;\n  VectorSimilarity sim = static_cast<VectorSimilarity>(state.range(0));\n\n  std::vector<std::vector<float>> vectors_a, vectors_b;\n  vectors_a.reserve(batch_size);\n  vectors_b.reserve(batch_size);\n\n  for (size_t i = 0; i < batch_size; ++i) {\n    vectors_a.push_back(GenerateRandomVector(dims, i));\n    vectors_b.push_back(GenerateRandomVector(dims, i + 4000));\n  }\n\n  size_t total_ops = 0;\n  while (state.KeepRunning()) {\n    for (size_t i = 0; i < batch_size; ++i) {\n      float distance = VectorDistance(vectors_a[i].data(), vectors_b[i].data(), dims, sim);\n      benchmark::DoNotOptimize(distance);\n      ++total_ops;\n    }\n  }\n\n  state.counters[\"ops\"] = total_ops;\n  state.counters[\"ops_per_sec\"] = benchmark::Counter(total_ops, benchmark::Counter::kIsRate);\n\n  std::string sim_name = (sim == VectorSimilarity::L2)       ? \"L2\"\n                         : (sim == VectorSimilarity::COSINE) ? 
\"Cosine\"\n                                                             : \"IP\";\n  state.SetLabel(sim_name + \"_Intensive\");\n}\n\n// Benchmark declarations\nBENCHMARK(BM_VectorDistance)\n    // Small vectors - L2 Distance\n    ->Args({32, 100, static_cast<int>(VectorSimilarity::L2)})\n    ->Args({32, 1000, static_cast<int>(VectorSimilarity::L2)})\n    ->Args({32, 10000, static_cast<int>(VectorSimilarity::L2)})\n    // Medium vectors - L2 Distance\n    ->Args({128, 100, static_cast<int>(VectorSimilarity::L2)})\n    ->Args({128, 1000, static_cast<int>(VectorSimilarity::L2)})\n    ->Args({128, 10000, static_cast<int>(VectorSimilarity::L2)})\n    // Large vectors - L2 Distance\n    ->Args({512, 100, static_cast<int>(VectorSimilarity::L2)})\n    ->Args({512, 1000, static_cast<int>(VectorSimilarity::L2)})\n    ->Args({512, 5000, static_cast<int>(VectorSimilarity::L2)})\n    // Very large vectors - L2 Distance\n    ->Args({1536, 100, static_cast<int>(VectorSimilarity::L2)})\n    ->Args({1536, 1000, static_cast<int>(VectorSimilarity::L2)})\n\n    // Small vectors - Cosine Distance\n    ->Args({32, 100, static_cast<int>(VectorSimilarity::COSINE)})\n    ->Args({32, 1000, static_cast<int>(VectorSimilarity::COSINE)})\n    ->Args({32, 10000, static_cast<int>(VectorSimilarity::COSINE)})\n    // Medium vectors - Cosine Distance\n    ->Args({128, 100, static_cast<int>(VectorSimilarity::COSINE)})\n    ->Args({128, 1000, static_cast<int>(VectorSimilarity::COSINE)})\n    ->Args({128, 10000, static_cast<int>(VectorSimilarity::COSINE)})\n    // Large vectors - Cosine Distance\n    ->Args({512, 100, static_cast<int>(VectorSimilarity::COSINE)})\n    ->Args({512, 1000, static_cast<int>(VectorSimilarity::COSINE)})\n    ->Args({512, 5000, static_cast<int>(VectorSimilarity::COSINE)})\n    // Very large vectors - Cosine Distance\n    ->Args({1536, 100, static_cast<int>(VectorSimilarity::COSINE)})\n    ->Args({1536, 1000, static_cast<int>(VectorSimilarity::COSINE)})\n\n    // Small 
vectors - IP Distance\n    ->Args({32, 100, static_cast<int>(VectorSimilarity::IP)})\n    ->Args({32, 1000, static_cast<int>(VectorSimilarity::IP)})\n    ->Args({32, 10000, static_cast<int>(VectorSimilarity::IP)})\n    // Medium vectors - IP Distance\n    ->Args({128, 100, static_cast<int>(VectorSimilarity::IP)})\n    ->Args({128, 1000, static_cast<int>(VectorSimilarity::IP)})\n    ->Args({128, 10000, static_cast<int>(VectorSimilarity::IP)})\n    // Large vectors - IP Distance\n    ->Args({512, 100, static_cast<int>(VectorSimilarity::IP)})\n    ->Args({512, 1000, static_cast<int>(VectorSimilarity::IP)})\n    ->Args({512, 5000, static_cast<int>(VectorSimilarity::IP)})\n    // Very large vectors - IP Distance\n    ->Args({1536, 100, static_cast<int>(VectorSimilarity::IP)})\n    ->Args({1536, 1000, static_cast<int>(VectorSimilarity::IP)})\n    ->ArgNames({\"dims\", \"pairs\", \"similarity\"})\n    ->Unit(benchmark::kMicrosecond);\n\nBENCHMARK(BM_VectorDistance_Intensive)\n    ->Arg(static_cast<int>(VectorSimilarity::L2))\n    ->Arg(static_cast<int>(VectorSimilarity::COSINE))\n    ->Arg(static_cast<int>(VectorSimilarity::IP))\n    ->ArgNames({\"similarity_type\"})\n    ->Unit(benchmark::kMicrosecond);\n\n}  // namespace search\n}  // namespace dfly\n"
  },
  {
    "path": "src/core/search/sort_indices.cc",
    "content": "// Copyright 2023, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#include \"core/search/sort_indices.h\"\n\n#include <absl/strings/ascii.h>\n#include <absl/strings/str_split.h>\n#include <base/logging.h>\n\n#include <algorithm>\n#include <optional>\n#include <type_traits>\n#include <variant>\n\nnamespace dfly::search {\n\nusing namespace std;\n\nnamespace {\ntemplate <typename T>\nusing ScoreT = std::conditional_t<is_same_v<T, StatelessString>, std::string, T>;\n}  // namespace\n\ntemplate <typename T> bool SimpleValueSortIndex<T>::ParsedSortValue::HasValue() const {\n  return !std::holds_alternative<std::monostate>(value);\n}\n\ntemplate <typename T> bool SimpleValueSortIndex<T>::ParsedSortValue::IsNullValue() const {\n  return std::holds_alternative<std::nullopt_t>(value);\n}\n\ntemplate <typename T> SortableValue SimpleValueSortIndex<T>::Lookup(DocId doc) const {\n  DCHECK_LT(doc, occupied_.size());\n  if (!occupied_[doc])\n    return std::monostate{};\n\n  DCHECK_LT(doc, values_.size());\n  return ScoreT<T>{values_[doc]};\n}\n\ntemplate <typename T>\nstd::vector<SortableValue> SimpleValueSortIndex<T>::Sort(std::vector<DocId>* ids, size_t limit,\n                                                         bool desc) const {\n  auto cb = [this, desc](const auto& lhs, const auto& rhs) {\n    // null values are at the end\n    auto p1 = make_pair(!occupied_[lhs], cref(values_[lhs]));\n    auto p2 = make_pair(!occupied_[rhs], cref(values_[rhs]));\n    return desc ? 
(p1 > p2) : (p1 < p2);\n  };\n  std::partial_sort(ids->begin(), ids->begin() + std::min(ids->size(), limit), ids->end(), cb);\n\n  // Turn stateless string into std::string\n  vector<SortableValue> out(min(ids->size(), limit));\n  for (size_t i = 0; i < out.size(); i++)\n    out[i] = ScoreT<T>{values_[(*ids)[i]]};\n  return out;\n}\n\ntemplate <typename T>\nbool SimpleValueSortIndex<T>::Add(DocId id, const DocumentAccessor& doc, std::string_view field) {\n  auto field_value = Get(doc, field);\n  if (!field_value.HasValue()) {\n    return false;\n  }\n\n  if (id >= values_.size()) {\n    values_.resize(id + 1);\n    occupied_.resize(id + 1);\n  }\n\n  if (!field_value.IsNullValue()) {\n    values_[id] = std::move(std::get<T>(field_value.value));\n    occupied_[id] = true;\n  }\n  return true;\n}\n\ntemplate <typename T>\nvoid SimpleValueSortIndex<T>::Remove(DocId id, const DocumentAccessor& doc,\n                                     std::string_view field) {\n  DCHECK_LT(id, values_.size());\n  DCHECK_EQ(values_.size(), occupied_.size());\n  values_[id] = T{};\n  occupied_[id] = false;\n}\n\ntemplate <typename T>\nstd::vector<DocId> SimpleValueSortIndex<T>::GetAllDocsWithNonNullValues() const {\n  std::vector<DocId> result;\n  result.reserve(values_.size());\n\n  for (DocId id = 0; id < values_.size(); ++id) {\n    if (occupied_[id])\n      result.push_back(id);\n  }\n\n  return result;\n}\n\ntemplate struct SimpleValueSortIndex<double>;\ntemplate struct SimpleValueSortIndex<StatelessString>;\n\nSimpleValueSortIndex<double>::ParsedSortValue NumericSortIndex::Get(const DocumentAccessor& doc,\n                                                                    std::string_view field) {\n  auto numbers_list = doc.GetNumbers(field);\n  if (!numbers_list) {\n    return {};\n  }\n  if (numbers_list->empty()) {\n    return ParsedSortValue{std::nullopt};\n  }\n  return ParsedSortValue{numbers_list->front()};\n}\n\nSimpleValueSortIndex<StatelessString>::ParsedSortValue 
StringSortIndex::Get(\n    const DocumentAccessor& doc, std::string_view field) {\n  auto strings_list = doc.GetTags(field);\n  if (!strings_list) {\n    return {};\n  }\n  if (strings_list->empty()) {\n    return ParsedSortValue{std::nullopt};\n  }\n  return ParsedSortValue{StatelessString{strings_list->front()}};\n}\n\n}  // namespace dfly::search\n"
  },
  {
    "path": "src/core/search/sort_indices.h",
    "content": "// Copyright 2023, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#pragma once\n\n#include \"core/search/base.h\"\n#include \"core/search/stateless_allocator.h\"\n\nnamespace dfly::search {\n\nusing StatelessString =\n    std::basic_string<char, std::char_traits<char>, StatelessSearchAllocator<char>>;\nstatic_assert(sizeof(StatelessString) == sizeof(std::string));\n\ntemplate <typename T> using StatelessVector = std::vector<T, StatelessSearchAllocator<T>>;\nstatic_assert(sizeof(StatelessVector<StatelessString>) == sizeof(std::vector<std::string>));\n\ntemplate <typename T> struct SimpleValueSortIndex : BaseSortIndex {\n protected:\n  struct ParsedSortValue {\n    bool HasValue() const;\n    bool IsNullValue() const;\n\n    // std::monostate - no value was found.\n    // std::nullopt - found value is null.\n    // T - found value.\n    std::variant<std::monostate, std::nullopt_t, T> value;\n  };\n\n public:\n  SortableValue Lookup(DocId doc) const override;\n  std::vector<SortableValue> Sort(std::vector<DocId>* ids, size_t limit, bool desc) const override;\n\n  bool Add(DocId id, const DocumentAccessor& doc, std::string_view field) override;\n  void Remove(DocId id, const DocumentAccessor& doc, std::string_view field) override;\n\n  // Override GetAllResults to return all documents with non-null values\n  std::vector<DocId> GetAllDocsWithNonNullValues() const override;\n\n protected:\n  virtual ParsedSortValue Get(const DocumentAccessor& doc, std::string_view field_value) = 0;\n\n private:\n  StatelessVector<T> values_;\n  StatelessVector<bool> occupied_;  // instead of optional<T> in values to avoid memory overhead\n};\n\nstruct NumericSortIndex : SimpleValueSortIndex<double> {\n  ParsedSortValue Get(const DocumentAccessor& doc, std::string_view field) override;\n};\n\n// TODO: Map tags to integers for fast sort\nstruct StringSortIndex : SimpleValueSortIndex<StatelessString> {\n  ParsedSortValue Get(const 
DocumentAccessor& doc, std::string_view field) override;\n};\n\n}  // namespace dfly::search\n"
  },
  {
    "path": "src/core/search/stateless_allocator.h",
    "content": "// Copyright 2025, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n\n#pragma once\n\n#include <cassert>\n\n#include \"base/pmr/memory_resource.h\"\n#include \"core/detail/stateless_allocator.h\"\n\nnamespace dfly {\n\nnamespace detail {\ninline thread_local PMR_NS::memory_resource* search_tl_mr = nullptr;\n}\n\ntemplate <typename T>\nclass StatelessSearchAllocator : public StatelessAllocatorBase<T, StatelessSearchAllocator<T>> {\n public:\n  StatelessSearchAllocator() noexcept {\n    assert(detail::search_tl_mr != nullptr);\n  }\n\n  template <typename U>\n  StatelessSearchAllocator(const StatelessSearchAllocator<U>&) noexcept {  // NOLINT\n  }\n\n  static PMR_NS::memory_resource* resource() {\n    return detail::search_tl_mr;\n  }\n};\n\ntemplate <typename T, typename U>\nbool operator==(const StatelessSearchAllocator<T>&, const StatelessSearchAllocator<U>&) noexcept {\n  return true;\n}\n\ntemplate <typename T, typename U>\nbool operator!=(const StatelessSearchAllocator<T>&, const StatelessSearchAllocator<U>&) noexcept {\n  return false;\n}\n\ninline void InitTLSearchMR(PMR_NS::memory_resource* mr) {\n  detail::search_tl_mr = mr;\n}\n\n}  // namespace dfly\n"
  },
  {
    "path": "src/core/search/synonyms.cc",
    "content": "// Copyright 2023, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#include \"synonyms.h\"\n\n#include <absl/strings/str_cat.h>\n#include <uni_algo/case.h>\n\nnamespace dfly::search {\n\nconst absl::flat_hash_map<std::string, Synonyms::Group>& Synonyms::GetGroups() const {\n  return groups_;\n}\n\nvoid Synonyms::UpdateGroup(const std::string_view& id, const std::vector<std::string_view>& terms) {\n  auto& group = groups_[id];\n\n  // Convert all terms to lowercase before adding them to the group\n  for (const std::string_view& term : terms) {\n    group.insert(una::cases::to_lowercase_utf8(term));\n  }\n}\n\nstd::optional<std::string> Synonyms::GetGroupToken(std::string term) const {\n  term = una::cases::to_lowercase_utf8(term);\n  for (const auto& [id, group] : groups_) {\n    if (group.count(term)) {\n      // Add space before group id to avoid matching the term itself\n      return absl::StrCat(\" \", id);\n    }\n  }\n\n  return std::nullopt;\n}\n\n}  // namespace dfly::search\n"
  },
  {
    "path": "src/core/search/synonyms.h",
    "content": "// Copyright 2023, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#pragma once\n\n#include <absl/container/flat_hash_map.h>\n#include <absl/container/flat_hash_set.h>\n\nnamespace dfly::search {\n\n// Class that manages synonym groups for search indices.\n// Allows defining groups of related terms that should be considered equivalent during search.\n// All terms are converted to lowercase for normalization.\n//\n// When retrieving a group token via GetGroupToken, the group identifier is returned with a space\n// prefix. The space is intentionally added to avoid matching with the term itself during text\n// tokenization and to distinguish the group identifier from regular terms during search.\nclass Synonyms {\n public:\n  // Represents a group of synonymous terms\n  using Group = absl::flat_hash_set<std::string>;\n\n  // Get all synonym groups\n  const absl::flat_hash_map<std::string, Group>& GetGroups() const;\n\n  // Update or create a synonym group\n  void UpdateGroup(const std::string_view& id, const std::vector<std::string_view>& terms);\n\n  // Get the group ID for a term\n  std::optional<std::string> GetGroupToken(std::string term) const;\n\n private:\n  // Maps group ID to synonym group\n  absl::flat_hash_map<std::string, Group> groups_;\n};\n\n}  // namespace dfly::search\n"
  },
  {
    "path": "src/core/search/tag_types.h",
    "content": "// Copyright 2023, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#pragma once\n\nnamespace dfly {\nnamespace search {\n\nenum class TagType { PREFIX, SUFFIX, INFIX, REGULAR };\n\n}  // namespace search\n}  // namespace dfly\n"
  },
  {
    "path": "src/core/search/vector_utils.cc",
    "content": "// Copyright 2023, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#include \"core/search/vector_utils.h\"\n\n#include <cmath>\n#include <memory>\n\n#include \"base/logging.h\"\n\nnamespace dfly::search {\n\nusing namespace std;\n\nnamespace {\n\n#ifdef WITH_SIMSIMD\n#include <simsimd/simsimd.h>\n#endif\n\n#if defined(__GNUC__) && !defined(__clang__)\n#define FAST_MATH __attribute__((optimize(\"fast-math\")))\n#else\n#define FAST_MATH\n#endif\n\nOwnedFtVector ConvertToFtVector(string_view value) {\n  // Value cannot be casted directly as it might be not aligned as a float (4 bytes).\n  // Misaligned memory access is UB.\n  size_t size = value.size() / sizeof(float);\n  auto out = make_unique<float[]>(size);\n  memcpy(out.get(), value.data(), size * sizeof(float));\n\n  return OwnedFtVector{std::move(out), size};\n}\n\n}  // namespace\n\n// Euclidean vector distance: sqrt( sum: (u[i] - v[i])^2  )\nFAST_MATH float L2Distance(const float* u, const float* v, size_t dims) {\n#ifdef WITH_SIMSIMD\n  simsimd_distance_t distance = 0;\n  simsimd_l2_f32(u, v, dims, &distance);\n  return static_cast<float>(distance);\n#else\n  float sum = 0;\n  for (size_t i = 0; i < dims; i++)\n    sum += (u[i] - v[i]) * (u[i] - v[i]);\n  return sqrt(sum);\n#endif\n}\n\n// Inner product distance: 1 - dot_product(u, v)\n// For normalized vectors, this is equivalent to cosine distance\nFAST_MATH float IPDistance(const float* u, const float* v, size_t dims) {\n#ifdef WITH_SIMSIMD\n  // Use SimSIMD dot product and convert to inner product distance: 1 - dot(u, v).\n  simsimd_distance_t dot = 0;\n  simsimd_dot_f32(u, v, dims, &dot);\n  return 1.0f - static_cast<float>(dot);\n#else\n  float sum_uv = 0;\n  for (size_t i = 0; i < dims; i++)\n    sum_uv += u[i] * v[i];\n  return 1.0f - sum_uv;\n#endif\n}\n\n// Cosine distance: 1 - (dot_product(u, v) / (||u|| * ||v||))\nFAST_MATH float CosineDistance(const float* u, const float* v, size_t dims) 
{\n#ifdef WITH_SIMSIMD\n  simsimd_distance_t distance = 0;\n  simsimd_cos_f32(u, v, dims, &distance);\n  return static_cast<float>(distance);\n#else\n  float sum_uv = 0, sum_uu = 0, sum_vv = 0;\n  for (size_t i = 0; i < dims; i++) {\n    sum_uv += u[i] * v[i];\n    sum_uu += u[i] * u[i];\n    sum_vv += v[i] * v[i];\n  }\n\n  if (float denom = sum_uu * sum_vv; denom != 0.0f)\n    return 1 - sum_uv / sqrt(denom);\n  return 0.0f;\n#endif\n}\n\nOwnedFtVector BytesToFtVector(string_view value) {\n  DCHECK_EQ(value.size() % sizeof(float), 0u) << value.size();\n  return ConvertToFtVector(value);\n}\n\nstd::optional<OwnedFtVector> BytesToFtVectorSafe(string_view value) {\n  if (value.size() % sizeof(float)) {\n    return std::nullopt;\n  }\n  return ConvertToFtVector(value);\n}\n\nfloat VectorDistance(const float* u, const float* v, size_t dims, VectorSimilarity sim) {\n  switch (sim) {\n    case VectorSimilarity::L2:\n      return L2Distance(u, v, dims);\n    case VectorSimilarity::IP:\n      return IPDistance(u, v, dims);\n    case VectorSimilarity::COSINE:\n      return CosineDistance(u, v, dims);\n  };\n  return 0.0f;\n}\n\nvoid InitSimSIMD() {\n#if defined(WITH_SIMSIMD)\n  (void)simsimd_capabilities();\n#endif\n}\n\n}  // namespace dfly::search\n"
  },
  {
    "path": "src/core/search/vector_utils.h",
    "content": "// Copyright 2023, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#pragma once\n\n#include \"core/search/base.h\"\n\nnamespace dfly::search {\n\n// Initializes SimSIMD runtime if dynamic dispatch is enabled.\nvoid InitSimSIMD();\n\nOwnedFtVector BytesToFtVector(std::string_view value);\n\n// Returns std::nullopt if value can not be converted to the vector\n// TODO: Remove unsafe version\nstd::optional<OwnedFtVector> BytesToFtVectorSafe(std::string_view value);\n\nfloat L2Distance(const float* u, const float* v, size_t dims);\nfloat IPDistance(const float* u, const float* v, size_t dims);\nfloat CosineDistance(const float* u, const float* v, size_t dims);\nfloat VectorDistance(const float* u, const float* v, size_t dims, VectorSimilarity sim);\n\n}  // namespace dfly::search\n"
  },
  {
    "path": "src/core/segment_allocator.cc",
    "content": "// Copyright 2022, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n#include \"core/segment_allocator.h\"\n\n#define MI_BUILD_RELEASE 1\n#include <mimalloc/types.h>\n\n#include \"base/logging.h\"\n\nnamespace dfly {\n\nSegmentAllocator::SegmentAllocator(mi_heap_t* heap) : heap_(heap) {\n  // 256GB\n  constexpr size_t limit = 1ULL << 35;\n  static_assert((1ULL << (kSegmentIdBits + kSegmentShift)) == limit);\n  // mimalloc uses 32MiB segments and we might need change this code if it changes.\n  static_assert(kSegmentShift == MI_SEGMENT_SHIFT);\n  static_assert((~kSegmentAlignMask) == (MI_SEGMENT_MASK));\n}\n\nvoid SegmentAllocator::ValidateMapSize() {\n  if (address_table_.size() > (1u << kSegmentIdBits)) {\n    // This can happen if we restrict dragonfly to small number of threads on high-memory machine,\n    // for example.\n    LOG(WARNING) << \"address_table_ map is growing too large: \" << address_table_.size();\n  }\n}\n\nbool SegmentAllocator::CanAllocate() {\n  return address_table_.size() < (1u << kSegmentIdBits);\n}\n\n}  // namespace dfly\n"
  },
  {
    "path": "src/core/segment_allocator.h",
    "content": "// Copyright 2022, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n#pragma once\n\n#include <absl/container/flat_hash_map.h>\n#include <mimalloc.h>\n\n/***\n * This class is tightly coupled with mimalloc segment allocation logic and is designed to provide\n * a compact pointer representation (4bytes ptr) over 64bit address space that gives you\n * 32GB of allocations with option to extend it to 32*256GB if needed.\n *\n */\n\nnamespace dfly {\n\n/**\n * @brief Tightly coupled with mi_malloc 2.x implementation.\n *        Fetches 32MiB segment pointers from the allocated pointers.\n *        Provides own indexing of small pointers to real address space using the segment ptrs/\n */\n\nclass SegmentAllocator {\n  // (2 ^ 10) total segments\n  static constexpr uint32_t kSegmentIdBits = 10;\n  static constexpr uint32_t kSegmentIdMask = (1u << kSegmentIdBits) - 1;\n  // (2 ^ 25) total bytes per segment = 32MiB\n  static constexpr uint32_t kSegmentShift = 25;\n\n  // Segment range that we cover within a single segment.\n  static constexpr uint64_t kSegmentAlignMask = ~((1ULL << kSegmentShift) - 1);\n\n public:\n  using Ptr = uint32_t;\n\n  SegmentAllocator(mi_heap_t* heap);\n  bool CanAllocate();\n\n  uint8_t* Translate(Ptr p) const {\n    return address_table_[p & kSegmentIdMask] + Offset(p);\n  }\n\n  std::pair<Ptr, uint8_t*> Allocate(uint32_t size);\n\n  void Free(Ptr ptr) {\n    void* p = Translate(ptr);\n    used_ -= mi_usable_size(p);\n    mi_free(p);\n  }\n\n  mi_heap_t* heap() {\n    return heap_;\n  }\n\n  size_t used() const {\n    return used_;\n  }\n\n private:\n  static uint32_t Offset(Ptr p) {\n    return (p >> kSegmentIdBits) * 8;\n  }\n\n  void ValidateMapSize();\n\n  std::vector<uint8_t*> address_table_;\n  absl::flat_hash_map<uint64_t, uint16_t> rev_indx_;\n  mi_heap_t* heap_;\n  size_t used_ = 0;\n};\n\ninline auto SegmentAllocator::Allocate(uint32_t size) -> std::pair<Ptr, uint8_t*> {\n  void* ptr = 
mi_heap_malloc(heap_, size);\n  if (!ptr)\n    throw std::bad_alloc{};\n\n  uint64_t iptr = (uint64_t)ptr;\n  uint64_t seg_ptr = iptr & kSegmentAlignMask;\n\n  // could be speed up using last used seg_ptr.\n  auto [it, inserted] = rev_indx_.emplace(seg_ptr, address_table_.size());\n  if (inserted) {\n    ValidateMapSize();\n    address_table_.push_back((uint8_t*)seg_ptr);\n  }\n\n  uint32_t seg_offset = (iptr - seg_ptr) / 8;\n  Ptr res = (seg_offset << kSegmentIdBits) | it->second;\n  used_ += mi_good_size(size);\n\n  return std::make_pair(res, (uint8_t*)ptr);\n}\n\n}  // namespace dfly\n"
  },
  {
    "path": "src/core/size_tracking_channel.h",
    "content": "// Copyright 2023, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#pragma once\n\n#include <atomic>\n\n#include \"util/fibers/simple_channel.h\"\n\nnamespace dfly {\n\n// SimpleQueue-like interface, but also keeps track over the size of Ts it owns.\n// It has a slightly less efficient TryPush() API as it forces construction of Ts even if they are\n// not pushed.\n// T must have a .size() method, which should return the heap-allocated size of T, excluding\n// anything included in sizeof(T). We could generalize this in the future.\ntemplate <typename T, typename Queue = folly::ProducerConsumerQueue<T>> class SizeTrackingChannel {\n public:\n  SizeTrackingChannel(size_t n, unsigned num_producers = 1) : queue_(n, num_producers) {\n  }\n\n  // Here and below, we must accept a T instead of building it from variadic args, as we need to\n  // know its size in case it is added.\n  size_t Push(T t) noexcept {\n    size_t tsize = t.size();\n    size_t res = size_.fetch_add(tsize, std::memory_order_relaxed);\n    queue_.Push(std::move(t));\n    return res + tsize;\n  }\n\n  bool TryPush(T t) noexcept {\n    const size_t size = t.size();\n    if (queue_.TryPush(std::move(t))) {\n      size_.fetch_add(size, std::memory_order_relaxed);\n      return true;\n    }\n\n    return false;\n  }\n\n  bool Pop(T& dest) {\n    if (queue_.Pop(dest)) {\n      size_.fetch_sub(dest.size(), std::memory_order_relaxed);\n      return true;\n    }\n\n    return false;\n  }\n\n  void StartClosing() {\n    queue_.StartClosing();\n  }\n\n  bool TryPop(T& dest) {\n    if (queue_.TryPop(dest)) {\n      size_.fetch_sub(dest.size(), std::memory_order_relaxed);\n      return true;\n    }\n\n    return false;\n  }\n\n  bool IsClosing() const {\n    return queue_.IsClosing();\n  }\n\n  size_t GetSize() const {\n    return queue_.Capacity() * sizeof(T) + size_.load(std::memory_order_relaxed);\n  }\n\n private:\n  util::fb2::SimpleChannel<T, Queue> 
queue_;\n  std::atomic<size_t> size_ = 0;\n};\n\n}  // namespace dfly\n"
  },
  {
    "path": "src/core/small_string.cc",
    "content": "// Copyright 2022, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#include \"core/small_string.h\"\n\n#include <mimalloc.h>\n#include <xxhash.h>\n\n#include <memory>\n\n#include \"base/logging.h\"\n#include \"core/page_usage/page_usage_stats.h\"\n#include \"core/segment_allocator.h\"\n\nnamespace dfly {\nusing namespace std;\n\nnamespace {\n\nclass XXH3_Deleter {\n public:\n  void operator()(XXH3_state_t* ptr) const {\n    XXH3_freeState(ptr);\n  }\n};\n\nstruct TL {\n  unique_ptr<XXH3_state_t, XXH3_Deleter> xxh_state;\n  unique_ptr<SegmentAllocator> seg_alloc;\n};\n\nthread_local TL tl;\n\nconstexpr XXH64_hash_t kHashSeed = 24061983;  // same as in compact_object.cc\n\n}  // namespace\n\nvoid SmallString::InitThreadLocal(void* heap) {\n  SegmentAllocator* ns = new SegmentAllocator((mi_heap_t*)heap);\n\n  tl.seg_alloc.reset(ns);\n  tl.xxh_state.reset(XXH3_createState());\n  XXH3_64bits_reset_withSeed(tl.xxh_state.get(), kHashSeed);\n}\n\nbool SmallString::CanAllocate(size_t size) {\n  return size <= kMaxSize && tl.seg_alloc->CanAllocate();\n}\n\nsize_t SmallString::UsedThreadLocal() {\n  return tl.seg_alloc ? 
tl.seg_alloc->used() : 0;\n}\n\nstatic_assert(sizeof(SmallString) == 16);\n\nsize_t SmallString::Assign(std::string_view s) {\n  DCHECK_GT(s.size(), kPrefLen);\n  DCHECK(CanAllocate(s.size()));\n  uint8_t* realptr = nullptr;\n\n  // reallocate if we need a larger allocation or it becomes space-inefficient\n  size_t heap_len = s.size() - kPrefLen;\n  if (size_t available = MallocUsed(); available < heap_len || heap_len * 2 < available) {\n    Free();\n\n    auto [sp, rp] = tl.seg_alloc->Allocate(heap_len);\n    small_ptr_ = sp;\n    realptr = rp;\n  } else {\n    realptr = tl.seg_alloc->Translate(small_ptr_);\n  }\n\n  size_ = s.size();\n  memcpy(prefix_, s.data(), kPrefLen);\n  memcpy(realptr, s.data() + kPrefLen, heap_len);\n  return mi_malloc_usable_size(realptr);\n}\n\nvoid SmallString::Free() {\n  if (size_)\n    tl.seg_alloc->Free(small_ptr_);\n  size_ = 0;\n}\n\nuint16_t SmallString::MallocUsed() const {\n  if (size_)\n    return mi_malloc_usable_size(tl.seg_alloc->Translate(small_ptr_));\n  return 0;\n}\n\nbool SmallString::Equal(std::string_view o) const {\n  if (size_ != o.size())\n    return false;\n\n  if (size_ == 0)\n    return true;\n\n  if (memcmp(prefix_, o.data(), kPrefLen) != 0)\n    return false;\n\n  uint8_t* realp = tl.seg_alloc->Translate(small_ptr_);\n  return memcmp(realp, o.data() + kPrefLen, size_ - kPrefLen) == 0;\n}\n\nbool SmallString::Equal(const SmallString& os) const {\n  if (size_ != os.size_)\n    return false;\n\n  return Get() == os.Get();\n}\n\nuint64_t SmallString::HashCode() const {\n  array<string_view, 2> slice = Get();\n\n  XXH3_state_t* state = tl.xxh_state.get();\n  XXH3_64bits_reset_withSeed(state, kHashSeed);\n  XXH3_64bits_update(state, slice[0].data(), slice[0].size());\n  XXH3_64bits_update(state, slice[1].data(), slice[1].size());\n\n  return XXH3_64bits_digest(state);\n}\n\narray<string_view, 2> SmallString::Get() const {\n  DCHECK(size_);\n\n  array<string_view, 2> dest;\n  dest[0] = string_view{prefix_, 
kPrefLen};\n  uint8_t* ptr = tl.seg_alloc->Translate(small_ptr_);\n  dest[1] = string_view{reinterpret_cast<char*>(ptr), size_ - kPrefLen};\n  return dest;\n}\n\nvoid SmallString::Get(char* out) const {\n  auto strs = Get();\n  memcpy(out, strs[0].data(), strs[0].size());\n  memcpy(out + strs[0].size(), strs[1].data(), strs[1].size());\n}\n\nvoid SmallString::Get(std::string* dest) const {\n  dest->resize(size_);\n  Get(dest->data());\n}\n\nbool SmallString::DefragIfNeeded(PageUsage* page_usage) {\n  uint8_t* cur_real_ptr = tl.seg_alloc->Translate(small_ptr_);\n  if (!page_usage->IsPageForObjectUnderUtilized(tl.seg_alloc->heap(), cur_real_ptr))\n    return false;\n\n  if (!CanAllocate(size_ - kPrefLen))  // Forced\n    return false;\n\n  auto [sp, rp] = tl.seg_alloc->Allocate(size_ - kPrefLen);\n  memcpy(rp, cur_real_ptr, size_ - kPrefLen);\n  tl.seg_alloc->Free(small_ptr_);\n  small_ptr_ = sp;\n\n  return true;\n}\n\n}  // namespace dfly\n"
  },
  {
    "path": "src/core/small_string.h",
    "content": "// Copyright 2022, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n#pragma once\n\n#include <array>\n#include <cstdint>\n#include <string_view>\n\nnamespace dfly {\n\nclass PageUsage;\n\n// Efficient storage of strings longer than 10 bytes.\n// Requires explicit memory management\nclass SmallString {\n  static constexpr unsigned kPrefLen = 10;\n  static constexpr unsigned kMaxSize = (1 << 8) - 1;\n\n public:\n  static void InitThreadLocal(void* heap);\n  static size_t UsedThreadLocal();\n  static bool CanAllocate(size_t size);\n\n  // Returns malloc used.\n  size_t Assign(std::string_view s);\n  void Free();\n\n  bool Equal(std::string_view o) const;\n  bool Equal(const SmallString& mps) const;\n\n  uint64_t HashCode() const;\n  uint16_t MallocUsed() const;\n\n  std::array<std::string_view, 2> Get() const;\n  void Get(char* out) const;\n  void Get(std::string* dest) const;\n\n  bool DefragIfNeeded(PageUsage* page_usage);\n\n  size_t size() const {\n    return size_;\n  }\n\n  uint8_t first_byte() const {\n    return prefix_[0];\n  }\n\n private:\n  // The string is stored broken up into two parts, the first one - in this array\n  char prefix_[kPrefLen];\n\n  uint32_t small_ptr_;  // 32GB capacity because we ignore 3 lsb bits (i.e. x8).\n  uint16_t size_;       // uint16_t - total size (including prefix)\n\n} __attribute__((packed));\n\n}  // namespace dfly\n"
  },
  {
    "path": "src/core/sorted_map.cc",
    "content": "// Copyright 2023, Roman Gershman.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#include \"core/sorted_map.h\"\n\n#include <absl/strings/str_cat.h>\n\n#include <cmath>\n\nextern \"C\" {\n#include \"redis/listpack.h\"\n#include \"redis/redis_aux.h\"\n#include \"redis/util.h\"\n#include \"redis/zmalloc.h\"\n}\n\n#include <double-conversion/double-to-string.h>\n\n#include \"base/endian.h\"\n#include \"base/logging.h\"\n\nusing namespace std;\n\nnamespace dfly {\nnamespace detail {\n\nnamespace {\n\ndouble GetObjScore(const void* obj) {\n  sds s = (sds)obj;\n  char* ptr = s + sdslen(s) + 1;\n  return absl::bit_cast<double>(absl::little_endian::Load64(ptr));\n}\n\nvoid SetObjScore(void* obj, double score) {\n  sds s = (sds)obj;\n  char* ptr = s + sdslen(s) + 1;\n  absl::little_endian::Store64(ptr, absl::bit_cast<uint64_t>(score));\n}\n\n// buf must be at least 10 chars long.\nvoid* BuildScoredKey(double score, char buf[]) {\n  buf[0] = SDS_TYPE_5;  // length 0.\n  buf[1] = 0;\n  absl::little_endian::Store64(buf + 2, absl::bit_cast<uint64_t>(score));\n  void* key = buf + 1;\n\n  return key;\n}\n\n// Copied from t_zset.c\n/* Returns 1 if the double value can safely be represented in long long without\n * precision loss, in which case the corresponding long long is stored in the out variable. */\nstatic int double2ll(double d, long long* out) {\n#if (DBL_MANT_DIG >= 52) && (DBL_MANT_DIG <= 63) && (LLONG_MAX == 0x7fffffffffffffffLL)\n  /* Check if the float is in a safe range to be casted into a\n   * long long. We are assuming that long long is 64 bit here.\n   * Also we are assuming that there are no implementations around where\n   * double has precision < 52 bit.\n   *\n   * Under this assumptions we test if a double is inside a range\n   * where casting to long long is safe. Then using two castings we\n   * make sure the decimal part is zero. 
If all this is true we can use\n   * integer without precision loss.\n   *\n   * Note that numbers above 2^52 and below 2^63 use all the fraction bits as real part,\n   * and the exponent bits are positive, which means the \"decimal\" part must be 0.\n   * i.e. all double values in that range are representable as a long without precision loss,\n   * but not all long values in that range can be represented as a double.\n   * we only care about the first part here. */\n  if (d < (double)(-LLONG_MAX / 2) || d > (double)(LLONG_MAX / 2))\n    return 0;\n  long long ll = d;\n  if (ll == d) {\n    *out = ll;\n    return 1;\n  }\n#endif\n  return 0;\n}\n\n/* Compare element in sorted set with given element. */\nint zzlCompareElements(unsigned char* eptr, unsigned char* cstr, unsigned int clen) {\n  unsigned char* vstr;\n  unsigned int vlen;\n  long long vlong;\n  unsigned char vbuf[32];\n  int minlen, cmp;\n\n  vstr = lpGetValue(eptr, &vlen, &vlong);\n  if (vstr == NULL) {\n    /* Store string representation of long long in buf. */\n    vlen = ll2string((char*)vbuf, sizeof(vbuf), vlong);\n    vstr = vbuf;\n  }\n\n  minlen = (vlen < clen) ? 
vlen : clen;\n  cmp = memcmp(vstr, cstr, minlen);\n  if (cmp == 0)\n    return vlen - clen;\n  return cmp;\n}\n\nusing double_conversion::DoubleToStringConverter;\nconstexpr unsigned kConvFlags = DoubleToStringConverter::UNIQUE_ZERO;\n\nDoubleToStringConverter score_conv(kConvFlags, \"inf\", \"nan\", 'e', -6, 21, 6, 0);\n\n// Copied from redis code but uses double_conversion to encode double values.\nunsigned char* ZzlInsertAt(unsigned char* zl, unsigned char* eptr, std::string_view ele,\n                           double score) {\n  unsigned char* sptr;\n  char scorebuf[128];\n  unsigned scorelen = 0;\n  long long lscore;\n  int score_is_long = double2ll(score, &lscore);\n  if (!score_is_long) {\n    // Use double converter to get the shortest representation.\n    double_conversion::StringBuilder sb(scorebuf, sizeof(scorebuf));\n    score_conv.ToShortest(score, &sb);\n    scorelen = sb.position();\n    sb.Finalize();\n    DCHECK_EQ(scorelen, strlen(scorebuf));\n  }\n\n  // Argument parsing converts empty strings to default initialized string views.\n  // Such string views have a null data field, which if passed into lpAppend (via zzlInsertAt)\n  // results in the replace operation being applied on the listpack. In addition to being wrong, it\n  // also causes assertion failures. To circumvent this corner case we pass here a string view\n  // pointing to an empty string on the stack, which has a non-null data field.\n  if (ele.data() == nullptr) {\n    ele = \"\"sv;\n  }\n\n  if (eptr == NULL) {\n    zl = lpAppend(zl, (const unsigned char*)(ele.data()), ele.size());\n    if (score_is_long)\n      zl = lpAppendInteger(zl, lscore);\n    else\n      zl = lpAppend(zl, (unsigned char*)scorebuf, scorelen);\n  } else {\n    /* Insert member before the element 'eptr'. */\n    zl = lpInsertString(zl, (const unsigned char*)ele.data(), ele.size(), eptr, LP_BEFORE, &sptr);\n\n    /* Insert score after the member. 
*/\n    if (score_is_long)\n      zl = lpInsertInteger(zl, lscore, sptr, LP_AFTER, NULL);\n    else\n      zl = lpInsertString(zl, (unsigned char*)scorebuf, scorelen, sptr, LP_AFTER, NULL);\n  }\n  return zl;\n}\n\ndouble ZzlStrtod(unsigned char* vstr, unsigned int vlen) {\n  char buf[128];\n  if (vlen > sizeof(buf))\n    vlen = sizeof(buf);\n  memcpy(buf, vstr, vlen);\n  buf[vlen] = '\\0';\n  return strtod(buf, NULL);\n}\n\n/* Return a listpack element as an SDS string. */\nsds LpGetObject(const uint8_t* sptr) {\n  unsigned char* vstr;\n  unsigned int vlen;\n  long long vlong;\n\n  serverAssert(sptr != NULL);\n  vstr = lpGetValue(const_cast<uint8_t*>(sptr), &vlen, &vlong);\n\n  if (vstr) {\n    return sdsnewlen((char*)vstr, vlen);\n  } else {\n    return sdsfromlonglong(vlong);\n  }\n}\n\n// static representation of sds strings\nchar kMinStrData[] =\n    \"\\110\"\n    \"minstring\";\nchar kMaxStrData[] =\n    \"\\110\"\n    \"maxstring\";\n\n}  // namespace\n\ndouble ZzlGetScore(const uint8_t* sptr) {\n  unsigned char* vstr;\n  unsigned int vlen;\n  long long vlong;\n  double score;\n\n  DCHECK(sptr != NULL);\n  vstr = lpGetValue(const_cast<uint8_t*>(sptr), &vlen, &vlong);\n\n  if (vstr) {\n    score = ZzlStrtod(vstr, vlen);\n  } else {\n    score = vlong;\n  }\n\n  return score;\n}\n\n/* Move to the previous entry based on the values in eptr and sptr. Both are\n * set to NULL when there is no prev entry. */\nvoid ZzlPrev(const uint8_t* zl, uint8_t** eptr, uint8_t** sptr) {\n  unsigned char *_eptr, *_sptr;\n  serverAssert(*eptr != NULL && *sptr != NULL);\n\n  _sptr = lpPrev(const_cast<uint8_t*>(zl), *eptr);\n  if (_sptr != NULL) {\n    _eptr = lpPrev(const_cast<uint8_t*>(zl), _sptr);\n    DCHECK(_eptr != NULL);\n  } else {\n    /* No previous entry. */\n    _eptr = NULL;\n  }\n\n  *eptr = _eptr;\n  *sptr = _sptr;\n}\n\n/* Move to next entry based on the values in eptr and sptr. Both are set to\n * NULL when there is no next entry. 
*/\nvoid ZzlNext(const uint8_t* zl, uint8_t** eptr, uint8_t** sptr) {\n  unsigned char *_eptr, *_sptr;\n  DCHECK(*eptr != NULL && *sptr != NULL);\n\n  _eptr = lpNext(const_cast<uint8_t*>(zl), *sptr);\n  if (_eptr != NULL) {\n    _sptr = lpNext(const_cast<uint8_t*>(zl), _eptr);\n    DCHECK(_sptr != NULL);\n  } else {\n    /* No next entry. */\n    _sptr = NULL;\n  }\n\n  *eptr = _eptr;\n  *sptr = _sptr;\n}\n\n/* Free a lex range structure, must be called only after zslParseLexRange()\n * populated the structure with success (C_OK returned). */\nvoid ZslFreeLexRange(const zlexrangespec* spec) {\n  if (spec->min != cminstring && spec->min != cmaxstring)\n    sdsfree(spec->min);\n  if (spec->max != cminstring && spec->max != cmaxstring)\n    sdsfree(spec->max);\n}\n\n/* This is just a wrapper to sdscmp() that is able to\n * handle shared.minstring and shared.maxstring as the equivalent of\n * -inf and +inf for strings */\nint sdscmplex(sds a, sds b) {\n  if (a == b)\n    return 0;\n  if (a == cminstring || b == cmaxstring)\n    return -1;\n  if (a == cmaxstring || b == cminstring)\n    return 1;\n  return sdscmp(a, b);\n}\n\nint zslLexValueGteMin(sds value, const zlexrangespec* spec) {\n  return spec->minex ? (sdscmplex(value, spec->min) > 0) : (sdscmplex(value, spec->min) >= 0);\n}\n\nint zslLexValueLteMax(sds value, const zlexrangespec* spec) {\n  return spec->maxex ? (sdscmplex(value, spec->max) < 0) : (sdscmplex(value, spec->max) <= 0);\n}\n\nint ZzlLexValueGteMin(unsigned char* p, const zlexrangespec* spec) {\n  sds value = LpGetObject(p);\n  int res = zslLexValueGteMin(value, spec);\n  sdsfree(value);\n  return res;\n}\n\nint ZzlLexValueLteMax(unsigned char* p, const zlexrangespec* spec) {\n  sds value = LpGetObject(p);\n  int res = zslLexValueLteMax(value, spec);\n  sdsfree(value);\n  return res;\n}\n\n/* Returns if there is a part of the zset is in range. Should only be used\n * internally by zzlFirstInRange and zzlLastInRange. 
*/\nint zzlIsInRange(unsigned char* zl, const zrangespec* range) {\n  unsigned char* p;\n  double score;\n\n  /* Test for ranges that will always be empty. */\n  if (range->min > range->max || (range->min == range->max && (range->minex || range->maxex)))\n    return 0;\n\n  p = lpSeek(zl, -1); /* Last score. */\n  if (p == NULL)\n    return 0; /* Empty sorted set */\n  score = ZzlGetScore(p);\n  if (!ZslValueGteMin(score, range))\n    return 0;\n\n  p = lpSeek(zl, 1); /* First score. */\n  serverAssert(p != NULL);\n  score = ZzlGetScore(p);\n  if (!ZslValueLteMax(score, range))\n    return 0;\n\n  return 1;\n}\n\n/* Find pointer to the first element contained in the specified range.\n * Returns NULL when no element is contained in the range. */\nunsigned char* ZzlFirstInRange(unsigned char* zl, const zrangespec* range) {\n  unsigned char *eptr = lpSeek(zl, 0), *sptr;\n  double score;\n\n  /* If everything is out of range, return early. */\n  if (!zzlIsInRange(zl, range))\n    return NULL;\n\n  while (eptr != NULL) {\n    sptr = lpNext(zl, eptr);\n    serverAssert(sptr != NULL);\n\n    score = ZzlGetScore(sptr);\n    if (ZslValueGteMin(score, range)) {\n      /* Check if score <= max. */\n      if (ZslValueLteMax(score, range))\n        return eptr;\n      return NULL;\n    }\n\n    /* Move to next element. */\n    eptr = lpNext(zl, sptr);\n  }\n\n  return NULL;\n}\n\n/* Find pointer to the last element contained in the specified range.\n * Returns NULL when no element is contained in the range. */\nunsigned char* ZzlLastInRange(unsigned char* zl, const zrangespec* range) {\n  unsigned char *eptr = lpSeek(zl, -2), *sptr;\n  double score;\n\n  /* If everything is out of range, return early. */\n  if (!zzlIsInRange(zl, range))\n    return NULL;\n\n  while (eptr != NULL) {\n    sptr = lpNext(zl, eptr);\n    serverAssert(sptr != NULL);\n\n    score = ZzlGetScore(sptr);\n    if (ZslValueLteMax(score, range)) {\n      /* Check if score >= min. 
*/\n      if (ZslValueGteMin(score, range))\n        return eptr;\n      return NULL;\n    }\n\n    /* Move to previous element by moving to the score of previous element.\n     * When this returns NULL, we know there also is no element. */\n    sptr = lpPrev(zl, eptr);\n    if (sptr != NULL)\n      serverAssert((eptr = lpPrev(zl, sptr)) != NULL);\n    else\n      eptr = NULL;\n  }\n\n  return NULL;\n}\n\n/* Returns if there is a part of the zset is in range. Should only be used\n * internally by zzlFirstInRange and zzlLastInRange. */\nint ZzlIsInLexRange(unsigned char* zl, const zlexrangespec* range) {\n  unsigned char* p;\n\n  /* Test for ranges that will always be empty. */\n  int cmp = sdscmplex(range->min, range->max);\n  if (cmp > 0 || (cmp == 0 && (range->minex || range->maxex)))\n    return 0;\n\n  p = lpSeek(zl, -2); /* Last element. */\n  if (p == NULL)\n    return 0;\n  if (!ZzlLexValueGteMin(p, range))\n    return 0;\n\n  p = lpSeek(zl, 0); /* First element. */\n  serverAssert(p != NULL);\n  if (!ZzlLexValueLteMax(p, range))\n    return 0;\n\n  return 1;\n}\n\n/* Find pointer to the first element contained in the specified lex range.\n * Returns NULL when no element is contained in the range. */\nunsigned char* ZzlFirstInLexRange(unsigned char* zl, const zlexrangespec* range) {\n  unsigned char *eptr = lpSeek(zl, 0), *sptr;\n\n  /* If everything is out of range, return early. */\n  if (!ZzlIsInLexRange(zl, range))\n    return NULL;\n\n  while (eptr != NULL) {\n    if (ZzlLexValueGteMin(eptr, range)) {\n      /* Check if score <= max. */\n      if (ZzlLexValueLteMax(eptr, range))\n        return eptr;\n      return NULL;\n    }\n\n    /* Move to next element. */\n    sptr = lpNext(zl, eptr); /* This element score. Skip it. */\n    serverAssert(sptr != NULL);\n    eptr = lpNext(zl, sptr); /* Next element. 
*/\n  }\n\n  return NULL;\n}\n\n/* Find pointer to the last element contained in the specified lex range.\n * Returns NULL when no element is contained in the range. */\nunsigned char* ZzlLastInLexRange(unsigned char* zl, const zlexrangespec* range) {\n  unsigned char *eptr = lpSeek(zl, -2), *sptr;\n\n  /* If everything is out of range, return early. */\n  if (!ZzlIsInLexRange(zl, range))\n    return NULL;\n\n  while (eptr != NULL) {\n    if (ZzlLexValueLteMax(eptr, range)) {\n      /* Check if score >= min. */\n      if (ZzlLexValueGteMin(eptr, range))\n        return eptr;\n      return NULL;\n    }\n\n    /* Move to previous element by moving to the score of previous element.\n     * When this returns NULL, we know there also is no element. */\n    sptr = lpPrev(zl, eptr);\n    if (sptr != NULL)\n      serverAssert((eptr = lpPrev(zl, sptr)) != NULL);\n    else\n      eptr = NULL;\n  }\n\n  return NULL;\n}\n\nunsigned char* ZzlDeleteRangeByLex(unsigned char* zl, const zlexrangespec* range,\n                                   unsigned long* deleted) {\n  unsigned char *eptr, *sptr;\n  unsigned long num = 0;\n\n  if (deleted != NULL)\n    *deleted = 0;\n\n  eptr = ZzlFirstInLexRange(zl, range);\n  if (eptr == NULL)\n    return zl;\n\n  /* When the tail of the listpack is deleted, eptr will be NULL. */\n  while (eptr && (sptr = lpNext(zl, eptr)) != NULL) {\n    if (ZzlLexValueLteMax(eptr, range)) {\n      /* Delete both the element and the score. */\n      zl = lpDeleteRangeWithEntry(zl, &eptr, 2);\n      num++;\n    } else {\n      /* No longer in range. 
*/\n      break;\n    }\n  }\n\n  if (deleted != NULL)\n    *deleted = num;\n  return zl;\n}\n\nunsigned char* ZzlDeleteRangeByScore(unsigned char* zl, const zrangespec* range,\n                                     unsigned long* deleted) {\n  unsigned char *eptr, *sptr;\n  double score;\n  unsigned long num = 0;\n\n  if (deleted != NULL)\n    *deleted = 0;\n\n  eptr = ZzlFirstInRange(zl, range);\n  if (eptr == NULL)\n    return zl;\n\n  /* When the tail of the listpack is deleted, eptr will be NULL. */\n  while (eptr && (sptr = lpNext(zl, eptr)) != NULL) {\n    score = ZzlGetScore(sptr);\n    if (ZslValueLteMax(score, range)) {\n      /* Delete both the element and the score. */\n      zl = lpDeleteRangeWithEntry(zl, &eptr, 2);\n      num++;\n    } else {\n      /* No longer in range. */\n      break;\n    }\n  }\n\n  if (deleted != NULL)\n    *deleted = num;\n  return zl;\n}\n\n/* Insert (element,score) pair in listpack. This function assumes the element is\n * not yet present in the list. */\nunsigned char* ZzlInsert(unsigned char* zl, std::string_view ele, double score) {\n  unsigned char *eptr = NULL, *sptr = lpSeek(zl, -1);\n  double s;\n\n  // Optimization: check first whether the new element should be the last.\n  if (sptr != NULL) {\n    s = ZzlGetScore(sptr);\n    if (s >= score) {\n      // It should not be the last, so fallback to the forward iteration.\n      eptr = lpSeek(zl, 0);\n    }\n  }\n\n  while (eptr != NULL) {\n    sptr = lpNext(zl, eptr);\n    s = ZzlGetScore(sptr);\n\n    if (s > score) {\n      /* First element with score larger than score for element to be\n       * inserted. This means we should take its spot in the list to\n       * maintain ordering. */\n      return ZzlInsertAt(zl, eptr, ele, score);\n    } else if (s == score) {\n      /* Ensure lexicographical ordering for elements. 
*/\n      if (zzlCompareElements(eptr, (unsigned char*)ele.data(), ele.size()) > 0) {\n        return ZzlInsertAt(zl, eptr, ele, score);\n      }\n    }\n\n    /* Move to next element. */\n    eptr = lpNext(zl, sptr);\n  }\n\n  /* Push on tail of list when it was not yet inserted. */\n  return ZzlInsertAt(zl, NULL, ele, score);\n}\n\nunsigned char* ZzlFind(unsigned char* lp, std::string_view ele, double* score) {\n  uint8_t *sptr, *eptr = lpFirst(lp);\n\n  if (eptr == nullptr)\n    return nullptr;\n  eptr = lpFind(lp, eptr, (unsigned char*)ele.data(), ele.size(), 1);\n  if (eptr) {\n    sptr = lpNext(lp, eptr);\n    serverAssert(sptr != NULL);\n\n    /* Matching element, pull out score. */\n    if (score != nullptr)\n      *score = ZzlGetScore(sptr);\n    return eptr;\n  }\n\n  return nullptr;\n}\n\nSortedMap::SortedMap()\n    : score_map(new ScoreMap), score_tree(new ScoreTree(StatelessAllocator<char>::resource())) {\n}\n\nSortedMap::~SortedMap() {\n  delete score_tree;\n  delete score_map;\n}\n\n// Three way comparison of q and key.\n// Compares scores first and then the keys, unless q.ignore_score is set.\n// In that case only keys are compared.\n// In order to support close/open intervals, we introduce a special flag for +inf strings.\n// So, in case of score equality (or if scores are ignored), q.str_is_infinite means q > key,\n// and 1 is returned.\nint SortedMap::ScoreSdsPolicy::KeyCompareTo::operator()(Query q, ScoreSds key) const {\n  sds sdsa = (sds)q.item;\n\n  if (!q.ignore_score) {\n    double sa = GetObjScore(sdsa);\n    double sb = GetObjScore(key);\n\n    if (sa < sb)\n      return -1;\n    if (sa > sb)\n      return 1;\n  }\n\n  // if q.str_is_infinite is set, it means q > key at this point.\n  if (q.str_is_infinite)\n    return 1;\n\n  return sdscmp(sdsa, (sds)key);\n}\n\nint SortedMap::AddElem(double score, std::string_view ele, int in_flags, int* out_flags,\n                       double* newscore) {\n  // does not take ownership over ele.\n  
DCHECK(!isnan(score));\n\n  ScoreSds obj = nullptr;\n  bool added = false;\n\n  if (in_flags & ZADD_IN_XX) {\n    obj = score_map->FindObj(ele);\n    if (obj == nullptr) {\n      *out_flags = ZADD_OUT_NOP;\n      return 1;\n    }\n  } else {\n    tie(obj, added) = score_map->AddOrSkip(ele, score);\n  }\n\n  if (added) {\n    // Adding a new element.\n    DCHECK_EQ(in_flags & ZADD_IN_XX, 0);\n\n    *out_flags = ZADD_OUT_ADDED;\n    *newscore = score;\n    bool added = score_tree->Insert(obj);\n    DCHECK(added);\n\n    return 1;\n  }\n\n  // Updating an existing element.\n  if ((in_flags & ZADD_IN_NX)) {\n    // Updating an existing element.\n    *out_flags = ZADD_OUT_NOP;\n    return 1;\n  }\n\n  if (in_flags & ZADD_IN_INCR) {\n    score += GetObjScore(obj);\n    if (isnan(score)) {\n      *out_flags = ZADD_OUT_NAN;\n      return 0;\n    }\n  }\n\n  // Update the score.\n  CHECK(score_tree->Delete(obj));\n  SetObjScore(obj, score);\n  CHECK(score_tree->Insert(obj));\n  *out_flags = ZADD_OUT_UPDATED;\n  *newscore = score;\n  return 1;\n}\n\noptional<double> SortedMap::GetScore(std::string_view ele) const {\n  ScoreSds obj = score_map->FindObj(ele);\n  if (obj != nullptr) {\n    return GetObjScore(obj);\n  }\n\n  return std::nullopt;\n}\n\nbool SortedMap::InsertNew(double score, std::string_view member) {\n  DVLOG(2) << \"InsertNew \" << score << \" \" << member;\n\n  auto [newk, added] = score_map->AddOrSkip(member, score);\n  if (!added)\n    return false;\n\n  added = score_tree->Insert(newk);\n  CHECK(added);\n  return true;\n}\n\noptional<unsigned> SortedMap::GetRank(std::string_view ele, bool reverse) const {\n  ScoreSds obj = score_map->FindObj(ele);\n  if (obj == nullptr)\n    return std::nullopt;\n\n  optional rank = score_tree->GetRank(obj, reverse);\n  DCHECK(rank);\n  return *rank;\n}\n\nSortedMap::ScoredArray SortedMap::GetRange(const zrangespec& range, unsigned offset, unsigned limit,\n                                           bool reverse) const {\n  
ScoredArray arr;\n  if (score_tree->Size() <= offset || limit == 0)\n    return arr;\n\n  char buf[16];\n  if (reverse) {\n    ScoreSds key = BuildScoredKey(range.max, buf);\n    auto path = score_tree->LEQ(Query{key, false, !range.maxex});\n    if (path.Empty())\n      return arr;\n\n    if (range.maxex && range.max == GetObjScore(path.Terminal())) {\n      ++offset;\n    }\n    DCHECK_LE(GetObjScore(path.Terminal()), range.max);\n\n    while (offset--) {\n      if (!path.Prev())\n        return arr;\n    }\n\n    while (limit--) {\n      ScoreSds ele = path.Terminal();\n\n      double score = GetObjScore(ele);\n      if (range.min > score || (range.min == score && range.minex))\n        break;\n      arr.emplace_back(string{(sds)ele, sdslen((sds)ele)}, score);\n      if (!path.Prev())\n        break;\n    }\n  } else {\n    ScoreSds key = BuildScoredKey(range.min, buf);\n    auto path = score_tree->GEQ(Query{key, false, range.minex});\n    if (path.Empty())\n      return arr;\n\n    while (offset--) {\n      if (!path.Next())\n        return arr;\n    }\n\n    auto path2 = path;\n    size_t num_elems = 0;\n\n    // Count the number of elements in the range.\n    while (limit--) {\n      ScoreSds ele = path.Terminal();\n\n      double score = GetObjScore(ele);\n      if (range.max < score || (range.max == score && range.maxex))\n        break;\n      ++num_elems;\n      if (!path.Next())\n        break;\n    }\n\n    // reserve enough space.\n    arr.resize(num_elems);\n    for (size_t i = 0; i < num_elems; ++i) {\n      ScoreSds ele = path2.Terminal();\n      arr[i] = {string{(sds)ele, sdslen((sds)ele)}, GetObjScore(ele)};\n      path2.Next();\n    }\n  }\n\n  return arr;\n}\n\nSortedMap::ScoredArray SortedMap::GetLexRange(const zlexrangespec& range, unsigned offset,\n                                              unsigned limit, bool reverse) const {\n  if (score_tree->Size() <= offset || limit == 0)\n    return {};\n\n  detail::BPTreePath<ScoreSds> path;\n  
ScoredArray arr;\n\n  if (reverse) {\n    if (range.max != cmaxstring) {\n      path = score_tree->LEQ(Query{range.max, true});\n      if (path.Empty())\n        return {};\n\n      if (range.maxex && sdscmp((sds)path.Terminal(), range.max) == 0) {\n        ++offset;\n      }\n      while (offset--) {\n        if (!path.Prev())\n          return {};\n      }\n    } else {\n      path = score_tree->FromRank(score_tree->Size() - offset - 1);\n    }\n\n    while (limit--) {\n      ScoreSds ele = path.Terminal();\n\n      if (range.min != cminstring) {\n        int cmp = sdscmp((sds)ele, range.min);\n        if (cmp < 0 || (cmp == 0 && range.minex))\n          break;\n      }\n      arr.emplace_back(string{(sds)ele, sdslen((sds)ele)}, GetObjScore(ele));\n      if (!path.Prev())\n        break;\n    }\n  } else {\n    if (range.min != cminstring) {\n      path = score_tree->GEQ(Query{range.min, true});\n      if (path.Empty())\n        return {};\n\n      if (range.minex && sdscmp((sds)path.Terminal(), range.min) == 0) {\n        ++offset;\n      }\n      while (offset--) {\n        if (!path.Next())\n          return {};\n      }\n    } else {\n      path = score_tree->FromRank(offset);\n    }\n\n    while (limit--) {\n      ScoreSds ele = path.Terminal();\n\n      if (range.max != cmaxstring) {\n        int cmp = sdscmp((sds)ele, range.max);\n        if (cmp > 0 || (cmp == 0 && range.maxex))\n          break;\n      }\n      arr.emplace_back(string{(sds)ele, sdslen((sds)ele)}, GetObjScore(ele));\n      if (!path.Next())\n        break;\n    }\n  }\n  return arr;\n}\n\nuint8_t* SortedMap::ToListPack() const {\n  uint8_t* lp = lpNew(0);\n\n  score_tree->Iterate(0, UINT32_MAX, [&](ScoreSds ele) {\n    const std::string_view v{(sds)ele, sdslen((sds)ele)};\n    lp = ZzlInsertAt(lp, NULL, v, GetObjScore(ele));\n    return true;\n  });\n\n  return lp;\n}\n\nbool SortedMap::Delete(std::string_view ele) const {\n  ScoreSds obj = score_map->FindObj(ele);\n  if (obj == 
nullptr)\n    return false;\n\n  CHECK(score_tree->Delete(obj));\n  CHECK(score_map->Erase(ele));\n  return true;\n}\n\nsize_t SortedMap::MallocSize() const {\n  // TODO: add malloc used to BPTree.\n  return score_map->SetMallocUsed() + score_map->ObjMallocUsed() + score_tree->NodeCount() * 256;\n}\n\nbool SortedMap::Reserve(size_t sz) {\n  score_map->Reserve(sz);\n  return true;\n}\n\nsize_t SortedMap::DeleteRangeByRank(unsigned start, unsigned end) {\n  DCHECK_LE(start, end);\n  DCHECK_LT(end, score_tree->Size());\n\n  for (uint32_t i = start; i <= end; ++i) {\n    /* Ideally, we would want to advance path to the next item and delete the previous one.\n     * However, we can not do that because the path is invalidated after the\n     * deletion. So we have to recreate the path for each item using the same rank.\n     * Note, it is probably could be improved, but it's much more complicated.\n     */\n\n    auto path = score_tree->FromRank(start);\n    sds ele = (sds)path.Terminal();\n    score_tree->Delete(path);\n    score_map->Erase(ele);\n  }\n\n  return end - start + 1;\n}\n\nsize_t SortedMap::DeleteRangeByScore(const zrangespec& range) {\n  char buf[16] = {0};\n  size_t deleted = 0;\n\n  while (!score_tree->Empty()) {\n    ScoreSds min_key = BuildScoredKey(range.min, buf);\n    auto path = score_tree->GEQ(Query{min_key, false, range.minex});\n    if (path.Empty())\n      break;\n\n    ScoreSds item = path.Terminal();\n    double score = GetObjScore(item);\n\n    if (range.minex) {\n      DCHECK_GT(score, range.min);\n    } else {\n      DCHECK_GE(score, range.min);\n    }\n    if (score > range.max || (range.maxex && score == range.max))\n      break;\n\n    score_tree->Delete(item);\n    ++deleted;\n    score_map->Erase((sds)item);\n  }\n\n  return deleted;\n}\n\nsize_t SortedMap::DeleteRangeByLex(const zlexrangespec& range) {\n  if (score_tree->Size() == 0)\n    return 0;\n\n  size_t deleted = 0;\n\n  uint32_t rank = 0;\n  if (range.min != cminstring) {\n   
 auto path = score_tree->GEQ(Query{range.min, true});\n    if (path.Empty())\n      return {};\n\n    rank = path.Rank();\n    if (range.minex && sdscmp((sds)path.Terminal(), range.min) == 0) {\n      ++rank;\n    }\n  }\n\n  while (rank < score_tree->Size()) {\n    auto path = score_tree->FromRank(rank);\n    ScoreSds item = path.Terminal();\n    if (range.max != cmaxstring) {\n      int cmp = sdscmp((sds)item, range.max);\n      if (cmp > 0 || (cmp == 0 && range.maxex))\n        break;\n    }\n    ++deleted;\n    score_tree->Delete(path);\n    score_map->Erase((sds)item);\n  }\n\n  return deleted;\n}\n\nSortedMap::ScoredArray SortedMap::PopTopScores(unsigned count, bool reverse) {\n  DCHECK_GT(count, 0u);\n  DCHECK_EQ(score_map->UpperBoundSize(), score_tree->Size());\n  size_t sz = score_map->UpperBoundSize();\n\n  ScoredArray res;\n\n  DCHECK_GT(sz, 0u);  // Empty sets are not allowed.\n\n  if (sz == 0 || count == 0)\n    return res;\n\n  if (count > sz)\n    count = sz;\n\n  res.reserve(count);\n\n  auto cb = [&](ScoreSds obj) {\n    res.emplace_back(string{(sds)obj, sdslen((sds)obj)}, GetObjScore(obj));\n\n    // We can not delete from score_tree because we are in the middle of the iteration.\n    CHECK(score_map->Erase((sds)obj));\n    return true;  // continue with the iteration.\n  };\n\n  unsigned rank = 0;\n  unsigned step = 0;\n  if (reverse) {\n    score_tree->IterateReverse(0, count - 1, std::move(cb));\n    rank = score_tree->Size() - 1;\n    step = 1;\n  } else {\n    score_tree->Iterate(0, count - 1, std::move(cb));\n  }\n\n  // We already deleted elements from score_map, so what's left is to delete from the tree.\n  if (score_map->Empty()) {\n    // Corner case optimization.\n    score_tree->Clear();\n  } else {\n    for (unsigned i = 0; i < res.size(); ++i) {\n      auto path = score_tree->FromRank(rank);\n      score_tree->Delete(path);\n      rank -= step;\n    }\n  }\n\n  return res;\n}\n\nsize_t SortedMap::Count(const zrangespec& range) const 
{\n  DCHECK_LE(range.min, range.max);\n\n  if (score_tree->Size() == 0)\n    return 0;\n\n  // build min key.\n  char buf[16];\n\n  ScoreSds range_key = BuildScoredKey(range.min, buf);\n  auto path = score_tree->GEQ(Query{range_key, false, range.minex});\n  if (path.Empty())\n    return 0;\n\n  ScoreSds bound = path.Terminal();\n\n  if (range.minex) {\n    DCHECK_GT(GetObjScore(bound), range.min);\n  } else {\n    DCHECK_GE(GetObjScore(bound), range.min);\n  }\n\n  uint32_t min_rank = path.Rank();\n\n  // Now build the max key.\n  // If we need to exclude the maximum score, set the key's string part to empty string,\n  // otherwise set it to infinity.\n  range_key = BuildScoredKey(range.max, buf);\n  path = score_tree->GEQ(Query{range_key, false, !range.maxex});\n  if (path.Empty()) {\n    return score_tree->Size() - min_rank;\n  }\n\n  bound = path.Terminal();\n  uint32_t max_rank = path.Rank();\n  if (range.maxex || GetObjScore(bound) > range.max) {\n    if (max_rank <= min_rank)\n      return 0;\n    --max_rank;\n  }\n\n  // max_rank could be less than min_rank, for example, if the range is [a, a).\n  return max_rank < min_rank ? 
0 : max_rank - min_rank + 1;\n}\n\nsize_t SortedMap::LexCount(const zlexrangespec& range) const {\n  if (score_tree->Size() == 0)\n    return 0;\n\n  // Ranges that will always be zero - (+inf, anything) or (anything, -inf)\n  if (range.min == cmaxstring || range.max == cminstring) {\n    return 0;\n  }\n\n  uint32_t min_rank = 0;\n  detail::BPTreePath<ScoreSds> path;\n\n  if (range.min != cminstring) {\n    path = score_tree->GEQ(Query{range.min, true});\n    if (path.Empty())\n      return 0;\n\n    min_rank = path.Rank();\n    if (range.minex && sdscmp((sds)path.Terminal(), range.min) == 0) {\n      ++min_rank;\n      if (min_rank >= score_tree->Size())\n        return 0;\n    }\n  }\n\n  uint32_t max_rank = score_tree->Size() - 1;\n  if (range.max != cmaxstring) {\n    path = score_tree->GEQ(Query{range.max, true});\n    if (!path.Empty()) {\n      max_rank = path.Rank();\n\n      // fix the max rank, if needed.\n      int cmp = sdscmp((sds)path.Terminal(), range.max);\n      DCHECK_GE(cmp, 0);\n      if (cmp > 0 || range.maxex) {\n        if (max_rank <= min_rank)\n          return 0;\n        --max_rank;\n      }\n    }\n  }\n\n  return max_rank < min_rank ? 
0 : max_rank - min_rank + 1;\n}\n\nbool SortedMap::Iterate(unsigned start_rank, unsigned len, bool reverse,\n                        std::function<bool(sds, double)> cb) const {\n  DCHECK_GT(len, 0u);\n  unsigned end_rank = start_rank + len - 1;\n  bool success;\n  if (reverse) {\n    success = score_tree->IterateReverse(\n        start_rank, end_rank, [&](ScoreSds obj) { return cb((sds)obj, GetObjScore(obj)); });\n  } else {\n    success = score_tree->Iterate(start_rank, end_rank,\n                                  [&](ScoreSds obj) { return cb((sds)obj, GetObjScore(obj)); });\n  }\n\n  return success;\n}\n\nuint64_t SortedMap::Scan(uint64_t cursor,\n                         absl::FunctionRef<void(std::string_view, double)> cb) const {\n  auto scan_cb = [&cb](const void* obj) {\n    sds ele = (sds)obj;\n    cb(string_view{ele, sdslen(ele)}, GetObjScore(obj));\n  };\n\n  return this->score_map->Scan(cursor, std::move(scan_cb));\n}\n\n// taken from zsetConvert\nSortedMap* SortedMap::FromListPack(PMR_NS::memory_resource* res, const uint8_t* lp) {\n  uint8_t* zl = (uint8_t*)lp;\n  unsigned char *eptr, *sptr;\n  unsigned char* vstr;\n  unsigned int vlen;\n  long long vlong;\n\n  void* ptr = res->allocate(sizeof(SortedMap), alignof(SortedMap));\n  SortedMap* zs = new (ptr) SortedMap;\n\n  eptr = lpSeek(zl, 0);\n  if (eptr != NULL) {\n    sptr = lpNext(zl, eptr);\n    CHECK(sptr != NULL);\n  }\n\n  while (eptr != NULL) {\n    double score = ZzlGetScore(sptr);\n    vstr = lpGetValue(eptr, &vlen, &vlong);\n    if (vstr == NULL) {\n      CHECK(zs->InsertNew(score, absl::StrCat(vlong)));\n    } else {\n      CHECK(zs->InsertNew(score, string_view{reinterpret_cast<const char*>(vstr), vlen}));\n    }\n\n    ZzlNext(zl, &eptr, &sptr);\n  }\n\n  return zs;\n}\n\nbool SortedMap::DefragIfNeeded(PageUsage* page_usage) {\n  auto cb = [this](sds old_obj, sds new_obj) { score_tree->ForceUpdate(old_obj, new_obj); };\n  bool reallocated = false;\n\n  for (auto it = score_map->begin(); 
it != score_map->end(); ++it) {\n    reallocated |= it.ReallocIfNeeded(page_usage, cb);\n  }\n\n  return reallocated;\n}\n\nstd::optional<SortedMap::RankAndScore> SortedMap::GetRankAndScore(std::string_view ele,\n                                                                  bool reverse) const {\n  ScoreSds obj = score_map->FindObj(ele);\n  if (obj == nullptr)\n    return std::nullopt;\n\n  optional rank = score_tree->GetRank(obj, reverse);\n  DCHECK(rank);\n\n  return SortedMap::RankAndScore{*rank, GetObjScore(obj)};\n}\n}  // namespace detail\n\nsds cminstring = detail::kMinStrData + 1;\nsds cmaxstring = detail::kMaxStrData + 1;\n\n}  // namespace dfly\n"
  },
  {
    "path": "src/core/sorted_map.h",
    "content": "// Copyright 2023, Roman Gershman.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#pragma once\n\n#include <absl/functional/function_ref.h>\n\n#include <functional>\n#include <memory>\n#include <optional>\n#include <string>\n#include <variant>\n#include <vector>\n\n#include \"core/bptree_set.h\"\n#include \"core/score_map.h\"\n\nextern \"C\" {\n\n/* Struct to hold an inclusive/exclusive range spec by score comparison. */\ntypedef struct {\n  double min, max;\n  int minex, maxex; /* are min or max exclusive? */\n} zrangespec;\n\n/* Struct to hold an inclusive/exclusive range spec by lexicographic comparison. */\ntypedef struct {\n  sds min, max;     /* May be set to shared.(minstring|maxstring) */\n  int minex, maxex; /* are min or max exclusive? */\n} zlexrangespec;\n\n}  // extern \"C\"\n\n/* Input flags. */\n#define ZADD_IN_NONE 0\n#define ZADD_IN_INCR (1 << 0) /* Increment the score instead of setting it. */\n#define ZADD_IN_NX (1 << 1)   /* Don't touch elements already existing. */\n#define ZADD_IN_XX (1 << 2)   /* Only touch elements already existing. */\n#define ZADD_IN_GT (1 << 3)   /* Only update existing when new scores are higher. */\n#define ZADD_IN_LT (1 << 4)   /* Only update existing when new scores are lower. */\n\n/* Output flags. */\n#define ZADD_OUT_NOP (1 << 0)     /* Operation not performed because of conditionals.*/\n#define ZADD_OUT_NAN (1 << 1)     /* Only touch elements already existing. */\n#define ZADD_OUT_ADDED (1 << 2)   /* The element was new and was added. */\n#define ZADD_OUT_UPDATED (1 << 3) /* The element already existed, score updated. */\n\nnamespace dfly {\n\nclass PageUsage;\n\n// Copied from zset.h\nextern sds cmaxstring;\nextern sds cminstring;\n\nnamespace detail {\n\n/**\n * @brief SortedMap is a sorted map implementation based on zset.h. It holds unique strings that\n * are ordered by score and lexicographically. 
The score is a double value and has higher priority.\n * The map is implemented as a sorted tree (BPTree) and a hash map (ScoreMap). For more details\n * on the interface it is based on, see zset.h and t_zset.c files in Redis.\n */\nclass SortedMap {\n public:\n  using ScoredMember = std::pair<std::string, double>;\n  using ScoredArray = std::vector<ScoredMember>;\n  using ScoreSds = void*;\n  using RankAndScore = std::pair<unsigned, double>;\n\n  SortedMap();\n  ~SortedMap();\n\n  SortedMap(const SortedMap&) = delete;\n  SortedMap& operator=(const SortedMap&) = delete;\n\n  bool Reserve(size_t sz);\n  int AddElem(double score, std::string_view ele, int in_flags, int* out_flags, double* newscore);\n\n  // Inserts a new element. Returns false if the element already exists.\n  // No score update is performed in this case.\n  bool InsertNew(double score, std::string_view member);\n\n  bool Delete(std::string_view ele) const;\n\n  // Upper bound size of the set.\n  // Note: Currently we do not allow member expiry in sorted sets, therefore it's exact\n  // But if we decide to add expire, this method will provide an approximation from above.\n  size_t Size() const {\n    return score_map->UpperBoundSize();\n  }\n\n  size_t MallocSize() const;\n\n  size_t DeleteRangeByRank(unsigned start, unsigned end);\n  size_t DeleteRangeByScore(const zrangespec& range);\n  size_t DeleteRangeByLex(const zlexrangespec& range);\n\n  ScoredArray PopTopScores(unsigned count, bool reverse);\n\n  std::optional<double> GetScore(std::string_view ele) const;\n  std::optional<unsigned> GetRank(std::string_view ele, bool reverse) const;\n  std::optional<RankAndScore> GetRankAndScore(std::string_view ele, bool reverse) const;\n  ScoredArray GetRange(const zrangespec& r, unsigned offs, unsigned len, bool rev) const;\n  ScoredArray GetLexRange(const zlexrangespec& r, unsigned o, unsigned l, bool rev) const;\n\n  size_t Count(const zrangespec& range) const;\n  size_t LexCount(const zlexrangespec& range) const;\n\n  // Runs cb for each element in the 
range [start_rank, start_rank + len).\n  // Stops iteration if cb returns false. Returns false in this case.\n  bool Iterate(unsigned start_rank, unsigned len, bool reverse,\n               std::function<bool(sds, double)> cb) const;\n\n  uint64_t Scan(uint64_t cursor, absl::FunctionRef<void(std::string_view, double)> cb) const;\n\n  uint8_t* ToListPack() const;\n  static SortedMap* FromListPack(PMR_NS::memory_resource* res, const uint8_t* lp);\n\n  bool DefragIfNeeded(PageUsage* page_usage);\n\n private:\n  struct Query {\n    ScoreSds item;\n    bool ignore_score;\n    bool str_is_infinite;\n\n    Query(ScoreSds key, bool ign_score = false, int is_inf = 0)\n        : item(key), ignore_score(ign_score), str_is_infinite(is_inf != 0) {\n    }\n  };\n\n  struct ScoreSdsPolicy {\n    using KeyT = ScoreSds;\n\n    struct KeyCompareTo {\n      int operator()(Query q, ScoreSds key) const;\n    };\n  };\n\n  using ScoreTree = BPTree<ScoreSds, ScoreSdsPolicy>;\n\n  // hash map from fields to scores.\n  ScoreMap* score_map = nullptr;\n\n  // sorted tree of (score,field) items.\n  ScoreTree* score_tree = nullptr;\n};\n\n// Used by CompactObject.\nunsigned char* ZzlInsert(unsigned char* zl, std::string_view ele, double score);\nunsigned char* ZzlFind(unsigned char* lp, std::string_view ele, double* score);\n\n// Used by SortedMap and ZsetFamily.\ndouble ZzlGetScore(const uint8_t* sptr);\nvoid ZzlNext(const uint8_t* zl, uint8_t** eptr, uint8_t** sptr);\nvoid ZzlPrev(const uint8_t* zl, uint8_t** eptr, uint8_t** sptr);\nvoid ZslFreeLexRange(const zlexrangespec* spec);\nuint8_t* ZzlLastInRange(uint8_t* zl, const zrangespec* range);\nuint8_t* ZzlFirstInRange(uint8_t* zl, const zrangespec* range);\n\nuint8_t* ZzlFirstInLexRange(uint8_t* zl, const zlexrangespec* range);\nuint8_t* ZzlLastInLexRange(uint8_t* zl, const zlexrangespec* range);\n\nint ZzlLexValueGteMin(uint8_t* p, const zlexrangespec* spec);\nint ZzlLexValueLteMax(uint8_t* p, const zlexrangespec* spec);\n\nuint8_t* 
ZzlDeleteRangeByLex(uint8_t* zl, const zlexrangespec* range, unsigned long* deleted);\nuint8_t* ZzlDeleteRangeByScore(uint8_t* zl, const zrangespec* range, unsigned long* deleted);\n\ninline int ZslValueGteMin(double value, const zrangespec* spec) {\n  return spec->minex ? (value > spec->min) : (value >= spec->min);\n}\n\ninline int ZslValueLteMax(double value, const zrangespec* spec) {\n  return spec->maxex ? (value < spec->max) : (value <= spec->max);\n}\n\n}  // namespace detail\n}  // namespace dfly\n"
  },
  {
    "path": "src/core/sorted_map_test.cc",
    "content": "// Copyright 2024, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#include \"core/sorted_map.h\"\n\n#include <absl/strings/str_cat.h>\n#include <gmock/gmock.h>\n#include <mimalloc.h>\n\n#include \"base/gtest.h\"\n#include \"base/logging.h\"\n#include \"core/mi_memory_resource.h\"\n#include \"core/page_usage/page_usage_stats.h\"\n\nextern \"C\" {\n#include \"redis/zmalloc.h\"\n}\n\nusing namespace std;\nusing absl::StrCat;\nusing testing::ElementsAre;\nusing testing::Pair;\nusing testing::StrEq;\n\nnamespace dfly {\nusing detail::SortedMap;\n\nclass SortedMapTest : public ::testing::Test {\n protected:\n  static void SetUpTestSuite() {\n    // configure redis lib zmalloc which requires mimalloc heap to work.\n    auto* tlh = mi_heap_get_backing();\n    init_zmalloc_threadlocal(tlh);\n    InitTLStatelessAllocMR(PMR_NS::get_default_resource());\n  }\n\n  SortedMap sm_;\n};\n\nTEST_F(SortedMapTest, Add) {\n  int out_flags;\n  double new_score;\n\n  int res = sm_.AddElem(1.0, \"a\", 0, &out_flags, &new_score);\n  EXPECT_EQ(1, res);\n  EXPECT_EQ(ZADD_OUT_ADDED, out_flags);\n  EXPECT_EQ(1, new_score);\n\n  res = sm_.AddElem(2.0, \"a\", ZADD_IN_NX, &out_flags, &new_score);\n  EXPECT_EQ(1, res);\n  EXPECT_EQ(ZADD_OUT_NOP, out_flags);\n\n  res = sm_.AddElem(2.0, \"a\", ZADD_IN_INCR, &out_flags, &new_score);\n  EXPECT_EQ(1, res);\n  EXPECT_EQ(ZADD_OUT_UPDATED, out_flags);\n  EXPECT_EQ(3, new_score);\n  sds ele = sdsnew(\"a\");\n  EXPECT_EQ(3, sm_.GetScore(ele));\n  sdsfree(ele);\n}\n\nTEST_F(SortedMapTest, Scan) {\n  for (unsigned i = 0; i < 972; ++i) {\n    sm_.InsertNew(i, StrCat(i));\n  }\n  uint64_t cursor = 0;\n\n  unsigned cnt = 0;\n  do {\n    cursor = sm_.Scan(cursor, [&](string_view str, double score) { ++cnt; });\n  } while (cursor != 0);\n  EXPECT_EQ(972, cnt);\n}\n\nTEST_F(SortedMapTest, InsertPop) {\n  for (unsigned i = 0; i < 256; ++i) {\n    ASSERT_TRUE(sm_.InsertNew(1000, StrCat(\"a\", i)));\n  }\n\n  
vector<sds> vec;\n  bool res = sm_.Iterate(1, 2, false, [&](sds ele, double score) {\n    vec.push_back(ele);\n    return true;\n  });\n  EXPECT_TRUE(res);\n  EXPECT_THAT(vec, ElementsAre(StrEq(\"a1\"), StrEq(\"a10\")));\n\n  sds s = sdsnew(\"a1\");\n  EXPECT_EQ(1, sm_.GetRank(s, false));\n  EXPECT_EQ(254, sm_.GetRank(s, true));\n  sdsfree(s);\n\n  auto top_scores = sm_.PopTopScores(3, false);\n  EXPECT_THAT(top_scores, ElementsAre(Pair(StrEq(\"a0\"), 1000), Pair(StrEq(\"a1\"), 1000),\n                                      Pair(StrEq(\"a10\"), 1000)));\n  top_scores = sm_.PopTopScores(3, true);\n  EXPECT_THAT(top_scores, ElementsAre(Pair(StrEq(\"a99\"), 1000), Pair(StrEq(\"a98\"), 1000),\n                                      Pair(StrEq(\"a97\"), 1000)));\n}\n\nTEST_F(SortedMapTest, LexRanges) {\n  for (unsigned i = 0; i < 100; ++i) {\n    ASSERT_TRUE(sm_.InsertNew(1, StrCat(\"a\", i)));\n  }\n\n  zlexrangespec range;\n  range.max = sdsnew(\"a96\");\n  range.min = sdsnew(\"a93\");\n  range.maxex = 0;\n  range.minex = 0;\n  EXPECT_EQ(4, sm_.LexCount(range));\n  auto array = sm_.GetLexRange(range, 1, 1000, false);\n  ASSERT_EQ(3, array.size());\n  EXPECT_THAT(array.front(), Pair(\"a94\", 1));\n\n  range.maxex = 1;\n  EXPECT_EQ(3, sm_.LexCount(range));\n  array = sm_.GetLexRange(range, 1, 1000, true);\n  ASSERT_EQ(2, array.size());\n  EXPECT_THAT(array.front(), Pair(\"a94\", 1));\n\n  range.minex = 1;\n  EXPECT_EQ(2, sm_.LexCount(range));\n  array = sm_.GetLexRange(range, 1, 1000, false);\n  ASSERT_EQ(1, array.size());\n  EXPECT_THAT(array.front(), Pair(\"a95\", 1));\n  sdsfree(range.min);\n\n  range.min = range.max;\n  EXPECT_EQ(0, sm_.LexCount(range));\n  range.minex = 0;\n  EXPECT_EQ(0, sm_.LexCount(range));\n  sdsfree(range.max);\n\n  range.maxex = 0;\n  range.min = cminstring;\n  range.max = sdsnew(\"a\");\n  EXPECT_EQ(0, sm_.LexCount(range));\n  sdsfree(range.max);\n\n  range.max = sdsnew(\"a0\");\n  EXPECT_EQ(1, sm_.LexCount(range));\n  range.maxex = 1;\n  
EXPECT_EQ(0, sm_.LexCount(range));\n  sdsfree(range.max);\n}\n\nTEST_F(SortedMapTest, ScoreRanges) {\n  for (unsigned i = 0; i < 10; ++i) {\n    ASSERT_TRUE(sm_.InsertNew(1, StrCat(\"a\", i)));\n  }\n\n  for (unsigned i = 0; i < 10; ++i) {\n    ASSERT_TRUE(sm_.InsertNew(2, StrCat(\"b\", i)));\n  }\n\n  zrangespec range;\n  range.max = 5;\n  range.min = 1;\n  range.maxex = 0;\n  range.minex = 0;\n  EXPECT_EQ(20, sm_.Count(range));\n  detail::SortedMap::ScoredArray array = sm_.GetRange(range, 0, 1000, false);\n  ASSERT_EQ(20, array.size());\n  EXPECT_THAT(array.front(), Pair(\"a0\", 1));\n  EXPECT_THAT(array.back(), Pair(\"b9\", 2));\n\n  range.minex = 1;  // exclude all the \"1\" scores.\n  EXPECT_EQ(10, sm_.Count(range));\n  array = sm_.GetRange(range, 2, 1, false);\n  ASSERT_EQ(1, array.size());\n  EXPECT_THAT(array.front(), Pair(\"b2\", 2));\n\n  range.max = 1;\n  range.minex = 0;\n  range.min = -HUGE_VAL;\n  EXPECT_EQ(10, sm_.Count(range));\n  array = sm_.GetRange(range, 2, 2, true);\n  ASSERT_EQ(2, array.size());\n  EXPECT_THAT(array.back(), Pair(\"a6\", 1));\n\n  range.maxex = 1;\n  EXPECT_EQ(0, sm_.Count(range));\n  array = sm_.GetRange(range, 0, 2, true);\n  ASSERT_EQ(0, array.size());\n\n  range.min = 3;\n  array = sm_.GetRange(range, 0, 2, true);\n  ASSERT_EQ(0, array.size());\n}\n\nTEST_F(SortedMapTest, DeleteRange) {\n  for (unsigned i = 0; i <= 100; ++i) {\n    ASSERT_TRUE(sm_.InsertNew(i * 2, StrCat(\"a\", i)));\n  }\n\n  zrangespec range;\n  range.min = range.max = 200;\n  range.minex = range.maxex = 1;\n  EXPECT_EQ(0, sm_.DeleteRangeByScore(range));\n\n  range.min = 199;\n  EXPECT_EQ(0, sm_.DeleteRangeByScore(range));\n\n  range.minex = 0;\n  EXPECT_EQ(0, sm_.DeleteRangeByScore(range));\n\n  range.max = 199;\n  range.min = 198;\n  EXPECT_EQ(1, sm_.DeleteRangeByScore(range));\n\n  range.max = 197;\n  range.min = 193;\n  EXPECT_EQ(2, sm_.DeleteRangeByScore(range));\n\n  EXPECT_EQ(2, sm_.DeleteRangeByRank(0, 1));\n\n  zlexrangespec lex_range;\n  
lex_range.min = sdsnew(\"b\");\n  lex_range.max = sdsnew(\"c\");\n  EXPECT_EQ(0, sm_.DeleteRangeByLex(lex_range));\n\n  sdsfree(lex_range.min);\n  sdsfree(lex_range.max);\n  lex_range.min = cminstring;\n  lex_range.max = cmaxstring;\n  EXPECT_EQ(96, sm_.DeleteRangeByLex(lex_range));\n}\n\nTEST_F(SortedMapTest, RangeBug) {\n  constexpr size_t kArrLen = 80;\n  for (unsigned i = 0; i < kArrLen; i++) {\n    ASSERT_TRUE(sm_.InsertNew(i, StrCat(\"score\", i)));\n  }\n\n  for (unsigned i = 0; i < kArrLen; i++) {\n    zrangespec range;\n    range.max = HUGE_VAL;\n    range.min = i;\n    range.minex = 0;\n    range.maxex = 0;\n    auto arr = sm_.GetRange(range, 0, 5, false);\n    ASSERT_GT(arr.size(), 0) << i;\n  }\n}\n\nuint64_t total_wasted_memory = 0;\n\nTEST_F(SortedMapTest, ReallocIfNeeded) {\n  auto build_str = [](size_t i) { return to_string(i) + string(131, 'a'); };\n\n  auto count_waste = [](const mi_heap_t* heap, const mi_heap_area_t* area, void* block,\n                        size_t block_size, void* arg) {\n    size_t used = block_size * area->used;\n    total_wasted_memory += area->committed - used;\n    return true;\n  };\n\n  for (size_t i = 0; i < 10'000; i++) {\n    int out_flags;\n    double new_val;\n    auto str = build_str(i);\n    sm_.AddElem(i, str, 0, &out_flags, &new_val);\n  }\n\n  for (size_t i = 0; i < 10'000; i++) {\n    if (i % 10 == 0)\n      continue;\n    auto str = build_str(i);\n    sds ele = sdsnew(str.c_str());\n    sm_.Delete(ele);\n    sdsfree(ele);\n  }\n\n  mi_heap_collect(mi_heap_get_backing(), true);\n  mi_heap_visit_blocks(mi_heap_get_backing(), false, count_waste, nullptr);\n  size_t wasted_before = total_wasted_memory;\n\n  PageUsage page_usage{CollectPageStats::NO, 9};\n  ASSERT_TRUE(sm_.DefragIfNeeded(&page_usage));\n\n  total_wasted_memory = 0;\n  mi_heap_collect(mi_heap_get_backing(), true);\n  mi_heap_visit_blocks(mi_heap_get_backing(), false, count_waste, nullptr);\n  size_t wasted_after = total_wasted_memory;\n\n  // 
Check we waste significantly less now\n  EXPECT_GT(wasted_before, wasted_after * 2);\n\n  ASSERT_EQ(sm_.Size(), 1000);\n  auto cb = [i = 0, build_str](sds ele, double score) mutable -> bool {\n    EXPECT_EQ(std::string_view(ele), build_str(i * 10));\n    EXPECT_EQ((size_t)score, i * 10);\n    ++i;\n    return true;\n  };\n\n  sm_.Iterate(0, 10000, false, cb);\n}\n\n}  // namespace dfly\n"
  },
  {
    "path": "src/core/sse_port.h",
    "content": "// Copyright 2023, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#pragma once\n#if defined(__aarch64__)\n#define SSE2NEON_SUPPRESS_WARNINGS\n#include \"base/sse2neon.h\"\n#elif defined(__riscv) || defined(__riscv__)\n#include \"base/sse2rvv.h\"\n#elif defined(__s390x__)\n#include <vecintrin.h>\n#else\n#include <emmintrin.h>\n#include <tmmintrin.h>\n#endif\n\nnamespace dfly {\n\n#ifndef __s390x__\ninline __m128i mm_loadu_si128(const __m128i* ptr) {\n#if defined(__aarch64__)\n  __m128i res;\n  memcpy(&res, ptr, sizeof(res));\n  return res;\n// return vreinterpretq_m128i_s32(vld1q_s32((const int32_t *) p));\n#else\n  return _mm_loadu_si128(ptr);\n#endif\n}\n#endif\n\n}  // namespace dfly\n"
  },
  {
    "path": "src/core/string_map.cc",
    "content": "// Copyright 2022, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#include \"core/string_map.h\"\n\n#include \"base/endian.h\"\n#include \"base/logging.h\"\n#include \"core/compact_object.h\"\n#include \"core/page_usage/page_usage_stats.h\"\n#include \"core/sds_utils.h\"\n\nextern \"C\" {\n#include \"redis/zmalloc.h\"\n}\n\nusing namespace std;\n\nnamespace dfly {\n\nnamespace {\n\nconstexpr uint64_t kValTtlBit = 1ULL << 63;\nconstexpr uint64_t kValMask = ~kValTtlBit;\n\n// Returns key, tagged value pair\npair<sds, uint64_t> CreateEntry(string_view field, string_view value, uint32_t time_now,\n                                uint32_t ttl_sec) {\n  // 8 additional bytes for a pointer to value.\n  sds newkey;\n  size_t meta_offset = field.size() + 1;\n  sds sdsval = sdsnewlen(value.data(), value.size());\n  uint64_t sdsval_tag = uint64_t(sdsval);\n\n  if (ttl_sec == UINT32_MAX) {\n    // The layout is:\n    // key, '\\0', 8-byte pointer to value\n    newkey = AllocSdsWithSpace(field.size(), 8);\n  } else {\n    // The layout is:\n    // key, '\\0', 8-byte pointer to value, 4-byte absolute time.\n    // the value pointer it tagged.\n    newkey = AllocSdsWithSpace(field.size(), 8 + 4);\n    uint32_t at = time_now + ttl_sec;\n    absl::little_endian::Store32(newkey + meta_offset + 8, at);  // skip the value pointer.\n    sdsval_tag |= kValTtlBit;\n  }\n\n  if (!field.empty()) {\n    memcpy(newkey, field.data(), field.size());\n  }\n\n  absl::little_endian::Store64(newkey + meta_offset, sdsval_tag);\n  return {newkey, sdsval_tag};\n}\n\nbool HasTtl(sds entry) {\n  const uint64_t tag = absl::little_endian::Load64(entry + sdslen(entry) + 1);\n  return (tag & kValTtlBit) != 0;\n}\n\n}  // namespace\n\nStringMap::~StringMap() {\n  Clear();\n}\n\nbool StringMap::AddOrUpdate(std::string_view field, std::string_view value, uint32_t ttl_sec,\n                            bool keepttl) {\n  sds prev = AddOrExchange(field, 
value, ttl_sec, keepttl);\n  if (prev) {\n    ObjDelete(prev, false);\n    return false;\n  }\n  return true;\n}\n\nsds StringMap::AddOrExchange(std::string_view field, std::string_view value, uint32_t ttl_sec,\n                             bool keepttl) {\n  const uint32_t computed_ttl = ComputeTtl(field, ttl_sec, keepttl);\n  auto [newkey, sdsval_tag] = CreateEntry(field, value, time_now(), computed_ttl);\n  auto prev_entry = static_cast<sds>(AddOrReplaceObj(newkey, sdsval_tag & kValTtlBit));\n  return prev_entry;\n}\n\nuint32_t StringMap::ComputeTtl(string_view field, uint32_t ttl_sec, bool keepttl) const {\n  if (!keepttl)\n    return ttl_sec;\n\n  auto* prev = static_cast<sds>(FindInternal(&field, Hash(&field, 1), 1));\n  if (!prev)\n    return ttl_sec;\n\n  if (!HasTtl(prev))\n    return ttl_sec;\n\n  return ObjExpireTime(prev) - time_now();\n}\n\nbool StringMap::AddOrSkip(std::string_view field, std::string_view value, uint32_t ttl_sec) {\n  uint64_t hashcode = Hash(&field, 1);\n  void* obj = FindInternal(&field, hashcode, 1);  // 1 - string_view\n\n  if (obj)\n    return false;\n\n  auto [newkey, sdsval_tag] = CreateEntry(field, value, time_now(), ttl_sec);\n  AddUnique(newkey, sdsval_tag & kValTtlBit, hashcode);\n  return true;\n}\n\nbool StringMap::Erase(string_view key) {\n  return EraseInternal(&key, 1);\n}\n\nStringMap::SdsEntry StringMap::Extract(string_view key) {\n  return SdsEntry(static_cast<sds>(DetachInternal(const_cast<string_view*>(&key), 1)), DeleteEntry);\n}\n\nvoid StringMap::DeleteEntry(sds entry) {\n  sds value = GetValue(entry);\n  sdsfree(value);\n  sdsfree(entry);\n}\n\nbool StringMap::Contains(string_view field) const {\n  // 1 - means it's string_view. 
See ObjEqual for details.\n  uint64_t hashcode = Hash(&field, 1);\n  return FindInternal(&field, hashcode, 1) != nullptr;\n}\n\noptional<pair<sds, sds>> StringMap::RandomPair() {\n  // Iteration may remove elements, and so we need to loop if we happen to reach the end\n  while (true) {\n    auto it = begin();\n\n    // It may be that begin() will invalidate all elements, getting us to an Empty() state\n    if (Empty()) {\n      break;\n    }\n\n    it += rand() % UpperBoundSize();\n    if (it != end()) {\n      return std::make_pair(it->first, it->second);\n    }\n  }\n  return nullopt;\n}\n\nvoid StringMap::RandomPairsUnique(unsigned int count, std::vector<sds>& keys,\n                                  std::vector<sds>& vals, bool with_value) {\n  unsigned int total_size = SizeSlow();\n  unsigned int index = 0;\n  if (count > total_size)\n    count = total_size;\n\n  auto itr = begin();\n  uint32_t picked = 0, remaining = count;\n  while (picked < count && itr != end()) {\n    double random_double = ((double)rand()) / RAND_MAX;\n    double threshold = ((double)remaining) / (total_size - index);\n    if (random_double <= threshold) {\n      keys.push_back(itr->first);\n      if (with_value) {\n        vals.push_back(itr->second);\n      }\n      remaining--;\n      picked++;\n    }\n    ++itr;\n    index++;\n  }\n\n  DCHECK(keys.size() == count);\n  if (with_value)\n    DCHECK(vals.size() == count);\n}\n\nvoid StringMap::RandomPairs(unsigned int count, std::vector<sds>& keys, std::vector<sds>& vals,\n                            bool with_value) {\n  using RandomPick = std::pair<unsigned int, unsigned int>;\n  std::vector<RandomPick> picks;\n  unsigned int total_size = SizeSlow();\n\n  for (unsigned int i = 0; i < count; ++i) {\n    RandomPick pick{rand() % total_size, i};\n    picks.push_back(pick);\n  }\n\n  std::sort(picks.begin(), picks.end(), [](auto& x, auto& y) { return x.first < y.first; });\n\n  unsigned int index = picks[0].first, pick_index = 0;\n  auto 
itr = begin();\n  for (unsigned int i = 0; i < index; ++i)\n    ++itr;\n\n  keys.resize(count);\n  if (with_value)\n    vals.resize(count);\n\n  while (itr != end() && pick_index < count) {\n    auto [key, val] = *itr;\n    while (pick_index < count && index == picks[pick_index].first) {\n      int store_order = picks[pick_index].second;\n      keys[store_order] = key;\n      if (with_value)\n        vals[store_order] = val;\n      ++pick_index;\n    }\n    ++index;\n    ++itr;\n  }\n}\n\nsds StringMap::GetValue(sds key) {\n  char* valptr = key + sdslen(key) + 1;\n  const uint64_t val = absl::little_endian::Load64(valptr);\n  return (sds)(kValMask & val);\n}\n\npair<sds, bool> StringMap::ReallocIfNeeded(void* obj, PageUsage* page_usage) {\n  sds key = (sds)obj;\n  size_t key_len = sdslen(key);\n\n  auto* value_ptr = key + key_len + 1;\n  uint64_t value_tag = absl::little_endian::Load64(value_ptr);\n  sds value = (sds)(uint64_t(value_tag) & kValMask);\n\n  bool realloced_value = false;\n\n  // If the allocated value is underutilized, re-allocate it and update the pointer inside the key\n  if (page_usage->IsPageForObjectUnderUtilized(value)) {\n    size_t value_len = sdslen(value);\n    sds new_value = sdsnewlen(value, value_len);\n    memcpy(new_value, value, value_len);\n    uint64_t new_value_tag = (uint64_t(new_value) & kValMask) | (value_tag & ~kValMask);\n    absl::little_endian::Store64(value_ptr, new_value_tag);\n    sdsfree(value);\n    realloced_value = true;\n  }\n\n  if (!page_usage->IsPageForObjectUnderUtilized(key))\n    return {key, realloced_value};\n\n  size_t space_size = 8 /* value ptr */ + ((value_tag & kValTtlBit) ? 
4 : 0) /* optional expiry */;\n\n  sds new_key = AllocSdsWithSpace(key_len, space_size);\n  memcpy(new_key, key, key_len + 1 /* \\0 */ + space_size);\n  sdsfree(key);\n\n  return {new_key, true};\n}\n\nuint64_t StringMap::Hash(const void* obj, uint32_t cookie) const {\n  DCHECK_LT(cookie, 2u);\n\n  if (cookie == 0) {\n    sds s = (sds)obj;\n    return CompactObj::HashCode(string_view{s, sdslen(s)});\n  }\n\n  const string_view* sv = (const string_view*)obj;\n  return CompactObj::HashCode(*sv);\n}\n\nbool StringMap::ObjEqual(const void* left, const void* right, uint32_t right_cookie) const {\n  DCHECK_LT(right_cookie, 2u);\n\n  sds s1 = (sds)left;\n  if (right_cookie == 0) {\n    sds s2 = (sds)right;\n\n    if (sdslen(s1) != sdslen(s2)) {\n      return false;\n    }\n\n    return sdslen(s1) == 0 || memcmp(s1, s2, sdslen(s1)) == 0;\n  }\n\n  const string_view* right_sv = (const string_view*)right;\n  string_view left_sv{s1, sdslen(s1)};\n  return left_sv == (*right_sv);\n}\n\nsize_t StringMap::ObjectAllocSize(const void* obj) const {\n  sds s1 = (sds)obj;\n  size_t res = zmalloc_usable_size(sdsAllocPtr(s1));\n  sds val = GetValue(s1);\n  res += zmalloc_usable_size(sdsAllocPtr(val));\n\n  return res;\n}\n\nuint32_t StringMap::ObjExpireTime(const void* obj) const {\n  sds str = (sds)obj;\n  const char* valptr = str + sdslen(str) + 1;\n\n  uint64_t val = absl::little_endian::Load64(valptr);\n\n  DCHECK(val & kValTtlBit);\n  if (val & kValTtlBit) {\n    return absl::little_endian::Load32(valptr + 8);\n  }\n\n  // Should not reach.\n  return UINT32_MAX;\n}\n\nvoid StringMap::ObjUpdateExpireTime(const void* obj, uint32_t ttl_sec) {\n  return SdsUpdateExpireTime(obj, time_now() + ttl_sec, 8);\n}\n\nvoid StringMap::ObjDelete(void* obj, bool has_ttl) const {\n  sds s1 = (sds)obj;\n  sds value = GetValue(s1);\n  sdsfree(value);\n  sdsfree(s1);\n}\n\nvoid* StringMap::ObjectClone(const void* obj, bool has_ttl, bool add_ttl) const {\n  uint32_t ttl_sec = add_ttl ? 0 : (has_ttl ? 
ObjExpireTime(obj) : UINT32_MAX);\n  sds str = (sds)obj;\n  auto pair = detail::SdsPair(str, GetValue(str));\n  // Use explicit string_view constructor with length to preserve null characters\n  string_view key_sv(pair->first, sdslen(pair->first));\n  string_view value_sv(pair->second, sdslen(pair->second));\n  auto [newkey, sdsval_tag] = CreateEntry(key_sv, value_sv, time_now(), ttl_sec);\n\n  return (void*)newkey;\n}\n\ndetail::SdsPair StringMap::iterator::BreakToPair(void* obj) {\n  sds f = (sds)obj;\n  return detail::SdsPair(f, GetValue(f));\n}\n\nbool StringMap::iterator::ReallocIfNeeded(PageUsage* page_usage) {\n  auto* ptr = curr_entry_;\n  if (ptr->IsLink()) {\n    ptr = ptr->AsLink();\n  }\n\n  DCHECK(!ptr->IsEmpty());\n  DCHECK(ptr->IsObject());\n\n  auto* obj = ptr->GetObject();\n  auto [new_obj, realloced] = static_cast<StringMap*>(owner_)->ReallocIfNeeded(obj, page_usage);\n  ptr->SetObject(new_obj);\n\n  return realloced;\n}\n\n}  // namespace dfly\n"
  },
  {
    "path": "src/core/string_map.h",
    "content": "// Copyright 2022, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#pragma once\n\n#include <memory>\n#include <optional>\n#include <string_view>\n\n#include \"core/dense_set.h\"\n\nextern \"C\" {\n#include \"redis/sds.h\"\n}\n\nnamespace dfly {\n\nclass PageUsage;\n\nnamespace detail {\n\nclass SdsPair {\n public:\n  SdsPair(sds k, sds v) : first(k), second(v) {\n  }\n\n  SdsPair* operator->() {\n    return this;\n  }\n\n  const SdsPair* operator->() const {\n    return this;\n  }\n\n  operator std::pair<std::string_view, std::string_view>() const {\n    return {{first, sdslen(first)}, {second, sdslen(second)}};\n  }\n\n  const sds first;\n  const sds second;\n};\n\n};  // namespace detail\n\nclass StringMap : public DenseSet {\n public:\n  explicit StringMap(void* unused = nullptr) {\n  }\n\n  ~StringMap();\n\n  class iterator : private DenseSet::IteratorBase {\n    static detail::SdsPair BreakToPair(void* obj);\n\n   public:\n    iterator() : IteratorBase() {\n    }\n\n    explicit iterator(const IteratorBase& o) : IteratorBase(o) {\n    }\n\n    iterator(DenseSet* owner) : IteratorBase(owner, false) {\n    }\n\n    detail::SdsPair operator->() const {\n      void* ptr = curr_entry_->GetObject();\n      return BreakToPair(ptr);\n    }\n\n    detail::SdsPair operator*() const {\n      void* ptr = curr_entry_->GetObject();\n      return BreakToPair(ptr);\n    }\n\n    // Try reducing memory fragmentation of the value by re-allocating. 
Returns true if\n    // re-allocation happened.\n    bool ReallocIfNeeded(PageUsage* page_usage);\n\n    iterator& operator++() {\n      Advance();\n      return *this;\n    }\n\n    // Advances at most `n` steps, but stops at end.\n    iterator& operator+=(unsigned int n) {\n      for (unsigned int i = 0; i < n; ++i) {\n        if (curr_entry_ == nullptr) {\n          break;\n        }\n\n        Advance();\n      }\n      return *this;\n    }\n\n    bool operator==(const iterator& b) const {\n      if (owner_ == nullptr && b.owner_ == nullptr) {  // to allow comparison with end()\n        return true;\n      }\n      return owner_ == b.owner_ && curr_entry_ == b.curr_entry_;\n    }\n\n    bool operator!=(const iterator& b) const {\n      return !(*this == b);\n    }\n\n    using IteratorBase::ExpiryTime;\n    using IteratorBase::HasExpiry;\n    using IteratorBase::SetExpiryTime;\n  };\n\n  // Adds a new field or updates its value. Returns true if added, false if updated.\n  bool AddOrUpdate(std::string_view field, std::string_view value, uint32_t ttl_sec = UINT32_MAX,\n                   bool keepttl = false);\n\n  // Like AddOrUpdate but on update returns the previous sds entry\n  // instead of deleting it. Caller must free the returned entry via DeleteEntry().\n  // Returns nullptr if a new field was added.\n  sds AddOrExchange(std::string_view field, std::string_view value, uint32_t ttl_sec = UINT32_MAX,\n                    bool keepttl = false);\n\n  // Returns true if field was added\n  // false, if already exists. 
In that case no update is done.\n  bool AddOrSkip(std::string_view field, std::string_view value, uint32_t ttl_sec = UINT32_MAX);\n\n  bool Erase(std::string_view s1);\n\n  using SdsEntry = std::unique_ptr<char, void (*)(sds)>;\n\n  // Removes and returns the sds entry for the given key without freeing it.\n  // Returns nullptr if the key was not found.\n  SdsEntry Extract(std::string_view s1);\n\n  // Frees a StringMap sds entry (key + embedded value).\n  static void DeleteEntry(sds entry);\n\n  bool Contains(std::string_view s1) const;\n\n  /// @brief  Returns value of the key or an empty iterator if key not found.\n  /// @param key\n  /// @return sds\n  iterator Find(std::string_view member) {\n    return iterator{FindIt(&member, 1)};\n  }\n\n  iterator begin() {\n    return iterator{this};\n  }\n\n  iterator end() {\n    return iterator{};\n  }\n\n  // Returns a random key value pair.\n  // Returns key only if value is a nullptr.\n  std::optional<std::pair<sds, sds>> RandomPair();\n\n  // Randomly selects count of key value pairs. The selections are unique.\n  // if count is larger than the total number of key value pairs, returns\n  // every pair.\n  // Executes at O(n) (i.e. slow for large sets).\n  void RandomPairsUnique(unsigned int count, std::vector<sds>& keys, std::vector<sds>& vals,\n                         bool with_value);\n\n  // Randomly selects count of key value pairs. The select key value pairs\n  // are allowed to have duplications.\n  // Executes at O(n) (i.e. slow for large sets).\n  void RandomPairs(unsigned int count, std::vector<sds>& keys, std::vector<sds>& vals,\n                   bool with_value);\n\n  static sds GetValue(sds key);\n\n private:\n  // If keepttl is specified, performs a lookup for given field and computes ttl by comparing\n  // existing expiry against time_now(). If keepttl is false, or field is not found, or it expires,\n  // or the field has no ttl, returns ttl_sec. 
set_time() must have been called before computing\n  // ttl.\n  uint32_t ComputeTtl(std::string_view field, uint32_t ttl_sec, bool keepttl) const;\n\n  // Reallocate key and/or value if their pages are underutilized.\n  // Returns new pointer (stays same if key utilization is enough) and if reallocation happened.\n  std::pair<sds, bool> ReallocIfNeeded(void* obj, PageUsage* page_usage);\n\n  uint64_t Hash(const void* obj, uint32_t cookie) const final;\n  bool ObjEqual(const void* left, const void* right, uint32_t right_cookie) const final;\n  size_t ObjectAllocSize(const void* obj) const final;\n  uint32_t ObjExpireTime(const void* obj) const final;\n  void ObjUpdateExpireTime(const void* obj, uint32_t ttl_sec) override;\n  void ObjDelete(void* obj, bool has_ttl) const override;\n  void* ObjectClone(const void* obj, bool has_ttl, bool add_ttl) const final;\n};\n\n}  // namespace dfly\n"
  },
  {
    "path": "src/core/string_map_test.cc",
    "content": "// Copyright 2022, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#include \"core/string_map.h\"\n\n#include <absl/strings/match.h>\n#include <absl/strings/str_cat.h>\n#include <gtest/gtest.h>\n#include <mimalloc.h>\n\n#include <algorithm>\n#include <cstddef>\n#include <memory_resource>\n#include <random>\n#include <string>\n#include <string_view>\n#include <unordered_set>\n#include <vector>\n\n#include \"base/logging.h\"\n#include \"core/compact_object.h\"\n#include \"core/detail/stateless_allocator.h\"\n#include \"core/page_usage/page_usage_stats.h\"\n\nextern \"C\" {\n#include \"redis/zmalloc.h\"\n}\n\nnamespace dfly {\n\nusing namespace std;\n\nclass StringMapTest : public ::testing::Test {\n protected:\n  static void SetUpTestSuite() {\n    auto* tlh = mi_heap_get_backing();\n    init_zmalloc_threadlocal(tlh);\n    InitTLStatelessAllocMR(PMR_NS::get_default_resource());\n  }\n\n  static void TearDownTestSuite() {\n    mi_heap_collect(mi_heap_get_backing(), true);\n\n    auto cb_visit = [](const mi_heap_t* heap, const mi_heap_area_t* area, void* block,\n                       size_t block_size, void* arg) {\n      LOG(ERROR) << \"Unfreed allocations: block_size \" << block_size\n                 << \", allocated: \" << area->used * block_size;\n      return true;\n    };\n\n    mi_heap_visit_blocks(mi_heap_get_backing(), false /* do not visit all blocks*/, cb_visit,\n                         nullptr);\n  }\n\n  StringMapTest() : mi_alloc_(mi_heap_get_backing()) {\n  }\n\n  void SetUp() override {\n    sm_.reset(new StringMap(&mi_alloc_));\n  }\n\n  void TearDown() override {\n    sm_.reset();\n    EXPECT_EQ(zmalloc_used_memory_tl, 0);\n  }\n\n  MiMemoryResource mi_alloc_;\n  std::unique_ptr<StringMap> sm_;\n};\n\nTEST_F(StringMapTest, Basic) {\n  EXPECT_TRUE(sm_->AddOrUpdate(\"foo\", \"bar\"));\n  EXPECT_TRUE(sm_->Contains(\"foo\"));\n  auto it = sm_->Find(\"foo\");\n  EXPECT_STREQ(\"bar\", 
it->second);\n\n  it = sm_->begin();\n  EXPECT_STREQ(\"foo\", it->first);\n  EXPECT_STREQ(\"bar\", it->second);\n  ++it;\n  EXPECT_TRUE(it == sm_->end());\n\n  for (const auto& k_v : *sm_) {\n    EXPECT_STREQ(\"foo\", k_v.first);\n    EXPECT_STREQ(\"bar\", k_v.second);\n  }\n\n  size_t sz = sm_->ObjMallocUsed();\n  EXPECT_FALSE(sm_->AddOrUpdate(\"foo\", \"baraaaaaaaaaaaa2\"));\n  EXPECT_GT(sm_->ObjMallocUsed(), sz);\n  it = sm_->begin();\n  EXPECT_STREQ(\"baraaaaaaaaaaaa2\", it->second);\n\n  EXPECT_FALSE(sm_->AddOrSkip(\"foo\", \"bar2\"));\n  EXPECT_STREQ(\"baraaaaaaaaaaaa2\", it->second);\n}\n\nTEST_F(StringMapTest, EmptyFind) {\n  sm_->Find(\"bar\");\n}\n\nTEST_F(StringMapTest, Ttl) {\n  EXPECT_TRUE(sm_->AddOrUpdate(\"bla\", \"val1\", 1));\n  EXPECT_FALSE(sm_->AddOrUpdate(\"bla\", \"val2\", 1));\n  sm_->set_time(1);\n  EXPECT_TRUE(sm_->AddOrUpdate(\"bla\", \"val2\", 1));\n  EXPECT_EQ(1u, sm_->UpperBoundSize());\n\n  EXPECT_FALSE(sm_->AddOrSkip(\"bla\", \"val3\", 2));\n\n  // set ttl to 2, meaning that the key will expire at time 3.\n  EXPECT_TRUE(sm_->AddOrSkip(\"bla2\", \"val3\", 2));\n  EXPECT_TRUE(sm_->Contains(\"bla2\"));\n\n  sm_->set_time(3);\n  auto it = sm_->begin();\n  EXPECT_TRUE(it == sm_->end());\n}\n\nTEST_F(StringMapTest, IterateExpired) {\n  EXPECT_TRUE(sm_->AddOrUpdate(\"k1\", \"v1\", 1));\n  EXPECT_TRUE(sm_->AddOrUpdate(\"k2\", \"v2\", 1));\n  sm_->set_time(1);\n  auto it = sm_->begin();\n  it += 1;\n  EXPECT_EQ(it, sm_->end());\n}\n\nTEST_F(StringMapTest, SetFieldExpireHasExpiry) {\n  EXPECT_TRUE(sm_->AddOrUpdate(\"k1\", \"v1\", 5));\n  auto k = sm_->Find(\"k1\");\n  EXPECT_TRUE(k.HasExpiry());\n  EXPECT_EQ(k.ExpiryTime(), 5);\n  k.SetExpiryTime(1);\n  EXPECT_TRUE(k.HasExpiry());\n  EXPECT_EQ(k.ExpiryTime(), 1);\n}\n\nTEST_F(StringMapTest, SetFieldExpireNoHasExpiry) {\n  EXPECT_TRUE(sm_->AddOrUpdate(\"k1\", \"v1\"));\n  auto k = sm_->Find(\"k1\");\n  EXPECT_FALSE(k.HasExpiry());\n  k.SetExpiryTime(1);\n  EXPECT_TRUE(k.HasExpiry());\n  
EXPECT_EQ(k.ExpiryTime(), 1);\n}\n\nTEST_F(StringMapTest, Bug3973) {\n  for (unsigned i = 0; i < 8; i++) {\n    EXPECT_TRUE(sm_->AddOrUpdate(to_string(i), \"val\"));\n  }\n  for (unsigned i = 0; i < 8; i++) {\n    auto k = sm_->Find(to_string(i));\n    ASSERT_FALSE(k.HasExpiry());\n    k.SetExpiryTime(1);\n    EXPECT_EQ(k.ExpiryTime(), 1);\n  }\n  for (unsigned i = 100; i < 1000; i++) {\n    EXPECT_TRUE(sm_->AddOrUpdate(to_string(i), \"val\"));\n  }\n\n  // make sure the first 8 keys have expiry set\n  for (unsigned i = 0; i < 8; i++) {\n    auto k = sm_->Find(to_string(i));\n    ASSERT_TRUE(k.HasExpiry());\n    EXPECT_EQ(k.ExpiryTime(), 1);\n  }\n}\n\nTEST_F(StringMapTest, Bug3984) {\n  for (unsigned i = 0; i < 6; i++) {\n    EXPECT_TRUE(sm_->AddOrUpdate(to_string(i), \"val\"));\n  }\n  for (unsigned i = 0; i < 6; i++) {\n    auto k = sm_->Find(to_string(i));\n    ASSERT_FALSE(k.HasExpiry());\n    k.SetExpiryTime(1);\n    EXPECT_EQ(k.ExpiryTime(), 1);\n  }\n\n  for (unsigned i = 0; i < 6; i++) {\n    EXPECT_FALSE(sm_->AddOrUpdate(to_string(i), \"val\"));\n  }\n}\n\nunsigned total_wasted_memory = 0;\n\nTEST_F(StringMapTest, ReallocIfNeeded) {\n  auto build_str = [](size_t i) { return to_string(i) + string(131, 'a'); };\n\n  auto count_waste = [](const mi_heap_t* heap, const mi_heap_area_t* area, void* block,\n                        size_t block_size, void* arg) {\n    size_t used = block_size * area->used;\n    total_wasted_memory += area->committed - used;\n    return true;\n  };\n\n  for (size_t i = 0; i < 10'000; i++)\n    sm_->AddOrUpdate(build_str(i), build_str(i + 1), i * 10 + 1);\n\n  for (size_t i = 0; i < 10'000; i++) {\n    if (i % 10 == 0)\n      continue;\n    sm_->Erase(build_str(i));\n  }\n\n  mi_heap_collect(mi_heap_get_backing(), true);\n  mi_heap_visit_blocks(mi_heap_get_backing(), false, count_waste, nullptr);\n  size_t wasted_before = total_wasted_memory;\n\n  size_t underutilized = 0;\n  PageUsage page_usage{CollectPageStats::NO, 0.9};\n  for 
(auto it = sm_->begin(); it != sm_->end(); ++it) {\n    underutilized += page_usage.IsPageForObjectUnderUtilized(it->first);\n    it.ReallocIfNeeded(&page_usage);\n  }\n  // Check there are underutilized pages\n  CHECK_GT(underutilized, 0u);\n\n  total_wasted_memory = 0;\n  mi_heap_collect(mi_heap_get_backing(), true);\n  mi_heap_visit_blocks(mi_heap_get_backing(), false, count_waste, nullptr);\n  size_t wasted_after = total_wasted_memory;\n\n  // Check we waste significantly less now\n  EXPECT_GT(wasted_before, wasted_after * 2);\n\n  EXPECT_EQ(sm_->UpperBoundSize(), 1000);\n  for (size_t i = 0; i < 1000; i++)\n    EXPECT_EQ(sm_->Find(build_str(i * 10))->second, build_str(i * 10 + 1));\n}\n\nTEST_F(StringMapTest, ExpiryChangesSize) {\n  sm_->AddOrUpdate(\"field\", \"value\");\n  const size_t old_size = sm_->ObjMallocUsed();\n\n  auto it = sm_->Find(\"field\");\n  it.SetExpiryTime(1);\n\n  const size_t new_size = sm_->ObjMallocUsed();\n  EXPECT_LT(old_size, new_size);\n\n  sm_->AddOrUpdate(\"field\", \"value\", 1);\n  EXPECT_EQ(new_size, sm_->ObjMallocUsed());\n}\n\nTEST_F(StringMapTest, ExpiryWithMaxAndKeepTTL) {\n  sm_->AddOrUpdate(\"field\", \"value\", 100);\n  auto k = sm_->Find(\"field\");\n  EXPECT_TRUE(k.HasExpiry());\n  EXPECT_EQ(k.ExpiryTime(), 100);\n\n  // ttl is copied from prev. if max value is supplied\n  sm_->AddOrUpdate(\"field\", \"value\", UINT32_MAX, true);\n  k = sm_->Find(\"field\");\n  EXPECT_TRUE(k.HasExpiry());\n  EXPECT_EQ(k.ExpiryTime(), 100);\n\n  // max ttl value results in no expiry without keepttl\n  sm_->AddOrUpdate(\"field\", \"value\", UINT32_MAX);\n  EXPECT_FALSE(sm_->Find(\"field\").HasExpiry());\n\n  // No prev. 
expiry, supplied ttl_sec value is used\n  sm_->AddOrUpdate(\"field\", \"value\", 10, true);\n  k = sm_->Find(\"field\");\n  EXPECT_TRUE(k.HasExpiry());\n  EXPECT_EQ(k.ExpiryTime(), 10);\n\n  // object removed while adding due to expiry\n  sm_->set_time(11);\n  sm_->AddOrUpdate(\"field\", \"value\", UINT32_MAX, true);\n  k = sm_->Find(\"field\");\n  EXPECT_FALSE(k.HasExpiry());\n}\n\nTEST_F(StringMapTest, ExtractExisting) {\n  sm_->AddOrUpdate(\"f1\", \"v1\");\n  sm_->AddOrUpdate(\"f2\", \"v2\");\n  EXPECT_EQ(sm_->UpperBoundSize(), 2u);\n\n  auto entry = sm_->Extract(\"f1\");\n  ASSERT_TRUE(entry);\n\n  // Verify the extracted entry has the correct value\n  sds val = StringMap::GetValue(entry.get());\n  EXPECT_EQ(string_view(val, sdslen(val)), \"v1\");\n\n  // Verify it was removed from the map\n  EXPECT_EQ(sm_->UpperBoundSize(), 1u);\n  EXPECT_FALSE(sm_->Contains(\"f1\"));\n  EXPECT_TRUE(sm_->Contains(\"f2\"));\n}\n\nTEST_F(StringMapTest, ExtractNonExisting) {\n  sm_->AddOrUpdate(\"f1\", \"v1\");\n  auto entry = sm_->Extract(\"no_such_key\");\n  EXPECT_FALSE(entry);\n  EXPECT_EQ(sm_->UpperBoundSize(), 1u);\n}\n\nTEST_F(StringMapTest, AddOrExchangeNew) {\n  // Adding a new field returns nullptr (no previous entry)\n  sds prev = sm_->AddOrExchange(\"f1\", \"v1\");\n  EXPECT_EQ(prev, nullptr);\n  EXPECT_TRUE(sm_->Contains(\"f1\"));\n  EXPECT_STREQ(sm_->Find(\"f1\")->second, \"v1\");\n}\n\nTEST_F(StringMapTest, AddOrExchangeReplace) {\n  sm_->AddOrUpdate(\"f1\", \"old_value\");\n  EXPECT_EQ(sm_->UpperBoundSize(), 1u);\n\n  sds prev = sm_->AddOrExchange(\"f1\", \"new_value\");\n  ASSERT_NE(prev, nullptr);\n\n  // Verify the extracted entry has the old value\n  sds val = StringMap::GetValue(prev);\n  EXPECT_EQ(string_view(val, sdslen(val)), \"old_value\");\n\n  // Verify map now has the new value\n  EXPECT_STREQ(sm_->Find(\"f1\")->second, \"new_value\");\n  EXPECT_EQ(sm_->UpperBoundSize(), 1u);\n\n  StringMap::DeleteEntry(prev);\n}\n\nTEST_F(StringMapTest, 
AddOrExchangeWithTtl) {\n  sm_->AddOrUpdate(\"f1\", \"v1\", 100);\n\n  sds prev = sm_->AddOrExchange(\"f1\", \"v2\", 200);\n  ASSERT_NE(prev, nullptr);\n\n  sds val = StringMap::GetValue(prev);\n  EXPECT_EQ(string_view(val, sdslen(val)), \"v1\");\n\n  // Make sure new entry has correct value and ttl\n  auto it = sm_->Find(\"f1\");\n  EXPECT_STREQ(it->second, \"v2\");\n  EXPECT_TRUE(it.HasExpiry());\n  EXPECT_EQ(it.ExpiryTime(), 200u);\n\n  StringMap::DeleteEntry(prev);\n}\n\nTEST_F(StringMapTest, ExtractMultiple) {\n  for (unsigned i = 0; i < 20; i++) {\n    sm_->AddOrUpdate(to_string(i), \"val\" + to_string(i));\n  }\n  EXPECT_EQ(sm_->UpperBoundSize(), 20u);\n\n  // Extract every other entry\n  vector<StringMap::SdsEntry> extracted;\n  for (unsigned i = 0; i < 20; i += 2) {\n    auto entry = sm_->Extract(to_string(i));\n    ASSERT_TRUE(entry);\n    extracted.push_back(std::move(entry));\n  }\n\n  EXPECT_EQ(sm_->UpperBoundSize(), 10u);\n\n  // Verify remaining entries\n  for (unsigned i = 1; i < 20; i += 2) {\n    EXPECT_TRUE(sm_->Contains(to_string(i)));\n  }\n}\n\n}  // namespace dfly\n"
  },
  {
    "path": "src/core/string_set.cc",
    "content": "// Copyright 2022, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#include \"core/string_set.h\"\n\n#include \"absl/flags/flag.h\"\n#include \"core/compact_object.h\"\n#include \"core/page_usage/page_usage_stats.h\"\n#include \"core/sds_utils.h\"\n\nextern \"C\" {\n#include \"redis/sds.h\"\n#include \"redis/zmalloc.h\"\n}\n\n#include \"base/logging.h\"\n\nusing namespace std;\n\nnamespace dfly {\n\nnamespace {\n\ninline bool MayHaveTtl(sds s) {\n  char* alloc_ptr = (char*)sdsAllocPtr(s);\n  return sdslen(s) + 1 + 4 <= zmalloc_usable_size(alloc_ptr);\n}\n\nsds AllocImmutableWithTtl(uint32_t len, uint32_t at) {\n  sds res = AllocSdsWithSpace(len, sizeof(at));\n  absl::little_endian::Store32(res + len + 1, at);  // Save TTL\n\n  return res;\n}\n\n}  // namespace\n\nStringSet::~StringSet() {\n  Clear();\n}\n\nbool StringSet::Add(string_view src, uint32_t ttl_sec) {\n  uint64_t hash = Hash(&src, 1);\n  void* prev = FindInternal(&src, hash, 1);\n  if (prev != nullptr) {\n    return false;\n  }\n\n  sds newsds = MakeSetSds(src, ttl_sec);\n  bool has_ttl = ttl_sec != UINT32_MAX;\n  AddUnique(newsds, has_ttl, hash);\n  return true;\n}\n\nunsigned StringSet::AddMany(absl::Span<std::string_view> span, uint32_t ttl_sec, bool keepttl) {\n  std::string_view views[kMaxBatchLen];\n  unsigned res = 0;\n  if (BucketCount() < span.size()) {\n    Reserve(span.size());\n  }\n\n  while (span.size() >= kMaxBatchLen) {\n    for (size_t i = 0; i < kMaxBatchLen; i++)\n      views[i] = span[i];\n\n    span.remove_prefix(kMaxBatchLen);\n    res += AddBatch(absl::MakeSpan(views), ttl_sec, keepttl);\n  }\n\n  if (span.size()) {\n    for (size_t i = 0; i < span.size(); i++)\n      views[i] = span[i];\n\n    res += AddBatch(absl::MakeSpan(views, span.size()), ttl_sec, keepttl);\n  }\n  return res;\n}\n\nunsigned StringSet::AddBatch(absl::Span<std::string_view> span, uint32_t ttl_sec, bool keepttl) {\n  uint64_t hash[kMaxBatchLen];\n  bool 
has_ttl = ttl_sec != UINT32_MAX;\n  unsigned count = span.size();\n  unsigned res = 0;\n\n  DCHECK_LE(count, kMaxBatchLen);\n\n  for (size_t i = 0; i < count; i++) {\n    hash[i] = CompactObj::HashCode(span[i]);\n    Prefetch(hash[i]);\n  }\n\n  for (unsigned i = 0; i < count; ++i) {\n    void* prev = FindInternal(&span[i], hash[i], 1);\n    if (prev == nullptr) {\n      ++res;\n      sds field = MakeSetSds(span[i], ttl_sec);\n      AddUnique(field, has_ttl, hash[i]);\n    } else if (has_ttl && !keepttl) {\n      ObjUpdateExpireTime(prev, ttl_sec);\n    }\n  }\n\n  return res;\n}\n\nStringSet::iterator StringSet::GetRandomMember() {\n  return iterator{DenseSet::GetRandomIterator()};\n}\n\nstd::optional<std::string> StringSet::Pop() {\n  sds str = (sds)PopInternal();\n\n  if (str == nullptr) {\n    return std::nullopt;\n  }\n\n  std::string ret{str, sdslen(str)};\n  sdsfree(str);\n\n  return ret;\n}\n\nuint32_t StringSet::Scan(uint32_t cursor, const std::function<void(const sds)>& func) const {\n  return DenseSet::Scan(cursor, [func](const void* ptr) { func((sds)ptr); });\n}\n\nuint64_t StringSet::Hash(const void* ptr, uint32_t cookie) const {\n  DCHECK_LT(cookie, 2u);\n\n  if (cookie == 0) {\n    sds s = (sds)ptr;\n    return CompactObj::HashCode(string_view{s, sdslen(s)});\n  }\n\n  const string_view* sv = (const string_view*)ptr;\n  return CompactObj::HashCode(*sv);\n}\n\nbool StringSet::ObjEqual(const void* left, const void* right, uint32_t right_cookie) const {\n  DCHECK_LT(right_cookie, 2u);\n\n  sds s1 = (sds)left;\n\n  if (right_cookie == 0) {\n    sds s2 = (sds)right;\n\n    if (sdslen(s1) != sdslen(s2)) {\n      return false;\n    }\n\n    return sdslen(s1) == 0 || memcmp(s1, s2, sdslen(s1)) == 0;\n  }\n\n  const string_view* right_sv = (const string_view*)right;\n  string_view left_sv{s1, sdslen(s1)};\n  return left_sv == (*right_sv);\n}\n\nsize_t StringSet::ObjectAllocSize(const void* s1) const {\n  return 
zmalloc_usable_size(sdsAllocPtr((sds)s1));\n}\n\nuint32_t StringSet::ObjExpireTime(const void* str) const {\n  sds s = (sds)str;\n  DCHECK(MayHaveTtl(s));\n\n  char* ttlptr = s + sdslen(s) + 1;\n  return absl::little_endian::Load32(ttlptr);\n}\n\nvoid StringSet::ObjUpdateExpireTime(const void* obj, uint32_t ttl_sec) {\n  return SdsUpdateExpireTime(obj, time_now() + ttl_sec, 0);\n}\n\nvoid StringSet::ObjDelete(void* obj, bool has_ttl) const {\n  sdsfree((sds)obj);\n}\n\nvoid* StringSet::ObjectClone(const void* obj, bool has_ttl, bool add_ttl) const {\n  sds src = (sds)obj;\n  string_view sv{src, sdslen(src)};\n  uint32_t ttl_sec = add_ttl ? 0 : (has_ttl ? ObjExpireTime(obj) : UINT32_MAX);\n  return (void*)MakeSetSds(sv, ttl_sec);\n}\n\nsds StringSet::MakeSetSds(string_view src, uint32_t ttl_sec) const {\n  if (ttl_sec != UINT32_MAX) {\n    uint32_t at = time_now() + ttl_sec;\n\n    sds newsds = AllocImmutableWithTtl(src.size(), at);\n    if (!src.empty())\n      memcpy(newsds, src.data(), src.size());\n    return newsds;\n  }\n\n  return sdsnewlen(src.data(), src.size());\n}\n\n// Does not release obj. 
Callers must deallocate with sdsfree explicitly\npair<sds, bool> StringSet::DuplicateEntryIfFragmented(void* obj, PageUsage* page_usage) {\n  sds key = (sds)obj;\n\n  if (!page_usage->IsPageForObjectUnderUtilized(key))\n    return {key, false};\n\n  size_t key_len = sdslen(key);\n  bool has_ttl = MayHaveTtl(key);\n\n  if (has_ttl) {\n    sds res = AllocSdsWithSpace(key_len, sizeof(uint32_t));\n    std::memcpy(res, key, key_len + sizeof(uint32_t));\n    return {res, true};\n  }\n\n  return {sdsnewlen(key, key_len), true};\n}\n\nbool StringSet::iterator::ReallocIfNeeded(PageUsage* page_usage) {\n  auto* ptr = curr_entry_;\n  if (ptr->IsLink()) {\n    ptr = ptr->AsLink();\n  }\n\n  DCHECK(!ptr->IsEmpty());\n  DCHECK(ptr->IsObject());\n\n  auto* obj = ptr->GetObject();\n  auto [new_obj, realloced] =\n      static_cast<StringSet*>(owner_)->DuplicateEntryIfFragmented(obj, page_usage);\n\n  if (realloced) {\n    ptr->SetObject(new_obj);\n    sdsfree((sds)obj);\n  }\n\n  return realloced;\n}\n\n}  // namespace dfly\n"
  },
  {
    "path": "src/core/string_set.h",
    "content": "// Copyright 2022, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#pragma once\n\n#include <absl/types/span.h>\n\n#include <cstdint>\n#include <functional>\n#include <optional>\n#include <string_view>\n\n#include \"core/dense_set.h\"\n\nextern \"C\" {\n#include \"redis/sds.h\"\n}\n\nnamespace dfly {\nclass PageUsage;\n\nclass StringSet : public DenseSet {\n public:\n  StringSet() = default;\n\n  ~StringSet();\n\n  // Returns true if elem was added.\n  bool Add(std::string_view s1, uint32_t ttl_sec = UINT32_MAX);\n\n  unsigned AddMany(absl::Span<std::string_view> span, uint32_t ttl_sec, bool keepttl);\n\n  bool Erase(std::string_view str) {\n    return EraseInternal(&str, 1);\n  }\n\n  bool Contains(std::string_view s1) const {\n    return FindInternal(&s1, Hash(&s1, 1), 1) != nullptr;\n  }\n\n  class iterator : private IteratorBase {\n   public:\n    using iterator_category = std::forward_iterator_tag;\n    using difference_type = std::ptrdiff_t;\n    using value_type = sds;\n    using pointer = sds*;\n    using reference = sds&;\n\n    explicit iterator(const IteratorBase& o) : IteratorBase(o) {\n    }\n\n    iterator() : IteratorBase() {\n    }\n\n    iterator(DenseSet* set) : IteratorBase(set, false) {\n    }\n\n    iterator& operator++() {\n      Advance();\n      return *this;\n    }\n\n    bool operator==(const iterator& b) const {\n      if (owner_ == nullptr && b.owner_ == nullptr) {  // to allow comparison with end()\n        return true;\n      }\n      return owner_ == b.owner_ && curr_entry_ == b.curr_entry_;\n    }\n\n    bool operator!=(const iterator& b) const {\n      return !(*this == b);\n    }\n\n    value_type operator*() {\n      return (value_type)curr_entry_->GetObject();\n    }\n\n    value_type operator->() {\n      return (value_type)curr_entry_->GetObject();\n    }\n\n    using IteratorBase::ExpiryTime;\n    using IteratorBase::HasExpiry;\n    using IteratorBase::SetExpiryTime;\n\n   
 // Try reducing memory fragmentation of the value by re-allocating. Returns true if\n    // re-allocation happened.\n    bool ReallocIfNeeded(PageUsage* page_usage);\n  };\n\n  iterator begin() {\n    return iterator{this};\n  }\n\n  iterator end() {\n    return iterator{};\n  }\n\n  // See DenseSet::GetRandomIterator\n  iterator GetRandomMember();\n\n  std::optional<std::string> Pop();\n\n  uint32_t Scan(uint32_t, const std::function<void(sds)>&) const;\n\n  iterator Find(std::string_view member) {\n    return iterator{FindIt(&member, 1)};\n  }\n\n protected:\n  uint64_t Hash(const void* ptr, uint32_t cookie) const override;\n\n  unsigned AddBatch(absl::Span<std::string_view> span, uint32_t ttl_sec, bool keepttl);\n\n  bool ObjEqual(const void* left, const void* right, uint32_t right_cookie) const override;\n\n  size_t ObjectAllocSize(const void* s1) const override;\n  uint32_t ObjExpireTime(const void* obj) const override;\n  void ObjUpdateExpireTime(const void* obj, uint32_t ttl_sec) override;\n  void ObjDelete(void* obj, bool has_ttl) const override;\n  void* ObjectClone(const void* obj, bool has_ttl, bool add_ttl) const override;\n  sds MakeSetSds(std::string_view src, uint32_t ttl_sec) const;\n\n private:\n  std::pair<sds, bool> DuplicateEntryIfFragmented(void* obj, PageUsage* page_usage);\n};\n\n}  // end namespace dfly\n"
  },
  {
    "path": "src/core/string_set_test.cc",
    "content": "// Copyright 2022, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#include \"core/string_set.h\"\n\n#include <absl/strings/match.h>\n#include <absl/strings/str_cat.h>\n#include <mimalloc.h>\n\n#include <algorithm>\n#include <memory_resource>\n#include <random>\n#include <string>\n#include <string_view>\n#include <unordered_set>\n#include <vector>\n\n#include \"base/gtest.h\"\n#include \"base/logging.h\"\n#include \"core/compact_object.h\"\n#include \"core/page_usage/page_usage_stats.h\"\n#include \"redis/sds.h\"\n\nextern \"C\" {\n#include \"redis/zmalloc.h\"\n}\n\nnamespace dfly {\n\nusing namespace std;\nusing absl::StrCat;\n\nclass DenseSetAllocator : public PMR_NS::memory_resource {\n public:\n  bool all_freed() const {\n    return alloced_ == 0;\n  }\n\n  void* do_allocate(size_t bytes, size_t alignment) override {\n    alloced_ += bytes;\n    void* p = PMR_NS::new_delete_resource()->allocate(bytes, alignment);\n    return p;\n  }\n\n  void do_deallocate(void* p, size_t bytes, size_t alignment) override {\n    alloced_ -= bytes;\n    return PMR_NS::new_delete_resource()->deallocate(p, bytes, alignment);\n  }\n\n  bool do_is_equal(const PMR_NS::memory_resource& other) const noexcept override {\n    return PMR_NS::new_delete_resource()->is_equal(other);\n  }\n\n private:\n  size_t alloced_ = 0;\n};\n\nclass StringSetTest : public ::testing::Test {\n protected:\n  static void SetUpTestSuite() {\n    auto* tlh = mi_heap_get_backing();\n    init_zmalloc_threadlocal(tlh);\n    InitTLStatelessAllocMR(PMR_NS::get_default_resource());\n  }\n\n  static void TearDownTestSuite() {\n  }\n\n  void SetUp() override {\n    ss_ = new StringSet;\n    generator_.seed(0);\n  }\n\n  void TearDown() override {\n    delete ss_;\n\n    // ensure there are no memory leaks after every test\n    EXPECT_TRUE(alloc_.all_freed());\n    EXPECT_EQ(zmalloc_used_memory_tl, 0);\n  }\n\n  StringSet* ss_;\n  DenseSetAllocator alloc_;\n  
mt19937 generator_;\n};\n\nTEST_F(StringSetTest, Basic) {\n  EXPECT_TRUE(ss_->Add(\"foo\"sv));\n  EXPECT_TRUE(ss_->Add(\"bar\"sv));\n  EXPECT_FALSE(ss_->Add(\"foo\"sv));\n  EXPECT_FALSE(ss_->Add(\"bar\"sv));\n  EXPECT_TRUE(ss_->Contains(\"foo\"sv));\n  EXPECT_TRUE(ss_->Contains(\"bar\"sv));\n  EXPECT_EQ(2, ss_->UpperBoundSize());\n}\n\nTEST_F(StringSetTest, StandardAddErase) {\n  EXPECT_TRUE(ss_->Add(\"@@@@@@@@@@@@@@@@\"));\n  EXPECT_TRUE(ss_->Add(\"A@@@@@@@@@@@@@@@\"));\n  EXPECT_TRUE(ss_->Add(\"AA@@@@@@@@@@@@@@\"));\n  EXPECT_TRUE(ss_->Add(\"AAA@@@@@@@@@@@@@\"));\n  EXPECT_TRUE(ss_->Add(\"AAAAAAAAA@@@@@@@\"));\n  EXPECT_TRUE(ss_->Add(\"AAAAAAAAAA@@@@@@\"));\n  EXPECT_TRUE(ss_->Add(\"AAAAAAAAAAAAAAA@\"));\n  EXPECT_TRUE(ss_->Add(\"AAAAAAAAAAAAAAAA\"));\n  EXPECT_TRUE(ss_->Add(\"AAAAAAAAAAAAAAAD\"));\n  EXPECT_TRUE(ss_->Add(\"BBBBBAAAAAAAAAAA\"));\n  EXPECT_TRUE(ss_->Add(\"BBBBBBBBAAAAAAAA\"));\n  EXPECT_TRUE(ss_->Add(\"CCCCCBBBBBBBBBBB\"));\n\n  // Remove link in the middle of chain\n  EXPECT_TRUE(ss_->Erase(\"BBBBBBBBAAAAAAAA\"));\n  // Remove start of a chain\n  EXPECT_TRUE(ss_->Erase(\"CCCCCBBBBBBBBBBB\"));\n  // Remove end of link\n  EXPECT_TRUE(ss_->Erase(\"AAA@@@@@@@@@@@@@\"));\n  // Remove only item in chain\n  EXPECT_TRUE(ss_->Erase(\"AA@@@@@@@@@@@@@@\"));\n  EXPECT_TRUE(ss_->Erase(\"AAAAAAAAA@@@@@@@\"));\n  EXPECT_TRUE(ss_->Erase(\"AAAAAAAAAA@@@@@@\"));\n  EXPECT_TRUE(ss_->Erase(\"AAAAAAAAAAAAAAA@\"));\n}\n\nTEST_F(StringSetTest, DisplacedBug) {\n  string_view vals[] = {\"imY\", \"OVl\", \"NhH\", \"BCe\", \"YDL\", \"lpb\",\n                        \"nhF\", \"xod\", \"zYR\", \"PSa\", \"hce\", \"cTR\"};\n  ss_->AddMany(absl::MakeSpan(vals), UINT32_MAX, false);\n\n  ss_->Add(\"fIc\");\n  ss_->Erase(\"YDL\");\n  ss_->Add(\"fYs\");\n  ss_->Erase(\"hce\");\n  ss_->Erase(\"nhF\");\n  ss_->Add(\"dye\");\n  ss_->Add(\"xZT\");\n  ss_->Add(\"LVK\");\n  ss_->Erase(\"zYR\");\n  ss_->Erase(\"fYs\");\n  ss_->Add(\"ueB\");\n  ss_->Erase(\"PSa\");\n  
ss_->Erase(\"OVl\");\n  ss_->Add(\"cga\");\n  ss_->Add(\"too\");\n  ss_->Erase(\"ueB\");\n  ss_->Add(\"HZe\");\n  ss_->Add(\"oQn\");\n  ss_->Erase(\"too\");\n  ss_->Erase(\"HZe\");\n  ss_->Erase(\"xZT\");\n  ss_->Erase(\"cga\");\n  ss_->Erase(\"cTR\");\n  ss_->Erase(\"BCe\");\n  ss_->Add(\"eua\");\n  ss_->Erase(\"lpb\");\n  ss_->Add(\"OXK\");\n  ss_->Add(\"QmO\");\n  ss_->Add(\"SzV\");\n  ss_->Erase(\"QmO\");\n  ss_->Add(\"jbe\");\n  ss_->Add(\"BPN\");\n  ss_->Add(\"OfH\");\n  ss_->Add(\"Muf\");\n  ss_->Add(\"CwP\");\n  ss_->Erase(\"Muf\");\n  ss_->Erase(\"xod\");\n  ss_->Add(\"Cis\");\n  ss_->Add(\"Xvd\");\n  ss_->Erase(\"SzV\");\n  ss_->Erase(\"eua\");\n  ss_->Add(\"DGb\");\n  ss_->Add(\"leD\");\n  ss_->Add(\"MVX\");\n  ss_->Add(\"HPq\");\n}\n\nstatic string random_string(mt19937& rand, unsigned len) {\n  const string_view alpanum = \"1234567890abcdefghijklmnopqrstuvwxyz\";\n  string ret;\n  ret.reserve(len);\n\n  for (size_t i = 0; i < len; ++i) {\n    ret += alpanum[rand() % alpanum.size()];\n  }\n\n  return ret;\n}\n\nTEST_F(StringSetTest, Resizing) {\n  constexpr size_t num_strs = 4096;\n  unordered_set<string> strs;\n  while (strs.size() != num_strs) {\n    auto str = random_string(generator_, 10);\n    strs.insert(str);\n  }\n\n  unsigned size = 0;\n  for (auto it = strs.begin(); it != strs.end(); ++it) {\n    const auto& str = *it;\n    EXPECT_TRUE(ss_->Add(str, 1));\n    EXPECT_EQ(ss_->UpperBoundSize(), size + 1);\n\n    // make sure we haven't lost any items after a grow\n    // which happens every power of 2\n    if ((size & (size - 1)) == 0) {\n      for (auto j = strs.begin(); j != it; ++j) {\n        const auto& str = *j;\n        auto it = ss_->Find(str);\n        ASSERT_TRUE(it != ss_->end());\n        EXPECT_TRUE(it.HasExpiry());\n        EXPECT_EQ(it.ExpiryTime(), ss_->time_now() + 1);\n      }\n    }\n    ++size;\n  }\n}\n\nTEST_F(StringSetTest, SimpleScan) {\n  unordered_set<string_view> info = {\"foo\", \"bar\"};\n  unordered_set<string_view> 
seen;\n\n  for (auto str : info) {\n    EXPECT_TRUE(ss_->Add(str));\n  }\n\n  uint32_t cursor = 0;\n  do {\n    cursor = ss_->Scan(cursor, [&](const sds ptr) {\n      sds s = (sds)ptr;\n      string_view str{s, sdslen(s)};\n      EXPECT_TRUE(info.count(str));\n      seen.insert(str);\n    });\n  } while (cursor != 0);\n\n  EXPECT_TRUE(seen.size() == info.size() && equal(seen.begin(), seen.end(), info.begin()));\n}\n\n// Ensure REDIS scan guarantees are met\nTEST_F(StringSetTest, ScanGuarantees) {\n  unordered_set<string_view> to_be_seen = {\"foo\", \"bar\"};\n  unordered_set<string_view> not_be_seen = {\"AAA\", \"BBB\"};\n  unordered_set<string_view> maybe_seen = {\"AA@@@@@@@@@@@@@@\", \"AAA@@@@@@@@@@@@@\",\n                                           \"AAAAAAAAA@@@@@@@\", \"AAAAAAAAAA@@@@@@\"};\n  unordered_set<string_view> seen;\n\n  auto scan_callback = [&](const sds ptr) {\n    sds s = (sds)ptr;\n    string_view str{s, sdslen(s)};\n    EXPECT_TRUE(to_be_seen.count(str) || maybe_seen.count(str));\n    EXPECT_FALSE(not_be_seen.count(str));\n    if (to_be_seen.count(str)) {\n      seen.insert(str);\n    }\n  };\n\n  EXPECT_EQ(ss_->Scan(0, scan_callback), 0);\n\n  for (auto str : not_be_seen) {\n    EXPECT_TRUE(ss_->Add(str));\n  }\n\n  for (auto str : not_be_seen) {\n    EXPECT_TRUE(ss_->Erase(str));\n  }\n\n  for (auto str : to_be_seen) {\n    EXPECT_TRUE(ss_->Add(str));\n  }\n\n  // should reach at least the first item in the set\n  uint32_t cursor = ss_->Scan(0, scan_callback);\n\n  for (auto str : maybe_seen) {\n    EXPECT_TRUE(ss_->Add(str));\n  }\n\n  while (cursor != 0) {\n    cursor = ss_->Scan(cursor, scan_callback);\n  }\n\n  EXPECT_TRUE(seen.size() == to_be_seen.size());\n}\n\nTEST_F(StringSetTest, IntOnly) {\n  constexpr size_t num_ints = 8192;\n  unordered_set<unsigned int> numbers;\n  for (size_t i = 0; i < num_ints; ++i) {\n    numbers.insert(i);\n    EXPECT_TRUE(ss_->Add(to_string(i)));\n  }\n\n  for (size_t i = 0; i < num_ints; ++i) {\n    
ASSERT_FALSE(ss_->Add(to_string(i)));\n  }\n\n  size_t num_remove = generator_() % 4096;\n  unordered_set<string> removed;\n\n  for (size_t i = 0; i < num_remove; ++i) {\n    auto remove_int = generator_() % num_ints;\n    auto remove = to_string(remove_int);\n    if (numbers.count(remove_int)) {\n      ASSERT_TRUE(ss_->Contains(remove)) << remove_int;\n      EXPECT_TRUE(ss_->Erase(remove));\n      numbers.erase(remove_int);\n    } else {\n      EXPECT_FALSE(ss_->Erase(remove));\n    }\n\n    EXPECT_FALSE(ss_->Contains(remove));\n    removed.insert(remove);\n  }\n\n  size_t expected_seen = 0;\n  auto scan_callback = [&](const sds ptr) {\n    string str{ptr, sdslen(ptr)};\n    EXPECT_FALSE(removed.count(str));\n\n    if (numbers.count(atoi(str.data()))) {\n      ++expected_seen;\n    }\n  };\n\n  uint32_t cursor = 0;\n  do {\n    cursor = ss_->Scan(cursor, scan_callback);\n    // randomly throw in some new numbers\n    uint32_t val = generator_();\n    VLOG(1) << \"Val \" << val;\n    ss_->Add(to_string(val));\n  } while (cursor != 0);\n\n  EXPECT_GE(expected_seen + removed.size(), num_ints);\n}\n\nTEST_F(StringSetTest, XtremeScanGrow) {\n  unordered_set<string> to_see, force_grow, seen;\n\n  while (to_see.size() != 8) {\n    to_see.insert(random_string(generator_, 10));\n  }\n\n  while (force_grow.size() != 8192) {\n    string str = random_string(generator_, 10);\n\n    if (to_see.count(str)) {\n      continue;\n    }\n\n    force_grow.insert(random_string(generator_, 10));\n  }\n\n  for (auto& str : to_see) {\n    EXPECT_TRUE(ss_->Add(str));\n  }\n\n  auto scan_callback = [&](const sds ptr) {\n    sds s = (sds)ptr;\n    string_view str{s, sdslen(s)};\n    if (to_see.count(string(str))) {\n      seen.insert(string(str));\n    }\n  };\n\n  uint32_t cursor = ss_->Scan(0, scan_callback);\n\n  // force approx 10 grows\n  for (auto& s : force_grow) {\n    EXPECT_TRUE(ss_->Add(s));\n  }\n\n  while (cursor != 0) {\n    cursor = ss_->Scan(cursor, scan_callback);\n  }\n\n  
EXPECT_EQ(seen.size(), to_see.size());\n}\n\nTEST_F(StringSetTest, Pop) {\n  constexpr size_t num_items = 8;\n  unordered_set<string> to_insert;\n\n  while (to_insert.size() != num_items) {\n    auto str = random_string(generator_, 10);\n    if (to_insert.count(str)) {\n      continue;\n    }\n\n    to_insert.insert(str);\n    EXPECT_TRUE(ss_->Add(str));\n  }\n\n  while (!ss_->Empty()) {\n    size_t size = ss_->UpperBoundSize();\n    auto str = ss_->Pop();\n    DCHECK(ss_->UpperBoundSize() == to_insert.size() - 1);\n    DCHECK(str.has_value());\n    DCHECK(to_insert.count(str.value()));\n    DCHECK_EQ(ss_->UpperBoundSize(), size - 1);\n    to_insert.erase(str.value());\n  }\n\n  DCHECK(ss_->Empty());\n  DCHECK(to_insert.empty());\n}\n\nTEST_F(StringSetTest, Iteration) {\n  ss_->Add(\"foo\");\n  for (const sds ptr : *ss_) {\n    LOG(INFO) << ptr;\n  }\n  ss_->Clear();\n  constexpr size_t num_items = 8192;\n  unordered_set<string> to_insert;\n\n  while (to_insert.size() != num_items) {\n    auto str = random_string(generator_, 10);\n    if (to_insert.count(str)) {\n      continue;\n    }\n\n    to_insert.insert(str);\n    EXPECT_TRUE(ss_->Add(str));\n  }\n\n  for (const sds ptr : *ss_) {\n    string str{ptr, sdslen(ptr)};\n    EXPECT_TRUE(to_insert.count(str));\n    to_insert.erase(str);\n  }\n\n  EXPECT_EQ(to_insert.size(), 0);\n}\n\nTEST_F(StringSetTest, SetFieldExpireHasExpiry) {\n  EXPECT_TRUE(ss_->Add(\"k1\", 100));\n  auto k = ss_->Find(\"k1\");\n  EXPECT_TRUE(k.HasExpiry());\n  EXPECT_EQ(k.ExpiryTime(), 100);\n  k.SetExpiryTime(1);\n  EXPECT_TRUE(k.HasExpiry());\n  EXPECT_EQ(k.ExpiryTime(), 1);\n}\n\nTEST_F(StringSetTest, SetFieldExpireNoHasExpiry) {\n  EXPECT_TRUE(ss_->Add(\"k1\"));\n  auto k = ss_->Find(\"k1\");\n  EXPECT_FALSE(k.HasExpiry());\n  k.SetExpiryTime(10);\n  EXPECT_TRUE(k.HasExpiry());\n  EXPECT_EQ(k.ExpiryTime(), 10);\n}\n\nTEST_F(StringSetTest, Ttl) {\n  EXPECT_TRUE(ss_->Add(\"bla\"sv, 1));\n  EXPECT_FALSE(ss_->Add(\"bla\"sv, 1));\n  auto it = 
ss_->Find(\"bla\"sv);\n  EXPECT_EQ(1u, it.ExpiryTime());\n\n  ss_->set_time(1);\n  EXPECT_TRUE(ss_->Add(\"bla\"sv, 1));\n  EXPECT_EQ(1u, ss_->UpperBoundSize());\n\n  for (unsigned i = 0; i < 100; ++i) {\n    EXPECT_TRUE(ss_->Add(StrCat(\"foo\", i), 1));\n  }\n  EXPECT_EQ(101u, ss_->UpperBoundSize());\n  it = ss_->Find(\"foo50\");\n  EXPECT_STREQ(\"foo50\", *it);\n  EXPECT_EQ(2u, it.ExpiryTime());\n\n  ss_->set_time(2);\n  for (unsigned i = 0; i < 100; ++i) {\n    EXPECT_TRUE(ss_->Add(StrCat(\"bar\", i)));\n  }\n  it = ss_->Find(\"bar50\");\n  EXPECT_FALSE(it.HasExpiry());\n\n  for (auto it = ss_->begin(); it != ss_->end(); ++it) {\n    ASSERT_TRUE(absl::StartsWith(*it, \"bar\")) << *it;\n    string str = *it;\n    VLOG(1) << *it;\n  }\n}\n\nTEST_F(StringSetTest, Grow) {\n  for (size_t j = 0; j < 10; ++j) {\n    for (size_t i = 0; i < 4098; ++i) {\n      ss_->Reserve(generator_() % 256);\n      auto str = random_string(generator_, 3);\n      ss_->Add(str);\n    }\n    ss_->Clear();\n  }\n}\n\nTEST_F(StringSetTest, Reserve) {\n  vector<string> strs;\n\n  for (size_t i = 0; i < 10; ++i) {\n    strs.push_back(random_string(generator_, 10));\n    ss_->Add(strs.back());\n  }\n\n  for (size_t j = 2; j < 20; j += 3) {\n    ss_->Reserve(j * 20);\n    for (size_t i = 0; i < 10; ++i) {\n      ASSERT_TRUE(ss_->Contains(strs[i]));\n    }\n  }\n}\n\nTEST_F(StringSetTest, Fill) {\n  for (size_t i = 0; i < 100; ++i) {\n    ss_->Add(random_string(generator_, 10));\n  }\n  StringSet s2;\n  ss_->Fill(&s2);\n  EXPECT_EQ(s2.UpperBoundSize(), ss_->UpperBoundSize());\n  for (sds str : *ss_) {\n    EXPECT_TRUE(s2.Contains(str));\n  }\n}\n\nTEST_F(StringSetTest, ClearResetsObjMallocUsed) {\n  // Add some items\n  for (size_t i = 0; i < 100; ++i) {\n    ss_->Add(random_string(generator_, 10));\n  }\n\n  // Verify ObjMallocUsed() > 0 after adding items\n  EXPECT_GT(ss_->ObjMallocUsed(), 0u);\n  EXPECT_GT(ss_->UpperBoundSize(), 0u);\n\n  // Clear the set\n  ss_->Clear();\n\n  // Verify 
ObjMallocUsed() is reset to 0 after Clear\n  EXPECT_EQ(ss_->ObjMallocUsed(), 0u);\n  EXPECT_EQ(ss_->UpperBoundSize(), 0u);\n}\n\nTEST_F(StringSetTest, IterateEmpty) {\n  for (const auto& s : *ss_) {\n    // We're iterating to make sure there is no crash. However, if we got here, it's a bug\n    CHECK(false) << \"Found entry \" << s << \" in empty set\";\n  }\n}\n\nstatic size_t MemUsed(StringSet& obj) {\n  return obj.ObjMallocUsed() + obj.SetMallocUsed();\n}\n\nvoid BM_Clone(benchmark::State& state) {\n  vector<string> strs;\n  mt19937 generator(0);\n  StringSet ss1, ss2;\n  unsigned elems = state.range(0);\n  for (size_t i = 0; i < elems; ++i) {\n    string str = random_string(generator, 10);\n    ss1.Add(str);\n  }\n  ss2.Reserve(ss1.UpperBoundSize());\n  while (state.KeepRunning()) {\n    for (auto src : ss1) {\n      ss2.Add(src);\n    }\n    state.PauseTiming();\n    ss2.Clear();\n    ss2.Reserve(ss1.UpperBoundSize());\n    state.ResumeTiming();\n  }\n}\nBENCHMARK(BM_Clone)->ArgName(\"elements\")->Arg(32000);\n\nvoid BM_Fill(benchmark::State& state) {\n  unsigned elems = state.range(0);\n  vector<string> strs;\n  mt19937 generator(0);\n  StringSet ss1, ss2;\n  for (size_t i = 0; i < elems; ++i) {\n    string str = random_string(generator, 10);\n    ss1.Add(str);\n  }\n\n  while (state.KeepRunning()) {\n    ss1.Fill(&ss2);\n    state.PauseTiming();\n    ss2.Clear();\n    state.ResumeTiming();\n  }\n}\nBENCHMARK(BM_Fill)->ArgName(\"elements\")->Arg(32000);\n\nvoid BM_Clear(benchmark::State& state) {\n  unsigned elems = state.range(0);\n  mt19937 generator(0);\n  StringSet ss;\n  while (state.KeepRunning()) {\n    state.PauseTiming();\n    for (size_t i = 0; i < elems; ++i) {\n      string str = random_string(generator, 16);\n      ss.Add(str);\n    }\n    state.ResumeTiming();\n    ss.Clear();\n  }\n}\nBENCHMARK(BM_Clear)->ArgName(\"elements\")->Arg(32000);\n\nvoid BM_Add(benchmark::State& state) {\n  vector<string> strs;\n  mt19937 generator(0);\n  StringSet 
ss;\n  unsigned elems = state.range(0);\n  unsigned keySize = state.range(1);\n  for (size_t i = 0; i < elems; ++i) {\n    string str = random_string(generator, keySize);\n    strs.push_back(str);\n  }\n  ss.Reserve(elems);\n  size_t mem_used = 0;\n  while (state.KeepRunning()) {\n    for (auto& str : strs)\n      ss.Add(str);\n    state.PauseTiming();\n    mem_used += MemUsed(ss);\n    ss.Clear();\n    ss.Reserve(elems);\n    state.ResumeTiming();\n  }\n  state.counters[\"Memory_Used\"] = mem_used / state.iterations();\n}\nBENCHMARK(BM_Add)\n    ->ArgNames({\"elements\", \"Key Size\"})\n    ->ArgsProduct({{1000, 10000, 100000}, {10, 100, 1000}});\n\nvoid BM_AddMany(benchmark::State& state) {\n  vector<string> strs;\n  mt19937 generator(0);\n  StringSet ss;\n  unsigned elems = state.range(0);\n  unsigned keySize = state.range(1);\n  for (size_t i = 0; i < elems; ++i) {\n    string str = random_string(generator, keySize);\n    strs.push_back(str);\n  }\n  ss.Reserve(elems);\n  vector<string_view> svs;\n  for (const auto& str : strs) {\n    svs.push_back(str);\n  }\n  size_t mem_used = 0;\n  while (state.KeepRunning()) {\n    ss.AddMany(absl::MakeSpan(svs), UINT32_MAX, false);\n    state.PauseTiming();\n    CHECK_EQ(ss.UpperBoundSize(), elems);\n    mem_used += MemUsed(ss);\n    ss.Clear();\n    ss.Reserve(elems);\n    state.ResumeTiming();\n  }\n  state.counters[\"Memory_Used\"] = mem_used / state.iterations();\n}\nBENCHMARK(BM_AddMany)\n    ->ArgNames({\"elements\", \"Key Size\"})\n    ->ArgsProduct({{1000, 10000, 100000}, {10, 100, 1000}});\n\nvoid BM_Erase(benchmark::State& state) {\n  std::vector<std::string> strs;\n  mt19937 generator(0);\n  StringSet ss;\n  auto elems = state.range(0);\n  auto keySize = state.range(1);\n  for (long int i = 0; i < elems; ++i) {\n    std::string str = random_string(generator, keySize);\n    strs.push_back(str);\n    ss.Add(str);\n  }\n  state.counters[\"Memory_Before_Erase\"] = MemUsed(ss);\n  size_t mem_used = 0;\n  while 
(state.KeepRunning()) {\n    for (auto& str : strs) {\n      ss.Erase(str);\n    }\n    state.PauseTiming();\n    mem_used += MemUsed(ss);\n    for (auto& str : strs) {\n      ss.Add(str);\n    }\n    state.ResumeTiming();\n  }\n  state.counters[\"Memory_After_Erase\"] = mem_used / state.iterations();\n}\nBENCHMARK(BM_Erase)\n    ->ArgNames({\"elements\", \"Key Size\"})\n    ->ArgsProduct({{1000, 10000, 100000}, {10, 100, 1000}});\n\nvoid BM_Get(benchmark::State& state) {\n  std::vector<std::string> strs;\n  mt19937 generator(0);\n  StringSet ss;\n  auto elems = state.range(0);\n  auto keySize = state.range(1);\n  for (long int i = 0; i < elems; ++i) {\n    std::string str = random_string(generator, keySize);\n    strs.push_back(str);\n    ss.Add(str);\n  }\n  while (state.KeepRunning()) {\n    for (auto& str : strs) {\n      ss.Find(str);\n    }\n  }\n}\nBENCHMARK(BM_Get)\n    ->ArgNames({\"elements\", \"Key Size\"})\n    ->ArgsProduct({{1000, 10000, 100000}, {10, 100, 1000}});\n\nvoid BM_Grow(benchmark::State& state) {\n  vector<string> strs;\n  mt19937 generator(0);\n  StringSet src;\n  unsigned elems = 1 << 18;\n  for (size_t i = 0; i < elems; ++i) {\n    src.Add(random_string(generator, 16), UINT32_MAX);\n    strs.push_back(random_string(generator, 16));\n  }\n\n  while (state.KeepRunning()) {\n    state.PauseTiming();\n    StringSet tmp;\n    src.Fill(&tmp);\n    CHECK_EQ(tmp.BucketCount(), elems);\n    state.ResumeTiming();\n    for (const auto& str : strs) {\n      tmp.Add(str);\n      if (tmp.BucketCount() > elems) {\n        break;  // we grew\n      }\n    }\n\n    CHECK_GT(tmp.BucketCount(), elems);\n  }\n}\nBENCHMARK(BM_Grow);\n\nvoid BM_Spop1000(benchmark::State& state) {\n  mt19937 generator(0);\n  StringSet src;\n  unsigned elems = 1 << 14;\n  for (size_t i = 0; i < elems; ++i) {\n    src.Add(random_string(generator, 16), UINT32_MAX);\n  }\n\n  auto sparseness = state.range(0);\n  while (state.KeepRunning()) {\n    state.PauseTiming();\n    
StringSet tmp;\n    src.Fill(&tmp);\n    tmp.Reserve(elems * sparseness);\n    state.ResumeTiming();\n    for (int i = 0; i < 1000; ++i) {\n      tmp.Pop();\n    }\n  }\n}\nBENCHMARK(BM_Spop1000)->ArgName(\"sparseness\")->ArgsProduct({{1, 4, 10}});\n\nunsigned total_wasted_memory = 0;\n\nTEST_F(StringSetTest, ReallocIfNeeded) {\n  auto build_str = [](size_t i) { return to_string(i) + string(131, 'a'); };\n\n  auto count_waste = [](const mi_heap_t* heap, const mi_heap_area_t* area, void* block,\n                        size_t block_size, void* arg) {\n    size_t used = block_size * area->used;\n    total_wasted_memory += area->committed - used;\n    return true;\n  };\n\n  for (size_t i = 0; i < 10'000; i++)\n    ss_->Add(build_str(i));\n\n  for (size_t i = 0; i < 10'000; i++) {\n    if (i % 10 == 0)\n      continue;\n    ss_->Erase(build_str(i));\n  }\n\n  mi_heap_collect(mi_heap_get_backing(), true);\n  mi_heap_visit_blocks(mi_heap_get_backing(), false, count_waste, nullptr);\n  size_t wasted_before = total_wasted_memory;\n\n  size_t underutilized = 0;\n  PageUsage page_usage{CollectPageStats::NO, 0.9};\n  for (auto it = ss_->begin(); it != ss_->end(); ++it) {\n    underutilized += page_usage.IsPageForObjectUnderUtilized(*it);\n    it.ReallocIfNeeded(&page_usage);\n  }\n  // Check there are underutilized pages\n  CHECK_GT(underutilized, 0u);\n\n  total_wasted_memory = 0;\n  mi_heap_collect(mi_heap_get_backing(), true);\n  mi_heap_visit_blocks(mi_heap_get_backing(), false, count_waste, nullptr);\n  size_t wasted_after = total_wasted_memory;\n\n  // Check we waste significantly less now\n  EXPECT_GT(wasted_before, wasted_after * 2);\n\n  EXPECT_EQ(ss_->UpperBoundSize(), 1000);\n  for (size_t i = 0; i < 1000; i++)\n    EXPECT_EQ(*ss_->Find(build_str(i * 10)), build_str(i * 10));\n}\n\nTEST_F(StringSetTest, TransferTTLFlagLinkToObjectOnDelete) {\n  for (size_t i = 0; i < 10; i++) {\n    EXPECT_TRUE(ss_->Add(absl::StrCat(i), 1));\n  }\n  for (size_t i = 0; i < 9; i++) 
{\n    EXPECT_TRUE(ss_->Erase(absl::StrCat(i)));\n  }\n  auto it = ss_->Find(\"9\"sv);\n  EXPECT_TRUE(it.HasExpiry());\n  EXPECT_EQ(1u, it.ExpiryTime());\n}\n\nclass ShrinkTest : public StringSetTest, public ::testing::WithParamInterface<size_t> {};\n\nTEST_P(ShrinkTest, BasicShrink) {\n  constexpr size_t num_strs = 1000000;\n  size_t shrink_to = GetParam();\n\n  vector<string> strs;\n  for (size_t i = 0; i < num_strs; ++i) {\n    strs.push_back(random_string(generator_, 10));\n    EXPECT_TRUE(ss_->Add(strs.back()));\n  }\n\n  // Grow to a larger size\n  ss_->Reserve(1 << 22);\n  size_t original_bucket_count = ss_->BucketCount();\n  EXPECT_EQ(original_bucket_count, 1u << 22);\n\n  // Shrink to the parameterized size\n  ss_->Shrink(shrink_to);\n\n  EXPECT_EQ(ss_->BucketCount(), shrink_to);\n  EXPECT_EQ(ss_->UpperBoundSize(), num_strs);\n\n  // Verify all elements are still accessible\n  for (const auto& str : strs) {\n    EXPECT_TRUE(ss_->Contains(str)) << \"Missing: \" << str;\n  }\n}\n\nINSTANTIATE_TEST_SUITE_P(ShrinkSizes, ShrinkTest,\n                         ::testing::Values(1u << 21,   // 2M buckets (sparse)\n                                           1u << 20,   // 1M buckets (~1 per bucket)\n                                           1u << 19),  // 512K buckets (~2 per bucket)\n                         [](const auto& info) { return absl::StrCat(\"buckets_\", info.param); });\n\nTEST_F(StringSetTest, ShrinkWithTTL) {\n  constexpr size_t num_strs = 1000000;\n\n  // Track elements by their TTL category\n  vector<string> expired_strs;    // TTL 1-50, will expire\n  vector<string> surviving_strs;  // TTL 51-100, will survive\n  vector<string> no_ttl_strs;     // No TTL, will survive\n\n  for (size_t i = 0; i < num_strs; ++i) {\n    string str = random_string(generator_, 10);\n    if (i % 3 == 0) {\n      // No TTL\n      EXPECT_TRUE(ss_->Add(str));\n      no_ttl_strs.push_back(str);\n    } else if (i % 3 == 1) {\n      // TTL 1-50 (will expire when time=50)\n    
  uint32_t ttl = (i % 50) + 1;\n      EXPECT_TRUE(ss_->Add(str, ttl));\n      expired_strs.push_back(str);\n    } else {\n      // TTL 51-100 (will survive when time=50)\n      uint32_t ttl = (i % 50) + 51;\n      EXPECT_TRUE(ss_->Add(str, ttl));\n      surviving_strs.push_back(str);\n    }\n  }\n\n  // Grow to larger size\n  ss_->Reserve(1 << 22);\n\n  // Set time to 50 - this will expire elements with TTL <= 50\n  ss_->set_time(50);\n\n  // Shrink\n  ss_->Shrink(1 << 21);\n  EXPECT_EQ(ss_->BucketCount(), 1u << 21);\n\n  // Verify expired elements are gone\n  for (const auto& str : expired_strs) {\n    EXPECT_EQ(ss_->Find(str), ss_->end()) << \"Should be expired: \" << str;\n  }\n\n  // Verify surviving TTL elements are still accessible with correct TTL\n  for (const auto& str : surviving_strs) {\n    auto it = ss_->Find(str);\n    ASSERT_NE(it, ss_->end()) << \"Missing surviving TTL element: \" << str;\n    EXPECT_TRUE(it.HasExpiry());\n    EXPECT_GT(it.ExpiryTime(), 50u);\n  }\n\n  // Verify no-TTL elements are still accessible\n  for (const auto& str : no_ttl_strs) {\n    auto it = ss_->Find(str);\n    ASSERT_NE(it, ss_->end()) << \"Missing no-TTL element: \" << str;\n    EXPECT_FALSE(it.HasExpiry());\n  }\n}\n\nTEST_F(StringSetTest, ScanWithShrinkBetweenCalls) {\n  // Test that cursor-based scanning works correctly when Grow and Shrink happen between Scan calls\n  // This verifies SCAN guarantees: elements present at start and end of scan must be seen\n  constexpr size_t num_strs = 1000000;\n  vector<string> strs;\n  unordered_set<string> must_see;\n\n  // Add elements and track them\n  for (size_t i = 0; i < num_strs; ++i) {\n    strs.push_back(random_string(generator_, 10));\n    EXPECT_TRUE(ss_->Add(strs.back()));\n    must_see.insert(strs.back());\n  }\n\n  // Note initial bucket count (will be ~1M after adding 1M elements)\n  size_t initial_bucket_count = ss_->BucketCount();\n\n  unordered_set<string> seen;\n  auto scan_callback = [&](const sds ptr) {\n   
 string str{ptr, sdslen(ptr)};\n    seen.insert(str);\n  };\n\n  // Start scanning BEFORE Grow\n  uint32_t cursor = ss_->Scan(0, scan_callback);\n  EXPECT_NE(cursor, 0u) << \"Should not finish in one iteration\";\n\n  // Grow to large size in the middle of scanning\n  ss_->Reserve(1 << 22);\n  EXPECT_EQ(ss_->BucketCount(), 1u << 22);\n  EXPECT_GT(ss_->BucketCount(), initial_bucket_count);\n\n  // Continue scanning a bit after Grow\n  cursor = ss_->Scan(cursor, scan_callback);\n\n  // Now Shrink in the middle of scanning - this is the key test\n  // Elements that existed at scan start must still be visible\n  ss_->Shrink(1 << 21);\n  EXPECT_EQ(ss_->BucketCount(), 1u << 21);\n\n  // Continue scanning with the same cursor\n  constexpr int max_iterations = 1 << 22;\n  int iterations = 0;\n  while (cursor != 0 && iterations < max_iterations) {\n    cursor = ss_->Scan(cursor, scan_callback);\n    iterations++;\n  }\n  EXPECT_LT(iterations, max_iterations) << \"Hit iteration limit\";\n  EXPECT_EQ(cursor, 0u) << \"Scan should complete\";\n\n  // Verify all original elements were seen\n  for (const auto& str : must_see) {\n    EXPECT_TRUE(seen.count(str)) << \"Missing element after shrink: \" << str;\n  }\n  EXPECT_EQ(seen.size(), must_see.size()) << \"Should see exactly all original elements\";\n}\n\n}  // namespace dfly\n"
  },
  {
    "path": "src/core/task_queue.cc",
    "content": "// Copyright 2024, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#include \"core/task_queue.h\"\n\n#include <absl/strings/str_cat.h>\n\n#include \"base/logging.h\"\n\nusing namespace std;\nusing namespace util::fb2;\n\nnamespace dfly {\n\n__thread unsigned TaskQueue::blocked_submitters_ = 0;\n\nTaskQueue::TaskQueue(unsigned queue_size, unsigned start_size, unsigned pool_max_size)\n    : queue_(queue_size), consumer_fibers_(start_size) {\n  CHECK_GT(start_size, 0u);\n  CHECK_LE(start_size, pool_max_size);\n}\n\nvoid TaskQueue::Start(std::string_view base_name) {\n  for (size_t i = 0; i < consumer_fibers_.size(); ++i) {\n    auto& fb = consumer_fibers_[i];\n    CHECK(!fb.IsJoinable());\n\n    string name = absl::StrCat(base_name, \"/\", i);\n    fb =\n        Fiber(Fiber::Opts{.priority = FiberPriority::HIGH, .name = name}, [this] { queue_.Run(); });\n  }\n}\n\nvoid TaskQueue::Shutdown() {\n  queue_.Shutdown();\n  for (auto& fb : consumer_fibers_)\n    fb.JoinIfNeeded();\n}\n\n}  // namespace dfly\n"
  },
  {
    "path": "src/core/task_queue.h",
    "content": "// Copyright 2024, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#pragma once\n\n#include \"util/fibers/fiberqueue_threadpool.h\"\n#include \"util/fibers/fibers.h\"\n\nnamespace dfly {\n\n/**\n *  MPSC task-queue that is handled by a single consumer thread.\n *  The queue is just a wrapper around FiberQueue that manages its fiber itself.\n */\nclass TaskQueue {\n public:\n  // TODO: to add a mechanism to moderate pool size. Currently it's static with pool_start_size.\n  TaskQueue(unsigned queue_size, unsigned pool_start_size, unsigned pool_max_size);\n\n  template <typename F> bool TryAdd(F&& f) {\n    return queue_.TryAdd(std::forward<F>(f));\n  }\n\n  // Returns true if task queue was blocked when adding the task.\n  template <typename F> bool Add(F&& f) {\n    if (queue_.TryAdd(std::forward<F>(f)))\n      return false;\n\n    ++blocked_submitters_;\n    auto res = queue_.Add(std::forward<F>(f));\n    --blocked_submitters_;\n    return res;\n  }\n\n  template <typename F> auto Await(F&& f) -> decltype(f()) {\n    util::fb2::Done done;\n    using ResultType = decltype(f());\n    util::detail::ResultMover<ResultType> mover;\n\n    ++blocked_submitters_;\n    Add([&mover, f = std::forward<F>(f), done]() mutable {\n      mover.Apply(f);\n      done.Notify();\n    });\n    --blocked_submitters_;\n    done.Wait();\n    return std::move(mover).get();\n  }\n\n  /**\n   * @brief Start running consumer loop in the caller thread by spawning fibers.\n   *        Returns immediately.\n   */\n  void Start(std::string_view base_name);\n\n  /**\n   * @brief Notifies Run() function to empty the queue and to exit and waits for the consumer\n   *        fiber to finish.\n   */\n  void Shutdown();\n\n  static unsigned blocked_submitters() {\n    return blocked_submitters_;\n  }\n\n private:\n  util::fb2::FiberQueue queue_;\n  std::vector<util::fb2::Fiber> consumer_fibers_;\n\n  static __thread unsigned 
blocked_submitters_;\n};\n\n}  // namespace dfly\n"
  },
  {
    "path": "src/core/tiering_types.cc",
    "content": "// Copyright 2026, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#include \"core/tiering_types.h\"\n\n#include \"redis/redis_aux.h\"\n\nnamespace dfly::tiering {\n\nauto FragmentRef::GetDescr(const CompactValue* pv) -> SerializationDescr {\n  switch (pv->ObjType()) {\n    case OBJ_STRING: {\n      if (!pv->HasAllocated())\n        return {};\n      auto strs = pv->GetRawString();\n      return {strs, CompactObj::ExternalRep::STRING};\n    }\n    case OBJ_HASH: {\n      if (pv->Encoding() == kEncodingListPack) {\n        return {static_cast<uint8_t*>(pv->RObjPtr()), CompactObj::ExternalRep::SERIALIZED_MAP};\n      }\n      return {};\n    }\n    default:\n      return {};\n  };\n}\n\nTieredCoolRecord* FragmentRef::GetCoolRecord() const {\n  return std::visit(\n      [](auto* pv) -> TieredCoolRecord* {\n        return pv->IsExternal() && pv->IsCool() ? pv->GetCool().record : nullptr;\n      },\n      val_);\n}\n\n}  // namespace dfly::tiering\n"
  },
  {
    "path": "src/core/tiering_types.h",
    "content": "// Copyright 2026, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#pragma once\n\n#include <boost/intrusive/list_hook.hpp>\n\n#include \"core/compact_object.h\"\n\nnamespace dfly::tiering {\n\n// TieredCoolRecord is part of the cooling cache. It allows offloading values to disk\n// while still keeping some of them in-memory to avoid disk reads in case they are requested again\n// soon after offloading. When a value is moved to the cold storage, TieredCoolRecord and only\n// the external reference is kept. When the value is warmed up, the record is removed from the cool\n// storage and the value is read back to memory.\nstruct TieredCoolRecord : public ::boost::intrusive::list_base_hook<\n                              boost::intrusive::link_mode<boost::intrusive::normal_link>> {\n  uint64_t key_hash;  // Allows searching the entry in the dbslice.\n  CompactValue value;\n  uint16_t db_index;\n  uint32_t page_index;\n};\nstatic_assert(sizeof(TieredCoolRecord) == 48);\n\nclass FragmentRef {\n public:\n  // Describes how this fragment should be serialized for offloading.\n  // Used by stashing flow.\n  struct SerializationDescr {\n    std::variant<std::array<std::string_view, 2>, uint8_t*> blob;\n    CompactObj::ExternalRep rep = CompactObj::ExternalRep::STRING;\n  };\n\n  FragmentRef(CompactValue& pv) : val_(&pv) {  // NOLINT\n  }\n\n  FragmentRef(CompactValue* pv) : val_(pv) {  // NOLINT\n  }\n\n  bool IsOffloaded() const {\n    return std::visit([](auto* pv) { return pv->IsExternal(); }, val_);\n  }\n\n  // Resets offloaded state for this fragment.\n  void ClearOffloaded() {\n    std::visit([](auto* pv) { pv->RemoveExternal(); }, val_);\n  }\n\n  bool HasStashPending() const {\n    return std::visit([](auto* pv) { return pv->HasStashPending(); }, val_);\n  }\n\n  void ClearStashPending() {\n    std::visit([](auto* pv) { pv->SetStashPending(false); }, val_);\n  }\n\n  CompactObjType ObjType() const {\n    return 
std::visit([](auto* pv) { return pv->ObjType(); }, val_);\n  }\n\n  // Determine required byte size and encoding type based on value.\n  SerializationDescr GetSerializationDescr() const {\n    return std::visit([](auto* pv) { return GetDescr(pv); }, val_);\n  }\n\n  // Returns a pointer to TieredCoolRecord if this fragment is cool, and null otherwise.\n  TieredCoolRecord* GetCoolRecord() const;\n\n  // Returns the external slice of the offloaded value. Only valid if IsOffloaded() is true.\n  std::pair<size_t, size_t> GetExternalSlice() const {\n    return std::visit([](auto* pv) { return pv->GetExternalSlice(); }, val_);\n  }\n\n private:\n  static SerializationDescr GetDescr(const CompactValue* pv);\n\n  // TODO: to support more types, for example Node* from qlist.h.\n  std::variant<CompactValue*> val_;\n};\n\n}  // namespace dfly::tiering\n"
  },
  {
    "path": "src/core/top_keys.cc",
    "content": "// Copyright 2025, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#include \"core/top_keys.h\"\n\n#include <xxhash.h>\n\n#include \"absl/numeric/bits.h\"\n#include \"absl/random/distributions.h\"\n#include \"base/logging.h\"\n\nnamespace dfly {\n\nusing namespace std;\n\nTopKeys::TopKeys(Options options)\n    : options_(options), fingerprints_(options_.buckets * options_.depth) {\n  if (options_.min_key_count_to_record < 2) {\n    options_.min_key_count_to_record = 2;\n  }\n}\n\nvoid TopKeys::Touch(std::string_view key) {\n  auto ResetCell = [&](Cell& cell, uint64_t fingerprint) {\n    cell.fingerprint = fingerprint;\n    cell.count = 1;\n    cell.key.clear();\n  };\n\n  uint64_t fingerprint = XXH3_64bits(key.data(), key.size());\n  constexpr uint64_t kPrime = 0xff51afd7ed558ccd;\n  for (uint64_t id = 0; id < options_.depth; ++id) {\n    const unsigned bucket = fingerprint % options_.buckets;\n    fingerprint *= kPrime;\n    Cell& cell = GetCell(id, bucket);\n    if (cell.count == 0) {\n      // No fingerprint in cell.\n      ResetCell(cell, fingerprint);\n    } else if (cell.fingerprint == fingerprint) {\n      // Same fingerprint, simply increment count.\n\n      // We could make sure that, if !cell.key.empty(), then key == cell.key.empty() here. 
However,\n      // what do we do in case they are different?\n      ++cell.count;\n\n      if (cell.count >= options_.min_key_count_to_record && cell.key.empty()) {\n        cell.key = key;\n      }\n    } else {\n      // Different fingerprint, apply exponential decay.\n      const double rand = absl::Uniform(bitgen_, 0, 1.0);\n      if (rand < std::pow(options_.decay_base, -static_cast<double>(cell.count))) {\n        --cell.count;\n        if (cell.count == 0) {\n          ResetCell(cell, fingerprint);\n        }\n      }\n    }\n  }\n}\n\nabsl::flat_hash_map<std::string, uint64_t> TopKeys::GetTopKeys() const {\n  absl::flat_hash_map<std::string, uint64_t> results;\n  for (unsigned array = 0; array < options_.depth; ++array) {\n    for (unsigned bucket = 0; bucket < options_.buckets; ++bucket) {\n      const Cell& cell = GetCell(array, bucket);\n      if (!cell.key.empty()) {\n        auto [it, added] = results.emplace(cell.key, cell.count);\n        if (!added && it->second < cell.count) {\n          it->second = cell.count;\n        }\n      }\n    }\n  }\n  return results;\n}\n\nTopKeys::Cell& TopKeys::GetCell(uint32_t d, uint32_t bucket) {\n  DCHECK(d < options_.depth);\n  DCHECK(bucket < options_.buckets);\n  return fingerprints_[d * options_.buckets + bucket];\n}\n\nconst TopKeys::Cell& TopKeys::GetCell(uint32_t d, uint32_t bucket) const {\n  DCHECK(d < options_.depth);\n  DCHECK(bucket < options_.buckets);\n  return fingerprints_[d * options_.buckets + bucket];\n}\n\n}  // end of namespace dfly\n"
  },
  {
    "path": "src/core/top_keys.h",
    "content": "// Copyright 2025, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#pragma once\n\n#include <absl/container/flat_hash_map.h>\n\n#include <string>\n#include <string_view>\n#include <vector>\n\n#include \"base/random.h\"\n\nnamespace dfly {\n\n// INTERNAL USE ONLY: This class is an optimized, O(1) probabilistic hot-key tracker designed\n// specifically to run on the database's hot path (e.g., tracking hot keys using DEBUG TOPK).\n// It cannot and should not be used for user-facing Redis TOPK commands. It intentionally\n// omits a Min-Heap (preventing instant eviction reporting), does not support arbitrary\n// increments, and does not use PMR allocators (which are required for strict memory\n// tracking and RDB serialization of user data).\n//\n// For the public Redis TOPK module API, use the `TOPK` class defined in `core/topk.h`.\n//\n// TopKeys is a utility class that helps determine the most frequently used keys.\n// Based on: HeavyKeeper paper,  https://www.usenix.org/conference/atc18/presentation/gong\n//\n// Usage:\n// - Instantiate this class with proper options (see below)\n// - For every used key k, call Touch(k)\n// - At some point(s) in time, call GetTopKeys() to get an estimated list of top keys along with\n//   their approximate count (i.e. how many times Touch() was invoked for them).\n//\n// Notes:\n// - This class implements a slightly modified version of HeavyKeeper, a data structure designed\n//   for a similar problem domain. The modification made is to store the keys directly within the\n//   tables, when they meet a certain threshold, instead of using a min-heap.\n// - This class is statistical in nature. Do *not* expect accurate counts.\n// - When misconfigured, real top keys may be missing from GetTopKeys(). 
This can occur when there\n//   are too few buckets, or when min_key_count_to_record is too high, depending on actual usage.\nclass TopKeys {\n  TopKeys(const TopKeys&) = delete;\n  TopKeys& operator=(const TopKeys&) = delete;\n\n public:\n  struct Options {\n    // HeavyKeeper options\n    uint32_t buckets = 1 << 16;\n    uint32_t depth = 4;\n\n    // What is the minimum times Touch() has to be called for a given key in order for the key to be\n    // saved. Use lower values when load is low, or higher values when load is high. The cost of a\n    // low value for high load is frequent string copying and memory allocation.\n    // Min value: 2\n    uint32_t min_key_count_to_record = 50;\n\n    double decay_base = 1.08;\n  };\n\n  explicit TopKeys(Options options);\n\n  void Touch(std::string_view key);\n  absl::flat_hash_map<std::string, uint64_t> GetTopKeys() const;\n\n private:\n  // Each cell consists of a key-fingerprint, a count, and potentially the key itself, when it's\n  // above options_.min_key_count_to_record.\n  struct Cell {\n    uint64_t fingerprint = 0;\n    uint64_t count = 0;\n    std::string key;\n  };\n  Cell& GetCell(uint32_t d, uint32_t bucket);\n  const Cell& GetCell(uint32_t d, uint32_t bucket) const;\n\n  Options options_;\n  base::Xoroshiro128p bitgen_;\n\n  // fingerprints_'s size is options_.buckets * options_.depth. Always access fields via GetCell().\n  std::vector<Cell> fingerprints_;\n};\n\n}  // end of namespace dfly\n"
  },
  {
    "path": "src/core/top_keys_test.cc",
    "content": "// Copyright 2022, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#include \"core/top_keys.h\"\n\n#include <absl/strings/str_cat.h>\n#include <gmock/gmock.h>\n\n#include \"base/gtest.h\"\n#include \"base/logging.h\"\n\nusing ::testing::Pair;\nusing ::testing::UnorderedElementsAre;\n\nnamespace dfly {\n\nTEST(TopKeysTest, Basic) {\n  TopKeys top_keys({.min_key_count_to_record = 2});\n  top_keys.Touch(\"key1\");\n  top_keys.Touch(\"key1\");\n  top_keys.Touch(\"key2\");\n  EXPECT_THAT(top_keys.GetTopKeys(), UnorderedElementsAre(Pair(\"key1\", 2)));\n}\n\nTEST(TopKeysTest, MultiTouch) {\n  TopKeys top_keys({.min_key_count_to_record = 2});\n  top_keys.Touch(\"key1\");\n  EXPECT_THAT(top_keys.GetTopKeys(), UnorderedElementsAre());\n  top_keys.Touch(\"key1\");\n  EXPECT_THAT(top_keys.GetTopKeys(), UnorderedElementsAre(Pair(\"key1\", 2)));\n  top_keys.Touch(\"key1\");\n  EXPECT_THAT(top_keys.GetTopKeys(), UnorderedElementsAre(Pair(\"key1\", 3)));\n}\n\nTEST(TopKeysTest, MinKeyCountToRecord) {\n  TopKeys top_keys({.min_key_count_to_record = 3});\n  top_keys.Touch(\"key1\");\n  EXPECT_THAT(top_keys.GetTopKeys(), UnorderedElementsAre());\n  top_keys.Touch(\"key1\");\n  EXPECT_THAT(top_keys.GetTopKeys(), UnorderedElementsAre());\n  top_keys.Touch(\"key1\");\n  EXPECT_THAT(top_keys.GetTopKeys(), UnorderedElementsAre(Pair(\"key1\", 3)));\n  top_keys.Touch(\"key1\");\n  EXPECT_THAT(top_keys.GetTopKeys(), UnorderedElementsAre(Pair(\"key1\", 4)));\n  top_keys.Touch(\"key1\");\n  EXPECT_THAT(top_keys.GetTopKeys(), UnorderedElementsAre(Pair(\"key1\", 5)));\n}\n\nTEST(TopKeysTest, MultiKeys) {\n  TopKeys top_keys({.min_key_count_to_record = 2});\n  for (int i = 0; i < 2; ++i) {\n    top_keys.Touch(\"key1\");\n    top_keys.Touch(\"key2\");\n  }\n  top_keys.Touch(\"key3\");\n  EXPECT_THAT(top_keys.GetTopKeys(), UnorderedElementsAre(Pair(\"key1\", 2), Pair(\"key2\", 2)));\n}\n\nTEST(TopKeysTest, BucketCollision) {\n  TopKeys 
top_keys({.buckets = 1, .min_key_count_to_record = 1});\n  for (int i = 0; i < 5; ++i) {\n    top_keys.Touch(\"key1\");\n  }\n  EXPECT_THAT(top_keys.GetTopKeys(), UnorderedElementsAre(Pair(\"key1\", 5)));\n\n  for (int i = 0; i < 100; ++i) {\n    top_keys.Touch(\"key2\");\n  }\n\n  auto top_keys_table = top_keys.GetTopKeys();\n  EXPECT_EQ(top_keys_table.size(), 1);\n  EXPECT_LE(top_keys_table[\"key2\"], 100);\n  EXPECT_GE(top_keys_table[\"key2\"], 50);\n\n  // Touching \"key1\" should *not* replace \"key2\".\n  top_keys.Touch(\"key1\");\n  EXPECT_FALSE(top_keys.GetTopKeys().contains(\"key1\"));\n}\n\nTEST(TopKeysTest, BucketCollisionAggressiveDecay) {\n  TopKeys top_keys({.buckets = 1, .min_key_count_to_record = 2, .decay_base = 1.0});\n  for (int i = 0; i < 5; ++i) {\n    top_keys.Touch(\"key1\");\n  }\n  EXPECT_THAT(top_keys.GetTopKeys(), UnorderedElementsAre(Pair(\"key1\", 5)));\n\n  for (int i = 0; i < 100; ++i) {\n    top_keys.Touch(\"key2\");\n  }\n  EXPECT_THAT(top_keys.GetTopKeys(), UnorderedElementsAre(Pair(\"key2\", 96)));\n}\n\nTEST(TopKeysTest, BucketCollisionHesitantDecay) {\n  TopKeys top_keys({.buckets = 1, .min_key_count_to_record = 2, .decay_base = 1000.0});\n  for (int i = 0; i < 5; ++i) {\n    top_keys.Touch(\"key1\");\n  }\n  EXPECT_THAT(top_keys.GetTopKeys(), UnorderedElementsAre(Pair(\"key1\", 5)));\n\n  for (int i = 0; i < 100; ++i) {\n    top_keys.Touch(\"key2\");\n  }\n  // \"key2\" will never replace \"key1\", as the decay practically never happens (1000^-5)\n  EXPECT_THAT(top_keys.GetTopKeys(), UnorderedElementsAre(Pair(\"key1\", 5)));\n}\n\nTEST(TopKeysTest, SavedByMultipleArrays) {\n  // This test is not trivial. It tests that having multiple arrays inside TopKeys saves keys in\n  // case of collision. 
The way it does it is by inserting an arbitrary key (= \"key\"), and then (at\n  // runtime) finding another key which *does* collide with that key.\n  //\n  // Once we've found such a key, we create another TopKeys instance, but this time with 10 arrays\n  // which should mean that for some hash value, the keys won't be present in the same bucket.\n\n  std::string collision_key;\n\n  TopKeys::Options options(\n      {.buckets = 2, .depth = 1, .min_key_count_to_record = 2, .decay_base = 1});\n  {\n    TopKeys top_keys(options);\n\n    // Insert some key\n    top_keys.Touch(\"key\");\n    top_keys.Touch(\"key\");\n\n    // Find a key with a collision\n    int i = 0;\n    while (true) {\n      collision_key = absl::StrCat(\"key\", i);\n      top_keys.Touch(collision_key);\n      if (!top_keys.GetTopKeys().contains(collision_key)) {\n        break;\n      }\n      ++i;\n    }\n  }\n\n  options.depth = 10;\n  {\n    TopKeys top_keys(options);\n\n    // Insert some key\n    top_keys.Touch(\"key\");\n    top_keys.Touch(\"key\");\n\n    // Insert collision key, expect result to be present\n    top_keys.Touch(collision_key);\n    top_keys.Touch(collision_key);\n    EXPECT_THAT(top_keys.GetTopKeys(),\n                UnorderedElementsAre(Pair(\"key\", 2), Pair(collision_key, 2)));\n  }\n}\n\n}  // end of namespace dfly\n"
  },
  {
    "path": "src/core/topk.cc",
    "content": "// Copyright 2026, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#include \"core/topk.h\"\n\n#include <xxhash.h>\n\n#include <algorithm>\n#include <cmath>\n#include <limits>\n#include <utility>\n\n#include \"absl/random/distributions.h\"\n#include \"base/logging.h\"\n#include \"base/random.h\"\n\nnamespace dfly {\n\nnamespace {\n\nconst std::array<double, TOPK::kDecayLookupSize>& GetDefaultDecayTable() {\n  static const auto table = [] {\n    std::array<double, TOPK::kDecayLookupSize> t{};\n    for (size_t i = 0; i < TOPK::kDecayLookupSize; ++i) {\n      t[i] = std::pow(TOPK::kDefaultDecay, static_cast<double>(i));\n    }\n    return t;\n  }();\n  return table;\n}\n\n}  // namespace\n\nTOPK::TOPK(PMR_NS::memory_resource* mr, uint32_t k, uint32_t width, uint32_t depth, double decay)\n    : k_(k),\n      width_(width),\n      depth_(depth),\n      decay_(decay),\n      counters_(static_cast<size_t>(width) * depth, 0, PMR_NS::polymorphic_allocator<uint32_t>(mr)),\n      min_heap_(PMR_NS::polymorphic_allocator<HeapItem>(mr)) {\n  DCHECK(mr != nullptr);\n  DCHECK_GT(k_, 0u);\n  DCHECK_GT(width_, 0u);\n  DCHECK_GT(depth_, 0u);\n  DCHECK_GE(decay_, 0.0);\n  DCHECK_LE(decay_, 1.0);\n  min_heap_.reserve(k_);\n\n  if (std::abs(decay_ - TOPK::kDefaultDecay) < TOPK::kDecayEpsilon) {\n    // default decay value: use shared static table to save memory and initialization time\n    decay_lookup_ = &GetDefaultDecayTable();\n  } else {\n    // custom decay value: build a dedicated table for this instance\n    custom_decay_table_ = std::make_unique<std::array<double, TOPK::kDecayLookupSize>>();\n    for (size_t i = 0; i < TOPK::kDecayLookupSize; ++i) {\n      (*custom_decay_table_)[i] = std::pow(decay_, static_cast<double>(i));\n    }\n    decay_lookup_ = custom_decay_table_.get();\n  }\n}\n\nTOPK::TOPK(TOPK&& other) noexcept\n    : k_(std::exchange(other.k_, 0)),\n      width_(std::exchange(other.width_, 0)),\n      
depth_(std::exchange(other.depth_, 0)),\n      decay_(std::exchange(other.decay_, 0.0)),\n      decay_lookup_(std::exchange(other.decay_lookup_, nullptr)),\n      custom_decay_table_(std::move(other.custom_decay_table_)),\n      counters_(std::move(other.counters_)),\n      min_heap_(std::move(other.min_heap_)) {\n}\n\nTOPK& TOPK::operator=(TOPK&& other) noexcept {\n  if (this != &other) {\n    k_ = std::exchange(other.k_, 0);\n    width_ = std::exchange(other.width_, 0);\n    depth_ = std::exchange(other.depth_, 0);\n    decay_ = std::exchange(other.decay_, 0.0);\n    decay_lookup_ = std::exchange(other.decay_lookup_, nullptr);\n    custom_decay_table_ = std::move(other.custom_decay_table_);\n    counters_ = std::move(other.counters_);\n    min_heap_ = std::move(other.min_heap_);\n  }\n  return *this;\n}\n\nuint64_t TOPK::Hash(std::string_view item, uint32_t row) const {\n  auto full_hash = XXH3_64bits_withSeed(item.data(), item.size(), row);\n\n  // Lemire's Fast Range Reduction avoids the expensive CPU integer division penalty of the modulo\n  // (%) operator. The main principle: multiplication is much faster than division, so we multiply\n  // a 32-bit slice of the hash by the width, and then shift right by 32 bits to get the bucket\n  // index. See: https://lemire.me/blog/2016/06/27/a-fast-alternative-to-the-modulo-reduction/\n  uint32_t hash32 = static_cast<uint32_t>(full_hash);\n\n  uint64_t bucket = (static_cast<uint64_t>(hash32) * width_) >> 32;\n  DCHECK_LT(bucket, width_);\n  return bucket;\n}\n\ndouble TOPK::ComputeDecayProbability(uint32_t count) const {\n  DCHECK(decay_lookup_);\n  DCHECK_GT(count, 0u);\n  const auto& table = *decay_lookup_;\n  if (count < kDecayLookupSize) {\n    return table[count];\n  }\n\n  // If the probability is already less than kDecayEpsilon, the chance of decay is\n  // statistically zero (see ShouldDecay). 
Skip the expensive std::pow extrapolation entirely.\n  if (table[TOPK::kDecayLookupSize - 1] < TOPK::kDecayEpsilon) {\n    return 0.0;\n  }\n\n  // Extrapolate probabilities for counts that exceed our lookup table's max index.\n  // Let M = the maximum table index (kDecayLookupSize - 1)\n  // Let Q = the quotient (count / M)\n  // Let R = the remainder (count % M)\n  //\n  // Using the Laws of Exponents, we break down decay^count:\n  // decay^count = decay^((Q * M) + R) = (decay^M)^Q * decay^R\n  //\n  // This translates directly to reusing our cached table:\n  // std::pow(table[M], Q) * table[R]\n  uint32_t quotient = count / (TOPK::kDecayLookupSize - 1);\n  uint32_t remainder = count % (TOPK::kDecayLookupSize - 1);\n  double base = table[TOPK::kDecayLookupSize - 1];\n  return std::pow(base, static_cast<double>(quotient)) * table[remainder];\n}\n\nbool TOPK::ShouldDecay(uint32_t current_count) const {\n  if (current_count == 0)\n    return false;\n\n  // Exponential decay probability: decay^count\n  thread_local base::Xoroshiro128p bitgen;\n  double prob = ComputeDecayProbability(current_count);\n  return absl::Uniform(bitgen, 0.0, 1.0) < prob;\n}\n\nvoid TOPK::HeapifyUp(size_t index) {\n  DCHECK_LT(index, min_heap_.size());\n  // Restores the min-heap property by shifting the element at 'index' upward.\n  // Triggered in two cases:\n  // 1. Initial insertion: A new item is appended to the array and needs to bubble up.\n  // 2. 
Count decrease: An existing item's count drops (becomes smaller), floating higher.\n  while (index > 0) {\n    size_t parent = (index - 1) / 2;\n    if (min_heap_[parent].count <= min_heap_[index].count) {\n      break;  // Heap property satisfied\n    }\n\n    // Swap with parent\n    std::swap(min_heap_[parent], min_heap_[index]);\n    index = parent;\n  }\n}\n\nvoid TOPK::HeapifyDown(size_t index) {\n  DCHECK_LT(index, min_heap_.size());\n  // Restores the min-heap property by shifting the element at 'index' downward.\n  // Triggered in two cases:\n  // 1. Root replacement/removal: The minimum item is evicted/replaced and the new root must sink.\n  // 2. Count increase: An existing item's count grows (becomes heavier), sinking lower.\n  size_t size = min_heap_.size();\n\n  while (true) {\n    size_t left = (2 * index) + 1;\n    size_t right = (2 * index) + 2;\n    size_t smallest = index;\n\n    if ((left < size) && (min_heap_[left].count) < (min_heap_[smallest].count)) {\n      smallest = left;\n    }\n    if ((right < size) && (min_heap_[right].count) < (min_heap_[smallest].count)) {\n      smallest = right;\n    }\n\n    if (smallest == index) {\n      break;  // Heap property satisfied\n    }\n\n    // Swap with smallest child\n    std::swap(min_heap_[smallest], min_heap_[index]);\n    index = smallest;\n  }\n}\n\nsize_t TOPK::GetCounterIndex(std::string_view item, uint32_t row) const {\n  DCHECK_LT(row, depth_);\n  // Note:\n  // - bucket is mathematically guaranteed to be in the range [0, width_ - 1]\n  // - The max possible idx is depth * width - 1, which is within the bounds of our counters_\n  // vector\n  uint64_t bucket = Hash(item, row);\n  size_t idx = static_cast<size_t>(row) * width_ + bucket;\n  DCHECK_LT(idx, counters_.size());\n  return idx;\n}\n\nuint32_t TOPK::Count(std::string_view item) const {\n  uint32_t min_count = std::numeric_limits<uint32_t>::max();\n\n  for (uint32_t row = 0; row < depth_; ++row) {\n    size_t idx = 
GetCounterIndex(item, row);\n    min_count = std::min(min_count, counters_[idx]);\n  }\n\n  return min_count;\n}\n\nstd::optional<std::string> TOPK::IncrementInternal(std::string_view item, uint32_t increment) {\n  uint32_t min_count = std::numeric_limits<uint32_t>::max();\n\n  // Update counters using HeavyKeeper logic\n  for (uint32_t row = 0; row < depth_; ++row) {\n    size_t idx = GetCounterIndex(item, row);\n\n    // HeavyKeeper: decay and increment are mutually exclusive.\n    // - With probability decay^count, the counter is decremented (colliding items suppress each\n    // other).\n    // - Otherwise, the counter is incremented for the item being added.\n    if ((counters_[idx] > 0) && ShouldDecay(counters_[idx])) {\n      --counters_[idx];\n    } else {\n      counters_[idx] = static_cast<uint32_t>(\n          std::min(static_cast<uint64_t>(counters_[idx]) + increment,\n                   static_cast<uint64_t>(std::numeric_limits<uint32_t>::max())));\n    }\n\n    // Count-Min Sketch property: The minimum counter across all rows is the\n    // most accurate, as it has suffered the fewest hash collisions.\n    min_count = std::min(min_count, counters_[idx]);\n  }\n\n  return UpdateHeap(item, min_count);\n}\n\nstd::optional<std::string> TOPK::Add(std::string_view item) {\n  return IncrementInternal(item, 1);\n}\n\nstd::optional<std::string> TOPK::IncrBy(std::string_view item, uint32_t increment) {\n  if (increment < 1) {\n    return std::nullopt;\n  }\n  return IncrementInternal(item, increment);\n}\n\nstd::vector<TOPK::TopKItem> TOPK::List() const {\n  std::vector<TopKItem> result;\n  result.reserve(min_heap_.size());\n\n  for (const auto& heap_item : min_heap_) {\n    result.push_back({heap_item.key, heap_item.count});\n  }\n\n  // Sort by count (descending) for output\n  std::sort(result.begin(), result.end(),\n            [](const TopKItem& a, const TopKItem& b) { return a.count > b.count; });\n\n  return result;\n}\n\nstd::optional<std::string> 
TOPK::UpdateHeap(std::string_view item, uint32_t new_count) {\n  // Fast path: O(K) linear scan.\n  // For small K, this avoids hash map overhead. Short keys benefit from SSO\n  // (Small String Optimization), keeping memory contiguous and cache-friendly.\n  // TODO: Benchmark to find the crossover point where larger K OR long strings (SSO not applicable)\n  // justify re-introducing a hash map.\n  for (size_t i = 0; i < min_heap_.size(); ++i) {\n    if (min_heap_[i].key == item) {\n      uint32_t old_count = min_heap_[i].count;\n      min_heap_[i].count = new_count;\n      if (new_count > old_count) {\n        HeapifyDown(i);\n      } else if (new_count < old_count) {\n        HeapifyUp(i);\n      }\n      return std::nullopt;\n    }\n  }\n\n  // Fast reject: item doesn't qualify for the heap. Just exit without any memory allocations or\n  // modifications.\n  if ((min_heap_.size() >= k_) && (new_count <= min_heap_.front().count)) {\n    return std::nullopt;\n  }\n  DCHECK_LE(min_heap_.size(), k_);\n\n  // Slow path: item will enter the heap. 
Now allocate.\n  std::string item_str(item);\n\n  if (min_heap_.size() < k_) {\n    // Heap not full, add the item, no eviction needed\n    size_t new_idx = min_heap_.size();\n    min_heap_.push_back({std::move(item_str), new_count});\n    HeapifyUp(new_idx);\n    return std::nullopt;\n  }\n\n  // Heap is full, evict minimum and add new item\n  DCHECK_EQ(min_heap_.size(), k_);\n  std::string old_key = std::move(min_heap_[0].key);\n  min_heap_[0] = {std::move(item_str), new_count};\n  HeapifyDown(0);\n  return old_key;\n}\n\nsize_t TOPK::MallocUsed() const {\n  size_t size = 0;\n\n  // Custom decay table (only for non-default decay values)\n  if (custom_decay_table_) {\n    size += sizeof(std::array<double, kDecayLookupSize>);\n  }\n\n  // Counter array\n  size += counters_.capacity() * sizeof(uint32_t);\n\n  // Heap items - calculate actual string sizes\n  size += min_heap_.capacity() * sizeof(HeapItem);\n  for (const auto& item : min_heap_) {\n    size += item.key.capacity();\n  }\n\n  return size;\n}\n\nTOPK::SerializedData TOPK::Serialize() const {\n  SerializedData data;\n  data.k = k_;\n  data.width = width_;\n  data.depth = depth_;\n  data.decay = decay_;\n\n  // Serialize heap items\n  data.heap_items.reserve(min_heap_.size());\n  for (const auto& heap_item : min_heap_) {\n    data.heap_items.push_back({heap_item.key, heap_item.count});\n  }\n\n  // Serialize counter array\n  data.counters.assign(counters_.begin(), counters_.end());\n\n  return data;\n}\n\nvoid TOPK::Deserialize(const SerializedData& data) {\n  DCHECK_EQ(data.counters.size(), static_cast<size_t>(width_) * depth_);\n  DCHECK_LE(data.heap_items.size(), k_);\n  DCHECK_EQ(data.k, k_);\n  DCHECK_EQ(data.width, width_);\n  DCHECK_EQ(data.depth, depth_);\n  DCHECK_EQ(data.decay, decay_);\n\n  // Clear existing data\n  min_heap_.clear();\n\n  // Restore counters\n  counters_.assign(data.counters.begin(), data.counters.end());\n\n  // Restore heap\n  min_heap_.reserve(data.heap_items.size());\n  for 
(const auto& item : data.heap_items) {\n    min_heap_.push_back({item.item, item.count});\n  }\n\n  // Rebuild heap property\n  std::make_heap(min_heap_.begin(), min_heap_.end(), std::greater<HeapItem>());\n}\n\n}  // namespace dfly\n"
  },
  {
    "path": "src/core/topk.h",
    "content": "// Copyright 2026, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#pragma once\n\n#include <array>\n#include <cstdint>\n#include <memory>\n#include <optional>\n#include <queue>\n#include <string>\n#include <string_view>\n#include <vector>\n\n#include \"base/pmr/memory_resource.h\"\n\nnamespace dfly {\n\nclass TOPKTest;\n\n//\n// TOPK: User-Facing API Data Structure\n//\n// This class implements the data structure required to support the public Redis\n// TOPK module API (e.g., TOPK.RESERVE, TOPK.ADD, TOPK.INCRBY).\n//\n// WHY WE HAVE TWO TOP-K IMPLEMENTATIONS:\n// Dragonfly maintains two separate Top-K tracking structures to protect the\n// performance of the database's hot path:\n// 1. `TopKeys` (src/core/top_keys.h): An internal-only, hyper-optimized O(1)\n//    tracker that runs on every single database command to detect hot keys.\n//    It intentionally lacks a min-heap and uses standard memory allocation to\n//    maximize raw speed and minimize instruction cache pollution.\n// 2. `TOPK` (this file): The user-facing implementation. To comply with the Redis\n//    API contract, this class MUST support instant eviction reporting (requiring an\n//    O(log K) Min-Heap), arbitrary increments, and PMR allocators for strict\n//    memory limit tracking and RDB snapshot serialization.\n//\n// Forcing the internal tracker to support Min-Heaps and PMR would severely\n// degrade overall database throughput, hence the strict separation of concerns.\n//\n// Algorithm Deviation Note:\n// While heavily inspired by the HeavyKeeper algorithm, this is NOT a strict\n// implementation. The original HeavyKeeper paper requires storing a\n// (fingerprint, count) pair in each cell so that decay only penalizes a specific\n// item. This implementation uses a bare `uint32_t` counter grid, making it closer\n// to a Count-Min Sketch coupled with a Min-Heap and a decay heuristic. 
This\n// design safely overestimates counts (which is acceptable for Top-K bounds)\n// while simplifying PMR memory layout and RDB serialization.\n//\n// TODO: Full PMR Integration for String Ownership\n// Currently, min_heap_ and counters_ use the provided memory_resource, ensuring the\n// dominant allocations are tracked. However, the std::string keys inside HeapItem\n// use the default heap.\n// Future optimization: Upgrade HeapItem to use PMR_NS::string with proper\n// uses_allocator construction.\nclass TOPK {\n  friend class TOPKTest;\n\n public:\n  // Initializes a Top-K tracking sketch with the specified dimensions.\n  //\n  // mr: Pointer to the memory resource used for allocations (MUST NOT be null).\n  // k: Maximum number of most frequent items to maintain in the min-heap.\n  // width: Number of counter buckets per row in the hash grid (default: 8).\n  // depth: Number of independent hash functions (rows) used (default: 7).\n  // decay: Probability multiplier for exponential decay (must be 0.0 to 1.0, default: 0.9).\n  TOPK(PMR_NS::memory_resource* mr, uint32_t k, uint32_t width = kDefaultWidth,\n       uint32_t depth = kDefaultDepth, double decay = kDefaultDecay);\n\n  TOPK(const TOPK&) = delete;\n  TOPK& operator=(const TOPK&) = delete;\n  TOPK(TOPK&& other) noexcept;\n  TOPK& operator=(TOPK&& other) noexcept;\n  ~TOPK() = default;\n\n  static constexpr double kDefaultDecay = 0.9;\n  static constexpr uint32_t kDefaultWidth = 8;\n  static constexpr uint32_t kDefaultDepth = 7;\n  static constexpr double kDecayEpsilon = 1e-9;\n  // Size is 4097 so that (kDecayLookupSize - 1) equals exactly 4096 (2^12).\n  // This allows the C++ compiler to optimize the division and modulo operations\n  // in the extrapolation hot-path into very-fast bitwise shifts & ANDs.\n  static constexpr size_t kDecayLookupSize = 4097;\n\n  // Represents an item in the Top-K list with its estimated count\n  struct TopKItem {\n    std::string item;\n    uint32_t count;\n  };\n\n  // 
Inserts a single item into the Top-K sketch, incrementing its estimated frequency by 1.\n  //\n  // Returns: The string of the evicted item if this insertion caused a resident\n  //          item to be displaced from the Top-K min-heap, or std::nullopt\n  //          if no eviction occurred.\n  std::optional<std::string> Add(std::string_view item);\n\n  // Increments an item's estimated frequency by a specific amount.\n  //\n  // If 'increment' is 0, this operation is a safe no-op and returns std::nullopt.\n  // Otherwise, returns the string of the evicted item if this operation caused\n  // a resident item to be displaced from the Top-K min-heap, or std::nullopt.\n  std::optional<std::string> IncrBy(std::string_view item, uint32_t increment);\n\n  // Queries whether an item currently resides in the Top-K min-heap.\n  [[nodiscard]] bool Query(std::string_view item) const {\n    return IsInHeap(item);\n  }\n\n  // Estimates the frequency count for an item using the underlying sketch.\n  // Returns the minimum counter value across all hash rows (Count-Min Sketch estimate).\n  [[nodiscard]] uint32_t Count(std::string_view item) const;\n\n  // Retrieves the complete list of current Top-K high-frequency items.\n  //\n  // Returns: A vector of TopKItem structures (containing the key and its count),\n  //          sorted in descending order by estimated frequency (highest first).\n  [[nodiscard]] std::vector<TopKItem> List() const;\n\n  // --------------------------------------------------------------------------\n  // Accessors for Top-K Configuration Parameters\n  // --------------------------------------------------------------------------\n\n  // Returns the maximum capacity (K) of the Top-K min-heap.\n  [[nodiscard]] uint32_t K() const {\n    return k_;\n  }\n\n  // Returns the number of items currently tracked in the Top-K heap.\n  [[nodiscard]] size_t Size() const {\n    return min_heap_.size();\n  }\n\n  // Returns the width (number of columns/buckets) of the 
Count-Min Sketch array.\n  [[nodiscard]] uint32_t Width() const {\n    return width_;\n  }\n\n  // Returns the depth (number of rows/hash functions) of the Count-Min Sketch array.\n  [[nodiscard]] uint32_t Depth() const {\n    return depth_;\n  }\n\n  // Returns the exponential decay probability base used by the HeavyKeeper algorithm.\n  [[nodiscard]] double Decay() const {\n    return decay_;\n  }\n\n  // Calculates the total heap memory dynamically allocated by this Top-K instance,\n  // including the custom decay table (if any), sketch counters, min-heap storage, and key strings.\n  //\n  // Returns: Total memory usage in bytes.\n  [[nodiscard]] size_t MallocUsed() const;\n\n  // --------------------------------------------------------------------------\n  // Serialization and Persistence\n  // --------------------------------------------------------------------------\n\n  // Pod-like structure to hold the exact internal state of the Top-K instance.\n  struct SerializedData {\n    uint32_t k;\n    uint32_t width;\n    uint32_t depth;\n    double decay;\n    std::vector<TopKItem> heap_items;\n    std::vector<uint32_t> counters;\n  };\n\n  // Extracts the current structural state of the sketch for RDB persistence.\n  [[nodiscard]] SerializedData Serialize() const;\n\n  // Reconstructs the internal state of the sketch from a previously serialized dataset.\n  void Deserialize(const SerializedData& data);\n\n private:\n  struct HeapItem {\n    std::string key;\n    uint32_t count;\n\n    // Min heap comparator\n    bool operator>(const HeapItem& other) const {\n      return count > other.count;\n    }\n  };\n\n  // Hash function for bucket selection in row\n  [[nodiscard]] uint64_t Hash(std::string_view item, uint32_t row) const;\n\n  // Exponential decay logic\n  [[nodiscard]] bool ShouldDecay(uint32_t current_count) const;\n\n  // Updates the min-heap with the new count for the given item.\n  // Returns the evicted item's key if the heap is at capacity and a new item displaces an 
existing\n  // one. Otherwise, returns std::nullopt.\n  std::optional<std::string> UpdateHeap(std::string_view item, uint32_t new_count);\n\n  // Check if an item is in the Top-K heap\n  [[nodiscard]] bool IsInHeap(std::string_view item) const {\n    for (const auto& heap_item : min_heap_) {\n      if (heap_item.key == item)\n        return true;\n    }\n    return false;\n  }\n\n  // Hashes the item for a specific row and calculates its flattened 1D index\n  // within the counters_ array. Maps the 2D Count-Min Sketch grid (depth x width)\n  // into a single contiguous block of memory for better CPU cache locality.\n  size_t GetCounterIndex(std::string_view item, uint32_t row) const;\n\n  // Shared increment logic\n  std::optional<std::string> IncrementInternal(std::string_view item, uint32_t increment);\n\n  // Compute decay probability using lookup table or extrapolation\n  double ComputeDecayProbability(uint32_t count) const;\n\n  // Heap maintenance functions\n  // O(log k) ops\n  void HeapifyUp(size_t index);\n  void HeapifyDown(size_t index);\n\n  uint32_t k_;      // Number of top items to track\n  uint32_t width_;  // Hash table width (buckets per row)\n  uint32_t depth_;  // Hash table depth (number of rows)\n  double decay_;    // Decay constant (0.0-1.0, typically 0.9)\n\n  // Pointer to the active decay lookup table. For the default decay (0.9), this points to\n  // a process-wide shared static table (32KB, allocated once). For custom (non-default) decay\n  // values, it points to custom_decay_table_ below. This pattern can help to avoid embedding a 32KB\n  // array in every TOPK object.\n  // Assumption: >99% of TOPK instances will use the default decay, so\n  // this optimization can significantly reduce memory usage and improve startup performance by\n  // avoiding the need to build a custom table for each instance.\n  const std::array<double, kDecayLookupSize>* decay_lookup_ = nullptr;\n\n  // Heap-allocated table for non-default decay values. 
Null for the common case (decay=0.9).\n  std::unique_ptr<std::array<double, kDecayLookupSize>> custom_decay_table_;\n\n  // HeavyKeeper data structures\n  // Hash table: width × depth matrix of counters\n  std::vector<uint32_t, PMR_NS::polymorphic_allocator<uint32_t>> counters_;\n\n  // Min heap: vector of top-K items maintained as a min heap\n  std::vector<HeapItem, PMR_NS::polymorphic_allocator<HeapItem>> min_heap_;\n};\n\n}  // namespace dfly\n"
  },
  {
    "path": "src/core/topk_test.cc",
    "content": "// Copyright 2026, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#include \"core/topk.h\"\n\n#include <absl/strings/str_cat.h>\n\n#include <cmath>\n#include <limits>\n#include <string>\n#include <utility>\n#include <vector>\n\n#include \"base/gtest.h\"\n\nnamespace dfly {\n\nusing namespace std;\n\nclass TOPKTest : public ::testing::Test {\n protected:\n  // Use decay=0 to disable probabilistic decay, making tests deterministic.\n  // With decay=0, ShouldDecay always returns false (0^count = 0 for count>0),\n  // so counters only grow and are never decremented by colliding items.\n  // Having a decay != 0 will cause probabilistic flakiness in tests, as items may be randomly\n  // evicted due to decay rather than true count comparisons.\n  TOPKTest() : topk_(PMR_NS::get_default_resource(), 5, 100, 5, 0.0) {\n  }\n\n  double ComputeDecayProbability(TOPK* topk, uint32_t count) const {\n    return topk->ComputeDecayProbability(count);\n  }\n  TOPK topk_;\n};\n\n// ---------------------------------------------------------------------------\n// Construction & Configuration\n// ---------------------------------------------------------------------------\n\n// Verify K(), Width(), Depth(), Decay() return the exact values passed to the constructor.\nTEST(TOPKBasic, ConstructorStoresParameters) {\n  TOPK topk(PMR_NS::get_default_resource(), 10, 200, 7, 0.85);\n  EXPECT_EQ(topk.K(), 10u);\n  EXPECT_EQ(topk.Width(), 200u);\n  EXPECT_EQ(topk.Depth(), 7u);\n  EXPECT_DOUBLE_EQ(topk.Decay(), 0.85);\n}\n\n// Verify that default decay reuses the static process-wide table (saving memory),\n// while a custom decay value allocates its own ~32KB lookup table.\nTEST(TOPKBasic, DecayTableMemoryAllocation) {\n  TOPK default_topk(PMR_NS::get_default_resource(), 5, 100, 5, TOPK::kDefaultDecay);\n  TOPK custom_topk(PMR_NS::get_default_resource(), 5, 100, 5, 0.75);\n\n  size_t default_mem = default_topk.MallocUsed();\n  size_t custom_mem 
= custom_topk.MallocUsed();\n\n  // Test that the custom one uses strictly more memory\n  EXPECT_LT(default_mem, custom_mem);\n\n  // Test that the difference in memory is at least the size of the custom decay array\n  size_t expected_table_size = TOPK::kDecayLookupSize * sizeof(double);\n  EXPECT_GE(custom_mem - default_mem, expected_table_size);\n}\n\n// Move-construct a populated TOPK; source should be emptied and destination should hold the items.\nTEST_F(TOPKTest, MoveConstructorTransfersOwnership) {\n  topk_.Add(\"alpha\");\n  topk_.Add(\"beta\");\n\n  TOPK moved(std::move(topk_));\n\n  EXPECT_EQ(moved.K(), 5u);\n  auto list = moved.List();\n  EXPECT_FALSE(list.empty());\n\n  // Source is zeroed out.\n  EXPECT_EQ(topk_.K(), 0u);\n}\n\n// Move-assign a populated TOPK into another; verify same post-conditions as move constructor.\nTEST(TOPKBasic, MoveAssignmentTransfersOwnership) {\n  TOPK src(PMR_NS::get_default_resource(), 3, 50, 3, 0.0);\n  src.Add(\"x\");\n  src.Add(\"y\");\n\n  TOPK dst(PMR_NS::get_default_resource(), 1, 10, 1, 0.0);\n  dst = std::move(src);\n\n  EXPECT_EQ(dst.K(), 3u);\n  EXPECT_EQ(dst.Width(), 50u);\n  auto list = dst.List();\n  EXPECT_EQ(list.size(), 2u);\n  EXPECT_EQ(src.K(), 0u);\n}\n\n// ---------------------------------------------------------------------------\n// Add\n// ---------------------------------------------------------------------------\n\n// Add exactly K distinct items; List() should return exactly K items with no evictions.\nTEST_F(TOPKTest, AddFillsHeapUpToK) {\n  for (uint32_t i{}; i < topk_.K(); ++i) {\n    auto evicted = topk_.Add(absl::StrCat(\"item\", i));\n    EXPECT_FALSE(evicted.has_value()) << \"Unexpected eviction at i=\" << i;\n  }\n  EXPECT_EQ(topk_.List().size(), topk_.K());\n}\n\n// Each Add() while the heap has room returns std::nullopt.\n// Note: adding a K+1th item with the same count as the minimum also returns nullopt,\n// because the fast-reject path correctly requires new_count > min to trigger an 
eviction.\nTEST_F(TOPKTest, AddReturnsNulloptWhileHeapNotFull) {\n  for (uint32_t i{}; i < topk_.K(); ++i) {\n    EXPECT_EQ(topk_.Add(absl::StrCat(\"item\", i)), nullopt);\n  }\n}\n\n// After filling the heap, IncrBy a new item with a large count to force an eviction.\nTEST_F(TOPKTest, AddEvictsMinimumWhenHeapFull) {\n  // Fill the heap with K items, each added once (count=1).\n  for (uint32_t i{}; i < topk_.K(); ++i) {\n    topk_.Add(absl::StrCat(\"filler\", i));\n  }\n\n  // Force a new item in with a large count; it must evict the minimum.\n  auto evicted = topk_.IncrBy(\"heavy_hitter\", 1000);\n  EXPECT_TRUE(evicted.has_value());\n}\n\n// After filling the heap, adding an item whose count can't exceed the minimum shouldn't evict.\nTEST_F(TOPKTest, AddDoesNotEvictWhenNewItemScoreTooLow) {\n  // Fill the heap with items pumped to high counts.\n  for (uint32_t i{}; i < topk_.K(); ++i) {\n    topk_.IncrBy(absl::StrCat(\"big\", i), 1000);\n  }\n\n  // Single add of a brand-new item (count=1) won't beat any existing item.\n  auto evicted = topk_.Add(\"tiny_newcomer\");\n  EXPECT_FALSE(evicted.has_value());\n}\n\n// Adding the same item repeatedly increases its count in the heap.\n// Because decay=0.0 and there are no collisions, the count must be exactly 100.\nTEST_F(TOPKTest, AddSameItemRepeatedlyIncreasesCount) {\n  for (int i{}; i < 100; ++i) {\n    topk_.Add(\"repeat\");\n  }\n\n  auto list = topk_.List();\n  bool found = false;\n  for (const auto& item : list) {\n    if (item.item == \"repeat\") {\n      EXPECT_EQ(item.count, 100u);\n      found = true;\n    }\n  }\n  EXPECT_TRUE(found);\n}\n\n// ---------------------------------------------------------------------------\n// IncrBy\n// ---------------------------------------------------------------------------\n\n// IncrBy with increment=0 must return nullopt and not modify state.\nTEST_F(TOPKTest, IncrByZeroReturnsNullopt) {\n  topk_.Add(\"existing\");\n  auto before = topk_.Count(\"existing\");\n  auto result 
= topk_.IncrBy(\"existing\", 0);\n  EXPECT_EQ(result, nullopt);\n  auto after = topk_.Count(\"existing\");\n  EXPECT_EQ(before, after);\n}\n\n// IncrBy(item, 1) should behave the same as Add(item) — both increment by 1.\nTEST(TOPKBasic, IncrByOneBehavesLikeAdd) {\n  TOPK a(PMR_NS::get_default_resource(), 3, 100, 5, 0.0);\n  TOPK b(PMR_NS::get_default_resource(), 3, 100, 5, 0.0);\n\n  a.Add(\"x\");\n  b.IncrBy(\"x\", 1);\n\n  EXPECT_EQ(a.Count(\"x\"), b.Count(\"x\"));\n}\n\n// A single IncrBy with a large increment should immediately promote the item into the heap,\n// evicting the current minimum.\nTEST_F(TOPKTest, IncrByLargeValueCausesImmediateEviction) {\n  for (uint32_t i{}; i < topk_.K(); ++i) {\n    topk_.Add(absl::StrCat(\"base\", i));\n  }\n  auto evicted = topk_.IncrBy(\"newcomer\", 10000);\n  EXPECT_TRUE(evicted.has_value());\n\n  EXPECT_TRUE(topk_.Query(\"newcomer\"));\n}\n\n// IncrBy on an item already in the heap should increase its count without eviction.\nTEST_F(TOPKTest, IncrByExistingHeapItemUpdatesCount) {\n  topk_.IncrBy(\"item_a\", 50);\n  auto count_before = topk_.Count(\"item_a\");\n\n  auto evicted = topk_.IncrBy(\"item_a\", 100);\n  EXPECT_EQ(evicted, nullopt);\n\n  auto count_after = topk_.Count(\"item_a\");\n  EXPECT_GT(count_after, count_before);\n}\n\n// ---------------------------------------------------------------------------\n// Query\n// ---------------------------------------------------------------------------\n\n// All K items currently in the heap should return true from Query.\nTEST_F(TOPKTest, QueryReturnsTrueForHeapItems) {\n  for (uint32_t i{}; i < topk_.K(); ++i) {\n    string key = absl::StrCat(\"key\", i);\n    topk_.Add(key);\n    EXPECT_TRUE(topk_.Query(key)) << key << \" should be in heap\";\n  }\n}\n\n// Items that were never inserted should return false from Query.\nTEST_F(TOPKTest, QueryReturnsFalseForNonHeapItems) {\n  EXPECT_FALSE(topk_.Query(\"never_seen\"));\n  EXPECT_FALSE(topk_.Query(\"also_absent\"));\n  
EXPECT_FALSE(topk_.Query(\"nope\"));\n}\n\n// An item that was once in the heap but got evicted should return false from Query.\nTEST_F(TOPKTest, QueryReturnsFalseForEvictedItems) {\n  // Add our target victim. Count = 1.\n  string victim = \"low0\";\n  topk_.Add(victim);\n\n  // Fill the rest of the heap (K=5) with items that are heavier.\n  for (uint32_t i{1}; i < topk_.K(); ++i) {\n    topk_.IncrBy(absl::StrCat(\"heavier\", i), 50);\n  }\n\n  // Verify the victim is currently in the heap.\n  EXPECT_TRUE(topk_.Query(victim));\n\n  // Evict by adding a massive item.\n  topk_.IncrBy(\"massive\", 10000);\n\n  // Strictly assert that the victim is gone.\n  EXPECT_FALSE(topk_.Query(victim));\n}\n\n// Mixed: item in heap vs item not in heap.\nTEST_F(TOPKTest, QueryMixedBatch) {\n  topk_.IncrBy(\"inheap\", 100);\n  EXPECT_TRUE(topk_.Query(\"inheap\"));\n  EXPECT_FALSE(topk_.Query(\"notheap\"));\n}\n\n// ---------------------------------------------------------------------------\n// Count\n// ---------------------------------------------------------------------------\n\n// Items never inserted should return count 0.\nTEST_F(TOPKTest, CountReturnsZeroForUnseen) {\n  EXPECT_EQ(topk_.Count(\"never_added\"), 0u);\n  EXPECT_EQ(topk_.Count(\"also_missing\"), 0u);\n}\n\n// Items that have been added should return a count >= 1.\nTEST_F(TOPKTest, CountReturnsNonZeroForSeenItems) {\n  topk_.Add(\"seen\");\n  EXPECT_GE(topk_.Count(\"seen\"), 1u);\n}\n\n// The count from Count() for a heap item should match the count reported in List().\nTEST_F(TOPKTest, CountForHeapItemMatchesListCount) {\n  topk_.IncrBy(\"match_me\", 50);\n  auto count_val = topk_.Count(\"match_me\");\n  auto list = topk_.List();\n\n  bool found = false;\n  for (const auto& item : list) {\n    if (item.item == \"match_me\") {\n      EXPECT_EQ(item.count, count_val);\n      found = true;\n    }\n  }\n  EXPECT_TRUE(found);\n}\n\n// ---------------------------------------------------------------------------\n// 
List\n// ---------------------------------------------------------------------------\n\n// List() returns an empty vector on a freshly constructed TOPK.\nTEST(TOPKBasic, ListEmptyOnConstruction) {\n  TOPK fresh(PMR_NS::get_default_resource(), 5, 100, 5, 0.0);\n  EXPECT_TRUE(fresh.List().empty());\n}\n\n// List() output is sorted in descending order by count.\nTEST_F(TOPKTest, ListReturnsSortedByCountDescending) {\n  topk_.IncrBy(\"low\", 10);\n  topk_.IncrBy(\"mid\", 50);\n  topk_.IncrBy(\"high\", 100);\n\n  auto list = topk_.List();\n\n  // 1. Guarantee the items actually returned\n  ASSERT_EQ(list.size(), 3u);\n\n  // 2. Exact match the deterministic order\n  EXPECT_EQ(list[0].item, \"high\");\n  EXPECT_EQ(list[0].count, 100u);\n\n  EXPECT_EQ(list[1].item, \"mid\");\n  EXPECT_EQ(list[1].count, 50u);\n\n  EXPECT_EQ(list[2].item, \"low\");\n  EXPECT_EQ(list[2].count, 10u);\n}\n\n// After inserting more than K distinct items, List().size() == K.\nTEST_F(TOPKTest, ListNeverExceedsKItems) {\n  for (int i{}; i < 100; ++i) {\n    topk_.IncrBy(absl::StrCat(\"x\", i), (i + 1) * 10);\n  }\n  // We inserted 100 items. 
The heap MUST be exactly full.\n  EXPECT_EQ(topk_.List().size(), topk_.K());\n}\n\n// ---------------------------------------------------------------------------\n// Decay & ComputeDecayProbability\n// ---------------------------------------------------------------------------\n\n// For count < kDecayLookupSize, ComputeDecayProbability equals std::pow(decay, count).\nTEST_F(TOPKTest, ProbabilityBelowTableSize) {\n  double decay_val = 0.85;\n  TOPK topk(PMR_NS::get_default_resource(), 5, 100, 5, decay_val);\n\n  // ComputeDecayProbability enforces DCHECK_GT(count, 0u), so we start at 1.\n  for (uint32_t count = 1; count < TOPK::kDecayLookupSize; ++count) {\n    double expected = std::pow(decay_val, static_cast<double>(count));\n\n    // EXPECT_DOUBLE_EQ allows up to 4 ULPs of rounding difference.\n    EXPECT_DOUBLE_EQ(ComputeDecayProbability(&topk, count), expected);\n  }\n}\n\n// For count >= kDecayLookupSize, the extrapolation path should not crash or produce NaN.\nTEST(TOPKBasic, ProbabilityAboveTableSizeNoCrash) {\n  TOPK topk(PMR_NS::get_default_resource(), 3, 10, 3, 0.999);\n\n  // 1. Push the counter safely above kDecayLookupSize (4097)\n  topk.IncrBy(\"big\", 5000);\n\n  // 2. Now call Add. This forces ShouldDecay(5000) to execute.\n  // It shouldn't crash, segfault, or produce NaN.\n  for (int i = 0; i < 10; ++i) {\n    topk.Add(\"big\");\n  }\n\n  // Just verify the state isn't corrupted (count is still around 5000)\n  EXPECT_GT(topk.Count(\"big\"), 4000u);\n}\n\n// For an extremely large count with a small decay, probability drops to effectively zero.\n// This means ShouldDecay always returns false for very high counts, so counters aren't decremented.\nTEST(TOPKBasic, VeryHighCountApproachesZero) {\n  // decay=0.5: 0.5^4096 is astronomically small (< kDecayEpsilon). 
The extrapolation\n  // path should return 0.0, meaning no decay fires for counts above the table range.\n  TOPK topk(PMR_NS::get_default_resource(), 3, 10, 3, 0.5);\n  topk.IncrBy(\"stable\", 10000);\n  auto count_before = topk.Count(\"stable\");\n  // Adding more items should not decay \"stable\"'s counter because the decay\n  // probability for such high counts is effectively zero.\n  for (int i{}; i < 100; ++i) {\n    topk.Add(absl::StrCat(\"other\", i));\n  }\n  auto count_after = topk.Count(\"stable\");\n  // Count may increase from hash collisions but should never decrease.\n  EXPECT_GE(count_after, count_before);\n}\n\n// With decay=0.0, the decay probability is always 0 (0^n = 0 for n>0),\n// so counters should grow monotonically.\nTEST(TOPKBasic, ZeroDecayNeverDecays) {\n  TOPK topk(PMR_NS::get_default_resource(), 3, 50, 3, 0.0);\n  topk.IncrBy(\"mono\", 100);\n  auto count1 = topk.Count(\"mono\");\n  topk.IncrBy(\"mono\", 50);\n  auto count2 = topk.Count(\"mono\");\n  EXPECT_GE(count2, count1);\n  EXPECT_EQ(count2, 150u);\n}\n\n// With decay=1.0, every non-zero counter has ShouldDecay probability exactly 1.0 (1^n = 1).\n// Because this implementation uses no fingerprints (unlike the original HeavyKeeper paper),\n// decay fires even when re-adding the same item to its own non-zero counter.\n// The counter therefore oscillates: 0 → 1 (add to zero-counter) → 0 (decay fires) → repeat.\n// It is mathematically impossible for the counter to exceed 1.\nTEST(TOPKBasic, DecayOneAlwaysDecays) {\n  TOPK topk(PMR_NS::get_default_resource(), 3, 10, 3, 1.0);\n\n  for (int i{}; i < 1000; ++i) {\n    topk.Add(\"suppressed\");\n  }\n\n  // Because decay is 100%, the counter just oscillates between 0 and 1.\n  // It is mathematically impossible for it to exceed 1.\n  EXPECT_LE(topk.Count(\"suppressed\"), 1u);\n}\n\n// ---------------------------------------------------------------------------\n// MallocUsed\n// 
---------------------------------------------------------------------------\n\n// MallocUsed() after filling the heap should be larger than right after construction.\nTEST(TOPKBasic, MallocUsedIncreaseWithHeapGrowth) {\n  TOPK topk(PMR_NS::get_default_resource(), 5, 100, 5, 0.0);\n  size_t before = topk.MallocUsed();\n  for (int i{}; i < 5; ++i) {\n    topk.IncrBy(absl::StrCat(\"item_with_a_long_name_\", i), 100);\n  }\n  size_t after = topk.MallocUsed();\n  EXPECT_GT(after, before);\n}\n\n// ---------------------------------------------------------------------------\n// Serialize / Deserialize\n// ---------------------------------------------------------------------------\n\n// After Serialize() + Deserialize(), K(), Width(), Depth(), Decay() are unchanged.\nTEST_F(TOPKTest, SerializeRoundTripPreservesConfiguration) {\n  topk_.IncrBy(\"a\", 10);\n  auto data = topk_.Serialize();\n\n  TOPK restored(PMR_NS::get_default_resource(), data.k, data.width, data.depth, data.decay);\n  restored.Deserialize(data);\n\n  EXPECT_EQ(restored.K(), topk_.K());\n  EXPECT_EQ(restored.Width(), topk_.Width());\n  EXPECT_EQ(restored.Depth(), topk_.Depth());\n  EXPECT_DOUBLE_EQ(restored.Decay(), topk_.Decay());\n}\n\n// After round-trip, List() returns the same items with the same counts.\nTEST_F(TOPKTest, SerializeRoundTripPreservesHeapItems) {\n  topk_.IncrBy(\"alpha\", 100);\n  topk_.IncrBy(\"beta\", 50);\n  topk_.IncrBy(\"gamma\", 25);\n\n  auto data = topk_.Serialize();\n  TOPK restored(PMR_NS::get_default_resource(), data.k, data.width, data.depth, data.decay);\n  restored.Deserialize(data);\n\n  auto orig_list = topk_.List();\n  auto rest_list = restored.List();\n  ASSERT_EQ(orig_list.size(), rest_list.size());\n  for (size_t i{}; i < orig_list.size(); ++i) {\n    EXPECT_EQ(orig_list[i].item, rest_list[i].item);\n    EXPECT_EQ(orig_list[i].count, rest_list[i].count);\n  }\n}\n\n// After round-trip, Count() returns the same estimated frequencies.\nTEST_F(TOPKTest, 
SerializeRoundTripPreservesCounters) {\n  topk_.IncrBy(\"foo\", 42);\n  topk_.IncrBy(\"bar\", 77);\n\n  auto data = topk_.Serialize();\n  TOPK restored(PMR_NS::get_default_resource(), data.k, data.width, data.depth, data.decay);\n  restored.Deserialize(data);\n\n  EXPECT_EQ(topk_.Count(\"foo\"), restored.Count(\"foo\"));\n  EXPECT_EQ(topk_.Count(\"bar\"), restored.Count(\"bar\"));\n}\n\n// After Deserialize(), subsequent Add() calls work correctly and evictions are reported.\nTEST_F(TOPKTest, DeserializeRebuildsValidHeapProperty) {\n  for (uint32_t i{}; i < topk_.K(); ++i) {\n    topk_.IncrBy(absl::StrCat(\"pre\", i), 10);\n  }\n\n  auto data = topk_.Serialize();\n  TOPK restored(PMR_NS::get_default_resource(), data.k, data.width, data.depth, data.decay);\n  restored.Deserialize(data);\n\n  // The restored heap is full (K items). A heavy new item should evict the minimum.\n  auto evicted = restored.IncrBy(\"post_restore_big\", 10000);\n  EXPECT_TRUE(evicted.has_value());\n  EXPECT_TRUE(restored.Query(\"post_restore_big\"));\n}\n\n// Serializing a fresh TOPK produces empty heap_items and a zero-filled counters vector.\nTEST(TOPKBasic, SerializeEmptyTOPK) {\n  TOPK topk(PMR_NS::get_default_resource(), 5, 100, 5, 0.0);\n  auto data = topk.Serialize();\n\n  EXPECT_TRUE(data.heap_items.empty());\n  EXPECT_EQ(data.counters.size(), 100u * 5);\n  for (auto c : data.counters) {\n    EXPECT_EQ(c, 0u);\n  }\n}\n\n// ---------------------------------------------------------------------------\n// PMR Allocator\n// ---------------------------------------------------------------------------\n\n// Explicitly passing get_default_resource() works correctly without crashing.\nTEST(TOPKBasic, PMRExplicitDefaultResourceWorks) {\n  TOPK topk(PMR_NS::get_default_resource(), 5, 100, 5, 0.9);\n  topk.Add(\"works\");\n  EXPECT_EQ(topk.List().size(), 1u);\n}\n\n// ---------------------------------------------------------------------------\n// Statistical / Accuracy\n// 
---------------------------------------------------------------------------\n\n// Verify that the Top-K correctly identifies \"Hot\" items even when\n// the sketch is flooded with \"Cold\" noise (many items seen only once).\n//\n// SETUP:\n// 1. We disable Decay (decay=0.0) to make the test 100% predictable (no RNG).\n// 2. We use IncrBy to give 5 \"Hot\" items a guaranteed high score of 1000.\n// 3. We use Add to insert 200 \"Cold\" items once each (score of 1).\n//\n// WHY INCRBY?\n// In a real-world scenario with decay, an item's count eventually hits a\n// \"ceiling\" where decay and growth balance out. By using IncrBy and decay=0,\n// we bypass that math to ensure our \"Hot\" items are strictly,\n// deterministically larger than the noise.\nTEST(TOPKBasic, TopKItemsIdentifiedUnderHeavyLoad) {\n  TOPK topk(PMR_NS::get_default_resource(), 5, 500, 5, 0.0);\n  // Hot items get a large, deterministic count via IncrBy.\n  for (int h{}; h < 5; ++h) {\n    topk.IncrBy(absl::StrCat(\"hot\", h), 1000);\n  }\n  // Cold items are each seen only once.\n  for (int c{}; c < 200; ++c) {\n    topk.Add(absl::StrCat(\"cold\", c));\n  }\n\n  auto list = topk.List();\n  ASSERT_EQ(list.size(), 5u);\n  // All 5 hot items should be present in the top-K list.\n  for (int h{}; h < 5; ++h) {\n    string hot_key = absl::StrCat(\"hot\", h);\n    bool found{};\n    for (const auto& item : list) {\n      if (item.item == hot_key) {\n        found = true;\n        break;\n      }\n    }\n    EXPECT_TRUE(found) << hot_key << \" should be in the top-K list\";\n  }\n}\n\n// With k=1, only the single most-frequent item survives in the heap.\n// Uses decay=0.0 and IncrBy so \"dominant\" has a deterministically high count\n// that minor items (each added once, count=1) can never exceed.\nTEST(TOPKBasic, KEqualsOneTracksOnlyTopItem) {\n  TOPK topk(PMR_NS::get_default_resource(), 1, 500, 5, 0.0);\n\n  // \"dominant\" gets a large, fixed count.\n  topk.IncrBy(\"dominant\", 1000);\n  // Minor items 
are each seen only once; count=1 < 1000, so none can displace dominant.\n  for (int i{}; i < 50; ++i) {\n    topk.Add(absl::StrCat(\"minor\", i));\n  }\n\n  auto list = topk.List();\n  ASSERT_EQ(list.size(), 1u);\n  EXPECT_EQ(list[0].item, \"dominant\");\n}\n\n// ---------------------------------------------------------------------------\n// Deserialization Heap Repair\n// ---------------------------------------------------------------------------\n\n// Deserialize() must call std::make_heap to restore the min-heap invariant even when\n// heap_items are stored out-of-order in the RDB snapshot (e.g. saved in List() order).\nTEST(TOPKBasic, DeserializeRestoresHeapProperty) {\n  TOPK::SerializedData data;\n  data.k = 5;\n  data.width = 100;\n  data.depth = 5;\n  data.decay = 0.0;\n  data.counters.resize(500, 0);\n\n  // Items deliberately out of min-heap order: smallest must end up at the root.\n  data.heap_items.push_back({\"heavy\", 1000});\n  data.heap_items.push_back({\"medium\", 500});\n  data.heap_items.push_back({\"light\", 10});\n\n  TOPK restored(PMR_NS::get_default_resource(), 5, 100, 5, 0.0);\n  restored.Deserialize(data);\n\n  // List() sorts descending — correct only if make_heap built a valid heap.\n  auto list = restored.List();\n  ASSERT_EQ(list.size(), 3u);\n  EXPECT_EQ(list[0].item, \"heavy\");\n  EXPECT_EQ(list[1].item, \"medium\");\n  EXPECT_EQ(list[2].item, \"light\");\n\n  // Heap is not yet full (3 of 5 slots used), so fill it to capacity.\n  restored.IncrBy(\"filler1\", 20);\n  restored.IncrBy(\"filler2\", 30);\n\n  // Now heap is full (5 items: light=10, filler1=20, filler2=30, medium=500, heavy=1000).\n  // A new item with count > 10 must evict \"light\" — the min-heap root.\n  auto evicted = restored.IncrBy(\"newcomer\", 50);\n  ASSERT_TRUE(evicted.has_value());\n  EXPECT_EQ(evicted.value(), \"light\");\n}\n\n// ---------------------------------------------------------------------------\n// Counter Saturation (Overflow Prevention)\n// 
---------------------------------------------------------------------------\n\n// IncrBy must saturate at UINT32_MAX rather than wrapping around to 0.\n// A wrap-around would trick the heap into evicting a top item — a correctness\n// and security issue (malicious TOPK.INCRBY with a huge increment).\nTEST_F(TOPKTest, CounterSaturationPreventsOverflow) {\n  const uint32_t max_val = numeric_limits<uint32_t>::max();\n  topk_.IncrBy(\"max_item\", max_val);\n  EXPECT_EQ(topk_.Count(\"max_item\"), max_val);\n\n  // Adding more must not wrap the counter back to a small number.\n  topk_.IncrBy(\"max_item\", 100);\n  EXPECT_EQ(topk_.Count(\"max_item\"), max_val);\n}\n\n// ---------------------------------------------------------------------------\n// Death Tests (DCHECKs active in debug builds only)\n// ---------------------------------------------------------------------------\n\n#ifndef NDEBUG\n// k=0 violates DCHECK_GT(k_, 0u) in the constructor.\nTEST(TOPKDeathTest, ZeroKCrashes) {\n  EXPECT_DEBUG_DEATH(TOPK(PMR_NS::get_default_resource(), 0, 100, 5, 0.9), \"k_ > 0\");\n}\n\n// width=0 violates DCHECK_GT(width_, 0u) in the constructor.\nTEST(TOPKDeathTest, ZeroWidthCrashes) {\n  EXPECT_DEBUG_DEATH(TOPK(PMR_NS::get_default_resource(), 5, 0, 5, 0.9), \"width_ > 0\");\n}\n\n// decay=1.5 violates DCHECK_LE(decay_, 1.0) in the constructor.\nTEST(TOPKDeathTest, DecayAboveOneCrashes) {\n  EXPECT_DEBUG_DEATH(TOPK(PMR_NS::get_default_resource(), 5, 100, 5, 1.5), \"decay_ <= 1.0\");\n}\n\n// Deserializing data with a mismatched k violates DCHECK_EQ(data.k, k_).\nTEST(TOPKDeathTest, DeserializeDimensionMismatchCrashes) {\n  TOPK topk(PMR_NS::get_default_resource(), 5, 100, 5, 0.9);\n  TOPK::SerializedData bad;\n  bad.k = 10;  // Mismatch: object was constructed with k=5.\n  bad.width = 100;\n  bad.depth = 5;\n  bad.decay = 0.9;\n  bad.counters.resize(500, 0);\n  EXPECT_DEBUG_DEATH(topk.Deserialize(bad), \"data.k == k_\");\n}\n\n// Deserializing data with a mismatched decay 
violates DCHECK_EQ(data.decay, decay_).\nTEST(TOPKDeathTest, DeserializeDecayMismatchCrashes) {\n  TOPK topk(PMR_NS::get_default_resource(), 5, 100, 5, 0.9);\n  TOPK::SerializedData bad;\n  bad.k = 5;\n  bad.width = 100;\n  bad.depth = 5;\n  bad.decay = 0.5;  // Mismatch: object was constructed with decay=0.9.\n  bad.counters.resize(500, 0);\n  EXPECT_DEBUG_DEATH(topk.Deserialize(bad), \"data.decay == decay_\");\n}\n#endif\n\n}  // namespace dfly\n"
  },
  {
    "path": "src/core/tx_queue.cc",
    "content": "// Copyright 2022, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n#include \"core/tx_queue.h\"\n\n#include \"base/logging.h\"\n\nnamespace dfly {\n\nTxQueue::TxQueue(std::function<uint64_t(const Transaction*)> sf)\n    : score_fun_(sf), vec_(32) {\n  for (size_t i = 0; i < vec_.size(); ++i) {\n    vec_[i].next = i + 1;\n  }\n}\n\nauto TxQueue::Insert(Transaction* t) -> Iterator {\n  if (next_free_ >= vec_.size()) {\n    Grow();\n  }\n  DCHECK_LT(next_free_, vec_.size());\n  DCHECK_EQ(FREE_TAG, vec_[next_free_].tag);\n\n  Iterator res = next_free_;\n  vec_[next_free_].u.trans = t;\n  vec_[next_free_].tag = TRANS_TAG;\n  DVLOG(1) << \"Insert \" << next_free_ << \" \" << t;\n  LinkFree(score_fun_(t));\n  return res;\n}\n\nauto TxQueue::Insert(uint64_t val) -> Iterator {\n  if (next_free_ >= vec_.size()) {\n    Grow();\n  }\n  DCHECK_LT(next_free_, vec_.size());\n\n  Iterator res = next_free_;\n\n  vec_[next_free_].u.uval = val;\n  vec_[next_free_].tag = UINT_TAG;\n\n  LinkFree(val);\n  return res;\n}\n\nvoid TxQueue::LinkFree(uint64_t weight) {\n  uint32_t taken = next_free_;\n  next_free_ = vec_[taken].next;\n\n  if (size_ == 0) {\n    head_ = taken;\n    vec_[head_].next = vec_[head_].prev = head_;\n  } else {\n    uint32_t cur = vec_[head_].prev;\n    while (true) {\n      if (Rank(vec_[cur]) < weight) {\n        Link(cur, taken);\n        break;\n      }\n      if (cur == head_) {\n        Link(vec_[head_].prev, taken);\n        head_ = taken;\n        break;\n      }\n      cur = vec_[cur].prev;\n    }\n  }\n  ++size_;\n}\n\nvoid TxQueue::Grow() {\n  size_t start = vec_.size();\n  DVLOG(1) << \"Grow from \" << start << \" to \" << start * 2;\n\n  vec_.resize(start * 2);\n  for (size_t i = start; i < vec_.size(); ++i) {\n    vec_[i].next = i + 1;\n  }\n}\n\nvoid TxQueue::Remove(Iterator it) {\n  DCHECK_GT(size_, 0u);\n  DCHECK_LT(it, vec_.size());\n  DCHECK_NE(FREE_TAG, vec_[it].tag);\n\n  DVLOG(1) << \"Remove 
\" << it << \" \" << vec_[it].u.trans;\n  Iterator next = kEnd;\n  if (size_ > 1) {\n    Iterator prev = vec_[it].prev;\n    next = vec_[it].next;\n\n    vec_[prev].next = next;\n    vec_[next].prev = prev;\n  }\n  --size_;\n  vec_[it].next = next_free_;\n  vec_[it].tag = FREE_TAG;\n  next_free_ = it;\n  if (head_ == it) {\n    head_ = next;\n  }\n}\n\nuint64_t TxQueue::Rank(const QRecord& r) const {\n  switch (r.tag) {\n    case UINT_TAG:\n      return r.u.uval;\n    case TRANS_TAG:\n      return score_fun_(r.u.trans);\n  }\n  return 0;\n}\n\n}  // namespace dfly\n"
  },
  {
    "path": "src/core/tx_queue.h",
    "content": "// Copyright 2022, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n#pragma once\n\n#include <cstdint>\n#include <functional>\n#include <variant>\n#include <vector>\n\nnamespace dfly {\n\nclass Transaction;\n\n// TxQueue implemmented as a circular doubly-linked list.\nclass TxQueue {\n  void Link(uint32_t p, uint32_t n) {\n    uint32_t next = vec_[p].next;\n    vec_[n].next = next;\n    vec_[n].prev = p;\n    vec_[p].next = n;\n    vec_[next].prev = n;\n  }\n\n public:\n  // uint64_t is used for unit-tests.\n  using ValueType = std::variant<Transaction*, uint64_t>;\n  using Iterator = uint32_t;\n  enum { kEnd = Iterator(-1) };\n\n  TxQueue(std::function<uint64_t(const Transaction*)> score_fun = nullptr);\n\n  // returns iterator to that item the list\n  Iterator Insert(Transaction* t);\n\n  Iterator Insert(uint64_t val);\n  void Remove(Iterator);\n\n  ValueType At(Iterator it) const {\n    switch (vec_[it].tag) {\n      case TRANS_TAG:\n        return vec_[it].u.trans;\n      case UINT_TAG:\n        return vec_[it].u.uval;\n    }\n    return 0u;\n  }\n\n  ValueType Front() const {\n    return At(head_);\n  }\n\n  void PopFront() {\n    Remove(head_);\n  }\n\n  size_t size() const {\n    return size_;\n  }\n\n  bool Empty() const {\n    return size_ == 0;\n  }\n\n  //! returns the score of the tail record. Can be called only if !Empty().\n  uint64_t TailScore() const {\n    return Rank(vec_[vec_[head_].prev]);\n  }\n\n  //! returns the score of the head record. Can be called only if !Empty().\n  uint64_t HeadScore() const {\n    return Rank(vec_[head_]);\n  }\n\n  //! Can be called only if !Empty().\n  Iterator Head() const {\n    return head_;\n  }\n\n  // Returns the next iterator, it's circular so it always returns a valid\n  // iterator. 
Can be called only if !Empty().\n  Iterator Next(Iterator it) const {\n    return vec_[it].next;\n  }\n\n private:\n  enum { TRANS_TAG = 0, UINT_TAG = 11, FREE_TAG = 12 };\n\n  void Grow();\n  void LinkFree(uint64_t rank);\n\n  struct QRecord {\n    union {\n      Transaction* trans;\n      uint64_t uval;\n    } u;\n\n    uint32_t tag : 8;\n    uint32_t next : 24;\n    uint32_t prev;\n\n    QRecord() : tag(FREE_TAG), prev(kEnd) {\n    }\n  };\n\n  static_assert(sizeof(QRecord) == 16, \"\");\n\n  uint64_t Rank(const QRecord& r) const;\n\n  std::function<uint64_t(const Transaction*)> score_fun_;\n  std::vector<QRecord> vec_;\n  uint32_t next_free_ = 0, head_ = kEnd;\n  size_t size_ = 0;\n\n  TxQueue(const TxQueue&) = delete;\n};\n\n}  // namespace dfly\n"
  },
  {
    "path": "src/core/zstd_test.cc",
    "content": "// Copyright 2025, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#include <absl/base/macros.h>\n#include <gmock/gmock.h>\n#include <zstd.h>\n\n#include <random>\n\n#include \"base/logging.h\"\n\nnamespace dfly {\n\nusing namespace std;\n\nconstexpr unsigned kLevel = 1;\n\nclass ZStdTest : public ::testing::Test {\n protected:\n  string Compress(const string& src, const ZSTD_CDict* cdict) {\n    ZSTD_CCtx* cctx = ZSTD_createCCtx();\n    size_t c_buffer_size = ZSTD_compressBound(src.size());\n    string res(c_buffer_size, '\\0');\n    size_t compressed_size =\n        ZSTD_compress_usingCDict(cctx, res.data(), c_buffer_size, src.c_str(), src.size(), cdict);\n\n    ZSTD_freeCCtx(cctx);\n    res.resize(compressed_size);\n    return res;\n  }\n\n  string Decompress(const string& src, const ZSTD_DDict* ddict, size_t decompressed_size) {\n    string res(decompressed_size, '\\0');\n    ZSTD_DCtx* dctx = ZSTD_createDCtx();\n    size_t decompressed_size_actual = ZSTD_decompress_usingDDict(\n        dctx, res.data(), decompressed_size, src.c_str(), src.size(), ddict);\n    CHECK_EQ(decompressed_size, decompressed_size_actual);\n    ZSTD_freeDCtx(dctx);\n    return res;\n  }\n\n  string CompressNoDict(const string& src) {\n    ZSTD_CCtx* cctx = ZSTD_createCCtx();\n    size_t c_buffer_size = ZSTD_compressBound(src.size());\n    string res(c_buffer_size, '\\0');\n    size_t compressed_size =\n        ZSTD_compressCCtx(cctx, res.data(), c_buffer_size, src.c_str(), src.size(), kLevel);\n    ZSTD_freeCCtx(cctx);\n    res.resize(compressed_size);\n    return res;\n  }\n};\n\n// Dictionary works well for small messages where we do not have enough data to reference\n// previous stream to have significant savings.\n// For large messages, it may not be less beneficial.\nTEST_F(ZStdTest, Dict) {\n  const char* kRandomPieces[] = {\"ABCD\", \"EFGH\", \"IJKL\", \"MNOP\", \"QRST\", \"UVWX\", \"YZAB\", \"CDEF\"};\n  string 
dict_source;\n  random_device rd;\n\n  for (unsigned i = 0; i < 1000; ++i) {\n    dict_source += kRandomPieces[rd() % ABSL_ARRAYSIZE(kRandomPieces)];\n  }\n  LOG(INFO) << \"Creating CDICT from \" << dict_source.size() << \" bytes of random data\";\n  ZSTD_CDict* cdict = ZSTD_createCDict(dict_source.data(), dict_source.size(), 7);\n  ASSERT_TRUE(cdict);\n  size_t actual_dict_size = ZSTD_sizeof_CDict(cdict);\n  LOG(INFO) << \"ZSTD_CDict created, size: \" << actual_dict_size << \" bytes\";\n\n  ZSTD_DDict* ddict = ZSTD_createDDict(dict_source.data(), dict_source.size());\n  ASSERT_TRUE(ddict);\n  size_t actual_ddict_size = ZSTD_sizeof_DDict(ddict);\n  LOG(INFO) << \"ZSTD_DDict created, size: \" << actual_ddict_size << \" bytes\";\n\n  // 3. Data to compress\n  std::string data_to_compress;\n  for (unsigned j = 0; j < 30; ++j) {\n    data_to_compress += kRandomPieces[rd() % ABSL_ARRAYSIZE(kRandomPieces)];\n  }\n  size_t data_to_compress_size = data_to_compress.size();\n\n  // 4. Compress data\n  string compressed = Compress(data_to_compress, cdict);\n\n  LOG(INFO) << \"Compressed data size: \" << compressed.size() << \" bytes vs \"\n            << data_to_compress_size << \" bytes of original data\";\n\n  string compress_no_dict = CompressNoDict(data_to_compress);\n  LOG(INFO) << \"Compressed data size without dict: \" << compress_no_dict.size() << \" bytes\";\n\n  // 5. Decompress data\n  string decompressed = Decompress(compressed, ddict, data_to_compress_size);\n  ASSERT_EQ(data_to_compress, decompressed);\n\n  // 7. Free memory\n  ZSTD_freeCDict(cdict);\n  ZSTD_freeDDict(ddict);\n}\n\n}  // namespace dfly"
  },
  {
    "path": "src/external_libs.cmake",
    "content": "add_third_party(\n  lua\n  GIT_REPOSITORY https://github.com/dragonflydb/lua\n  GIT_TAG Dragonfly-5.4.6a\n  CONFIGURE_COMMAND echo\n  BUILD_IN_SOURCE 1\n  BUILD_COMMAND ${DFLY_TOOLS_MAKE} all\n  INSTALL_COMMAND cp <SOURCE_DIR>/liblua.a ${THIRD_PARTY_LIB_DIR}/lua/lib/\n  COMMAND cp <SOURCE_DIR>/lualib.h <SOURCE_DIR>/lua.h <SOURCE_DIR>/lauxlib.h\n          <SOURCE_DIR>/luaconf.h ${THIRD_PARTY_LIB_DIR}/lua/include\n)\n\n\nif (APPLE OR ${CMAKE_SYSTEM_NAME} MATCHES \"FreeBSD\")\n  set(SED_REPL sed \"-i\" '')\nelse()\n  set(SED_REPL sed \"-i\")\nendif()\n\nadd_third_party(\n  dconv\n  GIT_REPOSITORY https://github.com/google/double-conversion\n  # URL https://github.com/google/double-conversion/archive/refs/tags/v3.3.1.tar.gz\n  GIT_TAG 0604b4c\n  PATCH_COMMAND ${SED_REPL} \"/static const std::ctype/d\"\n                <SOURCE_DIR>/double-conversion/string-to-double.cc\n  COMMAND ${SED_REPL} \"/std::use_facet</d\" <SOURCE_DIR>/double-conversion/string-to-double.cc\n  COMMAND ${SED_REPL} \"s/cType.tolower/std::tolower/g\" <SOURCE_DIR>/double-conversion/string-to-double.cc\n  LIB libdouble-conversion.a\n)\n\nadd_third_party(\n  reflex\n  URL https://github.com/Genivia/RE-flex/archive/refs/tags/v5.2.2.tar.gz\n  PATCH_COMMAND autoreconf -fi\n  CONFIGURE_COMMAND <SOURCE_DIR>/configure --disable-avx2 --prefix=${THIRD_PARTY_LIB_DIR}/reflex\n          CXX=${CMAKE_CXX_COMPILER} CC=${CMAKE_C_COMPILER}\n)\n\nset(REFLEX \"${THIRD_PARTY_LIB_DIR}/reflex/bin/reflex\")\n\nadd_third_party(\n  jsoncons\n  GIT_REPOSITORY https://github.com/dragonflydb/jsoncons\n  GIT_TAG Dragonfly1.5.0\n  GIT_SHALLOW 1\n  CMAKE_PASS_FLAGS \"-DJSONCONS_BUILD_TESTS=OFF -DJSONCONS_HAS_POLYMORPHIC_ALLOCATOR=ON\"\n  LIB \"none\"\n)\n\nadd_third_party(\n  lz4\n  URL https://github.com/lz4/lz4/archive/refs/tags/v1.10.0.tar.gz\n\n  BUILD_IN_SOURCE 1\n  CONFIGURE_COMMAND echo skip\n  BUILD_COMMAND ${DFLY_TOOLS_MAKE} lib-release\n  INSTALL_COMMAND ${DFLY_TOOLS_MAKE} install BUILD_SHARED=no 
PREFIX=${THIRD_PARTY_LIB_DIR}/lz4\n)\n\nset(MIMALLOC_ROOT_DIR ${THIRD_PARTY_LIB_DIR}/mimalloc2)\nset(MIMALLOC_INCLUDE_DIR ${MIMALLOC_ROOT_DIR}/include)\nset(MIMALLOC_PATCH_DIR ${CMAKE_CURRENT_LIST_DIR}/../patches/mimalloc-v2.2.4)\nset(MIMALLOC_C_FLAGS \"-O3 -g -DMI_STAT=1 -DNDEBUG\")\nfile(MAKE_DIRECTORY ${MIMALLOC_INCLUDE_DIR})\n\nExternalProject_Add(mimalloc2_project\n  URL https://github.com/microsoft/mimalloc/archive/refs/tags/v2.2.4.tar.gz\n  DOWNLOAD_DIR ${THIRD_PARTY_DIR}/mimalloc2\n  SOURCE_DIR ${THIRD_PARTY_DIR}/mimalloc2\n  # INSTALL_DIR ${MIMALLOC_ROOT_DIR}\n  UPDATE_COMMAND \"\"\n\n  PATCH_COMMAND\n      patch -p1 -d ${THIRD_PARTY_DIR}/mimalloc2/ -i ${MIMALLOC_PATCH_DIR}/0_base.patch\n      COMMAND patch -p1 -d ${THIRD_PARTY_DIR}/mimalloc2/ -i ${MIMALLOC_PATCH_DIR}/1_add_stat_type.patch\n      COMMAND patch -p1 -d ${THIRD_PARTY_DIR}/mimalloc2/ -i ${MIMALLOC_PATCH_DIR}/2_return_stat.patch\n      COMMAND patch -p1 -d ${THIRD_PARTY_DIR}/mimalloc2/ -i ${MIMALLOC_PATCH_DIR}/3_track_full_size.patch\n      COMMAND patch -p1 -d ${THIRD_PARTY_DIR}/mimalloc2/ -i ${MIMALLOC_PATCH_DIR}/4_fix_heap_collect.patch\n  BUILD_COMMAND make mimalloc-static\n\n  INSTALL_COMMAND make install\n  # Copy internal types like mi_page_usage_stats_s and mi_heap_s\n  COMMAND cp -r <SOURCE_DIR>/include/mimalloc ${MIMALLOC_INCLUDE_DIR}/\n\n  LOG_INSTALL ON\n  LOG_DOWNLOAD ON\n  LOG_CONFIGURE ON\n  LOG_BUILD ON\n  LOG_PATCH ON\n  LOG_UPDATE ON\n  DOWNLOAD_EXTRACT_TIMESTAMP YES\n\n  CMAKE_GENERATOR \"Unix Makefiles\"\n\n  # Add -DCMAKE_BUILD_TYPE=Debug -DCMAKE_C_FLAGS=-O0 to debug, and set BUILD_BYPRODUCTS to\n  # libmimalloc-debug.a\n\n  BUILD_BYPRODUCTS ${MIMALLOC_ROOT_DIR}/lib/libmimalloc.a\n\n  CMAKE_ARGS -DCMAKE_ARCHIVE_OUTPUT_DIRECTORY:PATH=${MIMALLOC_ROOT_DIR}/lib\n        -DCMAKE_LIBRARY_OUTPUT_DIRECTORY:PATH=${MIMALLOC_ROOT_DIR}/lib\n        -DCMAKE_BUILD_TYPE:STRING=Release\n        -DCMAKE_CXX_COMPILER:STRING=${CMAKE_CXX_COMPILER}\n        -DMI_INSTALL_TOPLEVEL=ON\n        
-DMI_OVERRIDE=OFF\n        -DMI_NO_PADDING=ON\n        -DMI_BUILD_TESTS=OFF\n        -DMI_BUILD_SHARED=OFF\n        -DMI_BUILD_OBJECT=OFF\n        -DCMAKE_C_FLAGS=${MIMALLOC_C_FLAGS}\n        -DCMAKE_INSTALL_PREFIX:PATH=${MIMALLOC_ROOT_DIR}\n)\n\nadd_library(TRDP::mimalloc2 STATIC IMPORTED)\nadd_dependencies(TRDP::mimalloc2 mimalloc2_project)\nset_target_properties(TRDP::mimalloc2 PROPERTIES IMPORTED_LOCATION ${MIMALLOC_ROOT_DIR}/lib/libmimalloc.a\n                      INTERFACE_INCLUDE_DIRECTORIES ${MIMALLOC_ROOT_DIR}/include)\n\nadd_third_party(\n  croncpp\n  URL https://github.com/mariusbancila/croncpp/archive/refs/tags/v2023.03.30.tar.gz\n  LIB \"none\"\n)\n\nif (WITH_SEARCH)\n  add_third_party(\n    uni-algo\n    URL https://github.com/uni-algo/uni-algo/archive/refs/tags/v1.0.0.tar.gz\n\n    CMAKE_PASS_FLAGS \"-DCMAKE_CXX_STANDARD:STRING=20\"\n  )\n\n  add_third_party(\n    hnswlib\n    GIT_REPOSITORY https://github.com/dragonflydb/hnswlib.git\n    # HEAD of dragonfly branch\n    GIT_TAG d07dd1da2bf48b85d2f03b8396193ad7120f75c2\n\n    BUILD_COMMAND echo SKIP\n    INSTALL_COMMAND cp -R <SOURCE_DIR>/hnswlib ${THIRD_PARTY_LIB_DIR}/hnswlib/include/\n    LIB \"none\"\n  )\nendif()\n\nadd_third_party(\n  fast_float\n  URL https://github.com/fastfloat/fast_float/archive/refs/tags/v5.2.0.tar.gz\n  LIB \"none\"\n)\n\nadd_third_party(\n  flatbuffers\n  URL https://github.com/google/flatbuffers/archive/refs/tags/v23.5.26.tar.gz\n  CMAKE_PASS_FLAGS \"-DFLATBUFFERS_BUILD_TESTS=OFF -DFLATBUFFERS_LIBCXX_WITH_CLANG=OFF\n                    -DFLATBUFFERS_BUILD_FLATC=OFF\"\n)\n\nadd_third_party(\n  hdr_histogram\n  GIT_REPOSITORY https://github.com/HdrHistogram/HdrHistogram_c/\n  GIT_TAG 652d51bcc36744fd1a6debfeb1a8a5f58b14022c\n  CMAKE_PASS_FLAGS \"-DHDR_LOG_REQUIRED=OFF -DHDR_HISTOGRAM_BUILD_PROGRAMS=OFF\n                    -DHDR_HISTOGRAM_INSTALL_SHARED=OFF\"\n  LIB libhdr_histogram_static.a\n)\n\nif(WITH_SIMSIMD)\n  # Compute integer macros for native half-precision 
support.\n  set(SIMSIMD_NATIVE_F16_VAL 0)\n  set(SIMSIMD_NATIVE_BF16_VAL 0)\n  if(SIMSIMD_NATIVE_F16)\n    set(SIMSIMD_NATIVE_F16_VAL 1)\n    set(SIMSIMD_NATIVE_BF16_VAL 1)\n  endif()\n\n  # Build statically via add_third_party using the C shim with dynamic dispatch.\n  add_third_party(\n    simsimd\n    URL https://github.com/ashvardanian/SimSIMD/archive/refs/tags/v6.5.3.tar.gz\n    BUILD_IN_SOURCE 1\n    CONFIGURE_COMMAND echo skip\n    BUILD_COMMAND bash -c \"\\\n      mkdir -p ${THIRD_PARTY_LIB_DIR}/simsimd/lib && \\\n      ${CMAKE_C_COMPILER} -O3 -fPIC -DNDEBUG \\\n        -DSIMSIMD_DYNAMIC_DISPATCH=1 \\\n        -DSIMSIMD_NATIVE_F16=${SIMSIMD_NATIVE_F16_VAL} \\\n        -DSIMSIMD_NATIVE_BF16=${SIMSIMD_NATIVE_BF16_VAL} \\\n        -I<SOURCE_DIR>/include -c <SOURCE_DIR>/c/lib.c -o <SOURCE_DIR>/lib.o && \\\n      ar rcs <SOURCE_DIR>/libsimsimd.a <SOURCE_DIR>/lib.o\"\n    INSTALL_COMMAND bash -c \"\\\n      mkdir -p ${THIRD_PARTY_LIB_DIR}/simsimd/include ${THIRD_PARTY_LIB_DIR}/simsimd/lib && \\\n      cp -R <SOURCE_DIR>/include/* ${THIRD_PARTY_LIB_DIR}/simsimd/include/ && \\\n      cp <SOURCE_DIR>/libsimsimd.a ${THIRD_PARTY_LIB_DIR}/simsimd/lib/\"\n    LIB libsimsimd.a\n  )\nendif()\n\n\nadd_library(TRDP::jsoncons INTERFACE IMPORTED)\nadd_dependencies(TRDP::jsoncons jsoncons_project)\nset_target_properties(TRDP::jsoncons PROPERTIES\n                      INTERFACE_INCLUDE_DIRECTORIES \"${JSONCONS_INCLUDE_DIR}\")\n\nadd_library(TRDP::croncpp INTERFACE IMPORTED)\nadd_dependencies(TRDP::croncpp croncpp_project)\nset_target_properties(TRDP::croncpp PROPERTIES\n                      INTERFACE_INCLUDE_DIRECTORIES \"${CRONCPP_INCLUDE_DIR}\")\n\nif (WITH_SEARCH)\n  add_library(TRDP::hnswlib INTERFACE IMPORTED)\n  add_dependencies(TRDP::hnswlib hnswlib_project)\n  set_target_properties(TRDP::hnswlib PROPERTIES\n                        INTERFACE_INCLUDE_DIRECTORIES \"${HNSWLIB_INCLUDE_DIR}\")\nendif()\n\nadd_library(TRDP::fast_float INTERFACE 
IMPORTED)\nadd_dependencies(TRDP::fast_float fast_float_project)\nset_target_properties(TRDP::fast_float PROPERTIES\n                      INTERFACE_INCLUDE_DIRECTORIES \"${FAST_FLOAT_INCLUDE_DIR}\")\n"
  },
  {
    "path": "src/facade/CMakeLists.txt",
    "content": "add_library(dfly_parser_lib redis_parser.cc resp_expr.cc resp_parser.cc\n            resp_srv_parser.cc)\ncxx_link(dfly_parser_lib base strings_lib redis_lib)\n\nadd_library(dfly_facade dragonfly_listener.cc dragonfly_connection.cc facade.cc\n            memcache_parser.cc reply_builder.cc op_status.cc parsed_command.cc service_interface.cc\n            reply_capture.cc cmd_arg_parser.cc tls_helpers.cc socket_utils.cc disk_backed_queue.cc)\n\nif (DF_USE_SSL)\n  set(TLS_LIB tls_lib)\n  target_compile_definitions(dfly_facade PRIVATE DFLY_USE_SSL)\nendif()\n\ncxx_link(dfly_facade dfly_parser_lib http_server_lib fibers2\n         ${TLS_LIB} TRDP::mimalloc2 TRDP::dconv redis_lib)\n\nadd_library(facade_test facade_test.cc resp_expr_test_utils.cc)\ncxx_link(facade_test dfly_facade gtest_main_ext)\n\nhelio_cxx_test(memcache_parser_test dfly_facade LABELS DFLY)\nhelio_cxx_test(redis_parser_test facade_test LABELS DFLY)\nhelio_cxx_test(resp_srv_parser_test facade_test LABELS DFLY)\nhelio_cxx_test(reply_builder_test facade_test LABELS DFLY)\nhelio_cxx_test(resp_parser_test facade_test LABELS DFLY)\nhelio_cxx_test(cmd_arg_parser_test facade_test LABELS DFLY)\nhelio_cxx_test(disk_backed_queue_test facade_test LABELS DFLY)\n\nadd_executable(ok_backend ok_main.cc)\ncxx_link(ok_backend dfly_facade)\n\nadd_executable(resp_validator resp_validator.cc)\ncxx_link(resp_validator dfly_parser_lib)\n"
  },
  {
    "path": "src/facade/README.md",
    "content": "## A facade library\n\nThe library is responsible for opening dragonfly-like TCP client connections.\nI call it facade because \"client\" term is often abused.\n\nIt should be separated from the rest of dragonfly server logic and should be self-contained, i.e\nno redis-lib or server dependencies are allowed.\n\n"
  },
  {
    "path": "src/facade/cmd_arg_parser.cc",
    "content": "// Copyright 2023, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#include \"facade/cmd_arg_parser.h\"\n\n#include <absl/strings/ascii.h>\n\n#include \"base/logging.h\"\n#include \"facade/error.h\"\n\nnamespace facade {\n\nvoid CmdArgParser::ExpectTag(std::string_view tag) {\n  if (cur_i_ >= args_.size()) {\n    Report(OUT_OF_BOUNDS, cur_i_);\n    return;\n  }\n\n  auto idx = cur_i_++;\n  auto val = ToSV(args_[idx]);\n  if (!absl::EqualsIgnoreCase(val, tag)) {\n    Report(INVALID_NEXT, idx);\n  }\n}\n\nCmdArgParser::ErrorInfo CmdArgParser::TakeError() {\n  return std::exchange(error_, {});\n}\n\nErrorReply CmdArgParser::ErrorInfo::MakeReply() const {\n  DCHECK(operator bool());\n  switch (type) {\n    case INVALID_INT:\n      return ErrorReply{kInvalidIntErr};\n    case INVALID_FLOAT:\n      return ErrorReply{kInvalidFloatErr};\n    default:\n      return ErrorReply{kSyntaxErr};\n  };\n  return ErrorReply{kSyntaxErr};\n}\n\nCmdArgParser::~CmdArgParser() {\n  DCHECK(!error_) << \"Parsing error occured but not checked\";\n  // TODO DCHECK(!HasNext()) << \"Not all args were processed\";\n}\n\n}  // namespace facade\n"
  },
  {
    "path": "src/facade/cmd_arg_parser.h",
    "content": "// Copyright 2023, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#pragma once\n\n#include <absl/strings/match.h>\n#include <absl/strings/numbers.h>\n\n#include <optional>\n#include <string_view>\n#include <utility>\n\n#include \"facade/facade_types.h\"\n\nnamespace facade {\n\n// Helper class for numerical range restriction during parsing\ntemplate <auto min, auto max> struct FInt {\n  decltype(min) value = {};\n  operator decltype(min)() {\n    return value;\n  }\n\n  static_assert(std::is_same_v<decltype(min), decltype(max)>, \"inconsistent types\");\n  static constexpr auto kMin = min;\n  static constexpr auto kMax = max;\n};\n\ntemplate <class T> constexpr bool is_fint = false;\n\ntemplate <auto min, auto max> constexpr bool is_fint<FInt<min, max>> = true;\n\n// Utility class for easily parsing command options from argument lists.\nstruct CmdArgParser {\n  enum ErrorType {\n    NO_ERROR,\n    OUT_OF_BOUNDS,\n    SHORT_OPT_TAIL,\n    INVALID_INT,\n    INVALID_FLOAT,\n    INVALID_CASES,\n    INVALID_NEXT,\n    UNPROCESSED,\n    CUSTOM_ERROR  // should be the last one\n  };\n\n  struct ErrorInfo {\n    int type = NO_ERROR;\n    size_t index = 0;\n\n    operator bool() const {\n      return type != ErrorType::NO_ERROR;\n    }\n    ErrorReply MakeReply() const;\n  };\n\n public:\n  CmdArgParser(ArgSlice args) : args_{args} {\n  }\n\n  // Debug asserts sure error was consumed\n  ~CmdArgParser();\n\n  // Get next value without consuming it\n  std::string_view Peek() {\n    return SafeSV(cur_i_);\n  }\n\n  // Consume next value\n  template <class T = std::string_view, class... 
Ts> auto Next() {\n    if (cur_i_ + sizeof...(Ts) >= args_.size()) {\n      Report(OUT_OF_BOUNDS, cur_i_);\n      return std::conditional_t<sizeof...(Ts) == 0, T, std::tuple<T, Ts...>>();\n    }\n\n    if constexpr (sizeof...(Ts) == 0) {\n      auto idx = cur_i_++;\n      return Convert<T>(idx);\n    } else {\n      std::tuple<T, Ts...> res;\n      NextImpl<0>(&res);\n      cur_i_ += sizeof...(Ts) + 1;\n      return res;\n    }\n  }\n\n  // returns next value if exists or default value\n  template <class T = std::string_view> auto NextOrDefault(T default_value = {}) {\n    return HasNext() ? Next<T>() : default_value;\n  }\n\n  // check next value ignoring case and consume it\n  void ExpectTag(std::string_view tag);\n\n  // Consume next value\n  template <class... Cases> auto MapNext(Cases&&... cases) {\n    if (cur_i_ >= args_.size()) {\n      Report(OUT_OF_BOUNDS, cur_i_);\n      return typename decltype(MapImpl(std::string_view(),\n                                       std::forward<Cases>(cases)...))::value_type{};\n    }\n\n    auto idx = cur_i_++;\n    auto res = MapImpl(SafeSV(idx), std::forward<Cases>(cases)...);\n    if (!res) {\n      Report(INVALID_CASES, idx);\n      return typename decltype(res)::value_type{};\n    }\n    return *res;\n  }\n\n  // Consume next value if can map it and return mapped result or return nullopt\n  template <class... Cases>\n  auto TryMapNext(Cases&&... cases)\n      -> std::optional<std::tuple_element_t<1, std::tuple<Cases...>>> {\n    if (cur_i_ >= args_.size()) {\n      return std::nullopt;\n    }\n\n    auto res = MapImpl(SafeSV(cur_i_), std::forward<Cases>(cases)...);\n    cur_i_ = res ? cur_i_ + 1 : cur_i_;\n    return res;\n  }\n\n  // Check if the next value is equal to a specific tag. If equal, its consumed.\n  template <class... Args> bool Check(std::string_view tag, Args*... 
args) {\n    if (cur_i_ + sizeof...(Args) >= args_.size())\n      return false;\n\n    std::string_view arg = SafeSV(cur_i_);\n    if (!absl::EqualsIgnoreCase(arg, tag))\n      return false;\n\n    ((*args = Convert<Args>(++cur_i_)), ...);\n\n    ++cur_i_;\n\n    return true;\n  }\n\n  // Skip specified number of arguments\n  CmdArgParser& Skip(size_t n) {\n    if (cur_i_ + n > args_.size()) {\n      Report(OUT_OF_BOUNDS, cur_i_);\n    } else {\n      cur_i_ += n;\n    }\n    return *this;\n  }\n\n  // Expect no more arguments and return if no error has occured\n  bool Finalize() {\n    if (HasNext()) {\n      Report(UNPROCESSED, cur_i_);\n      return false;\n    }\n    return !HasError();\n  }\n\n  // Return remaining arguments\n  ArgSlice Tail() const {\n    return args_.subspan(cur_i_);\n  }\n\n  // Return true if arguments are left and no errors occured\n  bool HasNext() {\n    return cur_i_ < args_.size() && !error_;\n  }\n\n  bool HasError() const {\n    return bool(error_);\n  }\n\n  ErrorInfo TakeError();\n\n  bool HasAtLeast(size_t i) const {\n    return cur_i_ + i <= args_.size() && !error_;\n  }\n\n  size_t GetCurrentIndex() const {\n    return cur_i_;\n  }\n\n  // Custom error_type should start from CUSTOM_ERROR\n  void Report(int error_type) {\n    // we use previous index, because the check was done outside and it's done after element is\n    // processed\n    Report(error_type, cur_i_ - 1);\n  }\n\n private:\n  void Report(int error_type, size_t idx) {\n    if (!error_) {\n      error_ = {error_type, idx};\n      cur_i_ = args_.size();\n    }\n  }\n\n  template <class T, class... Cases>\n  std::optional<std::decay_t<T>> MapImpl(std::string_view arg, std::string_view tag, T&& value,\n                                         Cases&&... 
cases) {\n    if (absl::EqualsIgnoreCase(arg, tag))\n      return std::forward<T>(value);\n\n    if constexpr (sizeof...(cases) > 0)\n      return MapImpl(arg, cases...);\n\n    return std::nullopt;\n  }\n\n  template <size_t shift, class Tuple> void NextImpl(Tuple* t) {\n    std::get<shift>(*t) = Convert<std::tuple_element_t<shift, Tuple>>(cur_i_ + shift);\n    if constexpr (constexpr auto next = shift + 1; next < std::tuple_size_v<Tuple>)\n      NextImpl<next>(t);\n  }\n\n  template <class T> T Convert(size_t idx) {\n    static_assert(\n        std::is_arithmetic_v<T> || std::is_constructible_v<T, std::string_view> || is_fint<T>,\n        \"incorrect type\");\n    if constexpr (std::is_arithmetic_v<T>) {\n      return Num<T>(idx);\n    } else if constexpr (std::is_constructible_v<T, std::string_view>) {\n      return static_cast<T>(SafeSV(idx));\n    } else if constexpr (is_fint<T>) {\n      return {ConvertFInt<T::kMin, T::kMax>(idx)};\n    }\n  }\n\n  template <auto min, auto max> FInt<min, max> ConvertFInt(size_t idx) {\n    auto res = Num<decltype(min)>(idx);\n    if (res < min || res > max) {\n      Report(INVALID_INT, idx);\n      return {};\n    }\n    return {res};\n  }\n\n  std::string_view SafeSV(size_t i) const {\n    using namespace std::literals::string_view_literals;\n    if (i >= args_.size())\n      return \"\"sv;\n    return args_[i].empty() ? 
\"\"sv : ToSV(args_[i]);\n  }\n\n  template <typename T> T Num(size_t idx) {\n    auto arg = SafeSV(idx);\n    T out;\n    if constexpr (std::is_same_v<T, float>) {\n      if (absl::SimpleAtof(arg, &out))\n        return out;\n    } else if constexpr (std::is_same_v<T, double>) {\n      if (absl::SimpleAtod(arg, &out))\n        return out;\n    } else if constexpr (std::is_integral_v<T> && sizeof(T) >= sizeof(int32_t)) {\n      if (absl::SimpleAtoi(arg, &out))\n        return out;\n    } else if constexpr (std::is_integral_v<T> && sizeof(T) < sizeof(int32_t)) {\n      int32_t tmp;\n      if (absl::SimpleAtoi(arg, &tmp)) {\n        out = tmp;  // out can not store the whole tmp\n        if (tmp == out)\n          return out;\n      }\n    }\n\n    if constexpr (std::is_floating_point_v<T>) {\n      Report(INVALID_FLOAT, idx);\n    } else {\n      Report(INVALID_INT, idx);\n    }\n    return {};\n  }\n\n private:\n  size_t cur_i_ = 0;\n  ArgSlice args_;\n\n  ErrorInfo error_;\n};\n\n}  // namespace facade\n"
  },
  {
    "path": "src/facade/cmd_arg_parser_test.cc",
    "content": "// Copyright 2023, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#include \"facade/cmd_arg_parser.h\"\n\n#include <absl/base/casts.h>\n#include <gmock/gmock.h>\n\n#include \"facade/memcache_parser.h\"\n\nusing namespace testing;\nusing namespace std;\n\nnamespace facade {\n\nclass CmdArgParserTest : public testing::Test {\n public:\n  CmdArgParser Make(absl::Span<const std::string_view> args) {\n    storage_.assign(args.begin(), args.end());\n    arg_vec_.clear();\n    for (auto& s : storage_)\n      arg_vec_.push_back(MutableSlice{s.data(), s.size()});\n    return CmdArgParser{absl::MakeSpan(arg_vec_)};\n  }\n\n private:\n  CmdArgVec arg_vec_;\n  std::vector<std::string> storage_;\n};\n\nTEST_F(CmdArgParserTest, BasicTypes) {\n  auto parser = Make({\"STRING\", \"VIEW\", \"11\", \"22\", \"33\", \"44\"});\n\n  EXPECT_TRUE(parser.HasNext());\n\n  EXPECT_EQ(parser.Next<string>(), \"STRING\"s);\n  EXPECT_EQ(parser.Next<string_view>(), \"VIEW\"sv);\n\n  EXPECT_EQ(parser.Next<size_t>(), 11u);\n  EXPECT_EQ(parser.Next<size_t>(), 22u);\n  auto [a, b] = parser.Next<size_t, size_t>();\n  EXPECT_EQ(a, 33u);\n  EXPECT_EQ(b, 44u);\n\n  EXPECT_FALSE(parser.HasNext());\n  EXPECT_FALSE(parser.HasError());\n}\n\nTEST_F(CmdArgParserTest, BoundError) {\n  auto parser = Make({});\n\n  EXPECT_EQ(absl::implicit_cast<string_view>(parser.Next()), \"\"sv);\n\n  auto err = parser.TakeError();\n  EXPECT_TRUE(err);\n  EXPECT_EQ(err.type, CmdArgParser::OUT_OF_BOUNDS);\n  EXPECT_EQ(err.index, 0);\n}\n\n#ifndef __APPLE__\nTEST_F(CmdArgParserTest, IntError) {\n  auto parser = Make({\"NOTANINT\"});\n\n  EXPECT_EQ(parser.Next<size_t>(), 0u);\n\n  auto err = parser.TakeError();\n  EXPECT_TRUE(err);\n  EXPECT_EQ(err.type, CmdArgParser::INVALID_INT);\n  EXPECT_EQ(err.index, 0);\n}\n#endif\n\nTEST_F(CmdArgParserTest, Check) {\n  auto parser = Make({\"TAG\", \"TAG_2\", \"22\"});\n\n  EXPECT_FALSE(parser.Check(\"NOT_TAG\"));\n  
EXPECT_TRUE(parser.Check(\"TAG\"));\n\n  EXPECT_FALSE(parser.Check(\"NOT_TAG_2\"));\n  EXPECT_TRUE(parser.Check(\"TAG_2\"));\n  EXPECT_EQ(parser.Next<int>(), 22);\n}\n\nTEST_F(CmdArgParserTest, NextStatement) {\n  auto parser = Make({\"TAG\", \"tag_2\", \"tag_3\"});\n\n  parser.ExpectTag(\"TAG\");\n  EXPECT_FALSE(parser.TakeError());\n\n  parser.ExpectTag(\"TAG_2\");\n  EXPECT_FALSE(parser.TakeError());\n\n  parser.ExpectTag(\"TAG_2\");\n  EXPECT_TRUE(parser.TakeError());\n}\n\nTEST_F(CmdArgParserTest, CheckTailFail) {\n  auto parser = Make({\"TAG\", \"11\", \"22\", \"TAG\", \"text\"});\n\n  int first;\n  string_view second;\n  EXPECT_TRUE(parser.Check(\"TAG\", &first, &second));\n  EXPECT_EQ(first, 11);\n  EXPECT_EQ(second, \"22\");\n\n  EXPECT_FALSE(parser.Check(\"TAG\", &first, &second));\n  EXPECT_TRUE(parser.Check(\"TAG\", &first));\n  EXPECT_TRUE(parser.TakeError());\n}\n\nTEST_F(CmdArgParserTest, Map) {\n  auto parser = Make({\"TWO\", \"NONE\"});\n\n  EXPECT_EQ(parser.MapNext(\"ONE\", 1, \"TWO\", 2), 2);\n\n  EXPECT_EQ(parser.MapNext(\"ONE\", 1, \"TWO\", 2), 0);\n  auto err = parser.TakeError();\n  EXPECT_TRUE(err);\n  EXPECT_EQ(err.type, CmdArgParser::INVALID_CASES);\n  EXPECT_EQ(err.index, 1);\n}\n\nTEST_F(CmdArgParserTest, TryMapNext) {\n  auto parser = Make({\"TWO\", \"GREEN\"});\n\n  EXPECT_EQ(parser.TryMapNext(\"ONE\", 1, \"TWO\", 2), std::make_optional(2));\n\n  EXPECT_EQ(parser.TryMapNext(\"ONE\", 1, \"TWO\", 2), std::nullopt);\n  EXPECT_FALSE(parser.HasError());\n  EXPECT_EQ(parser.TryMapNext(\"green\", 1, \"yellow\", 2), std::make_optional(1));\n  EXPECT_FALSE(parser.HasError());\n}\n\nTEST_F(CmdArgParserTest, IgnoreCase) {\n  auto parser = Make({\"hello\", \"marker\", \"taail\", \"world\"});\n\n  EXPECT_EQ(absl::implicit_cast<string_view>(parser.Next()), \"hello\"sv);\n\n  EXPECT_TRUE(parser.Check(\"MARKER\"sv));\n  parser.Skip(1);\n\n  EXPECT_EQ(absl::implicit_cast<string_view>(parser.Next()), \"world\"sv);\n}\n\nTEST_F(CmdArgParserTest, 
FixedRangeInt) {\n  {\n    auto parser = Make({\"10\", \"-10\", \"12\"});\n\n    EXPECT_EQ((parser.Next<FInt<-11, 11>>().value), 10);\n    EXPECT_EQ((parser.Next<FInt<-11, 11>>().value), -10);\n    EXPECT_EQ((parser.Next<FInt<-11, 11>>().value), 0);\n\n    auto err = parser.TakeError();\n    EXPECT_TRUE(err);\n    EXPECT_EQ(err.type, CmdArgParser::INVALID_INT);\n    EXPECT_EQ(err.index, 2);\n  }\n\n  {\n    auto parser = Make({\"-12\"});\n    EXPECT_EQ((parser.Next<FInt<-11, 11>>().value), 0);\n\n    auto err = parser.TakeError();\n    EXPECT_TRUE(err);\n    EXPECT_EQ(err.type, CmdArgParser::INVALID_INT);\n    EXPECT_EQ(err.index, 0);\n  }\n}\n\n}  // namespace facade\n"
  },
  {
    "path": "src/facade/command_id.h",
    "content": "// Copyright 2023, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#pragma once\n\n#include <cstdint>\n#include <string>\n#include <string_view>\n\nnamespace facade {\n\nclass CommandId {\n public:\n  /**\n   * @brief Construct a new Command Id object\n   *\n   * When creating a new command use the https://github.com/redis/redis/tree/unstable/src/commands\n   * files to find the right arguments.\n   *\n   * @param name\n   * @param mask\n   * @param arity -     positive if command has fixed number of required arguments including\n   *                    the command, negative if command has minimum number of required arguments,\n   *                    but may have more.\n   * @param first_key - position of first key in argument list\n   * @param last_key  - position of last key in argument list,\n   *                    -1 means the last key index is (arg_length - 1), -2 means that the last key\n   * index is (arg_length - 2).\n   * @param acl_categories - bitfield for acl categories of the command\n   */\n  CommandId(const char* name, uint32_t mask, int8_t arity, int8_t first_key, int8_t last_key,\n            uint32_t acl_categories);\n\n  std::string_view name() const {\n    return name_;\n  }\n\n  int arity() const {\n    return arity_;\n  }\n\n  uint32_t opt_mask() const {\n    return opt_mask_;\n  }\n\n  int8_t first_key_pos() const {\n    return first_key_;\n  }\n\n  int8_t last_key_pos() const {\n    return last_key_;\n  }\n\n  uint32_t acl_categories() const {\n    return acl_categories_;\n  }\n\n  void SetFamily(size_t fam) {\n    family_ = fam;\n  }\n\n  void SetBitIndex(uint64_t bit) {\n    bit_index_ = bit;\n  }\n\n  size_t GetFamily() const {\n    return family_;\n  }\n\n  uint64_t GetBitIndex() const {\n    return bit_index_;\n  }\n\n  // Returns true if the command can only be used by admin connections, false\n  // otherwise.\n  bool IsRestricted() const {\n    return restricted_;\n  }\n\n  void 
SetRestricted(bool restricted) {\n    restricted_ = restricted;\n  }\n\n  void SetFlag(uint32_t flag) {\n    opt_mask_ |= flag;\n  }\n\n protected:\n  std::string name_;\n\n  uint32_t opt_mask_;\n  int8_t arity_;\n  int8_t first_key_;\n  int8_t last_key_;\n\n  // Acl categories\n  uint32_t acl_categories_;\n  // Acl commands indices\n  size_t family_;\n  uint64_t bit_index_;\n\n  // Whether the command can only be used by admin connections.\n  bool restricted_ = false;\n};\n\n}  // namespace facade\n"
  },
  {
    "path": "src/facade/conn_context.h",
    "content": "// Copyright 2022, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#pragma once\n\n#include <absl/container/flat_hash_set.h>\n\n#include <string_view>\n\nnamespace facade {\n\nclass Connection;\n\nclass ConnectionContext {\n public:\n  explicit ConnectionContext(Connection* owner) : owner_(owner) {\n    conn_closing = false;\n    req_auth = false;\n    replica_conn = false;\n    authenticated = false;\n    async_dispatch = false;\n    sync_dispatch = false;\n    paused = false;\n    blocked = false;\n\n    subscriptions = 0;\n  }\n\n  virtual ~ConnectionContext() {\n  }\n\n  Connection* conn() {\n    return owner_;\n  }\n\n  const Connection* conn() const {\n    return owner_;\n  }\n\n  virtual size_t UsedMemory() const {\n    return 0;\n  }\n\n  // Noop.\n  virtual void Unsubscribe(std::string_view channel) {\n  }\n\n  // connection state / properties.\n  bool conn_closing : 1;\n  bool req_auth : 1;\n  bool replica_conn : 1;  // whether it's a replica connection on the master side.\n  bool authenticated : 1;\n  bool async_dispatch : 1;  // whether this connection is amid an async dispatch\n  bool sync_dispatch : 1;   // whether this connection is amid a sync dispatch\n\n  bool paused = false;  // whether this connection is paused due to CLIENT PAUSE\n  // whether it's blocked on blocking commands like BLPOP, needs to be addressable\n  bool blocked = false;\n\n  // How many async subscription sources are active: monitor and/or pubsub - at most 2.\n  uint8_t subscriptions;\n\n private:\n  Connection* owner_;\n};\n\n}  // namespace facade\n"
  },
  {
    "path": "src/facade/connection_ref.h",
    "content": "// Copyright 2025, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#pragma once\n\n#include <cstdint>\n#include <memory>\n\nnamespace facade {\n\nclass Connection;\n\n// Weak reference to a connection, invalidated upon connection close.\n// Used to dispatch async operations for the connection without worrying about pointer lifetime.\nstruct ConnectionRef {\n public:\n  // Get residing thread of connection. Thread-safe.\n  unsigned LastKnownThreadId() const {\n    return last_known_thread_id_;\n  }\n  // Get pointer to connection if still valid, nullptr if expired.\n  // Can only be called from connection's thread. Validity is guaranteed\n  // only until the next suspension point.\n  Connection* Get() const;\n\n  // Returns true if the reference expired. Thread-safe.\n  bool IsExpired() const;\n\n  // Returns client id.Thread-safe.\n  uint32_t GetClientId() const;\n\n  bool operator<(const ConnectionRef& other) const;\n  bool operator==(const ConnectionRef& other) const;\n\n private:\n  friend class Connection;\n\n  ConnectionRef(const std::shared_ptr<Connection>& ptr, unsigned thread_id, uint32_t client_id);\n\n  std::weak_ptr<Connection> ptr_;\n  unsigned last_known_thread_id_;\n  uint32_t client_id_;\n};\n\n}  // namespace facade\n"
  },
  {
    "path": "src/facade/disk_backed_queue.cc",
    "content": "// Copyright 2026, DragonflyDB authors.  All rights reserved.\n//\n// See LICENSE for licensing terms.\n//\n\n#include \"facade/disk_backed_queue.h\"\n\n#include <absl/strings/str_cat.h>\n#include <fcntl.h>\n\n#include <cerrno>\n#include <cstring>\n#include <string>\n\n#include \"base/flags.h\"\n#include \"base/logging.h\"\n#include \"facade/facade_types.h\"\n#include \"io/io.h\"\n#include \"util/fibers/uring_file.h\"\n#include \"util/fibers/uring_proactor.h\"\n\nusing facade::operator\"\"_MB;\n\nABSL_FLAG(std::string, disk_backpressure_folder, \"/tmp/\",\n          \"Folder to store disk-backed connection backpressure\");\n\nABSL_FLAG(size_t, disk_backpressure_file_max_bytes, 50_MB,\n          \"Maximum size of the backing file. When max size is reached, connection will \"\n          \"stop offloading backpressure to disk and block on client read.\");\n\nnamespace facade {\n\nDiskBackedQueue::DiskBackedQueue(uint32_t conn_id)\n    : max_backing_size_(absl::GetFlag(FLAGS_disk_backpressure_file_max_bytes)), id_(conn_id) {\n}\n\nstd::error_code DiskBackedQueue::Init() {\n  std::string backing_name = absl::StrCat(absl::GetFlag(FLAGS_disk_backpressure_folder), id_);\n  // Open a single O_RDWR file so the same fd serves writes, reads, and fallocate punch holes.\n  // Kernel transparently handles buffering via the page cache.\n  auto res = util::fb2::OpenLinux(backing_name, O_RDWR | O_CREAT | O_TRUNC | O_CLOEXEC, 0600);\n  if (!res) {\n    return res.error();\n  }\n  file_ = std::move(*res);\n\n  VLOG(3) << \"Created backing for connection \" << this << \" \" << backing_name;\n\n  return {};\n}\n\nDiskBackedQueue::~DiskBackedQueue() {\n  DCHECK_EQ(in_flight_callbacks_, 0ul);\n}\n\nstd::error_code DiskBackedQueue::Close() {\n  if (file_) {\n    auto ec = file_->Close();\n    LOG_IF(WARNING, ec) << ec.message();\n\n    std::string backing = absl::StrCat(absl::GetFlag(FLAGS_disk_backpressure_folder), id_);\n    int errc = unlink(backing.c_str());\n    
LOG_IF(ERROR, errc != 0) << \"Failed to unlink backing file: \"\n                             << std::error_code{errc, std::system_category()};\n    return ec;\n  }\n\n  return {};\n}\n\n// Check if backing file is empty, i.e. backing file has 0 bytes.\nbool DiskBackedQueue::Empty() const {\n  return total_backing_bytes_ == 0;\n}\n\nbool DiskBackedQueue::HasEnoughBackingSpaceFor(size_t bytes) const {\n  return (bytes + total_backing_bytes_) < max_backing_size_;\n}\n\nvoid DiskBackedQueue::MaybePunchHole() {\n  // Punch holes over the aligned region we have fully read past so the OS can reclaim pages.\n  // Both offset and length must be multiples of the filesystem block size: XFS returns EINVAL\n  // otherwise, and ext4/tmpfs only zero partial blocks rather than freeing them.\n  // We assume 4096-byte blocks (correct for virtually all deployments); a fully robust\n  // implementation would query the actual block size via fstatfs(file_->GetFd(), &fsst) and\n  // align to fsst.f_bsize instead.\n  const size_t aligned_end = (next_read_offset_ / 4096) * 4096;\n  if (aligned_end > punch_offset_) {\n    int res = fallocate(file_->GetFd(), FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, punch_offset_,\n                        aligned_end - punch_offset_);\n    DCHECK_EQ(res, 0) << \"fallocate punch failed: \" << strerror(errno);\n    punch_offset_ = aligned_end;\n  }\n}\n\nvoid DiskBackedQueue::PushAsync(io::Bytes bytes, AsyncPushCallback cb) {\n  const size_t offset = write_offset_;\n  const size_t size = bytes.size();\n  ++in_flight_callbacks_;\n\n  file_->WriteAsync(bytes, offset, [this, size, cb = std::move(cb)](int res) {\n    --in_flight_callbacks_;\n    if (res < 0) {\n      std::error_code ec{-res, std::system_category()};\n      VLOG(2) << \"Failed to offload blob of size \" << size << \" to backing with error: \" << ec;\n      cb(ec);\n      return;\n    }\n\n    write_offset_ += size;\n    total_backing_bytes_ += size;\n    VLOG(2) << \"Offload connection \" << 
this << \" backpressure of \" << size;\n    cb({});\n  });\n}\n\nvoid DiskBackedQueue::PopAsync(io::MutableBytes out, AsyncPopCallback cb) {\n  const size_t to_read = std::min(total_backing_bytes_, out.size());\n  const size_t offset = next_read_offset_;\n  ++in_flight_callbacks_;\n\n  // Capture a subset of out for the actual read size\n  io::MutableBytes read_buf = out.subspan(0, to_read);\n\n  file_->ReadAsync(read_buf, offset, [this, to_read, offset, cb = std::move(cb)](int res) {\n    --in_flight_callbacks_;\n    if (res < 0) {\n      std::error_code ec{-res, std::system_category()};\n      LOG(ERROR) << \"Could not load item at offset \" << offset << \" of size \" << to_read\n                 << \" from disk with error: \" << ec.value() << \" \" << ec.message();\n      cb(nonstd::make_unexpected(ec));\n      return;\n    }\n\n    size_t bytes_read = static_cast<size_t>(res);\n    next_read_offset_ += bytes_read;\n    total_backing_bytes_ -= bytes_read;\n\n    VLOG(2) << \"Loaded item with offset \" << offset << \" of size \" << bytes_read\n            << \" for connection \" << this;\n\n    MaybePunchHole();\n\n    cb(bytes_read);\n  });\n}\n\n}  // namespace facade\n"
  },
  {
    "path": "src/facade/disk_backed_queue.h",
    "content": "// Copyright 2026, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#pragma once\n\n#include <functional>\n#include <memory>\n#include <string_view>\n#include <system_error>\n\n#include \"io/io.h\"\n#include \"util/fibers/uring_file.h\"\n\nnamespace facade {\n\nclass DiskBackedQueue {\n public:\n  explicit DiskBackedQueue(uint32_t conn_id);\n  ~DiskBackedQueue();\n\n  std::error_code Init();\n\n  // Check if we can offload bytes to backing file.\n  bool HasEnoughBackingSpaceFor(size_t bytes) const;\n\n  using AsyncPushCallback = std::function<void(std::error_code)>;\n\n  void PushAsync(io::Bytes bytes, AsyncPushCallback cb);\n\n  using AsyncPopCallback = std::function<void(io::Result<size_t>)>;\n\n  // Async read variant. Callback is invoked with Result containing bytes read or error.\n  void PopAsync(io::MutableBytes out, AsyncPopCallback cb);\n\n  // Check if backing file is empty, i.e. backing file has 0 bytes.\n  bool Empty() const;\n\n  std::error_code Close();\n\n private:\n  // Punch holes over the aligned region we have fully read past so the OS can reclaim pages.\n  void MaybePunchHole();\n\n  // Single O_RDWR file used for both writes and reads, avoiding a separate fd for fallocate.\n  std::unique_ptr<util::fb2::LinuxFile> file_;\n\n  size_t write_offset_ = 0;\n  size_t total_backing_bytes_ = 0;\n  size_t next_read_offset_ = 0;\n  // Tracks how far into the file holes have been punched (always 4096-aligned).\n  size_t punch_offset_ = 0;\n\n  // Read only constants\n  const size_t max_backing_size_ = 0;\n\n  // same as connection id. Used to uniquely identify the backed file\n  const size_t id_ = 0;\n  size_t in_flight_callbacks_ = 0;\n};\n\n}  // namespace facade\n"
  },
  {
    "path": "src/facade/disk_backed_queue_test.cc",
    "content": "// Copyright 2026, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#include \"facade/disk_backed_queue.h\"\n\n#include <absl/strings/str_cat.h>\n#include <fcntl.h>\n#include <gmock/gmock.h>\n#include <unistd.h>\n\n#include <memory>\n#include <string>\n#include <vector>\n\n#include \"base/flags.h\"\n#include \"base/gtest.h\"\n#include \"base/logging.h\"\n#include \"io/io.h\"\n#include \"util/fibers/pool.h\"\n\nnamespace dfly {\nnamespace {\n\nusing namespace facade;\n\nclass DiskBackedQueueTest : public testing::Test {\n protected:\n  void SetUp() override {\n    pp_.reset(util::fb2::Pool::IOUring(16, 1));\n    pp_->Run();\n  }\n\n  void TearDown() override {\n    pp_->Stop();\n    pp_.reset();\n  }\n\n  std::unique_ptr<util::ProactorPool> pp_;\n};\n\n// Verifies that after reading >= 4096 bytes, punch_hole is called correctly\n// and disk space is reclaimed.\nTEST_F(DiskBackedQueueTest, PunchHoleReleasesSpace) {\n  pp_->at(0)->Await([]() {\n    // Use id=2 to avoid collision with ReadWrite test.\n    DiskBackedQueue backing(2);\n    ASSERT_FALSE(backing.Init());\n\n    // Write 3 pages (12288 bytes) so the punch logic is triggered on reads.\n    std::string data(12288, 'x');\n    {\n      util::fb2::Done done;\n      backing.PushAsync(io::MutableBytes(reinterpret_cast<uint8_t*>(data.data()), data.size()),\n                        [&done](std::error_code ec) {\n                          ASSERT_FALSE(ec);\n                          done.Notify();\n                        });\n      done.Wait();\n    }\n\n    // Read all data back in 4096-byte chunks.\n    std::string results;\n    while (!backing.Empty()) {\n      std::string buf(4096, '\\0');\n      auto out = io::MutableBytes(reinterpret_cast<uint8_t*>(buf.data()), buf.size());\n      util::fb2::Done done;\n      backing.PopAsync(out, [&done, &results, &buf](io::Result<size_t> res) {\n        ASSERT_TRUE(res);\n        results.append(buf.data(), *res);\n        
done.Notify();\n      });\n      done.Wait();\n    }\n    EXPECT_EQ(results, data);\n\n    // After reading all 3 pages the punch should have freed the first 3 aligned pages.\n    // SEEK_HOLE at offset 0 returns 0 when a hole starts at the beginning of the file.\n    int check_fd = open(\"/tmp/2\", O_RDONLY);\n    ASSERT_GE(check_fd, 0);\n    off_t hole_start = lseek(check_fd, 0, SEEK_HOLE);\n    close(check_fd);\n    EXPECT_EQ(hole_start, 0) << \"Expected hole at start of file - punch_hole did not free space\";\n\n    ASSERT_FALSE(backing.Close());\n  });\n}\n\n// Verifies that reading across multiple pages advances the punch offset correctly so that\n// successive reads keep freeing space (not re-punching offset 0 or skipping blocks).\nTEST_F(DiskBackedQueueTest, PunchHoleAdvancesOffset) {\n  pp_->at(0)->Await([]() {\n    DiskBackedQueue backing(3);\n    ASSERT_FALSE(backing.Init());\n\n    // Write 8 pages so we can do several reads and check the hole grows.\n    std::string data(32768, 'y');\n    {\n      util::fb2::Done done;\n      backing.PushAsync(io::MutableBytes(reinterpret_cast<uint8_t*>(data.data()), data.size()),\n                        [&done](std::error_code ec) {\n                          ASSERT_FALSE(ec);\n                          done.Notify();\n                        });\n      done.Wait();\n    }\n\n    // Read exactly 4096 bytes (1 page).\n    {\n      std::string buf(4096, '\\0');\n      auto out = io::MutableBytes(reinterpret_cast<uint8_t*>(buf.data()), buf.size());\n      util::fb2::Done done;\n      backing.PopAsync(out, [&done](io::Result<size_t> res) {\n        ASSERT_TRUE(res);\n        done.Notify();\n      });\n      done.Wait();\n    }\n\n    // After 1 page read the hole should start at 0 and the first non-hole (data) should be at\n    // offset 4096 (i.e., lseek SEEK_DATA starting from 0 skips the punched hole).\n    int check_fd = open(\"/tmp/3\", O_RDONLY);\n    ASSERT_GE(check_fd, 0);\n    off_t first_hole = lseek(check_fd, 
0, SEEK_HOLE);\n    off_t first_data = lseek(check_fd, 0, SEEK_DATA);\n    close(check_fd);\n\n    EXPECT_EQ(first_hole, 0) << \"Hole should begin at offset 0 after first page read\";\n    EXPECT_EQ(first_data, 4096) << \"Non-hole data should start at 4096 after punching first page\";\n\n    ASSERT_FALSE(backing.Close());\n  });\n}\n\n// Verifies that unaligned writes and reads correctly punch holes at aligned boundaries.\n// Punch should only occur when we've fully read past 4096-byte boundaries.\nTEST_F(DiskBackedQueueTest, PunchHoleUnalignedReadsAndWrites) {\n  pp_->at(0)->Await([]() {\n    DiskBackedQueue backing(4);\n    ASSERT_FALSE(backing.Init());\n\n    // Write 10000 bytes (not a multiple of 4096).\n    // This is 2 full pages (8192 bytes) + 1808 partial bytes.\n    std::string data(10000, 'z');\n    {\n      util::fb2::Done done;\n      backing.PushAsync(io::MutableBytes(reinterpret_cast<uint8_t*>(data.data()), data.size()),\n                        [&done](std::error_code ec) {\n                          ASSERT_FALSE(ec);\n                          done.Notify();\n                        });\n      done.Wait();\n    }\n\n    // Read 3000 bytes (unaligned, less than 1 page).\n    // next_read_offset_ will be 3000, but aligned_end = (3000/4096)*4096 = 0.\n    // So no punch should happen yet.\n    std::string results;\n    {\n      std::string buf(3000, '\\0');\n      auto out = io::MutableBytes(reinterpret_cast<uint8_t*>(buf.data()), buf.size());\n      util::fb2::Done done;\n      backing.PopAsync(out, [&done, &results, &buf](io::Result<size_t> res) {\n        ASSERT_TRUE(res);\n        results.append(buf.data(), *res);\n        done.Notify();\n      });\n      done.Wait();\n    }\n\n    // Check that no hole exists yet (first 3000 bytes read but not 4096-aligned).\n    int check_fd = open(\"/tmp/4\", O_RDONLY);\n    ASSERT_GE(check_fd, 0);\n    off_t hole_at_start = lseek(check_fd, 0, SEEK_HOLE);\n    // SEEK_HOLE from offset 0 should jump to EOF if no 
hole exists at start.\n    EXPECT_GT(hole_at_start, 0) << \"No hole should exist yet after reading 3000 bytes\";\n    close(check_fd);\n\n    // Read another 2000 bytes (total read = 5000 bytes).\n    // next_read_offset_ will be 5000, aligned_end = (5000/4096)*4096 = 4096.\n    // Now the first page (0-4095) should be punched.\n    {\n      std::string buf(2000, '\\0');\n      auto out = io::MutableBytes(reinterpret_cast<uint8_t*>(buf.data()), buf.size());\n      util::fb2::Done done;\n      backing.PopAsync(out, [&done, &results, &buf](io::Result<size_t> res) {\n        ASSERT_TRUE(res);\n        results.append(buf.data(), *res);\n        done.Notify();\n      });\n      done.Wait();\n    }\n\n    // Verify first page is now a hole.\n    check_fd = open(\"/tmp/4\", O_RDONLY);\n    ASSERT_GE(check_fd, 0);\n    off_t first_hole = lseek(check_fd, 0, SEEK_HOLE);\n    off_t first_data = lseek(check_fd, 0, SEEK_DATA);\n    EXPECT_EQ(first_hole, 0) << \"Hole should start at offset 0 after reading past 4096 bytes\";\n    EXPECT_EQ(first_data, 4096) << \"Data should start at 4096 (second page)\";\n\n    // Read another 3500 bytes (total read = 8500 bytes).\n    // next_read_offset_ will be 8500, aligned_end = (8500/4096)*4096 = 8192.\n    // Now the first two pages (0-8191) should be punched.\n    {\n      std::string buf(3500, '\\0');\n      auto out = io::MutableBytes(reinterpret_cast<uint8_t*>(buf.data()), buf.size());\n      util::fb2::Done done;\n      backing.PopAsync(out, [&done, &results, &buf](io::Result<size_t> res) {\n        ASSERT_TRUE(res);\n        results.append(buf.data(), *res);\n        done.Notify();\n      });\n      done.Wait();\n    }\n\n    // Verify first two pages are holes.\n    first_hole = lseek(check_fd, 0, SEEK_HOLE);\n    first_data = lseek(check_fd, 0, SEEK_DATA);\n    close(check_fd);\n    EXPECT_EQ(first_hole, 0) << \"Hole should start at offset 0\";\n    EXPECT_EQ(first_data, 8192) << \"Data should start at 8192 (third page)\";\n\n    
// Read remaining data and verify results match.\n    while (!backing.Empty()) {\n      std::string buf(4096, '\\0');\n      auto out = io::MutableBytes(reinterpret_cast<uint8_t*>(buf.data()), buf.size());\n      util::fb2::Done done;\n      backing.PopAsync(out, [&done, &results, &buf](io::Result<size_t> res) {\n        ASSERT_TRUE(res);\n        results.append(buf.data(), *res);\n        done.Notify();\n      });\n      done.Wait();\n    }\n    EXPECT_EQ(results, data);\n\n    ASSERT_FALSE(backing.Close());\n  });\n}\n\nTEST_F(DiskBackedQueueTest, AsyncReadWrite) {\n  pp_->at(0)->Await([]() {\n    DiskBackedQueue backing(5 /* id */);\n    EXPECT_FALSE(backing.Init());\n\n    std::string commands;\n    for (size_t i = 0; i < 100; ++i) {\n      auto cmd = absl::StrCat(\"SET FOO\", i, \" BAR\");\n      commands += cmd;\n    }\n\n    // Async write all commands\n    util::fb2::Fiber write_fiber = util::fb2::Fiber(\"writer\", [&]() {\n      for (size_t i = 0; i < 100; ++i) {\n        auto cmd = absl::StrCat(\"SET FOO\", i, \" BAR\");\n        auto bytes = io::MutableBytes(reinterpret_cast<uint8_t*>(cmd.data()), cmd.size());\n\n        util::fb2::Done done;\n        backing.PushAsync(bytes, [&done](std::error_code ec) {\n          EXPECT_FALSE(ec);\n          done.Notify();\n        });\n        done.Wait();\n      }\n    });\n\n    write_fiber.Join();\n\n    // Async read all results\n    std::string results;\n    util::fb2::Fiber read_fiber = util::fb2::Fiber(\"reader\", [&]() {\n      while (!backing.Empty()) {\n        std::string buf(1024, 'c');\n        auto bytes = io::MutableBytes(reinterpret_cast<uint8_t*>(buf.data()), buf.size());\n\n        util::fb2::Done done;\n        backing.PopAsync(bytes, [&done, &results, &buf](io::Result<size_t> res) {\n          EXPECT_TRUE(res);\n          results.append(buf.data(), *res);\n          done.Notify();\n        });\n        done.Wait();\n      }\n    });\n\n    read_fiber.Join();\n\n    EXPECT_EQ(results.size(), 
commands.size());\n    EXPECT_EQ(results, commands);\n\n    EXPECT_FALSE(backing.Close());\n  });\n}\n\nTEST_F(DiskBackedQueueTest, AsyncPunchHole) {\n  pp_->at(0)->Await([]() {\n    DiskBackedQueue backing(6);\n    ASSERT_FALSE(backing.Init());\n\n    // Write 3 pages (12288 bytes) asynchronously\n    std::string data(12288, 'x');\n\n    util::fb2::Done write_done;\n    backing.PushAsync(io::MutableBytes(reinterpret_cast<uint8_t*>(data.data()), data.size()),\n                      [&write_done](std::error_code ec) {\n                        ASSERT_FALSE(ec);\n                        write_done.Notify();\n                      });\n    write_done.Wait();\n\n    // Async read all data back in 4096-byte chunks\n    std::string results;\n    while (!backing.Empty()) {\n      std::string buf(4096, '\\0');\n      auto out = io::MutableBytes(reinterpret_cast<uint8_t*>(buf.data()), buf.size());\n\n      util::fb2::Done read_done;\n      backing.PopAsync(out, [&read_done, &results, &buf](io::Result<size_t> res) {\n        ASSERT_TRUE(res);\n        results.append(buf.data(), *res);\n        read_done.Notify();\n      });\n      read_done.Wait();\n    }\n    EXPECT_EQ(results, data);\n\n    // Verify punch hole freed space\n    int check_fd = open(\"/tmp/6\", O_RDONLY);\n    ASSERT_GE(check_fd, 0);\n    off_t hole_start = lseek(check_fd, 0, SEEK_HOLE);\n    close(check_fd);\n    EXPECT_EQ(hole_start, 0) << \"Expected hole at start of file - async punch did not free space\";\n\n    ASSERT_FALSE(backing.Close());\n  });\n}\n\n}  // namespace\n}  // namespace dfly\n"
  },
  {
    "path": "src/facade/dragonfly_connection.cc",
    "content": "// Copyright 2026, DragonflyDB authors.  All rights reserved.\n//\n// See LICENSE for licensing terms.\n//\n\n#include \"facade/dragonfly_connection.h\"\n\n#include <absl/cleanup/cleanup.h>\n#include <absl/container/flat_hash_map.h>\n#include <absl/strings/escaping.h>\n#include <absl/strings/match.h>\n#include <absl/strings/str_cat.h>\n#include <absl/time/time.h>\n\n#include <numeric>\n#include <variant>\n\n#include \"base/cycle_clock.h\"\n#include \"base/flag_utils.h\"\n#include \"base/flags.h\"\n#include \"base/histogram.h\"\n#include \"base/io_buf.h\"\n#include \"base/logging.h\"\n#include \"base/stl_util.h\"\n#include \"common/heap_size.h\"\n#include \"facade/conn_context.h\"\n#include \"facade/dragonfly_listener.h\"\n#include \"facade/facade_types.h\"\n#include \"facade/memcache_parser.h\"\n#include \"facade/redis_parser.h\"\n#include \"facade/reply_builder.h\"\n#include \"facade/resp_srv_parser.h\"\n#include \"facade/service_interface.h\"\n#include \"facade/socket_utils.h\"\n#include \"io/file.h\"\n#include \"strings/human_readable.h\"\n#include \"util/fiber_socket_base.h\"\n#include \"util/fibers/fibers.h\"\n#include \"util/fibers/proactor_base.h\"\n\n#ifdef DFLY_USE_SSL\n#include \"util/tls/tls_socket.h\"\n#endif\n\n#ifdef __linux__\n#include \"util/fibers/uring_file.h\"\n#include \"util/fibers/uring_proactor.h\"\n#include \"util/fibers/uring_socket.h\"\n#endif\n\nusing namespace std;\nusing facade::operator\"\"_MB;\n\nABSL_FLAG(bool, tcp_nodelay, true,\n          \"Configures dragonfly connections with socket option TCP_NODELAY\");\nABSL_FLAG(bool, primary_port_http_enabled, true,\n          \"If true allows accessing http console on main TCP port\");\n\nABSL_FLAG(uint16_t, admin_port, 0,\n          \"If set, would enable admin access to console on the assigned port. 
\"\n          \"This supports both HTTP and RESP protocols\");\n\nABSL_FLAG(string, admin_bind, \"\",\n          \"If set, the admin consol TCP connection would be bind the given address. \"\n          \"This supports both HTTP and RESP protocols\");\n\nABSL_FLAG(strings::MemoryBytesFlag, request_cache_limit, 64_MB,\n          \"Amount of memory to use for request cache in bytes - per IO thread.\");\n\nABSL_FLAG(strings::MemoryBytesFlag, pipeline_buffer_limit, 128_MB,\n          \"Amount of memory to use for storing pipeline requests - per IO thread.\"\n          \"Please note that clients that send excecissively huge pipelines, \"\n          \"may deadlock themselves. See https://github.com/dragonflydb/dragonfly/discussions/3997\"\n          \"for details.\");\n\nABSL_FLAG(uint32_t, pipeline_queue_limit, 10000,\n          \"Pipeline queue max length, the server will stop reading from the client socket\"\n          \" once its pipeline queue crosses this limit, and will resume once it processes \"\n          \"excessive requests. This is to prevent OOM states. 
Users of huge pipelines sizes \"\n          \"may require increasing this limit to prevent the risk of deadlocking.\"\n          \"See https://github.com/dragonflydb/dragonfly/discussions/3997 for details\");\n\nABSL_FLAG(strings::MemoryBytesFlag, publish_buffer_limit, 128_MB,\n          \"Amount of memory to use for storing pub commands in bytes - per IO thread\");\n\nABSL_FLAG(uint32_t, pipeline_squash, 1,\n          \"Number of queued pipelined commands above which squashing is enabled, 0 means disabled\");\n\n// When changing this constant, also update `test_large_cmd` test in connection_test.py.\nABSL_FLAG(uint32_t, max_multi_bulk_len, 1u << 16,\n          \"Maximum multi-bulk (array) length that is \"\n          \"allowed to be accepted when parsing RESP protocol\");\n\nABSL_FLAG(uint64_t, max_bulk_len, 2u << 30,\n          \"Maximum bulk length that is \"\n          \"allowed to be accepted when parsing RESP protocol\");\n\nABSL_FLAG(strings::MemoryBytesFlag, max_client_iobuf_len, 1u << 16,\n          \"Maximum io buffer length that is used to read client requests.\");\n\nABSL_FLAG(bool, migrate_connections, true,\n          \"When enabled, Dragonfly will try to migrate connections to the target thread on which \"\n          \"they operate. Currently this is only supported for Lua script invocations, and can \"\n          \"happen at most once per connection.\");\n\nABSL_FLAG(uint32_t, max_busy_read_usec, 200,\n          \"Maximum time we read and parse from \"\n          \"a socket without yielding. In microseconds.\");\n\nABSL_FLAG(size_t, squashed_reply_size_limit, 0,\n          \"Max bytes allowed for squashing_current_reply_size. 
If this limit is reached, \"\n          \"connections dispatching pipelines won't squash them.\");\n\nABSL_FLAG(bool, always_flush_pipeline, false,\n          \"if true will flush pipeline response after each pipeline squashing\");\n\nABSL_FLAG(uint32_t, async_dispatch_quota, 100,\n          \"Maximum number of consecutive async dispatch messages to process before either \"\n          \"yielding to I/O when the pipeline appears empty or forcibly processing a queued \"\n          \"pipelined command to prevent starvation. Set to 0 to disable this mechanism.\");\n\nABSL_FLAG(uint32_t, pipeline_squash_limit, 1 << 30, \"Limit on the size of a squashed pipeline. \");\nABSL_FLAG(uint32_t, pipeline_wait_batch_usec, 0,\n          \"If non-zero, waits for this time for more I/O \"\n          \" events to come for the connection in case there is only one command in the pipeline. \");\n\nABSL_FLAG(bool, experimental_io_loop_v2, true, \"new io loop\");\n\nusing namespace util;\nusing namespace std;\nusing absl::GetFlag;\nusing base::CycleClock;\nusing nonstd::make_unexpected;\n\nnamespace facade {\n\nnamespace {\n\nvoid SendProtocolError(RespSrvParser::Result pres, SinkReplyBuilder* builder) {\n  constexpr string_view res = \"-ERR Protocol error: \"sv;\n  if (pres == RespSrvParser::BAD_BULKLEN) {\n    builder->SendProtocolError(absl::StrCat(res, \"invalid bulk length\"));\n  } else if (pres == RespSrvParser::BAD_ARRAYLEN) {\n    builder->SendProtocolError(absl::StrCat(res, \"invalid multibulk length\"));\n  } else {\n    builder->SendProtocolError(absl::StrCat(res, \"parse error\"));\n  }\n}\n\n// TODO: to implement correct matcher according to HTTP spec\n// https://www.w3.org/Protocols/rfc2616/rfc2616-sec5.html\n// One place to find a good implementation would be https://github.com/h2o/picohttpparser\nbool MatchHttp11Line(string_view line) {\n  return (absl::StartsWith(line, \"GET \") || absl::StartsWith(line, \"POST \")) &&\n         absl::EndsWith(line, 
\"HTTP/1.1\");\n}\n\nvoid UpdateIoBufCapacity(const io::IoBuf& io_buf, ConnectionStats* stats,\n                         absl::FunctionRef<void()> f) {\n  const size_t prev_capacity = io_buf.Capacity();\n  f();\n  const size_t capacity = io_buf.Capacity();\n  if (prev_capacity != capacity) {\n    VLOG(2) << \"Grown io_buf to \" << capacity;\n    stats->read_buf_capacity += capacity - prev_capacity;\n  }\n}\n\nsize_t UsedMemoryInternal(const ParsedCommand& msg) {\n  return msg.GetSize() + msg.HeapMemory();\n}\n\nstruct TrafficLogger {\n  // protects agains closing the file while writing or data races when opening the file.\n  // Also, makes sure that LogTraffic are executed atomically.\n  fb2::Mutex mutex;\n  unique_ptr<io::WriteFile> log_file;\n\n  void ResetLocked();\n  // Returns true if Write succeeded, false if it failed and the recording should be aborted.\n  bool Write(string_view blob);\n  bool Write(iovec* blobs, size_t len);\n};\n\nvoid TrafficLogger::ResetLocked() {\n  if (log_file) {\n    std::ignore = log_file->Close();\n    log_file.reset();\n  }\n}\n\n// Returns true if Write succeeded, false if it failed and the recording should be aborted.\nbool TrafficLogger::Write(string_view blob) {\n  auto ec = log_file->Write(io::Buffer(blob));\n  if (ec) {\n    LOG(ERROR) << \"Error writing to traffic log: \" << ec;\n    ResetLocked();\n    return false;\n  }\n  return true;\n}\n\nbool TrafficLogger::Write(iovec* blobs, size_t len) {\n  auto ec = log_file->Write(blobs, len);\n  if (ec) {\n    LOG(ERROR) << \"Error writing to traffic log: \" << ec;\n    ResetLocked();\n    return false;\n  }\n  return true;\n}\n\nthread_local TrafficLogger tl_traffic_logger{};\nthread_local base::Histogram* io_req_size_hist = nullptr;\n\nthread_local const size_t reply_size_limit = absl::GetFlag(FLAGS_squashed_reply_size_limit);\nthread_local uint32 pipeline_wait_batch_usec = absl::GetFlag(FLAGS_pipeline_wait_batch_usec);\n\nvoid OpenTrafficLogger(string_view base_path) {\n  
unique_lock lk{tl_traffic_logger.mutex};\n  if (tl_traffic_logger.log_file)\n    return;\n\n#ifdef __linux__\n  // Open file with append mode, without it concurrent fiber writes seem to conflict\n  string path = absl::StrCat(\n      base_path, \"-\", absl::Dec(ProactorBase::me()->GetPoolIndex(), absl::kZeroPad3), \".bin\");\n  auto file = util::fb2::OpenWrite(path, io::WriteFile::Options{/*.append = */ false});\n  if (!file) {\n    LOG(ERROR) << \"Error opening a file \" << path << \" for traffic logging: \" << file.error();\n    return;\n  }\n  tl_traffic_logger.log_file = unique_ptr<io::WriteFile>{file.value()};\n#else\n  LOG(WARNING) << \"Traffic logger is only supported on Linux\";\n#endif\n\n  // Write version, incremental numbering :)\n  uint8_t version[1] = {2};\n  std::ignore = tl_traffic_logger.log_file->Write(version);\n}\n\nvoid LogTraffic(uint32_t id, bool has_more, const cmn::BackedArguments& args,\n                ServiceInterface::ContextInfo ci) {\n  string_view cmd = args.Front();\n  if (absl::EqualsIgnoreCase(cmd, \"debug\"sv))\n    return;\n\n  DVLOG(2) << \"Recording \" << cmd;\n\n  char stack_buf[1024];\n  char* next = stack_buf;\n\n  // We write id, timestamp, db_index, has_more, num_parts, part_len, part_len, part_len, ...\n  // And then all the part blobs concatenated together.\n  auto write_u32 = [&next](uint32_t i) {\n    absl::little_endian::Store32(next, i);\n    next += 4;\n  };\n\n  // id\n  write_u32(id);\n\n  // timestamp\n  absl::little_endian::Store64(next, absl::GetCurrentTimeNanos());\n  next += 8;\n\n  // db_index\n  write_u32(ci.db_index);\n\n  // has_more, num_parts\n  write_u32(has_more ? 
1 : 0);\n  write_u32(uint32_t(args.size()));\n\n  // Grab the lock and check if the file is still open.\n  lock_guard lk{tl_traffic_logger.mutex};\n  if (!tl_traffic_logger.log_file)\n    return;\n\n  // part_len, ...\n  for (auto part : args) {\n    if (size_t(next - stack_buf + 4) > sizeof(stack_buf)) {\n      if (!tl_traffic_logger.Write(string_view{stack_buf, size_t(next - stack_buf)})) {\n        return;\n      }\n      next = stack_buf;\n    }\n    write_u32(part.size());\n  }\n\n  // Write the data itself.\n  array<iovec, 16> blobs;\n  unsigned index = 0;\n  if (next != stack_buf) {\n    blobs[index++] = iovec{.iov_base = stack_buf, .iov_len = size_t(next - stack_buf)};\n  }\n\n  for (auto part : args) {\n    if (auto blob_len = part.size(); blob_len > 0) {\n      blobs[index++] = iovec{.iov_base = const_cast<char*>(part.data()), .iov_len = blob_len};\n\n      if (index >= blobs.size()) {\n        if (!tl_traffic_logger.Write(blobs.data(), blobs.size())) {\n          return;\n        }\n        index = 0;\n      }\n    }\n  }\n\n  if (index) {\n    tl_traffic_logger.Write(blobs.data(), index);\n  }\n}\n\nconstexpr size_t kMinReadSize = 256;\n\nconst char* kPhaseName[Connection::NUM_PHASES] = {\"SETUP\", \"READ\", \"PROCESS\", \"SHUTTING_DOWN\",\n                                                  \"PRECLOSE\"};\n\n// Keeps track of total per-thread sizes of dispatch queues to limit memory taken up by messages\n// in these queues.\nstruct QueueBackpressure {\n  QueueBackpressure() {\n  }\n\n  // Block until subscriber memory usage is below limit, can be called from any thread.\n  void EnsureBelowLimit();\n\n  // Checks if backpressure should be applied.\n  // 'size' should be the total bytes currently consumed by all connections on this thread.\n  // 'q_len' should be the length of the pipeline queue for the current connection.\n  //\n  // Returns true if EITHER:\n  // 1. 
Thread-local: memory limit (on all thread's connections) is exceeded (protects server from\n  // OOM).\n  // 2. Per-Connection queue length limit is exceeded (protects against single-client abuse).\n  bool IsPipelineBufferOverLimit(size_t size, uint32_t q_len) const {\n    return size >= (pipeline_buffer_limit) || (q_len > pipeline_queue_max_len);\n  }\n\n  // Checks if usage has dropped below the limit in at least one criteria.\n  // Used to determine if we should notify waiters.\n  // 'size' should be the total bytes currently consumed by all connections on this thread.\n  // 'q_len' should be the length of the pipeline queue for the current connection.\n  //\n  // Returns true if EITHER:\n  // 1. Thread-Global memory is now under the limit (allows neighbors to wake up).\n  // 2. Per-Connection queue length is now within the limit (allows self to wake up).\n  bool IsPipelineBufferUnderLimit(size_t size, uint32_t q_len) const {\n    return (size < pipeline_buffer_limit) || (q_len <= pipeline_queue_max_len);\n  }\n\n  // Used by publisher/subscriber actors to make sure we do not publish too many messages\n  // into the queue. 
Thread-safe to allow safe access in EnsureBelowLimit.\n  util::fb2::EventCount pubsub_ec;\n  atomic_size_t subscriber_bytes = 0;\n\n  // Used by pipelining/execution fiber to throttle the incoming pipeline messages.\n  // Used together with pipeline_buffer_limit to limit the pipeline usage per thread.\n  util::fb2::CondVarAny pipeline_cnd;\n\n  size_t publish_buffer_limit = 0;        // cached flag publish_buffer_limit\n  size_t pipeline_cache_limit = 0;        // cached flag pipeline_cache_limit\n  size_t pipeline_buffer_limit = 0;       // cached flag for buffer size in bytes\n  uint32_t pipeline_queue_max_len = 256;  // cached flag for pipeline queue max length.\n};\n\nvoid QueueBackpressure::EnsureBelowLimit() {\n  pubsub_ec.await(\n      [this] { return subscriber_bytes.load(memory_order_relaxed) <= publish_buffer_limit; });\n}\n\n// Global array for each io thread to keep track of the total memory usage of the dispatch queues.\nQueueBackpressure* thread_queue_backpressure = nullptr;\n\nQueueBackpressure& GetQueueBackpressure() {\n  DCHECK(thread_queue_backpressure != nullptr);\n\n  return thread_queue_backpressure[ProactorBase::me()->GetPoolIndex()];\n}\n\n// A special accessor for accessing thread local ConnectionStats that is robust to fiber-thread\n// migrations. Compiler optimizations can cache a stale thread local pointer, and not refresh it\n// after HandleMigrateRequest() is called. 
This function should be used to force loading\n// the variable from memory every time, preventing such bugs.\nConnectionStats& __attribute__((noinline)) GetLocalConnStats() {\n  // https://stackoverflow.com/a/75622732\n  asm volatile(\"\");\n\n  return tl_facade_stats->conn_stats;\n}\n\nthread_local uint64_t max_busy_read_cycles_cached = 1ULL << 32;\nthread_local bool always_flush_pipeline_cached = absl::GetFlag(FLAGS_always_flush_pipeline);\nthread_local uint32_t pipeline_squash_limit_cached = absl::GetFlag(FLAGS_pipeline_squash_limit);\n\n}  // namespace\n\nthread_local vector<Connection::PipelineMessagePtr> Connection::pipeline_req_pool_;\n\nclass PipelineCacheSizeTracker {\n public:\n  bool CheckAndUpdateWatermark(size_t pipeline_sz) {\n    const auto now = absl::Now();\n    const auto elapsed = now - last_check_;\n    min_ = std::min(min_, pipeline_sz);\n    if (elapsed < absl::Milliseconds(10)) {\n      return false;\n    }\n\n    const bool watermark_reached = (min_ > 0);\n    min_ = Limits::max();\n    last_check_ = absl::Now();\n\n    return watermark_reached;\n  }\n\n private:\n  using Limits = std::numeric_limits<size_t>;\n\n  absl::Time last_check_ = absl::Now();\n  size_t min_ = Limits::max();\n};\n\nthread_local PipelineCacheSizeTracker tl_pipe_cache_sz_tracker;\n\nsize_t Connection::MessageHandle::UsedMemory() const {\n  struct MessageSize {\n    size_t operator()(const PubMessagePtr& msg) {\n      return sizeof(PubMessage) + (msg->channel.size() + msg->message.size());\n    }\n    size_t operator()(const MonitorMessage& msg) {\n      return msg.capacity();\n    }\n    size_t operator()(const MigrationRequestMessage& msg) {\n      return 0;\n    }\n    size_t operator()(const CheckpointMessage& msg) {\n      return 0;  // no access to internal type, memory usage negligible\n    }\n    size_t operator()(const InvalidationMessage& msg) {\n      return 0;\n    }\n  };\n\n  return sizeof(MessageHandle) + visit(MessageSize{}, this->handle);\n}\n\nbool 
Connection::MessageHandle::IsReplying() const {\n  return IsPubMsg() || holds_alternative<MonitorMessage>(handle);\n}\n\nstruct Connection::AsyncOperations {\n  AsyncOperations(SinkReplyBuilder* b, Connection* me) : builder{b}, self(me) {\n  }\n\n  void operator()(const PubMessage& msg);\n  void operator()(ParsedCommand& msg);\n  void operator()(const MonitorMessage& msg);\n  void operator()(const MigrationRequestMessage& msg);\n  void operator()(CheckpointMessage msg);\n  void operator()(const InvalidationMessage& msg);\n\n  template <typename T, typename D> void operator()(unique_ptr<T, D>& ptr) {\n    operator()(*ptr.get());\n  }\n\n  SinkReplyBuilder* builder = nullptr;\n  Connection* self = nullptr;\n};\n\nvoid Connection::AsyncOperations::operator()(const MonitorMessage& msg) {\n  RedisReplyBuilder* rbuilder = (RedisReplyBuilder*)builder;\n  rbuilder->SendSimpleString(msg);\n}\n\nvoid Connection::AsyncOperations::operator()(const PubMessage& pub_msg) {\n  RedisReplyBuilder* rb = static_cast<RedisReplyBuilder*>(builder);\n\n  // Discard stale messages to not break the protocol after exiting \"pubsub\" mode.\n  // Even after removing all subscriptions, we still can receive messages delayed\n  // by inter-thread dispatches or backpressure.\n  // TODO: filter messages from channels the client unsubscribed from\n  if (self->cntx()->subscriptions == 0 &&\n      !base::_in(pub_msg.channel, {\"unsubscribe\", \"punsubscribe\"}))\n    return;\n\n  if (pub_msg.force_unsubscribe) {\n    rb->StartCollection(3, CollectionType::PUSH);\n    rb->SendBulkString(\"sunsubscribe\");\n    rb->SendBulkString(pub_msg.channel);\n    rb->SendLong(0);\n    self->cntx()->Unsubscribe(pub_msg.channel);\n    return;\n  }\n\n  unsigned i = 0;\n  array<string_view, 4> arr;\n  if (pub_msg.pattern.empty()) {\n    arr[i++] = pub_msg.is_sharded ? 
\"smessage\" : \"message\";\n  } else {\n    arr[i++] = \"pmessage\";\n    arr[i++] = pub_msg.pattern;\n  }\n\n  arr[i++] = pub_msg.channel;\n  arr[i++] = pub_msg.message;\n\n  rb->SendBulkStrArr(absl::Span<string_view>{arr.data(), i}, CollectionType::PUSH);\n}\n\nvoid Connection::AsyncOperations::operator()(ParsedCommand& cmd) {\n  DVLOG(2) << \"Dispatching pipeline: \" << cmd.Front();\n\n  ++self->local_stats_.cmds;\n  self->service_->DispatchCommand(ParsedArgs{cmd}, &cmd, facade::AsyncPreference::ONLY_SYNC);\n\n  self->last_interaction_ = time(nullptr);\n  self->skip_next_squashing_ = false;\n}\n\nvoid Connection::AsyncOperations::operator()(const MigrationRequestMessage& msg) {\n  // no-op\n}\n\nvoid Connection::AsyncOperations::operator()(CheckpointMessage msg) {\n  VLOG(2) << \"Decremented checkpoint at \" << self->DebugInfo();\n\n  msg.bc->Dec();\n}\n\nvoid Connection::AsyncOperations::operator()(const InvalidationMessage& msg) {\n  RedisReplyBuilder* rbuilder = (RedisReplyBuilder*)builder;\n  DCHECK(rbuilder->IsResp3());\n  rbuilder->StartCollection(2, facade::CollectionType::PUSH);\n  rbuilder->SendBulkString(\"invalidate\");\n  if (msg.invalidate_due_to_flush) {\n    rbuilder->SendNull();\n  } else {\n    string_view keys[] = {msg.key};\n    rbuilder->SendBulkStrArr(keys);\n  }\n}\n\nnamespace {\nthread_local absl::flat_hash_map<string, uint64_t> g_libname_ver_map;\n\nvoid UpdateLibNameVerMap(const string& name, const string& ver, int delta) {\n  string key = absl::StrCat(name, \":\", ver);\n  uint64_t& val = g_libname_ver_map[key];\n  val += delta;\n  if (val == 0) {\n    g_libname_ver_map.erase(key);\n  }\n}\n}  // namespace\n\nvoid Connection::Init(unsigned io_threads) {\n  CHECK(thread_queue_backpressure == nullptr);\n  thread_queue_backpressure = new QueueBackpressure[io_threads];\n\n  for (unsigned i = 0; i < io_threads; ++i) {\n    auto& qbp = thread_queue_backpressure[i];\n    qbp.publish_buffer_limit = GetFlag(FLAGS_publish_buffer_limit);\n    
qbp.pipeline_cache_limit = GetFlag(FLAGS_request_cache_limit);\n    qbp.pipeline_buffer_limit = GetFlag(FLAGS_pipeline_buffer_limit);\n    qbp.pipeline_queue_max_len = GetFlag(FLAGS_pipeline_queue_limit);\n\n    if (qbp.publish_buffer_limit == 0 || qbp.pipeline_cache_limit == 0 ||\n        qbp.pipeline_buffer_limit == 0 || qbp.pipeline_queue_max_len == 0) {\n      LOG(ERROR) << \"pipeline flag limit is 0\";\n      exit(-1);\n    }\n  }\n}\n\nvoid Connection::Shutdown() {\n  delete[] thread_queue_backpressure;\n  thread_queue_backpressure = nullptr;\n}\n\nConnection::Connection(Protocol protocol, util::HttpListenerBase* http_listener, SSL_CTX* ctx,\n                       ServiceInterface* service)\n    : io_buf_(kMinReadSize),\n      protocol_(protocol),\n      http_listener_(http_listener),\n      ssl_ctx_(ctx),\n      service_(service),\n      flags_(0) {\n  static atomic_uint32_t next_id{1};\n\n  constexpr size_t kReqSz = sizeof(ParsedCommand);\n  static_assert(kReqSz <= 256);\n\n  // TODO: to move parser initialization to where we initialize the reply builder.\n  switch (protocol) {\n    case Protocol::REDIS:\n      redis_parser_.reset(\n          new RespSrvParser(GetFlag(FLAGS_max_multi_bulk_len), GetFlag(FLAGS_max_bulk_len)));\n      break;\n    case Protocol::MEMCACHE:\n      memcache_parser_ =\n          make_unique<MemcacheParser>(std::min<uint64_t>(GetFlag(FLAGS_max_bulk_len), UINT32_MAX));\n      break;\n  }\n\n  creation_time_ = time(nullptr);\n  last_interaction_ = creation_time_;\n  id_ = next_id.fetch_add(1, memory_order_relaxed);\n\n  migration_enabled_ = GetFlag(FLAGS_migrate_connections);\n\n  // Create shared_ptr with empty value and associate it with `this` pointer (aliasing constructor).\n  // We use it for reference counting and accessing `this` (without managing it).\n  self_ = {make_shared<std::monostate>(), this};\n\n#ifdef DFLY_USE_SSL\n  // Increment reference counter so Listener won't free the context while we're\n  // still using it.\n 
 if (ctx) {\n    SSL_CTX_up_ref(ctx);\n  }\n#endif\n\n  UpdateLibNameVerMap(lib_name_, lib_ver_, +1);\n  migration_allowed_to_register_ = false;\n}\n\nConnection::~Connection() {\n#ifdef DFLY_USE_SSL\n  SSL_CTX_free(ssl_ctx_);\n#endif\n  UpdateLibNameVerMap(lib_name_, lib_ver_, -1);\n}\n\nbool Connection::IsSending() const {\n  return reply_builder_ && reply_builder_->IsSendActive();\n}\n\nvoid Connection::MarkForClose() {\n  if (reply_builder_) {\n    reply_builder_->CloseConnection();\n  }\n  request_shutdown_ = true;\n}\n\n// Called from Connection::Shutdown() right after socket_->Shutdown call.\nvoid Connection::OnShutdown() {\n  VLOG(1) << \"Connection::OnShutdown\";\n\n  BreakOnce(POLLHUP);\n  io_ec_ = make_error_code(errc::connection_aborted);\n  io_event_.notify();\n}\n\nvoid Connection::OnPreMigrateThread() {\n  DVLOG(1) << \"OnPreMigrateThread \" << GetClientId();\n\n  CHECK(!cc_->conn_closing);\n\n  DCHECK(!migration_in_process_);\n\n  // CancelOnErrorCb is a preemption point, so we make sure the Migration start\n  // is marked beforehand.\n  migration_in_process_ = true;\n\n  // Mark as not owned by any thread as it going through the dark hole\n  self_.reset();\n\n  socket_->CancelOnErrorCb();\n  DCHECK(!async_fb_.IsJoinable()) << GetClientId();\n\n  DecreaseConnStats();\n}\n\nvoid Connection::OnPostMigrateThread() {\n  DVLOG(1) << \"[\" << id_ << \"] OnPostMigrateThread\";\n\n  // Once we migrated, we should rearm OnBreakCb callback.\n  if (breaker_cb_ && socket()->IsOpen()) {\n    socket_->RegisterOnErrorCb([this](int32_t mask) { this->OnBreakCb(mask); });\n  }\n\n  if (ioloop_v2_ && socket_ && socket_->IsOpen() && migration_allowed_to_register_) {\n    socket_->RegisterOnRecv([this](const FiberSocketBase::RecvNotification& n) {\n      DoReadOnRecv(n);\n      io_event_.notify();\n    });\n  }\n\n  migration_in_process_ = false;\n  self_ = {make_shared<std::monostate>(), this};  // Recreate shared_ptr to self.\n  DCHECK(!async_fb_.IsJoinable());\n\n  
// If someone had sent Async during the migration, we must create async_fb_.\n  if (HasPendingMessages()) {\n    LaunchAsyncFiberIfNeeded();\n  }\n\n  IncreaseConnStats();\n}\n\nvoid Connection::OnConnectionStart() {\n  SetName(absl::StrCat(id_));\n\n  // is null in unit-tests.\n  if (const Listener* lsnr = static_cast<Listener*>(listener()); lsnr) {\n    is_main_ = lsnr->IsMainInterface();\n  }\n\n  if (GetFlag(FLAGS_tcp_nodelay) && !socket_->IsUDS()) {\n    int val = 1;\n    int res = setsockopt(socket_->native_handle(), IPPROTO_TCP, TCP_NODELAY, &val, sizeof(val));\n    DCHECK_EQ(res, 0);\n  }\n}\n\nvoid Connection::HandleRequests() {\n  VLOG(1) << \"[\" << id_ << \"] HandleRequests\";\n  DCHECK(tl_facade_stats);\n  auto& conn_stats = tl_facade_stats->conn_stats;\n\n  auto remote_ep = RemoteEndpointStr();\n\n#ifdef DFLY_USE_SSL\n  if (ssl_ctx_) {\n    // Early TLS connection filter\n    //\n    // Before entering the expensive OpenSSL handshake we pre-read the 5-byte TLS Record Layer\n    // header on the raw TCP socket. This serves two purposes:\n    //\n    //  1. Wrong-client detection:\n    //     Clients that forgot to enable TLS (e.g. a plaintext Redis client connecting to the TLS\n    //     port) will not send a valid TLS Record Layer header.  We detect this immediately and\n    //     reply with a human-readable \"-ERR\" message before disconnecting, instead of letting\n    //     OpenSSL produce a cryptic handshake failure.\n    //\n    //  2. Zombie-connection rejection:\n    //     Zombie connections —— open a TCP socket but never send any data.  
By demanding at least\n    //     the 5-byte header before allocating any SSL state, we drop these cheaply on the raw\n    //     socket instead of tying up an OpenSSL context and handshake state machine that will never\n    //     complete.\n    //\n    // The pre-read header bytes are injected into the TlsSocket via InitSSL(), which writes them\n    // into OpenSSL's internal BIO so that Accept() can drive the normal handshake from there.\n    //\n    // Reminder: TLS Record Layer header structure (universal across TLS 1.0 – 1.3):\n    // - Byte 0: ContentType (0x16 = Handshake)\n    // - Bytes 1–2: ProtocolVersion. While the minor version varies (0x01 for TLS 1.0,\n    //   0x03 for TLS 1.2/1.3), the major version is consistently 0x03 for all\n    //   modern TLS versions.\n    // - Bytes 3–4: Length (uint16 BE) — payload length, max 2^14 = 16384\n    uint8_t buf[5];  // universal TLS Record Header size is 5 bytes\n    auto read_sz = socket_->Read(io::MutableBytes(buf));\n    if (!read_sz || *read_sz < sizeof(buf)) {\n      auto msg = read_sz ? 
absl::StrCat(*read_sz, \" < \", sizeof(buf)) : read_sz.error().message();\n      LOG_EVERY_T(INFO, 1) << \"Error reading from peer \" << remote_ep << \" \" << msg\n                           << \", socket state: \" + dfly::GetSocketInfo(socket_->native_handle());\n      conn_stats.tls_accept_disconnects++;\n      return;\n    }\n\n    // Byte 0: ContentType must be 0x16 (Handshake).\n    // Byte 1: major ProtocolVersion — always 0x03 for TLS 1.0 through TLS 1.3.\n    // Byte 2: minor ProtocolVersion — 0x01 (TLS 1.0), 0x02 (TLS 1.1), 0x03 (TLS 1.2/1.3).\n    //         SSL 3.0 (0x00) is deprecated (RFC 7568) and rejected.\n    if ((buf[0] != 0x16) || (buf[1] != 0x03) || (buf[2] < 0x01) || (buf[2] > 0x03)) {\n      VLOG(1) << \"Bad TLS header \"\n              << absl::StrCat(absl::Hex(buf[0], absl::kZeroPad2),\n                              absl::Hex(buf[1], absl::kZeroPad2),\n                              absl::Hex(buf[2], absl::kZeroPad2));\n      std::ignore =\n          socket_->Write(io::Buffer(\"-ERR Bad TLS header, double check \"\n                                    \"if you enabled TLS for your client.\\r\\n\"));\n      conn_stats.tls_accept_disconnects++;\n      return;\n    }\n\n    // Must be done atomically before the preemption point in Accept so that at any\n    // point in time, the socket_ is defined.\n    {\n      FiberAtomicGuard fg;\n      unique_ptr<tls::TlsSocket> tls_sock = make_unique<tls::TlsSocket>(std::move(socket_));\n      tls_sock->InitSSL(ssl_ctx_, buf);\n      SetSocket(tls_sock.release());\n    }\n    FiberSocketBase::AcceptResult aresult = socket_->Accept();\n\n    if (!aresult) {\n      // This can flood the logs -- don't change\n      LOG_EVERY_T(INFO, 1) << \"Error handshaking \" << aresult.error().message()\n                           << \", socket state: \" + dfly::GetSocketInfo(socket_->native_handle());\n      conn_stats.tls_accept_disconnects++;\n      return;\n    }\n    is_tls_ = 1;\n    VLOG(1) << \"TLS handshake 
succeeded\";\n  }\n#endif\n\n  io::Result<bool> http_res{false};\n\n  http_res = CheckForHttpProto();\n\n  // We need to check if the socket is open because the server might be\n  // shutting down. During the shutdown process, the server iterates over\n  // the connections of each shard and shuts down their socket. Since the\n  // main listener dispatches the connection into the next proactor, we\n  // allow a schedule order that first shuts down the socket and then calls\n  // this function which triggers a DCHECK on the socket while it tries to\n  // RegisterOnErrorCb. Furthermore, we can get away with one check here\n  // because both Write and Recv internally check if the socket was shut\n  // down and return with an error accordingly.\n  if (http_res && socket_->IsOpen()) {\n    cc_.reset(service_->CreateContext(this));\n\n    if (*http_res) {\n      VLOG(1) << \"HTTP1.1 identified\";\n      is_http_ = true;\n      HttpConnection http_conn{http_listener_};\n      http_conn.SetSocket(socket_.get());\n      http_conn.set_user_data(cc_.get());\n\n      // We validate the http request using basic-auth inside HttpConnection::HandleSingleRequest.\n      cc_->authenticated = true;\n      auto ec = http_conn.ParseFromBuffer(io_buf_.InputBuffer());\n      io_buf_.ConsumeInput(io_buf_.InputLen());\n      if (!ec) {\n        http_conn.HandleRequests();\n      }\n\n      // Release the ownership of the socket from http_conn so it would stay with\n      // this connection.\n      http_conn.ReleaseSocket();\n    } else {  // non-http\n      // ioloop_v2 not supported for TLS & redis connections yet.\n      ioloop_v2_ =\n          GetFlag(FLAGS_experimental_io_loop_v2) && !is_tls_ && protocol_ == Protocol::MEMCACHE;\n\n      if (breaker_cb_) {\n        socket_->RegisterOnErrorCb([this](int32_t mask) { this->OnBreakCb(mask); });\n      }\n      switch (protocol_) {\n        case Protocol::REDIS:\n          reply_builder_.reset(new RedisReplyBuilder(socket_.get()));\n          
break;\n        case Protocol::MEMCACHE:\n          reply_builder_.reset(new MCReplyBuilder(socket_.get()));\n          break;\n        default:\n          break;\n      }\n      parsed_cmd_ = CreateParsedCommand();\n      ConnectionFlow();\n\n      socket_->CancelOnErrorCb();  // noop if nothing is registered.\n      VLOG(1) << \"Closed connection for peer \"\n              << GetClientInfo(fb2::ProactorBase::me()->GetPoolIndex());\n      reply_builder_.reset();\n      DestroyParsedQueue();\n    }\n    cc_.reset();\n  }\n}\n\nunsigned Connection::GetSendWaitTimeSec() const {\n  if (reply_builder_ && reply_builder_->IsSendActive()) {\n    return (util::fb2::ProactorBase::GetMonotonicTimeNs() - reply_builder_->GetLastSendTimeNs()) /\n           1'000'000'000;\n  }\n\n  return 0;\n}\n\nvoid Connection::RegisterBreakHook(BreakerCb breaker_cb) {\n  breaker_cb_ = std::move(breaker_cb);\n}\n\nvoid Connection::FlushReplies() {  // NOLINT must not be const due to flush side effect\n  DCHECK(reply_builder_);\n  reply_builder_->Flush();\n}\n\npair<string, string> Connection::GetClientInfoBeforeAfterTid() const {\n  if (!socket_) {\n    LOG(DFATAL) << \"unexpected null socket_ \"\n                << \" phase \" << unsigned(phase_) << \", is_http: \" << unsigned(is_http_);\n    return {};\n  }\n\n  CHECK_LT(unsigned(phase_), NUM_PHASES);\n\n  string before;\n  auto le = LocalBindStr();\n  auto re = RemoteEndpointStr();\n  time_t now = time(nullptr);\n\n  int cpu = 0;\n  socklen_t len = sizeof(cpu);\n  getsockopt(socket_->native_handle(), SOL_SOCKET, SO_INCOMING_CPU, &cpu, &len);\n\n#ifdef __APPLE__\n  int my_cpu_id = -1;  // __APPLE__ does not have sched_getcpu()\n#else\n  int my_cpu_id = sched_getcpu();\n#endif\n\n  static constexpr string_view PHASE_NAMES[] = {\"setup\", \"readsock\", \"process\", \"shutting_down\",\n                                                \"preclose\"};\n  static_assert(NUM_PHASES == ABSL_ARRAYSIZE(PHASE_NAMES));\n  
static_assert(PHASE_NAMES[SHUTTING_DOWN] == \"shutting_down\");\n\n  absl::StrAppend(&before, \"id=\", id_, \" addr=\", re, \" laddr=\", le);\n  absl::StrAppend(&before, \" fd=\", socket_->native_handle());\n  if (is_http_) {\n    absl::StrAppend(&before, \" http=true\");\n  } else {\n    absl::StrAppend(&before, \" name=\", name_);\n  }\n#ifdef DFLY_USE_SSL\n  if (is_tls_) {\n    tls::TlsSocket* tls_sock = static_cast<tls::TlsSocket*>(socket_.get());\n    string_view proto_version = SSL_get_version(tls_sock->ssl_handle());\n    const SSL_CIPHER* cipher = SSL_get_current_cipher(tls_sock->ssl_handle());\n    absl::StrAppend(&before, \" tls=\", proto_version, \"|\", SSL_CIPHER_get_name(cipher));\n  }\n#endif\n  string after;\n  absl::StrAppend(&after, \" irqmatch=\", int(cpu == my_cpu_id));\n  if (parsed_cmd_q_len_ > 0) {\n    absl::StrAppend(&after, \" pipeline=\", parsed_cmd_q_len_);\n    absl::StrAppend(&after, \" pbuf=\", parsed_cmd_q_bytes_);\n  }\n  absl::StrAppend(&after, \" age=\", now - creation_time_, \" idle=\", now - last_interaction_);\n  string_view phase_name = PHASE_NAMES[phase_];\n\n  absl::StrAppend(&after, \" tot-cmds=\", local_stats_.cmds,\n                  \" tot-net-in=\", local_stats_.net_bytes_in,\n                  \" tot-read-calls=\", local_stats_.read_cnt,\n                  \" tot-dispatches=\", local_stats_.dispatch_entries_added);\n\n  if (cc_) {\n    string cc_info = service_->GetContextInfo(cc_.get()).Format();\n\n    // reply_builder_ may be null if the connection is in the setup phase, for example.\n    if (reply_builder_ && reply_builder_->IsSendActive())\n      phase_name = \"send\";\n    absl::StrAppend(&after, \" \", cc_info);\n  }\n  absl::StrAppend(&after, \" phase=\", phase_name);\n\n  if (IsSending()) {\n    absl::StrAppend(&before, \" send-wait-time=\", GetSendWaitTimeSec());\n  }\n\n  return {std::move(before), std::move(after)};\n}\n\nstring Connection::GetClientInfo(unsigned thread_id) const {\n  auto [before, after] = 
GetClientInfoBeforeAfterTid();\n  absl::StrAppend(&before, \" tid=\", thread_id);\n  absl::StrAppend(&before, after);\n  absl::StrAppend(&before, \" lib-name=\", lib_name_, \" lib-ver=\", lib_ver_);\n  return before;\n}\n\nstring Connection::GetClientInfo() const {\n  auto [before, after] = GetClientInfoBeforeAfterTid();\n  absl::StrAppend(&before, after);\n  // The following are dummy fields and users should not rely on those unless\n  // we decide to implement them.\n  // This is only done because the redis pyclient parser for the field \"client-info\"\n  // for the command ACL LOG hardcodes the expected values. This behaviour does not\n  // conform to the actual expected values, since it's missing half of them.\n  // That is, even for redis-server, issuing an ACL LOG command via redis-cli and the pyclient\n  // will return different results! For example, the fields:\n  // addr=127.0.0.1:57275\n  // laddr=127.0.0.1:6379\n  // are missing from the pyclient.\n\n  absl::StrAppend(&before, \" qbuf=0 \", \"qbuf-free=0 \", \"obl=0 \", \"argv-mem=0 \");\n  absl::StrAppend(&before, \"oll=0 \", \"omem=0 \", \"tot-mem=0 \", \"multi=0 \");\n  absl::StrAppend(&before, \"psub=0 \", \"sub=0\");\n  return before;\n}\n\nuint32_t Connection::GetClientId() const {\n  return id_;\n}\n\nbool Connection::IsPrivileged() const {\n  return static_cast<Listener*>(listener())->IsPrivilegedInterface();\n}\n\nbool Connection::IsMain() const {\n  return is_main_;\n}\n\nbool Connection::IsMainOrMemcache() const {\n  return is_main_ || protocol_ == Protocol::MEMCACHE;\n}\n\nvoid Connection::SetName(string name) {\n  util::ThisFiber::SetName(absl::StrCat(\"DflyConn_\", name));\n  name_ = std::move(name);\n}\n\nvoid Connection::SetLibName(string name) {\n  UpdateLibNameVerMap(lib_name_, lib_ver_, -1);\n  lib_name_ = std::move(name);\n  UpdateLibNameVerMap(lib_name_, lib_ver_, +1);\n}\n\nvoid Connection::SetLibVersion(string version) {\n  UpdateLibNameVerMap(lib_name_, lib_ver_, -1);\n  lib_ver_ 
= std::move(version);\n  UpdateLibNameVerMap(lib_name_, lib_ver_, +1);\n}\n\nconst absl::flat_hash_map<string, uint64_t>& Connection::GetLibStatsTL() {\n  return g_libname_ver_map;\n}\n\nio::Result<bool> Connection::CheckForHttpProto() {\n  if (!IsPrivileged() && !IsMain()) {\n    return false;\n  }\n\n  const bool primary_port_enabled = GetFlag(FLAGS_primary_port_http_enabled);\n  if (!primary_port_enabled && !IsPrivileged()) {\n    return false;\n  }\n\n  size_t last_len = 0;\n  auto* peer = socket_.get();\n  auto& conn_stats = tl_facade_stats->conn_stats;\n  do {\n    auto buf = io_buf_.AppendBuffer();\n    DCHECK(!buf.empty());\n\n    ::io::Result<size_t> recv_sz = peer->Recv(buf);\n    if (!recv_sz) {\n      return make_unexpected(recv_sz.error());\n    }\n    if (recv_sz == 0) {\n      // Peer closed connection.\n      return false;\n    }\n\n    io_buf_.CommitWrite(*recv_sz);\n    string_view ib = io::View(io_buf_.InputBuffer());\n    if (ib.size() >= 2 && ib[0] == 22 && ib[1] == 3) {\n      // We matched the TLS handshake raw data, which means \"peer\" is a TCP socket.\n      // Reject the connection.\n      return make_unexpected(make_error_code(errc::protocol_not_supported));\n    }\n\n    ib = ib.substr(last_len);\n    size_t pos = ib.find('\\n');\n    if (pos != string_view::npos) {\n      ib = io::View(io_buf_.InputBuffer().first(last_len + pos));\n      if (ib.size() < 10 || ib.back() != '\\r')\n        return false;\n\n      ib.remove_suffix(1);\n      return MatchHttp11Line(ib);\n    }\n    last_len = io_buf_.InputLen();\n    UpdateIoBufCapacity(io_buf_, &conn_stats, [&]() { io_buf_.EnsureCapacity(128); });\n  } while (last_len < 1024);\n\n  return false;\n}\n\nvoid Connection::ConnectionFlow() {\n  DCHECK(reply_builder_);\n  auto& conn_stats = tl_facade_stats->conn_stats;\n\n  // Register the new connection with the thread-local statistics.\n  // At this point (connection birth), local queue stats/luggage are 0,\n  // so only connection counts and 
buffer capacities are incremented.\n  IncreaseConnStats();\n  ++conn_stats.conn_received_cnt;\n\n  ++local_stats_.read_cnt;\n  local_stats_.net_bytes_in += io_buf_.InputLen();\n\n  ParserStatus parse_status = OK;\n\n  // At the start we read from the socket to determine the HTTP/Memstore protocol.\n  // Therefore we may already have some data in the buffer.\n  if (io_buf_.InputLen() > 0) {\n    phase_ = PROCESS;\n    if (redis_parser_) {\n      parse_status = ParseRedis(10000);\n    } else {\n      DCHECK(memcache_parser_);\n      parse_status = ParseLoop();\n    }\n  }\n\n  error_code ec = reply_builder_->GetError();\n\n  // Main loop.\n  if (parse_status != ERROR && !ec) {\n    UpdateIoBufCapacity(io_buf_, &conn_stats, [&]() { io_buf_.EnsureCapacity(64); });\n    variant<error_code, Connection::ParserStatus> res;\n    if (ioloop_v2_) {\n      // Everything above the IoLoopV2 is fiber blocking. A connection can migrate before\n      // it reaches here and will cause a double RegisterOnRecv check fail. To avoid this,\n      // a migration shall only call RegisterOnRecv if it reached the main IoLoopV2 below.\n      migration_allowed_to_register_ = true;\n      res = IoLoopV2();\n    } else {\n      res = IoLoop();\n    }\n\n    if (holds_alternative<error_code>(res)) {\n      ec = get<error_code>(res);\n    } else {\n      parse_status = get<ParserStatus>(res);\n    }\n  }\n\n  // After the client disconnected.\n  cc_->conn_closing = true;  // Signal dispatch to close.\n  cnd_.notify_one();\n  phase_ = SHUTTING_DOWN;\n  VLOG(2) << \"Before dispatch_fb.join()\";\n  async_fb_.JoinIfNeeded();\n  VLOG(2) << \"After dispatch_fb.join()\";\n\n  phase_ = PRECLOSE;\n\n  ClearPipelinedMessages();\n  DCHECK(!HasPendingMessages());\n\n  service_->OnConnectionClose(cc_.get());\n\n  // We have already cleared the queues above (ClearPipelinedMessages), so local queue stats\n  // (dispatch_q_bytes_, etc.) represent 0 usage. 
DecreaseConnStats will safely subtract 0 for those\n  // stats, while correctly removing this connection from the global connection counts and buffer\n  // capacity tracking.\n  DecreaseConnStats();\n\n  if (ioloop_v2_) {\n    socket_->ResetOnRecvHook();\n  }\n\n  // We wait for dispatch_fb to finish writing the previous replies before replying to the last\n  // offending request.\n  if (parse_status == ERROR) {\n    VLOG(1) << \"Error parser status \" << parser_error_;\n\n    if (redis_parser_) {\n      SendProtocolError(RespSrvParser::Result(parser_error_), reply_builder_.get());\n    } else {\n      DCHECK(memcache_parser_);\n      reply_builder_->SendProtocolError(\"bad command line format\");\n    }\n\n    // Shut down the servers side of the socket to send a FIN to the client\n    // then keep draining the socket (discarding any received data) until\n    // the client closes the connection.\n    //\n    // Otherwise the clients write could fail (or block), so they would never\n    // read the above protocol error (see issue #1327).\n    // TODO: we have a bug that can potentially deadlock the code below.\n    // If the socket does not close the socket on the other side, the while loop will never finish.\n    // to reproduce: nc localhost 6379  and then run invalid sequence: *1 <enter> *1 <enter>\n    error_code ec2 = socket_->Shutdown(SHUT_WR);\n    LOG_IF(WARNING, ec2) << \"Could not shutdown socket \" << ec2;\n    while (!ec2) {\n      // Discard any received data.\n      io_buf_.Clear();\n      auto recv_sz = socket_->Recv(io_buf_.AppendBuffer());\n      if (!recv_sz || *recv_sz == 0) {\n        break;  // Peer closed connection.\n      }\n    }\n  }\n\n  if (ec && !FiberSocketBase::IsConnClosed(ec)) {\n    string conn_info = service_->GetContextInfo(cc_.get()).Format();\n    LOG_EVERY_T(WARNING, 1) << \"Socket error for connection \" << conn_info << \" \" << GetName()\n                            << \" during phase \" << kPhaseName[phase_] << \" : \" << 
ec << \" \"\n                            << ec.message();\n  }\n}\n\nvoid Connection::DispatchSingle(bool has_more, absl::FunctionRef<void()> invoke_cb,\n                                absl::FunctionRef<void()> enqueue_cmd_cb) {\n  // Unconditional return when closing:\n  // else, non-throttled connections skip the check below and enqueue data even if they are closing.\n  // No one will read that data anyway.\n  if (cc_->conn_closing)\n    return;\n  auto can_dispatch_sync_fn = [this]() {\n    return !cc_->async_dispatch && !HasPendingMessages() && (cc_->subscriptions == 0);\n  };\n  bool optimize_for_async = has_more;\n  bool can_dispatch_sync = can_dispatch_sync_fn();\n  QueueBackpressure& qbp = GetQueueBackpressure();\n  ConnectionStats* conn_stats = &tl_facade_stats->conn_stats;\n  if ((optimize_for_async || !can_dispatch_sync) &&\n      qbp.IsPipelineBufferOverLimit(conn_stats->pipeline_queue_bytes, parsed_cmd_q_len_)) {\n    conn_stats->pipeline_throttle_count++;\n    LOG_EVERY_T(WARNING, 10) << \"Pipeline buffer over limit.\"\n                             << \", Thread pipeline_queue_bytes: \"\n                             << conn_stats->pipeline_queue_bytes\n                             << \", Thread pipeline_queue_entries: \"\n                             << conn_stats->pipeline_queue_entries\n                             << \", Connection parsed_cmd_q_bytes_: \" << parsed_cmd_q_bytes_\n                             << \", Connection parsed commands queue size: \" << parsed_cmd_q_len_\n                             << \", consider increasing pipeline_buffer_limit/pipeline_queue_limit\";\n    fb2::NoOpLock noop;\n    qbp.pipeline_cnd.wait(noop, [this, &qbp, &can_dispatch_sync_fn] {\n      // Wait until at least one is true:\n      // 1) Connection is closing.\n      // 2) Can dispatch synchronously.\n      // 3) Not over limits (for an async dispatch).\n      bool can_dispatch_sync = can_dispatch_sync_fn();\n      if (can_dispatch_sync)\n        return 
true;\n      bool over_limits = qbp.IsPipelineBufferOverLimit(\n          tl_facade_stats->conn_stats.pipeline_queue_bytes, parsed_cmd_q_len_);\n      return !over_limits || cc_->conn_closing;\n    });\n\n    // prefer synchronous dispatching to save memory.\n    optimize_for_async = false;\n    last_interaction_ = time(nullptr);\n  }\n\n  // Avoid sync dispatch if we can interleave with an ongoing async dispatch.\n  can_dispatch_sync = can_dispatch_sync_fn();\n\n  // Dispatch async if we're handling a pipeline or if we can't dispatch sync.\n  if (optimize_for_async || !can_dispatch_sync) {\n    LaunchAsyncFiberIfNeeded();\n    enqueue_cmd_cb();\n  } else {\n    ShrinkPipelinePool();  // Gradually release pipeline request pool.\n    {\n      ++local_stats_.cmds;\n      cc_->sync_dispatch = true;\n      invoke_cb();\n      cc_->sync_dispatch = false;\n    }\n    last_interaction_ = time(nullptr);\n\n    // We might have blocked the dispatch queue from processing, wake it up.\n    if (HasPendingMessages())\n      cnd_.notify_one();\n  }\n}\n\nConnection::ParserStatus Connection::ParseRedis(unsigned max_busy_cycles, bool enqueue_only) {\n  uint32_t consumed = 0;\n  RespSrvParser::Result result = RespSrvParser::OK;\n\n  auto dispatch_sync = [this] {\n    service_->DispatchCommand(ParsedArgs{*parsed_cmd_}, parsed_cmd_,\n                              facade::AsyncPreference::ONLY_SYNC);\n  };\n  auto dispatch_async = [this]() -> void {\n    PipelineMessagePtr ptr = GetFromPoolOrCreate();\n    // parsed_cmd_ holds the parsed arguments. Move it to 'cmd' to be enqueued and set it with a new\n    // empty ParsedCommand for the next parse.\n    auto* cmd = std::exchange(parsed_cmd_, ptr.release());\n    EnqueueParsedCommand(cmd);\n  };\n  io::Bytes read_buffer = io_buf_.InputBuffer();\n  // Keep track of total bytes consumed/parsed. The do/while{} loop below preempts,\n  // and InputBuffer() size might change between preemption points. 
There is a corner case,\n  // that ConsumeInput() will strip a portion of the request which makes the test_publish_stuck\n  // test fail.\n  // TODO(kostas): follow up on this\n  size_t total_consumed = 0;\n  do {\n    DCHECK(parsed_cmd_);\n    result = redis_parser_->Parse(read_buffer, &consumed, parsed_cmd_);\n    request_consumed_bytes_ += consumed;\n    total_consumed += consumed;\n    if (result == RespSrvParser::OK) {\n      DCHECK(!parsed_cmd_->empty());\n      DVLOG(2) << \"Got Args with first token \" << parsed_cmd_->Front();\n\n      if (io_req_size_hist)\n        io_req_size_hist->Add(request_consumed_bytes_);\n      request_consumed_bytes_ = 0;\n      bool has_more = consumed < read_buffer.size();\n\n      if (tl_traffic_logger.log_file && IsMain() /* log only on the main interface */) {\n        LogTraffic(id_, has_more, *parsed_cmd_, service_->GetContextInfo(cc_.get()));\n      }\n\n      if (enqueue_only)\n        dispatch_async();\n      else\n        DispatchSingle(has_more, dispatch_sync, dispatch_async);\n    }\n    if (result != RespSrvParser::OK && result != RespSrvParser::INPUT_PENDING) {\n      // We do not expect that a replica sends an invalid command so we log if it happens.\n      LOG_IF(WARNING, cntx()->replica_conn)\n          << \"Redis parser error: \" << result << \" during parse: \" << io::View(read_buffer);\n    }\n    read_buffer.remove_prefix(consumed);\n\n    // We must yield from time to time to allow other fibers to run.\n    // Specifically, if a client sends a huge chunk of data resulting in a very long pipeline,\n    // we want to yield to allow AsyncFiber to actually execute on the pending pipeline.\n    if (ThisFiber::GetRunningTimeCycles() > max_busy_cycles) {\n      GetLocalConnStats().num_read_yields++;\n      ThisFiber::Yield();\n    }\n  } while (RespSrvParser::OK == result && read_buffer.size() > 0 && !reply_builder_->GetError());\n\n  io_buf_.ConsumeInput(total_consumed);\n\n  parser_error_ = result;\n  if (result 
== RespSrvParser::OK)\n    return OK;\n\n  if (result == RespSrvParser::INPUT_PENDING) {\n    DCHECK_EQ(read_buffer.size(), 0u);\n\n    return NEED_MORE;\n  }\n\n  VLOG(1) << \"Parser error \" << result;\n\n  return ERROR;\n}\n\nauto Connection::ParseLoop() -> ParserStatus {\n  auto parse_func =\n      protocol_ == Protocol::MEMCACHE ? &Connection::ParseMCBatch : &Connection::ParseRedisBatch;\n\n  bool commands_parsed = false;\n  do {\n    commands_parsed = (this->*parse_func)();\n\n    if (!ExecuteBatch())\n      return ERROR;\n\n    if (!ReplyBatch())\n      return ERROR;\n  } while (commands_parsed && io_buf_.InputLen() > 0);\n\n  return commands_parsed ? OK : NEED_MORE;\n}\n\nvoid Connection::OnBreakCb(int32_t mask) {\n  if (mask <= 0)\n    return;  // we cancelled the poller, which means we do not need to break from anything.\n\n  if (!cc_) {\n    LOG(ERROR) << \"Unexpected event \" << mask;\n    return;\n  }\n\n  DCHECK(reply_builder_) << \"[\" << id_ << \"] \" << phase_ << \" \" << migration_in_process_;\n\n  VLOG(1) << \"[\" << id_ << \"] Got event \" << mask << \" \" << phase_ << \" \"\n          << reply_builder_->IsSendActive() << \" \" << reply_builder_->GetError();\n\n  cc_->conn_closing = true;\n  BreakOnce(mask);\n  cnd_.notify_one();  // Notify dispatch fiber.\n}\n\nvoid Connection::HandleMigrateRequest() {\n  if (cc_->conn_closing || !migration_request_) {\n    return;\n  }\n  ProactorBase* dest = migration_request_;\n\n  if (async_fb_.IsJoinable()) {\n    SendAsync({MigrationRequestMessage{}});\n    async_fb_.Join();\n  }\n\n  // We don't support migrating with subscriptions as it would require moving thread local\n  // handles. 
We can't check above, as the queue might have contained a subscribe request.\n\n  if (cc_->subscriptions == 0) {\n    // RegisterOnErrorCb might be called on POLLHUP and the join above is a preemption point.\n    // So, it could be the case that after this fiber wakes up the connection might be closing.\n    if (cc_->conn_closing) {\n      return;\n    }\n\n    tl_facade_stats->conn_stats.num_migrations++;\n    migration_request_ = nullptr;\n\n    // We need to return early as the socket is closing and IoLoop will clean up.\n    // The reason that this is true is because of the following DCHECK\n    DCHECK(!async_fb_.IsJoinable());\n\n    // which can never trigger since we Joined on the async_fb_ above and we are\n    // atomic in respect to our proactor meaning that no other fiber will\n    // launch the DispatchFiber.\n    std::ignore = !this->Migrate(dest);\n  }\n}\n\nio::Result<size_t> Connection::HandleRecvSocket() {\n  phase_ = READ_SOCKET;\n  auto& conn_stats = tl_facade_stats->conn_stats;\n\n  io::MutableBytes append_buf = io_buf_.AppendBuffer();\n  DCHECK(!append_buf.empty());\n  ::io::Result<size_t> recv_sz = socket_->Recv(append_buf);\n  last_interaction_ = time(nullptr);\n\n  // In case the socket was closed orderly, we get 0 bytes read.\n  if (recv_sz && *recv_sz) {\n    size_t commit_sz = *recv_sz;\n    io_buf_.CommitWrite(commit_sz);\n\n    conn_stats.io_read_bytes += commit_sz;\n    local_stats_.net_bytes_in += commit_sz;\n\n    ++conn_stats.io_read_cnt;\n    ++local_stats_.read_cnt;\n  }\n  return recv_sz;\n}\n\nvariant<error_code, Connection::ParserStatus> Connection::IoLoop() {\n  error_code ec;\n  ParserStatus parse_status = OK;\n  size_t max_iobfuf_len = GetFlag(FLAGS_max_client_iobuf_len);\n\n  auto* peer = socket_.get();\n  recv_buf_.res_len = 0;\n\n  do {\n    HandleMigrateRequest();\n    auto recv_sz = HandleRecvSocket();\n    if (!recv_sz) {\n      LOG_IF(WARNING, cntx()->replica_conn) << \"HandleRecvSocket() error: \" << 
recv_sz.error();\n      return recv_sz.error();\n    }\n    if (*recv_sz == 0) {\n      break;\n    }\n\n    phase_ = PROCESS;\n    bool is_iobuf_full = io_buf_.AppendLen() == 0;\n\n    if (redis_parser_) {\n      parse_status = ParseRedis(max_busy_read_cycles_cached);\n    } else {\n      DCHECK(memcache_parser_);\n      parse_status = ParseLoop();\n    }\n\n    if (reply_builder_->GetError()) {\n      return reply_builder_->GetError();\n    }\n\n    if (parse_status == NEED_MORE) {\n      parse_status = OK;\n\n      size_t capacity = io_buf_.Capacity();\n      if (capacity < max_iobfuf_len) {\n        size_t parser_hint = 0;\n        if (redis_parser_)\n          parser_hint = redis_parser_->parselen_hint();  // Could be done for MC as well.\n\n        // If we got a partial request and we managed to parse its\n        // length, make sure we have space to store it instead of\n        // increasing space incrementally.\n        // (Note: The buffer object is only working in power-of-2 sizes,\n        // so there's no danger of accidental O(n^2) behavior.)\n        if (parser_hint > capacity) {\n          auto& conn_stats = GetLocalConnStats();\n          UpdateIoBufCapacity(io_buf_, &conn_stats,\n                              [&]() { io_buf_.Reserve(std::min(max_iobfuf_len, parser_hint)); });\n        }\n\n        // If we got a partial request because iobuf was full, grow it up to\n        // a reasonable limit to save on Recv() calls.\n        if (is_iobuf_full && capacity < max_iobfuf_len / 2) {\n          auto& conn_stats = GetLocalConnStats();\n          // Last io used most of the io_buf to the end.\n          UpdateIoBufCapacity(io_buf_, &conn_stats, [&]() {\n            io_buf_.Reserve(capacity * 2);  // Valid growth range.\n          });\n        }\n\n        if (io_buf_.AppendLen() == 0U) {\n          // it can happen with memcached but not for RedisParser, because RedisParser fully\n          // consumes the passed buffer\n          
LOG_EVERY_T(WARNING, 10)\n              << \"Maximum io_buf length reached, consider to increase max_client_iobuf_len flag\";\n        }\n      }\n    } else if (parse_status != OK) {\n      break;\n    }\n  } while (peer->IsOpen());\n\n  return parse_status;\n}\n\nbool Connection::ShouldEndAsyncFiber(const MessageHandle& msg) {\n  if (!holds_alternative<MigrationRequestMessage>(msg.handle)) {\n    return false;\n  }\n\n  if (!HasPendingMessages()) {\n    // Migration requests means we should terminate this function (and allow the fiber to\n    // join), so that we can re-launch the fiber in the new thread.\n    // We intentionally return and not break in order to keep the connection open.\n    return true;\n  }\n\n  // There shouldn't be any other migration requests in the queue, but it's worth checking\n  // as otherwise it would lead to an endless loop.\n  bool has_migration_req =\n      any_of(dispatch_q_.begin(), dispatch_q_.end(), [](const MessageHandle& msg) {\n        return holds_alternative<MigrationRequestMessage>(msg.handle);\n      });\n  if (!has_migration_req) {\n    SendAsync({MigrationRequestMessage{}});\n  }\n\n  return false;\n}\n\nvoid Connection::SquashPipeline() {\n  DCHECK_EQ(GetPendingMessageCount(), parsed_cmd_q_len_);\n  DCHECK_EQ(reply_builder_->GetProtocol(), Protocol::REDIS);  // Only Redis is supported.\n  unsigned pipeline_count = std::min<uint32_t>(parsed_cmd_q_len_, pipeline_squash_limit_cached);\n  auto& conn_stats = tl_facade_stats->conn_stats;\n\n  uint64_t start = CycleClock::Now();\n\n  // Define a \"Feeder\" Lambda\n  // This lambda advances a temporary pointer exec_cmd_ptr to feed the execution engine.\n  // We do not modify parsed_to_execute_ yet, in case execution throws/fails.\n  auto exec_cmd_ptr{parsed_to_execute_};\n  auto get_next_fn = [&exec_cmd_ptr]() mutable -> ParsedArgs {\n    DCHECK(exec_cmd_ptr);\n    return ParsedArgs{*std::exchange(exec_cmd_ptr, exec_cmd_ptr->next)};\n  };\n\n  // async_dispatch is a guard to 
prevent concurrent writes into reply_builder_, hence\n  // it must guard the Flush() as well.\n  cc_->async_dispatch = true;\n\n  DispatchManyResult result =\n      service_->DispatchManyCommands(get_next_fn, pipeline_count, reply_builder_.get(), cc_.get());\n\n  local_stats_.cmds += result.processed;\n  last_interaction_ = time(nullptr);\n  uint32_t num_dispatched_cmds = result.processed;\n  uint64_t flush_start_cycle_cnt = CycleClock::Now();\n  //\n  // TODO: to investigate if always flushing will improve P99 latency because otherwise we\n  // wait for the next batch to finish before fully flushing the current response.\n  if (parsed_cmd_q_len_ == pipeline_count ||\n      always_flush_pipeline_cached) {  // Flush if no new commands appeared\n    reply_builder_->Flush();\n    reply_builder_->SetBatchMode(false);  // in case the next dispatch is sync\n  } else {\n    conn_stats.skip_pipeline_flushing++;\n  }\n\n  cc_->async_dispatch = false;\n\n  if (result.account_in_stats) {\n    conn_stats.pipeline_dispatch_calls++;\n    conn_stats.pipeline_dispatch_commands += num_dispatched_cmds;\n    conn_stats.pipeline_dispatch_flush_usec +=\n        CycleClock::ToUsec(CycleClock::Now() - flush_start_cycle_cnt);\n  }\n\n  auto* current{parsed_head_};\n  for (size_t i = 0; (i < num_dispatched_cmds) && current; ++i) {\n    auto* next{current->next};\n\n    if (result.account_in_stats) {\n      conn_stats.pipelined_wait_latency += CycleClock::ToUsec(start - current->parsed_cycle);\n    }\n\n    ReleaseParsedCommand(current, result.account_in_stats /* is_pipelined */);\n    current = next;\n  }\n  parsed_head_ = current;\n  if (!parsed_head_) {\n    parsed_tail_ = nullptr;\n  }\n  parsed_to_execute_ = parsed_head_;\n\n  // If interrupted due to pause, fall back to regular dispatch\n  skip_next_squashing_ = (num_dispatched_cmds != pipeline_count);\n}\n\nvoid Connection::ClearPipelinedMessages() {\n  AsyncOperations async_op{reply_builder_.get(), this};\n\n  // First, clear 
dispatch queue\n  // Recycle messages even from disconnecting client to keep properly track of memory stats\n  // As well as to avoid pubsub backpressure leakage.\n  for (auto& msg : dispatch_q_) {\n    FiberAtomicGuard guard;  // don't suspend when concluding to avoid getting new messages\n    if (msg.IsCheckPoint())\n      visit(async_op, msg.handle);  // to not miss checkpoints\n    UpdateDispatchStats(msg, false /* subtract */);\n  }\n\n  dispatch_q_.clear();\n\n  // Second, drain the pending pipeline queue: release memory and update stats without executing\n  // commands.\n  while (parsed_head_) {\n    auto* curr{parsed_head_};\n    parsed_head_ = parsed_head_->next;\n\n    // Wait for the in-flight async commands processing by consumer to finish before recycling.\n    if (curr->IsDeferredReply() && !curr->CanReply()) {\n      curr->Blocker()->Wait();\n    }\n\n    ReleaseParsedCommand(curr, false);\n  }\n\n  DCHECK_EQ(parsed_cmd_q_len_, 0u);\n  DCHECK_EQ(parsed_cmd_q_bytes_, 0u);\n  parsed_tail_ = nullptr;\n  parsed_to_execute_ = nullptr;\n\n  QueueBackpressure& qbp = GetQueueBackpressure();\n  qbp.pipeline_cnd.notify_all();\n  qbp.pubsub_ec.notifyAll();\n}\n\nstring Connection::DebugInfo() const {\n  string info = \"{\";\n\n  absl::StrAppend(&info, \"id=\", id_, \", \");\n  absl::StrAppend(&info, \"phase=\", phase_, \", \");\n  if (cc_) {\n    // In some rare cases cc_ can be null, see https://github.com/dragonflydb/dragonfly/pull/3873\n    absl::StrAppend(&info, \"dispatch(s/a)=\", cc_->sync_dispatch, \" \", cc_->async_dispatch, \", \");\n    absl::StrAppend(&info, \"closing=\", cc_->conn_closing, \", \");\n  }\n  absl::StrAppend(&info, \"df:joinable=\", async_fb_.IsJoinable(), \", \");\n\n  absl::StrAppend(&info, \"dq:size=\", dispatch_q_.size(), \", \");\n  absl::StrAppend(&info, \"pq:parsed_cmd_q_len=\", parsed_cmd_q_len_, \", \");\n  absl::StrAppend(&info, \"pq:is_empty=\", (parsed_head_ == nullptr), \", \");\n\n  if (cc_) {\n    absl::StrAppend(&info, 
\"state=\");\n    if (cc_->paused)\n      absl::StrAppend(&info, \"p\");\n    if (cc_->blocked)\n      absl::StrAppend(&info, \"b\");\n  }\n  time_t now = time(nullptr);\n  absl::StrAppend(&info, \" age=\", now - creation_time_, \" idle=\", now - last_interaction_, \"}\");\n\n  return info;\n}\n\nbool Connection::ProcessAdminMessage(MessageHandle* msg, AsyncOperations* async_op) {\n  // Guard: Automatically subtract stats when this scope exits (via return or exception).\n  absl::Cleanup stats_guard = [this, msg] { UpdateDispatchStats(*msg, false /* subtract */); };\n  bool is_replying = msg->IsReplying();\n\n  // Pre-execution Flush\n  // If this is a non-replying control message (e.g. Migration) and it's the last item,\n  // we MUST flush the buffer now. Otherwise, previous pipelined replies might wait\n  // indefinitely or be lost if the fiber terminates.\n  if (!HasPendingMessages() && !is_replying) {\n    reply_builder_->Flush();\n  }\n\n  // Fiber Termination Check\n  if (ShouldEndAsyncFiber(*msg)) {\n    CHECK(!HasPendingMessages()) << DebugInfo();\n    GetQueueBackpressure().pipeline_cnd.notify_all();\n    return true;  // Signal to terminate AsyncFiber\n  }\n\n  // Execution\n  auto replies_recorded_before = reply_builder_->RepliesRecorded();\n  cc_->async_dispatch = true;\n  std::visit(*async_op, msg->handle);\n  cc_->async_dispatch = false;\n\n  // Post-execution Flush\n  // We force a flush If the message is supposed to reply (e.g. PubSub) but didn't write to the\n  // buffer (e.g. 
subscription filter), and the queues are empty.\n  if (!HasPendingMessages() && is_replying &&\n      (replies_recorded_before == reply_builder_->RepliesRecorded())) {\n    reply_builder_->Flush();\n  }\n  return false;\n}\n\nvoid Connection::ProcessPipelineCommand() {\n  DCHECK(parsed_head_ && parsed_to_execute_) << DebugInfo();\n  auto* cmd = parsed_to_execute_;\n  parsed_to_execute_ = cmd->next;\n  parsed_head_ = parsed_to_execute_;\n  if (!parsed_head_) {\n    parsed_tail_ = nullptr;\n  }\n\n  tl_facade_stats->conn_stats.pipelined_wait_latency +=\n      CycleClock::ToUsec(CycleClock::Now() - cmd->parsed_cycle);\n\n  cc_->async_dispatch = true;\n  local_stats_.cmds++;\n  service_->DispatchCommand(ParsedArgs{*cmd}, cmd, facade::AsyncPreference::ONLY_SYNC);\n  last_interaction_ = time(nullptr);\n  skip_next_squashing_ = false;\n  cc_->async_dispatch = false;\n\n  ReleaseParsedCommand(cmd, true);\n\n  // If we drained the pipeline and no admin messages are waiting, flush.\n  if (!HasPendingMessages()) {\n    reply_builder_->Flush();\n  }\n}\n\n// AsyncFiber acts as the consumer for all asynchronous connection tasks.\n//\n// It operates on a producer-consumer model where the InputLoop parses socket data\n// and routes it into two distinct streams:\n// 1. Data Path: Pipelined commands are queued in a Parsed Commands linked list\n// 2. Control Path: Admin events (Migrations, Checkpoints, PubSub) use a deque (dispatch_q_)\n//\n// AsyncFiber drains these queues according to system prioritization, ensuring\n// high-priority events are handled promptly while preventing priority inversion\n// during thread migrations. 
For simple requests, the InputLoop may bypass this\n// fiber and dispatch synchronously to minimize latency.\nvoid Connection::AsyncFiber() {\n  ThisFiber::SetName(\"AsyncFiber\");\n\n  AsyncOperations async_op{reply_builder_.get(), this};\n  size_t squashing_threshold = GetFlag(FLAGS_pipeline_squash);\n  uint64_t prev_epoch = fb2::FiberSwitchEpoch();\n  fb2::NoOpLock noop_lk;\n  QueueBackpressure& qbp = GetQueueBackpressure();\n  auto& conn_stats = tl_facade_stats->conn_stats;\n  uint32_t dispatch_q_cmd_processed = 0;\n  uint32_t async_dispatch_quota = GetFlag(FLAGS_async_dispatch_quota);\n\n  while (!reply_builder_->GetError()) {\n    DCHECK_EQ(socket()->proactor(), ProactorBase::me());\n    cnd_.wait(noop_lk, [this] {\n      if (cc_->conn_closing)\n        return true;\n\n      // If we are currently executing a synchronous dispatch (e.g. inside IoLoop),\n      // we must wait until it finishes to avoid race conditions.\n      if (cc_->sync_dispatch)\n        return false;\n\n      // For Memcache, we ONLY wake up for Admin messages (dispatch_q_) as we process\n      // parsed_head_  in the connection fiber. For RESP, we wake up for both queues.\n      if (protocol_ == Protocol::MEMCACHE) {\n        return !dispatch_q_.empty();\n      }\n      return HasPendingMessages();\n    });\n\n    if (cc_->conn_closing)\n      break;\n\n    // We really want to have batching in the builder if possible. This is especially\n    // critical in situations where Nagle's algorithm can introduce unwanted high\n    // latencies. However we can only batch if we're sure that there are more commands\n    // on the way that will trigger a flush. 
To know if there are, we sometimes yield before\n    // executing the last command in the queue and let the producer fiber push more commands if it\n    // wants to.\n    // As an optimization, we only yield if the fiber was not suspended since the last dispatch.\n    uint64_t cur_epoch = fb2::FiberSwitchEpoch();\n    if ((GetPendingMessageCount() == 1) && (cur_epoch == prev_epoch)) {\n      if (pipeline_wait_batch_usec > 0) {\n        ThisFiber::SleepFor(chrono::microseconds(pipeline_wait_batch_usec));\n      } else {\n        ThisFiber::Yield();\n      }\n      DVLOG(2) << \"After yielding to producer, parsed_cmd_q_len_=\" << parsed_cmd_q_len_\n               << \" dispatch_q size=\" << dispatch_q_.size();\n      if (cc_->conn_closing)\n        break;\n    }\n    prev_epoch = cur_epoch;\n\n    reply_builder_->SetBatchMode(GetPendingMessageCount() > 1);\n\n    bool subscriber_over_limit =\n        conn_stats.dispatch_queue_subscriber_bytes >= qbp.publish_buffer_limit;\n\n    // The below if/else conditionally choose between 3 message processing policies:\n    // 1. Pipeline squashing\n    // 2. Process pipeline queue\n    // 3. Process admin queue\n    //\n    // Special case: if the dispatch queue accumulated a big number of commands,\n    // we can try to squash them\n    // It is only enabled if the threshold is reached and the whole dispatch queue\n    // consists only of commands (no pubsub or monitor messages)\n    bool squashing_enabled = squashing_threshold > 0;\n    bool threshold_reached = parsed_cmd_q_len_ > squashing_threshold;\n    if (squashing_enabled && threshold_reached && dispatch_q_.empty() && !skip_next_squashing_ &&\n        !IsReplySizeOverLimit()) {  // 1. 
Pipeline squashing\n      SquashPipeline();\n      dispatch_q_cmd_processed = 0;\n    } else {\n      MessageHandle msg;\n\n      // If the front message is a Migration Request, but we still have pipeline data\n      // (parsed_head_), we must block the migration and process the pipeline messages first.\n      bool is_migration_req =\n          !dispatch_q_.empty() &&\n          std::holds_alternative<MigrationRequestMessage>(dispatch_q_.front().handle);\n\n      // If the quota is reached but the pipeline appears empty, we must yield to the IoLoop\n      // (producer). This allows the discovery and parsing of commands potentially sitting in the\n      // TCP buffer. Without this yield, AsyncFiber would monopolize the CPU, starving the IoLoop\n      // and remaining blind to pending pipeline data.\n      bool quota_reached =\n          (async_dispatch_quota > 0) && (dispatch_q_cmd_processed >= async_dispatch_quota);\n      if (quota_reached && (parsed_head_ == nullptr)) {\n        ThisFiber::Yield();\n\n        // If it is STILL empty after IoLoop got a chance to run, the client hasn't sent anything.\n        // Reset the counter so we don't yield on every single loop.\n        if (parsed_head_ == nullptr) {\n          dispatch_q_cmd_processed = 0;\n        }\n      }\n\n      // We prioritize pipeline execution over the admin queue in two distinct cases (Pipeline queue\n      // must be non-empty for both cases):\n      // 1. A migration is requested (Redis only), but we must drain the existing\n      // pipeline first.\n      // 2.  The dispatch quota was reached, forcing a pipeline execution to prevent\n      // starvation.\n      bool prefer_pipeline_execution = false;\n      if (parsed_head_ != nullptr) {\n        prefer_pipeline_execution =\n            quota_reached || (is_migration_req && (protocol_ == Protocol::REDIS));\n      }\n      if (dispatch_q_.empty() || prefer_pipeline_execution) {  // 2. 
Process pipeline Queue\n        VLOG_IF(1, prefer_pipeline_execution)\n            << \"[\" << id_ << \"] Preferring pipeline execution over admin queue. \"\n            << \"Migration requested: \" << is_migration_req\n            << \", dispatch quota reached: \" << quota_reached\n            << \", async_dispatch_quota: \" << async_dispatch_quota\n            << \", dispatch_q_cmd_processed: \" << dispatch_q_cmd_processed;\n        ProcessPipelineCommand();\n        dispatch_q_cmd_processed = 0;\n      } else {  // 3. Process admin Queue\n        msg = std::move(dispatch_q_.front());\n        dispatch_q_.pop_front();\n        dispatch_q_cmd_processed++;\n\n        // Execute and check if we need to terminate the fiber\n        if (ProcessAdminMessage(&msg, &async_op)) {\n          return;  // don't set conn closing flag\n        }\n      }\n    }\n\n    // Notify waiters if backpressure constraints are relieved.\n    // 1. Global memory (bytes) is under limit -> Wakes up neighbors on this thread.\n    // 2. Local queue (length) is under limit -> Wakes up this connection's producer.\n    if (qbp.IsPipelineBufferUnderLimit(conn_stats.pipeline_queue_bytes, parsed_cmd_q_len_) ||\n        !HasPendingMessages()) {\n      qbp.pipeline_cnd.notify_all();\n    }\n\n    if (subscriber_over_limit &&\n        conn_stats.dispatch_queue_subscriber_bytes < qbp.publish_buffer_limit)\n      qbp.pubsub_ec.notify();\n  }\n\n  DCHECK(cc_->conn_closing || reply_builder_->GetError());\n\n  cc_->conn_closing = true;\n  qbp.pipeline_cnd.notify_all();\n\n  // If shutdown was requested, we need to break the receive call in case the i/o fiber\n  // is blocked there. 
With io loop v2, we can have a different mechanism to break from recv flow.\n  if (request_shutdown_) {\n    ShutdownSelfBlocking();\n  }\n}\n\nvoid Connection::ShrinkPipelinePool() {\n  if (pipeline_req_pool_.empty())\n    return;\n  auto& conn_stats = tl_facade_stats->conn_stats;\n\n  if (tl_pipe_cache_sz_tracker.CheckAndUpdateWatermark(pipeline_req_pool_.size())) {\n    conn_stats.pipeline_cmd_cache_bytes -= UsedMemoryInternal(*pipeline_req_pool_.back());\n    pipeline_req_pool_.pop_back();\n  }\n}\n\nConnection::PipelineMessagePtr Connection::GetFromPoolOrCreate() {\n  if (pipeline_req_pool_.empty())\n    return PipelineMessagePtr{CreateParsedCommand()};\n  auto& conn_stats = tl_facade_stats->conn_stats;\n\n  auto ptr = std::move(pipeline_req_pool_.back());\n  pipeline_req_pool_.pop_back();\n\n  conn_stats.pipeline_cmd_cache_bytes -= UsedMemoryInternal(*ptr);\n  ptr->ResetForReuse();\n\n  ptr->Init(reply_builder_.get(), cc_.get());\n  ptr->ConfigureMCExtension(protocol_ == Protocol::MEMCACHE);\n\n  return ptr;\n}\n\nvoid Connection::ShutdownSelfBlocking() {\n  util::Connection::Shutdown();\n}\n\nbool Connection::Migrate(util::fb2::ProactorBase* dest) {\n  // Migrate is used only by replication, so it doesn't have properties of full-fledged\n  // connections\n  CHECK(!cc_->async_dispatch);\n  CHECK_EQ(cc_->subscriptions, 0);  // are bound to thread local caches\n  CHECK_EQ(self_.use_count(), 1u);  // references cache our thread and backpressure\n                                    //\n  if (ioloop_v2_ && socket_ && socket_->IsOpen()) {\n    socket_->ResetOnRecvHook();\n  }\n\n  // Migrate is only used by DFLY Thread and Flow command which both check against\n  // the result of Migration and handle it explicitly in their flows so this can act\n  // as a weak if condition instead of a crash prone CHECK.\n  if (async_fb_.IsJoinable() || cc_->conn_closing) {\n    return false;\n  }\n\n  listener()->Migrate(this, dest);\n\n  // After we migrate, it could be the case 
the connection was shut down. We should\n  // act accordingly.\n  if (!socket()->IsOpen()) {\n    return false;\n  }\n\n  return true;\n}\n\nConnection::WeakRef Connection::Borrow() {\n  DCHECK(self_);\n\n  return {self_, unsigned(socket_->proactor()->GetPoolIndex()), id_};\n}\n\nvoid Connection::ShutdownThreadLocal() {\n  pipeline_req_pool_.clear();\n}\n\nbool Connection::IsCurrentlyDispatching() const {\n  if (!cc_)\n    return false;\n\n  return cc_->async_dispatch || cc_->sync_dispatch;\n}\n\nvoid Connection::SendPubMessageAsync(PubMessage msg) {\n  SendAsync({make_unique<PubMessage>(std::move(msg))});\n}\n\nvoid Connection::SendMonitorMessageAsync(string msg) {\n  SendAsync({MonitorMessage{std::move(msg)}});\n}\n\nvoid Connection::SendCheckpoint(fb2::BlockingCounter bc, bool ignore_paused, bool ignore_blocked) {\n  if (!IsCurrentlyDispatching())\n    return;\n\n  if (cc_->paused && ignore_paused)\n    return;\n\n  if (cc_->blocked && ignore_blocked)\n    return;\n\n  VLOG(2) << \"Sent checkpoint to \" << DebugInfo();\n\n  bc->Add(1);\n  SendAsync({CheckpointMessage{bc}});\n}\n\nvoid Connection::SendInvalidationMessageAsync(InvalidationMessage msg) {\n  SendAsync({std::move(msg)});\n}\n\nvoid Connection::LaunchAsyncFiberIfNeeded() {\n  if (!async_fb_.IsJoinable() && !migration_in_process_) {\n    VLOG(1) << \"[\" << id_ << \"] LaunchAsyncFiberIfNeeded \";\n    async_fb_ = fb2::Fiber(fb2::Launch::post, \"connection_dispatch\", [this]() { AsyncFiber(); });\n  }\n}\n\n// SendAsync is now strictly for the Control Path (Admin/Events).\n// Pipeline commands are handled separately via EnqueueParsedCommand to maintain\n// clean separation between Data and Control paths.\n// Note: Should never block - the callers may run in as a brief callback.\nvoid Connection::SendAsync(MessageHandle msg) {\n  DCHECK(cc_);\n  DCHECK(listener());\n  DCHECK_EQ(ProactorBase::me(), socket_->proactor());\n  auto& conn_stats = tl_facade_stats->conn_stats;\n\n  // \"Closing\" connections 
might be still processing commands, as we don't interrupt them.\n  // So we still want to deliver control messages to them (like checkpoints) if\n  // async_fb_ is running (joinable).\n  if (cc_->conn_closing && (!msg.IsCheckPoint() || !async_fb_.IsJoinable()))\n    return;\n\n  // If we launch while closing, it won't be awaited. Control messages will be processed on cleanup.\n  if (!cc_->conn_closing) {\n    LaunchAsyncFiberIfNeeded();\n  }\n  DCHECK_NE(phase_, PRECLOSE);  // No more messages are processed after this point\n\n  // Close MONITOR connection if we overflow limits.\n  // We must check the Thread-Global memory usage of BOTH:\n  // 1. The Control Path (dispatch_queue_bytes)\n  // 2. The Data Path (pipeline_queue_bytes)\n  if (msg.IsMonitor()) {\n    if (GetQueueBackpressure().IsPipelineBufferOverLimit(\n            conn_stats.dispatch_queue_bytes + conn_stats.pipeline_queue_bytes,\n            GetPendingMessageCount())) {\n      cc_->conn_closing = true;\n      request_shutdown_ = true;\n      // We don't shutdown here. The reason is that TLS socket is preemptive\n      // and SendAsync is atomic.\n      cnd_.notify_one();\n      return;\n    }\n  }\n\n  local_stats_.dispatch_entries_added++;\n  UpdateDispatchStats(msg, true /* add */);\n  msg.dispatch_cycle = CycleClock::Now();\n\n  // Admin Queueing Rules:\n  // Checkpoints go to the front (after existing checkpoints), while all others to the back.\n  bool had_pending_messages = HasPendingMessages();  // check the queues before enqueuing\n  if (msg.IsCheckPoint()) {\n    auto it = dispatch_q_.begin();\n    while (it < dispatch_q_.end() && it->IsCheckPoint())\n      ++it;\n    dispatch_q_.insert(it, std::move(msg));\n  } else {\n    dispatch_q_.push_back(std::move(msg));\n  }\n\n  // Control Path Notification:\n  // We need to wake up the AsyncFiber only if it is currently sleeping.\n  // 1. Memcache: Sleeps if dispatch_q_ is empty. Must notify on 0->1 transition.\n  // 2. 
Redis: Sleeps if BOTH queues are empty. If pipeline has items, it's already awake.\n  bool should_notify = false;\n  if (protocol_ == Protocol::REDIS) {\n    if (!had_pending_messages) {\n      should_notify = true;\n    }\n  } else {  // MEMCACHE\n    should_notify = (dispatch_q_.size() == 1);\n  }\n\n  if (should_notify && !cc_->sync_dispatch) {\n    cnd_.notify_one();\n  }\n}\n\nvoid Connection::UpdateDispatchStats(const MessageHandle& msg, bool add) {\n  size_t mem = msg.UsedMemory();\n  auto& qbp = GetQueueBackpressure();\n  auto& conn_stats = tl_facade_stats->conn_stats;\n  if (add) {\n    conn_stats.dispatch_queue_entries++;\n    conn_stats.dispatch_queue_bytes += mem;\n    dispatch_q_bytes_ += mem;\n    if (msg.IsPubMsg()) {\n      qbp.subscriber_bytes.fetch_add(mem, std::memory_order_relaxed);\n      conn_stats.dispatch_queue_subscriber_bytes += mem;\n      dispatch_q_subscriber_bytes_ += mem;\n    }\n  } else {\n    DCHECK_GT(conn_stats.dispatch_queue_entries, 0u);\n    DCHECK_GE(conn_stats.dispatch_queue_bytes, mem);\n    conn_stats.dispatch_queue_entries--;\n    conn_stats.dispatch_queue_bytes -= mem;\n    dispatch_q_bytes_ -= mem;\n    if (msg.IsPubMsg()) {\n      DCHECK_GE(conn_stats.dispatch_queue_subscriber_bytes, mem);\n      DCHECK_GE(qbp.subscriber_bytes.load(std::memory_order_relaxed), mem);\n      qbp.subscriber_bytes.fetch_sub(mem, std::memory_order_relaxed);\n      conn_stats.dispatch_queue_subscriber_bytes -= mem;\n      dispatch_q_subscriber_bytes_ -= mem;\n    }\n  }\n}\n\nstd::string Connection::LocalBindStr() const {\n  if (socket_->IsUDS())\n    return \"unix-domain-socket\";\n\n  auto le = socket_->LocalEndpoint();\n  return absl::StrCat(le.address().to_string(), \":\", le.port());\n}\n\nstd::string Connection::LocalBindAddress() const {\n  if (socket_->IsUDS())\n    return \"unix-domain-socket\";\n\n  auto le = socket_->LocalEndpoint();\n  return le.address().to_string();\n}\n\nstd::string Connection::RemoteEndpointStr() const {\n  if 
(socket_->IsUDS())\n    return \"unix-domain-socket\";\n\n  auto re = socket_->RemoteEndpoint();\n  return absl::StrCat(re.address().to_string(), \":\", re.port());\n}\n\nstd::string Connection::RemoteEndpointAddress() const {\n  if (socket_->IsUDS())\n    return \"unix-domain-socket\";\n\n  auto re = socket_->RemoteEndpoint();\n  return re.address().to_string();\n}\n\nfacade::ConnectionContext* Connection::cntx() {\n  return cc_.get();\n}\n\nvoid Connection::RequestAsyncMigration(util::fb2::ProactorBase* dest, bool force) {\n  if ((!force && !migration_enabled_) || cc_ == nullptr) {\n    return;\n  }\n\n  // Connections can migrate at most once.\n  migration_enabled_ = false;\n  migration_request_ = dest;\n}\n\nvoid Connection::StartTrafficLogging(string_view path) {\n  OpenTrafficLogger(path);\n}\n\nvoid Connection::StopTrafficLogging() {\n  lock_guard lk(tl_traffic_logger.mutex);\n  tl_traffic_logger.ResetLocked();\n}\n\nbool Connection::IsHttp() const {\n  return is_http_;\n}\n\nsize_t Connection::GetMemoryUsage() const {\n  size_t mem = sizeof(*this) + cmn::HeapSize(name_) + cmn::HeapSize(memcache_parser_) +\n               cmn::HeapSize(redis_parser_) + cmn::HeapSize(cc_) + cmn::HeapSize(reply_builder_);\n\n  // parsed_cmd_ can be null when dispatching a command, or for http connections.\n  if (parsed_cmd_) {\n    mem += UsedMemoryInternal(*parsed_cmd_);\n  }\n\n  // We add a hardcoded 9k value to accommodate for the part of the Fiber stack that is in use.\n  // The allocated stack is actually larger (~130k), but only a small fraction of that (9k\n  // according to our checks) is actually part of the RSS.\n  mem += 9'000;\n\n  return mem;\n}\n\nvoid Connection::IncreaseConnStats() {\n  DCHECK(tl_facade_stats);\n  auto& conn_stats = tl_facade_stats->conn_stats;\n  if (IsMainOrMemcache())\n    ++conn_stats.num_conns_main;\n  else\n    ++conn_stats.num_conns_other;\n  conn_stats.read_buf_capacity += io_buf_.Capacity();\n\n  conn_stats.dispatch_queue_entries += 
dispatch_q_.size();\n  conn_stats.dispatch_queue_bytes += dispatch_q_bytes_;\n  conn_stats.pipeline_queue_entries += parsed_cmd_q_len_;\n  conn_stats.pipeline_queue_bytes += parsed_cmd_q_bytes_;\n  if (dispatch_q_subscriber_bytes_ > 0) {\n    auto& qbp = GetQueueBackpressure();\n    conn_stats.dispatch_queue_subscriber_bytes += dispatch_q_subscriber_bytes_;\n    qbp.subscriber_bytes.fetch_add(dispatch_q_subscriber_bytes_, std::memory_order_relaxed);\n  }\n}\n\nvoid Connection::DecreaseConnStats() {\n  DCHECK(tl_facade_stats);\n  auto& conn_stats = tl_facade_stats->conn_stats;\n  if (IsMainOrMemcache()) {\n    DCHECK_GT(conn_stats.num_conns_main, 0u);\n    --conn_stats.num_conns_main;\n  } else {\n    DCHECK_GT(conn_stats.num_conns_other, 0u);\n    --conn_stats.num_conns_other;\n  }\n  DCHECK_GE(conn_stats.read_buf_capacity, io_buf_.Capacity());\n  conn_stats.read_buf_capacity -= io_buf_.Capacity();\n\n  DCHECK_GE(conn_stats.dispatch_queue_entries, dispatch_q_.size());\n  conn_stats.dispatch_queue_entries -= dispatch_q_.size();\n  DCHECK_GE(conn_stats.dispatch_queue_bytes, dispatch_q_bytes_);\n  conn_stats.dispatch_queue_bytes -= dispatch_q_bytes_;\n  if (dispatch_q_subscriber_bytes_ > 0) {\n    auto& qbp = GetQueueBackpressure();\n    DCHECK_GE(conn_stats.dispatch_queue_subscriber_bytes, dispatch_q_subscriber_bytes_);\n    conn_stats.dispatch_queue_subscriber_bytes -= dispatch_q_subscriber_bytes_;\n    DCHECK_GE(qbp.subscriber_bytes.load(std::memory_order_relaxed), dispatch_q_subscriber_bytes_);\n    qbp.subscriber_bytes.fetch_sub(dispatch_q_subscriber_bytes_, std::memory_order_relaxed);\n  }\n  DCHECK_GE(conn_stats.pipeline_queue_entries, parsed_cmd_q_len_);\n  conn_stats.pipeline_queue_entries -= parsed_cmd_q_len_;\n  DCHECK_GE(conn_stats.pipeline_queue_bytes, parsed_cmd_q_bytes_);\n  conn_stats.pipeline_queue_bytes -= parsed_cmd_q_bytes_;\n}\n\nvoid Connection::BreakOnce(uint32_t ev_mask) {\n  if (breaker_cb_) {\n    DVLOG(1) << \"[\" << id_ << \"] 
Connection::breaker_cb_ \" << ev_mask;\n    auto fun = std::move(breaker_cb_);\n    DCHECK(!breaker_cb_);\n    fun(ev_mask);\n  }\n}\n\nbool Connection::IsReplySizeOverLimit() const {\n  std::atomic<size_t>& reply_sz = tl_facade_stats->reply_stats.squashing_current_reply_size;\n  size_t current = reply_sz.load(std::memory_order_acquire);\n  const bool over_limit = reply_size_limit != 0 && current > 0 && current > reply_size_limit;\n  if (over_limit) {\n    LOG_EVERY_T(INFO, 10) << \"Commands squashing current reply size is overlimit: \" << current\n                          << \"/\" << reply_size_limit\n                          << \". Falling back to single command dispatch (instead of squashing)\";\n    // Used by testing. Should not be used in production, therefore debug log level 5.\n    DVLOG(5) << \"Commands squashing current reply size is overlimit: \" << current << \"/\"\n             << reply_size_limit\n             << \". Falling back to single command dispatch (instead of squashing)\";\n  }\n  return over_limit;\n}\n\nbool Connection::ParseRedisBatch() {\n  return ParseRedis(max_busy_read_cycles_cached, true) == ParserStatus::OK;\n}\n\nbool Connection::ParseMCBatch() {\n  CHECK(io_buf_.InputLen() > 0);\n\n  do {\n    if (parsed_cmd_ == nullptr) {\n      // Happens with pipelined commands after the first one.\n      PipelineMessagePtr ptr = GetFromPoolOrCreate();\n      parsed_cmd_ = ptr.release();\n    }\n    uint32_t consumed = 0;\n    memcache_parser_->set_last_unix_time(time(nullptr));\n    MemcacheParser::Result result = memcache_parser_->Parse(io::View(io_buf_.InputBuffer()),\n                                                            &consumed, parsed_cmd_->mc_command());\n    io_buf_.ConsumeInput(consumed);\n\n    DVLOG(2) << \"mc_result \" << unsigned(result) << \" consumed: \" << consumed << \" type \"\n             << unsigned(parsed_cmd_->mc_command()->type);\n    if (result == MemcacheParser::INPUT_PENDING)\n      return false;\n\n    // We 
push the command to the parsed queue even in case of parse errors,\n    // so that we can reply in order.\n    EnqueueParsedCommand(parsed_cmd_);\n    parsed_cmd_ = nullptr;  // ownership transferred.\n\n    if (result != MemcacheParser::OK) {\n      // We can not just reply directly to parse error, as we may have pipelined commands before.\n      // Fill the reply_payload into parsed_tail_ with the error and continue parsing.\n      memcache_parser_->Reset();\n      // TODO(vlad): Use Proper SendError calls instead of SendSimpleString and error building\n      auto client_error = [](string_view msg) { return absl::StrCat(\"CLIENT_ERROR \", msg); };\n\n      parsed_tail_->SetDeferredReply();\n      switch (result) {\n        case MemcacheParser::UNKNOWN_CMD:\n          parsed_tail_->SendSimpleString(\"ERROR\");\n          break;\n        case MemcacheParser::PARSE_ERROR:\n          parsed_tail_->SendSimpleString(client_error(\"bad data chunk\"));\n          break;\n        case MemcacheParser::BAD_DELTA:\n          parsed_tail_->SendSimpleString(client_error(\"invalid numeric delta argument\"));\n          break;\n        default:\n          parsed_tail_->SendSimpleString(client_error(\"bad command line format\"));\n          break;\n      }\n    }\n  } while (parsed_cmd_q_len_ < 128 && io_buf_.InputLen() > 0);\n  return true;\n}\n\nbool Connection::ExecuteBatch() {\n  auto& conn_stats = tl_facade_stats->conn_stats;\n  auto advance_head = [this]() -> ParsedCommand* {\n    auto* cmd = parsed_head_;\n    parsed_head_ = cmd->next;\n    ReleaseParsedCommand(cmd, parsed_head_ != nullptr /* is_pipelined */);\n    return parsed_head_;\n  };\n\n  auto dispatch = protocol_ == Protocol::MEMCACHE ? 
&ServiceInterface::DispatchMC\n                                                  : &ServiceInterface::DispatchCommandSimple;\n\n  // Execute sequentially all parsed commands.\n  for (auto& cmd = parsed_to_execute_; cmd != nullptr;) {\n    if (reply_builder_->GetError())\n      return false;\n    bool is_head = cmd == parsed_head_;\n\n    // parser errors are stored as deferred replies\n    if (cmd->IsDeferredReply() && cmd->CanReply()) {\n      if (is_head) {\n        cmd->SendReply();\n        cmd = advance_head();\n      } else {\n        cmd = cmd->next;\n      }\n      continue;\n    }\n\n    // We must continue with async execution if we already have executing commands\n    auto mode = is_head ? AsyncPreference::PREFER_ASYNC : AsyncPreference::ONLY_ASYNC;\n\n    if (!ioloop_v2_)  // only v2 loop supports any async commands so far\n      mode = AsyncPreference::ONLY_SYNC;\n\n    auto dispatch_res = (service_->*dispatch)(cmd, mode);\n\n    // Enforce the pipeline invariant between the IO loop (producer) and AsyncFiber (consumer).\n    // To prevent stream corruption, the command state must satisfy ONE of these rules:\n    // 1. It is the head command (safely writes to the socket directly).\n    // 2. It did not stall the pipeline (dispatch_res != WOULD_BLOCK) and therefore\n    //    must have buffered its reply locally (is_deferred == true).\n    // 3. It stalled the pipeline because it requires synchronous execution\n    //    (dispatch_res == WOULD_BLOCK) and therefore must NOT have buffered\n    //    a reply (is_deferred == false).\n    bool is_deferred = cmd->IsDeferredReply();\n    DCHECK(is_head || (is_deferred == (dispatch_res != DispatchResult::WOULD_BLOCK)))\n        << \"Pipeline contract breach! Invalid state for non-head command. 
\"\n        << \"DispatchResult: \" << static_cast<int>(dispatch_res) << \", IsDeferred: \" << is_deferred\n        << \", Command Type: \" << cmd->mc_command()->type;\n\n    if (dispatch_res == DispatchResult::WOULD_BLOCK)\n      break;  // Sync command. Wait for current async commands to finish\n\n    conn_stats.pipeline_dispatch_commands++;\n    if (is_head)\n      conn_stats.pipeline_dispatch_calls++;\n\n    if (cmd->IsDeferredReply()) {\n      cmd = cmd->next;\n    } else {\n      DCHECK(is_head);       // only head can execute sync\n      cmd = advance_head();  // advance it\n    }\n  }\n\n  if (parsed_head_ == nullptr)\n    parsed_tail_ = nullptr;\n  return true;\n}\n\nbool Connection::ReplyBatch() {\n  reply_builder_->SetBatchMode(true);\n  for (auto& cmd = parsed_head_; cmd != parsed_to_execute_;) {\n    if (!cmd->CanReply())\n      break;\n\n    current_wait_.reset();  // we must free waiter before proceeding with other commands\n    cmd->SendReply();\n\n    auto* prev = exchange(cmd, cmd->next);\n    ReleaseParsedCommand(prev, cmd != parsed_to_execute_ /* is_pipelined */);\n    if (reply_builder_->GetError())\n      return false;\n  }\n\n  if (parsed_head_ == nullptr)\n    parsed_tail_ = nullptr;\n\n  reply_builder_->SetBatchMode(false);\n  reply_builder_->Flush();\n  return !reply_builder_->GetError();\n}\n\nParsedCommand* Connection::CreateParsedCommand() {\n  auto* res = service_->AllocateParsedCommand();\n  res->Init(reply_builder_.get(), cc_.get());\n  res->ConfigureMCExtension(protocol_ == Protocol::MEMCACHE);\n  return res;\n}\n\nvoid Connection::EnqueueParsedCommand(ParsedCommand* cmd) {\n  DCHECK(cmd);\n  cmd->next = nullptr;\n  auto& conn_stats = tl_facade_stats->conn_stats;\n\n  cmd->parsed_cycle = base::CycleClock::Now();\n\n  if (parsed_head_ == nullptr) {\n    parsed_head_ = cmd;\n    parsed_to_execute_ = cmd;\n  } else {\n    parsed_tail_->next = cmd;\n    if (parsed_to_execute_ == nullptr) {\n      // we've executed all the parsed 
commands so far.\n      parsed_to_execute_ = cmd;\n    }\n  }\n  parsed_tail_ = cmd;\n\n  size_t used_mem = cmd->UsedMemory();\n  parsed_cmd_q_len_++;\n  parsed_cmd_q_bytes_ += used_mem;\n  local_stats_.dispatch_entries_added++;\n  conn_stats.pipeline_queue_entries++;\n  conn_stats.pipeline_queue_bytes += used_mem;\n\n  // AsyncFiber for Memcache only wakes up on dispatch_q_, notify only redis as this is the parse\n  // commands queue.\n  if ((!cc_->sync_dispatch) && (protocol_ == Protocol::REDIS)) {\n    cnd_.notify_one();\n  }\n}\n\nvoid Connection::ReleaseParsedCommand(ParsedCommand* cmd, bool is_pipelined) {\n  size_t used_mem = cmd->UsedMemory();\n  auto& conn_stats = tl_facade_stats->conn_stats;\n\n  DCHECK_GT(parsed_cmd_q_len_, 0u);\n  DCHECK_GE(parsed_cmd_q_bytes_, used_mem);\n  DCHECK_GT(conn_stats.pipeline_queue_entries, 0u);\n  DCHECK_GE(conn_stats.pipeline_queue_bytes, used_mem);\n  parsed_cmd_q_len_--;\n  parsed_cmd_q_bytes_ -= used_mem;\n\n  conn_stats.pipeline_queue_entries--;\n  conn_stats.pipeline_queue_bytes -= used_mem;\n\n  if (is_pipelined) {\n    conn_stats.pipelined_cmd_cnt++;\n    uint64_t latency_usec = CycleClock::ToUsec(CycleClock::Now() - cmd->parsed_cycle);\n    conn_stats.pipelined_cmd_latency += latency_usec;\n    conn_stats.pipelined_latency_hist.Add(latency_usec);\n    // Decay the histogram every kPipelineLatencyDecayPeriod samples to\n    // approximate a moving-window distribution; older observations contribute\n    // half as much after each decay period.\n    constexpr uint64_t kPipelineLatencyDecayPeriod = 1 << 14;  // 16384\n    if ((conn_stats.pipelined_latency_hist.count() & (kPipelineLatencyDecayPeriod - 1)) == 0) {\n      conn_stats.pipelined_latency_hist.Decay();\n    }\n  }\n\n  if (parsed_cmd_ == nullptr) {\n    parsed_cmd_ = cmd;\n    parsed_cmd_->ResetForReuse();\n  } else {\n    // If we are over the limit, destroy the command instead of caching it.\n    size_t cmd_mem = UsedMemoryInternal(*cmd);\n    
QueueBackpressure& qbp = GetQueueBackpressure();\n    if (conn_stats.pipeline_cmd_cache_bytes + cmd_mem <= qbp.pipeline_cache_limit) {\n      conn_stats.pipeline_cmd_cache_bytes += cmd_mem;\n      pipeline_req_pool_.emplace_back(cmd);\n    } else {\n      delete cmd;\n    }\n  }\n}\n\nvoid Connection::DestroyParsedQueue() {\n  while (parsed_head_ != nullptr) {\n    auto* cmd = parsed_head_;\n    parsed_head_ = cmd->next;\n\n    // Being able to drop an in-flight transaction would require it keeping no pointers\n    // at all to any context data - too costly for now! (maybe let it own the arguments?)\n    if (cmd->IsDeferredReply() && !cmd->CanReply())\n      cmd->Blocker()->Wait();  // explicitly wait for it to finish\n    ReleaseParsedCommand(cmd, false);\n  }\n\n  parsed_tail_ = nullptr;\n  CHECK_EQ(parsed_cmd_q_len_, 0u);\n  CHECK_EQ(parsed_cmd_q_bytes_, 0u);\n  delete parsed_cmd_;\n  parsed_cmd_ = nullptr;\n}\n\nvoid Connection::UpdateFromFlags() {\n  unsigned tid = fb2::ProactorBase::me()->GetPoolIndex();\n  thread_queue_backpressure[tid].pipeline_queue_max_len = GetFlag(FLAGS_pipeline_queue_limit);\n  thread_queue_backpressure[tid].pipeline_buffer_limit = GetFlag(FLAGS_pipeline_buffer_limit);\n  thread_queue_backpressure[tid].pipeline_cnd.notify_all();\n\n  max_busy_read_cycles_cached = base::CycleClock::FromUsec(GetFlag(FLAGS_max_busy_read_usec));\n  always_flush_pipeline_cached = GetFlag(FLAGS_always_flush_pipeline);\n  pipeline_squash_limit_cached = GetFlag(FLAGS_pipeline_squash_limit);\n  pipeline_wait_batch_usec = GetFlag(FLAGS_pipeline_wait_batch_usec);\n}\n\nstd::vector<std::string> Connection::GetMutableFlagNames() {\n  return base::GetFlagNames(FLAGS_pipeline_queue_limit, FLAGS_pipeline_buffer_limit,\n                            FLAGS_max_busy_read_usec, FLAGS_always_flush_pipeline,\n                            FLAGS_pipeline_squash_limit, FLAGS_pipeline_wait_batch_usec);\n}\n\nvoid Connection::GetRequestSizeHistogramThreadLocal(std::string* hist) 
{\n  if (io_req_size_hist)\n    *hist = io_req_size_hist->ToString();\n}\n\nvoid Connection::TrackRequestSize(bool enable) {\n  if (enable && !io_req_size_hist) {\n    io_req_size_hist = new base::Histogram;\n  } else if (!enable && io_req_size_hist) {\n    delete io_req_size_hist;\n    io_req_size_hist = nullptr;\n  }\n}\n\nvoid Connection::EnsureMemoryBudget(unsigned tid) {\n  thread_queue_backpressure[tid].EnsureBelowLimit();\n}\n\nConnectionRef::ConnectionRef(const std::shared_ptr<Connection>& ptr, unsigned thread_id,\n                             uint32_t client_id)\n    : ptr_{ptr}, last_known_thread_id_{thread_id}, client_id_{client_id} {\n}\n\nConnection* ConnectionRef::Get() const {\n  auto sptr = ptr_.lock();\n\n  //  The connection can only be deleted on this thread, so\n  //  this pointer is valid until the next suspension.\n  //  Note: keeping a shared_ptr doesn't prolong the lifetime because\n  //  it doesn't manage the underlying connection. See definition of `self_`.\n  return sptr.get();\n}\n\nbool Connection::WeakRef::IsExpired() const {\n  return ptr_.expired();\n}\n\nuint32_t Connection::WeakRef::GetClientId() const {\n  return client_id_;\n}\n\nbool ConnectionRef::operator<(const ConnectionRef& other) const {\n  return client_id_ < other.client_id_;\n}\n\nbool ConnectionRef::operator==(const ConnectionRef& other) const {\n  return client_id_ == other.client_id_;\n}\n\nvoid Connection::DoReadOnRecv(const util::FiberSocketBase::RecvNotification& n) {\n  if (std::holds_alternative<std::error_code>(n.read_result)) {\n    io_ec_ = std::get<std::error_code>(n.read_result);\n    return;\n  }\n\n  using RecvNoti = util::FiberSocketBase::RecvNotification::RecvCompletion;\n  if (std::holds_alternative<RecvNoti>(n.read_result)) {\n    if (!std::get<RecvNoti>(n.read_result)) {\n      io_ec_ = make_error_code(errc::connection_aborted);\n      return;\n    }\n\n    if (io_buf_.AppendLen() == 0) {\n      // We will regrow in IoLoopV2\n      return;\n    }\n\n 
   io::MutableBytes buf = io_buf_.AppendBuffer();\n    io::Result<size_t> res = socket_->TryRecv(buf);\n\n    if (res) {\n      if (*res > 0) {\n        // A recv call can return fewer bytes than requested even if the\n        // socket buffer actually contains enough data to satisfy the full request.\n        // TODO maybe worth looping here and try another recv call until it fails\n        // with EAGAIN or EWOULDBLOCK. The problem there is that we need to handle\n        // resizing if AppendBuffer is zero.\n        io_buf_.CommitWrite(*res);\n        return;\n      }\n      // *res == 0\n      io_ec_ = make_error_code(errc::connection_aborted);\n      return;\n    }\n\n    // error path (!res)\n    auto ec = res.error();\n    // EAGAIN and EWOULDBLOCK\n    if (ec == errc::resource_unavailable_try_again || ec == errc::operation_would_block) {\n      return;\n    }\n\n    io_ec_ = ec;\n  } else if (std::holds_alternative<io::MutableBytes>(n.read_result)) {  // provided buffer.\n    io::MutableBytes buf = std::get<io::MutableBytes>(n.read_result);\n    UpdateIoBufCapacity(io_buf_, &tl_facade_stats->conn_stats,\n                        [&]() { io_buf_.WriteAndCommit(buf.data(), buf.size()); });\n  } else {\n    LOG(FATAL) << \"Should not reach here\";\n  }\n}\n\nvoid Connection::CheckIoBufCapacity(bool is_iobuf_full) {\n  auto& conn_stats = tl_facade_stats->conn_stats;\n  size_t max_io_buf_len = GetFlag(FLAGS_max_client_iobuf_len);\n\n  size_t capacity = io_buf_.Capacity();\n  if (capacity < max_io_buf_len) {\n    size_t parser_hint = 0;\n    if (redis_parser_)\n      parser_hint = redis_parser_->parselen_hint();  // Could be done for MC as well.\n\n    // If we got a partial request and we managed to parse its\n    // length, make sure we have space to store it instead of\n    // increasing space incrementally.\n    // (Note: The buffer object is only working in power-of-2 sizes,\n    // so there's no danger of accidental O(n^2) behavior.)\n    if (parser_hint > 
capacity) {\n      UpdateIoBufCapacity(io_buf_, &conn_stats,\n                          [&]() { io_buf_.Reserve(std::min(max_io_buf_len, parser_hint)); });\n    }\n\n    // If we got a partial request because iobuf was full, grow it up to\n    // a reasonable limit to save on Recv() calls.\n    if (is_iobuf_full && capacity < max_io_buf_len / 2) {\n      // Last io used most of the io_buf to the end.\n      UpdateIoBufCapacity(io_buf_, &conn_stats, [&]() {\n        io_buf_.Reserve(capacity * 2);  // Valid growth range.\n      });\n    }\n\n    if (io_buf_.AppendLen() == 0U) {\n      // it can happen with memcached but not for RedisParser, because RedisParser fully\n      // consumes the passed buffer\n      LOG_EVERY_T(WARNING, 10) << \"Maximum io_buf length reached \" << io_buf_.Capacity()\n                               << \", consider to increase max_client_iobuf_len flag\";\n    }\n  }\n}\n\nvariant<error_code, Connection::ParserStatus> Connection::IoLoopV2() {\n  DCHECK(memcache_parser_) << \"Not supported for redis yet\";\n\n  auto* peer = socket_.get();\n  recv_buf_.res_len = 0;\n\n  // Don't proceed with RegisterOnRecv() if socket is closed (possible cancellation)\n  if (!peer->IsOpen())\n    return ParserStatus::OK;\n\n  if (fb2::ProactorBase::me()->GetKind() == fb2::ProactorBase::Kind::IOURING) {\n#ifdef __linux__\n    fb2::UringProactor* up = static_cast<fb2::UringProactor*>(fb2::ProactorBase::me());\n    if (up->BufRingEntrySize(kRecvSockGid) > 0 && !is_tls_) {\n      static_cast<fb2::UringSocket*>(peer)->EnableRecvMultishot();\n    }\n#endif\n  }\n\n  peer->RegisterOnRecv([this](const FiberSocketBase::RecvNotification& n) {\n    DVLOG(2) << \"Calling DoReadOnRecv iobuf_len: \" << io_buf_.InputLen();\n    DoReadOnRecv(n);\n    io_event_.notify();\n  });\n\n  ParserStatus parse_status = OK;\n\n  // Waiter that is passed to the current async command head to be notified on completion\n  auto ioevent_cb = [this]() { io_event_.notify(); };\n  
util::fb2::detail::Waiter ioevent_waiter{ioevent_cb};  // takes callback by reference\n  absl::Cleanup waiter_cleanup = [this] { current_wait_.reset(); };\n\n  do {\n    HandleMigrateRequest();\n\n    // Register completion for current head if its pending and we don't wait\n    if (auto* cmd = parsed_head_; cmd && cmd != parsed_to_execute_ && !current_wait_.has_value()) {\n      current_wait_.emplace(cmd, &ioevent_waiter);\n    }\n\n    if (io_buf_.InputLen() == 0) {\n      // Poll again for readiness. The event handler registered above is edge triggered\n      // We should read from the socket until EAGAIN or EWOULDBLOCK\n      // to make sure we consume all available data.\n      // See \"Do I need to continuously read/write\" question\n      // under https://man7.org/linux/man-pages/man7/epoll.7.html\n      // The exception is when we use io_uring with multishot recv enabled, in which case\n      // we rely on the kernel to keep feeding us data until we multishot is disabled.\n      DoReadOnRecv(FiberSocketBase::RecvNotification{true});\n      io_event_.await([this]() {\n        // TODO: optimize CanReply with looking up waiter key\n        bool cmd_executable = parsed_head_ && parsed_head_ == parsed_to_execute_;\n        bool cmd_ready = !cmd_executable && parsed_head_ && parsed_head_->CanReply();\n        return io_buf_.InputLen() > 0 || cmd_ready || cmd_executable || io_ec_;\n      });\n    }\n\n    if (io_ec_) {\n      LOG_IF(WARNING, cntx()->replica_conn) << \"async io error: \" << io_ec_;\n      return std::exchange(io_ec_, {});\n    }\n\n    phase_ = PROCESS;\n    bool is_iobuf_full = io_buf_.AppendLen() == 0;\n\n    if (io_buf_.InputLen() > 0) {\n      parse_status = ParseLoop();\n    } else {\n      parse_status = NEED_MORE;\n\n      if (parsed_head_) {\n        if (parsed_head_ == parsed_to_execute_)\n          ExecuteBatch();\n        ReplyBatch();\n      }\n    }\n\n    if (reply_builder_->GetError()) {\n      return reply_builder_->GetError();\n    
}\n\n    if (parse_status == NEED_MORE) {\n      parse_status = OK;\n      CheckIoBufCapacity(is_iobuf_full);\n    } else if (parse_status != OK) {\n      break;\n    }\n  } while (peer->IsOpen());\n\n  return parse_status;\n}\n\nConnection::WaitEvent::WaitEvent(ParsedCommand* cmd, util::fb2::detail::Waiter* w)\n    : key(cmd->Blocker()->OnCompletion(w)) {\n}\n\nvoid ResetStats() {\n  auto& cstats = tl_facade_stats->conn_stats;\n  cstats.pipelined_cmd_cnt = 0;\n  cstats.conn_received_cnt = 0;\n  cstats.command_cnt_main = 0;\n  cstats.command_cnt_other = 0;\n  cstats.io_read_cnt = 0;\n  cstats.io_read_bytes = 0;\n\n  tl_facade_stats->reply_stats = {};\n  if (io_req_size_hist)\n    io_req_size_hist->Clear();\n}\n\n}  // namespace facade\n"
  },
  {
    "path": "src/facade/dragonfly_connection.h",
    "content": "// Copyright 2026, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#pragma once\n\n#include <absl/container/fixed_array.h>\n#include <sys/socket.h>\n\n#include <deque>\n#include <memory>\n#include <string_view>\n#include <utility>\n#include <variant>\n\n#include \"facade/connection_ref.h\"\n#include \"facade/facade_types.h\"\n#include \"facade/parsed_command.h\"\n#include \"io/io_buf.h\"\n#include \"util/connection.h\"\n#include \"util/fibers/fibers.h\"\n#include \"util/fibers/synchronization.h\"\n\ntypedef struct ssl_ctx_st SSL_CTX;\n\n// need to declare for older linux distributions like CentOS 7\n#ifndef SO_INCOMING_CPU\n#define SO_INCOMING_CPU 49\n#endif\n\n#ifndef SO_INCOMING_NAPI_ID\n#define SO_INCOMING_NAPI_ID 56\n#endif\n\n#ifdef ABSL_HAVE_ADDRESS_SANITIZER\nconstexpr size_t kReqStorageSize = 88;\n#else\nconstexpr size_t kReqStorageSize = 120;\n#endif\n\nnamespace util {\nclass HttpListenerBase;\n}  // namespace util\n\nnamespace facade {\n\nstruct ConnectionStats;\nclass ConnectionContext;\nclass ServiceInterface;\nclass SinkReplyBuilder;\nclass RespSrvParser;\n\n// Connection represents an active connection for a client.\n//\n// It directly dispatches regular commands from the io-loop.\n// For pipelined requests, monitor and pubsub messages it uses\n// a separate dispatch queue that is processed on a separate fiber.\nclass Connection : public util::Connection {\n public:\n  static void Init(unsigned io_threads);\n  static void Shutdown();\n  static void ShutdownThreadLocal();\n\n  Connection(Protocol protocol, util::HttpListenerBase* http_listener, SSL_CTX* ctx,\n             ServiceInterface* service);\n  ~Connection();\n\n  // A callback called by Listener::OnConnectionStart in the same thread where\n  // HandleRequests will run.\n  void OnConnectionStart();\n\n  using BreakerCb = std::function<void(uint32_t)>;\n  using ShutdownCb = std::function<void()>;\n\n  // PubSub message, either incoming 
message for active subscription or reply for new subscription.\n  struct PubMessage {\n    std::string pattern;                // non-empty for pattern subscriber\n    std::shared_ptr<char[]> buf;        // stores channel name and message\n    std::string_view channel, message;  // channel and message parts from buf\n    bool is_sharded = false;\n\n    // Unsubscribe simultaneously when sending unsubscribe message. Used for cluster migrations\n    bool force_unsubscribe = false;\n  };\n\n  // Monitor message, carries a simple payload with the registered event to be sent.\n  struct MonitorMessage : public std::string {};\n\n  // Migration request message, the async fiber stops to give way for thread migration.\n  struct MigrationRequestMessage {};\n\n  // Checkpoint message, used to track when the connection finishes executing the current command.\n  struct CheckpointMessage {\n    util::fb2::BlockingCounter bc;  // Decremented counter when processed\n  };\n\n  struct InvalidationMessage {\n    std::string key;\n    bool invalidate_due_to_flush = false;\n  };\n\n  // Pipeline message, accumulated Redis command to be executed.\n  using PipelineMessagePtr = std::unique_ptr<ParsedCommand>;\n  using PubMessagePtr = std::unique_ptr<PubMessage>;\n\n  // Variant wrapper around different message types\n  struct MessageHandle {\n    size_t UsedMemory() const;  // How much bytes this handle takes up in total.\n\n    // Checkpoint messages put themselves at the front of the queue, but only in relative\n    // order to the rest of the messages in the queue.\n    bool IsCheckPoint() const {\n      return std::holds_alternative<CheckpointMessage>(handle);\n    }\n\n    bool IsPubMsg() const {\n      return std::holds_alternative<PubMessagePtr>(handle);\n    }\n\n    bool IsMonitor() const {\n      return std::holds_alternative<MonitorMessage>(handle);\n    }\n\n    bool IsReplying() const;  // control messages don't reply, messages carrying data do\n\n    
std::variant<MonitorMessage, PubMessagePtr, MigrationRequestMessage, CheckpointMessage,\n                 InvalidationMessage>\n        handle;\n\n    // time when the message was dispatched to the dispatch queue as reported by\n    // CycleClock::Now()\n    uint64_t dispatch_cycle = 0;\n  };\n\n  static_assert(sizeof(MessageHandle) <= 80,\n                \"Big structs should use indirection to avoid wasting deque space!\");\n\n  enum Phase : uint8_t { SETUP, READ_SOCKET, PROCESS, SHUTTING_DOWN, PRECLOSE, NUM_PHASES };\n\n  using WeakRef = ConnectionRef;\n\n  // Add PubMessage to dispatch queue.\n  // Virtual because behavior is overridden in test_utils.\n  virtual void SendPubMessageAsync(PubMessage);\n\n  // Add monitor message to dispatch queue.\n  void SendMonitorMessageAsync(std::string);\n\n  // If any dispatch is currently in progress, increment counter and send checkpoint message to\n  // decrement it once finished.\n  void SendCheckpoint(util::fb2::BlockingCounter bc, bool ignore_paused = false,\n                      bool ignore_blocked = false);\n\n  // Add InvalidationMessage to dispatch queue.\n  virtual void SendInvalidationMessageAsync(InvalidationMessage);\n\n  // Register hook that is executen when the connection breaks.\n  void RegisterBreakHook(BreakerCb breaker_cb);\n\n  void FlushReplies();\n\n  // Manually shutdown self.\n  void ShutdownSelfBlocking();\n\n  // Migrate this connecton to a different thread.\n  // Return true if Migrate succeeded\n  // Return false if dispatch_fb_ is active\n  bool Migrate(util::fb2::ProactorBase* dest);\n\n  // Borrow weak reference to connection. 
Can be called from any thread.\n  WeakRef Borrow();\n\n  bool IsCurrentlyDispatching() const;\n\n  std::string GetClientInfo(unsigned thread_id) const;\n  std::string GetClientInfo() const;\n\n  virtual std::string RemoteEndpointStr() const;  // virtual because overwritten in test_utils\n  std::string RemoteEndpointAddress() const;\n\n  std::string LocalBindStr() const;\n  std::string LocalBindAddress() const;\n\n  uint32_t GetClientId() const;\n\n  virtual bool IsPrivileged() const;  // virtual because overwritten in test_utils\n\n  bool IsMain() const;\n\n  // In addition to the listener role being main, also returns true if the protocol is Memcached.\n  // This method returns true for customer facing listeners.\n  bool IsMainOrMemcache() const;\n\n  void SetName(std::string name);\n\n  void SetLibName(std::string name);\n  void SetLibVersion(std::string version);\n\n  // Returns a map of 'libname:libver'->count, thread local data\n  static const absl::flat_hash_map<std::string, uint64_t>& GetLibStatsTL();\n\n  std::string_view GetName() const {\n    return name_;\n  }\n\n  // Returns protocol type of this connection\n  Protocol GetProtocol() const {\n    return protocol_;\n  }\n\n  // Returns memory usage of this connection's auxiliary members in bytes.\n  size_t GetMemoryUsage() const;\n\n  ConnectionContext* cntx();\n\n  // Requests that at some point, this connection will be migrated to `dest` thread.\n  // If force is false, the connection will migrate at most once,\n  // and only when the flag --migrate_connections is true.\n  void RequestAsyncMigration(util::fb2::ProactorBase* dest, bool force);\n\n  // Starts traffic logging in the calling thread. Must be a proactor thread.\n  // Each thread creates its own log file combining requests from all the connections in\n  // that thread. A noop if the thread is already logging.\n  static void StartTrafficLogging(std::string_view base_path);\n\n  // Stops traffic logging in this thread. 
A noop if the thread is not logging.\n  static void StopTrafficLogging();\n\n  // Get quick debug info for logs\n  std::string DebugInfo() const;\n\n  bool IsHttp() const;\n\n  static void UpdateFromFlags();                          // Set values from flags\n  static std::vector<std::string> GetMutableFlagNames();  // Triggers UpdateFromFlags\n\n  static void TrackRequestSize(bool enable);\n  static void EnsureMemoryBudget(unsigned tid);\n  static void GetRequestSizeHistogramThreadLocal(std::string* hist);\n\n  unsigned idle_time() const {\n    return time(nullptr) - last_interaction_;\n  }\n\n  unsigned GetSendWaitTimeSec() const;\n\n  Phase phase() const {\n    return phase_;\n  }\n\n  bool IsSending() const;\n\n  void Notify() {\n    io_event_.notify();\n  }\n\n  void MarkForClose();\n\n protected:\n  void OnShutdown() override;\n  void OnPreMigrateThread() override;\n  void OnPostMigrateThread() override;\n\n  std::unique_ptr<ConnectionContext> cc_;  // Null for http connections\n\n private:\n  enum ParserStatus : uint8_t { OK, NEED_MORE, ERROR };\n\n  struct AsyncOperations;\n\n  // Check protocol and handle connection.\n  void HandleRequests() final;\n\n  // Start dispatch fiber and run IoLoop.\n  void ConnectionFlow();\n\n  // Main loop reading client messages and passing requests to dispatch queue.\n  std::variant<std::error_code, ParserStatus> IoLoop();\n\n  void DoReadOnRecv(const util::FiberSocketBase::RecvNotification& n);\n\n  void CheckIoBufCapacity(bool is_iobuf_full);\n\n  // Main loop reading client messages and passing requests to dispatch queue.\n  std::variant<std::error_code, ParserStatus> IoLoopV2();\n\n  // Returns true if HTTP header is detected.\n  io::Result<bool> CheckForHttpProto();\n\n  // Dispatches a single (Redis or MC) command.\n  // `has_more` should indicate whether the io buffer has more commands\n  // (pipelining in progress). 
Performs async dispatch if forced (already in async mode) or if\n  // has_more is true, otherwise uses synchronous dispatch.\n  void DispatchSingle(bool has_more, absl::FunctionRef<void()> invoke_cb,\n                      absl::FunctionRef<void()> enqueue_cmd_cb);\n\n  // Handles events from the dispatch queue.\n  void AsyncFiber();\n\n  // Processes a single Admin/Control message from dispatch_q_.\n  // Returns true if the fiber should terminate (e.g. Migration).\n  bool ProcessAdminMessage(MessageHandle* msg, AsyncOperations* async_op);\n\n  // Processes the next Pipeline command from parsed_head_.\n  void ProcessPipelineCommand();\n\n  void SendAsync(MessageHandle msg);\n\n  // Updates Control Path statistics and backpressure counters for administrative\n  // events, monitor messages, and PubSub notifications.\n  // If add is true, stats are incremented, otherwise decremented.\n  void UpdateDispatchStats(const MessageHandle& msg, bool add);\n\n  ParserStatus ParseRedis(unsigned max_busy_cycles, bool enqueue_only = false);\n\n  void OnBreakCb(int32_t mask);\n\n  // Shrink pipeline pool by a little while handling regular commands.\n  void ShrinkPipelinePool();\n\n  // Returns non-null request ptr if pool has vacant entries.\n  PipelineMessagePtr GetFromPoolOrCreate();\n\n  void HandleMigrateRequest();\n  io::Result<size_t> HandleRecvSocket();\n\n  bool ShouldEndAsyncFiber(const MessageHandle& msg);\n\n  void LaunchAsyncFiberIfNeeded();  // Async fiber is started lazily\n\n  // Squashes pipelined commands from the dispatch queue to spread load over all threads\n  void SquashPipeline();\n\n  // Clear pipelined messages, disaptching only intrusive ones.\n  void ClearPipelinedMessages();\n\n  std::pair<std::string, std::string> GetClientInfoBeforeAfterTid() const;\n\n  void IncreaseConnStats();\n  void DecreaseConnStats();\n  void BreakOnce(uint32_t ev_mask);\n\n  // The read buffer with read data that needs to be parsed and processed.\n  // For io_uring bundles we 
may have available_bytes larger than slice.size()\n  // which means that there are more buffers available to read.\n  struct ReadBuffer {\n    size_t available_bytes;\n    io::Bytes slice;\n\n    void Consume(size_t len) {\n      available_bytes -= len;\n      slice.remove_prefix(len);\n    }\n  };\n\n  bool IsReplySizeOverLimit() const;\n\n  // Returns true if one or more commands were parsed from the read buffer,\n  // and false if no complete commands could be parsed (for example, when\n  // parsing is pending more input).\n  bool ParseMCBatch();\n\n  bool ParseRedisBatch();\n\n  // Call appropriate ParseBatch function, proceed with Execute and Reply all why input is remaining\n  ParserStatus ParseLoop();\n\n  // Loop over enqueued async commands and enqueue them for async execution.\n  // If async execution is not possible, handle them in synchronous mode one by one.\n  // Returns true on successful execution, false on reply builder error.\n  bool ExecuteBatch();\n\n  // Loop over finished async commands and let them reply.\n  // Returns true on successful execution, false on reply builder error.\n  bool ReplyBatch();\n\n  // Guard of the current subscription to a parsed commands async task blocker\n  struct WaitEvent {\n    explicit WaitEvent(ParsedCommand* cmd, util::fb2::detail::Waiter* w);\n\n    std::optional<util::fb2::EventCount::SubKey> key;\n  };\n\n  ParsedCommand* CreateParsedCommand();\n  void EnqueueParsedCommand(ParsedCommand* cmd);\n\n  // Releases the command memory back to the pool.\n  // - Set is_pipelined=true if the command was successfully executed and should be counted\n  // in latency/throughput stats.\n  // - Set is_pipelined=false if the command is being dropped/cleaned up without execution or should\n  // not be counted in stats.\n  void ReleaseParsedCommand(ParsedCommand* cmd, bool is_pipelined);\n\n  void DestroyParsedQueue();\n\n  // Dispatch Queue - Queue for the Control Path.\n  // Handles asynchronous administrative tasks, 
events, and high-priority control\n  // messages (e.g., PubSub, Monitor, Migration requests, Checkpoints) processed\n  // by the AsyncFiber.\n  std::deque<MessageHandle> dispatch_q_;    // dispatch queue\n  util::fb2::CondVarAny cnd_;               // dispatch queue waker\n  util::fb2::Fiber async_fb_;               // async fiber (if started)\n  size_t dispatch_q_bytes_ = 0;             // total bytes in dispatch queue\n  size_t dispatch_q_subscriber_bytes_ = 0;  // total bytes from subscribers in dispatch queue\n\n  std::error_code io_ec_;\n  util::fb2::EventCount io_event_;\n  std::optional<WaitEvent> current_wait_;\n\n  // how many bytes of the current request have been consumed\n  size_t request_consumed_bytes_ = 0;\n\n  util::FiberSocketBase::ProvidedBuffer recv_buf_;\n  io::IoBuf io_buf_;  // used in io loop and parsers\n  std::unique_ptr<RespSrvParser> redis_parser_;\n  std::unique_ptr<MemcacheParser> memcache_parser_;\n  ParsedCommand* parsed_cmd_ = nullptr;\n\n  // Parsed Commands Queue - Queue for the Data Path.\n  //\n  // Commands move through the following stages in a single linked list:\n  //   1) parsed but not yet dispatched        : [parsed_to_execute_, ..., parsed_tail_]\n  //   2) dispatched but not yet completed     : between parsed_head_ and parsed_to_execute_\n  //   3) completed (replies ready to send)    : a prefix of [parsed_head_, ..., parsed_to_execute_)\n  //   4) replied and removed                  : before parsed_head_ (no longer in the list)\n  //\n  // Logical order diagram:\n  //   head -> ... -> (dispatched, waiting for completion) -> ... -> parsed_to_execute_ -> ... 
->\n  //   tail\n  //\n  // parsed_to_execute_ is advanced as commands are dispatched for execution.\n  // Executed (completed) commands are kept in the queue until their replies are sent,\n  // in order to preserve reply ordering.\n  // ReplyMCBatch walks from parsed_head_ up to (but not including) parsed_to_execute_,\n  // replies commands that have completed, and removes only those replied commands from\n  // the queue, advancing parsed_head_ accordingly.\n  ParsedCommand* parsed_head_ = nullptr;\n  ParsedCommand* parsed_tail_ = nullptr;\n  ParsedCommand* parsed_to_execute_ = nullptr;\n  // Total number of commands in parsed command queue\n  size_t parsed_cmd_q_len_ = 0;\n  // Total bytes used by commands in parsed command queue\n  size_t parsed_cmd_q_bytes_ = 0;\n  // Returns true if there are any commands pending in the parsed command queue or dispatch queue.\n  bool HasPendingMessages() const {\n    return parsed_head_ || !dispatch_q_.empty();\n  }\n\n  // Returns total count of commands pending in the parsed command queue and dispatch queue.\n  size_t GetPendingMessageCount() const {\n    return parsed_cmd_q_len_ + dispatch_q_.size();\n  }\n\n  uint32_t id_;\n  Protocol protocol_;\n  Phase phase_ = SETUP;\n\n  struct {\n    size_t read_cnt = 0;                // total number of read calls\n    size_t net_bytes_in = 0;            // total number of bytes read\n    size_t dispatch_entries_added = 0;  // total number of dispatch queue entries\n    size_t cmds = 0;                    // total number of commands executed\n  } local_stats_;\n\n  std::unique_ptr<SinkReplyBuilder> reply_builder_;\n  util::HttpListenerBase* http_listener_;\n  SSL_CTX* ssl_ctx_;\n\n  ServiceInterface* service_;\n\n  time_t creation_time_, last_interaction_;\n  std::string name_;\n\n  std::string lib_name_;\n  std::string lib_ver_;\n\n  unsigned parser_error_ = 0;\n\n  BreakerCb breaker_cb_;\n\n  // Used to keep track of borrowed references. 
Does not really own itself\n  std::shared_ptr<Connection> self_;\n\n  util::fb2::ProactorBase* migration_request_ = nullptr;\n\n  // Pooled pipeline messages per-thread\n  // Aggregated while handling pipelines, gradually released while handling regular commands.\n  static thread_local std::vector<PipelineMessagePtr> pipeline_req_pool_;\n\n  union {\n    uint16_t flags_;\n    struct {\n      // a flag indicating whether the client has turned on client tracking.\n      bool tracking_enabled_ : 1;\n      bool skip_next_squashing_ : 1;  // Forcefully skip next squashing\n\n      // Connection migration vars, see RequestAsyncMigration() above.\n      bool migration_enabled_ : 1;\n      bool migration_in_process_ : 1;\n      bool is_http_ : 1;\n\n      // whether the connection is TLS. We can be sure our socket is TlsSocket\n      // if the flag is set.\n      bool is_tls_ : 1;\n      bool is_main_ : 1;\n      bool ioloop_v2_ : 1;  // whether this connection is running on ioloop v2\n\n      // If post migration is allowed to call RegisterRecv\n      bool migration_allowed_to_register_ : 1;\n    };\n  };\n\n  bool request_shutdown_ = false;\n};\n\n}  // namespace facade\n"
  },
  {
    "path": "src/facade/dragonfly_listener.cc",
    "content": "// Copyright 2025, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#include \"facade/dragonfly_listener.h\"\n\n#include <mimalloc.h>\n#include <netinet/tcp.h>\n#include <openssl/err.h>\n\n#include <memory>\n\n#include \"absl/functional/bind_front.h\"\n#include \"facade/tls_helpers.h\"\n\n#ifdef DFLY_USE_SSL\n#include <openssl/ssl.h>\n#endif\n#include \"base/flags.h\"\n#include \"base/logging.h\"\n#include \"facade/dragonfly_connection.h\"\n#include \"facade/service_interface.h\"\n#include \"util/proactor_pool.h\"\n\nusing namespace std;\n\nABSL_FLAG(uint32_t, conn_io_threads, 0, \"Number of threads used for handing server connections\");\nABSL_FLAG(uint32_t, conn_io_thread_start, 0, \"Starting thread id for handling server connections\");\nABSL_FLAG(bool, tls, false, \"\");\nABSL_FLAG(bool, no_tls_on_admin_port, false, \"Allow non-tls connections on admin port\");\nABSL_FLAG(bool, enable_tcp_defer_accept, true, \"Enable TCP_DEFER_ACCEPT option on server sockets\");\n\nABSL_FLAG(bool, conn_use_incoming_cpu, false,\n          \"If true uses incoming cpu of a socket in order to distribute\"\n          \" incoming connections\");\n\nABSL_DECLARE_FLAG(std::string, tls_cert_file);\nABSL_DECLARE_FLAG(std::string, tls_key_file);\nABSL_DECLARE_FLAG(std::string, tls_ca_cert_file);\nABSL_DECLARE_FLAG(std::string, tls_ca_cert_dir);\n\nABSL_FLAG(uint32_t, tcp_keepalive, 300,\n          \"the period in seconds of inactivity after which keep-alives are triggerred,\"\n          \"the duration until an inactive connection is terminated is twice the specified time\");\nABSL_FLAG(uint32_t, tcp_user_timeout, 0,\n          \"the maximum period in milliseconds that transimitted data may stay unacknowledged \"\n          \"before TCP aborts the connection. 
0 means OS default timeout\");\n\nABSL_DECLARE_FLAG(bool, primary_port_http_enabled);\n\n#if 0\nenum TlsClientAuth {\n  CL_AUTH_NO = 0,\n  CL_AUTH_YES = 1,\n  CL_AUTH_OPTIONAL = 2,\n};\n\nfacade::ConfigEnum tls_auth_clients_enum[] = {\n    {\"no\", CL_AUTH_NO},\n    {\"yes\", CL_AUTH_YES},\n    {\"optional\", CL_AUTH_OPTIONAL},\n};\n\nstatic int tls_auth_clients_opt = CL_AUTH_YES;\n\nCONFIG_enum(tls_auth_clients, \"yes\", \"\", tls_auth_clients_enum, tls_auth_clients_opt);\n#endif\n\nnamespace facade {\n\n// See dragonfly_listener.h\nstd::atomic<bool> g_shutdown_fast{false};\n\nusing namespace util;\nusing util::detail::SafeErrorMessage;\n\nusing absl::GetFlag;\n\nnamespace {\n\nbool ConfigureKeepAlive(int fd) {\n  int val = 1;\n  if (setsockopt(fd, SOL_SOCKET, SO_KEEPALIVE, &val, sizeof(val)) < 0)\n    return false;\n\n  val = absl::GetFlag(FLAGS_tcp_keepalive);\n#ifdef __APPLE__\n  if (setsockopt(fd, IPPROTO_TCP, TCP_KEEPALIVE, &val, sizeof(val)) < 0)\n    return false;\n#else\n  if (setsockopt(fd, IPPROTO_TCP, TCP_KEEPIDLE, &val, sizeof(val)) < 0)\n    return false;\n#endif\n\n  /* Send next probes after the specified interval. Note that we set the\n   * delay as interval / 3, as we send three probes before detecting\n   * an error (see the next setsockopt call). */\n  val = std::max(val / 3, 1);\n  if (setsockopt(fd, IPPROTO_TCP, TCP_KEEPINTVL, &val, sizeof(val)) < 0)\n    return false;\n\n  /* Consider the socket in error state after three we send three ACK\n   * probes without getting a reply. 
*/\n  val = 3;\n  if (setsockopt(fd, IPPROTO_TCP, TCP_KEEPCNT, &val, sizeof(val)) < 0)\n    return false;\n\n  return true;\n}\n\nstruct ListenerStats {\n  size_t tls_allocated_bytes = 0;\n  uint64_t refused_conn_maxclients_reached_cnt = 0;\n};\n\nthread_local ListenerStats listener_tl_stats;\natomic_int ssl_init_refcount = 0;\n\nvoid* OverriddenSSLMalloc(size_t size, const char* file, int line) {\n  void* res = mi_malloc(size);\n  listener_tl_stats.tls_allocated_bytes += mi_malloc_usable_size(res);\n  return res;\n}\n\nvoid* OverriddenSSLRealloc(void* addr, size_t size, const char* file, int line) {\n  size_t prev_size = mi_malloc_usable_size(addr);\n  void* res = mi_realloc(addr, size);\n  listener_tl_stats.tls_allocated_bytes += mi_malloc_usable_size(res);\n  listener_tl_stats.tls_allocated_bytes -= prev_size;\n  return res;\n}\n\nvoid OverriddenSSLFree(void* addr, const char* file, int line) {\n  listener_tl_stats.tls_allocated_bytes -= mi_malloc_usable_size(addr);\n  mi_free(addr);\n}\n\n}  // namespace\n\nListener::Listener(Protocol protocol, ServiceInterface* si, Role role)\n    : service_(si), role_(role), protocol_(protocol) {\n#ifdef DFLY_USE_SSL\n  if (ssl_init_refcount.fetch_add(1) == 0) {\n    CRYPTO_set_mem_functions(&OverriddenSSLMalloc, &OverriddenSSLRealloc, &OverriddenSSLFree);\n  }\n\n  // Always initialise OpenSSL so we can enable TLS at runtime.\n  OPENSSL_init_ssl(OPENSSL_INIT_SSL_DEFAULT, nullptr);\n  // Print this only for main interface\n  if (IsMainInterface()) {\n    std::string_view ssl_version = SSLeay_version(SSLEAY_VERSION);\n    LOG(INFO) << \"SSL version: \" << ssl_version;\n  }\n  if (!ReconfigureTLS()) {\n    exit(-1);\n  }\n#endif\n\n  // We only set the HTTP interface for:\n  // 1. Privileged users (on privileged listener)\n  // 2. 
Main listener (if enabled)\n  const bool is_main_enabled = GetFlag(FLAGS_primary_port_http_enabled);\n  if (IsPrivilegedInterface() || (IsMainInterface() && is_main_enabled)) {\n    http_base_ = std::make_unique<HttpListener<>>();\n    http_base_->set_resource_prefix(\"http://static.dragonflydb.io/data-plane\");\n    si->ConfigureHttpHandlers(http_base_.get(), IsPrivilegedInterface());\n  }\n}\n\nListener::~Listener() {\n#ifdef DFLY_USE_SSL\n  SSL_CTX_free(ctx_);\n\n  if (ssl_init_refcount.fetch_sub(1) == 1) {\n    OPENSSL_cleanup();\n  }\n#endif\n}\n\nutil::Connection* Listener::NewConnection(ProactorBase* proactor) {\n  return new Connection{protocol_, http_base_.get(), ctx_, service_};\n}\n\nerror_code Listener::ConfigureServerSocket(int fd) {\n  int val = 1;\n\n  if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &val, sizeof(val)) < 0) {\n    LOG(WARNING) << \"Could not set reuse addr on socket \" << SafeErrorMessage(errno);\n  }\n\n#ifdef TCP_DEFER_ACCEPT  // TCP_DEFER_ACCEPT is only for Linux, and defined by Linux OS-Kernel\n  if (GetFlag(FLAGS_enable_tcp_defer_accept)) {\n    sockaddr_storage addr;\n    socklen_t len = sizeof(addr);\n    // TCP_DEFER_ACCEPT is only applicable to TCP (IPv4/IPv6) sockets, not Unix domain sockets\n    // (UDS).\n    if (getsockname(fd, reinterpret_cast<sockaddr*>(&addr), &len) == 0 &&\n        (addr.ss_family == AF_INET || addr.ss_family == AF_INET6)) {\n      // Instruct the kernel to defer waking up accept() until actual payload data arrives,\n      // with a timeout of 1 second.\n      // This provides a kernel-level shield against \"Pure Zombie\" storms - where malicious or\n      // misconfigured clients complete the TCP 3-way handshake but never send data (or immediately\n      // send FIN/RST). 
The kernel will silently clean up these empty connections without\n      // consuming Dragonfly fibers or OpenSSL memory.\n      // This imposes zero latency penalty on well-behaved clients, as the kernel instantly\n      // yields the connection to user-space the moment their first byte (e.g., TLS ClientHello\n      // or RESP command) arrives.\n      static constexpr int kDeferAcceptTimeoutSec = 1;\n      if (setsockopt(fd, IPPROTO_TCP, TCP_DEFER_ACCEPT, &kDeferAcceptTimeoutSec,\n                     sizeof(kDeferAcceptTimeoutSec)) < 0) {\n        LOG(WARNING) << \"Could not set TCP_DEFER_ACCEPT \" << SafeErrorMessage(errno);\n      }\n    }\n  }\n#endif\n  bool success = ConfigureKeepAlive(fd);\n\n#ifdef __linux__\n  int user_timeout = absl::GetFlag(FLAGS_tcp_user_timeout);\n  if (setsockopt(fd, IPPROTO_TCP, TCP_USER_TIMEOUT, &user_timeout, sizeof(int)) < 0) {\n    LOG(WARNING) << \"Could not set user timeout on socket \" << SafeErrorMessage(errno);\n  }\n#endif\n\n  if (!success) {\n#ifndef __APPLE__\n    int myerr = errno;\n\n    int socket_type;\n    socklen_t length = sizeof(socket_type);\n\n    // Ignore the error on UDS.\n    if (getsockopt(fd, SOL_SOCKET, SO_DOMAIN, &socket_type, &length) != 0 ||\n        socket_type != AF_UNIX) {\n      LOG(WARNING) << \"Could not configure keep alive \" << SafeErrorMessage(myerr);\n    }\n#endif\n  }\n\n  return error_code{};\n}\n\nbool Listener::ReconfigureTLS() {\n#ifdef DFLY_USE_SSL\n  SSL_CTX* prev_ctx = ctx_;\n  const bool tls_on_privileged_port = !GetFlag(FLAGS_no_tls_on_admin_port);\n\n  if (GetFlag(FLAGS_tls) && (!IsPrivilegedInterface() || tls_on_privileged_port)) {\n    SSL_CTX* ctx = CreateSslCntx(facade::TlsContextRole::SERVER);\n    if (!ctx) {\n      return false;\n    }\n    ctx_ = ctx;\n  } else {\n    ctx_ = nullptr;\n  }\n\n  if (prev_ctx) {\n    // SSL_CTX is reference counted so if other connections have a reference\n    // to the context it won't be freed yet.\n    SSL_CTX_free(prev_ctx);\n  
}\n#endif\n  return true;\n}\n\nsize_t Listener::TLSUsedMemoryThreadLocal() {\n  return listener_tl_stats.tls_allocated_bytes;\n}\n\nuint64_t Listener::RefusedConnectionMaxClientsCount() {\n  return listener_tl_stats.refused_conn_maxclients_reached_cnt;\n}\n\nvoid Listener::PreAcceptLoop(util::ProactorBase* pb) {\n}\n\nbool Listener::IsPrivilegedInterface() const {\n  return role_ == Role::PRIVILEGED;\n}\n\nbool Listener::IsMainInterface() const {\n  return role_ == Role::MAIN;\n}\n\nvoid Listener::PreShutdown() {\n  // If NOW/FORCE requested, expedite shutdown without waiting.\n  if (g_shutdown_fast.load(std::memory_order_acquire)) {\n    return;\n  }\n\n  // Otherwise: Iterate on all connections and allow them to finish their commands for\n  // a short period.\n  // Executed commands can be visible in snapshots or replicas, but if we close the client\n  // connections too fast we might not send the acknowledgment for those commands.\n  // This shouldn't take a long time: All clients should reject incoming commands\n  // at this stage since we're in SHUTDOWN mode.\n  // If a command is running for too long we give up and proceed.\n  DispatchTracker tracker{\n      {this}, nullptr, false /* paused connections */, false /* blocking connections*/};\n  tracker.TrackAll();\n\n  if (!tracker.Wait(absl::Milliseconds(10))) {\n    LOG(WARNING) << \"Some commands are still being dispatched but didn't conclude in time. 
\"\n                    \"Proceeding in shutdown.\";\n  }\n}\n\nvoid Listener::PostShutdown() {\n}\n\nvoid Listener::OnConnectionStart(util::Connection* conn) {\n  facade::Connection* facade_conn = static_cast<facade::Connection*>(conn);\n  VLOG(1) << \"Opening connection \" << facade_conn->GetClientId();\n\n  facade_conn->OnConnectionStart();\n}\n\nvoid Listener::OnConnectionClose(util::Connection* conn) {\n  Connection* facade_conn = static_cast<Connection*>(conn);\n  VLOG(1) << \"Closing connection \" << facade_conn->GetClientId();\n}\n\nvoid Listener::OnMaxConnectionsReached(util::FiberSocketBase* sock) {\n  listener_tl_stats.refused_conn_maxclients_reached_cnt++;\n  sock->Write(io::Buffer(\"-ERR max number of clients reached\\r\\n\"));\n}\n\n// We can limit number of threads handling dragonfly connections.\nProactorBase* Listener::PickConnectionProactor(util::FiberSocketBase* sock) {\n  util::ProactorPool* pp = pool();\n\n  uint32_t res_id = kuint32max;\n\n  if (!sock->IsUDS()) {\n    int fd = sock->native_handle();\n\n    int cpu, napi_id;\n    socklen_t len = sizeof(cpu);\n\n    // I suspect that the advantage of using SO_INCOMING_NAPI_ID is that\n    // we can also track the affinity changes during the lifetime of the process\n    // i.e. when a different CPU is assigned to handle the RX traffic.\n    // On some distributions (WSL1, for example), SO_INCOMING_CPU is not supported.\n    if (0 == getsockopt(fd, SOL_SOCKET, SO_INCOMING_CPU, &cpu, &len)) {\n      VLOG(1) << \"CPU for connection \" << fd << \" is \" << cpu;\n      // Avoid CHECKINGing success, it sometimes fail on WSL\n      // https://github.com/dragonflydb/dragonfly/issues/2090\n      if (0 == getsockopt(fd, SOL_SOCKET, SO_INCOMING_NAPI_ID, &napi_id, &len)) {\n        VLOG(1) << \"NAPI for connection \" << fd << \" is \" << napi_id;\n      }\n\n      if (GetFlag(FLAGS_conn_use_incoming_cpu)) {\n        // We choose a thread that is running on the incoming CPU. 
Usually there is\n        // a single thread per CPU. SO_INCOMING_CPU returns the CPU that the kernel\n        // uses to steer the packets to. In order to make\n        // conn_use_incoming_cpu effective, we should make sure that the receive packets are\n        // steered to enough CPUs. This can be done by setting the RPS mask in\n        // /sys/class/net/<dev>/queues/rx-<n>/rps_cpus. For more details, see\n        // https://docs.kernel.org/networking/scaling.html#rps-configuration\n        // Please note that if conn_use_incoming_cpu is true, connections will be handled only\n        // on the CPUs that handle the softirqs for the incoming packets.\n        // To avoid imbalance in CPU load, RPS tuning is strongly advised.\n        const vector<unsigned>& ids = pool()->MapCpuToThreads(cpu);\n        if (!ids.empty()) {\n          res_id = ids[0];\n        }\n      }\n    }\n  }\n\n  if (res_id == kuint32max) {\n    uint32_t total = GetFlag(FLAGS_conn_io_threads);\n    uint32_t start = GetFlag(FLAGS_conn_io_thread_start) % pp->size();\n\n    if (total == 0 || total + start > pp->size()) {\n      total = pp->size() - start;\n    }\n\n    res_id = start + (next_id_.fetch_add(1, std::memory_order_relaxed) % total);\n  }\n\n  return pp->at(res_id);\n}\n\nDispatchTracker::DispatchTracker(absl::Span<facade::Listener* const> listeners,\n                                 facade::Connection* issuer, bool ignore_paused,\n                                 bool ignore_blocked)\n    : listeners_{listeners.begin(), listeners.end()},\n      issuer_{issuer},\n      ignore_paused_{ignore_paused},\n      ignore_blocked_{ignore_blocked} {\n}\n\nvoid DispatchTracker::TrackOnThread() {\n  for (auto* listener : listeners_) {\n    listener->TraverseConnectionsOnThread(\n        [this](unsigned thread_index, util::Connection* conn) { Handle(thread_index, conn); },\n        UINT32_MAX, nullptr);\n  }\n}\n\nbool DispatchTracker::Wait(absl::Duration duration) {\n  bool res = 
bc_->WaitFor(absl::ToChronoMilliseconds(duration));\n  if (!res && ignore_blocked_) {\n    LOG(INFO) << \"Retrying DispatchTracker::Wait, as bc=\" << bc_->DEBUG_Count();\n    // We track all connections again because a connection might have become blocked since the\n    // last time we tracked them.\n    bc_ = BlockingCounter{0};\n    TrackAll();\n    res = bc_->WaitFor(absl::ToChronoMilliseconds(duration));\n    LOG_IF(INFO, !res) << \"DispatchTracker::Wait failed again, bc=\" << bc_->DEBUG_Count();\n  }\n  return res;\n}\n\nvoid DispatchTracker::TrackAll() {\n  for (auto* listener : listeners_)\n    listener->TraverseConnections(absl::bind_front(&DispatchTracker::Handle, this));\n}\n\nvoid DispatchTracker::Handle(unsigned thread_index, util::Connection* conn) {\n  if (auto* fconn = static_cast<facade::Connection*>(conn); fconn != issuer_)\n    fconn->SendCheckpoint(bc_, ignore_paused_, ignore_blocked_);\n}\n\n}  // namespace facade\n"
  },
  {
    "path": "src/facade/dragonfly_listener.h",
    "content": "// Copyright 2025, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#pragma once\n\n#include <absl/base/internal/spinlock.h>\n#include <absl/time/time.h>\n\n#include <atomic>\n#include <memory>\n#include <system_error>\n#include <vector>\n\n#include \"facade/facade_types.h\"\n#include \"util/fiber_socket_base.h\"\n#include \"util/fibers/proactor_base.h\"\n#include \"util/http/http_handler.h\"\n#include \"util/listener_interface.h\"\n\ntypedef struct ssl_ctx_st SSL_CTX;\n\nnamespace facade {\n\nclass ServiceInterface;\nclass Connection;\n\nclass Listener : public util::ListenerInterface {\n public:\n  // The Role PRIVILEGED is for admin port/listener\n  // The Role MAIN is for the main listener on main port\n  // The Role OTHER is for all the other listeners\n  enum class Role { PRIVILEGED, MAIN, OTHER };\n  Listener(Protocol protocol, ServiceInterface*, Role role = Role::OTHER);\n  ~Listener();\n\n  std::error_code ConfigureServerSocket(int fd) final;\n\n  // Wait until all command dispatches that are currently in progress finish,\n  // ignore commands from issuer connection.\n  bool AwaitCurrentDispatches(absl::Duration timeout, util::Connection* issuer);\n\n  // ReconfigureTLS MUST be called from the same proactor as the listener.\n  bool ReconfigureTLS();\n\n  // Returns thread-local dynamic memory usage by TLS.\n  static size_t TLSUsedMemoryThreadLocal();\n  static uint64_t RefusedConnectionMaxClientsCount();\n\n  bool IsPrivilegedInterface() const;\n  bool IsMainInterface() const;\n\n  Protocol protocol() const {\n    return protocol_;\n  }\n\n private:\n  util::Connection* NewConnection(ProactorBase* proactor) final;\n  ProactorBase* PickConnectionProactor(util::FiberSocketBase* sock) final;\n\n  void OnConnectionStart(util::Connection* conn) final;\n  void OnConnectionClose(util::Connection* conn) final;\n  void OnMaxConnectionsReached(util::FiberSocketBase* sock) final;\n  void 
PreAcceptLoop(ProactorBase* pb) final;\n\n  void PreShutdown() final;\n  void PostShutdown() final;\n\n  std::unique_ptr<util::HttpListenerBase> http_base_;\n\n  ServiceInterface* service_;\n\n  std::atomic_uint32_t next_id_{0};\n\n  Role role_;\n\n  uint32_t conn_cnt_{0};\n\n  Protocol protocol_;\n  SSL_CTX* ctx_ = nullptr;\n};\n\n// Dispatch tracker allows tracking the dispatch state of connections and blocking until all\n// detected busy connections finished dispatching. Ignores issuer connection.\n//\n// Mostly used to detect when global state changes (takeover, pause, cluster config update) are\n// visible to all commands and no commands are still running according to the old state / config.\nclass DispatchTracker {\n public:\n  DispatchTracker(absl::Span<facade::Listener* const>, facade::Connection* issuer,\n                  bool ignore_paused, bool ignore_blocked);\n\n  void TrackAll();       // Track busy connection on all threads\n  void TrackOnThread();  // Track busy connections on current thread\n\n  // Wait until all tracked connections finished dispatching.\n  // Returns true on success, false if timeout was reached.\n  bool Wait(absl::Duration timeout);\n\n private:\n  void Handle(unsigned thread_index, util::Connection* conn);\n\n  std::vector<facade::Listener*> listeners_;\n  facade::Connection* issuer_;\n  util::fb2::BlockingCounter bc_{0};  // tracks number of pending checkpoints\n  bool ignore_paused_;\n  bool ignore_blocked_;\n};\n\n// Global shutdown tuning flag, controlled by SHUTDOWN options.\n// When true, listeners perform expedited shutdown without waiting for\n// in-flight dispatches (used by NOW/FORCE).\nextern std::atomic<bool> g_shutdown_fast;\n\n}  // namespace facade\n"
  },
  {
    "path": "src/facade/error.h",
    "content": "// Copyright 2022, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#pragma once\n\n#include <string>\n#include <string_view>\n\nnamespace facade {\n\nstd::string WrongNumArgsError(std::string_view cmd);\nstd::string ConfigSetFailed(std::string_view config_name);\nstd::string InvalidExpireTime(std::string_view cmd);\nstd::string UnknownSubCmd(std::string_view subcmd, std::string_view cmd);\n\ninline constexpr char kSyntaxErr[] = \"syntax error\";\ninline constexpr char kWrongTypeErr[] =\n    \"-WRONGTYPE Operation against a key holding the wrong kind of value\";\ninline constexpr char kWrongJsonTypeErr[] = \"-WRONGTYPE wrong JSON type of path value\";\ninline constexpr char kKeyNotFoundErr[] = \"no such key\";\ninline constexpr char kInvalidIntErr[] = \"value is not an integer or out of range\";\ninline constexpr char kInvalidFloatErr[] = \"value is not a valid float\";\ninline constexpr char kUintErr[] = \"value is out of range, must be positive\";\ninline constexpr char kIncrOverflow[] = \"increment or decrement would overflow\";\ninline constexpr char kDbIndOutOfRangeErr[] = \"DB index is out of range\";\ninline constexpr char kInvalidDbIndErr[] = \"invalid DB index\";\ninline constexpr char kScriptNotFound[] = \"-NOSCRIPT No matching script. 
Please use EVAL.\";\ninline constexpr char kAuthRejected[] =\n    \"-WRONGPASS invalid username-password pair or user is disabled.\";\ninline constexpr char kExpiryOutOfRange[] = \"expiry is out of range\";\ninline constexpr char kIndexOutOfRange[] = \"index out of range\";\ninline constexpr char kOutOfMemory[] = \"Out of memory\";\ninline constexpr char kInvalidNumericResult[] = \"result is not a number\";\ninline constexpr char kClusterNotConfigured[] = \"Cluster is not yet configured\";\ninline constexpr char kLoadingErr[] = \"-LOADING Dragonfly is loading the dataset in memory\";\ninline constexpr char kUndeclaredKeyErr[] = \"script tried accessing undeclared key\";\ninline constexpr char kInvalidDumpValueErr[] = \"DUMP payload version or checksum are wrong\";\ninline constexpr char kInvalidJsonPathErr[] = \"invalid JSON path\";\ninline constexpr char kJsonParseError[] = \"failed to parse JSON\";\ninline constexpr char kNanOrInfDuringIncr[] = \"increment would produce NaN or Infinity\";\ninline constexpr char kCrossSlotError[] = \"-CROSSSLOT Keys in request don't hash to the same slot\";\ninline constexpr char kTieredIoError[] = \"IO error when reading value from tiered storage\";\ninline constexpr char kInvalidHllError[] = \"Key is not a valid HyperLogLog string value\";\n\ninline constexpr char kSyntaxErrType[] = \"syntax_error\";\ninline constexpr char kScriptErrType[] = \"script_error\";\ninline constexpr char kConfigErrType[] = \"config_error\";\ninline constexpr char kSearchErrType[] = \"search_error\";\ninline constexpr char kWrongTypeErrType[] = \"wrong_type\";\ninline constexpr char kRestrictDenied[] = \"restrict_denied\";\ninline constexpr char kNoGroupErrType[] = \"no_group_error\";\ninline constexpr char kNoAuthErrType[] = \"no_auth\";\n\n}  // namespace facade\n"
  },
  {
    "path": "src/facade/facade.cc",
    "content": "// Copyright 2022, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#include <absl/strings/escaping.h>\n#include <absl/strings/str_cat.h>\n\n#include \"base/logging.h\"\n#include \"facade/command_id.h\"\n#include \"facade/error.h\"\n#include \"facade/facade_stats.h\"\n#include \"facade/parsed_command.h\"\n#include \"facade/reply_builder.h\"\n#include \"facade/resp_expr.h\"\n#include \"strings/human_readable.h\"\n\nnamespace facade {\n\nusing namespace std;\n\n#define ADD(x) (x) += o.x\n\nconstexpr size_t kSizeConnStats = sizeof(ConnectionStats);\n\nConnectionStats& ConnectionStats::operator+=(const ConnectionStats& o) {\n  static_assert(kSizeConnStats == 272);\n\n  ADD(read_buf_capacity);\n  ADD(dispatch_queue_entries);\n  ADD(dispatch_queue_bytes);\n  ADD(pipeline_queue_entries);\n  ADD(pipeline_queue_bytes);\n  ADD(dispatch_queue_subscriber_bytes);\n  ADD(pipeline_cmd_cache_bytes);\n  ADD(io_read_cnt);\n  ADD(io_read_bytes);\n  ADD(command_cnt_main);\n  ADD(command_cnt_other);\n  ADD(pipelined_cmd_cnt);\n  ADD(pipelined_cmd_latency);\n  pipelined_latency_hist.Merge(o.pipelined_latency_hist);\n  ADD(pipelined_wait_latency);\n  ADD(conn_received_cnt);\n  ADD(num_conns_main);\n  ADD(num_conns_other);\n  ADD(num_blocked_clients);\n  ADD(num_read_yields);\n  ADD(num_migrations);\n  ADD(num_recv_provided_calls);\n  ADD(pipeline_throttle_count);\n  ADD(tls_accept_disconnects);\n  ADD(handshakes_started);\n  ADD(handshakes_completed);\n  ADD(pipeline_dispatch_calls);\n  ADD(pipeline_dispatch_commands);\n  ADD(pipeline_dispatch_flush_usec);\n  ADD(skip_pipeline_flushing);\n\n  return *this;\n}\n\nReplyStats::ReplyStats(ReplyStats&& other) noexcept {\n  *this = other;\n}\n\nReplyStats& ReplyStats::operator+=(const ReplyStats& o) {\n  static_assert(sizeof(ReplyStats) == 80u + kSanitizerOverhead);\n  ADD(io_write_cnt);\n  ADD(io_write_bytes);\n\n  for (const auto& k_v : o.err_count) {\n    err_count[k_v.first] += 
k_v.second;\n  }\n\n  ADD(script_error_count);\n\n  send_stats += o.send_stats;\n  squashing_current_reply_size.fetch_add(o.squashing_current_reply_size.load(memory_order_relaxed),\n                                         memory_order_relaxed);\n  return *this;\n}\n\n#undef ADD\n\nReplyStats& ReplyStats::operator=(const ReplyStats& o) {\n  static_assert(sizeof(ReplyStats) == 80u + kSanitizerOverhead);\n\n  if (this == &o) {\n    return *this;\n  }\n\n  send_stats = o.send_stats;\n  io_write_cnt = o.io_write_cnt;\n  io_write_bytes = o.io_write_bytes;\n  err_count = o.err_count;\n  script_error_count = o.script_error_count;\n  squashing_current_reply_size.store(o.squashing_current_reply_size.load(memory_order_relaxed),\n                                     memory_order_relaxed);\n  return *this;\n}\n\nstring WrongNumArgsError(string_view cmd) {\n  return absl::StrCat(\"wrong number of arguments for '\", absl::AsciiStrToLower(cmd), \"' command\");\n}\n\nstring InvalidExpireTime(string_view cmd) {\n  return absl::StrCat(\"invalid expire time in '\", absl::AsciiStrToLower(cmd), \"' command\");\n}\n\nstring UnknownSubCmd(string_view subcmd, string_view cmd) {\n  return absl::StrCat(\"Unknown subcommand or wrong number of arguments for '\", subcmd, \"'. 
Try \",\n                      cmd, \" HELP.\");\n}\n\nstring ConfigSetFailed(string_view config_name) {\n  return absl::StrCat(\"CONFIG SET failed (possibly related to argument '\", config_name, \"').\");\n}\n\nconst char* RespExpr::TypeName(Type t) {\n  switch (t) {\n    case STRING:\n      return \"string\";\n    case INT64:\n      return \"int\";\n    case DOUBLE:\n      return \"double\";\n    case ARRAY:\n      return \"array\";\n    case NIL_ARRAY:\n      return \"nil-array\";\n    case NIL:\n      return \"nil\";\n    case ERROR:\n      return \"error\";\n  }\n  ABSL_UNREACHABLE();\n}\n\nCommandId::CommandId(const char* name, uint32_t mask, int8_t arity, int8_t first_key,\n                     int8_t last_key, uint32_t acl_categories)\n    : name_(name),\n      opt_mask_(mask),\n      arity_(arity),\n      first_key_(first_key),\n      last_key_(last_key),\n      acl_categories_(acl_categories) {\n}\n\n}  // namespace facade\n\nnamespace std {\n\nusing facade::ArgS;\n\nostream& operator<<(ostream& os, facade::CmdArgList ras) {\n  os << \"[\";\n  if (!ras.empty()) {\n    for (size_t i = 0; i < ras.size() - 1; ++i) {\n      os << absl::CHexEscape(ArgS(ras, i)) << \",\";\n    }\n    os << absl::CHexEscape(ArgS(ras, ras.size() - 1));\n  }\n  os << \"]\";\n\n  return os;\n}\n\nostream& operator<<(ostream& os, const facade::RespExpr& e) {\n  using facade::RespExpr;\n  using facade::ToSV;\n\n  switch (e.type) {\n    case RespExpr::INT64:\n      os << \"i\" << get<int64_t>(e.u);\n      break;\n    case RespExpr::DOUBLE:\n      os << \"d\" << get<double>(e.u);\n      break;\n    case RespExpr::STRING:\n      os << \"'\" << ToSV(get<RespExpr::Buffer>(e.u)) << \"'\";\n      break;\n    case RespExpr::NIL:\n      os << \"nil\";\n      break;\n    case RespExpr::NIL_ARRAY:\n      os << \"[]\";\n      break;\n    case RespExpr::ARRAY:\n      os << facade::RespSpan{*get<RespExpr::Vec*>(e.u)};\n      break;\n    case RespExpr::ERROR:\n      os << \"e(\" << 
ToSV(get<RespExpr::Buffer>(e.u)) << \")\";\n      break;\n  }\n\n  return os;\n}\n\nostream& operator<<(ostream& os, facade::RespSpan ras) {\n  os << \"[\";\n  if (!ras.empty()) {\n    for (size_t i = 0; i < ras.size() - 1; ++i) {\n      os << ras[i] << \",\";\n    }\n    os << ras.back();\n  }\n  os << \"]\";\n\n  return os;\n}\n\nostream& operator<<(ostream& os, facade::Protocol p) {\n  switch (p) {\n    case facade::Protocol::REDIS:\n      os << \"REDIS\";\n      break;\n    case facade::Protocol::MEMCACHE:\n      os << \"MEMCACHE\";\n      break;\n  }\n\n  return os;\n}\n\n}  // namespace std\n"
  },
  {
    "path": "src/facade/facade_stats.h",
    "content": "// Copyright 2026, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#pragma once\n\n#include <absl/container/flat_hash_map.h>\n\n#include <atomic>\n#include <cstdint>\n\n#include \"base/histogram.h\"\nnamespace facade {\n\nstruct ConnectionStats {\n  size_t read_buf_capacity = 0;  // total capacity of input buffers\n  // Count of pending messages in dispatch queue\n  uint64_t dispatch_queue_entries = 0;\n  // Memory used by pending messages in dispatch queue\n  size_t dispatch_queue_bytes = 0;\n  // Count of pending parsed commands in the pipeline queue (Data Path)\n  uint64_t pipeline_queue_entries = 0;\n  // Memory used by pending parsed commands in the pipeline queue (Data Path)\n  size_t pipeline_queue_bytes = 0;\n  // total size of all publish messages (subset of dispatch_queue_bytes)\n  size_t dispatch_queue_subscriber_bytes = 0;\n\n  size_t pipeline_cmd_cache_bytes = 0;\n\n  uint64_t io_read_cnt = 0;\n  size_t io_read_bytes = 0;\n\n  uint64_t command_cnt_main = 0;\n  uint64_t command_cnt_other = 0;\n  uint64_t pipelined_cmd_cnt = 0;\n  uint64_t pipelined_cmd_latency = 0;      // in microseconds\n  base::Histogram pipelined_latency_hist;  // distribution of per-command latencies (usec)\n\n  // in microseconds, time spent waiting for the pipelined commands to start executing\n  uint64_t pipelined_wait_latency = 0;\n  uint64_t conn_received_cnt = 0;\n\n  uint32_t num_conns_main = 0;\n  uint32_t num_conns_other = 0;\n  uint32_t num_blocked_clients = 0;\n\n  // number of times the connection yielded due to max_busy_read_usec limit\n  uint32_t num_read_yields = 0;\n  uint64_t num_migrations = 0;\n  uint64_t num_recv_provided_calls = 0;\n\n  // Number of times the tls connection was closed by the time we started reading from it.\n  uint64_t tls_accept_disconnects = 0;  // number of TLS socket disconnects during the handshake\n                                        //\n  uint64_t handshakes_started = 0;\n  
uint64_t handshakes_completed = 0;\n\n  // Number of events when the pipeline queue was over the limit and was throttled.\n  uint64_t pipeline_throttle_count = 0;\n  uint64_t pipeline_dispatch_calls = 0;\n  uint64_t pipeline_dispatch_commands = 0;\n  uint64_t pipeline_dispatch_flush_usec = 0;\n\n  uint64_t skip_pipeline_flushing = 0;  // number of times we skipped flushing the pipeline\n\n  ConnectionStats& operator+=(const ConnectionStats& o);\n};\n\nstruct ReplyStats {\n  struct SendStats {\n    int64_t count = 0;\n    int64_t total_duration = 0;\n\n    SendStats& operator+=(const SendStats& other) {\n      static_assert(sizeof(SendStats) == 16u);\n\n      count += other.count;\n      total_duration += other.total_duration;\n      return *this;\n    }\n  };\n\n  // Send() operations that are written to sockets\n  SendStats send_stats;\n\n  size_t io_write_cnt = 0;\n  size_t io_write_bytes = 0;\n  absl::flat_hash_map<std::string, uint64_t> err_count;\n  size_t script_error_count = 0;\n\n  // This variable can be updated directly from shard threads when they allocate memory for replies.\n  std::atomic<size_t> squashing_current_reply_size{0};\n\n  ReplyStats() = default;\n  ReplyStats(ReplyStats&& other) noexcept;\n  ReplyStats& operator+=(const ReplyStats& other);\n  ReplyStats& operator=(const ReplyStats& other);\n};\n\nstruct FacadeStats {\n  ConnectionStats conn_stats;\n  ReplyStats reply_stats;\n\n  FacadeStats& operator+=(const FacadeStats& other) {\n    conn_stats += other.conn_stats;\n    reply_stats += other.reply_stats;\n    return *this;\n  }\n};\n\ninline thread_local FacadeStats* tl_facade_stats = nullptr;\n\n}  // namespace facade\n"
  },
  {
    "path": "src/facade/facade_test.cc",
    "content": "// Copyright 2022, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#include \"facade/facade_test.h\"\n\n#include <absl/strings/match.h>\n#include <absl/strings/numbers.h>\n\n#include \"base/logging.h\"\n\nnamespace facade {\n\nusing namespace testing;\nusing namespace std;\n\nbool RespMatcher::MatchAndExplain(RespExpr e, MatchResultListener* listener) const {\n  if (e.type != type_) {\n    if (e.type == RespExpr::STRING && type_ == RespExpr::DOUBLE) {\n      // Doubles are encoded as strings, unless RESP3 is selected. So parse string and try to\n      // compare it.\n      double d = 0;\n      if (!absl::SimpleAtod(e.GetString(), &d)) {\n        *listener << \"\\nCan't parse as double: \" << e.GetString();\n        return false;\n      }\n      e.type = RespExpr::DOUBLE;\n      e.u = d;\n    } else {\n      *listener << \"\\nWrong type: \" << RespExpr::TypeName(e.type);\n      return false;\n    }\n  }\n\n  if (type_ == RespExpr::STRING || type_ == RespExpr::ERROR) {\n    RespExpr::Buffer ebuf = e.GetBuf();\n    std::string_view actual{reinterpret_cast<const char*>(ebuf.data()), ebuf.size()};\n\n    if (type_ == RespExpr::ERROR && !absl::StrContains(actual, exp_str_)) {\n      *listener << \"Actual does not contain '\" << exp_str_ << \"'\";\n      return false;\n    }\n    if (type_ == RespExpr::STRING && exp_str_ != actual) {\n      *listener << \"\\nActual string: \" << actual;\n      return false;\n    }\n  } else if (type_ == RespExpr::INT64) {\n    auto actual = get<int64_t>(e.u);\n    if (exp_int_ != actual) {\n      *listener << \"\\nActual : \" << actual << \" expected: \" << exp_int_;\n      return false;\n    }\n  } else if (type_ == RespExpr::DOUBLE) {\n    auto actual = get<double>(e.u);\n    if (abs(exp_double_ - actual) > 0.0001) {\n      *listener << \"\\nActual : \" << actual << \" expected: \" << exp_double_;\n      return false;\n    }\n  } else if (type_ == RespExpr::ARRAY) {\n    size_t len 
= get<RespVec*>(e.u)->size();\n    if (len != size_t(exp_int_)) {\n      *listener << \"Actual length \" << len << \", expected: \" << exp_int_;\n      return false;\n    }\n  }\n\n  return true;\n}\n\nvoid RespMatcher::DescribeTo(std::ostream* os) const {\n  *os << \"is \";\n  switch (type_) {\n    case RespExpr::STRING:\n    case RespExpr::ERROR:\n      *os << exp_str_;\n      break;\n\n    case RespExpr::INT64:\n      *os << exp_str_;\n      break;\n    case RespExpr::ARRAY:\n      *os << \"array of length \" << exp_int_;\n      break;\n    case RespExpr::DOUBLE:\n      *os << exp_double_;\n      break;\n    default:\n      *os << \"TBD\";\n      break;\n  }\n}\n\nvoid RespMatcher::DescribeNegationTo(std::ostream* os) const {\n  *os << \"is not \";\n}\n\nbool RespTypeMatcher::MatchAndExplain(const RespExpr& e, MatchResultListener* listener) const {\n  if (e.type != type_) {\n    *listener << \"\\nWrong type: \" << RespExpr::TypeName(e.type);\n    return false;\n  }\n\n  return true;\n}\n\nvoid RespTypeMatcher::DescribeTo(std::ostream* os) const {\n  *os << \"is \" << RespExpr::TypeName(type_);\n}\n\nvoid RespTypeMatcher::DescribeNegationTo(std::ostream* os) const {\n  *os << \"is not \" << RespExpr::TypeName(type_);\n}\n\nvoid PrintTo(const RespExpr::Vec& vec, std::ostream* os) {\n  *os << \"Vec: [\";\n  if (!vec.empty()) {\n    for (size_t i = 0; i < vec.size() - 1; ++i) {\n      *os << vec[i] << \",\";\n    }\n    *os << vec.back();\n  }\n  *os << \"]\\n\";\n}\n\n}  // namespace facade\n"
  },
  {
    "path": "src/facade/facade_test.h",
    "content": "// Copyright 2022, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#pragma once\n\n#include <gmock/gmock.h>\n\n#include <ostream>\n#include <string>\n#include <string_view>\n\n#include \"facade/resp_expr.h\"\n\nnamespace facade {\n\nclass RespMatcher {\n public:\n  RespMatcher(std::string_view val, RespExpr::Type t = RespExpr::STRING) : type_(t), exp_str_(val) {\n  }\n\n  RespMatcher(int64_t val, RespExpr::Type t = RespExpr::INT64) : type_(t), exp_int_(val) {\n  }\n\n  RespMatcher(double_t val, RespExpr::Type t = RespExpr::DOUBLE) : type_(t), exp_double_(val) {\n  }\n  using is_gtest_matcher = void;\n\n  bool MatchAndExplain(RespExpr e, testing::MatchResultListener*) const;\n\n  void DescribeTo(std::ostream* os) const;\n\n  void DescribeNegationTo(std::ostream* os) const;\n\n private:\n  RespExpr::Type type_;\n\n  std::string exp_str_;\n  int64_t exp_int_ = 0;\n  double_t exp_double_ = 0;\n};\n\nclass RespTypeMatcher {\n public:\n  RespTypeMatcher(RespExpr::Type type) : type_(type) {\n  }\n\n  using is_gtest_matcher = void;\n\n  bool MatchAndExplain(const RespExpr& e, testing::MatchResultListener*) const;\n\n  void DescribeTo(std::ostream* os) const;\n\n  void DescribeNegationTo(std::ostream* os) const;\n\n private:\n  RespExpr::Type type_;\n};\n\ninline ::testing::PolymorphicMatcher<RespMatcher> ErrArg(std::string_view str) {\n  return ::testing::MakePolymorphicMatcher(RespMatcher(str, RespExpr::ERROR));\n}\n\ninline ::testing::PolymorphicMatcher<RespMatcher> IntArg(int64_t ival) {\n  return ::testing::MakePolymorphicMatcher(RespMatcher(ival));\n}\n\ninline ::testing::PolymorphicMatcher<RespMatcher> DoubleArg(double_t dval) {\n  return ::testing::MakePolymorphicMatcher(RespMatcher(dval));\n}\n\ninline ::testing::PolymorphicMatcher<RespMatcher> ArrLen(size_t len) {\n  return ::testing::MakePolymorphicMatcher(RespMatcher((int64_t)len, RespExpr::ARRAY));\n}\n\ninline ::testing::PolymorphicMatcher<RespTypeMatcher> 
ArgType(RespExpr::Type t) {\n  return ::testing::MakePolymorphicMatcher(RespTypeMatcher(t));\n}\n\nMATCHER_P(RespArray, value, \"\") {\n  return ExplainMatchResult(\n      testing::AllOf(ArgType(RespExpr::ARRAY), testing::Property(&RespExpr::GetVec, value)), arg,\n      result_listener);\n}\n\ntemplate <typename... Args> auto RespElementsAre(const Args&... matchers) {\n  return RespArray(::testing::ElementsAre(matchers...));\n}\n\ninline bool operator==(const RespExpr& left, std::string_view s) {\n  return left.type == RespExpr::STRING && ToSV(left.GetBuf()) == s;\n}\n\ninline bool operator==(const RespExpr& left, int64_t val) {\n  return left.type == RespExpr::INT64 && left.GetInt() == val;\n}\n\ninline bool operator!=(const RespExpr& left, std::string_view s) {\n  return !(left == s);\n}\n\ninline bool operator==(std::string_view s, const RespExpr& right) {\n  return right == s;\n}\n\ninline bool operator!=(std::string_view s, const RespExpr& right) {\n  return !(right == s);\n}\n\nvoid PrintTo(const RespExpr::Vec& vec, std::ostream* os);\n\n}  // namespace facade\n"
  },
  {
    "path": "src/facade/facade_types.h",
    "content": "// Copyright 2022, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#pragma once\n\n#include <optional>\n#include <string>\n#include <string_view>\n#include <variant>\n\n#include \"common/arg_range.h\"\n#include \"common/backed_args.h\"\n#include \"facade/op_status.h\"\n\nnamespace facade {\n\n#if defined(__clang__)\n#if defined(__has_feature)\n#if __has_feature(address_sanitizer)\nconstexpr size_t kSanitizerOverhead = 24u;\n#else\nconstexpr size_t kSanitizerOverhead = 0u;\n#endif\n#endif\n#else\n#ifdef __SANITIZE_ADDRESS__\nconstexpr size_t kSanitizerOverhead = 24u;\n#else\nconstexpr size_t kSanitizerOverhead = 0u;\n#endif\n#endif\n\nenum class Protocol : uint8_t { MEMCACHE = 1, REDIS = 2 };\nenum class CollectionType : uint8_t { ARRAY, SET, MAP, PUSH };\n\nusing MutableSlice = std::string_view;\nusing CmdArgVec = std::vector<std::string_view>;\nusing cmn::ArgSlice;\nusing CmdArgList = cmn::ArgSlice;\nusing cmn::ArgRange;\n\nclass ParsedArgs {\n public:\n  ParsedArgs() = default;\n\n  // References backed arguments. 
The object must outlive this ParsedArgs.\n  ParsedArgs(const cmn::BackedArguments& bargs)  // NOLINT google-explicit-constructor\n      : args_(&bargs) {\n  }\n\n  ParsedArgs(ArgSlice slice)  // NOLINT google-explicit-constructor\n      : args_(slice) {\n  }\n\n  ParsedArgs(const ParsedArgs& other) = default;\n  ParsedArgs& operator=(const ParsedArgs& bargs) = default;\n\n  size_t size() const {\n    return std::visit([](const auto& args) { return args.size(); }, args_);\n  }\n\n  bool empty() const {\n    return size() == 0;\n  }\n\n  ParsedArgs Tail() const {\n    return std::visit([](const auto& args) { return args.Tail(); }, args_);\n  }\n\n  std::string_view Front() const {\n    return std::visit([](const auto& args) { return args.front(); }, args_);\n  }\n\n  ArgSlice ToSlice(CmdArgVec* scratch) const {\n    return std::visit([scratch](const auto& args) { return args.ToSlice(scratch); }, args_);\n  }\n\n  void ToVec(CmdArgVec* vec) const {\n    std::visit([vec](const auto& args) { return args.ToVec(vec); }, args_);\n  }\n\n private:\n  struct WrapperBacked {\n    WrapperBacked(const cmn::BackedArguments* args) : args_(args) {  // NOLINT\n    }\n\n    const cmn::BackedArguments* args_;\n    uint32_t index_ = 0;\n\n    ParsedArgs Tail() const {\n      ParsedArgs res(*args_);\n      WrapperBacked* wb = std::get_if<WrapperBacked>(&res.args_);\n      wb->index_ = index_ + 1;\n      return res;\n    };\n\n    size_t size() const {\n      return args_->size() - index_;\n    }\n\n    std::string_view front() const {\n      return args_->at(index_);\n    }\n\n    ArgSlice ToSlice(CmdArgVec* scratch) const {\n      ToVec(scratch);\n      return *scratch;\n    }\n\n    void ToVec(CmdArgVec* vec) const {\n      vec->assign(args_->begin() + index_, args_->end());\n    }\n  };\n\n  struct Slice : public ArgSlice {\n    using ArgSlice::ArgSlice;\n    Slice(ArgSlice other) : ArgSlice(other) {  // NOLINT\n    }\n\n    ParsedArgs Tail() const {\n      return 
ParsedArgs{subspan(1)};\n    }\n\n    ArgSlice ToSlice(void* /*scratch*/) const {\n      return *this;\n    }\n\n    void ToVec(CmdArgVec* vec) const {\n      vec->assign(begin(), end());\n    }\n  };\n  std::variant<Slice, WrapperBacked> args_;\n};\n\ninline std::string_view ToSV(std::string_view slice) {\n  return slice;\n}\n\ninline std::string_view ToSV(const std::string& slice) {\n  return slice;\n}\n\ninline std::string_view ToSV(std::string&& slice) = delete;\n\ninline std::string_view ArgS(ArgSlice args, size_t i) {\n  return args[i];\n}\n\nstruct ErrorReply {\n  explicit ErrorReply(std::string&& msg, std::string_view kind = {})\n      : message{std::move(msg)}, kind{kind} {\n  }\n  explicit ErrorReply(std::string_view msg, std::string_view kind = {}) : message{msg}, kind{kind} {\n  }\n  explicit ErrorReply(const char* msg,\n                      std::string_view kind = {})  // to resolve ambiguity of constructors above\n      : message{std::string_view{msg}}, kind{kind} {\n  }\n\n  ErrorReply(OpStatus status)  // NOLINT google-explicit-constructor)\n      : status{status} {\n  }\n\n  std::string_view ToSv() const {\n    return std::visit(cmn::kToSV, message);\n  }\n\n  std::variant<std::string, std::string_view> message;\n  std::string_view kind;\n  std::optional<OpStatus> status{std::nullopt};\n};\n\nstruct MemcacheCmdFlags {\n  MemcacheCmdFlags() : raw(0) {\n  }\n\n  union {\n    uint16_t raw = 0;\n    struct {\n      uint16_t no_reply : 1;  // q\n      uint16_t meta : 1;\n\n      // meta flags\n      uint16_t base64 : 1;              // b\n      uint16_t return_flags : 1;        // f\n      uint16_t return_value : 1;        // v\n      uint16_t return_ttl : 1;          // t\n      uint16_t return_access_time : 1;  // l\n      uint16_t return_hit : 1;          // h\n      uint16_t return_cas : 1;          // c\n    };\n  };\n};\n\nstatic_assert(sizeof(MemcacheCmdFlags) == 2);\n\nconstexpr unsigned long long operator\"\"_MB(unsigned long long x) {\n  
return 1024L * 1024L * x;\n}\n\nconstexpr unsigned long long operator\"\"_KB(unsigned long long x) {\n  return 1024L * x;\n}\n\nvoid ResetStats();\n\n// Constants for socket bufring.\nconstexpr uint16_t kRecvSockGid = 0;\n\n// Size of the buffer in bufring (kRecvSockGid).\nconstexpr size_t kRecvBufSize = 1500;\n\n}  // namespace facade\n\nnamespace std {\nostream& operator<<(ostream& os, cmn::ArgSlice args);\nostream& operator<<(ostream& os, facade::Protocol protocol);\n\n}  // namespace std\n"
  },
  {
    "path": "src/facade/memcache_parser.cc",
    "content": "// Copyright 2022, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n#include \"facade/memcache_parser.h\"\n\n#include <absl/container/flat_hash_map.h>\n#include <absl/container/inlined_vector.h>\n#include <absl/strings/ascii.h>\n#include <absl/strings/escaping.h>\n#include <absl/strings/numbers.h>\n#include <absl/strings/str_split.h>\n#include <absl/types/span.h>\n\n#include \"base/logging.h\"\n#include \"base/stl_util.h\"\n#include \"facade/facade_types.h\"\n\nnamespace facade {\nusing namespace std;\nusing MP = MemcacheParser;\n\nnamespace {\n\nint64_t ToAbsolute(uint32_t ts, uint64_t now) {\n  // if expire_ts is greater than month it's a unix timestamp\n  // https://github.com/memcached/memcached/blob/master/doc/protocol.txt#L139\n  constexpr uint32_t kExpireLimit = 60 * 60 * 24 * 30;\n  int64_t expire_ts = ts && ts <= kExpireLimit ? ts + now : ts;\n  return expire_ts;\n}\n\nMP::CmdType From(string_view token) {\n  static absl::flat_hash_map<string_view, MP::CmdType> cmd_map{\n      {\"set\", MP::SET},       {\"add\", MP::ADD},         {\"replace\", MP::REPLACE},\n      {\"append\", MP::APPEND}, {\"prepend\", MP::PREPEND}, {\"cas\", MP::CAS},\n      {\"get\", MP::GET},       {\"gets\", MP::GETS},       {\"gat\", MP::GAT},\n      {\"gats\", MP::GATS},     {\"stats\", MP::STATS},     {\"incr\", MP::INCR},\n      {\"decr\", MP::DECR},     {\"delete\", MP::DELETE},   {\"flush_all\", MP::FLUSHALL},\n      {\"quit\", MP::QUIT},     {\"version\", MP::VERSION},\n  };\n\n  if (token.size() == 2) {\n    // META_COMMANDS\n    if (token[0] != 'm')\n      return MP::INVALID;\n    switch (token[1]) {\n      case 's':\n        return MP::META_SET;\n      case 'g':\n        return MP::META_GET;\n      case 'd':\n        return MP::META_DEL;\n      case 'a':\n        return MP::META_ARITHM;\n      case 'n':\n        return MP::META_NOOP;\n      case 'e':\n        return MP::META_DEBUG;\n    }\n    return MP::INVALID;\n  }\n\n  
if (token.size() > 2) {\n    auto it = cmd_map.find(token);\n    if (it == cmd_map.end())\n      return MP::INVALID;\n    return it->second;\n  }\n  return MP::INVALID;\n}\n\nMP::Result ParseStore(ArgSlice tokens, int64_t now, MP::Command* res, uint32_t max_value_len) {\n  DCHECK_EQ(res->size(), 0u);\n\n  const size_t num_tokens = tokens.size();\n  unsigned opt_pos = 4;\n  if (res->type == MP::CAS) {\n    if (num_tokens <= opt_pos)\n      return MP::PARSE_ERROR;\n    ++opt_pos;\n  }\n\n  // tokens[0] is key\n  uint32_t bytes_len = 0;\n  uint32_t flags;\n  uint32_t expire_ts;\n  if (!absl::SimpleAtoi(tokens[1], &flags) || !absl::SimpleAtoi(tokens[2], &expire_ts) ||\n      !absl::SimpleAtoi(tokens[3], &bytes_len))\n    return MP::BAD_INT;\n\n  if (bytes_len > max_value_len) {\n    LOG_EVERY_T(WARNING, 1) << \"Memcache value size \" << bytes_len << \" exceeds max_bulk_len \"\n                            << max_value_len;\n    return MP::PARSE_ERROR;\n  }\n\n  res->expire_ts = ToAbsolute(expire_ts, now);\n\n  if (res->type == MP::CAS && !absl::SimpleAtoi(tokens[4], &res->cas_unique)) {\n    return MP::BAD_INT;\n  }\n\n  res->flags = flags;\n  if (num_tokens == opt_pos + 1) {\n    if (tokens[opt_pos] == \"noreply\") {\n      res->cmd_flags.no_reply = true;\n    } else {\n      return MP::PARSE_ERROR;\n    }\n  } else if (num_tokens > opt_pos + 1) {\n    return MP::PARSE_ERROR;\n  }\n\n  string_view key = tokens[0];\n  res->backed_args->PushArg(key);\n  res->backed_args->PushArg(bytes_len);\n\n  return MP::OK;\n}\n\nMP::Result ParseValueless(ArgSlice tokens, int64_t now, MP::Command* res) {\n  const size_t num_tokens = tokens.size();\n  size_t key_pos = 0;\n  uint32_t expire_ts;\n  if (res->type == MP::GAT || res->type == MP::GATS) {\n    if (!absl::SimpleAtoi(tokens[0], &expire_ts)) {\n      return MP::BAD_INT;\n    }\n    res->expire_ts = ToAbsolute(expire_ts, now);\n    ++key_pos;\n  }\n\n  // We support only `flushall` or `flushall 0`\n  if (key_pos < num_tokens && 
res->type == MP::FLUSHALL) {\n    DCHECK_EQ(res->size(), 0u);\n\n    int delay = 0;\n    if (key_pos + 1 == num_tokens && absl::SimpleAtoi(tokens[key_pos], &delay) && delay == 0)\n      return MP::OK;\n    return MP::PARSE_ERROR;\n  }\n\n  if (key_pos >= num_tokens)\n    return MP::PARSE_ERROR;\n\n  res->cmd_flags.return_cas = (res->type == MP::GETS || res->type == MP::GATS);\n  res->cmd_flags.return_value = true;\n  res->cmd_flags.return_flags = true;\n\n  res->backed_args->PushArg(tokens[key_pos++]);\n\n  if (key_pos < num_tokens && res->type == MP::STATS)\n    return MP::PARSE_ERROR;  // we don't support additional arguments to stats for now\n\n  if (res->type == MP::INCR || res->type == MP::DECR) {\n    if (key_pos == num_tokens)\n      return MP::PARSE_ERROR;\n\n    if (!absl::SimpleAtoi(tokens[key_pos], &res->delta))\n      return MP::BAD_DELTA;\n    ++key_pos;\n  }\n\n  while (key_pos < num_tokens) {\n    res->backed_args->PushArg(tokens[key_pos++]);\n  }\n\n  if (res->type >= MP::DELETE) {  // write commands\n    if (res->size() > 1 && res->backed_args->back() == \"noreply\") {\n      res->cmd_flags.no_reply = true;\n      res->backed_args->PopArg();\n    }\n  }\n\n  return MP::OK;\n}\n\nbool ParseMetaMode(char m, MP::Command* res) {\n  if (res->type == MP::SET) {\n    switch (m) {\n      case 'E':\n        res->type = MP::ADD;\n        break;\n      case 'A':\n        res->type = MP::APPEND;\n        break;\n      case 'R':\n        res->type = MP::REPLACE;\n        break;\n      case 'P':\n        res->type = MP::PREPEND;\n        break;\n      case 'S':\n        break;\n      default:\n        return false;\n    }\n    return true;\n  }\n\n  if (res->type == MP::INCR) {\n    switch (m) {\n      case 'I':\n      case '+':\n        break;\n      case 'D':\n      case '-':\n        res->type = MP::DECR;\n        break;\n      default:\n        return false;\n    }\n    return true;\n  }\n  return false;\n}\n\n// See 
https://raw.githubusercontent.com/memcached/memcached/refs/heads/master/doc/protocol.txt\nMP::Result ParseMeta(ArgSlice tokens, int64_t now, MP::Command* res, uint32_t max_value_len) {\n  DCHECK(!tokens.empty());\n\n  if (res->type == MP::META_DEBUG) {\n    LOG(ERROR) << \"meta debug not yet implemented\";\n    return MP::PARSE_ERROR;\n  }\n\n  if (tokens[0].size() > 250)\n    return MP::PARSE_ERROR;\n\n  res->cmd_flags.meta = true;\n  res->flags = 0;\n  res->expire_ts = 0;\n\n  string_view arg0 = tokens[0];\n  tokens.remove_prefix(1);\n  uint32_t bytes_len = 0;\n\n  // We emulate the behavior by returning the high level commands.\n  // TODO: we should reverse the interface in the future, so that a high level command\n  // will be represented in MemcacheParser::Command by a meta command with flags.\n  // high level commands should not be part of the interface in the future.\n  switch (res->type) {\n    case MP::META_GET:\n      res->type = MP::GET;\n      break;\n    case MP::META_DEL:\n      res->type = MP::DELETE;\n      break;\n    case MP::META_SET:\n      if (tokens.empty())\n        return MP::PARSE_ERROR;\n      if (!absl::SimpleAtoi(tokens[0], &bytes_len))\n        return MP::BAD_INT;\n      if (bytes_len > max_value_len) {\n        LOG_EVERY_T(WARNING, 1) << \"Memcache value size \" << bytes_len << \" exceeds max_bulk_len \"\n                                << max_value_len;\n        return MP::PARSE_ERROR;\n      }\n\n      res->type = MP::SET;\n      tokens.remove_prefix(1);\n      break;\n    case MP::META_ARITHM:\n      res->type = MP::INCR;\n      res->delta = 1;\n      break;\n    default:\n      return MP::PARSE_ERROR;\n  }\n\n  string blob;\n  uint32_t expire_ts;\n  for (size_t i = 0; i < tokens.size(); ++i) {\n    string_view token = tokens[i];\n\n    switch (token[0]) {\n      case 'T':\n        if (!absl::SimpleAtoi(token.substr(1), &expire_ts))\n          return MP::BAD_INT;\n        res->expire_ts = ToAbsolute(expire_ts, now);\n        if 
(res->type == MP::GET)\n          res->type = MP::GAT;\n        break;\n      case 'b':\n        if (token.size() != 1)\n          return MP::PARSE_ERROR;\n        if (!absl::Base64Unescape(arg0, &blob))\n          return MP::PARSE_ERROR;\n        arg0 = blob;\n        res->cmd_flags.base64 = true;\n        break;\n      case 'F':\n        if (!absl::SimpleAtoi(token.substr(1), &res->flags))\n          return MP::BAD_INT;\n        break;\n      case 'M':\n        if (token.size() != 2 || !ParseMetaMode(token[1], res))\n          return MP::PARSE_ERROR;\n        break;\n      case 'D':\n        if (!absl::SimpleAtoi(token.substr(1), &res->delta))\n          return MP::BAD_INT;\n        break;\n      case 'q':\n        res->cmd_flags.no_reply = true;\n        break;\n      case 'f':\n        res->cmd_flags.return_flags = true;\n        break;\n      case 'v':\n        res->cmd_flags.return_value = true;\n        break;\n      case 't':\n        res->cmd_flags.return_ttl = true;\n        break;\n      case 'l':\n        res->cmd_flags.return_access_time = true;\n        break;\n      case 'h':\n        res->cmd_flags.return_hit = true;\n        break;\n      case 'c':\n        res->cmd_flags.return_cas = true;\n        break;\n      default:\n        LOG(WARNING) << \"unknown meta flag: \" << token;  // not yet implemented\n        return MP::PARSE_ERROR;\n    }\n  }\n  res->backed_args->PushArg(arg0);\n  if (MP::IsStoreCmd(res->type)) {\n    res->backed_args->PushArg(bytes_len);\n  }\n  return MP::OK;\n}\n\n}  // namespace\n\nauto MP::Parse(string_view str, uint32_t* consumed, Command* cmd) -> Result {\n  DVLOG(1) << \"Parsing memcache input: [\" << str << \"]\";\n\n  *consumed = 0;\n\n  if (val_len_to_read_ > 0) {\n    return ConsumeValue(str, consumed, cmd);\n  }\n\n  cmd->cmd_flags.raw = 0;  // re-initialize\n\n  size_t pos = str.find('\\n');\n  if (pos == string_view::npos) {\n    // We need more data to parse the command. 
For get/gets commands this line can be very long.\n    // we limit maximum buffer capacity in the higher levels using max_client_iobuf_len.\n    tmp_buf_.append(str);\n    *consumed = str.size();\n    return INPUT_PENDING;\n  }\n\n  *consumed = pos + 1;\n  string_view main_cmd;\n\n  if (tmp_buf_.empty()) {\n    main_cmd = str.substr(0, pos);\n  } else {\n    tmp_buf_.append(str.substr(0, pos));\n    main_cmd = tmp_buf_;\n  }\n\n  // main_cmd is \\n stripped, so it should end with \\r.\n  if (main_cmd.empty() || main_cmd.back() != '\\r') {\n    return PARSE_ERROR;\n  }\n  main_cmd.remove_suffix(1);  // remove trailing \\r\n\n  // cas <key> <flags> <exptime> <bytes> <cas unique> [noreply]\\r\\n\n  // get <key>*\\r\\n\n  // ms <key> <datalen> <flags>*\\r\\n\n  absl::InlinedVector<string_view, 32> tokens =\n      absl::StrSplit(main_cmd, ' ', absl::SkipWhitespace());\n\n  Result res = ParseInternal(absl::MakeSpan(tokens), cmd);\n  tmp_buf_.clear();\n  if (val_len_to_read_ > 0)\n    return ConsumeValue(str.substr(pos + 1), consumed, cmd);\n  return res;\n};\n\nauto MP::ParseInternal(ArgSlice tokens_view, Command* cmd) -> Result {\n  if (tokens_view.empty())\n    return PARSE_ERROR;\n\n  cmd->type = From(tokens_view[0]);\n  if (cmd->type == INVALID) {\n    return UNKNOWN_CMD;\n  }\n\n  tokens_view.remove_prefix(1);\n  cmd->backed_args->clear();\n\n  if (cmd->type <= CAS) {                                         // Store command\n    if (tokens_view.size() < 4 || tokens_view[0].size() > 250) {  // key length limit\n      return MP::PARSE_ERROR;\n    }\n\n    auto res = ParseStore(tokens_view, last_unix_time_, cmd, max_value_len_);\n    if (res != MP::OK)\n      return res;\n    val_len_to_read_ = cmd->value().size() + 2;\n    return MP::OK;\n  }\n\n  if (cmd->type >= META_SET) {\n    if (tokens_view.empty())\n      return MP::PARSE_ERROR;\n\n    auto res = ParseMeta(tokens_view, last_unix_time_, cmd, max_value_len_);\n    if (res != MP::OK)\n      return res;\n\n    if 
(IsStoreCmd(cmd->type)) {\n      val_len_to_read_ = cmd->value().size() + 2;\n      res = MP::OK;\n    }\n    return res;\n  }\n\n  if (tokens_view.empty()) {\n    if (base::_in(cmd->type, {MP::STATS, MP::FLUSHALL, MP::QUIT, MP::VERSION, MP::META_NOOP})) {\n      return MP::OK;\n    }\n    return MP::PARSE_ERROR;\n  }\n\n  return ParseValueless(tokens_view, last_unix_time_, cmd);\n}\n\nauto MP::ConsumeValue(std::string_view str, uint32_t* consumed, Command* dest) -> Result {\n  DCHECK_EQ(dest->size(), 2u);  // key and value\n  DCHECK_GT(val_len_to_read_, 0u);\n\n  if (val_len_to_read_ > 2) {\n    uint32_t need_copy = val_len_to_read_ - 2;\n    uint32_t dest_len = dest->backed_args->elem_len(1);\n    DCHECK_GE(dest_len, need_copy);  // should be ensured during parsing\n\n    char* start = dest->value_ptr() + (dest_len - need_copy);\n    uint32_t to_fill = std::min<uint32_t>(need_copy, str.size());\n    if (to_fill) {\n      memcpy(start, str.data(), to_fill);\n      val_len_to_read_ -= to_fill;\n      *consumed += to_fill;\n      str.remove_prefix(to_fill);\n    }\n  }\n\n  if (str.empty()) {\n    return MP::INPUT_PENDING;\n  }\n\n  DCHECK(val_len_to_read_ <= 2u && val_len_to_read_ > 0);\n  // consume \\r\\n\n  char end[] = \"\\r\\n\";\n\n  do {\n    if (str.front() != end[2 - val_len_to_read_])  // val_len_to_read_ 2 -> '\\r', 1 -> '\\n'\n      return MP::PARSE_ERROR;\n\n    ++(*consumed);\n    --val_len_to_read_;\n    str.remove_prefix(1);\n  } while (val_len_to_read_ && !str.empty());\n\n  return val_len_to_read_ > 0 ? MP::INPUT_PENDING : MP::OK;\n}\n\n}  // namespace facade\n"
  },
  {
    "path": "src/facade/memcache_parser.h",
    "content": "// Copyright 2022, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#pragma once\n\n#include <cstdint>\n#include <string>\n#include <string_view>\n#include <vector>\n\n#include \"common/backed_args.h\"\n#include \"facade/facade_types.h\"\n\nnamespace facade {\n\n// Memcache parser does not parse value blobs, only the commands.\n// The expectation is that the caller will parse the command and\n// then will follow up with reading the blob data directly from source.\nclass MemcacheParser {\n public:\n  explicit MemcacheParser(uint32_t max_value_len = UINT32_MAX) : max_value_len_(max_value_len) {\n  }\n\n  enum CmdType : uint8_t {\n    INVALID = 0,\n    SET = 1,\n    ADD = 2,\n    REPLACE = 3,\n    APPEND = 4,\n    PREPEND = 5,\n    CAS = 6,\n\n    // Retrieval\n    GET = 10,\n    GETS = 11,\n    GAT = 12,\n    GATS = 13,\n    STATS = 14,\n\n    QUIT = 20,\n    VERSION = 21,\n\n    // The rest of write commands.\n    DELETE = 31,\n    INCR = 32,\n    DECR = 33,\n    FLUSHALL = 34,\n\n    // META_COMMANDS\n    META_NOOP = 50,\n    META_SET = 51,\n    META_DEL = 52,\n    META_ARITHM = 53,\n    META_GET = 54,\n    META_DEBUG = 55,\n  };\n\n  // According to https://github.com/memcached/memcached/wiki/Commands#standard-protocol\n  struct Command {\n    Command() = default;\n    Command(const Command&) = delete;\n    Command(Command&&) noexcept = default;\n\n    CmdType type = INVALID;\n\n    std::string_view key() const {\n      return backed_args->empty() ? std::string_view{} : backed_args->Front();\n    }\n\n    // For STORE commands, value is at index 1.\n    // For both key and value we provide convenience accessors that return empty string_view\n    // if not present.\n    std::string_view value() const {\n      return backed_args->size() < 2 ? 
std::string_view{} : backed_args->at(1);\n    }\n\n    size_t size() const {\n      return backed_args->size();\n    }\n\n    char* value_ptr() {  // NOLINT\n      return backed_args->data(1);\n    }\n\n    union {\n      uint64_t cas_unique = 0;  // for CAS COMMAND\n      uint64_t delta;           // for DECR/INCR commands.\n    };\n\n    int64_t expire_ts = 0;  // unix time (expire_ts > month) in seconds\n\n    // flags for STORE commands\n    uint32_t flags = 0;\n\n    MemcacheCmdFlags cmd_flags;\n\n    // Does not own this object, only references it.\n    cmn::BackedArguments* backed_args = nullptr;\n  };\n\n  static_assert(sizeof(Command) == 40);\n\n  enum Result : uint8_t {\n    OK,\n    INPUT_PENDING,\n    UNKNOWN_CMD,\n    BAD_INT,\n    PARSE_ERROR,  // request parse error, but can continue parsing within the same connection.\n    BAD_DELTA,\n  };\n\n  static bool IsStoreCmd(CmdType type) {\n    return type >= SET && type <= CAS;\n  }\n\n  size_t UsedMemory() const {\n    return tmp_buf_.capacity();\n  }\n\n  void Reset() {\n    val_len_to_read_ = 0;\n    tmp_buf_.clear();\n  }\n\n  Result Parse(std::string_view str, uint32_t* consumed, Command* res);\n\n  void set_last_unix_time(int64_t t) {\n    last_unix_time_ = t;\n  }\n\n private:\n  Result ConsumeValue(std::string_view str, uint32_t* consumed, Command* dest);\n  Result ParseInternal(ArgSlice tokens_view, Command* cmd);\n\n  uint32_t val_len_to_read_ = 0;\n  uint32_t max_value_len_ = UINT32_MAX;\n  std::string tmp_buf_;\n  int64_t last_unix_time_ = 0;\n};\n\n}  // namespace facade\n"
  },
  {
    "path": "src/facade/memcache_parser_test.cc",
    "content": "// Copyright 2022, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#include \"facade/memcache_parser.h\"\n\n#include <gmock/gmock.h>\n\n#include \"absl/strings/str_cat.h\"\n#include \"base/gtest.h\"\n#include \"base/logging.h\"\n#include \"facade/facade_test.h\"\n\nusing namespace testing;\nusing namespace std;\n\nnamespace facade {\n\nclass MCParserTest : public testing::Test {\n protected:\n  MCParserTest() {\n    cmd_.backed_args = &backed_args_;\n  }\n  MemcacheParser::Result Parse(string_view input) {\n    parser_.Reset();\n    return parser_.Parse(input, &consumed_, &cmd_);\n  }\n\n  vector<string_view> ToArgs() const {\n    return {cmd_.backed_args->begin(), cmd_.backed_args->end()};\n  }\n\n  MemcacheParser parser_;\n  cmn::BackedArguments backed_args_;\n  MemcacheParser::Command cmd_;\n  uint32_t consumed_;\n};\n\nTEST_F(MCParserTest, Basic) {\n  MemcacheParser::Result st = Parse(\"set a 1 20 3\\r\\n\");\n  EXPECT_EQ(MemcacheParser::INPUT_PENDING, st);\n  EXPECT_EQ(\"a\", cmd_.key());\n  EXPECT_EQ(1, cmd_.flags);\n  EXPECT_EQ(20, cmd_.expire_ts);\n  EXPECT_EQ(3, cmd_.value().size());\n  EXPECT_EQ(MemcacheParser::SET, cmd_.type);\n\n  st = Parse(\"quit\\r\\n\");\n  EXPECT_EQ(MemcacheParser::OK, st);\n  EXPECT_EQ(MemcacheParser::QUIT, cmd_.type);\n}\n\nTEST_F(MCParserTest, Incr) {\n  MemcacheParser::Result st = Parse(\"incr a\\r\\n\");\n  EXPECT_EQ(MemcacheParser::PARSE_ERROR, st);\n\n  st = Parse(\"incr a 1\\r\\n\");\n  EXPECT_EQ(MemcacheParser::OK, st);\n  EXPECT_EQ(MemcacheParser::INCR, cmd_.type);\n  EXPECT_EQ(\"a\", cmd_.key());\n  EXPECT_EQ(1, cmd_.delta);\n  EXPECT_FALSE(cmd_.cmd_flags.no_reply);\n\n  st = Parse(\"incr a -1\\r\\n\");\n  EXPECT_EQ(MemcacheParser::BAD_DELTA, st);\n\n  st = Parse(\"decr b 10 noreply\\r\\n\");\n  EXPECT_EQ(MemcacheParser::OK, st);\n  EXPECT_EQ(MemcacheParser::DECR, cmd_.type);\n  EXPECT_EQ(10, cmd_.delta);\n}\n\nTEST_F(MCParserTest, Stats) {\n  MemcacheParser::Result 
st = Parse(\"stats foo\\r\\n\");\n  EXPECT_EQ(MemcacheParser::OK, st);\n  EXPECT_EQ(consumed_, 11);\n  EXPECT_EQ(cmd_.type, MemcacheParser::STATS);\n  EXPECT_EQ(\"foo\", cmd_.key());\n\n  st = Parse(\"stats  \\r\\n\");\n  EXPECT_EQ(MemcacheParser::OK, st);\n  EXPECT_EQ(consumed_, 9);\n  EXPECT_EQ(cmd_.type, MemcacheParser::STATS);\n  EXPECT_EQ(0, cmd_.size());\n\n  st = Parse(\"stats  fpp bar\\r\\n\");\n  EXPECT_EQ(MemcacheParser::PARSE_ERROR, st);\n}\n\nTEST_F(MCParserTest, NoreplyBasic) {\n  MemcacheParser::Result st = Parse(\"set mykey 1 2 3 noreply\\r\\n\");\n\n  EXPECT_EQ(MemcacheParser::INPUT_PENDING, st);\n  EXPECT_EQ(\"mykey\", cmd_.key());\n  EXPECT_EQ(1, cmd_.flags);\n  EXPECT_EQ(2, cmd_.expire_ts);\n  EXPECT_EQ(3, cmd_.value().size());\n  EXPECT_EQ(MemcacheParser::SET, cmd_.type);\n  EXPECT_TRUE(cmd_.cmd_flags.no_reply);\n\n  st = Parse(\"set mykey2 4 5 6\\r\\n\");\n\n  EXPECT_EQ(MemcacheParser::INPUT_PENDING, st);\n  EXPECT_EQ(\"mykey2\", cmd_.key());\n  EXPECT_EQ(4, cmd_.flags);\n  EXPECT_EQ(5, cmd_.expire_ts);\n  EXPECT_EQ(6, cmd_.value().size());\n  EXPECT_EQ(MemcacheParser::SET, cmd_.type);\n  EXPECT_FALSE(cmd_.cmd_flags.no_reply);\n}\n\nTEST_F(MCParserTest, Meta) {\n  MemcacheParser::Result st = Parse(\"ms key1 \");\n  EXPECT_EQ(MemcacheParser::INPUT_PENDING, st);\n  EXPECT_EQ(8, consumed_);\n  st = parser_.Parse(\"6 T1 F2\\r\\naaaaaa\\r\\n\", &consumed_, &cmd_);\n  EXPECT_EQ(MemcacheParser::OK, st);\n  EXPECT_EQ(17, consumed_);\n  EXPECT_EQ(MemcacheParser::SET, cmd_.type);\n  EXPECT_EQ(\"key1\", cmd_.key());\n  EXPECT_EQ(2, cmd_.flags);\n  EXPECT_EQ(1, cmd_.expire_ts);\n  st = Parse(\"ms 16nXnNeV150= 5 b ME\\r\\nbbbbb\");\n  EXPECT_EQ(MemcacheParser::INPUT_PENDING, st);\n  EXPECT_EQ(29, consumed_);\n  EXPECT_EQ(MemcacheParser::ADD, cmd_.type);\n  EXPECT_EQ(\"שלום\", cmd_.key());\n  EXPECT_EQ(5, cmd_.value().size());\n\n  st = Parse(\"mg 16nXnNeV150= b\\r\\n\");\n  EXPECT_EQ(MemcacheParser::OK, st);\n  EXPECT_EQ(19, consumed_);\n  
EXPECT_EQ(MemcacheParser::GET, cmd_.type);\n  EXPECT_EQ(\"שלום\", cmd_.key());\n\n  st = Parse(\"ma val b\\r\\n\");\n  EXPECT_EQ(MemcacheParser::OK, st);\n  EXPECT_EQ(10, consumed_);\n  EXPECT_EQ(MemcacheParser::INCR, cmd_.type);\n\n  st = Parse(\"ma val M- D10\\r\\n\");\n  EXPECT_EQ(MemcacheParser::OK, st);\n  EXPECT_EQ(15, consumed_);\n  EXPECT_EQ(MemcacheParser::DECR, cmd_.type);\n  EXPECT_EQ(10, cmd_.delta);\n\n  st = Parse(\"mg key f v t l h\\r\\n\");\n  EXPECT_EQ(MemcacheParser::OK, st);\n  EXPECT_EQ(18, consumed_);\n  EXPECT_EQ(MemcacheParser::GET, cmd_.type);\n  EXPECT_EQ(\"key\", cmd_.key());\n  EXPECT_TRUE(cmd_.cmd_flags.return_flags);\n  EXPECT_TRUE(cmd_.cmd_flags.return_value);\n  EXPECT_TRUE(cmd_.cmd_flags.return_ttl);\n  EXPECT_TRUE(cmd_.cmd_flags.return_access_time);\n  EXPECT_TRUE(cmd_.cmd_flags.return_hit);\n}\n\nTEST_F(MCParserTest, Gat) {\n  auto res = Parse(\"gat 1000 foo bar baz\\r\\n\");\n  EXPECT_EQ(MemcacheParser::OK, res);\n  EXPECT_EQ(consumed_, 22);\n  EXPECT_EQ(cmd_.type, MemcacheParser::GAT);\n  EXPECT_THAT(ToArgs(), ElementsAre(\"foo\", \"bar\", \"baz\"));\n  EXPECT_EQ(cmd_.expire_ts, 1000);\n\n  res = Parse(\"gat foo bar\\r\\n\");\n  EXPECT_EQ(MemcacheParser::BAD_INT, res);\n\n  res = Parse(\"gats 1000 foo bar baz\\r\\n\");\n  EXPECT_EQ(MemcacheParser::OK, res);\n  EXPECT_EQ(consumed_, 23);\n  EXPECT_EQ(cmd_.type, MemcacheParser::GATS);\n  EXPECT_THAT(ToArgs(), ElementsAre(\"foo\", \"bar\", \"baz\"));\n  EXPECT_EQ(cmd_.expire_ts, 1000);\n\n  parser_.set_last_unix_time(2000);\n  res = Parse(\"gats 1000 foo bar baz\\r\\n\");\n  EXPECT_EQ(MemcacheParser::OK, res);\n  EXPECT_EQ(cmd_.expire_ts, 3000);\n\n  res = Parse(\"gats 100\\r\\n\");\n  EXPECT_EQ(MemcacheParser::PARSE_ERROR, res);\n\n  res = Parse(\"gat 100\\r\\n\");\n  EXPECT_EQ(MemcacheParser::PARSE_ERROR, res);\n}\n\nTEST_F(MCParserTest, ValueState) {\n  auto st = Parse(\"ms key1 6\\r\\nabc\");\n  EXPECT_EQ(MemcacheParser::INPUT_PENDING, st);\n  EXPECT_EQ(consumed_, 14);\n  st = 
parser_.Parse(\"de\", &consumed_, &cmd_);\n  EXPECT_EQ(MemcacheParser::INPUT_PENDING, st);\n  EXPECT_EQ(consumed_, 2);\n\n  st = parser_.Parse(\"f\\r\", &consumed_, &cmd_);\n  EXPECT_EQ(MemcacheParser::INPUT_PENDING, st);\n  EXPECT_EQ(consumed_, 2);\n  EXPECT_EQ(cmd_.value(), \"abcdef\");\n\n  st = parser_.Parse(\"\\n\", &consumed_, &cmd_);\n  EXPECT_EQ(MemcacheParser::OK, st);\n  EXPECT_EQ(consumed_, 1);\n}\n\nTEST_F(MCParserTest, MaxValueLen) {\n  MemcacheParser capped_parser(10);\n  cmn::BackedArguments ba;\n  MemcacheParser::Command cmd;\n  cmd.backed_args = &ba;\n  uint32_t consumed;\n\n  // Value within limit — accepted.\n  auto st = capped_parser.Parse(\"set k 0 0 10\\r\\n\", &consumed, &cmd);\n  EXPECT_EQ(MemcacheParser::INPUT_PENDING, st);\n\n  // Value exceeds limit — rejected.\n  capped_parser.Reset();\n  st = capped_parser.Parse(\"set k 0 0 11\\r\\n\", &consumed, &cmd);\n  EXPECT_EQ(MemcacheParser::PARSE_ERROR, st);\n\n  // Meta set within limit.\n  capped_parser.Reset();\n  st = capped_parser.Parse(\"ms key 10\\r\\n\", &consumed, &cmd);\n  EXPECT_EQ(MemcacheParser::INPUT_PENDING, st);\n\n  // Meta set exceeds limit.\n  capped_parser.Reset();\n  st = capped_parser.Parse(\"ms key 11\\r\\n\", &consumed, &cmd);\n  EXPECT_EQ(MemcacheParser::PARSE_ERROR, st);\n}\n\nTEST_F(MCParserTest, ParseError) {\n  EXPECT_EQ(MemcacheParser::PARSE_ERROR, Parse(\"ms key1 3\\r\\nabcd\"));\n  EXPECT_EQ(MemcacheParser::INPUT_PENDING, Parse(\"ms key1 3\\r\\nabc\"));\n  EXPECT_EQ(MemcacheParser::PARSE_ERROR, parser_.Parse(\"\\ra\", &consumed_, &cmd_));\n  EXPECT_EQ(MemcacheParser::INPUT_PENDING, Parse(\"ms key1 3\\r\\nabc\\r\"));\n  EXPECT_EQ(MemcacheParser::PARSE_ERROR, parser_.Parse(\"\\r\", &consumed_, &cmd_));\n}\n\n// Test for the bug where \\r\\n command line terminator split across TCP packets\n// would cause parse errors.\nTEST_F(MCParserTest, SplitCRLFInCommandLine) {\n  // Simulate TCP fragmentation where command line ends with \\r but \\n comes in next packet\n  auto 
st = Parse(\"set k10 0 0 3 noreply\\r\");\n  EXPECT_EQ(MemcacheParser::INPUT_PENDING, st);\n  EXPECT_EQ(consumed_, 22);\n\n  // Now the \\n arrives followed by the value and another command\n  st = parser_.Parse(\"\\nd10\\r\\nget k11\\r\\n\", &consumed_, &cmd_);\n  EXPECT_EQ(MemcacheParser::OK, st);\n  EXPECT_EQ(consumed_, 6);  // \\n + d10\\r\\n\n  EXPECT_EQ(cmd_.type, MemcacheParser::SET);\n  EXPECT_EQ(cmd_.key(), \"k10\");\n  EXPECT_EQ(cmd_.value(), \"d10\");\n  EXPECT_TRUE(cmd_.cmd_flags.no_reply);\n}\n\n// Test edge case: empty command line when \\r\\n split\nTEST_F(MCParserTest, SplitCRLFEmptyCommand) {\n  // Just \\r with nothing before it\n  auto st = Parse(\"\\r\");\n  EXPECT_EQ(MemcacheParser::INPUT_PENDING, st);\n\n  // Now \\n arrives - should be parse error since command line is empty\n  st = parser_.Parse(\"\\nget key\\r\\n\", &consumed_, &cmd_);\n  EXPECT_EQ(MemcacheParser::PARSE_ERROR, st);\n}\n\nclass MCParserNoreplyTest : public MCParserTest {\n protected:\n  void RunTest(string_view str, bool noreply,\n               MemcacheParser::Result expected_res = MemcacheParser::OK) {\n    MemcacheParser::Result st = Parse(str);\n\n    EXPECT_EQ(expected_res, st);\n    EXPECT_EQ(cmd_.cmd_flags.no_reply, noreply);\n  }\n};\n\nTEST_F(MCParserNoreplyTest, StoreCommands) {\n  RunTest(\"set mykey 0 0 3 noreply\\r\\n\", true, MemcacheParser::INPUT_PENDING);\n  RunTest(\"set mykey 0 0 3\\r\\n\", false, MemcacheParser::INPUT_PENDING);\n  RunTest(\"add mykey 0 0 3\\r\\n\", false, MemcacheParser::INPUT_PENDING);\n  RunTest(\"replace mykey 0 0 3\\r\\n\", false, MemcacheParser::INPUT_PENDING);\n  RunTest(\"append mykey 0 0 3\\r\\n\", false, MemcacheParser::INPUT_PENDING);\n  RunTest(\"prepend mykey 0 0 3\\r\\n\", false, MemcacheParser::INPUT_PENDING);\n}\n\nTEST_F(MCParserNoreplyTest, Other) {\n  RunTest(\"quit\\r\\n\", false);\n  RunTest(\"delete mykey\\r\\n\", false);\n  RunTest(\"incr mykey 1\\r\\n\", false);\n  RunTest(\"decr mykey 1\\r\\n\", false);\n  
RunTest(\"flush_all\\r\\n\", false);\n}\n\nTEST_F(MCParserNoreplyTest, LargeGetRequest) {\n  std::string large_request = \"get\";\n  for (size_t i = 0; i < 100; ++i) {\n    absl::StrAppend(&large_request, \" mykey\", i, \" \");\n  }\n  absl::StrAppend(&large_request, \"\\r\\n\");\n\n  RunTest(large_request, false);\n\n  EXPECT_EQ(cmd_.type, MemcacheParser::CmdType::GET);\n  auto keys = ToArgs();\n  EXPECT_TRUE(std::all_of(keys.begin(), keys.end(), [i = 0u](const auto& elem) mutable {\n    return elem == absl::StrCat(\"mykey\", i++);\n  }));\n}\n\n}  // namespace facade\n"
  },
  {
    "path": "src/facade/ok_main.cc",
    "content": "// Copyright 2022, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#include \"base/init.h\"\n#include \"facade/conn_context.h\"\n#include \"facade/dragonfly_connection.h\"\n#include \"facade/dragonfly_listener.h\"\n#include \"facade/reply_builder.h\"\n#include \"facade/service_interface.h\"\n#include \"util/accept_server.h\"\n#include \"util/fibers/pool.h\"\n\nABSL_FLAG(uint32_t, port, 6379, \"server port\");\n\nusing namespace util;\nusing namespace std;\nusing absl::GetFlag;\n\nnamespace facade {\n\nnamespace {\n\nstruct CmdContext : public facade::ParsedCommand {\n  void ReuseInternal() final {\n  }\n};\n\nclass OkService : public ServiceInterface {\n public:\n  DispatchResult DispatchCommand(ParsedArgs args, ParsedCommand* cmd, AsyncPreference) final {\n    cmd->rb()->SendOk();\n    return DispatchResult::OK;\n  }\n\n  DispatchManyResult DispatchManyCommands(std::function<ParsedArgs()> arg_gen, unsigned count,\n                                          SinkReplyBuilder* builder,\n                                          ConnectionContext* cntx) final {\n    for (unsigned i = 0; i < count; i++) {\n      ParsedArgs args = arg_gen();\n      ParsedCommand* cmd = AllocateParsedCommand();\n      cmd->Init(builder, cntx);\n\n      DispatchCommand(args, cmd, AsyncPreference::ONLY_SYNC);\n      delete cmd;\n    }\n    DispatchManyResult result{\n        .processed = static_cast<uint32_t>(count),\n        .account_in_stats = true,\n    };\n    return result;\n  }\n\n  DispatchResult DispatchMC(ParsedCommand* cmd, AsyncPreference) final {\n    cmd->rb()->SendError(\"\");\n    return DispatchResult::OK;\n  }\n\n  ConnectionContext* CreateContext(Connection* owner) final {\n    return new ConnectionContext{owner};\n  }\n\n  ParsedCommand* AllocateParsedCommand() final {\n    return new CmdContext{};\n  }\n};\n\nvoid RunEngine(ProactorPool* pool, AcceptServer* acceptor) {\n  OkService service;\n\n  
Connection::Init(pool->size());\n  pool->Await([](auto*) { tl_facade_stats = new FacadeStats; });\n\n  acceptor->AddListener(GetFlag(FLAGS_port), new Listener{Protocol::REDIS, &service});\n\n  acceptor->Run();\n  acceptor->Wait();\n}\n\n}  // namespace\n\n}  // namespace facade\n\n#ifdef __linux__\n#define USE_URING 1\n#else\n#define USE_URING 0\n#endif\n\nint main(int argc, char* argv[]) {\n  MainInitGuard guard(&argc, &argv);\n\n  CHECK_GT(GetFlag(FLAGS_port), 0u);\n\n#if USE_URING\n  unique_ptr<util::ProactorPool> pp(fb2::Pool::IOUring(1024));\n#else\n  unique_ptr<util::ProactorPool> pp(fb2::Pool::Epoll());\n#endif\n  pp->Run();\n\n  AcceptServer acceptor(pp.get());\n  facade::RunEngine(pp.get(), &acceptor);\n\n  pp->Stop();\n\n  return 0;\n}\n"
  },
  {
    "path": "src/facade/op_status.cc",
    "content": "#include \"facade/op_status.h\"\n\n#include \"base/logging.h\"\n#include \"facade/error.h\"\n#include \"facade/resp_expr.h\"\n\nnamespace facade {\n\nstd::string_view StatusToMsg(OpStatus status) {\n  switch (status) {\n    case OpStatus::OK:\n      return \"OK\";\n    case OpStatus::KEY_NOTFOUND:\n      return kKeyNotFoundErr;\n    case OpStatus::WRONG_TYPE:\n      return kWrongTypeErr;\n    case OpStatus::WRONG_JSON_TYPE:\n      return kWrongJsonTypeErr;\n    case OpStatus::OUT_OF_RANGE:\n      return kIndexOutOfRange;\n    case OpStatus::INVALID_FLOAT:\n      return kInvalidFloatErr;\n    case OpStatus::INVALID_INT:\n      return kInvalidIntErr;\n    case OpStatus::SYNTAX_ERR:\n      return kSyntaxErr;\n    case OpStatus::OUT_OF_MEMORY:\n      return kOutOfMemory;\n    case OpStatus::CORRUPTED_HLL:\n      return \"-INVALIDOBJ Corrupted HLL object detected.\";\n    case OpStatus::BUSY_GROUP:\n      return \"-BUSYGROUP Consumer Group name already exists\";\n    case OpStatus::INVALID_NUMERIC_RESULT:\n      return kInvalidNumericResult;\n    case OpStatus::AT_LEAST_ONE_KEY:\n      return \"at least 1 input key is needed for this command\";\n    case OpStatus::MEMBER_NOTFOUND:\n      return kKeyNotFoundErr;\n    case OpStatus::INVALID_JSON_PATH:\n      return kInvalidJsonPathErr;\n    case OpStatus::INVALID_JSON:\n      return kJsonParseError;\n    case OpStatus::NAN_OR_INF_DURING_INCR:\n      return kNanOrInfDuringIncr;\n    case OpStatus::IO_ERROR:\n      return kTieredIoError;\n    default:\n      LOG(ERROR) << \"Unsupported status \" << status;\n      return \"Internal error\";\n  }\n}\n\n}  // namespace facade\n"
  },
  {
    "path": "src/facade/op_status.h",
    "content": "// Copyright 2022, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#pragma once\n\n#include <cstdint>\n#include <ostream>\n\nnamespace facade {\n\nenum class OpStatus : uint16_t {\n  OK,\n  KEY_EXISTS,\n  KEY_NOTFOUND,\n  KEY_MOVED,\n  SKIPPED,\n  INVALID_VALUE,\n  CORRUPTED_HLL,\n  OUT_OF_RANGE,\n  WRONG_TYPE,\n  WRONG_JSON_TYPE,\n  TIMED_OUT,\n  OUT_OF_MEMORY,\n  INVALID_FLOAT,\n  INVALID_INT,\n  SYNTAX_ERR,\n  BUSY_GROUP,\n  STREAM_ID_SMALL,\n  INVALID_NUMERIC_RESULT,\n  CANCELLED,\n  AT_LEAST_ONE_KEY,\n  MEMBER_NOTFOUND,\n  INVALID_JSON_PATH,\n  INVALID_JSON,\n  IO_ERROR,\n  NAN_OR_INF_DURING_INCR,\n};\n\nclass OpResultBase {\n public:\n  OpResultBase(OpStatus st = OpStatus::OK) : st_(st) {\n  }\n\n  constexpr explicit operator bool() const {\n    return st_ == OpStatus::OK;\n  }\n\n  OpStatus status() const {\n    return st_;\n  }\n\n  bool operator==(OpStatus st) const {\n    return st_ == st;\n  }\n\n  bool ok() const {\n    return st_ == OpStatus::OK;\n  }\n\n  const char* DebugFormat() const;\n\n private:\n  OpStatus st_;\n};\n\ntemplate <typename V> class OpResult : public OpResultBase {\n public:\n  using Type = V;\n\n  OpResult(V&& v) : v_(std::move(v)) {\n  }\n\n  OpResult(const V& v) : v_(v) {\n  }\n\n  using OpResultBase::OpResultBase;\n\n  const V& value() const {\n    return v_;\n  }\n\n  V& value() {\n    return v_;\n  }\n\n  V value_or(V v) const {\n    return status() == OpStatus::OK ? 
v_ : v;\n  }\n\n  V* operator->() {\n    return &v_;\n  }\n\n  V& operator*() & {\n    return v_;\n  }\n\n  V&& operator*() && {\n    return std::move(v_);\n  }\n\n  const V* operator->() const {\n    return &v_;\n  }\n\n  const V& operator*() const& {\n    return v_;\n  }\n\n private:\n  V v_{};\n};\n\ntemplate <> class OpResult<void> : public OpResultBase {\n public:\n  using OpResultBase::OpResultBase;\n};\n\ninline bool operator==(OpStatus st, const OpResultBase& ob) {\n  return ob.operator==(st);\n}\n\nstd::string_view StatusToMsg(OpStatus status);\n\n}  // namespace facade\n\nnamespace std {\n\ntemplate <typename T> std::ostream& operator<<(std::ostream& os, const facade::OpResult<T>& res) {\n  os << res.status();\n  return os;\n}\n\ninline std::ostream& operator<<(std::ostream& os, const facade::OpStatus op) {\n  os << int(op);\n  return os;\n}\n\n}  // namespace std\n"
  },
  {
    "path": "src/facade/parsed_command.cc",
    "content": "// Copyright 2025, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#include \"facade/parsed_command.h\"\n\n#include \"base/logging.h\"\n#include \"core/overloaded.h\"\n#include \"facade/conn_context.h\"\n#include \"facade/dragonfly_connection.h\"\n#include \"facade/reply_builder.h\"\n#include \"facade/reply_capture.h\"\n#include \"facade/reply_payload.h\"\n\nnamespace facade {\n\nusing namespace std;\n\nstring MCRender::RenderNotFound() const {\n  if (flags_.no_reply)\n    return {};\n  return flags_.meta ? \"NF\" : \"NOT_FOUND\";\n}\n\nstring MCRender::RenderGetEnd() const {\n  if (flags_.no_reply || flags_.meta)\n    return {};\n  return \"END\";\n}\n\nstd::string MCRender::RenderStored(bool ok) const {\n  if (flags_.no_reply)\n    return {};\n  if (ok)\n    return flags_.meta ? \"HD\" : \"STORED\";\n  return flags_.meta ? \"NS\" : \"NOT_STORED\";\n}\n\nstring MCRender::RenderMiss() const {\n  if (flags_.no_reply || !flags_.meta)\n    return {};\n  return \"EN\";\n}\n\nstring MCRender::RenderDeleted() const {\n  if (flags_.no_reply)\n    return {};\n  return flags_.meta ? 
\"HD\" : \"DELETED\";\n}\n\nvoid ParsedCommand::ResetForReuse() {\n  is_deferred_reply_ = false;\n  reply_ = std::monostate{};\n\n  offsets_.clear();\n  if (HeapMemory() > 1024) {\n    storage_.clear();  // also deallocates the heap.\n    offsets_.shrink_to_fit();\n  }\n  ReuseInternal();\n}\n\nvoid ParsedCommand::SendError(std::string_view str, std::string_view type) {\n  if (!is_deferred_reply_) {\n    rb_->SendError(str, type);\n  } else {\n    reply_ = payload::make_error(str, type);\n  }\n}\n\nvoid ParsedCommand::SendError(facade::OpStatus status) {\n  if (!is_deferred_reply_) {\n    rb_->SendError(status);\n  } else {\n    if (status == OpStatus::OK)\n      reply_ = payload::SimpleString{\"OK\"};\n    else\n      reply_ = payload::make_error(StatusToMsg(status));\n  }\n}\n\nvoid ParsedCommand::SendError(const facade::ErrorReply& error) {\n  if (error.status)\n    return SendError(*error.status);\n  SendError(error.ToSv(), error.kind);\n}\n\nvoid ParsedCommand::SendSimpleString(std::string_view str) {\n  if (!is_deferred_reply_) {\n    rb_->SendSimpleString(str);\n  } else {\n    reply_ = payload::make_simple_or_noreply(str);\n  }\n}\n\nvoid ParsedCommand::SendLong(long val) {\n  DCHECK(!is_deferred_reply_);\n  rb_->SendLong(val);\n}\n\nbool ParsedCommand::CanReply() const {\n  DCHECK(is_deferred_reply_);\n  dfly::Overloaded ov{[](const payload::Payload& pl) { return pl.index() > 0 /* not monostate */; },\n                      [](const SuspendedCommand& task) { return task.blocker->IsCompleted(); }};\n  return std::visit(ov, reply_);\n}\n\nvoid ParsedCommand::SendReply() {\n  auto payload_handler = [this](payload::Payload& pl) {\n    CapturingReplyBuilder::Apply(std::move(pl), rb_);\n  };\n  auto task_handler = [](SuspendedCommand& task) {\n    DCHECK(task.coro);\n    task.coro.resume();\n    task.coro = {};\n  };\n  std::visit(dfly::Overloaded{task_handler, payload_handler}, reply_);\n}\n\nParsedCommand::SuspendedCommand::~SuspendedCommand() {\n  if (coro) 
{\n    coro.destroy();\n    coro = {};\n  }\n}\n\n}  // namespace facade\n"
  },
  {
    "path": "src/facade/parsed_command.h",
    "content": "// Copyright 2025, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#pragma once\n\n#include <coroutine>\n#include <variant>\n\n#include \"base/function2.hpp\"\n#include \"common/backed_args.h\"\n#include \"facade/memcache_parser.h\"\n#include \"facade/reply_payload.h\"\n#include \"util/fibers/synchronization.h\"\n\nnamespace facade {\n\nclass ConnectionContext;\nclass SinkReplyBuilder;\n\n// Renders simple string responses based on flags.\n// Returns empty string if no response is to be sent.\nclass MCRender {\n public:\n  explicit MCRender(MemcacheCmdFlags flags) : flags_(flags) {\n  }\n\n  std::string RenderNotFound() const;\n  std::string RenderMiss() const;\n  std::string RenderDeleted() const;\n  std::string RenderGetEnd() const;\n  std::string RenderStored(bool ok) const;\n\n private:\n  MemcacheCmdFlags flags_;\n};\n\n// ParsedCommand is a protocol-agnostic holder for parsed request state.\n// It wraps cmn::BackedArguments so the facade can populate RESP arguments and\n// optionally attach a MemcacheParser::Command, complementing the arguments\n// with memcache-specific data.\n// The purpose of ParsedCommand is to hold the entire state of a parsed request\n// during its lifetime, from parsing to dispatching and reply building including\n// any async dispatching.\nclass ParsedCommand : public cmn::BackedArguments {\n  friend class ServiceInterface;\n\n protected:\n  SinkReplyBuilder* rb_ = nullptr;  // either RedisReplyBuilder or MCReplyBuilder\n  ConnectionContext* conn_cntx_ = nullptr;\n\n  std::unique_ptr<MemcacheParser::Command> mc_cmd_;  // only for memcache protocol\n\n  ParsedCommand() = default;\n\n  // Helper function to get the only argument type\n  template <typename C, typename Arg> static Arg OnlyArgType(void (C::*)(Arg) const);\n\n public:\n  using ReplyFunc = fu2::function_base<true, false, fu2::capacity_fixed<16, 8>, false, false,\n                                       
void(SinkReplyBuilder*)>;\n\n  virtual ~ParsedCommand() = default;\n\n  virtual size_t GetSize() const {\n    return sizeof(ParsedCommand);\n  }\n\n  // time when the message was parsed as reported by CycleClock::Now()\n  // Also serves as the enqueue timestamp for calculating pipeline wait latency.\n  uint64_t parsed_cycle = 0;\n  ParsedCommand* next = nullptr;\n\n  void Init(SinkReplyBuilder* rb, ConnectionContext* conn_cntx) {\n    rb_ = rb;\n    conn_cntx_ = conn_cntx;\n  }\n\n  // If true, creates mc specific fields, false - destroys them.\n  void ConfigureMCExtension(bool is_mc) {\n    if (is_mc && !mc_cmd_) {\n      mc_cmd_ = std::make_unique<MemcacheParser::Command>();\n      mc_cmd_->backed_args = this;\n    } else if (!is_mc) {\n      mc_cmd_.reset();\n    }\n  }\n\n  SinkReplyBuilder* rb() const {\n    return rb_;\n  }\n\n  ConnectionContext* conn_cntx() const {\n    return conn_cntx_;\n  }\n  MemcacheParser::Command* mc_command() const {\n    return mc_cmd_.get();\n  }\n\n  size_t UsedMemory() const {\n    size_t sz = HeapMemory() + GetSize();\n    if (mc_cmd_) {\n      sz += sizeof(*mc_cmd_);\n    }\n    return sz;\n  }\n\n  // Marks this command as having reply stored in its payload instead of being sent directly.\n  void SetDeferredReply() {\n    is_deferred_reply_ = true;\n  }\n\n  bool IsDeferredReply() const {\n    return is_deferred_reply_;\n  }\n\n  void ResetForReuse();\n\n  void SendError(std::string_view str, std::string_view type = std::string_view{});\n  void SendError(facade::OpStatus status);\n  void SendError(const facade::ErrorReply& error);\n\n  void SendSimpleString(std::string_view str);\n  void SendOk() {\n    SendSimpleString(\"OK\");\n  }\n\n  void SendLong(long val);\n  template <typename F> void ReplyWith(F&& func) {\n    assert(!is_deferred_reply_);\n    using RbType = decltype(OnlyArgType(&std::decay_t<F>::operator()));\n    func(static_cast<RbType>(rb_));\n  }\n\n  // Below are main commands for the async api and all assume 
that the command defers replies\n\n  // Whether SendReply() can be called. If not, it must be waited via Blocker()\n  bool CanReply() const;\n\n  // Reaching zero on blocker means CanReply() turns true\n  util::fb2::EmbeddedBlockingCounter* Blocker() const {\n    return std::get<SuspendedCommand>(reply_).blocker;\n  }\n\n  // Assumes CanReply() is true. Sends reply\n  void SendReply();\n\n  // Resolve deferred command with reply\n  void Resolve(const facade::ErrorReply& error) {\n    SendError(error);\n  }\n\n  // Resolve deferred command with async task\n  void Resolve(util::fb2::EmbeddedBlockingCounter* blocker, std::coroutine_handle<> coro) {\n    reply_ = SuspendedCommand{blocker, coro};\n  }\n\n protected:\n  virtual void ReuseInternal() = 0;\n\n private:\n  // Suspended asynchronous command. Once blocker is done, the coroutine can be resumed.\n  // Deletes the coroutine on drop.\n  struct SuspendedCommand {\n    SuspendedCommand(util::fb2::EmbeddedBlockingCounter* blocker, std::coroutine_handle<> coro)\n        : blocker{blocker}, coro{coro} {\n    }\n\n    SuspendedCommand(SuspendedCommand&& other) noexcept\n        : blocker{other.blocker}, coro{std::exchange(other.coro, {})} {\n    }\n\n    SuspendedCommand& operator=(SuspendedCommand&& other) noexcept {\n      blocker = other.blocker;\n      coro = std::exchange(other.coro, {});\n      return *this;\n    }\n\n    // To destroy the coroutine when cancelling (as the handle is non owning)\n    ~SuspendedCommand();\n\n    util::fb2::EmbeddedBlockingCounter* blocker;\n    std::coroutine_handle<> coro;\n  };\n\n  // if false then the reply was sent directly to reply builder,\n  // otherwise, moved asynchronously into reply_payload_\n  bool is_deferred_reply_ = false;\n\n  std::variant<payload::Payload, SuspendedCommand> reply_;\n};\n\n#ifdef __linux__\nstatic_assert(sizeof(ParsedCommand) == 232);\n#endif\n\n}  // namespace facade\n"
  },
  {
    "path": "src/facade/redis_parser.cc",
    "content": "// Copyright 2022, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n#include \"facade/redis_parser.h\"\n\n#include <absl/strings/escaping.h>\n#include <absl/strings/numbers.h>\n\n#include \"base/logging.h\"\n#include \"common/heap_size.h\"\n\nnamespace facade {\n\nusing namespace std;\n\nauto RedisParser::Parse(Buffer str, uint32_t* consumed, RespExpr::Vec* res) -> Result {\n  DCHECK(!str.empty());\n  *consumed = 0;\n  res->clear();\n\n  DVLOG(2) << \"Parsing: \"\n           << absl::CHexEscape(string_view{reinterpret_cast<const char*>(str.data()), str.size()});\n\n  if (state_ == CMD_COMPLETE_S) {\n    if (InitStart(str[0], res)) {\n      // We recognized a non-INLINE state, starting with a special char.\n      str.remove_prefix(1);\n      *consumed += 1;\n      if (server_mode_ && state_ == PARSE_ARG_S) {  // server requests start with ARRAY_LEN_S.\n        state_ = CMD_COMPLETE_S;                    // reject and reset the state.\n        return BAD_ARRAYLEN;\n      }\n      if (str.empty())\n        return INPUT_PENDING;\n    }\n  } else {  // INLINE mode, aka PING\\n\n    // We continue parsing in the middle.\n    if (!cached_expr_)\n      cached_expr_ = res;\n  }\n  DCHECK(state_ != CMD_COMPLETE_S);\n\n  ResultConsumed resultc{OK, 0};\n\n  do {\n    switch (state_) {\n      case MAP_LEN_S:\n      case ARRAY_LEN_S:\n        resultc = ConsumeArrayLen(str);\n        break;\n      case PARSE_ARG_TYPE:\n        arg_c_ = str[0];\n        if (server_mode_ && arg_c_ != '$')  // server side only supports bulk strings.\n          return BAD_BULKLEN;\n        resultc.second = 1;\n        state_ = PARSE_ARG_S;\n        break;\n      case PARSE_ARG_S:\n        resultc = ParseArg(str);\n        break;\n      case INLINE_S:\n        DCHECK(parse_stack_.empty());\n        resultc = ParseInline(str);\n        break;\n      case BULK_STR_S:\n        resultc = ConsumeBulk(str);\n        break;\n      case SLASH_N_S:\n        
if (str[0] != '\\n') {\n          resultc.first = BAD_STRING;\n        } else {\n          resultc = {OK, 1};\n          if (arg_c_ == '_') {\n            cached_expr_->emplace_back(RespExpr::NIL);\n            cached_expr_->back().u = Buffer{};\n          }\n          HandleFinishArg();\n        }\n        break;\n      default:\n        LOG(FATAL) << \"Unexpected state \" << int(state_);\n    }\n\n    *consumed += resultc.second;\n    str.remove_prefix(exchange(resultc.second, 0));\n  } while (state_ != CMD_COMPLETE_S && resultc.first == OK && !str.empty());\n\n  if (state_ != CMD_COMPLETE_S) {\n    if (resultc.first == OK) {\n      resultc.first = INPUT_PENDING;\n    }\n\n    if (resultc.first == INPUT_PENDING) {\n      // TODO: we still need to handle ':' and ',' cases for client mode\n      // to consume them completely.\n      if (server_mode_ && !str.empty()) {\n        LOG(DFATAL) << \"Did not consume all input: \"\n                    << absl::CHexEscape({reinterpret_cast<const char*>(str.data()), str.size()})\n                    << \", state: \" << int(state_) << \" smallbuf: \"\n                    << absl::CHexEscape(\n                           {reinterpret_cast<const char*>(small_buf_.data()), small_len_});\n      }\n      StashState(res);\n    }\n    return resultc.first;\n  }\n\n  if (resultc.first == OK) {\n    DCHECK(cached_expr_);\n    DCHECK_EQ(0, small_len_);\n\n    if (res != cached_expr_) {\n      DCHECK(!stash_.empty());\n\n      *res = *cached_expr_;\n    }\n  }\n\n  return resultc.first;\n}\n\nbool RedisParser::InitStart(char prefix_b, RespExpr::Vec* res) {\n  buf_stash_.clear();\n  stash_.clear();\n  cached_expr_ = res;\n  parse_stack_.clear();\n  last_stashed_level_ = 0;\n  last_stashed_index_ = 0;\n\n  switch (prefix_b) {\n    case '$':\n    case ':':\n    case '+':\n    case '-':\n    case '_':  // Resp3 NULL\n    case ',':  // Resp3 DOUBLE\n      state_ = PARSE_ARG_S;\n      parse_stack_.emplace_back(1, cached_expr_);  // expression 
of length 1.\n      arg_c_ = prefix_b;\n      return true;\n    case '*':\n    case '~':  // Resp3 SET\n      state_ = ARRAY_LEN_S;\n      return true;\n    case '%':  // Resp3 MAP\n      state_ = MAP_LEN_S;\n      return true;\n  }\n\n  state_ = INLINE_S;\n  return false;\n}\n\nvoid RedisParser::StashState(RespExpr::Vec* res) {\n  if (cached_expr_->empty() && stash_.empty()) {\n    cached_expr_ = nullptr;\n    return;\n  }\n\n  if (cached_expr_ == res) {\n    stash_.emplace_back(new RespExpr::Vec(*res));\n    cached_expr_ = stash_.back().get();\n  }\n\n  DCHECK_LT(last_stashed_level_, stash_.size());\n  while (true) {\n    auto& cur = *stash_[last_stashed_level_];\n\n    for (; last_stashed_index_ < cur.size(); ++last_stashed_index_) {\n      auto& e = cur[last_stashed_index_];\n      if (RespExpr::STRING == e.type) {\n        Buffer& ebuf = get<Buffer>(e.u);\n        if (ebuf.empty() && last_stashed_index_ + 1 == cur.size())\n          break;\n        if (!ebuf.empty() && !e.has_support) {\n          Blob blob(ebuf.size());\n          memcpy(blob.data(), ebuf.data(), ebuf.size());\n          ebuf = Buffer{blob.data(), blob.size()};\n          buf_stash_.push_back(std::move(blob));\n          e.has_support = true;\n        }\n      }\n    }\n\n    if (last_stashed_level_ + 1 == stash_.size())\n      break;\n    ++last_stashed_level_;\n    last_stashed_index_ = 0;\n  }\n}\n\nauto RedisParser::ParseInline(Buffer str) -> ResultConsumed {\n  DCHECK(!str.empty());\n\n  const uint8_t* ptr = str.begin();\n  const uint8_t* end = str.end();\n  const uint8_t* token_start = ptr;\n\n  auto find_token_end = [](const uint8_t* ptr, const uint8_t* end) {\n    while (ptr != end && *ptr > 32)\n      ++ptr;\n    return ptr;\n  };\n\n  if (is_broken_token_) {\n    ptr = find_token_end(ptr, end);\n    size_t len = ptr - token_start;\n\n    ExtendLastString(Buffer(token_start, len));\n    if (ptr == end) {\n      return {INPUT_PENDING, ptr - token_start};\n    }\n    is_broken_token_ = 
false;\n  }\n\n  while (ptr != end) {\n    // For inline input we only require \\n.\n    if (*ptr == '\\n') {\n      if (cached_expr_->empty()) {\n        ++ptr;\n        continue;  // skip empty line\n      }\n      break;\n    }\n\n    if (*ptr <= 32) {  // skip ws/control chars\n      ++ptr;\n      continue;\n    }\n\n    // token start\n    DCHECK(!is_broken_token_);\n\n    token_start = ptr;\n    ptr = find_token_end(ptr, end);\n\n    cached_expr_->emplace_back(RespExpr::STRING);\n    cached_expr_->back().u = Buffer{token_start, size_t(ptr - token_start)};\n  }\n\n  uint32_t last_consumed = ptr - str.data();\n  if (ptr == end) {  // we have not finished parsing.\n    if (cached_expr_->empty()) {\n      state_ = CMD_COMPLETE_S;  // have not found anything besides whitespace.\n    } else {\n      is_broken_token_ = ptr[-1] > 32;  // we stopped in the middle of the token.\n    }\n    return {INPUT_PENDING, last_consumed};\n  }\n\n  DCHECK_EQ('\\n', *ptr);\n\n  ++last_consumed;  // consume \\n as well.\n  state_ = CMD_COMPLETE_S;\n\n  return {OK, last_consumed};\n}\n\n// Parse lines like:'$5\\r\\n' or '*2\\r\\n'. 
The first character is already consumed by the caller.\nauto RedisParser::ParseLen(Buffer str, int64_t* res) -> ResultConsumed {\n  DCHECK(!str.empty());\n\n  const char* s = reinterpret_cast<const char*>(str.data());\n  const char* pos = reinterpret_cast<const char*>(memchr(s, '\\n', str.size()));\n  if (!pos) {\n    if (str.size() + small_len_ < small_buf_.size()) {\n      memcpy(&small_buf_[small_len_], str.data(), str.size());\n      small_len_ += str.size();\n      return {INPUT_PENDING, str.size()};\n    }\n    LOG(WARNING) << \"Unexpected format \" << string_view{s, str.size()};\n    return ResultConsumed{BAD_ARRAYLEN, 0};\n  }\n\n  unsigned consumed = pos - s + 1;\n  if (small_len_ > 0) {\n    if (small_len_ + consumed >= small_buf_.size()) {\n      return ResultConsumed{BAD_ARRAYLEN, consumed};\n    }\n    memcpy(&small_buf_[small_len_], str.data(), consumed);\n    small_len_ += consumed;\n    s = small_buf_.data();\n    pos = s + small_len_ - 1;\n    small_len_ = 0;\n  }\n\n  if (pos[-1] != '\\r') {\n    return {BAD_ARRAYLEN, consumed};\n  }\n\n  // Skip 2 last characters (\\r\\n).\n  string_view len_token{s, size_t(pos - 1 - s)};\n  bool success = absl::SimpleAtoi(len_token, res);\n\n  if (success && *res >= -1) {\n    return ResultConsumed{OK, consumed};\n  }\n\n  LOG(ERROR) << \"Failed to parse len \" << absl::CHexEscape(len_token) << \" \"\n             << absl::CHexEscape(string_view{reinterpret_cast<const char*>(str.data()), str.size()})\n             << \" \" << consumed << \" \" << int(s == small_buf_.data());\n  return ResultConsumed{BAD_ARRAYLEN, consumed};\n}\n\nauto RedisParser::ConsumeArrayLen(Buffer str) -> ResultConsumed {\n  int64_t len;\n\n  ResultConsumed res = ParseLen(str, &len);\n  if (res.first != OK) {\n    return res;\n  }\n\n  if (state_ == MAP_LEN_S) {\n    // Map starts with %N followed by an array of 2*N elements.\n    // Even elements are keys, odd elements are values.\n    len *= 2;\n  }\n\n  if (len > max_arr_len_) {\n    
LOG(WARNING) << \"Multibulk len is too large \" << len;\n\n    return {BAD_ARRAYLEN, res.second};\n  }\n\n  if (server_mode_ && (!parse_stack_.empty() || !cached_expr_->empty()))\n    return {BAD_STRING, res.second};\n\n  if (len <= 0) {\n    if (len < 0) {\n      cached_expr_->emplace_back(RespExpr::NIL_ARRAY);\n      cached_expr_->back().u.emplace<RespVec*>(nullptr);  // nil\n    } else {\n      static RespVec empty_vec;\n      cached_expr_->emplace_back(RespExpr::ARRAY);\n      cached_expr_->back().u = &empty_vec;\n    }\n    if (parse_stack_.empty()) {\n      state_ = CMD_COMPLETE_S;\n    } else {\n      HandleFinishArg();\n    }\n\n    return {OK, res.second};\n  }\n\n  if (state_ == PARSE_ARG_S) {\n    DCHECK(!server_mode_);\n\n    cached_expr_->emplace_back(RespExpr::ARRAY);\n    stash_.emplace_back(new RespExpr::Vec());\n    RespExpr::Vec* arr = stash_.back().get();\n    arr->reserve(len);\n    cached_expr_->back().u = arr;\n    cached_expr_ = arr;\n  }\n  state_ = PARSE_ARG_TYPE;\n\n  DVLOG(1) << \"PushStack: (\" << len << \", \" << cached_expr_ << \")\";\n  parse_stack_.emplace_back(len, cached_expr_);\n\n  return {OK, res.second};\n}\n\nauto RedisParser::ParseArg(Buffer str) -> ResultConsumed {\n  DCHECK(!str.empty());\n\n  if (arg_c_ == '$') {\n    int64_t len;\n\n    ResultConsumed res = ParseLen(str, &len);\n    if (res.first != OK) {\n      return res;\n    }\n\n    if (len > 0 && static_cast<uint64_t>(len) > max_bulk_len_) {\n      LOG_EVERY_T(WARNING, 1) << \"Threshold reached with bulk len: \" << len\n                              << \", consider increasing max_bulk_len\";\n      return {BAD_ARRAYLEN, res.second};\n    }\n\n    if (len == -1) {  // Resp2 NIL\n      cached_expr_->emplace_back(RespExpr::NIL);\n      cached_expr_->back().u = Buffer{};\n      HandleFinishArg();\n    } else {\n      DVLOG(1) << \"String(\" << len << \")\";\n\n      cached_expr_->emplace_back(RespExpr::STRING);\n      cached_expr_->back().u = Buffer{};\n      bulk_len_ 
= len;\n      state_ = BULK_STR_S;\n    }\n\n    return {OK, res.second};\n  }\n\n  DCHECK(!server_mode_);\n\n  if (arg_c_ == '_') {  // Resp3 NIL\n    // \"_\\r\\n\", with '_' consumed into arg_c_.\n    DCHECK_LT(small_len_, 2u);  // must be because we never fill here with more than 2 bytes.\n    DCHECK_GE(str.size(), 1u);\n\n    if (str[0] != '\\r' || (str.size() > 1 && str[1] != '\\n')) {\n      return {BAD_STRING, 0};\n    }\n\n    if (str.size() == 1) {\n      state_ = SLASH_N_S;\n      return {INPUT_PENDING, 1};\n    }\n\n    cached_expr_->emplace_back(RespExpr::NIL);\n    cached_expr_->back().u = Buffer{};\n    HandleFinishArg();\n    return {OK, 2};\n  }\n\n  if (arg_c_ == '*') {\n    return ConsumeArrayLen(str);\n  }\n\n  const char* s = reinterpret_cast<const char*>(str.data());\n  const char* eol = reinterpret_cast<const char*>(memchr(s, '\\n', str.size()));\n\n  if (arg_c_ == '+' || arg_c_ == '-') {  // Simple string or error.\n    DCHECK(!server_mode_);\n    if (!eol) {\n      // if eol is not found we should still read input as bulk string\n      cached_expr_->emplace_back(RespExpr::STRING);\n      cached_expr_->back().u = Buffer{};\n      bulk_len_ = str.length();\n      // eol is not found but if '\\r' is present decrease bulk_len\n      if (s[bulk_len_ - 1] == '\\r')\n        bulk_len_--;\n      state_ = BULK_STR_S;\n      Result r = str.size() < 256 ? OK : BAD_STRING;\n      return {r, 0};\n    }\n\n    if (eol[-1] != '\\r')\n      return {BAD_STRING, 0};\n\n    cached_expr_->emplace_back(arg_c_ == '+' ? RespExpr::STRING : RespExpr::ERROR);\n    cached_expr_->back().u = Buffer{reinterpret_cast<const uint8_t*>(s), size_t((eol - 1) - s)};\n  } else if (arg_c_ == ':') {\n    DCHECK(!server_mode_);\n    if (!eol) {\n      Result r = str.size() < 32 ? 
INPUT_PENDING : BAD_INT;\n      return {r, 0};\n    }\n    int64_t ival;\n    std::string_view tok{s, size_t((eol - s) - 1)};\n\n    if (eol[-1] != '\\r' || !absl::SimpleAtoi(tok, &ival))\n      return {BAD_INT, 0};\n\n    cached_expr_->emplace_back(RespExpr::INT64);\n    cached_expr_->back().u = ival;\n  } else if (arg_c_ == ',') {\n    DCHECK(!server_mode_);\n    if (!eol) {\n      Result r = str.size() < 32 ? INPUT_PENDING : BAD_DOUBLE;\n      return {r, 0};\n    }\n    double_t dval;\n    std::string_view tok{s, size_t((eol - s) - 1)};\n\n    if (eol[-1] != '\\r' || !absl::SimpleAtod(tok, &dval))\n      return {BAD_DOUBLE, 0};\n\n    cached_expr_->emplace_back(RespExpr::DOUBLE);\n    cached_expr_->back().u = dval;\n  } else {\n    return {BAD_STRING, 0};\n  }\n\n  HandleFinishArg();\n\n  return {OK, (eol - s) + 1};\n}\n\nauto RedisParser::ConsumeBulk(Buffer str) -> ResultConsumed {\n  DCHECK_EQ(small_len_, 0);\n  uint32_t consumed = 0;\n  auto& bulk_str = get<Buffer>(cached_expr_->back().u);\n\n  bool extend = false;\n  // Handle split simple message or error in client mode\n  if (!server_mode_ && (arg_c_ == '+' || arg_c_ == '-') && !bulk_len_) {\n    // Search first '\\r' in next partial message which ends bulk string\n    const char* s = reinterpret_cast<const char*>(str.data());\n    const char* pos = reinterpret_cast<const char*>(memchr(s, '\\r', str.size()));\n    bulk_len_ = pos ? 
pos - s : str.size();\n    extend = true;\n  }\n\n  if (str.size() >= bulk_len_) {\n    consumed = bulk_len_;\n    if (bulk_len_) {\n      // is_broken_token_ can be false, if we just parsed the bulk length but have\n      // not parsed the token itself.\n      if (is_broken_token_) {\n        memcpy(const_cast<uint8_t*>(bulk_str.end()), str.data(), bulk_len_);\n        bulk_str = Buffer{bulk_str.data(), bulk_str.size() + bulk_len_};\n      } else if (extend) {\n        ExtendBulkString(Buffer(str.begin(), bulk_len_));\n      } else {\n        bulk_str = str.subspan(0, bulk_len_);\n      }\n      str.remove_prefix(exchange(bulk_len_, 0));\n      is_broken_token_ = false;\n    }\n\n    if (str.size() >= 2) {\n      if (str[0] != '\\r' || str[1] != '\\n') {\n        return {BAD_STRING, consumed};\n      }\n      HandleFinishArg();\n      return {OK, consumed + 2};\n    } else if (str.size() == 1) {\n      if (str[0] != '\\r') {\n        return {BAD_STRING, consumed};\n      }\n      state_ = SLASH_N_S;\n      consumed++;\n    }\n    return {INPUT_PENDING, consumed};\n  }\n\n  DCHECK(bulk_len_);\n  size_t len = std::min<size_t>(str.size(), bulk_len_);\n\n  if (is_broken_token_) {\n    memcpy(const_cast<uint8_t*>(bulk_str.end()), str.data(), len);\n    bulk_str = Buffer{bulk_str.data(), bulk_str.size() + len};\n    DVLOG(1) << \"Extending bulk stash to size \" << bulk_str.size();\n  } else {\n    DVLOG(1) << \"New bulk stash size \" << bulk_len_;\n    vector<uint8_t> nb(bulk_len_);\n    memcpy(nb.data(), str.data(), len);\n    bulk_str = Buffer{nb.data(), len};\n    buf_stash_.emplace_back(std::move(nb));\n    is_broken_token_ = true;\n    cached_expr_->back().has_support = true;\n  }\n  consumed = len;\n  bulk_len_ -= len;\n\n  return {INPUT_PENDING, consumed};\n}\n\nvoid RedisParser::HandleFinishArg() {\n  DCHECK(!parse_stack_.empty());\n  DCHECK_GT(parse_stack_.back().first, 0u);\n\n  state_ = PARSE_ARG_TYPE;\n  while (true) {\n    --parse_stack_.back().first;\n    
if (parse_stack_.back().first != 0)\n      break;\n    auto* arr = parse_stack_.back().second;\n    DVLOG(1) << \"PopStack (\" << arr << \")\";\n    parse_stack_.pop_back();  // pop 0.\n    if (parse_stack_.empty()) {\n      state_ = CMD_COMPLETE_S;\n      break;\n    }\n    cached_expr_ = parse_stack_.back().second;\n  }\n  small_len_ = 0;\n}\n\nvoid RedisParser::ExtendLastString(Buffer str) {\n  DCHECK(!cached_expr_->empty() && cached_expr_->back().type == RespExpr::STRING);\n  DCHECK(!buf_stash_.empty());\n\n  Buffer& last_str = get<Buffer>(cached_expr_->back().u);\n\n  DCHECK(last_str.data() == buf_stash_.back().data());\n\n  vector<uint8_t> nb(last_str.size() + str.size());\n  memcpy(nb.data(), last_str.data(), last_str.size());\n  memcpy(nb.data() + last_str.size(), str.data(), str.size());\n  last_str = RespExpr::Buffer{nb.data(), last_str.size() + str.size()};\n  buf_stash_.back() = std::move(nb);\n}\n\nvoid RedisParser::ExtendBulkString(Buffer str) {\n  DCHECK(!cached_expr_->empty() && cached_expr_->back().type == RespExpr::STRING);\n\n  Buffer& bulk_str = get<Buffer>(cached_expr_->back().u);\n\n  DCHECK(bulk_str.data() == buf_stash_.back().data());\n\n  vector<uint8_t> nb(bulk_str.size() + str.size());\n  memcpy(nb.data(), bulk_str.data(), bulk_str.size());\n  memcpy(nb.data() + bulk_str.size(), str.data(), str.size());\n  bulk_str = RespExpr::Buffer{nb.data(), bulk_str.size() + str.size()};\n  buf_stash_.back() = std::move(nb);\n}\n\nsize_t RedisParser::UsedMemory() const {\n  return cmn::HeapSize(parse_stack_) + cmn::HeapSize(stash_) + cmn::HeapSize(buf_stash_);\n}\n\n}  // namespace facade\n"
  },
  {
    "path": "src/facade/redis_parser.h",
    "content": "// Copyright 2022, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n#pragma once\n\n#include <memory>\n#include <utility>\n#include <vector>\n\n#include \"facade/resp_expr.h\"\n\nnamespace facade {\n\n/**\n * @brief Zero-copy (best-effort) parser.\n * Note: The client-mode parsing is buggy and should not be used in production.\n *       Currently we only use server-mode parsing in production and client-mode in tests.\n *       It works because tests do not do any incremental parsing.\n *\n */\nclass RedisParser {\n public:\n  enum Result : uint8_t {\n    OK,\n    INPUT_PENDING,\n    BAD_ARRAYLEN,\n    BAD_BULKLEN,\n    BAD_STRING,\n    BAD_INT,\n    BAD_DOUBLE\n  };\n  using Buffer = RespExpr::Buffer;\n  enum Mode : uint8_t { SERVER, CLIENT };\n\n  explicit RedisParser(Mode mode = Mode::SERVER, uint32_t max_arr_len = UINT32_MAX,\n                       uint64_t max_bulk_len = UINT64_MAX)\n      : server_mode_(mode == Mode::SERVER), max_arr_len_(max_arr_len), max_bulk_len_(max_bulk_len) {\n  }\n\n  /**\n   * @brief Parses str into res. \"consumed\" stores number of bytes consumed from str.\n   *\n   * A caller should not invalidate str if the parser returns RESP_OK as long as he continues\n   * accessing res. 
However, if parser returns INPUT_PENDING a caller may discard consumed\n   * part of str because parser caches the intermediate state internally according to 'consumed'\n   * result.\n   *\n   *\n   */\n\n  Result Parse(Buffer str, uint32_t* consumed, RespVec* res);\n\n  void SetClientMode() {\n    server_mode_ = false;\n  }\n\n  size_t parselen_hint() const {\n    return bulk_len_;\n  }\n\n  size_t stash_size() const {\n    return stash_.size();\n  }\n  const std::vector<std::unique_ptr<RespVec>>& stash() const {\n    return stash_;\n  }\n\n  size_t UsedMemory() const;\n\n private:\n  using ResultConsumed = std::pair<Result, uint32_t>;\n\n  // Returns true if this is a RESP message, false if INLINE.\n  bool InitStart(char prefix_b, RespVec* res);\n  void StashState(RespVec* res);\n\n  // Skips the first character (*).\n  ResultConsumed ConsumeArrayLen(Buffer str);\n  ResultConsumed ParseArg(Buffer str);\n  ResultConsumed ConsumeBulk(Buffer str);\n  ResultConsumed ParseInline(Buffer str);\n  ResultConsumed ParseLen(Buffer str, int64_t* res);\n\n  void HandleFinishArg();\n  void ExtendLastString(Buffer str);\n  void ExtendBulkString(Buffer str);\n\n  enum State : uint8_t {\n    INLINE_S,\n    ARRAY_LEN_S,\n    MAP_LEN_S,\n    PARSE_ARG_TYPE,  // Parse [$:+-]\n    PARSE_ARG_S,     // Parse string\\r\\n\n    BULK_STR_S,\n    SLASH_N_S,\n    CMD_COMPLETE_S,\n  };\n\n  State state_ = CMD_COMPLETE_S;\n  bool is_broken_token_ = false;  // true, if a token (inline or bulk) is broken during the parsing.\n  bool server_mode_ = true;\n  uint8_t small_len_ = 0;\n  char arg_c_ = 0;\n\n  uint32_t bulk_len_ = 0;\n  uint32_t last_stashed_level_ = 0, last_stashed_index_ = 0;\n  uint32_t max_arr_len_;\n  uint64_t max_bulk_len_;\n\n  // Points either to the result passed by the caller or to the stash.\n  RespVec* cached_expr_ = nullptr;\n\n  // expected expression length, pointer to expression vector.\n  // For server mode, the length is at most 1.\n  
absl::InlinedVector<std::pair<uint32_t, RespVec*>, 4> parse_stack_;\n  std::vector<std::unique_ptr<RespVec>> stash_;\n\n  using Blob = std::vector<uint8_t>;\n  std::vector<Blob> buf_stash_;\n  std::array<char, 32> small_buf_;\n};\n\n}  // namespace facade\n"
  },
  {
    "path": "src/facade/redis_parser_test.cc",
    "content": "// Copyright 2022, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#include \"facade/redis_parser.h\"\n\n#include <absl/strings/str_cat.h>\n#include <gmock/gmock.h>\n\n#include \"absl/strings/str_cat.h\"\n#include \"base/gtest.h\"\n#include \"base/logging.h\"\n#include \"common/heap_size.h\"\n#include \"facade/facade_test.h\"\n\nusing namespace testing;\nusing namespace std;\nnamespace facade {\n\nMATCHER_P(ArrArg, expected, absl::StrCat(negation ? \"is not\" : \"is\", \" equal to:\\n\", expected)) {\n  if (arg.type != RespExpr::ARRAY) {\n    *result_listener << \"\\nWrong type: \" << arg.type;\n    return false;\n  }\n  size_t exp_sz = expected;\n  size_t actual = get<RespVec*>(arg.u)->size();\n\n  if (exp_sz != actual) {\n    *result_listener << \"\\nActual size: \" << actual;\n    return false;\n  }\n  return true;\n}\n\nclass RedisParserTest : public testing::Test {\n protected:\n  static void SetUpTestSuite() {\n  }\n\n  RedisParser::Result Parse(std::string_view str);\n\n  RedisParser parser_;\n  RespExpr::Vec args_;\n  uint32_t consumed_;\n\n  unique_ptr<uint8_t[]> stash_;\n};\n\nRedisParser::Result RedisParserTest::Parse(std::string_view str) {\n  stash_.reset(new uint8_t[str.size()]);\n  auto* ptr = stash_.get();\n  memcpy(ptr, str.data(), str.size());\n  return parser_.Parse(RedisParser::Buffer{ptr, str.size()}, &consumed_, &args_);\n}\n\nTEST_F(RedisParserTest, Inline) {\n  RespExpr e{RespExpr::STRING};\n  ASSERT_EQ(RespExpr::STRING, e.type);\n\n  const char kCmd1[] = \"KEY   VAL\\r\\n\";\n\n  ASSERT_EQ(RedisParser::OK, Parse(kCmd1));\n  EXPECT_EQ(strlen(kCmd1), consumed_);\n  EXPECT_THAT(args_, ElementsAre(\"KEY\", \"VAL\"));\n\n  ASSERT_EQ(RedisParser::INPUT_PENDING, Parse(\"KEY\"));\n  EXPECT_EQ(3, consumed_);\n  ASSERT_EQ(RedisParser::INPUT_PENDING, Parse(\" FOO \"));\n  EXPECT_EQ(5, consumed_);\n  ASSERT_EQ(RedisParser::INPUT_PENDING, Parse(\" BAR\"));\n  EXPECT_EQ(4, consumed_);\n  
ASSERT_EQ(RedisParser::OK, Parse(\" \\r\\n \"));\n  EXPECT_EQ(3, consumed_);\n  EXPECT_THAT(args_, ElementsAre(\"KEY\", \"FOO\", \"BAR\"));\n\n  ASSERT_EQ(RedisParser::INPUT_PENDING, Parse(\" 1 2\"));\n  EXPECT_EQ(4, consumed_);\n  ASSERT_EQ(RedisParser::INPUT_PENDING, Parse(\" 45\"));\n  EXPECT_EQ(3, consumed_);\n  ASSERT_EQ(RedisParser::OK, Parse(\"\\r\\n\"));\n  EXPECT_EQ(2, consumed_);\n  EXPECT_THAT(args_, ElementsAre(\"1\", \"2\", \"45\"));\n\n  // Empty queries return INPUT_PENDING.\n  EXPECT_EQ(RedisParser::INPUT_PENDING, Parse(\"\\r\\n\"));\n  EXPECT_EQ(2, consumed_);\n}\n\nTEST_F(RedisParserTest, InlineEscaping) {\n  LOG(ERROR) << \"TBD: to be compliant with sdssplitargs\";  // TODO:\n}\n\nTEST_F(RedisParserTest, Multi1) {\n  ASSERT_EQ(RedisParser::INPUT_PENDING, Parse(\"*1\\r\\n\"));\n  EXPECT_EQ(4, consumed_);\n  EXPECT_EQ(0, parser_.parselen_hint());\n\n  ASSERT_EQ(RedisParser::INPUT_PENDING, Parse(\"$4\\r\\n\"));\n  EXPECT_EQ(4, consumed_);\n  EXPECT_EQ(4, parser_.parselen_hint());\n\n  ASSERT_EQ(RedisParser::OK, Parse(\"PING\\r\\n\"));\n  EXPECT_EQ(6, consumed_);\n  EXPECT_EQ(0, parser_.parselen_hint());\n  EXPECT_THAT(args_, ElementsAre(\"PING\"));\n}\n\nTEST_F(RedisParserTest, Multi2) {\n  ASSERT_EQ(RedisParser::INPUT_PENDING, Parse(\"*1\\r\\n$\"));\n  EXPECT_EQ(5, consumed_);\n\n  ASSERT_EQ(RedisParser::INPUT_PENDING, Parse(\"4\\r\\nMSET\"));\n  EXPECT_EQ(7, consumed_);\n\n  ASSERT_EQ(RedisParser::OK, Parse(\"\\r\\n*2\\r\\n\"));\n  EXPECT_EQ(2, consumed_);\n\n  ASSERT_EQ(RedisParser::INPUT_PENDING, Parse(\"*2\\r\\n$3\\r\\nKEY\\r\\n$3\\r\\nVAL\"));\n  EXPECT_EQ(20, consumed_);\n\n  ASSERT_EQ(RedisParser::OK, Parse(\"\\r\\n\"));\n  EXPECT_EQ(2, consumed_);\n  EXPECT_THAT(args_, ElementsAre(\"KEY\", \"VAL\"));\n}\n\nTEST_F(RedisParserTest, Multi3) {\n  const char kFirst[] = \"*3\\r\\n$3\\r\\nSET\\r\\n$16\\r\\nkey:\";\n  const char kSecond[] = \"000002273458\\r\\n$3\\r\\nVXK\";\n  ASSERT_EQ(RedisParser::INPUT_PENDING, Parse(kFirst));\n  
ASSERT_EQ(strlen(kFirst), consumed_);\n  ASSERT_EQ(RedisParser::INPUT_PENDING, Parse(kSecond));\n  ASSERT_EQ(strlen(kSecond), consumed_);\n  ASSERT_EQ(RedisParser::OK, Parse(\"\\r\\n*3\\r\\n$3\\r\\nSET\"));\n  ASSERT_EQ(2, consumed_);\n  EXPECT_THAT(args_, ElementsAre(\"SET\", \"key:000002273458\", \"VXK\"));\n}\n\nTEST_F(RedisParserTest, ClientMode) {\n  parser_.SetClientMode();\n\n  ASSERT_EQ(RedisParser::OK, Parse(\":-1\\r\\n\"));\n  EXPECT_THAT(args_, ElementsAre(IntArg(-1)));\n\n  ASSERT_EQ(RedisParser::OK, Parse(\"+OK\\r\\n\"));\n  EXPECT_EQ(args_[0], \"OK\");\n\n  ASSERT_EQ(RedisParser::OK, Parse(\"-ERR foo bar\\r\\n\"));\n  EXPECT_THAT(args_, ElementsAre(ErrArg(\"ERR foo\")));\n\n  ASSERT_EQ(RedisParser::INPUT_PENDING, Parse(\"_\"));\n  EXPECT_EQ(1, consumed_);\n  ASSERT_EQ(RedisParser::INPUT_PENDING, Parse(\"\\r\"));\n  EXPECT_EQ(1, consumed_);\n  ASSERT_EQ(RedisParser::OK, Parse(\"\\n\"));\n  EXPECT_EQ(1, consumed_);\n  EXPECT_THAT(args_, ElementsAre(ArgType(RespExpr::NIL)));\n  ASSERT_EQ(RedisParser::OK, Parse(\"*2\\r\\n_\\r\\n_\\r\\n\"));\n  ASSERT_EQ(10, consumed_);\n\n  ASSERT_EQ(RedisParser::OK, Parse(\"*3\\r\\n+OK\\r\\n$1\\r\\n1\\r\\n*2\\r\\n$1\\r\\n1\\r\\n$-1\\r\\n\"));\n  ASSERT_THAT(args_, ElementsAre(\"OK\", \"1\", ArrLen(2)));\n\n  ASSERT_EQ(RedisParser::INPUT_PENDING, Parse(\"+O\"));\n  EXPECT_EQ(2, consumed_);\n  ASSERT_EQ(RedisParser::INPUT_PENDING, Parse(\"K\\r\"));\n  EXPECT_EQ(2, consumed_);\n  ASSERT_EQ(RedisParser::OK, Parse(\"\\n\"));\n  ASSERT_THAT(args_, ElementsAre(\"OK\"));\n  EXPECT_EQ(1, consumed_);\n\n  ASSERT_EQ(RedisParser::INPUT_PENDING, Parse(\"+OK\\r\"));\n  EXPECT_EQ(4, consumed_);\n  ASSERT_EQ(RedisParser::OK, Parse(\"\\n\"));\n  ASSERT_THAT(args_, ElementsAre(\"OK\"));\n  EXPECT_EQ(1, consumed_);\n\n  ASSERT_EQ(RedisParser::INPUT_PENDING, Parse(\"+\"));\n  EXPECT_EQ(1, consumed_);\n  ASSERT_EQ(RedisParser::INPUT_PENDING, Parse(\"O\"));\n  EXPECT_EQ(1, consumed_);\n  ASSERT_EQ(RedisParser::INPUT_PENDING, Parse(\"K\"));\n  
EXPECT_EQ(1, consumed_);\n  ASSERT_EQ(RedisParser::INPUT_PENDING, Parse(\"\\r\"));\n  EXPECT_EQ(1, consumed_);\n  ASSERT_EQ(RedisParser::OK, Parse(\"\\n\"));\n  EXPECT_EQ(1, consumed_);\n  ASSERT_THAT(args_, ElementsAre(\"OK\"));\n\n  ASSERT_EQ(RedisParser::INPUT_PENDING, Parse(\"-\"));\n  EXPECT_EQ(1, consumed_);\n  ASSERT_EQ(RedisParser::OK, Parse(\"ERR\\r\\n\"));\n  EXPECT_EQ(5, consumed_);\n  ASSERT_THAT(args_, ElementsAre(ErrArg(\"ERR\")));\n\n  ASSERT_EQ(RedisParser::INPUT_PENDING, Parse(\"-ERR foo\"));\n  EXPECT_EQ(8, consumed_);\n  ASSERT_EQ(RedisParser::OK, Parse(\"\\r\\n\"));\n  EXPECT_EQ(2, consumed_);\n  ASSERT_THAT(args_, ElementsAre(\"ERR foo\"));\n}\n\nTEST_F(RedisParserTest, Hierarchy) {\n  parser_.SetClientMode();\n\n  const char* kThirdArg = \"*2\\r\\n$3\\r\\n100\\r\\n$3\\r\\n200\\r\\n\";\n  string resp = absl::StrCat(\"*3\\r\\n$3\\r\\n900\\r\\n$3\\r\\n800\\r\\n\", kThirdArg);\n  ASSERT_EQ(RedisParser::OK, Parse(resp));\n  ASSERT_THAT(args_, ElementsAre(\"900\", \"800\", ArrArg(2)));\n  EXPECT_THAT(args_[2].GetVec(), ElementsAre(\"100\", \"200\"));\n\n  ASSERT_EQ(RedisParser::OK, Parse(\"*2\\r\\n*1\\r\\n$3\\r\\n1-0\\r\\n*1\\r\\n$2\\r\\nf1\\r\\n\"));\n  ASSERT_THAT(args_, ElementsAre(ArrLen(1), ArrLen(1)));\n}\n\nTEST_F(RedisParserTest, InvalidMult1) {\n  ASSERT_EQ(RedisParser::BAD_BULKLEN, Parse(\"*2\\r\\n$3\\r\\nFOO\\r\\nBAR\\r\\n\"));\n}\n\nTEST_F(RedisParserTest, Empty) {\n  ASSERT_EQ(RedisParser::OK, Parse(\"*2\\r\\n$0\\r\\n\\r\\n$0\\r\\n\\r\\n\"));\n}\n\nTEST_F(RedisParserTest, LargeBulk) {\n  string_view prefix(\"*1\\r\\n$1024\\r\\n\");\n\n  ASSERT_EQ(RedisParser::INPUT_PENDING, Parse(prefix));\n  ASSERT_EQ(prefix.size(), consumed_);\n  ASSERT_GE(parser_.parselen_hint(), 1024);\n\n  string half(512, 'a');\n  ASSERT_EQ(RedisParser::INPUT_PENDING, Parse(half));\n  ASSERT_EQ(512, consumed_);\n  ASSERT_GE(parser_.parselen_hint(), 512);\n  ASSERT_EQ(RedisParser::INPUT_PENDING, Parse(half));\n  ASSERT_EQ(512, consumed_);\n  
ASSERT_EQ(RedisParser::INPUT_PENDING, Parse(\"\\r\"));\n  ASSERT_EQ(1, consumed_);\n  ASSERT_EQ(RedisParser::OK, Parse(\"\\n\"));\n  EXPECT_EQ(1, consumed_);\n\n  string part1 = absl::StrCat(prefix, half);\n  ASSERT_EQ(RedisParser::INPUT_PENDING, Parse(part1));\n  ASSERT_EQ(RedisParser::INPUT_PENDING, Parse(half));\n  ASSERT_EQ(RedisParser::OK, Parse(\"\\r\\n\"));\n\n  prefix = \"*1\\r\\n$270000000\\r\\n\";\n  ASSERT_EQ(RedisParser::INPUT_PENDING, Parse(prefix));\n  ASSERT_EQ(prefix.size(), consumed_);\n  string chunk(1000000, 'a');\n  for (unsigned i = 0; i < 270; ++i) {\n    ASSERT_EQ(RedisParser::INPUT_PENDING, Parse(chunk));\n    ASSERT_EQ(chunk.size(), consumed_);\n  }\n  ASSERT_EQ(RedisParser::OK, Parse(\"\\r\\n\"));\n  ASSERT_THAT(args_, ElementsAre(ArgType(RespExpr::STRING)));\n  EXPECT_EQ(270000000, args_[0].GetBuf().size());\n}\n\nTEST_F(RedisParserTest, NILs) {\n  ASSERT_EQ(RedisParser::BAD_ARRAYLEN, Parse(\"_\\r\\n\"));\n  parser_.SetClientMode();\n  ASSERT_EQ(RedisParser::OK, Parse(\"_\\r\\nfooobar\"));\n  EXPECT_EQ(3, consumed_);\n}\n\nTEST_F(RedisParserTest, NestedArray) {\n  parser_.SetClientMode();\n\n  // [[['foo'],['bar']],['car']]\n  ASSERT_EQ(RedisParser::OK,\n            Parse(\"*2\\r\\n*2\\r\\n*1\\r\\n$3\\r\\nfoo\\r\\n*1\\r\\n$3\\r\\nbar\\r\\n*1\\r\\n$3\\r\\ncar\\r\\n\"));\n\n  ASSERT_THAT(args_, ElementsAre(ArrArg(2), ArrArg(1)));\n  ASSERT_THAT(args_[0].GetVec(), ElementsAre(ArrArg(1), ArrArg(1)));\n  ASSERT_THAT(args_[1].GetVec(), ElementsAre(\"car\"));\n}\n\nTEST_F(RedisParserTest, UsedMemory) {\n  vector<vector<uint8_t>> blobs;\n  for (size_t i = 0; i < 100; ++i) {\n    blobs.emplace_back(vector<uint8_t>(200));\n  }\n  EXPECT_GT(cmn::HeapSize(blobs), 20000);\n\n  std::vector<std::unique_ptr<RespVec>> stash;\n  RespVec vec;\n  for (unsigned i = 0; i < 10; ++i) {\n    vec.emplace_back(RespExpr::STRING);\n    vec.back().u = RespExpr::Buffer(nullptr, 0);\n  }\n\n  for (unsigned i = 0; i < 100; i++) {\n    stash.emplace_back(new 
RespExpr::Vec(vec));\n  }\n  EXPECT_GT(cmn::HeapSize(stash), 30000);\n}\n\nTEST_F(RedisParserTest, Eol) {\n  ASSERT_EQ(RedisParser::INPUT_PENDING, Parse(\"*1\\r\"));\n  EXPECT_EQ(3, consumed_);\n  ASSERT_EQ(RedisParser::INPUT_PENDING, Parse(\"\\n$5\\r\\n\"));\n  EXPECT_EQ(5, consumed_);\n}\n\nTEST_F(RedisParserTest, BulkSplit) {\n  ASSERT_EQ(RedisParser::INPUT_PENDING, Parse(\"*1\\r\\n$4\\r\\nSADD\\r\"));\n  ASSERT_EQ(13, consumed_);\n  ASSERT_EQ(RedisParser::OK, Parse(\"\\n\"));\n}\n\nTEST_F(RedisParserTest, InlineSplit) {\n  ASSERT_EQ(RedisParser::INPUT_PENDING, Parse(\"\\n\"));\n  EXPECT_EQ(1, consumed_);\n  ASSERT_EQ(RedisParser::OK, Parse(\"\\nPING\\n\\n\"));\n  EXPECT_EQ(6, consumed_);\n  ASSERT_EQ(RedisParser::INPUT_PENDING, Parse(\"\\n\"));\n  EXPECT_EQ(1, consumed_);\n  ASSERT_EQ(RedisParser::INPUT_PENDING, Parse(\"P\"));\n  ASSERT_EQ(RedisParser::OK, Parse(\"ING\\n\"));\n}\n\nTEST_F(RedisParserTest, InlineReset) {\n  ASSERT_EQ(RedisParser::INPUT_PENDING, Parse(\"\\t \\r\\n\"));\n  EXPECT_EQ(4, consumed_);\n  ASSERT_EQ(RedisParser::OK, Parse(\"*1\\r\\n$3\\r\\nfoo\\r\\n\"));\n  EXPECT_EQ(13, consumed_);\n}\n\n}  // namespace facade\n"
  },
  {
    "path": "src/facade/reply_builder.cc",
    "content": "// Copyright 2022, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n#include \"facade/reply_builder.h\"\n\n#include <absl/cleanup/cleanup.h>\n#include <absl/container/fixed_array.h>\n#include <absl/strings/numbers.h>\n#include <absl/strings/str_cat.h>\n#include <double-conversion/double-to-string.h>\n\n#include <limits>\n\n#include \"absl/strings/escaping.h\"\n#include \"absl/types/span.h\"\n#include \"base/logging.h\"\n#include \"facade/error.h\"\n#include \"util/fibers/proactor_base.h\"\n\n#ifdef __APPLE__\n#ifndef UIO_MAXIOV\n// Some versions of MacOSX dont have IOV_MAX\n#define UIO_MAXIOV 1024\n#endif\n#endif\n\nusing namespace std;\nusing namespace double_conversion;\n\nnamespace facade {\n\nnamespace {\n\nconstexpr char kCRLF[] = \"\\r\\n\";\nconstexpr char kSimplePref[] = \"+\";\nconstexpr char kLengthPrefix[] = \"$\";\nconstexpr char kDoublePref[] = \",\";\nconstexpr char kLongPref[] = \":\";\nconstexpr char kNullStringR2[] = \"$-1\\r\\n\";\nconstexpr char kNullStringR3[] = \"_\\r\\n\";\n\nconstexpr unsigned kConvFlags =\n    DoubleToStringConverter::UNIQUE_ZERO | DoubleToStringConverter::EMIT_POSITIVE_EXPONENT_SIGN;\n\nDoubleToStringConverter dfly_conv(kConvFlags, \"inf\", \"nan\", 'e', -6, 21, 6, 0);\n\ntemplate <typename T> size_t piece_size(const T& v) {\n  if constexpr (is_array_v<T>)\n    return ABSL_ARRAYSIZE(v) - 1;  // expect null terminated\n  else if constexpr (is_integral_v<T>)\n    return absl::numbers_internal::kFastToBufferSize;\n  else  // string_view\n    return v.size();\n}\n\ntemplate <size_t S> char* write_piece(const char (&arr)[S], char* dest) {\n  return (char*)memcpy(dest, arr, S - 1) + (S - 1);\n}\n\ntemplate <typename T> enable_if_t<is_integral_v<T>, char*> write_piece(T num, char* dest) {\n  static_assert(!is_same_v<T, char>, \"Use arrays for single chars\");\n  return absl::numbers_internal::FastIntToBuffer(num, dest);\n}\n\nchar* write_piece(string_view str, char* dest) {\n  
return (char*)memcpy(dest, str.data(), str.size()) + str.size();\n}\n\n}  // namespace\n\nthread_local SinkReplyBuilder::PendingList SinkReplyBuilder::pending_list;\n\nSinkReplyBuilder::ReplyAggregator::~ReplyAggregator() {\n  rb->batched_ = prev;\n  if (!prev)\n    rb->Flush();\n}\n\nSinkReplyBuilder::ReplyScope::~ReplyScope() {\n  rb->scoped_ = prev;\n  if (!prev)\n    rb->FinishScope();\n}\n\nvoid SinkReplyBuilder::SendError(ErrorReply error) {\n  if (error.status)\n    return SendError(*error.status);\n  SendError(error.ToSv(), error.kind);\n}\n\nvoid SinkReplyBuilder::SendError(OpStatus status) {\n  if (status == OpStatus::OK)\n    return SendSimpleString(\"OK\");\n  SendError(StatusToMsg(status));\n}\n\nvoid SinkReplyBuilder::CloseConnection() {\n  if (!ec_)\n    ec_ = std::make_error_code(std::errc::connection_aborted);\n}\n\ntemplate <typename... Ts> void SinkReplyBuilder::WritePieces(Ts&&... pieces) {\n  if (size_t required = (piece_size(pieces) + ...); buffer_.AppendLen() <= required)\n    Flush(required);\n\n  auto iovec_end = [](const iovec& v) { return reinterpret_cast<char*>(v.iov_base) + v.iov_len; };\n\n  // Ensure last iovec points to buffer segment\n  char* dest = reinterpret_cast<char*>(buffer_.AppendBuffer().data());\n  if (vecs_.empty()) {\n    vecs_.push_back(iovec{dest, 0});\n  } else if (iovec_end(vecs_.back()) != dest) {\n    if (vecs_.size() >= IOV_MAX - 2)\n      Flush();\n    dest = reinterpret_cast<char*>(buffer_.AppendBuffer().data());\n    vecs_.push_back(iovec{dest, 0});\n  }\n\n  DCHECK(iovec_end(vecs_.back()) == dest);\n  char* ptr = dest;\n  ([&]() { ptr = write_piece(pieces, ptr); }(), ...);\n\n  size_t written = ptr - dest;\n  buffer_.CommitWrite(written);\n  vecs_.back().iov_len += written;\n  total_size_ += written;\n}\n\nvoid SinkReplyBuilder::WriteRef(std::string_view str) {\n  if (vecs_.size() >= IOV_MAX - 2)\n    Flush();\n  vecs_.push_back(iovec{const_cast<char*>(str.data()), str.size()});\n  total_size_ += 
str.size();\n}\n\nvoid SinkReplyBuilder::Flush(size_t expected_buffer_cap) {\n  if (!vecs_.empty())\n    Send();\n\n  // Grow backing buffer if was at least half full and still below it's max size\n  if (buffer_.InputLen() * 2 > buffer_.Capacity() && buffer_.Capacity() * 2 <= kMaxBufferSize)\n    expected_buffer_cap = max(expected_buffer_cap, buffer_.Capacity() * 2);\n\n  total_size_ = 0;\n  buffer_.Clear();\n  vecs_.clear();\n  guaranteed_pieces_ = 0;\n\n  DCHECK_LE(expected_buffer_cap, kMaxBufferSize);  // big strings should be enqueued as iovecs\n\n  if (expected_buffer_cap > buffer_.Capacity())\n    buffer_.Reserve(expected_buffer_cap);\n}\n\nuint64_t SinkReplyBuilder::GetLastSendTimeNs() const {\n  return send_time_ns_;\n}\n\nvoid SinkReplyBuilder::Send() {\n  DCHECK(sink_ != nullptr);\n  DCHECK(!vecs_.empty());\n  auto& reply_stats = tl_facade_stats->reply_stats;\n\n  send_time_ns_ = util::fb2::ProactorBase::GetMonotonicTimeNs();\n  PendingPin pin(send_time_ns_);\n\n  pending_list.push_back(pin);\n\n  reply_stats.io_write_cnt++;\n  reply_stats.io_write_bytes += total_size_;\n  DVLOG(2) << \"Writing \" << total_size_ << \" bytes\";\n  if (auto ec = sink_->Write(vecs_.data(), vecs_.size()); ec)\n    ec_ = ec;\n\n  auto it = PendingList::s_iterator_to(pin);\n  pending_list.erase(it);\n\n  send_time_ns_ = 0;\n\n  uint64_t after_ns = util::fb2::ProactorBase::GetMonotonicTimeNs();\n  reply_stats.send_stats.count++;\n  reply_stats.send_stats.total_duration += (after_ns - pin.timestamp_ns);\n  DVLOG(2) << \"Finished writing \" << total_size_ << \" bytes\";\n}\n\nvoid SinkReplyBuilder::FinishScope() {\n  replies_recorded_++;\n\n  if (!batched_ || total_size_ * 2 >= kMaxBufferSize /* copying isn't worth it */)\n    return Flush();\n\n  // Check if we have enough space to copy all refs to buffer\n  size_t ref_bytes = total_size_ - buffer_.InputLen();\n  if (ref_bytes > buffer_.AppendLen())\n    return Flush(ref_bytes);\n\n  // Copy all external references to buffer to 
safely keep batching\n  for (size_t i = guaranteed_pieces_; i < vecs_.size(); i++) {\n    auto ib = buffer_.InputBuffer();\n    if (vecs_[i].iov_base >= ib.data() && vecs_[i].iov_base <= ib.data() + ib.size())\n      continue;  // this is a piece\n\n    DCHECK_LE(vecs_[i].iov_len, buffer_.AppendLen());\n    void* dest = buffer_.AppendBuffer().data();\n    memcpy(dest, vecs_[i].iov_base, vecs_[i].iov_len);\n    buffer_.CommitWrite(vecs_[i].iov_len);\n    vecs_[i].iov_base = dest;\n  }\n  guaranteed_pieces_ = vecs_.size();  // all vecs are pieces\n}\n\nMCReplyBuilder::MCReplyBuilder(::io::Sink* sink) : SinkReplyBuilder(sink) {\n}\n\nvoid MCReplyBuilder::SendValue(MemcacheCmdFlags cmd_flags, std::string_view key,\n                               std::string_view value, uint64_t mc_token, uint32_t mc_flag,\n                               uint32_t ttl_sec) {\n  ReplyScope scope(this);\n  if (cmd_flags.meta) {\n    string flags;\n    if (cmd_flags.return_flags)\n      absl::StrAppend(&flags, \" f\", mc_flag);\n    if (cmd_flags.return_cas)\n      absl::StrAppend(&flags, \" c\", mc_token);\n    if (cmd_flags.return_ttl)\n      absl::StrAppend(&flags, \" t\", ttl_sec);\n\n    if (cmd_flags.return_value) {\n      WritePieces(\"VA \", value.size(), flags, kCRLF);\n      if (value.size() <= kMaxInlineSize) {\n        WritePieces(value, kCRLF);\n      } else {\n        WriteRef(value);\n        WritePieces(kCRLF);\n      }\n    } else {\n      WritePieces(\"HD \", flags, kCRLF);\n    }\n  } else {\n    WritePieces(\"VALUE \", key, \" \", mc_flag, \" \", value.size());\n    if (cmd_flags.return_cas)\n      WritePieces(\" \", mc_token);\n\n    if (value.size() <= kMaxInlineSize) {\n      WritePieces(kCRLF, value, kCRLF);\n    } else {\n      WritePieces(kCRLF);\n      WriteRef(value);\n      WritePieces(kCRLF);\n    }\n  }\n}\n\nvoid MCReplyBuilder::SendSimpleString(std::string_view str) {\n  if (str.empty())\n    return;\n  ReplyScope scope(this);\n  WritePieces(str, 
kCRLF);\n}\n\nvoid MCReplyBuilder::SendLong(long val) {\n  SendSimpleString(absl::StrCat(val));\n}\n\nvoid MCReplyBuilder::SendError(string_view str, std::string_view type) {\n  last_error_ = str;\n  SendSimpleString(absl::StrCat(\"SERVER_ERROR \", str));\n}\n\nvoid MCReplyBuilder::SendProtocolError(std::string_view str) {\n  SendSimpleString(absl::StrCat(\"CLIENT_ERROR \", str));\n}\n\nvoid MCReplyBuilder::SendClientError(string_view str) {\n  SendSimpleString(absl::StrCat(\"CLIENT_ERROR \", str));\n}\n\nvoid MCReplyBuilder::SendRaw(std::string_view str) {\n  ReplyScope scope(this);\n  WriteRef(str);\n}\n\nvoid RedisReplyBuilderBase::SendNull() {\n  ReplyScope scope(this);\n  IsResp3() ? WritePieces(kNullStringR3) : WritePieces(kNullStringR2);\n}\n\nvoid RedisReplyBuilderBase::SendSimpleString(std::string_view str) {\n  ReplyScope scope(this);\n  if (str.size() <= kMaxInlineSize * 2)\n    return WritePieces(kSimplePref, str, kCRLF);\n\n  WritePieces(kSimplePref);\n  WriteRef(str);\n  WritePieces(kCRLF);\n}\n\nvoid RedisReplyBuilderBase::SendBulkString(std::string_view str) {\n  ReplyScope scope(this);\n  if (str.size() <= kMaxInlineSize)\n    return WritePieces(kLengthPrefix, uint32_t(str.size()), kCRLF, str, kCRLF);\n\n  DVLOG(1) << \"SendBulk \" << str.size();\n  WritePieces(kLengthPrefix, uint32_t(str.size()), kCRLF);\n  WriteRef(str);\n  WritePieces(kCRLF);\n}\n\nvoid RedisReplyBuilderBase::SendLong(long val) {\n  ReplyScope scope(this);\n  WritePieces(kLongPref, val, kCRLF);\n}\n\nvoid RedisReplyBuilderBase::SendDouble(double val) {\n  char buf[DoubleToStringConverter::kBase10MaximalLength + 8];  // +8 to be on the safe side.\n  static_assert(ABSL_ARRAYSIZE(buf) < kMaxInlineSize, \"Write temporary string from buf inline\");\n  string_view val_str = FormatDouble(val, buf, ABSL_ARRAYSIZE(buf));\n\n  if (!IsResp3())\n    return SendBulkString(val_str);\n\n  ReplyScope scope(this);\n  WritePieces(kDoublePref, val_str, kCRLF);\n}\n\nvoid 
RedisReplyBuilderBase::SendNullArray() {\n  ReplyScope scope(this);\n  WritePieces(\"*-1\", kCRLF);\n}\n\nconstexpr static const char START_SYMBOLS2[4][2] = {\"*\", \"~\", \"%\", \">\"};\nstatic_assert(START_SYMBOLS2[unsigned(CollectionType::MAP)][0] == '%' &&\n              START_SYMBOLS2[unsigned(CollectionType::SET)][0] == '~');\n\nvoid RedisReplyBuilderBase::StartCollection(unsigned len, CollectionType ct) {\n  if (!IsResp3()) {  // RESP2 supports only arrays\n    if (ct == CollectionType::MAP)\n      len *= 2;\n    ct = CollectionType::ARRAY;\n  }\n  ReplyScope scope(this);\n  WritePieces(START_SYMBOLS2[unsigned(ct)], len, kCRLF);\n}\n\nvoid RedisReplyBuilderBase::SendError(std::string_view str, std::string_view type) {\n  ReplyScope scope(this);\n\n  if (type.empty()) {\n    type = str;\n    if (type == kSyntaxErr)\n      type = kSyntaxErrType;\n  }\n  tl_facade_stats->reply_stats.err_count[type]++;\n  last_error_ = str;\n\n  if (str[0] != '-') {\n    WritePieces(\"-ERR \");\n  }\n  if (str.size() <= kMaxInlineSize) {\n    WritePieces(str, kCRLF);\n  } else {\n    WriteRef(str);\n    WritePieces(kCRLF);\n  }\n}\n\nvoid RedisReplyBuilderBase::SendProtocolError(std::string_view str) {\n  SendError(absl::StrCat(\"-ERR Protocol error: \", str), \"protocol_error\");\n}\n\nchar* RedisReplyBuilderBase::FormatDouble(double d, char* dest, unsigned len) {\n  StringBuilder sb(dest, len);\n  CHECK(dfly_conv.ToShortest(d, &sb));\n  return sb.Finalize();\n}\n\nvoid RedisReplyBuilderBase::SendVerbatimString(std::string_view str, VerbatimFormat format) {\n  DCHECK(format <= VerbatimFormat::MARKDOWN);\n  if (!IsResp3())\n    return SendBulkString(str);\n\n  ReplyScope scope(this);\n  WritePieces(\"=\", str.size() + 4, kCRLF, format == VerbatimFormat::MARKDOWN ? 
\"mkd:\" : \"txt:\");\n  if (str.size() <= kMaxInlineSize)\n    WritePieces(str);\n  else\n    WriteRef(str);\n  WritePieces(kCRLF);\n}\n\nstd::string RedisReplyBuilderBase::SerializeCommand(std::string_view command) {\n  return string{command} + kCRLF;\n}\n\nvoid RedisReplyBuilder::SendSimpleStrArr(const facade::ArgRange& strs) {\n  ReplyScope scope(this);\n  StartArray(strs.Size());\n  for (std::string_view str : strs)\n    SendSimpleString(str);\n}\n\nvoid RedisReplyBuilder::SendBulkStrArr(const facade::ArgRange& strs, CollectionType ct) {\n  ReplyScope scope(this);\n  StartCollection(ct == CollectionType::MAP ? strs.Size() / 2 : strs.Size(), ct);\n  for (std::string_view str : strs)\n    SendBulkString(str);\n}\n\nvoid RedisReplyBuilder::SendScoredArray(ScoredArray arr, bool with_scores) {\n  ReplyScope scope(this);\n  StartArray((with_scores && !IsResp3()) ? arr.size() * 2 : arr.size());\n  for (const auto& [str, score] : arr) {\n    if (with_scores && IsResp3())\n      StartArray(2);\n    SendBulkString(str);\n    if (with_scores)\n      SendDouble(score);\n  }\n}\n\nvoid RedisReplyBuilder::SendLabeledScoredArray(std::string_view arr_label, ScoredArray arr) {\n  ReplyScope scope(this);\n\n  StartArray(2);\n\n  SendBulkString(arr_label);\n  StartArray(arr.size());\n  for (const auto& [str, score] : arr) {\n    StartArray(2);\n    SendBulkString(str);\n    SendDouble(score);\n  }\n}\n\ntemplate <typename I> void RedisReplyBuilder::SendLongArr(absl::Span<const I> longs) {\n  static_assert(std::is_integral_v<I>, \"Must use integral type\");\n  ReplyScope scope(this);\n  StartArray(longs.size());\n  for (auto v : longs) {\n    if constexpr (std::is_unsigned_v<I>)\n      DCHECK_LE(uint64_t(v), uint64_t(std::numeric_limits<long>::max()));\n    SendLong(v);\n  }\n}\n\ntemplate void RedisReplyBuilder::SendLongArr<long>(absl::Span<const long>);\ntemplate void RedisReplyBuilder::SendLongArr<int32_t>(absl::Span<const int32_t>);\ntemplate void 
RedisReplyBuilder::SendLongArr<uint32_t>(absl::Span<const uint32_t>);\ntemplate void RedisReplyBuilder::SendLongArr<uint64_t>(absl::Span<const uint64_t>);\n\nvoid RedisReplyBuilder::StartArray(unsigned len) {\n  StartCollection(len, CollectionType::ARRAY);\n}\n\nvoid RedisReplyBuilder::SendEmptyArray() {\n  StartArray(0);\n}\n\n}  // namespace facade\n"
  },
  {
    "path": "src/facade/reply_builder.h",
    "content": "// Copyright 2022, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n#pragma once\n\n#include <absl/container/flat_hash_map.h>\n\n#include <boost/intrusive/list.hpp>\n#include <optional>\n#include <string_view>\n\n#include \"facade/facade_stats.h\"\n#include \"facade/facade_types.h\"\n#include \"io/io.h\"\n\nnamespace facade {\n\nenum class RespVersion { kResp2, kResp3 };\n\n// Base class for all reply builders. Offer a simple high level interface for controlling output\n// modes and sending basic response types.\nclass SinkReplyBuilder {\n  struct GuardBase {\n    bool prev;\n    SinkReplyBuilder* rb;\n  };\n\n public:\n  constexpr static size_t kMaxInlineSize = 32;\n  constexpr static size_t kMaxBufferSize = 8192;\n\n  struct PendingPin : public boost::intrusive::list_base_hook<\n                          ::boost::intrusive::link_mode<::boost::intrusive::normal_link>> {\n    uint64_t timestamp_ns;\n\n    PendingPin(uint64_t v = 0) : timestamp_ns(v) {\n    }\n  };\n\n  using PendingList =\n      boost::intrusive::list<PendingPin, boost::intrusive::constant_time_size<false>,\n                             boost::intrusive::cache_last<false>>;\n\n  static thread_local PendingList pending_list;\n\n  explicit SinkReplyBuilder(io::Sink* sink) : sink_(sink) {\n  }\n\n  virtual ~SinkReplyBuilder() = default;\n\n  // USE WITH CARE! ReplyScope assumes that all string views in Send calls keep valid for the scopes\n  // lifetime. 
This allows the builder to avoid copies by enqueueing long strings directly for\n  // vectorized io.\n  struct ReplyScope : GuardBase {\n    explicit ReplyScope(SinkReplyBuilder* rb) : GuardBase{std::exchange(rb->scoped_, true), rb} {\n    }\n\n    ~ReplyScope();\n  };\n\n  // Aggregator reduces the number of raw send calls by copying data in an intermediate buffer.\n  // Prefer ReplyScope if possible to additionally reduce the number of copies.\n  struct ReplyAggregator : GuardBase {\n    explicit ReplyAggregator(SinkReplyBuilder* rb)\n        : GuardBase{std::exchange(rb->batched_, true), rb} {\n    }\n\n    ~ReplyAggregator();\n  };\n\n  void Flush(size_t expected_buffer_cap = 0);  // Send all accumulated data and reset to clear state\n\n  std::error_code GetError() const {\n    return ec_;\n  }\n\n  size_t UsedMemory() const {\n    return buffer_.Capacity();\n  }\n\n  size_t RepliesRecorded() const {\n    return replies_recorded_;\n  }\n\n  bool IsSendActive() const {\n    return send_time_ns_ > 0;\n  }\n\n  void SetBatchMode(bool b) {\n    batched_ = b;\n  }\n\n  void CloseConnection();\n\n  static const ReplyStats& GetThreadLocalStats() {\n    return tl_facade_stats->reply_stats;\n  }\n\n public:  // High level interface\n  virtual Protocol GetProtocol() const = 0;\n\n  virtual void SendLong(long val) = 0;\n  virtual void SendSimpleString(std::string_view str) = 0;\n\n  void SendOk() {\n    SendSimpleString(\"OK\");\n  }\n\n  virtual void SendError(std::string_view str, std::string_view type = {}) = 0;  // MC and Redis\n  void SendError(OpStatus status);\n  void SendError(ErrorReply error);\n  virtual void SendProtocolError(std::string_view str) = 0;\n\n  std::string ConsumeLastError() {\n    return std::exchange(last_error_, {});\n  }\n\n  uint64_t GetLastSendTimeNs() const;\n\n protected:\n  template <typename... Ts>\n  void WritePieces(Ts&&... 
pieces);     // Copy pieces into buffer and reference buffer\n  void WriteRef(std::string_view str);  // Add iovec bypassing buffer\n\n  void FinishScope();  // Called when scope ends to flush buffer if needed\n  void Send();\n\n protected:\n  size_t replies_recorded_ = 0;\n  std::string last_error_;\n\n private:\n  io::Sink* sink_;\n  std::error_code ec_;\n\n  bool scoped_ = false, batched_ = false;\n\n  size_t total_size_ = 0;  // sum of vec_ lengths\n  base::IoBuf buffer_;     // backing buffer for pieces\n\n  // Stores iovecs for a single writev call. Can reference either the buffer (WritePiece) or\n  // external data (WriteRef). Validity is ensured by FinishScope that either flushes before ref\n  // lifetime ends or copies refs to the buffer.\n  absl::InlinedVector<iovec, 16> vecs_;\n  size_t guaranteed_pieces_ = 0;  // length of prefix of vecs_ that are guaranteed to be pieces\n  uint64_t send_time_ns_ = 0;\n};\n\nclass MCReplyBuilder : public SinkReplyBuilder {\n public:\n  explicit MCReplyBuilder(::io::Sink* sink);\n\n  ~MCReplyBuilder() override = default;\n\n  Protocol GetProtocol() const final {\n    return Protocol::MEMCACHE;\n  }\n\n  void SendError(std::string_view str, std::string_view type = std::string_view{}) final;\n\n  void SendLong(long val) final;\n\n  void SendClientError(std::string_view str);\n  void SendValue(MemcacheCmdFlags cmd_flags, std::string_view key, std::string_view value,\n                 uint64_t mc_token, uint32_t mc_flag, uint32_t ttl_sec);\n  void SendSimpleString(std::string_view str) final;\n  void SendProtocolError(std::string_view str) final;\n\n  void SendRaw(std::string_view str);\n};\n\n// Redis reply builder interface for sending RESP data.\nclass RedisReplyBuilderBase : public SinkReplyBuilder {\n public:\n  enum VerbatimFormat : uint8_t { TXT, MARKDOWN };\n\n  explicit RedisReplyBuilderBase(io::Sink* sink) : SinkReplyBuilder(sink) {\n  }\n\n  ~RedisReplyBuilderBase() override = default;\n\n  Protocol GetProtocol() 
const final {\n    return Protocol::REDIS;\n  }\n\n  virtual void SendNull();\n\n  void SendSimpleString(std::string_view str) override;\n  virtual void SendBulkString(std::string_view str);  // RESP: Blob String\n\n  void SendLong(long val) override;\n  virtual void SendDouble(double val);  // RESP: Number\n\n  virtual void SendNullArray();\n  virtual void StartCollection(unsigned len, CollectionType ct);\n\n  using SinkReplyBuilder::SendError;\n  void SendError(std::string_view str, std::string_view type = {}) override;\n  void SendProtocolError(std::string_view str) override;\n\n  virtual void SendVerbatimString(std::string_view str, VerbatimFormat format = TXT);\n\n  static char* FormatDouble(double d, char* dest, unsigned len);\n  static std::string SerializeCommand(std::string_view command);\n\n  bool IsResp3() const {\n    return resp_ == RespVersion::kResp3;\n  }\n\n  void SetRespVersion(RespVersion resp_version) {\n    resp_ = resp_version;\n  }\n\n  RespVersion GetRespVersion() {\n    return resp_;\n  }\n\n private:\n  RespVersion resp_ = RespVersion::kResp2;\n};\n\n// Non essential redis reply builder functions implemented on top of the base resp protocol\nclass RedisReplyBuilder : public RedisReplyBuilderBase {\n public:\n  using ScoredArray = absl::Span<const std::pair<std::string, double>>;\n\n  RedisReplyBuilder(io::Sink* sink) : RedisReplyBuilderBase(sink) {\n  }\n\n  ~RedisReplyBuilder() override = default;\n\n  // One-liner for ReplyScope + StartArray\n  struct ArrayScope : ReplyScope {\n    ArrayScope(RedisReplyBuilder* rb, size_t len) : ReplyScope(rb) {\n      rb->StartArray(len);\n    }\n  };\n\n  void SendSimpleStrArr(const facade::ArgRange& strs);\n  void SendBulkStrArr(const facade::ArgRange& strs, CollectionType ct = CollectionType::ARRAY);\n  template <typename I> void SendLongArr(absl::Span<const I> longs);\n\n  void SendScoredArray(ScoredArray arr, bool with_scores);\n  void SendLabeledScoredArray(std::string_view arr_label, ScoredArray 
arr);\n  void StartArray(unsigned len);\n  void SendEmptyArray();\n};\n\n#define RETURN_ON_PARSE_ERROR(parser, rb)       \\\n  do {                                          \\\n    if (auto err = (parser).TakeError(); err) { \\\n      return (rb)->SendError(err.MakeReply());  \\\n    }                                           \\\n  } while (0)\n\n}  // namespace facade\n"
  },
  {
    "path": "src/facade/reply_builder_test.cc",
    "content": "// Copyright 2024, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#include \"facade/reply_builder.h\"\n\n#include <absl/strings/str_cat.h>\n#include <absl/strings/str_split.h>\n#include <facade/resp_parser.h>\n#include <mimalloc.h>\n\n#include <random>\n\n#include \"base/gtest.h\"\n#include \"base/logging.h\"\n#include \"facade/error.h\"\n#include \"facade/facade_test.h\"\n#include \"facade/redis_parser.h\"\n#include \"facade/reply_capture.h\"\n#include \"facade/resp_expr_test_utils.h\"\n\nusing namespace testing;\nusing namespace std;\n\nnamespace facade {\n\nnamespace {\n\nconst std::string_view kErrorStrPreFix = \"-ERR \";\nconstexpr std::string_view kCRLF = \"\\r\\n\";\nconstexpr char kErrorStartChar = '-';\nconstexpr char kStringStartChar = '+';\nconstexpr std::string_view kOKMessage = \"+OK\\r\\n\";\nconstexpr char kArrayStart = '*';\nconstexpr char kBulkString = '$';\nconstexpr char kIntStart = ':';\nconst std::string_view kIntStartString = \":\";\nconst std::string_view kNullBulkString = \"$-1\\r\\n\";\nconst std::string_view kBulkStringStart = \"$\";\nconst std::string_view kStringStart = \"+\";\nconst std::string_view kErrorStart = \"-\";\nconst std::string_view kArrayStartString = \"*\";\nconstexpr std::size_t kMinPayloadLen = 3;  // the begin type char and \"\\r\\n\" at the end\n\nstd::string BuildExpectedErrorString(std::string_view msg) {\n  if (msg.at(0) == kErrorStartChar) {\n    return absl::StrCat(msg, kCRLF);\n  } else {\n    return absl::StrCat(kErrorStrPreFix, msg, kCRLF);\n  }\n}\n\nstd::string_view GetErrorType(std::string_view err) {\n  return err == kSyntaxErr ? 
kSyntaxErrType : err;\n}\n\n}  // namespace\n\nclass RedisReplyBuilderTest : public testing::Test {\n public:\n  struct ParsingResults {\n    RedisParser::Result result = RedisParser::OK;\n    RespExpr::Vec args;\n    std::uint32_t consumed = 0;\n\n    ParsingResults(std::optional<RESPObj> obj = std::nullopt, size_t buf_pos = 0) {\n      if (!obj.has_value() || obj->Empty()) {\n        return;\n      }\n\n      holder_.emplace(std::move(*obj));\n\n      result = RedisParser::OK;\n      consumed = buf_pos;\n\n      if (holder_->GetType() == RESPObj::Type::ARRAY) {\n        auto arr = holder_->As<RESPArray>();\n        if (!arr.has_value()) {\n          result = RedisParser::BAD_ARRAYLEN;\n          return;\n        }\n\n        args.reserve(arr->Size());\n        for (size_t i = 0; i < arr->Size(); ++i) {\n          args.push_back(expr_builder_.BuildExpr((*arr)[i]));\n        }\n        return;\n      }\n\n      args.push_back(expr_builder_.BuildExpr(*holder_));\n    }\n\n    bool Verify(std::uint32_t expected) const {\n      return consumed == expected && result == RedisParser::OK;\n    }\n\n    bool IsError() const {\n      return result != RedisParser::OK || (args.size() == 1 && args[0].type == RespExpr::ERROR);\n    }\n\n    bool IsOk() const {\n      return IsString();\n    }\n\n    bool IsNull() const {\n      return result == RedisParser::OK && args.size() == 1 && args.at(0).type == RespExpr::NIL;\n    }\n\n    bool IsString() const {\n      return args.size() == 1 && result == RedisParser::OK && args[0].type == RespExpr::STRING;\n    }\n\n   private:\n    std::optional<RESPObj> holder_;\n    RespExprBuilder expr_builder_;\n  };\n\n  void SetUp() {\n    sink_.Clear();\n    builder_.reset(new RedisReplyBuilder(&sink_));\n    ResetStats();\n  }\n\n  static void SetUpTestSuite() {\n    tl_facade_stats = new FacadeStats;\n    init_zmalloc_threadlocal(mi_heap_get_backing());\n  }\n\n protected:\n  std::vector<std::string_view> RawTokenizedMessage() const {\n    
CHECK(!str().empty());\n    return absl::StrSplit(str(), kCRLF);\n  }\n\n  std::string_view str() const {\n    return sink_.str();\n  }\n\n  std::string TakePayload() {\n    std::string ret = sink_.str();\n    sink_.Clear();\n    return ret;\n  }\n\n  std::size_t SinkSize() const {\n    return str().size();\n  }\n\n  unsigned GetError(string_view err) const {\n    const auto& map = SinkReplyBuilder::GetThreadLocalStats().err_count;\n    auto it = map.find(err);\n    return it == map.end() ? 0 : it->second;\n  }\n\n  static bool NoErrors() {\n    return tl_facade_stats->reply_stats.err_count.empty();\n  }\n\n  static const ReplyStats& GetReplyStats() {\n    return tl_facade_stats->reply_stats;\n  }\n\n  // Breaks the string we have in sink into tokens.\n  // In  RESP each token is build up from series of bytes follow by \"\\r\\n\"\n  // This function don't try to parse the message, only to break the strings based\n  // on the delimiter \"\\r\\n\". It is up to the test to verify these tokens\n  std::vector<std::string_view> TokenizeMessage() const;\n\n  // Call the redis parser with the data in the sink\n  ParsingResults Parse();\n\n  io::StringSink sink_;\n  std::unique_ptr<RedisReplyBuilder> builder_;\n  std::unique_ptr<std::uint8_t[]> parser_buffer_;\n};\n\nstd::vector<std::string_view> RedisReplyBuilderTest::TokenizeMessage() const {\n  std::vector<std::string_view> message_tokens = RawTokenizedMessage();\n  CHECK(message_tokens.back().empty());  // we're expecting to last to be empty as it only has \\r\\n\n  message_tokens.pop_back();             // remove this empty entry\n  std::string_view data = str();\n  switch (data[0]) {\n    case kArrayStart:\n      // in the case of array. 
we cannot tell the expected tokens number without doing parsing for\n      // sub elements\n      break;\n    case kBulkString:\n      if (data == kNullBulkString) {\n        CHECK(message_tokens.size() == 1)\n            << \"NULL bulk string should only have one token, got \" << message_tokens.size();\n      } else {\n        CHECK(message_tokens.size() == 2)\n            << \"bulk string should only have two tokens, got \" << message_tokens.size();\n      }\n      break;\n    case kErrorStartChar:\n    case kStringStartChar:\n    case kIntStart:\n      // for errors and string and ints we don't really need to split as there must be only one\n      // entry for \\r\\n\n      CHECK(message_tokens.size() == 1)\n          << \"string/error message must have only one token got \" << message_tokens.size();\n      break;\n    default:\n      LOG(FATAL) << \"invalid start char [\" << data[0] << \"]\";\n      break;\n  }\n  return message_tokens;\n}\n\nstd::ostream& operator<<(std::ostream& os, const RedisReplyBuilderTest::ParsingResults& res) {\n  os << \"result{consumed bytes:\" << res.consumed << \", status: \" << res.result << \" result count \"\n     << res.args.size() << \", first entry result: \";\n  if (!res.args.empty()) {\n    if (res.args.size() > 1) {\n      os << \"ARRAY: \";\n    }\n\n    for (const auto& e : res.args) {\n      os << e << \"\\n\";\n    }\n  } else {\n    os << \"NILL\";\n  }\n  return os << \"}\";\n}\n\nRedisReplyBuilderTest::ParsingResults RedisReplyBuilderTest::Parse() {\n  parser_buffer_.reset(new uint8_t[SinkSize()]);\n  auto* ptr = parser_buffer_.get();\n  memcpy(ptr, str().data(), SinkSize());\n  RESPParser parser;\n  auto resp_obj = parser.Feed(reinterpret_cast<char*>(ptr), SinkSize());\n  size_t buf_pos = parser.BufferPos();\n  buf_pos =\n      resp_obj && !buf_pos ? 
SinkSize() : buf_pos;  // after parsing if success buf_pos can be 0\n\n  ParsingResults result(std::move(resp_obj), buf_pos);\n  return result;\n}\n\n///////////////////////////////////////////////////////////////////////////////\n\nTEST_F(RedisReplyBuilderTest, MessageSend) {\n  // Test each message that is \"sent\" to the sink\n  builder_->SinkReplyBuilder::SendOk();\n  ASSERT_EQ(TakePayload(), kOKMessage);\n  builder_->StartArray(10);\n\n  std::string_view hello_msg = \"hello\";\n  builder_->SendBulkString(hello_msg);\n  std::string expected_bulk_string = absl::StrCat(\n      \"*10\\r\\n\", kBulkStringStart, std::to_string(hello_msg.size()), kCRLF, hello_msg, kCRLF);\n  ASSERT_EQ(TakePayload(), expected_bulk_string);\n}\n\nTEST_F(RedisReplyBuilderTest, SimpleError) {\n  // test with simple error case. This means that we must comply to\n  // https://redis.io/docs/reference/protocol-spec/#resp-errors\n  std::string_view error = \"my error\";\n  std::string_view empty_type;\n\n  builder_->SendError(error, empty_type);\n  // must start with \"-\" and ends with \"\\r\\n\"\n  // ASSERT_EQ(sink_.str().at(0), kErrorStartChar);\n  ASSERT_TRUE(absl::StartsWith(str(), kErrorStart));\n  ASSERT_TRUE(absl::EndsWith(str(), kCRLF));\n  ASSERT_EQ(GetError(error), 1);\n  ASSERT_EQ(str(), BuildExpectedErrorString(error))\n      << \" error different from expected - '\" << str() << \"'\";\n  auto parsing = Parse();\n  ASSERT_TRUE(parsing.Verify(SinkSize()));\n  ASSERT_TRUE(parsing.IsError()) << \" result: \" << parsing;\n  EXPECT_THAT(parsing.args, ElementsAre(ErrArg(absl::StrCat(\"ERR \", error))));\n\n  sink_.Clear();\n  builder_->SendError(OpStatus::OK);  // in this case we should not have an error string\n  ASSERT_TRUE(absl::StartsWith(str(), kStringStart));\n  ASSERT_EQ(str(), kOKMessage);\n\n  ASSERT_TRUE(absl::EndsWith(str(), kCRLF));\n  ASSERT_EQ(GetError(error), 1);\n\n  parsing = Parse();\n  ASSERT_TRUE(parsing.Verify(SinkSize()));\n  ASSERT_TRUE(parsing.IsOk()) << \" 
result: \" << parsing;\n  EXPECT_THAT(parsing.args, ElementsAre(\"OK\"));\n}\n\nTEST_F(RedisReplyBuilderTest, VeryLongError) {\n  std::string long_error(10 * 1024, 'X');  // 10KB error\n  std::string_view empty_type;\n\n  builder_->SendError(long_error, empty_type);\n\n  ASSERT_TRUE(absl::StartsWith(str(), kErrorStart));\n  ASSERT_TRUE(absl::EndsWith(str(), kCRLF));\n}\n\nTEST_F(RedisReplyBuilderTest, ErrorBuiltInMessage) {\n  OpStatus error_codes[] = {\n      OpStatus::KEY_NOTFOUND,  OpStatus::OUT_OF_RANGE,  OpStatus::WRONG_TYPE,\n      OpStatus::OUT_OF_MEMORY, OpStatus::INVALID_FLOAT, OpStatus::INVALID_INT,\n      OpStatus::SYNTAX_ERR,    OpStatus::BUSY_GROUP,    OpStatus::INVALID_NUMERIC_RESULT};\n  for (const auto& err : error_codes) {\n    const std::string_view error_name = StatusToMsg(err);\n    const std::string_view error_type = GetErrorType(error_name);\n\n    sink_.Clear();\n    builder_->SendError(err);\n    ASSERT_TRUE(absl::StartsWith(str(), kErrorStart)) << \" invalid start char for \" << err;\n    ASSERT_TRUE(absl::EndsWith(str(), kCRLF)) << \" failed to find correct termination at \" << err;\n    ASSERT_EQ(GetError(error_type), 1) << \" number of error count is invalid for \" << err;\n    ASSERT_EQ(str(), BuildExpectedErrorString(error_name))\n        << \" error different from expected - '\" << str() << \"'\";\n\n    auto parsing_output = Parse();\n    ASSERT_TRUE(parsing_output.Verify(SinkSize()))\n        << \" verify for the result is invalid for \" << err;\n    ASSERT_TRUE(parsing_output.IsError()) << \" expecting error for \" << err;\n  }\n}\n\nTEST_F(RedisReplyBuilderTest, ErrorReplyBuiltInMessage) {\n  ErrorReply err{OpStatus::OUT_OF_RANGE};\n  builder_->SendError(err);\n  ASSERT_TRUE(absl::StartsWith(str(), kErrorStart));\n  ASSERT_TRUE(absl::EndsWith(str(), kCRLF));\n  ASSERT_EQ(GetError(kIndexOutOfRange), 1);\n  ASSERT_EQ(str(), BuildExpectedErrorString(kIndexOutOfRange));\n\n  auto parsing_output = Parse();\n  
ASSERT_TRUE(parsing_output.Verify(SinkSize()));\n  ASSERT_TRUE(parsing_output.IsError());\n  sink_.Clear();\n\n  err = ErrorReply{\"e1\", \"e2\"};\n  builder_->SendError(err);\n  ASSERT_TRUE(absl::StartsWith(str(), kErrorStart));\n  ASSERT_TRUE(absl::EndsWith(str(), kCRLF));\n  ASSERT_EQ(GetError(\"e2\"), 1);\n  ASSERT_EQ(str(), BuildExpectedErrorString(\"e1\"));\n\n  parsing_output = Parse();\n  ASSERT_TRUE(parsing_output.Verify(SinkSize()));\n  ASSERT_TRUE(parsing_output.IsError());\n}\n\nTEST_F(RedisReplyBuilderTest, ErrorNoneBuiltInMessage) {\n  // All these op codes creating the same error message\n  OpStatus none_unique_codes[] = {OpStatus::SKIPPED, OpStatus::KEY_EXISTS, OpStatus::INVALID_VALUE,\n                                  OpStatus::TIMED_OUT, OpStatus::STREAM_ID_SMALL};\n  uint64_t error_count = 0;\n  for (const auto& err : none_unique_codes) {\n    const std::string_view error_name = StatusToMsg(err);\n    const std::string_view error_type = GetErrorType(error_name);\n\n    sink_.Clear();\n    builder_->SendError(err);\n    ASSERT_TRUE(absl::StartsWith(str(), kErrorStart)) << \" invalid start char for \" << err;\n    ASSERT_TRUE(absl::EndsWith(str(), kCRLF));\n    auto current_error_count = GetError(error_type);\n    error_count++;\n    ASSERT_EQ(current_error_count, error_count) << \" number of error count is invalid for \" << err;\n    auto parsing_output = Parse();\n    ASSERT_TRUE(parsing_output.Verify(SinkSize()))\n        << \" verify for the result is invalid for \" << err;\n\n    ASSERT_TRUE(parsing_output.IsError()) << \" expecting error for \" << err;\n  }\n}\n\nTEST_F(RedisReplyBuilderTest, StringMessage) {\n  // This would test a message that contain a string in it\n  // For string this is simple, any string message should start with + and ends with \\r\\n\n  // there can never be more than single \\r\\n in it as well as no special chars\n  const std::string_view payloads[] = {\n      \"this is a string message\", \"$$$$$\", \"12334\", 
\"1v%6&*\", \"@@@\", \"----\", \"!!!\"};\n  for (auto payload : payloads) {\n    const std::size_t expected_len = payload.size() + kCRLF.size() + 1;  // include '+' at the start\n    sink_.Clear();\n    builder_->SendSimpleString(payload);\n    ASSERT_EQ(SinkSize(), expected_len);\n    ASSERT_TRUE(absl::StartsWith(str(), kStringStart));\n    ASSERT_TRUE(absl::EndsWith(str(), kCRLF));\n    // auto message_payload = SimpleStringPayload();\n    //  ASSERT_EQ(message_payload, payload);\n    ASSERT_TRUE(absl::StartsWith(str(), kStringStart));\n    ASSERT_TRUE(absl::EndsWith(str(), kCRLF));\n    auto data = str();\n    data.remove_suffix(kCRLF.size());\n    ASSERT_TRUE(absl::EndsWith(data, payload));\n  }\n}\n\nTEST_F(RedisReplyBuilderTest, EmptyArray) {\n  // This test would build an array and try sending it over the \"wire\"\n  // The array starts with the '*', then the number of elements in the array\n  // then \"\\r\\n\", then each element inside is encoded accordingly\n  // an empty array has this \"*0\\r\\n\" form\n  const std::string_view empty_array = \"*0\\r\\n\";\n  const std::string_view null_array = \"*-1\\r\\n\";\n  builder_->StartArray(0);\n  ASSERT_EQ(str(), empty_array);\n\n  sink_.Clear();\n  builder_->SendNullArray();\n  ASSERT_EQ(null_array, str());\n\n  sink_.Clear();\n  builder_->SendEmptyArray();\n  ASSERT_EQ(str(), empty_array);\n}\n\nTEST_F(RedisReplyBuilderTest, StrArray) {\n  std::vector<std::string_view> string_vector{\"hello\", \"world\", \"111\", \"@3#$^&*~\"};\n  builder_->StartArray(string_vector.size());\n  std::size_t expected_size = kCRLF.size() + 2;\n  for (auto s : string_vector) {\n    builder_->SendSimpleString(s);\n    expected_size += s.size() + kCRLF.size() + 1;\n    ASSERT_TRUE(NoErrors());\n  }\n  ASSERT_EQ(SinkSize(), expected_size);\n  // ASSERT_EQ(kArrayStart, str().at(0));\n  ASSERT_TRUE(absl::StartsWith(str(), absl::StrCat(kArrayStartString, 4)));\n  auto parsing_output = Parse();\n  
ASSERT_TRUE(parsing_output.Verify(SinkSize()))\n      << \" invalid parsing for the array message by the parser: \" << parsing_output;\n\n  ASSERT_EQ(string_vector.size(), parsing_output.args.size());\n  ASSERT_THAT(parsing_output.args,\n              ElementsAre(string_vector[0], string_vector[1], string_vector[2], string_vector[3]));\n\n  std::vector<std::string_view> message_tokens = TokenizeMessage();\n  ASSERT_THAT(message_tokens, ElementsAre(\"*4\", absl::StrCat(kStringStart, string_vector[0]),\n                                          absl::StrCat(kStringStart, string_vector[1]),\n                                          absl::StrCat(kStringStart, string_vector[2]),\n                                          absl::StrCat(kStringStart, string_vector[3])));\n}\n\nTEST_F(RedisReplyBuilderTest, SendSimpleStrArr) {\n  // This would send array of strings, but with different API than TestStrArray test\n  const std::string_view kArrayMessage[] = {\n      // random values\n      \"+++\", \"---\", \"$$$\", \"~~~~\", \"@@@\", \"^^^\", \"1234\", \"foo\"};\n  const std::size_t kArrayLen = sizeof(kArrayMessage) / sizeof(kArrayMessage[0]);\n  builder_->SendSimpleStrArr(kArrayMessage);\n  ASSERT_TRUE(NoErrors());\n  // Tokenize the message and verify content\n  std::vector<std::string_view> message_tokens = TokenizeMessage();\n  ASSERT_THAT(message_tokens, ElementsAre(absl::StrCat(kArrayStartString, kArrayLen),\n                                          absl::StrCat(kStringStart, kArrayMessage[0]),\n                                          absl::StrCat(kStringStart, kArrayMessage[1]),\n                                          absl::StrCat(kStringStart, kArrayMessage[2]),\n                                          absl::StrCat(kStringStart, kArrayMessage[3]),\n                                          absl::StrCat(kStringStart, kArrayMessage[4]),\n                                          absl::StrCat(kStringStart, kArrayMessage[5]),\n                                     
     absl::StrCat(kStringStart, kArrayMessage[6]),\n                                          absl::StrCat(kStringStart, kArrayMessage[7])));\n\n  auto parsed_message = Parse();\n  ASSERT_THAT(parsed_message.args,\n              ElementsAre(kArrayMessage[0], kArrayMessage[1], kArrayMessage[2], kArrayMessage[3],\n                          kArrayMessage[4], kArrayMessage[5], kArrayMessage[6], kArrayMessage[7]));\n}\n\nTEST_F(RedisReplyBuilderTest, SendStringViewArr) {\n  // This would send array of strings, but with different API than TestStrArray test\n  const std::vector<std::string_view> kArrayMessage{\n      // random values\n      \"(((\", \"}}}\", \"&&&&\", \"####\", \"___\", \"+++\", \"0.1234\", \"bar\"};\n  builder_->SendBulkStrArr(kArrayMessage);\n  ASSERT_TRUE(NoErrors());\n  // verify content\n  std::vector<std::string_view> message_tokens = TokenizeMessage();\n  // the form of this is *<array size>\\r\\n$<string1 size>\\r\\n<string1>..$<stringN\n  // size>\\r\\n<stringN>\\r\\n\n  ASSERT_THAT(\n      message_tokens,\n      ElementsAre(absl::StrCat(kArrayStartString, kArrayMessage.size()),  // array size\n                                                                          // size + string 0..N\n                  absl::StrCat(kBulkStringStart, kArrayMessage[0].size()), kArrayMessage[0],\n                  absl::StrCat(kBulkStringStart, kArrayMessage[1].size()), kArrayMessage[1],\n                  absl::StrCat(kBulkStringStart, kArrayMessage[2].size()), kArrayMessage[2],\n                  absl::StrCat(kBulkStringStart, kArrayMessage[3].size()), kArrayMessage[3],\n                  absl::StrCat(kBulkStringStart, kArrayMessage[4].size()), kArrayMessage[4],\n                  absl::StrCat(kBulkStringStart, kArrayMessage[5].size()), kArrayMessage[5],\n                  absl::StrCat(kBulkStringStart, kArrayMessage[6].size()), kArrayMessage[6],\n                  absl::StrCat(kBulkStringStart, kArrayMessage[7].size()), kArrayMessage[7]));\n\n  // Check the 
parsed message\n  auto parsed_message = Parse();\n  ASSERT_THAT(parsed_message.args,\n              ElementsAre(kArrayMessage[0], kArrayMessage[1], kArrayMessage[2], kArrayMessage[3],\n                          kArrayMessage[4], kArrayMessage[5], kArrayMessage[6], kArrayMessage[7]));\n}\n\nTEST_F(RedisReplyBuilderTest, SendBulkStringArr) {\n  // This would send array of strings, but with different API than TestStrArray test\n  const std::vector<std::string> kArrayMessage{\n      // Test this one with large values\n      std::string(1024, '.'), std::string(2048, ','), std::string(4096, ' ')};\n  builder_->SendBulkStrArr(kArrayMessage);\n  ASSERT_TRUE(NoErrors());\n  std::vector<std::string_view> message_tokens = TokenizeMessage();\n  // the form of this is *<array size>\\r\\n$<string1 size>\\r\\n<string1>..$<stringN\n  // size>\\r\\n<stringN>\\r\\n\n  ASSERT_THAT(\n      message_tokens,\n      ElementsAre(absl::StrCat(kArrayStartString, kArrayMessage.size()),  // array size\n                                                                          // size + string 0..N\n                  absl::StrCat(kBulkStringStart, kArrayMessage[0].size()), kArrayMessage[0],\n                  absl::StrCat(kBulkStringStart, kArrayMessage[1].size()), kArrayMessage[1],\n                  absl::StrCat(kBulkStringStart, kArrayMessage[2].size()), kArrayMessage[2]));\n  // Check the parsed message\n  auto parsed_message = Parse();\n  ASSERT_TRUE(parsed_message.Verify(SinkSize()))\n      << \"message was not successfully parsed: \" << parsed_message;\n  ASSERT_THAT(parsed_message.args,\n              ElementsAre(kArrayMessage[0], kArrayMessage[1], kArrayMessage[2]));\n}\n\nTEST_F(RedisReplyBuilderTest, NullBulkString) {\n  // null bulk string == \"$-1\\r\\n\" i.e. 
'$' + -1 + \\r + \\n\n  builder_->SendNull();\n  ASSERT_TRUE(NoErrors());\n  ASSERT_EQ(str(), kNullBulkString);\n  auto parsing_output = Parse();\n  ASSERT_TRUE(parsing_output.Verify(SinkSize()));\n  ASSERT_TRUE(parsing_output.IsNull());\n  ASSERT_THAT(parsing_output.args, ElementsAre(ArgType(RespExpr::NIL)));\n}\n\nTEST_F(RedisReplyBuilderTest, EmptyBulkString) {\n  // empty bulk string is in the form of \"$0\\r\\n\\r\\n\", i.e. length 0 after $ follow by \\r\\n*2\n  const std::string_view kEmptyBulkString = \"$0\\r\\n\\r\\n\";\n  builder_->SendBulkString(std::string_view{});\n  ASSERT_TRUE(NoErrors());\n  ASSERT_EQ(str(), kEmptyBulkString);\n  auto parsing_output = Parse();\n  ASSERT_TRUE(parsing_output.Verify(SinkSize()));\n  ASSERT_TRUE(parsing_output.IsString());\n  ASSERT_THAT(parsing_output.args, ElementsAre(std::string_view{}));\n}\n\nTEST_F(RedisReplyBuilderTest, NoAsciiBulkString) {\n  // Bulk string may contain none ascii chars\n  const char random_bytes[] = {0x12, 0x25, 0x37};\n  std::size_t data_size = sizeof(random_bytes) / sizeof(random_bytes[0]);\n  std::string_view none_ascii_payload{random_bytes, data_size};\n  builder_->SendBulkString(none_ascii_payload);\n  ASSERT_TRUE(NoErrors());\n  const std::string expected_payload =\n      absl::StrCat(kBulkStringStart, data_size, kCRLF, none_ascii_payload, kCRLF);\n  ASSERT_EQ(str(), expected_payload);\n  std::vector<std::string_view> message_tokens = TokenizeMessage();\n  ASSERT_EQ(message_tokens.size(), 2);  // length and payload\n  ASSERT_THAT(message_tokens,\n              ElementsAre(absl::StrCat(kBulkStringStart, data_size), none_ascii_payload));\n  auto parsing_output = Parse();\n  ASSERT_TRUE(parsing_output.IsString());\n  ASSERT_THAT(parsing_output.args, ElementsAre(none_ascii_payload));\n}\n\nTEST_F(RedisReplyBuilderTest, BulkStringWithCRLF) {\n  // Verify bulk string that contains the \\r\\n as payload\n  std::string_view crlf_chars{\"\\r\\n\"};\n  builder_->SendBulkString(crlf_chars);\n  
ASSERT_TRUE(NoErrors());\n  // the expected message in this case is $2\\r\\n\\r\\n\\r\\n\n  std::string expected_message =\n      absl::StrCat(kBulkStringStart, crlf_chars.size(), kCRLF, crlf_chars, kCRLF);\n  ASSERT_EQ(str(), expected_message);\n  auto parsing_output = Parse();\n  ASSERT_TRUE(parsing_output.IsString());\n  ASSERT_THAT(parsing_output.args, ElementsAre(crlf_chars));\n}\n\nTEST_F(RedisReplyBuilderTest, BulkStringWithStartBulkString) {\n  // check a bulk string that contains $<number> as payload\n  std::string message = absl::StrCat(kBulkStringStart, \"10\");\n  std::string expected_message =\n      absl::StrCat(kBulkStringStart, message.size(), kCRLF, message, kCRLF);\n  builder_->SendBulkString(message);\n  ASSERT_TRUE(NoErrors());\n  ASSERT_EQ(str(), expected_message);\n\n  auto parsing_output = Parse();\n  ASSERT_TRUE(parsing_output.IsString());\n  ASSERT_THAT(parsing_output.args, ElementsAre(message));\n}\n\nTEST_F(RedisReplyBuilderTest, BulkStringWithStarString) {\n  std::string message = absl::StrCat(kStringStart, \"a string message\");\n  std::string expected_message =\n      absl::StrCat(kBulkStringStart, message.size(), kCRLF, message, kCRLF);\n  builder_->SendBulkString(message);\n  ASSERT_EQ(str(), expected_message);\n  auto parsing_output = Parse();\n  ASSERT_TRUE(parsing_output.IsString());\n  ASSERT_THAT(parsing_output.args, ElementsAre(message));\n}\n\nTEST_F(RedisReplyBuilderTest, BulkStringWithErrorString) {\n  std::string message = absl::StrCat(kErrorStrPreFix, kSyntaxErrType);\n  std::string expected_message =\n      absl::StrCat(kBulkStringStart, message.size(), kCRLF, message, kCRLF);\n  builder_->SendBulkString(message);\n  ASSERT_TRUE(NoErrors());\n  ASSERT_EQ(str(), expected_message);\n  auto parsing_output = Parse();\n  ASSERT_TRUE(parsing_output.IsString());\n  ASSERT_THAT(parsing_output.args, ElementsAre(message));\n}\n\nTEST_F(RedisReplyBuilderTest, Int) {\n  // message in the form of \":0\\r\\n\" and \":1000\\r\\n\"\n  // 
this message just starts with ':' and ends with \\r\\n\n  // and the payload must be successfully parsed into int type\n  const long kPayloadInt = 12345;\n  const std::string expected_output = absl::StrCat(kIntStartString, kPayloadInt, kCRLF);\n  builder_->SendLong(kPayloadInt);\n  ASSERT_EQ(str(), expected_output);\n  long value = 0;\n  std::string_view expected_payload = str().substr(1, SinkSize() - kMinPayloadLen);\n  ASSERT_TRUE(absl::SimpleAtoi(expected_payload, &value));\n  ASSERT_EQ(value, kPayloadInt);\n  auto parsing_output = Parse();\n  ASSERT_THAT(parsing_output.args, ElementsAre(IntArg(kPayloadInt)));\n}\n\nTEST_F(RedisReplyBuilderTest, Double) {\n  // There is no direct support for double types in RESP\n  // to send this, it is sent as bulk string\n  const std::string_view kPayloadStr = \"23.456\";\n  double double_value = 0;\n  CHECK(absl::SimpleAtod(kPayloadStr, &double_value));\n  const std::string expected_payload =\n      absl::StrCat(kBulkStringStart, kPayloadStr.size(), kCRLF, kPayloadStr, kCRLF);\n  builder_->SendDouble(double_value);\n  ASSERT_TRUE(NoErrors());\n  std::vector<std::string_view> message_tokens = TokenizeMessage();\n  ASSERT_EQ(str(), expected_payload);\n  ASSERT_THAT(message_tokens,\n              ElementsAre(absl::StrCat(kBulkStringStart, kPayloadStr.size()), kPayloadStr));\n  auto parsing_output = Parse();\n  ASSERT_TRUE(parsing_output.IsString());\n  ASSERT_THAT(parsing_output.args, ElementsAre(kPayloadStr));\n}\n\nTEST_F(RedisReplyBuilderTest, MixedTypeArray) {\n  // For arrays, we can send an array that contains more than a single type (string/bulk\n  // string/simple string/null..) In this test we are verifying that this is actually working. 
note\n  // that this is not part of class RedisReplyBuilder API\n  // The entries are:\n  // array start\n  // bulk string\n  // int\n  // int\n  // simple string\n  // simple string\n  // empty bulk string\n  // double (bulk string)\n  std::string long_string(1024, '-');\n  const unsigned int kArraySize = 6;\n  const char random_bytes[] = {0x12, 0x15, 0x2F};\n  const std::string_view kFirstBulkString{random_bytes, 3};\n  const long kFirstLongValue = 54321;\n  const long kSecondLongValue = 87654321;\n  const std::string_view kLongSimpleString{long_string};\n  const std::string_view kPayloadDoubleStr = \"9987654321.0123\";\n  double double_value = 0;\n  CHECK(absl::SimpleAtod(kPayloadDoubleStr, &double_value));\n\n  builder_->StartArray(kArraySize);\n  builder_->SendBulkString(kFirstBulkString);\n  builder_->SendLong(kFirstLongValue);\n  builder_->SendLong(kSecondLongValue);\n  builder_->SendSimpleString(kLongSimpleString);\n  // builder_->SendNull();\n  builder_->SendBulkString(std::string_view{});\n  builder_->SendDouble(double_value);\n  const std::string_view output_msg = str();\n  ASSERT_FALSE(output_msg.empty());\n  ASSERT_TRUE(NoErrors());\n  std::vector<std::string_view> message_tokens = TokenizeMessage();\n  ASSERT_THAT(\n      message_tokens,\n      ElementsAre(absl::StrCat(kArrayStartString, kArraySize),  // the length\n                  absl::StrCat(kBulkStringStart, kFirstBulkString.size()), kFirstBulkString,\n                  absl::StrCat(kIntStartString, kFirstLongValue),\n                  absl::StrCat(kIntStartString, kSecondLongValue),\n                  absl::StrCat(kStringStart, kLongSimpleString),  // ArgType(RespExpr::NIL),\n                  absl::StrCat(kBulkStringStart, \"0\"), std::string_view{},\n                  absl::StrCat(kBulkStringStart, kPayloadDoubleStr.size()), kPayloadDoubleStr));\n\n  // // Now we need to parse it and make sure that its a valid message by the parser as well\n  auto parsed_message = Parse();\n  ASSERT_THAT(\n   
   parsed_message.args,\n      ElementsAre(ArgType(RespExpr::STRING), ArgType(RespExpr::INT64), ArgType(RespExpr::INT64),\n                  ArgType(RespExpr::STRING), ArgType(RespExpr::STRING), ArgType(RespExpr::STRING)));\n}\n\nTEST_F(RedisReplyBuilderTest, BatchMode) {\n  GTEST_SKIP() << \"Some differences\";\n\n  // Test that when the batch mode is enabled, we are getting the same correct results\n  builder_->SetBatchMode(true);\n  // Some random values and sizes\n  const std::vector<std::string> kInputArray{\n      std::string(10, 'p'),  std::string(48, 'o'),  std::string(67, 'y'),\n      std::string(167, 'e'), std::string(478, '*'), std::string(164, 't'),\n  };\n  builder_->StartArray(kInputArray.size());\n  ASSERT_EQ(SinkSize(), 0);\n  int count = 0;\n  std::size_t total_bytes = 0;\n  for (const auto& val : kInputArray) {\n    builder_->SendBulkString(val);\n    ASSERT_EQ(SinkSize(), 0) << \" sink is not empty at iteration number \" << count;\n    ASSERT_EQ(GetReplyStats().io_write_bytes, 0);\n    ASSERT_EQ(GetReplyStats().io_write_cnt, 0);\n    total_bytes += val.size();\n    ++count;\n  }\n  // in order to actually see the message, we need to disable the batching, then\n  // write something\n  builder_->SetBatchMode(false);\n  builder_->SendBulkString(std::string_view{});\n  ASSERT_EQ(GetReplyStats().io_write_cnt, 1);\n  // We expecting to have more than the total bytes we count,\n  // since we are not counting the \\r\\n and the type char as well\n  // as length entries\n  ASSERT_GT(GetReplyStats().io_write_bytes, total_bytes);\n  std::vector<std::string_view> array_members = TokenizeMessage();\n  ASSERT_THAT(array_members,\n              ElementsAre(absl::StrCat(kArrayStartString, kInputArray.size()),\n                          absl::StrCat(kBulkStringStart, kInputArray[0].size()), kInputArray[0],\n                          absl::StrCat(kBulkStringStart, kInputArray[1].size()), kInputArray[1],\n                          absl::StrCat(kBulkStringStart, 
kInputArray[2].size()), kInputArray[2],\n                          absl::StrCat(kBulkStringStart, kInputArray[3].size()), kInputArray[3],\n                          absl::StrCat(kBulkStringStart, kInputArray[4].size()), kInputArray[4],\n                          absl::StrCat(kBulkStringStart, kInputArray[5].size()), kInputArray[5],\n                          absl::StrCat(kBulkStringStart, \"0\"), std::string_view{}));\n}\n\nTEST_F(RedisReplyBuilderTest, Resp3Double) {\n  builder_->SetRespVersion(RespVersion::kResp3);\n  builder_->SendDouble(5.5);\n  ASSERT_TRUE(NoErrors());\n  ASSERT_EQ(str(), \",5.5\\r\\n\");\n}\n\nTEST_F(RedisReplyBuilderTest, Resp3NullString) {\n  builder_->SetRespVersion(RespVersion::kResp3);\n  builder_->SendNull();\n  ASSERT_TRUE(NoErrors());\n  ASSERT_EQ(TakePayload(), \"_\\r\\n\");\n}\n\nTEST_F(RedisReplyBuilderTest, SendStringArrayAsMap) {\n  const std::vector<std::string> map_array{\"k1\", \"v1\", \"k2\", \"v2\"};\n\n  builder_->SetRespVersion(RespVersion::kResp2);\n  builder_->SendBulkStrArr(map_array, CollectionType::MAP);\n  ASSERT_TRUE(NoErrors());\n  ASSERT_EQ(TakePayload(), \"*4\\r\\n$2\\r\\nk1\\r\\n$2\\r\\nv1\\r\\n$2\\r\\nk2\\r\\n$2\\r\\nv2\\r\\n\")\n      << \"SendStringArrayAsMap Resp2 Failed.\";\n\n  builder_->SetRespVersion(RespVersion::kResp3);\n  builder_->SendBulkStrArr(map_array, CollectionType::MAP);\n  ASSERT_TRUE(NoErrors());\n  ASSERT_EQ(TakePayload(), \"%2\\r\\n$2\\r\\nk1\\r\\n$2\\r\\nv1\\r\\n$2\\r\\nk2\\r\\n$2\\r\\nv2\\r\\n\")\n      << \"SendStringArrayAsMap Resp3 Failed.\";\n}\n\nTEST_F(RedisReplyBuilderTest, SendStringArrayAsSet) {\n  const std::vector<std::string> set_array{\"e1\", \"e2\", \"e3\"};\n\n  builder_->SetRespVersion(RespVersion::kResp2);\n  builder_->SendBulkStrArr(set_array, CollectionType::SET);\n  ASSERT_TRUE(NoErrors());\n  ASSERT_EQ(TakePayload(), \"*3\\r\\n$2\\r\\ne1\\r\\n$2\\r\\ne2\\r\\n$2\\r\\ne3\\r\\n\")\n      << \"SendStringArrayAsSet Resp2 Failed.\";\n\n  
builder_->SetRespVersion(RespVersion::kResp3);\n  builder_->SendBulkStrArr(set_array, CollectionType::SET);\n  ASSERT_TRUE(NoErrors());\n  ASSERT_EQ(TakePayload(), \"~3\\r\\n$2\\r\\ne1\\r\\n$2\\r\\ne2\\r\\n$2\\r\\ne3\\r\\n\")\n      << \"SendStringArrayAsSet Resp3 Failed.\";\n}\n\nTEST_F(RedisReplyBuilderTest, SendScoredArray) {\n  const std::vector<std::pair<std::string, double>> scored_array{\n      {\"e1\", 1.1}, {\"e2\", 2.2}, {\"e3\", 3.3}};\n\n  builder_->SetRespVersion(RespVersion::kResp2);\n  builder_->SendScoredArray(scored_array, false);\n  ASSERT_TRUE(NoErrors());\n  ASSERT_EQ(TakePayload(), \"*3\\r\\n$2\\r\\ne1\\r\\n$2\\r\\ne2\\r\\n$2\\r\\ne3\\r\\n\")\n      << \"Resp2 WITHOUT scores failed.\";\n\n  builder_->SetRespVersion(RespVersion::kResp3);\n  builder_->SendScoredArray(scored_array, false);\n  ASSERT_TRUE(NoErrors());\n  ASSERT_EQ(TakePayload(), \"*3\\r\\n$2\\r\\ne1\\r\\n$2\\r\\ne2\\r\\n$2\\r\\ne3\\r\\n\")\n      << \"Resp3 WITHOUT scores failed.\";\n\n  builder_->SetRespVersion(RespVersion::kResp2);\n  builder_->SendScoredArray(scored_array, true);\n  ASSERT_TRUE(NoErrors());\n  ASSERT_EQ(TakePayload(),\n            \"*6\\r\\n$2\\r\\ne1\\r\\n$3\\r\\n1.1\\r\\n$2\\r\\ne2\\r\\n$3\\r\\n2.2\\r\\n$2\\r\\ne3\\r\\n$3\\r\\n3.3\\r\\n\")\n      << \"Resp3 WITHSCORES failed.\";\n\n  builder_->SetRespVersion(RespVersion::kResp3);\n  builder_->SendScoredArray(scored_array, true);\n  ASSERT_TRUE(NoErrors());\n  ASSERT_EQ(TakePayload(),\n            \"*3\\r\\n*2\\r\\n$2\\r\\ne1\\r\\n,1.1\\r\\n*2\\r\\n$2\\r\\ne2\\r\\n,2.2\\r\\n*2\\r\\n$2\\r\\ne3\\r\\n,3.3\\r\\n\")\n      << \"Resp3 WITHSCORES failed.\";\n}\n\nTEST_F(RedisReplyBuilderTest, SendLabeledScoredArray) {\n  const std::vector<std::pair<std::string, double>> scored_array{\n      {\"e1\", 1.1}, {\"e2\", 2.2}, {\"e3\", 3.3}};\n\n  builder_->SetRespVersion(RespVersion::kResp2);\n  builder_->SendLabeledScoredArray(\"foobar\", scored_array);\n  ASSERT_TRUE(NoErrors());\n  ASSERT_EQ(TakePayload(),\n            
\"*2\\r\\n$6\\r\\nfoobar\\r\\n*3\\r\\n*2\\r\\n$2\\r\\ne1\\r\\n$3\\r\\n1.1\\r\\n*2\\r\\n$2\\r\\ne2\\r\\n$3\\r\\n2.\"\n            \"2\\r\\n*2\\r\\n$2\\r\\ne3\\r\\n$3\\r\\n3.3\\r\\n\")\n      << \"Resp3 failed.\\n\";\n\n  builder_->SetRespVersion(RespVersion::kResp3);\n  builder_->SendLabeledScoredArray(\"foobar\", scored_array);\n  ASSERT_TRUE(NoErrors());\n  ASSERT_EQ(TakePayload(),\n            \"*2\\r\\n$6\\r\\nfoobar\\r\\n*3\\r\\n*2\\r\\n$2\\r\\ne1\\r\\n,1.1\\r\\n*2\\r\\n$2\\r\\ne2\\r\\n,2.2\\r\\n*\"\n            \"2\\r\\n$2\\r\\ne3\\r\\n,3.3\\r\\n\")\n      << \"Resp3 failed.\";\n}\n\nTEST_F(RedisReplyBuilderTest, BasicCapture) {\n  using namespace std;\n  string_view kTestSws[] = {\"a1\"sv, \"a2\"sv, \"a3\"sv, \"a4\"sv};\n\n  CapturingReplyBuilder crb{};\n  using RRB = RedisReplyBuilder;\n\n  auto big_arr_cb = [](RRB* r) {\n    r->StartArray(4);\n    {\n      r->StartArray(2);\n      r->SendLong(1);\n      r->StartArray(2);\n      {\n        r->SendLong(2);\n        r->SendLong(3);\n      }\n    }\n    r->SendLong(4);\n    {\n      r->StartArray(2);\n      {\n        r->StartArray(2);\n        r->SendLong(5);\n        r->SendLong(6);\n      }\n      r->SendLong(7);\n    }\n    r->SendLong(8);\n  };\n\n  function<void(RRB*)> funcs[] = {\n      [](RRB* r) { r->SendNull(); },\n      [](RRB* r) { r->SendLong(1L); },\n      [](RRB* r) { r->SendDouble(6.7); },\n      [](RRB* r) { r->SendSimpleString(\"ok\"); },\n      [](RRB* r) { r->SendEmptyArray(); },\n      [](RRB* r) { r->SendNullArray(); },\n      [](RRB* r) { r->SendError(\"e1\", \"e2\"); },\n      [kTestSws](RRB* r) { r->SendSimpleStrArr(kTestSws); },\n      [kTestSws](RRB* r) { r->SendBulkStrArr(kTestSws); },\n      [kTestSws](RRB* r) { r->SendBulkStrArr(kTestSws, CollectionType::SET); },\n      [kTestSws](RRB* r) { r->SendBulkStrArr(kTestSws, CollectionType::MAP); },\n      [kTestSws](RRB* r) {\n        r->StartArray(3);\n        r->SendLong(1L);\n        r->SendDouble(2.5);\n        
r->SendSimpleStrArr(kTestSws);\n      },\n      big_arr_cb,\n  };\n\n  crb.SetRespVersion(RespVersion::kResp3);\n  builder_->SetRespVersion(RespVersion::kResp3);\n\n  // Run generator functions on both a regular redis builder\n  // and the capturing builder with its capture applied.\n  for (auto& f : funcs) {\n    f(builder_.get());\n    auto expected = TakePayload();\n    f(&crb);\n    CapturingReplyBuilder::Apply(crb.Take(), builder_.get());\n    auto actual = TakePayload();\n    EXPECT_EQ(expected, actual);\n  }\n\n  builder_->SetRespVersion(RespVersion::kResp2);\n}\n\nTEST_F(RedisReplyBuilderTest, FormatDouble) {\n  char buf[64];\n\n  auto format = [&](double d) { return RedisReplyBuilder::FormatDouble(d, buf, sizeof(buf)); };\n\n  EXPECT_STREQ(\"0.1\", format(0.1));\n  EXPECT_STREQ(\"0.2\", format(0.2));\n  EXPECT_STREQ(\"0.8\", format(0.8));\n  EXPECT_STREQ(\"1.1\", format(1.1));\n  EXPECT_STREQ(\"inf\", format(INFINITY));\n  EXPECT_STREQ(\"-inf\", format(-INFINITY));\n  EXPECT_STREQ(\"0\", format(-0.0));\n  EXPECT_STREQ(\"1e-7\", format(0.0000001));\n  EXPECT_STREQ(\"111111111111111110000\", format(111111111111111111111.0));\n  EXPECT_STREQ(\"1.1111111111111111e+21\", format(1111111111111111111111.0));\n  EXPECT_STREQ(\"1e-23\", format(1e-23));\n}\n\nTEST_F(RedisReplyBuilderTest, VerbatimString) {\n  // test resp3\n  std::string str = \"A simple string!\";\n\n  builder_->SetRespVersion(RespVersion::kResp3);\n  builder_->SendVerbatimString(str, RedisReplyBuilder::VerbatimFormat::TXT);\n  ASSERT_TRUE(NoErrors());\n  ASSERT_EQ(TakePayload(), \"=20\\r\\ntxt:A simple string!\\r\\n\") << \"Resp3 VerbatimString TXT failed.\";\n\n  builder_->SetRespVersion(RespVersion::kResp3);\n  builder_->SendVerbatimString(str, RedisReplyBuilder::VerbatimFormat::MARKDOWN);\n  ASSERT_TRUE(NoErrors());\n  ASSERT_EQ(TakePayload(), \"=20\\r\\nmkd:A simple string!\\r\\n\") << \"Resp3 VerbatimString TXT failed.\";\n\n  builder_->SetRespVersion(RespVersion::kResp2);\n  
builder_->SendVerbatimString(str);\n  ASSERT_TRUE(NoErrors());\n  ASSERT_EQ(TakePayload(), \"$16\\r\\nA simple string!\\r\\n\") << \"Resp3 VerbatimString TXT failed.\";\n}\n\nTEST_F(RedisReplyBuilderTest, Issue3449) {\n  vector<string> records;\n  for (unsigned i = 0; i < 10'000; ++i) {\n    records.push_back(absl::StrCat(i));\n  }\n  builder_->SendBulkStrArr(records);\n  ASSERT_TRUE(NoErrors());\n  ParsingResults parse_result = Parse();\n  ASSERT_FALSE(parse_result.IsError());\n  EXPECT_EQ(10000, parse_result.args.size());\n}\n\nTEST_F(RedisReplyBuilderTest, Issue4424) {\n  vector<string> records;\n  for (unsigned i = 0; i < 800; ++i) {\n    records.push_back(string(100, 'a'));\n  }\n\n  for (unsigned j = 0; j < 2; ++j) {\n    builder_->SendBulkStrArr(records);\n    ASSERT_TRUE(NoErrors());\n    ParsingResults parse_result = Parse();\n    ASSERT_FALSE(parse_result.IsError()) << int(parse_result.result);\n    ASSERT_TRUE(parse_result.Verify(SinkSize()));\n    EXPECT_EQ(800, parse_result.args.size());\n    sink_.Clear();\n  }\n}\n\nTEST_F(RedisReplyBuilderTest, MCMetaGetLargeValue) {\n  io::StringSink mc_sink;\n  MCReplyBuilder mc_builder(&mc_sink);\n\n  MemcacheCmdFlags flags;\n  flags.meta = true;\n  flags.return_value = true;\n\n  string large_val(16000, 'x');\n  mc_builder.SendValue(flags, \"key\", large_val, 0, 0, 0);\n\n  string_view output = mc_sink.str();\n  EXPECT_THAT(output, HasSubstr(\"VA 16000\"));\n  EXPECT_THAT(output, HasSubstr(large_val));\n}\n\nstatic void BM_FormatDouble(benchmark::State& state) {\n  vector<double> values;\n  char buf[64];\n\n  uniform_real_distribution<double> unif(0, 1e9);\n  default_random_engine re;\n  for (unsigned i = 0; i < 100; i++) {\n    values.push_back(unif(re));\n  }\n\n  while (state.KeepRunning()) {\n    for (auto d : values) {\n      RedisReplyBuilder::FormatDouble(d, buf, sizeof(buf));\n    }\n  }\n}\nBENCHMARK(BM_FormatDouble);\n\n}  // namespace facade\n"
  },
  {
    "path": "src/facade/reply_capture.cc",
    "content": "// Copyright 2023, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n#include \"facade/reply_capture.h\"\n\n#include \"absl/types/span.h\"\n#include \"base/logging.h\"\n#include \"reply_capture.h\"\n\n#define SKIP_LESS(needed)     \\\n  replies_recorded_++;        \\\n  if (reply_mode_ < needed) { \\\n    current_ = monostate{};   \\\n    return;                   \\\n  }\nnamespace facade {\n\nusing namespace std;\nusing namespace payload;\n\nvoid CapturingReplyBuilder::SendError(std::string_view str, std::string_view type) {\n  last_error_ = str;\n  SKIP_LESS(ReplyMode::ONLY_ERR);\n  Capture(make_error(str, type));\n}\n\nvoid CapturingReplyBuilder::SendNullArray() {\n  SKIP_LESS(ReplyMode::FULL);\n  Capture(unique_ptr<CollectionPayload>{nullptr});\n}\n\nvoid CapturingReplyBuilder::SendNull() {\n  SKIP_LESS(ReplyMode::FULL);\n  Capture(nullptr_t{});\n}\n\nvoid CapturingReplyBuilder::SendLong(long val) {\n  SKIP_LESS(ReplyMode::FULL);\n  Capture(val);\n}\n\nvoid CapturingReplyBuilder::SendDouble(double val) {\n  SKIP_LESS(ReplyMode::FULL);\n  Capture(val);\n}\n\nvoid CapturingReplyBuilder::SendSimpleString(std::string_view str) {\n  SKIP_LESS(ReplyMode::FULL);\n  Capture(SimpleString{string{str}});\n}\n\nvoid CapturingReplyBuilder::SendBulkString(std::string_view str) {\n  SKIP_LESS(ReplyMode::FULL);\n  Capture(BulkString{string{str}});\n}\n\nvoid CapturingReplyBuilder::StartCollection(unsigned len, CollectionType type) {\n  SKIP_LESS(ReplyMode::FULL);\n  stack_.emplace(make_unique<CollectionPayload>(len, type),\n                 type == CollectionType::MAP ? 
len * 2 : len);\n\n  // If we added an empty collection, it must be collapsed immediately.\n  CollapseFilledCollections();\n}\n\nCapturingReplyBuilder::Payload CapturingReplyBuilder::Take() {\n  CHECK(stack_.empty());\n  Payload pl = std::move(current_);\n  current_ = monostate{};\n  return pl;\n}\n\nvoid CapturingReplyBuilder::SendDirect(Payload&& val) {\n  replies_recorded_ += !holds_alternative<monostate>(val);\n  bool is_err = holds_alternative<Error>(val);\n  ReplyMode min_mode = is_err ? ReplyMode::ONLY_ERR : ReplyMode::FULL;\n  if (reply_mode_ >= min_mode) {\n    DCHECK_EQ(current_.index(), 0u);\n    current_ = std::move(val);\n  } else {\n    current_ = monostate{};\n  }\n}\n\nvoid CapturingReplyBuilder::Capture(Payload val, bool collapse_if_needed) {\n  if (!stack_.empty()) {\n    auto& last = stack_.top();\n    last.first->arr.push_back(std::move(val));\n    if (last.second-- == 1 && collapse_if_needed) {\n      CollapseFilledCollections();\n    }\n  } else {\n    DCHECK_EQ(current_.index(), 0u);\n    current_ = std::move(val);\n  }\n}\n\nvoid CapturingReplyBuilder::CollapseFilledCollections() {\n  while (!stack_.empty() && stack_.top().second == 0) {\n    auto pl = std::move(stack_.top());\n    stack_.pop();\n    Capture(std::move(pl.first), false);\n  }\n}\n\nstruct CaptureVisitor {\n  void operator()(monostate) {\n  }\n\n  void operator()(long v) {\n    rb->SendLong(v);\n  }\n\n  void operator()(double v) {\n    static_cast<RedisReplyBuilder*>(rb)->SendDouble(v);\n  }\n\n  void operator()(const payload::SimpleString& ss) {\n    rb->SendSimpleString(ss);\n  }\n\n  void operator()(const payload::BulkString& bs) {\n    static_cast<RedisReplyBuilder*>(rb)->SendBulkString(bs);\n  }\n\n  void operator()(payload::Null) {\n    static_cast<RedisReplyBuilder*>(rb)->SendNull();\n  }\n\n  void operator()(const payload::Error& err) {\n    rb->SendError(err->first, err->second);\n  }\n\n  void operator()(const unique_ptr<payload::CollectionPayload>& cp) {\n    auto* 
builder = static_cast<RedisReplyBuilder*>(rb);\n    if (!cp) {\n      builder->SendNullArray();\n      return;\n    }\n    if (cp->len == 0 && cp->type == CollectionType::ARRAY) {\n      builder->SendEmptyArray();\n      return;\n    }\n    builder->StartCollection(cp->len, cp->type);\n    for (auto& pl : cp->arr)\n      visit(*this, std::move(pl));\n  }\n\n  SinkReplyBuilder* rb;\n};\n\nvoid CapturingReplyBuilder::Apply(Payload&& pl, SinkReplyBuilder* rb) {\n  if (auto* crb = dynamic_cast<CapturingReplyBuilder*>(rb); crb != nullptr) {\n    crb->SendDirect(std::move(pl));\n    return;\n  }\n\n  CaptureVisitor cv{rb};\n  visit(cv, std::move(pl));\n}\n\nvoid CapturingReplyBuilder::SetReplyMode(ReplyMode mode) {\n  reply_mode_ = mode;\n  current_ = monostate{};\n}\n\noptional<CapturingReplyBuilder::ErrorRef> CapturingReplyBuilder::TryExtractError(\n    const Payload& pl) {\n  if (auto* err = get_if<Error>(&pl); err != nullptr) {\n    return ErrorRef{(*err)->first, (*err)->second};\n  }\n  return nullopt;\n}\n\n}  // namespace facade\n"
  },
  {
    "path": "src/facade/reply_capture.h",
    "content": "// Copyright 2023, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#pragma once\n\n#include <memory>\n#include <stack>\n#include <string_view>\n#include <variant>\n\n#include \"facade/reply_builder.h\"\n#include \"facade/reply_mode.h\"\n#include \"facade/reply_payload.h\"\n\nnamespace facade {\n\nstruct CaptureVisitor;\n\n// CapturingReplyBuilder allows capturing replies and retrieveing them with Take().\n// Those replies can be stored standalone and sent with\n// CapturingReplyBuilder::Apply() to another reply builder.\nclass CapturingReplyBuilder : public RedisReplyBuilder {\n  friend struct CaptureVisitor;\n\n public:\n  using RedisReplyBuilder::SendError;\n  void SendError(std::string_view str, std::string_view type) override;\n\n  void SendLong(long val) override;\n  void SendDouble(double val) override;\n  void SendSimpleString(std::string_view str) override;\n  void SendBulkString(std::string_view str) override;\n\n  void StartCollection(unsigned len, CollectionType type) override;\n  void SendNullArray() override;\n  void SendNull() override;\n\n  explicit CapturingReplyBuilder(ReplyMode mode = ReplyMode::FULL,\n                                 RespVersion resp_v = RespVersion::kResp2)\n      : RedisReplyBuilder{nullptr}, reply_mode_{mode} {\n    SetRespVersion(resp_v);\n  }\n\n  using Payload = payload::Payload;\n\n  // Non owned Error based on SendError arguments (msg, type)\n  using ErrorRef = std::pair<std::string_view, std::string_view>;\n\n  void SetReplyMode(ReplyMode mode);\n\n  // Take payload and clear state.\n  Payload Take();\n\n  // Send payload to builder.\n  static void Apply(Payload&& pl, SinkReplyBuilder* builder);\n\n  // If an error is stored inside payload, get a reference to it.\n  static std::optional<ErrorRef> TryExtractError(const Payload& pl);\n\n private:\n  // Send payload directly, bypassing external interface. 
For efficient passing between two\n  // captures.\n  void SendDirect(Payload&& val);\n\n  // Capture value and store either in current topmost collection or as a standalone value.\n  void Capture(Payload val, bool collapse_if_needed = true);\n\n  // While topmost collection in stack is full, finalize it and add it as a regular value.\n  void CollapseFilledCollections();\n\n  ReplyMode reply_mode_;\n\n  // List of nested active collections that are being built.\n  std::stack<std::pair<std::unique_ptr<payload::CollectionPayload>, int>> stack_;\n\n  // Root payload.\n  Payload current_;\n};\n\n}  // namespace facade\n"
  },
  {
    "path": "src/facade/reply_mode.h",
    "content": "// Copyright 2025, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#pragma once\n\nnamespace facade {\n\n// Reply mode allows filtering replies.\nenum class ReplyMode {\n  NONE,      // No replies are recorded\n  ONLY_ERR,  // Only errors are recorded\n  FULL       // All replies are recorded\n};\n\nclass RedisReplyBuilder;\n\n}  // namespace facade\n"
  },
  {
    "path": "src/facade/reply_payload.h",
    "content": "// Copyright 2025, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#pragma once\n\n#include <memory>\n#include <string>\n#include <variant>\n\n#include \"base/function2.hpp\"\n#include \"facade/facade_types.h\"\n\nnamespace facade {\n\nclass SinkReplyBuilder;\nnamespace payload {\n\n// SendError (msg, type)\nusing Error = std::unique_ptr<std::pair<std::string, std::string>>;\nusing Null = std::nullptr_t;  // SendNull or SendNullArray\n\nstruct CollectionPayload;\nstruct SimpleString : public std::string {};  // SendSimpleString\nstruct BulkString : public std::string {};    // SendBulkString\n\nusing Payload = std::variant<std::monostate, Null, Error, long, double, SimpleString, BulkString,\n                             std::unique_ptr<CollectionPayload>>;\n\n#ifdef __linux__\nstatic_assert(sizeof(Payload) == 40);\n#endif\n\nstruct CollectionPayload {\n  CollectionPayload(unsigned _len, CollectionType _type) : len{_len}, type{_type} {\n    arr.reserve(type == CollectionType::MAP ? len * 2 : len);\n  }\n\n  unsigned len;\n  CollectionType type;\n  std::vector<Payload> arr;\n};\n\ninline Error make_error(std::string_view msg, std::string_view type = \"\") {\n  return std::make_unique<std::pair<std::string, std::string>>(msg, type);\n}\n\ninline Payload make_simple_or_noreply(std::string_view resp) {\n  if (resp.empty())\n    return std::monostate{};\n  else\n    return SimpleString{std::string(resp)};\n}\n\n}  // namespace payload\n}  // namespace facade\n"
  },
  {
    "path": "src/facade/resp_expr.cc",
    "content": "// Copyright 2023, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#include \"facade/resp_expr.h\"\n\n#include \"base/logging.h\"\n\nnamespace facade {\n\nvoid FillBackedArgs(const RespVec& src, cmn::BackedArguments* dest) {\n  auto map = [](const RespExpr& expr) { return expr.GetView(); };\n  auto range = base::it::Transform(map, base::it::Range(src.begin(), src.end()));\n\n  dest->Assign(range.begin(), range.end(), src.size());\n}\n\n}  // namespace facade\n"
  },
  {
    "path": "src/facade/resp_expr.h",
    "content": "// Copyright 2022, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#pragma once\n\n#include <absl/strings/ascii.h>\n#include <absl/types/span.h>\n\n#include <optional>\n#include <string_view>\n#include <variant>\n#include <vector>\n\n#include \"facade/facade_types.h\"\n\nnamespace facade {\n\nclass RespExpr {\n public:\n  using Buffer = absl::Span<const uint8_t>;\n\n  enum Type : uint8_t { STRING, ARRAY, INT64, DOUBLE, NIL, NIL_ARRAY, ERROR };\n\n  using Vec = std::vector<RespExpr>;\n  Type type;\n  bool has_support;  // whether pointers in this item are supported by the external storage.\n\n  std::variant<int64_t, double, Buffer, Vec*> u;\n\n  RespExpr(Type t = NIL) : type(t), has_support(false) {\n  }\n\n  static Buffer buffer(std::string* s) {\n    return Buffer{reinterpret_cast<uint8_t*>(s->data()), s->size()};\n  }\n\n  std::string_view GetView() const {\n    Buffer buffer = GetBuf();\n    return {reinterpret_cast<const char*>(buffer.data()), buffer.size()};\n  }\n\n  std::string GetString() const {\n    return std::string(GetView());\n  }\n\n  Buffer GetBuf() const {\n    return std::get<Buffer>(u);\n  }\n\n  const Vec& GetVec() const {\n    return *std::get<Vec*>(u);\n  }\n\n  std::optional<int64_t> GetInt() const {\n    return std::holds_alternative<int64_t>(u) ? 
std::make_optional(std::get<int64_t>(u))\n                                              : std::nullopt;\n  }\n\n  size_t UsedMemory() const {\n    return 0;\n  }\n\n  static const char* TypeName(Type t);\n};\n\nusing RespVec = RespExpr::Vec;\nusing RespSpan = absl::Span<const RespExpr>;\n\ninline std::string_view ToSV(RespExpr::Buffer buf) {\n  return std::string_view{reinterpret_cast<const char*>(buf.data()), buf.size()};\n}\n\nvoid FillBackedArgs(const RespVec& src, cmn::BackedArguments* dest);\n\n}  // namespace facade\n\nnamespace std {\n\nostream& operator<<(ostream& os, const facade::RespExpr& e);\nostream& operator<<(ostream& os, facade::RespSpan rspan);\n\n}  // namespace std\n"
  },
  {
    "path": "src/facade/resp_expr_test_utils.cc",
    "content": "// Copyright 2026, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#include \"facade/resp_expr_test_utils.h\"\n\n#include <cstddef>\n#include <cstring>\n\nnamespace facade {\n\nRespExpr RespExprBuilder::BuildExpr(const RESPObj& obj) {\n  RespExpr expr{RespExpr::NIL};\n\n  switch (obj.GetType()) {\n    case RESPObj::Type::INTEGER: {\n      expr.type = RespExpr::INT64;\n      expr.u = obj.As<int64_t>().value();\n      break;\n    }\n    case RESPObj::Type::DOUBLE: {\n      expr.type = RespExpr::DOUBLE;\n      expr.u = obj.As<double>().value();\n      break;\n    }\n    case RESPObj::Type::NIL: {\n      expr.type = RespExpr::NIL;\n      break;\n    }\n    case RESPObj::Type::ERROR: {\n      expr.type = RespExpr::ERROR;\n      SetStringPayload(obj, &expr);\n      break;\n    }\n    case RESPObj::Type::STRING:\n    case RESPObj::Type::REPLY_STATUS: {\n      expr.type = RespExpr::STRING;\n      SetStringPayload(obj, &expr);\n      break;\n    }\n    case RESPObj::Type::ARRAY:\n    case RESPObj::Type::MAP:\n    case RESPObj::Type::SET: {\n      auto arr = obj.As<RESPArray>();\n      if (arr.has_value()) {\n        // Check if this is a null array (elements == SIZE_MAX which represents -1)\n        if (arr->Size() == SIZE_MAX) {\n          expr.type = RespExpr::NIL_ARRAY;\n          expr.u.emplace<RespExpr::Vec*>(nullptr);\n        } else {\n          expr.type = RespExpr::ARRAY;\n          auto vec = std::make_unique<RespExpr::Vec>();\n          vec->reserve(arr->Size());\n          for (size_t i = 0; i < arr->Size(); ++i) {\n            vec->push_back(BuildExpr((*arr)[i]));\n          }\n          expr.u = vec.get();\n          owned_arrays_.emplace_back(std::move(vec));\n          expr.has_support = true;\n        }\n      }\n      break;\n    }\n  }\n\n  return expr;\n}\n\nvoid RespExprBuilder::SetStringPayload(const RESPObj& obj, RespExpr* expr) {\n  auto sv = 
obj.As<std::string_view>().value_or(std::string_view{});\n  // Copy the string data so we don't hold references into zmalloc-allocated\n  // hiredis replies. The replies can then be freed on their allocating thread.\n  auto owned = std::make_unique<char[]>(sv.size());\n  memcpy(owned.get(), sv.data(), sv.size());\n  expr->u = RespExpr::Buffer{reinterpret_cast<const uint8_t*>(owned.get()), sv.size()};\n  expr->has_support = true;\n  owned_strings_.emplace_back(std::move(owned));\n}\n\n}  // namespace facade\n"
  },
  {
    "path": "src/facade/resp_expr_test_utils.h",
    "content": "// Copyright 2026, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#pragma once\n\n#include <memory>\n#include <optional>\n#include <vector>\n\n#include \"facade/resp_expr.h\"\n#include \"facade/resp_parser.h\"\n\nnamespace facade {\n\nclass RespExprBuilder {\n public:\n  RespExpr BuildExpr(const RESPObj& obj);\n\n  void Clear() {\n    owned_arrays_.clear();\n    // Note: owned_strings_ is NOT cleared here because test code may still hold\n    // string_view/Buffer references to data from prior ParseResponse calls\n    // (e.g., SHA values, DUMP payloads). This mirrors the old behavior where\n    // tmp_str_vec_ accumulated across calls within a test.\n  }\n\n private:\n  void SetStringPayload(const RESPObj& obj, RespExpr* expr);\n\n  std::vector<std::unique_ptr<RespExpr::Vec>> owned_arrays_;\n  // Own copies of string data so we don't hold references to zmalloc-allocated\n  // hiredis replies (which must be freed on the same thread they were allocated).\n  std::vector<std::unique_ptr<char[]>> owned_strings_;\n};\n\n}  // namespace facade\n"
  },
  {
    "path": "src/facade/resp_parser.cc",
    "content": "// Copyright 2022, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#include \"facade/resp_parser.h\"\n\n#include <cstring>\n\n#include \"base/logging.h\"\n\nextern \"C\" {\n#include \"redis/hiredis.h\"\n}\n\nnamespace facade {\n\nRESPParser::RESPParser() {\n  reader_ = redisReaderCreate();\n}\n\nRESPObj::RESPObj(RESPObj&& other) noexcept\n    : reply_(other.reply_), needs_to_free_(other.needs_to_free_) {\n  other.reply_ = nullptr;\n  other.needs_to_free_ = false;\n}\n\nRESPObj& RESPObj::operator=(RESPObj&& other) noexcept {\n  std::swap(needs_to_free_, other.needs_to_free_);\n  std::swap(reply_, other.reply_);\n  return *this;\n}\n\nRESPObj::~RESPObj() {\n  if (needs_to_free_)\n    freeReplyObject(reply_);\n}\n\nRESPObj::Type RESPObj::GetType() const {\n  DCHECK(reply_);\n  return static_cast<Type>(reply_->type);\n}\n\nsize_t RESPObj::Size() const {\n  if (!reply_)\n    return 0;\n  Type type = GetType();\n  return (type == Type::ARRAY || type == Type::MAP || type == Type::SET) ? 
reply_->elements : 1;\n}\n\nstd::optional<RESPObj> RESPParser::Feed(const char* data, size_t len) {\n  int status = REDIS_OK;\n  if (len != 0) {  // if no new data we check if previous data produced a reply\n    status = redisReaderFeed(reader_, data, len);\n    if (status != REDIS_OK) {\n      LOG(ERROR) << \"RESP parser error: \" << status << \" description: \" << reader_->errstr\n                 << \" data: \" << std::string_view{data, len};\n      return std::nullopt;\n    }\n  }\n  void* reply_obj = nullptr;\n  status = redisReaderGetReply(reader_, &reply_obj);\n  if (status != REDIS_OK) {\n    LOG(ERROR) << \"RESP parser error: \" << status << \" description: \" << reader_->errstr\n               << \" data: \" << data;\n    return std::nullopt;\n  }\n\n  return RESPObj(static_cast<redisReply*>(reply_obj), reply_obj != nullptr);\n}\n\nstd::ostream& operator<<(std::ostream& os, const RESPObj& obj) {\n  if (obj.Empty()) {\n    os << \"nullptr RESPObj\";\n    return os;\n  }\n  switch (obj.GetType()) {\n    // because we check type we don't expect As<T> to return nullopt here\n    case RESPObj::Type::INTEGER: {\n      os << *obj.As<std::int64_t>();\n      break;\n    }\n    case RESPObj::Type::DOUBLE: {\n      os << *obj.As<double>();\n      break;\n    }\n    case RESPObj::Type::ARRAY: {\n      os << *obj.As<RESPArray>();\n      break;\n    }\n    case RESPObj::Type::MAP:\n      [[fallthrough]];\n    case RESPObj::Type::SET: {\n      os << *obj.As<RESPArray>();\n      break;\n    }\n    case RESPObj::Type::STRING:\n      [[fallthrough]];\n    case RESPObj::Type::NIL:\n      [[fallthrough]];\n    case RESPObj::Type::ERROR:\n      [[fallthrough]];\n    case RESPObj::Type::REPLY_STATUS: {\n      os << *obj.As<std::string_view>();\n      break;\n    }\n    default:\n      os << \"Unknown RESPObj type: \" << static_cast<int>(obj.GetType());\n  }\n  return os;\n}\n\nstd::ostream& operator<<(std::ostream& os, const RESPArray& arr) {\n  os << \"[\";\n  for (int64_t i 
= 0; i < (int64_t)arr.Size() - 1; ++i) {\n    os << arr[i] << \", \";\n  }\n  os << arr[arr.Size() - 1] << \"]\";\n  return os;\n}\n\n}  // namespace facade\n"
  },
  {
    "path": "src/facade/resp_parser.h",
    "content": "// Copyright 2022, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n#pragma once\n\n#include <cassert>\n#include <memory>\n#include <optional>\n#include <tuple>\n\n#include \"io/io.h\"\nextern \"C\" {\n#include \"redis/hiredis.h\"\n}\n\nnamespace facade {\n\nclass RESPArray;\nclass RESPIterator;\n\nclass RESPObj {\n public:\n  enum class Type {\n    STRING = REDIS_REPLY_STRING,\n    ARRAY = REDIS_REPLY_ARRAY,\n    INTEGER = REDIS_REPLY_INTEGER,\n    NIL = REDIS_REPLY_NIL,\n    REPLY_STATUS = REDIS_REPLY_STATUS,\n    DOUBLE = REDIS_REPLY_DOUBLE,\n    ERROR = REDIS_REPLY_ERROR,\n    MAP = REDIS_REPLY_MAP,\n    SET = REDIS_REPLY_SET,\n  };\n  RESPObj() = default;\n  RESPObj(redisReply* reply, bool needs_to_free) : reply_(reply), needs_to_free_(needs_to_free) {\n  }\n\n  // TODO remove copy ctor, because it is not a deep copy\n  RESPObj(const RESPObj& other) : reply_(other.reply_), needs_to_free_(false) {\n  }\n  RESPObj& operator=(const RESPObj& other) = delete;\n\n  RESPObj(RESPObj&& other) noexcept;\n  RESPObj& operator=(RESPObj&& other) noexcept;\n\n  ~RESPObj();\n\n  bool Empty() const {\n    return reply_ == nullptr;\n  }\n\n  size_t Size() const;\n\n  Type GetType() const;\n\n  template <class T> std::optional<T> As() const;\n\n private:\n  redisReply* reply_ = nullptr;\n  bool needs_to_free_ = true;\n};\n\nclass RESPArray {\n public:\n  RESPArray(redisReply* arr_obj = nullptr) : arr_obj_(arr_obj) {\n  }\n\n  size_t Size() const {\n    return arr_obj_->elements;\n  }\n\n  bool Empty() const {\n    return Size() == 0;\n  }\n\n  RESPObj operator[](size_t index) const {\n    return RESPObj(arr_obj_->element[index], false);\n  }\n\n private:\n  redisReply* arr_obj_ = nullptr;\n};\n\nclass RESPParser {\n public:\n  RESPParser();\n  ~RESPParser() {\n    redisReaderFree(reader_);\n  }\n\n  std::optional<RESPObj> Feed(const char* data, size_t len);\n\n  size_t BufferPos() const {\n    return reader_->pos;\n  }\n\n 
private:\n  redisReader* reader_;\n};\n\nstd::ostream& operator<<(std::ostream& os, const RESPObj& obj);\nstd::ostream& operator<<(std::ostream& os, const RESPArray& arr);\n\nclass RESPIterator {\n public:\n  RESPIterator() = default;\n  RESPIterator(const RESPObj& obj) : obj_(obj) {\n  }\n\n  RESPIterator(RESPIterator&&) = default;\n  RESPIterator& operator=(RESPIterator&&) = default;\n\n  bool HasNext() const {\n    return index_ < obj_.Size();\n  }\n\n  bool HasError() const {\n    return index_ == std::numeric_limits<decltype(index_)>::max();\n  }\n\n  // Consume next values and return as tuple or single value\n  // if extraction fails, set error state\n  template <class T = std::string_view, class... Ts> auto Next() {\n    std::conditional_t<sizeof...(Ts) == 0, T, std::tuple<T, Ts...>> res{};\n    bool success = true;\n    if constexpr (sizeof...(Ts) == 0) {\n      success = Check(&res);\n    } else {\n      success = std::apply([this](auto&... args) { return Check<T, Ts...>(&args...); }, res);\n    }\n    SetError(!success);\n    return res;\n  }\n\n  // increase index only if all args are successfully extracted\n  template <class Arg, class... Args> bool Check(Arg* arg, Args*... 
args) {\n    auto tmp_index = index_;\n    if (index_ + sizeof...(Args) < obj_.Size()) {\n      if (auto arr = obj_.As<RESPArray>(); arr.has_value()) {\n        if (GetEntry(*arr, index_++, arg) && (GetEntry(*arr, index_++, args) && ...)) {\n          return true;\n        }\n      } else if (auto val = obj_.As<Arg>(); val.has_value()) {\n        assert(sizeof...(Args) == 0 && index_ == 0);\n        *arg = std::move(*val);\n        return true;\n      }\n    }\n    index_ = tmp_index;\n    return false;\n  }\n\n  void SetError(bool set = true) {\n    if (set)\n      index_ = std::numeric_limits<decltype(index_)>::max();\n  }\n\n private:\n  template <class Arg> bool GetEntry(const RESPArray& arr, size_t idx, Arg* arg) {\n    if (auto val = arr[idx].As<Arg>(); val.has_value()) {\n      *arg = std::move(*val);\n\n      return true;\n    }\n    return false;\n  }\n\n private:\n  RESPObj obj_;\n  size_t index_ = 0;\n};\n\ntemplate <class T> std::optional<T> RESPObj::As() const {\n  if (!reply_) {\n    return std::nullopt;\n  }\n  if constexpr (std::is_constructible_v<T, std::string_view>) {\n    if (reply_->type == REDIS_REPLY_STRING || reply_->type == REDIS_REPLY_ERROR ||\n        reply_->type == REDIS_REPLY_STATUS) {\n      return T{std::string_view{reply_->str, reply_->len}};\n    } else if (reply_->type == REDIS_REPLY_NIL) {\n      return T{std::string_view(\"NIL\")};\n    }\n  } else if constexpr (std::is_integral_v<T>) {\n    if (reply_->type == REDIS_REPLY_INTEGER) {\n      return static_cast<T>(reply_->integer);\n    }\n  } else if constexpr (std::is_floating_point_v<T>) {\n    if (reply_->type == REDIS_REPLY_DOUBLE) {\n      return static_cast<T>(reply_->dval);\n    }\n  } else if constexpr (std::is_same_v<T, RESPArray>) {\n    // MAP and SET use the same elements/element structure as ARRAY in hiredis\n    if (reply_->type == REDIS_REPLY_ARRAY || reply_->type == REDIS_REPLY_MAP ||\n        reply_->type == REDIS_REPLY_SET) {\n      return RESPArray(reply_);\n   
 }\n  } else if constexpr (std::is_same_v<T, RESPObj>) {\n    return RESPObj(reply_, false);\n  } else if constexpr (std::is_same_v<T, RESPIterator>) {\n    return RESPIterator(RESPObj(reply_, false));\n  }\n\n  // TODO add other types and errors processing\n  return std::nullopt;\n}\n\n}  // namespace facade\n"
  },
  {
    "path": "src/facade/resp_parser_test.cc",
    "content": "// Copyright 2022, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#include \"facade/resp_parser.h\"\n\n#include <mimalloc.h>\n\n#include \"base/gtest.h\"\n#include \"base/logging.h\"\n\nusing namespace testing;\nusing namespace std;\nnamespace facade {\n\nclass RESPParserTest : public testing::Test {\n protected:\n  static void SetUpTestSuite() {\n    init_zmalloc_threadlocal(mi_heap_get_backing());\n  }\n};\n\nTEST_F(RESPParserTest, BaseRespTypesTest) {\n  using Fields = std::map<std::string, std::string>;\n  using Docs = std::map<std::string, Fields>;\n\n  std::string msg1 =\n      \"*17\\r\\n:8\\r\\n$2\\r\\ns0\\r\\n*2\\r\\n$5\\r\\ntitle\\r\\n$6\\r\\ntest \"\n      \"0\\r\\n$2\\r\\ns3\\r\\n*2\\r\\n$5\\r\\ntitle\\r\\n$6\\r\\ntest \"\n      \"3\\r\\n$2\\r\\ns7\\r\\n*2\\r\\n$5\\r\\ntitle\\r\\n$6\\r\\ntest \"\n      \"7\\r\\n$2\\r\\ns8\\r\\n*2\\r\\n$5\\r\\ntitle\\r\\n$6\\r\\ntest \"\n      \"8\\r\\n$2\\r\\ns4\\r\\n*2\\r\\n$5\\r\\ntitle\\r\\n$6\\r\\ntest \"\n      \"4\\r\\n$2\\r\\ns9\\r\\n*2\\r\\n$5\\r\\ntitle\\r\\n$6\\r\\ntest 9\\r\\n\";\n\n  std::string msg2 =\n      \"$2\\r\\ns1\\r\\n*2\\r\\n$5\\r\\ntitle\\r\\n$6\\r\\ntest \"\n      \"1\\r\\n$2\\r\\ns5\\r\\n*2\\r\\n$5\\r\\ntitle\\r\\n$6\\r\\ntest 5\\r\\n\";\n\n  RESPParser reader;\n  auto reply = reader.Feed(msg1.c_str(), msg1.size());\n  ASSERT_TRUE(reply->Empty());\n\n  reply = reader.Feed(msg2.c_str(), msg2.size());\n  ASSERT_FALSE(reply->Empty());\n\n  EXPECT_EQ(reply->GetType(), RESPObj::Type::ARRAY);\n  auto array = *reply->As<RESPArray>();\n  EXPECT_GE(array.Size(), 1);\n  EXPECT_EQ(array[0].GetType(), RESPObj::Type::INTEGER);\n\n  Docs search_results;\n  for (size_t i = 1; i < array.Size(); i += 2) {\n    auto& fields = search_results[*array[i].As<std::string>()];\n\n    auto field_array = *array[i + 1].As<RESPArray>();\n\n    for (size_t j = 0; j < field_array.Size(); j += 2) {\n      std::string field_name = *field_array[j].As<std::string>();\n      
std::string field_value = *field_array[j + 1].As<std::string>();\n\n      fields[field_name] = field_value;\n    }\n  }\n\n  EXPECT_EQ(search_results.size(), 8);\n\n  EXPECT_EQ(search_results[\"s0\"][\"title\"], \"test 0\");\n  EXPECT_EQ(search_results[\"s1\"][\"title\"], \"test 1\");\n  EXPECT_EQ(search_results[\"s3\"][\"title\"], \"test 3\");\n  EXPECT_EQ(search_results[\"s4\"][\"title\"], \"test 4\");\n  EXPECT_EQ(search_results[\"s5\"][\"title\"], \"test 5\");\n  EXPECT_EQ(search_results[\"s7\"][\"title\"], \"test 7\");\n  EXPECT_EQ(search_results[\"s8\"][\"title\"], \"test 8\");\n  EXPECT_EQ(search_results[\"s9\"][\"title\"], \"test 9\");\n}\n\nTEST_F(RESPParserTest, RESPIteratorTest) {\n  using Fields = std::map<std::string, std::string>;\n  using Docs = std::map<std::string, Fields>;\n\n  std::string msg1 =\n      \"*17\\r\\n:8\\r\\n$2\\r\\ns0\\r\\n*2\\r\\n$5\\r\\ntitle\\r\\n$6\\r\\ntest \"\n      \"0\\r\\n$2\\r\\ns3\\r\\n*2\\r\\n$5\\r\\ntitle\\r\\n$6\\r\\ntest \"\n      \"3\\r\\n$2\\r\\ns7\\r\\n*2\\r\\n$5\\r\\ntitle\\r\\n$6\\r\\ntest \"\n      \"7\\r\\n$2\\r\\ns8\\r\\n*2\\r\\n$5\\r\\ntitle\\r\\n$6\\r\\ntest \"\n      \"8\\r\\n$2\\r\\ns4\\r\\n*2\\r\\n$5\\r\\ntitle\\r\\n$6\\r\\ntest \"\n      \"4\\r\\n$2\\r\\ns9\\r\\n*2\\r\\n$5\\r\\ntitle\\r\\n$6\\r\\ntest 9\\r\\n\";\n\n  std::string msg2 =\n      \"$2\\r\\ns1\\r\\n*2\\r\\n$5\\r\\ntitle\\r\\n$6\\r\\ntest \"\n      \"1\\r\\n$2\\r\\ns5\\r\\n*2\\r\\n$5\\r\\ntitle\\r\\n$6\\r\\ntest 5\\r\\n\";\n\n  RESPParser reader;\n  auto reply = reader.Feed(msg1.c_str(), msg1.size());\n  ASSERT_TRUE(reply->Empty());\n\n  reply = reader.Feed(msg2.c_str(), msg2.size());\n  ASSERT_FALSE(reply->Empty());\n\n  RESPIterator it(*reply);\n  EXPECT_EQ(it.Next<size_t>(), 8);\n\n  Docs search_results;\n  while (it.HasNext()) {\n    auto [doc_id, field_it] = it.Next<std::string, RESPIterator>();\n    auto& fields = search_results[std::move(doc_id)];\n\n    while (field_it.HasNext()) {\n      auto [field_name, field_value] = 
field_it.Next<std::string_view, std::string_view>();\n      fields.emplace(field_name, field_value);\n    }\n  }\n\n  EXPECT_EQ(search_results.size(), 8);\n\n  EXPECT_EQ(search_results[\"s0\"][\"title\"], \"test 0\");\n  EXPECT_EQ(search_results[\"s1\"][\"title\"], \"test 1\");\n  EXPECT_EQ(search_results[\"s3\"][\"title\"], \"test 3\");\n  EXPECT_EQ(search_results[\"s4\"][\"title\"], \"test 4\");\n  EXPECT_EQ(search_results[\"s5\"][\"title\"], \"test 5\");\n  EXPECT_EQ(search_results[\"s7\"][\"title\"], \"test 7\");\n  EXPECT_EQ(search_results[\"s8\"][\"title\"], \"test 8\");\n  EXPECT_EQ(search_results[\"s9\"][\"title\"], \"test 9\");\n}\n\n}  // namespace facade\n"
  },
  {
    "path": "src/facade/resp_srv_parser.cc",
    "content": "// Copyright 2025, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n#include \"facade/resp_srv_parser.h\"\n\n#include <absl/strings/escaping.h>\n#include <absl/strings/numbers.h>\n\n#include \"base/logging.h\"\n#include \"common/backed_args.h\"\n#include \"common/heap_size.h\"\n\nnamespace facade {\n\nusing namespace std;\n\nauto RespSrvParser::Parse(Buffer str, uint32_t* consumed, cmn::BackedArguments* args) -> Result {\n  DCHECK(!str.empty());\n  *consumed = 0;\n  DVLOG(2) << \"Parsing: \"\n           << absl::CHexEscape(string_view{reinterpret_cast<const char*>(str.data()), str.size()});\n\n  if (state_ == CMD_COMPLETE_S) {\n    args->clear();\n    buf_stash_.clear();\n\n    if (str[0] == '*') {\n      // We recognized a non-INLINE state, starting with '*'\n      str.remove_prefix(1);\n      *consumed += 1;\n      state_ = ARRAY_LEN_S;\n      if (str.empty())\n        return INPUT_PENDING;\n    } else {  // INLINE mode, aka PING\\n\n      state_ = INLINE_S;\n    }\n  }\n\n  ResultConsumed resultc{OK, 0};\n  do {\n    switch (state_) {\n      case ARRAY_LEN_S:\n        resultc = ConsumeArrayLen(str, args);\n        break;\n      case PARSE_ARG_TYPE:\n        if (str[0] != '$')  // server side only supports bulk strings.\n          return BAD_BULKLEN;\n        resultc.second = 1;\n        state_ = PARSE_ARG_S;\n        break;\n      case PARSE_ARG_S:\n        resultc = ParseArg(str, args);\n        break;\n      case INLINE_S:\n        resultc = ParseInline(str, args);\n        break;\n      case BULK_STR_S:\n        resultc = ConsumeBulk(str, args);\n        break;\n      case SLASH_N_S:\n        if (str[0] != '\\n') {\n          resultc.first = BAD_STRING;\n        } else {\n          resultc = {OK, 1};\n          HandleFinishArg();\n        }\n        break;\n      default:\n        LOG(FATAL) << \"Unexpected state \" << int(state_);\n    }\n\n    *consumed += resultc.second;\n    
str.remove_prefix(exchange(resultc.second, 0));\n  } while (state_ != CMD_COMPLETE_S && resultc.first == OK && !str.empty());\n\n  if (state_ != CMD_COMPLETE_S) {\n    if (resultc.first == OK) {\n      resultc.first = INPUT_PENDING;\n    }\n\n    if (resultc.first == INPUT_PENDING) {\n      if (!str.empty()) {\n        LOG(DFATAL) << \"Did not consume all input: \"\n                    << absl::CHexEscape({reinterpret_cast<const char*>(str.data()), str.size()})\n                    << \", state: \" << int(state_) << \" smallbuf: \"\n                    << absl::CHexEscape(\n                           {reinterpret_cast<const char*>(small_buf_.data()), small_len_});\n      }\n    }\n    return resultc.first;\n  }\n\n  return resultc.first;\n}\n\nauto RespSrvParser::ParseInline(Buffer str, cmn::BackedArguments* args) -> ResultConsumed {\n  DCHECK(!str.empty());\n\n  const uint8_t* ptr = str.begin();\n  const uint8_t* end = str.end();\n  const uint8_t* token_start = ptr;\n\n  auto find_token_end = [](const uint8_t* ptr, const uint8_t* end) {\n    while (ptr != end && *ptr > 32)\n      ++ptr;\n    return ptr;\n  };\n\n  if (!buf_stash_.empty()) {\n    ptr = find_token_end(ptr, end);\n    size_t len = ptr - token_start;\n\n    buf_stash_.append(reinterpret_cast<const char*>(token_start), len);\n    if (ptr == end) {\n      return {INPUT_PENDING, ptr - token_start};\n    }\n\n    args->PushArg(buf_stash_);\n    buf_stash_.clear();\n  }\n\n  while (ptr != end) {\n    // For inline input we only require \\n.\n    if (*ptr == '\\n') {\n      if (args->empty()) {\n        ++ptr;\n        continue;  // skip empty line\n      }\n      break;\n    }\n\n    if (*ptr <= 32) {  // skip ws/control chars\n      ++ptr;\n      continue;\n    }\n\n    // token start\n    DCHECK(buf_stash_.empty());\n\n    token_start = ptr;\n    ptr = find_token_end(ptr, end);\n    if (ptr != end) {\n      args->PushArg(\n          string_view{reinterpret_cast<const char*>(token_start), size_t(ptr - 
token_start)});\n    }\n  }\n\n  uint32_t last_consumed = ptr - str.data();\n  if (ptr == end) {                       // we have not finished parsing.\n    bool is_broken_token = ptr[-1] > 32;  // we stopped in the middle of the token.\n    if (is_broken_token) {\n      DCHECK(buf_stash_.empty());\n      buf_stash_.append(reinterpret_cast<const char*>(token_start), size_t(ptr - token_start));\n    } else if (args->empty()) {\n      state_ = CMD_COMPLETE_S;  // have not found anything besides whitespace.\n    }\n    return {INPUT_PENDING, last_consumed};\n  }\n\n  DCHECK_EQ('\\n', *ptr);\n\n  ++last_consumed;  // consume \\n as well.\n  state_ = CMD_COMPLETE_S;\n\n  return {OK, last_consumed};\n}\n\n// Parse lines like:'$5\\r\\n' or '*2\\r\\n'. The first character is already consumed by the caller.\nauto RespSrvParser::ParseLen(Buffer str, int64_t* res) -> ResultConsumed {\n  DCHECK(!str.empty());\n\n  const char* s = reinterpret_cast<const char*>(str.data());\n  const char* pos = reinterpret_cast<const char*>(memchr(s, '\\n', str.size()));\n  if (!pos) {\n    if (str.size() + small_len_ < small_buf_.size()) {\n      memcpy(&small_buf_[small_len_], str.data(), str.size());\n      small_len_ += str.size();\n      return {INPUT_PENDING, str.size()};\n    }\n    LOG(WARNING) << \"Unexpected format \" << string_view{s, str.size()};\n    return ResultConsumed{BAD_ARRAYLEN, 0};\n  }\n\n  unsigned consumed = pos - s + 1;\n  if (small_len_ > 0) {\n    if (small_len_ + consumed >= small_buf_.size()) {\n      return ResultConsumed{BAD_ARRAYLEN, consumed};\n    }\n    memcpy(&small_buf_[small_len_], str.data(), consumed);\n    small_len_ += consumed;\n    s = small_buf_.data();\n    pos = s + small_len_ - 1;\n    small_len_ = 0;\n  }\n\n  if (pos[-1] != '\\r') {\n    return {BAD_ARRAYLEN, consumed};\n  }\n\n  // Skip 2 last characters (\\r\\n).\n  string_view len_token{s, size_t(pos - 1 - s)};\n  bool success = absl::SimpleAtoi(len_token, res);\n\n  if (success && *res >= -1) 
{\n    return ResultConsumed{OK, consumed};\n  }\n\n  LOG(ERROR) << \"Failed to parse len \" << absl::CHexEscape(len_token) << \" \"\n             << absl::CHexEscape(string_view{reinterpret_cast<const char*>(str.data()), str.size()})\n             << \" \" << consumed << \" \" << int(s == small_buf_.data());\n  return ResultConsumed{BAD_ARRAYLEN, consumed};\n}\n\nauto RespSrvParser::ConsumeArrayLen(Buffer str, cmn::BackedArguments* args) -> ResultConsumed {\n  int64_t len;\n\n  ResultConsumed res = ParseLen(str, &len);\n  if (res.first != OK) {\n    return res;\n  }\n\n  if (len <= 0) {\n    return {BAD_ARRAYLEN, res.second};\n  }\n\n  if (len > max_arr_len_) {\n    LOG(WARNING) << \"Multibulk len is too large \" << len;\n\n    return {BAD_ARRAYLEN, res.second};\n  }\n\n  state_ = PARSE_ARG_TYPE;\n  arg_len_ = len;\n  args->Reserve(len, 0);\n  return {OK, res.second};\n}\n\nauto RespSrvParser::ParseArg(Buffer str, cmn::BackedArguments* args) -> ResultConsumed {\n  DCHECK(!str.empty());\n\n  int64_t len;\n\n  ResultConsumed res = ParseLen(str, &len);\n  if (res.first != OK) {\n    return res;\n  }\n\n  if (len > 0 && static_cast<uint64_t>(len) > max_bulk_len_) {\n    LOG_EVERY_T(WARNING, 1) << \"Threshold reached with bulk len: \" << len\n                            << \", consider increasing max_bulk_len\";\n    return {BAD_BULKLEN, res.second};\n  }\n\n  if (len < 0) {\n    return {BAD_BULKLEN, res.second};\n  }\n\n  bulk_len_ = len;\n  state_ = BULK_STR_S;\n  args->PushArg(size_t(len));\n\n  return {OK, res.second};\n}\n\nauto RespSrvParser::ConsumeBulk(Buffer str, cmn::BackedArguments* args) -> ResultConsumed {\n  DCHECK_EQ(small_len_, 0);\n  uint32_t consumed = 0;\n\n  if (str.size() >= bulk_len_) {\n    consumed = bulk_len_;\n    if (bulk_len_) {\n      char* last_arg = args->data(args->size() - 1);  // Get pointer to last argument.\n      DCHECK_GE(args->elem_len(args->size() - 1), bulk_len_);\n      char* start = last_arg + (args->elem_len(args->size() - 1) 
- bulk_len_);\n      memcpy(start, str.data(), bulk_len_);\n      str.remove_prefix(exchange(bulk_len_, 0));\n    }\n\n    if (str.size() >= 2) {\n      if (str[0] != '\\r' || str[1] != '\\n') {\n        return {BAD_STRING, consumed};\n      }\n      HandleFinishArg();\n      return {OK, consumed + 2};\n    }\n\n    if (str.size() == 1) {\n      if (str[0] != '\\r') {\n        return {BAD_STRING, consumed};\n      }\n      state_ = SLASH_N_S;\n      consumed++;\n    }\n    return {INPUT_PENDING, consumed};\n  }\n\n  DCHECK(bulk_len_);\n  DCHECK_GE(args->elem_len(args->size() - 1), bulk_len_);\n  size_t len = std::min<size_t>(str.size(), bulk_len_);\n  char* last_arg = args->data(args->size() - 1);  // Get pointer to last argument.\n  char* start = last_arg + (args->elem_len(args->size() - 1) - bulk_len_);\n  memcpy(start, str.data(), len);\n  consumed = len;\n  bulk_len_ -= len;\n\n  return {INPUT_PENDING, consumed};\n}\n\nvoid RespSrvParser::HandleFinishArg() {\n  state_ = (--arg_len_ == 0) ? CMD_COMPLETE_S : PARSE_ARG_TYPE;\n\n  small_len_ = 0;\n}\n\nsize_t RespSrvParser::UsedMemory() const {\n  return cmn::HeapSize(buf_stash_);\n}\n\n}  // namespace facade\n"
  },
  {
    "path": "src/facade/resp_srv_parser.h",
    "content": "// Copyright 2025, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n#pragma once\n\n#include <absl/types/span.h>\n\n#include <memory>\n#include <utility>\n#include <vector>\n\n#include \"common/backed_args.h\"\n\nnamespace facade {\n\n/**\n * @brief RESP server-side parser.\n */\nclass RespSrvParser {\n public:\n  enum Result : uint8_t {\n    OK,\n    INPUT_PENDING,\n    BAD_ARRAYLEN,\n    BAD_BULKLEN,\n    BAD_STRING,\n  };\n  using Buffer = absl::Span<const uint8_t>;\n  explicit RespSrvParser(uint32_t max_arr_len = UINT32_MAX, uint32_t max_bulk_len = UINT32_MAX)\n      : max_arr_len_(max_arr_len), max_bulk_len_(max_bulk_len) {\n  }\n\n  /**\n   * @brief Parses str into res. \"consumed\" stores number of bytes consumed from str.\n   *\n   * A caller should not invalidate str if the parser returns RESP_OK as long as he continues\n   * accessing res. However, if parser returns INPUT_PENDING a caller may discard consumed\n   * part of str because parser caches the intermediate state internally according to 'consumed'\n   * result.\n   */\n\n  Result Parse(Buffer str, uint32_t* consumed, cmn::BackedArguments* dest);\n\n  size_t parselen_hint() const {\n    return bulk_len_;\n  }\n\n  size_t UsedMemory() const;\n\n private:\n  using ResultConsumed = std::pair<Result, uint32_t>;\n\n  // Skips the first character (*).\n  ResultConsumed ConsumeArrayLen(Buffer str, cmn::BackedArguments* args);\n  ResultConsumed ParseArg(Buffer str, cmn::BackedArguments* args);\n  ResultConsumed ConsumeBulk(Buffer str, cmn::BackedArguments* args);\n  ResultConsumed ParseInline(Buffer str, cmn::BackedArguments* args);\n  ResultConsumed ParseLen(Buffer str, int64_t* res);\n\n  void HandleFinishArg();\n\n  enum State : uint8_t {\n    INLINE_S,\n    ARRAY_LEN_S,\n    PARSE_ARG_TYPE,\n    PARSE_ARG_S,  // Parse string\\r\\n\n    BULK_STR_S,\n    SLASH_N_S,\n    CMD_COMPLETE_S,\n  };\n\n  State state_ = CMD_COMPLETE_S;\n  uint8_t small_len_ = 
0;\n\n  uint32_t bulk_len_ = 0, arg_len_ = 0;\n  uint32_t max_arr_len_;\n  uint32_t max_bulk_len_;\n\n  std::string buf_stash_;\n  std::array<char, 32> small_buf_;\n};\n\n}  // namespace facade\n"
  },
  {
    "path": "src/facade/resp_srv_parser_test.cc",
    "content": "// Copyright 2025, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#include \"facade/resp_srv_parser.h\"\n\n#include <absl/strings/str_cat.h>\n#include <gmock/gmock.h>\n\n#include \"base/gtest.h\"\n#include \"base/logging.h\"\n\nusing namespace testing;\nusing namespace std;\nnamespace facade {\n\n// Custom printer for RespSrvParser::Result to make test output more readable\nvoid PrintTo(const RespSrvParser::Result& result, std::ostream* os) {\n  switch (result) {\n    case RespSrvParser::OK:\n      *os << \"OK\";\n      break;\n    case RespSrvParser::INPUT_PENDING:\n      *os << \"INPUT_PENDING\";\n      break;\n    case RespSrvParser::BAD_ARRAYLEN:\n      *os << \"BAD_ARRAYLEN\";\n      break;\n    case RespSrvParser::BAD_BULKLEN:\n      *os << \"BAD_BULKLEN\";\n      break;\n    case RespSrvParser::BAD_STRING:\n      *os << \"BAD_STRING\";\n      break;\n    default:\n      *os << \"UNKNOWN(\" << static_cast<int>(result) << \")\";\n      break;\n  }\n}\n\nclass RespSrvParserTest : public testing::Test {\n protected:\n  RespSrvParser::Result Parse(std::string_view str);\n\n  RespSrvParser parser_;\n  cmn::BackedArguments args_;\n  uint32_t consumed_;\n};\n\nRespSrvParser::Result RespSrvParserTest::Parse(std::string_view str) {\n  RespSrvParser::Buffer buf{reinterpret_cast<const uint8_t*>(str.data()), str.size()};\n  return parser_.Parse(buf, &consumed_, &args_);\n}\n\nTEST_F(RespSrvParserTest, Inline) {\n  const char kCmd1[] = \"KEY   VAL\\r\\n\";\n\n  ASSERT_EQ(RespSrvParser::OK, Parse(kCmd1));\n  EXPECT_EQ(strlen(kCmd1), consumed_);\n  EXPECT_THAT(args_, ElementsAre(\"KEY\", \"VAL\"));\n\n  ASSERT_EQ(RespSrvParser::INPUT_PENDING, Parse(\"KEY\"));\n  EXPECT_EQ(3, consumed_);\n  ASSERT_EQ(RespSrvParser::INPUT_PENDING, Parse(\" FOO \"));\n  EXPECT_EQ(5, consumed_);\n  ASSERT_EQ(RespSrvParser::INPUT_PENDING, Parse(\" BAR\"));\n  EXPECT_EQ(4, consumed_);\n  ASSERT_EQ(RespSrvParser::OK, Parse(\" \\r\\n \"));\n  
EXPECT_EQ(3, consumed_);\n  EXPECT_THAT(args_, ElementsAre(\"KEY\", \"FOO\", \"BAR\"));\n\n  ASSERT_EQ(RespSrvParser::INPUT_PENDING, Parse(\" 1 2\"));\n  EXPECT_EQ(4, consumed_);\n  ASSERT_EQ(RespSrvParser::INPUT_PENDING, Parse(\" 45\"));\n  EXPECT_EQ(3, consumed_);\n  ASSERT_EQ(RespSrvParser::OK, Parse(\"\\r\\n\"));\n  EXPECT_EQ(2, consumed_);\n  EXPECT_THAT(args_, ElementsAre(\"1\", \"2\", \"45\"));\n\n  // Empty queries return INPUT_PENDING.\n  EXPECT_EQ(RespSrvParser::INPUT_PENDING, Parse(\"\\r\\n\"));\n  EXPECT_EQ(2, consumed_);\n\n  ASSERT_EQ(RespSrvParser::OK, Parse(\"_\\r\\n\"));\n  EXPECT_THAT(args_, ElementsAre(\"_\"));\n}\n\nTEST_F(RespSrvParserTest, Multi1) {\n  ASSERT_EQ(RespSrvParser::INPUT_PENDING, Parse(\"*1\\r\\n\"));\n  EXPECT_EQ(4, consumed_);\n  EXPECT_EQ(0, parser_.parselen_hint());\n\n  ASSERT_EQ(RespSrvParser::INPUT_PENDING, Parse(\"$4\\r\\n\"));\n  EXPECT_EQ(4, consumed_);\n  EXPECT_EQ(4, parser_.parselen_hint());\n\n  ASSERT_EQ(RespSrvParser::OK, Parse(\"PING\\r\\n\"));\n  EXPECT_EQ(6, consumed_);\n  EXPECT_EQ(0, parser_.parselen_hint());\n  EXPECT_THAT(args_, ElementsAre(\"PING\"));\n}\n\nTEST_F(RespSrvParserTest, Multi2) {\n  ASSERT_EQ(RespSrvParser::INPUT_PENDING, Parse(\"*1\\r\\n$\"));\n  EXPECT_EQ(5, consumed_);\n\n  ASSERT_EQ(RespSrvParser::INPUT_PENDING, Parse(\"4\\r\\nMSET\"));\n  EXPECT_EQ(7, consumed_);\n\n  ASSERT_EQ(RespSrvParser::OK, Parse(\"\\r\\n*2\\r\\n\"));\n  EXPECT_EQ(2, consumed_);\n\n  ASSERT_EQ(RespSrvParser::INPUT_PENDING, Parse(\"*2\\r\\n$3\\r\\nKEY\\r\\n$3\\r\\nVAL\"));\n  EXPECT_EQ(20, consumed_);\n\n  ASSERT_EQ(RespSrvParser::OK, Parse(\"\\r\\n\"));\n  EXPECT_EQ(2, consumed_);\n  EXPECT_THAT(args_, ElementsAre(\"KEY\", \"VAL\"));\n}\n\nTEST_F(RespSrvParserTest, Multi3) {\n  const char kFirst[] = \"*3\\r\\n$3\\r\\nSET\\r\\n$16\\r\\nkey:\";\n  const char kSecond[] = \"000002273458\\r\\n$3\\r\\nVXK\";\n  ASSERT_EQ(RespSrvParser::INPUT_PENDING, Parse(kFirst));\n  ASSERT_EQ(strlen(kFirst), consumed_);\n  
ASSERT_EQ(RespSrvParser::INPUT_PENDING, Parse(kSecond));\n  ASSERT_EQ(strlen(kSecond), consumed_);\n  ASSERT_EQ(RespSrvParser::OK, Parse(\"\\r\\n*3\\r\\n$3\\r\\nSET\"));\n  ASSERT_EQ(2, consumed_);\n  EXPECT_THAT(args_, ElementsAre(\"SET\", \"key:000002273458\", \"VXK\"));\n}\n\nTEST_F(RespSrvParserTest, InvalidMult1) {\n  ASSERT_EQ(RespSrvParser::BAD_BULKLEN, Parse(\"*2\\r\\n$3\\r\\nFOO\\r\\nBAR\\r\\n\"));\n}\n\nTEST_F(RespSrvParserTest, Empty) {\n  ASSERT_EQ(RespSrvParser::OK, Parse(\"*2\\r\\n$0\\r\\n\\r\\n$0\\r\\n\\r\\n\"));\n}\n\nTEST_F(RespSrvParserTest, LargeBulk) {\n  string_view prefix(\"*1\\r\\n$1024\\r\\n\");\n\n  ASSERT_EQ(RespSrvParser::INPUT_PENDING, Parse(prefix));\n  ASSERT_EQ(prefix.size(), consumed_);\n  ASSERT_GE(parser_.parselen_hint(), 1024);\n\n  string half(512, 'a');\n  ASSERT_EQ(RespSrvParser::INPUT_PENDING, Parse(half));\n  ASSERT_EQ(512, consumed_);\n  ASSERT_GE(parser_.parselen_hint(), 512);\n  ASSERT_EQ(RespSrvParser::INPUT_PENDING, Parse(half));\n  ASSERT_EQ(512, consumed_);\n  ASSERT_EQ(RespSrvParser::INPUT_PENDING, Parse(\"\\r\"));\n  ASSERT_EQ(1, consumed_);\n  ASSERT_EQ(RespSrvParser::OK, Parse(\"\\n\"));\n  EXPECT_EQ(1, consumed_);\n\n  string part1 = absl::StrCat(prefix, half);\n  ASSERT_EQ(RespSrvParser::INPUT_PENDING, Parse(part1));\n  ASSERT_EQ(RespSrvParser::INPUT_PENDING, Parse(half));\n  ASSERT_EQ(RespSrvParser::OK, Parse(\"\\r\\n\"));\n\n  prefix = \"*1\\r\\n$27000000\\r\\n\";\n  ASSERT_EQ(RespSrvParser::INPUT_PENDING, Parse(prefix));\n  ASSERT_EQ(prefix.size(), consumed_);\n  string chunk(1000000, 'a');\n  for (unsigned i = 0; i < 27; ++i) {\n    ASSERT_EQ(RespSrvParser::INPUT_PENDING, Parse(chunk));\n    ASSERT_EQ(chunk.size(), consumed_);\n  }\n  ASSERT_EQ(RespSrvParser::OK, Parse(\"\\r\\n\"));\n  ASSERT_EQ(args_.size(), 1);\n  EXPECT_EQ(27000000u, args_[0].size());\n}\n\nTEST_F(RespSrvParserTest, Eol) {\n  ASSERT_EQ(RespSrvParser::INPUT_PENDING, Parse(\"*1\\r\"));\n  EXPECT_EQ(3, consumed_);\n  
ASSERT_EQ(RespSrvParser::INPUT_PENDING, Parse(\"\\n$5\\r\\n\"));\n  EXPECT_EQ(5, consumed_);\n}\n\nTEST_F(RespSrvParserTest, BulkSplit) {\n  ASSERT_EQ(RespSrvParser::INPUT_PENDING, Parse(\"*1\\r\\n$4\\r\\nSADD\\r\"));\n  ASSERT_EQ(13, consumed_);\n  ASSERT_EQ(RespSrvParser::OK, Parse(\"\\n\"));\n}\n\nTEST_F(RespSrvParserTest, InlineSplit) {\n  ASSERT_EQ(RespSrvParser::INPUT_PENDING, Parse(\"\\n\"));\n  EXPECT_EQ(1, consumed_);\n  ASSERT_EQ(RespSrvParser::OK, Parse(\"\\nPING\\n\\n\"));\n  EXPECT_EQ(6, consumed_);\n  ASSERT_EQ(RespSrvParser::INPUT_PENDING, Parse(\"\\n\"));\n  EXPECT_EQ(1, consumed_);\n  ASSERT_EQ(RespSrvParser::INPUT_PENDING, Parse(\"P\"));\n  ASSERT_EQ(RespSrvParser::OK, Parse(\"ING\\n\"));\n}\n\nTEST_F(RespSrvParserTest, InlineReset) {\n  ASSERT_EQ(RespSrvParser::INPUT_PENDING, Parse(\"\\t \\r\\n\"));\n  EXPECT_EQ(4, consumed_);\n  ASSERT_EQ(RespSrvParser::OK, Parse(\"*1\\r\\n$3\\r\\nfoo\\r\\n\"));\n  EXPECT_EQ(13, consumed_);\n}\n\n}  // namespace facade\n"
  },
  {
    "path": "src/facade/resp_validator.cc",
    "content": "// Copyright 2025, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#include <absl/strings/escaping.h>\n\n#include <cstdint>\n#include <fstream>\n#include <iostream>\n\n#include \"base/flags.h\"\n#include \"base/init.h\"\n#include \"facade/redis_parser.h\"\n#include \"io/io.h\"\n\nusing namespace facade;\nusing namespace std;\n\nABSL_FLAG(string, input, \"\", \"If not empty - reads data from the file instead of stdin. \");\n\n// Validates RESP3 server responses by using RespParser.\n// Server traffic can be recorded using:\n// tcpflow  -i any port 6379 -o /tmp/tcp_flow\nint main(int argc, char* argv[]) {\n  MainInitGuard guard(&argc, &argv);\n\n  RedisParser parser(RedisParser::Mode::CLIENT);\n  RedisParser::Result parse_result = RedisParser::OK;\n  char buf[1024];\n  istream* input_stream = &cin;\n  if (!absl::GetFlag(FLAGS_input).empty()) {\n    input_stream = new ifstream(absl::GetFlag(FLAGS_input), ios::binary);\n    if (!input_stream->good()) {\n      cerr << \"Failed to open input file: \" << absl::GetFlag(FLAGS_input) << \"\\n\";\n      return -1;\n    }\n  }\n  size_t len = 0, offset = 0;\n  do {\n    input_stream->read(buf + len, sizeof(buf) - len);\n    size_t read = input_stream->gcount();\n    if (read == 0) {\n      if (parse_result != RedisParser::OK) {\n        cerr << \"unexpected: \" << parse_result << \"\\n\";\n      }\n      break;\n    }\n    DVLOG(1) << \"Read \" << read << \" bytes from input stream, offset: \" << offset;\n    len += read;\n\n    RespExpr::Vec args;\n    uint32_t consumed = 0;\n    char* next = buf;\n    while (len) {\n      string_view sv{next, len};\n      parse_result = parser.Parse(io::Buffer(sv), &consumed, &args);\n      if (parse_result != RedisParser::OK && parse_result != RedisParser::INPUT_PENDING) {\n        cerr << \"Parse error: \" << int(parse_result) << \" at offset \" << offset\n             << \" when parsing: \" << absl::CHexEscape({reinterpret_cast<const 
char*>(next), len})\n             << \"\\n\";\n        return -1;\n      }\n\n      if (consumed == 0) {  // not enough data to parse.\n        DVLOG(1) << \"No data consumed, waiting for more input.\";\n        memcpy(buf, next, len);  // move the remaining data to the start of the buffer.\n        break;\n      }\n      len -= consumed;\n      next += consumed;\n      offset += consumed;\n    }\n  } while (!input_stream->eof());\n\n  if (input_stream != &cin) {\n    delete input_stream;\n  }\n  cout << \"LGTM\\n\";\n  return 0;\n}\n"
  },
  {
    "path": "src/facade/service_interface.cc",
    "content": "// Copyright 2024, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#include \"facade/service_interface.h\"\n\n#include <absl/strings/str_cat.h>\n\n#include \"facade/facade_types.h\"\n\nnamespace facade {\n\nstd::string ServiceInterface::ContextInfo::Format() const {\n  char buf[16] = {0};\n  std::string res = absl::StrCat(\"db=\", db_index);\n\n  unsigned index = 0;\n\n  if (async_dispatch)\n    buf[index++] = 'a';\n\n  if (conn_closing)\n    buf[index++] = 't';\n\n  if (subscribers)\n    buf[index++] = 'P';\n\n  if (blocked)\n    buf[index++] = 'b';\n\n  if (index)\n    absl::StrAppend(&res, \" flags=\", buf);\n  return res;\n}\n\nDispatchResult ServiceInterface::DispatchCommandSimple(ParsedCommand* cmd, AsyncPreference mode) {\n  return DispatchCommand(ParsedArgs{*cmd}, cmd, mode);\n}\n\n}  // namespace facade\n"
  },
  {
    "path": "src/facade/service_interface.h",
    "content": "// Copyright 2022, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#pragma once\n\n#include <string>\n\n#include \"facade/facade_types.h\"\n#include \"facade/parsed_command.h\"\n#include \"util/fiber_socket_base.h\"\n\nnamespace util {\nclass HttpListenerBase;\n}  // namespace util\n\nnamespace facade {\n\nclass ConnectionContext;\nclass Connection;\nclass SinkReplyBuilder;\nclass MCReplyBuilder;\n\n// Controls asynchronicity of command dispatch\nenum class AsyncPreference : uint8_t {\n  ONLY_SYNC,     // Caller supports only synchronous dispatch\n  PREFER_ASYNC,  // Prefer async if available\n  ONLY_ASYNC,    // Only async execution is possible (command is dispatched in pipeline)\n};\n\nenum class DispatchResult : uint8_t {\n  OK,\n  OOM,\n  ERROR,\n  WOULD_BLOCK  // Returned if ONLY_ASYNC was set, but only synchronous execution is possible\n};\n\nstruct DispatchManyResult {\n  uint32_t processed;  // how many commands out of passed were actually processed\n\n  // whether to account the processed commands in stats. 
This is needed to consistently\n  // account commands that were included based on squash_stats_latency_lower_limit filter.\n  bool account_in_stats;\n};\n\nclass ServiceInterface {\n public:\n  virtual ~ServiceInterface() {\n  }\n\n  virtual DispatchResult DispatchCommand(ParsedArgs args, ParsedCommand* cmd, AsyncPreference) = 0;\n  DispatchResult DispatchCommandSimple(ParsedCommand* cmd, AsyncPreference mode);\n\n  virtual DispatchManyResult DispatchManyCommands(std::function<ParsedArgs()> arg_gen,\n                                                  unsigned count, SinkReplyBuilder* builder,\n                                                  ConnectionContext* cntx) = 0;\n\n  virtual DispatchResult DispatchMC(ParsedCommand* cmd, AsyncPreference) = 0;\n\n  virtual ConnectionContext* CreateContext(Connection* owner) = 0;\n\n  virtual ParsedCommand* AllocateParsedCommand() = 0;\n\n  virtual void ConfigureHttpHandlers(util::HttpListenerBase* base, bool is_privileged) {\n  }\n\n  virtual void OnConnectionClose(ConnectionContext* cntx) {\n  }\n\n  struct ContextInfo {\n    std::string Format() const;\n\n    unsigned db_index;\n    bool async_dispatch, conn_closing, subscribers, blocked;\n  };\n\n  virtual ContextInfo GetContextInfo(ConnectionContext* cntx) const {\n    return {};\n  }\n};\n\n}  // namespace facade\n"
  },
  {
    "path": "src/facade/socket_utils.cc",
    "content": "// Copyright 2022, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#include \"socket_utils.h\"\n\n#include <arpa/inet.h>\n#include <sys/socket.h>\n\n#ifdef __linux__\n#include <sys/stat.h>\n#include <unistd.h>\n\n#include \"absl/strings/str_cat.h\"\n#include \"io/proc_reader.h\"\n\n#endif\n\nnamespace {\n\nint get_socket_family(int fd) {\n  struct sockaddr_storage ss;\n  socklen_t len = sizeof(ss);\n\n  if (getsockname(fd, (struct sockaddr*)&ss, &len) == -1) {\n    return -1;  // Indicate an error\n  }\n\n  return ss.ss_family;\n}\n\n}  // namespace\n\nnamespace dfly {\n\n// Returns information about the TCP socket state by its descriptor\nstd::string GetSocketInfo(int socket_fd) {\n  if (socket_fd < 0)\n    return \"invalid socket\";\n\n#ifdef __linux__\n  struct stat sock_stat;\n  if (fstat(socket_fd, &sock_stat) != 0) {\n    return \"could not stat socket\";\n  }\n\n  io::Result<io::TcpInfo> tcp_info;\n  int family = get_socket_family(socket_fd);\n  if (family == AF_INET) {\n    tcp_info = io::ReadTcpInfo(sock_stat.st_ino);\n  } else if (family == AF_INET6) {\n    tcp_info = io::ReadTcp6Info(sock_stat.st_ino);\n  } else {\n    return \"unsupported socket family\";\n  }\n\n  if (!tcp_info) {\n    return \"socket not found in /proc/net/tcp or /proc/net/tcp6\";\n  }\n\n  std::string state_str = io::TcpStateToString(tcp_info->state);\n\n  if (tcp_info->is_ipv6) {\n    char local_ip[INET6_ADDRSTRLEN], remote_ip[INET6_ADDRSTRLEN];\n    inet_ntop(AF_INET6, &tcp_info->local_addr6, local_ip, sizeof(local_ip));\n    inet_ntop(AF_INET6, &tcp_info->remote_addr6, remote_ip, sizeof(remote_ip));\n    return absl::StrCat(\"State: \", state_str, \", Local: [\", local_ip, \"]:\", tcp_info->local_port,\n                        \", Remote: [\", remote_ip, \"]:\", tcp_info->remote_port,\n                        \", Inode: \", tcp_info->inode);\n  } else {\n    char local_ip[INET_ADDRSTRLEN], remote_ip[INET_ADDRSTRLEN];\n    
struct in_addr addr;\n    addr.s_addr = htonl(tcp_info->local_addr);\n    inet_ntop(AF_INET, &addr, local_ip, sizeof(local_ip));\n    addr.s_addr = htonl(tcp_info->remote_addr);\n    inet_ntop(AF_INET, &addr, remote_ip, sizeof(remote_ip));\n    return absl::StrCat(\"State: \", state_str, \", Local: \", local_ip, \":\", tcp_info->local_port,\n                        \", Remote: \", remote_ip, \":\", tcp_info->remote_port,\n                        \", Inode: \", tcp_info->inode);\n  }\n#else\n  return \"socket info not available on this platform\";\n#endif\n}\n\n}  // namespace dfly\n"
  },
  {
    "path": "src/facade/socket_utils.h",
    "content": "// Copyright 2022, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#pragma once\n\n#include <string>\n\nnamespace dfly {\n\n// Returns information about the TCP socket state by its descriptor\nstd::string GetSocketInfo(int socket_fd);\n\n}  // namespace dfly\n"
  },
  {
    "path": "src/facade/tls_helpers.cc",
    "content": "// Copyright 2025, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n#include \"tls_helpers.h\"\n\n#include <openssl/err.h>\n\n#ifdef DFLY_USE_SSL\n#include <openssl/ssl.h>\n#endif\n\n#include <absl/functional/bind_front.h>\n\n#include <string>\n\n#include \"base/flags.h\"\n#include \"base/logging.h\"\n#include \"facade/facade_stats.h\"\n#include \"facade/facade_types.h\"\n\nABSL_FLAG(std::string, tls_cert_file, \"\", \"cert file for tls connections\");\nABSL_FLAG(std::string, tls_key_file, \"\", \"key file for tls connections\");\nABSL_FLAG(std::string, tls_ca_cert_file, \"\", \"ca signed certificate to validate tls connections\");\nABSL_FLAG(std::string, tls_ca_cert_dir, \"\",\n          \"ca signed certificates directory. Use c_rehash before, read description in \"\n          \"https://www.openssl.org/docs/man3.0/man1/c_rehash.html\");\nABSL_FLAG(std::string, tls_ciphers, \"DEFAULT:!MEDIUM\", \"TLS ciphers configuration for tls1.2\");\nABSL_FLAG(std::string, tls_cipher_suites, \"\", \"TLS ciphers configuration for tls1.3\");\nABSL_FLAG(bool, tls_prefer_server_ciphers, false,\n          \"If true, prefer server ciphers over client ciphers\");\nABSL_FLAG(bool, tls_session_caching, false, \"If true enables session caching and tickets\");\nABSL_FLAG(size_t, tls_session_cache_size, 20 * 1024, \"Size of the cache for tls sessions\");\nABSL_FLAG(size_t, tls_session_cache_timeout, 300, \"Timeout for each session/ticket\");\n\nnamespace facade {\n\n#ifdef DFLY_USE_SSL\n\n// Creates the TLS context. 
Returns nullptr if the TLS configuration is invalid.\n// To connect: openssl s_client -state -crlf -connect 127.0.0.1:6380\nSSL_CTX* CreateSslCntx(TlsContextRole role) {\n  using absl::GetFlag;\n  const auto& tls_key_file = GetFlag(FLAGS_tls_key_file);\n  if (tls_key_file.empty()) {\n    LOG(ERROR) << \"To use TLS, a server certificate must be provided with the --tls_key_file flag!\";\n    return nullptr;\n  }\n\n  SSL_CTX* ctx;\n\n  if (role == TlsContextRole::SERVER) {\n    ctx = SSL_CTX_new(TLS_server_method());\n  } else {\n    ctx = SSL_CTX_new(TLS_client_method());\n  }\n  unsigned mask = SSL_VERIFY_NONE;\n\n  if (SSL_CTX_use_PrivateKey_file(ctx, tls_key_file.c_str(), SSL_FILETYPE_PEM) != 1) {\n    LOG(ERROR) << \"Failed to load TLS key\";\n    return nullptr;\n  }\n  const auto& tls_cert_file = GetFlag(FLAGS_tls_cert_file);\n\n  if (!tls_cert_file.empty()) {\n    // TO connect with redis-cli you need both tls-key-file and tls-cert-file\n    // loaded. Use `redis-cli --tls -p 6380 --insecure  PING` to test\n    if (SSL_CTX_use_certificate_chain_file(ctx, tls_cert_file.c_str()) != 1) {\n      LOG(ERROR) << \"Failed to load TLS certificate\";\n      return nullptr;\n    }\n  }\n\n  const auto tls_ca_cert_file = GetFlag(FLAGS_tls_ca_cert_file);\n  const auto tls_ca_cert_dir = GetFlag(FLAGS_tls_ca_cert_dir);\n  if (!tls_ca_cert_file.empty() || !tls_ca_cert_dir.empty()) {\n    const auto* file = tls_ca_cert_file.empty() ? nullptr : tls_ca_cert_file.data();\n    const auto* dir = tls_ca_cert_dir.empty() ? 
nullptr : tls_ca_cert_dir.data();\n    if (SSL_CTX_load_verify_locations(ctx, file, dir) != 1) {\n      LOG(ERROR) << \"Failed to load TLS verify locations (CA cert file or CA cert dir)\";\n      return nullptr;\n    }\n    mask = SSL_VERIFY_PEER | SSL_VERIFY_FAIL_IF_NO_PEER_CERT;\n  }\n\n  if (!GetFlag(FLAGS_tls_ciphers).empty()) {\n    DFLY_SSL_CHECK(1 == SSL_CTX_set_cipher_list(ctx, GetFlag(FLAGS_tls_ciphers).c_str()));\n  }\n\n  // Relevant only for TLS 1.3 connections.\n  if (!GetFlag(FLAGS_tls_cipher_suites).empty()) {\n    SSL_CTX_set_ciphersuites(ctx, GetFlag(FLAGS_tls_cipher_suites).c_str());\n  }\n\n  SSL_CTX_set_min_proto_version(ctx, TLS1_2_VERSION);\n\n  SSL_CTX_set_options(ctx, SSL_OP_DONT_INSERT_EMPTY_FRAGMENTS);\n\n  SSL_CTX_set_verify(ctx, mask, NULL);\n\n  DFLY_SSL_CHECK(1 == SSL_CTX_set_dh_auto(ctx, 1));\n\n  if (GetFlag(FLAGS_tls_prefer_server_ciphers)) {\n    SSL_CTX_set_options(ctx, SSL_OP_CIPHER_SERVER_PREFERENCE);\n  }\n\n  if (GetFlag(FLAGS_tls_session_caching)) {\n    SSL_CTX_set_session_cache_mode(ctx, SSL_SESS_CACHE_SERVER);\n    SSL_CTX_sess_set_cache_size(ctx, GetFlag(FLAGS_tls_session_cache_size));\n    SSL_CTX_set_timeout(ctx, GetFlag(FLAGS_tls_session_cache_timeout));\n    SSL_CTX_set_session_id_context(ctx, (const unsigned char*)\"dragonfly\", 9);\n  }\n\n  SSL_CTX_set_info_callback(ctx, [](const SSL* ssl, int where, int ret) {\n    // When we skip the handshake we never reach this state.\n    if (where & SSL_CB_HANDSHAKE_START) {\n      ++tl_facade_stats->conn_stats.handshakes_started;\n    }\n    // When we skip the handshake, we never reach this state.\n    if (where & SSL_CB_HANDSHAKE_DONE) {\n      ++tl_facade_stats->conn_stats.handshakes_completed;\n    }\n  });\n\n  return ctx;\n}\n\nvoid PrintSSLError() {\n  ERR_print_errors_cb(\n      [](const char* str, size_t len, void* u) {\n        LOG(ERROR) << std::string_view(str, len);\n        return 1;\n      },\n      nullptr);\n}\n\n#endif\n}  // namespace facade\n"
  },
  {
    "path": "src/facade/tls_helpers.h",
    "content": "// Copyright 2025, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#pragma once\n\n#ifdef DFLY_USE_SSL\n#include <openssl/ssl.h>\n#endif\n\nnamespace facade {\n\n#ifdef DFLY_USE_SSL\nenum class TlsContextRole { SERVER, CLIENT };\n\nSSL_CTX* CreateSslCntx(TlsContextRole role);\n\nvoid PrintSSLError();\n\n#define DFLY_SSL_CHECK(condition)               \\\n  if (!(condition)) {                           \\\n    LOG(ERROR) << \"OpenSSL Error: \" #condition; \\\n    PrintSSLError();                            \\\n    exit(17);                                   \\\n  }\n\n#endif\n\n}  // namespace facade\n"
  },
  {
    "path": "src/huff/LICENSE",
    "content": "BSD License\n\nFor Zstandard software\n\nCopyright (c) Meta Platforms, Inc. and affiliates. All rights reserved.\n\nRedistribution and use in source and binary forms, with or without modification,\nare permitted provided that the following conditions are met:\n\n * Redistributions of source code must retain the above copyright notice, this\n   list of conditions and the following disclaimer.\n\n * Redistributions in binary form must reproduce the above copyright notice,\n   this list of conditions and the following disclaimer in the documentation\n   and/or other materials provided with the distribution.\n\n * Neither the name Facebook, nor Meta, nor the names of its contributors may\n   be used to endorse or promote products derived from this software without\n   specific prior written permission.\n\nTHIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS \"AS IS\" AND\nANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED\nWARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE\nDISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR\nANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES\n(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;\nLOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON\nANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT\n(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS\nSOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\n"
  },
  {
    "path": "src/huff/README.md",
    "content": "The code in this folder exposes internal functions that are used by ZSTD.\nThese functions are part of https://github.com/Cyan4973/FiniteStateEntropy project.\n\nSince we already link to ZSTD, it is convenient that we get this functionality for free."
  },
  {
    "path": "src/huff/hist.h",
    "content": "/* ******************************************************************\n * hist : Histogram functions\n * part of Finite State Entropy project\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n *\n *  You can contact the author at :\n *  - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy\n *  - Public forum : https://groups.google.com/forum/#!forum/lz4c\n *\n * This source code is licensed under both the BSD-style license (found in the\n * LICENSE file in the root directory of this source tree) and the GPLv2 (found\n * in the COPYING file in the root directory of this source tree).\n * You may select, at your option, one of the above-listed licenses.\n****************************************************************** */\n\n/* --- dependencies --- */\n#include <stddef.h>  /* size_t */\n\n\n/* --- simple histogram functions --- */\n\n/*! HIST_count():\n *  Provides the precise count of each byte within a table 'count'.\n * 'count' is a table of unsigned int, of minimum size (*maxSymbolValuePtr+1).\n *  Updates *maxSymbolValuePtr with actual largest symbol value detected.\n * @return : count of the most frequent symbol (which isn't identified).\n *           or an error code, which can be tested using HIST_isError().\n *           note : if return == srcSize, there is only one symbol.\n */\nsize_t HIST_count(unsigned* count, unsigned* maxSymbolValuePtr,\n                  const void* src, size_t srcSize);\n\nunsigned HIST_isError(size_t code);  /**< tells if a return value is an error code */\n\n\n/* --- advanced histogram functions --- */\n\n#define HIST_WKSP_SIZE_U32 1024\n#define HIST_WKSP_SIZE    (HIST_WKSP_SIZE_U32 * sizeof(unsigned))\n/** HIST_count_wksp() :\n *  Same as HIST_count(), but using an externally provided scratch buffer.\n *  Benefit is this function will use very little stack space.\n * `workSpace` is a writable buffer which must be 4-bytes aligned,\n * `workSpaceSize` must be >= HIST_WKSP_SIZE\n */\nsize_t 
HIST_count_wksp(unsigned* count, unsigned* maxSymbolValuePtr,\n                       const void* src, size_t srcSize,\n                       void* workSpace, size_t workSpaceSize);\n\n/** HIST_countFast() :\n *  same as HIST_count(), but blindly trusts that all byte values within src are <= *maxSymbolValuePtr.\n *  This function is unsafe, and will segfault if any value within `src` is `> *maxSymbolValuePtr`\n */\nsize_t HIST_countFast(unsigned* count, unsigned* maxSymbolValuePtr,\n                      const void* src, size_t srcSize);\n\n/** HIST_countFast_wksp() :\n *  Same as HIST_countFast(), but using an externally provided scratch buffer.\n * `workSpace` is a writable buffer which must be 4-bytes aligned,\n * `workSpaceSize` must be >= HIST_WKSP_SIZE\n */\nsize_t HIST_countFast_wksp(unsigned* count, unsigned* maxSymbolValuePtr,\n                           const void* src, size_t srcSize,\n                           void* workSpace, size_t workSpaceSize);\n\n/*! HIST_count_simple() :\n *  Same as HIST_countFast(), this function is unsafe,\n *  and will segfault if any value within `src` is `> *maxSymbolValuePtr`.\n *  It is also a bit slower for large inputs.\n *  However, it does not need any additional memory (not even on stack).\n * @return : count of the most frequent symbol.\n *  Note this function doesn't produce any error (i.e. it must succeed).\n */\nunsigned HIST_count_simple(unsigned* count, unsigned* maxSymbolValuePtr,\n                           const void* src, size_t srcSize);\n\n/*! HIST_add() :\n *  Lowest level: just add nb of occurrences of characters from @src into @count.\n *  @count is not reset. @count array is presumed large enough (i.e. 1 KB).\n @  This function does not need any additional stack memory.\n */\nvoid HIST_add(unsigned* count, const void* src, size_t srcSize);\n"
  },
  {
    "path": "src/huff/huf.h",
    "content": "/* ******************************************************************\n * huff0 huffman codec,\n * part of Finite State Entropy library\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n *\n * You can contact the author at :\n * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy\n *\n * This source code is licensed under both the BSD-style license (found in the\n * LICENSE file in the root directory of this source tree) and the GPLv2 (found\n * in the COPYING file in the root directory of this source tree).\n * You may select, at your option, one of the above-listed licenses.\n****************************************************************** */\n\n#ifndef HUF_H_298734234\n#define HUF_H_298734234\n\n/* *** Dependencies *** */\n#include <stddef.h>  /* size_t */\n#include \"mem.h\"          /* U32 */\n\n/* ***   Tool functions *** */\n#define HUF_BLOCKSIZE_MAX (128 * 1024)   /**< maximum input size for a single block compressed with HUF_compress */\nsize_t HUF_compressBound(size_t size);   /**< maximum compressed size (worst case) */\n\n/* Error Management */\nunsigned    HUF_isError(size_t code);       /**< tells if a return value is an error code */\nconst char* HUF_getErrorName(size_t code);  /**< provides error code string (useful for debugging) */\n\n\n#define HUF_WORKSPACE_SIZE ((8 << 10) + 512 /* sorting scratch space */)\n#define HUF_WORKSPACE_SIZE_U64 (HUF_WORKSPACE_SIZE / sizeof(U64))\n\n/* *** Constants *** */\n#define HUF_TABLELOG_MAX      12      /* max runtime value of tableLog (due to static allocation); can be modified up to HUF_TABLELOG_ABSOLUTEMAX */\n#define HUF_TABLELOG_DEFAULT  11      /* default tableLog value when none specified */\n#define HUF_SYMBOLVALUE_MAX  255\n\n#define HUF_TABLELOG_ABSOLUTEMAX  12  /* absolute limit of HUF_MAX_TABLELOG. 
Beyond that value, code does not work */\n#if (HUF_TABLELOG_MAX > HUF_TABLELOG_ABSOLUTEMAX)\n#  error \"HUF_TABLELOG_MAX is too large !\"\n#endif\n\n\n/* ****************************************\n*  Static allocation\n******************************************/\n/* HUF buffer bounds */\n#define HUF_CTABLEBOUND 129\n#define HUF_BLOCKBOUND(size) (size + (size>>8) + 8)   /* only true when incompressible is pre-filtered with fast heuristic */\n#define HUF_COMPRESSBOUND(size) (HUF_CTABLEBOUND + HUF_BLOCKBOUND(size))   /* Macro version, useful for static allocation */\n\n/* static allocation of HUF's Compression Table */\n/* this is a private definition, just exposed for allocation and strict aliasing purpose. never EVER access its members directly */\ntypedef size_t HUF_CElt;   /* consider it an incomplete type */\n#define HUF_CTABLE_SIZE_ST(maxSymbolValue)   ((maxSymbolValue)+2)   /* Use tables of size_t, for proper alignment */\n#define HUF_CTABLE_SIZE(maxSymbolValue)       (HUF_CTABLE_SIZE_ST(maxSymbolValue) * sizeof(size_t))\n#define HUF_CREATE_STATIC_CTABLE(name, maxSymbolValue) \\\n    HUF_CElt name[HUF_CTABLE_SIZE_ST(maxSymbolValue)] /* no final ; */\n\n/* static allocation of HUF's DTable */\ntypedef U32 HUF_DTable;\n#define HUF_DTABLE_SIZE(maxTableLog)   (1 + (1<<(maxTableLog)))\n#define HUF_CREATE_STATIC_DTABLEX1(DTable, maxTableLog) \\\n        HUF_DTable DTable[HUF_DTABLE_SIZE((maxTableLog)-1)] = { ((U32)((maxTableLog)-1) * 0x01000001) }\n#define HUF_CREATE_STATIC_DTABLEX2(DTable, maxTableLog) \\\n        HUF_DTable DTable[HUF_DTABLE_SIZE(maxTableLog)] = { ((U32)(maxTableLog) * 0x01000001) }\n\n\n/* ****************************************\n*  Advanced decompression functions\n******************************************/\n\n/**\n * Huffman flags bitset.\n * For all flags, 0 is the default value.\n */\ntypedef enum {\n    /**\n     * If compiled with DYNAMIC_BMI2: Set flag only if the CPU supports BMI2 at runtime.\n     * Otherwise: Ignored.\n     */\n    
HUF_flags_bmi2 = (1 << 0),\n    /**\n     * If set: Test possible table depths to find the one that produces the smallest header + encoded size.\n     * If unset: Use heuristic to find the table depth.\n     */\n    HUF_flags_optimalDepth = (1 << 1),\n    /**\n     * If set: If the previous table can encode the input, always reuse the previous table.\n     * If unset: If the previous table can encode the input, reuse the previous table if it results in a smaller output.\n     */\n    HUF_flags_preferRepeat = (1 << 2),\n    /**\n     * If set: Sample the input and check if the sample is uncompressible, if it is then don't attempt to compress.\n     * If unset: Always histogram the entire input.\n     */\n    HUF_flags_suspectUncompressible = (1 << 3),\n    /**\n     * If set: Don't use assembly implementations\n     * If unset: Allow using assembly implementations\n     */\n    HUF_flags_disableAsm = (1 << 4),\n    /**\n     * If set: Don't use the fast decoding loop, always use the fallback decoding loop.\n     * If unset: Use the fast decoding loop when possible.\n     */\n    HUF_flags_disableFast = (1 << 5)\n} HUF_flags_e;\n\n\n/* ****************************************\n *  HUF detailed API\n * ****************************************/\n#define HUF_OPTIMAL_DEPTH_THRESHOLD ZSTD_btultra\n\n/*! HUF_compress() does the following:\n *  1. count symbol occurrence from source[] into table count[] using FSE_count() (exposed within \"fse.h\")\n *  2. (optional) refine tableLog using HUF_optimalTableLog()\n *  3. build Huffman table from count using HUF_buildCTable()\n *  4. save Huffman table to memory buffer using HUF_writeCTable()\n *  5. 
encode the data stream using HUF_compress4X_usingCTable()\n *\n *  The following API allows targeting specific sub-functions for advanced tasks.\n *  For example, it's possible to compress several blocks using the same 'CTable',\n *  or to save and regenerate 'CTable' using external methods.\n */\nunsigned HUF_minTableLog(unsigned symbolCardinality);\nunsigned HUF_cardinality(const unsigned* count, unsigned maxSymbolValue);\nunsigned HUF_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue, void* workSpace,\n size_t wkspSize, HUF_CElt* table, const unsigned* count, int flags); /* table is used as scratch space for building and testing tables, not a return value */\nsize_t HUF_writeCTable_wksp(void* dst, size_t maxDstSize, const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog, void* workspace, size_t workspaceSize);\nsize_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable, int flags);\nsize_t HUF_estimateCompressedSize(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue);\nint HUF_validateCTable(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue);\n\ntypedef enum {\n   HUF_repeat_none,  /**< Cannot use the previous table */\n   HUF_repeat_check, /**< Can use the previous table but it must be checked. 
Note : The previous table must have been constructed by HUF_compress{1, 4}X_repeat */\n   HUF_repeat_valid  /**< Can use the previous table and it is assumed to be valid */\n } HUF_repeat;\n\n/** HUF_compress4X_repeat() :\n *  Same as HUF_compress4X_wksp(), but considers using hufTable if *repeat != HUF_repeat_none.\n *  If it uses hufTable it does not modify hufTable or repeat.\n *  If it doesn't, it sets *repeat = HUF_repeat_none, and it sets hufTable to the table used.\n *  If preferRepeat then the old table will always be used if valid.\n *  If suspectUncompressible then some sampling checks will be run to potentially skip huffman coding */\nsize_t HUF_compress4X_repeat(void* dst, size_t dstSize,\n                       const void* src, size_t srcSize,\n                       unsigned maxSymbolValue, unsigned tableLog,\n                       void* workSpace, size_t wkspSize,    /**< `workSpace` must be aligned on 4-bytes boundaries, `wkspSize` must be >= HUF_WORKSPACE_SIZE */\n                       HUF_CElt* hufTable, HUF_repeat* repeat, int flags);\n\n/** HUF_buildCTable_wksp() :\n *  Same as HUF_buildCTable(), but using externally allocated scratch buffer.\n * `workSpace` must be aligned on 4-bytes boundaries, and its size must be >= HUF_CTABLE_WORKSPACE_SIZE.\n */\n#define HUF_CTABLE_WORKSPACE_SIZE_U32 ((4 * (HUF_SYMBOLVALUE_MAX + 1)) + 192)\n#define HUF_CTABLE_WORKSPACE_SIZE (HUF_CTABLE_WORKSPACE_SIZE_U32 * sizeof(unsigned))\nsize_t HUF_buildCTable_wksp (HUF_CElt* tree,\n                       const unsigned* count, U32 maxSymbolValue, U32 maxNbBits,\n                             void* workSpace, size_t wkspSize);\n\n/*! HUF_readStats() :\n *  Read compact Huffman tree, saved by HUF_writeCTable().\n * `huffWeight` is destination buffer.\n * @return : size read from `src` , or an error Code .\n *  Note : Needed by HUF_readCTable() and HUF_readDTableXn() . 
*/\nsize_t HUF_readStats(BYTE* huffWeight, size_t hwSize,\n                     U32* rankStats, U32* nbSymbolsPtr, U32* tableLogPtr,\n                     const void* src, size_t srcSize);\n\n/*! HUF_readStats_wksp() :\n * Same as HUF_readStats() but takes an external workspace which must be\n * 4-byte aligned and its size must be >= HUF_READ_STATS_WORKSPACE_SIZE.\n * If the CPU has BMI2 support, pass bmi2=1, otherwise pass bmi2=0.\n */\n#define HUF_READ_STATS_WORKSPACE_SIZE_U32 FSE_DECOMPRESS_WKSP_SIZE_U32(6, HUF_TABLELOG_MAX-1)\n#define HUF_READ_STATS_WORKSPACE_SIZE (HUF_READ_STATS_WORKSPACE_SIZE_U32 * sizeof(unsigned))\nsize_t HUF_readStats_wksp(BYTE* huffWeight, size_t hwSize,\n                          U32* rankStats, U32* nbSymbolsPtr, U32* tableLogPtr,\n                          const void* src, size_t srcSize,\n                          void* workspace, size_t wkspSize,\n                          int flags);\n\n/** HUF_readCTable() :\n *  Loading a CTable saved with HUF_writeCTable() */\nsize_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize, unsigned *hasZeroWeights);\n\n/** HUF_getNbBitsFromCTable() :\n *  Read nbBits from CTable symbolTable, for symbol `symbolValue` presumed <= HUF_SYMBOLVALUE_MAX\n *  Note 1 : If symbolValue > HUF_readCTableHeader(symbolTable).maxSymbolValue, returns 0\n *  Note 2 : is not inlined, as HUF_CElt definition is private\n */\nU32 HUF_getNbBitsFromCTable(const HUF_CElt* symbolTable, U32 symbolValue);\n\ntypedef struct {\n    BYTE tableLog;\n    BYTE maxSymbolValue;\n    BYTE unused[sizeof(size_t) - 2];\n} HUF_CTableHeader;\n\n/** HUF_readCTableHeader() :\n *  @returns The header from the CTable specifying the tableLog and the maxSymbolValue.\n */\nHUF_CTableHeader HUF_readCTableHeader(HUF_CElt const* ctable);\n\n/*\n * HUF_decompress() does the following:\n * 1. select the decompression algorithm (X1, X2) based on pre-computed heuristics\n * 2. 
build Huffman table from save, using HUF_readDTableX?()\n * 3. decode 1 or 4 segments in parallel using HUF_decompress?X?_usingDTable()\n */\n\n/** HUF_selectDecoder() :\n *  Tells which decoder is likely to decode faster,\n *  based on a set of pre-computed metrics.\n * @return : 0==HUF_decompress4X1, 1==HUF_decompress4X2 .\n *  Assumption : 0 < dstSize <= 128 KB */\nU32 HUF_selectDecoder (size_t dstSize, size_t cSrcSize);\n\n/**\n *  The minimum workspace size for the `workSpace` used in\n *  HUF_readDTableX1_wksp() and HUF_readDTableX2_wksp().\n *\n *  The space used depends on HUF_TABLELOG_MAX, ranging from ~1500 bytes when\n *  HUF_TABLE_LOG_MAX=12 to ~1850 bytes when HUF_TABLE_LOG_MAX=15.\n *  Buffer overflow errors may potentially occur if code modifications result in\n *  a required workspace size greater than that specified in the following\n *  macro.\n */\n#define HUF_DECOMPRESS_WORKSPACE_SIZE ((2 << 10) + (1 << 9))\n#define HUF_DECOMPRESS_WORKSPACE_SIZE_U32 (HUF_DECOMPRESS_WORKSPACE_SIZE / sizeof(U32))\n\n\n/* ====================== */\n/* single stream variants */\n/* ====================== */\n\nsize_t HUF_compress1X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable, int flags);\n/** HUF_compress1X_repeat() :\n *  Same as HUF_compress1X_wksp(), but considers using hufTable if *repeat != HUF_repeat_none.\n *  If it uses hufTable it does not modify hufTable or repeat.\n *  If it doesn't, it sets *repeat = HUF_repeat_none, and it sets hufTable to the table used.\n *  If preferRepeat then the old table will always be used if valid.\n *  If suspectUncompressible then some sampling checks will be run to potentially skip huffman coding */\nsize_t HUF_compress1X_repeat(void* dst, size_t dstSize,\n                       const void* src, size_t srcSize,\n                       unsigned maxSymbolValue, unsigned tableLog,\n                       void* workSpace, size_t wkspSize,   /**< `workSpace` must be aligned on 
4-bytes boundaries, `wkspSize` must be >= HUF_WORKSPACE_SIZE */\n                       HUF_CElt* hufTable, HUF_repeat* repeat, int flags);\n\nsize_t HUF_decompress1X_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int flags);\n#ifndef HUF_FORCE_DECOMPRESS_X1\nsize_t HUF_decompress1X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int flags);   /**< double-symbols decoder */\n#endif\n\n/* BMI2 variants.\n * If the CPU has BMI2 support, pass bmi2=1, otherwise pass bmi2=0.\n */\nsize_t HUF_decompress1X_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int flags);\n#ifndef HUF_FORCE_DECOMPRESS_X2\nsize_t HUF_decompress1X1_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int flags);\n#endif\nsize_t HUF_decompress4X_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int flags);\nsize_t HUF_decompress4X_hufOnly_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int flags);\n#ifndef HUF_FORCE_DECOMPRESS_X2\nsize_t HUF_readDTableX1_wksp(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize, int flags);\n#endif\n#ifndef HUF_FORCE_DECOMPRESS_X1\nsize_t HUF_readDTableX2_wksp(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize, int flags);\n#endif\n\n#endif   /* HUF_H_298734234 */\n"
  },
  {
    "path": "src/huff/mem.h",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under both the BSD-style license (found in the\n * LICENSE file in the root directory of this source tree) and the GPLv2 (found\n * in the COPYING file in the root directory of this source tree).\n * You may select, at your option, one of the above-listed licenses.\n */\n\n#ifndef MEM_H_MODULE\n#define MEM_H_MODULE\n\n/*-****************************************\n*  Dependencies\n******************************************/\n#include <stddef.h>  /* size_t, ptrdiff_t */\n#include <stdint.h> /* intptr_t */\n#define MEM_STATIC\n\ntypedef  uint32_t U32;\ntypedef   uint8_t BYTE;\n\n#endif /* MEM_H_MODULE */\n"
  },
  {
    "path": "src/redis/CMakeLists.txt",
    "content": "option(REDIS_ZMALLOC_MI \"Implement zmalloc layer using mimalloc allocator\" ON)\n\nif (REDIS_ZMALLOC_MI)\n  set(ZMALLOC_SRC \"zmalloc_mi.c\")\n  set(ZMALLOC_DEPS \"TRDP::mimalloc2\")\nelse()\n  set(ZMALLOC_SRC \"zmalloc.c\")\n  set(ZMALLOC_DEPS \"\")\nendif()\n\nadd_library(redis_lib crc16.c crc64.c crcspeed.c debug.c  intset.c geo.c \n            geohash.c geohash_helper.c hiredis.c read.c\n            listpack.c lzf_c.c lzf_d.c sds.c\n            rax.c redis_aux.c t_stream.c \n            util.c ziplist.c hyperloglog.c ${ZMALLOC_SRC})\n\ncxx_link(redis_lib  ${ZMALLOC_DEPS})\n\nadd_library(redis_test_lib dict.c siphash.c)\ncxx_link(redis_test_lib redis_lib)\n\nif(\"${CMAKE_CXX_COMPILER_ID}\" STREQUAL \"GNU\")\n  target_compile_options(redis_lib PRIVATE -Wno-maybe-uninitialized)\nendif()\n\nif (REDIS_ZMALLOC_MI)\n  target_compile_definitions(redis_lib PUBLIC USE_ZMALLOC_MI)\nendif()\n\nadd_subdirectory(lua)\n"
  },
  {
    "path": "src/redis/LICENSE.redis",
    "content": "Copyright (c) 2006-2020, Salvatore Sanfilippo\nAll rights reserved.\n\nRedistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:\n\n    * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.\n    * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.\n    * Neither the name of Redis nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.\n\nTHIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS \"AS IS\" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\n"
  },
  {
    "path": "src/redis/config.h",
    "content": "/*\n * Copyright (c) 2009-2012, Salvatore Sanfilippo <antirez at gmail dot com>\n * All rights reserved.\n *\n * Redistribution and use in source and binary forms, with or without\n * modification, are permitted provided that the following conditions are met:\n *\n *   * Redistributions of source code must retain the above copyright notice,\n *     this list of conditions and the following disclaimer.\n *   * Redistributions in binary form must reproduce the above copyright\n *     notice, this list of conditions and the following disclaimer in the\n *     documentation and/or other materials provided with the distribution.\n *   * Neither the name of Redis nor the names of its contributors may be used\n *     to endorse or promote products derived from this software without\n *     specific prior written permission.\n *\n * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS \"AS IS\"\n * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE\n * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE\n * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE\n * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR\n * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF\n * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS\n * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN\n * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)\n * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE\n * POSSIBILITY OF SUCH DAMAGE.\n */\n\n#ifndef __CONFIG_H\n#define __CONFIG_H\n\n#ifdef __APPLE__\n#include <AvailabilityMacros.h>\n#endif\n\n#ifdef __linux__\n#include <features.h>\n#endif\n\n/* Define redis_fstat to fstat or fstat64() */\n#if defined(__APPLE__) && !defined(MAC_OS_X_VERSION_10_6)\n#define redis_fstat fstat64\n#define redis_stat stat64\n#else\n#define redis_fstat fstat\n#define redis_stat stat\n#endif\n\n/* Test for proc filesystem */\n#ifdef __linux__\n#define HAVE_PROC_STAT 1\n#define HAVE_PROC_MAPS 1\n#define HAVE_PROC_SMAPS 1\n#define HAVE_PROC_SOMAXCONN 1\n#define HAVE_PROC_OOM_SCORE_ADJ 1\n#endif\n\n/* Test for task_info() */\n#if defined(__APPLE__)\n#define HAVE_TASKINFO 1\n#endif\n\n/* Test for backtrace() */\n#if defined(__APPLE__) || (defined(__linux__) && defined(__GLIBC__)) || \\\n    defined(__FreeBSD__) || ((defined(__OpenBSD__) || defined(__NetBSD__)) && defined(USE_BACKTRACE))\\\n || defined(__DragonFly__) || (defined(__UCLIBC__) && defined(__UCLIBC_HAS_BACKTRACE__))\n#define HAVE_BACKTRACE 1\n#endif\n\n/* MSG_NOSIGNAL. 
*/\n#ifdef __linux__\n#define HAVE_MSG_NOSIGNAL 1\n#endif\n\n/* Test for polling API */\n#ifdef __linux__\n#define HAVE_EPOLL 1\n#endif\n\n#if (defined(__APPLE__) && defined(MAC_OS_X_VERSION_10_6)) || defined(__FreeBSD__) || defined(__OpenBSD__) || defined (__NetBSD__)\n#define HAVE_KQUEUE 1\n#endif\n\n#ifdef __sun\n#include <sys/feature_tests.h>\n#ifdef _DTRACE_VERSION\n#define HAVE_EVPORT 1\n#define HAVE_PSINFO 1\n#endif\n#endif\n\n/* Define redis_fsync to fdatasync() in Linux and fsync() for all the rest */\n#ifdef __linux__\n#define redis_fsync fdatasync\n#else\n#define redis_fsync fsync\n#endif\n\n#if __GNUC__ >= 4\n#define valkey_unreachable __builtin_unreachable\n#else\n#define valkey_unreachable abort\n#endif\n#if __GNUC__ >= 3\n#define likely(x) __builtin_expect(!!(x), 1)\n#define unlikely(x) __builtin_expect(!!(x), 0)\n#else\n#define likely(x) (x)\n#define unlikely(x) (x)\n#endif\n\n/* Define rdb_fsync_range to sync_file_range() on Linux, otherwise we use\n * the plain fsync() call. */\n#if (defined(__linux__) && defined(SYNC_FILE_RANGE_WAIT_BEFORE))\n#define rdb_fsync_range(fd,off,size) sync_file_range(fd,off,size,SYNC_FILE_RANGE_WAIT_BEFORE|SYNC_FILE_RANGE_WRITE)\n#else\n#define rdb_fsync_range(fd,off,size) fsync(fd)\n#endif\n\n/* Check if we can use setproctitle().\n * BSD systems have support for it, we provide an implementation for\n * Linux and osx. 
*/\n#if (defined __NetBSD__ || defined __FreeBSD__ || defined __OpenBSD__)\n#define USE_SETPROCTITLE\n#endif\n\n#if defined(__HAIKU__)\n#define ESOCKTNOSUPPORT 0\n#endif\n\n#if (defined __linux || defined __APPLE__)\n#define USE_SETPROCTITLE\n#define INIT_SETPROCTITLE_REPLACEMENT\nvoid spt_init(int argc, char *argv[]);\nvoid setproctitle(const char *fmt, ...);\n#endif\n\n/* Byte ordering detection */\n#include <sys/types.h> /* This will likely define BYTE_ORDER */\n\n#ifndef BYTE_ORDER\n#if (BSD >= 199103)\n# include <machine/endian.h>\n#else\n#if defined(linux) || defined(__linux__)\n# include <endian.h>\n#else\n#define\tLITTLE_ENDIAN\t1234\t/* least-significant byte first (vax, pc) */\n#define\tBIG_ENDIAN\t4321\t/* most-significant byte first (IBM, net) */\n#define\tPDP_ENDIAN\t3412\t/* LSB first in word, MSW first in long (pdp)*/\n\n#if defined(__i386__) || defined(__x86_64__) || defined(__amd64__) || \\\n   defined(vax) || defined(ns32000) || defined(sun386) || \\\n   defined(MIPSEL) || defined(_MIPSEL) || defined(BIT_ZERO_ON_RIGHT) || \\\n   defined(__alpha__) || defined(__alpha)\n#define BYTE_ORDER    LITTLE_ENDIAN\n#endif\n\n#if defined(sel) || defined(pyr) || defined(mc68000) || defined(sparc) || \\\n    defined(is68k) || defined(tahoe) || defined(ibm032) || defined(ibm370) || \\\n    defined(MIPSEB) || defined(_MIPSEB) || defined(_IBMR2) || defined(DGUX) ||\\\n    defined(apollo) || defined(__convex__) || defined(_CRAY) || \\\n    defined(__hppa) || defined(__hp9000) || \\\n    defined(__hp9000s300) || defined(__hp9000s700) || \\\n    defined (BIT_ZERO_ON_LEFT) || defined(m68k) || defined(__sparc)\n#define BYTE_ORDER\tBIG_ENDIAN\n#endif\n#endif /* linux */\n#endif /* BSD */\n#endif /* BYTE_ORDER */\n\n/* Sometimes after including an OS-specific header that defines the\n * endianness we end with __BYTE_ORDER but not with BYTE_ORDER that is what\n * the Redis code uses. In this case let's define everything without the\n * underscores. 
*/\n#ifndef BYTE_ORDER\n#ifdef __BYTE_ORDER\n#if defined(__LITTLE_ENDIAN) && defined(__BIG_ENDIAN)\n#ifndef LITTLE_ENDIAN\n#define LITTLE_ENDIAN __LITTLE_ENDIAN\n#endif\n#ifndef BIG_ENDIAN\n#define BIG_ENDIAN __BIG_ENDIAN\n#endif\n#if (__BYTE_ORDER == __LITTLE_ENDIAN)\n#define BYTE_ORDER LITTLE_ENDIAN\n#else\n#define BYTE_ORDER BIG_ENDIAN\n#endif\n#endif\n#endif\n#endif\n\n#if !defined(BYTE_ORDER) || \\\n    (BYTE_ORDER != BIG_ENDIAN && BYTE_ORDER != LITTLE_ENDIAN)\n\t/* you must determine what the correct byte order is for\n\t * your compiler - the next line is an intentional error\n\t * which will force your compiles to bomb until you fix\n\t * the above macros.\n\t */\n#error \"Undefined or invalid BYTE_ORDER\"\n#endif\n\n#if (__i386 || __amd64 || __powerpc__) && __GNUC__\n#define GNUC_VERSION (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__)\n#if defined(__clang__)\n#define HAVE_ATOMIC\n#endif\n#if (defined(__GLIBC__) && defined(__GLIBC_PREREQ))\n#if (GNUC_VERSION >= 40100 && __GLIBC_PREREQ(2, 6))\n#define HAVE_ATOMIC\n#endif\n#endif\n#endif\n\n/* Make sure we can test for ARM just checking for __arm__, since sometimes\n * __arm is defined but __arm__ is not. */\n#if defined(__arm) && !defined(__arm__)\n#define __arm__\n#endif\n#if defined (__aarch64__) && !defined(__arm64__)\n#define __arm64__\n#endif\n\n/* Make sure we can test for SPARC just checking for __sparc__. 
*/\n#if defined(__sparc) && !defined(__sparc__)\n#define __sparc__\n#endif\n\n#if defined(__sparc__) || defined(__arm__)\n#define USE_ALIGNED_ACCESS\n#endif\n\n/* Define for redis_set_thread_title */\n#ifdef __linux__\n#define redis_set_thread_title(name) pthread_setname_np(pthread_self(), name)\n#else\n#if (defined __FreeBSD__ || defined __OpenBSD__)\n#include <pthread_np.h>\n#define redis_set_thread_title(name) pthread_set_name_np(pthread_self(), name)\n#elif defined __NetBSD__\n#include <pthread.h>\n#define redis_set_thread_title(name) pthread_setname_np(pthread_self(), \"%s\", name)\n#elif defined __HAIKU__\n#include <kernel/OS.h>\n#define redis_set_thread_title(name) rename_thread(find_thread(0), name)\n#else\n#if (defined __APPLE__ && defined(MAC_OS_X_VERSION_10_7))\nint pthread_setname_np(const char *name);\n#include <pthread.h>\n#define redis_set_thread_title(name) pthread_setname_np(name)\n#else\n#define redis_set_thread_title(name)\n#endif\n#endif\n#endif\n\n/* Check if we can use setcpuaffinity(). */\n#if (defined __linux || defined __NetBSD__ || defined __FreeBSD__ || defined __DragonFly__)\n#define USE_SETCPUAFFINITY\nvoid setcpuaffinity(const char *cpulist);\n#endif\n\n#endif\n"
  },
  {
    "path": "src/redis/crc16.c",
    "content": "\n\n#include \"crc16.h\"\n\n/*\n * Copyright 2001-2010 Georges Menie (www.menie.org)\n * Copyright 2010-2012 Salvatore Sanfilippo (adapted to Redis coding style)\n * All rights reserved.\n *\n * Redistribution and use in source and binary forms, with or without\n * modification, are permitted provided that the following conditions are met:\n *\n *     * Redistributions of source code must retain the above copyright\n *       notice, this list of conditions and the following disclaimer.\n *     * Redistributions in binary form must reproduce the above copyright\n *       notice, this list of conditions and the following disclaimer in the\n *       documentation and/or other materials provided with the distribution.\n *     * Neither the name of the University of California, Berkeley nor the\n *       names of its contributors may be used to endorse or promote products\n *       derived from this software without specific prior written permission.\n *\n * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ANY\n * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED\n * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE\n * DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS AND CONTRIBUTORS BE LIABLE FOR ANY\n * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES\n * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;\n * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND\n * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT\n * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS\n * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\n */\n\n/* CRC16 implementation according to CCITT standards.\n *\n * Note by @antirez: this is actually the XMODEM CRC 16 algorithm, using the\n * following parameters:\n *\n * Name                       : \"XMODEM\", also known as \"ZMODEM\", \"CRC-16/ACORN\"\n * Width                      : 16 bit\n * Poly                       : 1021 (That is actually x^16 + x^12 + x^5 + 1)\n * Initialization             : 0000\n * Reflect Input byte         : False\n * Reflect Output CRC         : False\n * Xor constant to output CRC : 0000\n * Output for \"123456789\"     : 31C3\n */\n\nstatic const uint16_t crc16tab[256] = {\n    0x0000, 0x1021, 0x2042, 0x3063, 0x4084, 0x50a5, 0x60c6, 0x70e7, 0x8108, 0x9129, 0xa14a, 0xb16b,\n    0xc18c, 0xd1ad, 0xe1ce, 0xf1ef, 0x1231, 0x0210, 0x3273, 0x2252, 0x52b5, 0x4294, 0x72f7, 0x62d6,\n    0x9339, 0x8318, 0xb37b, 0xa35a, 0xd3bd, 0xc39c, 0xf3ff, 0xe3de, 0x2462, 0x3443, 0x0420, 0x1401,\n    0x64e6, 0x74c7, 0x44a4, 0x5485, 0xa56a, 0xb54b, 0x8528, 0x9509, 0xe5ee, 0xf5cf, 0xc5ac, 0xd58d,\n    0x3653, 0x2672, 0x1611, 0x0630, 0x76d7, 0x66f6, 0x5695, 0x46b4, 0xb75b, 0xa77a, 0x9719, 0x8738,\n    0xf7df, 0xe7fe, 0xd79d, 0xc7bc, 0x48c4, 0x58e5, 0x6886, 0x78a7, 0x0840, 0x1861, 0x2802, 0x3823,\n    0xc9cc, 0xd9ed, 0xe98e, 0xf9af, 0x8948, 0x9969, 0xa90a, 0xb92b, 0x5af5, 0x4ad4, 0x7ab7, 0x6a96,\n    0x1a71, 0x0a50, 0x3a33, 0x2a12, 0xdbfd, 0xcbdc, 0xfbbf, 0xeb9e, 0x9b79, 0x8b58, 0xbb3b, 0xab1a,\n    0x6ca6, 0x7c87, 0x4ce4, 0x5cc5, 
0x2c22, 0x3c03, 0x0c60, 0x1c41, 0xedae, 0xfd8f, 0xcdec, 0xddcd,\n    0xad2a, 0xbd0b, 0x8d68, 0x9d49, 0x7e97, 0x6eb6, 0x5ed5, 0x4ef4, 0x3e13, 0x2e32, 0x1e51, 0x0e70,\n    0xff9f, 0xefbe, 0xdfdd, 0xcffc, 0xbf1b, 0xaf3a, 0x9f59, 0x8f78, 0x9188, 0x81a9, 0xb1ca, 0xa1eb,\n    0xd10c, 0xc12d, 0xf14e, 0xe16f, 0x1080, 0x00a1, 0x30c2, 0x20e3, 0x5004, 0x4025, 0x7046, 0x6067,\n    0x83b9, 0x9398, 0xa3fb, 0xb3da, 0xc33d, 0xd31c, 0xe37f, 0xf35e, 0x02b1, 0x1290, 0x22f3, 0x32d2,\n    0x4235, 0x5214, 0x6277, 0x7256, 0xb5ea, 0xa5cb, 0x95a8, 0x8589, 0xf56e, 0xe54f, 0xd52c, 0xc50d,\n    0x34e2, 0x24c3, 0x14a0, 0x0481, 0x7466, 0x6447, 0x5424, 0x4405, 0xa7db, 0xb7fa, 0x8799, 0x97b8,\n    0xe75f, 0xf77e, 0xc71d, 0xd73c, 0x26d3, 0x36f2, 0x0691, 0x16b0, 0x6657, 0x7676, 0x4615, 0x5634,\n    0xd94c, 0xc96d, 0xf90e, 0xe92f, 0x99c8, 0x89e9, 0xb98a, 0xa9ab, 0x5844, 0x4865, 0x7806, 0x6827,\n    0x18c0, 0x08e1, 0x3882, 0x28a3, 0xcb7d, 0xdb5c, 0xeb3f, 0xfb1e, 0x8bf9, 0x9bd8, 0xabbb, 0xbb9a,\n    0x4a75, 0x5a54, 0x6a37, 0x7a16, 0x0af1, 0x1ad0, 0x2ab3, 0x3a92, 0xfd2e, 0xed0f, 0xdd6c, 0xcd4d,\n    0xbdaa, 0xad8b, 0x9de8, 0x8dc9, 0x7c26, 0x6c07, 0x5c64, 0x4c45, 0x3ca2, 0x2c83, 0x1ce0, 0x0cc1,\n    0xef1f, 0xff3e, 0xcf5d, 0xdf7c, 0xaf9b, 0xbfba, 0x8fd9, 0x9ff8, 0x6e17, 0x7e36, 0x4e55, 0x5e74,\n    0x2e93, 0x3eb2, 0x0ed1, 0x1ef0};\n\nuint16_t crc16(const char* buf, int len) {\n  int counter;\n  uint16_t crc = 0;\n  for (counter = 0; counter < len; counter++)\n    crc = (crc << 8) ^ crc16tab[((crc >> 8) ^ *buf++) & 0x00FF];\n  return crc;\n}\n"
  },
  {
    "path": "src/redis/crc16.h",
    "content": "#ifndef CRC16_H\n#define CRC16_H\n\n#include <stdint.h>\n\nuint16_t crc16(const char* buf, int len);\n\n#endif\n"
  },
  {
    "path": "src/redis/crc64.c",
    "content": "/* Copyright (c) 2014, Matt Stancliff <matt@genges.com>\n * Copyright (c) 2020, Amazon Web Services\n * All rights reserved.\n *\n * Redistribution and use in source and binary forms, with or without\n * modification, are permitted provided that the following conditions are met:\n *\n *   * Redistributions of source code must retain the above copyright notice,\n *     this list of conditions and the following disclaimer.\n *   * Redistributions in binary form must reproduce the above copyright\n *     notice, this list of conditions and the following disclaimer in the\n *     documentation and/or other materials provided with the distribution.\n *   * Neither the name of Redis nor the names of its contributors may be used\n *     to endorse or promote products derived from this software without\n *     specific prior written permission.\n *\n * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS \"AS IS\"\n * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE\n * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE\n * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE\n * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR\n * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF\n * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS\n * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN\n * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)\n * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE\n * POSSIBILITY OF SUCH DAMAGE. 
*/\n\n#include \"crc64.h\"\n#include \"crcspeed.h\"\nstatic uint64_t crc64_table[8][256] = {{0}};\n\n#define POLY UINT64_C(0xad93d23594c935a9)\n/******************** BEGIN GENERATED PYCRC FUNCTIONS ********************/\n/**\n * Generated on Sun Dec 21 14:14:07 2014,\n * by pycrc v0.8.2, https://www.tty1.net/pycrc/\n *\n * LICENSE ON GENERATED CODE:\n * ==========================\n * As of version 0.6, pycrc is released under the terms of the MIT licence.\n * The code generated by pycrc is not considered a substantial portion of the\n * software, therefore the author of pycrc will not claim any copyright on\n * the generated code.\n * ==========================\n *\n * CRC configuration:\n *    Width        = 64\n *    Poly         = 0xad93d23594c935a9\n *    XorIn        = 0xffffffffffffffff\n *    ReflectIn    = True\n *    XorOut       = 0x0000000000000000\n *    ReflectOut   = True\n *    Algorithm    = bit-by-bit-fast\n *\n * Modifications after generation (by matt):\n *   - included finalize step in-line with update for single-call generation\n *   - re-worked some inner variable architectures\n *   - adjusted function parameters to match expected prototypes.\n *****************************************************************************/\n\n/**\n * Reflect all bits of a \\a data word of \\a data_len bits.\n *\n * \\param data         The data word to be reflected.\n * \\param data_len     The width of \\a data expressed in number of bits.\n * \\return             The reflected data.\n *****************************************************************************/\nstatic inline uint_fast64_t crc_reflect(uint_fast64_t data, size_t data_len) {\n    uint_fast64_t ret = data & 0x01;\n\n    for (size_t i = 1; i < data_len; i++) {\n        data >>= 1;\n        ret = (ret << 1) | (data & 0x01);\n    }\n\n    return ret;\n}\n\n/**\n *  Update the crc value with new data.\n *\n * \\param crc      The current crc value.\n * \\param data     Pointer to a buffer of \\a 
data_len bytes.\n * \\param data_len Number of bytes in the \\a data buffer.\n * \\return         The updated crc value.\n ******************************************************************************/\nuint64_t _crc64(uint_fast64_t crc, const void *in_data, const uint64_t len) {\n    const uint8_t *data = in_data;\n    unsigned long long bit;\n\n    for (uint64_t offset = 0; offset < len; offset++) {\n        uint8_t c = data[offset];\n        for (uint_fast8_t i = 0x01; i & 0xff; i <<= 1) {\n            bit = crc & 0x8000000000000000;\n            if (c & i) {\n                bit = !bit;\n            }\n\n            crc <<= 1;\n            if (bit) {\n                crc ^= POLY;\n            }\n        }\n\n        crc &= 0xffffffffffffffff;\n    }\n\n    crc = crc & 0xffffffffffffffff;\n    return crc_reflect(crc, 64) ^ 0x0000000000000000;\n}\n\n/******************** END GENERATED PYCRC FUNCTIONS ********************/\n\n/* Initializes the 16KB lookup tables. */\nvoid crc64_init(void) {\n    crcspeed64native_init(_crc64, crc64_table);\n}\n\n/* Compute crc64 */\nuint64_t crc64(uint64_t crc, const unsigned char *s, uint64_t l) {\n    return crcspeed64native(crc64_table, crc, (void *) s, l);\n}\n\n/* Test main */\n#ifdef REDIS_TEST\n#include <stdio.h>\n\n#define UNUSED(x) (void)(x)\nint crc64Test(int argc, char *argv[], int flags) {\n    UNUSED(argc);\n    UNUSED(argv);\n    UNUSED(flags);\n    crc64_init();\n    printf(\"[calcula]: e9c6d914c4b8d9ca == %016\" PRIx64 \"\\n\",\n           (uint64_t)_crc64(0, \"123456789\", 9));\n    printf(\"[64speed]: e9c6d914c4b8d9ca == %016\" PRIx64 \"\\n\",\n           (uint64_t)crc64(0, (unsigned char*)\"123456789\", 9));\n    char li[] = \"Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed \"\n                \"do eiusmod tempor incididunt ut labore et dolore magna \"\n                \"aliqua. 
Ut enim ad minim veniam, quis nostrud exercitation \"\n                \"ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis \"\n                \"aute irure dolor in reprehenderit in voluptate velit esse \"\n                \"cillum dolore eu fugiat nulla pariatur. Excepteur sint \"\n                \"occaecat cupidatat non proident, sunt in culpa qui officia \"\n                \"deserunt mollit anim id est laborum.\";\n    printf(\"[calcula]: c7794709e69683b3 == %016\" PRIx64 \"\\n\",\n           (uint64_t)_crc64(0, li, sizeof(li)));\n    printf(\"[64speed]: c7794709e69683b3 == %016\" PRIx64 \"\\n\",\n           (uint64_t)crc64(0, (unsigned char*)li, sizeof(li)));\n    return 0;\n}\n\n#endif\n\n#ifdef REDIS_TEST_MAIN\nint main(int argc, char *argv[]) {\n    return crc64Test(argc, argv);\n}\n\n#endif\n"
  },
  {
    "path": "src/redis/crc64.h",
    "content": "#ifndef CRC64_H\n#define CRC64_H\n\n#include <stdint.h>\n\nvoid crc64_init(void);\nuint64_t crc64(uint64_t crc, const unsigned char *s, uint64_t l);\n\n#ifdef REDIS_TEST\nint crc64Test(int argc, char *argv[], int flags);\n#endif\n\n#endif\n"
  },
  {
    "path": "src/redis/crcspeed.c",
    "content": "/*\n * Copyright (C) 2013 Mark Adler\n * Originally by: crc64.c Version 1.4  16 Dec 2013  Mark Adler\n * Modifications by Matt Stancliff <matt@genges.com>:\n *   - removed CRC64-specific behavior\n *   - added generation of lookup tables by parameters\n *   - removed inversion of CRC input/result\n *   - removed automatic initialization in favor of explicit initialization\n\n  This software is provided 'as-is', without any express or implied\n  warranty.  In no event will the author be held liable for any damages\n  arising from the use of this software.\n\n  Permission is granted to anyone to use this software for any purpose,\n  including commercial applications, and to alter it and redistribute it\n  freely, subject to the following restrictions:\n\n  1. The origin of this software must not be misrepresented; you must not\n     claim that you wrote the original software. If you use this software\n     in a product, an acknowledgment in the product documentation would be\n     appreciated but is not required.\n  2. Altered source versions must be plainly marked as such, and must not be\n     misrepresented as being the original software.\n  3. This notice may not be removed or altered from any source distribution.\n\n  Mark Adler\n  madler@alumni.caltech.edu\n */\n\n#include \"crcspeed.h\"\n\n/* Fill in a CRC constants table. 
*/\nvoid crcspeed64little_init(crcfn64 crcfn, uint64_t table[8][256]) {\n    uint64_t crc;\n\n    /* generate CRCs for all single byte sequences */\n    for (int n = 0; n < 256; n++) {\n        unsigned char v = n;\n        table[0][n] = crcfn(0, &v, 1);\n    }\n\n    /* generate nested CRC table for future slice-by-8 lookup */\n    for (int n = 0; n < 256; n++) {\n        crc = table[0][n];\n        for (int k = 1; k < 8; k++) {\n            crc = table[0][crc & 0xff] ^ (crc >> 8);\n            table[k][n] = crc;\n        }\n    }\n}\n\nvoid crcspeed16little_init(crcfn16 crcfn, uint16_t table[8][256]) {\n    uint16_t crc;\n\n    /* generate CRCs for all single byte sequences */\n    for (int n = 0; n < 256; n++) {\n        table[0][n] = crcfn(0, &n, 1);\n    }\n\n    /* generate nested CRC table for future slice-by-8 lookup */\n    for (int n = 0; n < 256; n++) {\n        crc = table[0][n];\n        for (int k = 1; k < 8; k++) {\n            crc = table[0][(crc >> 8) & 0xff] ^ (crc << 8);\n            table[k][n] = crc;\n        }\n    }\n}\n\n/* Reverse the bytes in a 64-bit word. */\nstatic inline uint64_t rev8(uint64_t a) {\n#if defined(__GNUC__) || defined(__clang__)\n    return __builtin_bswap64(a);\n#else\n    uint64_t m;\n\n    m = UINT64_C(0xff00ff00ff00ff);\n    a = ((a >> 8) & m) | (a & m) << 8;\n    m = UINT64_C(0xffff0000ffff);\n    a = ((a >> 16) & m) | (a & m) << 16;\n    return a >> 32 | a << 32;\n#endif\n}\n\n/* This function is called once to initialize the CRC table for use on a\n   big-endian architecture. */\nvoid crcspeed64big_init(crcfn64 fn, uint64_t big_table[8][256]) {\n    /* Create the little endian table then reverse all the entries. 
*/\n    crcspeed64little_init(fn, big_table);\n    for (int k = 0; k < 8; k++) {\n        for (int n = 0; n < 256; n++) {\n            big_table[k][n] = rev8(big_table[k][n]);\n        }\n    }\n}\n\nvoid crcspeed16big_init(crcfn16 fn, uint16_t big_table[8][256]) {\n    /* Create the little endian table then reverse all the entries. */\n    crcspeed16little_init(fn, big_table);\n    for (int k = 0; k < 8; k++) {\n        for (int n = 0; n < 256; n++) {\n            big_table[k][n] = rev8(big_table[k][n]);\n        }\n    }\n}\n\n/* Calculate a non-inverted CRC multiple bytes at a time on a little-endian\n * architecture. If you need inverted CRC, invert *before* calling and invert\n * *after* calling.\n * 64 bit crc = process 8 bytes at once;\n */\nuint64_t crcspeed64little(uint64_t little_table[8][256], uint64_t crc,\n                          void *buf, size_t len) {\n    unsigned char *next = buf;\n\n    /* process individual bytes until we reach an 8-byte aligned pointer */\n    while (len && ((uintptr_t)next & 7) != 0) {\n        crc = little_table[0][(crc ^ *next++) & 0xff] ^ (crc >> 8);\n        len--;\n    }\n\n    /* fast middle processing, 8 bytes (aligned!) 
per loop */\n    while (len >= 8) {\n        crc ^= *(uint64_t *)next;\n        crc = little_table[7][crc & 0xff] ^\n              little_table[6][(crc >> 8) & 0xff] ^\n              little_table[5][(crc >> 16) & 0xff] ^\n              little_table[4][(crc >> 24) & 0xff] ^\n              little_table[3][(crc >> 32) & 0xff] ^\n              little_table[2][(crc >> 40) & 0xff] ^\n              little_table[1][(crc >> 48) & 0xff] ^\n              little_table[0][crc >> 56];\n        next += 8;\n        len -= 8;\n    }\n\n    /* process remaining bytes (can't be larger than 8) */\n    while (len) {\n        crc = little_table[0][(crc ^ *next++) & 0xff] ^ (crc >> 8);\n        len--;\n    }\n\n    return crc;\n}\n\nuint16_t crcspeed16little(uint16_t little_table[8][256], uint16_t crc,\n                          void *buf, size_t len) {\n    unsigned char *next = buf;\n\n    /* process individual bytes until we reach an 8-byte aligned pointer */\n    while (len && ((uintptr_t)next & 7) != 0) {\n        crc = little_table[0][((crc >> 8) ^ *next++) & 0xff] ^ (crc << 8);\n        len--;\n    }\n\n    /* fast middle processing, 8 bytes (aligned!) 
per loop */\n    while (len >= 8) {\n        uint64_t n = *(uint64_t *)next;\n        crc = little_table[7][(n & 0xff) ^ ((crc >> 8) & 0xff)] ^\n              little_table[6][((n >> 8) & 0xff) ^ (crc & 0xff)] ^\n              little_table[5][(n >> 16) & 0xff] ^\n              little_table[4][(n >> 24) & 0xff] ^\n              little_table[3][(n >> 32) & 0xff] ^\n              little_table[2][(n >> 40) & 0xff] ^\n              little_table[1][(n >> 48) & 0xff] ^\n              little_table[0][n >> 56];\n        next += 8;\n        len -= 8;\n    }\n\n    /* process remaining bytes (can't be larger than 8) */\n    while (len) {\n        crc = little_table[0][((crc >> 8) ^ *next++) & 0xff] ^ (crc << 8);\n        len--;\n    }\n\n    return crc;\n}\n\n/* Calculate a non-inverted CRC eight bytes at a time on a big-endian\n * architecture.\n */\nuint64_t crcspeed64big(uint64_t big_table[8][256], uint64_t crc, void *buf,\n                       size_t len) {\n    unsigned char *next = buf;\n\n    crc = rev8(crc);\n    while (len && ((uintptr_t)next & 7) != 0) {\n        crc = big_table[0][(crc >> 56) ^ *next++] ^ (crc << 8);\n        len--;\n    }\n\n    while (len >= 8) {\n        crc ^= *(uint64_t *)next;\n        crc = big_table[0][crc & 0xff] ^\n              big_table[1][(crc >> 8) & 0xff] ^\n              big_table[2][(crc >> 16) & 0xff] ^\n              big_table[3][(crc >> 24) & 0xff] ^\n              big_table[4][(crc >> 32) & 0xff] ^\n              big_table[5][(crc >> 40) & 0xff] ^\n              big_table[6][(crc >> 48) & 0xff] ^\n              big_table[7][crc >> 56];\n        next += 8;\n        len -= 8;\n    }\n\n    while (len) {\n        crc = big_table[0][(crc >> 56) ^ *next++] ^ (crc << 8);\n        len--;\n    }\n\n    return rev8(crc);\n}\n\n/* WARNING: Completely untested on big endian architecture.  Possibly broken. 
*/\nuint16_t crcspeed16big(uint16_t big_table[8][256], uint16_t crc_in, void *buf,\n                       size_t len) {\n    unsigned char *next = buf;\n    uint64_t crc = crc_in;\n\n    crc = rev8(crc);\n    while (len && ((uintptr_t)next & 7) != 0) {\n        crc = big_table[0][((crc >> (56 - 8)) ^ *next++) & 0xff] ^ (crc >> 8);\n        len--;\n    }\n\n    while (len >= 8) {\n        uint64_t n = *(uint64_t *)next;\n        crc = big_table[0][(n & 0xff) ^ ((crc >> (56 - 8)) & 0xff)] ^\n              big_table[1][((n >> 8) & 0xff) ^ (crc & 0xff)] ^\n              big_table[2][(n >> 16) & 0xff] ^\n              big_table[3][(n >> 24) & 0xff] ^\n              big_table[4][(n >> 32) & 0xff] ^\n              big_table[5][(n >> 40) & 0xff] ^\n              big_table[6][(n >> 48) & 0xff] ^\n              big_table[7][n >> 56];\n        next += 8;\n        len -= 8;\n    }\n\n    while (len) {\n        crc = big_table[0][((crc >> (56 - 8)) ^ *next++) & 0xff] ^ (crc >> 8);\n        len--;\n    }\n\n    return rev8(crc);\n}\n\n/* Return the CRC of buf[0..len-1] with initial crc, processing eight bytes\n   at a time using passed-in lookup table.\n   This selects one of two routines depending on the endianness of\n   the architecture. */\nuint64_t crcspeed64native(uint64_t table[8][256], uint64_t crc, void *buf,\n                          size_t len) {\n    uint64_t n = 1;\n\n    return *(char *)&n ? crcspeed64little(table, crc, buf, len)\n                       : crcspeed64big(table, crc, buf, len);\n}\n\nuint16_t crcspeed16native(uint16_t table[8][256], uint16_t crc, void *buf,\n                          size_t len) {\n    uint64_t n = 1;\n\n    return *(char *)&n ? crcspeed16little(table, crc, buf, len)\n                       : crcspeed16big(table, crc, buf, len);\n}\n\n/* Initialize CRC lookup table in architecture-dependent manner. */\nvoid crcspeed64native_init(crcfn64 fn, uint64_t table[8][256]) {\n    uint64_t n = 1;\n\n    *(char *)&n ? 
crcspeed64little_init(fn, table)\n                : crcspeed64big_init(fn, table);\n}\n\nvoid crcspeed16native_init(crcfn16 fn, uint16_t table[8][256]) {\n    uint64_t n = 1;\n\n    *(char *)&n ? crcspeed16little_init(fn, table)\n                : crcspeed16big_init(fn, table);\n}\n"
  },
  {
    "path": "src/redis/crcspeed.h",
    "content": "/* Copyright (c) 2014, Matt Stancliff <matt@genges.com>\n * All rights reserved.\n *\n * Redistribution and use in source and binary forms, with or without\n * modification, are permitted provided that the following conditions are met:\n *\n *   * Redistributions of source code must retain the above copyright notice,\n *     this list of conditions and the following disclaimer.\n *   * Redistributions in binary form must reproduce the above copyright\n *     notice, this list of conditions and the following disclaimer in the\n *     documentation and/or other materials provided with the distribution.\n *   * Neither the name of Redis nor the names of its contributors may be used\n *     to endorse or promote products derived from this software without\n *     specific prior written permission.\n *\n * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS \"AS IS\"\n * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE\n * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE\n * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE\n * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR\n * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF\n * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS\n * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN\n * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)\n * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE\n * POSSIBILITY OF SUCH DAMAGE. 
*/\n\n#ifndef CRCSPEED_H\n#define CRCSPEED_H\n\n#include <inttypes.h>\n#include <stdio.h>\n\ntypedef uint64_t (*crcfn64)(uint64_t, const void *, const uint64_t);\ntypedef uint16_t (*crcfn16)(uint16_t, const void *, const uint64_t);\n\n/* CRC-64 */\nvoid crcspeed64little_init(crcfn64 fn, uint64_t table[8][256]);\nvoid crcspeed64big_init(crcfn64 fn, uint64_t table[8][256]);\nvoid crcspeed64native_init(crcfn64 fn, uint64_t table[8][256]);\n\nuint64_t crcspeed64little(uint64_t table[8][256], uint64_t crc, void *buf,\n                          size_t len);\nuint64_t crcspeed64big(uint64_t table[8][256], uint64_t crc, void *buf,\n                       size_t len);\nuint64_t crcspeed64native(uint64_t table[8][256], uint64_t crc, void *buf,\n                          size_t len);\n\n/* CRC-16 */\nvoid crcspeed16little_init(crcfn16 fn, uint16_t table[8][256]);\nvoid crcspeed16big_init(crcfn16 fn, uint16_t table[8][256]);\nvoid crcspeed16native_init(crcfn16 fn, uint16_t table[8][256]);\n\nuint16_t crcspeed16little(uint16_t table[8][256], uint16_t crc, void *buf,\n                          size_t len);\nuint16_t crcspeed16big(uint16_t table[8][256], uint16_t crc, void *buf,\n                       size_t len);\nuint16_t crcspeed16native(uint16_t table[8][256], uint16_t crc, void *buf,\n                          size_t len);\n#endif\n"
  },
  {
    "path": "src/redis/debug.c",
    "content": "/*\n * Copyright (c) 2009-2020, Salvatore Sanfilippo <antirez at gmail dot com>\n * Copyright (c) 2020, Redis Labs, Inc\n * All rights reserved.\n *\n * Redistribution and use in source and binary forms, with or without\n * modification, are permitted provided that the following conditions are met:\n *\n *   * Redistributions of source code must retain the above copyright notice,\n *     this list of conditions and the following disclaimer.\n *   * Redistributions in binary form must reproduce the above copyright\n *     notice, this list of conditions and the following disclaimer in the\n *     documentation and/or other materials provided with the distribution.\n *   * Neither the name of Redis nor the names of its contributors may be used\n *     to endorse or promote products derived from this software without\n *     specific prior written permission.\n *\n * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS \"AS IS\"\n * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE\n * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE\n * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE\n * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR\n * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF\n * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS\n * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN\n * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)\n * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE\n * POSSIBILITY OF SUCH DAMAGE.\n */\n\n\n#include <stdarg.h>\n#include <stdio.h>\n#include <syslog.h>\n#include <assert.h>\n\n#include \"util.h\"\n\nint verbosity = LL_NOTICE;\n\nvoid serverLog(int level, const char *fmt, ...) 
{\n    va_list ap;\n    char msg[LOG_MAX_LEN];\n\n    if ((level&0xff) < verbosity) return;\n\n    va_start(ap, fmt);\n    vsnprintf(msg, sizeof(msg), fmt, ap);\n    va_end(ap);\n\n    fprintf(stdout, \"%s\\n\",msg);\n}\n\nvoid _serverPanic(const char *file, int line, const char *msg, ...) {\n    va_list ap;\n    va_start(ap,msg);\n    char fmtmsg[256];\n    vsnprintf(fmtmsg,sizeof(fmtmsg),msg,ap);\n    va_end(ap);\n\n    serverLog(LL_WARNING, \"------------------------------------------------\");\n    serverLog(LL_WARNING, \"!!! Software Failure. Press left mouse button to continue\");\n    serverLog(LL_WARNING, \"Guru Meditation: %s #%s:%d\", fmtmsg,file,line);\n#ifndef NDEBUG\n#if defined(__APPLE__)\n    __assert_rtn(msg, file, line, \"\");\n#elif defined(__FreeBSD__)\n    __assert(\"\", file, line, msg);\n#else      \n    __assert_fail(msg, file, line, \"\");\n#endif    \n#endif \n}\n\nvoid _serverAssert(const char *estr, const char *file, int line) {\n    serverLog(LL_WARNING,\"=== ASSERTION FAILED ===\");\n    serverLog(LL_WARNING,\"==> %s:%d '%s' is not true\",file,line,estr);\n}\n"
  },
  {
    "path": "src/redis/dict.c",
    "content": "/* Hash Tables Implementation.\n *\n * This file implements in memory hash tables with insert/del/replace/find/\n * get-random-element operations. Hash tables will auto resize if needed\n * tables of power of two in size are used, collisions are handled by\n * chaining. See the source code for more information... :)\n *\n * Copyright (c) 2006-2012, Salvatore Sanfilippo <antirez at gmail dot com>\n * All rights reserved.\n *\n * Redistribution and use in source and binary forms, with or without\n * modification, are permitted provided that the following conditions are met:\n *\n *   * Redistributions of source code must retain the above copyright notice,\n *     this list of conditions and the following disclaimer.\n *   * Redistributions in binary form must reproduce the above copyright\n *     notice, this list of conditions and the following disclaimer in the\n *     documentation and/or other materials provided with the distribution.\n *   * Neither the name of Redis nor the names of its contributors may be used\n *     to endorse or promote products derived from this software without\n *     specific prior written permission.\n *\n * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS \"AS IS\"\n * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE\n * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE\n * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE\n * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR\n * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF\n * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS\n * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN\n * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)\n * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE\n * POSSIBILITY OF SUCH DAMAGE.\n */\n\n#include <stdio.h>\n#include <stdlib.h>\n#include <stdint.h>\n#include <string.h>\n#include <stdarg.h>\n#include <limits.h>\n#include <sys/time.h>\n\n#include \"dict.h\"\n#include \"zmalloc.h\"\n\n#if !defined(DICT_BENCHMARK_MAIN) && defined(ROMAN_REDIS_ASSERT_DISABLED)\n#include \"redisassert.h\"\n#else\n#include <assert.h>\n#endif\n\n/* Using dictEnableResize() / dictDisableResize() we make possible to\n * enable/disable resizing of the hash table as needed. This is very important\n * for Redis, as we use copy-on-write and don't want to move too much memory\n * around when there is a child performing saving operations.\n *\n * Note that even when dict_can_resize is set to 0, not all resizes are\n * prevented: a hash table is still allowed to grow if the ratio between\n * the number of elements and the buckets > dict_force_resize_ratio. 
*/\nstatic int dict_can_resize = 1;\nstatic unsigned int dict_force_resize_ratio = 5;\n\n/* -------------------------- private prototypes ---------------------------- */\n\nstatic int _dictExpandIfNeeded(dict *d);\nstatic signed char _dictNextExp(unsigned long size);\nstatic long _dictKeyIndex(dict *d, const void *key, uint64_t hash, dictEntry **existing);\nstatic int _dictInit(dict *d, dictType *type);\n\n/* -------------------------- hash functions -------------------------------- */\n\nstatic uint8_t dict_hash_function_seed[16];\n\nvoid dictSetHashFunctionSeed(uint8_t *seed) {\n    memcpy(dict_hash_function_seed,seed,sizeof(dict_hash_function_seed));\n}\n\nuint8_t *dictGetHashFunctionSeed(void) {\n    return dict_hash_function_seed;\n}\n\n/* The default hashing function uses SipHash implementation\n * in siphash.c. */\n\nuint64_t siphash(const uint8_t *in, const size_t inlen, const uint8_t *k);\nuint64_t siphash_nocase(const uint8_t *in, const size_t inlen, const uint8_t *k);\n\nuint64_t dictGenHashFunction(const void *key, size_t len) {\n    return siphash(key,len,dict_hash_function_seed);\n}\n\nuint64_t dictGenCaseHashFunction(const unsigned char *buf, size_t len) {\n    return siphash_nocase(buf,len,dict_hash_function_seed);\n}\n\n/* ----------------------------- API implementation ------------------------- */\n\n/* Reset hash table parameters already initialized with _dictInit()*/\nstatic void _dictReset(dict *d, int htidx)\n{\n    d->ht_table[htidx] = NULL;\n    d->ht_size_exp[htidx] = -1;\n    d->ht_used[htidx] = 0;\n}\n\n/* Create a new hash table */\ndict *dictCreate(dictType *type)\n{\n    dict *d = zmalloc(sizeof(*d));\n\n    _dictInit(d,type);\n    return d;\n}\n\n/* Initialize the hash table */\nint _dictInit(dict *d, dictType *type)\n{\n    _dictReset(d, 0);\n    _dictReset(d, 1);\n    d->type = type;\n    d->rehashidx = -1;\n    d->pauserehash = 0;\n    return DICT_OK;\n}\n\n/* Resize the table to the minimal size that contains all the elements,\n 
* but with the invariant of a USED/BUCKETS ratio near to <= 1 */\nint dictResize(dict *d)\n{\n    unsigned long minimal;\n\n    if (!dict_can_resize || dictIsRehashing(d)) return DICT_ERR;\n    minimal = d->ht_used[0];\n    if (minimal < DICT_HT_INITIAL_SIZE)\n        minimal = DICT_HT_INITIAL_SIZE;\n    return dictExpand(d, minimal);\n}\n\n/* Expand or create the hash table,\n * when malloc_failed is non-NULL, it'll avoid panic if malloc fails (in which case it'll be set to 1).\n * Returns DICT_OK if expand was performed, and DICT_ERR if skipped. */\nint _dictExpand(dict *d, unsigned long size, int* malloc_failed)\n{\n    if (malloc_failed) *malloc_failed = 0;\n\n    /* the size is invalid if it is smaller than the number of\n     * elements already inside the hash table */\n    if (dictIsRehashing(d) || d->ht_used[0] > size)\n        return DICT_ERR;\n\n    /* the new hash table */\n    dictEntry **new_ht_table;\n    unsigned long new_ht_used;\n    signed char new_ht_size_exp = _dictNextExp(size);\n\n    /* Detect overflows */\n    size_t newsize = 1ul<<new_ht_size_exp;\n    if (newsize < size || newsize * sizeof(dictEntry*) < newsize)\n        return DICT_ERR;\n\n    /* Rehashing to the same table size is not useful. */\n    if (new_ht_size_exp == d->ht_size_exp[0]) return DICT_ERR;\n\n    /* Allocate the new hash table and initialize all pointers to NULL */\n    if (malloc_failed) {\n        new_ht_table = ztrycalloc(newsize*sizeof(dictEntry*));\n        *malloc_failed = new_ht_table == NULL;\n        if (*malloc_failed)\n            return DICT_ERR;\n    } else\n        new_ht_table = zcalloc(newsize*sizeof(dictEntry*));\n\n    new_ht_used = 0;\n\n    /* Is this the first initialization? If so it's not really a rehashing\n     * we just set the first hash table so that it can accept keys. 
*/\n    if (d->ht_table[0] == NULL) {\n        d->ht_size_exp[0] = new_ht_size_exp;\n        d->ht_used[0] = new_ht_used;\n        d->ht_table[0] = new_ht_table;\n        return DICT_OK;\n    }\n\n    /* Prepare a second hash table for incremental rehashing */\n    d->ht_size_exp[1] = new_ht_size_exp;\n    d->ht_used[1] = new_ht_used;\n    d->ht_table[1] = new_ht_table;\n    d->rehashidx = 0;\n    return DICT_OK;\n}\n\n/* return DICT_ERR if expand was not performed */\nint dictExpand(dict *d, unsigned long size) {\n    return _dictExpand(d, size, NULL);\n}\n\n/* return DICT_ERR if expand failed due to memory allocation failure */\nint dictTryExpand(dict *d, unsigned long size) {\n    int malloc_failed;\n    _dictExpand(d, size, &malloc_failed);\n    return malloc_failed? DICT_ERR : DICT_OK;\n}\n\n/* Performs N steps of incremental rehashing. Returns 1 if there are still\n * keys to move from the old to the new hash table, otherwise 0 is returned.\n *\n * Note that a rehashing step consists in moving a bucket (that may have more\n * than one key as we use chaining) from the old to the new hash table, however\n * since part of the hash table may be composed of empty spaces, it is not\n * guaranteed that this function will rehash even a single bucket, since it\n * will visit at max N*10 empty buckets in total, otherwise the amount of\n * work it does would be unbound and the function may block for a long time. */\nint dictRehash(dict *d, int n) {\n    int empty_visits = n*10; /* Max number of empty buckets to visit. 
*/\n    if (!dictIsRehashing(d)) return 0;\n\n    while(n-- && d->ht_used[0] != 0) {\n        dictEntry *de, *nextde;\n\n        /* Note that rehashidx can't overflow as we are sure there are more\n         * elements because ht[0].used != 0 */\n        assert(DICTHT_SIZE(d->ht_size_exp[0]) > (unsigned long)d->rehashidx);\n        while(d->ht_table[0][d->rehashidx] == NULL) {\n            d->rehashidx++;\n            if (--empty_visits == 0) return 1;\n        }\n        de = d->ht_table[0][d->rehashidx];\n        /* Move all the keys in this bucket from the old to the new hash HT */\n        while(de) {\n            uint64_t h;\n\n            nextde = de->next;\n            /* Get the index in the new hash table */\n            h = dictHashKey(d, de->key) & DICTHT_SIZE_MASK(d->ht_size_exp[1]);\n            de->next = d->ht_table[1][h];\n            d->ht_table[1][h] = de;\n            d->ht_used[0]--;\n            d->ht_used[1]++;\n            de = nextde;\n        }\n        d->ht_table[0][d->rehashidx] = NULL;\n        d->rehashidx++;\n    }\n\n    /* Check if we already rehashed the whole table... */\n    if (d->ht_used[0] == 0) {\n        zfree(d->ht_table[0]);\n        /* Copy the new ht onto the old one */\n        d->ht_table[0] = d->ht_table[1];\n        d->ht_used[0] = d->ht_used[1];\n        d->ht_size_exp[0] = d->ht_size_exp[1];\n        _dictReset(d, 1);\n        d->rehashidx = -1;\n        return 0;\n    }\n\n    /* More to rehash... */\n    return 1;\n}\n\nlong long timeInMilliseconds(void) {\n    struct timeval tv;\n\n    gettimeofday(&tv,NULL);\n    return (((long long)tv.tv_sec)*1000)+(tv.tv_usec/1000);\n}\n\n/* Rehash in ms+\"delta\" milliseconds. The value of \"delta\" is larger \n * than 0, and is smaller than 1 in most cases. 
The exact upper bound \n * depends on the running time of dictRehash(d,100).*/\nint dictRehashMilliseconds(dict *d, int ms) {\n    if (d->pauserehash > 0) return 0;\n\n    long long start = timeInMilliseconds();\n    int rehashes = 0;\n\n    while(dictRehash(d,100)) {\n        rehashes += 100;\n        if (timeInMilliseconds()-start > ms) break;\n    }\n    return rehashes;\n}\n\n/* This function performs just a step of rehashing, and only if hashing has\n * not been paused for our hash table. When we have iterators in the\n * middle of a rehashing we can't mess with the two hash tables otherwise\n * some element can be missed or duplicated.\n *\n * This function is called by common lookup or update operations in the\n * dictionary so that the hash table automatically migrates from H1 to H2\n * while it is actively used. */\nstatic void _dictRehashStep(dict *d) {\n    if (d->pauserehash == 0) dictRehash(d,1);\n}\n\n/* Add an element to the target hash table */\nint dictAdd(dict *d, void *key, void *val)\n{\n    dictEntry *entry = dictAddRaw(d,key,NULL);\n\n    if (!entry) return DICT_ERR;\n    dictSetVal(d, entry, val);\n    return DICT_OK;\n}\n\n/* Low level add or find:\n * This function adds the entry but instead of setting a value returns the\n * dictEntry structure to the user, that will make sure to fill the value\n * field as they wish.\n *\n * This function is also directly exposed to the user API to be called\n * mainly in order to store non-pointers inside the hash value, example:\n *\n * entry = dictAddRaw(dict,mykey,NULL);\n * if (entry != NULL) dictSetSignedIntegerVal(entry,1000);\n *\n * Return values:\n *\n * If key already exists NULL is returned, and \"*existing\" is populated\n * with the existing entry if existing is not NULL.\n *\n * If key was added, the hash entry is returned to be manipulated by the caller.\n */\ndictEntry *dictAddRaw(dict *d, void *key, dictEntry **existing)\n{\n    long index;\n    dictEntry *entry;\n    int htidx;\n\n    
if (dictIsRehashing(d)) _dictRehashStep(d);\n\n    /* Get the index of the new element, or -1 if\n     * the element already exists. */\n    if ((index = _dictKeyIndex(d, key, dictHashKey(d,key), existing)) == -1)\n        return NULL;\n\n    /* Allocate the memory and store the new entry.\n     * Insert the element in top, with the assumption that in a database\n     * system it is more likely that recently added entries are accessed\n     * more frequently. */\n    htidx = dictIsRehashing(d) ? 1 : 0;\n    size_t metasize = 0;\n    entry = zmalloc(sizeof(*entry) + metasize);\n    entry->next = d->ht_table[htidx][index];\n    d->ht_table[htidx][index] = entry;\n    d->ht_used[htidx]++;\n\n    /* Set the hash entry fields. */\n    dictSetKey(d, entry, key);\n    return entry;\n}\n\n/* Add or Overwrite:\n * Add an element, discarding the old value if the key already exists.\n * Return 1 if the key was added from scratch, 0 if there was already an\n * element with such key and dictReplace() just performed a value update\n * operation. */\nint dictReplace(dict *d, void *key, void *val)\n{\n    dictEntry *entry, *existing, auxentry;\n\n    /* Try to add the element. If the key\n     * does not exists dictAdd will succeed. */\n    entry = dictAddRaw(d,key,&existing);\n    if (entry) {\n        dictSetVal(d, entry, val);\n        return 1;\n    }\n\n    /* Set the new value and free the old one. Note that it is important\n     * to do that in this order, as the value may just be exactly the same\n     * as the previous one. In this context, think to reference counting,\n     * you want to increment (set), and then decrement (free), and not the\n     * reverse. 
*/\n    auxentry = *existing;\n    dictSetVal(d, existing, val);\n    dictFreeVal(d, &auxentry);\n    return 0;\n}\n\n/* Add or Find:\n * dictAddOrFind() is simply a version of dictAddRaw() that always\n * returns the hash entry of the specified key, even if the key already\n * exists and can't be added (in that case the entry of the already\n * existing key is returned.)\n *\n * See dictAddRaw() for more information. */\ndictEntry *dictAddOrFind(dict *d, void *key) {\n    dictEntry *entry, *existing;\n    entry = dictAddRaw(d,key,&existing);\n    return entry ? entry : existing;\n}\n\n/* Search and remove an element. This is a helper function for\n * dictDelete() and dictUnlink(), please check the top comment\n * of those functions. */\nstatic dictEntry *dictGenericDelete(dict *d, const void *key, int nofree) {\n    uint64_t h, idx;\n    dictEntry *he, *prevHe;\n    int table;\n\n    /* dict is empty */\n    if (dictSize(d) == 0) return NULL;\n\n    if (dictIsRehashing(d)) _dictRehashStep(d);\n    h = dictHashKey(d, key);\n\n    for (table = 0; table <= 1; table++) {\n        idx = h & DICTHT_SIZE_MASK(d->ht_size_exp[table]);\n        he = d->ht_table[table][idx];\n        prevHe = NULL;\n        while(he) {\n            if (key==he->key || dictCompareKeys(d, key, he->key)) {\n                /* Unlink the element from the list */\n                if (prevHe)\n                    prevHe->next = he->next;\n                else\n                    d->ht_table[table][idx] = he->next;\n                if (!nofree) {\n                    dictFreeUnlinkedEntry(d, he);\n                }\n                d->ht_used[table]--;\n                return he;\n            }\n            prevHe = he;\n            he = he->next;\n        }\n        if (!dictIsRehashing(d)) break;\n    }\n    return NULL; /* not found */\n}\n\n/* Remove an element, returning DICT_OK on success or DICT_ERR if the\n * element was not found. 
*/\nint dictDelete(dict *ht, const void *key) {\n    return dictGenericDelete(ht,key,0) ? DICT_OK : DICT_ERR;\n}\n\n/* Remove an element from the table, but without actually releasing\n * the key, value and dictionary entry. The dictionary entry is returned\n * if the element was found (and unlinked from the table), and the user\n * should later call `dictFreeUnlinkedEntry()` with it in order to release it.\n * Otherwise if the key is not found, NULL is returned.\n *\n * This function is useful when we want to remove something from the hash\n * table but want to use its value before actually deleting the entry.\n * Without this function the pattern would require two lookups:\n *\n *  entry = dictFind(dictionary,key);\n *  // Do something with entry\n *  dictDelete(dictionary,key);\n *\n * Thanks to this function it is possible to avoid this, and use\n * instead:\n *\n * entry = dictUnlink(dictionary,key);\n * // Do something with entry\n * dictFreeUnlinkedEntry(dictionary,entry); // <- This does not need to lookup again.\n */\ndictEntry *dictUnlink(dict *d, const void *key) {\n    return dictGenericDelete(d,key,1);\n}\n\n/* You need to call this function to really free the entry after a call\n * to dictUnlink(). It's safe to call this function with 'he' = NULL. 
*/\nvoid dictFreeUnlinkedEntry(dict *d, dictEntry *he) {\n    if (he == NULL) return;\n    dictFreeKey(d, he);\n    dictFreeVal(d, he);\n    zfree(he);\n}\n\n/* Destroy an entire dictionary */\nint _dictClear(dict *d, int htidx, void(callback)(dict*)) {\n    unsigned long i;\n\n    /* Free all the elements */\n    for (i = 0; i < DICTHT_SIZE(d->ht_size_exp[htidx]) && d->ht_used[htidx] > 0; i++) {\n        dictEntry *he, *nextHe;\n\n        if (callback && (i & 65535) == 0) callback(d);\n\n        if ((he = d->ht_table[htidx][i]) == NULL) continue;\n        while(he) {\n            nextHe = he->next;\n            dictFreeKey(d, he);\n            dictFreeVal(d, he);\n            zfree(he);\n            d->ht_used[htidx]--;\n            he = nextHe;\n        }\n    }\n    /* Free the table and the allocated cache structure */\n    zfree(d->ht_table[htidx]);\n    /* Re-initialize the table */\n    _dictReset(d, htidx);\n    return DICT_OK; /* never fails */\n}\n\n/* Clear & Release the hash table */\nvoid dictRelease(dict *d)\n{\n    _dictClear(d,0,NULL);\n    _dictClear(d,1,NULL);\n    zfree(d);\n}\n\ndictEntry *dictFind(dict *d, const void *key)\n{\n    dictEntry *he;\n    uint64_t h, idx, table;\n\n    if (dictSize(d) == 0) return NULL; /* dict is empty */\n    if (dictIsRehashing(d)) _dictRehashStep(d);\n    h = dictHashKey(d, key);\n    for (table = 0; table <= 1; table++) {\n        idx = h & DICTHT_SIZE_MASK(d->ht_size_exp[table]);\n        he = d->ht_table[table][idx];\n        while(he) {\n            if (key==he->key || dictCompareKeys(d, key, he->key))\n                return he;\n            he = he->next;\n        }\n        if (!dictIsRehashing(d)) return NULL;\n    }\n    return NULL;\n}\n\nvoid *dictFetchValue(dict *d, const void *key) {\n    dictEntry *he;\n\n    he = dictFind(d,key);\n    return he ? 
dictGetVal(he) : NULL;\n}\n\n/* A fingerprint is a 64 bit number that represents the state of the dictionary\n * at a given time, it's just a few dict properties xored together.\n * When an unsafe iterator is initialized, we get the dict fingerprint, and check\n * the fingerprint again when the iterator is released.\n * If the two fingerprints are different it means that the user of the iterator\n * performed forbidden operations against the dictionary while iterating. */\nunsigned long long dictFingerprint(dict *d) {\n    unsigned long long integers[6], hash = 0;\n    int j;\n\n    integers[0] = (long) d->ht_table[0];\n    integers[1] = d->ht_size_exp[0];\n    integers[2] = d->ht_used[0];\n    integers[3] = (long) d->ht_table[1];\n    integers[4] = d->ht_size_exp[1];\n    integers[5] = d->ht_used[1];\n\n    /* We hash N integers by summing every successive integer with the integer\n     * hashing of the previous sum. Basically:\n     *\n     * Result = hash(hash(hash(int1)+int2)+int3) ...\n     *\n     * This way the same set of integers in a different order will (likely) hash\n     * to a different number. */\n    for (j = 0; j < 6; j++) {\n        hash += integers[j];\n        /* For the hashing step we use Thomas Wang's 64 bit integer hash. 
*/\n        hash = (~hash) + (hash << 21); // hash = (hash << 21) - hash - 1;\n        hash = hash ^ (hash >> 24);\n        hash = (hash + (hash << 3)) + (hash << 8); // hash * 265\n        hash = hash ^ (hash >> 14);\n        hash = (hash + (hash << 2)) + (hash << 4); // hash * 21\n        hash = hash ^ (hash >> 28);\n        hash = hash + (hash << 31);\n    }\n    return hash;\n}\n\ndictIterator *dictGetIterator(dict *d)\n{\n    dictIterator *iter = zmalloc(sizeof(*iter));\n\n    iter->d = d;\n    iter->table = 0;\n    iter->index = -1;\n    iter->safe = 0;\n    iter->entry = NULL;\n    iter->nextEntry = NULL;\n    return iter;\n}\n\ndictIterator *dictGetSafeIterator(dict *d) {\n    dictIterator *i = dictGetIterator(d);\n\n    i->safe = 1;\n    return i;\n}\n\ndictEntry *dictNext(dictIterator *iter)\n{\n    while (1) {\n        if (iter->entry == NULL) {\n            if (iter->index == -1 && iter->table == 0) {\n                if (iter->safe)\n                    dictPauseRehashing(iter->d);\n                else\n                    iter->fingerprint = dictFingerprint(iter->d);\n            }\n            iter->index++;\n            if (iter->index >= (long) DICTHT_SIZE(iter->d->ht_size_exp[iter->table])) {\n                if (dictIsRehashing(iter->d) && iter->table == 0) {\n                    iter->table++;\n                    iter->index = 0;\n                } else {\n                    break;\n                }\n            }\n            iter->entry = iter->d->ht_table[iter->table][iter->index];\n        } else {\n            iter->entry = iter->nextEntry;\n        }\n        if (iter->entry) {\n            /* We need to save the 'next' here, the iterator user\n             * may delete the entry we are returning. 
*/\n            iter->nextEntry = iter->entry->next;\n            return iter->entry;\n        }\n    }\n    return NULL;\n}\n\nvoid dictReleaseIterator(dictIterator *iter)\n{\n    if (!(iter->index == -1 && iter->table == 0)) {\n        if (iter->safe)\n            dictResumeRehashing(iter->d);\n        else\n            assert(iter->fingerprint == dictFingerprint(iter->d));\n    }\n    zfree(iter);\n}\n\n/* Function to reverse bits. Algorithm from:\n * http://graphics.stanford.edu/~seander/bithacks.html#ReverseParallel */\nstatic unsigned long rev(unsigned long v) {\n    unsigned long s = CHAR_BIT * sizeof(v); // bit size; must be power of 2\n    unsigned long mask = ~0UL;\n    while ((s >>= 1) > 0) {\n        mask ^= (mask << s);\n        v = ((v >> s) & mask) | ((v << s) & ~mask);\n    }\n    return v;\n}\n\n/* dictScan() is used to iterate over the elements of a dictionary.\n *\n * Iterating works the following way:\n *\n * 1) Initially you call the function using a cursor (v) value of 0.\n * 2) The function performs one step of the iteration, and returns the\n *    new cursor value you must use in the next call.\n * 3) When the returned cursor is 0, the iteration is complete.\n *\n * The function guarantees all elements present in the\n * dictionary get returned between the start and end of the iteration.\n * However it is possible some elements get returned multiple times.\n *\n * For every element returned, the callback argument 'fn' is\n * called with 'privdata' as first argument and the dictionary entry\n * 'de' as second argument.\n *\n * HOW IT WORKS.\n *\n * The iteration algorithm was designed by Pieter Noordhuis.\n * The main idea is to increment a cursor starting from the higher order\n * bits. 
That is, instead of incrementing the cursor normally, the bits\n * of the cursor are reversed, then the cursor is incremented, and finally\n * the bits are reversed again.\n *\n * This strategy is needed because the hash table may be resized between\n * iteration calls.\n *\n * dict.c hash tables are always power of two in size, and they\n * use chaining, so the position of an element in a given table is given\n * by computing the bitwise AND between Hash(key) and SIZE-1\n * (where SIZE-1 is always the mask that is equivalent to taking the rest\n *  of the division between the Hash of the key and SIZE).\n *\n * For example if the current hash table size is 16, the mask is\n * (in binary) 1111. The position of a key in the hash table will always be\n * the last four bits of the hash output, and so forth.\n *\n * WHAT HAPPENS IF THE TABLE CHANGES IN SIZE?\n *\n * If the hash table grows, elements can go anywhere in one multiple of\n * the old bucket: for example let's say we already iterated with\n * a 4 bit cursor 1100 (the mask is 1111 because hash table size = 16).\n *\n * If the hash table will be resized to 64 elements, then the new mask will\n * be 111111. The new buckets you obtain by substituting in ??1100\n * with either 0 or 1 can be targeted only by keys we already visited\n * when scanning the bucket 1100 in the smaller hash table.\n *\n * By iterating the higher bits first, because of the inverted counter, the\n * cursor does not need to restart if the table size gets bigger. 
It will\n * continue iterating using cursors without '1100' at the end, and also\n * without any other combination of the final 4 bits already explored.\n *\n * Similarly when the table size shrinks over time, for example going from\n * 16 to 8, if a combination of the lower three bits (the mask for size 8\n * is 111) were already completely explored, it would not be visited again\n * because we are sure we tried, for example, both 0111 and 1111 (all the\n * variations of the higher bit) so we don't need to test it again.\n *\n * WAIT... YOU HAVE *TWO* TABLES DURING REHASHING!\n *\n * Yes, this is true, but we always iterate the smaller table first, then\n * we test all the expansions of the current cursor into the larger\n * table. For example if the current cursor is 101 and we also have a\n * larger table of size 16, we also test (0)101 and (1)101 inside the larger\n * table. This reduces the problem back to having only one table, where\n * the larger one, if it exists, is just an expansion of the smaller one.\n *\n * LIMITATIONS\n *\n * This iterator is completely stateless, and this is a huge advantage,\n * including no additional memory used.\n *\n * The disadvantages resulting from this design are:\n *\n * 1) It is possible we return elements more than once. 
However this is usually\n *    easy to deal with in the application level.\n * 2) The iterator must return multiple elements per call, as it needs to always\n *    return all the keys chained in a given bucket, and all the expansions, so\n *    we are sure we don't miss keys moving during rehashing.\n * 3) The reverse cursor is somewhat hard to understand at first, but this\n *    comment is supposed to help.\n */\nunsigned long dictScan(dict *d,\n                       unsigned long v,\n                       dictScanFunction *fn,\n                       dictScanBucketFunction* bucketfn,\n                       void *privdata)\n{\n    int htidx0, htidx1;\n    const dictEntry *de, *next;\n    unsigned long m0, m1;\n\n    if (dictSize(d) == 0) return 0;\n\n    /* This is needed in case the scan callback tries to do dictFind or alike. */\n    dictPauseRehashing(d);\n\n    if (!dictIsRehashing(d)) {\n        htidx0 = 0;\n        m0 = DICTHT_SIZE_MASK(d->ht_size_exp[htidx0]);\n\n        /* Emit entries at cursor */\n        if (bucketfn) bucketfn(d, &d->ht_table[htidx0][v & m0]);\n        de = d->ht_table[htidx0][v & m0];\n        while (de) {\n            next = de->next;\n            fn(privdata, de);\n            de = next;\n        }\n\n        /* Set unmasked bits so incrementing the reversed cursor\n         * operates on the masked bits */\n        v |= ~m0;\n\n        /* Increment the reverse cursor */\n        v = rev(v);\n        v++;\n        v = rev(v);\n\n    } else {\n        htidx0 = 0;\n        htidx1 = 1;\n\n        /* Make sure t0 is the smaller and t1 is the bigger table */\n        if (DICTHT_SIZE(d->ht_size_exp[htidx0]) > DICTHT_SIZE(d->ht_size_exp[htidx1])) {\n            htidx0 = 1;\n            htidx1 = 0;\n        }\n\n        m0 = DICTHT_SIZE_MASK(d->ht_size_exp[htidx0]);\n        m1 = DICTHT_SIZE_MASK(d->ht_size_exp[htidx1]);\n\n        /* Emit entries at cursor */\n        if (bucketfn) bucketfn(d, &d->ht_table[htidx0][v & m0]);\n        de 
= d->ht_table[htidx0][v & m0];\n        while (de) {\n            next = de->next;\n            fn(privdata, de);\n            de = next;\n        }\n\n        /* Iterate over indices in larger table that are the expansion\n         * of the index pointed to by the cursor in the smaller table */\n        do {\n            /* Emit entries at cursor */\n            if (bucketfn) bucketfn(d, &d->ht_table[htidx1][v & m1]);\n            de = d->ht_table[htidx1][v & m1];\n            while (de) {\n                next = de->next;\n                fn(privdata, de);\n                de = next;\n            }\n\n            /* Increment the reverse cursor not covered by the smaller mask.*/\n            v |= ~m1;\n            v = rev(v);\n            v++;\n            v = rev(v);\n\n            /* Continue while bits covered by mask difference is non-zero */\n        } while (v & (m0 ^ m1));\n    }\n\n    dictResumeRehashing(d);\n\n    return v;\n}\n\n/* ------------------------- private functions ------------------------------ */\n\n/* Because we may need to allocate huge memory chunk at once when dict\n * expands, we will check this allocation is allowed or not if the dict\n * type has expandAllowed member function. */\nstatic int dictTypeExpandAllowed(dict *d) {\n    if (d->type->expandAllowed == NULL) return 1;\n    return d->type->expandAllowed(\n                    DICTHT_SIZE(_dictNextExp(d->ht_used[0] + 1)) * sizeof(dictEntry*),\n                    (double)d->ht_used[0] / DICTHT_SIZE(d->ht_size_exp[0]));\n}\n\n/* Expand the hash table if needed */\nstatic int _dictExpandIfNeeded(dict *d)\n{\n    /* Incremental rehashing already in progress. Return. */\n    if (dictIsRehashing(d)) return DICT_OK;\n\n    /* If the hash table is empty expand it to the initial size. 
*/\n    if (DICTHT_SIZE(d->ht_size_exp[0]) == 0) return dictExpand(d, DICT_HT_INITIAL_SIZE);\n\n    /* If we reached the 1:1 ratio, and we are allowed to resize the hash\n     * table (global setting) or we should avoid it but the ratio between\n     * elements/buckets is over the \"safe\" threshold, we resize doubling\n     * the number of buckets. */\n    if (d->ht_used[0] >= DICTHT_SIZE(d->ht_size_exp[0]) &&\n        (dict_can_resize ||\n         d->ht_used[0]/ DICTHT_SIZE(d->ht_size_exp[0]) > dict_force_resize_ratio) &&\n        dictTypeExpandAllowed(d))\n    {\n        return dictExpand(d, d->ht_used[0] + 1);\n    }\n    return DICT_OK;\n}\n\n/* TODO: clz optimization */\n/* Our hash table capability is a power of two */\nstatic signed char _dictNextExp(unsigned long size)\n{\n    unsigned char e = DICT_HT_INITIAL_EXP;\n\n    if (size >= LONG_MAX) return (8*sizeof(long)-1);\n    while(1) {\n        if (((unsigned long)1<<e) >= size)\n            return e;\n        e++;\n    }\n}\n\n/* Returns the index of a free slot that can be populated with\n * a hash entry for the given 'key'.\n * If the key already exists, -1 is returned\n * and the optional output parameter may be filled.\n *\n * Note that if we are in the process of rehashing the hash table, the\n * index is always returned in the context of the second (new) hash table. 
*/\nstatic long _dictKeyIndex(dict *d, const void *key, uint64_t hash, dictEntry **existing)\n{\n    unsigned long idx, table;\n    dictEntry *he;\n    if (existing) *existing = NULL;\n\n    /* Expand the hash table if needed */\n    if (_dictExpandIfNeeded(d) == DICT_ERR)\n        return -1;\n    for (table = 0; table <= 1; table++) {\n        idx = hash & DICTHT_SIZE_MASK(d->ht_size_exp[table]);\n        /* Search if this slot does not already contain the given key */\n        he = d->ht_table[table][idx];\n        while(he) {\n            if (key==he->key || dictCompareKeys(d, key, he->key)) {\n                if (existing) *existing = he;\n                return -1;\n            }\n            he = he->next;\n        }\n        if (!dictIsRehashing(d)) break;\n    }\n    return idx;\n}\n\nvoid dictEmpty(dict *d, void(callback)(dict*)) {\n    _dictClear(d,0,callback);\n    _dictClear(d,1,callback);\n    d->rehashidx = -1;\n    d->pauserehash = 0;\n}\n\nvoid dictEnableResize(void) {\n    dict_can_resize = 1;\n}\n\nvoid dictDisableResize(void) {\n    dict_can_resize = 0;\n}\n\nuint64_t dictGetHash(dict *d, const void *key) {\n    return dictHashKey(d, key);\n}\n\n/* Finds the dictEntry reference by using pointer and pre-calculated hash.\n * oldkey is a dead pointer and should not be accessed.\n * the hash value should be provided using dictGetHash.\n * no string / key comparison is performed.\n * return value is the reference to the dictEntry if found, or NULL if not found. 
*/\ndictEntry **dictFindEntryRefByPtrAndHash(dict *d, const void *oldptr, uint64_t hash) {\n    dictEntry *he, **heref;\n    unsigned long idx, table;\n\n    if (dictSize(d) == 0) return NULL; /* dict is empty */\n    for (table = 0; table <= 1; table++) {\n        idx = hash & DICTHT_SIZE_MASK(d->ht_size_exp[table]);\n        heref = &d->ht_table[table][idx];\n        he = *heref;\n        while(he) {\n            if (oldptr==he->key)\n                return heref;\n            heref = &he->next;\n            he = *heref;\n        }\n        if (!dictIsRehashing(d)) return NULL;\n    }\n    return NULL;\n}\n\n/* ------------------------------- Debugging ---------------------------------*/\n\n#define DICT_STATS_VECTLEN 50\nsize_t _dictGetStatsHt(char *buf, size_t bufsize, dict *d, int htidx) {\n    unsigned long i, slots = 0, chainlen, maxchainlen = 0;\n    unsigned long totchainlen = 0;\n    unsigned long clvector[DICT_STATS_VECTLEN];\n    size_t l = 0;\n\n    if (d->ht_used[htidx] == 0) {\n        return snprintf(buf,bufsize,\n            \"No stats available for empty dictionaries\\n\");\n    }\n\n    /* Compute stats. */\n    for (i = 0; i < DICT_STATS_VECTLEN; i++) clvector[i] = 0;\n    for (i = 0; i < DICTHT_SIZE(d->ht_size_exp[htidx]); i++) {\n        dictEntry *he;\n\n        if (d->ht_table[htidx][i] == NULL) {\n            clvector[0]++;\n            continue;\n        }\n        slots++;\n        /* For each hash entry on this slot... */\n        chainlen = 0;\n        he = d->ht_table[htidx][i];\n        while(he) {\n            chainlen++;\n            he = he->next;\n        }\n        clvector[(chainlen < DICT_STATS_VECTLEN) ? chainlen : (DICT_STATS_VECTLEN-1)]++;\n        if (chainlen > maxchainlen) maxchainlen = chainlen;\n        totchainlen += chainlen;\n    }\n\n    /* Generate human readable stats. 
*/\n    l += snprintf(buf+l,bufsize-l,\n        \"Hash table %d stats (%s):\\n\"\n        \" table size: %lu\\n\"\n        \" number of elements: %lu\\n\"\n        \" different slots: %lu\\n\"\n        \" max chain length: %lu\\n\"\n        \" avg chain length (counted): %.02f\\n\"\n        \" avg chain length (computed): %.02f\\n\"\n        \" Chain length distribution:\\n\",\n        htidx, (htidx == 0) ? \"main hash table\" : \"rehashing target\",\n        DICTHT_SIZE(d->ht_size_exp[htidx]), d->ht_used[htidx], slots, maxchainlen,\n        (float)totchainlen/slots, (float)d->ht_used[htidx]/slots);\n\n    for (i = 0; i < DICT_STATS_VECTLEN-1; i++) {\n        if (clvector[i] == 0) continue;\n        if (l >= bufsize) break;\n        l += snprintf(buf+l,bufsize-l,\n            \"   %ld: %ld (%.02f%%)\\n\",\n            i, clvector[i], ((float)clvector[i]/DICTHT_SIZE(d->ht_size_exp[htidx]))*100);\n    }\n\n    /* Unlike snprintf(), return the number of characters actually written. */\n    if (bufsize) buf[bufsize-1] = '\\0';\n    return strlen(buf);\n}\n\nvoid dictGetStats(char *buf, size_t bufsize, dict *d) {\n    size_t l;\n    char *orig_buf = buf;\n    size_t orig_bufsize = bufsize;\n\n    l = _dictGetStatsHt(buf,bufsize,d,0);\n    buf += l;\n    bufsize -= l;\n    if (dictIsRehashing(d) && bufsize > 0) {\n        _dictGetStatsHt(buf,bufsize,d,1);\n    }\n    /* Make sure there is a NULL term at the end. 
*/\n    if (orig_bufsize) orig_buf[orig_bufsize-1] = '\\0';\n}\n\n/* ------------------------------- Benchmark ---------------------------------*/\n\n#ifdef REDIS_TEST\n#include \"testhelp.h\"\n\n#define UNUSED(V) ((void) V)\n\nuint64_t hashCallback(const void *key) {\n    return dictGenHashFunction((unsigned char*)key, strlen((char*)key));\n}\n\nint compareCallback(dict *d, const void *key1, const void *key2) {\n    int l1,l2;\n    UNUSED(d);\n\n    l1 = strlen((char*)key1);\n    l2 = strlen((char*)key2);\n    if (l1 != l2) return 0;\n    return memcmp(key1, key2, l1) == 0;\n}\n\nvoid freeCallback(dict *d, void *val) {\n    UNUSED(d);\n\n    zfree(val);\n}\n\nchar *stringFromLongLong(long long value) {\n    char buf[32];\n    int len;\n    char *s;\n\n    len = sprintf(buf,\"%lld\",value);\n    s = zmalloc(len+1);\n    memcpy(s, buf, len);\n    s[len] = '\\0';\n    return s;\n}\n\ndictType BenchmarkDictType = {\n    hashCallback,\n    NULL,\n    NULL,\n    compareCallback,\n    freeCallback,\n    NULL,\n    NULL\n};\n\n#define start_benchmark() start = timeInMilliseconds()\n#define end_benchmark(msg) do { \\\n    elapsed = timeInMilliseconds()-start; \\\n    printf(msg \": %ld items in %lld ms\\n\", count, elapsed); \\\n} while(0)\n\n/* ./redis-server test dict [<count> | --accurate] */\nint dictTest(int argc, char **argv, int flags) {\n    long j;\n    long long start, elapsed;\n    dict *dict = dictCreate(&BenchmarkDictType);\n    long count = 0;\n    int accurate = (flags & REDIS_TEST_ACCURATE);\n\n    if (argc == 4) {\n        if (accurate) {\n            count = 5000000;\n        } else {\n            count = strtol(argv[3],NULL,10);\n        }\n    } else {\n        count = 5000;\n    }\n\n    start_benchmark();\n    for (j = 0; j < count; j++) {\n        int retval = dictAdd(dict,stringFromLongLong(j),(void*)j);\n        assert(retval == DICT_OK);\n    }\n    end_benchmark(\"Inserting\");\n    assert((long)dictSize(dict) == count);\n\n    /* Wait for 
rehashing. */\n    while (dictIsRehashing(dict)) {\n        dictRehashMilliseconds(dict,100);\n    }\n\n    start_benchmark();\n    for (j = 0; j < count; j++) {\n        char *key = stringFromLongLong(j);\n        dictEntry *de = dictFind(dict,key);\n        assert(de != NULL);\n        zfree(key);\n    }\n    end_benchmark(\"Linear access of existing elements\");\n\n    start_benchmark();\n    for (j = 0; j < count; j++) {\n        char *key = stringFromLongLong(j);\n        dictEntry *de = dictFind(dict,key);\n        assert(de != NULL);\n        zfree(key);\n    }\n    end_benchmark(\"Linear access of existing elements (2nd round)\");\n\n    start_benchmark();\n    for (j = 0; j < count; j++) {\n        char *key = stringFromLongLong(rand() % count);\n        dictEntry *de = dictFind(dict,key);\n        assert(de != NULL);\n        zfree(key);\n    }\n    end_benchmark(\"Random access of existing elements\");\n\n    start_benchmark();\n    for (j = 0; j < count; j++) {\n        dictEntry *de = dictGetRandomKey(dict);\n        assert(de != NULL);\n    }\n    end_benchmark(\"Accessing random keys\");\n\n    start_benchmark();\n    for (j = 0; j < count; j++) {\n        char *key = stringFromLongLong(rand() % count);\n        key[0] = 'X';\n        dictEntry *de = dictFind(dict,key);\n        assert(de == NULL);\n        zfree(key);\n    }\n    end_benchmark(\"Accessing missing\");\n\n    start_benchmark();\n    for (j = 0; j < count; j++) {\n        char *key = stringFromLongLong(j);\n        int retval = dictDelete(dict,key);\n        assert(retval == DICT_OK);\n        key[0] += 17; /* Change first number to letter. */\n        retval = dictAdd(dict,key,(void*)j);\n        assert(retval == DICT_OK);\n    }\n    end_benchmark(\"Removing and adding\");\n    dictRelease(dict);\n    return 0;\n}\n#endif\n"
  },
  {
    "path": "src/redis/dict.h",
    "content": "/* Hash Tables Implementation.\n *\n * This file implements in-memory hash tables with insert/del/replace/find/\n * get-random-element operations. Hash tables will auto-resize if needed\n * tables of power of two in size are used, collisions are handled by\n * chaining. See the source code for more information... :)\n *\n * Copyright (c) 2006-2012, Salvatore Sanfilippo <antirez at gmail dot com>\n * All rights reserved.\n *\n * Redistribution and use in source and binary forms, with or without\n * modification, are permitted provided that the following conditions are met:\n *\n *   * Redistributions of source code must retain the above copyright notice,\n *     this list of conditions and the following disclaimer.\n *   * Redistributions in binary form must reproduce the above copyright\n *     notice, this list of conditions and the following disclaimer in the\n *     documentation and/or other materials provided with the distribution.\n *   * Neither the name of Redis nor the names of its contributors may be used\n *     to endorse or promote products derived from this software without\n *     specific prior written permission.\n *\n * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS \"AS IS\"\n * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE\n * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE\n * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE\n * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR\n * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF\n * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS\n * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN\n * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)\n * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE\n * POSSIBILITY OF SUCH DAMAGE.\n */\n\n#ifndef __DICT_H\n#define __DICT_H\n\n#include <limits.h>\n#include <stdint.h>\n#include <stdlib.h>\n\n#define DICT_OK 0\n#define DICT_ERR 1\n\n/* Unused arguments generate annoying warnings... */\n#define DICT_NOTUSED(V) ((void) V)\n\ntypedef struct dictEntry {\n    void *key;\n    union {\n        void *val;\n        uint64_t u64;\n        int64_t s64;\n        double d;\n    } v;\n    struct dictEntry *next;     /* Next entry in the same hash bucket. */\n} dictEntry;\n\ntypedef struct dict dict;\n\ntypedef struct dictType {\n    uint64_t (*hashFunction)(const void *key);\n    void *(*keyDup)(dict *d, const void *key);\n    void *(*valDup)(dict *d, const void *obj);\n    int (*keyCompare)(dict *d, const void *key1, const void *key2);\n    void (*keyDestructor)(dict *d, void *key);\n    void (*valDestructor)(dict *d, void *obj);\n    int (*expandAllowed)(size_t moreMem, double usedRatio);\n} dictType;\n\n#define DICTHT_SIZE(exp) ((exp) == -1 ? 0 : (unsigned long)1<<(exp))\n#define DICTHT_SIZE_MASK(exp) ((exp) == -1 ? 0 : (DICTHT_SIZE(exp))-1)\n\nstruct dict {\n    dictType *type;\n\n    dictEntry **ht_table[2];\n    unsigned long ht_used[2];\n\n    long rehashidx; /* rehashing not in progress if rehashidx == -1 */\n\n    /* Keep small vars at end for optimal (minimal) struct padding */\n    int16_t pauserehash; /* If >0 rehashing is paused (<0 indicates coding error) */\n    signed char ht_size_exp[2]; /* exponent of size. 
(size = 1<<exp) */\n};\n\n/* If safe is set to 1 this is a safe iterator, that means, you can call\n * dictAdd, dictFind, and other functions against the dictionary even while\n * iterating. Otherwise it is a non safe iterator, and only dictNext()\n * should be called while iterating. */\ntypedef struct dictIterator {\n    dict *d;\n    long index;\n    int table, safe;\n    dictEntry *entry, *nextEntry;\n    /* unsafe iterator fingerprint for misuse detection. */\n    unsigned long long fingerprint;\n} dictIterator;\n\ntypedef void (dictScanFunction)(void *privdata, const dictEntry *de);\ntypedef void (dictScanBucketFunction)(dict *d, dictEntry **bucketref);\n\n/* This is the initial size of every hash table */\n#define DICT_HT_INITIAL_EXP      2\n#define DICT_HT_INITIAL_SIZE     (1<<(DICT_HT_INITIAL_EXP))\n\n/* ------------------------------- Macros ------------------------------------*/\n#define dictFreeVal(d, entry) \\\n    if ((d)->type->valDestructor) \\\n        (d)->type->valDestructor((d), (entry)->v.val)\n\n#define dictSetVal(d, entry, _val_) do { \\\n    if ((d)->type->valDup) \\\n        (entry)->v.val = (d)->type->valDup((d), _val_); \\\n    else \\\n        (entry)->v.val = (_val_); \\\n} while(0)\n\n#define dictSetSignedIntegerVal(entry, _val_) \\\n    do { (entry)->v.s64 = _val_; } while(0)\n\n#define dictSetUnsignedIntegerVal(entry, _val_) \\\n    do { (entry)->v.u64 = _val_; } while(0)\n\n#define dictSetDoubleVal(entry, _val_) \\\n    do { (entry)->v.d = _val_; } while(0)\n\n#define dictFreeKey(d, entry) \\\n    if ((d)->type->keyDestructor) \\\n        (d)->type->keyDestructor((d), (entry)->key)\n\n#define dictSetKey(d, entry, _key_) do { \\\n    if ((d)->type->keyDup) \\\n        (entry)->key = (d)->type->keyDup((d), _key_); \\\n    else \\\n        (entry)->key = (_key_); \\\n} while(0)\n\n#define dictCompareKeys(d, key1, key2) \\\n    (((d)->type->keyCompare) ? 
\\\n        (d)->type->keyCompare((d), key1, key2) : \\\n        (key1) == (key2))\n\n#define dictHashKey(d, key) (d)->type->hashFunction(key)\n#define dictGetKey(he) ((he)->key)\n#define dictGetVal(he) ((he)->v.val)\n#define dictGetSignedIntegerVal(he) ((he)->v.s64)\n#define dictGetUnsignedIntegerVal(he) ((he)->v.u64)\n#define dictGetDoubleVal(he) ((he)->v.d)\n#define dictSlots(d) (DICTHT_SIZE((d)->ht_size_exp[0])+DICTHT_SIZE((d)->ht_size_exp[1]))\n#define dictSize(d) ((d)->ht_used[0]+(d)->ht_used[1])\n#define dictIsRehashing(d) ((d)->rehashidx != -1)\n#define dictPauseRehashing(d) (d)->pauserehash++\n#define dictResumeRehashing(d) (d)->pauserehash--\n\n/* If our unsigned long type can store a 64 bit number, use a 64 bit PRNG. */\n#if ULONG_MAX >= 0xffffffffffffffff\n#define randomULong() ((unsigned long) genrand64_int64())\n#else\n#define randomULong() random()\n#endif\n\n/* API */\ndict *dictCreate(dictType *type);\nint dictExpand(dict *d, unsigned long size);\nint dictTryExpand(dict *d, unsigned long size);\nint dictAdd(dict *d, void *key, void *val);\ndictEntry *dictAddRaw(dict *d, void *key, dictEntry **existing);\ndictEntry *dictAddOrFind(dict *d, void *key);\nint dictReplace(dict *d, void *key, void *val);\nint dictDelete(dict *d, const void *key);\ndictEntry *dictUnlink(dict *d, const void *key);\nvoid dictFreeUnlinkedEntry(dict *d, dictEntry *he);\nvoid dictRelease(dict *d);\ndictEntry * dictFind(dict *d, const void *key);\nvoid *dictFetchValue(dict *d, const void *key);\nint dictResize(dict *d);\ndictIterator *dictGetIterator(dict *d);\ndictIterator *dictGetSafeIterator(dict *d);\ndictEntry *dictNext(dictIterator *iter);\nvoid dictReleaseIterator(dictIterator *iter);\ndictEntry *dictGetRandomKey(dict *d);\ndictEntry *dictGetFairRandomKey(dict *d);\nunsigned int dictGetSomeKeys(dict *d, dictEntry **des, unsigned int count);\nvoid dictGetStats(char *buf, size_t bufsize, dict *d);\nuint64_t dictGenHashFunction(const void *key, size_t len);\nuint64_t 
dictGenCaseHashFunction(const unsigned char *buf, size_t len);\nvoid dictEmpty(dict *d, void(callback)(dict*));\nvoid dictEnableResize(void);\nvoid dictDisableResize(void);\nint dictRehash(dict *d, int n);\nint dictRehashMilliseconds(dict *d, int ms);\nvoid dictSetHashFunctionSeed(uint8_t *seed);\nuint8_t *dictGetHashFunctionSeed(void);\nunsigned long dictScan(dict *d, unsigned long v, dictScanFunction *fn, dictScanBucketFunction *bucketfn, void *privdata);\nuint64_t dictGetHash(dict *d, const void *key);\ndictEntry **dictFindEntryRefByPtrAndHash(dict *d, const void *oldptr, uint64_t hash);\n\n#endif /* __DICT_H */\n"
  },
  {
    "path": "src/redis/endianconv.h",
    "content": "/* See endianconv.c top comments for more information\n *\n * ----------------------------------------------------------------------------\n *\n * Copyright (c) 2011-2012, Salvatore Sanfilippo <antirez at gmail dot com>\n * All rights reserved.\n *\n * Redistribution and use in source and binary forms, with or without\n * modification, are permitted provided that the following conditions are met:\n *\n *   * Redistributions of source code must retain the above copyright notice,\n *     this list of conditions and the following disclaimer.\n *   * Redistributions in binary form must reproduce the above copyright\n *     notice, this list of conditions and the following disclaimer in the\n *     documentation and/or other materials provided with the distribution.\n *   * Neither the name of Redis nor the names of its contributors may be used\n *     to endorse or promote products derived from this software without\n *     specific prior written permission.\n *\n * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS \"AS IS\"\n * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE\n * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE\n * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE\n * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR\n * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF\n * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS\n * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN\n * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)\n * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE\n * POSSIBILITY OF SUCH DAMAGE.\n */\n\n#ifndef __ENDIANCONV_H\n#define __ENDIANCONV_H\n\n#include \"config.h\"\n#include <stdint.h>\n\nvoid memrev16(void *p);\nvoid memrev32(void *p);\nvoid memrev64(void *p);\nuint16_t intrev16(uint16_t v);\nuint32_t intrev32(uint32_t v);\nuint64_t intrev64(uint64_t v);\n\n/* variants of the function doing the actual conversion only if the target\n * host is big endian */\n#if (BYTE_ORDER == LITTLE_ENDIAN)\n#define memrev16ifbe(p) ((void)(0))\n#define memrev32ifbe(p) ((void)(0))\n#define memrev64ifbe(p) ((void)(0))\n#define intrev16ifbe(v) (v)\n#define intrev32ifbe(v) (v)\n#define intrev64ifbe(v) (v)\n#else\n#define memrev16ifbe(p) memrev16(p)\n#define memrev32ifbe(p) memrev32(p)\n#define memrev64ifbe(p) memrev64(p)\n#define intrev16ifbe(v) intrev16(v)\n#define intrev32ifbe(v) intrev32(v)\n#define intrev64ifbe(v) intrev64(v)\n#endif\n\n/* The functions htonu64() and ntohu64() convert the specified value to\n * network byte ordering and back. In big endian systems they are no-ops. */\n#if (BYTE_ORDER == BIG_ENDIAN)\n#define htonu64(v) (v)\n#define ntohu64(v) (v)\n#else\n#define htonu64(v) intrev64(v)\n#define ntohu64(v) intrev64(v)\n#endif\n\n#ifdef REDIS_TEST\nint endianconvTest(int argc, char *argv[], int flags);\n#endif\n\n#endif\n"
  },
  {
    "path": "src/redis/geo.c",
    "content": "/*\n * Copyright (c) 2014, Matt Stancliff <matt@genges.com>.\n * Copyright (c) 2015-2016, Salvatore Sanfilippo <antirez@gmail.com>.\n * All rights reserved.\n *\n * Redistribution and use in source and binary forms, with or without\n * modification, are permitted provided that the following conditions are met:\n *\n *   * Redistributions of source code must retain the above copyright notice,\n *     this list of conditions and the following disclaimer.\n *   * Redistributions in binary form must reproduce the above copyright\n *     notice, this list of conditions and the following disclaimer in the\n *     documentation and/or other materials provided with the distribution.\n *   * Neither the name of Redis nor the names of its contributors may be used\n *     to endorse or promote products derived from this software without\n *     specific prior written permission.\n *\n * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS \"AS IS\"\n * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE\n * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE\n * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE\n * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR\n * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF\n * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS\n * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN\n * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)\n * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE\n * POSSIBILITY OF SUCH DAMAGE.\n */\n\n#include <string.h>\n\n#include \"geo.h\"\n#include \"geohash_helper.h\"\n#include \"listpack.h\"\n#include \"util.h\"\n#include \"zmalloc.h\"\n#include \"sds.h\"\n\n\n// D - noop\n#define D(...) 
while (0)\n\n/* ====================================================================\n * This file implements the following commands:\n *\n *   - geoadd - add coordinates for value to geoset\n *   - georadius - search radius by coordinates in geoset\n *   - georadiusbymember - search radius based on geoset member position\n * ==================================================================== */\n\n/* ====================================================================\n * geoArray implementation\n * ==================================================================== */\n\n/* Create a new array of geoPoints. */\ngeoArray *geoArrayCreate(void) {\n    geoArray *ga = zmalloc(sizeof(*ga));\n    /* It gets allocated on first geoArrayAppend() call. */\n    ga->array = NULL;\n    ga->buckets = 0;\n    ga->used = 0;\n    return ga;\n}\n\n/* Add and populate with data a new entry to the geoArray. */\ngeoPoint *geoArrayAppend(geoArray *ga, double *xy, double dist,\n                         double score, char *member)\n{\n    if (ga->used == ga->buckets) {\n        ga->buckets = (ga->buckets == 0) ? 8 : ga->buckets*2;\n        ga->array = zrealloc(ga->array,sizeof(geoPoint)*ga->buckets);\n    }\n    geoPoint *gp = ga->array+ga->used;\n    gp->longitude = xy[0];\n    gp->latitude = xy[1];\n    gp->dist = dist;\n    gp->member = member;\n    gp->score = score;\n    ga->used++;\n    return gp;\n}\n\n/* Destroy a geoArray created with geoArrayCreate(). 
*/\nvoid geoArrayFree(geoArray *ga) {\n    size_t i;\n    for (i = 0; i < ga->used; i++) sdsfree(ga->array[i].member);\n    zfree(ga->array);\n    zfree(ga);\n}\n\n/* ====================================================================\n * Helpers\n * ==================================================================== */\nint decodeGeohash(double bits, double *xy) {\n    GeoHashBits hash = { .bits = (uint64_t)bits, .step = GEO_STEP_MAX };\n    return geohashDecodeToLongLatWGS84(hash, xy);\n}\n\n\n/* Helper function for geoGetPointsInRange(): given a sorted set score\n * representing a point, and a GeoShape, checks if the point is within the search area.\n *\n * shape: the rectangle\n * score: the encoded version of lat,long\n * xy: output variable, the decoded lat,long\n * distance: output variable, the distance between the center of the shape and the point\n *\n * Return values:\n *\n * The return value is C_OK if the point is within search area, or C_ERR if it is outside.\n * \"*xy\" is populated with the decoded lat,long.\n * \"*distance\" is populated with the distance between the center of the shape and the point.\n */\nint geoWithinShape(GeoShape *shape, double score, double *xy, double *distance) {\n    if (!decodeGeohash(score,xy)) return C_ERR; /* Can't decode. */\n    /* Note that geohashGetDistanceIfInRadiusWGS84() takes arguments in\n     * reverse order: longitude first, latitude later. 
*/\n    if (shape->type == CIRCULAR_TYPE) {\n        if (!geohashGetDistanceIfInRadiusWGS84(shape->xy[0], shape->xy[1], xy[0], xy[1],\n                                               shape->t.radius*shape->conversion, distance))\n            return C_ERR;\n    } else if (shape->type == RECTANGLE_TYPE) {\n        if (!geohashGetDistanceIfInRectangle(shape->t.r.width * shape->conversion,\n                                             shape->t.r.height * shape->conversion,\n                                             shape->xy[0], shape->xy[1], xy[0], xy[1], distance))\n            return C_ERR;\n    }\n    return C_OK;\n}\n\n/* Compute the sorted set scores min (inclusive), max (exclusive) we should\n * query in order to retrieve all the elements inside the specified area\n * 'hash'. The two scores are returned by reference in *min and *max. */\nvoid scoresOfGeoHashBox(GeoHashBits hash, GeoHashFix52Bits *min, GeoHashFix52Bits *max) {\n    /* We want to compute the sorted set scores that will include all the\n     * elements inside the specified Geohash 'hash', which has as many\n     * bits as specified by hash.step * 2.\n     *\n     * So if step is, for example, 3, and the hash value in binary\n     * is 101010, since our score is 52 bits we want every element which\n     * is in binary: 101010?????????????????????????????????????????????\n     * Where ? can be 0 or 1.\n     *\n     * To get the min score we just use the initial hash value left\n     * shifted enough to get the 52 bit value. Later we increment the\n     * 6 bit prefix (see the hash.bits++ statement), and get the new\n     * prefix: 101011, which we align again to 52 bits to get the maximum\n     * value (which is excluded from the search). 
So we get everything\n     * between the two following scores (represented in binary):\n     *\n     * 1010100000000000000000000000000000000000000000000000 (included)\n     * and\n     * 1010110000000000000000000000000000000000000000000000 (excluded).\n     */\n    *min = geohashAlign52Bits(hash);\n    hash.bits++;\n    *max = geohashAlign52Bits(hash);\n}"
  },
  {
    "path": "src/redis/geo.h",
    "content": "#ifndef __GEO_H__\n#define __GEO_H__\n\n#include <stddef.h> /* for size_t */\n#include \"geohash_helper.h\"\n\n/* Structures used inside geo.c in order to represent points and array of\n * points on the earth. */\ntypedef struct geoPoint {\n    double longitude;\n    double latitude;\n    double dist;\n    double score;\n    char *member;\n} geoPoint;\n\ntypedef struct geoArray {\n    struct geoPoint *array;\n    size_t buckets;\n    size_t used;\n} geoArray;\n\nint geoWithinShape(GeoShape *shape, double score, double *xy, double *distance);\nvoid scoresOfGeoHashBox(GeoHashBits hash, GeoHashFix52Bits *min, GeoHashFix52Bits *max);\n\n#endif\n"
  },
  {
    "path": "src/redis/geohash.c",
    "content": "/*\n * Copyright (c) 2013-2014, yinqiwen <yinqiwen@gmail.com>\n * Copyright (c) 2014, Matt Stancliff <matt@genges.com>.\n * Copyright (c) 2015-2016, Salvatore Sanfilippo <antirez@gmail.com>.\n * All rights reserved.\n *\n * Redistribution and use in source and binary forms, with or without\n * modification, are permitted provided that the following conditions are met:\n *\n *  * Redistributions of source code must retain the above copyright notice,\n *    this list of conditions and the following disclaimer.\n *  * Redistributions in binary form must reproduce the above copyright\n *    notice, this list of conditions and the following disclaimer in the\n *    documentation and/or other materials provided with the distribution.\n *  * Neither the name of Redis nor the names of its contributors may be used\n *    to endorse or promote products derived from this software without\n *    specific prior written permission.\n *\n * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS \"AS IS\"\n * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE\n * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE\n * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS\n * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR\n * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF\n * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS\n * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN\n * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)\n * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF\n * THE POSSIBILITY OF SUCH DAMAGE.\n */\n\n#include <string.h>\n\n#include \"geohash.h\"\n\n/**\n * Hashing works like this:\n * Divide the world into 4 buckets.  
Label each one as such:\n *  -----------------\n *  |       |       |\n *  |       |       |\n *  | 0,1   | 1,1   |\n *  -----------------\n *  |       |       |\n *  |       |       |\n *  | 0,0   | 1,0   |\n *  -----------------\n */\n\n/* Interleave lower bits of x and y, so the bits of x\n * are in the even positions and bits from y in the odd;\n * x and y must initially be less than 2**32 (4294967296).\n * From:  https://graphics.stanford.edu/~seander/bithacks.html#InterleaveBMN\n */\nstatic inline uint64_t interleave64(uint32_t xlo, uint32_t ylo) {\n    static const uint64_t B[] = {0x5555555555555555ULL, 0x3333333333333333ULL,\n                                 0x0F0F0F0F0F0F0F0FULL, 0x00FF00FF00FF00FFULL,\n                                 0x0000FFFF0000FFFFULL};\n    static const unsigned int S[] = {1, 2, 4, 8, 16};\n\n    uint64_t x = xlo;\n    uint64_t y = ylo;\n\n    x = (x | (x << S[4])) & B[4];\n    y = (y | (y << S[4])) & B[4];\n\n    x = (x | (x << S[3])) & B[3];\n    y = (y | (y << S[3])) & B[3];\n\n    x = (x | (x << S[2])) & B[2];\n    y = (y | (y << S[2])) & B[2];\n\n    x = (x | (x << S[1])) & B[1];\n    y = (y | (y << S[1])) & B[1];\n\n    x = (x | (x << S[0])) & B[0];\n    y = (y | (y << S[0])) & B[0];\n\n    return x | (y << 1);\n}\n\n/* reverse the interleave process\n * derived from http://stackoverflow.com/questions/4909263\n */\nstatic inline uint64_t deinterleave64(uint64_t interleaved) {\n    static const uint64_t B[] = {0x5555555555555555ULL, 0x3333333333333333ULL,\n                                 0x0F0F0F0F0F0F0F0FULL, 0x00FF00FF00FF00FFULL,\n                                 0x0000FFFF0000FFFFULL, 0x00000000FFFFFFFFULL};\n    static const unsigned int S[] = {0, 1, 2, 4, 8, 16};\n\n    uint64_t x = interleaved;\n    uint64_t y = interleaved >> 1;\n\n    x = (x | (x >> S[0])) & B[0];\n    y = (y | (y >> S[0])) & B[0];\n\n    x = (x | (x >> S[1])) & B[1];\n    y = (y | (y >> S[1])) & B[1];\n\n    x = (x | (x >> S[2])) & B[2];\n    y = (y 
| (y >> S[2])) & B[2];\n\n    x = (x | (x >> S[3])) & B[3];\n    y = (y | (y >> S[3])) & B[3];\n\n    x = (x | (x >> S[4])) & B[4];\n    y = (y | (y >> S[4])) & B[4];\n\n    x = (x | (x >> S[5])) & B[5];\n    y = (y | (y >> S[5])) & B[5];\n\n    return x | (y << 32);\n}\n\nvoid geohashGetCoordRange(GeoHashRange *long_range, GeoHashRange *lat_range) {\n    /* These are constraints from EPSG:900913 / EPSG:3785 / OSGEO:41001 */\n    /* We can't geocode at the north/south pole. */\n    long_range->max = GEO_LONG_MAX;\n    long_range->min = GEO_LONG_MIN;\n    lat_range->max = GEO_LAT_MAX;\n    lat_range->min = GEO_LAT_MIN;\n}\n\nint geohashEncode(const GeoHashRange *long_range, const GeoHashRange *lat_range,\n                  double longitude, double latitude, uint8_t step,\n                  GeoHashBits *hash) {\n    /* Check basic arguments sanity. */\n    if (hash == NULL || step > 32 || step == 0 ||\n        RANGEPISZERO(lat_range) || RANGEPISZERO(long_range)) return 0;\n\n    /* Return an error when trying to index outside the supported\n     * constraints. 
*/\n    if (longitude > GEO_LONG_MAX || longitude < GEO_LONG_MIN ||\n        latitude > GEO_LAT_MAX || latitude < GEO_LAT_MIN) return 0;\n\n    hash->bits = 0;\n    hash->step = step;\n\n    if (latitude < lat_range->min || latitude > lat_range->max ||\n        longitude < long_range->min || longitude > long_range->max) {\n        return 0;\n    }\n\n    double lat_offset =\n        (latitude - lat_range->min) / (lat_range->max - lat_range->min);\n    double long_offset =\n        (longitude - long_range->min) / (long_range->max - long_range->min);\n\n    /* convert to fixed point based on the step size */\n    lat_offset *= (1ULL << step);\n    long_offset *= (1ULL << step);\n    hash->bits = interleave64(lat_offset, long_offset);\n    return 1;\n}\n\nint geohashEncodeType(double longitude, double latitude, uint8_t step, GeoHashBits *hash) {\n    GeoHashRange r[2] = {{0}};\n    geohashGetCoordRange(&r[0], &r[1]);\n    return geohashEncode(&r[0], &r[1], longitude, latitude, step, hash);\n}\n\nint geohashEncodeWGS84(double longitude, double latitude, uint8_t step,\n                       GeoHashBits *hash) {\n    return geohashEncodeType(longitude, latitude, step, hash);\n}\n\nint geohashDecode(const GeoHashRange long_range, const GeoHashRange lat_range,\n                   const GeoHashBits hash, GeoHashArea *area) {\n    if (HASHISZERO(hash) || NULL == area || RANGEISZERO(lat_range) ||\n        RANGEISZERO(long_range)) {\n        return 0;\n    }\n\n    area->hash = hash;\n    uint8_t step = hash.step;\n    uint64_t hash_sep = deinterleave64(hash.bits); /* hash = [LAT][LONG] */\n\n    double lat_scale = lat_range.max - lat_range.min;\n    double long_scale = long_range.max - long_range.min;\n\n    uint32_t ilato = hash_sep;       /* get lat part of deinterleaved hash */\n    uint32_t ilono = hash_sep >> 32; /* shift over to get long part of hash */\n\n    /* divide by 2**step.\n     * Then, for 0-1 coordinate, multiply times scale and add\n       to the min to get 
the absolute coordinate. */\n    area->latitude.min =\n        lat_range.min + (ilato * 1.0 / (1ull << step)) * lat_scale;\n    area->latitude.max =\n        lat_range.min + ((ilato + 1) * 1.0 / (1ull << step)) * lat_scale;\n    area->longitude.min =\n        long_range.min + (ilono * 1.0 / (1ull << step)) * long_scale;\n    area->longitude.max =\n        long_range.min + ((ilono + 1) * 1.0 / (1ull << step)) * long_scale;\n\n    return 1;\n}\n\nint geohashDecodeType(const GeoHashBits hash, GeoHashArea *area) {\n    GeoHashRange r[2] = {{0}};\n    geohashGetCoordRange(&r[0], &r[1]);\n    return geohashDecode(r[0], r[1], hash, area);\n}\n\nint geohashDecodeWGS84(const GeoHashBits hash, GeoHashArea *area) {\n    return geohashDecodeType(hash, area);\n}\n\nint geohashDecodeAreaToLongLat(const GeoHashArea *area, double *xy) {\n    if (!xy) return 0;\n    xy[0] = (area->longitude.min + area->longitude.max) / 2;\n    if (xy[0] > GEO_LONG_MAX) xy[0] = GEO_LONG_MAX;\n    if (xy[0] < GEO_LONG_MIN) xy[0] = GEO_LONG_MIN;\n    xy[1] = (area->latitude.min + area->latitude.max) / 2;\n    if (xy[1] > GEO_LAT_MAX) xy[1] = GEO_LAT_MAX;\n    if (xy[1] < GEO_LAT_MIN) xy[1] = GEO_LAT_MIN;\n    return 1;\n}\n\nint geohashDecodeToLongLatType(const GeoHashBits hash, double *xy) {\n    GeoHashArea area;\n    memset(&area, 0, sizeof(area));\n\n    if (!xy || !geohashDecodeType(hash, &area))\n        return 0;\n    return geohashDecodeAreaToLongLat(&area, xy);\n}\n\nint geohashDecodeToLongLatWGS84(const GeoHashBits hash, double *xy) {\n    return geohashDecodeToLongLatType(hash, xy);\n}\n\nstatic void geohash_move_x(GeoHashBits *hash, int8_t d) {\n    if (d == 0)\n        return;\n\n    uint64_t x = hash->bits & 0xaaaaaaaaaaaaaaaaULL;\n    uint64_t y = hash->bits & 0x5555555555555555ULL;\n\n    uint64_t zz = 0x5555555555555555ULL >> (64 - hash->step * 2);\n\n    if (d > 0) {\n        x = x + (zz + 1);\n    } else {\n        x = x | zz;\n        x = x - (zz + 1);\n    }\n\n    x &= 
(0xaaaaaaaaaaaaaaaaULL >> (64 - hash->step * 2));\n    hash->bits = (x | y);\n}\n\nstatic void geohash_move_y(GeoHashBits *hash, int8_t d) {\n    if (d == 0)\n        return;\n\n    uint64_t x = hash->bits & 0xaaaaaaaaaaaaaaaaULL;\n    uint64_t y = hash->bits & 0x5555555555555555ULL;\n\n    uint64_t zz = 0xaaaaaaaaaaaaaaaaULL >> (64 - hash->step * 2);\n    if (d > 0) {\n        y = y + (zz + 1);\n    } else {\n        y = y | zz;\n        y = y - (zz + 1);\n    }\n    y &= (0x5555555555555555ULL >> (64 - hash->step * 2));\n    hash->bits = (x | y);\n}\n\nvoid geohashNeighbors(const GeoHashBits *hash, GeoHashNeighbors *neighbors) {\n    neighbors->east = *hash;\n    neighbors->west = *hash;\n    neighbors->north = *hash;\n    neighbors->south = *hash;\n    neighbors->south_east = *hash;\n    neighbors->south_west = *hash;\n    neighbors->north_east = *hash;\n    neighbors->north_west = *hash;\n\n    geohash_move_x(&neighbors->east, 1);\n    geohash_move_y(&neighbors->east, 0);\n\n    geohash_move_x(&neighbors->west, -1);\n    geohash_move_y(&neighbors->west, 0);\n\n    geohash_move_x(&neighbors->south, 0);\n    geohash_move_y(&neighbors->south, -1);\n\n    geohash_move_x(&neighbors->north, 0);\n    geohash_move_y(&neighbors->north, 1);\n\n    geohash_move_x(&neighbors->north_west, -1);\n    geohash_move_y(&neighbors->north_west, 1);\n\n    geohash_move_x(&neighbors->north_east, 1);\n    geohash_move_y(&neighbors->north_east, 1);\n\n    geohash_move_x(&neighbors->south_east, 1);\n    geohash_move_y(&neighbors->south_east, -1);\n\n    geohash_move_x(&neighbors->south_west, -1);\n    geohash_move_y(&neighbors->south_west, -1);\n}\n"
  },
  {
    "path": "src/redis/geohash.h",
    "content": "/*\n * Copyright (c) 2013-2014, yinqiwen <yinqiwen@gmail.com>\n * Copyright (c) 2014, Matt Stancliff <matt@genges.com>.\n * Copyright (c) 2015, Salvatore Sanfilippo <antirez@gmail.com>.\n * All rights reserved.\n *\n * Redistribution and use in source and binary forms, with or without\n * modification, are permitted provided that the following conditions are met:\n *\n *  * Redistributions of source code must retain the above copyright notice,\n *    this list of conditions and the following disclaimer.\n *  * Redistributions in binary form must reproduce the above copyright\n *    notice, this list of conditions and the following disclaimer in the\n *    documentation and/or other materials provided with the distribution.\n *  * Neither the name of Redis nor the names of its contributors may be used\n *    to endorse or promote products derived from this software without\n *    specific prior written permission.\n *\n * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS \"AS IS\"\n * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE\n * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE\n * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS\n * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR\n * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF\n * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS\n * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN\n * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)\n * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF\n * THE POSSIBILITY OF SUCH DAMAGE.\n */\n\n#ifndef GEOHASH_H_\n#define GEOHASH_H_\n\n#include <stddef.h>\n#include <stdint.h>\n\n#if defined(__cplusplus)\nextern \"C\" {\n#endif\n\n#define HASHISZERO(r) (!(r).bits && !(r).step)\n#define RANGEISZERO(r) (!(r).max && !(r).min)\n#define RANGEPISZERO(r) (r == NULL || RANGEISZERO(*r))\n\n#define GEO_STEP_MAX 26 /* 26*2 = 52 bits. */\n\n/* Limits from EPSG:900913 / EPSG:3785 / OSGEO:41001 */\n#define GEO_LAT_MIN -85.05112878\n#define GEO_LAT_MAX 85.05112878\n#define GEO_LONG_MIN -180\n#define GEO_LONG_MAX 180\n\ntypedef enum {\n    GEOHASH_NORTH = 0,\n    GEOHASH_EAST,\n    GEOHASH_WEST,\n    GEOHASH_SOUTH,\n    GEOHASH_SOUTH_WEST,\n    GEOHASH_SOUTH_EAST,\n    GEOHASH_NORT_WEST,\n    GEOHASH_NORT_EAST\n} GeoDirection;\n\ntypedef struct {\n    uint64_t bits;\n    uint8_t step;\n} GeoHashBits;\n\ntypedef struct {\n    double min;\n    double max;\n} GeoHashRange;\n\ntypedef struct {\n    GeoHashBits hash;\n    GeoHashRange longitude;\n    GeoHashRange latitude;\n} GeoHashArea;\n\ntypedef struct {\n    GeoHashBits north;\n    GeoHashBits east;\n    GeoHashBits west;\n    GeoHashBits south;\n    GeoHashBits north_east;\n    GeoHashBits south_east;\n    GeoHashBits north_west;\n    GeoHashBits south_west;\n} GeoHashNeighbors;\n\n#define CIRCULAR_TYPE 1\n#define RECTANGLE_TYPE 2\ntypedef struct {\n    int type; /* search type */\n    double xy[2]; /* search center point, xy[0]: lon, xy[1]: lat */\n    double conversion; /* km: 
1000 */\n    double bounds[4]; /* bounds[0]: min_lon, bounds[1]: min_lat\n                       * bounds[2]: max_lon, bounds[3]: max_lat */\n    union {\n        /* CIRCULAR_TYPE */\n        double radius;\n        /* RECTANGLE_TYPE */\n        struct {\n            double height;\n            double width;\n        } r;\n    } t;\n} GeoShape;\n\n/*\n * 1:success\n * 0:failed\n */\nvoid geohashGetCoordRange(GeoHashRange *long_range, GeoHashRange *lat_range);\nint geohashEncode(const GeoHashRange *long_range, const GeoHashRange *lat_range,\n                  double longitude, double latitude, uint8_t step,\n                  GeoHashBits *hash);\nint geohashEncodeType(double longitude, double latitude,\n                      uint8_t step, GeoHashBits *hash);\nint geohashEncodeWGS84(double longitude, double latitude, uint8_t step,\n                       GeoHashBits *hash);\nint geohashDecode(const GeoHashRange long_range, const GeoHashRange lat_range,\n                  const GeoHashBits hash, GeoHashArea *area);\nint geohashDecodeType(const GeoHashBits hash, GeoHashArea *area);\nint geohashDecodeWGS84(const GeoHashBits hash, GeoHashArea *area);\nint geohashDecodeAreaToLongLat(const GeoHashArea *area, double *xy);\nint geohashDecodeToLongLatType(const GeoHashBits hash, double *xy);\nint geohashDecodeToLongLatWGS84(const GeoHashBits hash, double *xy);\nvoid geohashNeighbors(const GeoHashBits *hash, GeoHashNeighbors *neighbors);\n\n#if defined(__cplusplus)\n}\n#endif\n#endif /* GEOHASH_H_ */\n"
  },
  {
    "path": "src/redis/geohash_helper.c",
    "content": "/*\n * Copyright (c) 2013-2014, yinqiwen <yinqiwen@gmail.com>\n * Copyright (c) 2014, Matt Stancliff <matt@genges.com>.\n * Copyright (c) 2015-2016, Salvatore Sanfilippo <antirez@gmail.com>.\n * All rights reserved.\n *\n * Redistribution and use in source and binary forms, with or without\n * modification, are permitted provided that the following conditions are met:\n *\n *  * Redistributions of source code must retain the above copyright notice,\n *    this list of conditions and the following disclaimer.\n *  * Redistributions in binary form must reproduce the above copyright\n *    notice, this list of conditions and the following disclaimer in the\n *    documentation and/or other materials provided with the distribution.\n *  * Neither the name of Redis nor the names of its contributors may be used\n *    to endorse or promote products derived from this software without\n *    specific prior written permission.\n *\n * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS \"AS IS\"\n * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE\n * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE\n * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS\n * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR\n * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF\n * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS\n * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN\n * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)\n * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF\n * THE POSSIBILITY OF SUCH DAMAGE.\n */\n\n/* This is a C++ to C conversion from the ardb project.\n * This file started out as:\n * https://github.com/yinqiwen/ardb/blob/d42503/src/geo/geohash_helper.cpp\n */\n\n#define __USE_XOPEN\n\n#include \"geohash_helper.h\"\n#include <math.h>\n\n#define D_R (M_PI / 180.0)\n#define R_MAJOR 6378137.0\n#define R_MINOR 6356752.3142\n#define RATIO (R_MINOR / R_MAJOR)\n#define ECCENT (sqrt(1.0 - (RATIO *RATIO)))\n#define COM (0.5 * ECCENT)\n\n/// @brief The usual PI/180 constant\nconst double DEG_TO_RAD = 0.017453292519943295769236907684886;\n/// @brief Earth's quatratic mean radius for WGS-84\nconst double EARTH_RADIUS_IN_METERS = 6372797.560856;\n\nconst double MERCATOR_MAX = 20037726.37;\nconst double MERCATOR_MIN = -20037726.37;\n\nstatic inline double deg_rad(double ang) { return ang * D_R; }\nstatic inline double rad_deg(double ang) { return ang / D_R; }\n\n/* This function is used in order to estimate the step (bits precision)\n * of the 9 search area boxes during radius queries. */\nuint8_t geohashEstimateStepsByRadius(double range_meters, double lat) {\n    if (range_meters == 0) return 26;\n    int step = 1;\n    while (range_meters < MERCATOR_MAX) {\n        range_meters *= 2;\n        step++;\n    }\n    step -= 2; /* Make sure range is included in most of the base cases. */\n\n    /* Wider range towards the poles... 
Note: it is possible to do better\n     * than this approximation by computing the distance between meridians\n     * at this latitude, but this does the trick for now. */\n    if (lat > 66 || lat < -66) {\n        step--;\n        if (lat > 80 || lat < -80) step--;\n    }\n\n    /* Frame to valid range. */\n    if (step < 1) step = 1;\n    if (step > 26) step = 26;\n    return step;\n}\n\n/* Return the bounding box of the search area by shape (see geohash.h GeoShape)\n * bounds[0] - bounds[2] is the minimum and maximum longitude\n * while bounds[1] - bounds[3] is the minimum and maximum latitude.\n * since the higher the latitude, the shorter the arc length, the box shape is as follows\n * (left and right edges are actually bent), as shown in the following diagram:\n *\n *    \\-----------------/          --------               \\-----------------/\n *     \\               /         /          \\              \\               /\n *      \\  (long,lat) /         / (long,lat) \\              \\  (long,lat) /\n *       \\           /         /              \\             /             \\\n *         ---------          /----------------\\           /---------------\\\n *  Northern Hemisphere       Southern Hemisphere         Around the equator\n */\nint geohashBoundingBox(GeoShape *shape, double *bounds) {\n    if (!bounds) return 0;\n    double longitude = shape->xy[0];\n    double latitude = shape->xy[1];\n    double height = shape->conversion * (shape->type == CIRCULAR_TYPE ? shape->t.radius : shape->t.r.height/2);\n    double width = shape->conversion * (shape->type == CIRCULAR_TYPE ? 
shape->t.radius : shape->t.r.width/2);\n\n    const double lat_delta = rad_deg(height/EARTH_RADIUS_IN_METERS);\n    const double long_delta_top = rad_deg(width/EARTH_RADIUS_IN_METERS/cos(deg_rad(latitude+lat_delta)));\n    const double long_delta_bottom = rad_deg(width/EARTH_RADIUS_IN_METERS/cos(deg_rad(latitude-lat_delta)));\n    /* The directions of the northern and southern hemispheres\n     * are opposite, so we choice different points as min/max long/lat */\n    int southern_hemisphere = latitude < 0 ? 1 : 0;\n    bounds[0] = southern_hemisphere ? longitude-long_delta_bottom : longitude-long_delta_top;\n    bounds[2] = southern_hemisphere ? longitude+long_delta_bottom : longitude+long_delta_top;\n    bounds[1] = latitude - lat_delta;\n    bounds[3] = latitude + lat_delta;\n    return 1;\n}\n\n/* Calculate a set of areas (center + 8) that are able to cover a range query\n * for the specified position and shape (see geohash.h GeoShape).\n * the bounding box saved in shaple.bounds */\nGeoHashRadius geohashCalculateAreasByShapeWGS84(GeoShape *shape) {\n    GeoHashRange long_range, lat_range;\n    GeoHashRadius radius;\n    GeoHashBits hash;\n    GeoHashNeighbors neighbors;\n    GeoHashArea area;\n    double min_lon, max_lon, min_lat, max_lat;\n    int steps;\n\n    geohashBoundingBox(shape, shape->bounds);\n    min_lon = shape->bounds[0];\n    min_lat = shape->bounds[1];\n    max_lon = shape->bounds[2];\n    max_lat = shape->bounds[3];\n\n    double longitude = shape->xy[0];\n    double latitude = shape->xy[1];\n    /* radius_meters is calculated differently in different search types:\n     * 1) CIRCULAR_TYPE, just use radius.\n     * 2) RECTANGLE_TYPE, we use sqrt((width/2)^2 + (height/2)^2) to\n     * calculate the distance from the center point to the corner */\n    double radius_meters = shape->type == CIRCULAR_TYPE ? 
shape->t.radius :\n            sqrt((shape->t.r.width/2)*(shape->t.r.width/2) + (shape->t.r.height/2)*(shape->t.r.height/2));\n    radius_meters *= shape->conversion;\n\n    steps = geohashEstimateStepsByRadius(radius_meters,latitude);\n\n    geohashGetCoordRange(&long_range,&lat_range);\n    geohashEncode(&long_range,&lat_range,longitude,latitude,steps,&hash);\n    geohashNeighbors(&hash,&neighbors);\n    geohashDecode(long_range,lat_range,hash,&area);\n\n    /* Check if the step is enough at the limits of the covered area.\n     * Sometimes when the search area is near an edge of the\n     * area, the estimated step is not small enough, since one of the\n     * north / south / west / east square is too near to the search area\n     * to cover everything. */\n    int decrease_step = 0;\n    {\n        GeoHashArea north, south, east, west;\n\n        geohashDecode(long_range, lat_range, neighbors.north, &north);\n        geohashDecode(long_range, lat_range, neighbors.south, &south);\n        geohashDecode(long_range, lat_range, neighbors.east, &east);\n        geohashDecode(long_range, lat_range, neighbors.west, &west);\n\n        if (north.latitude.max < max_lat) \n            decrease_step = 1;\n        if (south.latitude.min > min_lat) \n            decrease_step = 1;\n        if (east.longitude.max < max_lon) \n            decrease_step = 1;\n        if (west.longitude.min > min_lon)  \n            decrease_step = 1;\n    }\n\n    if (steps > 1 && decrease_step) {\n        steps--;\n        geohashEncode(&long_range,&lat_range,longitude,latitude,steps,&hash);\n        geohashNeighbors(&hash,&neighbors);\n        geohashDecode(long_range,lat_range,hash,&area);\n    }\n\n    /* Exclude the search areas that are useless. 
*/\n    if (steps >= 2) {\n        if (area.latitude.min < min_lat) {\n            GZERO(neighbors.south);\n            GZERO(neighbors.south_west);\n            GZERO(neighbors.south_east);\n        }\n        if (area.latitude.max > max_lat) {\n            GZERO(neighbors.north);\n            GZERO(neighbors.north_east);\n            GZERO(neighbors.north_west);\n        }\n        if (area.longitude.min < min_lon) {\n            GZERO(neighbors.west);\n            GZERO(neighbors.south_west);\n            GZERO(neighbors.north_west);\n        }\n        if (area.longitude.max > max_lon) {\n            GZERO(neighbors.east);\n            GZERO(neighbors.south_east);\n            GZERO(neighbors.north_east);\n        }\n    }\n    radius.hash = hash;\n    radius.neighbors = neighbors;\n    radius.area = area;\n    return radius;\n}\n\nGeoHashFix52Bits geohashAlign52Bits(const GeoHashBits hash) {\n    uint64_t bits = hash.bits;\n    bits <<= (52 - hash.step * 2);\n    return bits;\n}\n\n/* Calculate distance using simplified haversine great circle distance formula.\n * Given longitude diff is 0 the asin(sqrt(a)) on the haversine is asin(sin(abs(u))).\n * arcsin(sin(x)) equal to x when x ∈[−𝜋/2,𝜋/2]. Given latitude is between [−𝜋/2,𝜋/2]\n * we can simplify arcsin(sin(x)) to x.\n */\ndouble geohashGetLatDistance(double lat1d, double lat2d) {\n    return EARTH_RADIUS_IN_METERS * fabs(deg_rad(lat2d) - deg_rad(lat1d));\n}\n\n/* Calculate distance using haversine great circle distance formula. 
*/\ndouble geohashGetDistance(double lon1d, double lat1d, double lon2d, double lat2d) {\n    double lat1r, lon1r, lat2r, lon2r, u, v, a;\n    lon1r = deg_rad(lon1d);\n    lon2r = deg_rad(lon2d);\n    v = sin((lon2r - lon1r) / 2);\n    /* if v == 0 we can avoid doing expensive math when lons are practically the same */\n    if (v == 0.0)\n        return geohashGetLatDistance(lat1d, lat2d);\n    lat1r = deg_rad(lat1d);\n    lat2r = deg_rad(lat2d);\n    u = sin((lat2r - lat1r) / 2);\n    a = u * u + cos(lat1r) * cos(lat2r) * v * v;\n    return 2.0 * EARTH_RADIUS_IN_METERS * asin(sqrt(a));\n}\n\nint geohashGetDistanceIfInRadius(double x1, double y1,\n                                 double x2, double y2, double radius,\n                                 double *distance) {\n    *distance = geohashGetDistance(x1, y1, x2, y2);\n    if (*distance > radius) return 0;\n    return 1;\n}\n\nint geohashGetDistanceIfInRadiusWGS84(double x1, double y1, double x2,\n                                      double y2, double radius,\n                                      double *distance) {\n    return geohashGetDistanceIfInRadius(x1, y1, x2, y2, radius, distance);\n}\n\n/* Judge whether a point is in the axis-aligned rectangle, when the distance\n * between a searched point and the center point is less than or equal to\n * height/2 or width/2 in height and width, the point is in the rectangle.\n *\n * width_m, height_m: the rectangle\n * x1, y1 : the center of the box\n * x2, y2 : the point to be searched\n */\nint geohashGetDistanceIfInRectangle(double width_m, double height_m, double x1, double y1,\n                                    double x2, double y2, double *distance) {\n    /* latitude distance is less expensive to compute than longitude distance\n     * so we check first for the latitude condition */\n    double lat_distance = geohashGetLatDistance(y2, y1);\n    if (lat_distance > height_m/2) {\n        return 0;\n    }\n    double lon_distance = geohashGetDistance(x2, y2, 
x1, y2);\n    if (lon_distance > width_m/2) {\n        return 0;\n    }\n    *distance = geohashGetDistance(x1, y1, x2, y2);\n    return 1;\n}\n"
  },
  {
    "path": "src/redis/geohash_helper.h",
    "content": "/*\n * Copyright (c) 2013-2014, yinqiwen <yinqiwen@gmail.com>\n * Copyright (c) 2014, Matt Stancliff <matt@genges.com>.\n * Copyright (c) 2015, Salvatore Sanfilippo <antirez@gmail.com>.\n * All rights reserved.\n *\n * Redistribution and use in source and binary forms, with or without\n * modification, are permitted provided that the following conditions are met:\n *\n *  * Redistributions of source code must retain the above copyright notice,\n *    this list of conditions and the following disclaimer.\n *  * Redistributions in binary form must reproduce the above copyright\n *    notice, this list of conditions and the following disclaimer in the\n *    documentation and/or other materials provided with the distribution.\n *  * Neither the name of Redis nor the names of its contributors may be used\n *    to endorse or promote products derived from this software without\n *    specific prior written permission.\n *\n * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS \"AS IS\"\n * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE\n * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE\n * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS\n * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR\n * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF\n * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS\n * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN\n * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)\n * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF\n * THE POSSIBILITY OF SUCH DAMAGE.\n */\n\n#ifndef GEOHASH_HELPER_HPP_\n#define GEOHASH_HELPER_HPP_\n\n#include \"geohash.h\"\n\n#define GZERO(s) s.bits = s.step = 0;\n#define GISZERO(s) (!s.bits && !s.step)\n#define GISNOTZERO(s) (s.bits || s.step)\n\ntypedef uint64_t GeoHashFix52Bits;\ntypedef uint64_t GeoHashVarBits;\n\ntypedef struct {\n    GeoHashBits hash;\n    GeoHashArea area;\n    GeoHashNeighbors neighbors;\n} GeoHashRadius;\n\nuint8_t geohashEstimateStepsByRadius(double range_meters, double lat);\nint geohashBoundingBox(GeoShape *shape, double *bounds);\nGeoHashRadius geohashCalculateAreasByShapeWGS84(GeoShape *shape);\nGeoHashFix52Bits geohashAlign52Bits(const GeoHashBits hash);\ndouble geohashGetDistance(double lon1d, double lat1d,\n                          double lon2d, double lat2d);\nint geohashGetDistanceIfInRadius(double x1, double y1,\n                                 double x2, double y2, double radius,\n                                 double *distance);\nint geohashGetDistanceIfInRadiusWGS84(double x1, double y1, double x2,\n                                      double y2, double radius,\n                                      double *distance);\nint geohashGetDistanceIfInRectangle(double width_m, double height_m, double x1, double y1,\n                                    double x2, double y2, double *distance);\n\n#endif /* GEOHASH_HELPER_HPP_ */\n"
  },
  {
    "path": "src/redis/hiredis.c",
    "content": "/*\n * Copyright (c) 2009-2011, Salvatore Sanfilippo <antirez at gmail dot com>\n * Copyright (c) 2010-2014, Pieter Noordhuis <pcnoordhuis at gmail dot com>\n * Copyright (c) 2015, Matt Stancliff <matt at genges dot com>,\n *                     Jan-Erik Rediger <janerik at fnordig dot com>\n *\n * All rights reserved.\n *\n * Redistribution and use in source and binary forms, with or without\n * modification, are permitted provided that the following conditions are met:\n *\n *   * Redistributions of source code must retain the above copyright notice,\n *     this list of conditions and the following disclaimer.\n *   * Redistributions in binary form must reproduce the above copyright\n *     notice, this list of conditions and the following disclaimer in the\n *     documentation and/or other materials provided with the distribution.\n *   * Neither the name of Redis nor the names of its contributors may be used\n *     to endorse or promote products derived from this software without\n *     specific prior written permission.\n *\n * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS \"AS IS\"\n * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE\n * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE\n * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE\n * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR\n * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF\n * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS\n * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN\n * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)\n * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE\n * POSSIBILITY OF SUCH DAMAGE.\n */\n\n#include <string.h>\n#include <stdlib.h>\n#include <assert.h>\n#include <errno.h>\n#include <ctype.h>\n\n#include \"hiredis.h\"\n#include \"sds.h\"\n\n\nstatic redisReply *createReplyObject(int type);\nstatic void *createStringObject(const redisReadTask *task, char *str, size_t len);\nstatic void *createArrayObject(const redisReadTask *task, size_t elements);\nstatic void *createIntegerObject(const redisReadTask *task, long long value);\nstatic void *createDoubleObject(const redisReadTask *task, double value, char *str, size_t len);\nstatic void *createNilObject(const redisReadTask *task);\nstatic void *createBoolObject(const redisReadTask *task, int bval);\n\n/* Default set of functions to build the reply. Keep in mind that such a\n * function returning NULL is interpreted as OOM. 
*/\nstatic redisReplyObjectFunctions defaultFunctions = {\n    createStringObject,\n    createArrayObject,\n    createIntegerObject,\n    createDoubleObject,\n    createNilObject,\n    createBoolObject,\n    freeReplyObject\n};\n\n/* Create a reply object */\nstatic redisReply *createReplyObject(int type) {\n    redisReply *r = s_calloc(sizeof(*r));\n\n    if (r == NULL)\n        return NULL;\n\n    r->type = type;\n    return r;\n}\n\n/* Free a reply object */\nvoid freeReplyObject(void *reply) {\n    redisReply *r = reply;\n    size_t j;\n\n    if (r == NULL)\n        return;\n\n    switch(r->type) {\n    case REDIS_REPLY_INTEGER:\n    case REDIS_REPLY_NIL:\n    case REDIS_REPLY_BOOL:\n        break; /* Nothing to free */\n    case REDIS_REPLY_ARRAY:\n    case REDIS_REPLY_MAP:\n    case REDIS_REPLY_ATTR:\n    case REDIS_REPLY_SET:\n    case REDIS_REPLY_PUSH:\n        if (r->element != NULL) {\n            for (j = 0; j < r->elements; j++)\n                freeReplyObject(r->element[j]);\n            s_free(r->element);\n        }\n        break;\n    case REDIS_REPLY_ERROR:\n    case REDIS_REPLY_STATUS:\n    case REDIS_REPLY_STRING:\n    case REDIS_REPLY_DOUBLE:\n    case REDIS_REPLY_VERB:\n    case REDIS_REPLY_BIGNUM:\n        s_free(r->str);\n        break;\n    }\n    s_free(r);\n}\n\nstatic void *createStringObject(const redisReadTask *task, char *str, size_t len) {\n    redisReply *r, *parent;\n    char *buf;\n\n    r = createReplyObject(task->type);\n    if (r == NULL)\n        return NULL;\n\n    assert(task->type == REDIS_REPLY_ERROR  ||\n           task->type == REDIS_REPLY_STATUS ||\n           task->type == REDIS_REPLY_STRING ||\n           task->type == REDIS_REPLY_VERB   ||\n           task->type == REDIS_REPLY_BIGNUM);\n\n    /* Copy string value */\n    if (task->type == REDIS_REPLY_VERB) {\n        buf = s_malloc(len-4+1); /* Skip 4 bytes of verbatim type header. 
*/\n        if (buf == NULL) goto oom;\n\n        memcpy(r->vtype,str,3);\n        r->vtype[3] = '\\0';\n        memcpy(buf,str+4,len-4);\n        buf[len-4] = '\\0';\n        r->len = len - 4;\n    } else {\n        buf = s_malloc(len+1);\n        if (buf == NULL) goto oom;\n\n        memcpy(buf,str,len);\n        buf[len] = '\\0';\n        r->len = len;\n    }\n    r->str = buf;\n\n    if (task->parent) {\n        parent = task->parent->obj;\n        assert(parent->type == REDIS_REPLY_ARRAY ||\n               parent->type == REDIS_REPLY_MAP ||\n               parent->type == REDIS_REPLY_ATTR ||\n               parent->type == REDIS_REPLY_SET ||\n               parent->type == REDIS_REPLY_PUSH);\n        parent->element[task->idx] = r;\n    }\n    return r;\n\noom:\n    freeReplyObject(r);\n    return NULL;\n}\n\nstatic void *createArrayObject(const redisReadTask *task, size_t elements) {\n    redisReply *r, *parent;\n\n    r = createReplyObject(task->type);\n    if (r == NULL)\n        return NULL;\n\n    if (elements > 0) {\n        r->element = s_calloc(elements * sizeof(redisReply*));\n        if (r->element == NULL) {\n            freeReplyObject(r);\n            return NULL;\n        }\n    }\n\n    r->elements = elements;\n\n    if (task->parent) {\n        parent = task->parent->obj;\n        assert(parent->type == REDIS_REPLY_ARRAY ||\n               parent->type == REDIS_REPLY_MAP ||\n               parent->type == REDIS_REPLY_ATTR ||\n               parent->type == REDIS_REPLY_SET ||\n               parent->type == REDIS_REPLY_PUSH);\n        parent->element[task->idx] = r;\n    }\n    return r;\n}\n\nstatic void *createIntegerObject(const redisReadTask *task, long long value) {\n    redisReply *r, *parent;\n\n    r = createReplyObject(REDIS_REPLY_INTEGER);\n    if (r == NULL)\n        return NULL;\n\n    r->integer = value;\n\n    if (task->parent) {\n        parent = task->parent->obj;\n        assert(parent->type == REDIS_REPLY_ARRAY ||\n             
  parent->type == REDIS_REPLY_MAP ||\n               parent->type == REDIS_REPLY_ATTR ||\n               parent->type == REDIS_REPLY_SET ||\n               parent->type == REDIS_REPLY_PUSH);\n        parent->element[task->idx] = r;\n    }\n    return r;\n}\n\nstatic void *createDoubleObject(const redisReadTask *task, double value, char *str, size_t len) {\n    redisReply *r, *parent;\n\n    if (len == SIZE_MAX) // Prevents s_malloc(0) if len equals to SIZE_MAX\n        return NULL;\n\n    r = createReplyObject(REDIS_REPLY_DOUBLE);\n    if (r == NULL)\n        return NULL;\n\n    r->dval = value;\n    r->str = s_malloc(len+1);\n    if (r->str == NULL) {\n        freeReplyObject(r);\n        return NULL;\n    }\n\n    /* The double reply also has the original protocol string representing a\n     * double as a null terminated string. This way the caller does not need\n     * to format back for string conversion, especially since Redis does efforts\n     * to make the string more human readable avoiding the classical double\n     * decimal string conversion artifacts. */\n    memcpy(r->str, str, len);\n    r->str[len] = '\\0';\n    r->len = len;\n\n    if (task->parent) {\n        parent = task->parent->obj;\n        assert(parent->type == REDIS_REPLY_ARRAY ||\n               parent->type == REDIS_REPLY_MAP ||\n               parent->type == REDIS_REPLY_ATTR ||\n               parent->type == REDIS_REPLY_SET ||\n               parent->type == REDIS_REPLY_PUSH);\n        parent->element[task->idx] = r;\n    }\n    return r;\n}\n\nstatic void *createNilObject(const redisReadTask *task) {\n    int type = task->type;\n    int is_aggregate = (type == REDIS_REPLY_ARRAY || type == REDIS_REPLY_MAP ||\n                        type == REDIS_REPLY_SET || type == REDIS_REPLY_PUSH);\n\n    /* For aggregate nils (*-1, etc.) preserve the original aggregate type\n     * with SIZE_MAX elements as a sentinel, so callers can distinguish\n     * null arrays from null bulk strings. 
*/\n    if (is_aggregate) {\n        void *obj = createArrayObject(task, 0);\n        if (obj == NULL)\n            return NULL;\n        ((redisReply*)obj)->elements = SIZE_MAX;\n        return obj;\n    }\n\n    redisReply *r, *parent;\n\n    r = createReplyObject(REDIS_REPLY_NIL);\n    if (r == NULL)\n        return NULL;\n\n    if (task->parent) {\n        parent = task->parent->obj;\n        assert(parent->type == REDIS_REPLY_ARRAY ||\n               parent->type == REDIS_REPLY_MAP ||\n               parent->type == REDIS_REPLY_ATTR ||\n               parent->type == REDIS_REPLY_SET ||\n               parent->type == REDIS_REPLY_PUSH);\n        parent->element[task->idx] = r;\n    }\n    return r;\n}\n\nstatic void *createBoolObject(const redisReadTask *task, int bval) {\n    redisReply *r, *parent;\n\n    r = createReplyObject(REDIS_REPLY_BOOL);\n    if (r == NULL)\n        return NULL;\n\n    r->integer = bval != 0;\n\n    if (task->parent) {\n        parent = task->parent->obj;\n        assert(parent->type == REDIS_REPLY_ARRAY ||\n               parent->type == REDIS_REPLY_MAP ||\n               parent->type == REDIS_REPLY_ATTR ||\n               parent->type == REDIS_REPLY_SET ||\n               parent->type == REDIS_REPLY_PUSH);\n        parent->element[task->idx] = r;\n    }\n    return r;\n}\n\n/* Return the number of digits of 'v' when converted to string in radix 10.\n * Implementation borrowed from link in redis/src/util.c:string2ll(). */\nstatic uint32_t countDigits(uint64_t v) {\n  uint32_t result = 1;\n  for (;;) {\n    if (v < 10) return result;\n    if (v < 100) return result + 1;\n    if (v < 1000) return result + 2;\n    if (v < 10000) return result + 3;\n    v /= 10000U;\n    result += 4;\n  }\n}\n\n/* Helper that calculates the bulk length given a certain string length. 
*/\nstatic size_t bulklen(size_t len) {\n    return 1+countDigits(len)+2+len+2;\n}\n\nint redisvFormatCommand(char **target, const char *format, va_list ap) {\n    const char *c = format;\n    char *cmd = NULL; /* final command */\n    int pos; /* position in final command */\n    sds curarg, newarg; /* current argument */\n    int touched = 0; /* was the current argument touched? */\n    char **curargv = NULL, **newargv = NULL;\n    int argc = 0;\n    int totlen = 0;\n    int error_type = 0; /* 0 = no error; -1 = memory error; -2 = format error */\n    int j;\n\n    /* Abort if there is not target to set */\n    if (target == NULL)\n        return -1;\n\n    /* Build the command string accordingly to protocol */\n    curarg = sdsempty();\n    if (curarg == NULL)\n        return -1;\n\n    while(*c != '\\0') {\n        if (*c != '%' || c[1] == '\\0') {\n            if (*c == ' ') {\n                if (touched) {\n                    newargv = s_realloc(curargv,sizeof(char*)*(argc+1));\n                    if (newargv == NULL) goto memory_err;\n                    curargv = newargv;\n                    curargv[argc++] = curarg;\n                    totlen += bulklen(sdslen(curarg));\n\n                    /* curarg is put in argv so it can be overwritten. */\n                    curarg = sdsempty();\n                    if (curarg == NULL) goto memory_err;\n                    touched = 0;\n                }\n            } else {\n                newarg = sdscatlen(curarg,c,1);\n                if (newarg == NULL) goto memory_err;\n                curarg = newarg;\n                touched = 1;\n            }\n        } else {\n            char *arg;\n            size_t size;\n\n            /* Set newarg so it can be checked even if it is not touched. 
*/\n            newarg = curarg;\n\n            switch(c[1]) {\n            case 's':\n                arg = va_arg(ap,char*);\n                size = strlen(arg);\n                if (size > 0)\n                    newarg = sdscatlen(curarg,arg,size);\n                break;\n            case 'b':\n                arg = va_arg(ap,char*);\n                size = va_arg(ap,size_t);\n                if (size > 0)\n                    newarg = sdscatlen(curarg,arg,size);\n                break;\n            case '%':\n                newarg = sdscat(curarg,\"%\");\n                break;\n            default:\n                /* Try to detect printf format */\n                {\n                    static const char intfmts[] = \"diouxX\";\n                    static const char flags[] = \"#0-+ \";\n                    char _format[16];\n                    const char *_p = c+1;\n                    size_t _l = 0;\n                    va_list _cpy;\n\n                    /* Flags */\n                    while (*_p != '\\0' && strchr(flags,*_p) != NULL) _p++;\n\n                    /* Field width */\n                    while (*_p != '\\0' && isdigit((int) *_p)) _p++;\n\n                    /* Precision */\n                    if (*_p == '.') {\n                        _p++;\n                        while (*_p != '\\0' && isdigit((int) *_p)) _p++;\n                    }\n\n                    /* Copy va_list before consuming with va_arg */\n                    va_copy(_cpy,ap);\n\n                    /* Make sure we have more characters otherwise strchr() accepts\n                     * '\\0' as an integer specifier. This is checked after above\n                     * va_copy() to avoid UB in fmt_invalid's call to va_end(). 
*/\n                    if (*_p == '\\0') goto fmt_invalid;\n\n                    /* Integer conversion (without modifiers) */\n                    if (strchr(intfmts,*_p) != NULL) {\n                        va_arg(ap,int);\n                        goto fmt_valid;\n                    }\n\n                    /* Double conversion (without modifiers) */\n                    if (strchr(\"eEfFgGaA\",*_p) != NULL) {\n                        va_arg(ap,double);\n                        goto fmt_valid;\n                    }\n\n                    /* Size: char */\n                    if (_p[0] == 'h' && _p[1] == 'h') {\n                        _p += 2;\n                        if (*_p != '\\0' && strchr(intfmts,*_p) != NULL) {\n                            va_arg(ap,int); /* char gets promoted to int */\n                            goto fmt_valid;\n                        }\n                        goto fmt_invalid;\n                    }\n\n                    /* Size: short */\n                    if (_p[0] == 'h') {\n                        _p += 1;\n                        if (*_p != '\\0' && strchr(intfmts,*_p) != NULL) {\n                            va_arg(ap,int); /* short gets promoted to int */\n                            goto fmt_valid;\n                        }\n                        goto fmt_invalid;\n                    }\n\n                    /* Size: long long */\n                    if (_p[0] == 'l' && _p[1] == 'l') {\n                        _p += 2;\n                        if (*_p != '\\0' && strchr(intfmts,*_p) != NULL) {\n                            va_arg(ap,long long);\n                            goto fmt_valid;\n                        }\n                        goto fmt_invalid;\n                    }\n\n                    /* Size: long */\n                    if (_p[0] == 'l') {\n                        _p += 1;\n                        if (*_p != '\\0' && strchr(intfmts,*_p) != NULL) {\n                            va_arg(ap,long);\n      
                      goto fmt_valid;\n                        }\n                        goto fmt_invalid;\n                    }\n\n                fmt_invalid:\n                    va_end(_cpy);\n                    goto format_err;\n\n                fmt_valid:\n                    _l = (_p+1)-c;\n                    if (_l < sizeof(_format)-2) {\n                        memcpy(_format,c,_l);\n                        _format[_l] = '\\0';\n                        newarg = sdscatvprintf(curarg,_format,_cpy);\n\n                        /* Update current position (note: outer blocks\n                         * increment c twice so compensate here) */\n                        c = _p-1;\n                    }\n\n                    va_end(_cpy);\n                    break;\n                }\n            }\n\n            if (newarg == NULL) goto memory_err;\n            curarg = newarg;\n\n            touched = 1;\n            c++;\n            if (*c == '\\0')\n                break;\n        }\n        c++;\n    }\n\n    /* Add the last argument if needed */\n    if (touched) {\n        newargv = s_realloc(curargv,sizeof(char*)*(argc+1));\n        if (newargv == NULL) goto memory_err;\n        curargv = newargv;\n        curargv[argc++] = curarg;\n        totlen += bulklen(sdslen(curarg));\n    } else {\n        sdsfree(curarg);\n    }\n\n    /* Clear curarg because it was put in curargv or was free'd. 
*/\n    curarg = NULL;\n\n    /* Add bytes needed to hold multi bulk count */\n    totlen += 1+countDigits(argc)+2;\n\n    /* Build the command at protocol level */\n    cmd = s_malloc(totlen+1);\n    if (cmd == NULL) goto memory_err;\n\n    pos = sprintf(cmd,\"*%d\\r\\n\",argc);\n    for (j = 0; j < argc; j++) {\n        pos += sprintf(cmd+pos,\"$%zu\\r\\n\",sdslen(curargv[j]));\n        memcpy(cmd+pos,curargv[j],sdslen(curargv[j]));\n        pos += sdslen(curargv[j]);\n        sdsfree(curargv[j]);\n        cmd[pos++] = '\\r';\n        cmd[pos++] = '\\n';\n    }\n    assert(pos == totlen);\n    cmd[pos] = '\\0';\n\n    s_free(curargv);\n    *target = cmd;\n    return totlen;\n\nformat_err:\n    error_type = -2;\n    goto cleanup;\n\nmemory_err:\n    error_type = -1;\n    goto cleanup;\n\ncleanup:\n    if (curargv) {\n        while(argc--)\n            sdsfree(curargv[argc]);\n        s_free(curargv);\n    }\n\n    sdsfree(curarg);\n    s_free(cmd);\n\n    return error_type;\n}\n\n/* Format a command according to the Redis protocol. This function\n * takes a format similar to printf:\n *\n * %s represents a C null terminated string you want to interpolate\n * %b represents a binary safe string\n *\n * When using %b you need to provide both the pointer to the string\n * and the length in bytes as a size_t. Examples:\n *\n * len = redisFormatCommand(target, \"GET %s\", mykey);\n * len = redisFormatCommand(target, \"SET %s %b\", mykey, myval, myvallen);\n */\nint redisFormatCommand(char **target, const char *format, ...) {\n    va_list ap;\n    int len;\n    va_start(ap,format);\n    len = redisvFormatCommand(target,format,ap);\n    va_end(ap);\n\n    /* The API says \"-1\" means bad result, but we now also return \"-2\" in some\n     * cases.  Force the return value to always be -1. */\n    if (len < 0)\n        len = -1;\n\n    return len;\n}\n\n/* Format a command according to the Redis protocol using an sds string and\n * sdscatfmt for the processing of arguments. 
This function takes the\n * number of arguments, an array with arguments and an array with their\n * lengths. If the latter is set to NULL, strlen will be used to compute the\n * argument lengths.\n */\nlong long redisFormatSdsCommandArgv(sds *target, int argc, const char **argv,\n                                    const size_t *argvlen)\n{\n    sds cmd, aux;\n    unsigned long long totlen, len;\n    int j;\n\n    /* Abort on a NULL target */\n    if (target == NULL)\n        return -1;\n\n    /* Calculate our total size */\n    totlen = 1+countDigits(argc)+2;\n    for (j = 0; j < argc; j++) {\n        len = argvlen ? argvlen[j] : strlen(argv[j]);\n        totlen += bulklen(len);\n    }\n\n    /* Use an SDS string for command construction */\n    cmd = sdsempty();\n    if (cmd == NULL)\n        return -1;\n\n    /* We already know how much storage we need */\n    aux = sdsMakeRoomFor(cmd, totlen);\n    if (aux == NULL) {\n        sdsfree(cmd);\n        return -1;\n    }\n\n    cmd = aux;\n\n    /* Construct command */\n    cmd = sdscatfmt(cmd, \"*%i\\r\\n\", argc);\n    for (j=0; j < argc; j++) {\n        len = argvlen ? argvlen[j] : strlen(argv[j]);\n        cmd = sdscatfmt(cmd, \"$%U\\r\\n\", len);\n        cmd = sdscatlen(cmd, argv[j], len);\n        cmd = sdscatlen(cmd, \"\\r\\n\", sizeof(\"\\r\\n\")-1);\n    }\n\n    assert(sdslen(cmd)==totlen);\n\n    *target = cmd;\n    return totlen;\n}\n\nvoid redisFreeSdsCommand(sds cmd) {\n    sdsfree(cmd);\n}\n\n/* Format a command according to the Redis protocol. This function takes the\n * number of arguments, an array with arguments and an array with their\n * lengths. 
If the latter is set to NULL, strlen will be used to compute the\n * argument lengths.\n */\nlong long redisFormatCommandArgv(char **target, int argc, const char **argv, const size_t *argvlen) {\n    char *cmd = NULL; /* final command */\n    size_t pos; /* position in final command */\n    size_t len, totlen;\n    int j;\n\n    /* Abort on a NULL target */\n    if (target == NULL)\n        return -1;\n\n    /* Calculate number of bytes needed for the command */\n    totlen = 1+countDigits(argc)+2;\n    for (j = 0; j < argc; j++) {\n        len = argvlen ? argvlen[j] : strlen(argv[j]);\n        totlen += bulklen(len);\n    }\n\n    /* Build the command at protocol level */\n    cmd = s_malloc(totlen+1);\n    if (cmd == NULL)\n        return -1;\n\n    pos = sprintf(cmd,\"*%d\\r\\n\",argc);\n    for (j = 0; j < argc; j++) {\n        len = argvlen ? argvlen[j] : strlen(argv[j]);\n        pos += sprintf(cmd+pos,\"$%zu\\r\\n\",len);\n        memcpy(cmd+pos,argv[j],len);\n        pos += len;\n        cmd[pos++] = '\\r';\n        cmd[pos++] = '\\n';\n    }\n    assert(pos == totlen);\n    cmd[pos] = '\\0';\n\n    *target = cmd;\n    return totlen;\n}\n\nvoid redisFreeCommand(char *cmd) {\n    s_free(cmd);\n}\n\nredisReader *redisReaderCreate(void) {\n    return redisReaderCreateWithFunctions(&defaultFunctions);\n}\n\n\n"
  },
  {
    "path": "src/redis/hiredis.h",
    "content": "/*\n * Copyright (c) 2009-2011, Salvatore Sanfilippo <antirez at gmail dot com>\n * Copyright (c) 2010-2014, Pieter Noordhuis <pcnoordhuis at gmail dot com>\n * Copyright (c) 2015, Matt Stancliff <matt at genges dot com>,\n *                     Jan-Erik Rediger <janerik at fnordig dot com>\n *\n * All rights reserved.\n *\n * Redistribution and use in source and binary forms, with or without\n * modification, are permitted provided that the following conditions are met:\n *\n *   * Redistributions of source code must retain the above copyright notice,\n *     this list of conditions and the following disclaimer.\n *   * Redistributions in binary form must reproduce the above copyright\n *     notice, this list of conditions and the following disclaimer in the\n *     documentation and/or other materials provided with the distribution.\n *   * Neither the name of Redis nor the names of its contributors may be used\n *     to endorse or promote products derived from this software without\n *     specific prior written permission.\n *\n * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS \"AS IS\"\n * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE\n * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE\n * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE\n * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR\n * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF\n * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS\n * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN\n * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)\n * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE\n * POSSIBILITY OF SUCH DAMAGE.\n */\n\n#ifndef __HIREDIS_H\n#define __HIREDIS_H\n#include \"read.h\"\n#include <stdarg.h> /* for va_list */\n#ifndef _MSC_VER\n#include <sys/time.h> /* for struct timeval */\n#else\nstruct timeval; /* forward declaration */\ntypedef long long ssize_t;\n#endif\n#include <stdint.h> /* uintXX_t, etc */\n#include \"sds.h\" /* for sds */\n#include \"sdsalloc.h\" /* for allocation wrappers */\n\n#define HIREDIS_MAJOR 1\n#define HIREDIS_MINOR 3\n#define HIREDIS_PATCH 0\n#define HIREDIS_SONAME 1.3.0\n\n/* Connection type can be blocking or non-blocking and is set in the\n * least significant bit of the flags field in redisContext. */\n#define REDIS_BLOCK 0x1\n\n/* Connection may be disconnected before being free'd. The second bit\n * in the flags field is set when the context is connected. */\n#define REDIS_CONNECTED 0x2\n\n/* The async API might try to disconnect cleanly and flush the output\n * buffer and read all subsequent replies before disconnecting.\n * This flag means no new commands can come in and the connection\n * should be terminated once all replies have been read. */\n#define REDIS_DISCONNECTING 0x4\n\n/* Flag specific to the async API which means that the context should be clean\n * up as soon as possible. */\n#define REDIS_FREEING 0x8\n\n/* Flag that is set when an async callback is executed. */\n#define REDIS_IN_CALLBACK 0x10\n\n/* Flag that is set when the async context has one or more subscriptions. 
*/\n#define REDIS_SUBSCRIBED 0x20\n\n/* Flag that is set when monitor mode is active */\n#define REDIS_MONITORING 0x40\n\n/* Flag that is set when we should set SO_REUSEADDR before calling bind() */\n#define REDIS_REUSEADDR 0x80\n\n/* Flag that is set when the async connection supports push replies. */\n#define REDIS_SUPPORTS_PUSH 0x100\n\n/**\n * Flag that indicates the user does not want the context to\n * be automatically freed upon error\n */\n#define REDIS_NO_AUTO_FREE 0x200\n\n/* Flag that indicates the user does not want replies to be automatically freed */\n#define REDIS_NO_AUTO_FREE_REPLIES 0x400\n\n/* Flags to prefer IPv6 or IPv4 when doing DNS lookup. (If both are set,\n * AF_UNSPEC is used.) */\n#define REDIS_PREFER_IPV4 0x800\n#define REDIS_PREFER_IPV6 0x1000\n\n#define REDIS_KEEPALIVE_INTERVAL 15 /* seconds */\n\n/* number of times we retry to connect in the case of EADDRNOTAVAIL and\n * SO_REUSEADDR is being used. */\n#define REDIS_CONNECT_RETRIES  10\n\n/* Forward declarations for structs defined elsewhere */\nstruct redisAsyncContext;\nstruct redisContext;\n\n/* RESP3 push helpers and callback prototypes */\n#define redisIsPushReply(r) (((redisReply*)(r))->type == REDIS_REPLY_PUSH)\ntypedef void (redisPushFn)(void *, void *);\ntypedef void (redisAsyncPushFn)(struct redisAsyncContext *, void *);\n\n#ifdef __cplusplus\nextern \"C\" {\n#endif\n\n/* This is the reply object returned by redisCommand() */\ntypedef struct redisReply {\n    int type; /* REDIS_REPLY_* */\n    long long integer; /* The integer when type is REDIS_REPLY_INTEGER */\n    double dval; /* The double when type is REDIS_REPLY_DOUBLE */\n    size_t len; /* Length of string */\n    char *str; /* Used for REDIS_REPLY_ERROR, REDIS_REPLY_STRING\n                  REDIS_REPLY_VERB, REDIS_REPLY_DOUBLE (in addition to dval),\n                  and REDIS_REPLY_BIGNUM. 
*/\n    char vtype[4]; /* Used for REDIS_REPLY_VERB, contains the null\n                      terminated 3 character content type, such as \"txt\". */\n    size_t elements; /* number of elements, for REDIS_REPLY_ARRAY */\n    struct redisReply **element; /* elements vector for REDIS_REPLY_ARRAY */\n} redisReply;\n\nredisReader *redisReaderCreate(void);\n\n/* Function to free the reply objects hiredis returns by default. */\nvoid freeReplyObject(void *reply);\n\n/* Functions to format a command according to the protocol. */\nint redisvFormatCommand(char **target, const char *format, va_list ap);\nint redisFormatCommand(char **target, const char *format, ...);\nlong long redisFormatCommandArgv(char **target, int argc, const char **argv, const size_t *argvlen);\nlong long redisFormatSdsCommandArgv(sds *target, int argc, const char ** argv, const size_t *argvlen);\nvoid redisFreeCommand(char *cmd);\nvoid redisFreeSdsCommand(sds cmd);\n\n\n\n#ifdef __cplusplus\n}\n#endif\n\n#endif\n"
  },
  {
    "path": "src/redis/hyperloglog.c",
    "content": "/* hyperloglog.c - Redis HyperLogLog probabilistic cardinality approximation.\n * This file implements the algorithm and the exported Redis commands.\n *\n * Copyright (c) 2014, Salvatore Sanfilippo <antirez at gmail dot com>\n * All rights reserved.\n *\n * Redistribution and use in source and binary forms, with or without\n * modification, are permitted provided that the following conditions are met:\n *\n *   * Redistributions of source code must retain the above copyright notice,\n *     this list of conditions and the following disclaimer.\n *   * Redistributions in binary form must reproduce the above copyright\n *     notice, this list of conditions and the following disclaimer in the\n *     documentation and/or other materials provided with the distribution.\n *   * Neither the name of Redis nor the names of its contributors may be used\n *     to endorse or promote products derived from this software without\n *     specific prior written permission.\n *\n * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS \"AS IS\"\n * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE\n * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE\n * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE\n * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR\n * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF\n * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS\n * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN\n * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)\n * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE\n * POSSIBILITY OF SUCH DAMAGE.\n */\n\n#include \"redis/hyperloglog.h\"\n\n#include <math.h>\n#include <string.h>\n\n#include \"redis/redis_aux.h\"\n#include \"redis/util.h\"\n\n#define min(a, b) ((a) < (b) ? 
(a) : (b))\n\n/* The Redis HyperLogLog implementation is based on the following ideas:\n *\n * * The use of a 64 bit hash function as proposed in [1], in order to estimate\n *   cardinalities larger than 10^9, at the cost of just 1 additional bit per\n *   register.\n * * The use of 16384 6-bit registers for a great level of accuracy, using\n *   a total of 12k per key.\n * * The use of the Redis string data type. No new type is introduced.\n * * No attempt is made to compress the data structure as in [1]. Also the\n *   algorithm used is the original HyperLogLog Algorithm as in [2], with\n *   the only difference that a 64 bit hash function is used, so no correction\n *   is performed for values near 2^32 as in [1].\n *\n * [1] Heule, Nunkesser, Hall: HyperLogLog in Practice: Algorithmic\n *     Engineering of a State of The Art Cardinality Estimation Algorithm.\n *\n * [2] P. Flajolet, Éric Fusy, O. Gandouet, and F. Meunier. Hyperloglog: The\n *     analysis of a near-optimal cardinality estimation algorithm.\n *\n * Redis uses two representations:\n *\n * 1) A \"dense\" representation where every entry is represented by\n *    a 6-bit integer.\n * 2) A \"sparse\" representation using run length compression suitable\n *    for representing HyperLogLogs with many registers set to 0 in\n *    a memory efficient way.\n *\n *\n * HLL header\n * ===\n *\n * Both the dense and sparse representation have a 16 byte header as follows:\n *\n * +------+---+-----+----------+\n * | HYLL | E | N/U | Cardin.  |\n * +------+---+-----+----------+\n *\n * The first 4 bytes are a magic string set to the bytes \"HYLL\".\n * \"E\" is one byte encoding, currently set to HLL_DENSE or\n * HLL_SPARSE. 
N/U are three not used bytes.\n *\n * The \"Cardin.\" field is a 64 bit integer stored in little endian format\n * with the latest cardinality computed that can be reused if the data\n * structure was not modified since the last computation (this is useful\n * because there are high probabilities that HLLADD operations don't\n * modify the actual data structure and hence the approximated cardinality).\n *\n * When the most significant bit in the most significant byte of the cached\n * cardinality is set, it means that the data structure was modified and\n * we can't reuse the cached value that must be recomputed.\n *\n * Dense representation\n * ===\n *\n * The dense representation used by Redis is the following:\n *\n * +--------+--------+--------+------//      //--+\n * |11000000|22221111|33333322|55444444 ....     |\n * +--------+--------+--------+------//      //--+\n *\n * The 6 bits counters are encoded one after the other starting from the\n * LSB to the MSB, and using the next bytes as needed.\n *\n * Sparse representation\n * ===\n *\n * The sparse representation encodes registers using a run length\n * encoding composed of three opcodes, two using one byte, and one using\n * two bytes. The opcodes are called ZERO, XZERO and VAL.\n *\n * ZERO opcode is represented as 00xxxxxx. The 6-bit integer represented\n * by the six bits 'xxxxxx', plus 1, means that there are N registers set\n * to 0. This opcode can represent from 1 to 64 contiguous registers set\n * to the value of 0.\n *\n * XZERO opcode is represented by two bytes 01xxxxxx yyyyyyyy. The 14-bit\n * integer represented by the bits 'xxxxxx' as most significant bits and\n * 'yyyyyyyy' as least significant bits, plus 1, means that there are N\n * registers set to 0. This opcode can represent from 1 to 16384 contiguous\n * registers set to the value of 0.\n *\n * VAL opcode is represented as 1vvvvvxx. 
It contains a 5-bit integer\n * representing the value of a register, and a 2-bit integer representing\n * the number of contiguous registers set to that value 'vvvvv'.\n * To obtain the value and run length, the integers vvvvv and xx must be\n * incremented by one. This opcode can represent values from 1 to 32,\n * repeated from 1 to 4 times.\n *\n * The sparse representation can't represent registers with a value greater\n * than 32, however it is very unlikely that we find such a register in an\n * HLL with a cardinality where the sparse representation is still more\n * memory efficient than the dense representation. When this happens the\n * HLL is converted to the dense representation.\n *\n * The sparse representation is purely positional. For example a sparse\n * representation of an empty HLL is just: XZERO:16384.\n *\n * An HLL having only 3 non-zero registers at position 1000, 1020, 1021\n * respectively set to 2, 3, 3, is represented by the following three\n * opcodes:\n *\n * XZERO:1000 (Registers 0-999 are set to 0)\n * VAL:2,1    (1 register set to value 2, that is register 1000)\n * ZERO:19    (Registers 1001-1019 set to 0)\n * VAL:3,2    (2 registers set to value 3, that is registers 1020,1021)\n * XZERO:15362 (Registers 1022-16383 set to 0)\n *\n * In the example the sparse representation used just 7 bytes instead\n * of 12k in order to represent the HLL registers. 
In general for low\n * cardinality there is a big win in terms of space efficiency, traded\n * with CPU time since the sparse representation is slower to access.\n *\n * The following table shows average cardinality vs bytes used, 100\n * samples per cardinality (when the set was not representable because\n * of registers with too big value, the dense representation size was used\n * as a sample).\n *\n * 100 267\n * 200 485\n * 300 678\n * 400 859\n * 500 1033\n * 600 1205\n * 700 1375\n * 800 1544\n * 900 1713\n * 1000 1882\n * 2000 3480\n * 3000 4879\n * 4000 6089\n * 5000 7138\n * 6000 8042\n * 7000 8823\n * 8000 9500\n * 9000 10088\n * 10000 10591\n *\n * The dense representation uses 12288 bytes, so there is a big win up to\n * a cardinality of ~2000-3000. For bigger cardinalities the constant times\n * involved in updating the sparse representation is not justified by the\n * memory savings. The exact maximum length of the sparse representation\n * when this implementation switches to the dense representation is\n * configured via the define HLL_SPARSE_MAX_BYTES.\n */\n#define HLL_SPARSE_MAX_BYTES 3000\n\nstruct hllhdr {\n  char magic[4];       /* \"HYLL\" */\n  uint8_t encoding;    /* HLL_DENSE or HLL_SPARSE. */\n  uint8_t notused[3];  /* Reserved for future use, must be zero. */\n  uint8_t card[8];     /* Cached cardinality, little endian. */\n  uint8_t registers[]; /* Data bytes. */\n};\n\n/* The cached cardinality MSB is used to signal validity of the cached value. */\n#define HLL_INVALIDATE_CACHE(hdr) (hdr)->card[7] |= (1 << 7)\n#define HLL_VALID_CACHE(hdr) (((hdr)->card[7] & (1 << 7)) == 0)\n\n#define HLL_P 14 /* The greater is P, the smaller the error. */\n#define HLL_Q                                                                           \\\n  (64 - HLL_P)                         /* The number of bits of the hash value used for \\\n                                          determining the number of leading zeros. 
*/\n#define HLL_REGISTERS (1 << HLL_P)     /* With P=14, 16384 registers. */\n#define HLL_P_MASK (HLL_REGISTERS - 1) /* Mask to index register. */\n#define HLL_BITS 6                     /* Enough to count up to 63 leading zeroes. */\n#define HLL_REGISTER_MAX ((1 << HLL_BITS) - 1)\n#define HLL_HDR_SIZE sizeof(struct hllhdr)\n#define HLL_DENSE_SIZE (HLL_HDR_SIZE + ((HLL_REGISTERS * HLL_BITS + 7) / 8))\n#define HLL_DENSE 0  /* Dense encoding. */\n#define HLL_SPARSE 1 /* Sparse encoding. */\n#define HLL_RAW 255  /* Only used internally, never exposed. */\n#define HLL_MAX_ENCODING 1\n\n/* =========================== Low level bit macros ========================= */\n\n/* Macros to access the dense representation.\n *\n * We need to get and set 6 bit counters in an array of 8 bit bytes.\n * We use macros to make sure the code is inlined since speed is critical\n * especially in order to compute the approximated cardinality in\n * HLLCOUNT where we need to access all the registers at once.\n * For the same reason we also want to avoid conditionals in this code path.\n *\n * +--------+--------+--------+------//\n * |11000000|22221111|33333322|55444444\n * +--------+--------+--------+------//\n *\n * Note: in the above representation the most significant bit (MSB)\n * of every byte is on the left. 
We start using bits from the LSB to MSB,\n * and so forth passing to the next byte.\n *\n * Example, we want to access to counter at pos = 1 (\"111111\" in the\n * illustration above).\n *\n * The index of the first byte b0 containing our data is:\n *\n *  b0 = 6 * pos / 8 = 0\n *\n *   +--------+\n *   |11000000|  <- Our byte at b0\n *   +--------+\n *\n * The position of the first bit (counting from the LSB = 0) in the byte\n * is given by:\n *\n *  fb = 6 * pos % 8 -> 6\n *\n * Right shift b0 of 'fb' bits.\n *\n *   +--------+\n *   |11000000|  <- Initial value of b0\n *   |00000011|  <- After right shift of 6 pos.\n *   +--------+\n *\n * Left shift b1 of bits 8-fb bits (2 bits)\n *\n *   +--------+\n *   |22221111|  <- Initial value of b1\n *   |22111100|  <- After left shift of 2 bits.\n *   +--------+\n *\n * OR the two bits, and finally AND with 111111 (63 in decimal) to\n * clean the higher order bits we are not interested in:\n *\n *   +--------+\n *   |00000011|  <- b0 right shifted\n *   |22111100|  <- b1 left shifted\n *   |22111111|  <- b0 OR b1\n *   |  111111|  <- (b0 OR b1) AND 63, our value.\n *   +--------+\n *\n * We can try with a different example, like pos = 0. 
In this case\n * the 6-bit counter is actually contained in a single byte.\n *\n *  b0 = 6 * pos / 8 = 0\n *\n *   +--------+\n *   |11000000|  <- Our byte at b0\n *   +--------+\n *\n *  fb = 6 * pos % 8 = 0\n *\n *  So we right shift of 0 bits (no shift in practice) and\n *  left shift the next byte of 8 bits, even if we don't use it,\n *  but this has the effect of clearing the bits so the result\n *  will not be affected after the OR.\n *\n * -------------------------------------------------------------------------\n *\n * Setting the register is a bit more complex, let's assume that 'val'\n * is the value we want to set, already in the right range.\n *\n * We need two steps, in one we need to clear the bits, and in the other\n * we need to bitwise-OR the new bits.\n *\n * Let's try with 'pos' = 1, so our first byte at 'b' is 0,\n *\n * \"fb\" is 6 in this case.\n *\n *   +--------+\n *   |11000000|  <- Our byte at b0\n *   +--------+\n *\n * To create an AND-mask to clear the bits about this position, we just\n * initialize the mask with the value 63, left shift it of \"fb\" bits,\n * and finally invert the result.\n *\n *   +--------+\n *   |00111111|  <- \"mask\" starts at 63\n *   |11000000|  <- \"mask\" after left shift of \"fb\" bits.\n *   |00111111|  <- \"mask\" after invert.\n *   +--------+\n *\n * Now we can bitwise-AND the byte at \"b\" with the mask, and bitwise-OR\n * it with \"val\" left-shifted of \"fb\" bits to set the new bits.\n *\n * Now let's focus on the next byte b1:\n *\n *   +--------+\n *   |22221111|  <- Initial value of b1\n *   +--------+\n *\n * To build the AND mask we start again with the 63 value, right shift\n * it by 8-fb bits, and invert it.\n *\n *   +--------+\n *   |00111111|  <- \"mask\" set at 2^6-1\n *   |00001111|  <- \"mask\" after the right shift by 8-fb = 2 bits\n *   |11110000|  <- \"mask\" after bitwise not.\n *   +--------+\n *\n * Now we can mask it with b+1 to clear the old bits, and bitwise-OR\n * with \"val\" 
left-shifted by \"rs\" bits to set the new value.\n */\n\n/* Note: if we access the last counter, we will also access the b+1 byte\n * that is out of the array, but sds strings always have an implicit null\n * term, so the byte exists, and we can skip the conditional (or the need\n * to allocate 1 byte more explicitly). */\n\n/* Store the value of the register at position 'regnum' into variable 'target'.\n * 'p' is an array of unsigned bytes. */\n#define HLL_DENSE_GET_REGISTER(target, p, regnum)             \\\n  do {                                                        \\\n    uint8_t* _p = (uint8_t*)p;                                \\\n    unsigned long _byte = regnum * HLL_BITS / 8;              \\\n    unsigned long _fb = regnum * HLL_BITS & 7;                \\\n    unsigned long _fb8 = 8 - _fb;                             \\\n    unsigned long b0 = _p[_byte];                             \\\n    unsigned long b1 = _p[_byte + 1];                         \\\n    target = ((b0 >> _fb) | (b1 << _fb8)) & HLL_REGISTER_MAX; \\\n  } while (0)\n\n/* Set the value of the register at position 'regnum' to 'val'.\n * 'p' is an array of unsigned bytes. */\n#define HLL_DENSE_SET_REGISTER(p, regnum, val)    \\\n  do {                                            \\\n    uint8_t* _p = (uint8_t*)p;                    \\\n    unsigned long _byte = (regnum)*HLL_BITS / 8;  \\\n    unsigned long _fb = (regnum)*HLL_BITS & 7;    \\\n    unsigned long _fb8 = 8 - _fb;                 \\\n    unsigned long _v = (val);                     \\\n    _p[_byte] &= ~(HLL_REGISTER_MAX << _fb);      \\\n    _p[_byte] |= _v << _fb;                       \\\n    _p[_byte + 1] &= ~(HLL_REGISTER_MAX >> _fb8); \\\n    _p[_byte + 1] |= _v >> _fb8;                  \\\n  } while (0)\n\n/* Macros to access the sparse representation.\n * The macros parameter is expected to be an uint8_t pointer. 
*/\n#define HLL_SPARSE_XZERO_BIT 0x40                    /* 01xxxxxx */\n#define HLL_SPARSE_VAL_BIT 0x80                      /* 1vvvvvxx */\n#define HLL_SPARSE_IS_ZERO(p) (((*(p)) & 0xc0) == 0) /* 00xxxxxx */\n#define HLL_SPARSE_IS_XZERO(p) (((*(p)) & 0xc0) == HLL_SPARSE_XZERO_BIT)\n#define HLL_SPARSE_IS_VAL(p) ((*(p)) & HLL_SPARSE_VAL_BIT)\n#define HLL_SPARSE_ZERO_LEN(p) (((*(p)) & 0x3f) + 1)\n#define HLL_SPARSE_XZERO_LEN(p) (((((*(p)) & 0x3f) << 8) | (*((p) + 1))) + 1)\n#define HLL_SPARSE_VAL_VALUE(p) ((((*(p)) >> 2) & 0x1f) + 1)\n#define HLL_SPARSE_VAL_LEN(p) (((*(p)) & 0x3) + 1)\n#define HLL_SPARSE_VAL_MAX_VALUE 32\n#define HLL_SPARSE_VAL_MAX_LEN 4\n#define HLL_SPARSE_ZERO_MAX_LEN 64\n#define HLL_SPARSE_XZERO_MAX_LEN 16384\n#define HLL_SPARSE_VAL_SET(p, val, len)                       \\\n  do {                                                        \\\n    *(p) = (((val)-1) << 2 | ((len)-1)) | HLL_SPARSE_VAL_BIT; \\\n  } while (0)\n#define HLL_SPARSE_ZERO_SET(p, len) \\\n  do {                              \\\n    *(p) = (len)-1;                 \\\n  } while (0)\n#define HLL_SPARSE_XZERO_SET(p, len)         \\\n  do {                                       \\\n    int _l = (len)-1;                        \\\n    *(p) = (_l >> 8) | HLL_SPARSE_XZERO_BIT; \\\n    *((p) + 1) = (_l & 0xff);                \\\n  } while (0)\n#define HLL_ALPHA_INF 0.721347520444481703680 /* constant for 0.5/ln(2) */\n\n/* ========================= HyperLogLog algorithm  ========================= */\n\n/* Our hash function is MurmurHash2, 64 bit version.\n * It was modified for Redis in order to provide the same result in\n * big and little endian archs (endian neutral). 
*/\nuint64_t MurmurHash64A(const void* key, int len, unsigned int seed) {\n  const uint64_t m = 0xc6a4a7935bd1e995;\n  const int r = 47;\n  uint64_t h = seed ^ (len * m);\n  const uint8_t* data = (const uint8_t*)key;\n  const uint8_t* end = data + (len - (len & 7));\n\n  while (data != end) {\n    uint64_t k;\n\n#if (BYTE_ORDER == LITTLE_ENDIAN)\n#ifdef USE_ALIGNED_ACCESS\n    memcpy(&k, data, sizeof(uint64_t));\n#else\n    k = *((uint64_t*)data);\n#endif\n#else\n    k = (uint64_t)data[0];\n    k |= (uint64_t)data[1] << 8;\n    k |= (uint64_t)data[2] << 16;\n    k |= (uint64_t)data[3] << 24;\n    k |= (uint64_t)data[4] << 32;\n    k |= (uint64_t)data[5] << 40;\n    k |= (uint64_t)data[6] << 48;\n    k |= (uint64_t)data[7] << 56;\n#endif\n\n    k *= m;\n    k ^= k >> r;\n    k *= m;\n    h ^= k;\n    h *= m;\n    data += 8;\n  }\n\n  switch (len & 7) {\n    case 7:\n      h ^= (uint64_t)data[6] << 48; /* fall-thru */\n    case 6:\n      h ^= (uint64_t)data[5] << 40; /* fall-thru */\n    case 5:\n      h ^= (uint64_t)data[4] << 32; /* fall-thru */\n    case 4:\n      h ^= (uint64_t)data[3] << 24; /* fall-thru */\n    case 3:\n      h ^= (uint64_t)data[2] << 16; /* fall-thru */\n    case 2:\n      h ^= (uint64_t)data[1] << 8; /* fall-thru */\n    case 1:\n      h ^= (uint64_t)data[0];\n      h *= m; /* fall-thru */\n  };\n\n  h ^= h >> r;\n  h *= m;\n  h ^= h >> r;\n  return h;\n}\n\n/* Given a string element to add to the HyperLogLog, returns the length\n * of the pattern 000..1 of the element hash. As a side effect 'regp' is\n * set to the register index this element hashes to. */\nint hllPatLen(unsigned char* ele, size_t elesize, long* regp) {\n  uint64_t hash, bit, index;\n  int count;\n\n  /* Count the number of zeroes starting from bit HLL_REGISTERS\n   * (that is a power of two corresponding to the first bit we don't use\n   * as index). 
The max run can be 64-P+1 = Q+1 bits.\n   *\n   * Note that the final \"1\" ending the sequence of zeroes must be\n   * included in the count, so if we find \"001\" the count is 3, and\n   * the smallest count possible is no zeroes at all, just a 1 bit\n   * at the first position, that is a count of 1.\n   *\n   * This may sound like inefficient, but actually in the average case\n   * there are high probabilities to find a 1 after a few iterations. */\n  hash = MurmurHash64A(ele, elesize, 0xadc83b19ULL);\n  index = hash & HLL_P_MASK;      /* Register index. */\n  hash >>= HLL_P;                 /* Remove bits used to address the register. */\n  hash |= ((uint64_t)1 << HLL_Q); /* Make sure the loop terminates\n                                     and count will be <= Q+1. */\n  bit = 1;\n  count = 1; /* Initialized to 1 since we count the \"00000...1\" pattern. */\n  while ((hash & bit) == 0) {\n    count++;\n    bit <<= 1;\n  }\n  *regp = (int)index;\n  return count;\n}\n\n/* ================== Dense representation implementation  ================== */\n\n/* Low level function to set the dense HLL register at 'index' to the\n * specified value if the current value is smaller than 'count'.\n *\n * 'registers' is expected to have room for HLL_REGISTERS plus an\n * additional byte on the right. This requirement is met by sds strings\n * automatically since they are implicitly null terminated.\n *\n * The function always succeed, however if as a result of the operation\n * the approximated cardinality changed, 1 is returned. Otherwise 0\n * is returned. 
*/\nint hllDenseSet(uint8_t* registers, long index, uint8_t count) {\n  uint8_t oldcount;\n\n  HLL_DENSE_GET_REGISTER(oldcount, registers, index);\n  if (count > oldcount) {\n    HLL_DENSE_SET_REGISTER(registers, index, count);\n    return 1;\n  } else {\n    return 0;\n  }\n}\n\n/* \"Add\" the element in the dense hyperloglog data structure.\n * Actually nothing is added, but the max 0 pattern counter of the subset\n * the element belongs to is incremented if needed.\n *\n * This is just a wrapper to hllDenseSet(), performing the hashing of the\n * element in order to retrieve the index and zero-run count. */\nint hllDenseAdd(uint8_t* registers, unsigned char* ele, size_t elesize) {\n  long index;\n  uint8_t count = hllPatLen(ele, elesize, &index);\n  /* Update the register if this element produced a longer run of zeroes. */\n  return hllDenseSet(registers, index, count);\n}\n\n/* Compute the register histogram in the dense representation. */\nvoid hllDenseRegHisto(uint8_t* registers, int* reghisto) {\n  int j;\n\n  /* Redis default is to use 16384 registers 6 bits each. The code works\n   * with other values by modifying the defines, but for our target value\n   * we take a faster path with unrolled loops. */\n  if (HLL_REGISTERS == 16384 && HLL_BITS == 6) {\n    uint8_t* r = registers;\n    unsigned long r0, r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11, r12, r13, r14, r15;\n    for (j = 0; j < 1024; j++) {\n      /* Handle 16 registers per iteration. 
*/\n      r0 = r[0] & 63;\n      r1 = (r[0] >> 6 | r[1] << 2) & 63;\n      r2 = (r[1] >> 4 | r[2] << 4) & 63;\n      r3 = (r[2] >> 2) & 63;\n      r4 = r[3] & 63;\n      r5 = (r[3] >> 6 | r[4] << 2) & 63;\n      r6 = (r[4] >> 4 | r[5] << 4) & 63;\n      r7 = (r[5] >> 2) & 63;\n      r8 = r[6] & 63;\n      r9 = (r[6] >> 6 | r[7] << 2) & 63;\n      r10 = (r[7] >> 4 | r[8] << 4) & 63;\n      r11 = (r[8] >> 2) & 63;\n      r12 = r[9] & 63;\n      r13 = (r[9] >> 6 | r[10] << 2) & 63;\n      r14 = (r[10] >> 4 | r[11] << 4) & 63;\n      r15 = (r[11] >> 2) & 63;\n\n      reghisto[r0]++;\n      reghisto[r1]++;\n      reghisto[r2]++;\n      reghisto[r3]++;\n      reghisto[r4]++;\n      reghisto[r5]++;\n      reghisto[r6]++;\n      reghisto[r7]++;\n      reghisto[r8]++;\n      reghisto[r9]++;\n      reghisto[r10]++;\n      reghisto[r11]++;\n      reghisto[r12]++;\n      reghisto[r13]++;\n      reghisto[r14]++;\n      reghisto[r15]++;\n\n      r += 12;\n    }\n  } else {\n    for (j = 0; j < HLL_REGISTERS; j++) {\n      unsigned long reg;\n      HLL_DENSE_GET_REGISTER(reg, registers, j);\n      reghisto[reg]++;\n    }\n  }\n}\n\n/* ================== Sparse representation implementation  ================= */\n\n\n/* Convert the HLL with sparse representation given as input in its dense\n * representation. Both representations are represented by SDS strings, and\n * the input representation is freed as a side effect.\n *\n * The function returns C_OK if the sparse representation was valid,\n * otherwise C_ERR is returned if the representation was corrupted. */\nint hllSparseToDense(sds* hll_ptr) {\n    sds sparse = *hll_ptr, dense;\n    struct hllhdr *hdr, *oldhdr = (struct hllhdr*)sparse;\n    int idx = 0, runlen, regval;\n    uint8_t *p = (uint8_t*)sparse, *end = p+sdslen(sparse);\n\n    /* If the representation is already the right one return ASAP. 
*/\n    hdr = (struct hllhdr*) sparse;\n    if (hdr->encoding == HLL_DENSE) return C_OK;\n\n    /* Create a string of the right size filled with zero bytes.\n     * Note that the cached cardinality is set to 0 as a side effect\n     * that is exactly the cardinality of an empty HLL. */\n    dense = sdsnewlen(NULL,HLL_DENSE_SIZE);\n    hdr = (struct hllhdr*) dense;\n    *hdr = *oldhdr; /* This will copy the magic and cached cardinality. */\n    hdr->encoding = HLL_DENSE;\n\n    /* Now read the sparse representation and set non-zero registers\n     * accordingly. */\n    p += HLL_HDR_SIZE;\n    while(p < end) {\n        if (HLL_SPARSE_IS_ZERO(p)) {\n            runlen = HLL_SPARSE_ZERO_LEN(p);\n            idx += runlen;\n            p++;\n        } else if (HLL_SPARSE_IS_XZERO(p)) {\n            runlen = HLL_SPARSE_XZERO_LEN(p);\n            idx += runlen;\n            p += 2;\n        } else {\n            runlen = HLL_SPARSE_VAL_LEN(p);\n            regval = HLL_SPARSE_VAL_VALUE(p);\n            if ((runlen + idx) > HLL_REGISTERS) break; /* Overflow. */\n            while(runlen--) {\n                HLL_DENSE_SET_REGISTER(hdr->registers,idx,regval);\n                idx++;\n            }\n            p++;\n        }\n    }\n\n    /* If the sparse representation was valid, we expect to find idx\n     * set to HLL_REGISTERS. */\n    if (idx != HLL_REGISTERS) {\n        sdsfree(dense);\n        return C_ERR;\n    }\n\n    /* Free the old representation and set the new one. */\n    sdsfree(*hll_ptr);\n    *hll_ptr = dense;\n    return C_OK;\n}\n\n/* Low level function to set the sparse HLL register at 'index' to the\n * specified value if the current value is smaller than 'count'.\n *\n * The object 'hll' is the SDS object holding the HLL. 
The function requires\n * a reference to the object in order to be able to enlarge the string if\n * needed.\n *\n * On success, the function returns 1 if the cardinality changed, or 0\n * if the register for this element was not updated.\n * On error (if the representation is invalid) -1 is returned.\n *\n * As a side effect the function may promote the HLL representation from\n * sparse to dense: this happens when a register requires to be set to a value\n * not representable with the sparse representation, or when the resulting\n * size would be greater than HLL_SPARSE_MAX_BYTES. */\nint hllSparseSet(sds* hll_ptr, long index, uint8_t count, int* promoted) {\n    struct hllhdr *hdr;\n    uint8_t oldcount, *sparse, *end, *p, *prev, *next;\n    long first, span;\n    long is_zero = 0, is_xzero = 0, is_val = 0, runlen = 0;\n\n    /* If the count is too big to be representable by the sparse representation\n     * switch to dense representation. */\n    if (count > HLL_SPARSE_VAL_MAX_VALUE) goto promote;\n\n    /* When updating a sparse representation, sometimes we may need to enlarge the\n     * buffer for up to 3 bytes in the worst case (XZERO split into XZERO-VAL-XZERO),\n     * and the following code does the enlarge job.\n     * Actually, we use a greedy strategy, enlarge more than 3 bytes to avoid the need\n     * for future reallocates on incremental growth. 
But we do not allocate more than\n     * 'HLL_SPARSE_MAX_BYTES' bytes for the sparse representation.\n     * If the available size of hyperloglog sds string is not enough for the increment\n     * we need, we promote the hyperloglog to dense representation in 'step 3'.\n     */\n    sds hll = *hll_ptr;\n    if (sdsalloc(hll) < HLL_SPARSE_MAX_BYTES && sdsavail(hll) < 3) {\n        size_t newlen = sdslen(hll) + 3;\n        newlen += min(newlen, 300); /* Greediness: double 'newlen' if it is smaller than 300, or add 300 to it when it exceeds 300 */\n        if (newlen > HLL_SPARSE_MAX_BYTES)\n            newlen = HLL_SPARSE_MAX_BYTES;\n        *hll_ptr = sdsResize(hll, newlen);\n        hll = *hll_ptr;\n    }\n\n    /* Step 1: we need to locate the opcode we need to modify to check\n     * if a value update is actually needed. */\n    sparse = p = ((uint8_t*)hll) + HLL_HDR_SIZE;\n    end = p + sdslen(hll) - HLL_HDR_SIZE;\n\n    first = 0;\n    prev = NULL; /* Points to previous opcode at the end of the loop. */\n    next = NULL; /* Points to the next opcode at the end of the loop. */\n    span = 0;\n    while(p < end) {\n        long oplen;\n\n        /* Set span to the number of registers covered by this opcode.\n         *\n         * This is the most performance critical loop of the sparse\n         * representation. Sorting the conditionals from the most to the\n         * least frequent opcode in many-bytes sparse HLLs is faster. */\n        oplen = 1;\n        if (HLL_SPARSE_IS_ZERO(p)) {\n            span = HLL_SPARSE_ZERO_LEN(p);\n        } else if (HLL_SPARSE_IS_VAL(p)) {\n            span = HLL_SPARSE_VAL_LEN(p);\n        } else { /* XZERO. */\n            span = HLL_SPARSE_XZERO_LEN(p);\n            oplen = 2;\n        }\n        /* Break if this opcode covers the register at 'index'. 
*/\n        if (index <= first+span-1) break;\n        prev = p;\n        p += oplen;\n        first += span;\n    }\n    if (span == 0 || p >= end) return -1; /* Invalid format. */\n\n    next = HLL_SPARSE_IS_XZERO(p) ? p+2 : p+1;\n    if (next >= end) next = NULL;\n\n    /* Cache current opcode type to avoid using the macro again and\n     * again for something that will not change.\n     * Also cache the run-length of the opcode. */\n    if (HLL_SPARSE_IS_ZERO(p)) {\n        is_zero = 1;\n        runlen = HLL_SPARSE_ZERO_LEN(p);\n    } else if (HLL_SPARSE_IS_XZERO(p)) {\n        is_xzero = 1;\n        runlen = HLL_SPARSE_XZERO_LEN(p);\n    } else {\n        is_val = 1;\n        runlen = HLL_SPARSE_VAL_LEN(p);\n    }\n\n    /* Step 2: After the loop:\n     *\n     * 'first' stores the index of the first register covered\n     *  by the current opcode, which is pointed by 'p'.\n     *\n     * 'next' and 'prev' store respectively the next and previous opcode,\n     *  or NULL if the opcode at 'p' is respectively the last or first.\n     *\n     * 'span' is set to the number of registers covered by the current\n     *  opcode.\n     *\n     * There are different cases in order to update the data structure\n     * in place without generating it from scratch:\n     *\n     * A) If it is a VAL opcode already set to a value >= our 'count'\n     *    no update is needed, regardless of the VAL run-length field.\n     *    In this case PFADD returns 0 since no changes are performed.\n     *\n     * B) If it is a VAL opcode with len = 1 (representing only our\n     *    register) and the value is less than 'count', we just update it\n     *    since this is a trivial case. */\n    if (is_val) {\n        oldcount = HLL_SPARSE_VAL_VALUE(p);\n        /* Case A. */\n        if (oldcount >= count) return 0;\n\n        /* Case B. 
*/\n        if (runlen == 1) {\n            HLL_SPARSE_VAL_SET(p,count,1);\n            goto updated;\n        }\n    }\n\n    /* C) Another trivial to handle case is a ZERO opcode with a len of 1.\n     * We can just replace it with a VAL opcode with our value and len of 1. */\n    if (is_zero && runlen == 1) {\n        HLL_SPARSE_VAL_SET(p,count,1);\n        goto updated;\n    }\n\n    /* D) General case.\n     *\n     * The other cases are more complex: our register requires to be updated\n     * and is either currently represented by a VAL opcode with len > 1,\n     * by a ZERO opcode with len > 1, or by an XZERO opcode.\n     *\n     * In those cases the original opcode must be split into multiple\n     * opcodes. The worst case is an XZERO split in the middle resulting into\n     * XZERO - VAL - XZERO, so the resulting sequence max length is\n     * 5 bytes.\n     *\n     * We perform the split writing the new sequence into the 'new' buffer\n     * with 'newlen' as length. Later the new sequence is inserted in place\n     * of the old one, possibly moving what is on the right a few bytes\n     * if the new sequence is longer than the older one. */\n    uint8_t seq[5], *n = seq;\n    int last = first+span-1; /* Last register covered by the sequence. */\n    int len;\n\n    if (is_zero || is_xzero) {\n        /* Handle splitting of ZERO / XZERO. 
*/\n        if (index != first) {\n            len = index-first;\n            if (len > HLL_SPARSE_ZERO_MAX_LEN) {\n                HLL_SPARSE_XZERO_SET(n,len);\n                n += 2;\n            } else {\n                HLL_SPARSE_ZERO_SET(n,len);\n                n++;\n            }\n        }\n        HLL_SPARSE_VAL_SET(n,count,1);\n        n++;\n        if (index != last) {\n            len = last-index;\n            if (len > HLL_SPARSE_ZERO_MAX_LEN) {\n                HLL_SPARSE_XZERO_SET(n,len);\n                n += 2;\n            } else {\n                HLL_SPARSE_ZERO_SET(n,len);\n                n++;\n            }\n        }\n    } else {\n        /* Handle splitting of VAL. */\n        int curval = HLL_SPARSE_VAL_VALUE(p);\n\n        if (index != first) {\n            len = index-first;\n            HLL_SPARSE_VAL_SET(n,curval,len);\n            n++;\n        }\n        HLL_SPARSE_VAL_SET(n,count,1);\n        n++;\n        if (index != last) {\n            len = last-index;\n            HLL_SPARSE_VAL_SET(n,curval,len);\n            n++;\n        }\n    }\n\n    /* Step 3: substitute the new sequence with the old one.\n     *\n     * Note that we already allocated space on the sds string\n     * calling sdsResize(). */\n    int seqlen = n-seq;\n    int oldlen = is_xzero ? 2 : 1;\n    int deltalen = seqlen-oldlen;\n\n    if (deltalen > 0 &&\n        sdslen(hll) + deltalen > HLL_SPARSE_MAX_BYTES) goto promote;\n    serverAssert(sdslen(hll) + deltalen <= sdsalloc(hll));\n    if (deltalen && next) memmove(next+deltalen,next,end-next);\n    sdsIncrLen(hll,deltalen);\n    memcpy(p,seq,seqlen);\n    end += deltalen;\n\nupdated:\n    /* Step 4: Merge adjacent values if possible.\n     *\n     * The representation was updated, however the resulting representation\n     * may not be optimal: adjacent VAL opcodes can sometimes be merged into\n     * a single one. */\n    p = prev ? 
prev : sparse;\n    int scanlen = 5; /* Scan up to 5 opcodes starting from prev. */\n    while (p < end && scanlen--) {\n        if (HLL_SPARSE_IS_XZERO(p)) {\n            p += 2;\n            continue;\n        } else if (HLL_SPARSE_IS_ZERO(p)) {\n            p++;\n            continue;\n        }\n        /* We need two adjacent VAL opcodes to try a merge, having\n         * the same value, and a len that fits the VAL opcode max len. */\n        if (p+1 < end && HLL_SPARSE_IS_VAL(p+1)) {\n            int v1 = HLL_SPARSE_VAL_VALUE(p);\n            int v2 = HLL_SPARSE_VAL_VALUE(p+1);\n            if (v1 == v2) {\n                int len = HLL_SPARSE_VAL_LEN(p)+HLL_SPARSE_VAL_LEN(p+1);\n                if (len <= HLL_SPARSE_VAL_MAX_LEN) {\n                    HLL_SPARSE_VAL_SET(p+1,v1,len);\n                    memmove(p,p+1,end-p);\n                    sdsIncrLen(hll,-1);\n                    end--;\n                    /* After a merge we reiterate without incrementing 'p'\n                     * in order to try to merge the just merged value with\n                     * a value on its right. */\n                    continue;\n                }\n            }\n        }\n        p++;\n    }\n\n    /* Invalidate the cached cardinality. */\n    hdr = (struct hllhdr *)hll;\n    HLL_INVALIDATE_CACHE(hdr);\n    return 1;\n\npromote: /* Promote to dense representation. */\n    if (hllSparseToDense(&hll) == C_ERR) return -1; /* Corrupted HLL. */\n    *hll_ptr = hll;\n    hdr = (struct hllhdr *)hll;\n\n    /* We need to call hllDenseSet() to perform the operation after the\n     * conversion. However the result must be 1, since if we need to\n     * convert from sparse to dense a register requires to be updated.\n     *\n     * Note that this in turn means that PFADD will make sure the command\n     * is propagated to slaves / AOF, so if there is a sparse -> dense\n     * conversion, it will be performed in all the slaves as well. 
*/\n    int dense_retval = hllDenseSet(hdr->registers,index,count);\n    serverAssert(dense_retval == 1);\n    *promoted = 1;\n    return dense_retval;\n}\n\n/* \"Add\" the element in the sparse hyperloglog data structure.\n * Actually nothing is added, but the max 0 pattern counter of the subset\n * the element belongs to is incremented if needed.\n *\n * This function is actually a wrapper for hllSparseSet(), it only performs\n * the hashing of the element to obtain the index and zeros run length. */\nint hllSparseAdd(sds* hll_ptr, unsigned char *ele, size_t elesize, int* promoted) {\n    long index;\n    uint8_t count = hllPatLen(ele,elesize,&index);\n    /* Update the register if this element produced a longer run of zeroes. */\n    return hllSparseSet(hll_ptr,index,count, promoted);\n}\n/* Compute the register histogram in the sparse representation. */\nvoid hllSparseRegHisto(uint8_t* sparse, int sparselen, int* invalid, int* reghisto) {\n  int idx = 0, runlen, regval;\n  uint8_t *end = sparse + sparselen, *p = sparse;\n\n  while (p < end) {\n    if (HLL_SPARSE_IS_ZERO(p)) {\n      runlen = HLL_SPARSE_ZERO_LEN(p);\n      idx += runlen;\n      reghisto[0] += runlen;\n      p++;\n    } else if (HLL_SPARSE_IS_XZERO(p)) {\n      runlen = HLL_SPARSE_XZERO_LEN(p);\n      idx += runlen;\n      reghisto[0] += runlen;\n      p += 2;\n    } else {\n      runlen = HLL_SPARSE_VAL_LEN(p);\n      regval = HLL_SPARSE_VAL_VALUE(p);\n      idx += runlen;\n      reghisto[regval] += runlen;\n      p++;\n    }\n  }\n  if (idx != HLL_REGISTERS && invalid)\n    *invalid = 1;\n}\n\n/* ========================= HyperLogLog Count ==============================\n * This is the core of the algorithm where the approximated count is computed.\n * The function uses the lower level hllDenseRegHisto() and hllSparseRegHisto()\n * functions as helpers to compute histogram of register values part of the\n * computation, which is representation-specific, while all the rest is common. 
*/\n\n/* Implements the register histogram calculation for uint8_t data type\n * which is only used internally as speedup for PFCOUNT with multiple keys. */\nvoid hllRawRegHisto(uint8_t* registers, int* reghisto) {\n  uint64_t* word = (uint64_t*)registers;\n  uint8_t* bytes;\n  int j;\n\n  for (j = 0; j < HLL_REGISTERS / 8; j++) {\n    if (*word == 0) {\n      reghisto[0] += 8;\n    } else {\n      bytes = (uint8_t*)word;\n      reghisto[bytes[0]]++;\n      reghisto[bytes[1]]++;\n      reghisto[bytes[2]]++;\n      reghisto[bytes[3]]++;\n      reghisto[bytes[4]]++;\n      reghisto[bytes[5]]++;\n      reghisto[bytes[6]]++;\n      reghisto[bytes[7]]++;\n    }\n    word++;\n  }\n}\n\n/* Helper function sigma as defined in\n * \"New cardinality estimation algorithms for HyperLogLog sketches\"\n * Otmar Ertl, arXiv:1702.01284 */\ndouble hllSigma(double x) {\n  if (x == 1.)\n    return INFINITY;\n  double zPrime;\n  double y = 1;\n  double z = x;\n  do {\n    x *= x;\n    zPrime = z;\n    z += x * y;\n    y += y;\n  } while (zPrime != z);\n  return z;\n}\n\n/* Helper function tau as defined in\n * \"New cardinality estimation algorithms for HyperLogLog sketches\"\n * Otmar Ertl, arXiv:1702.01284 */\ndouble hllTau(double x) {\n  if (x == 0. || x == 1.)\n    return 0.;\n  double zPrime;\n  double y = 1.0;\n  double z = 1 - x;\n  do {\n    x = sqrt(x);\n    zPrime = z;\n    y *= 0.5;\n    z -= pow(1 - x, 2) * y;\n  } while (zPrime != z);\n  return z / 3;\n}\n\n/* Return the approximated cardinality of the set based on the harmonic\n * mean of the registers values. 
'hdr' points to the start of the SDS\n * representing the String object holding the HLL representation.\n *\n * If the sparse representation of the HLL object is not valid, the integer\n * pointed by 'invalid' is set to non-zero, otherwise it is left untouched.\n *\n * hllCount() supports a special internal-only encoding of HLL_RAW, that\n * is, hdr->registers will point to an uint8_t array of HLL_REGISTERS element.\n * This is useful in order to speedup PFCOUNT when called against multiple\n * keys (no need to work with 6-bit integers encoding). */\nuint64_t hllCount(struct hllhdr* hdr, int* invalid) {\n  double m = HLL_REGISTERS;\n  double E;\n  int j;\n  /* Note that reghisto size could be just HLL_Q+2, because HLL_Q+1 is\n   * the maximum frequency of the \"000...1\" sequence the hash function is\n   * able to return. However it is slow to check for sanity of the\n   * input: instead we keep the histogram array at a safe size: overflows will\n   * just write data to wrong, but correctly allocated, places. */\n  int reghisto[64] = {0};\n\n  /* Compute register histogram */\n  if (hdr->encoding == HLL_DENSE) {\n    hllDenseRegHisto(hdr->registers, reghisto);\n  } else if (hdr->encoding == HLL_SPARSE) {\n    hllSparseRegHisto(hdr->registers, sdslen((sds)hdr) - HLL_HDR_SIZE, invalid, reghisto);\n  } else if (hdr->encoding == HLL_RAW) {\n    hllRawRegHisto(hdr->registers, reghisto);\n  } else {\n    serverPanic(\"Unknown HyperLogLog encoding in hllCount()\");\n  }\n\n  /* Estimate cardinality from register histogram. 
See:\n   * \"New cardinality estimation algorithms for HyperLogLog sketches\"\n   * Otmar Ertl, arXiv:1702.01284 */\n  double z = m * hllTau((m - reghisto[HLL_Q + 1]) / (double)m);\n  for (j = HLL_Q; j >= 1; --j) {\n    z += reghisto[j];\n    z *= 0.5;\n  }\n  z += m * hllSigma(reghisto[0] / (double)m);\n  E = llroundl(HLL_ALPHA_INF * m * m / z);\n\n  return (uint64_t)E;\n}\n\n#if 0\n/* Merge by computing MAX(registers[i],hll[i]) the HyperLogLog 'hll'\n * with an array of uint8_t HLL_REGISTERS registers pointed by 'max'.\n *\n * The hll object must be already validated via isHLLObjectOrReply()\n * or in some other way.\n *\n * If the HyperLogLog is sparse and is found to be invalid, C_ERR\n * is returned, otherwise the function always succeeds. */\nint hllMerge(uint8_t* max, robj* hll) {\n  struct hllhdr* hdr = hll->ptr;\n  int i;\n\n  if (hdr->encoding == HLL_DENSE) {\n    uint8_t val;\n\n    for (i = 0; i < HLL_REGISTERS; i++) {\n      HLL_DENSE_GET_REGISTER(val, hdr->registers, i);\n      if (val > max[i])\n        max[i] = val;\n    }\n  } else {\n    uint8_t *p = hll->ptr, *end = p + sdslen(hll->ptr);\n    long runlen, regval;\n\n    p += HLL_HDR_SIZE;\n    i = 0;\n    while (p < end) {\n      if (HLL_SPARSE_IS_ZERO(p)) {\n        runlen = HLL_SPARSE_ZERO_LEN(p);\n        i += runlen;\n        p++;\n      } else if (HLL_SPARSE_IS_XZERO(p)) {\n        runlen = HLL_SPARSE_XZERO_LEN(p);\n        i += runlen;\n        p += 2;\n      } else {\n        runlen = HLL_SPARSE_VAL_LEN(p);\n        regval = HLL_SPARSE_VAL_VALUE(p);\n        if ((runlen + i) > HLL_REGISTERS)\n          break; /* Overflow. 
*/\n        while (runlen--) {\n          if (regval > max[i])\n            max[i] = regval;\n          i++;\n        }\n        p++;\n      }\n    }\n    if (i != HLL_REGISTERS)\n      return C_ERR;\n  }\n  return C_OK;\n}\n\n/* ========================== HyperLogLog commands ========================== */\nrobj* createHLLObject(void) {\n  robj* o;\n  struct hllhdr* hdr;\n  sds s;\n  uint8_t* p;\n  int sparselen =\n      HLL_HDR_SIZE +\n      (((HLL_REGISTERS + (HLL_SPARSE_XZERO_MAX_LEN - 1)) / HLL_SPARSE_XZERO_MAX_LEN) * 2);\n  int aux;\n\n  /* Populate the sparse representation with as many XZERO opcodes as\n   * needed to represent all the registers. */\n  aux = HLL_REGISTERS;\n  s = sdsnewlen(NULL, sparselen);\n  p = (uint8_t*)s + HLL_HDR_SIZE;\n  while (aux) {\n    int xzero = HLL_SPARSE_XZERO_MAX_LEN;\n    if (xzero > aux)\n      xzero = aux;\n    HLL_SPARSE_XZERO_SET(p, xzero);\n    p += 2;\n    aux -= xzero;\n  }\n  serverAssert((p - (uint8_t*)s) == sparselen);\n\n  /* Create the actual object. */\n  o = createObject(OBJ_STRING, s);\n  hdr = o->ptr;\n  memcpy(hdr->magic, \"HYLL\", 4);\n  hdr->encoding = HLL_SPARSE;\n  return o;\n}\n#endif\n\n/* ========================== Dragonfly custom functions ===================== */\n\nenum HllValidness isValidHLL(struct HllBufferPtr hll_buffer) {\n  struct hllhdr* hdr;\n\n  if (hll_buffer.size < sizeof(*hdr)) {\n    return HLL_INVALID;\n  }\n\n  hdr = (struct hllhdr*)hll_buffer.hll;\n\n  /* Magic should be \"HYLL\". */\n  if (hdr->magic[0] != 'H' || hdr->magic[1] != 'Y' || hdr->magic[2] != 'L' ||\n      hdr->magic[3] != 'L') {\n    return HLL_INVALID;\n  }\n\n  if (hdr->encoding > HLL_MAX_ENCODING) {\n    return HLL_INVALID;\n  }\n\n  switch (hdr->encoding) {\n    case HLL_DENSE:\n      /* Dense representation string length should match exactly. */\n      return (hll_buffer.size == HLL_DENSE_SIZE) ? 
HLL_VALID_DENSE : HLL_INVALID;\n    case HLL_SPARSE:\n      return HLL_VALID_SPARSE;\n    default:\n      return HLL_INVALID;\n  }\n}\n\nsize_t getDenseHllSize() {\n  return HLL_DENSE_SIZE;\n}\n\nsize_t getSparseHllInitSize() {\n  return HLL_HDR_SIZE + (((HLL_REGISTERS+(HLL_SPARSE_XZERO_MAX_LEN-1)) /\n                     HLL_SPARSE_XZERO_MAX_LEN)*2);\n}\n\nint initSparseHll(struct HllBufferPtr hll_ptr) {\n  if (hll_ptr.size != getSparseHllInitSize()) {\n    return C_ERR;\n  }\n\n  memset(hll_ptr.hll, 0, hll_ptr.size);\n\n  /* Populate the sparse representation with as many XZERO opcodes as\n    * needed to represent all the registers. */\n  int aux = HLL_REGISTERS;\n  uint8_t* p = (uint8_t*)hll_ptr.hll + HLL_HDR_SIZE;\n  while(aux) {\n      int xzero = HLL_SPARSE_XZERO_MAX_LEN;\n      if (xzero > aux) xzero = aux;\n      HLL_SPARSE_XZERO_SET(p,xzero);\n      p += 2;\n      aux -= xzero;\n  }\n\n  struct hllhdr* hdr = (struct hllhdr*)hll_ptr.hll;\n\n  memcpy(hdr->magic, \"HYLL\", 4);\n  hdr->encoding = HLL_SPARSE;\n  return C_OK;\n}\n\nint createDenseHll(struct HllBufferPtr hll_ptr) {\n  if (hll_ptr.size != getDenseHllSize()) {\n    return C_ERR;\n  }\n\n  memset(hll_ptr.hll, 0, hll_ptr.size);\n  struct hllhdr* hdr = (struct hllhdr*)hll_ptr.hll;\n  memcpy(hdr->magic, \"HYLL\", 4);\n  hdr->encoding = HLL_DENSE;\n  return C_OK;\n}\n\n/* This is a copied & modified version of hllSparseToDense() above that does not use robj */\nint convertSparseToDenseHll(struct HllBufferPtr in_hll, struct HllBufferPtr out_hll) {\n  struct hllhdr *hdr, *oldhdr = (struct hllhdr*)in_hll.hll;\n  int idx = 0, runlen, regval;\n  uint8_t *p = (uint8_t*)in_hll.hll, *end = p + in_hll.size;\n\n  if (oldhdr->encoding != HLL_SPARSE)\n    return C_ERR;\n  if (out_hll.size != getDenseHllSize())\n    return C_ERR;\n\n  /* Create a string of the right size filled with zero bytes.\n   * Note that the cached cardinality is set to 0 as a side effect\n   * that is exactly the cardinality of an empty HLL. 
*/\n  hdr = (struct hllhdr*)out_hll.hll;\n  *hdr = *oldhdr; /* This will copy the magic and cached cardinality. */\n  hdr->encoding = HLL_DENSE;\n\n  /* Now read the sparse representation and set non-zero registers\n   * accordingly. */\n  p += HLL_HDR_SIZE;\n  while (p < end) {\n    if (HLL_SPARSE_IS_ZERO(p)) {\n      runlen = HLL_SPARSE_ZERO_LEN(p);\n      idx += runlen;\n      p++;\n    } else if (HLL_SPARSE_IS_XZERO(p)) {\n      runlen = HLL_SPARSE_XZERO_LEN(p);\n      idx += runlen;\n      p += 2;\n    } else {\n      runlen = HLL_SPARSE_VAL_LEN(p);\n      regval = HLL_SPARSE_VAL_VALUE(p);\n      if ((runlen + idx) > HLL_REGISTERS)\n        break; /* Overflow. */\n      while (runlen--) {\n        HLL_DENSE_SET_REGISTER(hdr->registers, idx, regval);\n        idx++;\n      }\n      p++;\n    }\n  }\n\n  /* If the sparse representation was valid, we expect to find idx\n   * set to HLL_REGISTERS. */\n  if (idx != HLL_REGISTERS) {\n    return C_ERR;\n  }\n\n  return C_OK;\n}\n\nint pfadd_sparse(sds* hll_ptr, const unsigned char* value,\n                 size_t size, int* promoted) {\n  struct hllhdr* hdr = (struct hllhdr*)(*hll_ptr);\n  int retval = hllSparseAdd(hll_ptr, (unsigned char*)value, size, promoted);\n  switch (retval) {\n    case 1:\n      HLL_INVALIDATE_CACHE(hdr);\n      return 1;\n    default:\n      return retval;\n  }\n}\n\nint pfadd_dense(struct HllBufferPtr hll_ptr, const unsigned char* value,\n                size_t size) {\n  if (isValidHLL(hll_ptr) != HLL_VALID_DENSE)\n    return C_ERR;\n\n  struct hllhdr* hdr = (struct hllhdr*)hll_ptr.hll;\n\n  /* Perform the low level ADD operation for every element. 
*/\n  int retval = hllDenseAdd(hdr->registers, (unsigned char*)value, size);\n  switch (retval) {\n    case 1:\n      HLL_INVALIDATE_CACHE(hdr);\n      return 1;\n    default:\n      return retval;\n  }\n}\n\nint64_t pfcountSingle(struct HllBufferPtr hll_ptr) {\n  uint64_t card;\n\n  if (isValidHLL(hll_ptr) != HLL_VALID_DENSE)\n    return C_ERR;\n\n  /* Check if the cached cardinality is valid. */\n  struct hllhdr* hdr = (struct hllhdr*)hll_ptr.hll;\n  if (HLL_VALID_CACHE(hdr)) {\n    /* Just return the cached value. */\n    card = (uint64_t)hdr->card[0];\n    card |= (uint64_t)hdr->card[1] << 8;\n    card |= (uint64_t)hdr->card[2] << 16;\n    card |= (uint64_t)hdr->card[3] << 24;\n    card |= (uint64_t)hdr->card[4] << 32;\n    card |= (uint64_t)hdr->card[5] << 40;\n    card |= (uint64_t)hdr->card[6] << 48;\n    card |= (uint64_t)hdr->card[7] << 56;\n  } else {\n    int invalid = 0;\n    /* Recompute it and update the cached value. */\n    card = hllCount(hdr, &invalid);\n    if (invalid) {\n      return -1;\n    }\n    hdr->card[0] = card & 0xff;\n    hdr->card[1] = (card >> 8) & 0xff;\n    hdr->card[2] = (card >> 16) & 0xff;\n    hdr->card[3] = (card >> 24) & 0xff;\n    hdr->card[4] = (card >> 32) & 0xff;\n    hdr->card[5] = (card >> 40) & 0xff;\n    hdr->card[6] = (card >> 48) & 0xff;\n    hdr->card[7] = (card >> 56) & 0xff;\n  }\n  return card;\n}\n\n/* Merge dense-encoded HLL */\nstatic void hllMergeDense(uint8_t* registers, struct HllBufferPtr to) {\n  uint8_t val;\n  struct hllhdr* hll_hdr = (struct hllhdr*)to.hll;\n\n  for (int i = 0; i < HLL_REGISTERS; i++) {\n    HLL_DENSE_GET_REGISTER(val, hll_hdr->registers, i);\n    if (val > registers[i]) {\n      registers[i] = val;\n    }\n  }\n}\n\nint64_t pfcountMulti(struct HllBufferPtr* hlls, size_t hlls_count) {\n  struct hllhdr* hdr;\n  uint8_t max[HLL_HDR_SIZE + HLL_REGISTERS];\n\n  /* Compute an HLL with M[i] = MAX(M[i]_j). 
*/\n  memset(max, 0, sizeof(max));\n  hdr = (struct hllhdr*)max;\n  hdr->encoding = HLL_RAW; /* Special internal-only encoding. */\n  for (size_t j = 0; j < hlls_count; j++) {\n    /* Check type and size. */\n    struct HllBufferPtr hll = hlls[j];\n    if (isValidHLL(hll) != HLL_VALID_DENSE) {\n      return C_ERR;\n    }\n\n    hllMergeDense(max, hll);\n  }\n\n  /* Compute cardinality of the resulting set. */\n  return hllCount(hdr, NULL);\n}\n\nint pfmerge(struct HllBufferPtr* in_hlls, size_t in_hlls_count, struct HllBufferPtr out_hll) {\n  if (isValidHLL(out_hll) != HLL_VALID_DENSE) {\n    return C_ERR;\n  }\n\n  uint8_t max[HLL_REGISTERS];\n\n  /* Compute an HLL with M[i] = MAX(M[i]_j).\n   * We store the maximum into the max array of registers. We'll write\n   * it to the target variable later. */\n  memset(max, 0, sizeof(max));\n\n  for (size_t j = 0; j < in_hlls_count; j++) {\n    struct HllBufferPtr hll = in_hlls[j];\n    if (isValidHLL(hll) != HLL_VALID_DENSE) {\n      return C_ERR;\n    }\n\n    hllMergeDense(max, hll);\n  }\n\n  struct hllhdr* hdr = (struct hllhdr*)out_hll.hll;\n  for (size_t j = 0; j < HLL_REGISTERS; j++) {\n    hllDenseSet(hdr->registers, j, max[j]);\n  }\n  HLL_INVALIDATE_CACHE(hdr);\n\n  return C_OK;\n}\n"
  },
  {
    "path": "src/redis/hyperloglog.h",
    "content": "#ifndef __REDIS_HYPERLOGLOG_H\n#define __REDIS_HYPERLOGLOG_H\n\n#include <stddef.h>\n#include <stdint.h>\n\n#include \"redis/sds.h\"\n\n/* This version of hyperloglog, forked from Redis, only supports using the dense format of HLL.\n * The reason is that it is of a fixed size, which makes it easier to integrate into Dragonfly.\n * We do support converting of existing sprase-encoded HLL into dense-encoded, which can be useful\n * for replication, serialization, etc. */\n\nenum HllValidness {\n  HLL_INVALID,\n  HLL_VALID_SPARSE,\n  HLL_VALID_DENSE,\n};\n\n/* Convenience struct for pointing to an Hll buffer along with its size */\nstruct HllBufferPtr {\n  unsigned char* hll;\n  size_t size;\n};\n\nenum HllValidness isValidHLL(struct HllBufferPtr hll_ptr);\n\nsize_t getDenseHllSize();\nsize_t getSparseHllInitSize();\n\n\nint initSparseHll(struct HllBufferPtr hll_ptr);\n/* Writes into `hll_ptr` an empty dense-encoded HLL.\n * Returns 0 upon success, or a negative number when `hll_ptr.size` is different from\n * getDenseHllSize() */\nint createDenseHll(struct HllBufferPtr hll_ptr);\n\n/* Converts an existing sparse-encoded HLL pointed by `in_hll`, and writes the converted result into\n * `out_hll`.\n * Returns 0 upon success, otherwise a negative number.\n * Failures can occur when `out_hll.size` is different from getDenseHllSize() or when input is not a\n * valid sparse-encoded HLL. */\nint convertSparseToDenseHll(struct HllBufferPtr in_hll, struct HllBufferPtr out_hll);\n\n/* Adds `value` of size `size`, to `hll_ptr`.\n * If `obj` does not have an underlying type of HLL a negative number is returned. */\nint pfadd_sparse(sds* hll_ptr, const unsigned char* value, size_t size, int* promoted);\nint pfadd_dense(struct HllBufferPtr hll_ptr, const unsigned char* value, size_t size);\n\n/* Returns the estimated count of elements for `hll_ptr`.\n * If `hll_ptr` is not a valid dense-encoded HLL, a negative number is returned. 
*/\nint64_t pfcountSingle(struct HllBufferPtr hll_ptr);\n\n/* Returns the estimated count for all HLLs in `hlls` array of size `hlls_count`.\n * All `hlls` elements must be valid, dense-encoded HLLs. */\nint64_t pfcountMulti(struct HllBufferPtr* hlls, size_t hlls_count);\n\n/* Merges array of HLLs pointed to by `in_hlls` of size `in_hlls_count` into `out_hll`.\n * Returns 0 upon success, otherwise a negative number.\n * Failure can occur when any of `in_hlls` or `out_hll` is not a dense-encoded HLL.\n * `out_hll` *can* be one of the elements in `in_hlls`. */\nint pfmerge(struct HllBufferPtr* in_hlls, size_t in_hlls_count, struct HllBufferPtr out_hll);\n\n#endif\n"
  },
  {
    "path": "src/redis/intset.c",
    "content": "/*\n * Copyright (c) 2009-2012, Pieter Noordhuis <pcnoordhuis at gmail dot com>\n * Copyright (c) 2009-2012, Salvatore Sanfilippo <antirez at gmail dot com>\n * All rights reserved.\n *\n * Redistribution and use in source and binary forms, with or without\n * modification, are permitted provided that the following conditions are met:\n *\n *   * Redistributions of source code must retain the above copyright notice,\n *     this list of conditions and the following disclaimer.\n *   * Redistributions in binary form must reproduce the above copyright\n *     notice, this list of conditions and the following disclaimer in the\n *     documentation and/or other materials provided with the distribution.\n *   * Neither the name of Redis nor the names of its contributors may be used\n *     to endorse or promote products derived from this software without\n *     specific prior written permission.\n *\n * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS \"AS IS\"\n * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE\n * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE\n * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE\n * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR\n * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF\n * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS\n * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN\n * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)\n * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE\n * POSSIBILITY OF SUCH DAMAGE.\n */\n\n#include <assert.h>\n#include <stdio.h>\n#include <stdlib.h>\n#include <string.h>\n#include \"intset.h\"\n#include \"zmalloc.h\"\n#include \"endianconv.h\"\n\n/* Note that these encodings are ordered, so:\n * INTSET_ENC_INT16 < INTSET_ENC_INT32 < INTSET_ENC_INT64. 
*/\n#define INTSET_ENC_INT16 (sizeof(int16_t))\n#define INTSET_ENC_INT32 (sizeof(int32_t))\n#define INTSET_ENC_INT64 (sizeof(int64_t))\n\n/* Return the required encoding for the provided value. */\nstatic uint8_t _intsetValueEncoding(int64_t v) {\n    if (v < INT32_MIN || v > INT32_MAX)\n        return INTSET_ENC_INT64;\n    else if (v < INT16_MIN || v > INT16_MAX)\n        return INTSET_ENC_INT32;\n    else\n        return INTSET_ENC_INT16;\n}\n\n/* Return the value at pos, given an encoding. */\nstatic int64_t _intsetGetEncoded(intset *is, int pos, uint8_t enc) {\n    int64_t v64;\n    int32_t v32;\n    int16_t v16;\n\n    if (enc == INTSET_ENC_INT64) {\n        memcpy(&v64,((int64_t*)is->contents)+pos,sizeof(v64));\n        memrev64ifbe(&v64);\n        return v64;\n    } else if (enc == INTSET_ENC_INT32) {\n        memcpy(&v32,((int32_t*)is->contents)+pos,sizeof(v32));\n        memrev32ifbe(&v32);\n        return v32;\n    } else {\n        memcpy(&v16,((int16_t*)is->contents)+pos,sizeof(v16));\n        memrev16ifbe(&v16);\n        return v16;\n    }\n}\n\n/* Return the value at pos, using the configured encoding. */\nstatic int64_t _intsetGet(intset *is, int pos) {\n    return _intsetGetEncoded(is,pos,intrev32ifbe(is->encoding));\n}\n\n/* Set the value at pos, using the configured encoding. */\nstatic void _intsetSet(intset *is, int pos, int64_t value) {\n    uint32_t encoding = intrev32ifbe(is->encoding);\n\n    if (encoding == INTSET_ENC_INT64) {\n        ((int64_t*)is->contents)[pos] = value;\n        memrev64ifbe(((int64_t*)is->contents)+pos);\n    } else if (encoding == INTSET_ENC_INT32) {\n        ((int32_t*)is->contents)[pos] = value;\n        memrev32ifbe(((int32_t*)is->contents)+pos);\n    } else {\n        ((int16_t*)is->contents)[pos] = value;\n        memrev16ifbe(((int16_t*)is->contents)+pos);\n    }\n}\n\n/* Create an empty intset. 
*/\nintset *intsetNew(void) {\n    intset *is = zmalloc(sizeof(intset));\n    is->encoding = intrev32ifbe(INTSET_ENC_INT16);\n    is->length = 0;\n    return is;\n}\n\n/* Resize the intset */\nstatic intset *intsetResize(intset *is, uint32_t len) {\n    uint64_t size = (uint64_t)len*intrev32ifbe(is->encoding);\n    assert(size <= SIZE_MAX - sizeof(intset));\n    is = zrealloc(is,sizeof(intset)+size);\n    return is;\n}\n\n/* Search for the position of \"value\". Return 1 when the value was found and\n * sets \"pos\" to the position of the value within the intset. Return 0 when\n * the value is not present in the intset and sets \"pos\" to the position\n * where \"value\" can be inserted. */\nstatic uint8_t intsetSearch(intset *is, int64_t value, uint32_t *pos) {\n    int min = 0, max = intrev32ifbe(is->length)-1, mid = -1;\n    int64_t cur = -1;\n\n    /* The value can never be found when the set is empty */\n    if (intrev32ifbe(is->length) == 0) {\n        if (pos) *pos = 0;\n        return 0;\n    } else {\n        /* Check for the case where we know we cannot find the value,\n         * but do know the insert position. */\n        if (value > _intsetGet(is,max)) {\n            if (pos) *pos = intrev32ifbe(is->length);\n            return 0;\n        } else if (value < _intsetGet(is,0)) {\n            if (pos) *pos = 0;\n            return 0;\n        }\n    }\n\n    while(max >= min) {\n        mid = ((unsigned int)min + (unsigned int)max) >> 1;\n        cur = _intsetGet(is,mid);\n        if (value > cur) {\n            min = mid+1;\n        } else if (value < cur) {\n            max = mid-1;\n        } else {\n            break;\n        }\n    }\n\n    if (value == cur) {\n        if (pos) *pos = mid;\n        return 1;\n    } else {\n        if (pos) *pos = min;\n        return 0;\n    }\n}\n\n/* Upgrades the intset to a larger encoding and inserts the given integer. 
*/\nstatic intset *intsetUpgradeAndAdd(intset *is, int64_t value) {\n    uint8_t curenc = intrev32ifbe(is->encoding);\n    uint8_t newenc = _intsetValueEncoding(value);\n    int length = intrev32ifbe(is->length);\n    int prepend = value < 0 ? 1 : 0;\n\n    /* First set new encoding and resize */\n    is->encoding = intrev32ifbe(newenc);\n    is = intsetResize(is,intrev32ifbe(is->length)+1);\n\n    /* Upgrade back-to-front so we don't overwrite values.\n     * Note that the \"prepend\" variable is used to make sure we have an empty\n     * space at either the beginning or the end of the intset. */\n    while(length--)\n        _intsetSet(is,length+prepend,_intsetGetEncoded(is,length,curenc));\n\n    /* Set the value at the beginning or the end. */\n    if (prepend)\n        _intsetSet(is,0,value);\n    else\n        _intsetSet(is,intrev32ifbe(is->length),value);\n    is->length = intrev32ifbe(intrev32ifbe(is->length)+1);\n    return is;\n}\n\nstatic void intsetMoveTail(intset *is, uint32_t from, uint32_t to) {\n    void *src, *dst;\n    uint32_t bytes = intrev32ifbe(is->length)-from;\n    uint32_t encoding = intrev32ifbe(is->encoding);\n\n    if (encoding == INTSET_ENC_INT64) {\n        src = (int64_t*)is->contents+from;\n        dst = (int64_t*)is->contents+to;\n        bytes *= sizeof(int64_t);\n    } else if (encoding == INTSET_ENC_INT32) {\n        src = (int32_t*)is->contents+from;\n        dst = (int32_t*)is->contents+to;\n        bytes *= sizeof(int32_t);\n    } else {\n        src = (int16_t*)is->contents+from;\n        dst = (int16_t*)is->contents+to;\n        bytes *= sizeof(int16_t);\n    }\n    memmove(dst,src,bytes);\n}\n\n/* Insert an integer in the intset */\nintset *intsetAdd(intset *is, int64_t value, uint8_t *success) {\n    uint8_t valenc = _intsetValueEncoding(value);\n    uint32_t pos;\n    if (success) *success = 1;\n\n    /* Upgrade encoding if necessary. 
If we need to upgrade, we know that\n     * this value should be either appended (if > 0) or prepended (if < 0),\n     * because it lies outside the range of existing values. */\n    if (valenc > intrev32ifbe(is->encoding)) {\n        /* This always succeeds, so we don't need to carry *success. */\n        return intsetUpgradeAndAdd(is,value);\n    } else {\n        /* Abort if the value is already present in the set.\n         * This call will populate \"pos\" with the right position to insert\n         * the value when it cannot be found. */\n        if (intsetSearch(is,value,&pos)) {\n            if (success) *success = 0;\n            return is;\n        }\n\n        is = intsetResize(is,intrev32ifbe(is->length)+1);\n        if (pos < intrev32ifbe(is->length)) intsetMoveTail(is,pos,pos+1);\n    }\n\n    _intsetSet(is,pos,value);\n    is->length = intrev32ifbe(intrev32ifbe(is->length)+1);\n    return is;\n}\n\n/* Delete integer from intset */\nintset *intsetRemove(intset *is, int64_t value, int *success) {\n    uint8_t valenc = _intsetValueEncoding(value);\n    uint32_t pos;\n    if (success) *success = 0;\n\n    if (valenc <= intrev32ifbe(is->encoding) && intsetSearch(is,value,&pos)) {\n        uint32_t len = intrev32ifbe(is->length);\n\n        /* We know we can delete */\n        if (success) *success = 1;\n\n        /* Overwrite value with tail and update length */\n        if (pos < (len-1)) intsetMoveTail(is,pos+1,pos);\n        is = intsetResize(is,len-1);\n        is->length = intrev32ifbe(len-1);\n    }\n    return is;\n}\n\nintset *intsetTrimTail(intset *is, uint32_t tail_len) {\n    uint32_t len = intrev32ifbe(is->length);\n    uint32_t new_len = tail_len >= len ? 
0 : len - tail_len;\n    is->length = intrev32ifbe(new_len);\n    return intsetResize(is, new_len);\n}\n\n/* Determine whether a value belongs to this set */\nuint8_t intsetFind(intset *is, int64_t value) {\n    uint8_t valenc = _intsetValueEncoding(value);\n    return valenc <= intrev32ifbe(is->encoding) && intsetSearch(is,value,NULL);\n}\n\n/* Return random member */\nint64_t intsetRandom(intset *is) {\n    uint32_t len = intrev32ifbe(is->length);\n    assert(len); /* avoid division by zero on corrupt intset payload. */\n    return _intsetGet(is,rand()%len);\n}\n\n/* Get the value at the given position. When this position is\n * out of range the function returns 0, when in range it returns 1. */\nuint8_t intsetGet(intset *is, uint32_t pos, int64_t *value) {\n    if (pos < intrev32ifbe(is->length)) {\n        *value = _intsetGet(is,pos);\n        return 1;\n    }\n    return 0;\n}\n\n/* Return intset length */\nuint32_t intsetLen(const intset *is) {\n    return intrev32ifbe(is->length);\n}\n\n/* Return intset blob size in bytes. */\nsize_t intsetBlobLen(intset *is) {\n    return sizeof(intset)+(size_t)intrev32ifbe(is->length)*intrev32ifbe(is->encoding);\n}\n\n/* Validate the integrity of the data structure.\n * when `deep` is 0, only the integrity of the header is validated.\n * when `deep` is 1, we make sure there are no duplicate or out of order records. */\nint intsetValidateIntegrity(const unsigned char *p, size_t size, int deep) {\n    intset *is = (intset *)p;\n    /* check that we can actually read the header. 
*/\n    if (size < sizeof(*is))\n        return 0;\n\n    uint32_t encoding = intrev32ifbe(is->encoding);\n\n    size_t record_size;\n    if (encoding == INTSET_ENC_INT64) {\n        record_size = INTSET_ENC_INT64;\n    } else if (encoding == INTSET_ENC_INT32) {\n        record_size = INTSET_ENC_INT32;\n    } else if (encoding == INTSET_ENC_INT16){\n        record_size = INTSET_ENC_INT16;\n    } else {\n        return 0;\n    }\n\n    /* check that the size matches (all records are inside the buffer). */\n    uint32_t count = intrev32ifbe(is->length);\n    if (sizeof(*is) + count*record_size != size)\n        return 0;\n\n    /* check that the set is not empty. */\n    if (count==0)\n        return 0;\n\n    if (!deep)\n        return 1;\n\n    /* check that there are no dup or out of order records. */\n    int64_t prev = _intsetGet(is,0);\n    for (uint32_t i=1; i<count; i++) {\n        int64_t cur = _intsetGet(is,i);\n        if (cur <= prev)\n            return 0;\n        prev = cur;\n    }\n\n    return 1;\n}\n\n#ifdef REDIS_TEST\n#include <sys/time.h>\n#include <time.h>\n\n#if 0\nstatic void intsetRepr(intset *is) {\n    for (uint32_t i = 0; i < intrev32ifbe(is->length); i++) {\n        printf(\"%lld\\n\", (uint64_t)_intsetGet(is,i));\n    }\n    printf(\"\\n\");\n}\n\nstatic void error(char *err) {\n    printf(\"%s\\n\", err);\n    exit(1);\n}\n#endif\n\nstatic void ok(void) {\n    printf(\"OK\\n\");\n}\n\nstatic long long usec(void) {\n    struct timeval tv;\n    gettimeofday(&tv,NULL);\n    return (((long long)tv.tv_sec)*1000000)+tv.tv_usec;\n}\n\nstatic intset *createSet(int bits, int size) {\n    uint64_t mask = (1<<bits)-1;\n    uint64_t value;\n    intset *is = intsetNew();\n\n    for (int i = 0; i < size; i++) {\n        if (bits > 32) {\n            value = (rand()*rand()) & mask;\n        } else {\n            value = rand() & mask;\n        }\n        is = intsetAdd(is,value,NULL);\n    }\n    return is;\n}\n\nstatic void checkConsistency(intset 
*is) {\n    for (uint32_t i = 0; i < (intrev32ifbe(is->length)-1); i++) {\n        uint32_t encoding = intrev32ifbe(is->encoding);\n\n        if (encoding == INTSET_ENC_INT16) {\n            int16_t *i16 = (int16_t*)is->contents;\n            assert(i16[i] < i16[i+1]);\n        } else if (encoding == INTSET_ENC_INT32) {\n            int32_t *i32 = (int32_t*)is->contents;\n            assert(i32[i] < i32[i+1]);\n        } else {\n            int64_t *i64 = (int64_t*)is->contents;\n            assert(i64[i] < i64[i+1]);\n        }\n    }\n}\n\n#define UNUSED(x) (void)(x)\nint intsetTest(int argc, char **argv, int flags) {\n    uint8_t success;\n    int i;\n    intset *is;\n    srand(time(NULL));\n\n    UNUSED(argc);\n    UNUSED(argv);\n    UNUSED(flags);\n\n    printf(\"Value encodings: \"); {\n        assert(_intsetValueEncoding(-32768) == INTSET_ENC_INT16);\n        assert(_intsetValueEncoding(+32767) == INTSET_ENC_INT16);\n        assert(_intsetValueEncoding(-32769) == INTSET_ENC_INT32);\n        assert(_intsetValueEncoding(+32768) == INTSET_ENC_INT32);\n        assert(_intsetValueEncoding(-2147483648) == INTSET_ENC_INT32);\n        assert(_intsetValueEncoding(+2147483647) == INTSET_ENC_INT32);\n        assert(_intsetValueEncoding(-2147483649) == INTSET_ENC_INT64);\n        assert(_intsetValueEncoding(+2147483648) == INTSET_ENC_INT64);\n        assert(_intsetValueEncoding(-9223372036854775808ull) ==\n                    INTSET_ENC_INT64);\n        assert(_intsetValueEncoding(+9223372036854775807ull) ==\n                    INTSET_ENC_INT64);\n        ok();\n    }\n\n    printf(\"Basic adding: \"); {\n        is = intsetNew();\n        is = intsetAdd(is,5,&success); assert(success);\n        is = intsetAdd(is,6,&success); assert(success);\n        is = intsetAdd(is,4,&success); assert(success);\n        is = intsetAdd(is,4,&success); assert(!success);\n        ok();\n        zfree(is);\n    }\n\n    printf(\"Large number of random adds: \"); {\n        uint32_t 
inserts = 0;\n        is = intsetNew();\n        for (i = 0; i < 1024; i++) {\n            is = intsetAdd(is,rand()%0x800,&success);\n            if (success) inserts++;\n        }\n        assert(intrev32ifbe(is->length) == inserts);\n        checkConsistency(is);\n        ok();\n        zfree(is);\n    }\n\n    printf(\"Upgrade from int16 to int32: \"); {\n        is = intsetNew();\n        is = intsetAdd(is,32,NULL);\n        assert(intrev32ifbe(is->encoding) == INTSET_ENC_INT16);\n        is = intsetAdd(is,65535,NULL);\n        assert(intrev32ifbe(is->encoding) == INTSET_ENC_INT32);\n        assert(intsetFind(is,32));\n        assert(intsetFind(is,65535));\n        checkConsistency(is);\n        zfree(is);\n\n        is = intsetNew();\n        is = intsetAdd(is,32,NULL);\n        assert(intrev32ifbe(is->encoding) == INTSET_ENC_INT16);\n        is = intsetAdd(is,-65535,NULL);\n        assert(intrev32ifbe(is->encoding) == INTSET_ENC_INT32);\n        assert(intsetFind(is,32));\n        assert(intsetFind(is,-65535));\n        checkConsistency(is);\n        ok();\n        zfree(is);\n    }\n\n    printf(\"Upgrade from int16 to int64: \"); {\n        is = intsetNew();\n        is = intsetAdd(is,32,NULL);\n        assert(intrev32ifbe(is->encoding) == INTSET_ENC_INT16);\n        is = intsetAdd(is,4294967295,NULL);\n        assert(intrev32ifbe(is->encoding) == INTSET_ENC_INT64);\n        assert(intsetFind(is,32));\n        assert(intsetFind(is,4294967295));\n        checkConsistency(is);\n        zfree(is);\n\n        is = intsetNew();\n        is = intsetAdd(is,32,NULL);\n        assert(intrev32ifbe(is->encoding) == INTSET_ENC_INT16);\n        is = intsetAdd(is,-4294967295,NULL);\n        assert(intrev32ifbe(is->encoding) == INTSET_ENC_INT64);\n        assert(intsetFind(is,32));\n        assert(intsetFind(is,-4294967295));\n        checkConsistency(is);\n        ok();\n        zfree(is);\n    }\n\n    printf(\"Upgrade from int32 to int64: \"); {\n        is = 
intsetNew();\n        is = intsetAdd(is,65535,NULL);\n        assert(intrev32ifbe(is->encoding) == INTSET_ENC_INT32);\n        is = intsetAdd(is,4294967295,NULL);\n        assert(intrev32ifbe(is->encoding) == INTSET_ENC_INT64);\n        assert(intsetFind(is,65535));\n        assert(intsetFind(is,4294967295));\n        checkConsistency(is);\n        zfree(is);\n\n        is = intsetNew();\n        is = intsetAdd(is,65535,NULL);\n        assert(intrev32ifbe(is->encoding) == INTSET_ENC_INT32);\n        is = intsetAdd(is,-4294967295,NULL);\n        assert(intrev32ifbe(is->encoding) == INTSET_ENC_INT64);\n        assert(intsetFind(is,65535));\n        assert(intsetFind(is,-4294967295));\n        checkConsistency(is);\n        ok();\n        zfree(is);\n    }\n\n    printf(\"Stress lookups: \"); {\n        long num = 100000, size = 10000;\n        int i, bits = 20;\n        long long start;\n        is = createSet(bits,size);\n        checkConsistency(is);\n\n        start = usec();\n        for (i = 0; i < num; i++) intsetSearch(is,rand() % ((1<<bits)-1),NULL);\n        printf(\"%ld lookups, %ld element set, %lldusec\\n\",\n               num,size,usec()-start);\n        zfree(is);\n    }\n\n    printf(\"Stress add+delete: \"); {\n        int i, v1, v2;\n        is = intsetNew();\n        for (i = 0; i < 0xffff; i++) {\n            v1 = rand() % 0xfff;\n            is = intsetAdd(is,v1,NULL);\n            assert(intsetFind(is,v1));\n\n            v2 = rand() % 0xfff;\n            is = intsetRemove(is,v2,NULL);\n            assert(!intsetFind(is,v2));\n        }\n        checkConsistency(is);\n        ok();\n        zfree(is);\n    }\n\n    return 0;\n}\n#endif\n"
  },
  {
    "path": "src/redis/intset.h",
    "content": "/*\n * Copyright (c) 2009-2012, Pieter Noordhuis <pcnoordhuis at gmail dot com>\n * Copyright (c) 2009-2012, Salvatore Sanfilippo <antirez at gmail dot com>\n * All rights reserved.\n *\n * Redistribution and use in source and binary forms, with or without\n * modification, are permitted provided that the following conditions are met:\n *\n *   * Redistributions of source code must retain the above copyright notice,\n *     this list of conditions and the following disclaimer.\n *   * Redistributions in binary form must reproduce the above copyright\n *     notice, this list of conditions and the following disclaimer in the\n *     documentation and/or other materials provided with the distribution.\n *   * Neither the name of Redis nor the names of its contributors may be used\n *     to endorse or promote products derived from this software without\n *     specific prior written permission.\n *\n * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS \"AS IS\"\n * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE\n * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE\n * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE\n * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR\n * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF\n * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS\n * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN\n * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)\n * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE\n * POSSIBILITY OF SUCH DAMAGE.\n */\n\n#ifndef __INTSET_H\n#define __INTSET_H\n#include <stdint.h>\n\ntypedef struct intset {\n    uint32_t encoding;\n    uint32_t length;\n    int8_t contents[];\n} intset;\n\nintset *intsetNew(void);\nintset *intsetAdd(intset *is, int64_t value, uint8_t *success);\nintset *intsetRemove(intset *is, int64_t value, int *success);\nintset *intsetTrimTail(intset *is, uint32_t trim_len);  // Removes last trim_len elements.\nuint8_t intsetFind(intset *is, int64_t value);\nint64_t intsetRandom(intset *is);\nuint8_t intsetGet(intset *is, uint32_t pos, int64_t *value);\nuint32_t intsetLen(const intset *is);\nsize_t intsetBlobLen(intset *is);\n\nint intsetValidateIntegrity(const unsigned char *is, size_t size, int deep);\n\n#ifdef REDIS_TEST\nint intsetTest(int argc, char *argv[], int flags);\n#endif\n\n#endif // __INTSET_H\n"
  },
  {
    "path": "src/redis/listpack.c",
    "content": "/* Listpack -- A lists of strings serialization format\n *\n * This file implements the specification you can find at:\n *\n *  https://github.com/antirez/listpack\n *\n * Copyright (c) 2017,2020, Redis Ltd.\n * All rights reserved.\n *\n * Redistribution and use in source and binary forms, with or without\n * modification, are permitted provided that the following conditions are met:\n *\n *   * Redistributions of source code must retain the above copyright notice,\n *     this list of conditions and the following disclaimer.\n *   * Redistributions in binary form must reproduce the above copyright\n *     notice, this list of conditions and the following disclaimer in the\n *     documentation and/or other materials provided with the distribution.\n *   * Neither the name of Redis nor the names of its contributors may be used\n *     to endorse or promote products derived from this software without\n *     specific prior written permission.\n *\n * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS \"AS IS\"\n * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE\n * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE\n * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE\n * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR\n * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF\n * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS\n * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN\n * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)\n * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE\n * POSSIBILITY OF SUCH DAMAGE.\n */\n\n#include <stdint.h>\n#include <limits.h>\n#include <sys/types.h>\n#include <stdlib.h>\n#include <string.h>\n#include <stdio.h>\n#include <assert.h>\n\n#include \"config.h\"\n#include \"listpack.h\"\n#include \"util.h\"\n#include \"zmalloc.h\"\n\n#define LP_HDR_SIZE 6       /* 32 bit total len + 16 bit number of elements. */\n#define LP_HDR_NUMELE_UNKNOWN UINT16_MAX\n#define LP_MAX_INT_ENCODING_LEN 9\n#define LP_MAX_BACKLEN_SIZE 5\n#define LP_ENCODING_INT 0\n#define LP_ENCODING_STRING 1\n\n#define LP_ENCODING_7BIT_UINT 0\n#define LP_ENCODING_7BIT_UINT_MASK 0x80\n#define LP_ENCODING_IS_7BIT_UINT(byte) (((byte)&LP_ENCODING_7BIT_UINT_MASK)==LP_ENCODING_7BIT_UINT)\n#define LP_ENCODING_7BIT_UINT_ENTRY_SIZE 2\n\n#define LP_ENCODING_6BIT_STR 0x80\n#define LP_ENCODING_6BIT_STR_MASK 0xC0\n#define LP_ENCODING_IS_6BIT_STR(byte) (((byte)&LP_ENCODING_6BIT_STR_MASK)==LP_ENCODING_6BIT_STR)\n\n#define LP_ENCODING_13BIT_INT 0xC0\n#define LP_ENCODING_13BIT_INT_MASK 0xE0\n#define LP_ENCODING_IS_13BIT_INT(byte) (((byte)&LP_ENCODING_13BIT_INT_MASK)==LP_ENCODING_13BIT_INT)\n#define LP_ENCODING_13BIT_INT_ENTRY_SIZE 3\n\n#define LP_ENCODING_12BIT_STR 0xE0\n#define LP_ENCODING_12BIT_STR_MASK 0xF0\n#define LP_ENCODING_IS_12BIT_STR(byte) (((byte)&LP_ENCODING_12BIT_STR_MASK)==LP_ENCODING_12BIT_STR)\n\n#define LP_ENCODING_16BIT_INT 0xF1\n#define LP_ENCODING_16BIT_INT_MASK 0xFF\n#define LP_ENCODING_IS_16BIT_INT(byte) 
(((byte)&LP_ENCODING_16BIT_INT_MASK)==LP_ENCODING_16BIT_INT)\n#define LP_ENCODING_16BIT_INT_ENTRY_SIZE 4\n\n#define LP_ENCODING_24BIT_INT 0xF2\n#define LP_ENCODING_24BIT_INT_MASK 0xFF\n#define LP_ENCODING_IS_24BIT_INT(byte) (((byte)&LP_ENCODING_24BIT_INT_MASK)==LP_ENCODING_24BIT_INT)\n#define LP_ENCODING_24BIT_INT_ENTRY_SIZE 5\n\n#define LP_ENCODING_32BIT_INT 0xF3\n#define LP_ENCODING_32BIT_INT_MASK 0xFF\n#define LP_ENCODING_IS_32BIT_INT(byte) (((byte)&LP_ENCODING_32BIT_INT_MASK)==LP_ENCODING_32BIT_INT)\n#define LP_ENCODING_32BIT_INT_ENTRY_SIZE 6\n\n#define LP_ENCODING_64BIT_INT 0xF4\n#define LP_ENCODING_64BIT_INT_MASK 0xFF\n#define LP_ENCODING_IS_64BIT_INT(byte) (((byte)&LP_ENCODING_64BIT_INT_MASK)==LP_ENCODING_64BIT_INT)\n#define LP_ENCODING_64BIT_INT_ENTRY_SIZE 10\n\n#define LP_ENCODING_32BIT_STR 0xF0\n#define LP_ENCODING_32BIT_STR_MASK 0xFF\n#define LP_ENCODING_IS_32BIT_STR(byte) (((byte)&LP_ENCODING_32BIT_STR_MASK)==LP_ENCODING_32BIT_STR)\n\n#define LP_EOF 0xFF\n\n#define LP_ENCODING_6BIT_STR_LEN(p) ((p)[0] & 0x3F)\n#define LP_ENCODING_12BIT_STR_LEN(p) ((((p)[0] & 0xF) << 8) | (p)[1])\n#define LP_ENCODING_32BIT_STR_LEN(p)                                                                                   \\\n    (((uint32_t)(p)[1] << 0) | ((uint32_t)(p)[2] << 8) | ((uint32_t)(p)[3] << 16) | ((uint32_t)(p)[4] << 24))\n\n#define lpGetTotalBytes(p)                                                                                             \\\n    (((uint32_t)(p)[0] << 0) | ((uint32_t)(p)[1] << 8) | ((uint32_t)(p)[2] << 16) | ((uint32_t)(p)[3] << 24))\n\n#define lpGetNumElements(p) (((uint32_t)(p)[4] << 0) | ((uint32_t)(p)[5] << 8))\n#define lpSetTotalBytes(p, v)                                                                                          \\\n    do {                                                                                                               \\\n    (p)[0] = (v)&0xff; \\\n    (p)[1] = ((v)>>8)&0xff; \\\n    (p)[2] = ((v)>>16)&0xff; \\\n 
   (p)[3] = ((v)>>24)&0xff; \\\n} while(0)\n\n/* TODO: delete this function once corruption in the stream code is identified */\nstatic void lpSetTotalBytesChecked(unsigned char *p, uint32_t v) {\n    uint32_t current = lpGetTotalBytes(p);\n    if (current == 0) {\n      fprintf(stderr, \"Error: corrupted listpack size.\");\n      abort();\n    } else if (current > 4194304) { /* 4 MiB */\n      /* suspicous size, lets check its validity*/  \n      size_t block_size = zmalloc_size(p);\n      if (block_size < current) {\n        fprintf(stderr, \"Error: listpack size (%u) is larger than allocated \"\n                \"block size (%lu).\", current, block_size);\n        abort();\n      }\n    }\n\n    lpSetTotalBytes(p, v);\n}\n\n#define lpSetNumElements(p, v)                                                                                         \\\n    do {                                                                                                               \\\n    (p)[4] = (v)&0xff; \\\n    (p)[5] = ((v)>>8)&0xff; \\\n} while(0)\n\n/* Validates that 'p' is not outside the listpack.\n * All function that return a pointer to an element in the listpack will assert\n * that this element is valid, so it can be freely used.\n * Generally functions such lpNext and lpDelete assume the input pointer is\n * already validated (since it's the return value of another function). */\n#define ASSERT_INTEGRITY(lp, p)                                                                                        \\\n    do {                                                                                                               \\\n    assert((p) >= (lp)+LP_HDR_SIZE && (p) < (lp)+lpGetTotalBytes((lp))); \\\n} while (0)\n\n/* Similar to the above, but validates the entire element length rather than just\n * it's pointer. 
*/\n#define ASSERT_INTEGRITY_LEN(lp, p, len)                                                                               \\\n    do {                                                                                                               \\\n    assert((p) >= (lp)+LP_HDR_SIZE && (p)+(len) < (lp)+lpGetTotalBytes((lp))); \\\n} while (0)\n\nstatic inline void lpAssertValidEntry(unsigned char* lp, size_t lpbytes, unsigned char *p);\n\n/* Don't let listpacks grow over 1GB in any case, don't wanna risk overflow in\n * Total Bytes header field */\n#define LISTPACK_MAX_SAFETY_SIZE (1<<30)\nint lpSafeToAdd(unsigned char* lp, size_t add) {\n    size_t len = lp? lpGetTotalBytes(lp): 0;\n    if (len + add > LISTPACK_MAX_SAFETY_SIZE) return 0;\n    return 1;\n}\n\n/* Convert a string into a signed 64 bit integer.\n * The function returns 1 if the string could be parsed into a (non-overflowing)\n * signed 64 bit int, 0 otherwise. The 'value' will be set to the parsed value\n * when the function returns success.\n *\n * Note that this function demands that the string strictly represents\n * a int64 value: no spaces or other characters before or after the string\n * representing the number are accepted, nor zeroes at the start if not\n * for the string \"0\" representing the zero number.\n *\n * Because of its strictness, it is safe to use this function to check if\n * you can convert a string into a long long, and obtain back the string\n * from the number without any loss in the string representation. 
*\n *\n * -----------------------------------------------------------------------------\n *\n * Credits: this function was adapted from the Redis OSS source code, file\n * \"utils.c\", function string2ll(), and is copyright:\n *\n * Copyright(C) 2011, Pieter Noordhuis\n * Copyright(C) 2011, Redis Ltd.\n *\n * The function is released under the BSD 3-clause license.\n */\nint lpStringToInt64(const char *s, unsigned long slen, int64_t *value) {\n    const char *p = s;\n    unsigned long plen = 0;\n    int negative = 0;\n    uint64_t v;\n\n    /* Abort if length indicates this cannot possibly be an int */\n    if (slen == 0 || slen >= LONG_STR_SIZE) return 0;\n\n    /* Special case: first and only digit is 0. */\n    if (slen == 1 && p[0] == '0') {\n        if (value != NULL) *value = 0;\n        return 1;\n    }\n\n    if (p[0] == '-') {\n        negative = 1;\n        p++;\n        plen++;\n\n        /* Abort on only a negative sign. */\n        if (plen == slen) return 0;\n    }\n\n    /* First digit should be 1-9, otherwise the string should just be 0. */\n    if (p[0] >= '1' && p[0] <= '9') {\n        v = p[0]-'0';\n        p++;\n        plen++;\n    } else {\n        return 0;\n    }\n\n    while (plen < slen && p[0] >= '0' && p[0] <= '9') {\n        if (v > (UINT64_MAX / 10)) /* Overflow. */\n            return 0;\n        v *= 10;\n\n        if (v > (UINT64_MAX - (p[0]-'0'))) /* Overflow. */\n            return 0;\n        v += p[0]-'0';\n\n        p++;\n        plen++;\n    }\n\n    /* Return if not all bytes were used. */\n    if (plen < slen) return 0;\n\n    if (negative) {\n        if (v > ((uint64_t)(-(INT64_MIN+1))+1)) /* Overflow. */\n            return 0;\n        if (value != NULL) *value = -v;\n    } else {\n        if (v > INT64_MAX) /* Overflow. 
*/\n            return 0;\n        if (value != NULL) *value = v;\n    }\n    return 1;\n}\n\n/* Create a new, empty listpack.\n * On success the new listpack is returned, otherwise an error is returned.\n * Pre-allocate at least `capacity` bytes of memory,\n * over-allocated memory can be shrunk by `lpShrinkToFit`.\n * */\nunsigned char *lpNew(size_t capacity) {\n    unsigned char *lp = zmalloc(capacity > LP_HDR_SIZE+1 ? capacity : LP_HDR_SIZE+1);\n    if (lp == NULL) return NULL;\n    lpSetTotalBytes(lp,LP_HDR_SIZE+1);\n    lpSetNumElements(lp,0);\n    lp[LP_HDR_SIZE] = LP_EOF;\n    return lp;\n}\n\n/* Free the specified listpack. */\nvoid lpFree(unsigned char *lp) {\n    zfree(lp);\n}\n\n/* Shrink the memory to fit. */\nunsigned char* lpShrinkToFit(unsigned char *lp) {\n    size_t size = lpGetTotalBytes(lp);\n    if (size < zmalloc_size(lp)) {\n        return zrealloc(lp, size);\n    } else {\n        return lp;\n    }\n}\n\n/* Stores the integer encoded representation of 'v' in the 'intenc' buffer. */\nstatic inline void lpEncodeIntegerGetType(int64_t v, unsigned char *intenc, uint64_t *enclen) {\n    if (v >= 0 && v <= 127) {\n        /* Single byte 0-127 integer. */\n        intenc[0] = v;\n        *enclen = 1;\n    } else if (v >= -4096 && v <= 4095) {\n        /* 13 bit integer. */\n        if (v < 0) v = ((int64_t)1<<13)+v;\n        intenc[0] = (v>>8)|LP_ENCODING_13BIT_INT;\n        intenc[1] = v&0xff;\n        *enclen = 2;\n    } else if (v >= -32768 && v <= 32767) {\n        /* 16 bit integer. */\n        if (v < 0) v = ((int64_t)1<<16)+v;\n        intenc[0] = LP_ENCODING_16BIT_INT;\n        intenc[1] = v&0xff;\n        intenc[2] = v>>8;\n        *enclen = 3;\n    } else if (v >= -8388608 && v <= 8388607) {\n        /* 24 bit integer. 
*/\n        if (v < 0) v = ((int64_t)1<<24)+v;\n        intenc[0] = LP_ENCODING_24BIT_INT;\n        intenc[1] = v&0xff;\n        intenc[2] = (v>>8)&0xff;\n        intenc[3] = v>>16;\n        *enclen = 4;\n    } else if (v >= -2147483648 && v <= 2147483647) {\n        /* 32 bit integer. */\n        if (v < 0) v = ((int64_t)1<<32)+v;\n        intenc[0] = LP_ENCODING_32BIT_INT;\n        intenc[1] = v&0xff;\n        intenc[2] = (v>>8)&0xff;\n        intenc[3] = (v>>16)&0xff;\n        intenc[4] = v>>24;\n        *enclen = 5;\n    } else {\n        /* 64 bit integer. */\n        uint64_t uv = v;\n        intenc[0] = LP_ENCODING_64BIT_INT;\n        intenc[1] = uv&0xff;\n        intenc[2] = (uv>>8)&0xff;\n        intenc[3] = (uv>>16)&0xff;\n        intenc[4] = (uv>>24)&0xff;\n        intenc[5] = (uv>>32)&0xff;\n        intenc[6] = (uv>>40)&0xff;\n        intenc[7] = (uv>>48)&0xff;\n        intenc[8] = uv>>56;\n        *enclen = 9;\n    }\n}\n\n/* Given an element 'ele' of size 'size', determine if the element can be\n * represented inside the listpack encoded as integer, and returns\n * LP_ENCODING_INT if so. Otherwise returns LP_ENCODING_STRING if no integer\n * encoding is possible.\n *\n * If the LP_ENCODING_INT is returned, the function stores the integer encoded\n * representation of the element in the 'intenc' buffer.\n *\n * Regardless of the returned encoding, 'enclen' is populated by reference to\n * the number of bytes that the string or integer encoded element will require\n * in order to be represented. 
*/\nstatic inline int lpEncodeGetType(const unsigned char *ele, uint32_t size, unsigned char *intenc, uint64_t *enclen) {\n    int64_t v;\n    if (lpStringToInt64((const char*)ele, size, &v)) {\n        lpEncodeIntegerGetType(v, intenc, enclen);\n        return LP_ENCODING_INT;\n    } else {\n        if (size < 64)\n            *enclen = 1 + size;\n        else if (size < 4096)\n            *enclen = 2 + size;\n        else\n            *enclen = 5 + (uint64_t)size;\n        return LP_ENCODING_STRING;\n    }\n}\n\n/* Store a reverse-encoded variable length field, representing the length\n * of the previous element of size 'l', in the target buffer 'buf'.\n * The function returns the number of bytes used to encode it, from\n * 1 to 5. If 'buf' is NULL the function just returns the number of bytes\n * needed in order to encode the backlen. */\nstatic inline unsigned long lpEncodeBacklen(unsigned char *buf, uint64_t l) {\n    if (l <= 127) {\n        if (buf) buf[0] = l;\n        return 1;\n    } else if (l < 16383) {\n        if (buf) {\n            buf[0] = l>>7;\n            buf[1] = (l&127)|128;\n        }\n        return 2;\n    } else if (l < 2097151) {\n        if (buf) {\n            buf[0] = l>>14;\n            buf[1] = ((l>>7)&127)|128;\n            buf[2] = (l&127)|128;\n        }\n        return 3;\n    } else if (l < 268435455) {\n        if (buf) {\n            buf[0] = l>>21;\n            buf[1] = ((l>>14)&127)|128;\n            buf[2] = ((l>>7)&127)|128;\n            buf[3] = (l&127)|128;\n        }\n        return 4;\n    } else {\n        if (buf) {\n            buf[0] = l>>28;\n            buf[1] = ((l>>21)&127)|128;\n            buf[2] = ((l>>14)&127)|128;\n            buf[3] = ((l>>7)&127)|128;\n            buf[4] = (l&127)|128;\n        }\n        return 5;\n    }\n}\n\n/* Decode the backlen and returns it. If the encoding looks invalid (more than\n * 5 bytes are used), UINT64_MAX is returned to report the problem. 
*/\nstatic inline uint64_t lpDecodeBacklen(unsigned char *p) {\n    uint64_t val = 0;\n    uint64_t shift = 0;\n    do {\n        val |= (uint64_t)(p[0] & 127) << shift;\n        if (!(p[0] & 128)) break;\n        shift += 7;\n        p--;\n        if (shift > 28) return UINT64_MAX;\n    } while(1);\n    return val;\n}\n\n/* Encode the string element pointed by 's' of size 'len' in the target\n * buffer 'buf'. The function should be called with 'buf' having always enough\n * space for encoding the string. This is done by calling lpEncodeGetType()\n * before calling this function. */\nstatic inline void lpEncodeString(unsigned char *buf, const unsigned char *s, uint32_t len) {\n    if (len < 64) {\n        buf[0] = len | LP_ENCODING_6BIT_STR;\n        memcpy(buf+1,s,len);\n    } else if (len < 4096) {\n        buf[0] = (len >> 8) | LP_ENCODING_12BIT_STR;\n        buf[1] = len & 0xff;\n        memcpy(buf+2,s,len);\n    } else {\n        buf[0] = LP_ENCODING_32BIT_STR;\n        buf[1] = len & 0xff;\n        buf[2] = (len >> 8) & 0xff;\n        buf[3] = (len >> 16) & 0xff;\n        buf[4] = (len >> 24) & 0xff;\n        memcpy(buf+5,s,len);\n    }\n}\n\n/* Return the encoded length of the listpack element pointed by 'p'.\n * This includes the encoding byte, length bytes, and the element data itself.\n * If the element encoding is wrong then 0 is returned.\n * Note that this method may access additional bytes (in case of 12 and 32 bit\n * str), so should only be called when we know 'p' was already validated by\n * lpCurrentEncodedSizeBytes or ASSERT_INTEGRITY_LEN (possibly since 'p' is\n * a return value of another function that validated its return). 
*/\nstatic inline uint32_t lpCurrentEncodedSizeUnsafe(unsigned char *p) {\n    if (LP_ENCODING_IS_7BIT_UINT(p[0])) return 1;\n    if (LP_ENCODING_IS_6BIT_STR(p[0])) return 1+LP_ENCODING_6BIT_STR_LEN(p);\n    if (LP_ENCODING_IS_13BIT_INT(p[0])) return 2;\n    if (LP_ENCODING_IS_16BIT_INT(p[0])) return 3;\n    if (LP_ENCODING_IS_24BIT_INT(p[0])) return 4;\n    if (LP_ENCODING_IS_32BIT_INT(p[0])) return 5;\n    if (LP_ENCODING_IS_64BIT_INT(p[0])) return 9;\n    if (LP_ENCODING_IS_12BIT_STR(p[0])) return 2+LP_ENCODING_12BIT_STR_LEN(p);\n    if (LP_ENCODING_IS_32BIT_STR(p[0])) return 5+LP_ENCODING_32BIT_STR_LEN(p);\n    if (p[0] == LP_EOF) return 1;\n    return 0;\n}\n\n/* Return bytes needed to encode the length of the listpack element pointed by 'p'.\n * This includes just the encoding byte, and the bytes needed to encode the length\n * of the element (excluding the element data itself)\n * If the element encoding is wrong then 0 is returned. */\nstatic inline uint32_t lpCurrentEncodedSizeBytes(unsigned char *p) {\n    if (LP_ENCODING_IS_7BIT_UINT(p[0])) return 1;\n    if (LP_ENCODING_IS_6BIT_STR(p[0])) return 1;\n    if (LP_ENCODING_IS_13BIT_INT(p[0])) return 1;\n    if (LP_ENCODING_IS_16BIT_INT(p[0])) return 1;\n    if (LP_ENCODING_IS_24BIT_INT(p[0])) return 1;\n    if (LP_ENCODING_IS_32BIT_INT(p[0])) return 1;\n    if (LP_ENCODING_IS_64BIT_INT(p[0])) return 1;\n    if (LP_ENCODING_IS_12BIT_STR(p[0])) return 2;\n    if (LP_ENCODING_IS_32BIT_STR(p[0])) return 5;\n    if (p[0] == LP_EOF) return 1;\n    return 0;\n}\n\n/* Skip the current entry returning the next. It is invalid to call this\n * function if the current element is the EOF element at the end of the\n * listpack, however, while this function is used to implement lpNext(),\n * it does not return NULL when the EOF element is encountered. 
*/\nunsigned char *lpSkip(unsigned char *p) {\n    unsigned long entrylen = lpCurrentEncodedSizeUnsafe(p);\n    entrylen += lpEncodeBacklen(NULL,entrylen);\n    p += entrylen;\n    return p;\n}\n\n/* If 'p' points to an element of the listpack, calling lpNext() will return\n * the pointer to the next element (the one on the right), or NULL if 'p'\n * already pointed to the last element of the listpack. */\nunsigned char *lpNext(unsigned char *lp, unsigned char *p) {\n    assert(p);\n    p = lpSkip(p);\n    if (p[0] == LP_EOF) return NULL;\n    lpAssertValidEntry(lp, lpBytes(lp), p);\n    return p;\n}\n\n/* If 'p' points to an element of the listpack, calling lpPrev() will return\n * the pointer to the previous element (the one on the left), or NULL if 'p'\n * already pointed to the first element of the listpack. */\nunsigned char *lpPrev(unsigned char *lp, unsigned char *p) {\n    assert(p);\n    if (p-lp == LP_HDR_SIZE) return NULL;\n    p--; /* Seek the first backlen byte of the last element. */\n    uint64_t prevlen = lpDecodeBacklen(p);\n    prevlen += lpEncodeBacklen(NULL,prevlen);\n    p -= prevlen-1; /* Seek the first byte of the previous entry. */\n    lpAssertValidEntry(lp, lpBytes(lp), p);\n    return p;\n}\n\n/* Return a pointer to the first element of the listpack, or NULL if the\n * listpack has no elements. */\nunsigned char *lpFirst(unsigned char *lp) {\n    unsigned char *p = lp + LP_HDR_SIZE; /* Skip the header. */\n    if (p[0] == LP_EOF) return NULL;\n    lpAssertValidEntry(lp, lpBytes(lp), p);\n    return p;\n}\n\n/* Return a pointer to the last element of the listpack, or NULL if the\n * listpack has no elements. */\nunsigned char *lpLast(unsigned char *lp) {\n    unsigned char *p = lp+lpGetTotalBytes(lp)-1; /* Seek EOF element. */\n    return lpPrev(lp,p); /* Will return NULL if EOF is the only element. */\n}\n\n/* Return the number of elements inside the listpack. 
This function attempts\n * to use the cached value when within range, otherwise a full scan is\n * needed. As a side effect of calling this function, the listpack header\n * could be modified, because if the count is found to be already within\n * the 'numele' header field range, the new value is set. */\nunsigned long lpLength(unsigned char *lp) {\n    uint32_t numele = lpGetNumElements(lp);\n    if (numele != LP_HDR_NUMELE_UNKNOWN) return numele;\n\n    /* Too many elements inside the listpack. We need to scan in order\n     * to get the total number. */\n    uint32_t count = 0;\n    unsigned char *p = lpFirst(lp);\n    while(p) {\n        count++;\n        p = lpNext(lp,p);\n    }\n\n    /* If the count is again within range of the header numele field,\n     * set it. */\n    if (count < LP_HDR_NUMELE_UNKNOWN) lpSetNumElements(lp,count);\n    return count;\n}\n\n/* Return the listpack element pointed by 'p'.\n *\n * The function changes behavior depending on the passed 'intbuf' value.\n * Specifically, if 'intbuf' is NULL:\n *\n * If the element is internally encoded as an integer, the function returns\n * NULL and populates the integer value by reference in 'count'. Otherwise if\n * the element is encoded as a string a pointer to the string (pointing inside\n * the listpack itself) is returned, and 'count' is set to the length of the\n * string.\n *\n * If instead 'intbuf' points to a buffer passed by the caller, that must be\n * at least LP_INTBUF_SIZE bytes, the function always returns the element as\n * it was a string (returning the pointer to the string and setting the\n * 'count' argument to the string length by reference). However if the element\n * is encoded as an integer, the 'intbuf' buffer is used in order to store\n * the string representation.\n *\n * The user should use one or the other form depending on what the value will\n * be used for. 
If there is immediate usage for an integer value returned\n * by the function, then passing a buffer (and converting it back to a number)\n * is of course useless.\n *\n * If 'entry_size' is not NULL, *entry_size is set to the entry length of the\n * listpack element pointed by 'p'. This includes the encoding bytes, length\n * bytes, the element data itself, and the backlen bytes.\n *\n * If the function is called against a badly encoded listpack, so that there\n * is no valid way to parse it, the function returns as if there was an\n * integer encoded with value 12345678900000000 + <unrecognized byte>, this may\n * be a hint to understand that something is wrong. To crash in this case is\n * not sensible because of the different requirements of the application using\n * this lib.\n *\n * Similarly, there is no error returned since the listpack normally can be\n * assumed to be valid, so that would be a very high API cost. */\nstatic inline unsigned char *\nlpGetWithSize(unsigned char *p, int64_t *count, unsigned char *intbuf, uint64_t *entry_size) {\n    int64_t val;\n    uint64_t uval, negstart, negmax;\n\n    assert(p); /* assertion for valgrind (avoid NPD) */\n    if (LP_ENCODING_IS_7BIT_UINT(p[0])) {\n        negstart = UINT64_MAX; /* 7 bit ints are always positive. 
*/\n        negmax = 0;\n        uval = p[0] & 0x7f;\n        if (entry_size) *entry_size = LP_ENCODING_7BIT_UINT_ENTRY_SIZE;\n    } else if (LP_ENCODING_IS_6BIT_STR(p[0])) {\n        *count = LP_ENCODING_6BIT_STR_LEN(p);\n        if (entry_size) *entry_size = 1 + *count + lpEncodeBacklen(NULL, *count + 1);\n        return p+1;\n    } else if (LP_ENCODING_IS_13BIT_INT(p[0])) {\n        uval = ((p[0]&0x1f)<<8) | p[1];\n        negstart = (uint64_t)1<<12;\n        negmax = 8191;\n        if (entry_size) *entry_size = LP_ENCODING_13BIT_INT_ENTRY_SIZE;\n    } else if (LP_ENCODING_IS_16BIT_INT(p[0])) {\n        uval = (uint64_t)p[1] | (uint64_t)p[2] << 8;\n        negstart = (uint64_t)1<<15;\n        negmax = UINT16_MAX;\n        if (entry_size) *entry_size = LP_ENCODING_16BIT_INT_ENTRY_SIZE;\n    } else if (LP_ENCODING_IS_24BIT_INT(p[0])) {\n        uval = (uint64_t)p[1] | (uint64_t)p[2] << 8 | (uint64_t)p[3] << 16;\n        negstart = (uint64_t)1<<23;\n        negmax = UINT32_MAX>>8;\n        if (entry_size) *entry_size = LP_ENCODING_24BIT_INT_ENTRY_SIZE;\n    } else if (LP_ENCODING_IS_32BIT_INT(p[0])) {\n        uval = (uint64_t)p[1] | (uint64_t)p[2] << 8 | (uint64_t)p[3] << 16 | (uint64_t)p[4] << 24;\n        negstart = (uint64_t)1<<31;\n        negmax = UINT32_MAX;\n        if (entry_size) *entry_size = LP_ENCODING_32BIT_INT_ENTRY_SIZE;\n    } else if (LP_ENCODING_IS_64BIT_INT(p[0])) {\n        uval = (uint64_t)p[1] | (uint64_t)p[2] << 8 | (uint64_t)p[3] << 16 | (uint64_t)p[4] << 24 |\n               (uint64_t)p[5] << 32 | (uint64_t)p[6] << 40 | (uint64_t)p[7] << 48 | (uint64_t)p[8] << 56;\n        negstart = (uint64_t)1<<63;\n        negmax = UINT64_MAX;\n        if (entry_size) *entry_size = LP_ENCODING_64BIT_INT_ENTRY_SIZE;\n    } else if (LP_ENCODING_IS_12BIT_STR(p[0])) {\n        *count = LP_ENCODING_12BIT_STR_LEN(p);\n        if (entry_size) *entry_size = 2 + *count + lpEncodeBacklen(NULL, *count + 2);\n        return p+2;\n    } else if 
(LP_ENCODING_IS_32BIT_STR(p[0])) {\n        *count = LP_ENCODING_32BIT_STR_LEN(p);\n        if (entry_size) *entry_size = 5 + *count + lpEncodeBacklen(NULL, *count + 5);\n        return p+5;\n    } else {\n        uval = 12345678900000000ULL + p[0];\n        negstart = UINT64_MAX;\n        negmax = 0;\n    }\n\n    /* We reach this code path only for integer encodings.\n     * Convert the unsigned value to the signed one using two's complement\n     * rule. */\n    if (uval >= negstart) {\n        /* This three steps conversion should avoid undefined behaviors\n         * in the unsigned -> signed conversion. */\n        uval = negmax-uval;\n        val = uval;\n        val = -val-1;\n    } else {\n        val = uval;\n    }\n\n    /* Return the string representation of the integer or the value itself\n     * depending on intbuf being NULL or not. */\n    if (intbuf) {\n        *count = ll2string((char*)intbuf,LP_INTBUF_SIZE,(long long)val);\n        return intbuf;\n    } else {\n        *count = val;\n        return NULL;\n    }\n}\n\nint lpGetInteger(unsigned char *p, int64_t *ival) {\n    int64_t val;\n    uint64_t uval = 0, negstart = UINT64_MAX, negmax = 0;\n    uint8_t encoding = p[0];\n    \n    // Prioritize checking for integers first.\n    if (encoding < LP_ENCODING_7BIT_UINT_MASK) {        \n        uval = encoding & 0x7f;    \n    } else if (encoding > LP_ENCODING_32BIT_STR) {\n        switch (encoding) {\n            case LP_ENCODING_16BIT_INT:\n                uval = (uint64_t)p[1] | (uint64_t)p[2] << 8;\n                negstart = (uint64_t)1<<15;\n                negmax = UINT16_MAX;\n                break;\n            case LP_ENCODING_24BIT_INT:\n                uval = (uint64_t)p[1] | (uint64_t)p[2] << 8 | (uint64_t)p[3] << 16;\n                negstart = (uint64_t)1<<23;\n                negmax = UINT32_MAX>>8;\n                break;\n            case LP_ENCODING_32BIT_INT:\n                uval = (uint64_t)p[1] | (uint64_t)p[2] << 8 | 
(uint64_t)p[3] << 16 | (uint64_t)p[4] << 24;\n                negstart = (uint64_t)1<<31;\n                negmax = UINT32_MAX;\n                break;\n            case LP_ENCODING_64BIT_INT:                \n                uval = (uint64_t)p[1] | (uint64_t)p[2] << 8 | (uint64_t)p[3] << 16 | (uint64_t)p[4] << 24 |\n               (uint64_t)p[5] << 32 | (uint64_t)p[6] << 40 | (uint64_t)p[7] << 48 | (uint64_t)p[8] << 56;\n                negstart = (uint64_t)1<<63;\n                negmax = UINT64_MAX;\n            break;\n            default:\n                return 0;\n        }\n    } else if (encoding < LP_ENCODING_13BIT_INT_MASK && encoding >= LP_ENCODING_6BIT_STR_MASK) {\n   \t    uval = ((encoding & 0x1f) << 8) | p[1];\n        negstart = (uint64_t)1 << 12;\n        negmax = 8191;        \n    } else {\n        // string encodings.\n        return 0;\n    }\n\n     /* We reach this code path only for integer encodings.\n     * Convert the unsigned value to the signed one using two's complement\n     * rule. */\n    if (uval >= negstart) {\n        /* This three steps conversion should avoid undefined behaviors\n         * in the unsigned -> signed conversion. */\n        uval = negmax-uval;\n        val = uval;\n        val = -val-1;\n    } else {\n        val = uval;\n    }\n    \n    *ival = val;\n    return 1;    \n}\n\nunsigned char *lpGet(unsigned char *p, int64_t *count, unsigned char *intbuf) {\n    return lpGetWithSize(p, count, intbuf, NULL);\n}\n\n/* This is just a wrapper to lpGet() that is able to get entry value directly.\n * When the function returns NULL, it populates the integer value by reference in 'lval'.\n * Otherwise if the element is encoded as a string a pointer to the string (pointing\n * inside the listpack itself) is returned, and 'slen' is set to the length of the\n * string. 
*/\nunsigned char *lpGetValue(unsigned char *p, unsigned int *slen, long long *lval) {\n    unsigned char *vstr;\n    int64_t ele_len;\n\n    vstr = lpGet(p, &ele_len, NULL);\n    if (vstr) {\n        *slen = ele_len;\n    } else {\n        *lval = ele_len;\n    }\n    return vstr;\n}\n\n/* Find pointer to the entry equal to the specified entry. Skip 'skip' entries\n * between every comparison. Returns NULL when the field could not be found. */\nunsigned char *lpFind(unsigned char *lp, unsigned char *p, unsigned char *s, uint32_t slen, unsigned int skip) {\n    int skipcnt = 0;\n    unsigned char vencoding = 0;\n    unsigned char *value;\n    int64_t ll, vll;\n    uint64_t entry_size = 123456789; /* initialized to avoid warning. */\n    uint32_t lp_bytes = lpBytes(lp);\n\n    assert(p);\n    while (p) {\n        if (skipcnt == 0) {\n            value = lpGetWithSize(p, &ll, NULL, &entry_size);\n            if (value) {\n                /* check the value doesn't reach outside the listpack before accessing it */\n                assert(p >= lp + LP_HDR_SIZE && p + entry_size < lp + lp_bytes);\n                if (slen == ll && memcmp(value, s, slen) == 0) {\n                    return p;\n                }\n            } else {\n                /* Find out if the searched field can be encoded. Note that\n                 * we do it only the first time, once done vencoding is set\n                 * to non-zero and vll is set to the integer value. */\n                if (vencoding == 0) {\n                    /* If the entry can be encoded as integer we set it to\n                     * 1, else set it to UCHAR_MAX, so that we don't retry\n                     * again the next time. 
*/\n                    if (slen >= 32 || slen == 0 || !lpStringToInt64((const char*)s, slen, &vll)) {\n                        vencoding = UCHAR_MAX;\n                    } else {\n                        vencoding = 1;\n                    }\n                }\n\n                /* Compare current entry with specified entry, do it only\n                 * if vencoding != UCHAR_MAX because if there is no encoding\n                 * possible for the field it can't be a valid integer. */\n                if (vencoding != UCHAR_MAX && ll == vll) {\n                    return p;\n                }\n            }\n\n            /* Reset skip count */\n            skipcnt = skip;\n            p += entry_size;\n        } else {\n            /* Skip entry */\n            skipcnt--;\n\n            /* Move to next entry, avoid use `lpNext` due to `lpAssertValidEntry` in\n            * `lpNext` will call `lpBytes`, will cause performance degradation */\n            p = lpSkip(p);\n        }\n\n        /* The next call to lpGetWithSize could read at most 8 bytes past `p`\n         * We use the slower validation call only when necessary. 
*/\n        if (p + 8 >= lp + lp_bytes)\n            lpAssertValidEntry(lp, lp_bytes, p);\n        else\n            assert(p >= lp + LP_HDR_SIZE && p < lp + lp_bytes);\n        if (p[0] == LP_EOF) break;\n    }\n\n    return NULL;\n}\n\n/* Insert, delete or replace the specified string element 'elestr' of length\n * 'size' or integer element 'eleint' at the specified position 'p', with 'p'\n * being a listpack element pointer obtained with lpFirst(), lpLast(), lpNext(),\n * lpPrev() or lpSeek().\n *\n * The element is inserted before, after, or replaces the element pointed\n * by 'p' depending on the 'where' argument, that can be LP_BEFORE, LP_AFTER\n * or LP_REPLACE.\n * \n * If both 'elestr' and `eleint` are NULL, the function removes the element\n * pointed by 'p' instead of inserting one.\n * If `eleint` is non-NULL, 'size' is the length of 'eleint', the function insert\n * or replace with a 64 bit integer, which is stored in the 'eleint' buffer.\n * If 'elestr` is non-NULL, 'size' is the length of 'elestr', the function insert\n * or replace with a string, which is stored in the 'elestr' buffer.\n * \n * Returns NULL on out of memory or when the listpack total length would exceed\n * the max allowed size of 2^32-1, otherwise the new pointer to the listpack\n * holding the new element is returned (and the old pointer passed is no longer\n * considered valid)\n *\n * If 'newp' is not NULL, at the end of a successful call '*newp' will be set\n * to the address of the element just added, so that it will be possible to\n * continue an interaction with lpNext() and lpPrev().\n *\n * For deletion operations (both 'elestr' and 'eleint' set to NULL) 'newp' is\n * set to the next element, on the right of the deleted one, or to NULL if the\n * deleted element was the last one. 
*/\nunsigned char *lpInsert(unsigned char *lp, const unsigned char *elestr, unsigned char *eleint,\n                        uint32_t size, unsigned char *p, int where, unsigned char **newp)\n{\n    unsigned char intenc[LP_MAX_INT_ENCODING_LEN];\n    unsigned char backlen[LP_MAX_BACKLEN_SIZE];\n\n    uint64_t enclen; /* The length of the encoded element. */\n    int del_ele = (elestr == NULL && eleint == NULL);\n\n    /* when deletion, it is conceptually replacing the element with a\n     * zero-length element. So whatever we get passed as 'where', set\n     * it to LP_REPLACE. */\n    if (del_ele) where = LP_REPLACE;\n\n    /* If we need to insert after the current element, we just jump to the\n     * next element (that could be the EOF one) and handle the case of\n     * inserting before. So the function will actually deal with just two\n     * cases: LP_BEFORE and LP_REPLACE. */\n    if (where == LP_AFTER) {\n        p = lpSkip(p);\n        where = LP_BEFORE;\n        ASSERT_INTEGRITY(lp, p);\n    }\n\n    /* Store the offset of the element 'p', so that we can obtain its\n     * address again after a reallocation. */\n    unsigned long poff = p-lp;\n\n    int enctype;\n    if (elestr) {\n        /* Calling lpEncodeGetType() results into the encoded version of the\n        * element to be stored into 'intenc' in case it is representable as\n        * an integer: in that case, the function returns LP_ENCODING_INT.\n        * Otherwise if LP_ENCODING_STR is returned, we'll have to call\n        * lpEncodeString() to actually write the encoded string on place later.\n        *\n        * Whatever the returned encoding is, 'enclen' is populated with the\n        * length of the encoded element. */\n        enctype = lpEncodeGetType(elestr,size,intenc,&enclen);\n        if (enctype == LP_ENCODING_INT) eleint = intenc;\n    } else if (eleint) {\n        enctype = LP_ENCODING_INT;\n        enclen = size; /* 'size' is the length of the encoded integer element. 
*/\n    } else {\n        enctype = -1;\n        enclen = 0;\n    }\n\n    /* We need to also encode the backward-parsable length of the element\n     * and append it to the end: this allows to traverse the listpack from\n     * the end to the start. */\n    unsigned long backlen_size = (!del_ele) ? lpEncodeBacklen(backlen, enclen) : 0;\n    uint64_t old_listpack_bytes = lpGetTotalBytes(lp);\n    uint32_t replaced_len  = 0;\n    if (where == LP_REPLACE) {\n        replaced_len = lpCurrentEncodedSizeUnsafe(p);\n        replaced_len += lpEncodeBacklen(NULL,replaced_len);\n        ASSERT_INTEGRITY_LEN(lp, p, replaced_len);\n    }\n\n    uint64_t new_listpack_bytes = old_listpack_bytes + enclen + backlen_size - replaced_len;\n    if (new_listpack_bytes > UINT32_MAX) return NULL;\n\n    /* We now need to reallocate in order to make space or shrink the\n     * allocation (in case 'when' value is LP_REPLACE and the new element is\n     * smaller). However we do that before memmoving the memory to\n     * make room for the new element if the final allocation will get\n     * larger, or we do it after if the final allocation will get smaller. */\n\n    unsigned char *dst = lp + poff; /* May be updated after reallocation. */\n\n    /* Realloc before: we need more room. */\n    if (new_listpack_bytes > old_listpack_bytes && new_listpack_bytes > zmalloc_size(lp)) {\n        if ((lp = zrealloc(lp, new_listpack_bytes)) == NULL) return NULL;\n        dst = lp + poff;\n    }\n\n    /* Setup the listpack relocating the elements to make the exact room\n     * we need to store the new one. */\n    if (where == LP_BEFORE) {\n        memmove(dst+enclen+backlen_size,dst,old_listpack_bytes-poff);\n    } else { /* LP_REPLACE. */\n        memmove(dst + enclen + backlen_size, dst + replaced_len, old_listpack_bytes - poff - replaced_len);\n    }\n\n    /* Realloc after: we need to free space. 
*/\n    if (new_listpack_bytes < old_listpack_bytes) {\n        if ((lp = zrealloc(lp,new_listpack_bytes)) == NULL) return NULL;\n        dst = lp + poff;\n    }\n\n    /* Store the entry. */\n    if (newp) {\n        *newp = dst;\n        /* In case of deletion, set 'newp' to NULL if the next element is\n         * the EOF element. */\n        if (del_ele && dst[0] == LP_EOF) *newp = NULL;\n    }\n    if (!del_ele) {\n        if (enctype == LP_ENCODING_INT) {\n            memcpy(dst,eleint,enclen);\n        } else if (elestr) {\n            lpEncodeString(dst,elestr,size);\n        } else {\n            valkey_unreachable();\n        }\n        dst += enclen;\n        memcpy(dst,backlen,backlen_size);\n        dst += backlen_size;\n    }\n\n    /* Update header. */\n    if (where != LP_REPLACE || del_ele) {\n        uint32_t num_elements = lpGetNumElements(lp);\n        if (num_elements != LP_HDR_NUMELE_UNKNOWN) {\n            if (!del_ele)\n                lpSetNumElements(lp,num_elements+1);\n            else\n                lpSetNumElements(lp,num_elements-1);\n        }\n    }\n    lpSetTotalBytesChecked(lp,new_listpack_bytes);\n\n#if 0\n    /* This code path is normally disabled: what it does is to force listpack\n     * to return *always* a new pointer after performing some modification to\n     * the listpack, even if the previous allocation was enough. This is useful\n     * in order to spot bugs in code using listpacks: by doing so we can find\n     * if the caller forgets to set the new pointer where the listpack reference\n     * is stored, after an update. */\n    unsigned char *oldlp = lp;\n    lp = zmalloc(new_listpack_bytes);\n    memcpy(lp,oldlp,new_listpack_bytes);\n    if (newp) {\n        unsigned long offset = (*newp)-oldlp;\n        *newp = lp + offset;\n    }\n    /* Make sure the old allocation contains garbage. 
*/\n    memset(oldlp,'A',new_listpack_bytes);\n    zfree(oldlp);\n#endif\n\n    return lp;\n}\n\n/* This is just a wrapper for lpInsert() to directly use a string. */\nunsigned char *lpInsertString(unsigned char *lp, const unsigned char *s, uint32_t slen,\n                              unsigned char *p, int where, unsigned char **newp)\n{\n    return lpInsert(lp, s, NULL, slen, p, where, newp);\n}\n\n/* This is just a wrapper for lpInsert() to directly use a 64 bit integer\n * instead of a string. */\nunsigned char *lpInsertInteger(unsigned char *lp, long long lval, unsigned char *p, int where, unsigned char **newp) {\n    uint64_t enclen; /* The length of the encoded element. */\n    unsigned char intenc[LP_MAX_INT_ENCODING_LEN];\n\n    lpEncodeIntegerGetType(lval, intenc, &enclen);\n    return lpInsert(lp, NULL, intenc, enclen, p, where, newp);\n}\n\n/* Append the specified element 's' of length 'slen' at the head of the listpack. */\nunsigned char *lpPrepend(unsigned char *lp, const unsigned char *s, uint32_t slen) {\n    unsigned char *p = lpFirst(lp);\n    if (!p) return lpAppend(lp, s, slen);\n    return lpInsert(lp, s, NULL, slen, p, LP_BEFORE, NULL);\n}\n\n/* Append the specified integer element 'lval' at the head of the listpack. */\nunsigned char *lpPrependInteger(unsigned char *lp, long long lval) {\n    unsigned char *p = lpFirst(lp);\n    if (!p) return lpAppendInteger(lp, lval);\n    return lpInsertInteger(lp, lval, p, LP_BEFORE, NULL);\n}\n\n/* Append the specified element 'ele' of length 'size' at the end of the\n * listpack. It is implemented in terms of lpInsert(), so the return value is\n * the same as lpInsert(). */\nunsigned char *lpAppend(unsigned char *lp, const unsigned char *ele, uint32_t size) {\n    uint64_t listpack_bytes = lpGetTotalBytes(lp);\n    unsigned char *eofptr = lp + listpack_bytes - 1;\n    return lpInsert(lp,ele,NULL,size,eofptr,LP_BEFORE,NULL);\n}\n\n/* Append the specified integer element 'lval' at the end of the listpack. 
*/\nunsigned char *lpAppendInteger(unsigned char *lp, long long lval) {\n    uint64_t listpack_bytes = lpGetTotalBytes(lp);\n    unsigned char *eofptr = lp + listpack_bytes - 1;\n    return lpInsertInteger(lp, lval, eofptr, LP_BEFORE, NULL);\n}\n\n/* This is just a wrapper for lpInsert() to directly use a string to replace\n * the current element. The function returns the new listpack as return\n * value, and also updates the current cursor by updating '*p'. */\nunsigned char *lpReplace(unsigned char *lp, unsigned char **p, const unsigned char *s, uint32_t slen) {\n    return lpInsert(lp, s, NULL, slen, *p, LP_REPLACE, p);\n}\n\n/* This is just a wrapper for lpInsertInteger() to directly use a 64 bit integer\n * instead of a string to replace the current element. The function returns\n * the new listpack as return value, and also updates the current cursor\n * by updating '*p'. */\nunsigned char *lpReplaceInteger(unsigned char *lp, unsigned char **p, long long lval) {\n    return lpInsertInteger(lp, lval, *p, LP_REPLACE, p);\n}\n\n/* Remove the element pointed by 'p', and return the resulting listpack.\n * If 'newp' is not NULL, the next element pointer (to the right of the\n * deleted one) is returned by reference. If the deleted element was the\n * last one, '*newp' is set to NULL. */\nunsigned char *lpDelete(unsigned char *lp, unsigned char *p, unsigned char **newp) {\n    return lpInsert(lp,NULL,NULL,0,p,LP_REPLACE,newp);\n}\n\n/* Delete a range of entries from the listpack start with the element pointed by 'p'. */\nunsigned char *lpDeleteRangeWithEntry(unsigned char *lp, unsigned char **p, unsigned long num) {\n    size_t bytes = lpBytes(lp);\n    unsigned long deleted = 0;\n    unsigned char *eofptr = lp + bytes - 1;\n    unsigned char *first, *tail;\n    first = tail = *p;\n\n    if (num == 0) return lp;  /* Nothing to delete, return ASAP. 
*/\n\n    /* Find the next entry to the last entry that needs to be deleted.\n     * lpLength may be unreliable due to corrupt data, so we cannot\n     * treat 'num' as the number of elements to be deleted. */\n    while (num--) {\n        deleted++;\n        tail = lpSkip(tail);\n        if (tail[0] == LP_EOF) break;\n        lpAssertValidEntry(lp, bytes, tail);\n    }\n\n    /* Store the offset of the element 'first', so that we can obtain its\n     * address again after a reallocation. */\n    unsigned long poff = first-lp;\n\n    /* Move tail to the front of the listpack */\n    memmove(first, tail, eofptr - tail + 1);\n    lpSetTotalBytesChecked(lp, bytes - (tail - first));\n    uint32_t numele = lpGetNumElements(lp);\n    if (numele != LP_HDR_NUMELE_UNKNOWN) lpSetNumElements(lp, numele - deleted);\n    lp = lpShrinkToFit(lp);\n\n    /* Store the entry. */\n    *p = lp+poff;\n    if ((*p)[0] == LP_EOF) *p = NULL;\n\n    return lp;\n}\n\n/* Delete a range of entries from the listpack. */\nunsigned char *lpDeleteRange(unsigned char *lp, long index, unsigned long num) {\n    unsigned char *p;\n    uint32_t numele = lpGetNumElements(lp);\n\n    if (num == 0) return lp; /* Nothing to delete, return ASAP. */\n    if ((p = lpSeek(lp, index)) == NULL) return lp;\n\n    /* If we know we're gonna delete beyond the end of the listpack, we can just move\n     * the EOF marker, and there's no need to iterate through the entries,\n     * but if we can't be sure how many entries there are, we rather avoid calling lpLength\n     * since that means an additional iteration on all elements.\n     *\n     * Note that index could overflow, but we use the value after seek, so when we\n     * use it no overflow happens. 
*/\n    if (numele != LP_HDR_NUMELE_UNKNOWN && index < 0) index = (long)numele + index;\n    if (numele != LP_HDR_NUMELE_UNKNOWN && (numele - (unsigned long)index) <= num) {\n        p[0] = LP_EOF;\n        lpSetTotalBytesChecked(lp, p - lp + 1);\n        lpSetNumElements(lp, index);\n        lp = lpShrinkToFit(lp);\n    } else {\n        lp = lpDeleteRangeWithEntry(lp, &p, num);\n    }\n\n    return lp;\n}\n\n/* Merge listpacks 'first' and 'second' by appending 'second' to 'first'.\n *\n * NOTE: The larger listpack is reallocated to contain the new merged listpack.\n * Either 'first' or 'second' can be used for the result.  The parameter not\n * used will be free'd and set to NULL.\n *\n * After calling this function, the input parameters are no longer valid since\n * they are changed and free'd in-place.\n *\n * The result listpack is the contents of 'first' followed by 'second'.\n *\n * On failure: returns NULL if the merge is impossible.\n * On success: returns the merged listpack (which is expanded version of either\n * 'first' or 'second', also frees the other unused input listpack, and sets the\n * input listpack argument equal to newly reallocated listpack return value. */\nunsigned char *lpMerge(unsigned char **first, unsigned char **second) {\n    /* If any params are null, we can't merge, so NULL. */\n    if (first == NULL || *first == NULL || second == NULL || *second == NULL) return NULL;\n\n    /* Can't merge same list into itself. */\n    if (*first == *second) return NULL;\n\n    size_t first_bytes = lpBytes(*first);\n    unsigned long first_len = lpLength(*first);\n\n    size_t second_bytes = lpBytes(*second);\n    unsigned long second_len = lpLength(*second);\n\n    int append;\n    unsigned char *source, *target;\n    size_t target_bytes, source_bytes;\n    /* Pick the largest listpack so we can resize easily in-place.\n     * We must also track if we are now appending or prepending to\n     * the target listpack. 
*/\n    if (first_bytes >= second_bytes) {\n        /* retain first, append second to first. */\n        target = *first;\n        target_bytes = first_bytes;\n        source = *second;\n        source_bytes = second_bytes;\n        append = 1;\n    } else {\n        /* else, retain second, prepend first to second. */\n        target = *second;\n        target_bytes = second_bytes;\n        source = *first;\n        source_bytes = first_bytes;\n        append = 0;\n    }\n\n    /* Calculate final bytes (subtract one pair of metadata) */\n    unsigned long long lpbytes = (unsigned long long)first_bytes + second_bytes - LP_HDR_SIZE - 1;\n    assert(lpbytes < UINT32_MAX); /* larger values can't be stored */\n    unsigned long lplength = first_len + second_len;\n\n    /* Combined lp length should be limited within UINT16_MAX */\n    lplength = lplength < UINT16_MAX ? lplength : UINT16_MAX;\n\n    /* Extend target to new lpbytes then append or prepend source. */\n    target = zrealloc(target, lpbytes);\n    if (append) {\n        /* append == appending to target */\n        /* Copy source after target (copying over original [END]):\n         *   [TARGET - END, SOURCE - HEADER] */\n        memcpy(target + target_bytes - 1, source + LP_HDR_SIZE, source_bytes - LP_HDR_SIZE);\n    } else {\n        /* !append == prepending to target */\n        /* Move target *contents* exactly size of (source - [END]),\n         * then copy source into vacated space (source - [END]):\n         *   [SOURCE - END, TARGET - HEADER] */\n        memmove(target + source_bytes - 1, target + LP_HDR_SIZE, target_bytes - LP_HDR_SIZE);\n        memcpy(target, source, source_bytes - 1);\n    }\n\n    lpSetNumElements(target, lplength);\n    lpSetTotalBytesChecked(target, lpbytes);\n\n    /* Now free and NULL out what we didn't realloc */\n    if (append) {\n        zfree(*second);\n        *second = NULL;\n        *first = target;\n    } else {\n        zfree(*first);\n        *first = NULL;\n        
*second = target;\n    }\n\n    return target;\n}\n\n/* Return the total number of bytes the listpack is composed of. */\nsize_t lpBytes(unsigned char *lp) {\n    return lpGetTotalBytes(lp);\n}\n\n/* Seek the specified element and returns the pointer to the seeked element.\n * Positive indexes specify the zero-based element to seek from the head to\n * the tail, negative indexes specify elements starting from the tail, where\n * -1 means the last element, -2 the penultimate and so forth. If the index\n * is out of range, NULL is returned. */\nunsigned char *lpSeek(unsigned char *lp, long index) {\n    int forward = 1; /* Seek forward by default. */\n\n    /* We want to seek from left to right or the other way around\n     * depending on the listpack length and the element position.\n     * However if the listpack length cannot be obtained in constant time,\n     * we always seek from left to right. */\n    uint32_t numele = lpGetNumElements(lp);\n    if (numele != LP_HDR_NUMELE_UNKNOWN) {\n        if (index < 0) index = (long)numele+index;\n        if (index < 0) return NULL; /* Index still < 0 means out of range. */\n        if (index >= (long)numele) return NULL; /* Out of range the other side. */\n        /* We want to scan right-to-left if the element we are looking for\n         * is past the half of the listpack. */\n        if (index > (long)numele/2) {\n            forward = 0;\n            /* Right to left scanning always expects a negative index. Convert\n             * our index to negative form. */\n            index -= numele;\n        }\n    } else {\n        /* If the listpack length is unspecified, for negative indexes we\n         * want to always scan right-to-left. */\n        if (index < 0) forward = 0;\n    }\n\n    /* Forward and backward scanning is trivially based on lpNext()/lpPrev(). 
*/\n    if (forward) {\n        unsigned char *ele = lpFirst(lp);\n        while (index > 0 && ele) {\n            ele = lpNext(lp,ele);\n            index--;\n        }\n        return ele;\n    } else {\n        unsigned char *ele = lpLast(lp);\n        while (index < -1 && ele) {\n            ele = lpPrev(lp,ele);\n            index++;\n        }\n        return ele;\n    }\n}\n\n/* Same as lpFirst but without validation assert, to be used right before lpValidateNext. */\nunsigned char *lpValidateFirst(unsigned char *lp) {\n    unsigned char *p = lp + LP_HDR_SIZE; /* Skip the header. */\n    if (p[0] == LP_EOF) return NULL;\n    return p;\n}\n\n/* Validate the integrity of a single listpack entry and move to the next one.\n * The input argument 'pp' is a reference to the current record and is advanced on exit.\n * Returns 1 if valid, 0 if invalid. */\nint lpValidateNext(unsigned char *lp, unsigned char **pp, size_t lpbytes) {\n#define OUT_OF_RANGE(p) ((p) < lp + LP_HDR_SIZE || (p) > lp + lpbytes - 1)\n    unsigned char *p = *pp;\n    if (!p) return 0;\n\n    /* Before accessing p, make sure it's valid. */\n    if (OUT_OF_RANGE(p)) return 0;\n\n    if (*p == LP_EOF) {\n        *pp = NULL;\n        return 1;\n    }\n\n    /* check that we can read the encoded size */\n    uint32_t lenbytes = lpCurrentEncodedSizeBytes(p);\n    if (!lenbytes) return 0;\n\n    /* make sure the encoded entry length doesn't reach outside the edge of the listpack */\n    if (OUT_OF_RANGE(p + lenbytes)) return 0;\n\n    /* get the entry length and encoded backlen. */\n    unsigned long entrylen = lpCurrentEncodedSizeUnsafe(p);\n    unsigned long encodedBacklen = lpEncodeBacklen(NULL,entrylen);\n    entrylen += encodedBacklen;\n\n    /* make sure the entry doesn't reach outside the edge of the listpack */\n    if (OUT_OF_RANGE(p + entrylen)) return 0;\n\n    /* move to the next entry */\n    p += entrylen;\n\n    /* make sure the encoded length at the end patches the one at the beginning. 
*/\n    uint64_t prevlen = lpDecodeBacklen(p-1);\n    if (prevlen + encodedBacklen != entrylen) return 0;\n\n    *pp = p;\n    return 1;\n#undef OUT_OF_RANGE\n}\n\n/* Validate that the entry doesn't reach outside the listpack allocation. */\nstatic inline void lpAssertValidEntry(unsigned char* lp, size_t lpbytes, unsigned char *p) {\n    assert(lpValidateNext(lp, &p, lpbytes));\n}\n\n/* Validate the integrity of the data structure.\n * when `deep` is 0, only the integrity of the header is validated.\n * when `deep` is 1, we scan all the entries one by one. */\nint lpValidateIntegrity(unsigned char *lp, size_t size, int deep, listpackValidateEntryCB entry_cb, void *cb_userdata) {\n    /* Check that we can actually read the header. (and EOF) */\n    if (size < LP_HDR_SIZE + 1) return 0;\n\n    /* Check that the encoded size in the header must match the allocated size. */\n    size_t bytes = lpGetTotalBytes(lp);\n    if (bytes != size) return 0;\n\n    /* The last byte must be the terminator. */\n    if (lp[size - 1] != LP_EOF) return 0;\n\n    if (!deep) return 1;\n\n    /* Validate the individual entries. */\n    uint32_t count = 0;\n    uint32_t numele = lpGetNumElements(lp);\n    unsigned char *p = lp + LP_HDR_SIZE;\n    while(p && p[0] != LP_EOF) {\n        unsigned char *prev = p;\n\n        /* Validate this entry and move to the next entry in advance\n         * to avoid callback crash due to corrupt listpack. */\n        if (!lpValidateNext(lp, &p, bytes)) return 0;\n\n        /* Optionally let the caller validate the entry too. */\n        if (entry_cb && !entry_cb(prev, numele, cb_userdata)) return 0;\n\n        count++;\n    }\n\n    /* Make sure 'p' really does point to the end of the listpack. 
*/\n    if (p != lp + size - 1) return 0;\n\n    /* Check that the count in the header is correct */\n    if (numele != LP_HDR_NUMELE_UNKNOWN && numele != count) return 0;\n\n    return 1;\n}\n\n/* Compare entry pointer to by 'p' with string 's' of length 'slen'.\n * Return 1 if equal. */\nunsigned int lpCompare(unsigned char *p, const unsigned char *s, uint32_t slen) {\n    unsigned char *value;\n    int64_t sz;\n    if (p[0] == LP_EOF) return 0;\n\n    value = lpGet(p, &sz, NULL);\n    if (value) {\n        return (slen == sz) && memcmp(value,s,slen) == 0;\n    } else {\n        /* We use lpStringToInt64() to get an integer representation of the\n         * string 's' and compare it to 'sval', it's much faster than convert\n         * integer to string and comparing. */\n        int64_t sval;\n        if (lpStringToInt64((const char *)s, slen, &sval)) return sz == sval;\n    }\n\n    return 0;\n}\n\n/* uint compare for qsort */\nstatic int uintCompare(const void *a, const void *b) {\n    return (*(unsigned int *) a - *(unsigned int *) b);\n}\n\n/* Helper method to store a string into from val or lval into dest */\nstatic inline void lpSaveValue(unsigned char *val, unsigned int len, int64_t lval, listpackEntry *dest) {\n    dest->sval = val;\n    dest->slen = len;\n    dest->lval = lval;\n}\n\n/* Randomly select a pair of key and value.\n * total_count is a pre-computed length/2 of the listpack (to avoid calls to lpLength)\n * 'key' and 'val' are used to store the result key value pair.\n * 'val' can be NULL if the value is not needed. 
*/\nvoid lpRandomPair(unsigned char *lp, unsigned long total_count, listpackEntry *key, listpackEntry *val) {\n    unsigned char *p;\n\n    /* Avoid div by zero on corrupt listpack */\n    assert(total_count);\n\n    /* Generate even numbers, because listpack saved K-V pair */\n    int r = (rand() % total_count) * 2;\n    p = lpSeek(lp, r);\n    assert(p);\n    key->sval = lpGetValue(p, &(key->slen), &(key->lval));\n\n    if (!val)\n        return;\n    p = lpNext(lp, p);\n    assert(p);\n    val->sval = lpGetValue(p, &(val->slen), &(val->lval));\n}\n\n/* Randomly select count of key value pairs and store into 'keys' and\n * 'vals' args. The order of the picked entries is random, and the selections\n * are non-unique (repetitions are possible).\n * The 'vals' arg can be NULL in which case we skip these. */\nvoid lpRandomPairs(unsigned char *lp, unsigned int count, listpackEntry *keys, listpackEntry *vals) {\n    unsigned char *p, *key, *value;\n    unsigned int klen = 0, vlen = 0;\n    long long klval = 0, vlval = 0;\n\n    /* Notice: the index member must be first due to the use in uintCompare */\n    typedef struct {\n        unsigned int index;\n        unsigned int order;\n    } rand_pick;\n    rand_pick *picks = zmalloc(sizeof(rand_pick)*count);\n    unsigned int total_size = lpLength(lp)/2;\n\n    /* Avoid div by zero on corrupt listpack */\n    assert(total_size);\n\n    /* create a pool of random indexes (some may be duplicate). */\n    for (unsigned int i = 0; i < count; i++) {\n        picks[i].index = (rand() % total_size) * 2; /* Generate even indexes */\n        /* keep track of the order we picked them */\n        picks[i].order = i;\n    }\n\n    /* sort by indexes. */\n    qsort(picks, count, sizeof(rand_pick), uintCompare);\n\n    /* fetch the elements form the listpack into a output array respecting the original order. 
*/\n    unsigned int lpindex = picks[0].index, pickindex = 0;\n    p = lpSeek(lp, lpindex);\n    while (p && pickindex < count) {\n        key = lpGetValue(p, &klen, &klval);\n        p = lpNext(lp, p);\n        assert(p);\n        value = lpGetValue(p, &vlen, &vlval);\n        while (pickindex < count && lpindex == picks[pickindex].index) {\n            int storeorder = picks[pickindex].order;\n            lpSaveValue(key, klen, klval, &keys[storeorder]);\n            if (vals) lpSaveValue(value, vlen, vlval, &vals[storeorder]);\n             pickindex++;\n        }\n        lpindex += 2;\n        p = lpNext(lp, p);\n    }\n\n    zfree(picks);\n}\n\n/* Randomly select count of key value pairs and store into 'keys' and\n * 'vals' args. The selections are unique (no repetitions), and the order of\n * the picked entries is NOT-random.\n * The 'vals' arg can be NULL in which case we skip these.\n * The return value is the number of items picked which can be lower than the\n * requested count if the listpack doesn't hold enough pairs. 
*/\nunsigned int lpRandomPairsUnique(unsigned char *lp, unsigned int count, listpackEntry *keys, listpackEntry *vals) {\n    unsigned char *p, *key;\n    unsigned int klen = 0;\n    long long klval = 0;\n    unsigned int total_size = lpLength(lp)/2;\n    unsigned int index = 0;\n    if (count > total_size) count = total_size;\n\n    /* To only iterate once, every time we try to pick a member, the probability\n     * we pick it is the quotient of the count left we want to pick and the\n     * count still we haven't visited in the dict, this way, we could make every\n     * member be equally picked.*/\n    p = lpFirst(lp);\n    unsigned int picked = 0, remaining = count;\n    while (picked < count && p) {\n        double randomDouble = ((double)rand()) / RAND_MAX;\n        double threshold = ((double)remaining) / (total_size - index);\n        if (randomDouble <= threshold) {\n            key = lpGetValue(p, &klen, &klval);\n            lpSaveValue(key, klen, klval, &keys[picked]);\n            p = lpNext(lp, p);\n            assert(p);\n            if (vals) {\n                key = lpGetValue(p, &klen, &klval);\n                lpSaveValue(key, klen, klval, &vals[picked]);\n            }\n            remaining--;\n            picked++;\n        } else {\n            p = lpNext(lp, p);\n            assert(p);\n        }\n        p = lpNext(lp, p);\n        index++;\n    }\n    return picked;\n}\n\n/* Print info of listpack which is used in debugCommand */\nvoid lpRepr(unsigned char *lp) {\n    unsigned char *p, *vstr;\n    int64_t vlen;\n    unsigned char intbuf[LP_INTBUF_SIZE];\n    int index = 0;\n\n    printf(\"{total bytes %zu} {num entries %lu}\\n\", lpBytes(lp), lpLength(lp));\n        \n    p = lpFirst(lp);\n    while(p) {\n        uint32_t encoded_size_bytes = lpCurrentEncodedSizeBytes(p);\n        uint32_t encoded_size = lpCurrentEncodedSizeUnsafe(p);\n        unsigned long back_len = lpEncodeBacklen(NULL, encoded_size);\n        printf(\"{\\n\"\n           
     \"\\taddr: 0x%08lx,\\n\"\n                \"\\tindex: %2d,\\n\"\n                \"\\toffset: %1lu,\\n\"\n                \"\\thdr+entrylen+backlen: %2lu,\\n\"\n                \"\\thdrlen: %3u,\\n\"\n                \"\\tbacklen: %2lu,\\n\"\n                \"\\tpayload: %1u\\n\",\n               (long unsigned)p, index, (unsigned long)(p - lp), encoded_size + back_len, encoded_size_bytes, back_len,\n            encoded_size - encoded_size_bytes);\n        printf(\"\\tbytes: \");\n        for (unsigned int i = 0; i < (encoded_size + back_len); i++) {\n            printf(\"%02x|\",p[i]);\n        }\n        printf(\"\\n\");\n\n        vstr = lpGet(p, &vlen, intbuf);\n        printf(\"\\t[str]\");\n        if (vlen > 40) {\n            if (fwrite(vstr, 40, 1, stdout) == 0) perror(\"fwrite\");\n            printf(\"...\");\n        } else {\n            if (fwrite(vstr, vlen, 1, stdout) == 0) perror(\"fwrite\");\n        }\n        printf(\"\\n}\\n\");\n        index++;\n        p = lpNext(lp, p);\n    }\n    printf(\"{end}\\n\\n\");\n}\n\n#ifdef REDIS_TEST\n\n#include <sys/time.h>\n#include \"adlist.h\"\n#include \"sds.h\"\n#include \"testhelp.h\"\n\n#define UNUSED(x) (void)(x)\n#define TEST(name) printf(\"test — %s\\n\", name);\n\nchar *mixlist[] = {\"hello\", \"foo\", \"quux\", \"1024\"};\nchar *intlist[] = {\"4294967296\", \"-100\", \"100\", \"128000\", \n                   \"non integer\", \"much much longer non integer\"};\n\nstatic unsigned char *createList() {\n    unsigned char *lp = lpNew(0);\n    lp = lpAppend(lp, (unsigned char*)mixlist[1], strlen(mixlist[1]));\n    lp = lpAppend(lp, (unsigned char*)mixlist[2], strlen(mixlist[2]));\n    lp = lpPrepend(lp, (unsigned char*)mixlist[0], strlen(mixlist[0]));\n    lp = lpAppend(lp, (unsigned char*)mixlist[3], strlen(mixlist[3]));\n    return lp;\n}\n\nstatic unsigned char *createIntList() {\n    unsigned char *lp = lpNew(0);\n    lp = lpAppend(lp, (unsigned char*)intlist[2], strlen(intlist[2]));\n    lp = 
lpAppend(lp, (unsigned char*)intlist[3], strlen(intlist[3]));\n    lp = lpPrepend(lp, (unsigned char*)intlist[1], strlen(intlist[1]));\n    lp = lpPrepend(lp, (unsigned char*)intlist[0], strlen(intlist[0]));\n    lp = lpAppend(lp, (unsigned char*)intlist[4], strlen(intlist[4]));\n    lp = lpAppend(lp, (unsigned char*)intlist[5], strlen(intlist[5]));\n    return lp;\n}\n\nstatic long long usec(void) {\n    struct timeval tv;\n    gettimeofday(&tv, NULL);\n    return (((long long)tv.tv_sec)*1000000)+tv.tv_usec;\n}\n\nstatic void stress(int pos, int num, int maxsize, int dnum) {\n    int i, j, k;\n    unsigned char *lp;\n    char posstr[2][5] = { \"HEAD\", \"TAIL\" };\n    long long start;\n    for (i = 0; i < maxsize; i+=dnum) {\n        lp = lpNew(0);\n        for (j = 0; j < i; j++) {\n            lp = lpAppend(lp, (unsigned char*)\"quux\", 4);\n        }\n\n        /* Do num times a push+pop from pos */\n        start = usec();\n        for (k = 0; k < num; k++) {\n            if (pos == 0) {\n                lp = lpPrepend(lp, (unsigned char*)\"quux\", 4);\n            } else {\n                lp = lpAppend(lp, (unsigned char*)\"quux\", 4);\n\n            }\n            lp = lpDelete(lp, lpFirst(lp), NULL);\n        }\n        printf(\"List size: %8d, bytes: %8zu, %dx push+pop (%s): %6lld usec\\n\",\n               i, lpBytes(lp), num, posstr[pos], usec()-start);\n        lpFree(lp);\n    }\n}\n\nstatic unsigned char *pop(unsigned char *lp, int where) {\n    unsigned char *p, *vstr;\n    int64_t vlen;\n\n    p = lpSeek(lp, where == 0 ? 
0 : -1);\n    vstr = lpGet(p, &vlen, NULL);\n    if (where == 0)\n        printf(\"Pop head: \");\n    else\n        printf(\"Pop tail: \");\n\n    if (vstr) {\n        if (vlen && fwrite(vstr, vlen, 1, stdout) == 0) perror(\"fwrite\");\n    } else {\n        printf(\"%lld\", (long long)vlen);\n    }\n\n    printf(\"\\n\");\n    return lpDelete(lp, p, &p);\n}\n\nstatic int randstring(char *target, unsigned int min, unsigned int max) {\n    int p = 0;\n    int len = min+rand()%(max-min+1);\n    int minval, maxval;\n    switch(rand() % 3) {\n    case 0:\n        minval = 0;\n        maxval = 255;\n    break;\n    case 1:\n        minval = 48;\n        maxval = 122;\n    break;\n    case 2:\n        minval = 48;\n        maxval = 52;\n    break;\n    default:\n        assert(NULL);\n    }\n\n    while(p < len)\n        target[p++] = minval+rand()%(maxval-minval+1);\n    return len;\n}\n\nstatic void verifyEntry(unsigned char *p, unsigned char *s, size_t slen) {\n    assert(lpCompare(p, s, slen));\n}\n\nstatic int lpValidation(unsigned char *p, unsigned int head_count, void *userdata) {\n    UNUSED(p);\n    UNUSED(head_count);\n\n    int ret;\n    long *count = userdata;\n    ret = lpCompare(p, (unsigned char *)mixlist[*count], strlen(mixlist[*count]));\n    (*count)++;\n    return ret;\n}\n\nint listpackTest(int argc, char *argv[], int flags) {\n    UNUSED(argc);\n    UNUSED(argv);\n\n    int i;\n    unsigned char *lp, *p, *vstr;\n    int64_t vlen;\n    unsigned char intbuf[LP_INTBUF_SIZE];\n    int accurate = (flags & REDIS_TEST_ACCURATE);\n\n    TEST(\"Create int list\") {\n        lp = createIntList();\n        assert(lpLength(lp) == 6);\n        lpFree(lp);\n    }\n\n    TEST(\"Create list\") {\n        lp = createList();\n        assert(lpLength(lp) == 4);\n        lpFree(lp);\n    }\n\n    TEST(\"Test lpPrepend\") {\n        lp = lpNew(0);\n        lp = lpPrepend(lp, (unsigned char*)\"abc\", 3);\n        lp = lpPrepend(lp, (unsigned char*)\"1024\", 4);\n        
verifyEntry(lpSeek(lp, 0), (unsigned char*)\"1024\", 4);\n        verifyEntry(lpSeek(lp, 1), (unsigned char*)\"abc\", 3);\n        lpFree(lp);\n    }\n\n    TEST(\"Test lpPrependInteger\") {\n        lp = lpNew(0);\n        lp = lpPrependInteger(lp, 127);\n        lp = lpPrependInteger(lp, 4095);\n        lp = lpPrependInteger(lp, 32767);\n        lp = lpPrependInteger(lp, 8388607);\n        lp = lpPrependInteger(lp, 2147483647);\n        lp = lpPrependInteger(lp, 9223372036854775807);\n        verifyEntry(lpSeek(lp, 0), (unsigned char*)\"9223372036854775807\", 19);\n        verifyEntry(lpSeek(lp, -1), (unsigned char*)\"127\", 3);\n        lpFree(lp);\n    }\n\n    TEST(\"Get element at index\") {\n        lp = createList();\n        verifyEntry(lpSeek(lp, 0), (unsigned char*)\"hello\", 5);\n        verifyEntry(lpSeek(lp, 3), (unsigned char*)\"1024\", 4);\n        verifyEntry(lpSeek(lp, -1), (unsigned char*)\"1024\", 4);\n        verifyEntry(lpSeek(lp, -4), (unsigned char*)\"hello\", 5);\n        assert(lpSeek(lp, 4) == NULL);\n        assert(lpSeek(lp, -5) == NULL);\n        lpFree(lp);\n    }\n    \n    TEST(\"Pop list\") {\n        lp = createList();\n        lp = pop(lp, 1);\n        lp = pop(lp, 0);\n        lp = pop(lp, 1);\n        lp = pop(lp, 1);\n        lpFree(lp);\n    }\n\n    TEST(\"Get element at index\") {\n        lp = createList();\n        verifyEntry(lpSeek(lp, 0), (unsigned char*)\"hello\", 5);\n        verifyEntry(lpSeek(lp, 3), (unsigned char*)\"1024\", 4);\n        verifyEntry(lpSeek(lp, -1), (unsigned char*)\"1024\", 4);\n        verifyEntry(lpSeek(lp, -4), (unsigned char*)\"hello\", 5);\n        assert(lpSeek(lp, 4) == NULL);\n        assert(lpSeek(lp, -5) == NULL);\n        lpFree(lp);\n    }\n\n    TEST(\"Iterate list from 0 to end\") {\n        lp = createList();\n        p = lpFirst(lp);\n        i = 0;\n        while (p) {\n            verifyEntry(p, (unsigned char*)mixlist[i], strlen(mixlist[i]));\n            p = lpNext(lp, p);\n    
        i++;\n        }\n        lpFree(lp);\n    }\n    \n    TEST(\"Iterate list from 1 to end\") {\n        lp = createList();\n        i = 1;\n        p = lpSeek(lp, i);\n        while (p) {\n            verifyEntry(p, (unsigned char*)mixlist[i], strlen(mixlist[i]));\n            p = lpNext(lp, p);\n            i++;\n        }\n        lpFree(lp);\n    }\n    \n    TEST(\"Iterate list from 2 to end\") {\n        lp = createList();\n        i = 2;\n        p = lpSeek(lp, i);\n        while (p) {\n            verifyEntry(p, (unsigned char*)mixlist[i], strlen(mixlist[i]));\n            p = lpNext(lp, p);\n            i++;\n        }\n        lpFree(lp);\n    }\n    \n    TEST(\"Iterate from back to front\") {\n        lp = createList();\n        p = lpLast(lp);\n        i = 3;\n        while (p) {\n            verifyEntry(p, (unsigned char*)mixlist[i], strlen(mixlist[i]));\n            p = lpPrev(lp, p);\n            i--;\n        }\n        lpFree(lp);\n    }\n    \n    TEST(\"Iterate from back to front, deleting all items\") {\n        lp = createList();\n        p = lpLast(lp);\n        i = 3;\n        while ((p = lpLast(lp))) {\n            verifyEntry(p, (unsigned char*)mixlist[i], strlen(mixlist[i]));\n            lp = lpDelete(lp, p, &p);\n            assert(p == NULL);\n            i--;\n        }\n        lpFree(lp);\n    }\n\n    TEST(\"Delete whole listpack when num == -1\");\n    {\n        lp = createList();\n        lp = lpDeleteRange(lp, 0, -1);\n        assert(lpLength(lp) == 0);\n        assert(lp[LP_HDR_SIZE] == LP_EOF);\n        assert(lpBytes(lp) == (LP_HDR_SIZE + 1));\n        zfree(lp);\n\n        lp = createList();\n        unsigned char *ptr = lpFirst(lp);\n        lp = lpDeleteRangeWithEntry(lp, &ptr, -1);\n        assert(lpLength(lp) == 0);\n        assert(lp[LP_HDR_SIZE] == LP_EOF);\n        assert(lpBytes(lp) == (LP_HDR_SIZE + 1));\n        zfree(lp);\n    }\n\n    TEST(\"Delete whole listpack with negative index\");\n    {\n        lp 
= createList();\n        lp = lpDeleteRange(lp, -4, 4);\n        assert(lpLength(lp) == 0);\n        assert(lp[LP_HDR_SIZE] == LP_EOF);\n        assert(lpBytes(lp) == (LP_HDR_SIZE + 1));\n        zfree(lp);\n\n        lp = createList();\n        unsigned char *ptr = lpSeek(lp, -4);\n        lp = lpDeleteRangeWithEntry(lp, &ptr, 4);\n        assert(lpLength(lp) == 0);\n        assert(lp[LP_HDR_SIZE] == LP_EOF);\n        assert(lpBytes(lp) == (LP_HDR_SIZE + 1));\n        zfree(lp);\n    }\n\n    TEST(\"Delete inclusive range 0,0\");\n    {\n        lp = createList();\n        lp = lpDeleteRange(lp, 0, 1);\n        assert(lpLength(lp) == 3);\n        assert(lpSkip(lpLast(lp))[0] == LP_EOF); /* check set LP_EOF correctly */\n        zfree(lp);\n\n        lp = createList();\n        unsigned char *ptr = lpFirst(lp);\n        lp = lpDeleteRangeWithEntry(lp, &ptr, 1);\n        assert(lpLength(lp) == 3);\n        assert(lpSkip(lpLast(lp))[0] == LP_EOF); /* check set LP_EOF correctly */\n        zfree(lp);\n    }\n\n    TEST(\"Delete inclusive range 0,1\");\n    {\n        lp = createList();\n        lp = lpDeleteRange(lp, 0, 2);\n        assert(lpLength(lp) == 2);\n        verifyEntry(lpFirst(lp), (unsigned char*)mixlist[2], strlen(mixlist[2]));\n        zfree(lp);\n\n        lp = createList();\n        unsigned char *ptr = lpFirst(lp);\n        lp = lpDeleteRangeWithEntry(lp, &ptr, 2);\n        assert(lpLength(lp) == 2);\n        verifyEntry(lpFirst(lp), (unsigned char*)mixlist[2], strlen(mixlist[2]));\n        zfree(lp);\n    }\n\n    TEST(\"Delete inclusive range 1,2\");\n    {\n        lp = createList();\n        lp = lpDeleteRange(lp, 1, 2);\n        assert(lpLength(lp) == 2);\n        verifyEntry(lpFirst(lp), (unsigned char*)mixlist[0], strlen(mixlist[0]));\n        zfree(lp);\n\n        lp = createList();\n        unsigned char *ptr = lpSeek(lp, 1);\n        lp = lpDeleteRangeWithEntry(lp, &ptr, 2);\n        assert(lpLength(lp) == 2);\n        
verifyEntry(lpFirst(lp), (unsigned char*)mixlist[0], strlen(mixlist[0]));\n        zfree(lp);\n    }\n    \n    TEST(\"Delete with start index out of range\");\n    {\n        lp = createList();\n        lp = lpDeleteRange(lp, 5, 1);\n        assert(lpLength(lp) == 4);\n        zfree(lp);\n    }\n\n    TEST(\"Delete with num overflow\");\n    {\n        lp = createList();\n        lp = lpDeleteRange(lp, 1, 5);\n        assert(lpLength(lp) == 1);\n        verifyEntry(lpFirst(lp), (unsigned char*)mixlist[0], strlen(mixlist[0]));\n        zfree(lp);\n\n        lp = createList();\n        unsigned char *ptr = lpSeek(lp, 1);\n        lp = lpDeleteRangeWithEntry(lp, &ptr, 5);\n        assert(lpLength(lp) == 1);\n        verifyEntry(lpFirst(lp), (unsigned char*)mixlist[0], strlen(mixlist[0]));\n        zfree(lp);\n    }\n\n    TEST(\"Delete foo while iterating\") {\n        lp = createList();\n        p = lpFirst(lp);\n        while (p) {\n            if (lpCompare(p, (unsigned char*)\"foo\", 3)) {\n                lp = lpDelete(lp, p, &p);\n            } else {\n                p = lpNext(lp, p);\n            }\n        }\n        lpFree(lp);\n    }\n\n    TEST(\"Replace with same size\") {\n        lp = createList(); /* \"hello\", \"foo\", \"quux\", \"1024\" */\n        unsigned char *orig_lp = lp;\n        p = lpSeek(lp, 0);\n        lp = lpReplace(lp, &p, (unsigned char*)\"zoink\", 5);\n        p = lpSeek(lp, 3);\n        lp = lpReplace(lp, &p, (unsigned char*)\"y\", 1);\n        p = lpSeek(lp, 1);\n        lp = lpReplace(lp, &p, (unsigned char*)\"65536\", 5);\n        p = lpSeek(lp, 0);\n        assert(!memcmp((char*)p,\n                       \"\\x85zoink\\x06\"\n                       \"\\xf2\\x00\\x00\\x01\\x04\" /* 65536 as int24 */\n                       \"\\x84quux\\05\" \"\\x81y\\x02\" \"\\xff\",\n                       22));\n        assert(lp == orig_lp); /* no reallocations have happened */\n        lpFree(lp);\n    }\n\n    TEST(\"Replace with different 
size\") {\n        lp = createList(); /* \"hello\", \"foo\", \"quux\", \"1024\" */\n        p = lpSeek(lp, 1);\n        lp = lpReplace(lp, &p, (unsigned char*)\"squirrel\", 8);\n        p = lpSeek(lp, 0);\n        assert(!strncmp((char*)p,\n                        \"\\x85hello\\x06\" \"\\x88squirrel\\x09\" \"\\x84quux\\x05\"\n                        \"\\xc4\\x00\\x02\" \"\\xff\",\n                        27));\n        lpFree(lp);\n    }\n\n    TEST(\"Regression test for >255 byte strings\") {\n        char v1[257] = {0}, v2[257] = {0};\n        memset(v1,'x',256);\n        memset(v2,'y',256);\n        lp = lpNew(0);\n        lp = lpAppend(lp, (unsigned char*)v1 ,strlen(v1));\n        lp = lpAppend(lp, (unsigned char*)v2 ,strlen(v2));\n\n        /* Pop values again and compare their value. */\n        p = lpFirst(lp);\n        vstr = lpGet(p, &vlen, NULL);\n        assert(strncmp(v1, (char*)vstr, vlen) == 0);\n        p = lpSeek(lp, 1);\n        vstr = lpGet(p, &vlen, NULL);\n        assert(strncmp(v2, (char*)vstr, vlen) == 0);\n        lpFree(lp);\n    }\n\n    TEST(\"Create long list and check indices\") {\n        lp = lpNew(0);\n        char buf[32];\n        int i,len;\n        for (i = 0; i < 1000; i++) {\n            len = sprintf(buf, \"%d\", i);\n            lp = lpAppend(lp, (unsigned char*)buf, len);\n        }\n        for (i = 0; i < 1000; i++) {\n            p = lpSeek(lp, i);\n            vstr = lpGet(p, &vlen, NULL);\n            assert(i == vlen);\n\n            p = lpSeek(lp, -i-1);\n            vstr = lpGet(p, &vlen, NULL);\n            assert(999-i == vlen);\n        }\n        lpFree(lp);\n    }\n\n    TEST(\"Compare strings with listpack entries\") {\n        lp = createList();\n        p = lpSeek(lp,0);\n        assert(lpCompare(p,(unsigned char*)\"hello\",5));\n        assert(!lpCompare(p,(unsigned char*)\"hella\",5));\n\n        p = lpSeek(lp,3);\n        assert(lpCompare(p,(unsigned char*)\"1024\",4));\n        
assert(!lpCompare(p,(unsigned char*)\"1025\",4));\n        lpFree(lp);\n    }\n\n    TEST(\"lpMerge two empty listpacks\") {\n        unsigned char *lp1 = lpNew(0);\n        unsigned char *lp2 = lpNew(0);\n\n        /* Merge two empty listpacks, get empty result back. */\n        lp1 = lpMerge(&lp1, &lp2);\n        assert(lpLength(lp1) == 0);\n        zfree(lp1);\n    }\n\n    TEST(\"lpMerge two listpacks - first larger than second\") {\n        unsigned char *lp1 = createIntList();\n        unsigned char *lp2 = createList();\n\n        size_t lp1_bytes = lpBytes(lp1);\n        size_t lp2_bytes = lpBytes(lp2);\n        unsigned long lp1_len = lpLength(lp1);\n        unsigned long lp2_len = lpLength(lp2);\n\n        unsigned char *lp3 = lpMerge(&lp1, &lp2);\n        assert(lp3 == lp1);\n        assert(lp2 == NULL);\n        assert(lpLength(lp3) == (lp1_len + lp2_len));\n        assert(lpBytes(lp3) == (lp1_bytes + lp2_bytes - LP_HDR_SIZE - 1));\n        verifyEntry(lpSeek(lp3, 0), (unsigned char*)\"4294967296\", 10);\n        verifyEntry(lpSeek(lp3, 5), (unsigned char*)\"much much longer non integer\", 28);\n        verifyEntry(lpSeek(lp3, 6), (unsigned char*)\"hello\", 5);\n        verifyEntry(lpSeek(lp3, -1), (unsigned char*)\"1024\", 4);\n        zfree(lp3);\n    }\n\n    TEST(\"lpMerge two listpacks - second larger than first\") {\n        unsigned char *lp1 = createList();\n        unsigned char *lp2 = createIntList();\n\n        size_t lp1_bytes = lpBytes(lp1);\n        size_t lp2_bytes = lpBytes(lp2);\n        unsigned long lp1_len = lpLength(lp1);\n        unsigned long lp2_len = lpLength(lp2);\n\n        unsigned char *lp3 = lpMerge(&lp1, &lp2);\n        assert(lp3 == lp2);\n        assert(lp1 == NULL);\n        assert(lpLength(lp3) == (lp1_len + lp2_len));\n        assert(lpBytes(lp3) == (lp1_bytes + lp2_bytes - LP_HDR_SIZE - 1));\n        verifyEntry(lpSeek(lp3, 0), (unsigned char*)\"hello\", 5);\n        verifyEntry(lpSeek(lp3, 3), (unsigned 
char*)\"1024\", 4);\n        verifyEntry(lpSeek(lp3, 4), (unsigned char*)\"4294967296\", 10);\n        verifyEntry(lpSeek(lp3, -1), (unsigned char*)\"much much longer non integer\", 28);\n        zfree(lp3);\n    }\n\n    TEST(\"Random pair with one element\") {\n        listpackEntry key, val;\n        unsigned char *lp = lpNew(0);\n        lp = lpAppend(lp, (unsigned char*)\"abc\", 3);\n        lp = lpAppend(lp, (unsigned char*)\"123\", 3);\n        lpRandomPair(lp, 1, &key, &val);\n        assert(memcmp(key.sval, \"abc\", key.slen) == 0);\n        assert(val.lval == 123);\n        lpFree(lp);\n    }\n\n    TEST(\"Random pair with many elements\") {\n        listpackEntry key, val;\n        unsigned char *lp = lpNew(0);\n        lp = lpAppend(lp, (unsigned char*)\"abc\", 3);\n        lp = lpAppend(lp, (unsigned char*)\"123\", 3);\n        lp = lpAppend(lp, (unsigned char*)\"456\", 3);\n        lp = lpAppend(lp, (unsigned char*)\"def\", 3);\n        lpRandomPair(lp, 2, &key, &val);\n        if (key.sval) {\n            assert(!memcmp(key.sval, \"abc\", key.slen));\n            assert(key.slen == 3);\n            assert(val.lval == 123);\n        }\n        if (!key.sval) {\n            assert(key.lval == 456);\n            assert(!memcmp(val.sval, \"def\", val.slen));\n        }\n        lpFree(lp);\n    }\n\n    TEST(\"Random pairs with one element\") {\n        int count = 5;\n        unsigned char *lp = lpNew(0);\n        listpackEntry *keys = zmalloc(sizeof(listpackEntry) * count);\n        listpackEntry *vals = zmalloc(sizeof(listpackEntry) * count);\n\n        lp = lpAppend(lp, (unsigned char*)\"abc\", 3);\n        lp = lpAppend(lp, (unsigned char*)\"123\", 3);\n        lpRandomPairs(lp, count, keys, vals);\n        assert(memcmp(keys[4].sval, \"abc\", keys[4].slen) == 0);\n        assert(vals[4].lval == 123);\n        zfree(keys);\n        zfree(vals);\n        lpFree(lp);\n    }\n\n    TEST(\"Random pairs with many elements\") {\n        int count = 5;\n   
     lp = lpNew(0);\n        listpackEntry *keys = zmalloc(sizeof(listpackEntry) * count);\n        listpackEntry *vals = zmalloc(sizeof(listpackEntry) * count);\n\n        lp = lpAppend(lp, (unsigned char*)\"abc\", 3);\n        lp = lpAppend(lp, (unsigned char*)\"123\", 3);\n        lp = lpAppend(lp, (unsigned char*)\"456\", 3);\n        lp = lpAppend(lp, (unsigned char*)\"def\", 3);\n        lpRandomPairs(lp, count, keys, vals);\n        for (int i = 0; i < count; i++) {\n            if (keys[i].sval) {\n                assert(!memcmp(keys[i].sval, \"abc\", keys[i].slen));\n                assert(keys[i].slen == 3);\n                assert(vals[i].lval == 123);\n            }\n            if (!keys[i].sval) {\n                assert(keys[i].lval == 456);\n                assert(!memcmp(vals[i].sval, \"def\", vals[i].slen));\n            }\n        }\n        zfree(keys);\n        zfree(vals);\n        lpFree(lp);\n    }\n\n    TEST(\"Random pairs unique with one element\") {\n        unsigned picked;\n        int count = 5;\n        lp = lpNew(0);\n        listpackEntry *keys = zmalloc(sizeof(listpackEntry) * count);\n        listpackEntry *vals = zmalloc(sizeof(listpackEntry) * count);\n\n        lp = lpAppend(lp, (unsigned char*)\"abc\", 3);\n        lp = lpAppend(lp, (unsigned char*)\"123\", 3);\n        picked = lpRandomPairsUnique(lp, count, keys, vals);\n        assert(picked == 1);\n        assert(memcmp(keys[0].sval, \"abc\", keys[0].slen) == 0);\n        assert(vals[0].lval == 123);\n        zfree(keys);\n        zfree(vals);\n        lpFree(lp);\n    }\n\n    TEST(\"Random pairs unique with many elements\") {\n        unsigned picked;\n        int count = 5;\n        lp = lpNew(0);\n        listpackEntry *keys = zmalloc(sizeof(listpackEntry) * count);\n        listpackEntry *vals = zmalloc(sizeof(listpackEntry) * count);\n\n        lp = lpAppend(lp, (unsigned char*)\"abc\", 3);\n        lp = lpAppend(lp, (unsigned char*)\"123\", 3);\n        lp = 
lpAppend(lp, (unsigned char*)\"456\", 3);\n        lp = lpAppend(lp, (unsigned char*)\"def\", 3);\n        picked = lpRandomPairsUnique(lp, count, keys, vals);\n        assert(picked == 2);\n        for (int i = 0; i < 2; i++) {\n            if (keys[i].sval) {\n                assert(!memcmp(keys[i].sval, \"abc\", keys[i].slen));\n                assert(keys[i].slen == 3);\n                assert(vals[i].lval == 123);\n            }\n            if (!keys[i].sval) {\n                assert(keys[i].lval == 456);\n                assert(!memcmp(vals[i].sval, \"def\", vals[i].slen));\n            }\n        }\n        zfree(keys);\n        zfree(vals);\n        lpFree(lp);\n    }\n\n    TEST(\"push various encodings\") {\n        lp = lpNew(0);\n\n        /* Push integer encode element using lpAppend */\n        lp = lpAppend(lp, (unsigned char*)\"127\", 3);\n        assert(LP_ENCODING_IS_7BIT_UINT(lpLast(lp)[0]));\n        lp = lpAppend(lp, (unsigned char*)\"4095\", 4);\n        assert(LP_ENCODING_IS_13BIT_INT(lpLast(lp)[0]));\n        lp = lpAppend(lp, (unsigned char*)\"32767\", 5);\n        assert(LP_ENCODING_IS_16BIT_INT(lpLast(lp)[0]));\n        lp = lpAppend(lp, (unsigned char*)\"8388607\", 7);\n        assert(LP_ENCODING_IS_24BIT_INT(lpLast(lp)[0]));\n        lp = lpAppend(lp, (unsigned char*)\"2147483647\", 10);\n        assert(LP_ENCODING_IS_32BIT_INT(lpLast(lp)[0]));\n        lp = lpAppend(lp, (unsigned char*)\"9223372036854775807\", 19);\n        assert(LP_ENCODING_IS_64BIT_INT(lpLast(lp)[0]));\n\n        /* Push integer encode element using lpAppendInteger */\n        lp = lpAppendInteger(lp, 127);\n        assert(LP_ENCODING_IS_7BIT_UINT(lpLast(lp)[0]));\n        verifyEntry(lpLast(lp), (unsigned char*)\"127\", 3);\n        lp = lpAppendInteger(lp, 4095);\n        verifyEntry(lpLast(lp), (unsigned char*)\"4095\", 4);\n        assert(LP_ENCODING_IS_13BIT_INT(lpLast(lp)[0]));\n        lp = lpAppendInteger(lp, 32767);\n        verifyEntry(lpLast(lp), 
(unsigned char*)\"32767\", 5);\n        assert(LP_ENCODING_IS_16BIT_INT(lpLast(lp)[0]));\n        lp = lpAppendInteger(lp, 8388607);\n        verifyEntry(lpLast(lp), (unsigned char*)\"8388607\", 7);\n        assert(LP_ENCODING_IS_24BIT_INT(lpLast(lp)[0]));\n        lp = lpAppendInteger(lp, 2147483647);\n        verifyEntry(lpLast(lp), (unsigned char*)\"2147483647\", 10);\n        assert(LP_ENCODING_IS_32BIT_INT(lpLast(lp)[0]));\n        lp = lpAppendInteger(lp, 9223372036854775807);\n        verifyEntry(lpLast(lp), (unsigned char*)\"9223372036854775807\", 19);\n        assert(LP_ENCODING_IS_64BIT_INT(lpLast(lp)[0]));\n\n        /* string encode */\n        unsigned char *str = zmalloc(65535);\n        memset(str, 0, 65535);\n        lp = lpAppend(lp, (unsigned char*)str, 63);\n        assert(LP_ENCODING_IS_6BIT_STR(lpLast(lp)[0]));\n        lp = lpAppend(lp, (unsigned char*)str, 4095);\n        assert(LP_ENCODING_IS_12BIT_STR(lpLast(lp)[0]));\n        lp = lpAppend(lp, (unsigned char*)str, 65535);\n        assert(LP_ENCODING_IS_32BIT_STR(lpLast(lp)[0]));\n        zfree(str);\n        lpFree(lp);\n    }\n\n    TEST(\"Test lpFind\") {\n        lp = createList();\n        assert(lpFind(lp, lpFirst(lp), (unsigned char*)\"abc\", 3, 0) == NULL);\n        verifyEntry(lpFind(lp, lpFirst(lp), (unsigned char*)\"hello\", 5, 0), (unsigned char*)\"hello\", 5);\n        verifyEntry(lpFind(lp, lpFirst(lp), (unsigned char*)\"1024\", 4, 0), (unsigned char*)\"1024\", 4);\n        lpFree(lp);\n    }\n\n    TEST(\"Test lpValidateIntegrity\") {\n        lp = createList();\n        long count = 0;\n        assert(lpValidateIntegrity(lp, lpBytes(lp), 1, lpValidation, &count) == 1);\n        lpFree(lp);\n    }\n\n    TEST(\"Test number of elements exceeds LP_HDR_NUMELE_UNKNOWN\") {\n        lp = lpNew(0);\n        for (int i = 0; i < LP_HDR_NUMELE_UNKNOWN + 1; i++)\n            lp = lpAppend(lp, (unsigned char*)\"1\", 1);\n\n        assert(lpGetNumElements(lp) == LP_HDR_NUMELE_UNKNOWN);\n 
       assert(lpLength(lp) == LP_HDR_NUMELE_UNKNOWN+1);\n\n        lp = lpDeleteRange(lp, -2, 2);\n        assert(lpGetNumElements(lp) == LP_HDR_NUMELE_UNKNOWN);\n        assert(lpLength(lp) == LP_HDR_NUMELE_UNKNOWN-1);\n        assert(lpGetNumElements(lp) == LP_HDR_NUMELE_UNKNOWN-1); /* update length after lpLength */\n        lpFree(lp);\n    }\n\n    TEST(\"Stress with random payloads of different encoding\") {\n        unsigned long long start = usec();\n        int i,j,len,where;\n        unsigned char *p;\n        char buf[1024];\n        int buflen;\n        list *ref;\n        listNode *refnode;\n\n        int iteration = accurate ? 20000 : 20;\n        for (i = 0; i < iteration; i++) {\n            lp = lpNew(0);\n            ref = listCreate();\n            listSetFreeMethod(ref,(void (*)(void*))sdsfree);\n            len = rand() % 256;\n\n            /* Create lists */\n            for (j = 0; j < len; j++) {\n                where = (rand() & 1) ? 0 : 1;\n                if (rand() % 2) {\n                    buflen = randstring(buf,1,sizeof(buf)-1);\n                } else {\n                    switch(rand() % 3) {\n                    case 0:\n                        buflen = sprintf(buf,\"%lld\",(0LL + rand()) >> 20);\n                        break;\n                    case 1:\n                        buflen = sprintf(buf,\"%lld\",(0LL + rand()));\n                        break;\n                    case 2:\n                        buflen = sprintf(buf,\"%lld\",(0LL + rand()) << 20);\n                        break;\n                    default:\n                        assert(NULL);\n                    }\n                }\n\n                /* Add to listpack */\n                if (where == 0) {\n                    lp = lpPrepend(lp, (unsigned char*)buf, buflen);\n                } else {\n                    lp = lpAppend(lp, (unsigned char*)buf, buflen);\n                }\n\n                /* Add to reference list */\n                if 
(where == 0) {\n                    listAddNodeHead(ref,sdsnewlen(buf, buflen));\n                } else if (where == 1) {\n                    listAddNodeTail(ref,sdsnewlen(buf, buflen));\n                } else {\n                    assert(NULL);\n                }\n            }\n\n            assert(listLength(ref) == lpLength(lp));\n            for (j = 0; j < len; j++) {\n                /* Naive way to get elements, but similar to the stresser\n                 * executed from the Tcl test suite. */\n                p = lpSeek(lp,j);\n                refnode = listIndex(ref,j);\n\n                vstr = lpGet(p, &vlen, intbuf);\n                assert(memcmp(vstr,listNodeValue(refnode),vlen) == 0);\n            }\n            lpFree(lp);\n            listRelease(ref);\n        }\n        printf(\"Done. usec=%lld\\n\\n\", usec()-start);\n    }\n\n    TEST(\"Stress with variable listpack size\") {\n        unsigned long long start = usec();\n        int maxsize = accurate ? 16384 : 16;\n        stress(0,100000,maxsize,256);\n        stress(1,100000,maxsize,256);\n        printf(\"Done. usec=%lld\\n\\n\", usec()-start);\n    }\n\n    /* Benchmarks */\n    {\n        int iteration = accurate ? 
100000 : 100;\n        lp = lpNew(0);\n        TEST(\"Benchmark lpAppend\") {\n            unsigned long long start = usec();\n            for (int i=0; i<iteration; i++) {\n                char buf[4096] = \"asdf\";\n                lp = lpAppend(lp, (unsigned char*)buf, 4);\n                lp = lpAppend(lp, (unsigned char*)buf, 40);\n                lp = lpAppend(lp, (unsigned char*)buf, 400);\n                lp = lpAppend(lp, (unsigned char*)buf, 4000);\n                lp = lpAppend(lp, (unsigned char*)\"1\", 1);\n                lp = lpAppend(lp, (unsigned char*)\"10\", 2);\n                lp = lpAppend(lp, (unsigned char*)\"100\", 3);\n                lp = lpAppend(lp, (unsigned char*)\"1000\", 4);\n                lp = lpAppend(lp, (unsigned char*)\"10000\", 5);\n                lp = lpAppend(lp, (unsigned char*)\"100000\", 6);\n            }\n            printf(\"Done. usec=%lld\\n\", usec()-start);\n        }\n\n        TEST(\"Benchmark lpFind string\") {\n            unsigned long long start = usec();\n            for (int i = 0; i < 2000; i++) {\n                unsigned char *fptr = lpFirst(lp);\n                fptr = lpFind(lp, fptr, (unsigned char*)\"nothing\", 7, 1);\n            }\n            printf(\"Done. usec=%lld\\n\", usec()-start);\n        }\n\n        TEST(\"Benchmark lpFind number\") {\n            unsigned long long start = usec();\n            for (int i = 0; i < 2000; i++) {\n                unsigned char *fptr = lpFirst(lp);\n                fptr = lpFind(lp, fptr, (unsigned char*)\"99999\", 5, 1);\n            }\n            printf(\"Done. usec=%lld\\n\", usec()-start);\n        }\n\n        TEST(\"Benchmark lpSeek\") {\n            unsigned long long start = usec();\n            for (int i = 0; i < 2000; i++) {\n                lpSeek(lp, 99999);\n            }\n            printf(\"Done. 
usec=%lld\\n\", usec()-start);\n        }\n\n        TEST(\"Benchmark lpValidateIntegrity\") {\n            unsigned long long start = usec();\n            for (int i = 0; i < 2000; i++) {\n                lpValidateIntegrity(lp, lpBytes(lp), 1, NULL, NULL);\n            }\n            printf(\"Done. usec=%lld\\n\", usec()-start);\n        }\n\n        TEST(\"Benchmark lpCompare with string\") {\n            unsigned long long start = usec();\n            for (int i = 0; i < 2000; i++) {\n                unsigned char *eptr = lpSeek(lp,0);\n                while (eptr != NULL) {\n                    lpCompare(eptr,(unsigned char*)\"nothing\",7);\n                    eptr = lpNext(lp,eptr);\n                }\n            }\n            printf(\"Done. usec=%lld\\n\", usec()-start);\n        }\n\n        TEST(\"Benchmark lpCompare with number\") {\n            unsigned long long start = usec();\n            for (int i = 0; i < 2000; i++) {\n                unsigned char *eptr = lpSeek(lp,0);\n                while (eptr != NULL) {\n                    lpCompare(lp, (unsigned char*)\"99999\", 5);\n                    eptr = lpNext(lp,eptr);\n                }\n            }\n            printf(\"Done. usec=%lld\\n\", usec()-start);\n        }\n\n        lpFree(lp);\n    }\n\n    return 0;\n}\n\n#endif\n"
  },
  {
    "path": "src/redis/listpack.h",
    "content": "/* Listpack -- A lists of strings serialization format\n *\n * This file implements the specification you can find at:\n *\n *  https://github.com/antirez/listpack\n *\n * Copyright (c) 2017, Salvatore Sanfilippo <antirez at gmail dot com>\n * All rights reserved.\n *\n * Redistribution and use in source and binary forms, with or without\n * modification, are permitted provided that the following conditions are met:\n *\n *   * Redistributions of source code must retain the above copyright notice,\n *     this list of conditions and the following disclaimer.\n *   * Redistributions in binary form must reproduce the above copyright\n *     notice, this list of conditions and the following disclaimer in the\n *     documentation and/or other materials provided with the distribution.\n *   * Neither the name of Redis nor the names of its contributors may be used\n *     to endorse or promote products derived from this software without\n *     specific prior written permission.\n *\n * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS \"AS IS\"\n * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE\n * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE\n * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE\n * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR\n * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF\n * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS\n * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN\n * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)\n * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE\n * POSSIBILITY OF SUCH DAMAGE.\n */\n\n#ifndef __LISTPACK_H\n#define __LISTPACK_H\n\n#include <stdlib.h>\n#include <stdint.h>\n\n#define LP_INTBUF_SIZE 21 /* 20 digits of -2^63 + 1 null term = 21. 
*/\n\n/* lpInsert() where argument possible values: */\n#define LP_BEFORE 0\n#define LP_AFTER 1\n#define LP_REPLACE 2\n\n/* Each entry in the listpack is either a string or an integer. */\ntypedef struct {\n    /* When string is used, it is provided with the length (slen). */\n    unsigned char *sval;\n    uint32_t slen;\n    /* When integer is used, 'sval' is NULL, and lval holds the value. */\n    long long lval;\n} listpackEntry;\n\nunsigned char *lpNew(size_t capacity);\nvoid lpFree(unsigned char *lp);\nunsigned char* lpShrinkToFit(unsigned char *lp);\nunsigned char *lpInsertString(unsigned char *lp, const unsigned char *s, uint32_t slen,\n                              unsigned char *p, int where, unsigned char **newp);\nunsigned char *lpPrepend(unsigned char *lp, const unsigned char *s, uint32_t slen);\nunsigned char *lpPrependInteger(unsigned char *lp, long long lval);\nunsigned char *lpAppend(unsigned char *lp, const unsigned char *s, uint32_t slen);\nunsigned char *lpAppendInteger(unsigned char *lp, long long lval);\nunsigned char *lpInsertInteger(unsigned char *lp, long long lval, unsigned char *p, int where,\n                               unsigned char **newp);\nunsigned char *lpReplace(unsigned char *lp, unsigned char **p, const unsigned char *s, uint32_t slen);\nunsigned char *lpReplaceInteger(unsigned char *lp, unsigned char **p, long long lval);\nunsigned char *lpDelete(unsigned char *lp, unsigned char *p, unsigned char **newp);\nunsigned char *lpDeleteRangeWithEntry(unsigned char *lp, unsigned char **p, unsigned long num);\nunsigned char *lpDeleteRange(unsigned char *lp, long index, unsigned long num);\nunsigned char *lpMerge(unsigned char **first, unsigned char **second);\nunsigned long lpLength(unsigned char *lp);\nunsigned char *lpGet(unsigned char *p, int64_t *count, unsigned char *intbuf);\n\n// Fills count and returns 1 if the item is an integer, 0 otherwise.\nint lpGetInteger(unsigned char *p, int64_t *ival);\nint lpStringToInt64(const char 
*s, unsigned long slen, int64_t *value);\n\nunsigned char *lpGetValue(unsigned char *p, unsigned int *slen, long long *lval);\nunsigned char *lpFind(unsigned char *lp, unsigned char *p, unsigned char *s, uint32_t slen, unsigned int skip);\nunsigned char *lpFirst(unsigned char *lp);\nunsigned char *lpLast(unsigned char *lp);\nunsigned char *lpNext(unsigned char *lp, unsigned char *p);\nunsigned char *lpPrev(unsigned char *lp, unsigned char *p);\nsize_t lpBytes(unsigned char *lp);\nunsigned char *lpSeek(unsigned char *lp, long index);\ntypedef int (*listpackValidateEntryCB)(unsigned char *p, unsigned int head_count, void *userdata);\nint lpValidateIntegrity(unsigned char *lp, size_t size, int deep,\n                        listpackValidateEntryCB entry_cb, void *cb_userdata);\nunsigned char *lpValidateFirst(unsigned char *lp);\nint lpValidateNext(unsigned char *lp, unsigned char **pp, size_t lpbytes);\nunsigned int lpCompare(unsigned char *p, const unsigned char *s, uint32_t slen);\nvoid lpRandomPair(unsigned char *lp, unsigned long total_count, listpackEntry *key, listpackEntry *val);\nvoid lpRandomPairs(unsigned char *lp, unsigned int count, listpackEntry *keys, listpackEntry *vals);\nunsigned int lpRandomPairsUnique(unsigned char *lp, unsigned int count, listpackEntry *keys, listpackEntry *vals);\nint lpSafeToAdd(unsigned char* lp, size_t add);\nvoid lpRepr(unsigned char *lp);\n\n#ifdef REDIS_TEST\nint listpackTest(int argc, char *argv[], int flags);\n#endif\n\n#endif\n"
  },
  {
    "path": "src/redis/lua/CMakeLists.txt",
    "content": "add_library(lua_modules STATIC\n    cjson/fpconv.c cjson/strbuf.c cjson/lua_cjson.c\n    cmsgpack/lua_cmsgpack.c\n    struct/lua_struct.c\n    bit/bit.c\n)\n\ntarget_compile_options(lua_modules PRIVATE\n    -Wno-sign-compare -Wno-misleading-indentation -Wno-implicit-fallthrough -Wno-undefined-inline\n    -Wno-stringop-overflow)\n\ntarget_link_libraries(lua_modules TRDP::lua)\n"
  },
  {
    "path": "src/redis/lua/README.md",
    "content": "Since version 5.2 `luaL_register` is deprecated and removed. The new `luaL_newlib` function doesn't make the module globally available upon registration and is ment to be used with the `require` function.\n\nTo provide the modules globally, `luaL_newlib` is followed by a `lua_setglobal` for bit and struct.\n"
  },
  {
    "path": "src/redis/lua/bit/bit.c",
    "content": "/*\n** Lua BitOp -- a bit operations library for Lua 5.1/5.2.\n** http://bitop.luajit.org/\n**\n** Copyright (C) 2008-2012 Mike Pall. All rights reserved.\n**\n** Permission is hereby granted, free of charge, to any person obtaining\n** a copy of this software and associated documentation files (the\n** \"Software\"), to deal in the Software without restriction, including\n** without limitation the rights to use, copy, modify, merge, publish,\n** distribute, sublicense, and/or sell copies of the Software, and to\n** permit persons to whom the Software is furnished to do so, subject to\n** the following conditions:\n**\n** The above copyright notice and this permission notice shall be\n** included in all copies or substantial portions of the Software.\n**\n** THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND,\n** EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF\n** MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.\n** IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY\n** CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,\n** TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE\n** SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.\n**\n** [ MIT license: http://www.opensource.org/licenses/mit-license.php ]\n*/\n\n#define LUA_BITOP_VERSION\t\"1.0.3\"\n\n#define LUA_LIB\n#include \"lua.h\"\n#include \"lauxlib.h\"\n\n#ifdef _MSC_VER\n/* MSVC is stuck in the last century and doesn't have C99's stdint.h. */\ntypedef __int32 int32_t;\ntypedef unsigned __int32 uint32_t;\ntypedef unsigned __int64 uint64_t;\n#else\n#include <stdint.h>\n#endif\n\ntypedef int32_t SBits;\ntypedef uint32_t UBits;\n\ntypedef union {\n  lua_Number n;\n#if defined(LUA_NUMBER_DOUBLE) || defined(LUA_FLOAT_DOUBLE)\n  uint64_t b;\n#else\n  UBits b;\n#endif\n} BitNum;\n\n/* Convert argument to bit type. 
*/\nstatic UBits barg(lua_State *L, int idx)\n{\n  BitNum bn;\n  UBits b;\n#if LUA_VERSION_NUM < 502\n  bn.n = lua_tonumber(L, idx);\n#else\n  bn.n = luaL_checknumber(L, idx);\n#endif\n#if defined(LUA_NUMBER_DOUBLE) || defined(LUA_FLOAT_DOUBLE)\n  bn.n += 6755399441055744.0;  /* 2^52+2^51 */\n#ifdef SWAPPED_DOUBLE\n  b = (UBits)(bn.b >> 32);\n#else\n  b = (UBits)bn.b;\n#endif\n#elif defined(LUA_NUMBER_INT)       || defined(LUA_INT_INT) || \\\n      defined(LUA_NUMBER_LONG)      || defined(LUA_INT_LONG) || \\\n      defined(LUA_NUMBER_LONGLONG)  || defined(LUA_INT_LONGLONG) || \\\n      defined(LUA_NUMBER_LONG_LONG) || defined(LUA_NUMBER_LLONG)\n  if (sizeof(UBits) == sizeof(lua_Number))\n    b = bn.b;\n  else\n    b = (UBits)(SBits)bn.n;\n#elif defined(LUA_NUMBER_FLOAT) || defined(LUA_FLOAT_FLOAT)\n#error \"A 'float' lua_Number type is incompatible with this library\"\n#else\n#error \"Unknown number type, check LUA_NUMBER_*, LUA_FLOAT_*, LUA_INT_* in luaconf.h\"\n#endif\n#if LUA_VERSION_NUM < 502\n  if (b == 0 && !lua_isnumber(L, idx)) {\n    luaL_typerror(L, idx, \"number\");\n  }\n#endif\n  return b;\n}\n\n/* Return bit type. 
*/\n#if LUA_VERSION_NUM < 503\n#define BRET(b)  lua_pushnumber(L, (lua_Number)(SBits)(b)); return 1;\n#else\n#define BRET(b)  lua_pushinteger(L, (lua_Integer)(SBits)(b)); return 1;\n#endif\n\nstatic int bit_tobit(lua_State *L) { BRET(barg(L, 1)) }\nstatic int bit_bnot(lua_State *L) { BRET(~barg(L, 1)) }\n\n#define BIT_OP(func, opr) \\\n  static int func(lua_State *L) { int i; UBits b = barg(L, 1); \\\n    for (i = lua_gettop(L); i > 1; i--) b opr barg(L, i); BRET(b) }\nBIT_OP(bit_band, &=)\nBIT_OP(bit_bor, |=)\nBIT_OP(bit_bxor, ^=)\n\n#define bshl(b, n)  (b << n)\n#define bshr(b, n)  (b >> n)\n#define bsar(b, n)  ((SBits)b >> n)\n#define brol(b, n)  ((b << n) | (b >> (32-n)))\n#define bror(b, n)  ((b << (32-n)) | (b >> n))\n#define BIT_SH(func, fn) \\\n  static int func(lua_State *L) { \\\n    UBits b = barg(L, 1); UBits n = barg(L, 2) & 31; BRET(fn(b, n)) }\nBIT_SH(bit_lshift, bshl)\nBIT_SH(bit_rshift, bshr)\nBIT_SH(bit_arshift, bsar)\nBIT_SH(bit_rol, brol)\nBIT_SH(bit_ror, bror)\n\nstatic int bit_bswap(lua_State *L)\n{\n  UBits b = barg(L, 1);\n  b = (b >> 24) | ((b >> 8) & 0xff00) | ((b & 0xff00) << 8) | (b << 24);\n  BRET(b)\n}\n\nstatic int bit_tohex(lua_State *L)\n{\n  UBits b = barg(L, 1);\n  SBits n = lua_isnone(L, 2) ? 
8 : (SBits)barg(L, 2);\n  const char *hexdigits = \"0123456789abcdef\";\n  char buf[8];\n  int i;\n  if (n == INT32_MIN) n = INT32_MIN+1;\n  if (n < 0) { n = -n; hexdigits = \"0123456789ABCDEF\"; }\n  if (n > 8) n = 8;\n  for (i = (int)n; --i >= 0; ) { buf[i] = hexdigits[b & 15]; b >>= 4; }\n  lua_pushlstring(L, buf, (size_t)n);\n  return 1;\n}\n\nstatic const struct luaL_Reg bit_funcs[] = {\n  { \"tobit\",\tbit_tobit },\n  { \"bnot\",\tbit_bnot },\n  { \"band\",\tbit_band },\n  { \"bor\",\tbit_bor },\n  { \"bxor\",\tbit_bxor },\n  { \"lshift\",\tbit_lshift },\n  { \"rshift\",\tbit_rshift },\n  { \"arshift\",\tbit_arshift },\n  { \"rol\",\tbit_rol },\n  { \"ror\",\tbit_ror },\n  { \"bswap\",\tbit_bswap },\n  { \"tohex\",\tbit_tohex },\n  { NULL, NULL }\n};\n\n/* Signed right-shifts are implementation-defined per C89/C99.\n** But the de facto standard are arithmetic right-shifts on two's\n** complement CPUs. This behaviour is required here, so test for it.\n*/\n#define BAD_SAR\t\t(bsar(-8, 2) != (SBits)-2)\n\nLUALIB_API int luaopen_bit(lua_State *L)\n{\n  UBits b;\n#if LUA_VERSION_NUM < 503\n  lua_pushnumber(L, (lua_Number)1437217655L);\n#else\n\tlua_pushinteger(L, (lua_Integer)1437217655L);\n#endif\n  b = barg(L, -1);\n  if (b != (UBits)1437217655L || BAD_SAR) {  /* Perform a simple self-test. */\n    const char *msg = \"compiled with incompatible luaconf.h\";\n#if defined(LUA_NUMBER_DOUBLE) || defined(LUA_FLOAT_DOUBLE)\n#ifdef _WIN32\n    if (b == (UBits)1610612736L)\n      msg = \"use D3DCREATE_FPU_PRESERVE with DirectX\";\n#endif\n    if (b == (UBits)1127743488L)\n      msg = \"not compiled with SWAPPED_DOUBLE\";\n#endif\n    if (BAD_SAR)\n      msg = \"arithmetic right-shift broken\";\n    luaL_error(L, \"bit library self-test failed (%s)\", msg);\n  }\n\n  luaL_newlib(L, bit_funcs);\n  lua_setglobal(L, \"bit\");\n\n  return 1;\n}\n"
  },
  {
    "path": "src/redis/lua/cjson/fpconv.c",
    "content": "/* fpconv - Floating point conversion routines\n *\n * Copyright (c) 2011-2012  Mark Pulford <mark@kyne.com.au>\n *\n * Permission is hereby granted, free of charge, to any person obtaining\n * a copy of this software and associated documentation files (the\n * \"Software\"), to deal in the Software without restriction, including\n * without limitation the rights to use, copy, modify, merge, publish,\n * distribute, sublicense, and/or sell copies of the Software, and to\n * permit persons to whom the Software is furnished to do so, subject to\n * the following conditions:\n *\n * The above copyright notice and this permission notice shall be\n * included in all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND,\n * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF\n * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.\n * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY\n * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,\n * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE\n * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.\n */\n\n/* JSON uses a '.' decimal separator. strtod() / sprintf() under C libraries\n * with locale support will break when the decimal separator is a comma.\n *\n * fpconv_* will around these issues with a translation buffer if required.\n */\n\n#include <stdio.h>\n#include <stdlib.h>\n#include <assert.h>\n#include <string.h>\n\n#include \"fpconv.h\"\n\n/* Lua CJSON assumes the locale is the same for all threads within a\n * process and doesn't change after initialisation.\n *\n * This avoids the need for per thread storage or expensive checks\n * for call. 
*/\nstatic char locale_decimal_point = '.';\n\n/* In theory multibyte decimal_points are possible, but\n * Lua CJSON only supports UTF-8 and known locales only have\n * single byte decimal points ([.,]).\n *\n * localconv() may not be thread safe (=>crash), and nl_langinfo() is\n * not supported on some platforms. Use sprintf() instead - if the\n * locale does change, at least Lua CJSON won't crash. */\nstatic void fpconv_update_locale()\n{\n    char buf[8];\n\n    snprintf(buf, sizeof(buf), \"%g\", 0.5);\n\n    /* Failing this test might imply the platform has a buggy dtoa\n     * implementation or wide characters */\n    if (buf[0] != '0' || buf[2] != '5' || buf[3] != 0) {\n        fprintf(stderr, \"Error: wide characters found or printf() bug.\");\n        abort();\n    }\n\n    locale_decimal_point = buf[1];\n}\n\n/* Check for a valid number character: [-+0-9a-yA-Y.]\n * Eg: -0.6e+5, infinity, 0xF0.F0pF0\n *\n * Used to find the probable end of a number. It doesn't matter if\n * invalid characters are counted - strtod() will find the valid\n * number if it exists.  The risk is that slightly more memory might\n * be allocated before a parse error occurs. */\nstatic inline int valid_number_character(char ch)\n{\n    char lower_ch;\n\n    if ('0' <= ch && ch <= '9')\n        return 1;\n    if (ch == '-' || ch == '+' || ch == '.')\n        return 1;\n\n    /* Hex digits, exponent (e), base (p), \"infinity\",.. */\n    lower_ch = ch | 0x20;\n    if ('a' <= lower_ch && lower_ch <= 'y')\n        return 1;\n\n    return 0;\n}\n\n/* Calculate the size of the buffer required for a strtod locale\n * conversion. */\nstatic int strtod_buffer_size(const char *s)\n{\n    const char *p = s;\n\n    while (valid_number_character(*p))\n        p++;\n\n    return p - s;\n}\n\n/* Similar to strtod(), but must be passed the current locale's decimal point\n * character. 
Guaranteed to be called at the start of any valid number in a string */\ndouble fpconv_strtod(const char *nptr, char **endptr)\n{\n    char localbuf[FPCONV_G_FMT_BUFSIZE];\n    char *buf, *endbuf, *dp;\n    int buflen;\n    double value;\n\n    /* System strtod() is fine when decimal point is '.' */\n    if (locale_decimal_point == '.')\n        return strtod(nptr, endptr);\n\n    buflen = strtod_buffer_size(nptr);\n    if (!buflen) {\n        /* No valid characters found, standard strtod() return */\n        *endptr = (char *)nptr;\n        return 0;\n    }\n\n    /* Duplicate number into buffer */\n    if (buflen >= FPCONV_G_FMT_BUFSIZE) {\n        /* Handle unusually large numbers */\n        buf = malloc(buflen + 1);\n        if (!buf) {\n            fprintf(stderr, \"Out of memory\");\n            abort();\n        }\n    } else {\n        /* This is the common case.. */\n        buf = localbuf;\n    }\n    memcpy(buf, nptr, buflen);\n    buf[buflen] = 0;\n\n    /* Update decimal point character if found */\n    dp = strchr(buf, '.');\n    if (dp)\n        *dp = locale_decimal_point;\n\n    value = strtod(buf, &endbuf);\n    *endptr = (char *)&nptr[endbuf - buf];\n    if (buflen >= FPCONV_G_FMT_BUFSIZE)\n        free(buf);\n\n    return value;\n}\n\n/* \"fmt\" must point to a buffer of at least 6 characters */\nstatic void set_number_format(char *fmt, int precision)\n{\n    int d1, d2, i;\n\n    assert(1 <= precision && precision <= 14);\n\n    /* Create printf format (%.14g) from precision */\n    d1 = precision / 10;\n    d2 = precision % 10;\n    fmt[0] = '%';\n    fmt[1] = '.';\n    i = 2;\n    if (d1) {\n        fmt[i++] = '0' + d1;\n    }\n    fmt[i++] = '0' + d2;\n    fmt[i++] = 'g';\n    fmt[i] = 0;\n}\n\n/* Assumes there is always at least 32 characters available in the target buffer */\nint fpconv_g_fmt(char *str, double num, int precision)\n{\n    char buf[FPCONV_G_FMT_BUFSIZE];\n    char fmt[6];\n    int len;\n    char *b;\n\n    
set_number_format(fmt, precision);\n\n    /* Pass through when decimal point character is dot. */\n    if (locale_decimal_point == '.')\n        return snprintf(str, FPCONV_G_FMT_BUFSIZE, fmt, num);\n\n    /* snprintf() to a buffer then translate for other decimal point characters */\n    len = snprintf(buf, FPCONV_G_FMT_BUFSIZE, fmt, num);\n\n    /* Copy into target location. Translate decimal point if required */\n    b = buf;\n    do {\n        *str++ = (*b == locale_decimal_point ? '.' : *b);\n    } while(*b++);\n\n    return len;\n}\n\nvoid fpconv_init()\n{\n    fpconv_update_locale();\n}\n\n/* vi:ai et sw=4 ts=4:\n */\n"
  },
  {
    "path": "src/redis/lua/cjson/fpconv.h",
    "content": "/* Lua CJSON floating point conversion routines */\n\n/* Buffer required to store the largest string representation of a double.\n *\n * Longest double printed with %.14g is 21 characters long:\n * -1.7976931348623e+308 */\n# define FPCONV_G_FMT_BUFSIZE   32\n\n#ifdef USE_INTERNAL_FPCONV\nstatic inline void fpconv_init()\n{\n    /* Do nothing - not required */\n}\n#else\nextern void fpconv_init();\n#endif\n\nextern int fpconv_g_fmt(char*, double, int);\nextern double fpconv_strtod(const char*, char**);\n\n/* vi:ai et sw=4 ts=4:\n */\n"
  },
  {
    "path": "src/redis/lua/cjson/lua_cjson.c",
    "content": "/* Lua CJSON - JSON support for Lua\n *\n * Copyright (c) 2010-2012  Mark Pulford <mark@kyne.com.au>\n *\n * Permission is hereby granted, free of charge, to any person obtaining\n * a copy of this software and associated documentation files (the\n * \"Software\"), to deal in the Software without restriction, including\n * without limitation the rights to use, copy, modify, merge, publish,\n * distribute, sublicense, and/or sell copies of the Software, and to\n * permit persons to whom the Software is furnished to do so, subject to\n * the following conditions:\n *\n * The above copyright notice and this permission notice shall be\n * included in all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND,\n * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF\n * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.\n * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY\n * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,\n * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE\n * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.\n */\n\n/* Caveats:\n * - JSON \"null\" values are represented as lightuserdata since Lua\n *   tables cannot contain \"nil\". Compare with cjson.null.\n * - Invalid UTF-8 characters are not detected and will be passed\n *   untouched. If required, UTF-8 error checking should be done\n *   outside this library.\n * - Javascript comments are not part of the JSON spec, and are not\n *   currently supported.\n *\n * Note: Decoding is slower than encoding. 
Lua spends significant\n *       time (30%) managing tables when parsing JSON since it is\n *       difficult to know object/array sizes ahead of time.\n */\n\n#include <assert.h>\n#include <string.h>\n#include <math.h>\n#include <limits.h>\n#include <lua.h>\n#include <lauxlib.h>\n\n#include \"strbuf.h\"\n#include \"fpconv.h\"\n\n#ifndef CJSON_MODNAME\n#define CJSON_MODNAME   \"cjson\"\n#endif\n\n#ifndef CJSON_VERSION\n#define CJSON_VERSION   \"2.1devel\"\n#endif\n\n/* Workaround for Solaris platforms missing isinf() */\n#if !defined(isinf) && (defined(USE_INTERNAL_ISINF) || defined(MISSING_ISINF))\n#define isinf(x) (!isnan(x) && isnan((x) - (x)))\n#endif\n\n#define DEFAULT_SPARSE_CONVERT 0\n#define DEFAULT_SPARSE_RATIO 2\n#define DEFAULT_SPARSE_SAFE 10\n#define DEFAULT_ENCODE_MAX_DEPTH 1000\n#define DEFAULT_DECODE_MAX_DEPTH 1000\n#define DEFAULT_ENCODE_INVALID_NUMBERS 0\n#define DEFAULT_DECODE_INVALID_NUMBERS 1\n#define DEFAULT_ENCODE_KEEP_BUFFER 1\n#define DEFAULT_ENCODE_NUMBER_PRECISION 14\n\n#ifdef DISABLE_INVALID_NUMBERS\n#undef DEFAULT_DECODE_INVALID_NUMBERS\n#define DEFAULT_DECODE_INVALID_NUMBERS 0\n#endif\n\ntypedef enum {\n    T_OBJ_BEGIN,\n    T_OBJ_END,\n    T_ARR_BEGIN,\n    T_ARR_END,\n    T_STRING,\n    T_NUMBER,\n    T_BOOLEAN,\n    T_NULL,\n    T_COLON,\n    T_COMMA,\n    T_END,\n    T_WHITESPACE,\n    T_ERROR,\n    T_UNKNOWN\n} json_token_type_t;\n\nstatic const char *json_token_type_name[] = {\n    \"T_OBJ_BEGIN\",\n    \"T_OBJ_END\",\n    \"T_ARR_BEGIN\",\n    \"T_ARR_END\",\n    \"T_STRING\",\n    \"T_NUMBER\",\n    \"T_BOOLEAN\",\n    \"T_NULL\",\n    \"T_COLON\",\n    \"T_COMMA\",\n    \"T_END\",\n    \"T_WHITESPACE\",\n    \"T_ERROR\",\n    \"T_UNKNOWN\",\n    NULL\n};\n\ntypedef struct {\n    json_token_type_t ch2token[256];\n    char escape2char[256];  /* Decoding */\n\n    /* encode_buf is only allocated and used when\n     * encode_keep_buffer is set */\n    strbuf_t encode_buf;\n\n    int encode_sparse_convert;\n    int 
encode_sparse_ratio;\n    int encode_sparse_safe;\n    int encode_max_depth;\n    int encode_invalid_numbers;     /* 2 => Encode as \"null\" */\n    int encode_number_precision;\n    int encode_keep_buffer;\n\n    int decode_invalid_numbers;\n    int decode_max_depth;\n} json_config_t;\n\ntypedef struct {\n    const char *data;\n    const char *ptr;\n    strbuf_t *tmp;    /* Temporary storage for strings */\n    json_config_t *cfg;\n    int current_depth;\n} json_parse_t;\n\ntypedef struct {\n    json_token_type_t type;\n    int index;\n    union {\n        const char *string;\n        double number;\n        int boolean;\n    } value;\n    int string_len;\n} json_token_t;\n\nstatic const char *char2escape[256] = {\n    \"\\\\u0000\", \"\\\\u0001\", \"\\\\u0002\", \"\\\\u0003\",\n    \"\\\\u0004\", \"\\\\u0005\", \"\\\\u0006\", \"\\\\u0007\",\n    \"\\\\b\", \"\\\\t\", \"\\\\n\", \"\\\\u000b\",\n    \"\\\\f\", \"\\\\r\", \"\\\\u000e\", \"\\\\u000f\",\n    \"\\\\u0010\", \"\\\\u0011\", \"\\\\u0012\", \"\\\\u0013\",\n    \"\\\\u0014\", \"\\\\u0015\", \"\\\\u0016\", \"\\\\u0017\",\n    \"\\\\u0018\", \"\\\\u0019\", \"\\\\u001a\", \"\\\\u001b\",\n    \"\\\\u001c\", \"\\\\u001d\", \"\\\\u001e\", \"\\\\u001f\",\n    NULL, NULL, \"\\\\\\\"\", NULL, NULL, NULL, NULL, NULL,\n    NULL, NULL, NULL, NULL, NULL, NULL, NULL, \"\\\\/\",\n    NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,\n    NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,\n    NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,\n    NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,\n    NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,\n    NULL, NULL, NULL, NULL, \"\\\\\\\\\", NULL, NULL, NULL,\n    NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,\n    NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,\n    NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,\n    NULL, NULL, NULL, NULL, NULL, NULL, NULL, \"\\\\u007f\",\n    NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,\n    NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,\n  
  NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,\n    NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,\n    NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,\n    NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,\n    NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,\n    NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,\n    NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,\n    NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,\n    NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,\n    NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,\n    NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,\n    NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,\n    NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,\n    NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,\n};\n\n/* ===== CONFIGURATION ===== */\n\nstatic json_config_t *json_fetch_config(lua_State *l)\n{\n    json_config_t *cfg;\n\n    cfg = lua_touserdata(l, lua_upvalueindex(1));\n    if (!cfg)\n        luaL_error(l, \"BUG: Unable to fetch CJSON configuration\");\n\n    return cfg;\n}\n\n/* Ensure the correct number of arguments have been provided.\n * Pad with nil to allow other functions to simply check arg[i]\n * to find whether an argument was provided */\nstatic json_config_t *json_arg_init(lua_State *l, int args)\n{\n    luaL_argcheck(l, lua_gettop(l) <= args, args + 1,\n                  \"found too many arguments\");\n\n    while (lua_gettop(l) < args)\n        lua_pushnil(l);\n\n    return json_fetch_config(l);\n}\n\n/* Process integer options for configuration functions */\nstatic int json_integer_option(lua_State *l, int optindex, int *setting,\n                               int min, int max)\n{\n    char errmsg[64];\n    int value;\n\n    if (!lua_isnil(l, optindex)) {\n        value = luaL_checkinteger(l, optindex);\n        snprintf(errmsg, sizeof(errmsg), \"expected integer between %d and %d\", min, max);\n        luaL_argcheck(l, min <= value && value <= max, 1, errmsg);\n        *setting = value;\n    }\n\n    
lua_pushinteger(l, *setting);\n\n    return 1;\n}\n\n/* Process enumerated arguments for a configuration function */\nstatic int json_enum_option(lua_State *l, int optindex, int *setting,\n                            const char **options, int bool_true)\n{\n    static const char *bool_options[] = { \"off\", \"on\", NULL };\n\n    if (!options) {\n        options = bool_options;\n        bool_true = 1;\n    }\n\n    if (!lua_isnil(l, optindex)) {\n        if (bool_true && lua_isboolean(l, optindex))\n            *setting = lua_toboolean(l, optindex) * bool_true;\n        else\n            *setting = luaL_checkoption(l, optindex, NULL, options);\n    }\n\n    if (bool_true && (*setting == 0 || *setting == bool_true))\n        lua_pushboolean(l, *setting);\n    else\n        lua_pushstring(l, options[*setting]);\n\n    return 1;\n}\n\n/* Configures handling of extremely sparse arrays:\n * convert: Convert extremely sparse arrays into objects? Otherwise error.\n * ratio: 0: always allow sparse; 1: never allow sparse; >1: use ratio\n * safe: Always use an array when the max index <= safe */\nstatic int json_cfg_encode_sparse_array(lua_State *l)\n{\n    json_config_t *cfg = json_arg_init(l, 3);\n\n    json_enum_option(l, 1, &cfg->encode_sparse_convert, NULL, 1);\n    json_integer_option(l, 2, &cfg->encode_sparse_ratio, 0, INT_MAX);\n    json_integer_option(l, 3, &cfg->encode_sparse_safe, 0, INT_MAX);\n\n    return 3;\n}\n\n/* Configures the maximum number of nested arrays/objects allowed when\n * encoding */\nstatic int json_cfg_encode_max_depth(lua_State *l)\n{\n    json_config_t *cfg = json_arg_init(l, 1);\n\n    return json_integer_option(l, 1, &cfg->encode_max_depth, 1, INT_MAX);\n}\n\n/* Configures the maximum number of nested arrays/objects allowed when\n * encoding */\nstatic int json_cfg_decode_max_depth(lua_State *l)\n{\n    json_config_t *cfg = json_arg_init(l, 1);\n\n    return json_integer_option(l, 1, &cfg->decode_max_depth, 1, INT_MAX);\n}\n\n/* Configures 
number precision when converting doubles to text */\nstatic int json_cfg_encode_number_precision(lua_State *l)\n{\n    json_config_t *cfg = json_arg_init(l, 1);\n\n    return json_integer_option(l, 1, &cfg->encode_number_precision, 1, 14);\n}\n\n/* Configures JSON encoding buffer persistence */\nstatic int json_cfg_encode_keep_buffer(lua_State *l)\n{\n    json_config_t *cfg = json_arg_init(l, 1);\n    int old_value;\n\n    old_value = cfg->encode_keep_buffer;\n\n    json_enum_option(l, 1, &cfg->encode_keep_buffer, NULL, 1);\n\n    /* Init / free the buffer if the setting has changed */\n    if (old_value ^ cfg->encode_keep_buffer) {\n        if (cfg->encode_keep_buffer)\n            strbuf_init(&cfg->encode_buf, 0);\n        else\n            strbuf_free(&cfg->encode_buf);\n    }\n\n    return 1;\n}\n\n#if defined(DISABLE_INVALID_NUMBERS) && !defined(USE_INTERNAL_FPCONV)\nvoid json_verify_invalid_number_setting(lua_State *l, int *setting)\n{\n    if (*setting == 1) {\n        *setting = 0;\n        luaL_error(l, \"Infinity, NaN, and/or hexadecimal numbers are not supported.\");\n    }\n}\n#else\n#define json_verify_invalid_number_setting(l, s)    do { } while(0)\n#endif\n\nstatic int json_cfg_encode_invalid_numbers(lua_State *l)\n{\n    static const char *options[] = { \"off\", \"on\", \"null\", NULL };\n    json_config_t *cfg = json_arg_init(l, 1);\n\n    json_enum_option(l, 1, &cfg->encode_invalid_numbers, options, 1);\n\n    json_verify_invalid_number_setting(l, &cfg->encode_invalid_numbers);\n\n    return 1;\n}\n\nstatic int json_cfg_decode_invalid_numbers(lua_State *l)\n{\n    json_config_t *cfg = json_arg_init(l, 1);\n\n    json_enum_option(l, 1, &cfg->decode_invalid_numbers, NULL, 1);\n\n    json_verify_invalid_number_setting(l, &cfg->encode_invalid_numbers);\n\n    return 1;\n}\n\nstatic int json_destroy_config(lua_State *l)\n{\n    json_config_t *cfg;\n\n    cfg = lua_touserdata(l, 1);\n    if (cfg)\n        strbuf_free(&cfg->encode_buf);\n    cfg = 
NULL;\n\n    return 0;\n}\n\nstatic void json_create_config(lua_State *l)\n{\n    json_config_t *cfg;\n    int i;\n\n    cfg = lua_newuserdata(l, sizeof(*cfg));\n\n    /* Create GC method to clean up strbuf */\n    lua_newtable(l);\n    lua_pushcfunction(l, json_destroy_config);\n    lua_setfield(l, -2, \"__gc\");\n    lua_setmetatable(l, -2);\n\n    cfg->encode_sparse_convert = DEFAULT_SPARSE_CONVERT;\n    cfg->encode_sparse_ratio = DEFAULT_SPARSE_RATIO;\n    cfg->encode_sparse_safe = DEFAULT_SPARSE_SAFE;\n    cfg->encode_max_depth = DEFAULT_ENCODE_MAX_DEPTH;\n    cfg->decode_max_depth = DEFAULT_DECODE_MAX_DEPTH;\n    cfg->encode_invalid_numbers = DEFAULT_ENCODE_INVALID_NUMBERS;\n    cfg->decode_invalid_numbers = DEFAULT_DECODE_INVALID_NUMBERS;\n    cfg->encode_keep_buffer = DEFAULT_ENCODE_KEEP_BUFFER;\n    cfg->encode_number_precision = DEFAULT_ENCODE_NUMBER_PRECISION;\n\n#if DEFAULT_ENCODE_KEEP_BUFFER > 0\n    strbuf_init(&cfg->encode_buf, 0);\n#endif\n\n    /* Decoding init */\n\n    /* Tag all characters as an error */\n    for (i = 0; i < 256; i++)\n        cfg->ch2token[i] = T_ERROR;\n\n    /* Set tokens that require no further processing */\n    cfg->ch2token['{'] = T_OBJ_BEGIN;\n    cfg->ch2token['}'] = T_OBJ_END;\n    cfg->ch2token['['] = T_ARR_BEGIN;\n    cfg->ch2token[']'] = T_ARR_END;\n    cfg->ch2token[','] = T_COMMA;\n    cfg->ch2token[':'] = T_COLON;\n    cfg->ch2token['\\0'] = T_END;\n    cfg->ch2token[' '] = T_WHITESPACE;\n    cfg->ch2token['\\t'] = T_WHITESPACE;\n    cfg->ch2token['\\n'] = T_WHITESPACE;\n    cfg->ch2token['\\r'] = T_WHITESPACE;\n\n    /* Update characters that require further processing */\n    cfg->ch2token['f'] = T_UNKNOWN;     /* false? */\n    cfg->ch2token['i'] = T_UNKNOWN;     /* inf, ininity? */\n    cfg->ch2token['I'] = T_UNKNOWN;\n    cfg->ch2token['n'] = T_UNKNOWN;     /* null, nan? */\n    cfg->ch2token['N'] = T_UNKNOWN;\n    cfg->ch2token['t'] = T_UNKNOWN;     /* true? 
*/\n    cfg->ch2token['\"'] = T_UNKNOWN;     /* string? */\n    cfg->ch2token['+'] = T_UNKNOWN;     /* number? */\n    cfg->ch2token['-'] = T_UNKNOWN;\n    for (i = 0; i < 10; i++)\n        cfg->ch2token['0' + i] = T_UNKNOWN;\n\n    /* Lookup table for parsing escape characters */\n    for (i = 0; i < 256; i++)\n        cfg->escape2char[i] = 0;          /* String error */\n    cfg->escape2char['\"'] = '\"';\n    cfg->escape2char['\\\\'] = '\\\\';\n    cfg->escape2char['/'] = '/';\n    cfg->escape2char['b'] = '\\b';\n    cfg->escape2char['t'] = '\\t';\n    cfg->escape2char['n'] = '\\n';\n    cfg->escape2char['f'] = '\\f';\n    cfg->escape2char['r'] = '\\r';\n    cfg->escape2char['u'] = 'u';          /* Unicode parsing required */\n}\n\n/* ===== ENCODING ===== */\n\nstatic void json_encode_exception(lua_State *l, json_config_t *cfg, strbuf_t *json, int lindex,\n                                  const char *reason)\n{\n    if (!cfg->encode_keep_buffer)\n        strbuf_free(json);\n    luaL_error(l, \"Cannot serialise %s: %s\",\n                  lua_typename(l, lua_type(l, lindex)), reason);\n}\n\n/* json_append_string args:\n * - lua_State\n * - JSON strbuf\n * - String (Lua stack index)\n *\n * Returns nothing. Doesn't remove string from Lua stack */\nstatic void json_append_string(lua_State *l, strbuf_t *json, int lindex)\n{\n    const char *escstr;\n    int i;\n    const char *str;\n    size_t len;\n\n    str = lua_tolstring(l, lindex, &len);\n\n    /* Worst case is len * 6 (all unicode escapes).\n     * This buffer is reused constantly for small strings\n     * If there are any excess pages, they won't be hit anyway.\n     * This gains ~5% speedup. 
*/\n    strbuf_ensure_empty_length(json, len * 6 + 2);\n\n    strbuf_append_char_unsafe(json, '\\\"');\n    for (i = 0; i < len; i++) {\n        escstr = char2escape[(unsigned char)str[i]];\n        if (escstr)\n            strbuf_append_string(json, escstr);\n        else\n            strbuf_append_char_unsafe(json, str[i]);\n    }\n    strbuf_append_char_unsafe(json, '\\\"');\n}\n\n/* Find the size of the array on the top of the Lua stack\n * -1   object (not a pure array)\n * >=0  elements in array\n */\nstatic int lua_array_length(lua_State *l, json_config_t *cfg, strbuf_t *json)\n{\n    double k;\n    int max;\n    int items;\n\n    max = 0;\n    items = 0;\n\n    lua_pushnil(l);\n    /* table, startkey */\n    while (lua_next(l, -2) != 0) {\n        /* table, key, value */\n        if (lua_type(l, -2) == LUA_TNUMBER &&\n            (k = lua_tonumber(l, -2))) {\n            /* Integer >= 1 ? */\n            if (floor(k) == k && k >= 1) {\n                if (k > max)\n                    max = k;\n                items++;\n                lua_pop(l, 1);\n                continue;\n            }\n        }\n\n        /* Must not be an array (non integer key) */\n        lua_pop(l, 2);\n        return -1;\n    }\n\n    /* Encode excessively sparse arrays as objects (if enabled) */\n    if (cfg->encode_sparse_ratio > 0 &&\n        max > items * cfg->encode_sparse_ratio &&\n        max > cfg->encode_sparse_safe) {\n        if (!cfg->encode_sparse_convert)\n            json_encode_exception(l, cfg, json, -1, \"excessively sparse array\");\n\n        return -1;\n    }\n\n    return max;\n}\n\nstatic void json_check_encode_depth(lua_State *l, json_config_t *cfg,\n                                    int current_depth, strbuf_t *json)\n{\n    /* Ensure there are enough slots free to traverse a table (key,\n     * value) and push a string for a potential error message.\n     *\n     * Unlike \"decode\", the key and value are still on the stack when\n     * 
lua_checkstack() is called.  Hence an extra slot for luaL_error()\n     * below is required just in case the next check to lua_checkstack()\n     * fails.\n     *\n     * While this won't cause a crash due to the EXTRA_STACK reserve\n     * slots, it would still be an improper use of the API. */\n    if (current_depth <= cfg->encode_max_depth && lua_checkstack(l, 3))\n        return;\n\n    if (!cfg->encode_keep_buffer)\n        strbuf_free(json);\n\n    luaL_error(l, \"Cannot serialise, excessive nesting (%d)\",\n               current_depth);\n}\n\nstatic void json_append_data(lua_State *l, json_config_t *cfg,\n                             int current_depth, strbuf_t *json);\n\n/* json_append_array args:\n * - lua_State\n * - JSON strbuf\n * - Size of passwd Lua array (top of stack) */\nstatic void json_append_array(lua_State *l, json_config_t *cfg, int current_depth,\n                              strbuf_t *json, int array_length)\n{\n    int comma, i;\n\n    strbuf_append_char(json, '[');\n\n    comma = 0;\n    for (i = 1; i <= array_length; i++) {\n        if (comma)\n            strbuf_append_char(json, ',');\n        else\n            comma = 1;\n\n        lua_rawgeti(l, -1, i);\n        json_append_data(l, cfg, current_depth, json);\n        lua_pop(l, 1);\n    }\n\n    strbuf_append_char(json, ']');\n}\n\nstatic void json_append_number(lua_State *l, json_config_t *cfg,\n                               strbuf_t *json, int lindex)\n{\n    double num = lua_tonumber(l, lindex);\n    int len;\n\n    if (cfg->encode_invalid_numbers == 0) {\n        /* Prevent encoding invalid numbers */\n        if (isinf(num) || isnan(num))\n            json_encode_exception(l, cfg, json, lindex, \"must not be NaN or Inf\");\n    } else if (cfg->encode_invalid_numbers == 1) {\n        /* Encode invalid numbers, but handle \"nan\" separately\n         * since some platforms may encode as \"-nan\". 
*/\n        if (isnan(num)) {\n            strbuf_append_mem(json, \"nan\", 3);\n            return;\n        }\n    } else {\n        /* Encode invalid numbers as \"null\" */\n        if (isinf(num) || isnan(num)) {\n            strbuf_append_mem(json, \"null\", 4);\n            return;\n        }\n    }\n\n    strbuf_ensure_empty_length(json, FPCONV_G_FMT_BUFSIZE);\n    len = fpconv_g_fmt(strbuf_empty_ptr(json), num, cfg->encode_number_precision);\n    strbuf_extend_length(json, len);\n}\n\nstatic void json_append_object(lua_State *l, json_config_t *cfg,\n                               int current_depth, strbuf_t *json)\n{\n    int comma, keytype;\n\n    /* Object */\n    strbuf_append_char(json, '{');\n\n    lua_pushnil(l);\n    /* table, startkey */\n    comma = 0;\n    while (lua_next(l, -2) != 0) {\n        if (comma)\n            strbuf_append_char(json, ',');\n        else\n            comma = 1;\n\n        /* table, key, value */\n        keytype = lua_type(l, -2);\n        if (keytype == LUA_TNUMBER) {\n            strbuf_append_char(json, '\"');\n            json_append_number(l, cfg, json, -2);\n            strbuf_append_mem(json, \"\\\":\", 2);\n        } else if (keytype == LUA_TSTRING) {\n            json_append_string(l, json, -2);\n            strbuf_append_char(json, ':');\n        } else {\n            json_encode_exception(l, cfg, json, -2,\n                                  \"table key must be a number or string\");\n            /* never returns */\n        }\n\n        /* table, key, value */\n        json_append_data(l, cfg, current_depth, json);\n        lua_pop(l, 1);\n        /* table, key */\n    }\n\n    strbuf_append_char(json, '}');\n}\n\n/* Serialise Lua data into JSON string. 
*/\nstatic void json_append_data(lua_State *l, json_config_t *cfg,\n                             int current_depth, strbuf_t *json)\n{\n    int len;\n\n    switch (lua_type(l, -1)) {\n    case LUA_TSTRING:\n        json_append_string(l, json, -1);\n        break;\n    case LUA_TNUMBER:\n        json_append_number(l, cfg, json, -1);\n        break;\n    case LUA_TBOOLEAN:\n        if (lua_toboolean(l, -1))\n            strbuf_append_mem(json, \"true\", 4);\n        else\n            strbuf_append_mem(json, \"false\", 5);\n        break;\n    case LUA_TTABLE:\n        current_depth++;\n        json_check_encode_depth(l, cfg, current_depth, json);\n        len = lua_array_length(l, cfg, json);\n        if (len > 0)\n            json_append_array(l, cfg, current_depth, json, len);\n        else\n            json_append_object(l, cfg, current_depth, json);\n        break;\n    case LUA_TNIL:\n        strbuf_append_mem(json, \"null\", 4);\n        break;\n    case LUA_TLIGHTUSERDATA:\n        if (lua_touserdata(l, -1) == NULL) {\n            strbuf_append_mem(json, \"null\", 4);\n            break;\n        }\n    default:\n        /* Remaining types (LUA_TFUNCTION, LUA_TUSERDATA, LUA_TTHREAD,\n         * and LUA_TLIGHTUSERDATA) cannot be serialised */\n        json_encode_exception(l, cfg, json, -1, \"type not supported\");\n        /* never returns */\n    }\n}\n\nstatic int json_encode(lua_State *l)\n{\n    json_config_t *cfg = json_fetch_config(l);\n    strbuf_t local_encode_buf;\n    strbuf_t *encode_buf;\n    char *json;\n    int len;\n\n    luaL_argcheck(l, lua_gettop(l) == 1, 1, \"expected 1 argument\");\n\n    if (!cfg->encode_keep_buffer) {\n        /* Use private buffer */\n        encode_buf = &local_encode_buf;\n        strbuf_init(encode_buf, 0);\n    } else {\n        /* Reuse existing buffer */\n        encode_buf = &cfg->encode_buf;\n        strbuf_reset(encode_buf);\n    }\n\n    json_append_data(l, cfg, 0, encode_buf);\n    json = 
strbuf_string(encode_buf, &len);\n\n    lua_pushlstring(l, json, len);\n\n    if (!cfg->encode_keep_buffer)\n        strbuf_free(encode_buf);\n\n    return 1;\n}\n\n/* ===== DECODING ===== */\n\nstatic void json_process_value(lua_State *l, json_parse_t *json,\n                               json_token_t *token);\n\nstatic int hexdigit2int(char hex)\n{\n    if ('0' <= hex  && hex <= '9')\n        return hex - '0';\n\n    /* Force lowercase */\n    hex |= 0x20;\n    if ('a' <= hex && hex <= 'f')\n        return 10 + hex - 'a';\n\n    return -1;\n}\n\nstatic int decode_hex4(const char *hex)\n{\n    int digit[4];\n    int i;\n\n    /* Convert ASCII hex digit to numeric digit\n     * Note: this returns an error for invalid hex digits, including\n     *       NULL */\n    for (i = 0; i < 4; i++) {\n        digit[i] = hexdigit2int(hex[i]);\n        if (digit[i] < 0) {\n            return -1;\n        }\n    }\n\n    return (digit[0] << 12) +\n           (digit[1] << 8) +\n           (digit[2] << 4) +\n            digit[3];\n}\n\n/* Converts a Unicode codepoint to UTF-8.\n * Returns UTF-8 string length, and up to 4 bytes in *utf8 */\nstatic int codepoint_to_utf8(char *utf8, int codepoint)\n{\n    /* 0xxxxxxx */\n    if (codepoint <= 0x7F) {\n        utf8[0] = codepoint;\n        return 1;\n    }\n\n    /* 110xxxxx 10xxxxxx */\n    if (codepoint <= 0x7FF) {\n        utf8[0] = (codepoint >> 6) | 0xC0;\n        utf8[1] = (codepoint & 0x3F) | 0x80;\n        return 2;\n    }\n\n    /* 1110xxxx 10xxxxxx 10xxxxxx */\n    if (codepoint <= 0xFFFF) {\n        utf8[0] = (codepoint >> 12) | 0xE0;\n        utf8[1] = ((codepoint >> 6) & 0x3F) | 0x80;\n        utf8[2] = (codepoint & 0x3F) | 0x80;\n        return 3;\n    }\n\n    /* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */\n    if (codepoint <= 0x1FFFFF) {\n        utf8[0] = (codepoint >> 18) | 0xF0;\n        utf8[1] = ((codepoint >> 12) & 0x3F) | 0x80;\n        utf8[2] = ((codepoint >> 6) & 0x3F) | 0x80;\n        utf8[3] = (codepoint & 
0x3F) | 0x80;\n        return 4;\n    }\n\n    return 0;\n}\n\n\n/* Called when index pointing to beginning of UTF-16 code escape: \\uXXXX\n * \\u is guaranteed to exist, but the remaining hex characters may be\n * missing.\n * Translate to UTF-8 and append to temporary token string.\n * Must advance index to the next character to be processed.\n * Returns: 0   success\n *          -1  error\n */\nstatic int json_append_unicode_escape(json_parse_t *json)\n{\n    char utf8[4];       /* Surrogate pairs require 4 UTF-8 bytes */\n    int codepoint;\n    int surrogate_low;\n    int len;\n    int escape_len = 6;\n\n    /* Fetch UTF-16 code unit */\n    codepoint = decode_hex4(json->ptr + 2);\n    if (codepoint < 0)\n        return -1;\n\n    /* UTF-16 surrogate pairs take the following 2 byte form:\n     *      11011 x yyyyyyyyyy\n     * When x = 0: y is the high 10 bits of the codepoint\n     *      x = 1: y is the low 10 bits of the codepoint\n     *\n     * Check for a surrogate pair (high or low) */\n    if ((codepoint & 0xF800) == 0xD800) {\n        /* Error if the 1st surrogate is not high */\n        if (codepoint & 0x400)\n            return -1;\n\n        /* Ensure the next code is a unicode escape */\n        if (*(json->ptr + escape_len) != '\\\\' ||\n            *(json->ptr + escape_len + 1) != 'u') {\n            return -1;\n        }\n\n        /* Fetch the next codepoint */\n        surrogate_low = decode_hex4(json->ptr + 2 + escape_len);\n        if (surrogate_low < 0)\n            return -1;\n\n        /* Error if the 2nd code is not a low surrogate */\n        if ((surrogate_low & 0xFC00) != 0xDC00)\n            return -1;\n\n        /* Calculate Unicode codepoint */\n        codepoint = (codepoint & 0x3FF) << 10;\n        surrogate_low &= 0x3FF;\n        codepoint = (codepoint | surrogate_low) + 0x10000;\n        escape_len = 12;\n    }\n\n    /* Convert codepoint to UTF-8 */\n    len = codepoint_to_utf8(utf8, codepoint);\n    if (!len)\n        return 
-1;\n\n    /* Append bytes and advance parse index */\n    strbuf_append_mem_unsafe(json->tmp, utf8, len);\n    json->ptr += escape_len;\n\n    return 0;\n}\n\nstatic void json_set_token_error(json_token_t *token, json_parse_t *json,\n                                 const char *errtype)\n{\n    token->type = T_ERROR;\n    token->index = json->ptr - json->data;\n    token->value.string = errtype;\n}\n\nstatic void json_next_string_token(json_parse_t *json, json_token_t *token)\n{\n    char *escape2char = json->cfg->escape2char;\n    char ch;\n\n    /* Caller must ensure a string is next */\n    assert(*json->ptr == '\"');\n\n    /* Skip \" */\n    json->ptr++;\n\n    /* json->tmp is the temporary strbuf used to accumulate the\n     * decoded string value.\n     * json->tmp is sized to handle JSON containing only a string value.\n     */\n    strbuf_reset(json->tmp);\n\n    while ((ch = *json->ptr) != '\"') {\n        if (!ch) {\n            /* Premature end of the string */\n            json_set_token_error(token, json, \"unexpected end of string\");\n            return;\n        }\n\n        /* Handle escapes */\n        if (ch == '\\\\') {\n            /* Fetch escape character */\n            ch = *(json->ptr + 1);\n\n            /* Translate escape code and append to tmp string */\n            ch = escape2char[(unsigned char)ch];\n            if (ch == 'u') {\n                if (json_append_unicode_escape(json) == 0)\n                    continue;\n\n                json_set_token_error(token, json,\n                                     \"invalid unicode escape code\");\n                return;\n            }\n            if (!ch) {\n                json_set_token_error(token, json, \"invalid escape code\");\n                return;\n            }\n\n            /* Skip '\\' */\n            json->ptr++;\n        }\n        /* Append normal character or translated single character\n         * Unicode escapes are handled above */\n        
strbuf_append_char_unsafe(json->tmp, ch);\n        json->ptr++;\n    }\n    json->ptr++;    /* Eat final quote (\") */\n\n    strbuf_ensure_null(json->tmp);\n\n    token->type = T_STRING;\n    token->value.string = strbuf_string(json->tmp, &token->string_len);\n}\n\n/* JSON numbers should take the following form:\n *      -?(0|[1-9]|[1-9][0-9]+)(.[0-9]+)?([eE][-+]?[0-9]+)?\n *\n * json_next_number_token() uses strtod() which allows other forms:\n * - numbers starting with '+'\n * - NaN, -NaN, infinity, -infinity\n * - hexadecimal numbers\n * - numbers with leading zeros\n *\n * json_is_invalid_number() detects \"numbers\" which may pass strtod()'s\n * error checking, but should not be allowed with strict JSON.\n *\n * json_is_invalid_number() may pass numbers which cause strtod()\n * to generate an error.\n */\nstatic int json_is_invalid_number(json_parse_t *json)\n{\n    const char *p = json->ptr;\n\n    /* Reject numbers starting with + */\n    if (*p == '+')\n        return 1;\n\n    /* Skip minus sign if it exists */\n    if (*p == '-')\n        p++;\n\n    /* Reject numbers starting with 0x, or leading zeros */\n    if (*p == '0') {\n        int ch2 = *(p + 1);\n\n        if ((ch2 | 0x20) == 'x' ||          /* Hex */\n            ('0' <= ch2 && ch2 <= '9'))     /* Leading zero */\n            return 1;\n\n        return 0;\n    } else if (*p <= '9') {\n        return 0;                           /* Ordinary number */\n    }\n\n    /* Reject inf/nan */\n    if (!strncasecmp(p, \"inf\", 3))\n        return 1;\n    if (!strncasecmp(p, \"nan\", 3))\n        return 1;\n\n    /* Pass all other numbers which may still be invalid, but\n     * strtod() will catch them. 
*/\n    return 0;\n}\n\nstatic void json_next_number_token(json_parse_t *json, json_token_t *token)\n{\n    char *endptr;\n\n    token->type = T_NUMBER;\n    token->value.number = fpconv_strtod(json->ptr, &endptr);\n    if (json->ptr == endptr)\n        json_set_token_error(token, json, \"invalid number\");\n    else\n        json->ptr = endptr;     /* Skip the processed number */\n\n    return;\n}\n\n/* Fills in the token struct.\n * T_STRING will return a pointer to the json_parse_t temporary string\n * T_ERROR will leave the json->ptr pointer at the error.\n */\nstatic void json_next_token(json_parse_t *json, json_token_t *token)\n{\n    const json_token_type_t *ch2token = json->cfg->ch2token;\n    int ch;\n\n    /* Eat whitespace. */\n    while (1) {\n        ch = (unsigned char)*(json->ptr);\n        token->type = ch2token[ch];\n        if (token->type != T_WHITESPACE)\n            break;\n        json->ptr++;\n    }\n\n    /* Store location of new token. Required when throwing errors\n     * for unexpected tokens (syntax errors). */\n    token->index = json->ptr - json->data;\n\n    /* Don't advance the pointer for an error or the end */\n    if (token->type == T_ERROR) {\n        json_set_token_error(token, json, \"invalid token\");\n        return;\n    }\n\n    if (token->type == T_END) {\n        return;\n    }\n\n    /* Found a known single character token, advance index and return */\n    if (token->type != T_UNKNOWN) {\n        json->ptr++;\n        return;\n    }\n\n    /* Process characters which triggered T_UNKNOWN\n     *\n     * Must use strncmp() to match the front of the JSON string.\n     * JSON identifier must be lowercase.\n     * When strict_numbers if disabled, either case is allowed for\n     * Infinity/NaN (since we are no longer following the spec..) 
*/\n    if (ch == '\"') {\n        json_next_string_token(json, token);\n        return;\n    } else if (ch == '-' || ('0' <= ch && ch <= '9')) {\n        if (!json->cfg->decode_invalid_numbers && json_is_invalid_number(json)) {\n            json_set_token_error(token, json, \"invalid number\");\n            return;\n        }\n        json_next_number_token(json, token);\n        return;\n    } else if (!strncmp(json->ptr, \"true\", 4)) {\n        token->type = T_BOOLEAN;\n        token->value.boolean = 1;\n        json->ptr += 4;\n        return;\n    } else if (!strncmp(json->ptr, \"false\", 5)) {\n        token->type = T_BOOLEAN;\n        token->value.boolean = 0;\n        json->ptr += 5;\n        return;\n    } else if (!strncmp(json->ptr, \"null\", 4)) {\n        token->type = T_NULL;\n        json->ptr += 4;\n        return;\n    } else if (json->cfg->decode_invalid_numbers &&\n               json_is_invalid_number(json)) {\n        /* When decode_invalid_numbers is enabled, only attempt to process\n         * numbers we know are invalid JSON (Inf, NaN, hex)\n         * This is required to generate an appropriate token error,\n         * otherwise all bad tokens will register as \"invalid number\"\n         */\n        json_next_number_token(json, token);\n        return;\n    }\n\n    /* Token starts with t/f/n but isn't recognised above. 
*/\n    json_set_token_error(token, json, \"invalid token\");\n}\n\n/* This function does not return.\n * DO NOT CALL WITH DYNAMIC MEMORY ALLOCATED.\n * The only supported exception is the temporary parser string\n * json->tmp struct.\n * json and token should exist on the stack somewhere.\n * luaL_error() will long_jmp and release the stack */\nstatic void json_throw_parse_error(lua_State *l, json_parse_t *json,\n                                   const char *exp, json_token_t *token)\n{\n    const char *found;\n\n    strbuf_free(json->tmp);\n\n    if (token->type == T_ERROR)\n        found = token->value.string;\n    else\n        found = json_token_type_name[token->type];\n\n    /* Note: token->index is 0 based, display starting from 1 */\n    luaL_error(l, \"Expected %s but found %s at character %d\",\n               exp, found, token->index + 1);\n}\n\nstatic inline void json_decode_ascend(json_parse_t *json)\n{\n    json->current_depth--;\n}\n\nstatic void json_decode_descend(lua_State *l, json_parse_t *json, int slots)\n{\n    json->current_depth++;\n\n    if (json->current_depth <= json->cfg->decode_max_depth &&\n        lua_checkstack(l, slots)) {\n        return;\n    }\n\n    strbuf_free(json->tmp);\n    luaL_error(l, \"Found too many nested data structures (%d) at character %d\",\n        json->current_depth, json->ptr - json->data);\n}\n\nstatic void json_parse_object_context(lua_State *l, json_parse_t *json)\n{\n    json_token_t token;\n\n    /* 3 slots required:\n     * .., table, key, value */\n    json_decode_descend(l, json, 3);\n\n    lua_newtable(l);\n\n    json_next_token(json, &token);\n\n    /* Handle empty objects */\n    if (token.type == T_OBJ_END) {\n        json_decode_ascend(json);\n        return;\n    }\n\n    while (1) {\n        if (token.type != T_STRING)\n            json_throw_parse_error(l, json, \"object key string\", &token);\n\n        /* Push key */\n        lua_pushlstring(l, token.value.string, token.string_len);\n\n       
 json_next_token(json, &token);\n        if (token.type != T_COLON)\n            json_throw_parse_error(l, json, \"colon\", &token);\n\n        /* Fetch value */\n        json_next_token(json, &token);\n        json_process_value(l, json, &token);\n\n        /* Set key = value */\n        lua_rawset(l, -3);\n\n        json_next_token(json, &token);\n\n        if (token.type == T_OBJ_END) {\n            json_decode_ascend(json);\n            return;\n        }\n\n        if (token.type != T_COMMA)\n            json_throw_parse_error(l, json, \"comma or object end\", &token);\n\n        json_next_token(json, &token);\n    }\n}\n\n/* Handle the array context */\nstatic void json_parse_array_context(lua_State *l, json_parse_t *json)\n{\n    json_token_t token;\n    int i;\n\n    /* 2 slots required:\n     * .., table, value */\n    json_decode_descend(l, json, 2);\n\n    lua_newtable(l);\n\n    json_next_token(json, &token);\n\n    /* Handle empty arrays */\n    if (token.type == T_ARR_END) {\n        json_decode_ascend(json);\n        return;\n    }\n\n    for (i = 1; ; i++) {\n        json_process_value(l, json, &token);\n        lua_rawseti(l, -2, i);            /* arr[i] = value */\n\n        json_next_token(json, &token);\n\n        if (token.type == T_ARR_END) {\n            json_decode_ascend(json);\n            return;\n        }\n\n        if (token.type != T_COMMA)\n            json_throw_parse_error(l, json, \"comma or array end\", &token);\n\n        json_next_token(json, &token);\n    }\n}\n\n/* Handle the \"value\" context */\nstatic void json_process_value(lua_State *l, json_parse_t *json,\n                               json_token_t *token)\n{\n    switch (token->type) {\n    case T_STRING:\n        lua_pushlstring(l, token->value.string, token->string_len);\n        break;;\n    case T_NUMBER: {\n        double num = token->value.number;\n        double intpart;\n        /* Convert to integer when possible for Lua 5.1 compatibility.\n         * This 
ensures tostring(cjson.decode('{\"id\":42}').id) returns \"42\" not \"42.0\" */\n        if (modf(num, &intpart) == 0.0 &&\n            intpart >= LUA_MININTEGER && intpart <= LUA_MAXINTEGER) {\n            lua_pushinteger(l, (lua_Integer)intpart);\n        } else {\n            lua_pushnumber(l, num);\n        }\n        break;\n    }\n    case T_BOOLEAN:\n        lua_pushboolean(l, token->value.boolean);\n        break;;\n    case T_OBJ_BEGIN:\n        json_parse_object_context(l, json);\n        break;;\n    case T_ARR_BEGIN:\n        json_parse_array_context(l, json);\n        break;;\n    case T_NULL:\n        /* In Lua, setting \"t[k] = nil\" will delete k from the table.\n         * Hence a NULL pointer lightuserdata object is used instead */\n        lua_pushlightuserdata(l, NULL);\n        break;;\n    default:\n        json_throw_parse_error(l, json, \"value\", token);\n    }\n}\n\nstatic int json_decode(lua_State *l)\n{\n    json_parse_t json;\n    json_token_t token;\n    size_t json_len;\n\n    luaL_argcheck(l, lua_gettop(l) == 1, 1, \"expected 1 argument\");\n\n    json.cfg = json_fetch_config(l);\n    json.data = luaL_checklstring(l, 1, &json_len);\n    json.current_depth = 0;\n    json.ptr = json.data;\n\n    /* Detect Unicode other than UTF-8 (see RFC 4627, Sec 3)\n     *\n     * CJSON can support any simple data type, hence only the first\n     * character is guaranteed to be ASCII (at worst: '\"'). This is\n     * still enough to detect whether the wrong encoding is in use. 
*/\n    if (json_len >= 2 && (!json.data[0] || !json.data[1]))\n        luaL_error(l, \"JSON parser does not support UTF-16 or UTF-32\");\n\n    /* Ensure the temporary buffer can hold the entire string.\n     * This means we no longer need to do length checks since the decoded\n     * string must be smaller than the entire json string */\n    json.tmp = strbuf_new(json_len);\n\n    json_next_token(&json, &token);\n    json_process_value(l, &json, &token);\n\n    /* Ensure there is no more input left */\n    json_next_token(&json, &token);\n\n    if (token.type != T_END)\n        json_throw_parse_error(l, &json, \"the end\", &token);\n\n    strbuf_free(json.tmp);\n\n    return 1;\n}\n\n/* ===== INITIALISATION ===== */\n\n#if !defined(LUA_VERSION_NUM) || LUA_VERSION_NUM < 502\n/* Compatibility for Lua 5.1.\n *\n * luaL_setfuncs() is used to create a module table where the functions have\n * json_config_t as their first upvalue. Code borrowed from Lua 5.2 source. */\nstatic void luaL_setfuncs (lua_State *l, const luaL_Reg *reg, int nup)\n{\n    int i;\n\n    luaL_checkstack(l, nup, \"too many upvalues\");\n    for (; reg->name != NULL; reg++) {  /* fill the table with given functions */\n        for (i = 0; i < nup; i++)  /* copy upvalues to the top */\n            lua_pushvalue(l, -nup);\n        lua_pushcclosure(l, reg->func, nup);  /* closure with those upvalues */\n        lua_setfield(l, -(nup + 2), reg->name);\n    }\n    lua_pop(l, nup);  /* remove upvalues */\n}\n#endif\n\n/* Call target function in protected mode with all supplied args.\n * Assumes target function only returns a single non-nil value.\n * Convert and return thrown errors as: nil, \"error message\" */\nstatic int json_protect_conversion(lua_State *l)\n{\n    int err;\n\n    /* Deliberately throw an error for invalid arguments */\n    luaL_argcheck(l, lua_gettop(l) == 1, 1, \"expected 1 argument\");\n\n    /* pcall() the function stored as upvalue(1) */\n    lua_pushvalue(l, 
lua_upvalueindex(1));\n    lua_insert(l, 1);\n    err = lua_pcall(l, 1, 1, 0);\n    if (!err)\n        return 1;\n\n    if (err == LUA_ERRRUN) {\n        lua_pushnil(l);\n        lua_insert(l, -2);\n        return 2;\n    }\n\n    /* Since we are not using a custom error handler, the only remaining\n     * errors are memory related */\n    return luaL_error(l, \"Memory allocation error in CJSON protected call\");\n}\n\n/* Return cjson module table */\nstatic int lua_cjson_new(lua_State *l)\n{\n    luaL_Reg reg[] = {\n        { \"encode\", json_encode },\n        { \"decode\", json_decode },\n        { \"encode_sparse_array\", json_cfg_encode_sparse_array },\n        { \"encode_max_depth\", json_cfg_encode_max_depth },\n        { \"decode_max_depth\", json_cfg_decode_max_depth },\n        { \"encode_number_precision\", json_cfg_encode_number_precision },\n        { \"encode_keep_buffer\", json_cfg_encode_keep_buffer },\n        { \"encode_invalid_numbers\", json_cfg_encode_invalid_numbers },\n        { \"decode_invalid_numbers\", json_cfg_decode_invalid_numbers },\n        { \"new\", lua_cjson_new },\n        { NULL, NULL }\n    };\n\n    /* Initialise number conversions */\n    fpconv_init();\n\n    /* cjson module table */\n    lua_newtable(l);\n\n    /* Register functions with config data as upvalue */\n    json_create_config(l);\n    luaL_setfuncs(l, reg, 1);\n\n    /* Set cjson.null */\n    lua_pushlightuserdata(l, NULL);\n    lua_setfield(l, -2, \"null\");\n\n    /* Set module name / version fields */\n    lua_pushliteral(l, CJSON_MODNAME);\n    lua_setfield(l, -2, \"_NAME\");\n    lua_pushliteral(l, CJSON_VERSION);\n    lua_setfield(l, -2, \"_VERSION\");\n\n    return 1;\n}\n\n/* Return cjson.safe module table */\nstatic int lua_cjson_safe_new(lua_State *l)\n{\n    const char *func[] = { \"decode\", \"encode\", NULL };\n    int i;\n\n    lua_cjson_new(l);\n\n    /* Fix new() method */\n    lua_pushcfunction(l, lua_cjson_safe_new);\n    lua_setfield(l, -2, 
\"new\");\n\n    for (i = 0; func[i]; i++) {\n        lua_getfield(l, -1, func[i]);\n        lua_pushcclosure(l, json_protect_conversion, 1);\n        lua_setfield(l, -2, func[i]);\n    }\n\n    return 1;\n}\n\nint luaopen_cjson(lua_State *l)\n{\n    lua_cjson_new(l);\n\n    lua_pushvalue(l, -1);\n    lua_setglobal(l, CJSON_MODNAME);\n\n    /* Return cjson table */\n    return 1;\n}\n\nint luaopen_cjson_safe(lua_State *l)\n{\n    lua_cjson_safe_new(l);\n\n    /* Return cjson.safe table */\n    return 1;\n}\n\n/* vi:ai et sw=4 ts=4:\n */\n"
  },
  {
    "path": "src/redis/lua/cjson/strbuf.c",
    "content": "/* strbuf - String buffer routines\n *\n * Copyright (c) 2010-2012  Mark Pulford <mark@kyne.com.au>\n *\n * Permission is hereby granted, free of charge, to any person obtaining\n * a copy of this software and associated documentation files (the\n * \"Software\"), to deal in the Software without restriction, including\n * without limitation the rights to use, copy, modify, merge, publish,\n * distribute, sublicense, and/or sell copies of the Software, and to\n * permit persons to whom the Software is furnished to do so, subject to\n * the following conditions:\n *\n * The above copyright notice and this permission notice shall be\n * included in all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND,\n * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF\n * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.\n * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY\n * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,\n * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE\n * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.\n */\n\n#include <stdio.h>\n#include <stdlib.h>\n#include <stdarg.h>\n#include <string.h>\n\n#include \"strbuf.h\"\n\nstatic void die(const char *fmt, ...)\n{\n    va_list arg;\n\n    va_start(arg, fmt);\n    vfprintf(stderr, fmt, arg);\n    va_end(arg);\n    fprintf(stderr, \"\\n\");\n\n    exit(-1);\n}\n\nvoid strbuf_init(strbuf_t *s, int len)\n{\n    int size;\n\n    if (len <= 0)\n        size = STRBUF_DEFAULT_SIZE;\n    else\n        size = len + 1;         /* \\0 terminator */\n\n    s->buf = NULL;\n    s->size = size;\n    s->length = 0;\n    s->increment = STRBUF_DEFAULT_INCREMENT;\n    s->dynamic = 0;\n    s->reallocs = 0;\n    s->debug = 0;\n\n    s->buf = malloc(size);\n    if (!s->buf)\n        die(\"Out of memory\");\n\n    
strbuf_ensure_null(s);\n}\n\nstrbuf_t *strbuf_new(int len)\n{\n    strbuf_t *s;\n\n    s = malloc(sizeof(strbuf_t));\n    if (!s)\n        die(\"Out of memory\");\n\n    strbuf_init(s, len);\n\n    /* Dynamic strbuf allocation / deallocation */\n    s->dynamic = 1;\n\n    return s;\n}\n\nvoid strbuf_set_increment(strbuf_t *s, int increment)\n{\n    /* Increment > 0:  Linear buffer growth rate\n     * Increment < -1: Exponential buffer growth rate */\n    if (increment == 0 || increment == -1)\n        die(\"BUG: Invalid string increment\");\n\n    s->increment = increment;\n}\n\nstatic inline void debug_stats(strbuf_t *s)\n{\n    if (s->debug) {\n        fprintf(stderr, \"strbuf(%lx) reallocs: %d, length: %d, size: %d\\n\",\n                (long)s, s->reallocs, s->length, s->size);\n    }\n}\n\n/* If strbuf_t has not been dynamically allocated, strbuf_free() can\n * be called any number of times strbuf_init() */\nvoid strbuf_free(strbuf_t *s)\n{\n    debug_stats(s);\n\n    if (s->buf) {\n        free(s->buf);\n        s->buf = NULL;\n    }\n    if (s->dynamic)\n        free(s);\n}\n\nchar *strbuf_free_to_string(strbuf_t *s, int *len)\n{\n    char *buf;\n\n    debug_stats(s);\n\n    strbuf_ensure_null(s);\n\n    buf = s->buf;\n    if (len)\n        *len = s->length;\n\n    if (s->dynamic)\n        free(s);\n\n    return buf;\n}\n\nstatic int calculate_new_size(strbuf_t *s, int len)\n{\n    int reqsize, newsize;\n\n    if (len <= 0)\n        die(\"BUG: Invalid strbuf length requested\");\n\n    /* Ensure there is room for optional NULL termination */\n    reqsize = len + 1;\n\n    /* If the user has requested to shrink the buffer, do it exactly */\n    if (s->size > reqsize)\n        return reqsize;\n\n    newsize = s->size;\n    if (s->increment < 0) {\n        /* Exponential sizing */\n        while (newsize < reqsize)\n            newsize *= -s->increment;\n    } else {\n        /* Linear sizing */\n        newsize = ((newsize + s->increment - 1) / s->increment) 
* s->increment;\n    }\n\n    return newsize;\n}\n\n\n/* Ensure strbuf can handle a string length bytes long (ignoring NULL\n * optional termination). */\nvoid strbuf_resize(strbuf_t *s, int len)\n{\n    int newsize;\n\n    newsize = calculate_new_size(s, len);\n\n    if (s->debug > 1) {\n        fprintf(stderr, \"strbuf(%lx) resize: %d => %d\\n\",\n                (long)s, s->size, newsize);\n    }\n\n    s->size = newsize;\n    s->buf = realloc(s->buf, s->size);\n    if (!s->buf)\n        die(\"Out of memory\");\n    s->reallocs++;\n}\n\nvoid strbuf_append_string(strbuf_t *s, const char *str)\n{\n    int space, i;\n\n    space = strbuf_empty_length(s);\n\n    for (i = 0; str[i]; i++) {\n        if (space < 1) {\n            strbuf_resize(s, s->length + 1);\n            space = strbuf_empty_length(s);\n        }\n\n        s->buf[s->length] = str[i];\n        s->length++;\n        space--;\n    }\n}\n\n/* strbuf_append_fmt() should only be used when an upper bound\n * is known for the output string. */\nvoid strbuf_append_fmt(strbuf_t *s, int len, const char *fmt, ...)\n{\n    va_list arg;\n    int fmt_len;\n\n    strbuf_ensure_empty_length(s, len);\n\n    va_start(arg, fmt);\n    fmt_len = vsnprintf(s->buf + s->length, len, fmt, arg);\n    va_end(arg);\n\n    if (fmt_len < 0)\n        die(\"BUG: Unable to convert number\");  /* This should never happen.. */\n\n    s->length += fmt_len;\n}\n\n/* strbuf_append_fmt_retry() can be used when there is no known\n * upper bound for the output string. 
*/\nvoid strbuf_append_fmt_retry(strbuf_t *s, const char *fmt, ...)\n{\n    va_list arg;\n    int fmt_len, try;\n    int empty_len;\n\n    /* If the first attempt to append fails, resize the buffer appropriately\n     * and try again */\n    for (try = 0; ; try++) {\n        va_start(arg, fmt);\n        /* Append the new formatted string */\n        /* fmt_len is the length of the string required, excluding the\n         * trailing NULL */\n        empty_len = strbuf_empty_length(s);\n        /* Add 1 since there is also space to store the terminating NULL. */\n        fmt_len = vsnprintf(s->buf + s->length, empty_len + 1, fmt, arg);\n        va_end(arg);\n\n        if (fmt_len <= empty_len)\n            break;  /* SUCCESS */\n        if (try > 0)\n            die(\"BUG: length of formatted string changed\");\n\n        strbuf_resize(s, s->length + fmt_len);\n    }\n\n    s->length += fmt_len;\n}\n\n/* vi:ai et sw=4 ts=4:\n */\n"
  },
  {
    "path": "src/redis/lua/cjson/strbuf.h",
    "content": "/* strbuf - String buffer routines\n *\n * Copyright (c) 2010-2012  Mark Pulford <mark@kyne.com.au>\n *\n * Permission is hereby granted, free of charge, to any person obtaining\n * a copy of this software and associated documentation files (the\n * \"Software\"), to deal in the Software without restriction, including\n * without limitation the rights to use, copy, modify, merge, publish,\n * distribute, sublicense, and/or sell copies of the Software, and to\n * permit persons to whom the Software is furnished to do so, subject to\n * the following conditions:\n *\n * The above copyright notice and this permission notice shall be\n * included in all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND,\n * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF\n * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.\n * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY\n * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,\n * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE\n * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.\n */\n\n#include <stdlib.h>\n#include <stdarg.h>\n\n/* Size: Total bytes allocated to *buf\n * Length: String length, excluding optional NULL terminator.\n * Increment: Allocation increments when resizing the string buffer.\n * Dynamic: True if created via strbuf_new()\n */\n\ntypedef struct {\n    char *buf;\n    int size;\n    int length;\n    int increment;\n    int dynamic;\n    int reallocs;\n    int debug;\n} strbuf_t;\n\n#ifndef STRBUF_DEFAULT_SIZE\n#define STRBUF_DEFAULT_SIZE 1023\n#endif\n#ifndef STRBUF_DEFAULT_INCREMENT\n#define STRBUF_DEFAULT_INCREMENT -2\n#endif\n\n/* Initialise */\nextern strbuf_t *strbuf_new(int len);\nextern void strbuf_init(strbuf_t *s, int len);\nextern void strbuf_set_increment(strbuf_t *s, int increment);\n\n/* Release */\nextern void 
strbuf_free(strbuf_t *s);\nextern char *strbuf_free_to_string(strbuf_t *s, int *len);\n\n/* Management */\nextern void strbuf_resize(strbuf_t *s, int len);\nstatic int strbuf_empty_length(strbuf_t *s);\nstatic int strbuf_length(strbuf_t *s);\nstatic char *strbuf_string(strbuf_t *s, int *len);\nstatic void strbuf_ensure_empty_length(strbuf_t *s, int len);\nstatic char *strbuf_empty_ptr(strbuf_t *s);\nstatic void strbuf_extend_length(strbuf_t *s, int len);\n\n/* Update */\nextern void strbuf_append_fmt(strbuf_t *s, int len, const char *fmt, ...);\nextern void strbuf_append_fmt_retry(strbuf_t *s, const char *format, ...);\nstatic void strbuf_append_mem(strbuf_t *s, const char *c, int len);\nextern void strbuf_append_string(strbuf_t *s, const char *str);\nstatic void strbuf_append_char(strbuf_t *s, const char c);\nstatic void strbuf_ensure_null(strbuf_t *s);\n\n/* Reset string for before use */\nstatic inline void strbuf_reset(strbuf_t *s)\n{\n    s->length = 0;\n}\n\nstatic inline int strbuf_allocated(strbuf_t *s)\n{\n    return s->buf != NULL;\n}\n\n/* Return bytes remaining in the string buffer\n * Ensure there is space for a NULL terminator. 
*/\nstatic inline int strbuf_empty_length(strbuf_t *s)\n{\n    return s->size - s->length - 1;\n}\n\nstatic inline void strbuf_ensure_empty_length(strbuf_t *s, int len)\n{\n    if (len > strbuf_empty_length(s))\n        strbuf_resize(s, s->length + len);\n}\n\nstatic inline char *strbuf_empty_ptr(strbuf_t *s)\n{\n    return s->buf + s->length;\n}\n\nstatic inline void strbuf_extend_length(strbuf_t *s, int len)\n{\n    s->length += len;\n}\n\nstatic inline int strbuf_length(strbuf_t *s)\n{\n    return s->length;\n}\n\nstatic inline void strbuf_append_char(strbuf_t *s, const char c)\n{\n    strbuf_ensure_empty_length(s, 1);\n    s->buf[s->length++] = c;\n}\n\nstatic inline void strbuf_append_char_unsafe(strbuf_t *s, const char c)\n{\n    s->buf[s->length++] = c;\n}\n\nstatic inline void strbuf_append_mem(strbuf_t *s, const char *c, int len)\n{\n    strbuf_ensure_empty_length(s, len);\n    memcpy(s->buf + s->length, c, len);\n    s->length += len;\n}\n\nstatic inline void strbuf_append_mem_unsafe(strbuf_t *s, const char *c, int len)\n{\n    memcpy(s->buf + s->length, c, len);\n    s->length += len;\n}\n\nstatic inline void strbuf_ensure_null(strbuf_t *s)\n{\n    s->buf[s->length] = 0;\n}\n\nstatic inline char *strbuf_string(strbuf_t *s, int *len)\n{\n    if (len)\n        *len = s->length;\n\n    return s->buf;\n}\n\n/* vi:ai et sw=4 ts=4:\n */\n"
  },
  {
    "path": "src/redis/lua/cmsgpack/lua_cmsgpack.c",
    "content": "#include <math.h>\n#include <stdlib.h>\n#include <stdint.h>\n#include <string.h>\n#include <assert.h>\n\n#include \"lua.h\"\n#include \"lauxlib.h\"\n\n#define LUACMSGPACK_NAME        \"cmsgpack\"\n#define LUACMSGPACK_SAFE_NAME   \"cmsgpack_safe\"\n#define LUACMSGPACK_VERSION     \"lua-cmsgpack 0.4.0\"\n#define LUACMSGPACK_COPYRIGHT   \"Copyright (C) 2012, Salvatore Sanfilippo\"\n#define LUACMSGPACK_DESCRIPTION \"MessagePack C implementation for Lua\"\n\n/* Allows a preprocessor directive to override MAX_NESTING */\n#ifndef LUACMSGPACK_MAX_NESTING\n    #define LUACMSGPACK_MAX_NESTING  16 /* Max tables nesting. */\n#endif\n\n/* Check if float or double can be an integer without loss of precision */\n#define IS_INT_TYPE_EQUIVALENT(x, T) (!isinf(x) && (T)(x) == (x))\n\n#define IS_INT64_EQUIVALENT(x) IS_INT_TYPE_EQUIVALENT(x, int64_t)\n#define IS_INT_EQUIVALENT(x) IS_INT_TYPE_EQUIVALENT(x, int)\n\n/* If size of pointer is equal to a 4 byte integer, we're on 32 bits. */\n#if UINTPTR_MAX == UINT_MAX\n    #define BITS_32 1\n#else\n    #define BITS_32 0\n#endif\n\n#if BITS_32\n    #define lua_pushunsigned(L, n) lua_pushnumber(L, n)\n#else\n    #define lua_pushunsigned(L, n) lua_pushinteger(L, n)\n#endif\n\n/* =============================================================================\n * MessagePack implementation and bindings for Lua 5.1/5.2.\n * Copyright(C) 2012 Salvatore Sanfilippo <antirez@gmail.com>\n *\n * http://github.com/antirez/lua-cmsgpack\n *\n * For MessagePack specification check the following web site:\n * http://wiki.msgpack.org/display/MSGPACK/Format+specification\n *\n * See Copyright Notice at the end of this file.\n *\n * CHANGELOG:\n * 19-Feb-2012 (ver 0.1.0): Initial release.\n * 20-Feb-2012 (ver 0.2.0): Tables encoding improved.\n * 20-Feb-2012 (ver 0.2.1): Minor bug fixing.\n * 20-Feb-2012 (ver 0.3.0): Module renamed lua-cmsgpack (was lua-msgpack).\n * 04-Apr-2014 (ver 0.3.1): Lua 5.2 support and minor bug fix.\n * 07-Apr-2014 (ver 
0.4.0): Multiple pack/unpack, lua allocator, efficiency.\n * ========================================================================== */\n\n/* -------------------------- Endian conversion --------------------------------\n * We use it only for floats and doubles, all the other conversions performed\n * in an endian independent fashion. So the only thing we need is a function\n * that swaps a binary string if arch is little endian (and left it untouched\n * otherwise). */\n\n/* Reverse memory bytes if arch is little endian. Given the conceptual\n * simplicity of the Lua build system we prefer check for endianess at runtime.\n * The performance difference should be acceptable. */\nvoid memrevifle(void *ptr, size_t len) {\n    unsigned char   *p = (unsigned char *)ptr,\n                    *e = (unsigned char *)p+len-1,\n                    aux;\n    int test = 1;\n    unsigned char *testp = (unsigned char*) &test;\n\n    if (testp[0] == 0) return; /* Big endian, nothing to do. */\n    len /= 2;\n    while(len--) {\n        aux = *p;\n        *p = *e;\n        *e = aux;\n        p++;\n        e--;\n    }\n}\n\n/* ---------------------------- String buffer ----------------------------------\n * This is a simple implementation of string buffers. The only operation\n * supported is creating empty buffers and appending bytes to it.\n * The string buffer uses 2x preallocation on every realloc for O(N) append\n * behavior.  
*/\n\ntypedef struct mp_buf {\n    unsigned char *b;\n    size_t len, free;\n} mp_buf;\n\nvoid *mp_realloc(lua_State *L, void *target, size_t osize,size_t nsize) {\n    void *(*local_realloc) (void *, void *, size_t osize, size_t nsize) = NULL;\n    void *ud;\n\n    local_realloc = lua_getallocf(L, &ud);\n\n    return local_realloc(ud, target, osize, nsize);\n}\n\nmp_buf *mp_buf_new(lua_State *L) {\n    mp_buf *buf = NULL;\n\n    /* Old size = 0; new size = sizeof(*buf) */\n    buf = (mp_buf*)mp_realloc(L, NULL, 0, sizeof(*buf));\n\n    buf->b = NULL;\n    buf->len = buf->free = 0;\n    return buf;\n}\n\nvoid mp_buf_append(lua_State *L, mp_buf *buf, const unsigned char *s, size_t len) {\n    if (buf->free < len) {\n        size_t newsize = (buf->len+len)*2;\n\n        buf->b = (unsigned char*)mp_realloc(L, buf->b, buf->len + buf->free, newsize);\n        buf->free = newsize - buf->len;\n    }\n    memcpy(buf->b+buf->len,s,len);\n    buf->len += len;\n    buf->free -= len;\n}\n\nvoid mp_buf_free(lua_State *L, mp_buf *buf) {\n    mp_realloc(L, buf->b, buf->len + buf->free, 0); /* realloc to 0 = free */\n    mp_realloc(L, buf, sizeof(*buf), 0);\n}\n\n/* ---------------------------- String cursor ----------------------------------\n * This simple data structure is used for parsing. Basically you create a cursor\n * using a string pointer and a length, then it is possible to access the\n * current string position with cursor->p, check the remaining length\n * in cursor->left, and finally consume more string using\n * mp_cur_consume(cursor,len), to advance 'p' and subtract 'left'.\n * An additional field cursor->error is set to zero on initialization and can\n * be used to report errors. */\n\n#define MP_CUR_ERROR_NONE   0\n#define MP_CUR_ERROR_EOF    1   /* Not enough data to complete operation. 
*/\n#define MP_CUR_ERROR_BADFMT 2   /* Bad data format */\n\ntypedef struct mp_cur {\n    const unsigned char *p;\n    size_t left;\n    int err;\n} mp_cur;\n\nvoid mp_cur_init(mp_cur *cursor, const unsigned char *s, size_t len) {\n    cursor->p = s;\n    cursor->left = len;\n    cursor->err = MP_CUR_ERROR_NONE;\n}\n\n#define mp_cur_consume(_c,_len) do { _c->p += _len; _c->left -= _len; } while(0)\n\n/* When there is not enough room we set an error in the cursor and return. This\n * is very common across the code so we have a macro to make the code look\n * a bit simpler. */\n#define mp_cur_need(_c,_len) do { \\\n    if (_c->left < _len) { \\\n        _c->err = MP_CUR_ERROR_EOF; \\\n        return; \\\n    } \\\n} while(0)\n\n/* ------------------------- Low level MP encoding -------------------------- */\n\nvoid mp_encode_bytes(lua_State *L, mp_buf *buf, const unsigned char *s, size_t len) {\n    unsigned char hdr[5];\n    int hdrlen;\n\n    if (len < 32) {\n        hdr[0] = 0xa0 | (len&0xff); /* fix raw */\n        hdrlen = 1;\n    } else if (len <= 0xff) {\n        hdr[0] = 0xd9;\n        hdr[1] = len;\n        hdrlen = 2;\n    } else if (len <= 0xffff) {\n        hdr[0] = 0xda;\n        hdr[1] = (len&0xff00)>>8;\n        hdr[2] = len&0xff;\n        hdrlen = 3;\n    } else {\n        hdr[0] = 0xdb;\n        hdr[1] = (len&0xff000000)>>24;\n        hdr[2] = (len&0xff0000)>>16;\n        hdr[3] = (len&0xff00)>>8;\n        hdr[4] = len&0xff;\n        hdrlen = 5;\n    }\n    mp_buf_append(L,buf,hdr,hdrlen);\n    mp_buf_append(L,buf,s,len);\n}\n\n/* we assume IEEE 754 internal format for single and double precision floats. 
*/\nvoid mp_encode_double(lua_State *L, mp_buf *buf, double d) {\n    unsigned char b[9];\n    float f = d;\n\n    assert(sizeof(f) == 4 && sizeof(d) == 8);\n    if (d == (double)f) {\n        b[0] = 0xca;    /* float IEEE 754 */\n        memcpy(b+1,&f,4);\n        memrevifle(b+1,4);\n        mp_buf_append(L,buf,b,5);\n    } else if (sizeof(d) == 8) {\n        b[0] = 0xcb;    /* double IEEE 754 */\n        memcpy(b+1,&d,8);\n        memrevifle(b+1,8);\n        mp_buf_append(L,buf,b,9);\n    }\n}\n\nvoid mp_encode_int(lua_State *L, mp_buf *buf, int64_t n) {\n    unsigned char b[9];\n    int enclen;\n\n    if (n >= 0) {\n        if (n <= 127) {\n            b[0] = n & 0x7f;    /* positive fixnum */\n            enclen = 1;\n        } else if (n <= 0xff) {\n            b[0] = 0xcc;        /* uint 8 */\n            b[1] = n & 0xff;\n            enclen = 2;\n        } else if (n <= 0xffff) {\n            b[0] = 0xcd;        /* uint 16 */\n            b[1] = (n & 0xff00) >> 8;\n            b[2] = n & 0xff;\n            enclen = 3;\n        } else if (n <= 0xffffffffLL) {\n            b[0] = 0xce;        /* uint 32 */\n            b[1] = (n & 0xff000000) >> 24;\n            b[2] = (n & 0xff0000) >> 16;\n            b[3] = (n & 0xff00) >> 8;\n            b[4] = n & 0xff;\n            enclen = 5;\n        } else {\n            b[0] = 0xcf;        /* uint 64 */\n            b[1] = (n & 0xff00000000000000LL) >> 56;\n            b[2] = (n & 0xff000000000000LL) >> 48;\n            b[3] = (n & 0xff0000000000LL) >> 40;\n            b[4] = (n & 0xff00000000LL) >> 32;\n            b[5] = (n & 0xff000000) >> 24;\n            b[6] = (n & 0xff0000) >> 16;\n            b[7] = (n & 0xff00) >> 8;\n            b[8] = n & 0xff;\n            enclen = 9;\n        }\n    } else {\n        if (n >= -32) {\n            b[0] = ((signed char)n);   /* negative fixnum */\n            enclen = 1;\n        } else if (n >= -128) {\n            b[0] = 0xd0;        /* int 8 */\n            b[1] = n & 
0xff;\n            enclen = 2;\n        } else if (n >= -32768) {\n            b[0] = 0xd1;        /* int 16 */\n            b[1] = (n & 0xff00) >> 8;\n            b[2] = n & 0xff;\n            enclen = 3;\n        } else if (n >= -2147483648LL) {\n            b[0] = 0xd2;        /* int 32 */\n            b[1] = (n & 0xff000000) >> 24;\n            b[2] = (n & 0xff0000) >> 16;\n            b[3] = (n & 0xff00) >> 8;\n            b[4] = n & 0xff;\n            enclen = 5;\n        } else {\n            b[0] = 0xd3;        /* int 64 */\n            b[1] = (n & 0xff00000000000000LL) >> 56;\n            b[2] = (n & 0xff000000000000LL) >> 48;\n            b[3] = (n & 0xff0000000000LL) >> 40;\n            b[4] = (n & 0xff00000000LL) >> 32;\n            b[5] = (n & 0xff000000) >> 24;\n            b[6] = (n & 0xff0000) >> 16;\n            b[7] = (n & 0xff00) >> 8;\n            b[8] = n & 0xff;\n            enclen = 9;\n        }\n    }\n    mp_buf_append(L,buf,b,enclen);\n}\n\nvoid mp_encode_array(lua_State *L, mp_buf *buf, int64_t n) {\n    unsigned char b[5];\n    int enclen;\n\n    if (n <= 15) {\n        b[0] = 0x90 | (n & 0xf);    /* fix array */\n        enclen = 1;\n    } else if (n <= 65535) {\n        b[0] = 0xdc;                /* array 16 */\n        b[1] = (n & 0xff00) >> 8;\n        b[2] = n & 0xff;\n        enclen = 3;\n    } else {\n        b[0] = 0xdd;                /* array 32 */\n        b[1] = (n & 0xff000000) >> 24;\n        b[2] = (n & 0xff0000) >> 16;\n        b[3] = (n & 0xff00) >> 8;\n        b[4] = n & 0xff;\n        enclen = 5;\n    }\n    mp_buf_append(L,buf,b,enclen);\n}\n\nvoid mp_encode_map(lua_State *L, mp_buf *buf, int64_t n) {\n    unsigned char b[5];\n    int enclen;\n\n    if (n <= 15) {\n        b[0] = 0x80 | (n & 0xf);    /* fix map */\n        enclen = 1;\n    } else if (n <= 65535) {\n        b[0] = 0xde;                /* map 16 */\n        b[1] = (n & 0xff00) >> 8;\n        b[2] = n & 0xff;\n        enclen = 3;\n    } else {\n        
b[0] = 0xdf;                /* map 32 */\n        b[1] = (n & 0xff000000) >> 24;\n        b[2] = (n & 0xff0000) >> 16;\n        b[3] = (n & 0xff00) >> 8;\n        b[4] = n & 0xff;\n        enclen = 5;\n    }\n    mp_buf_append(L,buf,b,enclen);\n}\n\n/* --------------------------- Lua types encoding --------------------------- */\n\nvoid mp_encode_lua_string(lua_State *L, mp_buf *buf) {\n    size_t len;\n    const char *s;\n\n    s = lua_tolstring(L,-1,&len);\n    mp_encode_bytes(L,buf,(const unsigned char*)s,len);\n}\n\nvoid mp_encode_lua_bool(lua_State *L, mp_buf *buf) {\n    unsigned char b = lua_toboolean(L,-1) ? 0xc3 : 0xc2;\n    mp_buf_append(L,buf,&b,1);\n}\n\n/* Lua 5.3 has a built in 64-bit integer type */\nvoid mp_encode_lua_integer(lua_State *L, mp_buf *buf) {\n#if (LUA_VERSION_NUM < 503) && BITS_32\n    lua_Number i = lua_tonumber(L,-1);\n#else\n    lua_Integer i = lua_tointeger(L,-1);\n#endif\n    mp_encode_int(L, buf, (int64_t)i);\n}\n\n/* Lua 5.2 and lower only has 64-bit doubles, so we need to\n * detect if the double may be representable as an int\n * for Lua < 5.3 */\nvoid mp_encode_lua_number(lua_State *L, mp_buf *buf) {\n    lua_Number n = lua_tonumber(L,-1);\n\n    if (IS_INT64_EQUIVALENT(n)) {\n        mp_encode_lua_integer(L, buf);\n    } else {\n        mp_encode_double(L,buf,(double)n);\n    }\n}\n\nvoid mp_encode_lua_type(lua_State *L, mp_buf *buf, int level);\n\n/* Convert a lua table into a message pack list. */\nvoid mp_encode_lua_table_as_array(lua_State *L, mp_buf *buf, int level) {\n#if LUA_VERSION_NUM < 502\n    size_t len = lua_objlen(L,-1), j;\n#else\n    size_t len = lua_rawlen(L,-1), j;\n#endif\n\n    mp_encode_array(L,buf,len);\n    luaL_checkstack(L, 1, \"in function mp_encode_lua_table_as_array\");\n    for (j = 1; j <= len; j++) {\n        lua_pushnumber(L,j);\n        lua_gettable(L,-2);\n        mp_encode_lua_type(L,buf,level+1);\n    }\n}\n\n/* Convert a lua table into a message pack key-value map. 
*/\nvoid mp_encode_lua_table_as_map(lua_State *L, mp_buf *buf, int level) {\n    size_t len = 0;\n\n    /* First step: count keys into table. No other way to do it with the\n     * Lua API, we need to iterate a first time. Note that an alternative\n     * would be to do a single run, and then hack the buffer to insert the\n     * map opcodes for message pack. Too hackish for this lib. */\n    luaL_checkstack(L, 3, \"in function mp_encode_lua_table_as_map\");\n    lua_pushnil(L);\n    while(lua_next(L,-2)) {\n        lua_pop(L,1); /* remove value, keep key for next iteration. */\n        len++;\n    }\n\n    /* Step two: actually encoding of the map. */\n    mp_encode_map(L,buf,len);\n    lua_pushnil(L);\n    while(lua_next(L,-2)) {\n        /* Stack: ... key value */\n        lua_pushvalue(L,-2); /* Stack: ... key value key */\n        mp_encode_lua_type(L,buf,level+1); /* encode key */\n        mp_encode_lua_type(L,buf,level+1); /* encode val */\n    }\n}\n\n/* Returns true if the Lua table on top of the stack is exclusively composed\n * of keys from numerical keys from 1 up to N, with N being the total number\n * of elements, without any hole in the middle. */\nint table_is_an_array(lua_State *L) {\n    int count = 0, max = 0;\n#if LUA_VERSION_NUM < 503\n    lua_Number n;\n#else\n    lua_Integer n;\n#endif\n\n    /* Stack top on function entry */\n    int stacktop;\n\n    stacktop = lua_gettop(L);\n\n    lua_pushnil(L);\n    while(lua_next(L,-2)) {\n        /* Stack: ... key value */\n        lua_pop(L,1); /* Stack: ... key */\n        /* The <= 0 check is valid here because we're comparing indexes. */\n#if LUA_VERSION_NUM < 503\n        if ((LUA_TNUMBER != lua_type(L,-1)) || (n = lua_tonumber(L, -1)) <= 0 ||\n            !IS_INT_EQUIVALENT(n))\n#else\n        if (!lua_isinteger(L,-1) || (n = lua_tointeger(L, -1)) <= 0)\n#endif\n        {\n            lua_settop(L, stacktop);\n            return 0;\n        }\n        max = (n > max ? 
n : max);\n        count++;\n    }\n    /* We have the total number of elements in \"count\". Also we have\n     * the max index encountered in \"max\". We can't reach this code\n     * if there are indexes <= 0. If you also note that there can not be\n     * repeated keys into a table, you have that if max==count you are sure\n     * that there are all the keys from 1 to count (both included). */\n    lua_settop(L, stacktop);\n    return max == count;\n}\n\n/* If the length operator returns non-zero, that is, there is at least\n * an object at key '1', we serialize to message pack list. Otherwise\n * we use a map. */\nvoid mp_encode_lua_table(lua_State *L, mp_buf *buf, int level) {\n    if (table_is_an_array(L))\n        mp_encode_lua_table_as_array(L,buf,level);\n    else\n        mp_encode_lua_table_as_map(L,buf,level);\n}\n\nvoid mp_encode_lua_null(lua_State *L, mp_buf *buf) {\n    unsigned char b[1];\n\n    b[0] = 0xc0;\n    mp_buf_append(L,buf,b,1);\n}\n\nvoid mp_encode_lua_type(lua_State *L, mp_buf *buf, int level) {\n    int t = lua_type(L,-1);\n\n    /* Limit the encoding of nested tables to a specified maximum depth, so that\n     * we survive when called against circular references in tables. 
*/\n    if (t == LUA_TTABLE && level == LUACMSGPACK_MAX_NESTING) t = LUA_TNIL;\n    switch(t) {\n    case LUA_TSTRING: mp_encode_lua_string(L,buf); break;\n    case LUA_TBOOLEAN: mp_encode_lua_bool(L,buf); break;\n    case LUA_TNUMBER:\n    #if LUA_VERSION_NUM < 503\n        mp_encode_lua_number(L,buf); break;\n    #else\n        if (lua_isinteger(L, -1)) {\n            mp_encode_lua_integer(L, buf);\n        } else {\n            mp_encode_lua_number(L, buf);\n        }\n        break;\n    #endif\n    case LUA_TTABLE: mp_encode_lua_table(L,buf,level); break;\n    default: mp_encode_lua_null(L,buf); break;\n    }\n    lua_pop(L,1);\n}\n\n/*\n * Packs all arguments as a stream for multiple unpacking later.\n * Returns error if no arguments provided.\n */\nint mp_pack(lua_State *L) {\n    int nargs = lua_gettop(L);\n    int i;\n    mp_buf *buf;\n\n    if (nargs == 0)\n        return luaL_argerror(L, 0, \"MessagePack pack needs input.\");\n\n    if (!lua_checkstack(L, nargs))\n        return luaL_argerror(L, 0, \"Too many arguments for MessagePack pack.\");\n\n    buf = mp_buf_new(L);\n    for(i = 1; i <= nargs; i++) {\n        /* Copy argument i to top of stack for _encode processing;\n         * the encode function pops it from the stack when complete. */\n        luaL_checkstack(L, 1, \"in function mp_check\");\n        lua_pushvalue(L, i);\n\n        mp_encode_lua_type(L,buf,0);\n\n        lua_pushlstring(L,(char*)buf->b,buf->len);\n\n        /* Reuse the buffer for the next operation by\n         * setting its free count to the total buffer size\n         * and the current position to zero. 
*/\n        buf->free += buf->len;\n        buf->len = 0;\n    }\n    mp_buf_free(L, buf);\n\n    /* Concatenate all nargs buffers together */\n    lua_concat(L, nargs);\n    return 1;\n}\n\n/* ------------------------------- Decoding --------------------------------- */\n\nvoid mp_decode_to_lua_type(lua_State *L, mp_cur *c);\n\nvoid mp_decode_to_lua_array(lua_State *L, mp_cur *c, size_t len) {\n    assert(len <= UINT_MAX);\n    int index = 1;\n\n    lua_newtable(L);\n    luaL_checkstack(L, 1, \"in function mp_decode_to_lua_array\");\n    while(len--) {\n        lua_pushnumber(L,index++);\n        mp_decode_to_lua_type(L,c);\n        if (c->err) return;\n        lua_settable(L,-3);\n    }\n}\n\nvoid mp_decode_to_lua_hash(lua_State *L, mp_cur *c, size_t len) {\n    assert(len <= UINT_MAX);\n    lua_newtable(L);\n    while(len--) {\n        mp_decode_to_lua_type(L,c); /* key */\n        if (c->err) return;\n        mp_decode_to_lua_type(L,c); /* value */\n        if (c->err) return;\n        lua_settable(L,-3);\n    }\n}\n\n/* Decode a Message Pack raw object pointed by the string cursor 'c' to\n * a Lua type, that is left as the only result on the stack. */\nvoid mp_decode_to_lua_type(lua_State *L, mp_cur *c) {\n    mp_cur_need(c,1);\n\n    /* If we return more than 18 elements, we must resize the stack to\n     * fit all our return values.  
But, there is no way to\n     * determine how many objects a msgpack will unpack to up front, so\n     * we request a +1 larger stack on each iteration (noop if stack is\n     * big enough, and when stack does require resize it doubles in size) */\n    luaL_checkstack(L, 1,\n        \"too many return values at once; \"\n        \"use unpack_one or unpack_limit instead.\");\n\n    switch(c->p[0]) {\n    case 0xcc:  /* uint 8 */\n        mp_cur_need(c,2);\n        lua_pushunsigned(L,c->p[1]);\n        mp_cur_consume(c,2);\n        break;\n    case 0xd0:  /* int 8 */\n        mp_cur_need(c,2);\n        lua_pushinteger(L,(signed char)c->p[1]);\n        mp_cur_consume(c,2);\n        break;\n    case 0xcd:  /* uint 16 */\n        mp_cur_need(c,3);\n        lua_pushunsigned(L,\n            (c->p[1] << 8) |\n             c->p[2]);\n        mp_cur_consume(c,3);\n        break;\n    case 0xd1:  /* int 16 */\n        mp_cur_need(c,3);\n        lua_pushinteger(L,(int16_t)\n            (c->p[1] << 8) |\n             c->p[2]);\n        mp_cur_consume(c,3);\n        break;\n    case 0xce:  /* uint 32 */\n        mp_cur_need(c,5);\n        lua_pushunsigned(L,\n            ((uint32_t)c->p[1] << 24) |\n            ((uint32_t)c->p[2] << 16) |\n            ((uint32_t)c->p[3] << 8) |\n             (uint32_t)c->p[4]);\n        mp_cur_consume(c,5);\n        break;\n    case 0xd2:  /* int 32 */\n        mp_cur_need(c,5);\n        lua_pushinteger(L,\n            ((int32_t)c->p[1] << 24) |\n            ((int32_t)c->p[2] << 16) |\n            ((int32_t)c->p[3] << 8) |\n             (int32_t)c->p[4]);\n        mp_cur_consume(c,5);\n        break;\n    case 0xcf:  /* uint 64 */\n        mp_cur_need(c,9);\n        lua_pushunsigned(L,\n            ((uint64_t)c->p[1] << 56) |\n            ((uint64_t)c->p[2] << 48) |\n            ((uint64_t)c->p[3] << 40) |\n            ((uint64_t)c->p[4] << 32) |\n            ((uint64_t)c->p[5] << 24) |\n            ((uint64_t)c->p[6] << 16) |\n            
((uint64_t)c->p[7] << 8) |\n             (uint64_t)c->p[8]);\n        mp_cur_consume(c,9);\n        break;\n    case 0xd3:  /* int 64 */\n        mp_cur_need(c,9);\n#if LUA_VERSION_NUM < 503\n        lua_pushnumber(L,\n#else\n        lua_pushinteger(L,\n#endif\n            ((int64_t)c->p[1] << 56) |\n            ((int64_t)c->p[2] << 48) |\n            ((int64_t)c->p[3] << 40) |\n            ((int64_t)c->p[4] << 32) |\n            ((int64_t)c->p[5] << 24) |\n            ((int64_t)c->p[6] << 16) |\n            ((int64_t)c->p[7] << 8) |\n             (int64_t)c->p[8]);\n        mp_cur_consume(c,9);\n        break;\n    case 0xc0:  /* nil */\n        lua_pushnil(L);\n        mp_cur_consume(c,1);\n        break;\n    case 0xc3:  /* true */\n        lua_pushboolean(L,1);\n        mp_cur_consume(c,1);\n        break;\n    case 0xc2:  /* false */\n        lua_pushboolean(L,0);\n        mp_cur_consume(c,1);\n        break;\n    case 0xca:  /* float */\n        mp_cur_need(c,5);\n        assert(sizeof(float) == 4);\n        {\n            float f;\n            memcpy(&f,c->p+1,4);\n            memrevifle(&f,4);\n            lua_pushnumber(L,f);\n            mp_cur_consume(c,5);\n        }\n        break;\n    case 0xcb:  /* double */\n        mp_cur_need(c,9);\n        assert(sizeof(double) == 8);\n        {\n            double d;\n            memcpy(&d,c->p+1,8);\n            memrevifle(&d,8);\n            lua_pushnumber(L,d);\n            mp_cur_consume(c,9);\n        }\n        break;\n    case 0xd9:  /* raw 8 */\n        mp_cur_need(c,2);\n        {\n            size_t l = c->p[1];\n            mp_cur_need(c,2+l);\n            lua_pushlstring(L,(char*)c->p+2,l);\n            mp_cur_consume(c,2+l);\n        }\n        break;\n    case 0xda:  /* raw 16 */\n        mp_cur_need(c,3);\n        {\n            size_t l = (c->p[1] << 8) | c->p[2];\n            mp_cur_need(c,3+l);\n            lua_pushlstring(L,(char*)c->p+3,l);\n            mp_cur_consume(c,3+l);\n        }\n    
    break;\n    case 0xdb:  /* raw 32 */\n        mp_cur_need(c,5);\n        {\n            size_t l = ((size_t)c->p[1] << 24) |\n                       ((size_t)c->p[2] << 16) |\n                       ((size_t)c->p[3] << 8) |\n                       (size_t)c->p[4];\n            mp_cur_consume(c,5);\n            mp_cur_need(c,l);\n            lua_pushlstring(L,(char*)c->p,l);\n            mp_cur_consume(c,l);\n        }\n        break;\n    case 0xdc:  /* array 16 */\n        mp_cur_need(c,3);\n        {\n            size_t l = (c->p[1] << 8) | c->p[2];\n            mp_cur_consume(c,3);\n            mp_decode_to_lua_array(L,c,l);\n        }\n        break;\n    case 0xdd:  /* array 32 */\n        mp_cur_need(c,5);\n        {\n            size_t l = ((size_t)c->p[1] << 24) |\n                       ((size_t)c->p[2] << 16) |\n                       ((size_t)c->p[3] << 8) |\n                       (size_t)c->p[4];\n            mp_cur_consume(c,5);\n            mp_decode_to_lua_array(L,c,l);\n        }\n        break;\n    case 0xde:  /* map 16 */\n        mp_cur_need(c,3);\n        {\n            size_t l = (c->p[1] << 8) | c->p[2];\n            mp_cur_consume(c,3);\n            mp_decode_to_lua_hash(L,c,l);\n        }\n        break;\n    case 0xdf:  /* map 32 */\n        mp_cur_need(c,5);\n        {\n            size_t l = ((size_t)c->p[1] << 24) |\n                       ((size_t)c->p[2] << 16) |\n                       ((size_t)c->p[3] << 8) |\n                       (size_t)c->p[4];\n            mp_cur_consume(c,5);\n            mp_decode_to_lua_hash(L,c,l);\n        }\n        break;\n    default:    /* types that can't be identified by first byte value. 
*/\n        if ((c->p[0] & 0x80) == 0) {   /* positive fixnum */\n            lua_pushunsigned(L,c->p[0]);\n            mp_cur_consume(c,1);\n        } else if ((c->p[0] & 0xe0) == 0xe0) {  /* negative fixnum */\n            lua_pushinteger(L,(signed char)c->p[0]);\n            mp_cur_consume(c,1);\n        } else if ((c->p[0] & 0xe0) == 0xa0) {  /* fix raw */\n            size_t l = c->p[0] & 0x1f;\n            mp_cur_need(c,1+l);\n            lua_pushlstring(L,(char*)c->p+1,l);\n            mp_cur_consume(c,1+l);\n        } else if ((c->p[0] & 0xf0) == 0x90) {  /* fix array */\n            size_t l = c->p[0] & 0xf;\n            mp_cur_consume(c,1);\n            mp_decode_to_lua_array(L,c,l);\n        } else if ((c->p[0] & 0xf0) == 0x80) {  /* fix map */\n            size_t l = c->p[0] & 0xf;\n            mp_cur_consume(c,1);\n            mp_decode_to_lua_hash(L,c,l);\n        } else {\n            c->err = MP_CUR_ERROR_BADFMT;\n        }\n    }\n}\n\nint mp_unpack_full(lua_State *L, int limit, int offset) {\n    size_t len;\n    const char *s;\n    mp_cur c;\n    int cnt; /* Number of objects unpacked */\n    int decode_all = (!limit && !offset);\n\n    s = luaL_checklstring(L,1,&len); /* if no match, exits */\n\n    if (offset < 0 || limit < 0) /* requesting negative off or lim is invalid */\n        return luaL_error(L,\n            \"Invalid request to unpack with offset of %d and limit of %d.\",\n            offset, len);\n    else if (offset > len)\n        return luaL_error(L,\n            \"Start offset %d greater than input length %d.\", offset, len);\n\n    if (decode_all) limit = INT_MAX;\n\n    mp_cur_init(&c,(const unsigned char *)s+offset,len-offset);\n\n    /* We loop over the decode because this could be a stream\n     * of multiple top-level values serialized together */\n    for(cnt = 0; c.left > 0 && cnt < limit; cnt++) {\n        mp_decode_to_lua_type(L,&c);\n\n        if (c.err == MP_CUR_ERROR_EOF) {\n            return luaL_error(L,\"Missing 
bytes in input.\");\n        } else if (c.err == MP_CUR_ERROR_BADFMT) {\n            return luaL_error(L,\"Bad data format in input.\");\n        }\n    }\n\n    if (!decode_all) {\n        /* c->left is the remaining size of the input buffer.\n         * subtract the entire buffer size from the unprocessed size\n         * to get our next start offset */\n        int offset = len - c.left;\n\n        luaL_checkstack(L, 1, \"in function mp_unpack_full\");\n\n        /* Return offset -1 when we have processed the entire buffer. */\n        lua_pushinteger(L, c.left == 0 ? -1 : offset);\n        /* Results are returned with the arg elements still\n         * in place. Lua takes care of only returning\n         * elements above the args for us.\n         * In this case, we have one arg on the stack\n         * for this function, so we insert our first return\n         * value at position 2. */\n        lua_insert(L, 2);\n        cnt += 1; /* increase return count by one to make room for offset */\n    }\n\n    return cnt;\n}\n\nint mp_unpack(lua_State *L) {\n    return mp_unpack_full(L, 0, 0);\n}\n\nint mp_unpack_one(lua_State *L) {\n    int offset = luaL_optinteger(L, 2, 0);\n    /* Variable pop because offset may not exist */\n    lua_pop(L, lua_gettop(L)-1);\n    return mp_unpack_full(L, 1, offset);\n}\n\nint mp_unpack_limit(lua_State *L) {\n    int limit = luaL_checkinteger(L, 2);\n    int offset = luaL_optinteger(L, 3, 0);\n    /* Variable pop because offset may not exist */\n    lua_pop(L, lua_gettop(L)-1);\n\n    return mp_unpack_full(L, limit, offset);\n}\n\nint mp_safe(lua_State *L) {\n    int argc, err, total_results;\n\n    argc = lua_gettop(L);\n\n    /* This adds our function to the bottom of the stack\n     * (the \"call this function\" position) */\n    lua_pushvalue(L, lua_upvalueindex(1));\n    lua_insert(L, 1);\n\n    err = lua_pcall(L, argc, LUA_MULTRET, 0);\n    total_results = lua_gettop(L);\n\n    if (!err) {\n        return total_results;\n 
   } else {\n        lua_pushnil(L);\n        lua_insert(L,-2);\n        return 2;\n    }\n}\n\n/* -------------------------------------------------------------------------- */\nconst struct luaL_Reg cmds[] = {\n    {\"pack\", mp_pack},\n    {\"unpack\", mp_unpack},\n    {\"unpack_one\", mp_unpack_one},\n    {\"unpack_limit\", mp_unpack_limit},\n    {0}\n};\n\nint luaopen_create(lua_State *L) {\n    int i;\n    /* Manually construct our module table instead of\n     * relying on _register or _newlib */\n    lua_newtable(L);\n\n    for (i = 0; i < (sizeof(cmds)/sizeof(*cmds) - 1); i++) {\n        lua_pushcfunction(L, cmds[i].func);\n        lua_setfield(L, -2, cmds[i].name);\n    }\n\n    /* Add metadata */\n    lua_pushliteral(L, LUACMSGPACK_NAME);\n    lua_setfield(L, -2, \"_NAME\");\n    lua_pushliteral(L, LUACMSGPACK_VERSION);\n    lua_setfield(L, -2, \"_VERSION\");\n    lua_pushliteral(L, LUACMSGPACK_COPYRIGHT);\n    lua_setfield(L, -2, \"_COPYRIGHT\");\n    lua_pushliteral(L, LUACMSGPACK_DESCRIPTION);\n    lua_setfield(L, -2, \"_DESCRIPTION\");\n    return 1;\n}\n\nLUALIB_API int luaopen_cmsgpack(lua_State *L) {\n    luaopen_create(L);\n\n    lua_pushvalue(L, -1);\n    lua_setglobal(L, LUACMSGPACK_NAME);\n\n    return 1;\n}\n\nLUALIB_API int luaopen_cmsgpack_safe(lua_State *L) {\n    int i;\n\n    luaopen_cmsgpack(L);\n\n    /* Wrap all functions in the safe handler */\n    for (i = 0; i < (sizeof(cmds)/sizeof(*cmds) - 1); i++) {\n        lua_getfield(L, -1, cmds[i].name);\n        lua_pushcclosure(L, mp_safe, 1);\n        lua_setfield(L, -2, cmds[i].name);\n    }\n\n#if LUA_VERSION_NUM < 502\n    /* Register name globally for 5.1 */\n    lua_pushvalue(L, -1);\n    lua_setglobal(L, LUACMSGPACK_SAFE_NAME);\n#endif\n\n    return 1;\n}\n\n/******************************************************************************\n* Copyright (C) 2012 Salvatore Sanfilippo.  
All rights reserved.\n*\n* Permission is hereby granted, free of charge, to any person obtaining\n* a copy of this software and associated documentation files (the\n* \"Software\"), to deal in the Software without restriction, including\n* without limitation the rights to use, copy, modify, merge, publish,\n* distribute, sublicense, and/or sell copies of the Software, and to\n* permit persons to whom the Software is furnished to do so, subject to\n* the following conditions:\n*\n* The above copyright notice and this permission notice shall be\n* included in all copies or substantial portions of the Software.\n*\n* THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND,\n* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF\n* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.\n* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY\n* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,\n* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE\n* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.\n******************************************************************************/"
  },
  {
    "path": "src/redis/lua/struct/lua_struct.c",
    "content": "/*\n** {======================================================\n** Library for packing/unpacking structures.\n** $Id: struct.c,v 1.7 2018/05/11 22:04:31 roberto Exp $\n** See Copyright Notice at the end of this file\n** =======================================================\n*/\n/*\n** Valid formats:\n** > - big endian\n** < - little endian\n** ![num] - alignment\n** x - pading\n** b/B - signed/unsigned byte\n** h/H - signed/unsigned short\n** l/L - signed/unsigned long\n** T   - size_t\n** i/In - signed/unsigned integer with size 'n' (default is size of int)\n** cn - sequence of 'n' chars (from/to a string); when packing, n==0 means\n        the whole string; when unpacking, n==0 means use the previous\n        read number as the string length\n** s - zero-terminated string\n** f - float\n** d - double\n** ' ' - ignored\n*/\n\n\n#include <assert.h>\n#include <ctype.h>\n#include <limits.h>\n#include <stddef.h>\n#include <string.h>\n\n\n#include \"lua.h\"\n#include \"lauxlib.h\"\n\n\n/* basic integer type */\n#if !defined(STRUCT_INT)\n#define STRUCT_INT\tlong\n#endif\n\ntypedef STRUCT_INT Inttype;\n\n/* corresponding unsigned version */\ntypedef unsigned STRUCT_INT Uinttype;\n\n\n/* maximum size (in bytes) for integral types */\n#define MAXINTSIZE\t32\n\n/* is 'x' a power of 2? */\n#define isp2(x)\t\t((x) > 0 && ((x) & ((x) - 1)) == 0)\n\n/* dummy structure to get alignment requirements */\nstruct cD {\n  char c;\n  double d;\n};\n\n\n#define PADDING\t\t(sizeof(struct cD) - sizeof(double))\n#define MAXALIGN  \t(PADDING > sizeof(int) ? PADDING : sizeof(int))\n\n\n/* endian options */\n#define BIG\t0\n#define LITTLE\t1\n\n\nstatic union {\n  int dummy;\n  char endian;\n} const native = {1};\n\n\ntypedef struct Header {\n  int endian;\n  int align;\n} Header;\n\n\nstatic int getnum (lua_State *L, const char **fmt, int df) {\n  if (!isdigit(**fmt))  /* no number? 
*/\n    return df;  /* return default value */\n  else {\n    int a = 0;\n    do {\n      if (a > (INT_MAX / 10) || a * 10 > (INT_MAX - (**fmt - '0')))\n        luaL_error(L, \"integral size overflow\");\n      a = a*10 + *((*fmt)++) - '0';\n    } while (isdigit(**fmt));\n    return a;\n  }\n}\n\n#define defaultoptions(h)\t((h)->endian = native.endian, (h)->align = 1)\n\n\n\nstatic size_t optsize (lua_State *L, char opt, const char **fmt) {\n  switch (opt) {\n    case 'B': case 'b': return sizeof(char);\n    case 'H': case 'h': return sizeof(short);\n    case 'L': case 'l': return sizeof(long);\n    case 'T': return sizeof(size_t);\n    case 'f':  return sizeof(float);\n    case 'd':  return sizeof(double);\n    case 'x': return 1;\n    case 'c': return  getnum(L, fmt, 1);\n    case 'i': case 'I': {\n      int sz = getnum(L, fmt, sizeof(int));\n      if (sz > MAXINTSIZE)\n        luaL_error(L, \"integral size %d is larger than limit of %d\",\n                       sz, MAXINTSIZE);\n      return sz;\n    }\n    default: return 0;  /* other cases do not need alignment */\n  }\n}\n\n\n/*\n** return number of bytes needed to align an element of size 'size'\n** at current position 'len'\n*/\nstatic int gettoalign (size_t len, Header *h, int opt, size_t size) {\n  if (size == 0 || opt == 'c') return 0;\n  if (size > (size_t)h->align)\n    size = h->align;  /* respect max. 
alignment */\n  return (size - (len & (size - 1))) & (size - 1);\n}\n\n\n/*\n** options to control endianness and alignment\n*/\nstatic void controloptions (lua_State *L, int opt, const char **fmt,\n                            Header *h) {\n  switch (opt) {\n    case  ' ': return;  /* ignore white spaces */\n    case '>': h->endian = BIG; return;\n    case '<': h->endian = LITTLE; return;\n    case '!': {\n      int a = getnum(L, fmt, MAXALIGN);\n      if (!isp2(a))\n        luaL_error(L, \"alignment %d is not a power of 2\", a);\n      h->align = a;\n      return;\n    }\n    default: {\n      const char *msg = lua_pushfstring(L, \"invalid format option '%c'\", opt);\n      luaL_argerror(L, 1, msg);\n    }\n  }\n}\n\n\nstatic void putinteger (lua_State *L, luaL_Buffer *b, int arg, int endian,\n                        int size) {\n  lua_Number n = luaL_checknumber(L, arg);\n  Uinttype value;\n  char buff[MAXINTSIZE];\n  if (n < 0)\n    value = (Uinttype)(Inttype)n;\n  else\n    value = (Uinttype)n;\n  if (endian == LITTLE) {\n    int i;\n    for (i = 0; i < size; i++) {\n      buff[i] = (value & 0xff);\n      value >>= 8;\n    }\n  }\n  else {\n    int i;\n    for (i = size - 1; i >= 0; i--) {\n      buff[i] = (value & 0xff);\n      value >>= 8;\n    }\n  }\n  luaL_addlstring(b, buff, size);\n}\n\n\nstatic void correctbytes (char *b, int size, int endian) {\n  if (endian != native.endian) {\n    int i = 0;\n    while (i < --size) {\n      char temp = b[i];\n      b[i++] = b[size];\n      b[size] = temp;\n    }\n  }\n}\n\n\nstatic int b_pack (lua_State *L) {\n  luaL_Buffer b;\n  const char *fmt = luaL_checkstring(L, 1);\n  Header h;\n  int arg = 2;\n  size_t totalsize = 0;\n  defaultoptions(&h);\n  lua_pushnil(L);  /* mark to separate arguments from string buffer */\n  luaL_buffinit(L, &b);\n  while (*fmt != '\\0') {\n    int opt = *fmt++;\n    size_t size = optsize(L, opt, &fmt);\n    int toalign = gettoalign(totalsize, &h, opt, size);\n    totalsize += toalign;\n   
 while (toalign-- > 0) luaL_addchar(&b, '\\0');\n    switch (opt) {\n      case 'b': case 'B': case 'h': case 'H':\n      case 'l': case 'L': case 'T': case 'i': case 'I': {  /* integer types */\n        putinteger(L, &b, arg++, h.endian, size);\n        break;\n      }\n      case 'x': {\n        luaL_addchar(&b, '\\0');\n        break;\n      }\n      case 'f': {\n        float f = (float)luaL_checknumber(L, arg++);\n        correctbytes((char *)&f, size, h.endian);\n        luaL_addlstring(&b, (char *)&f, size);\n        break;\n      }\n      case 'd': {\n        double d = luaL_checknumber(L, arg++);\n        correctbytes((char *)&d, size, h.endian);\n        luaL_addlstring(&b, (char *)&d, size);\n        break;\n      }\n      case 'c': case 's': {\n        size_t l;\n        const char *s = luaL_checklstring(L, arg++, &l);\n        if (size == 0) size = l;\n        luaL_argcheck(L, l >= (size_t)size, arg, \"string too short\");\n        luaL_addlstring(&b, s, size);\n        if (opt == 's') {\n          luaL_addchar(&b, '\\0');  /* add zero at the end */\n          size++;\n        }\n        break;\n      }\n      default: controloptions(L, opt, &fmt, &h);\n    }\n    totalsize += size;\n  }\n  luaL_pushresult(&b);\n  return 1;\n}\n\n\nstatic lua_Number getinteger (const char *buff, int endian,\n                        int issigned, int size) {\n  Uinttype l = 0;\n  int i;\n  if (endian == BIG) {\n    for (i = 0; i < size; i++) {\n      l <<= 8;\n      l |= (Uinttype)(unsigned char)buff[i];\n    }\n  }\n  else {\n    for (i = size - 1; i >= 0; i--) {\n      l <<= 8;\n      l |= (Uinttype)(unsigned char)buff[i];\n    }\n  }\n  if (!issigned)\n    return (lua_Number)l;\n  else {  /* signed format */\n    Uinttype mask = (Uinttype)(~((Uinttype)0)) << (size*8 - 1);\n    if (l & mask)  /* negative value? 
*/\n      l |= mask;  /* sign extension */\n    return (lua_Number)(Inttype)l;\n  }\n}\n\n\nstatic int b_unpack (lua_State *L) {\n  Header h;\n  const char *fmt = luaL_checkstring(L, 1);\n  size_t ld;\n  const char *data = luaL_checklstring(L, 2, &ld);\n  size_t pos = luaL_optinteger(L, 3, 1);\n  luaL_argcheck(L, pos > 0, 3, \"offset must be 1 or greater\");\n  pos--; /* Lua indexes are 1-based, but here we want 0-based for C\n          * pointer math. */\n  int n = 0;  /* number of results */\n  defaultoptions(&h);\n  while (*fmt) {\n    int opt = *fmt++;\n    size_t size = optsize(L, opt, &fmt);\n    pos += gettoalign(pos, &h, opt, size);\n    luaL_argcheck(L, size <= ld && pos <= ld - size,\n                   2, \"data string too short\");\n    /* stack space for item + next position */\n    luaL_checkstack(L, 2, \"too many results\");\n    switch (opt) {\n      case 'b': case 'B': case 'h': case 'H':\n      case 'l': case 'L': case 'T': case 'i':  case 'I': {  /* integer types */\n        int issigned = islower(opt);\n        lua_Number res = getinteger(data+pos, h.endian, issigned, size);\n        lua_pushnumber(L, res); n++;\n        break;\n      }\n      case 'x': {\n        break;\n      }\n      case 'f': {\n        float f;\n        memcpy(&f, data+pos, size);\n        correctbytes((char *)&f, sizeof(f), h.endian);\n        lua_pushnumber(L, f); n++;\n        break;\n      }\n      case 'd': {\n        double d;\n        memcpy(&d, data+pos, size);\n        correctbytes((char *)&d, sizeof(d), h.endian);\n        lua_pushnumber(L, d); n++;\n        break;\n      }\n      case 'c': {\n        if (size == 0) {\n          if (n == 0 || !lua_isnumber(L, -1))\n            luaL_error(L, \"format 'c0' needs a previous size\");\n          size = lua_tonumber(L, -1);\n          lua_pop(L, 1); n--;\n          luaL_argcheck(L, size <= ld && pos <= ld - size,\n                           2, \"data string too short\");\n        }\n        lua_pushlstring(L, 
data+pos, size); n++;\n        break;\n      }\n      case 's': {\n        const char *e = (const char *)memchr(data+pos, '\\0', ld - pos);\n        if (e == NULL)\n          luaL_error(L, \"unfinished string in data\");\n        size = (e - (data+pos)) + 1;\n        lua_pushlstring(L, data+pos, size - 1); n++;\n        break;\n      }\n      default: controloptions(L, opt, &fmt, &h);\n    }\n    pos += size;\n  }\n  lua_pushinteger(L, pos + 1);  /* next position */\n  return n + 1;\n}\n\n\nstatic int b_size (lua_State *L) {\n  Header h;\n  const char *fmt = luaL_checkstring(L, 1);\n  size_t pos = 0;\n  defaultoptions(&h);\n  while (*fmt) {\n    int opt = *fmt++;\n    size_t size = optsize(L, opt, &fmt);\n    pos += gettoalign(pos, &h, opt, size);\n    if (opt == 's')\n      luaL_argerror(L, 1, \"option 's' has no fixed size\");\n    else if (opt == 'c' && size == 0)\n      luaL_argerror(L, 1, \"option 'c0' has no fixed size\");\n    if (!isalnum(opt))\n      controloptions(L, opt, &fmt, &h);\n    pos += size;\n  }\n  lua_pushinteger(L, pos);\n  return 1;\n}\n\n/* }====================================================== */\n\n\n\nstatic const struct luaL_Reg thislib[] = {\n  {\"pack\", b_pack},\n  {\"unpack\", b_unpack},\n  {\"size\", b_size},\n  {NULL, NULL}\n};\n\n\nLUALIB_API int luaopen_struct (lua_State *L);\n\nLUALIB_API int luaopen_struct (lua_State *L) {\n  luaL_newlib(L, thislib);\n  lua_setglobal(L, \"struct\");\n  return 1;\n}\n\n\n/******************************************************************************\n* Copyright (C) 2010-2018 Lua.org, PUC-Rio.  
All rights reserved.\n*\n* Permission is hereby granted, free of charge, to any person obtaining\n* a copy of this software and associated documentation files (the\n* \"Software\"), to deal in the Software without restriction, including\n* without limitation the rights to use, copy, modify, merge, publish,\n* distribute, sublicense, and/or sell copies of the Software, and to\n* permit persons to whom the Software is furnished to do so, subject to\n* the following conditions:\n*\n* The above copyright notice and this permission notice shall be\n* included in all copies or substantial portions of the Software.\n*\n* THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND,\n* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF\n* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.\n* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY\n* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,\n* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE\n* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.\n******************************************************************************/\n"
  },
  {
    "path": "src/redis/lzf.h",
    "content": "/*\n * Copyright (c) 2000-2008 Marc Alexander Lehmann <schmorp@schmorp.de>\n *\n * Redistribution and use in source and binary forms, with or without modifica-\n * tion, are permitted provided that the following conditions are met:\n *\n *   1.  Redistributions of source code must retain the above copyright notice,\n *       this list of conditions and the following disclaimer.\n *\n *   2.  Redistributions in binary form must reproduce the above copyright\n *       notice, this list of conditions and the following disclaimer in the\n *       documentation and/or other materials provided with the distribution.\n *\n * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED\n * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MER-\n * CHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO\n * EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPE-\n * CIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,\n * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;\n * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,\n * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTH-\n * ERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED\n * OF THE POSSIBILITY OF SUCH DAMAGE.\n *\n * Alternatively, the contents of this file may be used under the terms of\n * the GNU General Public License (\"GPL\") version 2 or any later version,\n * in which case the provisions of the GPL are applicable instead of\n * the above. If you wish to allow the use of your version of this file\n * only under the terms of the GPL and not to allow others to use your\n * version of this file under the BSD license, indicate your decision\n * by deleting the provisions above and replace them with the notice\n * and other provisions required by the GPL. 
If you do not delete the\n * provisions above, a recipient may use your version of this file under\n * either the BSD or the GPL.\n */\n\n#ifndef LZF_H\n#define LZF_H\n\n/***********************************************************************\n**\n**\tlzf -- an extremely fast/free compression/decompression-method\n**\thttp://liblzf.plan9.de/\n**\n**\tThis algorithm is believed to be patent-free.\n**\n***********************************************************************/\n\n#define LZF_VERSION 0x0105 /* 1.5, API version */\n\n/*\n * Compress in_len bytes stored at the memory block starting at\n * in_data and write the result to out_data, up to a maximum length\n * of out_len bytes.\n *\n * If the output buffer is not large enough or any error occurs return 0,\n * otherwise return the number of bytes used, which might be considerably\n * more than in_len (but less than 104% of the original size), so it\n * makes sense to always use out_len == in_len - 1, to ensure _some_\n * compression, and store the data uncompressed otherwise (with a flag, of\n * course).\n *\n * lzf_compress might use different algorithms on different systems and\n * even different runs, thus might result in different compressed strings\n * depending on the phase of the moon or similar factors. However, all\n * these strings are architecture-independent and will result in the\n * original data when decompressed using lzf_decompress.\n *\n * The buffers must not be overlapping.\n *\n * If the option LZF_STATE_ARG is enabled, an extra argument must be\n * supplied which is not reflected in this header file. Refer to lzfP.h\n * and lzf_c.c.\n *\n */\nsize_t\nlzf_compress (const void *const in_data,  size_t in_len,\n              void             *out_data, size_t out_len\n#if LZF_STATE_ARG\n      , LZF_STATE htab\n#endif\n              );\n\n/*\n * Decompress data compressed with some version of the lzf_compress\n * function and stored at location in_data and length in_len. 
The result\n * will be stored at out_data up to a maximum of out_len characters.\n *\n * If the output buffer is not large enough to hold the decompressed\n * data, a 0 is returned and errno is set to E2BIG. Otherwise the number\n * of decompressed bytes (i.e. the original length of the data) is\n * returned.\n *\n * If an error in the compressed data is detected, a zero is returned and\n * errno is set to EINVAL.\n *\n * This function is very fast, about as fast as a copying loop.\n */\nsize_t\nlzf_decompress (const void *const in_data,  size_t in_len,\n                void             *out_data, size_t out_len);\n\n#endif\n\n"
  },
  {
    "path": "src/redis/lzfP.h",
    "content": "/*\n * Copyright (c) 2000-2007 Marc Alexander Lehmann <schmorp@schmorp.de>\n *\n * Redistribution and use in source and binary forms, with or without modifica-\n * tion, are permitted provided that the following conditions are met:\n *\n *   1.  Redistributions of source code must retain the above copyright notice,\n *       this list of conditions and the following disclaimer.\n *\n *   2.  Redistributions in binary form must reproduce the above copyright\n *       notice, this list of conditions and the following disclaimer in the\n *       documentation and/or other materials provided with the distribution.\n *\n * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED\n * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MER-\n * CHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO\n * EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPE-\n * CIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,\n * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;\n * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,\n * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTH-\n * ERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED\n * OF THE POSSIBILITY OF SUCH DAMAGE.\n *\n * Alternatively, the contents of this file may be used under the terms of\n * the GNU General Public License (\"GPL\") version 2 or any later version,\n * in which case the provisions of the GPL are applicable instead of\n * the above. If you wish to allow the use of your version of this file\n * only under the terms of the GPL and not to allow others to use your\n * version of this file under the BSD license, indicate your decision\n * by deleting the provisions above and replace them with the notice\n * and other provisions required by the GPL. 
If you do not delete the\n * provisions above, a recipient may use your version of this file under\n * either the BSD or the GPL.\n */\n\n#ifndef LZFP_h\n#define LZFP_h\n\n// ROMAN: #define STANDALONE 1 /* at the moment, this is ok. */\n\n/*  ROMAN: Moved below since it depends on LZF_STATE\n#ifndef STANDALONE\n# include \"lzf.h\"\n#endif\n\n*/\n\n/*\n * Size of hashtable is (1 << HLOG) * sizeof (char *)\n * decompression is independent of the hash table size\n * the difference between 15 and 14 is very small\n * for small blocks (and 14 is usually a bit faster).\n * For a low-memory/faster configuration, use HLOG == 13;\n * For best compression, use 15 or 16 (or more, up to 22).\n */\n#ifndef HLOG\n# define HLOG 16\n#endif\n\n/*\n * Sacrifice very little compression quality in favour of compression speed.\n * This gives almost the same compression as the default code, and is\n * (very roughly) 15% faster. This is the preferred mode of operation.\n */\n#ifndef VERY_FAST\n# define VERY_FAST 1\n#endif\n\n/*\n * Sacrifice some more compression quality in favour of compression speed.\n * (roughly 1-2% worse compression for large blocks and\n * 9-10% for small, redundant, blocks and >>20% better speed in both cases)\n * In short: when in need for speed, enable this for binary data,\n * possibly disable this for text data.\n */\n#ifndef ULTRA_FAST\n# define ULTRA_FAST 0\n#endif\n\n/*\n * Unconditionally aligning does not cost very much, so do it if unsure\n */\n#ifndef STRICT_ALIGN\n# if !(defined(__i386) || defined (__amd64))\n#  define STRICT_ALIGN 1\n# else\n#  define STRICT_ALIGN 0\n# endif\n#endif\n\n/*\n * You may choose to pre-set the hash table (might be faster on some\n * modern cpus and large (>>64k) blocks, and also makes compression\n * deterministic/repeatable when the configuration otherwise is the same).\n */\n#ifndef INIT_HTAB\n# define INIT_HTAB 0\n#endif\n\n/*\n * Avoid assigning values to errno variable? 
for some embedding purposes\n * (linux kernel for example), this is necessary. NOTE: this breaks\n * the documentation in lzf.h. Avoiding errno has no speed impact.\n */\n#ifndef AVOID_ERRNO\n# define AVOID_ERRNO 0\n#endif\n\n/*\n * Whether to pass the LZF_STATE variable as argument, or allocate it\n * on the stack. For small-stack environments, define this to 1.\n * NOTE: this breaks the prototype in lzf.h.\n */\n#ifndef LZF_STATE_ARG\n# define LZF_STATE_ARG 1   // ROMAN\n#endif\n\n/*\n * Whether to add extra checks for input validity in lzf_decompress\n * and return EINVAL if the input stream has been corrupted. This\n * only shields against overflowing the input buffer and will not\n * detect most corrupted streams.\n * This check is not normally noticeable on modern hardware\n * (<1% slowdown), but might slow down older cpus considerably.\n */\n#ifndef CHECK_INPUT\n# define CHECK_INPUT 1\n#endif\n\n/*\n * Whether to store pointers or offsets inside the hash table. On\n * 64 bit architectures, pointers take up twice as much space,\n * and might also be slower. Default is to autodetect.\n * Notice: Don't set this value to 1, it will result in 'LZF_HSLOT'\n * not being able to store offset above UINT32_MAX in 64bit. 
*/\n#define LZF_USE_OFFSETS 0\n\n/*****************************************************************************/\n/* nothing should be changed below */\n\n#ifdef __cplusplus\n# include <cstring>\n# include <climits>\nusing namespace std;\n#else\n# include <string.h>\n# include <limits.h>\n#endif\n\n#ifndef LZF_USE_OFFSETS\n# if defined (WIN32)\n#  define LZF_USE_OFFSETS defined(_M_X64)\n# else\n#  if __cplusplus > 199711L\n#   include <cstdint>\n#  else\n#   include <stdint.h>\n#  endif\n#  define LZF_USE_OFFSETS (UINTPTR_MAX > 0xffffffffU)\n# endif\n#endif\n\ntypedef unsigned char u8;\n\n#if LZF_USE_OFFSETS\n# define LZF_HSLOT_BIAS ((const u8 *)in_data)\n  typedef unsigned int LZF_HSLOT;\n#else\n# define LZF_HSLOT_BIAS 0\n  typedef const u8 *LZF_HSLOT;\n#endif\n\ntypedef LZF_HSLOT LZF_STATE[1 << (HLOG)];\n\n// ROMAN: moved here deliberately because we depend on LZF_STATE.\n#ifndef STANDALONE\n# include \"lzf.h\"\n#endif\n\n#if !STRICT_ALIGN\n/* for unaligned accesses we need a 16 bit datatype. */\n# if USHRT_MAX == 65535\n    typedef unsigned short u16;\n# elif UINT_MAX == 65535\n    typedef unsigned int u16;\n# else\n#  undef STRICT_ALIGN\n#  define STRICT_ALIGN 1\n# endif\n#endif\n\n#if ULTRA_FAST\n# undef VERY_FAST\n#endif\n\n#endif\n\n"
  },
  {
    "path": "src/redis/lzf_c.c",
    "content": "/*\n * Copyright (c) 2000-2010 Marc Alexander Lehmann <schmorp@schmorp.de>\n *\n * Redistribution and use in source and binary forms, with or without modifica-\n * tion, are permitted provided that the following conditions are met:\n *\n *   1.  Redistributions of source code must retain the above copyright notice,\n *       this list of conditions and the following disclaimer.\n *\n *   2.  Redistributions in binary form must reproduce the above copyright\n *       notice, this list of conditions and the following disclaimer in the\n *       documentation and/or other materials provided with the distribution.\n *\n * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED\n * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MER-\n * CHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO\n * EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPE-\n * CIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,\n * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;\n * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,\n * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTH-\n * ERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED\n * OF THE POSSIBILITY OF SUCH DAMAGE.\n *\n * Alternatively, the contents of this file may be used under the terms of\n * the GNU General Public License (\"GPL\") version 2 or any later version,\n * in which case the provisions of the GPL are applicable instead of\n * the above. If you wish to allow the use of your version of this file\n * only under the terms of the GPL and not to allow others to use your\n * version of this file under the BSD license, indicate your decision\n * by deleting the provisions above and replace them with the notice\n * and other provisions required by the GPL. 
If you do not delete the\n * provisions above, a recipient may use your version of this file under\n * either the BSD or the GPL.\n */\n\n#include \"lzfP.h\"\n\n#define HSIZE (1 << (HLOG))\n\n/*\n * don't play with this unless you benchmark!\n * the data format is not dependent on the hash function.\n * the hash function might seem strange, just believe me,\n * it works ;)\n */\n#ifndef FRST\n# define FRST(p) (((p[0]) << 8) | p[1])\n# define NEXT(v,p) (((v) << 8) | p[2])\n# if ULTRA_FAST\n#  define IDX(h) ((( h             >> (3*8 - HLOG)) - h  ) & (HSIZE - 1))\n# elif VERY_FAST\n#  define IDX(h) ((( h             >> (3*8 - HLOG)) - h*5) & (HSIZE - 1))\n# else\n#  define IDX(h) ((((h ^ (h << 5)) >> (3*8 - HLOG)) - h*5) & (HSIZE - 1))\n# endif\n#endif\n/*\n * IDX works because it is very similar to a multiplicative hash, e.g.\n * ((h * 57321 >> (3*8 - HLOG)) & (HSIZE - 1))\n * the latter is also quite fast on newer CPUs, and compresses similarly.\n *\n * the next one is also quite good, albeit slow ;)\n * (int)(cos(h & 0xffffff) * 1e6)\n */\n\n#if 0\n/* original lzv-like hash function, much worse and thus slower */\n# define FRST(p) (p[0] << 5) ^ p[1]\n# define NEXT(v,p) ((v) << 5) ^ p[2]\n# define IDX(h) ((h) & (HSIZE - 1))\n#endif\n\n#define        MAX_LIT        (1 <<  5)\n#define        MAX_OFF        (1 << 13)\n#define        MAX_REF        ((1 << 8) + (1 << 3))\n\n#if __GNUC__ >= 3\n# define expect(expr,value)         __builtin_expect ((expr),(value))\n# define inline                     inline\n#else\n# define expect(expr,value)         (expr)\n# define inline                     static\n#endif\n\n#define expect_false(expr) expect ((expr) != 0, 0)\n#define expect_true(expr)  expect ((expr) != 0, 1)\n\n#if defined(__has_attribute)\n# if __has_attribute(no_sanitize)\n#  define NO_SANITIZE(sanitizer) __attribute__((no_sanitize(sanitizer)))\n# endif\n#endif\n\n#if !defined(NO_SANITIZE)\n# define NO_SANITIZE(sanitizer)\n#endif\n\n/*\n * compressed format\n *\n * 
000LLLLL <L+1>    ; literal, L+1=1..32 octets\n * LLLooooo oooooooo ; backref L+2=3..8 octets, o+1=1..8192 offset\n * 111ooooo LLLLLLLL oooooooo ; backref L+9=9..264 octets, o+1=1..8192 offset\n *\n */\nNO_SANITIZE(\"alignment\")\nsize_t\nlzf_compress (const void *const in_data, size_t in_len,\n\t      void *out_data, size_t out_len\n#if LZF_STATE_ARG\n              , LZF_STATE htab\n#endif\n              )\n{\n#if !LZF_STATE_ARG\n  LZF_STATE htab;\n#endif\n  const u8 *ip = (const u8 *)in_data;\n        u8 *op = (u8 *)out_data;\n  const u8 *in_end  = ip + in_len;\n        u8 *out_end = op + out_len;\n  const u8 *ref;\n\n  /* off requires a type wide enough to hold a general pointer difference.\n   * ISO C doesn't have that (size_t might not be enough and ptrdiff_t only\n   * works for differences within a single object). We also assume that\n   * no bit pattern traps. Since the only platform that is both non-POSIX\n   * and fails to support both assumptions is windows 64 bit, we make a\n   * special workaround for it.\n   */\n#if defined (WIN32) && defined (_M_X64)\n  unsigned _int64 off; /* workaround for missing POSIX compliance */\n#else\n  size_t off;\n#endif\n  unsigned int hval;\n  int lit;\n\n  if (!in_len || !out_len)\n    return 0;\n\n#if INIT_HTAB\n  memset (htab, 0, sizeof (htab));\n#endif\n\n  lit = 0; op++; /* start run */\n\n  hval = FRST (ip);\n  while (ip < in_end - 2)\n    {\n      LZF_HSLOT *hslot;\n\n      hval = NEXT (hval, ip);\n      hslot = htab + IDX (hval);\n      ref = *hslot ? 
(*hslot + LZF_HSLOT_BIAS) : NULL; /* avoid applying zero offset to null pointer */\n      *hslot = ip - LZF_HSLOT_BIAS;\n\n      if (1\n#if INIT_HTAB\n          && ref < ip /* the next test will actually take care of this, but this is faster */\n#endif\n          && (off = ip - ref - 1) < MAX_OFF\n          && ref > (u8 *)in_data\n          && ref[2] == ip[2]\n#if STRICT_ALIGN\n          && ((ref[1] << 8) | ref[0]) == ((ip[1] << 8) | ip[0])\n#else\n          && *(u16 *)ref == *(u16 *)ip\n#endif\n        )\n        {\n          /* match found at *ref++ */\n          unsigned int len = 2;\n          size_t maxlen = in_end - ip - len;\n          maxlen = maxlen > MAX_REF ? MAX_REF : maxlen;\n\n          if (expect_false (op + 3 + 1 >= out_end)) /* first a faster conservative test */\n            if (op - !lit + 3 + 1 >= out_end) /* second the exact but rare test */\n              return 0;\n\n          op [- lit - 1] = lit - 1; /* stop run */\n          op -= !lit; /* undo run if length is zero */\n\n          for (;;)\n            {\n              if (expect_true (maxlen > 16))\n                {\n                  len++; if (ref [len] != ip [len]) break;\n                  len++; if (ref [len] != ip [len]) break;\n                  len++; if (ref [len] != ip [len]) break;\n                  len++; if (ref [len] != ip [len]) break;\n\n                  len++; if (ref [len] != ip [len]) break;\n                  len++; if (ref [len] != ip [len]) break;\n                  len++; if (ref [len] != ip [len]) break;\n                  len++; if (ref [len] != ip [len]) break;\n\n                  len++; if (ref [len] != ip [len]) break;\n                  len++; if (ref [len] != ip [len]) break;\n                  len++; if (ref [len] != ip [len]) break;\n                  len++; if (ref [len] != ip [len]) break;\n\n                  len++; if (ref [len] != ip [len]) break;\n                  len++; if (ref [len] != ip [len]) break;\n                  len++; if (ref [len] 
!= ip [len]) break;\n                  len++; if (ref [len] != ip [len]) break;\n                }\n\n              do\n                len++;\n              while (len < maxlen && ref[len] == ip[len]);\n\n              break;\n            }\n\n          len -= 2; /* len is now #octets - 1 */\n          ip++;\n\n          if (len < 7)\n            {\n              *op++ = (off >> 8) + (len << 5);\n            }\n          else\n            {\n              *op++ = (off >> 8) + (  7 << 5);\n              *op++ = len - 7;\n            }\n\n          *op++ = off;\n\n          lit = 0; op++; /* start run */\n\n          ip += len + 1;\n\n          if (expect_false (ip >= in_end - 2))\n            break;\n\n#if ULTRA_FAST || VERY_FAST\n          --ip;\n# if VERY_FAST && !ULTRA_FAST\n          --ip;\n# endif\n          hval = FRST (ip);\n\n          hval = NEXT (hval, ip);\n          htab[IDX (hval)] = ip - LZF_HSLOT_BIAS;\n          ip++;\n\n# if VERY_FAST && !ULTRA_FAST\n          hval = NEXT (hval, ip);\n          htab[IDX (hval)] = ip - LZF_HSLOT_BIAS;\n          ip++;\n# endif\n#else\n          ip -= len + 1;\n\n          do\n            {\n              hval = NEXT (hval, ip);\n              htab[IDX (hval)] = ip - LZF_HSLOT_BIAS;\n              ip++;\n            }\n          while (len--);\n#endif\n        }\n      else\n        {\n          /* one more literal byte we must copy */\n          if (expect_false (op >= out_end))\n            return 0;\n\n          lit++; *op++ = *ip++;\n\n          if (expect_false (lit == MAX_LIT))\n            {\n              op [- lit - 1] = lit - 1; /* stop run */\n              lit = 0; op++; /* start run */\n            }\n        }\n    }\n\n  if (op + 3 > out_end) /* at most 3 bytes can be missing here */\n    return 0;\n\n  while (ip < in_end)\n    {\n      lit++; *op++ = *ip++;\n\n      if (expect_false (lit == MAX_LIT))\n        {\n          op [- lit - 1] = lit - 1; /* stop run */\n          lit = 0; op++; /* start run 
*/\n        }\n    }\n\n  op [- lit - 1] = lit - 1; /* end run */\n  op -= !lit; /* undo run if length is zero */\n\n  return op - (u8 *)out_data;\n}\n\n"
  },
  {
    "path": "src/redis/lzf_d.c",
    "content": "/*\n * Copyright (c) 2000-2010 Marc Alexander Lehmann <schmorp@schmorp.de>\n *\n * Redistribution and use in source and binary forms, with or without modifica-\n * tion, are permitted provided that the following conditions are met:\n *\n *   1.  Redistributions of source code must retain the above copyright notice,\n *       this list of conditions and the following disclaimer.\n *\n *   2.  Redistributions in binary form must reproduce the above copyright\n *       notice, this list of conditions and the following disclaimer in the\n *       documentation and/or other materials provided with the distribution.\n *\n * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED\n * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MER-\n * CHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO\n * EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPE-\n * CIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,\n * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;\n * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,\n * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTH-\n * ERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED\n * OF THE POSSIBILITY OF SUCH DAMAGE.\n *\n * Alternatively, the contents of this file may be used under the terms of\n * the GNU General Public License (\"GPL\") version 2 or any later version,\n * in which case the provisions of the GPL are applicable instead of\n * the above. If you wish to allow the use of your version of this file\n * only under the terms of the GPL and not to allow others to use your\n * version of this file under the BSD license, indicate your decision\n * by deleting the provisions above and replace them with the notice\n * and other provisions required by the GPL. 
If you do not delete the\n * provisions above, a recipient may use your version of this file under\n * either the BSD or the GPL.\n */\n\n#include \"lzfP.h\"\n\n#if AVOID_ERRNO\n# define SET_ERRNO(n)\n#else\n# include <errno.h>\n# define SET_ERRNO(n) errno = (n)\n#endif\n\n#if USE_REP_MOVSB /* small win on amd, big loss on intel */\n#if (__i386 || __amd64) && __GNUC__ >= 3\n# define lzf_movsb(dst, src, len)                \\\n   asm (\"rep movsb\"                              \\\n        : \"=D\" (dst), \"=S\" (src), \"=c\" (len)     \\\n        :  \"0\" (dst),  \"1\" (src),  \"2\" (len));\n#endif\n#endif\n\n#if defined(__GNUC__) && __GNUC__ >= 7\n#pragma GCC diagnostic push\n#pragma GCC diagnostic ignored \"-Wimplicit-fallthrough\"\n#endif\nsize_t\nlzf_decompress (const void *const in_data,  size_t in_len,\n                void             *out_data, size_t out_len)\n{\n  u8 const *ip = (const u8 *)in_data;\n  u8       *op = (u8 *)out_data;\n  u8 const *const in_end  = ip + in_len;\n  u8       *const out_end = op + out_len;\n\n  while (ip < in_end)\n    {\n      unsigned int ctrl;\n      ctrl = *ip++;\n\n      if (ctrl < (1 << 5)) /* literal run */\n        {\n          ctrl++;\n\n          if (op + ctrl > out_end)\n            {\n              SET_ERRNO (E2BIG);\n              return 0;\n            }\n\n#if CHECK_INPUT\n          if (ip + ctrl > in_end)\n            {\n              SET_ERRNO (EINVAL);\n              return 0;\n            }\n#endif\n\n#ifdef lzf_movsb\n          lzf_movsb (op, ip, ctrl);\n#else\n          switch (ctrl)\n            {\n              case 32: *op++ = *ip++; case 31: *op++ = *ip++; case 30: *op++ = *ip++; case 29: *op++ = *ip++;\n              case 28: *op++ = *ip++; case 27: *op++ = *ip++; case 26: *op++ = *ip++; case 25: *op++ = *ip++;\n              case 24: *op++ = *ip++; case 23: *op++ = *ip++; case 22: *op++ = *ip++; case 21: *op++ = *ip++;\n              case 20: *op++ = *ip++; case 19: *op++ = *ip++; case 18: *op++ = 
*ip++; case 17: *op++ = *ip++;\n              case 16: *op++ = *ip++; case 15: *op++ = *ip++; case 14: *op++ = *ip++; case 13: *op++ = *ip++;\n              case 12: *op++ = *ip++; case 11: *op++ = *ip++; case 10: *op++ = *ip++; case  9: *op++ = *ip++;\n              case  8: *op++ = *ip++; case  7: *op++ = *ip++; case  6: *op++ = *ip++; case  5: *op++ = *ip++;\n              case  4: *op++ = *ip++; case  3: *op++ = *ip++; case  2: *op++ = *ip++; case  1: *op++ = *ip++;\n            }\n#endif\n        }\n      else /* back reference */\n        {\n          unsigned int len = ctrl >> 5;\n\n          u8 *ref = op - ((ctrl & 0x1f) << 8) - 1;\n\n#if CHECK_INPUT\n          if (ip >= in_end)\n            {\n              SET_ERRNO (EINVAL);\n              return 0;\n            }\n#endif\n          if (len == 7)\n            {\n              len += *ip++;\n#if CHECK_INPUT\n              if (ip >= in_end)\n                {\n                  SET_ERRNO (EINVAL);\n                  return 0;\n                }\n#endif\n            }\n\n          ref -= *ip++;\n\n          if (op + len + 2 > out_end)\n            {\n              SET_ERRNO (E2BIG);\n              return 0;\n            }\n\n          if (ref < (u8 *)out_data)\n            {\n              SET_ERRNO (EINVAL);\n              return 0;\n            }\n\n#ifdef lzf_movsb\n          len += 2;\n          lzf_movsb (op, ref, len);\n#else\n          switch (len)\n            {\n              default:\n                len += 2;\n\n                if (op >= ref + len)\n                  {\n                    /* disjunct areas */\n                    memcpy (op, ref, len);\n                    op += len;\n                  }\n                else\n                  {\n                    /* overlapping, use octet by octet copying */\n                    do\n                      *op++ = *ref++;\n                    while (--len);\n                  }\n\n                break;\n\n              case 9: *op++ = *ref++; 
/* fall-thru */\n              case 8: *op++ = *ref++; /* fall-thru */\n              case 7: *op++ = *ref++; /* fall-thru */\n              case 6: *op++ = *ref++; /* fall-thru */\n              case 5: *op++ = *ref++; /* fall-thru */\n              case 4: *op++ = *ref++; /* fall-thru */\n              case 3: *op++ = *ref++; /* fall-thru */\n              case 2: *op++ = *ref++; /* fall-thru */\n              case 1: *op++ = *ref++; /* fall-thru */\n              case 0: *op++ = *ref++; /* two octets more */\n                      *op++ = *ref++; /* fall-thru */\n            }\n#endif\n        }\n    }\n\n  return op - (u8 *)out_data;\n}\n#if defined(__GNUC__) && __GNUC__ >= 7\n#pragma GCC diagnostic pop\n#endif\n"
  },
  {
    "path": "src/redis/rax.c",
    "content": "/* Rax -- A radix tree implementation.\n *\n * Version 1.2 -- 7 February 2019\n *\n * Copyright (c) 2017-2019, Redis Ltd.\n * All rights reserved.\n *\n * Redistribution and use in source and binary forms, with or without\n * modification, are permitted provided that the following conditions are met:\n *\n *   * Redistributions of source code must retain the above copyright notice,\n *     this list of conditions and the following disclaimer.\n *   * Redistributions in binary form must reproduce the above copyright\n *     notice, this list of conditions and the following disclaimer in the\n *     documentation and/or other materials provided with the distribution.\n *   * Neither the name of Redis nor the names of its contributors may be used\n *     to endorse or promote products derived from this software without\n *     specific prior written permission.\n *\n * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS \"AS IS\"\n * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE\n * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE\n * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE\n * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR\n * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF\n * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS\n * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN\n * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)\n * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE\n * POSSIBILITY OF SUCH DAMAGE.\n */\n\n#include <stdlib.h>\n#include <string.h>\n#include <assert.h>\n#include <stdio.h>\n#include <errno.h>\n#include <math.h>\n#include \"rax.h\"\n\n\n#ifndef RAX_MALLOC_INCLUDE\n#define RAX_MALLOC_INCLUDE \"rax_malloc.h\"\n#endif\n\n#include RAX_MALLOC_INCLUDE\n\n/* -------------------------------- Debugging ------------------------------ */\n\nvoid raxDebugShowNode(const char *msg, raxNode *n);\n\n/* Turn debugging messages on/off by compiling with RAX_DEBUG_MSG macro on.\n * When RAX_DEBUG_MSG is defined by default Rax operations will emit a lot\n * of debugging info to the standard output, however you can still turn\n * debugging on/off in order to enable it only when you suspect there is an\n * operation causing a bug using the function raxSetDebugMsg(). */\n#ifdef RAX_DEBUG_MSG\n#define debugf(...)                                          \\\n    if (raxDebugMsg) {                                       \\\n        printf(\"%s:%s:%d:\\t\", __FILE__, __func__, __LINE__); \\\n        printf(__VA_ARGS__);                                 \\\n        fflush(stdout);                                      \\\n    }\n\n#define debugnode(msg, n) raxDebugShowNode(msg, n)\n#else\n#define debugf(...)\n#define debugnode(msg, n)\n#endif\n\n/* By default log debug info if RAX_DEBUG_MSG is defined. */\nstatic int raxDebugMsg = 1;\n\n/* When debug messages are enabled, turn them on/off dynamically. By\n * default they are enabled. 
Set the state to 0 to disable, and 1 to\n * re-enable. */\nvoid raxSetDebugMsg(int onoff) {\n    raxDebugMsg = onoff;\n}\n\n/* ------------------------- raxStack functions --------------------------\n * The raxStack is a simple stack of pointers that is capable of switching\n * from using a stack-allocated array to dynamic heap once a given number of\n * items are reached. It is used in order to retain the list of parent nodes\n * while walking the radix tree in order to implement certain operations that\n * need to navigate the tree upward.\n * ------------------------------------------------------------------------- */\n\n/* Initialize the stack. */\nstatic inline void raxStackInit(raxStack *ts) {\n    ts->stack = ts->static_items;\n    ts->items = 0;\n    ts->maxitems = RAX_STACK_STATIC_ITEMS;\n    ts->oom = 0;\n}\n\n/* Push an item into the stack, returns 1 on success, 0 on out of memory. */\nstatic inline int raxStackPush(raxStack *ts, void *ptr) {\n    if (ts->items == ts->maxitems) {\n        if (ts->stack == ts->static_items) {\n            ts->stack = rax_malloc(sizeof(void *) * ts->maxitems * 2);\n            if (ts->stack == NULL) {\n                ts->stack = ts->static_items;\n                ts->oom = 1;\n                errno = ENOMEM;\n                return 0;\n            }\n            memcpy(ts->stack, ts->static_items, sizeof(void *) * ts->maxitems);\n        } else {\n            void **newalloc = rax_realloc(ts->stack, sizeof(void *) * ts->maxitems * 2);\n            if (newalloc == NULL) {\n                ts->oom = 1;\n                errno = ENOMEM;\n                return 0;\n            }\n            ts->stack = newalloc;\n        }\n        ts->maxitems *= 2;\n    }\n    ts->stack[ts->items] = ptr;\n    ts->items++;\n    return 1;\n}\n\n/* Pop an item from the stack, the function returns NULL if there are no\n * items to pop. 
*/\nstatic inline void *raxStackPop(raxStack *ts) {\n    if (ts->items == 0) return NULL;\n    ts->items--;\n    return ts->stack[ts->items];\n}\n\n/* Return the stack item at the top of the stack without actually consuming\n * it. */\nstatic inline void *raxStackPeek(raxStack *ts) {\n    if (ts->items == 0) return NULL;\n    return ts->stack[ts->items - 1];\n}\n\n/* Free the stack in case we used heap allocation. */\nstatic inline void raxStackFree(raxStack *ts) {\n    if (ts->stack != ts->static_items) rax_free(ts->stack);\n}\n\n/* ----------------------------------------------------------------------------\n * Radix tree implementation\n * --------------------------------------------------------------------------*/\n\n/* Return the padding needed in the characters section of a node having size\n * 'nodesize'. The padding is needed to store the child pointers to aligned\n * addresses. Note that we add 4 to the node size because the node has a four\n * bytes header. */\n#define raxPadding(nodesize) ((sizeof(void *) - (((nodesize) + 4) % sizeof(void *))) & (sizeof(void *) - 1))\n\n/* Return the pointer to the last child pointer in a node. For the compressed\n * nodes this is the only child pointer. */\n#define raxNodeLastChildPtr(n)                                                  \\\n    ((raxNode **)(((char *)(n)) + raxNodeCurrentLength(n) - sizeof(raxNode *) - \\\n                  (((n)->iskey && !(n)->isnull) ? sizeof(void *) : 0)))\n\n/* Return the pointer to the first child pointer. */\n#define raxNodeFirstChildPtr(n) ((raxNode **)((n)->data + (n)->size + raxPadding((n)->size)))\n\n/* Return the current total size of the node. Note that the second line\n * computes the padding after the string of characters, needed in order to\n * save pointers to aligned addresses. */\n#define raxNodeCurrentLength(n)                                           \\\n    (sizeof(raxNode) + (n)->size + raxPadding((n)->size) +                \\\n     ((n)->iscompr ? 
sizeof(raxNode *) : sizeof(raxNode *) * (n)->size) + \\\n     (((n)->iskey && !(n)->isnull) * sizeof(void *)))\n\n/* Allocate a new non compressed node with the specified number of children.\n * If datafield is true, the allocation is made large enough to hold the\n * associated data pointer.\n * Returns the new node pointer. On out of memory NULL is returned. */\nraxNode *raxNewNode(size_t children, int datafield) {\n    size_t nodesize = sizeof(raxNode) + children + raxPadding(children) + sizeof(raxNode *) * children;\n    if (datafield) nodesize += sizeof(void *);\n    raxNode *node = rax_malloc(nodesize);\n    if (node == NULL) return NULL;\n    node->iskey = 0;\n    node->isnull = 0;\n    node->iscompr = 0;\n    node->size = children;\n    return node;\n}\n\n/* Allocate a new rax and return its pointer. On out of memory the function\n * returns NULL. */\nrax *raxNew(void) {\n    rax *rax = rax_malloc(sizeof(*rax));\n    if (rax == NULL) return NULL;\n    rax->numele = 0;\n    rax->numnodes = 1;\n    rax->head = raxNewNode(0, 0);\n    if (rax->head == NULL) {\n        rax_free(rax);\n        return NULL;\n    } else {\n        rax->alloc_size = rax_ptr_alloc_size(rax) + rax_ptr_alloc_size(rax->head);\n        return rax;\n    }\n}\n\n/* realloc the node to make room for auxiliary data in order\n * to store an item in that node. On out of memory NULL is returned. */\nraxNode *raxReallocForData(raxNode *n, void *data) {\n    if (data == NULL) return n; /* No reallocation needed, setting isnull=1 */\n    size_t curlen = raxNodeCurrentLength(n);\n    return rax_realloc(n, curlen + sizeof(void *));\n}\n\n/* Set the node auxiliary data to the specified pointer. 
*/\nvoid raxSetData(raxNode *n, void *data) {\n    n->iskey = 1;\n    if (data != NULL) {\n        n->isnull = 0;\n        void **ndata = (void **)((char *)n + raxNodeCurrentLength(n) - sizeof(void *));\n        memcpy(ndata, &data, sizeof(data));\n    } else {\n        n->isnull = 1;\n    }\n}\n\n/* Get the node auxiliary data. */\nvoid *raxGetData(raxNode *n) {\n    if (n->isnull) return NULL;\n    void **ndata = (void **)((char *)n + raxNodeCurrentLength(n) - sizeof(void *));\n    void *data;\n    memcpy(&data, ndata, sizeof(data));\n    return data;\n}\n\n/* Add a new child to the node 'n' representing the character 'c' and return\n * its new pointer, as well as the child pointer by reference. Additionally\n * '***parentlink' is populated with the raxNode pointer-to-pointer of where\n * the new child was stored, which is useful for the caller to replace the\n * child pointer if it gets reallocated.\n *\n * On success the new parent node pointer is returned (it may change because\n * of the realloc, so the caller should discard 'n' and use the new value).\n * On out of memory NULL is returned, and the old node is still valid. */\nraxNode *raxAddChild(raxNode *n, unsigned char c, raxNode **childptr, raxNode ***parentlink) {\n    assert(n->iscompr == 0);\n\n    size_t curlen = raxNodeCurrentLength(n);\n    n->size++;\n    size_t newlen = raxNodeCurrentLength(n);\n    n->size--; /* For now restore the original size. We'll update it only on\n                  success at the end. */\n\n    /* Alloc the new child we will link to 'n'. */\n    raxNode *child = raxNewNode(0, 0);\n    if (child == NULL) return NULL;\n\n    /* Make space in the original node. 
*/\n    raxNode *newn = rax_realloc(n, newlen);\n    if (newn == NULL) {\n        rax_free(child);\n        return NULL;\n    }\n    n = newn;\n\n    /* After the reallocation, we have up to 8/16 (depending on the system\n     * pointer size, and the required node padding) bytes at the end, that is,\n     * the additional char in the 'data' section, plus one pointer to the new\n     * child, plus the padding needed in order to store addresses into aligned\n     * locations.\n     *\n     * So if we start with the following node, having \"abde\" edges.\n     *\n     * Note:\n     * - We assume 4 bytes pointer for simplicity.\n     * - Each space below corresponds to one byte\n     *\n     * [HDR*][abde][Aptr][Bptr][Dptr][Eptr]|AUXP|\n     *\n     * After the reallocation we need: 1 byte for the new edge character\n     * plus 4 bytes for a new child pointer (assuming 32 bit machine).\n     * However after adding 1 byte to the edge char, the header + the edge\n     * characters are no longer aligned, so we also need 3 bytes of padding.\n     * In total the reallocation will add 1+4+3 bytes = 8 bytes:\n     *\n     * (Blank bytes are represented by \".\")\n     *\n     * [HDR*][abde][Aptr][Bptr][Dptr][Eptr]|AUXP|[....][....]\n     *\n     * Let's find where to insert the new child in order to make sure\n     * it is inserted in-place lexicographically. Assuming we are adding\n     * a child \"c\" in our case pos will be = 2 after the end of the following\n     * loop. 
*/\n    int pos;\n    for (pos = 0; pos < n->size; pos++) {\n        if (n->data[pos] > c) break;\n    }\n\n    /* Now, if present, move auxiliary data pointer at the end\n     * so that we can mess with the other data without overwriting it.\n     * We will obtain something like that:\n     *\n     * [HDR*][abde][Aptr][Bptr][Dptr][Eptr][....][....]|AUXP|\n     */\n    unsigned char *src, *dst;\n    if (n->iskey && !n->isnull) {\n        src = ((unsigned char *)n + curlen - sizeof(void *));\n        dst = ((unsigned char *)n + newlen - sizeof(void *));\n        memmove(dst, src, sizeof(void *));\n    }\n\n    /* Compute the \"shift\", that is, how many bytes we need to move the\n     * pointers section forward because of the addition of the new child\n     * byte in the string section. Note that if we had no padding, that\n     * would be always \"1\", since we are adding a single byte in the string\n     * section of the node (where now there is \"abde\" basically).\n     *\n     * However we have padding, so it could be zero, or up to 8.\n     *\n     * Another way to think at the shift is, how many bytes we need to\n     * move child pointers forward *other than* the obvious sizeof(void*)\n     * needed for the additional pointer itself. */\n    size_t shift = newlen - curlen - sizeof(void *);\n\n    /* We said we are adding a node with edge 'c'. 
The insertion\n     * point is between 'b' and 'd', so the 'pos' variable value is\n     * the index of the first child pointer that we need to move forward\n     * to make space for our new pointer.\n     *\n     * To start, move all the child pointers after the insertion point\n     * of shift+sizeof(pointer) bytes on the right, to obtain:\n     *\n     * [HDR*][abde][Aptr][Bptr][....][....][Dptr][Eptr]|AUXP|\n     */\n    src = n->data + n->size + raxPadding(n->size) + sizeof(raxNode *) * pos;\n    memmove(src + shift + sizeof(raxNode *), src, sizeof(raxNode *) * (n->size - pos));\n\n    /* Move the pointers to the left of the insertion position as well. Often\n     * we don't need to do anything if there was already some padding to use. In\n     * that case the final destination of the pointers will be the same, however\n     * in our example there was no pre-existing padding, so we added one byte\n     * plus three bytes of padding. After the next memmove() things will look\n     * like that:\n     *\n     * [HDR*][abde][....][Aptr][Bptr][....][Dptr][Eptr]|AUXP|\n     */\n    if (shift) {\n        src = (unsigned char *)raxNodeFirstChildPtr(n);\n        memmove(src + shift, src, sizeof(raxNode *) * pos);\n    }\n\n    /* Now make the space for the additional char in the data section,\n     * but also move the pointers before the insertion point to the right\n     * by shift bytes, in order to obtain the following:\n     *\n     * [HDR*][ab.d][e...][Aptr][Bptr][....][Dptr][Eptr]|AUXP|\n     */\n    src = n->data + pos;\n    memmove(src + 1, src, n->size - pos);\n\n    /* We can now set the character and its child node pointer to get:\n     *\n     * [HDR*][abcd][e...][Aptr][Bptr][....][Dptr][Eptr]|AUXP|\n     * [HDR*][abcd][e...][Aptr][Bptr][Cptr][Dptr][Eptr]|AUXP|\n     */\n    n->data[pos] = c;\n    n->size++;\n    src = (unsigned char *)raxNodeFirstChildPtr(n);\n    raxNode **childfield = (raxNode **)(src + sizeof(raxNode *) * pos);\n    memcpy(childfield, 
&child, sizeof(child));\n    *childptr = child;\n    *parentlink = childfield;\n    return n;\n}\n\n/* Turn the node 'n', that must be a node without any children, into a\n * compressed node representing a set of nodes linked one after the other\n * and having exactly one child each. The node can be a key or not: this\n * property and the associated value if any will be preserved.\n *\n * The function also returns a child node, since the last node of the\n * compressed chain cannot be part of the chain: it has zero children while\n * we can only compress inner nodes with exactly one child each. */\nraxNode *raxCompressNode(raxNode *n, unsigned char *s, size_t len, raxNode **child) {\n    assert(n->size == 0 && n->iscompr == 0);\n    void *data = NULL; /* Initialized only to avoid warnings. */\n    size_t newsize;\n\n    debugf(\"Compress node: %.*s\\n\", (int)len, s);\n\n    /* Allocate the child to link to this node. */\n    *child = raxNewNode(0, 0);\n    if (*child == NULL) return NULL;\n\n    /* Make space in the parent node. */\n    newsize = sizeof(raxNode) + len + raxPadding(len) + sizeof(raxNode *);\n    if (n->iskey) {\n        data = raxGetData(n); /* To restore it later. */\n        if (!n->isnull) newsize += sizeof(void *);\n    }\n    raxNode *newn = rax_realloc(n, newsize);\n    if (newn == NULL) {\n        rax_free(*child);\n        return NULL;\n    }\n    n = newn;\n\n    n->iscompr = 1;\n    n->size = len;\n    memcpy(n->data, s, len);\n    if (n->iskey) raxSetData(n, data);\n    raxNode **childfield = raxNodeLastChildPtr(n);\n    memcpy(childfield, child, sizeof(*child));\n    return n;\n}\n\n/* Low level function that walks the tree looking for the string\n * 's' of 'len' bytes. 
The function returns the number of characters\n * of the key that was possible to process: if the returned integer\n * is the same as 'len', then it means that the node corresponding to the\n * string was found (however it may not be a key in case the node->iskey is\n * zero or if simply we stopped in the middle of a compressed node, so that\n * 'splitpos' is non zero).\n *\n * Otherwise if the returned integer is not the same as 'len', there was an\n * early stop during the tree walk because of a character mismatch.\n *\n * The node where the search ended (because the full string was processed\n * or because there was an early stop) is returned by reference as\n * '*stopnode' if the passed pointer is not NULL. This node link in the\n * parent's node is returned as '*plink' if not NULL. Finally, if the\n * search stopped in a compressed node, '*splitpos' returns the index\n * inside the compressed node where the search ended. This is useful to\n * know where to split the node for insertion.\n *\n * Note that when we stop in the middle of a compressed node with\n * a perfect match, this function will return a length equal to the\n * 'len' argument (all the key matched), and will return a *splitpos which is\n * always positive (that will represent the index of the character immediately\n * *after* the last match in the current compressed node).\n *\n * When instead we stop at a compressed node and *splitpos is zero, it\n * means that the current node represents the key (that is, none of the\n * compressed node characters are needed to represent the key, just all\n * its parents nodes). */\nstatic inline size_t\nraxLowWalk(rax *rax, unsigned char *s, size_t len, raxNode **stopnode, raxNode ***plink, int *splitpos, raxStack *ts) {\n    raxNode *h = rax->head;\n    raxNode **parentlink = &rax->head;\n\n    size_t i = 0; /* Position in the string. 
*/\n    size_t j = 0; /* Position in the node children (or bytes if compressed).*/\n    while (h->size && i < len) {\n        debugnode(\"Lookup current node\", h);\n        unsigned char *v = h->data;\n\n        if (h->iscompr) {\n            for (j = 0; j < h->size && i < len; j++, i++) {\n                if (v[j] != s[i]) break;\n            }\n            if (j != h->size) break;\n        } else {\n            /* Even when h->size is large, linear scan provides good\n             * performances compared to other approaches that are in theory\n             * more sounding, like performing a binary search. */\n            for (j = 0; j < h->size; j++) {\n                if (v[j] == s[i]) break;\n            }\n            if (j == h->size) break;\n            i++;\n        }\n\n        if (ts) raxStackPush(ts, h); /* Save stack of parent nodes. */\n        raxNode **children = raxNodeFirstChildPtr(h);\n        if (h->iscompr) j = 0; /* Compressed node only child is at index 0. */\n        memcpy(&h, children + j, sizeof(h));\n        parentlink = children + j;\n        j = 0; /* If the new node is non compressed and we do not\n                  iterate again (since i == len) set the split\n                  position to 0 to signal this node represents\n                  the searched key. */\n    }\n    debugnode(\"Lookup stop node is\", h);\n    if (stopnode) *stopnode = h;\n    if (plink) *plink = parentlink;\n    if (splitpos && h->iscompr) *splitpos = j;\n    return i;\n}\n\n/* Insert the element 's' of size 'len', setting as auxiliary data\n * the pointer 'data'. If the element is already present, the associated\n * data is updated (only if 'overwrite' is set to 1), and 0 is returned,\n * otherwise the element is inserted and 1 is returned. 
On out of memory the\n * function returns 0 as well but sets errno to ENOMEM, otherwise errno will\n * be set to 0.\n */\nint raxGenericInsert(rax *rax, unsigned char *s, size_t len, void *data, void **old, int overwrite) {\n    size_t i;\n    int j = 0; /* Split position. If raxLowWalk() stops in a compressed\n                  node, the index 'j' represents the char we stopped within the\n                  compressed node, that is, the position where to split the\n                  node for insertion. */\n    raxNode *h, **parentlink;\n\n    debugf(\"### Insert %.*s with value %p\\n\", (int)len, s, data);\n    i = raxLowWalk(rax, s, len, &h, &parentlink, &j, NULL);\n\n    /* If i == len we walked following the whole string. If we are not\n     * in the middle of a compressed node, the string is either already\n     * inserted or this middle node is currently not a key, but can represent\n     * our key. We have just to reallocate the node and make space for the\n     * data pointer. */\n    if (i == len && (!h->iscompr || j == 0 /* not in the middle if j is 0 */)) {\n        debugf(\"### Insert: node representing key exists\\n\");\n        /* Make space for the value pointer if needed. */\n        if (!h->iskey || (h->isnull && overwrite)) {\n            size_t oldalloc = rax_ptr_alloc_size(h);\n            h = raxReallocForData(h, data);\n            if (h) {\n                memcpy(parentlink, &h, sizeof(h));\n                rax->alloc_size = rax->alloc_size - oldalloc + rax_ptr_alloc_size(h);\n            }\n        }\n        if (h == NULL) {\n            errno = ENOMEM;\n            return 0;\n        }\n\n        /* Update the existing key if there is already one. */\n        if (h->iskey) {\n            if (old) *old = raxGetData(h);\n            if (overwrite) raxSetData(h, data);\n            errno = 0;\n            return 0; /* Element already exists. */\n        }\n\n        /* Otherwise set the node as a key. 
Note that raxSetData()\n         * will set h->iskey. */\n        raxSetData(h, data);\n        rax->numele++;\n        return 1; /* Element inserted. */\n    }\n\n    /* If the node we stopped at is a compressed node, we need to\n     * split it before continuing.\n     *\n     * Splitting a compressed node has a few possible cases.\n     * Imagine that the node 'h' we are currently at is a compressed\n     * node containing the string \"ANNIBALE\" (it means that it represents\n     * nodes A -> N -> N -> I -> B -> A -> L -> E with the only child\n     * pointer of this node pointing at the 'E' node, because remember that\n     * we have characters at the edges of the graph, not inside the nodes\n     * themselves).\n     *\n     * In order to show a real case imagine our node to also point to\n     * another compressed node, that finally points at the node without\n     * children, representing 'O':\n     *\n     *     \"ANNIBALE\" -> \"SCO\" -> []\n     *\n     * When inserting we may face the following cases. Note that all the cases\n     * require the insertion of a non compressed node with exactly two\n     * children, except for the last case which just requires splitting a\n     * compressed node.\n     *\n     * 1) Inserting \"ANNIENTARE\"\n     *\n     *               |B| -> \"ALE\" -> \"SCO\" -> []\n     *     \"ANNI\" -> |-|\n     *               |E| -> (... continue algo ...) \"NTARE\" -> []\n     *\n     * 2) Inserting \"ANNIBALI\"\n     *\n     *                  |E| -> \"SCO\" -> []\n     *     \"ANNIBAL\" -> |-|\n     *                  |I| -> (... continue algo ...) []\n     *\n     * 3) Inserting \"AGO\" (Like case 1, but set iscompr = 0 into original node)\n     *\n     *            |N| -> \"NIBALE\" -> \"SCO\" -> []\n     *     |A| -> |-|\n     *            |G| -> (... continue algo ...) |O| -> []\n     *\n     * 4) Inserting \"CIAO\"\n     *\n     *     |A| -> \"NNIBALE\" -> \"SCO\" -> []\n     *     |-|\n     *     |C| -> (... 
continue algo ...) \"IAO\" -> []\n     *\n     * 5) Inserting \"ANNI\"\n     *\n     *     \"ANNI\" -> \"BALE\" -> \"SCO\" -> []\n     *\n     * The final algorithm for insertion covering all the above cases is as\n     * follows.\n     *\n     * ============================= ALGO 1 =============================\n     *\n     * For the above cases 1 to 4, that is, all cases where we stopped in\n     * the middle of a compressed node for a character mismatch, do:\n     *\n     * Let $SPLITPOS be the zero-based index at which, in the\n     * compressed node array of characters, we found the mismatching\n     * character. For example if the node contains \"ANNIBALE\" and we add\n     * \"ANNIENTARE\" the $SPLITPOS is 4, that is, the index at which the\n     * mismatching character is found.\n     *\n     * 1. Save the current compressed node $NEXT pointer (the pointer to the\n     *    child element, that is always present in compressed nodes).\n     *\n     * 2. Create \"split node\" having as child the non common letter\n     *    at the compressed node. The other non common letter (at the key)\n     *    will be added later as we continue the normal insertion algorithm\n     *    at step \"6\".\n     *\n     * 3a. IF $SPLITPOS == 0:\n     *     Replace the old node with the split node, by copying the auxiliary\n     *     data if any. Fix parent's reference. Free old node eventually\n     *     (we still need its data for the next steps of the algorithm).\n     *\n     * 3b. IF $SPLITPOS != 0:\n     *     Trim the compressed node (reallocating it as well) in order to\n     *     contain $splitpos characters. Change child pointer in order to link\n     *     to the split node. If new compressed node len is just 1, set\n     *     iscompr to 0 (layout is the same). Fix parent's reference.\n     *\n     * 4a. 
IF the postfix len (the length of the remaining string of the\n     *     original compressed node after the split character) is non zero,\n     *     create a \"postfix node\". If the postfix node has just one character\n     *     set iscompr to 0, otherwise iscompr to 1. Set the postfix node\n     *     child pointer to $NEXT.\n     *\n     * 4b. IF the postfix len is zero, just use $NEXT as postfix pointer.\n     *\n     * 5. Set child[0] of split node to postfix node.\n     *\n     * 6. Set the split node as the current node, set current index at child[1]\n     *    and continue insertion algorithm as usual.\n     *\n     * ============================= ALGO 2 =============================\n     *\n     * For case 5, that is, if we stopped in the middle of a compressed\n     * node but no mismatch was found, do:\n     *\n     * Let $SPLITPOS be the zero-based index at which, in the\n     * compressed node array of characters, we stopped iterating because\n     * there were no more key characters to match. So in the example of\n     * the node \"ANNIBALE\", adding the string \"ANNI\", the $SPLITPOS is 4.\n     *\n     * 1. Save the current compressed node $NEXT pointer (the pointer to the\n     *    child element, that is always present in compressed nodes).\n     *\n     * 2. Create a \"postfix node\" containing all the characters from $SPLITPOS\n     *    to the end. Use $NEXT as the postfix node child pointer.\n     *    If the postfix node length is 1, set iscompr to 0.\n     *    Set the node as a key with the associated value of the new\n     *    inserted key.\n     *\n     * 3. Trim the current node to contain the first $SPLITPOS characters.\n     *    As usual, if the new node length is just 1, set iscompr to 0.\n     *    Take the iskey / associated value as it was in the original node.\n     *    Fix the parent's reference.\n     *\n     * 4. 
Set the postfix node as the only child pointer of the trimmed\n     *    node created at step 3.\n     */\n\n    /* ------------------------- ALGORITHM 1 --------------------------- */\n    if (h->iscompr && i != len) {\n        debugf(\"ALGO 1: Stopped at compressed node %.*s (%p)\\n\", h->size, h->data, (void *)h);\n        debugf(\"Still to insert: %.*s\\n\", (int)(len - i), s + i);\n        debugf(\"Splitting at %d: '%c'\\n\", j, ((char *)h->data)[j]);\n        debugf(\"Other (key) letter is '%c'\\n\", s[i]);\n\n        /* 1: Save next pointer. */\n        raxNode **childfield = raxNodeLastChildPtr(h);\n        raxNode *next;\n        memcpy(&next, childfield, sizeof(next));\n        debugf(\"Next is %p\\n\", (void *)next);\n        debugf(\"iskey %d\\n\", h->iskey);\n        if (h->iskey) {\n            debugf(\"key value is %p\\n\", raxGetData(h));\n        }\n\n        /* Set the length of the additional nodes we will need. */\n        size_t trimmedlen = j;\n        size_t postfixlen = h->size - j - 1;\n        int split_node_is_key = !trimmedlen && h->iskey && !h->isnull;\n        size_t nodesize;\n\n        /* 2: Create the split node. Also allocate the other nodes we'll need\n         *    ASAP, so that it will be simpler to handle OOM. */\n        raxNode *splitnode = raxNewNode(1, split_node_is_key);\n        raxNode *trimmed = NULL;\n        raxNode *postfix = NULL;\n\n        if (trimmedlen) {\n            nodesize = sizeof(raxNode) + trimmedlen + raxPadding(trimmedlen) + sizeof(raxNode *);\n            if (h->iskey && !h->isnull) nodesize += sizeof(void *);\n            trimmed = rax_malloc(nodesize);\n        }\n\n        if (postfixlen) {\n            nodesize = sizeof(raxNode) + postfixlen + raxPadding(postfixlen) + sizeof(raxNode *);\n            postfix = rax_malloc(nodesize);\n        }\n\n        /* OOM? Abort now that the tree is untouched. 
*/\n        if (splitnode == NULL || (trimmedlen && trimmed == NULL) || (postfixlen && postfix == NULL)) {\n            rax_free(splitnode);\n            rax_free(trimmed);\n            rax_free(postfix);\n            errno = ENOMEM;\n            return 0;\n        }\n        splitnode->data[0] = h->data[j];\n        rax->alloc_size += rax_ptr_alloc_size(splitnode);\n\n        if (j == 0) {\n            /* 3a: Replace the old node with the split node. */\n            if (h->iskey) {\n                void *ndata = raxGetData(h);\n                raxSetData(splitnode, ndata);\n            }\n            memcpy(parentlink, &splitnode, sizeof(splitnode));\n        } else {\n            /* 3b: Trim the compressed node. */\n            trimmed->size = j;\n            memcpy(trimmed->data, h->data, j);\n            trimmed->iscompr = j > 1 ? 1 : 0;\n            trimmed->iskey = h->iskey;\n            trimmed->isnull = h->isnull;\n            if (h->iskey && !h->isnull) {\n                void *ndata = raxGetData(h);\n                raxSetData(trimmed, ndata);\n            }\n            raxNode **cp = raxNodeLastChildPtr(trimmed);\n            memcpy(cp, &splitnode, sizeof(splitnode));\n            memcpy(parentlink, &trimmed, sizeof(trimmed));\n            parentlink = cp; /* Set parentlink to splitnode parent. */\n            rax->numnodes++;\n            rax->alloc_size += rax_ptr_alloc_size(trimmed);\n        }\n\n        /* 4: Create the postfix node: what remains of the original\n         * compressed node after the split. */\n        if (postfixlen) {\n            /* 4a: create a postfix node. 
*/\n            postfix->iskey = 0;\n            postfix->isnull = 0;\n            postfix->size = postfixlen;\n            postfix->iscompr = postfixlen > 1;\n            memcpy(postfix->data, h->data + j + 1, postfixlen);\n            raxNode **cp = raxNodeLastChildPtr(postfix);\n            memcpy(cp, &next, sizeof(next));\n            rax->numnodes++;\n            rax->alloc_size += rax_ptr_alloc_size(postfix);\n        } else {\n            /* 4b: just use next as postfix node. */\n            postfix = next;\n        }\n\n        /* 5: Set splitnode first child as the postfix node. */\n        raxNode **splitchild = raxNodeLastChildPtr(splitnode);\n        memcpy(splitchild, &postfix, sizeof(postfix));\n\n        /* 6. Continue insertion: this will cause the splitnode to\n         * get a new child (the non common character at the currently\n         * inserted key). */\n        rax->alloc_size -= rax_ptr_alloc_size(h);\n        rax_free(h);\n        h = splitnode;\n    } else if (h->iscompr && i == len) {\n        /* ------------------------- ALGORITHM 2 --------------------------- */\n        debugf(\"ALGO 2: Stopped at compressed node %.*s (%p) j = %d\\n\", h->size, h->data, (void *)h, j);\n\n        /* Allocate postfix & trimmed nodes ASAP to fail for OOM gracefully. */\n        size_t postfixlen = h->size - j;\n        size_t nodesize = sizeof(raxNode) + postfixlen + raxPadding(postfixlen) + sizeof(raxNode *);\n        if (data != NULL) nodesize += sizeof(void *);\n        raxNode *postfix = rax_malloc(nodesize);\n\n        nodesize = sizeof(raxNode) + j + raxPadding(j) + sizeof(raxNode *);\n        if (h->iskey && !h->isnull) nodesize += sizeof(void *);\n        raxNode *trimmed = rax_malloc(nodesize);\n\n        if (postfix == NULL || trimmed == NULL) {\n            rax_free(postfix);\n            rax_free(trimmed);\n            errno = ENOMEM;\n            return 0;\n        }\n\n        /* 1: Save next pointer. 
*/\n        raxNode **childfield = raxNodeLastChildPtr(h);\n        raxNode *next;\n        memcpy(&next, childfield, sizeof(next));\n\n        /* 2: Create the postfix node. */\n        postfix->size = postfixlen;\n        postfix->iscompr = postfixlen > 1;\n        postfix->iskey = 1;\n        postfix->isnull = 0;\n        memcpy(postfix->data, h->data + j, postfixlen);\n        raxSetData(postfix, data);\n        raxNode **cp = raxNodeLastChildPtr(postfix);\n        memcpy(cp, &next, sizeof(next));\n        rax->numnodes++;\n        rax->alloc_size += rax_ptr_alloc_size(postfix);\n\n        /* 3: Trim the compressed node. */\n        trimmed->size = j;\n        trimmed->iscompr = j > 1;\n        trimmed->iskey = 0;\n        trimmed->isnull = 0;\n        memcpy(trimmed->data, h->data, j);\n        memcpy(parentlink, &trimmed, sizeof(trimmed));\n        if (h->iskey) {\n            void *aux = raxGetData(h);\n            raxSetData(trimmed, aux);\n        }\n        rax->alloc_size += rax_ptr_alloc_size(trimmed);\n\n        /* Fix the trimmed node child pointer to point to\n         * the postfix node. */\n        cp = raxNodeLastChildPtr(trimmed);\n        memcpy(cp, &postfix, sizeof(postfix));\n\n        /* Finish! We don't need to continue with the insertion\n         * algorithm for ALGO 2. The key is already inserted. */\n        rax->numele++;\n        rax->alloc_size -= rax_ptr_alloc_size(h);\n        rax_free(h);\n        return 1; /* Key inserted. */\n    }\n\n    /* We walked the radix tree as far as we could, but still there are left\n     * chars in our string. We need to insert the missing nodes. */\n    while (i < len) {\n        raxNode *child;\n        size_t oldalloc = rax_ptr_alloc_size(h);\n\n        /* If this node is going to have a single child, and there\n         * are other characters, so that that would result in a chain\n         * of single-childed nodes, turn it into a compressed node. 
*/\n        if (h->size == 0 && len - i > 1) {\n            debugf(\"Inserting compressed node\\n\");\n            size_t comprsize = len - i;\n            if (comprsize > RAX_NODE_MAX_SIZE) comprsize = RAX_NODE_MAX_SIZE;\n            raxNode *newh = raxCompressNode(h, s + i, comprsize, &child);\n            if (newh == NULL) goto oom;\n            h = newh;\n            memcpy(parentlink, &h, sizeof(h));\n            parentlink = raxNodeLastChildPtr(h);\n            i += comprsize;\n        } else {\n            debugf(\"Inserting normal node\\n\");\n            raxNode **new_parentlink;\n            raxNode *newh = raxAddChild(h, s[i], &child, &new_parentlink);\n            if (newh == NULL) goto oom;\n            h = newh;\n            memcpy(parentlink, &h, sizeof(h));\n            parentlink = new_parentlink;\n            i++;\n        }\n        rax->numnodes++;\n        rax->alloc_size = rax->alloc_size - oldalloc + rax_ptr_alloc_size(h) + rax_ptr_alloc_size(child);\n        h = child;\n    }\n    size_t oldalloc = rax_ptr_alloc_size(h);\n    raxNode *newh = raxReallocForData(h, data);\n    if (newh == NULL) goto oom;\n    h = newh;\n    if (!h->iskey) rax->numele++;\n    raxSetData(h, data);\n    memcpy(parentlink, &h, sizeof(h));\n    rax->alloc_size = rax->alloc_size - oldalloc + rax_ptr_alloc_size(h);\n    return 1; /* Element inserted. */\n\noom:\n    /* This code path handles out of memory after part of the sub-tree was\n     * already modified. Set the node as a key, and then remove it. However we\n     * do that only if the node is a terminal node, otherwise if the OOM\n     * happened reallocating a node in the middle, we don't need to free\n     * anything. */\n    if (h->size == 0) {\n        h->isnull = 1;\n        h->iskey = 1;\n        rax->numele++; /* Compensate the next remove. */\n        checkedRaxRemove(rax, s, i, NULL);\n    }\n    errno = ENOMEM;\n    return 0;\n}\n\n/* Overwriting insert. 
Just a wrapper for raxGenericInsert() that will\n * update the element if there is already one for the same key. */\nint raxInsert(rax *rax, unsigned char *s, size_t len, void *data, void **old) {\n    return raxGenericInsert(rax, s, len, data, old, 1);\n}\n\n/* Non overwriting insert function: if an element with the same key\n * exists, the value is not updated and the function returns 0.\n * This is just a wrapper for raxGenericInsert(). */\nint raxTryInsert(rax *rax, unsigned char *s, size_t len, void *data, void **old) {\n    return raxGenericInsert(rax, s, len, data, old, 0);\n}\n\n/* Find a key in the rax: return 1 if the item is found, 0 otherwise.\n * If there is an item and 'value' is passed in a non-NULL pointer,\n * the value associated with the item is set at that address. */\nint raxFind(rax *rax, unsigned char *s, size_t len, void **value) {\n    raxNode *h;\n\n    debugf(\"### Lookup: %.*s\\n\", (int)len, s);\n    int splitpos = 0;\n    size_t i = raxLowWalk(rax, s, len, &h, NULL, &splitpos, NULL);\n    if (i != len || (h->iscompr && splitpos != 0) || !h->iskey) return 0;\n    if (value != NULL) *value = raxGetData(h);\n    return 1;\n}\n\n/* Return the memory address where the 'parent' node stores the specified\n * 'child' pointer, so that the caller can update the pointer with another\n * one if needed. The function assumes it will find a match, otherwise the\n * operation is an undefined behavior (it will continue scanning the\n * memory without any bound checking). */\nraxNode **raxFindParentLink(raxNode *parent, raxNode *child) {\n    raxNode **cp = raxNodeFirstChildPtr(parent);\n    raxNode *c;\n    while (1) {\n        memcpy(&c, cp, sizeof(c));\n        if (c == child) break;\n        cp++;\n    }\n    return cp;\n}\n\n/* Low level child removal from node. The new node pointer (after the child\n * removal) is returned. 
Note that this function does not fix the pointer\n * of the parent node in its parent, so this task is up to the caller.\n * The function never fails for out of memory. */\nraxNode *raxRemoveChild(raxNode *parent, raxNode *child) {\n    debugnode(\"raxRemoveChild before\", parent);\n    /* If parent is a compressed node (having a single child, as for definition\n     * of the data structure), the removal of the child consists into turning\n     * it into a normal node without children. */\n    if (parent->iscompr) {\n        void *data = NULL;\n        if (parent->iskey) data = raxGetData(parent);\n        parent->isnull = 0;\n        parent->iscompr = 0;\n        parent->size = 0;\n        if (parent->iskey) raxSetData(parent, data);\n        debugnode(\"raxRemoveChild after\", parent);\n        return parent;\n    }\n\n    /* Otherwise we need to scan for the child pointer and memmove()\n     * accordingly.\n     *\n     * 1. To start we seek the first element in both the children\n     *    pointers and edge bytes in the node. */\n    raxNode **cp = raxNodeFirstChildPtr(parent);\n    raxNode **c = cp;\n    unsigned char *e = parent->data;\n\n    /* 2. Search the child pointer to remove inside the array of children\n     *    pointers. */\n    while (1) {\n        raxNode *aux;\n        memcpy(&aux, c, sizeof(aux));\n        if (aux == child) break;\n        c++;\n        e++;\n    }\n\n    /* 3. Remove the edge and the pointer by memmoving the remaining children\n     *    pointer and edge bytes one position before. 
*/\n    int taillen = parent->size - (e - parent->data) - 1;\n    debugf(\"raxRemoveChild tail len: %d\\n\", taillen);\n    memmove(e, e + 1, taillen);\n\n    /* Compute the shift, that is the amount of bytes we should move our\n     * child pointers to the left, since the removal of one edge character\n     * and the corresponding padding change, may change the layout.\n     * We just check if in the old version of the node there was at the\n     * end just a single byte and all padding: in that case removing one char\n     * will remove a whole sizeof(void*) word. */\n    size_t shift = ((parent->size + 4) % sizeof(void *)) == 1 ? sizeof(void *) : 0;\n\n    /* Move the children pointers before the deletion point. */\n    if (shift) memmove(((char *)cp) - shift, cp, (parent->size - taillen - 1) * sizeof(raxNode **));\n\n    /* Move the remaining \"tail\" pointers at the right position as well. */\n    size_t valuelen = (parent->iskey && !parent->isnull) ? sizeof(void *) : 0;\n    memmove(((char *)c) - shift, c + 1, taillen * sizeof(raxNode **) + valuelen);\n\n    /* 4. Update size. */\n    parent->size--;\n\n    /* realloc the node according to the theoretical memory usage, to free\n     * data if we are over-allocating right now. */\n    raxNode *newnode = rax_realloc(parent, raxNodeCurrentLength(parent));\n    if (newnode) {\n        debugnode(\"raxRemoveChild after\", newnode);\n    }\n    /* Note: if rax_realloc() fails we just return the old address, which\n     * is valid. */\n    return newnode ? newnode : parent;\n}\n\n/* Remove the specified item. Returns 1 if the item was found and\n * deleted, 0 otherwise. 
*/\nint raxRemove(rax *rax, unsigned char *s, size_t len, void **old) {\n    raxNode *h;\n    raxStack ts;\n\n    debugf(\"### Delete: %.*s\\n\", (int)len, s);\n    raxStackInit(&ts);\n    int splitpos = 0;\n    size_t i = raxLowWalk(rax, s, len, &h, NULL, &splitpos, &ts);\n    if (i != len || (h->iscompr && splitpos != 0) || !h->iskey) {\n        raxStackFree(&ts);\n        return 0;\n    }\n    if (old) *old = raxGetData(h);\n    h->iskey = 0;\n    rax->numele--;\n\n    /* If this node has no children, the deletion needs to reclaim the\n     * no longer used nodes. This is an iterative process that needs to\n     * walk the tree upward, deleting all the nodes with just one child\n     * that are not keys, until the head of the rax is reached or the first\n     * node with more than one child is found. */\n\n    int trycompress = 0; /* Will be set to 1 if we should try to optimize the\n                            tree resulting from the deletion. */\n\n    if (h->size == 0) {\n        debugf(\"Key deleted in node without children. Cleanup needed.\\n\");\n        raxNode *child = NULL;\n        while (h != rax->head) {\n            child = h;\n            debugf(\"Freeing child %p [%.*s] key:%d\\n\", (void *)child, (int)child->size, (char *)child->data,\n                   child->iskey);\n            rax->alloc_size -= rax_ptr_alloc_size(child);\n            rax_free(child);\n            rax->numnodes--;\n            h = raxStackPop(&ts);\n            /* If this node has more than one child, or actually holds\n             * a key, stop here. 
*/\n            if (h->iskey || (!h->iscompr && h->size != 1)) break;\n        }\n        if (child) {\n            debugf(\"Unlinking child %p from parent %p\\n\", (void *)child, (void *)h);\n            size_t oldalloc = rax_ptr_alloc_size(h);\n            raxNode *new = raxRemoveChild(h, child);\n            rax->alloc_size = rax->alloc_size - oldalloc + rax_ptr_alloc_size(new);\n            if (new != h) {\n                raxNode *parent = raxStackPeek(&ts);\n                raxNode **parentlink;\n                if (parent == NULL) {\n                    parentlink = &rax->head;\n                } else {\n                    parentlink = raxFindParentLink(parent, h);\n                }\n                memcpy(parentlink, &new, sizeof(new));\n            }\n\n            /* If after the removal the node has just a single child\n             * and is not a key, we need to try to compress it. */\n            if (new->size == 1 && new->iskey == 0) {\n                trycompress = 1;\n                h = new;\n            }\n        }\n    } else if (h->size == 1) {\n        /* If the node had just one child, after the removal of the key\n         * further compression with adjacent nodes is potentially possible. */\n        trycompress = 1;\n    }\n\n    /* Don't try node compression if our nodes pointers stack is not\n     * complete because of OOM while executing raxLowWalk() */\n    if (trycompress && ts.oom) trycompress = 0;\n\n    /* Recompression: if trycompress is true, 'h' points to a radix tree node\n     * that changed in a way that could allow to compress nodes in this\n     * sub-branch. 
Compressed nodes represent chains of nodes that are not\n     * keys and have a single child, so there are two deletion events that\n     * may alter the tree so that further compression is needed:\n     *\n     * 1) A node with a single child was a key and now no longer is a key.\n     * 2) A node with two children now has just one child.\n     *\n     * We try to navigate upward till there are other nodes that can be\n     * compressed, when we reach the upper node which is not a key and has\n     * a single child, we scan the chain of children to collect the\n     * compressible part of the tree, and replace the current node with the\n     * new one, fixing the child pointer to reference the first non\n     * compressible node.\n     *\n     * Example of case \"1\". A tree stores the keys \"FOO\" = 1 and\n     * \"FOOBAR\" = 2:\n     *\n     *\n     * \"FOO\" -> \"BAR\" -> [] (2)\n     *           (1)\n     *\n     * After the removal of \"FOO\" the tree can be compressed as:\n     *\n     * \"FOOBAR\" -> [] (2)\n     *\n     *\n     * Example of case \"2\". A tree stores the keys \"FOOBAR\" = 1 and\n     * \"FOOTER\" = 2:\n     *\n     *          |B| -> \"AR\" -> [] (1)\n     * \"FOO\" -> |-|\n     *          |T| -> \"ER\" -> [] (2)\n     *\n     * After the removal of \"FOOTER\" the resulting tree is:\n     *\n     * \"FOO\" -> |B| -> \"AR\" -> [] (1)\n     *\n     * That can be compressed into:\n     *\n     * \"FOOBAR\" -> [] (1)\n     */\n    if (trycompress) {\n        debugf(\"After removing %.*s:\\n\", (int)len, s);\n        debugnode(\"Compression may be needed\", h);\n        debugf(\"Seek start node\\n\");\n\n        /* Try to reach the upper node that is compressible.\n         * At the end of the loop 'h' will point to the first node we\n         * can try to compress and 'parent' to its parent. 
*/\n        raxNode *parent;\n        while (1) {\n            parent = raxStackPop(&ts);\n            if (!parent || parent->iskey || (!parent->iscompr && parent->size != 1)) break;\n            h = parent;\n            debugnode(\"Going up to\", h);\n        }\n        raxNode *start = h; /* Compression starting node. */\n\n        /* Scan chain of nodes we can compress. */\n        size_t comprsize = h->size;\n        int nodes = 1;\n        while (h->size != 0) {\n            raxNode **cp = raxNodeLastChildPtr(h);\n            memcpy(&h, cp, sizeof(h));\n            if (h->iskey || (!h->iscompr && h->size != 1)) break;\n            /* Stop here if going to the next node would result into\n             * a compressed node larger than h->size can hold. */\n            if (comprsize + h->size > RAX_NODE_MAX_SIZE) break;\n            nodes++;\n            comprsize += h->size;\n        }\n        if (nodes > 1) {\n            /* If we can compress, create the new node and populate it. */\n            size_t nodesize = sizeof(raxNode) + comprsize + raxPadding(comprsize) + sizeof(raxNode *);\n            raxNode *new = rax_malloc(nodesize);\n            /* An out of memory here just means we cannot optimize this\n             * node, but the tree is left in a consistent state. */\n            if (new == NULL) {\n                raxStackFree(&ts);\n                return 1;\n            }\n            new->iskey = 0;\n            new->isnull = 0;\n            new->iscompr = 1;\n            new->size = comprsize;\n            rax->numnodes++;\n            rax->alloc_size += rax_ptr_alloc_size(new);\n\n            /* Scan again, this time to populate the new node content and\n             * to fix the new node child pointer. At the same time we free\n             * all the nodes that we'll no longer use. 
*/\n            comprsize = 0;\n            h = start;\n            while (h->size != 0) {\n                memcpy(new->data + comprsize, h->data, h->size);\n                comprsize += h->size;\n                raxNode **cp = raxNodeLastChildPtr(h);\n                raxNode *tofree = h;\n                memcpy(&h, cp, sizeof(h));\n                rax->alloc_size -= rax_ptr_alloc_size(tofree);\n                rax_free(tofree);\n                rax->numnodes--;\n                if (h->iskey || (!h->iscompr && h->size != 1)) break;\n                if (comprsize + h->size > RAX_NODE_MAX_SIZE) break;\n            }\n            debugnode(\"New node\", new);\n\n            /* Now 'h' points to the first node that we still need to use,\n             * so our new node child pointer will point to it. */\n            raxNode **cp = raxNodeLastChildPtr(new);\n            memcpy(cp, &h, sizeof(h));\n\n            /* Fix parent link. */\n            if (parent) {\n                raxNode **parentlink = raxFindParentLink(parent, start);\n                memcpy(parentlink, &new, sizeof(new));\n            } else {\n                rax->head = new;\n            }\n\n            debugf(\"Compressed %d nodes, %d total bytes\\n\", nodes, (int)comprsize);\n        }\n    }\n    raxStackFree(&ts);\n    return 1;\n}\n\n/* This is the core of raxFree(): performs a depth-first scan of the\n * tree and releases all the nodes found. */\nvoid raxRecursiveFree(rax *rax, raxNode *n, void (*free_callback)(void*, void*), void* argument) {\n    debugnode(\"free traversing\",n);\n    int numchildren = n->iscompr ? 
1 : n->size;\n    raxNode **cp = raxNodeLastChildPtr(n);\n    while (numchildren--) {\n        raxNode *child;\n        memcpy(&child, cp, sizeof(child));\n        raxRecursiveFree(rax,child,free_callback,argument);\n        cp--;\n    }\n    debugnode(\"free depth-first\", n);\n    if (free_callback && n->iskey && !n->isnull) free_callback(raxGetData(n), argument);\n    rax_free(n);\n    rax->numnodes--;\n}\n\n/* Free the entire radix tree, invoking a free_callback function for each key's data. \n * An additional argument is passed to the free_callback function.*/\n void raxFreeWithCallbackAndArgument(rax *rax, void (*free_callback)(void*, void*), void* argument) {\n    raxRecursiveFree(rax,rax->head,free_callback, argument);\n    assert(rax->numnodes == 0);\n    rax_free(rax);\n}\n\n/* Wrapper for the callback to adapt it for the context */\nvoid freeCallbackWrapper(void* data, void* argument) {\n    if (!argument) {\n        return;\n    }\n    void (*free_callback)(void*) = (void (*)(void*))argument;\n    free_callback(data);\n}\n\n/* Free a whole radix tree, calling the specified callback in order to\n * free the auxiliary data. */\nvoid raxFreeWithCallback(rax *rax, void (*free_callback)(void*)) {\n    raxFreeWithCallbackAndArgument(rax, freeCallbackWrapper, (void*)free_callback);\n}\n\n/* Free a whole radix tree. */\nvoid raxFree(rax *rax) {\n    raxFreeWithCallback(rax, NULL);\n}\n\n/* ------------------------------- Iterator --------------------------------- */\n\n/* Initialize a Rax iterator. This call should be performed a single time\n * to initialize the iterator, and must be followed by a raxSeek() call,\n * otherwise the raxPrev()/raxNext() functions will just return EOF. */\nvoid raxStart(raxIterator *it, rax *rt) {\n    it->flags = RAX_ITER_EOF; /* No crash if the iterator is not seeked. 
*/\n    it->rt = rt;\n    it->key_len = 0;\n    it->key = it->key_static_string;\n    it->key_max = RAX_ITER_STATIC_LEN;\n    it->data = NULL;\n    it->node_cb = NULL;\n    raxStackInit(&it->stack);\n}\n\n/* Append characters at the current key string of the iterator 'it'. This\n * is a low level function used to implement the iterator, not callable by\n * the user. Returns 0 on out of memory, otherwise 1 is returned. */\nint raxIteratorAddChars(raxIterator *it, unsigned char *s, size_t len) {\n    if (len == 0) return 1;\n    if (it->key_max < it->key_len + len) {\n        unsigned char *old = (it->key == it->key_static_string) ? NULL : it->key;\n        size_t new_max = (it->key_len + len) * 2;\n        it->key = rax_realloc(old, new_max);\n        if (it->key == NULL) {\n            it->key = (!old) ? it->key_static_string : old;\n            errno = ENOMEM;\n            return 0;\n        }\n        if (old == NULL) memcpy(it->key, it->key_static_string, it->key_len);\n        it->key_max = new_max;\n    }\n    /* Use memmove since there could be an overlap between 's' and\n     * it->key when we use the current key in order to re-seek. */\n    memmove(it->key + it->key_len, s, len);\n    it->key_len += len;\n    return 1;\n}\n\n/* Remove the specified number of chars from the right of the current\n * iterator key. */\nvoid raxIteratorDelChars(raxIterator *it, size_t count) {\n    it->key_len -= count;\n}\n\n/* Do an iteration step towards the next element. At the end of the step the\n * iterator key will represent the (new) current key. If it is not possible\n * to step in the specified direction since there are no longer elements, the\n * iterator is flagged with RAX_ITER_EOF.\n *\n * If 'noup' is true the function starts directly scanning for the next\n * lexicographically smaller children, and the current node is already assumed\n * to be the parent of the last key node, so the first operation to go back to\n * the parent will be skipped. 
This option is used by raxSeek() when\n * implementing seeking a non existing element with the \">\" or \"<\" options:\n * the starting node is not a key in that particular case, so we start the scan\n * from a node that does not represent the key set.\n *\n * The function returns 1 on success or 0 on out of memory. */\nint raxIteratorNextStep(raxIterator *it, int noup) {\n    if (it->flags & RAX_ITER_EOF) {\n        return 1;\n    } else if (it->flags & RAX_ITER_JUST_SEEKED) {\n        it->flags &= ~RAX_ITER_JUST_SEEKED;\n        return 1;\n    }\n\n    /* Save key len, stack items and the node where we are currently\n     * so that on iterator EOF we can restore the current key and state. */\n    size_t orig_key_len = it->key_len;\n    size_t orig_stack_items = it->stack.items;\n    raxNode *orig_node = it->node;\n\n    while (1) {\n        int children = it->node->iscompr ? 1 : it->node->size;\n        if (!noup && children) {\n            debugf(\"GO DEEPER\\n\");\n            /* Seek the lexicographically smaller key in this subtree, which\n             * is the first one found always going towards the first child\n             * of every successive node. */\n            if (!raxStackPush(&it->stack, it->node)) return 0;\n            raxNode **cp = raxNodeFirstChildPtr(it->node);\n            if (!raxIteratorAddChars(it, it->node->data, it->node->iscompr ? it->node->size : 1)) return 0;\n            memcpy(&it->node, cp, sizeof(it->node));\n            /* Call the node callback if any, and replace the node pointer\n             * if the callback returns true. */\n            if (it->node_cb && it->node_cb(&it->node)) memcpy(cp, &it->node, sizeof(it->node));\n            /* For \"next\" step, stop every time we find a key along the\n             * way, since the key is lexicographically smaller compared to\n             * what follows in the sub-children. 
*/\n            if (it->node->iskey) {\n                it->data = raxGetData(it->node);\n                return 1;\n            }\n        } else {\n            /* If we finished exploring the previous sub-tree, switch to the\n             * new one: go upper until a node is found where there are\n             * children representing keys lexicographically greater than the\n             * current key. */\n            while (1) {\n                int old_noup = noup;\n\n                /* Already on head? Can't go up, iteration finished. */\n                if (!noup && it->node == it->rt->head) {\n                    it->flags |= RAX_ITER_EOF;\n                    it->stack.items = orig_stack_items;\n                    it->key_len = orig_key_len;\n                    it->node = orig_node;\n                    return 1;\n                }\n                /* If there are no children at the current node, try parent's\n                 * next child. */\n                unsigned char prevchild = it->key[it->key_len - 1];\n                if (!noup) {\n                    it->node = raxStackPop(&it->stack);\n                } else {\n                    noup = 0;\n                }\n                /* Adjust the current key to represent the node we are\n                 * at. */\n                int todel = it->node->iscompr ? it->node->size : 1;\n                raxIteratorDelChars(it, todel);\n\n                /* Try visiting the next child if there was at least one\n                 * additional child. */\n                if (!it->node->iscompr && it->node->size > (old_noup ? 
0 : 1)) {\n                    raxNode **cp = raxNodeFirstChildPtr(it->node);\n                    int i = 0;\n                    while (i < it->node->size) {\n                        debugf(\"SCAN NEXT %c\\n\", it->node->data[i]);\n                        if (it->node->data[i] > prevchild) break;\n                        i++;\n                        cp++;\n                    }\n                    if (i != it->node->size) {\n                        debugf(\"SCAN found a new node\\n\");\n                        raxIteratorAddChars(it, it->node->data + i, 1);\n                        if (!raxStackPush(&it->stack, it->node)) return 0;\n                        memcpy(&it->node, cp, sizeof(it->node));\n                        /* Call the node callback if any, and replace the node\n                         * pointer if the callback returns true. */\n                        if (it->node_cb && it->node_cb(&it->node)) memcpy(cp, &it->node, sizeof(it->node));\n                        if (it->node->iskey) {\n                            it->data = raxGetData(it->node);\n                            return 1;\n                        }\n                        break;\n                    }\n                }\n            }\n        }\n    }\n}\n\n/* Seek the greatest key in the subtree at the current node. Return 0 on\n * out of memory, otherwise 1. This is a helper function for different\n * iteration functions below. 
*/\nint raxSeekGreatest(raxIterator *it) {\n    while (it->node->size) {\n        if (it->node->iscompr) {\n            if (!raxIteratorAddChars(it, it->node->data, it->node->size)) return 0;\n        } else {\n            if (!raxIteratorAddChars(it, it->node->data + it->node->size - 1, 1)) return 0;\n        }\n        raxNode **cp = raxNodeLastChildPtr(it->node);\n        if (!raxStackPush(&it->stack, it->node)) return 0;\n        memcpy(&it->node, cp, sizeof(it->node));\n    }\n    return 1;\n}\n\n/* Like raxIteratorNextStep() but implements an iteration step moving\n * to the lexicographically previous element. The 'noup' option has a similar\n * effect to the one of raxIteratorNextStep(). */\nint raxIteratorPrevStep(raxIterator *it, int noup) {\n    if (it->flags & RAX_ITER_EOF) {\n        return 1;\n    } else if (it->flags & RAX_ITER_JUST_SEEKED) {\n        it->flags &= ~RAX_ITER_JUST_SEEKED;\n        return 1;\n    }\n\n    /* Save key len, stack items and the node where we are currently\n     * so that on iterator EOF we can restore the current key and state. */\n    size_t orig_key_len = it->key_len;\n    size_t orig_stack_items = it->stack.items;\n    raxNode *orig_node = it->node;\n\n    while (1) {\n        int old_noup = noup;\n\n        /* Already on head? Can't go up, iteration finished. */\n        if (!noup && it->node == it->rt->head) {\n            it->flags |= RAX_ITER_EOF;\n            it->stack.items = orig_stack_items;\n            it->key_len = orig_key_len;\n            it->node = orig_node;\n            return 1;\n        }\n\n        unsigned char prevchild = it->key[it->key_len - 1];\n        if (!noup) {\n            it->node = raxStackPop(&it->stack);\n        } else {\n            noup = 0;\n        }\n\n        /* Adjust the current key to represent the node we are\n         * at. */\n        int todel = it->node->iscompr ? 
it->node->size : 1;\n        raxIteratorDelChars(it, todel);\n\n        /* Try visiting the prev child if there is at least one\n         * child. */\n        if (!it->node->iscompr && it->node->size > (old_noup ? 0 : 1)) {\n            raxNode **cp = raxNodeLastChildPtr(it->node);\n            int i = it->node->size - 1;\n            while (i >= 0) {\n                debugf(\"SCAN PREV %c\\n\", it->node->data[i]);\n                if (it->node->data[i] < prevchild) break;\n                i--;\n                cp--;\n            }\n            /* If we found a new subtree to explore in this node,\n             * go deeper following all the last children in order to\n             * find the key lexicographically greater. */\n            if (i != -1) {\n                debugf(\"SCAN found a new node\\n\");\n                /* Enter the node we just found. */\n                if (!raxIteratorAddChars(it, it->node->data + i, 1)) return 0;\n                if (!raxStackPush(&it->stack, it->node)) return 0;\n                memcpy(&it->node, cp, sizeof(it->node));\n                /* Seek sub-tree max. */\n                if (!raxSeekGreatest(it)) return 0;\n            }\n        }\n\n        /* Return the key: this could be the key we found scanning a new\n         * subtree, or if we did not find a new subtree to explore here,\n         * before giving up with this node, check if it's a key itself. */\n        if (it->node->iskey) {\n            it->data = raxGetData(it->node);\n            return 1;\n        }\n    }\n}\n\n/* Seek an iterator at the specified element.\n * Return 0 if the seek failed for syntax error or out of memory. Otherwise\n * 1 is returned. When 0 is returned for out of memory, errno is set to\n * the ENOMEM value. */\nint raxSeek(raxIterator *it, const char *op, unsigned char *ele, size_t len) {\n    int eq = 0, lt = 0, gt = 0, first = 0, last = 0;\n\n    it->stack.items = 0; /* Just resetting. Initialized by raxStart(). 
*/\n    it->flags |= RAX_ITER_JUST_SEEKED;\n    it->flags &= ~RAX_ITER_EOF;\n    it->key_len = 0;\n    it->node = NULL;\n\n    /* Set flags according to the operator used to perform the seek. */\n    if (op[0] == '>') {\n        gt = 1;\n        if (op[1] == '=') eq = 1;\n    } else if (op[0] == '<') {\n        lt = 1;\n        if (op[1] == '=') eq = 1;\n    } else if (op[0] == '=') {\n        eq = 1;\n    } else if (op[0] == '^') {\n        first = 1;\n    } else if (op[0] == '$') {\n        last = 1;\n    } else {\n        errno = 0;\n        return 0; /* Error. */\n    }\n\n    /* If there are no elements, set the EOF condition immediately and\n     * return. */\n    if (it->rt->numele == 0) {\n        it->flags |= RAX_ITER_EOF;\n        return 1;\n    }\n\n    if (first) {\n        /* Seeking the first key greater or equal to the empty string\n         * is equivalent to seeking the smaller key available. */\n        return raxSeek(it, \">=\", NULL, 0);\n    }\n\n    if (last) {\n        /* Find the greatest key taking always the last child till a\n         * final node is found. */\n        it->node = it->rt->head;\n        if (!raxSeekGreatest(it)) return 0;\n        assert(it->node->iskey);\n        it->data = raxGetData(it->node);\n        return 1;\n    }\n\n    /* We need to seek the specified key. What we do here is to actually\n     * perform a lookup, and later invoke the prev/next key code that\n     * we already use for iteration. */\n    int splitpos = 0;\n    size_t i = raxLowWalk(it->rt, ele, len, &it->node, NULL, &splitpos, &it->stack);\n\n    /* Return OOM on incomplete stack info. */\n    if (it->stack.oom) return 0;\n\n    if (eq && i == len && (!it->node->iscompr || splitpos == 0) && it->node->iskey) {\n        /* We found our node, since the key matches and we have an\n         * \"equal\" condition. */\n        if (!raxIteratorAddChars(it, ele, len)) return 0; /* OOM. 
*/\n        it->data = raxGetData(it->node);\n    } else if (lt || gt) {\n        /* Exact key not found or eq flag not set. We have to set as current\n         * key the one represented by the node we stopped at, and perform\n         * a next/prev operation to seek. */\n        raxIteratorAddChars(it, ele, i - splitpos);\n\n        /* We need to set the iterator in the correct state to call next/prev\n         * step in order to seek the desired element. */\n        debugf(\"After initial seek: i=%d len=%d key=%.*s\\n\", (int)i, (int)len, (int)it->key_len, it->key);\n        if (i != len && !it->node->iscompr) {\n            /* If we stopped in the middle of a normal node because of a\n             * mismatch, add the mismatching character to the current key\n             * and call the iterator with the 'noup' flag so that it will try\n             * to seek the next/prev child in the current node directly based\n             * on the mismatching character. */\n            if (!raxIteratorAddChars(it, ele + i, 1)) return 0;\n            debugf(\"Seek normal node on mismatch: %.*s\\n\", (int)it->key_len, (char *)it->key);\n\n            it->flags &= ~RAX_ITER_JUST_SEEKED;\n            if (lt && !raxIteratorPrevStep(it, 1)) return 0;\n            if (gt && !raxIteratorNextStep(it, 1)) return 0;\n            it->flags |= RAX_ITER_JUST_SEEKED; /* Ignore next call. */\n        } else if (i != len && it->node->iscompr) {\n            debugf(\"Compressed mismatch: %.*s\\n\", (int)it->key_len, (char *)it->key);\n            /* In case of a mismatch within a compressed node. */\n            int nodechar = it->node->data[splitpos];\n            int keychar = ele[i];\n            it->flags &= ~RAX_ITER_JUST_SEEKED;\n            if (gt) {\n                /* If the key the compressed node represents is greater\n                 * than our seek element, continue forward, otherwise set the\n                 * state in order to go back to the next sub-tree. 
*/\n                if (nodechar > keychar) {\n                    if (!raxIteratorNextStep(it, 0)) return 0;\n                } else {\n                    if (!raxIteratorAddChars(it, it->node->data, it->node->size)) return 0;\n                    if (!raxIteratorNextStep(it, 1)) return 0;\n                }\n            }\n            if (lt) {\n                /* If the key the compressed node represents is smaller\n                 * than our seek element, seek the greater key in this\n                 * subtree, otherwise set the state in order to go back to\n                 * the previous sub-tree. */\n                if (nodechar < keychar) {\n                    if (!raxSeekGreatest(it)) return 0;\n                    it->data = raxGetData(it->node);\n                } else {\n                    if (!raxIteratorAddChars(it, it->node->data, it->node->size)) return 0;\n                    if (!raxIteratorPrevStep(it, 1)) return 0;\n                }\n            }\n            it->flags |= RAX_ITER_JUST_SEEKED; /* Ignore next call. */\n        } else {\n            debugf(\"No mismatch: %.*s\\n\", (int)it->key_len, (char *)it->key);\n            /* If there was no mismatch we are into a node representing the\n             * key, (but which is not a key or the seek operator does not\n             * include 'eq'), or we stopped in the middle of a compressed node\n             * after processing all the key. Continue iterating as this was\n             * a legitimate key we stopped at. */\n            it->flags &= ~RAX_ITER_JUST_SEEKED;\n            if (it->node->iscompr && it->node->iskey && splitpos && lt) {\n                /* If we stopped in the middle of a compressed node with\n                 * perfect match, and the condition is to seek a key \"<\" than\n                 * the specified one, then if this node is a key it already\n                 * represents our match. 
For instance we may have nodes:\n                 *\n                 * \"f\" -> \"oobar\" = 1 -> \"\" = 2\n                 *\n                 * Representing keys \"f\" = 1, \"foobar\" = 2. A seek for\n                 * the key < \"foo\" will stop in the middle of the \"oobar\"\n                 * node, but will be our match, representing the key \"f\".\n                 *\n                 * So in that case, we don't seek backward. */\n                it->data = raxGetData(it->node);\n            } else {\n                if (gt && !raxIteratorNextStep(it, 0)) return 0;\n                if (lt && !raxIteratorPrevStep(it, 0)) return 0;\n            }\n            it->flags |= RAX_ITER_JUST_SEEKED; /* Ignore next call. */\n        }\n    } else {\n        /* If we are here just eq was set but no match was found. */\n        it->flags |= RAX_ITER_EOF;\n        return 1;\n    }\n    return 1;\n}\n\n/* Go to the next element in the scope of the iterator 'it'.\n * If EOF (or out of memory) is reached, 0 is returned, otherwise 1 is\n * returned. In case 0 is returned because of OOM, errno is set to ENOMEM. */\nint raxNext(raxIterator *it) {\n    if (!raxIteratorNextStep(it, 0)) {\n        errno = ENOMEM;\n        return 0;\n    }\n    if (it->flags & RAX_ITER_EOF) {\n        errno = 0;\n        return 0;\n    }\n    return 1;\n}\n\n/* Go to the previous element in the scope of the iterator 'it'.\n * If EOF (or out of memory) is reached, 0 is returned, otherwise 1 is\n * returned. In case 0 is returned because of OOM, errno is set to ENOMEM. */\nint raxPrev(raxIterator *it) {\n    if (!raxIteratorPrevStep(it, 0)) {\n        errno = ENOMEM;\n        return 0;\n    }\n    if (it->flags & RAX_ITER_EOF) {\n        errno = 0;\n        return 0;\n    }\n    return 1;\n}\n\n/* Perform a random walk starting in the current position of the iterator.\n * Return 0 if the tree is empty or on out of memory. 
Otherwise 1 is returned\n * and the iterator is set to the node reached after doing a random walk\n * of 'steps' steps. If the 'steps' argument is 0, the random walk is performed\n * using a random number of steps between 1 and two times the logarithm of\n * the number of elements.\n *\n * NOTE: if you use this function to generate random elements from the radix\n * tree, expect a disappointing distribution. A random walk produces good\n * random elements if the tree is not sparse, however in the case of a radix\n * tree certain keys will be reported much more often than others. At least\n * this function should be able to explore every possible element eventually. */\nint raxRandomWalk(raxIterator *it, size_t steps) {\n    if (it->rt->numele == 0) {\n        it->flags |= RAX_ITER_EOF;\n        return 0;\n    }\n\n    if (steps == 0) {\n        size_t fle = 1 + floor(log(it->rt->numele));\n        fle *= 2;\n        steps = 1 + rand() % fle;\n    }\n\n    raxNode *n = it->node;\n    while (steps > 0 || !n->iskey) {\n        int numchildren = n->iscompr ? 1 : n->size;\n        int r = rand() % (numchildren + (n != it->rt->head));\n\n        if (r == numchildren) {\n            /* Go up to parent. */\n            n = raxStackPop(&it->stack);\n            int todel = n->iscompr ? n->size : 1;\n            raxIteratorDelChars(it, todel);\n        } else {\n            /* Select a random child. 
*/\n            if (n->iscompr) {\n                if (!raxIteratorAddChars(it, n->data, n->size)) return 0;\n            } else {\n                if (!raxIteratorAddChars(it, n->data + r, 1)) return 0;\n            }\n            raxNode **cp = raxNodeFirstChildPtr(n) + r;\n            if (!raxStackPush(&it->stack, n)) return 0;\n            memcpy(&n, cp, sizeof(n));\n        }\n        if (n->iskey) steps--;\n    }\n    it->node = n;\n    it->data = raxGetData(it->node);\n    return 1;\n}\n\n/* Compare the key currently pointed by the iterator to the specified\n * key according to the specified operator. Returns 1 if the comparison is\n * true, otherwise 0 is returned. */\nint raxCompare(raxIterator *iter, const char *op, unsigned char *key, size_t key_len) {\n    int eq = 0, lt = 0, gt = 0;\n\n    if (op[0] == '=' || op[1] == '=') eq = 1;\n    if (op[0] == '>')\n        gt = 1;\n    else if (op[0] == '<')\n        lt = 1;\n    else if (op[1] != '=')\n        return 0; /* Syntax error. */\n\n    size_t minlen = key_len < iter->key_len ? key_len : iter->key_len;\n    int cmp = memcmp(iter->key, key, minlen);\n\n    /* Handle == */\n    if (lt == 0 && gt == 0) return cmp == 0 && key_len == iter->key_len;\n\n    /* Handle >, >=, <, <= */\n    if (cmp == 0) {\n        /* Same prefix: longer wins. */\n        if (eq && key_len == iter->key_len)\n            return 1;\n        else if (lt)\n            return iter->key_len < key_len;\n        else if (gt)\n            return iter->key_len > key_len;\n        else\n            return 0; /* Avoid warning, just 'eq' is handled before. */\n    } else if (cmp > 0) {\n        return gt ? 1 : 0;\n    } else /* (cmp < 0) */ {\n        return lt ? 1 : 0;\n    }\n}\n\n/* Free the iterator. */\nvoid raxStop(raxIterator *it) {\n    if (it->key != it->key_static_string) rax_free(it->key);\n    raxStackFree(&it->stack);\n}\n\n/* Return if the iterator is in an EOF state. 
This happens when raxSeek()\n * failed to seek an appropriate element, so that raxNext() or raxPrev()\n * will return zero, or when an EOF condition was reached while iterating\n * with raxNext() and raxPrev(). */\nint raxEOF(raxIterator *it) {\n    return it->flags & RAX_ITER_EOF;\n}\n\n/* Return the number of elements inside the radix tree. */\nuint64_t raxSize(rax *rax) {\n    return rax->numele;\n}\n\n/* Return the rax tree allocation size in bytes */\nsize_t raxAllocSize(rax *rax) {\n    return rax->alloc_size;\n}\n\n/* ----------------------------- Introspection ------------------------------ */\n\n/* This function is mostly used for debugging and learning purposes.\n * It shows an ASCII representation of a tree on standard output, outline\n * all the nodes and the contained keys.\n *\n * The representation is as follow:\n *\n *  \"foobar\" (compressed node)\n *  [abc] (normal node with three children)\n *  [abc]=0x12345678 (node is a key, pointing to value 0x12345678)\n *  [] (a normal empty node)\n *\n *  Children are represented in new indented lines, each children prefixed by\n *  the \"`-(x)\" string, where \"x\" is the edge byte.\n *\n *  [abc]\n *   `-(a) \"ladin\"\n *   `-(b) [kj]\n *   `-(c) []\n *\n *  However when a node has a single child the following representation\n *  is used instead:\n *\n *  [abc] -> \"ladin\" -> []\n */\n\n/* The actual implementation of raxShow(). */\nvoid raxRecursiveShow(int level, int lpad, raxNode *n) {\n    char s = n->iscompr ? '\"' : '[';\n    char e = n->iscompr ? '\"' : ']';\n\n    int numchars = printf(\"%c%.*s%c\", s, n->size, n->data, e);\n    if (n->iskey) {\n        numchars += printf(\"=%p\", raxGetData(n));\n    }\n\n    int numchildren = n->iscompr ? 1 : n->size;\n    /* Note that 7 and 4 magic constants are the string length\n     * of \" `-(x) \" and \" -> \" respectively. */\n    if (level) {\n        lpad += (numchildren > 1) ? 
7 : 4;\n        if (numchildren == 1) lpad += numchars;\n    }\n    raxNode **cp = raxNodeFirstChildPtr(n);\n    for (int i = 0; i < numchildren; i++) {\n        char *branch = \" `-(%c) \";\n        if (numchildren > 1) {\n            printf(\"\\n\");\n            for (int j = 0; j < lpad; j++) putchar(' ');\n            printf(branch, n->data[i]);\n        } else {\n            printf(\" -> \");\n        }\n        raxNode *child;\n        memcpy(&child, cp, sizeof(child));\n        raxRecursiveShow(level + 1, lpad, child);\n        cp++;\n    }\n}\n\n/* Show a tree, as outlined in the comment above. */\nvoid raxShow(rax *rax) {\n    raxRecursiveShow(0, 0, rax->head);\n    putchar('\\n');\n}\n\n/* Used by debugnode() macro to show info about a given node. */\nvoid raxDebugShowNode(const char *msg, raxNode *n) {\n    if (raxDebugMsg == 0) return;\n    printf(\"%s: %p [%.*s] key:%u size:%u children:\", msg, (void *)n, (int)n->size, (char *)n->data, n->iskey, n->size);\n    int numcld = n->iscompr ? 1 : n->size;\n    raxNode **cldptr = raxNodeLastChildPtr(n) - (numcld - 1);\n    while (numcld--) {\n        raxNode *child;\n        memcpy(&child, cldptr, sizeof(child));\n        cldptr++;\n        printf(\"%p \", (void *)child);\n    }\n    printf(\"\\n\");\n    fflush(stdout);\n}\n\n/* Touch all the nodes of a tree returning a check sum. This is useful\n * in order to make Valgrind detect if there is something wrong while\n * reading the data structure.\n *\n * This function was used in order to identify Rax bugs after a big refactoring\n * using this technique:\n *\n * 1. The rax-test is executed using Valgrind, adding a printf() so that for\n *    the fuzz tester we see what iteration in the loop we are in.\n * 2. After every modification of the radix tree made by the fuzz tester\n *    in rax-test.c, we add a call to raxTouch().\n * 3. Now as soon as an operation will corrupt the tree, raxTouch() will\n *    detect it (via Valgrind) immediately. 
We can add more calls to narrow\n *    the state.\n * 4. At this point a good idea is to enable Rax debugging messages immediately\n *    before the moment the tree is corrupted, to see what happens.\n */\nunsigned long raxTouch(raxNode *n) {\n    debugf(\"Touching %p\\n\", (void *)n);\n    unsigned long sum = 0;\n    if (n->iskey) {\n        sum += (unsigned long)raxGetData(n);\n    }\n\n    int numchildren = n->iscompr ? 1 : n->size;\n    raxNode **cp = raxNodeFirstChildPtr(n);\n    int count = 0;\n    for (int i = 0; i < numchildren; i++) {\n        if (numchildren > 1) {\n            sum += (long)n->data[i];\n        }\n        raxNode *child;\n        memcpy(&child, cp, sizeof(child));\n        if (child == (void *)0x65d1760) count++;\n        if (count > 1) exit(1);\n        sum += raxTouch(child);\n        cp++;\n    }\n    return sum;\n}\n\nint checkedRaxRemove(rax *rax, unsigned char *s, size_t len, void **old) {\n  int res = raxRemove(rax, s, len, old);\n  if(res == 0) {\n    // lp freed but node not removed!\n    fprintf(stderr, \"Error: corrupted listpack found.\");\n    abort();\n  }\n  return res;\n}\n"
  },
  {
    "path": "src/redis/rax.h",
    "content": "/* Rax -- A radix tree implementation.\n *\n * Copyright (c) 2017-2018, Redis Ltd.\n * All rights reserved.\n *\n * Redistribution and use in source and binary forms, with or without\n * modification, are permitted provided that the following conditions are met:\n *\n *   * Redistributions of source code must retain the above copyright notice,\n *     this list of conditions and the following disclaimer.\n *   * Redistributions in binary form must reproduce the above copyright\n *     notice, this list of conditions and the following disclaimer in the\n *     documentation and/or other materials provided with the distribution.\n *   * Neither the name of Redis nor the names of its contributors may be used\n *     to endorse or promote products derived from this software without\n *     specific prior written permission.\n *\n * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS \"AS IS\"\n * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE\n * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE\n * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE\n * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR\n * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF\n * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS\n * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN\n * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)\n * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE\n * POSSIBILITY OF SUCH DAMAGE.\n */\n\n#ifndef RAX_H\n#define RAX_H\n\n#include <stdint.h>\n\n/* Representation of a radix tree as implemented in this file, that contains\n * the strings \"foo\", \"foobar\" and \"footer\" after the insertion of each\n * word. 
When the node represents a key inside the radix tree, we write it\n * between [], otherwise it is written between ().\n *\n * This is the vanilla representation:\n *\n *              (f) \"\"\n *                \\\n *                (o) \"f\"\n *                  \\\n *                  (o) \"fo\"\n *                    \\\n *                  [t   b] \"foo\"\n *                  /     \\\n *         \"foot\" (e)     (a) \"foob\"\n *                /         \\\n *      \"foote\" (r)         (r) \"fooba\"\n *              /             \\\n *    \"footer\" []             [] \"foobar\"\n *\n * However, this implementation implements a very common optimization where\n * successive nodes having a single child are \"compressed\" into the node\n * itself as a string of characters, each representing a next-level child,\n * and only the link to the node representing the last character node is\n * provided inside the representation. So the above representation is turned\n * into:\n *\n *                  [\"foo\"] \"\"\n *                     |\n *                  [t   b] \"foo\"\n *                  /     \\\n *        \"foot\" (\"er\")    (\"ar\") \"foob\"\n *                 /          \\\n *       \"footer\" []          [] \"foobar\"\n *\n * However this optimization makes the implementation a bit more complex.\n * For instance if a key \"first\" is added in the above radix tree, a\n * \"node splitting\" operation is needed, since the \"foo\" prefix is no longer\n * composed of nodes having a single child one after the other. 
This is the\n * above tree and the resulting node splitting after this event happens:\n *\n *\n *                    (f) \"\"\n *                    /\n *                 (i o) \"f\"\n *                 /   \\\n *    \"firs\"  (\"rst\")  (o) \"fo\"\n *              /        \\\n *    \"first\" []       [t   b] \"foo\"\n *                     /     \\\n *           \"foot\" (\"er\")    (\"ar\") \"foob\"\n *                    /          \\\n *          \"footer\" []          [] \"foobar\"\n *\n * Similarly after deletion, if a new chain of nodes having a single child\n * is created (the chain must also not include nodes that represent keys),\n * it must be compressed back into a single node.\n *\n */\n\n#define RAX_NODE_MAX_SIZE ((1 << 29) - 1)\ntypedef struct raxNode {\n    uint32_t iskey : 1;   /* Does this node contain a key? */\n    uint32_t isnull : 1;  /* Associated value is NULL (don't store it). */\n    uint32_t iscompr : 1; /* Node is compressed. */\n    uint32_t size : 29;   /* Number of children, or compressed string len. 
*/\n    /* Data layout is as follows:\n     *\n     * If node is not compressed we have 'size' bytes, one for each child\n     * character, and 'size' raxNode pointers, pointing to each child node.\n     * Note how the character is not stored in the children but in the\n     * edge of the parents:\n     *\n     * [header iscompr=0][abc][a-ptr][b-ptr][c-ptr](value-ptr?)\n     *\n     * If the node is compressed (iscompr bit is 1) the node has 1 child.\n     * In that case the 'size' bytes of the string stored immediately at\n     * the start of the data section, represent a sequence of successive\n     * nodes linked one after the other, for which only the last one in\n     * the sequence is actually represented as a node, and pointed to by\n     * the current compressed node.\n     *\n     * [header iscompr=1][xyz][z-ptr](value-ptr?)\n     *\n     * Both compressed and not compressed nodes can represent a key\n     * with associated data in the radix tree at any level (not just terminal\n     * nodes).\n     *\n     * If the node has an associated key (iskey=1) and is not NULL\n     * (isnull=0), then after the raxNode pointers pointing to the\n     * children, an additional value pointer is present (as you can see\n     * in the representation above as \"value-ptr\" field).\n     */\n    unsigned char data[];\n} raxNode;\n\ntypedef struct rax {\n    raxNode *head;     /* Pointer to root node of tree */\n    uint64_t numele;   /* Number of keys in the tree */\n    uint64_t numnodes; /* Number of rax nodes in the tree */\n    size_t alloc_size; /* Total allocation size of the tree in bytes */\n} rax;\n\n/* Stack data structure used by raxLowWalk() in order to, optionally, return\n * a list of parent nodes to the caller. The nodes do not have a \"parent\"\n * field for space concerns, so we use the auxiliary stack when needed. 
*/\n#define RAX_STACK_STATIC_ITEMS 32\ntypedef struct raxStack {\n    void **stack;           /* Points to static_items or a heap-allocated array. */\n    size_t items, maxitems; /* Number of items contained and total space. */\n    /* Up to RAX_STACK_STATIC_ITEMS items we avoid allocating on the heap\n     * and use this static array of pointers instead. */\n    void *static_items[RAX_STACK_STATIC_ITEMS];\n    int oom; /* True if pushing into this stack failed for OOM at some point. */\n} raxStack;\n\n/* Optional callback used by iterators to be notified of each rax node,\n * including nodes not representing keys. If the callback returns true\n * the callback changed the node pointer in the iterator structure, and the\n * iterator implementation will have to replace the pointer in the radix tree\n * internals. This allows the callback to reallocate the node to perform\n * very special operations, normally not needed by normal applications.\n *\n * This callback is used to perform very low level analysis of the radix tree\n * structure, scanning each possible node (but the root node), or in order to\n * reallocate the nodes to reduce the allocation fragmentation (this is the\n * server's application for this callback).\n *\n * This is currently only supported in forward iterations (raxNext) */\ntypedef int (*raxNodeCallback)(raxNode **noderef);\n\n/* Radix tree iterator state is encapsulated into this data structure. */\n#define RAX_ITER_STATIC_LEN 128\n#define RAX_ITER_JUST_SEEKED (1 << 0) /* Iterator was just seeked. Return current \\\n                                         element for the first iteration and      \\\n                                         clear the flag. */\n#define RAX_ITER_EOF (1 << 1)         /* End of iteration reached. */\n#define RAX_ITER_SAFE (1 << 2)        /* Safe iterator, allows operations while \\\n                                         iterating. But it is slower. 
*/\ntypedef struct raxIterator {\n    int flags;\n    rax *rt;            /* Radix tree we are iterating. */\n    unsigned char *key; /* The current string. */\n    void *data;         /* Data associated to this key. */\n    size_t key_len;     /* Current key length. */\n    size_t key_max;     /* Max key len the current key buffer can hold. */\n    unsigned char key_static_string[RAX_ITER_STATIC_LEN];\n    raxNode *node;           /* Current node. Only for unsafe iteration. */\n    raxStack stack;          /* Stack used for unsafe iteration. */\n    raxNodeCallback node_cb; /* Optional node callback. Normally set to NULL. */\n} raxIterator;\n\n/* Exported API. */\nrax *raxNew(void);\nint raxInsert(rax *rax, unsigned char *s, size_t len, void *data, void **old);\nint raxTryInsert(rax *rax, unsigned char *s, size_t len, void *data, void **old);\nint raxRemove(rax *rax, unsigned char *s, size_t len, void **old);\nint raxFind(rax *rax, unsigned char *s, size_t len, void **value);\nvoid raxFree(rax *rax);\nvoid raxFreeWithCallback(rax *rax, void (*free_callback)(void*));\nvoid raxFreeWithCallbackAndArgument(rax *rax, void (*free_callback)(void*, void*), void* argument);\nvoid raxStart(raxIterator *it, rax *rt);\nint raxSeek(raxIterator *it, const char *op, unsigned char *ele, size_t len);\nint raxNext(raxIterator *it);\nint raxPrev(raxIterator *it);\nint raxRandomWalk(raxIterator *it, size_t steps);\nint raxCompare(raxIterator *iter, const char *op, unsigned char *key, size_t key_len);\nvoid raxStop(raxIterator *it);\nint raxEOF(raxIterator *it);\nvoid raxShow(rax *rax);\nuint64_t raxSize(rax *rax);\nsize_t raxAllocSize(rax *rax);\nunsigned long raxTouch(raxNode *n);\nvoid raxSetDebugMsg(int onoff);\n\nint checkedRaxRemove(rax *rax, unsigned char *s, size_t len, void **old);\n\n/* Internal API. May be used by the node callback in order to access rax nodes\n * in a low level way, so this function is exported as well. 
*/\nvoid raxSetData(raxNode *n, void *data);\n\n#endif\n"
  },
  {
    "path": "src/redis/rax_malloc.h",
    "content": "/* Rax -- A radix tree implementation.\n *\n * Copyright (c) 2017, Redis Ltd.\n * All rights reserved.\n *\n * Redistribution and use in source and binary forms, with or without\n * modification, are permitted provided that the following conditions are met:\n *\n *   * Redistributions of source code must retain the above copyright notice,\n *     this list of conditions and the following disclaimer.\n *   * Redistributions in binary form must reproduce the above copyright\n *     notice, this list of conditions and the following disclaimer in the\n *     documentation and/or other materials provided with the distribution.\n *   * Neither the name of Redis nor the names of its contributors may be used\n *     to endorse or promote products derived from this software without\n *     specific prior written permission.\n *\n * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS \"AS IS\"\n * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE\n * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE\n * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE\n * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR\n * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF\n * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS\n * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN\n * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)\n * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE\n * POSSIBILITY OF SUCH DAMAGE.\n */\n\n/* Allocator selection.\n *\n * This file is used in order to change the Rax allocator at compile time.\n * Just define the following defines to what you want to use. Also add\n * the include of your alternate allocator if needed (not needed in order\n * to use the default libc allocator). 
*/\n\n#ifndef RAX_ALLOC_H\n#define RAX_ALLOC_H\n#include \"zmalloc.h\"\n#define rax_malloc zmalloc\n#define rax_realloc zrealloc\n#define rax_free zfree\n#define rax_ptr_alloc_size zmalloc_size\n#endif\n"
  },
  {
    "path": "src/redis/rdb.h",
    "content": "/*\n * Copyright (c) 2009-2012, Salvatore Sanfilippo <antirez at gmail dot com>\n * All rights reserved.\n *\n * Redistribution and use in source and binary forms, with or without\n * modification, are permitted provided that the following conditions are met:\n *\n *   * Redistributions of source code must retain the above copyright notice,\n *     this list of conditions and the following disclaimer.\n *   * Redistributions in binary form must reproduce the above copyright\n *     notice, this list of conditions and the following disclaimer in the\n *     documentation and/or other materials provided with the distribution.\n *   * Neither the name of Redis nor the names of its contributors may be used\n *     to endorse or promote products derived from this software without\n *     specific prior written permission.\n *\n * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS \"AS IS\"\n * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE\n * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE\n * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE\n * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR\n * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF\n * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS\n * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN\n * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)\n * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE\n * POSSIBILITY OF SUCH DAMAGE.\n */\n\n#ifndef __RDB_H\n#define __RDB_H\n\n#include <limits.h>\n#include <stdio.h>\n#include <time.h>\n\n#include \"redis_aux.h\"\n\n/* The current RDB version. When the format changes in a way that is no longer\n * backward compatible this number gets incremented. 
*/\n#define RDB_VERSION 12\n\n/* We would like to serialize to version 9 such that our rdb files\n * can be loaded by redis version 6 (RDB_VERSION 9) */\n#define RDB_SER_VERSION 9\n\n/* Defines related to the dump file format. To store 32 bits lengths for short\n * keys requires a lot of space, so we check the most significant 2 bits of\n * the first byte to interpret the length:\n *\n * 00|XXXXXX => if the two MSB are 00 the len is the 6 bits of this byte\n * 01|XXXXXX XXXXXXXX =>  01, the len is 14 bits, 6 bits + 8 bits of next byte\n * 10|000000 [32 bit integer] => A full 32 bit len in net byte order will follow\n * 10|000001 [64 bit integer] => A full 64 bit len in net byte order will follow\n * 11|OBKIND this means: specially encoded object will follow. The six bits\n *           number specify the kind of object that follows.\n *           See the RDB_ENC_* defines.\n *\n * Lengths up to 63 are stored using a single byte, most DB keys, and many\n * values, will fit inside. */\n#define RDB_6BITLEN 0\n#define RDB_14BITLEN 1\n#define RDB_32BITLEN 0x80\n#define RDB_64BITLEN 0x81\n#define RDB_ENCVAL 3\n#define RDB_LENERR UINT64_MAX\n\n/* When a length of a string object stored on disk has the first two bits\n * set, the remaining six bits specify a special encoding for the object\n * according to the following defines: */\n#define RDB_ENC_INT8 0        /* 8 bit signed integer */\n#define RDB_ENC_INT16 1       /* 16 bit signed integer */\n#define RDB_ENC_INT32 2       /* 32 bit signed integer */\n#define RDB_ENC_LZF 3         /* string compressed with FASTLZ */\n\n/* Map object types to RDB object types. Macros starting with OBJ_ are for\n * memory storage and may change. Instead RDB types must be fixed because\n * we store them on disk. */\n#define RDB_TYPE_STRING 0\n#define RDB_TYPE_LIST   1\n#define RDB_TYPE_SET    2\n#define RDB_TYPE_ZSET   3\n#define RDB_TYPE_HASH   4\n#define RDB_TYPE_ZSET_2 5 /* ZSET version 2 with doubles stored in binary. 
*/\n#define RDB_TYPE_MODULE 6\n#define RDB_TYPE_MODULE_PRE_GA 6 /* Used in 4.0 release candidates */\n#define RDB_TYPE_MODULE_2 7 /* Module value with annotations for parsing without\n                               the generating module being loaded. */\n/* NOTE: WHEN ADDING NEW RDB TYPE, UPDATE rdbIsObjectType() BELOW */\n\n/* Object types for encoded objects. */\n#define RDB_TYPE_HASH_ZIPMAP    9\n#define RDB_TYPE_LIST_ZIPLIST  10\n#define RDB_TYPE_SET_INTSET    11\n#define RDB_TYPE_ZSET_ZIPLIST  12\n#define RDB_TYPE_HASH_ZIPLIST  13\n#define RDB_TYPE_LIST_QUICKLIST 14\n#define RDB_TYPE_STREAM_LISTPACKS 15\n#define RDB_TYPE_HASH_LISTPACK 16\n#define RDB_TYPE_ZSET_LISTPACK 17\n#define RDB_TYPE_LIST_QUICKLIST_2   18\n#define RDB_TYPE_STREAM_LISTPACKS_2 19\n#define RDB_TYPE_SET_LISTPACK  20\n#define RDB_TYPE_STREAM_LISTPACKS_3 21\n/* NOTE: WHEN ADDING NEW RDB TYPE, UPDATE rdbIsObjectType() BELOW */\n\n/* Test if a type is an object type. */\n#define __rdbIsObjectType(t) (((t) >= 0 && (t) <= 7) || ((t) >= 9 && (t) <= 21))\n\n/* Range 200-240 is used by Dragonfly specific opcodes */\n\n/* Special RDB opcodes (saved/loaded with rdbSaveType/rdbLoadType). */\n#define RDB_OPCODE_SLOT_INFO  244   /* Individual slot info, such as slot id and size (cluster mode only). */\n#define RDB_OPCODE_FUNCTION   246   /* engine data */\n#define RDB_OPCODE_FUNCTION2  245   /* function library data */\n#define RDB_OPCODE_FUNCTION_PRE_GA   246   /* old function library data for 7.0 rc1 and rc2 */\n#define RDB_OPCODE_MODULE_AUX 247   /* Module auxiliary data. */\n#define RDB_OPCODE_IDLE       248   /* LRU idle time. */\n#define RDB_OPCODE_FREQ       249   /* LFU frequency. */\n#define RDB_OPCODE_AUX        250   /* RDB aux field. */\n#define RDB_OPCODE_RESIZEDB   251   /* Hash table resize hint. */\n#define RDB_OPCODE_EXPIRETIME_MS 252    /* Expire time in milliseconds. */\n#define RDB_OPCODE_EXPIRETIME 253       /* Old expire time in seconds. 
*/\n#define RDB_OPCODE_SELECTDB   254   /* DB number of the following keys. */\n#define RDB_OPCODE_EOF        255   /* End of the RDB file. */\n\n/* Module serialized values sub opcodes */\n#define RDB_MODULE_OPCODE_EOF   0   /* End of module value. */\n#define RDB_MODULE_OPCODE_SINT  1   /* Signed integer. */\n#define RDB_MODULE_OPCODE_UINT  2   /* Unsigned integer. */\n#define RDB_MODULE_OPCODE_FLOAT 3   /* Float. */\n#define RDB_MODULE_OPCODE_DOUBLE 4  /* Double. */\n#define RDB_MODULE_OPCODE_STRING 5  /* String. */\n\n/* rdbLoad...() functions flags. */\n#define RDB_LOAD_NONE   0\n#define RDB_LOAD_ENC    (1<<0)\n#define RDB_LOAD_PLAIN  (1<<1)\n#define RDB_LOAD_SDS    (1<<2)\n\n/* flags on the purpose of rdb save or load */\n#define RDBFLAGS_NONE 0                 /* No special RDB loading. */\n#define RDBFLAGS_AOF_PREAMBLE (1<<0)    /* Load/save the RDB as AOF preamble. */\n#define RDBFLAGS_REPLICATION (1<<1)     /* Load/save for SYNC. */\n#define RDBFLAGS_ALLOW_DUP (1<<2)       /* Allow duplicated keys when loading.*/\n#define RDBFLAGS_FEED_REPL (1<<3)       /* Feed replication stream when loading.*/\n#define RDBFLAGS_KEEP_CACHE (1<<4)      /* Don't reclaim cache after rdb file is generated */\n\n/* When rdbLoadObject() returns NULL, the err flag is\n * set to hold the type of error that occurred */\n#define RDB_LOAD_ERR_EMPTY_KEY  1   /* Error of empty key */\n#define RDB_LOAD_ERR_OTHER      2   /* Any other errors */\n\n// ROMAN: those constants should be factored out to redis_base.h or something.\n// Currently moved here from server.h\n#define LONG_STR_SIZE      21          /* Bytes needed for long -> str + '\\0' */\n\n#define REDIS_VERSION \"6.2.11\"\n\n#endif\n"
  },
  {
    "path": "src/redis/read.c",
    "content": "/*\n * Copyright (c) 2009-2011, Salvatore Sanfilippo <antirez at gmail dot com>\n * Copyright (c) 2010-2011, Pieter Noordhuis <pcnoordhuis at gmail dot com>\n *\n * All rights reserved.\n *\n * Redistribution and use in source and binary forms, with or without\n * modification, are permitted provided that the following conditions are met:\n *\n *   * Redistributions of source code must retain the above copyright notice,\n *     this list of conditions and the following disclaimer.\n *   * Redistributions in binary form must reproduce the above copyright\n *     notice, this list of conditions and the following disclaimer in the\n *     documentation and/or other materials provided with the distribution.\n *   * Neither the name of Redis nor the names of its contributors may be used\n *     to endorse or promote products derived from this software without\n *     specific prior written permission.\n *\n * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS \"AS IS\"\n * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE\n * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE\n * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE\n * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR\n * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF\n * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS\n * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN\n * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)\n * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE\n * POSSIBILITY OF SUCH DAMAGE.\n */\n\n#include <string.h>\n#include <stdlib.h>\n\n#include <unistd.h>\n#include <strings.h>\n\n#include <assert.h>\n#include <errno.h>\n#include <ctype.h>\n#include <limits.h>\n#include <math.h>\n\n#include \"sdsalloc.h\"\n#include \"read.h\"\n#include \"sds.h\"\n\n\n/* Initial size of our nested reply stack and how much we grow it when needed */\n#define REDIS_READER_STACK_SIZE 9\n\nstatic void __redisReaderSetError(redisReader *r, int type, const char *str) {\n    size_t len;\n\n    if (r->reply != NULL && r->fn && r->fn->freeObject) {\n        r->fn->freeObject(r->reply);\n        r->reply = NULL;\n    }\n\n    /* Clear input buffer on errors. */\n    sdsfree(r->buf);\n    r->buf = NULL;\n    r->pos = r->len = 0;\n\n    /* Reset task stack. */\n    r->ridx = -1;\n\n    /* Set error. */\n    r->err = type;\n    len = strlen(str);\n    len = len < (sizeof(r->errstr)-1) ? 
len : (sizeof(r->errstr)-1);\n    memcpy(r->errstr,str,len);\n    r->errstr[len] = '\\0';\n}\n\nstatic size_t chrtos(char *buf, size_t size, char byte) {\n    size_t len = 0;\n\n    switch(byte) {\n    case '\\\\':\n    case '\"':\n        len = snprintf(buf,size,\"\\\"\\\\%c\\\"\",byte);\n        break;\n    case '\\n': len = snprintf(buf,size,\"\\\"\\\\n\\\"\"); break;\n    case '\\r': len = snprintf(buf,size,\"\\\"\\\\r\\\"\"); break;\n    case '\\t': len = snprintf(buf,size,\"\\\"\\\\t\\\"\"); break;\n    case '\\a': len = snprintf(buf,size,\"\\\"\\\\a\\\"\"); break;\n    case '\\b': len = snprintf(buf,size,\"\\\"\\\\b\\\"\"); break;\n    default:\n        if (isprint(byte))\n            len = snprintf(buf,size,\"\\\"%c\\\"\",byte);\n        else\n            len = snprintf(buf,size,\"\\\"\\\\x%02x\\\"\",(unsigned char)byte);\n        break;\n    }\n\n    return len;\n}\n\nstatic void __redisReaderSetErrorProtocolByte(redisReader *r, char byte) {\n    char cbuf[8], sbuf[128];\n\n    chrtos(cbuf,sizeof(cbuf),byte);\n    snprintf(sbuf,sizeof(sbuf),\n        \"Protocol error, got %s as reply type byte\", cbuf);\n    __redisReaderSetError(r,REDIS_ERR_PROTOCOL,sbuf);\n}\n\nstatic void __redisReaderSetErrorOOM(redisReader *r) {\n    __redisReaderSetError(r,REDIS_ERR_OOM,\"Out of memory\");\n}\n\nstatic char *readBytes(redisReader *r, unsigned int bytes) {\n    char *p;\n    if (r->len-r->pos >= bytes) {\n        p = r->buf+r->pos;\n        r->pos += bytes;\n        return p;\n    }\n    return NULL;\n}\n\n/* Find pointer to \\r\\n. */\nstatic char *seekNewline(char *s, size_t len) {\n    char *ret;\n\n    /* We cannot match with fewer than 2 bytes */\n    if (len < 2)\n        return NULL;\n\n    /* Search up to len - 1 characters */\n    len--;\n\n    /* Look for the \\r */\n    while ((ret = memchr(s, '\\r', len)) != NULL) {\n        if (ret[1] == '\\n') {\n            /* Found. */\n            break;\n        }\n        /* Continue searching. 
*/\n        ret++;\n        len -= ret - s;\n        s = ret;\n    }\n\n    return ret;\n}\n\n/* Convert a string into a long long. Returns REDIS_OK if the string could be\n * parsed into a (non-overflowing) long long, REDIS_ERR otherwise. The value\n * will be set to the parsed value when appropriate.\n *\n * Note that this function demands that the string strictly represents\n * a long long: no spaces or other characters before or after the string\n * representing the number are accepted, nor zeroes at the start if not\n * for the string \"0\" representing the zero number.\n *\n * Because of its strictness, it is safe to use this function to check if\n * you can convert a string into a long long, and obtain back the string\n * from the number without any loss in the string representation. */\nstatic int string2ll(const char *s, size_t slen, long long *value) {\n    const char *p = s;\n    size_t plen = 0;\n    int negative = 0;\n    unsigned long long v;\n\n    if (plen == slen)\n        return REDIS_ERR;\n\n    /* Special case: first and only digit is 0. */\n    if (slen == 1 && p[0] == '0') {\n        if (value != NULL) *value = 0;\n        return REDIS_OK;\n    }\n\n    if (p[0] == '-') {\n        negative = 1;\n        p++; plen++;\n\n        /* Abort on only a negative sign. */\n        if (plen == slen)\n            return REDIS_ERR;\n    }\n\n    /* First digit should be 1-9, otherwise the string should just be 0. */\n    if (p[0] >= '1' && p[0] <= '9') {\n        v = p[0]-'0';\n        p++; plen++;\n    } else if (p[0] == '0' && slen == 1) {\n        *value = 0;\n        return REDIS_OK;\n    } else {\n        return REDIS_ERR;\n    }\n\n    while (plen < slen && p[0] >= '0' && p[0] <= '9') {\n        if (v > (ULLONG_MAX / 10)) /* Overflow. */\n            return REDIS_ERR;\n        v *= 10;\n\n        if (v > (ULLONG_MAX - (p[0]-'0'))) /* Overflow. 
*/\n            return REDIS_ERR;\n        v += p[0]-'0';\n\n        p++; plen++;\n    }\n\n    /* Return if not all bytes were used. */\n    if (plen < slen)\n        return REDIS_ERR;\n\n    if (negative) {\n        if (v > ((unsigned long long)(-(LLONG_MIN+1))+1)) /* Overflow. */\n            return REDIS_ERR;\n        if (value != NULL) *value = -v;\n    } else {\n        if (v > LLONG_MAX) /* Overflow. */\n            return REDIS_ERR;\n        if (value != NULL) *value = v;\n    }\n    return REDIS_OK;\n}\n\nstatic char *readLine(redisReader *r, int *_len) {\n    char *p, *s;\n    int len;\n\n    p = r->buf+r->pos;\n    s = seekNewline(p,(r->len-r->pos));\n    if (s != NULL) {\n        len = s-(r->buf+r->pos);\n        r->pos += len+2; /* skip \\r\\n */\n        if (_len) *_len = len;\n        return p;\n    }\n    return NULL;\n}\n\nstatic void moveToNextTask(redisReader *r) {\n    redisReadTask *cur, *prv;\n    while (r->ridx >= 0) {\n        /* Return a.s.a.p. when the stack is now empty. 
*/\n        if (r->ridx == 0) {\n            r->ridx--;\n            return;\n        }\n\n        cur = r->task[r->ridx];\n        prv = r->task[r->ridx-1];\n        assert(prv->type == REDIS_REPLY_ARRAY ||\n               prv->type == REDIS_REPLY_MAP ||\n               prv->type == REDIS_REPLY_ATTR ||\n               prv->type == REDIS_REPLY_SET ||\n               prv->type == REDIS_REPLY_PUSH);\n        if (cur->idx == prv->elements-1) {\n            r->ridx--;\n        } else {\n            /* Reset the type because the next item can be anything */\n            assert(cur->idx < prv->elements);\n            cur->type = -1;\n            cur->elements = -1;\n            cur->idx++;\n            return;\n        }\n    }\n}\n\nstatic int processLineItem(redisReader *r) {\n    redisReadTask *cur = r->task[r->ridx];\n    void *obj;\n    char *p;\n    int len;\n\n    if ((p = readLine(r,&len)) != NULL) {\n        if (cur->type == REDIS_REPLY_INTEGER) {\n            long long v;\n\n            if (string2ll(p, len, &v) == REDIS_ERR) {\n                __redisReaderSetError(r,REDIS_ERR_PROTOCOL,\n                        \"Bad integer value\");\n                return REDIS_ERR;\n            }\n\n            if (r->fn && r->fn->createInteger) {\n                obj = r->fn->createInteger(cur,v);\n            } else {\n                obj = (void*)REDIS_REPLY_INTEGER;\n            }\n        } else if (cur->type == REDIS_REPLY_DOUBLE) {\n            char buf[326], *eptr;\n            double d;\n\n            if ((size_t)len >= sizeof(buf)) {\n                __redisReaderSetError(r,REDIS_ERR_PROTOCOL,\n                        \"Double value is too large\");\n                return REDIS_ERR;\n            }\n\n            memcpy(buf,p,len);\n            buf[len] = '\\0';\n\n            if (len == 3 && strcasecmp(buf,\"inf\") == 0) {\n                d = INFINITY; /* Positive infinite. 
*/\n            } else if (len == 4 && strcasecmp(buf,\"-inf\") == 0) {\n                d = -INFINITY; /* Negative infinite. */\n            } else if ((len == 3 && strcasecmp(buf,\"nan\") == 0) ||\n                       (len == 4 && strcasecmp(buf, \"-nan\") == 0)) {\n                d = NAN; /* nan. */\n            } else {\n                d = strtod((char*)buf,&eptr);\n                /* RESP3 only allows \"inf\", \"-inf\", and finite values, while\n                 * strtod() allows other variations on infinity,\n                 * etc. We explicity handle our two allowed infinite cases and NaN\n                 * above, so strtod() should only result in finite values. */\n                if (buf[0] == '\\0' || eptr != &buf[len] || !isfinite(d)) {\n                    __redisReaderSetError(r,REDIS_ERR_PROTOCOL,\n                            \"Bad double value\");\n                    return REDIS_ERR;\n                }\n            }\n\n            if (r->fn && r->fn->createDouble) {\n                obj = r->fn->createDouble(cur,d,buf,len);\n            } else {\n                obj = (void*)REDIS_REPLY_DOUBLE;\n            }\n        } else if (cur->type == REDIS_REPLY_NIL) {\n            if (len != 0) {\n                __redisReaderSetError(r,REDIS_ERR_PROTOCOL,\n                        \"Bad nil value\");\n                return REDIS_ERR;\n            }\n\n            if (r->fn && r->fn->createNil)\n                obj = r->fn->createNil(cur);\n            else\n                obj = (void*)REDIS_REPLY_NIL;\n        } else if (cur->type == REDIS_REPLY_BOOL) {\n            int bval;\n\n            if (len != 1 || !strchr(\"tTfF\", p[0])) {\n                __redisReaderSetError(r,REDIS_ERR_PROTOCOL,\n                        \"Bad bool value\");\n                return REDIS_ERR;\n            }\n\n            bval = p[0] == 't' || p[0] == 'T';\n            if (r->fn && r->fn->createBool)\n                obj = r->fn->createBool(cur,bval);\n            
else\n                obj = (void*)REDIS_REPLY_BOOL;\n        } else if (cur->type == REDIS_REPLY_BIGNUM) {\n            /* Ensure all characters are decimal digits (with possible leading\n             * minus sign). */\n            for (int i = 0; i < len; i++) {\n                /* XXX Consider: Allow leading '+'? Error on leading '0's? */\n                if (i == 0 && p[0] == '-') continue;\n                if (p[i] < '0' || p[i] > '9') {\n                    __redisReaderSetError(r,REDIS_ERR_PROTOCOL,\n                            \"Bad bignum value\");\n                    return REDIS_ERR;\n                }\n            }\n            if (r->fn && r->fn->createString)\n                obj = r->fn->createString(cur,p,len);\n            else\n                obj = (void*)REDIS_REPLY_BIGNUM;\n        } else {\n            /* Type will be error or status. */\n            for (int i = 0; i < len; i++) {\n                if (p[i] == '\\r' || p[i] == '\\n') {\n                    __redisReaderSetError(r,REDIS_ERR_PROTOCOL,\n                            \"Bad simple string value\");\n                    return REDIS_ERR;\n                }\n            }\n            if (r->fn && r->fn->createString)\n                obj = r->fn->createString(cur,p,len);\n            else\n                obj = (void*)(uintptr_t)(cur->type);\n        }\n\n        if (obj == NULL) {\n            __redisReaderSetErrorOOM(r);\n            return REDIS_ERR;\n        }\n\n        /* Set reply if this is the root object. 
*/\n        if (r->ridx == 0) r->reply = obj;\n        moveToNextTask(r);\n        return REDIS_OK;\n    }\n\n    return REDIS_ERR;\n}\n\nstatic int processBulkItem(redisReader *r) {\n    redisReadTask *cur = r->task[r->ridx];\n    void *obj = NULL;\n    char *p, *s;\n    long long len;\n    unsigned long bytelen;\n    int success = 0;\n\n    p = r->buf+r->pos;\n    s = seekNewline(p,r->len-r->pos);\n    if (s != NULL) {\n        p = r->buf+r->pos;\n        bytelen = s-(r->buf+r->pos)+2; /* include \\r\\n */\n\n        if (string2ll(p, bytelen - 2, &len) == REDIS_ERR) {\n            __redisReaderSetError(r,REDIS_ERR_PROTOCOL,\n                    \"Bad bulk string length\");\n            return REDIS_ERR;\n        }\n\n        if (len < -1 || (LLONG_MAX > SIZE_MAX && len > (long long)SIZE_MAX)) {\n            __redisReaderSetError(r,REDIS_ERR_PROTOCOL,\n                    \"Bulk string length out of range\");\n            return REDIS_ERR;\n        }\n\n        if (len == -1) {\n            /* The nil object can always be created. */\n            if (r->fn && r->fn->createNil)\n                obj = r->fn->createNil(cur);\n            else\n                obj = (void*)REDIS_REPLY_NIL;\n            success = 1;\n        } else {\n            /* Only continue when the buffer contains the entire bulk item. 
*/\n            bytelen += len+2; /* include \\r\\n */\n            if (r->pos+bytelen <= r->len) {\n                if ((cur->type == REDIS_REPLY_VERB && len < 4) ||\n                    (cur->type == REDIS_REPLY_VERB && s[5] != ':'))\n                {\n                    __redisReaderSetError(r,REDIS_ERR_PROTOCOL,\n                            \"Verbatim string 4 bytes of content type are \"\n                            \"missing or incorrectly encoded.\");\n                    return REDIS_ERR;\n                }\n                if (r->fn && r->fn->createString)\n                    obj = r->fn->createString(cur,s+2,len);\n                else\n                    obj = (void*)(uintptr_t)cur->type;\n                success = 1;\n            }\n        }\n\n        /* Proceed when obj was created. */\n        if (success) {\n            if (obj == NULL) {\n                __redisReaderSetErrorOOM(r);\n                return REDIS_ERR;\n            }\n\n            r->pos += bytelen;\n\n            /* Set reply if this is the root object. */\n            if (r->ridx == 0) r->reply = obj;\n            moveToNextTask(r);\n            return REDIS_OK;\n        }\n    }\n\n    return REDIS_ERR;\n}\n\nstatic int redisReaderGrow(redisReader *r) {\n    redisReadTask **aux;\n    int newlen;\n\n    /* Grow our stack size */\n    newlen = r->tasks + REDIS_READER_STACK_SIZE;\n    aux = s_realloc(r->task, sizeof(*r->task) * newlen);\n    if (aux == NULL)\n        goto oom;\n\n    r->task = aux;\n\n    /* Allocate new tasks */\n    for (; r->tasks < newlen; r->tasks++) {\n        r->task[r->tasks] = s_calloc(sizeof(**r->task));\n        if (r->task[r->tasks] == NULL)\n            goto oom;\n    }\n\n    return REDIS_OK;\noom:\n    __redisReaderSetErrorOOM(r);\n    return REDIS_ERR;\n}\n\n/* Process the array, map and set types. 
*/\nstatic int processAggregateItem(redisReader *r) {\n    redisReadTask *cur = r->task[r->ridx];\n    void *obj;\n    char *p;\n    long long elements;\n    int root = 0, len;\n\n    if (r->ridx == r->tasks - 1) {\n        if (redisReaderGrow(r) == REDIS_ERR)\n            return REDIS_ERR;\n    }\n\n    if ((p = readLine(r,&len)) != NULL) {\n        if (string2ll(p, len, &elements) == REDIS_ERR) {\n            __redisReaderSetError(r,REDIS_ERR_PROTOCOL,\n                    \"Bad multi-bulk length\");\n            return REDIS_ERR;\n        }\n\n        root = (r->ridx == 0);\n\n        if (elements < -1 || (LLONG_MAX > SIZE_MAX && elements > SIZE_MAX) ||\n            (r->maxelements > 0 && elements > r->maxelements))\n        {\n            __redisReaderSetError(r,REDIS_ERR_PROTOCOL,\n                    \"Multi-bulk length out of range\");\n            return REDIS_ERR;\n        }\n\n        if (elements == -1) {\n            if (r->fn && r->fn->createNil)\n                obj = r->fn->createNil(cur);\n            else\n                obj = (void*)REDIS_REPLY_NIL;\n\n            if (obj == NULL) {\n                __redisReaderSetErrorOOM(r);\n                return REDIS_ERR;\n            }\n\n            moveToNextTask(r);\n        } else {\n            if (cur->type == REDIS_REPLY_MAP || cur->type == REDIS_REPLY_ATTR) elements *= 2;\n\n            if (r->fn && r->fn->createArray)\n                obj = r->fn->createArray(cur,elements);\n            else\n                obj = (void*)(uintptr_t)cur->type;\n\n            if (obj == NULL) {\n                __redisReaderSetErrorOOM(r);\n                return REDIS_ERR;\n            }\n\n            /* Modify task stack when there are more than 0 elements. 
*/\n            if (elements > 0) {\n                cur->elements = elements;\n                cur->obj = obj;\n                r->ridx++;\n                r->task[r->ridx]->type = -1;\n                r->task[r->ridx]->elements = -1;\n                r->task[r->ridx]->idx = 0;\n                r->task[r->ridx]->obj = NULL;\n                r->task[r->ridx]->parent = cur;\n                r->task[r->ridx]->privdata = r->privdata;\n            } else {\n                moveToNextTask(r);\n            }\n        }\n\n        /* Set reply if this is the root object. */\n        if (root) r->reply = obj;\n        return REDIS_OK;\n    }\n\n    return REDIS_ERR;\n}\n\nstatic int processItem(redisReader *r) {\n    redisReadTask *cur = r->task[r->ridx];\n    char *p;\n\n    /* check if we need to read type */\n    if (cur->type < 0) {\n        if ((p = readBytes(r,1)) != NULL) {\n            switch (p[0]) {\n            case '-':\n                cur->type = REDIS_REPLY_ERROR;\n                break;\n            case '+':\n                cur->type = REDIS_REPLY_STATUS;\n                break;\n            case ':':\n                cur->type = REDIS_REPLY_INTEGER;\n                break;\n            case ',':\n                cur->type = REDIS_REPLY_DOUBLE;\n                break;\n            case '_':\n                cur->type = REDIS_REPLY_NIL;\n                break;\n            case '$':\n                cur->type = REDIS_REPLY_STRING;\n                break;\n            case '*':\n                cur->type = REDIS_REPLY_ARRAY;\n                break;\n            case '%':\n                cur->type = REDIS_REPLY_MAP;\n                break;\n            case '|':\n                cur->type = REDIS_REPLY_ATTR;\n                break;\n            case '~':\n                cur->type = REDIS_REPLY_SET;\n                break;\n            case '#':\n                cur->type = REDIS_REPLY_BOOL;\n                break;\n            case '=':\n                
cur->type = REDIS_REPLY_VERB;\n                break;\n            case '>':\n                cur->type = REDIS_REPLY_PUSH;\n                break;\n            case '(':\n                cur->type = REDIS_REPLY_BIGNUM;\n                break;\n            default:\n                __redisReaderSetErrorProtocolByte(r,*p);\n                return REDIS_ERR;\n            }\n        } else {\n            /* could not consume 1 byte */\n            return REDIS_ERR;\n        }\n    }\n\n    /* process typed item */\n    switch(cur->type) {\n    case REDIS_REPLY_ERROR:\n    case REDIS_REPLY_STATUS:\n    case REDIS_REPLY_INTEGER:\n    case REDIS_REPLY_DOUBLE:\n    case REDIS_REPLY_NIL:\n    case REDIS_REPLY_BOOL:\n    case REDIS_REPLY_BIGNUM:\n        return processLineItem(r);\n    case REDIS_REPLY_STRING:\n    case REDIS_REPLY_VERB:\n        return processBulkItem(r);\n    case REDIS_REPLY_ARRAY:\n    case REDIS_REPLY_MAP:\n    case REDIS_REPLY_ATTR:\n    case REDIS_REPLY_SET:\n    case REDIS_REPLY_PUSH:\n        return processAggregateItem(r);\n    default:\n        assert(NULL);\n        return REDIS_ERR; /* Avoid warning. 
*/\n    }\n}\n\nredisReader *redisReaderCreateWithFunctions(redisReplyObjectFunctions *fn) {\n    redisReader *r;\n\n    r = s_calloc(sizeof(redisReader));\n    if (r == NULL)\n        return NULL;\n\n    r->buf = sdsempty();\n    if (r->buf == NULL)\n        goto oom;\n\n    r->task = s_calloc(REDIS_READER_STACK_SIZE * sizeof(*r->task));\n    if (r->task == NULL)\n        goto oom;\n\n    for (; r->tasks < REDIS_READER_STACK_SIZE; r->tasks++) {\n        r->task[r->tasks] = s_calloc(sizeof(**r->task));\n        if (r->task[r->tasks] == NULL)\n            goto oom;\n    }\n\n    r->fn = fn;\n    r->maxbuf = REDIS_READER_MAX_BUF;\n    r->maxelements = REDIS_READER_MAX_ARRAY_ELEMENTS;\n    r->ridx = -1;\n\n    return r;\noom:\n    redisReaderFree(r);\n    return NULL;\n}\n\nvoid redisReaderFree(redisReader *r) {\n    if (r == NULL)\n        return;\n\n    if (r->reply != NULL && r->fn && r->fn->freeObject)\n        r->fn->freeObject(r->reply);\n\n    if (r->task) {\n        /* We know r->task[i] is allocated if i < r->tasks */\n        for (int i = 0; i < r->tasks; i++) {\n            s_free(r->task[i]);\n        }\n\n        s_free(r->task);\n    }\n\n    sdsfree(r->buf);\n    s_free(r);\n}\n\nint redisReaderFeed(redisReader *r, const char *buf, size_t len) {\n    sds newbuf;\n\n    /* Return early when this reader is in an erroneous state. */\n    if (r->err)\n        return REDIS_ERR;\n\n    /* Copy the provided buffer. */\n    if (buf != NULL && len >= 1) {\n        /* Destroy internal buffer when it is empty and is quite large. 
*/\n        if (r->len == 0 && r->maxbuf != 0 && sdsavail(r->buf) > r->maxbuf) {\n            sdsfree(r->buf);\n            r->buf = sdsempty();\n            if (r->buf == 0) goto oom;\n\n            r->pos = 0;\n        }\n\n        newbuf = sdscatlen(r->buf,buf,len);\n        if (newbuf == NULL) goto oom;\n\n        r->buf = newbuf;\n        r->len = sdslen(r->buf);\n    }\n\n    return REDIS_OK;\noom:\n    __redisReaderSetErrorOOM(r);\n    return REDIS_ERR;\n}\n\nint redisReaderGetReply(redisReader *r, void **reply) {\n    /* Default target pointer to NULL. */\n    if (reply != NULL)\n        *reply = NULL;\n\n    /* Return early when this reader is in an erroneous state. */\n    if (r->err)\n        return REDIS_ERR;\n\n    /* When the buffer is empty, there will never be a reply. */\n    if (r->len == 0)\n        return REDIS_OK;\n\n    /* Set first item to process when the stack is empty. */\n    if (r->ridx == -1) {\n        r->task[0]->type = -1;\n        r->task[0]->elements = -1;\n        r->task[0]->idx = -1;\n        r->task[0]->obj = NULL;\n        r->task[0]->parent = NULL;\n        r->task[0]->privdata = r->privdata;\n        r->ridx = 0;\n    }\n\n    /* Process items in reply. */\n    while (r->ridx >= 0)\n        if (processItem(r) != REDIS_OK)\n            break;\n\n    /* Return ASAP when an error occurred. */\n    if (r->err)\n        return REDIS_ERR;\n\n    /* Discard part of the buffer when we've consumed at least 1k, to avoid\n     * doing unnecessary calls to memmove() in sds.c. */\n    if (r->pos >= 1024) {\n        if (sdsrange(r->buf,r->pos,-1) < 0) return REDIS_ERR;\n        r->pos = 0;\n        r->len = sdslen(r->buf);\n    }\n\n    /* Emit a reply when there is one. 
*/\n    if (r->ridx == -1) {\n        if (reply != NULL) {\n            *reply = r->reply;\n        } else if (r->reply != NULL && r->fn && r->fn->freeObject) {\n            r->fn->freeObject(r->reply);\n        }\n        r->reply = NULL;\n    }\n    return REDIS_OK;\n}\n"
  },
  {
    "path": "src/redis/read.h",
    "content": "/*\n * Copyright (c) 2009-2011, Salvatore Sanfilippo <antirez at gmail dot com>\n * Copyright (c) 2010-2011, Pieter Noordhuis <pcnoordhuis at gmail dot com>\n *\n * All rights reserved.\n *\n * Redistribution and use in source and binary forms, with or without\n * modification, are permitted provided that the following conditions are met:\n *\n *   * Redistributions of source code must retain the above copyright notice,\n *     this list of conditions and the following disclaimer.\n *   * Redistributions in binary form must reproduce the above copyright\n *     notice, this list of conditions and the following disclaimer in the\n *     documentation and/or other materials provided with the distribution.\n *   * Neither the name of Redis nor the names of its contributors may be used\n *     to endorse or promote products derived from this software without\n *     specific prior written permission.\n *\n * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS \"AS IS\"\n * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE\n * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE\n * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE\n * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR\n * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF\n * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS\n * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN\n * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)\n * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE\n * POSSIBILITY OF SUCH DAMAGE.\n */\n\n\n#ifndef __HIREDIS_READ_H\n#define __HIREDIS_READ_H\n#include <stdio.h> /* for size_t */\n\n#define REDIS_ERR -1\n#define REDIS_OK 0\n\n/* When an error occurs, the err flag in a context is set to hold the type of\n * error that occurred. 
REDIS_ERR_IO means there was an I/O error and you\n * should use the \"errno\" variable to find out what is wrong.\n * For other values, the \"errstr\" field will hold a description. */\n#define REDIS_ERR_IO 1 /* Error in read or write */\n#define REDIS_ERR_EOF 3 /* End of file */\n#define REDIS_ERR_PROTOCOL 4 /* Protocol error */\n#define REDIS_ERR_OOM 5 /* Out of memory */\n#define REDIS_ERR_TIMEOUT 6 /* Timed out */\n#define REDIS_ERR_OTHER 2 /* Everything else... */\n\n#define REDIS_REPLY_STRING 1\n#define REDIS_REPLY_ARRAY 2\n#define REDIS_REPLY_INTEGER 3\n#define REDIS_REPLY_NIL 4\n#define REDIS_REPLY_STATUS 5\n#define REDIS_REPLY_ERROR 6\n#define REDIS_REPLY_DOUBLE 7\n#define REDIS_REPLY_BOOL 8\n#define REDIS_REPLY_MAP 9\n#define REDIS_REPLY_SET 10\n#define REDIS_REPLY_ATTR 11\n#define REDIS_REPLY_PUSH 12\n#define REDIS_REPLY_BIGNUM 13\n#define REDIS_REPLY_VERB 14\n\n/* Default max unused reader buffer. */\n#define REDIS_READER_MAX_BUF (1024*16)\n\n/* Default multi-bulk element limit */\n#define REDIS_READER_MAX_ARRAY_ELEMENTS ((1LL<<32) - 1)\n\n#ifdef __cplusplus\nextern \"C\" {\n#endif\n\ntypedef struct redisReadTask {\n    int type;\n    long long elements; /* number of elements in multibulk container */\n    int idx; /* index in parent (array) object */\n    void *obj; /* holds user-generated value for a read task */\n    struct redisReadTask *parent; /* parent task */\n    void *privdata; /* user-settable arbitrary field */\n} redisReadTask;\n\ntypedef struct redisReplyObjectFunctions {\n    void *(*createString)(const redisReadTask*, char*, size_t);\n    void *(*createArray)(const redisReadTask*, size_t);\n    void *(*createInteger)(const redisReadTask*, long long);\n    void *(*createDouble)(const redisReadTask*, double, char*, size_t);\n    void *(*createNil)(const redisReadTask*);\n    void *(*createBool)(const redisReadTask*, int);\n    void (*freeObject)(void*);\n} redisReplyObjectFunctions;\n\ntypedef struct redisReader {\n    int err; /* Error 
flags, 0 when there is no error */\n    char errstr[128]; /* String representation of error when applicable */\n\n    char *buf; /* Read buffer */\n    size_t pos; /* Buffer cursor */\n    size_t len; /* Buffer length */\n    size_t maxbuf; /* Max length of unused buffer */\n    long long maxelements; /* Max multi-bulk elements */\n\n    redisReadTask **task;\n    int tasks;\n\n    int ridx; /* Index of current read task */\n    void *reply; /* Temporary reply pointer */\n\n    redisReplyObjectFunctions *fn;\n    void *privdata;\n} redisReader;\n\n/* Public API for the protocol parser. */\nredisReader *redisReaderCreateWithFunctions(redisReplyObjectFunctions *fn);\nvoid redisReaderFree(redisReader *r);\nint redisReaderFeed(redisReader *r, const char *buf, size_t len);\nint redisReaderGetReply(redisReader *r, void **reply);\n\n#define redisReaderSetPrivdata(_r, _p) (int)(((redisReader*)(_r))->privdata = (_p))\n#define redisReaderGetObject(_r) (((redisReader*)(_r))->reply)\n#define redisReaderGetError(_r) (((redisReader*)(_r))->errstr)\n\n#ifdef __cplusplus\n}\n#endif\n\n#endif\n"
  },
  {
    "path": "src/redis/redis_aux.c",
    "content": "#include \"redis_aux.h\"\n\n#include <string.h>\n#include <unistd.h>\n\n#include \"crc64.h\"\n#include \"endianconv.h\"\n#include \"zmalloc.h\"\n\nServer server;\n\nvoid InitRedisTables() {\n  crc64_init();\n  memset(&server, 0, sizeof(server));\n\n  server.max_map_field_len = 64;\n  server.max_listpack_map_bytes = 1024;\n\n  server.stream_node_max_entries = 100;\n}\n\n/* Toggle the 64 bit unsigned integer pointed by *p from little endian to\n * big endian */\nvoid memrev64(void* p) {\n  unsigned char *x = p, t;\n\n  t = x[0];\n  x[0] = x[7];\n  x[7] = t;\n  t = x[1];\n  x[1] = x[6];\n  x[6] = t;\n  t = x[2];\n  x[2] = x[5];\n  x[5] = t;\n  t = x[3];\n  x[3] = x[4];\n  x[4] = t;\n}\n\n// used by t_stream.c\nuint64_t intrev64(uint64_t v) {\n  memrev64(&v);\n  return v;\n}\n"
  },
  {
    "path": "src/redis/redis_aux.h",
    "content": "#ifndef __REDIS_AUX_H\n#define __REDIS_AUX_H\n\n#include \"sds.h\"\n\n/* redis.h auxiliary definitions */\n/* the last one in object.h is OBJ_STREAM and it is 6,\n * this will add enough place for Redis types to grow */\n#define OBJ_JSON 15U\n#define OBJ_SBF  16U\n#define OBJ_CMS  17U\n#define OBJ_TOPK 18U\n\n// A pseudo type for keys stored in the db, same as OBJ_MODULE which is not used in Dragonfly.\n#define OBJ_KEY  5U\n\n/* How many types of objects exist */\n#define OBJ_TYPE_MAX 19U\n\n#define CONFIG_RUN_ID_SIZE 40U\n\ntypedef struct ServerStub {\n  size_t max_map_field_len, max_listpack_map_bytes;\n\n  long long stream_node_max_entries;\n} Server;\n\nextern Server server;\n\n#define ZSET_MAX_LISTPACK_ENTRIES 128\n#define ZSET_MAX_LISTPACK_VALUE 32\n\nvoid InitRedisTables();\n\n/* The actual Redis Object */\n#define OBJ_STRING 0U    /* String object. */\n#define OBJ_LIST 1U      /* List object. */\n#define OBJ_SET 2U       /* Set object. */\n#define OBJ_ZSET 3U      /* Sorted set object. */\n#define OBJ_HASH 4U      /* Hash object. */\n#define OBJ_MODULE 5U    /* Module object. */\n#define OBJ_STREAM 6U    /* Stream object. */\n\n/* Objects encoding. Some kind of objects like Strings and Hashes can be\n * internally represented in multiple ways. The 'encoding' field of the object\n * is set to one of this fields for this object. */\n#define OBJ_ENCODING_RAW 0U     /* Raw representation */\n#define OBJ_ENCODING_INT 1U     /* Encoded as integer */\n#define OBJ_ENCODING_HT 2U      /* Encoded as hash table */\n#define OBJ_ENCODING_ZIPMAP 3U  /* Encoded as zipmap */\n#define OBJ_ENCODING_LINKEDLIST 4U /* No longer used: old list encoding. 
*/\n#define OBJ_ENCODING_ZIPLIST 5U /* Encoded as ziplist */\n#define OBJ_ENCODING_INTSET 6U  /* Encoded as intset */\n#define OBJ_ENCODING_SKIPLIST 7U  /* Encoded as skiplist */\n#define OBJ_ENCODING_EMBSTR 8U  /* Embedded sds string encoding */\n// #define OBJ_ENCODING_QUICKLIST 9U /* Encoded as linked list of ziplists */\n#define OBJ_ENCODING_STREAM 10U /* Encoded as a radix tree of listpacks */\n#define OBJ_ENCODING_LISTPACK 11 /* Encoded as a listpack */\n#define OBJ_ENCODING_COMPRESS_INTERNAL 15U  /* Kept as lzf compressed, to pass compressed blob to another thread */\n\n\n#endif /* __REDIS_AUX_H */\n"
  },
  {
    "path": "src/redis/sds.c",
    "content": "/* SDSLib 2.0 -- A C dynamic strings library\n *\n * Copyright (c) 2006-2015, Salvatore Sanfilippo <antirez at gmail dot com>\n * Copyright (c) 2015, Oran Agra\n * Copyright (c) 2015, Redis Labs, Inc\n * All rights reserved.\n *\n * Redistribution and use in source and binary forms, with or without\n * modification, are permitted provided that the following conditions are met:\n *\n *   * Redistributions of source code must retain the above copyright notice,\n *     this list of conditions and the following disclaimer.\n *   * Redistributions in binary form must reproduce the above copyright\n *     notice, this list of conditions and the following disclaimer in the\n *     documentation and/or other materials provided with the distribution.\n *   * Neither the name of Redis nor the names of its contributors may be used\n *     to endorse or promote products derived from this software without\n *     specific prior written permission.\n *\n * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS \"AS IS\"\n * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE\n * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE\n * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE\n * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR\n * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF\n * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS\n * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN\n * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)\n * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE\n * POSSIBILITY OF SUCH DAMAGE.\n */\n\n#include <stdio.h>\n#include <stdlib.h>\n#include <string.h>\n#include <ctype.h>\n#include <assert.h>\n#include <limits.h>\n#include \"sds.h\"\n#include \"sdsalloc.h\"\n\nconst char *SDS_NOINIT = \"SDS_NOINIT\";\n\nstatic inline int sdsHdrSize(char type) {\n    switch(type&SDS_TYPE_MASK) {\n        case SDS_TYPE_5:\n            return sizeof(struct sdshdr5);\n        case SDS_TYPE_8:\n            return sizeof(struct sdshdr8);\n        case SDS_TYPE_16:\n            return sizeof(struct sdshdr16);\n        case SDS_TYPE_32:\n            return sizeof(struct sdshdr32);\n        case SDS_TYPE_64:\n            return sizeof(struct sdshdr64);\n    }\n    return 0;\n}\n\nstatic inline char sdsReqType(size_t string_size) {\n    if (string_size < 1<<5)\n        return SDS_TYPE_5;\n    if (string_size < 1<<8)\n        return SDS_TYPE_8;\n    if (string_size < 1<<16)\n        return SDS_TYPE_16;\n#if (LONG_MAX == LLONG_MAX)\n    if (string_size < 1ll<<32)\n        return SDS_TYPE_32;\n    return SDS_TYPE_64;\n#else\n    return SDS_TYPE_32;\n#endif\n}\n\nstatic inline size_t sdsTypeMaxSize(char type) {\n    if (type == SDS_TYPE_5)\n        return (1<<5) - 1;\n    if (type == SDS_TYPE_8)\n        return (1<<8) - 1;\n    if (type == SDS_TYPE_16)\n        return (1<<16) - 1;\n#if (LONG_MAX == LLONG_MAX)\n    if (type == SDS_TYPE_32)\n        return (1ll<<32) - 1;\n#endif\n    return -1; /* this is equivalent to the max SDS_TYPE_64 or 
SDS_TYPE_32 */\n}\n\n/* Create a new sds string with the content specified by the 'init' pointer\n * and 'initlen'.\n * If NULL is used for 'init' the string is initialized with zero bytes.\n * If SDS_NOINIT is used, the buffer is left uninitialized;\n *\n * The string is always null-terminated (all the sds strings are, always) so\n * even if you create an sds string with:\n *\n * mystring = sdsnewlen(\"abc\",3);\n *\n * You can print the string with printf() as there is an implicit \\0 at the\n * end of the string. However the string is binary safe and can contain\n * \\0 characters in the middle, as the length is stored in the sds header. */\nsds _sdsnewlen(const void *init, size_t initlen, int trymalloc) {\n    void *sh;\n    sds s;\n    char type = sdsReqType(initlen);\n    /* Empty strings are usually created in order to append. Use type 8\n     * since type 5 is not good at this. */\n    if (type == SDS_TYPE_5 && initlen == 0) type = SDS_TYPE_8;\n    int hdrlen = sdsHdrSize(type);\n    unsigned char *fp; /* flags pointer. 
*/\n    size_t usable;\n\n    assert(initlen + hdrlen + 1 > initlen); /* Catch size_t overflow */\n    sh = trymalloc?\n        s_trymalloc_usable(hdrlen+initlen+1, &usable) :\n        s_malloc_usable(hdrlen+initlen+1, &usable);\n    if (sh == NULL) return NULL;\n    if (init==SDS_NOINIT)\n        init = NULL;\n    else if (!init)\n        memset(sh, 0, hdrlen+initlen+1);\n    s = (char*)sh+hdrlen;\n    fp = ((unsigned char*)s)-1;\n    usable = usable-hdrlen-1;\n    if (usable > sdsTypeMaxSize(type))\n        usable = sdsTypeMaxSize(type);\n    switch(type) {\n        case SDS_TYPE_5: {\n            *fp = type | (initlen << SDS_TYPE_BITS);\n            break;\n        }\n        case SDS_TYPE_8: {\n            SDS_HDR_VAR(8,s);\n            sh->len = initlen;\n            sh->alloc = usable;\n            *fp = type;\n            break;\n        }\n        case SDS_TYPE_16: {\n            SDS_HDR_VAR(16,s);\n            sh->len = initlen;\n            sh->alloc = usable;\n            *fp = type;\n            break;\n        }\n        case SDS_TYPE_32: {\n            SDS_HDR_VAR(32,s);\n            sh->len = initlen;\n            sh->alloc = usable;\n            *fp = type;\n            break;\n        }\n        case SDS_TYPE_64: {\n            SDS_HDR_VAR(64,s);\n            sh->len = initlen;\n            sh->alloc = usable;\n            *fp = type;\n            break;\n        }\n    }\n    if (initlen && init)\n        memcpy(s, init, initlen);\n    s[initlen] = '\\0';\n    return s;\n}\n\nsds sdsnewlen(const void *init, size_t initlen) {\n    return _sdsnewlen(init, initlen, 0);\n}\n\n/* Create an empty (zero length) sds string. Even in this case the string\n * always has an implicit null term. */\nsds sdsempty(void) {\n    return sdsnewlen(\"\",0);\n}\n\n/* Create a new sds string starting from a null terminated C string. */\nsds sdsnew(const char *init) {\n    size_t initlen = (init == NULL) ? 
0 : strlen(init);\n    return sdsnewlen(init, initlen);\n}\n\n/* Duplicate an sds string. */\nsds sdsdup(const sds s) {\n    return sdsnewlen(s, sdslen(s));\n}\n\n/* Free an sds string. No operation is performed if 's' is NULL. */\nvoid sdsfree(sds s) {\n    if (s == NULL) return;\n    s_free((char*)s-sdsHdrSize(s[-1]));\n}\n\n/* Set the sds string length to the length as obtained with strlen(), so\n * considering as content only up to the first null term character.\n *\n * This function is useful when the sds string is hacked manually in some\n * way, like in the following example:\n *\n * s = sdsnew(\"foobar\");\n * s[2] = '\\0';\n * sdsupdatelen(s);\n * printf(\"%d\\n\", sdslen(s));\n *\n * The output will be \"2\", but if we comment out the call to sdsupdatelen()\n * the output will be \"6\" as the string was modified but the logical length\n * remains 6 bytes. */\nvoid sdsupdatelen(sds s) {\n    size_t reallen = strlen(s);\n    sdssetlen(s, reallen);\n}\n\n/* Modify an sds string in-place to make it empty (zero length).\n * However all the existing buffer is not discarded but set as free space\n * so that next append operations will not require allocations up to the\n * number of bytes previously available. 
*/\nvoid sdsclear(sds s) {\n    sdssetlen(s, 0);\n    s[0] = '\\0';\n}\n\n/* Enlarge the free space at the end of the sds string so that the caller\n * is sure that after calling this function can overwrite up to addlen\n * bytes after the end of the string, plus one more byte for nul term.\n * If there's already sufficient free space, this function returns without any\n * action, if there isn't sufficient free space, it'll allocate what's missing,\n * and possibly more:\n * When greedy is 1, enlarge more than needed, to avoid need for future reallocs\n * on incremental growth.\n * When greedy is 0, enlarge just enough so that there's free space for 'addlen'.\n *\n * Note: this does not change the *length* of the sds string as returned\n * by sdslen(), but only the free buffer space we have. */\nsds _sdsMakeRoomFor(sds s, size_t addlen, int greedy) {\n    void *sh, *newsh;\n    size_t avail = sdsavail(s);\n    size_t len, newlen, reqlen;\n    char type, oldtype = s[-1] & SDS_TYPE_MASK;\n    int hdrlen;\n    size_t usable;\n\n    /* Return ASAP if there is enough space left. */\n    if (avail >= addlen) return s;\n\n    len = sdslen(s);\n    sh = (char*)s-sdsHdrSize(oldtype);\n    reqlen = newlen = (len+addlen);\n    (void)reqlen;\n    assert(newlen > len);   /* Catch size_t overflow */\n    if (greedy == 1) {\n    if (newlen < SDS_MAX_PREALLOC)\n        newlen *= 2;\n    else\n        newlen += SDS_MAX_PREALLOC;\n    }\n\n    type = sdsReqType(newlen);\n\n    /* Don't use type 5: the user is appending to the string and type 5 is\n     * not able to remember empty space, so sdsMakeRoomFor() must be called\n     * at every appending operation. 
*/\n    if (type == SDS_TYPE_5) type = SDS_TYPE_8;\n\n    hdrlen = sdsHdrSize(type);\n    assert(hdrlen + newlen + 1 > reqlen);  /* Catch size_t overflow */\n    if (oldtype==type) {\n        newsh = s_realloc_usable(sh, hdrlen+newlen+1, &usable);\n        if (newsh == NULL) return NULL;\n        s = (char*)newsh+hdrlen;\n    } else {\n        /* Since the header size changes, need to move the string forward,\n         * and can't use realloc */\n        newsh = s_malloc_usable(hdrlen+newlen+1, &usable);\n        if (newsh == NULL) return NULL;\n        memcpy((char*)newsh+hdrlen, s, len+1);\n        s_free(sh);\n        s = (char*)newsh+hdrlen;\n        s[-1] = type;\n        sdssetlen(s, len);\n    }\n    usable = usable-hdrlen-1;\n    if (usable > sdsTypeMaxSize(type))\n        usable = sdsTypeMaxSize(type);\n    sdssetalloc(s, usable);\n    return s;\n}\n\n/* Enlarge the free space at the end of the sds string more than needed,\n * This is useful to avoid repeated re-allocations when repeatedly appending to the sds. */\nsds sdsMakeRoomFor(sds s, size_t addlen) {\n    return _sdsMakeRoomFor(s, addlen, 1);\n}\n\n/* Unlike sdsMakeRoomFor(), this one just grows to the necessary size. */\nsds sdsMakeRoomForNonGreedy(sds s, size_t addlen) {\n    return _sdsMakeRoomFor(s, addlen, 0);\n}\n\n/* Reallocate the sds string so that it has no free space at the end. The\n * contained string remains not altered, but next concatenation operations\n * will require a reallocation.\n *\n * After the call, the passed sds string is no longer valid and all the\n * references must be substituted with the new pointer returned by the call. */\nsds sdsRemoveFreeSpace(sds s) {\n    void *sh, *newsh;\n    char type, oldtype = s[-1] & SDS_TYPE_MASK;\n    int hdrlen, oldhdrlen = sdsHdrSize(oldtype);\n    size_t len = sdslen(s);\n    size_t avail = sdsavail(s);\n    sh = (char*)s-oldhdrlen;\n\n    /* Return ASAP if there is no space left. 
*/\n    if (avail == 0) return s;\n\n    /* Check what would be the minimum SDS header that is just good enough to\n     * fit this string. */\n    type = sdsReqType(len);\n    hdrlen = sdsHdrSize(type);\n\n    /* If the type is the same, or at least a large enough type is still\n     * required, we just realloc(), letting the allocator to do the copy\n     * only if really needed. Otherwise if the change is huge, we manually\n     * reallocate the string to use the different header type. */\n    if (oldtype==type || type > SDS_TYPE_8) {\n        newsh = s_realloc(sh, oldhdrlen+len+1);\n        if (newsh == NULL) return NULL;\n        s = (char*)newsh+oldhdrlen;\n    } else {\n        newsh = s_malloc(hdrlen+len+1);\n        if (newsh == NULL) return NULL;\n        memcpy((char*)newsh+hdrlen, s, len+1);\n        s_free(sh);\n        s = (char*)newsh+hdrlen;\n        s[-1] = type;\n        sdssetlen(s, len);\n    }\n    sdssetalloc(s, len);\n    return s;\n}\n\n/* Resize the allocation, this can make the allocation bigger or smaller,\n * if the size is smaller than currently used len, the data will be truncated */\nsds sdsResize(sds s, size_t size) {\n    void *sh, *newsh;\n    char type, oldtype = s[-1] & SDS_TYPE_MASK;\n    int hdrlen, oldhdrlen = sdsHdrSize(oldtype);\n    size_t len = sdslen(s);\n    sh = (char*)s-oldhdrlen;\n\n    /* Return ASAP if the size is already good. */\n    if (sdsalloc(s) == size) return s;\n\n    /* Truncate len if needed. */\n    if (size < len) len = size;\n\n    /* Check what would be the minimum SDS header that is just good enough to\n     * fit this string. */\n    type = sdsReqType(size);\n    /* Don't use type 5, it is not good for strings that are resized. 
*/\n    if (type == SDS_TYPE_5) type = SDS_TYPE_8;\n    hdrlen = sdsHdrSize(type);\n\n    /* If the type is the same, or can hold the size in it with low overhead\n     * (larger than SDS_TYPE_8), we just realloc(), letting the allocator\n     * to do the copy only if really needed. Otherwise if the change is\n     * huge, we manually reallocate the string to use the different header\n     * type. */\n    if (oldtype==type || (type < oldtype && type > SDS_TYPE_8)) {\n        newsh = s_realloc(sh, oldhdrlen+size+1);\n        if (newsh == NULL) return NULL;\n        s = (char*)newsh+oldhdrlen;\n    } else {\n        newsh = s_malloc(hdrlen+size+1);\n        if (newsh == NULL) return NULL;\n        memcpy((char*)newsh+hdrlen, s, len);\n        s_free(sh);\n        s = (char*)newsh+hdrlen;\n        s[-1] = type;\n    }\n    s[len] = 0;\n    sdssetlen(s, len);\n    sdssetalloc(s, size);\n    return s;\n}\n\n/* Return the total size of the allocation of the specified sds string,\n * including:\n * 1) The sds header before the pointer.\n * 2) The string.\n * 3) The free buffer at the end if any.\n * 4) The implicit null term.\n */\nsize_t sdsAllocSize(sds s) {\n    size_t alloc = sdsalloc(s);\n    return sdsHdrSize(s[-1])+alloc+1;\n}\n\n/* Return the pointer of the actual SDS allocation (normally SDS strings\n * are referenced by the start of the string buffer). */\nvoid *sdsAllocPtr(sds s) {\n    return (void*) (s-sdsHdrSize(s[-1]));\n}\n\n/* Increment the sds length and decrements the left free space at the\n * end of the string according to 'incr'. 
Also set the null term\n * in the new end of the string.\n *\n * This function is used in order to fix the string length after the\n * user calls sdsMakeRoomFor(), writes something after the end of\n * the current string, and finally needs to set the new length.\n *\n * Note: it is possible to use a negative increment in order to\n * right-trim the string.\n *\n * Usage example:\n *\n * Using sdsIncrLen() and sdsMakeRoomFor() it is possible to mount the\n * following schema, to cat bytes coming from the kernel to the end of an\n * sds string without copying into an intermediate buffer:\n *\n * oldlen = sdslen(s);\n * s = sdsMakeRoomFor(s, BUFFER_SIZE);\n * nread = read(fd, s+oldlen, BUFFER_SIZE);\n * ... check for nread <= 0 and handle it ...\n * sdsIncrLen(s, nread);\n */\nvoid sdsIncrLen(sds s, ssize_t incr) {\n    unsigned char flags = s[-1];\n    size_t len;\n    switch(flags&SDS_TYPE_MASK) {\n        case SDS_TYPE_5: {\n            unsigned char *fp = ((unsigned char*)s)-1;\n            unsigned char oldlen = SDS_TYPE_5_LEN(flags);\n            assert((incr > 0 && oldlen+incr < 32) || (incr < 0 && oldlen >= (unsigned int)(-incr)));\n            *fp = SDS_TYPE_5 | ((oldlen+incr) << SDS_TYPE_BITS);\n            len = oldlen+incr;\n            break;\n        }\n        case SDS_TYPE_8: {\n            SDS_HDR_VAR(8,s);\n            assert((incr >= 0 && sh->alloc-sh->len >= incr) || (incr < 0 && sh->len >= (unsigned int)(-incr)));\n            len = (sh->len += incr);\n            break;\n        }\n        case SDS_TYPE_16: {\n            SDS_HDR_VAR(16,s);\n            assert((incr >= 0 && sh->alloc-sh->len >= incr) || (incr < 0 && sh->len >= (unsigned int)(-incr)));\n            len = (sh->len += incr);\n            break;\n        }\n        case SDS_TYPE_32: {\n            SDS_HDR_VAR(32,s);\n            assert((incr >= 0 && sh->alloc-sh->len >= (unsigned int)incr) || (incr < 0 && sh->len >= (unsigned int)(-incr)));\n            len = (sh->len += incr);\n     
       break;\n        }\n        case SDS_TYPE_64: {\n            SDS_HDR_VAR(64,s);\n            assert((incr >= 0 && sh->alloc-sh->len >= (uint64_t)incr) || (incr < 0 && sh->len >= (uint64_t)(-incr)));\n            len = (sh->len += incr);\n            break;\n        }\n        default: len = 0; /* Just to avoid compilation warnings. */\n    }\n    s[len] = '\\0';\n}\n\n/* Grow the sds to have the specified length. Bytes that were not part of\n * the original length of the sds will be set to zero.\n *\n * if the specified length is smaller than the current length, no operation\n * is performed. */\nsds sdsgrowzero(sds s, size_t len) {\n    size_t curlen = sdslen(s);\n\n    if (len <= curlen) return s;\n    s = sdsMakeRoomFor(s,len-curlen);\n    if (s == NULL) return NULL;\n\n    /* Make sure added region doesn't contain garbage */\n    memset(s+curlen,0,(len-curlen+1)); /* also set trailing \\0 byte */\n    sdssetlen(s, len);\n    return s;\n}\n\n/* Append the specified binary-safe string pointed by 't' of 'len' bytes to the\n * end of the specified sds string 's'.\n *\n * After the call, the passed sds string is no longer valid and all the\n * references must be substituted with the new pointer returned by the call. */\nsds sdscatlen(sds s, const void *t, size_t len) {\n    size_t curlen = sdslen(s);\n\n    s = sdsMakeRoomFor(s,len);\n    if (s == NULL) return NULL;\n    memcpy(s+curlen, t, len);\n    sdssetlen(s, curlen+len);\n    s[curlen+len] = '\\0';\n    return s;\n}\n\n/* Append the specified null terminated C string to the sds string 's'.\n *\n * After the call, the passed sds string is no longer valid and all the\n * references must be substituted with the new pointer returned by the call. 
*/\nsds sdscat(sds s, const char *t) {\n    return sdscatlen(s, t, strlen(t));\n}\n\n/* Append the specified sds 't' to the existing sds 's'.\n *\n * After the call, the modified sds string is no longer valid and all the\n * references must be substituted with the new pointer returned by the call. */\nsds sdscatsds(sds s, const sds t) {\n    return sdscatlen(s, t, sdslen(t));\n}\n\n/* Destructively modify the sds string 's' to hold the specified binary\n * safe string pointed by 't' of length 'len' bytes. */\nsds sdscpylen(sds s, const char *t, size_t len) {\n    if (sdsalloc(s) < len) {\n        s = sdsMakeRoomFor(s,len-sdslen(s));\n        if (s == NULL) return NULL;\n    }\n    memcpy(s, t, len);\n    s[len] = '\\0';\n    sdssetlen(s, len);\n    return s;\n}\n\n/* Like sdscpylen() but 't' must be a null-terminated string so that the length\n * of the string is obtained with strlen(). */\nsds sdscpy(sds s, const char *t) {\n    return sdscpylen(s, t, strlen(t));\n}\n\n/* Helper for sdscatlonglong() doing the actual number -> string\n * conversion. 's' must point to a string with room for at least\n * SDS_LLSTR_SIZE bytes.\n *\n * The function returns the length of the null-terminated string\n * representation stored at 's'. */\n#define SDS_LLSTR_SIZE 21\nint sdsll2str(char *s, long long value) {\n    char *p, aux;\n    unsigned long long v;\n    size_t l;\n\n    /* Generate the string representation, this method produces\n     * a reversed string. */\n    if (value < 0) {\n        /* Since v is unsigned, if value==LLONG_MIN, -LLONG_MIN will overflow. */\n        if (value != LLONG_MIN) {\n            v = -value;\n        } else {\n            v = ((unsigned long long)LLONG_MAX) + 1;\n        }\n    } else {\n        v = value;\n    }\n\n    p = s;\n    do {\n        *p++ = '0'+(v%10);\n        v /= 10;\n    } while(v);\n    if (value < 0) *p++ = '-';\n\n    /* Compute length and add null term. */\n    l = p-s;\n    *p = '\\0';\n\n    /* Reverse the string. 
*/\n    p--;\n    while(s < p) {\n        aux = *s;\n        *s = *p;\n        *p = aux;\n        s++;\n        p--;\n    }\n    return l;\n}\n\n/* Identical sdsll2str(), but for unsigned long long type. */\nint sdsull2str(char *s, unsigned long long v) {\n    char *p, aux;\n    size_t l;\n\n    /* Generate the string representation, this method produces\n     * a reversed string. */\n    p = s;\n    do {\n        *p++ = '0'+(v%10);\n        v /= 10;\n    } while(v);\n\n    /* Compute length and add null term. */\n    l = p-s;\n    *p = '\\0';\n\n    /* Reverse the string. */\n    p--;\n    while(s < p) {\n        aux = *s;\n        *s = *p;\n        *p = aux;\n        s++;\n        p--;\n    }\n    return l;\n}\n\n/* Create an sds string from a long long value. It is much faster than:\n *\n * sdscatprintf(sdsempty(),\"%lld\\n\", value);\n */\nsds sdsfromlonglong(long long value) {\n    char buf[SDS_LLSTR_SIZE + 10];\n    int len = sdsll2str(buf,value);\n\n    return sdsnewlen(buf,len);\n}\n\n/* Like sdscatprintf() but gets va_list instead of being variadic. */\nsds sdscatvprintf(sds s, const char *fmt, va_list ap) {\n    va_list cpy;\n    char staticbuf[1024], *buf = staticbuf, *t;\n    size_t buflen = strlen(fmt)*2;\n    int bufstrlen;\n\n    /* We try to start using a static buffer for speed.\n     * If not possible we revert to heap allocation. */\n    if (buflen > sizeof(staticbuf)) {\n        buf = s_malloc(buflen);\n        if (buf == NULL) return NULL;\n    } else {\n        buflen = sizeof(staticbuf);\n    }\n\n    /* Alloc enough space for buffer and \\0 after failing to\n     * fit the string in the current buffer size. 
*/\n    while(1) {\n        va_copy(cpy,ap);\n        bufstrlen = vsnprintf(buf, buflen, fmt, cpy);\n        va_end(cpy);\n        if (bufstrlen < 0) {\n            if (buf != staticbuf) s_free(buf);\n            return NULL;\n        }\n        if (((size_t)bufstrlen) >= buflen) {\n            if (buf != staticbuf) s_free(buf);\n            buflen = ((size_t)bufstrlen) + 1;\n            buf = s_malloc(buflen);\n            if (buf == NULL) return NULL;\n            continue;\n        }\n        break;\n    }\n\n    /* Finally concat the obtained string to the SDS string and return it. */\n    t = sdscatlen(s, buf, bufstrlen);\n    if (buf != staticbuf) s_free(buf);\n    return t;\n}\n\n/* Append to the sds string 's' a string obtained using printf-alike format\n * specifier.\n *\n * After the call, the modified sds string is no longer valid and all the\n * references must be substituted with the new pointer returned by the call.\n *\n * Example:\n *\n * s = sdsnew(\"Sum is: \");\n * s = sdscatprintf(s,\"%d+%d = %d\",a,b,a+b).\n *\n * Often you need to create a string from scratch with the printf-alike\n * format. When this is the need, just use sdsempty() as the target string:\n *\n * s = sdscatprintf(sdsempty(), \"... your format ...\", args);\n */\nsds sdscatprintf(sds s, const char *fmt, ...) {\n    va_list ap;\n    char *t;\n    va_start(ap, fmt);\n    t = sdscatvprintf(s,fmt,ap);\n    va_end(ap);\n    return t;\n}\n\n/* This function is similar to sdscatprintf, but much faster as it does\n * not rely on sprintf() family functions implemented by the libc that\n * are often very slow. 
Moreover directly handling the sds string as\n * new data is concatenated provides a performance improvement.\n *\n * However this function only handles an incompatible subset of printf-alike\n * format specifiers:\n *\n * %s - C String\n * %S - SDS string\n * %i - signed int\n * %I - 64 bit signed integer (long long, int64_t)\n * %u - unsigned int\n * %U - 64 bit unsigned integer (unsigned long long, uint64_t)\n * %% - Verbatim \"%\" character.\n */\nsds sdscatfmt(sds s, char const *fmt, ...) {\n    size_t initlen = sdslen(s);\n    const char *f = fmt;\n    long i;\n    va_list ap;\n\n    /* To avoid continuous reallocations, let's start with a buffer that\n     * can hold at least two times the format string itself. It's not the\n     * best heuristic but seems to work in practice. */\n    s = sdsMakeRoomFor(s, strlen(fmt)*2);\n    va_start(ap,fmt);\n    f = fmt;    /* Next format specifier byte to process. */\n    i = initlen; /* Position of the next byte to write to dest str. */\n    while(*f) {\n        char next, *str;\n        size_t l;\n        long long num;\n        unsigned long long unum;\n\n        /* Make sure there is always space for at least 1 char. */\n        if (sdsavail(s)==0) {\n            s = sdsMakeRoomFor(s,1);\n        }\n\n        switch(*f) {\n        case '%':\n            next = *(f+1);\n            if (next == '\\0') break;\n            f++;\n            switch(next) {\n            case 's':\n            case 'S':\n                str = va_arg(ap,char*);\n                l = (next == 's') ? 
strlen(str) : sdslen(str);\n                if (sdsavail(s) < l) {\n                    s = sdsMakeRoomFor(s,l);\n                }\n                memcpy(s+i,str,l);\n                sdsinclen(s,l);\n                i += l;\n                break;\n            case 'i':\n            case 'I':\n                if (next == 'i')\n                    num = va_arg(ap,int);\n                else\n                    num = va_arg(ap,long long);\n                {\n                    char buf[SDS_LLSTR_SIZE];\n                    l = sdsll2str(buf,num);\n                    if (sdsavail(s) < l) {\n                        s = sdsMakeRoomFor(s,l);\n                    }\n                    memcpy(s+i,buf,l);\n                    sdsinclen(s,l);\n                    i += l;\n                }\n                break;\n            case 'u':\n            case 'U':\n                if (next == 'u')\n                    unum = va_arg(ap,unsigned int);\n                else\n                    unum = va_arg(ap,unsigned long long);\n                {\n                    char buf[SDS_LLSTR_SIZE];\n                    l = sdsull2str(buf,unum);\n                    if (sdsavail(s) < l) {\n                        s = sdsMakeRoomFor(s,l);\n                    }\n                    memcpy(s+i,buf,l);\n                    sdsinclen(s,l);\n                    i += l;\n                }\n                break;\n            default: /* Handle %% and generally %<unknown>. 
*/\n                s[i++] = next;\n                sdsinclen(s,1);\n                break;\n            }\n            break;\n        default:\n            s[i++] = *f;\n            sdsinclen(s,1);\n            break;\n        }\n        f++;\n    }\n    va_end(ap);\n\n    /* Add null-term */\n    s[i] = '\\0';\n    return s;\n}\n\n/* Remove the part of the string from left and from right composed just of\n * contiguous characters found in 'cset', that is a null terminated C string.\n *\n * After the call, the modified sds string is no longer valid and all the\n * references must be substituted with the new pointer returned by the call.\n *\n * Example:\n *\n * s = sdsnew(\"AA...AA.a.aa.aHelloWorld     :::\");\n * s = sdstrim(s,\"Aa. :\");\n * printf(\"%s\\n\", s);\n *\n * Output will be just \"HelloWorld\".\n */\nsds sdstrim(sds s, const char *cset) {\n    char *end, *sp, *ep;\n    size_t len;\n\n    sp = s;\n    ep = end = s+sdslen(s)-1;\n    while(sp <= end && strchr(cset, *sp)) sp++;\n    while(ep > sp && strchr(cset, *ep)) ep--;\n    len = (ep-sp)+1;\n    if (s != sp) memmove(s, sp, len);\n    s[len] = '\\0';\n    sdssetlen(s,len);\n    return s;\n}\n\n/* Changes the input string to be a subset of the original.\n * It does not release the free space in the string, so a call to\n * sdsRemoveFreeSpace may be wise after. 
*/\nvoid sdssubstr(sds s, size_t start, size_t len) {\n    /* Clamp out of range input */\n    size_t oldlen = sdslen(s);\n    if (start >= oldlen) start = len = 0;\n    if (len > oldlen-start) len = oldlen-start;\n\n    /* Move the data */\n    if (len) memmove(s, s+start, len);\n    s[len] = 0;\n    sdssetlen(s,len);\n}\n\n/* Turn the string into a smaller (or equal) string containing only the\n * substring specified by the 'start' and 'end' indexes.\n *\n * start and end can be negative, where -1 means the last character of the\n * string, -2 the penultimate character, and so forth.\n *\n * The interval is inclusive, so the start and end characters will be part\n * of the resulting string.\n *\n * The string is modified in-place.\n *\n * Return value:\n * -1 (error) if sdslen(s) is larger than maximum positive ssize_t value.\n *  0 on success.\n *\n * Example:\n *\n * s = sdsnew(\"Hello World\");\n * sdsrange(s,1,-1); => \"ello World\"\n */\nint sdsrange(sds s, ssize_t start, ssize_t end) {\n    size_t newlen, len = sdslen(s);\n    if (len > SSIZE_MAX) return -1;\n\n    if (len == 0) return 0;\n    if (start < 0) {\n        start = len+start;\n        if (start < 0) start = 0;\n    }\n    if (end < 0) {\n        end = len+end;\n        if (end < 0) end = 0;\n    }\n    newlen = (start > end) ? 0 : (end-start)+1;\n    if (newlen != 0) {\n        if (start >= (ssize_t)len) {\n            newlen = 0;\n        } else if (end >= (ssize_t)len) {\n            end = len-1;\n            newlen = (start > end) ? 0 : (end-start)+1;\n        }\n    } else {\n        start = 0;\n    }\n    if (start && newlen) memmove(s, s+start, newlen);\n    s[newlen] = 0;\n    sdssetlen(s,newlen);\n    return 0;\n}\n\n/* Apply tolower() to every character of the sds string 's'. */\nvoid sdstolower(sds s) {\n    size_t len = sdslen(s), j;\n\n    for (j = 0; j < len; j++) s[j] = tolower(s[j]);\n}\n\n/* Apply toupper() to every character of the sds string 's'. 
*/\nvoid sdstoupper(sds s) {\n    size_t len = sdslen(s), j;\n\n    for (j = 0; j < len; j++) s[j] = toupper(s[j]);\n}\n\n/* Compare two sds strings s1 and s2 with memcmp().\n *\n * Return value:\n *\n *     positive if s1 > s2.\n *     negative if s1 < s2.\n *     0 if s1 and s2 are exactly the same binary string.\n *\n * If two strings share exactly the same prefix, but one of the two has\n * additional characters, the longer string is considered to be greater than\n * the smaller one. */\nint sdscmp(const sds s1, const sds s2) {\n    size_t l1, l2, minlen;\n    int cmp;\n\n    l1 = sdslen(s1);\n    l2 = sdslen(s2);\n    minlen = (l1 < l2) ? l1 : l2;\n    cmp = memcmp(s1,s2,minlen);\n    if (cmp == 0) return l1>l2? 1: (l1<l2? -1: 0);\n    return cmp;\n}\n\n/* Split 's' with separator in 'sep'. An array\n * of sds strings is returned. *count will be set\n * by reference to the number of tokens returned.\n *\n * On out of memory, zero length string, zero length\n * separator, NULL is returned.\n *\n * Note that 'sep' is able to split a string using\n * a multi-character separator. For example\n * sdssplit(\"foo_-_bar\",\"_-_\"); will return two\n * elements \"foo\" and \"bar\".\n *\n * This version of the function is binary-safe but\n * requires length arguments. 
sdssplit() is just the\n * same function but for zero-terminated strings.\n */\nsds *sdssplitlen(const char *s, ssize_t len, const char *sep, int seplen, int *count) {\n    int elements = 0, slots = 5;\n    long start = 0, j;\n    sds *tokens;\n\n    if (seplen < 1 || len <= 0) {\n        *count = 0;\n        return NULL;\n    }\n    tokens = s_malloc(sizeof(sds)*slots);\n    if (tokens == NULL) return NULL;\n\n    for (j = 0; j < (len-(seplen-1)); j++) {\n        /* make sure there is room for the next element and the final one */\n        if (slots < elements+2) {\n            sds *newtokens;\n\n            slots *= 2;\n            newtokens = s_realloc(tokens,sizeof(sds)*slots);\n            if (newtokens == NULL) goto cleanup;\n            tokens = newtokens;\n        }\n        /* search the separator */\n        if ((seplen == 1 && *(s+j) == sep[0]) || (memcmp(s+j,sep,seplen) == 0)) {\n            tokens[elements] = sdsnewlen(s+start,j-start);\n            if (tokens[elements] == NULL) goto cleanup;\n            elements++;\n            start = j+seplen;\n            j = j+seplen-1; /* skip the separator */\n        }\n    }\n    /* Add the final element. We are sure there is room in the tokens array. */\n    tokens[elements] = sdsnewlen(s+start,len-start);\n    if (tokens[elements] == NULL) goto cleanup;\n    elements++;\n    *count = elements;\n    return tokens;\n\ncleanup:\n    {\n        int i;\n        for (i = 0; i < elements; i++) sdsfree(tokens[i]);\n        s_free(tokens);\n        *count = 0;\n        return NULL;\n    }\n}\n\n/* Free the result returned by sdssplitlen(), or do nothing if 'tokens' is NULL. 
*/\nvoid sdsfreesplitres(sds *tokens, int count) {\n    if (!tokens) return;\n    while(count--)\n        sdsfree(tokens[count]);\n    s_free(tokens);\n}\n\n/* Append to the sds string \"s\" an escaped string representation where\n * all the non-printable characters (tested with isprint()) are turned into\n * escapes in the form \"\\n\\r\\a....\" or \"\\x<hex-number>\".\n *\n * After the call, the modified sds string is no longer valid and all the\n * references must be substituted with the new pointer returned by the call. */\nsds sdscatrepr(sds s, const char *p, size_t len) {\n    s = sdscatlen(s,\"\\\"\",1);\n    while(len--) {\n        switch(*p) {\n        case '\\\\':\n        case '\"':\n            s = sdscatprintf(s,\"\\\\%c\",*p);\n            break;\n        case '\\n': s = sdscatlen(s,\"\\\\n\",2); break;\n        case '\\r': s = sdscatlen(s,\"\\\\r\",2); break;\n        case '\\t': s = sdscatlen(s,\"\\\\t\",2); break;\n        case '\\a': s = sdscatlen(s,\"\\\\a\",2); break;\n        case '\\b': s = sdscatlen(s,\"\\\\b\",2); break;\n        default:\n            if (isprint(*p))\n                s = sdscatprintf(s,\"%c\",*p);\n            else\n                s = sdscatprintf(s,\"\\\\x%02x\",(unsigned char)*p);\n            break;\n        }\n        p++;\n    }\n    return sdscatlen(s,\"\\\"\",1);\n}\n\n/* Helper function for sdssplitargs() that returns non zero if 'c'\n * is a valid hex digit. 
*/\nint is_hex_digit(char c) {\n    return (c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') ||\n           (c >= 'A' && c <= 'F');\n}\n\n/* Helper function for sdssplitargs() that converts a hex digit into an\n * integer from 0 to 15 */\nint hex_digit_to_int(char c) {\n    switch(c) {\n    case '0': return 0;\n    case '1': return 1;\n    case '2': return 2;\n    case '3': return 3;\n    case '4': return 4;\n    case '5': return 5;\n    case '6': return 6;\n    case '7': return 7;\n    case '8': return 8;\n    case '9': return 9;\n    case 'a': case 'A': return 10;\n    case 'b': case 'B': return 11;\n    case 'c': case 'C': return 12;\n    case 'd': case 'D': return 13;\n    case 'e': case 'E': return 14;\n    case 'f': case 'F': return 15;\n    default: return 0;\n    }\n}\n\n/* Split a line into arguments, where every argument can be in the\n * following programming-language REPL-alike form:\n *\n * foo bar \"newline are supported\\n\" and \"\\xff\\x00otherstuff\"\n *\n * The number of arguments is stored into *argc, and an array\n * of sds is returned.\n *\n * The caller should free the resulting array of sds strings with\n * sdsfreesplitres().\n *\n * Note that sdscatrepr() is able to convert back a string into\n * a quoted string in the same format sdssplitargs() is able to parse.\n *\n * The function returns the allocated tokens on success, even when the\n * input string is empty, or NULL if the input contains unbalanced\n * quotes or closed quotes followed by non space characters\n * as in: \"foo\"bar or \"foo'\n */\nsds *sdssplitargs(const char *line, int *argc) {\n    const char *p = line;\n    char *current = NULL;\n    char **vector = NULL;\n\n    *argc = 0;\n    while(1) {\n        /* skip blanks */\n        while(*p && isspace(*p)) p++;\n        if (*p) {\n            /* get a token */\n            int inq=0;  /* set to 1 if we are in \"quotes\" */\n            int insq=0; /* set to 1 if we are in 'single quotes' */\n            int done=0;\n\n        
    if (current == NULL) current = sdsempty();\n            while(!done) {\n                if (inq) {\n                    if (*p == '\\\\' && *(p+1) == 'x' &&\n                                             is_hex_digit(*(p+2)) &&\n                                             is_hex_digit(*(p+3)))\n                    {\n                        unsigned char byte;\n\n                        byte = (hex_digit_to_int(*(p+2))*16)+\n                                hex_digit_to_int(*(p+3));\n                        current = sdscatlen(current,(char*)&byte,1);\n                        p += 3;\n                    } else if (*p == '\\\\' && *(p+1)) {\n                        char c;\n\n                        p++;\n                        switch(*p) {\n                        case 'n': c = '\\n'; break;\n                        case 'r': c = '\\r'; break;\n                        case 't': c = '\\t'; break;\n                        case 'b': c = '\\b'; break;\n                        case 'a': c = '\\a'; break;\n                        default: c = *p; break;\n                        }\n                        current = sdscatlen(current,&c,1);\n                    } else if (*p == '\"') {\n                        /* closing quote must be followed by a space or\n                         * nothing at all. 
*/\n                        if (*(p+1) && !isspace(*(p+1))) goto err;\n                        done=1;\n                    } else if (!*p) {\n                        /* unterminated quotes */\n                        goto err;\n                    } else {\n                        current = sdscatlen(current,p,1);\n                    }\n                } else if (insq) {\n                    if (*p == '\\\\' && *(p+1) == '\\'') {\n                        p++;\n                        current = sdscatlen(current,\"'\",1);\n                    } else if (*p == '\\'') {\n                        /* closing quote must be followed by a space or\n                         * nothing at all. */\n                        if (*(p+1) && !isspace(*(p+1))) goto err;\n                        done=1;\n                    } else if (!*p) {\n                        /* unterminated quotes */\n                        goto err;\n                    } else {\n                        current = sdscatlen(current,p,1);\n                    }\n                } else {\n                    switch(*p) {\n                    case ' ':\n                    case '\\n':\n                    case '\\r':\n                    case '\\t':\n                    case '\\0':\n                        done=1;\n                        break;\n                    case '\"':\n                        inq=1;\n                        break;\n                    case '\\'':\n                        insq=1;\n                        break;\n                    default:\n                        current = sdscatlen(current,p,1);\n                        break;\n                    }\n                }\n                if (*p) p++;\n            }\n            /* add the token to the vector */\n            vector = s_realloc(vector,((*argc)+1)*sizeof(char*));\n            vector[*argc] = current;\n            (*argc)++;\n            current = NULL;\n        } else {\n            /* Even on empty input string return 
something not NULL. */\n            if (vector == NULL) vector = s_malloc(sizeof(void*));\n            return vector;\n        }\n    }\n\nerr:\n    while((*argc)--)\n        sdsfree(vector[*argc]);\n    s_free(vector);\n    if (current) sdsfree(current);\n    *argc = 0;\n    return NULL;\n}\n\n/* Modify the string substituting all the occurrences of the set of\n * characters specified in the 'from' string to the corresponding character\n * in the 'to' array.\n *\n * For instance: sdsmapchars(mystring, \"ho\", \"01\", 2)\n * will have the effect of turning the string \"hello\" into \"0ell1\".\n *\n * The function returns the sds string pointer, that is always the same\n * as the input pointer since no resize is needed. */\nsds sdsmapchars(sds s, const char *from, const char *to, size_t setlen) {\n    size_t j, i, l = sdslen(s);\n\n    for (j = 0; j < l; j++) {\n        for (i = 0; i < setlen; i++) {\n            if (s[j] == from[i]) {\n                s[j] = to[i];\n                break;\n            }\n        }\n    }\n    return s;\n}\n\n/* Join an array of C strings using the specified separator (also a C string).\n * Returns the result as an sds string. */\nsds sdsjoin(char **argv, int argc, char *sep) {\n    sds join = sdsempty();\n    int j;\n\n    for (j = 0; j < argc; j++) {\n        join = sdscat(join, argv[j]);\n        if (j != argc-1) join = sdscat(join,sep);\n    }\n    return join;\n}\n\n/* Like sdsjoin, but joins an array of SDS strings. */\nsds sdsjoinsds(sds *argv, int argc, const char *sep, size_t seplen) {\n    sds join = sdsempty();\n    int j;\n\n    for (j = 0; j < argc; j++) {\n        join = sdscatsds(join, argv[j]);\n        if (j != argc-1) join = sdscatlen(join,sep,seplen);\n    }\n    return join;\n}\n\n/* Wrappers to the allocators used by SDS. Note that SDS will actually\n * just use the macros defined into sdsalloc.h in order to avoid to pay\n * the overhead of function calls. 
Here we define these wrappers only for\n * the programs SDS is linked to, if they want to touch the SDS internals\n * even if they use a different allocator. */\nvoid *sds_malloc(size_t size) { return s_malloc(size); }\nvoid *sds_realloc(void *ptr, size_t size) { return s_realloc(ptr,size); }\nvoid sds_free(void *ptr) { s_free(ptr); }\n\n/* Perform expansion of a template string and return the result as a newly\n * allocated sds.\n *\n * Template variables are specified using curly brackets, e.g. {variable}.\n * An opening bracket can be quoted by repeating it twice.\n */\nsds sdstemplate(const char *template, sdstemplate_callback_t cb_func, void *cb_arg)\n{\n    sds res = sdsempty();\n    const char *p = template;\n\n    while (*p) {\n        /* Find next variable, copy everything until there */\n        const char *sv = strchr(p, '{');\n        if (!sv) {\n            /* Not found: copy till rest of template and stop */\n            res = sdscat(res, p);\n            break;\n        } else if (sv > p) {\n            /* Found: copy anything up to the beginning of the variable */\n            res = sdscatlen(res, p, sv - p);\n        }\n\n        /* Skip into variable name, handle premature end or quoting */\n        sv++;\n        if (!*sv) goto error;       /* Premature end of template */\n        if (*sv == '{') {\n            /* Quoted '{' */\n            p = sv + 1;\n            res = sdscat(res, \"{\");\n            continue;\n        }\n\n        /* Find end of variable name, handle premature end of template */\n        const char *ev = strchr(sv, '}');\n        if (!ev) goto error;\n\n        /* Pass variable name to callback and obtain value. If callback failed,\n         * abort. 
*/\n        sds varname = sdsnewlen(sv, ev - sv);\n        sds value = cb_func(varname, cb_arg);\n        sdsfree(varname);\n        if (!value) goto error;\n\n        /* Append value to result and continue */\n        res = sdscat(res, value);\n        sdsfree(value);\n        p = ev + 1;\n    }\n\n    return res;\n\nerror:\n    sdsfree(res);\n    return NULL;\n}\n\n#ifdef REDIS_TEST\n#include <stdio.h>\n#include <limits.h>\n#include \"testhelp.h\"\n\n#define UNUSED(x) (void)(x)\n\nstatic sds sdsTestTemplateCallback(sds varname, void *arg) {\n    UNUSED(arg);\n    static const char *_var1 = \"variable1\";\n    static const char *_var2 = \"variable2\";\n\n    if (!strcmp(varname, _var1)) return sdsnew(\"value1\");\n    else if (!strcmp(varname, _var2)) return sdsnew(\"value2\");\n    else return NULL;\n}\n\nint sdsTest(int argc, char **argv, int flags) {\n    UNUSED(argc);\n    UNUSED(argv);\n    UNUSED(flags);\n\n    {\n        sds x = sdsnew(\"foo\"), y;\n\n        test_cond(\"Create a string and obtain the length\",\n            sdslen(x) == 3 && memcmp(x,\"foo\\0\",4) == 0);\n\n        sdsfree(x);\n        x = sdsnewlen(\"foo\",2);\n        test_cond(\"Create a string with specified length\",\n            sdslen(x) == 2 && memcmp(x,\"fo\\0\",3) == 0);\n\n        x = sdscat(x,\"bar\");\n        test_cond(\"Strings concatenation\",\n            sdslen(x) == 5 && memcmp(x,\"fobar\\0\",6) == 0);\n\n        x = sdscpy(x,\"a\");\n        test_cond(\"sdscpy() against an originally longer string\",\n            sdslen(x) == 1 && memcmp(x,\"a\\0\",2) == 0);\n\n        x = sdscpy(x,\"xyzxxxxxxxxxxyyyyyyyyyykkkkkkkkkk\");\n        test_cond(\"sdscpy() against an originally shorter string\",\n            sdslen(x) == 33 &&\n            memcmp(x,\"xyzxxxxxxxxxxyyyyyyyyyykkkkkkkkkk\\0\",33) == 0);\n\n        sdsfree(x);\n        x = sdscatprintf(sdsempty(),\"%d\",123);\n        test_cond(\"sdscatprintf() seems working in the base case\",\n            sdslen(x) == 3 && 
memcmp(x,\"123\\0\",4) == 0);\n\n        sdsfree(x);\n        x = sdscatprintf(sdsempty(),\"a%cb\",0);\n        test_cond(\"sdscatprintf() seems working with \\\\0 inside of result\",\n            sdslen(x) == 3 && memcmp(x,\"a\\0\"\"b\\0\",4) == 0);\n\n        {\n            sdsfree(x);\n            char etalon[1024*1024];\n            for (size_t i = 0; i < sizeof(etalon); i++) {\n                etalon[i] = '0';\n            }\n            x = sdscatprintf(sdsempty(),\"%0*d\",(int)sizeof(etalon),0);\n            test_cond(\"sdscatprintf() can print 1MB\",\n                sdslen(x) == sizeof(etalon) && memcmp(x,etalon,sizeof(etalon)) == 0);\n        }\n\n        sdsfree(x);\n        x = sdsnew(\"--\");\n        x = sdscatfmt(x, \"Hello %s World %I,%I--\", \"Hi!\", LLONG_MIN,LLONG_MAX);\n        test_cond(\"sdscatfmt() seems working in the base case\",\n            sdslen(x) == 60 &&\n            memcmp(x,\"--Hello Hi! World -9223372036854775808,\"\n                     \"9223372036854775807--\",60) == 0);\n        printf(\"[%s]\\n\",x);\n\n        sdsfree(x);\n        x = sdsnew(\"--\");\n        x = sdscatfmt(x, \"%u,%U--\", UINT_MAX, ULLONG_MAX);\n        test_cond(\"sdscatfmt() seems working with unsigned numbers\",\n            sdslen(x) == 35 &&\n            memcmp(x,\"--4294967295,18446744073709551615--\",35) == 0);\n\n        sdsfree(x);\n        x = sdsnew(\" x \");\n        sdstrim(x,\" x\");\n        test_cond(\"sdstrim() works when all chars match\",\n            sdslen(x) == 0);\n\n        sdsfree(x);\n        x = sdsnew(\" x \");\n        sdstrim(x,\" \");\n        test_cond(\"sdstrim() works when a single char remains\",\n            sdslen(x) == 1 && x[0] == 'x');\n\n        sdsfree(x);\n        x = sdsnew(\"xxciaoyyy\");\n        sdstrim(x,\"xy\");\n        test_cond(\"sdstrim() correctly trims characters\",\n            sdslen(x) == 4 && memcmp(x,\"ciao\\0\",5) == 0);\n\n        y = sdsdup(x);\n        sdsrange(y,1,1);\n        
test_cond(\"sdsrange(...,1,1)\",\n            sdslen(y) == 1 && memcmp(y,\"i\\0\",2) == 0);\n\n        sdsfree(y);\n        y = sdsdup(x);\n        sdsrange(y,1,-1);\n        test_cond(\"sdsrange(...,1,-1)\",\n            sdslen(y) == 3 && memcmp(y,\"iao\\0\",4) == 0);\n\n        sdsfree(y);\n        y = sdsdup(x);\n        sdsrange(y,-2,-1);\n        test_cond(\"sdsrange(...,-2,-1)\",\n            sdslen(y) == 2 && memcmp(y,\"ao\\0\",3) == 0);\n\n        sdsfree(y);\n        y = sdsdup(x);\n        sdsrange(y,2,1);\n        test_cond(\"sdsrange(...,2,1)\",\n            sdslen(y) == 0 && memcmp(y,\"\\0\",1) == 0);\n\n        sdsfree(y);\n        y = sdsdup(x);\n        sdsrange(y,1,100);\n        test_cond(\"sdsrange(...,1,100)\",\n            sdslen(y) == 3 && memcmp(y,\"iao\\0\",4) == 0);\n\n        sdsfree(y);\n        y = sdsdup(x);\n        sdsrange(y,100,100);\n        test_cond(\"sdsrange(...,100,100)\",\n            sdslen(y) == 0 && memcmp(y,\"\\0\",1) == 0);\n\n        sdsfree(y);\n        y = sdsdup(x);\n        sdsrange(y,4,6);\n        test_cond(\"sdsrange(...,4,6)\",\n            sdslen(y) == 0 && memcmp(y,\"\\0\",1) == 0);\n\n        sdsfree(y);\n        y = sdsdup(x);\n        sdsrange(y,3,6);\n        test_cond(\"sdsrange(...,3,6)\",\n            sdslen(y) == 1 && memcmp(y,\"o\\0\",2) == 0);\n\n        sdsfree(y);\n        sdsfree(x);\n        x = sdsnew(\"foo\");\n        y = sdsnew(\"foa\");\n        test_cond(\"sdscmp(foo,foa)\", sdscmp(x,y) > 0);\n\n        sdsfree(y);\n        sdsfree(x);\n        x = sdsnew(\"bar\");\n        y = sdsnew(\"bar\");\n        test_cond(\"sdscmp(bar,bar)\", sdscmp(x,y) == 0);\n\n        sdsfree(y);\n        sdsfree(x);\n        x = sdsnew(\"aar\");\n        y = sdsnew(\"bar\");\n        test_cond(\"sdscmp(bar,bar)\", sdscmp(x,y) < 0);\n\n        sdsfree(y);\n        sdsfree(x);\n        x = sdsnewlen(\"\\a\\n\\0foo\\r\",7);\n        y = sdscatrepr(sdsempty(),x,sdslen(x));\n        
test_cond(\"sdscatrepr(...data...)\",\n            memcmp(y,\"\\\"\\\\a\\\\n\\\\x00foo\\\\r\\\"\",15) == 0);\n\n        {\n            unsigned int oldfree;\n            char *p;\n            int i;\n            size_t step = 10, j;\n\n            sdsfree(x);\n            sdsfree(y);\n            x = sdsnew(\"0\");\n            test_cond(\"sdsnew() free/len buffers\", sdslen(x) == 1 && sdsavail(x) == 0);\n\n            /* Run the test a few times in order to hit the first two\n             * SDS header types. */\n            for (i = 0; i < 10; i++) {\n                size_t oldlen = sdslen(x);\n                x = sdsMakeRoomFor(x,step);\n                int type = x[-1]&SDS_TYPE_MASK;\n\n                test_cond(\"sdsMakeRoomFor() len\", sdslen(x) == oldlen);\n                if (type != SDS_TYPE_5) {\n                    test_cond(\"sdsMakeRoomFor() free\", sdsavail(x) >= step);\n                    oldfree = sdsavail(x);\n                    UNUSED(oldfree);\n                }\n                p = x+oldlen;\n                for (j = 0; j < step; j++) {\n                    p[j] = 'A'+j;\n                }\n                sdsIncrLen(x,step);\n            }\n            test_cond(\"sdsMakeRoomFor() content\",\n                memcmp(\"0ABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJ\",x,101) == 0);\n            test_cond(\"sdsMakeRoomFor() final length\",sdslen(x)==101);\n\n            sdsfree(x);\n        }\n\n        /* Simple template */\n        x = sdstemplate(\"v1={variable1} v2={variable2}\", sdsTestTemplateCallback, NULL);\n        test_cond(\"sdstemplate() normal flow\",\n                  memcmp(x,\"v1=value1 v2=value2\",19) == 0);\n        sdsfree(x);\n\n        /* Template with callback error */\n        x = sdstemplate(\"v1={variable1} v3={doesnotexist}\", sdsTestTemplateCallback, NULL);\n        test_cond(\"sdstemplate() with callback error\", x == NULL);\n\n        /* Template with empty var 
name */\n        x = sdstemplate(\"v1={\", sdsTestTemplateCallback, NULL);\n        test_cond(\"sdstemplate() with empty var name\", x == NULL);\n\n        /* Template with truncated var name */\n        x = sdstemplate(\"v1={start\", sdsTestTemplateCallback, NULL);\n        test_cond(\"sdstemplate() with truncated var name\", x == NULL);\n\n        /* Template with quoting */\n        x = sdstemplate(\"v1={{{variable1}} {{} v2={variable2}\", sdsTestTemplateCallback, NULL);\n        test_cond(\"sdstemplate() with quoting\",\n                  memcmp(x,\"v1={value1} {} v2=value2\",24) == 0);\n        sdsfree(x);\n\n        /* Test sdsresize - extend */\n        x = sdsnew(\"1234567890123456789012345678901234567890\");\n        x = sdsResize(x, 200);\n        test_cond(\"sdsrezie() expand len\", sdslen(x) == 40);\n        test_cond(\"sdsrezie() expand strlen\", strlen(x) == 40);\n        test_cond(\"sdsrezie() expand alloc\", sdsalloc(x) == 200);\n        /* Test sdsresize - trim free space */\n        x = sdsResize(x, 80);\n        test_cond(\"sdsrezie() shrink len\", sdslen(x) == 40);\n        test_cond(\"sdsrezie() shrink strlen\", strlen(x) == 40);\n        test_cond(\"sdsrezie() shrink alloc\", sdsalloc(x) == 80);\n        /* Test sdsresize - crop used space */\n        x = sdsResize(x, 30);\n        test_cond(\"sdsrezie() crop len\", sdslen(x) == 30);\n        test_cond(\"sdsrezie() crop strlen\", strlen(x) == 30);\n        test_cond(\"sdsrezie() crop alloc\", sdsalloc(x) == 30);\n        /* Test sdsresize - extend to different class */\n        x = sdsResize(x, 400);\n        test_cond(\"sdsrezie() expand len\", sdslen(x) == 30);\n        test_cond(\"sdsrezie() expand strlen\", strlen(x) == 30);\n        test_cond(\"sdsrezie() expand alloc\", sdsalloc(x) == 400);\n        /* Test sdsresize - shrink to different class */\n        x = sdsResize(x, 4);\n        test_cond(\"sdsrezie() crop len\", sdslen(x) == 4);\n        test_cond(\"sdsrezie() crop strlen\", 
strlen(x) == 4);\n        test_cond(\"sdsrezie() crop alloc\", sdsalloc(x) == 4);\n        sdsfree(x);\n    }\n    return 0;\n}\n#endif\n"
  },
  {
    "path": "src/redis/sds.h",
    "content": "/* SDSLib 2.0 -- A C dynamic strings library\n *\n * Copyright (c) 2006-2015, Salvatore Sanfilippo <antirez at gmail dot com>\n * Copyright (c) 2015, Oran Agra\n * Copyright (c) 2015, Redis Labs, Inc\n * All rights reserved.\n *\n * Redistribution and use in source and binary forms, with or without\n * modification, are permitted provided that the following conditions are met:\n *\n *   * Redistributions of source code must retain the above copyright notice,\n *     this list of conditions and the following disclaimer.\n *   * Redistributions in binary form must reproduce the above copyright\n *     notice, this list of conditions and the following disclaimer in the\n *     documentation and/or other materials provided with the distribution.\n *   * Neither the name of Redis nor the names of its contributors may be used\n *     to endorse or promote products derived from this software without\n *     specific prior written permission.\n *\n * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS \"AS IS\"\n * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE\n * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE\n * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE\n * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR\n * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF\n * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS\n * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN\n * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)\n * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE\n * POSSIBILITY OF SUCH DAMAGE.\n */\n\n#ifndef __SDS_H\n#define __SDS_H\n\n#define SDS_MAX_PREALLOC (1024*1024)\nextern const char *SDS_NOINIT;\n\n#include <sys/types.h>\n#include <stdarg.h>\n#include <stdint.h>\n\ntypedef char *sds;\n\n/* Note: sdshdr5 is never used, we just access the flags byte directly.\n * However it is here to document the layout of type 5 SDS strings. */\nstruct __attribute__ ((__packed__)) sdshdr5 {\n    unsigned char flags; /* 3 lsb of type, and 5 msb of string length */\n    char buf[];\n};\nstruct __attribute__ ((__packed__)) sdshdr8 {\n    uint8_t len; /* used */\n    uint8_t alloc; /* excluding the header and null terminator */\n    unsigned char flags; /* 3 lsb of type, 5 unused bits */\n    char buf[];\n};\nstruct __attribute__ ((__packed__)) sdshdr16 {\n    uint16_t len; /* used */\n    uint16_t alloc; /* excluding the header and null terminator */\n    unsigned char flags; /* 3 lsb of type, 5 unused bits */\n    char buf[];\n};\nstruct __attribute__ ((__packed__)) sdshdr32 {\n    uint32_t len; /* used */\n    uint32_t alloc; /* excluding the header and null terminator */\n    unsigned char flags; /* 3 lsb of type, 5 unused bits */\n    char buf[];\n};\nstruct __attribute__ ((__packed__)) sdshdr64 {\n    uint64_t len; /* used */\n    uint64_t alloc; /* excluding the header and null terminator */\n    unsigned char flags; /* 3 lsb of type, 5 unused bits */\n    char buf[];\n};\n\n#define SDS_TYPE_5  0\n#define SDS_TYPE_8  1\n#define 
SDS_TYPE_16 2\n#define SDS_TYPE_32 3\n#define SDS_TYPE_64 4\n#define SDS_TYPE_MASK 7\n#define SDS_TYPE_BITS 3\n#define SDS_HDR(T,s) ((struct sdshdr##T *)((s)-(sizeof(struct sdshdr##T))))\n#define SDS_HDR_VAR(T,s) struct sdshdr##T *sh = SDS_HDR(T,s);\n#define SDS_TYPE_5_LEN(f) ((f)>>SDS_TYPE_BITS)\n\nstatic inline size_t sdslen(const sds s) {\n    unsigned char flags = s[-1];\n    switch(flags&SDS_TYPE_MASK) {\n        case SDS_TYPE_5:\n            return SDS_TYPE_5_LEN(flags);\n        case SDS_TYPE_8:\n            return SDS_HDR(8,s)->len;\n        case SDS_TYPE_16:\n            return SDS_HDR(16,s)->len;\n        case SDS_TYPE_32:\n            return SDS_HDR(32,s)->len;\n        case SDS_TYPE_64:\n            return SDS_HDR(64,s)->len;\n    }\n    return 0;\n}\n\nstatic inline size_t sdsavail(const sds s) {\n    unsigned char flags = s[-1];\n    switch(flags&SDS_TYPE_MASK) {\n        case SDS_TYPE_5: {\n            return 0;\n        }\n        case SDS_TYPE_8: {\n            SDS_HDR_VAR(8,s);\n            return sh->alloc - sh->len;\n        }\n        case SDS_TYPE_16: {\n            SDS_HDR_VAR(16,s);\n            return sh->alloc - sh->len;\n        }\n        case SDS_TYPE_32: {\n            SDS_HDR_VAR(32,s);\n            return sh->alloc - sh->len;\n        }\n        case SDS_TYPE_64: {\n            SDS_HDR_VAR(64,s);\n            return sh->alloc - sh->len;\n        }\n    }\n    return 0;\n}\n\nstatic inline void sdssetlen(sds s, size_t newlen) {\n    unsigned char flags = s[-1];\n    switch(flags&SDS_TYPE_MASK) {\n        case SDS_TYPE_5:\n            {\n                unsigned char *fp = ((unsigned char*)s)-1;\n                *fp = SDS_TYPE_5 | (newlen << SDS_TYPE_BITS);\n            }\n            break;\n        case SDS_TYPE_8:\n            SDS_HDR(8,s)->len = newlen;\n            break;\n        case SDS_TYPE_16:\n            SDS_HDR(16,s)->len = newlen;\n            break;\n        case SDS_TYPE_32:\n            SDS_HDR(32,s)->len = newlen;\n   
         break;\n        case SDS_TYPE_64:\n            SDS_HDR(64,s)->len = newlen;\n            break;\n    }\n}\n\nstatic inline void sdsinclen(sds s, size_t inc) {\n    unsigned char flags = s[-1];\n    switch(flags&SDS_TYPE_MASK) {\n        case SDS_TYPE_5:\n            {\n                unsigned char *fp = ((unsigned char*)s)-1;\n                unsigned char newlen = SDS_TYPE_5_LEN(flags)+inc;\n                *fp = SDS_TYPE_5 | (newlen << SDS_TYPE_BITS);\n            }\n            break;\n        case SDS_TYPE_8:\n            SDS_HDR(8,s)->len += inc;\n            break;\n        case SDS_TYPE_16:\n            SDS_HDR(16,s)->len += inc;\n            break;\n        case SDS_TYPE_32:\n            SDS_HDR(32,s)->len += inc;\n            break;\n        case SDS_TYPE_64:\n            SDS_HDR(64,s)->len += inc;\n            break;\n    }\n}\n\n/* sdsalloc() = sdsavail() + sdslen() */\nstatic inline size_t sdsalloc(const sds s) {\n    unsigned char flags = s[-1];\n    switch(flags&SDS_TYPE_MASK) {\n        case SDS_TYPE_5:\n            return SDS_TYPE_5_LEN(flags);\n        case SDS_TYPE_8:\n            return SDS_HDR(8,s)->alloc;\n        case SDS_TYPE_16:\n            return SDS_HDR(16,s)->alloc;\n        case SDS_TYPE_32:\n            return SDS_HDR(32,s)->alloc;\n        case SDS_TYPE_64:\n            return SDS_HDR(64,s)->alloc;\n    }\n    return 0;\n}\n\nstatic inline void sdssetalloc(sds s, size_t newlen) {\n    unsigned char flags = s[-1];\n    switch(flags&SDS_TYPE_MASK) {\n        case SDS_TYPE_5:\n            /* Nothing to do, this type has no total allocation info. 
*/\n            break;\n        case SDS_TYPE_8:\n            SDS_HDR(8,s)->alloc = newlen;\n            break;\n        case SDS_TYPE_16:\n            SDS_HDR(16,s)->alloc = newlen;\n            break;\n        case SDS_TYPE_32:\n            SDS_HDR(32,s)->alloc = newlen;\n            break;\n        case SDS_TYPE_64:\n            SDS_HDR(64,s)->alloc = newlen;\n            break;\n    }\n}\n\nsds sdsnewlen(const void *init, size_t initlen);\nsds sdsnew(const char *init);\nsds sdsempty(void);\nsds sdsdup(const sds s);\nvoid sdsfree(sds s);\nsds sdsgrowzero(sds s, size_t len);\nsds sdscatlen(sds s, const void *t, size_t len);\nsds sdscat(sds s, const char *t);\nsds sdscatsds(sds s, const sds t);\nsds sdscpylen(sds s, const char *t, size_t len);\nsds sdscpy(sds s, const char *t);\n\nsds sdscatvprintf(sds s, const char *fmt, va_list ap);\n#ifdef __GNUC__\nsds sdscatprintf(sds s, const char *fmt, ...)\n    __attribute__((format(printf, 2, 3)));\n#else\nsds sdscatprintf(sds s, const char *fmt, ...);\n#endif\n\nsds sdscatfmt(sds s, char const *fmt, ...);\nsds sdstrim(sds s, const char *cset);\nvoid sdssubstr(sds s, size_t start, size_t len);\nint sdsrange(sds s, ssize_t start, ssize_t end);\nvoid sdsupdatelen(sds s);\nvoid sdsclear(sds s);\nint sdscmp(const sds s1, const sds s2);\nsds *sdssplitlen(const char *s, ssize_t len, const char *sep, int seplen, int *count);\nvoid sdsfreesplitres(sds *tokens, int count);\nvoid sdstolower(sds s);\nvoid sdstoupper(sds s);\nsds sdsfromlonglong(long long value);\nsds sdscatrepr(sds s, const char *p, size_t len);\nsds *sdssplitargs(const char *line, int *argc);\nsds sdsmapchars(sds s, const char *from, const char *to, size_t setlen);\nsds sdsjoin(char **argv, int argc, char *sep);\nsds sdsjoinsds(sds *argv, int argc, const char *sep, size_t seplen);\n\n/* Callback for sdstemplate. The function gets called by sdstemplate\n * every time a variable needs to be expanded. 
The variable name is\n * provided as variable, and the callback is expected to return a\n * substitution value. Returning a NULL indicates an error.\n */\ntypedef sds (*sdstemplate_callback_t)(const sds variable, void *arg);\nsds sdstemplate(const char *templ, sdstemplate_callback_t cb_func, void *cb_arg);\n\n/* Low level functions exposed to the user API */\nsds sdsMakeRoomFor(sds s, size_t addlen);\nsds sdsMakeRoomForNonGreedy(sds s, size_t addlen);\nvoid sdsIncrLen(sds s, ssize_t incr);\nsds sdsRemoveFreeSpace(sds s);\nsds sdsResize(sds s, size_t size);\nsize_t sdsAllocSize(sds s);\nvoid *sdsAllocPtr(sds s);\n\n/* Export the allocator used by SDS to the program using SDS.\n * Sometimes the program SDS is linked to, may use a different set of\n * allocators, but may want to allocate or free things that SDS will\n * respectively free or allocate. */\nvoid *sds_malloc(size_t size);\nvoid *sds_realloc(void *ptr, size_t size);\nvoid sds_free(void *ptr);\n\n#ifdef REDIS_TEST\nint sdsTest(int argc, char *argv[], int flags);\n#endif\n\n#endif\n"
  },
  {
    "path": "src/redis/sdsalloc.h",
    "content": "/* SDSLib 2.0 -- A C dynamic strings library\n *\n * Copyright (c) 2006-2015, Salvatore Sanfilippo <antirez at gmail dot com>\n * Copyright (c) 2015, Redis Labs, Inc\n * All rights reserved.\n *\n * Redistribution and use in source and binary forms, with or without\n * modification, are permitted provided that the following conditions are met:\n *\n *   * Redistributions of source code must retain the above copyright notice,\n *     this list of conditions and the following disclaimer.\n *   * Redistributions in binary form must reproduce the above copyright\n *     notice, this list of conditions and the following disclaimer in the\n *     documentation and/or other materials provided with the distribution.\n *   * Neither the name of Redis nor the names of its contributors may be used\n *     to endorse or promote products derived from this software without\n *     specific prior written permission.\n *\n * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS \"AS IS\"\n * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE\n * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE\n * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE\n * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR\n * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF\n * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS\n * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN\n * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)\n * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE\n * POSSIBILITY OF SUCH DAMAGE.\n */\n\n/* SDS allocator selection.\n *\n * This file is used in order to change the SDS allocator at compile time.\n * Just define the following defines to what you want to use. 
Also add\n * the include of your alternate allocator if needed (not needed in order\n * to use the default libc allocator). */\n\n#ifndef __SDS_ALLOC_H__\n#define __SDS_ALLOC_H__\n\n#include \"zmalloc.h\"\n#define s_malloc zmalloc\n#define s_realloc zrealloc\n#define s_calloc zcalloc\n#define s_trymalloc ztrymalloc\n#define s_tryrealloc ztryrealloc\n#define s_free zfree\n#define s_malloc_usable zmalloc_usable\n#define s_realloc_usable zrealloc_usable\n#define s_trymalloc_usable ztrymalloc_usable\n#define s_tryrealloc_usable ztryrealloc_usable\n#define s_free_usable zfree_usable\n\n#endif\n"
  },
  {
    "path": "src/redis/siphash.c",
    "content": "/*\n   SipHash reference C implementation\n\n   Copyright (c) 2012-2016 Jean-Philippe Aumasson\n   <jeanphilippe.aumasson@gmail.com>\n   Copyright (c) 2012-2014 Daniel J. Bernstein <djb@cr.yp.to>\n   Copyright (c) 2017 Salvatore Sanfilippo <antirez@gmail.com>\n\n   To the extent possible under law, the author(s) have dedicated all copyright\n   and related and neighboring rights to this software to the public domain\n   worldwide. This software is distributed without any warranty.\n\n   You should have received a copy of the CC0 Public Domain Dedication along\n   with this software. If not, see\n   <http://creativecommons.org/publicdomain/zero/1.0/>.\n\n   ----------------------------------------------------------------------------\n\n   This version was modified by Salvatore Sanfilippo <antirez@gmail.com>\n   in the following ways:\n\n   1. We use SipHash 1-2. This is not believed to be as strong as the\n      suggested 2-4 variant, but AFAIK there are not trivial attacks\n      against this reduced-rounds version, and it runs at the same speed\n      as Murmurhash2 that we used previously, while the 2-4 variant slowed\n      down Redis by a 4% figure more or less.\n   2. Hard-code rounds in the hope the compiler can optimize it more\n      in this raw form. Anyway we always use the same 1-2 variant.\n   3. Modify the prototype and implementation so that the function directly\n      returns an uint64_t value, the hash itself, instead of receiving an\n      output buffer. This also means that the output size is set to 8 bytes\n      and the 16 bytes output code handling was removed.\n   4. Provide a case insensitive variant to be used when hashing strings that\n      must be considered identical by the hash table regardless of the case.\n      If we don't have directly a case insensitive hash function, we need to\n      perform a text transformation in some temporary buffer, which is costly.\n   5. Remove debugging code.\n   6. 
Modified the original test.c file to be a stand-alone function testing\n      the function in the new form (returning an uint64_t) using just the\n      relevant test vector.\n */\n#include <assert.h>\n#include <stdint.h>\n#include <stdio.h>\n#include <string.h>\n#include <ctype.h>\n\n/* Fast tolower() alike function that does not care about locale\n * but just returns a-z instead of A-Z. */\nint siptlw(int c) {\n    if (c >= 'A' && c <= 'Z') {\n        return c+('a'-'A');\n    } else {\n        return c;\n    }\n}\n\n#if defined(__has_attribute)\n#if __has_attribute(no_sanitize)\n#define NO_SANITIZE(sanitizer) __attribute__((no_sanitize(sanitizer)))\n#endif\n#endif\n\n#if !defined(NO_SANITIZE)\n#define NO_SANITIZE(sanitizer)\n#endif\n\n/* Test of the CPU is Little Endian and supports not aligned accesses.\n * Two interesting conditions to speedup the function that happen to be\n * in most of x86 servers. */\n#if defined(__X86_64__) || defined(__x86_64__) || defined (__i386__) \\\n\t|| defined (__aarch64__) || defined (__arm64__)\n#define UNALIGNED_LE_CPU\n#endif\n\n#define ROTL(x, b) (uint64_t)(((x) << (b)) | ((x) >> (64 - (b))))\n\n#define U32TO8_LE(p, v)                                                        \\\n    (p)[0] = (uint8_t)((v));                                                   \\\n    (p)[1] = (uint8_t)((v) >> 8);                                              \\\n    (p)[2] = (uint8_t)((v) >> 16);                                             \\\n    (p)[3] = (uint8_t)((v) >> 24);\n\n#define U64TO8_LE(p, v)                                                        \\\n    U32TO8_LE((p), (uint32_t)((v)));                                           \\\n    U32TO8_LE((p) + 4, (uint32_t)((v) >> 32));\n\n#ifdef UNALIGNED_LE_CPU\n#define U8TO64_LE(p) (*((uint64_t*)(p)))\n#else\n#define U8TO64_LE(p)                                                           \\\n    (((uint64_t)((p)[0])) | ((uint64_t)((p)[1]) << 8) |                        \\\n     
((uint64_t)((p)[2]) << 16) | ((uint64_t)((p)[3]) << 24) |                 \\\n     ((uint64_t)((p)[4]) << 32) | ((uint64_t)((p)[5]) << 40) |                 \\\n     ((uint64_t)((p)[6]) << 48) | ((uint64_t)((p)[7]) << 56))\n#endif\n\n#define U8TO64_LE_NOCASE(p)                                                    \\\n    (((uint64_t)(siptlw((p)[0]))) |                                           \\\n     ((uint64_t)(siptlw((p)[1])) << 8) |                                      \\\n     ((uint64_t)(siptlw((p)[2])) << 16) |                                     \\\n     ((uint64_t)(siptlw((p)[3])) << 24) |                                     \\\n     ((uint64_t)(siptlw((p)[4])) << 32) |                                              \\\n     ((uint64_t)(siptlw((p)[5])) << 40) |                                              \\\n     ((uint64_t)(siptlw((p)[6])) << 48) |                                              \\\n     ((uint64_t)(siptlw((p)[7])) << 56))\n\n#define SIPROUND                                                               \\\n    do {                                                                       \\\n        v0 += v1;                                                              \\\n        v1 = ROTL(v1, 13);                                                     \\\n        v1 ^= v0;                                                              \\\n        v0 = ROTL(v0, 32);                                                     \\\n        v2 += v3;                                                              \\\n        v3 = ROTL(v3, 16);                                                     \\\n        v3 ^= v2;                                                              \\\n        v0 += v3;                                                              \\\n        v3 = ROTL(v3, 21);                                                     \\\n        v3 ^= v0;                                                              \\\n        v2 += v1;                         
                                     \\\n        v1 = ROTL(v1, 17);                                                     \\\n        v1 ^= v2;                                                              \\\n        v2 = ROTL(v2, 32);                                                     \\\n    } while (0)\n\nNO_SANITIZE(\"alignment\")\nuint64_t siphash(const uint8_t *in, const size_t inlen, const uint8_t *k) {\n#ifndef UNALIGNED_LE_CPU\n    uint64_t hash;\n    uint8_t *out = (uint8_t*) &hash;\n#endif\n    uint64_t v0 = 0x736f6d6570736575ULL;\n    uint64_t v1 = 0x646f72616e646f6dULL;\n    uint64_t v2 = 0x6c7967656e657261ULL;\n    uint64_t v3 = 0x7465646279746573ULL;\n    uint64_t k0 = U8TO64_LE(k);\n    uint64_t k1 = U8TO64_LE(k + 8);\n    uint64_t m;\n    const uint8_t *end = in + inlen - (inlen % sizeof(uint64_t));\n    const int left = inlen & 7;\n    uint64_t b = ((uint64_t)inlen) << 56;\n    v3 ^= k1;\n    v2 ^= k0;\n    v1 ^= k1;\n    v0 ^= k0;\n\n    for (; in != end; in += 8) {\n        m = U8TO64_LE(in);\n        v3 ^= m;\n\n        SIPROUND;\n\n        v0 ^= m;\n    }\n\n    switch (left) {\n    case 7: b |= ((uint64_t)in[6]) << 48; /* fall-thru */\n    case 6: b |= ((uint64_t)in[5]) << 40; /* fall-thru */\n    case 5: b |= ((uint64_t)in[4]) << 32; /* fall-thru */\n    case 4: b |= ((uint64_t)in[3]) << 24; /* fall-thru */\n    case 3: b |= ((uint64_t)in[2]) << 16; /* fall-thru */\n    case 2: b |= ((uint64_t)in[1]) << 8; /* fall-thru */\n    case 1: b |= ((uint64_t)in[0]); break;\n    case 0: break;\n    }\n\n    v3 ^= b;\n\n    SIPROUND;\n\n    v0 ^= b;\n    v2 ^= 0xff;\n\n    SIPROUND;\n    SIPROUND;\n\n    b = v0 ^ v1 ^ v2 ^ v3;\n#ifndef UNALIGNED_LE_CPU\n    U64TO8_LE(out, b);\n    return hash;\n#else\n    return b;\n#endif\n}\n\nNO_SANITIZE(\"alignment\")\nuint64_t siphash_nocase(const uint8_t *in, const size_t inlen, const uint8_t *k)\n{\n#ifndef UNALIGNED_LE_CPU\n    uint64_t hash;\n    uint8_t *out = (uint8_t*) &hash;\n#endif\n    uint64_t v0 = 
0x736f6d6570736575ULL;\n    uint64_t v1 = 0x646f72616e646f6dULL;\n    uint64_t v2 = 0x6c7967656e657261ULL;\n    uint64_t v3 = 0x7465646279746573ULL;\n    uint64_t k0 = U8TO64_LE(k);\n    uint64_t k1 = U8TO64_LE(k + 8);\n    uint64_t m;\n    const uint8_t *end = in + inlen - (inlen % sizeof(uint64_t));\n    const int left = inlen & 7;\n    uint64_t b = ((uint64_t)inlen) << 56;\n    v3 ^= k1;\n    v2 ^= k0;\n    v1 ^= k1;\n    v0 ^= k0;\n\n    for (; in != end; in += 8) {\n        m = U8TO64_LE_NOCASE(in);\n        v3 ^= m;\n\n        SIPROUND;\n\n        v0 ^= m;\n    }\n\n    switch (left) {\n    case 7: b |= ((uint64_t)siptlw(in[6])) << 48; /* fall-thru */\n    case 6: b |= ((uint64_t)siptlw(in[5])) << 40; /* fall-thru */\n    case 5: b |= ((uint64_t)siptlw(in[4])) << 32; /* fall-thru */\n    case 4: b |= ((uint64_t)siptlw(in[3])) << 24; /* fall-thru */\n    case 3: b |= ((uint64_t)siptlw(in[2])) << 16; /* fall-thru */\n    case 2: b |= ((uint64_t)siptlw(in[1])) << 8; /* fall-thru */\n    case 1: b |= ((uint64_t)siptlw(in[0])); break;\n    case 0: break;\n    }\n\n    v3 ^= b;\n\n    SIPROUND;\n\n    v0 ^= b;\n    v2 ^= 0xff;\n\n    SIPROUND;\n    SIPROUND;\n\n    b = v0 ^ v1 ^ v2 ^ v3;\n#ifndef UNALIGNED_LE_CPU\n    U64TO8_LE(out, b);\n    return hash;\n#else\n    return b;\n#endif\n}\n\n\n/* --------------------------------- TEST ------------------------------------ */\n\n#ifdef SIPHASH_TEST\n\nconst uint8_t vectors_sip64[64][8] = {\n    { 0x31, 0x0e, 0x0e, 0xdd, 0x47, 0xdb, 0x6f, 0x72, },\n    { 0xfd, 0x67, 0xdc, 0x93, 0xc5, 0x39, 0xf8, 0x74, },\n    { 0x5a, 0x4f, 0xa9, 0xd9, 0x09, 0x80, 0x6c, 0x0d, },\n    { 0x2d, 0x7e, 0xfb, 0xd7, 0x96, 0x66, 0x67, 0x85, },\n    { 0xb7, 0x87, 0x71, 0x27, 0xe0, 0x94, 0x27, 0xcf, },\n    { 0x8d, 0xa6, 0x99, 0xcd, 0x64, 0x55, 0x76, 0x18, },\n    { 0xce, 0xe3, 0xfe, 0x58, 0x6e, 0x46, 0xc9, 0xcb, },\n    { 0x37, 0xd1, 0x01, 0x8b, 0xf5, 0x00, 0x02, 0xab, },\n    { 0x62, 0x24, 0x93, 0x9a, 0x79, 0xf5, 0xf5, 0x93, },\n    { 0xb0, 
0xe4, 0xa9, 0x0b, 0xdf, 0x82, 0x00, 0x9e, },\n    { 0xf3, 0xb9, 0xdd, 0x94, 0xc5, 0xbb, 0x5d, 0x7a, },\n    { 0xa7, 0xad, 0x6b, 0x22, 0x46, 0x2f, 0xb3, 0xf4, },\n    { 0xfb, 0xe5, 0x0e, 0x86, 0xbc, 0x8f, 0x1e, 0x75, },\n    { 0x90, 0x3d, 0x84, 0xc0, 0x27, 0x56, 0xea, 0x14, },\n    { 0xee, 0xf2, 0x7a, 0x8e, 0x90, 0xca, 0x23, 0xf7, },\n    { 0xe5, 0x45, 0xbe, 0x49, 0x61, 0xca, 0x29, 0xa1, },\n    { 0xdb, 0x9b, 0xc2, 0x57, 0x7f, 0xcc, 0x2a, 0x3f, },\n    { 0x94, 0x47, 0xbe, 0x2c, 0xf5, 0xe9, 0x9a, 0x69, },\n    { 0x9c, 0xd3, 0x8d, 0x96, 0xf0, 0xb3, 0xc1, 0x4b, },\n    { 0xbd, 0x61, 0x79, 0xa7, 0x1d, 0xc9, 0x6d, 0xbb, },\n    { 0x98, 0xee, 0xa2, 0x1a, 0xf2, 0x5c, 0xd6, 0xbe, },\n    { 0xc7, 0x67, 0x3b, 0x2e, 0xb0, 0xcb, 0xf2, 0xd0, },\n    { 0x88, 0x3e, 0xa3, 0xe3, 0x95, 0x67, 0x53, 0x93, },\n    { 0xc8, 0xce, 0x5c, 0xcd, 0x8c, 0x03, 0x0c, 0xa8, },\n    { 0x94, 0xaf, 0x49, 0xf6, 0xc6, 0x50, 0xad, 0xb8, },\n    { 0xea, 0xb8, 0x85, 0x8a, 0xde, 0x92, 0xe1, 0xbc, },\n    { 0xf3, 0x15, 0xbb, 0x5b, 0xb8, 0x35, 0xd8, 0x17, },\n    { 0xad, 0xcf, 0x6b, 0x07, 0x63, 0x61, 0x2e, 0x2f, },\n    { 0xa5, 0xc9, 0x1d, 0xa7, 0xac, 0xaa, 0x4d, 0xde, },\n    { 0x71, 0x65, 0x95, 0x87, 0x66, 0x50, 0xa2, 0xa6, },\n    { 0x28, 0xef, 0x49, 0x5c, 0x53, 0xa3, 0x87, 0xad, },\n    { 0x42, 0xc3, 0x41, 0xd8, 0xfa, 0x92, 0xd8, 0x32, },\n    { 0xce, 0x7c, 0xf2, 0x72, 0x2f, 0x51, 0x27, 0x71, },\n    { 0xe3, 0x78, 0x59, 0xf9, 0x46, 0x23, 0xf3, 0xa7, },\n    { 0x38, 0x12, 0x05, 0xbb, 0x1a, 0xb0, 0xe0, 0x12, },\n    { 0xae, 0x97, 0xa1, 0x0f, 0xd4, 0x34, 0xe0, 0x15, },\n    { 0xb4, 0xa3, 0x15, 0x08, 0xbe, 0xff, 0x4d, 0x31, },\n    { 0x81, 0x39, 0x62, 0x29, 0xf0, 0x90, 0x79, 0x02, },\n    { 0x4d, 0x0c, 0xf4, 0x9e, 0xe5, 0xd4, 0xdc, 0xca, },\n    { 0x5c, 0x73, 0x33, 0x6a, 0x76, 0xd8, 0xbf, 0x9a, },\n    { 0xd0, 0xa7, 0x04, 0x53, 0x6b, 0xa9, 0x3e, 0x0e, },\n    { 0x92, 0x59, 0x58, 0xfc, 0xd6, 0x42, 0x0c, 0xad, },\n    { 0xa9, 0x15, 0xc2, 0x9b, 0xc8, 0x06, 0x73, 0x18, },\n    { 0x95, 0x2b, 0x79, 0xf3, 0xbc, 
0x0a, 0xa6, 0xd4, },\n    { 0xf2, 0x1d, 0xf2, 0xe4, 0x1d, 0x45, 0x35, 0xf9, },\n    { 0x87, 0x57, 0x75, 0x19, 0x04, 0x8f, 0x53, 0xa9, },\n    { 0x10, 0xa5, 0x6c, 0xf5, 0xdf, 0xcd, 0x9a, 0xdb, },\n    { 0xeb, 0x75, 0x09, 0x5c, 0xcd, 0x98, 0x6c, 0xd0, },\n    { 0x51, 0xa9, 0xcb, 0x9e, 0xcb, 0xa3, 0x12, 0xe6, },\n    { 0x96, 0xaf, 0xad, 0xfc, 0x2c, 0xe6, 0x66, 0xc7, },\n    { 0x72, 0xfe, 0x52, 0x97, 0x5a, 0x43, 0x64, 0xee, },\n    { 0x5a, 0x16, 0x45, 0xb2, 0x76, 0xd5, 0x92, 0xa1, },\n    { 0xb2, 0x74, 0xcb, 0x8e, 0xbf, 0x87, 0x87, 0x0a, },\n    { 0x6f, 0x9b, 0xb4, 0x20, 0x3d, 0xe7, 0xb3, 0x81, },\n    { 0xea, 0xec, 0xb2, 0xa3, 0x0b, 0x22, 0xa8, 0x7f, },\n    { 0x99, 0x24, 0xa4, 0x3c, 0xc1, 0x31, 0x57, 0x24, },\n    { 0xbd, 0x83, 0x8d, 0x3a, 0xaf, 0xbf, 0x8d, 0xb7, },\n    { 0x0b, 0x1a, 0x2a, 0x32, 0x65, 0xd5, 0x1a, 0xea, },\n    { 0x13, 0x50, 0x79, 0xa3, 0x23, 0x1c, 0xe6, 0x60, },\n    { 0x93, 0x2b, 0x28, 0x46, 0xe4, 0xd7, 0x06, 0x66, },\n    { 0xe1, 0x91, 0x5f, 0x5c, 0xb1, 0xec, 0xa4, 0x6c, },\n    { 0xf3, 0x25, 0x96, 0x5c, 0xa1, 0x6d, 0x62, 0x9f, },\n    { 0x57, 0x5f, 0xf2, 0x8e, 0x60, 0x38, 0x1b, 0xe5, },\n    { 0x72, 0x45, 0x06, 0xeb, 0x4c, 0x32, 0x8a, 0x95, },\n};\n\n\n/* Test siphash using a test vector. Returns 0 if the function passed\n * all the tests, otherwise 1 is returned.\n *\n * IMPORTANT: The test vector is for SipHash 2-4. Before running\n * the test revert back the siphash() function to 2-4 rounds since\n * now it uses 1-2 rounds. */\nint siphash_test(void) {\n    uint8_t in[64], k[16];\n    int i;\n    int fails = 0;\n\n    for (i = 0; i < 16; ++i)\n        k[i] = i;\n\n    for (i = 0; i < 64; ++i) {\n        in[i] = i;\n        uint64_t hash = siphash(in, i, k);\n        const uint8_t *v = NULL;\n        v = (uint8_t *)vectors_sip64;\n        if (memcmp(&hash, v + (i * 8), 8)) {\n            /* printf(\"fail for %d bytes\\n\", i); */\n            fails++;\n        }\n    }\n\n    /* Run a few basic tests with the case insensitive version. 
*/\n    uint64_t h1, h2;\n    h1 = siphash((uint8_t*)\"hello world\",11,(uint8_t*)\"1234567812345678\");\n    h2 = siphash_nocase((uint8_t*)\"hello world\",11,(uint8_t*)\"1234567812345678\");\n    if (h1 != h2) fails++;\n\n    h1 = siphash((uint8_t*)\"hello world\",11,(uint8_t*)\"1234567812345678\");\n    h2 = siphash_nocase((uint8_t*)\"HELLO world\",11,(uint8_t*)\"1234567812345678\");\n    if (h1 != h2) fails++;\n\n    h1 = siphash((uint8_t*)\"HELLO world\",11,(uint8_t*)\"1234567812345678\");\n    h2 = siphash_nocase((uint8_t*)\"HELLO world\",11,(uint8_t*)\"1234567812345678\");\n    if (h1 == h2) fails++;\n\n    if (!fails) return 0;\n    return 1;\n}\n\nint main(void) {\n    if (siphash_test() == 0) {\n        printf(\"SipHash test: OK\\n\");\n        return 0;\n    } else {\n        printf(\"SipHash test: FAILED\\n\");\n        return 1;\n    }\n}\n\n#endif\n"
  },
  {
    "path": "src/redis/stream.h",
    "content": "#ifndef STREAM_H\n#define STREAM_H\n\n#include \"util.h\"\n#include \"rax.h\"\n#include \"sds.h\"\n#include \"listpack.h\"\n\n\ntypedef struct redisObject robj;\n\n/* Stream item ID: a 128 bit number composed of a milliseconds time and\n * a sequence counter. IDs generated in the same millisecond (or in a past\n * millisecond if the clock jumped backward) will use the millisecond time\n * of the latest generated ID and an incremented sequence. */\ntypedef struct streamID {\n    uint64_t ms;  /* Unix time in milliseconds. */\n    uint64_t seq; /* Sequence number. */\n} streamID;\n\ntypedef struct stream {\n    struct rax *rax;                      /* The radix tree holding the stream. */\n    uint64_t length;               /* Current number of elements inside this stream. */\n    streamID last_id;              /* Zero if there are yet no items. */\n    streamID first_id;             /* The first non-tombstone entry, zero if empty. */\n    streamID max_deleted_entry_id; /* The maximal ID that was deleted. */\n    uint64_t entries_added;        /* All time count of elements added. */\n    struct rax *cgroups;                  /* Consumer groups dictionary: name -> streamCG */\n} stream;\n\n/* We define an iterator to iterate stream items in an abstract way, without\n * caring about the radix tree + listpack representation. Technically speaking\n * the iterator is only used inside streamReplyWithRange(), so could just\n * be implemented inside the function, but practically there is the AOF\n * rewriting code that also needs to iterate the stream to emit the XADD\n * commands. */\ntypedef struct streamIterator {\n    stream *stream;         /* The stream we are iterating. */\n    streamID master_id;     /* ID of the master entry at listpack head. */\n    uint64_t master_fields_count;       /* Master entries # of fields. */\n    unsigned char *master_fields_start; /* Master entries start in listpack. 
*/\n    unsigned char *master_fields_ptr;   /* Master field to emit next. */\n    int entry_flags;                    /* Flags of entry we are emitting. */\n    int rev;                /* True if iterating end to start (reverse). */\n    int skip_tombstones;    /* True if not emitting tombstone entries. */\n    uint64_t start_key[2];  /* Start key as 128 bit big endian. */\n    uint64_t end_key[2];    /* End key as 128 bit big endian. */\n    raxIterator ri;         /* Rax iterator. */\n    unsigned char *lp;      /* Current listpack. */\n    unsigned char *lp_ele;  /* Current listpack cursor. */\n    unsigned char *lp_flags; /* Current entry flags pointer. */\n    /* Buffers used to hold the string of lpGet() when the element is\n     * integer encoded, so that there is no string representation of the\n     * element inside the listpack itself. */\n    unsigned char field_buf[LP_INTBUF_SIZE];\n    unsigned char value_buf[LP_INTBUF_SIZE];\n} streamIterator;\n\n/* Consumer group. */\ntypedef struct streamCG {\n    streamID last_id;       /* Last delivered (not acknowledged) ID for this\n                               group. Consumers that will just ask for more\n                               messages will be served with IDs greater than this. */\n    long long entries_read; /* In a perfect world (CG starts at 0-0, no dels, no\n                               XGROUP SETID, ...), this is the total number of\n                               group reads. In the real world, the reasoning behind\n                               this value is detailed at the top comment of\n                               streamEstimateDistanceFromFirstEverEntry(). */\n    rax *pel;               /* Pending entries list. This is a radix tree that\n                               has every message delivered to consumers (without\n                               the NOACK option) that was yet not acknowledged\n                               as processed. 
The key of the radix tree is the\n                               ID as a 64 bit big endian number, while the\n                               associated value is a streamNACK structure.*/\n    rax *consumers;         /* A radix tree representing the consumers by name\n                               and their associated representation in the form\n                               of streamConsumer structures. */\n} streamCG;\n\n/* A specific consumer in a consumer group.  */\ntypedef struct streamConsumer {\n    mstime_t seen_time;   /* Last time this consumer tried to perform an action (attempted reading/claiming). */\n    mstime_t active_time; /* Last time this consumer was active (successful reading/claiming). */\n    sds name;             /* Consumer name. This is how the consumer\n                             will be identified in the consumer group\n                             protocol. Case sensitive. */\n    rax *pel;             /* Consumer specific pending entries list: all\n                             the pending messages delivered to this\n                             consumer not yet acknowledged. Keys are\n                             big endian message IDs, while values are\n                             the same streamNACK structure referenced\n                             in the \"pel\" of the consumer group structure\n                             itself, so the value is shared. */\n} streamConsumer;\n\n/* Pending (yet not acknowledged) message in a consumer group. */\ntypedef struct streamNACK {\n    mstime_t delivery_time;   /* Last time this message was delivered. */\n    uint64_t delivery_count;  /* Number of times this message was delivered.*/\n    streamConsumer *consumer; /* The consumer this message was delivered to\n                                 in the last delivery. */\n} streamNACK;\n\n\ntypedef struct {\n  /* XADD options */\n  streamID id;     /* User-provided ID, for XADD only. 
*/\n  int id_given;    /* Was an ID different than \"*\" specified? for XADD only. */\n  int seq_given;   /* Was an ID different than \"ms-*\" specified? for XADD only. */\n  int no_mkstream; /* if set to 1 do not create new stream */\n\n  /* XADD + XTRIM common options */\n  int trim_strategy;         /* TRIM_STRATEGY_* */\n  int trim_strategy_arg_idx; /* Index of the count in MAXLEN/MINID, for rewriting. */\n  int approx_trim;           /* If 1 only delete whole radix tree nodes, so\n                              * the trim argument is not applied verbatim. */\n  long long limit;           /* Maximum amount of entries to trim. If 0, no limitation\n                              * on the amount of trimming work is enforced. */\n  /* TRIM_STRATEGY_MAXLEN options */\n  long long maxlen; /* After trimming, leave stream at this length . */\n  /* TRIM_STRATEGY_MINID options */\n  streamID minid; /* Trim by ID (No stream entries with ID < 'minid' will remain) */\n} streamAddTrimArgs;\n\n/* Prototypes of exported APIs. */\n// struct client;\n\n/* Flags for streamCreateConsumer */\n#define SCC_DEFAULT 0\n#define SCC_NO_NOTIFY (1 << 0)  /* Do not notify key space if consumer created */\n#define SCC_NO_DIRTIFY (1 << 1) /* Do not dirty++ if consumer created */\n\n#define SCG_INVALID_ENTRIES_READ -1\n#define SCG_INVALID_LAG -1\n\n#define TRIM_STRATEGY_NONE 0\n#define TRIM_STRATEGY_MAXLEN 1\n#define TRIM_STRATEGY_MINID 2\n\n/* Every stream item inside the listpack, has a flags field that is used to\n * mark the entry as deleted, or having the same field as the \"master\"\n * entry at the start of the listpack. */\n#define STREAM_ITEM_FLAG_NONE 0              /* No special flags. */\n#define STREAM_ITEM_FLAG_DELETED (1 << 0)    /* Entry is deleted. Skip it. */\n#define STREAM_ITEM_FLAG_SAMEFIELDS (1 << 1) /* Same fields as primary entry. 
*/\n\nvoid streamIteratorStart(streamIterator *si, stream *s, streamID *start, streamID *end, int rev);\nint streamIteratorGetID(streamIterator *si, streamID *id, int64_t *numfields);\nvoid streamIteratorGetField(streamIterator *si,\n                            unsigned char **fieldptr,\n                            unsigned char **valueptr,\n                            int64_t *fieldlen,\n                            int64_t *valuelen);\nvoid streamIteratorStop(streamIterator *si);\nstreamCG *streamCreateCG(stream *s, const char *name, size_t namelen, streamID *id, long long entries_read);\nvoid streamDecodeID(void *buf, streamID *id);\nint streamCompareID(streamID *a, streamID *b);\nvoid streamFreeNACK(streamNACK *na);\n\nvoid streamGetEdgeID(stream *s, int first, int skip_tombstones, streamID *edge_id);\nlong long streamEstimateDistanceFromFirstEverEntry(stream *s, streamID *id);\n\n#endif\n"
  },
  {
    "path": "src/redis/t_stream.c",
    "content": "/*\n * Copyright (c) 2017, Redis Ltd.\n * All rights reserved.\n *\n * Redistribution and use in source and binary forms, with or without\n * modification, are permitted provided that the following conditions are met:\n *\n *   * Redistributions of source code must retain the above copyright notice,\n *     this list of conditions and the following disclaimer.\n *   * Redistributions in binary form must reproduce the above copyright\n *     notice, this list of conditions and the following disclaimer in the\n *     documentation and/or other materials provided with the distribution.\n *   * Neither the name of Redis nor the names of its contributors may be used\n *     to endorse or promote products derived from this software without\n *     specific prior written permission.\n *\n * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS \"AS IS\"\n * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE\n * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE\n * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE\n * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR\n * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF\n * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS\n * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN\n * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)\n * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE\n * POSSIBILITY OF SUCH DAMAGE.\n */\n\n#include <errno.h>\n#include <stdio.h>\n#include <string.h>\n\n#include \"endianconv.h\"\n#include \"stream.h\"\n#include \"redis_aux.h\"\n#include \"zmalloc.h\"\n\n\n/* For stream commands that require multiple IDs\n * when the number of IDs is less than 'STREAMID_STATIC_VECTOR_LEN',\n * avoid malloc allocation.*/\n#define STREAMID_STATIC_VECTOR_LEN 8\n\n/* Max pre-allocation for listpack. This is done to avoid abuse of a user\n * setting stream_node_max_bytes to a huge number. */\n#define STREAM_LISTPACK_MAX_PRE_ALLOCATE 4096\n\n/* Don't let listpacks grow too big, even if the user config allows it.\n * doing so can lead to an overflow (trying to store more than 32bit length\n * into the listpack header), or actually an assertion since lpInsert\n * will return NULL. 
*/\n#define STREAM_LISTPACK_MAX_SIZE (1 << 30)\n\n/* -----------------------------------------------------------------------\n * Low level stream encoding: a radix tree of listpacks.\n * ----------------------------------------------------------------------- */\nstatic inline int64_t lpGetIntegerIfValid(unsigned char *ele, int *valid) {\n    int64_t v;\n    unsigned char *e = lpGet(ele, &v, NULL);\n    if (e == NULL) {\n        if (valid) *valid = 1;\n        return v;\n    }\n    long long ll;\n    int ret = string2ll((char *)e, v, &ll);\n    if (valid)\n        *valid = ret;\n    else\n        serverAssert(ret != 0);\n    v = ll;\n    return v;\n}\n\n#define lpGetInteger(ele) lpGetIntegerIfValid(ele, NULL)\n\n/* Get an edge streamID of a given listpack.\n * 'master_id' is an input param, used to build the 'edge_id' output param */\n/* Convert the specified stream entry ID as a 128 bit big endian number, so\n * that the IDs can be sorted lexicographically. */\nstatic void streamEncodeID(void *buf, streamID *id) {\n    uint64_t e[2];\n    e[0] = htonu64(id->ms);\n    e[1] = htonu64(id->seq);\n    memcpy(buf, e, sizeof(e));\n}\n\n/* This is the reverse of streamEncodeID(): the decoded ID will be stored\n * in the 'id' structure passed by reference. The buffer 'buf' must point\n * to a 128 bit big-endian encoded ID. */\nvoid streamDecodeID(void *buf, streamID *id) {\n    uint64_t e[2];\n    memcpy(e, buf, sizeof(e));\n    id->ms = ntohu64(e[0]);\n    id->seq = ntohu64(e[1]);\n}\n\n/* Compare two stream IDs. Return -1 if a < b, 0 if a == b, 1 if a > b. */\nint streamCompareID(streamID *a, streamID *b) {\n    if (a->ms > b->ms)\n        return 1;\n    else if (a->ms < b->ms)\n        return -1;\n    /* The ms part is the same. Check the sequence part. */\n    else if (a->seq > b->seq)\n        return 1;\n    else if (a->seq < b->seq)\n        return -1;\n    /* Everything is the same: IDs are equal. */\n    return 0;\n}\n\n/* Retrieves the ID of the stream edge entry. 
An edge is either the first or\n * the last ID in the stream, and may be a tombstone. To filter out tombstones,\n * set the 'skip_tombstones' argument to 1. */\nvoid streamGetEdgeID(stream *s, int first, int skip_tombstones, streamID *edge_id) {\n    streamIterator si;\n    int64_t numfields;\n    streamIteratorStart(&si, s, NULL, NULL, !first);\n    si.skip_tombstones = skip_tombstones;\n    int found = streamIteratorGetID(&si, edge_id, &numfields);\n    if (!found) {\n        streamID min_id = {0, 0}, max_id = {UINT64_MAX, UINT64_MAX};\n        *edge_id = first ? max_id : min_id;\n    }\n    streamIteratorStop(&si);\n}\n\n/* Initialize the stream iterator, so that we can call iterating functions\n * to get the next items. This requires a corresponding streamIteratorStop()\n * at the end. The 'rev' parameter controls the direction. If it's zero the\n * iteration is from the start to the end element (inclusive), otherwise\n * if rev is non-zero, the iteration is reversed.\n *\n * Once the iterator is initialized, we iterate like this:\n *\n *  streamIterator myiterator;\n *  streamIteratorStart(&myiterator,...);\n *  int64_t numfields;\n *  while(streamIteratorGetID(&myiterator,&ID,&numfields)) {\n *      while(numfields--) {\n *          unsigned char *key, *value;\n *          int64_t key_len, value_len;\n *          streamIteratorGetField(&myiterator,&key,&value,&key_len,&value_len);\n *\n *          ... do what you want with key and value ...\n *      }\n *  }\n *  streamIteratorStop(&myiterator); */\nvoid streamIteratorStart(streamIterator *si, stream *s, streamID *start, streamID *end, int rev) {\n    /* Initialize the iterator and translate the iteration start/stop\n     * elements into a 128 bit big-endian number. 
*/\n    if (start) {\n        streamEncodeID(si->start_key, start);\n    } else {\n        si->start_key[0] = 0;\n        si->start_key[1] = 0;\n    }\n\n    if (end) {\n        streamEncodeID(si->end_key, end);\n    } else {\n        si->end_key[0] = UINT64_MAX;\n        si->end_key[1] = UINT64_MAX;\n    }\n\n    /* Seek the correct node in the radix tree. */\n    raxStart(&si->ri, s->rax);\n    if (!rev) {\n        if (start && (start->ms || start->seq)) {\n            raxSeek(&si->ri, \"<=\", (unsigned char *)si->start_key, sizeof(si->start_key));\n            if (raxEOF(&si->ri)) raxSeek(&si->ri, \"^\", NULL, 0);\n        } else {\n            raxSeek(&si->ri, \"^\", NULL, 0);\n        }\n    } else {\n        if (end && (end->ms || end->seq)) {\n            raxSeek(&si->ri, \"<=\", (unsigned char *)si->end_key, sizeof(si->end_key));\n            if (raxEOF(&si->ri)) raxSeek(&si->ri, \"$\", NULL, 0);\n        } else {\n            raxSeek(&si->ri, \"$\", NULL, 0);\n        }\n    }\n    si->stream = s;\n    si->lp = NULL;           /* There is no current listpack right now. */\n    si->lp_ele = NULL;       /* Current listpack cursor. */\n    si->rev = rev;           /* Direction, if non-zero reversed, from end to start. */\n    si->skip_tombstones = 1; /* By default tombstones aren't emitted. */\n}\n\n/* Return 1 and store the current item ID at 'id' if there are still\n * elements within the iteration range, otherwise return 0 in order to\n * signal the iteration terminated. */\nint streamIteratorGetID(streamIterator *si, streamID *id, int64_t *numfields) {\n    while (1) { /* Will stop when element > stop_key or end of radix tree. */\n        /* If the current listpack is set to NULL, this is the start of the\n         * iteration or the previous listpack was completely iterated.\n         * Go to the next node. 
*/\n        if (si->lp == NULL || si->lp_ele == NULL) {\n            if (!si->rev && !raxNext(&si->ri))\n                return 0;\n            else if (si->rev && !raxPrev(&si->ri))\n                return 0;\n            serverAssert(si->ri.key_len == sizeof(streamID));\n            /* Get the master ID. */\n            streamDecodeID(si->ri.key,&si->master_id);\n            /* Get the master fields count. */\n            si->lp = si->ri.data;\n            si->lp_ele = lpFirst(si->lp);           /* Seek items count */\n            si->lp_ele = lpNext(si->lp,si->lp_ele); /* Seek deleted count. */\n            si->lp_ele = lpNext(si->lp,si->lp_ele); /* Seek num fields. */\n            si->master_fields_count = lpGetInteger(si->lp_ele);\n            si->lp_ele = lpNext(si->lp,si->lp_ele); /* Seek first field. */\n            si->master_fields_start = si->lp_ele;\n            /* We are now pointing to the first field of the master entry.\n             * We need to seek either the first or the last entry depending\n             * on the direction of the iteration. */\n            if (!si->rev) {\n                /* If we are iterating in normal order, skip the master fields\n                 * to seek the first actual entry. */\n                for (uint64_t i = 0; i < si->master_fields_count; i++)\n                    si->lp_ele = lpNext(si->lp,si->lp_ele);\n            } else {\n                /* If we are iterating in reverse direction, just seek the\n                 * last part of the last entry in the listpack (that is, the\n                 * fields count). */\n                si->lp_ele = lpLast(si->lp);\n            }\n        } else if (si->rev) {\n            /* If we are iterating in the reverse order, and this is not\n             * the first entry emitted for this listpack, then we already\n             * emitted the current entry, and have to go back to the previous\n             * one. 
*/\n            int64_t lp_count = lpGetInteger(si->lp_ele);\n            while (lp_count--) si->lp_ele = lpPrev(si->lp, si->lp_ele);\n            /* Seek lp-count of prev entry. */\n            si->lp_ele = lpPrev(si->lp, si->lp_ele);\n        }\n\n        /* For every radix tree node, iterate the corresponding listpack,\n         * returning elements when they are within range. */\n        while (1) {\n            if (!si->rev) {\n                /* If we are going forward, skip the previous entry\n                 * lp-count field (or in case of the master entry, the zero\n                 * term field) */\n                si->lp_ele = lpNext(si->lp,si->lp_ele);\n                if (si->lp_ele == NULL) break;\n            } else {\n                /* If we are going backward, read the number of elements this\n                 * entry is composed of, and jump backward N times to seek\n                 * its start. */\n                int64_t lp_count = lpGetInteger(si->lp_ele);\n                if (lp_count == 0) { /* We reached the master entry. */\n                    si->lp = NULL;\n                    si->lp_ele = NULL;\n                    break;\n                }\n                while(lp_count--) si->lp_ele = lpPrev(si->lp,si->lp_ele);\n            }\n\n            /* Get the flags entry. */\n            si->lp_flags = si->lp_ele;\n            int64_t flags = lpGetInteger(si->lp_ele);\n            si->lp_ele = lpNext(si->lp,si->lp_ele); /* Seek ID. */\n\n            /* Get the ID: it is encoded as difference between the master\n             * ID and this entry ID. 
*/\n            *id = si->master_id;\n            id->ms += lpGetInteger(si->lp_ele);\n            si->lp_ele = lpNext(si->lp, si->lp_ele);\n            id->seq += lpGetInteger(si->lp_ele);\n            si->lp_ele = lpNext(si->lp, si->lp_ele);\n            unsigned char buf[sizeof(streamID)];\n            streamEncodeID(buf, id);\n\n            /* The number of entries is here or not depending on the\n             * flags. */\n            if (flags & STREAM_ITEM_FLAG_SAMEFIELDS) {\n                *numfields = si->master_fields_count;\n            } else {\n                *numfields = lpGetInteger(si->lp_ele);\n                si->lp_ele = lpNext(si->lp, si->lp_ele);\n            }\n            serverAssert(*numfields >= 0);\n\n            /* If current >= start, and the entry is not marked as\n             * deleted or tombstones are included, emit it. */\n            if (!si->rev) {\n                if (memcmp(buf,si->start_key,sizeof(streamID)) >= 0 &&\n                    (!si->skip_tombstones || !(flags & STREAM_ITEM_FLAG_DELETED)))\n                {\n                    if (memcmp(buf,si->end_key,sizeof(streamID)) > 0)\n                        return 0; /* We are already out of range. */\n                    si->entry_flags = flags;\n                    if (flags & STREAM_ITEM_FLAG_SAMEFIELDS)\n                        si->master_fields_ptr = si->master_fields_start;\n                    return 1; /* Valid item returned. */\n                }\n            } else {\n                if (memcmp(buf, si->end_key, sizeof(streamID)) <= 0 &&\n                    (!si->skip_tombstones || !(flags & STREAM_ITEM_FLAG_DELETED))) {\n                    if (memcmp(buf, si->start_key, sizeof(streamID)) < 0) return 0; /* We are already out of range. 
*/\n                    si->entry_flags = flags;\n                    if (flags & STREAM_ITEM_FLAG_SAMEFIELDS)\n                        si->master_fields_ptr = si->master_fields_start;\n                    return 1; /* Valid item returned. */\n                }\n            }\n\n            /* If we do not emit, we have to discard if we are going\n             * forward, or seek the previous entry if we are going\n             * backward. */\n            if (!si->rev) {\n                int64_t to_discard = (flags & STREAM_ITEM_FLAG_SAMEFIELDS) ? *numfields : *numfields * 2;\n                for (int64_t i = 0; i < to_discard; i++) si->lp_ele = lpNext(si->lp, si->lp_ele);\n            } else {\n                int64_t prev_times = 4; /* flag + id ms + id seq + one more to\n                                           go back to the previous entry \"count\"\n                                           field. */\n                /* If the entry was not flagged SAMEFIELD we also read the\n                 * number of fields, so go back one more. */\n                if (!(flags & STREAM_ITEM_FLAG_SAMEFIELDS)) prev_times++;\n                while (prev_times--) si->lp_ele = lpPrev(si->lp, si->lp_ele);\n            }\n        }\n\n        /* End of listpack reached. Try the next/prev radix tree node. */\n    }\n}\n\n/* Get the field and value of the current item we are iterating. This should\n * be called immediately after streamIteratorGetID(), and for each field\n * according to the number of fields returned by streamIteratorGetID().\n * The function populates the field and value pointers and the corresponding\n * lengths by reference, that are valid until the next iterator call, assuming\n * no one touches the stream meanwhile. 
*/\nvoid streamIteratorGetField(streamIterator *si, unsigned char **fieldptr, unsigned char **valueptr, int64_t *fieldlen, int64_t *valuelen) {\n    if (si->entry_flags & STREAM_ITEM_FLAG_SAMEFIELDS) {\n        *fieldptr = lpGet(si->master_fields_ptr,fieldlen,si->field_buf);\n        si->master_fields_ptr = lpNext(si->lp,si->master_fields_ptr);\n    } else {\n        *fieldptr = lpGet(si->lp_ele, fieldlen, si->field_buf);\n        si->lp_ele = lpNext(si->lp, si->lp_ele);\n    }\n    *valueptr = lpGet(si->lp_ele, valuelen, si->value_buf);\n    si->lp_ele = lpNext(si->lp, si->lp_ele);\n}\n\n/* Remove the current entry from the stream: can be called after the\n * GetID() API or after any GetField() call, however we need to iterate\n * a valid entry while calling this function. Moreover the function\n * requires the entry ID we are currently iterating, that was previously\n * returned by GetID().\n *\n * Note that after calling this function, next calls to GetField() can't\n * be performed: the entry is now deleted. Instead the iterator will\n * automatically re-seek to the next entry, so the caller should continue\n * with GetID(). */\n\n/* Stop the stream iterator. The only cleanup we need is to free the rax\n * iterator, since the stream iterator itself is supposed to be stack\n * allocated. */\nvoid streamIteratorStop(streamIterator *si) {\n    raxStop(&si->ri);\n}\n\nstatic int streamIDEqZero(streamID *id) {\n    return !(id->ms || id->seq);\n}\n\n/* This function returns a value that is the ID's logical read counter, or its\n * distance (the number of entries) from the first entry ever to have been added\n * to the stream.\n *\n * A counter is returned only in one of the following cases:\n * 1. The ID is the same as the stream's last ID. In this case, the returned\n *    is the same as the stream's entries_added counter.\n * 2. The ID equals that of the currently first entry in the stream, and the\n *    stream has no tombstones. 
The returned value, in this case, is the result\n *    of subtracting the stream's length from its entries_added, incremented by\n *    one.\n * 3. The ID is less than the stream's first current entry's ID, and there are\n *    no tombstones. Here the estimated counter is the result of subtracting the\n *    stream's length from its entries_added.\n * 4. The stream's entries_added is zero, meaning that no entries were ever\n *    added.\n *\n * The special return value of SCG_INVALID_ENTRIES_READ signals that the\n * counter's value isn't obtainable. It is returned in these cases:\n * 1. The provided ID, if it even exists, is somewhere between the stream's\n *    current first and last entries' IDs, or in the future.\n * 2. The stream contains one or more tombstones. */\nlong long streamEstimateDistanceFromFirstEverEntry(stream *s, streamID *id) {\n    /* The counter of any ID in an empty, never-before-used stream is 0. */\n    if (!s->entries_added) {\n        return 0;\n    }\n\n    /* In the empty stream, if the ID is smaller or equal to the last ID,\n     * it can be set to the current entries_added value. */\n    if (!s->length && streamCompareID(id, &s->last_id) < 1) {\n        return s->entries_added;\n    }\n\n    if (!streamIDEqZero(id) && streamCompareID(id, &s->max_deleted_entry_id) < 0) {\n        /* The ID is before the last tombstone, so the counter is unknown. */\n        return SCG_INVALID_ENTRIES_READ;\n    }\n\n    int cmp_last = streamCompareID(id, &s->last_id);\n    if (cmp_last == 0) {\n        /* Return the exact counter of the last entry in the stream. */\n        return s->entries_added;\n    } else if (cmp_last > 0) {\n        /* The counter of a future ID is unknown. 
*/\n        return SCG_INVALID_ENTRIES_READ;\n    }\n\n    int cmp_id_first = streamCompareID(id, &s->first_id);\n    int cmp_xdel_first = streamCompareID(&s->max_deleted_entry_id, &s->first_id);\n    if (streamIDEqZero(&s->max_deleted_entry_id) || cmp_xdel_first < 0) {\n        /* There's definitely no fragmentation ahead. */\n        if (cmp_id_first < 0) {\n            /* Return the estimated counter. */\n            return s->entries_added - s->length;\n        } else if (cmp_id_first == 0) {\n            /* Return the exact counter of the first entry in the stream. */\n            return s->entries_added - s->length + 1;\n        }\n    }\n\n    /* The ID is either before an XDEL that fragments the stream or an arbitrary\n     * ID. In either case, we can't make a prediction. */\n    return SCG_INVALID_ENTRIES_READ;\n}\n\n/* Send the stream items in the specified range to the client 'c'. The range\n * the client will receive is between start and end inclusive, if 'count' is\n * non zero, no more than 'count' elements are sent.\n *\n * The 'end' pointer can be NULL to mean that we want all the elements from\n * 'start' till the end of the stream. If 'rev' is non zero, elements are\n * produced in reversed order from end to start.\n *\n * The function returns the number of entries emitted.\n *\n * If group and consumer are not NULL, the function performs additional work:\n * 1. It updates the last delivered ID in the group in case we are\n *    sending IDs greater than the current last ID.\n * 2. If the requested IDs are already assigned to some other consumer, the\n *    function will not return it to the client.\n * 3. An entry in the pending list will be created for every entry delivered\n *    for the first time to this consumer.\n * 4. The group's read counter is incremented if it is already valid and there\n *    are no future tombstones, or is invalidated (set to 0) otherwise. 
If the\n *    counter is invalid to begin with, we try to obtain it for the last\n *    delivered ID.\n *\n * The behavior may be modified passing non-zero flags:\n *\n * STREAM_RWR_NOACK: Do not create PEL entries, that is, the point \"3\" above\n *                   is not performed.\n * STREAM_RWR_RAWENTRIES: Do not emit array boundaries, but just the entries,\n *                        and return the number of entries emitted as usually.\n *                        This is used when the function is just used in order\n *                        to emit data and there is some higher level logic.\n *\n * The final argument 'spi' (stream propagation info pointer) is a structure\n * filled with information needed to propagate the command execution to AOF\n * and replicas, in the case a consumer group was passed: we need to generate\n * XCLAIM commands to create the pending list into AOF/replicas in that case.\n *\n * If 'spi' is set to NULL no propagation will happen even if the group was\n * given, but currently such a feature is never used by the code base that\n * will always pass 'spi' and propagate when a group is passed.\n *\n * Note that this function is recursive in certain cases. When it's called\n * with a non NULL group and consumer argument, it may call\n * streamReplyWithRangeFromConsumerPEL() in order to get entries from the\n * consumer pending entries list. However such a function will then call\n * streamReplyWithRange() in order to emit single entries (found in the\n * PEL by ID) to the client. This is the use case for the STREAM_RWR_RAWENTRIES\n * flag.\n */\n#define STREAM_RWR_NOACK (1 << 0) /* Do not create entries in the PEL. */\n#define STREAM_RWR_RAWENTRIES                                         \\\n    (1 << 1)                        /* Do not emit protocol for array \\\n                                       boundaries, just the entries. */\n#define STREAM_RWR_HISTORY (1 << 2) /* Only serve consumer local PEL. 
*/\n\n\n/* -----------------------------------------------------------------------\n * Low level implementation of consumer groups\n * ----------------------------------------------------------------------- */\n\n\n/* Free a NACK entry. */\nvoid streamFreeNACK(streamNACK *na) {\n    zfree(na);\n}\n\n/* Create a new consumer group in the context of the stream 's', having the\n * specified name, last server ID and reads counter. If a consumer group with\n * the same name already exists NULL is returned, otherwise the pointer to the\n * consumer group is returned. */\nstreamCG *streamCreateCG(stream *s, const char *name, size_t namelen, streamID *id, long long entries_read) {\n    if (s->cgroups == NULL) s->cgroups = raxNew();\n    if (raxFind(s->cgroups, (unsigned char *)name, namelen, NULL)) return NULL;\n\n    streamCG *cg = zmalloc(sizeof(*cg));\n    cg->pel = raxNew();\n    cg->consumers = raxNew();\n    cg->last_id = *id;\n    cg->entries_read = entries_read;\n    raxInsert(s->cgroups, (unsigned char *)name, namelen, cg, NULL);\n    return cg;\n}\n"
  },
  {
    "path": "src/redis/util.c",
    "content": "/*\n * Copyright (c) 2009-2012, Salvatore Sanfilippo <antirez at gmail dot com>\n * All rights reserved.\n *\n * Redistribution and use in source and binary forms, with or without\n * modification, are permitted provided that the following conditions are met:\n *\n *   * Redistributions of source code must retain the above copyright notice,\n *     this list of conditions and the following disclaimer.\n *   * Redistributions in binary form must reproduce the above copyright\n *     notice, this list of conditions and the following disclaimer in the\n *     documentation and/or other materials provided with the distribution.\n *   * Neither the name of Redis nor the names of its contributors may be used\n *     to endorse or promote products derived from this software without\n *     specific prior written permission.\n *\n * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS \"AS IS\"\n * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE\n * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE\n * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE\n * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR\n * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF\n * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS\n * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN\n * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)\n * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE\n * POSSIBILITY OF SUCH DAMAGE.\n */\n\n\n#include <stdlib.h>\n#include <stdio.h>\n#include <string.h>\n#include <ctype.h>\n#include <limits.h>\n#include <math.h>\n#include <unistd.h>\n#include <sys/time.h>\n#include <float.h>\n#include <stdint.h>\n#include <errno.h>\n#include <time.h>\n#include \"util.h\"\n\n\n/* Return the number of digits of 'v' when converted to string in radix 10.\n * See ll2string() for more information. */\nstatic uint32_t digits10(uint64_t v) {\n    if (v < 10) return 1;\n    if (v < 100) return 2;\n    if (v < 1000) return 3;\n    if (v < 1000000000000UL) {\n        if (v < 100000000UL) {\n            if (v < 1000000) {\n                if (v < 10000) return 4;\n                return 5 + (v >= 100000);\n            }\n            return 7 + (v >= 10000000UL);\n        }\n        if (v < 10000000000UL) {\n            return 9 + (v >= 1000000000UL);\n        }\n        return 11 + (v >= 100000000000UL);\n    }\n    return 12 + digits10(v / 1000000000000UL);\n}\n\n/* Convert a long long into a string. 
Returns the number of\n * characters needed to represent the number.\n * If the buffer is not big enough to store the string, 0 is returned.\n *\n * Based on the following article (that apparently does not provide a\n * novel approach but only publicizes an already used technique):\n *\n * https://www.facebook.com/notes/facebook-engineering/three-optimization-tips-for-c/10151361643253920\n *\n * Modified in order to handle signed integers since the original code was\n * designed for unsigned integers. */\nint ll2string(char *dst, size_t dstlen, long long svalue) {\n    static const char digits[201] =\n        \"0001020304050607080910111213141516171819\"\n        \"2021222324252627282930313233343536373839\"\n        \"4041424344454647484950515253545556575859\"\n        \"6061626364656667686970717273747576777879\"\n        \"8081828384858687888990919293949596979899\";\n    int negative;\n    unsigned long long value;\n\n    /* The main loop works with 64bit unsigned integers for simplicity, so\n     * we convert the number here and remember if it is negative. */\n    if (svalue < 0) {\n        if (svalue != LLONG_MIN) {\n            value = -svalue;\n        } else {\n            value = ((unsigned long long) LLONG_MAX)+1;\n        }\n        negative = 1;\n    } else {\n        value = svalue;\n        negative = 0;\n    }\n\n    /* Check length. */\n    uint32_t const length = digits10(value)+negative;\n    if (length >= dstlen) return 0;\n\n    /* Null term. */\n    uint32_t next = length;\n    dst[next] = '\\0';\n    next--;\n    while (value >= 100) {\n        int const i = (value % 100) * 2;\n        value /= 100;\n        dst[next] = digits[i + 1];\n        dst[next - 1] = digits[i];\n        next -= 2;\n    }\n\n    /* Handle last 1-2 digits. 
*/\n    if (value < 10) {\n        dst[next] = '0' + (uint32_t) value;\n    } else {\n        int i = (uint32_t) value * 2;\n        dst[next] = digits[i + 1];\n        dst[next - 1] = digits[i];\n    }\n\n    /* Add sign. */\n    if (negative) dst[0] = '-';\n    return length;\n}\n\n/* Convert a string into a long long. Returns 1 if the string could be parsed\n * into a (non-overflowing) long long, 0 otherwise. The value will be set to\n * the parsed value when appropriate.\n *\n * Note that this function demands that the string strictly represents\n * a long long: no spaces or other characters before or after the string\n * representing the number are accepted, nor zeroes at the start if not\n * for the string \"0\" representing the zero number.\n *\n * Because of its strictness, it is safe to use this function to check if\n * you can convert a string into a long long, and obtain back the string\n * from the number without any loss in the string representation. */\nint string2ll(const char *s, size_t slen, long long *value) {\n    const char *p = s;\n    size_t plen = 0;\n    int negative = 0;\n    unsigned long long v;\n\n    /* A zero length string is not a valid number. */\n    if (plen == slen)\n        return 0;\n\n    /* Special case: first and only digit is 0. */\n    if (slen == 1 && p[0] == '0') {\n        if (value != NULL) *value = 0;\n        return 1;\n    }\n\n    /* Handle negative numbers: just set a flag and continue like if it\n     * was a positive number. Later convert into negative. */\n    if (p[0] == '-') {\n        negative = 1;\n        p++; plen++;\n\n        /* Abort on only a negative sign. */\n        if (plen == slen)\n            return 0;\n    }\n\n    /* First digit should be 1-9, otherwise the string should just be 0. */\n    if (p[0] >= '1' && p[0] <= '9') {\n        v = p[0]-'0';\n        p++; plen++;\n    } else {\n        return 0;\n    }\n\n    /* Parse all the other digits, checking for overflow at every step. 
*/\n    while (plen < slen && p[0] >= '0' && p[0] <= '9') {\n        if (v > (ULLONG_MAX / 10)) /* Overflow. */\n            return 0;\n        v *= 10;\n\n        if (v > (ULLONG_MAX - (p[0]-'0'))) /* Overflow. */\n            return 0;\n        v += p[0]-'0';\n\n        p++; plen++;\n    }\n\n    /* Return if not all bytes were used. */\n    if (plen < slen)\n        return 0;\n\n    /* Convert to negative if needed, and do the final overflow check when\n     * converting from unsigned long long to long long. */\n    if (negative) {\n        if (v > ((unsigned long long)(-(LLONG_MIN+1))+1)) /* Overflow. */\n            return 0;\n        if (value != NULL) *value = -v;\n    } else {\n        if (v > LLONG_MAX) /* Overflow. */\n            return 0;\n        if (value != NULL) *value = v;\n    }\n    return 1;\n}\n"
  },
  {
    "path": "src/redis/util.h",
    "content": "/*\n * Copyright (c) 2009-2012, Salvatore Sanfilippo <antirez at gmail dot com>\n * All rights reserved.\n *\n * Redistribution and use in source and binary forms, with or without\n * modification, are permitted provided that the following conditions are met:\n *\n *   * Redistributions of source code must retain the above copyright notice,\n *     this list of conditions and the following disclaimer.\n *   * Redistributions in binary form must reproduce the above copyright\n *     notice, this list of conditions and the following disclaimer in the\n *     documentation and/or other materials provided with the distribution.\n *   * Neither the name of Redis nor the names of its contributors may be used\n *     to endorse or promote products derived from this software without\n *     specific prior written permission.\n *\n * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS \"AS IS\"\n * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE\n * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE\n * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE\n * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR\n * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF\n * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS\n * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN\n * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)\n * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE\n * POSSIBILITY OF SUCH DAMAGE.\n */\n\n#ifndef __REDIS_UTIL_H\n#define __REDIS_UTIL_H\n\n#include <stdint.h>\n#include <time.h>\n#include <unistd.h>\n\n\n/* The maximum number of characters needed to represent a long double\n * as a string (long double has a huge range).\n * This should be the size of the buffer given to ld2string */\n#define MAX_LONG_DOUBLE_CHARS 5*1024\n\n/* Error codes */\n#define C_OK                    0\n#define C_ERR                   -1\n\n\nint ll2string(char *s, size_t len, long long value);\nint string2ll(const char *s, size_t slen, long long *value);\n\n#define LOG_MAX_LEN    1024 /* Default maximum length of syslog messages.*/\n\n/* Log levels */\n#define LL_DEBUG 0\n#define LL_VERBOSE 1\n#define LL_NOTICE 2\n#define LL_WARNING 3\n#define LL_RAW (1<<10) /* Modifier to log without timestamp */\n\n\n/* Bytes needed for long -> str + '\\0' */\n#define LONG_STR_SIZE 21\n\nvoid serverLog(int level, const char *fmt, ...);\nvoid _serverPanic(const char *file, int line, const char *msg, ...);\nvoid _serverAssert(const char *estr, const char *file, int line);\n\n#define serverPanic(...) _serverPanic(__FILE__,__LINE__,__VA_ARGS__),_exit(1)\n#define serverAssert(_e) ((_e)?(void)0 : (_serverAssert(#_e,__FILE__,__LINE__),_exit(1)))\n\ntypedef long long mstime_t; /* millisecond time type. */\n\n\n#endif\n"
  },
  {
    "path": "src/redis/ziplist.c",
    "content": "/* The ziplist is a specially encoded dually linked list that is designed\n * to be very memory efficient. It stores both strings and integer values,\n * where integers are encoded as actual integers instead of a series of\n * characters. It allows push and pop operations on either side of the list\n * in O(1) time. However, because every operation requires a reallocation of\n * the memory used by the ziplist, the actual complexity is related to the\n * amount of memory used by the ziplist.\n *\n * ----------------------------------------------------------------------------\n *\n * ZIPLIST OVERALL LAYOUT\n * ======================\n *\n * The general layout of the ziplist is as follows:\n *\n * <zlbytes> <zltail> <zllen> <entry> <entry> ... <entry> <zlend>\n *\n * NOTE: all fields are stored in little endian, if not specified otherwise.\n *\n * <uint32_t zlbytes> is an unsigned integer to hold the number of bytes that\n * the ziplist occupies, including the four bytes of the zlbytes field itself.\n * This value needs to be stored to be able to resize the entire structure\n * without the need to traverse it first.\n *\n * <uint32_t zltail> is the offset to the last entry in the list. This allows\n * a pop operation on the far side of the list without the need for full\n * traversal.\n *\n * <uint16_t zllen> is the number of entries. When there are more than\n * 2^16-2 entries, this value is set to 2^16-1 and we need to traverse the\n * entire list to know how many items it holds.\n *\n * <uint8_t zlend> is a special entry representing the end of the ziplist.\n * Is encoded as a single byte equal to 255. No other normal entry starts\n * with a byte set to the value of 255.\n *\n * ZIPLIST ENTRIES\n * ===============\n *\n * Every entry in the ziplist is prefixed by metadata that contains two pieces\n * of information. First, the length of the previous entry is stored to be\n * able to traverse the list from back to front. 
Second, the entry encoding is\n * provided. It represents the entry type, integer or string, and in the case\n * of strings it also represents the length of the string payload.\n * So a complete entry is stored like this:\n *\n * <prevlen> <encoding> <entry-data>\n *\n * Sometimes the encoding represents the entry itself, like for small integers\n * as we'll see later. In such a case the <entry-data> part is missing, and we\n * could have just:\n *\n * <prevlen> <encoding>\n *\n * The length of the previous entry, <prevlen>, is encoded in the following way:\n * If this length is smaller than 254 bytes, it will only consume a single\n * byte representing the length as an unsigned 8 bit integer. When the length\n * is greater than or equal to 254, it will consume 5 bytes. The first byte is\n * set to 254 (FE) to indicate a larger value is following. The remaining 4\n * bytes take the length of the previous entry as value.\n *\n * So practically an entry is encoded in the following way:\n *\n * <prevlen from 0 to 253> <encoding> <entry>\n *\n * Or alternatively if the previous entry length is greater than 253 bytes\n * the following encoding is used:\n *\n * 0xFE <4 bytes unsigned little endian prevlen> <encoding> <entry>\n *\n * The encoding field of the entry depends on the content of the\n * entry. When the entry is a string, the first 2 bits of the encoding first\n * byte will hold the type of encoding used to store the length of the string,\n * followed by the actual length of the string. When the entry is an integer\n * the first 2 bits are both set to 1. The following 2 bits are used to specify\n * what kind of integer will be stored after this header. An overview of the\n * different types and encodings is as follows. 
The first byte is always enough\n * to determine the kind of entry.\n *\n * |00pppppp| - 1 byte\n *      String value with length less than or equal to 63 bytes (6 bits).\n *      \"pppppp\" represents the unsigned 6 bit length.\n * |01pppppp|qqqqqqqq| - 2 bytes\n *      String value with length less than or equal to 16383 bytes (14 bits).\n *      IMPORTANT: The 14 bit number is stored in big endian.\n * |10000000|qqqqqqqq|rrrrrrrr|ssssssss|tttttttt| - 5 bytes\n *      String value with length greater than or equal to 16384 bytes.\n *      Only the 4 bytes following the first byte represents the length\n *      up to 2^32-1. The 6 lower bits of the first byte are not used and\n *      are set to zero.\n *      IMPORTANT: The 32 bit number is stored in big endian.\n * |11000000| - 3 bytes\n *      Integer encoded as int16_t (2 bytes).\n * |11010000| - 5 bytes\n *      Integer encoded as int32_t (4 bytes).\n * |11100000| - 9 bytes\n *      Integer encoded as int64_t (8 bytes).\n * |11110000| - 4 bytes\n *      Integer encoded as 24 bit signed (3 bytes).\n * |11111110| - 2 bytes\n *      Integer encoded as 8 bit signed (1 byte).\n * |1111xxxx| - (with xxxx between 0001 and 1101) immediate 4 bit integer.\n *      Unsigned integer from 0 to 12. The encoded value is actually from\n *      1 to 13 because 0000 and 1111 can not be used, so 1 should be\n *      subtracted from the encoded 4 bit value to obtain the right value.\n * |11111111| - End of ziplist special entry.\n *\n * Like for the ziplist header, all the integers are represented in little\n * endian byte order, even when this code is compiled in big endian systems.\n *\n * EXAMPLES OF ACTUAL ZIPLISTS\n * ===========================\n *\n * The following is a ziplist containing the two elements representing\n * the strings \"2\" and \"5\". 
It is composed of 15 bytes, that we visually\n * split into sections:\n *\n *  [0f 00 00 00] [0c 00 00 00] [02 00] [00 f3] [02 f6] [ff]\n *        |             |          |       |       |     |\n *     zlbytes        zltail     zllen    \"2\"     \"5\"   end\n *\n * The first 4 bytes represent the number 15, that is the number of bytes\n * the whole ziplist is composed of. The second 4 bytes are the offset\n * at which the last ziplist entry is found, that is 12, in fact the\n * last entry, that is \"5\", is at offset 12 inside the ziplist.\n * The next 16 bit integer represents the number of elements inside the\n * ziplist, its value is 2 since there are just two elements inside.\n * Finally \"00 f3\" is the first entry representing the number 2. It is\n * composed of the previous entry length, which is zero because this is\n * our first entry, and the byte F3 which corresponds to the encoding\n * |1111xxxx| with xxxx between 0001 and 1101. We need to remove the \"F\"\n * higher order bits 1111, and subtract 1 from the \"3\", so the entry value\n * is \"2\". The next entry has a prevlen of 02, since the first entry is\n * composed of exactly two bytes. The entry itself, F6, is encoded exactly\n * like the first entry, and 6-1 = 5, so the value of the entry is 5.\n * Finally the special entry FF signals the end of the ziplist.\n *\n * Adding another element to the above string with the value \"Hello World\"\n * allows us to show how the ziplist encodes small strings. We'll just show\n * the hex dump of the entry itself. Imagine the bytes as following the\n * entry that stores \"5\" in the ziplist above:\n *\n * [02] [0b] [48 65 6c 6c 6f 20 57 6f 72 6c 64]\n *\n * The first byte, 02, is the length of the previous entry. The next\n * byte represents the encoding in the pattern |00pppppp| that means\n * that the entry is a string of length <pppppp>, so 0B means that\n * an 11 bytes string follows. 
From the third byte (48) to the last (64)\n * there are just the ASCII characters for \"Hello World\".\n *\n * ----------------------------------------------------------------------------\n *\n * Copyright (c) 2009-2012, Pieter Noordhuis <pcnoordhuis at gmail dot com>\n * Copyright (c) 2009-2017, 2020, Redis Ltd.\n * All rights reserved.\n *\n * Redistribution and use in source and binary forms, with or without\n * modification, are permitted provided that the following conditions are met:\n *\n *   * Redistributions of source code must retain the above copyright notice,\n *     this list of conditions and the following disclaimer.\n *   * Redistributions in binary form must reproduce the above copyright\n *     notice, this list of conditions and the following disclaimer in the\n *     documentation and/or other materials provided with the distribution.\n *   * Neither the name of Redis nor the names of its contributors may be used\n *     to endorse or promote products derived from this software without\n *     specific prior written permission.\n *\n * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS \"AS IS\"\n * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE\n * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE\n * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE\n * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR\n * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF\n * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS\n * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN\n * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)\n * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE\n * POSSIBILITY OF SUCH DAMAGE.\n */\n\n#include <assert.h>\n#include <stdio.h>\n#include <stdlib.h>\n#include <string.h>\n#include <stdint.h>\n#include <limits.h>\n#include \"zmalloc.h\"\n#include \"util.h\"\n#include \"ziplist.h\"\n#include \"config.h\"\n#include \"endianconv.h\"\n\n#define ZIP_END 255         /* Special \"end of ziplist\" entry. */\n#define ZIP_BIG_PREVLEN                                                                                                \\\n    254 /* ZIP_BIG_PREVLEN - 1 is the max number of bytes of                                                           \\\n           the previous entry, for the \"prevlen\" field prefixing                                                       \\\n           each entry, to be represented with just a single byte.                                                      \\\n           Otherwise it is represented as FE AA BB CC DD, where                                                        \\\n           AA BB CC DD are a 4 bytes unsigned integer                                                                  \\\n                               representing the previous entry len. 
*/\n\n/* Different encoding/length possibilities */\n#define ZIP_STR_MASK 0xc0\n#define ZIP_INT_MASK 0x30\n#define ZIP_STR_06B (0 << 6)\n#define ZIP_STR_14B (1 << 6)\n#define ZIP_STR_32B (2 << 6)\n#define ZIP_INT_16B (0xc0 | 0<<4)\n#define ZIP_INT_32B (0xc0 | 1<<4)\n#define ZIP_INT_64B (0xc0 | 2<<4)\n#define ZIP_INT_24B (0xc0 | 3<<4)\n#define ZIP_INT_8B 0xfe\n\n/* 4 bit integer immediate encoding |1111xxxx| with xxxx between\n * 0001 and 1101. */\n#define ZIP_INT_IMM_MASK                                                                                               \\\n    0x0f                     /* Mask to extract the 4 bits value. To add                                               \\\n                                   one is needed to reconstruct the value. */\n#define ZIP_INT_IMM_MIN 0xf1    /* 11110001 */\n#define ZIP_INT_IMM_MAX 0xfd    /* 11111101 */\n\n#define INT24_MAX 0x7fffff\n#define INT24_MIN (-INT24_MAX - 1)\n\n/* Macro to determine if the entry is a string. String entries never start\n * with \"11\" as most significant bits of the first byte. */\n#define ZIP_IS_STR(enc) (((enc) & ZIP_STR_MASK) < ZIP_STR_MASK)\n\n/* Utility macros.*/\n\n/* Return total bytes a ziplist is composed of. */\n#define ZIPLIST_BYTES(zl)       (*((uint32_t*)(zl)))\n\n/* Return the offset of the last item inside the ziplist. */\n#define ZIPLIST_TAIL_OFFSET(zl) (*((uint32_t*)((zl)+sizeof(uint32_t))))\n\n/* Return the length of a ziplist, or UINT16_MAX if the length cannot be\n * determined without scanning the whole ziplist. */\n#define ZIPLIST_LENGTH(zl)      (*((uint16_t*)((zl)+sizeof(uint32_t)*2)))\n\n/* The size of a ziplist header: two 32 bit integers for the total\n * bytes count and last item offset. One 16 bit integer for the number\n * of items field. */\n#define ZIPLIST_HEADER_SIZE     (sizeof(uint32_t)*2+sizeof(uint16_t))\n\n/* Size of the \"end of ziplist\" entry. Just one byte. 
*/\n#define ZIPLIST_END_SIZE        (sizeof(uint8_t))\n\n/* Return the pointer to the first entry of a ziplist. */\n#define ZIPLIST_ENTRY_HEAD(zl)  ((zl)+ZIPLIST_HEADER_SIZE)\n\n/* Return the pointer to the last entry of a ziplist, using the\n * last entry offset inside the ziplist header. */\n#define ZIPLIST_ENTRY_TAIL(zl)  ((zl)+intrev32ifbe(ZIPLIST_TAIL_OFFSET(zl)))\n\n/* Return the pointer to the last byte of a ziplist, which is, the\n * end of ziplist FF entry. */\n#define ZIPLIST_ENTRY_END(zl)   ((zl)+intrev32ifbe(ZIPLIST_BYTES(zl))-ZIPLIST_END_SIZE)\n\n/* Increment the number of items field in the ziplist header. Note that this\n * macro should never overflow the unsigned 16 bit integer, since entries are\n * always pushed one at a time. When UINT16_MAX is reached we want the count\n * to stay there to signal that a full scan is needed to get the number of\n * items inside the ziplist. */\n#define ZIPLIST_INCR_LENGTH(zl, incr)                                                                                  \\\n    {                                                                                                                  \\\n    if (intrev16ifbe(ZIPLIST_LENGTH(zl)) < UINT16_MAX) \\\n        ZIPLIST_LENGTH(zl) = intrev16ifbe(intrev16ifbe(ZIPLIST_LENGTH(zl))+incr); \\\n}\n\n/* Don't let ziplists grow over 1GB in any case, don't wanna risk overflow in\n * zlbytes*/\n#define ZIPLIST_MAX_SAFETY_SIZE (1<<30)\nint ziplistSafeToAdd(unsigned char* zl, size_t add) {\n    size_t len = zl? ziplistBlobLen(zl): 0;\n    if (len + add > ZIPLIST_MAX_SAFETY_SIZE) return 0;\n    return 1;\n}\n\n\n/* We use this function to receive information about a ziplist entry.\n * Note that this is not how the data is actually encoded, is just what we\n * get filled by a function in order to operate more easily. */\ntypedef struct zlentry {\n    unsigned int prevrawlensize; /* Bytes used to encode the previous entry len*/\n    unsigned int prevrawlen;     /* Previous entry len. 
*/\n    unsigned int lensize;        /* Bytes used to encode this entry type/len.\n                                    For example strings have a 1, 2 or 5 bytes\n                                    header. Integers always use a single byte.*/\n    unsigned int len;            /* Bytes used to represent the actual entry.\n                                    For strings this is just the string length\n                                    while for integers it is 1, 2, 3, 4, 8 or\n                                    0 (for 4 bit immediate) depending on the\n                                    number range. */\n    unsigned int headersize;     /* prevrawlensize + lensize. */\n    unsigned char encoding;      /* Set to ZIP_STR_* or ZIP_INT_* depending on\n                                    the entry encoding. However for 4 bits\n                                    immediate integers this can assume a range\n                                    of values and must be range-checked. */\n    unsigned char *p;            /* Pointer to the very start of the entry, that\n                                    is, this points to prev-entry-len field. */\n} zlentry;\n\n#define ZIPLIST_ENTRY_ZERO(zle)                                                                                        \\\n    {                                                                                                                  \\\n    (zle)->prevrawlensize = (zle)->prevrawlen = 0; \\\n    (zle)->lensize = (zle)->len = (zle)->headersize = 0; \\\n    (zle)->encoding = 0; \\\n    (zle)->p = NULL; \\\n}\n\n/* Extract the encoding from the byte pointed by 'ptr' and set it into\n * 'encoding' field of the zlentry structure. 
*/\n#define ZIP_ENTRY_ENCODING(ptr, encoding)                                                                              \\\n    do {                                                                                                               \\\n    (encoding) = ((ptr)[0]); \\\n    if ((encoding) < ZIP_STR_MASK) (encoding) &= ZIP_STR_MASK; \\\n} while(0)\n\n#define ZIP_ENCODING_SIZE_INVALID 0xff\n/* Return the number of bytes required to encode the entry type + length.\n * On error, return ZIP_ENCODING_SIZE_INVALID */\nstatic inline unsigned int zipEncodingLenSize(unsigned char encoding) {\n    if (encoding == ZIP_INT_16B || encoding == ZIP_INT_32B || encoding == ZIP_INT_24B || encoding == ZIP_INT_64B ||\n        encoding == ZIP_INT_8B)\n        return 1;\n    if (encoding >= ZIP_INT_IMM_MIN && encoding <= ZIP_INT_IMM_MAX) return 1;\n    if (encoding == ZIP_STR_06B) return 1;\n    if (encoding == ZIP_STR_14B) return 2;\n    if (encoding == ZIP_STR_32B) return 5;\n    return ZIP_ENCODING_SIZE_INVALID;\n}\n\n#define ZIP_ASSERT_ENCODING(encoding)                                                                                  \\\n    do {                                                                                                               \\\n    assert(zipEncodingLenSize(encoding) != ZIP_ENCODING_SIZE_INVALID);         \\\n} while (0)\n\n/* Return bytes needed to store integer encoded by 'encoding' */\nstatic inline unsigned int zipIntSize(unsigned char encoding) {\n    switch(encoding) {\n    case ZIP_INT_8B:  return 1;\n    case ZIP_INT_16B: return 2;\n    case ZIP_INT_24B: return 3;\n    case ZIP_INT_32B: return 4;\n    case ZIP_INT_64B: return 8;\n    }\n    if (encoding >= ZIP_INT_IMM_MIN && encoding <= ZIP_INT_IMM_MAX) return 0; /* 4 bit immediate */\n    /* bad encoding, covered by a previous call to ZIP_ASSERT_ENCODING */\n    valkey_unreachable();\n    return 0;\n}\n\n/* Write the encoding header of the entry in 'p'. 
If p is NULL it just returns\n * the amount of bytes required to encode such a length. Arguments:\n *\n * 'encoding' is the encoding we are using for the entry. It could be\n * ZIP_INT_* or ZIP_STR_* or between ZIP_INT_IMM_MIN and ZIP_INT_IMM_MAX\n * for single-byte small immediate integers.\n *\n * 'rawlen' is only used for ZIP_STR_* encodings and is the length of the\n * string that this entry represents.\n *\n * The function returns the number of bytes used by the encoding/length\n * header stored in 'p'. */\nunsigned int zipStoreEntryEncoding(unsigned char *p, unsigned char encoding, unsigned int rawlen) {\n    unsigned char len = 1, buf[5];\n\n    if (ZIP_IS_STR(encoding)) {\n        /* Although encoding is given it may not be set for strings,\n         * so we determine it here using the raw length. */\n        if (rawlen <= 0x3f) {\n            if (!p) return len;\n            buf[0] = ZIP_STR_06B | rawlen;\n        } else if (rawlen <= 0x3fff) {\n            len += 1;\n            if (!p) return len;\n            buf[0] = ZIP_STR_14B | ((rawlen >> 8) & 0x3f);\n            buf[1] = rawlen & 0xff;\n        } else {\n            len += 4;\n            if (!p) return len;\n            buf[0] = ZIP_STR_32B;\n            buf[1] = (rawlen >> 24) & 0xff;\n            buf[2] = (rawlen >> 16) & 0xff;\n            buf[3] = (rawlen >> 8) & 0xff;\n            buf[4] = rawlen & 0xff;\n        }\n    } else {\n        /* Implies integer encoding, so length is always 1. */\n        if (!p) return len;\n        buf[0] = encoding;\n    }\n\n    /* Store this length at p. 
*/\n    memcpy(p,buf,len);\n    return len;\n}\n\n/* Decode the entry encoding type and data length (string length for strings,\n * number of bytes used for the integer for integer entries) encoded in 'ptr'.\n * The 'encoding' variable is input, extracted by the caller, the 'lensize'\n * variable will hold the number of bytes required to encode the entry\n * length, and the 'len' variable will hold the entry length.\n * On invalid encoding error, lensize is set to 0. */\n#define ZIP_DECODE_LENGTH(ptr, encoding, lensize, len)                                                                 \\\n    do {                                                                                                               \\\n    if ((encoding) < ZIP_STR_MASK) {                                           \\\n        if ((encoding) == ZIP_STR_06B) {                                       \\\n            (lensize) = 1;                                                     \\\n            (len) = (ptr)[0] & 0x3f;                                           \\\n        } else if ((encoding) == ZIP_STR_14B) {                                \\\n            (lensize) = 2;                                                     \\\n            (len) = (((ptr)[0] & 0x3f) << 8) | (ptr)[1];                       \\\n        } else if ((encoding) == ZIP_STR_32B) {                                \\\n            (lensize) = 5;                                                     \\\n                (len) = ((uint32_t)(ptr)[1] << 24) | ((uint32_t)(ptr)[2] << 16) | ((uint32_t)(ptr)[3] << 8) |          \\\n                    ((uint32_t)(ptr)[4]);                                      \\\n        } else {                                                               \\\n            (lensize) = 0; /* bad encoding, should be covered by a previous */ \\\n            (len) = 0;     /* ZIP_ASSERT_ENCODING / zipEncodingLenSize, or  */ \\\n                           /* match the lensize after this macro with 0.    
*/ \\\n        }                                                                      \\\n    } else {                                                                   \\\n        (lensize) = 1;                                                         \\\n            if ((encoding) == ZIP_INT_8B)                                                                              \\\n                (len) = 1;                                                                                             \\\n            else if ((encoding) == ZIP_INT_16B)                                                                        \\\n                (len) = 2;                                                                                             \\\n            else if ((encoding) == ZIP_INT_24B)                                                                        \\\n                (len) = 3;                                                                                             \\\n            else if ((encoding) == ZIP_INT_32B)                                                                        \\\n                (len) = 4;                                                                                             \\\n            else if ((encoding) == ZIP_INT_64B)                                                                        \\\n                (len) = 8;                                                                                             \\\n        else if (encoding >= ZIP_INT_IMM_MIN && encoding <= ZIP_INT_IMM_MAX)   \\\n            (len) = 0; /* 4 bit immediate */                                   \\\n        else                                                                   \\\n            (lensize) = (len) = 0; /* bad encoding */                          \\\n    }                                                                          \\\n} while(0)\n\n/* Encode the length of the previous entry and write it to \"p\". 
This only\n * uses the larger encoding (required in __ziplistCascadeUpdate). */\nint zipStorePrevEntryLengthLarge(unsigned char *p, unsigned int len) {\n    uint32_t u32;\n    if (p != NULL) {\n        p[0] = ZIP_BIG_PREVLEN;\n        u32 = len;\n        memcpy(p+1,&u32,sizeof(u32));\n        memrev32ifbe(p+1);\n    }\n    return 1 + sizeof(uint32_t);\n}\n\n/* Encode the length of the previous entry and write it to \"p\". Return the\n * number of bytes needed to encode this length if \"p\" is NULL. */\nunsigned int zipStorePrevEntryLength(unsigned char *p, unsigned int len) {\n    if (p == NULL) {\n        return (len < ZIP_BIG_PREVLEN) ? 1 : sizeof(uint32_t) + 1;\n    } else {\n        if (len < ZIP_BIG_PREVLEN) {\n            p[0] = len;\n            return 1;\n        } else {\n            return zipStorePrevEntryLengthLarge(p,len);\n        }\n    }\n}\n\n/* Return the number of bytes used to encode the length of the previous\n * entry. The length is returned by setting the var 'prevlensize'. 
*/\n#define ZIP_DECODE_PREVLENSIZE(ptr, prevlensize)                                                                       \\\n    do {                                                                                                               \\\n    if ((ptr)[0] < ZIP_BIG_PREVLEN) {                                          \\\n        (prevlensize) = 1;                                                     \\\n    } else {                                                                   \\\n        (prevlensize) = 5;                                                     \\\n    }                                                                          \\\n} while(0)\n\n/* Return the length of the previous element, and the number of bytes that\n * are used in order to encode the previous element length.\n * 'ptr' must point to the prevlen prefix of an entry (that encodes the\n * length of the previous entry in order to navigate the elements backward).\n * The length of the previous entry is stored in 'prevlen', the number of\n * bytes needed to encode the previous entry length are stored in\n * 'prevlensize'. 
*/\n#define ZIP_DECODE_PREVLEN(ptr, prevlensize, prevlen)                                                                  \\\n    do {                                                                                                               \\\n    ZIP_DECODE_PREVLENSIZE(ptr, prevlensize);                                  \\\n    if ((prevlensize) == 1) {                                                  \\\n        (prevlen) = (ptr)[0];                                                  \\\n    } else { /* prevlensize == 5 */                                            \\\n            (prevlen) = ((ptr)[4] << 24) | ((ptr)[3] << 16) | ((ptr)[2] << 8) | ((ptr)[1]);                            \\\n    }                                                                          \\\n} while(0)\n\n/* Given a pointer 'p' to the prevlen info that prefixes an entry, this\n * function returns the difference in number of bytes needed to encode\n * the prevlen if the previous entry changes of size.\n *\n * So if A is the number of bytes used right now to encode the 'prevlen'\n * field.\n *\n * And B is the number of bytes that are needed in order to encode the\n * 'prevlen' if the previous element will be updated to one of size 'len'.\n *\n * Then the function returns B - A\n *\n * So the function returns a positive number if more space is needed,\n * a negative number if less space is needed, or zero if the same space\n * is needed. */\nint zipPrevLenByteDiff(unsigned char *p, unsigned int len) {\n    unsigned int prevlensize;\n    ZIP_DECODE_PREVLENSIZE(p, prevlensize);\n    return zipStorePrevEntryLength(NULL, len) - prevlensize;\n}\n\n/* Check if string pointed to by 'entry' can be encoded as an integer.\n * Stores the integer value in 'v' and its encoding in 'encoding'. 
*/\nint zipTryEncoding(unsigned char *entry, unsigned int entrylen, long long *v, unsigned char *encoding) {\n    long long value;\n\n    if (entrylen >= 32 || entrylen == 0) return 0;\n    if (string2ll((char*)entry,entrylen,&value)) {\n        /* Great, the string can be encoded. Check what's the smallest\n         * of our encoding types that can hold this value. */\n        if (value >= 0 && value <= 12) {\n            *encoding = ZIP_INT_IMM_MIN+value;\n        } else if (value >= INT8_MIN && value <= INT8_MAX) {\n            *encoding = ZIP_INT_8B;\n        } else if (value >= INT16_MIN && value <= INT16_MAX) {\n            *encoding = ZIP_INT_16B;\n        } else if (value >= INT24_MIN && value <= INT24_MAX) {\n            *encoding = ZIP_INT_24B;\n        } else if (value >= INT32_MIN && value <= INT32_MAX) {\n            *encoding = ZIP_INT_32B;\n        } else {\n            *encoding = ZIP_INT_64B;\n        }\n        *v = value;\n        return 1;\n    }\n    return 0;\n}\n\n/* Store integer 'value' at 'p', encoded as 'encoding' */\nvoid zipSaveInteger(unsigned char *p, int64_t value, unsigned char encoding) {\n    int16_t i16;\n    int32_t i32;\n    int64_t i64;\n    if (encoding == ZIP_INT_8B) {\n        ((int8_t*)p)[0] = (int8_t)value;\n    } else if (encoding == ZIP_INT_16B) {\n        i16 = value;\n        memcpy(p,&i16,sizeof(i16));\n        memrev16ifbe(p);\n    } else if (encoding == ZIP_INT_24B) {\n        i32 = ((uint64_t)value)<<8;\n        memrev32ifbe(&i32);\n        memcpy(p,((uint8_t*)&i32)+1,sizeof(i32)-sizeof(uint8_t));\n    } else if (encoding == ZIP_INT_32B) {\n        i32 = value;\n        memcpy(p,&i32,sizeof(i32));\n        memrev32ifbe(p);\n    } else if (encoding == ZIP_INT_64B) {\n        i64 = value;\n        memcpy(p,&i64,sizeof(i64));\n        memrev64ifbe(p);\n    } else if (encoding >= ZIP_INT_IMM_MIN && encoding <= ZIP_INT_IMM_MAX) {\n        /* Nothing to do, the value is stored in the encoding itself. 
*/\n    } else {\n        assert(NULL);\n    }\n}\n\n/* Read integer encoded as 'encoding' from 'p' */\nint64_t zipLoadInteger(unsigned char *p, unsigned char encoding) {\n    int16_t i16;\n    int32_t i32;\n    int64_t i64, ret = 0;\n    if (encoding == ZIP_INT_8B) {\n        ret = ((int8_t*)p)[0];\n    } else if (encoding == ZIP_INT_16B) {\n        memcpy(&i16,p,sizeof(i16));\n        memrev16ifbe(&i16);\n        ret = i16;\n    } else if (encoding == ZIP_INT_32B) {\n        memcpy(&i32,p,sizeof(i32));\n        memrev32ifbe(&i32);\n        ret = i32;\n    } else if (encoding == ZIP_INT_24B) {\n        i32 = 0;\n        memcpy(((uint8_t*)&i32)+1,p,sizeof(i32)-sizeof(uint8_t));\n        memrev32ifbe(&i32);\n        ret = i32>>8;\n    } else if (encoding == ZIP_INT_64B) {\n        memcpy(&i64,p,sizeof(i64));\n        memrev64ifbe(&i64);\n        ret = i64;\n    } else if (encoding >= ZIP_INT_IMM_MIN && encoding <= ZIP_INT_IMM_MAX) {\n        ret = (encoding & ZIP_INT_IMM_MASK)-1;\n    } else {\n        assert(NULL);\n    }\n    return ret;\n}\n\n/* Fills a struct with all information about an entry.\n * This function is the \"unsafe\" alternative to the one below.\n * Generally, all function that return a pointer to an element in the ziplist\n * will assert that this element is valid, so it can be freely used.\n * Generally functions such ziplistGet assume the input pointer is already\n * validated (since it's the return value of another function). */\nstatic inline void zipEntry(unsigned char *p, zlentry *e) {\n    ZIP_DECODE_PREVLEN(p, e->prevrawlensize, e->prevrawlen);\n    ZIP_ENTRY_ENCODING(p + e->prevrawlensize, e->encoding);\n    ZIP_DECODE_LENGTH(p + e->prevrawlensize, e->encoding, e->lensize, e->len);\n    assert(e->lensize != 0); /* check that encoding was valid. 
*/\n    e->headersize = e->prevrawlensize + e->lensize;\n    e->p = p;\n}\n\n/* Fills a struct with all information about an entry.\n * This function is safe to use on untrusted pointers, it'll make sure not to\n * try to access memory outside the ziplist payload.\n * Returns 1 if the entry is valid, and 0 otherwise. */\nstatic inline int zipEntrySafe(unsigned char* zl, size_t zlbytes, unsigned char *p, zlentry *e, int validate_prevlen) {\n    unsigned char *zlfirst = zl + ZIPLIST_HEADER_SIZE;\n    unsigned char *zllast = zl + zlbytes - ZIPLIST_END_SIZE;\n#define OUT_OF_RANGE(p) (unlikely((p) < zlfirst || (p) > zllast))\n\n    /* If there's no possibility for the header to reach outside the ziplist,\n     * take the fast path. (max lensize and prevrawlensize are both 5 bytes) */\n    if (p >= zlfirst && p + 10 < zllast) {\n        ZIP_DECODE_PREVLEN(p, e->prevrawlensize, e->prevrawlen);\n        ZIP_ENTRY_ENCODING(p + e->prevrawlensize, e->encoding);\n        ZIP_DECODE_LENGTH(p + e->prevrawlensize, e->encoding, e->lensize, e->len);\n        e->headersize = e->prevrawlensize + e->lensize;\n        e->p = p;\n        /* We didn't call ZIP_ASSERT_ENCODING, so we check lensize was set to 0. */\n        if (unlikely(e->lensize == 0)) return 0;\n        /* Make sure the entry doesn't reach outside the edge of the ziplist */\n        if (OUT_OF_RANGE(p + e->headersize + e->len)) return 0;\n        /* Make sure prevlen doesn't reach outside the edge of the ziplist */\n        if (validate_prevlen && OUT_OF_RANGE(p - e->prevrawlen)) return 0;\n        return 1;\n    }\n\n    /* Make sure the pointer doesn't reach outside the edge of the ziplist */\n    if (OUT_OF_RANGE(p)) return 0;\n\n    /* Make sure the encoded prevlen header doesn't reach outside the allocation */\n    ZIP_DECODE_PREVLENSIZE(p, e->prevrawlensize);\n    if (OUT_OF_RANGE(p + e->prevrawlensize)) return 0;\n\n    /* Make sure encoded entry header is valid. 
*/\n    ZIP_ENTRY_ENCODING(p + e->prevrawlensize, e->encoding);\n    e->lensize = zipEncodingLenSize(e->encoding);\n    if (unlikely(e->lensize == ZIP_ENCODING_SIZE_INVALID)) return 0;\n\n    /* Make sure the encoded entry header doesn't reach outside the allocation */\n    if (OUT_OF_RANGE(p + e->prevrawlensize + e->lensize)) return 0;\n\n    /* Decode the prevlen and entry len headers. */\n    ZIP_DECODE_PREVLEN(p, e->prevrawlensize, e->prevrawlen);\n    ZIP_DECODE_LENGTH(p + e->prevrawlensize, e->encoding, e->lensize, e->len);\n    e->headersize = e->prevrawlensize + e->lensize;\n\n    /* Make sure the entry doesn't reach outside the edge of the ziplist */\n    if (OUT_OF_RANGE(p + e->headersize + e->len)) return 0;\n\n    /* Make sure prevlen doesn't reach outside the edge of the ziplist */\n    if (validate_prevlen && OUT_OF_RANGE(p - e->prevrawlen)) return 0;\n\n    e->p = p;\n    return 1;\n#undef OUT_OF_RANGE\n}\n\n/* Return the total number of bytes used by the entry pointed to by 'p'. */\nstatic inline unsigned int zipRawEntryLengthSafe(unsigned char* zl, size_t zlbytes, unsigned char *p) {\n    zlentry e;\n    zipEntrySafe(zl, zlbytes, p, &e, 0);\n    return e.headersize + e.len;\n}\n\n/* Return the total number of bytes used by the entry pointed to by 'p'. */\nstatic inline unsigned int zipRawEntryLength(unsigned char *p) {\n    zlentry e;\n    zipEntry(p, &e);\n    return e.headersize + e.len;\n}\n\n/* Validate that the entry doesn't reach outside the ziplist allocation. */\nstatic inline void zipAssertValidEntry(unsigned char* zl, size_t zlbytes, unsigned char *p) {\n    zlentry e;\n    int res = zipEntrySafe(zl, zlbytes, p, &e, 1);\n    assert(res);\n    (void)res;\n}\n\n/* Create a new empty ziplist. 
*/\nunsigned char *ziplistNew(void) {\n    unsigned int bytes = ZIPLIST_HEADER_SIZE+ZIPLIST_END_SIZE;\n    unsigned char *zl = zmalloc(bytes);\n    ZIPLIST_BYTES(zl) = intrev32ifbe(bytes);\n    ZIPLIST_TAIL_OFFSET(zl) = intrev32ifbe(ZIPLIST_HEADER_SIZE);\n    ZIPLIST_LENGTH(zl) = 0;\n    zl[bytes-1] = ZIP_END;\n    return zl;\n}\n\n/* Resize the ziplist. */\nunsigned char *ziplistResize(unsigned char *zl, size_t len) {\n    assert(len < UINT32_MAX);\n    zl = zrealloc(zl,len);\n    ZIPLIST_BYTES(zl) = intrev32ifbe(len);\n    zl[len-1] = ZIP_END;\n    return zl;\n}\n\n/* When an entry is inserted, we need to set the prevlen field of the next\n * entry to equal the length of the inserted entry. It can occur that this\n * length cannot be encoded in 1 byte and the next entry needs to be grow\n * a bit larger to hold the 5-byte encoded prevlen. This can be done for free,\n * because this only happens when an entry is already being inserted (which\n * causes a realloc and memmove). However, encoding the prevlen may require\n * that this entry is grown as well. This effect may cascade throughout\n * the ziplist when there are consecutive entries with a size close to\n * ZIP_BIG_PREVLEN, so we need to check that the prevlen can be encoded in\n * every consecutive entry.\n *\n * Note that this effect can also happen in reverse, where the bytes required\n * to encode the prevlen field can shrink. This effect is deliberately ignored,\n * because it can cause a \"flapping\" effect where a chain prevlen fields is\n * first grown and then shrunk again after consecutive inserts. Rather, the\n * field is allowed to stay larger than necessary, because a large prevlen\n * field implies the ziplist is holding large entries anyway.\n *\n * The pointer \"p\" points to the first entry that does NOT need to be\n * updated, i.e. consecutive fields MAY need an update. 
*/\nunsigned char *__ziplistCascadeUpdate(unsigned char *zl, unsigned char *p) {\n    zlentry cur;\n    size_t prevlen, prevlensize, prevoffset; /* Informat of the last changed entry. */\n    size_t firstentrylen; /* Used to handle insert at head. */\n    size_t rawlen, curlen = intrev32ifbe(ZIPLIST_BYTES(zl));\n    size_t extra = 0, cnt = 0, offset;\n    size_t delta = 4; /* Extra bytes needed to update a entry's prevlen (5-1). */\n    unsigned char *tail = zl + intrev32ifbe(ZIPLIST_TAIL_OFFSET(zl));\n\n    /* Empty ziplist */\n    if (p[0] == ZIP_END) return zl;\n\n    zipEntry(\n        p,\n        &cur); /* no need for \"safe\" variant since the input pointer was validated by the function that returned it. */\n    firstentrylen = prevlen = cur.headersize + cur.len;\n    prevlensize = zipStorePrevEntryLength(NULL, prevlen);\n    prevoffset = p - zl;\n    p += prevlen;\n\n    /* Iterate ziplist to find out how many extra bytes do we need to update it. */\n    while (p[0] != ZIP_END) {\n        assert(zipEntrySafe(zl, curlen, p, &cur, 0));\n\n        /* Abort when \"prevlen\" has not changed. */\n        if (cur.prevrawlen == prevlen) break;\n\n        /* Abort when entry's \"prevlensize\" is big enough. */\n        if (cur.prevrawlensize >= prevlensize) {\n            if (cur.prevrawlensize == prevlensize) {\n                zipStorePrevEntryLength(p, prevlen);\n            } else {\n                /* This would result in shrinking, which we want to avoid.\n                 * So, set \"prevlen\" in the available bytes. */\n                zipStorePrevEntryLengthLarge(p, prevlen);\n            }\n            break;\n        }\n\n        /* cur.prevrawlen means cur is the former head entry. */\n        assert(cur.prevrawlen == 0 || cur.prevrawlen + delta == prevlen);\n\n        /* Update prev entry's info and advance the cursor. 
*/\n        rawlen = cur.headersize + cur.len;\n        prevlen = rawlen + delta; \n        prevlensize = zipStorePrevEntryLength(NULL, prevlen);\n        prevoffset = p - zl;\n        p += rawlen;\n        extra += delta;\n        cnt++;\n    }\n\n    /* Extra bytes is zero all update has been done(or no need to update). */\n    if (extra == 0) return zl;\n\n    /* Update tail offset after loop. */\n    if (tail == zl + prevoffset) {\n        /* When the last entry we need to update is also the tail, update tail offset\n         * unless this is the only entry that was updated (so the tail offset didn't change). */\n        if (extra - delta != 0) {\n            ZIPLIST_TAIL_OFFSET(zl) = intrev32ifbe(intrev32ifbe(ZIPLIST_TAIL_OFFSET(zl)) + extra - delta);\n        }\n    } else {\n        /* Update the tail offset in cases where the last entry we updated is not the tail. */\n        ZIPLIST_TAIL_OFFSET(zl) = intrev32ifbe(intrev32ifbe(ZIPLIST_TAIL_OFFSET(zl)) + extra);\n    }\n\n    /* Now \"p\" points at the first unchanged byte in original ziplist,\n     * move data after that to new ziplist. */\n    offset = p - zl;\n    zl = ziplistResize(zl, curlen + extra);\n    p = zl + offset;\n    memmove(p + extra, p, curlen - offset - 1);\n    p += extra;\n\n    /* Iterate all entries that need to be updated tail to head. */\n    while (cnt) {\n        zipEntry(zl + prevoffset,\n                 &cur); /* no need for \"safe\" variant since we already iterated on all these entries above. */\n        rawlen = cur.headersize + cur.len;\n        /* Move entry to tail and reset prevlen. */\n        memmove(p - (rawlen - cur.prevrawlensize), zl + prevoffset + cur.prevrawlensize, rawlen - cur.prevrawlensize);\n        p -= (rawlen + delta);\n        if (cur.prevrawlen == 0) {\n            /* \"cur\" is the previous head entry, update its prevlen with firstentrylen. 
*/\n            zipStorePrevEntryLength(p, firstentrylen);\n        } else {\n            /* An entry's prevlen can only increment 4 bytes. */\n            zipStorePrevEntryLength(p, cur.prevrawlen+delta);\n        }\n        /* Forward to previous entry. */\n        prevoffset -= cur.prevrawlen;\n        cnt--;\n    }\n    return zl;\n}\n\n/* Delete \"num\" entries, starting at \"p\". Returns pointer to the ziplist. */\nunsigned char *__ziplistDelete(unsigned char *zl, unsigned char *p, unsigned int num) {\n    unsigned int i, totlen, deleted = 0;\n    size_t offset;\n    int nextdiff = 0;\n    zlentry first, tail;\n    size_t zlbytes = intrev32ifbe(ZIPLIST_BYTES(zl));\n\n    zipEntry(p, &first); /* no need for \"safe\" variant since the input pointer was validated by the function that\n                            returned it. */\n    for (i = 0; p[0] != ZIP_END && i < num; i++) {\n        p += zipRawEntryLengthSafe(zl, zlbytes, p);\n        deleted++;\n    }\n\n    assert(p >= first.p);\n    totlen = p-first.p; /* Bytes taken by the element(s) to delete. */\n    if (totlen > 0) {\n        uint32_t set_tail;\n        if (p[0] != ZIP_END) {\n            /* Storing `prevrawlen` in this entry may increase or decrease the\n             * number of bytes required compare to the current `prevrawlen`.\n             * There always is room to store this, because it was previously\n             * stored by an entry that is now being deleted. */\n            nextdiff = zipPrevLenByteDiff(p,first.prevrawlen);\n\n            /* Note that there is always space when p jumps backward: if\n             * the new previous entry is large, one of the deleted elements\n             * had a 5 bytes prevlen header, so there is for sure at least\n             * 5 bytes free and we need just 4. 
*/\n            p -= nextdiff;\n            assert(p >= first.p && p<zl+zlbytes-1);\n            zipStorePrevEntryLength(p,first.prevrawlen);\n\n            /* Update offset for tail */\n            set_tail = intrev32ifbe(ZIPLIST_TAIL_OFFSET(zl))-totlen;\n\n            /* When the tail contains more than one entry, we need to take\n             * \"nextdiff\" in account as well. Otherwise, a change in the\n             * size of prevlen doesn't have an effect on the *tail* offset. */\n            assert(zipEntrySafe(zl, zlbytes, p, &tail, 1));\n            if (p[tail.headersize+tail.len] != ZIP_END) {\n                set_tail = set_tail + nextdiff;\n            }\n\n            /* Move tail to the front of the ziplist */\n            /* since we asserted that p >= first.p. we know totlen >= 0,\n             * so we know that p > first.p and this is guaranteed not to reach\n             * beyond the allocation, even if the entries lens are corrupted. */\n            size_t bytes_to_move = zlbytes-(p-zl)-1;\n            memmove(first.p,p,bytes_to_move);\n        } else {\n            /* The entire tail was deleted. No need to move memory. */\n            set_tail = (first.p-zl)-first.prevrawlen;\n        }\n\n        /* Resize the ziplist */\n        offset = first.p-zl;\n        zlbytes -= totlen - nextdiff;\n        zl = ziplistResize(zl, zlbytes);\n        p = zl+offset;\n\n        /* Update record count */\n        ZIPLIST_INCR_LENGTH(zl,-deleted);\n\n        /* Set the tail offset computed above */\n        assert(set_tail <= zlbytes - ZIPLIST_END_SIZE);\n        ZIPLIST_TAIL_OFFSET(zl) = intrev32ifbe(set_tail);\n\n        /* When nextdiff != 0, the raw length of the next entry has changed, so\n         * we need to cascade the update throughout the ziplist */\n        if (nextdiff != 0) zl = __ziplistCascadeUpdate(zl, p);\n    }\n    return zl;\n}\n\n/* Insert item at \"p\". 
*/\nunsigned char *__ziplistInsert(unsigned char *zl, unsigned char *p, unsigned char *s, unsigned int slen) {\n    size_t curlen = intrev32ifbe(ZIPLIST_BYTES(zl)), reqlen, newlen;\n    unsigned int prevlensize, prevlen = 0;\n    size_t offset;\n    int nextdiff = 0;\n    unsigned char encoding = 0;\n    long long value = 123456789; /* initialized to avoid warning. Using a value\n                                    that is easy to see if for some reason\n                                    we use it uninitialized. */\n    zlentry tail;\n\n    /* Find out prevlen for the entry that is inserted. */\n    if (p[0] != ZIP_END) {\n        ZIP_DECODE_PREVLEN(p, prevlensize, prevlen);\n    } else {\n        unsigned char *ptail = ZIPLIST_ENTRY_TAIL(zl);\n        if (ptail[0] != ZIP_END) {\n            prevlen = zipRawEntryLengthSafe(zl, curlen, ptail);\n        }\n    }\n\n    /* See if the entry can be encoded */\n    if (zipTryEncoding(s,slen,&value,&encoding)) {\n        /* 'encoding' is set to the appropriate integer encoding */\n        reqlen = zipIntSize(encoding);\n    } else {\n        /* 'encoding' is untouched, however zipStoreEntryEncoding will use the\n         * string length to figure out how to encode it. */\n        reqlen = slen;\n    }\n    /* We need space for both the length of the previous entry and\n     * the length of the payload. */\n    reqlen += zipStorePrevEntryLength(NULL,prevlen);\n    reqlen += zipStoreEntryEncoding(NULL,encoding,slen);\n\n    /* When the insert position is not equal to the tail, we need to\n     * make sure that the next entry can hold this entry's length in\n     * its prevlen field. */\n    int forcelarge = 0;\n    nextdiff = (p[0] != ZIP_END) ? zipPrevLenByteDiff(p,reqlen) : 0;\n    if (nextdiff == -4 && reqlen < 4) {\n        nextdiff = 0;\n        forcelarge = 1;\n    }\n\n    /* Store offset because a realloc may change the address of zl. 
*/\n    offset = p-zl;\n    newlen = curlen+reqlen+nextdiff;\n    zl = ziplistResize(zl,newlen);\n    p = zl+offset;\n\n    /* Apply memory move when necessary and update tail offset. */\n    if (p[0] != ZIP_END) {\n        /* Subtract one because of the ZIP_END bytes */\n        memmove(p+reqlen,p-nextdiff,curlen-offset-1+nextdiff);\n\n        /* Encode this entry's raw length in the next entry. */\n        if (forcelarge)\n            zipStorePrevEntryLengthLarge(p+reqlen,reqlen);\n        else\n            zipStorePrevEntryLength(p+reqlen,reqlen);\n\n        /* Update offset for tail */\n        ZIPLIST_TAIL_OFFSET(zl) = intrev32ifbe(intrev32ifbe(ZIPLIST_TAIL_OFFSET(zl)) + reqlen);\n\n        /* When the tail contains more than one entry, we need to take\n         * \"nextdiff\" in account as well. Otherwise, a change in the\n         * size of prevlen doesn't have an effect on the *tail* offset. */\n        zipEntrySafe(zl, newlen, p + reqlen, &tail, 1);\n        if (p[reqlen+tail.headersize+tail.len] != ZIP_END) {\n            ZIPLIST_TAIL_OFFSET(zl) = intrev32ifbe(intrev32ifbe(ZIPLIST_TAIL_OFFSET(zl)) + nextdiff);\n        }\n    } else {\n        /* This element will be the new tail. 
*/\n        ZIPLIST_TAIL_OFFSET(zl) = intrev32ifbe(p-zl);\n    }\n\n    /* When nextdiff != 0, the raw length of the next entry has changed, so\n     * we need to cascade the update throughout the ziplist */\n    if (nextdiff != 0) {\n        offset = p-zl;\n        zl = __ziplistCascadeUpdate(zl,p+reqlen);\n        p = zl+offset;\n    }\n\n    /* Write the entry */\n    p += zipStorePrevEntryLength(p,prevlen);\n    p += zipStoreEntryEncoding(p,encoding,slen);\n    if (ZIP_IS_STR(encoding)) {\n        memcpy(p,s,slen);\n    } else {\n        zipSaveInteger(p,value,encoding);\n    }\n    ZIPLIST_INCR_LENGTH(zl,1);\n    return zl;\n}\n\n/* Merge ziplists 'first' and 'second' by appending 'second' to 'first'.\n *\n * NOTE: The larger ziplist is reallocated to contain the new merged ziplist.\n * Either 'first' or 'second' can be used for the result.  The parameter not\n * used will be free'd and set to NULL.\n *\n * After calling this function, the input parameters are no longer valid since\n * they are changed and free'd in-place.\n *\n * The result ziplist is the contents of 'first' followed by 'second'.\n *\n * On failure: returns NULL if the merge is impossible.\n * On success: returns the merged ziplist (which is expanded version of either\n * 'first' or 'second', also frees the other unused input ziplist, and sets the\n * input ziplist argument equal to newly reallocated ziplist return value. */\nunsigned char *ziplistMerge(unsigned char **first, unsigned char **second) {\n    /* If any params are null, we can't merge, so NULL. */\n    if (first == NULL || *first == NULL || second == NULL || *second == NULL) return NULL;\n\n    /* Can't merge same list into itself. 
*/\n    if (*first == *second) return NULL;\n\n    size_t first_bytes = intrev32ifbe(ZIPLIST_BYTES(*first));\n    size_t first_len = intrev16ifbe(ZIPLIST_LENGTH(*first));\n\n    size_t second_bytes = intrev32ifbe(ZIPLIST_BYTES(*second));\n    size_t second_len = intrev16ifbe(ZIPLIST_LENGTH(*second));\n\n    int append;\n    unsigned char *source, *target;\n    size_t target_bytes, source_bytes;\n    /* Pick the largest ziplist so we can resize easily in-place.\n     * We must also track if we are now appending or prepending to\n     * the target ziplist. */\n    if (first_len >= second_len) {\n        /* retain first, append second to first. */\n        target = *first;\n        target_bytes = first_bytes;\n        source = *second;\n        source_bytes = second_bytes;\n        append = 1;\n    } else {\n        /* else, retain second, prepend first to second. */\n        target = *second;\n        target_bytes = second_bytes;\n        source = *first;\n        source_bytes = first_bytes;\n        append = 0;\n    }\n\n    /* Calculate final bytes (subtract one pair of metadata) */\n    size_t zlbytes = first_bytes + second_bytes - ZIPLIST_HEADER_SIZE - ZIPLIST_END_SIZE;\n    size_t zllength = first_len + second_len;\n\n    /* Combined zl length should be limited within UINT16_MAX */\n    zllength = zllength < UINT16_MAX ? zllength : UINT16_MAX;\n\n    /* larger values can't be stored into ZIPLIST_BYTES */\n    assert(zlbytes < UINT32_MAX);\n\n    /* Save offset positions before we start ripping memory apart. */\n    size_t first_offset = intrev32ifbe(ZIPLIST_TAIL_OFFSET(*first));\n    size_t second_offset = intrev32ifbe(ZIPLIST_TAIL_OFFSET(*second));\n\n    /* Extend target to new zlbytes then append or prepend source. 
*/\n    target = zrealloc(target, zlbytes);\n    if (append) {\n        /* append == appending to target */\n        /* Copy source after target (copying over original [END]):\n         *   [TARGET - END, SOURCE - HEADER] */\n        memcpy(target + target_bytes - ZIPLIST_END_SIZE, source + ZIPLIST_HEADER_SIZE,\n               source_bytes - ZIPLIST_HEADER_SIZE);\n    } else {\n        /* !append == prepending to target */\n        /* Move target *contents* exactly size of (source - [END]),\n         * then copy source into vacated space (source - [END]):\n         *   [SOURCE - END, TARGET - HEADER] */\n        memmove(target + source_bytes - ZIPLIST_END_SIZE, target + ZIPLIST_HEADER_SIZE,\n                target_bytes - ZIPLIST_HEADER_SIZE);\n        memcpy(target, source, source_bytes - ZIPLIST_END_SIZE);\n    }\n\n    /* Update header metadata. */\n    ZIPLIST_BYTES(target) = intrev32ifbe(zlbytes);\n    ZIPLIST_LENGTH(target) = intrev16ifbe(zllength);\n    /* New tail offset is:\n     *   + N bytes of first ziplist\n     *   - 1 byte for [END] of first ziplist\n     *   + M bytes for the offset of the original tail of the second ziplist\n     *   - J bytes for HEADER because second_offset keeps no header. */\n    ZIPLIST_TAIL_OFFSET(target) =\n        intrev32ifbe((first_bytes - ZIPLIST_END_SIZE) + (second_offset - ZIPLIST_HEADER_SIZE));\n\n    /* __ziplistCascadeUpdate just fixes the prev length values until it finds a\n     * correct prev length value (then it assumes the rest of the list is okay).\n     * We tell CascadeUpdate to start at the first ziplist's tail element to fix\n     * the merge seam. 
*/\n    target = __ziplistCascadeUpdate(target, target+first_offset);\n\n    /* Now free and NULL out what we didn't realloc */\n    if (append) {\n        zfree(*second);\n        *second = NULL;\n        *first = target;\n    } else {\n        zfree(*first);\n        *first = NULL;\n        *second = target;\n    }\n    return target;\n}\n\nunsigned char *ziplistPush(unsigned char *zl, unsigned char *s, unsigned int slen, int where) {\n    unsigned char *p;\n    p = (where == ZIPLIST_HEAD) ? ZIPLIST_ENTRY_HEAD(zl) : ZIPLIST_ENTRY_END(zl);\n    return __ziplistInsert(zl,p,s,slen);\n}\n\n/* Returns an offset to use for iterating with ziplistNext. When the given\n * index is negative, the list is traversed back to front. When the list\n * doesn't contain an element at the provided index, NULL is returned. */\nunsigned char *ziplistIndex(unsigned char *zl, int index) {\n    unsigned char *p;\n    unsigned int prevlensize, prevlen = 0;\n    size_t zlbytes = intrev32ifbe(ZIPLIST_BYTES(zl));\n    if (index < 0) {\n        index = (-index)-1;\n        p = ZIPLIST_ENTRY_TAIL(zl);\n        if (p[0] != ZIP_END) {\n            /* No need for \"safe\" check: when going backwards, we know the header\n             * we're parsing is in the range, we just need to assert (below) that\n             * the size we take doesn't cause p to go outside the allocation. 
*/\n            ZIP_DECODE_PREVLENSIZE(p, prevlensize);\n            assert(p + prevlensize < zl + zlbytes - ZIPLIST_END_SIZE);\n            ZIP_DECODE_PREVLEN(p, prevlensize, prevlen);\n            while (prevlen > 0 && index--) {\n                p -= prevlen;\n                assert(p >= zl + ZIPLIST_HEADER_SIZE && p < zl + zlbytes - ZIPLIST_END_SIZE);\n                ZIP_DECODE_PREVLEN(p, prevlensize, prevlen);\n            }\n        }\n    } else {\n        p = ZIPLIST_ENTRY_HEAD(zl);\n        while (index--) {\n            /* Use the \"safe\" length: When we go forward, we need to be careful\n             * not to decode an entry header if it's past the ziplist allocation. */\n            p += zipRawEntryLengthSafe(zl, zlbytes, p);\n            if (p[0] == ZIP_END) break;\n        }\n    }\n    if (p[0] == ZIP_END || index > 0) return NULL;\n    zipAssertValidEntry(zl, zlbytes, p);\n    return p;\n}\n\n/* Return pointer to next entry in ziplist.\n *\n * zl is the pointer to the ziplist\n * p is the pointer to the current element\n *\n * The element after 'p' is returned, otherwise NULL if we are at the end. */\nunsigned char *ziplistNext(unsigned char *zl, unsigned char *p) {\n    ((void) zl);\n    size_t zlbytes = intrev32ifbe(ZIPLIST_BYTES(zl));\n\n    /* \"p\" could be equal to ZIP_END, caused by ziplistDelete,\n     * and we should return NULL. Otherwise, we should return NULL\n     * when the *next* element is ZIP_END (there is no next entry). */\n    if (p[0] == ZIP_END) {\n        return NULL;\n    }\n\n    p += zipRawEntryLength(p);\n    if (p[0] == ZIP_END) {\n        return NULL;\n    }\n\n    zipAssertValidEntry(zl, zlbytes, p);\n    return p;\n}\n\n/* Return pointer to previous entry in ziplist. */\nunsigned char *ziplistPrev(unsigned char *zl, unsigned char *p) {\n    unsigned int prevlensize, prevlen = 0;\n\n    /* Iterating backwards from ZIP_END should return the tail. 
When \"p\" is\n     * equal to the first element of the list, we're already at the head,\n     * and should return NULL. */\n    if (p[0] == ZIP_END) {\n        p = ZIPLIST_ENTRY_TAIL(zl);\n        return (p[0] == ZIP_END) ? NULL : p;\n    } else if (p == ZIPLIST_ENTRY_HEAD(zl)) {\n        return NULL;\n    } else {\n        ZIP_DECODE_PREVLEN(p, prevlensize, prevlen);\n        assert(prevlen > 0);\n        p-=prevlen;\n        size_t zlbytes = intrev32ifbe(ZIPLIST_BYTES(zl));\n        zipAssertValidEntry(zl, zlbytes, p);\n        return p;\n    }\n}\n\n/* Get entry pointed to by 'p' and store in either '*sstr' or 'sval' depending\n * on the encoding of the entry. '*sstr' is always set to NULL to be able\n * to find out whether the string pointer or the integer value was set.\n * Return 0 if 'p' points to the end of the ziplist, 1 otherwise. */\nunsigned int ziplistGet(unsigned char *p, unsigned char **sstr, unsigned int *slen, long long *sval) {\n    zlentry entry;\n    if (p == NULL || p[0] == ZIP_END) return 0;\n    if (sstr) *sstr = NULL;\n\n    zipEntry(p, &entry); /* no need for \"safe\" variant since the input pointer was validated by the function that\n                            returned it. */\n    if (ZIP_IS_STR(entry.encoding)) {\n        if (sstr) {\n            *slen = entry.len;\n            *sstr = p+entry.headersize;\n        }\n    } else {\n        if (sval) {\n            *sval = zipLoadInteger(p+entry.headersize,entry.encoding);\n        }\n    }\n    return 1;\n}\n\n/* Insert an entry at \"p\". */\nunsigned char *ziplistInsert(unsigned char *zl, unsigned char *p, unsigned char *s, unsigned int slen) {\n    return __ziplistInsert(zl,p,s,slen);\n}\n\n/* Delete a single entry from the ziplist, pointed to by *p.\n * Also update *p in place, to be able to iterate over the\n * ziplist, while deleting entries. 
*/\nunsigned char *ziplistDelete(unsigned char *zl, unsigned char **p) {\n    size_t offset = *p-zl;\n    zl = __ziplistDelete(zl,*p,1);\n\n    /* Store pointer to current element in p, because ziplistDelete will\n     * do a realloc which might result in a different \"zl\"-pointer.\n     * When the delete direction is back to front, we might delete the last\n     * entry and end up with \"p\" pointing to ZIP_END, so check this. */\n    *p = zl+offset;\n    return zl;\n}\n\n/* Delete a range of entries from the ziplist. */\nunsigned char *ziplistDeleteRange(unsigned char *zl, int index, unsigned int num) {\n    unsigned char *p = ziplistIndex(zl,index);\n    return (p == NULL) ? zl : __ziplistDelete(zl,p,num);\n}\n\n/* Replaces the entry at p. This is equivalent to a delete and an insert,\n * but avoids some overhead when replacing a value of the same size. */\nunsigned char *ziplistReplace(unsigned char *zl, unsigned char *p, unsigned char *s, unsigned int slen) {\n    /* get metadata of the current entry */\n    zlentry entry;\n    zipEntry(p, &entry);\n\n    /* compute length of entry to store, excluding prevlen */\n    unsigned int reqlen;\n    unsigned char encoding = 0;\n    long long value = 123456789; /* initialized to avoid warning. */\n    if (zipTryEncoding(s,slen,&value,&encoding)) {\n        reqlen = zipIntSize(encoding); /* encoding is set */\n    } else {\n        reqlen = slen; /* encoding == 0 */\n    }\n    reqlen += zipStoreEntryEncoding(NULL,encoding,slen);\n\n    if (reqlen == entry.lensize + entry.len) {\n        /* Simply overwrite the element. */\n        p += entry.prevrawlensize;\n        p += zipStoreEntryEncoding(p,encoding,slen);\n        if (ZIP_IS_STR(encoding)) {\n            memcpy(p,s,slen);\n        } else {\n            zipSaveInteger(p,value,encoding);\n        }\n    } else {\n        /* Fallback. 
*/\n        zl = ziplistDelete(zl,&p);\n        zl = ziplistInsert(zl,p,s,slen);\n    }\n    return zl;\n}\n\n/* Compare entry pointer to by 'p' with 'sstr' of length 'slen'. */\n/* Return 1 if equal. */\nunsigned int ziplistCompare(unsigned char *p, unsigned char *sstr, unsigned int slen) {\n    zlentry entry;\n    unsigned char sencoding;\n    long long zval, sval;\n    if (p[0] == ZIP_END) return 0;\n\n    zipEntry(p, &entry); /* no need for \"safe\" variant since the input pointer was validated by the function that\n                            returned it. */\n    if (ZIP_IS_STR(entry.encoding)) {\n        /* Raw compare */\n        if (entry.len == slen) {\n            return memcmp(p+entry.headersize,sstr,slen) == 0;\n        } else {\n            return 0;\n        }\n    } else {\n        /* Try to compare encoded values. Don't compare encoding because\n         * different implementations may encoded integers differently. */\n        if (zipTryEncoding(sstr,slen,&sval,&sencoding)) {\n          zval = zipLoadInteger(p+entry.headersize,entry.encoding);\n          return zval == sval;\n        }\n    }\n    return 0;\n}\n\n/* Find pointer to the entry equal to the specified entry. Skip 'skip' entries\n * between every comparison. Returns NULL when the field could not be found. 
*/\nunsigned char *\nziplistFind(unsigned char *zl, unsigned char *p, unsigned char *vstr, unsigned int vlen, unsigned int skip) {\n    int skipcnt = 0;\n    unsigned char vencoding = 0;\n    long long vll = 0;\n    size_t zlbytes = ziplistBlobLen(zl);\n\n    while (p[0] != ZIP_END) {\n        struct zlentry e;\n        unsigned char *q;\n        int res = zipEntrySafe(zl, zlbytes, p, &e, 1);\n        assert(res);\n        (void)res;\n\n        q = p + e.prevrawlensize + e.lensize;\n\n        if (skipcnt == 0) {\n            /* Compare current entry with specified entry */\n            if (ZIP_IS_STR(e.encoding)) {\n                if (e.len == vlen && memcmp(q, vstr, vlen) == 0) {\n                    return p;\n                }\n            } else {\n                /* Find out if the searched field can be encoded. Note that\n                 * we do it only the first time, once done vencoding is set\n                 * to non-zero and vll is set to the integer value. */\n                if (vencoding == 0) {\n                    if (!zipTryEncoding(vstr, vlen, &vll, &vencoding)) {\n                        /* If the entry can't be encoded we set it to\n                         * UCHAR_MAX so that we don't retry again the next\n                         * time. */\n                        vencoding = UCHAR_MAX;\n                    }\n                    /* Must be non-zero by now */\n                    assert(vencoding);\n                }\n\n                /* Compare current entry with specified entry, do it only\n                 * if vencoding != UCHAR_MAX because if there is no encoding\n                 * possible for the field it can't be a valid integer. 
*/\n                if (vencoding != UCHAR_MAX) {\n                    long long ll = zipLoadInteger(q, e.encoding);\n                    if (ll == vll) {\n                        return p;\n                    }\n                }\n            }\n\n            /* Reset skip count */\n            skipcnt = skip;\n        } else {\n            /* Skip entry */\n            skipcnt--;\n        }\n\n        /* Move to next entry */\n        p = q + e.len;\n    }\n\n    return NULL;\n}\n\n/* Return length of ziplist. */\nunsigned int ziplistLen(unsigned char *zl) {\n    unsigned int len = 0;\n    if (intrev16ifbe(ZIPLIST_LENGTH(zl)) < UINT16_MAX) {\n        len = intrev16ifbe(ZIPLIST_LENGTH(zl));\n    } else {\n        unsigned char *p = zl+ZIPLIST_HEADER_SIZE;\n        size_t zlbytes = intrev32ifbe(ZIPLIST_BYTES(zl));\n        while (*p != ZIP_END) {\n            p += zipRawEntryLengthSafe(zl, zlbytes, p);\n            len++;\n        }\n\n        /* Re-store length if small enough */\n        if (len < UINT16_MAX) ZIPLIST_LENGTH(zl) = intrev16ifbe(len);\n    }\n    return len;\n}\n\n/* Return ziplist blob size in bytes. 
*/\nsize_t ziplistBlobLen(unsigned char *zl) {\n    return intrev32ifbe(ZIPLIST_BYTES(zl));\n}\n\nvoid ziplistRepr(unsigned char *zl) {\n    unsigned char *p;\n    int index = 0;\n    zlentry entry;\n    size_t zlbytes = ziplistBlobLen(zl);\n\n    printf(\"{total bytes %u} \"\n        \"{num entries %u}\\n\"\n        \"{tail offset %u}\\n\",\n           intrev32ifbe(ZIPLIST_BYTES(zl)), intrev16ifbe(ZIPLIST_LENGTH(zl)), intrev32ifbe(ZIPLIST_TAIL_OFFSET(zl)));\n    p = ZIPLIST_ENTRY_HEAD(zl);\n    while(*p != ZIP_END) {\n        zipEntrySafe(zl, zlbytes, p, &entry, 1);\n        printf(\n            \"{\\n\"\n                \"\\taddr 0x%08lx,\\n\"\n                \"\\tindex %2d,\\n\"\n                \"\\toffset %5lu,\\n\"\n                \"\\thdr+entry len: %5u,\\n\"\n                \"\\thdr len%2u,\\n\"\n                \"\\tprevrawlen: %5u,\\n\"\n                \"\\tprevrawlensize: %2u,\\n\"\n                \"\\tpayload %5u\\n\",\n               (long unsigned)p, index, (unsigned long)(p - zl), entry.headersize + entry.len, entry.headersize,\n               entry.prevrawlen, entry.prevrawlensize, entry.len);\n        printf(\"\\tbytes: \");\n        for (unsigned int i = 0; i < entry.headersize+entry.len; i++) {\n            printf(\"%02x|\",p[i]);\n        }\n        printf(\"\\n\");\n        p += entry.headersize;\n        if (ZIP_IS_STR(entry.encoding)) {\n            printf(\"\\t[str]\");\n            if (entry.len > 40) {\n                if (fwrite(p,40,1,stdout) == 0) perror(\"fwrite\");\n                printf(\"...\");\n            } else {\n                if (entry.len && fwrite(p, entry.len, 1, stdout) == 0) perror(\"fwrite\");\n            }\n        } else {\n            printf(\"\\t[int]%lld\", (long long) zipLoadInteger(p,entry.encoding));\n        }\n        printf(\"\\n}\\n\");\n        p += entry.len;\n        index++;\n    }\n    printf(\"{end}\\n\\n\");\n}\n\n/* Validate the integrity of the data structure.\n * when `deep` is 0, only the 
integrity of the header is validated.\n * when `deep` is 1, we scan all the entries one by one. */\nint ziplistValidateIntegrity(unsigned char *zl,\n                             size_t size,\n                             int deep,\n                             ziplistValidateEntryCB entry_cb,\n                             void *cb_userdata) {\n    /* check that we can actually read the header. (and ZIP_END) */\n    if (size < ZIPLIST_HEADER_SIZE + ZIPLIST_END_SIZE) return 0;\n\n    /* check that the encoded size in the header must match the allocated size. */\n    size_t bytes = intrev32ifbe(ZIPLIST_BYTES(zl));\n    if (bytes != size) return 0;\n\n    /* the last byte must be the terminator. */\n    if (zl[size - ZIPLIST_END_SIZE] != ZIP_END) return 0;\n\n    /* make sure the tail offset isn't reaching outside the allocation. */\n    if (intrev32ifbe(ZIPLIST_TAIL_OFFSET(zl)) > size - ZIPLIST_END_SIZE) return 0;\n\n    if (!deep) return 1;\n\n    unsigned int count = 0;\n    unsigned int header_count = intrev16ifbe(ZIPLIST_LENGTH(zl));\n    unsigned char *p = ZIPLIST_ENTRY_HEAD(zl);\n    unsigned char *prev = NULL;\n    size_t prev_raw_size = 0;\n    while(*p != ZIP_END) {\n        struct zlentry e;\n        /* Decode the entry headers and fail if invalid or reaches outside the allocation */\n        if (!zipEntrySafe(zl, size, p, &e, 1)) return 0;\n\n        /* Make sure the record stating the prev entry size is correct. */\n        if (e.prevrawlen != prev_raw_size) return 0;\n\n        /* Optionally let the caller validate the entry too. */\n        if (entry_cb && !entry_cb(p, header_count, cb_userdata)) return 0;\n\n        /* Move to the next entry */\n        prev_raw_size = e.headersize + e.len;\n        prev = p;\n        p += e.headersize + e.len;\n        count++;\n    }\n\n    /* Make sure 'p' really does point to the end of the ziplist. 
*/\n    if (p != zl + bytes - ZIPLIST_END_SIZE) return 0;\n\n    /* Make sure the <zltail> entry really do point to the start of the last entry. */\n    if (prev != NULL && prev != ZIPLIST_ENTRY_TAIL(zl)) return 0;\n\n    /* Check that the count in the header is correct */\n    if (header_count != UINT16_MAX && count != header_count) return 0;\n\n    return 1;\n}\n\n/* Randomly select a pair of key and value.\n * total_count is a pre-computed length/2 of the ziplist (to avoid calls to ziplistLen)\n * 'key' and 'val' are used to store the result key value pair.\n * 'val' can be NULL if the value is not needed. */\nvoid ziplistRandomPair(unsigned char *zl, unsigned long total_count, ziplistEntry *key, ziplistEntry *val) {\n    int ret;\n    unsigned char *p;\n\n    /* Avoid div by zero on corrupt ziplist */\n    assert(total_count);\n\n    /* Generate even numbers, because ziplist saved K-V pair */\n    int r = (rand() % total_count) * 2;\n    p = ziplistIndex(zl, r);\n    ret = ziplistGet(p, &key->sval, &key->slen, &key->lval);\n    assert(ret != 0);\n    (void)ret;\n    if (!val)\n        return;\n    p = ziplistNext(zl, p);\n    ret = ziplistGet(p, &val->sval, &val->slen, &val->lval);\n    assert(ret != 0);\n}\n\n/* int compare for qsort */\nint uintCompare(const void *a, const void *b) {\n    return (*(unsigned int *) a - *(unsigned int *) b);\n}\n\n/* Helper method to store a string into from val or lval into dest */\nstatic inline void ziplistSaveValue(unsigned char *val, unsigned int len, long long lval, ziplistEntry *dest) {\n    dest->sval = val;\n    dest->slen = len;\n    dest->lval = lval;\n}\n\n/* Randomly select count of key value pairs and store into 'keys' and\n * 'vals' args. The order of the picked entries is random, and the selections\n * are non-unique (repetitions are possible).\n * The 'vals' arg can be NULL in which case we skip these. 
*/\nvoid ziplistRandomPairs(unsigned char *zl, unsigned int count, ziplistEntry *keys, ziplistEntry *vals) {\n    unsigned char *p, *key, *value;\n    unsigned int klen = 0, vlen = 0;\n    long long klval = 0, vlval = 0;\n\n    /* Notice: the index member must be first due to the use in uintCompare */\n    typedef struct {\n        unsigned int index;\n        unsigned int order;\n    } rand_pick;\n    rand_pick *picks = zmalloc(sizeof(rand_pick)*count);\n    unsigned int total_size = ziplistLen(zl)/2;\n\n    /* Avoid div by zero on corrupt ziplist */\n    assert(total_size);\n\n    /* create a pool of random indexes (some may be duplicate). */\n    for (unsigned int i = 0; i < count; i++) {\n        picks[i].index = (rand() % total_size) * 2; /* Generate even indexes */\n        /* keep track of the order we picked them */\n        picks[i].order = i;\n    }\n\n    /* sort by indexes. */\n    qsort(picks, count, sizeof(rand_pick), uintCompare);\n\n    /* fetch the elements form the ziplist into a output array respecting the original order. */\n    unsigned int zipindex = picks[0].index, pickindex = 0;\n    p = ziplistIndex(zl, zipindex);\n    while (ziplistGet(p, &key, &klen, &klval) && pickindex < count) {\n        p = ziplistNext(zl, p);\n        assert(ziplistGet(p, &value, &vlen, &vlval));\n        while (pickindex < count && zipindex == picks[pickindex].index) {\n            int storeorder = picks[pickindex].order;\n            ziplistSaveValue(key, klen, klval, &keys[storeorder]);\n            if (vals) ziplistSaveValue(value, vlen, vlval, &vals[storeorder]);\n             pickindex++;\n        }\n        zipindex += 2;\n        p = ziplistNext(zl, p);\n    }\n\n    zfree(picks);\n}\n\n/* Randomly select count of key value pairs and store into 'keys' and\n * 'vals' args. 
The selections are unique (no repetitions), and the order of\n * the picked entries is NOT-random.\n * The 'vals' arg can be NULL in which case we skip these.\n * The return value is the number of items picked which can be lower than the\n * requested count if the ziplist doesn't hold enough pairs. */\nunsigned int ziplistRandomPairsUnique(unsigned char *zl, unsigned int count, ziplistEntry *keys, ziplistEntry *vals) {\n    unsigned char *p, *key;\n    unsigned int klen = 0;\n    long long klval = 0;\n    unsigned int total_size = ziplistLen(zl)/2;\n    unsigned int index = 0;\n    if (count > total_size) count = total_size;\n\n    /* To only iterate once, every time we try to pick a member, the probability\n     * we pick it is the quotient of the count left we want to pick and the\n     * count still we haven't visited in the dict, this way, we could make every\n     * member be equally picked.*/\n    p = ziplistIndex(zl, 0);\n    unsigned int picked = 0, remaining = count;\n    while (picked < count && p) {\n        double randomDouble = ((double)rand()) / RAND_MAX;\n        double threshold = ((double)remaining) / (total_size - index);\n        if (randomDouble <= threshold) {\n            assert(ziplistGet(p, &key, &klen, &klval));\n            ziplistSaveValue(key, klen, klval, &keys[picked]);\n            p = ziplistNext(zl, p);\n            assert(p);\n            if (vals) {\n                assert(ziplistGet(p, &key, &klen, &klval));\n                ziplistSaveValue(key, klen, klval, &vals[picked]);\n            }\n            remaining--;\n            picked++;\n        } else {\n            p = ziplistNext(zl, p);\n            assert(p);\n        }\n        p = ziplistNext(zl, p);\n        index++;\n    }\n    return picked;\n}\n"
  },
  {
    "path": "src/redis/ziplist.h",
    "content": "/*\n * Copyright (c) 2009-2012, Pieter Noordhuis <pcnoordhuis at gmail dot com>\n * Copyright (c) 2009-2012, Salvatore Sanfilippo <antirez at gmail dot com>\n * All rights reserved.\n *\n * Redistribution and use in source and binary forms, with or without\n * modification, are permitted provided that the following conditions are met:\n *\n *   * Redistributions of source code must retain the above copyright notice,\n *     this list of conditions and the following disclaimer.\n *   * Redistributions in binary form must reproduce the above copyright\n *     notice, this list of conditions and the following disclaimer in the\n *     documentation and/or other materials provided with the distribution.\n *   * Neither the name of Redis nor the names of its contributors may be used\n *     to endorse or promote products derived from this software without\n *     specific prior written permission.\n *\n * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS \"AS IS\"\n * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE\n * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE\n * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE\n * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR\n * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF\n * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS\n * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN\n * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)\n * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE\n * POSSIBILITY OF SUCH DAMAGE.\n */\n\n#ifndef _ZIPLIST_H\n#define _ZIPLIST_H\n\n#define ZIPLIST_HEAD 0\n#define ZIPLIST_TAIL 1\n\n/* Each entry in the ziplist is either a string or an integer. */\ntypedef struct {\n    /* When string is used, it is provided with the length (slen). 
*/\n    unsigned char *sval;\n    unsigned int slen;\n    /* When integer is used, 'sval' is NULL, and lval holds the value. */\n    long long lval;\n} ziplistEntry;\n\nunsigned char *ziplistNew(void);\nunsigned char *ziplistMerge(unsigned char **first, unsigned char **second);\nunsigned char *ziplistPush(unsigned char *zl, unsigned char *s, unsigned int slen, int where);\nunsigned char *ziplistIndex(unsigned char *zl, int index);\nunsigned char *ziplistNext(unsigned char *zl, unsigned char *p);\nunsigned char *ziplistPrev(unsigned char *zl, unsigned char *p);\nunsigned int ziplistGet(unsigned char *p, unsigned char **sval, unsigned int *slen, long long *lval);\nunsigned char *ziplistInsert(unsigned char *zl, unsigned char *p, unsigned char *s, unsigned int slen);\nunsigned char *ziplistDelete(unsigned char *zl, unsigned char **p);\nunsigned char *ziplistDeleteRange(unsigned char *zl, int index, unsigned int num);\nunsigned char *ziplistReplace(unsigned char *zl, unsigned char *p, unsigned char *s, unsigned int slen);\nunsigned int ziplistCompare(unsigned char *p, unsigned char *s, unsigned int slen);\nunsigned char *ziplistFind(unsigned char *zl, unsigned char *p, unsigned char *vstr, unsigned int vlen, unsigned int skip);\nunsigned int ziplistLen(unsigned char *zl);\nsize_t ziplistBlobLen(unsigned char *zl);\nvoid ziplistRepr(unsigned char *zl);\ntypedef int (*ziplistValidateEntryCB)(unsigned char* p, unsigned int head_count, void* userdata);\nint ziplistValidateIntegrity(unsigned char *zl, size_t size, int deep,\n                             ziplistValidateEntryCB entry_cb, void *cb_userdata);\nvoid ziplistRandomPair(unsigned char *zl, unsigned long total_count, ziplistEntry *key, ziplistEntry *val);\nvoid ziplistRandomPairs(unsigned char *zl, unsigned int count, ziplistEntry *keys, ziplistEntry *vals);\nunsigned int ziplistRandomPairsUnique(unsigned char *zl, unsigned int count, ziplistEntry *keys, ziplistEntry *vals);\nint ziplistSafeToAdd(unsigned char* zl, 
size_t add);\n\n#ifdef REDIS_TEST\nint ziplistTest(int argc, char *argv[], int accurate);\n#endif\n\n#endif /* _ZIPLIST_H */\n"
  },
  {
    "path": "src/redis/zmalloc.c",
    "content": "/* zmalloc - total amount of allocated memory aware version of malloc()\n *\n * Copyright (c) 2009-2010, Salvatore Sanfilippo <antirez at gmail dot com>\n * All rights reserved.\n *\n * Redistribution and use in source and binary forms, with or without\n * modification, are permitted provided that the following conditions are met:\n *\n *   * Redistributions of source code must retain the above copyright notice,\n *     this list of conditions and the following disclaimer.\n *   * Redistributions in binary form must reproduce the above copyright\n *     notice, this list of conditions and the following disclaimer in the\n *     documentation and/or other materials provided with the distribution.\n *   * Neither the name of Redis nor the names of its contributors may be used\n *     to endorse or promote products derived from this software without\n *     specific prior written permission.\n *\n * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS \"AS IS\"\n * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE\n * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE\n * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE\n * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR\n * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF\n * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS\n * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN\n * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)\n * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE\n * POSSIBILITY OF SUCH DAMAGE.\n */\n\n#include <stdio.h>\n#include <stdlib.h>\n#include <stdint.h>\n#include <unistd.h>\n#include <assert.h>\n\n/* This function provide us access to the original libc free(). This is useful\n * for instance to free results obtained by backtrace_symbols(). 
We need\n * to define this function before including zmalloc.h that may shadow the\n * free implementation if we use jemalloc or another non standard allocator. */\n/*void zlibc_free(void *ptr) {\n    free(ptr);\n}*/\n\n#include <string.h>\n#include <pthread.h>\n#include \"config.h\"\n#include \"zmalloc.h\"\n#include \"atomicvar.h\"\n\n#ifdef HAVE_MALLOC_SIZE\n#define PREFIX_SIZE (0)\n#define ASSERT_NO_SIZE_OVERFLOW(sz)\n#else\n#if defined(__sun) || defined(__sparc) || defined(__sparc__)\n#define PREFIX_SIZE (sizeof(long long))\n#else\n#define PREFIX_SIZE (sizeof(size_t))\n#endif\n#define ASSERT_NO_SIZE_OVERFLOW(sz) assert((sz) + PREFIX_SIZE > (sz))\n#endif\n\n/* When using the libc allocator, use a minimum allocation size to match the\n * jemalloc behavior that doesn't return NULL in this case.\n */\n#define MALLOC_MIN_SIZE(x) ((x) > 0 ? (x) : sizeof(long))\n/* Explicitly override malloc/free etc when using tcmalloc. */\n#if defined(USE_TCMALLOC)\n#define malloc(size) tc_malloc(size)\n#define calloc(count,size) tc_calloc(count,size)\n#define realloc(ptr,size) tc_realloc(ptr,size)\n#define free(ptr) tc_free(ptr)\n#elif defined(USE_JEMALLOC)\n#define malloc(size) je_malloc(size)\n#define calloc(count,size) je_calloc(count,size)\n#define realloc(ptr,size) je_realloc(ptr,size)\n#define free(ptr) je_free(ptr)\n#define mallocx(size,flags) je_mallocx(size,flags)\n#define dallocx(ptr,flags) je_dallocx(ptr,flags)\n#endif\n\n#define update_zmalloc_stat_alloc(__n) used_memory_tl += (__n)\n#define update_zmalloc_stat_free(__n)  used_memory_tl -= (__n)\n\n__thread ssize_t used_memory_tl = 0;\n\n\n\nstatic void zmalloc_default_oom(size_t size) {\n    fprintf(stderr, \"zmalloc: Out of memory trying to allocate %zu bytes\\n\",\n        size);\n    fflush(stderr);\n    abort();\n}\n\nstatic void (*zmalloc_oom_handler)(size_t) = zmalloc_default_oom;\n\nvoid init_zmalloc_threadlocal() {\n}\n\n/* Try allocating memory, and return NULL if failed.\n * '*usable' is set to the usable 
size if non NULL. */\nvoid *ztrymalloc_usable(size_t size, size_t *usable) {\n    ASSERT_NO_SIZE_OVERFLOW(size);\n    void *ptr = malloc(MALLOC_MIN_SIZE(size)+PREFIX_SIZE);\n\n    if (!ptr) return NULL;\n#ifdef HAVE_MALLOC_SIZE\n    size = zmalloc_size(ptr);\n    update_zmalloc_stat_alloc(size);\n    if (usable) *usable = size;\n    return ptr;\n#else\n    *((size_t*)ptr) = size;\n    update_zmalloc_stat_alloc(size+PREFIX_SIZE);\n    if (usable) *usable = size;\n    return (char*)ptr+PREFIX_SIZE;\n#endif\n}\n\n/* Allocate memory or panic */\nvoid *zmalloc(size_t size) {\n    void *ptr = ztrymalloc_usable(size, NULL);\n    if (!ptr) zmalloc_oom_handler(size);\n    return ptr;\n}\n\n/* Try allocating memory, and return NULL if failed. */\nvoid *ztrymalloc(size_t size) {\n    void *ptr = ztrymalloc_usable(size, NULL);\n    return ptr;\n}\n\n/* Allocate memory or panic.\n * '*usable' is set to the usable size if non NULL. */\nvoid *zmalloc_usable(size_t size, size_t *usable) {\n    void *ptr = ztrymalloc_usable(size, usable);\n    if (!ptr) zmalloc_oom_handler(size);\n    return ptr;\n}\n\nsize_t znallocx(size_t size) {\n#if defined(USE_JEMALLOC)\n    return je_ncallocx(size, 0);\n#else\n    return size;\n#endif\n}\n\nvoid zfree_size(void* ptr, size_t size) {\n#if defined(USE_JEMALLOC)\n    je_sdallocx(ptr, size, 0);\n#else\n    free(ptr);\n    (void)size;\n#endif\n\n}\n\n/* Allocation and free functions that bypass the thread cache\n * and go straight to the allocator arena bins.\n * Currently implemented only for jemalloc. Used for online defragmentation. 
*/\n#ifdef HAVE_DEFRAG\nvoid *zmalloc_no_tcache(size_t size) {\n    ASSERT_NO_SIZE_OVERFLOW(size);\n    void *ptr = mallocx(size+PREFIX_SIZE, MALLOCX_TCACHE_NONE);\n    if (!ptr) zmalloc_oom_handler(size);\n    update_zmalloc_stat_alloc(zmalloc_size(ptr));\n    return ptr;\n}\n\nvoid zfree_no_tcache(void *ptr) {\n    if (ptr == NULL) return;\n    update_zmalloc_stat_free(zmalloc_size(ptr));\n    dallocx(ptr, MALLOCX_TCACHE_NONE);\n}\n#endif\n\n/* Try allocating memory and zero it, and return NULL if failed.\n * '*usable' is set to the usable size if non NULL. */\nvoid *ztrycalloc_usable(size_t size, size_t *usable) {\n    ASSERT_NO_SIZE_OVERFLOW(size);\n    void *ptr = calloc(1, MALLOC_MIN_SIZE(size)+PREFIX_SIZE);\n    if (ptr == NULL) return NULL;\n\n#ifdef HAVE_MALLOC_SIZE\n    size = zmalloc_size(ptr);\n    update_zmalloc_stat_alloc(size);\n    if (usable) *usable = size;\n    return ptr;\n#else\n    *((size_t*)ptr) = size;\n    update_zmalloc_stat_alloc(size+PREFIX_SIZE);\n    if (usable) *usable = size;\n    return (char*)ptr+PREFIX_SIZE;\n#endif\n}\n\n/* Allocate memory and zero it or panic */\nvoid *zcalloc(size_t size) {\n    void *ptr = ztrycalloc_usable(size, NULL);\n\n    if (!ptr) zmalloc_oom_handler(size);\n    return ptr;\n}\n\n/* Try allocating memory, and return NULL if failed. */\nvoid *ztrycalloc(size_t size) {\n    void *ptr = ztrycalloc_usable(size, NULL);\n    return ptr;\n}\n\n/* Allocate memory or panic.\n * '*usable' is set to the usable size if non NULL. */\nvoid *zcalloc_usable(size_t size, size_t *usable) {\n    void *ptr = ztrycalloc_usable(size, usable);\n    if (!ptr) zmalloc_oom_handler(size);\n    return ptr;\n}\n\n/* Try reallocating memory, and return NULL if failed.\n * '*usable' is set to the usable size if non NULL. 
*/\nvoid *ztryrealloc_usable(void *ptr, size_t size, size_t *usable) {\n    ASSERT_NO_SIZE_OVERFLOW(size);\n#ifndef HAVE_MALLOC_SIZE\n    void *realptr;\n#endif\n    size_t oldsize;\n    void *newptr;\n\n    /* not allocating anything, just redirect to free. */\n    if (size == 0 && ptr != NULL) {\n        zfree(ptr);\n        if (usable) *usable = 0;\n        return NULL;\n    }\n    /* Not freeing anything, just redirect to malloc. */\n    if (ptr == NULL)\n        return ztrymalloc_usable(size, usable);\n#ifdef HAVE_MALLOC_SIZE\n    oldsize = zmalloc_size(ptr);\n    newptr = realloc(ptr,size);\n    if (newptr == NULL) {\n        if (usable) *usable = 0;\n        return NULL;\n    }\n\n    update_zmalloc_stat_free(oldsize);\n    size = zmalloc_size(newptr);\n    update_zmalloc_stat_alloc(size);\n    if (usable) *usable = size;\n    return newptr;\n#else\n    realptr = (char*)ptr-PREFIX_SIZE;\n    oldsize = *((size_t*)realptr);\n    newptr = realloc(realptr,size+PREFIX_SIZE);\n    if (newptr == NULL) {\n        if (usable) *usable = 0;\n        return NULL;\n    }\n\n    *((size_t*)newptr) = size;\n    update_zmalloc_stat_free(oldsize);\n    update_zmalloc_stat_alloc(size);\n    if (usable) *usable = size;\n    return (char*)newptr+PREFIX_SIZE;\n#endif\n}\n\n/* Reallocate memory and zero it or panic */\nvoid *zrealloc(void *ptr, size_t size) {\n    ptr = ztryrealloc_usable(ptr, size, NULL);\n    if (!ptr && size != 0) zmalloc_oom_handler(size);\n    return ptr;\n}\n\n/* Try Reallocating memory, and return NULL if failed. */\nvoid *ztryrealloc(void *ptr, size_t size) {\n    ptr = ztryrealloc_usable(ptr, size, NULL);\n    return ptr;\n}\n\n/* Reallocate memory or panic.\n * '*usable' is set to the usable size if non NULL. 
*/\nvoid *zrealloc_usable(void *ptr, size_t size, size_t *usable) {\n    ptr = ztryrealloc_usable(ptr, size, usable);\n    if (!ptr && size != 0) zmalloc_oom_handler(size);\n    return ptr;\n}\n/* Provide zmalloc_size() for systems where this function is not provided by\n * malloc itself, given that in that case we store a header with this\n * information as the first bytes of every allocation. */\n#ifndef HAVE_MALLOC_SIZE\nsize_t zmalloc_size(void *ptr) {\n    void *realptr = (char*)ptr-PREFIX_SIZE;\n    size_t size = *((size_t*)realptr);\n    return size+PREFIX_SIZE;\n}\nsize_t zmalloc_usable_size(void *ptr) {\n    return zmalloc_size(ptr)-PREFIX_SIZE;\n}\n#endif\n\nvoid zfree(void *ptr) {\n#ifndef HAVE_MALLOC_SIZE\n    void *realptr;\n    size_t oldsize;\n#endif\n\n    if (ptr == NULL) return;\n#ifdef HAVE_MALLOC_SIZE\n    update_zmalloc_stat_free(zmalloc_size(ptr));\n    free(ptr);\n#else\n    realptr = (char*)ptr-PREFIX_SIZE;\n    oldsize = *((size_t*)realptr);\n    update_zmalloc_stat_free(oldsize+PREFIX_SIZE);\n    free(realptr);\n#endif\n}\n\nvoid zmalloc_set_oom_handler(void (*oom_handler)(size_t)) {\n    zmalloc_oom_handler = oom_handler;\n}\n\n/* Get the RSS information in an OS-specific way.\n *\n * WARNING: the function zmalloc_get_rss() is not designed to be fast\n * and may not be called in the busy loops where Redis tries to release\n * memory expiring or swapping out objects.\n *\n * For this kind of \"fast RSS reporting\" usages use instead the\n * function RedisEstimateRSS() that is a much faster (and less precise)\n * version of the function. 
*/\n\n#if defined(HAVE_PROC_STAT)\n#include <sys/types.h>\n#include <sys/stat.h>\n#include <fcntl.h>\n\nsize_t zmalloc_get_rss(void) {\n    int page = sysconf(_SC_PAGESIZE);\n    size_t rss;\n    char buf[4096];\n    char filename[256];\n    int fd, count;\n    char *p, *x;\n\n    snprintf(filename,256,\"/proc/%ld/stat\",(long) getpid());\n    if ((fd = open(filename,O_RDONLY)) == -1) return 0;\n    if (read(fd,buf,4096) <= 0) {\n        close(fd);\n        return 0;\n    }\n    close(fd);\n\n    p = buf;\n    count = 23; /* RSS is the 24th field in /proc/<pid>/stat */\n    while(p && count--) {\n        p = strchr(p,' ');\n        if (p) p++;\n    }\n    if (!p) return 0;\n    x = strchr(p,' ');\n    if (!x) return 0;\n    *x = '\\0';\n\n    rss = strtoll(p,NULL,10);\n    rss *= page;\n    return rss;\n}\n#elif defined(HAVE_TASKINFO)\n#include <sys/types.h>\n#include <sys/sysctl.h>\n#include <mach/task.h>\n#include <mach/mach_init.h>\n\nsize_t zmalloc_get_rss(void) {\n    task_t task = MACH_PORT_NULL;\n    struct task_basic_info t_info;\n    mach_msg_type_number_t t_info_count = TASK_BASIC_INFO_COUNT;\n\n    if (task_for_pid(current_task(), getpid(), &task) != KERN_SUCCESS)\n        return 0;\n    task_info(task, TASK_BASIC_INFO, (task_info_t)&t_info, &t_info_count);\n\n    return t_info.resident_size;\n}\n#elif defined(__FreeBSD__) || defined(__DragonFly__)\n#include <sys/types.h>\n#include <sys/sysctl.h>\n#include <sys/user.h>\n\nsize_t zmalloc_get_rss(void) {\n    struct kinfo_proc info;\n    size_t infolen = sizeof(info);\n    int mib[4];\n    mib[0] = CTL_KERN;\n    mib[1] = KERN_PROC;\n    mib[2] = KERN_PROC_PID;\n    mib[3] = getpid();\n\n    if (sysctl(mib, 4, &info, &infolen, NULL, 0) == 0)\n#if defined(__FreeBSD__)\n        return (size_t)info.ki_rssize * getpagesize();\n#else\n        return (size_t)info.kp_vm_rssize * getpagesize();\n#endif\n\n    return 0L;\n}\n#elif defined(__NetBSD__)\n#include <sys/types.h>\n#include <sys/sysctl.h>\n\nsize_t 
zmalloc_get_rss(void) {\n    struct kinfo_proc2 info;\n    size_t infolen = sizeof(info);\n    int mib[6];\n    mib[0] = CTL_KERN;\n    mib[1] = KERN_PROC;\n    mib[2] = KERN_PROC_PID;\n    mib[3] = getpid();\n    mib[4] = sizeof(info);\n    mib[5] = 1;\n    if (sysctl(mib, 4, &info, &infolen, NULL, 0) == 0)\n        return (size_t)info.p_vm_rssize * getpagesize();\n\n    return 0L;\n}\n#elif defined(HAVE_PSINFO)\n#include <unistd.h>\n#include <sys/procfs.h>\n#include <fcntl.h>\n\nsize_t zmalloc_get_rss(void) {\n    struct prpsinfo info;\n    char filename[256];\n    int fd;\n\n    snprintf(filename,256,\"/proc/%ld/psinfo\",(long) getpid());\n\n    if ((fd = open(filename,O_RDONLY)) == -1) return 0;\n    if (ioctl(fd, PIOCPSINFO, &info) == -1) {\n        close(fd);\n\treturn 0;\n    }\n\n    close(fd);\n    return info.pr_rssize;\n}\n#else\nsize_t zmalloc_get_rss(void) {\n    /* If we can't get the RSS in an OS-specific way for this system just\n     * return the memory usage we estimated in zmalloc()..\n     *\n     * Fragmentation will appear to be always 1 (no fragmentation)\n     * of course... */\n    return zmalloc_used_memory();\n}\n#endif\n\n#if defined(USE_JEMALLOC)\n\nint zmalloc_get_allocator_info(size_t *allocated,\n                               size_t *active,\n                               size_t *resident) {\n    uint64_t epoch = 1;\n    size_t sz;\n    *allocated = *resident = *active = 0;\n    /* Update the statistics cached by mallctl. */\n    sz = sizeof(epoch);\n    je_mallctl(\"epoch\", &epoch, &sz, &epoch, sz);\n    sz = sizeof(size_t);\n    /* Unlike RSS, this does not include RSS from shared libraries and other non\n     * heap mappings. */\n    je_mallctl(\"stats.resident\", resident, &sz, NULL, 0);\n    /* Unlike resident, this doesn't not include the pages jemalloc reserves\n     * for re-use (purge will clean that). 
*/\n    je_mallctl(\"stats.active\", active, &sz, NULL, 0);\n    /* Unlike zmalloc_used_memory, this matches the stats.resident by taking\n     * into account all allocations done by this process (not only zmalloc). */\n    je_mallctl(\"stats.allocated\", allocated, &sz, NULL, 0);\n    return 1;\n}\n\nvoid set_jemalloc_bg_thread(int enable) {\n    /* let jemalloc do purging asynchronously, required when there's no traffic\n     * after flushdb */\n    char val = !!enable;\n    je_mallctl(\"background_thread\", NULL, 0, &val, 1);\n}\n\nint jemalloc_purge() {\n    /* return all unused (reserved) pages to the OS */\n    char tmp[32];\n    unsigned narenas = 0;\n    size_t sz = sizeof(unsigned);\n    if (!je_mallctl(\"arenas.narenas\", &narenas, &sz, NULL, 0)) {\n        sprintf(tmp, \"arena.%d.purge\", narenas);\n        if (!je_mallctl(tmp, NULL, 0, NULL, 0))\n            return 0;\n    }\n    return -1;\n}\n\n#else\n\nint zmalloc_get_allocator_info(size_t *allocated,\n                               size_t *active,\n                               size_t *resident) {\n    *allocated = *resident = *active = 0;\n    return 1;\n}\n\nvoid set_jemalloc_bg_thread(int enable) {\n    ((void)(enable));\n}\n\nint jemalloc_purge() {\n    return 0;\n}\n\n#endif\n\n#if defined(__APPLE__)\n/* For proc_pidinfo() used later in zmalloc_get_smap_bytes_by_field().\n * Note that this file cannot be included in zmalloc.h because it includes\n * a Darwin queue.h file where there is a \"LIST_HEAD\" macro (!) defined\n * conficting with Redis user code. */\n#include <libproc.h>\n#endif\n\n/* Get the sum of the specified field (converted form kb to bytes) in\n * /proc/self/smaps. 
The field must be specified with trailing \":\" as it\n * apperas in the smaps output.\n *\n * If a pid is specified, the information is extracted for such a pid,\n * otherwise if pid is -1 the information is reported is about the\n * current process.\n *\n * Example: zmalloc_get_smap_bytes_by_field(\"Rss:\",-1);\n */\n#if defined(HAVE_PROC_SMAPS)\nsize_t zmalloc_get_smap_bytes_by_field(char *field, long pid) {\n    char line[1024];\n    size_t bytes = 0;\n    int flen = strlen(field);\n    FILE *fp;\n\n    if (pid == -1) {\n        fp = fopen(\"/proc/self/smaps\",\"r\");\n    } else {\n        char filename[128];\n        snprintf(filename,sizeof(filename),\"/proc/%ld/smaps\",pid);\n        fp = fopen(filename,\"r\");\n    }\n\n    if (!fp) return 0;\n    while(fgets(line,sizeof(line),fp) != NULL) {\n        if (strncmp(line,field,flen) == 0) {\n            char *p = strchr(line,'k');\n            if (p) {\n                *p = '\\0';\n                bytes += strtol(line+flen,NULL,10) * 1024;\n            }\n        }\n    }\n    fclose(fp);\n    return bytes;\n}\n#else\n/* Get sum of the specified field from libproc api call.\n * As there are per page value basis we need to convert\n * them accordingly.\n *\n * Note that AnonHugePages is a no-op as THP feature\n * is not supported in this platform\n */\nsize_t zmalloc_get_smap_bytes_by_field(char *field, long pid) {\n#if defined(__APPLE__)\n    struct proc_regioninfo pri;\n    if (pid == -1) pid = getpid();\n    if (proc_pidinfo(pid, PROC_PIDREGIONINFO, 0, &pri,\n                     PROC_PIDREGIONINFO_SIZE) == PROC_PIDREGIONINFO_SIZE)\n    {\n        int pagesize = getpagesize();\n        if (!strcmp(field, \"Private_Dirty:\")) {\n            return (size_t)pri.pri_pages_dirtied * pagesize;\n        } else if (!strcmp(field, \"Rss:\")) {\n            return (size_t)pri.pri_pages_resident * pagesize;\n        } else if (!strcmp(field, \"AnonHugePages:\")) {\n            return 0;\n        }\n    }\n    return 
0;\n#endif\n    ((void) field);\n    ((void) pid);\n    return 0;\n}\n#endif\n\n/* Return the total number bytes in pages marked as Private Dirty.\n *\n * Note: depending on the platform and memory footprint of the process, this\n * call can be slow, exceeding 1000ms!\n */\nsize_t zmalloc_get_private_dirty(long pid) {\n    return zmalloc_get_smap_bytes_by_field(\"Private_Dirty:\",pid);\n}\n\n/* Returns the size of physical memory (RAM) in bytes.\n * It looks ugly, but this is the cleanest way to achieve cross platform results.\n * Cleaned up from:\n *\n * http://nadeausoftware.com/articles/2012/09/c_c_tip_how_get_physical_memory_size_system\n *\n * Note that this function:\n * 1) Was released under the following CC attribution license:\n *    http://creativecommons.org/licenses/by/3.0/deed.en_US.\n * 2) Was originally implemented by David Robert Nadeau.\n * 3) Was modified for Redis by Matt Stancliff.\n * 4) This note exists in order to comply with the original license.\n */\nsize_t zmalloc_get_memory_size(void) {\n#if defined(__unix__) || defined(__unix) || defined(unix) || \\\n    (defined(__APPLE__) && defined(__MACH__))\n#if defined(CTL_HW) && (defined(HW_MEMSIZE) || defined(HW_PHYSMEM64))\n    int mib[2];\n    mib[0] = CTL_HW;\n#if defined(HW_MEMSIZE)\n    mib[1] = HW_MEMSIZE;            /* OSX. --------------------- */\n#elif defined(HW_PHYSMEM64)\n    mib[1] = HW_PHYSMEM64;          /* NetBSD, OpenBSD. --------- */\n#endif\n    int64_t size = 0;               /* 64-bit */\n    size_t len = sizeof(size);\n    if (sysctl( mib, 2, &size, &len, NULL, 0) == 0)\n        return (size_t)size;\n    return 0L;          /* Failed? */\n\n#elif defined(_SC_PHYS_PAGES) && defined(_SC_PAGESIZE)\n    /* FreeBSD, Linux, OpenBSD, and Solaris. -------------------- */\n    return (size_t)sysconf(_SC_PHYS_PAGES) * (size_t)sysconf(_SC_PAGESIZE);\n\n#elif defined(CTL_HW) && (defined(HW_PHYSMEM) || defined(HW_REALMEM))\n    /* DragonFly BSD, FreeBSD, NetBSD, OpenBSD, and OSX. 
-------- */\n    int mib[2];\n    mib[0] = CTL_HW;\n#if defined(HW_REALMEM)\n    mib[1] = HW_REALMEM;        /* FreeBSD. ----------------- */\n#elif defined(HW_PHYSMEM)\n    mib[1] = HW_PHYSMEM;        /* Others. ------------------ */\n#endif\n    unsigned int size = 0;      /* 32-bit */\n    size_t len = sizeof(size);\n    if (sysctl(mib, 2, &size, &len, NULL, 0) == 0)\n        return (size_t)size;\n    return 0L;          /* Failed? */\n#else\n    return 0L;          /* Unknown method to get the data. */\n#endif\n#else\n    return 0L;          /* Unknown OS. */\n#endif\n}\n\n#ifdef REDIS_TEST\n#define UNUSED(x) ((void)(x))\nint zmalloc_test(int argc, char **argv, int accurate) {\n    void *ptr;\n\n    UNUSED(argc);\n    UNUSED(argv);\n    UNUSED(accurate);\n    printf(\"Malloc prefix size: %d\\n\", (int) PREFIX_SIZE);\n    printf(\"Initial used memory: %zu\\n\", zmalloc_used_memory());\n    ptr = zmalloc(123);\n    printf(\"Allocated 123 bytes; used: %zu\\n\", zmalloc_used_memory());\n    ptr = zrealloc(ptr, 456);\n    printf(\"Reallocated to 456 bytes; used: %zu\\n\", zmalloc_used_memory());\n    zfree(ptr);\n    printf(\"Freed pointer; used: %zu\\n\", zmalloc_used_memory());\n    return 0;\n}\n#endif\n"
  },
  {
    "path": "src/redis/zmalloc.h",
    "content": "/* zmalloc - total amount of allocated memory aware version of malloc()\n *\n * Copyright (c) 2009-2010, Salvatore Sanfilippo <antirez at gmail dot com>\n * All rights reserved.\n *\n * Redistribution and use in source and binary forms, with or without\n * modification, are permitted provided that the following conditions are met:\n *\n *   * Redistributions of source code must retain the above copyright notice,\n *     this list of conditions and the following disclaimer.\n *   * Redistributions in binary form must reproduce the above copyright\n *     notice, this list of conditions and the following disclaimer in the\n *     documentation and/or other materials provided with the distribution.\n *   * Neither the name of Redis nor the names of its contributors may be used\n *     to endorse or promote products derived from this software without\n *     specific prior written permission.\n *\n * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS \"AS IS\"\n * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE\n * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE\n * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE\n * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR\n * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF\n * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS\n * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN\n * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)\n * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE\n * POSSIBILITY OF SUCH DAMAGE.\n */\n\n#ifndef __ZMALLOC_H\n#define __ZMALLOC_H\n\n#include <stdint.h>\n\n/* Double expansion needed for stringification of macro values. 
*/\n#define __xstr(s) __zm_str(s)\n#define __zm_str(s) #s\n\n#if defined(USE_JEMALLOC)\n#define ZMALLOC_LIB (\"jemalloc-\" __xstr(JEMALLOC_VERSION_MAJOR) \".\" __xstr(JEMALLOC_VERSION_MINOR) \".\" __xstr(JEMALLOC_VERSION_BUGFIX))\n#include <jemalloc/jemalloc.h>\n#if (JEMALLOC_VERSION_MAJOR == 2 && JEMALLOC_VERSION_MINOR >= 1) || (JEMALLOC_VERSION_MAJOR > 2)\n#define HAVE_MALLOC_SIZE 1\n#define zmalloc_size(p) je_malloc_usable_size(p)\n#else\n#error \"Newer version of jemalloc required\"\n#endif\n\n#elif defined(__APPLE__)\n#include <malloc/malloc.h>\n#define HAVE_MALLOC_SIZE 1\n#ifdef USE_ZMALLOC_MI\n#define zmalloc_size(p) zmalloc_usable_size(p)\n#else\n#define zmalloc_size(p) malloc_size(p)\n#endif\n#define ZMALLOC_LIB \"macos\"\n#endif\n\n/* On native libc implementations, we should still do our best to provide a\n * HAVE_MALLOC_SIZE capability. This can be set explicitly as well:\n *\n * NO_MALLOC_USABLE_SIZE disables it on all platforms, even if they are\n *      known to support it.\n * USE_MALLOC_USABLE_SIZE forces use of malloc_usable_size() regardless\n *      of platform.\n */\n#ifndef ZMALLOC_LIB\n#define ZMALLOC_LIB \"libc\"\n\n#include <malloc.h>\n\n#define HAVE_MALLOC_SIZE 1\n#ifdef USE_ZMALLOC_MI\n#define zmalloc_size(p) zmalloc_usable_size(p)\n#else\n#define zmalloc_size(p) malloc_usable_size(p)\n#endif\n\n#endif  // ZMALLOC_LIB\n\n/* We can enable the Redis defrag capabilities only if we are using Jemalloc\n * and the version used is our special version modified for Redis having\n * the ability to return per-allocation fragmentation hints. 
*/\n#if defined(USE_JEMALLOC) && defined(JEMALLOC_FRAG_HINT)\n#define HAVE_DEFRAG\n#endif\n\nvoid *zmalloc(size_t size);\nvoid *zcalloc(size_t size);\nvoid *zrealloc(void *ptr, size_t size);\nvoid *ztrymalloc(size_t size);\nvoid *ztrycalloc(size_t size);\nvoid *ztryrealloc(void *ptr, size_t size);\nvoid zfree(void *ptr);\n\nsize_t znallocx(size_t size); // Equivalent to nallocx for jemalloc or mi_good_size for mimalloc.\nvoid zfree_size(void* ptr, size_t size);  // equivalent to sdallocx or mi_free_size\n\nvoid *zmalloc_usable(size_t size, size_t *usable);\nvoid *zcalloc_usable(size_t size, size_t *usable);\nvoid *zrealloc_usable(void *ptr, size_t size, size_t *usable);\nvoid *ztrymalloc_usable(size_t size, size_t *usable);\nvoid *ztrycalloc_usable(size_t size, size_t *usable);\nvoid *ztryrealloc_usable(void *ptr, size_t size, size_t *usable);\n\n// size_t zmalloc_used_memory(void);\nvoid zmalloc_set_oom_handler(void (*oom_handler)(size_t));\nsize_t zmalloc_get_rss(void);\nint zmalloc_get_allocator_info(size_t *allocated, size_t *active, size_t *resident);\nvoid set_jemalloc_bg_thread(int enable);\nint jemalloc_purge();\nsize_t zmalloc_get_private_dirty(long pid);\nsize_t zmalloc_get_smap_bytes_by_field(char *field, long pid);\nsize_t zmalloc_get_memory_size(void);\nsize_t zmalloc_usable_size(const void* p);\n\n/* get the memory usage + the number of wasted locations of memory\nBased on a given threshold (ratio < 1).\nNote that if a block is not used, it would not counted as wasted\n*/\nint zmalloc_get_allocator_wasted_blocks(float ratio, size_t* allocated, size_t* commited,\n                                        size_t* wasted);\nstruct fragmentation_info {\n  size_t committed;\n\n  // a temporary metric to compare against \"committed\" in production.\n  // TODO: delete it once we are confident committed is computed correctly.\n  size_t committed_golden;\n  size_t wasted;\n  unsigned bin;\n};\n\n// Like zmalloc_get_allocator_wasted_blocks but incremental.\n// 
struct fragmentation_info must be passed first set to zero. Returns -1 needs to continue,\n// 0 if done.\nint zmalloc_get_allocator_fragmentation_step(float ratio, struct fragmentation_info* info);\n\n/*\n * checks whether a page that the pointer ptr located at is underutilized.\n * This uses the current local thread heap.\n * return 0 if not, 1 if underutilized\n */\nstruct mi_page_usage_stats_s;\nvoid zmalloc_page_is_underutilized(void* ptr, float ratio, int collect_stats, struct mi_page_usage_stats_s* result);\nchar* zstrdup(const char* s);\n\nvoid init_zmalloc_threadlocal(void* heap);\nextern __thread ssize_t zmalloc_used_memory_tl;\n\n#undef __zm_str\n#undef __xstr\n\n#endif /* __ZMALLOC_H */\n"
  },
  {
    "path": "src/redis/zmalloc_mi.c",
    "content": "// Copyright 2022, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#include <assert.h>\n#include <mimalloc.h>\n\n#define MI_BUILD_RELEASE 1\n#include <mimalloc/types.h>\n#include <string.h>\n#include <unistd.h>\n\n#include \"zmalloc.h\"\n\n__thread ssize_t zmalloc_used_memory_tl = 0;\n__thread mi_heap_t* zmalloc_heap = NULL;\n\nmi_page_usage_stats_t mi_heap_page_is_underutilized(mi_heap_t* heap, void* p, float ratio,\n                                                    bool collect_stats);\n\n/* Allocate memory or panic */\nvoid* zmalloc(size_t size) {\n  assert(zmalloc_heap);\n  void* res = mi_heap_malloc(zmalloc_heap, size);\n  size_t usable = mi_usable_size(res);\n\n  // assertion does not hold. Basically mi_good_size is not a good function for\n  // doing accounting.\n  // assert(usable == mi_good_size(size));\n  zmalloc_used_memory_tl += usable;\n\n  return res;\n}\n\nvoid* ztrymalloc_usable(size_t size, size_t* usable) {\n  return zmalloc_usable(size, usable);\n}\n\nsize_t zmalloc_usable_size(const void* p) {\n  return mi_usable_size(p);\n}\n\nvoid zfree(void* ptr) {\n  size_t usable = mi_usable_size(ptr);\n\n  // assert(zmalloc_used_memory_tl >= (ssize_t)usable);\n  zmalloc_used_memory_tl -= usable;\n\n  mi_free_size(ptr, usable);\n}\n\nvoid* zrealloc(void* ptr, size_t size) {\n  size_t usable;\n  return zrealloc_usable(ptr, size, &usable);\n}\n\nvoid* zcalloc(size_t size) {\n  // mi_good_size(size) is not working. 
try for example, size=690557.\n\n  void* res = mi_heap_calloc(zmalloc_heap, 1, size);\n  size_t usable = mi_usable_size(res);\n  zmalloc_used_memory_tl += usable;\n\n  return res;\n}\n\nvoid* zmalloc_usable(size_t size, size_t* usable) {\n  assert(zmalloc_heap);\n  void* res = mi_heap_malloc(zmalloc_heap, size);\n  size_t uss = mi_usable_size(res);\n  *usable = uss;\n\n  zmalloc_used_memory_tl += uss;\n\n  return res;\n}\n\nvoid* zrealloc_usable(void* ptr, size_t size, size_t* usable) {\n  ssize_t prev = mi_usable_size(ptr);\n\n  void* res = mi_heap_realloc(zmalloc_heap, ptr, size);\n  ssize_t uss = mi_usable_size(res);\n  *usable = uss;\n  zmalloc_used_memory_tl += (uss - prev);\n\n  return res;\n}\n\nsize_t znallocx(size_t size) {\n  return mi_good_size(size);\n}\n\nvoid zfree_size(void* ptr, size_t size) {\n  ssize_t uss = mi_usable_size(ptr);\n  zmalloc_used_memory_tl -= uss;\n  mi_free_size(ptr, uss);\n}\n\nvoid* ztrymalloc(size_t size) {\n  size_t usable;\n  return zmalloc_usable(size, &usable);\n}\n\nvoid* ztrycalloc(size_t size) {\n  size_t g = mi_good_size(size);\n  zmalloc_used_memory_tl += g;\n  void* ptr = mi_heap_calloc(zmalloc_heap, 1, size);\n  assert(mi_usable_size(ptr) == g);\n  return ptr;\n}\n\ntypedef struct Sum_s {\n  size_t allocated;\n  size_t comitted;\n} Sum_t;\n\ntypedef struct {\n  size_t allocated;\n  size_t comitted;\n  size_t wasted;\n  float ratio;\n} MemUtilized_t;\n\nbool heap_visit_cb(const mi_heap_t* heap, const mi_heap_area_t* area, void* block,\n                   size_t block_size, void* arg) {\n  assert(area->used < (1u << 31));\n\n  Sum_t* sum = (Sum_t*)arg;\n\n  // mimalloc mistakenly exports used in blocks instead of bytes.\n  sum->allocated += block_size * area->used;\n  sum->comitted += area->committed;\n  return true;  // continue iteration\n};\n\nbool heap_count_wasted_blocks(const mi_heap_t* heap, const mi_heap_area_t* area, void* block,\n                              size_t block_size, void* arg) {\n  
assert(area->used < (1u << 31));\n\n  MemUtilized_t* sum = (MemUtilized_t*)arg;\n\n  // mimalloc mistakenly exports used in blocks instead of bytes.\n  size_t used = block_size * area->used;\n  sum->allocated += used;\n  sum->comitted += area->committed;\n\n  if (used < area->committed * sum->ratio) {\n    sum->wasted += (area->committed - used);\n  }\n  return true;  // continue iteration\n};\n\nint zmalloc_get_allocator_info(size_t* allocated, size_t* active, size_t* resident) {\n  Sum_t sum = {0};\n\n  mi_heap_visit_blocks(zmalloc_heap, false /* visit all blocks*/, heap_visit_cb, &sum);\n  *allocated = sum.allocated;\n  *resident = sum.comitted;\n  *active = 0;\n\n  return 1;\n}\n\nint zmalloc_get_allocator_wasted_blocks(float ratio, size_t* allocated, size_t* commited,\n                                        size_t* wasted) {\n  MemUtilized_t sum = {.allocated = 0, .comitted = 0, .wasted = 0, .ratio = ratio};\n\n  mi_heap_visit_blocks(zmalloc_heap, false /* visit all blocks*/, heap_count_wasted_blocks, &sum);\n  *allocated = sum.allocated;\n  *commited = sum.comitted;\n  *wasted = sum.wasted;\n  return 1;\n}\n\n// Implemented based on this mimalloc code:\n// https://github.com/microsoft/mimalloc/blob/main/src/heap.c#L27\nint zmalloc_get_allocator_fragmentation_step(float ratio, struct fragmentation_info* info) {\n  if (zmalloc_heap->page_count == 0 || info->bin >= MI_BIN_FULL) {\n    // We avoid iterating over full pages since they are fully utilized.\n    return 0;\n  }\n\n  mi_page_queue_t* pq = &zmalloc_heap->pages[info->bin];\n  const mi_page_t* page = pq->first;\n  while (page != NULL) {\n    const mi_page_t* next = page->next;\n\n    const size_t bsize = page->block_size;\n\n    size_t committed = page->capacity * bsize;\n    info->committed += committed;\n    if (page->used < page->capacity) {\n      size_t used = page->used * bsize;\n\n      size_t threshold = (double)committed * ratio;\n      if (used < threshold) {\n        info->wasted += (committed 
- used);\n      }\n    }\n    page = next;\n  }\n\n  info->bin++;\n  if (info->bin == MI_BIN_FULL) {  // reached end of bins, reset state\n    info->committed_golden = info->committed;\n    // Add total comitted size of MI_BIN_FULL that we do not traverse\n    // as its tracked by zmalloc_heap->full_page_size variable.\n    info->committed += zmalloc_heap->full_page_size;\n\n    // TODO: it's a test code that makes sure `full_page_size` is correct.\n    // Remove it once we are confident with the implementation.\n    mi_page_queue_t* pq = &zmalloc_heap->pages[MI_BIN_FULL];\n    const mi_page_t* page = pq->first;\n    while (page != NULL) {\n      info->committed_golden += page->capacity * page->block_size;\n      page = page->next;\n    }\n    info->bin = 0;\n    return 0;\n  }\n\n  return -1;\n}\n\nvoid init_zmalloc_threadlocal(void* heap) {\n  if (zmalloc_heap)\n    return;\n  zmalloc_heap = heap;\n}\n\nvoid zmalloc_page_is_underutilized(void* ptr, float ratio, int collect_stats,\n                                   mi_page_usage_stats_t* result) {\n  *result = mi_heap_page_is_underutilized(zmalloc_heap, ptr, ratio, collect_stats);\n}\n\nchar* zstrdup(const char* s) {\n  size_t l = strlen(s) + 1;\n  char* p = zmalloc(l);\n\n  memcpy(p, s, l);\n  return p;\n}\n"
  },
  {
    "path": "src/server/CMakeLists.txt",
    "content": "option(DF_ENABLE_MEMORY_TRACKING \"Adds memory tracking debugging via MEMORY TRACK command\" ON)\noption(PRINT_STACKTRACES_ON_SIGNAL \"Enables DF to print all fiber stacktraces on SIGUSR1\" OFF)\n\noption(WITH_COLLECTION_CMDS \"Compile SET/HASH/ZSET/STREAM commands\" ON)\noption(WITH_EXTENSION_CMDS \"Compile BLOOM/BITOPS/GEO/HLL/JSON commands\" ON)\n\noption(WITH_TIERING \"Compile for macos\" ON)\nif(APPLE)\n    message(STATUS \"Macos detected. Set WITH_TIERING=off\")\n    set(WITH_TIERING OFF CACHE BOOL \"Compile for macos\" FORCE)\nendif()\n\nadd_executable(dragonfly dfly_main.cc version_monitor.cc)\nadd_custom_target(check_dfly WORKING_DIRECTORY .. COMMAND ctest -L DFLY)\ncxx_link(dragonfly base dragonfly_lib)\n\nif (CMAKE_SYSTEM_PROCESSOR STREQUAL \"x86_64\" AND CMAKE_BUILD_TYPE STREQUAL \"Release\")\n  # Add core2 only to this file, thus avoiding instructions in this object file that\n  # can cause SIGILL.\n  set_source_files_properties(dfly_main.cc PROPERTIES COMPILE_FLAGS \"-march=core2\")\nendif()\n\nset_property(SOURCE dfly_main.cc APPEND PROPERTY COMPILE_DEFINITIONS\n             SOURCE_PATH_FROM_BUILD_ENV=${CMAKE_SOURCE_DIR})\n\nadd_executable(dfly_bench dfly_bench.cc)\ncxx_link(dfly_bench dfly_parser_lib fibers2 absl::random_random redis_lib)\n\n# Include journal sources (not separate target for now)\nadd_subdirectory(journal)\nif(NOT DEFINED DF_JOURNAL_SRCS)\n  message(FATAL_ERROR \"Journal source files not exported via DF_JOURNAL_SRCS\")\nendif()\n\n# Define transaction library\nadd_library(dfly_transaction db_slice.cc blocking_controller.cc\n            cluster_support.cc common.cc command_registry.cc\n            execution_state.cc stats.cc synchronization.cc\n            ${DF_JOURNAL_SRCS}\n            server_state.cc table.cc  transaction.cc tx_base.cc\n            serializer_commons.cc\n            acl/acl_log.cc slowlog.cc channel_store.cc)\ncxx_link(dfly_transaction dfly_core strings_lib TRDP::fast_float 
TRDP::hdr_histogram)\n\n# Include search module\nadd_subdirectory(search)\nif(NOT DEFINED DF_SEARCH_SRCS)\n  message(FATAL_ERROR \"Search source files not exported via DF_SEARCH_SRCS\")\nendif()\n\nif (WITH_SEARCH)\n  add_definitions(-DWITH_SEARCH)\nendif()\n\n# Include tiering module\nadd_subdirectory(tiering)\nif (WITH_TIERING)\n  add_definitions(-DWITH_TIERING)\n  SET(DF_TIERING_SRCS tiered_storage.cc)\n  helio_cxx_test(tiered_storage_test dfly_test_lib LABELS DFLY)\nendif()\n\n# Include cluster sources definitons (not separate target for now)\nadd_subdirectory(cluster)\nif (NOT DEFINED DF_CLUSTER_SRCS)\n  message(FATAL_ERROR \"Cluster source files not exported via DF_CLUSTER_SRCS\")\nendif()\n\n# Optionally compile collection commands\nif (WITH_COLLECTION_CMDS)\n  set(DF_FAMILY_SRCS set_family.cc hset_family.cc zset_family.cc stream_family.cc)\n  add_definitions(-DWITH_COLLECTION_CMDS)\nelse()\n  set(DF_FAMILY_SRCS collection_family_fallback.cc)\nendif()\n\n# Optionally compile extension commands\nif (WITH_EXTENSION_CMDS)\n  list(APPEND DF_FAMILY_SRCS geo_family.cc hll_family.cc bitops_family.cc bloom_family.cc cms_family.cc json_family.cc)\n  add_definitions(-DWITH_EXTENSION_CMDS)\nendif()\n\n# Optionally include tiered_storage which interfaces with tiering_module\nadd_library(dragonfly_lib\n            engine_shard.cc engine_shard_set.cc\n            config_registry.cc conn_context.cc\n            debugcmd.cc dflycmd.cc error.cc family_utils.cc string_stats.cc ${DF_SEARCH_SRCS}\n            server_family.cc string_family.cc list_family.cc generic_family.cc\n            ${DF_FAMILY_SRCS}\n            main_service.cc memory_cmd.cc rdb_load.cc rdb_load_context.cc rdb_save.cc replica.cc http_api.cc\n            protocol_client.cc serializer_base.cc snapshot.cc script_mgr.cc\n            detail/compressor.cc detail/decompress.cc detail/save_stages_controller.cc detail/snapshot_storage.cc\n            version.cc container_utils.cc\n            
multi_command_squasher.cc\n            ${DF_TIERING_SRCS}\n            ${DF_CLUSTER_SRCS}\n            acl/user.cc acl/user_registry.cc acl/acl_family.cc\n            acl/validator.cc\n            sharding.cc cmd_support.cc)\n\nif (DF_ENABLE_MEMORY_TRACKING)\n  target_compile_definitions(dragonfly_lib PRIVATE DFLY_ENABLE_MEMORY_TRACKING)\n  target_compile_definitions(dragonfly PRIVATE DFLY_ENABLE_MEMORY_TRACKING)\nendif()\n\nif (PRINT_STACKTRACES_ON_SIGNAL)\n  target_compile_definitions(dragonfly_lib PRIVATE PRINT_STACKTRACES_ON_SIGNAL)\nendif()\n\nif (WITH_AWS)\n  SET(AWS_LIB awsv2_lib)\n  add_definitions(-DWITH_AWS)\nendif()\n\nif (WITH_GCP)\n  SET(GCP_LIB gcp_lib)\n  add_definitions(-DWITH_GCP)\nendif()\n\ncxx_link(dragonfly_lib dfly_transaction dfly_facade dfly_tiering\n         redis_lib ${AWS_LIB} ${GCP_LIB} azure_lib jsonpath\n         strings_lib html_lib\n         http_client_lib absl::random_random TRDP::jsoncons TRDP::zstd TRDP::lz4\n         TRDP::croncpp TRDP::flatbuffers)\n\nif (DF_USE_SSL)\n  set(TLS_LIB tls_lib)\n  target_compile_definitions(dragonfly_lib PRIVATE DFLY_USE_SSL)\nendif()\n\nadd_library(dfly_test_lib test_utils.cc)\ncxx_link(dfly_test_lib dragonfly_lib facade_test gtest_main_ext)\n\nhelio_cxx_test(dragonfly_test dfly_test_lib LABELS DFLY)\nhelio_cxx_test(multi_test dfly_test_lib LABELS DFLY)\nhelio_cxx_test(generic_family_test dfly_test_lib LABELS DFLY)\nhelio_cxx_test(hset_family_test dfly_test_lib LABELS DFLY)\nhelio_cxx_test(list_family_test dfly_test_lib LABELS DFLY)\nhelio_cxx_test(server_family_test dfly_test_lib LABELS DFLY)\nhelio_cxx_test(set_family_test dfly_test_lib LABELS DFLY)\nhelio_cxx_test(stream_family_test dfly_test_lib LABELS DFLY)\nhelio_cxx_test(string_family_test dfly_test_lib LABELS DFLY)\nhelio_cxx_test(bitops_family_test dfly_test_lib LABELS DFLY)\nhelio_cxx_test(rdb_test dfly_test_lib DATA testdata/empty.rdb testdata/redis6_small.rdb\n         testdata/redis6_stream.rdb testdata/hll.rdb 
testdata/redis7_small.rdb\n         testdata/redis_json.rdb testdata/RDB_TYPE_STREAM_LISTPACKS_2.rdb\n         testdata/RDB_TYPE_STREAM_LISTPACKS_3.rdb testdata/ignore_expiry.rdb LABELS DFLY)\nhelio_cxx_test(zset_family_test dfly_test_lib LABELS DFLY)\nhelio_cxx_test(geo_family_test dfly_test_lib LABELS DFLY)\nhelio_cxx_test(blocking_controller_test dfly_test_lib LABELS DFLY)\nhelio_cxx_test(json_family_test dfly_test_lib LABELS DFLY)\nhelio_cxx_test(json_family_memory_test dfly_test_lib LABELS DFLY)\nhelio_cxx_test(journal/journal_test dfly_test_lib LABELS DFLY)\nhelio_cxx_test(hll_family_test dfly_test_lib LABELS DFLY)\nhelio_cxx_test(string_stats_test dfly_test_lib LABELS DFLY)\nhelio_cxx_test(bloom_family_test dfly_test_lib LABELS DFLY)\nhelio_cxx_test(cms_family_test dfly_test_lib LABELS DFLY)\nhelio_cxx_test(cluster/cluster_config_test dfly_test_lib LABELS DFLY)\nhelio_cxx_test(cluster/cluster_family_test dfly_test_lib LABELS DFLY)\nhelio_cxx_test(acl/acl_family_test dfly_test_lib LABELS DFLY)\nhelio_cxx_test(engine_shard_set_test dfly_test_lib LABELS DFLY)\nhelio_cxx_test(serializer_base_test dfly_test_lib LABELS DFLY)\n\nadd_dependencies(check_dfly dragonfly_test json_family_test list_family_test\n                 generic_family_test memcache_parser_test rdb_test journal_test\n                 redis_parser_test stream_family_test string_family_test\n                 bitops_family_test set_family_test zset_family_test geo_family_test\n                 hll_family_test cluster_config_test cluster_family_test acl_family_test\n                 json_family_memory_test)\n\nif (WITH_SEARCH)\n  helio_cxx_test(search/search_family_test dfly_test_lib LABELS DFLY)\n  helio_cxx_test(search/aggregator_test dfly_test_lib LABELS DFLY)\n  helio_cxx_test(search/index_join_test dfly_test_lib LABELS DFLY)\n\n  add_dependencies(check_dfly search_family_test aggregator_test index_join_test)\nendif()\n"
  },
  {
    "path": "src/server/acl/acl_commands_def.h",
    "content": "// Copyright 2026, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#pragma once\n\n#include <cstdint>\n#include <limits>\n#include <string>\n#include <utility>\n#include <vector>\n\n#include \"absl/container/flat_hash_map.h\"\n#include \"absl/container/flat_hash_set.h\"\n#include \"base/logging.h\"\n\nnamespace dfly::acl {\n\n/* There are 21 ACL categories as of redis 7\n *\n */\n\nenum AclCat {\n  KEYSPACE = 1ULL << 0,\n  READ = 1ULL << 1,\n  WRITE = 1ULL << 2,\n  SET = 1ULL << 3,\n  SORTEDSET = 1ULL << 4,\n  LIST = 1ULL << 5,\n  HASH = 1ULL << 6,\n  STRING = 1ULL << 7,\n  BITMAP = 1ULL << 8,\n  HYPERLOGLOG = 1ULL << 9,\n  GEO = 1ULL << 10,\n  STREAM = 1ULL << 11,\n  PUBSUB = 1ULL << 12,\n  ADMIN = 1ULL << 13,\n  FAST = 1ULL << 14,\n  SLOW = 1ULL << 15,\n  BLOCKING = 1ULL << 16,\n  DANGEROUS = 1ULL << 17,\n  CONNECTION = 1ULL << 18,\n  TRANSACTION = 1ULL << 19,\n  SCRIPTING = 1ULL << 20,\n\n  // Extensions\n  CMS = 1ULL << 27,\n  BLOOM = 1ULL << 28,\n  FT_SEARCH = 1ULL << 29,\n  THROTTLE = 1ULL << 30,\n  JSON = 1ULL << 31\n};\n\nconstexpr uint64_t ALL_COMMANDS = std::numeric_limits<uint64_t>::max();\nconstexpr uint64_t NONE_COMMANDS = std::numeric_limits<uint64_t>::min();\n\ninline size_t NumberOfFamilies(size_t number = 0) {\n  static size_t number_of_families = number;\n  return number_of_families;\n}\n\nusing CategoryIndexTable = absl::flat_hash_map<std::string_view, uint32_t>;\nusing ReverseCategoryIndexTable = std::vector<std::string>;\n// bit index to index in the REVERSE_CATEGORY_INDEX_TABLE\nusing CategoryToIdxStore = absl::flat_hash_map<uint32_t, uint32_t>;\n\nusing RevCommandField = std::vector<std::string>;\nusing RevCommandsIndexStore = std::vector<RevCommandField>;\nusing CategoryToCommandsIndexStore = absl::flat_hash_map<std::string, std::vector<uint64_t>>;\n\n// Special flag/mask for all\nconstexpr uint32_t NONE = 0;\nconstexpr uint32_t ALL = std::numeric_limits<uint32_t>::max();\n\nenum class 
KeyOp : int8_t { READ, WRITE, READ_WRITE };\n\nusing GlobType = std::pair<std::string, KeyOp>;\n\nstruct AclKeys {\n  std::vector<GlobType> key_globs;\n  // The user is allowed to \"touch\" any key. No glob matching required.\n  // Alias for ~*\n  bool all_keys = false;\n};\n\n// The second bool denotes if the pattern contains an asterisk and it's\n// used to pattern match PSUBSCRIBE that requires exact literals\nusing GlobTypePubSub = std::pair<std::string, bool>;\n\nstruct AclPubSub {\n  std::vector<GlobTypePubSub> globs;\n  // The user can execute any variant of pub/sub/psub. No glob matching required.\n  // Alias for &* just like all_keys for AclKeys above.\n  bool all_channels = false;\n};\n\nstruct UserCredentials {\n  uint32_t acl_categories{0};\n  std::vector<uint64_t> acl_commands;\n  AclKeys keys;\n  AclPubSub pub_sub;\n  std::string ns;\n  size_t db{0};\n};\n\n}  // namespace dfly::acl\n"
  },
  {
    "path": "src/server/acl/acl_family.cc",
    "content": "// Copyright 2025, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n\n#include \"server/acl/acl_family.h\"\n\n#include <algorithm>\n#include <cctype>\n#include <chrono>\n#include <deque>\n#include <memory>\n#include <numeric>\n#include <optional>\n#include <random>\n#include <string>\n#include <string_view>\n#include <utility>\n#include <variant>\n\n#include \"absl/container/flat_hash_set.h\"\n#include \"absl/flags/commandlineflag.h\"\n#include \"absl/strings/escaping.h\"\n#include \"absl/strings/match.h\"\n#include \"absl/strings/numbers.h\"\n#include \"absl/strings/str_cat.h\"\n#include \"absl/strings/str_split.h\"\n#include \"base/flags.h\"\n#include \"base/logging.h\"\n#include \"core/overloaded.h\"\n#include \"facade/dragonfly_connection.h\"\n#include \"facade/dragonfly_listener.h\"\n#include \"facade/facade_types.h\"\n#include \"facade/reply_builder.h\"\n#include \"io/file.h\"\n#include \"io/file_util.h\"\n#include \"server/acl/acl_commands_def.h\"\n#include \"server/acl/acl_log.h\"\n#include \"server/acl/validator.h\"\n#include \"server/command_registry.h\"\n#include \"server/common.h\"\n#include \"server/config_registry.h\"\n#include \"server/conn_context.h\"\n#include \"server/error.h\"\n#include \"server/server_state.h\"\n#include \"util/proactor_pool.h\"\n\nusing namespace std;\n\nABSL_FLAG(string, aclfile, \"\", \"Path and name to aclfile\");\nABSL_DECLARE_FLAG(uint32_t, dbnum);\n\nnamespace dfly::acl {\n\nnamespace {\n\nstring PasswordsToString(const absl::flat_hash_set<string>& passwords, bool nopass, bool full_sha);\nusing MaterializedContents = optional<vector<vector<string_view>>>;\n\nMaterializedContents MaterializeFileContents(vector<string>* usernames, string_view file_contents);\n\nstring AclKeysToString(const AclKeys& keys);\n\nstring AclPubSubToString(const AclPubSub& pub_sub);\n\nvoid SendAclSecurityEvents(const AclLog::LogEntry& entry, facade::RedisReplyBuilder* rb);\n\nstring 
AclDbToString(size_t db);\n\ntemplate <typename P>\nvoid TraverseEvictImpl(P predicate, facade::Listener* main_listener, util::ProactorPool* pool);\n}  // namespace\n\nAclFamily::AclFamily(UserRegistry* registry, util::ProactorPool* pool)\n    : registry_(registry), pool_(pool) {\n  dbnum_ = absl::GetFlag(FLAGS_dbnum);\n}\n\nvoid AclFamily::Acl(CmdArgList args, CommandContext* cmd_cntx) {\n  cmd_cntx->SendError(\"Wrong number of arguments for acl command\");\n}\n\nvoid AclFamily::List(CmdArgList args, CommandContext* cmd_cntx) {\n  const auto registry_with_lock = registry_->GetRegistryWithLock();\n  const auto& registry = registry_with_lock.registry;\n  auto* rb = static_cast<facade::RedisReplyBuilder*>(cmd_cntx->rb());\n  rb->StartArray(registry.size());\n\n  for (const auto& [username, user] : registry) {\n    string buffer = \"user \";\n    const string password = PasswordsToString(user.Passwords(), user.HasNopass(), false);\n\n    const string acl_keys = AclKeysToString(user.Keys());\n\n    const string acl_pub_sub = AclPubSubToString(user.PubSub());\n\n    const string maybe_space_com = acl_keys.empty() ? \"\" : \" \";\n\n    const string acl_cat_and_commands =\n        AclCatAndCommandToString(user.CatChanges(), user.CmdChanges());\n\n    const string db_index = AclDbToString(user.Db());\n\n    using namespace string_view_literals;\n\n    absl::StrAppend(&buffer, username, \" \", user.IsActive() ? 
\"on \"sv : \"off \"sv, password,\n                    acl_keys, maybe_space_com, acl_pub_sub, \" \", acl_cat_and_commands, \" $\",\n                    db_index);\n\n    rb->SendSimpleString(buffer);\n  }\n}\n\nvoid AclFamily::StreamUpdatesToAllProactorConnections(const std::string& user,\n                                                      const Commands& update_commands,\n                                                      const AclKeys& update_keys,\n                                                      const AclPubSub& update_pub_sub, size_t db) {\n  auto update_cb = [&]([[maybe_unused]] size_t id, util::Connection* conn) {\n    DCHECK(conn);\n    auto connection = static_cast<facade::Connection*>(conn);\n    if (!connection->IsHttp() && connection->cntx()) {\n      auto* cntx = static_cast<dfly::ConnectionContext*>(connection->cntx());\n      if (user == cntx->authed_username) {\n        cntx->acl_commands = update_commands;\n        cntx->keys = update_keys;\n        cntx->pub_sub = update_pub_sub;\n        cntx->acl_db_idx = db;\n      }\n    }\n  };\n\n  if (main_listener_ && main_listener_->protocol() == facade::Protocol::REDIS) {\n    main_listener_->TraverseConnections(update_cb);\n  }\n}\n\nusing facade::ErrorReply;\n\nvoid AclFamily::SetUser(CmdArgList args, CommandContext* cmd_cntx) {\n  string_view username = facade::ToSV(args[0]);\n  auto reg = registry_->GetRegistryWithWriteLock();\n  const bool exists = reg.registry.contains(username);\n  const bool has_all_keys = exists ? 
reg.registry.find(username)->second.Keys().all_keys : false;\n  auto req = ParseAclSetUser(args.subspan(1), false, has_all_keys);\n\n  auto error_case = [cmd_cntx](ErrorReply&& error) { cmd_cntx->SendError(error); };\n\n  auto update_case = [username, &reg, cmd_cntx, this, exists](User::UpdateRequest&& req) {\n    auto& user = reg.registry[username];\n    if (!exists) {\n      User::UpdateRequest default_req;\n      default_req.updates = {User::UpdateRequest::CategoryValueType{User::Sign::MINUS, acl::ALL}};\n      user.Update(std::move(default_req), CategoryToIdx(), reverse_cat_table_,\n                  CategoryToCommandsIndex());\n    }\n    const bool reset_channels = req.reset_channels;\n    user.Update(std::move(req), CategoryToIdx(), reverse_cat_table_, CategoryToCommandsIndex());\n    // Send ok first because the connection might get evicted\n    cmd_cntx->SendOk();\n    if (exists) {\n      if (!reset_channels) {\n        StreamUpdatesToAllProactorConnections(string(username), user.AclCommands(), user.Keys(),\n                                              user.PubSub(), user.Db());\n      }\n      // We evict connections that had their channels reseted\n      else {\n        EvictOpenConnectionsOnAllProactors({username});\n      }\n    }\n  };\n\n  std::visit(Overloaded{error_case, update_case}, std::move(req));\n}\n\nvoid AclFamily::EvictOpenConnectionsOnAllProactors(const absl::flat_hash_set<string_view>& users) {\n  return TraverseEvictImpl(\n      [&](auto* ctx) {\n        auto* dfly_ctx = static_cast<dfly::ConnectionContext*>(ctx);\n        return ctx && users.contains(dfly_ctx->authed_username);\n      },\n      main_listener_, pool_);\n}\n\nvoid AclFamily::EvictOpenConnectionsOnAllProactorsWithRegistry(\n    const UserRegistry::RegistryType& registry) {\n  return TraverseEvictImpl(\n      [&](auto* ctx) {\n        auto* dfly_ctx = static_cast<dfly::ConnectionContext*>(ctx);\n        return ctx && dfly_ctx->authed_username != \"default\" &&\n          
     registry.contains(dfly_ctx->authed_username);\n      },\n      main_listener_, pool_);\n}\n\nvoid AclFamily::DelUser(CmdArgList args, CommandContext* cmd_cntx) {\n  auto& registry = *registry_;\n  absl::flat_hash_set<string_view> users;\n\n  for (auto arg : args) {\n    string_view username = facade::ToSV(arg);\n    if (username == \"default\") {\n      continue;\n    }\n    if (registry.RemoveUser(username)) {\n      users.insert(username);\n    }\n  }\n\n  if (users.empty()) {\n    cmd_cntx->rb()->SendLong(0);\n    return;\n  }\n  VLOG(1) << \"Evicting open acl connections\";\n  EvictOpenConnectionsOnAllProactors(users);\n  VLOG(1) << \"Done evicting open acl connections\";\n  cmd_cntx->rb()->SendLong(users.size());\n}\n\nvoid AclFamily::WhoAmI(CmdArgList args, CommandContext* cmd_cntx) {\n  auto* rb = static_cast<facade::RedisReplyBuilder*>(cmd_cntx->rb());\n  rb->SendBulkString(absl::StrCat(\"User is \", cmd_cntx->server_conn_cntx()->authed_username));\n}\n\nstring AclFamily::RegistryToString() const {\n  auto registry_with_read_lock = registry_->GetRegistryWithLock();\n  auto& registry = registry_with_read_lock.registry;\n  string result;\n  for (auto& [username, user] : registry) {\n    string command = \"USER \";\n    const string password = PasswordsToString(user.Passwords(), user.HasNopass(), true);\n\n    const string acl_keys = AclKeysToString(user.Keys());\n\n    const string maybe_space = acl_keys.empty() ? \"\" : \" \";\n\n    const string acl_pub_sub = AclPubSubToString(user.PubSub());\n\n    const string acl_cat_and_commands =\n        AclCatAndCommandToString(user.CatChanges(), user.CmdChanges());\n\n    const string db_index = AclDbToString(user.Db());\n\n    using namespace string_view_literals;\n\n    absl::StrAppend(&result, command, username, \" \", user.IsActive() ? 
\"ON \"sv : \"OFF \"sv, password,\n                    acl_keys, maybe_space, acl_pub_sub, \" \", acl_cat_and_commands, \" $\", db_index,\n                    \"\\n\");\n  }\n\n  return result;\n}\n\nvoid AclFamily::Save(CmdArgList args, CommandContext* cmd_cntx) {\n  auto acl_file_path = absl::GetFlag(FLAGS_aclfile);\n  auto* builder = cmd_cntx->rb();\n  if (acl_file_path.empty()) {\n    builder->SendError(\"Dragonfly is not configured to use an ACL file.\");\n    return;\n  }\n\n  auto res = io::OpenWrite(acl_file_path);\n  if (!res) {\n    std::string error = absl::StrCat(\"Failed to open the aclfile: \", res.error().message());\n    LOG(ERROR) << error;\n    builder->SendError(error);\n    return;\n  }\n\n  std::unique_ptr<io::WriteFile> file(res.value());\n  std::string output = RegistryToString();\n  auto ec = file->Write(output);\n\n  if (ec) {\n    std::string error = absl::StrCat(\"Failed to write to the aclfile: \", ec.message());\n    LOG(ERROR) << error;\n    builder->SendError(error);\n    return;\n  }\n\n  ec = file->Close();\n  if (ec) {\n    std::string error = absl::StrCat(\"Failed to close the aclfile \", ec.message());\n    LOG(WARNING) << error;\n    builder->SendError(error);\n    return;\n  }\n\n  builder->SendOk();\n}\n\nGenericError AclFamily::LoadToRegistryFromFile(std::string_view full_path,\n                                               SinkReplyBuilder* builder) {\n  auto is_file_read = io::ReadFileToString(full_path);\n  if (!is_file_read) {\n    auto error = absl::StrCat(\"Dragonfly could not load ACL file \", full_path, \" with error \",\n                              is_file_read.error().message());\n\n    LOG(WARNING) << error;\n    return {std::move(error)};\n  }\n\n  auto file_contents = std::move(is_file_read.value());\n\n  if (file_contents.empty()) {\n    return {\"Empty file\"};\n  }\n\n  std::vector<std::string> usernames;\n  auto materialized = MaterializeFileContents(&usernames, file_contents);\n\n  if (!materialized) {\n  
  std::string error = \"Error materializing acl file\";\n    LOG(WARNING) << error;\n    return {std::move(error)};\n  }\n\n  std::vector<User::UpdateRequest> requests;\n\n  for (auto& cmds : *materialized) {\n    auto req = ParseAclSetUser(cmds, true);\n    if (std::holds_alternative<ErrorReply>(req)) {\n      auto error = std::move(std::get<ErrorReply>(req));\n      LOG(WARNING) << \"Error while parsing aclfile: \" << error.ToSv();\n      return {std::string(error.ToSv())};\n    }\n    requests.push_back(std::move(std::get<User::UpdateRequest>(req)));\n  }\n\n  auto registry_with_wlock = registry_->GetRegistryWithWriteLock();\n  auto& registry = registry_with_wlock.registry;\n  if (builder) {\n    builder->SendOk();\n    // Evict open connections for old users\n    EvictOpenConnectionsOnAllProactorsWithRegistry(registry);\n    registry.clear();\n  }\n\n  for (size_t i = 0; i < usernames.size(); ++i) {\n    User::UpdateRequest default_req;\n    default_req.updates = {User::UpdateRequest::CategoryValueType{User::Sign::MINUS, acl::ALL}};\n    auto& user = registry[usernames[i]];\n    user.Update(std::move(default_req), CategoryToIdx(), reverse_cat_table_,\n                CategoryToCommandsIndex());\n    user.Update(std::move(requests[i]), CategoryToIdx(), reverse_cat_table_,\n                CategoryToCommandsIndex());\n  }\n\n  if (!registry.contains(\"default\")) {\n    auto& user = registry[\"default\"];\n    user.Update(registry_->DefaultUserUpdateRequest(), CategoryToIdx(), reverse_cat_table_,\n                CategoryToCommandsIndex());\n  }\n\n  return {};\n}\n\nbool AclFamily::Load() {\n  auto acl_file = absl::GetFlag(FLAGS_aclfile);\n  return !LoadToRegistryFromFile(acl_file, nullptr);\n}\n\nvoid AclFamily::Load(CmdArgList args, CommandContext* cmd_cntx) {\n  auto acl_file = absl::GetFlag(FLAGS_aclfile);\n  auto* rb = static_cast<facade::RedisReplyBuilder*>(cmd_cntx->rb());\n  if (acl_file.empty()) {\n    rb->SendError(\"Dragonfly is not configured to use 
an ACL file.\");\n    return;\n  }\n\n  const auto load_error = LoadToRegistryFromFile(acl_file, rb);\n\n  if (load_error) {\n    rb->SendError(absl::StrCat(\"Error loading: \", acl_file, \" \", load_error.Format()));\n  }\n}\n\nvoid AclFamily::Log(CmdArgList args, CommandContext* cmd_cntx) {\n  auto* rb = static_cast<facade::RedisReplyBuilder*>(cmd_cntx->rb());\n  if (args.size() > 1) {\n    return rb->SendError(facade::OpStatus::OUT_OF_RANGE);\n  }\n\n  size_t max_output = 10;\n  if (!args.empty()) {\n    auto option = facade::ToSV(args[0]);\n    if (absl::EqualsIgnoreCase(option, \"RESET\")) {\n      pool_->AwaitFiberOnAll(\n          [](auto index, auto* context) { ServerState::tlocal()->acl_log.Reset(); });\n      rb->SendOk();\n      return;\n    }\n\n    if (!absl::SimpleAtoi(facade::ToSV(args[0]), &max_output)) {\n      rb->SendError(\"Invalid count\");\n      return;\n    }\n  }\n\n  std::vector<AclLog::LogType> logs(pool_->size());\n  pool_->AwaitFiberOnAll([&logs, max_output](auto index, auto* context) {\n    logs[index] = ServerState::tlocal()->acl_log.GetLog(max_output);\n  });\n\n  size_t total_entries = 0;\n  for (auto& log : logs) {\n    total_entries += log.size();\n  }\n\n  if (total_entries == 0) {\n    rb->SendEmptyArray();\n    return;\n  }\n\n  auto n_way_minimum = [](const auto& logs) {\n    size_t id = 0;\n    AclLog::LogEntry limit;\n    const AclLog::LogEntry* max = &limit;\n    for (size_t i = 0; i < logs.size(); ++i) {\n      if (!logs[i].empty() && logs[i].front() < *max) {\n        id = i;\n        max = &logs[i].front();\n      }\n    }\n\n    return id;\n  };\n\n  rb->StartArray(total_entries);\n\n  for (size_t i = 0; i < total_entries; ++i) {\n    const auto min = n_way_minimum(logs);\n    SendAclSecurityEvents(logs[min].front(), rb);\n    logs[min].pop_front();\n  }\n}\n\nvoid AclFamily::Users(CmdArgList args, CommandContext* cmd_cntx) {\n  const auto registry_with_lock = registry_->GetRegistryWithLock();\n  const auto& registry = 
registry_with_lock.registry;\n  auto* rb = static_cast<facade::RedisReplyBuilder*>(cmd_cntx->rb());\n\n  rb->StartArray(registry.size());\n  for (const auto& [username, _] : registry) {\n    rb->SendSimpleString(username);\n  }\n}\n\nvoid AclFamily::Cat(CmdArgList args, CommandContext* cmd_cntx) {\n  auto* rb = static_cast<facade::RedisReplyBuilder*>(cmd_cntx->rb());\n\n  if (args.size() > 1) {\n    rb->SendError(facade::OpStatus::SYNTAX_ERR);\n    return;\n  }\n\n  if (args.size() == 1) {\n    string category = absl::AsciiStrToUpper(ArgS(args, 0));\n\n    if (!cat_table_.contains(category)) {\n      auto error = absl::StrCat(\"Unknown category: \", category);\n      rb->SendError(error);\n      return;\n    }\n\n    const uint32_t cid_mask = cat_table_.find(category)->second;\n    std::vector<std::string_view> results;\n    // TODO replace this with indexer\n    auto cb = [cid_mask, &results](auto name, auto& cid) {\n      if (cid_mask & cid.acl_categories()) {\n        results.push_back(name);\n      }\n    };\n\n    cmd_registry_->Traverse(cb);\n    rb->StartArray(results.size());\n    for (const auto& command : results) {\n      rb->SendSimpleString(command);\n    }\n\n    return;\n  }\n\n  size_t total_categories = 0;\n  for (auto& elem : reverse_cat_table_) {\n    if (elem != \"_RESERVED\") {\n      ++total_categories;\n    }\n  }\n\n  rb->StartArray(total_categories);\n  for (auto& elem : reverse_cat_table_) {\n    if (elem != \"_RESERVED\") {\n      rb->SendSimpleString(elem);\n    }\n  }\n}\n\nvoid AclFamily::GetUser(CmdArgList args, CommandContext* cmd_cntx) {\n  auto username = facade::ToSV(args[0]);\n  const auto registry_with_lock = registry_->GetRegistryWithLock();\n  const auto& registry = registry_with_lock.registry;\n  auto* rb = static_cast<facade::RedisReplyBuilder*>(cmd_cntx->rb());\n\n  if (!registry.contains(username)) {\n    rb->SendNull();\n    return;\n  }\n  auto& user = registry.find(username)->second;\n  std::string status = 
user.IsActive() ? \"on\" : \"off\";\n  auto pass = PasswordsToString(user.Passwords(), user.HasNopass(), false);\n  if (!pass.empty()) {\n    pass.pop_back();\n  }\n\n  rb->StartArray(10);\n\n  rb->SendSimpleString(\"flags\");\n  const size_t total_elements = (pass != \"nopass\") ? 1 : 2;\n  rb->StartArray(total_elements);\n  rb->SendSimpleString(status);\n  if (total_elements == 2) {\n    rb->SendSimpleString(pass);\n  }\n\n  rb->SendSimpleString(\"passwords\");\n  if (pass != \"nopass\" && !pass.empty()) {\n    rb->SendSimpleString(pass);\n  } else {\n    rb->SendEmptyArray();\n  }\n  rb->SendSimpleString(\"commands\");\n\n  const std::string acl_cat_and_commands =\n      AclCatAndCommandToString(user.CatChanges(), user.CmdChanges());\n\n  rb->SendSimpleString(acl_cat_and_commands);\n\n  rb->SendSimpleString(\"keys\");\n  std::string keys = AclKeysToString(user.Keys());\n  if (!keys.empty()) {\n    rb->SendSimpleString(keys);\n  } else {\n    rb->SendEmptyArray();\n  }\n\n  rb->SendSimpleString(\"channels\");\n  std::string pub_sub = AclPubSubToString(user.PubSub());\n  rb->SendSimpleString(pub_sub);\n}\n\nvoid AclFamily::GenPass(CmdArgList args, CommandContext* cmd_cntx) {\n  auto* builder = cmd_cntx->rb();\n  if (args.length() > 1) {\n    builder->SendError(facade::UnknownSubCmd(\"GENPASS\", \"ACL\"));\n    return;\n  }\n  uint32_t random_bits = 256;\n  if (args.length() == 1) {\n    auto requested_bits = facade::ArgS(args, 0);\n\n    if (!absl::SimpleAtoi(requested_bits, &random_bits) || random_bits == 0 || random_bits > 4096) {\n      return builder->SendError(\n          \"ACL GENPASS argument must be the number of bits for the output password, a positive \"\n          \"number up to 4096\");\n    }\n  }\n  std::random_device urandom(\"/dev/urandom\");\n  const size_t result_length = (random_bits + 3) / 4;\n  constexpr size_t step_size = sizeof(decltype(std::random_device::max()));\n  std::string response;\n  for (size_t bytes_written = 0; bytes_written < 
result_length; bytes_written += step_size) {\n    absl::StrAppendFormat(&response, \"%08x\", urandom());\n  }\n\n  response.resize(result_length);\n\n  builder->SendSimpleString(response);\n}\n\nvoid AclFamily::DryRun(CmdArgList args, CommandContext* cmd_cntx) {\n  auto* rb = static_cast<facade::RedisReplyBuilder*>(cmd_cntx->rb());\n  auto username = facade::ArgS(args, 0);\n  const auto registry_with_lock = registry_->GetRegistryWithLock();\n  const auto& registry = registry_with_lock.registry;\n  if (!registry.contains(username)) {\n    auto error = absl::StrCat(\"User '\", username, \"' not found\");\n    rb->SendError(error);\n    return;\n  }\n\n  string command = absl::AsciiStrToUpper(ArgS(args, 1));\n  auto* cid = cmd_registry_->Find(command);\n  if (!cid || cid->IsAlias()) {\n    auto error = absl::StrCat(\"Command '\", command, \"' not found\");\n    rb->SendError(error);\n    return;\n  }\n\n  const auto& user = registry.find(username)->second;\n  // Stub, used to mimic connection context for a user.\n  ConnectionContext stub(nullptr, acl::UserCredentials{});\n  stub.acl_commands = user.AclCommandsRef();\n  // \"mock\" without an actual connection we can't know which db is active so we skip this check\n  // for DryRun.\n  stub.acl_db_idx = {};\n  stub.keys = {{}, true};\n  const auto [is_allowed, reason] = IsUserAllowedToInvokeCommandGeneric(stub, *cid, {});\n  if (is_allowed) {\n    rb->SendOk();\n    return;\n  }\n\n  auto msg = absl::StrCat(\"This user has no permissions to run the '\", command, \"' command\");\n\n  rb->SendBulkString(msg);\n}\n\nvoid AclFamily::Init(facade::Listener* main_listener, UserRegistry* registry) {\n  main_listener_ = main_listener;\n  registry_ = registry;\n  config_registry.RegisterMutable(\"requirepass\", [this](const absl::CommandLineFlag& flag) {\n    User::UpdateRequest rqst;\n    rqst.passwords.push_back({flag.CurrentValue()});\n    registry_->MaybeAddAndUpdate(\"default\", std::move(rqst));\n    return true;\n  });\n  
auto acl_file = absl::GetFlag(FLAGS_aclfile);\n  if (!acl_file.empty() && Load()) {\n    return;\n  }\n  registry_->Init(&CategoryToIdx(), &reverse_cat_table_, &CategoryToCommandsIndex());\n}\n\nstd::string AclFamily::AclCatToString(uint32_t acl_category, User::Sign sign) const {\n  std::string res = sign == User::Sign::PLUS ? \"+@\" : \"-@\";\n  if (acl_category == acl::ALL) {\n    absl::StrAppend(&res, \"all\");\n    return res;\n  }\n\n  const auto& index = CategoryToIdx().at(acl_category);\n  absl::StrAppend(&res, absl::AsciiStrToLower(reverse_cat_table_[index]));\n  return res;\n}\n\nstd::string AclFamily::AclCommandToString(size_t family, uint64_t mask, User::Sign sign) const {\n  // This is constant but can be optimized with an indexer\n  const auto& rev_index = CommandsRevIndexer();\n  std::string res;\n  std::string prefix = (sign == User::Sign::PLUS) ? \"+\" : \"-\";\n  if (mask == ALL_COMMANDS) {\n    for (const auto& cmd : rev_index[family]) {\n      absl::StrAppend(&res, prefix, absl::AsciiStrToLower(cmd), \" \");\n    }\n    res.pop_back();\n    return res;\n  }\n\n  size_t pos = 0;\n  while (mask != 0) {\n    ++pos;\n    mask = mask >> 1;\n  }\n  --pos;\n  absl::StrAppend(&res, prefix, absl::AsciiStrToLower(rev_index[family][pos]));\n  return res;\n}\n\nnamespace {\nstruct CategoryAndMetadata {\n  User::CategoryChange change;\n  User::ChangeMetadata metadata;\n};\n\nstruct CommandAndMetadata {\n  User::CommandChange change;\n  User::ChangeMetadata metadata;\n};\n\nusing MergeResult = std::vector<std::variant<CategoryAndMetadata, CommandAndMetadata>>;\n\nMergeResult MergeTables(const User::CategoryChanges& categories,\n                        const User::CommandChanges& commands) {\n  MergeResult result;\n  for (auto [cat, meta] : categories) {\n    result.push_back(CategoryAndMetadata{cat, meta});\n  }\n\n  for (auto [cmd, meta] : commands) {\n    result.push_back(CommandAndMetadata{cmd, meta});\n  }\n\n  std::sort(result.begin(), result.end(), 
[](const auto& l, const auto& r) {\n    auto fetch = [](const auto& l) { return l.metadata.seq_no; };\n    return std::visit(fetch, l) < std::visit(fetch, r);\n  });\n\n  return result;\n}\n\nusing MaterializedContents = std::optional<std::vector<std::vector<std::string_view>>>;\n\nMaterializedContents MaterializeFileContents(std::vector<std::string>* usernames,\n                                             std::string_view file_contents) {\n  // This is fine, a very large file will top at 1-2 mb. And that's for 5000+ users with 400\n  // characters per line\n  std::vector<std::string_view> commands = absl::StrSplit(file_contents, \"\\n\");\n  std::vector<std::vector<std::string_view>> materialized;\n  materialized.reserve(commands.size());\n  usernames->reserve(commands.size());\n  for (auto& command : commands) {\n    if (command.empty())\n      continue;\n    std::vector<std::string_view> cmds = absl::StrSplit(command, ' ', absl::SkipEmpty());\n    if (!absl::EqualsIgnoreCase(cmds[0], \"USER\") || cmds.size() < 4) {\n      return {};\n    }\n\n    usernames->push_back(std::string(cmds[1]));\n    cmds.erase(cmds.begin(), cmds.begin() + 2);\n    materialized.push_back(cmds);\n  }\n  return materialized;\n}\n\nstruct ParseKeyResult {\n  std::string glob;\n  KeyOp op;\n  bool all_keys{false};\n  bool reset_keys{false};\n};\n\nstd::optional<ParseKeyResult> MaybeParseAclKey(std::string_view command) {\n  if (absl::EqualsIgnoreCase(command, \"ALLKEYS\") || command == \"~*\") {\n    return ParseKeyResult{\"\", {}, true};\n  }\n\n  if (absl::EqualsIgnoreCase(command, \"RESETKEYS\")) {\n    return ParseKeyResult{\"\", {}, false, true};\n  }\n\n  auto op = KeyOp::READ_WRITE;\n\n  if (absl::StartsWith(command, \"%RW\")) {\n    command = command.substr(3);\n  } else if (absl::StartsWith(command, \"%R\")) {\n    op = KeyOp::READ;\n    command = command.substr(2);\n  } else if (absl::StartsWith(command, \"%W\")) {\n    op = KeyOp::WRITE;\n    command = command.substr(2);\n  
}\n\n  if (!absl::StartsWith(command, \"~\")) {\n    return {};\n  }\n\n  auto key = command.substr(1);\n  if (key.empty()) {\n    return {};\n  }\n  return ParseKeyResult{std::string(key), op};\n}\n\nstruct ParsePubSubResult {\n  std::string glob;\n  bool has_asterisk{false};\n  bool all_channels{false};\n  bool reset_channels{false};\n};\n\nstd::optional<ParsePubSubResult> MaybeParseAclPubSub(std::string_view command) {\n  if (absl::EqualsIgnoreCase(command, \"ALLCHANNELS\") || command == \"&*\") {\n    return ParsePubSubResult{\"\", false, true, false};\n  }\n\n  if (absl::EqualsIgnoreCase(command, \"RESETCHANNELS\")) {\n    return ParsePubSubResult{\"\", false, false, true};\n  }\n\n  if (absl::StartsWith(command, \"&\") && command.size() >= 2) {\n    const auto glob = command.substr(1);\n    const bool has_asterisk = glob.find('*') != std::string_view::npos;\n    return ParsePubSubResult{std::string(glob), has_asterisk};\n  }\n\n  return {};\n}\n\nstd::optional<size_t> MaybeParseAclDflySelect(std::string_view command, uint32_t dbnum) {\n  if (!absl::StartsWith(command, \"$\")) {\n    return std::nullopt;\n  }\n\n  size_t res = 0;\n  if (absl::SimpleAtoi(command.substr(1), &res) && res < dbnum) {\n    return {res};\n  }\n\n  if (absl::EqualsIgnoreCase(command.substr(1), \"ALL\")) {\n    return {std::numeric_limits<size_t>::max()};\n  }\n\n  return std::nullopt;\n}\n\nstd::string PrettyPrintSha(std::string_view pass, bool all) {\n  if (all) {\n    return absl::BytesToHexString(pass);\n  }\n  return absl::BytesToHexString(pass.substr(0, 15)).substr(0, 15);\n};\n\nstd::optional<User::UpdatePass> MaybeParsePassword(std::string_view command, bool hashed) {\n  using UpPass = User::UpdatePass;\n  if (command == \"nopass\") {\n    return UpPass{\"\", false, true};\n  }\n\n  if (command == \"resetpass\") {\n    return UpPass{\"\", false, false, true};\n  }\n\n  if (command[0] == '>' || (hashed && command[0] == '#')) {\n    return 
UpPass{std::string(command.substr(1))};\n  }\n\n  if (command[0] == '<') {\n    return UpPass{std::string(command.substr(1)), true};\n  }\n\n  return {};\n}\n\nstd::optional<bool> MaybeParseStatus(std::string_view command) {\n  if (command == \"ON\") {\n    return true;\n  }\n  if (command == \"OFF\") {\n    return false;\n  }\n  return {};\n}\n\nstd::string PasswordsToString(const absl::flat_hash_set<std::string>& passwords, bool nopass,\n                              bool full_sha) {\n  if (nopass) {\n    return \"nopass \";\n  }\n  std::string result;\n  for (const auto& pass : passwords) {\n    absl::StrAppend(&result, \"#\", PrettyPrintSha(pass, full_sha), \" \");\n  }\n\n  return result;\n}\n\nstd::string AclKeysToString(const AclKeys& keys) {\n  if (keys.all_keys) {\n    return \"~*\";\n  }\n  std::string result;\n  for (auto& [pattern, op] : keys.key_globs) {\n    if (op == KeyOp::READ_WRITE) {\n      absl::StrAppend(&result, \"~\", pattern, \" \");\n      continue;\n    }\n    std::string op_str = (op == KeyOp::READ) ? 
\"R\" : \"W\";\n    absl::StrAppend(&result, \"%\", op_str, \"~\", pattern, \" \");\n  }\n\n  if (!result.empty()) {\n    result.pop_back();\n  }\n  return result;\n}\n\nstd::string AclPubSubToString(const AclPubSub& pub_sub) {\n  if (pub_sub.all_channels) {\n    return \"&*\";\n  }\n\n  std::string result = \"resetchannels \";\n\n  for (const auto& [glob, has_asterisk] : pub_sub.globs) {\n    absl::StrAppend(&result, \"&\", glob, \" \");\n  }\n\n  if (result.back() == ' ') {\n    result.pop_back();\n  }\n\n  return result;\n}\n\nvoid SendAclSecurityEvents(const AclLog::LogEntry& entry, facade::RedisReplyBuilder* rb) {\n  rb->StartArray(12);\n  rb->SendSimpleString(\"reason\");\n  using Reason = AclLog::Reason;\n  std::string reason;\n  if (entry.reason == Reason::COMMAND) {\n    reason = \"COMMAND\";\n  } else if (entry.reason == Reason::KEY) {\n    reason = \"KEY\";\n  } else if (entry.reason == Reason::PUB_SUB) {\n    reason = \"PUB_SUB\";\n  } else {\n    reason = \"AUTH\";\n  }\n\n  rb->SendSimpleString(reason);\n  rb->SendSimpleString(\"object\");\n  rb->SendSimpleString(entry.object);\n  rb->SendSimpleString(\"username\");\n  rb->SendSimpleString(entry.username);\n  rb->SendSimpleString(\"age-seconds\");\n\n  auto now_diff = std::chrono::system_clock::now() - entry.entry_creation;\n  auto secs = std::chrono::duration_cast<std::chrono::seconds>(now_diff);\n  auto left_over = now_diff - std::chrono::duration_cast<std::chrono::microseconds>(secs);\n  auto age = absl::StrCat(secs.count(), \".\", left_over.count());\n  rb->SendSimpleString(absl::StrCat(age));\n  rb->SendSimpleString(\"client-info\");\n  rb->SendSimpleString(entry.client_info);\n  rb->SendSimpleString(\"timestamp-created\");\n  rb->SendLong(entry.entry_creation.time_since_epoch().count());\n}\n\nstd::string AclDbToString(size_t db) {\n  return std::numeric_limits<size_t>::max() == db ? 
\"all\" : absl::StrCat(db);\n}\n\n// Fetches the connections that predicate P evaluates to true and shuts them\n// down gracefully.\ntemplate <typename P>\nvoid TraverseEvictImpl(P predicate, facade::Listener* main_listener, util::ProactorPool* pool) {\n  auto close_cb = [&](unsigned idx, util::ProactorBase* p) {\n    std::vector<facade::Connection::WeakRef> connections;\n    auto traverse_cb = [&](unsigned id, util::Connection* conn) {\n      auto connection = static_cast<facade::Connection*>(conn);\n      auto ctx = connection->cntx();\n      if (predicate(ctx)) {\n        connections.push_back(connection->Borrow());\n      }\n    };\n\n    main_listener->TraverseConnectionsOnThread(traverse_cb, UINT32_MAX, nullptr);\n\n    for (auto& tcon : connections) {\n      facade::Connection* conn = tcon.Get();\n      if (conn && conn->socket()->proactor()->GetPoolIndex() == p->GetPoolIndex()) {\n        // preemptive for TlsSocket\n        conn->ShutdownSelfBlocking();\n      }\n    }\n  };\n\n  pool->AwaitFiberOnAll(close_cb);\n}\n\n}  // namespace\n\nstd::string AclFamily::AclCatAndCommandToString(const User::CategoryChanges& cat,\n                                                const User::CommandChanges& cmds) const {\n  std::string result;\n\n  auto tables = MergeTables(cat, cmds);\n\n  auto cat_visitor = [&result, this](const CategoryAndMetadata& val) {\n    const auto& [change, meta] = val;\n    absl::StrAppend(&result, AclCatToString(change, meta.sign), \" \");\n  };\n\n  auto cmd_visitor = [&result, this](const CommandAndMetadata& val) {\n    const auto& [change, meta] = val;\n    const auto [family, bit_index] = change;\n    absl::StrAppend(&result, AclCommandToString(family, bit_index, meta.sign), \" \");\n  };\n\n  Overloaded visitor{cat_visitor, cmd_visitor};\n\n  for (auto change : tables) {\n    std::visit(visitor, change);\n  }\n\n  if (!result.empty()) {\n    result.pop_back();\n  }\n\n  return result;\n}\n\nusing OptCat = std::optional<uint32_t>;\n\n// 
bool == true if +\n// bool == false if -\nstd::pair<OptCat, bool> AclFamily::MaybeParseAclCategory(std::string_view command) const {\n  if (absl::EqualsIgnoreCase(command, \"ALLCOMMANDS\")) {\n    return {cat_table_.at(\"ALL\"), true};\n  }\n\n  if (absl::EqualsIgnoreCase(command, \"NOCOMMANDS\")) {\n    return {cat_table_.at(\"ALL\"), false};\n  }\n\n  if (absl::StartsWith(command, \"+@\")) {\n    auto res = cat_table_.find(command.substr(2));\n    if (res == cat_table_.end()) {\n      return {};\n    }\n    return {res->second, true};\n  }\n\n  if (absl::StartsWith(command, \"-@\")) {\n    auto res = cat_table_.find(command.substr(2));\n    if (res == cat_table_.end()) {\n      return {};\n    }\n    return {res->second, false};\n  }\n\n  return {};\n}\n\nstd::optional<std::string> AclFamily::MaybeParseNamespace(std::string_view command) const {\n  constexpr std::string_view kPrefix = \"NAMESPACE:\";\n  if (absl::StartsWith(command, kPrefix)) {\n    return std::string(command.substr(kPrefix.size()));\n  }\n  return std::nullopt;\n}\n\nstd::pair<AclFamily::OptCommand, bool> AclFamily::MaybeParseAclCommand(\n    std::string_view command) const {\n  if (absl::StartsWith(command, \"+\")) {\n    auto res = cmd_registry_->Find(command.substr(1));\n    if (!res || res->IsAlias()) {\n      return {};\n    }\n    std::pair<size_t, uint64_t> cmd{res->GetFamily(), res->GetBitIndex()};\n    return {cmd, true};\n  }\n\n  if (absl::StartsWith(command, \"-\")) {\n    auto res = cmd_registry_->Find(command.substr(1));\n    if (!res || res->IsAlias()) {\n      return {};\n    }\n    std::pair<size_t, uint64_t> cmd{res->GetFamily(), res->GetBitIndex()};\n    return {cmd, false};\n  }\n\n  return {};\n}\n\nusing facade::ErrorReply;\n\nstd::variant<User::UpdateRequest, ErrorReply> AclFamily::ParseAclSetUser(\n    const facade::ArgRange& args, bool hashed, bool has_all_keys, bool has_all_channels) const {\n  User::UpdateRequest req;\n\n  for (std::string_view arg : args) {\n    if 
(auto pass = MaybeParsePassword(facade::ToSV(arg), hashed); pass) {\n      req.passwords.push_back(std::move(*pass));\n\n      if (hashed && absl::StartsWith(facade::ToSV(arg), \"#\")) {\n        req.passwords.back().is_hashed = true;\n      }\n      continue;\n    }\n\n    if (auto res = MaybeParseAclKey(facade::ToSV(arg)); res) {\n      auto& [glob, op, all_keys, reset_keys] = *res;\n      if ((has_all_keys && !all_keys && !reset_keys) ||\n          (req.allow_all_keys && !all_keys && !reset_keys)) {\n        return ErrorReply(absl::StrCat(\n            \"Error in ACL SETUSER modifier \\'\", facade::ToSV(arg),\n            \"\\': Adding a pattern after the * pattern (or the \"\n            \"'allkeys' flag) is not valid and does not have any effect. Try 'resetkeys' to start \"\n            \"with an empty list of patterns\"));\n      }\n\n      req.allow_all_keys = all_keys;\n      req.reset_all_keys = reset_keys;\n      if (reset_keys) {\n        has_all_keys = false;\n      }\n      req.keys.push_back({std::move(glob), op, all_keys, reset_keys});\n      continue;\n    }\n\n    if (auto res = MaybeParseAclPubSub(facade::ToSV(arg)); res) {\n      auto& [glob, has_asterisk, all_channels, reset_channels] = *res;\n      if ((has_all_channels && !all_channels && !reset_channels) ||\n          (req.all_channels && !all_channels && !reset_channels)) {\n        return ErrorReply(\n            absl::StrCat(\"ERR Error in ACL SETUSER modifier \\'\", facade::ToSV(arg),\n                         \"\\': Adding a pattern after the * pattern (or the 'allchannels' flag) is \"\n                         \"not valid and does not have any effect. 
Try 'resetchannels' to start \"\n                         \"with an empty list of channels\"));\n      }\n\n      req.all_channels = all_channels;\n      req.reset_channels = reset_channels;\n      if (reset_channels) {\n        has_all_channels = false;\n      }\n      req.pub_sub.push_back({std::move(glob), has_asterisk, all_channels, reset_channels});\n      continue;\n    }\n\n    if (auto res = MaybeParseAclDflySelect(facade::ToSV(arg), dbnum_); res) {\n      if (req.select_db) {\n        return ErrorReply(\"ERR Error, select db $ was used twice\");\n      }\n      req.select_db = res;\n      continue;\n    }\n\n    std::string command = absl::AsciiStrToUpper(arg);\n\n    if (auto status = MaybeParseStatus(command); status) {\n      if (req.is_active) {\n        return ErrorReply(\"Multiple ON/OFF are not allowed\");\n      }\n      req.is_active = *status;\n      continue;\n    }\n\n    auto [cat, add] = MaybeParseAclCategory(command);\n    if (cat) {\n      using Sign = User::Sign;\n      using Val = std::pair<Sign, uint32_t>;\n      auto val = add ? Val{Sign::PLUS, *cat} : Val{Sign::MINUS, *cat};\n      req.updates.push_back(val);\n      continue;\n    }\n\n    auto ns = MaybeParseNamespace(command);\n    if (ns.has_value()) {\n      req.ns = *ns;\n      continue;\n    }\n\n    auto [cmd, sign] = MaybeParseAclCommand(command);\n    if (!cmd) {\n      return ErrorReply(absl::StrCat(\"Unrecognized parameter \", command));\n    }\n\n    using Sign = User::Sign;\n    using Val = User::UpdateRequest::CommandsValueType;\n    auto [index, bit] = *cmd;\n    auto val = sign ? 
Val{Sign::PLUS, index, bit} : Val{Sign::MINUS, index, bit};\n    req.updates.push_back(val);\n  }\n\n  return req;\n}\n\nvoid AclFamily::BuildIndexers(RevCommandsIndexStore families) {\n  size_t family_count = acl::NumberOfFamilies(families.size());\n  CommandsRevIndexer(std::move(families));\n  CategoryToCommandsIndexStore index;\n  cmd_registry_->Traverse([&](std::string_view, auto& cid) {\n    const uint32_t cat = cid.acl_categories();\n    const size_t family = cid.GetFamily();\n    DCHECK_LT(family, family_count);\n    const uint64_t bit_index = cid.GetBitIndex();\n    for (size_t i = 0; i < 32; ++i) {\n      if (cat & 1 << i) {\n        std::string_view cat_name = reverse_cat_table_[i];\n        if (index[cat_name].empty()) {\n          index[cat_name].resize(CommandsRevIndexer().size());\n        }\n        index[cat_name][family] |= bit_index;\n      }\n    }\n  });\n\n  CategoryToCommandsIndex(std::move(index));\n  CategoryToIdxStore idx_store;\n  for (size_t i = 0; i < 32; ++i) {\n    idx_store[1 << i] = i;\n  }\n  CategoryToIdx(std::move(idx_store));\n}\n\nvoid AclFamily::Help(CmdArgList args, CommandContext* cmd_cntx) {\n  string_view help_arr[] = {\n      \"ACL <subcommand> [<arg> [value] [opt] ...]. Subcommands are:\",\n      \"CAT [<category>]\",\n      \"    List all commands that belong to <category>, or all command categories\",\n      \"    when no category is specified.\",\n      \"DELUSER <username> [<username> ...]\",\n      \"    Delete a list of users.\",\n      \"DRYRUN <username> <command> [<arg> ...]\",\n      \"    Returns whether the user can execute the given command without executing the command.\",\n      \"GETUSER <username>\",\n      \"    Get the user's details.\",\n      \"GENPASS [<bits>]\",\n      \"    Generate a secure 256-bit user password. 
The optional `bits` argument can\",\n      \"    be used to specify a different size.\",\n      \"LIST\",\n      \"    Show users details in config file format.\",\n      \"LOAD\",\n      \"    Reload users from the ACL file.\",\n      \"LOG [<count> | RESET]\",\n      \"    Show the ACL log entries.\",\n      \"SAVE\",\n      \"    Save the current config to the ACL file.\",\n      \"SETUSER <username> <attribute> [<attribute> ...]\",\n      \"    Create or modify a user with the specified attributes.\",\n      \"USERS\",\n      \"    List all the registered usernames.\",\n      \"WHOAMI\",\n      \"    Return the current connection username.\",\n      \"HELP\",\n      \"    Print this help.\"};\n  auto* rb = static_cast<facade::RedisReplyBuilder*>(cmd_cntx->rb());\n  return rb->SendSimpleStrArr(help_arr);\n}\n\nusing MemberFunc = void (AclFamily::*)(CmdArgList args, CommandContext* cmd_cntx);\n\nCommandId::Handler HandlerFunc(AclFamily* acl, MemberFunc f) {\n  return [=](CmdArgList args, CommandContext* cmd_cntx) { return (acl->*f)(args, cmd_cntx); };\n}\n\n#define HFUNC(x) SetHandler(HandlerFunc(this, &AclFamily::x))\n\nconstexpr uint32_t kAcl = acl::CONNECTION;\nconstexpr uint32_t kList = acl::ADMIN | acl::SLOW | acl::DANGEROUS;\nconstexpr uint32_t kSetUser = acl::ADMIN | acl::SLOW | acl::DANGEROUS;\nconstexpr uint32_t kDelUser = acl::ADMIN | acl::SLOW | acl::DANGEROUS;\nconstexpr uint32_t kWhoAmI = acl::SLOW;\nconstexpr uint32_t kSave = acl::ADMIN | acl::SLOW | acl::DANGEROUS;\nconstexpr uint32_t kLoad = acl::ADMIN | acl::SLOW | acl::DANGEROUS;\nconstexpr uint32_t kLog = acl::ADMIN | acl::SLOW | acl::DANGEROUS;\nconstexpr uint32_t kUsers = acl::ADMIN | acl::SLOW | acl::DANGEROUS;\nconstexpr uint32_t kCat = acl::SLOW;\nconstexpr uint32_t kGetUser = acl::ADMIN | acl::SLOW | acl::DANGEROUS;\nconstexpr uint32_t kDryRun = acl::ADMIN | acl::SLOW | acl::DANGEROUS;\nconstexpr uint32_t kGenPass = acl::SLOW;\nconstexpr uint32_t kHelp = acl::SLOW;\n\n// We can't 
implement the ACL commands and its respective subcommands LIST, CAT, etc\n// the usual way, (that is, one command called ACL which then dispatches to the subcommand\n// based on the second argument) because each of the subcommands has different ACL\n// categories. Therefore, to keep it compatible with the CommandId, I need to treat them\n// as separate commands in the registry. This is the least intrusive change because it's very\n// easy to handle that case explicitly in `DispatchCommand`.\n\nvoid AclFamily::Register(dfly::CommandRegistry* registry) {\n  using CI = dfly::CommandId;\n  const uint32_t kAclMask = CO::ADMIN | CO::NOSCRIPT | CO::LOADING;\n  registry->StartFamily();\n  *registry << CI{\"ACL\", CO::NOSCRIPT | CO::LOADING, 0, 0, 0, acl::kAcl}.HFUNC(Acl);\n  *registry << CI{\"ACL LIST\", kAclMask, 1, 0, 0, acl::kList}.HFUNC(List);\n  *registry << CI{\"ACL SETUSER\", kAclMask, -2, 0, 0, acl::kSetUser}.HFUNC(SetUser);\n  *registry << CI{\"ACL DELUSER\", kAclMask, -2, 0, 0, acl::kDelUser}.HFUNC(DelUser);\n  *registry << CI{\"ACL WHOAMI\", kAclMask, 1, 0, 0, acl::kWhoAmI}.HFUNC(WhoAmI);\n  *registry << CI{\"ACL SAVE\", kAclMask, 1, 0, 0, acl::kSave}.HFUNC(Save);\n  *registry << CI{\"ACL LOAD\", kAclMask, 1, 0, 0, acl::kLoad}.HFUNC(Load);\n  *registry << CI{\"ACL LOG\", kAclMask, 0, 0, 0, acl::kLog}.HFUNC(Log);\n  *registry << CI{\"ACL USERS\", kAclMask, 1, 0, 0, acl::kUsers}.HFUNC(Users);\n  *registry << CI{\"ACL CAT\", kAclMask, -1, 0, 0, acl::kCat}.HFUNC(Cat);\n  *registry << CI{\"ACL GETUSER\", kAclMask, 2, 0, 0, acl::kGetUser}.HFUNC(GetUser);\n  *registry << CI{\"ACL DRYRUN\", kAclMask, 3, 0, 0, acl::kDryRun}.HFUNC(DryRun);\n  *registry << CI{\"ACL GENPASS\", CO::NOSCRIPT | CO::LOADING, -1, 0, 0, acl::kGenPass}.HFUNC(\n      GenPass);\n  *registry << CI{\"ACL HELP\", kAclMask, 0, 0, 0, acl::kHelp}.HFUNC(Help);\n  cmd_registry_ = registry;\n\n  // build indexers\n  BuildIndexers(cmd_registry_->GetFamilies());\n}\n\n#undef HFUNC\n\n}  // namespace 
dfly::acl\n"
  },
  {
    "path": "src/server/acl/acl_family.h",
    "content": "// Copyright 2025, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#pragma once\n\n#include <cstdint>\n#include <optional>\n#include <string_view>\n#include <vector>\n\n#include \"absl/container/flat_hash_set.h\"\n#include \"facade/facade_types.h\"\n#include \"helio/util/proactor_pool.h\"\n#include \"server/acl/acl_commands_def.h\"\n#include \"server/acl/user_registry.h\"\n#include \"server/command_registry.h\"\n#include \"server/execution_state.h\"\n\nnamespace facade {\nclass SinkReplyBuilder;\nclass Listener;\n}  // namespace facade\n\nnamespace dfly {\n\nusing facade::CmdArgList;\n\nclass ConnectionContext;\nnamespace acl {\n\nclass AclFamily final {\n public:\n  explicit AclFamily(UserRegistry* registry, util::ProactorPool* pool);\n\n  void Register(CommandRegistry* registry);\n  void Init(facade::Listener* listener, UserRegistry* registry);\n\n private:\n  using SinkReplyBuilder = facade::SinkReplyBuilder;\n\n  void Acl(CmdArgList args, CommandContext* cmd_cntx);\n  void List(CmdArgList args, CommandContext* cmd_cntx);\n  void SetUser(CmdArgList args, CommandContext* cmd_cntx);\n  void DelUser(CmdArgList args, CommandContext* cmd_cntx);\n  void WhoAmI(CmdArgList args, CommandContext* cmd_cntx);\n  void Save(CmdArgList args, CommandContext* cmd_cntx);\n  void Load(CmdArgList args, CommandContext* cmd_cntx);\n  // Helper function for bootstrap\n  bool Load();\n  void Log(CmdArgList args, CommandContext* cmd_cntx);\n  void Users(CmdArgList args, CommandContext* cmd_cntx);\n  void Cat(CmdArgList args, CommandContext* cmd_cntx);\n  void GetUser(CmdArgList args, CommandContext* cmd_cntx);\n  void DryRun(CmdArgList args, CommandContext* cmd_cntx);\n  void GenPass(CmdArgList args, CommandContext* cmd_cntx);\n  void Help(CmdArgList args, CommandContext* cmd_cntx);\n\n  // Helper function that updates all open connections and their\n  // respective ACL fields on all the available proactor threads\n  using Commands 
= std::vector<uint64_t>;\n  void StreamUpdatesToAllProactorConnections(const std::string& user,\n                                             const Commands& update_commands,\n                                             const AclKeys& update_keys,\n                                             const AclPubSub& update_pub_sub, size_t db);\n\n  // Helper function that closes all open connection from the deleted user\n  void EvictOpenConnectionsOnAllProactors(const absl::flat_hash_set<std::string_view>& user);\n\n  // Helper function that closes all open connections for users in the registry\n  void EvictOpenConnectionsOnAllProactorsWithRegistry(const UserRegistry::RegistryType& registry);\n\n  // Helper function that loads the acl state of an acl file into the user registry\n  GenericError LoadToRegistryFromFile(std::string_view full_path, SinkReplyBuilder* builder);\n\n  // Serializes the whole registry into a string\n  std::string RegistryToString() const;\n\n  std::string AclCatToString(uint32_t acl_category, User::Sign sign) const;\n\n  std::string AclCommandToString(size_t family, uint64_t mask, User::Sign sign) const;\n\n  // Serializes category and command to string\n  std::string AclCatAndCommandToString(const User::CategoryChanges& cat,\n                                       const User::CommandChanges& cmds) const;\n\n  using OptCat = std::optional<uint32_t>;\n  std::pair<OptCat, bool> MaybeParseAclCategory(std::string_view command) const;\n\n  using OptCommand = std::optional<std::pair<size_t, uint64_t>>;\n  std::pair<OptCommand, bool> MaybeParseAclCommand(std::string_view command) const;\n\n  std::optional<std::string> MaybeParseNamespace(std::string_view command) const;\n\n  std::variant<User::UpdateRequest, facade::ErrorReply> ParseAclSetUser(\n      const facade::ArgRange& args, bool hashed = false, bool has_all_keys = false,\n      bool has_all_channels = false) const;\n\n  void BuildIndexers(RevCommandsIndexStore families);\n\n  // Data members\n\n  
facade::Listener* main_listener_{nullptr};\n  UserRegistry* registry_;\n  CommandRegistry* cmd_registry_;\n  util::ProactorPool* pool_;\n\n  // Indexes\n\n  // See definitions for NONE and ALL in facade/acl_commands_def.h\n  const CategoryIndexTable cat_table_{{\"KEYSPACE\", KEYSPACE},\n                                      {\"READ\", READ},\n                                      {\"WRITE\", WRITE},\n                                      {\"SET\", SET},\n                                      {\"SORTEDSET\", SORTEDSET},\n                                      {\"LIST\", LIST},\n                                      {\"HASH\", HASH},\n                                      {\"STRING\", STRING},\n                                      {\"BITMAP\", BITMAP},\n                                      {\"HYPERLOG\", HYPERLOGLOG},\n                                      {\"GEO\", GEO},\n                                      {\"STREAM\", STREAM},\n                                      {\"PUBSUB\", PUBSUB},\n                                      {\"ADMIN\", ADMIN},\n                                      {\"FAST\", FAST},\n                                      {\"SLOW\", SLOW},\n                                      {\"BLOCKING\", BLOCKING},\n                                      {\"DANGEROUS\", DANGEROUS},\n                                      {\"CONNECTION\", CONNECTION},\n                                      {\"TRANSACTION\", TRANSACTION},\n                                      {\"SCRIPTING\", SCRIPTING},\n                                      {\"CMS\", CMS},\n                                      {\"BLOOM\", BLOOM},\n                                      {\"FT_SEARCH\", FT_SEARCH},\n                                      {\"SEARCH\", FT_SEARCH},  // Alias for FT_SEARCH\n                                      {\"THROTTLE\", THROTTLE},\n                                      {\"JSON\", JSON},\n                                      {\"ALL\", ALL}};\n\n  // bit 0 at index 0\n  // bit 
1 at index 1\n  // bit n at index n\n  const ReverseCategoryIndexTable reverse_cat_table_{\n      \"KEYSPACE\",  \"READ\",      \"WRITE\",     \"SET\",       \"SORTEDSET\",  \"LIST\",        \"HASH\",\n      \"STRING\",    \"BITMAP\",    \"HYPERLOG\",  \"GEO\",       \"STREAM\",     \"PUBSUB\",      \"ADMIN\",\n      \"FAST\",      \"SLOW\",      \"BLOCKING\",  \"DANGEROUS\", \"CONNECTION\", \"TRANSACTION\", \"SCRIPTING\",\n      \"_RESERVED\", \"_RESERVED\", \"_RESERVED\", \"_RESERVED\", \"_RESERVED\",  \"_RESERVED\",   \"CMS\",\n      \"BLOOM\",     \"FT_SEARCH\", \"THROTTLE\",  \"JSON\"};\n\n  // We need this to act as a const member, since the initialization of const data members\n  // must be done on the constructor. However, these are initialized a little later, when\n  // we Register the commands\n  const CategoryToIdxStore& CategoryToIdx(CategoryToIdxStore store = {}) const {\n    static CategoryToIdxStore cat_idx = std::move(store);\n    return cat_idx;\n  }\n\n  const RevCommandsIndexStore& CommandsRevIndexer(RevCommandsIndexStore store = {}) const {\n    static RevCommandsIndexStore rev_index_store = std::move(store);\n    return rev_index_store;\n  }\n\n  const CategoryToCommandsIndexStore& CategoryToCommandsIndex(\n      CategoryToCommandsIndexStore store = {}) const {\n    static CategoryToCommandsIndexStore index = std::move(store);\n    return index;\n  }\n\n  size_t dbnum_ = 0;\n\n  // Only for testing interface\n public:\n  // Helper accessors for tests. Do not use them directly.\n  const ReverseCategoryIndexTable& GetRevTable() const {\n    return reverse_cat_table_;\n  }\n\n  // We could make CommandsRevIndexer public, but I want this to be\n  // clear that this is for TESTING so do not use this in the codebase\n  const RevCommandsIndexStore& GetCommandsRevIndexer() const {\n    return CommandsRevIndexer();\n  }\n};\n\n}  // namespace acl\n}  // namespace dfly\n"
  },
  {
    "path": "src/server/acl/acl_family_test.cc",
    "content": "// Copyright 2022, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#include \"server/acl/acl_family.h\"\n\n#include <absl/container/flat_hash_map.h>\n#include <absl/strings/ascii.h>\n#include <absl/strings/str_cat.h>\n\n#include \"base/flags.h\"\n#include \"base/gtest.h\"\n#include \"base/logging.h\"\n#include \"facade/facade_test.h\"\n#include \"server/acl/acl_commands_def.h\"\n#include \"server/command_registry.h\"\n#include \"server/test_utils.h\"\n\nusing namespace testing;\n\nABSL_DECLARE_FLAG(std::vector<std::string>, rename_command);\nABSL_DECLARE_FLAG(std::vector<std::string>, command_alias);\n\nnamespace dfly {\n\nclass AclFamilyTest : public BaseFamilyTest {\n protected:\n};\n\nclass AclFamilyTestRename : public BaseFamilyTest {\n  void SetUp() override {\n    absl::SetFlag(&FLAGS_rename_command, {\"ACL=ROCKS\"});\n    absl::SetFlag(&FLAGS_command_alias, {\"___SET=SET\"});\n    ResetService();\n  }\n};\n\nTEST_F(AclFamilyTest, AclSetUser) {\n  TestInitAclFam();\n  auto resp = Run({\"ACL\", \"SETUSER\"});\n  EXPECT_THAT(resp, ErrArg(\"ERR wrong number of arguments for 'acl setuser' command\"));\n\n  resp = Run({\"ACL\", \"SETUSER\", \"kostas\", \"ONN\"});\n  EXPECT_THAT(resp, ErrArg(\"ERR Unrecognized parameter ONN\"));\n\n  resp = Run({\"ACL\", \"SETUSER\", \"kostas\", \"+@nonsense\"});\n  EXPECT_THAT(resp, ErrArg(\"ERR Unrecognized parameter +@NONSENSE\"));\n\n  resp = Run({\"ACL\", \"SETUSER\", \"vlad\"});\n  EXPECT_THAT(resp, \"OK\");\n  resp = Run({\"ACL\", \"LIST\"});\n  auto vec = resp.GetVec();\n  EXPECT_THAT(vec, UnorderedElementsAre(\"user default on nopass ~* &* +@all $all\",\n                                        \"user vlad off resetchannels -@all $all\"));\n\n  resp = Run({\"ACL\", \"SETUSER\", \"vlad\", \"+ACL\"});\n  EXPECT_THAT(resp, \"OK\");\n\n  resp = Run({\"ACL\", \"LIST\"});\n  vec = resp.GetVec();\n  EXPECT_THAT(vec, UnorderedElementsAre(\"user default on nopass ~* &* +@all 
$all\",\n                                        \"user vlad off resetchannels -@all +acl $all\"));\n\n  resp = Run({\"ACL\", \"SETUSER\", \"vlad\", \"on\", \">pass\", \">temp\"});\n  EXPECT_THAT(resp, \"OK\");\n\n  resp = Run({\"ACL\", \"LIST\"});\n  vec = resp.GetVec();\n  EXPECT_THAT(vec.size(), 2);\n  auto contains_vlad = [](const auto& vec) {\n    const std::string default_user = \"user default on nopass ~* &* +@all $all\";\n    const std::string a_permutation =\n        \"user vlad on #a6864eb339b0e1f #d74ff0ee8da3b98 resetchannels -@all +acl $all\";\n    const std::string b_permutation =\n        \"user vlad on #d74ff0ee8da3b98 #a6864eb339b0e1f resetchannels -@all +acl $all\";\n    std::string_view other;\n    if (vec[0] == default_user) {\n      other = vec[1].GetView();\n    } else if (vec[1] == default_user) {\n      other = vec[0].GetView();\n    } else {\n      return false;\n    }\n\n    return other == a_permutation || other == b_permutation;\n  };\n\n  EXPECT_THAT(contains_vlad(vec), true);\n\n  resp = Run({\"AUTH\", \"vlad\", \"pass\"});\n  EXPECT_THAT(resp, \"OK\");\n\n  resp = Run({\"AUTH\", \"vlad\", \"temp\"});\n  EXPECT_THAT(resp, \"OK\");\n\n  resp = Run({\"AUTH\", \"default\", R\"(\"\")\"});\n  EXPECT_THAT(resp, \"OK\");\n\n  resp = Run({\"ACL\", \"SETUSER\", \"vlad\", \">another\"});\n  EXPECT_THAT(resp, \"OK\");\n\n  resp = Run({\"ACL\", \"SETUSER\", \"vlad\", \"<another\"});\n  EXPECT_THAT(resp, \"OK\");\n\n  resp = Run({\"ACL\", \"LIST\"});\n  vec = resp.GetVec();\n  EXPECT_THAT(vec.size(), 2);\n  EXPECT_THAT(contains_vlad(vec), true);\n\n  resp = Run({\"ACL\", \"SETUSER\", \"vlad\", \"resetpass\"});\n  EXPECT_THAT(resp, \"OK\");\n\n  resp = Run({\"ACL\", \"LIST\"});\n  vec = resp.GetVec();\n  EXPECT_THAT(vec, UnorderedElementsAre(\"user default on nopass ~* &* +@all $all\",\n                                        \"user vlad on resetchannels -@all +acl $all\"));\n\n  // +@NONE should not exist anymore. 
It's not in the spec.\n  resp = Run({\"ACL\", \"SETUSER\", \"rand\", \"+@NONE\"});\n  EXPECT_THAT(resp, ErrArg(\"ERR Unrecognized parameter +@NONE\"));\n\n  resp = Run({\"ACL\", \"SETUSER\", \"rand\", \"ALLCOMMANDS\"});\n  EXPECT_THAT(resp, \"OK\");\n\n  resp = Run({\"ACL\", \"LIST\"});\n  vec = resp.GetVec();\n  EXPECT_THAT(vec, UnorderedElementsAre(\"user default on nopass ~* &* +@all $all\",\n                                        \"user vlad on resetchannels -@all +acl $all\",\n                                        \"user rand off resetchannels +@all $all\"));\n\n  resp = Run({\"ACL\", \"SETUSER\", \"rand\", \"NOCOMMANDS\"});\n  EXPECT_THAT(resp, \"OK\");\n\n  resp = Run({\"ACL\", \"LIST\"});\n  vec = resp.GetVec();\n  EXPECT_THAT(vec, UnorderedElementsAre(\"user default on nopass ~* &* +@all $all\",\n                                        \"user vlad on resetchannels -@all +acl $all\",\n                                        \"user rand off resetchannels -@all $all\"));\n}\n\nTEST_F(AclFamilyTest, AclDelUser) {\n  TestInitAclFam();\n  auto resp = Run({\"ACL\", \"DELUSER\"});\n  EXPECT_THAT(resp, ErrArg(\"ERR wrong number of arguments for 'acl deluser' command\"));\n\n  resp = Run({\"ACL\", \"DELUSER\", \"default\"});\n  EXPECT_THAT(resp, IntArg(0));\n\n  resp = Run({\"ACL\", \"DELUSER\", \"NOTEXISTS\"});\n  EXPECT_THAT(resp, IntArg(0));\n\n  resp = Run({\"ACL\", \"SETUSER\", \"kostas\", \"ON\"});\n  EXPECT_THAT(resp, \"OK\");\n\n  resp = Run({\"ACL\", \"DELUSER\", \"KOSTAS\", \"NONSENSE\"});\n  EXPECT_THAT(resp, IntArg(0));\n\n  resp = Run({\"ACL\", \"DELUSER\", \"kostas\"});\n  EXPECT_THAT(resp, IntArg(1));\n\n  resp = Run({\"ACL\", \"DELUSER\", \"kostas\"});\n  EXPECT_THAT(resp, IntArg(0));\n\n  resp = Run({\"ACL\", \"LIST\"});\n  EXPECT_THAT(resp.GetString(), \"user default on nopass ~* &* +@all $all\");\n\n  Run({\"ACL\", \"SETUSER\", \"michael\", \"ON\"});\n  Run({\"ACL\", \"SETUSER\", \"kobe\", \"ON\"});\n  resp = Run({\"ACL\", \"DELUSER\", 
\"michael\", \"kobe\"});\n  EXPECT_THAT(resp, IntArg(2));\n}\n\nTEST_F(AclFamilyTest, AclList) {\n  TestInitAclFam();\n  auto resp = Run({\"ACL\", \"LIST\", \"NONSENSE\"});\n  EXPECT_THAT(resp, ErrArg(\"ERR wrong number of arguments for 'acl list' command\"));\n\n  resp = Run({\"ACL\", \"SETUSER\", \"kostas\", \">pass\", \"+@admin\"});\n  EXPECT_THAT(resp, \"OK\");\n\n  resp = Run({\"ACL\", \"SETUSER\", \"adi\", \">pass\", \"+@fast\"});\n  EXPECT_THAT(resp, \"OK\");\n\n  resp = Run({\"ACL\", \"LIST\"});\n  auto vec = resp.GetVec();\n  EXPECT_THAT(\n      vec, UnorderedElementsAre(\"user default on nopass ~* &* +@all $all\",\n                                \"user kostas off #d74ff0ee8da3b98 resetchannels -@all +@admin $all\",\n                                \"user adi off #d74ff0ee8da3b98 resetchannels -@all +@fast $all\"));\n}\n\nTEST_F(AclFamilyTest, AclAuth) {\n  TestInitAclFam();\n  auto resp = Run({\"AUTH\", \"default\", R\"(\"\")\"});\n  EXPECT_THAT(resp, \"OK\");\n\n  resp = Run({\"ACL\", \"SETUSER\", \"shahar\", \">mypass\"});\n  EXPECT_THAT(resp, \"OK\");\n\n  resp = Run({\"AUTH\", \"shahar\", \"wrongpass\"});\n  EXPECT_THAT(resp, ErrArg(\"WRONGPASS invalid username-password pair or user is disabled.\"));\n\n  resp = Run({\"AUTH\", \"shahar\", \"mypass\"});\n  EXPECT_THAT(resp, ErrArg(\"WRONGPASS invalid username-password pair or user is disabled.\"));\n\n  // Activate the user\n  resp = Run({\"ACL\", \"SETUSER\", \"shahar\", \"ON\", \"+@fast\"});\n  EXPECT_THAT(resp, \"OK\");\n\n  resp = Run({\"AUTH\", \"shahar\", \"mypass\"});\n  EXPECT_THAT(resp, \"OK\");\n}\n\nTEST_F(AclFamilyTest, AclWhoAmI) {\n  TestInitAclFam();\n  auto resp = Run({\"ACL\", \"WHOAMI\", \"WHO\"});\n  EXPECT_THAT(resp, ErrArg(\"ERR wrong number of arguments for 'acl whoami' command\"));\n\n  resp = Run({\"ACL\", \"SETUSER\", \"kostas\", \"ON\", \">pass\", \"+@SLOW\"});\n  EXPECT_THAT(resp, \"OK\");\n\n  resp = Run({\"AUTH\", \"kostas\", \"pass\"});\n  EXPECT_THAT(resp, \"OK\");\n\n  
resp = Run({\"ACL\", \"WHOAMI\"});\n  EXPECT_THAT(resp, \"User is kostas\");\n}\n\nTEST_F(AclFamilyTest, TestAllCategories) {\n  const auto* fam = TestInitAclFam();\n  for (auto& cat : fam->GetRevTable()) {\n    if (cat != \"_RESERVED\") {\n      auto resp = Run({\"ACL\", \"SETUSER\", \"kostas\", absl::StrCat(\"+@\", cat)});\n      EXPECT_THAT(resp, \"OK\");\n\n      resp = Run({\"ACL\", \"LIST\"});\n      EXPECT_THAT(resp.GetVec(),\n                  UnorderedElementsAre(\"user default on nopass ~* &* +@all $all\",\n                                       absl::StrCat(\"user kostas off resetchannels -@all \", \"+@\",\n                                                    absl::AsciiStrToLower(cat), \" $all\")));\n\n      resp = Run({\"ACL\", \"SETUSER\", \"kostas\", absl::StrCat(\"-@\", cat)});\n      EXPECT_THAT(resp, \"OK\");\n\n      resp = Run({\"ACL\", \"LIST\"});\n      EXPECT_THAT(resp.GetVec(),\n                  UnorderedElementsAre(\"user default on nopass ~* &* +@all $all\",\n                                       absl::StrCat(\"user kostas off resetchannels -@all \", \"-@\",\n                                                    absl::AsciiStrToLower(cat), \" $all\")));\n\n      resp = Run({\"ACL\", \"DELUSER\", \"kostas\"});\n      EXPECT_THAT(resp, IntArg(1));\n    }\n  }\n\n  for (auto& cat : fam->GetRevTable()) {\n    if (cat != \"_RESERVED\") {\n      auto resp = Run({\"ACL\", \"SETUSER\", \"kostas\", absl::StrCat(\"+@\", cat)});\n      EXPECT_THAT(resp, \"OK\");\n    }\n  }\n  // This won't work because of __RESERVED\n  // TODO(fix this)\n  //  auto resp = Run({\"ACL\", \"LIST\"});\n  //  EXPECT_THAT(resp.GetVec(), UnorderedElementsAre(\"user default on nopass +@ALL\",\n  //  absl::StrCat(\"user kostas off nopass \", \"+@ALL\")));\n  //\n\n  // TODO(Bug here fix none/all)\n  //  auto resp = Run({\"ACL\", \"SETUSER\", \"kostas\", \"+@NONE\"});\n  //  EXPECT_THAT(resp, \"OK\");\n  //\n  //  resp = Run({\"ACL\", \"LIST\"});\n  //  
EXPECT_THAT(resp.GetVec(), UnorderedElementsAre(\"user default on nopass +@ALL\", \"user kostas\n  //  off nopass +@NONE\"));\n}\n\nTEST_F(AclFamilyTest, TestAllCommands) {\n  const auto* fam = TestInitAclFam();\n  const auto& rev_indexer = fam->GetCommandsRevIndexer();\n  for (const auto& family : rev_indexer) {\n    for (const auto& command_name : family) {\n      auto resp = Run({\"ACL\", \"SETUSER\", \"kostas\", absl::StrCat(\"+\", command_name)});\n      EXPECT_THAT(resp, \"OK\");\n\n      resp = Run({\"ACL\", \"LIST\"});\n      EXPECT_THAT(resp.GetVec(),\n                  UnorderedElementsAre(\"user default on nopass ~* &* +@all $all\",\n                                       absl::StrCat(\"user kostas off resetchannels -@all \", \"+\",\n                                                    absl::AsciiStrToLower(command_name), \" $all\")));\n\n      resp = Run({\"ACL\", \"SETUSER\", \"kostas\", absl::StrCat(\"-\", command_name)});\n\n      resp = Run({\"ACL\", \"LIST\"});\n      EXPECT_THAT(resp.GetVec(),\n                  UnorderedElementsAre(\"user default on nopass ~* &* +@all $all\",\n                                       absl::StrCat(\"user kostas off resetchannels -@all \", \"-\",\n                                                    absl::AsciiStrToLower(command_name), \" $all\")));\n\n      resp = Run({\"ACL\", \"DELUSER\", \"kostas\"});\n      EXPECT_THAT(resp, IntArg(1));\n    }\n  }\n}\n\nTEST_F(AclFamilyTest, TestUsers) {\n  TestInitAclFam();\n  auto resp = Run({\"ACL\", \"SETUSER\", \"abhra\", \"ON\"});\n  EXPECT_THAT(resp, \"OK\");\n\n  resp = Run({\"ACL\", \"SETUSER\", \"ari\"});\n  EXPECT_THAT(resp, \"OK\");\n\n  resp = Run({\"ACL\", \"USERS\"});\n  EXPECT_THAT(resp.GetVec(), UnorderedElementsAre(\"default\", \"abhra\", \"ari\"));\n}\n\nTEST_F(AclFamilyTest, TestCat) {\n  TestInitAclFam();\n  auto resp = Run({\"ACL\", \"CAT\", \"nonsense\"});\n  EXPECT_THAT(resp, ErrArg(\"ERR Unknown category: NONSENSE\"));\n\n  resp = Run({\"ACL\", 
\"CAT\"});\n  EXPECT_GE(resp.GetVec().size(), 24u);\n\n  resp = Run({\"ACL\", \"CAT\", \"STRING\"});\n\n  EXPECT_THAT(resp.GetVec(),\n              IsSupersetOf({\"GETSET\", \"GETRANGE\", \"INCRBYFLOAT\", \"GETDEL\",  \"DECRBY\", \"PREPEND\",\n                            \"SETEX\",  \"MSET\",     \"SET\",         \"PSETEX\",  \"SUBSTR\", \"DECR\",\n                            \"STRLEN\", \"INCR\",     \"INCRBY\",      \"MGET\",    \"GET\",    \"SETNX\",\n                            \"GETEX\",  \"APPEND\",   \"MSETNX\",      \"SETRANGE\"}));\n}\n\nTEST_F(AclFamilyTest, TestGetUser) {\n  TestInitAclFam();\n  auto resp = Run({\"ACL\", \"GETUSER\", \"kostas\"});\n  EXPECT_THAT(resp, ArgType(RespExpr::NIL));\n\n  resp = Run({\"ACL\", \"GETUSER\", \"default\"});\n  const auto& vec = resp.GetVec();\n  EXPECT_THAT(vec[0], \"flags\");\n  EXPECT_THAT(vec[1].GetVec(), UnorderedElementsAre(\"on\", \"nopass\"));\n  EXPECT_THAT(vec[2], \"passwords\");\n  EXPECT_TRUE(vec[3].GetVec().empty());\n  EXPECT_THAT(vec[4], \"commands\");\n  EXPECT_THAT(vec[5], \"+@all\");\n  EXPECT_THAT(vec[6], \"keys\");\n  EXPECT_THAT(vec[7], \"~*\");\n  EXPECT_THAT(vec[8], \"channels\");\n  EXPECT_THAT(vec[9], \"&*\");\n\n  resp = Run({\"ACL\", \"SETUSER\", \"kostas\", \"+@STRING\", \"+HSET\"});\n  resp = Run({\"ACL\", \"GETUSER\", \"kostas\"});\n  const auto& kvec = resp.GetVec();\n  EXPECT_THAT(kvec[0], \"flags\");\n  EXPECT_THAT(kvec[1].GetVec(), UnorderedElementsAre(\"off\"));\n  EXPECT_THAT(kvec[2], \"passwords\");\n  EXPECT_TRUE(kvec[3].GetVec().empty());\n  EXPECT_THAT(kvec[4], \"commands\");\n  EXPECT_THAT(kvec[5], \"-@all +@string +hset\");\n  EXPECT_THAT(kvec[6], \"keys\");\n  EXPECT_THAT(kvec[7], RespArray(ElementsAre()));\n  EXPECT_THAT(kvec[8], \"channels\");\n  EXPECT_THAT(kvec[9], \"resetchannels\");\n}\n\nTEST_F(AclFamilyTest, TestDryRun) {\n  TestInitAclFam();\n  auto resp = Run({\"ACL\", \"DRYRUN\"});\n  EXPECT_THAT(resp, ErrArg(\"ERR wrong number of arguments for 'acl dryrun' 
command\"));\n\n  resp = Run({\"ACL\", \"DRYRUN\", \"default\"});\n  EXPECT_THAT(resp, ErrArg(\"ERR wrong number of arguments for 'acl dryrun' command\"));\n\n  resp = Run({\"ACL\", \"DRYRUN\", \"default\", \"get\", \"more\"});\n  EXPECT_THAT(resp, ErrArg(\"ERR wrong number of arguments for 'acl dryrun' command\"));\n\n  resp = Run({\"ACL\", \"DRYRUN\", \"kostas\", \"more\"});\n  EXPECT_THAT(resp, ErrArg(\"ERR User 'kostas' not found\"));\n\n  resp = Run({\"ACL\", \"DRYRUN\", \"default\", \"nope\"});\n  EXPECT_THAT(resp, ErrArg(\"ERR Command 'NOPE' not found\"));\n\n  resp = Run({\"ACL\", \"DRYRUN\", \"default\", \"SET\"});\n  EXPECT_THAT(resp, \"OK\");\n\n  resp = Run({\"ACL\", \"SETUSER\", \"kostas\", \"+GET\"});\n  EXPECT_THAT(resp, \"OK\");\n\n  resp = Run({\"ACL\", \"DRYRUN\", \"kostas\", \"GET\"});\n  EXPECT_THAT(resp, \"OK\");\n\n  resp = Run({\"ACL\", \"DRYRUN\", \"kostas\", \"SET\"});\n  EXPECT_THAT(resp, \"This user has no permissions to run the 'SET' command\");\n}\n\nTEST_F(AclFamilyTest, AclGenPassTooManyArguments) {\n  TestInitAclFam();\n\n  auto resp = Run({\"ACL\", \"GENPASS\", \"1\", \"2\"});\n  EXPECT_THAT(resp.GetString(),\n              \"ERR Unknown subcommand or wrong number of arguments for 'GENPASS'. 
Try ACL HELP.\");\n}\n\nTEST_F(AclFamilyTest, AclGenPassOutOfRange) {\n  std::string expectedError =\n      \"ERR ACL GENPASS argument must be the number of bits for the output password, a positive \"\n      \"number up to 4096\";\n\n  auto resp = Run({\"ACL\", \"GENPASS\", \"-1\"});\n  EXPECT_THAT(resp.GetString(), expectedError);\n\n  resp = Run({\"ACL\", \"GENPASS\", \"0\"});\n  EXPECT_THAT(resp.GetString(), expectedError);\n\n  resp = Run({\"ACL\", \"GENPASS\", \"4097\"});\n  EXPECT_THAT(resp.GetString(), expectedError);\n}\n\nTEST_F(AclFamilyTest, AclGenPass) {\n  auto resp = Run({\"ACL\", \"GENPASS\"});\n  auto actualPassword = resp.GetString();\n\n  // should be 256 bits or 64 bytes in hex\n  EXPECT_THAT(actualPassword.length(), 64);\n\n  // 1 bit - 4 bits should all produce a single hex character\n  for (int i = 1; i <= 4; i++) {\n    resp = Run({\"ACL\", \"GENPASS\", std::to_string(i)});\n    EXPECT_THAT(resp.GetString().length(), 1);\n  }\n  // 5 bits - 8 bits should all produce two hex characters\n  for (int i = 5; i <= 8; i++) {\n    resp = Run({\"ACL\", \"GENPASS\", std::to_string(i)});\n    EXPECT_THAT(resp.GetString().length(), 2);\n  }\n\n  // and the pattern continues\n  resp = Run({\"ACL\", \"GENPASS\", \"9\"});\n  EXPECT_THAT(resp.GetString().length(), 3);\n}\n\nTEST_F(AclFamilyTestRename, AclRename) {\n  auto resp = Run({\"ACL\", \"SETUSER\", \"billy\"});\n  EXPECT_THAT(resp, ErrArg(\"ERR unknown command `ACL`\"));\n\n  resp = Run({\"ROCKS\", \"SETUSER\", \"billy\", \"ON\", \">mypass\"});\n  EXPECT_THAT(resp.GetString(), \"OK\");\n\n  resp = Run({\"ROCKS\", \"DELUSER\", \"billy\"});\n  EXPECT_THAT(resp, IntArg(1));\n}\n\nTEST_F(AclFamilyTest, TestKeys) {\n  TestInitAclFam();\n  auto resp = Run({\"ACL\", \"SETUSER\", \"temp\", \"~foo\", \"~bar*\"});\n  EXPECT_THAT(resp, \"OK\");\n\n  resp = Run({\"ACL\", \"GETUSER\", \"temp\"});\n  auto& vec = resp.GetVec();\n  EXPECT_THAT(vec[6], \"keys\");\n  EXPECT_THAT(vec[7], \"~foo ~bar*\");\n\n  resp = 
Run({\"ACL\", \"SETUSER\", \"temp\", \"~*\", \"~foo\"});\n  EXPECT_THAT(resp, ErrArg(\"ERR Error in ACL SETUSER modifier '~foo': Adding a pattern after the * \"\n                           \"pattern (or the 'allkeys' flag) is not valid and does not have any \"\n                           \"effect. Try 'resetkeys' to start with an empty list of patterns\"));\n\n  resp = Run({\"ACL\", \"SETUSER\", \"temp\", \"~*\"});\n  EXPECT_THAT(resp, \"OK\");\n\n  resp = Run({\"ACL\", \"SETUSER\", \"temp\", \"~foo\"});\n  EXPECT_THAT(resp, ErrArg(\"ERR Error in ACL SETUSER modifier '~foo': Adding a pattern after the * \"\n                           \"pattern (or the 'allkeys' flag) is not valid and does not have any \"\n                           \"effect. Try 'resetkeys' to start with an empty list of patterns\"));\n\n  resp = Run({\"ACL\", \"SETUSER\", \"temp\", \"resetkeys\"});\n  EXPECT_THAT(resp, \"OK\");\n\n  resp = Run({\"ACL\", \"GETUSER\", \"temp\"});\n  EXPECT_TRUE(resp.GetVec()[7].GetVec().empty());\n\n  resp = Run({\"ACL\", \"SETUSER\", \"temp\", \"%R~foo\"});\n  EXPECT_THAT(resp, \"OK\");\n\n  resp = Run({\"ACL\", \"GETUSER\", \"temp\"});\n  EXPECT_THAT(resp.GetVec()[7], \"%R~foo\");\n\n  resp = Run({\"ACL\", \"SETUSER\", \"temp\", \"resetkeys\", \"%W~foo\"});\n  EXPECT_THAT(resp, \"OK\");\n\n  resp = Run({\"ACL\", \"GETUSER\", \"temp\"});\n  EXPECT_THAT(resp.GetVec()[7], \"%W~foo\");\n\n  resp = Run({\"ACL\", \"SETUSER\", \"temp\", \"resetkeys\", \"%RW~foo\"});\n  EXPECT_THAT(resp, \"OK\");\n\n  resp = Run({\"ACL\", \"GETUSER\", \"temp\"});\n  EXPECT_THAT(resp.GetVec()[7], \"~foo\");\n\n  resp = Run({\"ACL\", \"SETUSER\", \"temp\", \"resetkeys\", \"%K~foo\"});\n  EXPECT_THAT(resp, ErrArg(\"ERR Unrecognized parameter %K~FOO\"));\n\n  resp = Run({\"ACL\", \"SETUSER\", \"temp\", \"resetkeys\", \"%Rfoo\"});\n  EXPECT_THAT(resp, ErrArg(\"ERR Unrecognized parameter %RFOO\"));\n}\n\nTEST_F(AclFamilyTest, TestPubSub) {\n  TestInitAclFam();\n\n  auto resp = Run({\"ACL\", 
\"SETUSER\", \"temp\", \"&foo\", \"&b*r\"});\n  EXPECT_THAT(resp, \"OK\");\n\n  resp = Run({\"ACL\", \"GETUSER\", \"temp\"});\n  auto vec = resp.GetVec();\n  EXPECT_THAT(vec[8], \"channels\");\n  EXPECT_THAT(vec[9], \"resetchannels &foo &b*r\");\n\n  resp = Run({\"ACL\", \"SETUSER\", \"temp\", \"allchannels\", \"&bar\"});\n  EXPECT_THAT(resp, ErrArg(\"ERR Error in ACL SETUSER modifier '&bar': Adding a pattern after the * \"\n                           \"pattern (or the 'allchannels' flag) is \"\n                           \"not valid and does not have any effect. Try 'resetchannels' to start \"\n                           \"with an empty list of channels\"));\n\n  resp = Run({\"ACL\", \"SETUSER\", \"temp\", \"allchannels\"});\n  EXPECT_THAT(resp, \"OK\");\n\n  resp = Run({\"ACL\", \"GETUSER\", \"temp\"});\n  vec = resp.GetVec();\n  EXPECT_THAT(vec[8], \"channels\");\n  EXPECT_THAT(vec[9], \"&*\");\n\n  resp = Run({\"ACL\", \"SETUSER\", \"temp\", \"resetchannels\", \"&foo\"});\n  EXPECT_THAT(resp, \"OK\");\n\n  resp = Run({\"ACL\", \"GETUSER\", \"temp\"});\n  vec = resp.GetVec();\n  EXPECT_THAT(vec[8], \"channels\");\n  EXPECT_THAT(vec[9], \"resetchannels &foo\");\n\n  resp =\n      Run(\"ACL setuser demo on resetkeys resetchannels ~app|managed-resources|* \"\n          \"&app|managed-resources|* +publish +ping >passwd\");\n  resp = Run(\"AUTH demo passwd\");\n  EXPECT_THAT(resp, \"OK\");\n\n  resp = Run(\"publish app|managed-resources|xyz test\");\n  EXPECT_THAT(resp, IntArg(0));\n}\n\nTEST_F(AclFamilyTest, TestAlias) {\n  auto resp = Run({\"ACL\", \"SETUSER\", \"luke\", \"+___SET\"});\n  EXPECT_THAT(resp, ErrArg(\"ERR Unrecognized parameter +___SET\"));\n\n  resp = Run({\"ACL\", \"SETUSER\", \"leia\", \"-___SET\"});\n  EXPECT_THAT(resp, ErrArg(\"ERR Unrecognized parameter -___SET\"));\n\n  resp = Run({\"ACL\", \"SETUSER\", \"anakin\", \"+SET\"});\n  EXPECT_EQ(resp, \"OK\");\n\n  resp = Run({\"ACL\", \"SETUSER\", \"jarjar\", \"allcommands\"});\n  EXPECT_EQ(resp, 
\"OK\");\n\n  resp = Run({\"ACL\", \"DRYRUN\", \"jarjar\", \"___SET\"});\n  EXPECT_THAT(resp, ErrArg(\"ERR Command '___SET' not found\"));\n  EXPECT_EQ(Run({\"ACL\", \"DRYRUN\", \"jarjar\", \"SET\"}), \"OK\");\n}\n\nTEST_F(AclFamilyTest, TestAclLogUB) {\n  auto resp = Run({\"ACL\", \"LOG\"});\n  EXPECT_TRUE(resp.GetVec().empty());\n\n  resp = Run({\"ACL\", \"LOG\", \"2\", \"RESET\"});\n  EXPECT_THAT(resp, ErrArg(\"ERR index out of range\"));\n}\n\n}  // namespace dfly\n"
  },
  {
    "path": "src/server/acl/acl_log.cc",
    "content": "// Copyright 2022, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#include \"server/acl/acl_log.h\"\n\n#include <chrono>\n#include <iterator>\n\n#include \"base/flags.h\"\n#include \"base/logging.h\"\n#include \"facade/dragonfly_connection.h\"\n#include \"server/conn_context.h\"\n\nABSL_FLAG(uint32_t, acllog_max_len, 32,\n          \"Specify the number of log entries. Logs are kept locally for each thread \"\n          \"and therefore the total number of entries are acllog_max_len * threads\");\n\nnamespace dfly::acl {\n\nAclLog::AclLog() : total_entries_allowed_(absl::GetFlag(FLAGS_acllog_max_len)) {\n}\n\nvoid AclLog::Add(const ConnectionContext& cntx, std::string object, Reason reason,\n                 std::string tried_to_auth) {\n  if (total_entries_allowed_ == 0) {\n    return;\n  }\n\n  if (log_.size() == total_entries_allowed_) {\n    log_.pop_back();\n  }\n\n  std::string username;\n  // We can't use a conditional here because the result is the common type which is a const-ref\n  if (tried_to_auth.empty()) {\n    username = cntx.authed_username;\n  } else {\n    username = std::move(tried_to_auth);\n  }\n\n  std::string client_info = cntx.conn()->GetClientInfo();\n  using clock = std::chrono::system_clock;\n  LogEntry entry = {std::move(username), std::move(client_info), std::move(object), reason,\n                    clock::now()};\n  log_.push_front(std::move(entry));\n}\n\nvoid AclLog::Reset() {\n  log_.clear();\n}\n\nAclLog::LogType AclLog::GetLog(size_t number_of_entries) const {\n  auto start = log_.begin();\n  auto end = log_.size() <= number_of_entries ? log_.end() : std::next(start, number_of_entries);\n  return {start, end};\n}\n\nvoid AclLog::SetTotalEntries(size_t total_entries) {\n  if (log_.size() > total_entries) {\n    log_.erase(std::next(log_.begin(), total_entries), log_.end());\n  }\n\n  total_entries_allowed_ = total_entries;\n}\n\n}  // namespace dfly::acl\n"
  },
  {
    "path": "src/server/acl/acl_log.h",
    "content": "// Copyright 2022, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#pragma once\n\n#include <chrono>\n#include <deque>\n#include <string>\n\nnamespace dfly {\n\nclass ConnectionContext;\n\nnamespace acl {\n\nclass AclLog {\n public:\n  explicit AclLog();\n\n  enum class Reason { COMMAND, AUTH, KEY, PUB_SUB };\n\n  struct LogEntry {\n    std::string username;\n    std::string client_info;\n    std::string object;\n    Reason reason;\n    using TimePoint = std::chrono::time_point<std::chrono::system_clock>;\n    TimePoint entry_creation = TimePoint::max();\n\n    friend bool operator<(const LogEntry& lhs, const LogEntry& rhs) {\n      return lhs.entry_creation < rhs.entry_creation;\n    }\n  };\n\n  void Add(const ConnectionContext& cntx, std::string object, Reason reason,\n           std::string tried_to_auth = \"\");\n  void Reset();\n\n  using LogType = std::deque<LogEntry>;\n\n  LogType GetLog(size_t number_of_entries) const;\n\n  void SetTotalEntries(size_t total_entries);\n\n private:\n  LogType log_;\n  size_t total_entries_allowed_;\n};\n\n}  // namespace acl\n}  // namespace dfly\n"
  },
  {
    "path": "src/server/acl/user.cc",
    "content": "// Copyright 2023, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#include \"server/acl/user.h\"\n\n#include <openssl/sha.h>\n\n#include <limits>\n\n#include \"absl/container/flat_hash_set.h\"\n#include \"absl/strings/escaping.h\"\n#include \"core/overloaded.h\"\n\nnamespace dfly::acl {\n\nnamespace {\nstd::string StringSHA256(std::string_view password) {\n  std::string hash;\n  hash.resize(SHA256_DIGEST_LENGTH);\n  SHA256(reinterpret_cast<const unsigned char*>(password.data()), password.size(),\n         reinterpret_cast<unsigned char*>(hash.data()));\n  return hash;\n}\n\n}  // namespace\n\nUser::User() {\n  commands_ = std::vector<uint64_t>(NumberOfFamilies(), 0);\n}\n\nvoid User::Update(UpdateRequest&& req, const CategoryToIdxStore& cat_to_id,\n                  const ReverseCategoryIndexTable& reverse_cat,\n                  const CategoryToCommandsIndexStore& cat_to_commands) {\n  for (auto& pass : req.passwords) {\n    if (pass.nopass) {\n      SetNopass();\n      continue;\n    }\n    if (pass.unset) {\n      UnsetPassword(pass.password);\n      continue;\n    }\n    if (pass.reset_password) {\n      password_hashes_.clear();\n      continue;\n    }\n    SetPasswordHash(pass.password, pass.is_hashed);\n  }\n\n  auto cat_visitor = [&, this](UpdateRequest::CategoryValueType cat) {\n    auto [sign, category] = cat;\n    if (sign == Sign::PLUS) {\n      SetAclCategoriesAndIncrSeq(category, cat_to_id, reverse_cat, cat_to_commands);\n      return;\n    }\n    UnsetAclCategoriesAndIncrSeq(category, cat_to_id, reverse_cat, cat_to_commands);\n  };\n\n  auto cmd_visitor = [this](UpdateRequest::CommandsValueType cmd) {\n    auto [sign, index, bit_index] = cmd;\n    if (sign == Sign::PLUS) {\n      SetAclCommandsAndIncrSeq(index, bit_index);\n      return;\n    }\n    UnsetAclCommandsAndIncrSeq(index, bit_index);\n  };\n\n  Overloaded visitor{cat_visitor, cmd_visitor};\n\n  for (auto req : req.updates) {\n    
std::visit(visitor, req);\n  }\n\n  if (!req.keys.empty()) {\n    SetKeyGlobs(std::move(req.keys));\n  }\n\n  if (!req.pub_sub.empty()) {\n    SetPubSub(std::move(req.pub_sub));\n  }\n\n  if (req.is_active) {\n    SetIsActive(*req.is_active);\n  }\n\n  SetSelectDb(req.select_db);\n\n  SetNamespace(req.ns);\n}\n\nvoid User::SetPasswordHash(std::string_view password, bool is_hashed) {\n  nopass_ = false;\n  if (is_hashed) {\n    std::string binary;\n    if (absl::HexStringToBytes(password, &binary)) {\n      password_hashes_.insert(binary);\n    } else {\n      LOG(ERROR) << \"Invalid password hash: \" << password;\n    }\n    return;\n  }\n  password_hashes_.insert(StringSHA256(password));\n}\n\nvoid User::UnsetPassword(std::string_view password) {\n  password_hashes_.erase(StringSHA256(password));\n}\n\nvoid User::SetNamespace(const std::string& ns) {\n  namespace_ = ns;\n}\n\nvoid User::SetSelectDb(std::optional<size_t> db) {\n  if (db) {\n    db_ = *db;\n  }\n}\n\nsize_t User::Db() const {\n  return db_;\n}\n\nconst std::string& User::Namespace() const {\n  return namespace_;\n}\n\nbool User::HasPassword(std::string_view password) const {\n  if (nopass_) {\n    return true;\n  }\n  return password_hashes_.contains(StringSHA256(password));\n}\n\nvoid User::SetAclCategoriesAndIncrSeq(uint32_t cat, const CategoryToIdxStore& cat_to_id,\n                                      const ReverseCategoryIndexTable& reverse_cat,\n                                      const CategoryToCommandsIndexStore& cat_to_commands) {\n  acl_categories_ |= cat;\n  if (cat == acl::ALL) {\n    SetAclCommands(std::numeric_limits<size_t>::max(), 0);\n  } else {\n    auto id = cat_to_id.at(cat);\n    std::string_view name = reverse_cat[id];\n    const auto& commands_group = cat_to_commands.at(name);\n    for (size_t fam_id = 0; fam_id < commands_group.size(); ++fam_id) {\n      SetAclCommands(fam_id, commands_group[fam_id]);\n    }\n  }\n\n  CategoryChange change{cat};\n  cat_changes_[change] = 
ChangeMetadata{Sign::PLUS, seq_++};\n}\n\nvoid User::UnsetAclCategoriesAndIncrSeq(uint32_t cat, const CategoryToIdxStore& cat_to_id,\n                                        const ReverseCategoryIndexTable& reverse_cat,\n                                        const CategoryToCommandsIndexStore& cat_to_commands) {\n  acl_categories_ ^= cat;\n  if (cat == acl::ALL) {\n    UnsetAclCommands(std::numeric_limits<size_t>::max(), 0);\n  } else {\n    auto id = cat_to_id.at(cat);\n    std::string_view name = reverse_cat[id];\n    const auto& commands_group = cat_to_commands.at(name);\n    for (size_t fam_id = 0; fam_id < commands_group.size(); ++fam_id) {\n      UnsetAclCommands(fam_id, commands_group[fam_id]);\n    }\n  }\n\n  CategoryChange change{cat};\n  cat_changes_[change] = ChangeMetadata{Sign::MINUS, seq_++};\n}\n\nvoid User::SetAclCommands(size_t index, uint64_t bit_index) {\n  if (index == std::numeric_limits<size_t>::max()) {\n    for (auto& family : commands_) {\n      family = ALL_COMMANDS;\n    }\n    return;\n  }\n  commands_[index] |= bit_index;\n}\n\nvoid User::SetAclCommandsAndIncrSeq(size_t index, uint64_t bit_index) {\n  SetAclCommands(index, bit_index);\n  CommandChange change{index, bit_index};\n  cmd_changes_[change] = ChangeMetadata{Sign::PLUS, seq_++};\n}\n\nvoid User::UnsetAclCommands(size_t index, uint64_t bit_index) {\n  if (index == std::numeric_limits<size_t>::max()) {\n    for (auto& family : commands_) {\n      family = NONE_COMMANDS;\n    }\n    return;\n  }\n  SetAclCommands(index, bit_index);\n  commands_[index] ^= bit_index;\n}\n\nvoid User::UnsetAclCommandsAndIncrSeq(size_t index, uint64_t bit_index) {\n  UnsetAclCommands(index, bit_index);\n  CommandChange change{index, bit_index};\n  cmd_changes_[change] = ChangeMetadata{Sign::MINUS, seq_++};\n}\n\nuint32_t User::AclCategory() const {\n  return acl_categories_;\n}\n\nstd::vector<uint64_t> User::AclCommands() const {\n  return commands_;\n}\n\nconst std::vector<uint64_t>& 
User::AclCommandsRef() const {\n  return commands_;\n}\n\nvoid User::SetIsActive(bool is_active) {\n  is_active_ = is_active;\n}\n\nbool User::IsActive() const {\n  return is_active_;\n}\n\nconst absl::flat_hash_set<std::string>& User::Passwords() const {\n  return password_hashes_;\n}\n\nbool User::HasNopass() const {\n  return nopass_;\n}\n\nconst AclKeys& User::Keys() const {\n  return keys_;\n}\n\nconst AclPubSub& User::PubSub() const {\n  return pub_sub_;\n}\n\nconst User::CategoryChanges& User::CatChanges() const {\n  return cat_changes_;\n}\n\nconst User::CommandChanges& User::CmdChanges() const {\n  return cmd_changes_;\n}\n\nvoid User::SetKeyGlobs(std::vector<UpdateKey> keys) {\n  for (auto& key : keys) {\n    if (key.all_keys) {\n      keys_.key_globs.clear();\n      keys_.all_keys = true;\n    } else if (key.reset_keys) {\n      keys_.key_globs.clear();\n      keys_.all_keys = false;\n    } else {\n      keys_.key_globs.push_back({std::move(key.key), key.op});\n    }\n  }\n}\n\nvoid User::SetPubSub(std::vector<UpdatePubSub> pub_sub) {\n  for (auto& pattern : pub_sub) {\n    if (pattern.all_channels) {\n      pub_sub_.globs.clear();\n      pub_sub_.all_channels = true;\n    } else if (pattern.reset_channels) {\n      pub_sub_.globs.clear();\n      pub_sub_.all_channels = false;\n    } else {\n      pub_sub_.globs.push_back({std::move(pattern.pattern), pattern.has_asterisk});\n    }\n  }\n}\n\nvoid User::SetNopass() {\n  nopass_ = true;\n  password_hashes_.clear();\n}\n\n}  // namespace dfly::acl\n"
  },
  {
    "path": "src/server/acl/user.h",
    "content": "// Copyright 2023, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#pragma once\n\n#include <cstdint>\n#include <limits>\n#include <optional>\n#include <string>\n#include <string_view>\n#include <tuple>\n#include <utility>\n#include <vector>\n\n#include \"absl/container/flat_hash_map.h\"\n#include \"absl/container/flat_hash_set.h\"\n#include \"absl/hash/hash.h\"\n#include \"server/acl/acl_commands_def.h\"\n\nnamespace dfly::acl {\n\nclass User final {\n public:\n  enum class Sign : int8_t { PLUS, MINUS };\n\n  struct UpdateKey {\n    std::string key;\n    KeyOp op;\n    bool all_keys = false;\n    bool reset_keys = false;\n  };\n\n  struct UpdatePass {\n    std::string password;\n    // Set to denote remove password\n    bool unset{false};\n    bool nopass{false};\n    bool reset_password{false};\n    bool is_hashed{false};\n  };\n\n  struct UpdatePubSub {\n    std::string pattern;\n    bool has_asterisk{false};\n    bool all_channels{false};\n    bool reset_channels{false};\n  };\n\n  struct UpdateRequest {\n    std::vector<UpdatePass> passwords;\n\n    std::optional<bool> is_active{};\n\n    bool is_hashed{false};\n\n    // Categories and commands\n    using CategoryValueType = std::pair<Sign, uint32_t>;\n    // If index s numberic_limits::max() then it's a +all flag\n    using CommandsValueType = std::tuple<Sign, size_t /*index*/, uint64_t /*bit*/>;\n    using UpdateType = std::vector<std::variant<CategoryValueType, CommandsValueType>>;\n    UpdateType updates;\n\n    // keys\n    std::vector<UpdateKey> keys;\n    bool reset_all_keys{false};\n    bool allow_all_keys{false};\n\n    // pub/sub\n    std::vector<UpdatePubSub> pub_sub;\n    bool reset_channels{false};\n    bool all_channels{false};\n\n    // TODO allow reset all\n    // bool reset_all{false};\n\n    // DFLY specific\n    std::optional<size_t> select_db;\n    std::string ns;\n  };\n\n  using CategoryChange = uint32_t;\n  using CommandChange = 
std::pair<size_t, uint64_t>;\n\n  struct ChangeMetadata {\n    Sign sign;\n    size_t seq_no;\n  };\n\n  /* Used for default user\n   * password = nopass\n   * acl_categories = +@all\n   * is_active = true;\n   */\n  User();\n\n  User(const User&) = delete;\n  User(User&&) = default;\n\n  // For single step updates\n  void Update(UpdateRequest&& req, const CategoryToIdxStore& cat_to_id,\n              const ReverseCategoryIndexTable& reverse_cat,\n              const CategoryToCommandsIndexStore& cat_to_commands);\n\n  bool HasPassword(std::string_view password) const;\n\n  uint32_t AclCategory() const;\n\n  std::vector<uint64_t> AclCommands() const;\n  const std::vector<uint64_t>& AclCommandsRef() const;\n\n  bool IsActive() const;\n\n  const absl::flat_hash_set<std::string>& Passwords() const;\n\n  bool HasNopass() const;\n\n  // Selector maps a command string (like HSET, SET etc) to\n  // its respective ID within the commands vector.\n  static size_t Selector(std::string_view);\n\n  const AclKeys& Keys() const;\n\n  const AclPubSub& PubSub() const;\n\n  const std::string& Namespace() const;\n\n  size_t Db() const;\n\n  using CategoryChanges = absl::flat_hash_map<CategoryChange, ChangeMetadata>;\n  using CommandChanges = absl::flat_hash_map<CommandChange, ChangeMetadata>;\n\n  const CategoryChanges& CatChanges() const;\n\n  const CommandChanges& CmdChanges() const;\n\n private:\n  void SetAclCategoriesAndIncrSeq(uint32_t cat, const CategoryToIdxStore& cat_to_id,\n                                  const ReverseCategoryIndexTable& reverse_cat,\n                                  const CategoryToCommandsIndexStore& cat_to_commands);\n  void UnsetAclCategoriesAndIncrSeq(uint32_t cat, const CategoryToIdxStore& cat_to_id,\n                                    const ReverseCategoryIndexTable& reverse_cat,\n                                    const CategoryToCommandsIndexStore& cat_to_commands);\n\n  // For ACL commands\n  void SetAclCommands(size_t index, uint64_t 
bit_index);\n  void UnsetAclCommands(size_t index, uint64_t bit_index);\n\n  void SetAclCommandsAndIncrSeq(size_t index, uint64_t bit_index);\n  void UnsetAclCommandsAndIncrSeq(size_t index, uint64_t bit_index);\n\n  // For is_active flag\n  void SetIsActive(bool is_active);\n\n  // For passwords\n  void SetPasswordHash(std::string_view password, bool is_hashed);\n  void UnsetPassword(std::string_view password);\n\n  // For ACL key globs\n  void SetKeyGlobs(std::vector<UpdateKey> keys);\n\n  // For ACL pub/sub\n  void SetPubSub(std::vector<UpdatePubSub> pub_sub);\n\n  void SetNamespace(const std::string& ns);\n\n  void SetSelectDb(std::optional<size_t> db);\n\n  // Set NOPASS and remove all passwords\n  void SetNopass();\n\n  // Passwords for each user\n  absl::flat_hash_set<std::string> password_hashes_;\n  // if `nopass` is used\n  bool nopass_ = false;\n\n  uint32_t acl_categories_{NONE};\n  // Each element index in the vector corresponds to a family of commands\n  // Each bit in the uint64_t field at index id, corresponds to a specific\n  // command of that family. Look on TableCommandBuilder and on Service::Register\n  // on how this mapping is built during the startup/registration of commands\n  std::vector<uint64_t> commands_;\n\n  // We also need to track all the explicit changes (ACL SETUSER) of acl's in-order.\n  // To speed up insertion we use the flat_hash_map and a seq_ variable which is a\n  // strictly monotonically increasing number that is used for ordering. 
Both of these\n  // indexers are merged and then sorted by the seq_ number when for example we print\n  // the ACL rules of each user via ACL LIST.\n  CategoryChanges cat_changes_;\n  CommandChanges cmd_changes_;\n  // Global modification order for changes in rules for acl commands and categories\n  size_t seq_ = 0;\n\n  // Glob patterns for the keys that a user is allowed to read/write\n  AclKeys keys_;\n\n  // Glob patterns for pub/sub channels\n  AclPubSub pub_sub_;\n\n  // if the user is on/off\n  bool is_active_{false};\n\n  std::string namespace_;\n\n  // if db == std::numeric_limits<size_t>::max() then all db's.\n  // Otherwise user restricted to the value of db_\n  size_t db_{std::numeric_limits<size_t>::max()};\n};\n\n}  // namespace dfly::acl\n"
  },
  {
    "path": "src/server/acl/user_registry.cc",
    "content": "// Copyright 2023, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#include \"server/acl/user_registry.h\"\n\n#include <limits>\n#include <mutex>\n\n#include \"base/flags.h\"\n#include \"facade/facade_types.h\"\n#include \"server/acl/acl_commands_def.h\"\n\nABSL_DECLARE_FLAG(std::string, requirepass);\n\nusing namespace util;\n\nnamespace dfly::acl {\n\nvoid UserRegistry::MaybeAddAndUpdate(std::string_view username, User::UpdateRequest req) {\n  std::unique_lock<fb2::SharedMutex> lock(mu_);\n  auto& user = registry_[username];\n  user.Update(std::move(req), *cat_to_id_table_, *reverse_cat_table_, *cat_to_commands_table_);\n}\n\nbool UserRegistry::RemoveUser(std::string_view username) {\n  std::unique_lock<fb2::SharedMutex> lock(mu_);\n  return registry_.erase(username);\n}\n\nUserCredentials UserRegistry::GetCredentials(std::string_view username) const {\n  std::shared_lock<fb2::SharedMutex> lock(mu_);\n  auto it = registry_.find(username);\n  if (it == registry_.end()) {\n    return {};\n  }\n  auto& user = it->second;\n  return {user.AclCategory(), user.AclCommands(), user.Keys(),\n          user.PubSub(),      user.Namespace(),   user.Db()};\n}\n\nbool UserRegistry::IsUserActive(std::string_view username) const {\n  std::shared_lock<fb2::SharedMutex> lock(mu_);\n  auto it = registry_.find(username);\n  if (it == registry_.end()) {\n    return false;\n  }\n  return it->second.IsActive();\n}\n\nbool UserRegistry::AuthUser(std::string_view username, std::string_view password) const {\n  std::shared_lock<fb2::SharedMutex> lock(mu_);\n  const auto& user = registry_.find(username);\n  if (user == registry_.end()) {\n    return false;\n  }\n\n  return user->second.IsActive() && user->second.HasPassword(password);\n}\n\nUserRegistry::RegistryViewWithLock UserRegistry::GetRegistryWithLock() const {\n  std::shared_lock<fb2::SharedMutex> lock(mu_);\n  return {std::move(lock), 
registry_};\n}\n\nUserRegistry::RegistryWithWriteLock UserRegistry::GetRegistryWithWriteLock() {\n  std::unique_lock<fb2::SharedMutex> lock(mu_);\n  return {std::move(lock), registry_};\n}\n\nUserRegistry::UserWithWriteLock::UserWithWriteLock(std::unique_lock<fb2::SharedMutex> lk,\n                                                   const User& user, bool exists)\n    : user(user), exists(exists), registry_lk_(std::move(lk)) {\n}\n\nUser::UpdateRequest UserRegistry::DefaultUserUpdateRequest() const {\n  // Assign field by field to suppress an annoying compiler warning\n  User::UpdateRequest req;\n  req.passwords = std::vector<User::UpdatePass>{{\"\", false, true}};\n  req.is_active = true;\n  req.updates = {std::pair<User::Sign, uint32_t>{User::Sign::PLUS, acl::ALL}};\n  req.keys = {User::UpdateKey{\"~*\", KeyOp::READ_WRITE, true, false}};\n  req.pub_sub = {User::UpdatePubSub{\"\", false, true, false}};\n  return req;\n}\n\nvoid UserRegistry::Init(const CategoryToIdxStore* cat_to_id_table,\n                        const ReverseCategoryIndexTable* reverse_cat_table,\n                        const CategoryToCommandsIndexStore* cat_to_commands_table) {\n  // if there exists an acl file to load from, requirepass\n  // will not overwrite the default's user password loaded from\n  // that file. 
Loading the default's user password from a file\n  // has higher priority than the deprecated flag\n  cat_to_id_table_ = cat_to_id_table;\n  reverse_cat_table_ = reverse_cat_table;\n  cat_to_commands_table_ = cat_to_commands_table;\n  auto default_user = DefaultUserUpdateRequest();\n  auto maybe_password = absl::GetFlag(FLAGS_requirepass);\n  if (!maybe_password.empty()) {\n    default_user.passwords.front().password = std::move(maybe_password);\n    default_user.passwords.front().nopass = false;\n  } else if (const char* env_var = getenv(\"DFLY_PASSWORD\"); env_var) {\n    default_user.passwords.front().password = env_var;\n    default_user.passwords.front().nopass = false;\n  } else if (const char* env_var = getenv(\"DFLY_requirepass\"); env_var) {\n    default_user.passwords.front().password = env_var;\n    default_user.passwords.front().nopass = false;\n  }\n  MaybeAddAndUpdate(\"default\", std::move(default_user));\n}\n\n}  // namespace dfly::acl\n"
  },
  {
    "path": "src/server/acl/user_registry.h",
    "content": "// Copyright 2022, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#pragma once\n\n#include <absl/container/flat_hash_map.h>\n\n#include <algorithm>\n#include <shared_mutex>\n#include <string>\n#include <utility>\n#include <vector>\n\n#include \"server/acl/user.h\"\n#include \"util/fibers/synchronization.h\"\n\nnamespace dfly::acl {\n\nclass UserRegistry {\n private:\n  template <template <typename T> typename LockT, typename RegT> class RegistryWithLock;\n\n public:\n  UserRegistry() = default;\n\n  UserRegistry(const UserRegistry&) = delete;\n  UserRegistry(UserRegistry&&) = delete;\n\n  void Init(const CategoryToIdxStore* cat_to_id_table,\n            const ReverseCategoryIndexTable* reverse_cat_table,\n            const CategoryToCommandsIndexStore* cat_to_commands_table);\n\n  using RegistryType = absl::flat_hash_map<std::string, User>;\n\n  // Acquires a write lock of mu_\n  // If the user with name `username` does not exist, it's added in the store with\n  // the exact fields found in req\n  // If the user exists, the bitfields are updated with a `logical and` operation\n  void MaybeAddAndUpdate(std::string_view username, User::UpdateRequest req);\n\n  // Acquires a write lock on mu_\n  // Removes user from the store\n  // kills already existing connections from the removed user\n  bool RemoveUser(std::string_view username);\n\n  // Acquires a read lock\n  UserCredentials GetCredentials(std::string_view username) const;\n\n  // Acquires a read lock\n  bool IsUserActive(std::string_view username) const;\n\n  // Acquires a read lock\n  bool AuthUser(std::string_view username, std::string_view password) const;\n\n  using RegistryViewWithLock = RegistryWithLock<std::shared_lock, const RegistryType&>;\n  using RegistryWithWriteLock = RegistryWithLock<std::unique_lock, RegistryType&>;\n\n  // Helper function used for printing users via ACL LIST\n  RegistryViewWithLock GetRegistryWithLock() const;\n\n  // 
Helper function to propagate a write lock outside the registry's scope\n  RegistryWithWriteLock GetRegistryWithWriteLock();\n\n  // Helper class for accessing a user with a write lock outside the scope of UserRegistry\n  class UserWithWriteLock {\n   public:\n    UserWithWriteLock(std::unique_lock<util::fb2::SharedMutex> lk, const User& user, bool exists);\n    const User& user;\n    const bool exists;\n\n   private:\n    std::unique_lock<util::fb2::SharedMutex> registry_lk_;\n  };\n\n  User::UpdateRequest DefaultUserUpdateRequest() const;\n\n private:\n  RegistryType registry_;\n  mutable util::fb2::SharedMutex mu_;\n\n  // Helper class for accessing the registry with a held lock outside the scope of UserRegistry\n  template <template <typename T> typename LockT, typename RegT> class RegistryWithLock {\n   public:\n    RegistryWithLock(LockT<util::fb2::SharedMutex> lk, RegT reg)\n        : registry(reg), registry_lk_(std::move(lk)) {\n    }\n    RegT registry;\n\n   private:\n    LockT<util::fb2::SharedMutex> registry_lk_;\n  };\n\n  const CategoryToIdxStore* cat_to_id_table_;\n  const ReverseCategoryIndexTable* reverse_cat_table_;\n  const CategoryToCommandsIndexStore* cat_to_commands_table_;\n};\n\n}  // namespace dfly::acl\n"
  },
  {
    "path": "src/server/acl/validator.cc",
    "content": "// Copyright 2025, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#include \"server/acl/validator.h\"\n\n#include <absl/strings/numbers.h>\n\n#include \"base/logging.h\"\n#include \"core/glob_matcher.h\"\n#include \"facade/dragonfly_connection.h\"\n#include \"server/acl/acl_commands_def.h\"\n#include \"server/command_registry.h\"\n#include \"server/conn_context.h\"\n#include \"server/server_state.h\"\n#include \"server/transaction.h\"\n\nnamespace dfly::acl {\n\nnamespace {\n\nbool Matches(std::string_view pattern, std::string_view target) {\n  GlobMatcher matcher(pattern, true);\n  return matcher.Matches(target);\n};\n\nbool ValidateCommand(const std::vector<uint64_t>& acl_commands, const CommandId& id) {\n  const size_t index = id.GetFamily();\n  const uint64_t command_mask = id.GetBitIndex();\n  DCHECK_LT(index, acl_commands.size());\n\n  return (acl_commands[index] & command_mask) != 0;\n}\n\n[[nodiscard]] std::pair<bool, AclLog::Reason> IsPubSubCommandAuthorized(\n    bool literal_match, const std::vector<uint64_t>& acl_commands, const AclPubSub& pub_sub,\n    CmdArgList tail_args, const CommandId& id) {\n  if (!ValidateCommand(acl_commands, id)) {\n    return {false, AclLog::Reason::COMMAND};\n  }\n\n  auto iterate_globs = [&](std::string_view target) {\n    for (auto& [glob, has_asterisk] : pub_sub.globs) {\n      if (literal_match && (glob == target)) {\n        return true;\n      }\n      if (!literal_match && Matches(glob, target)) {\n        return true;\n      }\n    }\n    return false;\n  };\n\n  bool allowed = true;\n  if (!pub_sub.all_channels) {\n    std::string_view name = id.name();\n    if (name == \"PUBLISH\" || name == \"SPUBLISH\") {\n      auto channel = tail_args[0];\n      allowed &= iterate_globs(facade::ToSV(channel));\n    } else {\n      for (auto channel : tail_args) {\n        allowed &= iterate_globs(facade::ToSV(channel));\n      }\n    }\n  }\n\n  return {allowed, 
AclLog::Reason::PUB_SUB};\n}\n\n}  // namespace\n\n[[nodiscard]] bool IsUserAllowedToInvokeCommand(const ConnectionContext& cntx, const CommandId& id,\n                                                ArgSlice tail_args) {\n  if (cntx.skip_acl_validation) {\n    return true;\n  }\n\n  if (id.IsAlias()) {\n    return false;\n  }\n\n  std::pair<bool, AclLog::Reason> auth_res;\n\n  if (auto pkind = id.PubSubKind(); pkind) {\n    bool is_pattern = *pkind == CO::PubSubKind::PATTERN;\n    auth_res =\n        IsPubSubCommandAuthorized(is_pattern, cntx.acl_commands, cntx.pub_sub, tail_args, id);\n  } else {\n    auth_res = IsUserAllowedToInvokeCommandGeneric(cntx, id, tail_args);\n  }\n\n  const auto [is_authed, reason] = auth_res;\n\n  if (!is_authed) {\n    auto& log = ServerState::tlocal()->acl_log;\n    log.Add(cntx, std::string(id.name()), reason);\n  }\n\n  return is_authed;\n}\n\n[[nodiscard]] std::pair<bool, AclLog::Reason> IsUserAllowedToInvokeCommandGeneric(\n    const ConnectionContext& cntx, const CommandId& id, CmdArgList tail_args) {\n  const size_t max = std::numeric_limits<size_t>::max();\n  // Once we support ranges this must change\n  const bool reject_move_command = cntx.acl_db_idx != max && id.name() == \"MOVE\";\n  const bool reject_trans_command =\n      cntx.acl_db_idx != max && cntx.acl_db_idx != cntx.db_index() && id.IsTransactional();\n  if (reject_move_command || reject_trans_command) {\n    return {false, AclLog::Reason::AUTH};\n  }\n  size_t res = 0;\n  if (tail_args.size() == 1 && id.name() == \"SELECT\" && absl::SimpleAtoi(tail_args[0], &res) &&\n      cntx.acl_db_idx != max && cntx.acl_db_idx != res) {\n    return {false, AclLog::Reason::AUTH};\n  }\n\n  const auto& acl_commands = cntx.acl_commands;\n  const auto& keys = cntx.keys;\n  if (!ValidateCommand(acl_commands, id)) {\n    return {false, AclLog::Reason::COMMAND};\n  }\n\n  const bool is_read_command = id.IsReadOnly();\n  const bool is_write_command = id.IsJournaled();\n\n  auto 
iterate_globs = [&](auto target) {\n    for (auto& [elem, op] : keys.key_globs) {\n      if (Matches(elem, target)) {\n        if (is_read_command && (op == KeyOp::READ || op == KeyOp::READ_WRITE)) {\n          return true;\n        }\n        if (is_write_command && (op == KeyOp::WRITE || op == KeyOp::READ_WRITE)) {\n          return true;\n        }\n      }\n    }\n    return false;\n  };\n\n  bool keys_allowed = true;\n  if (!keys.all_keys && id.first_key_pos() != 0 && (is_read_command || is_write_command)) {\n    auto keys_index = DetermineKeys(&id, tail_args);\n    DCHECK(keys_index);\n\n    for (std::string_view key : keys_index->Range(tail_args))\n      keys_allowed &= iterate_globs(key);\n  }\n\n  return {keys_allowed, AclLog::Reason::KEY};\n}\n\n}  // namespace dfly::acl\n"
  },
  {
    "path": "src/server/acl/validator.h",
    "content": "// Copyright 2025, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#pragma once\n\n#include <utility>\n\n#include \"facade/facade_types.h\"\n#include \"server/acl/acl_log.h\"\n#include \"server/command_registry.h\"\n\nnamespace dfly::acl {\n\nstruct AclKeys;\nstruct AclPubSub;\n\nstd::pair<bool, AclLog::Reason> IsUserAllowedToInvokeCommandGeneric(const ConnectionContext& cntx,\n                                                                    const CommandId& id,\n                                                                    facade::CmdArgList tail_args);\n\nbool IsUserAllowedToInvokeCommand(const ConnectionContext& cntx, const CommandId& id,\n                                  facade::CmdArgList tail_args);\n}  // namespace dfly::acl\n"
  },
  {
    "path": "src/server/bitops_family.cc",
    "content": "// Copyright 2022, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#include <absl/strings/ascii.h>\n#include <absl/strings/match.h>\n\n#include <nonstd/expected.hpp>\n\n#include \"base/logging.h\"\n#include \"facade/cmd_arg_parser.h\"\n#include \"facade/op_status.h\"\n#include \"facade/reply_builder.h\"\n#include \"server/acl/acl_commands_def.h\"\n#include \"server/command_families.h\"\n#include \"server/command_registry.h\"\n#include \"server/conn_context.h\"\n#include \"server/db_slice.h\"\n#include \"server/engine_shard_set.h\"\n#include \"server/error.h\"\n#include \"server/namespaces.h\"\n#include \"server/transaction.h\"\n#include \"src/core/overloaded.h\"\n#include \"util/varz.h\"\n\nnamespace dfly {\nusing namespace facade;\nusing namespace std;\n\nnamespace {\n\nusing ShardStringResults = vector<OpResult<string>>;\nconst int32_t OFFSET_FACTOR = 8;  // number of bits in byte\nconst char* OR_OP_NAME = \"OR\";\nconst char* XOR_OP_NAME = \"XOR\";\nconst char* AND_OP_NAME = \"AND\";\nconst char* NOT_OP_NAME = \"NOT\";\n\nusing BitsStrVec = vector<string>;\n\n// The following is the list of the functions that would handle the\n// commands that handle the bit operations\nvoid BitPos(CmdArgList args, CommandContext* cmd_cntx);\nvoid BitCount(CmdArgList args, CommandContext* cmd_cntx);\nvoid BitField(CmdArgList args, CommandContext* cmd_cntx);\nvoid BitFieldRo(CmdArgList args, CommandContext* cmd_cntx);\nvoid BitOp(CmdArgList args, CommandContext* cmd_cntx);\nvoid GetBit(CmdArgList args, CommandContext* cmd_cntx);\nvoid SetBit(CmdArgList args, CommandContext* cmd_cntx);\n\nOpResult<string> ReadValue(const DbContext& context, string_view key, EngineShard* shard);\nOpResult<bool> ReadValueBitsetAt(const OpArgs& op_args, string_view key, uint32_t offset);\nOpResult<std::size_t> CountBitsForValue(const OpArgs& op_args, string_view key, int64_t start,\n                                        int64_t end, bool 
bit_value);\nOpResult<int64_t> FindFirstBitWithValue(const OpArgs& op_args, string_view key, bool value,\n                                        int64_t start, int64_t end, bool as_bit);\nstring GetString(const PrimeValue& pv);\nbool SetBitValue(uint32_t offset, bool bit_value, string* entry);\nstd::size_t CountBitSetByByteIndices(string_view at, std::size_t start, std::size_t end);\nstd::size_t CountBitSet(string_view str, int64_t start, int64_t end, bool bits);\nstd::size_t CountBitSetByBitIndices(string_view at, std::size_t start, std::size_t end);\nstring RunBitOperationOnValues(string_view op, const BitsStrVec& values);\n\n// ------------------------------------------------------------------------- //\n\n// This function can be used for any case where we allowing out of bound\n// access where the default in this case would be 0 -such as bitop\nuint8_t GetByteAt(string_view s, std::size_t at) {\n  return at >= s.size() ? 0 : s[at];\n}\n\n// For XOR, OR, AND operations on a collection of bytes\ntemplate <typename BitOp, typename SkipOp>\nstring BitOpString(BitOp operation_f, SkipOp skip_f, const BitsStrVec& values, string new_value) {\n  // at this point, values are not empty\n  std::size_t max_size = new_value.size();\n\n  if (values.size() > 1) {\n    for (std::size_t i = 0; i < max_size; i++) {\n      std::uint8_t new_entry = operation_f(GetByteAt(values[0], i), GetByteAt(values[1], i));\n      for (std::size_t j = 2; j < values.size(); ++j) {\n        new_entry = operation_f(new_entry, GetByteAt(values[j], i));\n        if (skip_f(new_entry)) {\n          break;\n        }\n      }\n      new_value[i] = new_entry;\n    }\n    return new_value;\n  } else {\n    return values[0];\n  }\n}\n\n// Helper functions to support operations\n// so we would not need to check which\n// operations to run in the look (unlike\n// https://github.com/redis/redis/blob/c2b0c13d5c0fab49131f6f5e844f80bfa43f6219/src/bitops.c#L607)\nconstexpr bool SkipAnd(uint8_t byte) {\n  return 
byte == 0x0;\n}\n\nconstexpr bool SkipOr(uint8_t byte) {\n  return byte == 0xff;\n}\n\nconstexpr bool SkipXor(uint8_t) {\n  return false;\n}\n\nconstexpr uint8_t AndOp(uint8_t left, uint8_t right) {\n  return left & right;\n}\n\nconstexpr uint8_t OrOp(uint8_t left, uint8_t right) {\n  return left | right;\n}\n\nconstexpr uint8_t XorOp(uint8_t left, uint8_t right) {\n  return left ^ right;\n}\n\nstring BitOpNotString(string from) {\n  std::transform(from.begin(), from.end(), from.begin(), [](auto c) { return ~c; });\n  return from;\n}\n\n//  Bits manipulation functions\nconstexpr int32_t GetBitIndex(uint32_t offset) noexcept {\n  return offset % OFFSET_FACTOR;\n}\n\nconstexpr int32_t GetNormalizedBitIndex(uint32_t offset) noexcept {\n  return (OFFSET_FACTOR - 1) - GetBitIndex(offset);\n}\n\nconstexpr int32_t GetByteIndex(uint32_t offset) noexcept {\n  return offset / OFFSET_FACTOR;\n}\n\nuint8_t GetByteValue(string_view str, uint32_t offset) {\n  return static_cast<uint8_t>(str[GetByteIndex(offset)]);\n}\n\nconstexpr bool CheckBitStatus(uint8_t byte, uint32_t offset) {\n  return byte & (0x1 << offset);\n}\n\nconstexpr std::uint8_t CountBitsRange(std::uint8_t byte, std::uint8_t from, uint8_t to) {\n  int count = 0;\n  for (int i = from; i < to; i++) {\n    count += CheckBitStatus(byte, GetNormalizedBitIndex(i));\n  }\n  return count;\n}\n\n// Count the number of bits that are on, on bytes boundaries: i.e. Start and end are the indices for\n// bytes locations inside str CountBitSetByByteIndices\nstd::size_t CountBitSetByByteIndices(string_view at, std::size_t start, std::size_t end) {\n  if (start >= end) {\n    return 0;\n  }\n  end = std::min(end, at.size());  // don't overflow\n  std::uint32_t count =\n      std::accumulate(std::next(at.begin(), start), std::next(at.begin(), end), 0,\n                      [](auto counter, uint8_t ch) { return counter + absl::popcount(ch); });\n  return count;\n}\n\n// Count the number of bits that are on, on bits boundaries: i.e. 
Start and end are the indices for\n// bits locations inside str\nstd::size_t CountBitSetByBitIndices(string_view at, std::size_t start, std::size_t end) {\n  auto first_byte_index = GetByteIndex(start);\n  auto last_byte_index = GetByteIndex(end);\n  if (start % OFFSET_FACTOR == 0 && end % OFFSET_FACTOR == 0) {\n    return CountBitSetByByteIndices(at, first_byte_index, last_byte_index);\n  }\n  const auto last_bit_first_byte =\n      first_byte_index != last_byte_index ? OFFSET_FACTOR : GetBitIndex(end);\n  const auto first_byte = GetByteValue(at, start);\n  std::uint32_t count = CountBitsRange(first_byte, GetBitIndex(start), last_bit_first_byte);\n  if (first_byte_index < last_byte_index) {\n    first_byte_index++;\n    const auto last_byte = GetByteValue(at, end);\n    count += CountBitsRange(last_byte, 0, GetBitIndex(end));\n    count += CountBitSetByByteIndices(at, first_byte_index, last_byte_index);\n  }\n  return count;\n}\n\n// Returns normalized offset of `offset` in `size`. `size` is assumed to be a size of a container,\n// and as such the returned value is always in the range [0, size]. If `offset` is negative, it is\n// treated as an offset from the end and is normalized to be a positive offset from the start.\nint64_t NormalizedOffset(int64_t size, int64_t offset) {\n  if (offset < 0) {\n    offset = size + offset;\n  }\n  return std::min(std::max(offset, int64_t{0}), size);\n}\n\n// General purpose function to count the number of bits that are on.\n// The parameters for start, end and bits are defaulted to the start of the string,\n// end of the string and bits are false.\n// Note that when bits is false, it means that we are looking on byte boundaries.\nstd::size_t CountBitSet(string_view str, int64_t start, int64_t end, bool bits) {\n  const int64_t strlen = bits ? 
str.size() * OFFSET_FACTOR : str.size();\n\n  if (start < 0)\n    start = strlen + start;\n  if (end < 0)\n    end = strlen + end;\n\n  end = min(end, strlen);\n\n  if (strlen == 0 || start > end)\n    return 0;\n\n  start = max(start, int64_t(0));\n  end = max(end, int64_t(0));\n\n  ++end;\n  return bits ? CountBitSetByBitIndices(str, start, end)\n              : CountBitSetByByteIndices(str, start, end);\n}\n\n// return true if bit is on\nbool GetBitValue(const string& entry, uint32_t offset) {\n  const auto byte_val{GetByteValue(entry, offset)};\n  const auto index{GetNormalizedBitIndex(offset)};\n  return CheckBitStatus(byte_val, index);\n}\n\nconstexpr uint8_t TurnBitOn(uint8_t on, uint32_t offset) {\n  return on |= 1 << offset;\n}\n\nconstexpr uint8_t TurnBitOff(uint8_t on, uint32_t offset) {\n  return on &= ~(1 << offset);\n}\n\nbool SetBitValue(uint32_t offset, bool bit_value, string* entry) {\n  // we need to return the old value after setting the value for offset\n  const auto old_value{GetBitValue(*entry, offset)};  // save this as the return value\n  auto byte{GetByteValue(*entry, offset)};\n  const auto bit_index{GetNormalizedBitIndex(offset)};\n  byte = bit_value ? TurnBitOn(byte, bit_index) : TurnBitOff(byte, bit_index);\n  (*entry)[GetByteIndex(offset)] = byte;\n  return old_value;\n}\n\n// ------------------------------------------------------------------------- //\n\nclass ElementAccess {\n private:\n  string_view key_;\n  DbContext context_;\n  mutable DbSlice::ItAndUpdater updater_;\n\n public:\n  ElementAccess(string_view key, const OpArgs& args) : key_{key}, context_{args.db_cntx} {\n  }\n\n  /* If allow_wrong_type = true - it still finds the element even if it's WRONG_TYPE. This is used\n     for blind updates. See BITOP operation. 
*/\n  OpStatus Find(bool allow_wrong_type);\n\n  bool IsNewEntry() const {\n    return updater_.is_new;\n  }\n\n  string Value() const;\n\n  bool GetByteAtIndex(size_t idx, uint8_t* res) const;\n  void SetByteAtIndex(size_t idx, uint8_t value) const;\n\n  void Commit(string_view new_value) const;\n\n  // return nullopt when key exists but it's not encoded as string\n  // return true if key exists and false if it doesn't\n  std::optional<bool> Exists();\n};\n\nstd::optional<bool> ElementAccess::Exists() {\n  auto& db_slice = context_.ns->GetCurrentDbSlice();\n  auto res = db_slice.FindReadOnly(context_, key_, OBJ_STRING);\n  if (res.status() == OpStatus::WRONG_TYPE) {\n    return {};\n  }\n  return res.status() != OpStatus::KEY_NOTFOUND;\n}\n\nOpStatus ElementAccess::Find(bool allow_wrong_type) {\n  auto& db_slice = context_.ns->GetCurrentDbSlice();\n  // If we allow wrong type, we use nullopt to indicate that we don't care about the type.\n  auto op_res = db_slice.AddOrFind(\n      context_, key_, allow_wrong_type ? std::nullopt : std::optional<unsigned>{OBJ_STRING});\n  RETURN_ON_BAD_STATUS(op_res);\n  auto& add_res = *op_res;\n\n  updater_ = std::move(add_res);\n\n  return OpStatus::OK;\n}\n\nstring ElementAccess::Value() const {\n  return IsNewEntry() ? 
string{} : GetString(updater_.it->second);\n}\n\nbool ElementAccess::GetByteAtIndex(size_t idx, uint8_t* res) const {\n  DCHECK(!IsNewEntry());\n  return updater_.it->second.GetByteAtIndex(idx, res);\n}\n\nvoid ElementAccess::SetByteAtIndex(size_t idx, uint8_t val) const {\n  DCHECK(!IsNewEntry());\n  DCHECK_LT(idx, updater_.it->second.Size());\n  auto [success, _] = updater_.it->second.SetByteAtIndex(idx, val);\n  if (success) {\n    updater_.post_updater.Run();\n  }\n}\n\nvoid ElementAccess::Commit(string_view new_value) const {\n  if (new_value.empty()) {\n    if (!IsNewEntry()) {\n      updater_.post_updater.Run();\n    } else {\n      // No need to run, it was a new entry and it got removed\n      updater_.post_updater.Cancel();\n    }\n    context_.ns->GetCurrentDbSlice().Del(context_, updater_.it);\n  } else {\n    if (!IsNewEntry() && updater_.it->second.ObjType() != OBJ_STRING) {\n      updater_.post_updater.ReduceHeapUsage();\n    }\n    updater_.it->second.SetString(new_value);\n    updater_.post_updater.Run();\n  }\n}\n\n// =============================================\n// Set a new value to a given bit\n\nOpResult<bool> BitNewValue(const OpArgs& args, string_view key, uint32_t offset, bool bit_value) {\n  ElementAccess element_access{key, args};\n  auto& db_slice = args.GetDbSlice();\n  DCHECK(db_slice.IsDbValid(args.db_cntx.db_index));\n  bool old_value = false;\n\n  auto find_res = element_access.Find(false);\n\n  if (find_res != OpStatus::OK) {\n    VLOG(1) << \"Find failed for key: \" << key << \" with error: \" << find_res;\n    return find_res;\n  }\n\n  const size_t byte_index = GetByteIndex(offset);\n\n  // Create a new entry\n  if (element_access.IsNewEntry()) {\n    VLOG(2) << \"Creating new key: \" << key << \" with size: \" << (byte_index + 1) << \" bytes\";\n    string new_entry(byte_index + 1, 0);\n    old_value = SetBitValue(offset, bit_value, &new_entry);\n    element_access.Commit(new_entry);\n    return old_value;\n  }\n\n  // Get 
byte where bit offset is located. If offset is out of bound it means\n  // that we need to extend the string otherwise we just update.\n  uint8_t existing_byte;\n  if (element_access.GetByteAtIndex(byte_index, &existing_byte)) {\n    VLOG(2) << \"Updating key: \" << key << \" at byte index: \" << byte_index;\n    uint32_t bit_index = GetNormalizedBitIndex(offset);\n    old_value = CheckBitStatus(existing_byte, bit_index);\n    if (old_value != bit_value) {\n      existing_byte =\n          bit_value ? TurnBitOn(existing_byte, bit_index) : TurnBitOff(existing_byte, bit_index);\n      element_access.SetByteAtIndex(byte_index, existing_byte);\n    }\n  } else {\n    VLOG(2) << \"Extending key: \" << key << \" to \" << (byte_index + 1) << \" bytes\";\n    string existing_entry{element_access.Value()};\n    existing_entry.resize(byte_index + 1, 0);\n    SetBitValue(offset, bit_value, &existing_entry);\n    // We always need to commit the extended key\n    element_access.Commit(existing_entry);\n  }\n\n  return old_value;\n}\n\n// ---------------------------------------------------------\n\nstring RunBitOperationOnValues(string_view op, const BitsStrVec& values) {\n  // This function accept an operation (either OR, XOR, NOT or OR), and run bit operation\n  // on all the values we got from the database. Note that in case that one of the values\n  // is shorter than the other it would return a 0 and the operation would continue\n  // until we ran the longest value. 
The function will return the resulting new value\n  std::size_t max_len = 0;\n  std::size_t max_len_index = 0;\n\n  const auto BitOperation = [&]() {\n    if (op == OR_OP_NAME) {\n      string default_str{values[max_len_index]};\n      return BitOpString(OrOp, SkipOr, values, std::move(default_str));\n    } else if (op == XOR_OP_NAME) {\n      return BitOpString(XorOp, SkipXor, values, string(max_len, 0));\n    } else if (op == AND_OP_NAME) {\n      return BitOpString(AndOp, SkipAnd, values, string(max_len, 0));\n    } else if (op == NOT_OP_NAME) {\n      return BitOpNotString(values[0]);\n    } else {\n      LOG(FATAL) << \"Operation not supported '\" << op << \"'\";\n      return string{};  // otherwise we will have warning of not returning value\n    }\n  };\n\n  if (values.empty()) {  // this is ok in case we don't have the src keys\n    return string{};\n  }\n  // The new result is the max length input\n  max_len = values[0].size();\n  for (std::size_t i = 1; i < values.size(); ++i) {\n    if (values[i].size() > max_len) {\n      max_len = values[i].size();\n      max_len_index = i;\n    }\n  }\n  return BitOperation();\n}\n\nOpResult<string> CombineResultOp(ShardStringResults result, string_view op) {\n  // take valid result for each shard\n  BitsStrVec values;\n  for (auto&& res : result) {\n    if (res) {\n      auto v = res.value();\n      values.emplace_back(std::move(v));\n    } else {\n      if (res.status() != OpStatus::KEY_NOTFOUND) {\n        // something went wrong, just bale out\n        return res;\n      }\n    }\n  }\n\n  // and combine them to single result\n  return RunBitOperationOnValues(op, values);\n}\n\n// For bitop not - we cannot accumulate\nOpResult<string> RunBitOpNot(const OpArgs& op_args, string_view key) {\n  // if we found the value, just return, if not found then skip, otherwise report an error\n  DbSlice& db_slice = op_args.GetDbSlice();\n  auto find_res = db_slice.FindReadOnly(op_args.db_cntx, key, OBJ_STRING);\n  if (find_res) 
{\n    return GetString(find_res.value()->second);\n  } else {\n    return find_res.status();\n  }\n}\n\n// Read only operation where we are running the bit operation on all the\n// values that belong to same shard.\nOpResult<string> RunBitOpOnShard(string_view op, const OpArgs& op_args, ShardArgs::Iterator start,\n                                 ShardArgs::Iterator end) {\n  DCHECK(start != end);\n  if (op == NOT_OP_NAME) {\n    return RunBitOpNot(op_args, *start);\n  }\n\n  DbSlice& db_slice = op_args.GetDbSlice();\n  BitsStrVec values;\n\n  // collect all the value for this shard\n  for (; start != end; ++start) {\n    auto find_res = db_slice.FindReadOnly(op_args.db_cntx, *start, OBJ_STRING);\n    if (find_res) {\n      values.emplace_back(GetString(find_res.value()->second));\n    } else {\n      if (find_res.status() == OpStatus::KEY_NOTFOUND) {\n        continue;  // this is allowed, just return empty string per Redis\n      } else {\n        return find_res.status();\n      }\n    }\n  }\n  // Run the operation on all the values that we found\n  string op_result = RunBitOperationOnValues(op, values);\n  return op_result;\n}\n\ntemplate <typename T>\nvoid HandleOpValueResult(const OpResult<T>& result, SinkReplyBuilder* builder) {\n  static_assert(std::is_integral<T>::value,\n                \"we are only handling types that are integral types in the return types from \"\n                \"here\");\n  if (result) {\n    builder->SendLong(result.value());\n  } else {\n    switch (result.status()) {\n      case OpStatus::WRONG_TYPE:\n        builder->SendError(kWrongTypeErr);\n        break;\n      case OpStatus::OUT_OF_MEMORY:\n        builder->SendError(kOutOfMemory);\n        break;\n      default:\n        builder->SendLong(0);  // in case we don't have the value we should just send 0\n        break;\n    }\n  }\n}\n\n// ------------------------------------------------------------------------- //\n//  Impl for the command functions\nvoid BitPos(CmdArgList 
args, CommandContext* cmd_cntx) {\n  // Support for the command BITPOS\n  // See details at https://redis.io/commands/bitpos/\n  auto* builder = cmd_cntx->rb();\n  if (args.size() < 1 || args.size() > 5) {\n    return builder->SendError(kSyntaxErr);\n  }\n\n  string_view key = ArgS(args, 0);\n\n  int32_t value{0};\n  int64_t start = 0;\n  int64_t end = std::numeric_limits<int64_t>::max();\n  bool as_bit = false;\n\n  if (!absl::SimpleAtoi(ArgS(args, 1), &value)) {\n    return builder->SendError(kInvalidIntErr);\n  } else if (value != 0 && value != 1) {\n    return builder->SendError(\"The bit argument must be 1 or 0\");\n  }\n\n  if (args.size() >= 3) {\n    if (!absl::SimpleAtoi(ArgS(args, 2), &start)) {\n      return builder->SendError(kInvalidIntErr);\n    }\n\n    if (args.size() >= 4) {\n      if (!absl::SimpleAtoi(ArgS(args, 3), &end)) {\n        return builder->SendError(kInvalidIntErr);\n      }\n\n      if (args.size() >= 5) {\n        string arg = absl::AsciiStrToUpper(ArgS(args, 4));\n        if (arg == \"BIT\") {\n          as_bit = true;\n        } else if (arg == \"BYTE\") {\n          as_bit = false;\n        } else {\n          return builder->SendError(kSyntaxErr);\n        }\n      }\n    }\n  }\n\n  auto cb = [&](Transaction* t, EngineShard* shard) {\n    return FindFirstBitWithValue(t->GetOpArgs(shard), key, value, start, end, as_bit);\n  };\n  OpResult<int64_t> res = cmd_cntx->tx()->ScheduleSingleHopT(std::move(cb));\n  HandleOpValueResult(res, builder);\n}\n\nvoid BitCount(CmdArgList args, CommandContext* cmd_cntx) {\n  // Support for the command BITCOUNT\n  // See details at https://redis.io/commands/bitcount/\n  // Please note that if the key don't exists, it would return 0\n\n  CmdArgParser parser(args);\n  auto key = parser.Next<string_view>();\n\n  std::pair<int64_t, int64_t> start_end;\n  if (parser.HasNext()) {\n    auto tuple_result = parser.Next<int64_t, int64_t>();\n    start_end = std::make_pair(std::get<0>(tuple_result), 
std::get<1>(tuple_result));\n  } else {\n    start_end = std::make_pair(0, std::numeric_limits<int64_t>::max());\n  }\n\n  bool as_bit = parser.HasNext() ? parser.MapNext(\"BYTE\", false, \"BIT\", true) : false;\n  if (!parser.Finalize()) {\n    return cmd_cntx->SendError(parser.TakeError().MakeReply());\n  }\n  auto cb = [&, start_end](Transaction* t, EngineShard* shard) {\n    return CountBitsForValue(t->GetOpArgs(shard), key, start_end.first, start_end.second, as_bit);\n  };\n  OpResult<std::size_t> res = cmd_cntx->tx()->ScheduleSingleHopT(std::move(cb));\n  HandleOpValueResult(res, cmd_cntx->rb());\n}\n\n// GCC yields a wrong warning about uninitialized optional use\n#ifndef __clang__\n#pragma GCC diagnostic push\n#pragma GCC diagnostic ignored \"-Wmaybe-uninitialized\"\n#endif\n\nenum class EncodingType { UINT, INT, NILL };\n\nstruct CommonAttributes {\n  EncodingType type;\n  size_t encoding_bit_size;\n  size_t offset;\n};\n\n// We either return the result of the subcommand (int64_t) or nullopt\n// to represent overflow/underflow failures\nusing ResultType = std::optional<int64_t>;\n\nstruct Overflow {\n  enum Policy { WRAP, SAT, FAIL };\n\n  // Used to check for unsigned overflow/underflow.\n  // If incr is non zero, we check for overflows in the expression incr + *value\n  // If incr is zero, we check for overflows in the expression *value\n  // If the overflow fails because of Policy::FAIL, it returns false. Otherwise, true.\n  // The result of handling the overflow is stored in the pointer value\n  bool UIntOverflow(int64_t incr, size_t total_bits, int64_t* value) const;\n\n  // Used to check for signed overflow/underflow.\n  // If incr is non zero, we check for overflows in the expression incr + *value\n  // If incr is zero, we check for overflows in the expression *value\n  // If the overflow fails because of Policy::FAIL, it returns false. 
Otherwise, true.\n  // The result of handling the overflow is stored in the pointer value\n  bool IntOverflow(size_t total_bits, int64_t incr, bool add, int64_t* value) const;\n\n  Policy type = WRAP;\n};\n\nbool Overflow::UIntOverflow(int64_t incr, size_t total_bits, int64_t* value) const {\n  // total up to 63 bits -- we do not support 64 bit unsigned\n  const uint64_t max = (1UL << total_bits) - 1;\n\n  uint64_t incr_value = incr;\n  if (incr_value + *value > max) {\n    switch (type) {\n      case Overflow::WRAP:\n        // safe to do, won't overflow, both incr and value are <= than 2^63 - 1\n        *value = (incr_value + *value) & max;\n        break;\n      case Overflow::SAT:\n        *value = max;\n        break;\n      case Overflow::FAIL:\n        *value = 0;\n        return false;\n    }\n    return true;\n  }\n\n  *value = incr_value + *value;\n  return true;\n}\n\nbool Overflow::IntOverflow(size_t total_bits, int64_t incr, bool add, int64_t* value) const {\n  // This is exactly how redis handles signed overflow and we use the exact same chore\n  const int64_t int_max = std::numeric_limits<int64_t>::max();\n  const int64_t max = (total_bits == 64) ? 
int_max : ((1L << (total_bits - 1)) - 1);\n  const int64_t min = (-max) - 1;\n  auto switch_overflow = [&](int64_t wrap_case, int64_t sat_case, int64_t i) {\n    switch (type) {\n      case Overflow::WRAP: {\n        uint64_t msb = 1UL << (total_bits - 1);\n        uint64_t a = *value, b = incr;\n        // Perform addition as unsigned so that's defined\n        uint64_t c = a + b;\n        if (total_bits < 64) {\n          uint64_t mask = static_cast<uint64_t>(-1) << total_bits;\n          if (c & msb) {\n            c |= mask;\n          } else {\n            c &= ~mask;\n          }\n        }\n        *value = c;\n        break;\n      }\n      case Overflow::SAT:\n        *value = sat_case;\n        break;\n      case Overflow::FAIL:\n        *value = 0;\n        return false;\n    }\n    return true;\n  };\n\n  // maxincr/minincr can overflow but it won't be an issue because we only use them\n  // after checking 'value' range, so when they are used no overflow\n  // happens. 'uint64_t' cast is there just to prevent undefined behavior on\n  // overflow */\n  int64_t maxincr = static_cast<uint64_t>(max) - *value;\n  int64_t minincr = min - *value;\n\n  // overflow\n  if (*value > max || (total_bits != 64 && incr > maxincr) ||\n      (*value >= 0 && incr > 0 && incr > maxincr)) {\n    return switch_overflow(min, max, 1);\n  }\n\n  // underflow\n  if (*value < min || (total_bits != 64 && incr < minincr) ||\n      (*value < 0 && incr < 0 && incr < minincr)) {\n    return switch_overflow(max, min, -1);\n  }\n\n  *value = *value + incr;\n\n  return true;\n}\n\nclass Get {\n public:\n  explicit Get(CommonAttributes attr) : attr_(attr) {\n  }\n\n  // Apply the GET subcommand to the bitfield bytes.\n  // Return either the subcommand result (int64_t) or empty optional if failed because of\n  // Policy:FAIL\n  ResultType ApplyTo(Overflow ov, const string* bitfield) const;\n\n private:\n  CommonAttributes attr_;\n};\n\nResultType Get::ApplyTo(Overflow ov, const string* 
bitfield) const {\n  const auto& bytes = *bitfield;\n  const int32_t total_bytes = static_cast<int32_t>(bytes.size());\n  const size_t offset = attr_.offset;\n  auto last_byte_offset = GetByteIndex(attr_.offset + attr_.encoding_bit_size - 1);\n\n  if (GetByteIndex(offset) >= total_bytes) {\n    return 0;\n  }\n\n  const string* result_str = bitfield;\n  string buff;\n  uint32_t lsb = attr_.offset + attr_.encoding_bit_size - 1;\n  if (last_byte_offset >= total_bytes) {\n    buff = *bitfield;\n    buff.resize(last_byte_offset + 1, 0);\n    result_str = &buff;\n  }\n\n  const bool is_negative =\n      CheckBitStatus(GetByteValue(bytes, offset), GetNormalizedBitIndex(offset));\n\n  int64_t result = 0;\n  for (size_t i = 0; i < attr_.encoding_bit_size; ++i) {\n    uint8_t byte{GetByteValue(*result_str, lsb)};\n    int32_t index = GetNormalizedBitIndex(lsb);\n    int64_t old_bit = CheckBitStatus(byte, index);\n    result |= old_bit << i;\n    --lsb;\n  }\n\n  if (is_negative && attr_.type == EncodingType::INT && result > 0) {\n    result |= -1L ^ ((1L << attr_.encoding_bit_size) - 1);\n  }\n\n  return result;\n}\n\nclass Set {\n public:\n  explicit Set(CommonAttributes attr, int64_t value) : attr_(attr), set_value_(value) {\n  }\n\n  // Apply the SET subcommand to the bitfield value.\n  // Return either the subcommand result (int64_t) or empty optional if failed because of\n  // Policy:FAIL Updates the bitfield to contain the new value\n  ResultType ApplyTo(Overflow ov, string* bitfield);\n\n private:\n  // Helper function that delegates overflow checking to the Overflow object\n  bool HandleOverflow(Overflow ov);\n\n  CommonAttributes attr_;\n  int64_t set_value_;\n};\n\nResultType Set::ApplyTo(Overflow ov, string* bitfield) {\n  string& bytes = *bitfield;\n  const int32_t total_bytes = static_cast<int32_t>(bytes.size());\n  auto last_byte_offset = GetByteIndex(attr_.offset + attr_.encoding_bit_size - 1) + 1;\n  const size_t offset = attr_.offset;\n  if 
(last_byte_offset > total_bytes) {\n    bytes.resize(last_byte_offset, 0);\n  }\n\n  if (!HandleOverflow(ov)) {\n    return {};\n  }\n\n  uint32_t lsb = attr_.offset + attr_.encoding_bit_size - 1;\n  int64_t old_value = 0;\n\n  const bool is_negative =\n      CheckBitStatus(GetByteValue(*bitfield, offset), GetNormalizedBitIndex(offset));\n  for (size_t i = 0; i < attr_.encoding_bit_size; ++i) {\n    bool bit_value = (set_value_ >> i) & 0x01;\n    uint8_t byte{GetByteValue(bytes, lsb)};\n    int32_t index = GetNormalizedBitIndex(lsb);\n    int64_t old_bit = CheckBitStatus(byte, index);\n    byte = bit_value ? TurnBitOn(byte, index) : TurnBitOff(byte, index);\n    bytes[GetByteIndex(lsb)] = byte;\n    old_value |= old_bit << i;\n    --lsb;\n  }\n\n  if (is_negative && attr_.type == EncodingType::INT && old_value > 0) {\n    // Sign extension for negative signed integers.\n    // Is creates a mask that sets all upper bits to 1\n    // and converts positive old_value (15) to correct negative value (-1)\n    // Example: 4-bit field 1111 should be -1, not 15.\n    old_value |= -1L ^ ((1L << attr_.encoding_bit_size) - 1);\n  }\n\n  return old_value;\n}\n\nbool Set::HandleOverflow(Overflow ov) {\n  size_t total_bits = attr_.encoding_bit_size;\n  if (attr_.type == EncodingType::UINT) {\n    return ov.UIntOverflow(0, attr_.encoding_bit_size, &set_value_);\n  }\n\n  return ov.IntOverflow(total_bits, 0, false, &set_value_);\n}\n\nclass IncrBy {\n public:\n  explicit IncrBy(CommonAttributes attr, int64_t val) : attr_(attr), incr_value_(val) {\n  }\n\n  // Apply the INCRBY subcommand to the bitfield value.\n  // Return either the subcommand result (int64_t) or empty optional if failed because of\n  // Policy:FAIL Updates the bitfield to contain the new incremented value\n  ResultType ApplyTo(Overflow ov, string* bitfield);\n\n private:\n  // Helper function that delegates overflow checking to the Overflow object\n  bool HandleOverflow(Overflow ov, int64_t* previous);\n\n  
CommonAttributes attr_;\n  int64_t incr_value_;\n};\n\nResultType IncrBy::ApplyTo(Overflow ov, string* bitfield) {\n  string& bytes = *bitfield;\n  Get get(attr_);\n  auto res = get.ApplyTo(ov, &bytes);\n  const int32_t total_bytes = static_cast<int32_t>(bytes.size());\n  auto last_byte_offset = GetByteIndex(attr_.offset + attr_.encoding_bit_size - 1);\n\n  if (last_byte_offset >= total_bytes) {\n    bytes.resize(last_byte_offset + 1, 0);\n  }\n\n  if (!HandleOverflow(ov, &*res)) {\n    return {};\n  }\n\n  Set set(attr_, *res);\n  set.ApplyTo(ov, &bytes);\n  return *res;\n}\n\nbool IncrBy::HandleOverflow(Overflow ov, int64_t* previous) {\n  if (attr_.type == EncodingType::UINT) {\n    return ov.UIntOverflow(incr_value_, attr_.encoding_bit_size, previous);\n  }\n\n  const size_t total_bits = attr_.encoding_bit_size;\n  return ov.IntOverflow(total_bits, incr_value_, true, previous);\n}\n\n// Subcommand types for each of the subcommands of the BITFIELD command\nusing Command = std::variant<Get, Set, Overflow, IncrBy>;\n\nusing Result = std::optional<ResultType>;\n\n// Visitor for all the subcommand variants. Calls ApplyTo, to execute the subcommand\nclass CommandApplyVisitor {\n public:\n  explicit CommandApplyVisitor(string bitfield) : bitfield_(std::move(bitfield)) {\n  }\n\n  Result operator()(Get get) {\n    return get.ApplyTo(overflow_, &bitfield_);\n  }\n\n  template <typename T> Result operator()(T update) {\n    should_commit_ = true;\n    return update.ApplyTo(overflow_, &bitfield_);\n  }\n\n  Result operator()(Overflow overflow) {\n    overflow_ = overflow;\n    return {};\n  }\n\n  string_view Bitfield() const {\n    return bitfield_;\n  }\n\n  bool ShouldCommit() const {\n    return should_commit_;\n  }\n\n private:\n  // Most recent overflow object encountered. 
We cache it to make the overflow\n  // policy changes stick among different subcommands\n  Overflow overflow_;\n  // This will be commited if it was updated\n  string bitfield_;\n  // If either of the subcommands SET|INCRBY is used we should persist the changes.\n  // Otherwise, we only used a read only subcommand (GET)\n  bool should_commit_ = false;\n};\n\n// A lit of subcommands used in BITFIELD command\nusing CommandList = vector<Command>;\n\n// Helper class used in the shard cb that abstracts away the iteration and execution of subcommands\nclass StateExecutor {\n public:\n  explicit StateExecutor(ElementAccess access) : access_{std::move(access)} {\n  }\n\n  //  Iterates over all of the parsed subcommands and executes them one by one. At the end,\n  //  if an update subcommand SET|INCRBY was used, commit back the changes via the ElementAccess\n  //  object\n  OpResult<vector<ResultType>> Execute(const CommandList& commands);\n\n private:\n  ElementAccess access_;\n};\n\nOpResult<vector<ResultType>> StateExecutor::Execute(const CommandList& commands) {\n  auto res = access_.Exists();\n  if (!res) {\n    return {OpStatus::WRONG_TYPE};\n  }\n  string value;\n  if (*res) {\n    access_.Find(false);\n    value = access_.Value();\n  }\n\n  vector<ResultType> results;\n  CommandApplyVisitor visitor(std::move(value));\n  for (auto& command : commands) {\n    auto res = std::visit(visitor, command);\n    if (res) {\n      results.push_back(*res);\n    }\n  }\n\n  if (visitor.ShouldCommit()) {\n    access_.Find(false);\n    access_.Commit(visitor.Bitfield());\n  }\n\n  return results;\n}\n\nconst char kInvalidBitfieldTypeErr[] =\n    \"invalid bitfield type. use something like i16 u8. 
note that u64 is not supported but i64 is.\";\n\nnonstd::expected<CommonAttributes, string> ParseCommonAttr(CmdArgParser* parser) {\n  CommonAttributes parsed;\n  using nonstd::make_unexpected;\n\n  auto [encoding, offset_str] = parser->Next<string_view, string_view>();\n\n  if (encoding.empty()) {\n    return make_unexpected(kSyntaxErr);\n  }\n\n  // Check case-sensitivity - only lowercase 'u' and 'i' are allowed\n  if (encoding[0] == 'u') {\n    parsed.type = EncodingType::UINT;\n  } else if (encoding[0] == 'i') {\n    parsed.type = EncodingType::INT;\n  } else {\n    return make_unexpected(kInvalidBitfieldTypeErr);\n  }\n\n  string_view bits = encoding.substr(1);\n\n  // Additional validation: check if bits part contains any invalid characters\n  for (char c : bits) {\n    if (!std::isdigit(c)) {\n      return make_unexpected(kInvalidBitfieldTypeErr);\n    }\n  }\n\n  if (!absl::SimpleAtoi(bits, &parsed.encoding_bit_size)) {\n    return make_unexpected(kSyntaxErr);\n  }\n\n  if (parsed.encoding_bit_size <= 0 || parsed.encoding_bit_size > 64) {\n    return make_unexpected(kInvalidBitfieldTypeErr);\n  }\n\n  if (parsed.encoding_bit_size == 64 && parsed.type == EncodingType::UINT) {\n    return make_unexpected(kInvalidBitfieldTypeErr);\n  }\n\n  bool is_proxy = false;\n  if (absl::StartsWith(offset_str, \"#\")) {\n    offset_str = offset_str.substr(1);\n    is_proxy = true;\n  }\n  if (!absl::SimpleAtoi(offset_str, &parsed.offset)) {\n    return make_unexpected(kSyntaxErr);\n  }\n  if (is_proxy) {\n    parsed.offset = parsed.offset * parsed.encoding_bit_size;\n  }\n  return parsed;\n}\n\n// Parses a list of arguments (without key) to a CommandList.\n// Returns the CommandList if the parsing completed succefully or string\n// to indicate an error\nnonstd::expected<CommandList, string> ParseToCommandList(CmdArgList args, bool read_only) {\n  enum class Cmds { OVERFLOW_OPT, GET_OPT, SET_OPT, INCRBY_OPT };\n  CommandList result;\n\n  using nonstd::make_unexpected;\n\n  
CmdArgParser parser(args);\n  while (parser.HasNext()) {\n    auto cmd = parser.MapNext(\"OVERFLOW\", Cmds::OVERFLOW_OPT, \"GET\", Cmds::GET_OPT, \"SET\",\n                              Cmds::SET_OPT, \"INCRBY\", Cmds::INCRBY_OPT);\n    if (parser.TakeError()) {\n      return make_unexpected(kSyntaxErr);\n    }\n\n    if (cmd == Cmds::OVERFLOW_OPT) {\n      // BITFIELD_RO shouldn't support this cmd, but it is ignored in Valkey so we ignore it too\n      using pol = Overflow::Policy;\n      auto res = parser.MapNext(\"SAT\", pol::SAT, \"WRAP\", pol::WRAP, \"FAIL\", pol::FAIL);\n      if (!parser.HasError()) {\n        result.push_back(Overflow{res});\n        continue;\n      }\n      parser.TakeError();\n      return make_unexpected(kSyntaxErr);\n    }\n\n    auto maybe_attr = ParseCommonAttr(&parser);\n    if (!maybe_attr.has_value()) {\n      parser.TakeError();\n      return make_unexpected(std::move(maybe_attr.error()));\n    }\n\n    auto attr = maybe_attr.value();\n    if (cmd == Cmds::GET_OPT) {\n      result.push_back(Command(Get(attr)));\n      continue;\n    }\n\n    if (read_only) {\n      return make_unexpected(\"BITFIELD_RO only supports the GET subcommand\");\n    }\n\n    int64_t value = parser.Next<int64_t>();\n    if (parser.TakeError()) {\n      return make_unexpected(kSyntaxErr);\n    }\n    if (cmd == Cmds::SET_OPT) {\n      result.push_back(Command(Set(attr, value)));\n      continue;\n    }\n\n    if (cmd == Cmds::INCRBY_OPT) {\n      result.push_back(Command(IncrBy(attr, value)));\n      continue;\n    }\n    parser.TakeError();\n    return make_unexpected(kSyntaxErr);\n  }\n\n  return result;\n}\n\nvoid SendResults(const vector<ResultType>& results, SinkReplyBuilder* builder) {\n  auto* rb = static_cast<RedisReplyBuilder*>(builder);\n  const size_t total = results.size();\n  if (total == 0) {\n    rb->SendEmptyArray();\n    return;\n  }\n\n  RedisReplyBuilder::ArrayScope scope{rb, results.size()};\n  for (const auto& elem : results) {\n    
if (elem)\n      rb->SendLong(*elem);\n    else\n      rb->SendNull();\n  }\n}\n\nvoid BitFieldGeneric(CmdArgList args, bool read_only, Transaction* tx, SinkReplyBuilder* builder) {\n  if (args.size() == 1) {\n    auto* rb = static_cast<RedisReplyBuilder*>(builder);\n    rb->SendEmptyArray();\n    return;\n  }\n  auto key = ArgS(args, 0);\n  auto maybe_ops_list = ParseToCommandList(args.subspan(1), read_only);\n\n  if (!maybe_ops_list.has_value()) {\n    builder->SendError(maybe_ops_list.error());\n    return;\n  }\n  CommandList cmd_list = std::move(maybe_ops_list.value());\n\n  auto cb = [&cmd_list, &key](Transaction* t, EngineShard* shard) -> OpResult<vector<ResultType>> {\n    StateExecutor executor(ElementAccess(key, t->GetOpArgs(shard)));\n    return executor.Execute(cmd_list);\n  };\n\n  OpResult<vector<ResultType>> res = tx->ScheduleSingleHopT(std::move(cb));\n\n  if (res == OpStatus::WRONG_TYPE) {\n    builder->SendError(kWrongTypeErr);\n    return;\n  }\n\n  SendResults(*res, builder);\n}\n\nvoid BitField(CmdArgList args, CommandContext* cmd_cntx) {\n  BitFieldGeneric(args, false, cmd_cntx->tx(), cmd_cntx->rb());\n}\n\nvoid BitFieldRo(CmdArgList args, CommandContext* cmd_cntx) {\n  BitFieldGeneric(args, true, cmd_cntx->tx(), cmd_cntx->rb());\n}\n\n#ifndef __clang__\n#pragma GCC diagnostic pop\n#endif\n\nvoid BitOp(CmdArgList args, CommandContext* cmd_cntx) {\n  static const std::array<string_view, 4> BITOP_OP_NAMES{OR_OP_NAME, XOR_OP_NAME, AND_OP_NAME,\n                                                         NOT_OP_NAME};\n  string op = absl::AsciiStrToUpper(ArgS(args, 0));\n  string_view dest_key = ArgS(args, 1);\n  bool illegal = std::none_of(BITOP_OP_NAMES.begin(), BITOP_OP_NAMES.end(),\n                              [&op](auto val) { return op == val; });\n\n  auto* builder = cmd_cntx->rb();\n  if (illegal || (op == NOT_OP_NAME && args.size() > 3)) {\n    return builder->SendError(kSyntaxErr);  // too many arguments\n  }\n\n  // Multi shard access - 
read only\n  ShardStringResults result_set(shard_set->size(), OpStatus::KEY_NOTFOUND);\n  ShardId dest_shard = Shard(dest_key, result_set.size());\n\n  auto shard_bitop = [&](Transaction* t, EngineShard* shard) {\n    ShardArgs largs = t->GetShardArgs(shard->shard_id());\n    DCHECK(!largs.Empty());\n    ShardArgs::Iterator start = largs.begin(), end = largs.end();\n    if (shard->shard_id() == dest_shard) {\n      CHECK_EQ(*start, dest_key);\n      ++start;\n      if (start == end) {  // no more keys to check\n        return OpStatus::OK;\n      }\n    }\n    OpArgs op_args = t->GetOpArgs(shard);\n    result_set[shard->shard_id()] = RunBitOpOnShard(op, op_args, start, end);\n    return OpStatus::OK;\n  };\n\n  cmd_cntx->tx()->Execute(std::move(shard_bitop), false);  // we still have more work to do\n  // All result from each shard\n  const auto joined_results = CombineResultOp(result_set, op);\n  // Second phase - save to target key if successful\n  if (!joined_results) {\n    cmd_cntx->tx()->Conclude();\n    cmd_cntx->SendError(joined_results.status());\n    return;\n  } else {\n    auto op_result = joined_results.value();\n    auto store_cb = [&](Transaction* t, EngineShard* shard) {\n      if (shard->shard_id() == dest_shard) {\n        ElementAccess operation{dest_key, t->GetOpArgs(shard)};\n        auto find_res = operation.Find(true);\n\n        // BITOP command acts as a blind update. If the key existed and its type\n        // was not a string we still want to Commit with the new value.\n        if (find_res == OpStatus::OK || find_res == OpStatus::WRONG_TYPE) {\n          operation.Commit(op_result);\n\n          if (shard->journal()) {\n            if (op_result.empty()) {\n              // We need to delete it if the key exists. 
If it doesn't, we just\n              // skip it and do not send it to the replica at all.\n              if (!operation.IsNewEntry()) {\n                RecordJournal(t->GetOpArgs(shard), \"DEL\", {dest_key});\n              }\n            } else {\n              RecordJournal(t->GetOpArgs(shard), \"SET\", {dest_key, op_result});\n            }\n          }\n        }\n      }\n      return OpStatus::OK;\n    };\n\n    cmd_cntx->tx()->Execute(std::move(store_cb), true);\n    builder->SendLong(op_result.size());\n  }\n}\n\nvoid GetBit(CmdArgList args, CommandContext* cmd_cntx) {\n  // Support for the command \"GETBIT key offset\"\n  // see https://redis.io/commands/getbit/\n\n  uint32_t offset{0};\n  string_view key = ArgS(args, 0);\n\n  if (!absl::SimpleAtoi(ArgS(args, 1), &offset)) {\n    return cmd_cntx->SendError(kInvalidIntErr);\n  }\n  auto cb = [&](Transaction* t, EngineShard* shard) {\n    return ReadValueBitsetAt(t->GetOpArgs(shard), key, offset);\n  };\n  OpResult<bool> res = cmd_cntx->tx()->ScheduleSingleHopT(std::move(cb));\n  HandleOpValueResult(res, cmd_cntx->rb());\n}\n\nvoid SetBit(CmdArgList args, CommandContext* cmd_cntx) {\n  // Support for the command \"SETBIT key offset new_value\"\n  // see https://redis.io/commands/setbit/\n\n  CmdArgParser parser(args);\n  auto [key, offset, value] = parser.Next<string_view, uint32_t, FInt<0, 1>>();\n\n  if (auto err = parser.TakeError(); err) {\n    return cmd_cntx->SendError(err.MakeReply());\n  }\n\n  auto cb = [&, &key = key, &offset = offset, &value = value](Transaction* t, EngineShard* shard) {\n    return BitNewValue(t->GetOpArgs(shard), key, offset, value != 0);\n  };\n\n  OpResult<bool> res = cmd_cntx->tx()->ScheduleSingleHopT(std::move(cb));\n  HandleOpValueResult(res, cmd_cntx->rb());\n}\n\n// ------------------------------------------------------------------------- //\n// This are the \"callbacks\" that we're using from above\nstring GetString(const PrimeValue& pv) {\n  string res;\n  
pv.GetString(&res);\n  return res;\n}\n\nOpResult<bool> ReadValueBitsetAt(const OpArgs& op_args, string_view key, uint32_t offset) {\n  DbSlice& db_slice = op_args.GetDbSlice();\n  auto it_res = db_slice.FindReadOnly(op_args.db_cntx, key, OBJ_STRING);\n\n  if (!it_res.ok()) {\n    return it_res.status();\n  }\n\n  const PrimeValue& pv = it_res.value()->second;\n\n  uint8_t byte_value = 0;\n  if (!pv.GetByteAtIndex(GetByteIndex(offset), &byte_value)) {\n    return false;\n  }\n\n  const auto bit_index = GetNormalizedBitIndex(offset);\n  return CheckBitStatus(byte_value, bit_index);\n}\n\nOpResult<string> ReadValue(const DbContext& context, string_view key, EngineShard* shard) {\n  DbSlice& db_slice = context.GetDbSlice(shard->shard_id());\n  auto it_res = db_slice.FindReadOnly(context, key, OBJ_STRING);\n  if (!it_res.ok()) {\n    return it_res.status();\n  }\n\n  const PrimeValue& pv = it_res.value()->second;\n\n  return GetString(pv);\n}\n\nOpResult<std::size_t> CountBitsForValue(const OpArgs& op_args, string_view key, int64_t start,\n                                        int64_t end, bool bit_value) {\n  OpResult<string> result = ReadValue(op_args.db_cntx, key, op_args.shard);\n\n  if (result) {  // if this is not found, just return 0 - per Redis\n    return CountBitSet(result.value(), start, end, bit_value);\n  } else {\n    return result.status();\n  }\n}\n\n// Returns the bit position (where MSB is 0, LSB is 7) of the leftmost bit that\n// equals `value` in `byte`. 
Returns 8 if not found.\nstd::size_t GetFirstBitWithValueInByte(uint8_t byte, bool value) {\n  if (value) {\n    return absl::countl_zero(byte);\n  } else {\n    return absl::countl_one(byte);\n  }\n}\n\nint64_t FindFirstBitWithValueAsBit(string_view value_str, bool bit_value, int64_t start,\n                                   int64_t end) {\n  for (int64_t i = start; i <= end; ++i) {\n    if (static_cast<size_t>(GetByteIndex(i)) >= value_str.size()) {\n      break;\n    }\n    const uint8_t current_byte = GetByteValue(value_str, i);\n    bool current_bit = CheckBitStatus(current_byte, GetNormalizedBitIndex(i));\n    if (current_bit != bit_value) {\n      continue;\n    }\n\n    return i;\n  }\n\n  return -1;\n}\n\nint64_t FindFirstBitWithValueAsByte(string_view value_str, bool bit_value, int64_t start,\n                                    int64_t end) {\n  for (int64_t i = start; i <= end; ++i) {\n    if (static_cast<size_t>(i) >= value_str.size()) {\n      break;\n    }\n    const uint8_t current_byte = value_str[i];\n    const uint8_t kNotFoundByte = bit_value ? 0 : std::numeric_limits<uint8_t>::max();\n    if (current_byte == kNotFoundByte) {\n      continue;\n    }\n\n    return i * OFFSET_FACTOR + GetFirstBitWithValueInByte(current_byte, bit_value);\n  }\n\n  return -1;\n}\n\nOpResult<int64_t> FindFirstBitWithValue(const OpArgs& op_args, string_view key, bool bit_value,\n                                        int64_t start, int64_t end, bool as_bit) {\n  OpResult<string> value = ReadValue(op_args.db_cntx, key, op_args.shard);\n\n  // non-existent keys are handled exactly as in Redis's implementation,\n  // even though it contradicts its docs:\n  //     If a clear bit isn't found in the specified range, the function returns -1\n  //     as the user specified a clear range and there are no 0 bits in that range\n  if (!value) {\n    return bit_value ? 
-1 : 0;\n  }\n\n  string_view value_str = value.value();\n  int64_t size = value_str.size();\n  if (as_bit) {\n    size *= OFFSET_FACTOR;\n  }\n\n  int64_t normalized_start = NormalizedOffset(size, start);\n  int64_t normalized_end = NormalizedOffset(size, end);\n  if (normalized_start > normalized_end) {\n    return -1;  // Return -1 for negative ranges, per Redis\n  }\n\n  int64_t position;\n  if (as_bit) {\n    position = FindFirstBitWithValueAsBit(value_str, bit_value, normalized_start, normalized_end);\n  } else {\n    position = FindFirstBitWithValueAsByte(value_str, bit_value, normalized_start, normalized_end);\n  }\n\n  if (position == -1 && !bit_value && static_cast<size_t>(start) < value_str.size() &&\n      end == std::numeric_limits<int64_t>::max()) {\n    // Returning bit-size of the value, compatible with Redis (but is a weird API).\n    return value_str.size() * OFFSET_FACTOR;\n  } else {\n    return position;\n  }\n}\n\n}  // namespace\n\nvoid RegisterBitopsFamily(CommandRegistry* registry) {\n  using CI = CommandId;\n  registry->StartFamily(acl::BITMAP);\n  *registry << CI{\"BITPOS\", CO::CommandOpt::READONLY, -3, 1, 1}.SetHandler(&BitPos)\n            << CI{\"BITCOUNT\", CO::READONLY, -2, 1, 1}.SetHandler(&BitCount)\n            << CI{\"BITFIELD\", CO::JOURNALED, -2, 1, 1}.SetHandler(&BitField)\n            << CI{\"BITFIELD_RO\", CO::FAST | CO::READONLY, -2, 1, 1}.SetHandler(&BitFieldRo)\n            << CI{\"BITOP\", CO::JOURNALED | CO::NO_AUTOJOURNAL, -4, 2, -1}.SetHandler(&BitOp)\n            << CI{\"GETBIT\", CO::READONLY | CO::FAST, 3, 1, 1}.SetHandler(&GetBit)\n            << CI{\"SETBIT\", CO::JOURNALED | CO::DENYOOM, 4, 1, 1}.SetHandler(&SetBit);\n}\n\n}  // namespace dfly\n"
  },
  {
    "path": "src/server/bitops_family_test.cc",
    "content": "// Copyright 2022, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n#include <bitset>\n#include <iomanip>\n#include <iostream>\n#include <limits>\n#include <string>\n#include <string_view>\n\n#include \"absl/strings/str_cat.h\"\n#include \"base/gtest.h\"\n#include \"base/logging.h\"\n#include \"facade/facade_test.h\"\n#include \"server/conn_context.h\"\n#include \"server/engine_shard_set.h\"\n#include \"server/error.h\"\n#include \"server/test_utils.h\"\n#include \"server/transaction.h\"\n\nusing namespace testing;\nusing namespace std;\nusing namespace util;\nusing absl::StrCat;\n\nnamespace dfly {\n\nclass Bytes {\n  using char_t = std::uint8_t;\n  using string_type = std::basic_string<char_t>;\n\n public:\n  enum State { GOOD, ERROR, NIL };\n\n  Bytes(std::initializer_list<std::uint8_t> bytes) : data_(bytes.size(), 0) {\n    // note - we want this to be like its would be used in redis where most significate bit is to\n    // the \"left\"\n    std::copy(rbegin(bytes), rend(bytes), data_.begin());\n  }\n\n  explicit Bytes(unsigned long long n) : data_(sizeof(n), 0) {\n    FromNumber(n);\n  }\n\n  static Bytes From(unsigned long long x) {\n    return Bytes(x);\n  }\n\n  explicit Bytes(State state) : state_{state} {\n  }\n\n  Bytes(const char_t* ch, std::size_t len) : data_(ch, len) {\n  }\n\n  Bytes(const char* ch, std::size_t len) : Bytes(reinterpret_cast<const char_t*>(ch), len) {\n  }\n\n  explicit Bytes(std::string_view from) : Bytes(from.data(), from.size()) {\n  }\n\n  static Bytes From(RespExpr&& r);\n\n  std::size_t Size() const {\n    return data_.size();\n  }\n\n  operator std::string_view() const {\n    return std::string_view(reinterpret_cast<const char*>(data_.data()), Size());\n  }\n\n  std::ostream& Print(std::ostream& os) const;\n\n  std::ostream& PrintHex(std::ostream& os) const;\n\n private:\n  template <typename T> void FromNumber(T num) {\n    // note - we want this to be like its would be used 
in redis where most significate bit is to\n    // the \"left\"\n    std::size_t i = 0;\n    for (const char_t* s = reinterpret_cast<const char_t*>(&num); i < sizeof(T); s++, i++) {\n      data_[i] = *s;\n    }\n  }\n\n  string_type data_;\n  State state_ = GOOD;\n};\n\nBytes Bytes::From(RespExpr&& r) {\n  if (r.type == RespExpr::STRING) {\n    return Bytes(ToSV(r.GetBuf()));\n  } else {\n    if (r.type == RespExpr::NIL || r.type == RespExpr::NIL_ARRAY) {\n      return Bytes{Bytes::NIL};\n    } else {\n      return Bytes(Bytes::ERROR);\n    }\n  }\n}\n\nstd::ostream& Bytes::Print(std::ostream& os) const {\n  if (state_ == GOOD) {\n    for (auto c : data_) {\n      std::bitset<8> b{c};\n      os << b << \":\";\n    }\n  } else {\n    if (state_ == NIL) {\n      os << \"nil\";\n    } else {\n      os << \"error\";\n    }\n  }\n  return os;\n}\n\nstd::ostream& Bytes::PrintHex(std::ostream& os) const {\n  if (state_ == GOOD) {\n    for (auto c : data_) {\n      os << std::hex << std::setfill('0') << std::setw(2) << (std::uint16_t)c << \":\";\n    }\n  } else {\n    if (state_ == NIL) {\n      os << \"nil\";\n    } else {\n      os << \"error\";\n    }\n  }\n  return os;\n}\n\ninline bool operator==(const Bytes& left, const Bytes& right) {\n  return static_cast<const std::string_view&>(left) == static_cast<const std::string_view&>(right);\n}\n\ninline bool operator!=(const Bytes& left, const Bytes& right) {\n  return !(left == right);\n}\n\ninline Bytes operator\"\" _b(unsigned long long x) {\n  return Bytes::From(x);\n}\n\ninline Bytes operator\"\" _b(const char* x, std::size_t s) {\n  return Bytes{x, s};\n}\n\ninline Bytes operator\"\" _b(const char* x) {\n  return Bytes{x, std::strlen(x)};\n}\n\ninline std::ostream& operator<<(std::ostream& os, const Bytes& bs) {\n  return bs.PrintHex(os);\n}\n\nclass BitOpsFamilyTest : public BaseFamilyTest {\n protected:\n  // only for bitop XOR, OR, AND tests\n  void BitOpSetKeys();\n};\n\n// for the bitop tests we need to test 
with multiple keys as the issue\n// is that we need to make sure that accessing multiple shards creates\n// the correct result\n// Since this is bit operations, we are using the bytes data type\n// that makes the verification more ergonomics.\nconst std::pair<std::string_view, Bytes> KEY_VALUES_BIT_OP[] = {\n    {\"first_key\", 0xFFAACC01_b},\n    {\"key_second\", {0x1, 0xBB}},\n    {\"_this_is_the_third_key\", {0x01, 0x05, 0x15, 0x20, 0xAA, 0xCC}},\n    {\"the_last_key_we_have\", 0xAACC_b}};\n\n// For the bitop XOR OR and AND we are setting these keys/value pairs\nvoid BitOpsFamilyTest::BitOpSetKeys() {\n  auto resp = Run({\"set\", KEY_VALUES_BIT_OP[0].first, KEY_VALUES_BIT_OP[0].second});\n  EXPECT_EQ(resp, \"OK\");\n  resp = Run({\"set\", KEY_VALUES_BIT_OP[1].first, KEY_VALUES_BIT_OP[1].second});\n  EXPECT_EQ(resp, \"OK\");\n  resp = Run({\"set\", KEY_VALUES_BIT_OP[2].first, KEY_VALUES_BIT_OP[2].second});\n  EXPECT_EQ(resp, \"OK\");\n  resp = Run({\"set\", KEY_VALUES_BIT_OP[3].first, KEY_VALUES_BIT_OP[3].second});\n  EXPECT_EQ(resp, \"OK\");\n}\n\nconst long EXPECTED_VALUE_SETBIT[] = {0, 1, 1, 0, 0, 0,\n                                      0, 1, 0, 1, 1, 0};  // taken from running this on redis\nconst int32_t ITERATIONS = sizeof(EXPECTED_VALUE_SETBIT) / sizeof(EXPECTED_VALUE_SETBIT[0]);\n\nTEST_F(BitOpsFamilyTest, GetBit) {\n  auto resp = Run({\"set\", \"foo\", \"abc\"});\n\n  EXPECT_EQ(resp, \"OK\");\n\n  for (int32_t i = 0; i < ITERATIONS; i++) {\n    EXPECT_EQ(EXPECTED_VALUE_SETBIT[i], CheckedInt({\"getbit\", \"foo\", std::to_string(i)}));\n  }\n\n  // make sure that when accessing bit that is not in the range its working and we are\n  // getting 0\n  EXPECT_EQ(0, CheckedInt({\"getbit\", \"foo\", std::to_string(strlen(\"abc\") + 5)}));\n}\n\nTEST_F(BitOpsFamilyTest, SetBitExistingKey) {\n  // this test would test when we have the value in place and\n  // we are overriding and existing key\n  // so there are no allocations of keys\n  auto resp = Run({\"set\", 
\"foo\", \"abc\"});\n\n  EXPECT_EQ(resp, \"OK\");\n\n  // we are setting all to 1s first, we are expecting to get the old values\n  for (int32_t i = 0; i < ITERATIONS; i++) {\n    EXPECT_EQ(EXPECTED_VALUE_SETBIT[i], CheckedInt({\"setbit\", \"foo\", std::to_string(i), \"1\"}));\n  }\n\n  for (int32_t i = 0; i < ITERATIONS; i++) {\n    EXPECT_EQ(1, CheckedInt({\"getbit\", \"foo\", std::to_string(i)}));\n  }\n}\n\nTEST_F(BitOpsFamilyTest, SetBitMissingKey) {\n  // This test would run without pre-allocated existing key\n  // so we need to allocate the key as part of setting the values\n  for (int32_t i = 0; i < ITERATIONS; i++) {  // we are setting all to 1s first, we are expecting\n    // get 0s since we didn't have this key before\n    EXPECT_EQ(0, CheckedInt({\"setbit\", \"foo\", std::to_string(i), \"1\"}));\n  }\n  // now all that we set are at 1s\n  for (int32_t i = 0; i < ITERATIONS; i++) {\n    EXPECT_EQ(1, CheckedInt({\"getbit\", \"foo\", std::to_string(i)}));\n  }\n}\n\nTEST_F(BitOpsFamilyTest, SetBitIncorrectValues) {\n  EXPECT_EQ(0, CheckedInt({\"setbit\", \"foo\", \"0\", \"1\"}));\n  EXPECT_THAT(Run({\"setbit\", \"foo\", \"1\", \"-1\"}),\n              ErrArg(\"ERR value is not an integer or out of range\"));\n  EXPECT_THAT(Run({\"setbit\", \"foo\", \"2\", \"11\"}),\n              ErrArg(\"ERR value is not an integer or out of range\"));\n  EXPECT_THAT(Run({\"setbit\", \"foo\", \"3\", \"a\"}),\n              ErrArg(\"ERR value is not an integer or out of range\"));\n  EXPECT_THAT(Run({\"setbit\", \"foo\", \"4\", \"O\"}),\n              ErrArg(\"ERR value is not an integer or out of range\"));\n  EXPECT_EQ(1, CheckedInt({\"getbit\", \"foo\", \"0\"}));\n  EXPECT_EQ(0, CheckedInt({\"getbit\", \"foo\", \"1\"}));\n  EXPECT_EQ(0, CheckedInt({\"getbit\", \"foo\", \"2\"}));\n  EXPECT_EQ(0, CheckedInt({\"getbit\", \"foo\", \"3\"}));\n  EXPECT_EQ(0, CheckedInt({\"getbit\", \"foo\", \"4\"}));\n}\n\nTEST_F(BitOpsFamilyTest, SetBitExtendExistingKey) {\n  // This test 
verifies SETBIT correctly extends an existing key beyond its current length.\n  // It sets up a small 3-byte key (\"abc\") and then sets a bit far beyond byte index 2,\n  // ensuring the string is extended with zeros and the bit is set correctly.\n  auto resp = Run({\"set\", \"foo\", \"abc\"});\n  EXPECT_EQ(resp, \"OK\");\n\n  // Verify initial string length is 3 bytes (24 bits)\n  EXPECT_EQ(3, CheckedInt({\"strlen\", \"foo\"}));\n\n  // Set bit at offset 100 (byte index 12, bit 4 within that byte)\n  // This should extend the string from 3 bytes to 13 bytes\n  // The old value should be 0 since the string didn't extend that far\n  EXPECT_EQ(0, CheckedInt({\"setbit\", \"foo\", \"100\", \"1\"}));\n\n  // Verify the string was extended to 13 bytes (100 bits / 8 = 12.5, rounded up to 13)\n  EXPECT_EQ(13, CheckedInt({\"strlen\", \"foo\"}));\n\n  // Verify the bit at offset 100 is now set to 1\n  EXPECT_EQ(1, CheckedInt({\"getbit\", \"foo\", \"100\"}));\n\n  // Verify bits in the extended region (between original end and new bit) are 0\n  EXPECT_EQ(0, CheckedInt({\"getbit\", \"foo\", \"24\"}));  // First bit after \"abc\"\n  EXPECT_EQ(0, CheckedInt({\"getbit\", \"foo\", \"50\"}));  // Middle of extended region\n  EXPECT_EQ(0, CheckedInt({\"getbit\", \"foo\", \"99\"}));  // Just before the set bit\n\n  // Verify original bits are unchanged\n  EXPECT_EQ(EXPECTED_VALUE_SETBIT[0], CheckedInt({\"getbit\", \"foo\", \"0\"}));\n  EXPECT_EQ(EXPECTED_VALUE_SETBIT[1], CheckedInt({\"getbit\", \"foo\", \"1\"}));\n  EXPECT_EQ(EXPECTED_VALUE_SETBIT[2], CheckedInt({\"getbit\", \"foo\", \"2\"}));\n\n  // Set the same bit to 0 and verify we get back 1 (the current value)\n  EXPECT_EQ(1, CheckedInt({\"setbit\", \"foo\", \"100\", \"0\"}));\n  EXPECT_EQ(0, CheckedInt({\"getbit\", \"foo\", \"100\"}));\n}\n\nconst int32_t EXPECTED_VALUES_BYTES_BIT_COUNT[] = {  // got this from redis 0 as start index\n    4, 7, 11, 14, 17, 21, 21, 21, 21};\n\nconst int32_t BYTES_EXPECTED_VALUE_LEN =\n    
sizeof(EXPECTED_VALUES_BYTES_BIT_COUNT) / sizeof(EXPECTED_VALUES_BYTES_BIT_COUNT[0]);\n\nTEST_F(BitOpsFamilyTest, BitCountByte) {\n  // This would run without the bit flag - meaning it count on bytes boundaries\n  auto resp = Run({\"set\", \"foo\", \"farbar\"});\n  EXPECT_EQ(resp, \"OK\");\n  EXPECT_EQ(0, CheckedInt({\"bitcount\", \"foo2\"}));  // on none existing key we are expecting 0\n\n  for (int32_t i = 0; i < BYTES_EXPECTED_VALUE_LEN; i++) {\n    EXPECT_EQ(EXPECTED_VALUES_BYTES_BIT_COUNT[i],\n              CheckedInt({\"bitcount\", \"foo\", \"0\", std::to_string(i)}));\n  }\n  EXPECT_EQ(21, CheckedInt({\"bitcount\", \"foo\"}));  // the total number of bits in this value\n}\n\nTEST_F(BitOpsFamilyTest, BitCountByteSubRange) {\n  // This test test using some sub ranges of bit count on bytes\n  auto resp = Run({\"set\", \"foo\", \"farbar\"});\n  EXPECT_EQ(resp, \"OK\");\n  EXPECT_EQ(3, CheckedInt({\"bitcount\", \"foo\", \"1\", \"1\"}));\n  EXPECT_EQ(7, CheckedInt({\"bitcount\", \"foo\", \"1\", \"2\"}));\n  EXPECT_EQ(4, CheckedInt({\"bitcount\", \"foo\", \"2\", \"2\"}));\n  EXPECT_EQ(0, CheckedInt({\"bitcount\", \"foo\", \"3\", \"2\"}));  // illegal range\n  EXPECT_EQ(10, CheckedInt({\"bitcount\", \"foo\", \"-3\", \"-1\"}));\n  EXPECT_EQ(13, CheckedInt({\"bitcount\", \"foo\", \"-5\", \"-2\"}));\n  EXPECT_EQ(0, CheckedInt({\"bitcount\", \"foo\", \"-1\", \"-2\"}));  // illegal range\n  EXPECT_EQ(0, CheckedInt({\"bitcount\", \"foo\", \"1\", \"0\"}));    // illegal range\n}\n\nTEST_F(BitOpsFamilyTest, BitCountByteBitSubRange) {\n  // This test test using some sub ranges of bit count on bytes\n  auto resp = Run({\"set\", \"foo\", \"abcdef\"});\n  EXPECT_EQ(resp, \"OK\");\n  resp = Run({\"bitcount\", \"foo\", \"bar\", \"BIT\"});\n  ASSERT_THAT(resp, ErrArg(\"value is not an integer or out of range\"));\n\n  EXPECT_EQ(1, CheckedInt({\"bitcount\", \"foo\", \"1\", \"1\", \"BIT\"}));\n  EXPECT_EQ(2, CheckedInt({\"bitcount\", \"foo\", \"1\", \"2\", \"BIT\"}));\n  
EXPECT_EQ(1, CheckedInt({\"bitcount\", \"foo\", \"2\", \"2\", \"BIT\"}));\n  EXPECT_EQ(0, CheckedInt({\"bitcount\", \"foo\", \"3\", \"2\", \"bit\"}));  // illegal range\n  EXPECT_EQ(2, CheckedInt({\"bitcount\", \"foo\", \"-3\", \"-1\", \"bit\"}));\n  EXPECT_EQ(2, CheckedInt({\"bitcount\", \"foo\", \"-5\", \"-2\", \"bit\"}));\n  EXPECT_EQ(4, CheckedInt({\"bitcount\", \"foo\", \"1\", \"9\", \"bit\"}));\n  EXPECT_EQ(7, CheckedInt({\"bitcount\", \"foo\", \"2\", \"19\", \"bit\"}));\n  EXPECT_EQ(0, CheckedInt({\"bitcount\", \"foo\", \"-1\", \"-2\", \"bit\"}));  // illegal range\n}\n\n// ------------------------- BITOP tests\n\nconst auto EXPECTED_LEN_BITOP =\n    std::max(KEY_VALUES_BIT_OP[0].second.Size(), KEY_VALUES_BIT_OP[1].second.Size());\nconst auto EXPECTED_LEN_BITOP2 = std::max(EXPECTED_LEN_BITOP, KEY_VALUES_BIT_OP[2].second.Size());\nconst auto EXPECTED_LEN_BITOP3 = std::max(EXPECTED_LEN_BITOP2, KEY_VALUES_BIT_OP[3].second.Size());\n\nTEST_F(BitOpsFamilyTest, BitOpsAnd) {\n  BitOpSetKeys();\n  auto resp = Run({\"bitop\", \"foo\", \"bar\", \"abc\"});  // should failed this is illegal operation\n  ASSERT_THAT(resp, ErrArg(\"syntax error\"));\n  // run with none existing keys, should return 0\n  EXPECT_EQ(0, CheckedInt({\"bitop\", \"and\", \"dest_key\", \"1\", \"2\", \"3\"}));\n\n  // bitop AND single key\n  EXPECT_EQ(KEY_VALUES_BIT_OP[0].second.Size(),\n            CheckedInt({\"bitop\", \"and\", \"foo_out\", KEY_VALUES_BIT_OP[0].first}));\n\n  auto res = Bytes::From(Run({\"get\", \"foo_out\"}));\n  EXPECT_EQ(res, KEY_VALUES_BIT_OP[0].second);\n\n  // this will 0 all values other than one bit it would end with result with length ==\n  //     FOO_KEY_VALUE && value == BAR_KEY_VALUE\n  EXPECT_EQ(EXPECTED_LEN_BITOP, CheckedInt({\"bitop\", \"and\", \"foo-out\", KEY_VALUES_BIT_OP[0].first,\n                                            KEY_VALUES_BIT_OP[1].first}));\n  const auto EXPECTED_RESULT = Bytes((0xffaacc01 & 0x1BB));  // first and second values\n  res = 
Bytes::From(Run({\"get\", \"foo-out\"}));\n  EXPECT_EQ(res, EXPECTED_RESULT);\n\n  // test bitop AND with 3 keys\n  EXPECT_EQ(EXPECTED_LEN_BITOP2,\n            CheckedInt({\"bitop\", \"and\", \"foo-out\", KEY_VALUES_BIT_OP[0].first,\n                        KEY_VALUES_BIT_OP[1].first, KEY_VALUES_BIT_OP[2].first}));\n  const auto EXPECTED_RES2 = Bytes((0xffaacc01 & 0x1BB & 0x01051520AACC));\n  res = Bytes::From(Run({\"get\", \"foo-out\"}));\n  EXPECT_EQ(EXPECTED_RES2, res);\n\n  // test bitop AND with 4 parameters\n  const auto EXPECTED_RES3 = Bytes((0xffaacc01 & 0x1BB & 0x01051520AACC & 0xAACC));\n  EXPECT_EQ(EXPECTED_LEN_BITOP3, CheckedInt({\"bitop\", \"and\", \"foo-out\", KEY_VALUES_BIT_OP[0].first,\n                                             KEY_VALUES_BIT_OP[1].first, KEY_VALUES_BIT_OP[2].first,\n                                             KEY_VALUES_BIT_OP[3].first}));\n  res = Bytes::From(Run({\"get\", \"foo-out\"}));\n  EXPECT_EQ(EXPECTED_RES3, res);\n}\n\nTEST_F(BitOpsFamilyTest, BitOpsOr) {\n  BitOpSetKeys();\n\n  EXPECT_EQ(0, CheckedInt({\"bitop\", \"or\", \"dest_key\", \"1\", \"2\", \"3\"}));\n\n  // bitop or single key\n  EXPECT_EQ(KEY_VALUES_BIT_OP[0].second.Size(),\n            CheckedInt({\"bitop\", \"or\", \"foo_out\", KEY_VALUES_BIT_OP[0].first}));\n\n  auto res = Bytes::From(Run({\"get\", \"foo_out\"}));\n  EXPECT_EQ(res, KEY_VALUES_BIT_OP[0].second);\n\n  // bitop OR 2 keys\n  EXPECT_EQ(EXPECTED_LEN_BITOP, CheckedInt({\"bitop\", \"or\", \"foo-out\", KEY_VALUES_BIT_OP[0].first,\n                                            KEY_VALUES_BIT_OP[1].first}));\n  const auto EXPECTED_RESULT = Bytes((0xffaacc01 | 0x1BB));  // first or second values\n  res = Bytes::From(Run({\"get\", \"foo-out\"}));\n  EXPECT_EQ(res, EXPECTED_RESULT);\n\n  // bitop OR with 3 keys\n  EXPECT_EQ(EXPECTED_LEN_BITOP2,\n            CheckedInt({\"bitop\", \"or\", \"foo-out\", KEY_VALUES_BIT_OP[0].first,\n                        KEY_VALUES_BIT_OP[1].first, 
KEY_VALUES_BIT_OP[2].first}));\n  const auto EXPECTED_RES2 = Bytes((0xffaacc01 | 0x1BB | 0x01051520AACC));\n  res = Bytes::From(Run({\"get\", \"foo-out\"}));\n  EXPECT_EQ(EXPECTED_RES2, res);\n\n  // bitop OR with 4 keys\n  const auto EXPECTED_RES3 = Bytes((0xffaacc01 | 0x1BB | 0x01051520AACC | 0xAACC));\n  EXPECT_EQ(EXPECTED_LEN_BITOP3, CheckedInt({\"bitop\", \"or\", \"foo-out\", KEY_VALUES_BIT_OP[0].first,\n                                             KEY_VALUES_BIT_OP[1].first, KEY_VALUES_BIT_OP[2].first,\n                                             KEY_VALUES_BIT_OP[3].first}));\n  res = Bytes::From(Run({\"get\", \"foo-out\"}));\n  EXPECT_EQ(EXPECTED_RES3, res);\n}\n\nTEST_F(BitOpsFamilyTest, BitOpsXor) {\n  BitOpSetKeys();\n\n  EXPECT_EQ(0, CheckedInt({\"bitop\", \"or\", \"dest_key\", \"1\", \"2\", \"3\"}));\n\n  // bitop XOR on single key\n  EXPECT_EQ(KEY_VALUES_BIT_OP[0].second.Size(),\n            CheckedInt({\"bitop\", \"xor\", \"foo_out\", KEY_VALUES_BIT_OP[0].first}));\n  auto res = Bytes::From(Run({\"get\", \"foo_out\"}));\n  EXPECT_EQ(res, KEY_VALUES_BIT_OP[0].second);\n\n  // bitop on XOR with two keys\n  EXPECT_EQ(EXPECTED_LEN_BITOP, CheckedInt({\"bitop\", \"xor\", \"foo-out\", KEY_VALUES_BIT_OP[0].first,\n                                            KEY_VALUES_BIT_OP[1].first}));\n  const auto EXPECTED_RESULT = Bytes((0xffaacc01 ^ 0x1BB));  // first xor second values\n  res = Bytes::From(Run({\"get\", \"foo-out\"}));\n  EXPECT_EQ(res, EXPECTED_RESULT);\n\n  // bitop XOR with 3 keys\n  EXPECT_EQ(EXPECTED_LEN_BITOP2,\n            CheckedInt({\"bitop\", \"xor\", \"foo-out\", KEY_VALUES_BIT_OP[0].first,\n                        KEY_VALUES_BIT_OP[1].first, KEY_VALUES_BIT_OP[2].first}));\n  const auto EXPECTED_RES2 = Bytes((0xffaacc01 ^ 0x1BB ^ 0x01051520AACC));\n  res = Bytes::From(Run({\"get\", \"foo-out\"}));\n  EXPECT_EQ(EXPECTED_RES2, res);\n\n  // bitop XOR with 4 keys\n  const auto EXPECTED_RES3 = Bytes((0xffaacc01 ^ 0x1BB ^ 0x01051520AACC ^ 
0xAACC));\n  EXPECT_EQ(EXPECTED_LEN_BITOP3, CheckedInt({\"bitop\", \"xor\", \"foo-out\", KEY_VALUES_BIT_OP[0].first,\n                                             KEY_VALUES_BIT_OP[1].first, KEY_VALUES_BIT_OP[2].first,\n                                             KEY_VALUES_BIT_OP[3].first}));\n  res = Bytes::From(Run({\"get\", \"foo-out\"}));\n  EXPECT_EQ(EXPECTED_RES3, res);\n}\n\nTEST_F(BitOpsFamilyTest, BitOpsNot) {\n  // should failed this is illegal number of args\n  auto resp = Run({\"bitop\", \"not\", \"bar\", \"abc\", \"efg\"});\n  ASSERT_THAT(resp, ErrArg(\"syntax error\"));\n\n  // Make sure that this works with none existing key as well\n  EXPECT_EQ(0, CheckedInt({\"bitop\", \"NOT\", \"bit-op-not-none-existing-key-results\",\n                           \"this-key-do-not-exists\"}));\n  ASSERT_THAT(Run({\"get\", \"bit-op-not-none-existing-key-results\"}), ArgType(RespExpr::Type::NIL));\n\n  EXPECT_EQ(Run({\"set\", \"foo\", \"bar\"}), \"OK\");\n  EXPECT_EQ(0, CheckedInt({\"bitop\", \"NOT\", \"foo\", \"this-key-do-not-exists\"}));\n  ASSERT_THAT(Run({\"get\", \"foo\"}), ArgType(RespExpr::Type::NIL));\n\n  // Change the type of foo. Bitops is similar to set command. 
It's a blind update.\n  ASSERT_THAT(Run({\"hset\", \"foo\", \"bar\", \"val\"}), IntArg(1));\n  EXPECT_EQ(0, CheckedInt({\"bitop\", \"NOT\", \"foo\", \"this-key-do-not-exists\"}));\n  ASSERT_THAT(Run({\"get\", \"foo\"}), ArgType(RespExpr::Type::NIL));\n\n  // test bitop not\n  resp = Run({\"set\", KEY_VALUES_BIT_OP[0].first, KEY_VALUES_BIT_OP[0].second});\n  EXPECT_EQ(KEY_VALUES_BIT_OP[0].second.Size(),\n            CheckedInt({\"bitop\", \"not\", \"foo_out\", KEY_VALUES_BIT_OP[0].first}));\n  auto res = Bytes::From(Run({\"get\", \"foo_out\"}));\n\n  const auto NOT_RESULTS = Bytes(~0xFFAACC01ull);\n  EXPECT_EQ(res, NOT_RESULTS);\n}\n\nTEST_F(BitOpsFamilyTest, BitOpOverwritesNonStringKeyAccounting) {\n  string long_value(128, 'a');\n  auto resp = Run({\"set\", \"src\", long_value});\n  EXPECT_EQ(resp, \"OK\");\n\n  resp = Run({\"rpush\", \"dest\", \"a\", \"b\", \"c\"});\n  EXPECT_THAT(resp, IntArg(3));\n\n  Metrics before = GetMetrics();\n  ASSERT_FALSE(before.db_stats.empty());\n  const size_t list_before = before.db_stats[0].memory_usage_by_type[OBJ_LIST];\n  const size_t str_before = before.db_stats[0].memory_usage_by_type[OBJ_STRING];\n  ASSERT_GT(list_before, 0u);\n\n  resp = Run({\"bitop\", \"or\", \"dest\", \"src\"});\n  EXPECT_THAT(resp, IntArg(128));\n  EXPECT_EQ(Run({\"type\", \"dest\"}), \"string\");\n  EXPECT_EQ(Run({\"get\", \"dest\"}), long_value);\n\n  Metrics after = GetMetrics();\n  const size_t list_after = after.db_stats[0].memory_usage_by_type[OBJ_LIST];\n  const size_t str_after = after.db_stats[0].memory_usage_by_type[OBJ_STRING];\n  EXPECT_EQ(0, list_after);\n  EXPECT_GT(str_after, str_before);\n}\n\nTEST_F(BitOpsFamilyTest, BitPos) {\n  ASSERT_EQ(Run({\"set\", \"a\", \"\\x00\\x00\\x06\\xff\\xf0\"_b}), \"OK\");\n\n  // Find clear bits\n  EXPECT_EQ(0, CheckedInt({\"bitpos\", \"a\", \"0\"}));\n  EXPECT_EQ(8, CheckedInt({\"bitpos\", \"a\", \"0\", \"1\"}));\n  EXPECT_EQ(16, CheckedInt({\"bitpos\", \"a\", \"0\", \"2\"}));\n  EXPECT_EQ(-1, 
CheckedInt({\"bitpos\", \"a\", \"0\", \"100\"}));\n  EXPECT_EQ(-1, CheckedInt({\"bitpos\", \"a\", \"0\", \"100\", \"103\"}));\n  EXPECT_EQ(-1, CheckedInt({\"bitpos\", \"a\", \"0\", \"100\", \"0\"}));\n  EXPECT_EQ(0, CheckedInt({\"bitpos\", \"a\", \"0\", \"0\", \"100\"}));\n  EXPECT_EQ(8, CheckedInt({\"bitpos\", \"a\", \"0\", \"1\", \"100\"}));\n  EXPECT_EQ(0, CheckedInt({\"bitpos\", \"a\", \"0\", \"0\", \"-3\"}));\n  EXPECT_EQ(8, CheckedInt({\"bitpos\", \"a\", \"0\", \"1\", \"-2\"}));\n  EXPECT_EQ(36, CheckedInt({\"bitpos\", \"a\", \"0\", \"3\"}));\n  EXPECT_EQ(36, CheckedInt({\"bitpos\", \"a\", \"0\", \"4\"}));\n  EXPECT_EQ(36, CheckedInt({\"bitpos\", \"a\", \"0\", \"-2\"}));\n  EXPECT_EQ(36, CheckedInt({\"bitpos\", \"a\", \"0\", \"-2\", \"-1\"}));\n  EXPECT_EQ(36, CheckedInt({\"bitpos\", \"a\", \"0\", \"-1\"}));\n  EXPECT_EQ(0, CheckedInt({\"bitpos\", \"a\", \"0\", \"-100\"}));\n\n  // Find clear bits, explicitly mention \"BYTE\"\n  EXPECT_EQ(-1, CheckedInt({\"bitpos\", \"a\", \"0\", \"100\", \"103\", \"BYTE\"}));\n  EXPECT_EQ(-1, CheckedInt({\"bitpos\", \"a\", \"0\", \"100\", \"0\", \"BYTE\"}));\n  EXPECT_EQ(0, CheckedInt({\"bitpos\", \"a\", \"0\", \"0\", \"100\", \"BYTE\"}));\n  EXPECT_EQ(8, CheckedInt({\"bitpos\", \"a\", \"0\", \"1\", \"100\", \"BYTE\"}));\n  EXPECT_EQ(0, CheckedInt({\"bitpos\", \"a\", \"0\", \"0\", \"-3\", \"BYTE\"}));\n  EXPECT_EQ(8, CheckedInt({\"bitpos\", \"a\", \"0\", \"1\", \"-2\", \"BYTE\"}));\n  EXPECT_EQ(36, CheckedInt({\"bitpos\", \"a\", \"0\", \"-2\", \"-1\", \"BYTE\"}));\n\n  // Find clear bits using \"BIT\"\n  EXPECT_EQ(-1, CheckedInt({\"bitpos\", \"a\", \"0\", \"100\", \"103\", \"BIT\"}));\n  EXPECT_EQ(-1, CheckedInt({\"bitpos\", \"a\", \"0\", \"100\", \"0\", \"BIT\"}));\n  EXPECT_EQ(0, CheckedInt({\"bitpos\", \"a\", \"0\", \"0\", \"100\", \"BIT\"}));\n  EXPECT_EQ(1, CheckedInt({\"bitpos\", \"a\", \"0\", \"1\", \"100\", \"BIT\"}));\n  EXPECT_EQ(2, CheckedInt({\"bitpos\", \"a\", \"0\", \"2\", \"100\", \"BIT\"}));\n  EXPECT_EQ(16, 
CheckedInt({\"bitpos\", \"a\", \"0\", \"16\", \"100\", \"BIT\"}));\n  EXPECT_EQ(23, CheckedInt({\"bitpos\", \"a\", \"0\", \"21\", \"100\", \"BIT\"}));\n  EXPECT_EQ(36, CheckedInt({\"bitpos\", \"a\", \"0\", \"24\", \"100\", \"BIT\"}));\n  EXPECT_EQ(0, CheckedInt({\"bitpos\", \"a\", \"0\", \"0\", \"-3\", \"BIT\"}));\n  EXPECT_EQ(1, CheckedInt({\"bitpos\", \"a\", \"0\", \"1\", \"-2\", \"BIT\"}));\n  EXPECT_EQ(38, CheckedInt({\"bitpos\", \"a\", \"0\", \"-2\", \"-1\", \"BIT\"}));\n\n  // Find set bits\n  EXPECT_EQ(21, CheckedInt({\"bitpos\", \"a\", \"1\"}));\n  EXPECT_EQ(21, CheckedInt({\"bitpos\", \"a\", \"1\", \"0\"}));\n  EXPECT_EQ(21, CheckedInt({\"bitpos\", \"a\", \"1\", \"1\"}));\n  EXPECT_EQ(21, CheckedInt({\"bitpos\", \"a\", \"1\", \"2\"}));\n  EXPECT_EQ(24, CheckedInt({\"bitpos\", \"a\", \"1\", \"3\"}));\n  EXPECT_EQ(32, CheckedInt({\"bitpos\", \"a\", \"1\", \"4\"}));\n  EXPECT_EQ(32, CheckedInt({\"bitpos\", \"a\", \"1\", \"-1\"}));\n  EXPECT_EQ(24, CheckedInt({\"bitpos\", \"a\", \"1\", \"-2\"}));\n  EXPECT_EQ(21, CheckedInt({\"bitpos\", \"a\", \"1\", \"-3\"}));\n  EXPECT_EQ(21, CheckedInt({\"bitpos\", \"a\", \"1\", \"-4\"}));\n  EXPECT_EQ(21, CheckedInt({\"bitpos\", \"a\", \"1\", \"-5\"}));\n  EXPECT_EQ(21, CheckedInt({\"bitpos\", \"a\", \"1\", \"-6\"}));\n  EXPECT_EQ(21, CheckedInt({\"bitpos\", \"a\", \"1\", \"-100\"}));\n  EXPECT_EQ(-1, CheckedInt({\"bitpos\", \"a\", \"1\", \"0\", \"0\"}));\n  EXPECT_EQ(-1, CheckedInt({\"bitpos\", \"a\", \"1\", \"0\", \"1\"}));\n  EXPECT_EQ(21, CheckedInt({\"bitpos\", \"a\", \"1\", \"0\", \"3\"}));\n  EXPECT_EQ(21, CheckedInt({\"bitpos\", \"a\", \"1\", \"0\", \"100\"}));\n  EXPECT_EQ(21, CheckedInt({\"bitpos\", \"a\", \"1\", \"2\", \"2\"}));\n  EXPECT_EQ(21, CheckedInt({\"bitpos\", \"a\", \"1\", \"2\", \"3\"}));\n  EXPECT_EQ(32, CheckedInt({\"bitpos\", \"a\", \"1\", \"-1\", \"-1\"}));\n  EXPECT_EQ(24, CheckedInt({\"bitpos\", \"a\", \"1\", \"-2\", \"-1\"}));\n  EXPECT_EQ(-1, CheckedInt({\"bitpos\", \"a\", \"1\", \"-1\", 
\"-2\"}));\n\n  // Find set bits, explicitly mention \"BYTE\"\n  EXPECT_EQ(-1, CheckedInt({\"bitpos\", \"a\", \"1\", \"0\", \"0\", \"BYTE\"}));\n  EXPECT_EQ(-1, CheckedInt({\"bitpos\", \"a\", \"1\", \"0\", \"1\", \"BYTE\"}));\n  EXPECT_EQ(21, CheckedInt({\"bitpos\", \"a\", \"1\", \"0\", \"3\", \"BYTE\"}));\n  EXPECT_EQ(21, CheckedInt({\"bitpos\", \"a\", \"1\", \"0\", \"100\", \"BYTE\"}));\n  EXPECT_EQ(21, CheckedInt({\"bitpos\", \"a\", \"1\", \"2\", \"2\", \"BYTE\"}));\n  EXPECT_EQ(21, CheckedInt({\"bitpos\", \"a\", \"1\", \"2\", \"3\", \"BYTE\"}));\n  EXPECT_EQ(32, CheckedInt({\"bitpos\", \"a\", \"1\", \"-1\", \"-1\", \"BYTE\"}));\n  EXPECT_EQ(24, CheckedInt({\"bitpos\", \"a\", \"1\", \"-2\", \"-1\", \"BYTE\"}));\n  EXPECT_EQ(-1, CheckedInt({\"bitpos\", \"a\", \"1\", \"-1\", \"-2\", \"BYTE\"}));\n\n  // Find set bits using \"BIT\"\n  EXPECT_EQ(-1, CheckedInt({\"bitpos\", \"a\", \"1\", \"0\", \"0\", \"BIT\"}));\n  EXPECT_EQ(-1, CheckedInt({\"bitpos\", \"a\", \"1\", \"0\", \"1\", \"BIT\"}));\n  EXPECT_EQ(21, CheckedInt({\"bitpos\", \"a\", \"1\", \"0\", \"21\", \"BIT\"}));\n  EXPECT_EQ(21, CheckedInt({\"bitpos\", \"a\", \"1\", \"21\", \"21\", \"BIT\"}));\n  EXPECT_EQ(21, CheckedInt({\"bitpos\", \"a\", \"1\", \"21\", \"100\", \"BIT\"}));\n  EXPECT_EQ(21, CheckedInt({\"bitpos\", \"a\", \"1\", \"0\", \"100\", \"BIT\"}));\n  EXPECT_EQ(-1, CheckedInt({\"bitpos\", \"a\", \"1\", \"-1\", \"-1\", \"BIT\"}));\n  EXPECT_EQ(-1, CheckedInt({\"bitpos\", \"a\", \"1\", \"-4\", \"-1\", \"BIT\"}));\n  EXPECT_EQ(35, CheckedInt({\"bitpos\", \"a\", \"1\", \"-5\", \"-1\", \"BIT\"}));\n  EXPECT_EQ(34, CheckedInt({\"bitpos\", \"a\", \"1\", \"-6\", \"-1\", \"BIT\"}));\n\n  // Make sure we behave like Redis does when looking for clear bits in an all-set string.\n  ASSERT_EQ(Run({\"set\", \"b\", \"\\xff\\xff\\xff\"_b}), \"OK\");\n  EXPECT_EQ(24, CheckedInt({\"bitpos\", \"b\", \"0\"}));\n  EXPECT_EQ(24, CheckedInt({\"bitpos\", \"b\", \"0\", \"0\"}));\n  EXPECT_EQ(24, CheckedInt({\"bitpos\", 
\"b\", \"0\", \"1\"}));\n  EXPECT_EQ(24, CheckedInt({\"bitpos\", \"b\", \"0\", \"2\"}));\n  EXPECT_EQ(-1, CheckedInt({\"bitpos\", \"b\", \"0\", \"3\"}));\n  EXPECT_EQ(-1, CheckedInt({\"bitpos\", \"b\", \"0\", \"0\", \"1\"}));\n  EXPECT_EQ(-1, CheckedInt({\"bitpos\", \"b\", \"0\", \"0\", \"1\", \"BYTE\"}));\n  EXPECT_EQ(-1, CheckedInt({\"bitpos\", \"b\", \"0\", \"0\", \"3\"}));\n  EXPECT_EQ(-1, CheckedInt({\"bitpos\", \"b\", \"0\", \"0\", \"3\", \"BYTE\"}));\n\n  ASSERT_EQ(Run({\"set\", \"empty\", \"\"_b}), \"OK\");\n  EXPECT_EQ(-1, CheckedInt({\"bitpos\", \"empty\", \"0\"}));\n  EXPECT_EQ(-1, CheckedInt({\"bitpos\", \"empty\", \"0\", \"1\"}));\n\n  // Non-existent key should be treated like padded with zeros string.\n  EXPECT_EQ(-1, CheckedInt({\"bitpos\", \"d\", \"1\"}));\n  EXPECT_EQ(0, CheckedInt({\"bitpos\", \"d\", \"0\"}));\n\n  // Make sure we accept only 0 and 1 for the bit mode arguement.\n  const auto argument_must_be_0_or_1_error = ErrArg(\"ERR The bit argument must be 1 or 0\");\n  ASSERT_THAT(Run({\"bitpos\", \"d\", \"2\"}), argument_must_be_0_or_1_error);\n  ASSERT_THAT(Run({\"bitpos\", \"d\", \"42\"}), argument_must_be_0_or_1_error);\n  ASSERT_THAT(Run({\"bitpos\", \"d\", \"-1\"}), argument_must_be_0_or_1_error);\n}\n\nTEST_F(BitOpsFamilyTest, BitFieldParsing) {\n  const auto syntax_error = ErrArg(\"ERR syntax error\");\n  // Parsing Errors\n  ASSERT_THAT(Run({\"bitfield\", \"foo\", \"set\", \"u1\"}), syntax_error);\n  ASSERT_THAT(Run({\"bitfield\", \"foo\", \"set\", \"u1\", \"0\"}), syntax_error);\n  ASSERT_THAT(Run({\"bitfield\", \"foo\", \"set\", \"u1\", \"0\", \"0\", \"55\"}), syntax_error);\n  ASSERT_THAT(Run({\"bitfield\", \"foo\", \"set\", \"u1\", \"0\", \"0\", \"get\", \"u1\"}), syntax_error);\n  ASSERT_THAT(Run({\"bitfield\", \"foo\", \"incrby\", \"u1\"}), syntax_error);\n  ASSERT_THAT(Run({\"bitfield\", \"foo\", \"incrby\", \"u1\", \"0\"}), syntax_error);\n  ASSERT_THAT(Run({\"bitfield\", \"foo\", \"get\", \"u1\", \"0\", \"15\"}), 
syntax_error);\n  ASSERT_THAT(Run({\"bitfield\", \"foo\", \"get\"}), syntax_error);\n  ASSERT_THAT(Run({\"bitfield\", \"foo\", \"set\", \"u1\", \"0\", \"0\", \"set\"}), syntax_error);\n  ASSERT_THAT(Run({\"bitfield\", \"foo\", \"overflow\"}), syntax_error);\n  ASSERT_THAT(Run({\"bitfield\", \"foo\", \"overflow\", \"nonsense\"}), syntax_error);\n\n  // Range errors\n  auto expected_error = ErrArg(\n      \"ERR invalid bitfield type. use something like i16 u8. note that u64 is not supported but \"\n      \"i64 is.\");\n\n  ASSERT_THAT(Run({\"bitfield\", \"foo\", \"set\", \"u0\", \"0\", \"0\"}), expected_error);\n  ASSERT_THAT(Run({\"bitfield\", \"foo\", \"set\", \"u0\", \"0\", \"0\"}), expected_error);\n  ASSERT_THAT(Run({\"bitfield\", \"foo\", \"set\", \"u64\", \"0\", \"0\"}), expected_error);\n  ASSERT_THAT(Run({\"bitfield\", \"foo\", \"set\", \"u65\", \"0\", \"0\"}), expected_error);\n  ASSERT_THAT(Run({\"bitfield\", \"foo\", \"set\", \"i65\", \"0\", \"0\"}), expected_error);\n\n  expected_error = ErrArg(\"BITFIELD_RO only supports the GET subcommand\");\n  ASSERT_THAT(Run({\"bitfield_ro\", \"foo\", \"set\", \"u1\", \"0\", \"0\"}), expected_error);\n  ASSERT_THAT(Run({\"bitfield_ro\", \"foo\", \"incrby\", \"i64\", \"0\", \"15\"}), expected_error);\n}\n\nTEST_F(BitOpsFamilyTest, BitFieldCreate) {\n  // check that SET, INCR create the key when it does not exist\n  ASSERT_THAT(Run({\"bitfield\", \"foo\", \"set\", \"u1\", \"0\", \"1\"}), IntArg(0));\n  ASSERT_THAT(Run({\"bitfield\", \"foo\", \"get\", \"u1\", \"0\"}), IntArg(1));\n  ASSERT_THAT(Run({\"bitfield\", \"foo\", \"incrby\", \"u1\", \"1\", \"1\"}), IntArg(1));\n  ASSERT_THAT(Run({\"bitfield\", \"foo\", \"get\", \"u1\", \"1\"}), IntArg(1));\n}\n\nTEST_F(BitOpsFamilyTest, BitFieldOverflowUnderflow) {\n  Run({\"bitfield\", \"foo\", \"set\", \"u2\", \"0\", \"2\"});\n\n  // unsigned 1bit\n  ASSERT_THAT(Run({\"bitfield\", \"foo\", \"set\", \"u1\", \"0\", \"2\"}), IntArg(1));\n  ASSERT_THAT(Run({\"bitfield\", 
\"foo\", \"get\", \"u1\", \"0\"}), IntArg(0));\n  ASSERT_THAT(Run({\"bitfield\", \"foo\", \"incrby\", \"u1\", \"1\", \"2\"}), IntArg(0));\n  ASSERT_THAT(Run({\"bitfield\", \"foo\", \"get\", \"u1\", \"1\"}), IntArg(0));\n\n  // unsigned 63bit\n  int64_t max = std::numeric_limits<int64_t>::max();\n  Run({\"bitfield\", \"foo\", \"set\", \"i64\", \"0\", StrCat(max)});\n  ASSERT_THAT(Run({\"bitfield\", \"foo\", \"incrby\", \"i64\", \"0\", \"1\"}), IntArg(-max - 1));\n\n  // signed 1 bit\n  Run({\"bitfield\", \"foo\", \"set\", \"i1\", \"0\", \"-2\"});\n  ASSERT_THAT(Run({\"bitfield\", \"foo\", \"get\", \"i1\", \"0\"}), IntArg(0));\n  ASSERT_THAT(Run({\"bitfield\", \"foo\", \"incrby\", \"i1\", \"0\", \"-1\"}), IntArg(-1));\n  ASSERT_THAT(Run({\"bitfield\", \"foo\", \"incrby\", \"i1\", \"0\", \"-1\"}), IntArg(0));\n  ASSERT_THAT(Run({\"bitfield\", \"foo\", \"incrby\", \"i1\", \"0\", \"-3\"}), IntArg(-1));\n\n  int64_t min = std::numeric_limits<int64_t>::min();\n  Run({\"bitfield\", \"foo\", \"set\", \"i8\", \"0\", StrCat(min)});\n  ASSERT_THAT(Run({\"bitfield\", \"foo\", \"get\", \"i8\", \"0\"}), IntArg(0));\n\n  // signed 64 bit\n  Run({\"bitfield\", \"foo\", \"set\", \"i64\", \"0\", StrCat(min)});\n  ASSERT_THAT(Run({\"bitfield\", \"foo\", \"incrby\", \"i64\", \"0\", \"-1\"}), IntArg(max));\n\n  // overflow sat\n  // unsigned 8 bit\n  Run({\"bitfield\", \"foo\", \"set\", \"u1\", \"0\", \"0\"});\n  ASSERT_THAT(Run({\"bitfield\", \"foo\", \"overflow\", \"sat\", \"incrby\", \"u8\", \"0\", \"300\"}), IntArg(255));\n  ASSERT_THAT(Run({\"bitfield\", \"foo\", \"overflow\", \"sat\", \"incrby\", \"u8\", \"0\", \"10\"}), IntArg(255));\n  ASSERT_THAT(Run({\"bitfield\", \"foo\", \"get\", \"u8\", \"0\"}), IntArg(255));\n\n  // unsigned 63 bit\n  Run({\"bitfield\", \"foo\", \"set\", \"u63\", \"0\", \"0\"});\n  ASSERT_THAT(Run({\"bitfield\", \"foo\", \"overflow\", \"sat\", \"set\", \"u63\", \"0\", StrCat(max)}),\n              IntArg(0));\n  ASSERT_THAT(Run({\"bitfield\", \"foo\", 
\"overflow\", \"sat\", \"incrby\", \"u63\", \"0\", \"10\"}), IntArg(max));\n\n  // signed 8 bit\n  Run({\"bitfield\", \"foo\", \"set\", \"u8\", \"0\", \"0\"});\n  ASSERT_THAT(Run({\"bitfield\", \"foo\", \"overflow\", \"sat\", \"set\", \"i8\", \"0\", \"300\"}), IntArg(0));\n  ASSERT_THAT(Run({\"bitfield\", \"foo\", \"overflow\", \"sat\", \"incrby\", \"i8\", \"0\", \"-127\"}), IntArg(0));\n  ASSERT_THAT(Run({\"bitfield\", \"foo\", \"overflow\", \"sat\", \"incrby\", \"i8\", \"0\", \"-255\"}),\n              IntArg(-128));\n\n  // signed 64 bit\n  Run({\"bitfield\", \"foo\", \"set\", \"i64\", \"0\", \"0\"});\n  ASSERT_THAT(Run({\"bitfield\", \"foo\", \"overflow\", \"sat\", \"set\", \"i64\", \"0\", StrCat(max)}),\n              IntArg(0));\n  ASSERT_THAT(Run({\"bitfield\", \"foo\", \"overflow\", \"sat\", \"incrby\", \"i64\", \"0\", \"100\"}),\n              IntArg(max));\n  ASSERT_THAT(Run({\"bitfield\", \"foo\", \"get\", \"i64\", \"0\"}), IntArg(max));\n  ASSERT_THAT(Run({\"bitfield\", \"foo\", \"overflow\", \"sat\", \"set\", \"i64\", \"0\", StrCat(min)}),\n              IntArg(max));\n  ASSERT_THAT(Run({\"bitfield\", \"foo\", \"overflow\", \"sat\", \"incrby\", \"i64\", \"0\", \"-100\"}),\n              IntArg(min));\n\n  // overflow fail\n  // unsigned\n  ASSERT_THAT(Run({\"bitfield\", \"foo\", \"overflow\", \"fail\", \"set\", \"u8\", \"0\", \"300\"}),\n              ArgType(RespExpr::Type::NIL));\n  ASSERT_THAT(Run({\"bitfield\", \"foo\", \"overflow\", \"fail\", \"incrby\", \"u1\", \"0\", \"10\"}),\n              ArgType(RespExpr::Type::NIL));\n  ASSERT_THAT(Run({\"bitfield\", \"foo\", \"overflow\", \"fail\", \"incrby\", \"u1\", \"0\", \"-10\"}),\n              ArgType(RespExpr::Type::NIL));\n\n  // signed\n  ASSERT_THAT(Run({\"bitfield\", \"foo\", \"overflow\", \"fail\", \"incrby\", \"i8\", \"0\", \"300\"}),\n              ArgType(RespExpr::Type::NIL));\n  ASSERT_THAT(Run({\"bitfield\", \"foo\", \"overflow\", \"fail\", \"incrby\", \"i1\", \"0\", \"10\"}),\n          
    ArgType(RespExpr::Type::NIL));\n  ASSERT_THAT(Run({\"bitfield\", \"foo\", \"overflow\", \"fail\", \"incrby\", \"i1\", \"0\", \"-10\"}),\n              ArgType(RespExpr::Type::NIL));\n\n  // stickiness of overflow among operations in a chain\n  ASSERT_THAT(Run({\"bitfield\", \"foo\", \"overflow\", \"fail\", \"set\", \"u8\", \"0\", \"300\", \"set\", \"u1\", \"0\",\n                   \"400\"}),\n              RespArray(ElementsAre(ArgType(RespExpr::NIL), ArgType(RespExpr::NIL))));\n}\n\nTEST_F(BitOpsFamilyTest, BitFieldOperations) {\n  // alligned offset reads/writes unsigned\n  Run({\"bitfield\", \"foo\", \"set\", \"u32\", \"0\", \"0\"});\n  // Set the bit battern 01111000 00000001 00000001 00001010\n  ASSERT_THAT(Run({\"bitfield\", \"foo\", \"set\", \"u8\", \"0\", \"120\"}), IntArg(0));\n  ASSERT_THAT(Run({\"bitfield\", \"foo\", \"get\", \"u8\", \"0\"}), IntArg(120));\n\n  ASSERT_THAT(Run({\"bitfield\", \"foo\", \"set\", \"u8\", \"8\", \"1\"}), IntArg(0));\n  ASSERT_THAT(Run({\"bitfield\", \"foo\", \"get\", \"u8\", \"8\"}), IntArg(1));\n\n  ASSERT_THAT(Run({\"bitfield\", \"foo\", \"set\", \"u8\", \"16\", \"1\"}), IntArg(0));\n  ASSERT_THAT(Run({\"bitfield\", \"foo\", \"get\", \"u8\", \"16\"}), IntArg(1));\n\n  ASSERT_THAT(Run({\"bitfield\", \"foo\", \"set\", \"u8\", \"24\", \"10\"}), IntArg(0));\n  ASSERT_THAT(Run({\"bitfield\", \"foo\", \"get\", \"u8\", \"24\"}), IntArg(10));\n\n  ASSERT_THAT(Run({\"bitfield\", \"foo\", \"get\", \"u32\", \"0\"}), IntArg(2013331722));\n\n  ASSERT_THAT(Run({\"bitfield\", \"foo\", \"incrby\", \"u8\", \"0\", \"120\"}), IntArg(240));\n  ASSERT_THAT(Run({\"bitfield\", \"foo\", \"get\", \"u8\", \"0\"}), IntArg(240));\n\n  ASSERT_THAT(Run({\"bitfield\", \"foo\", \"incrby\", \"u16\", \"0\", \"120\"}), IntArg(61561));\n  ASSERT_THAT(Run({\"bitfield\", \"foo\", \"get\", \"u16\", \"0\"}), IntArg(61561));\n\n  // alligned offset reads/writes signed\n  Run({\"bitfield\", \"foo\", \"set\", \"u32\", \"0\", \"0\"});\n  // Set the bit battern 
10001000 11111111 11111111 11110110\n  ASSERT_THAT(Run({\"bitfield\", \"foo\", \"set\", \"i8\", \"0\", \"-120\"}), IntArg(0));\n  ASSERT_THAT(Run({\"bitfield\", \"foo\", \"get\", \"i8\", \"0\"}), IntArg(-120));\n\n  ASSERT_THAT(Run({\"bitfield\", \"foo\", \"set\", \"i8\", \"8\", \"-1\"}), IntArg(0));\n  ASSERT_THAT(Run({\"bitfield\", \"foo\", \"get\", \"i8\", \"8\"}), IntArg(-1));\n\n  ASSERT_THAT(Run({\"bitfield\", \"foo\", \"set\", \"i8\", \"16\", \"-1\"}), IntArg(0));\n  ASSERT_THAT(Run({\"bitfield\", \"foo\", \"get\", \"i8\", \"16\"}), IntArg(-1));\n\n  ASSERT_THAT(Run({\"bitfield\", \"foo\", \"set\", \"i8\", \"24\", \"-10\"}), IntArg(0));\n  ASSERT_THAT(Run({\"bitfield\", \"foo\", \"get\", \"i8\", \"24\"}), IntArg(-10));\n\n  ASSERT_THAT(Run({\"bitfield\", \"foo\", \"get\", \"i32\", \"0\"}), IntArg(-1996488714));\n\n  ASSERT_THAT(Run({\"bitfield\", \"foo\", \"incrby\", \"i8\", \"0\", \"-8\"}), IntArg(-128));\n\n  // nonalligned offset reads/writes unsigned\n  Run({\"bitfield\", \"foo\", \"set\", \"i64\", \"0\", \"0\"});\n  // Set the bit battern 00000000 10000000 10000000 10000000 10000000\n  ASSERT_THAT(Run({\"bitfield\", \"foo\", \"set\", \"u8\", \"1\", \"1\"}), IntArg(0));\n  ASSERT_THAT(Run({\"bitfield\", \"foo\", \"get\", \"u8\", \"1\"}), IntArg(1));\n\n  ASSERT_THAT(Run({\"bitfield\", \"foo\", \"set\", \"u8\", \"9\", \"1\"}), IntArg(0));\n  ASSERT_THAT(Run({\"bitfield\", \"foo\", \"get\", \"u8\", \"9\"}), IntArg(1));\n\n  ASSERT_THAT(Run({\"bitfield\", \"foo\", \"set\", \"u8\", \"17\", \"1\"}), IntArg(0));\n  ASSERT_THAT(Run({\"bitfield\", \"foo\", \"get\", \"u8\", \"17\"}), IntArg(1));\n\n  ASSERT_THAT(Run({\"bitfield\", \"foo\", \"set\", \"u8\", \"25\", \"1\"}), IntArg(0));\n  ASSERT_THAT(Run({\"bitfield\", \"foo\", \"get\", \"u8\", \"25\"}), IntArg(1));\n\n  ASSERT_THAT(Run({\"bitfield\", \"foo\", \"get\", \"u8\", \"0\"}), IntArg(0));\n  ASSERT_THAT(Run({\"bitfield\", \"foo\", \"get\", \"u1\", \"8\"}), IntArg(1));\n  ASSERT_THAT(Run({\"bitfield\", 
\"foo\", \"get\", \"u1\", \"16\"}), IntArg(1));\n  ASSERT_THAT(Run({\"bitfield\", \"foo\", \"get\", \"u1\", \"24\"}), IntArg(1));\n  ASSERT_THAT(Run({\"bitfield\", \"foo\", \"get\", \"u1\", \"32\"}), IntArg(1));\n\n  ASSERT_THAT(Run({\"bitfield\", \"foo\", \"get\", \"u33\", \"0\"}), IntArg(16843009));\n\n  // nonalligned offset reads/writes signed\n  Run({\"bitfield\", \"foo\", \"set\", \"i64\", \"0\", \"0\"});\n  // Set the bit battern 1111111 11111111 0000000 000000001\n  ASSERT_THAT(Run({\"bitfield\", \"foo\", \"set\", \"i8\", \"1\", \"-1\"}), IntArg(0));\n  ASSERT_THAT(Run({\"bitfield\", \"foo\", \"get\", \"i8\", \"1\"}), IntArg(-1));\n\n  ASSERT_THAT(Run({\"bitfield\", \"foo\", \"set\", \"i8\", \"9\", \"-1\"}), IntArg(0));\n  ASSERT_THAT(Run({\"bitfield\", \"foo\", \"get\", \"i8\", \"9\"}), IntArg(-1));\n\n  ASSERT_THAT(Run({\"bitfield\", \"foo\", \"set\", \"i8\", \"17\", \"0\"}), IntArg(0));\n  ASSERT_THAT(Run({\"bitfield\", \"foo\", \"get\", \"i8\", \"17\"}), IntArg(0));\n\n  ASSERT_THAT(Run({\"bitfield\", \"foo\", \"set\", \"i8\", \"25\", \"1\"}), IntArg(0));\n  ASSERT_THAT(Run({\"bitfield\", \"foo\", \"get\", \"i8\", \"25\"}), IntArg(1));\n\n  ASSERT_THAT(Run({\"bitfield\", \"foo\", \"get\", \"i32\", \"1\"}), IntArg(-65535));\n\n  // chaining\n  Run({\n      \"bitfield\", \"foo\", \"set\", \"u1\", \"0\", \"1\", \"set\", \"u1\", \"1\", \"1\", \"set\", \"u1\",\n      \"2\",        \"1\",   \"set\", \"u1\", \"3\", \"1\", \"set\", \"u1\", \"4\", \"1\", \"set\", \"u1\",\n      \"5\",        \"1\",   \"set\", \"u1\", \"6\", \"1\", \"set\", \"u1\", \"7\", \"1\",\n  });\n\n  ASSERT_THAT(Run({\"bitfield\", \"foo\", \"get\", \"u8\", \"0\"}), IntArg(255));\n\n  ASSERT_THAT(Run({\n                  \"bitfield\",\n                  \"foo\",\n                  \"set\",\n                  \"u1\",\n                  \"0\",\n                  \"0\",\n                  \"incrby\",\n                  \"u1\",\n                  \"0\",\n                  \"1\",\n               
   \"get\",\n                  \"u1\",\n                  \"0\",\n              }),\n              RespArray(ElementsAre(IntArg(1), IntArg(1), IntArg(1))));\n\n  // check for positional offsets\n  Run({\"bitfield\", \"foo\", \"set\", \"u8\", \"#0\", \"1\", \"set\", \"u8\", \"#1\", \"1\", \"set\", \"u8\", \"#2\", \"1\"});\n\n  ASSERT_THAT(Run({\"bitfield\", \"foo\", \"get\", \"u1\", \"7\"}), IntArg(1));\n  ASSERT_THAT(Run({\"bitfield\", \"foo\", \"get\", \"u1\", \"15\"}), IntArg(1));\n}\n\nTEST_F(BitOpsFamilyTest, BitFieldLargeOffset) {\n  Run({\"set\", \"foo\", \"bar\"});\n\n  auto resp = Run({\"bitfield\", \"foo\", \"get\", \"u32\", \"0\", \"overflow\", \"fail\", \"incrby\", \"u32\", \"0\",\n                   \"4294967295\"});\n  EXPECT_THAT(resp, RespArray(ElementsAre(IntArg(1650553344), ArgType(RespExpr::NIL))));\n\n  resp = Run({\"strlen\", \"foo\"});\n  EXPECT_THAT(resp, 4);\n\n  resp = Run({\"get\", \"foo\"});\n  EXPECT_THAT(ToSV(resp.GetBuf()), Eq(std::string_view(\"bar\\0\", 4)));\n\n  resp = Run({\"bitfield\", \"foo\", \"get\", \"u32\", \"4294967295\"});\n  EXPECT_THAT(resp, 0);\n}\n\nTEST_F(BitOpsFamilyTest, BitFieldIssue5237_SetOverflowSat) {\n  Run({\"set\", \"key:bitfield_set\", \"\\xff\\xf0\\x00\"});\n  auto resp = Run({\"bitfield\", \"key:bitfield_set\", \"overflow\", \"sat\", \"set\", \"i4\", \"0\", \"8\", \"set\",\n                   \"i4\", \"4\", \"7\"});\n\n  EXPECT_THAT(resp, RespArray(ElementsAre(IntArg(-1), IntArg(-1))));\n}\n\nTEST_F(BitOpsFamilyTest, BitFieldIssue5237_IncrbyCorrectness) {\n  Run({\"set\", \"key:bitfield_incr\", \"\\xff\\xf0\\x00\"});\n  auto resp = Run(\n      {\"bitfield\", \"key:bitfield_incr\", \"incrby\", \"u8\", \"0\", \"85\", \"incrby\", \"u8\", \"16\", \"170\"});\n\n  EXPECT_THAT(resp, RespArray(ElementsAre(IntArg(84), IntArg(170))));\n}\n\nTEST_F(BitOpsFamilyTest, BitFieldIssue5237_InvalidTypeUppercase_Set) {\n  auto expected_error = ErrArg(\n      \"ERR invalid bitfield type. use something like i16 u8. 
note that u64 is not supported but \"\n      \"i64 is.\");\n\n  ASSERT_THAT(Run({\"bitfield\", \"key:bitfield_set:wrong:args\", \"set\", \"I8\", \"0\", \"0\"}),\n              expected_error);\n}\n\nTEST_F(BitOpsFamilyTest, BitFieldIssue5237_InvalidTypeUppercase_Get) {\n  auto expected_error = ErrArg(\n      \"ERR invalid bitfield type. use something like i16 u8. note that u64 is not supported but \"\n      \"i64 is.\");\n\n  ASSERT_THAT(Run({\"bitfield\", \"key:bitfield_get:wrong:args\", \"get\", \"I8\", \"0\"}), expected_error);\n}\n\nTEST_F(BitOpsFamilyTest, BitFieldAdditionalWrongArguments) {\n  // Additional tests to match Python test coverage\n  const auto syntax_error = ErrArg(\"ERR syntax error\");\n  auto expected_error = ErrArg(\n      \"ERR invalid bitfield type. use something like i16 u8. note that u64 is not supported but \"\n      \"i64 is.\");\n\n  // Additional invalid encoding types (from Python tests)\n  ASSERT_THAT(Run({\"bitfield\", \"foo\", \"get\", \"i-42\", \"0\"}), expected_error);\n  ASSERT_THAT(Run({\"bitfield\", \"foo\", \"get\", \"i5?\", \"0\"}), expected_error);\n  ASSERT_THAT(Run({\"bitfield\", \"foo\", \"get\", \"i0\", \"0\"}), expected_error);\n  ASSERT_THAT(Run({\"bitfield\", \"foo\", \"set\", \"i-42\", \"0\", \"0\"}), expected_error);\n  ASSERT_THAT(Run({\"bitfield\", \"foo\", \"set\", \"i5?\", \"0\", \"0\"}), expected_error);\n  ASSERT_THAT(Run({\"bitfield\", \"foo\", \"set\", \"i0\", \"0\", \"0\"}), expected_error);\n\n  // Test negative offsets (should be syntax error)\n  ASSERT_THAT(Run({\"bitfield\", \"foo\", \"get\", \"i16\", \"-1\"}), syntax_error);\n  ASSERT_THAT(Run({\"bitfield\", \"foo\", \"set\", \"i16\", \"-1\", \"0\"}), syntax_error);\n  ASSERT_THAT(Run({\"bitfield\", \"foo\", \"incrby\", \"i16\", \"-1\", \"1\"}), syntax_error);\n\n  // Test invalid values for SET and INCRBY (generates syntax error during parsing)\n  ASSERT_THAT(Run({\"bitfield\", \"foo\", \"set\", \"i16\", \"0\", \"foo\"}), syntax_error);\n  
ASSERT_THAT(Run({\"bitfield\", \"foo\", \"incrby\", \"i16\", \"0\", \"bar\"}), syntax_error);\n}\n\nTEST_F(BitOpsFamilyTest, BitFieldNoOps) {\n  EXPECT_THAT(Run({\"BITFIELD\", \"k\", \"OVERFLOW\", \"SAT\"}), RespArray(ElementsAre()));\n  EXPECT_THAT(Run({\"BITFIELD\", \"k\"}), RespArray(ElementsAre()));\n  EXPECT_THAT(Run({\"BITFIELD_RO\", \"k\", \"OVERFLOW\", \"SAT\"}), RespArray(ElementsAre()));\n  EXPECT_THAT(Run({\"BITFIELD_RO\", \"k\"}), RespArray(ElementsAre()));\n}\n\n}  // end of namespace dfly\n"
  },
  {
    "path": "src/server/blocking_controller.cc",
    "content": "// Copyright 2022, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#include \"server/blocking_controller.h\"\n\n#include <absl/container/inlined_vector.h>\n\n#include <boost/smart_ptr/intrusive_ptr.hpp>\n\n#include \"base/logging.h\"\n#include \"server/engine_shard_set.h\"\n#include \"server/namespaces.h\"\n#include \"server/transaction.h\"\n\nnamespace dfly {\n\nusing namespace std;\n\nstruct WatchItem {\n  Transaction* trans;\n  KeyReadyChecker key_ready_checker;\n\n  Transaction* get() const {\n    return trans;\n  }\n\n  WatchItem(Transaction* t, KeyReadyChecker krc) : trans(t), key_ready_checker(std::move(krc)) {\n  }\n};\n\nstruct BlockingController::WatchQueue {\n  deque<WatchItem> items;\n\n  // Updated  by both coordinator and shard threads but at different times.\n  enum State { SUSPENDED, ACTIVE } state = SUSPENDED;\n\n  auto Find(Transaction* tx) const {\n    return find_if(items.begin(), items.end(),\n                   [tx](const WatchItem& wi) { return wi.get() == tx; });\n  }\n};\n\n// Watch state per db.\nstruct BlockingController::DbWatchTable {\n  // Watch queues per key\n  absl::flat_hash_map<std::string, std::unique_ptr<WatchQueue>> queue_map;\n\n  // awakened keys point to blocked keys that can potentially be unblocked.\n  absl::flat_hash_set<std::string> awakened_keys;\n\n  // returns true if awake event was added.\n  // Requires that the key queue be in the required state.\n  bool AddAwakeEvent(string_view key);\n\n  // Returns true if awakened tx was removed from the queue.\n  bool UnwatchTx(string_view key, Transaction* tx);\n};\n\nbool BlockingController::DbWatchTable::UnwatchTx(string_view key, Transaction* tx) {\n  auto wq_it = queue_map.find(key);\n\n  // With multiple same keys we may have misses because the first iteration\n  // on the same key could remove the queue.\n  if (wq_it == queue_map.end())\n    return false;\n\n  WatchQueue* wq = wq_it->second.get();\n  
DCHECK(!wq->items.empty());\n\n  bool res = false;\n  if (wq->state == WatchQueue::ACTIVE && wq->items.front().get() == tx) {\n    wq->items.pop_front();\n\n    // We suspend the queue and add keys to re-verification.\n    // If they are still present, this queue will be reactivated below.\n    wq->state = WatchQueue::SUSPENDED;\n\n    if (!wq->items.empty())\n      awakened_keys.insert(wq_it->first);  // send for further validation.\n    res = true;\n  } else {\n    // tx can be is_awakened == true because of some other key and this queue would be\n    // in suspended and we still need to clean it up.\n    // the suspended item does not have to be the first one in the queue.\n    // This shard has not been awakened and in case this transaction in the queue\n    // we must clean it up.\n    if (auto it = wq->Find(tx); it != wq->items.end()) {\n      wq->items.erase(it);\n    }\n  }\n\n  if (wq->items.empty()) {\n    DVLOG(1) << \"queue_map.erase\";\n    awakened_keys.erase(wq_it->first);\n    queue_map.erase(wq_it);\n  }\n  return res;\n}\n\nBlockingController::BlockingController(EngineShard* owner, Namespace* ns) : owner_(owner), ns_(ns) {\n}\n\nBlockingController::~BlockingController() {\n}\n\nbool BlockingController::DbWatchTable::AddAwakeEvent(string_view key) {\n  auto it = queue_map.find(key);\n\n  if (it == queue_map.end() || it->second->state != WatchQueue::SUSPENDED)\n    return false;  /// nobody watches this key or state does not match.\n\n  return awakened_keys.insert(it->first).second;\n}\n\n// Removes tx from its watch queues if tx appears there.\nvoid BlockingController::RemovedWatched(Keys keys, Transaction* tx) {\n  DCHECK(tx);\n  VLOG(1) << \"FinalizeBlocking [\" << owner_->shard_id() << \"]\" << tx->DebugId();\n\n  bool removed = awakened_transactions_.erase(tx);\n  DCHECK(!removed || (tx->DEBUG_GetLocalMask(owner_->shard_id()) & Transaction::AWAKED_Q));\n\n  auto dbit = watched_dbs_.find(tx->GetDbIndex());\n\n  // Can happen if it was the only 
transaction in the queue and it was notified and removed.\n  if (dbit == watched_dbs_.end())\n    return;\n\n  DbWatchTable& wt = *dbit->second;\n\n  // Add keys of processed transaction so we could awake the next one in the queue\n  // in case those keys still exist.\n  for (string_view key : keys) {\n    bool removed_awakened = wt.UnwatchTx(key, tx);\n    CHECK(!removed_awakened || removed)\n        << tx->DebugId() << \" \" << key << \" \" << tx->DEBUG_GetLocalMask(owner_->shard_id());\n  }\n\n  if (wt.queue_map.empty()) {\n    watched_dbs_.erase(dbit);\n  }\n  awakened_indices_.emplace(tx->GetDbIndex());\n}\n\n// Runs on the shard thread.\nvoid BlockingController::NotifyPending() {\n  const Transaction* tx = owner_->GetContTx();\n  CHECK(tx == nullptr) << tx->DebugId();\n\n  DbContext context;\n  context.ns = ns_;\n  context.time_now_ms = GetCurrentTimeMs();\n\n  for (DbIndex index : awakened_indices_) {\n    auto dbit = watched_dbs_.find(index);\n    if (dbit == watched_dbs_.end())\n      continue;\n\n    context.db_index = index;\n    DbWatchTable& wt = *dbit->second;  // pointer stability due to node_hash_map\n    for (string_view key : wt.awakened_keys) {\n      DVLOG(1) << \"Processing awakened key \" << key;\n      auto w_it = wt.queue_map.find(key);\n      CHECK(w_it != wt.queue_map.end());\n\n      WatchQueue* wq = w_it->second.get();\n      NotifyWatchQueue(key, wq, context);\n      if (wq->items.empty())\n        wt.queue_map.erase(w_it);\n    }\n    wt.awakened_keys.clear();\n\n    if (wt.queue_map.empty()) {\n      watched_dbs_.erase(dbit);\n    }\n  }\n  awakened_indices_.clear();\n}\n\nvoid BlockingController::AddWatched(Keys watch_keys, KeyReadyChecker krc, Transaction* trans) {\n  auto [dbit, added] = watched_dbs_.emplace(trans->GetDbIndex(), nullptr);\n  if (added) {\n    dbit->second = make_unique<DbWatchTable>();\n  }\n\n  DbWatchTable& wt = *dbit->second;\n\n  for (auto key : watch_keys) {\n    auto [res, inserted] = 
wt.queue_map.emplace(key, nullptr);\n    if (inserted)\n      res->second = make_unique<WatchQueue>();\n\n    if (!res->second->items.empty()) {\n      Transaction* last = res->second->items.back().get();\n      DCHECK_GT(last->GetUseCount(), 0u);\n\n      // Duplicate keys case. We push only once per key.\n      if (last == trans)\n        continue;\n    }\n    DVLOG(2) << \"Emplace \" << trans->DebugId() << \" to watch \" << key;\n    res->second->items.emplace_back(trans, krc);\n  }\n}\n\n// Called from commands like lpush.\nvoid BlockingController::Awaken(DbIndex db_index, string_view db_key) {\n  auto it = watched_dbs_.find(db_index);\n  if (it == watched_dbs_.end())\n    return;\n\n  DbWatchTable& wt = *it->second;\n  DCHECK(!wt.queue_map.empty());\n\n  if (wt.AddAwakeEvent(db_key)) {\n    VLOG(1) << \"Touch: db(\" << db_index << \") \" << db_key;\n    awakened_indices_.insert(db_index);\n  }\n}\n\n// Marks the queue as active and notifies the first transaction in the queue.\nvoid BlockingController::NotifyWatchQueue(std::string_view key, WatchQueue* wq,\n                                          const DbContext& context) {\n  DCHECK_EQ(wq->state, WatchQueue::SUSPENDED);\n\n  auto& queue = wq->items;\n  ShardId sid = owner_->shard_id();\n\n  // In the most cases we shouldn't have skipped elements at all\n  absl::InlinedVector<dfly::WatchItem, 4> skipped;\n  while (!queue.empty()) {\n    auto& wi = queue.front();\n    Transaction* head = wi.get();\n    // We check may the transaction be notified otherwise move it to the end of the queue\n    if (wi.key_ready_checker(owner_, context, head, key)) {\n      DVLOG(2) << \"WQ-Pop \" << head->DebugId() << \" from key \" << key << \" committed txid \"\n               << owner_->committed_txid();\n      if (head->NotifySuspended(sid, key)) {\n        wq->state = WatchQueue::ACTIVE;\n        // We deliberately keep the notified transaction in the queue to know which queue\n        // must handled when this transaction 
finished.\n        awakened_transactions_.insert(head);\n        break;\n      }\n    } else {\n      skipped.push_back(std::move(wi));\n    }\n\n    queue.pop_front();\n  }\n  std::move(skipped.begin(), skipped.end(), std::back_inserter(queue));\n}\n\nsize_t BlockingController::NumWatched(DbIndex db_indx) const {\n  auto it = watched_dbs_.find(db_indx);\n  if (it == watched_dbs_.end())\n    return 0;\n\n  return it->second->queue_map.size();\n}\n\nvector<string> BlockingController::GetWatchedKeys(DbIndex db_indx) const {\n  vector<string> res;\n  auto it = watched_dbs_.find(db_indx);\n\n  if (it != watched_dbs_.end()) {\n    for (const auto& k_v : it->second->queue_map) {\n      res.push_back(k_v.first);\n    }\n  }\n\n  return res;\n}\n\n}  // namespace dfly\n"
  },
  {
    "path": "src/server/blocking_controller.h",
    "content": "// Copyright 2022, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#pragma once\n\n#include <absl/container/btree_map.h>\n#include <absl/container/flat_hash_map.h>\n#include <absl/container/flat_hash_set.h>\n\n#include \"base/string_view_sso.h\"\n#include \"server/tx_base.h\"\n\nnamespace dfly {\n\n// Used for tracking keys of blocking transactions and properly notifying them.\n// First, keys are marked as watched and associated with an owner transaction. A mutating\n// transaction marks them as touched, and once it concludes, the watching transactions are notified.\nclass BlockingController {\n public:\n  explicit BlockingController(EngineShard* owner, Namespace* ns);\n  ~BlockingController();\n\n  using Keys = ShardArgs;\n\n  bool HasAwakedTransaction() const {\n    return !awakened_transactions_.empty();\n  }\n\n  const auto& awakened_transactions() const {\n    return awakened_transactions_;\n  }\n\n  // Associate given keys with transaction, checked via the krc checker\n  void AddWatched(Keys watch_keys, KeyReadyChecker krc, Transaction* me);\n\n  // Remove transaction from watching these keys\n  void RemovedWatched(Keys keys, Transaction* tx);\n\n  // Mark given key as awakened. 
Called by commands mutating this key.\n  void Awaken(DbIndex db_index, std::string_view key);\n\n  // Notify transactions of awakened keys\n  void NotifyPending();\n\n  // Used in tests and debugging functions.\n  size_t NumWatched(DbIndex db_indx) const;\n  std::vector<std::string> GetWatchedKeys(DbIndex db_indx) const;\n\n private:\n  struct WatchQueue;\n  struct DbWatchTable;\n\n  void NotifyWatchQueue(std::string_view key, WatchQueue* wqm, const DbContext& context);\n\n  EngineShard* owner_;\n  Namespace* ns_;\n\n  // TODO: check if unique_ptr indirection is required\n  absl::flat_hash_map<DbIndex, std::unique_ptr<DbWatchTable>> watched_dbs_;  // watched keys\n  absl::flat_hash_set<DbIndex> awakened_indices_;  // watched_dbs_ with awakened keys\n\n  // Transactions that got awakened with NotifySuspended\n  // TODO: Used only for one DCHECK\n  absl::flat_hash_set<Transaction*> awakened_transactions_;\n};\n}  // namespace dfly\n"
  },
  {
    "path": "src/server/blocking_controller_test.cc",
    "content": "// Copyright 2023, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#include \"server/blocking_controller.h\"\n\n#include <gmock/gmock.h>\n\n#include \"base/logging.h\"\n#include \"facade/facade_stats.h\"\n#include \"server/acl/acl_commands_def.h\"\n#include \"server/command_registry.h\"\n#include \"server/engine_shard_set.h\"\n#include \"server/namespaces.h\"\n#include \"server/server_state.h\"\n#include \"server/transaction.h\"\n#include \"util/fibers/pool.h\"\n\nnamespace dfly {\n\nusing namespace util;\nusing namespace std;\nusing namespace std::chrono;\nusing namespace testing;\n\nconstexpr size_t kNumThreads = 3;\n\nclass BlockingControllerTest : public Test {\n protected:\n  BlockingControllerTest() : cid_(\"blpop\", 0, -3, 1, -2, acl::NONE) {\n  }\n  void SetUp() override;\n  void TearDown() override;\n\n  static void SetUpTestSuite() {\n    ServerState::Init(kNumThreads, kNumThreads, nullptr, nullptr);\n    facade::tl_facade_stats = new facade::FacadeStats;\n  }\n\n  std::unique_ptr<ProactorPool> pp_;\n  boost::intrusive_ptr<Transaction> trans_;\n  CommandId cid_;\n  StringVec str_vec_;\n  CmdArgVec arg_vec_;\n};\n\nvoid BlockingControllerTest::SetUp() {\n  pp_.reset(fb2::Pool::Epoll(kNumThreads));\n  pp_->Run();\n  pp_->AwaitBrief([](unsigned index, ProactorBase* p) {\n    ServerState::Init(index, kNumThreads, nullptr, nullptr);\n    if (facade::tl_facade_stats == nullptr) {\n      facade::tl_facade_stats = new facade::FacadeStats;\n    }\n  });\n\n  shard_set = new EngineShardSet(pp_.get());\n  shard_set->Init(kNumThreads, nullptr);\n\n  trans_.reset(new Transaction{&cid_});\n\n  str_vec_.assign({\"x\", \"z\", \"0\"});\n  for (auto& s : str_vec_) {\n    arg_vec_.emplace_back(s);\n  }\n\n  trans_->InitByArgs(&namespaces->GetDefaultNamespace(), 0, {arg_vec_.data(), arg_vec_.size()});\n  CHECK_EQ(0u, Shard(\"x\", shard_set->size()));\n  CHECK_EQ(2u, Shard(\"z\", shard_set->size()));\n\n  const TestInfo* 
const test_info = UnitTest::GetInstance()->current_test_info();\n  LOG(INFO) << \"Starting \" << test_info->name();\n}\n\nvoid BlockingControllerTest::TearDown() {\n  shard_set->PreShutdown();\n  shard_set->Shutdown();\n  delete shard_set;\n\n  pp_->Stop();\n  pp_.reset();\n}\n\nTEST_F(BlockingControllerTest, Basic) {\n  trans_->ScheduleSingleHop([&](Transaction* t, EngineShard* shard) {\n    BlockingController bc(shard, &namespaces->GetDefaultNamespace());\n    auto keys = t->GetShardArgs(shard->shard_id());\n    bc.AddWatched(\n        keys, [](auto...) { return true; }, t);\n    EXPECT_EQ(1, bc.NumWatched(0));\n\n    bc.RemovedWatched(keys, t);\n    EXPECT_EQ(0, bc.NumWatched(0));\n    return OpStatus::OK;\n  });\n}\n\nTEST_F(BlockingControllerTest, Timeout) {\n  time_point tp = steady_clock::now() + chrono::milliseconds(10);\n  bool blocked;\n  bool paused;\n\n  facade::OpStatus status = trans_->WaitOnWatch(\n      tp, Transaction::kShardArgs, [](auto...) { return true; }, &blocked, &paused);\n\n  EXPECT_EQ(status, facade::OpStatus::TIMED_OUT);\n  unsigned num_watched = shard_set->Await(\n\n      0, [&] {\n        return namespaces->GetDefaultNamespace()\n            .GetBlockingController(EngineShard::tlocal()->shard_id())\n            ->NumWatched(0);\n      });\n\n  EXPECT_EQ(0, num_watched);\n  trans_.reset();\n}\n\n}  // namespace dfly\n"
  },
  {
    "path": "src/server/bloom_family.cc",
    "content": "// Copyright 2024, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n#include \"core/bloom.h\"\n#include \"facade/cmd_arg_parser.h\"\n#include \"facade/error.h\"\n#include \"facade/reply_builder.h\"\n#include \"server/acl/acl_commands_def.h\"\n#include \"server/command_families.h\"\n#include \"server/command_registry.h\"\n#include \"server/conn_context.h\"\n#include \"server/db_slice.h\"\n#include \"server/engine_shard_set.h\"\n#include \"server/error.h\"\n#include \"server/transaction.h\"\n\nnamespace dfly {\n\nusing namespace facade;\nusing namespace std;\n\nnamespace {\n\nconstexpr double kDefaultFpProb = 0.01;\nconstexpr double kDefaultGrowFactor = 2;\nstruct SbfParams {\n  uint32_t init_capacity;\n  double error;\n  double grow_factor = kDefaultGrowFactor;\n\n  bool ok() const {\n    return error > 0 and error < 0.5;\n  }\n};\n\nusing AddResult = absl::InlinedVector<OpResult<bool>, 4>;\nusing ExistsResult = absl::InlinedVector<bool, 4>;\n\nOpStatus OpReserve(const SbfParams& params, const OpArgs& op_args, string_view key) {\n  auto& db_slice = op_args.GetDbSlice();\n  auto op_res = db_slice.AddOrFind(op_args.db_cntx, key, OBJ_SBF);\n  RETURN_ON_BAD_STATUS(op_res);\n\n  if (!op_res->is_new)\n    return OpStatus::KEY_EXISTS;\n\n  PrimeValue& pv = op_res->it->second;\n  pv.SetSBF(params.init_capacity, params.error, params.grow_factor);\n\n  return OpStatus::OK;\n}\n\n// Returns true, if item was added, false if it was already \"present\".\nOpResult<AddResult> OpAdd(const OpArgs& op_args, string_view key, CmdArgList items) {\n  auto& db_slice = op_args.GetDbSlice();\n\n  auto op_res = db_slice.AddOrFind(op_args.db_cntx, key, OBJ_SBF);\n  RETURN_ON_BAD_STATUS(op_res);\n\n  PrimeValue& pv = op_res->it->second;\n\n  if (op_res->is_new) {\n    pv.SetSBF(0, kDefaultFpProb, kDefaultGrowFactor);\n  }\n\n  SBF* sbf = pv.GetSBF();\n  AddResult result(items.size());\n  for (size_t i = 0; i < items.size(); ++i) {\n    
result[i] = sbf->Add(ToSV(items[i]));\n  }\n  return result;\n}\n\nOpResult<ExistsResult> OpExists(const OpArgs& op_args, string_view key, CmdArgList items) {\n  auto& db_slice = op_args.GetDbSlice();\n  OpResult op_res = db_slice.FindReadOnly(op_args.db_cntx, key, OBJ_SBF);\n  if (!op_res)\n    return op_res.status();\n  auto it = (*op_res);\n\n  const SBF* sbf = it->second.GetSBF();\n  ExistsResult result(items.size());\n\n  for (size_t i = 0; i < items.size(); ++i) {\n    result[i] = sbf->Exists(ToSV(items[i]));\n  }\n\n  return result;\n}\n\nvoid CmdReserve(CmdArgList args, CommandContext* cmd_cntx) {\n  CmdArgParser parser(args);\n  string_view key = parser.Next();\n  SbfParams params;\n\n  tie(params.error, params.init_capacity) = parser.Next<double, uint32_t>();\n  auto* rb = static_cast<RedisReplyBuilder*>(cmd_cntx->rb());\n  if (parser.TakeError())\n    return rb->SendError(kSyntaxErr);\n\n  if (!params.ok())\n    return rb->SendError(\"error rate is out of range\", kSyntaxErrType);\n\n  const auto cb = [&](Transaction* t, EngineShard* shard) {\n    return OpReserve(params, t->GetOpArgs(shard), key);\n  };\n\n  OpStatus res = cmd_cntx->tx()->ScheduleSingleHop(std::move(cb));\n  if (res == OpStatus::KEY_EXISTS) {\n    return rb->SendError(\"item exists\");\n  }\n  return rb->SendError(res);\n}\n\nvoid CmdAdd(CmdArgList args, CommandContext* cmd_cntx) {\n  string_view key = ArgS(args, 0);\n  args.remove_prefix(1);\n\n  const auto cb = [&](Transaction* t, EngineShard* shard) {\n    return OpAdd(t->GetOpArgs(shard), key, args);\n  };\n\n  OpResult res = cmd_cntx->tx()->ScheduleSingleHopT(std::move(cb));\n  OpStatus status = res.status();\n  if (res) {\n    if (res->front())\n      return cmd_cntx->SendLong(*res->front());\n    else\n      status = res->front().status();\n  }\n\n  return cmd_cntx->SendError(status);\n}\n\nvoid CmdExists(CmdArgList args, CommandContext* cmd_cntx) {\n  string_view key = ArgS(args, 0);\n  args.remove_prefix(1);\n  const auto cb = 
[&](Transaction* t, EngineShard* shard) {\n    return OpExists(t->GetOpArgs(shard), key, args);\n  };\n\n  OpResult res = cmd_cntx->tx()->ScheduleSingleHopT(std::move(cb));\n  return cmd_cntx->SendLong(res ? res->front() : 0);\n}\n\nvoid CmdMAdd(CmdArgList args, CommandContext* cmd_cntx) {\n  string_view key = ArgS(args, 0);\n  args.remove_prefix(1);\n\n  const auto cb = [&](Transaction* t, EngineShard* shard) {\n    return OpAdd(t->GetOpArgs(shard), key, args);\n  };\n\n  RedisReplyBuilder* rb = static_cast<RedisReplyBuilder*>(cmd_cntx->rb());\n  OpResult res = cmd_cntx->tx()->ScheduleSingleHopT(std::move(cb));\n  if (!res) {\n    return rb->SendError(res.status());\n  }\n  const AddResult& add_res = *res;\n\n  RedisReplyBuilder::ArrayScope scope{rb, add_res.size()};\n  for (const OpResult<bool>& val : add_res) {\n    if (val) {\n      rb->SendLong(*val);\n    } else {\n      rb->SendError(val.status());\n    }\n  }\n}\n\nvoid CmdMExists(CmdArgList args, CommandContext* cmd_cntx) {\n  string_view key = ArgS(args, 0);\n  args.remove_prefix(1);\n\n  const auto cb = [&](Transaction* t, EngineShard* shard) {\n    return OpExists(t->GetOpArgs(shard), key, args);\n  };\n\n  OpResult res = cmd_cntx->tx()->ScheduleSingleHopT(std::move(cb));\n\n  auto* rb = static_cast<RedisReplyBuilder*>(cmd_cntx->rb());\n  RedisReplyBuilder::ArrayScope scope{rb, args.size()};\n  for (size_t i = 0; i < args.size(); ++i) {\n    rb->SendLong(res ? 
res->at(i) : 0);\n  }\n}\n\n}  // namespace\n\nusing CI = CommandId;\n\n#define HFUNC(x) SetHandler(&Cmd##x)\n\nvoid RegisterBloomFamily(CommandRegistry* registry) {\n  registry->StartFamily();\n\n  *registry << CI{\"BF.RESERVE\", CO::JOURNALED | CO::DENYOOM | CO::FAST, -4, 1, 1, acl::BLOOM}.HFUNC(\n                   Reserve)\n            << CI{\"BF.ADD\", CO::JOURNALED | CO::DENYOOM | CO::FAST, 3, 1, 1, acl::BLOOM}.HFUNC(Add)\n            << CI{\"BF.MADD\", CO::JOURNALED | CO::DENYOOM | CO::FAST, -3, 1, 1, acl::BLOOM}.HFUNC(\n                   MAdd)\n            << CI{\"BF.EXISTS\", CO::READONLY | CO::FAST, 3, 1, 1, acl::BLOOM}.HFUNC(Exists)\n            << CI{\"BF.MEXISTS\", CO::READONLY | CO::FAST, -3, 1, 1, acl::BLOOM}.HFUNC(MExists);\n};\n\n}  // namespace dfly\n"
  },
  {
    "path": "src/server/bloom_family_test.cc",
    "content": "// Copyright 2024, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n#include \"facade/facade_test.h\"\n#include \"server/test_utils.h\"\n\nnamespace dfly {\n\nusing testing::ElementsAre;\n\nclass BloomFamilyTest : public BaseFamilyTest {\n protected:\n};\n\nTEST_F(BloomFamilyTest, Basic) {\n  auto resp = Run({\"bf.reserve\", \"b1\", \"0.1\", \"32\"});\n  EXPECT_EQ(resp, \"OK\");\n  EXPECT_EQ(Run({\"type\", \"b1\"}), \"MBbloom--\");\n  EXPECT_THAT(Run({\"bf.add\", \"b1\", \"a\"}), IntArg(1));\n  EXPECT_THAT(Run({\"bf.add\", \"b1\", \"b\"}), IntArg(1));\n  EXPECT_THAT(Run({\"bf.add\", \"b1\", \"b\"}), IntArg(0));\n  EXPECT_THAT(Run({\"bf.add\", \"b2\", \"b\"}), IntArg(1));\n  EXPECT_EQ(Run({\"type\", \"b2\"}), \"MBbloom--\");\n\n  EXPECT_THAT(Run({\"bf.exists\", \"b2\", \"c\"}), IntArg(0));\n  EXPECT_THAT(Run({\"bf.exists\", \"b3\", \"c\"}), IntArg(0));\n  EXPECT_THAT(Run({\"bf.exists\", \"b2\", \"b\"}), IntArg(1));\n  Run({\"set\", \"str\", \"foo\"});\n  EXPECT_THAT(Run({\"bf.exists\", \"str\", \"b\"}), IntArg(0));\n}\n\nTEST_F(BloomFamilyTest, Multiple) {\n  auto resp = Run({\"bf.mexists\", \"bf1\", \"a\", \"b\", \"c\"});\n  EXPECT_THAT(resp, RespArray(ElementsAre(IntArg(0), IntArg(0), IntArg(0))));\n\n  Run({\"set\", \"str\", \"foo\"});\n  resp = Run({\"bf.mexists\", \"str\", \"a\", \"b\", \"c\"});\n  EXPECT_THAT(resp, RespArray(ElementsAre(IntArg(0), IntArg(0), IntArg(0))));\n\n  resp = Run({\"bf.madd\", \"str\", \"a\"});\n  EXPECT_THAT(resp, ErrArg(\"WRONG\"));\n\n  resp = Run({\"bf.madd\", \"bf1\", \"a\", \"b\", \"c\"});\n  EXPECT_THAT(resp, RespArray(ElementsAre(IntArg(1), IntArg(1), IntArg(1))));\n  resp = Run({\"bf.madd\", \"bf1\", \"a\", \"b\", \"c\"});\n  EXPECT_THAT(resp, RespArray(ElementsAre(IntArg(0), IntArg(0), IntArg(0))));\n  resp = Run({\"bf.mexists\", \"bf1\", \"a\", \"b\", \"c\"});\n  EXPECT_THAT(resp, RespArray(ElementsAre(IntArg(1), IntArg(1), IntArg(1))));\n}\n\n}  // namespace dfly\n"
  },
  {
    "path": "src/server/channel_store.cc",
    "content": "#include \"server/channel_store.h\"\n\n// Copyright 2023, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#include <absl/container/fixed_array.h>\n\n#include \"base/logging.h\"\n#include \"core/glob_matcher.h\"\n#include \"facade/dragonfly_connection.h\"\n#include \"server/cluster/slot_set.h\"\n#include \"server/cluster_support.h\"\n#include \"server/conn_context.h\"\n#include \"server/engine_shard_set.h\"\n#include \"server/server_state.h\"\n\nnamespace dfly {\nusing namespace std;\n\nnamespace {\n\n// Build functor for sending messages to connection\nauto BuildSender(string_view channel, facade::ArgRange messages, bool sharded = false,\n                 bool unsubscribe = false) {\n  absl::FixedArray<string_view, 1> views(messages.Size());\n  size_t messages_size = accumulate(messages.begin(), messages.end(), 0,\n                                    [](int sum, string_view str) { return sum + str.size(); });\n  auto buf = shared_ptr<char[]>{new char[channel.size() + messages_size]};\n  {\n    memcpy(buf.get(), channel.data(), channel.size());\n    char* ptr = buf.get() + channel.size();\n\n    size_t i = 0;\n    for (string_view message : messages) {\n      memcpy(ptr, message.data(), message.size());\n      views[i++] = {ptr, message.size()};\n      ptr += message.size();\n    }\n  }\n\n  return [channel, buf = std::move(buf), views = std::move(views), sharded, unsubscribe](\n             facade::Connection* conn, string pattern) {\n    string_view channel_view{buf.get(), channel.size()};\n    for (std::string_view message_view : views) {\n      conn->SendPubMessageAsync(\n          {std::move(pattern), buf, channel_view, message_view, sharded, unsubscribe});\n    }\n  };\n}\n\n}  // namespace\n\nbool ChannelStore::Subscriber::ByThread(const Subscriber& lhs, const Subscriber& rhs) {\n  return ByThreadId(lhs, rhs.LastKnownThreadId());\n}\n\nbool ChannelStore::Subscriber::ByThreadId(const Subscriber& lhs, const 
unsigned thread) {\n  return lhs.LastKnownThreadId() < thread;\n}\n\nChannelStore::UpdatablePointer::UpdatablePointer(const UpdatablePointer& other) {\n  ptr.store(other.ptr.load(memory_order_relaxed), memory_order_relaxed);\n}\n\nChannelStore::SubscribeMap* ChannelStore::UpdatablePointer::Get() const {\n  return ptr.load(memory_order_acquire);  // sync pointed memory\n}\n\nvoid ChannelStore::UpdatablePointer::Set(ChannelStore::SubscribeMap* sm) {\n  ptr.store(sm, memory_order_release);  // sync pointed memory\n}\n\nChannelStore::SubscribeMap* ChannelStore::UpdatablePointer::operator->() const {\n  return Get();\n}\n\nconst ChannelStore::SubscribeMap& ChannelStore::UpdatablePointer::operator*() const {\n  return *Get();\n}\n\nvoid ChannelStore::ChannelMap::Add(string_view key, ConnectionContext* me, uint32_t thread_id) {\n  auto it = find(key);\n  if (it == end())\n    it = emplace(key, new SubscribeMap{}).first;\n  it->second->emplace(me, thread_id);\n}\n\nvoid ChannelStore::ChannelMap::Remove(string_view key, ConnectionContext* me) {\n  if (auto it = find(key); it != end()) {\n    it->second->erase(me);\n    if (it->second->empty())\n      erase(it);\n  }\n}\n\nvoid ChannelStore::ChannelMap::DeleteAll() {\n  for (auto [k, ptr] : *this)\n    delete ptr.Get();\n}\n\nChannelStore::ChannelStore() : channels_{new ChannelMap{}}, patterns_{new ChannelMap{}} {\n  control_block.most_recent = this;\n}\n\nChannelStore::ChannelStore(ChannelMap* channels, ChannelMap* patterns)\n    : channels_{channels}, patterns_{patterns} {\n}\n\nvoid ChannelStore::Destroy() {\n  control_block.update_mu.lock();\n  control_block.update_mu.unlock();\n\n  auto* store = control_block.most_recent.load(memory_order_relaxed);\n  for (auto* chan_map : {store->channels_, store->patterns_}) {\n    chan_map->DeleteAll();\n    delete chan_map;\n  }\n  delete control_block.most_recent;\n}\n\nChannelStore::ControlBlock ChannelStore::control_block;\n\nunsigned ChannelStore::SendMessages(std::string_view 
channel, facade::ArgRange messages,\n                                    bool sharded) const {\n  vector<Subscriber> subscribers = FetchSubscribers(channel);\n  if (subscribers.empty())\n    return 0;\n\n  // Make sure none of the threads publish buffer limits is reached. We don't reserve memory ahead\n  // and don't prevent the buffer from possibly filling, but the approach is good enough for\n  // limiting fast producers. Most importantly, we can use DispatchBrief below as we block here\n  int32_t last_thread = -1;\n\n  for (auto& sub : subscribers) {\n    int sub_thread = sub.LastKnownThreadId();\n    DCHECK_LE(last_thread, sub_thread);\n    if (last_thread == sub_thread)  // skip same thread\n      continue;\n\n    if (sub.IsExpired())\n      continue;\n\n    // Make sure the connection thread has enough memory budget to accept the message.\n    // This is a heuristic and not entirely hermetic since the connection memory might\n    // get filled again.\n    facade::Connection::EnsureMemoryBudget(sub_thread);\n    last_thread = sub_thread;\n  }\n\n  auto subscribers_ptr = make_shared<decltype(subscribers)>(std::move(subscribers));\n  auto cb = [subscribers_ptr, send = BuildSender(channel, messages, sharded)](unsigned idx, auto*) {\n    auto it = lower_bound(subscribers_ptr->begin(), subscribers_ptr->end(), idx,\n                          ChannelStore::Subscriber::ByThreadId);\n    while (it != subscribers_ptr->end() && it->LastKnownThreadId() == idx) {\n      if (auto* ptr = it->Get(); ptr && ptr->cntx() != nullptr)\n        send(ptr, it->pattern);\n      it++;\n    }\n  };\n  shard_set->pool()->DispatchBrief(std::move(cb));\n\n  return subscribers_ptr->size();\n}\n\nvector<ChannelStore::Subscriber> ChannelStore::FetchSubscribers(string_view channel) const {\n  vector<Subscriber> res;\n\n  if (auto it = channels_->find(channel); it != channels_->end())\n    Fill(*it->second, string{}, &res);\n\n  for (const auto& [pat, subs] : *patterns_) {\n    GlobMatcher 
matcher{pat, true};\n    if (matcher.Matches(channel))\n      Fill(*subs, pat, &res);\n  }\n\n  sort(res.begin(), res.end(), Subscriber::ByThread);\n  return res;\n}\n\nvoid ChannelStore::Fill(const SubscribeMap& src, const string& pattern, vector<Subscriber>* out) {\n  out->reserve(out->size() + src.size());\n  for (const auto [cntx, thread_id] : src) {\n    // `cntx` is expected to be valid as it unregisters itself from the channel_store before\n    // closing.\n    CHECK(cntx->conn_state.subscribe_info);\n    Subscriber sub{cntx->conn()->Borrow(), pattern};\n    out->push_back(std::move(sub));\n  }\n}\n\nstd::vector<string> ChannelStore::ListChannels(const string_view pattern) const {\n  vector<string> res;\n  GlobMatcher matcher{pattern, true};\n  for (const auto& [channel, _] : *channels_) {\n    if (pattern.empty() || matcher.Matches(channel))\n      res.push_back(channel);\n  }\n  return res;\n}\n\nsize_t ChannelStore::PatternCount() const {\n  return patterns_->size();\n}\n\nvoid ChannelStore::UnsubscribeAfterClusterSlotMigration(const cluster::SlotSet& deleted_slots) {\n  if (deleted_slots.Empty()) {\n    return;\n  }\n\n  const uint32_t tid = util::ProactorBase::me()->GetPoolIndex();\n  ChannelStoreUpdater csu(false, false, nullptr, tid);\n\n  for (const auto& [channel, _] : *channels_) {\n    auto channel_slot = KeySlot(channel);\n    if (deleted_slots.Contains(channel_slot)) {\n      csu.Record(channel);\n    }\n  }\n\n  csu.ApplyAndUnsubscribe();\n}\n\n// TODO: Reuse common code with Send function\n// TODO: Find proper solution to hacky `force_unsubscribe` flag or at least move logic out of io\nvoid ChannelStore::UnsubscribeConnectionsFromDeletedSlots(const ChannelsSubMap& sub_map,\n                                                          uint32_t idx) {\n  for (const auto& [channel, subscribers] : sub_map) {\n    // ignored by pub sub handler because should_unsubscribe is true\n    std::string msg = \"__ignore__\";\n    auto send = 
BuildSender(channel, {facade::ArgSlice{msg}}, false, true);\n\n    auto it = lower_bound(subscribers.begin(), subscribers.end(), idx,\n                          ChannelStore::Subscriber::ByThreadId);\n    while (it != subscribers.end() && it->LastKnownThreadId() == idx) {\n      // if ptr->cntx() is null, a connection might have closed or be in the process of closing\n      if (auto* ptr = it->Get(); ptr && ptr->cntx() != nullptr) {\n        DCHECK(it->pattern.empty());\n        send(ptr, it->pattern);\n      }\n      ++it;\n    }\n  }\n}\n\nChannelStoreUpdater::ChannelStoreUpdater(bool pattern, bool to_add, ConnectionContext* cntx,\n                                         uint32_t thread_id)\n    : pattern_{pattern}, to_add_{to_add}, cntx_{cntx}, thread_id_{thread_id} {\n}\n\nvoid ChannelStoreUpdater::Record(string_view key) {\n  ops_.emplace_back(key);\n}\n\npair<ChannelStore::ChannelMap*, bool> ChannelStoreUpdater::GetTargetMap(ChannelStore* store) {\n  auto* target = pattern_ ? store->patterns_ : store->channels_;\n\n  for (auto key : ops_) {\n    auto it = target->find(key);\n    DCHECK(it != target->end() || to_add_);\n    // We need to make a copy, if we are going to add or delete new map slot.\n    if ((to_add_ && it == target->end()) || (!to_add_ && it->second->size() == 1))\n      return {new ChannelStore::ChannelMap{*target}, true};\n  }\n\n  return {target, false};\n}\n\nvoid ChannelStoreUpdater::Modify(ChannelMap* target, string_view key) {\n  using SubscribeMap = ChannelStore::SubscribeMap;\n\n  auto it = target->find(key);\n\n  // New key, add new slot.\n  if (to_add_ && it == target->end()) {\n    target->emplace(key, new SubscribeMap{{cntx_, thread_id_}});\n    return;\n  }\n\n  // Last entry for key, remove slot.\n  if (!to_add_ && it->second->size() == 1) {\n    DCHECK(it->second->begin()->first == cntx_);\n    freelist_.push_back(it->second.Get());\n    target->erase(it);\n    return;\n  }\n\n  // RCU update existing SubscribeMap entry.\n  
DCHECK(!it->second->empty());\n  auto* replacement = new SubscribeMap{*it->second};\n  if (to_add_)\n    replacement->emplace(cntx_, thread_id_);\n  else\n    replacement->erase(cntx_);\n\n  // The pointer can still be in use, so delay freeing it\n  // until the dispatch and update the slot atomically.\n  freelist_.push_back(it->second.Get());\n  it->second.Set(replacement);\n}\n\nvoid ChannelStoreUpdater::Apply() {\n  // Wait for other updates to finish, lock the control block and update store pointer.\n  auto& cb = ChannelStore::control_block;\n  cb.update_mu.lock();\n  auto* store = cb.most_recent.load(memory_order_relaxed);\n\n  // Get target map (copied if needed) and apply operations.\n  auto [target, copied] = GetTargetMap(store);\n  for (auto key : ops_)\n    Modify(target, key);\n\n  // Prepare replacement.\n  auto* replacement = store;\n  if (copied) {\n    auto* new_chans = pattern_ ? store->channels_ : target;\n    auto* new_patterns = pattern_ ? target : store->patterns_;\n    replacement = new ChannelStore{new_chans, new_patterns};\n  }\n\n  // Update control block and unlock it.\n  cb.most_recent.store(replacement, memory_order_relaxed);\n  cb.update_mu.unlock();\n\n  // Update thread local references. Readers fetch subscribers via FetchSubscribers,\n  // which runs without preemption, and store references to them in self container Subscriber\n  // structs. 
This means that any point on the other thread is safe to update the channel store.\n  // Regardless of whether we need to replace, we dispatch to make sure all\n  // queued SubscribeMaps in the freelist are no longer in use.\n  shard_set->pool()->AwaitBrief([](unsigned idx, util::ProactorBase*) {\n    ServerState::tlocal()->UpdateChannelStore(\n        // Do not use memory_order_relaxed, we need to fetch the latest value of\n        // the control block\n        ChannelStore::control_block.most_recent.load(std::memory_order_seq_cst));\n  });\n\n  // Delete previous map and channel store.\n  if (copied) {\n    delete (pattern_ ? store->patterns_ : store->channels_);\n    delete store;\n  }\n\n  for (auto ptr : freelist_)\n    delete ptr;\n}\n\nvoid ChannelStoreUpdater::ApplyAndUnsubscribe() {\n  DCHECK(to_add_ == false);\n  DCHECK(pattern_ == false);\n  DCHECK(cntx_ == nullptr);\n\n  if (ops_.empty()) {\n    return;\n  }\n\n  // Wait for other updates to finish, lock the control block and update store pointer.\n  auto& cb = ChannelStore::control_block;\n  cb.update_mu.lock();\n  auto* store = cb.most_recent.load(memory_order_relaxed);\n\n  // Deep copy, we will remove channels\n  auto* target = new ChannelStore::ChannelMap{*store->channels_};\n\n  for (auto key : ops_) {\n    auto it = target->find(key);\n    freelist_.push_back(it->second.Get());\n    target->erase(it);\n    continue;\n  }\n\n  // Prepare replacement.\n  auto* replacement = new ChannelStore{target, store->patterns_};\n\n  // Update control block and unlock it.\n  cb.most_recent.store(replacement, memory_order_relaxed);\n  cb.update_mu.unlock();\n\n  // FetchSubscribers is not thread safe so we need to fetch here before we do the hop below.\n  // Bonus points because now we compute subscribers only once.\n  absl::flat_hash_map<std::string_view, std::vector<ChannelStore::Subscriber>> subs;\n  for (auto channel : ops_) {\n    auto channel_subs = ServerState::tlocal()->channel_store()->FetchSubscribers(channel);\n    DCHECK(!subs.contains(channel));\n    subs[channel] = std::move(channel_subs);\n  }\n  // Update thread local references. Readers fetch subscribers via FetchSubscribers,\n  // which runs without preemption, and store references to them in self container Subscriber\n  // structs. This means that any point on the other thread is safe to update the channel store.\n  // Regardless of whether we need to replace, we dispatch to make sure all\n  // queued SubscribeMaps in the freelist are no longer in use.\n  shard_set->pool()->AwaitFiberOnAll([&subs](unsigned idx, util::ProactorBase*) {\n    ServerState::tlocal()->UnsubscribeSlotsAndUpdateChannelStore(\n        subs, ChannelStore::control_block.most_recent.load(memory_order_relaxed));\n  });\n\n  // Delete previous map and channel store.\n  delete store->channels_;\n  delete store;\n\n  for (auto ptr : freelist_)\n    delete ptr;\n}\n\n}  // namespace dfly\n"
  },
  {
    "path": "src/server/channel_store.h",
    "content": "// Copyright 2023, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n#pragma once\n\n#include <absl/container/flat_hash_map.h>\n\n#include <string_view>\n\n#include \"facade/connection_ref.h\"\n#include \"facade/facade_types.h\"\n#include \"util/fibers/synchronization.h\"\n\nnamespace dfly {\n\nclass ConnectionContext;\nclass ChannelStoreUpdater;\n\nnamespace cluster {\nclass SlotSet;\n}\n\n// ChannelStore manages PUB/SUB subscriptions.\n//\n// Updates are carried out via RCU (read-copy-update). Each thread stores a pointer to ChannelStore\n// in its local ServerState and uses it for reads. Whenever an update needs to be performed,\n// a new ChannelStore is constructed with the requested modifications and broadcasted to all\n// threads.\n//\n// ServerState ChannelStore* -> ChannelMap* -> atomic<SubscribeMap*> (cntx -> thread)\n//\n// Specifically, whenever a new channel is registered or a channel is removed fully,\n// a new ChannelMap for the specified type (channel/pattern) needs to be constructed. However, if\n// only a single SubscribeMap is modified (no map ChannelMap slots are added or removed),\n// we can update only it with a simpler version of RCU, as SubscribeMap is stored as an atomic\n// pointer inside ChannelMap.\n//\n// To prevent parallel (and thus overlapping) updates, a centralized ControlBlock is used.\n// Update operations are carried out by the ChannelStoreUpdater.\n//\n// A centralized ChannelStore, contrary to sharded storage, avoids contention on a single shard\n// thread for heavy throughput on a single channel and thus seamlessly scales on multiple threads\n// even with a small number of channels. 
In general, it has a slightly lower latency, due to the\n// fact that no hop is required to fetch the subscribers.\nclass ChannelStore {\n  friend class ChannelStoreUpdater;\n\n public:\n  struct Subscriber : public facade::ConnectionRef {\n    Subscriber(ConnectionRef ref, const std::string& pattern)\n        : facade::ConnectionRef(std::move(ref)), pattern(pattern) {\n    }\n\n    // Sort by thread-id. Subscriber without owner comes first.\n    static bool ByThread(const Subscriber& lhs, const Subscriber& rhs);\n    static bool ByThreadId(const Subscriber& lhs, const unsigned thread);\n\n    std::string pattern;  // non-empty if registered via psubscribe\n  };\n\n  ChannelStore();\n\n  // Send messages to channel, block on connection backpressure\n  unsigned SendMessages(std::string_view channel, facade::ArgRange messages, bool sharded) const;\n\n  // Fetch all subscribers for channel, including matching patterns.\n  std::vector<Subscriber> FetchSubscribers(std::string_view channel) const;\n\n  std::vector<std::string> ListChannels(const std::string_view pattern) const;\n\n  size_t PatternCount() const;\n\n  void UnsubscribeAfterClusterSlotMigration(const cluster::SlotSet& deleted_slots);\n\n  using ChannelsSubMap =\n      absl::flat_hash_map<std::string_view, std::vector<ChannelStore::Subscriber>>;\n  void UnsubscribeConnectionsFromDeletedSlots(const ChannelsSubMap& sub_map, uint32_t idx);\n\n  // Destroy current instance and delete it.\n  static void Destroy();\n\n private:\n  using ThreadId = unsigned;\n\n  // Subscribers for a single channel/pattern.\n  using SubscribeMap = absl::flat_hash_map<ConnectionContext*, ThreadId>;\n\n  // Wrapper around atomic pointer that allows copying and moving.\n  // Made to overcome restrictions of absl::flat_hash_map.\n  // Copy/Move don't need to be atomic with RCU.\n  struct UpdatablePointer {\n    UpdatablePointer(SubscribeMap* sm) : ptr{sm} {\n    }\n\n    UpdatablePointer(const UpdatablePointer& other);\n\n    
SubscribeMap* Get() const;\n    void Set(SubscribeMap* sm);\n\n    SubscribeMap* operator->() const;\n    const SubscribeMap& operator*() const;\n\n   private:\n    std::atomic<SubscribeMap*> ptr;\n  };\n\n  // SubscriberMaps for channels/patterns.\n  struct ChannelMap : absl::flat_hash_map<std::string, UpdatablePointer> {\n    void Add(std::string_view key, ConnectionContext* me, uint32_t thread_id);\n    void Remove(std::string_view key, ConnectionContext* me);\n\n    // Delete all stored SubscribeMap pointers.\n    void DeleteAll();\n  };\n\n  // Centralized controller to prevent overlapping updates.\n  struct ControlBlock {\n    std::atomic<ChannelStore*> most_recent;\n    util::fb2::Mutex update_mu;  // locked during updates.\n  };\n\n private:\n  static ControlBlock control_block;\n\n  ChannelStore(ChannelMap* channels, ChannelMap* patterns);\n\n  static void Fill(const SubscribeMap& src, const std::string& pattern,\n                   std::vector<Subscriber>* out);\n\n  ChannelMap* channels_;\n  ChannelMap* patterns_;\n};\n\n// Performs RCU (read-copy-update) updates to the channel store.\n// See ChannelStore header top for design details.\n// Queues operations and performs them with Apply().\nclass ChannelStoreUpdater {\n public:\n  ChannelStoreUpdater(bool pattern, bool to_add, ConnectionContext* cntx, uint32_t thread_id);\n\n  void Record(std::string_view key);\n  void Apply();\n\n  // Used for cluster when slots migrate. We need to:\n  // 1. Remove the channel from the copy.\n  // 2. Unsubscribe all the connections from each channel.\n  // 3. Update the control block pointer.\n  void ApplyAndUnsubscribe();\n\n private:\n  using ChannelMap = ChannelStore::ChannelMap;\n\n  // Get target map and flag whether it was copied.\n  // Must be called with locked control block.\n  std::pair<ChannelMap*, bool> GetTargetMap(ChannelStore* store);\n\n  // Apply modify operation to target map.\n  void Modify(ChannelMap* target, std::string_view key);\n\n private:\n  bool pattern_;\n  bool to_add_;\n  ConnectionContext* cntx_;\n  uint32_t thread_id_;\n\n  // Pending operations.\n  std::vector<std::string_view> ops_;\n\n  // Replaced SubscribeMaps that need to be deleted safely.\n  std::vector<ChannelStore::SubscribeMap*> freelist_;\n};\n\n}  // namespace dfly\n"
  },
  {
    "path": "src/server/cluster/CMakeLists.txt",
    "content": "SET(DF_CLUSTER_SRCS\n    cluster/cluster_config.cc cluster/cluster_family.cc cluster/incoming_slot_migration.cc\n    cluster/outgoing_slot_migration.cc cluster/cluster_defs.cc cluster/cluster_utility.cc\n    cluster/coordinator.cc\n    PARENT_SCOPE)\n"
  },
  {
    "path": "src/server/cluster/cluster_config.cc",
    "content": "// Copyright 2024, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#include \"cluster_config.h\"\n\n#include <absl/container/flat_hash_set.h>\n#include <absl/strings/match.h>\n\n#include <optional>\n#include <string_view>\n\n#include \"base/logging.h\"\n#include \"core/json/json_object.h\"\n\nusing namespace std;\n\nnamespace dfly::cluster {\n\nnamespace {\n\nthread_local shared_ptr<ClusterConfig> tl_cluster_config;\n\nbool HasValidNodeIds(const ClusterShardInfos& new_config) {\n  absl::flat_hash_set<string_view> nodes;\n\n  auto CheckAndInsertNode = [&](string_view node) {\n    auto [_, inserted] = nodes.insert(node);\n    return inserted;\n  };\n\n  for (const auto& shard : new_config) {\n    if (!CheckAndInsertNode(shard.master.id)) {\n      LOG(ERROR) << \"Master \" << shard.master.id << \" appears more than once\";\n      return false;\n    }\n    for (const auto& replica : shard.replicas) {\n      if (!CheckAndInsertNode(replica.id)) {\n        LOG(ERROR) << \"Replica \" << replica.id << \" appears more than once\";\n        return false;\n      }\n    }\n  }\n\n  return true;\n}\n\nbool IsConfigValid(const ClusterShardInfos& new_config) {\n  // Make sure that all slots are set exactly once.\n  vector<bool> slots_found(kMaxSlotNum + 1);\n\n  if (!HasValidNodeIds(new_config)) {\n    return false;\n  }\n\n  for (const auto& shard : new_config) {\n    for (const auto& slot_range : shard.slot_ranges) {\n      if (slot_range.start > slot_range.end) {\n        LOG(ERROR) << \"Invalid cluster config: start=\" << slot_range.start\n                   << \" is larger than end=\" << slot_range.end;\n        return false;\n      }\n\n      for (SlotId slot = slot_range.start; slot <= slot_range.end; ++slot) {\n        if (slot >= slots_found.size()) {\n          LOG(ERROR) << \"Invalid cluster config: slot=\" << slot\n                     << \" is bigger than allowed max=\" << slots_found.size();\n          return 
false;\n        }\n\n        if (slots_found[slot]) {\n          LOG(ERROR) << \"Invalid cluster config: slot=\" << slot\n                     << \" was already configured by another slot range.\";\n          return false;\n        }\n\n        slots_found[slot] = true;\n      }\n    }\n  }\n\n  if (!all_of(slots_found.begin(), slots_found.end(), [](bool b) { return b; }) > 0UL) {\n    LOG(ERROR) << \"Invalid cluster config: some slots were missing.\";\n    return false;\n  }\n\n  return true;\n}\n}  // namespace\n\n/* static */\nshared_ptr<ClusterConfig> ClusterConfig::CreateFromConfig(string_view my_id,\n                                                          const ClusterShardInfos& config) {\n  if (!IsConfigValid(config)) {\n    return nullptr;\n  }\n\n  shared_ptr<ClusterConfig> result(new ClusterConfig());\n\n  result->my_id_ = my_id;\n  result->config_ = config;\n\n  for (const auto& shard : result->config_) {\n    const bool is_master = shard.master.id == my_id;\n    const bool owned_by_me =\n        is_master || any_of(shard.replicas.begin(), shard.replicas.end(),\n                            [&](const ClusterNodeInfo& node) { return node.id == my_id; });\n    if (owned_by_me) {\n      result->my_slots_.Set(shard.slot_ranges, true);\n      if (is_master) {\n        result->is_master_ = true;\n        result->my_outgoing_migrations_ = shard.migrations;\n      }\n    } else {\n      for (const auto& m : shard.migrations) {\n        if (my_id == m.node_info.id) {\n          auto incoming_migration = m;\n          // for incoming migration we need the source node\n          incoming_migration.node_info.id = shard.master.id;\n          result->my_incoming_migrations_.push_back(std::move(incoming_migration));\n        }\n      }\n    }\n  }\n\n  return result;\n}\n\nnamespace {\nconstexpr string_view kInvalidConfigPrefix = \"Invalid JSON cluster config: \"sv;\n\ntemplate <typename T> optional<T> ReadNumeric(const TmpJson& obj) {\n  if (!obj.is_number()) {\n    
LOG(ERROR) << kInvalidConfigPrefix << \"object is not a number \" << obj;\n    return nullopt;\n  }\n\n  try {\n    return obj.as<T>();\n  } catch (const std::exception& e) {\n    LOG(ERROR) << kInvalidConfigPrefix << \"number conversion error: \" << e.what();\n    return nullopt;\n  }\n}\n\noptional<SlotRanges> GetClusterSlotRanges(const TmpJson& slots) {\n  if (!slots.is_array()) {\n    LOG(ERROR) << kInvalidConfigPrefix << \"slot_ranges is not an array \" << slots;\n    return nullopt;\n  }\n\n  std::vector<SlotRange> ranges;\n\n  for (const auto& range : slots.array_range()) {\n    if (!range.is_object()) {\n      LOG(ERROR) << kInvalidConfigPrefix << \"slot_ranges element is not an object \" << range;\n      return nullopt;\n    }\n\n    optional<SlotId> start = ReadNumeric<SlotId>(range.at_or_null(\"start\"));\n    optional<SlotId> end = ReadNumeric<SlotId>(range.at_or_null(\"end\"));\n    if (!start.has_value() || !end.has_value()) {\n      return nullopt;\n    }\n\n    ranges.push_back({.start = start.value(), .end = end.value()});\n  }\n\n  return SlotRanges(ranges);\n}\n\noptional<ClusterExtendedNodeInfo> ParseClusterNode(const TmpJson& json) {\n  if (!json.is_object()) {\n    LOG(ERROR) << kInvalidConfigPrefix << \"node config is not an object \" << json;\n    return nullopt;\n  }\n\n  ClusterExtendedNodeInfo node;\n\n  {\n    auto id = json.at_or_null(\"id\");\n    if (!id.is_string()) {\n      LOG(ERROR) << kInvalidConfigPrefix << \"invalid id for node \" << json;\n      return nullopt;\n    }\n    node.id = std::move(id).as_string();\n  }\n\n  {\n    auto ip = json.at_or_null(\"ip\");\n    if (!ip.is_string()) {\n      LOG(ERROR) << kInvalidConfigPrefix << \"invalid ip for node \" << json;\n      return nullopt;\n    }\n    node.ip = std::move(ip).as_string();\n  }\n\n  {\n    auto port = ReadNumeric<uint16_t>(json.at_or_null(\"port\"));\n    if (!port.has_value()) {\n      return nullopt;\n    }\n    node.port = port.value();\n  }\n\n  {\n    auto 
health = json.at_or_null(\"health\");\n    if (!health.is_null()) {\n      if (!health.is_string()) {\n        LOG(ERROR) << kInvalidConfigPrefix << \"invalid health status for node \" << json;\n      } else {\n        auto health_str = std::move(health).as_string();\n        if (absl::EqualsIgnoreCase(health_str, \"FAIL\")) {\n          node.health = NodeHealth::FAIL;\n        } else if (absl::EqualsIgnoreCase(health_str, \"LOADING\")) {\n          node.health = NodeHealth::LOADING;\n        } else if (absl::EqualsIgnoreCase(health_str, \"ONLINE\")) {\n          node.health = NodeHealth::ONLINE;\n        } else if (absl::EqualsIgnoreCase(health_str, \"HIDDEN\")) {\n          node.health = NodeHealth::HIDDEN;\n        } else {\n          LOG(ERROR) << kInvalidConfigPrefix << \"invalid health status for node: \" << health_str;\n        }\n      }\n    }\n  }\n\n  return node;\n}\n\noptional<std::vector<MigrationInfo>> ParseMigrations(const TmpJson& json) {\n  std::vector<MigrationInfo> res;\n  if (json.is_null()) {\n    return res;\n  }\n\n  if (!json.is_array()) {\n    LOG(INFO) << \"no migrations found: \" << json;\n    return nullopt;\n  }\n\n  for (const auto& element : json.array_range()) {\n    auto node_id = element.at_or_null(\"node_id\");\n    auto ip = element.at_or_null(\"ip\");\n    auto port = ReadNumeric<uint16_t>(element.at_or_null(\"port\"));\n    auto slots = GetClusterSlotRanges(element.at_or_null(\"slot_ranges\"));\n\n    if (!node_id.is_string() || !ip.is_string() || !port || !slots) {\n      LOG(ERROR) << kInvalidConfigPrefix << \"invalid migration json \" << json;\n      return nullopt;\n    }\n\n    res.emplace_back(MigrationInfo{\n        .slot_ranges = std::move(*slots),\n        .node_info =\n            ClusterNodeInfo{.id = node_id.as_string(), .ip = ip.as_string(), .port = *port}});\n  }\n  return res;\n}\n\noptional<ClusterShardInfos> BuildClusterConfigFromJson(const TmpJson& json) {\n  std::vector<ClusterShardInfo> config;\n\n  if 
(!json.is_array()) {\n    LOG(ERROR) << kInvalidConfigPrefix << \"not an array \" << json;\n    return nullopt;\n  }\n\n  for (const auto& element : json.array_range()) {\n    ClusterShardInfo shard;\n\n    if (!element.is_object()) {\n      LOG(ERROR) << kInvalidConfigPrefix << \"shard element is not an object \" << element;\n      return nullopt;\n    }\n\n    auto slots = GetClusterSlotRanges(element.at_or_null(\"slot_ranges\"));\n    if (!slots.has_value()) {\n      return nullopt;\n    }\n    shard.slot_ranges = std::move(slots).value();\n\n    auto master = ParseClusterNode(element.at_or_null(\"master\"));\n    if (!master.has_value()) {\n      return nullopt;\n    }\n    shard.master = std::move(master).value();\n\n    auto replicas = element.at_or_null(\"replicas\");\n    if (!replicas.is_array()) {\n      LOG(ERROR) << kInvalidConfigPrefix << \"replicas is not an array \" << replicas;\n      return nullopt;\n    }\n\n    for (const auto& replica : replicas.array_range()) {\n      auto node = ParseClusterNode(replica);\n      if (!node.has_value()) {\n        return nullopt;\n      }\n      shard.replicas.push_back(std::move(node).value());\n    }\n\n    auto migrations = ParseMigrations(element.at_or_null(\"migrations\"));\n    if (!migrations) {\n      return nullopt;\n    }\n    shard.migrations = std::move(*migrations);\n\n    config.push_back(std::move(shard));\n  }\n\n  return ClusterShardInfos(config);\n}\n}  // namespace\n\n/* static */\nshared_ptr<ClusterConfig> ClusterConfig::CreateFromConfig(string_view my_id,\n                                                          std::string_view json_str) {\n  optional<TmpJson> json_config = JsonFromString(json_str);\n  if (!json_config.has_value()) {\n    LOG(ERROR) << \"Can't parse JSON for ClusterConfig \" << json_str;\n    return nullptr;\n  }\n\n  optional<ClusterShardInfos> config = BuildClusterConfigFromJson(json_config);\n  if (!config.has_value()) {\n    return nullptr;\n  }\n\n  return 
CreateFromConfig(my_id, config.value());\n}\n\nstd::shared_ptr<ClusterConfig> ClusterConfig::CloneWithChanges(\n    const SlotRanges& enable_slots, const SlotRanges& disable_slots) const {\n  auto new_config = std::make_shared<ClusterConfig>(*this);\n  new_config->my_slots_.Set(enable_slots, true);\n  new_config->my_slots_.Set(disable_slots, false);\n  return new_config;\n}\n\nstd::shared_ptr<ClusterConfig> ClusterConfig::CloneWithoutMigrations() const {\n  auto new_config = std::make_shared<ClusterConfig>(*this);\n  new_config->my_incoming_migrations_.clear();\n  new_config->my_outgoing_migrations_.clear();\n  return new_config;\n}\n\nbool ClusterConfig::IsMySlot(SlotId id) const {\n  if (id > kMaxSlotNum) {\n    DCHECK(false) << \"Requesting a non-existing slot id \" << id;\n    return false;\n  }\n\n  return my_slots_.Contains(id);\n}\n\nbool ClusterConfig::IsMySlot(std::string_view key) const {\n  return IsMySlot(KeySlot(key));\n}\n\nClusterNodeInfo ClusterConfig::GetMasterNodeForSlot(SlotId id) const {\n  CHECK_LE(id, kMaxSlotNum) << \"Requesting a non-existing slot id \" << id;\n  for (const auto& shard : config_) {\n    if (shard.slot_ranges.Contains(id)) {\n      if (shard.master.id == my_id_) {\n        // The only reason why this function call and shard.master == my_id_ is the slot was\n        // migrated\n        for (const auto& m : shard.migrations) {\n          if (m.slot_ranges.Contains(id)) {\n            for (const auto& shard : config_) {\n              if (shard.master.id == m.node_info.id) {\n                return shard.master;\n              }\n            }\n          }\n        }\n      }\n      return shard.master;\n    }\n  }\n\n  DCHECK(false) << \"Can't find master node for slot \" << id;\n  return {};\n}\n\nClusterShardInfos ClusterConfig::GetConfig() const {\n  return config_;\n}\n\nconst SlotSet& ClusterConfig::GetOwnedSlots() const {\n  return my_slots_;\n}\n\nstatic std::vector<MigrationInfo> GetMissingMigrations(const 
std::vector<MigrationInfo>& haystack,\n                                                       const std::vector<MigrationInfo>& needle) {\n  std::vector<MigrationInfo> res;\n  for (const auto& h : haystack) {\n    if (find(needle.begin(), needle.end(), h) == needle.end()) {\n      res.push_back(h);\n    }\n  }\n  return res;\n}\n\nstd::vector<MigrationInfo> ClusterConfig::GetNewOutgoingMigrations(\n    const std::shared_ptr<ClusterConfig>& prev) const {\n  return prev ? GetMissingMigrations(my_outgoing_migrations_, prev->my_outgoing_migrations_)\n              : my_outgoing_migrations_;\n}\n\nstd::vector<MigrationInfo> ClusterConfig::GetNewIncomingMigrations(\n    const std::shared_ptr<ClusterConfig>& prev) const {\n  return prev ? GetMissingMigrations(my_incoming_migrations_, prev->my_incoming_migrations_)\n              : my_incoming_migrations_;\n}\n\nstd::vector<MigrationInfo> ClusterConfig::GetFinishedOutgoingMigrations(\n    const std::shared_ptr<ClusterConfig>& prev) const {\n  return prev ? GetMissingMigrations(prev->my_outgoing_migrations_, my_outgoing_migrations_)\n              : std::vector<MigrationInfo>();\n}\n\nstd::vector<MigrationInfo> ClusterConfig::GetFinishedIncomingMigrations(\n    const std::shared_ptr<ClusterConfig>& prev) const {\n  return prev ? GetMissingMigrations(prev->my_incoming_migrations_, my_incoming_migrations_)\n              : std::vector<MigrationInfo>();\n}\n\nstd::shared_ptr<ClusterConfig> ClusterConfig::Current() {\n  return tl_cluster_config;\n}\n\nvoid ClusterConfig::SetCurrent(std::shared_ptr<ClusterConfig> config) {\n  tl_cluster_config = std::move(config);\n}\n\n}  // namespace dfly::cluster\n"
  },
  {
    "path": "src/server/cluster/cluster_config.h",
    "content": "// Copyright 2023, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#pragma once\n\n#include <memory>\n#include <string_view>\n#include <vector>\n\n#include \"src/server/cluster/slot_set.h\"\n\nnamespace dfly::cluster {\n\nclass ClusterConfig {\n public:\n  // Returns an instance with `config` if it is valid.\n  // Returns heap-allocated object as it is too big for a stack frame.\n  static std::shared_ptr<ClusterConfig> CreateFromConfig(std::string_view my_id,\n                                                         const ClusterShardInfos& config);\n\n  // Parses `json_config` into `ClusterShardInfos` and calls the above overload.\n  static std::shared_ptr<ClusterConfig> CreateFromConfig(std::string_view my_id,\n                                                         std::string_view json_config);\n\n  std::shared_ptr<ClusterConfig> CloneWithChanges(const SlotRanges& enable_slots,\n                                                  const SlotRanges& disable_slots) const;\n\n  std::shared_ptr<ClusterConfig> CloneWithoutMigrations() const;\n\n  // If key is in my slots ownership return true\n  bool IsMySlot(SlotId id) const;\n  bool IsMySlot(std::string_view key) const;\n\n  const std::string& MyId() const {\n    return my_id_;\n  }\n\n  bool is_master() const {\n    return is_master_;\n  }\n\n  // Returns the master configured for `id`.\n  ClusterNodeInfo GetMasterNodeForSlot(SlotId id) const;\n\n  ClusterShardInfos GetConfig() const;\n\n  // Use wisely, only after a deep copy of ClusterConfig and\n  // to edit the config in place.\n  ClusterShardInfos& GetMutableConfig() {\n    return config_;\n  }\n\n  const SlotSet& GetOwnedSlots() const;\n\n  std::vector<MigrationInfo> GetNewOutgoingMigrations(\n      const std::shared_ptr<ClusterConfig>& prev) const;\n  std::vector<MigrationInfo> GetNewIncomingMigrations(\n      const std::shared_ptr<ClusterConfig>& prev) const;\n  std::vector<MigrationInfo> 
GetFinishedOutgoingMigrations(\n      const std::shared_ptr<ClusterConfig>& prev) const;\n  std::vector<MigrationInfo> GetFinishedIncomingMigrations(\n      const std::shared_ptr<ClusterConfig>& prev) const;\n\n  std::vector<MigrationInfo> GetIncomingMigrations() const {\n    return my_incoming_migrations_;\n  }\n\n  // Returns a thread-local pointer.\n  static std::shared_ptr<ClusterConfig> Current();\n\n  // Set a thread-local pointer.\n  static void SetCurrent(std::shared_ptr<ClusterConfig> config);\n\n private:\n  struct SlotEntry {\n    const ClusterShardInfo* shard = nullptr;\n    bool owned_by_me = false;\n  };\n\n  ClusterConfig() = default;\n\n  bool is_master_ = false;\n  std::string my_id_;\n  ClusterShardInfos config_;\n\n  SlotSet my_slots_;\n  std::vector<MigrationInfo> my_outgoing_migrations_;\n  std::vector<MigrationInfo> my_incoming_migrations_;\n};\n\n}  // namespace dfly::cluster\n"
  },
  {
    "path": "src/server/cluster/cluster_config_test.cc",
    "content": "// Copyright 2023, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#include \"server/cluster/cluster_config.h\"\n\n#include <gmock/gmock-matchers.h>\n\n#include <jsoncons/json.hpp>\n\n#include \"base/gtest.h\"\n#include \"base/logging.h\"\n#include \"server/test_utils.h\"\n\nusing namespace std;\nusing namespace testing;\nusing Node = dfly::cluster::ClusterNodeInfo;\n\nnamespace dfly::cluster {\n\nMATCHER_P(NodeMatches, expected, \"\") {\n  return arg.id == expected.id && arg.ip == expected.ip && arg.port == expected.port;\n}\n\nclass ClusterConfigTest : public BaseFamilyTest {\n protected:\n  const string kMyId = \"my-id\";\n};\n\ninline string_view GetTag(string_view key) {\n  return LockTagOptions::instance().Tag(key);\n}\n\nTEST_F(ClusterConfigTest, KeyTagTest) {\n  SetTestFlag(\"lock_on_hashtags\", \"true\");\n\n  EXPECT_EQ(GetTag(\"{user1000}.following\"), \"user1000\");\n\n  EXPECT_EQ(GetTag(\"foo{{bar}}zap\"), \"{bar\");\n\n  EXPECT_EQ(GetTag(\"foo{bar}{zap}\"), \"bar\");\n\n  string_view key = \" foo{}{bar}\";\n  EXPECT_EQ(key, GetTag(key));\n\n  key = \"{}foo{bar}{zap}\";\n  EXPECT_EQ(key, GetTag(key));\n\n  SetTestFlag(\"locktag_delimiter\", \":\");\n  TEST_InvalidateLockTagOptions();\n\n  key = \"{user1000}.following\";\n  EXPECT_EQ(GetTag(key), key);\n\n  EXPECT_EQ(GetTag(\"bull:queue1:123\"), \"queue1\");\n  EXPECT_EQ(GetTag(\"bull:queue:1:123\"), \"queue\");\n  EXPECT_EQ(GetTag(\"bull:queue:1:123:456:789:1000\"), \"queue\");\n\n  key = \"bull::queue:1:123\";\n  EXPECT_EQ(GetTag(key), key);\n\n  SetTestFlag(\"locktag_delimiter\", \":\");\n  SetTestFlag(\"locktag_skip_n_end_delimiters\", \"0\");\n  SetTestFlag(\"locktag_prefix\", \"bull\");\n  TEST_InvalidateLockTagOptions();\n  EXPECT_EQ(GetTag(\"bull:queue:123\"), \"queue\");\n  EXPECT_EQ(GetTag(\"bull:queue:123:456:789:1000\"), \"queue\");\n\n  key = \"not-bull:queue1:123\";\n  EXPECT_EQ(GetTag(key), key);\n\n  SetTestFlag(\"locktag_delimiter\", 
\":\");\n  SetTestFlag(\"locktag_skip_n_end_delimiters\", \"1\");\n  SetTestFlag(\"locktag_prefix\", \"bull\");\n  TEST_InvalidateLockTagOptions();\n\n  key = \"bull:queue1:123\";\n  EXPECT_EQ(GetTag(key), key);\n  EXPECT_EQ(GetTag(\"bull:queue:1:123\"), \"queue:1\");\n  EXPECT_EQ(GetTag(\"bull:queue:1:123:456:789:1000\"), \"queue:1\");\n\n  key = \"bull::queue:1:123\";\n  EXPECT_EQ(GetTag(key), key);\n\n  SetTestFlag(\"locktag_delimiter\", \"|\");\n  SetTestFlag(\"locktag_skip_n_end_delimiters\", \"2\");\n  SetTestFlag(\"locktag_prefix\", \"\");\n  TEST_InvalidateLockTagOptions();\n\n  EXPECT_EQ(GetTag(\"|a|b|c|d|e\"), \"a|b|c\");\n}\n\nTEST_F(ClusterConfigTest, ConfigSetInvalidEmpty) {\n  EXPECT_EQ(ClusterConfig::CreateFromConfig(kMyId, ClusterShardInfos{}), nullptr);\n}\n\nTEST_F(ClusterConfigTest, ConfigSetInvalidMissingSlots) {\n  EXPECT_EQ(\n      ClusterConfig::CreateFromConfig(\n          kMyId,\n          {{.slot_ranges = SlotRanges({{.start = 0, .end = 16000}}),\n            .master = {{.id = \"other\", .ip = \"192.168.0.100\", .port = 7000}, NodeHealth::ONLINE},\n            .replicas = {},\n            .migrations = {}}}),\n      nullptr);\n}\n\nTEST_F(ClusterConfigTest, ConfigSetInvalidDoubleBookedSlot) {\n  EXPECT_EQ(ClusterConfig::CreateFromConfig(\n                kMyId, ClusterShardInfos(\n                           {{.slot_ranges = SlotRanges({{.start = 0, .end = 0x3FFF}}),\n                             .master = {{.id = \"other\", .ip = \"192.168.0.100\", .port = 7000},\n                                        NodeHealth::ONLINE},\n                             .replicas = {},\n                             .migrations = {}},\n                            {.slot_ranges = SlotRanges({{.start = 0, .end = 0}}),\n                             .master = {{.id = \"other2\", .ip = \"192.168.0.101\", .port = 7001},\n                                        NodeHealth::ONLINE},\n                             .replicas = {},\n                             
.migrations = {}}})),\n            nullptr);\n}\n\nTEST_F(ClusterConfigTest, ConfigSetInvalidSlotId) {\n  EXPECT_EQ(\n      ClusterConfig::CreateFromConfig(\n          kMyId,\n          {{.slot_ranges = SlotRanges({{.start = 0, .end = 0x3FFF + 1}}),\n            .master = {{.id = \"other\", .ip = \"192.168.0.100\", .port = 7000}, NodeHealth::ONLINE},\n            .replicas = {},\n            .migrations = {}}}),\n      nullptr);\n}\n\nTEST_F(ClusterConfigTest, ConfigSetOk) {\n  auto config = ClusterConfig::CreateFromConfig(\n      kMyId, {{.slot_ranges = SlotRanges({{.start = 0, .end = 0x3FFF}}),\n               .master = {{.id = \"other\", .ip = \"192.168.0.100\", .port = 7000}, NodeHealth::ONLINE},\n               .replicas = {},\n               .migrations = {}}});\n  EXPECT_NE(config, nullptr);\n  EXPECT_THAT(config->GetMasterNodeForSlot(0),\n              NodeMatches(Node{.id = \"other\", .ip = \"192.168.0.100\", .port = 7000}));\n  EXPECT_TRUE(config->GetOwnedSlots().Empty());\n}\n\nTEST_F(ClusterConfigTest, ConfigSetOkWithReplica) {\n  auto config = ClusterConfig::CreateFromConfig(\n      kMyId,\n      {{.slot_ranges = SlotRanges({{.start = 0, .end = 0x3FFF}}),\n        .master = {{.id = \"other-master\", .ip = \"192.168.0.100\", .port = 7000}, NodeHealth::ONLINE},\n        .replicas = {{{.id = \"other-replica\", .ip = \"192.168.0.101\", .port = 7001},\n                      NodeHealth::ONLINE}},\n        .migrations = {}}});\n  EXPECT_NE(config, nullptr);\n  EXPECT_THAT(config->GetMasterNodeForSlot(0),\n              NodeMatches(Node{.id = \"other-master\", .ip = \"192.168.0.100\", .port = 7000}));\n}\n\nTEST_F(ClusterConfigTest, ConfigSetMultipleInstances) {\n  auto config = ClusterConfig::CreateFromConfig(\n      kMyId,\n      ClusterShardInfos(\n          {{.slot_ranges = SlotRanges({{.start = 0, .end = 5'000}}),\n            .master = {{.id = \"other-master\", .ip = \"192.168.0.100\", .port = 7000},\n                       NodeHealth::ONLINE},\n         
   .replicas = {{{.id = \"other-replica\", .ip = \"192.168.0.101\", .port = 7001},\n                          NodeHealth::ONLINE}},\n            .migrations = {}},\n           {.slot_ranges = SlotRanges({{.start = 5'001, .end = 10'000}}),\n            .master = {{.id = kMyId, .ip = \"192.168.0.102\", .port = 7002}, NodeHealth::ONLINE},\n            .replicas = {{{.id = \"other-replica2\", .ip = \"192.168.0.103\", .port = 7003},\n                          NodeHealth::ONLINE}},\n            .migrations = {}},\n           {.slot_ranges = SlotRanges({{.start = 10'001, .end = 0x3FFF}}),\n            .master = {{.id = \"other-master3\", .ip = \"192.168.0.104\", .port = 7004},\n                       NodeHealth::ONLINE},\n            .replicas = {{{.id = \"other-replica3\", .ip = \"192.168.0.105\", .port = 7005},\n                          NodeHealth::ONLINE}},\n            .migrations = {}}}));\n  EXPECT_NE(config, nullptr);\n  SlotSet owned_slots = config->GetOwnedSlots();\n  EXPECT_EQ(owned_slots.ToSlotRanges().Size(), 1);\n  EXPECT_EQ(owned_slots.Count(), 5'000);\n\n  {\n    for (int i = 0; i <= 5'000; ++i) {\n      EXPECT_THAT(config->GetMasterNodeForSlot(i),\n                  NodeMatches(Node{.id = \"other-master\", .ip = \"192.168.0.100\", .port = 7000}));\n      EXPECT_FALSE(config->IsMySlot(i));\n      EXPECT_FALSE(owned_slots.Contains(i));\n    }\n  }\n  {\n    for (int i = 5'001; i <= 10'000; ++i) {\n      EXPECT_THAT(config->GetMasterNodeForSlot(i),\n                  NodeMatches(Node{.id = kMyId, .ip = \"192.168.0.102\", .port = 7002}));\n      EXPECT_TRUE(config->IsMySlot(i));\n      EXPECT_TRUE(owned_slots.Contains(i));\n    }\n  }\n  {\n    for (int i = 10'001; i <= 0x3FFF; ++i) {\n      EXPECT_THAT(config->GetMasterNodeForSlot(i),\n                  NodeMatches(Node{.id = \"other-master3\", .ip = \"192.168.0.104\", .port = 7004}));\n      EXPECT_FALSE(config->IsMySlot(i));\n      EXPECT_FALSE(owned_slots.Contains(i));\n    }\n  
}\n}\n\nTEST_F(ClusterConfigTest, ConfigSetInvalidSlotRanges) {\n  // Note that slot_ranges is not an object\n  EXPECT_EQ(ClusterConfig::CreateFromConfig(kMyId, R\"json(\n                [\n                  {\n                    \"slot_ranges\": \"0,16383\",\n                    \"master\": {\n                      \"id\": \"abcd1234\",\n                      \"ip\": \"10.0.0.1\",\n                      \"port\": 7000\n                    },\n                    \"replicas\": []\n                  }\n                ])json\"),\n            nullptr);\n}\n\nTEST_F(ClusterConfigTest, ConfigSetInvalidSlotRangeStart) {\n  // Note that slot_ranges.start is not a number\n  EXPECT_EQ(ClusterConfig::CreateFromConfig(kMyId, R\"json(\n                [\n                  {\n                    \"slot_ranges\": [\n                      {\n                        \"start\": \"0\",\n                        \"end\": 16383\n                      }\n                    ],\n                    \"master\": {\n                      \"id\": \"abcd1234\",\n                      \"ip\": \"10.0.0.1\",\n                      \"port\": 7000\n                    },\n                    \"replicas\": []\n                  }\n                ])json\"),\n            nullptr);\n}\n\nTEST_F(ClusterConfigTest, ConfigSetInvalidSlotRangeEnd) {\n  // Note that slot_ranges.end is not a number\n  EXPECT_EQ(ClusterConfig::CreateFromConfig(kMyId, R\"json(\n                [\n                  {\n                    \"slot_ranges\": [\n                      {\n                        \"start\": 0,\n                        \"end\": \"16383\"\n                      }\n                    ],\n                    \"master\": {\n                      \"id\": \"abcd1234\",\n                      \"ip\": \"10.0.0.1\",\n                      \"port\": 7000\n                    },\n                    \"replicas\": []\n                  }\n                ])json\"),\n            
nullptr);\n}\n\nTEST_F(ClusterConfigTest, ConfigSetInvalidMissingMaster) {\n  EXPECT_EQ(ClusterConfig::CreateFromConfig(kMyId, R\"json(\n                [\n                  {\n                    \"slot_ranges\": [\n                      {\n                        \"start\": 0,\n                        \"end\": 16383\n                      }\n                    ]\n                  }\n                ])json\"),\n            nullptr);\n}\n\nTEST_F(ClusterConfigTest, ConfigSetInvalidMasterNotObject) {\n  // Note that master is not an object\n  EXPECT_EQ(ClusterConfig::CreateFromConfig(kMyId, R\"json(\n                [\n                  {\n                    \"slot_ranges\": [\n                      {\n                        \"start\": 0,\n                        \"end\": 16383\n                      }\n                    ],\n                    \"master\": 123,\n                    \"replicas\": []\n                  }\n                ])json\"),\n            nullptr);\n}\n\nTEST_F(ClusterConfigTest, ConfigSetInvalidMasterMissingId) {\n  EXPECT_EQ(ClusterConfig::CreateFromConfig(kMyId, R\"json(\n                [\n                  {\n                    \"slot_ranges\": [\n                      {\n                        \"start\": 0,\n                        \"end\": 16383\n                      }\n                    ],\n                    \"master\": {\n                      \"ip\": \"10.0.0.0\",\n                      \"port\": 8000\n                    },\n                    \"replicas\": []\n                  }\n                ])json\"),\n            nullptr);\n}\n\nTEST_F(ClusterConfigTest, ConfigSetInvalidMasterMissingIp) {\n  EXPECT_EQ(ClusterConfig::CreateFromConfig(kMyId, R\"json(\n                [\n                  {\n                    \"slot_ranges\": [\n                      {\n                        \"start\": 0,\n                        \"end\": 16383\n                      }\n                    ],\n                    \"master\": {\n 
                     \"id\": \"abcdefg\",\n                      \"port\": 8000\n                    },\n                    \"replicas\": []\n                  }\n                ])json\"),\n            nullptr);\n}\n\nTEST_F(ClusterConfigTest, ConfigSetInvalidMasterMissingPort) {\n  EXPECT_EQ(ClusterConfig::CreateFromConfig(kMyId, R\"json(\n                [\n                  {\n                    \"slot_ranges\": [\n                      {\n                        \"start\": 0,\n                        \"end\": 16383\n                      }\n                    ],\n                    \"master\": {\n                      \"id\": \"abcdefg\",\n                      \"ip\": \"10.0.0.0\"\n                    },\n                    \"replicas\": []\n                  }\n                ])json\"),\n            nullptr);\n}\n\nTEST_F(ClusterConfigTest, ConfigSetInvalidMissingReplicas) {\n  EXPECT_EQ(ClusterConfig::CreateFromConfig(kMyId, R\"json(\n                [\n                  {\n                    \"slot_ranges\": [\n                      {\n                        \"start\": 0,\n                        \"end\": 16383\n                      }\n                    ],\n                    \"master\": {\n                      \"id\": \"abcdefg\",\n                      \"ip\": \"10.0.0.0\",\n                      \"port\": 8000\n                    }\n                  }\n                ])json\"),\n            nullptr);\n}\n\nTEST_F(ClusterConfigTest, ConfigSetInvalidRepeatingMasterId) {\n  EXPECT_EQ(ClusterConfig::CreateFromConfig(kMyId, R\"json(\n                [\n                  {\n                    \"slot_ranges\": [\n                      {\n                        \"start\": 0,\n                        \"end\": 10000\n                      }\n                    ],\n                    \"master\": {\n                      \"id\": \"abcdefg\",\n                      \"ip\": \"10.0.0.0\",\n                      \"port\": 8000\n                    
},\n                    \"replicas\": []\n                  },\n                  {\n                    \"slot_ranges\": [\n                      {\n                        \"start\": 10001,\n                        \"end\": 16383\n                      }\n                    ],\n                    \"master\": {\n                      \"id\": \"abcdefg\",\n                      \"ip\": \"10.0.0.0\",\n                      \"port\": 8000\n                    },\n                    \"replicas\": []\n                  }\n                ])json\"),\n            nullptr);\n}\n\nTEST_F(ClusterConfigTest, ConfigSetInvalidRepeatingReplicaId) {\n  EXPECT_EQ(ClusterConfig::CreateFromConfig(kMyId, R\"json(\n                [\n                  {\n                    \"slot_ranges\": [\n                      {\n                        \"start\": 0,\n                        \"end\": 16383\n                      }\n                    ],\n                    \"master\": {\n                      \"id\": \"abcdefg\",\n                      \"ip\": \"10.0.0.0\",\n                      \"port\": 8000\n                    },\n                    \"replicas\": [\n                      {\n                        \"id\": \"xyz\",\n                        \"ip\": \"10.0.0.1\",\n                        \"port\": 8001\n                      },\n                      {\n                        \"id\": \"xyz\",\n                        \"ip\": \"10.0.0.2\",\n                        \"port\": 8002\n                      }\n                    ]\n                  }\n                ])json\"),\n            nullptr);\n}\n\nTEST_F(ClusterConfigTest, ConfigSetInvalidRepeatingMasterAndReplicaId) {\n  EXPECT_EQ(ClusterConfig::CreateFromConfig(kMyId, R\"json(\n                [\n                  {\n                    \"slot_ranges\": [\n                      {\n                        \"start\": 0,\n                        \"end\": 16383\n                      }\n                    ],\n        
            \"master\": {\n                      \"id\": \"abcdefg\",\n                      \"ip\": \"10.0.0.0\",\n                      \"port\": 8000\n                    },\n                    \"replicas\": [\n                      {\n                        \"id\": \"abcdefg\",\n                        \"ip\": \"10.0.0.1\",\n                        \"port\": 8001\n                      }\n                    ]\n                  }\n                ])json\"),\n            nullptr);\n}\n\nTEST_F(ClusterConfigTest, ConfigSetMigrations) {\n  const auto* config_str = R\"json(\n  [\n    {\n      \"slot_ranges\": [ { \"start\": 0, \"end\": 8000 } ],\n      \"master\": { \"id\": \"id0\", \"ip\": \"localhost\", \"port\": 3000 },\n      \"replicas\": [],\n      \"migrations\": [{ \"slot_ranges\": [ { \"start\": 7000, \"end\": 8000 } ]\n                     , \"ip\": \"127.0.0.1\", \"port\" : 9001, \"node_id\": \"id1\" }]\n    },\n    {\n      \"slot_ranges\": [ { \"start\": 8001, \"end\": 16383 } ],\n      \"master\": { \"id\": \"id1\", \"ip\": \"localhost\", \"port\": 3001 },\n      \"replicas\": []\n    }\n  ])json\";\n\n  auto config1 = ClusterConfig::CreateFromConfig(\"id0\", config_str);\n  EXPECT_EQ(\n      config1->GetNewOutgoingMigrations(nullptr),\n      (std::vector<MigrationInfo>{{.slot_ranges = SlotRanges({{7000, 8000}}),\n                                   .node_info = {.id = \"id1\", .ip = \"127.0.0.1\", .port = 9001}}}));\n\n  EXPECT_TRUE(config1->GetFinishedOutgoingMigrations(nullptr).empty());\n  EXPECT_TRUE(config1->GetNewIncomingMigrations(nullptr).empty());\n  EXPECT_TRUE(config1->GetFinishedIncomingMigrations(nullptr).empty());\n\n  auto config2 = ClusterConfig::CreateFromConfig(\"id1\", config_str);\n  EXPECT_EQ(\n      config2->GetNewIncomingMigrations(nullptr),\n      (std::vector<MigrationInfo>{{.slot_ranges = SlotRanges({{7000, 8000}}),\n                                   .node_info = {.id = \"id0\", .ip = \"127.0.0.1\", .port = 9001}}}));\n\n 
 EXPECT_TRUE(config2->GetFinishedOutgoingMigrations(nullptr).empty());\n  EXPECT_TRUE(config2->GetNewOutgoingMigrations(nullptr).empty());\n  EXPECT_TRUE(config2->GetFinishedIncomingMigrations(nullptr).empty());\n\n  auto config3 = ClusterConfig::CreateFromConfig(\"id2\", config_str);\n  EXPECT_TRUE(config3->GetFinishedOutgoingMigrations(nullptr).empty());\n  EXPECT_TRUE(config3->GetNewIncomingMigrations(nullptr).empty());\n  EXPECT_TRUE(config3->GetFinishedIncomingMigrations(nullptr).empty());\n  EXPECT_TRUE(config3->GetNewOutgoingMigrations(nullptr).empty());\n\n  const auto* config_str2 = R\"json(\n  [\n    {\n      \"slot_ranges\": [ { \"start\": 0, \"end\": 6999 } ],\n      \"master\": { \"id\": \"id0\", \"ip\": \"localhost\", \"port\": 3000 },\n      \"replicas\": []\n    },\n    {\n      \"slot_ranges\": [ { \"start\": 7000, \"end\": 16383 } ],\n      \"master\": { \"id\": \"id1\", \"ip\": \"localhost\", \"port\": 3001 },\n      \"replicas\": []\n    }\n  ])json\";\n\n  auto config4 = ClusterConfig::CreateFromConfig(\"id0\", config_str2);\n  auto config5 = ClusterConfig::CreateFromConfig(\"id1\", config_str2);\n\n  EXPECT_EQ(\n      config4->GetFinishedOutgoingMigrations(config1),\n      (std::vector<MigrationInfo>{{.slot_ranges = SlotRanges({{7000, 8000}}),\n                                   .node_info = {.id = \"id1\", .ip = \"127.0.0.1\", .port = 9001}}}));\n  EXPECT_TRUE(config4->GetNewIncomingMigrations(config1).empty());\n  EXPECT_TRUE(config4->GetFinishedIncomingMigrations(config1).empty());\n  EXPECT_TRUE(config4->GetNewOutgoingMigrations(config1).empty());\n\n  EXPECT_EQ(\n      config5->GetFinishedIncomingMigrations(config2),\n      (std::vector<MigrationInfo>{{.slot_ranges = SlotRanges({{7000, 8000}}),\n                                   .node_info = {.id = \"id0\", .ip = \"127.0.0.1\", .port = 9001}}}));\n  EXPECT_TRUE(config5->GetNewIncomingMigrations(config2).empty());\n  EXPECT_TRUE(config5->GetFinishedOutgoingMigrations(config2).empty());\n  
EXPECT_TRUE(config5->GetNewOutgoingMigrations(config2).empty());\n}\n\nTEST_F(ClusterConfigTest, InvalidConfigMigrationsWithoutIP) {\n  auto config = ClusterConfig::CreateFromConfig(\"id0\", R\"json(\n  [\n    {\n      \"slot_ranges\": [ { \"start\": 0, \"end\": 8000 } ],\n      \"master\": { \"id\": \"id0\", \"ip\": \"localhost\", \"port\": 3000 },\n      \"replicas\": [],\n      \"migrations\": [{ \"slot_ranges\": [ { \"start\": 7000, \"end\": 8000 } ]\n                     , \"port\" : 9001, \"node_id\": \"id1\" }]\n    },\n    {\n      \"slot_ranges\": [ { \"start\": 8001, \"end\": 16383 } ],\n      \"master\": { \"id\": \"id1\", \"ip\": \"localhost\", \"port\": 3001 },\n      \"replicas\": []\n    }\n  ])json\");\n\n  EXPECT_EQ(config, nullptr);\n}\n\nTEST_F(ClusterConfigTest, SlotSetAPI) {\n  {\n    SlotSet ss(false);\n    EXPECT_EQ(ss.ToSlotRanges(), SlotRanges());\n    EXPECT_FALSE(ss.All());\n    EXPECT_TRUE(ss.Empty());\n  }\n  {\n    SlotSet ss(true);\n    EXPECT_EQ(ss.ToSlotRanges(), SlotRanges({{0, SlotRange::kMaxSlotId}}));\n    EXPECT_TRUE(ss.All());\n    EXPECT_FALSE(ss.Empty());\n  }\n  {\n    SlotSet ss(SlotRanges({{0, 1000}, {1001, 2000}}));\n    EXPECT_EQ(ss.ToSlotRanges(), SlotRanges({SlotRange{0, 2000}}));\n    EXPECT_EQ(ss.Count(), 2001);\n\n    for (uint16_t i = 0; i < 2000; ++i) {\n      EXPECT_TRUE(ss.Contains(i));\n    }\n    for (uint16_t i = 2001; i <= SlotRange::kMaxSlotId; ++i) {\n      EXPECT_FALSE(ss.Contains(i));\n    }\n\n    EXPECT_FALSE(ss.All());\n    EXPECT_FALSE(ss.Empty());\n\n    ss.Set(5010, true);\n    EXPECT_EQ(ss.ToSlotRanges(), SlotRanges({{0, 2000}, {5010, 5010}}));\n\n    ss.Set(SlotRanges({{5000, 5100}}), true);\n    EXPECT_EQ(ss.ToSlotRanges(), SlotRanges({{0, 2000}, {5000, 5100}}));\n\n    ss.Set(5050, false);\n    EXPECT_EQ(ss.ToSlotRanges(), SlotRanges({{0, 2000}, {5000, 5049}, {5051, 5100}}));\n\n    ss.Set(5500, false);\n    EXPECT_EQ(ss.ToSlotRanges(), SlotRanges({{0, 2000}, {5000, 5049}, {5051, 5100}}));\n\n 
   ss.Set(SlotRanges({{5090, 5100}}), false);\n    EXPECT_EQ(ss.ToSlotRanges(), SlotRanges({{0, 2000}, {5000, 5049}, {5051, 5089}}));\n\n    SlotSet ss1(SlotRanges({{1001, 2000}}));\n\n    EXPECT_EQ(ss.GetRemovedSlots(ss1).ToSlotRanges(),\n              SlotRanges({{0, 1000}, {5000, 5049}, {5051, 5089}}));\n    EXPECT_EQ(ss1.GetRemovedSlots(ss).ToSlotRanges(), SlotRanges());\n  }\n}\n\nTEST_F(ClusterConfigTest, ConfigComparison) {\n  auto config1 = ClusterConfig::CreateFromConfig(\"id0\", R\"json(\n  [\n    {\n      \"slot_ranges\": [ { \"start\": 0, \"end\": 8000 } ],\n      \"master\": { \"id\": \"id0\", \"ip\": \"localhost\", \"port\": 3000 },\n      \"replicas\": [],\n      \"migrations\": [{ \"slot_ranges\": [ { \"start\": 7000, \"end\": 8000 } ]\n                     , \"ip\": \"127.0.0.1\", \"port\" : 9001, \"node_id\": \"id1\" }]\n    },\n    {\n      \"slot_ranges\": [ { \"start\": 8001, \"end\": 16383 } ],\n      \"master\": { \"id\": \"id1\", \"ip\": \"localhost\", \"port\": 3001 },\n      \"replicas\": []\n    }\n  ])json\");\n\n  EXPECT_EQ(config1->GetConfig(), config1->GetConfig());\n\n  auto config2 = ClusterConfig::CreateFromConfig(\"id0\", R\"json(\n  [\n    {\n      \"slot_ranges\": [ { \"start\": 0, \"end\": 16383 } ],\n      \"master\": { \"id\": \"id0\", \"ip\": \"localhost\", \"port\": 3000 },\n      \"replicas\": [],\n      \"migrations\": [{ \"slot_ranges\": [ { \"start\": 7000, \"end\": 8000 } ]\n                     , \"ip\": \"127.0.0.1\", \"port\" : 9001, \"node_id\": \"id1\" }]\n    }\n  ])json\");\n  EXPECT_NE(config1->GetConfig(), config2->GetConfig());\n  EXPECT_EQ(config2->GetConfig(), config2->GetConfig());\n\n  auto config3 = ClusterConfig::CreateFromConfig(\"id0\", R\"json(\n  [\n    {\n      \"slot_ranges\": [ { \"start\": 0, \"end\": 8000 } ],\n      \"master\": { \"id\": \"id0\", \"ip\": \"localhost\", \"port\": 3000 },\n      \"replicas\": [],\n      \"migrations\": [{ \"slot_ranges\": [ { \"start\": 7000, \"end\": 8000 } ]\n 
                    , \"ip\": \"127.0.0.1\", \"port\" : 9002, \"node_id\": \"id1\" }]\n    },\n    {\n      \"slot_ranges\": [ { \"start\": 8001, \"end\": 16383 } ],\n      \"master\": { \"id\": \"id1\", \"ip\": \"localhost\", \"port\": 3001 },\n      \"replicas\": []\n    }\n  ])json\");\n  EXPECT_NE(config1->GetConfig(), config3->GetConfig());\n  EXPECT_NE(config2->GetConfig(), config3->GetConfig());\n  EXPECT_EQ(config3->GetConfig(), config3->GetConfig());\n\n  auto config4 = ClusterConfig::CreateFromConfig(\"id0\", R\"json(\n  [\n    {\n      \"slot_ranges\": [ { \"start\": 0, \"end\": 8000 } ],\n      \"master\": { \"id\": \"id0\", \"ip\": \"localhost\", \"port\": 3000 },\n      \"replicas\": [],\n      \"migrations\": [{ \"slot_ranges\": [ { \"start\": 7000, \"end\": 8000 } ]\n                     , \"ip\": \"127.0.0.1\", \"port\" : 9001, \"node_id\": \"id2\" }]\n    },\n    {\n      \"slot_ranges\": [ { \"start\": 8001, \"end\": 16383 } ],\n      \"master\": { \"id\": \"id1\", \"ip\": \"localhost\", \"port\": 3001 },\n      \"replicas\": []\n    }\n  ])json\");\n\n  EXPECT_NE(config1->GetConfig(), config4->GetConfig());\n  EXPECT_NE(config2->GetConfig(), config4->GetConfig());\n  EXPECT_NE(config3->GetConfig(), config4->GetConfig());\n  EXPECT_EQ(config4->GetConfig(), config4->GetConfig());\n\n  auto config5 = ClusterConfig::CreateFromConfig(\"id0\", R\"json(\n  [\n    {\n      \"slot_ranges\": [ { \"start\": 0, \"end\": 8000 } ],\n      \"master\": { \"id\": \"id2\", \"ip\": \"localhost\", \"port\": 3000 },\n      \"replicas\": [],\n      \"migrations\": [{ \"slot_ranges\": [ { \"start\": 7000, \"end\": 8000 } ]\n                     , \"ip\": \"127.0.0.1\", \"port\" : 9001, \"node_id\": \"id1\" }]\n    },\n    {\n      \"slot_ranges\": [ { \"start\": 8001, \"end\": 16383 } ],\n      \"master\": { \"id\": \"id1\", \"ip\": \"localhost\", \"port\": 3001 },\n      \"replicas\": []\n    }\n  ])json\");\n  EXPECT_NE(config1->GetConfig(), config5->GetConfig());\n 
 EXPECT_NE(config2->GetConfig(), config5->GetConfig());\n  EXPECT_NE(config3->GetConfig(), config5->GetConfig());\n  EXPECT_NE(config4->GetConfig(), config5->GetConfig());\n  EXPECT_EQ(config5->GetConfig(), config5->GetConfig());\n}\n\nTEST_F(ClusterConfigTest, NodesHealth) {\n  auto config1 = ClusterConfig::CreateFromConfig(\"id0\", R\"json(\n  [\n    {\n      \"slot_ranges\": [ { \"start\": 0, \"end\": 16383 } ],\n      \"master\": { \"id\": \"id0\", \"ip\": \"localhost\", \"port\": 3000, \"health\" : \"online\" },\n      \"replicas\": [{ \"id\": \"id1\", \"ip\": \"localhost\", \"port\": 3001, \"health\" : \"loading\" },\n                   { \"id\": \"id2\", \"ip\": \"localhost\", \"port\": 3002, \"health\" : \"fail\" }],\n      \"migrations\": [{ \"slot_ranges\": [ { \"start\": 7000, \"end\": 8000 } ]\n                     , \"ip\": \"127.0.0.1\", \"port\" : 9001, \"node_id\": \"id1\" }]\n    }\n\n  ])json\");\n\n  EXPECT_EQ(config1->GetConfig().begin()->master.health, NodeHealth::ONLINE);\n  EXPECT_EQ(config1->GetConfig().begin()->replicas.front().health, NodeHealth::LOADING);\n  EXPECT_EQ(config1->GetConfig().begin()->replicas.back().health, NodeHealth::FAIL);\n}\n\n}  // namespace dfly::cluster\n"
  },
  {
    "path": "src/server/cluster/cluster_defs.cc",
    "content": "// Copyright 2024, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#include \"cluster_defs.h\"\n\n#include <absl/strings/str_cat.h>\n#include <absl/strings/str_join.h>\n\n#include \"base/logging.h\"\n#include \"cluster_config.h\"\n#include \"facade/error.h\"\n#include \"slot_set.h\"\n\nusing namespace std;\n\nnamespace dfly::cluster {\nstd::string SlotRange::ToString() const {\n  return absl::StrCat(\"[\", start, \", \", end, \"]\");\n}\n\nSlotRanges::SlotRanges(std::vector<SlotRange> ranges) : ranges_(std::move(ranges)) {\n  std::sort(ranges_.begin(), ranges_.end());\n}\n\nvoid SlotRanges::Merge(const SlotRanges& sr) {\n  ranges_.reserve(ranges_.size() + sr.Size());\n  for (const auto& r : sr) {\n    ranges_.push_back(r);\n  }\n  std::sort(ranges_.begin(), ranges_.end());\n}\n\nstd::string SlotRanges::ToString() const {\n  return absl::StrJoin(ranges_, \", \", [](std::string* out, SlotRange range) {\n    absl::StrAppend(out, range.ToString());\n  });\n}\n\nstd::string MigrationInfo::ToString() const {\n  return absl::StrCat(node_info.id, \",\", node_info.ip, \":\", node_info.port, \" (\",\n                      slot_ranges.ToString(), \")\");\n}\n\nbool ClusterShardInfo::operator==(const ClusterShardInfo& r) const {\n  if (slot_ranges == r.slot_ranges && master == r.master) {\n    auto lreplicas = replicas;\n    auto lmigrations = migrations;\n    auto rreplicas = r.replicas;\n    auto rmigrations = r.migrations;\n    std::sort(lreplicas.begin(), lreplicas.end());\n    std::sort(lmigrations.begin(), lmigrations.end());\n    std::sort(rreplicas.begin(), rreplicas.end());\n    std::sort(rmigrations.begin(), rmigrations.end());\n    return lreplicas == rreplicas && lmigrations == rmigrations;\n  }\n  return false;\n}\n\nClusterShardInfos::ClusterShardInfos(std::vector<ClusterShardInfo> infos)\n    : infos_(std::move(infos)) {\n  std::sort(infos_.begin(), infos_.end());\n}\n\nfacade::ErrorReply 
SlotOwnershipError(SlotId slot_id) {\n  const auto cluster_config = ClusterConfig::Current();\n  if (!cluster_config)\n    return facade::ErrorReply{facade::kClusterNotConfigured};\n\n  if (!cluster_config->IsMySlot(slot_id)) {\n    // See more details here: https://redis.io/docs/reference/cluster-spec/#moved-redirection\n    cluster::ClusterNodeInfo master = cluster_config->GetMasterNodeForSlot(slot_id);\n    return facade::ErrorReply{absl::StrCat(\"-MOVED \", slot_id, \" \", master.ip, \":\", master.port),\n                              \"MOVED\"};\n  }\n  return facade::ErrorReply{facade::OpStatus::OK};\n}\n\nstd::string_view ToString(NodeHealth nh) {\n  switch (nh) {\n    case NodeHealth::FAIL:\n      return \"fail\";\n    case NodeHealth::LOADING:\n      return \"loading\";\n    case NodeHealth::ONLINE:\n      return \"online\";\n    case NodeHealth::HIDDEN:\n      DCHECK(false);  // shouldn't be used\n      return \"hidden\";\n  }\n  DCHECK(false);\n  return \"undefined_health\";\n}\n\n}  // namespace dfly::cluster\n"
  },
  {
    "path": "src/server/cluster/cluster_defs.h",
    "content": "// Copyright 2024, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#pragma once\n\n#include <cstdint>\n#include <optional>\n#include <string>\n#include <string_view>\n#include <vector>\n\n#include \"facade/cmd_arg_parser.h\"\n#include \"facade/facade_types.h\"\n#include \"server/cluster_support.h\"\n\nnamespace dfly::cluster {\n\n// A SlotId validated to be within [0, kMaxSlotNum], usable directly with CmdArgParser::Next().\nusing ParsedSlotId = facade::FInt<SlotId{0}, SlotId{kMaxSlotNum}>;\n\nstruct SlotRange {\n  static constexpr SlotId kMaxSlotId = 0x3FFF;\n  SlotId start = 0;\n  SlotId end = 0;\n\n  bool operator==(const SlotRange& r) const noexcept {\n    return start == r.start && end == r.end;\n  }\n\n  bool operator<(const SlotRange& r) const noexcept {\n    return start < r.start || (start == r.start && end < r.end);\n  }\n\n  bool IsValid() const noexcept {\n    return start <= end && start <= kMaxSlotId && end <= kMaxSlotId;\n  }\n\n  bool Contains(SlotId id) const noexcept {\n    return id >= start && id <= end;\n  }\n\n  std::string ToString() const;\n};\n\nclass SlotRanges {\n public:\n  SlotRanges() = default;\n  explicit SlotRanges(std::vector<SlotRange> ranges);\n\n  bool Contains(SlotId id) const noexcept {\n    for (const auto& sr : ranges_) {\n      if (sr.Contains(id))\n        return true;\n    }\n    return false;\n  }\n\n  size_t Size() const noexcept {\n    return ranges_.size();\n  }\n\n  bool Empty() const noexcept {\n    return ranges_.empty();\n  }\n\n  void Merge(const SlotRanges& sr);\n\n  bool operator==(const SlotRanges& r) const noexcept {\n    return ranges_ == r.ranges_;\n  }\n\n  std::string ToString() const;\n\n  auto begin() const noexcept {\n    return ranges_.cbegin();\n  }\n\n  auto end() const noexcept {\n    return ranges_.cend();\n  }\n\n private:\n  std::vector<SlotRange> ranges_;\n};\n\nstruct ClusterNodeInfo {\n  std::string id;\n  std::string ip;\n  uint16_t port 
= 0;\n\n  bool operator==(const ClusterNodeInfo& r) const noexcept {\n    return port == r.port && ip == r.ip && id == r.id;\n  }\n\n  bool operator<(const ClusterNodeInfo& r) const noexcept {\n    return id < r.id;\n  }\n};\n\nenum class NodeHealth : std::uint8_t { FAIL, LOADING, ONLINE, HIDDEN };\nstd::string_view ToString(NodeHealth nh);\n\nstruct ClusterExtendedNodeInfo : ClusterNodeInfo {\n  NodeHealth health = NodeHealth::ONLINE;\n  bool operator==(const ClusterExtendedNodeInfo& r) const noexcept {\n    return health == r.health && ClusterNodeInfo::operator==(r);\n  }\n};\n\nstruct MigrationInfo {\n  SlotRanges slot_ranges;\n  ClusterNodeInfo node_info;\n\n  bool operator==(const MigrationInfo& r) const noexcept {\n    return node_info == r.node_info && slot_ranges == r.slot_ranges;\n  }\n\n  bool operator<(const MigrationInfo& r) const noexcept {\n    return node_info < r.node_info;\n  }\n\n  std::string ToString() const;\n};\n\nstruct ClusterShardInfo {\n  SlotRanges slot_ranges;\n  ClusterExtendedNodeInfo master;\n  std::vector<ClusterExtendedNodeInfo> replicas;\n  std::vector<MigrationInfo> migrations;\n\n  bool operator==(const ClusterShardInfo& r) const;\n\n  bool operator<(const ClusterShardInfo& r) const noexcept {\n    return master < r.master;\n  }\n};\n\nclass ClusterShardInfos {\n public:\n  ClusterShardInfos() = default;\n  ClusterShardInfos(std::vector<ClusterShardInfo> infos);\n  ClusterShardInfos(ClusterShardInfo info) : infos_({info}) {\n  }\n\n  auto begin() const noexcept {\n    return infos_.cbegin();\n  }\n\n  auto end() const noexcept {\n    return infos_.cend();\n  }\n\n  auto begin() noexcept {\n    return infos_.begin();\n  }\n\n  auto end() noexcept {\n    return infos_.end();\n  }\n\n  auto size() const noexcept {\n    return infos_.size();\n  }\n\n  bool empty() const noexcept {\n    return infos_.empty();\n  }\n\n  bool operator==(const ClusterShardInfos& r) const noexcept {\n    return infos_ == r.infos_;\n  }\n\n  bool 
operator!=(const ClusterShardInfos& r) const noexcept {\n    return infos_ != r.infos_;\n  }\n\n  auto Unwrap() const {\n    return infos_;\n  }\n\n private:\n  std::vector<ClusterShardInfo> infos_;\n};\n\n// MigrationState constants are ordered in state changing order\nenum class MigrationState : uint8_t { C_CONNECTING, C_SYNC, C_ERROR, C_FINISHED, C_FATAL };\n\n// Errors during slot migration\nstatic constexpr std::string_view kUnknownMigration = \"UNKNOWN_MIGRATION\";\nstatic constexpr std::string_view kIncomingMigrationOOM = \"INCOMING_MIGRATION_OOM\";\n\n// return error message if slot doesn't belong to this node\nfacade::ErrorReply SlotOwnershipError(SlotId slot_id);\n\n}  // namespace dfly::cluster\n"
  },
  {
    "path": "src/server/cluster/cluster_family.cc",
    "content": "// Copyright 2023, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#include \"server/cluster/cluster_family.h\"\n\n#include <absl/cleanup/cleanup.h>\n#include <absl/strings/ascii.h>\n#include <absl/strings/str_cat.h>\n\n#include <memory>\n#include <mutex>\n#include <string>\n\n#include \"base/flags.h\"\n#include \"base/logging.h\"\n#include \"facade/cmd_arg_parser.h\"\n#include \"facade/dragonfly_connection.h\"\n#include \"facade/dragonfly_listener.h\"\n#include \"facade/error.h\"\n#include \"facade/reply_builder.h\"\n#include \"server/acl/acl_commands_def.h\"\n#include \"server/channel_store.h\"\n#include \"server/cluster/coordinator.h\"\n#include \"server/command_registry.h\"\n#include \"server/conn_context.h\"\n#include \"server/dflycmd.h\"\n#include \"server/error.h\"\n#include \"server/journal/journal.h\"\n#include \"server/main_service.h\"\n#include \"server/namespaces.h\"\n#include \"server/server_family.h\"\n#include \"server/server_state.h\"\n#include \"util/fibers/synchronization.h\"\n\nABSL_FLAG(std::string, cluster_announce_ip, \"\",\n          \"IP address that Dragonfly announces to cluster clients\");\n\nABSL_FLAG(std::string, cluster_node_id, \"\",\n          \"ID within a cluster, used for slot assignment. MUST be unique. 
If empty, uses master \"\n          \"replication ID (random string)\");\n\nABSL_DECLARE_FLAG(int32_t, port);\nABSL_DECLARE_FLAG(uint16_t, announce_port);\nABSL_DECLARE_FLAG(bool, managed_service_info);\n\nnamespace dfly {\nnamespace acl {\nconstexpr uint32_t kCluster = SLOW;\n// Reconsider to maybe more sensible defaults\nconstexpr uint32_t kDflyCluster = ADMIN | SLOW;\nconstexpr uint32_t kReadOnly = FAST | CONNECTION;\nconstexpr uint32_t kReadWrite = FAST | CONNECTION;\nconstexpr uint32_t kDflyMigrate = ADMIN | SLOW | DANGEROUS;\n}  // namespace acl\n}  // namespace dfly\n\nnamespace dfly::cluster {\nnamespace {\n\nusing namespace std;\nusing namespace facade;\nusing namespace util;\nusing Payload = journal::Entry::Payload;\nusing CI = CommandId;\n\nconstexpr char kIdNotFound[] = \"syncid not found\";\n\nconstexpr string_view kClusterDisabled =\n    \"Cluster is disabled. Enabled via passing --cluster_mode=emulated|yes\";\n\n}  // namespace\n\nClusterFamily::ClusterFamily(ServerFamily* server_family) : server_family_(server_family) {\n  CHECK_NOTNULL(server_family_);\n\n  InitializeCluster();\n\n  id_ = absl::GetFlag(FLAGS_cluster_node_id);\n  if (id_.empty()) {\n    id_ = server_family_->master_replid();\n  } else if (IsClusterEmulated()) {\n    LOG(ERROR) << \"Setting --cluster_node_id in emulated mode is unsupported\";\n    exit(1);\n  }\n}\n\nvoid ClusterFamily::Shutdown() {\n  Coordinator::Current().Shutdown();\n  shard_set->pool()->at(0)->Await([this]() ABSL_LOCKS_EXCLUDED(set_config_mu) {\n    PreparedToRemoveOutgoingMigrations outgoing_migrations;  // should be removed without mutex lock\n    {\n      util::fb2::LockGuard lk(set_config_mu);\n      if (!ClusterConfig::Current())\n        return;\n\n      auto empty_config = ClusterConfig::Current()->CloneWithoutMigrations();\n      outgoing_migrations = TakeOutOutgoingMigrations(empty_config, ClusterConfig::Current());\n      RemoveIncomingMigrations(\n          
empty_config->GetFinishedIncomingMigrations(ClusterConfig::Current()));\n\n      util::fb2::LockGuard migration_lk(migration_mu_);\n      DCHECK(outgoing_migration_jobs_.empty());\n      DCHECK(incoming_migrations_jobs_.empty());\n    }\n  });\n}\n\nstd::optional<ClusterShardInfos> ClusterFamily::GetShardInfos(ConnectionContext* cntx) const {\n  if (IsClusterEmulated()) {\n    return {GetEmulatedShardInfo(cntx)};\n  }\n\n  if (ClusterConfig::Current() != nullptr) {\n    return ClusterConfig::Current()->GetConfig();\n  }\n  return nullopt;\n}\n\nClusterShardInfo ClusterFamily::GetEmulatedShardInfo(ConnectionContext* cntx) const {\n  ClusterShardInfo info{.slot_ranges = SlotRanges({{.start = 0, .end = kMaxSlotNum}}),\n                        .master = {},\n                        .replicas = {},\n                        .migrations = {}};\n\n  optional<Metrics::ReplicaInfo> repl_info = server_family_->GetReplicaSummary();\n  ServerState& etl = *ServerState::tlocal();\n  if (!repl_info) {\n    DCHECK(etl.is_master);\n    std::string cluster_announce_ip = absl::GetFlag(FLAGS_cluster_announce_ip);\n    std::string preferred_endpoint =\n        cluster_announce_ip.empty() ? cntx->conn()->LocalBindAddress() : cluster_announce_ip;\n    uint16_t cluster_announce_port = absl::GetFlag(FLAGS_announce_port);\n    uint16_t preferred_port = cluster_announce_port == 0\n                                  ? 
static_cast<uint16_t>(absl::GetFlag(FLAGS_port))\n                                  : cluster_announce_port;\n\n    info.master = {{.id = id_, .ip = preferred_endpoint, .port = preferred_port},\n                   NodeHealth::ONLINE};\n\n    if (cntx->conn()->IsPrivileged() || !absl::GetFlag(FLAGS_managed_service_info)) {\n      for (const auto& replica : server_family_->GetDflyCmd()->GetReplicasRoleInfo()) {\n        info.replicas.push_back({{.id = replica.id,\n                                  .ip = replica.address,\n                                  .port = static_cast<uint16_t>(replica.listening_port)},\n                                 NodeHealth::ONLINE});\n      }\n    }\n  } else {\n    // TODO: We currently don't save the master's ID in the replica\n    info.master = {{.id = \"\", .ip = repl_info->summary.host, .port = repl_info->summary.port},\n                   NodeHealth::ONLINE};\n    info.replicas.push_back({{.id = id_,\n                              .ip = cntx->conn()->LocalBindAddress(),\n                              .port = static_cast<uint16_t>(absl::GetFlag(FLAGS_port))},\n                             NodeHealth::ONLINE});\n  }\n\n  return info;\n}\n\nvoid ClusterFamily::ClusterHelp(SinkReplyBuilder* builder) {\n  string_view help_arr[] = {\n      \"CLUSTER <subcommand> [<arg> [value] [opt] ...]. Subcommands are:\",\n      \"SLOTS\",\n      \"   Return information about slots range mappings. Each range is made of:\",\n      \"   start, end, master and replicas IP addresses, ports and ids.\",\n      \"NODES\",\n      \"   Return cluster configuration seen by node. 
Output format:\",\n      \"   <id> <ip:port> <flags> <master> <pings> <pongs> <epoch> <link> <slot> ...\",\n      \"INFO\",\n      \"  Return information about the cluster\",\n      \"HELP\",\n      \"    Prints this help.\",\n  };\n  auto* rb = static_cast<RedisReplyBuilder*>(builder);\n  return rb->SendSimpleStrArr(help_arr);\n}\n\nnamespace {\nvoid ClusterShardsImpl(const ClusterShardInfos& config, SinkReplyBuilder* builder) {\n  // For more details https://redis.io/commands/cluster-shards/\n  constexpr unsigned int kEntrySize = 4;\n  auto* rb = static_cast<RedisReplyBuilder*>(builder);\n\n  auto WriteNode = [&](const ClusterExtendedNodeInfo& node, string_view role) {\n    constexpr unsigned int kNodeSize = 14;\n    rb->StartArray(kNodeSize);\n    rb->SendBulkString(\"id\");\n    rb->SendBulkString(node.id);\n    rb->SendBulkString(\"endpoint\");\n    rb->SendBulkString(node.ip);\n    rb->SendBulkString(\"ip\");\n    rb->SendBulkString(node.ip);\n    rb->SendBulkString(\"port\");\n    rb->SendLong(node.port);\n    rb->SendBulkString(\"role\");\n    rb->SendBulkString(role);\n    rb->SendBulkString(\"replication-offset\");\n    rb->SendLong(0);\n    rb->SendBulkString(\"health\");\n    rb->SendBulkString(ToString(node.health));\n  };\n\n  rb->StartArray(config.size());\n  for (const auto& shard : config) {\n    rb->StartArray(kEntrySize);\n    rb->SendBulkString(\"slots\");\n\n    rb->StartArray(shard.slot_ranges.Size() * 2);\n    for (const auto& slot_range : shard.slot_ranges) {\n      rb->SendLong(slot_range.start);\n      rb->SendLong(slot_range.end);\n    }\n\n    rb->SendBulkString(\"nodes\");\n    rb->StartArray(1 + shard.replicas.size());\n    WriteNode(shard.master, \"master\");\n    for (const auto& replica : shard.replicas) {\n      WriteNode(replica, \"replica\");\n    }\n  }\n}\n}  // namespace\n\nvoid ClusterFamily::ClusterShards(SinkReplyBuilder* builder, ConnectionContext* cntx) {\n  auto config = GetShardInfos(cntx);\n  if (config) {\n    // we 
need to remove hidden replicas\n    auto shards_info = config->Unwrap();\n    for (auto& shard : shards_info) {\n      auto new_end = std::remove_if(shard.replicas.begin(), shard.replicas.end(),\n                                    [](const auto& r) { return r.health == NodeHealth::HIDDEN; });\n      shard.replicas.erase(new_end, shard.replicas.end());\n    }\n    return ClusterShardsImpl({shards_info}, builder);\n  }\n  return builder->SendError(kClusterNotConfigured);\n}\n\nnamespace {\nvoid ClusterSlotsImpl(ClusterShardInfos config, SinkReplyBuilder* builder) {\n  // For more details https://redis.io/commands/cluster-slots/\n  auto* rb = static_cast<RedisReplyBuilder*>(builder);\n\n  auto WriteNode = [&](const ClusterNodeInfo& node) {\n    constexpr unsigned int kNodeSize = 3;\n    rb->StartArray(kNodeSize);\n    rb->SendBulkString(node.ip);\n    rb->SendLong(node.port);\n    rb->SendBulkString(node.id);\n  };\n\n  unsigned int slot_ranges = 0;\n\n  // we need to remove hidden, failed and loading replicas\n  auto shards_info = config.Unwrap();\n  for (auto& shard : shards_info) {\n    slot_ranges += shard.slot_ranges.Size();\n    auto new_end = std::remove_if(shard.replicas.begin(), shard.replicas.end(), [](const auto& r) {\n      return r.health == NodeHealth::HIDDEN || r.health == NodeHealth::FAIL ||\n             r.health == NodeHealth::LOADING;\n    });\n    shard.replicas.erase(new_end, shard.replicas.end());\n  }\n\n  config = {shards_info};\n\n  rb->StartArray(slot_ranges);\n  for (const auto& shard : config) {\n    for (const auto& slot_range : shard.slot_ranges) {\n      const unsigned int array_size =\n          /* slot-start, slot-end */ 2 + /* master */ 1 + /* replicas */ shard.replicas.size();\n      rb->StartArray(array_size);\n      rb->SendLong(slot_range.start);\n      rb->SendLong(slot_range.end);\n      WriteNode(shard.master);\n      for (const auto& replica : shard.replicas) {\n        WriteNode(replica);\n      }\n    }\n  }\n}\n}  // namespace\n\nvoid 
ClusterFamily::ClusterSlots(SinkReplyBuilder* builder, ConnectionContext* cntx) {\n  auto shard_infos = GetShardInfos(cntx);\n  if (shard_infos) {\n    return ClusterSlotsImpl(*shard_infos, builder);\n  }\n  return builder->SendError(kClusterNotConfigured);\n}\n\nnamespace {\nvoid ClusterNodesImpl(const ClusterShardInfos& config, string_view my_id,\n                      SinkReplyBuilder* builder) {\n  // For more details https://redis.io/commands/cluster-nodes/\n\n  string result;\n\n  auto WriteNode = [&](const ClusterExtendedNodeInfo& node, string_view role, string_view master_id,\n                       const SlotRanges& ranges) {\n    absl::StrAppend(&result, node.id, \" \");\n\n    absl::StrAppend(&result, node.ip, \":\", node.port, \"@\", node.port, \" \");\n\n    if (my_id == node.id) {\n      absl::StrAppend(&result, \"myself,\");\n    }\n    absl::StrAppend(&result, role, \" \");\n\n    absl::StrAppend(&result, master_id, \" \");\n\n    absl::StrAppend(&result,\n                    node.health != NodeHealth::FAIL ? 
\"0 0 0 connected\" : \"0 0 0 disconnected\");\n\n    for (const auto& range : ranges) {\n      absl::StrAppend(&result, \" \", range.start);\n      if (range.start != range.end) {\n        absl::StrAppend(&result, \"-\", range.end);\n      }\n    }\n\n    // Separate lines with only \\n, not \\r\\n, see #2726\n    absl::StrAppend(&result, \"\\n\");\n  };\n\n  for (const auto& shard : config) {\n    WriteNode(shard.master, \"master\", \"-\", shard.slot_ranges);\n    for (const auto& replica : shard.replicas) {\n      // Only the master prints ranges, so we send an empty set for replicas.\n      if (replica.health != NodeHealth::HIDDEN) {\n        WriteNode(replica, \"slave\", shard.master.id, {});\n      }\n    }\n  }\n\n  auto* rb = static_cast<RedisReplyBuilder*>(builder);\n  return rb->SendBulkString(result);\n}\n}  // namespace\n\nvoid ClusterFamily::ClusterNodes(SinkReplyBuilder* builder, ConnectionContext* cntx) {\n  auto shard_infos = GetShardInfos(cntx);\n  if (shard_infos) {\n    return ClusterNodesImpl(*shard_infos, id_, builder);\n  }\n  return builder->SendError(kClusterNotConfigured);\n}\n\nnamespace {\nvoid ClusterInfoImpl(const ClusterShardInfos& config, SinkReplyBuilder* builder) {\n  std::string msg;\n  auto append = [&msg](absl::AlphaNum a1, absl::AlphaNum a2) {\n    // Separate lines with \\r\\n, not \\n, see #2726\n    absl::StrAppend(&msg, a1, \":\", a2, \"\\r\\n\");\n  };\n\n  // Initialize response variables to emulated mode.\n  string_view state = \"ok\"sv;\n  SlotId slots_assigned = kMaxSlotNum + 1;\n  size_t known_nodes = 1;\n  long epoch = 1;\n  size_t cluster_size = 1;\n\n  if (config.empty()) {\n    state = \"fail\"sv;\n    slots_assigned = 0;\n    cluster_size = 0;\n    known_nodes = 0;\n  } else {\n    known_nodes = 0;\n    cluster_size = 0;\n    for (const auto& shard_config : config) {\n      known_nodes += 1;  // For master\n      known_nodes += shard_config.replicas.size();\n\n      if (!shard_config.slot_ranges.Empty()) {\n       
 ++cluster_size;\n      }\n    }\n  }\n\n  append(\"cluster_state\", state);\n  append(\"cluster_slots_assigned\", slots_assigned);\n  append(\"cluster_slots_ok\", slots_assigned);  // We do not support other failed nodes.\n  append(\"cluster_slots_pfail\", 0);\n  append(\"cluster_slots_fail\", 0);\n  append(\"cluster_known_nodes\", known_nodes);\n  append(\"cluster_size\", cluster_size);\n  append(\"cluster_current_epoch\", epoch);\n  append(\"cluster_my_epoch\", 1);\n  append(\"cluster_stats_messages_ping_sent\", 1);\n  append(\"cluster_stats_messages_pong_sent\", 1);\n  append(\"cluster_stats_messages_sent\", 1);\n  append(\"cluster_stats_messages_ping_received\", 1);\n  append(\"cluster_stats_messages_pong_received\", 1);\n  append(\"cluster_stats_messages_meet_received\", 0);\n  append(\"cluster_stats_messages_received\", 1);\n  auto* rb = static_cast<RedisReplyBuilder*>(builder);\n  rb->SendBulkString(msg);\n}\n}  // namespace\n\nvoid ClusterFamily::ClusterInfo(SinkReplyBuilder* builder, ConnectionContext* cntx) {\n  auto shard_infos = GetShardInfos(cntx);\n  return ClusterInfoImpl(shard_infos.value_or(ClusterShardInfos{}), builder);\n}\n\nvoid ClusterFamily::KeySlot(CmdArgList args, SinkReplyBuilder* builder) {\n  if (args.size() != 2) {\n    return builder->SendError(WrongNumArgsError(\"CLUSTER KEYSLOT\"));\n  }\n\n  SlotId id = dfly::KeySlot(ArgS(args, 1));\n  return builder->SendLong(id);\n}\n\nvoid ClusterFamily::Cluster(CmdArgList args, CommandContext* cmd_cntx) {\n  // In emulated cluster mode, all slots are mapped to the same host, and number of cluster\n  // instances is thus 1.\n  string sub_cmd = absl::AsciiStrToUpper(ArgS(args, 0));\n\n  auto* builder = cmd_cntx->rb();\n  if (!IsClusterEnabledOrEmulated()) {\n    return builder->SendError(kClusterDisabled);\n  }\n\n  if (sub_cmd == \"KEYSLOT\") {\n    return KeySlot(args, builder);\n  }\n\n  if (args.size() > 1) {\n    return builder->SendError(WrongNumArgsError(absl::StrCat(\"CLUSTER \", 
sub_cmd)));\n  }\n\n  auto* cntx = cmd_cntx->server_conn_cntx();\n  if (sub_cmd == \"HELP\") {\n    return ClusterHelp(builder);\n  } else if (sub_cmd == \"MYID\") {\n    return ClusterMyId(builder);\n  } else if (sub_cmd == \"SHARDS\") {\n    return ClusterShards(builder, cntx);\n  } else if (sub_cmd == \"SLOTS\") {\n    return ClusterSlots(builder, cntx);\n  } else if (sub_cmd == \"NODES\") {\n    return ClusterNodes(builder, cntx);\n  } else if (sub_cmd == \"INFO\") {\n    return ClusterInfo(builder, cntx);\n  } else {\n    return builder->SendError(facade::UnknownSubCmd(sub_cmd, \"CLUSTER\"), facade::kSyntaxErrType);\n  }\n}\n\nvoid ClusterFamily::ReadOnly(CmdArgList args, CommandContext* cmd_cntx) {\n  cmd_cntx->rb()->SendOk();\n}\n\nvoid ClusterFamily::ReadWrite(CmdArgList args, CommandContext* cmd_cntx) {\n  if (!IsClusterEmulated()) {\n    return cmd_cntx->SendError(kClusterDisabled);\n  }\n  cmd_cntx->rb()->SendOk();\n}\n\nvoid ClusterFamily::DflyCluster(CmdArgList args, CommandContext* cmd_cntx) {\n  auto* builder = cmd_cntx->rb();\n  auto* cntx = cmd_cntx->server_conn_cntx();\n  if (!(IsClusterEnabled() || (IsClusterEmulated() && cntx->journal_emulated))) {\n    return builder->SendError(\"Cluster is disabled. 
Use --cluster_mode=yes to enable.\");\n  }\n\n  string sub_cmd = absl::AsciiStrToUpper(ArgS(args, 0));\n  args.remove_prefix(1);  // remove subcommand name\n  if (sub_cmd == \"GETSLOTINFO\") {\n    return DflyClusterGetSlotInfo(args, cmd_cntx);\n  } else if (sub_cmd == \"CONFIG\") {\n    return DflyClusterConfig(args, cmd_cntx);\n  } else if (sub_cmd == \"FLUSHSLOTS\") {\n    return DflyClusterFlushSlots(args, cmd_cntx);\n  } else if (sub_cmd == \"SLOT-MIGRATION-STATUS\") {\n    return DflySlotMigrationStatus(args, cmd_cntx);\n  }\n\n  return builder->SendError(UnknownSubCmd(sub_cmd, \"DFLYCLUSTER\"), kSyntaxErrType);\n}\n\nvoid ClusterFamily::ClusterMyId(SinkReplyBuilder* builder) {\n  builder->SendSimpleString(id_);\n}\n\nnamespace {\n\nvoid DeleteSlots(const SlotRanges& slots_ranges) {\n  if (slots_ranges.Empty()) {\n    return;\n  }\n\n  auto cb = [&](auto*) {\n    EngineShard* shard = EngineShard::tlocal();\n    if (shard == nullptr)\n      return;\n\n    namespaces->GetDefaultNamespace().GetDbSlice(shard->shard_id()).FlushSlots(slots_ranges);\n  };\n  shard_set->pool()->AwaitFiberOnAll(std::move(cb));\n\n  auto* channel_store = ServerState::tlocal()->channel_store();\n  auto deleted = SlotSet(slots_ranges);\n  channel_store->UnsubscribeAfterClusterSlotMigration(deleted);\n}\n\nvoid WriteFlushSlotsToJournal(const SlotRanges& slot_ranges) {\n  if (slot_ranges.Empty()) {\n    return;\n  }\n\n  // Build args\n  vector<string> args;\n  args.reserve(slot_ranges.Size() + 1);\n  args.push_back(\"FLUSHSLOTS\");\n  for (SlotRange range : slot_ranges) {\n    args.push_back(absl::StrCat(range.start));\n    args.push_back(absl::StrCat(range.end));\n  }\n\n  // Build view\n  vector<string_view> args_view(args.size());\n  for (size_t i = 0; i < args.size(); ++i) {\n    args_view[i] = args[i];\n  }\n\n  auto cb = [&](auto*) {\n    EngineShard* shard = EngineShard::tlocal();\n    if (shard == nullptr) {\n      return;\n    }\n\n    if (!shard->journal()) {\n      return;\n    
}\n\n    // Send journal entry\n    // TODO: Break slot migration upon FLUSHSLOTS\n    journal::RecordEntry(/* txid= */ 0, journal::Op::COMMAND, /* dbid= */ 0, nullopt,\n                         Payload(\"DFLYCLUSTER\", args_view));\n  };\n  shard_set->pool()->AwaitFiberOnAll(std::move(cb));\n}\n}  // namespace\n\nvoid ClusterFamily::DflyClusterConfig(CmdArgList args, CommandContext* cmd_cntx) {\n  if (args.size() != 1) {\n    return cmd_cntx->SendError(WrongNumArgsError(\"DFLYCLUSTER CONFIG\"));\n  }\n\n  string_view json_str = ArgS(args, 0);\n  shared_ptr<ClusterConfig> new_config = ClusterConfig::CreateFromConfig(id_, json_str);\n  if (new_config == nullptr) {\n    LOG(WARNING) << \"Can't set cluster config\";\n    return cmd_cntx->SendError(\"Invalid cluster configuration.\");\n  } else if (ClusterConfig::Current() &&\n             ClusterConfig::Current()->GetConfig() == new_config->GetConfig()) {\n    return cmd_cntx->SendOk();\n  }\n\n  PreparedToRemoveOutgoingMigrations outgoing_migrations;  // should be removed without mutex lock\n\n  {\n    VLOG(1) << \"Setting new cluster config: \" << json_str;\n    util::fb2::LockGuard gu(set_config_mu);\n\n    outgoing_migrations = TakeOutOutgoingMigrations(new_config, ClusterConfig::Current());\n    RemoveIncomingMigrations(new_config->GetFinishedIncomingMigrations(ClusterConfig::Current()));\n\n    SlotRanges enable_slots, disable_slots;\n\n    {\n      util::fb2::LockGuard lk(migration_mu_);\n      // If migration state is changed simultaneously, the changes to config will be applied after\n      // set_config_mu is unlocked and even if we apply the same changes 2 times it's not a problem\n      for (const auto& m : incoming_migrations_jobs_) {\n        if (m->GetState() == MigrationState::C_FINISHED) {\n          enable_slots.Merge(m->GetSlots());\n        }\n      }\n      for (const auto& m : outgoing_migration_jobs_) {\n        if (m->GetState() == MigrationState::C_FINISHED) {\n          
disable_slots.Merge(m->GetSlots());\n        }\n      }\n    }\n\n    new_config = new_config->CloneWithChanges(enable_slots, disable_slots);\n\n    StartNewSlotMigrations(*new_config);\n\n    SlotSet before =\n        ClusterConfig::Current() ? ClusterConfig::Current()->GetOwnedSlots() : SlotSet(true);\n\n    auto* conn = cmd_cntx->conn();\n    // Ignore blocked commands because we filter them with CancelBlockingOnThread\n    DispatchTracker tracker{server_family_->GetNonPriviligedListeners(), conn,\n                            true /* ignore paused */, true /* ignore blocked */};\n\n    auto blocking_filter = [&new_config](ArgSlice keys) {\n      bool moved =\n          any_of(keys.begin(), keys.end(), [&](auto k) { return !new_config->IsMySlot(k); });\n      return moved ? OpStatus::KEY_MOVED : OpStatus::OK;\n    };\n\n    auto cb = [this, &tracker, &new_config, blocking_filter](util::ProactorBase*) {\n      server_family_->CancelBlockingOnThread(blocking_filter);\n      ClusterConfig::SetCurrent(new_config);\n      tracker.TrackOnThread();\n    };\n\n    server_family_->service().proactor_pool().AwaitFiberOnAll(std::move(cb));\n    DCHECK(ClusterConfig::Current() != nullptr);\n\n    if (!tracker.Wait(absl::Seconds(1))) {\n      LOG(WARNING) << \"Cluster config change timed for: \" << MyID();\n    }\n\n    SlotSet after = ClusterConfig::Current()->GetOwnedSlots();\n    if (ServerState::tlocal()->is_master) {\n      auto deleted_slots = (before.GetRemovedSlots(after)).ToSlotRanges();\n      deleted_slots.Merge(outgoing_migrations.slot_ranges);\n      DeleteSlots(deleted_slots);\n      LOG_IF(INFO, !deleted_slots.Empty())\n          << \"Flushing newly unowned slots: \" << deleted_slots.ToString();\n      WriteFlushSlotsToJournal(deleted_slots);\n    }\n  }\n\n  return cmd_cntx->SendOk();\n}\n\nvoid ClusterFamily::DflyClusterGetSlotInfo(CmdArgList args, CommandContext* cmd_cntx) {\n  CmdArgParser parser(args);\n  parser.ExpectTag(\"SLOTS\");\n  auto* rb = 
static_cast<RedisReplyBuilder*>(cmd_cntx->rb());\n\n  RETURN_ON_PARSE_ERROR(parser, cmd_cntx);\n\n  vector<std::pair<SlotId, SlotStats>> slots_stats;\n  while (parser.HasNext()) {\n    auto arg = parser.Next<std::string_view>();\n    // Check if argument contains a dash for range notation (e.g., \"1-100\")\n    size_t dash_pos = arg.find('-');\n    if (dash_pos != std::string_view::npos && dash_pos > 0) {\n      // Parse as range: start-end\n      std::string_view start_str = arg.substr(0, dash_pos);\n      std::string_view end_str = arg.substr(dash_pos + 1);\n\n      uint32_t start_slot, end_slot;\n      if (!absl::SimpleAtoi(start_str, &start_slot) || !absl::SimpleAtoi(end_str, &end_slot)) {\n        return cmd_cntx->SendError(\"Invalid slot range format\");\n      }\n\n      if (start_slot > kMaxSlotNum || end_slot > kMaxSlotNum) {\n        return cmd_cntx->SendError(\"Invalid slot id\");\n      }\n\n      // Swap if range is specified in reverse order (e.g., \"100-0\")\n      if (start_slot > end_slot) {\n        std::swap(start_slot, end_slot);\n      }\n\n      for (uint32_t sid = start_slot; sid <= end_slot; ++sid) {\n        slots_stats.emplace_back(sid, SlotStats{});\n      }\n    } else {\n      // Parse as single slot id\n      uint32_t sid;\n      if (!absl::SimpleAtoi(arg, &sid)) {\n        return cmd_cntx->SendError(kInvalidIntErr);\n      }\n      if (sid > kMaxSlotNum) {\n        return cmd_cntx->SendError(\"Invalid slot id\");\n      }\n      slots_stats.emplace_back(sid, SlotStats{});\n    }\n  }\n\n  if (slots_stats.empty()) {\n    return cmd_cntx->SendError(kSyntaxErr);\n  }\n\n  fb2::Mutex mu;\n\n  auto cb = [&](auto*) ABSL_LOCKS_EXCLUDED(mu) {\n    EngineShard* shard = EngineShard::tlocal();\n    if (shard == nullptr)\n      return;\n\n    util::fb2::LockGuard lk(mu);\n    for (auto& [slot, data] : slots_stats) {\n      data += namespaces->GetDefaultNamespace().GetDbSlice(shard->shard_id()).GetSlotStats(slot);\n    }\n  };\n\n  
shard_set->pool()->AwaitFiberOnAll(std::move(cb));\n\n  rb->StartArray(slots_stats.size());\n\n  for (const auto& slot_data : slots_stats) {\n    rb->StartArray(9);\n    rb->SendLong(slot_data.first);\n    rb->SendBulkString(\"key_count\");\n    rb->SendLong(slot_data.second.key_count);\n    rb->SendBulkString(\"total_reads\");\n    rb->SendLong(slot_data.second.total_reads);\n    rb->SendBulkString(\"total_writes\");\n    rb->SendLong(slot_data.second.total_writes);\n\n    // Account for both the values and the table space of the entries.\n    // Each entry is composed of a CompactObj for the key and a CompactObj for the value.\n    // Sometimes the values are very small and table space becomes significant.\n    rb->SendBulkString(\"memory_bytes\");\n    rb->SendLong(slot_data.second.memory_bytes +\n                 slot_data.second.key_count * sizeof(CompactObj) * 2);\n  }\n}\n\nvoid ClusterFamily::DflyClusterFlushSlots(CmdArgList args, CommandContext* cmd_cntx) {\n  LOG(INFO) << \"Got DFLYCLUSTER FLUSHSLOTS \" << args;\n\n  std::vector<SlotRange> slot_ranges;\n\n  CmdArgParser parser(args);\n  do {\n    auto [slot_start, slot_end] = parser.Next<ParsedSlotId, ParsedSlotId>();\n    RETURN_ON_PARSE_ERROR(parser, cmd_cntx);\n    if (slot_start > slot_end) {\n      return cmd_cntx->SendError(\"Invalid slot range\");\n    }\n    slot_ranges.emplace_back(SlotRange{slot_start, slot_end});\n  } while (parser.HasNext());\n\n  DeleteSlots(SlotRanges(std::move(slot_ranges)));\n\n  return cmd_cntx->SendOk();\n}\n\nvoid ClusterFamily::StartNewSlotMigrations(const ClusterConfig& new_config) {\n  // TODO: Add validation and error handling\n  auto out_migrations = new_config.GetNewOutgoingMigrations(ClusterConfig::Current());\n  auto in_migrations = new_config.GetNewIncomingMigrations(ClusterConfig::Current());\n\n  util::fb2::LockGuard lk(migration_mu_);\n\n  for (auto& m : out_migrations) {\n    auto migration = make_shared<OutgoingMigration>(std::move(m), this, server_family_);\n    
outgoing_migration_jobs_.emplace_back(migration);\n    migration->Start();\n  }\n\n  for (auto& m : in_migrations) {\n    auto migration = make_shared<IncomingSlotMigration>(m.node_info.id, &server_family_->service(),\n                                                        m.slot_ranges);\n    incoming_migrations_jobs_.emplace_back(migration);\n  }\n}\n\nstatic string_view StateToStr(MigrationState state) {\n  switch (state) {\n    case MigrationState::C_CONNECTING:\n      return \"CONNECTING\"sv;\n    case MigrationState::C_SYNC:\n      return \"SYNC\"sv;\n    case MigrationState::C_ERROR:\n      return \"ERROR\"sv;\n    case MigrationState::C_FINISHED:\n      return \"FINISHED\"sv;\n    case MigrationState::C_FATAL:\n      return \"FATAL\"sv;\n  }\n  DCHECK(false) << \"Unknown State value \" << static_cast<underlying_type_t<MigrationState>>(state);\n  return \"UNDEFINED_STATE\"sv;\n}\n\nvoid ClusterFamily::DflySlotMigrationStatus(CmdArgList args, CommandContext* cmd_cntx) {\n  CmdArgParser parser(args);\n\n  util::fb2::LockGuard lk(migration_mu_);\n\n  string_view node_id;\n  if (parser.HasNext()) {\n    node_id = parser.Next<std::string_view>();\n    RETURN_ON_PARSE_ERROR(parser, cmd_cntx);\n  }\n\n  struct Reply {\n    string_view direction;\n    string node_id;\n    string_view state;\n    size_t keys_number;\n    string error;\n  };\n  vector<Reply> reply;\n  reply.reserve(incoming_migrations_jobs_.size() + outgoing_migration_jobs_.size());\n\n  auto append_answer = [&reply](string_view direction, string node_id, string_view filter,\n                                MigrationState state, size_t keys_number, string error) {\n    if (filter.empty() || filter == node_id) {\n      error = error.empty() ? 
\"0\" : error;\n      reply.emplace_back(\n          Reply{direction, std::move(node_id), StateToStr(state), keys_number, std::move(error)});\n    }\n  };\n\n  for (const auto& m : incoming_migrations_jobs_) {\n    append_answer(\"in\", m->GetSourceID(), node_id, m->GetState(), m->GetKeyCount(),\n                  m->GetErrorStr());\n  }\n  for (const auto& m : outgoing_migration_jobs_) {\n    append_answer(\"out\", m->GetMigrationInfo().node_info.id, node_id, m->GetState(),\n                  m->GetKeyCount(), m->GetErrorStr());\n  }\n\n  auto* rb = static_cast<RedisReplyBuilder*>(cmd_cntx->rb());\n  rb->StartArray(reply.size());\n  for (const auto& r : reply) {\n    rb->StartArray(5);\n    rb->SendBulkString(r.direction);\n    rb->SendBulkString(r.node_id);\n    rb->SendBulkString(r.state);\n    rb->SendLong(r.keys_number);\n    rb->SendBulkString(r.error);\n  }\n}\n\nvoid ClusterFamily::DflyMigrate(CmdArgList args, CommandContext* cmd_cntx) {\n  string sub_cmd = absl::AsciiStrToUpper(ArgS(args, 0));\n\n  args.remove_prefix(1);\n\n  if (sub_cmd == \"INIT\") {\n    InitMigration(args, cmd_cntx);\n  } else if (sub_cmd == \"FLOW\") {\n    DflyMigrateFlow(args, cmd_cntx);\n  } else if (sub_cmd == \"ACK\") {\n    DflyMigrateAck(args, cmd_cntx);\n  } else {\n    cmd_cntx->SendError(facade::UnknownSubCmd(sub_cmd, \"DFLYMIGRATE\"), facade::kSyntaxErrType);\n  }\n}\n\nstd::shared_ptr<IncomingSlotMigration> ClusterFamily::GetIncomingMigration(\n    std::string_view source_id) {\n  util::fb2::LockGuard lk(migration_mu_);\n  for (const auto& mj : incoming_migrations_jobs_) {\n    if (mj->GetSourceID() == source_id) {\n      return mj;\n    }\n  }\n  return nullptr;\n}\n\nClusterFamily::PreparedToRemoveOutgoingMigrations::~PreparedToRemoveOutgoingMigrations() = default;\n\n[[nodiscard]] ClusterFamily::PreparedToRemoveOutgoingMigrations\nClusterFamily::TakeOutOutgoingMigrations(shared_ptr<ClusterConfig> new_config,\n                                         
shared_ptr<ClusterConfig> old_config) {\n  auto migrations = new_config->GetFinishedOutgoingMigrations(old_config);\n  util::fb2::LockGuard lk(migration_mu_);\n  SlotRanges removed_slots;\n  PreparedToRemoveOutgoingMigrations res;\n  for (const auto& m : migrations) {\n    auto it = std::find_if(outgoing_migration_jobs_.begin(), outgoing_migration_jobs_.end(),\n                           [&m](const auto& om) {\n                             // we can have only one migration per target-source pair\n                             return m.node_info.id == om->GetMigrationInfo().node_info.id;\n                           });\n    DCHECK(it != outgoing_migration_jobs_.end());\n    DCHECK(it->get() != nullptr);\n    OutgoingMigration& migration = *it->get();\n    const auto& slots = migration.GetSlots();\n    removed_slots.Merge(slots);\n    LOG(INFO) << \"Outgoing migration cancelled: slots \" << slots.ToString() << \" to \"\n              << migration.GetHostIp() << \":\" << migration.GetPort();\n    migration.Finish();\n    res.migrations.push_back(std::move(*it));\n    outgoing_migration_jobs_.erase(it);\n  }\n\n  // Flush non-owned migrations\n  SlotSet migration_slots(removed_slots);\n  res.slot_ranges = migration_slots.GetRemovedSlots(new_config->GetOwnedSlots()).ToSlotRanges();\n\n  // Flushing of removed slots is done outside this function.\n  return res;\n}\n\nnamespace {\n\n// returns removed incoming migration\nbool RemoveIncomingMigrationImpl(std::vector<std::shared_ptr<IncomingSlotMigration>>& jobs,\n                                 string_view source_id) {\n  auto it = std::find_if(jobs.begin(), jobs.end(), [source_id](const auto& im) {\n    // we can have only one migration per target-source pair\n    return source_id == im->GetSourceID();\n  });\n  if (it == jobs.end()) {\n    return false;\n  }\n  DCHECK(it->get() != nullptr);\n  std::shared_ptr<IncomingSlotMigration> migration = *it;\n\n  // Flush non-owned migrations\n  SlotSet 
migration_slots(migration->GetSlots());\n  SlotSet removed = migration_slots.GetRemovedSlots(ClusterConfig::Current()->GetOwnedSlots());\n\n  migration->Stop();\n  // all migration fibers has migration shared_ptr so the object can be removed later\n  jobs.erase(it);\n\n  // TODO make it outside in one run with other slots that should be flushed\n  if (!removed.Empty()) {\n    auto removed_ranges = removed.ToSlotRanges();\n    LOG_IF(WARNING, migration->GetState() == MigrationState::C_FINISHED)\n        << \"Flushing slots of removed FINISHED migration \" << migration->GetSourceID()\n        << \", slots: \" << removed_ranges.ToString();\n    DeleteSlots(removed_ranges);\n  }\n\n  return true;\n}\n}  // namespace\n\nvoid ClusterFamily::RemoveIncomingMigrations(const std::vector<MigrationInfo>& migrations) {\n  util::fb2::LockGuard lk(migration_mu_);\n  for (const auto& m : migrations) {\n    RemoveIncomingMigrationImpl(incoming_migrations_jobs_, m.node_info.id);\n    VLOG(1) << \"Migration was canceled from: \" << m.node_info.id;\n  }\n}\n\nvoid ClusterFamily::InitMigration(CmdArgList args, CommandContext* cmd_cntx) {\n  VLOG(1) << \"Create incoming migration, args: \" << args;\n  CmdArgParser parser{args};\n\n  auto [source_id, flows_num] = parser.Next<string_view, uint32_t>();\n\n  std::vector<SlotRange> slots;\n  do {\n    auto [slot_start, slot_end] = parser.Next<SlotId, SlotId>();\n    slots.emplace_back(SlotRange{slot_start, slot_end});\n  } while (parser.HasNext());\n\n  RETURN_ON_PARSE_ERROR(parser, cmd_cntx);\n\n  SlotRanges slot_ranges(std::move(slots));\n\n  std::shared_ptr<IncomingSlotMigration> migration;\n  {\n    util::fb2::LockGuard lk(migration_mu_);\n\n    auto it = find_if(incoming_migrations_jobs_.begin(), incoming_migrations_jobs_.end(),\n                      [source_id = source_id, &slot_ranges](const auto& migration) {\n                        return migration->GetSourceID() == source_id &&\n                               
migration->GetSlots() == slot_ranges;\n                      });\n\n    if (it != incoming_migrations_jobs_.end()) {\n      migration = *it;\n    }\n  }\n\n  if (!migration) {\n    VLOG(1) << \"Unrecognized incoming migration from \" << source_id;\n    return cmd_cntx->SendSimpleString(kUnknownMigration);\n  }\n\n  if (migration->GetState() != MigrationState::C_CONNECTING) {\n    migration->Stop();\n    auto slots = migration->GetSlots();\n    LOG(INFO) << \"Flushing slots during migration reinitialization \" << migration->GetSourceID()\n              << \", slots: \" << slots.ToString();\n    DeleteSlots(slots);\n  }\n\n  if (migration->GetState() == MigrationState::C_FATAL) {\n    return cmd_cntx->SendError(absl::StrCat(\"-\", kIncomingMigrationOOM));\n  }\n\n  migration->Init(flows_num);\n\n  return cmd_cntx->SendOk();\n}\n\nvoid ClusterFamily::DflyMigrateFlow(CmdArgList args, CommandContext* cmd_cntx) {\n  CmdArgParser parser{args};\n  auto [source_id, shard_id] = parser.Next<std::string_view, uint32_t>();\n\n  RETURN_ON_PARSE_ERROR(parser, cmd_cntx);\n\n  VLOG(1) << \"Create flow \" << source_id << \" shard_id: \" << shard_id;\n\n  cmd_cntx->conn()->SetName(absl::StrCat(\"migration_flow_\", source_id));\n\n  auto migration = GetIncomingMigration(source_id);\n\n  if (!migration) {\n    return cmd_cntx->SendError(kIdNotFound);\n  }\n\n  auto* conn_cntx = cmd_cntx->server_conn_cntx();\n  DCHECK(conn_cntx->sync_dispatch);\n  // we do this to be ignored by the dispatch tracker\n  // TODO provide a more clear approach\n  conn_cntx->sync_dispatch = false;\n\n  cmd_cntx->SendOk();\n\n  // Try migrating the connection if we have the same shard configuration\n  if (migration->ShardNum() == shard_set->size() &&\n      int32_t(shard_id) != fb2::ProactorBase::me()->GetPoolIndex()) {\n    DCHECK_LT(shard_id, shard_set->size());\n    if (bool success = conn_cntx->conn()->Migrate(shard_set->pool()->at(shard_id)); !success) {\n      cmd_cntx->SendError(\"invalid state\");\n    
  return;\n    }\n  }\n\n  migration->StartFlow(shard_id, conn_cntx->conn()->socket());\n}\n\nvoid ClusterFamily::ApplyMigrationSlotRangeToConfig(std::string_view node_id,\n                                                    const SlotRanges& slots, bool is_incoming) {\n  VLOG(1) << \"Update config for slots ranges: \" << slots.ToString() << \" for \" << MyID() << \" : \"\n          << node_id;\n  util::fb2::LockGuard gu(set_config_mu);\n  util::fb2::LockGuard lk(migration_mu_);\n\n  bool is_migration_valid = false;\n  if (is_incoming) {\n    for (const auto& mj : incoming_migrations_jobs_) {\n      if (mj->GetSourceID() == node_id && slots == mj->GetSlots()) {\n        is_migration_valid = true;\n        break;\n      }\n    }\n  } else {\n    for (const auto& mj : outgoing_migration_jobs_) {\n      if (mj->GetMigrationInfo().node_info.id == node_id &&\n          mj->GetMigrationInfo().slot_ranges == slots) {\n        is_migration_valid = true;\n        break;\n      }\n    }\n  }\n  if (!is_migration_valid) {\n    LOG(WARNING) << \"Config wasn't updated for slots ranges: \" << slots.ToString() << \" for \"\n                 << MyID() << \" : \" << node_id;\n    return;\n  }\n\n  auto new_config = is_incoming ? ClusterConfig::Current()->CloneWithChanges(slots, {})\n                                : ClusterConfig::Current()->CloneWithChanges({}, slots);\n\n  auto blocking_filter = [&new_config](ArgSlice keys) {\n    bool moved = any_of(keys.begin(), keys.end(), [&](auto k) { return !new_config->IsMySlot(k); });\n    return moved ? 
OpStatus::KEY_MOVED : OpStatus::OK;\n  };\n  // we don't need to use DispatchTracker here because for IncomingMingration we don't have\n  // connectionas that should be tracked and for Outgoing migration we do it under Pause\n  server_family_->service().proactor_pool().AwaitFiberOnAll(\n      [this, &new_config, &blocking_filter](util::ProactorBase*) {\n        server_family_->CancelBlockingOnThread(blocking_filter);\n        ClusterConfig::SetCurrent(new_config);\n      });\n  DCHECK(ClusterConfig::Current() != nullptr);\n  VLOG(1) << \"Config is updated for slots ranges: \" << slots.ToString() << \" for \" << MyID()\n          << \" : \" << node_id;\n}\n\nvoid ClusterFamily::DflyMigrateAck(CmdArgList args, CommandContext* cmd_cntx) {\n  CmdArgParser parser{args};\n  auto [source_id, attempt] = parser.Next<std::string_view, long>();\n\n  RETURN_ON_PARSE_ERROR(parser, cmd_cntx);\n\n  VLOG(1) << \"DFLYMIGRATE ACK\" << args;\n  auto in_migrations = ClusterConfig::Current()->GetIncomingMigrations();\n  auto m_it =\n      std::find_if(in_migrations.begin(), in_migrations.end(),\n                   [source_id = source_id](const auto& m) { return m.node_info.id == source_id; });\n  if (m_it == in_migrations.end()) {\n    LOG(WARNING) << \"migration isn't in config\";\n    return cmd_cntx->SendSimpleString(kUnknownMigration);\n  }\n\n  auto migration = GetIncomingMigration(source_id);\n  if (!migration)\n    return cmd_cntx->SendError(kIdNotFound);\n\n  if (!migration->Join(attempt)) {\n    if (migration->GetState() == MigrationState::C_FATAL) {\n      return cmd_cntx->SendError(absl::StrCat(\"-\", kIncomingMigrationOOM));\n    } else {\n      return cmd_cntx->SendError(\"Join timeout happened\");\n    }\n  }\n\n  ApplyMigrationSlotRangeToConfig(migration->GetSourceID(), migration->GetSlots(), true);\n\n  return cmd_cntx->rb()->SendLong(attempt);\n}\n\nvoid ClusterFamily::PauseAllIncomingMigrations(bool pause) {\n  util::fb2::LockGuard lk(migration_mu_);\n  LOG_IF(ERROR, 
incoming_migrations_jobs_.empty()) << \"No incoming migrations!\";\n  for (auto& im : incoming_migrations_jobs_) {\n    im->Pause(pause);\n  }\n}\n\nsize_t ClusterFamily::MigrationsErrorsCount() const {\n  util::fb2::LockGuard lk(migration_mu_);\n\n  size_t error_num = 0;\n\n  for (const auto& mj : incoming_migrations_jobs_) {\n    error_num += mj->GetErrorsCount();\n  }\n\n  for (const auto& mj : outgoing_migration_jobs_) {\n    error_num += mj->GetErrorsCount();\n  }\n\n  return error_num;\n}\n\nvoid ClusterFamily::ReconcileMasterSlots(std::string_view repl_id) {\n  util::fb2::LockGuard gu(set_config_mu);\n  util::fb2::LockGuard lk(migration_mu_);\n\n  auto config = ClusterConfig::Current();\n\n  // Sanity -- we should not reach there\n  if (!config) {\n    LOG(ERROR) << \"Cluster config after takeover is empty\";\n    return;\n  }\n\n  for (auto& info : config->GetMutableConfig()) {\n    // we are updating the old config\n    if (info.master.id == id_) {\n      if (!info.replicas.empty()) {\n        auto target = std::find_if(info.replicas.begin(), info.replicas.end(),\n                                   [repl_id](const auto& e) { return e.id == repl_id; });\n\n        if (target == info.replicas.end()) {\n          auto topology =\n              absl::StrCat(\"[\",\n                           absl::StrJoin(info.replicas, \",\",\n                                         [](std::string* out, const auto& r) { *out = r.id; }),\n                           \"]\");\n          LOG(ERROR) << \"info.master.id=\" << id_ << \". Missing repl_id=\" << repl_id\n                     << \" from cluster topology \" << topology\n                     << \". 
Slot redirection after takeover corrupted.\";\n\n          return;\n        }\n\n        info.master = *target;\n        info.replicas.clear();\n      }\n      return;\n    }\n  }\n}\n\nvoid ClusterFamily::ReconcileReplicaSlots() {\n  util::fb2::LockGuard gu(set_config_mu);\n  util::fb2::LockGuard lk(migration_mu_);\n\n  auto config = ClusterConfig::Current();\n\n  // Sanity -- we should not reach there\n  if (!config) {\n    LOG(ERROR) << \"Cluster config after takeover is empty\";\n    return;\n  }\n\n  auto new_config = ClusterConfig::Current()->CloneWithChanges({}, {});\n  // Replace master with replica in shard config.\n  bool found = false;\n  for (ClusterShardInfo& info : new_config->GetMutableConfig()) {\n    for (const auto& replica : info.replicas) {\n      if (replica.id == id_) {\n        info.master = replica;\n        // New master has no replicas\n        info.replicas.clear();\n        found = true;\n        break;\n      }\n    }\n    if (found)\n      break;\n  }\n\n  LOG_IF(ERROR, !found) << \"Did not find replica in the cluster map\";\n\n  server_family_->service().proactor_pool().AwaitFiberOnAll(\n      [&new_config](util::ProactorBase*) { ClusterConfig::SetCurrent(new_config); });\n}\n\nusing EngineFunc = void (ClusterFamily::*)(CmdArgList args, CommandContext* cmd_cntx);\n\ninline CommandId::Handler HandlerFunc(ClusterFamily* se, EngineFunc f) {\n  return [=](CmdArgList args, CommandContext* cmd_cntx) { return (se->*f)(args, cmd_cntx); };\n}\n\n#define HFUNC(x) SetHandler(HandlerFunc(this, &ClusterFamily::x))\n\nvoid ClusterFamily::Register(CommandRegistry* registry) {\n  registry->StartFamily();\n  *registry << CI{\"CLUSTER\", CO::READONLY | CO::LOADING, -2, 0, 0, acl::kCluster}.HFUNC(Cluster)\n            << CI{\"DFLYCLUSTER\",    CO::ADMIN | CO::GLOBAL_TRANS | CO::HIDDEN, -2, 0, 0,\n                  acl::kDflyCluster}\n                   .HFUNC(DflyCluster)\n            << CI{\"READONLY\", CO::READONLY, 1, 0, 0, 
acl::kReadOnly}.HFUNC(ReadOnly)\n            << CI{\"READWRITE\", CO::READONLY, 1, 0, 0, acl::kReadWrite}.HFUNC(ReadWrite)\n            << CI{\"DFLYMIGRATE\", CO::ADMIN | CO::HIDDEN, -1, 0, 0, acl::kDflyMigrate}.HFUNC(\n                   DflyMigrate);\n}\n\n}  // namespace dfly::cluster\n"
  },
  {
    "path": "src/server/cluster/cluster_family.h",
    "content": "// Copyright 2023, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#pragma once\n\n#include <string>\n\n#include \"facade/conn_context.h\"\n#include \"facade/facade_types.h\"\n#include \"server/cluster/cluster_config.h\"\n#include \"server/cluster/incoming_slot_migration.h\"\n#include \"server/cluster/outgoing_slot_migration.h\"\n\nnamespace facade {\nclass SinkReplyBuilder;\n}  // namespace facade\n\nnamespace dfly {\nclass ServerFamily;\nclass CommandRegistry;\nclass ConnectionContext;\nclass CommandContext;\n}  // namespace dfly\n\nnamespace dfly::cluster {\n\nclass ClusterFamily {\n public:\n  explicit ClusterFamily(ServerFamily* server_family);\n\n  void Register(CommandRegistry* registry);\n\n  void Shutdown() ABSL_LOCKS_EXCLUDED(set_config_mu);\n\n  void ApplyMigrationSlotRangeToConfig(std::string_view node_id, const SlotRanges& slots,\n                                       bool is_outgoing);\n\n  const std::string& MyID() const {\n    return id_;\n  }\n\n  // Only for debug purpose. 
Pause/Resume all incoming migrations\n  void PauseAllIncomingMigrations(bool pause) ABSL_LOCKS_EXCLUDED(migration_mu_);\n\n  size_t MigrationsErrorsCount() const ABSL_LOCKS_EXCLUDED(migration_mu_);\n\n  // Helper functions to be used during takeover from both nodes (master and replica).\n  // It reconciles the cluster configuration for both nodes to reflect the node\n  // role changes after the takeover.\n  // For the taking over node it's called at the end of the ReplTakeOver flow\n  // and for the taken over node it's called at the end of the dflycmd::TakeOver\n  void ReconcileMasterSlots(std::string_view repl_id)\n      ABSL_LOCKS_EXCLUDED(set_config_mu, migration_mu_);\n\n  void ReconcileReplicaSlots() ABSL_LOCKS_EXCLUDED(set_config_mu, migration_mu_);\n\n private:\n  using SinkReplyBuilder = facade::SinkReplyBuilder;\n\n  // Cluster commands compatible with Redis\n  void Cluster(CmdArgList args, CommandContext* cmd_cntx);\n  void ClusterHelp(SinkReplyBuilder* builder);\n  void ClusterShards(SinkReplyBuilder* builder, ConnectionContext* cntx);\n  void ClusterSlots(SinkReplyBuilder* builder, ConnectionContext* cntx);\n  void ClusterNodes(SinkReplyBuilder* builder, ConnectionContext* cntx);\n  void ClusterInfo(SinkReplyBuilder* builder, ConnectionContext* cntx);\n  void ClusterMyId(SinkReplyBuilder* builder);\n\n  void KeySlot(CmdArgList args, SinkReplyBuilder* builder);\n\n  void ReadOnly(CmdArgList args, CommandContext* cmd_cntx);\n  void ReadWrite(CmdArgList args, CommandContext* cmd_cntx);\n\n  // Custom Dragonfly commands for cluster management\n  void DflyCluster(CmdArgList args, CommandContext* cmd_cntx);\n  void DflyClusterConfig(CmdArgList args, CommandContext* cmd_cntx);\n\n  void DflyClusterGetSlotInfo(CmdArgList args, CommandContext* cmd_cntx)\n      ABSL_LOCKS_EXCLUDED(migration_mu_);\n  void DflyClusterFlushSlots(CmdArgList args, CommandContext* cmd_cntx);\n  void DflySlotMigrationStatus(CmdArgList args, CommandContext* cmd_cntx)\n      
ABSL_LOCKS_EXCLUDED(migration_mu_);\n\n  // DFLYMIGRATE is internal command defines several steps in slots migrations process\n  void DflyMigrate(CmdArgList args, CommandContext* cmd_cntx);\n\n  // DFLYMIGRATE INIT is internal command to create incoming migration object\n  void InitMigration(CmdArgList args, CommandContext* cmd_cntx) ABSL_LOCKS_EXCLUDED(migration_mu_);\n\n  // DFLYMIGRATE FLOW initiate second step in slots migration procedure\n  // this request should be done for every shard on the target node\n  // this method assocciate connection and shard that will be the data\n  // source for migration\n  void DflyMigrateFlow(CmdArgList args, CommandContext* cmd_cntx);\n\n  void DflyMigrateAck(CmdArgList args, CommandContext* cmd_cntx);\n\n  std::shared_ptr<IncomingSlotMigration> GetIncomingMigration(std::string_view source_id)\n      ABSL_LOCKS_EXCLUDED(migration_mu_);\n\n  void StartNewSlotMigrations(const ClusterConfig& new_config);\n\n  // must be destroyed excluded set_config_mu and migration_mu_ locks\n  struct PreparedToRemoveOutgoingMigrations {\n    std::vector<std::shared_ptr<OutgoingMigration>> migrations;\n    SlotRanges slot_ranges;\n    ~PreparedToRemoveOutgoingMigrations() ABSL_LOCKS_EXCLUDED(migration_mu_, set_config_mu);\n  };\n\n  [[nodiscard]] PreparedToRemoveOutgoingMigrations TakeOutOutgoingMigrations(\n      std::shared_ptr<ClusterConfig> new_config, std::shared_ptr<ClusterConfig> old_config)\n      ABSL_LOCKS_EXCLUDED(migration_mu_);\n  void RemoveIncomingMigrations(const std::vector<MigrationInfo>& migrations)\n      ABSL_LOCKS_EXCLUDED(migration_mu_);\n\n  mutable util::fb2::Mutex migration_mu_;  // guard migrations operations\n  // holds all incoming slots migrations that are currently in progress.\n  std::vector<std::shared_ptr<IncomingSlotMigration>> incoming_migrations_jobs_\n      ABSL_GUARDED_BY(migration_mu_);\n\n  // holds all outgoing slots migrations that are currently in progress\n  
std::vector<std::shared_ptr<OutgoingMigration>> outgoing_migration_jobs_\n      ABSL_GUARDED_BY(migration_mu_);\n\n  std::optional<ClusterShardInfos> GetShardInfos(ConnectionContext* cntx) const;\n\n  ClusterShardInfo GetEmulatedShardInfo(ConnectionContext* cntx) const;\n\n  // Guards set configuration, so that we won't handle 2 in parallel.\n  mutable util::fb2::Mutex set_config_mu;\n\n  std::string id_;\n\n  ServerFamily* server_family_ = nullptr;\n};\n\n}  // namespace dfly::cluster\n"
  },
  {
    "path": "src/server/cluster/cluster_family_test.cc",
    "content": "// Copyright 2023, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#include <absl/flags/reflection.h>\n#include <gmock/gmock-matchers.h>\n#include <gtest/gtest-matchers.h>\n\n#include <string>\n#include <string_view>\n\n#include \"absl/strings/str_replace.h\"\n#include \"absl/strings/substitute.h\"\n#include \"absl/time/clock.h\"\n#include \"absl/time/time.h\"\n#include \"base/gtest.h\"\n#include \"base/logging.h\"\n#include \"core/detail/gen_utils.h\"\n#include \"facade/facade_test.h\"\n#include \"server/test_utils.h\"\n\nnamespace dfly::cluster {\nnamespace {\n\nusing namespace std;\nusing namespace testing;\n\nclass ClusterFamilyTest : public BaseFamilyTest {\n public:\n  ClusterFamilyTest() {\n    SetTestFlag(\"cluster_mode\", \"yes\");\n  }\n\n protected:\n  static constexpr string_view kInvalidConfiguration = \"Invalid cluster configuration\";\n\n  string GetMyId() {\n    return Run({\"cluster\", \"myid\"}).GetString();\n  }\n\n  void ConfigSingleNodeCluster(string id) {\n    string config_template = R\"json(\n      [\n        {\n          \"slot_ranges\": [\n            {\n              \"start\": 0,\n              \"end\": 16383\n            }\n          ],\n          \"master\": {\n            \"id\": \"$0\",\n            \"ip\": \"10.0.0.1\",\n            \"port\": 7000,\n            \"health\": \"online\"\n          },\n          \"replicas\": []\n        }\n      ])json\";\n    string config = absl::Substitute(config_template, id);\n    EXPECT_EQ(RunPrivileged({\"dflycluster\", \"config\", config}), \"OK\");\n  }\n};\n\nTEST_F(ClusterFamilyTest, ClusterConfigInvalidJSON) {\n  EXPECT_THAT(RunPrivileged({\"dflycluster\", \"config\", \"invalid JSON\"}),\n              ErrArg(\"Invalid cluster configuration.\"));\n\n  string cluster_info = Run({\"cluster\", \"info\"}).GetString();\n  EXPECT_THAT(cluster_info, HasSubstr(\"cluster_state:fail\"));\n  EXPECT_THAT(cluster_info, 
HasSubstr(\"cluster_slots_assigned:0\"));\n  EXPECT_THAT(cluster_info, HasSubstr(\"cluster_slots_ok:0\"));\n  EXPECT_THAT(cluster_info, HasSubstr(\"cluster_known_nodes:0\"));\n  EXPECT_THAT(cluster_info, HasSubstr(\"cluster_size:0\"));\n\n  EXPECT_THAT(Run({\"cluster\", \"shards\"}), ErrArg(\"Cluster is not yet configured\"));\n  EXPECT_THAT(Run({\"cluster\", \"slots\"}), ErrArg(\"Cluster is not yet configured\"));\n  EXPECT_THAT(Run({\"cluster\", \"nodes\"}), ErrArg(\"Cluster is not yet configured\"));\n}\n\nTEST_F(ClusterFamilyTest, ClusterConfigInvalidConfig) {\n  EXPECT_THAT(RunPrivileged({\"dflycluster\", \"config\", \"[]\"}), ErrArg(kInvalidConfiguration));\n\n  string cluster_info = Run({\"cluster\", \"info\"}).GetString();\n  EXPECT_THAT(cluster_info, HasSubstr(\"cluster_state:fail\"));\n  EXPECT_THAT(cluster_info, HasSubstr(\"cluster_slots_assigned:0\"));\n  EXPECT_THAT(cluster_info, HasSubstr(\"cluster_slots_ok:0\"));\n  EXPECT_THAT(cluster_info, HasSubstr(\"cluster_known_nodes:0\"));\n  EXPECT_THAT(cluster_info, HasSubstr(\"cluster_size:0\"));\n}\n\nTEST_F(ClusterFamilyTest, ClusterConfigInvalidMissingSlots) {\n  EXPECT_THAT(RunPrivileged({\"dflycluster\", \"config\", R\"json(\n      [\n        {\n          \"slot_ranges\": [\n            {\n              \"start\": 0,\n              \"end\": 100\n            }\n          ],\n          \"master\": {\n            \"id\": \"abcd1234\",\n            \"ip\": \"10.0.0.1\",\n            \"port\": 7000\n          },\n          \"replicas\": []\n        }\n      ])json\"}),\n              ErrArg(kInvalidConfiguration));\n\n  string cluster_info = Run({\"cluster\", \"info\"}).GetString();\n  EXPECT_THAT(cluster_info, HasSubstr(\"cluster_state:fail\"));\n  EXPECT_THAT(cluster_info, HasSubstr(\"cluster_slots_assigned:0\"));\n  EXPECT_THAT(cluster_info, HasSubstr(\"cluster_slots_ok:0\"));\n  EXPECT_THAT(cluster_info, HasSubstr(\"cluster_known_nodes:0\"));\n  EXPECT_THAT(cluster_info, 
HasSubstr(\"cluster_size:0\"));\n}\n\nTEST_F(ClusterFamilyTest, ClusterConfigInvalidOverlappingSlots) {\n  EXPECT_THAT(RunPrivileged({\"dflycluster\", \"config\", R\"json(\n      [\n        {\n          \"slot_ranges\": [\n            {\n              \"start\": 0,\n              \"end\": 1000\n            }\n          ],\n          \"master\": {\n            \"id\": \"abcd1234\",\n            \"ip\": \"10.0.0.1\",\n            \"port\": 7000\n          },\n          \"replicas\": []\n        },\n        {\n          \"slot_ranges\": [\n            {\n              \"start\": 800,\n              \"end\": 16383\n            }\n          ],\n          \"master\": {\n            \"id\": \"abcd1234\",\n            \"ip\": \"10.0.0.1\",\n            \"port\": 7000\n          },\n          \"replicas\": []\n        }\n      ])json\"}),\n              ErrArg(kInvalidConfiguration));\n\n  string cluster_info = Run({\"cluster\", \"info\"}).GetString();\n  EXPECT_THAT(cluster_info, HasSubstr(\"cluster_state:fail\"));\n  EXPECT_THAT(cluster_info, HasSubstr(\"cluster_slots_assigned:0\"));\n  EXPECT_THAT(cluster_info, HasSubstr(\"cluster_slots_ok:0\"));\n  EXPECT_THAT(cluster_info, HasSubstr(\"cluster_known_nodes:0\"));\n  EXPECT_THAT(cluster_info, HasSubstr(\"cluster_size:0\"));\n}\n\nTEST_F(ClusterFamilyTest, ClusterConfigNoReplicas) {\n  ConfigSingleNodeCluster(\"abcd1234\");\n  string cluster_info = Run({\"cluster\", \"info\"}).GetString();\n  EXPECT_THAT(cluster_info, HasSubstr(\"cluster_state:ok\"));\n  EXPECT_THAT(cluster_info, HasSubstr(\"cluster_slots_assigned:16384\"));\n  EXPECT_THAT(cluster_info, HasSubstr(\"cluster_slots_ok:16384\"));\n  EXPECT_THAT(cluster_info, HasSubstr(\"cluster_known_nodes:1\"));\n  EXPECT_THAT(cluster_info, HasSubstr(\"cluster_size:1\"));\n\n  EXPECT_THAT(Run({\"cluster\", \"shards\"}),\n              RespArray(ElementsAre(\"slots\",                                            //\n                                    
RespArray(ElementsAre(IntArg(0), IntArg(16'383))),  //\n                                    \"nodes\",                                            //\n                                    RespArray(ElementsAre(                              //\n                                        RespArray(ElementsAre(                          //\n                                            \"id\", \"abcd1234\",                           //\n                                            \"endpoint\", \"10.0.0.1\",                     //\n                                            \"ip\", \"10.0.0.1\",                           //\n                                            \"port\", IntArg(7000),                       //\n                                            \"role\", \"master\",                           //\n                                            \"replication-offset\", IntArg(0),            //\n                                            \"health\", \"online\")))))));\n\n  EXPECT_THAT(Run({\"get\", \"x\"}).GetString(),\n              testing::MatchesRegex(R\"(MOVED [0-9]+ 10.0.0.1:7000)\"));\n\n  EXPECT_THAT(Run({\"cluster\", \"slots\"}),\n              RespArray(ElementsAre(IntArg(0),              //\n                                    IntArg(16'383),         //\n                                    RespArray(ElementsAre(  //\n                                        \"10.0.0.1\",         //\n                                        IntArg(7'000),      //\n                                        \"abcd1234\")))));\n\n  EXPECT_EQ(Run({\"cluster\", \"nodes\"}),\n            \"abcd1234 10.0.0.1:7000@7000 master - 0 0 0 connected 0-16383\\n\");\n}\n\nTEST_F(ClusterFamilyTest, ClusterConfigFull) {\n  EXPECT_EQ(RunPrivileged({\"dflycluster\", \"config\", R\"json(\n      [\n        {\n          \"slot_ranges\": [\n            {\n              \"start\": 0,\n              \"end\": 16383\n            }\n          ],\n          \"master\": {\n            \"id\": \"abcd1234\",\n 
           \"ip\": \"10.0.0.1\",\n            \"port\": 7000,\n            \"health\": \"online\"\n          },\n          \"replicas\": [\n            {\n              \"id\": \"wxyz\",\n              \"ip\": \"10.0.0.10\",\n              \"port\": 8000,\n              \"health\": \"online\"\n            }\n          ]\n        }\n      ])json\"}),\n            \"OK\");\n\n  string cluster_info = Run({\"cluster\", \"info\"}).GetString();\n  EXPECT_THAT(cluster_info, HasSubstr(\"cluster_state:ok\"));\n  EXPECT_THAT(cluster_info, HasSubstr(\"cluster_slots_assigned:16384\"));\n  EXPECT_THAT(cluster_info, HasSubstr(\"cluster_slots_ok:16384\"));\n  EXPECT_THAT(cluster_info, HasSubstr(\"cluster_known_nodes:2\"));\n  EXPECT_THAT(cluster_info, HasSubstr(\"cluster_size:1\"));\n\n  EXPECT_THAT(Run({\"cluster\", \"shards\"}),\n              RespArray(ElementsAre(\"slots\",                                            //\n                                    RespArray(ElementsAre(IntArg(0), IntArg(16'383))),  //\n                                    \"nodes\",                                            //\n                                    RespArray(ElementsAre(                              //\n                                        RespArray(ElementsAre(                          //\n                                            \"id\", \"abcd1234\",                           //\n                                            \"endpoint\", \"10.0.0.1\",                     //\n                                            \"ip\", \"10.0.0.1\",                           //\n                                            \"port\", IntArg(7000),                       //\n                                            \"role\", \"master\",                           //\n                                            \"replication-offset\", IntArg(0),            //\n                                            \"health\", \"online\")),                       //\n                                       
 RespArray(ElementsAre(                          //\n                                            \"id\", \"wxyz\",                               //\n                                            \"endpoint\", \"10.0.0.10\",                    //\n                                            \"ip\", \"10.0.0.10\",                          //\n                                            \"port\", IntArg(8000),                       //\n                                            \"role\", \"replica\",                          //\n                                            \"replication-offset\", IntArg(0),            //\n                                            \"health\", \"online\")))))));\n\n  EXPECT_THAT(Run({\"cluster\", \"slots\"}),\n              RespArray(ElementsAre(IntArg(0),              //\n                                    IntArg(16'383),         //\n                                    RespArray(ElementsAre(  //\n                                        \"10.0.0.1\",         //\n                                        IntArg(7'000),      //\n                                        \"abcd1234\")),       //\n                                    RespArray(ElementsAre(  //\n                                        \"10.0.0.10\",        //\n                                        IntArg(8'000),      //\n                                        \"wxyz\")))));\n\n  EXPECT_EQ(Run({\"cluster\", \"nodes\"}),\n            \"abcd1234 10.0.0.1:7000@7000 master - 0 0 0 connected 0-16383\\n\"\n            \"wxyz 10.0.0.10:8000@8000 slave abcd1234 0 0 0 connected\\n\");\n}\n\nTEST_F(ClusterFamilyTest, ClusterConfigFullMultipleInstances) {\n  EXPECT_EQ(RunPrivileged({\"dflycluster\", \"config\", R\"json(\n      [\n        {\n          \"slot_ranges\": [\n            {\n              \"start\": 0,\n              \"end\": 10000\n            }\n          ],\n          \"master\": {\n            \"id\": \"abcd1234\",\n            \"ip\": \"10.0.0.1\",\n            \"port\": 
7000,\n            \"health\": \"fail\"\n          },\n          \"replicas\": [\n            {\n              \"id\": \"wxyz\",\n              \"ip\": \"10.0.0.10\",\n              \"port\": 8000,\n              \"health\": \"online\"\n            }\n          ]\n        },\n        {\n          \"slot_ranges\": [\n            {\n              \"start\": 10001,\n              \"end\": 16383\n            }\n          ],\n          \"master\": {\n            \"id\": \"efgh7890\",\n            \"ip\": \"10.0.0.2\",\n            \"port\": 7001,\n            \"health\": \"online\"\n          },\n          \"replicas\": [\n            {\n              \"id\": \"qwerty\",\n              \"ip\": \"10.0.0.11\",\n              \"port\": 8001,\n              \"health\": \"online\"\n            },\n             {\n              \"id\": \"qwerty1\",\n              \"ip\": \"10.0.0.12\",\n              \"port\": 8002,\n              \"health\": \"loading\"\n            },\n             {\n              \"id\": \"qwerty2\",\n              \"ip\": \"10.0.0.13\",\n              \"port\": 8003,\n              \"health\": \"fail\"\n            },\n             {\n              \"id\": \"qwerty3\",\n              \"ip\": \"10.0.0.14\",\n              \"port\": 8004,\n              \"health\": \"hidden\"\n            }\n          ]\n        }\n      ])json\"}),\n            \"OK\");\n\n  string cluster_info = Run({\"cluster\", \"info\"}).GetString();\n  EXPECT_THAT(cluster_info, HasSubstr(\"cluster_state:ok\"));\n  EXPECT_THAT(cluster_info, HasSubstr(\"cluster_slots_assigned:16384\"));\n  EXPECT_THAT(cluster_info, HasSubstr(\"cluster_slots_ok:16384\"));\n  EXPECT_THAT(cluster_info, HasSubstr(\"cluster_known_nodes:7\"));\n  EXPECT_THAT(cluster_info, HasSubstr(\"cluster_size:2\"));\n\n  EXPECT_THAT(Run({\"cluster\", \"shards\"}),\n              RespArray(ElementsAre(\n                  RespArray(ElementsAre(\"slots\",                                                 //\n                  
                      RespArray(ElementsAre(IntArg(0), IntArg(10'000))),       //\n                                        \"nodes\",                                                 //\n                                        RespArray(ElementsAre(                                   //\n                                            RespArray(ElementsAre(                               //\n                                                \"id\", \"abcd1234\",                                //\n                                                \"endpoint\", \"10.0.0.1\",                          //\n                                                \"ip\", \"10.0.0.1\",                                //\n                                                \"port\", IntArg(7000),                            //\n                                                \"role\", \"master\",                                //\n                                                \"replication-offset\", IntArg(0),                 //\n                                                \"health\", \"fail\")),                              //\n                                            RespArray(ElementsAre(                               //\n                                                \"id\", \"wxyz\",                                    //\n                                                \"endpoint\", \"10.0.0.10\",                         //\n                                                \"ip\", \"10.0.0.10\",                               //\n                                                \"port\", IntArg(8000),                            //\n                                                \"role\", \"replica\",                               //\n                                                \"replication-offset\", IntArg(0),                 //\n                                                \"health\", \"online\")))))),                        //\n                  
RespArray(ElementsAre(\"slots\",                                                 //\n                                        RespArray(ElementsAre(IntArg(10'001), IntArg(16'383))),  //\n                                        \"nodes\",                                                 //\n                                        RespArray(ElementsAre(                                   //\n                                            RespArray(ElementsAre(                               //\n                                                \"id\", \"efgh7890\",                                //\n                                                \"endpoint\", \"10.0.0.2\",                          //\n                                                \"ip\", \"10.0.0.2\",                                //\n                                                \"port\", IntArg(7001),                            //\n                                                \"role\", \"master\",                                //\n                                                \"replication-offset\", IntArg(0),                 //\n                                                \"health\", \"online\")),                            //\n                                            RespArray(ElementsAre(                               //\n                                                \"id\", \"qwerty\",                                  //\n                                                \"endpoint\", \"10.0.0.11\",                         //\n                                                \"ip\", \"10.0.0.11\",                               //\n                                                \"port\", IntArg(8001),                            //\n                                                \"role\", \"replica\",                               //\n                                                \"replication-offset\", IntArg(0),                 //\n                                                
\"health\", \"online\")),                            //\n                                            RespArray(ElementsAre(                               //\n                                                \"id\", \"qwerty1\",                                 //\n                                                \"endpoint\", \"10.0.0.12\",                         //\n                                                \"ip\", \"10.0.0.12\",                               //\n                                                \"port\", IntArg(8002),                            //\n                                                \"role\", \"replica\",                               //\n                                                \"replication-offset\", IntArg(0),                 //\n                                                \"health\", \"loading\")),                           //\n                                            RespArray(ElementsAre(                               //\n                                                \"id\", \"qwerty2\",                                 //\n                                                \"endpoint\", \"10.0.0.13\",                         //\n                                                \"ip\", \"10.0.0.13\",                               //\n                                                \"port\", IntArg(8003),                            //\n                                                \"role\", \"replica\",                               //\n                                                \"replication-offset\", IntArg(0),                 //\n                                                \"health\", \"fail\")))))))));\n\n  EXPECT_THAT(Run({\"cluster\", \"slots\"}),\n              RespArray(ElementsAre(                            //\n                  RespArray(ElementsAre(IntArg(0),              //\n                                        IntArg(10'000),         //\n                                        
RespArray(ElementsAre(  //\n                                            \"10.0.0.1\",         //\n                                            IntArg(7'000),      //\n                                            \"abcd1234\")),       //\n                                        RespArray(ElementsAre(  //\n                                            \"10.0.0.10\",        //\n                                            IntArg(8'000),      //\n                                            \"wxyz\")))),         //\n                  RespArray(ElementsAre(IntArg(10'001),         //\n                                        IntArg(16'383),         //\n                                        RespArray(ElementsAre(  //\n                                            \"10.0.0.2\",         //\n                                            IntArg(7'001),      //\n                                            \"efgh7890\")),       //\n                                        RespArray(ElementsAre(  //\n                                            \"10.0.0.11\",        //\n                                            IntArg(8'001),      //\n                                            \"qwerty\")))))));\n\n  EXPECT_THAT(Run({\"cluster\", \"nodes\"}),\n              \"abcd1234 10.0.0.1:7000@7000 master - 0 0 0 disconnected 0-10000\\n\"\n              \"wxyz 10.0.0.10:8000@8000 slave abcd1234 0 0 0 connected\\n\"\n              \"efgh7890 10.0.0.2:7001@7001 master - 0 0 0 connected 10001-16383\\n\"\n              \"qwerty 10.0.0.11:8001@8001 slave efgh7890 0 0 0 connected\\n\"\n              \"qwerty1 10.0.0.12:8002@8002 slave efgh7890 0 0 0 connected\\n\"\n              \"qwerty2 10.0.0.13:8003@8003 slave efgh7890 0 0 0 disconnected\\n\");\n\n  absl::InsecureBitGen eng;\n  while (true) {\n    string random_key = GetRandomHex(eng, 40);\n    SlotId slot = KeySlot(random_key);\n    if (slot > 10'000) {\n      continue;\n    }\n\n    EXPECT_THAT(Run({\"get\", random_key}).GetString(),\n              
  testing::MatchesRegex(R\"(MOVED [0-9]+ 10.0.0.1:7000)\"));\n    break;\n  }\n\n  while (true) {\n    string random_key = GetRandomHex(eng, 40);\n    SlotId slot = KeySlot(random_key);\n    if (slot <= 10'000) {\n      continue;\n    }\n\n    EXPECT_THAT(Run({\"get\", random_key}).GetString(),\n                testing::MatchesRegex(R\"(MOVED [0-9]+ 10.0.0.2:7001)\"));\n    break;\n  }\n}\n\nTEST_F(ClusterFamilyTest, ClusterGetSlotInfoInvalid) {\n  constexpr string_view kErr = \"ERR syntax error\";\n  EXPECT_THAT(RunPrivileged({\"dflycluster\", \"getslotinfo\"}), ErrArg(kErr));\n  EXPECT_THAT(RunPrivileged({\"dflycluster\", \"getslotinfo\", \"s\"}), ErrArg(kErr));\n  EXPECT_THAT(RunPrivileged({\"dflycluster\", \"getslotinfo\", \"slots\"}), ErrArg(kErr));\n}\n\nTEST_F(ClusterFamilyTest, ClusterGetSlotInfo) {\n  ConfigSingleNodeCluster(GetMyId());\n\n  constexpr string_view kKey = \"some-key\";\n  const SlotId slot = KeySlot(kKey);\n  EXPECT_NE(slot, 0) << \"We need to choose another key\";\n\n  const string value(1'000, '#');  // Long string - to use heap\n  EXPECT_EQ(Run({\"SET\", kKey, value}), \"OK\");\n\n  EXPECT_THAT(\n      RunPrivileged({\"dflycluster\", \"getslotinfo\", \"slots\", \"0\", absl::StrCat(slot)}),\n      RespArray(ElementsAre(\n          RespArray(ElementsAre(IntArg(0), \"key_count\", IntArg(0), \"total_reads\", IntArg(0),\n                                \"total_writes\", IntArg(0), \"memory_bytes\", IntArg(0))),\n          RespArray(ElementsAre(IntArg(slot), \"key_count\", IntArg(1), \"total_reads\", IntArg(0),\n                                \"total_writes\", IntArg(1), \"memory_bytes\", Not(IntArg(0)))))));\n\n  EXPECT_EQ(Run({\"GET\", kKey}), value);\n\n  EXPECT_THAT(\n      RunPrivileged({\"dflycluster\", \"getslotinfo\", \"slots\", \"0\", absl::StrCat(slot)}),\n      RespArray(ElementsAre(\n          RespArray(ElementsAre(IntArg(0), \"key_count\", IntArg(0), \"total_reads\", IntArg(0),\n                                \"total_writes\", 
IntArg(0), \"memory_bytes\", IntArg(0))),\n          RespArray(ElementsAre(IntArg(slot), \"key_count\", IntArg(1), \"total_reads\", IntArg(1),\n                                \"total_writes\", IntArg(1), \"memory_bytes\", Not(IntArg(0)))))));\n\n  EXPECT_EQ(Run({\"SET\", kKey, \"value2\"}), \"OK\");\n\n  EXPECT_THAT(\n      RunPrivileged({\"dflycluster\", \"getslotinfo\", \"slots\", \"0\", absl::StrCat(slot)}),\n      RespArray(ElementsAre(\n          RespArray(ElementsAre(IntArg(0), \"key_count\", IntArg(0), \"total_reads\", IntArg(0),\n                                \"total_writes\", IntArg(0), \"memory_bytes\", IntArg(0))),\n          RespArray(ElementsAre(IntArg(slot), \"key_count\", IntArg(1), \"total_reads\", IntArg(1),\n                                \"total_writes\", IntArg(2), \"memory_bytes\", IntArg(36))))));\n}\n\nTEST_F(ClusterFamilyTest, ClusterGetSlotInfoRanges) {\n  ConfigSingleNodeCluster(GetMyId());\n\n  // Test basic range syntax: 0-2 should return 3 slots\n  auto result = RunPrivileged({\"dflycluster\", \"getslotinfo\", \"slots\", \"0-2\"});\n  ASSERT_EQ(result.GetVec().size(), 3u);\n  EXPECT_THAT(result.GetVec()[0], RespArray(ElementsAre(IntArg(0), _, _, _, _, _, _, _, _)));\n  EXPECT_THAT(result.GetVec()[1], RespArray(ElementsAre(IntArg(1), _, _, _, _, _, _, _, _)));\n  EXPECT_THAT(result.GetVec()[2], RespArray(ElementsAre(IntArg(2), _, _, _, _, _, _, _, _)));\n\n  // Test mixed syntax: range + individual slots\n  result = RunPrivileged({\"dflycluster\", \"getslotinfo\", \"slots\", \"0-1\", \"5\", \"10-11\"});\n  ASSERT_EQ(result.GetVec().size(), 5u);\n  EXPECT_THAT(result.GetVec()[0], RespArray(ElementsAre(IntArg(0), _, _, _, _, _, _, _, _)));\n  EXPECT_THAT(result.GetVec()[1], RespArray(ElementsAre(IntArg(1), _, _, _, _, _, _, _, _)));\n  EXPECT_THAT(result.GetVec()[2], RespArray(ElementsAre(IntArg(5), _, _, _, _, _, _, _, _)));\n  EXPECT_THAT(result.GetVec()[3], RespArray(ElementsAre(IntArg(10), _, _, _, _, _, _, _, _)));\n  
EXPECT_THAT(result.GetVec()[4], RespArray(ElementsAre(IntArg(11), _, _, _, _, _, _, _, _)));\n\n  // Test reversed range (5-2 should be treated as 2-5)\n  result = RunPrivileged({\"dflycluster\", \"getslotinfo\", \"slots\", \"5-2\"});\n  ASSERT_EQ(result.GetVec().size(), 4u);\n  EXPECT_THAT(result.GetVec()[0], RespArray(ElementsAre(IntArg(2), _, _, _, _, _, _, _, _)));\n  EXPECT_THAT(result.GetVec()[1], RespArray(ElementsAre(IntArg(3), _, _, _, _, _, _, _, _)));\n  EXPECT_THAT(result.GetVec()[2], RespArray(ElementsAre(IntArg(4), _, _, _, _, _, _, _, _)));\n  EXPECT_THAT(result.GetVec()[3], RespArray(ElementsAre(IntArg(5), _, _, _, _, _, _, _, _)));\n\n  // Test invalid slot id in range\n  EXPECT_THAT(RunPrivileged({\"dflycluster\", \"getslotinfo\", \"slots\", \"0-20000\"}),\n              ErrArg(\"Invalid slot id\"));\n\n  // Test invalid range format\n  EXPECT_THAT(RunPrivileged({\"dflycluster\", \"getslotinfo\", \"slots\", \"abc-def\"}),\n              ErrArg(\"Invalid slot range format\"));\n\n  // Edge cases with dashes\n  EXPECT_THAT(RunPrivileged({\"dflycluster\", \"getslotinfo\", \"slots\", \"-1\"}),\n              ErrArg(\"value is not an integer or out of range\"));\n  EXPECT_THAT(RunPrivileged({\"dflycluster\", \"getslotinfo\", \"slots\", \"1-\"}),\n              ErrArg(\"Invalid slot range format\"));\n  EXPECT_THAT(RunPrivileged({\"dflycluster\", \"getslotinfo\", \"slots\", \"1--2\"}),\n              ErrArg(\"Invalid slot range format\"));\n  EXPECT_THAT(RunPrivileged({\"dflycluster\", \"getslotinfo\", \"slots\", \"1-2-3\"}),\n              ErrArg(\"Invalid slot range format\"));\n  EXPECT_THAT(RunPrivileged({\"dflycluster\", \"getslotinfo\", \"slots\", \"1---2\"}),\n              ErrArg(\"Invalid slot range format\"));\n}\n\nTEST_F(ClusterFamilyTest, ClusterSlotsPopulate) {\n  ConfigSingleNodeCluster(GetMyId());\n\n  Run({\"debug\", \"populate\", \"10000\", \"key\", \"4\", \"SLOTS\", \"0\", \"1000\"});\n\n  for (int i = 0; i <= 1'000; ++i) {\n    
EXPECT_THAT(RunPrivileged({\"dflycluster\", \"getslotinfo\", \"slots\", absl::StrCat(i)}),\n                RespArray(ElementsAre(IntArg(i), \"key_count\", Not(IntArg(0)), _, _, _, _, _, _)));\n  }\n\n  for (int i = 1'001; i <= 16'383; ++i) {\n    EXPECT_THAT(RunPrivileged({\"dflycluster\", \"getslotinfo\", \"slots\", absl::StrCat(i)}),\n                RespArray(ElementsAre(IntArg(i), \"key_count\", IntArg(0), _, _, _, _, _, _)));\n  }\n}\n\nTEST_F(ClusterFamilyTest, ClusterEvalCrossslot) {\n  ConfigSingleNodeCluster(GetMyId());\n\n  auto res = Run({\"EVAL\", \"return redis.call('MSET', 'x1', 'x1', 'x2', 'x2', 'x3', 'x3');\", \"3\",\n                  \"x1\", \"x2\", \"x3\"});\n\n  EXPECT_THAT(res, ErrArg(\"CROSSSLOT\"));\n\n  auto sha =\n      Run({\"SCRPIT\", \"LOAD\", \"return redis.call('MSET', 'x1', 'x1', 'x2', 'x2', 'x3', 'x3');\", \"3\",\n           \"x1\", \"x2\", \"x3\"});\n\n  EXPECT_THAT(Run({\"EVALSHA\", sha.GetString(), \"3\", \"x1\", \"x2\", \"x3\"}), ErrArg(\"CROSSSLOT\"));\n}\n\nTEST_F(ClusterFamilyTest, ClusterMultiExec) {\n  ConfigSingleNodeCluster(GetMyId());\n\n  Run({\"MULTI\"});\n  Run({\"SET\", \"X1\", \"X1\"});\n  Run({\"SET\", \"X2\", \"X2\"});\n  Run({\"SET\", \"X3\", \"X3\"});\n\n  EXPECT_THAT(Run({\"EXEC\"}), ErrArg(\"CROSSSLOT\"));\n}\n\nTEST_F(ClusterFamilyTest, ClusterConfigDeleteSlots) {\n  ConfigSingleNodeCluster(GetMyId());\n\n  Run({\"debug\", \"populate\", \"100000\"});\n\n  EXPECT_THAT(\n      RunPrivileged({\"dflycluster\", \"getslotinfo\", \"slots\", \"1\", \"2\"}),\n      RespArray(ElementsAre(\n          RespArray(ElementsAre(IntArg(1), \"key_count\", Not(IntArg(0)), \"total_reads\", IntArg(0),\n                                \"total_writes\", Not(IntArg(0)), \"memory_bytes\", IntArg(108))),\n          RespArray(ElementsAre(IntArg(2), \"key_count\", Not(IntArg(0)), \"total_reads\", IntArg(0),\n                                \"total_writes\", Not(IntArg(0)), \"memory_bytes\", IntArg(360))))));\n\n  
ConfigSingleNodeCluster(\"abc\");\n\n  ExpectConditionWithinTimeout([&]() { return CheckedInt({\"dbsize\"}) == 0; });\n\n  EXPECT_THAT(\n      RunPrivileged({\"dflycluster\", \"getslotinfo\", \"slots\", \"1\", \"2\"}),\n      RespArray(ElementsAre(\n          RespArray(ElementsAre(IntArg(1), \"key_count\", IntArg(0), \"total_reads\", IntArg(0),\n                                \"total_writes\", Not(IntArg(0)), \"memory_bytes\", IntArg(0))),\n          RespArray(ElementsAre(IntArg(2), \"key_count\", IntArg(0), \"total_reads\", IntArg(0),\n                                \"total_writes\", Not(IntArg(0)), \"memory_bytes\", IntArg(0))))));\n}\n\n// Test issue #1302\nTEST_F(ClusterFamilyTest, ClusterConfigDeleteSlotsNoCrashOnShutdown) {\n  ConfigSingleNodeCluster(GetMyId());\n\n  Run({\"debug\", \"populate\", \"100000\"});\n\n  EXPECT_THAT(\n      RunPrivileged({\"dflycluster\", \"getslotinfo\", \"slots\", \"1\", \"2\"}),\n      RespArray(ElementsAre(\n          RespArray(ElementsAre(IntArg(1), \"key_count\", Not(IntArg(0)), \"total_reads\", IntArg(0),\n                                \"total_writes\", Not(IntArg(0)), \"memory_bytes\", IntArg(108))),\n          RespArray(ElementsAre(IntArg(2), \"key_count\", Not(IntArg(0)), \"total_reads\", IntArg(0),\n                                \"total_writes\", Not(IntArg(0)), \"memory_bytes\", IntArg(360))))));\n\n  // After running the new config we start a fiber that removes all slots from current instance\n  // we immediately shut down to test that we do not crash.\n  ConfigSingleNodeCluster(\"abc\");\n}\n\nTEST_F(ClusterFamilyTest, ClusterConfigDeleteSomeSlots) {\n  string config_template = R\"json(\n      [\n        {\n          \"slot_ranges\": [\n            {\n              \"start\": 0,\n              \"end\": $1\n            }\n          ],\n          \"master\": {\n            \"id\": \"$0\",\n            \"ip\": \"10.0.0.1\",\n            \"port\": 7000\n          },\n          \"replicas\": []\n        },\n        
{\n          \"slot_ranges\": [\n            {\n              \"start\": $2,\n              \"end\": 16383\n            }\n          ],\n          \"master\": {\n            \"id\": \"other\",\n            \"ip\": \"10.0.0.2\",\n            \"port\": 7000\n          },\n          \"replicas\": []\n        }\n      ])json\";\n\n  string config = absl::Substitute(config_template, GetMyId(), \"8000\", \"8001\");\n\n  EXPECT_EQ(RunPrivileged({\"dflycluster\", \"config\", config}), \"OK\");\n\n  Run({\"debug\", \"populate\", \"1\", \"key\", \"4\", \"SLOTS\", \"7999\", \"7999\"});\n  Run({\"debug\", \"populate\", \"2\", \"key\", \"4\", \"SLOTS\", \"8000\", \"8000\"});\n\n  EXPECT_THAT(RunPrivileged({\"dflycluster\", \"getslotinfo\", \"slots\", \"7999\", \"8000\"}),\n              RespArray(ElementsAre(\n                  RespArray(ElementsAre(IntArg(7999), \"key_count\", IntArg(1), _, _, _, _, _, _)),\n                  RespArray(ElementsAre(IntArg(8000), \"key_count\", IntArg(2), _, _, _, _, _, _)))));\n  EXPECT_THAT(Run({\"dbsize\"}), IntArg(3));\n\n  // Move ownership over 8000 to other master\n  config = absl::Substitute(config_template, GetMyId(), \"7999\", \"8000\");\n  EXPECT_EQ(RunPrivileged({\"dflycluster\", \"config\", config}), \"OK\");\n\n  // Verify that keys for slot 8000 were deleted, while key for slot 7999 was kept\n  ExpectConditionWithinTimeout([&]() { return CheckedInt({\"dbsize\"}) == 1; });\n\n  EXPECT_THAT(RunPrivileged({\"dflycluster\", \"getslotinfo\", \"slots\", \"7999\", \"8000\"}),\n              RespArray(ElementsAre(\n                  RespArray(ElementsAre(IntArg(7999), \"key_count\", IntArg(1), _, _, _, _, _, _)),\n                  RespArray(ElementsAre(IntArg(8000), \"key_count\", IntArg(0), _, _, _, _, _, _)))));\n}\n\nTEST_F(ClusterFamilyTest, ClusterModeSelectNotAllowed) {\n  EXPECT_THAT(Run({\"select\", \"1\"}), ErrArg(\"SELECT is not allowed in cluster mode\"));\n  EXPECT_EQ(Run({\"select\", \"0\"}), 
\"OK\");\n}\n\nTEST_F(ClusterFamilyTest, ClusterModePubSubNotAllowed) {\n  EXPECT_THAT(Run({\"PUBLISH\", \"ch\", \"message\"}),\n              ErrArg(\"PUBLISH is not supported in cluster mode yet\"));\n  EXPECT_THAT(Run({\"SUBSCRIBE\", \"ch\"}), ErrArg(\"SUBSCRIBE is not supported in cluster mode yet\"));\n  EXPECT_THAT(Run({\"UNSUBSCRIBE\", \"ch\"}),\n              ErrArg(\"UNSUBSCRIBE is not supported in cluster mode yet\"));\n  EXPECT_THAT(Run({\"PSUBSCRIBE\", \"ch?\"}),\n              ErrArg(\"PSUBSCRIBE is not supported in cluster mode yet\"));\n  EXPECT_THAT(Run({\"PUNSUBSCRIBE\", \"ch?\"}),\n              ErrArg(\"PUNSUBSCRIBE is not supported in cluster mode yet\"));\n}\n\n// SSUBSCRIBE and SPUBLISH work in cluster mode\nTEST_F(ClusterFamilyTest, ClusterModePubSub) {\n  single_response_ = false;\n  ConfigSingleNodeCluster(GetMyId());\n\n  // Ssubscribe works as expected\n  auto resp = pp_->at(1)->Await([&] { return Run({\"SSUBSCRIBE\", \"cluster-channel\"}); });\n  EXPECT_THAT(resp, RespElementsAre(\"ssubscribe\", \"cluster-channel\", IntArg(1)));\n\n  // Send-receive a single message\n  resp = pp_->at(0)->Await([&] {\n    return Run({\"SPUBLISH\", \"cluster-channel\", \"a simple message\"});\n  });\n  EXPECT_THAT(resp, IntArg(1));\n\n  pp_->AwaitFiberOnAll([](util::ProactorBase* pb) {});\n\n  ASSERT_EQ(1, SubscriberMessagesLen(\"IO1\"));\n  const auto& msg = GetPublishedMessage(\"IO1\", 0);\n  EXPECT_TRUE(msg.is_sharded);\n  EXPECT_EQ(\"cluster-channel\", msg.channel);\n  EXPECT_EQ(\"a simple message\", msg.message);\n\n  // Sunsubscribe\n  resp = pp_->at(1)->Await([&] { return Run({\"SUNSUBSCRIBE\", \"cluster-channel\"}); });\n  EXPECT_THAT(resp, RespElementsAre(\"sunsubscribe\", \"cluster-channel\", IntArg(0)));\n}\n\nTEST_F(ClusterFamilyTest, ClusterFirstConfigCallDropsEntriesNotOwnedByNode) {\n  InitWithDbFilename();\n\n  Run({\"debug\", \"populate\", \"50000\"});\n\n  EXPECT_EQ(Run({\"save\", \"df\"}), \"OK\");\n\n  auto save_info = 
service_->server_family().GetLastSaveInfo();\n  EXPECT_EQ(Run({\"dfly\", \"load\", save_info.file_name}), \"OK\");\n  EXPECT_EQ(CheckedInt({\"dbsize\"}), 50000);\n\n  ConfigSingleNodeCluster(\"abcd1234\");\n\n  // Make sure `dbsize` all slots were removed\n  ExpectConditionWithinTimeout([&]() { return CheckedInt({\"dbsize\"}) == 0; });\n}\n\nTEST_F(ClusterFamilyTest, SnapshotBiggerThanMaxMemory) {\n  InitWithDbFilename();\n  ConfigSingleNodeCluster(GetMyId());\n\n  Run({\"debug\", \"populate\", \"50000\"});\n  EXPECT_EQ(Run({\"save\", \"df\"}), \"OK\");\n\n  max_memory_limit = 10000;\n  auto save_info = service_->server_family().GetLastSaveInfo();\n  EXPECT_EQ(Run({\"dfly\", \"load\", save_info.file_name}), \"OK\");\n}\n\nTEST_F(ClusterFamilyTest, Keyslot) {\n  // Example from Redis' command reference: https://redis.io/commands/cluster-keyslot/\n  EXPECT_THAT(Run({\"cluster\", \"keyslot\", \"somekey\"}), IntArg(11'058));\n\n  // Test hash tags\n  EXPECT_THAT(Run({\"cluster\", \"keyslot\", \"prefix{somekey}suffix\"}), IntArg(11'058));\n\n  EXPECT_EQ(CheckedInt({\"cluster\", \"keyslot\", \"abc{def}ghi\"}),\n            CheckedInt({\"cluster\", \"keyslot\", \"123{def}456\"}));\n}\n\nTEST_F(ClusterFamilyTest, FlushSlots) {\n  EXPECT_EQ(Run({\"debug\", \"populate\", \"100\", \"key\", \"4\", \"slots\", \"0\", \"1\"}), \"OK\");\n\n  EXPECT_THAT(RunPrivileged({\"dflycluster\", \"getslotinfo\", \"slots\", \"0\", \"1\"}),\n              RespArray(ElementsAre(\n                  RespArray(ElementsAre(IntArg(0), \"key_count\", Not(IntArg(0)), \"total_reads\", _,\n                                        \"total_writes\", _, \"memory_bytes\", _)),\n                  RespArray(ElementsAre(IntArg(1), \"key_count\", Not(IntArg(0)), \"total_reads\", _,\n                                        \"total_writes\", _, \"memory_bytes\", _)))));\n\n  ExpectConditionWithinTimeout([&]() {\n    return RunPrivileged({\"dflycluster\", \"flushslots\", \"0\", \"0\"}) == \"OK\";\n  });\n  
util::ThisFiber::SleepFor(10ms);\n  EXPECT_THAT(RunPrivileged({\"dflycluster\", \"getslotinfo\", \"slots\", \"0\", \"1\"}),\n              RespArray(ElementsAre(\n                  RespArray(ElementsAre(IntArg(0), \"key_count\", IntArg(0), \"total_reads\", _,\n                                        \"total_writes\", _, \"memory_bytes\", _)),\n                  RespArray(ElementsAre(IntArg(1), \"key_count\", Not(IntArg(0)), \"total_reads\", _,\n                                        \"total_writes\", _, \"memory_bytes\", _)))));\n\n  EXPECT_EQ(RunPrivileged({\"dflycluster\", \"flushslots\", \"0\", \"1\"}), \"OK\");\n  util::ThisFiber::SleepFor(10ms);\n  EXPECT_THAT(\n      RunPrivileged({\"dflycluster\", \"getslotinfo\", \"slots\", \"0\", \"1\"}),\n      RespArray(ElementsAre(RespArray(ElementsAre(IntArg(0), \"key_count\", IntArg(0), \"total_reads\",\n                                                  _, \"total_writes\", _, \"memory_bytes\", _)),\n                            RespArray(ElementsAre(IntArg(1), \"key_count\", IntArg(0), \"total_reads\",\n                                                  _, \"total_writes\", _, \"memory_bytes\", _)))));\n}\n\nTEST_F(ClusterFamilyTest, FlushSlotsOutOfBounds) {\n  EXPECT_THAT(RunPrivileged({\"dflycluster\", \"flushslots\", \"0\", \"16384\"}),\n              ErrArg(\"value is not an integer or out of range\"));\n  EXPECT_THAT(RunPrivileged({\"dflycluster\", \"flushslots\", \"16384\", \"16384\"}),\n              ErrArg(\"value is not an integer or out of range\"));\n  EXPECT_THAT(RunPrivileged({\"dflycluster\", \"flushslots\", \"100\", \"50\"}),\n              ErrArg(\"Invalid slot range\"));\n}\n\nTEST_F(ClusterFamilyTest, FlushSlotsAndImmediatelySetValue) {\n  for (int count : {1, 10, 100, 1000, 10000, 100000}) {\n    ConfigSingleNodeCluster(GetMyId());\n\n    EXPECT_EQ(Run({\"debug\", \"populate\", absl::StrCat(count), \"key\", \"4\"}), \"OK\");\n    EXPECT_EQ(Run({\"get\", \"key:0\"}), \"xxxx\");\n\n    
EXPECT_THAT(Run({\"cluster\", \"keyslot\", \"key:0\"}), IntArg(2592));\n    EXPECT_THAT(Run({\"dbsize\"}), IntArg(count));\n    auto slot_size_response = Run({\"dflycluster\", \"getslotinfo\", \"slots\", \"2592\"});\n    EXPECT_THAT(slot_size_response, RespArray(ElementsAre(_, \"key_count\", _, \"total_reads\", _,\n                                                          \"total_writes\", _, \"memory_bytes\", _)));\n    auto slot_size = slot_size_response.GetVec()[2].GetInt();\n    EXPECT_TRUE(slot_size.has_value());\n\n    EXPECT_EQ(Run({\"dflycluster\", \"flushslots\", \"2592\", \"2592\"}), \"OK\");\n    // key:0 should have been removed, so APPEND will end up with key:0 == ZZZZ\n    EXPECT_THAT(Run({\"append\", \"key:0\", \"ZZZZ\"}), IntArg(4));\n    EXPECT_EQ(Run({\"get\", \"key:0\"}), \"ZZZZ\");\n    // db size should be count - (size of slot 2592) + 1, where 1 is for 'key:0'\n    ExpectConditionWithinTimeout(\n        [&]() { return CheckedInt({\"dbsize\"}) == (count - *slot_size + 1); });\n\n    ResetService();\n  }\n}\n\nTEST_F(ClusterFamilyTest, ClusterCrossSlot) {\n  ConfigSingleNodeCluster(GetMyId());\n\n  EXPECT_EQ(Run({\"SET\", \"key\", \"value\"}), \"OK\");\n  EXPECT_EQ(Run({\"GET\", \"key\"}), \"value\");\n\n  EXPECT_EQ(Run({\"MSET\", \"key\", \"value2\"}), \"OK\");\n  EXPECT_EQ(Run({\"MGET\", \"key\"}), \"value2\");\n\n  EXPECT_THAT(Run({\"MSET\", \"key\", \"value\", \"key2\", \"value2\"}), ErrArg(\"CROSSSLOT\"));\n  EXPECT_THAT(Run({\"MGET\", \"key\", \"key2\"}), ErrArg(\"CROSSSLOT\"));\n  EXPECT_THAT(Run({\"ZINTERSTORE\", \"key\", \"2\", \"key1\", \"key2\"}), ErrArg(\"CROSSSLOT\"));\n\n  EXPECT_EQ(Run({\"MSET\", \"key{tag}\", \"value\", \"key2{tag}\", \"value2\"}), \"OK\");\n  EXPECT_THAT(Run({\"MGET\", \"key{tag}\", \"key2{tag}\"}), RespArray(ElementsAre(\"value\", \"value2\")));\n}\n\nclass ClusterFamilyEmulatedTest : public ClusterFamilyTest {\n public:\n  ClusterFamilyEmulatedTest() {\n    SetTestFlag(\"cluster_mode\", \"emulated\");\n    
SetTestFlag(\"cluster_announce_ip\", \"fake-host\");\n  }\n};\n\nTEST_F(ClusterFamilyEmulatedTest, ClusterInfo) {\n  string cluster_info = Run({\"cluster\", \"info\"}).GetString();\n  EXPECT_THAT(cluster_info, HasSubstr(\"cluster_state:ok\"));\n  EXPECT_THAT(cluster_info, HasSubstr(\"cluster_slots_assigned:16384\"));\n  EXPECT_THAT(cluster_info, HasSubstr(\"cluster_slots_ok:16384\"));\n  EXPECT_THAT(cluster_info, HasSubstr(\"cluster_known_nodes:1\"));\n  EXPECT_THAT(cluster_info, HasSubstr(\"cluster_size:1\"));\n}\n\nTEST_F(ClusterFamilyEmulatedTest, ClusterShardInfos) {\n  EXPECT_THAT(Run({\"cluster\", \"shards\"}),\n              RespArray(ElementsAre(\"slots\",                                           //\n                                    RespArray(ElementsAre(IntArg(0), IntArg(16383))),  //\n                                    \"nodes\",                                           //\n                                    RespArray(ElementsAre(                             //\n                                        RespArray(ElementsAre(                         //\n                                            \"id\", GetMyId(),                           //\n                                            \"endpoint\", \"fake-host\",                   //\n                                            \"ip\", \"fake-host\",                         //\n                                            \"port\", IntArg(6379),                      //\n                                            \"role\", \"master\",                          //\n                                            \"replication-offset\", IntArg(0),           //\n                                            \"health\", \"online\")))))));\n}\n\nTEST_F(ClusterFamilyEmulatedTest, ClusterSlots) {\n  EXPECT_THAT(Run({\"cluster\", \"slots\"}),\n              RespArray(ElementsAre(IntArg(0),              //\n                                    IntArg(16383),          //\n                                    
RespArray(ElementsAre(  //\n                                        \"fake-host\",        //\n                                        IntArg(6379),       //\n                                        GetMyId())))));\n}\n\nTEST_F(ClusterFamilyEmulatedTest, ClusterNodes) {\n  auto res = Run({\"cluster\", \"nodes\"});\n  EXPECT_THAT(res, GetMyId() + \" fake-host:6379@6379 myself,master - 0 0 0 connected 0-16383\\n\");\n}\n\nTEST_F(ClusterFamilyEmulatedTest, ForbidenCommands) {\n  auto res = Run({\"DFLYCLUSTER\", \"GETSLOTINFO\", \"SLOTS\", \"1\"});\n  EXPECT_THAT(res, ErrArg(\"Cluster is disabled. Use --cluster_mode=yes to enable.\"));\n}\n\n}  // namespace\n}  // namespace dfly::cluster\n"
  },
  {
    "path": "src/server/cluster/cluster_utility.cc",
    "content": "// Copyright 2024, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#include \"server/cluster/cluster_utility.h\"\n\n#include \"server/cluster/cluster_defs.h\"\n#include \"server/common.h\"\n#include \"server/db_slice.h\"\n#include \"server/engine_shard_set.h\"\n#include \"server/namespaces.h\"\n\nusing namespace std;\n\nnamespace dfly::cluster {\n\nuint64_t GetKeyCount(const SlotRanges& slots) {\n  std::atomic_uint64_t keys = 0;\n\n  shard_set->pool()->AwaitFiberOnAll([&](auto*) {\n    EngineShard* shard = EngineShard::tlocal();\n    if (shard == nullptr)\n      return;\n\n    uint64_t shard_keys = 0;\n    for (const SlotRange& range : slots) {\n      for (SlotId slot = range.start; slot <= range.end; slot++) {\n        shard_keys += namespaces->GetDefaultNamespace()\n                          .GetDbSlice(shard->shard_id())\n                          .GetSlotStats(slot)\n                          .key_count;\n      }\n    }\n    keys.fetch_add(shard_keys);\n  });\n\n  return keys.load();\n}\n\n}  // namespace dfly::cluster\n"
  },
  {
    "path": "src/server/cluster/cluster_utility.h",
    "content": "// Copyright 2023, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#pragma once\n\n#include \"server/cluster/cluster_defs.h\"\n\nnamespace dfly::cluster {\n\nuint64_t GetKeyCount(const SlotRanges& slots);\n\n}  // namespace dfly::cluster\n"
  },
  {
    "path": "src/server/cluster/coordinator.cc",
    "content": "// Copyright 2025, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#include \"server/cluster/coordinator.h\"\n\n#include \"base/logging.h\"\n#include \"facade/redis_parser.h\"\n#include \"facade/socket_utils.h\"\n#include \"server/cluster/cluster_config.h\"\n\nusing namespace std;\nusing namespace facade;\n\nnamespace dfly::cluster {\n\nclass Coordinator::CrossShardRequest {\n public:\n  CrossShardRequest(std::string cmd, Coordinator::RespCB cb, uint32_t total_shards)\n      : command_(std::move(cmd)), cb_(std::move(cb)), shard_processed_(total_shards) {\n  }\n\n  const std::string& GetCommand() const {\n    return command_;\n  }\n\n  template <class... Args> void Exec(Args&&... args) {\n    cb_(std::forward<Args>(args)...);\n    if (shard_processed_.fetch_sub(1, std::memory_order_relaxed) == 1) {\n      future_.Resolve(GenericError{});\n    }\n  }\n\n  util::fb2::Future<GenericError>& GetFuture() {\n    return future_;\n  }\n\n private:\n  std::string command_;\n  Coordinator::RespCB cb_;\n  util::fb2::Future<GenericError> future_;\n  std::atomic_uint32_t shard_processed_;\n};\n\nclass Coordinator::CrossShardClient : public ProtocolClient {\n public:\n  CrossShardClient(std::string host, uint16_t port) : ProtocolClient(std::move(host), port) {\n  }\n\n  using ProtocolClient::CloseSocket;\n  ~CrossShardClient() {\n    exec_st_.Cancel();\n    waker_.notifyAll();\n    CloseSocket();\n    send_fb_.Join();\n    resp_fb_.Join();\n  }\n\n  [[nodiscard]] bool Init() {\n    VLOG(1) << \"Resolving host DNS to \" << server().Description();\n    if (error_code ec = ResolveHostDns(); ec) {\n      LOG(WARNING) << \"Could not resolve host DNS to \" << server().Description() << \": \"\n                   << ec.message();\n      exec_st_.ReportError(GenericError(ec, \"Could not resolve host dns.\"));\n      return false;\n    }\n    VLOG(1) << \"Start coordinator connection to \" << server().Description();\n    auto timeout = 
3000ms;  // TODO add flag;\n    if (auto ec = ConnectAndAuth(timeout, &exec_st_); ec) {\n      LOG(WARNING) << \"Couldn't connect to \" << server().Description() << \": \" << ec.message()\n                   << \", socket state: \" << GetSocketInfo(Sock()->native_handle());\n      exec_st_.ReportError(GenericError(ec, \"Couldn't connect to source.\"));\n      return false;\n    }\n\n    ResetParser(RedisParser::Mode::CLIENT);\n    send_fb_ = util::fb2::Fiber(\"CSS_SendFb\", &CrossShardClient::SendFb, this);\n    resp_fb_ = util::fb2::Fiber(\"CSS_RespFb\", &CrossShardClient::RespFb, this);\n    return true;\n  }\n\n  void Cancel() {\n    exec_st_.Cancel();\n    ShutdownSocket();\n  }\n\n  void EnqueueCommand(CrossShardRequestPtr req) {\n    {\n      std::lock_guard lk(send_mu_);\n      send_queue_.push(req);\n      ready_to_send_ = true;\n    }\n    {\n      std::lock_guard lk(resp_mu_);\n      resp_queue_.push(req);\n      ready_to_resp_ = true;\n    }\n\n    waker_.notifyAll();\n  }\n\n  void SendFb() {\n    while (!exec_st_.IsCancelled()) {\n      waker_.await([this] { return exec_st_.IsCancelled() || ready_to_send_; });\n      if (exec_st_.IsCancelled())\n        return;\n      std::lock_guard lk(send_mu_);\n      while (!send_queue_.empty()) {\n        if (auto ec = ProtocolClient::SendCommand(send_queue_.front()->GetCommand()); ec) {\n          exec_st_.ReportError(GenericError(\n              ec, absl::StrCat(\"Coordinator could not send command to : \", server().Description(),\n                               \"socket state: \", GetSocketInfo(Sock()->native_handle()))));\n          // TODO reinit connection.\n          break;\n        }\n        send_queue_.pop();\n      }\n      ready_to_send_ = false;\n    }\n  }\n\n  void RespFb() {\n    while (!exec_st_.IsCancelled()) {\n      waker_.await([this] { return exec_st_.IsCancelled() || ready_to_resp_; });\n      if (exec_st_.IsCancelled())\n        return;\n      std::lock_guard lk(resp_mu_);\n      constexpr 
auto timeout = 3000;  // TODO add flag and add usage in ReadRespReply.\n      while (!resp_queue_.empty()) {\n        auto resp = TakeRespReply(timeout);\n        if (!resp) {\n          LOG(WARNING) << \"Error reading response from \" << server().Description() << \": \"\n                       << resp.error()\n                       << \", socket state: \" + GetSocketInfo(Sock()->native_handle());\n\n          // TODO make all requests fail in this case.\n          // TODO reinit connection.\n          LOG(FATAL) << \"Coordinator RespFb read error, not implemented recovery yet.\";\n          break;\n        }\n        resp_queue_.front()->Exec(*resp);\n        resp_queue_.pop();\n      }\n      ready_to_resp_ = false;\n    }\n  }\n\n private:\n  std::queue<std::shared_ptr<CrossShardRequest>> send_queue_;\n  std::queue<std::shared_ptr<CrossShardRequest>> resp_queue_;\n\n  util::fb2::Fiber send_fb_;\n  util::fb2::Fiber resp_fb_;\n  util::fb2::EventCount waker_;\n\n  mutable util::fb2::Mutex send_mu_;\n  mutable util::fb2::Mutex resp_mu_;\n  std::atomic_bool ready_to_send_ = false;\n  std::atomic_bool ready_to_resp_ = false;\n};\n\nCoordinator& Coordinator::Current() {\n  static Coordinator instance;\n  return instance;\n}\n\nstd::shared_ptr<Coordinator::CrossShardClient> Coordinator::GetClient(const std::string& host,\n                                                                      uint16_t port) {\n  for (const auto& client : clients_) {\n    if (client->GetHost() == host && client->GetPort() == port) {\n      return client;\n    }\n  }\n  auto new_client = std::make_shared<CrossShardClient>(host, port);\n  if (new_client->Init()) {\n    clients_.emplace_back(new_client);\n    return new_client;\n  }\n  return nullptr;\n}\n\nutil::fb2::Future<GenericError> Coordinator::DispatchAll(std::string command, RespCB cb) {\n  auto cluster_config = ClusterConfig::Current();\n  if (!cluster_config) {\n    VLOG(2) << \"No cluster config found for coordinator plan 
creation.\";\n    LOG(FATAL) << \"No cluster config, not implemented logic yet.\";\n    return {};\n  }\n\n  if (!cluster_config->is_master()) {\n    VLOG(2) << \"Current node isn't master, the command should be executed locally:\" << command;\n    util::fb2::Future<GenericError> res;\n    res.Resolve(GenericError{});\n    return res;\n  }\n\n  VLOG(2) << \"Dispatching command to all shards: \" << command;\n  auto shards_config = cluster_config->GetConfig();\n\n  auto shard_request = std::make_shared<CrossShardRequest>(std::move(command), std::move(cb),\n                                                           shards_config.size() - 1);\n\n  for (const auto& shard : shards_config) {\n    if (shard.master.id == cluster_config->MyId()) {\n      continue;\n    }\n    const auto& client = GetClient(shard.master.ip, shard.master.port);\n    if (!client) {\n      VLOG(1) << \"Could not get coordinator client for \" << shard.master.ip << \":\"\n              << shard.master.port;\n      cb(RESPObj());  // TODO add error propagation.\n      LOG(FATAL) << \"No error processing, not implemented logic yet.\";\n      return {};\n    }\n    client->EnqueueCommand(shard_request);\n  }\n  return shard_request->GetFuture();\n}\n\n}  // namespace dfly::cluster\n"
  },
  {
    "path": "src/server/cluster/coordinator.h",
    "content": "// Copyright 2025, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n#pragma once\n\n#include \"server/cluster/cluster_defs.h\"\n#include \"server/protocol_client.h\"\n#include \"util/fibers/future.h\"\n\nnamespace dfly::cluster {\n\n// Coordinator needs to create and manage connections between nodes in the cluster for cross shard\n// commands. All cross-shard commands are dispatched through the Coordinator.\n// It can be used to exeute commands on all shards or specific shards.\nclass Coordinator {\n public:\n  using RespCB = std::function<void(const facade::RESPObj&)>;  // TODO add error.\n\n  static Coordinator& Current();\n  [[nodiscard]] util::fb2::Future<GenericError> DispatchAll(std::string command, RespCB cb);\n\n  void Shutdown() {\n    // TODO add proper shutdown logic. We need to prevent new clients creation. Maybe we need to\n    // wait destroying of existing clients.\n    clients_.clear();\n  }\n\n private:\n  Coordinator() = default;\n  class CrossShardClient;\n  class CrossShardRequest;\n  using CrossShardRequestPtr = std::shared_ptr<Coordinator::CrossShardRequest>;\n  std::shared_ptr<CrossShardClient> GetClient(const std::string& host, uint16_t port);\n  std::vector<std::shared_ptr<CrossShardClient>> clients_;\n};\n\n}  // namespace dfly::cluster\n"
  },
  {
    "path": "src/server/cluster/incoming_slot_migration.cc",
    "content": "// Copyright 2023, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#include \"server/cluster/incoming_slot_migration.h\"\n\n#include <absl/cleanup/cleanup.h>\n#include <absl/strings/str_cat.h>\n\n#include \"base/flags.h\"\n#include \"base/logging.h\"\n#include \"cluster_utility.h\"\n#include \"facade/service_interface.h\"\n#include \"facade/socket_utils.h\"\n#include \"server/error.h\"\n#include \"server/journal/executor.h\"\n#include \"server/journal/serializer.h\"\n#include \"server/journal/tx_executor.h\"\n#include \"server/main_service.h\"\n#include \"util/fibers/synchronization.h\"\n\nABSL_DECLARE_FLAG(int, migration_finalization_timeout_ms);\nABSL_FLAG(uint32_t, slot_migration_throttle_us, 0,\n          \"Incoming migration throttle time in us, we throttle every 100us of migration commands \"\n          \"processing, 0 to disable. Recommended value is 20. Values more than 50 can \"\n          \"significantly reduce migration speed.\");\n\nnamespace dfly::cluster {\n\nusing namespace std;\nusing namespace util;\nusing namespace facade;\n\n// ClusterShardMigration manage data receiving in slots migration process.\n// It is created per shard on the target node to initiate FLOW step.\nclass ClusterShardMigration {\n public:\n  ClusterShardMigration(uint32_t shard_id, Service* service, IncomingSlotMigration* in_migration,\n                        util::fb2::BlockingCounter bc)\n      : source_shard_id_(shard_id),\n        is_finished_(false),\n        socket_(nullptr),\n        executor_(service),\n        in_migration_(in_migration),\n        bc_(bc) {\n  }\n\n  void Pause(bool pause) {\n    pause_ = pause;\n  }\n\n  void Start(ExecutionState* cntx, util::FiberSocketBase* source) ABSL_LOCKS_EXCLUDED(mu_) {\n    {\n      util::fb2::LockGuard lk(mu_);\n      if (is_finished_) {\n        return;\n      }\n      is_finished_ = true;\n      socket_ = source;\n    }\n\n    absl::Cleanup cleanup([this]() 
ABSL_LOCKS_EXCLUDED(mu_) {\n      util::fb2::LockGuard lk(mu_);\n      socket_ = nullptr;\n    });\n    JournalReader reader{source, 0};\n    TransactionReader tx_reader;\n    uint64_t last_sleep = fb2::ProactorBase::GetMonotonicTimeNs();\n\n    const uint64_t throttle_us = absl::GetFlag(FLAGS_slot_migration_throttle_us);\n    TransactionData tx_data;\n    while (cntx->IsRunning()) {\n      if (pause_) {\n        ThisFiber::SleepFor(100ms);\n        continue;\n      }\n\n      bool success = tx_reader.NextTxData(&reader, cntx, &tx_data);\n      if (!success) {\n        if (auto err = cntx->GetError(); err) {\n          LOG(WARNING) << \"Error reading from migration socket for shard \" << source_shard_id_\n                       << \": \" << err.Format()\n                       << \", socket state: \" << GetSocketInfo(source->native_handle());\n        }\n        break;\n      }\n\n      while (tx_data.opcode == journal::Op::LSN) {\n        VLOG(2) << \"Attempt to finalize flow \" << source_shard_id_ << \" attempt \" << tx_data.lsn;\n        last_attempt_.store(tx_data.lsn);\n        bc_->Dec();  // we can Join the flow now\n        // if we get new data, attempt is failed\n        if (success = tx_reader.NextTxData(&reader, cntx, &tx_data); !success) {\n          VLOG(1) << \"Finalized flow \" << source_shard_id_;\n          return;\n        }\n\n        if (in_migration_->GetState() == MigrationState::C_FATAL) {\n          VLOG(1) << \"Flow finalization \" << source_shard_id_\n                  << \" canceled due memory limit reached\";\n          return;\n        }\n        if (!tx_data.command.empty()) {\n          VLOG(1) << \"Flow finalization failed \" << source_shard_id_ << \" by \"\n                  << tx_data.command.Front();\n        } else {\n          VLOG(1) << \"Flow finalization failed \" << source_shard_id_ << \" by opcode \"\n                  << (int)tx_data.opcode;\n        }\n\n        bc_->Add();  // the flow isn't finished so we lock it 
again\n      }\n      if (tx_data.opcode == journal::Op::PING) {\n        // TODO check about ping logic\n      } else {\n        auto err = ExecuteTx(std::move(tx_data), cntx);\n        // Break incoming slot migration if command reported OOM\n        if (err == std::errc::not_enough_memory) {\n          cntx->ReportError(std::string{kIncomingMigrationOOM});\n          in_migration_->ReportFatalError(std::string{kIncomingMigrationOOM});\n          break;\n        }\n      }\n      if (throttle_us > 0) {\n        // every 100us we do sleep for 20us to allow other commands to be processed\n        if (uint64_t now = fb2::ProactorBase::GetMonotonicTimeNs(); now - last_sleep > 100000) {\n          ThisFiber::SleepFor(std::chrono::microseconds(throttle_us));\n          last_sleep = now;\n        }\n      }\n    }\n\n    VLOG(2) << \"Flow \" << source_shard_id_ << \" canceled\";\n    bc_->Dec();  // we should provide ability to join the flow\n  }\n\n  std::error_code Cancel() {\n    util::fb2::LockGuard lk(mu_);\n    if (socket_ != nullptr) {\n      return socket_->proactor()->Await([s = socket_]() {\n        if (s->IsOpen()) {\n          auto ec = s->Shutdown(SHUT_RDWR);  // Does not Close(), only forbids further I/O.\n          LOG_IF(WARNING, ec) << \"Error shutting down socket for shard migration: \" << ec.message()\n                              << \", socket state: \" << GetSocketInfo(s->native_handle());\n          return ec;\n        }\n        return std::error_code();\n      });\n    }\n    if (!is_finished_) {\n      is_finished_ = true;\n      bc_->Dec();  // we should provide ability to join the flow if the Start() wasn't called\n    }\n\n    return {};\n  }\n\n  long GetLastAttempt() const {\n    return last_attempt_.load();\n  }\n\n private:\n  std::error_code ExecuteTx(TransactionData&& tx_data, ExecutionState* cntx) {\n    if (!cntx->IsRunning()) {\n      return {};\n    }\n\n    if (!tx_data.IsGlobalCmd()) {\n      facade::DispatchResult res = 
executor_.Execute(tx_data.dbid, tx_data.command);\n      return res == facade::DispatchResult::OOM ? make_error_code(errc::not_enough_memory)\n                                                : error_code();\n    } else {\n      // TODO check which global commands should be supported\n      std::string error = absl::StrCat(\"We don't support command: \", tx_data.command[0],\n                                       \" in cluster migration process.\");\n      LOG(ERROR) << error;\n      cntx->ReportError(error);\n      in_migration_->ReportError(error);\n    }\n\n    return {};\n  }\n\n  uint32_t source_shard_id_;\n  util::fb2::Mutex mu_;\n  bool is_finished_ ABSL_GUARDED_BY(mu_);\n  util::FiberSocketBase* socket_ ABSL_GUARDED_BY(mu_);\n  JournalExecutor executor_;\n  IncomingSlotMigration* in_migration_;\n  util::fb2::BlockingCounter bc_;\n  atomic_long last_attempt_{-1};\n  atomic_bool pause_ = false;\n};\n\nIncomingSlotMigration::IncomingSlotMigration(string source_id, Service* se, SlotRanges slots)\n    : source_id_(std::move(source_id)), service_(*se), slots_(std::move(slots)), bc_(0) {\n}\n\nIncomingSlotMigration::~IncomingSlotMigration() {\n}\n\nvoid IncomingSlotMigration::Pause(bool pause) {\n  VLOG(1) << \"Pausing migration \" << pause;\n  for (auto& flow : shard_flows_) {\n    flow->Pause(pause);\n  }\n}\n\nbool IncomingSlotMigration::Join(long attempt) {\n  const absl::Time start = absl::Now();\n  const absl::Duration timeout =\n      absl::Milliseconds(absl::GetFlag(FLAGS_migration_finalization_timeout_ms));\n\n  while (true) {\n    const absl::Time now = absl::Now();\n    const absl::Duration passed = now - start;\n    VLOG_EVERY_N(1, 10000) << \"Checking whether to continue with join \" << passed << \" vs \"\n                           << timeout;\n    if (passed >= timeout) {\n      LOG(WARNING) << \"Can't join migration in time for \" << source_id_;\n      ReportError(GenericError(\"Can't join migration in time\"));\n      return false;\n    }\n\n    // 
If any of migration shards reported ERROR (OOM) we can return error\n    if (GetState() == MigrationState::C_FATAL) {\n      return false;\n    }\n\n    // if data was sent after LSN, WaitFor() always returns false so to reduce wait time\n    // we check current state and if WaitFor false but GetLastAttempt() == attempt\n    // the Join is failed and we can return false\n    const auto remaining_time = absl::ToInt64Milliseconds(timeout - passed);\n    const auto wait_time = (remaining_time > 100 ? 100 : remaining_time) * 1ms;\n\n    const auto is_attempt_correct =\n        std::all_of(shard_flows_.begin(), shard_flows_.end(),\n                    [attempt](const auto& flow) { return flow->GetLastAttempt() == attempt; });\n\n    auto wait_res = bc_->WaitFor(wait_time);\n    if (is_attempt_correct) {\n      if (wait_res) {\n        util::fb2::LockGuard lk(state_mu_);\n        state_ = MigrationState::C_FINISHED;\n        keys_number_ = cluster::GetKeyCount(slots_);\n      } else {\n        LOG(WARNING) << \"Can't join migration because of data after LSN for \" << source_id_;\n        ReportError(GenericError(\"Can't join migration in time\"));\n      }\n      return wait_res;\n    }\n  }\n}\n\nvoid IncomingSlotMigration::Stop() {\n  util::fb2::LockGuard lk(state_mu_);\n  string_view log_state = state_ == MigrationState::C_FINISHED ? 
\"Finishing\" : \"Cancelling\";\n  LOG(INFO) << log_state << \" incoming migration of slots \" << slots_.ToString();\n  cntx_.Cancel();\n\n  for (auto& flow : shard_flows_) {\n    if (auto err = flow->Cancel(); err) {\n      VLOG(1) << \"Error during flow Stop: \" << err;\n    }\n  }\n\n  // Don't wait if we reached FATAL state\n  if (state_ == MigrationState::C_FATAL) {\n    return;\n  }\n\n  // we need to Join the migration process to prevent data corruption\n  const absl::Time start = absl::Now();\n  const absl::Duration timeout =\n      absl::Milliseconds(absl::GetFlag(FLAGS_migration_finalization_timeout_ms));\n\n  while (true) {\n    const absl::Time now = absl::Now();\n    const absl::Duration passed = now - start;\n    VLOG(1) << \"Checking whether to continue with stop \" << passed << \" vs \" << timeout;\n\n    if (bc_->WaitFor(absl::ToInt64Milliseconds(timeout - passed) * 1ms)) {\n      return;\n    } else if (passed >= timeout) {\n      LOG(ERROR) << \"Can't stop migration in time\";\n      return;\n    }\n  }\n}\n\nvoid IncomingSlotMigration::Init(uint32_t shards_num) {\n  util::fb2::LockGuard lk(state_mu_);\n  cntx_.Reset(nullptr);\n  state_ = MigrationState::C_SYNC;\n\n  bc_ = BlockingCounter(shards_num);\n  shard_flows_.resize(shards_num);\n  for (unsigned i = 0; i < shards_num; ++i) {\n    shard_flows_[i].reset(new ClusterShardMigration(i, &service_, this, bc_));\n  }\n}\n\nvoid IncomingSlotMigration::StartFlow(uint32_t shard, util::FiberSocketBase* source) {\n  shard_flows_[shard]->Start(&cntx_, source);\n  VLOG(1) << \"Incoming flow \" << shard\n          << (GetState() == MigrationState::C_FINISHED ? 
\" finished \" : \" cancelled \") << \"for \"\n          << source_id_;\n  if (GetState() == MigrationState::C_FATAL) {\n    Stop();\n  }\n}\n\nsize_t IncomingSlotMigration::GetKeyCount() const {\n  {\n    util::fb2::LockGuard lk(state_mu_);\n    if (state_ == MigrationState::C_FINISHED) {\n      return keys_number_;\n    }\n  }\n\n  return cluster::GetKeyCount(slots_);\n}\n\n}  // namespace dfly::cluster\n"
  },
  {
    "path": "src/server/cluster/incoming_slot_migration.h",
    "content": "// Copyright 2023, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n#pragma once\n\n#include \"helio/util/fiber_socket_base.h\"\n#include \"server/cluster/cluster_defs.h\"\n#include \"server/execution_state.h\"\n\nnamespace dfly {\nclass Service;\n}\n\nnamespace dfly::cluster {\nclass ClusterShardMigration;\n\n// The main entity on the target side that manage slots migration process\n// Manage connections between the target and source node,\n// manage migration process state and data\nclass IncomingSlotMigration {\n public:\n  IncomingSlotMigration(std::string source_id, Service* se, SlotRanges slots);\n  ~IncomingSlotMigration();\n\n  // process data from FDLYMIGRATE FLOW cmd\n  // executes until Stop called or connection closed\n  void StartFlow(uint32_t shard, util::FiberSocketBase* source);\n\n  // Waits until all flows got FIN opcode.\n  // returns true if we joined false if timeout is readed\n  // After Join we still can get data due to error situation\n  [[nodiscard]] bool Join(long attempt);\n\n  // Stop and join the migration, can be called even after migration is finished\n  void Stop();\n\n  // Init/Reinit migration\n  void Init(uint32_t shards_num);\n\n  MigrationState GetState() const {\n    util::fb2::LockGuard lk(state_mu_);\n    return state_;\n  }\n\n  const SlotRanges& GetSlots() const {\n    return slots_;\n  }\n\n  const std::string& GetSourceID() const {\n    return source_id_;\n  }\n\n  size_t ShardNum() const {\n    return shard_flows_.size();\n  }\n\n  // Switch to  FATAL state and store error message\n  void ReportFatalError(dfly::GenericError err) ABSL_LOCKS_EXCLUDED(state_mu_, error_mu_) {\n    errors_count_.fetch_add(1, std::memory_order_relaxed);\n    util::fb2::LockGuard lk_state(state_mu_);\n    util::fb2::LockGuard lk_error(error_mu_);\n    state_ = MigrationState::C_FATAL;\n    last_error_ = std::move(err);\n  }\n\n  void ReportError(dfly::GenericError err) 
ABSL_LOCKS_EXCLUDED(error_mu_) {\n    errors_count_.fetch_add(1, std::memory_order_relaxed);\n    util::fb2::LockGuard lk(error_mu_);\n    if (GetState() != MigrationState::C_FATAL)\n      last_error_ = std::move(err);\n  }\n\n  std::string GetErrorStr() const ABSL_LOCKS_EXCLUDED(error_mu_) {\n    util::fb2::LockGuard lk(error_mu_);\n    return last_error_.Format();\n  }\n\n  size_t GetErrorsCount() const {\n    return errors_count_.load(std::memory_order_relaxed);\n  }\n\n  size_t GetKeyCount() const;\n\n  void Pause(bool pause);\n\n private:\n  std::string source_id_;\n  Service& service_;\n  std::vector<std::unique_ptr<ClusterShardMigration>> shard_flows_;\n  SlotRanges slots_;\n  ExecutionState cntx_;\n\n  mutable util::fb2::Mutex error_mu_;\n  dfly::GenericError last_error_ ABSL_GUARDED_BY(error_mu_);\n  std::atomic<size_t> errors_count_ = 0;\n\n  mutable util::fb2::Mutex state_mu_;\n  MigrationState state_ ABSL_GUARDED_BY(state_mu_) = MigrationState::C_CONNECTING;\n\n  // when migration is finished we need to store number of migrated keys\n  // because new request can add or remove keys and we get incorrect statistic\n  size_t keys_number_ = 0;\n\n  util::fb2::BlockingCounter bc_;\n};\n\n}  // namespace dfly::cluster\n"
  },
  {
    "path": "src/server/cluster/outgoing_slot_migration.cc",
    "content": "// Copyright 2024, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#include \"server/cluster/outgoing_slot_migration.h\"\n\n#include <absl/flags/flag.h>\n\n#include <atomic>\n\n#include \"absl/cleanup/cleanup.h\"\n#include \"base/logging.h\"\n#include \"cluster_family.h\"\n#include \"cluster_utility.h\"\n#include \"facade/socket_utils.h\"\n#include \"server/db_slice.h\"\n#include \"server/engine_shard_set.h\"\n#include \"server/error.h\"\n#include \"server/journal/streamer.h\"\n#include \"server/main_service.h\"\n#include \"server/namespaces.h\"\n#include \"server/server_family.h\"\n#include \"util/fibers/synchronization.h\"\n\nABSL_FLAG(int, slot_migration_connection_timeout_ms, 2000,\n          \"Connection creating timeout for migration operations\");\nABSL_FLAG(int, migration_finalization_timeout_ms, 30000,\n          \"Timeout for migration finalization operation\");\n\nusing namespace std;\nusing namespace facade;\nusing namespace util;\n\nnamespace dfly::cluster {\n\nclass OutgoingMigration::SliceSlotMigration : private ProtocolClient {\n public:\n  SliceSlotMigration(DbSlice* slice, ServerContext server_context, SlotSet slots,\n                     OutgoingMigration* om)\n      : ProtocolClient(server_context), streamer_(slice, std::move(slots), &exec_st_) {\n    exec_st_.SwitchErrorHandler([om](auto ge) { om->Finish(std::move(ge)); });\n  }\n\n  ~SliceSlotMigration() {\n    CloseSocket();\n    // it should already be unregistered, this cancel was added to avoid race condition that we\n    // possibly have.\n    bool unregistered = streamer_.Cancel();\n    LOG_IF(DFATAL, unregistered)\n        << \"Streamer was not unregistered properly. 
Check code for race conditions.\";\n    exec_st_.JoinErrorHandler();\n  }\n\n  // Send DFLYMIGRATE FLOW\n  void PrepareFlow(const std::string& node_id) {\n    uint32_t shard_id = EngineShard::tlocal()->shard_id();\n\n    VLOG(1) << \"Connecting to source node_id \" << node_id << \" shard_id \" << shard_id;\n    auto timeout = absl::GetFlag(FLAGS_slot_migration_connection_timeout_ms) * 1ms;\n    if (auto ec = ConnectAndAuth(timeout, &exec_st_); ec) {\n      LOG(WARNING) << \"Couldn't connect to source node_id \" << node_id << \" shard_id \" << shard_id\n                   << \": \" << ec.message()\n                   << \", socket state: \" + GetSocketInfo(Sock()->native_handle());\n      exec_st_.ReportError(GenericError(ec, \"Couldn't connect to source.\"));\n      return;\n    }\n\n    ResetParser(RedisParser::Mode::CLIENT);\n\n    std::string cmd = absl::StrCat(\"DFLYMIGRATE FLOW \", node_id, \" \", shard_id);\n    VLOG(1) << \"cmd: \" << cmd;\n\n    if (auto ec = SendCommandAndReadResponse(cmd); ec) {\n      exec_st_.ReportError(GenericError(ec, cmd));\n      return;\n    }\n\n    if (!CheckRespIsSimpleReply(\"OK\")) {\n      exec_st_.ReportError(absl::StrCat(\"Incorrect response for FLOW cmd: \",\n                                        ToSV(LastResponseArgs().front().GetBuf())));\n      return;\n    }\n  }\n\n  // Register db_slice and journal change listeners\n  void PrepareSync() {\n    streamer_.Start(Sock());\n  }\n\n  // Run restore streamer\n  void RunSync() {\n    streamer_.Run();\n  }\n\n  void Cancel() {\n    // Shutdown socket and allow IO loops to return.\n    ShutdownSocket();\n    streamer_.Cancel();\n  }\n\n  void Finalize(long attempt) {\n    streamer_.SendFinalize(attempt);\n  }\n\n  dfly::GenericError GetError() const {\n    return exec_st_.GetError();\n  }\n\n  using ProtocolClient::CloseSocket;\n\n private:\n  RestoreStreamer streamer_;\n};\n\nOutgoingMigration::OutgoingMigration(MigrationInfo info, ClusterFamily* cf, ServerFamily* sf)\n    
: ProtocolClient(info.node_info.ip, info.node_info.port),\n      migration_info_(std::move(info)),\n      slot_migrations_(shard_set->size()),\n      server_family_(sf),\n      cf_(cf),\n      tx_(new Transaction{sf->service().FindCmd(\"DFLYCLUSTER\")}) {\n  tx_->InitByArgs(&namespaces->GetDefaultNamespace(), 0, {});\n}\n\nOutgoingMigration::~OutgoingMigration() {\n  main_sync_fb_.JoinIfNeeded();\n\n  exec_st_.JoinErrorHandler();\n  // Destroy each flow in its dedicated thread, because we could be the last\n  // owner of the db tables\n  OnAllShards([](auto& migration) {\n    if (migration) {\n      migration.reset();\n    }\n  });\n\n  CloseSocket();\n}\n\nbool OutgoingMigration::ChangeState(MigrationState new_state) {\n  util::fb2::LockGuard lk(state_mu_);\n  if (state_ == MigrationState::C_FINISHED) {\n    return false;\n  }\n\n  state_ = new_state;\n  return true;\n}\n\nvoid OutgoingMigration::OnAllShards(std::function<void(UniqueSliceSlotMigration&)> func) {\n  shard_set->RunBlockingInParallel(\n      [this, &func](auto* shard) { func(slot_migrations_[shard->shard_id()]); });\n}\n\nvoid OutgoingMigration::Finish(const GenericError& error) {\n  auto next_state = MigrationState::C_FINISHED;\n  if (error) {\n    // If OOM error move to FATAL, non-recoverable  state\n    if (error == errc::not_enough_memory) {\n      next_state = MigrationState::C_FATAL;\n    } else {\n      next_state = MigrationState::C_ERROR;\n      exec_st_.ReportError(error);\n    }\n    LOG(WARNING) << \"Finish outgoing migration for \" << cf_->MyID() << \": \"\n                 << migration_info_.node_info.id << \" with error: \" << error.Format();\n\n  } else {\n    LOG(INFO) << \"Finish outgoing migration for \" << cf_->MyID() << \": \"\n              << migration_info_.node_info.id;\n  }\n\n  bool should_cancel_flows = false;\n  absl::Cleanup on_exit([this]() { ShutdownSocket(); });\n\n  {\n    util::fb2::LockGuard lk(state_mu_);\n    switch (state_) {\n      case 
MigrationState::C_FATAL:\n      case MigrationState::C_FINISHED:\n        return;  // Already finished, nothing else to do\n\n      case MigrationState::C_CONNECTING:\n        should_cancel_flows = false;\n        break;\n\n      case MigrationState::C_SYNC:\n      case MigrationState::C_ERROR:\n        should_cancel_flows = true;\n        break;\n    }\n    state_ = next_state;\n  }\n\n  if (next_state == MigrationState::C_FATAL) {\n    // Fatal state stop any further processing of migration so we need to update error here\n    SetLastError(error);\n  }\n\n  if (should_cancel_flows) {\n    OnAllShards([](auto& migration) {\n      CHECK(migration != nullptr);\n      migration->Cancel();\n    });\n    exec_st_.JoinErrorHandler();\n  }\n}\n\nMigrationState OutgoingMigration::GetState() const {\n  util::fb2::LockGuard lk(state_mu_);\n  return state_;\n}\n\nvoid OutgoingMigration::SyncFb() {\n  VLOG(1) << \"Starting outgoing migration fiber for migration \" << migration_info_.ToString();\n\n  const absl::Time start_time = absl::Now();\n\n  // we retry starting migration until \"cancel\" is happened\n  while (GetState() != MigrationState::C_FINISHED) {\n    if (!ChangeState(MigrationState::C_CONNECTING)) {\n      break;\n    }\n\n    if (exec_st_.IsError()) {\n      ResetError();\n      ThisFiber::SleepFor(500ms);  // wait some time before next retry\n    }\n\n    VLOG(1) << \"Connecting to target node\";\n    auto timeout = absl::GetFlag(FLAGS_slot_migration_connection_timeout_ms) * 1ms;\n    if (auto ec = ConnectAndAuth(timeout, &exec_st_); ec) {\n      LOG(WARNING) << \"Can't connect to target node \" << server().Description()\n                   << \" for migration: \" << ec.message()\n                   << \", socket state: \" + GetSocketInfo(Sock()->native_handle());\n      exec_st_.ReportError(GenericError(ec, \"Couldn't connect to source.\"));\n      continue;\n    }\n\n    VLOG(1) << \"Migration initiating\";\n    ResetParser(RedisParser::Mode::CLIENT);\n    
auto cmd = absl::StrCat(\"DFLYMIGRATE INIT \", cf_->MyID(), \" \", slot_migrations_.size());\n    for (const auto& s : migration_info_.slot_ranges) {\n      absl::StrAppend(&cmd, \" \", s.start, \" \", s.end);\n    }\n\n    if (auto ec = SendCommandAndReadResponse(cmd); ec) {\n      LOG(WARNING) << \"Could not send INIT command to \" << server().Description()\n                   << \" for migration: \" << ec.message()\n                   << \", socket state: \" + GetSocketInfo(Sock()->native_handle());\n      exec_st_.ReportError(GenericError(ec, \"Could not send INIT command.\"));\n      continue;\n    }\n\n    if (!CheckRespIsSimpleReply(\"OK\")) {\n      // Break outgoing migration if INIT from incoming node responded with OOM. Usually this will\n      // happen on second iteration after first failed with OOM. Sending second INIT is required to\n      // cleanup slots on incoming slot migration node.\n      if (CheckRespSimpleError(kIncomingMigrationOOM)) {\n        Finish(GenericError{std::make_error_code(errc::not_enough_memory),\n                            std::string(kIncomingMigrationOOM)});\n        break;\n      }\n      if (CheckRespIsSimpleReply(kUnknownMigration)) {\n        const absl::Duration passed = absl::Now() - start_time;\n        // we provide 30 seconds to distribute the config to all nodes to avoid extra errors\n        // reporting\n        if (passed >= absl::Milliseconds(30000)) {\n          exec_st_.ReportError(GenericError(LastResponseArgs().front().GetString()));\n        } else {\n          ThisFiber::SleepFor(500ms);  // to prevent too many attempts\n        }\n      } else {\n        exec_st_.ReportError(GenericError(LastResponseArgs().front().GetString()));\n      }\n      continue;\n    }\n\n    OnAllShards([this](auto& migration) {\n      DbSlice& db_slice = namespaces->GetDefaultNamespace().GetCurrentDbSlice();\n      journal::StartInThread();\n      migration = std::make_unique<SliceSlotMigration>(&db_slice, server(),\n        
                                               migration_info_.slot_ranges, this);\n    });\n\n    if (!ChangeState(MigrationState::C_SYNC)) {\n      break;\n    }\n\n    OnAllShards([this](auto& migration) { migration->PrepareFlow(cf_->MyID()); });\n    if (!exec_st_.IsRunning()) {\n      continue;\n    }\n\n    // Global transactional cut for migration to register db_slice and journal\n    // listeners\n    {\n      Transaction::Guard tg{tx_.get()};\n      OnAllShards([](auto& migration) { migration->PrepareSync(); });\n    }\n\n    if (!exec_st_.IsRunning()) {\n      continue;\n    }\n\n    OnAllShards([](auto& migration) { migration->RunSync(); });\n\n    if (!exec_st_.IsRunning()) {\n      continue;\n    }\n\n    long attempt = 0;\n    while (GetState() != MigrationState::C_FINISHED && !FinalizeMigration(++attempt)) {\n      // Break loop and don't sleep in case of C_FATAL\n      if (GetState() == MigrationState::C_FATAL) {\n        break;\n      }\n      // Process commands that were on pause and try again\n      VLOG(1) << \"Waiting for migration to finalize...\";\n      ThisFiber::SleepFor(500ms);\n    }\n    if (!exec_st_.IsRunning()) {\n      continue;\n    }\n    break;\n  }\n\n  VLOG(1) << \"Exiting outgoing migration fiber for migration \" << migration_info_.ToString();\n}\n\nbool OutgoingMigration::FinalizeMigration(long attempt) {\n  // if it's not the 1st attempt and flows are work correctly we try to\n  // reconnect and ACK one more time\n  LOG(INFO) << \"Finalize migration for \" << cf_->MyID() << \" : \" << migration_info_.node_info.id\n            << \" attempt \" << attempt;\n  if (attempt > 1) {\n    if (!exec_st_.IsRunning()) {\n      return true;\n    }\n    auto timeout = absl::GetFlag(FLAGS_slot_migration_connection_timeout_ms) * 1ms;\n    if (auto ec = ConnectAndAuth(timeout, &exec_st_); ec) {\n      LOG(WARNING) << \"Couldn't connect to \" << cf_->MyID() << \" : \" << migration_info_.node_info.id\n                   << \" attempt \" << 
attempt << \": \" << ec.message()\n                   << \", socket state: \" + GetSocketInfo(Sock()->native_handle());\n      return false;\n    }\n  }\n\n  // Migration finalization has to be done via client pause because commands need to\n  // be blocked on coordinator level to avoid intializing transactions with stale cluster slot info\n  // TODO implement blocking on migrated slots only\n  bool is_block_active = true;\n  auto is_pause_in_progress = [&is_block_active] { return is_block_active; };\n  auto pause_fb_opt =\n      dfly::Pause(server_family_->GetNonPriviligedListeners(), &namespaces->GetDefaultNamespace(),\n                  nullptr, ClientPause::ALL, is_pause_in_progress);\n\n  DCHECK(pause_fb_opt);\n  if (!pause_fb_opt) {\n    auto err = absl::StrCat(\"Migration finalization time out \", cf_->MyID(), \" : \",\n                            migration_info_.node_info.id, \" attempt \", attempt);\n\n    LOG(WARNING) << err;\n    SetLastError(std::move(err));\n  }\n\n  absl::Cleanup cleanup([&is_block_active, &pause_fb_opt]() {\n    if (pause_fb_opt) {\n      is_block_active = false;\n      pause_fb_opt->JoinIfNeeded();\n    }\n  });\n\n  LOG(INFO) << \"FINALIZE flows for \" << cf_->MyID() << \" : \" << migration_info_.node_info.id;\n  OnAllShards([attempt](auto& migration) { migration->Finalize(attempt); });\n\n  auto cmd = absl::StrCat(\"DFLYMIGRATE ACK \", cf_->MyID(), \" \", attempt);\n  VLOG(1) << \"send \" << cmd;\n\n  if (auto err = SendCommand(cmd); err) {\n    LOG(WARNING) << \"Error during sending DFLYMIGRATE ACK to \" << server().Description() << \": \"\n                 << err.message() << \", socket state: \" + GetSocketInfo(Sock()->native_handle());\n    return false;\n  }\n\n  const absl::Time start = absl::Now();\n  const int64_t ack_timeout_ms = absl::GetFlag(FLAGS_migration_finalization_timeout_ms);\n  while (true) {\n    const absl::Time now = absl::Now();\n    const int64_t passed_ms = absl::ToInt64Milliseconds(now - start);\n    if 
(passed_ms >= ack_timeout_ms) {\n      LOG(WARNING) << \"Timeout fot ACK \" << cf_->MyID() << \" : \" << migration_info_.node_info.id\n                   << \" attempt \" << attempt;\n      return false;\n    }\n\n    if (auto resp = ReadRespReply(ack_timeout_ms - passed_ms); !resp) {\n      LOG(WARNING) << \"Error reading response to ACK command from \" << server().Description()\n                   << \": \" << resp.error()\n                   << \", socket state: \" + GetSocketInfo(Sock()->native_handle());\n      return false;\n    }\n\n    // Check OOM from incoming slot migration on ACK request\n    if (CheckRespSimpleError(kIncomingMigrationOOM)) {\n      Finish(GenericError{std::make_error_code(errc::not_enough_memory),\n                          std::string(kIncomingMigrationOOM)});\n      return false;\n    }\n\n    if (!CheckRespFirstTypes({RespExpr::INT64})) {\n      LOG(WARNING) << \"Incorrect response type for \" << cf_->MyID() << \" : \"\n                   << migration_info_.node_info.id << \" attempt \" << attempt\n                   << \" msg: \" << facade::ToSV(LastResponseArgs().front().GetBuf());\n      return false;\n    }\n\n    if (const auto res = get<int64_t>(LastResponseArgs().front().u); res == attempt) {\n      break;\n    } else {\n      LOG(WARNING) << \"Incorrect attempt payload \" << cf_->MyID() << \" : \"\n                   << migration_info_.node_info.id << \", sent \" << attempt << \" received \" << res;\n    }\n  }\n\n  if (!exec_st_.GetError()) {\n    Finish();\n    keys_number_ = cluster::GetKeyCount(migration_info_.slot_ranges);\n    cf_->ApplyMigrationSlotRangeToConfig(migration_info_.node_info.id, migration_info_.slot_ranges,\n                                         false);\n  }\n  return true;\n}\n\nvoid OutgoingMigration::Start() {\n  VLOG(1) << \"Resolving host DNS for outgoing migration\";\n  if (error_code ec = ResolveHostDns(); ec) {\n    LOG(WARNING) << \"Could not resolve host DNS for outgoing migration to \"\n     
            << server().Description() << \": \" << ec.message();\n    exec_st_.ReportError(GenericError(ec, \"Could not resolve host dns.\"));\n    return;\n  }\n\n  main_sync_fb_ = fb2::Fiber(\"outgoing_migration\", &OutgoingMigration::SyncFb, this);\n}\n\nsize_t OutgoingMigration::GetKeyCount() const {\n  util::fb2::LockGuard lk(state_mu_);\n  if (state_ == MigrationState::C_FINISHED) {\n    return keys_number_;\n  }\n  return cluster::GetKeyCount(migration_info_.slot_ranges);\n}\n}  // namespace dfly::cluster\n"
  },
  {
    "path": "src/server/cluster/outgoing_slot_migration.h",
    "content": "// Copyright 2024, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n#pragma once\n\n#include \"server/cluster/cluster_defs.h\"\n#include \"server/protocol_client.h\"\n#include \"server/transaction.h\"\n\nnamespace dfly {\n\nclass ServerFamily;\n\n}  // namespace dfly\nnamespace dfly::cluster {\nclass ClusterFamily;\n\n// Whole outgoing slots migration manager\nclass OutgoingMigration : private ProtocolClient {\n public:\n  OutgoingMigration(MigrationInfo info, ClusterFamily* cf, ServerFamily* sf);\n  ~OutgoingMigration();\n\n  // start migration process, sends INIT command to the target node\n  void Start();\n\n  // if is_error = false mark migration as FINISHED and cancel migration if it's not finished yet\n  // can be called from any thread, but only after Start()\n  // if is_error = true and migration is in progress it will be restarted otherwise nothing happens\n  void Finish(const GenericError& error = {}) ABSL_LOCKS_EXCLUDED(state_mu_);\n\n  MigrationState GetState() const ABSL_LOCKS_EXCLUDED(state_mu_);\n\n  const std::string& GetHostIp() const {\n    return server().host;\n  };\n\n  uint16_t GetPort() const {\n    return server().port;\n  };\n\n  const SlotRanges& GetSlots() const {\n    return migration_info_.slot_ranges;\n  }\n\n  const MigrationInfo GetMigrationInfo() const {\n    return migration_info_;\n  }\n\n  void ResetError() {\n    if (exec_st_.IsError()) {\n      SetLastError(exec_st_.GetError());\n      exec_st_.Reset(nullptr);\n    }\n  }\n\n  void SetLastError(dfly::GenericError err) ABSL_LOCKS_EXCLUDED(error_mu_) {\n    if (!err)\n      return;\n    errors_count_.fetch_add(1, std::memory_order_relaxed);\n    util::fb2::LockGuard lk(error_mu_);\n    last_error_ = std::move(err);\n  }\n\n  std::string GetErrorStr() const ABSL_LOCKS_EXCLUDED(error_mu_) {\n    util::fb2::LockGuard lk(error_mu_);\n    return last_error_.Format();\n  }\n\n  size_t GetErrorsCount() const {\n    return 
errors_count_.load(std::memory_order_relaxed);\n  }\n\n  size_t GetKeyCount() const ABSL_LOCKS_EXCLUDED(state_mu_);\n\n private:\n  MigrationState GetStateImpl() const;\n\n  // SliceSlotMigration manages state and data transferring for the corresponding shard\n  class SliceSlotMigration;\n\n  using UniqueSliceSlotMigration = std::unique_ptr<SliceSlotMigration>;\n\n  void SyncFb();\n  // return true if migration is finalized even with C_ERROR state\n  bool FinalizeMigration(long attempt);\n\n  bool ChangeState(MigrationState new_state) ABSL_LOCKS_EXCLUDED(state_mu_);\n\n  void OnAllShards(std::function<void(UniqueSliceSlotMigration&)>);\n\n  MigrationInfo migration_info_;\n  std::vector<std::unique_ptr<SliceSlotMigration>> slot_migrations_;\n  ServerFamily* server_family_;\n  ClusterFamily* cf_;\n  mutable util::fb2::Mutex error_mu_;\n  dfly::GenericError last_error_ ABSL_GUARDED_BY(error_mu_);\n  std::atomic<size_t> errors_count_ = 0;\n\n  util::fb2::Fiber main_sync_fb_;\n\n  mutable util::fb2::Mutex state_mu_;\n  MigrationState state_ ABSL_GUARDED_BY(state_mu_) = MigrationState::C_CONNECTING;\n\n  boost::intrusive_ptr<Transaction> tx_;\n\n  // when migration is finished we need to store number of migrated keys\n  // because new request can add or remove keys and we get incorrect statistic\n  size_t keys_number_ = 0;\n};\n\n}  // namespace dfly::cluster\n"
  },
  {
    "path": "src/server/cluster/slot_set.h",
    "content": "// Copyright 2024, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#pragma once\n\n#include <bitset>\n#include <memory>\n#include <vector>\n\n#include \"cluster_defs.h\"\n\nnamespace dfly::cluster {\n\nclass SlotSet {\n public:\n  static constexpr SlotId kSlotsNumber = SlotRange::kMaxSlotId + 1;\n  using TBitSet = std::bitset<kSlotsNumber>;\n\n  SlotSet(bool full_house = false) {\n    slots_ = std::make_unique<TBitSet>();\n    if (full_house)\n      slots_->flip();\n  }\n\n  SlotSet(const SlotRanges& slot_ranges) {\n    slots_ = std::make_unique<TBitSet>();\n    Set(slot_ranges, true);\n  }\n\n  SlotSet(const SlotSet& s) {\n    slots_ = std::make_unique<TBitSet>(*s.slots_);\n  }\n\n  SlotSet(SlotSet&& s) = default;\n\n  bool Contains(SlotId slot) const {\n    return slots_->test(slot);\n  }\n\n  void Set(const SlotRanges& slot_ranges, bool value) {\n    for (const auto& slot_range : slot_ranges) {\n      for (auto i = slot_range.start; i <= slot_range.end; ++i) {\n        slots_->set(i, value);\n      }\n    }\n  }\n\n  void Set(SlotId slot, bool value) {\n    slots_->set(slot, value);\n  }\n\n  bool Empty() const {\n    return slots_->none();\n  }\n\n  size_t Count() const {\n    return slots_->count();\n  }\n\n  bool All() const {\n    return slots_->all();\n  }\n\n  // Get SlotSet that are absent in the slots\n  SlotSet GetRemovedSlots(const SlotSet& slots) const {\n    // we need to avoid stack usage to prevent stack overflow\n    SlotSet res(slots);\n    res.slots_->flip();\n    *res.slots_ &= *slots_;\n    return res;\n  }\n\n  SlotRanges ToSlotRanges() const {\n    std::vector<SlotRange> res;\n\n    for (SlotId i = 0; i < kSlotsNumber; ++i) {\n      if (!slots_->test(i)) {\n        continue;\n      } else {\n        auto& range = res.emplace_back(SlotRange{i, i});\n        for (++i; i < kSlotsNumber && slots_->test(i); ++i) {\n          range.end = i;\n        }\n      }\n    }\n\n    return 
SlotRanges(res);\n  }\n\n private:\n  SlotSet(std::unique_ptr<TBitSet> s) {\n    slots_ = std::move(s);\n  }\n\n private:\n  std::unique_ptr<TBitSet> slots_;\n};\n\n}  // namespace dfly::cluster\n"
  },
  {
    "path": "src/server/cluster_support.cc",
    "content": "// Copyright 2024, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\nextern \"C\" {\n#include \"redis/crc16.h\"\n}\n\n#include \"base/flags.h\"\n#include \"base/logging.h\"\n#include \"cluster_support.h\"\n#include \"common.h\"\n\nusing namespace std;\n\nABSL_FLAG(string, cluster_mode, \"\",\n          \"Cluster mode supported. Possible values are \"\n          \"'emulated', 'yes' or ''\");\n\nABSL_FLAG(bool, experimental_cluster_shard_by_slot, false,\n          \"If true, cluster mode is enabled and sharding is done by slot. \"\n          \"Otherwise, sharding is done by hash tag.\");\n\nnamespace dfly {\n\nvoid UniqueSlotChecker::Add(std::string_view key) {\n  if (!IsClusterEnabled()) {\n    return;\n  }\n\n  Add(KeySlot(key));\n}\n\nvoid UniqueSlotChecker::Add(SlotId slot_id) {\n  if (!IsClusterEnabled()) {\n    return;\n  }\n\n  if (slot_id_ == kNoSlotId) {\n    slot_id_ = slot_id;\n  } else if (slot_id_ != slot_id) {\n    slot_id_ = kCrossSlot;\n  }\n}\n\noptional<SlotId> UniqueSlotChecker::GetUniqueSlotId() const {\n  return slot_id_ > kMaxSlotNum ? optional<SlotId>() : slot_id_;\n}\n\nusing namespace detail;\n\nvoid InitializeCluster() {\n  string cluster_mode_str = absl::GetFlag(FLAGS_cluster_mode);\n\n  if (cluster_mode_str == \"emulated\") {\n    cluster_mode = ClusterMode::kEmulatedCluster;\n  } else if (cluster_mode_str == \"yes\") {\n    cluster_mode = ClusterMode::kRealCluster;\n  } else if (cluster_mode_str.empty()) {\n    cluster_mode = ClusterMode::kNoCluster;\n  } else {\n    LOG(ERROR) << \"Invalid value for flag --cluster_mode. 
Exiting...\";\n    exit(1);\n  }\n\n  if (cluster_mode != ClusterMode::kNoCluster) {\n    cluster_shard_by_slot = absl::GetFlag(FLAGS_experimental_cluster_shard_by_slot);\n  }\n}\n\nSlotId KeySlot(std::string_view key) {\n  string_view tag = LockTagOptions::instance().Tag(key);\n  return crc16(tag.data(), tag.length()) & kMaxSlotNum;\n}\n\nbool IsClusterShardedByTag() {\n  return IsClusterEnabledOrEmulated() || LockTagOptions::instance().enabled;\n}\n\n}  // namespace dfly\n"
  },
  {
    "path": "src/server/cluster_support.h",
    "content": "// Copyright 2024, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#pragma once\n\n#include <cstdint>\n#include <optional>\n#include <string_view>\n\n#include \"server/common_types.h\"\n\nnamespace dfly {\n\nnamespace detail {\n\nenum class ClusterMode {\n  kUninitialized,\n  kNoCluster,\n  kEmulatedCluster,\n  kRealCluster,\n};\n\ninline ClusterMode cluster_mode = ClusterMode::kUninitialized;\ninline bool cluster_shard_by_slot = false;\n\n};  // namespace detail\n\nconstexpr SlotId kMaxSlotNum = 0x3FFF;\n\n// A simple utility class that \"aggregates\" SlotId-s and can tell whether all inputs were the same.\n// Only works when cluster is enabled.\nclass UniqueSlotChecker {\n public:\n  void Add(std::string_view key);\n  void Add(SlotId slot_id);\n\n  std::optional<SlotId> GetUniqueSlotId() const;\n\n  bool IsCrossSlot() const {\n    return slot_id_ == kCrossSlot;\n  }\n\n  void Reset() {\n    slot_id_ = kNoSlotId;\n  }\n\n private:\n  // kNoSlotId - if slot wasn't set at all\n  static constexpr SlotId kNoSlotId = kMaxSlotNum + 1;\n  // kCrossSlot - if several different slots were set\n  static constexpr SlotId kCrossSlot = kNoSlotId + 1;\n\n  SlotId slot_id_ = kNoSlotId;\n};\n\nSlotId KeySlot(std::string_view key);\n\nvoid InitializeCluster();\n\ninline bool IsClusterEnabled() {\n  return detail::cluster_mode == detail::ClusterMode::kRealCluster;\n}\n\ninline bool IsClusterEmulated() {\n  return detail::cluster_mode == detail::ClusterMode::kEmulatedCluster;\n}\n\ninline bool IsClusterEnabledOrEmulated() {\n  return IsClusterEnabled() || IsClusterEmulated();\n}\n\ninline bool IsClusterShardedBySlot() {\n  return detail::cluster_shard_by_slot;\n}\n\nbool IsClusterShardedByTag();\n\n}  // namespace dfly\n"
  },
  {
    "path": "src/server/cmd_support.cc",
    "content": "// Copyright 2026, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#include \"server/cmd_support.h\"\n\n#include <absl/cleanup/cleanup.h>\n\n#include \"base/logging.h\"\n\nnamespace dfly::cmd {\n\nbool SingleHopWaiter::await_ready() noexcept {\n  auto* tx = cmd_cntx->tx();\n\n  if (!cmd_cntx->IsDeferredReply()) {\n    // Use fiber blocking in synchronous mode\n    tx->ScheduleSingleHop(callback);\n    return true;\n  } else {\n    // Schedule async hop and keep transaction alive\n    tx->SingleHopAsync(callback);\n    tx_keepalive_ = tx;\n    return false;\n  }\n}\n\nvoid SingleHopWaiter::await_suspend(std::coroutine_handle<> handle) const noexcept {\n  cmd_cntx->Resolve(tx_keepalive_->Blocker(), handle);\n}\n\nfacade::OpStatus SingleHopWaiter::await_resume() const noexcept {\n  return *cmd_cntx->tx()->LocalResultPtr();\n}\n\nvoid CmdR::Coro::return_value(const facade::ErrorReply& err) const noexcept {\n  cmd_cntx->SendError(err);\n}\n\n}  // namespace dfly::cmd\n"
  },
  {
    "path": "src/server/cmd_support.h",
    "content": "// Copyright 2026, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#pragma once\n\n#include <absl/functional/function_ref.h>\n\n#include <concepts>\n#include <coroutine>\n#include <variant>\n\n#include \"facade/error.h\"\n#include \"facade/op_status.h\"\n#include \"server/conn_context.h\"\n#include \"server/engine_shard.h\"\n#include \"server/transaction.h\"\n#include \"util/fibers/synchronization.h\"\n\nnamespace dfly::cmd {\n\n// Awaitable sentinel for the single hop of a transaction. Used instead of the\n// actual awaitable to allow Promise to inject context implicitly and make command code simple.\nusing SingleHopSentinel = Transaction::RunnableType;\n\n// Awaitable in command context for the single hop of a transaction with return value\ntemplate <typename RT> using SingleHopSentinelT = absl::FunctionRef<RT(Transaction*, EngineShard*)>;\n\n// Perform single hop. Returns awaitable that resolves to resulting OpStatus\nSingleHopSentinel SingleHop(const auto& f) {\n  return f;\n}\n\n// Perform single hop. 
Returns awaitable that resolves to return value.\nauto SingleHopT(const auto& f) -> SingleHopSentinelT<decltype(f(nullptr, nullptr))> {\n  return f;\n}\n\n// Awaitable object for waiting for the single hop of a transaction to finish.\n// Avoids coroutine suspending in synchronous mode, doing a fiber suspend instead.\n// In asynchronous mode it registers the promise / blocker on the context.\nstruct SingleHopWaiter {\n  bool await_ready() noexcept;\n  void await_suspend(std::coroutine_handle<> handle) const noexcept;\n  facade::OpStatus await_resume() const noexcept;\n\n  CommandContext* cmd_cntx;\n  Transaction::RunnableType callback;\n  boost::intrusive_ptr<Transaction> tx_keepalive_ = nullptr;\n};\n\n// Extension of SingleHopWaiter capturing the return value of the callback\ntemplate <typename RT> struct SingleHopWaiterT : public SingleHopWaiter {\n  static_assert(std::is_base_of_v<facade::OpResultBase, RT>);\n\n  SingleHopWaiterT(CommandContext* cmd_cntx,\n                   absl::FunctionRef<RT(Transaction*, EngineShard*)> callback)\n      : SingleHopWaiter{cmd_cntx, *this}, callback{callback} {\n  }\n\n  OpStatus operator()(Transaction* tx, EngineShard* es) const {\n    result = callback(tx, es);\n    return result.status();\n  }\n\n  RT&& await_resume() noexcept {\n    return std::move(result);\n  }\n\n  absl::FunctionRef<RT(Transaction*, EngineShard*)> callback;\n  mutable RT result;\n};\n\n// Return type of async command. No actual use as of now\nstruct CmdR {\n  struct Coro;\n  using promise_type = Coro;\n};\n\nconstexpr CmdR kAborted = {};\n\n// Underlying driver (promise) of coroutine that defines its context\nstruct CmdR::Coro {\n  // Coroutine created of a top level command\n  Coro(facade::CmdArgList arg, CommandContext* cmd_cntx) : cmd_cntx{cmd_cntx} {\n  }\n\n  // Coroutine created of a internal function with arguments\n  template <typename... Ts> Coro(CommandContext* cmd_cntx, const Ts&... 
ts) : cmd_cntx{cmd_cntx} {\n  }\n\n  // Use it waiter directly cases when it needs to stay in scope to keep the transaction alive\n  auto& await_transform(SingleHopWaiter& waiter) const {\n    return waiter;\n  }\n\n  auto await_transform(SingleHopSentinel callback) const {\n    return SingleHopWaiter{cmd_cntx, callback};\n  }\n\n  template <typename RT> auto await_transform(SingleHopSentinelT<RT> callback) const {\n    return SingleHopWaiterT<RT>{cmd_cntx, callback};\n  }\n\n  // Return error\n  void return_value(const facade::ErrorReply& err) const noexcept;\n\n  // Conclude command without any error\n  void return_value(std::nullopt_t) const noexcept {\n  }\n\n  // Blank default implementations\n  CmdR get_return_object() {\n    return {};\n  }\n  void unhandled_exception() noexcept {\n  }\n  std::suspend_never initial_suspend() noexcept {\n    return {};\n  }\n  std::suspend_never final_suspend() noexcept {\n    return {};\n  }\n\n  CommandContext* cmd_cntx;\n};\n\n}  // namespace dfly::cmd\n"
  },
  {
    "path": "src/server/cms_family.cc",
    "content": "// Copyright 2026, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#include \"core/cms.h\"\n#include \"facade/cmd_arg_parser.h\"\n#include \"facade/error.h\"\n#include \"facade/reply_builder.h\"\n#include \"server/acl/acl_commands_def.h\"\n#include \"server/command_families.h\"\n#include \"server/command_registry.h\"\n#include \"server/conn_context.h\"\n#include \"server/db_slice.h\"\n#include \"server/engine_shard_set.h\"\n#include \"server/error.h\"\n#include \"server/transaction.h\"\n\nnamespace dfly {\n\nusing namespace facade;\nusing namespace std;\n\nnamespace {\n\nconstexpr char kCmsNotFound[] = \"CMS: key does not exist\";\nconstexpr char kCmsWrongNumKeys[] = \"CMS: wrong number of keys\";\nconstexpr char kCmsWrongNumKeysWeights[] = \"CMS: wrong number of keys/weights\";\nconstexpr char kCmsCannotParseNumber[] = \"CMS: Cannot parse number\";\n\nOpStatus OpInitByDim(const OpArgs& op_args, string_view key, uint32_t width, uint32_t depth) {\n  auto& db_slice = op_args.GetDbSlice();\n  auto op_res = db_slice.AddOrFind(op_args.db_cntx, key, OBJ_CMS);\n  RETURN_ON_BAD_STATUS(op_res);\n\n  if (!op_res->is_new)\n    return OpStatus::KEY_EXISTS;\n\n  PrimeValue& pv = op_res->it->second;\n  pv.SetCMS(width, depth);\n\n  return OpStatus::OK;\n}\n\nOpStatus OpInitByProb(const OpArgs& op_args, string_view key, double error, double probability) {\n  auto& db_slice = op_args.GetDbSlice();\n  auto op_res = db_slice.AddOrFind(op_args.db_cntx, key, OBJ_CMS);\n  RETURN_ON_BAD_STATUS(op_res);\n\n  if (!op_res->is_new)\n    return OpStatus::KEY_EXISTS;\n\n  PrimeValue& pv = op_res->it->second;\n  CMS* cms = CompactObj::AllocateMR<CMS>(CMS::ErrorRateTag{}, error, probability,\n                                         CompactObj::memory_resource());\n  pv.SetCMS(cms);\n\n  return OpStatus::OK;\n}\n\nOpResult<vector<int64_t>> OpIncrBy(const OpArgs& op_args, string_view key,\n                                   const 
vector<pair<string_view, int64_t>>& items) {\n  auto& db_slice = op_args.GetDbSlice();\n  OpResult op_res = db_slice.FindMutable(op_args.db_cntx, key, OBJ_CMS);\n  if (!op_res)\n    return op_res.status();\n\n  CMS* cms = op_res->it->second.GetCMS();\n  vector<int64_t> result;\n  result.reserve(items.size());\n\n  for (const auto& [item, incr] : items) {\n    result.push_back(cms->IncrBy(item, incr));\n  }\n\n  return result;\n}\n\nOpResult<vector<int64_t>> OpQuery(const OpArgs& op_args, string_view key, CmdArgList items) {\n  auto& db_slice = op_args.GetDbSlice();\n  OpResult op_res = db_slice.FindReadOnly(op_args.db_cntx, key, OBJ_CMS);\n  if (!op_res)\n    return op_res.status();\n\n  const CMS* cms = op_res.value()->second.GetCMS();\n  vector<int64_t> result;\n  result.reserve(items.size());\n\n  for (auto arg : items) {\n    result.push_back(cms->Query(ToSV(arg)));\n  }\n\n  return result;\n}\n\nstruct CmsInfo {\n  uint32_t width = 0;\n  uint32_t depth = 0;\n  int64_t count = 0;\n};\n\nOpResult<CmsInfo> OpInfo(const OpArgs& op_args, string_view key) {\n  auto& db_slice = op_args.GetDbSlice();\n  OpResult op_res = db_slice.FindReadOnly(op_args.db_cntx, key, OBJ_CMS);\n  if (!op_res)\n    return op_res.status();\n\n  const CMS* cms = op_res.value()->second.GetCMS();\n  return CmsInfo{cms->width(), cms->depth(), cms->total_count()};\n}\n\nvoid CmdInitByDim(CmdArgList args, CommandContext* cmd_cntx) {\n  CmdArgParser parser(args);\n  string_view key = parser.Next();\n  uint32_t width, depth;\n\n  tie(width, depth) = parser.Next<uint32_t, uint32_t>();\n  auto* rb = static_cast<RedisReplyBuilder*>(cmd_cntx->rb());\n  RETURN_ON_PARSE_ERROR(parser, rb);\n\n  if (width == 0 || depth == 0) {\n    return rb->SendError(\"CMS: width and depth must be greater than 0\");\n  }\n\n  const auto cb = [&](Transaction* t, EngineShard* shard) {\n    return OpInitByDim(t->GetOpArgs(shard), key, width, depth);\n  };\n\n  OpStatus res = 
cmd_cntx->tx()->ScheduleSingleHop(std::move(cb));\n  if (res == OpStatus::KEY_EXISTS) {\n    return rb->SendError(\"item exists\");\n  }\n  if (res == OpStatus::OK) {\n    return rb->SendOk();\n  }\n  return rb->SendError(res);\n}\n\nvoid CmdInitByProb(CmdArgList args, CommandContext* cmd_cntx) {\n  CmdArgParser parser(args);\n  string_view key = parser.Next();\n  double error, probability;\n\n  tie(error, probability) = parser.Next<double, double>();\n  auto* rb = static_cast<RedisReplyBuilder*>(cmd_cntx->rb());\n  RETURN_ON_PARSE_ERROR(parser, rb);\n\n  if (error <= 0 || error >= 1) {\n    return rb->SendError(\"CMS: error must be between 0 and 1 exclusive\");\n  }\n  if (probability <= 0 || probability >= 1) {\n    return rb->SendError(\"CMS: probability must be between 0 and 1 exclusive\");\n  }\n\n  const auto cb = [&](Transaction* t, EngineShard* shard) {\n    return OpInitByProb(t->GetOpArgs(shard), key, error, probability);\n  };\n\n  OpStatus res = cmd_cntx->tx()->ScheduleSingleHop(std::move(cb));\n  if (res == OpStatus::KEY_EXISTS) {\n    return rb->SendError(\"item exists\");\n  }\n  if (res == OpStatus::OK) {\n    return rb->SendOk();\n  }\n  return rb->SendError(res);\n}\n\nvoid CmdIncrBy(CmdArgList args, CommandContext* cmd_cntx) {\n  string_view key = ArgS(args, 0);\n  args.remove_prefix(1);\n\n  // Parse item/increment pairs\n  if (args.size() < 2 || args.size() % 2 != 0) {\n    return cmd_cntx->SendError(kSyntaxErr);\n  }\n\n  vector<pair<string_view, int64_t>> items;\n  items.reserve(args.size() / 2);\n\n  for (size_t i = 0; i < args.size(); i += 2) {\n    string_view item = ToSV(args[i]);\n    int64_t incr;\n    if (!absl::SimpleAtoi(ToSV(args[i + 1]), &incr)) {\n      return cmd_cntx->SendError(kCmsCannotParseNumber);\n    }\n    if (incr <= 0) {\n      return cmd_cntx->SendError(\"CMS: increment must be a positive integer\");\n    }\n    items.emplace_back(item, incr);\n  }\n\n  const auto cb = [&](Transaction* t, EngineShard* shard) {\n    
return OpIncrBy(t->GetOpArgs(shard), key, items);\n  };\n\n  auto* rb = static_cast<RedisReplyBuilder*>(cmd_cntx->rb());\n  OpResult<vector<int64_t>> res = cmd_cntx->tx()->ScheduleSingleHopT(std::move(cb));\n  if (!res) {\n    if (res.status() == OpStatus::KEY_NOTFOUND) {\n      return rb->SendError(kCmsNotFound);\n    }\n    return rb->SendError(res.status());\n  }\n\n  SinkReplyBuilder::ReplyScope scope(rb);\n  rb->StartArray(res->size());\n  for (int64_t count : *res) {\n    rb->SendLong(count);\n  }\n}\n\nvoid CmdQuery(CmdArgList args, CommandContext* cmd_cntx) {\n  string_view key = ArgS(args, 0);\n  args.remove_prefix(1);\n\n  if (args.empty()) {\n    return cmd_cntx->SendError(kSyntaxErr);\n  }\n\n  const auto cb = [&](Transaction* t, EngineShard* shard) {\n    return OpQuery(t->GetOpArgs(shard), key, args);\n  };\n\n  auto* rb = static_cast<RedisReplyBuilder*>(cmd_cntx->rb());\n  OpResult<vector<int64_t>> res = cmd_cntx->tx()->ScheduleSingleHopT(std::move(cb));\n  if (!res) {\n    if (res.status() == OpStatus::KEY_NOTFOUND) {\n      return rb->SendError(kCmsNotFound);\n    }\n    return rb->SendError(res.status());\n  }\n\n  SinkReplyBuilder::ReplyScope scope(rb);\n  rb->StartArray(res->size());\n  for (int64_t count : *res) {\n    rb->SendLong(count);\n  }\n}\n\nvoid CmdInfo(CmdArgList args, CommandContext* cmd_cntx) {\n  string_view key = ArgS(args, 0);\n\n  auto* rb = static_cast<RedisReplyBuilder*>(cmd_cntx->rb());\n\n  const auto cb = [&](Transaction* t, EngineShard* shard) {\n    return OpInfo(t->GetOpArgs(shard), key);\n  };\n\n  OpResult<CmsInfo> res = cmd_cntx->tx()->ScheduleSingleHopT(std::move(cb));\n  if (!res) {\n    if (res.status() == OpStatus::KEY_NOTFOUND) {\n      return rb->SendError(kCmsNotFound);\n    }\n    return rb->SendError(res.status());\n  }\n\n  {\n    SinkReplyBuilder::ReplyScope scope(rb);\n    rb->StartArray(6);\n    rb->SendBulkString(\"width\");\n    rb->SendLong(res->width);\n    rb->SendBulkString(\"depth\");\n    
rb->SendLong(res->depth);\n    rb->SendBulkString(\"count\");\n    rb->SendLong(res->count);\n  }\n}\n\n// Structure to hold CMS data collected from a shard when merging\nstruct CmsShardData {\n  size_t src_index = 0;\n  string_view key;\n  uint32_t width = 0;\n  uint32_t depth = 0;\n  int64_t count = 0;\n  vector<int64_t> counters;\n\n  CmsShardData(size_t src_idx, string_view k, uint32_t w, uint32_t d, int64_t c,\n               const int64_t* data, size_t size)\n      : src_index(src_idx), key(k), width(w), depth(d), count(c), counters(data, data + size) {\n  }\n};\n\nstruct CmsMergeArgs {\n  string_view dest_key;\n  vector<string_view> src_keys;\n  vector<int64_t> weights;\n};\n\nbool ParseMergeArgs(CmdArgList args, RedisReplyBuilder* rb, CmsMergeArgs* out) {\n  CmdArgParser parser(args);\n  uint32_t num_keys;\n\n  out->dest_key = parser.Next();\n  num_keys = parser.Next<uint32_t>();\n  if (auto err = parser.TakeError(); err) {\n    rb->SendError(err.MakeReply());\n    return false;\n  }\n\n  if (num_keys == 0) {\n    rb->SendError(kCmsWrongNumKeys);\n    return false;\n  }\n\n  if (parser.Tail().size() < num_keys) {\n    rb->SendError(kSyntaxErr);\n    return false;\n  }\n\n  out->src_keys.reserve(num_keys);\n  for (uint32_t i = 0; i < num_keys; ++i) {\n    out->src_keys.push_back(parser.Next());\n  }\n\n  if (parser.HasNext()) {\n    string_view weights_kw = parser.Next();\n    if (!absl::EqualsIgnoreCase(weights_kw, \"WEIGHTS\")) {\n      rb->SendError(kCmsWrongNumKeysWeights);\n      return false;\n    }\n\n    out->weights.reserve(num_keys);\n    for (uint32_t i = 0; i < num_keys; ++i) {\n      if (!parser.HasNext()) {\n        rb->SendError(kCmsWrongNumKeysWeights);\n        return false;\n      }\n\n      int64_t weight;\n      if (!absl::SimpleAtoi(parser.Next(), &weight)) {\n        rb->SendError(kCmsCannotParseNumber);\n        return false;\n      }\n      out->weights.push_back(weight);\n    }\n  }\n\n  if (parser.HasNext()) {\n    
rb->SendError(kCmsWrongNumKeysWeights);\n    return false;\n  }\n\n  if (out->weights.empty()) {\n    out->weights.resize(num_keys, 1);\n  }\n\n  return true;\n}\n\n// Merge multiple CMS structures into a destination key.\nvoid CmdMerge(CmdArgList args, CommandContext* cmd_cntx) {\n  auto* rb = static_cast<RedisReplyBuilder*>(cmd_cntx->rb());\n  CmsMergeArgs merge_args;\n  if (!ParseMergeArgs(args, rb, &merge_args)) {\n    return;\n  }\n\n  // multi-shard implementation\n  // 1. fetch from all shards\n  // 2. merge to dest\n  Transaction* tx = cmd_cntx->tx();\n\n  vector<OpResult<vector<CmsShardData>>> shard_results(shard_set->size(), OpStatus::SKIPPED);\n\n  auto read_cb = [&](Transaction* t, EngineShard* shard) -> OpStatus {\n    auto& db_slice = t->GetOpArgs(shard).GetDbSlice();\n    const DbContext& db_cntx = t->GetDbContext();\n    vector<CmsShardData> cms_list;\n\n    // Check each source key to see if it belongs to this shard\n    for (size_t src_idx = 0; src_idx < merge_args.src_keys.size(); ++src_idx) {\n      string_view key = merge_args.src_keys[src_idx];\n      ShardId key_shard = Shard(key, shard_set->size());\n      if (key_shard != shard->shard_id()) {\n        continue;\n      }\n\n      OpResult src_res = db_slice.FindReadOnly(db_cntx, key, OBJ_CMS);\n      if (!src_res) {\n        shard_results[shard->shard_id()] = src_res.status();\n        return OpStatus::OK;\n      }\n\n      const CMS* cms = src_res.value()->second.GetCMS();\n      size_t counter_count = cms->NumCounters();\n      cms_list.emplace_back(src_idx, key, cms->width(), cms->depth(), cms->total_count(),\n                            cms->Data(), counter_count);\n    }\n\n    if (!cms_list.empty()) {\n      shard_results[shard->shard_id()] = std::move(cms_list);\n    }\n    return OpStatus::OK;\n  };\n\n  tx->Execute(read_cb, false /* do not conclude */);\n\n  // Validate dimensions and make sure we found data for every source.\n  uint32_t ref_width = 0, ref_depth = 0;\n  size_t 
seen_sources = 0;\n\n  // Check for errors and validate dimensions.\n  for (auto& result : shard_results) {\n    if (result.status() == OpStatus::SKIPPED)\n      continue;\n\n    if (!result) {\n      tx->Conclude();\n      if (result.status() == OpStatus::KEY_NOTFOUND) {\n        return rb->SendError(kCmsNotFound);\n      }\n      return rb->SendError(result.status());\n    }\n\n    for (auto& cms_data : result.value()) {\n      if (seen_sources == 0) {\n        ref_width = cms_data.width;\n        ref_depth = cms_data.depth;\n      } else if (cms_data.width != ref_width || cms_data.depth != ref_depth) {\n        tx->Conclude();\n        return rb->SendError(\"CMS: dimension mismatch\");\n      }\n      ++seen_sources;\n    }\n  }\n\n  if (seen_sources != merge_args.src_keys.size()) {\n    tx->Conclude();\n    return rb->SendError(kCmsNotFound);\n  }\n\n  // Now write merged data to destination shard\n  ShardId dest_shard_id = Shard(merge_args.dest_key, shard_set->size());\n  OpStatus write_result = OpStatus::OK;\n\n  auto write_cb = [&](Transaction* t, EngineShard* shard) -> OpStatus {\n    if (shard->shard_id() != dest_shard_id) {\n      return OpStatus::OK;\n    }\n\n    auto& db_slice = t->GetOpArgs(shard).GetDbSlice();\n    OpResult dest_res = db_slice.FindMutable(t->GetDbContext(), merge_args.dest_key, OBJ_CMS);\n    if (!dest_res) {\n      write_result = dest_res.status();\n      return OpStatus::OK;\n    }\n\n    CMS* dest_cms = dest_res->it->second.GetCMS();\n\n    // Validate destination dimensions\n    if (ref_width != dest_cms->width() || ref_depth != dest_cms->depth()) {\n      write_result = OpStatus::INVALID_VALUE;\n      return OpStatus::OK;\n    }\n\n    // Reset destination before merging so the result is the weighted sum of sources only.\n    dest_cms->Reset();\n\n    for (const auto& result : shard_results) {\n      if (result.status() == OpStatus::SKIPPED)\n        continue;\n\n      for (const auto& cms_data : result.value()) {\n        CMS 
temp_cms(cms_data.width, cms_data.depth, CompactObj::memory_resource());\n        temp_cms.Load(cms_data.count, cms_data.counters.data());\n\n        if (!dest_cms->MergeFrom(temp_cms, merge_args.weights[cms_data.src_index])) {\n          write_result = OpStatus::INVALID_VALUE;\n          return OpStatus::OK;\n        }\n      }\n    }\n\n    return OpStatus::OK;\n  };\n\n  tx->Execute(write_cb, true /* conclude */);\n\n  if (write_result == OpStatus::KEY_NOTFOUND) {\n    return rb->SendError(kCmsNotFound);\n  }\n  if (write_result == OpStatus::INVALID_VALUE) {\n    return rb->SendError(\"CMS: dimension mismatch\");\n  }\n  return rb->SendOk();\n}\n\n}  // namespace\n\nusing CI = CommandId;\n\n#define HFUNC(x) SetHandler(&Cmd##x)\n\nvoid RegisterCmsFamily(CommandRegistry* registry) {\n  registry->StartFamily(acl::CMS);\n\n  *registry << CI{\"CMS.INITBYDIM\", CO::DENYOOM | CO::FAST, 4, 1, 1}.HFUNC(InitByDim)\n            << CI{\"CMS.INITBYPROB\", CO::DENYOOM | CO::FAST, 4, 1, 1}.HFUNC(InitByProb)\n            << CI{\"CMS.INCRBY\", CO::DENYOOM | CO::FAST, -4, 1, 1}.HFUNC(IncrBy)\n            << CI{\"CMS.QUERY\", CO::READONLY | CO::FAST, -3, 1, 1}.HFUNC(Query)\n            << CI{\"CMS.INFO\", CO::READONLY | CO::FAST, 2, 1, 1}.HFUNC(Info)\n            << CI{\"CMS.MERGE\", CO::DENYOOM | CO::VARIADIC_KEYS, -4, 3, 3}.HFUNC(Merge);\n}\n\n}  // namespace dfly\n"
  },
  {
    "path": "src/server/cms_family_test.cc",
    "content": "// Copyright 2026, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n#include \"facade/facade_test.h\"\n#include \"server/test_utils.h\"\n\nnamespace dfly {\n\nusing testing::ElementsAre;\n\nclass CmsFamilyTest : public BaseFamilyTest {\n protected:\n};\n\nTEST_F(CmsFamilyTest, InitByDim) {\n  auto resp = Run(\"cms.initbydim cms1 1000 5\");\n  EXPECT_EQ(resp, \"OK\");\n  EXPECT_EQ(Run(\"type cms1\"), \"CMSk-TYPE\");\n\n  resp = Run(\"cms.initbydim cms1 100 5\");\n  EXPECT_THAT(resp, ErrArg(\"item exists\"));\n\n  resp = Run(\"cms.initbydim cms2 0 5\");\n  EXPECT_THAT(resp, ErrArg(\"width and depth must be greater than 0\"));\n\n  resp = Run(\"cms.initbydim cms3 5 0\");\n  EXPECT_THAT(resp, ErrArg(\"width and depth must be greater than 0\"));\n}\n\nTEST_F(CmsFamilyTest, InitByProb) {\n  auto resp = Run(\"cms.initbyprob cms1 0.01 0.01\");\n  EXPECT_EQ(resp, \"OK\");\n\n  resp = Run(\"cms.initbyprob cms1 0.01 0.01\");\n  EXPECT_THAT(resp, ErrArg(\"item exists\"));\n\n  resp = Run(\"cms.initbyprob cms2 2 0.01\");\n  EXPECT_THAT(resp, ErrArg(\"error must be between 0 and 1\"));\n\n  resp = Run(\"cms.initbyprob cms3 0.01 0\");\n  EXPECT_THAT(resp, ErrArg(\"probability must be between 0 and 1\"));\n}\n\nTEST_F(CmsFamilyTest, IncrBy) {\n  Run(\"cms.initbydim cms 100 5\");\n\n  auto resp = Run(\"cms.incrby cms foo 3\");\n  EXPECT_THAT(resp, IntArg(3));\n\n  resp = Run(\"cms.incrby cms foo 4 bar 1\");\n  EXPECT_THAT(resp, RespArray(ElementsAre(IntArg(7), IntArg(1))));\n\n  // Should fail on non-existent key\n  resp = Run(\"cms.incrby noexist foo 1\");\n  EXPECT_THAT(resp, ErrArg(\"CMS: key does not exist\"));\n\n  // Should fail with invalid number\n  resp = Run(\"cms.incrby cms foo notanumber\");\n  EXPECT_THAT(resp, ErrArg(\"CMS: Cannot parse number\"));\n}\n\nTEST_F(CmsFamilyTest, Query) {\n  Run(\"cms.initbydim cms 100 5\");\n  Run(\"cms.incrby cms foo 5 bar 3\");\n\n  auto resp = Run(\"cms.query cms foo\");\n  
EXPECT_THAT(resp, IntArg(5));\n\n  resp = Run(\"cms.query cms foo bar\");\n  EXPECT_THAT(resp, RespArray(ElementsAre(IntArg(5), IntArg(3))));\n\n  resp = Run(\"cms.query cms noexist\");\n  EXPECT_THAT(resp, IntArg(0));\n\n  resp = Run(\"cms.query noexist foo\");\n  EXPECT_THAT(resp, ErrArg(\"CMS: key does not exist\"));\n}\n\nTEST_F(CmsFamilyTest, Info) {\n  Run(\"cms.initbydim cms 1000 5\");\n  Run(\"cms.incrby cms foo 5 bar 3 baz 9\");\n\n  auto resp = Run(\"cms.info cms\");\n  EXPECT_THAT(\n      resp, RespArray(ElementsAre(\"width\", IntArg(1000), \"depth\", IntArg(5), \"count\", IntArg(17))));\n\n  resp = Run(\"cms.info noexist\");\n  EXPECT_THAT(resp, ErrArg(\"CMS: key does not exist\"));\n}\n\nTEST_F(CmsFamilyTest, Merge) {\n  Run(\"cms.initbydim A 100 5\");\n  Run(\"cms.initbydim B 100 5\");\n  Run(\"cms.initbydim C 100 5\");\n\n  Run(\"cms.incrby A foo 5 bar 3 baz 9\");\n  Run(\"cms.incrby B foo 2 foobar 3 baz 1\");\n\n  auto resp = Run(\"cms.query A foo bar baz\");\n  EXPECT_THAT(resp, RespArray(ElementsAre(IntArg(5), IntArg(3), IntArg(9))));\n\n  resp = Run(\"cms.query B foo foobar baz\");\n  EXPECT_THAT(resp, RespArray(ElementsAre(IntArg(2), IntArg(3), IntArg(1))));\n\n  resp = Run(\"cms.merge C 2 A B\");\n  EXPECT_EQ(resp, \"OK\");\n\n  resp = Run(\"cms.query C foo bar baz foobar\");\n  EXPECT_THAT(resp, RespArray(ElementsAre(IntArg(7), IntArg(3), IntArg(10), IntArg(3))));\n\n  resp = Run(\"cms.merge noexist 1 A\");\n  EXPECT_THAT(resp, ErrArg(\"CMS: key does not exist\"));\n\n  resp = Run(\"cms.merge C 0 A\");\n  EXPECT_THAT(resp, ErrArg(\"CMS: wrong number of keys\"));\n\n  resp = Run(\"cms.merge A 1 B WEIGHTS 4 3\");\n  EXPECT_THAT(resp, ErrArg(\"CMS: wrong number of keys/weights\"));\n\n  resp = Run(\"cms.merge A 2 B noexist WEIGHTS 4 3\");\n  EXPECT_THAT(resp, ErrArg(\"CMS: key does not exist\"));\n\n  // Merge A into B, should return A values (destination is reset before merge)\n  resp = Run(\"cms.merge B 1 A\");\n  EXPECT_EQ(resp, \"OK\");\n  
resp = Run(\"cms.query B foo bar baz\");\n  EXPECT_THAT(resp, RespArray(ElementsAre(IntArg(5), IntArg(3), IntArg(9))));\n}\n\nTEST_F(CmsFamilyTest, MergeWithWeights) {\n  Run(\"cms.initbydim A 100 5\");\n  Run(\"cms.initbydim B 100 5\");\n  Run(\"cms.initbydim C 100 5\");\n\n  Run(\"cms.incrby A foo 5 bar 3 baz 9\");\n  Run(\"cms.incrby B foo 2 bar 3 baz 1\");\n\n  // Merge with weights: A contributes 2x, B contributes 3x\n  // foo: 5*2 + 2*3 = 16\n  // bar: 3*2 + 3*3 = 15\n  // baz: 9*2 + 1*3 = 21\n  auto resp = Run(\"cms.merge C 2 A B WEIGHTS 2 3\");\n  EXPECT_EQ(resp, \"OK\");\n\n  resp = Run(\"cms.query C foo bar baz\");\n  EXPECT_THAT(resp, RespArray(ElementsAre(IntArg(16), IntArg(15), IntArg(21))));\n}\n\nTEST_F(CmsFamilyTest, MergeWithDuplicateSourceKeysPreservesWeightOrder) {\n  Run(\"cms.initbydim A 100 5\");\n  Run(\"cms.initbydim C 100 5\");\n\n  Run(\"cms.incrby A foo 2 bar 4\");\n\n  auto resp = Run(\"cms.merge C 2 A A WEIGHTS 1 3\");\n  EXPECT_EQ(resp, \"OK\");\n\n  resp = Run(\"cms.query C foo bar\");\n  EXPECT_THAT(resp, RespArray(ElementsAre(IntArg(8), IntArg(16))));\n\n  resp = Run(\"cms.info C\");\n  EXPECT_THAT(\n      resp, RespArray(ElementsAre(\"width\", IntArg(100), \"depth\", IntArg(5), \"count\", IntArg(24))));\n}\n\n// Backported from tests/fakeredis/test/test_stack/test_cms.py::test_cms_info\nTEST_F(CmsFamilyTest, InfoAfterMerges) {\n  Run(\"cms.initbydim A 1000 5\");\n  Run(\"cms.initbydim B 1000 5\");\n  Run(\"cms.initbydim C 1000 5\");\n\n  Run(\"cms.incrby A foo 5 bar 3 baz 9\");\n  Run(\"cms.incrby B foo 2 bar 3 baz 1\");\n\n  auto resp = Run(\"cms.query A foo bar baz\");\n  EXPECT_THAT(resp, RespArray(ElementsAre(IntArg(5), IntArg(3), IntArg(9))));\n\n  resp = Run(\"cms.query B foo bar baz\");\n  EXPECT_THAT(resp, RespArray(ElementsAre(IntArg(2), IntArg(3), IntArg(1))));\n\n  resp = Run(\"cms.merge C 2 A B\");\n  EXPECT_EQ(resp, \"OK\");\n  resp = Run(\"cms.query C foo bar baz\");\n  EXPECT_THAT(resp, 
RespArray(ElementsAre(IntArg(7), IntArg(6), IntArg(10))));\n\n  resp = Run(\"cms.merge C 2 A B WEIGHTS 1 2\");\n  EXPECT_EQ(resp, \"OK\");\n  resp = Run(\"cms.query C foo bar baz\");\n  EXPECT_THAT(resp, RespArray(ElementsAre(IntArg(9), IntArg(9), IntArg(11))));\n\n  resp = Run(\"cms.merge C 2 A B WEIGHTS 2 3\");\n  EXPECT_EQ(resp, \"OK\");\n  resp = Run(\"cms.query C foo bar baz\");\n  EXPECT_THAT(resp, RespArray(ElementsAre(IntArg(16), IntArg(15), IntArg(21))));\n\n  resp = Run(\"cms.info A\");\n  EXPECT_THAT(\n      resp, RespArray(ElementsAre(\"width\", IntArg(1000), \"depth\", IntArg(5), \"count\", IntArg(17))));\n\n  resp = Run(\"cms.info noexist\");\n  EXPECT_THAT(resp, ErrArg(\"CMS: key does not exist\"));\n}\n\n}  // namespace dfly\n"
  },
  {
    "path": "src/server/collection_family_fallback.cc",
    "content": "// Copyright 2025, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#ifndef WITH_COLLECTION_CMDS\n\n#include \"base/logging.h\"\n#include \"server/hset_family.h\"\n#include \"server/set_family.h\"\n#include \"server/stream_family.h\"\n#include \"server/zset_family.h\"\nnamespace dfly {\n\nusing namespace std;\n\nnamespace {\nvoid Fail() {\n  CHECK(false) << \"Compiled without command support\";\n}\n}  // namespace\n\nStreamMemTracker::StreamMemTracker() {\n}\n\nvoid StreamMemTracker::UpdateStreamSize(PrimeValue& pv) const {\n}\n\nStringMap* HSetFamily::ConvertToStrMap(uint8_t* lp) {\n  Fail();\n  return nullptr;\n}\n\nStringSet* SetFamily::ConvertToStrSet(const intset* is, size_t expected_len) {\n  Fail();\n  return nullptr;\n}\n\nuint32_t SetFamily::MaxIntsetEntries() {\n  Fail();\n  return 0;\n}\n\nLoadBlobResult SetFamily::LoadLPSetBlob(std::string_view blob, PrimeValue* pv) {\n  Fail();\n  return LoadBlobResult::kCorrupted;\n}\n\nLoadBlobResult SetFamily::LoadIntSetBlob(std::string_view blob, PrimeValue* pv) {\n  Fail();\n  return LoadBlobResult::kCorrupted;\n}\n\nLoadBlobResult HSetFamily::LoadZiplistBlob(std::string_view blob, PrimeValue* pv) {\n  Fail();\n  return LoadBlobResult::kCorrupted;\n}\n\nLoadBlobResult HSetFamily::LoadListpackBlob(std::string_view blob, PrimeValue* pv) {\n  Fail();\n  return LoadBlobResult::kCorrupted;\n}\n\nLoadBlobResult ZSetFamily::LoadZiplistBlob(std::string_view blob, PrimeValue* pv) {\n  Fail();\n  return LoadBlobResult::kCorrupted;\n}\n\nLoadBlobResult ZSetFamily::LoadListpackBlob(std::string_view blob, PrimeValue* pv) {\n  Fail();\n  return LoadBlobResult::kCorrupted;\n}\n\nOpResult<ZSetFamily::MScoreResponse> ZSetFamily::ZGetMembers(CmdArgList args, Transaction* tx,\n                                                             SinkReplyBuilder* builder) {\n  Fail();\n  return {};\n}\n\nOpResult<ZSetFamily::AddResult> ZSetFamily::OpAdd(const OpArgs& op_args, const 
ZParams& zparams,\n                                                  std::string_view key, ScoredMemberSpan members) {\n  Fail();\n  return {};\n}\n\nOpResult<double> ZSetFamily::OpScore(const OpArgs& op_args, std::string_view key,\n                                     std::string_view member) {\n  Fail();\n  return 0;\n}\n\nvoid ZSetFamily::ZAddGeneric(std::string_view key, const ZParams& zparams, ScoredMemberSpan memb_sp,\n                             CommandContext* cmd_cntx) {\n  Fail();\n}\n\nOpResult<void> ZSetFamily::OpKeyExisted(const OpArgs& op_args, std::string_view key) {\n  Fail();\n  return {};\n}\n\nOpResult<std::vector<ZSetFamily::ScoredArray>> ZSetFamily::OpRanges(\n    const std::vector<ZRangeSpec>& range_specs, const OpArgs& op_args, std::string_view key) {\n  Fail();\n  return {};\n}\n\n}  // namespace dfly\n\n#endif\n"
  },
  {
    "path": "src/server/command_families.h",
    "content": "// Copyright 2025, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#pragma once\n\n// Included by family object files that implement only their respective registration function.\n// Self-registration would require updating the build process to fix linking issues.\nnamespace dfly {\n\nclass CommandRegistry;\n\nvoid RegisterStringFamily(CommandRegistry*);\nvoid RegisterListFamily(CommandRegistry*);\nvoid RegisterBitopsFamily(CommandRegistry*);\nvoid RegisterGeoFamily(CommandRegistry*);\nvoid RegisterHllFamily(CommandRegistry*);\nvoid RegisterBloomFamily(CommandRegistry*);\nvoid RegisterCmsFamily(CommandRegistry*);\nvoid RegisterJsonFamily(CommandRegistry*);\n\n}  // namespace dfly\n"
  },
  {
    "path": "src/server/command_registry.cc",
    "content": "// Copyright 2022, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#include \"server/command_registry.h\"\n\n#include <absl/container/inlined_vector.h>\n#include <absl/strings/match.h>\n#include <absl/strings/str_cat.h>\n#include <absl/strings/str_split.h>\n#include <absl/time/clock.h>\n#include <hdr/hdr_histogram.h>\n\n#include \"base/bits.h\"\n#include \"base/flags.h\"\n#include \"base/logging.h\"\n#include \"base/stl_util.h\"\n#include \"facade/dragonfly_connection.h\"\n#include \"facade/error.h\"\n#include \"server/acl/acl_commands_def.h\"\n\nusing namespace std;\nABSL_FLAG(vector<string>, rename_command, {},\n          \"Change the name of commands, format is: <cmd1_name>=<cmd1_new_name>, \"\n          \"<cmd2_name>=<cmd2_new_name>\");\nABSL_FLAG(vector<string>, restricted_commands, {},\n          \"Commands restricted to connections on the admin port\");\n\nABSL_FLAG(vector<string>, oom_deny_commands, {},\n          \"Additinal commands that will be marked as denyoom\");\n\nABSL_FLAG(vector<string>, command_alias, {},\n          \"Add an alias for given command(s), format is: <alias>=<original>, <alias>=<original>. 
\"\n          \"Aliases must be set identically on replicas, if applicable\");\n\nABSL_FLAG(bool, latency_tracking, false, \"If true, track latency for commands\");\n\nnamespace dfly {\n\nusing namespace facade;\n\nusing absl::AsciiStrToUpper;\nusing absl::GetFlag;\nusing absl::StrCat;\nusing absl::StrSplit;\n\nnamespace {\n\nuint32_t ImplicitCategories(uint32_t mask) {\n  if (mask & CO::ADMIN)\n    mask |= CO::NOSCRIPT;\n  return mask;\n}\n\nuint32_t ImplicitAclCategories(uint32_t mask) {\n  mask = ImplicitCategories(mask);\n  uint32_t out = 0;\n\n  if (mask & CO::JOURNALED)\n    out |= acl::WRITE;\n\n  if ((mask & CO::READONLY) && ((mask & CO::NOSCRIPT) == 0))\n    out |= acl::READ;\n\n  if (mask & CO::ADMIN)\n    out |= acl::ADMIN | acl::DANGEROUS;\n\n  // todo pubsub\n\n  if (mask & CO::FAST)\n    out |= acl::FAST;\n\n  if (mask & CO::BLOCKING)\n    out |= acl::BLOCKING;\n\n  if ((out & acl::FAST) == 0)\n    out |= acl::SLOW;\n\n  return out;\n}\n\nusing CmdLineMapping = absl::flat_hash_map<std::string, std::string>;\n\nCmdLineMapping ParseCmdlineArgMap(const absl::Flag<std::vector<std::string>>& flag) {\n  const auto& mappings = absl::GetFlag(flag);\n  CmdLineMapping parsed_mappings;\n  parsed_mappings.reserve(mappings.size());\n\n  for (const std::string& mapping : mappings) {\n    absl::InlinedVector<std::string_view, 2> kv = absl::StrSplit(mapping, '=');\n    if (kv.size() != 2) {\n      LOG(ERROR) << \"Malformed command '\" << mapping << \"' for \" << flag.Name()\n                 << \", expected key=value\";\n      exit(1);\n    }\n\n    std::string key = absl::AsciiStrToUpper(kv[0]);\n    std::string value = absl::AsciiStrToUpper(kv[1]);\n\n    if (key == value) {\n      LOG(ERROR) << \"Invalid attempt to map \" << key << \" to itself in \" << flag.Name();\n      exit(1);\n    }\n\n    if (!parsed_mappings.emplace(std::move(key), std::move(value)).second) {\n      LOG(ERROR) << \"Duplicate insert to \" << flag.Name() << \" not allowed\";\n      
exit(1);\n    }\n  }\n  return parsed_mappings;\n}\n\nCmdLineMapping OriginalToAliasMap() {\n  CmdLineMapping original_to_alias;\n  CmdLineMapping alias_to_original = ParseCmdlineArgMap(FLAGS_command_alias);\n  original_to_alias.reserve(alias_to_original.size());\n  std::for_each(std::make_move_iterator(alias_to_original.begin()),\n                std::make_move_iterator(alias_to_original.end()),\n                [&original_to_alias](auto&& pair) {\n                  original_to_alias.emplace(std::move(pair.second), std::move(pair.first));\n                });\n\n  return original_to_alias;\n}\n\nconstexpr int64_t kLatencyHistogramMinValue = 1;        // Minimum value in usec\nconstexpr int64_t kLatencyHistogramMaxValue = 1000000;  // Maximum value in usec (1s)\nconstexpr int32_t kLatencyHistogramPrecision = 2;\n\n}  // namespace\n\nCommandId::CommandId(const char* name, uint32_t mask, int8_t arity, int8_t first_key,\n                     int8_t last_key, std::optional<uint32_t> acl_categories)\n    : facade::CommandId(name, ImplicitCategories(mask), arity, first_key, last_key,\n                        acl_categories.value_or(ImplicitAclCategories(mask))) {\n  implicit_acl_ = !acl_categories.has_value();\n  bool is_latency_tracked = GetFlag(FLAGS_latency_tracking);\n  if (is_latency_tracked) {\n    hdr_histogram* hist = nullptr;\n    const int init_result = hdr_init(kLatencyHistogramMinValue, kLatencyHistogramMaxValue,\n                                     kLatencyHistogramPrecision, &hist);\n    CHECK_EQ(init_result, 0) << \"failed to initialize histogram for command \" << name;\n    latency_histogram_ = hist;\n  }\n\n  if (name_.rfind(\"EVAL\", 0) == 0)\n    kind_multi_ctr_ = CO::MultiControlKind::EVAL;\n  else if (base::_in(name_, {\"EXEC\", \"MULTI\", \"DISCARD\"}))\n    kind_multi_ctr_ = CO::MultiControlKind::EXEC;\n  else if (base::_in(name_, {\"PUBLISH\", \"SUBSCRIBE\", \"UNSUBSCRIBE\"}))\n    kind_pubsub_ = CO::PubSubKind::REGULAR;\n  else if 
(base::_in(name_, {\"PSUBSCRIBE\", \"PUNSUBSCRIBE\"}))\n    kind_pubsub_ = CO::PubSubKind::PATTERN;\n  else if (base::_in(name_, {\"SPUBLISH\", \"SSUBSCRIBE\", \"SUNSUBSCRIBE\"}))\n    kind_pubsub_ = CO::PubSubKind::SHARDED;\n  can_be_monitored_ = (opt_mask_ & CO::ADMIN) == 0 && name_ != \"EXEC\";\n\n  if (base::_in(name_, {\"MSET\", \"MSETNX\"}))\n    interleave_step_ = 2;\n  else if (name_ == \"JSON.MSET\")\n    interleave_step_ = 3;\n}\n\nCommandId::~CommandId() {\n  // Aliases share the same latency histogram, so we only close it if this is not an alias.\n  if (latency_histogram_ && !is_alias_) {\n    hdr_close(latency_histogram_);\n  }\n}\n\nCommandId CommandId::Clone(const std::string_view name) const {\n  CommandId cloned =\n      CommandId{name.data(), opt_mask_, arity_, first_key_, last_key_, acl_categories_};\n  cloned.handler_ = handler_;\n  cloned.opt_mask_ = opt_mask_ | CO::HIDDEN;\n  cloned.acl_categories_ = acl_categories_;\n  cloned.implicit_acl_ = implicit_acl_;\n  cloned.interleave_step_ = interleave_step_;\n  cloned.is_alias_ = true;\n\n  // explicit sharing of the object since it's an alias we can do that.\n  // I am assuming that the source object lifetime is at least as of the cloned object.\n  if (cloned.latency_histogram_) {\n    hdr_close(cloned.latency_histogram_);  // Free the histogram in the cloned object.\n    cloned.latency_histogram_ = static_cast<hdr_histogram*>(latency_histogram_);\n  }\n  return cloned;\n}\n\nbool CommandId::IsTransactional() const {\n  if (first_key_ > 0 || (opt_mask_ & CO::GLOBAL_TRANS) || (opt_mask_ & CO::NO_KEY_TRANSACTIONAL))\n    return true;\n\n  if (name_ == \"EVAL\" || name_ == \"EVALSHA\" || name_ == \"EVAL_RO\" || name_ == \"EVALSHA_RO\" ||\n      name_ == \"EXEC\")\n    return true;\n\n  return false;\n}\n\nbool CommandId::IsMultiTransactional() const {\n  return kind_multi_ctr_.has_value();\n}\n\noptional<facade::ErrorReply> CommandId::Validate(CmdArgList tail_args) const {\n  if ((arity() > 0 && 
tail_args.size() + 1 != size_t(arity())) ||\n      (arity() < 0 && tail_args.size() + 1 < size_t(-arity()))) {\n    string prefix;\n    if (name() == \"EXEC\")\n      prefix = \"-EXECABORT Transaction discarded because of: \";\n    return facade::ErrorReply{prefix + facade::WrongNumArgsError(name()), kSyntaxErrType};\n  }\n\n  if (interleave_step_ && tail_args.size() % interleave_step_ != 0) {\n    return facade::ErrorReply{facade::WrongNumArgsError(name()), kSyntaxErrType};\n  }\n\n  if (validator_)\n    return validator_(tail_args);\n  return nullopt;\n}\n\nvoid CommandId::ResetStats(unsigned thread_index) {\n  command_stats_[thread_index] = {0, 0};\n  if (hdr_histogram* h = latency_histogram_; h != nullptr) {\n    hdr_reset(h);\n    std::atomic_thread_fence(std::memory_order_seq_cst);\n  }\n}\n\nvoid CommandId::RecordLatency(unsigned tid, uint64_t latency_usec) const {\n  auto& ent = command_stats_[tid];\n\n  ++ent.first;\n  ent.second += latency_usec;\n\n  if (latency_histogram_) {\n    hdr_record_value_atomic(latency_histogram_, latency_usec);\n  }\n}\n\nCommandRegistry::CommandRegistry() {\n  cmd_rename_map_ = ParseCmdlineArgMap(FLAGS_rename_command);\n\n  for (const string& name : GetFlag(FLAGS_restricted_commands)) {\n    restricted_cmds_.emplace(AsciiStrToUpper(name));\n  }\n\n  for (const string& name : GetFlag(FLAGS_oom_deny_commands)) {\n    oomdeny_cmds_.emplace(AsciiStrToUpper(name));\n  }\n}\n\nvoid CommandRegistry::Init(unsigned int thread_count) {\n  const CmdLineMapping original_to_alias = OriginalToAliasMap();\n  absl::flat_hash_map<std::string, CommandId> alias_to_command_id;\n  alias_to_command_id.reserve(original_to_alias.size());\n  for (auto& [_, cmd] : cmd_map_) {\n    cmd.Init(thread_count);\n    if (auto it = original_to_alias.find(cmd.name()); it != original_to_alias.end()) {\n      auto alias_cmd = cmd.Clone(it->second);\n      alias_cmd.Init(thread_count);\n      alias_to_command_id.insert({it->second, std::move(alias_cmd)});\n    }\n  
}\n  std::copy(std::make_move_iterator(alias_to_command_id.begin()),\n            std::make_move_iterator(alias_to_command_id.end()),\n            std::inserter(cmd_map_, cmd_map_.end()));\n}\n\nCommandRegistry& CommandRegistry::operator<<(CommandId cmd) {\n  string k = string(cmd.name());\n\n  absl::InlinedVector<std::string_view, 2> maybe_subcommand = StrSplit(cmd.name(), \" \");\n  const bool is_sub_command = maybe_subcommand.size() == 2;\n  if (const auto it = cmd_rename_map_.find(maybe_subcommand.front()); it != cmd_rename_map_.end()) {\n    if (it->second.empty()) {\n      return *this;  // Incase of empty string we want to remove the command from registry.\n    }\n    k = is_sub_command ? StrCat(it->second, \" \", maybe_subcommand[1]) : it->second;\n  }\n\n  if (restricted_cmds_.find(k) != restricted_cmds_.end()) {\n    cmd.SetRestricted(true);\n  }\n\n  if (oomdeny_cmds_.find(k) != oomdeny_cmds_.end()) {\n    cmd.SetFlag(CO::DENYOOM);\n  }\n\n  cmd.SetFamily(family_of_commands_.size() - 1);\n  if (acl_category_)\n    cmd.SetAclCategory(*acl_category_);\n\n  if (!is_sub_command || absl::StartsWith(cmd.name(), \"ACL\")) {\n    cmd.SetBitIndex(1ULL << bit_index_);\n    family_of_commands_.back().emplace_back(k);\n    ++bit_index_;\n  } else {\n    DCHECK(absl::StartsWith(k, family_of_commands_.back().back()));\n    cmd.SetBitIndex(1ULL << (bit_index_ - 1));\n  }\n  CHECK(cmd_map_.emplace(k, std::move(cmd)).second) << k;\n\n  return *this;\n}\n\nvoid CommandRegistry::StartFamily(std::optional<uint32_t> acl_category) {\n  family_of_commands_.emplace_back();\n  bit_index_ = 0;\n  acl_category_ = acl_category;\n}\n\nstd::string_view CommandRegistry::RenamedOrOriginal(std::string_view orig) const {\n  if (!cmd_rename_map_.empty() && cmd_rename_map_.contains(orig)) {\n    return cmd_rename_map_.find(orig)->second;\n  }\n  return orig;\n}\n\nCommandRegistry::FamiliesVec CommandRegistry::GetFamilies() {\n  return std::move(family_of_commands_);\n}\n\nstd::pair<const 
CommandId*, ParsedArgs> CommandRegistry::FindExtended(string_view cmd,\n                                                                      ParsedArgs tail_args) const {\n  if (cmd == RenamedOrOriginal(\"ACL\"sv)) {\n    if (tail_args.empty()) {\n      return {Find(cmd), {}};\n    }\n\n    auto second_cmd = absl::AsciiStrToUpper(tail_args.Front());\n    string full_cmd = StrCat(cmd, \" \", second_cmd);\n\n    return {Find(full_cmd), tail_args.Tail()};\n  }\n\n  const CommandId* res = Find(cmd);\n  if (!res)\n    return {nullptr, {}};\n\n  // A workaround for XGROUP HELP that does not fit our static taxonomy of commands.\n  if (tail_args.size() == 1 && res->name() == \"XGROUP\") {\n    if (absl::EqualsIgnoreCase(tail_args.Front(), \"HELP\")) {\n      res = Find(\"_XGROUP_HELP\");\n    }\n  }\n  return {res, tail_args};\n}\n\nabsl::flat_hash_map<std::string, hdr_histogram*> CommandRegistry::LatencyMap() const {\n  absl::flat_hash_map<std::string, hdr_histogram*> cmd_latencies;\n  cmd_latencies.reserve(cmd_map_.size());\n  for (const auto& [cmd_name, cmd] : cmd_map_) {\n    cmd_latencies.insert({absl::AsciiStrToLower(cmd_name), cmd.GetLatencyHist()});\n  }\n  return cmd_latencies;\n}\n\n}  // namespace dfly\n"
  },
  {
    "path": "src/server/command_registry.h",
    "content": "// Copyright 2022, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#pragma once\n\n#include <absl/container/flat_hash_map.h>\n#include <absl/container/flat_hash_set.h>\n#include <absl/types/span.h>\n\n#include <functional>\n#include <optional>\n\n#include \"base/function2.hpp\"\n#include \"facade/command_id.h\"\n#include \"facade/facade_types.h\"\n\nstruct hdr_histogram;\n\nnamespace dfly {\n\nnamespace CO {\n\nenum CommandOpt : uint32_t {\n  READONLY = 1U << 0,\n  FAST = 1U << 1,       // Unused?\n  JOURNALED = 1U << 2,  // Command is logged to AOF / Journal.\n  LOADING = 1U << 3,    // Command allowed during LOADING state.\n  DENYOOM = 1U << 4,    // use-memory in redis.\n\n  DANGEROUS = 1U << 5,  // Dangerous commands are logged when used\n\n  VARIADIC_KEYS = 1U << 6,  // arg 2 determines number of keys. Relevant for ZUNIONSTORE, EVAL etc.\n\n  ADMIN = 1U << 7,  // implies NOSCRIPT,\n  NOSCRIPT = 1U << 8,\n  BLOCKING = 1U << 9,\n  HIDDEN = 1U << 10,  // does not show in COMMAND command output\n  GLOBAL_TRANS = 1U << 12,\n  STORE_LAST_KEY = 1U << 13,  // The command my have a store key as the last argument.\n\n  NO_AUTOJOURNAL = 1U << 15,  // Skip automatically logging command to journal inside transaction.\n\n  // Allows commands without keys to respect transaction ordering and enables journaling by default\n  NO_KEY_TRANSACTIONAL = 1U << 16,\n  NO_KEY_TX_SPAN_ALL = 1U << 17,  // All shards are active for the no-key-transactional command\n\n  // The same callback can be run multiple times without corrupting the result. 
Used for\n  // opportunistic optimizations where inconsistencies can only be detected afterwards.\n  IDEMPOTENT = 1U << 18,\n};\n\nenum class PubSubKind : uint8_t { REGULAR = 0, PATTERN = 1, SHARDED = 2 };\n\n// Commands controlling any multi command execution.\n// They often need to be handled separately from regular commands in many contexts\nenum class MultiControlKind : uint8_t {\n  EVAL,  // EVAL, EVAL_RO, EVALSHA, EVALSHA_RO\n  EXEC,  // EXEC, MULTI, DISCARD\n};\n\n};  // namespace CO\n\n// Per thread vector of command stats. Each entry is {cmd_calls, cmd_latency_agg in usec}.\nusing CmdCallStats = std::pair<uint64_t, uint64_t>;\n\nclass CommandId;\nclass CommandContext;\n\n// TODO: move it to helio\n// Makes sure that the POD T that is passed to the constructor is reset to default state\ntemplate <typename T> class MoveOnly {\n public:\n  MoveOnly() = default;\n\n  MoveOnly(const MoveOnly&) = delete;\n  MoveOnly& operator=(const MoveOnly&) = delete;\n\n  MoveOnly(MoveOnly&& t) noexcept : value_(std::move(t.value_)) {\n    t.value_ = T{};  // Reset the passed value to default state\n  }\n\n  MoveOnly& operator=(const T& t) noexcept {\n    value_ = t;\n    return *this;\n  }\n\n  operator const T&() const {  // NOLINT\n    return value_;\n  }\n\n private:\n  T value_{};\n};\n\nclass CommandId : public facade::CommandId {\n public:\n  using CmdArgList = facade::CmdArgList;\n\n  // NOTICE: name must be a literal string, otherwise metrics break! 
(see cmd_stats_map in\n  // server_state.h)\n  CommandId(const char* name, uint32_t mask, int8_t arity, int8_t first_key, int8_t last_key,\n            std::optional<uint32_t> acl_categories = std::nullopt);\n\n  CommandId(CommandId&& o) = default;\n\n  ~CommandId();\n\n  [[nodiscard]] CommandId Clone(std::string_view name) const;\n\n  void Init(unsigned thread_count) {\n    command_stats_ = std::make_unique<CmdCallStats[]>(thread_count);\n  }\n\n  using Handler = fu2::function_base<true, true, fu2::capacity_default, false, false,\n                                     void(CmdArgList, CommandContext*) const>;\n  using ArgValidator = fu2::function_base<true, true, fu2::capacity_default, false, false,\n                                          std::optional<facade::ErrorReply>(CmdArgList) const>;\n\n  // Returns the invoke time in usec.\n  void Invoke(CmdArgList args, CommandContext* cmd_cntx) const {\n    handler_(args, cmd_cntx);\n  }\n\n  // Returns error if validation failed, otherwise nullopt\n  std::optional<facade::ErrorReply> Validate(CmdArgList tail_args) const;\n\n  bool IsTransactional() const;\n\n  bool IsMultiTransactional() const;\n\n  bool IsReadOnly() const {\n    return opt_mask_ & CO::READONLY;\n  }\n\n  bool IsJournaled() const {\n    return opt_mask_ & CO::JOURNALED;\n  }\n\n  bool IsBlocking() const {\n    return opt_mask_ & CO::BLOCKING;\n  }\n\n  // See deduction logic for details. 
We don't monitor ADMIN commands\n  // and log the final `EXEC` command manually at the end.\n  bool CanBeMonitored() const {\n    return can_be_monitored_;\n  }\n\n  int8_t interleaved_step() const {\n    return interleave_step_;\n  }\n\n  template <typename RT> CommandId&& SetAsyncHandler(RT f(CmdArgList, CommandContext*)) && {\n    support_async_ = true;\n    handler_ = [f](CmdArgList args, CommandContext* cntx) { f(args, cntx); };\n    return std::move(*this);\n  }\n\n  CommandId&& SetHandler(Handler f, bool async_support = false) && {\n    support_async_ |= async_support;\n    handler_ = std::move(f);\n    return std::move(*this);\n  }\n\n  CommandId&& SetValidator(ArgValidator f) && {\n    validator_ = std::move(f);\n    return std::move(*this);\n  }\n\n  bool is_multi_key() const {\n    return (last_key_ != first_key_) || (opt_mask_ & CO::VARIADIC_KEYS);\n  }\n\n  void ResetStats(unsigned thread_index);\n\n  CmdCallStats GetStats(unsigned thread_index) const {\n    return command_stats_[thread_index];\n  }\n\n  void SetAclCategory(uint32_t mask) {\n    if (implicit_acl_)\n      acl_categories_ |= mask;\n  }\n\n  bool IsAlias() const {\n    return is_alias_;\n  }\n\n  hdr_histogram* GetLatencyHist() const {\n    return latency_histogram_;\n  }\n\n  std::optional<CO::PubSubKind> PubSubKind() const {\n    return kind_pubsub_;\n  }\n\n  // Returns value if this command controls multi command execution (EVAL, EXEC & helpers)\n  std::optional<CO::MultiControlKind> MultiControlKind() const {\n    return kind_multi_ctr_;\n  }\n\n  void RecordLatency(unsigned tid, uint64_t latency_usec) const;\n\n  bool SupportsAsync() const {\n    return support_async_;\n  }\n\n private:\n  std::optional<CO::PubSubKind> kind_pubsub_;\n  std::optional<CO::MultiControlKind> kind_multi_ctr_;\n\n  // The following fields must copy manually in the move constructor.\n  bool implicit_acl_;\n  bool is_alias_{false};\n  bool can_be_monitored_{true};\n  bool support_async_{false};\n  int8_t 
interleave_step_{0};\n\n  std::unique_ptr<CmdCallStats[]> command_stats_;\n  Handler handler_;\n  ArgValidator validator_;\n  MoveOnly<hdr_histogram*> latency_histogram_;  // Histogram for command latency in usec\n};\n\nclass CommandRegistry {\n public:\n  CommandRegistry();\n\n  void Init(unsigned thread_count);\n\n  CommandRegistry& operator<<(CommandId cmd);\n\n  const CommandId* Find(std::string_view cmd) const {\n    auto it = cmd_map_.find(cmd);\n    return it == cmd_map_.end() ? nullptr : &it->second;\n  }\n\n  CommandId* Find(std::string_view cmd) {\n    auto it = cmd_map_.find(cmd);\n    return it == cmd_map_.end() ? nullptr : &it->second;\n  }\n\n  using TraverseCb = std::function<void(std::string_view, const CommandId&)>;\n\n  void Traverse(TraverseCb cb) {\n    for (const auto& k_v : cmd_map_) {\n      cb(k_v.first, k_v.second);\n    }\n  }\n\n  void ResetCallStats(unsigned thread_index) {\n    for (auto& k_v : cmd_map_) {\n      k_v.second.ResetStats(thread_index);\n    }\n  }\n\n  void MergeCallStats(unsigned thread_index,\n                      std::function<void(std::string_view, const CmdCallStats&)> cb) const {\n    for (const auto& k_v : cmd_map_) {\n      auto src = k_v.second.GetStats(thread_index);\n      if (src.first == 0)\n        continue;\n      cb(k_v.second.name(), src);\n    }\n  }\n\n  void StartFamily(std::optional<uint32_t> acl_category = std::nullopt);\n\n  std::string_view RenamedOrOriginal(std::string_view orig) const;\n\n  using FamiliesVec = std::vector<std::vector<std::string>>;\n  FamiliesVec GetFamilies();\n\n  std::pair<const CommandId*, facade::ParsedArgs> FindExtended(std::string_view cmd,\n                                                               facade::ParsedArgs tail_args) const;\n\n  absl::flat_hash_map<std::string, hdr_histogram*> LatencyMap() const;\n\n private:\n  absl::flat_hash_map<std::string, CommandId> cmd_map_;\n  absl::flat_hash_map<std::string, std::string> cmd_rename_map_;\n  
absl::flat_hash_set<std::string> restricted_cmds_;\n  absl::flat_hash_set<std::string> oomdeny_cmds_;\n\n  FamiliesVec family_of_commands_;\n  size_t bit_index_;\n  std::optional<uint32_t> acl_category_;  // category of family currently being built\n};\n\n}  // namespace dfly\n"
  },
  {
    "path": "src/server/common.cc",
    "content": "// Copyright 2022, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#include \"server/common.h\"\n\n#include <absl/random/random.h>\n#include <absl/strings/match.h>\n#include <absl/strings/str_cat.h>\n#include <fast_float/fast_float.h>\n\n#include <system_error>\n\nextern \"C\" {\n#include \"redis/rdb.h\"\n}\n\n#include \"base/flags.h\"\n#include \"base/logging.h\"\n#include \"core/compact_object.h\"\n#include \"core/glob_matcher.h\"\n#include \"core/interpreter.h\"\n#include \"facade/cmd_arg_parser.h\"\n#include \"server/conn_context.h\"\n#include \"server/engine_shard_set.h\"\n#include \"server/error.h\"\n#include \"server/journal/journal.h\"\n#include \"server/server_state.h\"\n#include \"server/transaction.h\"\n\n// We've generalized \"hashtags\" so that users can specify custom delimiter and closures, see below.\n// If I had a time machine, I'd rename this to lock_on_tags.\nABSL_FLAG(bool, lock_on_hashtags, false,\n          \"When true, locks are done in the {hashtag} level instead of key level. Hashtag \"\n          \"extraction can be further configured with locktag_* flags.\");\n\n// We would have used `char` instead of `string`, but that's impossible.\nABSL_FLAG(\n    std::string, locktag_delimiter, \"\",\n    \"If set, this char is used to extract a lock tag by looking at delimiters, like hash tags. If \"\n    \"unset, regular hashtag extraction is done (with {}). Must be used with --lock_on_hashtags\");\n\nABSL_FLAG(unsigned, locktag_skip_n_end_delimiters, 0,\n          \"How many closing tag delimiters should we skip when extracting lock tags. 0 for no \"\n          \"skipping. 
For example, when delimiter is ':' and this flag is 2, the locktag for \"\n          \"':a:b:c:d:e' will be 'a:b:c'.\");\n\nABSL_FLAG(std::string, locktag_prefix, \"\",\n          \"Only keys with this prefix participate in tag extraction.\");\n\nnamespace dfly {\n\nusing namespace std;\nusing namespace util;\n\nnamespace {\n\n// Thread-local cache with static linkage.\nthread_local std::optional<LockTagOptions> locktag_lock_options;\n\n}  // namespace\n\nvoid TEST_InvalidateLockTagOptions() {\n  locktag_lock_options = nullopt;  // For test main thread\n  CHECK(shard_set != nullptr);\n  shard_set->pool()->AwaitBrief(\n      [](ShardId shard, ProactorBase* proactor) { locktag_lock_options = nullopt; });\n}\n\nconst LockTagOptions& LockTagOptions::instance() {\n  if (!locktag_lock_options.has_value()) {\n    string delimiter = absl::GetFlag(FLAGS_locktag_delimiter);\n    if (delimiter.empty()) {\n      delimiter = \"{}\";\n    } else if (delimiter.size() == 1) {\n      delimiter += delimiter;  // Copy delimiter (e.g. 
\"::\") so that it's easier to use below\n    } else {\n      LOG(ERROR) << \"Invalid value for locktag_delimiter - must be a single char\";\n      exit(-1);\n    }\n\n    locktag_lock_options = {\n        .enabled = absl::GetFlag(FLAGS_lock_on_hashtags),\n        .open_locktag = delimiter[0],\n        .close_locktag = delimiter[1],\n        .skip_n_end_delimiters = absl::GetFlag(FLAGS_locktag_skip_n_end_delimiters),\n        .prefix = absl::GetFlag(FLAGS_locktag_prefix),\n    };\n  }\n\n  return *locktag_lock_options;\n}\n\nstd::string_view LockTagOptions::Tag(std::string_view key) const {\n  if (!absl::StartsWith(key, prefix)) {\n    return key;\n  }\n\n  const size_t start = key.find(open_locktag);\n  if (start == key.npos) {\n    return key;\n  }\n\n  size_t end = start;\n  for (unsigned i = 0; i <= skip_n_end_delimiters; ++i) {\n    size_t next = end + 1;\n    end = key.find(close_locktag, next);\n    if (end == key.npos || end == next) {\n      return key;\n    }\n  }\n\n  return key.substr(start + 1, end - start - 1);\n}\n\nconst char* GlobalStateName(GlobalState s) {\n  switch (s) {\n    case GlobalState::ACTIVE:\n      return \"ACTIVE\";\n    case GlobalState::LOADING:\n      return \"LOADING\";\n    case GlobalState::SHUTTING_DOWN:\n      return \"SHUTTING DOWN\";\n    case GlobalState::TAKEN_OVER:\n      return \"TAKEN OVER\";\n  }\n  ABSL_UNREACHABLE();\n}\n\nconst char* RdbTypeName(unsigned type) {\n  switch (type) {\n    case RDB_TYPE_STRING:\n      return \"string\";\n    case RDB_TYPE_LIST:\n      return \"list\";\n    case RDB_TYPE_SET:\n      return \"set\";\n    case RDB_TYPE_ZSET:\n      return \"zset\";\n    case RDB_TYPE_HASH:\n      return \"hash\";\n    case RDB_TYPE_STREAM_LISTPACKS:\n      return \"stream\";\n  }\n  return \"other\";\n}\n\nbool ParseDouble(string_view src, double* value) {\n  if (src.empty())\n    return false;\n\n  if (absl::EqualsIgnoreCase(src, \"-inf\")) {\n    *value = -HUGE_VAL;\n  } else if 
(absl::EqualsIgnoreCase(src, \"+inf\")) {\n    *value = HUGE_VAL;\n  } else {\n    fast_float::from_chars_result result = fast_float::from_chars(src.data(), src.end(), *value);\n    // nan double could be sent as \"nan\" with any case.\n    if (int(result.ec) != 0 || result.ptr != src.end() || isnan(*value))\n      return false;\n  }\n  return true;\n}\n\nOpResult<ScanOpts> ScanOpts::TryFrom(CmdArgList args, bool allow_novalues) {\n  ScanOpts scan_opts;\n  facade::CmdArgParser parser(args);\n\n  while (parser.HasNext()) {\n    std::string_view pattern;\n    std::string_view type_str;\n\n    if (parser.Check(\"NOVALUES\")) {\n      if (!allow_novalues) {\n        return facade::OpStatus::SYNTAX_ERR;\n      }\n      scan_opts.novalues = true;\n    } else if (parser.Check(\"COUNT\", &scan_opts.limit)) {\n      if (scan_opts.limit == 0)\n        scan_opts.limit = 1;\n    } else if (parser.Check(\"MATCH\", &pattern)) {\n      if (pattern != \"*\")\n        scan_opts.matcher.reset(new GlobMatcher{pattern, true});\n    } else if (parser.Check(\"TYPE\", &type_str)) {\n      CompactObjType obj_type = ObjTypeFromString(type_str);\n      if (obj_type == kInvalidCompactObjType) {\n        return facade::OpStatus::SYNTAX_ERR;\n      }\n      scan_opts.type_filter = obj_type;\n    } else if (parser.Check(\"BUCKET\", &scan_opts.bucket_id)) {\n      // no-op\n    } else if (parser.Check(\"ATTR\")) {\n      scan_opts.mask =\n          parser.MapNext(\"v\", ScanOpts::Mask::Volatile, \"p\", ScanOpts::Mask::Permanent, \"a\",\n                         ScanOpts::Mask::Accessed, \"u\", ScanOpts::Mask::Untouched);\n    } else if (parser.Check(\"MINMSZ\", &scan_opts.min_malloc_size)) {\n      // no-op\n    } else\n      return facade::OpStatus::SYNTAX_ERR;\n  }  // while\n\n  // Check for parsing errors (e.g. 
missing values or invalid integers)\n  if (auto err = parser.TakeError()) {\n    if (err.type == facade::CmdArgParser::INVALID_INT) {\n      return facade::OpStatus::INVALID_INT;\n    }\n    return facade::OpStatus::SYNTAX_ERR;\n  }\n\n  return scan_opts;\n}\n\nbool ScanOpts::Matches(std::string_view val_name) const {\n  return !matcher || matcher->Matches(val_name);\n}\n\nstd::ostream& operator<<(std::ostream& os, const GlobalState& state) {\n  return os << GlobalStateName(state);\n}\n\nScanOpts::~ScanOpts() {\n}\n\nBorrowedInterpreter::BorrowedInterpreter(Transaction* tx, ConnectionState* state) {\n  // Ensure squashing ignores EVAL. We can't run on a stub context, because it doesn't have our\n  // preborrowed interpreter (which can't be shared on multiple threads).\n  CHECK(!tx->IsSquashedStub());\n\n  if (auto borrowed = state->exec_info.preborrowed_interpreter; borrowed) {\n    // Ensure a preborrowed interpreter is only set for an already running MULTI transaction.\n    CHECK_EQ(state->exec_info.state, ConnectionState::ExecInfo::EXEC_RUNNING);\n\n    interpreter_ = borrowed;\n  } else {\n    // A scheduled transaction occupies a place in the transaction queue and holds locks,\n    // preventing other transactions from progressing. Blocking below can deadlock!\n    CHECK(!tx->IsScheduled());\n\n    interpreter_ = ServerState::tlocal()->BorrowInterpreter();\n    owned_ = true;\n  }\n}\n\nBorrowedInterpreter::~BorrowedInterpreter() {\n  if (owned_)\n    ServerState::tlocal()->ReturnInterpreter(interpreter_);\n}\n\n}  // namespace dfly\n"
  },
  {
    "path": "src/server/common.h",
    "content": "// Copyright 2024, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#pragma once\n\n#include <atomic>\n#include <cstddef>\n#include <cstdint>\n#include <string_view>\n#include <vector>\n\n#include \"facade/facade_types.h\"\n#include \"server/common_types.h\"\n\nnamespace dfly {\n\nusing CompactObjType = unsigned;\nclass GlobMatcher;\n\n// Dependent on ExpirePeriod representation of the value.\nconstexpr int64_t kMaxExpireDeadlineSec = (1u << 28) - 1;  // 8.5 years\nconstexpr int64_t kMaxExpireDeadlineMs = kMaxExpireDeadlineSec * 1000;\n\nusing facade::ArgS;\nusing facade::CmdArgList;\nusing facade::CmdArgVec;\nusing facade::MutableSlice;\nusing facade::OpResult;\n\nusing StringVec = std::vector<std::string>;\n\nclass CommandId;\nstruct ConnectionState;\nclass Namespaces;\n\nstruct LockTagOptions {\n  bool enabled = false;\n  char open_locktag = '{';\n  char close_locktag = '}';\n  unsigned skip_n_end_delimiters = 0;\n  std::string prefix;\n\n  // Returns the tag according to the rules defined by this options object.\n  std::string_view Tag(std::string_view key) const;\n\n  static const LockTagOptions& instance();\n};\n\nstd::ostream& operator<<(std::ostream& os, const GlobalState& state);\n\nconst char* GlobalStateName(GlobalState gs);\n\nbool ParseHumanReadableBytes(std::string_view str, int64_t* num_bytes);\nbool ParseDouble(std::string_view src, double* value);\n\nconst char* RdbTypeName(unsigned type);\n\n// Globally used atomics for memory readings\ninline std::atomic_uint64_t used_mem_current{0};\ninline std::atomic_uint64_t rss_mem_current{0};\n// Current value of --maxmemory flag\ninline std::atomic_uint64_t max_memory_limit{0};\n\ninline Namespaces* namespaces = nullptr;\n\n// version 5.11 maps to 511 etc.\n// set upon server start.\ninline unsigned kernel_version = 0;\n\nstruct ScanOpts {\n  ~ScanOpts();  // because of forward declaration\n  ScanOpts() = default;\n  ScanOpts(ScanOpts&& other) = 
default;\n\n  bool Matches(std::string_view val_name) const;\n  static OpResult<ScanOpts> TryFrom(CmdArgList args, bool allow_novalues = false);\n\n  std::unique_ptr<GlobMatcher> matcher;\n  size_t limit = 10;\n  std::optional<CompactObjType> type_filter;\n  unsigned bucket_id = UINT_MAX;\n  enum class Mask {\n    Volatile,   // volatile, keys that have ttl\n    Permanent,  // permanent, keys that do not have ttl\n    Accessed,   // accessed, the key has been accessed since the last load/flush event, or the last\n                // time a flag was reset.\n    Untouched,  // untouched, the key has not been accessed/touched.\n  };\n  std::optional<Mask> mask;\n  size_t min_malloc_size = 0;\n  bool novalues = false;\n};\n\n// I use relative time from Feb 1, 2023 in seconds.\nconstexpr uint64_t kMemberExpiryBase = 1675209600;\n\ninline uint32_t MemberTimeSeconds(uint64_t now_ms) {\n  return (now_ms / 1000) - kMemberExpiryBase;\n}\n\n// Ensures availability of an interpreter for EVAL-like commands and its automatic release.\n// If it's part of MULTI, the preborrowed interpreter is returned, otherwise a new one is acquired.\nstruct BorrowedInterpreter {\n  BorrowedInterpreter(Transaction* tx, ConnectionState* state);\n\n  ~BorrowedInterpreter();\n\n  // Give up ownership of the interpreter, it must be returned manually.\n  Interpreter* Release() && {\n    assert(owned_);\n    owned_ = false;\n    return interpreter_;\n  }\n\n  operator Interpreter*() {\n    return interpreter_;\n  }\n\n private:\n  Interpreter* interpreter_ = nullptr;\n  bool owned_ = false;\n};\n\n}  // namespace dfly\n"
  },
  {
    "path": "src/server/common_types.h",
    "content": "// Copyright 2024, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#pragma once\n\n#include <cstdint>\n\nnamespace dfly {\n\n// Transaction, replication and partitioning identifiers\nusing LSN = uint64_t;\nusing TxId = uint64_t;\nusing TxClock = uint64_t;\nusing SlotId = std::uint16_t;\n\n// Database and shard identifiers (moved from tx_base.h to reduce compilation dependencies)\nusing DbIndex = uint16_t;\nusing ShardId = uint16_t;\nusing LockFp = uint64_t;  // a key fingerprint used by the LockTable.\n\nconstexpr DbIndex kInvalidDbId = DbIndex(-1);\nconstexpr ShardId kInvalidSid = ShardId(-1);\nconstexpr DbIndex kMaxDbId = 1024;  // Reasonable starting point.\n\n// Server state and time enums (moved from common.h to reduce compilation dependencies)\nenum class GlobalState : uint8_t {\n  ACTIVE,\n  LOADING,\n  SHUTTING_DOWN,\n  TAKEN_OVER,\n};\n\nenum class TimeUnit : uint8_t { SEC, MSEC };\n\nenum class LoadBlobResult : uint8_t {\n  kSuccess,\n  kCorrupted,\n  kOutOfMemory,\n  kEmpty,\n};\n\nenum ExpireFlags {\n  EXPIRE_ALWAYS = 0,\n  EXPIRE_NX = 1 << 0,  // Set expiry only when key has no expiry\n  EXPIRE_XX = 1 << 2,  // Set expiry only when the key has expiry\n  EXPIRE_GT = 1 << 3,  // GT: Set expiry only when the new expiry is greater than current one\n  EXPIRE_LT = 1 << 4,  // LT: Set expiry only when the new expiry is less than current one\n};\n\n// Forward declarations for commonly used classes (to reduce header dependencies)\nclass EngineShard;\nclass Transaction;\nclass DbSlice;\nclass ConnectionContext;\nclass CommandContext;\nclass Namespace;\nclass CommandRegistry;\nclass Interpreter;\n\n}  // namespace dfly\n"
  },
  {
    "path": "src/server/config_registry.cc",
    "content": "// Copyright 2023, Roman Gershman.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n#include \"server/config_registry.h\"\n\n#include <absl/flags/reflection.h>\n#include <absl/strings/match.h>\n#include <absl/strings/str_cat.h>\n#include <absl/strings/str_replace.h>\n\n#include \"base/logging.h\"\n#include \"core/glob_matcher.h\"\n#include \"strings/human_readable.h\"\n\nnamespace dfly {\nnamespace {\nusing namespace std;\n\nstring NormalizeConfigName(string_view name) {\n  return absl::StrReplaceAll(name, {{\"-\", \"_\"}, {\".\", \"_\"}});\n}\n}  // namespace\n\n// Convert internal flag name back to user-facing format\n// Example: search_query_string_bytes -> search.query-string-bytes\nstring DenormalizeConfigName(string_view name) {\n  string result{name};\n  if (absl::StartsWith(result, \"search_\")) {\n    // Replace first underscore after \"search\" with dot\n    result.replace(6, 1, \".\");\n    // Replace remaining underscores with dashes\n    for (size_t i = 7; i < result.size(); ++i) {\n      if (result[i] == '_') {\n        result[i] = '-';\n      }\n    }\n  }\n  return result;\n}\n\n// Returns true if the value was updated.\nauto ConfigRegistry::Set(string_view config_name, string_view value) -> SetResult {\n  string name = NormalizeConfigName(config_name);\n\n  util::fb2::LockGuard lk(mu_);\n  auto it = registry_.find(name);\n  if (it == registry_.end())\n    return SetResult::UNKNOWN;\n  if (!it->second.is_mutable)\n    return SetResult::READONLY;\n\n  auto cb = it->second.cb;\n\n  absl::CommandLineFlag* flag = absl::FindCommandLineFlag(name);\n  CHECK(flag) << config_name;\n  if (string error; !flag->ParseFrom(value, &error)) {\n    LOG(WARNING) << error;\n    return SetResult::INVALID;\n  }\n\n  bool success = !cb || cb(*flag);\n  return success ? 
SetResult::OK : SetResult::INVALID;\n}\n\nabsl::CommandLineFlag* ConfigRegistry::GetFlag(std::string_view config_name) {\n  string name = NormalizeConfigName(config_name);\n\n  {\n    util::fb2::LockGuard lk(mu_);\n    if (!registry_.contains(name))\n      return nullptr;\n  }\n\n  absl::CommandLineFlag* flag = absl::FindCommandLineFlag(name);\n  CHECK(flag);\n  return flag;\n}\n\noptional<string> ConfigRegistry::Get(string_view config_name) {\n  absl::CommandLineFlag* flag = GetFlag(config_name);\n  if (!flag) {\n    return nullopt;\n  }\n\n  // For MemoryBytesFlag, return numeric bytes for compatibility.\n  if (flag->IsOfType<strings::MemoryBytesFlag>()) {\n    auto val = flag->TryGet<strings::MemoryBytesFlag>();\n    if (val.has_value()) {\n      return absl::StrCat(val->value);\n    }\n  }\n\n  return flag->CurrentValue();\n}\n\nvoid ConfigRegistry::Reset() {\n  util::fb2::LockGuard lk(mu_);\n  registry_.clear();\n}\n\nvector<string> ConfigRegistry::List(string_view glob) const {\n  string normalized_glob = NormalizeConfigName(glob);\n  GlobMatcher matcher(normalized_glob, false /* case insensitive*/);\n\n  vector<string> res;\n  util::fb2::LockGuard lk(mu_);\n\n  for (const auto& [name, _] : registry_) {\n    if (matcher.Matches(name))\n      res.push_back(name);\n  }\n  return res;\n}\n\nvoid ConfigRegistry::RegisterInternal(string_view config_name, bool is_mutable, WriteCb cb) {\n  string name = NormalizeConfigName(config_name);\n\n  absl::CommandLineFlag* flag = absl::FindCommandLineFlag(name);\n  CHECK(flag) << \"Unknown config name: \" << name;\n\n  util::fb2::LockGuard lk(mu_);\n  auto [it, inserted] = registry_.emplace(name, Entry{std::move(cb), is_mutable});\n  CHECK(inserted) << \"Duplicate config name: \" << name;\n}\n\nvoid ConfigRegistry::ValidateCustomSetter(std::string_view name, WriteCb setter) const {\n  absl::CommandLineFlag* flag = absl::FindCommandLineFlag(name);\n  CHECK(flag) << \"Unknown config name: \" << name;\n  if (setter) {\n    bool 
cb_match = setter(*flag);\n    CHECK(cb_match) << \"Possible type mismatch with setter for flag \" << name;\n  }\n}\n\n}  // namespace dfly\n"
  },
  {
    "path": "src/server/config_registry.h",
    "content": "// Copyright 2023, Roman Gershman.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#include <absl/container/flat_hash_map.h>\n#include <absl/flags/reflection.h>\n\n#include \"util/fibers/synchronization.h\"\n\nnamespace dfly {\n\n// Allows reading and modifying pre-registered configuration values by string names.\n// This class treats dashes (-) are as underscores (_).\nclass ConfigRegistry {\n public:\n  // Accepts the new value as argument. Return true if config was successfully updated.\n  using WriteCb = std::function<bool(const absl::CommandLineFlag&)>;\n\n  ConfigRegistry& Register(std::string_view name) {\n    RegisterInternal(name, false, {});\n    return *this;\n  }\n\n  ConfigRegistry& RegisterMutable(std::string_view name, WriteCb cb = {}) {\n    RegisterInternal(name, true, std::move(cb));\n    return *this;\n  }\n\n  template <typename T>\n  ConfigRegistry& RegisterSetter(std::string_view name, std::function<void(const T&)> f) {\n    ValidateCustomSetter(name,\n                         [](const absl::CommandLineFlag& flag) { return flag.IsOfType<T>(); });\n\n    return RegisterMutable(name, [f](const absl::CommandLineFlag& flag) {\n      auto res = flag.TryGet<T>();\n      if (res.has_value()) {\n        f(*res);\n        return true;\n      }\n      return false;\n    });\n  }\n\n  enum class SetResult : uint8_t {\n    OK,\n    UNKNOWN,\n    READONLY,\n    INVALID,\n  };\n\n  // Returns true if the value was updated.\n  SetResult Set(std::string_view config_name, std::string_view value) ABSL_LOCKS_EXCLUDED(mu_);\n\n  std::optional<std::string> Get(std::string_view config_name) ABSL_LOCKS_EXCLUDED(mu_);\n\n  absl::CommandLineFlag* GetFlag(std::string_view config_name) ABSL_LOCKS_EXCLUDED(mu_);\n\n  void Reset();\n\n  std::vector<std::string> List(std::string_view glob) const ABSL_LOCKS_EXCLUDED(mu_);\n\n private:\n  void RegisterInternal(std::string_view name, bool is_mutable, WriteCb cb)\n      
ABSL_LOCKS_EXCLUDED(mu_);\n  void ValidateCustomSetter(std::string_view name, WriteCb setter) const;\n\n  mutable util::fb2::Mutex mu_;\n\n  struct Entry {\n    WriteCb cb;\n    bool is_mutable;\n  };\n\n  absl::flat_hash_map<std::string, Entry> registry_ ABSL_GUARDED_BY(mu_);\n};\n\ninline ConfigRegistry config_registry;\n\n// Convert internal flag name back to user-facing format for search parameters\n// Example: search_query_string_bytes -> search.query-string-bytes\nstd::string DenormalizeConfigName(std::string_view name);\n\n}  // namespace dfly\n"
  },
  {
    "path": "src/server/conn_context.cc",
    "content": "// Copyright 2022, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#include \"server/conn_context.h\"\n\n#include <atomic>\n\n#include \"base/logging.h\"\n#include \"common/heap_size.h\"\n#include \"facade/reply_builder.h\"\n#include \"server/acl/acl_commands_def.h\"\n#include \"server/channel_store.h\"\n#include \"server/command_registry.h\"\n#include \"server/engine_shard_set.h\"\n#include \"server/server_family.h\"\n#include \"server/server_state.h\"\n#include \"server/transaction.h\"\n#include \"src/facade/dragonfly_connection.h\"\n\nnamespace dfly {\n\nusing namespace std;\nusing namespace facade;\nusing cmn::HeapSize;\n\nnamespace {\nvoid SendSubscriptionChangedResponse(string_view action, std::optional<string_view> topic,\n                                     unsigned count, RedisReplyBuilder* rb) {\n  rb->StartCollection(3, CollectionType::PUSH);\n  rb->SendBulkString(action);\n  if (topic.has_value())\n    rb->SendBulkString(topic.value());\n  else\n    rb->SendNull();\n  rb->SendLong(count);\n}\n\nvector<string> FormatExecSlowlog(const ConnectionState& state) {\n  const auto& info = state.exec_info;\n  return {absl::StrCat(\"num_cmds: \", info.body.size()), absl::StrCat(\"is_write: \", info.is_write)};\n}\n\nvector<string> FormatEvalSlowlog(const ConnectionState& state) {\n  if (!state.script_info)  // EVAL failed to initialize (error)\n    return {};\n\n  const auto& sinfo = *state.script_info;\n  return {\n      string{sinfo.stats.sha, sizeof(sinfo.stats.sha)},\n      absl::StrCat(\"num_cmds: \", sinfo.stats.num_commands),\n      absl::StrCat(\"slow_cmds: \", sinfo.stats.slow_commands.load(memory_order_relaxed)),\n      absl::StrCat(\"tx_mode: \", int(sinfo.stats.tx_mode)),\n      absl::StrCat(\"tx_shards: \", int(sinfo.stats.tx_shards)),\n      absl::StrCat(\"is_write: \", !sinfo.read_only),\n      absl::StrCat(\"lock_tags: \", sinfo.lock_tags.size()),\n  };\n}\n\n}  // 
namespace\n\nStoredCmd::StoredCmd(const CommandId* cid, facade::ArgSlice args, facade::ReplyMode mode)\n    : cid_{cid}, args_{args}, reply_mode_{mode} {\n  backed_ = std::make_unique<cmn::BackedArguments>(args.begin(), args.end(), args.size());\n  args_ = facade::ParsedArgs{*backed_};\n}\n\nCmdArgList StoredCmd::Slice(CmdArgVec* scratch) const {\n  return args_.ToSlice(scratch);\n}\n\nstd::string StoredCmd::FirstArg() const {\n  if (NumArgs() == 0) {\n    return {};\n  }\n  return string{args_.Front()};\n}\n\nConnectionContext::ConnectionContext(facade::Connection* owner, acl::UserCredentials cred)\n    : facade::ConnectionContext(owner) {\n  if (owner) {\n    skip_acl_validation = owner->IsPrivileged();\n    has_main_or_memcache_listener = owner->IsMainOrMemcache();\n  }\n\n  keys = std::move(cred.keys);\n  pub_sub = std::move(cred.pub_sub);\n  if (cred.acl_commands.empty()) {\n    acl_commands = std::vector<uint64_t>(acl::NumberOfFamilies(), acl::NONE_COMMANDS);\n  } else {\n    acl_commands = std::move(cred.acl_commands);\n  }\n  acl_db_idx = cred.db;\n}\n\nvoid ConnectionContext::ChangeMonitor(bool start) {\n  // Ensure idempotency: MONITOR may be queued multiple times inside MULTI/EXEC.\n  if (start == monitor)\n    return;\n\n  // This will either remove or register a new connection\n  // at the \"top level\" thread --> ServerState context\n  // note that we are registering/removing this connection to the thread at which at run\n  // then notify all other threads that there is a change in the number of monitors\n  auto& my_monitors = ServerState::tlocal()->Monitors();\n  if (start) {\n    my_monitors.Add(conn());\n  } else {\n    VLOG(1) << \"connection \" << conn()->GetClientId() << \" no longer needs to be monitored\";\n    my_monitors.Remove(conn());\n  }\n  // Tell other threads that about the change in the number of connection that we monitor\n  shard_set->pool()->AwaitBrief(\n      [start](unsigned, auto*) { 
ServerState::tlocal()->Monitors().NotifyChangeCount(start); });\n  EnableMonitoring(start);\n}\n\nvoid ConnectionContext::ChangeSubscription(bool to_add, bool to_reply, bool sharded,\n                                           CmdArgList args, facade::RedisReplyBuilder* rb) {\n  vector<unsigned> result = ChangeSubscriptions(args, false, to_add, to_reply);\n\n  if (to_reply) {\n    const string_view actionRegular[2] = {\"unsubscribe\", \"subscribe\"};\n    const string_view actionSharded[2] = {\"sunsubscribe\", \"ssubscribe\"};\n    const absl::Span<const string_view> action = sharded ? actionSharded : actionRegular;\n    SinkReplyBuilder::ReplyScope scope{rb};\n    for (size_t i = 0; i < result.size(); ++i) {\n      SendSubscriptionChangedResponse(action[to_add], ArgS(args, i), result[i], rb);\n    }\n  }\n}\n\nvoid ConnectionContext::ChangePSubscription(bool to_add, bool to_reply, CmdArgList args,\n                                            facade::RedisReplyBuilder* rb) {\n  vector<unsigned> result = ChangeSubscriptions(args, true, to_add, to_reply);\n\n  if (to_reply) {\n    const char* action[2] = {\"punsubscribe\", \"psubscribe\"};\n    if (result.size() == 0) {\n      return SendSubscriptionChangedResponse(action[to_add], std::nullopt, 0, rb);\n    }\n\n    SinkReplyBuilder::ReplyScope scope{rb};\n    for (size_t i = 0; i < result.size(); ++i) {\n      SendSubscriptionChangedResponse(action[to_add], ArgS(args, i), result[i], rb);\n    }\n  }\n}\n\nvoid ConnectionContext::UnsubscribeAll(bool to_reply, facade::RedisReplyBuilder* rb) {\n  if (to_reply && (!conn_state.subscribe_info || conn_state.subscribe_info->channels.empty())) {\n    return SendSubscriptionChangedResponse(\"unsubscribe\", std::nullopt, 0, rb);\n  }\n  StringVec channels(conn_state.subscribe_info->channels.begin(),\n                     conn_state.subscribe_info->channels.end());\n  CmdArgVec arg_vec(channels.begin(), channels.end());\n  ChangeSubscription(false, to_reply, false, 
CmdArgList{arg_vec}, rb);\n}\n\nvoid ConnectionContext::PUnsubscribeAll(bool to_reply, facade::RedisReplyBuilder* rb) {\n  if (to_reply && (!conn_state.subscribe_info || conn_state.subscribe_info->patterns.empty())) {\n    return SendSubscriptionChangedResponse(\"punsubscribe\", std::nullopt, 0, rb);\n  }\n\n  StringVec patterns(conn_state.subscribe_info->patterns.begin(),\n                     conn_state.subscribe_info->patterns.end());\n  CmdArgVec arg_vec(patterns.begin(), patterns.end());\n  ChangePSubscription(false, to_reply, CmdArgList{arg_vec}, rb);\n}\n\nsize_t ConnectionState::ExecInfo::UsedMemory() const {\n  return HeapSize(body) + HeapSize(watched_keys);\n}\n\nvoid ConnectionState::ExecInfo::AddStoredCmd(const CommandId* cid, ArgSlice args) {\n  body.emplace_back(cid, args);\n  stored_cmd_bytes += body.back().UsedMemory();\n  is_write |= cid->IsJournaled();\n}\n\nsize_t ConnectionState::ExecInfo::ClearStoredCmds() {\n  const size_t used = GetStoredCmdBytes();\n  vector<StoredCmd>{}.swap(body);\n  stored_cmd_bytes = 0;\n  return used;\n}\n\nsize_t ConnectionState::ScriptInfo::UsedMemory() const {\n  return HeapSize(lock_tags) + async_cmds_heap_mem;\n}\n\nsize_t ConnectionState::SubscribeInfo::UsedMemory() const {\n  return HeapSize(channels) + HeapSize(patterns);\n}\n\nsize_t ConnectionState::UsedMemory() const {\n  return HeapSize(exec_info) + HeapSize(script_info) + HeapSize(subscribe_info);\n}\n\nsize_t ConnectionContext::UsedMemory() const {\n  return facade::ConnectionContext::UsedMemory() + HeapSize(conn_state) +\n         HeapSize(authed_username) + HeapSize(acl_commands) + HeapSize(keys.key_globs) +\n         HeapSize(pub_sub.globs);\n}\n\nvoid ConnectionContext::Unsubscribe(std::string_view channel) {\n  auto* sinfo = conn_state.subscribe_info.get();\n  DCHECK(sinfo);\n  auto erased = sinfo->channels.erase(channel);\n  DCHECK(erased);\n  if (sinfo->IsEmpty()) {\n    conn_state.subscribe_info.reset();\n    DCHECK_GE(subscriptions, 1u);\n    
--subscriptions;\n  }\n}\n\nvector<unsigned> ConnectionContext::ChangeSubscriptions(CmdArgList channels, bool pattern,\n                                                        bool to_add, bool to_reply) {\n  vector<unsigned> result(to_reply ? channels.size() : 0, 0);\n\n  if (!to_add && !conn_state.subscribe_info)\n    return result;\n\n  if (!conn_state.subscribe_info) {\n    DCHECK(to_add);\n\n    conn_state.subscribe_info.reset(new ConnectionState::SubscribeInfo);\n    subscriptions++;\n  }\n\n  auto& sinfo = *conn_state.subscribe_info.get();\n  auto& local_store = pattern ? sinfo.patterns : sinfo.channels;\n\n  int32_t tid = util::ProactorBase::me()->GetPoolIndex();\n  DCHECK_GE(tid, 0);\n\n  ChannelStoreUpdater csu{pattern, to_add, this, uint32_t(tid)};\n\n  // Gather all the channels we need to subscribe to / remove.\n  size_t i = 0;\n  for (string_view channel : channels) {\n    if (to_add && local_store.emplace(channel).second)\n      csu.Record(channel);\n    else if (!to_add && local_store.erase(channel) > 0)\n      csu.Record(channel);\n\n    if (to_reply)\n      result[i++] = sinfo.SubscriptionCount();\n  }\n\n  csu.Apply();\n\n  // Important to reset conn_state.subscribe_info only after all references to it were\n  // removed.\n  if (!to_add && conn_state.subscribe_info->IsEmpty()) {\n    conn_state.subscribe_info.reset();\n    DCHECK_GE(subscriptions, 1u);\n    subscriptions--;\n  }\n\n  return result;\n}\n\nvoid ConnectionState::ExecInfo::Clear() {\n  DCHECK(!preborrowed_interpreter);  // Must have been released properly\n  state = EXEC_INACTIVE;\n  const size_t cleared_size = ClearStoredCmds();\n  ServerState::tlocal()->stats.stored_cmd_bytes -= cleared_size;\n  is_write = false;\n  ClearWatched();\n}\n\nvoid ConnectionState::ExecInfo::ClearWatched() {\n  watched_keys.clear();\n  watched_dirty.store(false, memory_order_relaxed);\n  watched_existed = 0;\n}\n\nbool ConnectionState::ClientTracking::ShouldTrackKeys() const {\n  if (!IsTrackingOn()) {\n 
   return false;\n  }\n\n  if (noloop_ == true) {\n    // Once we implement REDIRECT this should return true since noloop\n    // without it only affects the current connection\n    return false;\n  }\n\n  if (option_ == NONE) {\n    return true;\n  }\n\n  const bool match = (seq_num_ == (1 + caching_seq_num_));\n  return option_ == OPTIN ? match : !match;\n}\n\nvoid CommandContext::ReuseInternal() {\n  cid_ = nullptr;\n  tx_ = nullptr;\n  arg_slice_backing.clear();\n  start_time_ns = 0;\n}\n\nvoid CommandContext::RecordLatency(facade::ArgSlice tail_args) const {\n  DCHECK_GT(start_time_ns, 0u);\n  int64_t after = absl::GetCurrentTimeNanos();\n\n  ServerState* ss = ServerState::SafeTLocal();  // Might have migrated thread, read after invocation\n  int64_t execution_time_usec = (after - start_time_ns) / 1000;\n\n  cid_->RecordLatency(ss->thread_index(), execution_time_usec);\n  DCHECK(conn_cntx_ != nullptr);\n\n  // TODO: we should probably discard more than only blocking commands here\n  const auto* conn = server_conn_cntx()->conn();\n  if (conn == nullptr || (cid_->opt_mask() & CO::BLOCKING))\n    return;\n\n  if (!ss->ShouldLogSlowCmd(execution_time_usec))  // It was not a slow command\n    return;\n\n  auto* cntx = static_cast<dfly::ConnectionContext*>(conn_cntx());\n\n  // Log nested commands of scripts that made it into slowlog\n  if (auto sinfo = cntx->conn_state.script_info.get(); !cid_->MultiControlKind() && sinfo)\n    sinfo->stats.slow_commands.fetch_add(1, memory_order_relaxed);\n\n  vector<string> aux_params;\n  CmdArgVec aux_slice;\n\n  // Rewrite arguments for exec/eval with stats\n  if (auto mck = cid_->MultiControlKind(); mck) {\n    switch (*mck) {\n      case CO::MultiControlKind::EXEC:\n        if (cid_->name() == \"EXEC\")\n          aux_params = FormatExecSlowlog(cntx->conn_state);\n        break;\n      case CO::MultiControlKind::EVAL:\n        aux_params = FormatEvalSlowlog(cntx->conn_state);\n        break;\n    };\n    aux_slice = 
{aux_params.begin(), aux_params.end()};\n    if (tail_args.size() > 0) {\n      if (!aux_params.empty())\n        tail_args.remove_prefix(1);  // remove script/sha from eval/evalsha\n      aux_slice.insert(aux_slice.end(), tail_args.begin(), tail_args.end());\n    }\n    tail_args = aux_slice;\n  }\n\n  ServerState::SafeTLocal()->GetSlowLog().Add(cid_->name(), tail_args, conn->GetName(),\n                                              conn->RemoteEndpointStr(), execution_time_usec,\n                                              absl::GetCurrentTimeNanos() / 1000);\n}\n\n}  // namespace dfly\n"
  },
  {
    "path": "src/server/conn_context.h",
    "content": "// Copyright 2022, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#pragma once\n\n#include <absl/container/flat_hash_set.h>\n\n#include \"facade/conn_context.h\"\n#include \"facade/parsed_command.h\"\n#include \"facade/reply_mode.h\"\n#include \"server/acl/acl_commands_def.h\"\n#include \"server/common.h\"\n#include \"server/tx_base.h\"\n#include \"server/version.h\"\n\nnamespace dfly {\n\nclass EngineShardSet;\nclass ChannelStore;\nclass Interpreter;\nstruct FlowInfo;\n\n// Stores command id and arguments for delayed invocation.\n// Used for storing MULTI/EXEC commands.\nclass StoredCmd {\n public:\n  // Deep copy of args, creates backing storage internally.\n  StoredCmd(const CommandId* cid, ArgSlice args, facade::ReplyMode mode = facade::ReplyMode::FULL);\n\n  // Shallow copy of args.\n  StoredCmd(const CommandId* cid, facade::ParsedArgs args)\n      : cid_{cid}, args_{args}, reply_mode_(facade::ReplyMode::FULL) {\n  }\n\n  size_t NumArgs() const {\n    return args_.size();\n  }\n\n  size_t UsedMemory() const {\n    return backed_ ? backed_->HeapMemory() + sizeof(*backed_) : 0;\n  }\n\n  facade::ArgSlice Slice(CmdArgVec* scratch) const;\n  std::string FirstArg() const;\n\n  const CommandId* Cid() const {\n    return cid_;\n  }\n\n  facade::ReplyMode ReplyMode() const {\n    return reply_mode_;\n  }\n\n private:\n  const CommandId* cid_;     // underlying command\n  facade::ParsedArgs args_;  // arguments\n\n  // TODO: we could optimize the storage further by introducing StoredCmdCollection and\n  // keep the backing storage there. 
Then this class will only use shallow copies.\n  std::unique_ptr<cmn::BackedArguments> backed_;\n  facade::ReplyMode reply_mode_;  // reply mode\n};\n\nstruct ConnectionState {\n  // MULTI-EXEC transaction related data.\n  struct ExecInfo {\n    enum ExecState : uint8_t { EXEC_INACTIVE, EXEC_COLLECT, EXEC_RUNNING, EXEC_ERROR };\n\n    ExecInfo() = default;\n    // ExecInfo is immovable due to being referenced from DbSlice.\n    ExecInfo(ExecInfo&&) = delete;\n\n    bool IsCollecting() const {\n      return state == EXEC_COLLECT;\n    }\n\n    bool IsRunning() const {\n      return state == EXEC_RUNNING;\n    }\n\n    // Resets to blank state after EXEC or DISCARD\n    void Clear();\n\n    // Resets local watched keys info. Does not unregister the keys from DbSlices.\n    void ClearWatched();\n\n    size_t UsedMemory() const;\n\n    // Deep copies arguments and updates the stored_cmd_bytes.\n    void AddStoredCmd(const CommandId* cid, ArgSlice args);\n\n    // Empties the body vector and resets stored_cmd_bytes to 0. Returns the size before data was\n    // cleared.\n    size_t ClearStoredCmds();\n\n    // Returns memory used by the body field without iterating over each stored command\n    size_t GetStoredCmdBytes() const {\n      return stored_cmd_bytes + body.capacity() * sizeof(StoredCmd);\n    }\n\n    ExecState state = EXEC_INACTIVE;\n    std::vector<StoredCmd> body;\n    bool is_write = false;\n\n    std::vector<std::pair<DbIndex, std::string>> watched_keys;  // List of keys registered by WATCH\n    std::atomic_bool watched_dirty = false;  // Set if a watched key was changed before EXEC\n    uint32_t watched_existed = 0;            // Number of times watch was called on an existing key\n\n    // If the transaction contains EVAL calls, preborrow an interpreter that will be used for all of\n    // them. 
This has to be done to avoid potentially blocking when borrowing interpreters amid\n    // executing the multi transaction, which can create deadlocks by blocking other transactions\n    // that already borrowed all available interpreters but wait for keys to be unlocked.\n    Interpreter* preborrowed_interpreter = nullptr;\n\n    // The total size of all stored commands kept in \"body\". Does not include memory allocated by\n    // the \"body\" vector.\n    size_t stored_cmd_bytes = 0;\n  };\n\n  // Lua-script related data.\n  struct ScriptInfo {\n    size_t UsedMemory() const;\n\n    absl::flat_hash_set<LockTag> lock_tags;  // declared tags\n    bool read_only = false;\n\n    size_t async_cmds_heap_mem = 0;     // bytes used by async_cmds\n    size_t async_cmds_heap_limit = 0;   // max bytes allowed for async_cmds\n    std::vector<StoredCmd> async_cmds;  // aggregated by acall\n\n    struct Stats {\n      char sha[40];                            // sha of script\n      unsigned num_commands = 0;               // total number of command executed\n      std::atomic_uint32_t slow_commands = 0;  // commands that made it into slowlog\n\n      uint8_t tx_mode = 0;     // value of Transaction::MultiMode\n      unsigned tx_shards = 0;  // Number of shards on the transaction\n    } stats;\n  };\n\n  // PUB-SUB messaging related data.\n  struct SubscribeInfo {\n    bool IsEmpty() const {\n      return channels.empty() && patterns.empty();\n    }\n\n    unsigned SubscriptionCount() const {\n      return channels.size() + patterns.size();\n    }\n\n    size_t UsedMemory() const;\n\n    // TODO: to provide unique_strings across service. 
This will allow us to use string_view here.\n    absl::flat_hash_set<std::string> channels;\n    absl::flat_hash_set<std::string> patterns;\n  };\n\n  struct ReplicationInfo {\n    // If this server is master, and this connection is from a secondary replica,\n    // then it holds positive sync session id.\n    uint32_t repl_session_id = 0;\n    uint32_t repl_flow_id = UINT32_MAX;\n    std::string repl_ip_address;\n    uint32_t repl_listening_port = 0;\n    DflyVersion repl_version = DflyVersion::VER1;\n  };\n\n  struct SquashingInfo {\n    // Pointer to the original underlying context of the base command.\n    // Only const access is possible for reading from multiple threads,\n    // each squashing thread has its own proxy context that contains this info.\n    const ConnectionContext* owner = nullptr;\n  };\n\n  size_t UsedMemory() const;\n\n  // Client tracking is a per-connection state machine that adheres to the requirements\n  // of the CLIENT TRACKING command. Note that the semantics described below are enforced\n  // by the tests in server_family_test. The rules are:\n  // 1. If CLIENT TRACKING is ON then each READ command must be tracked. Invalidation\n  //    messages are sent `only once`. Subsequent changes of the same key require the\n  //    client to re-read the key in order to receive the next invalidation message.\n  // 2. CLIENT TRACKING ON OPTIN turns on optional tracking. Read commands are not\n  //    tracked unless the client issues a CLIENT CACHING YES command which conditionally\n  //    allows the tracking of the command that follows CACHING YES. 
For example:\n  //    >> CLIENT TRACKING ON\n  //    >> CLIENT CACHING YES\n  //    >> GET foo  <--------------------- From now foo is being tracked\n  //    However:\n  //    >> CLIENT TRACKING ON\n  //    >> CLIENT CACHING YES\n  //    >> SET foo bar\n  //    >> GET foo <--------------------- is *NOT* tracked since GET does not succeed CACHING\n  //    Also, in the context of multi transactions, CLIENT CACHING YES is *STICKY*:\n  //    >> CLIENT TRACKING ON\n  //    >> CLIENT CACHING YES\n  //    >> MULTI\n  //    >>   GET foo\n  //    >>   SET foo bar\n  //    >>   GET brother_foo\n  //    >> EXEC\n  //    From this point onwards `foo` and `brother_foo` keys are tracked. The same applies if\n  //    CACHING YES is used within the MULTI/EXEC block.\n  //\n  // The state machine implements the above rules. We need to track:\n  // 1. If TRACKING is ON and OPTIN\n  // 2. Stickiness of CACHING as described above\n  //\n  // We introduce a monotonic counter called sequence number which we increment only:\n  // * On InvokeCmd when we are not Collecting (multi)\n  // We introduce another counter called caching_seq_num which is set to seq_num\n  // when the user sends a CLIENT CACHING YES command\n  // If seq_num == caching_seq_num + 1 then we know that we should Track().\n  class ClientTracking {\n   public:\n    enum Options : uint8_t {\n      NONE,   // NO subcommand, that is no OPTIN and no OPTOUT was used when CLIENT TRACKING was\n              // called. We track all keys of read commands.\n      OPTIN,  // OPTIN was used with CLIENT TRACKING. We only track keys of read commands preceded\n              // by CACHING TRUE command.\n      OPTOUT  // OPTOUT was used with CLIENT TRACKING. 
We track all keys of read commands except the\n              // ones preceded by a CACHING FALSE command.\n    };\n\n    // Sets to true when CLIENT TRACKING is ON\n    void SetClientTracking(bool is_on) {\n      tracking_enabled_ = is_on;\n    }\n\n    // Increment current sequence number\n    void IncrementSequenceNumber() {\n      ++seq_num_;\n    }\n\n    // Set if OPTIN/OPTOUT subcommand is used in CLIENT TRACKING\n    void SetOption(Options option) {\n      option_ = option;\n    }\n\n    void SetNoLoop(bool noloop) {\n      noloop_ = noloop;\n    }\n\n    // Check if the keys should be tracked. Result adheres to the state machine described above.\n    bool ShouldTrackKeys() const;\n\n    // Check only if CLIENT TRACKING is ON\n    bool IsTrackingOn() const {\n      return tracking_enabled_;\n    }\n\n    // Called by CLIENT CACHING YES and caches the current seq_num_\n    void SetCachingSequenceNumber(bool is_multi) {\n      // We need -1 when we are in multi\n      caching_seq_num_ = is_multi && seq_num_ != 0 ? 
seq_num_ - 1 : seq_num_;\n    }\n\n    void ResetCachingSequenceNumber() {\n      caching_seq_num_ = 1;\n    }\n\n    bool HasOption(Options option) const {\n      return option_ == option;\n    }\n\n   private:\n    // a flag indicating whether the client has turned on client tracking.\n    bool tracking_enabled_ = false;\n    bool noloop_ = false;\n    Options option_ = NONE;\n    // sequence number\n    size_t seq_num_ = 0;\n    size_t caching_seq_num_ = 1;\n  };\n\n public:\n  DbIndex db_index = 0;\n\n  ExecInfo exec_info;\n  ReplicationInfo replication_info;\n\n  std::unique_ptr<ScriptInfo> script_info;\n  std::unique_ptr<SubscribeInfo> subscribe_info;\n  ClientTracking tracking_info_;\n};\n\nclass ConnectionContext : public facade::ConnectionContext {\n public:\n  ConnectionContext(facade::Connection* owner, dfly::acl::UserCredentials cred);\n\n  struct DebugInfo {\n    uint32_t shards_count = 0;\n    TxClock clock = 0;\n  };\n\n  DebugInfo last_command_debug;\n\n  // TODO: to introduce proper accessors.\n  Namespace* ns = nullptr;\n  Transaction* transaction = nullptr;\n\n  ConnectionState conn_state;\n\n  DbIndex db_index() const {\n    return conn_state.db_index;\n  }\n\n  void ChangeSubscription(bool to_add, bool to_reply, bool sharded, CmdArgList args,\n                          facade::RedisReplyBuilder* rb);\n\n  void ChangePSubscription(bool to_add, bool to_reply, CmdArgList args,\n                           facade::RedisReplyBuilder* rb);\n  void UnsubscribeAll(bool to_reply, facade::RedisReplyBuilder* rb);\n  void PUnsubscribeAll(bool to_reply, facade::RedisReplyBuilder* rb);\n  void ChangeMonitor(bool start);  // either start or stop monitor on a given connection\n\n  size_t UsedMemory() const override;\n\n  virtual void Unsubscribe(std::string_view channel) override;\n\n  // Whether this connection is a connection from a replica to its master.\n  // This flag is true only on replica side, where we need to setup a special ConnectionContext\n  // 
instance that helps applying commands coming from master.\n  bool is_replicating = false;\n\n  bool monitor = false;  // when a monitor command is sent over a given connection, we need to be\n                         // aware of it as a state for the connection\n  bool journal_emulated = false;  // whether it is used to dispatch journal commands\n\n  // Reference to a master-side FlowInfo for this connection if it is a replication connection.\n  FlowInfo* master_repl_flow = nullptr;\n\n  // The related connection is bound to main listener or serves the memcached protocol\n  bool has_main_or_memcache_listener = false;\n\n  // ACLs.\n  // The following variables represent the ACL rules of the context.\n  // Each command, before run, is authorized against those rules by\n  // IsUserAllowedToInvokeCmd(and variants) in validator.cc\n\n  // Username\n  std::string authed_username{\"default\"};\n\n  // Each entry in the list is a bitfield representing a specific command family,\n  // where each bit corresponds to an individual command within that family.\n  // Together, these entries encode the user's full ACL to commands.\n  // The index 'i' in 'acl_commands[i]' refers to the command family based on\n  // its registration order at runtime. For more details, see acl_commands_def.h.\n  std::vector<uint64_t> acl_commands;\n\n  // Keyspace. Each key referenced in a command must match (any) of the rules (globs).\n  dfly::acl::AclKeys keys;\n\n  // Pub/sub channels. Each channel referenced in a command must match (any) of the rules (globs).\n  dfly::acl::AclPubSub pub_sub;\n\n  // db index, std::numeric_limits<size_t>::max for ALL db's. 
Dragonfly specific extension.\n  size_t acl_db_idx = std::numeric_limits<size_t>::max();\n\n  // Skip ACL validation, used by internal commands and commands run on admin port\n  bool skip_acl_validation = false;\n\n private:\n  void EnableMonitoring(bool enable) {\n    subscriptions++;  // required to support the monitoring\n    monitor = enable;\n  }\n\n  std::vector<unsigned> ChangeSubscriptions(CmdArgList channels, bool pattern, bool to_add,\n                                            bool to_reply);\n};\n\nclass CommandContext : public facade::ParsedCommand {\n public:\n  CommandContext() = default;\n  CommandContext(facade::SinkReplyBuilder* rb, facade::ConnectionContext* conn_cntx) {\n    Init(rb, conn_cntx);\n  }\n\n  void SetupTx(const CommandId* cid, Transaction* tx) {\n    cid_ = cid;\n    tx_ = tx;\n  }\n\n  void UpdateCid(const CommandId* cid) {\n    cid_ = cid;\n  }\n\n  virtual size_t GetSize() const override {\n    return sizeof(CommandContext);\n  }\n\n  ConnectionContext* server_conn_cntx() const {\n    return static_cast<ConnectionContext*>(conn_cntx_);\n  }\n\n  void RecordLatency(facade::ArgSlice tail_args) const;\n\n  facade::Connection* conn() const {\n    return conn_cntx_->conn();\n  }\n\n  facade::SinkReplyBuilder* SwapReplier(facade::SinkReplyBuilder* new_rb) {\n    return std::exchange(rb_, new_rb);\n  }\n\n  Transaction* tx() const {\n    return tx_;\n  }\n\n  const CommandId* cid() const {\n    return cid_;\n  }\n\n  uint64_t start_time_ns = 0;\n\n  // Stores backing array for tail args slice\n  CmdArgVec arg_slice_backing;\n\n protected:\n  void ReuseInternal() final;\n\n  Transaction* tx_ = nullptr;\n  const CommandId* cid_ = nullptr;\n};\n\n}  // namespace dfly\n"
  },
  {
    "path": "src/server/container_utils.cc",
    "content": "// Copyright 2022, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n#include \"server/container_utils.h\"\n\n#include \"base/flags.h\"\n#include \"base/logging.h\"\n#include \"core/detail/listpack_wrap.h\"\n#include \"core/qlist.h\"\n#include \"core/sorted_map.h\"\n#include \"core/string_map.h\"\n#include \"core/string_set.h\"\n#include \"server/db_slice.h\"\n#include \"server/engine_shard_set.h\"\n#include \"server/namespaces.h\"\n#include \"server/transaction.h\"\n#include \"src/facade/op_status.h\"\n\nextern \"C\" {\n#include \"redis/intset.h\"\n#include \"redis/listpack.h\"\n#include \"redis/redis_aux.h\"\n#include \"redis/util.h\"\n}\n\nnamespace dfly::container_utils {\nusing namespace std;\nnamespace {\n\nstruct ShardFFResult {\n  PrimeKey key;\n  ShardId sid = kInvalidSid;\n};\n\n// Returns (iterator, args-index) if found, KEY_NOTFOUND otherwise.\n// If multiple keys are found, returns the first index in the ArgSlice.\nOpResult<std::pair<DbSlice::ConstIterator, unsigned>> FindFirstReadOnly(const DbSlice& db_slice,\n                                                                        const DbContext& cntx,\n                                                                        const ShardArgs& args,\n                                                                        int req_obj_type) {\n  DCHECK(!args.Empty());\n\n  for (auto it = args.begin(); it != args.end(); ++it) {\n    OpResult<DbSlice::ConstIterator> res = db_slice.FindReadOnly(cntx, *it, req_obj_type);\n    if (res)\n      return make_pair(res.value(), unsigned(it.index()));\n    if (res.status() != OpStatus::KEY_NOTFOUND)\n      return res.status();\n  }\n\n  VLOG(2) << \"FindFirst not found\";\n  return OpStatus::KEY_NOTFOUND;\n}\n\n// Find first non-empty key of a single shard transaction, pass it to `func` and return the key.\n// If no such key exists or a wrong type is found, the apropriate status is returned.\n// Optimized version of 
`FindFirstNonEmpty` below.\nOpResult<string> FindFirstNonEmptySingleShard(Transaction* trans, int req_obj_type,\n                                              BlockingResultCb func) {\n  DCHECK_EQ(trans->GetUniqueShardCnt(), 1u);\n  string key;\n  auto cb = [&](Transaction* t, EngineShard* shard) -> Transaction::RunnableResult {\n    ShardId sid = shard->shard_id();\n    auto args = t->GetShardArgs(sid);\n    auto ff_res = FindFirstReadOnly(t->GetDbSlice(sid), t->GetDbContext(), args, req_obj_type);\n\n    if (ff_res == OpStatus::WRONG_TYPE)\n      return OpStatus::WRONG_TYPE;\n\n    if (ff_res == OpStatus::KEY_NOTFOUND)\n      return {OpStatus::KEY_NOTFOUND, Transaction::RunnableResult::AVOID_CONCLUDING};\n\n    CHECK(ff_res.ok());  // No other errors possible\n    ff_res->first->first.GetString(&key);\n    func(t, shard, key);\n    return OpStatus::OK;\n  };\n\n  // Schedule single hop and hopefully find a key, otherwise avoid concluding\n  OpStatus status = trans->ScheduleSingleHop(cb);\n  if (status == OpStatus::OK)\n    return key;\n  return status;\n}\n\n// Find first non-empty key (sorted by order in command arguments) and return it,\n// otherwise return not found or wrong type error.\nOpResult<ShardFFResult> FindFirstNonEmpty(Transaction* trans, int req_obj_type) {\n  DCHECK_GT(trans->GetUniqueShardCnt(), 1u);\n\n  using FFResult = std::tuple<PrimeKey, unsigned, ShardId>;  // key, argument index, sid\n  VLOG(2) << \"FindFirst::Find \" << trans->DebugId();\n\n  // Holds Find results: (iterator to a found key, and its index in the passed arguments).\n  // See DbSlice::FindFirst for more details.\n  std::vector<OpResult<FFResult>> find_res(shard_set->size());\n  std::fill(find_res.begin(), find_res.end(), OpStatus::KEY_NOTFOUND);\n\n  auto cb = [&](Transaction* t, EngineShard* shard) {\n    ShardId sid = shard->shard_id();\n    auto args = t->GetShardArgs(sid);\n    auto ff_res = FindFirstReadOnly(t->GetDbSlice(sid), t->GetDbContext(), args, req_obj_type);\n   
 if (ff_res) {\n      find_res[shard->shard_id()] =\n          FFResult{ff_res->first->first.AsRef(), ff_res->second, shard->shard_id()};\n    } else {\n      find_res[shard->shard_id()] = ff_res.status();\n    }\n    return OpStatus::OK;\n  };\n\n  trans->Execute(std::move(cb), false);\n\n  // If any key is of the wrong type, report it immediately\n  if (std::find(find_res.begin(), find_res.end(), OpStatus::WRONG_TYPE) != find_res.end())\n    return OpStatus::WRONG_TYPE;\n\n  // Order result by their keys position in the command arguments, push errors to back\n  auto comp = [](const OpResult<FFResult>& lhs, const OpResult<FFResult>& rhs) {\n    if (!lhs || !rhs)\n      return lhs.ok();\n    size_t i1 = std::get<1>(*lhs);\n    size_t i2 = std::get<1>(*rhs);\n    return i1 < i2;\n  };\n\n  // Find first element by the order above, so the first key. Returns error only if all are errors\n  auto it = std::min_element(find_res.begin(), find_res.end(), comp);\n  DCHECK(it != find_res.end());\n\n  if (*it == OpStatus::KEY_NOTFOUND)\n    return OpStatus::KEY_NOTFOUND;\n\n  CHECK(it->ok());  // No other errors than WRONG_TYPE and KEY_NOTFOUND\n  FFResult& res = **it;\n  return ShardFFResult{std::get<PrimeKey>(res).AsRef(), std::get<ShardId>(res)};\n}\n\n}  // namespace\n\nusing namespace std;\n\nbool IterateList(const PrimeValue& pv, const IterateFunc& func, size_t start, size_t end) {\n  DCHECK_LE(start, end);\n  bool success = true;\n  size_t len = pv.Size();\n  if (len == 0) {\n    return true;\n  }\n\n  if (end >= len) {\n    end = len - 1;\n    if (start > end) {\n      return true;\n    }\n  }\n\n  if (pv.Encoding() == kEncodingListPack) {\n    uint8_t* lp = static_cast<uint8_t*>(pv.RObjPtr());\n    uint8_t* p = lpSeek(lp, start);\n    while (p && start <= end) {\n      unsigned int slen;\n      long long lval;\n      uint8_t* vstr = lpGetValue(p, &slen, &lval);\n\n      if (vstr) {\n        success = func(ContainerEntry{reinterpret_cast<const char*>(vstr), slen});\n  
    } else {\n        success = func(ContainerEntry{lval});\n      }\n\n      if (!success)\n        break;\n\n      p = lpNext(lp, p);\n      start++;\n    }\n    return success;\n  }\n\n  DCHECK_EQ(pv.Encoding(), kEncodingQL2);\n  QList* ql = static_cast<QList*>(pv.RObjPtr());\n\n  ql->Iterate(\n      [&](const CollectionEntry& entry) {\n        success = func(entry);\n        return success;\n      },\n      start, end);\n  return success;\n}\n\nbool IterateSet(const PrimeValue& pv, const IterateFunc& func) {\n  bool success = true;\n  if (pv.Encoding() == kEncodingIntSet) {\n    intset* is = static_cast<intset*>(pv.RObjPtr());\n    int64_t ival;\n    int ii = 0;\n\n    while (success && intsetGet(is, ii++, &ival)) {\n      success = func(ContainerEntry{ival});\n    }\n  } else {\n    for (sds ptr : *static_cast<StringSet*>(pv.RObjPtr())) {\n      if (!func(ContainerEntry{ptr, sdslen(ptr)})) {\n        success = false;\n        break;\n      }\n    }\n  }\n\n  return success;\n}\n\nbool IterateSortedSet(const PrimeValue& pv, const IterateSortedFunc& func, size_t start, size_t end,\n                      bool reverse, bool use_score) {\n  size_t llen = pv.Size();\n  if (llen == 0)\n    return true;\n\n  if (end >= llen)\n    end = llen - 1;\n\n  if (start > end || start >= llen)\n    return true;\n\n  size_t rangelen = end - start + 1;\n\n  if (pv.Encoding() == OBJ_ENCODING_LISTPACK) {\n    uint8_t* zl = static_cast<uint8_t*>(pv.RObjPtr());\n    uint8_t *eptr, *sptr;\n    uint8_t* vstr;\n    unsigned int vlen;\n    long long vlong;\n    double score = 0.0;\n\n    if (reverse) {\n      eptr = lpSeek(zl, -2 - long(2 * start));\n    } else {\n      eptr = lpSeek(zl, 2 * start);\n    }\n    DCHECK(eptr);\n\n    sptr = lpNext(zl, eptr);\n\n    bool success = true;\n    while (success && rangelen--) {\n      DCHECK(eptr != NULL && sptr != NULL);\n      vstr = lpGetValue(eptr, &vlen, &vlong);\n\n      // don't bother to extract the score if it's gonna be ignored.\n      
if (use_score)\n        score = detail::ZzlGetScore(sptr);\n\n      if (vstr == NULL) {\n        success = func(ContainerEntry{vlong}, score);\n      } else {\n        success = func(ContainerEntry{reinterpret_cast<const char*>(vstr), vlen}, score);\n      }\n\n      if (reverse) {\n        detail::ZzlPrev(zl, &eptr, &sptr);\n      } else {\n        detail::ZzlNext(zl, &eptr, &sptr);\n      };\n    }\n    return success;\n  } else {\n    CHECK_EQ(pv.Encoding(), OBJ_ENCODING_SKIPLIST);\n    auto* smap = static_cast<detail::SortedMap*>(pv.RObjPtr());\n    return smap->Iterate(start, rangelen, reverse, [&](sds ele, double score) {\n      return func(ContainerEntry{ele, sdslen(ele)}, score);\n    });\n  }\n  return false;\n}\n\nbool IterateMap(const PrimeValue& pv, const IterateKVFunc& func) {\n  bool finished = true;\n\n  if (pv.Encoding() == kEncodingListPack) {\n    detail::ListpackWrap lw{static_cast<uint8_t*>(pv.RObjPtr())};\n    for (const auto [key, val] : lw) {\n      if (!func(ContainerEntry{key.data(), key.size()}, ContainerEntry{val.data(), val.size()})) {\n        finished = false;\n        break;\n      }\n    }\n  } else {\n    StringMap* sm = static_cast<StringMap*>(pv.RObjPtr());\n    for (const auto& k_v : *sm) {\n      if (!func(ContainerEntry{k_v.first, sdslen(k_v.first)},\n                ContainerEntry{k_v.second, sdslen(k_v.second)})) {\n        finished = false;\n        break;\n      }\n    }\n  }\n  return finished;\n}\n\nStringMap* GetStringMap(const PrimeValue& pv, const DbContext& db_context) {\n  DCHECK_EQ(pv.Encoding(), kEncodingStrMap2);\n  StringMap* res = static_cast<StringMap*>(pv.RObjPtr());\n  uint32_t map_time = MemberTimeSeconds(db_context.time_now_ms);\n  res->set_time(map_time);\n  return res;\n}\n\nOpResult<string> RunCbOnFirstNonEmptyBlocking(Transaction* trans, int req_obj_type,\n                                              BlockingResultCb func, unsigned limit_ms,\n                                              bool* 
block_flag, bool* pause_flag) {\n  string result_key;\n\n  // Fast path. If we have only a single shard, we can run opportunistically with a single hop.\n  // If we don't find anything, we abort concluding and keep scheduled.\n  // Slow path: schedule, find results from shards, execute action if found.\n  OpResult<ShardFFResult> result;\n  if (trans->GetUniqueShardCnt() == 1) {\n    auto res = FindFirstNonEmptySingleShard(trans, req_obj_type, func);\n    if (res.ok()) {\n      return res;\n    } else {\n      result = res.status();\n    }\n  } else {\n    result = FindFirstNonEmpty(trans, req_obj_type);\n  }\n\n  // If a non-empty key exists, execute the callback immediately\n  if (result.ok()) {\n    auto cb = [&](Transaction* t, EngineShard* shard) {\n      if (shard->shard_id() == result->sid) {\n        result->key.GetString(&result_key);\n        func(t, shard, result_key);\n      }\n      return OpStatus::OK;\n    };\n    trans->Execute(std::move(cb), true);\n    return result_key;\n  }\n\n  // Abort on possible errors: wrong type, etc\n  if (result.status() != OpStatus::KEY_NOTFOUND) {\n    trans->Conclude();\n    return result.status();\n  }\n\n  // Multi transactions are not allowed to block\n  if (trans->IsMulti()) {\n    trans->Conclude();\n    return OpStatus::TIMED_OUT;\n  }\n\n  DCHECK(trans->IsScheduled());  // single shard optimization didn't forget to schedule\n  VLOG(1) << \"Blocking \" << trans->DebugId();\n\n  // If timeout (limit_ms) is zero, block indefinitely\n  auto limit_tp = Transaction::time_point::max();\n  if (limit_ms > 0) {\n    using namespace std::chrono;\n    limit_tp = steady_clock::now() + milliseconds(limit_ms);\n  }\n\n  auto* ns = &trans->GetNamespace();\n  const auto key_checker = [req_obj_type, ns](EngineShard* owner, const DbContext& context,\n                                              Transaction*, std::string_view key) -> bool {\n    return ns->GetDbSlice(owner->shard_id()).FindReadOnly(context, key, 
req_obj_type).ok();\n  };\n\n  auto status =\n      trans->WaitOnWatch(limit_tp, Transaction::kShardArgs, key_checker, block_flag, pause_flag);\n\n  if (status != OpStatus::OK)\n    return status;\n\n  auto cb = [&](Transaction* t, EngineShard* shard) {\n    if (auto wake_key = t->GetWakeKey(shard->shard_id()); wake_key) {\n      result_key = *wake_key;\n      func(t, shard, result_key);\n    }\n    return OpStatus::OK;\n  };\n  trans->Execute(std::move(cb), true);\n  return result_key;\n}\n\n}  // namespace dfly::container_utils\n"
  },
  {
    "path": "src/server/container_utils.h",
    "content": "// Copyright 2022, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n#pragma once\n\n#include \"base/logging.h\"\n#include \"core/collection_entry.h\"\n#include \"core/compact_object.h\"\n#include \"facade/op_status.h\"\n#include \"server/table.h\"\n\nextern \"C\" {\n#include \"redis/listpack.h\"\n}\n\n#include <functional>\n\nnamespace dfly {\n\nclass StringMap;\n\nnamespace container_utils {\n\n// IsContainer returns true if the iterator points to a container type.\ninline bool IsContainer(const PrimeValue& pv) {\n  unsigned type = pv.ObjType();\n  return (type == OBJ_LIST || type == OBJ_SET || type == OBJ_ZSET);\n}\n\nusing ContainerEntry = CollectionEntry;\n\nusing IterateFunc = std::function<bool(ContainerEntry)>;\nusing IterateSortedFunc = std::function<bool(ContainerEntry, double)>;\nusing IterateKVFunc = std::function<bool(ContainerEntry, ContainerEntry)>;\n\n// Iterate over all values in [start, end] range (inclusive) and call func(val).\n// Iteration stops as soon\n// as func return false. Returns true if it successfully processed all elements\n// without breaking.\nbool IterateList(const PrimeValue& pv, const IterateFunc& func, size_t start = 0,\n                 size_t end = SIZE_MAX);\n\n// Iterate over all values and call func(val). Iteration stops as soon\n// as func return false. Returns true if it successfully processed all elements\n// without stopping.\nbool IterateSet(const PrimeValue& pv, const IterateFunc& func);\n\n// Iterate over all values and call func(val). Iteration stops as soon\n// as func return false. Returns true if it successfully processed all elements\n// without stopping.\nbool IterateSortedSet(const PrimeValue& pv, const IterateSortedFunc& func, size_t start = 0,\n                      size_t end = SIZE_MAX, bool reverse = false, bool use_score = false);\n\nbool IterateMap(const PrimeValue& pv, const IterateKVFunc& func);\n\n// Get StringMap pointer from primetable value. 
Sets expire time from db_context\nStringMap* GetStringMap(const PrimeValue& pv, const DbContext& db_context);\n\nusing BlockingResultCb =\n    std::function<void(Transaction*, EngineShard*, std::string_view /* key */)>;\n\n// Blocks until any key of the transaction becomes non-empty, then executes the callback.\n// If multiple keys are non-empty when this function is called, the callback is executed\n// immediately with the first key listed in the tx arguments.\nfacade::OpResult<std::string> RunCbOnFirstNonEmptyBlocking(Transaction* trans, int req_obj_type,\n                                                           BlockingResultCb cb, unsigned limit_ms,\n                                                           bool* block_flag, bool* pause_flag);\n\n};  // namespace container_utils\n\n}  // namespace dfly\n"
  },
  {
    "path": "src/server/db_slice.cc",
    "content": "// Copyright 2022, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#include \"server/db_slice.h\"\n\n#include \"core/dense_set.h\"\n\nextern \"C\" {\n#include \"redis/hyperloglog.h\"\n}\n\n#include <absl/cleanup/cleanup.h>\n\n#include \"base/flags.h\"\n#include \"base/logging.h\"\n#include \"core/top_keys.h\"\n#include \"facade/dragonfly_connection.h\"\n#include \"search/doc_index.h\"\n#include \"server/channel_store.h\"\n#include \"server/cluster/slot_set.h\"\n#include \"server/conn_context.h\"\n#include \"server/engine_shard_set.h\"\n#include \"server/error.h\"\n#include \"server/journal/journal.h\"\n#include \"server/server_state.h\"\n#include \"server/tiered_storage.h\"\n#include \"strings/human_readable.h\"\n#include \"util/fibers/fibers.h\"\n#include \"util/fibers/stacktrace.h\"\n\nABSL_FLAG(uint32_t, max_eviction_per_heartbeat, 100,\n          \"The maximum number of key-value pairs that will be deleted in each eviction \"\n          \"when heartbeat based eviction is triggered under memory pressure.\");\n\nABSL_FLAG(uint32_t, max_segment_to_consider, 4,\n          \"The maximum number of dashtable segments to scan in each eviction \"\n          \"when heartbeat based eviction is triggered under memory pressure.\");\n\nABSL_FLAG(double, table_growth_margin, 0.4,\n          \"Prevents table from growing if number of free slots x average object size x this ratio \"\n          \"is larger than memory budget.\");\n\nABSL_FLAG(std::string, notify_keyspace_events, \"\",\n          \"notify-keyspace-events. Only Ex is supported for now\");\n\nABSL_FLAG(bool, cluster_flush_decommit_memory, false, \"Decommit memory after flushing slots\");\n\nnamespace dfly {\n\nusing namespace std;\nusing namespace util;\nusing absl::GetFlag;\nusing namespace facade;\nusing Payload = journal::Entry::Payload;\n\nnamespace {\n\nconstexpr auto kPrimeSegmentSize = PrimeTable::kSegBytes;\n\n// mi_malloc good size is 32768. i.e. 
we have malloc waste of 1.5%.\nstatic_assert(kPrimeSegmentSize <= 32304);\n\nvoid AccountObjectMemory(string_view key, unsigned type, int64_t size, DbTable* db) {\n  DCHECK_NE(db, nullptr);\n  if (size == 0)\n    return;\n\n  DbTableStats& stats = db->stats;\n\n  stats.AddTypeMemoryUsage(type, size);\n\n  if (db->slots_stats) {\n    db->slots_stats[KeySlot(key)].memory_bytes += size;\n  }\n}\n\nclass PrimeEvictionPolicy {\n public:\n  static constexpr bool can_evict = true;  // we implement eviction functionality.\n  static constexpr bool can_gc = true;\n\n  // mem_offset - memory_offset that we should account for in addition to DbSlice::memory_budget.\n  // May be negative.\n  PrimeEvictionPolicy(const DbContext& cntx, bool can_evict, ssize_t mem_offset, ssize_t soft_limit,\n                      DbSlice* db_slice, bool apply_memory_limit)\n      : db_slice_(db_slice),\n        mem_offset_(mem_offset),\n        soft_limit_(soft_limit),\n        cntx_(cntx),\n        can_evict_(can_evict),\n        apply_memory_limit_(apply_memory_limit) {\n  }\n\n  // A hook function that is called every time a segment is full and requires splitting.\n  void RecordSplit(PrimeTable::Segment_t* segment) {\n    DVLOG(2) << \"split: \" << segment->SlowSize() << \"/\" << segment->capacity();\n  }\n  void OnMove(PrimeTable::Cursor source, PrimeTable::Cursor dest) {\n    moved_items_.push_back(std::make_pair(source, dest));\n  }\n\n  bool CanGrow(const PrimeTable& tbl) const;\n\n  unsigned GarbageCollect(const PrimeTable::HotBuckets& eb, PrimeTable* me);\n  unsigned Evict(const PrimeTable::HotBuckets& eb, PrimeTable* me);\n\n  unsigned evicted() const {\n    return evicted_;\n  }\n\n  unsigned checked() const {\n    return checked_;\n  }\n  const DbSlice::MovedItemsVec& moved_items() {\n    return moved_items_;\n  }\n\n private:\n  DbSlice::MovedItemsVec moved_items_;\n  DbSlice* db_slice_;\n  ssize_t mem_offset_;\n  ssize_t soft_limit_ = 0;\n  const DbContext cntx_;\n\n  unsigned 
evicted_ = 0;\n  unsigned checked_ = 0;\n\n  // unlike static constexpr can_evict, this parameter tells whether we can evict\n  // items in runtime.\n  const bool can_evict_;\n  const bool apply_memory_limit_;\n};\n\nbool PrimeEvictionPolicy::CanGrow(const PrimeTable& tbl) const {\n  ssize_t mem_available = db_slice_->memory_budget() + mem_offset_;\n  if (!apply_memory_limit_ || mem_available > soft_limit_)\n    return true;\n\n  DCHECK_LE(tbl.size(), tbl.capacity());\n  DCHECK_GT(tbl.size(), 0u);\n\n  // We take a conservative stance here -\n  // we estimate how much memory we will take with the current capacity\n  // even though we may currently use less memory.\n  // see https://github.com/dragonflydb/dragonfly/issues/256#issuecomment-1227095503\n  size_t table_free_items = ((tbl.capacity() - tbl.size()) + PrimeTable::kSegCapacity);\n\n  size_t obj_memory_usage = db_slice_->GetDBTable(cntx_.db_index)->stats.obj_memory_usage;\n  size_t avg_obj_size = obj_memory_usage / tbl.size();\n\n  // Catch significant discrepancies in average object size estimation.\n  // Note that this may happen if for example, db0 hosts a lot of small keys,\n  // db1 hosts huge keys etc. The goal of this comparison is to detect these cases and\n  // confirm that discrepancy is justified. 
Once we gather empirical evidence,\n  // we can remove this check and drop `db_slice_->bytes_per_object()` computation entirely.\n  if (avg_obj_size * 20 < db_slice_->bytes_per_object() ||\n      avg_obj_size > db_slice_->bytes_per_object() * 20) {\n    LOG_EVERY_T(WARNING, 1) << \"Avg object size estimation for the table is \" << avg_obj_size\n                            << \" vs \"\n                            << \" overall object size estimation \" << db_slice_->bytes_per_object();\n  }\n  size_t obj_bytes_estimation =\n      (avg_obj_size * table_free_items) * GetFlag(FLAGS_table_growth_margin);\n\n  bool can_grow = mem_available > int64_t(PrimeTable::kSegBytes + obj_bytes_estimation);\n  if (can_grow) {\n    VLOG(1) << \"free_items: \" << table_free_items << \", obj_bytes: \" << avg_obj_size << \" vs \"\n            << db_slice_->bytes_per_object() << \" \"\n            << \" mem_available: \" << mem_available;\n  } else {\n    LOG_EVERY_T(INFO, 1) << \"Can't grow, free_items \" << table_free_items\n                         << \", obj_bytes: \" << avg_obj_size << \" vs \"\n                         << db_slice_->bytes_per_object() << \" \"\n                         << \" mem_available: \" << mem_available;\n  }\n\n  return can_grow;\n}\n\nunsigned PrimeEvictionPolicy::GarbageCollect(const PrimeTable::HotBuckets& eb, PrimeTable* me) {\n  unsigned res = 0;\n\n  if (db_slice_->WillBlockOnJournalWrite()) {\n    return res;\n  }\n\n  // Disable flush journal changes to prevent preemption in GarbageCollect.\n  journal::DisableFlushGuard journal_flush_guard(db_slice_->shard_owner()->journal());\n\n  // bool should_print = (eb.key_hash % 128) == 0;\n\n  // based on tests - it's more efficient to pass regular buckets to gc.\n  // stash buckets are filled last so there is a much smaller chance they have expired items.\n  string scratch;\n  unsigned num_buckets =\n      std::min<unsigned>(PrimeTable::HotBuckets::kRegularBuckets, eb.num_buckets);\n  for (unsigned i = 0; i < 
num_buckets; ++i) {\n    auto bucket_it = eb.at(i);\n    for (; !bucket_it.is_done(); ++bucket_it) {\n      if (bucket_it->first.HasExpire()) {\n        string_view key = bucket_it->first.GetSlice(&scratch);\n        ++checked_;\n        auto [prime_it, exp_it] = db_slice_->ExpireIfNeeded(\n            cntx_, DbSlice::Iterator(bucket_it, StringOrView::FromView(key)));\n        if (prime_it.is_done())\n          ++res;\n      }\n    }\n  }\n\n  return res;\n}\n\nunsigned PrimeEvictionPolicy::Evict(const PrimeTable::HotBuckets& eb, PrimeTable* me) {\n  if (!can_evict_ || db_slice_->WillBlockOnJournalWrite())\n    return 0;\n\n  // Disable flush journal changes to prevent preemtion in evict.\n  journal::DisableFlushGuard journal_flush_guard(db_slice_->shard_owner()->journal());\n\n  constexpr size_t kNumStashBuckets = ABSL_ARRAYSIZE(eb.probes.by_type.stash_buckets);\n\n  // choose \"randomly\" a stash bucket to evict an item.\n  auto bucket_it = eb.probes.by_type.stash_buckets[eb.key_hash % kNumStashBuckets];\n  auto last_slot_it = bucket_it;\n  last_slot_it += (PrimeTable::kSlotNum - 1);\n  if (!last_slot_it.is_done()) {\n    // don't evict sticky items\n    if (last_slot_it->first.IsSticky()) {\n      return 0;\n    }\n\n    DbTable* table = db_slice_->GetDBTable(cntx_.db_index);\n    auto& lt = table->trans_locks;\n    string scratch;\n    string_view key = last_slot_it->first.GetSlice(&scratch);\n    // do not evict locked keys\n    if (lt.Find(LockTag(key)).has_value())\n      return 0;\n\n    // log the evicted keys to journal.\n    if (auto journal = db_slice_->shard_owner()->journal(); journal) {\n      RecordExpiryBlocking(cntx_.db_index, key);\n    }\n    db_slice_->Del(cntx_, DbSlice::Iterator(last_slot_it, StringOrView::FromView(key)));\n\n    ++evicted_;\n  }\n  me->ShiftRight(bucket_it);\n\n  return 1;\n}\n\nclass AsyncDeleter {\n public:\n  static void EnqueDeletion(uint32_t next, DenseSet* ds);\n  static void Shutdown();\n\n private:\n  static 
constexpr uint32_t kClearStepSize = 1024;\n  struct ClearNode {\n    DenseSet* ds;\n    uint32_t cursor;\n    ClearNode* next;\n\n    ClearNode(DenseSet* d, uint32_t c, ClearNode* n) : ds(d), cursor(c), next(n) {\n    }\n  };\n\n  // Asynchronously deletes entries during the cpu-idle time.\n  static int32_t IdleCb();\n\n  // We add async deletion requests to a linked list and process them asynchronously\n  // in each thread.\n  static __thread ClearNode* head_;\n};\n\n__thread AsyncDeleter::ClearNode* AsyncDeleter::head_ = nullptr;\n\nvoid AsyncDeleter::EnqueDeletion(uint32_t next, DenseSet* ds) {\n  bool launch_task = (head_ == nullptr);\n\n  // register ds\n  head_ = new ClearNode{ds, next, head_};\n  ProactorBase* pb = ProactorBase::me();\n  DCHECK(pb);\n  DVLOG(2) << \"Adding async deletion task, thread \" << pb->GetPoolIndex() << \" \" << launch_task;\n  if (launch_task) {\n    pb->AddOnIdleTask(&IdleCb);\n  }\n}\n\nvoid AsyncDeleter::Shutdown() {\n  // we do not bother with deleting objects scheduled for asynchronous deletion\n  // during the shutdown. 
this should work well because we destroy mimalloc heap anyways.\n  while (head_) {\n    auto* next = head_->next;\n    delete head_;\n    head_ = next;\n  }\n}\n\nint32_t AsyncDeleter::IdleCb() {\n  if (head_ == nullptr)\n    return -1;  // unregister itself.\n\n  auto* current = head_;\n\n  DVLOG(2) << \"IdleCb \" << current->cursor;\n  uint32_t next = current->ds->ClearStep(current->cursor, kClearStepSize);\n  if (next == current->ds->BucketCount()) {  // reached the end.\n    CompactObj::DeleteMR<DenseSet>(current->ds);\n    head_ = current->next;\n    delete current;\n  } else {\n    current->cursor = next;\n  }\n  return ProactorBase::kOnIdleMaxLevel;\n};\n\ninline void TouchTopKeysIfNeeded(string_view key, DbTable::SampleTopKeys* sample) {\n  if (sample) {\n    sample->top_keys->Touch(key);\n    ++sample->total_samples;\n  }\n}\n\ninline void TouchHllIfNeeded(string_view key, DbTable::SampleUniqueKeys* sample) {\n  if (sample) {\n    HllBufferPtr hll_buf;\n    hll_buf.size = getDenseHllSize();\n    hll_buf.hll = sample->dense_hll;\n    pfadd_dense(hll_buf, reinterpret_cast<const uint8_t*>(key.data()), key.size());\n    ++sample->total_samples;\n  }\n}\n\ninline void TouchValuesHistogramIfNeeded(const PrimeValue& pv, base::Histogram* hist) {\n  if (hist) {\n    hist->Add(pv.Size());\n  }\n}\n\ninline bool MayDeleteAsynchronously(const PrimeValue& pv) {\n  unsigned obj_type = pv.ObjType();\n  return (obj_type == OBJ_SET || obj_type == OBJ_HASH) && pv.Encoding() == kEncodingStrMap2;\n}\n\n}  // namespace\n\n#define ADD(x) (x) += o.x\n\nDbStats& DbStats::operator+=(const DbStats& o) {\n  constexpr size_t kDbSz = sizeof(DbStats) - sizeof(DbTableStats);\n  static_assert(kDbSz == 24);\n\n  DbTableStats::operator+=(o);\n\n  ADD(key_count);\n  ADD(prime_capacity);\n  ADD(table_mem_usage);\n\n  return *this;\n}\n\nSliceEvents& SliceEvents::operator+=(const SliceEvents& o) {\n  static_assert(sizeof(SliceEvents) == 136, \"You should update this function with new 
fields\");\n\n  ADD(evicted_keys);\n  ADD(hard_evictions);\n  ADD(expired_keys);\n  ADD(garbage_collected);\n  ADD(stash_unloaded);\n  ADD(bumpups);\n  ADD(garbage_checked);\n  ADD(hits);\n  ADD(misses);\n  ADD(mutations);\n  ADD(insertion_rejections);\n  ADD(update);\n  ADD(ram_hits);\n  ADD(ram_cool_hits);\n  ADD(ram_misses);\n  ADD(huff_encode_total);\n  ADD(huff_encode_success);\n  return *this;\n}\n\n#undef ADD\n\nclass DbSlice::PrimeBumpPolicy {\n public:\n  bool CanBump(const CompactObj& obj) const {\n    return !obj.IsSticky();\n  }\n  void OnMove(PrimeTable::Cursor source, PrimeTable::Cursor dest) {\n    moved_items_.push_back(std::make_pair(source, dest));\n  }\n\n  const DbSlice::MovedItemsVec& moved_items() {\n    return moved_items_;\n  }\n\n private:\n  DbSlice::MovedItemsVec moved_items_;\n};\n\nDbSlice::DbSlice(uint32_t index, bool cache_mode, EngineShard* owner)\n    : shard_id_(index),\n      cache_mode_(cache_mode),\n      owner_(owner),\n      client_tracking_map_(owner->memory_resource()) {\n  db_arr_.emplace_back();\n  CreateDb(0);\n  expire_base_[0] = expire_base_[1] = 0;\n\n  std::string keyspace_events = GetFlag(FLAGS_notify_keyspace_events);\n  if (!keyspace_events.empty() && keyspace_events != \"Ex\") {\n    LOG(ERROR) << \"Only Ex is currently supported\";\n    exit(0);\n  }\n  expired_keys_events_recording_ = !keyspace_events.empty();\n}\n\nDbSlice::~DbSlice() {\n  // we do not need this code but it's easier to debug in case we encounter\n  // memory allocation bugs during delete operations.\n\n  for (auto& db : db_arr_) {\n    if (!db)\n      continue;\n    db.reset();\n  }\n\n  AsyncDeleter::Shutdown();\n}\n\nauto DbSlice::GetStats() const -> Stats {\n  Stats s;\n  s.events = events_;\n  s.db_stats.resize(db_arr_.size());\n\n  for (size_t i = 0; i < db_arr_.size(); ++i) {\n    if (!db_arr_[i])\n      continue;\n    const auto& db_wrap = *db_arr_[i];\n    DbStats& stats = s.db_stats[i];\n    stats = db_wrap.stats;\n    stats.key_count 
= db_wrap.prime.size();\n    stats.prime_capacity = db_wrap.prime.capacity();\n    stats.table_mem_usage = db_wrap.table_memory();\n  }\n  auto co_stats = CompactObj::GetStatsThreadLocal();\n  s.small_string_bytes = co_stats.small_string_bytes;\n  s.events.huff_encode_total = co_stats.huff_encode_total;\n  s.events.huff_encode_success = co_stats.huff_encode_success;\n\n  return s;\n}\n\nSlotStats DbSlice::GetSlotStats(SlotId sid) const {\n  CHECK(db_arr_[0]);\n  return db_arr_[0]->slots_stats[sid];\n}\n\nvoid DbSlice::Reserve(DbIndex db_ind, size_t key_size) {\n  ActivateDb(db_ind);\n\n  auto& db = db_arr_[db_ind];\n  DCHECK(db);\n\n  db->prime.Reserve(key_size);\n}\n\nDbSlice::AutoUpdater::AutoUpdater() {\n}\n\nDbSlice::AutoUpdater::AutoUpdater(AutoUpdater&& o) noexcept {\n  *this = std::move(o);\n}\n\nDbSlice::AutoUpdater& DbSlice::AutoUpdater::operator=(AutoUpdater&& o) noexcept {\n  Run();\n  fields_ = o.fields_;\n  o.Cancel();\n  return *this;\n}\n\nDbSlice::AutoUpdater::~AutoUpdater() {\n  Run();\n}\n\nvoid DbSlice::AutoUpdater::ReduceHeapUsage() {\n  AccountObjectMemory(fields_.key, fields_.it->second.ObjType(), -fields_.orig_value_heap_size,\n                      fields_.db_slice->GetDBTable(fields_.db_ind));\n  fields_.orig_value_heap_size = 0;  // Reset to avoid double accounting.\n}\n\nvoid DbSlice::AutoUpdater::Run() {\n  if (fields_.db_slice == nullptr) {\n    return;\n  }\n\n  // Check that AutoUpdater does not run after a key was removed.\n  // If this CHECK() failed for you, it probably means that you deleted a key while having an auto\n  // updater in scope. 
You'll probably want to call Run() (or Cancel() - but be careful).\n  DCHECK(IsValid(fields_.db_slice->db_arr_[fields_.db_ind]->prime.Find(fields_.key)));\n\n  CHECK_NE(fields_.db_slice, nullptr);\n\n  ssize_t delta = static_cast<int64_t>(fields_.it->second.MallocUsed()) -\n                  static_cast<int64_t>(fields_.orig_value_heap_size);\n  AccountObjectMemory(fields_.key, fields_.it->second.ObjType(), delta,\n                      fields_.db_slice->GetDBTable(fields_.db_ind));\n  fields_.db_slice->PostUpdate(fields_.db_ind, fields_.key);\n  Cancel();  // Reset to not run again\n}\n\nvoid DbSlice::AutoUpdater::Cancel() {\n  this->fields_ = {};\n}\n\nDbSlice::AutoUpdater::AutoUpdater(DbIndex db_ind, std::string_view key, const Iterator& it,\n                                  DbSlice* db_slice)\n    : fields_{.db_slice = db_slice,\n              .db_ind = db_ind,\n              .it = it,\n              .key = key,\n              .orig_value_heap_size = it->second.MallocUsed()} {\n  DCHECK(IsValid(it));\n}\n\nDbSlice::ItAndUpdater DbSlice::FindMutable(const Context& cntx, string_view key) {\n  return std::move(FindMutableInternal(cntx, key, std::nullopt).value());\n}\n\nOpResult<DbSlice::ItAndUpdater> DbSlice::FindMutable(const Context& cntx, string_view key,\n                                                     unsigned req_obj_type) {\n  return FindMutableInternal(cntx, key, req_obj_type);\n}\n\nOpResult<DbSlice::ItAndUpdater> DbSlice::FindMutableInternal(const Context& cntx, string_view key,\n                                                             std::optional<unsigned> req_obj_type) {\n  auto res = FindInternal(cntx, key, req_obj_type, UpdateStatsMode::kMutableStats);\n  if (!res.ok()) {\n    return res.status();\n  }\n\n  auto it = Iterator(res->it, StringOrView::FromView(key));\n  auto exp_it = ExpIterator(res->exp_it, StringOrView::FromView(key));\n  PreUpdateBlocking(cntx.db_index, it);\n  // PreUpdate() might have caused a deletion of `it`\n  if 
(res->it.IsOccupied()) {\n    DCHECK_GE(db_arr_[cntx.db_index]->stats.obj_memory_usage, res->it->second.MallocUsed());\n\n    return {{it, exp_it, AutoUpdater{cntx.db_index, key, it, this}}};\n  } else {\n    return OpStatus::KEY_NOTFOUND;\n  }\n}\n\nDbSlice::ItAndExpConst DbSlice::FindReadOnly(const Context& cntx, std::string_view key) const {\n  auto res = FindInternal(cntx, key, std::nullopt, UpdateStatsMode::kReadStats);\n  return {ConstIterator(res->it, StringOrView::FromView(key)),\n          ExpConstIterator(res->exp_it, StringOrView::FromView(key))};\n}\n\nOpResult<DbSlice::ConstIterator> DbSlice::FindReadOnly(const Context& cntx, string_view key,\n                                                       unsigned req_obj_type) const {\n  auto res = FindInternal(cntx, key, req_obj_type, UpdateStatsMode::kReadStats);\n  if (res.ok()) {\n    return ConstIterator(res->it, StringOrView::FromView(key));\n  }\n  return res.status();\n}\n\nauto DbSlice::FindInternal(const Context& cntx, string_view key, optional<unsigned> req_obj_type,\n                           UpdateStatsMode stats_mode) const -> OpResult<PrimeItAndExp> {\n  if (!IsDbValid(cntx.db_index)) {  // Can it even happen?\n    LOG(DFATAL) << \"Invalid db index \" << cntx.db_index;\n    return OpStatus::KEY_NOTFOUND;\n  }\n\n  auto& db = *db_arr_[cntx.db_index];\n  PrimeItAndExp res;\n  res.it = db.prime.Find(key);\n  int miss_weight = (stats_mode == UpdateStatsMode::kReadStats);\n\n  if (!IsValid(res.it)) {\n    events_.misses += miss_weight;\n    db.stats.events.misses += miss_weight;\n    return OpStatus::KEY_NOTFOUND;\n  }\n\n  TouchTopKeysIfNeeded(key, db.sample_top_keys);\n  TouchHllIfNeeded(key, db.sample_unique_keys);\n  TouchValuesHistogramIfNeeded(res.it->second, db.sample_values_hist);\n\n  if (req_obj_type.has_value() && res.it->second.ObjType() != req_obj_type.value()) {\n    events_.misses += miss_weight;\n    db.stats.events.misses += miss_weight;\n    return OpStatus::WRONG_TYPE;\n  }\n\n  
if (res.it->first.HasExpire()) {  // check expiry state\n    res = ExpireIfNeeded(cntx, res.it);\n    if (!IsValid(res.it)) {\n      events_.misses += miss_weight;\n      db.stats.events.misses += miss_weight;\n      return OpStatus::KEY_NOTFOUND;\n    }\n  }\n\n  DCHECK(IsValid(res.it));\n\n  if (IsCacheMode()) {\n    fetched_items_.insert({res.it->first.HashCode(), cntx.db_index});\n  }\n\n  switch (stats_mode) {\n    case UpdateStatsMode::kMutableStats:\n      events_.mutations++;\n      break;\n    case UpdateStatsMode::kReadStats:\n      events_.hits++;\n      db.stats.events.hits++;\n      if (db.slots_stats) {\n        db.slots_stats[KeySlot(key)].total_reads++;\n      }\n      if (res.it->second.IsExternal()) {\n        if (res.it->second.IsCool())\n          events_.ram_cool_hits++;\n        else\n          events_.ram_misses++;\n      } else {\n        events_.ram_hits++;\n      }\n      break;\n  }\n\n  auto& pv = res.it->second;\n\n  // Cancel any pending stashes of looked up values\n  // Rationale: we either look it up for reads - and then it's hot, or alternatively,\n  // we follow up with modifications, so the pending stash becomes outdated.\n  if (pv.HasStashPending()) {\n    owner_->tiered_storage()->CancelStash(cntx.db_index, key, &pv);\n  }\n\n  // Fetch back cool items\n  if (pv.IsExternal() && pv.IsCool()) {\n    pv = owner_->tiered_storage()->Warmup(cntx.db_index, pv.GetCool());\n  }\n\n  // Mark this entry as being looked up. 
We use key (first) deliberately to preserve the hotness\n  // attribute of the entry in case of value overrides.\n  res.it->first.SetTouched(true);\n\n  return res;\n}\n\nOpResult<DbSlice::ItAndUpdater> DbSlice::AddOrFind(const Context& cntx, string_view key,\n                                                   std::optional<unsigned> req_obj_type) {\n  return AddOrFindInternal(cntx, key, req_obj_type);\n}\n\nOpResult<DbSlice::ItAndUpdater> DbSlice::AddOrFindInternal(const Context& cntx, string_view key,\n                                                           std::optional<unsigned> req_obj_type) {\n  DCHECK(IsDbValid(cntx.db_index));\n\n  DbTable& db = *db_arr_[cntx.db_index];\n  auto res = FindInternal(cntx, key, req_obj_type, UpdateStatsMode::kMutableStats);\n\n  if (res.ok()) {\n    Iterator it(res->it, StringOrView::FromView(key));\n    ExpIterator exp_it(res->exp_it, StringOrView::FromView(key));\n    PreUpdateBlocking(cntx.db_index, it);\n\n    // PreUpdate() might have caused a deletion of `it`\n    if (res->it.IsOccupied()) {\n      return ItAndUpdater{\n          .it = it, .exp_it = exp_it, .post_updater{cntx.db_index, key, it, this}, .is_new = false};\n    } else {\n      res = OpStatus::KEY_NOTFOUND;\n    }\n  } else if (res == OpStatus::WRONG_TYPE) {\n    return OpStatus::WRONG_TYPE;\n  }\n\n  auto status = res.status();\n  CHECK(status == OpStatus::KEY_NOTFOUND || status == OpStatus::OUT_OF_MEMORY) << status;\n\n  // It's a new entry.\n  CallChangeCallbacks(cntx.db_index, ChangeReq{key});\n\n  ssize_t memory_offset = -key.size();\n  size_t reclaimed = 0;\n  // If we are low on memory due to cold storage, free some memory.\n  if (owner_->tiered_storage()) {\n    // At least 40KB bytes to cover potential segment split.\n    ssize_t red_line = std::max<size_t>(key.size() * 2, 40_KB);\n    if (memory_budget_ < red_line) {\n      size_t goal = red_line - memory_budget_;\n      reclaimed = owner_->tiered_storage()->ReclaimMemory(goal);\n      
memory_budget_ += reclaimed;\n    }\n\n    // CoolMemoryUsage is the memory that we can always reclaim, like in the block above,\n    // therefore we include it for PrimeEvictionPolicy considerations.\n    memory_offset += owner_->tiered_storage()->CoolMemoryUsage();\n  }\n\n  // In case we are loading from rdb file or replicating we want to disable conservative memory\n  // checks (inside PrimeEvictionPolicy::CanGrow) and reject insertions only after we pass max\n  // memory limit. When loading a snapshot created by the same server configuration (memory and\n  // number of shards) we will create a different dash table segment directory tree, because the\n  // tree shape is related to the order of entries insertion. Therefore when loading data from\n  // snapshot or from replication the conservative memory checks might fail as the new tree might\n  // have more segments. Because we dont want to fail loading a snapshot from the same server\n  // configuration we disable this checks on loading and replication.\n  bool apply_memory_limit =\n      !owner_->IsReplica() && !(ServerState::tlocal()->gstate() == GlobalState::LOADING);\n\n  // If we are over limit in non-cache scenario, just be conservative and throw.\n  if (apply_memory_limit && !IsCacheMode() && memory_budget_ + memory_offset < 0) {\n    LOG_EVERY_T(WARNING, 1) << \"AddOrFind: over limit, budget: \" << memory_budget_\n                            << \" reclaimed: \" << reclaimed << \" offset: \" << memory_offset;\n    events_.insertion_rejections++;\n    return OpStatus::OUT_OF_MEMORY;\n  }\n\n  ssize_t soft_budget_limit =\n      (0.3 * max_memory_limit.load(memory_order_relaxed)) / shard_set->size();\n  PrimeEvictionPolicy evp{cntx,          (IsCacheMode() && !owner_->IsReplica()),\n                          memory_offset, soft_budget_limit,\n                          this,          apply_memory_limit};\n\n  // Fast-path if change_cb_ is empty so we Find or Add using\n  // the insert operation: twice more 
efficient.\n  PrimeIterator it;\n\n  ssize_t table_before = db.prime.mem_usage();\n\n  try {\n    it = db.prime.InsertNew(key, PrimeValue{}, evp);\n  } catch (bad_alloc& e) {\n    LOG_EVERY_T(WARNING, 1) << \"AddOrFind: InsertNew failed, budget: \" << memory_budget_\n                            << \" reclaimed: \" << reclaimed << \" offset: \" << memory_offset;\n    events_.insertion_rejections++;\n    return OpStatus::OUT_OF_MEMORY;\n  }\n  CallMovedCallbacks(cntx.db_index, evp.moved_items());\n\n  events_.mutations++;\n  ssize_t table_increase = db.prime.mem_usage() - table_before;\n  memory_budget_ -= table_increase;\n\n  if (memory_budget_ < 0 && apply_memory_limit) {\n    // We may reach the state when our memory usage is below the limit even if we\n    // do not add new segments. For example, we have half full segments\n    // and we add new objects or update the existing ones and our memory usage grows.\n    // We do not require for a single operation to unload the whole negative debt.\n    // Instead, we create a positive, converging force that should help with freeing enough memory.\n    // Free at least K bytes or 3% of the total debt.\n    // TODO: to reenable and optimize this - this call significantly slows down server\n    // when evictions are running.\n#if 0\n    size_t evict_goal = std::max<size_t>(512, (-evp.mem_budget()) / 32);\n    auto [items, bytes] = FreeMemWithEvictionStep(cntx.db_index, it.segment_id(), evict_goal);\n    events_.hard_evictions += items;\n#endif\n  }\n\n  table_memory_ += table_increase;\n  entries_count_++;\n\n  if (it->first.IsInline()) {\n    ++db.stats.inline_keys;\n  } else {\n    AccountObjectMemory(key, OBJ_KEY, it->first.MallocUsed(), &db);  // Account for key\n  }\n\n  DCHECK_EQ(it->second.MallocUsed(), 0UL);  // Make sure accounting is no-op\n  it.SetVersion(NextVersion());\n\n  TouchTopKeysIfNeeded(key, db.sample_top_keys);\n  TouchHllIfNeeded(key, db.sample_unique_keys);\n\n  events_.garbage_collected = 
db.prime.garbage_collected();\n  events_.stash_unloaded = db.prime.stash_unloaded();\n  events_.evicted_keys += evp.evicted();\n  db.stats.events.evicted_keys += evp.evicted();\n  events_.garbage_checked += evp.checked();\n  if (db.slots_stats) {\n    SlotId sid = KeySlot(key);\n    db.slots_stats[sid].key_count += 1;\n  }\n\n  return ItAndUpdater{\n      .it = Iterator(it, StringOrView::FromView(key)),\n      .exp_it = ExpIterator{},\n      .post_updater{cntx.db_index, key, Iterator(it, StringOrView::FromView(key)), this},\n      .is_new = true};\n}\n\nvoid DbSlice::ActivateDb(DbIndex db_ind) {\n  if (db_arr_.size() <= db_ind)\n    db_arr_.resize(db_ind + 1);\n  CreateDb(db_ind);\n}\n\nvoid DbSlice::Del(Context cntx, Iterator it, DbTable* db_table, bool async) {\n  CHECK(IsValid(it));\n\n  ExpIterator exp_it;\n  DbTable* table = db_table ? db_table : db_arr_[cntx.db_index].get();\n  auto obj_type = it->second.ObjType();\n\n  if (doc_del_cb_ && (obj_type == OBJ_JSON || obj_type == OBJ_HASH)) {\n    string tmp;\n    string_view key = it->first.GetSlice(&tmp);\n    doc_del_cb_(key, cntx, it->second);\n  }\n\n  PerformDeletionAtomic(it, exp_it, table, async);\n}\n\nvoid DbSlice::DelMutable(Context cntx, ItAndUpdater it_updater) {\n  it_updater.post_updater.Run();\n  Del(cntx, it_updater.it);\n}\n\nvoid DbSlice::FlushSlotsFb(const cluster::SlotSet& slot_ids) {\n  VLOG(1) << \"Start FlushSlotsFb\";\n  // Slot deletion can take time as it traverses all the database, hence it runs in fiber.\n  // We want to flush all the data of a slot that was added till the time the call to FlushSlotsFb\n  // was made. 
Therefore we delete slots entries with version < next_version\n  uint64_t next_version = 0;\n  uint64_t del_count = 0;\n\n  // Explicitly copy table smart pointer to keep reference count up (flushall drops it)\n  boost::intrusive_ptr<DbTable> table = db_arr_.front();\n  size_t memory_before = table->table_memory() + table->stats.obj_memory_usage;\n\n  DbContext db_cntx;\n  db_cntx.time_now_ms = GetCurrentTimeMs();\n  db_cntx.db_index = table->index;\n\n  std::string tmp;\n  auto iterate_bucket = [&](PrimeTable::bucket_iterator it) {\n    it.AdvanceIfNotOccupied();\n    while (!it.is_done()) {\n      std::string_view key = it->first.GetSlice(&tmp);\n      SlotId sid = KeySlot(key);\n      if (slot_ids.Contains(sid) && it.GetVersion() < next_version) {\n        // We use copy of table smart pointer and pass it as table because FLUSHALL can drop table.\n        Del(db_cntx, Iterator::FromPrime(it), table.get());\n        ++del_count;\n      }\n      ++it;\n    }\n  };\n\n  auto on_change = [&](DbIndex db_index, const ChangeReq& req) {\n    FiberAtomicGuard fg;\n    PrimeTable* table = GetTables(db_index).first;\n\n    if (const PrimeTable::bucket_iterator* bit = req.update()) {\n      if (!bit->is_done() && bit->GetVersion() < next_version) {\n        iterate_bucket(*bit);\n      }\n    } else {\n      string_view key = get<string_view>(req.change);\n      table->CVCUponInsert(next_version, key,\n                           [next_version, iterate_bucket](PrimeTable::bucket_iterator it) {\n                             DCHECK_LT(it.GetVersion(), next_version);\n                             iterate_bucket(it);\n                           });\n    }\n  };\n  next_version = RegisterOnChange(std::move(on_change));\n\n  ServerState& etl = *ServerState::tlocal();\n  PrimeTable* pt = &table->prime;\n  PrimeTable::Cursor cursor;\n\n  do {\n    PrimeTable::Cursor next = pt->TraverseBuckets(cursor, iterate_bucket);\n    cursor = next;\n    ThisFiber::Yield();\n  } while (cursor 
&& etl.gstate() != GlobalState::SHUTTING_DOWN);\n\n  VLOG(1) << \"FlushSlotsFb del count is: \" << del_count;\n  UnregisterOnChange(next_version);\n\n  if (absl::GetFlag(FLAGS_cluster_flush_decommit_memory)) {\n    int64_t start = absl::GetCurrentTimeNanos();\n    etl.DecommitMemory(ServerState::kDataHeap);\n    int64_t took = absl::GetCurrentTimeNanos() - start;\n    size_t memory_after = table->table_memory() + table->stats.obj_memory_usage;\n\n    LOG(INFO) << \"Memory decommit took \" << took << \"ns, deleted \" << del_count << \", memory delta \"\n              << (memory_before - memory_after);\n  }\n}\n\nvoid DbSlice::FlushSlots(const cluster::SlotRanges& slot_ranges) {\n  cluster::SlotSet slot_set(slot_ranges);\n  InvalidateSlotWatches(slot_set);\n  fb2::Fiber(\"flush_slots\", [this, slot_set = std::move(slot_set)]() mutable {\n    FlushSlotsFb(slot_set);\n  }).Detach();\n}\n\nutil::fb2::Fiber DbSlice::FlushDbIndexes(const std::vector<DbIndex>& indexes) {\n  bool clear_tiered = owner_->tiered_storage() != nullptr;\n\n  if (clear_tiered)\n    RemoveOffloadedEntriesFromTieredStorage(indexes, db_arr_);\n\n  DbTableArray flush_db_arr(db_arr_.size());\n\n  for (DbIndex index : indexes) {\n    if (index == 0) {\n      // TODO: Async dealloc?\n      // TODO: Drop of global HNSW index doesn't respect per-shard ordering\n      owner_->search_indices()->DropAllIndices();\n    }\n\n    table_memory_ -= db_arr_[index]->table_memory();\n    entries_count_ -= db_arr_[index]->prime.size();\n\n    InvalidateDbWatches(index);\n    flush_db_arr[index] = std::move(db_arr_[index]);\n\n    CreateDb(index);\n    std::swap(db_arr_[index]->trans_locks, flush_db_arr[index]->trans_locks);\n  }\n\n  LOG_IF(DFATAL, !fetched_items_.empty())\n      << \"Some operation might bumped up items outside of a transaction\";\n\n  auto cb = [flush_db_arr = std::move(flush_db_arr)]() mutable {\n    flush_db_arr.clear();\n    ServerState::tlocal()->DecommitMemory(ServerState::kDataHeap | 
ServerState::kBackingHeap |\n                                          ServerState::kGlibcmalloc);\n  };\n\n  return {\"flush_dbs\", std::move(cb)};\n}\n\nutil::fb2::Fiber DbSlice::FlushDb(DbIndex db_ind) {\n  DVLOG(1) << \"Flushing db \" << db_ind;\n\n  // clear client tracking map.\n  client_tracking_map_.clear();\n\n  if (db_ind != kDbAll)  // Flush a single database if a specific index is provided\n    return FlushDbIndexes({db_ind});\n\n  std::vector<DbIndex> indexes;\n  indexes.reserve(db_arr_.size());\n  for (DbIndex i = 0; i < db_arr_.size(); ++i) {\n    if (db_arr_[i]) {\n      indexes.push_back(i);\n    }\n  }\n\n  return FlushDbIndexes(indexes);\n}\n\nvoid DbSlice::AddExpire(DbIndex db_ind, const Iterator& main_it, uint64_t at) {\n  bool had_expire = main_it->first.HasExpire();\n  bool was_inline = main_it->first.IsInline();\n  ssize_t old_malloc = static_cast<ssize_t>(main_it->first.MallocUsed());\n\n  main_it->first.SetExpireTime(at);\n\n  auto& db = *db_arr_[db_ind];\n  ssize_t new_malloc = static_cast<ssize_t>(main_it->first.MallocUsed());\n  if (was_inline && !main_it->first.IsInline()) {\n    --db.stats.inline_keys;\n    AccountObjectMemory(main_it.key(), OBJ_KEY, new_malloc, &db);\n  } else if (new_malloc != old_malloc) {\n    AccountObjectMemory(main_it.key(), OBJ_KEY, new_malloc - old_malloc, &db);\n  }\n\n  if (!had_expire)\n    ++db.stats.expire_count;\n}\n\nbool DbSlice::RemoveExpire(DbIndex db_ind, const Iterator& main_it) {\n  if (!main_it->first.HasExpire())\n    return false;\n\n  DCHECK(!main_it->first.IsInline());  // SDS_TTL_TAG is never inline\n  ssize_t old_malloc = static_cast<ssize_t>(main_it->first.MallocUsed());\n\n  main_it->first.ClearExpireTime();\n\n  auto& db = *db_arr_[db_ind];\n  ssize_t new_malloc = static_cast<ssize_t>(main_it->first.MallocUsed());\n  if (main_it->first.IsInline()) {\n    AccountObjectMemory(main_it.key(), OBJ_KEY, -old_malloc, &db);\n    ++db.stats.inline_keys;\n  } else if (new_malloc != old_malloc) 
{\n    AccountObjectMemory(main_it.key(), OBJ_KEY, new_malloc - old_malloc, &db);\n  }\n\n  --db.stats.expire_count;\n  return true;\n}\n\nbool DbSlice::SetMCFlag(DbIndex db_ind, const PrimeKey& key, uint32_t flag) {\n  DCHECK(!key.IsRef());\n\n  auto& db = *db_arr_[db_ind];\n  string scratch;\n  if (flag == 0 && !db.mcflag.Empty()) {\n    auto mcit = db.mcflag.Find(key.GetSlice(&scratch));\n    if (mcit != db.mcflag.end()) {\n      db.mcflag.Erase(mcit);\n      return true;\n    }\n  } else if (flag != 0) {\n    auto [it, _] = db.mcflag.Insert(key.GetSlice(&scratch), flag);\n    it->second = flag;\n    return true;\n  }\n  return false;\n}\n\nuint32_t DbSlice::GetMCFlag(DbIndex db_ind, const PrimeKey& key) const {\n  auto& db = *db_arr_[db_ind];\n  string scratch;\n  auto it = db.mcflag.Find(key.GetSlice(&scratch));\n  if (it.is_done()) {\n    LOG(DFATAL) << \"Internal error, inconsistent state, mcflag should be present but not found \"\n                << key.ToString();\n    return 0;\n  }\n  return it->second;\n}\n\nOpResult<DbSlice::ItAndUpdater> DbSlice::AddNew(const Context& cntx, string_view key,\n                                                PrimeValue obj, uint64_t expire_at_ms) {\n  auto op_result = AddOrUpdateInternal(cntx, key, std::move(obj), expire_at_ms, false);\n  RETURN_ON_BAD_STATUS(op_result);\n  auto& res = *op_result;\n  CHECK(res.is_new);\n\n  return DbSlice::ItAndUpdater{\n      .it = res.it, .exp_it = res.exp_it, .post_updater = std::move(res.post_updater)};\n}\n\nint64_t DbSlice::ExpireParams::Cap(int64_t value, TimeUnit unit) {\n  return unit == TimeUnit::SEC ? 
min(value, kMaxExpireDeadlineSec)\n                               : min(value, kMaxExpireDeadlineMs);\n}\n\npair<int64_t, int64_t> DbSlice::ExpireParams::Calculate(uint64_t now_ms, bool cap) const {\n  if (persist)\n    return {0, 0};\n\n  // return a negative absolute time if we overflow.\n  if (unit == TimeUnit::SEC && value > INT64_MAX / 1000) {\n    return {0, -1};\n  }\n\n  int64_t msec = (unit == TimeUnit::SEC) ? value * 1000 : value;\n  int64_t rel_msec = absolute ? msec - now_ms : msec;\n  if (cap)\n    rel_msec = Cap(rel_msec, TimeUnit::MSEC);\n  return make_pair(rel_msec, now_ms + rel_msec);\n}\n\nOpResult<int64_t> DbSlice::UpdateExpire(const Context& cntx, Iterator prime_it,\n                                        ExpIterator expire_it, const ExpireParams& params) {\n  constexpr uint64_t kPersistValue = 0;\n  DCHECK(params.IsDefined());\n  DCHECK(IsValid(prime_it));\n\n  if (params.persist) {  // Persist means remove expiry\n    RemoveExpire(cntx.db_index, prime_it);\n    return kPersistValue;\n  }\n\n  auto [rel_msec, abs_msec] = params.Calculate(cntx.time_now_ms, false);\n  if (abs_msec < 0 || rel_msec > kMaxExpireDeadlineMs) {\n    return OpStatus::OUT_OF_RANGE;\n  }\n\n  int64_t current_cmp = numeric_limits<int64_t>::max();  // inf if no expiry is set\n  bool satisfied = params.expire_options == ExpireFlags::EXPIRE_ALWAYS;\n\n  if (prime_it->first.HasExpire()) {\n    current_cmp = prime_it->first.GetExpireTime();\n    satisfied |= (params.expire_options & ExpireFlags::EXPIRE_XX);\n  } else {\n    satisfied |= (params.expire_options & ExpireFlags::EXPIRE_NX);\n  }\n\n  satisfied |= (params.expire_options & ExpireFlags::EXPIRE_LT) && (abs_msec < current_cmp);\n  satisfied |= (params.expire_options & ExpireFlags::EXPIRE_GT) && (abs_msec > current_cmp);\n\n  if (!satisfied)\n    return OpStatus::SKIPPED;\n\n  // If we update and the new value is already expired, delete the key\n  if (rel_msec <= 0) {\n    Del(cntx, prime_it);\n    return -1;\n  }\n\n  
AddExpire(cntx.db_index, prime_it, abs_msec);\n  return abs_msec;\n}\n\nOpResult<DbSlice::ItAndUpdater> DbSlice::AddOrUpdateInternal(const Context& cntx,\n                                                             std::string_view key, PrimeValue obj,\n                                                             uint64_t expire_at_ms,\n                                                             bool force_update) {\n  DCHECK(!obj.IsRef());\n\n  auto op_result = AddOrFind(cntx, key, std::nullopt);\n  RETURN_ON_BAD_STATUS(op_result);\n\n  auto& res = *op_result;\n  if (!res.is_new && !force_update)  // have not inserted.\n    return op_result;\n\n  auto& it = res.it;\n\n  it->second = std::move(obj);\n\n  if (expire_at_ms) {\n    AddExpire(cntx.db_index, it, expire_at_ms);\n  } else {\n    RemoveExpire(cntx.db_index, it);\n  }\n\n  return op_result;\n}\n\nOpResult<DbSlice::ItAndUpdater> DbSlice::AddOrUpdate(const Context& cntx, string_view key,\n                                                     PrimeValue obj, uint64_t expire_at_ms) {\n  return AddOrUpdateInternal(cntx, key, std::move(obj), expire_at_ms, true);\n}\n\nsize_t DbSlice::DbSize(DbIndex db_ind) const {\n  DCHECK_LT(db_ind, db_array_size());\n\n  if (IsDbValid(db_ind)) {\n    return db_arr_[db_ind]->prime.size();\n  }\n  return 0;\n}\n\nbool DbSlice::Acquire(IntentLock::Mode mode, const KeyLockArgs& lock_args) {\n  if (lock_args.fps.empty()) {  // Can be empty for NO_KEY_TRANSACTIONAL commands.\n    return true;\n  }\n  DCHECK_LT(lock_args.db_index, db_array_size());\n\n  auto& lt = db_arr_[lock_args.db_index]->trans_locks;\n  bool lock_acquired = true;\n\n  if (lock_args.fps.size() == 1) {\n    lock_acquired = lt.Acquire(lock_args.fps.front(), mode);\n    uniq_fps_ = {lock_args.fps.front()};  // needed only for tests.\n  } else {\n    uniq_fps_.clear();\n\n    for (LockFp fp : lock_args.fps) {\n      if (uniq_fps_.insert(fp).second) {\n        lock_acquired &= lt.Acquire(fp, mode);\n      }\n    }\n  
}\n\n  DVLOG(2) << \"Acquire \" << IntentLock::ModeName(mode) << \" for \" << lock_args.fps[0]\n           << \" has_acquired: \" << lock_acquired;\n\n  return lock_acquired;\n}\n\nvoid DbSlice::Release(IntentLock::Mode mode, const KeyLockArgs& lock_args) {\n  if (lock_args.fps.empty()) {  // Can be empty for NO_KEY_TRANSACTIONAL commands.\n    return;\n  }\n\n  DVLOG(2) << \"Release \" << IntentLock::ModeName(mode) << \" for \" << lock_args.fps[0];\n  auto& lt = db_arr_[lock_args.db_index]->trans_locks;\n  if (lock_args.fps.size() == 1) {\n    uint64_t fp = lock_args.fps.front();\n    lt.Release(fp, mode);\n  } else {\n    uniq_fps_.clear();\n    for (LockFp fp : lock_args.fps) {\n      if (uniq_fps_.insert(fp).second) {\n        lt.Release(fp, mode);\n      }\n    }\n  }\n  uniq_fps_.clear();\n}\n\nbool DbSlice::CheckLock(IntentLock::Mode mode, DbIndex dbid, uint64_t fp) const {\n  const auto& lt = db_arr_[dbid]->trans_locks;\n  auto lock = lt.Find(fp);\n  if (lock) {\n    return lock->Check(mode);\n  }\n  return true;\n}\n\nvoid DbSlice::PreUpdateBlocking(DbIndex db_ind, const Iterator& it) {\n  CallChangeCallbacks(db_ind, ChangeReq{it.GetInnerIt()});  // blocking point.\n  auto inner_it = it.GetInnerIt();                          // must call again to launder.\n  inner_it.SetVersion(NextVersion());\n}\n\nvoid DbSlice::PostUpdate(DbIndex db_ind, std::string_view key) {\n  auto& db = *db_arr_[db_ind];\n  auto& watched_keys = db.watched_keys;\n  if (!watched_keys.empty()) {\n    // Check if the key is watched.\n    if (auto wit = watched_keys.find(key); wit != watched_keys.end()) {\n      for (auto* dirty_ptr : wit->second)\n        dirty_ptr->store(true, memory_order_relaxed);\n      // No connections need to watch it anymore.\n      watched_keys.erase(wit);\n    }\n  }\n\n  ++events_.update;\n\n  if (db.slots_stats) {\n    db.slots_stats[KeySlot(key)].total_writes += 1;\n  }\n\n  if (!client_tracking_map_.empty()) {\n    
QueueInvalidationTrackingMessageAtomic(key);\n  }\n}\n\nDbSlice::ItAndExp DbSlice::ExpireIfNeeded(const Context& cntx, Iterator it) const {\n  auto res = ExpireIfNeeded(cntx, it.GetInnerIt());\n  return {.it = Iterator::FromPrime(res.it), .exp_it = ExpIterator::FromPrime(res.exp_it)};\n}\n\nDbSlice::PrimeItAndExp DbSlice::ExpireIfNeeded(const Context& cntx, PrimeIterator it) const {\n  if (!it->first.HasExpire()) {\n    LOG(DFATAL) << \"Invalid call to ExpireIfNeeded\";\n    return {it, ExpireIterator{}};\n  }\n\n  int64_t expire_time = it->first.GetExpireTime();\n\n  // Never do expiration on replica or if expiration is disabled.\n  if (int64_t(cntx.time_now_ms) < expire_time || owner_->IsReplica() || !expire_allowed_) {\n    return {it, ExpireIterator{}};\n  }\n\n  string scratch;\n  string_view key = it->first.GetSlice(&scratch);\n\n  // Replicate expiry\n  if (auto journal = owner_->journal(); journal) {\n    RecordExpiryBlocking(cntx.db_index, key);\n  }\n\n  auto& db = db_arr_[cntx.db_index];\n  if (expired_keys_events_recording_)\n    db->expired_keys_events_.emplace_back(key);\n\n  auto obj_type = it->second.ObjType();\n  if (doc_del_cb_ && (obj_type == OBJ_JSON || obj_type == OBJ_HASH)) {\n    doc_del_cb_(key, cntx, it->second);\n  }\n\n  const_cast<DbSlice*>(this)->PerformDeletionAtomic(Iterator(it, StringOrView::FromView(key)),\n                                                    ExpIterator{}, db.get());\n\n  ++events_.expired_keys;\n  db->stats.events.expired_keys++;\n\n  return {PrimeIterator{}, ExpireIterator{}};\n}\n\nvoid DbSlice::ExpireAllIfNeeded() {\n  // We hold no locks to any of the keys so we should Wait() here such that\n  // we don't preempt in ExpireIfNeeded\n  serialization_latch_.Wait();\n  // Disable flush journal changes to prevent preemption in traverse.\n  journal::DisableFlushGuard journal_flush_guard(owner_->journal());\n\n  for (DbIndex db_index = 0; db_index < db_arr_.size(); db_index++) {\n    if (!db_arr_[db_index])\n      
continue;\n    auto& db = *db_arr_[db_index];\n\n    auto cb = [&](PrimeTable::iterator prime_it) {\n      if (prime_it->first.HasExpire()) {\n        ExpireIfNeeded(Context{nullptr, db_index, GetCurrentTimeMs()}, prime_it);\n      }\n    };\n\n    PrimeTable::Cursor cursor;\n    do {\n      cursor = db.prime.Traverse(cursor, cb);\n    } while (cursor);\n  }\n}\n\nuint64_t DbSlice::RegisterOnChange(ChangeCallback cb) {\n  return change_cb_.emplace_back(NextVersion(), std::move(cb)).first;\n}\n\nuint64_t DbSlice::RegisterOnMove(MovedCallback cb) {\n  ++next_moved_id_;\n  moved_cb_.emplace_back(next_moved_id_, cb);\n  return next_moved_id_;\n}\n\n// Ordering invariant (PIT mode):\n//   When the traversal fiber visits a bucket in BucketSaveCb, earlier-registered snapshots\n//   (those with snapshot_version_ < this snapshot's version) may not have serialized this bucket\n//   yet. FlushChangeToEarlierCallbacks invokes their OnDbChange callbacks so they serialize the\n//   bucket before the current snapshot stamps it with its own version. 
Without this, an earlier\n//   snapshot could miss the bucket entirely — its traversal already passed it, and the version\n//   stamp from the current snapshot would cause the earlier snapshot's OnDbChange to skip it.\nvoid DbSlice::FlushChangeToEarlierCallbacks(DbIndex db_ind, Iterator it, uint64_t upper_bound) {\n  unique_lock<LocalLatch> lk(serialization_latch_);\n\n  uint64_t bucket_version = it.GetVersion();\n  // change_cb_ is ordered by version.\n  DVLOG(2) << \"Running callbacks in dbid \" << db_ind << \" with bucket_version=\" << bucket_version\n           << \", upper_bound=\" << upper_bound;\n\n  const size_t limit = change_cb_.size();\n  auto ccb = change_cb_.begin();\n  for (size_t i = 0; i < limit; ++i) {\n    uint64_t cb_version = ccb->first;\n    DCHECK_LE(cb_version, upper_bound);\n    if (cb_version == upper_bound) {\n      return;\n    }\n    if (bucket_version < cb_version) {\n      ccb->second(db_ind, ChangeReq{it.GetInnerIt()});\n    }\n    ++ccb;\n  }\n}\n\n//! Unregisters the callback.\nvoid DbSlice::UnregisterOnChange(uint64_t id) {\n  serialization_latch_.Wait();\n  auto it = find_if(change_cb_.begin(), change_cb_.end(),\n                    [id](const auto& cb) { return cb.first == id; });\n  CHECK(it != change_cb_.end());\n  change_cb_.erase(it);\n}\n\nvoid DbSlice::UnregisterOnMoved(uint64_t id) {\n  serialization_latch_.Wait();\n  auto it =\n      find_if(moved_cb_.begin(), moved_cb_.end(), [id](const auto& cb) { return cb.first == id; });\n  CHECK(it != moved_cb_.end());\n  moved_cb_.erase(it);\n}\n\nauto DbSlice::DeleteExpiredStep(const Context& cntx, unsigned count) -> DeleteExpiredStats {\n  auto& db = *db_arr_[cntx.db_index];\n  DeleteExpiredStats result;\n\n  std::string stash;\n\n  unsigned checked = 0;\n  auto cb = [&](PrimeTable::iterator it) {\n    result.traversed++;\n\n    if (!it->first.HasExpire())\n      return;\n\n    checked++;\n\n    string_view key = it->first.GetSlice(&stash);\n    if 
(!CheckLock(IntentLock::EXCLUSIVE, cntx.db_index, key))\n      return;\n\n    int64_t ttl = it->first.GetExpireTime() - cntx.time_now_ms;\n    if (ttl <= 0) {\n      result.deleted_bytes += it->first.MallocUsed() + it->second.MallocUsed();\n      ExpireIfNeeded(cntx, it);\n      ++result.deleted;\n    }\n  };\n\n  unsigned i = 0;\n\n  auto quota_remains = [] {\n    // Break out of traversal if we spent more than 1ms\n    return base::CycleClock::ToUsec(ThisFiber::GetRunningTimeCycles()) < 1000;\n  };\n\n  for (; i < count / 3 && quota_remains(); ++i) {\n    db.expire_cursor = db.prime.Traverse(db.expire_cursor, cb);\n  }\n\n  // Continue traversing if we had a strong deletion rate among checked TTL keys.\n  if (result.deleted * 4 > checked) {\n    for (; i < count && quota_remains(); ++i) {\n      db.expire_cursor = db.prime.Traverse(db.expire_cursor, cb);\n    }\n  }\n\n  // Send and clear accumulated expired key events\n  if (auto& events = db_arr_[cntx.db_index]->expired_keys_events_; !events.empty()) {\n    ChannelStore* store = ServerState::tlocal()->channel_store();\n    store->SendMessages(absl::StrCat(\"__keyevent@\", cntx.db_index, \"__:expired\"), events, false);\n    events.clear();\n  }\n\n  return result;\n}\n\nint32_t DbSlice::GetNextSegmentForEviction(int32_t segment_id, DbIndex db_ind) const {\n  // wraps around if we reached the end\n  return db_arr_[db_ind]->prime.NextSeg((size_t)segment_id) %\n         db_arr_[db_ind]->prime.GetSegmentCount();\n}\n\npair<uint64_t, size_t> DbSlice::FreeMemWithEvictionStepAtomic(DbIndex db_ind, const Context& cntx,\n                                                              size_t starting_segment_id,\n                                                              size_t increase_goal_bytes) {\n  // Disable flush journal changes to prevent preemption\n  journal::DisableFlushGuard journal_flush_guard(shard_owner()->journal());\n  FiberAtomicGuard guard;\n  DCHECK(!owner_->IsReplica());\n\n  size_t evicted_items = 
0, evicted_bytes = 0;\n\n  if (owner_->tiered_storage()) {\n    evicted_bytes = owner_->tiered_storage()->ReclaimMemory(increase_goal_bytes);\n    if (evicted_bytes >= increase_goal_bytes)\n      return {0, evicted_bytes};\n  }\n\n  if ((!IsCacheMode()) || !expire_allowed_)\n    return {0, 0};\n\n  auto max_eviction_per_hb = GetFlag(FLAGS_max_eviction_per_heartbeat);\n  auto max_segment_to_consider = GetFlag(FLAGS_max_segment_to_consider);\n\n  auto time_start = absl::GetCurrentTimeNanos();\n  auto& db_table = db_arr_[db_ind];\n  constexpr int32_t num_slots = PrimeTable::Segment_t::kSlotNum;\n\n  string tmp;\n\n  bool record_keys = owner_->journal() || expired_keys_events_recording_;\n  vector<string> keys_to_journal;\n\n  for (int32_t slot_id = num_slots - 1; slot_id >= 0; --slot_id) {\n    for (int32_t bucket_id = PrimeTable::LargestBucketId(); bucket_id >= 0; --bucket_id) {\n      // pick a random segment to start with in each eviction,\n      // as segment_id does not imply any recency, and random selection should be fair enough\n      int32_t segment_id = starting_segment_id;\n      for (size_t num_seg_visited = 0; num_seg_visited < max_segment_to_consider;\n           ++num_seg_visited, segment_id = GetNextSegmentForEviction(segment_id, db_ind)) {\n        const auto& segment = db_table->prime.GetSegment(segment_id);\n        if (unsigned(bucket_id) >= segment->num_buckets())\n          bucket_id = segment->num_buckets() - 1;\n        const auto& bucket = segment->GetBucket(bucket_id);\n        if (bucket.IsEmpty() || !bucket.IsBusy(slot_id))\n          continue;\n\n        auto evict_it = db_table->prime.GetIterator(segment_id, bucket_id, slot_id);\n        // TODO: consider evicting inline entries as well\n\n        bool has_allocated = evict_it->second.HasAllocated() || evict_it->first.HasAllocated();\n        if (evict_it->first.IsSticky() || !has_allocated)\n          continue;\n\n        // check if the key is locked by looking up transaction table.\n   
     const auto& lt = db_table->trans_locks;\n        string_view key = evict_it->first.GetSlice(&tmp);\n        if (lt.Find(LockTag(key)).has_value())\n          continue;\n\n        if (record_keys)\n          keys_to_journal.emplace_back(key);\n\n        evicted_bytes += evict_it->first.MallocUsed() + evict_it->second.MallocUsed();\n        ++evicted_items;\n\n        Del(cntx, Iterator(evict_it, StringOrView::FromView(key)));\n\n        // returns when whichever condition is met first\n        if ((evicted_items == max_eviction_per_hb) || (evicted_bytes >= increase_goal_bytes))\n          goto finish;\n      }\n    }\n  }\n\nfinish:\n  // send the deletion to the replicas.\n  for (string_view key : keys_to_journal) {\n    if (auto journal = owner_->journal(); journal)\n      // Won't block because we disabled journal flushing. See first line of this function.\n      RecordExpiryBlocking(db_ind, key);\n\n    if (expired_keys_events_recording_)\n      db_table->expired_keys_events_.emplace_back(key);\n  }\n\n  // This might not always be atomic on exceptional cases -- see comments on the function\n  // declaration.\n  SendQueuedInvalidationMessagesAsync();\n  auto time_finish = absl::GetCurrentTimeNanos();\n  events_.evicted_keys += evicted_items;\n  db_arr_[db_ind]->stats.events.evicted_keys += evicted_items;\n  DVLOG(2) << \"Eviction time (us): \" << (time_finish - time_start) / 1000;\n  return pair<uint64_t, size_t>{evicted_items, evicted_bytes};\n}\n\nvoid DbSlice::CreateDb(DbIndex db_ind) {\n  auto& db = db_arr_[db_ind];\n  if (!db) {\n    db.reset(new DbTable{owner_->memory_resource(), db_ind});\n    table_memory_ += db->table_memory();\n  }\n}\n\nvoid DbSlice::RegisterWatchedKey(DbIndex db_indx, std::string_view key,\n                                 std::atomic_bool* dirty_ptr) {\n  // Because we might insert while another fiber is preempted\n  db_arr_[db_indx]->watched_keys[key].push_back(dirty_ptr);\n}\n\nvoid 
DbSlice::UnregisterConnectionWatches(absl::Span<const std::pair<DbIndex, std::string>> keys,\n                                          const std::atomic_bool* dirty_ptr) {\n  for (const auto& [db_indx, key] : keys) {\n    auto& watched_keys = db_arr_[db_indx]->watched_keys;\n    if (auto it = watched_keys.find(key); it != watched_keys.end()) {\n      it->second.erase(std::remove(it->second.begin(), it->second.end(), dirty_ptr),\n                       it->second.end());\n      if (it->second.empty())\n        watched_keys.erase(it);\n    }\n  }\n}\n\nvoid DbSlice::InvalidateDbWatches(DbIndex db_indx) {\n  for (const auto& [key, conn_list] : db_arr_[db_indx]->watched_keys) {\n    for (auto* dirty_ptr : conn_list)\n      dirty_ptr->store(true, memory_order_relaxed);\n  }\n}\n\nvoid DbSlice::InvalidateSlotWatches(const cluster::SlotSet& slot_ids) {\n  for (const auto& [key, conn_list] : db_arr_[0]->watched_keys) {\n    SlotId sid = KeySlot(key);\n    if (!slot_ids.Contains(sid)) {\n      continue;\n    }\n    for (auto* dirty_ptr : conn_list)\n      dirty_ptr->store(true, memory_order_relaxed);\n  }\n}\n\nvoid DbSlice::RemoveOffloadedEntriesFromTieredStorage(absl::Span<const DbIndex> indices,\n                                                      const DbTableArray& db_arr) const {\n  // Currently being used only for tiered storage.\n  TieredStorage* tiered_storage = shard_owner()->tiered_storage();\n  string scratch;\n  for (DbIndex index : indices) {\n    const auto& db_ptr = db_arr[index];\n    if (!db_ptr)\n      continue;\n\n    // Delete all tiered entries\n    PrimeTable::Cursor cursor;\n    do {\n      cursor = db_ptr->prime.Traverse(cursor, [&](PrimeIterator it) {\n        if (it->second.IsExternal()) {\n          tiered_storage->Delete(index, &it->second);\n        } else if (it->second.HasStashPending()) {\n          tiered_storage->CancelStash(index, it->first.GetSlice(&scratch), &it->second);\n        }\n      });\n    } while (cursor);\n\n    // While 
tiered_storage may delete some of its entries asynchronously, it updates\n    // stats.tiered_entries immediately during the Delete call, therefore tiered_entries\n    // should be zero by this point.\n    CHECK_EQ(db_ptr->stats.tiered_entries, 0u);\n  }\n}\n\nvoid DbSlice::SetDocDeletionCallback(DocDeletionCallback ddcb) {\n  doc_del_cb_ = std::move(ddcb);\n}\n\nvoid DbSlice::ResetUpdateEvents() {\n  events_.update = 0;\n}\n\nvoid DbSlice::ResetEvents() {\n  events_ = {};\n  for (auto& db : db_arr_) {\n    if (db) {\n      db->stats.events = {};\n    }\n  }\n}\n\nvoid DbSlice::SetNotifyKeyspaceEvents(std::string_view notify_keyspace_events) {\n  expired_keys_events_recording_ = !notify_keyspace_events.empty();\n}\n\nvoid DbSlice::QueueInvalidationTrackingMessageAtomic(std::string_view key) {\n  FiberAtomicGuard guard;\n  auto it = client_tracking_map_.find(key);\n  if (it == client_tracking_map_.end()) {\n    return;\n  }\n\n  ConnectionHashSet moved_set = std::move(it->second);\n  client_tracking_map_.erase(it);\n\n  auto [pend_it, inserted] = pending_send_map_.emplace(key, std::move(moved_set));\n  if (!inserted) {\n    ConnectionHashSet& client_set = pend_it->second;\n    for (auto& weak_ref : moved_set) {\n      client_set.insert(weak_ref);\n    }\n  }\n}\n\nvoid DbSlice::SendQueuedInvalidationMessagesCb(const TrackingMap& track_map,\n                                               unsigned calling_thread_id) const {\n  for (auto& [key, client_list] : track_map) {\n    for (auto& weak_ref : client_list) {\n      if (weak_ref.IsExpired() || (weak_ref.LastKnownThreadId() != calling_thread_id)) {\n        continue;  // Expired or migrated.\n      }\n      auto* conn = weak_ref.Get();\n      auto* cntx = static_cast<ConnectionContext*>(conn->cntx());\n      if (cntx && cntx->conn_state.tracking_info_.IsTrackingOn()) {\n        conn->SendInvalidationMessageAsync({key});\n      }\n    }\n  }\n}\n\nvoid DbSlice::SendQueuedInvalidationMessages() {\n  // We run while 
loop because when we block below, we might have new items added to\n  // pending_send_map_.\n  while (!pending_send_map_.empty()) {\n    // Notify all the clients. this function is not efficient,\n    // because it broadcasts to all threads unrelated to the subscribers for the key.\n    auto local_map = std::move(pending_send_map_);\n    pending_send_map_ = {};\n    auto cb = [&](unsigned thread_id, util::ProactorBase*) {\n      SendQueuedInvalidationMessagesCb(local_map, thread_id);\n    };\n\n    shard_set->pool()->AwaitBrief(std::move(cb));\n  }\n}\n\n// This function might preempt if the task queue within DispatchBrief is full and we can't\n// enqueue the callback. Although a rare case, this code might not be atomic.\nvoid DbSlice::SendQueuedInvalidationMessagesAsync() {\n  if (pending_send_map_.empty()) {\n    return;\n  }\n  // DispatchBrief will copy local_map\n  auto cb = [lm = std::move(pending_send_map_), this](unsigned idx, util::ProactorBase*) {\n    SendQueuedInvalidationMessagesCb(lm, idx);\n  };\n\n  shard_set->pool()->DispatchBrief(std::move(cb));\n}\n\nvoid DbSlice::StartSampleTopK(DbIndex db_ind, uint32_t min_freq) {\n  auto& db = *db_arr_[db_ind];\n  if (db.sample_top_keys) {\n    LOG(INFO) << \"Sampling already started for db \" << db_ind;\n    return;\n  }\n\n  TopKeys::Options opts;\n  opts.min_key_count_to_record = min_freq;\n  db.sample_top_keys = new DbTable::SampleTopKeys;\n  db.sample_top_keys->top_keys = new TopKeys(opts);\n}\n\nauto DbSlice::StopSampleTopK(DbIndex db_ind) -> SamplingResult {\n  auto& db = *db_arr_[db_ind];\n\n  if (!db.sample_top_keys) {\n    LOG(WARNING) << \"Sampling not started for db \" << db_ind;\n    return {};\n  }\n\n  auto fmap = db.sample_top_keys->top_keys->GetTopKeys();\n  SamplingResult result;\n  result.total_samples = db.sample_top_keys->total_samples;\n  delete db.sample_top_keys;\n  db.sample_top_keys = nullptr;\n\n  result.top_keys.reserve(fmap.size());\n  while (!fmap.empty()) {\n    auto node = 
fmap.extract(fmap.begin());  // Clear the map to avoid memory leak.\n    result.top_keys.emplace_back(std::move(node.key()), node.mapped());\n  }\n  return result;\n}\n\nvoid DbSlice::StartSampleKeys(DbIndex db_ind) {\n  auto& db = *db_arr_[db_ind];\n  if (db.sample_unique_keys) {\n    LOG(INFO) << \"Sampling already started for db \" << db_ind;\n    return;\n  }\n\n  HllBufferPtr hll_buf;\n  hll_buf.size = getDenseHllSize();\n  hll_buf.hll = new uint8_t[hll_buf.size];\n  CHECK_EQ(0, createDenseHll(hll_buf));\n  db.sample_unique_keys = new DbTable::SampleUniqueKeys;\n  db.sample_unique_keys->dense_hll = hll_buf.hll;\n}\n\n// Returns number of unique keys sampled.\nauto DbSlice::StopSampleKeys(DbIndex db_ind) -> UniqueSampleResult {\n  auto& db = *db_arr_[db_ind];\n  if (!db.sample_unique_keys) {\n    LOG(INFO) << \"Keys sampling not started for db \" << db_ind;\n    return {};\n  }\n  HllBufferPtr hll_buf;\n  hll_buf.hll = db.sample_unique_keys->dense_hll;\n  hll_buf.size = getDenseHllSize();\n  UniqueSampleResult result;\n  result.unique_keys_count = pfcountSingle(hll_buf);\n  result.total_samples = db.sample_unique_keys->total_samples;\n\n  delete db.sample_unique_keys;\n  db.sample_unique_keys = nullptr;\n\n  return result;\n}\n\nvoid DbSlice::StartSampleValues(DbIndex db_ind) {\n  auto& db = *db_arr_[db_ind];\n  if (db.sample_values_hist) {\n    LOG(INFO) << \"Sampling already started for db \" << db_ind;\n    return;\n  }\n\n  db.sample_values_hist = new base::Histogram();\n}\n\nunique_ptr<base::Histogram> DbSlice::StopSampleValues(DbIndex db_ind) {\n  auto& db = *db_arr_[db_ind];\n  if (!db.sample_values_hist) {\n    LOG(INFO) << \"Values sampling not started for db \" << db_ind;\n    return {};\n  }\n\n  return unique_ptr<base::Histogram>{exchange(db.sample_values_hist, nullptr)};\n}\n\nvoid DbSlice::PerformDeletionAtomic(const Iterator& del_it, const ExpIterator& exp_it,\n                                    DbTable* table, bool async) {\n  FiberAtomicGuard 
guard;\n  size_t table_before = table->table_memory();\n\n  if (del_it->second.HasFlag()) {\n    if (!SetMCFlag(table->index, del_it->first, 0)) {\n      LOG(DFATAL) << \"Internal error, inconsistent state, mcflag should be present but not found \"\n                  << del_it->first.ToString();\n    }\n  }\n\n  DbTableStats& stats = table->stats;\n\n  if (del_it->first.HasExpire())\n    --stats.expire_count;\n\n  PrimeValue& pv = del_it->second;\n\n  if (pv.HasStashPending()) {\n    string scratch;\n    string_view key = del_it->first.GetSlice(&scratch);\n    shard_owner()->tiered_storage()->CancelStash(table->index, key, &pv);\n  } else if (pv.IsExternal()) {\n    shard_owner()->tiered_storage()->Delete(table->index, &del_it->second);\n  }\n\n  ssize_t value_heap_size = pv.MallocUsed(), key_size_used = del_it->first.MallocUsed();\n  if (del_it->first.IsInline()) {\n    --stats.inline_keys;\n  } else {\n    AccountObjectMemory(del_it.key(), OBJ_KEY, -key_size_used, table);  // Key\n  }\n  AccountObjectMemory(del_it.key(), pv.ObjType(), -value_heap_size, table);  // Value\n\n  if (async && MayDeleteAsynchronously(pv)) {\n    DenseSet* ds = (DenseSet*)pv.RObjPtr();\n    pv.SetRObjPtr(nullptr);\n    const size_t kClearStepSize = 512;\n\n    uint32_t next = ds->ClearStep(0, kClearStepSize);\n    if (next < ds->BucketCount()) {\n      AsyncDeleter::EnqueDeletion(next, ds);\n    } else {\n      CompactObj::DeleteMR<DenseSet>(ds);\n    }\n  }\n\n  if (table->slots_stats) {\n    SlotId sid = KeySlot(del_it.key());\n    table->slots_stats[sid].key_count -= 1;\n  }\n\n  table->prime.Erase(del_it.GetInnerIt());\n\n  // Note, currently we do not shrink our tables upon deletion.\n  // This DCHECK ensures that if we decide to do so, we will have to update table_memory_\n  // accordingly.\n  DCHECK_EQ(table->table_memory(), table_before);\n\n  --entries_count_;\n  memory_budget_ += (value_heap_size + key_size_used);\n\n  if (!client_tracking_map_.empty()) {\n    
QueueInvalidationTrackingMessageAtomic(del_it.key());\n  }\n}\n\nvoid DbSlice::OnCbFinishBlocking() {\n  if (IsCacheMode()) {\n    // move fetched items to local variable\n    auto fetched_items = std::move(fetched_items_);\n    fetched_items_ = {};\n    for (const auto& [key_hash, db_index] : fetched_items) {\n      auto& db = *db_arr_[db_index];\n\n      // We intentionally don't do extra key checking on this callback to speedup\n      // fetching. Probability of having hash collision is quite low and for bumpup\n      // purposes it should be fine if different key (with same hash) is returned.\n      auto predicate = [](const PrimeKey&) { return true; };\n\n      PrimeIterator it = db.prime.FindFirst(key_hash, predicate);\n\n      if (!IsValid(it)) {\n        continue;\n      }\n\n      if (!change_cb_.empty()) {\n        auto bump_cb = [&](PrimeTable::bucket_iterator bit) {\n          CallChangeCallbacks(db_index, ChangeReq{bit});\n        };\n        db.prime.CVCUponBump(change_cb_.back().first, it, bump_cb);\n      }\n\n      // We must not change the bucket's internal order during serialization\n      serialization_latch_.Wait();\n      PrimeBumpPolicy policy;\n      auto bump_it = db.prime.BumpUp(it, policy);\n      if (bump_it != it) {  // the item was bumped\n        ++events_.bumpups;\n      }\n      CallMovedCallbacks(db_index, policy.moved_items());\n    }\n  }\n\n  // Sends only if !pending_send_map_.empty()\n  SendQueuedInvalidationMessages();\n}\n\nvoid DbSlice::CallChangeCallbacks(DbIndex id, const ChangeReq& cr) const {\n  if (change_cb_.empty())\n    return;\n\n  // does not preempt, just increments the counter.\n  unique_lock<LocalLatch> lk(serialization_latch_);\n\n  const size_t limit = change_cb_.size();\n  auto ccb = change_cb_.begin();\n  for (size_t i = 0; i < limit; ++i) {\n    CHECK(ccb->second);\n    ccb->second(id, cr);\n    ++ccb;\n  }\n}\n\nvoid DbSlice::CallMovedCallbacks(\n    DbIndex id, const 
std::vector<std::pair<PrimeTable::Cursor, PrimeTable::Cursor>>& moved_items) {\n  if (moved_cb_.empty())\n    return;\n\n  // does not preempt, just increments the counter.\n  unique_lock<LocalLatch> lk(serialization_latch_);\n\n  const size_t limit = moved_cb_.size();\n  auto ccb = moved_cb_.begin();\n  for (size_t i = 0; i < limit; ++i) {\n    CHECK(ccb->second);\n    ccb->second(id, moved_items);\n    ++ccb;\n  }\n}\n\n}  // namespace dfly\n"
  },
  {
    "path": "src/server/db_slice.h",
    "content": "// Copyright 2022, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#pragma once\n\n#include <absl/container/flat_hash_map.h>\n#include <absl/container/flat_hash_set.h>\n\n#include <atomic>\n\n#include \"common/string_or_view.h\"\n#include \"core/mi_memory_resource.h\"\n#include \"facade/connection_ref.h\"\n#include \"facade/op_status.h\"\n#include \"server/common.h\"\n#include \"server/common_types.h\"\n#include \"server/synchronization.h\"\n#include \"server/table.h\"\n#include \"server/tx_base.h\"\n#include \"util/fibers/fibers.h\"\n#include \"util/fibers/synchronization.h\"\n\nnamespace dfly {\n\nnamespace cluster {\nclass SlotRanges;\nclass SlotSet;\n}  // namespace cluster\n\nusing facade::OpResult;\n\nstruct DbStats : public DbTableStats {\n  // number of active keys.\n  size_t key_count = 0;\n\n  // total number of slots in prime dictionary (key capacity).\n  size_t prime_capacity = 0;\n\n  // Memory used by dictionaries.\n  size_t table_mem_usage = 0;\n\n  // We override additional DbStats fields explicitly in DbSlice::GetStats().\n  using DbTableStats::operator=;\n\n  DbStats& operator+=(const DbStats& o);\n};\n\nstruct SliceEvents {\n  // Number of eviction events.\n  size_t evicted_keys = 0;\n\n  // evictions that were performed when we have a negative memory budget.\n  size_t hard_evictions = 0;\n  size_t expired_keys = 0;\n  size_t garbage_checked = 0;\n  size_t garbage_collected = 0;\n  size_t stash_unloaded = 0;\n  size_t bumpups = 0;  // how many bump-upds we did.\n\n  // hits/misses on keys\n  size_t hits = 0;\n  size_t misses = 0;\n  size_t mutations = 0;\n\n  // ram hit/miss when tiering is enabled\n  size_t ram_hits = 0;\n  size_t ram_cool_hits = 0;\n  size_t ram_misses = 0;\n\n  // how many insertions were rejected due to OOM.\n  size_t insertion_rejections = 0;\n\n  // how many updates and insertions of keys between snapshot intervals\n  size_t update = 0;\n\n  uint64_t huff_encode_total = 
0, huff_encode_success = 0;\n\n  SliceEvents& operator+=(const SliceEvents& o);\n};\n\nclass DbSlice {\n  DbSlice(const DbSlice&) = delete;\n  void operator=(const DbSlice&) = delete;\n\n public:\n  // Auto-laundering iterator wrapper. Laundering means re-finding keys if they moved between\n  // buckets.\n  template <typename T> class IteratorT {\n   public:\n    IteratorT() = default;\n\n    IteratorT(T it, StringOrView key)\n        : it_(it), fiber_epoch_(util::fb2::FiberSwitchEpoch()), key_(std::move(key)) {\n    }\n\n    static IteratorT FromPrime(T it) {\n      if (!IsValid(it)) {\n        return IteratorT();\n      }\n\n      std::string key;\n      it->first.GetString(&key);\n      return IteratorT(it, StringOrView::FromString(std::move(key)));\n    }\n\n    IteratorT(const IteratorT& o) = default;\n    IteratorT(IteratorT&& o) = default;\n    IteratorT& operator=(const IteratorT& o) = default;\n    IteratorT& operator=(IteratorT&& o) = default;\n\n    // Do NOT store this iterator in a variable, as it will not be laundered automatically.\n    const T& GetInnerIt() const {\n      LaunderIfNeeded();\n      return it_;\n    }\n\n    auto operator->() const {\n      return GetInnerIt().operator->();\n    }\n\n    auto is_done() const {\n      return GetInnerIt().is_done();\n    }\n\n    std::string_view key() const {\n      return key_.view();\n    }\n\n    auto IsOccupied() const {\n      return GetInnerIt().IsOccupied();\n    }\n\n    auto GetVersion() const {\n      return GetInnerIt().GetVersion();\n    }\n\n   private:\n    void LaunderIfNeeded() const;  // const is a lie\n\n    mutable T it_;\n    mutable uint64_t fiber_epoch_ = 0;\n    StringOrView key_;\n  };\n\n  using Iterator = IteratorT<PrimeIterator>;\n  using ConstIterator = IteratorT<PrimeConstIterator>;\n  using ExpIterator = IteratorT<ExpireIterator>;\n  using ExpConstIterator = IteratorT<ExpireConstIterator>;\n\n  class AutoUpdater {\n   public:\n    AutoUpdater();\n    AutoUpdater(const 
AutoUpdater& o) = delete;\n    AutoUpdater& operator=(const AutoUpdater& o) = delete;\n    AutoUpdater(AutoUpdater&& o) noexcept;\n    AutoUpdater& operator=(AutoUpdater&& o) noexcept;\n    ~AutoUpdater();\n\n    // Removes the memory usage attributed to the iterator and resets orig_heap_size.\n    // Used when the existing object is overridden by a new one.\n    void ReduceHeapUsage();\n\n    void Run();\n    void Cancel();\n\n   private:\n    // Wrap members in a struct to auto generate operator=\n    struct Fields {\n      DbSlice* db_slice = nullptr;\n      DbIndex db_ind = 0;\n\n      // TODO: remove `it` from ItAndUpdater as it's redundant with respect to this iterator.\n      Iterator it;\n      std::string_view key;\n\n      // The following fields are calculated at init time\n      size_t orig_value_heap_size = 0;\n    };\n\n    AutoUpdater(DbIndex db_ind, std::string_view key, const Iterator& it, DbSlice* db_slice);\n\n    friend class DbSlice;\n\n    Fields fields_ = {};\n  };\n\n  struct Stats {\n    // DbStats db;\n    std::vector<DbStats> db_stats;\n    SliceEvents events;\n    size_t small_string_bytes = 0;\n  };\n\n  using Context = DbContext;\n  using ChangeReq = dfly::ChangeReq;\n\n  // Called before deleting an element to notify the search indices.\n  using DocDeletionCallback =\n      std::function<void(std::string_view, const Context&, const PrimeValue& pv)>;\n\n  struct ExpireParams {\n    bool IsDefined() const {\n      return persist || value > INT64_MIN;\n    }\n\n    static int64_t Cap(int64_t value, TimeUnit unit);\n\n    // Calculate relative and absolue timepoints.\n    std::pair<int64_t, int64_t> Calculate(uint64_t now_msec, bool cap) const;\n\n    // Return true if relative expiration is in the past\n    bool IsExpired(uint64_t now_msec) const {\n      return Calculate(now_msec, false).first < 0;\n    }\n\n   public:\n    int64_t value = INT64_MIN;  // undefined\n    TimeUnit unit = TimeUnit::SEC;\n\n    bool absolute = false;\n    
bool persist = false;        // persist means remove all expiry\n    int32_t expire_options = 0;  // ExpireFlags\n  };\n\n  DbSlice(uint32_t index, bool cache_mode, EngineShard* owner);\n  ~DbSlice();\n\n  // Activates `db_ind` database if it does not exist (see ActivateDb below).\n  void Reserve(DbIndex db_ind, size_t key_size);\n\n  // Returns statistics for the whole db slice. A bit heavy operation.\n  Stats GetStats() const;\n\n  // Returns slot statistics for db 0.\n  SlotStats GetSlotStats(SlotId sid) const;\n\n  void UpdateExpireBase(uint64_t now, unsigned generation) {\n    expire_base_[generation & 1] = now;\n  }\n\n  void UpdateMemoryParams(int64_t budget, size_t bytes_per_object) {\n    memory_budget_ = budget;\n    bytes_per_object_ = bytes_per_object;\n  }\n\n  ssize_t memory_budget() const {\n    return memory_budget_;\n  }\n\n  size_t bytes_per_object() const {\n    return bytes_per_object_;\n  }\n\n  int64_t ExpireTime(const ExpirePeriod& val) const {\n    return expire_base_[0] + val.duration_ms();\n  }\n\n  ExpirePeriod FromAbsoluteTime(uint64_t time_ms) const {\n    return ExpirePeriod{time_ms - expire_base_[0]};\n  }\n\n  struct ItAndUpdater {\n    Iterator it;\n    ExpIterator exp_it;\n    AutoUpdater post_updater;\n    bool is_new = false;\n  };\n\n  ItAndUpdater FindMutable(const Context& cntx, std::string_view key);\n  OpResult<ItAndUpdater> FindMutable(const Context& cntx, std::string_view key,\n                                     unsigned req_obj_type);\n\n  struct ItAndExpConst {\n    ConstIterator it;\n    ExpConstIterator exp_it;\n  };\n\n  ItAndExpConst FindReadOnly(const Context& cntx, std::string_view key) const;\n  OpResult<ConstIterator> FindReadOnly(const Context& cntx, std::string_view key,\n                                       unsigned req_obj_type) const;\n\n  // Consider using req_obj_type to specify the type of object you expect.\n  // Because it can evaluate to bugs like this:\n  // - We already have a key but with 
another type you expect.\n  // - During FindMutable we will not use req_obj_type, so the object type will not be checked.\n  // - AddOrFind will return the object with this key but with a different type.\n  // - Then you will update this object with a different type, which will lead to an error.\n  // If you proved the key type on your own, please add a comment there why don't specify\n  // req_obj_type\n  OpResult<ItAndUpdater> AddOrFind(const Context& cntx, std::string_view key,\n                                   std::optional<unsigned> req_obj_type);\n\n  // Same as AddOrSkip, but overwrites in case entry exists.\n  OpResult<ItAndUpdater> AddOrUpdate(const Context& cntx, std::string_view key, PrimeValue obj,\n                                     uint64_t expire_at_ms);\n\n  // Adds a new entry. Requires: key does not exist in this slice.\n  // Returns the iterator to the newly added entry.\n  // Returns OpStatus::OUT_OF_MEMORY if bad_alloc is thrown\n  OpResult<ItAndUpdater> AddNew(const Context& cntx, std::string_view key, PrimeValue obj,\n                                uint64_t expire_at_ms);\n\n  // Update entry expiration. Return epxiration timepoint in abs milliseconds, or -1 if the entry\n  // already expired and was deleted;\n  facade::OpResult<int64_t> UpdateExpire(const Context& cntx, Iterator prime_it, ExpIterator exp_it,\n                                         const ExpireParams& params);\n\n  // Adds expiry on a key. If the key already has expiry, updates it.\n  void AddExpire(DbIndex db_ind, const Iterator& main_it, uint64_t at);\n\n  // Removes expiry from a key. Returns true if expiry existed and was removed.\n  bool RemoveExpire(DbIndex db_ind, const Iterator& main_it);\n\n  // Returns false if no action was taken, true if the mc flag was set or removed.\n  bool SetMCFlag(DbIndex db_ind, const PrimeKey& key, uint32_t flag);\n\n  uint32_t GetMCFlag(DbIndex db_ind, const PrimeKey& key) const;\n\n  // Creates a database with index `db_ind`. 
If such database exists does nothing.\n  void ActivateDb(DbIndex db_ind);\n\n  // Deletes the iterator. The iterator must be valid.\n  // Context argument is used only for document removal and it just needs\n  // timestamp field. Last argument, db_table, is optional and is used only in FlushSlotsCb.\n  // If async is set, AsyncDeleter will enqueue deletion of the object\n  void Del(Context cntx, Iterator it, DbTable* db_table = nullptr, bool async = false);\n\n  // Deletes a key after FindMutable(). Runs post_updater before deletion\n  // to update memory accounting while the key is still valid.\n  // Takes ownership of it_updater (pass by value with move semantics).\n  void DelMutable(Context cntx, ItAndUpdater it_updater);\n\n  constexpr static DbIndex kDbAll = 0xFFFF;\n\n  // Flushes db_ind or all databases if kDbAll is passed\n  util::fb2::Fiber FlushDb(DbIndex db_ind);\n\n  // Flushes the data of given slot ranges.\n  void FlushSlots(const cluster::SlotRanges& slot_ranges);\n\n  EngineShard* shard_owner() const {\n    return owner_;\n  }\n\n  ShardId shard_id() const {\n    return shard_id_;\n  }\n\n  void OnCbFinishBlocking();\n\n  bool Acquire(IntentLock::Mode m, const KeyLockArgs& lock_args);\n  void Release(IntentLock::Mode m, const KeyLockArgs& lock_args);\n\n  // Returns true if the key can be locked under m. 
Does not lock.\n  bool CheckLock(IntentLock::Mode mode, DbIndex dbid, uint64_t fp) const;\n  bool CheckLock(IntentLock::Mode mode, DbIndex dbid, std::string_view key) const {\n    return CheckLock(mode, dbid, LockTag(key).Fingerprint());\n  }\n\n  size_t db_array_size() const {\n    return db_arr_.size();\n  }\n\n  bool IsDbValid(DbIndex id) const {\n    return id < db_arr_.size() && bool(db_arr_[id]);\n  }\n\n  auto CopyDBTablePtr(DbIndex id) {\n    return db_arr_[id];\n  }\n\n  DbTable* GetDBTable(DbIndex id) {\n    return db_arr_[id].get();\n  }\n\n  const DbTable* GetDBTable(DbIndex id) const {\n    return db_arr_[id].get();\n  }\n\n  std::pair<PrimeTable*, ExpireTable*> GetTables(DbIndex id) {\n    return std::pair<PrimeTable*, ExpireTable*>(&db_arr_[id]->prime, nullptr);\n  }\n\n  // Returns existing keys count in the db.\n  size_t DbSize(DbIndex db_ind) const;\n\n  DbTableStats* MutableStats(DbIndex db_ind) {\n    return &db_arr_[db_ind]->stats;\n  }\n\n  // Check whether 'it' has not expired. Returns it if it's still valid. Otherwise, erases it\n  // from both tables and return Iterator{}.\n  struct ItAndExp {\n    Iterator it;\n    ExpIterator exp_it;\n  };\n  ItAndExp ExpireIfNeeded(const Context& cntx, Iterator it) const;\n\n  // Iterate over all expire table entries and delete expired.\n  void ExpireAllIfNeeded();\n\n  // Current version of this slice.\n  // We maintain a shared versioning scheme for all databases in the slice.\n  uint64_t version() const {\n    return version_;\n  }\n\n  size_t table_memory() const {\n    return table_memory_;\n  }\n\n  size_t entries_count() const {\n    return entries_count_;\n  }\n\n  using ChangeCallback = std::function<void(DbIndex, const ChangeReq&)>;\n  // Holds pairs of source and destination cursors for items moved in the dash table\n  using MovedItemsVec = std::vector<std::pair<PrimeTable::Cursor, PrimeTable::Cursor>>;\n  using MovedCallback = std::function<void(DbIndex, const MovedItemsVec&)>;\n\n  //! 
Registers the callback to be called for each change.\n  //! Returns the registration id which is also the unique version of the dbslice\n  //! at a time of the call.\n  uint64_t RegisterOnChange(ChangeCallback cb);\n\n  //! Registers the callback to be called after items are moved in table.\n  //! Returns the registration id which is also the unique version of the dbslice\n  //! at a time of the call.\n  uint64_t RegisterOnMove(MovedCallback cb);\n\n  bool HasRegisteredCallbacks() const {\n    return !change_cb_.empty();\n  }\n\n  // Call registered callbacks with version less than upper_bound.\n  void FlushChangeToEarlierCallbacks(DbIndex db_ind, Iterator it, uint64_t upper_bound);\n\n  //! Unregisters the callback.\n  void UnregisterOnChange(uint64_t id);\n\n  void UnregisterOnMoved(uint64_t id);\n\n  struct DeleteExpiredStats {\n    uint32_t deleted = 0;        // number of deleted items due to expiry.\n    uint32_t deleted_bytes = 0;  // total bytes of deleted items.\n    uint32_t traversed = 0;      // total number of traversed entries in the prime table.\n  };\n\n  // Deletes some amount of possible expired items.\n  DeleteExpiredStats DeleteExpiredStep(const Context& cntx, unsigned count);\n\n  // Evicts items with dynamically allocated data from the primary table.\n  // Does not shrink tables.\n  // Returns number of (elements,bytes) freed due to evictions.\n  std::pair<uint64_t, size_t> FreeMemWithEvictionStepAtomic(DbIndex db_indx, const Context& cntx,\n                                                            size_t starting_segment_id,\n                                                            size_t increase_goal_bytes);\n\n  int32_t GetNextSegmentForEviction(int32_t segment_id, DbIndex db_ind) const;\n\n  const DbTableArray& databases() const {\n    return db_arr_;\n  }\n\n  void TEST_EnableCacheMode() {\n    cache_mode_ = 1;\n  }\n\n  bool IsCacheMode() const {\n    // During loading time we never bump elements.\n    return cache_mode_ && 
(load_ref_count_ == 0);\n  }\n\n  void IncrLoadInProgress() {\n    ++load_ref_count_;\n  }\n\n  void DecrLoadInProgress() {\n    --load_ref_count_;\n  }\n\n  bool IsLoadRefCountZero() const {\n    return load_ref_count_ == 0;\n  }\n\n  // Test hook to inspect last locked keys.\n  const auto& TEST_GetLastLockedFps() const {\n    return uniq_fps_;\n  }\n\n  // Register key to be watched - when touched, set dirty_ptr to true\n  void RegisterWatchedKey(DbIndex db_indx, std::string_view key, std::atomic_bool* dirty_ptr);\n\n  // Unregisted all watched key for given dirty_ptr\n  void UnregisterConnectionWatches(absl::Span<const std::pair<DbIndex, std::string>> keys,\n                                   const std::atomic_bool* dirty_ptr);\n\n  void SetDocDeletionCallback(DocDeletionCallback ddcb);\n\n  // Resets the event counter for updates/insertions\n  void ResetUpdateEvents();\n\n  // Resets events_ member. Used by CONFIG RESETSTAT\n  void ResetEvents();\n\n  // Controls the expiry/eviction state. The server may enter states where\n  // Both evictions and expiries will be stopped for a short period of time.\n  void SetExpireAllowed(bool is_allowed) {\n    expire_allowed_ = is_allowed;\n  }\n\n  // Track keys for the client represented by the the weak reference to its connection.\n  void TrackKey(const facade::ConnectionRef& conn_ref, std::string_view key) {\n    client_tracking_map_[key].insert(conn_ref);\n  }\n\n  // Does not check for non supported events. 
Callers must parse the string and reject it\n  // if it's not empty and not EX.\n  void SetNotifyKeyspaceEvents(std::string_view notify_keyspace_events);\n\n  bool WillBlockOnJournalWrite() const {\n    return serialization_latch_.IsBlocked();\n  }\n\n  LocalLatch* GetLatch() {\n    return &serialization_latch_;\n  }\n\n  void StartSampleTopK(DbIndex db_ind, uint32_t min_freq);\n\n  struct SamplingResult {\n    std::vector<std::pair<std::string, uint64_t>> top_keys;  // key -> frequency pairs.\n    uint64_t total_samples = 0;                              // Total number of keys sampled.\n  };\n  SamplingResult StopSampleTopK(DbIndex db_ind);\n\n  void StartSampleKeys(DbIndex db_ind);\n\n  // Returns number of unique keys sampled.\n  struct UniqueSampleResult {\n    uint64_t unique_keys_count = 0;  // Number of unique keys sampled.\n    uint64_t total_samples = 0;      // Total number of keys sampled.\n  };\n  UniqueSampleResult StopSampleKeys(DbIndex db_ind);\n\n  void StartSampleValues(DbIndex db_ind);\n\n  // Returns a histogram of sampled values.\n  std::unique_ptr<base::Histogram> StopSampleValues(DbIndex db_ind);\n\n private:\n  void PreUpdateBlocking(DbIndex db_ind, const Iterator& it);\n  void PostUpdate(DbIndex db_ind, std::string_view key);\n\n  OpResult<ItAndUpdater> AddOrUpdateInternal(const Context& cntx, std::string_view key,\n                                             PrimeValue obj, uint64_t expire_at_ms,\n                                             bool force_update);\n\n  void FlushSlotsFb(const cluster::SlotSet& slot_ids);\n  util::fb2::Fiber FlushDbIndexes(const std::vector<DbIndex>& indexes);\n\n  // Invalidate all watched keys in database. Used on FLUSH.\n  void InvalidateDbWatches(DbIndex db_indx);\n\n  // Invalidate all watched keys for given slots. Used on FlushSlots.\n  void InvalidateSlotWatches(const cluster::SlotSet& slot_ids);\n\n  // Clear tiered storage entries for the specified indices. 
Called during flushing some indices.\n  void RemoveOffloadedEntriesFromTieredStorage(absl::Span<const DbIndex> indices,\n                                               const DbTableArray& db_arr) const;\n\n  void PerformDeletionAtomic(const Iterator& del_it, const ExpIterator& exp_it, DbTable* table,\n                             bool async = false);\n\n  // Queues invalidation message to the clients that are tracking the change to a key.\n  void QueueInvalidationTrackingMessageAtomic(std::string_view key);\n  void SendQueuedInvalidationMessages();\n  void SendQueuedInvalidationMessagesAsync();\n\n  void CreateDb(DbIndex index);\n\n  enum class UpdateStatsMode : uint8_t {\n    kReadStats,\n    kMutableStats,\n  };\n\n  struct PrimeItAndExp {\n    PrimeIterator it;\n    ExpireIterator exp_it;\n  };\n\n  PrimeItAndExp ExpireIfNeeded(const Context& cntx, PrimeIterator it) const;\n\n  OpResult<ItAndUpdater> AddOrFindInternal(const Context& cntx, std::string_view key,\n                                           std::optional<unsigned> req_obj_type);\n\n  OpResult<PrimeItAndExp> FindInternal(const Context& cntx, std::string_view key,\n                                       std::optional<unsigned> req_obj_type,\n                                       UpdateStatsMode stats_mode) const;\n  OpResult<ItAndUpdater> FindMutableInternal(const Context& cntx, std::string_view key,\n                                             std::optional<unsigned> req_obj_type);\n\n  uint64_t NextVersion() {\n    return version_++;\n  }\n\n  void CallChangeCallbacks(DbIndex id, const ChangeReq& cr) const;\n  void CallMovedCallbacks(DbIndex id, const MovedItemsVec& moved_items);\n\n  // We need this because registered callbacks might yield and when they do so we want\n  // to avoid Heartbeat or Flushing the db.\n  // This latch protects us against this case.\n  mutable LocalLatch serialization_latch_;\n\n  ShardId shard_id_;\n  uint8_t cache_mode_ : 1;\n\n  EngineShard* owner_;\n\n  int64_t 
expire_base_[2];  // Used for expire logic, represents a real clock.\n  bool expire_allowed_ = true;\n\n  uint64_t version_ = 1;  // Used to version entries in the PrimeTable.\n  uint64_t next_moved_id_ = 1;\n\n  // Estimation of available memory dedicated to this shard.\n  // Recalculated periodically by dividing free memory left among all shards equally\n  ssize_t memory_budget_ = SSIZE_MAX / 2;\n  size_t bytes_per_object_ = 0;\n\n  size_t table_memory_ = 0;\n  uint64_t entries_count_ = 0;\n  unsigned load_ref_count_ = 0;\n\n  mutable SliceEvents events_;  // we may change this even for const operations.\n\n  DbTableArray db_arr_;\n\n  // key for bump up items pair contains <key hash, db_index>\n  using FetchedItemKey = std::pair<uint64_t, DbIndex>;\n\n  struct FpHasher {\n    size_t operator()(uint64_t val) const {\n      return val;\n    }\n    size_t operator()(const FetchedItemKey& val) const {\n      return val.first;\n    }\n  };\n\n  // Used in temporary computations in Acquire/Release.\n  mutable absl::flat_hash_set<uint64_t, FpHasher> uniq_fps_;\n\n  // ordered from the smallest to largest version.\n  std::list<std::pair<uint64_t, ChangeCallback>> change_cb_;\n\n  std::list<std::pair<uint32_t, MovedCallback>> moved_cb_;\n\n  // Used in temporary computations in Find item and CbFinish\n  // This set is used to hold fingerprints of key accessed during the run of\n  // a transaction callback (not the whole transaction).\n  // We track them to avoid bumping them again (in any direction) so that the iterators to\n  // the fetched keys will not be invalidated. 
We must do it for atomic operations,\n  // for operations that preempt in the middle we have another mechanism -\n  // auto laundering iterators, so in case of preemption we do not mind that fetched_items are\n  // cleared or changed.\n  mutable absl::flat_hash_set<FetchedItemKey, FpHasher> fetched_items_;\n\n  // Registered by shard indices on when first document index is created.\n  DocDeletionCallback doc_del_cb_;\n\n  // Record whenever a key expired to DbTable::expired_keys_events_ for keyspace notifications\n  bool expired_keys_events_recording_ = true;\n\n  struct Hash {\n    size_t operator()(const facade::ConnectionRef& c) const {\n      return std::hash<uint32_t>()(c.GetClientId());\n    }\n  };\n\n  // the following type definitions are confusing, and they are for achieving memory\n  // usage tracking for client_tracking_map_ data structure through C++'s memory resource and\n  // and polymorphic allocator (new C++ features)\n  // the declarations below meant to say:\n  // absl::flat_hash_map<std::string,\n  //                    absl::flat_hash_set<facade::Connection::WeakRef, Hash>>\n  //                    client_tracking_map_\n  using HashSetAllocator = PMR_NS::polymorphic_allocator<facade::ConnectionRef>;\n\n  using ConnectionHashSet =\n      absl::flat_hash_set<facade::ConnectionRef, Hash,\n                          absl::container_internal::hash_default_eq<facade::ConnectionRef>,\n                          HashSetAllocator>;\n\n  using AllocatorType = PMR_NS::polymorphic_allocator<std::pair<std::string, ConnectionHashSet>>;\n\n  using TrackingMap =\n      absl::flat_hash_map<std::string, ConnectionHashSet,\n                          absl::container_internal::hash_default_hash<std::string>,\n                          absl::container_internal::hash_default_eq<std::string>, AllocatorType>;\n  TrackingMap client_tracking_map_, pending_send_map_;\n\n  void SendQueuedInvalidationMessagesCb(const TrackingMap& track_map, unsigned idx) const;\n\n  class 
PrimeBumpPolicy;\n};\n\ninline bool IsValid(const DbSlice::Iterator& it) {\n  return dfly::IsValid(it.GetInnerIt());\n}\n\ninline bool IsValid(const DbSlice::ConstIterator& it) {\n  return dfly::IsValid(it.GetInnerIt());\n}\n\ninline bool IsValid(const DbSlice::ExpIterator& it) {\n  return dfly::IsValid(it.GetInnerIt());\n}\n\ninline bool IsValid(const DbSlice::ExpConstIterator& it) {\n  return dfly::IsValid(it.GetInnerIt());\n}\n\ntemplate <typename T> void DbSlice::IteratorT<T>::LaunderIfNeeded() const {\n  if (!dfly::IsValid(it_)) {\n    return;\n  }\n\n  uint64_t current_epoch = util::fb2::FiberSwitchEpoch();\n  if (current_epoch != fiber_epoch_) {\n    if (!it_.IsOccupied() || it_->first != key_.view()) {\n      it_ = it_.owner().Find(key_.view());\n    }\n    fiber_epoch_ = current_epoch;\n  }\n}\n\n}  // namespace dfly\n"
  },
  {
    "path": "src/server/debugcmd.cc",
    "content": "// Copyright 2022, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n#include \"server/debugcmd.h\"\n\n#include \"core/detail/gen_utils.h\"\n\n#define HUF_STATIC_LINKING_ONLY\n\nextern \"C\" {\n#include \"huff/hist.h\"\n#include \"huff/huf.h\"\n#include \"redis/redis_aux.h\"\n}\n\n#include <absl/cleanup/cleanup.h>\n#include <absl/random/random.h>\n#include <absl/strings/escaping.h>\n#include <absl/strings/match.h>\n#include <absl/strings/str_cat.h>\n#include <lz4.h>\n#include <zdict.h>\n#include <zstd.h>\n\n#include <algorithm>\n#include <filesystem>\n#include <numeric>\n\n#include \"base/flags.h\"\n#include \"base/logging.h\"\n#include \"core/huff_coder.h\"\n#include \"core/qlist.h\"\n#include \"core/sorted_map.h\"\n#include \"core/string_map.h\"\n#include \"core/string_set.h\"\n#include \"facade/cmd_arg_parser.h\"\n#include \"facade/dragonfly_connection.h\"\n#include \"server/blocking_controller.h\"\n#include \"server/container_utils.h\"\n#include \"server/engine_shard_set.h\"\n#include \"server/error.h\"\n#include \"server/main_service.h\"\n#include \"server/multi_command_squasher.h\"\n#include \"server/namespaces.h\"\n#include \"server/rdb_load.h\"\n#include \"server/server_state.h\"\n#include \"server/string_stats.h\"\n#include \"server/transaction.h\"\n\nusing namespace std;\n\nABSL_DECLARE_FLAG(string, dir);\nABSL_DECLARE_FLAG(string, dbfilename);\nABSL_DECLARE_FLAG(bool, df_snapshot_format);\n\nABSL_FLAG(bool, background_debug_jobs, false, \"Use background fibers for debug jobs\");\n\nnamespace dfly {\n\nusing namespace util;\nusing boost::intrusive_ptr;\nusing namespace facade;\nusing absl::StrAppend;\nusing absl::StrCat;\n\nnamespace {\n\nstruct ObjInfo {\n  unsigned type = 0;\n  unsigned encoding;\n  unsigned bucket_id = 0;\n  unsigned slot_id = 0;\n\n  // for lists - how many nodes do they have.\n  unsigned num_nodes = 0;\n  unsigned num_compressed = 0;\n\n  enum LockStatus : uint8_t { NONE, S, X } 
lock_status = NONE;\n\n  int64_t ttl = INT64_MAX;\n  optional<uint32_t> external_len;\n\n  bool has_sec_precision = false;\n  bool found = false;\n};\n\nstruct ValueCompressInfo {\n  size_t raw_size = 0;\n  size_t compressed_size = 0;\n};\n\nstd::string GenerateValue(size_t val_size, bool random_value, absl::InsecureBitGen* gen) {\n  if (random_value) {\n    return GetRandomHex(*gen, val_size);\n  } else {\n    return string(val_size, 'x');\n  }\n}\n\ntuple<const CommandId*, absl::InlinedVector<string, 5>> GeneratePopulateCommand(\n    string_view type, std::string key, size_t val_size, bool random_value, uint32_t elements,\n    const CommandRegistry& registry, absl::InsecureBitGen* gen) {\n  absl::InlinedVector<string, 5> args;\n  args.push_back(std::move(key));\n\n  const CommandId* cid = nullptr;\n  if (type == \"STRING\") {\n    cid = registry.Find(\"SET\");\n    args.push_back(GenerateValue(val_size, random_value, gen));\n  } else if (type == \"LIST\") {\n    cid = registry.Find(\"LPUSH\");\n    for (uint32_t i = 0; i < elements; ++i) {\n      args.push_back(GenerateValue(val_size, random_value, gen));\n    }\n  } else if (type == \"SET\") {\n    cid = registry.Find(\"SADD\");\n    for (size_t i = 0; i < elements; ++i) {\n      args.push_back(GenerateValue(val_size, random_value, gen));\n    }\n  } else if (type == \"HASH\") {\n    cid = registry.Find(\"HSET\");\n    for (size_t i = 0; i < elements; ++i) {\n      args.push_back(GenerateValue(val_size / 2, random_value, gen));\n      args.push_back(GenerateValue(val_size / 2, random_value, gen));\n    }\n  } else if (type == \"ZSET\") {\n    cid = registry.Find(\"ZADD\");\n    for (size_t i = 0; i < elements; ++i) {\n      args.push_back(StrCat((*gen)() % val_size));\n      args.push_back(GenerateValue(val_size, random_value, gen));\n    }\n  } else if (type == \"JSON\") {\n    cid = registry.Find(\"JSON.MERGE\");\n    args.push_back(\"$\");\n\n    string json = \"{\";\n    for (size_t i = 0; i < elements; ++i) 
{\n      absl::StrAppend(&json, \"\\\"\", GenerateValue(val_size / 2, random_value, gen), \"\\\":\\\"\",\n                      GenerateValue(val_size / 2, random_value, gen), \"\\\",\");\n    }\n    json[json.size() - 1] = '}';  // Replace last ',' with '}'\n    args.push_back(json);\n  } else if (type == \"STREAM\") {\n    cid = registry.Find(\"XADD\");\n    args.push_back(\"*\");\n    for (size_t i = 0; i < elements; ++i) {\n      args.push_back(GenerateValue(val_size / 2, random_value, gen));\n      args.push_back(GenerateValue(val_size / 2, random_value, gen));\n    }\n  }\n\n  return {cid, args};\n}\n\nstruct ObjHist {\n  base::Histogram key_len;\n  base::Histogram val_len;    // overall malloc-used size of the value.\n  base::Histogram card;       // for sets, hashmaps etc - it's number of entries.\n  base::Histogram entry_len;  // for sets, hashmaps etc - it's the length of each entry.\n  base::Histogram listpack;   // for listpack encodings - the malloc used of the listpack.\n};\n\n// Returns number of O(1) steps executed.\nvoid AddObjHist(PrimeIterator it, ObjHist* hist) {\n  using namespace container_utils;\n  const PrimeValue& pv = it->second;\n  size_t val_len = 0;\n\n  auto per_entry_cb = [&](ContainerEntry entry) {\n    if (entry.IsString()) {\n      val_len += entry.size();\n      hist->entry_len.Add(entry.size());\n    } else {\n      val_len += 8;  // size of long\n    }\n    return true;\n  };\n\n  hist->key_len.Add(it->first.MallocUsed());\n\n  if (pv.ObjType() == OBJ_LIST) {\n    IterateList(pv, per_entry_cb);\n    if (pv.Encoding() == kEncodingQL2) {\n      const QList* ql = static_cast<QList*>(pv.RObjPtr());\n      val_len = ql->MallocUsed(true);\n    } else if (pv.Encoding() == kEncodingListPack) {\n      val_len = pv.MallocUsed();\n      hist->listpack.Add(val_len);\n    }\n  } else if (pv.ObjType() == OBJ_ZSET) {\n    IterateSortedSet(pv, [&](ContainerEntry entry, double) { return per_entry_cb(entry); });\n    val_len = 0;  // reset - will 
be calculated below.\n    if (pv.Encoding() == OBJ_ENCODING_LISTPACK) {\n      hist->listpack.Add(pv.MallocUsed());\n    }\n  } else if (pv.ObjType() == OBJ_SET) {\n    IterateSet(pv, per_entry_cb);\n    val_len = 0;  // reset - will be calculated below.\n    if (pv.Encoding() == kEncodingIntSet) {\n      hist->listpack.Add(pv.MallocUsed());\n    }\n  } else if (pv.ObjType() == OBJ_HASH) {\n    IterateMap(pv, [&](ContainerEntry key, ContainerEntry value) {\n      hist->entry_len.Add(key.size() + value.size());\n      return true;\n    });\n    if (pv.Encoding() == kEncodingListPack) {\n      hist->listpack.Add(pv.MallocUsed());\n    }\n  }\n  // TODO: streams\n\n  if (val_len == 0) {\n    // Fallback\n    val_len = pv.MallocUsed(true);\n  }\n\n  hist->val_len.Add(val_len);\n\n  if (pv.ObjType() != OBJ_STRING && pv.ObjType() != OBJ_JSON)\n    hist->card.Add(pv.Size());\n}\n\n// ObjType -> ObjHist\n//\nusing ObjHistMap = absl::flat_hash_map<unsigned, unique_ptr<ObjHist>>;\n\nvoid MergeObjHistMap(ObjHistMap&& src, ObjHistMap* dest) {\n  for (auto& [obj_type, src_hist] : src) {\n    auto& dest_hist = (*dest)[obj_type];\n    if (!dest_hist) {\n      dest_hist = std::move(src_hist);\n    } else {\n      dest_hist->key_len.Merge(src_hist->key_len);\n      dest_hist->val_len.Merge(src_hist->val_len);\n      dest_hist->card.Merge(src_hist->card);\n      dest_hist->entry_len.Merge(src_hist->entry_len);\n      dest_hist->listpack.Merge(src_hist->listpack);\n    }\n  }\n}\n\nstruct SegmentInfo {\n  base::Histogram hist;\n};\n\nvoid DoSegmentHist(EngineShard* shard, ConnectionContext* cntx, SegmentInfo* info) {\n  auto& db_slice = cntx->ns->GetDbSlice(shard->shard_id());\n  DbTable* dbt = db_slice.GetDBTable(cntx->db_index());\n  if (dbt == nullptr)\n    return;\n\n  unsigned steps = 0;\n  auto& prime = dbt->prime;\n  for (size_t i = 0; i < prime.GetSegmentCount(); i = prime.NextSeg(i)) {\n    const auto* segment = prime.GetSegment(i);\n\n    
info->hist.Add(segment->SlowSize());\n    if (++steps % 2000 == 0) {\n      ThisFiber::Yield();\n    }\n  }\n}\n\nstruct HufHist {\n  static constexpr unsigned kMaxSymbol = 255;\n  array<unsigned, kMaxSymbol + 1> hist;  // histogram of symbols.\n  unsigned max_symbol = 0;               // what is the max symbol of the histogram.\n\n  HufHist() {\n    hist.fill(0);\n  }\n\n  void Merge(const HufHist& other) {\n    max_symbol = std::max(max_symbol, other.max_symbol);\n    for (unsigned i = 0; i <= max_symbol; ++i) {\n      hist[i] += other.hist[i];\n    }\n  }\n\n  unsigned MaxFreqCount() const;\n};\n\nunsigned HufHist::MaxFreqCount() const {\n  unsigned max_freq = 0;\n  for (unsigned i = 0; i < kMaxSymbol; ++i) {\n    if (hist[i] > max_freq) {\n      max_freq = hist[i];\n    }\n  }\n  return max_freq;\n}\n\nconstexpr unsigned kMaxFreqPerShard = 1U << 20;\nconstexpr unsigned kMaxFreqTotal = static_cast<unsigned>((1U << 31) * 0.9);\n\nvoid DoComputeHist(CompactObjType type, EngineShard* shard, ConnectionContext* cntx,\n                   HufHist* dest) {\n  auto& db_slice = cntx->ns->GetDbSlice(shard->shard_id());\n  DbTable* dbt = db_slice.GetDBTable(cntx->db_index());\n  CHECK(dbt);\n\n  PrimeTable::Cursor cursor;\n  unsigned steps = 0;\n  string scratch;\n  constexpr size_t kMaxLen = 512;\n  PrimeTable& table = dbt->prime;\n\n  do {\n    cursor = table.Traverse(cursor, [&](PrimeIterator it) {\n      scratch.clear();\n      ++steps;\n      if (type == kInvalidCompactObjType) {  // KEYSPACE\n        if (it->first.MallocUsed() > 0) {\n          it->first.GetString(&scratch);\n        }\n      } else if (type == OBJ_STRING && it->second.ObjType() == OBJ_STRING) {\n        if (it->second.MallocUsed() > 0) {\n          it->second.GetString(&scratch);\n        }\n      } else if (type == OBJ_ZSET && it->second.ObjType() == OBJ_ZSET) {\n        container_utils::IterateSortedSet(\n            it->second, [&](container_utils::ContainerEntry entry, double) {\n              
++steps;\n              if (entry.IsString()) {\n                HIST_add(dest->hist.data(), entry.data(), entry.size());\n              }\n              return true;\n            });\n      } else if (type == OBJ_LIST && it->second.ObjType() == OBJ_LIST) {\n        container_utils::IterateList(it->second, [&](container_utils::ContainerEntry entry) {\n          ++steps;\n          if (entry.IsString()) {\n            HIST_add(dest->hist.data(), entry.data(), entry.size());\n          }\n          return true;\n        });\n      } else if (type == OBJ_HASH && it->second.ObjType() == OBJ_HASH) {\n        container_utils::IterateMap(it->second, [&](container_utils::ContainerEntry key,\n                                                    container_utils::ContainerEntry value) {\n          ++steps;\n          if (key.IsString()) {\n            HIST_add(dest->hist.data(), key.data(), key.size());\n          }\n          if (value.IsString()) {\n            HIST_add(dest->hist.data(), value.data(), value.size());\n          }\n          return true;\n        });\n      }\n\n      if (!scratch.empty()) {\n        size_t len = std::min(scratch.size(), kMaxLen);\n        HIST_add(dest->hist.data(), scratch.data(), len);\n      }\n    });\n\n    if (steps >= 40000) {\n      if (dest->MaxFreqCount() > kMaxFreqPerShard) {\n        break;\n      }\n\n      steps = 0;\n      ThisFiber::Yield();\n    }\n  } while (cursor);\n  dest->max_symbol = HufHist::kMaxSymbol;\n  while (dest->max_symbol && dest->hist[dest->max_symbol] == 0)\n    --dest->max_symbol;\n}\n\nObjInfo InspectOp(ConnectionContext* cntx, string_view key) {\n  auto& db_slice = cntx->ns->GetCurrentDbSlice();\n  auto db_index = cntx->db_index();\n  auto* pt = db_slice.GetTables(db_index).first;\n\n  PrimeIterator it = pt->Find(key);\n  ObjInfo oinfo;\n  if (IsValid(it)) {\n    const PrimeValue& pv = it->second;\n\n    oinfo.found = true;\n    oinfo.type = pv.ObjType();\n    oinfo.encoding = pv.Encoding();\n    
oinfo.bucket_id = it.bucket_id();\n    oinfo.slot_id = it.slot_id();\n\n    if (pv.ObjType() == OBJ_LIST && pv.Encoding() == kEncodingQL2) {\n      const QList* qlist = static_cast<const QList*>(pv.RObjPtr());\n      oinfo.num_nodes = qlist->node_count();\n      auto* node = qlist->Head();\n\n      while (node) {\n        if (node->encoding == QUICKLIST_NODE_ENCODING_LZF) {\n          ++oinfo.num_compressed;\n        }\n        node = node->next;\n      }\n    }\n\n    if (pv.IsExternal()) {\n      oinfo.external_len.emplace(pv.GetExternalSlice().second);\n    }\n\n    if (it->first.HasExpire()) {\n      time_t exp_time = it->first.GetExpireTime();\n      oinfo.ttl = exp_time - GetCurrentTimeMs();\n      oinfo.has_sec_precision = false;  // Embedded TTL is always ms precision.\n    }\n  }\n\n  if (!db_slice.CheckLock(IntentLock::EXCLUSIVE, db_index, key)) {\n    oinfo.lock_status =\n        db_slice.CheckLock(IntentLock::SHARED, db_index, key) ? ObjInfo::S : ObjInfo::X;\n  }\n\n  return oinfo;\n}\n\nOpResult<ValueCompressInfo> EstimateCompression(ConnectionContext* cntx, string_view key) {\n  auto& db_slice = cntx->ns->GetCurrentDbSlice();\n  auto db_index = cntx->db_index();\n  auto* pt = db_slice.GetTables(db_index).first;\n\n  PrimeIterator it = pt->Find(key);\n  if (!IsValid(it)) {\n    return OpStatus::KEY_NOTFOUND;\n  }\n\n  // Only strings and quicklist-encoded lists are supported right now.\n  if (it->second.ObjType() != OBJ_STRING && it->second.ObjType() != OBJ_LIST) {\n    return OpStatus::WRONG_TYPE;\n  }\n  ValueCompressInfo info;\n\n  if (it->second.ObjType() == OBJ_LIST) {\n    if (it->second.Encoding() != kEncodingQL2) {\n      return OpStatus::WRONG_TYPE;\n    }\n\n    const QList* src = static_cast<const QList*>(it->second.RObjPtr());\n    info.raw_size = src->MallocUsed(true);\n    QList qlist(-2, 1);\n    auto copy_cb = [&](QList::Entry entry) {\n      qlist.Push(entry.view(), QList::HEAD);\n      return true;\n    };\n    src->Iterate(copy_cb, 0, -1);\n    
info.compressed_size = qlist.MallocUsed(true);\n    return info;\n  }\n\n  string scratch;\n  string_view value = it->second.GetSlice(&scratch);\n\n  info.raw_size = value.size();\n  info.compressed_size = info.raw_size;\n\n  if (info.raw_size >= 32) {\n    size_t compressed_size = ZSTD_compressBound(value.size());\n    unique_ptr<char[]> compressed(new char[compressed_size]);\n    info.compressed_size =\n        ZSTD_compress(compressed.get(), compressed_size, value.data(), value.size(), 5);\n  }\n\n  return info;\n};\n\nconst char* EncodingName(unsigned obj_type, unsigned encoding) {\n  switch (obj_type) {\n    case OBJ_STRING:\n      return \"raw\";\n    case OBJ_LIST:\n      switch (encoding) {\n        case kEncodingQL2:\n          return \"quicklist\";\n        case kEncodingListPack:\n          return \"listpack\";\n      }\n      break;\n    case OBJ_SET:\n      ABSL_FALLTHROUGH_INTENDED;\n    case OBJ_ZSET:\n      ABSL_FALLTHROUGH_INTENDED;\n    case OBJ_HASH:\n      switch (encoding) {\n        case kEncodingIntSet:\n          return \"intset\";\n        case kEncodingStrMap2:\n          return \"dense_set\";\n        case OBJ_ENCODING_SKIPLIST:  // we kept the old enum for zset\n          return \"btree\";\n        case OBJ_ENCODING_LISTPACK:\n          ABSL_FALLTHROUGH_INTENDED;\n        case kEncodingListPack:\n          return \"listpack\";\n      }\n      break;\n    case OBJ_JSON:\n      switch (encoding) {\n        case kEncodingJsonCons:\n          return \"jsoncons\";\n        case kEncodingJsonFlat:\n          return \"jsonflat\";\n      }\n      break;\n    case OBJ_STREAM:\n      return \"stream\";\n  }\n  return \"unknown\";\n}\n\nstruct IOStat {\n  uint64_t conn_received = 0;\n  uint64_t curr_conn_count = 0;\n  uint64_t cmd_total = 0, pipelined_cmd_total = 0;\n  size_t io_read_bytes = 0;\n  uint64_t io_reads_total = 0;\n\n  void From(const facade::FacadeStats& fs);\n  void Print(RedisReplyBuilder* rb) const;\n\n  IOStat& operator-=(const 
IOStat& other);\n};\n\nvoid IOStat::From(const facade::FacadeStats& fs) {\n  conn_received = fs.conn_stats.conn_received_cnt;\n  curr_conn_count = fs.conn_stats.num_conns_main;\n  cmd_total = fs.conn_stats.command_cnt_main;\n  pipelined_cmd_total = fs.conn_stats.pipelined_cmd_cnt;\n  io_read_bytes = fs.conn_stats.io_read_bytes;\n  io_reads_total = fs.conn_stats.io_read_cnt;\n}\n\nvoid IOStat::Print(RedisReplyBuilder* rb) const {\n  rb->StartCollection(6, CollectionType::MAP);\n  rb->SendSimpleString(\"connections_received\");\n  rb->SendLong(conn_received);\n  rb->SendSimpleString(\"current_conn_count\");\n  rb->SendLong(curr_conn_count);\n  rb->SendSimpleString(\"commands_total\");\n  rb->SendLong(cmd_total);\n  rb->SendSimpleString(\"pipelined_commands_total\");\n  rb->SendLong(pipelined_cmd_total);\n  rb->SendSimpleString(\"io_read_bytes\");\n  rb->SendLong(io_read_bytes);\n  rb->SendSimpleString(\"io_reads_total\");\n  rb->SendLong(io_reads_total);\n}\n\nIOStat& IOStat::operator-=(const IOStat& other) {\n  conn_received -= other.conn_received;\n  curr_conn_count -= other.curr_conn_count;\n  cmd_total -= other.cmd_total;\n  pipelined_cmd_total -= other.pipelined_cmd_total;\n  io_read_bytes -= other.io_read_bytes;\n  io_reads_total -= other.io_reads_total;\n\n  return *this;\n}\n\n// Traverse over all entries on all databases, manage cpu time automatically\ntemplate <typename F> void TraverseAllEntries(bool background, ConnectionContext* cntx, F&& f) {\n  util::fb2::BlockingCounter bc{0};\n  for (uint32_t i = 0; i < shard_set->size(); ++i) {\n    bc->Add(1);\n    util::ProactorBase* dest = shard_set->pool()->at(i);\n\n    auto cb = [f /* copy per thread */, bc, cntx, background]() mutable {\n      auto* shard = EngineShard::tlocal();\n      auto& db_slice = cntx->ns->GetDbSlice(shard->shard_id());\n\n      for (unsigned i = 0; i < db_slice.db_array_size(); ++i) {\n        boost::intrusive_ptr<DbTable> dbt = db_slice.CopyDBTablePtr(i);\n        if (!dbt)\n         
 continue;\n\n        PrimeTable::Cursor cursor;\n        do {\n          cursor = dbt->prime.Traverse(cursor, f);\n          if (background) {\n            ThisFiber::Yield();\n          } else if (base::CycleClock::ToUsec(ThisFiber::GetRunningTimeCycles()) >= 500) {\n            ThisFiber::Yield();\n          }\n        } while (cursor);\n      }\n      bc->Dec();\n    };\n    dest->DispatchBrief([cb, background]() mutable {\n      using namespace util::fb2;\n      Fiber::Opts opts{\n          .priority = background ? FiberPriority::BACKGROUND : FiberPriority::NORMAL,\n          .name = \"Debug/Traverse\",\n      };\n      Fiber(opts, std::move(cb)).Detach();\n    });\n  }\n  bc->Wait();\n}\n\n}  // namespace\n\nDebugCmd::DebugCmd(ServerFamily* owner, cluster::ClusterFamily* cf, ConnectionContext* cntx)\n    : sf_(*owner), cf_(*cf), cntx_(cntx) {\n}\n\nvoid DebugCmd::Run(CmdArgList args, CommandContext* cmd_cntx) {\n  string subcmd = absl::AsciiStrToUpper(ArgS(args, 0));\n  if (subcmd == \"HELP\") {\n    string_view help_arr[] = {\n        \"DEBUG <subcommand> [<arg> [value] [opt] ...]. Subcommands are:\",\n        \"EXEC\",\n        \"    Show the descriptors of the MULTI/EXEC transactions that were processed by \",\n        \"    the server. For each EXEC/i descriptor, 'i' is the number of shards it touches. \",\n        \"    Each descriptor details the commands it contained followed by number of their \",\n        \"    arguments. Each descriptor is prefixed by its frequency count\",\n        \"OBJECT <key> [COMPRESS]\",\n        \"    Show low-level info about `key` and associated value.\",\n        \"RELOAD [option ...]\",\n        \"    Save the RDB on disk and reload it back to memory. 
Valid <option> values:\",\n        \"    * NOSAVE: the database will be loaded from an existing RDB file.\",\n        \"    Examples:\",\n        \"    * DEBUG RELOAD NOSAVE: replace the current database with the contents of an\",\n        \"      existing RDB file.\",\n        \"REPLICA PAUSE/RESUME\",\n        \"    Stops replica from reconnecting to master, or resumes\",\n        \"MIGRATION PAUSE/RESUME\",\n        \"    Stops/resumes incoming migration process only in the SYNC state\",\n        \"REPLICA OFFSET\",\n        \"    Return sync id and array of number of journal commands executed for each replica flow\",\n        \"WATCHED\",\n        \"    Shows the watched keys as a result of BLPOP and similar operations.\",\n        \"POPULATE <count> [prefix] [size] [RAND] [SLOTS start end] [TYPE type] [ELEMENTS elements]\"\n        \" [EXPIRE start end]\",\n        \"    Create <count> string keys named key:<num> with value value:<num>.\",\n        \"    If <prefix> is specified then it is used instead of the 'key' prefix.\",\n        \"    If <size> is specified then X character is concatenated multiple times to value:<num>\",\n        \"    to meet value size.\",\n        \"    If RAND is specified then value will be set to random hex string in specified size.\",\n        \"    If SLOTS is specified then create keys only in given slots range.\",\n        \"    TYPE specifies data type (must be STRING/LIST/SET/HASH/ZSET/JSON/STREAM), default \"\n        \"STRING.\",\n        \"    ELEMENTS specifies how many sub elements if relevant (like entries in a list / set).\",\n        \"    EXPIRE specifies key expire ttl range.\",\n        \"OBJHIST\",\n        \"    Prints histogram of object sizes.\",\n        \"STACKTRACE\",\n        \"    Prints the stacktraces of all current fibers to the logs.\",\n        \"SHARDS\",\n        \"    Prints memory usage and key stats per shard, as well as min/max indicators.\",\n        \"TOPK ON [min_freq] | OFF [max_keys]\",\n  
      \"    Turns on or off sampling of topk keys. Provides top keys with at least <min_freq> \",\n        \"    during the sampling period. The results are returned in descending order of frequency\",\n        \"    when calling TOPK OFF command. First result is the sampled keys count.\",\n        \"KEYS ON | OFF\",\n        \"    Turns on/off counting of unique keys. Results are returned when calling \",\n        \"    KEYS OFF command. The results is array with two integers: unique keys count and \",\n        \"    sampled keys count.\",\n        \"VALUES ON | OFF\",\n        \"    Turns on/off measurement of value length distribution. Results are returned when \",\n        \"    calling VALUES OFF command.\",\n        \"TX\",\n        \"    Performs transaction analysis per shard.\",\n        \"TRAFFIC <path>/<file_prefix> | [STOP]\",\n        \"    Use <path>/<file_prefix> to start traffic logging to the specified path.\",\n        \"    All recorded files will have the specified prefix.\",\n        \"    Use 'STOP' or do not specify any arguments to stop traffic logging.\",\n        \"RECVSIZE [<tid> | ENABLE | DISABLE]\",\n        \"    Prints the histogram of the received request sizes on the given thread\",\n        \"COMPRESSION [IMPORT <bintable> | EXPORT | SET <bintable>] [type]\",\n        \"    Estimate the compressibility of values of the given type. if no type is given, \",\n        \"    checks compressibility of keys. If IN is specified, then the provided \",\n        \"    bintable is used to check compressibility. If OUT is specified, then \",\n        \"    the serialized table is printed as well\",\n        \"IOSTATS [PS]\",\n        \"    Prints IO stats per thread. 
If PS is specified, prints thread-level stats \",\n        \"    per second.\",\n        \"SEGMENTS\",\n        \"    Prints segment info for the current database.\",\n        \"COMPACT-TABLE threshold\",\n        \"    Attempts to merge underutilized segments in dash table\",\n        \"UNIQ-STRS\",\n        \"    Prints per-object unique string stats and estimated dedup savings across shards.\",\n        \"HELP\",\n        \"    Prints this help.\",\n    };\n    auto* rb = static_cast<RedisReplyBuilder*>(cmd_cntx->rb());\n    return rb->SendSimpleStrArr(help_arr);\n  }\n\n  VLOG(1) << \"subcmd \" << subcmd;\n\n  if (subcmd == \"POPULATE\") {\n    return Populate(args, cmd_cntx);\n  }\n\n  if (subcmd == \"RELOAD\") {\n    return Reload(args, cmd_cntx);\n  }\n\n  if (subcmd == \"REPLICA\" && args.size() == 2) {\n    return Replica(args, cmd_cntx);\n  }\n\n  if (subcmd == \"MIGRATION\" && args.size() == 2) {\n    return Migration(args, cmd_cntx);\n  }\n\n  if (subcmd == \"WATCHED\") {\n    return Watched(cmd_cntx);\n  }\n\n  if (subcmd == \"OBJECT\" && args.size() >= 2) {\n    string_view key = ArgS(args, 1);\n    args.remove_prefix(2);\n    return Inspect(key, args, cmd_cntx);\n  }\n\n  if (subcmd == \"TX\") {\n    return TxAnalysis(cmd_cntx);\n  }\n\n  if (subcmd == \"OBJHIST\") {\n    return ObjHist(cmd_cntx);\n  }\n\n  if (subcmd == \"STACKTRACE\") {\n    return Stacktrace(cmd_cntx);\n  }\n\n  if (subcmd == \"SHARDS\") {\n    return Shards(cmd_cntx);\n  }\n\n  if (subcmd == \"EXEC\") {\n    return Exec(cmd_cntx);\n  }\n\n  if (subcmd == \"TRAFFIC\") {\n    return LogTraffic(args.subspan(1), cmd_cntx);\n  }\n\n  if (subcmd == \"RECVSIZE\" && args.size() == 2) {\n    return RecvSize(ArgS(args, 1), cmd_cntx);\n  }\n\n  if (subcmd == \"TOPK\" && args.size() >= 2) {\n    return Topk(args.subspan(1), cmd_cntx);\n  }\n\n  if (subcmd == \"KEYS\" && args.size() >= 2) {\n    return Keys(args.subspan(1), cmd_cntx);\n  }\n\n  if (subcmd == \"VALUES\" && args.size() >= 2) 
{\n    return Values(args.subspan(1), cmd_cntx);\n  }\n  if (subcmd == \"COMPRESSION\") {\n    return Compression(args.subspan(1), cmd_cntx);\n  }\n\n  if (subcmd == \"IOSTATS\") {\n    return IOStats(args.subspan(1), cmd_cntx);\n  }\n  if (subcmd == \"SEGMENTS\") {\n    return Segments(args.subspan(1), cmd_cntx);\n  }\n\n  if (subcmd == \"COMPACT-TABLE\") {\n    return CompactTable(args.subspan(1), cmd_cntx);\n  }\n\n  if (subcmd == \"UNIQ-STRS\") {\n    return CountUniqueStrings(cmd_cntx);\n  }\n\n  string reply = UnknownSubCmd(subcmd, \"DEBUG\");\n  return cmd_cntx->SendError(reply, kSyntaxErrType);\n}\n\nvoid DebugCmd::Shutdown() {\n  // disable traffic logging\n  shard_set->pool()->AwaitFiberOnAll([](auto*) { facade::Connection::StopTrafficLogging(); });\n}\n\nvoid DebugCmd::Reload(CmdArgList args, CommandContext* cmd_cntx) {\n  bool save = true;\n\n  auto* rb = static_cast<RedisReplyBuilder*>(cmd_cntx->rb());\n  for (size_t i = 1; i < args.size(); ++i) {\n    string opt = absl::AsciiStrToUpper(ArgS(args, i));\n    VLOG(1) << \"opt \" << opt;\n\n    if (opt == \"NOSAVE\") {\n      save = false;\n    } else {\n      return cmd_cntx->SendError(\"DEBUG RELOAD only supports the NOSAVE options.\");\n    }\n  }\n\n  if (save) {\n    string err_details;\n    VLOG(1) << \"Performing save\";\n\n    GenericError ec = sf_.DoSave();\n    if (ec) {\n      return cmd_cntx->SendError(ec.Format());\n    }\n  }\n\n  string last_save_file = sf_.GetLastSaveInfo().file_name;\n\n  sf_.FlushAll(cntx_->ns);\n\n  if (auto fut_ec = sf_.Load(last_save_file, ServerFamily::LoadExistingKeys::kFail); fut_ec) {\n    GenericError ec = fut_ec->Get();\n    if (ec) {\n      string msg = ec.Format();\n      LOG(WARNING) << \"Could not load file \" << msg;\n      return cmd_cntx->SendError(msg);\n    }\n  }\n\n  rb->SendOk();\n}\n\nvoid DebugCmd::Replica(CmdArgList args, CommandContext* cmd_cntx) {\n  args.remove_prefix(1);\n\n  string opt = absl::AsciiStrToUpper(ArgS(args, 0));\n\n  auto* rb = 
static_cast<RedisReplyBuilder*>(cmd_cntx->rb());\n  if (opt == \"PAUSE\" || opt == \"RESUME\") {\n    sf_.PauseReplication(opt == \"PAUSE\");\n    return rb->SendOk();\n  } else if (opt == \"OFFSET\") {\n    const auto offset_info = sf_.GetReplicaOffsetInfo();\n    if (offset_info) {\n      rb->StartArray(2);\n      rb->SendBulkString(offset_info.value().sync_id);\n      rb->StartArray(offset_info.value().flow_offsets.size());\n      for (uint64_t offset : offset_info.value().flow_offsets) {\n        rb->SendLong(offset);\n      }\n      return;\n    } else {\n      return cmd_cntx->SendError(\"I am master\");\n    }\n  }\n  return cmd_cntx->SendError(UnknownSubCmd(\"replica\", \"DEBUG\"));\n}\n\nvoid DebugCmd::Migration(CmdArgList args, CommandContext* cmd_cntx) {\n  args.remove_prefix(1);\n\n  string opt = absl::AsciiStrToUpper(ArgS(args, 0));\n  auto* rb = static_cast<RedisReplyBuilder*>(cmd_cntx->rb());\n  if (opt == \"PAUSE\" || opt == \"RESUME\") {\n    cf_.PauseAllIncomingMigrations(opt == \"PAUSE\");\n    return rb->SendOk();\n  }\n  return cmd_cntx->SendError(UnknownSubCmd(\"MIGRATION\", \"DEBUG\"));\n}\n\nenum PopulateFlag { FLAG_RAND, FLAG_TYPE, FLAG_ELEMENTS, FLAG_SLOT, FLAG_EXPIRE, FLAG_UNKNOWN };\n\n// Populate arguments format:\n// required: (total count) (key prefix) (val size)\n// optional: [RAND | TYPE typename | ELEMENTS element num | SLOTS (key value)+ | EXPIRE start end]\noptional<DebugCmd::PopulateOptions> DebugCmd::ParsePopulateArgs(CmdArgList args,\n                                                                CommandContext* cmd_cntx) {\n  CmdArgParser parser(args.subspan(1));\n  PopulateOptions options;\n\n  options.total_count = parser.Next<uint64_t>();\n  options.prefix = parser.NextOrDefault<string_view>(\"key\");\n  options.val_size = parser.NextOrDefault<uint32_t>(16);\n  while (parser.HasNext()) {\n    PopulateFlag flag = parser.MapNext(\"RAND\", FLAG_RAND, \"TYPE\", FLAG_TYPE, \"ELEMENTS\",\n                                       
FLAG_ELEMENTS, \"SLOTS\", FLAG_SLOT, \"EXPIRE\", FLAG_EXPIRE);\n    switch (flag) {\n      case FLAG_RAND:\n        options.populate_random_values = true;\n        break;\n      case FLAG_TYPE:\n        options.type = absl::AsciiStrToUpper(parser.Next<string_view>());\n        break;\n      case FLAG_ELEMENTS:\n        options.elements = parser.Next<uint32_t>();\n        break;\n      case FLAG_SLOT: {\n        auto [start, end] = parser.Next<FInt<0, 16383>, FInt<0, 16383>>();\n        options.slot_range = cluster::SlotRange{SlotId(start), SlotId(end)};\n        break;\n      }\n      case FLAG_EXPIRE: {\n        auto [min_ttl, max_ttl] = parser.Next<uint32_t, uint32_t>();\n        if (min_ttl >= max_ttl) {\n          cmd_cntx->SendError(kExpiryOutOfRange);\n          (void)parser.TakeError();\n          return nullopt;\n        }\n        options.expire_ttl_range = std::make_pair(min_ttl, max_ttl);\n        break;\n      }\n      default:\n        LOG(FATAL) << \"Unexpected flag in PopulateArgs. 
Args: \" << args;\n        break;\n    }\n  }\n  if (parser.HasError()) {\n    cmd_cntx->SendError(parser.TakeError().MakeReply());\n    return nullopt;\n  }\n  if (options.val_size == 0) {\n    cmd_cntx->SendError(\"val_size must be positive\");\n    return nullopt;\n  }\n  return options;\n}\n\nvoid DebugCmd::Populate(CmdArgList args, CommandContext* cmd_cntx) {\n  optional<PopulateOptions> options = ParsePopulateArgs(args, cmd_cntx);\n  if (!options.has_value()) {\n    return;\n  }\n  DCHECK(sf_.AreAllReplicasInStableSync());\n\n  ProactorPool& pp = sf_.service().proactor_pool();\n  size_t runners_count = pp.size();\n  vector<pair<uint64_t, uint64_t>> ranges(runners_count - 1);\n  uint64_t batch_size = options->total_count / runners_count;\n  size_t from = 0;\n  for (size_t i = 0; i < ranges.size(); ++i) {\n    ranges[i].first = from;\n    ranges[i].second = batch_size;\n    from += batch_size;\n  }\n  ranges.emplace_back(from, options->total_count - from);\n\n  vector<fb2::Fiber> fb_arr(ranges.size());\n  for (size_t i = 0; i < ranges.size(); ++i) {\n    auto range = ranges[i];\n\n    // whatever we do, we should not capture i by reference.\n    fb_arr[i] = pp.at(i)->LaunchFiber([range, options, this] {\n      this->PopulateRangeFiber(range.first, range.second, options.value());\n    });\n  }\n  for (auto& fb : fb_arr)\n    fb.Join();\n\n  cmd_cntx->rb()->SendOk();\n\n  DCHECK(sf_.AreAllReplicasInStableSync());\n}\n\nvoid DebugCmd::PopulateRangeFiber(uint64_t from, uint64_t num_of_keys,\n                                  const PopulateOptions& options) {\n  ThisFiber::SetName(\"populate_range\");\n  VLOG(1) << \"PopulateRange: \" << from << \"-\" << (from + num_of_keys - 1);\n\n  string key = StrCat(options.prefix, \":\");\n  size_t prefsize = key.size();\n  DbIndex db_indx = cntx_->db_index();\n  EngineShardSet& ess = *shard_set;\n  std::vector<PopulateBatch> ps(ess.size(), PopulateBatch{db_indx});\n\n  uint64_t index = from;\n  uint64_t to = from + 
num_of_keys;\n  uint64_t added = 0;\n  while (added < num_of_keys) {\n    if ((index >= to) && ((index - to) % options.total_count == 0)) {\n      index = index - num_of_keys + options.total_count;\n    }\n    key.resize(prefsize);  // shrink back\n\n    StrAppend(&key, index);\n\n    if (options.slot_range.has_value()) {\n      // Each fiber will add num_of_keys. Keys are in the form of <key_prefix>:<index>\n      // We need to make sure that different fibers will not add the same key.\n      // Fiber starting <key_prefix>:<from> to <key_prefix>:<from+num_of_keys-1>\n      // then continue to <key_prefix>:<from+total_count> to\n      // <key_prefix>:<from+total_count+num_of_keys-1> and continue until num_of_keys are added.\n\n      // Add keys only in slot range.\n      SlotId sid = KeySlot(key);\n      if (sid < options.slot_range->start || sid > options.slot_range->end) {\n        ++index;\n        continue;\n      }\n    }\n    ShardId sid = Shard(key, ess.size());\n\n    auto& shard_batch = ps[sid];\n    shard_batch.index[shard_batch.sz++] = index;\n    ++added;\n    ++index;\n\n    if (shard_batch.sz == 32) {\n      ess.Add(sid, [this, index, options, shard_batch]() {\n        DoPopulateBatch(options, shard_batch);\n        if (index % 50 == 0) {\n          ThisFiber::Yield();\n        }\n      });\n\n      // we capture shard_batch by value so we can override it here.\n      shard_batch.sz = 0;\n    }\n  }\n\n  ess.AwaitRunningOnShardQueue([&](EngineShard* shard) {\n    DoPopulateBatch(options, ps[shard->shard_id()]);\n    // Debug populate does not use transaction framework therefore we call OnCbFinishBlocking\n    // manually after running the callback Note that running debug populate while running\n    // flushall/db can cause dcheck fail because the finish cb is executed just when we finish\n    // populating the database.\n    cntx_->ns->GetDbSlice(shard->shard_id()).OnCbFinishBlocking();\n  });\n}\n\nvoid DebugCmd::Exec(CommandContext* cmd_cntx) {\n  
EngineShardSet& ess = *shard_set;\n  fb2::Mutex mu;\n  std::map<string, unsigned> freq_cnt;\n\n  ess.pool()->AwaitFiberOnAll([&](auto*) {\n    for (const auto& k_v : ServerState::tlocal()->exec_freq_count) {\n      unique_lock lk(mu);\n      freq_cnt[k_v.first] += k_v.second;\n    }\n  });\n\n  string res;\n  for (const auto& k_v : freq_cnt) {\n    StrAppend(&res, k_v.second, \":\", k_v.first, \"\\n\");\n  }\n  StrAppend(&res, \"--------------------------\\n\");\n\n  auto* rb = static_cast<RedisReplyBuilder*>(cmd_cntx->rb());\n  rb->SendVerbatimString(res);\n}\n\nvoid DebugCmd::LogTraffic(CmdArgList args, CommandContext* cmd_cntx) {\n  optional<string> path;\n  auto* rb = static_cast<RedisReplyBuilder*>(cmd_cntx->rb());\n  if (ProactorBase::me()->GetKind() != ProactorBase::IOURING) {\n    return cmd_cntx->SendError(\"Traffic recording supported only on iouring\");\n  }\n\n  if (args.size() == 1 && absl::AsciiStrToUpper(facade::ToSV(args.front())) != \"STOP\"sv) {\n    path = ArgS(args, 0);\n    LOG(INFO) << \"Logging to traffic to \" << *path << \"*.bin\";\n  } else {\n    LOG(INFO) << \"Traffic logging stopped\";\n  }\n\n  shard_set->pool()->AwaitFiberOnAll([path](auto*) {\n    if (path)\n      facade::Connection::StartTrafficLogging(*path);\n    else\n      facade::Connection::StopTrafficLogging();\n  });\n  rb->SendOk();\n}\n\nvoid DebugCmd::Inspect(string_view key, CmdArgList args, CommandContext* cmd_cntx) {\n  EngineShardSet& ess = *shard_set;\n  ShardId sid = Shard(key, ess.size());\n  VLOG(1) << \"DebugCmd::Inspect \" << key;\n\n  bool check_compression = false;\n  if (args.size() == 1) {\n    check_compression = absl::AsciiStrToUpper(ArgS(args, 0)) == \"COMPRESS\";\n  }\n  string resp;\n  auto* rb = static_cast<RedisReplyBuilder*>(cmd_cntx->rb());\n  if (check_compression) {\n    auto cb = [&] { return EstimateCompression(cntx_, key); };\n    auto res = ess.Await(sid, std::move(cb));\n    if (!res) {\n      cmd_cntx->SendError(res.status());\n      
return;\n    }\n    StrAppend(&resp, \"raw_size: \", res->raw_size, \", compressed_size: \", res->compressed_size);\n    if (res->raw_size > 0) {\n      StrAppend(&resp, \" ratio: \", static_cast<double>(res->compressed_size) / (res->raw_size));\n    }\n  } else {\n    auto cb = [&] { return InspectOp(cntx_, key); };\n\n    ObjInfo res = ess.Await(sid, std::move(cb));\n\n    if (!res.found) {\n      cmd_cntx->SendError(kKeyNotFoundErr);\n      return;\n    }\n\n    StrAppend(&resp, \"encoding:\", EncodingName(res.type, res.encoding),\n              \" bucket_id:\", res.bucket_id);\n    StrAppend(&resp, \" slot:\", res.slot_id, \" shard:\", sid);\n\n    if (res.ttl != INT64_MAX) {\n      StrAppend(&resp, \" ttl:\", res.ttl, res.has_sec_precision ? \"s\" : \"ms\");\n    }\n\n    if (res.external_len) {\n      StrAppend(&resp, \" spill_len:\", *res.external_len);\n    }\n\n    if (res.num_nodes) {\n      // node count\n      StrAppend(&resp, \" nc:\", res.num_nodes);\n    }\n\n    if (res.num_compressed) {\n      // compressed nodes\n      StrAppend(&resp, \" cn:\", res.num_compressed);\n    }\n\n    if (res.lock_status != ObjInfo::NONE) {\n      StrAppend(&resp, \" lock:\", res.lock_status == ObjInfo::X ? 
\"x\" : \"s\");\n    }\n  }\n  rb->SendSimpleString(resp);\n}\n\nvoid DebugCmd::Watched(CommandContext* cmd_cntx) {\n  fb2::Mutex mu;\n\n  vector<string> watched_keys;\n  vector<string> awaked_trans;\n\n  auto cb = [&](EngineShard* shard) {\n    auto* bc = cntx_->ns->GetBlockingController(shard->shard_id());\n    if (bc) {\n      auto keys = bc->GetWatchedKeys(cntx_->db_index());\n\n      lock_guard lk(mu);\n      watched_keys.insert(watched_keys.end(), keys.begin(), keys.end());\n      for (auto* tx : bc->awakened_transactions()) {\n        awaked_trans.push_back(StrCat(\"[\", shard->shard_id(), \"] \", tx->DebugId()));\n      }\n    }\n  };\n\n  auto* rb = static_cast<RedisReplyBuilder*>(cmd_cntx->rb());\n  shard_set->RunBlockingInParallel(cb);\n  rb->StartArray(4);\n  rb->SendBulkString(\"awaked\");\n  rb->SendBulkStrArr(awaked_trans);\n  rb->SendBulkString(\"watched\");\n  rb->SendBulkStrArr(watched_keys);\n}\n\nvoid DebugCmd::TxAnalysis(CommandContext* cmd_cntx) {\n  vector<EngineShard::TxQueueInfo> shard_info(shard_set->size());\n\n  auto cb = [&](EngineShard* shard) {\n    auto& info = shard_info[shard->shard_id()];\n    info = shard->AnalyzeTxQueue();\n  };\n\n  shard_set->RunBriefInParallel(cb);\n\n  string result;\n  for (unsigned i = 0; i < shard_set->size(); ++i) {\n    const auto& info = shard_info[i];\n    StrAppend(&result, \"shard\", i, \":\\n\", info.Format(), \"\\n\");\n  }\n  auto* rb = static_cast<RedisReplyBuilder*>(cmd_cntx->rb());\n  rb->SendVerbatimString(result);\n}\n\nvoid DebugCmd::ObjHist(CommandContext* cmd_cntx) {\n  vector<ObjHistMap> obj_hist_map_arr(shard_set->size());\n  auto cb = [&obj_hist_map_arr](PrimeIterator it) {\n    unsigned obj_type = it->second.ObjType();\n    auto& hist_ptr = obj_hist_map_arr[EngineShard::tlocal()->shard_id()][obj_type];\n    if (!hist_ptr) {\n      hist_ptr.reset(new struct ObjHist);\n    }\n    AddObjHist(it, hist_ptr.get());\n  };\n  TraverseAllEntries(absl::GetFlag(FLAGS_background_debug_jobs), 
cntx_, cb);\n\n  for (size_t i = shard_set->size() - 1; i > 0; --i) {\n    MergeObjHistMap(std::move(obj_hist_map_arr[i]), &obj_hist_map_arr[0]);\n  }\n\n  string result;\n  absl::StrAppend(&result, \"___begin object histogram___\\n\\n\");\n\n  for (auto& [obj_type, hist_ptr] : obj_hist_map_arr[0]) {\n    StrAppend(&result, \"OBJECT:\", ObjTypeToString(obj_type), \"\\n\");\n    StrAppend(&result, \"________________________________________________________________\\n\");\n    StrAppend(&result, \"Key memory used:\\n\", hist_ptr->key_len.ToString(), \"\\n\");\n    StrAppend(&result, \"Values - Total Memory used:\\n\", hist_ptr->val_len.ToString(), \"\\n\");\n    if (hist_ptr->card.count() > 0) {\n      StrAppend(&result, \"Cardinality histogram (number of elements in sets):\\n\",\n                hist_ptr->card.ToString(), \"\\n\");\n    }\n    StrAppend(&result, \"Items length histogram:\\n\", hist_ptr->entry_len.ToString(), \"\\n\");\n    if (hist_ptr->listpack.count() > 0) {\n      StrAppend(&result, \"Listpack histogram:\\n\", hist_ptr->listpack.ToString(), \"\\n\");\n    }\n  }\n\n  absl::StrAppend(&result, \"___end object histogram___\\n\");\n  auto* rb = static_cast<RedisReplyBuilder*>(cmd_cntx->rb());\n  rb->SendVerbatimString(result);\n}\n\nvoid DebugCmd::Stacktrace(CommandContext* cmd_cntx) {\n  auto* rb = static_cast<RedisReplyBuilder*>(cmd_cntx->rb());\n  fb2::Mutex m;\n  shard_set->pool()->AwaitFiberOnAll([&m](unsigned index, ProactorBase* base) {\n    EngineShard* es = EngineShard::tlocal();\n    string txq;\n    if (es) {\n      EngineShard::TxQueueInfo txq_info = es->AnalyzeTxQueue();\n      txq = txq_info.Format();\n    }\n    std::unique_lock lk(m);\n    LOG_IF(INFO, !txq.empty()) << \"Shard\" << index << \": \" << txq;\n    fb2::detail::FiberInterface::PrintAllFiberStackTraces();\n  });\n  base::FlushLogs();\n  rb->SendOk();\n}\n\nvoid DebugCmd::Shards(CommandContext* cmd_cntx) {\n  struct ShardInfo {\n    uint64_t used_memory = 0;\n    uint64_t 
key_count = 0;\n    uint64_t prime_capacity = 0;\n    uint64_t expire_count = 0;\n    uint64_t key_reads = 0;\n    size_t avg_object_size = 0;\n  };\n\n  vector<ShardInfo> infos(shard_set->size());\n  shard_set->RunBriefInParallel([&](EngineShard* shard) {\n    auto sid = shard->shard_id();\n    auto& db_slice = cntx_->ns->GetDbSlice(sid);\n    auto slice_stats = db_slice.GetStats();\n    auto& stats = infos[sid];\n\n    stats.used_memory = shard->UsedMemory();\n    for (const auto& db_stats : slice_stats.db_stats) {\n      stats.key_count += db_stats.key_count;\n      stats.prime_capacity += db_stats.prime_capacity;\n      stats.expire_count += db_stats.expire_count;\n    }\n    stats.avg_object_size = db_slice.bytes_per_object();\n    stats.key_reads = slice_stats.events.hits + slice_stats.events.misses;\n  });\n\n#define ADD_STAT(i, stat) absl::StrAppend(&out, \"shard\", i, \"_\", #stat, \": \", infos[i].stat, \"\\n\");\n#define MAXMIN_STAT(stat)                                   \\\n  {                                                         \\\n    uint64_t minv = std::numeric_limits<uint64_t>::max();   \\\n    uint64_t maxv = 0;                                      \\\n    for (const auto& info : infos) {                        \\\n      minv = std::min(minv, info.stat);                     \\\n      maxv = std::max(maxv, info.stat);                     \\\n    }                                                       \\\n    absl::StrAppend(&out, \"max_\", #stat, \": \", maxv, \"\\n\"); \\\n    absl::StrAppend(&out, \"min_\", #stat, \": \", minv, \"\\n\"); \\\n  }\n\n  string out;\n  absl::StrAppend(&out, \"num_shards: \", shard_set->size(), \"\\n\");\n\n  for (size_t i = 0; i < infos.size(); i++) {\n    ADD_STAT(i, used_memory);\n    ADD_STAT(i, key_count);\n    ADD_STAT(i, expire_count);\n    ADD_STAT(i, key_reads);\n\n    absl::StrAppend(&out, \"shard\", i,\n                    \"_prime_utilization: \", double(infos[i].key_count) / 
infos[i].prime_capacity,\n                    \"\\n\");\n    absl::StrAppend(&out, \"shard\", i, \"_avg_object_size: \", infos[i].avg_object_size, \"\\n\");\n  }\n\n  MAXMIN_STAT(used_memory);\n  MAXMIN_STAT(key_count);\n  MAXMIN_STAT(expire_count);\n  MAXMIN_STAT(key_reads);\n\n#undef ADD_STAT\n#undef MAXMIN_STAT\n  auto* rb = static_cast<RedisReplyBuilder*>(cmd_cntx->rb());\n  rb->SendVerbatimString(out);\n}\n\nvoid DebugCmd::RecvSize(string_view param, CommandContext* cmd_cntx) {\n  auto* rb = static_cast<RedisReplyBuilder*>(cmd_cntx->rb());\n  uint8_t enable = 2;\n  if (absl::EqualsIgnoreCase(param, \"ENABLE\"))\n    enable = 1;\n  else if (absl::EqualsIgnoreCase(param, \"DISABLE\"))\n    enable = 0;\n\n  if (enable < 2) {\n    shard_set->pool()->AwaitBrief(\n        [enable](auto, auto*) { facade::Connection::TrackRequestSize(enable == 1); });\n    return rb->SendOk();\n  }\n\n  unsigned tid;\n  if (!absl::SimpleAtoi(param, &tid) || tid >= shard_set->pool()->size()) {\n    return cmd_cntx->SendError(kUintErr);\n  }\n\n  string hist;\n  shard_set->pool()->at(tid)->AwaitBrief(\n      [&]() { facade::Connection::GetRequestSizeHistogramThreadLocal(&hist); });\n  rb->SendVerbatimString(hist);\n}\n\nvoid DebugCmd::Topk(CmdArgList args, CommandContext* cmd_cntx) {\n  auto* rb = static_cast<RedisReplyBuilder*>(cmd_cntx->rb());\n  DCHECK_GE(args.size(), 1u);\n\n  string_view subcmd = ArgS(args, 0);\n  if (absl::EqualsIgnoreCase(subcmd, \"ON\")) {\n    uint32_t min_freq = 100;\n    if (args.size() > 1) {\n      if (!absl::SimpleAtoi(ArgS(args, 1), &min_freq))\n        return cmd_cntx->SendError(kUintErr);\n    }\n    shard_set->RunBriefInParallel([&](EngineShard* es) {\n      cntx_->ns->GetDbSlice(es->shard_id()).StartSampleTopK(cntx_->db_index(), min_freq);\n    });\n    return rb->SendOk();\n  }\n\n  if (absl::EqualsIgnoreCase(subcmd, \"OFF\")) {\n    vector<DbSlice::SamplingResult> results(shard_set->size());\n    uint32_t max_keys = 50;\n\n    if (args.size() > 1) 
{\n      if (!absl::SimpleAtoi(ArgS(args, 1), &max_keys))\n        return cmd_cntx->SendError(kUintErr);\n    }\n\n    shard_set->RunBriefInParallel([&](EngineShard* es) {\n      results[es->shard_id()] =\n          cntx_->ns->GetDbSlice(es->shard_id()).StopSampleTopK(cntx_->db_index());\n    });\n\n    vector<pair<uint64_t, string>> items;\n    uint64_t total_keys = 0;\n    for (const auto& res : results) {\n      total_keys += res.total_samples;\n      for (const auto& k_v : res.top_keys) {\n        items.emplace_back(k_v.second, k_v.first);\n        push_heap(items.begin(), items.end(), std::greater<>());\n        if (items.size() > max_keys) {\n          pop_heap(items.begin(), items.end(), std::greater<>());\n          items.pop_back();\n        }\n      }\n    }\n\n    rb->StartArray(2);\n    rb->SendLong(total_keys);\n    rb->StartArray(items.size());\n    for (const auto& k_v : items) {\n      rb->SendBulkString(StrCat(k_v.second, \":\", k_v.first));\n    }\n    return;\n  }\n\n  return cmd_cntx->SendError(kSyntaxErr);\n}\n\nvoid DebugCmd::Keys(CmdArgList args, CommandContext* cmd_cntx) {\n  string_view subcmd = ArgS(args, 0);\n  auto* rb = static_cast<RedisReplyBuilder*>(cmd_cntx->rb());\n  if (absl::EqualsIgnoreCase(subcmd, \"ON\")) {\n    shard_set->RunBriefInParallel([&](EngineShard* es) {\n      cntx_->ns->GetDbSlice(es->shard_id()).StartSampleKeys(cntx_->db_index());\n    });\n    return rb->SendOk();\n  }\n\n  if (absl::EqualsIgnoreCase(subcmd, \"OFF\")) {\n    atomic_uint64_t uniq_keys{0}, total_samples{0};\n    shard_set->RunBriefInParallel([&](EngineShard* es) {\n      DbSlice::UniqueSampleResult res =\n          cntx_->ns->GetDbSlice(es->shard_id()).StopSampleKeys(cntx_->db_index());\n      uniq_keys.fetch_add(res.unique_keys_count, memory_order_relaxed);\n      total_samples.fetch_add(res.total_samples, memory_order_relaxed);\n    });\n\n    uint64_t arr[2] = {uniq_keys.load(), total_samples.load()};\n    return 
rb->SendLongArr(absl::MakeConstSpan(arr));\n  }\n\n  return cmd_cntx->SendError(kSyntaxErr);\n}\n\nvoid DebugCmd::Values(CmdArgList args, CommandContext* cmd_cntx) {\n  string_view subcmd = ArgS(args, 0);\n  auto* rb = static_cast<RedisReplyBuilder*>(cmd_cntx->rb());\n  if (absl::EqualsIgnoreCase(subcmd, \"ON\")) {\n    shard_set->RunBriefInParallel([&](EngineShard* es) {\n      cntx_->ns->GetDbSlice(es->shard_id()).StartSampleValues(cntx_->db_index());\n    });\n    return rb->SendOk();\n  }\n\n  vector<unique_ptr<base::Histogram>> histograms(shard_set->size());\n  if (absl::EqualsIgnoreCase(subcmd, \"OFF\")) {\n    shard_set->RunBriefInParallel([&](EngineShard* es) {\n      histograms[es->shard_id()] =\n          cntx_->ns->GetDbSlice(es->shard_id()).StopSampleValues(cntx_->db_index());\n    });\n\n    base::Histogram merged_histogram;\n    for (const auto& hist : histograms) {\n      if (hist) {\n        merged_histogram.Merge(*hist);\n      }\n    }\n    return rb->SendVerbatimString(merged_histogram.ToString());\n  }\n\n  return cmd_cntx->SendError(kSyntaxErr);\n}\n\nstatic size_t PostProcessHist(HufHist* dest) {\n  size_t total_freq = 0;\n  auto& hist = dest->hist;\n  unsigned max_freq = 0;\n\n  for (unsigned i = 0; i <= HufHist::kMaxSymbol; i++) {\n    // raw_size may count less characters than the actual size because\n    // we may cut the counting early.\n    total_freq += hist[i];\n    if (hist[i] == 0) {\n      hist[i] = 1;  // Avoid zero frequency symbols.\n    }\n  }\n\n  if (total_freq > kMaxFreqTotal) {\n    // huffman encoder has a bug with frequencies too high, so we scale down everything\n    // to avoid overflow.\n    double scale = static_cast<double>(max_freq) / kMaxFreqTotal;\n    for (unsigned i = 0; i <= HufHist::kMaxSymbol; i++) {\n      hist[i] = unsigned(hist[i] / scale);\n      if (hist[i] == 0) {\n        hist[i] = 1;  // Avoid zero frequency symbols.\n      }\n    }\n  }\n  return total_freq;\n}\n\nvoid DebugCmd::Compression(CmdArgList 
args, CommandContext* cmd_cntx) {\n  CompactObjType type = kInvalidCompactObjType;\n  CmdArgParser parser(args);\n  string bintable;\n  bool print_bintable = false;\n\n  auto* rb = static_cast<RedisReplyBuilder*>(cmd_cntx->rb());\n  if (parser.Check(\"SET\", &bintable)) {\n    // SET <bintable> [type]\n    string raw;\n    atomic_bool succeed = absl::Base64Unescape(bintable, &raw);\n    if (succeed) {\n      CompactObj::HuffmanDomain domain = CompactObj::HUFF_KEYS;\n      if (parser.HasNext()) {\n        string_view type_str = parser.Next();\n        type = ObjTypeFromString(type_str);\n        if (type != OBJ_STRING) {  // Currently only string type is supported.\n          return cmd_cntx->SendError(kSyntaxErr);\n        }\n        domain = CompactObj::HUFF_STRING_VALUES;\n      }\n      shard_set->RunBriefInParallel([&](EngineShard* shard) {\n        if (!CompactObj::InitHuffmanThreadLocal(domain, raw)) {\n          succeed = false;\n        }\n      });\n    }\n    return succeed ? rb->SendOk() : cmd_cntx->SendError(\"Failed to set bintable\");\n  }\n\n  if (parser.Check(\"EXPORT\")) {\n    print_bintable = true;\n  } else if (parser.Check(\"IMPORT\", &bintable)) {\n    string raw;\n    bool succeed = absl::Base64Unescape(bintable, &raw);\n    if (succeed) {\n      bintable = raw;\n    }\n  }\n\n  if (parser.HasNext()) {\n    string_view type_str = parser.Next();\n    type = ObjTypeFromString(type_str);\n    if (type == kInvalidCompactObjType) {\n      return cmd_cntx->SendError(kSyntaxErr);\n    }\n  }\n\n  RETURN_ON_PARSE_ERROR(parser, cmd_cntx);\n\n  fb2::Mutex mu;\n  HufHist hist;\n  shard_set->RunBlockingInParallel([&](EngineShard* shard) {\n    HufHist local;\n    DoComputeHist(type, shard, cntx_, &local);\n    std::unique_lock lk(mu);\n    hist.Merge(local);\n  });\n\n  size_t num_bits = 0, compressed_size = 0, raw_size = 0;\n  if (hist.max_symbol) {\n    HuffmanEncoder huff_enc;\n    string err_msg;\n\n    raw_size = PostProcessHist(&hist);\n\n    if 
(bintable.empty()) {\n      if (!huff_enc.Build(hist.hist.data(), HufHist::kMaxSymbol, &err_msg)) {\n        return cmd_cntx->SendError(StrCat(\"Internal error: \", err_msg));\n      }\n    } else {\n      // Try to read the bintable and create a ctable from it.\n      if (!huff_enc.Load(bintable, &err_msg)) {\n        return cmd_cntx->SendError(StrCat(\"Internal error: \", err_msg));\n      }\n    }\n    num_bits = huff_enc.num_bits();\n    compressed_size = huff_enc.EstimateCompressedSize(hist.hist.data(), HufHist::kMaxSymbol);\n\n    if (print_bintable) {\n      bintable = huff_enc.Export();\n    } else {\n      bintable.clear();\n    }\n  }\n\n  unsigned map_len = print_bintable ? 6 : 5;\n\n  rb->StartCollection(map_len, CollectionType::MAP);\n  rb->SendSimpleString(\"max_symbol\");\n  rb->SendLong(hist.max_symbol);\n\n  rb->SendSimpleString(\"max_bits\");\n  rb->SendLong(num_bits);\n  rb->SendSimpleString(\"raw_size\");\n  rb->SendLong(raw_size);\n  rb->SendSimpleString(\"compressed_size\");\n  rb->SendLong(compressed_size);\n  rb->SendSimpleString(\"ratio\");\n  double ratio = raw_size > 0 ? 
static_cast<double>(compressed_size) / raw_size : 0;\n  rb->SendDouble(ratio);\n  if (print_bintable) {\n    rb->SendSimpleString(\"bintable\");\n    rb->SendBulkString(absl::Base64Escape(bintable));\n  }\n}\n\nvoid DebugCmd::IOStats(CmdArgList args, CommandContext* cmd_cntx) {\n  auto* rb = static_cast<RedisReplyBuilder*>(cmd_cntx->rb());\n\n  bool per_second = !args.empty() && absl::EqualsIgnoreCase(args[0], \"PS\");\n  vector<IOStat> stats(shard_set->pool()->size());\n\n  shard_set->pool()->AwaitBrief(\n      [&](unsigned index, ProactorBase*) { stats[index].From(*facade::tl_facade_stats); });\n\n  if (per_second) {\n    ThisFiber::SleepFor(1s);\n    vector<IOStat> stats2(shard_set->pool()->size());\n    shard_set->pool()->AwaitBrief(\n        [&](unsigned index, ProactorBase*) { stats2[index].From(*facade::tl_facade_stats); });\n\n    for (size_t i = 0; i < stats.size(); ++i) {\n      stats2[i] -= stats[i];\n    }\n    stats = std::move(stats2);\n  }\n\n  rb->StartArray(stats.size());\n  for (const auto& stat : stats) {\n    stat.Print(rb);\n  }\n}\n\nvoid DebugCmd::Segments(CmdArgList args, CommandContext* cmd_cntx) {\n  auto* rb = static_cast<RedisReplyBuilder*>(cmd_cntx->rb());\n  vector<SegmentInfo> info(shard_set->size());\n\n  shard_set->RunBlockingInParallel([&](EngineShard* shard) {\n    auto& hist = info[shard->shard_id()];\n    DoSegmentHist(shard, cntx_, &hist);\n  });\n\n  base::Histogram hist;\n  for (const auto& seg_info : info) {\n    hist.Merge(seg_info.hist);\n  }\n  string result;\n  absl::StrAppend(&result, \"___begin segment info___\\n\\n\");\n  absl::StrAppend(&result, \"Segment Capacity: \", PrimeTable::kSegCapacity, \"\\n\");\n  absl::StrAppend(&result, \"Segment Size Histogram: \\n\");\n  absl::StrAppend(&result, hist.ToString(), \"\\n\");\n  rb->SendVerbatimString(result);\n}\n\nvoid DebugCmd::CompactTable(CmdArgList args, CommandContext* cmd_cntx) {\n  auto* rb = static_cast<RedisReplyBuilder*>(cmd_cntx->rb());\n\n  double threshold = 
0.25;\n  if (args.size() > 0) {\n    if (!absl::SimpleAtod(facade::ToSV(args[0]), &threshold)) {\n      return rb->SendError(\"Invalid threshold value\");\n    }\n    if (threshold <= 0.0 || threshold > 1.0) {\n      return rb->SendError(\"Threshold must be between 0 and 1\");\n    }\n  }\n\n  const DbIndex db_idx = cmd_cntx->server_conn_cntx()->db_index();\n  std::vector<size_t> results(shard_set->size());\n  shard_set->RunBlockingInParallel([&](EngineShard* shard) {\n    results[shard->shard_id()] = shard->CompactTable(threshold, db_idx);\n  });\n\n  rb->SendLong(std::accumulate(results.begin(), results.end(), 0ul));\n}\n\nvoid DebugCmd::CountUniqueStrings(const CommandContext* cmd_cntx) const {\n  using PerShardStats = std::array<std::unique_ptr<UniqueStrings>, OBJ_HASH + 1>;\n\n  vector<PerShardStats> all_shards(shard_set->size());\n  auto cb = [&all_shards](PrimeIterator it) {\n    const unsigned obj_type = it->second.ObjType();\n    if (obj_type != OBJ_HASH && obj_type != OBJ_LIST && obj_type != OBJ_SET &&\n        obj_type != OBJ_ZSET) {\n      return;\n    }\n\n    auto& entry = all_shards[EngineShard::tlocal()->shard_id()][obj_type];\n    if (!entry) {\n      entry = std::make_unique<UniqueStrings>();\n    }\n\n    if (obj_type == OBJ_HASH)\n      entry->AddHMap(it->second);\n    else if (obj_type == OBJ_LIST)\n      entry->AddList(it->second);\n    else if (obj_type == OBJ_SET)\n      entry->AddSet(it->second);\n    else if (obj_type == OBJ_ZSET)\n      entry->AddZSet(it->second);\n  };\n\n  TraverseAllEntries(absl::GetFlag(FLAGS_background_debug_jobs), cntx_, cb);\n\n  std::array<UniqueStrings, OBJ_HASH + 1> summary;\n  for (const PerShardStats& shard_stat : all_shards) {\n    for (CompactObjType obj_type = OBJ_LIST; obj_type <= OBJ_HASH; ++obj_type) {\n      if (shard_stat[obj_type]) {\n        summary[obj_type].Add(*shard_stat[obj_type]);\n      }\n    }\n  }\n\n  string result;\n  StrAppend(&result, \"___begin unique string stats___\\n\\n\");\n\n  for 
(CompactObjType obj_type = OBJ_LIST; obj_type <= OBJ_HASH; ++obj_type) {\n    const UniqueStrings& stats = summary[obj_type];\n    if (stats.total_count == 0) {\n      continue;\n    }\n    StrAppend(&result, \"OBJECT:\", ObjTypeToString(obj_type), \"\\n\");\n    StrAppend(&result, \"________________________________________________________________\\n\");\n    StrAppend(&result, stats.ToString(\"Strings\"));\n    StrAppend(&result, \"\\n\");\n  }\n\n  StrAppend(&result, \"___end unique string stats___\\n\");\n  auto* rb = static_cast<RedisReplyBuilder*>(cmd_cntx->rb());\n  rb->SendVerbatimString(result);\n}\n\nvoid DebugCmd::DoPopulateBatch(const PopulateOptions& options, const PopulateBatch& batch) {\n  auto* exec_cid = sf_.service().mutable_registry()->Find(\"EXEC\");\n  boost::intrusive_ptr<Transaction> local_tx = new Transaction{exec_cid};\n  local_tx->StartMultiNonAtomic();\n  boost::intrusive_ptr<Transaction> stub_tx =\n      new Transaction{local_tx.get(), EngineShard::tlocal()->shard_id(), nullopt};\n\n  absl::InlinedVector<string_view, 5> args_view;\n  facade::CapturingReplyBuilder crb;\n  absl::InsecureBitGen gen;\n  CommandContext cmd_cntx{&crb, cntx_};\n  cmd_cntx.SetupTx(exec_cid, stub_tx.get());\n\n  for (unsigned i = 0; i < batch.sz; ++i) {\n    string key = StrCat(options.prefix, \":\", batch.index[i]);\n    uint32_t elements_left = options.elements;\n\n    // limit rss grow by 32K by limiting the element count in each command.\n    // for stream we use 4 fields and (elements / 4) stream entries\n    uint32_t max_batch_elements =\n        options.type == \"STREAM\" ? 
4 : std::max(32_KB / options.val_size, 1ULL);\n    while (elements_left) {\n      uint32_t populate_elements = std::min(max_batch_elements, elements_left);\n      if (options.type == \"STREAM\" && populate_elements > 4) {\n        // populate_elements % 4 == 0, because we add 4 fields into one stream entry\n        populate_elements -= (populate_elements % 4);\n      }\n      elements_left -= populate_elements;\n      auto [cid, args] = GeneratePopulateCommand(options.type, key, options.val_size,\n                                                 options.populate_random_values, populate_elements,\n                                                 *sf_.service().mutable_registry(), &gen);\n      if (!cid) {\n        LOG_EVERY_N(WARNING, 10'000) << \"Unable to find command, was it renamed?\";\n        break;\n      }\n\n      args_view.clear();\n      for (auto& arg : args) {\n        args_view.push_back(arg);\n      }\n      auto args_span = absl::MakeSpan(args_view);\n      stub_tx->MultiSwitchCmd(cid);\n      crb.SetReplyMode(ReplyMode::NONE);\n      stub_tx->InitByArgs(cntx_->ns, cntx_->conn_state.db_index, args_span);\n      cmd_cntx.UpdateCid(cid);\n      sf_.service().InvokeCmd(args_span, &cmd_cntx);\n    }\n\n    if (options.expire_ttl_range.has_value()) {\n      uint32_t start = options.expire_ttl_range->first;\n      uint32_t end = options.expire_ttl_range->second;\n      uint32_t expire_ttl = rand() % (end - start) + start;\n      VLOG(1) << \"set key \" << key << \" expire ttl as \" << expire_ttl;\n      auto cid = sf_.service().mutable_registry()->Find(\"EXPIRE\");\n      absl::InlinedVector<string, 5> args;\n      args.push_back(std::move(key));\n      args.push_back(to_string(expire_ttl));\n      args_view.clear();\n      for (auto& arg : args) {\n        args_view.push_back(arg);\n      }\n      auto args_span = absl::MakeSpan(args_view);\n      crb.SetReplyMode(ReplyMode::NONE);\n      stub_tx->MultiSwitchCmd(cid);\n      stub_tx->InitByArgs(cntx_->ns, 
cntx_->conn_state.db_index, args_span);\n      cmd_cntx.UpdateCid(cid);\n      sf_.service().InvokeCmd(args_span, &cmd_cntx);\n    }\n  }\n\n  local_tx->UnlockMulti();\n}\n\n}  // namespace dfly\n"
  },
  {
    "path": "src/server/debugcmd.h",
    "content": "// Copyright 2022, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#pragma once\n\n#include \"server/cluster/cluster_defs.h\"\n#include \"server/conn_context.h\"\n\nnamespace dfly {\n\nnamespace cluster {\nclass ClusterFamily;\n}\n\nclass EngineShardSet;\nclass ServerFamily;\n\nclass DebugCmd {\n private:\n  struct PopulateOptions {\n    uint64_t total_count = 0;\n    std::string_view prefix{\"key\"};\n    uint32_t val_size = 16;\n    bool populate_random_values = false;\n    std::string type{\"STRING\"};\n    uint32_t elements = 1;\n\n    std::optional<cluster::SlotRange> slot_range;\n    std::optional<std::pair<uint32_t, uint32_t>> expire_ttl_range;\n  };\n\n public:\n  DebugCmd(ServerFamily* owner, cluster::ClusterFamily* cf, ConnectionContext* cntx);\n\n  void Run(CmdArgList args, CommandContext* cmd_cntx);\n\n  static void Shutdown();\n\n private:\n  void Populate(CmdArgList args, CommandContext* cmd_cntx);\n  static std::optional<PopulateOptions> ParsePopulateArgs(CmdArgList args,\n                                                          CommandContext* cmd_cntx);\n  void PopulateRangeFiber(uint64_t from, uint64_t count, const PopulateOptions& opts);\n\n  void Reload(CmdArgList args, CommandContext* cmd_cntx);\n  void Replica(CmdArgList args, CommandContext* cmd_cntx);\n  void Migration(CmdArgList args, CommandContext* cmd_cntx);\n\n  void Exec(CommandContext* cmd_cntx);\n  void Inspect(std::string_view key, CmdArgList args, CommandContext* cmd_cntx);\n  void Watched(CommandContext* cmd_cntx);\n  void TxAnalysis(CommandContext* cmd_cntx);\n  void ObjHist(CommandContext* cmd_cntx);\n  void Stacktrace(CommandContext* cmd_cntx);\n  void Shards(CommandContext* cmd_cntx);\n  void LogTraffic(CmdArgList, CommandContext* cmd_cntx);\n  void RecvSize(std::string_view param, CommandContext* cmd_cntx);\n  void Topk(CmdArgList args, CommandContext* cmd_cntx);\n  void Keys(CmdArgList args, CommandContext* cmd_cntx);\n 
 void Values(CmdArgList args, CommandContext* cmd_cntx);\n  void Compression(CmdArgList args, CommandContext* cmd_cntx);\n  void IOStats(CmdArgList args, CommandContext* cmd_cntx);\n  void Segments(CmdArgList args, CommandContext* cmd_cntx);\n  void CompactTable(CmdArgList args, CommandContext* cmd_cntx);\n  void CountUniqueStrings(const CommandContext* cmd_cntx) const;\n  struct PopulateBatch {\n    DbIndex dbid;\n    uint64_t index[32];\n    uint64_t sz = 0;\n\n    explicit PopulateBatch(DbIndex id) : dbid(id) {\n    }\n  };\n\n  void DoPopulateBatch(const PopulateOptions& options, const PopulateBatch& batch);\n\n  ServerFamily& sf_;\n  cluster::ClusterFamily& cf_;\n  ConnectionContext* cntx_;\n};\n\n}  // namespace dfly\n"
  },
  {
    "path": "src/server/detail/compressor.cc",
    "content": "// Copyright 2024, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#include \"server/detail/compressor.h\"\n\n#include <absl/flags/flag.h>\n#include <lz4frame.h>\n#include <zstd.h>\n\n#include \"base/logging.h\"\n\nABSL_FLAG(int, compression_level, 2, \"The compression level to use on zstd/lz4 compression\");\n\nnamespace dfly::detail {\n\nusing namespace std;\n\nclass ZstdCompressor : public CompressorImpl {\n public:\n  ZstdCompressor() {\n    cctx_ = ZSTD_createCCtx();\n  }\n  ~ZstdCompressor() {\n    ZSTD_freeCCtx(cctx_);\n  }\n\n  io::Result<io::Bytes> Compress(io::Bytes data);\n\n private:\n  ZSTD_CCtx* cctx_;\n  base::PODArray<uint8_t> compr_buf_;\n};\n\nio::Result<io::Bytes> ZstdCompressor::Compress(io::Bytes data) {\n  size_t buf_size = ZSTD_compressBound(data.size());\n  if (compr_buf_.capacity() < buf_size) {\n    compr_buf_.reserve(buf_size);\n  }\n  size_t compressed_size = ZSTD_compressCCtx(cctx_, compr_buf_.data(), compr_buf_.capacity(),\n                                             data.data(), data.size(), compression_level_);\n\n  if (ZSTD_isError(compressed_size)) {\n    LOG(ERROR) << \"ZSTD_compressCCtx failed with error \" << ZSTD_getErrorName(compressed_size);\n    return nonstd::make_unexpected(make_error_code(errc::operation_not_supported));\n  }\n  compressed_size_total_ += compressed_size;\n  uncompressed_size_total_ += data.size();\n  return io::Bytes(compr_buf_.data(), compressed_size);\n}\n\nclass Lz4Compressor : public CompressorImpl {\n public:\n  Lz4Compressor() {\n    LZ4F_errorCode_t code = LZ4F_createCompressionContext(&cctx_, LZ4F_VERSION);\n    CHECK(!LZ4F_isError(code));\n  }\n\n  ~Lz4Compressor() {\n    LZ4F_errorCode_t code = LZ4F_freeCompressionContext(cctx_);\n    CHECK(!LZ4F_isError(code));\n  }\n\n  // compress a string of data\n  io::Result<io::Bytes> Compress(io::Bytes data);\n\n private:\n  LZ4F_cctx* cctx_;\n};\n\nio::Result<io::Bytes> 
Lz4Compressor::Compress(io::Bytes data) {\n  LZ4F_preferences_t lz4_pref = LZ4F_INIT_PREFERENCES;\n  lz4_pref.compressionLevel = compression_level_;\n  lz4_pref.frameInfo.contentSize = data.size();\n\n  size_t buf_size = LZ4F_compressFrameBound(data.size(), &lz4_pref);\n  if (compr_buf_.capacity() < buf_size) {\n    compr_buf_.reserve(buf_size);\n  }\n\n  size_t frame_size =\n      LZ4F_compressFrame_usingCDict(cctx_, compr_buf_.data(), compr_buf_.capacity(), data.data(),\n                                    data.size(), nullptr /* dict */, &lz4_pref);\n  if (LZ4F_isError(frame_size)) {\n    LOG(ERROR) << \"LZ4F_compressFrame failed with error \" << LZ4F_getErrorName(frame_size);\n    return nonstd::make_unexpected(make_error_code(errc::operation_not_supported));\n  }\n\n  compressed_size_total_ += frame_size;\n  uncompressed_size_total_ += data.size();\n  return io::Bytes(compr_buf_.data(), frame_size);\n}\n\nCompressorImpl::CompressorImpl() {\n  compression_level_ = absl::GetFlag(FLAGS_compression_level);\n}\n\nCompressorImpl::~CompressorImpl() {\n  VLOG(1) << \"compressed size: \" << compressed_size_total_;\n  VLOG(1) << \"uncompressed size: \" << uncompressed_size_total_;\n}\n\nunique_ptr<CompressorImpl> CompressorImpl::CreateZstd() {\n  return make_unique<ZstdCompressor>();\n}\n\nunique_ptr<CompressorImpl> CompressorImpl::CreateLZ4() {\n  return make_unique<Lz4Compressor>();\n}\n\n}  // namespace dfly::detail\n"
  },
  {
    "path": "src/server/detail/compressor.h",
    "content": "// Copyright 2024, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#pragma once\n\n#include <memory>\n\n#include \"base/pod_array.h\"\n#include \"io/io.h\"\n\nnamespace dfly::detail {\n\nclass CompressorImpl {\n public:\n  static std::unique_ptr<CompressorImpl> CreateZstd();\n  static std::unique_ptr<CompressorImpl> CreateLZ4();\n\n  CompressorImpl();\n  virtual ~CompressorImpl();\n  virtual io::Result<io::Bytes> Compress(io::Bytes data) = 0;\n\n protected:\n  int compression_level_ = 1;\n  size_t compressed_size_total_ = 0;\n  size_t uncompressed_size_total_ = 0;\n  base::PODArray<uint8_t> compr_buf_;\n};\n\n}  // namespace dfly::detail\n"
  },
  {
    "path": "src/server/detail/decompress.cc",
    "content": "// Copyright 2024, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#include \"server/detail/decompress.h\"\n\n#include <lz4frame.h>\n#include <zstd.h>\n\n#include \"base/logging.h\"\n#include \"server/error.h\"\n#include \"server/rdb_extensions.h\"\n\nnamespace dfly {\n\nnamespace detail {\n\nusing io::IoBuf;\nusing rdb::errc;\nusing namespace std;\n\ninline auto Unexpected(errc ev) {\n  return nonstd::make_unexpected(RdbError(ev));\n}\n\nclass ZstdDecompress : public DecompressImpl {\n public:\n  ZstdDecompress() {\n    dctx_ = ZSTD_createDCtx();\n  }\n  ~ZstdDecompress() {\n    ZSTD_freeDCtx(dctx_);\n  }\n\n  io::Result<io::IoBuf*> Decompress(std::string_view str);\n\n private:\n  ZSTD_DCtx* dctx_;\n};\n\nio::Result<io::IoBuf*> ZstdDecompress::Decompress(std::string_view str) {\n  // Prepare membuf memory to uncompressed string.\n  auto uncomp_size = ZSTD_getFrameContentSize(str.data(), str.size());\n  if (uncomp_size == ZSTD_CONTENTSIZE_UNKNOWN) {\n    LOG(ERROR) << \"Zstd compression missing frame content size\";\n    return Unexpected(errc::invalid_encoding);\n  }\n  if (uncomp_size == ZSTD_CONTENTSIZE_ERROR) {\n    LOG(ERROR) << \"Invalid ZSTD compressed string\";\n    return Unexpected(errc::invalid_encoding);\n  }\n\n  uncompressed_mem_buf_.Reserve(uncomp_size + 1);\n\n  // Uncompress string to membuf\n  IoBuf::Bytes dest = uncompressed_mem_buf_.AppendBuffer();\n  if (dest.size() < uncomp_size) {\n    return Unexpected(errc::out_of_memory);\n  }\n  size_t const d_size =\n      ZSTD_decompressDCtx(dctx_, dest.data(), dest.size(), str.data(), str.size());\n  if (d_size == 0 || d_size != uncomp_size) {\n    LOG(ERROR) << \"Invalid ZSTD compressed string\";\n    return Unexpected(errc::rdb_file_corrupted);\n  }\n  uncompressed_mem_buf_.CommitWrite(d_size);\n\n  // Add opcode of compressed blob end to membuf.\n  dest = uncompressed_mem_buf_.AppendBuffer();\n  if (dest.size() < 1) {\n    return 
Unexpected(errc::out_of_memory);\n  }\n  dest[0] = RDB_OPCODE_COMPRESSED_BLOB_END;\n  uncompressed_mem_buf_.CommitWrite(1);\n\n  return &uncompressed_mem_buf_;\n}\n\nclass Lz4Decompress : public DecompressImpl {\n public:\n  Lz4Decompress() {\n    auto result = LZ4F_createDecompressionContext(&dctx_, LZ4F_VERSION);\n    CHECK(!LZ4F_isError(result));\n  }\n  ~Lz4Decompress() {\n    auto result = LZ4F_freeDecompressionContext(dctx_);\n    CHECK(!LZ4F_isError(result));\n  }\n\n  io::Result<base::IoBuf*> Decompress(std::string_view str);\n\n private:\n  LZ4F_dctx* dctx_;\n};\n\nio::Result<base::IoBuf*> Lz4Decompress::Decompress(std::string_view data) {\n  LZ4F_frameInfo_t frame_info;\n  size_t frame_size = data.size();\n\n  // Get content size from frame data\n  size_t consumed = frame_size;  // The nb of bytes consumed from data will be written into consumed\n  size_t res = LZ4F_getFrameInfo(dctx_, &frame_info, data.data(), &consumed);\n  if (LZ4F_isError(res)) {\n    LOG(ERROR) << \"LZ4F_getFrameInfo failed with error \" << LZ4F_getErrorName(res);\n    return Unexpected(errc::rdb_file_corrupted);\n    ;\n  }\n\n  if (frame_info.contentSize == 0) {\n    LOG(ERROR) << \"Missing frame content size\";\n    return Unexpected(errc::rdb_file_corrupted);\n  }\n\n  // reserve place for uncompressed data and end opcode\n  size_t reserve = frame_info.contentSize + 1;\n  uncompressed_mem_buf_.Reserve(reserve);\n  IoBuf::Bytes dest = uncompressed_mem_buf_.AppendBuffer();\n  if (dest.size() < reserve) {\n    return Unexpected(errc::out_of_memory);\n  }\n\n  // Uncompress data to membuf\n  string_view src = data.substr(consumed);\n  size_t src_size = src.size();\n\n  size_t ret = 1;\n  while (ret != 0) {\n    IoBuf::Bytes dest = uncompressed_mem_buf_.AppendBuffer();\n    size_t dest_capacity = dest.size();\n\n    // It will read up to src_size bytes from src,\n    // and decompress data into dest, of capacity dest_capacity\n    // The nb of bytes consumed from src will be written 
into src_size\n    // The nb of bytes decompressed into dest will be written into dest_capacity\n    ret = LZ4F_decompress(dctx_, dest.data(), &dest_capacity, src.data(), &src_size, nullptr);\n    if (LZ4F_isError(ret)) {\n      LOG(ERROR) << \"LZ4F_decompress failed with error \" << LZ4F_getErrorName(ret);\n      return Unexpected(errc::rdb_file_corrupted);\n    }\n    consumed += src_size;\n\n    uncompressed_mem_buf_.CommitWrite(dest_capacity);\n    src = src.substr(src_size);\n    src_size = src.size();\n  }\n  if (consumed != frame_size) {\n    return Unexpected(errc::rdb_file_corrupted);\n  }\n  if (uncompressed_mem_buf_.InputLen() != frame_info.contentSize) {\n    return Unexpected(errc::rdb_file_corrupted);\n  }\n\n  // Add opcode of compressed blob end to membuf.\n  dest = uncompressed_mem_buf_.AppendBuffer();\n  if (dest.size() < 1) {\n    return Unexpected(errc::out_of_memory);\n  }\n  dest[0] = RDB_OPCODE_COMPRESSED_BLOB_END;\n  uncompressed_mem_buf_.CommitWrite(1);\n\n  return &uncompressed_mem_buf_;\n}\n\nunique_ptr<DecompressImpl> DecompressImpl::CreateLZ4() {\n  return make_unique<Lz4Decompress>();\n}\n\nunique_ptr<DecompressImpl> DecompressImpl::CreateZstd() {\n  return make_unique<ZstdDecompress>();\n}\n\n}  // namespace detail\n}  // namespace dfly\n"
  },
  {
    "path": "src/server/detail/decompress.h",
    "content": "// Copyright 2024, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n#pragma once\n\n#include <memory>\n\n#include \"io/io.h\"\n#include \"io/io_buf.h\"\n\nnamespace dfly {\n\nnamespace detail {\n\nclass DecompressImpl {\n public:\n  static std::unique_ptr<DecompressImpl> CreateLZ4();\n  static std::unique_ptr<DecompressImpl> CreateZstd();\n\n  DecompressImpl() : uncompressed_mem_buf_{1U << 14} {\n  }\n  virtual ~DecompressImpl() {\n  }\n\n  virtual io::Result<io::IoBuf*> Decompress(std::string_view str) = 0;\n\n protected:\n  io::IoBuf uncompressed_mem_buf_;\n};\n\n}  // namespace detail\n}  // namespace dfly\n"
  },
  {
    "path": "src/server/detail/save_stages_controller.cc",
    "content": "\n// Copyright 2023, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#include \"server/detail/save_stages_controller.h\"\n\n#include <absl/strings/match.h>\n\n#include <numeric>\n\n#include \"base/flags.h\"\n#include \"base/logging.h\"\n#include \"core/detail/gen_utils.h\"\n#include \"server/detail/snapshot_storage.h\"\n#include \"server/main_service.h\"\n#include \"server/namespaces.h\"\n#include \"server/script_mgr.h\"\n#include \"server/transaction.h\"\n#include \"strings/human_readable.h\"\n\nusing namespace std;\n\nABSL_DECLARE_FLAG(string, dir);\nABSL_DECLARE_FLAG(string, dbfilename);\n\nnamespace dfly {\nnamespace detail {\n\nusing namespace util;\nusing absl::GetFlag;\nusing absl::StrCat;\nusing fb2::OpenLinux;\n\nnamespace fs = std::filesystem;\n\nnamespace {\n\n// Create a directory and all its parents if they don't exist.\nerror_code CreateDirs(fs::path dir_path) {\n  error_code ec;\n  fs::file_status dir_status = fs::status(dir_path, ec);\n  if (ec == errc::no_such_file_or_directory) {\n    fs::create_directories(dir_path, ec);\n    if (!ec)\n      dir_status = fs::status(dir_path, ec);\n  }\n  return ec;\n}\n\n// modifies 'filename' to be \"filename-postfix.extension\"\nvoid SetExtension(absl::AlphaNum postfix, string_view extension, fs::path* filename) {\n  filename->replace_extension();  // clear if exists\n  *filename += StrCat(\"-\", postfix, extension);\n}\n\nvoid ExtendDfsFilenameWithShard(int shard, string_view extension, fs::path* filename) {\n  // dragonfly snapshot.\n  SetExtension(absl::Dec(shard, absl::kZeroPad4), extension, filename);\n}\n\n}  // namespace\n\nGenericError ValidateFilename(const fs::path& filename, bool new_version) {\n  if (filename.empty()) {\n    return {};\n  }\n\n  string filename_str = filename.string();\n  if (filename_str.front() == '\"') {\n    return {\n        \"filename should not start with '\\\"', could it be that you put quotes in the flagfile?\"};\n  
}\n\n  bool is_cloud_path = IsCloudPath(filename_str);\n\n  if (!filename.parent_path().empty() && !is_cloud_path) {\n    return {absl::StrCat(\"filename may not contain directory separators (Got \\\"\", filename.c_str(),\n                         \"\\\"). dbfilename should specify the filename without the directory\")};\n  }\n\n  if (!filename.has_extension()) {\n    return {};\n  }\n\n  if (new_version) {\n    if (absl::EqualsIgnoreCase(filename.extension().c_str(), \".rdb\")) {\n      return {absl::StrCat(\n          \"DF snapshot format is used but '.rdb' extension was given. Use --nodf_snapshot_format \"\n          \"or remove the filename extension.\")};\n    } else {\n      return {absl::StrCat(\"DF snapshot format requires no filename extension. Got \\\"\",\n                           filename.extension().c_str(), \"\\\"\")};\n    }\n  }\n  if (!new_version && !absl::EqualsIgnoreCase(filename.extension().c_str(), \".rdb\")) {\n    return {absl::StrCat(\"Bad filename extension \\\"\", filename.extension().c_str(),\n                         \"\\\" for SAVE with type RDB\")};\n  }\n  return {};\n}\n\nGenericError RdbSnapshot::Start(SaveMode save_mode, const std::string& path,\n                                const RdbSaver::GlobalData& glob_data,\n                                const std::string& snapshot_id) {\n  VLOG(1) << \"Saving RDB \" << path;\n\n  CHECK_NOTNULL(snapshot_storage_);\n  auto res = snapshot_storage_->OpenWriteFile(path);\n  if (!res) {\n    return res.error();\n  }\n\n  auto [file, file_type] = *res;\n  io_sink_.reset(file);\n\n  is_linux_file_ = file_type & FileType::IO_URING;\n  bool align_writes = (file_type & FileType::DIRECT) != 0;\n  saver_.reset(\n      new RdbSaver(io_sink_.get(), save_mode, align_writes, snapshot_id, DflyVersion::CURRENT_VER));\n\n  return saver_->SaveHeader(std::move(glob_data));\n}\n\nerror_code RdbSnapshot::SaveBody() {\n  return saver_->SaveBody(cntx_);\n}\n\nerror_code 
RdbSnapshot::WaitSnapshotInShard(EngineShard* shard) {\n  return saver_->WaitSnapshotInShard(shard);\n}\n\nsize_t RdbSnapshot::GetSaveBuffersSize() {\n  CHECK(saver_);\n  return saver_->GetTotalBuffersSize();\n}\n\nvoid RdbSnapshot::FillFreqMap() {\n  saver_->FillFreqMap(&freq_map_);\n}\n\nRdbSaver::SnapshotStats RdbSnapshot::GetCurrentSnapshotProgress() const {\n  CHECK(saver_);\n  return saver_->GetCurrentSnapshotProgress();\n}\n\nerror_code RdbSnapshot::Close() {\n#ifdef __linux__\n  if (is_linux_file_) {\n    return static_cast<LinuxWriteWrapper*>(io_sink_.get())->Close();\n  }\n#endif\n\n  error_code ec;\n\n  // S3 implementation is stack hungry. We use a fiber to close the file to\n  // avoid wasting stack space.\n  auto fb = ProactorBase::me()->LaunchFiber(\n      fb2::Launch::post, boost::context::fixedsize_stack{40 * 1024}, \"write_file_close\",\n      [&] { ec = static_cast<io::WriteFile*>(io_sink_.get())->Close(); });\n  fb.Join();\n  return ec;\n}\n\nvoid RdbSnapshot::StartInShard(EngineShard* shard) {\n  saver_->StartSnapshotInShard(false, &cntx_, shard);\n  started_shards_.fetch_add(1, memory_order_relaxed);\n}\n\nSaveStagesController::SaveStagesController(SaveStagesInputs&& inputs)\n    : SaveStagesInputs{std::move(inputs)} {\n  start_time_ = time(NULL);\n}\n\nSaveStagesController::~SaveStagesController() {\n  if (!snapshots_.empty() && snapshots_[0].first) {\n    LOG(INFO) << \"Forcefully closing save controller\";\n    WaitAllSnapshots();\n    Finalize();\n  }\n}\n\nstd::optional<SaveInfo> SaveStagesController::Init() {\n  if (auto err = BuildFullPath(); err) {\n    shared_err_ = err;\n    return GetSaveInfo();\n  }\n\n  snapshots_.resize(use_dfs_format_ ? 
shard_set->size() + 1 : 1);\n  for (auto& [snapshot, _] : snapshots_)\n    snapshot = make_unique<RdbSnapshot>(fq_threadpool_, snapshot_storage_.get());\n\n  return {};\n}\n\nvoid SaveStagesController::Start() {\n  if (use_dfs_format_)\n    SaveDfs();\n  else\n    SaveRdb();\n}\n\nvoid SaveStagesController::WaitAllSnapshots() {\n  if (use_dfs_format_) {\n    shard_set->RunBlockingInParallel([&](EngineShard* shard) { WaitSnapshotInShard(shard); });\n    SaveBody(shard_set->size());\n  } else {\n    SaveBody(0);\n  }\n}\n\nSaveInfo SaveStagesController::Finalize() {\n  RunStage(&SaveStagesController::CloseCb);\n\n  if (auto err = FinalizeFileMovement(); err) {\n    shared_err_ = err;\n  }\n\n  return GetSaveInfo();\n}\n\nsize_t SaveStagesController::GetSaveBuffersSize() {\n  std::atomic<size_t> total_bytes{0};\n\n  auto add_snapshot_bytes = [this, &total_bytes](ShardId sid) {\n    if (auto& snapshot = snapshots_[sid].first; snapshot && snapshot->HasStarted()) {\n      total_bytes.fetch_add(snapshot->GetSaveBuffersSize(), memory_order_relaxed);\n    }\n  };\n\n  if (!snapshots_.empty()) {\n    if (use_dfs_format_) {\n      shard_set->RunBriefInParallel([&](EngineShard* es) { add_snapshot_bytes(es->shard_id()); });\n\n    } else {\n      // When rdb format save is running, there is only one rdb saver instance, it is running on the\n      // connection thread that runs the save command.\n      add_snapshot_bytes(0);\n    }\n  }\n\n  return total_bytes.load(memory_order_relaxed);\n}\n\nRdbSaver::SnapshotStats SaveStagesController::GetCurrentSnapshotProgress() const {\n  if (snapshots_.empty()) {\n    return {0, 0};\n  }\n\n  std::vector<RdbSaver::SnapshotStats> results(snapshots_.size());\n  auto fetch = [this, &results](ShardId sid) {\n    if (auto& snapshot = snapshots_[sid].first; snapshot && snapshot->HasStarted()) {\n      results[sid] = snapshot->GetCurrentSnapshotProgress();\n    }\n  };\n\n  if (use_dfs_format_) {\n    
shard_set->RunBriefInParallel([&](EngineShard* es) { fetch(es->shard_id()); });\n    RdbSaver::SnapshotStats init{0, 0};\n    return std::accumulate(\n        results.begin(), results.end(), init, [](auto init, auto pr) -> RdbSaver::SnapshotStats {\n          return {init.current_keys + pr.current_keys, init.total_keys + pr.total_keys};\n        });\n  }\n  fetch(0);\n  return results[0];\n}\n\n// In the new version (.dfs) we store a file for every shard and one more summary file.\n// Summary file is always last in snapshots array.\nvoid SaveStagesController::SaveDfs() {\n  // Extend all filenames with -{sid} or -summary and append .dfs.tmp\n  const string_view ext = snapshot_storage_->IsCloud() ? \".dfs\" : \".dfs.tmp\";\n  ShardId sid = 0;\n  for (auto& [_, filename] : snapshots_) {\n    filename = full_path_;\n    if (sid < shard_set->size())\n      ExtendDfsFilenameWithShard(sid++, ext, &filename);\n    else\n      SetExtension(\"summary\", ext, &filename);\n  }\n\n  absl::InsecureBitGen gen;\n  std::string snapshot_id = GetRandomHex(gen, 32);\n  // Save summary file.\n  SaveDfsSingle(nullptr, snapshot_id);\n\n  // Save shard files.\n  auto cb = [this, &snapshot_id](Transaction* t, EngineShard* shard) {\n    SaveDfsSingle(shard, snapshot_id);\n    return OpStatus::OK;\n  };\n  trans_->ScheduleSingleHop(std::move(cb));\n}\n\n// Start saving a dfs file on shard\nvoid SaveStagesController::SaveDfsSingle(EngineShard* shard, const std::string& snapshot_id) {\n  // for summary file, shard=null and index=shard_set->size(), see SaveDfs() above\n  auto& [snapshot, filename] = snapshots_[shard ? shard->shard_id() : shard_set->size()];\n\n  SaveMode mode = shard == nullptr ? 
SaveMode::SUMMARY : SaveMode::SINGLE_SHARD;\n  bool is_summary = (shard == nullptr);\n  auto glob_data = RdbSaver::GetGlobalData(service_, is_summary);\n\n  if (auto err = snapshot->Start(mode, filename, glob_data, snapshot_id); err) {\n    shared_err_ = err;\n    snapshot.reset();\n    return;\n  }\n\n  if (mode == SaveMode::SINGLE_SHARD)\n    snapshot->StartInShard(shard);\n}\n\n// Save a single rdb file\nvoid SaveStagesController::SaveRdb() {\n  auto& [snapshot, filename] = snapshots_.front();\n\n  filename = full_path_;\n  if (!filename.has_extension())\n    filename += \".rdb\";\n  if (!snapshot_storage_->IsCloud())\n    filename += \".tmp\";\n\n  // RDB is a summary file (contains all global data)\n  if (auto err =\n          snapshot->Start(SaveMode::RDB, filename, RdbSaver::GetGlobalData(service_, true), \"\");\n      err) {\n    snapshot.reset();\n    return;\n  }\n\n  auto cb = [snapshot = snapshot.get()](Transaction* t, EngineShard* shard) {\n    snapshot->StartInShard(shard);\n    return OpStatus::OK;\n  };\n  trans_->ScheduleSingleHop(std::move(cb));\n}\n\nuint32_t SaveStagesController::GetCurrentSaveDuration() {\n  return time(nullptr) - start_time_;\n}\n\nSaveInfo SaveStagesController::GetSaveInfo() {\n  SaveInfo info;\n  info.save_time = start_time_;\n  info.duration_sec = GetCurrentSaveDuration();\n\n  if (shared_err_) {\n    info.error = *shared_err_;\n    return info;\n  }\n\n  fs::path resulting_path = full_path_;\n  if (use_dfs_format_)\n    SetExtension(\"summary\", \".dfs\", &resulting_path);\n  else\n    resulting_path.replace_extension();  // remove .tmp\n\n  LOG(INFO) << \"Saving \" << resulting_path << \" finished after \"\n            << strings::HumanReadableElapsedTime(info.duration_sec);\n\n  info.freq_map.clear();\n  for (const auto& k_v : rdb_name_map_) {\n    info.freq_map.emplace_back(k_v);\n  }\n\n  info.file_name = resulting_path.generic_string();\n\n  return info;\n}\n\n// Remove .tmp extension or delete files in case of 
error\nGenericError SaveStagesController::FinalizeFileMovement() {\n  if (snapshot_storage_->IsCloud())\n    return {};\n  DVLOG(1) << \"FinalizeFileMovement start\";\n\n  // If the shared_err is set, the snapshot saving failed\n  bool has_error = bool(shared_err_);\n\n  std::error_code ec;\n  for (const auto& [_, filename] : snapshots_) {\n    if (has_error) {\n      filesystem::remove(filename, ec);\n    } else {\n      filesystem::rename(filename, fs::path{filename}.replace_extension(\"\"), ec);\n    }\n    if (ec)\n      break;\n  }\n  DVLOG(1) << \"FinalizeFileMovement end\";\n  return GenericError(ec);\n}\n\n// Build full path: get dir, try creating dirs, get filename with placeholder\nGenericError SaveStagesController::BuildFullPath() {\n  fs::path dir_path = cloud_uri_.empty() ? GetFlag(FLAGS_dir) : cloud_uri_;\n  if (!dir_path.empty() && cloud_uri_.empty() && !IsCloudPath(GetFlag(FLAGS_dir))) {\n    if (auto ec = CreateDirs(dir_path); ec)\n      return {ec, \"Failed to create directories\"};\n  }\n\n  fs::path filename = basename_.empty() ? 
GetFlag(FLAGS_dbfilename) : basename_;\n  if (filename.empty())\n    return {\"filename is not specified\"};\n\n  if (auto err = ValidateFilename(filename, use_dfs_format_); err)\n    return err;\n\n  SubstituteFilenamePlaceholders(\n      &filename, {.ts = \"%Y-%m-%dT%H:%M:%S\", .year = \"%Y\", .month = \"%m\", .day = \"%d\"});\n\n  tm time_tm;\n  localtime_r(&start_time_, &time_tm);\n  string src_format = filename.string();\n  string dest_buf(src_format.size() + 128, '\\0');\n  size_t len = strftime(dest_buf.data(), dest_buf.size(), src_format.c_str(), &time_tm);\n  if (len == 0)\n    return {\"invalid dbfilename format\"};\n  dest_buf.resize(len);\n\n  full_path_ = dir_path / dest_buf;\n\n  return {};\n}\n\nvoid SaveStagesController::SaveBody(unsigned index) {\n  CHECK(!use_dfs_format_ || index == shard_set->size());  // used in rdb and df summary file\n  if (auto& snapshot = snapshots_[index].first; snapshot && snapshot->HasStarted()) {\n    shared_err_ = snapshot->SaveBody();\n  }\n}\n\nvoid SaveStagesController::WaitSnapshotInShard(EngineShard* shard) {\n  if (auto& snapshot = snapshots_[shard->shard_id()].first; snapshot && snapshot->HasStarted()) {\n    shared_err_ = snapshot->WaitSnapshotInShard(shard);\n  }\n}\n\nvoid SaveStagesController::CloseCb(unsigned index) {\n  if (auto& snapshot = snapshots_[index].first; snapshot && snapshot->HasStarted()) {\n    snapshot->FillFreqMap();\n    shared_err_ = snapshot->Close();\n\n    unique_lock lk{rdb_name_map_mu_};\n    for (const auto& k_v : snapshot->freq_map())\n      rdb_name_map_[RdbTypeName(k_v.first)] += k_v.second;\n    lk.unlock();\n    snapshot.reset();\n  }\n\n  if (auto* es = EngineShard::tlocal(); use_dfs_format_ && es)\n    namespaces->GetDefaultNamespace().GetDbSlice(es->shard_id()).ResetUpdateEvents();\n}\n\nvoid SaveStagesController::RunStage(void (SaveStagesController::*cb)(unsigned)) {\n  if (use_dfs_format_) {\n    shard_set->RunBlockingInParallel([&](EngineShard* es) { 
(this->*cb)(es->shard_id()); });\n    (this->*cb)(shard_set->size());\n  } else {\n    (this->*cb)(0);\n  }\n}\n\n}  // namespace detail\n}  // namespace dfly\n"
  },
  {
    "path": "src/server/detail/save_stages_controller.h",
    "content": "\n// Copyright 2023, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#pragma once\n\n#include <filesystem>\n\n#include \"server/rdb_save.h\"\n#include \"util/fibers/fiberqueue_threadpool.h\"\n\nnamespace dfly {\n\nclass Transaction;\nclass Service;\n\nnamespace detail {\n\nclass SnapshotStorage;\n\nstruct SaveInfo {\n  time_t save_time = 0;  // epoch time in seconds.\n  uint32_t duration_sec = 0;\n  std::string file_name;\n  std::vector<std::pair<std::string_view, size_t>> freq_map;  // RDB_TYPE_xxx -> count mapping.\n  GenericError error;\n};\n\nstruct SaveStagesInputs {\n  bool use_dfs_format_;\n  std::string_view cloud_uri_;\n  std::string_view basename_;\n  Transaction* trans_;\n  Service* service_;\n  util::fb2::FiberQueueThreadPool* fq_threadpool_;\n  std::shared_ptr<SnapshotStorage> snapshot_storage_;\n  // true if the command that triggered this flow is bgsave. false otherwise.\n  bool is_bg_save_;\n};\n\nclass RdbSnapshot {\n public:\n  RdbSnapshot(util::fb2::FiberQueueThreadPool* fq_tp, SnapshotStorage* snapshot_storage)\n      : snapshot_storage_{snapshot_storage} {\n  }\n\n  GenericError Start(SaveMode save_mode, const string& path, const RdbSaver::GlobalData& glob_data,\n                     const std::string& snapshot_id);\n  void StartInShard(EngineShard* shard);\n\n  error_code SaveBody();\n  error_code WaitSnapshotInShard(EngineShard* shard);\n  void FillFreqMap();\n  error_code Close();\n  size_t GetSaveBuffersSize();\n\n  RdbSaver::SnapshotStats GetCurrentSnapshotProgress() const;\n\n  const RdbTypeFreqMap& freq_map() const {\n    return freq_map_;\n  }\n\n  bool HasStarted() const {\n    return started_shards_.load(std::memory_order_relaxed) > 0 ||\n           (saver_ && saver_->Mode() == SaveMode::SUMMARY);\n  }\n\n private:\n  bool is_linux_file_ = false;\n  SnapshotStorage* snapshot_storage_ = nullptr;\n\n  std::atomic_uint32_t started_shards_ = 0;\n\n  unique_ptr<io::Sink> io_sink_;\n  
unique_ptr<RdbSaver> saver_;\n  RdbTypeFreqMap freq_map_;\n\n  ExecutionState cntx_{};\n};\n\nstruct SaveStagesController : public SaveStagesInputs {\n  explicit SaveStagesController(SaveStagesInputs&& input);\n  // Objects of this class are used concurrently. Call this function\n  // in a mutually exclusive context to avoid data races.\n  // Also call this function before any call to `WaitAllSnapshots`\n  // Returns empty optional on success and SaveInfo on failure\n  std::optional<SaveInfo> Init();\n  void Start();\n\n  ~SaveStagesController();\n\n  // Safe to call and no locks required\n  void WaitAllSnapshots();\n\n  // Call this function after you `WaitAllSnapshots` to finalize the chore.\n  // Performs cleanup of the object internally.\n  SaveInfo Finalize();\n  size_t GetSaveBuffersSize();\n  uint32_t GetCurrentSaveDuration();\n  RdbSaver::SnapshotStats GetCurrentSnapshotProgress() const;\n\n  bool IsBgSave() const {\n    return is_bg_save_;\n  }\n\n private:\n  // In the new version (.dfs) we store a file for every shard and one more summary file.\n  // Summary file is always last in snapshots array.\n  void SaveDfs();\n\n  // Start saving a dfs file on shard\n  void SaveDfsSingle(EngineShard* shard, const std::string& snapshot_id);\n  void SaveSnashot(EngineShard* shard);\n  void WaitSnapshotInShard(EngineShard* shard);\n\n  // Save a single rdb file\n  void SaveRdb();\n\n  SaveInfo GetSaveInfo();\n\n  // Remove .tmp extension or delete files in case of error\n  GenericError FinalizeFileMovement();\n\n  // Build full path: get dir, try creating dirs, get filename with placeholder\n  GenericError BuildFullPath();\n\n  void SaveBody(unsigned index);\n\n  void CloseCb(unsigned index);\n\n  void RunStage(void (SaveStagesController::*cb)(unsigned));\n\n  time_t start_time_;\n  std::filesystem::path full_path_;\n\n  AggregateGenericError shared_err_;\n  std::vector<std::pair<std::unique_ptr<RdbSnapshot>, std::filesystem::path>> snapshots_;\n\n  
absl::flat_hash_map<string_view, size_t> rdb_name_map_;\n  util::fb2::Mutex rdb_name_map_mu_;\n  bool is_bg_save_ = false;\n};\n\nGenericError ValidateFilename(const std::filesystem::path& filename, bool new_version);\n\n}  // namespace detail\n}  // namespace dfly\n"
  },
  {
    "path": "src/server/detail/snapshot_storage.cc",
    "content": "// Copyright 2023, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n\n#include \"server/detail/snapshot_storage.h\"\n\n#include <absl/strings/str_replace.h>\n#include <absl/strings/strip.h>\n\n#ifdef WITH_AWS\n#include <aws/core/auth/AWSCredentialsProvider.h>\n#include <aws/s3/S3Client.h>\n#include <aws/s3/model/ListObjectsV2Request.h>\n#include <aws/s3/model/PutObjectRequest.h>\n\n#include \"util/aws/aws.h\"\n#include \"util/aws/credentials_provider_chain.h\"\n#include \"util/aws/s3_endpoint_provider.h\"\n#include \"util/aws/s3_read_file.h\"\n#include \"util/aws/s3_write_file.h\"\n#endif\n\n#ifdef WITH_GCP\n#include \"util/cloud/gcp/gcs_file.h\"\n#endif\n\n#include <regex>\n\n#include \"base/logging.h\"\n#include \"io/file_util.h\"\n#include \"server/engine_shard_set.h\"\n#include \"util/cloud/azure/creds_provider.h\"\n#include \"util/cloud/azure/storage.h\"\n#include \"util/fibers/fiber_file.h\"\nnamespace dfly {\nnamespace detail {\n\nusing namespace util;\nusing namespace std;\n\nnamespace {\n\nconstexpr string_view kSummarySuffix = \"summary.dfs\"sv;\n\npair<string, string> GetBucketPath(string_view path) {\n  string_view clean = path;\n  auto prefix = absl::StartsWith(clean, kS3Prefix) ? 
kS3Prefix : kGCSPrefix;\n  clean = absl::StripPrefix(clean, prefix);\n\n  size_t pos = clean.find('/');\n  if (pos == string_view::npos) {\n    return make_pair(string(clean), \"\");\n  }\n\n  string bucket_name{clean.substr(0, pos)};\n  string obj_path{clean.substr(pos + 1)};\n\n  return make_pair(std::move(bucket_name), std::move(obj_path));\n}\n\n#ifdef __linux__\nconst int kRdbWriteFlags = O_CREAT | O_WRONLY | O_TRUNC | O_CLOEXEC | O_DIRECT;\n#endif\n\nstd::string EscapeRegex(string_view input) {\n  // List of regex special characters that need escaping\n  // We don't escape \"{}\" since we use them for our own placeholders.\n  constexpr std::string_view chars{\"\\\\.^$|?*+()[]\"};\n  std::string escaped;\n\n  // Reserve space to avoid multiple reallocations\n  escaped.reserve(input.size() * 1.1);\n\n  for (char c : input) {\n    // If the character is in our specialChars list, prepend a backslash\n    if (chars.find(c) != std::string::npos) {\n      escaped += '\\\\';\n    }\n    escaped += c;\n  }\n\n  return escaped;\n}\n\n}  // namespace\n\nstring SnapshotStorage::FindMatchingFile(string_view prefix, string_view dbfilename,\n                                         vector<SnapStat> keys) {\n  std::sort(std::begin(keys), std::end(keys),\n            [](const SnapStat& l, const SnapStat& r) { return l.last_modified > r.last_modified; });\n\n  // Create a regex to match the object keys, substituting the timestamp\n  // and adding an extension if needed.\n  fs::path fl_path{prefix};\n  fl_path.append(dbfilename);\n  fl_path = EscapeRegex(fl_path.string());\n\n  SubstituteFilenamePlaceholders(&fl_path,\n                                 {.ts = \"([0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2})\",\n                                  .year = \"([0-9]{4})\",\n                                  .month = \"([0-9]{2})\",\n                                  .day = \"([0-9]{2})\"});\n  if (!fl_path.has_extension()) {\n    fl_path += \"(-summary.dfs|.rdb)\";\n  }\n  
const std::regex re(fl_path.string());\n\n  for (const SnapStat& key : keys) {\n    DVLOG(1) << \"Checking object key: \" << key.name << \" against regex: \" << fl_path.string();\n    std::smatch m;\n    if (std::regex_match(key.name, m, re)) {\n      return key.name;\n    }\n  }\n  return {};\n}\n\nio::Result<SnapshotStorage::ExpandResult, GenericError> SnapshotStorage::ExpandSnapshot(\n    const string& load_path) {\n  if (!(absl::EndsWith(load_path, \".rdb\") || absl::EndsWith(load_path, \"summary.dfs\"))) {\n    return nonstd::make_unexpected(\n        GenericError(std::make_error_code(std::errc::invalid_argument), \"Bad filename extension\"));\n  }\n\n  error_code ec = CheckPath(load_path);\n  if (ec) {\n    return nonstd::make_unexpected(GenericError(ec, \"File not found\"));\n  }\n\n  ExpandResult result;\n\n  // Collect all other files in case we're loading dfs.\n  if (absl::EndsWith(load_path, \"summary.dfs\")) {\n    auto res = ExpandFromPath(load_path);\n    if (!res) {\n      return nonstd::make_unexpected(res.error());\n    }\n    result = std::move(*res);\n    result.push_back(load_path);\n  } else {\n    result.push_back(load_path);\n  }\n  return result;\n}\n\nFileSnapshotStorage::FileSnapshotStorage(fb2::FiberQueueThreadPool* fq_threadpool)\n    : fq_threadpool_{fq_threadpool} {\n}\n\nio::Result<std::pair<io::Sink*, uint8_t>, GenericError> FileSnapshotStorage::OpenWriteFile(\n    const std::string& path) {\n  if (fq_threadpool_) {  // EPOLL\n    FiberWriteOptions opts;\n    opts.direct = true;\n\n    auto res = OpenFiberWriteFile(path, fq_threadpool_, opts);\n    if (!res) {\n      return nonstd::make_unexpected(GenericError(res.error(), \"Couldn't open file for writing\"));\n    }\n\n    return std::pair(*res, FileType::FILE | FileType::DIRECT);\n  } else {\n#ifdef __linux__\n    auto res = fb2::OpenLinux(path, kRdbWriteFlags, 0666);\n    if (!res) {\n      return nonstd::make_unexpected(GenericError(\n          res.error(),\n          \"Couldn't 
open file for writing (is direct I/O supported by the file system?)\"));\n    }\n\n    uint8_t file_type = FileType::FILE | FileType::IO_URING;\n    if (kRdbWriteFlags & O_DIRECT) {\n      file_type |= FileType::DIRECT;\n    }\n    return std::pair(new LinuxWriteWrapper(res->release()), file_type);\n#else\n    LOG(FATAL) << \"Linux I/O is not supported on this platform\";\n#endif\n  }\n}\n\nio::ReadonlyFileOrError FileSnapshotStorage::OpenReadFile(const std::string& path) {\n#ifdef __linux__\n  if (fq_threadpool_) {\n    return OpenFiberReadFile(path, fq_threadpool_);\n  } else {\n    return fb2::OpenRead(path);\n  }\n#else\n  return OpenFiberReadFile(path, fq_threadpool_);\n#endif\n}\n\nio::Result<std::string, GenericError> FileSnapshotStorage::LoadPath(std::string_view dir,\n                                                                    std::string_view dbfilename) {\n  if (dbfilename.empty())\n    return {};\n\n  fs::path data_folder;\n  if (dir.empty()) {\n    data_folder = fs::current_path();\n  } else {\n    std::error_code file_ec;\n    data_folder = fs::canonical(dir, file_ec);\n    if (file_ec) {\n      return nonstd::make_unexpected(GenericError{file_ec, \"Data directory error\"});\n    }\n  }\n\n  LOG(INFO) << \"Load snapshot: Searching for snapshot in directory: \" << data_folder;\n\n  fs::path fl_path = data_folder.append(dbfilename);\n  // If we've found an exact match we're done.\n  if (fs::exists(fl_path))\n    return fl_path.generic_string();\n\n  SubstituteFilenamePlaceholders(&fl_path, {\"*\", \"*\", \"*\", \"*\"});\n  if (!fl_path.has_extension()) {\n    fl_path += \"*\";\n  }\n  io::Result<io::StatShortVec> short_vec = io::StatFiles(fl_path.generic_string());\n  if (short_vec) {\n    std::sort(short_vec->begin(), short_vec->end(),\n              [](const io::StatShort& l, const io::StatShort& r) {\n                return std::difftime(l.last_modified, r.last_modified) < 0;\n              });\n    auto it = std::find_if(short_vec->rbegin(), 
short_vec->rend(), [](const auto& stat) {\n      return absl::EndsWith(stat.name, \".rdb\") || absl::EndsWith(stat.name, kSummarySuffix);\n    });\n    if (it != short_vec->rend())\n      return it->name;\n  } else {\n    return nonstd::make_unexpected(\n        GenericError(short_vec.error(), \"Could not stat snapshot directory\"));\n  }\n\n  return nonstd::make_unexpected(GenericError(\n      std::make_error_code(std::errc::no_such_file_or_directory), \"Snapshot not found\"));\n}\n\nio::Result<vector<string>, GenericError> FileSnapshotStorage::ExpandFromPath(const string& path) {\n  string glob = absl::StrReplaceAll(path, {{\"summary\", \"????\"}});\n  io::Result<io::StatShortVec> files = io::StatFiles(glob);\n\n  if (!files || files->size() == 0) {\n    return nonstd::make_unexpected(GenericError(make_error_code(errc::no_such_file_or_directory),\n                                                \"Cound not find DFS shard files\"));\n  }\n\n  vector<string> paths;\n  for (auto& fstat : *files) {\n    paths.push_back(std::move(fstat.name));\n  }\n\n  return paths;\n}\n\nerror_code FileSnapshotStorage::CheckPath(const string& path) {\n  error_code ec;\n  std::ignore = fs::canonical(path, ec);\n  return ec;\n}\n\n#ifdef WITH_GCP\nGcsSnapshotStorage::~GcsSnapshotStorage() {\n  util::http::TlsClient::FreeContext(ctx_);\n}\n\nerror_code GcsSnapshotStorage::Init(unsigned connect_ms) {\n  error_code ec = creds_provider_.Init(connect_ms);\n  if (ec)\n    return ec;\n\n  ctx_ = util::http::TlsClient::CreateSslContext();\n  return ec;\n}\n\nio::Result<std::pair<io::Sink*, uint8_t>, GenericError> GcsSnapshotStorage::OpenWriteFile(\n    const std::string& path) {\n  CHECK(ctx_);\n\n  pair<string, string> bucket_path = GetBucketPath(path);\n  fb2::ProactorBase* proactor = fb2::ProactorBase::me();\n  unique_ptr<http::ClientPool> conn_pool = cloud::GCS::CreateApiConnectionPool(ctx_, proactor);\n  cloud::GcsWriteFileOptions opts;\n  opts.creds_provider = &creds_provider_;\n  
opts.pool = conn_pool.release();\n  opts.pool_owned = true;\n\n  io::Result<io::WriteFile*> dest_res =\n      cloud::OpenWriteGcsFile(bucket_path.first, bucket_path.second, opts);\n  if (!dest_res) {\n    return nonstd::make_unexpected(GenericError(dest_res.error(), \"Could not open file\"));\n  }\n\n  return std::pair(*dest_res, FileType::CLOUD);\n}\n\nio::ReadonlyFileOrError GcsSnapshotStorage::OpenReadFile(const std::string& path) {\n  if (!IsGCSPath(path))\n    return nonstd::make_unexpected(GenericError(\"Invalid GCS path\"));\n\n  auto [bucket, key] = GetBucketPath(path);\n  fb2::ProactorBase* proactor = fb2::ProactorBase::me();\n  unique_ptr<http::ClientPool> conn_pool = cloud::GCS::CreateApiConnectionPool(ctx_, proactor);\n  cloud::GcsReadFileOptions opts;\n  opts.creds_provider = &creds_provider_;\n  opts.pool = conn_pool.release();\n  opts.pool_owned = true;\n\n  return cloud::OpenReadGcsFile(bucket, key, opts);\n}\n\nio::Result<std::string, GenericError> GcsSnapshotStorage::LoadPath(string_view dir,\n                                                                   string_view dbfilename) {\n  if (dbfilename.empty())\n    return \"\";\n\n  auto [bucket_name, prefix] = GetBucketPath(dir);\n\n  // GCS needs trailing slash to match prefix sub path\n  if (!prefix.empty() && prefix.back() != '/') {\n    prefix += '/';\n  }\n\n  fb2::ProactorBase* proactor = shard_set->pool()->GetNextProactor();\n\n  io::Result<vector<SnapStat>, GenericError> keys =\n      proactor->Await([this, proactor, bucket_name = bucket_name,\n                       prefix = prefix]() -> io::Result<vector<SnapStat>, GenericError> {\n        cloud::GCS gcs(&creds_provider_, ctx_, proactor);\n        vector<SnapStat> res;\n        error_code ec =\n            gcs.List(bucket_name, prefix, false, [&res](const cloud::StorageListItem& item) {\n              res.emplace_back(SnapStat{string(item.key), item.mtime_ns});\n            });\n        if (ec)\n          return 
nonstd::make_unexpected(GenericError(ec, \"Failed to list objects\"));\n        return res;\n      });\n\n  if (!keys) {\n    return nonstd::make_unexpected(keys.error());\n  }\n\n  auto match_key = FindMatchingFile(prefix, dbfilename, *keys);\n  if (!match_key.empty()) {\n    return absl::StrCat(kGCSPrefix, bucket_name, \"/\", match_key);\n  }\n  return nonstd::make_unexpected(GenericError(\n      std::make_error_code(std::errc::no_such_file_or_directory), \"Snapshot not found\"));\n}\n\nio::Result<vector<string>, GenericError> GcsSnapshotStorage::ExpandFromPath(\n    const string& load_path) {\n  if (!IsGCSPath(load_path))\n    return nonstd::make_unexpected(\n        GenericError(make_error_code(errc::invalid_argument), \"Invalid GCS path\"));\n\n  if (!absl::EndsWith(load_path, kSummarySuffix))\n    return vector<string>{};\n\n  const auto [bucket_name, obj_path] = GetBucketPath(load_path);\n  regex re(absl::StrReplaceAll(obj_path, {{\"summary\", \"[0-9]{4}\"}}));\n  string_view prefix = absl::StripSuffix(obj_path, kSummarySuffix);\n\n  // Find snapshot shard files if we're loading DFS.\n  fb2::ProactorBase* proactor = shard_set->pool()->GetNextProactor();\n  auto paths = proactor->Await([&, &bucket_name =\n                                       bucket_name]() -> io::Result<vector<string>, GenericError> {\n    vector<string> res;\n    cloud::GCS gcs(&creds_provider_, ctx_, proactor);\n\n    error_code ec = gcs.List(bucket_name, prefix, false, [&](const cloud::StorageListItem& item) {\n      std::smatch m;\n      string key{item.key};\n      if (std::regex_match(key, m, re)) {\n        res.push_back(absl::StrCat(kGCSPrefix, bucket_name, \"/\", item.key));\n      }\n    });\n\n    if (ec) {\n      return nonstd::make_unexpected(ec);\n    }\n\n    return res;\n  });\n\n  if (!paths || paths->empty()) {\n    return nonstd::make_unexpected(\n        GenericError{std::make_error_code(std::errc::no_such_file_or_directory),\n                     \"Cound not find DFS 
snapshot shard files\"});\n  }\n\n  return *paths;\n}\n\nerror_code GcsSnapshotStorage::CheckPath(const std::string& path) {\n  return {};\n}\n#endif\n\n// AZURE\n\nAzureSnapshotStorage::AzureSnapshotStorage() {\n  creds_provider_ = make_unique<util::cloud::azure::Credentials>();\n}\n\nAzureSnapshotStorage::~AzureSnapshotStorage() {\n  util::http::TlsClient::FreeContext(ctx_);\n}\n\nerror_code AzureSnapshotStorage::Init(unsigned connect_ms) {\n  error_code ec = creds_provider_->Init(connect_ms);\n  if (!ec) {\n    ctx_ = util::http::TlsClient::CreateSslContext();\n  }\n  return ec;\n}\n\nio::Result<std::pair<io::Sink*, uint8_t>, GenericError> AzureSnapshotStorage::OpenWriteFile(\n    const std::string& path) {\n  return nonstd::make_unexpected(GenericError(\"Not implemented\"));\n}\n\nio::ReadonlyFileOrError AzureSnapshotStorage::OpenReadFile(const std::string& path) {\n  if (!IsAzurePath(path))\n    return nonstd::make_unexpected(GenericError(\"Invalid azure path\"));\n\n  auto [bucket, key] = GetBucketPath(path);\n\n  return nonstd::make_unexpected(GenericError(\"Not implemented\"));\n}\n\nio::Result<std::string, GenericError> AzureSnapshotStorage::LoadPath(string_view dir,\n                                                                     string_view dbfilename) {\n  if (dbfilename.empty())\n    return \"\";\n\n  auto [bucket_name, prefix] = GetBucketPath(dir);\n\n  // TODO: check if needed\n  if (!prefix.empty() && prefix.back() != '/') {\n    prefix += '/';\n  }\n\n  fb2::ProactorBase* proactor = shard_set->pool()->GetNextProactor();\n\n  io::Result<vector<SnapStat>, GenericError> keys =\n      proactor->Await([this, bucket_name = bucket_name,\n                       prefix = prefix]() -> io::Result<vector<SnapStat>, GenericError> {\n        cloud::azure::Storage azure((cloud::azure::Credentials*)creds_provider_.get());\n        vector<SnapStat> res;\n        error_code ec =\n            azure.List(bucket_name, prefix, false, 500, [&res](const 
cloud::StorageListItem& item) {\n              res.emplace_back(string(item.key), item.mtime_ns);\n            });\n        if (ec)\n          return nonstd::make_unexpected(GenericError(ec, \"Failed to list objects\"));\n        return res;\n      });\n\n  if (!keys) {\n    return nonstd::make_unexpected(keys.error());\n  }\n\n  auto match_key = FindMatchingFile(prefix, dbfilename, *keys);\n  if (!match_key.empty()) {\n    return absl::StrCat(kGCSPrefix, bucket_name, \"/\", match_key);\n  }\n  return nonstd::make_unexpected(GenericError(\n      std::make_error_code(std::errc::no_such_file_or_directory), \"Snapshot not found\"));\n}\n\nio::Result<vector<string>, GenericError> AzureSnapshotStorage::ExpandFromPath(\n    const string& load_path) {\n  if (!IsAzurePath(load_path))\n    return nonstd::make_unexpected(\n        GenericError(make_error_code(errc::invalid_argument), \"Invalid Azure path\"));\n\n  if (!absl::EndsWith(load_path, kSummarySuffix))\n    return vector<string>{};\n\n  const auto [bucket_name, obj_path] = GetBucketPath(load_path);\n  regex re(absl::StrReplaceAll(obj_path, {{\"summary\", \"[0-9]{4}\"}}));\n  string_view prefix = absl::StripSuffix(obj_path, kSummarySuffix);\n\n  // Find snapshot shard files if we're loading DFS.\n  fb2::ProactorBase* proactor = shard_set->pool()->GetNextProactor();\n  auto paths = proactor->Await(\n      [&, &bucket_name = bucket_name]() -> io::Result<vector<string>, GenericError> {\n        vector<string> res;\n        cloud::azure::Storage azure(creds_provider_.get());\n\n        error_code ec =\n            azure.List(bucket_name, prefix, false, 500, [&](const cloud::StorageListItem& item) {\n              std::smatch m;\n              string key{item.key};\n              if (std::regex_match(key, m, re)) {\n                res.push_back(absl::StrCat(kAzurePrefix, bucket_name, \"/\", item.key));\n              }\n            });\n\n        if (ec) {\n          return nonstd::make_unexpected(ec);\n        }\n\n     
   return res;\n      });\n\n  if (!paths || paths->empty()) {\n    return nonstd::make_unexpected(\n        GenericError{std::make_error_code(std::errc::no_such_file_or_directory),\n                     \"Cound not find DFS snapshot shard files\"});\n  }\n\n  return *paths;\n}\n\nerror_code AzureSnapshotStorage::CheckPath(const std::string& path) {\n  return {};\n}\n\n#ifdef WITH_AWS\nAwsS3SnapshotStorage::AwsS3SnapshotStorage(const std::string& endpoint, bool https,\n                                           bool ec2_metadata, bool sign_payload) {\n  shard_set->pool()->GetNextProactor()->Await([&] {\n    if (!ec2_metadata) {\n      setenv(\"AWS_EC2_METADATA_DISABLED\", \"true\", 0);\n    }\n    // S3ClientConfiguration may request configuration and credentials from\n    // EC2 metadata so must be run in a proactor thread.\n    Aws::S3::S3ClientConfiguration s3_conf;\n    s3_conf.checksumConfig.responseChecksumValidation =\n        Aws::Client::ResponseChecksumValidation::WHEN_REQUIRED;\n\n    LOG(INFO) << \"Creating AWS S3 client; region=\" << s3_conf.region << \"; https=\" << std::boolalpha\n              << https << \"; endpoint=\" << endpoint;\n    if (!sign_payload) {\n      s3_conf.payloadSigningPolicy = Aws::Client::AWSAuthV4Signer::PayloadSigningPolicy::Never;\n    }\n    std::shared_ptr<Aws::Auth::AWSCredentialsProvider> credentials_provider =\n        std::make_shared<aws::CredentialsProviderChain>();\n    // Pass a custom endpoint. 
If empty uses the S3 endpoint.\n    std::shared_ptr<Aws::S3::S3EndpointProviderBase> endpoint_provider =\n        std::make_shared<aws::S3EndpointProvider>(endpoint, https);\n    s3_ = std::make_shared<Aws::S3::S3Client>(credentials_provider, endpoint_provider, s3_conf);\n  });\n}\n\nio::Result<std::pair<io::Sink*, uint8_t>, GenericError> AwsS3SnapshotStorage::OpenWriteFile(\n    const std::string& path) {\n  optional<pair<string, string>> bucket_path = GetBucketPath(path);\n  if (!bucket_path) {\n    return nonstd::make_unexpected(GenericError(\"Invalid S3 path\"));\n  }\n  auto [bucket, key] = *bucket_path;\n\n  fb2::ProactorBase* proactor = ProactorBase::me();\n\n  // We run S3 operations via a temporary fiber to avoid agressive stack consumption.\n  io::Result<std::pair<io::Sink*, uint8_t>, GenericError> result;\n  auto fb = proactor->LaunchFiber(\n      fb2::Launch::post, boost::context::fixedsize_stack{40 * 1024}, \"open_s3_write\", [&] {\n        io::Result<aws::S3WriteFile> file = aws::S3WriteFile::Open(bucket, key, s3_);\n        if (!file) {\n          result = nonstd::make_unexpected(GenericError(file.error(), \"Failed to open write file\"));\n          return;\n        }\n\n        aws::S3WriteFile* f = new aws::S3WriteFile(std::move(*file));\n        result = std::pair<io::Sink*, uint8_t>(f, FileType::CLOUD);\n      });\n  fb.Join();\n  return result;\n}\n\nio::ReadonlyFileOrError AwsS3SnapshotStorage::OpenReadFile(const std::string& path) {\n  std::optional<std::pair<std::string, std::string>> bucket_path = GetBucketPath(path);\n  if (!bucket_path) {\n    return nonstd::make_unexpected(GenericError(\"Invalid S3 path\"));\n  }\n  auto [bucket, key] = *bucket_path;\n  return new aws::S3ReadFile(bucket, key, s3_);\n}\n\nio::Result<std::string, GenericError> AwsS3SnapshotStorage::LoadPath(std::string_view dir,\n                                                                     std::string_view dbfilename) {\n  if (dbfilename.empty())\n    return 
\"\";\n\n  auto [bucket_name, prefix] = GetBucketPath(dir);\n\n  LOG(INFO) << \"Load snapshot: Searching for snapshot in S3 path: \" << kS3Prefix << bucket_name\n            << \"/\" << prefix;\n  io::Result<std::vector<SnapStat>, GenericError> keys = ListObjects(bucket_name, prefix);\n  if (!keys) {\n    return nonstd::make_unexpected(keys.error());\n  }\n\n  auto match_key = FindMatchingFile(prefix, dbfilename, *keys);\n  if (!match_key.empty()) {\n    return absl::StrCat(kS3Prefix, bucket_name, \"/\", match_key);\n  }\n  return nonstd::make_unexpected(GenericError(\n      std::make_error_code(std::errc::no_such_file_or_directory), \"Snapshot not found\"));\n}\n\nio::Result<vector<string>, GenericError> AwsS3SnapshotStorage::ExpandFromPath(\n    const string& load_path) {\n  optional<pair<string, string>> bucket_path = GetBucketPath(load_path);\n  if (!bucket_path) {\n    return nonstd::make_unexpected(\n        GenericError{std::make_error_code(std::errc::invalid_argument), \"Invalid S3 path\"});\n  }\n\n  auto& [bucket_name, obj_path] = *bucket_path;\n\n  // Limit prefix to objects in the same 'directory' as load_path.\n  const size_t pos = obj_path.find_last_of('/');\n  const std::string prefix = (pos == std::string_view::npos) ? 
\"\" : obj_path.substr(0, pos + 1);\n\n  io::Result<std::vector<SnapStat>, GenericError> list_res = ListObjects(bucket_name, prefix);\n  if (!list_res) {\n    return nonstd::make_unexpected(list_res.error());\n  }\n\n  vector<string> paths;\n  obj_path = EscapeRegex(obj_path);\n  const std::regex re(absl::StrReplaceAll(obj_path, {{\"summary\", \"[0-9]{4}\"}}));\n\n  for (const SnapStat& key : *list_res) {\n    std::smatch m;\n    DVLOG(1) << \"Checking object key: \" << key.name << \" against regex: \" << obj_path;\n\n    if (std::regex_match(key.name, m, re)) {\n      paths.push_back(std::string(kS3Prefix) + bucket_name + \"/\" + key.name);\n    }\n  }\n\n  if (paths.empty()) {\n    return nonstd::make_unexpected(\n        GenericError{std::make_error_code(std::errc::no_such_file_or_directory),\n                     \"Cound not find DFS snapshot shard files\"});\n  }\n\n  return paths;\n}\n\nerror_code AwsS3SnapshotStorage::CheckPath(const std::string& path) {\n  return {};\n}\n\nio::Result<std::vector<AwsS3SnapshotStorage::SnapStat>, GenericError>\nAwsS3SnapshotStorage::ListObjects(std::string_view bucket_name, std::string_view prefix) {\n  // Each list objects request has a 1000 object limit, so page through the\n  // objects if needed.\n  std::string continuation_token;\n  std::vector<SnapStat> keys;\n\n  // We use a random proactor because this function might be called from the main thread.\n  fb2::ProactorBase* proactor = shard_set->pool()->GetNextProactor();\n\n  do {\n    Aws::S3::Model::ListObjectsV2Request request;\n    request.SetBucket(std::string(bucket_name));\n    if (!prefix.empty()) {\n      // Ensure prefix ends with '/' to treat it as a directory-like namespace and avoid\n      // matching objects with similar prefix names.\n      if (prefix.back() == '/') {\n        request.SetPrefix(std::string(prefix));\n      } else {\n        request.SetPrefix(std::string(prefix) + '/');\n      }\n    }\n    request.SetDelimiter(\"/\");\n\n    if 
(!continuation_token.empty()) {\n      request.SetContinuationToken(continuation_token);\n    }\n\n    Aws::S3::Model::ListObjectsV2Outcome outcome;\n\n    // We use fibers to wrap the s3 call to avoid stack exhaustion.\n    auto fb = proactor->LaunchFiber(\n        fb2::Launch::post, boost::context::fixedsize_stack{40 * 1024}, \"list_s3\",\n        [&, &bucket_name = bucket_name] { outcome = s3_->ListObjectsV2(request); });\n\n    fb.Join();\n\n    if (outcome.IsSuccess()) {\n      continuation_token = outcome.GetResult().GetNextContinuationToken();\n      for (const auto& object : outcome.GetResult().GetContents()) {\n        keys.emplace_back(object.GetKey(), object.GetLastModified().Millis());\n      }\n    } else if (outcome.GetError().GetExceptionName() == \"PermanentRedirect\") {\n      return nonstd::make_unexpected(\n          GenericError{\"Failed list objects in S3 bucket: Permanent redirect; Ensure your \"\n                       \"configured AWS region matches the S3 bucket region\"});\n    } else if (outcome.GetError().GetErrorType() == Aws::S3::S3Errors::NO_SUCH_BUCKET) {\n      return nonstd::make_unexpected(GenericError{\n          \"Failed list objects in S3 bucket: Bucket not found: \" + std::string(bucket_name)});\n    } else if (outcome.GetError().GetErrorType() == Aws::S3::S3Errors::INVALID_ACCESS_KEY_ID) {\n      return nonstd::make_unexpected(\n          GenericError{\"Failed list objects in S3 bucket: Invalid access key ID\"});\n    } else if (outcome.GetError().GetErrorType() == Aws::S3::S3Errors::SIGNATURE_DOES_NOT_MATCH) {\n      return nonstd::make_unexpected(\n          GenericError{\"Failed list objects in S3 bucket: Invalid signature; Check your AWS \"\n                       \"credentials are correct\"});\n    } else if (outcome.GetError().GetExceptionName() == \"InvalidToken\") {\n      return nonstd::make_unexpected(\n          GenericError{\"Failed list objects in S3 bucket: Invalid token; Check your AWS \"\n                      
 \"credentials are correct\"});\n    } else {\n      return nonstd::make_unexpected(GenericError{\"Failed list objects in S3 bucket: \" +\n                                                  outcome.GetError().GetExceptionName()});\n    }\n  } while (!continuation_token.empty());\n  return keys;\n}\n#endif\n\n#ifdef __linux__\nio::Result<size_t> LinuxWriteWrapper::WriteSome(const iovec* v, uint32_t len) {\n  io::Result<size_t> res = lf_->WriteSome(v, len, offset_, 0);\n  if (res) {\n    offset_ += *res;\n  }\n\n  return res;\n}\n#endif\n\nvoid SubstituteFilenamePlaceholders(fs::path* filename, const FilenameSubstitutions& fns) {\n  *filename = absl::StrReplaceAll(\n      filename->string(),\n      {{\"{Y}\", fns.year}, {\"{m}\", fns.month}, {\"{d}\", fns.day}, {\"{timestamp}\", fns.ts}});\n}\n\n}  // namespace detail\n}  // namespace dfly\n"
  },
  {
    "path": "src/server/detail/snapshot_storage.h",
    "content": "// Copyright 2023, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n\n#pragma once\n\n#ifdef WITH_AWS\n#include <aws/s3/S3Client.h>\n#endif\n\n#ifdef WITH_GCP\n#include \"util/cloud/gcp/gcp_creds_provider.h\"\n#include \"util/cloud/gcp/gcs.h\"\n#endif\n\n#include <absl/strings/match.h>\n\n#include <filesystem>\n#include <string>\n#include <string_view>\n#include <utility>\n\n#include \"io/io.h\"\n#include \"server/execution_state.h\"\n#include \"util/cloud/utils.h\"\n#include \"util/fibers/fiberqueue_threadpool.h\"\n#include \"util/fibers/uring_file.h\"\n\nnamespace dfly {\nnamespace detail {\n\nnamespace fs = std::filesystem;\n\nconstexpr std::string_view kS3Prefix = \"s3://\";\nconstexpr std::string_view kGCSPrefix = \"gs://\";\nconstexpr std::string_view kAzurePrefix = \"az://\";\n\nconst size_t kBucketConnectMs = 2000;\n\nenum FileType : uint8_t {\n  FILE = (1u << 0),\n  CLOUD = (1u << 1),\n  IO_URING = (1u << 2),\n  DIRECT = (1u << 3),\n};\n\nclass SnapshotStorage {\n public:\n  virtual ~SnapshotStorage() = default;\n\n  // Opens the file at the given path, and returns the open file and file\n  // type, which is a bitmask of FileType.\n  virtual io::Result<std::pair<io::Sink*, uint8_t>, GenericError> OpenWriteFile(\n      const std::string& path) = 0;\n\n  virtual io::ReadonlyFileOrError OpenReadFile(const std::string& path) = 0;\n\n  // Returns the path of the RDB file or DFS summary file to load.\n  virtual io::Result<std::string, GenericError> LoadPath(std::string_view dir,\n                                                         std::string_view dbfilename) = 0;\n\n  using ExpandResult = std::vector<std::string>;\n  // Searches for all the relevant snapshot files given the RDB file or DFS summary file path.\n  io::Result<ExpandResult, GenericError> ExpandSnapshot(const std::string& load_path);\n\n  virtual bool IsCloud() const {\n    return false;\n  }\n\n protected:\n  struct SnapStat {\n    
SnapStat(std::string file_name, int64_t ts) : name(std::move(file_name)), last_modified(ts) {\n    }\n    std::string name;\n    int64_t last_modified;\n  };\n\n  // Returns empty string if nothing is matched. vector is passed by value on purpose, as it is\n  // been sorted inside.\n  static std::string FindMatchingFile(std::string_view prefix, std::string_view dbfilename,\n                                      std::vector<SnapStat> keys);\n\n  virtual io::Result<std::vector<std::string>, GenericError> ExpandFromPath(\n      const std::string& path) = 0;\n\n  virtual std::error_code CheckPath(const std::string& path) = 0;\n};\n\nclass FileSnapshotStorage : public SnapshotStorage {\n public:\n  explicit FileSnapshotStorage(util::fb2::FiberQueueThreadPool* fq_threadpool);\n\n  io::Result<std::pair<io::Sink*, uint8_t>, GenericError> OpenWriteFile(\n      const std::string& path) override;\n\n  io::ReadonlyFileOrError OpenReadFile(const std::string& path) override;\n\n  io::Result<std::string, GenericError> LoadPath(std::string_view dir,\n                                                 std::string_view dbfilename) override;\n\n private:\n  io::Result<std::vector<std::string>, GenericError> ExpandFromPath(const std::string& path) final;\n\n  std::error_code CheckPath(const std::string& path) final;\n  util::fb2::FiberQueueThreadPool* fq_threadpool_;\n};\n\n#ifdef WITH_GCP\nclass GcsSnapshotStorage : public SnapshotStorage {\n public:\n  ~GcsSnapshotStorage();\n\n  std::error_code Init(unsigned connect_ms);\n\n  io::Result<std::pair<io::Sink*, uint8_t>, GenericError> OpenWriteFile(\n      const std::string& path) override;\n\n  io::ReadonlyFileOrError OpenReadFile(const std::string& path) override;\n\n  io::Result<std::string, GenericError> LoadPath(std::string_view dir,\n                                                 std::string_view dbfilename) override;\n\n  bool IsCloud() const final {\n    return true;\n  }\n\n private:\n  io::Result<std::vector<std::string>, 
GenericError> ExpandFromPath(const std::string& path) final;\n\n  std::error_code CheckPath(const std::string& path) final;\n\n  util::cloud::GCPCredsProvider creds_provider_;\n  SSL_CTX* ctx_ = NULL;\n};\n#endif\n\nclass AzureSnapshotStorage : public SnapshotStorage {\n public:\n  AzureSnapshotStorage();\n  ~AzureSnapshotStorage();\n\n  std::error_code Init(unsigned connect_ms);\n\n  io::Result<std::pair<io::Sink*, uint8_t>, GenericError> OpenWriteFile(\n      const std::string& path) override;\n\n  io::ReadonlyFileOrError OpenReadFile(const std::string& path) override;\n\n  io::Result<std::string, GenericError> LoadPath(std::string_view dir,\n                                                 std::string_view dbfilename) override;\n\n  bool IsCloud() const final {\n    return true;\n  }\n\n private:\n  io::Result<std::vector<std::string>, GenericError> ExpandFromPath(const std::string& path) final;\n\n  std::error_code CheckPath(const std::string& path) final;\n\n  std::unique_ptr<util::cloud::CredentialsProvider> creds_provider_;\n  SSL_CTX* ctx_ = NULL;\n};\n\n#ifdef WITH_AWS\nclass AwsS3SnapshotStorage : public SnapshotStorage {\n public:\n  AwsS3SnapshotStorage(const std::string& endpoint, bool https, bool ec2_metadata,\n                       bool sign_payload);\n\n  io::Result<std::pair<io::Sink*, uint8_t>, GenericError> OpenWriteFile(\n      const std::string& path) override;\n\n  io::ReadonlyFileOrError OpenReadFile(const std::string& path) override;\n\n  io::Result<std::string, GenericError> LoadPath(std::string_view dir,\n                                                 std::string_view dbfilename) override;\n\n  bool IsCloud() const final {\n    return true;\n  }\n\n private:\n  io::Result<std::vector<std::string>, GenericError> ExpandFromPath(const std::string& path) final;\n\n  std::error_code CheckPath(const std::string& path) final;\n\n  // List the objects in the given bucket with the given prefix. 
This must\n  // run from a proactor.\n  io::Result<std::vector<SnapStat>, GenericError> ListObjects(std::string_view bucket_name,\n                                                              std::string_view prefix);\n\n  std::shared_ptr<Aws::S3::S3Client> s3_;\n};\n\n#endif\n\n#ifdef __linux__\n// takes ownership over the file.\nclass LinuxWriteWrapper : public io::Sink {\n public:\n  explicit LinuxWriteWrapper(util::fb2::LinuxFile* lf) : lf_(lf) {\n  }\n\n  io::Result<size_t> WriteSome(const iovec* v, uint32_t len) final;\n\n  std::error_code Close() {\n    return lf_->Close();\n  }\n\n private:\n  std::unique_ptr<util::fb2::LinuxFile> lf_;\n  off_t offset_ = 0;\n};\n#endif\n\nstruct FilenameSubstitutions {\n  std::string_view ts;\n  std::string_view year;\n  std::string_view month;\n  std::string_view day;\n};\n\nvoid SubstituteFilenamePlaceholders(fs::path* filename, const FilenameSubstitutions& fns);\n\ninline bool IsS3Path(std::string_view path) {\n  return absl::StartsWith(path, detail::kS3Prefix);\n}\n\ninline bool IsGCSPath(std::string_view path) {\n  return absl::StartsWith(path, detail::kGCSPrefix);\n}\n\ninline bool IsAzurePath(std::string_view path) {\n  return absl::StartsWith(path, detail::kAzurePrefix);\n}\n\ninline bool IsCloudPath(std::string_view path) {\n  return IsS3Path(path) || IsGCSPath(path) || IsAzurePath(path);\n}\n\n}  // namespace detail\n}  // namespace dfly\n"
  },
  {
    "path": "src/server/detail/table.h",
    "content": "// Copyright 2022, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#pragma once\n\n#include \"core/compact_object.h\"\n#include \"core/dash.h\"\n#include \"core/expire_period.h\"\n\nnamespace dfly {\n\nnamespace detail {\n\nusing PrimeKey = CompactKey;\nusing PrimeValue = CompactValue;\n\nstruct PrimeTablePolicy {\n  enum { kSlotNum = 14, kBucketNum = 56 };\n\n  static constexpr bool kUseVersion = true;\n\n  static uint64_t HashFn(const PrimeKey& s) {\n    return s.HashCode();\n  }\n\n  static uint64_t HashFn(std::string_view u) {\n    return CompactObj::HashCode(u);\n  }\n\n  static void DestroyKey(PrimeKey& cs) {\n    cs.Reset();\n  }\n\n  static void DestroyValue(PrimeValue& o) {\n    o.Reset();\n  }\n\n  static bool Equal(const PrimeKey& s1, std::string_view s2) {\n    return s1 == s2;\n  }\n\n  static bool Equal(const PrimeKey& s1, const PrimeKey& s2) {\n    return s1 == s2;\n  }\n};\n\nstruct ExpireTablePolicy {\n  enum : uint8_t { kSlotNum = 14, kBucketNum = 56 };\n  static constexpr bool kUseVersion = false;\n\n  static uint64_t HashFn(const PrimeKey& s) {\n    return s.HashCode();\n  }\n\n  static uint64_t HashFn(std::string_view u) {\n    return CompactObj::HashCode(u);\n  }\n\n  static void DestroyKey(PrimeKey& cs) {\n    cs.Reset();\n  }\n\n  static void DestroyValue(uint32_t val) {\n  }\n\n  static bool Equal(const PrimeKey& s1, std::string_view s2) {\n    return s1 == s2;\n  }\n};\n\n}  // namespace detail\n}  // namespace dfly\n"
  },
  {
    "path": "src/server/detail/wrapped_json_path.h",
    "content": "// Copyright 2022, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#pragma once\n\n#include <string_view>\n#include <utility>\n#include <variant>\n\n#include \"base/logging.h\"\n#include \"common/string_or_view.h\"\n#include \"core/json/json_object.h\"\n#include \"core/json/path.h\"\n#include \"facade/op_status.h\"\n\nnamespace dfly {\n\nusing facade::OpResult;\nusing facade::OpStatus;\nusing Nothing = std::monostate;\nusing JsonExpression = jsoncons::jsonpath::jsonpath_expression<JsonType>;\n\nnamespace details {\ntemplate <typename T>\nvoid OptionalEmplace(bool keep_defined, std::optional<T> src, std::optional<T>* dest);\n\ntemplate <typename T> void OptionalEmplace(bool keep_defined, T src, T* dest);\n}  // namespace details\n\ntemplate <typename T>\nusing JsonPathReadOnlyCallback = absl::FunctionRef<T(std::string_view, const JsonType&)>;\n\ntemplate <typename T = Nothing> struct MutateCallbackResult {\n  bool should_be_deleted = false;\n  std::optional<T> value;\n};\n\ntemplate <typename T>\nusing JsonPathMutateCallback =\n    absl::FunctionRef<MutateCallbackResult<T>(std::optional<std::string_view>, JsonType*)>;\n\nenum class JsonPathType { kV2, kLegacy /*Or V1*/ };\nconstexpr JsonPathType kDefaultJsonPathType = JsonPathType::kV2;\n\nstruct CallbackResultOptions {\n public:\n  enum class SavingOrder { kSaveFirst, kSaveLast };\n  enum class OnEmpty { kSendNil, kSendWrongType };\n\n  // Default options for WrappedJsonPath::ExecuteReadOnlyCallback\n  static CallbackResultOptions DefaultReadOnlyOptions(\n      SavingOrder saving_order = SavingOrder::kSaveLast);\n  // Default options for WrappedJsonPath::ExecuteMutateCallback\n  static CallbackResultOptions DefaultMutateOptions();\n\n  OnEmpty on_empty;\n  SavingOrder saving_order{SavingOrder::kSaveLast};\n  std::optional<JsonPathType> path_type{std::nullopt};\n};\n\ntemplate <typename T> class JsonCallbackResult {\n private:\n  template <typename V> struct 
is_optional : std::false_type {};\n\n  template <typename V> struct is_optional<std::optional<V>> : std::true_type {};\n\n public:\n  using SavingOrder = CallbackResultOptions::SavingOrder;\n  using OnEmpty = CallbackResultOptions::OnEmpty;\n\n  JsonCallbackResult() = default;\n\n  explicit JsonCallbackResult(CallbackResultOptions options);\n\n  void AddValue(T value);\n\n  bool Empty() const;\n\n  bool IsV1() const;\n  const T& AsV1() const;\n  const auto& AsV2() const;\n\n  bool ShouldSendNil() const;\n  bool ShouldSendWrongType() const;\n\n private:\n  std::vector<T> result_;\n  CallbackResultOptions options_{OnEmpty::kSendWrongType, SavingOrder::kSaveLast,\n                                 kDefaultJsonPathType};\n};\n\nclass WrappedJsonPath {\n public:\n  static constexpr std::string_view kV1PathRootElement = \".\";\n  static constexpr std::string_view kV2PathRootElement = \"$\";\n\n  WrappedJsonPath(json::Path json_path, cmn::StringOrView path, JsonPathType path_type);\n\n  WrappedJsonPath(JsonExpression expression, cmn::StringOrView path, JsonPathType path_type);\n\n  template <typename T>\n  JsonCallbackResult<T> ExecuteReadOnlyCallback(const JsonType* json_entry,\n                                                JsonPathReadOnlyCallback<T> cb,\n                                                CallbackResultOptions options) const;\n\n  template <typename T>\n  OpResult<JsonCallbackResult<std::optional<T>>> ExecuteMutateCallback(\n      JsonType* json_entry, JsonPathMutateCallback<T> cb, CallbackResultOptions options) const;\n\n  bool IsLegacyModePath() const;\n\n  bool RefersToRootElement() const;\n\n  // Returns true if this is internal implementation of json path\n  // Check AsJsonPath\n  bool HoldsJsonPath() const;\n\n  // Internal implementation of json path\n  const json::Path& AsJsonPath() const;\n  // Jsoncons implementation of json path\n  const JsonExpression& AsJsonExpression() const;\n\n  // Returns the path as a string_view.\n  std::string_view 
Path() const;\n\n private:\n  CallbackResultOptions InitializePathType(CallbackResultOptions options) const;\n\n private:\n  std::variant<json::Path, JsonExpression> parsed_path_;\n  cmn::StringOrView path_;\n  JsonPathType path_type_ = kDefaultJsonPathType;\n};\n\n// Implementation\n/******************************************************************/\nnamespace details {\n\ntemplate <typename T>\nvoid OptionalEmplace(bool keep_defined, std::optional<T> src, std::optional<T>* dest) {\n  if (!keep_defined || !dest->has_value()) {\n    dest->swap(src);\n  }\n}\n\ntemplate <typename T> void OptionalEmplace(bool keep_defined, T src, T* dest) {\n  if (!keep_defined) {\n    *dest = std::move(src);\n  }\n}\n\n}  // namespace details\n\ninline CallbackResultOptions CallbackResultOptions::DefaultReadOnlyOptions(\n    SavingOrder saving_order) {\n  return CallbackResultOptions{OnEmpty::kSendNil, saving_order};\n}\n\ninline CallbackResultOptions CallbackResultOptions::DefaultMutateOptions() {\n  return CallbackResultOptions{OnEmpty::kSendWrongType};\n}\n\ntemplate <typename T>\nJsonCallbackResult<T>::JsonCallbackResult(CallbackResultOptions options) : options_(options) {\n}\n\ntemplate <typename T> void JsonCallbackResult<T>::AddValue(T value) {\n  if (result_.empty() || !IsV1()) {\n    result_.push_back(std::move(value));\n    return;\n  }\n\n  details::OptionalEmplace(options_.saving_order == SavingOrder::kSaveFirst, std::move(value),\n                           &result_.front());\n}\n\ntemplate <typename T> bool JsonCallbackResult<T>::Empty() const {\n  return result_.empty();\n}\n\ntemplate <typename T> bool JsonCallbackResult<T>::IsV1() const {\n  return options_.path_type == JsonPathType::kLegacy;\n}\n\ntemplate <typename T> const T& JsonCallbackResult<T>::AsV1() const {\n  return result_.front();\n}\n\ntemplate <typename T> const auto& JsonCallbackResult<T>::AsV2() const {\n  return result_;\n}\n\ntemplate <typename T> bool JsonCallbackResult<T>::ShouldSendNil() const 
{\n  return IsV1() && options_.on_empty == OnEmpty::kSendNil && result_.empty();\n}\n\ntemplate <typename T> bool JsonCallbackResult<T>::ShouldSendWrongType() const {\n  if (IsV1()) {\n    if (result_.empty() && options_.on_empty == OnEmpty::kSendWrongType)\n      return true;\n\n    if constexpr (is_optional<T>::value) {\n      return !result_.front().has_value();\n    }\n  }\n  return false;\n}\n\ninline WrappedJsonPath::WrappedJsonPath(json::Path json_path, cmn::StringOrView path,\n                                        JsonPathType path_type)\n    : parsed_path_(std::move(json_path)), path_(std::move(path)), path_type_(path_type) {\n}\n\ninline WrappedJsonPath::WrappedJsonPath(JsonExpression expression, cmn::StringOrView path,\n                                        JsonPathType path_type)\n    : parsed_path_(std::move(expression)), path_(std::move(path)), path_type_(path_type) {\n}\n\ntemplate <typename T>\nJsonCallbackResult<T> WrappedJsonPath::ExecuteReadOnlyCallback(\n    const JsonType* json_entry, JsonPathReadOnlyCallback<T> cb,\n    CallbackResultOptions options) const {\n  JsonCallbackResult<T> read_result{InitializePathType(options)};\n\n  auto eval_callback = [&cb, &read_result](std::string_view path, const JsonType& val) {\n    read_result.AddValue(cb(path, val));\n  };\n\n  if (HoldsJsonPath()) {\n    const auto& json_path = AsJsonPath();\n    json::EvaluatePath(json_path, *json_entry,\n                       [&eval_callback](std::optional<std::string_view> key, const JsonType& val) {\n                         eval_callback(key ? 
*key : std::string_view{}, val);\n                       });\n  } else {\n    const auto& json_expression = AsJsonExpression();\n    json_expression.evaluate(*json_entry, eval_callback);\n  }\n\n  return read_result;\n}\n\ntemplate <typename T>\nOpResult<JsonCallbackResult<std::optional<T>>> WrappedJsonPath::ExecuteMutateCallback(\n    JsonType* json_entry, JsonPathMutateCallback<T> cb, CallbackResultOptions options) const {\n  JsonCallbackResult<std::optional<T>> mutate_result{InitializePathType(options)};\n\n  auto mutate_callback = [&cb, &mutate_result](std::optional<std::string_view> path,\n                                               JsonType* val) -> bool {\n    auto res = cb(path, val);\n    if (res.value.has_value()) {\n      mutate_result.AddValue(std::move(res.value).value());\n    } else if (!mutate_result.IsV1()) {\n      mutate_result.AddValue(std::nullopt);\n    }\n    return res.should_be_deleted;\n  };\n\n  if (HoldsJsonPath()) {\n    const auto& json_path = AsJsonPath();\n    json::MutatePath(json_path, mutate_callback, json_entry);\n  } else {\n    using namespace jsoncons::jsonpath;\n    using namespace jsoncons::jsonpath::detail;\n    using Evaluator = jsonpath_evaluator<JsonType, JsonType&>;\n    using ValueType = Evaluator::value_type;\n    using Reference = Evaluator::reference;\n    using JsonSelector = Evaluator::path_expression_type;\n\n    custom_functions<JsonType> funcs = custom_functions<JsonType>();\n\n    std::error_code ec;\n    static_resources static_res(funcs);\n    Evaluator e;\n\n    JsonSelector expr = e.compile(static_res, path_.view(), ec);\n    if (ec) {\n      VLOG(1) << \"Failed to mutate json with error: \" << ec.message();\n      return OpStatus::SYNTAX_ERR;\n    }\n\n    eval_context<ValueType, Reference> resources;\n\n    auto f = [&mutate_callback](const basic_path_node<char>& path, JsonType& val) {\n      mutate_callback(to_string(path), &val);\n    };\n\n    expr.evaluate(resources, *json_entry, 
JsonSelector::path_node_type{}, *json_entry, std::move(f),\n                  result_options::nodups | result_options::path);\n  }\n  return mutate_result;\n}\n\ninline bool WrappedJsonPath::IsLegacyModePath() const {\n  return path_type_ == JsonPathType::kLegacy;\n}\n\ninline bool WrappedJsonPath::RefersToRootElement() const {\n  auto path = path_.view();\n  return path.empty() || path == kV1PathRootElement || path == kV2PathRootElement;\n}\n\ninline bool WrappedJsonPath::HoldsJsonPath() const {\n  return std::holds_alternative<json::Path>(parsed_path_);\n}\n\ninline const json::Path& WrappedJsonPath::AsJsonPath() const {\n  return std::get<json::Path>(parsed_path_);\n}\n\ninline const JsonExpression& WrappedJsonPath::AsJsonExpression() const {\n  return std::get<JsonExpression>(parsed_path_);\n}\n\ninline std::string_view WrappedJsonPath::Path() const {\n  return path_.view();\n}\n\ninline CallbackResultOptions WrappedJsonPath::InitializePathType(\n    CallbackResultOptions options) const {\n  if (!options.path_type) {\n    options.path_type = path_type_;\n  }\n  return options;\n}\n\n}  // namespace dfly\n"
  },
  {
    "path": "src/server/dfly_bench.cc",
    "content": "// Copyright 2023, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\nextern \"C\" {\n#include \"redis/crc16.h\"\n}\n\n#include <absl/container/flat_hash_set.h>\n#include <absl/random/random.h>\n#include <absl/strings/match.h>\n#include <absl/strings/str_cat.h>\n#include <absl/strings/str_format.h>\n#include <absl/strings/str_split.h>\n\n#include <boost/icl/interval_set.hpp>\n#include <csignal>\n#include <queue>\n#include <shared_mutex>\n#include <tuple>\n\n#include \"absl/time/clock.h\"\n#include \"absl/time/time.h\"\n#include \"base/histogram.h\"\n#include \"base/init.h\"\n#include \"base/random.h\"\n#include \"base/zipf_gen.h\"\n#include \"facade/redis_parser.h\"\n#include \"io/io.h\"\n#include \"io/io_buf.h\"\n#include \"util/fibers/dns_resolve.h\"\n#include \"util/fibers/pool.h\"\n#include \"util/fibers/proactor_base.h\"\n\n// A load-test for DragonflyDB that fixes coordinated omission problem.\n\nusing std::string;\n\nABSL_FLAG(uint16_t, p, 6379, \"Server port\");\nABSL_FLAG(uint32_t, c, 20, \"Number of connections per thread\");\nABSL_FLAG(int32_t, qps, 20,\n          \"QPS schedule at which the generator sends requests to the server \"\n          \"per single connection. 0 means - coordinated omission, and positive value will throttle \"\n          \"the actual qps if server is slower than the target qps. 
\"\n          \"negative value means - hard target, without throttling.\");\n\nABSL_FLAG(uint32_t, n, 1000, \"Number of requests to send per connection\");\nABSL_FLAG(uint32_t, test_time, 0, \"Testing time in seconds\");\nABSL_FLAG(string, d, \"16\",\n          \"Specify value size as single number for fixed length or use min:max to generate random \"\n          \"value length between min and max.\");\nABSL_FLAG(string, h, \"localhost\", \"server hostname/ip\");\nABSL_FLAG(uint64_t, key_minimum, 0, \"Min value for keys used\");\nABSL_FLAG(uint64_t, key_maximum, 50'000'000, \"Max value for keys used\");\nABSL_FLAG(string, key_prefix, \"key:\", \"keys prefix\");\nABSL_FLAG(string, key_dist, \"U\", \"U for uniform, N for normal, Z for zipfian, S for sequential\");\nABSL_FLAG(double, zipf_alpha, 0.99, \"zipfian alpha parameter\");\nABSL_FLAG(uint64_t, seed, 42, \"A seed for random data generation\");\nABSL_FLAG(uint64_t, key_stddev, 0,\n          \"Standard deviation for non-uniform distribution, 0 chooses\"\n          \" a default value of (max-min)/6\");\nABSL_FLAG(uint32_t, pipeline, 1, \"maximum number of pending requests per connection\");\nABSL_FLAG(string, ratio, \"1:10\", \"Set:Get ratio\");\nABSL_FLAG(string, command, \"\",\n          \"custom command with __key__ placeholder for keys, \"\n          \"__data__ for values, __score__ for doubles\");\nABSL_FLAG(bool, random_data, true,\n          \"If true, generate random data for each request, otherwise uses incremental sequences.\"\n          \"Applies for __score__ and __data__ placeholders.\");\nABSL_FLAG(string, P, \"\", \"protocol can be empty (for RESP) or memcache_text\");\n\nABSL_FLAG(bool, tcp_nodelay, false, \"If true, set nodelay option on tcp socket\");\nABSL_FLAG(bool, noreply, false, \"If true, does not wait for replies. 
Relevant only for memcached.\");\n\nABSL_FLAG(bool, probe_cluster, true,\n          \"If false, skips cluster-mode probing and works only in single node mode\");\n\nABSL_FLAG(bool, greet, true,\n          \"If true, sends a greeting command on each connection, \"\n          \"to make sure the connection succeeded\");\nABSL_FLAG(bool, cluster_skip_tags, true,\n          \"If true, skips tags (compatible with memtier benchmark) in cluster mode, \"\n          \"otherwise adds hash tags to keys\");\nABSL_FLAG(bool, ascii, true, \"If true, use ascii characters for values\");\nABSL_FLAG(bool, connect_only, false,\n          \"If true, will only connect to the server, without sending \"\n          \"loadtest commands\");\nABSL_FLAG(string, password, \"\", \"password to authenticate the client\");\n\nusing namespace std;\nusing namespace util;\nusing absl::GetFlag;\nusing absl::StrFormat;\nusing facade::RedisParser;\nusing facade::RespExpr;\nusing facade::RespVec;\nusing tcp = ::boost::asio::ip::tcp;\nusing absl::StrCat;\n\nthread_local base::Xoroshiro128p bit_gen;\nthread_local uint64_t seq_val = 1;\n\natomic_bool terminate_requested = false;\n\n#if __INTELLISENSE__\n#pragma diag_suppress 144\n#endif\n\nenum Protocol { RESP, MC_TEXT } protocol;\nenum DistType { UNIFORM, NORMAL, ZIPFIAN, SEQUENTIAL } dist_type{UNIFORM};\nconstexpr uint16_t kNumSlots = 16384;\n\nstatic string GetRandomBlob(size_t len, bool ascii) {\n  static bool is_random = GetFlag(FLAGS_random_data);\n\n  std::string res(len, '\\0');\n  size_t indx = 0;\n\n  for (; indx + 16 <= len; indx += 16) {  // 2 chars per byte\n    absl::numbers_internal::FastHexToBufferZeroPad16(is_random ? bit_gen() : seq_val++,\n                                                     res.data() + indx);\n  }\n\n  DCHECK_LE(indx, len);\n\n  if (indx < len) {\n    uint64_t next_val = is_random ? 
bit_gen() : seq_val++;\n    unsigned count = len - indx;\n\n    // extract hex chars from least significant nibble, as it's the one that changes\n    // with sequential values.\n    for (unsigned j = 0; j < count; ++j) {\n      res[indx++] = (next_val & 0x0F) + 'A';  // to ascii (not really hex, but ok for random data)\n      next_val >>= 4;\n    }\n  }\n\n  if (!ascii) {\n    for (size_t i = 0; i < len; i++) {\n      res[i] += 80;\n    }\n  }\n  return res;\n}\n\nuint16_t SlotId(string_view str) {\n  return crc16(str.data(), str.size()) % kNumSlots;\n}\n\nusing SlotRange = pair<uint16_t, uint16_t>;\n\nstruct ShardInfo {\n  vector<SlotRange> slots;  // list of [start, end] pairs. inclusive.\n  tcp::endpoint endpoint;\n};\n\nusing ClusterShards = vector<ShardInfo>;\n\nclass ShardSlots {\n private:\n  using IntervalSet = boost::icl::interval_set<uint16_t>;\n  using Interval = boost::icl::interval<uint16_t>;\n\n public:\n  void SetClusterSlotRanges(const ClusterShards& cluster_shards) {\n    for (auto shard : cluster_shards) {\n      IntervalSet shard_slots_;\n      for (auto& slot : shard.slots) {\n        shard_slots_.insert(Interval::closed(slot.first, slot.second));\n      }\n      shards_slots_.emplace(shard.endpoint, shard_slots_);\n    }\n  }\n\n  SlotRange NextSlotRange(const tcp::endpoint& ep, size_t i) {\n    std::shared_lock<fb2::SharedMutex> lock(mu_);\n    const auto& shard_slot_interval = shards_slots_[ep];\n    unsigned index = i % shard_slot_interval.iterative_size();\n    const auto& interval = next(shard_slot_interval.begin(), index);\n    return SlotRange{boost::icl::first(*interval), boost::icl::last(*interval)};\n  }\n\n  bool Empty() const {\n    return shards_slots_.empty();\n  }\n\n  size_t Size() const {\n    return shards_slots_.size();\n  }\n\n  vector<tcp::endpoint> Endpoints() const {\n    vector<tcp::endpoint> endpoints;\n    for (const auto& shard : shards_slots_) {\n      endpoints.push_back(shard.first);\n    }\n    return endpoints;\n 
 }\n\n  void MoveSlot(const tcp::endpoint& src_ep, const tcp::endpoint& dst_ep, uint16_t slot_id) {\n    std::unique_lock<fb2::SharedMutex> lock(mu_);\n    // Remove slot from source ep\n    auto& src_shard_slots = shards_slots_[src_ep];\n    // If slot id doesn't exist on source ep we have moved this slot before\n    if (src_shard_slots.find(slot_id) == src_shard_slots.end()) {\n      return;\n    }\n    src_shard_slots.subtract(slot_id);\n    // Add slot to dest ep\n    auto& dst_shard_slots = shards_slots_[dst_ep];\n    dst_shard_slots.insert(slot_id);\n  }\n\n private:\n  struct Hasher {\n    using is_transparent = void;\n    size_t operator()(const tcp::endpoint& ep) const {\n      std::size_t hash1 = std::hash<string>()(ep.address().to_string());\n      std::size_t hash2 = std::hash<unsigned short>()(ep.port());\n      return hash1 ^ (hash2 + 0x9e3779b9 + (hash1 << 6) + (hash1 >> 2));\n    }\n  };\n\n  struct Eq {\n    using is_transparent = void;\n    bool operator()(const tcp::endpoint& left, const tcp::endpoint& right) const {\n      return left == right;\n    }\n  };\n\n private:\n  fb2::SharedMutex mu_;\n  absl::flat_hash_map<tcp::endpoint, IntervalSet, Hasher, Eq> shards_slots_;\n};\n\nclass KeyGenerator {\n public:\n  KeyGenerator(uint32_t min, uint32_t max);\n\n  string operator()(uint16_t from, uint16_t to) const;\n  void EnableClusterMode();\n\n  bool IsClusterEnabled() const {\n    return !hash_slots_.empty();\n  }\n\n private:\n  string prefix_;\n  uint64_t min_, max_, range_;\n  mutable uint64_t seq_cursor_;\n  double stddev_ = 1.0 / 6;\n  mutable optional<base::ZipfianGenerator> zipf_;\n  vector<string> hash_slots_;\n};\n\nclass CommandGenerator {\n public:\n  explicit CommandGenerator(KeyGenerator* keygen);\n\n  string Next(SlotRange range);\n\n  bool might_hit() const {\n    return might_hit_;\n  }\n\n  bool noreply() const {\n    return noreply_;\n  }\n\n private:\n  enum TemplateType : uint8_t { KEY, VALUE, SCORE };\n\n  string 
FillSet(string_view key);\n  string FillGet(string_view key);\n\n  bool IsRandomValueLen() const {\n    return value_len_min_ != value_len_max_;\n  }\n\n  KeyGenerator* keygen_;\n  uint32_t ratio_set_ = 0, ratio_get_ = 0;\n  string command_;\n\n  using CmdPart = variant<string_view, TemplateType>;\n  vector<CmdPart> cmd_parts_;\n\n  string fixed_len_value_;  // used for fixed value string\n  int32_t value_len_min_ = 0, value_len_max_ = 0;\n  bool might_hit_ = false;\n  bool noreply_ = false;\n  bool is_ascii_ = true;\n};\n\nCommandGenerator::CommandGenerator(KeyGenerator* keygen) : keygen_(keygen) {\n  command_ = GetFlag(FLAGS_command);\n  is_ascii_ = GetFlag(FLAGS_ascii);\n\n  pair<string, string> value_len_str = absl::StrSplit(GetFlag(FLAGS_d), ':');\n  CHECK(absl::SimpleAtoi(value_len_str.first, &value_len_min_));\n  if (!value_len_str.second.empty()) {\n    CHECK(absl::SimpleAtoi(value_len_str.second, &value_len_max_));\n  } else {\n    value_len_max_ = value_len_min_;\n  }\n\n  if ((value_len_min_ < 0) || (value_len_max_ < 0) || (value_len_min_ > value_len_max_)) {\n    LOG(ERROR) << \"Invalid `-d \" << GetFlag(FLAGS_d)\n               << \"` argument. Min and max values should be bigger than 0 and min value should \"\n                  \"be smaller or equal to max. Setting to default (16).\";\n    value_len_max_ = value_len_min_ = 16;\n  }\n\n  if (!IsRandomValueLen()) {\n    fixed_len_value_ = string(value_len_min_, is_ascii_ ? 
'a' : char(130));\n  }\n\n  if (command_.empty()) {\n    pair<string, string> ratio_str = absl::StrSplit(GetFlag(FLAGS_ratio), ':');\n    CHECK(absl::SimpleAtoi(ratio_str.first, &ratio_set_));\n    CHECK(absl::SimpleAtoi(ratio_str.second, &ratio_get_));\n    return;\n  }\n\n  vector<string_view> parts = absl::StrSplit(command_, ' ', absl::SkipEmpty());\n  for (string_view p : parts) {\n    if (p == \"__key__\"sv) {\n      cmd_parts_.emplace_back(KEY);\n    } else if (p == \"__data__\"sv) {\n      cmd_parts_.emplace_back(VALUE);\n    } else if (p == \"__score__\"sv) {\n      cmd_parts_.emplace_back(SCORE);\n    } else {\n      cmd_parts_.emplace_back(p);\n    }\n  }\n\n  if (!cmd_parts_.empty()) {\n    const string_view* cmd = get_if<string_view>(&cmd_parts_.front());\n    if (cmd) {\n      might_hit_ = absl::EqualsIgnoreCase(*cmd, \"get\") || absl::StartsWithIgnoreCase(*cmd, \"mget\");\n    }\n  }\n}\n\nstring CommandGenerator::Next(SlotRange range) {\n  noreply_ = false;\n\n  if (command_.empty()) {\n    string key = (*keygen_)(range.first, range.second);\n\n    if (absl::Uniform(bit_gen, 0U, ratio_get_ + ratio_set_) < ratio_set_) {\n      might_hit_ = false;\n      return FillSet(key);\n    }\n    might_hit_ = true;\n    return FillGet(key);\n  }\n\n  // For custom commands, we select a random slot and then use it for key generation.\n  uint16_t slot_id = 0;\n\n  if (keygen_->IsClusterEnabled()) {\n    slot_id = absl::Uniform(absl::IntervalClosedClosed, bit_gen, range.first, range.second);\n  }\n\n  string str, gen_cmd;\n  absl::StrAppend(&gen_cmd, \"*\", cmd_parts_.size(), \"\\r\\n\");\n  for (const CmdPart& part : cmd_parts_) {\n    if (auto p = get_if<string_view>(&part)) {\n      absl::StrAppend(&gen_cmd, \"$\", p->size(), \"\\r\\n\", *p, \"\\r\\n\");\n    } else {\n      switch (get<TemplateType>(part)) {\n        case KEY:\n          str = (*keygen_)(slot_id, slot_id);\n          break;\n        case VALUE: {\n          size_t value_len = 
IsRandomValueLen()\n                                 ? absl::Uniform(bit_gen, value_len_min_, value_len_max_)\n                                 : fixed_len_value_.size();\n          str = GetRandomBlob(value_len, is_ascii_);\n          break;\n        }\n        case SCORE: {\n          uniform_real_distribution<double> uniform(0, 1);\n          str = absl::StrCat(uniform(bit_gen));\n        }\n      }\n      absl::StrAppend(&gen_cmd, \"$\", str.size(), \"\\r\\n\", str, \"\\r\\n\");\n    }\n  }\n\n  return gen_cmd;\n}\n\nstring CommandGenerator::FillSet(string_view key) {\n  string res;\n  string_view value = fixed_len_value_;\n  string random_len_value;\n\n  if (IsRandomValueLen()) {\n    random_len_value = GetRandomBlob(absl::Uniform(bit_gen, value_len_min_, value_len_max_), true);\n    value = random_len_value;\n  }\n\n  if (protocol == RESP) {\n    absl::StrAppend(&res, \"*3\\r\\n$3\\r\\nset\\r\\n$\", key.size(), \"\\r\\n\", key);\n    absl::StrAppend(&res, \"\\r\\n$\", value.size(), \"\\r\\n\", value, \"\\r\\n\");\n  } else {\n    DCHECK_EQ(protocol, MC_TEXT);\n    absl::StrAppend(&res, \"set \", key, \" 0 0 \", value.size());\n    if (GetFlag(FLAGS_noreply)) {\n      absl::StrAppend(&res, \" noreply\");\n      noreply_ = true;\n    }\n\n    absl::StrAppend(&res, \"\\r\\n\", value, \"\\r\\n\");\n  }\n  return res;\n}\n\nstring CommandGenerator::FillGet(string_view key) {\n  return absl::StrCat(\"get \", key, \"\\r\\n\");\n}\n\nstruct ClientStats {\n  base::Histogram total_hist, online_hist;\n\n  uint64_t num_responses = 0;\n  uint64_t qps = 0;\n  uint64_t hit_count = 0;\n  uint64_t hit_opportunities = 0;\n  uint64_t num_errors = 0;\n  unsigned num_clients = 0;\n\n  ClientStats& operator+=(const ClientStats& o) {\n    total_hist.Merge(o.total_hist);\n    online_hist.Merge(o.online_hist);\n\n    num_responses += o.num_responses;\n    qps += o.qps;\n    hit_count += o.hit_count;\n    hit_opportunities += o.hit_opportunities;\n    num_errors += o.num_errors;\n    
num_clients += o.num_clients;\n\n    return *this;\n  }\n};\n\n// Per connection driver.\nclass Driver {\n public:\n  explicit Driver(uint32_t num_reqs, uint32_t time_limit, ClientStats* stats, ProactorBase* p,\n                  ShardSlots* ss)\n      : num_reqs_(num_reqs), time_limit_(time_limit), shard_slots_(*ss), stats_(*stats) {\n    socket_.reset(p->CreateSocket());\n    if (time_limit_ > 0)\n      num_reqs_ = UINT32_MAX;\n  }\n\n  Driver(const Driver&) = delete;\n  Driver(Driver&&) = delete;\n  Driver& operator=(Driver&&) = delete;\n\n  void Connect(unsigned index, const tcp::endpoint& ep);\n  void Run(uint64_t* cycle_ns, CommandGenerator* cmd_gen);\n  void Shutdown();\n\n  float done() const {\n    if (time_limit_ > 0)\n      return double(absl::GetCurrentTimeNanos() - start_ns_) / (time_limit_ * 1e9);\n    return double(received_) / num_reqs_;\n  }\n\n  unsigned pending_length() const {\n    return reqs_.size();\n  }\n\n private:\n  void PopRequest();\n  void ReceiveFb();\n  void ParseRESP();\n  void ParseMC();\n  void RunCommandAndCheckResultIs(std::string_view cmd, std::string_view expected_res);\n\n  struct Req {\n    uint64_t start;\n    bool might_hit;\n  };\n\n  uint32_t num_reqs_, time_limit_, received_ = 0;\n  int64_t start_ns_ = 0;\n\n  tcp::endpoint ep_;\n  ShardSlots& shard_slots_;\n  ClientStats& stats_;\n  unique_ptr<FiberSocketBase> socket_;\n  fb2::Fiber receive_fb_;\n  queue<Req> reqs_;\n  fb2::CondVarAny cnd_;\n\n  facade::RedisParser parser_{RedisParser::Mode::CLIENT, 1 << 16};\n  io::IoBuf io_buf_{512};\n  unsigned blob_len_ = 0;\n};\n\n// Per thread client.\nclass TLocalClient {\n public:\n  explicit TLocalClient(ProactorBase* p, ShardSlots* ss) : p_(p), shard_slots_(ss) {\n  }\n\n  TLocalClient(const TLocalClient&) = delete;\n\n  void Connect(const tcp::endpoint& ep, const vector<tcp::endpoint>& shard_endpoints);\n  void Disconnect();\n\n  void Start(uint32_t key_min, uint32_t key_max, uint64_t cycle_ns);\n  void Join();\n\n  
ClientStats stats;\n\n  tuple<float, float> GetMinMaxDone() const {\n    float min = 1, max = 0;\n\n    for (unsigned i = 0; i < drivers_.size(); ++i) {\n      float done = drivers_[i]->done();\n      max = std::max(done, max);\n      min = std::min(done, min);\n    }\n\n    return {min, max};\n  }\n\n  unsigned MaxPending() const {\n    unsigned max = 0;\n    for (unsigned i = 0; i < drivers_.size(); ++i) {\n      if (drivers_[i]->pending_length() > max) {\n        max = drivers_[i]->pending_length();\n      }\n    }\n    return max;\n  }\n\n  unsigned num_conns() const {\n    return drivers_.size();\n  }\n\n  void AdjustCycle();\n\n private:\n  ProactorBase* p_;\n  ShardSlots* shard_slots_;\n  vector<unique_ptr<Driver>> drivers_;\n  optional<KeyGenerator> key_gen_;\n  optional<CommandGenerator> cmd_gen_;\n\n  vector<fb2::Fiber> driver_fbs_;\n  uint64_t cur_cycle_ns_;\n  uint64_t target_cycle_;\n  int64_t start_time_;\n};\n\nKeyGenerator::KeyGenerator(uint32_t min, uint32_t max)\n    : min_(min), max_(max), range_(max - min + 1) {\n  prefix_ = GetFlag(FLAGS_key_prefix);\n  CHECK_GT(range_, 0u);\n\n  seq_cursor_ = min_;\n  switch (dist_type) {\n    case NORMAL: {\n      uint64_t stddev = GetFlag(FLAGS_key_stddev);\n      if (stddev != 0) {\n        stddev_ = double(stddev) / double(range_);\n      }\n      break;\n    }\n    case ZIPFIAN:\n      zipf_.emplace(min, max, GetFlag(FLAGS_zipf_alpha));\n      break;\n    default:;\n  }\n}\n\nstring KeyGenerator::operator()(uint16_t from, uint16_t to) const {\n  uint64_t key_suffix = 0;\n  uint16_t slot_id = from;\n  bool skip_tags = IsClusterEnabled() && GetFlag(FLAGS_cluster_skip_tags);\n  string res;\n\n  do {\n    switch (dist_type) {\n      case UNIFORM:\n        key_suffix = absl::Uniform(bit_gen, min_, max_);\n        break;\n      case NORMAL: {\n        double val = absl::Gaussian(bit_gen, 0.5, stddev_);\n        key_suffix = min_ + uint64_t(val * range_);\n        break;\n      }\n      case ZIPFIAN:\n        
key_suffix = zipf_->Next(bit_gen);\n        break;\n      case SEQUENTIAL:\n        key_suffix = seq_cursor_++;\n        if (seq_cursor_ > max_)\n          seq_cursor_ = min_;\n        break;\n    }\n\n    if (!skip_tags)\n      break;\n\n    // If we skip tags, we must make sure that the key fits the slot range.\n    res = absl::StrCat(prefix_, key_suffix);\n    slot_id = SlotId(res);\n  } while (slot_id < from || slot_id > to);\n\n  // If we are in cluster mode we add the hash slot to the key to make sure it lands in the correct\n  // range.\n  if (IsClusterEnabled()) {\n    if (!skip_tags) {\n      if (to > from)\n        slot_id = absl::Uniform(absl::IntervalClosedClosed, bit_gen, from, to);\n      absl::StrAppend(&res, prefix_, \"{\", hash_slots_[slot_id], \"}\", key_suffix);\n    }\n  } else {\n    absl::StrAppend(&res, prefix_, key_suffix);\n  }\n\n  return res;\n}\n\nvoid KeyGenerator::EnableClusterMode() {\n  hash_slots_.resize(kNumSlots);\n  uint32_t i = 0;\n  uint32_t num_slots_filled = 0;\n\n  // Precompute the hash slots for each of the slot ids so given the slot id\n  // we could generate a key that belongs to that slot.\n  while (num_slots_filled < kNumSlots) {\n    string key = absl::StrCat(i);\n    uint16_t id = SlotId(key);\n    if (hash_slots_[id].empty()) {\n      hash_slots_[id] = std::move(key);\n      num_slots_filled++;\n    }\n    ++i;\n  }\n}\n\nvoid RunCommandAndCheckResultIs(std::string_view cmd, std::string_view expected,\n                                FiberSocketBase* socket) {\n  auto ec = socket->Write(io::Buffer(cmd));\n  CHECK(!ec);\n\n  uint8_t buf[128];\n  auto res_sz = socket->Recv(io::MutableBytes(buf));\n  CHECK(res_sz) << res_sz.error().message();\n  string_view resp = io::View(io::Bytes(buf, *res_sz));\n  CHECK_EQ(resp, expected) << resp;\n}\n\nvoid Driver::RunCommandAndCheckResultIs(std::string_view cmd, std::string_view expected_res) {\n  ::RunCommandAndCheckResultIs(cmd, expected_res, socket_.get());\n}\n\nvoid 
Driver::Connect(unsigned index, const tcp::endpoint& ep) {\n  VLOG(2) << \"Connecting \" << index << \" to \" << ep;\n  error_code ec = socket_->Connect(ep);\n  CHECK(!ec) << \"Could not connect to \" << ep << \" \" << ec;\n  if (GetFlag(FLAGS_tcp_nodelay)) {\n    int yes = 1;\n    CHECK_EQ(0, setsockopt(socket_->native_handle(), IPPROTO_TCP, TCP_NODELAY, &yes, sizeof(yes)));\n  }\n\n  auto password = absl::GetFlag(FLAGS_password);\n  if (!password.empty()) {\n    auto command = absl::StrCat(\"AUTH \", password, \"\\r\\n\");\n    RunCommandAndCheckResultIs(command, \"+OK\\r\\n\");\n  } else if (absl::GetFlag(FLAGS_greet)) {\n    // TCP Connect does not ensure that the connection was indeed accepted by the server.\n    // if server backlog is too short the connection will get stuck in the accept queue.\n    // Therefore, we send a ping command to ensure that every connection got connected.\n    RunCommandAndCheckResultIs(\"PING\\r\\n\", \"+PONG\\r\\n\");\n  }\n  ep_ = ep;\n  receive_fb_ = MakeFiber(fb2::Launch::dispatch, [this] { ReceiveFb(); });\n}\n\nvoid Driver::Run(uint64_t* cycle_ns, CommandGenerator* cmd_gen) {\n  start_ns_ = absl::GetCurrentTimeNanos();\n  uint32_t pipeline = std::max<uint32_t>(GetFlag(FLAGS_pipeline), 1u);\n  bool should_throttle = GetFlag(FLAGS_qps) > 0;\n\n  stats_.num_clients++;\n  int64_t time_limit_ns =\n      time_limit_ > 0 ? 
int64_t(time_limit_) * 1'000'000'000 + start_ns_ : INT64_MAX;\n  int64_t now = start_ns_;\n  SlotRange slot_range{0, kNumSlots - 1};\n  CHECK_GT(num_reqs_, 0u);\n\n  uint32_t num_batches = ((num_reqs_ - 1) / pipeline) + 1;\n\n  for (unsigned i = 0; i < num_batches && now < time_limit_ns && !terminate_requested; ++i) {\n    if (i == num_batches - 1) {  // last batch\n      pipeline = num_reqs_ - i * pipeline;\n    }\n\n    string out_buf;\n    for (unsigned j = 0; j < pipeline; ++j) {\n      // TODO: this skews the distribution if slot ranges are uneven.\n      // Ideally we would like to pick randomly a single slot from all the ranges we have\n      // and pass it to cmd_gen->Next below.\n      if (!shard_slots_.Empty()) {\n        slot_range = shard_slots_.NextSlotRange(ep_, i);\n      }\n\n      absl::StrAppend(&out_buf, cmd_gen->Next(slot_range));\n\n      Req req;\n      req.start = absl::GetCurrentTimeNanos();\n      req.might_hit = cmd_gen->might_hit();\n\n      reqs_.push(req);\n\n      if (out_buf.size() >= 8192) {\n        error_code ec = socket_->Write(io::Buffer(out_buf));\n        out_buf.clear();\n        if (ec && FiberSocketBase::IsConnClosed(ec)) {\n          // TODO: report failure\n          VLOG(1) << \"Connection closed\";\n          break;\n        }\n        CHECK(!ec) << ec.message();\n      }\n      if (cmd_gen->noreply()) {\n        PopRequest();\n      }\n    }\n\n    if (!out_buf.empty()) {\n      error_code ec = socket_->Write(io::Buffer(out_buf));\n      CHECK(!ec || FiberSocketBase::IsConnClosed(ec)) << ec.message();\n    }\n\n    now = absl::GetCurrentTimeNanos();\n    if (cycle_ns) {\n      int64_t target_ts = start_ns_ + i * (*cycle_ns);\n      int64_t sleep_ns = target_ts - now;\n      if (reqs_.size() > pipeline * 2 && should_throttle && sleep_ns <= 0) {\n        sleep_ns = 10'000;\n      }\n\n      if (sleep_ns > 0) {\n        VLOG(5) << \"Sleeping for \" << sleep_ns << \"ns\";\n        // There is no point in sending more 
requests if they are piled up in the server.\n        do {\n          ThisFiber::SleepFor(chrono::nanoseconds(sleep_ns));\n        } while (should_throttle && reqs_.size() > pipeline * 2);\n      } else if (i % 256 == 255) {\n        ThisFiber::Yield();\n        VLOG(5) << \"Behind QPS schedule\";\n      }\n    } else {\n      // Coordinated omission.\n\n      fb2::NoOpLock lk;\n      cnd_.wait(lk, [this] { return reqs_.empty(); });\n    }\n  }\n\n  int64_t finish = absl::GetCurrentTimeNanos();\n  VLOG(1) << \"Done queuing \" << num_reqs_ << \" requests, which took \"\n          << StrFormat(\"%.1fs\", double(finish - start_ns_) / 1000'000'000)\n          << \". Waiting for server processing\";\n\n  // TODO: to change to a condvar or something.\n  while (!reqs_.empty()) {\n    ThisFiber::SleepFor(1ms);\n  }\n  Shutdown();\n}\n\nvoid Driver::Shutdown() {\n  std::ignore = socket_->Shutdown(SHUT_RDWR);  // breaks the receive fiber.\n  receive_fb_.Join();\n  std::ignore = socket_->Close();\n  stats_.num_clients--;\n}\n\nstatic string_view FindLine(io::Bytes buf) {\n  if (buf.size() < 2)\n    return {};\n  for (unsigned i = 0; i < buf.size() - 1; ++i) {\n    if (buf[i] == '\\r' && buf[i + 1] == '\\n') {\n      return io::View(buf.subspan(0, i + 2));\n    }\n  }\n  return {};\n};\n\nvoid Driver::PopRequest() {\n  uint64_t now = absl::GetCurrentTimeNanos();\n  uint64_t usec = (now - reqs_.front().start) / 1000;\n  stats_.online_hist.Add(usec);\n  stats_.total_hist.Add(usec);\n  stats_.hit_opportunities += reqs_.front().might_hit;\n  ++received_;\n  reqs_.pop();\n  if (reqs_.empty()) {\n    cnd_.notify_one();\n  }\n  ++stats_.num_responses;\n}\n\nvoid Driver::ReceiveFb() {\n  uint64_t now = absl::GetCurrentTimeNanos();\n  while (true) {\n    io_buf_.EnsureCapacity(256);\n    auto buf = io_buf_.AppendBuffer();\n    VLOG(3) << \"Socket read: \" << reqs_.size();\n\n    ::io::Result<size_t> recv_sz = socket_->Recv(buf);\n    CHECK(recv_sz) << recv_sz.error().message();\n\n    
if (*recv_sz == 0) {\n      LOG_IF(DFATAL, !reqs_.empty())\n          << \"Broke with \" << reqs_.size() << \" requests,  received: \" << received_;\n      // clear reqs - to prevent Driver::Run block on them indefinitely.\n      decltype(reqs_)().swap(reqs_);\n      break;\n    }\n\n    io_buf_.CommitWrite(*recv_sz);\n\n    if (protocol == RESP) {\n      ParseRESP();\n    } else {\n      // MC_TEXT\n      ParseMC();\n    }\n  }\n  double usec = (absl::GetCurrentTimeNanos() - now) / 1000;\n  if (usec > 0)\n    stats_.qps += uint64_t(double(received_) * 1e6 / usec);\n  VLOG(1) << \"ReceiveFb done\";\n}\n\nvoid Driver::ParseRESP() {\n  uint32_t consumed = 0;\n  RedisParser::Result result = RedisParser::OK;\n  RespVec parse_args;\n  constexpr string_view kMovedErrorKey = \"MOVED\"sv;\n  boost::system::error_code ec;\n\n  do {\n    result = parser_.Parse(io_buf_.InputBuffer(), &consumed, &parse_args);\n    if (result == RedisParser::OK && !parse_args.empty()) {\n      if (parse_args[0].type == RespExpr::ERROR) {\n        string_view error = parse_args[0].GetView();\n        VLOG(2) << \"Error \" << error;\n        if (absl::StartsWith(error, kMovedErrorKey)) {\n          error = error.substr(kMovedErrorKey.length());\n          vector<string_view> parts =\n              absl::StrSplit(absl::StripTrailingAsciiWhitespace(error), ' ', absl::SkipEmpty());\n\n          CHECK_EQ(parts.size(), 2u);\n          uint32_t slot_id;\n          CHECK(absl::SimpleAtoi(parts[0], &slot_id));\n\n          vector<string_view> addr_parts = absl::StrSplit(parts[1], ':');\n          CHECK_EQ(2u, addr_parts.size());\n\n          auto host = boost::asio::ip::make_address(addr_parts[0], ec);\n          CHECK(!ec) << \"make_address failed with error: \" << ec.message()\n                     << \" while parsing address \" << addr_parts[0];\n\n          uint32_t port;\n          CHECK(absl::SimpleAtoi(addr_parts[1], &port));\n          CHECK_LT(port, 65536u);\n\n          
shard_slots_.MoveSlot(ep_, tcp::endpoint(host, port), slot_id);\n        }\n        ++stats_.num_errors;\n      } else if (reqs_.front().might_hit && parse_args[0].type != RespExpr::NIL) {\n        ++stats_.hit_count;\n      }\n      parse_args.clear();\n      PopRequest();\n    }\n    io_buf_.ConsumeInput(consumed);\n  } while (result == RedisParser::OK && io_buf_.InputLen() > 0);\n}\n\nvoid Driver::ParseMC() {\n  while (true) {\n    string_view line = FindLine(io_buf_.InputBuffer());\n    if (line.empty())\n      break;\n\n    CHECK_EQ(line.back(), '\\n');\n    if (line == \"STORED\\r\\n\" || line == \"END\\r\\n\") {\n      PopRequest();\n      blob_len_ = 0;\n    } else if (absl::StartsWith(line, \"VALUE\")) {\n      // last token is a blob length.\n      auto it = line.rbegin();\n      while (it != line.rend() && *it != ' ')\n        ++it;\n      size_t len = it - line.rbegin() - 2;\n      const char* start = &(*it) + 1;\n      if (!absl::SimpleAtoi(string(start, len), &blob_len_)) {\n        LOG(ERROR) << \"Invalid blob len \" << line;\n        return;\n      }\n      ++stats_.hit_count;\n    } else if (absl::StartsWith(line, \"SERVER_ERROR\")) {\n      ++stats_.num_errors;\n      PopRequest();\n      blob_len_ = 0;\n    } else {\n      auto handle = socket_->native_handle();\n      CHECK_EQ(blob_len_ + 2, line.size()) << line;\n      blob_len_ = 0;\n      VLOG(2) << \"Got line \" << handle << \": \" << line;\n    }\n    io_buf_.ConsumeInput(line.size());\n  }\n}\n\nvoid TLocalClient::Connect(const tcp::endpoint& ep, const vector<tcp::endpoint>& endpoints) {\n  VLOG(2) << \"Connecting client...\" << ep;\n\n  unsigned conn_per_shard = GetFlag(FLAGS_c);\n  if (shard_slots_->Empty()) {\n    drivers_.resize(conn_per_shard);\n  } else {\n    drivers_.resize(shard_slots_->Size() * conn_per_shard);\n  }\n\n  for (auto& driver : drivers_) {\n    driver.reset(new Driver{GetFlag(FLAGS_n), GetFlag(FLAGS_test_time), &stats, p_, shard_slots_});\n  }\n  vector<fb2::Fiber> 
fbs(drivers_.size());\n\n  for (size_t i = 0; i < fbs.size(); ++i) {\n    vector<SlotRange> slots;\n    tcp::endpoint shard_ep = ep;\n    if (!shard_slots_->Empty()) {\n      size_t shard = i / conn_per_shard;\n      shard_ep = endpoints[shard];\n    }\n    fbs[i] =\n        fb2::Fiber(StrCat(\"connect/\", i), [&, shard_ep, i] { drivers_[i]->Connect(i, shard_ep); });\n  }\n\n  for (auto& fb : fbs)\n    fb.Join();\n}\n\nvoid TLocalClient::Disconnect() {\n  for (size_t i = 0; i < drivers_.size(); ++i) {\n    drivers_[i]->Shutdown();\n  }\n}\n\nvoid TLocalClient::Start(uint32_t key_min, uint32_t key_max, uint64_t cycle_ns) {\n  key_gen_.emplace(key_min, key_max);\n  cmd_gen_.emplace(&key_gen_.value());\n\n  driver_fbs_.resize(drivers_.size());\n  if (!shard_slots_->Empty()) {\n    key_gen_->EnableClusterMode();\n  }\n  cur_cycle_ns_ = cycle_ns;\n  target_cycle_ = cycle_ns;\n  start_time_ = absl::GetCurrentTimeNanos();\n\n  for (size_t i = 0; i < driver_fbs_.size(); ++i) {\n    driver_fbs_[i] = fb2::Fiber(StrCat(\"run/\", i), [&, i] {\n      drivers_[i]->Run(cur_cycle_ns_ ? &cur_cycle_ns_ : nullptr, &cmd_gen_.value());\n    });\n  }\n}\n\nvoid TLocalClient::Join() {\n  for (auto& fb : driver_fbs_)\n    fb.Join();\n\n  VLOG(1) << \"Total hits: \" << stats.hit_count;\n}\n\nvoid TLocalClient::AdjustCycle() {\n  if (cur_cycle_ns_ == 0 || stats.num_responses == 0)\n    return;\n\n  // We adjust sleeping cycle per thread, and it's the same for all connection in this thread.\n  // We compute the aggregated cycle so far based on responses, and if it\n  // is greater than current we increase the current cycle. 
Otherwise,\n  // we try slowly reducing the cycle back to the nominal one.\n\n  int64_t running_time = absl::GetCurrentTimeNanos() - start_time_;\n  int64_t real_cycle = running_time * drivers_.size() / stats.num_responses;\n  if (real_cycle > cur_cycle_ns_ * 1.05) {\n    cur_cycle_ns_ = (cur_cycle_ns_ + real_cycle) / 2;\n    VLOG(1) << \"Increasing cycle to \" << cur_cycle_ns_;\n  } else if (cur_cycle_ns_ > target_cycle_) {\n    cur_cycle_ns_ -= (cur_cycle_ns_ - target_cycle_) * 0.2;\n  }\n}\n\nthread_local unique_ptr<TLocalClient> client;\n\nvoid WatchFiber(size_t num_shards, atomic_bool* finish_signal, ProactorPool* pp) {\n  fb2::Mutex mutex;\n\n  int64_t start_time = absl::GetCurrentTimeNanos();\n  LOG(INFO) << \"Started watching\";\n\n  int64_t last_print = start_time;\n  uint64_t num_last_resp_cnt = 0;\n  num_shards = max<size_t>(num_shards, 1u);\n  uint64_t resp_goal = GetFlag(FLAGS_c) * pp->size() * GetFlag(FLAGS_n) * num_shards;\n  uint32_t time_limit = GetFlag(FLAGS_test_time);\n  bool should_throttle = GetFlag(FLAGS_qps) > 0;\n\n  while (*finish_signal == false) {\n    // we sleep with resolution of 1s but print with lower frequency to be more responsive\n    // when benchmark finishes.\n    ThisFiber::SleepFor(1s);\n    if (should_throttle) {\n      pp->AwaitBrief([](auto, auto*) { client->AdjustCycle(); });\n    }\n\n    int64_t now = absl::GetCurrentTimeNanos();\n    if (now - last_print < 5000'000'000LL)  // 5s\n      continue;\n\n    ClientStats stats;\n    float done_max = 0;\n    float done_min = 1;\n    unsigned max_pending = 0;\n\n    pp->AwaitFiberOnAll([&](auto* p) {\n      auto [mind, maxd] = client->GetMinMaxDone();\n      unsigned max_pend = client->MaxPending();\n\n      unique_lock lk(mutex);\n      stats += client->stats;\n      done_max = max(done_max, maxd);\n      done_min = min(done_min, mind);\n      max_pending = max(max_pending, max_pend);\n      client->stats.online_hist.Clear();\n    });\n\n    uint64_t total_ms = (now - 
start_time) / 1'000'000;\n    uint64_t period_ms = (now - last_print) / 1'000'000;\n    uint64_t period_resp_cnt = stats.num_responses - num_last_resp_cnt;\n    double done_perc = time_limit > 0 ? double(total_ms) / (10 * time_limit)\n                                      : double(stats.num_responses) * 100 / resp_goal;\n    double hitrate = stats.hit_opportunities > 0\n                         ? 100 * double(stats.hit_count) / double(stats.hit_opportunities)\n                         : 0;\n    unsigned latency = stats.online_hist.Percentile(99);\n\n    CONSOLE_INFO << total_ms / 1000 << \"s: \" << StrFormat(\"%.1f\", done_perc)\n                 << \"% done, RPS(now/agg): \" << period_resp_cnt * 1000 / period_ms << \"/\"\n                 << stats.num_responses * 1000 / total_ms << \", errs: \" << stats.num_errors\n                 << \", hitrate: \" << StrFormat(\"%.1f%%\", hitrate)\n                 << \", clients: \" << stats.num_clients << \"\\n\"\n                 << \"done_min: \" << StrFormat(\"%.2f%%\", done_min * 100)\n                 << \", done_max: \" << StrFormat(\"%.2f%%\", done_max * 100)\n                 << \", p99_lat(us): \" << latency << \", max_pending: \" << max_pending;\n\n    last_print = now;\n    num_last_resp_cnt = stats.num_responses;\n  }\n}\n\nClusterShards FetchClusterInfo(const tcp::endpoint& ep, ProactorBase* proactor) {\n  unique_ptr<FiberSocketBase> socket(proactor->CreateSocket());\n  error_code ec = socket->Connect(ep);\n  CHECK(!ec) << \"Could not connect to \" << ep << \" \" << ec;\n\n  if (const auto password = GetFlag(FLAGS_password); !password.empty()) {\n    RunCommandAndCheckResultIs(StrFormat(\"AUTH %s\\r\\n\", password), \"+OK\\r\\n\", socket.get());\n  }\n\n  ec = socket->Write(io::Buffer(\"cluster nodes\\r\\n\"));\n  CHECK(!ec);\n  facade::RedisParser parser{RedisParser::CLIENT, 1024};\n  uint8_t buf[1024];\n  RespVec resp_vec;\n  while (true) {\n    io::Result<size_t> res = socket->Recv(buf);\n    CHECK(res) << 
res.error().message();\n    RespExpr::Buffer bytes(buf, *res);\n    uint32_t consumed = 0;\n    facade::RedisParser::Result result = parser.Parse(bytes, &consumed, &resp_vec);\n    if (result == facade::RedisParser::OK) {\n      break;\n    }\n    CHECK_EQ(result, facade::RedisParser::INPUT_PENDING);\n  }\n  CHECK_EQ(1u, resp_vec.size());\n  std::ignore = socket->Close();\n  if (resp_vec.front().type == RespExpr::ERROR) {\n    LOG(INFO) << \"Cluster command failed \" << resp_vec.front().GetString();\n    return {};\n  }\n  string cluster_spec = resp_vec.front().GetString();\n  LOG(INFO) << \"Cluster spec: \" << cluster_spec;\n  vector<string_view> lines = absl::StrSplit(cluster_spec, '\\n', absl::SkipEmpty());\n  ClusterShards res;\n  for (string_view line : lines) {\n    vector<string_view> parts = absl::StrSplit(line, ' ');\n    // <id> <ip:port@cport[,hostname]> <flags> <master> <ping-sent> <pong-recv>\n    // <config-epoch> <link-state> <slot> <slot> ... <slot>\n    if (parts.size() < 9) {\n      LOG(WARNING) << \"Skipping line: \" << line;\n      continue;\n    }\n    ShardInfo shard;\n    vector<string_view> addr_parts = absl::StrSplit(parts[1], ':');\n    CHECK_EQ(2u, addr_parts.size());\n    string host(addr_parts[0]);\n    char ip_addr[INET6_ADDRSTRLEN];\n    std::error_code ec = fb2::DnsResolve(host, ip_addr);\n    CHECK(!ec) << \"Could not resolve \" << host << \" \" << ec;\n    auto address = ::boost::asio::ip::make_address(ip_addr);\n\n    uint32_t val;\n    vector<string_view> port_parts = absl::StrSplit(addr_parts[1], '@');\n    CHECK_EQ(2u, port_parts.size());\n    CHECK(absl::SimpleAtoi(port_parts[0], &val));\n    CHECK_LT(val, 65536u);\n\n    shard.endpoint = tcp::endpoint(address, val);\n\n    string_view flags = parts[2];\n    absl::flat_hash_set<string_view> flags_set(absl::StrSplit(flags, ','));\n    if (!flags_set.contains(\"master\")) {\n      LOG(INFO) << \"Skipping non-master node \" << shard.endpoint << \" \" << flags;\n      continue;\n  
  }\n\n    for (size_t i = 8; i < parts.size(); ++i) {\n      vector<string_view> slots = absl::StrSplit(parts[i], '-');\n      if (!absl::SimpleAtoi(slots[0], &val) || val >= kNumSlots) {\n        LOG(ERROR) << \"Invalid slot definition \" << parts[i];\n        continue;\n      }\n      SlotRange slot_range{uint16_t(val), uint16_t(val)};\n      if (slots.size() > 1) {\n        CHECK(absl::SimpleAtoi(slots[1], &val));\n        slot_range.second = val;\n      }\n      shard.slots.push_back(slot_range);\n    }\n    res.push_back(shard);\n  }\n\n  return res;\n}\n\nint main(int argc, char* argv[]) {\n  MainInitGuard guard(&argc, &argv);\n\n  unique_ptr<ProactorPool> pp;\n#ifdef __linux__\n  pp.reset(fb2::Pool::IOUring(256));\n#else\n  pp.reset(fb2::Pool::Epoll());\n#endif\n  pp->Run();\n  fb2::InitDnsResolver(2000);\n\n  ProactorBase::RegisterSignal({SIGTERM}, pp->GetNextProactor(), [](int) {\n    CONSOLE_INFO << \"terminate requested\";\n    terminate_requested = true;\n  });\n\n  string proto_str = GetFlag(FLAGS_P);\n  if (proto_str == \"memcache_text\") {\n    protocol = MC_TEXT;\n  } else {\n    CHECK(proto_str.empty());\n    protocol = RESP;\n  }\n\n  string dist = GetFlag(FLAGS_key_dist);\n\n  if (dist == \"U\") {\n    dist_type = UNIFORM;\n  } else if (dist == \"N\") {\n    dist_type = NORMAL;\n  } else if (dist == \"Z\") {\n    dist_type = ZIPFIAN;\n  } else if (dist == \"S\") {\n    dist_type = SEQUENTIAL;\n  } else {\n    LOG(FATAL) << \"Unknown distribution type: \" << dist;\n  }\n\n  auto* proactor = pp->GetNextProactor();\n  char ip_addr[128];\n\n  error_code ec =\n      proactor->Await([&] { return fb2::DnsResolve(GetFlag(FLAGS_h), 2000, ip_addr, proactor); });\n  CHECK(!ec) << \"Could not resolve \" << GetFlag(FLAGS_h) << \" \" << ec;\n\n  auto address = ::boost::asio::ip::make_address(ip_addr);\n  tcp::endpoint ep{address, GetFlag(FLAGS_p)};\n\n  ClusterShards shards;\n  if (protocol == RESP && GetFlag(FLAGS_probe_cluster)) {\n    shards = 
proactor->Await([&] { return FetchClusterInfo(ep, proactor); });\n  }\n  CONSOLE_INFO << \"Connecting to \"\n               << (shards.empty() ? string(\"single node \")\n                                  : absl::StrCat(shards.size(), \" shard cluster\"));\n\n  if (!shards.empty() && !GetFlag(FLAGS_command).empty() && GetFlag(FLAGS_cluster_skip_tags)) {\n    // For custom commands we may need to use the same hashtag for multiple keys.\n    LOG(WARNING) << \"Enforcing hash tags for custom commands\";\n    absl::SetFlag(&FLAGS_cluster_skip_tags, false);\n  }\n\n  ShardSlots shard_slots;\n  shard_slots.SetClusterSlotRanges(shards);\n  std::vector<tcp::endpoint> shard_endpoints = shard_slots.Endpoints();\n  pp->AwaitBrief([&](unsigned index, auto* p) {\n    base::SplitMix64 seed_mix(GetFlag(FLAGS_seed) + index * 0x6a45554a264d72bULL);\n    auto seed = seed_mix();\n    VLOG(1) << \"Seeding bitgen with seed \" << seed;\n    bit_gen.seed(seed);\n  });\n\n  pp->AwaitFiberOnAll([&](unsigned index, auto* p) {\n    client = make_unique<TLocalClient>(p, &shard_slots);\n    client->Connect(ep, shard_endpoints);\n  });\n\n  absl::Duration duration;\n  if (absl::GetFlag(FLAGS_connect_only)) {\n    pp->AwaitFiberOnAll([&](unsigned index, auto* p) { client->Disconnect(); });\n  } else {\n    const uint32_t key_minimum = GetFlag(FLAGS_key_minimum);\n    const uint32_t key_maximum = GetFlag(FLAGS_key_maximum);\n    CHECK_LE(key_minimum, key_maximum);\n\n    uint32_t thread_key_step = 0;\n    uint32_t desired_qps = abs(GetFlag(FLAGS_qps));\n    bool throttle = GetFlag(FLAGS_qps) > 0;\n    const int64_t interval = desired_qps ? 
1'000'000'000LL / desired_qps : 0;\n    uint64_t num_reqs = GetFlag(FLAGS_n);\n\n    uint64_t total_conn_num = GetFlag(FLAGS_c) * pp->size();\n    uint64_t total_requests = num_reqs * total_conn_num;\n    uint32_t time_limit = GetFlag(FLAGS_test_time);\n\n    if (dist_type == SEQUENTIAL) {\n      thread_key_step = std::max(1UL, (key_maximum - key_minimum + 1) / pp->size());\n      if (total_requests > (key_maximum - key_minimum)) {\n        CONSOLE_INFO << \"Warning: only \" << key_maximum - key_minimum\n                     << \" unique entries will be accessed with \" << total_requests\n                     << \" total requests\";\n      }\n    }\n\n    if (!time_limit) {\n      CONSOLE_INFO << \"Running \" << pp->size() << \" threads, sending \" << num_reqs\n                   << \" requests per each connection, or \" << total_requests << \" requests overall \"\n                   << (throttle ? \"with\" : \"without\") << \" throttling\";\n    }\n    if (interval) {\n      CONSOLE_INFO << \"At a rate of \" << desired_qps << \" rps per connection, i.e. request every \"\n                   << interval / 1000 << \"us\";\n      CONSOLE_INFO << \"Overall scheduled RPS: \" << desired_qps * total_conn_num;\n    } else {\n      CONSOLE_INFO << \"Coordinated omission mode - the rate is determined by the server\";\n    }\n\n    atomic_bool finish{false};\n    pp->AwaitBrief([&](unsigned index, auto* p) {\n      uint32_t key_max = (thread_key_step > 0 && index + 1 < pp->size())\n                             ? 
key_minimum + (index + 1) * thread_key_step - 1\n                             : key_maximum;\n      client->Start(key_minimum + index * thread_key_step, key_max, interval);\n    });\n\n    auto watch_fb =\n        pp->GetNextProactor()->LaunchFiber([&] { WatchFiber(shards.size(), &finish, pp.get()); });\n    const absl::Time start_time = absl::Now();\n\n    // The actual run.\n    pp->AwaitFiberOnAll([&](unsigned index, auto* p) { client->Join(); });\n\n    duration = absl::Now() - start_time;\n    finish.store(true);\n    watch_fb.Join();\n  }\n\n  fb2::Mutex mutex;\n\n  LOG(INFO) << \"Resetting all threads\";\n\n  ClientStats summary;\n  pp->AwaitFiberOnAll([&](auto* p) {\n    unique_lock lk(mutex);\n    summary += client->stats;\n    lk.unlock();\n    client.reset();\n  });\n\n  CONSOLE_INFO << \"\\nTotal time: \" << duration\n               << \". Overall number of requests: \" << summary.num_responses\n               << \", QPS: \" << summary.qps << \", P99 lat: \" << summary.total_hist.Percentile(99)\n               << \"us\";\n\n  if (summary.num_errors) {\n    CONSOLE_INFO << \"Got \" << summary.num_errors << \" error responses!\";\n  }\n\n  CONSOLE_INFO << \"Latency summary, all times are in usec:\\n\" << summary.total_hist.ToString();\n  if (summary.hit_opportunities) {\n    CONSOLE_INFO << \"----------------------------------\\nHit rate: \"\n                 << 100 * double(summary.hit_count) / double(summary.hit_opportunities) << \"%\\n\";\n  }\n  pp->Stop();\n\n  return 0;\n}\n"
  },
  {
    "path": "src/server/dfly_main.cc",
    "content": "// Copyright 2023, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#include <absl/flags/parse.h>\n#include <absl/flags/usage.h>\n#include <absl/flags/usage_config.h>\n#include <absl/strings/match.h>\n#include <absl/strings/str_cat.h>\n#include <absl/strings/str_split.h>\n#include <absl/strings/strip.h>\n\n#include \"absl/cleanup/cleanup.h\"\n#include \"absl/container/inlined_vector.h\"\n#include \"absl/strings/numbers.h\"\n\n#ifdef DFLY_ENABLE_MEMORY_TRACKING\n#define INJECT_ALLOCATION_TRACKER\n#include \"core/allocation_tracker.h\"\n#else\n#include <mimalloc-new-delete.h>\n#endif\n\n#ifdef __linux__\n#include \"util/fibers/uring_proactor.h\"\n#endif\n\n#include <mimalloc.h>\n#include <signal.h>\n\n#include <iostream>\n#include <memory>\n\n#ifdef USE_AFL\n#include <arpa/inet.h>\n#include <fcntl.h>\n#include <netinet/in.h>\n#include <sys/socket.h>\n#include <unistd.h>\n\n#include <thread>\n#endif\n\n#include \"base/init.h\"\n#include \"base/proc_util.h\"  // for GetKernelVersion\n#include \"facade/dragonfly_listener.h\"\n#include \"io/file.h\"\n#include \"io/file_util.h\"\n#include \"io/proc_reader.h\"\n#include \"server/common.h\"\n#include \"server/generic_family.h\"\n#include \"server/main_service.h\"\n#include \"server/server_family.h\"\n#include \"server/version.h\"\n#include \"server/version_monitor.h\"\n#include \"strings/human_readable.h\"\n#include \"util/accept_server.h\"\n#include \"util/fibers/pool.h\"\n#include \"util/varz.h\"\n\n#ifdef __APPLE__\n#include <crt_externs.h>\n#define environ (*_NSGetEnviron())\n#else\nextern char** environ;\n#endif\n\nusing namespace std;\n\nABSL_DECLARE_FLAG(int32_t, port);\nABSL_DECLARE_FLAG(uint32_t, memcached_port);\nABSL_DECLARE_FLAG(uint16_t, admin_port);\nABSL_DECLARE_FLAG(std::string, admin_bind);\nABSL_DECLARE_FLAG(strings::MemoryBytesFlag, maxmemory);\n\nABSL_FLAG(string, bind, \"\",\n          \"Bind address. If empty - binds on all interfaces. 
\"\n          \"It's not advised due to security implications.\");\nABSL_FLAG(string, pidfile, \"\", \"If not empty - server writes its pid into the file\");\nABSL_FLAG(string, unixsocket, \"\",\n          \"If not empty - specifies path for the Unix socket that will \"\n          \"be used for listening for incoming connections.\");\nABSL_FLAG(string, unixsocketperm, \"\", \"Set permissions for unixsocket, in octal value.\");\nABSL_FLAG(bool, force_epoll, false,\n          \"If true - uses linux epoll engine underneath. \"\n          \"Can fit for kernels older than 5.10.\");\nABSL_FLAG(\n    string, allocation_tracker, \"\",\n    \"Logs stack trace of memory allocation within these ranges. Format is min:max,min:max,....\");\n\nABSL_FLAG(bool, version_check, true,\n          \"If true, Will monitor for new releases on Dragonfly servers once a day.\");\n\nABSL_FLAG(uint16_t, tcp_backlog, 256, \"TCP listen(2) backlog parameter.\");\nABSL_FLAG(uint16_t, uring_recv_buffer_cnt, 0,\n          \"How many buffer ring entries to allocate per thread for io_uring receive operations. \"\n          \"Relevant only for modern kernels with io_uring enabled\");\n\nABSL_FLAG(bool, omit_basic_usage, false, \"Omit printing basic usage info.\");\n\n#ifdef USE_AFL\nABSL_FLAG(uint32_t, afl_loop_limit, UINT_MAX,\n          \"AFL++ persistent mode loop limit. Specifies how many fuzzing iterations \"\n          \"to run before restarting the process. Higher values improve performance but \"\n          \"may accumulate state.\");\nABSL_FLAG(uint16_t, afl_target_port, 0,\n          \"Port to send fuzz input to. Defaults to --port (RESP). 
\"\n          \"Set to --memcached_port to fuzz the memcache protocol.\");\n#endif\n\nusing namespace util;\nusing namespace facade;\nusing namespace io;\nusing absl::GetFlag;\nusing absl::StrCat;\nusing strings::HumanReadableNumBytes;\n\nnamespace dfly {\n\nnamespace {\n\n#if ABSL_HAVE_ADDRESS_SANITIZER\n// Increase stack size for all debug builds; tools like ASAN can require more than 50 KB.\nconstexpr size_t kAsanFactor = 2;\n#else\nconstexpr size_t kAsanFactor = 1;\n#endif\n\n#ifdef NDEBUG\nconstexpr size_t kFiberStackBase = 32_KB;\n#else\nconstexpr size_t kFiberStackBase = 48_KB;\n#endif\n\n// Default stack size for fibers. We decrease it by 16 bytes because some allocators\n// need additional 8-16 bytes for their internal structures, thus over reserving additional\n// memory pages if using round sizes.\nconstexpr size_t kFiberDefaultStackSize = kFiberStackBase * kAsanFactor - 16;\n\nenum class TermColor : uint8_t { kDefault, kRed, kGreen, kYellow };\n\n// Returns the ANSI color code for the given color. 
TermColor::kDefault is\n// an invalid input.\nconst char* GetAnsiColorCode(TermColor color) {\n  switch (color) {\n    case TermColor::kRed:\n      return \"1\";\n    case TermColor::kGreen:\n      return \"2\";\n    case TermColor::kYellow:\n      return \"3\";\n    default:\n      return nullptr;\n  }\n}\n\nstring ColorStart(TermColor color) {\n  return StrCat(\"\\033[0;3\", GetAnsiColorCode(color), \"m\");\n}\n\n// Resets the terminal to default.\nconst char kColorEnd[] = \"\\033[m\";\n\nstring ColoredStr(TermColor color, string_view str) {\n  return StrCat(ColorStart(color), str, kColorEnd);\n}\n\nbool HelpshortFlags(std::string_view f) {\n  return absl::StartsWith(f, \"\\033[0;32\");\n}\n\nbool HelpFlags(std::string_view f) {\n  return absl::StartsWith(f, \"\\033[0;3\");\n}\n\n#define STRING_PP_NX(A) #A\n#define STRING_MAKE_PP(A) STRING_PP_NX(A)\n\n// This would create a string value from a \"defined\" location of the source code\n// Note that SOURCE_PATH_FROM_BUILD_ENV is taken from the build system\n#define BUILD_LOCATION_PATH STRING_MAKE_PP(SOURCE_PATH_FROM_BUILD_ENV)\n\nstring NormalizePaths(std::string_view path) {\n  const std::string FULL_PATH = BUILD_LOCATION_PATH;\n  const std::string FULL_PATH_SRC = FULL_PATH + \"/src\";\n  const std::string FULL_PATH_HELIO = FULL_PATH + \"/helio\";\n\n  if (absl::ConsumePrefix(&path, \"../src/\") || absl::ConsumePrefix(&path, FULL_PATH_SRC))\n    return ColoredStr(TermColor::kGreen, path);\n\n  if (absl::ConsumePrefix(&path, \"../\") || absl::ConsumePrefix(&path, FULL_PATH_HELIO))\n    return ColoredStr(TermColor::kYellow, path);\n\n  if (absl::ConsumePrefix(&path, \"_deps/\"))\n    return string(path);\n\n  return string(path);\n}\n\ntemplate <typename... Args> unique_ptr<Listener> MakeListener(Args&&... 
args) {\n  auto res = make_unique<Listener>(std::forward<Args>(args)...);\n  res->SetConnFiberStackSize(kFiberDefaultStackSize);\n  return res;\n}\n\nvoid RunEngine(ProactorPool* pool, AcceptServer* acceptor) {\n  uint64_t maxmemory = absl::GetFlag(FLAGS_maxmemory);\n  if (maxmemory > 0 && maxmemory < pool->size() * 256_MB) {\n    LOG(ERROR) << \"There are \" << pool->size() << \" threads, so \"\n               << HumanReadableNumBytes(pool->size() * 256_MB) << \" are required. Exiting...\";\n    exit(1);\n  }\n\n  Service service(pool);\n\n  auto tcp_disabled = GetFlag(FLAGS_port) == 0u;\n  Listener* main_listener = nullptr;\n\n  std::vector<facade::Listener*> listeners;\n\n  // If we ever add a new listener, plz don't change this,\n  // we depend on tcp listener to be at the front since we later\n  // need to pass it to the AclFamily::Init\n  if (!tcp_disabled) {\n    auto listener = MakeListener(Protocol::REDIS, &service, Listener::Role::MAIN);\n    main_listener = listener.get();\n    listeners.push_back(listener.release());\n  }\n\n  const auto& bind = GetFlag(FLAGS_bind);\n\n  // Protected mode: if no bind address is specified and no password is set,\n  // bind only to localhost to prevent unauthorized remote access.\n  // Only enabled when running under systemd (INVOCATION_ID is set) to avoid\n  // breaking containerized deployments where binding to localhost would make\n  // the service unreachable from the host.\n  // GetPassword() checks both --requirepass flag and DFLY_PASSWORD env var.\n  bool running_under_systemd = getenv(\"INVOCATION_ID\") != nullptr;\n  bool protected_mode = running_under_systemd && bind.empty() && GetPassword().empty();\n  const char* bind_addr = nullptr;\n  if (protected_mode) {\n    bind_addr = \"127.0.0.1\";\n    LOG(WARNING) << \"Protected mode enabled. Binding to localhost only because no password is set. 
\"\n                 << \"To accept remote connections, set a password with --requirepass or \"\n                 << \"specify a bind address with --bind.\";\n  } else if (!bind.empty()) {\n    bind_addr = bind.c_str();\n  }\n\n  int32_t port = GetFlag(FLAGS_port);\n  // The reason for this code is a bit silly. We want to provide a way to\n  // bind any 'random' available port. The way to do that is to call\n  // bind with the argument port 0. However we can't expose this functionality\n  // as is to our users: Since giving --port=0 to redis DISABLES the network\n  // interface that would break users' existing configurations in potentionally\n  // unsafe ways. For that reason the user's --port=-1 means to us 'bind port 0'.\n  if (port == -1) {\n    port = 0;\n  } else if (port < 0 || port > 65535) {\n    LOG(ERROR) << \"Bad port number \" << port;\n    exit(1);\n  }\n\n  auto mc_port = GetFlag(FLAGS_memcached_port);\n  string unix_sock = GetFlag(FLAGS_unixsocket);\n  bool unlink_uds = false;\n  absl::Cleanup maybe_unlink_uds([&unlink_uds, &unix_sock]() {\n    if (unlink_uds) {\n      unlink(unix_sock.c_str());\n    }\n  });\n\n  if (!unix_sock.empty()) {\n    string perm_str = GetFlag(FLAGS_unixsocketperm);\n    uint32_t unix_socket_perm;\n    if (perm_str.empty()) {\n      // get umask of running process, indicates the permission bits that are turned off\n      mode_t umask_val = umask(0);\n      umask(umask_val);\n      unix_socket_perm = 0777 & ~umask_val;\n    } else {\n      if (!absl::numbers_internal::safe_strtoi_base(perm_str, &unix_socket_perm, 8) ||\n          unix_socket_perm > 0777) {\n        LOG(ERROR) << \"Invalid unixsocketperm: \" << perm_str;\n        exit(1);\n      }\n    }\n    unlink(unix_sock.c_str());\n\n    auto uds_listener = MakeListener(Protocol::REDIS, &service);\n    error_code ec =\n        acceptor->AddUDSListener(unix_sock.c_str(), unix_socket_perm, uds_listener.get());\n    if (ec) {\n      if (tcp_disabled) {\n        LOG(ERROR) 
<< \"Could not open unix socket \" << unix_sock\n                   << \", and TCP listening is disabled (error: \" << ec << \"). Exiting.\";\n        exit(1);\n      } else {\n        LOG(WARNING) << \"Could not open unix socket \" << unix_sock << \", error \" << ec;\n      }\n    } else {\n      LOG(INFO) << \"Listening on unix socket \" << unix_sock;\n      listeners.push_back(uds_listener.release());\n      unlink_uds = true;\n    }\n  } else if (tcp_disabled) {\n    LOG(ERROR)\n        << \"Did not receive a unix socket to listen to, yet TCP listening is disabled. Exiting.\";\n    exit(1);\n  }\n\n  std::uint16_t admin_port = GetFlag(FLAGS_admin_port);\n  if (admin_port != 0) {\n    const std::string& admin_bind = GetFlag(FLAGS_admin_bind);\n    // Note passing the result of c_str() for empty string in optimized mode don't work, we must\n    // explicitly set this to null in this case\n    const char* interface_addr = admin_bind.empty() ? nullptr : admin_bind.c_str();\n    const std::string printable_addr =\n        absl::StrCat(\"admin socket \", interface_addr ? 
interface_addr : \"any\", \":\", admin_port);\n    auto admin_listener = MakeListener(Protocol::REDIS, &service, Listener::Role::PRIVILEGED);\n\n    error_code ec = acceptor->AddListener(interface_addr, admin_port, admin_listener.get());\n\n    if (ec) {\n      LOG(ERROR) << \"Failed to open \" << printable_addr << \", error: \" << ec.message();\n    } else {\n      LOG(INFO) << \"Listening on \" << printable_addr;\n      listeners.push_back(admin_listener.release());\n    }\n  }\n\n  if (main_listener) {\n    error_code ec = acceptor->AddListener(bind_addr, port, main_listener);\n\n    if (ec) {\n      LOG(ERROR) << \"Could not open port \" << port << \", error: \" << ec.message();\n      exit(1);\n    }\n\n    if (port == 0) {\n      absl::SetFlag(&FLAGS_port, main_listener->socket()->LocalEndpoint().port());\n    }\n  }\n\n  if (mc_port > 0 && !tcp_disabled) {\n    auto listener = MakeListener(Protocol::MEMCACHE, &service);\n    error_code ec = acceptor->AddListener(bind_addr, mc_port, listener.get());\n    if (ec) {\n      LOG(ERROR) << \"Could not open memcached port \" << mc_port << \", error: \" << ec.message();\n      exit(1);\n    }\n    listeners.push_back(listener.release());\n  }\n\n  service.Init(acceptor, listeners);\n\n  VersionMonitor version_monitor;\n\n  // check if it's a production release tag.\n  if (GetFlag(FLAGS_version_check) && kGitTag[0] == 'v' && strchr(kGitTag, '-') == nullptr) {\n    version_monitor.Run(pool);\n  }\n\n  // Start the acceptor loop and wait for the server to shutdown.\n  acceptor->Run();\n  google::FlushLogFiles(google::INFO);  // Flush the header.\n\n  acceptor->Wait();\n\n  version_monitor.Shutdown();\n  service.Shutdown();\n}\n\nbool CreatePidFile(const string& path) {\n  Result<WriteFile*> res = OpenWrite(path);\n  if (!res) {\n    LOG(ERROR) << \"Failed to open pidfile with error: \" << res.error().message() << \". 
Exiting...\";\n    return false;\n  }\n\n  unique_ptr<WriteFile> wf(res.value());\n  auto ec = wf->Write(to_string(getpid()));\n  if (ec) {\n    LOG(ERROR) << \"Failed to write pid into pidfile with error: \" << ec.message() << \". Exiting...\";\n    return false;\n  }\n\n  ec = wf->Close();\n  if (ec) {\n    LOG(WARNING) << \"Failed to close pidfile file descriptor with error: \" << ec.message() << \".\";\n  }\n\n  return true;\n}\n\n#ifdef __linux__\nbool ShouldUseEpollAPI(const base::sys::KernelVersion& kver) {\n  if (GetFlag(FLAGS_force_epoll))\n    return true;\n\n  if (kver.kernel < 5 || (kver.kernel == 5 && kver.major < 10)) {\n    LOG(WARNING) << \"Kernel is older than 5.10, switching to epoll engine.\";\n    return true;\n  }\n\n  struct io_uring ring;\n  io_uring_params params;\n  memset(&params, 0, sizeof(params));\n\n  int iouring_res = io_uring_queue_init_params(1024, &ring, &params);\n\n  if (iouring_res == 0) {\n    io_uring_queue_exit(&ring);\n    return false;\n  }\n\n  iouring_res = -iouring_res;\n\n  if (iouring_res == ENOSYS) {\n    LOG(WARNING) << \"iouring API is not supported. switching to epoll.\";\n  } else if (iouring_res == ENOMEM) {\n    LOG(WARNING) << \"io_uring does not have enough memory. That can happen when your \"\n                    \"max locked memory is too limited. If you run via docker, \"\n                    \"try adding '--ulimit memlock=-1' to \\\"docker run\\\" command.\"\n                    \"Meanwhile, switching to epoll\";\n  } else {\n    LOG(WARNING) << \"Weird error \" << iouring_res << \" switching to epoll\";\n  }\n\n  return true;\n}\n\nvoid GetCGroupPath(string* memory_path, string* cpu_path) {\n  CHECK(memory_path != nullptr) << \"memory_path is null! (this shouldn't happen!)\";\n  CHECK(cpu_path != nullptr) << \"cpu_path is null! 
(this shouldn't happen!)\";\n\n  // Begin by reading /proc/self/cgroup\n\n  auto cg = io::ReadFileToString(\"/proc/self/cgroup\");\n  CHECK(cg.has_value()) << \"Failed to read /proc/self/cgroup\";\n\n  string cgv = std::move(cg).value();\n\n  // Next, depending on cgroup version we either read:\n  // N:<cgroup name>:<path> -- in case of v1, in many lines\n  // 0::<cgroup name> -- in case of v2, in a single line\n\n  auto stripped = absl::StripAsciiWhitespace(cgv);\n\n  vector<string_view> groups = absl::StrSplit(stripped, '\\n');\n\n  if (groups.size() == 1) {\n    // for v2 we only read 0::<name>\n    size_t pos = cgv.rfind(':');\n    if (pos == string::npos) {\n      LOG(ERROR) << \"Failed to parse cgroupv2 format, got: \" << cgv;\n      exit(1);\n    }\n\n    auto cgroup = string_view(cgv.c_str() + pos + 1);\n    string_view cgroup_stripped = absl::StripTrailingAsciiWhitespace(cgroup);\n\n    *memory_path = absl::StrCat(\"/sys/fs/cgroup/\", cgroup_stripped);\n    *cpu_path = *memory_path;  // in v2 the path to the cgroup is singular\n  } else {\n    for (const auto& sv : groups) {\n      // in v1 the format is\n      // N:s1:2 where N is an integer, s1, s2 strings with s1 maybe empty.\n      vector<string_view> entry = absl::StrSplit(sv, ':');\n      if (entry.size() != 3u) {\n        LOG(ERROR) << \"Unsupported group \" << sv;\n        continue;\n      }\n\n      // in v1 there are several 'canonical' cgroups\n      // we are interested in the 'memory' and the 'cpu,cpuacct' ones\n      // which specify memory and cpu limits, respectively.\n      if (entry[1] == \"memory\")\n        *memory_path = absl::StrCat(\"/sys/fs/cgroup/memory/\", entry[2]);\n\n      if (entry[1] == \"cpu,cpuacct\")\n        *cpu_path = absl::StrCat(\"/sys/fs/cgroup/cpu,cpuacct/\", entry[2]);\n    }\n  }\n}\n\n// returns true on success.\nbool UpdateResourceLimitsIfInsideContainer(io::MemInfoData* mdata, size_t* max_threads) {\n  using absl::StrCat;\n\n  // did we succeed in reading 
*something*? if not, exit.\n  // note that all processes in Linux are in some cgroup, so at the very\n  // least we should read something.\n  bool read_something = false;\n\n  auto read_mem = [&read_something](string_view path, size_t* output) {\n    auto file = io::ReadFileToString(path);\n    DVLOG(1) << \"container limits: read \" << path << \": \" << file.value_or(\"N/A\");\n\n    size_t temp = numeric_limits<size_t>::max();\n\n    if (file.has_value()) {\n      if (!absl::StartsWith(*file, \"max\"))\n        CHECK(absl::SimpleAtoi(*file, &temp))\n            << \"Failed in parsing cgroup limits, path: \" << path << \" (read: \" << *file << \")\";\n      read_something = true;\n    }\n\n    *output = min(*output, temp);\n  };\n\n  string mem_path, cpu_path;\n  GetCGroupPath(&mem_path, &cpu_path);\n\n  if (mem_path.empty() || cpu_path.empty()) {\n    return true;  // not a container\n  }\n\n  VLOG(1) << \"mem_path = \" << mem_path;\n  VLOG(1) << \"cpu_path = \" << cpu_path;\n\n  /* Update memory limits */\n\n  // Start by reading global memory limits\n  auto parse_limits = [&](std::string_view base_mem) {\n    read_mem(StrCat(base_mem, \"/memory.limit_in_bytes\"), &mdata->mem_total);\n    read_mem(StrCat(base_mem, \"/memory.max\"), &mdata->mem_total);\n  };\n\n  // For v1\n  constexpr auto base_mem_v1 = \"/sys/fs/cgroup/memory\"sv;\n  parse_limits(base_mem_v1);\n  // For v2 if the previous failed\n  constexpr auto base_mem_v2 = \"/sys/fs/cgroup\"sv;\n  parse_limits(base_mem_v2);\n  // For v2 under /user.slice\n  constexpr auto base_mem_v2_slice = \"/sys/fs/cgroup/user.slice\"sv;\n  parse_limits(base_mem_v2_slice);\n\n  // Read cgroup-specific limits\n  read_mem(StrCat(mem_path, \"/memory.limit_in_bytes\"), &mdata->mem_total);\n  read_mem(StrCat(mem_path, \"/memory.max\"), &mdata->mem_total);\n  read_mem(StrCat(mem_path, \"/memory.high\"), &mdata->mem_avail);\n  mdata->mem_avail = min(mdata->mem_avail, mdata->mem_total);\n\n  /* Update thread limits */\n\n  auto 
read_cpu = [&read_something](string_view path, size_t* output) {\n    double count{0}, timeshare{1};\n\n    /**\n     * Summarized: the function does one of the following:\n     *\n     * 1. read path/cpu.max -- for v2. The format of this file is:\n     *  $COUNT $PERIOD\n     * which indicates that we can use upto $COUNT shares in a $PERIOD of time.\n     * If $COUNT is max, then we can use as much CPU as the system has. Otherwise,\n     * this translates to $COUNT/$PERIOD threads.\n     *\n     * 2. read path/cpu.cfs_quota_us & path/cpu.cfs_period_us -- same idea, but for v1.\n     */\n\n    if (auto cpu = ReadFileToString(StrCat(path, \"/cpu.max\")); cpu.has_value()) {\n      vector<string_view> res = absl::StrSplit(*cpu, ' ');\n\n      // Some linux distributions do not have anything there.\n      if (res.size() == 2u) {\n        if (res[0] == \"max\")\n          *output = 0u;\n        else {\n          CHECK(absl::SimpleAtod(res[0], &count))\n              << \"Failed in parsing cgroupv2 cpu count, path = \" << path << \" (read: \" << *cpu\n              << \")\";\n          CHECK(absl::SimpleAtod(res[1], &timeshare))\n              << \"Failed in parsing cgroupv2 cpu timeshare, path = \" << path << \" (read: \" << *cpu\n              << \")\";\n\n          *output = static_cast<size_t>(ceil(count / timeshare));\n        }\n\n        read_something = true;\n      }\n    } else if (auto quota = ReadFileToString(StrCat(path, \"/cpu.cfs_quota_us\"));\n               quota.has_value()) {\n      auto period = ReadFileToString(StrCat(path, \"/cpu.cfs_period_us\"));\n\n      CHECK(period.has_value()) << \"Failed to read cgroup cpu.cfs_period_us, but read \"\n                                   \"cpu.cfs_quota_us (this shouldn't happen!)\";\n\n      CHECK(absl::SimpleAtod(quota.value(), &count))\n          << \"Failed in parsing cgroupv1 cpu timeshare, quota = \" << path << \" (read: \" << *quota\n          << \")\";\n\n      if (count == -1)  // on -1 there is no 
limit.\n        count = 0;\n\n      CHECK(absl::SimpleAtod(period.value(), &timeshare))\n          << \"Failed in parsing cgroupv1 cpu timeshare, path = \" << path << \" (read: \" << *period\n          << \")\";\n\n      *output = static_cast<size_t>(count / timeshare);\n      read_something = true;\n    }\n  };\n\n  constexpr auto base_cpu = \"/sys/fs/cgroup/cpu\"sv;\n  read_cpu(base_cpu, max_threads);  // global cpu limits\n  constexpr auto base_cpu_v2 = \"/sys/fs/cgroup\"sv;\n  read_cpu(base_cpu_v2, max_threads);  // global cpu limits\n  constexpr auto base_cpu_v2_slice = \"/sys/fs/cgroup/user.slice\"sv;\n  read_cpu(base_cpu_v2_slice, max_threads);  // global cpu limits\n  read_cpu(cpu_path, max_threads);           // cgroup-specific limits\n\n  if (!read_something) {\n    LOG(ERROR) << \"Failed in deducing any cgroup limits with paths \" << mem_path << \" and \"\n               << cpu_path;\n    return false;\n  }\n  return true;\n}\n\n#endif\n\nvoid SetupAllocationTracker(ProactorPool* pool) {\n#ifdef DFLY_ENABLE_MEMORY_TRACKING\n  string flag = absl::GetFlag(FLAGS_allocation_tracker);\n  vector<pair<size_t, size_t>> track_ranges;\n  for (string_view entry : absl::StrSplit(flag, \",\", absl::SkipEmpty())) {\n    auto separator = entry.find(\":\");\n    if (separator == entry.npos) {\n      LOG(ERROR) << \"Can't find ':' in element\";\n      exit(-1);\n    }\n\n    pair<size_t, size_t> p;\n    if (!absl::SimpleAtoi(entry.substr(0, separator), &p.first)) {\n      LOG(ERROR) << \"Can't parse first number in pair\";\n      exit(-1);\n    }\n    if (!absl::SimpleAtoi(entry.substr(separator + 1), &p.second)) {\n      LOG(ERROR) << \"Can't parse second number in pair\";\n      exit(-1);\n    }\n\n    track_ranges.push_back(p);\n  }\n\n  pool->AwaitBrief([&](unsigned, ProactorBase*) {\n    for (auto range : track_ranges) {\n      if (!AllocationTracker::Get().Add(\n              {.lower_bound = range.first, .upper_bound = range.second, .sample_odds = 1.0})) {\n        
LOG(ERROR) << \"Unable to track allocation range\";\n        exit(-1);\n      }\n    }\n  });\n#endif\n}\n\nvoid RegisterBufRings(ProactorPool* pool) {\n#ifdef __linux__\n  auto bufcnt = absl::GetFlag(FLAGS_uring_recv_buffer_cnt);\n  if (bufcnt == 0) {\n    return;\n  }\n\n  if (dfly::kernel_version < 602 || pool->at(0)->GetKind() != ProactorBase::IOURING) {\n    LOG(WARNING) << \"uring_recv_buffer_cnt is only supported on kernels >= 6.2 and with \"\n                    \"io_uring proactor\";\n    return;\n  }\n\n  // We need a power of 2 length.\n  bufcnt = absl::bit_ceil(bufcnt);\n  pool->AwaitBrief([&](unsigned, ProactorBase* pb) {\n    auto up = static_cast<fb2::UringProactor*>(pb);\n    int res = up->RegisterBufferRing(facade::kRecvSockGid, bufcnt, facade::kRecvBufSize);\n    if (res != 0) {\n      LOG(ERROR) << \"Failed to register buf ring for proactor \"\n                 << util::detail::SafeErrorMessage(res);\n      exit(1);\n    }\n  });\n  LOG(INFO) << \"Registered a bufring with \" << bufcnt << \" buffers of size \" << facade::kRecvBufSize\n            << \" per thread \";\n#endif\n}\n\nclass MiMallocResource : public PMR_NS::memory_resource {\n private:\n  void* do_allocate(std::size_t size, std::size_t align) final {\n    return mi_malloc_aligned(size, align);\n  }\n\n  void do_deallocate(void* ptr, std::size_t size, std::size_t align) final {\n    mi_free_size_aligned(ptr, size, align);\n  }\n\n  bool do_is_equal(const PMR_NS::memory_resource& o) const noexcept final {\n    return this == &o;\n  }\n};\n\nMiMallocResource g_mi_resource;\n\n#ifdef USE_AFL\n// AFL++ fuzzing helper functions\n// These functions support AFL++ persistent mode fuzzing by handling server readiness checks,\n// input reading, and test case execution. 
The __AFL_LOOP macro itself must remain in main()\n// due to AFL++ instrumentation requirements.\n\n// Waits for the Dragonfly server to become ready by attempting TCP connections.\n// Returns true if server is ready, false otherwise.\n// This is necessary because the server starts in a separate thread and we need to\n// wait for it to be fully initialized before starting the fuzzing loop.\nbool WaitForServerReady(uint16_t port, int max_attempts = 100) {\n  for (int i = 0; i < max_attempts; i++) {\n    std::this_thread::sleep_for(std::chrono::milliseconds(50));\n    int s = socket(AF_INET, SOCK_STREAM, 0);\n    if (s >= 0) {\n      struct sockaddr_in a = {};\n      a.sin_family = AF_INET;\n      a.sin_port = htons(port);\n      inet_pton(AF_INET, \"127.0.0.1\", &a.sin_addr);\n      if (connect(s, (struct sockaddr*)&a, sizeof(a)) == 0) {\n        close(s);\n        return true;\n      }\n      close(s);\n    }\n  }\n  return false;\n}\n\n// Configures stdin to non-blocking mode for AFL++ fuzzing.\n// Non-blocking mode is required because AFL++ feeds input through stdin,\n// and we need to handle cases where input might not be immediately available.\nvoid ConfigureStdinNonBlocking() {\n  fcntl(STDIN_FILENO, F_SETFL, fcntl(STDIN_FILENO, F_GETFL) | O_NONBLOCK);\n}\n\n// Reads fuzzing input from stdin with retry logic.\n// AFL++ provides test cases through stdin, and this function handles reading them\n// with appropriate retry logic for non-blocking I/O.\n// Returns the number of bytes read, or -1 on error, or 0 if no data available after retries.\nssize_t ReadFuzzInput(char* buffer, size_t buffer_size) {\n  ssize_t len = 0;\n  for (int attempt = 0; attempt < 100 && len == 0; attempt++) {\n    len = read(STDIN_FILENO, buffer, buffer_size);\n    if (len < 0 && errno == EAGAIN) {\n      usleep(10000);  // Wait 10ms and retry\n      continue;\n    }\n    if (len < 0)\n      break;\n  }\n  return len;\n}\n\n// Sends fuzzing input to the Dragonfly server and reads the 
response.\n// This executes one fuzzing iteration by:\n// 1. Creating a TCP socket connection to the server\n// 2. Sending the fuzzed data\n// 3. Reading a response (with timeout to prevent hangs)\n// The function uses short timeouts to keep fuzzing fast and prevent AFL++ from stalling.\nvoid SendFuzzInputToServer(uint16_t port, const char* data, ssize_t len) {\n  int s = socket(AF_INET, SOCK_STREAM, 0);\n  if (s >= 0) {\n    struct timeval tv = {.tv_sec = 0, .tv_usec = 200000};\n    setsockopt(s, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv));\n    setsockopt(s, SOL_SOCKET, SO_SNDTIMEO, &tv, sizeof(tv));\n\n    struct sockaddr_in a = {};\n    a.sin_family = AF_INET;\n    a.sin_port = htons(port);\n    inet_pton(AF_INET, \"127.0.0.1\", &a.sin_addr);\n    if (connect(s, (struct sockaddr*)&a, sizeof(a)) == 0) {\n      send(s, data, len, MSG_NOSIGNAL);\n      char r[4096];\n      recv(s, r, sizeof(r), 0);\n    }\n    close(s);\n  }\n}\n\n// Initializes AFL++ fuzzing by starting the server in a separate thread,\n// waiting for it to become ready, and preparing stdin for fuzzing input.\n// Returns the server thread handle. The caller is responsible for the fuzzing loop.\nstd::thread InitAflFuzzing(ProactorPool* pool, AcceptServer* acceptor) {\n  // Start server in a separate thread\n  std::thread server_thread([pool, acceptor]() {\n    dfly::RunEngine(pool, acceptor);\n    pool->Stop();\n  });\n\n  uint16_t target_port = GetFlag(FLAGS_afl_target_port);\n  uint16_t port = target_port ? 
target_port : GetFlag(FLAGS_port);\n\n  // Wait for server to become ready\n  if (!WaitForServerReady(port)) {\n    LOG(ERROR) << \"AFL++: Server not ready after 100 attempts, exiting...\";\n    exit(1);\n  }\n\n  uint32_t afl_loop_limit = GetFlag(FLAGS_afl_loop_limit);\n  LOG(INFO) << \"AFL++: Server ready, starting fuzzing loop with limit \" << afl_loop_limit\n            << \" iterations...\";\n\n  // Configure stdin for AFL++ input\n  ConfigureStdinNonBlocking();\n\n  return server_thread;\n}\n\n// Executes one AFL++ fuzzing iteration: reads input from stdin and sends it to the server.\n// Returns true if the iteration was successful, false if stdin EOF or error occurred.\nbool RunAflFuzzingIteration(uint16_t port) {\n  char buf[64 * 1024];\n\n  // Read fuzzing input from stdin\n  ssize_t len = ReadFuzzInput(buf, sizeof(buf));\n\n  if (len <= 0)\n    return false;  // stdin EOF or error\n\n  // Send fuzzed input to the server\n  SendFuzzInputToServer(port, buf, len);\n  return true;\n}\n#endif  // USE_AFL\n\n}  // namespace\n}  // namespace dfly\n\nextern \"C\" void _mi_options_init();\n\nusing namespace dfly;\n\nvoid sigill_hdlr(int signo) {\n  LOG(ERROR) << \"An attempt to execute an instruction failed.\"\n             << \"The root cause might be an old hardware. Exiting...\";\n  exit(1);\n}\n\nvoid PrintBasicUsageInfo() {\n  std::string output =\n      \"                   .--::--.                   \\n\"\n      \"   :+*=:          =@@@@@@@@=          :+*+:   \\n\"\n      \"  %@@@@@@%*=.     =@@@@@@@@-     .=*%@@@@@@#  \\n\"\n      \"  @@@@@@@@@@@@#+-. .%@@@@#. .-+#@@@@@@@@@@@%  \\n\"\n      \"  -@@@@@@@@@@@@@@@@*:#@@#:*@@@@@@@@@@@@@@@@-  \\n\"\n      \"    :+*********####-%@%%@%-####********++.    \\n\"\n      \"   .%@@@@@@@@@@@@@%:@@@@@@:@@@@@@@@@@@@@@%    \\n\"\n      \"   .@@@@@@@@%*+-:   =@@@@=  .:-+*%@@@@@@@%.   
\\n\"\n      \"     =*+-:           ###*          .:-+*=     \\n\"\n      \"                     %@@%                     \\n\"\n      \"                     *@@*                     \\n\"\n      \"                     +@@=                     \\n\"\n      \"                     :##:                     \\n\"\n      \"                     :@@:                     \\n\"\n      \"                      @@                      \\n\"\n      \"                      ..                      \\n\"\n      \"* Logs will be written to the first available of the following paths:\\n\";\n\n  for (const auto& dir : google::GetLoggingDirectories()) {\n    const string_view maybe_slash = absl::EndsWith(dir, \"/\") ? \"\" : \"/\";\n    absl::StrAppend(&output, dir, maybe_slash, \"dragonfly.*\\n\");\n  }\n\n  absl::StrAppend(&output,\n                  \"* For the available flags type dragonfly [--help | --helpfull]\\n\"\n                  \"* Documentation can be found at: https://www.dragonflydb.io/docs\\n\");\n\n  std::cout << output;\n  std::cout.flush();\n}\n\nvoid ParseFlagsFromEnv() {\n  const auto& flags = absl::GetAllFlags();\n  for (char** env = environ; *env != nullptr; env++) {\n    constexpr string_view kPrefix = \"DFLY_\";\n    string_view environ_var = *env;\n    if (absl::StartsWith(environ_var, kPrefix)) {\n      // Per 'man environ', environment variables are included with their values\n      // in the format \"name=value\". 
Need to strip them apart, in order to work with flags object\n      pair<string_view, string_view> environ_pair =\n          absl::StrSplit(absl::StripPrefix(environ_var, kPrefix), absl::MaxSplits('=', 1));\n      const auto& [flag_name, flag_value] = environ_pair;\n      if (flag_name == \"DEV_ENV\") {\n        continue;  // DFLY_DEV_ENV is used to skip version check.\n      }\n\n      auto entry = flags.find(flag_name);\n      if (entry != flags.end()) {\n        if (absl::flags_internal::WasPresentOnCommandLine(flag_name)) {\n          continue;\n        }\n        string error;\n        auto& flag = entry->second;\n        bool success = flag->ParseFrom(flag_value, &error);\n        if (!success) {\n          LOG(FATAL) << \"could not parse flag \" << flag->Name()\n                     << \" from environment variable. Error: \" << error;\n        }\n      } else {\n        LOG(FATAL) << \"unknown environment variable DFLY_\" << flag_name;\n      }\n    }\n  }\n}\n\nint main(int argc, char* argv[]) {\n  absl::SetProgramUsageMessage(\n      R\"(a modern in-memory store.\n\nUsage: dragonfly [FLAGS]\n)\");\n\n  absl::FlagsUsageConfig config;\n  config.contains_help_flags = dfly::HelpFlags;\n  config.contains_helpshort_flags = dfly::HelpshortFlags;\n  config.normalize_filename = dfly::NormalizePaths;\n  config.version_string = [] {\n    string version = StrCat(dfly::kGitTag, \"-\", dfly::kGitSha);\n    return StrCat(\"dragonfly \", ColoredStr(TermColor::kGreen, version),\n                  \"\\nbuild time: \", ColoredStr(TermColor::kYellow, dfly::kBuildTime), \"\\n\");\n  };\n\n  absl::SetFlagsUsageConfig(config);\n  google::InitGoogleLogging(argv[0]);\n  google::SetLogFilenameExtension(\".log\");\n\n  MainInitGuard guard(&argc, &argv);\n\n  ParseFlagsFromEnv();\n\n  if (!GetFlag(FLAGS_omit_basic_usage)) {\n    PrintBasicUsageInfo();\n  }\n\n  LOG(INFO) << \"Starting dragonfly \" << GetVersion() << \"-\" << kGitSha;\n\n  struct sigaction act;\n  act.sa_handler = 
sigill_hdlr;\n  sigemptyset(&act.sa_mask);\n  sigaction(SIGILL, &act, nullptr);\n\n  // Ignore SIGHUP to prevent termination when the parent shell exits\n  signal(SIGHUP, SIG_IGN);\n\n  if (GetFlag(FLAGS_port) == 0u) {\n    string usock = GetFlag(FLAGS_unixsocket);\n    if (usock.length() == 0u) {\n      LOG(ERROR) << \"received --port 0, yet no unix socket to listen to. Exiting.\";\n      exit(1);\n    }\n    LOG(INFO) << \"received --port 0, disabling TCP listening.\";\n    LOG(INFO) << \"listening on unix socket \" << usock << \".\";\n  }\n\n  if (GetFlag(FLAGS_dbnum) > dfly::kMaxDbId) {\n    LOG(ERROR) << \"dbnum is too big. Exiting...\";\n    return 1;\n  }\n\n  string pidfile_path = GetFlag(FLAGS_pidfile);\n  if (!pidfile_path.empty()) {\n    if (!CreatePidFile(pidfile_path)) {\n      return 1;\n    }\n  }\n\n  io::MemInfoData mem_info = ReadMemInfo().value_or(io::MemInfoData{});\n  size_t max_available_threads = 0u;\n\n#ifdef __linux__\n  UpdateResourceLimitsIfInsideContainer(&mem_info, &max_available_threads);\n#endif\n\n  if (mem_info.swap_total != 0)\n    LOG(WARNING) << \"SWAP is enabled. Consider disabling it when running Dragonfly.\";\n\n  dfly::max_memory_limit = absl::GetFlag(FLAGS_maxmemory);\n\n  if (dfly::max_memory_limit == 0) {\n    LOG(INFO) << \"maxmemory has not been specified. Deciding myself....\";\n\n    size_t available = mem_info.mem_avail;\n    size_t maxmemory = size_t(0.8 * available);\n    if (maxmemory == 0) {\n      LOG(ERROR) << \"Could not deduce how much memory available. \"\n                 << \"Use --maxmemory=... to specify explicitly\";\n      return 1;\n    }\n    LOG(INFO) << \"Found \" << HumanReadableNumBytes(available)\n              << \" available memory. 
Setting maxmemory to \" << HumanReadableNumBytes(maxmemory);\n\n    absl::SetFlag(&FLAGS_maxmemory, maxmemory);\n    dfly::max_memory_limit = maxmemory;\n  } else {\n    string hr_limit = HumanReadableNumBytes(dfly::max_memory_limit);\n    if (dfly::max_memory_limit > mem_info.mem_avail)\n      LOG(WARNING) << \"Got memory limit \" << hr_limit << \", however only \"\n                   << HumanReadableNumBytes(mem_info.mem_avail) << \" was found.\";\n    LOG(INFO) << \"Max memory limit is: \" << hr_limit;\n  }\n\n  // Initialize mi_malloc options\n  // export MIMALLOC_VERBOSE=1 to see the options before the override.\n  // _default functions override the default options vaues but if the options were set\n  // via the environment variables, they will not be overridden.\n  mi_option_set_enabled_default(mi_option_show_errors, true);\n  mi_option_set_default(mi_option_purge_delay, 0);\n\n  // To see the options after the override, use:\n  // mi_options_print();\n\n  fb2::SetDefaultStackResource(&g_mi_resource, kFiberDefaultStackSize);\n\n  {\n    unique_ptr<util::ProactorPool> pool;\n\n#ifdef __linux__\n    base::sys::KernelVersion kver;\n    base::sys::GetKernelVersion(&kver);\n\n    CHECK_LT(kver.major, 99u);\n    dfly::kernel_version = kver.kernel * 100 + kver.major;\n\n    bool use_epoll = ShouldUseEpollAPI(kver);\n\n    if (use_epoll) {\n      pool.reset(fb2::Pool::Epoll(max_available_threads));\n    } else {\n      pool.reset(fb2::Pool::IOUring(1024, max_available_threads));  // 1024 - iouring queue size.\n    }\n#else\n    pool.reset(fb2::Pool::Epoll(max_available_threads));\n#endif\n\n    pool->Run();\n\n    SetupAllocationTracker(pool.get());\n    RegisterBufRings(pool.get());\n\n    AcceptServer acceptor(pool.get(), &g_mi_resource, true);\n    acceptor.set_back_log(absl::GetFlag(FLAGS_tcp_backlog));\n\n#ifdef USE_AFL\n    //  Persistent mode fuzzing integration:\n    // - AFL++ generates test cases and feeds them through stdin\n    // - This code reads from 
stdin and forwards the data to a real TCP connection to the Dragonfly\n    //   server\n    // - The server runs in a separate thread and processes the fuzzed input as if it came from a\n    //   normal client\n    // - Each fuzzing iteration: read stdin -> send to server via TCP -> read response -> repeat\n    //\n    // Process lifecycle:\n    // - When stdin closes (EOF), the fuzzing process exits - this is expected behavior\n    // - When the fuzzing session completes (__AFL_LOOP finishes), the process exits with code 0\n    // - Exiting with code 0 is REQUIRED for AFL++ to work correctly\n    // - This also enables \"dry run\" mode where AFL++ tests that the target can be fuzzed before\n    //   starting the actual fuzzing campaign\n\n    std::thread server_thread = dfly::InitAflFuzzing(pool.get(), &acceptor);\n\n    uint16_t target_port = GetFlag(FLAGS_afl_target_port);\n    uint16_t port = target_port ? target_port : GetFlag(FLAGS_port);\n    uint32_t afl_loop_limit = GetFlag(FLAGS_afl_loop_limit);\n    unsigned int loop_iteration = 0;\n\n    // AFL++ persistent mode loop - this macro MUST stay in main() for proper instrumentation\n    while (__AFL_LOOP(afl_loop_limit)) {\n      loop_iteration++;\n      if (!dfly::RunAflFuzzingIteration(port))\n        break;  // stdin EOF or error\n    }\n\n    // AFL++ fuzzing session completed successfully\n    LOG(INFO) << \"AFL++: Loop finished after \" << loop_iteration << \" iterations, exiting...\";\n    // Use _exit(0) to skip cleanup - required by AFL++ persistent mode\n    _exit(0);\n#else\n    dfly::RunEngine(pool.get(), &acceptor);\n    pool->Stop();\n#endif\n\n    if (!pidfile_path.empty()) {\n      unlink(pidfile_path.c_str());\n    }\n  }\n\n  return 0;\n}\n"
  },
  {
    "path": "src/server/dflycmd.cc",
    "content": "// Copyright 2022, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n#include \"server/dflycmd.h\"\n\n#include <absl/random/random.h>\n#include <absl/strings/str_cat.h>\n#include <absl/strings/str_split.h>\n#include <absl/strings/strip.h>\n\n#include <limits>\n#include <memory>\n#include <optional>\n#include <utility>\n\n#include \"absl/cleanup/cleanup.h\"\n#include \"absl/strings/numbers.h\"\n#include \"base/flags.h\"\n#include \"base/logging.h\"\n#include \"core/detail/gen_utils.h\"\n#include \"facade/cmd_arg_parser.h\"\n#include \"facade/dragonfly_connection.h\"\n#include \"facade/dragonfly_listener.h\"\n#include \"facade/reply_builder.h\"\n#include \"server/cluster_support.h\"\n#include \"server/engine_shard_set.h\"\n#include \"server/error.h\"\n#include \"server/journal/journal.h\"\n#include \"server/journal/streamer.h\"\n#include \"server/main_service.h\"\n#include \"server/namespaces.h\"\n#include \"server/rdb_save.h\"\n#include \"server/replica.h\"\n#include \"server/server_family.h\"\n#include \"server/server_state.h\"\n#include \"server/transaction.h\"\n#include \"util/fibers/synchronization.h\"\nusing namespace std;\n\nABSL_DECLARE_FLAG(bool, info_replication_valkey_compatible);\nABSL_DECLARE_FLAG(uint32_t, replication_timeout);\nABSL_DECLARE_FLAG(uint32_t, shard_repl_backlog_len);\n\nnamespace dfly {\n\nusing namespace facade;\nusing namespace util;\n\nusing std::string;\nusing util::ProactorBase;\n\nstd::string_view SyncStateName(DflyCmd::SyncState sync_state) {\n  switch (sync_state) {\n    case DflyCmd::SyncState::PREPARATION:\n      return \"preparation\";\n    case DflyCmd::SyncState::FULL_SYNC:\n      return \"full_sync\";\n    case DflyCmd::SyncState::STABLE_SYNC:\n      return absl::GetFlag(FLAGS_info_replication_valkey_compatible) ? 
\"online\" : \"stable_sync\";\n    case DflyCmd::SyncState::CANCELLED:\n      return \"cancelled\";\n  }\n  DCHECK(false) << \"Unspported state \" << int(sync_state);\n  return \"unsupported\";\n}\n\nnamespace {\nconst char kBadMasterId[] = \"bad master id\";\nconst char kIdNotFound[] = \"syncid not found\";\nconst char kInvalidSyncId[] = \"bad sync id\";\nconst char kInvalidState[] = \"invalid state\";\n\nbool ToSyncId(string_view str, uint32_t* num) {\n  if (!absl::StartsWith(str, \"SYNC\"))\n    return false;\n  str.remove_prefix(4);\n\n  return absl::SimpleAtoi(str, num);\n}\n\nbool WaitReplicaFlowToCatchup(absl::Time end_time, const DflyCmd::ReplicaInfo* replica,\n                              EngineShard* shard, bool with_ping) {\n  // We don't want any writes to the journal after we send the `PING`,\n  // and expirations could ruin that.\n  namespaces->GetDefaultNamespace().GetDbSlice(shard->shard_id()).SetExpireAllowed(false);\n\n  if (with_ping) {\n    // PING forces replica to send the most recent last_acked_lsn.\n    // ACKS from the replica are send only every X commands or every 3 seconds (flag configurable)\n    // or when forced (by the PING above).\n    journal::RecordEntry(0, journal::Op::PING, 0, nullopt, {});\n  }\n\n  const FlowInfo* flow = &replica->flows[shard->shard_id()];\n\n  while (flow->last_acked_lsn < journal::GetLsn()) {\n    if (absl::Now() > end_time) {\n      LOG(WARNING) << \"Couldn't synchronize with replica for takeover in time: \" << replica->address\n                   << \":\" << replica->listening_port << \", last acked: \" << flow->last_acked_lsn\n                   << \", expecting \" << journal::GetLsn();\n      return false;\n    }\n    if (!replica->exec_st.IsRunning()) {\n      return false;\n    }\n    LOG_EVERY_T(INFO, 1) << \"Replica lsn:\" << flow->last_acked_lsn\n                         << \" master lsn:\" << journal::GetLsn()\n                         << \"; Journal streamer state: \" << 
flow->streamer->FormatInternalState();\n    ThisFiber::SleepFor(1ms);\n  }\n\n  return true;\n}\n\n}  // namespace\n\nvoid DflyCmd::ReplicaInfo::Cancel() {\n  util::fb2::LockGuard lk{shared_mu};\n  if (replica_state == SyncState::CANCELLED) {\n    return;\n  }\n\n  LOG(INFO) << \"Disconnecting from replica \" << address << \":\" << listening_port;\n\n  // Update state and cancel context.\n  replica_state = SyncState::CANCELLED;\n  exec_st.ReportCancelError();\n  // Wait for tasks to finish.\n  shard_set->RunBlockingInParallel([this](EngineShard* shard) {\n    VLOG(2) << \"Disconnecting flow \" << shard->shard_id();\n\n    FlowInfo* flow = &flows[shard->shard_id()];\n    if (flow->cleanup) {\n      flow->cleanup();\n    }\n    VLOG(2) << \"After flow cleanup \" << shard->shard_id();\n    flow->conn = nullptr;\n  });\n  // Wait for error handler to quit.\n  exec_st.JoinErrorHandler();\n  VLOG(1) << \"Disconnecting replica \" << address << \":\" << listening_port;\n}\n\nDflyCmd::DflyCmd(ServerFamily* server_family) : sf_(server_family) {\n}\n\nvoid DflyCmd::Run(CmdArgList args, CommandContext* cmd_cntx) {\n  DCHECK_GE(args.size(), 1u);\n  string sub_cmd = absl::AsciiStrToUpper(ArgS(args, 0));\n\n  if (sub_cmd == \"THREAD\") {\n    return Thread(args, cmd_cntx);\n  }\n\n  if (sub_cmd == \"FLOW\" && (args.size() >= 4 && args.size() <= 6)) {\n    return Flow(args, cmd_cntx);\n  }\n\n  if (sub_cmd == \"SYNC\" && args.size() == 2) {\n    return Sync(args, cmd_cntx);\n  }\n\n  if (sub_cmd == \"STARTSTABLE\" && args.size() == 2) {\n    return StartStable(args, cmd_cntx);\n  }\n\n  if (sub_cmd == \"TAKEOVER\" && (args.size() == 3 || args.size() == 4)) {\n    return TakeOver(args, cmd_cntx);\n  }\n\n  if (sub_cmd == \"EXPIRE\") {\n    return Expire(args, cmd_cntx);\n  }\n\n  if (sub_cmd == \"REPLICAOFFSET\" && args.size() == 1) {\n    return ReplicaOffset(args, cmd_cntx);\n  }\n\n  if (sub_cmd == \"LOAD\") {\n    return Load(args, cmd_cntx);\n  }\n\n  auto* rb = 
static_cast<facade::RedisReplyBuilder*>(cmd_cntx->rb());\n  if (sub_cmd == \"HELP\") {\n    string_view help_arr[] = {\n        \"DFLY <subcommand> [<arg> [value] [opt] ...]. Subcommands are:\",\n        \"THREAD\",\n        \"    Returns connection thread index and number of threads\",\n        \"THREAD <thread-id>\",\n        \"    Migrates connection to thread <thread-id>\",\n        \"EXPIRE\",\n        \"    Collects all expired items.\",\n        \"REPLICAOFFSET\",\n        \"    Returns LSN (log sequence number) per shard. These are the sequential ids of the \",\n        \"    journal entry.\",\n        \"LOAD <filename> [APPEND]\",\n        \"    Loads <filename> RDB/DFS file into the data store.\",\n        \"    * APPEND: Existing keys are NOT removed before loading the file, conflicting \",\n        \"      keys (that exist in both data store and in file) are overridden.\",\n        \"HELP\",\n        \"    Prints this help.\",\n    };\n    return rb->SendSimpleStrArr(help_arr);\n  }\n\n  cmd_cntx->SendError(kSyntaxErr);\n}\n\nvoid DflyCmd::Thread(CmdArgList args, CommandContext* cmd_cntx) {\n  util::ProactorPool* pool = shard_set->pool();\n\n  auto* rb = static_cast<facade::RedisReplyBuilder*>(cmd_cntx->rb());\n  if (args.size() == 1) {  // DFLY THREAD : returns connection thread index and number of threads.\n    rb->StartArray(2);\n    rb->SendLong(ProactorBase::me()->GetPoolIndex());\n    rb->SendLong(long(pool->size()));\n    return;\n  }\n\n  // DFLY THREAD to_thread : migrates current connection to a different thread.\n  string_view arg = ArgS(args, 1);\n  unsigned num_thread;\n  if (!absl::SimpleAtoi(arg, &num_thread)) {\n    return cmd_cntx->SendError(kSyntaxErr);\n  }\n\n  if (num_thread < pool->size()) {\n    if (int(num_thread) != ProactorBase::me()->GetPoolIndex()) {\n      auto* conn = cmd_cntx->conn();\n      if (!conn->Migrate(pool->at(num_thread))) {\n        // Listener::PreShutdown() triggered\n        if (conn->socket()->IsOpen()) {\n  
        return cmd_cntx->SendError(kInvalidState);\n        }\n        return;\n      }\n    }\n\n    return rb->SendOk();\n  }\n\n  return cmd_cntx->SendError(kInvalidIntErr);\n}\n\nvoid DflyCmd::Flow(CmdArgList args, CommandContext* cmd_cntx) {\n  string_view master_id = ArgS(args, 1);\n  string_view sync_id_str = ArgS(args, 2);\n  string_view flow_id_str = ArgS(args, 3);\n\n  std::optional<LSN> seqid;\n  std::optional<string> last_master_id;\n  std::optional<string> last_master_lsn;\n  if (args.size() == 5) {\n    seqid.emplace();\n    if (!absl::SimpleAtoi(ArgS(args, 4), &seqid.value())) {\n      return cmd_cntx->SendError(facade::kInvalidIntErr);\n    }\n  } else if (args.size() == 6) {\n    last_master_id = ArgS(args, 4);\n    last_master_lsn = ArgS(args, 5);\n  }\n\n  VLOG(1) << \"Got DFLY FLOW master_id: \" << master_id << \" sync_id: \" << sync_id_str\n          << \" flow: \" << flow_id_str << \" seq: \" << seqid.value_or(-1);\n\n  if (master_id != sf_->master_replid()) {\n    return cmd_cntx->SendError(kBadMasterId);\n  }\n\n  unsigned flow_id;\n  if (!absl::SimpleAtoi(flow_id_str, &flow_id) || flow_id >= shard_set->size()) {\n    return cmd_cntx->SendError(facade::kInvalidIntErr);\n  }\n\n  auto [sync_id, replica_ptr] = GetReplicaInfoOrReply(sync_id_str, cmd_cntx);\n  if (!sync_id)\n    return;\n\n  string eof_token;\n  std::string sync_type{\"FULL\"};\n  {\n    util::fb2::LockGuard lk{replica_ptr->shared_mu};\n\n    if (replica_ptr->replica_state != SyncState::PREPARATION) {\n      return cmd_cntx->SendError(kInvalidState);\n    }\n\n    // Set meta info on connection.\n    auto* conn_cntx = cmd_cntx->server_conn_cntx();\n    cmd_cntx->conn()->SetName(absl::StrCat(\"repl_flow_\", sync_id));\n    conn_cntx->conn_state.replication_info.repl_session_id = sync_id;\n    conn_cntx->conn_state.replication_info.repl_flow_id = flow_id;\n    conn_cntx->replica_conn = true;\n\n    absl::InsecureBitGen gen;\n    eof_token = GetRandomHex(gen, 40);\n\n    auto& flow 
= replica_ptr->flows[flow_id];\n    conn_cntx->master_repl_flow = &flow;\n    flow.conn = cmd_cntx->conn();\n    flow.eof_token = eof_token;\n    flow.version = replica_ptr->version;\n\n    if (!conn_cntx->conn()->Migrate(shard_set->pool()->at(flow_id))) {\n      // Listener::PreShutdown() triggered\n      if (conn_cntx->conn()->socket()->IsOpen()) {\n        return cmd_cntx->SendError(kInvalidState);\n      }\n      return;\n    }\n\n    journal::StartInThread();\n\n    std::optional<Replica::LastMasterSyncData> data = sf_->GetLastMasterData();\n    std::optional<LSN> lsn_to_start_partial;\n    // In this flow the master and the registered replica where synced from the same master.\n    if (last_master_id && data && data->id == *last_master_id) {\n      ++ServerState::tlocal()->stats.psync_requests_total;\n      auto flow_lsn =\n          ParseLsnVec(*last_master_lsn, data->last_journal_LSNs.size(), flow_id, cmd_cntx);\n      if (!flow_lsn) {\n        return;  // ParseLsnVec replies in case of error\n      }\n\n      if (IsLSNInPartialSyncBuffer(*flow_lsn)) {\n        lsn_to_start_partial.emplace(*flow_lsn);\n      }\n\n    } else if (seqid.has_value() && IsLSNInPartialSyncBuffer(*seqid)) {\n      lsn_to_start_partial.emplace(*seqid);\n    }\n\n    if (lsn_to_start_partial) {\n      flow.start_partial_sync_at = *lsn_to_start_partial;\n      sync_type = \"PARTIAL\";\n      VLOG(1) << \"Partial sync requested from LSN=\" << flow.start_partial_sync_at.value()\n              << \" and is available. 
(current_lsn=\" << journal::GetLsn() << \")\";\n    }\n  }\n\n  auto* rb = static_cast<facade::RedisReplyBuilder*>(cmd_cntx->rb());\n  rb->StartArray(2);\n  rb->SendSimpleString(sync_type);\n  rb->SendSimpleString(eof_token);\n}\n\nvoid DflyCmd::Sync(CmdArgList args, CommandContext* cmd_cntx) {\n  string_view sync_id_str = ArgS(args, 1);\n\n  VLOG(1) << \"Got DFLY SYNC \" << sync_id_str;\n\n  auto [sync_id, replica_ptr] = GetReplicaInfoOrReply(sync_id_str, cmd_cntx);\n  if (!sync_id)\n    return;\n\n  util::fb2::LockGuard lk{replica_ptr->shared_mu};\n  if (!CheckReplicaStateOrReply(*replica_ptr, SyncState::PREPARATION, cmd_cntx))\n    return;\n\n  // Start full sync.\n  {\n    Transaction::Guard tg{cmd_cntx->tx()};\n    AggregateStatus status;\n\n    // Use explicit assignment for replica_ptr, because capturing structured bindings is C++20.\n    auto cb = [this, &status, replica_ptr = replica_ptr](EngineShard* shard) {\n      status = StartFullSyncInThread(&replica_ptr->flows[shard->shard_id()], &replica_ptr->exec_st,\n                                     shard);\n    };\n    shard_set->RunBlockingInParallel(std::move(cb));\n\n    // TODO: Send better error\n    if (*status != OpStatus::OK)\n      return cmd_cntx->SendError(kInvalidState);\n  }\n\n  LOG(INFO) << \"Started sync with replica \" << replica_ptr->address << \":\"\n            << replica_ptr->listening_port;\n\n  // protected by lk above.\n  replica_ptr->replica_state = SyncState::FULL_SYNC;\n\n  return cmd_cntx->SendOk();\n}\n\nvoid DflyCmd::StartStable(CmdArgList args, CommandContext* cmd_cntx) {\n  string_view sync_id_str = ArgS(args, 1);\n\n  VLOG(1) << \"Got DFLY STARTSTABLE \" << sync_id_str;\n\n  auto [sync_id, replica_ptr] = GetReplicaInfoOrReply(sync_id_str, cmd_cntx);\n  if (!sync_id)\n    return;\n\n  util::fb2::LockGuard lk{replica_ptr->shared_mu};\n  auto repl_state = replica_ptr->replica_state;\n  if (repl_state != SyncState::FULL_SYNC && repl_state != SyncState::PREPARATION) {\n    
cmd_cntx->SendError(kInvalidState);\n    return;\n  }\n\n  // Check all flows are connected.\n  // This might happen if a flow abruptly disconnected before sending the SYNC request.\n  for (const FlowInfo& flow : replica_ptr->flows) {\n    if (!flow.conn) {\n      cmd_cntx->SendError(kInvalidState);\n      return;\n    }\n  }\n\n  {\n    Transaction::Guard tg{cmd_cntx->tx()};\n    AggregateStatus status;\n\n    auto cb = [this, &status, replica_ptr = replica_ptr](EngineShard* shard) {\n      FlowInfo* flow = &replica_ptr->flows[shard->shard_id()];\n\n      // We are doing partial sync. We never started FullSync so we don't need to stop it.\n      bool is_partial = flow->start_partial_sync_at.has_value();\n      if (!is_partial) {\n        status = StopFullSyncInThread(flow, &replica_ptr->exec_st, shard);\n        if (*status != OpStatus::OK) {\n          return;\n        }\n      }\n\n      StartStableSyncInThread(flow, &replica_ptr->exec_st, shard);\n    };\n    shard_set->RunBlockingInParallel(std::move(cb));\n\n    if (*status != OpStatus::OK)\n      return cmd_cntx->SendError(kInvalidState);\n  }\n\n  LOG(INFO) << \"Transitioned into stable sync with replica \" << replica_ptr->address << \":\"\n            << replica_ptr->listening_port;\n\n  replica_ptr->replica_state = SyncState::STABLE_SYNC;\n  return cmd_cntx->SendOk();\n}\n\nbool DflyCmd::IsLSNInPartialSyncBuffer(LSN lsn) const {\n  const bool exists = journal::GetLsn() == lsn || journal::IsLSNInBuffer(lsn);\n  if (!exists) {\n    LOG(INFO) << \"Partial sync requested from stale LSN=\" << lsn\n              << \" that the replication buffer doesn't contain this anymore (current_lsn=\"\n              << journal::GetLsn() << \"). 
Will perform a full sync of the data.\";\n    LOG(INFO) << \"If this happens often you can control the replication buffer's size with the \"\n                 \"--shard_repl_backlog_len option\";\n  }\n  return exists;\n}\n\nstd::optional<LSN> DflyCmd::ParseLsnVec(std::string_view last_master_lsn,\n                                        size_t last_journal_lsn_size, size_t flow_id,\n                                        CommandContext* cmd_cntx) {\n  std::vector<std::string_view> lsn_str_vec = absl::StrSplit(last_master_lsn, '-');\n  if (lsn_str_vec.size() != last_journal_lsn_size) {\n    cmd_cntx->SendError(facade::kSyntaxErr);  // Unexpected flow. LSN vector of same master\n                                              // should be the same size on all replicas.\n    return std::nullopt;\n  }\n\n  std::vector<LSN> lsn_vec;\n  lsn_vec.reserve(lsn_str_vec.size());\n\n  for (string_view lsn_str : lsn_str_vec) {\n    int64_t value;\n    if (!absl::SimpleAtoi(lsn_str, &value)) {\n      cmd_cntx->SendError(facade::kInvalidIntErr);\n      return std::nullopt;\n    }\n    lsn_vec.push_back(value);\n  }\n\n  DCHECK(flow_id < lsn_vec.size());\n  if (flow_id >= lsn_vec.size()) {\n    LOG(ERROR) << \"Invalid flow_id: \" << flow_id << \" exceeds LSN vector size: \" << lsn_vec.size()\n               << \". 
Disabling partial sync.\";\n    return std::nullopt;\n  }\n\n  return {lsn_vec[flow_id]};\n}\n\n// DFLY TAKEOVER <timeout_sec> [SAVE] <sync_id>\n// timeout_sec - number of seconds to wait for TAKEOVER to converge.\n// SAVE option is used only by tests.\nvoid DflyCmd::TakeOver(CmdArgList args, CommandContext* cmd_cntx) {\n  CmdArgParser parser{args};\n  parser.Next();\n  float timeout = std::ceil(parser.Next<float>());\n  if (timeout < 0) {\n    // allow 0s timeout for tests.\n    return cmd_cntx->SendError(\"timeout is negative\");\n  }\n\n  bool save_flag = static_cast<bool>(parser.Check(\"SAVE\"));\n\n  string_view sync_id_str = parser.Next<std::string_view>();\n\n  RETURN_ON_PARSE_ERROR(parser, cmd_cntx);\n\n  VLOG(1) << \"Got DFLY TAKEOVER \" << sync_id_str << \" time out:\" << timeout;\n\n  auto [sync_id, replica_ptr] = GetReplicaInfoOrReply(sync_id_str, cmd_cntx);\n  if (!sync_id)\n    return;\n\n  {\n    dfly::SharedLock lk{replica_ptr->shared_mu};\n    if (!CheckReplicaStateOrReply(*replica_ptr, SyncState::STABLE_SYNC, cmd_cntx))\n      return;\n\n    auto prev_state = sf_->service().SwitchState(GlobalState::ACTIVE, GlobalState::TAKEN_OVER);\n    if (prev_state != GlobalState::ACTIVE) {\n      LOG(WARNING) << prev_state << \" in progress, could not take over\";\n      return cmd_cntx->SendError(\"Takeover failed!\");\n    }\n  }\n\n  auto cluster_config_before = cluster::ClusterConfig::Current();\n\n  LOG(INFO) << \"Takeover initiated, locking down the database.\";\n  absl::Duration timeout_dur = absl::Seconds(timeout);\n  absl::Time end_time = absl::Now() + timeout_dur;\n  AggregateStatus status;\n\n  // We need to await for all dispatches to finish: Otherwise a transaction might be scheduled\n  // after this function exits but before the actual shutdown.\n  facade::DispatchTracker tracker{sf_->GetNonPriviligedListeners(), cmd_cntx->conn(), false, false};\n  shard_set->pool()->AwaitFiberOnAll([&](unsigned index, auto* pb) {\n    
sf_->CancelBlockingOnThread();\n    tracker.TrackOnThread();\n  });\n\n  if (!tracker.Wait(timeout_dur)) {\n    LOG(WARNING) << \"Couldn't wait for commands to finish dispatching. \" << timeout_dur;\n    status = OpStatus::TIMED_OUT;\n\n    auto cb = [&](unsigned thread_index, util::Connection* conn) {\n      facade::Connection* dcon = static_cast<facade::Connection*>(conn);\n      LOG(INFO) << dcon->DebugInfo();\n    };\n\n    for (auto* listener : sf_->GetListeners()) {\n      listener->TraverseConnections(cb);\n    }\n  }\n\n  VLOG(1) << \"AwaitCurrentDispatches done\";\n\n  absl::Cleanup cleanup([] {\n    VLOG(2) << \"Enabling expiration\";\n    shard_set->RunBriefInParallel([](EngineShard* shard) {\n      namespaces->GetDefaultNamespace().GetDbSlice(shard->shard_id()).SetExpireAllowed(true);\n    });\n  });\n\n  atomic_bool catchup_success = true;\n  if (*status == OpStatus::OK) {\n    dfly::SharedLock lk{replica_ptr->shared_mu};\n    auto cb = [replica_ptr = replica_ptr, end_time, &catchup_success](EngineShard* shard) {\n      // PING to force the replica to send the last acked lsn.\n      if (!WaitReplicaFlowToCatchup(end_time, replica_ptr.get(), shard, true)) {\n        catchup_success.store(false);\n      }\n    };\n    shard_set->RunBlockingInParallel(std::move(cb));\n  }\n\n  VLOG(1) << \"WaitReplicaFlowToCatchup done\";\n\n  if (*status != OpStatus::OK || !catchup_success.load()) {\n    sf_->service().SwitchState(GlobalState::TAKEN_OVER, GlobalState::ACTIVE);\n    return cmd_cntx->SendError(\"Takeover failed!\");\n  }\n\n  cmd_cntx->SendOk();\n\n  atomic_bool rest_catchup_success = true;\n  {\n    util::fb2::LockGuard mu_lk(mu_);\n    for (auto [id, repl_ptr] : replica_infos_) {\n      if (replica_ptr == repl_ptr) {\n        continue;\n      }\n\n      auto cb = [repl_ptr = repl_ptr, end_time, &rest_catchup_success](EngineShard* shard) {\n        // We can't PING here as it will advance our LSN and disable partial sync for these nodes.\n        // 
Instead, wait and be optimistic that the end_time is not short. If the nodes didn't sync\n        // up in time, it's ok, they will fall back to full sync when reconfigured.\n        if (!WaitReplicaFlowToCatchup(end_time, repl_ptr.get(), shard, false)) {\n          rest_catchup_success.store(false);\n        }\n      };\n      shard_set->RunBlockingInParallel(std::move(cb));\n    }\n\n    if (!rest_catchup_success) {\n      LOG(WARNING) << \"Some of the replica nodes did not sync in time.\";\n    }\n  }\n\n  if (save_flag) {\n    VLOG(1) << \"Save snapshot after Takeover.\";\n    if (auto ec = sf_->DoSave(true); ec) {\n      LOG(WARNING) << \"Failed to perform snapshot \" << ec.Format();\n    }\n  }\n\n  // For non-cluster mode we shutdown\n  if (detail::cluster_mode != detail::ClusterMode::kRealCluster) {\n    VLOG(1) << \"Takeover accepted, shutting down.\";\n    std::string save_arg = \"NOSAVE\";\n    MutableSlice sargs(save_arg);\n    CommandContext child_cmd_cntx{cmd_cntx->rb(), nullptr};\n    sf_->ShutdownCmd(CmdArgList(&sargs, 1), &child_cmd_cntx);\n    return;\n  }\n\n  auto cluster_config_after = cluster::ClusterConfig::Current();\n  if (cluster_config_after.get() != cluster_config_before.get()) {\n    LOG(INFO) << \"ReconcileMasterSlots() early exit. Config already updated\";\n    return;\n  }\n  sf_->service().cluster_family().ReconcileMasterSlots(replica_ptr->id);\n}\n\nvoid DflyCmd::Expire(CmdArgList args, CommandContext* cmd_cntx) {\n  cmd_cntx->tx()->ScheduleSingleHop([](Transaction* t, EngineShard* shard) {\n    t->GetDbSlice(shard->shard_id()).ExpireAllIfNeeded();\n    return OpStatus::OK;\n  });\n\n  return cmd_cntx->SendOk();\n}\n\nvoid DflyCmd::ReplicaOffset(CmdArgList args, CommandContext* cmd_cntx) {\n  std::vector<LSN> lsns(shard_set->size());\n  shard_set->RunBriefInParallel([&](EngineShard* shard) {\n    lsns[shard->shard_id()] = shard->journal() ? 
journal::GetLsn() : 0;\n  });\n\n  auto* rb = static_cast<facade::RedisReplyBuilder*>(cmd_cntx->rb());\n  rb->SendLongArr(absl::MakeConstSpan(lsns));\n}\n\nvoid DflyCmd::Load(CmdArgList args, CommandContext* cmd_cntx) {\n  CmdArgParser parser{args};\n  parser.ExpectTag(\"LOAD\");\n  string filename = parser.Next<string>();\n  ServerFamily::LoadExistingKeys existing_keys = ServerFamily::LoadExistingKeys::kFail;\n\n  if (parser.HasNext()) {\n    parser.ExpectTag(\"APPEND\");\n    existing_keys = ServerFamily::LoadExistingKeys::kOverride;\n  }\n\n  if (parser.TakeError() || parser.HasNext() || filename.empty()) {\n    return cmd_cntx->SendError(kSyntaxErr);\n  }\n\n  if (existing_keys == ServerFamily::LoadExistingKeys::kFail) {\n    sf_->FlushAll(cmd_cntx->server_conn_cntx()->ns);\n  }\n\n  if (auto fut_ec = sf_->Load(filename, existing_keys); fut_ec) {\n    GenericError ec = fut_ec->Get();\n    if (ec) {\n      string msg = ec.Format();\n      LOG(WARNING) << \"Could not load file \" << msg;\n      return cmd_cntx->SendError(msg);\n    }\n  }\n\n  cmd_cntx->SendOk();\n}\n\nOpStatus DflyCmd::StartFullSyncInThread(FlowInfo* flow, ExecutionState* exec_st,\n                                        EngineShard* shard) {\n  DCHECK(shard);\n  DCHECK(flow->conn);\n\n  // The summary contains the LUA scripts, so make sure at least (and exactly one)\n  // of the flows also contain them.\n  SaveMode save_mode =\n      shard->shard_id() == 0 ? SaveMode::SINGLE_SHARD_WITH_SUMMARY : SaveMode::SINGLE_SHARD;\n  flow->saver =\n      std::make_unique<RdbSaver>(flow->conn->socket(), save_mode, false, \"\", flow->version);\n\n  flow->cleanup = [flow, shard]() {\n    // socket shutdown is needed before calling saver->Cancel(). 
Because\n    // we might cancel while the write to socket is blocking and\n    // therefore if we wont cancel the socket the full sync fiber might\n    // not get to pop entries from channel, which can cause dead lock if channel is full and some\n    // callbacks are blocked on trying to insert to channel.\n    flow->TryShutdownSocket();\n    flow->saver->CancelInShard(shard);  // stops writing to journal stream to channel\n    flow->saver.reset();\n  };\n\n  error_code ec;\n  RdbSaver* saver = flow->saver.get();\n  if (saver->Mode() == SaveMode::SUMMARY || saver->Mode() == SaveMode::SINGLE_SHARD_WITH_SUMMARY) {\n    // Full sync summary - include all global data\n    ec = saver->SaveHeader(saver->GetGlobalData(&sf_->service(), true));\n  } else {\n    // Per-shard - include only search index restore commands\n    ec = saver->SaveHeader(saver->GetGlobalData(&sf_->service(), false));\n  }\n  if (ec) {\n    exec_st->ReportError(ec);\n    return OpStatus::CANCELLED;\n  }\n\n  saver->StartSnapshotInShard(true, exec_st, shard);\n\n  return OpStatus::OK;\n}\n\nOpStatus DflyCmd::StopFullSyncInThread(FlowInfo* flow, ExecutionState* exec_st,\n                                       EngineShard* shard) {\n  DCHECK(shard);\n\n  error_code ec = flow->saver->StopFullSyncInShard(shard);\n  if (ec) {\n    exec_st->ReportError(ec);\n    return OpStatus::CANCELLED;\n  }\n\n  ec = flow->conn->socket()->Write(io::Buffer(flow->eof_token));\n  if (ec) {\n    exec_st->ReportError(ec);\n    return OpStatus::CANCELLED;\n  }\n\n  // Reset cleanup and saver\n  flow->cleanup = []() {};\n  flow->saver.reset();\n  return OpStatus::OK;\n}\n\nvoid DflyCmd::StartStableSyncInThread(FlowInfo* flow, ExecutionState* exec_st, EngineShard* shard) {\n  // Create streamer for shard flows.\n  DCHECK(shard);\n  DCHECK(flow->conn);\n\n  LSN partial_lsn = flow->start_partial_sync_at.value_or(0);\n  JournalStreamer::Config config{\n      .should_sent_lsn = true, .init_from_stable_sync = true, 
.start_partial_sync_at = partial_lsn};\n  flow->streamer.reset(new JournalStreamer(exec_st, config));\n  flow->streamer->Start(flow->conn->socket());\n\n  // Register cleanup.\n  flow->cleanup = [flow]() {\n    flow->TryShutdownSocket();\n    if (flow->streamer) {\n      flow->streamer->Cancel();\n    }\n  };\n}\n\nauto DflyCmd::CreateSyncSession(ConnectionState* state) -> std::pair<uint32_t, unsigned> {\n  util::fb2::LockGuard lk(mu_);\n  unsigned sync_id = next_sync_id_++;\n\n  unsigned flow_count = shard_set->size();\n  auto err_handler = [this, sync_id](const GenericError& err) {\n    LOG(INFO) << \"Replication error: \" << err.Format();\n\n    // Spawn external fiber to allow destructing the context from outside\n    // and return from the handler immediately.\n    fb2::Fiber(\"stop_replication\", &DflyCmd::StopReplication, this, sync_id).Detach();\n  };\n\n  string address = state->replication_info.repl_ip_address;\n  uint32_t port = state->replication_info.repl_listening_port;\n\n  LOG(INFO) << \"Registered replica \" << address << \":\" << port;\n\n  auto replica_ptr =\n      make_shared<ReplicaInfo>(flow_count, std::move(address), port, std::move(err_handler));\n  auto [it, inserted] = replica_infos_.emplace(sync_id, std::move(replica_ptr));\n  CHECK(inserted);\n\n  return {it->first, flow_count};\n}\n\nauto DflyCmd::GetReplicaInfoFromConnection(ConnectionState* state) -> std::shared_ptr<ReplicaInfo> {\n  util::fb2::LockGuard lk(mu_);\n  auto it = replica_infos_.find(state->replication_info.repl_session_id);\n  if (it == replica_infos_.end()) {\n    return nullptr;\n  }\n\n  return it->second;\n}\n\nvoid DflyCmd::OnClose(unsigned sync_id) {\n  if (!sync_id)\n    return;\n  StopReplication(sync_id);\n}\n\nvoid DflyCmd::StopReplication(uint32_t sync_id) {\n  auto replica_ptr = GetReplicaInfo(sync_id);\n  if (!replica_ptr)\n    return;\n  VLOG(1) << \"Stopping replication for sync_id: \" << sync_id;\n\n  // Because CancelReplication holds the per-replica 
mutex,\n  // aborting connection will block here until cancellation finishes.\n  // This allows keeping resources alive during the cleanup phase.\n  replica_ptr->Cancel();\n\n  util::fb2::LockGuard lk(mu_);\n  replica_infos_.erase(sync_id);\n}\n\n// Because we need to annotate unique_lock\nvoid DflyCmd::BreakStalledFlowsInShard() {\n  std::unique_lock global_lock(mu_, try_to_lock);\n\n  // give up on blocking because we run this function periodically in a background fiber,\n  // so it will eventually grab the lock.\n  if (!global_lock.owns_lock())\n    return;\n\n  ShardId sid = EngineShard::tlocal()->shard_id();\n\n  vector<uint32_t> deleted;\n\n  for (auto [sync_id, replica_ptr] : replica_infos_) {\n    dfly::SharedLock replica_lock{replica_ptr->shared_mu};\n\n    if (!replica_ptr->flows[sid].saver)\n      continue;\n\n    // If saver is present - we are currently using it for full sync.\n    int64_t last_write_ns = replica_ptr->flows[sid].saver->GetLastWriteTime();\n    int64_t timeout_ns = int64_t(absl::GetFlag(FLAGS_replication_timeout)) * 1'000'000LL;\n    int64_t now = absl::GetCurrentTimeNanos();\n    if (last_write_ns > 0 && last_write_ns + timeout_ns < now) {\n      LOG(INFO) << \"Master detected replication timeout, breaking full sync with replica, sync_id: \"\n                << sync_id << \" last_write_ms: \" << last_write_ns / 1000'000\n                << \", now: \" << now / 1000'000;\n\n      deleted.push_back(sync_id);\n      replica_lock.unlock();\n      replica_ptr->Cancel();\n    }\n  }\n\n  for (auto sync_id : deleted)\n    replica_infos_.erase(sync_id);\n}\n\nshared_ptr<DflyCmd::ReplicaInfo> DflyCmd::GetReplicaInfo(uint32_t sync_id) {\n  util::fb2::LockGuard lk(mu_);\n\n  auto it = replica_infos_.find(sync_id);\n  if (it != replica_infos_.end())\n    return it->second;\n  return {};\n}\n\nstd::vector<ReplicaRoleInfo> DflyCmd::GetReplicasRoleInfo() const {\n  std::vector<ReplicaRoleInfo> vec;\n  util::fb2::LockGuard lk(mu_);\n\n  
vec.reserve(replica_infos_.size());\n  map replication_lags = ReplicationLagsLocked();\n\n  for (const auto& [id, info] : replica_infos_) {\n    LSN lag = replication_lags[id];\n    SyncState state = SyncState::PREPARATION;\n\n    // If the replica state being updated, its lag is undefined,\n    // the same applies of course if its state is not STABLE_SYNC.\n    shared_lock lk(info->shared_mu, try_to_lock);\n    if (lk.owns_lock()) {\n      state = info->replica_state;\n      // If the replica is not in stable sync, its lag is undefined, so we set it to 0.\n      if (state != SyncState::STABLE_SYNC) {\n        lag = 0;\n      }\n    } else {\n      lag = 0;\n    }\n    vec.push_back(\n        ReplicaRoleInfo{info->id, info->address, info->listening_port, SyncStateName(state), lag});\n  }\n  return vec;\n}\n\nvoid DflyCmd::GetReplicationMemoryStats(ReplicationMemoryStats* stats) const {\n  atomic<size_t> streamer_bytes{0}, full_sync_bytes{0};\n\n  {\n    util::fb2::LockGuard lk{mu_};  // prevent state changes\n    auto cb = [&](EngineShard* shard) ABSL_NO_THREAD_SAFETY_ANALYSIS {\n      for (const auto& [_, info] : replica_infos_) {\n        dfly::SharedLock repl_lk{info->shared_mu};\n\n        // flows should not be empty.\n        DCHECK(!info->flows.empty());\n        if (info->flows.empty())\n          continue;\n\n        const auto& flow = info->flows[shard->shard_id()];\n        if (flow.streamer)\n          streamer_bytes.fetch_add(flow.streamer->UsedBytes(), memory_order_relaxed);\n        if (flow.saver)\n          full_sync_bytes.fetch_add(flow.saver->GetTotalBuffersSize(), memory_order_relaxed);\n      }\n    };\n    shard_set->RunBlockingInParallel(cb);\n  }\n  stats->streamer_buf_capacity_bytes += streamer_bytes.load(memory_order_relaxed);\n  stats->full_sync_buf_bytes += full_sync_bytes.load(memory_order_relaxed);\n}\n\npair<uint32_t, shared_ptr<DflyCmd::ReplicaInfo>> DflyCmd::GetReplicaInfoOrReply(\n    std::string_view id_str, CommandContext* 
cmd_cntx) {\n  uint32_t sync_id;\n  if (!ToSyncId(id_str, &sync_id)) {\n    cmd_cntx->SendError(kInvalidSyncId);\n    return {0, nullptr};\n  }\n\n  util::fb2::LockGuard lk(mu_);\n  auto sync_it = replica_infos_.find(sync_id);\n  if (sync_it == replica_infos_.end()) {\n    cmd_cntx->SendError(kIdNotFound);\n    return {0, nullptr};\n  }\n\n  return {sync_id, sync_it->second};\n}\n\nstd::map<uint32_t, LSN> DflyCmd::ReplicationLagsLocked() const {\n  DCHECK(!mu_.try_lock());  // expects to be under global lock\n  if (replica_infos_.empty())\n    return {};\n\n  // In each shard we calculate a map of replica id to replication lag in the shard.\n  std::vector<std::map<uint32_t, LSN>> shard_lags(shard_set->size());\n  shard_set->RunBriefInParallel([&shard_lags, this](EngineShard* shard) {\n    auto& lags = shard_lags[shard->shard_id()];\n    for (const auto& info : ABSL_TS_UNCHECKED_READ(replica_infos_)) {\n      const ReplicaInfo* replica = info.second.get();\n      if (shard->journal()) {\n        int64_t lag = journal::GetLsn() - replica->flows[shard->shard_id()].last_acked_lsn;\n        lags[info.first] = lag;\n      }\n    }\n  });\n\n  // Merge the maps from all shards and derive the maximum lag for each replica.\n  std::map<uint32_t, LSN> rv;\n  for (const auto& lags : shard_lags) {\n    for (auto [replica_id, lag] : lags) {\n      rv[replica_id] = std::max(rv[replica_id], lag);\n    }\n  }\n  return rv;\n}\n\nvoid DflyCmd::SetDflyClientVersion(ConnectionState* state, DflyVersion version) {\n  auto replica_ptr = GetReplicaInfo(state->replication_info.repl_session_id);\n  VLOG(1) << \"Client version for session_id=\" << state->replication_info.repl_session_id << \" is \"\n          << int(version);\n\n  replica_ptr->version = version;\n}\n\n// Must run under locked replica_info.mu.\n// TODO: it's a bad design that we enforce replies under a lock because Send can potentially\n// block, leading to high contention in some case. 
Split it and avoid replying under a lock.\nbool DflyCmd::CheckReplicaStateOrReply(const ReplicaInfo& repl_info, SyncState expected,\n                                       CommandContext* cmd_cntx) {\n  if (repl_info.replica_state != expected) {\n    cmd_cntx->SendError(kInvalidState);\n    return false;\n  }\n\n  // Check all flows are connected.\n  // This might happen if a flow abruptly disconnected before sending the SYNC request.\n  for (const FlowInfo& flow : repl_info.flows) {\n    if (!flow.conn) {\n      cmd_cntx->SendError(kInvalidState);\n      return false;\n    }\n  }\n\n  return true;\n}\n\nvoid DflyCmd::Shutdown() {\n  ReplicaInfoMap pending;\n  {\n    util::fb2::LockGuard lk(mu_);\n    pending = std::move(replica_infos_);\n  }\n\n  for (auto& [_, replica_ptr] : pending) {\n    replica_ptr->Cancel();\n  }\n}\n\nvoid FlowInfo::TryShutdownSocket() {\n  // Close socket for clean disconnect.\n  if (conn->socket()->IsOpen()) {\n    std::ignore = conn->socket()->Shutdown(SHUT_RDWR);\n  }\n}\n\nFlowInfo::~FlowInfo() {\n}\n\nFlowInfo::FlowInfo() {\n}\n\n}  // namespace dfly\n"
  },
  {
    "path": "src/server/dflycmd.h",
    "content": "// Copyright 2023, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#pragma once\n\n#include <absl/container/btree_map.h>\n\n#include <atomic>\n#include <memory>\n\n#include \"server/conn_context.h\"\n#include \"server/execution_state.h\"\n#include \"util/fibers/synchronization.h\"\n\nnamespace facade {\nclass RedisReplyBuilder;\n}  // namespace facade\n\nnamespace util {\nclass ListenerInterface;\n}  // namespace util\n\nnamespace dfly {\n\nclass EngineShardSet;\nclass ServerFamily;\nclass RdbSaver;\nclass JournalStreamer;\nstruct ReplicaRoleInfo;\nstruct ReplicationMemoryStats;\n\n// Stores information related to a single flow.\nstruct FlowInfo {\n  FlowInfo();\n  ~FlowInfo();\n\n  // Shutdown associated socket if its still open.\n  void TryShutdownSocket();\n\n  facade::Connection* conn = nullptr;\n\n  std::unique_ptr<RdbSaver> saver;            // Saver for full sync phase.\n  std::unique_ptr<JournalStreamer> streamer;  // Streamer for stable sync phase\n  std::string eof_token;\n\n  DflyVersion version = DflyVersion::VER1;\n\n  std::optional<LSN> start_partial_sync_at;\n  uint64_t last_acked_lsn = 0;\n\n  std::function<void()> cleanup;  // Optional cleanup for cancellation.\n};\n\n// DflyCmd is responsible for managing replication. A master instance can be connected\n// to many replica instances, what is more, each of them can open multiple connections.\n// This is why its important to understand replica lifecycle management before making\n// any crucial changes.\n//\n// A ReplicaInfo instance is responsible for managing a replica's state and is accessible by its\n// sync_id. Each per-thread connection is called a Flow and is represented by the FlowInfo\n// instance, accessible by its index.\n//\n// An important aspect is synchronization and efficient locking. Two levels of locking are used:\n//  1. 
Global locking.\n//    Member mutex `mu_` is used for synchronizing operations connected with internal data\n//    structures.\n//  2. Per-replica locking\n//    ReplicaInfo contains a separate mutex that is used for replica-only routines. It is held\n//    during state transitions (start full sync, start stable state sync), cancellation and member\n//    access.\n//\n// Upon first connection from the replica, a new ReplicaInfo is created.\n// It transitions through the following phases:\n//  1. Preparation\n//    During this start phase the \"flows\" are set up - one connection for every master thread. Those\n//    connections are registered by the FLOW command sent from each newly opened connection.\n//  2. Full sync\n//    This phase is initiated by the SYNC command. It makes sure all flows are connected and the\n//    replica is in a valid state.\n//  3. Stable state sync\n//    After the replica has received confirmation, that each flow is ready to transition, it sends a\n//    STARTSTABLE command. This transitions the replica into streaming journal changes.\n//  4. Cancellation\n//    This can happen due to an error at any phase or through a normal abort. For properly releasing\n//    resources we need to run a multi-step cancellation procedure:\n//    1. Transition state\n//      We obtain the ReplicaInfo lock, transition into the cancelled state and cancel the context.\n//    2. Joining tasks\n//      Running tasks will stop on receiving the cancellation flag. Each FlowInfo has also an\n//      optional cleanup handler, that is invoked after cancelling. This should allow recovering\n//      from any state. The flows task will be awaited and joined if present.\n//    3. Unlocking the mutex\n//      Now that all tasks have finished and all cleanup handlers have run, we can safely release\n//      the per-replica mutex, so that all OnClose handlers will unblock and internal resources\n//      will be released by dragonfly. 
Then the ReplicaInfo is removed from the global map.\n//\n//\nclass DflyCmd {\n public:\n  // See class comments for state descriptions.\n  enum class SyncState { PREPARATION, FULL_SYNC, STABLE_SYNC, CANCELLED };\n\n  // Stores information related to a single replica.\n  struct ABSL_LOCKABLE ReplicaInfo {\n    ReplicaInfo(unsigned flow_count, std::string address, uint32_t listening_port,\n                ExecutionState::ErrHandler err_handler)\n        : replica_state{SyncState::PREPARATION},\n          exec_st{std::move(err_handler)},\n          address{std::move(address)},\n          listening_port(listening_port),\n          flows{flow_count} {\n    }\n\n    // Transition into cancelled state, run cleanup.\n    void Cancel();\n\n    SyncState replica_state;  // always guarded by shared_mu\n    ExecutionState exec_st;\n\n    std::string id;\n    std::string address;\n    uint32_t listening_port;\n    DflyVersion version = DflyVersion::VER1;\n\n    // Flows describe the state of shard-local flow.\n    // They are always indexed by the shard index on the master.\n    std::vector<FlowInfo> flows;\n\n    util::fb2::SharedMutex shared_mu;  // See top of header for locking levels.\n  };\n\n public:\n  DflyCmd(ServerFamily* server_family);\n\n  void Run(CmdArgList args, CommandContext* cmd_cntx);\n\n  void OnClose(unsigned sync_id);\n\n  // Stop all background processes so we can exit in orderly manner.\n  void Shutdown();\n\n  // Create new sync session. Returns (session_id, number of flows)\n  std::pair<uint32_t, unsigned> CreateSyncSession(ConnectionState* state) ABSL_LOCKS_EXCLUDED(mu_);\n\n  // Master side access method to replication info of that connection.\n  std::shared_ptr<ReplicaInfo> GetReplicaInfoFromConnection(ConnectionState* state);\n\n  // Master-side command. 
Provides Replica info.\n  std::vector<ReplicaRoleInfo> GetReplicasRoleInfo() const ABSL_LOCKS_EXCLUDED(mu_);\n\n  void GetReplicationMemoryStats(ReplicationMemoryStats* out) const ABSL_NO_THREAD_SAFETY_ANALYSIS;\n\n  // Sets metadata.\n  void SetDflyClientVersion(ConnectionState* state, DflyVersion version);\n\n  // Tries to break those flows that stuck on socket write for too long time.\n  void BreakStalledFlowsInShard() ABSL_NO_THREAD_SAFETY_ANALYSIS;\n\n private:\n  // JOURNAL [START/STOP]\n  // Start or stop journaling.\n  // void Journal(CmdArgList args, ConnectionContext* cntx);\n\n  // THREAD [to_thread]\n  // Return connection thread index or migrate to another thread.\n  void Thread(CmdArgList args, CommandContext* cmd_cntx);\n\n  // FLOW <masterid> <syncid> <flowid> [<seqid>]\n  // Register connection as flow for sync session.\n  // If seqid is given, it means the client wants to try partial sync.\n  // If it is possible, return Ok and prepare for a partial sync, else\n  // return error and ask the replica to execute FLOW again.\n  void Flow(CmdArgList args, CommandContext* cmd_cntx);\n\n  // SYNC <syncid>\n  // Initiate full sync.\n  void Sync(CmdArgList args, CommandContext* cmd_cntx);\n\n  // STARTSTABLE <syncid>\n  // Switch to stable state replication.\n  void StartStable(CmdArgList args, CommandContext* cmd_cntx);\n  // TAKEOVER <syncid>\n  // Shut this master down atomically with replica promotion.\n  void TakeOver(CmdArgList args, CommandContext* cmd_cntx);\n\n  // EXPIRE\n  // Check all keys for expiry.\n  void Expire(CmdArgList args, CommandContext* cmd_cntx);\n\n  // REPLICAOFFSET\n  // Return journal records num sent for each flow of replication.\n  void ReplicaOffset(CmdArgList args, CommandContext* cmd_cntx);\n\n  void Load(CmdArgList args, CommandContext* cmd_cntx);\n\n  // Start full sync in thread. Start FullSyncFb. 
Called for each flow.\n  facade::OpStatus StartFullSyncInThread(FlowInfo* flow, ExecutionState* cntx, EngineShard* shard);\n\n  // Stop full sync in thread. Run state switch cleanup.\n  facade::OpStatus StopFullSyncInThread(FlowInfo* flow, ExecutionState* cntx, EngineShard* shard);\n\n  // Start stable sync in thread. Called for each flow.\n  void StartStableSyncInThread(FlowInfo* flow, ExecutionState* cntx, EngineShard* shard);\n\n  // Get ReplicaInfo by sync_id.\n  std::shared_ptr<ReplicaInfo> GetReplicaInfo(uint32_t sync_id) ABSL_LOCKS_EXCLUDED(mu_);\n\n  // Find sync info by id or send error reply.\n  std::pair<uint32_t, std::shared_ptr<ReplicaInfo>> GetReplicaInfoOrReply(std::string_view id,\n                                                                          CommandContext* cmd_cntx)\n      ABSL_LOCKS_EXCLUDED(mu_);\n\n  // Check replica is in expected state and flows are set-up correctly.\n  bool CheckReplicaStateOrReply(const ReplicaInfo& ri, SyncState expected,\n                                CommandContext* cmd_cntx);\n\n  // Main entrypoint for stopping replication.\n  void StopReplication(uint32_t sync_id) ABSL_LOCKS_EXCLUDED(mu_);\n\n  std::optional<LSN> ParseLsnVec(std::string_view lsn_vec, size_t last_journal_lsn_size,\n                                 size_t flow_id, CommandContext* cmd_cntx);\n\n  // Checks if LSN exists in the partial sync buffer. If not, also LOG that we can't\n  // partial sync.\n  bool IsLSNInPartialSyncBuffer(LSN lsn) const;\n\n  // Return a map between replication ID to lag. 
Lag is defined as the maximum difference\n  // between the master's LSN and the last acknowledged LSN over all shards.\n  std::map<uint32_t, LSN> ReplicationLagsLocked() const ABSL_EXCLUSIVE_LOCKS_REQUIRED(mu_);\n\n  ServerFamily* sf_;  // Not owned\n  uint32_t next_sync_id_ = 1;\n\n  using ReplicaInfoMap = absl::btree_map<uint32_t, std::shared_ptr<ReplicaInfo>>;\n  ReplicaInfoMap replica_infos_ ABSL_GUARDED_BY(mu_);\n\n  mutable util::fb2::Mutex mu_;  // Guard global operations. See header top for locking levels.\n};\n\nstd::string_view SyncStateName(DflyCmd::SyncState sync_state);\n\n}  // namespace dfly\n"
  },
  {
    "path": "src/server/dragonfly_test.cc",
    "content": "// Copyright 2022, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\nextern \"C\" {\n#include \"redis/sds.h\"\n#include \"redis/zmalloc.h\"\n}\n\n#include <absl/strings/ascii.h>\n#include <absl/strings/str_join.h>\n#include <absl/strings/strip.h>\n#include <gmock/gmock.h>\n#include <reflex/matcher.h>\n\n#include \"base/flags.h\"\n#include \"base/gtest.h\"\n#include \"base/logging.h\"\n#include \"facade/facade_test.h\"\n#include \"server/main_service.h\"\n#include \"server/test_utils.h\"\n\nABSL_DECLARE_FLAG(float, mem_defrag_threshold);\nABSL_DECLARE_FLAG(float, mem_defrag_waste_threshold);\nABSL_DECLARE_FLAG(uint32_t, mem_defrag_check_sec_interval);\nABSL_DECLARE_FLAG(std::vector<std::string>, rename_command);\nABSL_DECLARE_FLAG(bool, lua_resp2_legacy_float);\nABSL_DECLARE_FLAG(double, eviction_memory_budget_threshold);\nABSL_DECLARE_FLAG(std::vector<std::string>, command_alias);\nABSL_DECLARE_FLAG(bool, latency_tracking);\n\nnamespace dfly {\n\nusing namespace std;\nusing namespace util;\nusing absl::SetFlag;\nusing absl::StrCat;\nusing fb2::Fiber;\nusing ::io::Result;\nusing testing::AnyOf;\nusing testing::Contains;\nusing testing::ElementsAre;\nusing testing::HasSubstr;\nusing testing::Key;\nusing testing::Pair;\n\nnamespace {\n\nconstexpr unsigned kPoolThreadCount = 4;\n\nconst char kKey1[] = \"x\";\nconst char kKey2[] = \"b\";\n\nconst char kKeySid0[] = \"x\";\nconst char kKeySid1[] = \"c\";\nconst char kKeySid2[] = \"b\";\n\n}  // namespace\n\n// This test is responsible for server and main service\n// (connection, transaction etc) families.\nclass DflyEngineTest : public BaseFamilyTest {\n protected:\n  DflyEngineTest() {\n    num_threads_ = kPoolThreadCount;\n  }\n};\n\nclass DflyEngineTestWithRegistry : public BaseFamilyTest {\n protected:\n  DflyEngineTestWithRegistry() {\n    num_threads_ = kPoolThreadCount;\n    ResetService();\n  }\n};\n\nclass SingleThreadDflyEngineTest : public BaseFamilyTest {\n 
protected:\n  SingleThreadDflyEngineTest() {\n    num_threads_ = 1;\n  }\n};\n\nclass DefragDflyEngineTest : public SingleThreadDflyEngineTest {};\n\n// TODO: to implement equivalent parsing in redis parser.\nTEST_F(DflyEngineTest, Sds) {\n  int argc;\n  sds* argv = sdssplitargs(\"\\r\\n\", &argc);\n  EXPECT_EQ(0, argc);\n  sdsfreesplitres(argv, argc);\n\n  argv = sdssplitargs(\"\\026 \\020 \\200 \\277 \\r\\n\", &argc);\n  EXPECT_EQ(4, argc);\n  EXPECT_STREQ(\"\\026\", argv[0]);\n  sdsfreesplitres(argv, argc);\n\n  argv = sdssplitargs(R\"(abc \"oops\\n\" )\"\n                      \"\\r\\n\",\n                      &argc);\n  EXPECT_EQ(2, argc);\n  EXPECT_STREQ(\"oops\\n\", argv[1]);\n  sdsfreesplitres(argv, argc);\n\n  argv = sdssplitargs(R\"( \"abc\\xf0\" )\"\n                      \"\\t'oops\\n'  \\r\\n\",\n                      &argc);\n  ASSERT_EQ(2, argc);\n  EXPECT_STREQ(\"abc\\xf0\", argv[0]);\n  EXPECT_STREQ(\"oops\\n\", argv[1]);\n  sdsfreesplitres(argv, argc);\n}\n\nclass DflyRenameCommandTest : public DflyEngineTest {\n protected:\n  DflyRenameCommandTest() {\n    // rename flushall to myflushall, flushdb command will not be able to execute\n    absl::SetFlag(\n        &FLAGS_rename_command,\n        std::vector<std::string>({\"flushall=myflushall\", \"flushdb=\", \"ping=abcdefghijklmnop\"}));\n  }\n\n  absl::FlagSaver _saver;\n};\n\nTEST_F(DflyRenameCommandTest, RenameCommand) {\n  Run({\"set\", \"a\", \"1\"});\n  ASSERT_EQ(1, CheckedInt({\"dbsize\"}));\n  // flushall should not execute anything and should return error, as it was renamed.\n  ASSERT_THAT(Run({\"flushall\"}), ErrArg(\"unknown command `FLUSHALL`\"));\n\n  ASSERT_EQ(1, CheckedInt({\"dbsize\"}));\n\n  ASSERT_EQ(Run({\"myflushall\"}), \"OK\");\n\n  ASSERT_EQ(0, CheckedInt({\"dbsize\"}));\n\n  ASSERT_THAT(Run({\"flushdb\", \"0\"}), ErrArg(\"unknown command `FLUSHDB`\"));\n\n  ASSERT_THAT(Run({\"\"}), ErrArg(\"unknown command ``\"));\n\n  ASSERT_THAT(Run({\"ping\"}), ErrArg(\"unknown command 
`PING`\"));\n  ASSERT_THAT(Run({\"abcdefghijklmnop\"}), \"PONG\");\n}\n\nTEST_F(SingleThreadDflyEngineTest, GlobalSingleThread) {\n  Run({\"set\", \"a\", \"1\"});\n  Run({\"move\", \"a\", \"1\"});\n}\n\nTEST_F(DflyEngineTest, LuaErrors) {\n  auto resp = Run({\"eval\", \"return redis.error_reply('some error')\", \"0\"});\n  EXPECT_THAT(resp, ErrArg(\"some error\"));\n\n  resp = Run({\"eval\", \"return redis.pcall('foo', 'bar')\", \"0\"});\n  EXPECT_THAT(resp, ErrArg(\"ERR unknown command\"));\n\n  resp = Run({\"eval\", \"return redis.pcall('incrby', 'foo', 'bar')\", \"1\"});\n  EXPECT_THAT(resp, ErrArg(\"ERR Number of keys can't be greater than number of args\"));\n}\n\nTEST_F(DflyEngineTest, EvalResp) {\n  auto resp = Run({\"eval\", \"return 43\", \"0\"});\n  EXPECT_THAT(resp, IntArg(43));\n\n  resp = Run({\"eval\", \"return {5, 'foo', 17.5}\", \"0\"});\n  ASSERT_THAT(resp, ArrLen(3));\n  EXPECT_THAT(resp.GetVec(), ElementsAre(IntArg(5), \"foo\", \"17.5\"));\n\n  resp = Run({\"eval\", \"return {map={a=1,b=2}}\", \"0\"});\n  ASSERT_THAT(resp, ArrLen(4));\n  EXPECT_THAT(resp.GetVec(), AnyOf(ElementsAre(\"a\", IntArg(1), \"b\", IntArg(2)),\n                                   ElementsAre(\"b\", IntArg(2), \"a\", IntArg(1))));\n}\n\nTEST_F(DflyEngineTest, EvalPublish) {\n  auto resp = pp_->at(1)->Await([&] { return Run({\"subscribe\", \"foo\"}); });\n  EXPECT_THAT(resp, ArrLen(3));\n\n  resp = Run({\"eval\", \"return redis.call('publish', 'foo', 'bar')\", \"0\"});\n  EXPECT_THAT(resp, IntArg(1));\n}\n\nTEST_F(DflyEngineTest, EvalBug59) {\n  auto resp = Run({\"eval\", R\"(\nlocal epoch\nif redis.call('exists', KEYS[2]) ~= 0 then\n  epoch = redis.call(\"hget\", KEYS[2], \"e\")\nend\nif epoch == false or epoch == nil then\n  epoch = ARGV[6]\n  redis.call(\"hset\", KEYS[2], \"e\", epoch)\nend\nlocal offset = redis.call(\"hincrby\", KEYS[2], \"s\", 1)\nif ARGV[5] ~= '0' then\n\tredis.call(\"expire\", KEYS[2], ARGV[5])\nend\nredis.call(\"xadd\", KEYS[1], \"MAXLEN\", ARGV[2], 
offset, \"d\", ARGV[1])\nredis.call(\"expire\", KEYS[1], ARGV[3])\nif ARGV[4] ~= '' then\n\tlocal payload = \"__\" .. \"p1:\" .. offset .. \":\" .. epoch .. \"__\" .. ARGV[1]\n\tredis.call(\"publish\", ARGV[4], payload)\nend\n\nreturn {offset, epoch}\n    )\",\n                   \"2\", \"x\", \"y\", \"1\", \"2\", \"3\", \"4\", \"5\", \"6\"});\n  ASSERT_THAT(resp, ArrLen(2));\n  EXPECT_THAT(resp.GetVec(), ElementsAre(IntArg(1), \"6\"));\n}\n\n// Scenario: 1. a lua call A schedules itself on shards 0, 1, 2.\n//           2. another lua call B schedules itself on shards 1,2 but on shard 1 (or 2) it\n//              schedules itself before A.\n//              the order of scheduling: shard 0: A, shard 1: B, A. shard 2: B, A.\n//           3. A executes its first command first, which coincidentally runs only on shard 0,\n//              hence A finishes before B and then it tries to cleanup.\n//           4. There was an incorrect cleanup of multi-transactions that breaks for shard 1 (or 2)\n//              because it assumes that A is at the front of the queue.\nTEST_F(DflyEngineTest, EvalBug713) {\n  const char* script = \"return redis.call('get', KEYS[1])\";\n\n  // A\n  auto fb0 = pp_->at(1)->LaunchFiber([&] {\n    ThisFiber::Yield();\n    for (unsigned i = 0; i < 50; ++i) {\n      Run({\"eval\", script, \"3\", kKeySid0, kKeySid1, kKeySid2});\n    }\n  });\n\n  // B\n  for (unsigned j = 0; j < 50; ++j) {\n    Run({\"eval\", script, \"2\", kKeySid1, kKeySid2});\n  }\n  fb0.Join();\n}\n\n// Tests deadlock that happened due to the fact that trans->Schedule was called\n// before interpreter->Lock().\n//\n// The problematic scenario:\n// 1. transaction 1 schedules itself and blocks on an interpreter lock\n// 2. 
transaction 2 schedules itself, but meanwhile an interpreter unlocks itself and\n//    transaction 2 grabs the lock but can not progress due to transaction 1 already\n//    scheduled before.\nTEST_F(DflyEngineTest, EvalBug713b) {\n  const char* script = \"return redis.call('get', KEYS[1])\";\n\n  const uint32_t kNumFibers = 20;\n  Fiber fibers[kNumFibers];\n\n  for (unsigned j = 0; j < kNumFibers; ++j) {\n    fibers[j] = pp_->at(1)->LaunchFiber([j, script, this] {\n      for (unsigned i = 0; i < 50; ++i) {\n        Run(StrCat(\"fb\", j), {\"eval\", script, \"3\", kKeySid0, kKeySid1, kKeySid2});\n      }\n    });\n  }\n\n  for (unsigned j = 0; j < kNumFibers; ++j) {\n    fibers[j].Join();\n  }\n}\n\nTEST_F(DflyEngineTest, EvalSha) {\n  auto resp = Run({\"script\", \"load\", \"return 5\"});\n  EXPECT_THAT(resp, ArgType(RespExpr::STRING));\n\n  string sha{ToSV(resp.GetBuf())};\n\n  resp = Run({\"evalsha\", sha, \"0\"});\n  EXPECT_THAT(resp, IntArg(5));\n\n  absl::AsciiStrToUpper(&sha);\n  resp = Run({\"evalsha\", sha, \"0\"});\n  EXPECT_THAT(resp, IntArg(5));\n\n  resp = Run({\"evalsha\", \"foobar\", \"0\"});\n  EXPECT_THAT(resp, ErrArg(\"No matching\"));\n\n  resp = Run({\"evalsha\", \"\", \"0\"});\n  EXPECT_THAT(resp, ErrArg(\"No matching\"));\n\n  resp = Run({\"script\", \"load\", \"\\n return 5\"});\n\n  // Important to keep spaces in order to be compatible with Redis.\n  // See https://github.com/dragonflydb/dragonfly/issues/146\n  EXPECT_THAT(resp, \"c6459b95a0e81df97af6fdd49b1a9e0287a57363\");\n}\n\nTEST_F(DflyEngineTest, ScriptFlush) {\n  auto resp = Run({\"script\", \"load\", \"return 5\"});\n  EXPECT_THAT(resp, ArgType(RespExpr::STRING));\n  string sha{ToSV(resp.GetBuf())};\n  resp = Run({\"evalsha\", sha, \"0\"});\n  EXPECT_THAT(5, resp.GetInt());\n  resp = Run({\"script\", \"exists\", sha});\n  EXPECT_THAT(1, resp.GetInt());\n\n  resp = Run({\"script\", \"flush\"});\n  resp = Run({\"script\", \"exists\", sha});\n  EXPECT_THAT(0, resp.GetInt());\n  
EXPECT_THAT(Run({\"evalsha\", sha, \"0\"}), ErrArg(\"NOSCRIPT No matching script. Please use EVAL.\"));\n\n  resp = Run({\"script\", \"load\", \"return 5\"});\n  EXPECT_THAT(resp, ArgType(RespExpr::STRING));\n  sha = string{ToSV(resp.GetBuf())};\n  resp = Run({\"evalsha\", sha, \"0\"});\n  EXPECT_THAT(5, resp.GetInt());\n  resp = Run({\"script\", \"exists\", sha});\n  EXPECT_THAT(1, resp.GetInt());\n}\n\nTEST_F(DflyEngineTestWithRegistry, Hello) {\n  auto resp = Run({\"hello\"});\n  ASSERT_THAT(resp, ArrLen(14));\n  resp = Run({\"hello\", \"2\"});\n  ASSERT_THAT(resp, ArrLen(14));\n\n  EXPECT_THAT(\n      resp.GetVec(),\n      ElementsAre(\"server\", \"redis\", \"version\", \"7.4.0\", \"dragonfly_version\",\n                  ArgType(RespExpr::STRING), \"proto\", IntArg(2), \"id\", ArgType(RespExpr::INT64),\n                  \"mode\", testing::AnyOf(\"standalone\", \"cluster\"), \"role\", \"master\"));\n\n  resp = Run({\"hello\", \"3\"});\n  ASSERT_THAT(resp, ArrLen(14));\n  EXPECT_THAT(\n      resp.GetVec(),\n      ElementsAre(\"server\", \"redis\", \"version\", \"7.4.0\", \"dragonfly_version\",\n                  ArgType(RespExpr::STRING), \"proto\", IntArg(3), \"id\", ArgType(RespExpr::INT64),\n                  \"mode\", testing::AnyOf(\"standalone\", \"cluster\"), \"role\", \"master\"));\n\n  EXPECT_THAT(Run({\"hello\", \"2\", \"AUTH\", \"uname\", \"pwd\"}),\n              ErrArg(\"WRONGPASS invalid username-password pair or user is disabled.\"));\n\n  EXPECT_THAT(Run({\"hello\", \"2\", \"AUTH\", \"default\", \"pwd\"}),\n              ErrArg(\"WRONGPASS invalid username-password pair or user is disabled.\"));\n\n  resp = Run({\"hello\", \"3\", \"AUTH\", \"default\", \"\"});\n  ASSERT_THAT(resp, ErrArg(\"WRONGPASS invalid username-password pair or user is disabled.\"));\n\n  TestInitAclFam();\n\n  resp = Run({\"hello\", \"3\", \"AUTH\", \"default\", \"tmp\"});\n  ASSERT_THAT(resp, ArrLen(14));\n\n  resp = Run({\"hello\", \"3\", \"AUTH\", \"default\", \"tmp\", 
\"SETNAME\", \"myname\"});\n  ASSERT_THAT(resp, ArrLen(14));\n}\n\nusing MP = MemcacheParser;\n\nTEST_F(DflyEngineTest, Memcache) {\n#if 0\n  auto resp = RunMC(MP::SET, \"key\", MCArgs{\"bar\", 1});\n  EXPECT_THAT(resp, ElementsAre(\"STORED\"));\n\n  resp = RunMC(MP::GETS, \"key\");\n  EXPECT_THAT(resp, ElementsAre(\"VALUE key 1 3 0\", \"bar\", \"END\"));\n\n  resp = RunMC(MP::GET, \"key\");\n  EXPECT_THAT(resp, ElementsAre(\"VALUE key 1 3\", \"bar\", \"END\"));\n\n  resp = RunMC(MP::ADD, \"key\", MCArgs{\"bar\", 1});\n  EXPECT_THAT(resp, ElementsAre(\"NOT_STORED\"));\n\n  resp = RunMC(MP::REPLACE, \"key2\", MCArgs{\"bar\", 1});\n  EXPECT_THAT(resp, ElementsAre(\"NOT_STORED\"));\n\n  resp = RunMC(MP::ADD, \"key2\", MCArgs{\"bar2\", 2});\n  EXPECT_THAT(resp, ElementsAre(\"STORED\"));\n\n  resp = GetMC(MP::GET, {\"key2\", \"key\"});\n  EXPECT_THAT(resp, ElementsAre(\"VALUE key2 2 4\", \"bar2\", \"VALUE key 1 3\", \"bar\", \"END\"));\n\n  resp = RunMC(MP::APPEND, \"key2\", MCArgs{\"val2\", 0});\n  EXPECT_THAT(resp, ElementsAre(\"STORED\"));\n  resp = RunMC(MP::GET, \"key2\");\n  EXPECT_THAT(resp, ElementsAre(\"VALUE key2 2 8\", \"bar2val2\", \"END\"));\n\n  resp = RunMC(MP::APPEND, \"unkn\", MCArgs{\"val2\", 0});\n  EXPECT_THAT(resp, ElementsAre(\"NOT_STORED\"));\n\n  resp = RunMC(MP::GET, \"unkn\");\n  EXPECT_THAT(resp, ElementsAre(\"END\"));\n\n  resp = GetMC(MP::GETS, {\"key\", \"key2\", \"unknown\"});\n  EXPECT_THAT(resp, ElementsAre(\"VALUE key 1 3 0\", \"bar\", \"VALUE key2 2 8 0\", \"bar2val2\", \"END\"));\n\n  EXPECT_THAT(RunMC(MP::SET, \"foo\", MCArgs{\"bar\"}), ElementsAre(\"STORED\"));\n\n  EXPECT_THAT(RunMC(MP::SET, \"foo\", MCArgs{\"bar\"}), ElementsAre(\"STORED\"));\n\n  // 30 seconds into the future\n  auto future_ts = time(nullptr) + 30;\n  EXPECT_THAT(GetMC(MP::GAT, {StrCat(future_ts), \"foo\", \"abc\", \"def\", \"ghi\"}),\n              ElementsAre(\"VALUE foo 0 3\", \"bar\", \"END\"));\n\n  EXPECT_THAT(GetMC(MP::GAT, {\"1000\"}),\n              
ElementsAre(\"SERVER_ERROR wrong number of arguments for 'gat' command\"));\n#endif\n  EXPECT_THAT(RunMC(MP::SET, \"persisted-key\", MCArgs{\"bar\"}), ElementsAre(\"STORED\"));\n  // expiry of 0 removes the key expiry\n  EXPECT_THAT(GetMC(MP::GAT, {\"0\", \"persisted-key\"}),\n              ElementsAre(\"VALUE persisted-key 0 3\", \"bar\", \"END\"));\n}\n\nTEST_F(DflyEngineTest, MemcacheIncr) {\n  auto resp = RunMC(MP::INCR, \"key\", MCArgs{1});\n  EXPECT_THAT(resp, ElementsAre(\"NOT_FOUND\"));\n  resp = RunMC(MP::SET, \"key\", MCArgs{\"1\"});\n  EXPECT_THAT(resp, ElementsAre(\"STORED\"));\n  resp = RunMC(MP::INCR, \"key\", MCArgs{5});\n  EXPECT_THAT(resp, ElementsAre(\"6\"));\n}\n\nTEST_F(DflyEngineTest, MemcacheFlags) {\n  using MP = MemcacheParser;\n\n  auto resp = Run(\"resp\", {\"SET\", \"key\", \"bar\", \"_MCFLAGS\", \"42\"});\n  ASSERT_EQ(resp, \"OK\");\n  MCResponse resp2 = RunMC(MP::GET, \"key\");\n  EXPECT_THAT(resp2, ElementsAre(\"VALUE key 42 3\", \"bar\", \"END\"));\n\n  ASSERT_EQ(Run(\"resp\", {\"flushdb\"}), \"OK\");\n  pp_->AwaitFiberOnAll([](auto*) {\n    if (auto* shard = EngineShard::tlocal(); shard) {\n      EXPECT_EQ(namespaces->GetDefaultNamespace()\n                    .GetDbSlice(shard->shard_id())\n                    .GetDBTable(0)\n                    ->mcflag.size(),\n                0u);\n    }\n  });\n}\n\nTEST_F(DflyEngineTest, LimitMemory) {\n  mi_option_enable(mi_option_limit_os_alloc);\n  string blob(128, 'a');\n  for (size_t i = 0; i < 10000; ++i) {\n    auto resp = Run({\"set\", absl::StrCat(blob, i), blob});\n    ASSERT_EQ(resp, \"OK\");\n  }\n}\n\nTEST_F(DflyEngineTest, FlushAll) {\n  auto fb0 = pp_->at(0)->LaunchFiber([&] { Run({\"flushall\"}); });\n\n  auto fb1 = pp_->at(1)->LaunchFiber([&] {\n    Run({\"select\", \"2\"});\n\n    for (size_t i = 1; i < 100; ++i) {\n      RespExpr resp = Run({\"set\", \"foo\", \"bar\"});\n      ASSERT_EQ(resp, \"OK\");\n      ThisFiber::Yield();\n    }\n  });\n\n  fb0.Join();\n  
fb1.Join();\n}\n\nTEST_F(DflyEngineTest, OOM) {\n  max_memory_limit = 300000;\n  size_t i = 0;\n  RespExpr resp;\n  for (; i < 10000; i += 3) {\n    resp = Run({\"mset\", StrCat(\"key\", i), \"bar\", StrCat(\"key\", i + 1), \"bar\", StrCat(\"key\", i + 2),\n                \"bar\"});\n    if (resp != \"OK\")\n      break;\n    ASSERT_EQ(resp, \"OK\");\n  }\n  EXPECT_THAT(resp, ErrArg(\"Out of mem\"));\n\n  string_view commands[5] = {\"set\", \"rpush\", \"sadd\", \"zadd\", \"hset\"};\n  for (unsigned j = 0; j < ABSL_ARRAYSIZE(commands); ++j) {\n    string_view cmd = commands[j];\n    vector<string_view> run_args({cmd, \"\"});\n    if (cmd == \"zadd\") {\n      run_args.push_back(\"1.1\");\n    } else if (cmd == \"hset\") {\n      run_args.push_back(\"foo\");\n    }\n    run_args.push_back(\"bar\");\n\n    for (unsigned i = 0; i < 5000; ++i) {\n      auto str = StrCat(\"key\", cmd, i);\n      run_args[1] = str;\n      resp = Run(run_args);\n\n      if (resp.type == RespExpr::ERROR)\n        break;\n\n      ASSERT_THAT(resp, testing::AnyOf(IntArg(1), \"OK\")) << cmd;\n    }\n    EXPECT_THAT(resp, ErrArg(\"Out of mem\"));\n  }\n}\n\n/// Reproduces the case where items with expiry data were evicted,\n/// and then written with the same key.\nTEST_F(DflyEngineTest, Bug207) {\n  max_memory_limit = 300000 * 4;\n\n  // The threshold is set to 0.3 to trigger eviction earlier and prevent OOM.\n  absl::FlagSaver fs;\n  absl::SetFlag(&FLAGS_eviction_memory_budget_threshold, 0.3);\n\n  shard_set->TEST_EnableCacheMode();\n\n  /* The value should be large enough to avoid being inlined. Heartbeat evicts only objects for\n   * which HasAllocated() returns true. 
*/\n  std::string value(1000, '.');\n\n  ssize_t i = 0;\n  RespExpr resp;\n  for (; i < 1000; ++i) {\n    resp = Run({\"setex\", StrCat(\"key\", i), \"30\", value});\n    ASSERT_EQ(resp, \"OK\");\n  }\n\n  auto metrics = GetMetrics();\n  EXPECT_GT(metrics.events.evicted_keys, 0) << FormatMetrics(metrics);\n\n  for (; i > 0; --i) {\n    resp = Run({\"setex\", StrCat(\"key\", i), \"30\", \"bar\"});\n    ASSERT_EQ(resp, \"OK\");\n  }\n}\n\nTEST_F(DflyEngineTest, StickyEviction) {\n  max_memory_limit = 600000;  // 0.6mb\n  shard_set->TEST_EnableCacheMode();\n\n  string tmp_val(100, '.');\n\n  ssize_t failed = -1;\n\n  for (ssize_t i = 0; i < 4500; ++i) {\n    string key = StrCat(\"volatile\", i);\n    ASSERT_EQ(\"OK\", Run({\"set\", key, tmp_val}));\n    usleep(1);\n  }\n\n  bool done = false;\n  for (ssize_t i = 0; !done && i < 5000; ++i) {\n    string key = StrCat(\"key\", i);\n    while (true) {\n      if (Run({\"set\", key, tmp_val}) != \"OK\") {\n        failed = i;\n        done = true;\n        break;\n      }\n\n      // Eviction could have happened right after set, before stick. 
If so, try again\n      if (Run({\"stick\", key}).GetInt() == 1) {\n        break;\n      }\n    }\n  }\n\n  ASSERT_GE(failed, 0);\n  // Make sure none of the sticky values was evicted\n  for (ssize_t i = 0; i < failed; ++i) {\n    ASSERT_THAT(Run({\"exists\", StrCat(\"key\", i)}), IntArg(1));\n  }\n}\n\nTEST_F(DflyEngineTest, ZeroAllocationEviction) {\n  max_memory_limit = 500000;  // 0.5mb\n  shard_set->TEST_EnableCacheMode();\n\n  // Create entries with zero-allocation values (small integers)\n  // but with long keys to consume memory\n  string long_key_prefix(50, 'k');  // 50 character prefix\n\n  vector<string> keys;\n  int successful_sets = 0;\n  for (int i = 0; i < 1000; ++i) {\n    string key = StrCat(long_key_prefix, i);\n    auto result = Run({\"set\", key, to_string(i)});  // small integer value\n    if (result == \"OK\") {\n      keys.emplace_back(key);\n      successful_sets++;\n    } else {\n      break;  // Stop when we hit memory limit\n    }\n  }\n\n  ASSERT_GT(successful_sets, 10) << \"Should be able to set at least some keys\";\n\n  // Fill up more memory to trigger eviction\n  string large_value(500, 'v');\n  for (int i = 0; i < 500; ++i) {\n    string key = StrCat(\"trigger\", i);\n    Run({\"set\", key, large_value});  // This will trigger eviction\n  }\n\n  // Verify that some zero-allocation entries were evicted\n  int evicted_count = 0;\n  for (const string& key : keys) {\n    if (Run({\"exists\", key}).GetInt() == 0) {\n      evicted_count++;\n    }\n  }\n\n  // Should have evicted some entries with zero-allocation values\n  // but not external (disk storage) entries\n  EXPECT_GT(evicted_count, 0) << \"Zero-allocation entries should be evicted under memory pressure\";\n}\n\nTEST_F(DflyEngineTest, PSubscribe) {\n  single_response_ = false;\n  auto resp = pp_->at(1)->Await([&] { return Run({\"psubscribe\", \"a*\", \"b*\"}); });\n  EXPECT_THAT(resp, ArrLen(3));\n  resp = pp_->at(0)->Await([&] { return Run({\"publish\", \"ab\", \"foo\"}); 
});\n  EXPECT_THAT(resp, IntArg(1));\n\n  pp_->AwaitFiberOnAll([](ProactorBase* pb) {});\n\n  ASSERT_EQ(1, SubscriberMessagesLen(\"IO1\"));\n\n  const auto& msg = GetPublishedMessage(\"IO1\", 0);\n  EXPECT_EQ(\"foo\", msg.message);\n  EXPECT_EQ(\"ab\", msg.channel);\n  EXPECT_EQ(\"a*\", msg.pattern);\n}\n\nTEST_F(DflyEngineTest, PSubscribeMatchOnlyStar) {\n  single_response_ = false;\n  auto resp = pp_->at(1)->Await([&] { return Run({\"psubscribe\", \"*\"}); });\n  EXPECT_THAT(resp, ArrLen(3));\n  resp = pp_->at(0)->Await([&] { return Run({\"PUBLISH\", \"1234567890123456\", \"abc\"}); });\n  EXPECT_THAT(resp, IntArg(1));\n\n  pp_->AwaitFiberOnAll([](ProactorBase* pb) {});\n\n  ASSERT_EQ(1, SubscriberMessagesLen(\"IO1\"));\n\n  const auto& msg = GetPublishedMessage(\"IO1\", 0);\n  EXPECT_EQ(\"abc\", msg.message);\n  EXPECT_EQ(\"1234567890123456\", msg.channel);\n  EXPECT_EQ(\"*\", msg.pattern);\n}\n\nTEST_F(DflyEngineTest, Unsubscribe) {\n  auto resp = Run({\"unsubscribe\", \"a\"});\n  EXPECT_THAT(resp.GetVec(), ElementsAre(\"unsubscribe\", \"a\", IntArg(0)));\n\n  resp = Run({\"unsubscribe\"});\n  EXPECT_THAT(resp.GetVec(), ElementsAre(\"unsubscribe\", ArgType(RespExpr::NIL), IntArg(0)));\n\n  single_response_ = false;\n\n  Run({\"subscribe\", \"a\", \"b\"});\n\n  resp = Run({\"unsubscribe\", \"a\"});\n  EXPECT_THAT(resp.GetVec(), ElementsAre(\"unsubscribe\", \"a\", IntArg(1)));\n\n  resp = Run({\"unsubscribe\"});\n  EXPECT_THAT(resp.GetVec(), ElementsAre(\"unsubscribe\", \"b\", IntArg(0)));\n}\n\nTEST_F(DflyEngineTest, PUnsubscribe) {\n  auto resp = Run({\"punsubscribe\", \"a*\"});\n  EXPECT_THAT(resp.GetVec(), ElementsAre(\"punsubscribe\", \"a*\", IntArg(0)));\n\n  resp = Run({\"punsubscribe\"});\n  EXPECT_THAT(resp.GetVec(), ElementsAre(\"punsubscribe\", ArgType(RespExpr::NIL), IntArg(0)));\n\n  single_response_ = false;\n  Run({\"psubscribe\", \"a*\", \"b*\"});\n\n  resp = Run({\"punsubscribe\", \"a*\"});\n  EXPECT_THAT(resp.GetVec(), 
ElementsAre(\"punsubscribe\", \"a*\", IntArg(1)));\n\n  resp = Run({\"punsubscribe\"});\n  EXPECT_THAT(resp.GetVec(), ElementsAre(\"punsubscribe\", \"b*\", IntArg(0)));\n}\n\nTEST_F(DflyEngineTest, Bug468) {\n  RespExpr resp = Run({\"multi\"});\n  ASSERT_EQ(resp, \"OK\");\n  resp = Run({\"SET\", \"foo\", \"bar\", \"EX\", \"moo\"});\n  ASSERT_EQ(resp, \"QUEUED\");\n\n  resp = Run({\"exec\"});\n  ASSERT_THAT(resp, ErrArg(\"not an integer\"));\n\n  ASSERT_FALSE(IsLocked(0, \"foo\"));\n\n  resp = Run({\"eval\", \"return redis.call('set', 'foo', 'bar', 'EX', 'moo')\", \"1\", \"foo\"});\n  ASSERT_THAT(resp, ErrArg(\"not an integer\"));\n\n  ASSERT_FALSE(IsLocked(0, \"foo\"));\n}\n\nTEST_F(DflyEngineTest, Bug496) {\n  shard_set->RunBlockingInParallel([](EngineShard* shard) {\n    auto& db = namespaces->GetDefaultNamespace().GetDbSlice(shard->shard_id());\n\n    int cb_hits = 0;\n    uint32_t cb_id =\n        db.RegisterOnChange([&cb_hits](DbIndex, const DbSlice::ChangeReq&) { cb_hits++; });\n\n    {\n      auto res = *db.AddOrFind({}, \"key-1\", std::nullopt);\n      EXPECT_TRUE(res.is_new);\n      EXPECT_EQ(cb_hits, 1);\n    }\n\n    {\n      auto res = *db.AddOrFind({}, \"key-1\", std::nullopt);\n      EXPECT_FALSE(res.is_new);\n      EXPECT_EQ(cb_hits, 2);\n    }\n\n    {\n      auto res = *db.AddOrFind({}, \"key-2\", std::nullopt);\n      EXPECT_TRUE(res.is_new);\n      EXPECT_EQ(cb_hits, 3);\n    }\n\n    db.UnregisterOnChange(cb_id);\n  });\n}\n\nTEST_F(DflyEngineTest, Issue607) {\n  // https://github.com/dragonflydb/dragonfly/issues/607\n\n  Run({\"SET\", \"key\", \"value1\"});\n  EXPECT_EQ(Run({\"GET\", \"key\"}), \"value1\");\n\n  Run({\"SET\", \"key\", \"value2\"});\n  EXPECT_EQ(Run({\"GET\", \"key\"}), \"value2\");\n\n  Run({\"EXPIRE\", \"key\", \"1000\"});\n\n  Run({\"SET\", \"key\", \"value3\"});\n  EXPECT_EQ(Run({\"GET\", \"key\"}), \"value3\");\n}\n\nTEST_F(DflyEngineTest, Issue679) {\n  // https://github.com/dragonflydb/dragonfly/issues/679\n\n  
Run({\"HMSET\", \"a\", \"key\", \"val\"});\n  Run({\"EXPIRE\", \"a\", \"1000\"});\n  Run({\"HMSET\", \"a\", \"key\", \"vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv\"});\n  Run({\"EXPIRE\", \"a\", \"1001\"});\n}\n\nTEST_F(DflyEngineTest, Issue742) {\n  // https://github.com/dragonflydb/dragonfly/issues/742\n  // The stack was not cleaned in case of an error and it blew up.\n  for (int i = 0; i < 3'000; i++) {\n    Run({\"EVAL\", \"redis.get(KEYS[1], KEYS[2], KEYS[3], KEYS[4], KEYS[5])\", \"5\", \"k1\", \"k2\", \"k3\",\n         \"k4\", \"k5\"});\n  }\n}\n\nTEST_F(DefragDflyEngineTest, TestDefragOption) {\n  GTEST_SKIP() << \"Defragmentation check takes too long. Disabling this test\";\n\n  // mem_defrag_threshold is based on RSS statistic, but we don't count it in the test\n  absl::SetFlag(&FLAGS_mem_defrag_threshold, 0.0);\n  absl::SetFlag(&FLAGS_mem_defrag_check_sec_interval, 0);\n  absl::SetFlag(&FLAGS_mem_defrag_waste_threshold, 0.1);\n\n  //  Fill data into dragonfly and then check if we have\n  //  any location in memory to defrag. 
See issue #448 for details about this.\n  constexpr size_t kMaxMemoryForTest = 1'100'000;\n  constexpr int kNumberOfKeys = 1'000;  // this fill the memory\n  constexpr int kKeySize = 637;\n  constexpr int kMaxDefragTriesForTests = 30;\n  constexpr int kFactor = 4;\n\n  max_memory_limit = kMaxMemoryForTest;  // control memory size so no need for too many keys\n  std::vector<std::string> keys2delete;\n  keys2delete.push_back(\"del\");\n\n  // create keys that we would like to remove, try to make it none adjusting locations\n  for (int i = 0; i < kNumberOfKeys; i += kFactor) {\n    keys2delete.push_back(\"key-name:\" + std::to_string(i));\n  }\n\n  std::vector<std::string_view> keys(keys2delete.begin(), keys2delete.end());\n\n  Run({\"SELECT\", \"2\"});\n\n  RespExpr resp = Run(\n      {\"DEBUG\", \"POPULATE\", std::to_string(kNumberOfKeys), \"key-name\", std::to_string(kKeySize)});\n  ASSERT_EQ(resp, \"OK\");\n  auto r = CheckedInt({\"DBSIZE\"});\n\n  ASSERT_EQ(r, kNumberOfKeys);\n\n  shard_set->pool()->AwaitFiberOnAll([&](unsigned index, ProactorBase* base) {\n    EngineShard* shard = EngineShard::tlocal();\n    ASSERT_FALSE(shard == nullptr);  // we only have one and its should not be empty!\n    ThisFiber::SleepFor(100ms);\n\n    // make sure that the task that collect memory usage from all shard ran\n    // for at least once, and that no defrag was done yet.\n    auto stats = shard->stats();\n    for (int i = 0; i < 3; i++) {\n      ThisFiber::SleepFor(100ms);\n      EXPECT_EQ(stats.defrag_realloc_total, 0);\n    }\n  });\n\n  ArgSlice delete_cmd(keys);\n  r = CheckedInt(delete_cmd);\n  LOG(INFO) << \"finish deleting memory entries \" << r;\n  // the first element in this is the command del so size is one less\n  ASSERT_EQ(r, keys2delete.size() - 1);\n  // At this point we need to see whether we did running the task and whether the task did something\n  shard_set->pool()->AwaitFiberOnAll([&](unsigned index, ProactorBase* base) {\n    EngineShard* shard = 
EngineShard::tlocal();\n    ASSERT_TRUE(shard != nullptr);  // we only have one and its should not be empty!\n    // a \"busy wait\" to ensure that memory defragmentations was successful:\n    // the task ran and did it work\n    auto stats = shard->stats();\n    for (int i = 0; i < kMaxDefragTriesForTests && stats.defrag_realloc_total == 0; i++) {\n      stats = shard->stats();\n      ThisFiber::SleepFor(220ms);\n    }\n    // make sure that we successfully found places to defrag in memory\n    EXPECT_GT(stats.defrag_realloc_total, 0);\n    EXPECT_GE(stats.defrag_attempt_total, stats.defrag_realloc_total);\n  });\n}\n\nTEST_F(DefragDflyEngineTest, DefragEventuallyFinishes) {\n  Run(\"DEBUG POPULATE 5000 key 256\");\n  Run(\"FT.CREATE index ON HASH PREFIX 1 doc: SCHEMA t TAG WITHSUFFIXTRIE\");\n  for (int i = 0; i < 1000; ++i) {\n    Run(absl::StrFormat(\"HSET doc:%d t category%d\", i, i));\n  }\n\n  shard_set->pool()->AwaitFiberOnAll([&](unsigned, ProactorBase*) {\n    auto* shard = EngineShard::tlocal();\n    if (!shard)\n      return;\n\n    // Try to run defrag at least this many times and stop early if cursor reaches the end (winds\n    // back to 0)\n    constexpr auto max_attempts = 500;\n\n    std::vector<uint64_t> cursor_states;\n    cursor_states.reserve(max_attempts);\n\n    cursor_states.push_back(shard->GetDefragCursor());\n    EXPECT_EQ(cursor_states.back(), 0);\n\n    for (int i = 0; i < max_attempts; ++i) {\n      PageUsage page_usage{CollectPageStats::NO, 0, CycleQuota{CycleQuota::kDefaultDefragQuota}};\n      page_usage.SetForceReallocate(true);\n\n      shard->DoDefrag(&page_usage);\n      cursor_states.push_back(shard->GetDefragCursor());\n      if (cursor_states.back() == 0)\n        return;\n    }\n\n    // Defrag ran at least once\n    EXPECT_GT(cursor_states.size(), 1);\n    EXPECT_EQ(cursor_states.back(), 0)\n        << \"did not conclude defragmenting in \" << cursor_states.size() << \" runs\";\n\n    
EXPECT_GT(shard->stats().defrag_realloc_total, 0);\n    EXPECT_GE(shard->stats().defrag_attempt_total, shard->stats().defrag_realloc_total);\n  });\n}\n\nTEST_F(DflyEngineTest, Issue752) {\n  // https://github.com/dragonflydb/dragonfly/issues/752\n  // local_result_ member was not reset between commands\n  Run({\"multi\"});\n  auto resp = Run({\"llen\", kKey1});\n  ASSERT_EQ(resp, \"QUEUED\");\n  resp = Run({\"del\", kKey1, kKey2});\n  ASSERT_EQ(resp, \"QUEUED\");\n  resp = Run({\"exec\"});\n  ASSERT_THAT(resp, ArrLen(2));\n  ASSERT_THAT(resp.GetVec(), ElementsAre(IntArg(0), IntArg(0)));\n}\n\nTEST_F(DflyEngineTest, Latency) {\n  Run({\"latency\", \"latest\"});\n}\n\nTEST_F(DflyEngineTest, EvalBug2664) {\n  absl::FlagSaver fs;\n  SetFlag(&FLAGS_lua_resp2_legacy_float, true);\n\n  auto resp = Run({\"eval\", \"return 42.9\", \"0\"});\n  EXPECT_THAT(resp, IntArg(42));\n  resp = Run({\"eval\", \"return -3.8\", \"0\"});\n  EXPECT_THAT(resp, IntArg(-3));\n\n  resp = Run({\"hello\", \"3\"});\n  ASSERT_THAT(resp, ArrLen(14));\n\n  resp = Run({\"eval\", \"return 42.9\", \"0\"});\n  EXPECT_THAT(resp, IntArg(42));\n}\n\nTEST_F(DflyEngineTest, MemoryUsage) {\n  for (unsigned i = 0; i < 1000; ++i) {\n    Run({\"rpush\", \"l1\", StrCat(\"val\", i)});\n  }\n\n  for (unsigned i = 0; i < 1000; ++i) {\n    Run({\"rpush\", \"l2\", StrCat(string(200, 'a'), i)});\n  }\n  auto resp = Run({\"memory\", \"usage\", \"l1\"});\n  EXPECT_GT(*resp.GetInt(), 8000);\n\n  resp = Run({\"memory\", \"usage\", \"l2\"});\n  EXPECT_GT(*resp.GetInt(), 100000);\n}\n\n// MEMORY USAGE without a key caused a DCHECK crash in CmdArgParser destructor\n// because the parser error was never consumed.\nTEST_F(DflyEngineTest, MemoryUsageNoKey) {\n  auto resp = Run({\"memory\", \"usage\"});\n  EXPECT_THAT(resp, ErrArg(\"syntax error\"));\n}\n\nTEST_F(DflyEngineTest, DebugObject) {\n  Run({\"set\", \"key\", \"value\"});\n  Run({\"lpush\", \"l1\", \"a\", \"b\"});\n  Run({\"sadd\", \"s1\", \"1\", \"2\", \"3\"});\n  
Run({\"sadd\", \"s2\", \"a\", \"b\", \"c\"});\n  Run({\"zadd\", \"z1\", \"1\", \"a\", \"2\", \"b\", \"3\", \"c\"});\n  Run({\"hset\", \"h1\", \"a\", \"1\", \"b\", \"2\", \"c\", \"3\"});\n  auto resp = Run({\"debug\", \"object\", \"key\"});\n  EXPECT_THAT(resp.GetString(), HasSubstr(\"encoding:raw\"));\n  resp = Run({\"debug\", \"object\", \"l1\"});\n  EXPECT_THAT(resp.GetString(), HasSubstr(\"encoding:listpack\"));\n  resp = Run({\"debug\", \"object\", \"s1\"});\n  EXPECT_THAT(resp.GetString(), HasSubstr(\"encoding:intset\"));\n  resp = Run({\"debug\", \"object\", \"s2\"});\n  EXPECT_THAT(resp.GetString(), HasSubstr(\"encoding:dense_set\"));\n  resp = Run({\"debug\", \"object\", \"z1\"});\n  EXPECT_THAT(resp.GetString(), HasSubstr(\"encoding:listpack\"));\n\n  // Test promotion to quicklist\n  Run({\"lpush\", \"l1\", string(3000, 'x')});\n  resp = Run({\"debug\", \"object\", \"l1\"});\n  EXPECT_THAT(resp.GetString(), HasSubstr(\"encoding:quicklist\"));\n}\n\nTEST_F(DflyEngineTest, StreamMemInfo) {\n  for (int i = 1; i < 2; ++i) {\n    Run({\"XADD\", \"test\", std::to_string(i), \"var\", \"val\" + std::to_string(i)});\n  }\n\n  int64_t stream_mem_first = GetMetrics().db_stats[0].memory_usage_by_type[OBJ_STREAM];\n  EXPECT_GT(stream_mem_first, 0);\n\n  auto dump = Run({\"dump\", \"test\"});\n  Run({\"del\", \"test\"});\n  Run({\"restore\", \"test\", \"0\", facade::ToSV(dump.GetBuf())});\n\n  int64_t stream_mem_second = GetMetrics().db_stats[0].memory_usage_by_type[OBJ_STREAM];\n\n  // stream_mem_first != stream_mem_second due to a preallocation in XADD command (see\n  // STREAM_LISTPACK_MAX_PRE_ALLOCATE)\n  EXPECT_GT(stream_mem_second, 0);\n}\n\nTEST_F(DflyEngineTest, ReplicaofRejectOnLoad) {\n  service_->SwitchState(GlobalState::ACTIVE, GlobalState::LOADING);\n\n  RespExpr res = Run({\"REPLICAOF\", \"localhost\", \"3779\"});\n\n  ASSERT_THAT(res, ErrArg(\"LOADING Dragonfly is loading the dataset in memory\"));\n}\n\n// TODO: to test transactions with a single shard 
since then all transactions become local.\n// To consider having a parameter in dragonfly engine controlling number of shards\n// unconditionally from number of cpus. TO TEST BLPOP under multi for single/multi argument case.\n\nTEST_F(DflyEngineTest, CommandMetricLabels) {\n  EXPECT_EQ(Run({\"SET\", \"foo\", \"bar\"}), \"OK\");\n  EXPECT_EQ(Run({\"GET\", \"foo\"}), \"bar\");\n  const Metrics metrics = GetMetrics();\n\n  // The test connection counts as other\n  EXPECT_EQ(metrics.facade_stats.conn_stats.command_cnt_other, 2);\n  EXPECT_EQ(metrics.facade_stats.conn_stats.command_cnt_main, 0);\n  EXPECT_EQ(metrics.facade_stats.conn_stats.num_conns_main, 0);\n  EXPECT_EQ(metrics.facade_stats.conn_stats.num_conns_other, 0);\n}\n\nTEST_F(DflyEngineTest, Huffman) {\n  // enable compression for keys optimized for letter a.\n  auto resp = Run({\"debug\", \"compression\", \"set\", \"GBDgCpXW/////7/pygS5t9x7792qU1trLQ==\"});\n  EXPECT_EQ(resp, \"OK\");\n\n  // for string values optimized for letter x.\n  resp = Run({\"debug\", \"compression\", \"set\", \"ChD4bAf/D/bPSwY=\", \"string\"});\n  EXPECT_EQ(resp, \"OK\");\n  resp = Run({\"debug\", \"populate\", \"200000\", \"aaaaaaaaaaaaaaaaaaaaaaaaaa\", \"32\"});\n  EXPECT_EQ(resp, \"OK\");\n\n  auto metrics = GetMetrics();\n  EXPECT_EQ(metrics.events.huff_encode_success, 400000);  // each key and value\n  EXPECT_LT(metrics.heap_used_bytes, 14'000'000);         // less than 15mb\n}\n\nTEST_F(DflyEngineTest, MemoryKeys) {\n  Run({\"debug\", \"populate\", \"10000\", \"abcd_efgh_ijkl_mnop\", \"10\"});\n  auto metrics = GetMetrics();\n  EXPECT_GT(metrics.db_stats[0].memory_usage_by_type[OBJ_KEY], 100000);\n}\n\n// Verify that inline_keys, expire_count, and OBJ_KEY memory stay consistent\n// when expire is added/removed on inline keys (regression for memory underflow bug).\nTEST_F(DflyEngineTest, ExpireInlineKeyAccounting) {\n  // Keys short enough to be stored inline (kInlineLen = 16).\n  constexpr int kCount = 10;\n  for (int i = 0; i 
< kCount; i++)\n    Run({\"set\", absl::StrCat(\"k\", i), \"v\"});\n\n  auto stats = GetMetrics().db_stats[0];\n  EXPECT_EQ(stats.inline_keys, kCount);\n  EXPECT_EQ(stats.expire_count, 0u);\n  EXPECT_EQ(stats.memory_usage_by_type[OBJ_KEY], 0);\n\n  // Setting expire transitions inline -> SDS_TTL_TAG (heap-allocated).\n  for (int i = 0; i < kCount; i++)\n    Run({\"expire\", absl::StrCat(\"k\", i), \"3600\"});\n\n  stats = GetMetrics().db_stats[0];\n  EXPECT_EQ(stats.inline_keys, 0u);\n  EXPECT_EQ(stats.expire_count, kCount);\n  EXPECT_GT(stats.memory_usage_by_type[OBJ_KEY], 0);\n\n  // PERSIST transitions SDS_TTL_TAG -> inline again.\n  for (int i = 0; i < kCount; i++)\n    Run({\"persist\", absl::StrCat(\"k\", i)});\n\n  stats = GetMetrics().db_stats[0];\n  EXPECT_EQ(stats.inline_keys, kCount);\n  EXPECT_EQ(stats.expire_count, 0u);\n  EXPECT_EQ(stats.memory_usage_by_type[OBJ_KEY], 0);\n\n  // Re-expire then delete: prior bug caused memory accounting underflow on deletion.\n  for (int i = 0; i < kCount; i++)\n    Run({\"expire\", absl::StrCat(\"k\", i), \"3600\"});\n  for (int i = 0; i < kCount; i++)\n    Run({\"del\", absl::StrCat(\"k\", i)});\n\n  stats = GetMetrics().db_stats[0];\n  EXPECT_EQ(stats.inline_keys, 0u);\n  EXPECT_EQ(stats.expire_count, 0u);\n  EXPECT_EQ(stats.memory_usage_by_type[OBJ_KEY], 0);\n}\n\nclass DflyCommandAliasTest : public DflyEngineTest {\n protected:\n  DflyCommandAliasTest() {\n    SetFlag(&FLAGS_command_alias, {\"___set=set\", \"___ping=ping\"});\n    SetFlag(&FLAGS_latency_tracking, true);\n  }\n\n  absl::FlagSaver saver_;\n};\n\nTEST_F(DflyCommandAliasTest, Aliasing) {\n  EXPECT_EQ(Run({\"SET\", \"foo\", \"bar\"}), \"OK\");\n  EXPECT_EQ(Run({\"___SET\", \"a\", \"b\"}), \"OK\");\n  EXPECT_EQ(Run({\"GET\", \"foo\"}), \"bar\");\n  EXPECT_EQ(Run({\"GET\", \"a\"}), \"b\");\n  EXPECT_EQ(Run({\"___ping\"}), \"PONG\");\n\n  Metrics metrics = GetMetrics();\n  const auto& stats = metrics.cmd_stats_map;\n\n  EXPECT_THAT(stats, 
Contains(Pair(\"___set\", Key(1))));\n  EXPECT_THAT(stats, Contains(Pair(\"set\", Key(1))));\n  EXPECT_THAT(stats, Contains(Pair(\"___ping\", Key(1))));\n  EXPECT_THAT(stats, Contains(Pair(\"get\", Key(2))));\n\n  // test stats within multi-exec\n  EXPECT_EQ(Run({\"multi\"}), \"OK\");\n  EXPECT_EQ(Run({\"___set\", \"a\", \"x\"}), \"QUEUED\");\n  EXPECT_EQ(Run({\"exec\"}), \"OK\");\n\n  metrics = GetMetrics();\n  EXPECT_THAT(metrics.cmd_stats_map, Contains(Pair(\"___set\", Key(2))));\n  EXPECT_THAT(metrics.cmd_stats_map, Contains(Pair(\"set\", Key(1))));\n  EXPECT_THAT(metrics.cmd_stats_map, Contains(Pair(\"multi\", Key(1))));\n  EXPECT_THAT(metrics.cmd_stats_map, Contains(Pair(\"exec\", Key(1))));\n}\n\nTEST_F(DflyCommandAliasTest, AliasesShareHistogramPtr) {\n  EXPECT_EQ(Run({\"SET\", \"foo\", \"bar\"}), \"OK\");\n  EXPECT_EQ(Run({\"___SET\", \"a\", \"b\"}), \"OK\");\n  EXPECT_EQ(Run({\"___ping\"}), \"PONG\");\n\n  const auto command_histograms = GetMetrics().cmd_latency_map;\n  for (const auto& key : {\"set\", \"___set\", \"___ping\", \"ping\"}) {\n    EXPECT_TRUE(command_histograms.contains(key));\n  }\n\n  EXPECT_EQ(command_histograms.at(\"set\"), command_histograms.at(\"___set\"));\n  EXPECT_EQ(command_histograms.at(\"ping\"), command_histograms.at(\"___ping\"));\n}\n\n}  // namespace dfly\n"
  },
  {
    "path": "src/server/engine_shard.cc",
    "content": "// Copyright 2024, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#include \"server/engine_shard.h\"\n\n#include <absl/strings/escaping.h>\n#include <absl/strings/str_cat.h>\n#include <absl/strings/str_format.h>\n\n#include <memory>\n\n#include \"base/flags.h\"\n#include \"core/huff_coder.h\"\n#include \"core/page_usage/page_usage_stats.h\"\n#include \"io/proc_reader.h\"\n\nextern \"C\" {\n#include \"redis/zmalloc.h\"\n}\n#include \"server/blocking_controller.h\"\n#include \"server/db_slice.h\"\n#include \"server/engine_shard_set.h\"\n#include \"server/journal/journal.h\"\n#include \"server/namespaces.h\"\n#include \"server/search/doc_index.h\"\n#include \"server/server_state.h\"\n#include \"server/snapshot.h\"\n#include \"server/tiered_storage.h\"\n#include \"server/transaction.h\"\n#include \"util/fibers/proactor_base.h\"\n\nusing namespace std;\n\nABSL_FLAG(float, mem_defrag_threshold, 0.7,\n          \"Minimum percentage of used memory relative to maxmemory cap before running \"\n          \"defragmentation\");\n\nABSL_FLAG(uint32_t, mem_defrag_check_sec_interval, 60,\n          \"Number of seconds between every defragmentation necessity check\");\n\nABSL_FLAG(float, mem_defrag_waste_threshold, 0.2,\n          \"The ratio of wasted/committed memory above which we run defragmentation\");\n\nABSL_FLAG(float, mem_defrag_page_utilization_threshold, 0.8,\n          \"memory page under utilization threshold. Ratio between used and committed size, below \"\n          \"this, memory in this page will defragmented\");\n\nABSL_FLAG(int32_t, hz, 100,\n          \"Base frequency at which the server performs other background tasks. \"\n          \"Warning: not advised to decrease in production.\");\n\nABSL_FLAG(string, tiered_prefix, \"\",\n          \"Enables tiered storage if set. \"\n          \"The string denotes the path and prefix of the files \"\n          \" associated with tiered storage. 
Stronly advised to use \"\n          \"high performance NVME ssd disks for this. Also, seems that pipeline_squash does \"\n          \"not work well with tiered storage, so it's advised to set it to 0.\");\n\nABSL_FLAG(bool, enable_heartbeat_eviction, true,\n          \"Enable eviction during heartbeat when memory is under pressure.\");\nABSL_FLAG(bool, enable_heartbeat_rss_eviction, true,\n          \"Enable eviction during heartbeat when rss memory is under pressure. Eviction based \"\n          \"on used_memory will still be enabled.\");\nABSL_FLAG(double, eviction_memory_budget_threshold, 0.1,\n          \"Eviction starts when the free memory (including RSS memory) drops below \"\n          \"eviction_memory_budget_threshold * max_memory_limit.\");\nABSL_FLAG(bool, background_heartbeat, false, \"Whether to run heartbeat as a background fiber\");\nABSL_DECLARE_FLAG(uint32_t, max_eviction_per_heartbeat);\n\nnamespace dfly {\n\nusing absl::GetFlag;\nusing namespace util;\n\nnamespace {\n\nconstexpr uint64_t kCursorDoneState = 0u;\n\nbool HasContendedLocks(ShardId shard_id, Transaction* trx, const DbTable* table) {\n  auto is_contended = [table](LockFp fp) { return table->trans_locks.Find(fp)->IsContended(); };\n\n  if (trx->IsMulti()) {\n    auto fps = trx->GetMultiFps();\n    for (const auto& [sid, fp] : fps) {\n      if (sid == shard_id && is_contended(fp))\n        return true;\n    }\n  } else {\n    KeyLockArgs lock_args = trx->GetLockArgs(shard_id);\n    for (size_t i = 0; i < lock_args.fps.size(); ++i) {\n      if (is_contended(lock_args.fps[i]))\n        return true;\n    }\n  }\n\n  return false;\n}\n\nconstexpr size_t kQueueLen = 64;\n\noptional<uint32_t> GetPeriodicCycleMs() {\n  int hz = GetFlag(FLAGS_hz);\n  if (hz <= 0)\n    return nullopt;\n\n  uint32_t clock_cycle_ms = 1000 / hz;\n  if (clock_cycle_ms == 0)\n    clock_cycle_ms = 1;\n  return clock_cycle_ms;\n}\n\nsize_t CalculateHowManyBytesToEvictOnShard(size_t global_memory_limit, size_t 
global_used_memory,\n                                           size_t shard_memory_threshold) {\n  if (global_used_memory > global_memory_limit) {\n    // Used memory is above the limit, we need to evict all bytes\n    return (global_used_memory - global_memory_limit) / shard_set->size() + shard_memory_threshold;\n  }\n\n  const size_t shard_budget = (global_memory_limit - global_used_memory) / shard_set->size();\n  return shard_budget < shard_memory_threshold ? (shard_memory_threshold - shard_budget) : 0;\n}\n\nclass HuffmanCheckTask {\n public:\n  HuffmanCheckTask() {\n    hist_.fill(0);\n  }\n\n  int32_t Run(DbSlice* db_slice);\n\n private:\n  PrimeTable::Cursor cursor_;\n\n  static constexpr unsigned kMaxSymbol = 255;\n  array<unsigned, kMaxSymbol + 1> hist_;  // histogram of symbols.\n  string scratch_;\n};\n\nint32_t HuffmanCheckTask::Run(DbSlice* db_slice) {\n  DbTable* db_table = db_slice->GetDBTable(0);  // we currently support only default db.\n  if (!db_table)\n    return -1;\n\n  // incrementally aggregate frequency histogram.\n  auto& prime = db_table->prime;\n\n  constexpr uint32_t kMaxTraverses = 512;\n  uint32_t traverses_count = 0;\n  do {\n    cursor_ = prime.Traverse(cursor_, [&](PrimeIterator it) {\n      if (!it->first.IsInline()) {\n        string_view val = it->first.GetSlice(&scratch_);\n        for (unsigned char c : val) {\n          hist_[c]++;\n        }\n\n        if (val.size() > 1024) {\n          traverses_count = kMaxTraverses;  // return early.\n          string{}.swap(scratch_);          // free memory.\n        }\n      }\n    });\n    traverses_count++;\n  } while (traverses_count < kMaxTraverses && cursor_);\n\n  if (cursor_)\n    return 4;  // priority to continue later.\n\n  // Finished scanning the table, now normalize the table.\n  constexpr unsigned kMaxFreqTotal = static_cast<unsigned>((1U << 31) * 0.9);\n  size_t total_freq = std::accumulate(hist_.begin(), hist_.end(), 0UL);\n  if (total_freq == 0)\n    return -1;\n\n  
// to avoid overflow.\n  double scale = total_freq > kMaxFreqTotal ? static_cast<double>(total_freq) / kMaxFreqTotal : 1.0;\n  for (unsigned i = 0; i <= kMaxSymbol; i++) {\n    hist_[i] = static_cast<unsigned>(hist_[i] / scale);\n    if (hist_[i] == 0) {\n      hist_[i] = 1;  // Avoid zero frequency symbols.\n    }\n  }\n\n  // Build the huffman table. We currently output the table to logs and just increase\n  // the metric counter to signal that we built a table.\n\n  HuffmanEncoder huff_enc;\n  string error_msg;\n  if (huff_enc.Build(hist_.data(), kMaxSymbol, &error_msg)) {\n    size_t compressed_size = huff_enc.EstimateCompressedSize(hist_.data(), kMaxSymbol);\n    LOG(INFO) << \"Huffman table built, reducing character count from \" << total_freq << \" to \"\n              << compressed_size << \", compression ratio \" << double(compressed_size) / total_freq;\n    string bintable = huff_enc.Export();\n    LOG(INFO) << \"Huffman binary table: \" << absl::Base64Escape(bintable);\n    db_slice->shard_owner()->stats().huffman_tables_built++;\n  } else {\n    LOG(WARNING) << \"Huffman build failed: \" << error_msg;\n  }\n\n  return -1;  // task completed.\n}\n\n}  // namespace\n\n__thread EngineShard* EngineShard::shard_ = nullptr;\nuint64_t TEST_current_time_ms = 0;\n\nstring EngineShard::TxQueueInfo::Format() const {\n  string res;\n\n  if (tx_total > 0) {\n    absl::StrAppend(&res, \"tx armed \", tx_armed, \", total: \", tx_total, \",global:\", tx_global,\n                    \",runnable:\", tx_runnable, \"\\n\");\n    absl::StrAppend(&res, \", head: \", head.debug_id_info, \"\\n\");\n  }\n  if (total_locks > 0) {\n    absl::StrAppend(&res, \"locks total:\", total_locks, \",contended:\", contended_locks, \"\\n\");\n  }\n  if (max_contention_score > 0) {\n    absl::StrAppend(&res, \"max contention score: \", max_contention_score,\n                    \", lock: \", max_contention_lock, \"\\n\");\n  }\n\n  return res;\n}\n\nEngineShard::Stats& 
EngineShard::Stats::operator+=(const Stats& o) {\n  static_assert(sizeof(Stats) == 152);\n\n#define ADD(x) x += o.x\n\n  ADD(defrag_attempt_total);\n  ADD(defrag_realloc_total);\n  ADD(defrag_task_invocation_total);\n  ADD(defrag_skipped_mem_under_threshold);\n  ADD(defrag_skipped_within_check_interval);\n  ADD(defrag_skipped_not_enough_fragmentation);\n  ADD(poll_execution_total);\n  ADD(tx_ooo_total);\n  ADD(tx_optimistic_total);\n  ADD(tx_batch_schedule_calls_total);\n  ADD(tx_batch_scheduled_items_total);\n  ADD(total_heartbeat_expired_keys);\n  ADD(total_heartbeat_expired_bytes);\n  ADD(total_heartbeat_expired_calls);\n  ADD(total_migrated_keys);\n  ADD(huffman_tables_built);\n  ADD(stream_sequential_accesses);\n  ADD(stream_random_accesses);\n  ADD(stream_fetch_all_accesses);\n\n#undef ADD\n  return *this;\n}\n\nvoid EngineShard::DefragTaskState::UpdateScanState(uint64_t cursor_val) {\n  cursor = cursor_val;\n  // Once we're done with a db, jump to the next\n  if (cursor == kCursorDoneState) {\n    dbid++;\n  }\n}\n\nvoid EngineShard::DefragTaskState::ResetScanState() {\n  dbid = cursor = 0u;\n}\n\n// This function checks 3 things:\n// 1. Don't try memory fragmentation if we don't use \"enough\" memory (control by\n// mem_defrag_threshold flag)\n// 2. We have memory blocks that can be better utilized (there is a \"wasted memory\" in them).\n// 3. 
in case the above is OK, make sure that we have a \"gap\" between usage and committed memory\n// (controlled by mem_defrag_waste_threshold flag)\nEngineShard::DefragTaskState::SkipReason EngineShard::DefragTaskState::CheckRequired() {\n  using enum SkipReason;\n  if (cursor > kCursorDoneState) {\n    VLOG(2) << \"cursor: \" << cursor;\n    return NotSkipped;\n  }\n\n  size_t limit = max_memory_limit.load(memory_order_relaxed);\n\n  const std::size_t memory_per_shard = limit / shard_set->size();\n  if (memory_per_shard < (1 << 16)) {  // Too small.\n    return MemoryTooLow;\n  }\n\n  thread_local fragmentation_info finfo{\n      .committed = 0, .committed_golden = 0, .wasted = 0, .bin = 0};\n\n  const std::size_t global_threshold = double(limit) * GetFlag(FLAGS_mem_defrag_threshold);\n  if (global_threshold > rss_mem_current.load(memory_order_relaxed)) {\n    finfo.bin = 0;  // reset.\n    return MemoryBelowThreshold;\n  }\n\n  if (finfo.bin == 0) {  // did not start the iterative checking yet\n    const auto now = time(nullptr);\n    const auto seconds_from_prev_check = now - last_check_time;\n    const auto mem_defrag_interval = GetFlag(FLAGS_mem_defrag_check_sec_interval);\n\n    if (seconds_from_prev_check < mem_defrag_interval) {\n      return CheckWithinInterval;\n    }\n\n    // start checking.\n    finfo.committed = finfo.committed_golden = 0;\n    finfo.wasted = 0;\n    page_utilization_threshold = GetFlag(FLAGS_mem_defrag_page_utilization_threshold);\n  }\n\n  uint64_t start = absl::GetCurrentTimeNanos();\n  int res = zmalloc_get_allocator_fragmentation_step(page_utilization_threshold, &finfo);\n  uint64_t duration = absl::GetCurrentTimeNanos() - start;\n  VLOG(1) << \"Reading memory usage took \" << duration << \" ns on bin \" << finfo.bin - 1;\n\n  if (res == 0) {\n    // finished checking.\n    last_check_time = time(nullptr);\n\n    if (finfo.committed != finfo.committed_golden) {\n      LOG_FIRST_N(ERROR, 100) << \"committed memory computed incorrectly: 
\" << finfo.committed\n                              << \" vs \" << finfo.committed_golden;\n    }\n\n    const double waste_threshold = GetFlag(FLAGS_mem_defrag_waste_threshold);\n    if (finfo.wasted > size_t(finfo.committed * waste_threshold)) {\n      VLOG(1) << \"memory fragmentation issue found: \" << finfo.wasted << \" \" << finfo.committed;\n      return NotSkipped;\n    }\n    return NotEnoughFragmentation;\n  }\n  return CheckInProgress;\n}\n\nstd::optional<CollectedPageStats> EngineShard::DoDefrag(PageUsage* page_usage) {\n  // --------------------------------------------------------------------------\n  // NOTE: This task is running with exclusive access to the shard.\n  // i.e. - Since we are using shared nothing access here, and all access\n  // are done using fibers, This fiber is run only when no other fiber in the\n  // context of the controlling thread will access this shard!\n  // --------------------------------------------------------------------------\n\n  // TODO: enable tiered storage on non-default db slice\n  DbSlice& slice = namespaces->GetDefaultNamespace().GetDbSlice(shard_->shard_id());\n\n  // If we moved to an invalid db, skip as long as it's not the last one\n  while (!slice.IsDbValid(defrag_state_.dbid) && defrag_state_.dbid + 1 < slice.db_array_size())\n    defrag_state_.dbid++;\n\n  // If we found no valid db, we finished traversing and start from scratch next time\n  if (!slice.IsDbValid(defrag_state_.dbid)) {\n    defrag_state_.ResetScanState();\n    return std::nullopt;\n  }\n\n  DCHECK(slice.IsDbValid(defrag_state_.dbid));\n  auto [prime_table, _unused_expire] = slice.GetTables(defrag_state_.dbid);\n  PrimeTable::Cursor cur{defrag_state_.cursor};\n  uint64_t reallocations = 0;\n  uint64_t attempts = 0;\n\n  DbTable* db_table = slice.GetDBTable(defrag_state_.dbid);\n  do {\n    cur = prime_table->Traverse(cur, [&](PrimeIterator it) {\n      // for each value check whether we should move it because it\n      // seats on 
underutilized page of memory, and if so, do it.\n      const ssize_t original_size = it->second.MallocUsed();\n      const bool did = it->second.DefragIfNeeded(page_usage);\n      attempts++;\n      if (did) {\n        reallocations++;\n        if (const ssize_t delta = it->second.MallocUsed() - original_size; delta != 0) {\n          db_table->stats.AddTypeMemoryUsage(it->second.ObjType(), delta);\n        }\n      }\n    });\n  } while (!page_usage->QuotaDepleted() && cur && namespaces);\n  const uint64_t used_cycles = page_usage->UsedQuotaCycles();\n  const uint64_t usec = base::CycleClock::ToUsec(used_cycles);\n\n  defrag_state_.UpdateScanState(cur.token());\n\n  page_usage->ExtendQuota(50);\n  const auto [quota_depleted, objects_moved] = shard_search_indices_->Defragment(page_usage);\n  reallocations += objects_moved;\n\n  stats_.defrag_realloc_total += reallocations;\n  stats_.defrag_task_invocation_total++;\n  stats_.defrag_attempt_total += attempts;\n\n  const char* cursor_state =\n      defrag_state_.cursor == kCursorDoneState ? \"at the end\" : \"in progress\";\n  if (reallocations > 0) {\n    VLOG(2) << absl::StrFormat(\n        \"shard %u: successfully defragmented %lu times in %lu cycles (%lu usec), \"\n        \"cursor is %s\",\n        slice.shard_id(), reallocations, used_cycles, usec, cursor_state);\n  } else {\n    VLOG(2) << absl::StrFormat(\n        \"shard %u: ran defragmentation for %lu cycles (%lu usec), cursor at %s, \"\n        \"but no locations for defragmentation were found\",\n        slice.shard_id(), used_cycles, usec, cursor_state);\n  }\n\n  return page_usage->CollectedStats();\n}\n\n// the memory defragmentation task is as follows:\n//  1. Check if memory usage is high enough\n//  2. Check if diff between committed and used memory is high enough\n//  3. 
if all the above pass -> scan this shard and try to find whether we can move pointer to\n//  underutilized pages values\n//     if the cursor returned from scan is not in done state, schedule the task to run at high\n//     priority.\n//     otherwise lower the task priority so that it would not use the CPU when not required\nuint32_t EngineShard::DefragTask() {\n  using enum DefragTaskState::SkipReason;\n\n  constexpr uint32_t kRunAtLowPriority = 0u;\n  if (!namespaces) {\n    return kRunAtLowPriority;\n  }\n\n  if (auto check_result = defrag_state_.CheckRequired(); check_result == NotSkipped) {\n    VLOG(2) << shard_id_ << \": need to run defrag memory cursor state: \" << defrag_state_.cursor;\n    static const float threshold = GetFlag(FLAGS_mem_defrag_page_utilization_threshold);\n    // TODO (abhijat): implement move ctor for PageUsage so this object can be moved into the task.\n    PageUsage page_usage{CollectPageStats::NO, threshold,\n                         CycleQuota{CycleQuota::kDefaultDefragQuota}};\n    if (DoDefrag(&page_usage)) {\n      // we didn't finish the scan\n      return ProactorBase::kOnIdleMaxLevel;\n    }\n  } else {\n    std::string_view reason;\n    switch (check_result) {\n      case MemoryTooLow:\n        // Don't track stats for configuration which is not going to change\n        reason = \"memory too low\";\n        break;\n      case MemoryBelowThreshold:\n        reason = \"rss below threshold\";\n        stats_.defrag_skipped_mem_under_threshold++;\n        break;\n      case CheckWithinInterval:\n        reason = \"defrag check ran too soon\";\n        stats_.defrag_skipped_within_check_interval++;\n        break;\n      case NotEnoughFragmentation:\n        reason = \"not enough fragmentation to defrag\";\n        stats_.defrag_skipped_not_enough_fragmentation++;\n        break;\n      case CheckInProgress:\n        reason = \"check is in progress\";\n        break;\n      default:\n        DCHECK(false) << \"unexpected 
result\";\n    }\n    VLOG(2) << shard_id_ << \" skipped defragmentation task: \" << reason;\n  }\n  return 6;  // priority.\n}\n\nEngineShard::EngineShard(util::ProactorBase* pb, mi_heap_t* heap)\n    : txq_([](const Transaction* t) { return t->txid(); }),\n      queue_(kQueueLen, 1, 1),\n      queue2_(kQueueLen / 2, 2, 2),\n      shard_id_(pb->GetPoolIndex()),\n      mi_resource_(heap) {\n  queue_.Start(absl::StrCat(\"shard_queue_\", shard_id()));\n  queue2_.Start(absl::StrCat(\"l2_queue_\", shard_id()));\n}\n\nvoid EngineShard::Shutdown() {\n  DVLOG(1) << \"EngineShard::Shutdown\";\n\n  queue_.Shutdown();\n  queue2_.Shutdown();\n  DCHECK(!fiber_heartbeat_periodic_.IsJoinable());\n  DCHECK(!fiber_shard_handler_periodic_.IsJoinable());\n}\n\nvoid EngineShard::StopPeriodicFiber() {\n  ProactorBase::me()->RemoveOnIdleTask(defrag_task_id_);\n  ProactorBase::me()->RemoveOnIdleTask(huffman_check_task_id_);\n\n  fiber_heartbeat_periodic_done_.Notify();\n  if (fiber_heartbeat_periodic_.IsJoinable()) {\n    fiber_heartbeat_periodic_.Join();\n  }\n  fiber_shard_handler_periodic_done_.Notify();\n  if (fiber_shard_handler_periodic_.IsJoinable()) {\n    fiber_shard_handler_periodic_.Join();\n  }\n}\n\nstatic void RunFPeriodically(std::function<void()> f, std::chrono::milliseconds period_ms,\n                             std::string_view error_msg, util::fb2::Done* waiter) {\n  int64_t last_heartbeat_ms = INT64_MAX;\n\n  while (true) {\n    if (waiter->WaitFor(period_ms)) {\n      VLOG(2) << \"finished running engine shard periodic task\";\n      return;\n    }\n\n    int64_t now_ms = fb2::ProactorBase::GetMonotonicTimeNs() / 1000000;\n    if (now_ms - 5 * period_ms.count() > last_heartbeat_ms) {\n      VLOG(1) << \"This \" << error_msg << \" step took \" << now_ms - last_heartbeat_ms << \"ms\";\n    }\n    f();\n    last_heartbeat_ms = fb2::ProactorBase::GetMonotonicTimeNs() / 1000000;\n  }\n}\n\nvoid EngineShard::StartPeriodicHeartbeatFiber(util::ProactorBase* pb) {\n  auto 
cycle_ms = GetPeriodicCycleMs();\n  if (!cycle_ms) {\n    return;\n  }\n  auto heartbeat = [this]() { Heartbeat(); };\n\n  eviction_state_.rss_eviction_enabled = GetFlag(FLAGS_enable_heartbeat_rss_eviction);\n  std::chrono::milliseconds period_ms(*cycle_ms);\n\n  fb2::Fiber::Opts fb_opts{.priority = absl::GetFlag(FLAGS_background_heartbeat)\n                                           ? fb2::FiberPriority::BACKGROUND\n                                           : fb2::FiberPriority::NORMAL,\n                           .name = absl::StrCat(\"heartbeat_periodic\", pb->GetPoolIndex())};\n  fiber_heartbeat_periodic_ = fb2::Fiber(fb_opts, [this, period_ms, heartbeat]() mutable {\n    RunFPeriodically(heartbeat, period_ms, \"heartbeat\", &fiber_heartbeat_periodic_done_);\n  });\n  defrag_task_id_ = pb->AddOnIdleTask([this]() { return DefragTask(); });\n}\n\nvoid EngineShard::StartPeriodicShardHandlerFiber(util::ProactorBase* pb,\n                                                 std::function<void()> shard_handler) {\n  auto clock_cycle_ms = GetPeriodicCycleMs();\n  if (!clock_cycle_ms) {\n    return;\n  }\n\n  // Minimum 100ms\n  std::chrono::milliseconds period_ms(std::max(100u, *clock_cycle_ms));\n  fiber_shard_handler_periodic_ = MakeFiber(\n      [this, index = pb->GetPoolIndex(), period_ms, handler = std::move(shard_handler)]() mutable {\n        ThisFiber::SetName(absl::StrCat(\"shard_handler_periodic\", index));\n        RunFPeriodically(std::move(handler), period_ms, \"shard handler\",\n                         &fiber_shard_handler_periodic_done_);\n      });\n}\n\nvoid EngineShard::InitThreadLocal(ProactorBase* pb) {\n  CHECK(shard_ == nullptr) << pb->GetPoolIndex();\n\n  mi_heap_t* data_heap = ServerState::tlocal()->data_heap();\n  void* ptr = mi_heap_malloc_aligned(data_heap, sizeof(EngineShard), alignof(EngineShard));\n  shard_ = new (ptr) EngineShard(pb, data_heap);\n\n  CompactObj::InitThreadLocal(shard_->memory_resource());\n  
SmallString::InitThreadLocal(data_heap);\n  InitTLStatelessAllocMR(shard_->memory_resource());\n\n  shard_->shard_search_indices_ = std::make_unique<ShardDocIndices>();\n}\n\nvoid EngineShard::InitTieredStorage(ProactorBase* pb, size_t max_file_size) {\n  if (string backing_prefix = GetFlag(FLAGS_tiered_prefix); !backing_prefix.empty()) {\n    LOG_IF(FATAL, pb->GetKind() != ProactorBase::IOURING)\n        << \"Only ioring based backing storage is supported. Exiting...\";\n\n    // TODO: enable tiered storage on non-default namespace\n    DbSlice& db_slice = namespaces->GetDefaultNamespace().GetDbSlice(shard_id());\n    auto* shard = EngineShard::tlocal();\n    shard->tiered_storage_ = make_unique<TieredStorage>(max_file_size, &db_slice);\n    error_code ec = shard->tiered_storage_->Open(backing_prefix);\n    CHECK(!ec) << ec.message();\n  }\n}\n\nvoid EngineShard::DestroyThreadLocal() {\n  if (!shard_)\n    return;\n\n  uint32_t shard_id = shard_->shard_id();\n  mi_heap_t* tlh = shard_->mi_resource_.heap();\n\n  shard_->Shutdown();\n\n  detail::InternedString::ResetPool();\n  shard_->~EngineShard();\n  CleanupStatelessAllocMR();\n\n  mi_free(shard_);\n  shard_ = nullptr;\n  CompactObj::InitThreadLocal(nullptr);\n\n  mi_heap_delete(tlh);\n  VLOG(1) << \"Shard reset \" << shard_id;\n}\n\n// Is called by Transaction::ExecuteAsync in order to run transaction tasks.\n// Only runs in its own thread.\nvoid EngineShard::PollExecution(const char* context, Transaction* trans) {\n  DVLOG(2) << \"PollExecution \" << context << \" \" << (trans ? trans->DebugId() : \"\") << \" \"\n           << txq_.size() << \" \" << (continuation_trans_ ? continuation_trans_->DebugId() : \"\");\n\n  ShardId sid = shard_id();\n  stats_.poll_execution_total++;\n\n  // If any of the following flags are present, we are guaranteed to run in this function:\n  // 1. 
AWAKED_Q -> Blocking transactions are executed immediately after waking up, they don't\n  // occupy a place in txq and have highest priority\n  // 2. WAS_SUSPENDED -> Suspended transactions are run to clean up and finalize blocking keys\n  // 3. OUT_OF_ORDER -> Transactions without conflicting keys can run earlier than their position in\n  // txq is reached\n  uint16_t flags = Transaction::AWAKED_Q | Transaction::WAS_SUSPENDED | Transaction::OUT_OF_ORDER;\n  auto [trans_mask, disarmed] =\n      trans ? trans->DisarmInShardWhen(sid, flags) : make_pair(uint16_t(0), false);\n\n  if (trans && trans_mask == 0)  // If not armed, it means that this poll task expired\n    return;\n\n  if (trans_mask & Transaction::AWAKED_Q) {\n    CHECK(trans->GetNamespace().GetBlockingController(shard_id_)->HasAwakedTransaction());\n    CHECK(continuation_trans_ == nullptr)\n        << continuation_trans_->DebugId() << \" when polling \" << trans->DebugId()\n        << \"cont_mask: \" << continuation_trans_->DEBUG_GetLocalMask(sid) << \" vs \"\n        << trans->DEBUG_GetLocalMask(sid);\n\n    // Commands like BRPOPLPUSH don't conclude immediately\n    if (!trans->RunInShard(this, true)) {\n      // execution is blocked while HasAwakedTransaction() returns true, so no need to set\n      // continuation_trans_. 
Moreover, setting it for wakened multi-hop transactions may lead to\n      // inconsistency, see BLMoveSimultaneously test.\n      // continuation_trans_ = trans;\n      return;\n    }\n\n    trans = nullptr;  // Avoid handling the caller below\n  }\n\n  bool update_stats = false;\n  ++poll_concurrent_factor_;\n\n  auto run = [this, &update_stats](Transaction* tx, bool allow_removal) -> bool /* concluding */ {\n    update_stats = true;\n    return tx->RunInShard(this, allow_removal);\n  };\n\n  // Check the currently running transaction, we have to handle it first until it concludes\n  if (continuation_trans_) {\n    bool is_self = continuation_trans_ == trans;\n    if (is_self)\n      trans = nullptr;\n\n    if ((is_self && disarmed) || continuation_trans_->DisarmInShard(sid)) {\n      if (bool concludes = run(continuation_trans_, true); concludes) {\n        continuation_trans_ = nullptr;\n        continuation_debug_id_.clear();\n      } else {\n        continuation_debug_id_ = continuation_trans_->DebugId(sid);\n      }\n    }\n  }\n\n  // Progress on the transaction queue if no transaction is running currently.\n  Transaction* head = nullptr;\n\n  while (continuation_trans_ == nullptr && !txq_.Empty()) {\n    head = get<Transaction*>(txq_.Front());\n\n    // Break if there are any awakened transactions, as we must give way to them\n    // before continuing to handle regular transactions from the queue.\n    if (head->GetNamespace().GetBlockingController(shard_id_) &&\n        head->GetNamespace().GetBlockingController(shard_id_)->HasAwakedTransaction())\n      break;\n\n    VLOG(2) << \"Considering head \" << head->DebugId()\n            << \" isarmed: \" << head->DEBUG_IsArmedInShard(sid);\n\n    // If the transaction isn't armed yet, it will be handled by a successive poll\n    bool should_run = (head == trans && disarmed) || head->DisarmInShard(sid);\n    if (!should_run)\n      break;\n\n    // Avoid processing the caller transaction below if we found it in 
the queue,\n    // because it most likely won't have enough time to arm itself again.\n    if (head == trans)\n      trans = nullptr;\n\n    TxId txid = head->txid();\n\n    // Update commited_txid before running, because RunInShard might block on i/o.\n    // This way scheduling transactions won't see an understated value.\n    DCHECK_LT(committed_txid_, txid);  //  strictly increasing when processed via txq\n    committed_txid_ = txid;\n\n    DCHECK(!continuation_trans_);  // while() check above ensures this.\n    if (bool concludes = run(head, true); !concludes) {\n      DCHECK_EQ(head->DEBUG_GetTxqPosInShard(sid), TxQueue::kEnd) << head->DebugId(sid);\n      continuation_trans_ = head;\n      continuation_debug_id_ = head->DebugId(sid);\n    }\n  }\n\n  // If we disarmed, but didn't find ourselves in the loop, run now.\n  if (trans && disarmed) {\n    // if WAS_SUSPENDED is true but not AWAKED_Q, it means the transaction was awaked\n    // in another thread and this one just follows along.\n    DCHECK(trans_mask & (Transaction::OUT_OF_ORDER | Transaction::WAS_SUSPENDED));\n    CHECK(trans != continuation_trans_);\n\n    bool is_ooo = trans_mask & Transaction::OUT_OF_ORDER;\n\n    // For OOO transactions that are still in the queue, we can not remove them unless\n    // they conclude.\n    bool concludes = run(trans, !is_ooo);\n    if (is_ooo && concludes) {\n      stats_.tx_ooo_total++;\n    }\n\n    // If the transaction concluded, it must remove itself from the tx queue.\n    // Otherwise it is required to stay there to keep the relative order.\n    if (!concludes && is_ooo) {\n      LOG_IF(DFATAL, trans->DEBUG_GetTxqPosInShard(sid) == TxQueue::kEnd);\n    }\n  }\n  --poll_concurrent_factor_;\n  if (update_stats) {\n    CacheStats();\n  }\n}\n\nvoid EngineShard::RemoveContTx(Transaction* tx) {\n  if (continuation_trans_ == tx) {\n    continuation_trans_ = nullptr;\n    continuation_debug_id_.clear();\n  }\n}\n\nvoid EngineShard::Heartbeat() {\n  DVLOG(3) << 
\" Hearbeat\";\n  DCHECK(namespaces);\n\n  CacheStats();\n\n  // TODO: iterate over all namespaces\n  DbSlice& db_slice = namespaces->GetDefaultNamespace().GetDbSlice(shard_id());\n\n  // Skip heartbeat if global transaction is in process.\n  // This is determined by attempting to check if shard lock can be acquired.\n  const bool can_acquire_global_lock = shard_lock()->Check(IntentLock::Mode::EXCLUSIVE);\n\n  if (db_slice.WillBlockOnJournalWrite() || !can_acquire_global_lock) {\n    uint64_t now = absl::GetCurrentTimeNanos();\n\n    uint64_t elapsed_ms = (now - stalled_start_ns_) / 1000000;\n\n    if (stalled_start_ns_ && elapsed_ms > 1000) {\n      LOG_EVERY_T(WARNING, 5) << \"Stalled heartbeat() fiber for \" << elapsed_ms / 1000\n                              << \" seconds\";\n    }\n    stalled_start_ns_ = now;\n    return;\n  }\n  stalled_start_ns_ = 0;\n\n  thread_local bool check_huffman = (shard_id_ == 0);  // run it only on shard 0.\n  if (check_huffman) {\n    auto* ptr = db_slice.GetDBTable(0);\n    if (ptr) {\n      size_t key_usage = ptr->stats.memory_usage_by_type[OBJ_KEY];\n      size_t obj_usage = ptr->stats.obj_memory_usage;\n\n#ifdef NDEBUG\n#define MB_THRESHOLD (50 * 1024 * 1024)\n#else\n#define MB_THRESHOLD (5 * 1024 * 1024)\n#endif\n\n      if (key_usage > MB_THRESHOLD && key_usage > obj_usage / 8) {\n        VLOG(1) << \"Scheduling huffman check task, key usage: \" << key_usage\n                << \", obj usage: \" << obj_usage;\n\n        check_huffman = false;  // trigger only once.\n\n        // launch the task\n        huffman_check_task_id_ =\n            ProactorBase::me()->AddOnIdleTask([task = HuffmanCheckTask{}]() mutable {\n              if (!shard_ || !namespaces) {\n                return -1;\n              }\n\n              DbSlice& db_slice = namespaces->GetDefaultNamespace().GetDbSlice(shard_->shard_id());\n              return task.Run(&db_slice);\n            });\n      }\n    }\n  }\n\n  if (!IsReplica()) {  // Never run 
expiry/evictions on replica.\n    RetireExpiredAndEvict();\n  }\n\n  if (tiered_storage_ && tiered_storage_->ShouldOffload()) {\n    VLOG(1) << \"Running Offloading, memory=\" << db_slice.memory_budget()\n            << \", cool memory: \" << tiered_storage_->CoolMemoryUsage();\n\n    for (unsigned i = 0; i < db_slice.db_array_size(); ++i) {\n      if (!db_slice.IsDbValid(i))\n        continue;\n      tiered_storage_->RunOffloading(i);\n    }\n  }\n}\n\nvoid EngineShard::RetireExpiredAndEvict() {\n  // Disable flush journal changes to prevent preemption\n  journal::DisableFlushGuard journal_flush_guard(journal_);\n\n  // TODO: iterate over all namespaces\n  DbSlice& db_slice = namespaces->GetDefaultNamespace().GetDbSlice(shard_id());\n  constexpr double kTtlDeleteLimit = 200;\n\n  uint32_t traversed = GetMovingSum6(TTL_TRAVERSE);\n  uint32_t deleted = GetMovingSum6(TTL_DELETE);\n  unsigned ttl_delete_target = 5;\n\n  if (deleted > 10) {\n    // deleted should be <= traversed.\n    // hence we map our delete/traversed ratio into a range [0, kTtlDeleteLimit).\n    // The higher ttl_delete_target the more likely we have lots of expired items that need\n    // to be deleted.\n    ttl_delete_target = unsigned(kTtlDeleteLimit * double(deleted) / (double(traversed) + 10));\n  }\n\n  DbContext db_cntx;\n  db_cntx.time_now_ms = GetCurrentTimeMs();\n\n  size_t deleted_bytes = 0;\n  size_t eviction_goal = GetFlag(FLAGS_enable_heartbeat_eviction) ? CalculateEvictionBytes() : 0;\n\n  for (unsigned i = 0; i < db_slice.db_array_size(); ++i) {\n    if (!db_slice.IsDbValid(i))\n      continue;\n\n    db_cntx.db_index = i;\n    auto [pt, _unused_expt] = db_slice.GetTables(i);\n    uint64_t expire_count = db_slice.GetDBTable(i)->stats.expire_count;\n    if (expire_count > 0) {\n      // Scale traversal count to compensate for TTL key dilution in the prime table.\n      // Since we now scan the prime table (not a dedicated expire table), most entries\n      // may not have TTLs. 
We need more bucket traversals to check the same number of\n      // TTL keys, but cap to avoid excessive work when TTL keys are extremely sparse.\n      unsigned db_ttl_delete_target = ttl_delete_target;\n\n      if (pt->size() >= expire_count * 2) {\n        unsigned ratio = std::min(pt->size() / expire_count, 7UL);\n        db_ttl_delete_target = ttl_delete_target * ratio;\n      }\n      DbSlice::DeleteExpiredStats stats = db_slice.DeleteExpiredStep(db_cntx, db_ttl_delete_target);\n\n      deleted_bytes += stats.deleted_bytes;\n      eviction_goal -= std::min(eviction_goal, size_t(stats.deleted_bytes));\n      counter_[TTL_TRAVERSE].IncBy(stats.traversed);\n      counter_[TTL_DELETE].IncBy(stats.deleted);\n      stats_.total_heartbeat_expired_keys += stats.deleted;\n      stats_.total_heartbeat_expired_bytes += stats.deleted_bytes;\n      ++stats_.total_heartbeat_expired_calls;\n      VLOG(2) << \"Heartbeat expired \" << stats.deleted << \" keys with total bytes \"\n              << stats.deleted_bytes << \" with total expire flow calls \"\n              << stats_.total_heartbeat_expired_calls;\n    }\n\n    if (eviction_goal) {\n      uint32_t starting_segment_id = rand() % pt->GetSegmentCount();\n      auto [evicted_items, evicted_bytes] =\n          db_slice.FreeMemWithEvictionStepAtomic(i, db_cntx, starting_segment_id, eviction_goal);\n\n      VLOG(2) << \"Heartbeat eviction: Expected to evict \" << eviction_goal\n              << \" bytes. Actually evicted \" << evicted_items << \" items, \" << evicted_bytes\n              << \" bytes. 
Max eviction per heartbeat: \"\n              << GetFlag(FLAGS_max_eviction_per_heartbeat);\n\n      deleted_bytes += evicted_bytes;\n      eviction_goal -= std::min(eviction_goal, evicted_bytes);\n    }\n  }\n\n  // Track deleted bytes only if we expect to lower memory\n  if (eviction_state_.track_deleted_bytes) {\n    eviction_state_.deleted_bytes_at_prev_eviction = deleted_bytes;\n  }\n}\n\n// Adjust deleted bytes w.r.t shard used memory. If we increase shard used\n// memory in current heartbeat we can invalidate deleted_bytes. Otherwise we adjust deleted\n// bytes by diff.\nvoid EngineShard::EvictionTaskState::AdjustDeletedBytes(size_t shard_used_memory) {\n  if (shard_used_memory >= shard_used_memory_at_prev_eviction) {\n    deleted_bytes_at_prev_eviction = 0;\n  } else {\n    deleted_bytes_at_prev_eviction = std::min(\n        deleted_bytes_at_prev_eviction, shard_used_memory_at_prev_eviction - shard_used_memory);\n  }\n}\n\n// Check if adding value of previous deleted bytes will be higher than rss memory budget and\n// limit if needed.\nvoid EngineShard::EvictionTaskState::LimitAccumulatedDeletedBytes(\n    size_t shard_rss_over_memory_budget) {\n  const size_t next_acc_deleted_bytes =\n      acc_deleted_bytes_during_eviction + deleted_bytes_at_prev_eviction;\n  acc_deleted_bytes_during_eviction = shard_rss_over_memory_budget > next_acc_deleted_bytes\n                                          ? 
next_acc_deleted_bytes\n                                          : shard_rss_over_memory_budget;\n}\n\n// Once the rss memory is lowered we can start also decreasing accumulated total bytes.\nvoid EngineShard::EvictionTaskState::AdjustAccumulatedDeletedBytes(size_t global_used_rss_memory) {\n  if (global_used_rss_memory < global_rss_memory_at_prev_eviction) {\n    auto decrease_delete_bytes_before_rss_update =\n        std::min(acc_deleted_bytes_during_eviction,\n                 (global_rss_memory_at_prev_eviction - global_used_rss_memory) / shard_set->size());\n    VLOG(2) << \"deleted_bytes_before_rss_update: \" << acc_deleted_bytes_during_eviction\n            << \" decrease_delete_bytes_before_rss_update: \"\n            << decrease_delete_bytes_before_rss_update;\n    acc_deleted_bytes_during_eviction -= decrease_delete_bytes_before_rss_update;\n  }\n  LOG_IF(DFATAL, global_used_rss_memory < (acc_deleted_bytes_during_eviction * shard_set->size()))\n      << \"RSS eviction underflow \"\n      << \"global_used_rss_memory: \" << global_used_rss_memory\n      << \" total_deleted_bytes_on_eviction: \" << acc_deleted_bytes_during_eviction;\n}\n\nsize_t EngineShard::CalculateEvictionBytes() {\n  const size_t shards_count = shard_set->size();\n  const double eviction_memory_budget_threshold = GetFlag(FLAGS_eviction_memory_budget_threshold);\n\n  // Calculate threshold for both used_memory and rss_memory\n  const size_t limit = max_memory_limit.load(memory_order_relaxed);\n  const size_t shard_memory_budget_threshold =\n      size_t(limit * eviction_memory_budget_threshold) / shards_count;\n\n  const size_t global_used_memory = used_mem_current.load(memory_order_relaxed);\n\n  // Calculate how many bytes we need to evict on this shard\n  size_t goal_bytes =\n      CalculateHowManyBytesToEvictOnShard(limit, global_used_memory, shard_memory_budget_threshold);\n\n  VLOG_IF(2, goal_bytes > 0) << \"Used memory goal bytes: \" << goal_bytes\n                             << 
\", used memory: \" << global_used_memory\n                             << \", memory limit: \" << max_memory_limit;\n\n  // Check for `enable_heartbeat_rss_eviction` flag since it dynamic. And reset\n  // state if flag has changed.\n  bool rss_eviction_enabled_flag = GetFlag(FLAGS_enable_heartbeat_rss_eviction);\n  if (eviction_state_.rss_eviction_enabled != rss_eviction_enabled_flag) {\n    eviction_state_.Reset(rss_eviction_enabled_flag);\n  }\n  if (eviction_state_.rss_eviction_enabled) {\n    const size_t global_used_rss_memory = rss_mem_current.load(memory_order_relaxed);\n    const size_t rss_memory_threshold_start = limit * (1. - eviction_memory_budget_threshold);\n    const size_t shard_used_memory = UsedMemory();\n\n    // Adjust previous deleted bytes\n    eviction_state_.AdjustDeletedBytes(shard_used_memory);\n\n    // Calculate memory budget that is higher than rss_memory_threshold_start. This is our limit\n    // for accumulated_deleted_bytes.\n    const size_t shard_rss_over_memory_budget =\n        global_used_rss_memory > rss_memory_threshold_start\n            ? 
(global_used_rss_memory - rss_memory_threshold_start) / shards_count\n            : 0;\n    eviction_state_.LimitAccumulatedDeletedBytes(shard_rss_over_memory_budget);\n\n    // Once the rss memory is lowered we can start also decreasing accumulated total bytes.\n    eviction_state_.AdjustAccumulatedDeletedBytes(global_used_rss_memory);\n\n    // Update rss/used memory for this heartbeat\n    eviction_state_.global_rss_memory_at_prev_eviction = global_used_rss_memory;\n    eviction_state_.shard_used_memory_at_prev_eviction = shard_used_memory;\n\n    // If we underflow use limit as used_memory\n    size_t used_rss_memory_with_deleted_bytes = std::min(\n        global_used_rss_memory - eviction_state_.acc_deleted_bytes_during_eviction * shards_count,\n        limit);\n\n    // Try to evict more bytes if we are close to the rss memory limit\n    size_t rss_goal_bytes = CalculateHowManyBytesToEvictOnShard(\n        limit, used_rss_memory_with_deleted_bytes, shard_memory_budget_threshold);\n\n    // RSS evictions starts so we should start tracking deleted_bytes\n    if (rss_goal_bytes) {\n      eviction_state_.track_deleted_bytes = true;\n    } else {\n      // There is no RSS eviction goal and we have cleared tracked deleted bytes\n      if (!eviction_state_.acc_deleted_bytes_during_eviction) {\n        eviction_state_.track_deleted_bytes = false;\n      }\n    }\n\n    VLOG_IF(2, rss_goal_bytes > 0)\n        << \"Rss memory goal bytes: \" << rss_goal_bytes\n        << \", rss used memory: \" << global_used_rss_memory << \", rss memory limit: \" << limit\n        << \", accumulated_deleted_bytes_during_eviction: \"\n        << eviction_state_.acc_deleted_bytes_during_eviction;\n\n    goal_bytes = std::max(goal_bytes, rss_goal_bytes);\n  }\n\n  return goal_bytes;\n}\n\nvoid EngineShard::CacheStats() {\n  uint64_t now = fb2::ProactorBase::GetMonotonicTimeNs();\n  if (last_mem_params_.updated_at + 1000000 > now)  // 1ms\n    return;\n\n  size_t used_mem = UsedMemory();\n 
 DbSlice& db_slice = namespaces->GetDefaultNamespace().GetDbSlice(shard_id());\n\n  // Reflect local memory change on global value\n  size_t delta = used_mem - last_mem_params_.used_mem;  // negative value wraps safely\n  size_t current = used_mem_current.fetch_add(delta, memory_order_relaxed) + delta;\n  ssize_t free_mem = max_memory_limit.load(memory_order_relaxed) - current;\n\n  // Estimate bytes per object, excluding table memory\n  size_t entries = db_slice.entries_count();\n  size_t table_memory =\n      db_slice.table_memory() + (tiered_storage_ ? tiered_storage_->CoolMemoryUsage() : 0);\n  size_t obj_memory = table_memory <= used_mem ? used_mem - table_memory : 0;\n  size_t bytes_per_obj = entries > 0 ? obj_memory / entries : 0;\n\n  VLOG_EVERY_N(1, 500) << \"Entries count \" << entries << \" \"\n                       << \"obj_memory: \" << obj_memory << \", bytes_per_obj: \" << bytes_per_obj;\n\n  db_slice.UpdateMemoryParams(free_mem / shard_set->size(), bytes_per_obj);\n  last_mem_params_ = {now, used_mem};\n}\n\nsize_t EngineShard::UsedMemory() const {\n  return mi_resource_.used() + zmalloc_used_memory_tl + SmallString::UsedThreadLocal() +\n         search_indices()->GetUsedMemory();\n}\n\nbool EngineShard::ShouldThrottleForTiering() const {\n  // Throttle if the tiered storage is busy offloading (at least 30% of allowed capacity)\n  return tiered_storage_ && tiered_storage_->WriteDepthUsage() > 0.3 &&\n         tiered_storage_->ShouldOffload();\n}\n\nvoid EngineShard::FinalizeMulti(Transaction* tx) {\n  if (continuation_trans_ == tx) {\n    continuation_trans_ = nullptr;\n  }\n\n  // Wake only if no tx queue head is currently running\n  auto* bc = tx->GetNamespace().GetBlockingController(shard_id());\n  if (bc && continuation_trans_ == nullptr)\n    bc->NotifyPending();\n\n  PollExecution(\"unlockmulti\", nullptr);\n}\n\nEngineShard::TxQueueInfo EngineShard::AnalyzeTxQueue() const {\n  const TxQueue* queue = txq();\n\n  ShardId sid = shard_id();\n  
TxQueueInfo info;\n\n  if (queue->Empty())\n    return info;\n\n  auto cur = queue->Head();\n  info.tx_total = queue->size();\n  unsigned max_db_id = 0;\n\n  auto& db_slice = namespaces->GetDefaultNamespace().GetCurrentDbSlice();\n\n  {\n    auto value = queue->At(cur);\n    Transaction* trx = std::get<Transaction*>(value);\n    info.head.debug_id_info = trx->DebugId(sid);\n  }\n\n  do {\n    auto value = queue->At(cur);\n    Transaction* trx = std::get<Transaction*>(value);\n    // find maximum index of databases used by transactions\n    if (trx->GetDbIndex() > max_db_id) {\n      max_db_id = trx->GetDbIndex();\n    }\n\n    bool is_armed = trx->DEBUG_IsArmedInShard(sid);\n    DVLOG(1) << \"Inspecting \" << trx->DebugId() << \" is_armed \" << is_armed;\n    if (is_armed) {\n      info.tx_armed++;\n\n      if (trx->IsGlobal() || (trx->IsMulti() && trx->GetMultiMode() == Transaction::GLOBAL)) {\n        info.tx_global++;\n      } else {\n        const DbTable* table = db_slice.GetDBTable(trx->GetDbIndex());\n        bool can_run = !HasContendedLocks(sid, trx, table);\n        if (can_run) {\n          info.tx_runnable++;\n        }\n      }\n    }\n    cur = queue->Next(cur);\n  } while (cur != queue->Head());\n\n  // Analyze locks\n  for (unsigned i = 0; i <= max_db_id; ++i) {\n    const DbTable* table = db_slice.GetDBTable(i);\n    if (table == nullptr)\n      continue;\n\n    info.total_locks += table->trans_locks.Size();\n    for (const auto& [key, lock] : table->trans_locks) {\n      if (lock.IsContended()) {\n        info.contended_locks++;\n        if (lock.ContentionScore() > info.max_contention_score) {\n          info.max_contention_score = lock.ContentionScore();\n          info.max_contention_lock = key;\n        }\n      }\n    }\n  }\n\n  return info;\n}\n\nsize_t EngineShard::CompactTable(double threshold, DbIndex db_idx) {\n  DbSlice& db_slice = namespaces->GetDefaultNamespace().GetDbSlice(shard_id());\n  auto& prime = 
db_slice.GetDBTable(db_idx)->prime;\n  size_t total_seg_merged = 0;\n\n  while (true) {\n    bool merged_any = false;\n    // Prompt GetSegmentCount() each iteration to handle directory resizes across preemptions\n    for (size_t seg_id = 0; seg_id < prime.GetSegmentCount(); seg_id = prime.NextSeg(seg_id)) {\n      if (SliceSnapshot::IsSnaphotInProgress()) {\n        return total_seg_merged;\n      }\n      // Fetch segment pointer fresh each iteration\n      auto* seg = prime.GetSegment(seg_id);\n\n      unsigned buddy_id = prime.FindBuddyId(seg_id);\n      if (buddy_id == seg_id)\n        continue;\n\n      if (seg_id > buddy_id)\n        continue;\n\n      auto* buddy = prime.GetSegment(buddy_id);\n\n      const size_t combined = seg->SlowSize() + buddy->SlowSize();\n      const size_t max_size = threshold * seg->capacity();\n\n      if (combined > max_size)\n        continue;\n\n      if (prime.Merge(seg_id, buddy_id)) {\n        ++total_seg_merged;\n        merged_any = true;\n      }\n\n      // Yield after merge (don't hold pointers across yield)\n      util::ThisFiber::Yield();\n    }\n\n    if (!merged_any)\n      break;\n  }\n\n  return total_seg_merged;\n}\n\n}  // namespace dfly\n"
  },
  {
    "path": "src/server/engine_shard.h",
    "content": "// Copyright 2024, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#pragma once\n\n#include \"core/intent_lock.h\"\n#include \"core/mi_memory_resource.h\"\n#include \"core/page_usage/page_usage_stats.h\"\n#include \"core/task_queue.h\"\n#include \"core/tx_queue.h\"\n#include \"server/common_types.h\"\n#include \"util/sliding_counter.h\"\n\ntypedef char* sds;\n\nnamespace dfly {\n\nclass EngineShardSet;\nclass TieredStorage;\nclass ShardDocIndices;\n\nclass EngineShard {\n  friend class EngineShardSet;\n\n public:\n  struct Stats {\n    uint64_t defrag_attempt_total = 0;\n    uint64_t defrag_realloc_total = 0;\n    uint64_t defrag_task_invocation_total = 0;\n    uint64_t defrag_skipped_mem_under_threshold = 0;\n    uint64_t defrag_skipped_within_check_interval = 0;\n    uint64_t defrag_skipped_not_enough_fragmentation = 0;\n    uint64_t poll_execution_total = 0;\n\n    // number of optimistic executions - that were run as part of the scheduling.\n    uint64_t tx_optimistic_total = 0;\n    uint64_t tx_ooo_total = 0;\n\n    // Number of ScheduleBatchInShard calls.\n    uint64_t tx_batch_schedule_calls_total = 0;\n\n    // Number of transactions scheduled via ScheduleBatchInShard.\n    uint64_t tx_batch_scheduled_items_total = 0;\n\n    uint64_t total_heartbeat_expired_keys = 0;\n    uint64_t total_heartbeat_expired_bytes = 0;\n    uint64_t total_heartbeat_expired_calls = 0;\n\n    // cluster stats\n    uint64_t total_migrated_keys = 0;\n\n    // how many huffman tables were built successfully in the background\n    uint32_t huffman_tables_built = 0;\n\n    // Stream access pattern metrics (per-command, not per-entry).\n    uint64_t stream_sequential_accesses = 0;  // head/tail: XADD, XREAD recent, XTRIM, etc.\n    uint64_t stream_random_accesses = 0;      // arbitrary-ID lookups: XRANGE partial, XDEL, XCLAIM\n    uint64_t stream_fetch_all_accesses = 0;   // full stream scan from beginning\n\n    Stats& 
operator+=(const Stats&);\n  };\n\n  // Sets up a new EngineShard in the thread.\n  // If update_db_time is true, initializes periodic time update for its db_slice.\n  static void InitThreadLocal(util::ProactorBase* pb);\n\n  // Must be called after all InitThreadLocal() have finished\n  void InitTieredStorage(util::ProactorBase* pb, size_t max_file_size);\n\n  static void DestroyThreadLocal();\n\n  static EngineShard* tlocal() {\n    return shard_;\n  }\n\n  bool IsMyThread() const {\n    return this == shard_;\n  }\n\n  ShardId shard_id() const {\n    return shard_id_;\n  }\n\n  PMR_NS::memory_resource* memory_resource() {\n    return &mi_resource_;\n  }\n\n  TaskQueue* GetFiberQueue() {\n    return &queue_;\n  }\n\n  TaskQueue* GetSecondaryQueue() {\n    return &queue2_;\n  }\n\n  // Processes TxQueue, blocked transactions or any other execution state related to that\n  // shard. Tries executing the passed transaction if possible (does not guarantee though).\n  void PollExecution(const char* context, Transaction* trans);\n\n  // Returns transaction queue.\n  TxQueue* txq() {\n    return &txq_;\n  }\n\n  const TxQueue* txq() const {\n    return &txq_;\n  }\n\n  TxId committed_txid() const {\n    return committed_txid_;\n  }\n\n  // Signals whether shard-wide lock is active.\n  // Transactions that conflict with shard locks must subscribe into pending queue.\n  IntentLock* shard_lock() {\n    return &shard_lock_;\n  }\n\n  // Remove current continuation trans if its equal to tx.\n  void RemoveContTx(Transaction* tx);\n\n  const Stats& stats() const {\n    return stats_;\n  }\n\n  Stats& stats() {\n    return stats_;\n  }\n\n  // Calculate memory used by shard by summing multiple sources\n  size_t UsedMemory() const;\n\n  TieredStorage* tiered_storage() {\n    return tiered_storage_.get();\n  }\n\n  ShardDocIndices* search_indices() const {\n    return shard_search_indices_.get();\n  }\n\n  // Moving average counters.\n  enum MovingCnt : uint8_t { TTL_TRAVERSE, 
TTL_DELETE, COUNTER_TOTAL };\n\n  // Returns moving sum over the last 6 seconds.\n  uint32_t GetMovingSum6(MovingCnt type) const {\n    return counter_[unsigned(type)].SumTail();\n  }\n\n  bool journal() const {\n    return journal_;\n  }\n\n  void set_journal(bool enable) {\n    journal_ = enable;\n  }\n\n  void SetReplica(bool replica) {\n    is_replica_ = replica;\n  }\n\n  bool IsReplica() const {\n    return is_replica_;\n  }\n\n  const Transaction* GetContTx() const {\n    return continuation_trans_;\n  }\n\n  void StopPeriodicFiber();\n\n  struct TxQueueItem {\n    std::string debug_id_info;\n  };\n\n  struct TxQueueInfo {\n    // Armed - those that the coordinator has armed with callbacks and wants them to run.\n    // Runnable - those that could run (they own the locks) but probably can not run due\n    // to head of line blocking in the transaction queue i.e. there is a transaction that\n    // either is not armed or not runnable that is blocking the runnable transactions.\n    // tx_total is the size of the transaction queue.\n    unsigned tx_armed = 0, tx_total = 0, tx_runnable = 0, tx_global = 0;\n\n    // total_locks - total number of the transaction locks in the shard.\n    unsigned total_locks = 0;\n\n    // contended_locks - number of locks that are contended by more than one transaction.\n    unsigned contended_locks = 0;\n\n    // The score of the lock with maximum contention (see IntentLock::ContentionScore for details).\n    unsigned max_contention_score = 0;\n\n    // the lock fingerprint with maximum contention score.\n    uint64_t max_contention_lock;\n\n    // We can use a vector to hold debug info for all items in the txqueue\n    TxQueueItem head;\n\n    std::string Format() const;\n  };\n\n  TxQueueInfo AnalyzeTxQueue() const;\n\n  // Returns true if relevant write operations should throttle to wait for tiering to catch up.\n  // The estimate is based on memory usage crossing tiering redline and the write depth being at\n  // least 30% of 
allowed max, providing at least some guarantee of progress.\n  bool ShouldThrottleForTiering() const;\n\n  void FinalizeMulti(Transaction* tx);\n\n  // Scan the shard with the cursor and apply defragmentation for database entries.\n  // Returns collected page stats if defragmentation was performed.\n  std::optional<CollectedPageStats> DoDefrag(PageUsage* page_usage);\n\n  uint64_t GetDefragCursor() const {\n    return defrag_state_.cursor;\n  }\n\n  // Return total segments merged.\n  size_t CompactTable(double threshold, DbIndex db_idx);\n\n private:\n  struct DefragTaskState {\n    size_t dbid = 0u;\n    uint64_t cursor = 0u;\n    time_t last_check_time = 0;\n    float page_utilization_threshold = 0.8;\n\n    enum class SkipReason : uint8_t {\n      MemoryTooLow,\n      MemoryBelowThreshold,\n      CheckWithinInterval,\n      NotEnoughFragmentation,\n      CheckInProgress,\n      NotSkipped,\n    };\n\n    // check the current threshold and return a reason if we skip the defragmentation\n    SkipReason CheckRequired();\n\n    void UpdateScanState(uint64_t cursor_val);\n\n    void ResetScanState();\n  };\n\n  struct EvictionTaskState {\n    void Reset(bool rss_eviction_enabled_flag) {\n      rss_eviction_enabled = rss_eviction_enabled_flag;\n      shard_used_memory_at_prev_eviction = global_rss_memory_at_prev_eviction =\n          acc_deleted_bytes_during_eviction = deleted_bytes_at_prev_eviction = 0;\n    }\n    void AdjustDeletedBytes(size_t shard_used_memory);\n    void LimitAccumulatedDeletedBytes(size_t shard_rss_over_memory_budget);\n    void AdjustAccumulatedDeletedBytes(size_t global_used_rss_memory);\n    bool rss_eviction_enabled = true;\n    bool track_deleted_bytes = false;\n    size_t acc_deleted_bytes_during_eviction = 0;  // Accumulated deleted bytes during eviction\n    size_t deleted_bytes_at_prev_eviction = 0;     // Bytes deleted in previous eviction\n    size_t shard_used_memory_at_prev_eviction = 0;\n    size_t 
global_rss_memory_at_prev_eviction = 0;\n  };\n\n  EngineShard(util::ProactorBase* pb, mi_heap_t* heap);\n\n  // blocks the calling fiber.\n  void Shutdown();  // called before destructing EngineShard.\n\n  void StartPeriodicHeartbeatFiber(util::ProactorBase* pb);\n  void StartPeriodicShardHandlerFiber(util::ProactorBase* pb, std::function<void()> shard_handler);\n\n  void Heartbeat();\n  void RetireExpiredAndEvict();\n\n  /* Calculates the number of bytes to evict based on memory and rss memory usage. */\n  size_t CalculateEvictionBytes();\n\n  void CacheStats();\n\n  // We are running a task that checks whether we need to\n  // do memory de-fragmentation here, this task only runs\n  // when there is available CPU time.\n  // --------------------------------------------------------------------------\n  // NOTE: This task is running with exclusive access to the shard.\n  // i.e. - Since we are using shared-nothing access here, and all accesses\n  // are done using fibers, this fiber is run only when no other fiber in the\n  // context of the controlling thread will access this shard!\n  // --------------------------------------------------------------------------\n  uint32_t DefragTask();\n\n  TxQueue txq_;\n  TaskQueue queue_, queue2_;\n\n  ShardId shard_id_;\n  Stats stats_;\n\n  // Become passive if replica: don't automatically evict expired items.\n  bool is_replica_ = false;\n  bool journal_ = false;\n\n  // Precise tracking of used memory by persistent shard local values and structures\n  MiMemoryResource mi_resource_;\n\n  struct {\n    uint64_t updated_at = 0;  // from GetMonotonicTimeNs\n    size_t used_mem = 0;\n  } last_mem_params_;\n\n  // Logical ts used to order distributed transactions.\n  TxId committed_txid_ = 0;\n  Transaction* continuation_trans_ = nullptr;\n  std::string continuation_debug_id_;\n  unsigned poll_concurrent_factor_ = 0;\n\n  IntentLock shard_lock_;\n\n  uint32_t defrag_task_id_ = UINT32_MAX, huffman_check_task_id_ = UINT32_MAX;\n  
EvictionTaskState eviction_state_;  // Used on eviction fiber\n  util::fb2::Fiber fiber_heartbeat_periodic_;\n  util::fb2::Done fiber_heartbeat_periodic_done_;\n\n  util::fb2::Fiber fiber_shard_handler_periodic_;\n  util::fb2::Done fiber_shard_handler_periodic_done_;\n\n  DefragTaskState defrag_state_;\n  std::unique_ptr<TieredStorage> tiered_storage_;\n  // TODO: Move indices to Namespace\n  std::unique_ptr<ShardDocIndices> shard_search_indices_;\n  uint64_t stalled_start_ns_ = 0;\n  using Counter = util::SlidingCounter<7>;\n\n  Counter counter_[COUNTER_TOTAL];\n\n  static __thread EngineShard* shard_;\n};\n\n}  // namespace dfly\n"
  },
  {
    "path": "src/server/engine_shard_set.cc",
    "content": "// Copyright 2022, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#include \"server/engine_shard_set.h\"\n\n#include <sys/statvfs.h>\n\n#include <filesystem>\n\n#include \"base/flags.h\"\n#include \"base/logging.h\"\n#include \"server/common.h\"\n#include \"server/db_slice.h\"\n#include \"server/namespaces.h\"\n#include \"server/tiered_storage.h\"\n#include \"strings/human_readable.h\"\n\nusing namespace std;\n\nABSL_FLAG(bool, cache_mode, false,\n          \"If true, the backend behaves like a cache, \"\n          \"by evicting entries when getting close to maxmemory limit\");\n\nABSL_FLAG(strings::MemoryBytesFlag, tiered_max_file_size, strings::MemoryBytesFlag{},\n          \"Limit on maximum file size that is used by the database for tiered storage. \"\n          \"0 - means the program will automatically determine its maximum file size. \"\n          \"default: 0\");\n\nABSL_DECLARE_FLAG(string, tiered_prefix);\n\nnamespace dfly {\n\nusing namespace tiering::literals;\n\nusing namespace util;\nusing absl::GetFlag;\nusing strings::HumanReadableNumBytes;\n\nnamespace {\n\nuint64_t GetFsLimit() {\n  std::filesystem::path file_path(GetFlag(FLAGS_tiered_prefix));\n  std::string dir_name_str = file_path.parent_path().string();\n\n  if (dir_name_str.empty())\n    dir_name_str = \".\";\n\n  struct statvfs stat;\n  if (statvfs(dir_name_str.c_str(), &stat) == 0) {\n    uint64_t limit = stat.f_frsize * stat.f_blocks;\n    return limit;\n  }\n  LOG(WARNING) << \"Error getting filesystem information \" << errno;\n  return 0;\n}\n\nsize_t GetTieredFileLimit(size_t threads) {\n  string file_prefix = GetFlag(FLAGS_tiered_prefix);\n  if (file_prefix.empty())\n    return 0;\n\n  size_t max_shard_file_size = 0;\n\n  size_t max_file_size = absl::GetFlag(FLAGS_tiered_max_file_size).value;\n  size_t max_file_size_limit = GetFsLimit();\n  if (max_file_size == 0) {\n    LOG(INFO) << \"max_file_size has not been specified. 
Deciding myself....\";\n    max_file_size = (max_file_size_limit * 0.8);\n  } else {\n    if (max_file_size_limit < max_file_size) {\n      LOG(WARNING) << \"Got max file size \" << HumanReadableNumBytes(max_file_size)\n                   << \", however only \" << HumanReadableNumBytes(max_file_size_limit)\n                   << \" disk space was found.\";\n    }\n  }\n\n  max_shard_file_size = max_file_size / threads;\n  if (max_shard_file_size < 256_MB) {\n    LOG(ERROR) << \"Max tiering file size is too small. Setting: \"\n               << HumanReadableNumBytes(max_file_size) << \" Required at least \"\n               << HumanReadableNumBytes(256_MB * threads) << \". Exiting..\";\n    exit(1);\n  }\n  LOG(INFO) << \"Max file size is: \" << HumanReadableNumBytes(max_file_size);\n\n  return max_shard_file_size;\n}\n\n}  // namespace\n\n/**\n\n\n  _____                _               ____   _                      _  ____         _\n | ____| _ __    __ _ (_) _ __    ___ / ___| | |__    __ _  _ __  __| |/ ___|   ___ | |_\n |  _|  | '_ \\  / _` || || '_ \\  / _ \\\\___ \\ | '_ \\  / _` || '__|/ _` |\\___ \\  / _ \\| __|\n | |___ | | | || (_| || || | | ||  __/ ___) || | | || (_| || |  | (_| | ___) ||  __/| |_\n |_____||_| |_| \\__, ||_||_| |_| \\___||____/ |_| |_| \\__,_||_|   \\__,_||____/  \\___| \\__|\n                |___/\n\n */\n\nEngineShardSet* shard_set = nullptr;\n\nvoid EngineShardSet::Init(uint32_t sz, std::function<void()> shard_handler) {\n  CHECK_EQ(0u, size());\n  CHECK(namespaces == nullptr);\n\n  shards_.reset(new EngineShard*[sz]);\n\n  size_ = sz;\n  size_t max_shard_file_size = GetTieredFileLimit(sz);\n  pp_->AwaitFiberOnAll([this](uint32_t index, ProactorBase* pb) {\n    if (index < size_) {\n      InitThreadLocal(pb);\n    }\n  });\n\n  // The order is important here. 
We must initialize namespaces after shards_.\n  namespaces = new Namespaces();\n\n  pp_->AwaitFiberOnAll([&](uint32_t index, ProactorBase* pb) {\n    if (index < size_) {\n      auto* shard = EngineShard::tlocal();\n      shard->InitTieredStorage(pb, max_shard_file_size);\n\n      // Must be last, as it accesses objects initialized above.\n      // We can not move shard_handler because this code is called multiple times.\n      shard->StartPeriodicHeartbeatFiber(pb);\n      shard->StartPeriodicShardHandlerFiber(pb, shard_handler);\n    }\n  });\n}\n\nvoid EngineShardSet::PreShutdown() {\n  RunBlockingInParallel([](EngineShard* shard) {\n    shard->StopPeriodicFiber();\n\n    // We must close tiered_storage before we destroy namespaces that own db slices.\n    if (shard->tiered_storage()) {\n      shard->tiered_storage()->Close();\n    }\n  });\n}\n\nvoid EngineShardSet::Shutdown() {\n  // Calling Namespaces::Clear before destroying engine shards, because it accesses them\n  // internally.\n  namespaces->Clear();\n  RunBlockingInParallel([](EngineShard*) { EngineShard::DestroyThreadLocal(); });\n\n  delete namespaces;\n  namespaces = nullptr;\n}\n\nvoid EngineShardSet::InitThreadLocal(ProactorBase* pb) {\n  EngineShard::InitThreadLocal(pb);\n  EngineShard* es = EngineShard::tlocal();\n  shards_[es->shard_id()] = es;\n}\n\nvoid EngineShardSet::TEST_EnableCacheMode() {\n  RunBlockingInParallel([](EngineShard* shard) {\n    namespaces->GetDefaultNamespace().GetCurrentDbSlice().TEST_EnableCacheMode();\n  });\n}\n\n}  // namespace dfly\n"
  },
  {
    "path": "src/server/engine_shard_set.h",
    "content": "// Copyright 2024, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#pragma once\n\n#include \"server/engine_shard.h\"\n#include \"util/proactor_pool.h\"\n\nnamespace dfly {\n\nclass TieredStorage;\nclass ShardDocIndices;\nclass BlockingController;\nclass EngineShardSet;\n\nclass EngineShardSet {\n public:\n  struct CachedStats {\n    std::atomic_uint64_t used_memory;\n\n    CachedStats() : used_memory(0) {\n    }\n\n    CachedStats(const CachedStats& o) : used_memory(o.used_memory.load()) {\n    }\n  };\n\n  explicit EngineShardSet(util::ProactorPool* pp) : pp_(pp) {\n  }\n\n  uint32_t size() const {\n    return size_;\n  }\n\n  util::ProactorPool* pool() {\n    return pp_;\n  }\n\n  void Init(uint32_t size, std::function<void()> shard_handler);\n\n  // Shutdown sequence:\n  // - EngineShardSet.PreShutDown()\n  // - Namespaces.Clear()\n  // - EngineShardSet.Shutdown()\n  void PreShutdown();\n  void Shutdown();\n\n  // Uses a shard queue to dispatch. Callback runs in a dedicated fiber.\n  template <typename F> auto Await(ShardId sid, F&& f) {\n    return shards_[sid]->GetFiberQueue()->Await(std::forward<F>(f));\n  }\n\n  // Uses a shard queue to dispatch. Callback runs in a dedicated fiber.\n  template <typename F> auto Add(ShardId sid, F&& f) {\n    assert(sid < size_);\n    return shards_[sid]->GetFiberQueue()->Add(std::forward<F>(f));\n  }\n\n  template <typename F> auto AddL2(ShardId sid, F&& f) {\n    return shards_[sid]->GetSecondaryQueue()->Add(std::forward<F>(f));\n  }\n\n  // Runs a brief function on all shards. Waits for it to complete.\n  // `func` must not preempt.\n  template <typename U> void RunBriefInParallel(U&& func) const {\n    RunBriefInParallel(std::forward<U>(func), [](auto i) { return true; });\n  }\n\n  // Runs a brief function on selected shards. 
Waits for it to complete.\n  // `func` must not preempt.\n  template <typename U, typename P> void RunBriefInParallel(U&& func, P&& pred) const;\n\n  // Runs a possibly blocking function on all shards. Waits for it to complete.\n  template <typename U> void RunBlockingInParallel(U&& func) {\n    RunBlockingInParallel(std::forward<U>(func), [](auto i) { return true; });\n  }\n\n  // Runs a possibly blocking function on selected shards. Waits for it to complete.\n  template <typename U, typename P> void RunBlockingInParallel(U&& func, P&& pred);\n\n  // Runs func on all shards via the same shard queue that's been used by transactions framework.\n  // The functions running inside the shard queue run atomically (sequentially)\n  // with respect to each other on the same shard.\n  template <typename U> void AwaitRunningOnShardQueue(U&& func) {\n    util::fb2::BlockingCounter bc(size_);\n    for (size_t i = 0; i < size_; ++i) {\n      Add(i, [&func, bc]() mutable {\n        func(EngineShard::tlocal());\n        bc->Dec();\n      });\n    }\n\n    bc->Wait();\n  }\n\n  // Used in tests\n  void TEST_EnableCacheMode();\n\n private:\n  void InitThreadLocal(util::ProactorBase* pb);\n  util::ProactorPool* pp_;\n  std::unique_ptr<EngineShard*[]> shards_;\n  uint32_t size_ = 0;\n};\n\ntemplate <typename U, typename P>\nvoid EngineShardSet::RunBriefInParallel(U&& func, P&& pred) const {\n  util::fb2::BlockingCounter bc{0};\n\n  for (uint32_t i = 0; i < size(); ++i) {\n    if (!pred(i))\n      continue;\n\n    bc->Add(1);\n    util::ProactorBase* dest = pp_->at(i);\n    dest->DispatchBrief([&func, bc]() mutable {\n      func(EngineShard::tlocal());\n      bc->Dec();\n    });\n  }\n  bc->Wait();\n}\n\ntemplate <typename U, typename P> void EngineShardSet::RunBlockingInParallel(U&& func, P&& pred) {\n  util::fb2::BlockingCounter bc{0};\n  static_assert(std::is_invocable_v<U, EngineShard*>,\n                \"Argument must be invocable EngineShard* as argument.\");\n  
static_assert(std::is_void_v<std::invoke_result_t<U, EngineShard*>>,\n                \"Callable must not have a return value!\");\n\n  for (uint32_t i = 0; i < size(); ++i) {\n    if (!pred(i))\n      continue;\n\n    bc->Add(1);\n    util::ProactorBase* dest = pp_->at(i);\n\n    // the \"Dispatch\" call spawns a fiber underneath.\n    dest->Dispatch([&func, bc]() mutable {\n      func(EngineShard::tlocal());\n      bc->Dec();\n    });\n  }\n  bc->Wait();\n}\n\nShardId Shard(std::string_view v, ShardId shard_num);\n\n// absl::GetCurrentTimeNanos is twice faster than clock_gettime(CLOCK_REALTIME) on my laptop\n// and 4 times faster than on a VM. it takes 5-10ns to do a call.\n\nextern uint64_t TEST_current_time_ms;\n\ninline uint64_t GetCurrentTimeMs() {\n  return TEST_current_time_ms ? TEST_current_time_ms : absl::GetCurrentTimeNanos() / 1000000;\n}\n\ninline uint64_t GetCurrentTimeNs() {\n  return TEST_current_time_ms ? TEST_current_time_ms * 1000000 : absl::GetCurrentTimeNanos();\n}\n\nextern EngineShardSet* shard_set;\n\n}  // namespace dfly\n"
  },
  {
    "path": "src/server/engine_shard_set_test.cc",
    "content": "// Copyright 2023, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#include <absl/flags/reflection.h>\n#include <absl/strings/numbers.h>\n#include <absl/strings/str_split.h>\n#include <absl/strings/strip.h>\n#include <gmock/gmock.h>\n\n#include <map>\n#include <string>\n#include <string_view>\n#include <vector>\n\n#include \"base/flags.h\"\n#include \"base/gtest.h\"\n#include \"base/logging.h\"\n#include \"server/main_service.h\"\n#include \"server/test_utils.h\"\n\nABSL_DECLARE_FLAG(std::string, shard_round_robin_prefix);\n\nnamespace dfly {\nnamespace {\n\nusing namespace std;\nusing testing::Contains;\nusing testing::Pair;\n\nclass RoundRobinSharderTest : public BaseFamilyTest {\n protected:\n  RoundRobinSharderTest() {\n    absl::SetFlag(&FLAGS_shard_round_robin_prefix, \"RR:\");\n    SetTestFlag(\"cluster_mode\", \"emulated\");\n    ResetService();\n  }\n};\n\nTEST_F(RoundRobinSharderTest, RoundRobinShard) {\n  if (shard_set->size() < 2) {\n    GTEST_SKIP() << \"Can only test round robin with 2+ shards\";\n  }\n\n  Run({\"set\", \"{RR:key0}\", \"value\"});\n  EXPECT_THAT(GetShardKeyCount(), Contains(Pair(0, 1)));  // shard 0 has 1 key\n  EXPECT_THAT(GetShardKeyCount(), Contains(Pair(1, 0)));  // shard 1 has 0 keys\n\n  Run({\"set\", \"{RR:key1}\", \"value\"});\n  EXPECT_THAT(GetShardKeyCount(), Contains(Pair(0, 1)));  // shard 0 has 1 key\n  EXPECT_THAT(GetShardKeyCount(), Contains(Pair(1, 1)));  // shard 1 also has 1 key\n\n  Run({\"set\", \"{RR:key2}\", \"value\"});\n  if (shard_set->size() == 2) {\n    EXPECT_THAT(GetShardKeyCount(), Contains(Pair(0, 2)));\n    EXPECT_THAT(GetShardKeyCount(), Contains(Pair(1, 1)));\n  } else {\n    EXPECT_THAT(GetShardKeyCount(), Contains(Pair(0, 1)));\n    EXPECT_THAT(GetShardKeyCount(), Contains(Pair(1, 1)));\n    EXPECT_THAT(GetShardKeyCount(), Contains(Pair(2, 1)));\n  }\n}\n\n}  // namespace\n}  // namespace dfly\n"
  },
  {
    "path": "src/server/error.cc",
    "content": "// Copyright 2024, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#include \"server/error.h\"\n\n#include <absl/strings/str_cat.h>\n\nusing namespace std;\n\nnamespace dfly {\nnamespace rdb {\n\nclass error_category : public std::error_category {\n public:\n  const char* name() const noexcept final {\n    return \"dragonfly.rdbload\";\n  }\n\n  string message(int ev) const final;\n\n  error_condition default_error_condition(int ev) const noexcept final;\n\n  bool equivalent(int ev, const error_condition& condition) const noexcept final {\n    return condition.value() == ev && &condition.category() == this;\n  }\n\n  bool equivalent(const error_code& error, int ev) const noexcept final {\n    return error.value() == ev && &error.category() == this;\n  }\n};\n\nstring error_category::message(int ev) const {\n  switch (ev) {\n    case errc::wrong_signature:\n      return \"Wrong signature while trying to load from rdb file\";\n    case errc::out_of_memory:\n      return \"Out of memory, or used memory is too high\";\n    case errc::incorrect_snapshot_id:\n      return \"Snapshot id mismatch\";\n    default:\n      return absl::StrCat(\"Internal error when loading RDB file \", ev);\n      break;\n  }\n}\n\nerror_condition error_category::default_error_condition(int ev) const noexcept {\n  return error_condition{ev, *this};\n}\n\nstatic error_category rdb_category;\n\n}  // namespace rdb\n\nerror_code RdbError(rdb::errc ev) {\n  return error_code{static_cast<int>(ev), rdb::rdb_category};\n}\n\n}  // namespace dfly\n"
  },
  {
    "path": "src/server/error.h",
    "content": "// Copyright 2022, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#pragma once\n\n#include <atomic>\n#include <system_error>\n\n#include \"facade/error.h\"\n\nnamespace dfly {\n\nusing facade::kDbIndOutOfRangeErr;\nusing facade::kInvalidDbIndErr;\nusing facade::kInvalidIntErr;\nusing facade::kSyntaxErr;\nusing facade::kWrongTypeErr;\n\n#ifndef RETURN_ON_ERR\n\n#define RETURN_ON_ERR_T(T, x)                                          \\\n  do {                                                                 \\\n    std::error_code __ec = (x);                                        \\\n    if (__ec) {                                                        \\\n      DLOG(ERROR) << \"Error while calling \" #x \": \" << __ec.message(); \\\n      return (T)(__ec);                                                \\\n    }                                                                  \\\n  } while (0)\n\n#define RETURN_ON_ERR(x) RETURN_ON_ERR_T(std::error_code, x)\n\n#define RETURN_ON_GENERIC_ERR(x)                                   \\\n  do {                                                             \\\n    if (x) {                                                       \\\n      DLOG(ERROR) << \"Error while calling \" #x \": \" << x.Format(); \\\n      return x;                                                    \\\n    }                                                              \\\n  } while (0)\n\n#endif  // RETURN_ON_ERR\n\n#ifndef RETURN_ON_BAD_STATUS\n\n#define RETURN_ON_BAD_STATUS(x)  \\\n  do {                           \\\n    OpStatus __s = (x).status(); \\\n    if (__s != OpStatus::OK) {   \\\n      return __s;                \\\n    }                            \\\n  } while (0)\n\n#endif  // RETURN_ON_BAD_STATUS\n\n#ifndef GET_OR_SEND_UNEXPECTED\n\n#define GET_OR_SEND_UNEXPECTED(expr)        \\\n  ({                                        \\\n    auto expr_res = (expr);                 \\\n    if 
(!expr_res) {                        \\\n      builder->SendError(expr_res.error()); \\\n      return;                               \\\n    }                                       \\\n    std::move(expr_res).value();            \\\n  })\n\n#endif  // GET_OR_SEND_UNEXPECTED\n\nnamespace rdb {\n\nenum errc {\n  wrong_signature = 1,\n  bad_version = 2,\n  feature_not_supported = 3,\n  duplicate_key = 4,\n  rdb_file_corrupted = 5,\n  bad_checksum = 6,\n  bad_db_index = 7,\n  invalid_rdb_type = 8,\n  invalid_encoding = 9,\n  empty_key = 10,\n  out_of_memory = 11,\n  bad_json_string = 12,\n  unsupported_operation = 13,\n  value_expired = 14,  // applying to set and hmap\n  incorrect_snapshot_id = 15,\n};\n\n}  // namespace rdb\n\nstd::error_code RdbError(rdb::errc ev);\n\n}  // namespace dfly\n"
  },
  {
    "path": "src/server/execution_state.cc",
    "content": "// Copyright 2024, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#include \"server/execution_state.h\"\n\n#include <absl/strings/str_cat.h>\n\n#include \"base/logging.h\"\n\nnamespace dfly {\n\nusing namespace std;\n\nGenericError::operator std::error_code() const {\n  return ec_;\n}\n\nGenericError::operator bool() const {\n  return bool(ec_) || !details_.empty();\n}\n\nstd::string GenericError::Format() const {\n  if (!ec_ && details_.empty())\n    return \"\";\n\n  if (details_.empty())\n    return ec_.message();\n  else if (!ec_)\n    return details_;\n  else\n    return absl::StrCat(ec_.message(), \": \", details_);\n}\n\nExecutionState::~ExecutionState() {\n  DCHECK(!err_handler_fb_.IsJoinable());\n  err_handler_fb_.JoinIfNeeded();\n}\n\nGenericError ExecutionState::GetError() const {\n  std::lock_guard lk(err_mu_);\n  return err_;\n}\n\nvoid ExecutionState::ReportCancelError() {\n  ReportError(std::make_error_code(errc::operation_canceled), \"ExecutionState cancelled\");\n}\n\nvoid ExecutionState::Reset(ErrHandler handler) {\n  util::fb2::Fiber fb;\n\n  unique_lock lk{err_mu_};\n  err_ = {};\n  err_handler_ = std::move(handler);\n  state_.store(State::RUN, std::memory_order_relaxed);\n  fb.swap(err_handler_fb_);\n  lk.unlock();\n  fb.JoinIfNeeded();\n}\n\nGenericError ExecutionState::SwitchErrorHandler(ErrHandler handler) {\n  std::lock_guard lk{err_mu_};\n  if (!err_) {\n    // No need to check for the error handler - it can't be running\n    // if no error is set.\n    err_handler_ = std::move(handler);\n  }\n  return err_;\n}\n\nvoid ExecutionState::JoinErrorHandler() {\n  util::fb2::Fiber fb;\n  unique_lock lk{err_mu_};\n  fb.swap(err_handler_fb_);\n  lk.unlock();\n  fb.JoinIfNeeded();\n}\n\nGenericError ExecutionState::ReportErrorInternal(GenericError&& err) {\n  if (IsCancelled()) {\n    LOG_IF(INFO, err != errc::operation_canceled) << err.Format();\n    return {};\n  }\n  lock_guard 
lk{err_mu_};\n  if (err_)\n    return err_;\n\n  err_ = std::move(err);\n\n  // This context is either new or was Reset, where the handler was joined\n  CHECK(!err_handler_fb_.IsJoinable());\n\n  LOG(WARNING) << \"ReportError: \" << err_.Format();\n\n  // We can move err_handler_ because it should run at most once.\n  if (err_handler_)\n    err_handler_fb_ = util::fb2::Fiber(\"report_internal_error\", std::move(err_handler_), err_);\n  state_.store(State::ERROR, std::memory_order_relaxed);\n  return err_;\n}\n\n}  // namespace dfly\n"
  },
  {
    "path": "src/server/execution_state.h",
    "content": "// Copyright 2024, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#pragma once\n\n#include <atomic>\n#include <functional>\n#include <mutex>\n#include <string>\n#include <system_error>\n\n#include \"facade/facade_types.h\"\n#include \"util/fibers/fibers.h\"\n#include \"util/fibers/synchronization.h\"\n\nnamespace dfly {\n\n// AggregateValue is a thread safe utility to store the first\n// truthy value;\ntemplate <typename T> struct AggregateValue {\n  bool operator=(T val) {\n    std::lock_guard l{mu_};\n    if (!bool(current_) && bool(val)) {\n      current_ = val;\n    }\n    return bool(val);\n  }\n\n  T operator*() {\n    std::lock_guard l{mu_};\n    return current_;\n  }\n\n  operator bool() {\n    return bool(**this);\n  }\n\n  // Move out of value without critical section. Safe only when no longer in use.\n  T Destroy() && {\n    return std::move(current_);\n  }\n\n private:\n  util::fb2::Mutex mu_{};\n  T current_{};\n};\n\n// Thread safe utility to store the first non null error.\nusing AggregateError = AggregateValue<std::error_code>;\n\n// Thread safe utility to store the first non OK status.\nusing AggregateStatus = AggregateValue<facade::OpStatus>;\nstatic_assert(bool(facade::OpStatus::OK) == false,\n              \"Default initialization should be a falsy OK value\");\n\n// Error wrapper, that stores error_code and optional string message.\nclass GenericError {\n public:\n  GenericError() = default;\n  GenericError(std::error_code ec) : ec_{ec}, details_{} {\n  }\n  GenericError(std::string details) : ec_{}, details_{std::move(details)} {\n  }\n  GenericError(std::error_code ec, std::string details) : ec_{ec}, details_{std::move(details)} {\n  }\n\n  operator std::error_code() const;\n  operator bool() const;\n\n  std::string Format() const;  // Get string representation of error.\n\n private:\n  std::error_code ec_;\n  std::string details_;\n};\n\n// Thread safe utility to store the first non 
null generic error.\nusing AggregateGenericError = AggregateValue<GenericError>;\n\n// ExecutionState is a thread-safe utility for managing error reporting and cancellation for complex\n// tasks. There are 3 states: RUN, CANCELLED, ERROR. RUN and CANCELLED are just states without any\n// actions. When an error is reported, only the first is stored; the next ones are ignored. Then a\n// special error handler is run, if present, and the ExecutionState is ERROR. The error handler is\n// run in a separate fiber to free up the caller.\n// If the state is CANCELLED all errors are ignored.\n//\n// ReportCancelError() reports an `errc::operation_canceled` error.\nclass ExecutionState {\n public:\n  using ErrHandler = std::function<void(const GenericError&)>;\n\n  ExecutionState() = default;\n  ExecutionState(ErrHandler err_handler) : err_handler_{std::move(err_handler)} {\n  }\n\n  ~ExecutionState();\n\n  // TODO Remove. This function was created to reduce size of the code that should be refactored\n  // Cancel() method should be used instead of this function\n  // Report a cancel error to the context by submitting an `errc::operation_canceled` error.\n  // If the state is CANCELLED does nothing\n  void ReportCancelError();\n\n  bool IsRunning() const {\n    return state_.load(std::memory_order_relaxed) == State::RUN;\n  }\n\n  bool IsError() const {\n    return state_.load(std::memory_order_relaxed) == State::ERROR;\n  }\n\n  bool IsCancelled() const {\n    return state_.load(std::memory_order_relaxed) == State::CANCELLED;\n  }\n\n  void Cancel() {\n    state_.store(State::CANCELLED, std::memory_order_relaxed);\n  }\n\n  GenericError GetError() const;\n\n  // Report an error by submitting arguments for GenericError.\n  // If this is the first error that occurred, then the error handler is run\n  // and the context state set to ERROR.\n  // If the state is CANCELLED does nothing\n  template <typename... T> GenericError ReportError(T&&... 
ts) {\n    return ReportErrorInternal(GenericError{std::forward<T>(ts)...});\n  }\n\n  // Wait for error handler to stop, reset error and state, assign new error handler.\n  void Reset(ErrHandler handler);\n\n  // Atomically replace the error handler if no error is present, and return the\n  // current stored error. This function can be used to transfer cleanup responsibility safely\n  //\n  // Beware, never do this manually in two steps. If you check the state,\n  // set the error handler and initialize resources, then the new error handler\n  // will never run if the context was cancelled between the first two steps.\n  GenericError SwitchErrorHandler(ErrHandler handler);\n\n  // If any error handler is running, wait for it to stop.\n  void JoinErrorHandler();\n\n private:\n  GenericError ReportErrorInternal(GenericError&& err);\n\n  enum class State { RUN, CANCELLED, ERROR };\n  std::atomic<State> state_{State::RUN};\n  GenericError err_;\n  ErrHandler err_handler_;\n  util::fb2::Fiber err_handler_fb_;\n\n  // We use regular mutexes to be able to call ReportError directly from I/O callbacks.\n  mutable std::mutex err_mu_;  // protects err_ and err_handler_\n};\n\n}  // namespace dfly\n"
  },
  {
    "path": "src/server/family_utils.cc",
    "content": "#include \"server/family_utils.h\"\n\n#include <absl/container/flat_hash_set.h>\n#include <absl/strings/str_cat.h>\n#include <xxhash.h>\n\n#include \"base/logging.h\"\n\nextern \"C\" {\n#include \"redis/listpack.h\"\n#include \"redis/sds.h\"\n#include \"redis/stream.h\"\n#include \"redis/ziplist.h\"\n#include \"redis/zmalloc.h\"\n}\n\nnamespace dfly {\n\nusing namespace std;\n\nnamespace {\n\nstruct ZiplistCbArgs {\n  long count = 0;\n  absl::flat_hash_set<string_view> fields;\n  unsigned char** lp;\n};\n\nint ZiplistPairsEntryConvertAndValidate(unsigned char* p, unsigned int head_count, void* userdata) {\n  unsigned char* str;\n  unsigned int slen;\n  long long vll;\n\n  ZiplistCbArgs* data = (ZiplistCbArgs*)userdata;\n\n  if (data->fields.empty()) {\n    data->fields.reserve(head_count / 2);\n  }\n\n  if (!ziplistGet(p, &str, &slen, &vll))\n    return 0;\n\n  if (((data->count) & 1) == 0) {\n    sds field = str ? sdsnewlen(str, slen) : sdsfromlonglong(vll);\n    auto [_, inserted] = data->fields.emplace(field, sdslen(field));\n    if (!inserted) {\n      sdsfree(field);\n      return 0;\n    }\n  }\n\n  if (str) {\n    *(data->lp) = lpAppend(*(data->lp), (unsigned char*)str, slen);\n  } else {\n    *(data->lp) = lpAppendInteger(*(data->lp), vll);\n  }\n\n  (data->count)++;\n  return 1;\n}\n\n}  // namespace\n\nstring XXH3_Digest(std::string_view s) {\n  uint64_t hash = XXH3_64bits(s.data(), s.size());\n  return absl::StrCat(absl::Hex(hash, absl::kZeroPad16));\n}\n\nsds WrapSds(std::string_view s) {\n  static thread_local sds tmp_sds = sdsempty();\n  return tmp_sds = sdscpylen(tmp_sds, s.data(), s.length());\n}\n\nNonUniquePicksGenerator::NonUniquePicksGenerator(RandomPick max_range) : max_range_(max_range) {\n  CHECK_GT(max_range, RandomPick(0));\n}\n\nRandomPick NonUniquePicksGenerator::Generate() {\n  return absl::Uniform(bitgen_, 0u, max_range_);\n}\n\nUniquePicksGenerator::UniquePicksGenerator(std::uint32_t picks_count, RandomPick max_range)\n 
   : remaining_picks_count_(picks_count), picked_indexes_(picks_count) {\n  CHECK_GE(max_range, picks_count);\n  current_random_limit_ = max_range - picks_count;\n}\n\nRandomPick UniquePicksGenerator::Generate() {\n  DCHECK_GT(remaining_picks_count_, 0u);\n\n  remaining_picks_count_--;\n\n  const RandomPick max_index = current_random_limit_++;\n  const RandomPick random_index = absl::Uniform(bitgen_, 0u, max_index + 1u);\n\n  const bool random_index_is_picked = picked_indexes_.emplace(random_index).second;\n  if (random_index_is_picked) {\n    return random_index;\n  }\n\n  picked_indexes_.insert(max_index);\n  return max_index;\n}\n\nstreamConsumer* StreamCreateConsumer(streamCG* cg, string_view name, uint64_t now_ms, int flags) {\n  DCHECK(cg);\n  DCHECK(!name.empty());\n  if (cg == NULL)\n    return NULL;\n\n  streamConsumer* consumer = (streamConsumer*)zmalloc(sizeof(*consumer));\n\n  int success =\n      raxTryInsert(cg->consumers, (unsigned char*)name.data(), name.size(), consumer, NULL);\n  if (!success) {\n    zfree(consumer);\n    return NULL;\n  }\n  consumer->name = sdsnewlen(name.data(), name.size());\n  consumer->pel = raxNew();\n  consumer->seen_time = now_ms;\n  consumer->active_time = -1;\n\n  return consumer;\n}\n\nint ZiplistPairsConvertAndValidateIntegrity(const uint8_t* zl, size_t size, unsigned char** lp) {\n  ZiplistCbArgs data;\n  data.lp = lp;\n\n  int ret = ziplistValidateIntegrity(const_cast<uint8_t*>(zl), size, 1,\n                                     ZiplistPairsEntryConvertAndValidate, &data);\n\n  if (data.count & 1)\n    ret = 0;\n\n  for (auto field : data.fields) {\n    sdsfree((sds)field.data());\n  }\n  return ret;\n}\n\n}  // namespace dfly\n"
  },
  {
    "path": "src/server/family_utils.h",
    "content": "// Copyright 2024, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#pragma once\n\n#include <absl/container/flat_hash_set.h>\n#include <absl/random/random.h>\n\n#include <cstdint>\n#include <string>\n#include <string_view>\n\n#include \"facade/facade_types.h\"\n#include \"server/engine_shard.h\"\n#include \"server/search/doc_index.h\"\n#include \"server/table.h\"\n\nextern \"C\" {\n#include \"redis/sds.h\"\n}\n\ntypedef struct streamConsumer streamConsumer;\ntypedef struct streamCG streamCG;\n\nnamespace dfly {\n\n// Compute XXH3 hash and return as 16-character hex string\nstd::string XXH3_Digest(std::string_view s);\n\ntemplate <typename DenseSet>\nstd::vector<long> ExpireElements(DenseSet* owner, facade::CmdArgList values, uint32_t ttl_sec);\n\n// Copy str to thread local sds instance. Valid until next WrapSds call on thread\nsds WrapSds(std::string_view str);\n\nusing RandomPick = uint32_t;\n\nclass PicksGenerator {\n public:\n  virtual RandomPick Generate() = 0;\n  virtual ~PicksGenerator() = default;\n};\n\nclass NonUniquePicksGenerator : public PicksGenerator {\n public:\n  /* The generated value will be within the closed-open interval [0, max_range) */\n  NonUniquePicksGenerator(RandomPick max_range);\n\n  RandomPick Generate() override;\n\n private:\n  const RandomPick max_range_;\n  absl::BitGen bitgen_{};\n};\n\n/*\n * Generates unique index in O(1).\n *\n * picks_count specifies the number of random indexes to be generated.\n * In other words, this is the number of times the Generate() function is called.\n *\n * The class uses Robert Floyd's sampling algorithm\n * https://dl.acm.org/doi/pdf/10.1145/30401.315746\n * */\nclass UniquePicksGenerator : public PicksGenerator {\n public:\n  /* The generated value will be within the closed-open interval [0, max_range) */\n  UniquePicksGenerator(uint32_t picks_count, RandomPick max_range);\n\n  RandomPick Generate() override;\n\n private:\n  RandomPick 
current_random_limit_;\n  uint32_t remaining_picks_count_;\n  absl::flat_hash_set<RandomPick> picked_indexes_;\n  absl::BitGen bitgen_{};\n};\n\nstreamConsumer* StreamCreateConsumer(streamCG* cg, std::string_view name, uint64_t now_ms,\n                                     int flags);\n\n/* Use these methods to add or remove documents from the indexes for generic commands when the key\n * being modified could potentially be of type HSET or JSON. */\nvoid AddKeyToIndexesIfNeeded(std::string_view key, const DbContext& db_cntx, PrimeValue& pv,\n                             EngineShard* shard);\nvoid RemoveKeyFromIndexesIfNeeded(std::string_view key, const DbContext& db_cntx,\n                                  const PrimeValue& pv, EngineShard* shard);\n\n// Validate and convert field/value ziplist pairs into listpack.\n// Returns 1 on success, 0 on integrity failure.\nint ZiplistPairsConvertAndValidateIntegrity(const uint8_t* zl, size_t size, unsigned char** lp);\n\n// Returns true if this key type could potentially be indexed.\n// Or in other words, if the key is of type HSET or JSON.\nbool IsIndexedKeyType(const PrimeValue& pv);\n\n// Implementation\n/******************************************************************/\ntemplate <typename DenseSet>\ninline std::vector<long> ExpireElements(DenseSet* owner, facade::CmdArgList values,\n                                        uint32_t ttl_sec) {\n  std::vector<long> res;\n  res.reserve(values.size());\n\n  for (size_t i = 0; i < values.size(); i++) {\n    std::string_view field = facade::ToSV(values[i]);\n    auto it = owner->Find(field);\n    if (it != owner->end()) {\n      it.SetExpiryTime(ttl_sec);\n      res.emplace_back(ttl_sec == 0 ? 
0 : 1);\n    } else {\n      res.emplace_back(-2);\n    }\n  }\n\n  return res;\n}\n\ninline void AddKeyToIndexesIfNeeded(std::string_view key, const DbContext& db_cntx, PrimeValue& pv,\n                                    EngineShard* shard) {\n  if (IsIndexedKeyType(pv)) {\n    shard->search_indices()->AddDoc(key, db_cntx, &pv);\n  }\n}\n\ninline void RemoveKeyFromIndexesIfNeeded(std::string_view key, const DbContext& db_cntx,\n                                         const PrimeValue& pv, EngineShard* shard) {\n  if (IsIndexedKeyType(pv)) {\n    shard->search_indices()->RemoveDoc(key, db_cntx, pv);\n  }\n}\n\ninline bool IsIndexedKeyType(const PrimeValue& pv) {\n  return pv.ObjType() == OBJ_HASH || pv.ObjType() == OBJ_JSON;\n}\n\n}  // namespace dfly\n"
  },
  {
    "path": "src/server/generic_family.cc",
    "content": "// Copyright 2022, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#include \"server/generic_family.h\"\n\n#include <absl/strings/ascii.h>\n#include <absl/strings/str_cat.h>\n\n#include <optional>\n\n#include \"facade/cmd_arg_parser.h\"\n#include \"facade/reply_builder.h\"\n\nextern \"C\" {\n#include \"redis/crc64.h\"\n}\n\n#include \"base/cycle_clock.h\"\n#include \"base/flags.h\"\n#include \"base/logging.h\"\n#include \"core/glob_matcher.h\"\n#include \"core/qlist.h\"\n#include \"redis/rdb.h\"\n#include \"server/acl/acl_commands_def.h\"\n#include \"server/blocking_controller.h\"\n#include \"server/cmd_support.h\"\n#include \"server/command_registry.h\"\n#include \"server/conn_context.h\"\n#include \"server/container_utils.h\"\n#include \"server/db_slice.h\"\n#include \"server/engine_shard_set.h\"\n#include \"server/error.h\"\n#include \"server/family_utils.h\"\n#include \"server/hset_family.h\"\n#include \"server/journal/journal.h\"\n#include \"server/namespaces.h\"\n#include \"server/rdb_extensions.h\"\n#include \"server/rdb_load.h\"\n#include \"server/rdb_save.h\"\n#include \"server/search/doc_index.h\"\n#include \"server/set_family.h\"\n#include \"server/tiered_storage.h\"\n#include \"server/transaction.h\"\n#include \"util/fibers/fibers.h\"\n#include \"util/fibers/future.h\"\n#include \"util/varz.h\"\n\nABSL_FLAG(uint32_t, dbnum, 16, \"Number of databases\");\nABSL_FLAG(uint32_t, keys_output_limit, 8192, \"Maximum number of keys output by keys command\");\nABSL_FLAG(bool, unlink_experimental_async, true, \"If true, runs unlink command asynchronously.\");\n\nnamespace dfly {\nusing namespace std;\nusing namespace facade;\n\nnamespace {\n\nconstexpr uint32_t kMaxTtl = (1UL << 26);\nconstexpr size_t DUMP_FOOTER_SIZE = sizeof(uint64_t) + sizeof(uint16_t);  // version number and crc\n\nstd::optional<RdbVersion> GetRdbVersion(std::string_view msg, bool ignore_crc = false) {\n  if (msg.size() <= 
DUMP_FOOTER_SIZE) {\n    LOG(WARNING) << \"got restore payload that is too short - \" << msg.size();\n    return std::nullopt;\n  }\n\n  // The footer looks like this: version (2 bytes) | crc64 (8 bytes)\n  const std::uint8_t* footer =\n      reinterpret_cast<const std::uint8_t*>(msg.data()) + (msg.size() - DUMP_FOOTER_SIZE);\n  const RdbVersion version = (*(footer + 1) << 8 | (*footer));\n\n  if (version > RDB_VERSION) {\n    LOG(WARNING) << \"got restore payload with illegal version - supporting version up to \"\n                 << RDB_VERSION << \" got version \" << version;\n    return std::nullopt;\n  }\n\n  uint64_t expected_cs = absl::little_endian::Load64(footer + 2);  // skip the version\n\n  if (!ignore_crc) {\n    // Compute expected crc64 based on the actual data upto the expected crc64 field.\n    uint64_t actual_cs =\n        crc64(0, reinterpret_cast<const uint8_t*>(msg.data()), msg.size() - sizeof(uint64_t));\n\n    if (actual_cs != expected_cs) {\n      LOG(WARNING) << \"CRC check failed for restore command, expecting: \" << expected_cs << \" got \"\n                   << actual_cs;\n      return std::nullopt;\n    }\n  }\n\n  return version;\n}\n\ntemplate <typename It> int64_t GetExpireTime(const DbSlice& db_slice, const It& exp_it) {\n  if (!IsValid(exp_it))\n    return 0;\n\n  return db_slice.ExpireTime(exp_it->second);\n}\n\nclass InMemSource : public ::io::Source {\n public:\n  explicit InMemSource(std::string_view buf) : buf_(buf) {\n  }\n\n  ::io::Result<size_t> ReadSome(const iovec* v, uint32_t len) final;\n\n protected:\n  std::string_view buf_;\n  off_t offs_ = 0;\n};\n\n::io::Result<size_t> InMemSource::ReadSome(const iovec* v, uint32_t len) {\n  ssize_t read_total = 0;\n  while (size_t(offs_) < buf_.size() && len > 0) {\n    size_t read_sz = min<size_t>(buf_.size() - offs_, v->iov_len);\n    memcpy(v->iov_base, buf_.data() + offs_, read_sz);\n    read_total += read_sz;\n    offs_ += read_sz;\n\n    ++v;\n    --len;\n  }\n\n  return 
read_total;\n}\n\nclass RestoreArgs {\n private:\n  static constexpr int64_t NO_EXPIRATION = 0;\n\n  int64_t expiration_ = NO_EXPIRATION;\n  bool abs_time_ = false;\n  bool replace_ = false;  // if true, over-ride existing key\n  bool sticky_ = false;\n\n public:\n  RestoreArgs() = default;\n\n  RestoreArgs(int64_t expiration, bool abs_time, bool replace)\n      : expiration_(expiration), abs_time_(abs_time), replace_(replace) {\n  }\n\n  bool Replace() const {\n    return replace_;\n  }\n\n  bool Sticky() const {\n    return sticky_;\n  }\n\n  void SetSticky(bool sticky) {\n    sticky_ = sticky;\n  }\n\n  uint64_t ExpirationTime() const {\n    DCHECK_GE(expiration_, 0);\n    return expiration_;\n  }\n\n  bool Expired() const {\n    return expiration_ < 0;\n  }\n\n  bool HasExpiration() const {\n    return expiration_ != NO_EXPIRATION;\n  }\n\n  [[nodiscard]] bool UpdateExpiration(int64_t now_msec);\n\n  static OpResult<RestoreArgs> TryFrom(const CmdArgList& args);\n};\n\nclass RdbRestoreValue : protected RdbLoaderBase {\n public:\n  explicit RdbRestoreValue(RdbVersion rdb_version) {\n    rdb_version_ = rdb_version;\n  }\n\n  OpResult<DbSlice::ItAndUpdater> Add(string_view key, string_view payload, const DbContext& cntx,\n                                      const RestoreArgs& args, DbSlice* db_slice);\n\n private:\n  std::optional<OpaqueObj> Parse(io::Source* source);\n  int rdb_type_ = -1;\n};\n\nstd::optional<RdbLoaderBase::OpaqueObj> RdbRestoreValue::Parse(io::Source* source) {\n  src_ = source;\n  if (pending_read_.remaining == 0) {\n    io::Result<uint8_t> type_id = FetchType();\n    if (type_id && rdbIsObjectTypeDF(type_id.value())) {\n      rdb_type_ = *type_id;\n    }\n  }\n\n  if (rdb_type_ == -1) {\n    LOG(ERROR) << \"failed to load type id from the input stream or type id is invalid\";\n    return std::nullopt;\n  }\n\n  OpaqueObj obj;\n  error_code ec = ReadObj(rdb_type_, &obj);  // load the type from the input stream\n  if (ec) {\n    LOG(ERROR) << 
\"failed to load data for type id \" << rdb_type_;\n    return std::nullopt;\n  }\n\n  return std::optional<OpaqueObj>(std::move(obj));\n}\n\nOpResult<DbSlice::ItAndUpdater> RdbRestoreValue::Add(string_view key, string_view data,\n                                                     const DbContext& cntx, const RestoreArgs& args,\n                                                     DbSlice* db_slice) {\n  InMemSource data_src(data);\n  PrimeValue pv;\n  bool first_parse = true;\n  do {\n    auto opaque_res = Parse(&data_src);\n    if (!opaque_res) {\n      return OpStatus::INVALID_VALUE;\n    }\n\n    LoadConfig config;\n    if (first_parse) {\n      first_parse = false;\n    } else {\n      config.append = true;\n    }\n    if (pending_read_.remaining > 0) {\n      config.chunked = true;\n    }\n    config.reserve = pending_read_.reserve;\n\n    if (auto ec = FromOpaque(*opaque_res, config, &pv); ec) {\n      // Handle value_expired gracefully - all fields expired during deserialize\n      if (ec.value() == rdb::errc::value_expired) {\n        return OpStatus::SKIPPED;\n      }\n      // we failed - report and exit\n      LOG(WARNING) << \"error while trying to read data: \" << ec;\n      return OpStatus::INVALID_VALUE;\n    }\n  } while (pending_read_.remaining > 0);\n\n  auto res = db_slice->AddOrUpdate(cntx, key, std::move(pv), args.ExpirationTime());\n  if (res) {\n    res->it->first.SetSticky(args.Sticky());\n    AddKeyToIndexesIfNeeded(key, cntx, res->it->second, db_slice->shard_owner());\n  }\n  return res;\n}\n\n[[nodiscard]] bool RestoreArgs::UpdateExpiration(int64_t now_msec) {\n  if (HasExpiration()) {\n    int64_t ttl = abs_time_ ? expiration_ - now_msec : expiration_;\n    if (ttl > kMaxExpireDeadlineMs)\n      ttl = kMaxExpireDeadlineMs;\n\n    expiration_ = ttl < 0 ? 
-1 : ttl + now_msec;\n  }\n  return true;\n}\n\n// The structure that we are expecting is:\n// args[0] == \"key\"\n// args[1] == \"ttl\"\n// args[2] == serialized value (list of chars that are used for the actual restore).\n// args[3] .. args[n]: optional arguments that can be [REPLACE] [ABSTTL] [IDLETIME seconds]\n//            [FREQ frequency] [STICK], in any order\nOpResult<RestoreArgs> RestoreArgs::TryFrom(const CmdArgList& args) {\n  RestoreArgs out_args;\n  string cur_arg{ArgS(args, 1)};  // extract ttl\n  if (!absl::SimpleAtoi(cur_arg, &out_args.expiration_) || (out_args.expiration_ < 0)) {\n    return OpStatus::INVALID_INT;\n  }\n\n  // the 3rd arg is the serialized value, so we are starting from one past it\n  // Note that all these are actually optional\n  // note about the redis doc for this command: https://redis.io/commands/restore/\n  // the IDLETIME and FREQ are not required, but to make this the same as in redis\n  // we would parse them and ensure that they are correct, maybe later they will be used\n  int64_t idle_time = 0;\n\n  for (size_t i = 3; i < args.size(); ++i) {\n    cur_arg = absl::AsciiStrToUpper(ArgS(args, i));\n    bool additional = args.size() - i - 1 >= 1;\n    if (cur_arg == \"REPLACE\") {\n      out_args.replace_ = true;\n    } else if (cur_arg == \"ABSTTL\") {\n      out_args.abs_time_ = true;\n    } else if (cur_arg == \"STICK\") {\n      out_args.sticky_ = true;\n    } else if (cur_arg == \"IDLETIME\" && additional) {\n      ++i;\n      cur_arg = ArgS(args, i);\n      if (!absl::SimpleAtoi(cur_arg, &idle_time)) {\n        return OpStatus::INVALID_INT;\n      }\n      if (idle_time < 0) {\n        return OpStatus::SYNTAX_ERR;\n      }\n    } else if (cur_arg == \"FREQ\" && additional) {\n      ++i;\n      cur_arg = ArgS(args, i);\n      int freq = 0;\n      if (!absl::SimpleAtoi(cur_arg, &freq)) {\n        return OpStatus::INVALID_INT;\n      }\n      if (freq < 0 || freq > 255) {\n        return OpStatus::OUT_OF_RANGE;  // need to 
translate in this case\n      }\n    } else {\n      LOG(WARNING) << \"Got unknown command line option for restore '\" << cur_arg << \"'\";\n      return OpStatus::SYNTAX_ERR;\n    }\n  }\n  return out_args;\n}\n\nOpResult<string> DumpToString(string_view key, const PrimeValue& pv, const OpArgs& op_args) {\n  string str_res;\n\n  if (pv.IsExternal() && !pv.IsCool()) {\n    // TODO: consider moving blocking point to coordinator to avoid stalling shard queue\n    auto res =\n        ReadTieredString(op_args.db_cntx.db_index, key, pv, op_args.shard->tiered_storage()).Get();\n    if (!res.has_value())\n      return OpStatus::IO_ERROR;\n\n    // TODO: allow saving string directly without proxy object\n    str_res = RdbSerializerBase::DumpValue(PrimeValue{*res});\n  } else {\n    str_res = RdbSerializerBase::DumpValue(pv);\n  }\n\n  return {std::move(str_res)};\n}\n\nOpStatus OpPersist(const OpArgs& op_args, string_view key);\n\nclass Renamer {\n public:\n  Renamer(Transaction* t, std::string_view src_key, std::string_view dest_key, unsigned shard_count,\n          bool do_copy = false)\n      : transaction_(t),\n        src_key_(src_key),\n        dest_key_(dest_key),\n        src_sid_(Shard(src_key, shard_count)),\n        dest_sid_(Shard(dest_key, shard_count)),\n        do_copy_(do_copy) {\n  }\n\n  ErrorReply Rename(bool destination_should_not_exist);\n\n private:\n  void FetchData();\n  facade::OpStatus FinalizeRename();\n\n  bool KeyExists(Transaction* t, EngineShard* shard, std::string_view key) const;\n  void SerializeSrc(Transaction* t, EngineShard* shard);\n\n  OpStatus DelSrc(Transaction* t, EngineShard* shard);\n  OpStatus DeserializeDest(Transaction* t, EngineShard* shard);\n\n  struct SerializedValue {\n    std::string value;\n    std::optional<RdbVersion> version;\n    int64_t expire_ts;\n    bool sticky;\n  };\n\n  Transaction* const transaction_;\n\n  const std::string_view src_key_;\n  const std::string_view dest_key_;\n  const ShardId src_sid_;\n  
const ShardId dest_sid_;\n\n  bool src_found_ = false;\n  bool dest_found_ = false;\n  bool do_copy_ = false;\n\n  OpResult<SerializedValue> serialized_value_;\n};\n\nErrorReply Renamer::Rename(bool destination_should_not_exist) {\n  FetchData();\n\n  if (!src_found_) {\n    transaction_->Conclude();\n    return OpStatus::KEY_NOTFOUND;\n  }\n\n  if (serialized_value_.status() != OpStatus::OK) {\n    transaction_->Conclude();\n    return serialized_value_.status();\n  }\n\n  if (!serialized_value_->version) {\n    transaction_->Conclude();\n    return ErrorReply{kInvalidDumpValueErr};\n  }\n\n  if (dest_found_ && destination_should_not_exist) {\n    transaction_->Conclude();\n    return OpStatus::KEY_EXISTS;\n  }\n\n  return FinalizeRename();\n}\n\nvoid Renamer::FetchData() {\n  auto cb = [this](Transaction* t, EngineShard* shard) {\n    auto args = t->GetShardArgs(shard->shard_id());\n    DCHECK(1 == args.Size() || do_copy_);\n\n    const ShardId shard_id = shard->shard_id();\n\n    if (shard_id == src_sid_) {\n      SerializeSrc(t, shard);\n    }\n\n    if (shard_id == dest_sid_) {\n      dest_found_ = KeyExists(t, shard, dest_key_);\n    }\n\n    return OpStatus::OK;\n  };\n\n  transaction_->Execute(std::move(cb), false);\n}\n\nOpStatus Renamer::FinalizeRename() {\n  OpStatus del_status = OpStatus::OK;\n  OpStatus deserialize_status = OpStatus::OK;\n  auto cb = [&](Transaction* t, EngineShard* shard) {\n    const ShardId shard_id = shard->shard_id();\n\n    if (!do_copy_ && shard_id == src_sid_) {\n      del_status = DelSrc(t, shard);\n    } else if (shard_id == dest_sid_) {\n      deserialize_status = DeserializeDest(t, shard);\n    }\n    return OpStatus::OK;\n  };\n\n  transaction_->Execute(std::move(cb), true);\n\n  LOG_IF(DFATAL,\n         (deserialize_status != OpStatus::OK && deserialize_status != OpStatus::OUT_OF_MEMORY) ||\n             del_status != OpStatus::OK)\n      << \"Error during rename command, deserialize_status: \" << deserialize_status\n     
 << \" del_status: \" << del_status;\n  return deserialize_status != OpStatus::OK ? deserialize_status : del_status;\n}\n\nbool Renamer::KeyExists(Transaction* t, EngineShard* shard, std::string_view key) const {\n  auto& db_slice = t->GetDbSlice(shard->shard_id());\n  auto it = db_slice.FindReadOnly(t->GetDbContext(), key).it;\n  return IsValid(it);\n}\n\nvoid Renamer::SerializeSrc(Transaction* t, EngineShard* shard) {\n  auto& db_slice = t->GetDbSlice(shard->shard_id());\n  auto [it, exp_it] = db_slice.FindReadOnly(t->GetDbContext(), src_key_);\n\n  src_found_ = IsValid(it);\n  if (!src_found_) {\n    return;\n  }\n\n  OpResult<string> res = DumpToString(src_key_, it->second, t->GetOpArgs(shard));\n  if (res.ok()) {\n    optional rdb_version = GetRdbVersion(*res);\n    int64_t exp_time = it->first.GetExpireTime();\n    serialized_value_ =\n        SerializedValue{std::move(*res), rdb_version, exp_time, it->first.IsSticky()};\n  } else {\n    serialized_value_ = res.status();\n  }\n}\n\nOpStatus Renamer::DelSrc(Transaction* t, EngineShard* shard) {\n  auto& db_slice = t->GetDbSlice(shard->shard_id());\n  auto res = db_slice.FindMutable(t->GetDbContext(), src_key_);\n  auto& it = res.it;\n\n  CHECK(IsValid(it));\n\n  DVLOG(1) << \"Rename: removing the key '\" << src_key_;\n\n  db_slice.DelMutable(t->GetDbContext(), std::move(res));\n  if (shard->journal()) {\n    RecordJournal(t->GetOpArgs(shard), \"DEL\"sv, ArgSlice{src_key_}, 2);\n  }\n\n  return OpStatus::OK;\n}\n\nOpStatus Renamer::DeserializeDest(Transaction* t, EngineShard* shard) {\n  DCHECK(serialized_value_);  // Verified in FetchData\n\n  OpArgs op_args = t->GetOpArgs(shard);\n  RestoreArgs restore_args{serialized_value_->expire_ts, true, true};\n\n  if (!restore_args.UpdateExpiration(op_args.db_cntx.time_now_ms)) {\n    return OpStatus::OUT_OF_RANGE;\n  }\n\n  auto& db_slice = t->GetDbSlice(shard->shard_id());\n  auto dest_res = db_slice.FindMutable(op_args.db_cntx, dest_key_);\n\n  if (dest_found_) {\n  
  DVLOG(1) << \"Rename: deleting the destiny key '\" << dest_key_;\n    db_slice.DelMutable(op_args.db_cntx, std::move(dest_res));\n  }\n\n  if (restore_args.Expired()) {\n    VLOG(1) << \"Rename: the new key '\" << dest_key_ << \"' already expired, will not save the value\";\n\n    if (dest_found_ && shard->journal()) {  // We need to delete old dest_key_ from replica\n      RecordJournal(op_args, \"DEL\"sv, ArgSlice{dest_key_}, 2);\n    }\n\n    return OpStatus::OK;\n  }\n\n  restore_args.SetSticky(serialized_value_->sticky);\n\n  RdbRestoreValue loader(serialized_value_->version.value());\n  auto add_res =\n      loader.Add(dest_key_, serialized_value_->value, op_args.db_cntx, restore_args, &db_slice);\n\n  if (!add_res) {\n    // SKIPPED means all fields expired during deserialize - treat as success\n    if (add_res.status() == OpStatus::SKIPPED) {\n      if (dest_found_ && shard->journal()) {\n        RecordJournal(op_args, \"DEL\"sv, ArgSlice{dest_key_}, 2);\n      }\n      return OpStatus::OK;\n    }\n    return add_res.status();\n  }\n\n  LOG_IF(DFATAL, !add_res->is_new)\n      << \"Unexpected override for key \" << dest_key_ << \" \" << dest_found_;\n  auto bc = op_args.db_cntx.ns->GetBlockingController(op_args.shard->shard_id());\n  if (bc) {\n    bc->Awaken(t->GetDbIndex(), dest_key_);\n  }\n\n  if (shard->journal()) {\n    auto expire_str = absl::StrCat(serialized_value_->expire_ts);\n\n    absl::InlinedVector<std::string_view, 6> args(\n        {dest_key_, expire_str, serialized_value_->value, \"REPLACE\"sv, \"ABSTTL\"sv});\n    if (serialized_value_->sticky) {\n      args.push_back(\"STICK\"sv);\n    }\n\n    RecordJournal(op_args, \"RESTORE\"sv, args, 2);\n  }\n\n  return OpStatus::OK;\n}\n\nOpStatus OpPersist(const OpArgs& op_args, string_view key) {\n  auto& db_slice = op_args.GetDbSlice();\n  auto res = db_slice.FindMutable(op_args.db_cntx, key);\n\n  if (!IsValid(res.it)) {\n    return OpStatus::KEY_NOTFOUND;\n  } else {\n    bool cleared = 
db_slice.RemoveExpire(op_args.db_cntx.db_index, res.it);\n    return cleared ? OpStatus::OK : OpStatus::SKIPPED;\n  }\n}\n\nOpResult<std::string> OpDump(const OpArgs& op_args, string_view key) {\n  auto& db_slice = op_args.GetDbSlice();\n  auto [it, _] = db_slice.FindReadOnly(op_args.db_cntx, key);\n\n  if (IsValid(it))\n    return DumpToString(key, it->second, op_args);\n  else\n    return OpStatus::KEY_NOTFOUND;\n}\n\nOpStatus OpRestore(const OpArgs& op_args, std::string_view key, std::string_view payload,\n                   RestoreArgs restore_args, RdbVersion rdb_version) {\n  if (!restore_args.UpdateExpiration(op_args.db_cntx.time_now_ms)) {\n    return OpStatus::OUT_OF_RANGE;\n  }\n\n  auto& db_slice = op_args.GetDbSlice();\n  bool found_prev = false;\n\n  // The redis impl (see cluster.c function restoreCommand), remove the old key if\n  // the replace option is set, so lets do the same here\n  {\n    auto res = db_slice.FindMutable(op_args.db_cntx, key);\n    if (IsValid(res.it)) {\n      found_prev = true;\n      if (restore_args.Replace()) {\n        VLOG(1) << \"restore command is running with replace, found old key '\" << key\n                << \"' and removing it\";\n        db_slice.DelMutable(op_args.db_cntx, std::move(res));\n      } else {\n        // we are not allowed to replace it.\n        return OpStatus::KEY_EXISTS;\n      }\n    }\n  }\n\n  if (restore_args.Expired()) {\n    VLOG(1) << \"the new key '\" << key << \"' already expired, will not save the value\";\n    return OpStatus::OK;\n  }\n\n  RdbRestoreValue loader(rdb_version);\n  auto add_res = loader.Add(key, payload, op_args.db_cntx, restore_args, &db_slice);\n  LOG_IF(DFATAL, add_res && !add_res->is_new)\n      << \"Unexpected override for key \" << key << \", found previous \" << found_prev\n      << \" override: \" << restore_args.Replace()\n      << \", type: \" << ObjTypeToString(add_res->it->second.ObjType());\n\n  return add_res.status();\n}\n\nbool ScanCb(const OpArgs& 
op_args, PrimeIterator prime_it, const ScanOpts& opts, StringVec* res) {\n  auto& db_slice = op_args.GetDbSlice();\n\n  DbSlice::Iterator it = DbSlice::Iterator::FromPrime(prime_it);\n  if (prime_it->first.HasExpire()) {\n    it = db_slice.ExpireIfNeeded(op_args.db_cntx, it).it;\n    if (!IsValid(it))\n      return false;\n  }\n\n  bool matches = !opts.type_filter || it->second.ObjType() == opts.type_filter;\n  if (opts.mask.has_value()) {\n    if (opts.mask == ScanOpts::Mask::Volatile) {\n      matches &= it->first.HasExpire();\n    } else if (opts.mask == ScanOpts::Mask::Permanent) {\n      matches &= !it->first.HasExpire();\n    } else if (opts.mask == ScanOpts::Mask::Accessed) {\n      matches &= it->first.WasTouched();\n    } else if (opts.mask == ScanOpts::Mask::Untouched) {\n      matches &= !it->first.WasTouched();\n    }\n  }\n  if (!matches)\n    return false;\n\n  if (opts.min_malloc_size > 0 && it->second.MallocUsed() < opts.min_malloc_size) {\n    return false;\n  }\n\n  if (opts.bucket_id != UINT_MAX && opts.bucket_id != it.GetInnerIt().bucket_id()) {\n    return false;\n  }\n\n  if (!opts.Matches(it.key())) {\n    return false;\n  }\n  res->emplace_back(it.key());\n\n  return true;\n}\n\nvoid OpScan(const OpArgs& op_args, const ScanOpts& scan_opts, uint64_t* cursor, StringVec* vec) {\n  auto& db_slice = op_args.GetDbSlice();\n  DCHECK(db_slice.IsDbValid(op_args.db_cntx.db_index));\n\n  // ScanCb can preempt due to journaling expired entries and we need to make sure that\n  // we enter the callback in a timing when journaling will not cause preemption. 
Otherwise,\n  // the bucket might change as we Traverse and yield.\n  db_slice.GetLatch()->Wait();\n\n  // Disable flush journal changes to prevent preemption in traverse.\n  journal::DisableFlushGuard journal_flush_guard(op_args.shard->journal());\n  unsigned cnt = 0;\n\n  VLOG(1) << \"PrimeTable \" << db_slice.shard_id() << \"/\" << op_args.db_cntx.db_index << \" has \"\n          << db_slice.DbSize(op_args.db_cntx.db_index);\n\n  PrimeTable::Cursor cur{*cursor};\n  auto* prime_table = db_slice.GetTables(op_args.db_cntx.db_index).first;\n\n  const auto start_cycles = base::CycleClock::Now();\n\n  // Don't allow it to monopolize cpu time.\n  // Approximately 30 microseconds.\n  const uint64_t timeout_cycles = base::CycleClock::Frequency() >> 15;\n\n  do {\n    cur = prime_table->Traverse(\n        cur, [&](PrimeIterator it) { cnt += ScanCb(op_args, it, scan_opts, vec); });\n  } while (cur && cnt < scan_opts.limit &&\n           (base::CycleClock::Now() - start_cycles) < timeout_cycles);\n\n  VLOG(1) << \"OpScan \" << db_slice.shard_id() << \" cursor: \" << cur.token();\n  *cursor = cur.token();\n}\n\nuint64_t ScanGeneric(uint64_t cursor, const ScanOpts& scan_opts, StringVec* keys,\n                     ConnectionContext* cntx) {\n  ShardId sid = cursor % 1024;\n\n  EngineShardSet* ess = shard_set;\n  unsigned shard_count = ess->size();\n  constexpr uint64_t kMaxScanTimeMs = 50;\n\n  // Dash table returns a cursor with its right byte empty. We will use it\n  // for encoding shard index. 
For now scan has a limitation of 1023 shards.\n  CHECK_LT(shard_count, 1024u);\n\n  if (sid >= shard_count) {  // protection\n    return 0;\n  }\n\n  cursor >>= 10;\n  DbContext db_cntx{cntx->ns, cntx->conn_state.db_index, GetCurrentTimeMs()};\n\n  do {\n    auto cb = [&] {\n      OpArgs op_args{EngineShard::tlocal(), nullptr, db_cntx};\n      OpScan(op_args, scan_opts, &cursor, keys);\n    };\n\n    // Avoid deadlocking, if called from shard queue script\n    if (EngineShard::tlocal() && EngineShard::tlocal()->shard_id() == sid) {\n      cb();\n      util::ThisFiber::Yield();\n    } else {\n      ess->Await(sid, cb);\n    }\n\n    if (cursor == 0) {\n      ++sid;\n      if (unsigned(sid) == shard_count)\n        break;\n    }\n\n    // Break after kMaxScanTimeMs.\n    uint64_t time_now_ms = GetCurrentTimeMs();\n    if (time_now_ms > db_cntx.time_now_ms + kMaxScanTimeMs) {\n      break;\n    }\n  } while (keys->size() < scan_opts.limit);\n\n  if (sid < shard_count) {\n    cursor = (cursor << 10) | sid;\n  } else {\n    DCHECK_EQ(0u, cursor);\n  }\n\n  return cursor;\n}\n\nvoid OpScanAndDelete(const OpArgs& op_args, const ScanOpts& scan_opts, uint64_t* cursor,\n                     uint32_t* deleted) {\n  StringVec keys;\n  OpScan(op_args, scan_opts, cursor, &keys);\n\n  auto& db_slice = op_args.GetDbSlice();\n  uint32_t count = 0;\n  for (const auto& key : keys) {\n    auto it = db_slice.FindMutable(op_args.db_cntx, key).it;\n    if (!IsValid(it))\n      continue;\n    db_slice.Del(op_args.db_cntx, it);\n    if (op_args.shard->journal()) {\n      RecordDelete(op_args.db_cntx.db_index, key);\n    }\n    ++count;\n  }\n  *deleted += count;\n}\n\nuint64_t RmGeneric(uint64_t cursor, const ScanOpts& scan_opts, uint32_t* deleted,\n                   ConnectionContext* cntx) {\n  ShardId sid = cursor % 1024;\n\n  EngineShardSet* ess = shard_set;\n  unsigned shard_count = ess->size();\n  constexpr uint64_t kMaxRmTimeMs = 100;\n\n  CHECK_LT(shard_count, 1024u);\n\n  if (sid 
>= shard_count) {\n    return 0;\n  }\n\n  cursor >>= 10;\n  DbContext db_cntx{cntx->ns, cntx->conn_state.db_index, GetCurrentTimeMs()};\n\n  *deleted = 0;\n\n  do {\n    auto cb = [&] {\n      OpArgs op_args{EngineShard::tlocal(), nullptr, db_cntx};\n      OpScanAndDelete(op_args, scan_opts, &cursor, deleted);\n    };\n\n    if (EngineShard::tlocal() && EngineShard::tlocal()->shard_id() == sid) {\n      cb();\n      util::ThisFiber::Yield();\n    } else {\n      ess->Await(sid, cb);\n    }\n\n    if (cursor == 0) {\n      ++sid;\n      if (unsigned(sid) == shard_count)\n        break;\n    }\n\n    uint64_t time_now_ms = GetCurrentTimeMs();\n    if (time_now_ms > db_cntx.time_now_ms + kMaxRmTimeMs) {\n      break;\n    }\n  } while (*deleted < scan_opts.limit);\n\n  if (sid < shard_count) {\n    cursor = (cursor << 10) | sid;\n  } else {\n    DCHECK_EQ(0u, cursor);\n  }\n\n  return cursor;\n}\n\nOpStatus OpExpire(const OpArgs& op_args, string_view key, const DbSlice::ExpireParams& params) {\n  auto& db_slice = op_args.GetDbSlice();\n  auto find_res = db_slice.FindMutable(op_args.db_cntx, key);\n  if (!IsValid(find_res.it)) {\n    return OpStatus::KEY_NOTFOUND;\n  }\n\n  find_res.post_updater.Run();\n  auto res = db_slice.UpdateExpire(op_args.db_cntx, find_res.it, find_res.exp_it, params);\n\n  // If the value was deleted, replicate as DEL.\n  // Else, replicate as PEXPIREAT with exact time.\n  if (op_args.shard->journal() && res.ok()) {\n    if (res.value() == -1) {\n      RecordJournal(op_args, \"DEL\"sv, ArgSlice{key});\n    } else {\n      auto time = absl::StrCat(res.value());\n      // Note: Don't forget to change this when adding arguments to expire commands.\n      RecordJournal(op_args, \"PEXPIREAT\"sv, ArgSlice{key, time});\n    }\n  }\n\n  return res.status();\n}\n\n#ifdef WITH_COLLECTION_CMDS\nOpResult<vector<long>> OpFieldExpire(const OpArgs& op_args, string_view key, uint32_t ttl_sec,\n                                     CmdArgList values) {\n  auto& 
db_slice = op_args.GetDbSlice();\n  auto [it, expire_it, auto_updater, is_new] = db_slice.FindMutable(op_args.db_cntx, key);\n\n  if (!IsValid(it) || (it->second.ObjType() != OBJ_SET && it->second.ObjType() != OBJ_HASH)) {\n    std::vector<long> res(values.size(), -2);\n    return res;\n  }\n\n  PrimeValue* pv = &it->second;\n  if (pv->ObjType() == OBJ_SET) {\n    return SetFamily::SetFieldsExpireTime(op_args, ttl_sec, values, pv);\n  } else {\n    return HSetFamily::SetFieldsExpireTime(op_args, ttl_sec, ExpireFlags::EXPIRE_ALWAYS, key,\n                                           values, pv);\n  }\n}\n\n// returns -2 if the key was not found, -3 if the field was not found,\n// -1 if ttl on the field was not found.\nOpResult<long> OpFieldTtl(Transaction* t, EngineShard* shard, string_view key, string_view field) {\n  auto& db_slice = t->GetDbSlice(shard->shard_id());\n  const DbContext& db_cntx = t->GetDbContext();\n  auto [it, expire_it] = db_slice.FindReadOnly(db_cntx, key);\n  if (!IsValid(it))\n    return -2;\n\n  if (it->second.ObjType() != OBJ_SET && it->second.ObjType() != OBJ_HASH)\n    return OpStatus::WRONG_TYPE;\n\n  int32_t res = -1;\n  if (it->second.ObjType() == OBJ_SET) {\n    res = SetFamily::FieldExpireTime(db_cntx, it->second, field);\n  } else {\n    DCHECK_EQ(OBJ_HASH, it->second.ObjType());\n    res = HSetFamily::FieldExpireTime(db_cntx, it->second, field);\n  }\n  return res <= 0 ? 
res : int32_t(res - MemberTimeSeconds(db_cntx.time_now_ms));\n}\n#else\nOpResult<vector<long>> OpFieldExpire(const OpArgs& op_args, string_view key, uint32_t ttl_sec,\n                                     CmdArgList values) {\n  return OpStatus::SKIPPED;\n}\nOpResult<long> OpFieldTtl(Transaction* t, EngineShard* shard, string_view key, string_view field) {\n  return OpStatus::SKIPPED;\n}\n\n#endif\n\nOpResult<uint32_t> OpStick(const OpArgs& op_args, const ShardArgs& keys) {\n  DVLOG(1) << \"Stick: \" << keys.Front();\n\n  auto& db_slice = op_args.GetDbSlice();\n\n  uint32_t res = 0;\n  for (string_view key : keys) {\n    auto find_res = db_slice.FindMutable(op_args.db_cntx, key);\n    if (IsValid(find_res.it) && !find_res.it->first.IsSticky()) {\n      find_res.it->first.SetSticky(true);\n      ++res;\n    }\n  }\n\n  return res;\n}\n\nOpResult<uint64_t> OpExpireTime(Transaction* t, EngineShard* shard, string_view key) {\n  auto& db_slice = t->GetDbSlice(shard->shard_id());\n  auto [it, expire_it] = db_slice.FindReadOnly(t->GetDbContext(), key);\n  if (!IsValid(it))\n    return OpStatus::KEY_NOTFOUND;\n\n  if (!it->first.HasExpire())\n    return OpStatus::SKIPPED;\n\n  int64_t ttl_ms = it->first.GetExpireTime();\n  DCHECK_GT(ttl_ms, 0);  // Otherwise FindReadOnly would return null.\n  return ttl_ms;\n}\n\n// OpMove touches multiple databases (op_args.db_idx, target_db), so it assumes it runs\n// as a global transaction.\n// TODO: Allow running OpMove without a global transaction.\nOpStatus OpMove(const OpArgs& op_args, string_view key, DbIndex target_db) {\n  auto& db_slice = op_args.GetDbSlice();\n\n  // Fetch value at key in current db.\n  auto from_res = db_slice.FindMutable(op_args.db_cntx, key);\n  if (!IsValid(from_res.it))\n    return OpStatus::KEY_NOTFOUND;\n\n  // Ensure target database exists.\n  db_slice.ActivateDb(target_db);\n\n  // Fetch value at key in target db.\n  DbContext target_cntx = op_args.db_cntx;\n  target_cntx.db_index = target_db;\n  auto 
to_res = db_slice.FindReadOnly(target_cntx, key);\n  if (IsValid(to_res.it))\n    return OpStatus::KEY_EXISTS;\n\n  bool sticky = from_res.it->first.IsSticky();\n  uint64_t exp_ts = from_res.it->first.GetExpireTime();\n  from_res.post_updater.Run();\n  PrimeValue from_obj = std::move(from_res.it->second);\n\n  db_slice.Del(op_args.db_cntx, from_res.it);\n  auto op_result = db_slice.AddNew(target_cntx, key, std::move(from_obj), exp_ts);\n  RETURN_ON_BAD_STATUS(op_result);\n  auto& add_res = *op_result;\n  add_res.it->first.SetSticky(sticky);\n\n  auto bc = op_args.db_cntx.ns->GetBlockingController(op_args.shard->shard_id());\n  if (add_res.it->second.ObjType() == OBJ_LIST && bc) {\n    bc->Awaken(target_db, key);\n  }\n\n  return OpStatus::OK;\n}\n\nOpResult<void> OpRen(const OpArgs& op_args, string_view from_key, string_view to_key,\n                     bool destination_should_not_exist) {\n  auto* es = op_args.shard;\n  auto& db_slice = op_args.GetDbSlice();\n  auto from_res = db_slice.FindMutable(op_args.db_cntx, from_key);\n  if (!IsValid(from_res.it))\n    return OpStatus::KEY_NOTFOUND;\n\n  if (from_key == to_key)\n    return destination_should_not_exist ? 
OpStatus::KEY_EXISTS : OpStatus::OK;\n\n  bool is_prior_list = false;\n  auto to_res = db_slice.FindMutable(op_args.db_cntx, to_key);\n  if (IsValid(to_res.it)) {\n    if (destination_should_not_exist)\n      return OpStatus::KEY_EXISTS;\n\n    RemoveKeyFromIndexesIfNeeded(to_key, op_args.db_cntx, to_res.it->second, op_args.shard);\n    is_prior_list = (to_res.it->second.ObjType() == OBJ_LIST);\n  }\n\n  // Delete the \"from\" document from the search index before deleting from the database\n  RemoveKeyFromIndexesIfNeeded(from_key, op_args.db_cntx, from_res.it->second, op_args.shard);\n\n  bool sticky = from_res.it->first.IsSticky();\n  uint64_t exp_ts = from_res.it->first.GetExpireTime();\n  from_res.post_updater.ReduceHeapUsage();\n\n  // we keep the value we want to move.\n  PrimeValue from_obj = std::move(from_res.it->second);\n\n  if (IsValid(to_res.it)) {\n    to_res.post_updater.ReduceHeapUsage();\n    to_res.it->second = std::move(from_obj);\n\n    if (exp_ts) {\n      db_slice.AddExpire(op_args.db_cntx.db_index, to_res.it, exp_ts);\n    } else {\n      db_slice.RemoveExpire(op_args.db_cntx.db_index, to_res.it);\n    }\n\n    to_res.it->first.SetSticky(sticky);\n    to_res.post_updater.Run();\n\n    db_slice.DelMutable(op_args.db_cntx, std::move(from_res));\n  } else {\n    // Here we first delete from_it because AddNew below could invalidate from_it.\n    // On the other hand, AddNew does not rely on the iterators - this is why we keep\n    // the value in `from_obj`.\n    db_slice.DelMutable(op_args.db_cntx, std::move(from_res));\n    auto op_result = db_slice.AddNew(op_args.db_cntx, to_key, std::move(from_obj), exp_ts);\n    RETURN_ON_BAD_STATUS(op_result);\n    to_res = std::move(*op_result);\n    to_res.it->first.SetSticky(sticky);\n  }\n\n  AddKeyToIndexesIfNeeded(to_key, op_args.db_cntx, to_res.it->second, op_args.shard);\n\n  auto bc = op_args.db_cntx.ns->GetBlockingController(es->shard_id());\n  if (!is_prior_list && to_res.it->second.ObjType() == 
OBJ_LIST && bc) {\n    bc->Awaken(op_args.db_cntx.db_index, to_key);\n  }\n  return OpStatus::OK;\n}\n\nOpResult<uint64_t> OpTtl(Transaction* t, EngineShard* shard, string_view key) {\n  auto opExpireTimeResult = OpExpireTime(t, shard, key);\n\n  if (opExpireTimeResult) {\n    auto now = t->GetDbContext().time_now_ms;\n    DCHECK_GT(now, 0u);\n\n    int64_t ttl_ms = opExpireTimeResult.value() - now;\n    DCHECK_GT(ttl_ms, 0);  // Otherwise FindReadOnly would return null.\n    return ttl_ms;\n  } else {\n    return opExpireTimeResult;\n  }\n}\n\nErrorReply RenameGeneric(CmdArgList args, bool destination_should_not_exist, Transaction* tx) {\n  string_view key[2] = {ArgS(args, 0), ArgS(args, 1)};\n\n  if (tx->GetUniqueShardCnt() == 1) {\n    tx->ReviveAutoJournal();  // Safe to use RENAME with single shard\n    auto cb = [&](Transaction* t, EngineShard* shard) {\n      return OpRen(t->GetOpArgs(shard), key[0], key[1], destination_should_not_exist);\n    };\n    OpResult<void> result = tx->ScheduleSingleHopT(std::move(cb));\n\n    return result.status();\n  }\n\n  Renamer renamer{tx, key[0], key[1], shard_set->size()};\n  return renamer.Rename(destination_should_not_exist);\n}\n\nvoid ExpireTimeGeneric(CmdArgList args, TimeUnit unit, CommandContext* cmd_cntx) {\n  string_view key = ArgS(args, 0);\n\n  auto cb = [&](Transaction* t, EngineShard* shard) { return OpExpireTime(t, shard, key); };\n  OpResult<uint64_t> result = cmd_cntx->tx()->ScheduleSingleHopT(std::move(cb));\n\n  if (result) {\n    long ttl = (unit == TimeUnit::SEC) ? 
(result.value() + 500) / 1000 : result.value();\n    cmd_cntx->SendLong(ttl);\n    return;\n  }\n\n  switch (result.status()) {\n    case OpStatus::KEY_NOTFOUND:\n      cmd_cntx->SendLong(-2);\n      break;\n    default:\n      LOG_IF(ERROR, result.status() != OpStatus::SKIPPED)\n          << \"Unexpected status \" << result.status();\n      cmd_cntx->SendLong(-1);\n      break;\n  }\n}\n\nvoid TtlGeneric(CmdArgList args, TimeUnit unit, CommandContext* cmd_cntx) {\n  string_view key = ArgS(args, 0);\n\n  auto cb = [&](Transaction* t, EngineShard* shard) { return OpTtl(t, shard, key); };\n  OpResult<uint64_t> result = cmd_cntx->tx()->ScheduleSingleHopT(std::move(cb));\n\n  if (result) {\n    long ttl = (unit == TimeUnit::SEC) ? (result.value() + 500) / 1000 : result.value();\n    cmd_cntx->SendLong(ttl);\n    return;\n  }\n\n  switch (result.status()) {\n    case OpStatus::KEY_NOTFOUND:\n      cmd_cntx->SendLong(-2);\n      break;\n    default:\n      LOG_IF(ERROR, result.status() != OpStatus::SKIPPED)\n          << \"Unexpected status \" << result.status();\n      cmd_cntx->SendLong(-1);\n      break;\n  }\n}\n\nio::Result<int32_t, string> ParseExpireOptionsOrReply(const CmdArgList args) {\n  int32_t flags = ExpireFlags::EXPIRE_ALWAYS;\n  for (auto& arg : args) {\n    string arg_sv = absl::AsciiStrToUpper(ToSV(arg));\n    if (arg_sv == \"NX\") {\n      flags |= ExpireFlags::EXPIRE_NX;\n    } else if (arg_sv == \"XX\") {\n      flags |= ExpireFlags::EXPIRE_XX;\n    } else if (arg_sv == \"GT\") {\n      flags |= ExpireFlags::EXPIRE_GT;\n    } else if (arg_sv == \"LT\") {\n      flags |= ExpireFlags::EXPIRE_LT;\n    } else {\n      return nonstd::make_unexpected(absl::StrCat(\"Unsupported option: \", arg_sv));\n    }\n  }\n\n  if ((flags & ExpireFlags::EXPIRE_NX) && (flags & ExpireFlags::EXPIRE_XX)) {\n    return nonstd::make_unexpected(\"NX and XX options at the same time are not compatible\");\n  }\n  if ((flags & ExpireFlags::EXPIRE_GT) && (flags & 
ExpireFlags::EXPIRE_LT)) {\n    return nonstd::make_unexpected(\"GT and LT options at the same time are not compatible\");\n  }\n  return flags;\n}\n\n}  // namespace\n\nOpResult<uint32_t> GenericFamily::OpDel(const OpArgs& op_args, const ShardArgs& keys, bool async) {\n  DVLOG(1) << \"Del: \" << keys.Front() << \" async: \" << async;\n  auto& db_slice = op_args.GetDbSlice();\n\n  uint32_t res = 0;\n\n  for (string_view key : keys) {\n    auto it = db_slice.FindMutable(op_args.db_cntx, key).it;  // post_updater will run immediately\n    if (!IsValid(it))\n      continue;\n\n    db_slice.Del(op_args.db_cntx, it, nullptr, async);\n    ++res;\n  }\n\n  return res;\n}\n\nstatic cmd::CmdR CmdDel(CmdArgList args, CommandContext* cmd_cntx) {\n  bool async_unlink =\n      cmd_cntx->cid()->name() == \"UNLINK\" && absl::GetFlag(FLAGS_unlink_experimental_async);\n\n  std::atomic_uint32_t result = 0;\n  auto cb = [&](Transaction* tx, EngineShard* es) {\n    auto args = tx->GetShardArgs(es->shard_id());\n    auto op_args = tx->GetOpArgs(es);\n    auto res = GenericFamily::OpDel(op_args, args, async_unlink);\n    result.fetch_add(res.value_or(0), memory_order_relaxed);\n    return OpStatus::OK;\n  };\n\n  co_await cmd::SingleHop(cb);\n  uint32_t del_cnt = result.load(memory_order_relaxed);\n\n  auto* rb = cmd_cntx->rb();\n  if (cmd_cntx->mc_command()) {\n    MCRender mc_render{cmd_cntx->mc_command()->cmd_flags};\n    rb->SendSimpleString(del_cnt ? 
mc_render.RenderDeleted() : mc_render.RenderNotFound());\n  } else {\n    rb->SendLong(del_cnt);\n  }\n  co_return std::nullopt;\n}\n\nvoid GenericFamily::Delex(CmdArgList args, CommandContext* cmd_cntx) {\n  string_view key = ArgS(args, 0);\n\n  // Parse optional condition\n  enum class Condition : uint8_t { NONE, IFEQ, IFNE, IFDEQ, IFDNE };\n  Condition cond = Condition::NONE;\n  string_view compare_value;\n\n  if (args.size() == 1) {\n    // DELEX key - no condition, behaves like DEL\n    cond = Condition::NONE;\n  } else if (args.size() == 2) {\n    // DELEX key <something> - invalid, needs both condition and value\n    // TODO: include error type in error reply\n    return cmd_cntx->SendError(facade::WrongNumArgsError(\"DELEX\"), kSyntaxErrType);\n  } else if (args.size() == 3) {\n    string_view opt = ArgS(args, 1);\n    compare_value = ArgS(args, 2);\n\n    if (absl::EqualsIgnoreCase(opt, \"IFEQ\")) {\n      cond = Condition::IFEQ;\n    } else if (absl::EqualsIgnoreCase(opt, \"IFNE\")) {\n      cond = Condition::IFNE;\n    } else if (absl::EqualsIgnoreCase(opt, \"IFDEQ\")) {\n      cond = Condition::IFDEQ;\n    } else if (absl::EqualsIgnoreCase(opt, \"IFDNE\")) {\n      cond = Condition::IFDNE;\n    } else {\n      return cmd_cntx->SendError(facade::UnknownSubCmd(opt, \"DELEX\"), kSyntaxErrType);\n    }\n  } else {\n    // args.size() > 3\n    return cmd_cntx->SendError(facade::WrongNumArgsError(\"DELEX\"), kSyntaxErrType);\n  }\n\n  // If no condition, delegate to standard DEL\n  if (cond == Condition::NONE) {\n    CmdDel(args, cmd_cntx);\n    return;\n  }\n\n  auto compare_str = [&](string_view val) {\n    bool is_digest = (cond == Condition::IFDEQ || cond == Condition::IFDNE);\n\n    if (is_digest) {\n      string dig = XXH3_Digest(val);\n      return (dig == compare_value) == (cond == Condition::IFDEQ);\n    }\n    return (val == compare_value) == (cond == Condition::IFEQ);\n  };\n\n  // Execute conditional delete\n  auto cb = [key, 
compare_str](Transaction* tx, EngineShard* es) -> OpResult<uint32_t> {\n    auto& db_slice = tx->GetDbSlice(es->shard_id());\n    auto it_res = db_slice.FindMutable(tx->GetDbContext(), key, OBJ_STRING);\n\n    // Key doesn't exist\n    if (!it_res.ok()) {\n      if (it_res.status() == OpStatus::KEY_NOTFOUND)\n        return 0;\n      return it_res.status();\n    }\n\n    // Get the value\n    const PrimeValue& pv = it_res->it->second;\n    // Check condition\n    bool should_delete = false;\n\n    if (pv.IsExternal()) {\n      util::fb2::Future<io::Result<bool>> fut = ReadTiered<bool>(\n          tx->GetDbIndex(), key, pv, [&](string_view val) { return compare_str(val); },\n          es->tiered_storage());\n\n      auto result = fut.Get();\n      if (!result)\n        // Tiered storage read failed - return generic I/O error\n        return OpStatus::IO_ERROR;\n      should_delete = *result;\n    } else {\n      should_delete = compare_str(pv.ToString());\n    }\n\n    // Delete if condition is met\n    if (should_delete) {\n      db_slice.DelMutable(tx->GetDbContext(), std::move(*it_res));\n      return 1;\n    }\n\n    return 0;\n  };\n\n  OpResult<uint32_t> result = cmd_cntx->tx()->ScheduleSingleHopT(cb);\n\n  if (result) {\n    cmd_cntx->SendLong(*result);\n  } else {\n    cmd_cntx->SendError(result.status());\n  }\n}\n\nvoid GenericFamily::Ping(CmdArgList args, CommandContext* cmd_cntx) {\n  if (args.size() > 1) {\n    return cmd_cntx->SendError(facade::WrongNumArgsError(\"ping\"), kSyntaxErrType);\n  }\n\n  string_view msg;\n\n  // If a client in the subscribe state and in resp2 mode, it returns an array for some reason.\n  auto* rb = static_cast<RedisReplyBuilder*>(cmd_cntx->rb());\n  if (cmd_cntx->server_conn_cntx()->conn_state.subscribe_info && !rb->IsResp3()) {\n    if (args.size() == 1) {\n      msg = ArgS(args, 0);\n    }\n\n    auto replier = [msg = string(msg)](RedisReplyBuilder* rb) {\n      string_view resp[2] = {\"pong\", msg};\n      
rb->SendBulkStrArr(resp);\n    };\n    return cmd_cntx->ReplyWith(std::move(replier));\n  }\n\n  if (args.size() == 0) {\n    return cmd_cntx->SendSimpleString(\"PONG\");\n  }\n\n  msg = ArgS(args, 0);\n  DVLOG(2) << \"Ping \" << msg;\n\n  auto replier = [msg = string(msg)](RedisReplyBuilder* rb) { rb->SendBulkString(msg); };\n  return cmd_cntx->ReplyWith(std::move(replier));\n}\n\nvoid GenericFamily::Exists(CmdArgList args, CommandContext* cmd_cntx) {\n  VLOG(1) << \"Exists \" << ArgS(args, 0);\n\n  atomic_uint32_t result{0};\n\n  auto cb = [&result](Transaction* t, EngineShard* shard) {\n    ShardArgs args = t->GetShardArgs(shard->shard_id());\n    auto res = OpExists(t->GetOpArgs(shard), args);\n    result.fetch_add(res.value_or(0), memory_order_relaxed);\n\n    return OpStatus::OK;\n  };\n\n  OpStatus status = cmd_cntx->tx()->ScheduleSingleHop(std::move(cb));\n  CHECK_EQ(OpStatus::OK, status);\n\n  return cmd_cntx->SendLong(result.load(memory_order_acquire));\n}\n\nvoid GenericFamily::Persist(CmdArgList args, CommandContext* cmd_cntx) {\n  string_view key = ArgS(args, 0);\n\n  auto cb = [&](Transaction* t, EngineShard* shard) { return OpPersist(t->GetOpArgs(shard), key); };\n\n  OpStatus status = cmd_cntx->tx()->ScheduleSingleHop(std::move(cb));\n  cmd_cntx->SendLong(status == OpStatus::OK);\n}\n\nvoid GenericFamily::Expire(CmdArgList args, CommandContext* cmd_cntx) {\n  string_view key = ArgS(args, 0);\n  string_view sec = ArgS(args, 1);\n  int64_t int_arg;\n\n  if (!absl::SimpleAtoi(sec, &int_arg)) {\n    return cmd_cntx->SendError(kInvalidIntErr);\n  }\n\n  int_arg = std::max<int64_t>(int_arg, -1);\n\n  // silently cap the expire time to kMaxExpireDeadlineSec which is more than 8 years.\n  if (int_arg > kMaxExpireDeadlineSec) {\n    int_arg = kMaxExpireDeadlineSec;\n  }\n\n  auto expire_options = ParseExpireOptionsOrReply(args.subspan(2));\n  if (!expire_options) {\n    return cmd_cntx->SendError(expire_options.error());\n  }\n  DbSlice::ExpireParams 
params{.value = int_arg, .expire_options = expire_options.value()};\n\n  auto cb = [&](Transaction* t, EngineShard* shard) {\n    return OpExpire(t->GetOpArgs(shard), key, params);\n  };\n\n  OpStatus status = cmd_cntx->tx()->ScheduleSingleHop(std::move(cb));\n  cmd_cntx->SendLong(status == OpStatus::OK);\n}\n\nvoid GenericFamily::ExpireAt(CmdArgList args, CommandContext* cmd_cntx) {\n  string_view key = ArgS(args, 0);\n  string_view sec = ArgS(args, 1);\n  int64_t int_arg;\n\n  if (!absl::SimpleAtoi(sec, &int_arg)) {\n    return cmd_cntx->SendError(kInvalidIntErr);\n  }\n\n  int_arg = std::max<int64_t>(int_arg, 0L);\n  auto expire_options = ParseExpireOptionsOrReply(args.subspan(2));\n  if (!expire_options) {\n    return cmd_cntx->SendError(expire_options.error());\n  }\n  DbSlice::ExpireParams params{\n      .value = int_arg, .absolute = true, .expire_options = expire_options.value()};\n\n  auto cb = [&](Transaction* t, EngineShard* shard) {\n    return OpExpire(t->GetOpArgs(shard), key, params);\n  };\n  OpStatus status = cmd_cntx->tx()->ScheduleSingleHop(std::move(cb));\n\n  if (status == OpStatus::OUT_OF_RANGE) {\n    return cmd_cntx->SendError(kExpiryOutOfRange);\n  }\n\n  cmd_cntx->SendLong(status == OpStatus::OK);\n}\n\nvoid GenericFamily::Keys(CmdArgList args, CommandContext* cmd_cntx) {\n  string_view pattern(ArgS(args, 0));\n  uint64_t cursor = 0;\n\n  StringVec keys;\n\n  ScanOpts scan_opts;\n  if (pattern != \"*\") {\n    scan_opts.matcher.reset(new GlobMatcher{pattern, true});\n  }\n\n  scan_opts.limit = 512;\n  auto output_limit = absl::GetFlag(FLAGS_keys_output_limit);\n\n  do {\n    cursor = ScanGeneric(cursor, scan_opts, &keys, cmd_cntx->server_conn_cntx());\n  } while (cursor != 0 && keys.size() < output_limit);\n\n  auto replier = [keys = std::move(keys)](RedisReplyBuilder* rb) { rb->SendBulkStrArr(keys); };\n  return cmd_cntx->ReplyWith(std::move(replier));\n}\n\nvoid GenericFamily::PexpireAt(CmdArgList args, CommandContext* cmd_cntx) {\n  
string_view key = ArgS(args, 0);\n  string_view msec = ArgS(args, 1);\n  int64_t int_arg;\n\n  if (!absl::SimpleAtoi(msec, &int_arg)) {\n    return cmd_cntx->SendError(kInvalidIntErr);\n  }\n\n  int_arg = std::max<int64_t>(int_arg, 0L);\n  auto expire_options = ParseExpireOptionsOrReply(args.subspan(2));\n  if (!expire_options) {\n    return cmd_cntx->SendError(expire_options.error());\n  }\n  DbSlice::ExpireParams params{.value = int_arg,\n                               .unit = TimeUnit::MSEC,\n                               .absolute = true,\n                               .expire_options = expire_options.value()};\n\n  auto cb = [&](Transaction* t, EngineShard* shard) {\n    return OpExpire(t->GetOpArgs(shard), key, params);\n  };\n  OpStatus status = cmd_cntx->tx()->ScheduleSingleHop(std::move(cb));\n\n  if (status == OpStatus::OUT_OF_RANGE) {\n    return cmd_cntx->SendError(kExpiryOutOfRange);\n  } else {\n    cmd_cntx->SendLong(status == OpStatus::OK);\n  }\n}\n\nvoid GenericFamily::Pexpire(CmdArgList args, CommandContext* cmd_cntx) {\n  string_view key = ArgS(args, 0);\n  string_view msec = ArgS(args, 1);\n  int64_t int_arg;\n\n  if (!absl::SimpleAtoi(msec, &int_arg)) {\n    return cmd_cntx->SendError(kInvalidIntErr);\n  }\n  int_arg = std::max<int64_t>(int_arg, -1);\n\n  // to be more compatible with redis, we silently cap the expire time to kMaxExpireDeadlineSec\n  if (int_arg > kMaxExpireDeadlineMs) {\n    int_arg = kMaxExpireDeadlineMs;\n  }\n\n  auto expire_options = ParseExpireOptionsOrReply(args.subspan(2));\n  if (!expire_options) {\n    return cmd_cntx->SendError(expire_options.error());\n  }\n  DbSlice::ExpireParams params{\n      .value = int_arg, .unit = TimeUnit::MSEC, .expire_options = expire_options.value()};\n\n  auto cb = [&](Transaction* t, EngineShard* shard) {\n    return OpExpire(t->GetOpArgs(shard), key, params);\n  };\n  OpStatus status = cmd_cntx->tx()->ScheduleSingleHop(std::move(cb));\n\n  if (status == OpStatus::OUT_OF_RANGE) {\n   
 return cmd_cntx->SendError(kExpiryOutOfRange);\n  }\n  cmd_cntx->SendLong(status == OpStatus::OK);\n}\n\nvoid GenericFamily::Stick(CmdArgList args, CommandContext* cmd_cntx) {\n  Transaction* transaction = cmd_cntx->tx();\n  VLOG(1) << \"Stick \" << ArgS(args, 0);\n\n  atomic_uint32_t result{0};\n\n  auto cb = [&result](const Transaction* t, EngineShard* shard) {\n    ShardArgs args = t->GetShardArgs(shard->shard_id());\n    auto res = OpStick(t->GetOpArgs(shard), args);\n    result.fetch_add(res.value_or(0), memory_order_relaxed);\n\n    return OpStatus::OK;\n  };\n\n  OpStatus status = transaction->ScheduleSingleHop(std::move(cb));\n  CHECK_EQ(OpStatus::OK, status);\n\n  DVLOG(2) << \"Stick ts \" << transaction->txid();\n\n  uint32_t match_cnt = result.load(memory_order_relaxed);\n  cmd_cntx->SendLong(match_cnt);\n}\n\nstruct SortEntryBase {\n  string key;\n  const string* bound_value = nullptr;\n  vector<string> get_values;  // Stores fetched GET pattern values\n\n  void BindValue(const std::string* value) {\n    bound_value = value;\n  }\n\n  std::string_view ResultKey() const {\n    if (bound_value) {\n      return *bound_value;\n    }\n    return key;\n  }\n};\n\n// Used to conditionally store double score\nstruct SortEntryScore : public SortEntryBase {\n  double score;\n};\n\n// SortEntry stores all data required for sorting\ntemplate <bool ALPHA>\nstruct SortEntry\n    // Store score only if we need it\n    : public std::conditional_t<ALPHA, SortEntryBase, SortEntryScore> {\n  bool Parse(string&& item) {\n    if constexpr (!ALPHA) {\n      if (!absl::SimpleAtod(item, &this->score)) {\n        if (!item.empty()) {\n          return false;\n        }\n        this->score = 0;\n      }\n      if (std::isnan(this->score)) {\n        return false;\n      }\n    }\n    this->key = std::move(item);\n    return true;\n  }\n\n  bool Parse(int64_t item) {\n    if constexpr (!ALPHA) {\n      this->score = item;\n    }\n    this->key = absl::StrCat(item);\n    return 
true;\n  }\n\n  static bool less(const SortEntry& l, const SortEntry& r) {\n    if constexpr (!ALPHA) {\n      if (l.score < r.score) {\n        return true;\n      } else if (r.score < l.score) {\n        return false;\n      }\n      // to prevent unstrict order we compare values lexicographically\n    }\n    return l.key < r.key;\n  }\n\n  static bool greater(const SortEntry& l, const SortEntry& r) {\n    return less(r, l);\n  }\n};\n\n// std::variant of all possible vectors of SortEntries\nusing SortEntryList = std::variant<\n    // Used when sorting by double values\n    std::vector<SortEntry<false>>,\n    // Used when sorting by string values\n    std::vector<SortEntry<true>>>;\n\n// Create SortEntryList based on runtime arguments\nSortEntryList MakeSortEntryList(bool alpha) {\n  if (alpha)\n    return SortEntryList{std::vector<SortEntry<true>>{}};\n  else\n    return SortEntryList{std::vector<SortEntry<false>>{}};\n}\n\n// Iterate over container with generic function that accepts strings and ints\ntemplate <typename F> bool Iterate(const PrimeValue& pv, F&& func) {\n  switch (pv.ObjType()) {\n    case OBJ_LIST:\n      return container_utils::IterateList(pv, func);\n    case OBJ_SET:\n      return container_utils::IterateSet(pv, func);\n    case OBJ_ZSET:\n      return container_utils::IterateSortedSet(\n          pv, [&](container_utils::ContainerEntry ce, double) { return func(ce); });\n    default:\n      return false;\n  }\n}\n\n// Create a SortEntryList from given key\nOpResult<CompactObjType> OpFetchSortEntries(const OpArgs& op_args, std::string_view key,\n                                            SortEntryList* dest) {\n  using namespace container_utils;\n\n  auto it = op_args.GetDbSlice().FindReadOnly(op_args.db_cntx, key).it;\n  if (!IsValid(it)) {\n    return OpStatus::KEY_NOTFOUND;\n  }\n  if (!IsContainer(it->second)) {\n    return OpStatus::WRONG_TYPE;\n  }\n\n  bool success = std::visit(\n      [&pv = it->second](auto& entries) {\n        
entries.reserve(pv.Size());\n        return Iterate(pv, [&entries](const ContainerEntry& entry) {\n          if (entry.IsString())\n            return entries.emplace_back().Parse(entry.ToString());\n          else\n            return entries.emplace_back().Parse(entry.as_long());\n        });\n      },\n      *dest);\n  if (!success)\n    return OpStatus::INVALID_NUMERIC_RESULT;\n\n  return it->second.ObjType();\n}\n\n// Fetch container elements as strings (for BY pattern support)\nOpResult<pair<vector<string>, CompactObjType>> OpFetchContainerElements(const OpArgs& op_args,\n                                                                        std::string_view key) {\n  using namespace container_utils;\n\n  auto it = op_args.GetDbSlice().FindReadOnly(op_args.db_cntx, key).it;\n  if (!IsValid(it)) {\n    return OpStatus::KEY_NOTFOUND;\n  }\n  if (!IsContainer(it->second)) {\n    return OpStatus::WRONG_TYPE;\n  }\n\n  vector<string> elements;\n  elements.reserve(it->second.Size());\n\n  Iterate(it->second, [&elements](const ContainerEntry& entry) {\n    elements.emplace_back(entry.ToString());\n    return true;\n  });\n\n  return std::make_pair(std::move(elements), it->second.ObjType());\n}\n\n// Fetch a string value from a key (for BY pattern lookups)\n// TODO: does not support tiering.\nstring OpFetchStringValue(const OpArgs& op_args, std::string_view key) {\n  auto it = op_args.GetDbSlice().FindReadOnly(op_args.db_cntx, key).it;\n  if (!IsValid(it) || it->second.ObjType() != OBJ_STRING) {\n    return {};  // Missing key defaults to empty string\n  }\n\n  return it->second.ToString();\n}\n\ntemplate <typename IteratorBegin, typename IteratorEnd>\nOpResult<uint32_t> OpStore(const OpArgs& op_args, std::string_view key, IteratorBegin&& start_it,\n                           IteratorEnd&& end_it, bool has_get_patterns) {\n  uint32_t len = 0;\n\n  // If we are about to overwrite an existing indexed document (HASH/JSON),\n  // remove it from search indices first to 
avoid duplicate entries.\n  auto existing = op_args.GetDbSlice().FindReadOnly(op_args.db_cntx, key).it;\n  if (IsValid(existing)) {\n    RemoveKeyFromIndexesIfNeeded(key, op_args.db_cntx, existing->second, op_args.shard);\n  }\n\n  QList* ql_v2 = CompactObj::AllocateMR<QList>();\n  QList::Where where = QList::TAIL;\n  for (auto it = start_it; it != end_it; ++it) {\n    if (has_get_patterns) {\n      // Store all GET pattern values for this entry\n      for (const auto& value : it->get_values) {\n        ql_v2->Push(value, where);\n      }\n    } else {\n      // No GET patterns - store the element itself\n      ql_v2->Push(it->ResultKey(), where);\n    }\n  }\n  len = ql_v2->Size();\n\n  PrimeValue pv;\n  pv.InitRobj(OBJ_LIST, kEncodingQL2, ql_v2);\n\n  // This would overwrite existing value if any with new list.\n  auto op_res = op_args.GetDbSlice().AddOrUpdate(op_args.db_cntx, key, std::move(pv), 0);\n  RETURN_ON_BAD_STATUS(op_res);\n\n  return len;\n}\n\nstruct SortParams {\n  bool alpha = false;\n  bool reversed = false;\n  bool is_read_only = false;\n  bool to_sort = true;\n\n  optional<string_view> store_key;\n\n  // first is offset, second is count\n  optional<pair<uint32_t, uint32_t>> bounds;\n\n  // These options are parsed but currently not fully supported or used by the visitor.\n  optional<string_view> by_pattern;\n  vector<string_view> get_patterns;\n};\n\ntemplate <typename C>\nauto GetSortRange(const C& entries, const optional<pair<uint32_t, uint32_t>>& bounds) {\n  auto start_it = entries.begin();\n  auto end_it = entries.end();\n  if (bounds) {\n    start_it += std::min<uint32_t>(bounds->first, entries.size());\n    end_it = entries.begin() + std::min<uint32_t>(bounds->first + bounds->second, entries.size());\n  }\n\n  return std::make_pair(start_it, end_it);\n};\n\n// Generic GET pattern fetcher that abstracts element access and result storage.\n// Handles pattern expansion, shard distribution, and parallel fetching.\n// Special pattern \"#\" 
returns the element value itself.\n// Uses \"read uncommitted\" isolation - fetches values across shards without transaction guarantees.\n//\n// Template parameters:\n//   ElementContainer: Container type holding elements (e.g., vector<string>, vector<SortEntry>)\n//   ElementAccessor: Callable that returns string_view for element at index: (size_t) ->\n//   string_view ResultSetter: Callable that stores fetched value: (size_t elem_idx, size_t\n//   pattern_idx, string value) -> void\ntemplate <typename ElementContainer, typename ElementAccessor, typename ResultSetter>\nvoid FetchGetPatternValues(const SortParams& params, const DbContext& db_cntx,\n                           const ElementContainer& elements, ElementAccessor get_element_key,\n                           ResultSetter set_result) {\n  if (params.get_patterns.empty())\n    return;\n\n  // Build a list of all external keys to fetch, organized by shard\n  // Structure: keys_by_shard[shard_id] = [(elem_idx, pattern_idx, ext_key), ...]\n  vector<vector<tuple<size_t, size_t, string>>> keys_by_shard(shard_set->size());\n\n  // Build external keys for each element and pattern\n  for (size_t elem_idx = 0; elem_idx < elements.size(); ++elem_idx) {\n    for (size_t pattern_idx = 0; pattern_idx < params.get_patterns.size(); ++pattern_idx) {\n      std::string_view pattern = params.get_patterns[pattern_idx];\n\n      if (pattern == \"#\") {\n        // Special pattern - return the element itself, no external fetch needed\n        set_result(elem_idx, pattern_idx, string(get_element_key(elem_idx)));\n        continue;\n      }\n\n      // Build external key by replacing '*' with the actual element value\n      size_t star_pos = pattern.find('*');\n      string ext_key;\n      if (star_pos == std::string_view::npos) {\n        // No asterisk - use pattern as literal key\n        ext_key = string(pattern);\n      } else {\n        ext_key = absl::StrCat(pattern.substr(0, star_pos), get_element_key(elem_idx),\n         
                      pattern.substr(star_pos + 1));\n      }\n\n      ShardId sid = Shard(ext_key, shard_set->size());\n      keys_by_shard[sid].emplace_back(elem_idx, pattern_idx, std::move(ext_key));\n    }\n  }\n\n  // Fetch all external keys in parallel across shards\n  shard_set->RunBlockingInParallel([&](EngineShard* shard) {\n    ShardId sid = shard->shard_id();\n    for (const auto& [elem_idx, pattern_idx, ext_key] : keys_by_shard[sid]) {\n      string value = OpFetchStringValue({shard, nullptr, db_cntx}, ext_key);\n      set_result(elem_idx, pattern_idx, std::move(value));\n    }\n  });\n}\n\n// Fetches external keys referenced by GET patterns and fills the get_values in sort entries.\n// For each entry, fetches values for all GET patterns. Special pattern \"#\" returns the element\n// itself. Uses \"read uncommitted\" isolation - fetches values across shards without transaction\n// guarantees.\ntemplate <bool ALPHA>\nOpStatus PopulateGetPatternValues(const SortParams& params, const DbContext& db_cntx,\n                                  std::vector<SortEntry<ALPHA>>* entries) {\n  DCHECK(!params.get_patterns.empty());\n\n  // Pre-allocate get_values for each entry\n  for (auto& entry : *entries) {\n    entry.get_values.resize(params.get_patterns.size());\n  }\n\n  // Use generic fetcher with lambdas to access ResultKey() and store in entry.get_values\n  FetchGetPatternValues(\n      params, db_cntx, *entries,\n      [&](size_t idx) -> std::string_view { return (*entries)[idx].ResultKey(); },\n      [&](size_t entry_idx, size_t pattern_idx, string value) {\n        (*entries)[entry_idx].get_values[pattern_idx] = std::move(value);\n      });\n\n  return OpStatus::OK;\n}\n\n// Visitor to handle the actual sorting and reply generation\nstruct SortVisitor {\n  const SortParams& params;\n  CompactObjType result_type;\n  CommandContext* cmd_cntx;\n  vector<string> raw_elements;\n\n  template <typename T> void operator()(T& entries) {\n    using value_t = 
typename std::decay_t<decltype(entries)>::value_type;\n    auto cmp = params.reversed ? &value_t::greater : &value_t::less;\n\n    DCHECK(params.to_sort);\n\n    DVLOG(2) << \"Sorting \" << entries.size() << \" elements\";\n\n    // Sort logic\n    if (params.bounds) {\n      auto sort_it =\n          entries.begin() +\n          std::min<uint32_t>(params.bounds->first + params.bounds->second, entries.size());\n      std::partial_sort(entries.begin(), sort_it, entries.end(), cmp);\n    } else {\n      std::sort(entries.begin(), entries.end(), cmp);\n    }\n\n    // Fetch GET pattern values if needed\n    if (!params.get_patterns.empty()) {\n      ConnectionContext* cntx = cmd_cntx->server_conn_cntx();\n      DbContext db_cntx{cntx->ns, cntx->db_index(), GetCurrentTimeMs()};\n      PopulateGetPatternValues(params, db_cntx, &entries);\n    }\n\n    if (!params.store_key) {\n      bool is_set = (result_type == OBJ_SET || result_type == OBJ_ZSET);\n      bool has_get_patterns = !params.get_patterns.empty();\n      auto replier = [entries = std::move(entries), bounds = params.bounds, is_set,\n                      has_get_patterns,\n                      raw_elements = std::move(raw_elements)](RedisReplyBuilder* rb) {\n        DVLOG(2) << \"Replying with sorted entries, count: \" << entries.size();\n        auto [start_it, end_it] = GetSortRange(entries, bounds);\n\n        size_t num_entries = std::distance(start_it, end_it);\n        size_t collection_size = has_get_patterns && !entries.empty()\n                                     ? num_entries * entries.front().get_values.size()\n                                     : num_entries;\n\n        rb->StartCollection(collection_size, is_set ? 
CollectionType::SET : CollectionType::ARRAY);\n\n        for (auto it = start_it; it != end_it; ++it) {\n          if (has_get_patterns && !it->get_values.empty()) {\n            // Send all GET pattern values for this entry\n            for (const auto& value : it->get_values) {\n              rb->SendBulkString(value);\n            }\n          } else {\n            // No GET patterns - send the element itself\n            rb->SendBulkString(it->ResultKey());\n          }\n        }\n      };\n      cmd_cntx->ReplyWith(std::move(replier));\n    } else {\n      std::string_view store_key_sv = params.store_key.value();\n      ShardId dest_sid = Shard(store_key_sv, shard_set->size());\n      OpResult<uint32_t> store_len;\n      bool has_get_patterns = !params.get_patterns.empty();\n\n      auto store_callback = [&](Transaction* t, EngineShard* shard) {\n        ShardId shard_id = shard->shard_id();\n        if (shard_id == dest_sid) {\n          auto [start_it, end_it] = GetSortRange(entries, params.bounds);\n          store_len =\n              OpStore(t->GetOpArgs(shard), store_key_sv, start_it, end_it, has_get_patterns);\n        }\n        return OpStatus::OK;\n      };\n      cmd_cntx->tx()->Execute(std::move(store_callback), true);\n\n      if (store_len) {\n        cmd_cntx->SendLong(store_len.value());\n      } else {\n        cmd_cntx->SendError(store_len.status());\n      }\n    }\n  }\n};\n\n// Fetches external keys referenced by a BY pattern and fills the sort entries. 
We deliberately\n// perform \"read uncommitted\" lookups across arbitrary shards, so this helper does not preserve the\n// enclosing transaction's isolation guarantees.\nOpStatus PopulateSortEntriesFromByPattern(const SortParams& params,\n                                          const vector<string>& raw_elements,\n                                          const DbContext& db_cntx, SortEntryList* sorted_entries) {\n  DCHECK(params.by_pattern);\n\n  vector<vector<pair<size_t, string>>> keys_by_shard(shard_set->size());\n  std::string_view pattern = *params.by_pattern;\n  size_t star_pos = pattern.find('*');\n  DCHECK_NE(star_pos, std::string_view::npos);\n  for (size_t i = 0; i < raw_elements.size(); ++i) {\n    string ext_key =\n        absl::StrCat(pattern.substr(0, star_pos), raw_elements[i], pattern.substr(star_pos + 1));\n    ShardId sid = Shard(ext_key, shard_set->size());\n    keys_by_shard[sid].emplace_back(i, std::move(ext_key));\n  }\n\n  std::visit([&](auto& entries) { entries.resize(raw_elements.size()); }, *sorted_entries);\n  atomic_bool parse_error{false};\n  shard_set->RunBlockingInParallel([&](EngineShard* shard) {\n    ShardId sid = shard->shard_id();\n    bool success = std::visit(\n        [&](auto& dest) {\n          for (const auto& [idx, ext_key] : keys_by_shard[sid]) {\n            string external_value = OpFetchStringValue({shard, nullptr, db_cntx}, ext_key);\n            auto& entry = dest[idx];\n            if (!entry.Parse(std::move(external_value)))\n              return false;\n            entry.BindValue(&raw_elements[idx]);\n          }\n          return true;\n        },\n        *sorted_entries);\n    if (!success) {\n      parse_error.store(true, memory_order_relaxed);\n    }\n  });\n\n  if (parse_error.load(memory_order_relaxed)) {\n    return OpStatus::INVALID_NUMERIC_RESULT;\n  }\n\n  return OpStatus::OK;\n}\n\nvoid SortGeneric(CmdArgList args, CommandContext* cmd_cntx, bool is_read_only) {\n  CmdArgParser parser(args);\n  
std::string_view key = parser.Next();\n  SortParams params;\n  params.is_read_only = is_read_only;\n\n  while (parser.HasNext()) {\n    if (parser.Check(\"ALPHA\")) {\n      params.alpha = true;\n    } else if (parser.Check(\"DESC\")) {\n      params.reversed = true;\n    } else if (parser.Check(\"ASC\")) {\n      params.reversed = false;\n    } else if (parser.Check(\"LIMIT\")) {\n      uint32_t offset = parser.Next<uint32_t>();\n      uint32_t limit = parser.Next<uint32_t>();\n      params.bounds = {offset, limit};\n    } else if (!is_read_only && parser.Check(\"STORE\", &params.store_key)) {\n    } else if (parser.Check(\"BY\", &params.by_pattern)) {\n    } else if (parser.Check(\"GET\")) {\n      params.get_patterns.push_back(parser.Next());\n    } else {\n      LOG_EVERY_T(ERROR, 1) << \"Unsupported option \" << parser.Peek();\n      return cmd_cntx->SendError(kSyntaxErr);\n    }\n  }\n\n  if (parser.HasError()) {\n    return cmd_cntx->SendError(parser.TakeError().MakeReply());\n  }\n\n  // Validate BY pattern has exactly one '*'\n  if (params.by_pattern) {\n    size_t star_count = std::count(params.by_pattern->begin(), params.by_pattern->end(), '*');\n    if (star_count == 0) {\n      // \"nosort\" pattern - no '*' means skip sorting, preserve insertion order\n      params.to_sort = false;\n      params.by_pattern.reset();\n    } else if (star_count != 1) {\n      return cmd_cntx->SendError(kSyntaxErr);\n    }\n  }\n\n  // Validate GET patterns: each pattern must be \"#\" or have at most 1 asterisk\n  for (const auto& pattern : params.get_patterns) {\n    if (pattern == \"#\") {\n      continue;  // Special pattern, always valid\n    }\n    size_t star_count = std::count(pattern.begin(), pattern.end(), '*');\n    if (star_count > 1) {\n      return cmd_cntx->SendError(kSyntaxErr);\n    }\n  }\n\n  // Asserting that if is_read_only as true, then store_key should not exist.\n  DVLOG(1) << \"is_read_only parameter: \" << is_read_only\n           << \" and 
store_key parameter: \" << bool(params.store_key);\n  DCHECK(((is_read_only && !bool(params.store_key)) || !is_read_only));\n\n  ConnectionContext* cntx = cmd_cntx->server_conn_cntx();\n  DbContext db_cntx{cntx->ns, cntx->db_index(), GetCurrentTimeMs()};\n\n  CompactObjType source_type = OBJ_STRING;  // undefined in this context\n\n  // \"BY nosort\" or we need to sort by external keys - fetch unsorted first.\n  bool fetch_unsorted = !params.to_sort || params.by_pattern;\n  bool single_hop = !bool(params.store_key);\n  vector<string> raw_elements;\n  ShardId source_sid = Shard(key, shard_set->size());\n\n  // The high level steps are:\n  // 1. Fetch container elements (strings only, no parsing) if no sorting needed.\n  // 2. If sorting needed, prepare SortEntryList and fetch external keys if BY pattern is used.\n  // 3. Perform sorting and generate reply or store result if STORE option is used.\n  // 4. If no sorting needed, reply with fetched raw elements (with LIMIT if any).\n  if (fetch_unsorted) {\n    // Step 1: Fetch container elements (strings only, no parsing)\n    OpResult<pair<vector<string>, CompactObjType>> elem_result;\n\n    auto fetch_cb = [&](Transaction* t, EngineShard* shard) {\n      if (shard->shard_id() == source_sid) {\n        elem_result = OpFetchContainerElements(t->GetOpArgs(shard), key);\n      }\n      return OpStatus::OK;\n    };\n\n    cmd_cntx->tx()->Execute(std::move(fetch_cb), single_hop);\n\n    // elem_result->first is empty both for missing/empty containers and for errors;\n    // use elem_result's OpStatus to distinguish actual error cases (e.g. 
WRONG_TYPE).\n    if (elem_result->first.empty()) {\n      cmd_cntx->tx()->Conclude();\n      if (elem_result == OpStatus::WRONG_TYPE)\n        return cmd_cntx->SendError(elem_result.status());\n      else\n        return static_cast<RedisReplyBuilder*>(cmd_cntx->rb())->SendEmptyArray();\n    }\n\n    raw_elements.swap(elem_result->first);\n    source_type = elem_result->second;\n  }\n\n  if (params.to_sort) {\n    // Step 2 and 3: Prepare SortEntryList, fetch external keys if needed, perform sorting\n\n    auto sorted_entries =\n        MakeSortEntryList(params.alpha);  // Numeric or alpha depending on params.alpha\n    OpStatus sort_status = OpStatus::OK;\n\n    // Handle BY pattern with external key lookups\n    if (params.by_pattern) {\n      DCHECK(source_type == OBJ_SET || source_type == OBJ_ZSET || source_type == OBJ_LIST);\n      sort_status =\n          PopulateSortEntriesFromByPattern(params, raw_elements, db_cntx, &sorted_entries);\n    } else {  // No BY pattern, sort directly on fetched elements\n      OpResult<CompactObjType> fetch_result;\n      auto fetch_cb = [&](Transaction* t, EngineShard* shard) {\n        // in case of SORT option, we fetch only on the source shard\n        if (shard->shard_id() == source_sid) {\n          fetch_result = OpFetchSortEntries(t->GetOpArgs(shard), key, &sorted_entries);\n        }\n        return OpStatus::OK;\n      };\n\n      cmd_cntx->tx()->Execute(std::move(fetch_cb), single_hop);\n      sort_status = fetch_result.status();\n      source_type = *fetch_result;\n    }\n\n    if (sort_status != OpStatus::OK) {\n      DVLOG(2) << \"Sorting failed with status \" << sort_status;\n      cmd_cntx->tx()->Conclude();\n      if (sort_status == OpStatus::WRONG_TYPE)\n        return cmd_cntx->SendError(sort_status);\n      if (sort_status == OpStatus::INVALID_NUMERIC_RESULT)\n        return cmd_cntx->SendError(\"One or more scores can't be converted into double\");\n      return 
static_cast<RedisReplyBuilder*>(cmd_cntx->rb())->SendEmptyArray();\n    }\n\n    SortVisitor visitor{params, source_type, cmd_cntx, std::move(raw_elements)};\n    std::visit(visitor, sorted_entries);\n    return;\n  }\n\n  // No sorting required, just reply with fetched raw elements (with LIMIT if any)\n  DVLOG(1) << \"Replying with unsorted \" << raw_elements.size() << \" elements from key \" << key;\n  DCHECK(!raw_elements.empty());\n\n  // Fetch GET pattern values if needed (for unsorted path)\n  vector<vector<string>> get_values_per_element;\n  if (!params.get_patterns.empty()) {\n    // Pre-allocate storage for GET pattern values\n    get_values_per_element.resize(raw_elements.size(), vector<string>(params.get_patterns.size()));\n\n    // Use generic fetcher with lambdas to access raw_elements and store in get_values_per_element\n    FetchGetPatternValues(\n        params, db_cntx, raw_elements,\n        [&](size_t idx) -> std::string_view { return raw_elements[idx]; },\n        [&](size_t elem_idx, size_t pattern_idx, string value) {\n          get_values_per_element[elem_idx][pattern_idx] = std::move(value);\n        });\n  }\n\n  auto replier = [raw_elements = std::move(raw_elements), params, source_type,\n                  get_values = std::move(get_values_per_element)](RedisReplyBuilder* rb) {\n    auto [start_it, end_it] = GetSortRange(raw_elements, params.bounds);\n    bool is_set = (source_type == OBJ_SET || source_type == OBJ_ZSET);\n    size_t num_entries = std::distance(start_it, end_it);\n    size_t collection_size =\n        !get_values.empty() ? num_entries * get_values.front().size() : num_entries;\n\n    rb->StartCollection(collection_size, is_set ? 
CollectionType::SET : CollectionType::ARRAY);\n\n    size_t elem_idx = start_it - raw_elements.begin();\n    for (auto it = start_it; it != end_it; ++it, ++elem_idx) {\n      if (!get_values.empty() && !get_values[elem_idx].empty()) {\n        for (const auto& value : get_values[elem_idx]) {\n          rb->SendBulkString(value);\n        }\n      } else {\n        rb->SendBulkString(*it);\n      }\n    }\n  };\n  cmd_cntx->ReplyWith(std::move(replier));\n}\n\nvoid GenericFamily::Sort(CmdArgList args, CommandContext* cmd_cntx) {\n  SortGeneric(args, cmd_cntx, false);\n}\n\nvoid GenericFamily::Sort_RO(CmdArgList args, CommandContext* cmd_cntx) {\n  SortGeneric(args, cmd_cntx, true);\n}\n\nvoid GenericFamily::Restore(CmdArgList args, CommandContext* cmd_cntx) {\n  std::string_view key = ArgS(args, 0);\n  std::string_view serialized_value = ArgS(args, 2);\n\n  auto rdb_version =\n      GetRdbVersion(serialized_value, cmd_cntx->server_conn_cntx()->journal_emulated);\n  if (!rdb_version) {\n    return cmd_cntx->SendError(kInvalidDumpValueErr);\n  }\n\n  OpResult<RestoreArgs> restore_args = RestoreArgs::TryFrom(args);\n  if (!restore_args) {\n    if (restore_args.status() == OpStatus::OUT_OF_RANGE) {\n      return cmd_cntx->SendError(\"Invalid IDLETIME value, must be >= 0\");\n    } else {\n      return cmd_cntx->SendError(restore_args.status());\n    }\n  }\n\n  auto cb = [&](Transaction* t, EngineShard* shard) {\n    return OpRestore(t->GetOpArgs(shard), key, serialized_value, restore_args.value(),\n                     rdb_version.value());\n  };\n\n  OpStatus result = cmd_cntx->tx()->ScheduleSingleHop(std::move(cb));\n\n  switch (result) {\n    case OpStatus::OK:\n      return cmd_cntx->SendOk();\n    case OpStatus::KEY_EXISTS:\n      return cmd_cntx->SendError(\"-BUSYKEY Target key name already exists.\");\n    case OpStatus::INVALID_VALUE:\n      return cmd_cntx->SendError(\"Bad data format\");\n    default:\n      return cmd_cntx->SendError(result);\n  }\n}\n\nvoid 
GenericFamily::FieldExpire(CmdArgList args, CommandContext* cmd_cntx) {\n  CmdArgParser parser{args};\n  string_view key = parser.Next();\n  string_view ttl_str = parser.Next();\n  uint32_t ttl_sec;\n  if (!absl::SimpleAtoi(ttl_str, &ttl_sec) || ttl_sec == 0 || ttl_sec > kMaxTtl) {\n    return cmd_cntx->SendError(kInvalidIntErr);\n  }\n  CmdArgList fields = parser.Tail();\n\n  auto cb = [&](Transaction* t, EngineShard* shard) {\n    return OpFieldExpire(t->GetOpArgs(shard), key, ttl_sec, fields);\n  };\n\n  OpResult<vector<long>> result = cmd_cntx->tx()->ScheduleSingleHopT(std::move(cb));\n\n  if (result) {\n    auto replier = [vec = std::move(result.value())](RedisReplyBuilder* rb) {\n      rb->SendLongArr(absl::MakeConstSpan(vec));\n    };\n    cmd_cntx->ReplyWith(std::move(replier));\n  } else {\n    cmd_cntx->SendError(result.status());\n  }\n}\n\n// Returns -2 if key not found, WRONG_TYPE if key is not a set or hash\n// -1 if the field does not have associated TTL on it, and -3 if field is not found.\nvoid GenericFamily::FieldTtl(CmdArgList args, CommandContext* cmd_cntx) {\n  string_view key = ArgS(args, 0);\n  string_view field = ArgS(args, 1);\n\n  auto cb = [&](Transaction* t, EngineShard* shard) { return OpFieldTtl(t, shard, key, field); };\n\n  OpResult<long> result = cmd_cntx->tx()->ScheduleSingleHopT(std::move(cb));\n\n  if (result) {\n    cmd_cntx->SendLong(*result);\n    return;\n  }\n\n  cmd_cntx->SendError(result.status());\n}\n\nvoid GenericFamily::Move(CmdArgList args, CommandContext* cmd_cntx) {\n  string_view key = ArgS(args, 0);\n  string_view target_db_sv = ArgS(args, 1);\n  int32_t target_db;\n  if (!absl::SimpleAtoi(target_db_sv, &target_db)) {\n    return cmd_cntx->SendError(kInvalidIntErr);\n  }\n\n  if (target_db < 0 || uint32_t(target_db) >= absl::GetFlag(FLAGS_dbnum)) {\n    return cmd_cntx->SendError(kDbIndOutOfRangeErr);\n  }\n\n  if (target_db == cmd_cntx->tx()->GetDbIndex()) {\n    return cmd_cntx->SendError(\"source and 
destination objects are the same\");\n  }\n\n  OpStatus res = OpStatus::SKIPPED;\n  ShardId target_shard = Shard(key, shard_set->size());\n  auto cb = [&](Transaction* t, EngineShard* shard) {\n    // MOVE runs as a global transaction and is therefore scheduled on every shard.\n    if (target_shard == shard->shard_id()) {\n      auto op_args = t->GetOpArgs(shard);\n      res = OpMove(op_args, key, target_db);\n      // MOVE runs as global command but we want to write the\n      // command to only one journal.\n      if (op_args.shard->journal()) {\n        RecordJournal(op_args, \"MOVE\"sv, ArgSlice{key, target_db_sv});\n      }\n    }\n    return OpStatus::OK;\n  };\n\n  cmd_cntx->tx()->ScheduleSingleHop(std::move(cb));\n  // Exactly one shard will call OpMove.\n  DCHECK(res != OpStatus::SKIPPED);\n  cmd_cntx->SendLong(res == OpStatus::OK);\n}\n\nvoid GenericFamily::Rename(CmdArgList args, CommandContext* cmd_cntx) {\n  auto reply = RenameGeneric(args, false, cmd_cntx->tx());\n  cmd_cntx->SendError(reply);\n}\n\nvoid GenericFamily::RenameNx(CmdArgList args, CommandContext* cmd_cntx) {\n  auto reply = RenameGeneric(args, true, cmd_cntx->tx());\n  if (!reply.status) {\n    return cmd_cntx->SendError(reply.ToSv(), reply.kind);\n  }\n\n  OpStatus st = reply.status.value();\n  if (st == OpStatus::OK) {\n    cmd_cntx->SendLong(1);\n  } else if (st == OpStatus::KEY_EXISTS) {\n    cmd_cntx->SendLong(0);\n  } else {\n    cmd_cntx->SendError(st);\n  }\n}\n\nvoid GenericFamily::Copy(CmdArgList args, CommandContext* cmd_cntx) {\n  CmdArgParser parser(args);\n  auto [k1, k2] = parser.Next<std::string_view, std::string_view>();\n  bool replace = parser.Check(\"REPLACE\");\n  if (!parser.Finalize()) {\n    return cmd_cntx->SendError(parser.TakeError().MakeReply());\n  }\n\n  if (k1 == k2) {\n    return cmd_cntx->SendError(\"source and destination objects are the same\");\n  }\n\n  Renamer renamer(cmd_cntx->tx(), k1, k2, shard_set->size(), true);\n  auto reply = 
renamer.Rename(!replace);\n\n  if (!reply.status) {\n    return cmd_cntx->SendError(reply);\n  }\n\n  OpStatus st = reply.status.value();\n  if (st == OpStatus::OK) {\n    cmd_cntx->SendLong(1);\n  } else if (st == OpStatus::KEY_EXISTS) {\n    cmd_cntx->SendLong(0);\n  } else if (st == OpStatus::KEY_NOTFOUND) {\n    cmd_cntx->SendLong(0);\n  } else {\n    cmd_cntx->SendError(reply);\n  }\n}\n\nvoid GenericFamily::ExpireTime(CmdArgList args, CommandContext* cmd_cntx) {\n  ExpireTimeGeneric(args, TimeUnit::SEC, cmd_cntx);\n}\n\nvoid GenericFamily::PExpireTime(CmdArgList args, CommandContext* cmd_cntx) {\n  ExpireTimeGeneric(args, TimeUnit::MSEC, cmd_cntx);\n}\n\nvoid GenericFamily::Ttl(CmdArgList args, CommandContext* cmd_cntx) {\n  TtlGeneric(args, TimeUnit::SEC, cmd_cntx);\n}\n\nvoid GenericFamily::Pttl(CmdArgList args, CommandContext* cmd_cntx) {\n  TtlGeneric(args, TimeUnit::MSEC, cmd_cntx);\n}\n\nvoid GenericFamily::Select(CmdArgList args, CommandContext* cmd_cntx) {\n  string_view key = ArgS(args, 0);\n  int64_t index;\n  if (!absl::SimpleAtoi(key, &index)) {\n    return cmd_cntx->SendError(kInvalidDbIndErr);\n  }\n  if (IsClusterEnabled() && index != 0) {\n    return cmd_cntx->SendError(\"SELECT is not allowed in cluster mode\");\n  }\n  if (index < 0 || index >= absl::GetFlag(FLAGS_dbnum)) {\n    return cmd_cntx->SendError(kDbIndOutOfRangeErr);\n  }\n  auto* cntx = cmd_cntx->server_conn_cntx();\n  if (cntx->conn_state.db_index == index) {\n    // accept a noop.\n    return cmd_cntx->SendOk();\n  }\n\n  // Only global/non-atomic multi transactions can change dbs safely,\n  // locked-ahead transactions acquired keys ahead for a specific dbindex\n  if (auto* tx = cmd_cntx->tx(); tx && tx->IsMulti()) {\n    if (tx->GetMultiMode() == Transaction::LOCK_AHEAD)\n      return cmd_cntx->SendError(\"SELECT is not allowed in regular EXEC/EVAL\");\n  }\n\n  if (cntx->conn_state.exec_info.IsRunning()) {\n    return cmd_cntx->SendError(\"SELECT is not allowed in a 
transaction\");\n  }\n\n  cntx->conn_state.db_index = index;\n  auto cb = [ns = cntx->ns, index](EngineShard* shard) {\n    auto& db_slice = ns->GetDbSlice(shard->shard_id());\n    db_slice.ActivateDb(index);\n    return OpStatus::OK;\n  };\n  shard_set->RunBriefInParallel(std::move(cb));\n\n  return cmd_cntx->SendOk();\n}\n\nvoid GenericFamily::Dump(CmdArgList args, CommandContext* cmd_cntx) {\n  std::string_view key = ArgS(args, 0);\n  DVLOG(1) << \"Dumping before ::ScheduleSingleHopT \" << key;\n  auto cb = [&](Transaction* t, EngineShard* shard) { return OpDump(t->GetOpArgs(shard), key); };\n  OpResult<string> result = cmd_cntx->tx()->ScheduleSingleHopT(std::move(cb));\n\n  if (result) {\n    DVLOG(1) << \"Dump \" << cmd_cntx->tx()->DebugId() << \": \" << key << \", dump size \"\n             << result.value().size();\n    auto reply = [data = std::move(*result)](RedisReplyBuilder* rb) { rb->SendBulkString(data); };\n    cmd_cntx->ReplyWith(std::move(reply));\n  } else {\n    static_cast<RedisReplyBuilder*>(cmd_cntx->rb())->SendNull();\n  }\n}\n\nvoid GenericFamily::Type(CmdArgList args, CommandContext* cmd_cntx) {\n  std::string_view key = ArgS(args, 0);\n\n  auto cb = [&](Transaction* t, EngineShard* shard) -> OpResult<CompactObjType> {\n    auto& db_slice = t->GetDbSlice(shard->shard_id());\n    auto it = db_slice.FindReadOnly(t->GetDbContext(), key).it;\n    if (!it.is_done()) {\n      return it->second.ObjType();\n    } else {\n      return OpStatus::KEY_NOTFOUND;\n    }\n  };\n  OpResult<CompactObjType> result = cmd_cntx->tx()->ScheduleSingleHopT(std::move(cb));\n  if (!result) {\n    cmd_cntx->SendSimpleString(\"none\");\n  } else {\n    cmd_cntx->SendSimpleString(ObjTypeToString(result.value()));\n  }\n}\n\nvoid GenericFamily::Time(CmdArgList args, CommandContext* cmd_cntx) {\n  uint64_t now_usec;\n  if (cmd_cntx->tx()) {\n    now_usec = cmd_cntx->tx()->GetDbContext().time_now_ms * 1000;\n  } else {\n    now_usec = absl::GetCurrentTimeNanos() / 1000;\n  
}\n  DCHECK_GT(now_usec, 0u);\n\n  auto replier = [now_usec](RedisReplyBuilder* rb) {\n    rb->StartArray(2);\n    rb->SendLong(now_usec / 1000000);\n    rb->SendLong(now_usec % 1000000);\n  };\n  cmd_cntx->ReplyWith(std::move(replier));\n}\n\nvoid GenericFamily::Echo(CmdArgList args, CommandContext* cmd_cntx) {\n  string_view key = ArgS(args, 0);\n  auto replier = [key = string(key)](RedisReplyBuilder* rb) { rb->SendBulkString(key); };\n  cmd_cntx->ReplyWith(std::move(replier));\n}\n\n// SCAN cursor [MATCH <glob>] [TYPE <type>] [COUNT <count>] [BUCKET <bucket_id>]\n// [ATTR <mask>] [MINMSZ <len>]\nvoid GenericFamily::Scan(CmdArgList args, CommandContext* cmd_cntx) {\n  string_view token = ArgS(args, 0);\n  uint64_t cursor = 0;\n  if (!absl::SimpleAtoi(token, &cursor)) {\n    if (absl::EqualsIgnoreCase(token, \"HELP\")) {\n      auto replier = [](RedisReplyBuilder* rb) {\n        string_view help_arr[] = {\n            \"SCAN cursor [MATCH <glob>] [TYPE <type>] [COUNT <count>] [ATTR <mask>] [MINMSZ \"\n            \"<len>]\",\n            \"    MATCH <glob> - pattern to match keys against\",\n            \"    TYPE <type> - type of values to match\",\n            \"    COUNT <count> - number of keys to return\",\n            \"    ATTR <v|p|a|u> - filter by attributes: v - volatile (ttl), \",\n            \"    p - persistent (no ttl), a - accessed since creation, u - untouched\",\n            \"    MINMSZ <len> - keeps keys with values, whose allocated size is greater or equal \"\n            \"to\",\n            \"        the specified length\",\n        };\n\n        rb->SendSimpleStrArr(help_arr);\n      };\n      return cmd_cntx->ReplyWith(std::move(replier));\n    }\n    return cmd_cntx->SendError(\"invalid cursor\");\n  }\n\n  OpResult<ScanOpts> ops = ScanOpts::TryFrom(args.subspan(1));\n  if (!ops) {\n    DVLOG(1) << \"Scan invalid args - return \" << ops << \" to the user\";\n    return cmd_cntx->SendError(ops.status());\n  }\n\n  const ScanOpts& scan_op = 
ops.value();\n\n  StringVec keys;\n  cursor = ScanGeneric(cursor, scan_op, &keys, cmd_cntx->server_conn_cntx());\n\n  auto replier = [cursor, keys = std::move(keys)](RedisReplyBuilder* builder) {\n    RedisReplyBuilder::ArrayScope scope{builder, 2};\n    builder->SendBulkString(absl::StrCat(cursor));\n    builder->SendBulkStrArr(keys);\n  };\n\n  cmd_cntx->ReplyWith(std::move(replier));\n}\n\nvoid GenericFamily::Rm(CmdArgList args, CommandContext* cmd_cntx) {\n  string_view token = ArgS(args, 0);\n  uint64_t cursor = 0;\n  if (!absl::SimpleAtoi(token, &cursor)) {\n    if (absl::EqualsIgnoreCase(token, \"HELP\")) {\n      auto replier = [](RedisReplyBuilder* rb) {\n        string_view help_arr[] = {\n            \"RM cursor [MATCH <glob>] [TYPE <type>] [COUNT <count>]\",\n            \"    MATCH <glob> - pattern to match keys against\",\n            \"    TYPE <type> - type of values to match (string, list, set, zset, hash, stream)\",\n            \"    COUNT <count> - number of keys to delete per call\",\n        };\n        rb->SendSimpleStrArr(help_arr);\n      };\n      return cmd_cntx->ReplyWith(std::move(replier));\n    }\n    return cmd_cntx->SendError(\"invalid cursor\", kSyntaxErrType);\n  }\n\n  OpResult<ScanOpts> ops = ScanOpts::TryFrom(args.subspan(1));\n  if (!ops) {\n    return cmd_cntx->SendError(ops.status());\n  }\n\n  uint32_t deleted = 0;\n  cursor = RmGeneric(cursor, ops.value(), &deleted, cmd_cntx->server_conn_cntx());\n\n  auto replier = [cursor, deleted](RedisReplyBuilder* rb) {\n    RedisReplyBuilder::ArrayScope scope{rb, 2};\n    rb->SendBulkString(absl::StrCat(cursor));\n    rb->SendLong(deleted);\n  };\n  cmd_cntx->ReplyWith(std::move(replier));\n}\n\nOpResult<uint32_t> GenericFamily::OpExists(const OpArgs& op_args, const ShardArgs& keys) {\n  DVLOG(1) << \"Exists: \" << keys.Front();\n  auto& db_slice = op_args.GetDbSlice();\n  uint32_t res = 0;\n\n  for (string_view key : keys) {\n    auto find_res = 
db_slice.FindReadOnly(op_args.db_cntx, key);\n    res += IsValid(find_res.it);\n  }\n  return res;\n}\n\nvoid GenericFamily::RandomKey(CmdArgList args, CommandContext* cmd_cntx) {\n  const static size_t kMaxAttempts = 3;\n\n  absl::BitGen bitgen;\n  atomic_size_t candidates_counter{0};\n  auto* cntx = cmd_cntx->server_conn_cntx();\n  DbContext db_cntx{cntx->ns, cntx->conn_state.db_index, GetCurrentTimeMs()};\n  ScanOpts scan_opts;\n  scan_opts.limit = 3;  // number of entries per shard\n  std::vector<StringVec> candidates_collection(shard_set->size());\n\n  shard_set->RunBriefInParallel(\n      [&](EngineShard* shard) {\n        auto* prime_table =\n            cntx->ns->GetDbSlice(shard->shard_id()).GetTables(db_cntx.db_index).first;\n        if (prime_table->size() == 0) {\n          return;\n        }\n\n        StringVec* candidates = &candidates_collection[shard->shard_id()];\n\n        for (size_t i = 0; i <= kMaxAttempts; ++i) {\n          if (!candidates->empty()) {\n            break;\n          }\n          uint64_t cursor = 0;  // scans from the start of the shard after reaching kMaxAttempts\n          if (i < kMaxAttempts) {\n            cursor = prime_table->GetRandomCursor(&bitgen).token();\n          }\n          OpScan({shard, 0u, db_cntx}, scan_opts, &cursor, candidates);\n        }\n\n        candidates_counter.fetch_add(candidates->size(), memory_order_relaxed);\n      },\n      [&](ShardId) { return true; });\n\n  auto candidates_count = candidates_counter.load(memory_order_relaxed);\n\n  size_t random_idx = absl::Uniform<size_t>(bitgen, 0, candidates_count);\n  for (auto& candidate : candidates_collection) {\n    if (random_idx >= candidate.size()) {\n      random_idx -= candidate.size();\n    } else {\n      auto replier = [key = std::move(candidate[random_idx])](RedisReplyBuilder* builder) {\n        builder->SendBulkString(key);\n      };\n      return cmd_cntx->ReplyWith(std::move(replier));\n    }\n  }\n  
static_cast<RedisReplyBuilder*>(cmd_cntx->rb())->SendNull();\n}\n\nusing CI = CommandId;\n\n#define HFUNC(x) SetHandler(&GenericFamily::x)\n\nnamespace acl {\n\nconstexpr uint32_t kDel = KEYSPACE | WRITE | SLOW;\nconstexpr uint32_t kPing = FAST | CONNECTION;\nconstexpr uint32_t kEcho = FAST | CONNECTION;\nconstexpr uint32_t kExists = KEYSPACE | READ | FAST;\nconstexpr uint32_t kTouch = KEYSPACE | READ | FAST;\nconstexpr uint32_t kExpire = KEYSPACE | WRITE | FAST;\nconstexpr uint32_t kExpireAt = KEYSPACE | WRITE | FAST;\nconstexpr uint32_t kPersist = KEYSPACE | WRITE | FAST;\nconstexpr uint32_t kKeys = KEYSPACE | READ | SLOW | DANGEROUS;\nconstexpr uint32_t kPExpireAt = KEYSPACE | WRITE | FAST;\nconstexpr uint32_t kPExpire = KEYSPACE | WRITE | FAST;\nconstexpr uint32_t kRename = KEYSPACE | WRITE | SLOW;\nconstexpr uint32_t kCopy = KEYSPACE | WRITE | SLOW;\nconstexpr uint32_t kRenamNX = KEYSPACE | WRITE | FAST;\nconstexpr uint32_t kSelect = FAST | CONNECTION;\nconstexpr uint32_t kScan = KEYSPACE | READ | SLOW;\nconstexpr uint32_t kRm = KEYSPACE | WRITE | SLOW | DANGEROUS;\nconstexpr uint32_t kTTL = KEYSPACE | READ | FAST;\nconstexpr uint32_t kPTTL = KEYSPACE | READ | FAST;\nconstexpr uint32_t kFieldTtl = KEYSPACE | READ | FAST;\nconstexpr uint32_t kTime = FAST;\nconstexpr uint32_t kType = KEYSPACE | READ | FAST;\nconstexpr uint32_t kDump = KEYSPACE | READ | SLOW;\nconstexpr uint32_t kUnlink = KEYSPACE | WRITE | FAST;\nconstexpr uint32_t kStick = KEYSPACE | WRITE | FAST;\nconstexpr uint32_t kSort = WRITE | SET | SORTEDSET | LIST | SLOW | DANGEROUS;\nconstexpr uint32_t kSortRO = READ | SET | SORTEDSET | LIST | SLOW | DANGEROUS;\nconstexpr uint32_t kMove = KEYSPACE | WRITE | FAST;\nconstexpr uint32_t kRestore = KEYSPACE | WRITE | SLOW | DANGEROUS;\nconstexpr uint32_t kExpireTime = KEYSPACE | READ | FAST;\nconstexpr uint32_t kPExpireTime = KEYSPACE | READ | FAST;\nconstexpr uint32_t kFieldExpire = WRITE | HASH | SET | FAST;\n}  // namespace acl\n\nvoid 
GenericFamily::Register(CommandRegistry* registry) {\n  constexpr auto kSelectOpts = CO::LOADING | CO::FAST;\n  registry->StartFamily();\n  *registry\n      << CI{\"DEL\", CO::JOURNALED, -2, 1, -1, acl::kDel}.SetAsyncHandler(CmdDel)\n      << CI{\"DELEX\", CO::JOURNALED | CO::FAST, -2, 1, 1, acl::kDel}.HFUNC(Delex)\n      /* Redis compatibility:\n       * We don't allow PING during loading since in Redis PING is used as\n       * failure detection, and a loading server is considered to be\n       * not available. */\n      << CI{\"PING\", CO::FAST, -1, 0, 0, acl::kPing}.HFUNC(Ping)\n      << CI{\"ECHO\", CO::LOADING | CO::FAST, 2, 0, 0, acl::kEcho}.HFUNC(Echo)\n      << CI{\"EXISTS\", CO::READONLY | CO::FAST, -2, 1, -1, acl::kExists}.HFUNC(Exists)\n      << CI{\"TOUCH\", CO::READONLY | CO::FAST, -2, 1, -1, acl::kTouch}.HFUNC(Exists)\n      << CI{\"EXPIRE\", CO::JOURNALED | CO::FAST | CO::NO_AUTOJOURNAL, -3, 1, 1, acl::kExpire}.HFUNC(\n             Expire)\n      << CI{\"EXPIREAT\", CO::JOURNALED | CO::FAST | CO::NO_AUTOJOURNAL, -3, 1, 1, acl::kExpireAt}\n             .HFUNC(ExpireAt)\n      << CI{\"PERSIST\", CO::JOURNALED | CO::FAST, 2, 1, 1, acl::kPersist}.HFUNC(Persist)\n      << CI{\"KEYS\", CO::READONLY, 2, 0, 0, acl::kKeys}.HFUNC(Keys)\n      << CI{\"PEXPIREAT\", CO::JOURNALED | CO::FAST | CO::NO_AUTOJOURNAL, -3, 1, 1, acl::kPExpireAt}\n             .HFUNC(PexpireAt)\n      << CI{\"PEXPIRE\", CO::JOURNALED | CO::FAST | CO::NO_AUTOJOURNAL, -3, 1, 1, acl::kPExpire}\n             .HFUNC(Pexpire)\n      << CI{\"FIELDEXPIRE\", CO::JOURNALED | CO::FAST | CO::DENYOOM, -4, 1, 1, acl::kFieldExpire}\n             .HFUNC(FieldExpire)\n      << CI{\"RENAME\", CO::JOURNALED | CO::NO_AUTOJOURNAL, 3, 1, 2, acl::kRename}.HFUNC(Rename)\n      << CI{\"COPY\", CO::JOURNALED | CO::NO_AUTOJOURNAL, -3, 1, 2, acl::kCopy}.HFUNC(Copy)\n      << CI{\"RENAMENX\", CO::JOURNALED | CO::NO_AUTOJOURNAL, 3, 1, 2, acl::kRenamNX}.HFUNC(RenameNx)\n      << CI{\"SELECT\", kSelectOpts, 2, 0, 0, 
acl::kSelect}.HFUNC(Select)\n      << CI{\"SCAN\", CO::READONLY | CO::FAST | CO::LOADING, -2, 0, 0, acl::kScan}.HFUNC(Scan)\n      << CI{\"RM\", CO::JOURNALED | CO::NO_AUTOJOURNAL, -2, 0, 0, acl::kRm}.HFUNC(Rm)\n      << CI{\"TTL\", CO::READONLY | CO::FAST, 2, 1, 1, acl::kTTL}.HFUNC(Ttl)\n      << CI{\"PTTL\", CO::READONLY | CO::FAST, 2, 1, 1, acl::kPTTL}.HFUNC(Pttl)\n      << CI{\"FIELDTTL\", CO::READONLY | CO::FAST, 3, 1, 1, acl::kFieldTtl}.HFUNC(FieldTtl)\n      << CI{\"TIME\", CO::LOADING | CO::FAST, 1, 0, 0, acl::kTime}.HFUNC(Time)\n      << CI{\"TYPE\", CO::READONLY | CO::FAST | CO::LOADING, 2, 1, 1, acl::kType}.HFUNC(Type)\n      << CI{\"DUMP\", CO::READONLY, 2, 1, 1, acl::kDump}.HFUNC(Dump)\n      << CI{\"UNLINK\", CO::JOURNALED, -2, 1, -1, acl::kUnlink}.SetAsyncHandler(CmdDel)\n      << CI{\"STICK\", CO::JOURNALED, -2, 1, -1, acl::kStick}.HFUNC(Stick)\n      << CI{\"SORT\", CO::JOURNALED | CO::STORE_LAST_KEY, -2, 1, 1, acl::kSort}.HFUNC(Sort)\n      << CI{\"SORT_RO\", CO::READONLY, -2, 1, 1, acl::kSortRO}.HFUNC(Sort_RO)\n      << CI{\"MOVE\", CO::JOURNALED | CO::GLOBAL_TRANS | CO::NO_AUTOJOURNAL, 3, 1, 1, acl::kMove}\n             .HFUNC(Move)\n      << CI{\"RESTORE\", CO::JOURNALED, -4, 1, 1, acl::kRestore}.HFUNC(Restore)\n      << CI{\"RANDOMKEY\", CO::READONLY, 1, 0, 0, 0}.HFUNC(RandomKey)\n      << CI{\"EXPIRETIME\", CO::READONLY | CO::FAST, 2, 1, 1, acl::kExpireTime}.HFUNC(ExpireTime)\n      << CI{\"PEXPIRETIME\", CO::READONLY | CO::FAST, 2, 1, 1, acl::kPExpireTime}.HFUNC(PExpireTime);\n}\n\n}  // namespace dfly\n"
  },
  {
    "path": "src/server/generic_family.h",
    "content": "// Copyright 2022, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#pragma once\n\n#include \"base/flags.h\"\n#include \"facade/facade_types.h\"\n#include \"server/tx_base.h\"\n\nABSL_DECLARE_FLAG(uint32_t, dbnum);\n\nnamespace dfly {\n\nusing facade::CmdArgList;\nusing facade::OpResult;\n\nclass GenericFamily {\n public:\n  static void Register(CommandRegistry* registry);\n\n  // Accessed by Service::Exec and Service::Watch as an utility.\n  static OpResult<uint32_t> OpExists(const OpArgs& op_args, const ShardArgs& keys);\n  static OpResult<uint32_t> OpDel(const OpArgs& op_args, const ShardArgs& keys, bool async);\n\n private:\n  static void Delex(CmdArgList args, CommandContext* cmd_cntx);\n  static void Ping(CmdArgList args, CommandContext* cmd_cntx);\n  static void Exists(CmdArgList args, CommandContext* cmd_cntx);\n  static void Expire(CmdArgList args, CommandContext* cmd_cntx);\n  static void ExpireAt(CmdArgList args, CommandContext* cmd_cntx);\n  static void Persist(CmdArgList args, CommandContext* cmd_cntx);\n  static void Keys(CmdArgList args, CommandContext* cmd_cntx);\n  static void PexpireAt(CmdArgList args, CommandContext* cmd_cntx);\n  static void Pexpire(CmdArgList args, CommandContext* cmd_cntx);\n  static void Stick(CmdArgList args, CommandContext* cmd_cntx);\n  static void Sort(CmdArgList args, CommandContext* cmd_cntx);\n  static void Sort_RO(CmdArgList args, CommandContext* cmd_cntx);\n  static void Move(CmdArgList args, CommandContext* cmd_cntx);\n\n  static void Rename(CmdArgList args, CommandContext* cmd_cntx);\n  static void RenameNx(CmdArgList args, CommandContext* cmd_cntx);\n  static void Copy(CmdArgList args, CommandContext* cmd_cntx);\n  static void ExpireTime(CmdArgList args, CommandContext* cmd_cntx);\n  static void PExpireTime(CmdArgList args, CommandContext* cmd_cntx);\n  static void Ttl(CmdArgList args, CommandContext* cmd_cntx);\n  static void Pttl(CmdArgList args, 
CommandContext* cmd_cntx);\n\n  static void Echo(CmdArgList args, CommandContext* cmd_cntx);\n  static void Select(CmdArgList args, CommandContext* cmd_cntx);\n  static void Scan(CmdArgList args, CommandContext* cmd_cntx);\n  static void Rm(CmdArgList args, CommandContext* cmd_cntx);\n  static void Time(CmdArgList args, CommandContext* cmd_cntx);\n  static void Type(CmdArgList args, CommandContext* cmd_cntx);\n  static void Dump(CmdArgList args, CommandContext* cmd_cntx);\n  static void Restore(CmdArgList args, CommandContext* cmd_cntx);\n  static void RandomKey(CmdArgList args, CommandContext* cmd_cntx);\n  static void FieldTtl(CmdArgList args, CommandContext* cmd_cntx);\n  static void FieldExpire(CmdArgList args, CommandContext* cmd_cntx);\n};\n\n}  // namespace dfly\n"
  },
  {
    "path": "src/server/generic_family_test.cc",
    "content": "// Copyright 2022, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#include \"server/generic_family.h\"\n\nextern \"C\" {\n#include \"redis/rdb.h\"\n}\n\n#include \"base/gtest.h\"\n#include \"base/logging.h\"\n#include \"facade/facade_test.h\"\n#include \"server/conn_context.h\"\n#include \"server/engine_shard_set.h\"\n#include \"server/test_utils.h\"\n#include \"server/transaction.h\"\n\nusing namespace testing;\nusing namespace std;\nusing namespace util;\nusing absl::StrCat;\n\nnamespace dfly {\n\nclass GenericFamilyTest : public BaseFamilyTest {};\n\nTEST_F(GenericFamilyTest, Expire) {\n  Run({\"set\", \"key\", \"val\"});\n\n  // sideqik expiry limit\n  auto resp = Run({\"expire\", \"key\", absl::StrCat(5 * 365 * 24 * 3600)});\n  EXPECT_THAT(resp, IntArg(1));\n\n  resp = Run({\"expire\", \"key\", \"1\"});\n  EXPECT_THAT(resp, IntArg(1));\n  AdvanceTime(1000);\n  resp = Run({\"get\", \"key\"});\n  EXPECT_THAT(resp, ArgType(RespExpr::NIL));\n\n  Run({\"set\", \"key\", \"val\"});\n  resp = Run({\"pexpireat\", \"key\", absl::StrCat(TEST_current_time_ms + 2000)});\n  EXPECT_THAT(resp, IntArg(1));\n\n  // override\n  resp = Run({\"pexpireat\", \"key\", absl::StrCat(TEST_current_time_ms + 3000)});\n  EXPECT_THAT(resp, IntArg(1));\n\n  AdvanceTime(2999);\n  resp = Run({\"get\", \"key\"});\n  EXPECT_THAT(resp, \"val\");\n\n  AdvanceTime(1);\n  resp = Run({\"get\", \"key\"});\n  EXPECT_THAT(resp, ArgType(RespExpr::NIL));\n\n  // pexpire test\n  Run({\"set\", \"key\", \"val\"});\n  resp = Run({\"pexpire\", \"key\", absl::StrCat(2000)});\n  EXPECT_THAT(resp, IntArg(1));\n\n  // expire time override\n  resp = Run({\"pexpire\", \"key\", absl::StrCat(3000)});\n  EXPECT_THAT(resp, IntArg(1));\n\n  AdvanceTime(2999);\n  resp = Run({\"get\", \"key\"});\n  EXPECT_THAT(resp, \"val\");\n\n  AdvanceTime(1);\n  resp = Run({\"get\", \"key\"});\n  EXPECT_THAT(resp, ArgType(RespExpr::NIL));\n}\n\nTEST_F(GenericFamilyTest, 
ExpireOptions) {\n  // NX and XX are mutually exclusive\n  Run({\"set\", \"key\", \"val\"});\n  auto resp = Run({\"expire\", \"key\", \"3600\", \"NX\", \"XX\"});\n  ASSERT_THAT(resp, ErrArg(\"NX and XX options at the same time are not compatible\"));\n\n  // GT and LT are mutually exclusive\n  resp = Run({\"expire\", \"key\", \"3600\", \"GT\", \"LT\"});\n  ASSERT_THAT(resp, ErrArg(\"GT and LT options at the same time are not compatible\"));\n\n  // NX option should be added since there is no expiry\n  resp = Run({\"expire\", \"key\", \"3600\", \"NX\"});\n  EXPECT_THAT(resp, IntArg(1));\n  resp = Run({\"ttl\", \"key\"});\n  EXPECT_THAT(resp.GetInt(), 3600);\n\n  // running again with NX option, should not change expiry\n  resp = Run({\"expire\", \"key\", \"42\", \"NX\"});\n  EXPECT_THAT(resp, IntArg(0));\n\n  // given a key with no expiry\n  Run({\"set\", \"key2\", \"val\"});\n  resp = Run({\"expire\", \"key2\", \"404\", \"XX\"});\n  // XX does not apply expiry since key has no existing expiry\n  EXPECT_THAT(resp, IntArg(0));\n  resp = Run({\"ttl\", \"key2\"});\n  EXPECT_THAT(resp.GetInt(), -1);\n\n  // GT does not apply since key has no \"inf\" expiry\n  resp = Run({\"expire\", \"key2\", \"404\", \"GT\"});\n  EXPECT_THAT(resp, IntArg(0));\n  resp = Run({\"ttl\", \"key2\"});\n  EXPECT_THAT(resp.GetInt(), -1);\n\n  // LT applies\n  resp = Run({\"expire\", \"key2\", \"404\", \"LT\"});\n  EXPECT_THAT(resp, IntArg(1));\n  resp = Run({\"ttl\", \"key2\"});\n  EXPECT_THAT(resp.GetInt(), 404);\n\n  Run({\"persist\", \"key\"});\n\n  // set expiry to 101\n  resp = Run({\"expire\", \"key\", \"101\"});\n  EXPECT_THAT(resp, IntArg(1));\n\n  // GT should not apply expiry since new is not greater than the current one\n  resp = Run({\"expire\", \"key\", \"100\", \"GT\"});\n  EXPECT_THAT(resp, IntArg(0));\n  resp = Run({\"ttl\", \"key\"});\n  EXPECT_THAT(resp.GetInt(), 101);\n\n  // GT should apply expiry since new is greater than the current one\n  resp = Run({\"expire\", \"key\", 
\"102\", \"GT\"});\n  EXPECT_THAT(resp, IntArg(1));\n  resp = Run({\"ttl\", \"key\"});\n  EXPECT_THAT(resp.GetInt(), 102);\n\n  // GT should not apply since expiry is smaller than current\n  resp = Run({\"expire\", \"key\", \"101\", \"GT\"});\n  EXPECT_THAT(resp, IntArg(0));\n  resp = Run({\"ttl\", \"key\"});\n  EXPECT_THAT(resp.GetInt(), 102);\n\n  // LT should apply new expiry is smaller than current\n  resp = Run({\"expire\", \"key\", \"101\", \"LT\"});\n  EXPECT_THAT(resp, IntArg(1));\n  resp = Run({\"ttl\", \"key\"});\n  EXPECT_THAT(resp.GetInt(), 101);\n\n  resp = Run({\"expire\", \"key\", \"102\", \"LT\"});\n  EXPECT_THAT(resp, IntArg(0));\n  resp = Run({\"ttl\", \"key\"});\n  EXPECT_THAT(resp.GetInt(), 101);\n\n  // NX with GT, first sets expiry, updates only to larger values\n  Run({\"persist\", \"key\"});\n  Run({\"expire\", \"key\", \"5\", \"NX\", \"GT\"});\n  EXPECT_THAT(Run({\"ttl\", \"key\"}), IntArg(5));\n\n  Run({\"expire\", \"key\", \"3\", \"NX\", \"GT\"});\n  EXPECT_THAT(Run({\"ttl\", \"key\"}), IntArg(5));\n\n  Run({\"expire\", \"key\", \"7\", \"NX\", \"GT\"});\n  EXPECT_THAT(Run({\"ttl\", \"key\"}), IntArg(7));\n}\n\nTEST_F(GenericFamilyTest, ExpireAtOptions) {\n  auto test_time_ms = TEST_current_time_ms;\n  auto time_s = (test_time_ms + 500) / 1000;\n  auto test_time_s = time_s;\n\n  Run({\"set\", \"key\", \"val\"});\n  // NX and XX are mutually exclusive\n  auto resp = Run({\"expireat\", \"key\", \"3600\", \"NX\", \"XX\"});\n  ASSERT_THAT(resp, ErrArg(\"NX and XX options at the same time are not compatible\"));\n\n  // GT and LT are mutually exclusive\n  resp = Run({\"expireat\", \"key\", \"3600\", \"GT\", \"LT\"});\n  ASSERT_THAT(resp, ErrArg(\"GT and LT options at the same time are not compatible\"));\n\n  // NX option should be added since there is no expiry\n  test_time_s = time_s + 5;\n  resp = Run({\"expireat\", \"key\", absl::StrCat(test_time_s), \"NX\"});\n  EXPECT_THAT(resp, IntArg(1));\n  EXPECT_EQ(test_time_s, 
CheckedInt({\"EXPIRETIME\", \"key\"}));\n\n  // running again with NX option, should not change expiry\n  test_time_s = time_s + 9;\n  resp = Run({\"expireat\", \"key\", absl::StrCat(test_time_s), \"NX\"});\n  EXPECT_THAT(resp, IntArg(0));\n\n  // NX option with expired time is not accepted and so it doesn't delete the value\n  resp = Run({\"expireat\", \"key\", absl::StrCat(TEST_current_time_ms / 1000 - 10), \"NX\"});\n  EXPECT_THAT(resp, IntArg(0));\n  EXPECT_THAT(Run({\"exists\", \"key\"}), IntArg(1));\n\n  // given a key with no expiry\n  Run({\"set\", \"key2\", \"val\"});\n  test_time_s = time_s + 9;\n  resp = Run({\"expireat\", \"key2\", absl::StrCat(test_time_s), \"XX\"});\n  // XX does not apply expiry since key has no existing expiry\n  EXPECT_THAT(resp, IntArg(0));\n  resp = Run({\"ttl\", \"key2\"});\n  EXPECT_THAT(resp.GetInt(), -1);\n\n  // set expiry to 101\n  test_time_s = time_s + 101;\n  resp = Run({\"expireat\", \"key\", absl::StrCat(test_time_s)});\n  EXPECT_THAT(resp, IntArg(1));\n\n  // GT should not apply expiry since new is not greater than the current one\n  auto less_test_time_s = time_s + 99;\n  resp = Run({\"expireat\", \"key\", absl::StrCat(less_test_time_s), \"GT\"});\n  EXPECT_THAT(resp, IntArg(0));\n  EXPECT_EQ(test_time_s, CheckedInt({\"EXPIRETIME\", \"key\"}));\n\n  // GT should apply expiry since new is greater than the current one\n  test_time_s = time_s + 105;\n  resp = Run({\"expireat\", \"key\", absl::StrCat(test_time_s), \"GT\"});\n  EXPECT_THAT(resp, IntArg(1));\n  EXPECT_EQ(test_time_s, CheckedInt({\"EXPIRETIME\", \"key\"}));\n\n  // LT should apply new expiry is smaller than current\n  test_time_s = time_s + 101;\n  resp = Run({\"expireat\", \"key\", absl::StrCat(test_time_s), \"LT\"});\n  EXPECT_THAT(resp, IntArg(1));\n  EXPECT_EQ(test_time_s, CheckedInt({\"EXPIRETIME\", \"key\"}));\n\n  // LT should not apply expiry since new is not lesser than the current one\n  auto gt_test_time_s = time_s + 102;\n  resp = 
Run({\"expireat\", \"key\", absl::StrCat(gt_test_time_s), \"LT\"});\n  EXPECT_THAT(resp, IntArg(0));\n  EXPECT_EQ(test_time_s, CheckedInt({\"EXPIRETIME\", \"key\"}));\n}\n\nTEST_F(GenericFamilyTest, PExpireOptions) {\n  // NX and XX are mutually exclusive\n  Run({\"set\", \"key\", \"val\"});\n  auto resp = Run({\"pexpire\", \"key\", \"3600\", \"NX\", \"XX\"});\n  ASSERT_THAT(resp, ErrArg(\"NX and XX options at the same time are not compatible\"));\n\n  // GT and LT are mutually exclusive\n  resp = Run({\"pexpire\", \"key\", \"3600\", \"GT\", \"LT\"});\n  ASSERT_THAT(resp, ErrArg(\"GT and LT options at the same time are not compatible\"));\n\n  // NX option should be added since there is no expiry\n  resp = Run({\"pexpire\", \"key\", \"3600000\", \"NX\"});\n  EXPECT_THAT(resp, IntArg(1));\n  resp = Run({\"pttl\", \"key\"});\n  EXPECT_THAT(resp.GetInt(), 3600000);\n\n  // running again with NX option, should not change expiry\n  resp = Run({\"pexpire\", \"key\", \"42\", \"NX\"});\n  EXPECT_THAT(resp, IntArg(0));\n\n  // given a key with no expiry\n  Run({\"set\", \"key2\", \"val\"});\n  resp = Run({\"pexpire\", \"key2\", \"404\", \"XX\"});\n  // XX does not apply expiry since key has no existing expiry\n  EXPECT_THAT(resp, IntArg(0));\n  resp = Run({\"pttl\", \"key2\"});\n  EXPECT_THAT(resp.GetInt(), -1);\n\n  // set expiry to 101\n  resp = Run({\"pexpire\", \"key\", \"101000\"});\n  EXPECT_THAT(resp, IntArg(1));\n\n  // GT should not apply expiry since new is not greater than the current one\n  resp = Run({\"pexpire\", \"key\", \"100000\", \"GT\"});\n  EXPECT_THAT(resp, IntArg(0));\n  resp = Run({\"pttl\", \"key\"});\n  EXPECT_THAT(resp.GetInt(), 101000);\n\n  // GT should apply expiry since new is greater than the current one\n  resp = Run({\"pexpire\", \"key\", \"102000\", \"GT\"});\n  EXPECT_THAT(resp, IntArg(1));\n  resp = Run({\"pttl\", \"key\"});\n  EXPECT_THAT(resp.GetInt(), 102000);\n\n  // GT should not apply since expiry is smaller than current\n  resp = 
Run({\"pexpire\", \"key\", \"101000\", \"GT\"});\n  EXPECT_THAT(resp, IntArg(0));\n  resp = Run({\"pttl\", \"key\"});\n  EXPECT_THAT(resp.GetInt(), 102000);\n\n  // LT should apply new expiry is smaller than current\n  resp = Run({\"pexpire\", \"key\", \"101000\", \"LT\"});\n  EXPECT_THAT(resp, IntArg(1));\n  resp = Run({\"pttl\", \"key\"});\n  EXPECT_THAT(resp.GetInt(), 101000);\n\n  // LT should not apply since expiry is greater than current\n  resp = Run({\"pexpire\", \"key\", \"102000\", \"LT\"});\n  EXPECT_THAT(resp, IntArg(0));\n  resp = Run({\"pttl\", \"key\"});\n  EXPECT_THAT(resp.GetInt(), 101000);\n}\n\nTEST_F(GenericFamilyTest, PExpireAtOptions) {\n  auto test_time_ms = TEST_current_time_ms;\n  Run({\"set\", \"key\", \"val\"});\n  // NX and XX are mutually exclusive\n  auto resp = Run({\"pexpireat\", \"key\", \"3600\", \"NX\", \"XX\"});\n  ASSERT_THAT(resp, ErrArg(\"NX and XX options at the same time are not compatible\"));\n\n  // GT and LT are mutually exclusive\n  resp = Run({\"pexpireat\", \"key\", \"3600\", \"GT\", \"LT\"});\n  ASSERT_THAT(resp, ErrArg(\"GT and LT options at the same time are not compatible\"));\n\n  // NX option should be added since there is no expiry\n  test_time_ms = TEST_current_time_ms + 3600;\n  resp = Run({\"pexpireat\", \"key\", absl::StrCat(test_time_ms), \"NX\"});\n  EXPECT_THAT(resp, IntArg(1));\n  EXPECT_EQ(test_time_ms, CheckedInt({\"PEXPIRETIME\", \"key\"}));\n\n  // running again with NX option, should not change expiry\n  test_time_ms = TEST_current_time_ms + 42000;\n  resp = Run({\"pexpireat\", \"key\", absl::StrCat(test_time_ms), \"NX\"});\n  EXPECT_THAT(resp, IntArg(0));\n\n  // given a key with no expiry\n  Run({\"set\", \"key2\", \"val\"});\n  test_time_ms = TEST_current_time_ms + 404;\n  resp = Run({\"pexpireat\", \"key2\", absl::StrCat(test_time_ms), \"XX\"});\n  // XX does not apply expiry since key has no existing expiry\n  EXPECT_THAT(resp, IntArg(0));\n  resp = Run({\"ttl\", \"key2\"});\n  
EXPECT_THAT(resp.GetInt(), -1);\n\n  // set expiry to 101\n  test_time_ms = TEST_current_time_ms + 101;\n  resp = Run({\"pexpireat\", \"key\", absl::StrCat(test_time_ms)});\n  EXPECT_THAT(resp, IntArg(1));\n\n  // GT should not apply expiry since new is not greater than the current one\n  auto less_test_time_ms = TEST_current_time_ms + 100;\n  resp = Run({\"pexpireat\", \"key\", absl::StrCat(less_test_time_ms), \"GT\"});\n  EXPECT_THAT(resp, IntArg(0));\n  EXPECT_EQ(test_time_ms, CheckedInt({\"PEXPIRETIME\", \"key\"}));\n\n  // GT should apply expiry since new is greater than the current one\n  test_time_ms = TEST_current_time_ms + 105;\n  resp = Run({\"pexpireat\", \"key\", absl::StrCat(test_time_ms), \"GT\"});\n  EXPECT_THAT(resp, IntArg(1));\n  EXPECT_EQ(test_time_ms, CheckedInt({\"PEXPIRETIME\", \"key\"}));\n\n  // LT should apply new expiry is smaller than current\n  test_time_ms = TEST_current_time_ms + 101;\n  resp = Run({\"pexpireat\", \"key\", absl::StrCat(test_time_ms), \"LT\"});\n  EXPECT_THAT(resp, IntArg(1));\n  EXPECT_EQ(test_time_ms, CheckedInt({\"PEXPIRETIME\", \"key\"}));\n\n  // LT should not apply expiry since new is not lesser than the current one\n  auto gt_test_time_ms = TEST_current_time_ms + 102;\n  resp = Run({\"pexpireat\", \"key\", absl::StrCat(gt_test_time_ms), \"LT\"});\n  EXPECT_THAT(resp, IntArg(0));\n  EXPECT_EQ(test_time_ms, CheckedInt({\"PEXPIRETIME\", \"key\"}));\n}\n\nTEST_F(GenericFamilyTest, Del) {\n  for (size_t i = 0; i < 1000; ++i) {\n    Run({\"set\", StrCat(\"foo\", i), \"1\"});\n    Run({\"set\", StrCat(\"bar\", i), \"1\"});\n  }\n\n  ASSERT_EQ(2000, CheckedInt({\"dbsize\"}));\n\n  auto exist_fb = pp_->at(0)->LaunchFiber([&] {\n    for (size_t i = 0; i < 1000; ++i) {\n      int64_t resp = CheckedInt({\"exists\", StrCat(\"foo\", i), StrCat(\"bar\", i)});\n      ASSERT_TRUE(2 == resp || resp == 0) << resp << \" \" << i;\n    }\n  });\n\n  auto del_fb = pp_->at(2)->LaunchFiber([&] {\n    for (size_t i = 0; i < 1000; ++i) {\n 
     auto resp = CheckedInt({\"del\", StrCat(\"foo\", i), StrCat(\"bar\", i)});\n      ASSERT_EQ(2, resp);\n    }\n  });\n\n  exist_fb.Join();\n  del_fb.Join();\n\n  Run({\"setex\", \"k1\", \"10\", \"bar\"});\n  Run({\"del\", \"k1\"});\n}\n\nTEST_F(GenericFamilyTest, TTL) {\n  EXPECT_EQ(-2, CheckedInt({\"ttl\", \"foo\"}));\n  EXPECT_EQ(-2, CheckedInt({\"pttl\", \"foo\"}));\n  Run({\"set\", \"foo\", \"bar\"});\n  EXPECT_EQ(-1, CheckedInt({\"ttl\", \"foo\"}));\n  EXPECT_EQ(-1, CheckedInt({\"pttl\", \"foo\"}));\n}\n\nTEST_F(GenericFamilyTest, Exists) {\n  Run({\"mset\", \"x\", \"0\", \"y\", \"1\"});\n  auto resp = Run({\"exists\", \"x\", \"y\", \"x\"});\n  EXPECT_THAT(resp, IntArg(3));\n}\n\nTEST_F(GenericFamilyTest, Touch) {\n  RespExpr resp;\n\n  Run({\"mset\", \"x\", \"0\", \"y\", \"1\"});\n  resp = Run({\"touch\", \"x\", \"y\", \"x\"});\n  EXPECT_THAT(resp, IntArg(3));\n\n  resp = Run({\"touch\", \"z\", \"x\", \"w\"});\n  EXPECT_THAT(resp, IntArg(1));\n}\n\nTEST_F(GenericFamilyTest, Rename) {\n  RespExpr resp;\n  string b_val(32, 'b');\n  string x_val(32, 'x');\n\n  resp = Run({\"mset\", \"x\", x_val, \"b\", b_val});\n  ASSERT_EQ(resp, \"OK\");\n  ASSERT_EQ(2, last_cmd_dbg_info_.shards_count);\n\n  resp = Run({\"rename\", \"z\", \"b\"});\n  ASSERT_THAT(resp, ErrArg(\"no such key\"));\n\n  resp = Run({\"rename\", \"x\", \"b\"});\n  ASSERT_EQ(resp, \"OK\");\n\n  int64_t val = CheckedInt({\"get\", \"x\"});\n  ASSERT_EQ(kint64min, val);  // does not exist\n\n  ASSERT_EQ(x_val, Run({\"get\", \"b\"}));  // swapped.\n\n  EXPECT_EQ(CheckedInt({\"exists\", \"x\", \"b\"}), 1);\n\n  const char* keys[2] = {\"b\", \"x\"};\n  auto ren_fb = pp_->at(0)->LaunchFiber([&] {\n    for (size_t i = 0; i < 200; ++i) {\n      int j = i % 2;\n      auto resp = Run({\"rename\", keys[j], keys[1 - j]});\n      ASSERT_EQ(resp, \"OK\");\n    }\n  });\n\n  auto exist_fb = pp_->at(2)->LaunchFiber([&] {\n    for (size_t i = 0; i < 300; ++i) {\n      int64_t resp = CheckedInt({\"exists\", \"x\", 
\"b\"});\n      ASSERT_EQ(1, resp);\n    }\n  });\n\n  exist_fb.Join();\n  ren_fb.Join();\n}\n\nTEST_F(GenericFamilyTest, RenameList) {\n  for (string_view dest : {\"b\", \"y\", \"z\"}) {\n    EXPECT_EQ(1, CheckedInt({\"lpush\", \"x\", \"elem\"}));\n    Metrics metrics = GetMetrics();\n\n    size_t list_usage = metrics.db_stats[0].memory_usage_by_type[OBJ_LIST];\n    size_t string_usage = metrics.db_stats[0].memory_usage_by_type[OBJ_STRING];\n    ASSERT_GT(list_usage, 0);\n    ASSERT_EQ(string_usage, 0);\n\n    auto resp = Run({\"rename\", \"x\", dest});\n    ASSERT_EQ(resp, \"OK\");\n    if (dest == \"b\") {\n      ASSERT_EQ(2, last_cmd_dbg_info_.shards_count);\n    } else {\n      ASSERT_EQ(1, last_cmd_dbg_info_.shards_count);\n    }\n\n    metrics = GetMetrics();\n    size_t list_usage_after = metrics.db_stats[0].memory_usage_by_type[OBJ_LIST];\n    string_usage = metrics.db_stats[0].memory_usage_by_type[OBJ_STRING];\n    ASSERT_EQ(list_usage_after, list_usage);\n    ASSERT_EQ(string_usage, 0);\n\n    EXPECT_EQ(0, CheckedInt({\"del\", \"x\"}));\n    EXPECT_EQ(1, CheckedInt({\"del\", dest}));\n  }\n}\n\nTEST_F(GenericFamilyTest, RenameBinary) {\n  const char kKey1[] = \"\\x01\\x02\\x03\\x04\";\n  const char kKey2[] = \"\\x05\\x06\\x07\\x08\";\n\n  Run({\"set\", kKey1, \"bar\"});\n  Run({\"rename\", kKey1, kKey2});\n  EXPECT_THAT(Run({\"get\", kKey1}), ArgType(RespExpr::NIL));\n  EXPECT_EQ(Run({\"get\", kKey2}), \"bar\");\n}\n\nTEST_F(GenericFamilyTest, RenameNx) {\n  // Set two keys\n  string b_val(32, 'b');\n  string x_val(32, 'x');\n  Run({\"mset\", \"x\", x_val, \"b\", b_val});\n\n  ASSERT_THAT(Run({\"renamenx\", \"z\", \"b\"}), ErrArg(\"no such key\"));\n  ASSERT_THAT(Run({\"renamenx\", \"x\", \"b\"}), IntArg(0));  // b already exists\n  ASSERT_THAT(Run({\"renamenx\", \"x\", \"y\"}), IntArg(1));\n  ASSERT_EQ(Run({\"get\", \"y\"}), x_val);\n  ASSERT_THAT(Run({\"renamenx\", \"y\", \"y\"}), IntArg(0));\n}\n\nTEST_F(GenericFamilyTest, RenameSameName) {\n  const 
char kKey[] = \"key\";\n\n  ASSERT_THAT(Run({\"rename\", kKey, kKey}), ErrArg(\"no such key\"));\n\n  ASSERT_EQ(Run({\"set\", kKey, \"value\"}), \"OK\");\n  EXPECT_EQ(Run({\"rename\", kKey, kKey}), \"OK\");\n}\n\nTEST_F(GenericFamilyTest, RenameSameShard) {\n  num_threads_ = 1;\n  ResetService();\n\n  ASSERT_EQ(Run({\"set\", \"x\", \"value\"}), \"OK\");\n  ASSERT_EQ(Run({\"set\", \"y\", \"value\"}), \"OK\");\n  EXPECT_EQ(Run({\"rename\", \"x\", \"y\"}), \"OK\");\n}\n\nTEST_F(GenericFamilyTest, Stick) {\n  // check stick returns zero on non-existent keys\n  ASSERT_THAT(Run({\"stick\", \"a\", \"b\"}), IntArg(0));\n\n  for (auto key : {\"a\", \"b\", \"c\", \"d\"}) {\n    Run({\"set\", key, \".\"});\n  }\n\n  // check stick is applied only once\n  ASSERT_THAT(Run({\"stick\", \"a\", \"b\"}), IntArg(2));\n  ASSERT_THAT(Run({\"stick\", \"a\", \"b\"}), IntArg(0));\n  ASSERT_THAT(Run({\"stick\", \"a\", \"c\"}), IntArg(1));\n  ASSERT_THAT(Run({\"stick\", \"b\", \"d\"}), IntArg(1));\n  ASSERT_THAT(Run({\"stick\", \"c\", \"d\"}), IntArg(0));\n\n  // check stickyness persists during writes\n  Run({\"set\", \"a\", \"new\"});\n  ASSERT_THAT(Run({\"stick\", \"a\"}), IntArg(0));\n  Run({\"append\", \"a\", \"-value\"});\n  ASSERT_THAT(Run({\"stick\", \"a\"}), IntArg(0));\n\n  // check rename persists stickyness\n  Run({\"rename\", \"a\", \"k\"});\n  ASSERT_THAT(Run({\"stick\", \"k\"}), IntArg(0));\n\n  // check rename persists stickyness on multiple shards\n  Run({\"del\", \"b\"});\n  string b_val(32, 'b');\n  string x_val(32, 'x');\n  Run({\"mset\", \"b\", b_val, \"x\", x_val});\n  ASSERT_EQ(2, last_cmd_dbg_info_.shards_count);\n  Run({\"stick\", \"x\"});\n  Run({\"rename\", \"x\", \"b\"});\n  ASSERT_THAT(Run({\"stick\", \"b\"}), IntArg(0));\n}\n\nTEST_F(GenericFamilyTest, Move) {\n  // Check MOVE returns 0 on non-existent keys\n  ASSERT_THAT(Run({\"move\", \"a\", \"1\"}), IntArg(0));\n\n  // Check MOVE catches non-existent database indices\n  ASSERT_THAT(Run({\"move\", \"a\", 
\"-1\"}), ArgType(RespExpr::ERROR));\n  ASSERT_THAT(Run({\"move\", \"a\", \"100500\"}), ArgType(RespExpr::ERROR));\n\n  // Check MOVE moves value & expiry & stickyness\n  Run({\"set\", \"a\", \"test\"});\n  Run({\"expire\", \"a\", \"1000\"});\n  Run({\"stick\", \"a\"});\n  ASSERT_THAT(Run({\"move\", \"a\", \"1\"}), IntArg(1));\n  Run({\"select\", \"1\"});\n  ASSERT_THAT(Run({\"get\", \"a\"}), \"test\");\n  ASSERT_THAT(Run({\"ttl\", \"a\"}), testing::Not(IntArg(-1)));\n  ASSERT_THAT(Run({\"stick\", \"a\"}), IntArg(0));\n\n  // Check MOVE doesn't move if key exists\n  Run({\"select\", \"1\"});\n  Run({\"set\", \"a\", \"test\"});\n  Run({\"select\", \"0\"});\n  Run({\"set\", \"a\", \"another test\"});\n  ASSERT_THAT(Run({\"move\", \"a\", \"1\"}), IntArg(0));  // exists from test case above\n  Run({\"select\", \"1\"});\n  ASSERT_THAT(Run({\"get\", \"a\"}), \"test\");\n\n  // Check MOVE awakes blocking operations\n  auto fb_blpop = pp_->at(0)->LaunchFiber(Launch::dispatch, [&] {\n    Run({\"select\", \"1\"});\n    auto resp = Run({\"blpop\", \"l\", \"0\"});\n    ASSERT_THAT(resp, ArgType(RespExpr::ARRAY));\n    EXPECT_THAT(resp.GetVec(), ElementsAre(\"l\", \"TestItem\"));\n  });\n\n  WaitUntilLocked(1, \"l\");\n\n  pp_->at(1)->Await([&] {\n    Run({\"select\", \"0\"});\n    Run({\"lpush\", \"l\", \"TestItem\"});\n    Run({\"move\", \"l\", \"1\"});\n  });\n\n  fb_blpop.Join();\n}\n\nusing testing::AnyOf;\nusing testing::Each;\nusing testing::StartsWith;\n\nTEST_F(GenericFamilyTest, Scan) {\n  for (unsigned i = 0; i < 10; ++i)\n    Run({\"set\", absl::StrCat(\"key\", i), \"bar\"});\n\n  for (unsigned i = 0; i < 10; ++i)\n    Run({\"set\", absl::StrCat(\"str\", i), \"bar\"});\n\n  for (unsigned i = 0; i < 10; ++i)\n    Run({\"sadd\", absl::StrCat(\"set\", i), \"bar\"});\n\n  for (unsigned i = 0; i < 10; ++i)\n    Run({\"zadd\", absl::StrCat(\"zset\", i), \"0\", \"bar\"});\n\n  auto resp = Run({\"scan\", \"0\", \"count\", \"20\", \"type\", \"string\"});\n  EXPECT_THAT(resp, 
ArrLen(2));\n  auto vec = StrArray(resp.GetVec()[1]);\n  EXPECT_GT(vec.size(), 10);\n  EXPECT_THAT(vec, Each(AnyOf(StartsWith(\"str\"), StartsWith(\"key\"))));\n\n  resp = Run({\"scan\", \"0\", \"count\", \"20\", \"match\", \"zset*\"});\n  vec = StrArray(resp.GetVec()[1]);\n  EXPECT_EQ(10, vec.size());\n  EXPECT_THAT(vec, Each(StartsWith(\"zset\")));\n\n  Run({\"flushdb\"});\n\n  Run({\"set\", \"\", \"foo\"});\n  Run({\"set\", \"bar\", \"1\"});\n  resp = Run({\"keys\", \"*\"});\n  EXPECT_THAT(resp, RespArray(ElementsAre(\"bar\", \"\")));\n  resp = Run({\"keys\", \"\"});\n  EXPECT_EQ(resp, \"\");\n}\n\nTEST_F(GenericFamilyTest, ScanWithAttr) {\n  Run({\"set\", \"hello\", \"world\"});\n  Run({\"set\", \"foo\", \"bar\"});\n\n  Run({\"expire\", \"hello\", \"1000\"});\n\n  auto resp = Run({\"scan\", \"0\", \"attr\", \"v\"});\n  auto vec = StrArray(resp.GetVec()[1]);\n  ASSERT_EQ(1, vec.size());\n  EXPECT_EQ(vec[0], \"hello\");\n\n  resp = Run({\"scan\", \"0\", \"attr\", \"p\"});\n  vec = StrArray(resp.GetVec()[1]);\n  ASSERT_EQ(1, vec.size());\n  EXPECT_EQ(vec[0], \"foo\");\n\n  // before run get \"foo\", scan with a attr should return \"hello\", because set \"hello\" expire before\n  resp = Run({\"scan\", \"0\", \"attr\", \"a\"});\n  vec = StrArray(resp.GetVec()[1]);\n  ASSERT_EQ(1, vec.size());\n  EXPECT_EQ(vec[0], \"hello\");\n\n  // before run get \"foo\", scan with a attr should return \"foo\"\n  resp = Run({\"scan\", \"0\", \"attr\", \"u\"});\n  vec = StrArray(resp.GetVec()[1]);\n  ASSERT_EQ(1, vec.size());\n  EXPECT_EQ(vec[0], \"foo\");\n\n  ASSERT_THAT(Run({\"get\", \"foo\"}), \"bar\");\n\n  // after run get \"foo\", scan with a attr should return \"foo\" and \"hello\"\n  resp = Run({\"scan\", \"0\", \"attr\", \"a\"});\n  vec = StrArray(resp.GetVec()[1]);\n  ASSERT_EQ(2, vec.size());\n\n  // after run get \"foo\", scan with a attr should return empty set\n  resp = Run({\"scan\", \"0\", \"attr\", \"u\"});\n  vec = StrArray(resp.GetVec()[1]);\n  ASSERT_EQ(0, 
vec.size());\n}\n\nTEST_F(GenericFamilyTest, ScanMallocSize) {\n  Run({\"set\", \"k1\", string(1000, 'a')});\n  Run({\"set\", \"k2\", string(500, 'b')});\n  Run({\"set\", \"k3\", string(15, 'c')});\n\n  auto resp = Run({\"scan\", \"0\", \"MINMSZ\", \"15\"});\n  EXPECT_THAT(resp.GetVec()[1], RespArray(UnorderedElementsAre(\"k1\", \"k2\")));\n  resp = Run({\"scan\", \"0\", \"MINMSZ\", \"500\"});\n  EXPECT_THAT(resp.GetVec()[1], RespArray(UnorderedElementsAre(\"k1\")));\n}\n\nTEST_F(GenericFamilyTest, Sort) {\n  // Test list sort with params\n  Run({\"del\", \"list-1\"});\n  Run({\"lpush\", \"list-1\", \"3.5\", \"1.2\", \"10.1\", \"2.20\", \"200\"});\n  // numeric\n  ASSERT_THAT(Run({\"sort\", \"list-1\"}).GetVec(), ElementsAre(\"1.2\", \"2.20\", \"3.5\", \"10.1\", \"200\"));\n  // string\n  ASSERT_THAT(Run({\"sort\", \"list-1\", \"ALPHA\"}).GetVec(),\n              ElementsAre(\"1.2\", \"10.1\", \"2.20\", \"200\", \"3.5\"));\n  // desc numeric\n  ASSERT_THAT(Run({\"sort\", \"list-1\", \"DESC\"}).GetVec(),\n              ElementsAre(\"200\", \"10.1\", \"3.5\", \"2.20\", \"1.2\"));\n  // desc string\n  ASSERT_THAT(Run({\"sort\", \"list-1\", \"DESC\", \"ALPHA\"}).GetVec(),\n              ElementsAre(\"3.5\", \"200\", \"2.20\", \"10.1\", \"1.2\"));\n  // limits\n  ASSERT_THAT(Run({\"sort\", \"list-1\", \"LIMIT\", \"0\", \"5\"}).GetVec(),\n              ElementsAre(\"1.2\", \"2.20\", \"3.5\", \"10.1\", \"200\"));\n  ASSERT_THAT(Run({\"sort\", \"list-1\", \"LIMIT\", \"0\", \"10\"}).GetVec(),\n              ElementsAre(\"1.2\", \"2.20\", \"3.5\", \"10.1\", \"200\"));\n  ASSERT_THAT(Run({\"sort\", \"list-1\", \"LIMIT\", \"2\", \"2\"}).GetVec(), ElementsAre(\"3.5\", \"10.1\"));\n  ASSERT_THAT(Run({\"sort\", \"list-1\", \"LIMIT\", \"1\", \"1\"}), \"2.20\");\n  ASSERT_THAT(Run({\"sort\", \"list-1\", \"LIMIT\", \"4\", \"2\"}), \"200\");\n  ASSERT_THAT(Run({\"sort\", \"list-1\", \"LIMIT\", \"5\", \"2\"}), ArrLen(0));\n  // limits desc\n  ASSERT_THAT(Run({\"sort\", \"list-1\", 
\"DESC\", \"LIMIT\", \"0\", \"5\"}).GetVec(),\n              ElementsAre(\"200\", \"10.1\", \"3.5\", \"2.20\", \"1.2\"));\n  ASSERT_THAT(Run({\"sort\", \"list-1\", \"DESC\", \"LIMIT\", \"2\", \"2\"}).GetVec(),\n              ElementsAre(\"3.5\", \"2.20\"));\n  ASSERT_THAT(Run({\"sort\", \"list-1\", \"DESC\", \"LIMIT\", \"1\", \"1\"}), \"10.1\");\n  ASSERT_THAT(Run({\"sort\", \"list-1\", \"DESC\", \"LIMIT\", \"5\", \"2\"}), ArrLen(0));\n\n  // Test set sort\n  Run({\"del\", \"set-1\"});\n  Run({\"sadd\", \"set-1\", \"5.3\", \"4.4\", \"60\", \"99.9\", \"100\", \"9\"});\n  ASSERT_THAT(Run({\"sort\", \"set-1\"}).GetVec(), ElementsAre(\"4.4\", \"5.3\", \"9\", \"60\", \"99.9\", \"100\"));\n  ASSERT_THAT(Run({\"sort\", \"set-1\", \"ALPHA\"}).GetVec(),\n              ElementsAre(\"100\", \"4.4\", \"5.3\", \"60\", \"9\", \"99.9\"));\n  ASSERT_THAT(Run({\"sort\", \"set-1\", \"DESC\"}).GetVec(),\n              ElementsAre(\"100\", \"99.9\", \"60\", \"9\", \"5.3\", \"4.4\"));\n  ASSERT_THAT(Run({\"sort\", \"set-1\", \"DESC\", \"ALPHA\"}).GetVec(),\n              ElementsAre(\"99.9\", \"9\", \"60\", \"5.3\", \"4.4\", \"100\"));\n\n  // Test intset sort\n  Run({\"del\", \"intset-1\"});\n  Run({\"sadd\", \"intset-1\", \"5\", \"4\", \"3\", \"2\", \"1\"});\n  ASSERT_THAT(Run({\"sort\", \"intset-1\"}).GetVec(), ElementsAre(\"1\", \"2\", \"3\", \"4\", \"5\"));\n\n  // Test sorted set sort\n  Run({\"del\", \"zset-1\"});\n  Run({\"zadd\", \"zset-1\", \"0\", \"3.3\", \"0\", \"30.1\", \"0\", \"8.2\"});\n  ASSERT_THAT(Run({\"sort\", \"zset-1\"}).GetVec(), ElementsAre(\"3.3\", \"8.2\", \"30.1\"));\n  ASSERT_THAT(Run({\"sort\", \"zset-1\", \"ALPHA\"}).GetVec(), ElementsAre(\"3.3\", \"30.1\", \"8.2\"));\n  ASSERT_THAT(Run({\"sort\", \"zset-1\", \"DESC\"}).GetVec(), ElementsAre(\"30.1\", \"8.2\", \"3.3\"));\n  ASSERT_THAT(Run({\"sort\", \"zset-1\", \"DESC\", \"ALPHA\"}).GetVec(), ElementsAre(\"8.2\", \"30.1\", \"3.3\"));\n\n  // Test sort with non existent key\n  Run({\"del\", \"list-2\"});\n 
 ASSERT_THAT(Run({\"sort\", \"list-2\"}), ArrLen(0));\n\n  // Test not convertible to double\n  Run({\"lpush\", \"list-2\", \"NOTADOUBLE\"});\n  ASSERT_THAT(Run({\"sort\", \"list-2\"}), ErrArg(\"One or more scores can't be converted into double\"));\n\n  Run({\"set\", \"foo\", \"bar\"});\n  ASSERT_THAT(Run({\"sort\", \"foo\"}), ErrArg(\"WRONGTYPE \"));\n\n  Run({\"rpush\", \"list-3\", \"\"});\n  ASSERT_THAT(Run({\"sort\", \"list-3\"}), \"\");\n\n  Run({\"rpush\", \"list-3\", \"2\", \"0\", \"\", \"-0.14\", \"0.12\", \"-0\", \"-123123\", \"7654\"});\n  ASSERT_THAT(Run({\"sort\", \"list-3\"}).GetVec(),\n              ElementsAre(\"-123123\", \"-0.14\", \"\", \"\", \"-0\", \"0\", \"0.12\", \"2\", \"7654\"));\n\n  Run({\"rpush\", \"NANvalue\", \"nan\"});\n  ASSERT_THAT(Run({\"sort\", \"NANvalue\"}),\n              ErrArg(\"One or more scores can't be converted into double\"));\n}\n\nTEST_F(GenericFamilyTest, SortBug3636) {\n  Run({\"RPUSH\", \"foo\", \"1.100000023841858\", \"1.100000023841858\", \"1.100000023841858\", \"-15710\",\n       \"1.100000023841858\", \"1.100000023841858\", \"1.100000023841858\", \"-15710\", \"-15710\",\n       \"1.100000023841858\", \"-15710\", \"-15710\", \"-15710\", \"-15710\", \"1.100000023841858\", \"-15710\",\n       \"-15710\"});\n  auto resp = Run({\"SORT\", \"foo\", \"desc\", \"alpha\"});\n  ASSERT_THAT(resp, ArrLen(17));\n}\n\nTEST_F(GenericFamilyTest, SortStore) {\n  // Test list sort with params\n  Run({\"del\", \"list-1\"});\n  Run({\"del\", \"list-2\"});\n  Run({\"lpush\", \"list-1\", \"3.5\", \"1.2\", \"10.1\", \"2.20\", \"200\"});\n  // numeric\n  auto resp = Run({\"sort\", \"list-1\", \"store\", \"list-2\"});\n  EXPECT_EQ(5, resp.GetInt());\n  ASSERT_THAT(Run({\"lrange\", \"list-2\", \"0\", \"-1\"}).GetVec(),\n              ElementsAre(\"1.2\", \"2.20\", \"3.5\", \"10.1\", \"200\"));\n\n  // string\n  resp = Run({\"sort\", \"list-1\", \"ALPHA\", \"store\", \"list-2\"});\n  EXPECT_EQ(5, resp.GetInt());\n  
ASSERT_THAT(Run({\"lrange\", \"list-2\", \"0\", \"-1\"}).GetVec(),\n              ElementsAre(\"1.2\", \"10.1\", \"2.20\", \"200\", \"3.5\"));\n\n  // desc numeric\n  resp = Run({\"sort\", \"list-1\", \"DESC\", \"store\", \"list-2\"});\n  EXPECT_EQ(5, resp.GetInt());\n  ASSERT_THAT(Run({\"lrange\", \"list-2\", \"0\", \"-1\"}).GetVec(),\n              ElementsAre(\"200\", \"10.1\", \"3.5\", \"2.20\", \"1.2\"));\n\n  // desc string\n  resp = Run({\"sort\", \"list-1\", \"ALPHA\", \"DESC\", \"store\", \"list-2\"});\n  EXPECT_EQ(5, resp.GetInt());\n  ASSERT_THAT(Run({\"lrange\", \"list-2\", \"0\", \"-1\"}).GetVec(),\n              ElementsAre(\"3.5\", \"200\", \"2.20\", \"10.1\", \"1.2\"));\n\n  // limits\n  resp = Run({\"sort\", \"list-1\", \"LIMIT\", \"0\", \"5\", \"store\", \"list-2\"});\n  EXPECT_EQ(5, resp.GetInt());\n  ASSERT_THAT(Run({\"lrange\", \"list-2\", \"0\", \"-1\"}).GetVec(),\n              ElementsAre(\"1.2\", \"2.20\", \"3.5\", \"10.1\", \"200\"));\n  resp = Run({\"sort\", \"list-1\", \"LIMIT\", \"0\", \"10\", \"store\", \"list-2\"});\n  EXPECT_EQ(5, resp.GetInt());\n  ASSERT_THAT(Run({\"lrange\", \"list-2\", \"0\", \"-1\"}).GetVec(),\n              ElementsAre(\"1.2\", \"2.20\", \"3.5\", \"10.1\", \"200\"));\n  resp = Run({\"sort\", \"list-1\", \"LIMIT\", \"2\", \"2\", \"store\", \"list-2\"});\n  EXPECT_EQ(2, resp.GetInt());\n  ASSERT_THAT(Run({\"lrange\", \"list-2\", \"0\", \"-1\"}).GetVec(), ElementsAre(\"3.5\", \"10.1\"));\n  resp = Run({\"sort\", \"list-1\", \"LIMIT\", \"1\", \"1\", \"store\", \"list-2\"});\n  EXPECT_EQ(1, resp.GetInt());\n  ASSERT_THAT(Run({\"lrange\", \"list-2\", \"0\", \"-1\"}), \"2.20\");\n  resp = Run({\"sort\", \"list-1\", \"LIMIT\", \"4\", \"2\", \"store\", \"list-2\"});\n  EXPECT_EQ(1, resp.GetInt());\n  ASSERT_THAT(Run({\"lrange\", \"list-2\", \"0\", \"-1\"}), \"200\");\n  resp = Run({\"sort\", \"list-1\", \"LIMIT\", \"5\", \"2\", \"store\", \"list-2\"});\n  EXPECT_EQ(0, resp.GetInt());\n  ASSERT_THAT(Run({\"lrange\", 
\"list-2\", \"0\", \"-1\"}), ArrLen(0));\n\n  // Test set sort\n  Run({\"del\", \"set-1\"});\n  Run({\"del\", \"list-3\"});\n  Run({\"sadd\", \"set-1\", \"5.3\", \"4.4\", \"60\", \"99.9\", \"100\", \"9\"});\n  resp = Run({\"sort\", \"set-1\", \"store\", \"list-3\"});\n  EXPECT_EQ(6, resp.GetInt());\n  ASSERT_THAT(Run({\"lrange\", \"list-3\", \"0\", \"-1\"}).GetVec(),\n              ElementsAre(\"4.4\", \"5.3\", \"9\", \"60\", \"99.9\", \"100\"));\n\n  // Test sorted set sort\n  Run({\"del\", \"zset-1\"});\n  Run({\"del\", \"list-4\"});\n  Run({\"zadd\", \"zset-1\", \"0\", \"3.3\", \"0\", \"30.1\", \"0\", \"8.2\"});\n  resp = Run({\"sort\", \"zset-1\", \"store\", \"list-4\"});\n  EXPECT_EQ(3, resp.GetInt());\n  ASSERT_THAT(Run({\"lrange\", \"list-4\", \"0\", \"-1\"}).GetVec(), ElementsAre(\"3.3\", \"8.2\", \"30.1\"));\n\n  // Same key overwrite.\n  Run({\"del\", \"list-1\"});\n  Run({\"del\", \"list-2\"});\n  Run({\"lpush\", \"list-1\", \"3.5\", \"1.2\", \"10.1\", \"2.20\", \"200\"});\n  resp = Run({\"sort\", \"list-1\", \"store\", \"list-1\"});\n  EXPECT_EQ(5, resp.GetInt());\n  ASSERT_THAT(Run({\"lrange\", \"list-1\", \"0\", \"-1\"}).GetVec(),\n              ElementsAre(\"1.2\", \"2.20\", \"3.5\", \"10.1\", \"200\"));\n\n  // Check that the keys should not expire after some time.\n  Run({\"del\", \"list-1\"});\n  Run({\"del\", \"list-2\"});\n  Run({\"lpush\", \"list-1\", \"3.5\", \"1.2\", \"10.1\", \"2.20\", \"200\"});\n  Run({\"sort\", \"list-1\", \"store\", \"list-2\"});\n  AdvanceTime(5000);\n  ASSERT_THAT(Run({\"lrange\", \"list-2\", \"0\", \"-1\"}).GetVec(),\n              ElementsAre(\"1.2\", \"2.20\", \"3.5\", \"10.1\", \"200\"));\n}\n\nTEST_F(GenericFamilyTest, SortStoreResetsExpiry) {\n  // SORT set STORE dest, where dest has an expiry — dest expiry must be cleared.\n  Run({\"del\", \"src\", \"dest\"});\n  Run({\"sadd\", \"src\", \"3\", \"1\", \"2\"});\n  Run({\"sadd\", \"dest\", \"old\"});\n  Run({\"expire\", \"dest\", \"100\"});\n  
EXPECT_GT(Run({\"ttl\", \"dest\"}).GetInt(), 0);\n\n  auto resp = Run({\"sort\", \"src\", \"store\", \"dest\"});\n  EXPECT_EQ(3, resp.GetInt());\n  // Destination must have no expiry after SORT STORE overwrites it.\n  EXPECT_EQ(-1, Run({\"ttl\", \"dest\"}).GetInt());\n  ASSERT_THAT(Run({\"lrange\", \"dest\", \"0\", \"-1\"}).GetVec(), ElementsAre(\"1\", \"2\", \"3\"));\n\n  // SORT src STORE src (same key), src has an expiry — must not crash and must clear expiry.\n  Run({\"del\", \"myset\"});\n  Run({\"sadd\", \"myset\", \"c\", \"a\", \"b\"});\n  Run({\"expire\", \"myset\", \"100\"});\n  EXPECT_GT(Run({\"ttl\", \"myset\"}).GetInt(), 0);\n\n  resp = Run({\"sort\", \"myset\", \"ALPHA\", \"store\", \"myset\"});\n  EXPECT_EQ(3, resp.GetInt());\n  EXPECT_EQ(-1, Run({\"ttl\", \"myset\"}).GetInt());\n  ASSERT_THAT(Run({\"lrange\", \"myset\", \"0\", \"-1\"}).GetVec(), ElementsAre(\"a\", \"b\", \"c\"));\n}\n\nTEST_F(GenericFamilyTest, Sort_RO) {\n  // Test list sort with params\n  Run({\"del\", \"list-1\"});\n  Run({\"lpush\", \"list-1\", \"3.5\", \"1.2\", \"10.1\", \"2.20\", \"200\"});\n  // numeric\n  ASSERT_THAT(Run({\"sort_ro\", \"list-1\"}).GetVec(),\n              ElementsAre(\"1.2\", \"2.20\", \"3.5\", \"10.1\", \"200\"));\n  // string\n  ASSERT_THAT(Run({\"sort_ro\", \"list-1\", \"ALPHA\"}).GetVec(),\n              ElementsAre(\"1.2\", \"10.1\", \"2.20\", \"200\", \"3.5\"));\n  // desc numeric\n  ASSERT_THAT(Run({\"sort_ro\", \"list-1\", \"DESC\"}).GetVec(),\n              ElementsAre(\"200\", \"10.1\", \"3.5\", \"2.20\", \"1.2\"));\n  // desc string\n  ASSERT_THAT(Run({\"sort_ro\", \"list-1\", \"DESC\", \"ALPHA\"}).GetVec(),\n              ElementsAre(\"3.5\", \"200\", \"2.20\", \"10.1\", \"1.2\"));\n  // limits\n  ASSERT_THAT(Run({\"sort_ro\", \"list-1\", \"LIMIT\", \"0\", \"5\"}).GetVec(),\n              ElementsAre(\"1.2\", \"2.20\", \"3.5\", \"10.1\", \"200\"));\n  ASSERT_THAT(Run({\"sort_ro\", \"list-1\", \"LIMIT\", \"0\", \"10\"}).GetVec(),\n              
ElementsAre(\"1.2\", \"2.20\", \"3.5\", \"10.1\", \"200\"));\n  ASSERT_THAT(Run({\"sort_ro\", \"list-1\", \"LIMIT\", \"2\", \"2\"}).GetVec(), ElementsAre(\"3.5\", \"10.1\"));\n  ASSERT_THAT(Run({\"sort_ro\", \"list-1\", \"LIMIT\", \"1\", \"1\"}), \"2.20\");\n  ASSERT_THAT(Run({\"sort_ro\", \"list-1\", \"LIMIT\", \"4\", \"2\"}), \"200\");\n  ASSERT_THAT(Run({\"sort_ro\", \"list-1\", \"LIMIT\", \"5\", \"2\"}), ArrLen(0));\n  // limits desc\n  ASSERT_THAT(Run({\"sort_ro\", \"list-1\", \"DESC\", \"LIMIT\", \"0\", \"5\"}).GetVec(),\n              ElementsAre(\"200\", \"10.1\", \"3.5\", \"2.20\", \"1.2\"));\n  ASSERT_THAT(Run({\"sort_ro\", \"list-1\", \"DESC\", \"LIMIT\", \"2\", \"2\"}).GetVec(),\n              ElementsAre(\"3.5\", \"2.20\"));\n  ASSERT_THAT(Run({\"sort_ro\", \"list-1\", \"DESC\", \"LIMIT\", \"1\", \"1\"}), \"10.1\");\n  ASSERT_THAT(Run({\"sort_ro\", \"list-1\", \"DESC\", \"LIMIT\", \"5\", \"2\"}), ArrLen(0));\n\n  // Test set sort\n  Run({\"del\", \"set-1\"});\n  Run({\"sadd\", \"set-1\", \"5.3\", \"4.4\", \"60\", \"99.9\", \"100\", \"9\"});\n  ASSERT_THAT(Run({\"sort_ro\", \"set-1\"}).GetVec(),\n              ElementsAre(\"4.4\", \"5.3\", \"9\", \"60\", \"99.9\", \"100\"));\n  ASSERT_THAT(Run({\"sort_ro\", \"set-1\", \"ALPHA\"}).GetVec(),\n              ElementsAre(\"100\", \"4.4\", \"5.3\", \"60\", \"9\", \"99.9\"));\n  ASSERT_THAT(Run({\"sort_ro\", \"set-1\", \"DESC\"}).GetVec(),\n              ElementsAre(\"100\", \"99.9\", \"60\", \"9\", \"5.3\", \"4.4\"));\n  ASSERT_THAT(Run({\"sort_ro\", \"set-1\", \"DESC\", \"ALPHA\"}).GetVec(),\n              ElementsAre(\"99.9\", \"9\", \"60\", \"5.3\", \"4.4\", \"100\"));\n\n  // Test intset sort\n  Run({\"del\", \"intset-1\"});\n  Run({\"sadd\", \"intset-1\", \"5\", \"4\", \"3\", \"2\", \"1\"});\n  ASSERT_THAT(Run({\"sort_ro\", \"intset-1\"}).GetVec(), ElementsAre(\"1\", \"2\", \"3\", \"4\", \"5\"));\n\n  // Test sorted set sort\n  Run({\"del\", \"zset-1\"});\n  Run({\"zadd\", \"zset-1\", \"0\", \"3.3\", 
\"0\", \"30.1\", \"0\", \"8.2\"});\n  ASSERT_THAT(Run({\"sort_ro\", \"zset-1\"}).GetVec(), ElementsAre(\"3.3\", \"8.2\", \"30.1\"));\n  ASSERT_THAT(Run({\"sort_ro\", \"zset-1\", \"ALPHA\"}).GetVec(), ElementsAre(\"3.3\", \"30.1\", \"8.2\"));\n  ASSERT_THAT(Run({\"sort_ro\", \"zset-1\", \"DESC\"}).GetVec(), ElementsAre(\"30.1\", \"8.2\", \"3.3\"));\n  ASSERT_THAT(Run({\"sort_ro\", \"zset-1\", \"DESC\", \"ALPHA\"}).GetVec(),\n              ElementsAre(\"8.2\", \"30.1\", \"3.3\"));\n\n  // Test sort with non existent key\n  Run({\"del\", \"list-2\"});\n  ASSERT_THAT(Run({\"sort_ro\", \"list-2\"}), ArrLen(0));\n\n  // Test not convertible to double\n  Run({\"lpush\", \"list-2\", \"NOTADOUBLE\"});\n  ASSERT_THAT(Run({\"sort_ro\", \"list-2\"}),\n              ErrArg(\"One or more scores can't be converted into double\"));\n\n  Run({\"set\", \"foo\", \"bar\"});\n  ASSERT_THAT(Run({\"sort_ro\", \"foo\"}), ErrArg(\"WRONGTYPE \"));\n\n  Run({\"rpush\", \"list-3\", \"\"});\n  ASSERT_THAT(Run({\"sort_ro\", \"list-3\"}), \"\");\n\n  Run({\"rpush\", \"list-3\", \"2\", \"0\", \"\", \"-0.14\", \"0.12\", \"-0\", \"-123123\", \"7654\"});\n  ASSERT_THAT(Run({\"sort_ro\", \"list-3\"}).GetVec(),\n              ElementsAre(\"-123123\", \"-0.14\", \"\", \"\", \"-0\", \"0\", \"0.12\", \"2\", \"7654\"));\n\n  Run({\"rpush\", \"NANvalue\", \"nan\"});\n  ASSERT_THAT(Run({\"sort_ro\", \"NANvalue\"}),\n              ErrArg(\"One or more scores can't be converted into double\"));\n\n  // Test store option should not work\n  ASSERT_THAT(Run({\"sort_ro\", \"list-1\", \"store\", \"list-2\"}), ErrArg(\"syntax error\"));\n}\n\nTEST_F(GenericFamilyTest, SortROBug3636) {\n  Run({\"RPUSH\", \"foo\", \"1.100000023841858\", \"1.100000023841858\", \"1.100000023841858\", \"-15710\",\n       \"1.100000023841858\", \"1.100000023841858\", \"1.100000023841858\", \"-15710\", \"-15710\",\n       \"1.100000023841858\", \"-15710\", \"-15710\", \"-15710\", \"-15710\", \"1.100000023841858\", \"-15710\",\n       
\"-15710\"});\n  auto resp = Run({\"SORT_RO\", \"foo\", \"desc\", \"alpha\"});\n  ASSERT_THAT(resp, ArrLen(17));\n}\n\nTEST_F(GenericFamilyTest, TimeNoKeys) {\n  auto resp = Run({\"time\"});\n  EXPECT_THAT(resp, ArrLen(2));\n  EXPECT_THAT(resp.GetVec()[0], ArgType(RespExpr::INT64));\n  EXPECT_THAT(resp.GetVec()[1], ArgType(RespExpr::INT64));\n\n  // Check that time is the same inside a transaction.\n  Run({\"multi\"});\n  Run({\"time\"});\n  usleep(2000);\n  Run({\"time\"});\n  resp = Run({\"exec\"});\n\n  EXPECT_THAT(resp, RespArray(ElementsAre(RespArray(ElementsAre(Not(IntArg(0)), _)),\n                                          RespArray(ElementsAre(Not(IntArg(0)), _)))));\n\n  for (int i = 0; i < 2; ++i) {\n    int64_t val0 = get<int64_t>(resp.GetVec()[0].GetVec()[i].u);\n    int64_t val1 = get<int64_t>(resp.GetVec()[1].GetVec()[i].u);\n    EXPECT_EQ(val0, val1);\n  }\n}\n\nTEST_F(GenericFamilyTest, TimeWithKeys) {\n  auto resp = Run({\"time\"});\n  EXPECT_THAT(resp, ArrLen(2));\n  EXPECT_THAT(resp.GetVec()[0], ArgType(RespExpr::INT64));\n  EXPECT_THAT(resp.GetVec()[1], ArgType(RespExpr::INT64));\n\n  // Check that time is the same inside a transaction.\n  Run({\"multi\"});\n  Run({\"time\"});\n  usleep(2000);\n  Run({\"time\"});\n  Run({\"get\", \"x\"});\n  resp = Run({\"exec\"});\n\n  EXPECT_THAT(resp, RespArray(ElementsAre(RespArray(ElementsAre(Not(IntArg(0)), _)),\n                                          RespArray(ElementsAre(Not(IntArg(0)), _)), _)));\n\n  for (int i = 0; i < 2; ++i) {\n    int64_t val0 = get<int64_t>(resp.GetVec()[0].GetVec()[i].u);\n    int64_t val1 = get<int64_t>(resp.GetVec()[1].GetVec()[i].u);\n    EXPECT_EQ(val0, val1);\n  }\n}\n\nTEST_F(GenericFamilyTest, Persist) {\n  auto resp = Run({\"set\", \"mykey\", \"somevalue\"});\n  EXPECT_EQ(resp, \"OK\");\n  // Key without expiration time - return 0\n  EXPECT_EQ(0, CheckedInt({\"persist\", \"mykey\"}));\n  EXPECT_EQ(-1, CheckedInt({\"TTL\", \"mykey\"}));\n  // set expiration time and try 
again\n  resp = Run({\"EXPIRE\", \"mykey\", \"10\"});\n  EXPECT_EQ(10, CheckedInt({\"TTL\", \"mykey\"}));\n  EXPECT_EQ(1, CheckedInt({\"persist\", \"mykey\"}));\n  EXPECT_EQ(-1, CheckedInt({\"TTL\", \"mykey\"}));\n  // persist on key that does not exist should also return 0\n  EXPECT_EQ(0, CheckedInt({\"persist\", \"keythatdoesnotexist\"}));\n}\n\nTEST_F(GenericFamilyTest, Dump) {\n  ASSERT_EQ(RDB_SER_VERSION, 9);\n  uint8_t EXPECTED_STRING_DUMP[13] = {0x00, 0xc0, 0x13, 0x09, 0x00, 0x23, 0x13,\n                                      0x6f, 0x4d, 0x68, 0xf6, 0x35, 0x6e};\n  uint8_t EXPECTED_HASH_DUMP[] = {0x10, 0xc,  0xc,  0x0,  0x0, 0x0,  0x2,  0x0,\n                                  0x13, 0x1,  0xc4, 0xd2, 0x2, 0xff, 0x9,  0x0,\n                                  0x68, 0x4d, 0x73, 0xa4, 0xf, 0x23, 0x4f, 0xc7};\n\n  uint8_t EXPECTED_LIST_DUMP[] = {0x12, 0x01, 0x02, '\\t', '\\t', 0x00, 0x00, 0x00,\n                                  0x01, 0x00, 0x14, 0x01, 0xff, '\\t', 0x00, 0xfb,\n                                  0xbd, 0x36, 0xf8, 0xb4, 't',  '%',  ';'};\n\n  // Check string dump\n  auto resp = Run({\"set\", \"z\", \"19\"});\n  EXPECT_EQ(resp, \"OK\");\n  resp = Run({\"dump\", \"z\"});\n  auto dump = resp.GetBuf();\n  ASSERT_EQ(ToSV(dump), ToSV(EXPECTED_STRING_DUMP));\n\n  // Check list dump\n  EXPECT_EQ(1, CheckedInt({\"rpush\", \"l\", \"20\"}));\n  resp = Run({\"dump\", \"l\"});\n  dump = resp.GetBuf();\n  ASSERT_EQ(ToSV(dump), ToSV(EXPECTED_LIST_DUMP)) << absl::CHexEscape(resp.GetString());\n\n  // Check for hash dump\n  EXPECT_EQ(1, CheckedInt({\"hset\", \"z2\", \"19\", \"1234\"}));\n  resp = Run({\"dump\", \"z2\"});\n  dump = resp.GetBuf();\n  ASSERT_EQ(ToSV(dump), ToSV(EXPECTED_HASH_DUMP));\n\n  // Check that when running with none existing key we're getting nil\n  resp = Run({\"dump\", \"foo\"});\n  EXPECT_EQ(resp.type, RespExpr::NIL);\n}\n\nTEST_F(GenericFamilyTest, Restore) {\n  using std::chrono::duration_cast;\n  using std::chrono::milliseconds;\n  using 
std::chrono::seconds;\n  using std::chrono::system_clock;\n\n  // redis 6 with RDB_VERSION 9\n  uint8_t STRING_DUMP_REDIS[] = {0x00, 0xc1, 0xd2, 0x04, 0x09, 0x00, 0xd0,\n                                 0x75, 0x59, 0x6d, 0x10, 0x04, 0x3f, 0x5c};\n  auto resp = Run({\"set\", \"exiting-key\", \"1234\"});\n  EXPECT_EQ(resp, \"OK\");\n\n  // try to restore into existing key - this should fail. We should get BUSYKEY error\n  ASSERT_THAT(Run({\"restore\", \"exiting-key\", \"0\", ToSV(STRING_DUMP_REDIS)}),\n              ErrArg(\"BUSYKEY Target key name already exists.\"));\n\n  // Try restore while setting expiration into the past\n  // note that value for expiration is just some valid unix time stamp from the past\n  resp = Run(\n      {\"restore\", \"exiting-key\", \"1665476212900\", ToSV(STRING_DUMP_REDIS), \"ABSTTL\", \"REPLACE\"});\n  ASSERT_EQ(resp, \"OK\");\n  resp = Run({\"get\", \"exiting-key\"});\n  EXPECT_EQ(resp.type, RespExpr::NIL);  // it was deleted as a result of restore action\n\n  // Test for string that we can successfully load the dumped data and read it back\n  resp = Run({\"restore\", \"new-key\", \"0\", ToSV(STRING_DUMP_REDIS)});\n  EXPECT_EQ(resp, \"OK\");\n  resp = Run({\"get\", \"new-key\"});\n  EXPECT_EQ(\"1234\", resp);\n  resp = Run({\"dump\", \"new-key\"});\n  auto dump = resp.GetBuf();\n  ASSERT_EQ(ToSV(dump), ToSV(STRING_DUMP_REDIS));\n\n  // test for list\n  EXPECT_EQ(1, CheckedInt({\"rpush\", \"orig-list\", \"20\"}));\n  resp = Run({\"dump\", \"orig-list\"});\n  dump = resp.GetBuf();\n  resp = Run({\"restore\", \"new-list\", \"10\", ToSV(dump)});\n  EXPECT_EQ(resp, \"OK\");\n  resp = Run({\"lpop\", \"new-list\"});\n  EXPECT_EQ(\"20\", resp);\n\n  // run with hash type\n  EXPECT_EQ(1, CheckedInt({\"hset\", \"orig-hash\", \"123\", \"45678\"}));\n  resp = Run({\"dump\", \"orig-hash\"});\n  dump = resp.GetBuf();\n  resp = Run({\"restore\", \"new-hash\", \"1\", ToSV(dump)});\n  EXPECT_EQ(resp, \"OK\");\n  EXPECT_EQ(1, CheckedInt({\"hexists\", 
\"new-hash\", \"123\"}));\n\n  // test with replace and no TTL\n  resp = Run({\"set\", \"string-key\", \"hello world\"});\n  EXPECT_EQ(resp, \"OK\");\n  resp = Run({\"dump\", \"string-key\"});\n  dump = resp.GetBuf();\n  // this will change the value from \"hello world\" to \"1234\"\n  resp = Run({\"restore\", \"string-key\", \"7000\", ToSV(STRING_DUMP_REDIS), \"REPLACE\"});\n  resp = Run({\"get\", \"string-key\"});\n  EXPECT_EQ(\"1234\", resp);\n  // check TTL validity\n  EXPECT_EQ(CheckedInt({\"pttl\", \"string-key\"}), 7000);\n\n  // Make check about ttl with abs time, restoring back to \"hello world\"\n  resp = Run({\"restore\", \"string-key\", absl::StrCat(TEST_current_time_ms + 2000), ToSV(dump),\n              \"ABSTTL\", \"REPLACE\"});\n  resp = Run({\"get\", \"string-key\"});\n  EXPECT_EQ(\"hello world\", resp);\n  EXPECT_EQ(CheckedInt({\"pttl\", \"string-key\"}), 2000);\n\n  // Last but not least - just make sure that we are good without TTL as well\n  resp = Run({\"restore\", \"string-key\", \"0\", ToSV(STRING_DUMP_REDIS), \"REPLACE\"});\n  resp = Run({\"get\", \"string-key\"});\n  EXPECT_EQ(\"1234\", resp);\n  EXPECT_EQ(CheckedInt({\"ttl\", \"string-key\"}), -1);\n\n  // The following set was created in Redis 7 with rdb version 11 and it's listpack encoded.\n  // We should be able to read it and convert it to our own format DenseSet or HT\n  // sadd myset \"acme\"\n  // dump myset\n  uint8_t SET_LISTPACK_DUMP[] = {0x14, 0x0D, 0x0D, 0x00, 0x00, 0x00, 0x01, 0x00, 0x84,\n                                 0x61, 0x63, 0x6D, 0x65, 0x05, 0xff, 0x0b, 0x00, 0xc1,\n                                 0x37, 0x5c, 0xe5, 0xe2, 0xc0, 0xdd, 0x27};\n  resp = Run({\"restore\", \"listpack-set\", \"0\", ToSV(SET_LISTPACK_DUMP)});\n  resp = Run({\"sismember\", \"listpack-set\", \"acme\"});\n  EXPECT_EQ(true, resp.GetInt().has_value());\n  EXPECT_EQ(1, resp.GetInt());\n\n  // The following zset was created in Redis 7 with rdb version 11 and it's listpack encoded.\n  // zadd 
my-zset 1 \"elon\"\n  // dump my-zset\n  uint8_t ZSET_LISTPACK_DUMP[] = {0x11, 0x0f, 0x0f, 0x00, 0x00, 0x00, 0x02, 0x00, 0x84,\n                                  0x65, 0x6c, 0x6f, 0x6e, 0x05, 0x01, 0x01, 0xff, 0x0b,\n                                  0x00, 0xc8, 0x01, 0x2c, 0xad, 0xd9, 0xa3, 0x99, 0x5e};\n\n  resp = Run({\"restore\", \"my-zset\", \"0\", ToSV(ZSET_LISTPACK_DUMP)});\n  EXPECT_EQ(resp.GetString(), \"OK\");\n  resp = Run({\"zrange\", \"my-zset\", \"0\", \"-1\"});\n  EXPECT_EQ(\"elon\", resp.GetString());\n\n  // corrupt the dump file but keep the crc correct.\n  ZSET_LISTPACK_DUMP[0] = 0x12;\n  uint8_t crc64[8] = {0x4e, 0xa3, 0x4c, 0x89, 0xc4, 0x8b, 0xd9, 0xe4};\n  memcpy(ZSET_LISTPACK_DUMP + 19, crc64, 8);\n  resp = Run({\"restore\", \"invalid\", \"0\", ToSV(ZSET_LISTPACK_DUMP)});\n  EXPECT_THAT(resp, ErrArg(\"ERR Bad data format\"));\n}\n\nTEST_F(GenericFamilyTest, Info) {\n  InitWithDbFilename();  // Needed for `save`\n\n  auto get_rdb_changes_since_last_save = [](const string& str) -> size_t {\n    const string matcher = \"rdb_changes_since_last_success_save:\";\n    const auto pos = str.find(matcher) + matcher.size();\n    const auto sub = str.substr(pos, 1);\n    return atoi(sub.c_str());\n  };\n\n  EXPECT_EQ(Run({\"set\", \"k\", \"1\"}), \"OK\");\n  auto resp = Run({\"info\", \"persistence\"});\n  EXPECT_EQ(1, get_rdb_changes_since_last_save(resp.GetString()));\n\n  EXPECT_EQ(Run({\"set\", \"k\", \"1\"}), \"OK\");\n  resp = Run({\"info\", \"persistence\"});\n  EXPECT_EQ(2, get_rdb_changes_since_last_save(resp.GetString()));\n\n  EXPECT_EQ(Run({\"set\", \"k2\", \"2\"}), \"OK\");\n  resp = Run({\"info\", \"persistence\"});\n  EXPECT_EQ(3, get_rdb_changes_since_last_save(resp.GetString()));\n\n  EXPECT_EQ(Run({\"save\"}), \"OK\");\n  resp = Run({\"info\", \"persistence\"});\n  EXPECT_EQ(0, get_rdb_changes_since_last_save(resp.GetString()));\n\n  EXPECT_EQ(Run({\"set\", \"k2\", \"2\"}), \"OK\");\n  resp = Run({\"info\", \"persistence\"});\n  
EXPECT_EQ(1, get_rdb_changes_since_last_save(resp.GetString()));\n\n  EXPECT_EQ(Run({\"bgsave\"}), \"OK\");\n  bool cond = WaitUntilCondition(\n      [&]() {\n        resp = Run({\"info\", \"persistence\"});\n        return get_rdb_changes_since_last_save(resp.GetString()) == 0;\n      },\n      500ms);\n  EXPECT_TRUE(cond);\n\n  EXPECT_EQ(Run({\"set\", \"k3\", \"3\"}), \"OK\");\n  resp = Run({\"info\", \"persistence\"});\n  EXPECT_EQ(1, get_rdb_changes_since_last_save(resp.GetString()));\n\n  EXPECT_THAT(Run({\"del\", \"k3\"}), IntArg(1));\n  resp = Run({\"info\", \"persistence\"});\n  EXPECT_EQ(2, get_rdb_changes_since_last_save(resp.GetString()));\n}\n\nTEST_F(GenericFamilyTest, FieldTtl) {\n  TEST_current_time_ms = kMemberExpiryBase * 1000;  // to reset to test time.\n  EXPECT_THAT(Run({\"saddex\", \"key\", \"1\", \"val1\"}), IntArg(1));\n  EXPECT_THAT(Run({\"saddex\", \"key\", \"2\", \"val2\"}), IntArg(1));\n  EXPECT_THAT(Run({\"sadd\", \"key\", \"val3\"}), IntArg(1));\n\n  EXPECT_EQ(-2, CheckedInt({\"fieldttl\", \"nokey\", \"val1\"}));  // key not found\n  EXPECT_EQ(-3, CheckedInt({\"fieldttl\", \"key\", \"bar\"}));     // field not found\n  EXPECT_EQ(1, CheckedInt({\"fieldttl\", \"key\", \"val1\"}));\n  EXPECT_EQ(2, CheckedInt({\"fieldttl\", \"key\", \"val2\"}));\n  EXPECT_EQ(-1, CheckedInt({\"fieldttl\", \"key\", \"val3\"}));\n\n  AdvanceTime(1100);\n  EXPECT_EQ(-3, CheckedInt({\"fieldttl\", \"key\", \"val1\"}));\n  EXPECT_EQ(1, CheckedInt({\"fieldttl\", \"key\", \"val2\"}));\n\n  Run({\"set\", \"str\", \"val\"});\n  EXPECT_THAT(Run({\"fieldttl\", \"str\", \"bar\"}), ErrArg(\"wrong\"));\n\n  EXPECT_EQ(2, CheckedInt({\"HSETEX\", \"k2\", \"1\", \"f1\", \"v1\", \"f2\", \"v2\"}));\n  EXPECT_EQ(1, CheckedInt({\"HSET\", \"k2\", \"f3\", \"v3\"}));\n\n  EXPECT_EQ(1, CheckedInt({\"fieldttl\", \"k2\", \"f1\"}));\n  EXPECT_EQ(-1, CheckedInt({\"fieldttl\", \"k2\", \"f3\"}));\n  EXPECT_EQ(-3, CheckedInt({\"fieldttl\", \"k2\", \"f4\"}));\n}\n\nTEST_F(GenericFamilyTest, 
RandomKey) {\n  auto resp = Run({\"randomkey\"});\n  EXPECT_EQ(resp.type, RespExpr::NIL);\n\n  resp = Run({\"set\", \"k1\", \"1\"});\n  EXPECT_EQ(Run({\"randomkey\"}), \"k1\");\n}\n\nTEST_F(GenericFamilyTest, JsonType) {\n  auto resp = Run({\"json.set\", \"json\", \"$\", R\"({\"example\":\"value\"})\"});\n  EXPECT_EQ(resp, \"OK\");\n\n  resp = Run({\"type\", \"json\"});\n  EXPECT_EQ(resp, \"ReJSON-RL\") << \"For the Redis GUI the register of the JSON type is important. \"\n                                  \"See https://github.com/dragonflydb/dragonfly/issues/3386\";\n\n  // Test json type lowercase works for the SCAN command\n  resp = Run({\"scan\", \"0\", \"type\", \"rejson-rl\"});\n  EXPECT_THAT(resp, ArrLen(2));\n  auto vec = StrArray(resp.GetVec()[1]);\n  ASSERT_THAT(vec, ElementsAre(\"json\"));\n}\n\nTEST_F(GenericFamilyTest, FieldExpireSet) {\n  Run({\"SADD\", \"key\", \"a\", \"b\", \"c\"});\n  AdvanceTime(2'000);\n  EXPECT_THAT(Run({\"FIELDEXPIRE\", \"key\", \"10\", \"a\", \"b\", \"c\"}),\n              RespArray(ElementsAre(IntArg(1), IntArg(1), IntArg(1))));\n  EXPECT_EQ(10, CheckedInt({\"fieldttl\", \"key\", \"a\"}));\n  AdvanceTime(10'000);\n  EXPECT_THAT(Run({\"SMEMBERS\", \"key\"}), RespArray(ElementsAre()));\n}\n\nTEST_F(GenericFamilyTest, FieldExpireHset) {\n  for (int i = 0; i < 3; ++i) {\n    EXPECT_EQ(CheckedInt({\"HSET\", \"key\", absl::StrCat(\"k\", i), \"v\"}), 1);\n  }\n  AdvanceTime(2'000);\n  EXPECT_THAT(Run({\"FIELDEXPIRE\", \"key\", \"10\", \"k0\", \"k1\", \"k2\"}),\n              RespArray(ElementsAre(IntArg(1), IntArg(1), IntArg(1))));\n  EXPECT_EQ(10, CheckedInt({\"fieldttl\", \"key\", \"k0\"}));\n  AdvanceTime(10'000);\n  EXPECT_THAT(Run({\"HGETALL\", \"key\"}), RespArray(ElementsAre()));\n}\n\nTEST_F(GenericFamilyTest, FieldExpireNoSuchField) {\n  EXPECT_EQ(CheckedInt({\"SADD\", \"key\", \"a\"}), 1);\n  EXPECT_EQ(CheckedInt({\"HSET\", \"key2\", \"k0\", \"v0\"}), 1);\n  EXPECT_THAT(Run({\"FIELDEXPIRE\", \"key\", \"10\", \"a\", 
\"b\"}),\n              RespArray(ElementsAre(IntArg(1), IntArg(-2))));\n  EXPECT_THAT(Run({\"FIELDEXPIRE\", \"key2\", \"10\", \"k0\", \"b\"}),\n              RespArray(ElementsAre(IntArg(1), IntArg(-2))));\n}\n\nTEST_F(GenericFamilyTest, FieldExpireNoSuchKey) {\n  EXPECT_THAT(Run({\"FIELDEXPIRE\", \"key\", \"10\", \"a\", \"b\"}),\n              RespArray(ElementsAre(IntArg(-2), IntArg(-2))));\n}\n\nTEST_F(GenericFamilyTest, ExpireTime) {\n  EXPECT_EQ(-2, CheckedInt({\"EXPIRETIME\", \"foo\"}));\n  EXPECT_EQ(-2, CheckedInt({\"PEXPIRETIME\", \"foo\"}));\n  Run({\"set\", \"foo\", \"bar\"});\n  EXPECT_EQ(-1, CheckedInt({\"EXPIRETIME\", \"foo\"}));\n  EXPECT_EQ(-1, CheckedInt({\"PEXPIRETIME\", \"foo\"}));\n\n  // set expiry\n  uint64_t expire_time_in_ms = TEST_current_time_ms + 5000;\n  uint64_t expire_time_in_seconds = (expire_time_in_ms + 500) / 1000;\n  Run({\"pexpireat\", \"foo\", absl::StrCat(expire_time_in_ms)});\n  EXPECT_EQ(expire_time_in_seconds, CheckedInt({\"EXPIRETIME\", \"foo\"}));\n  EXPECT_EQ(expire_time_in_ms, CheckedInt({\"PEXPIRETIME\", \"foo\"}));\n}\n\nTEST_F(GenericFamilyTest, RestoreOOM) {\n  max_memory_limit = 20000000;\n  Run({\"set\", \"src\", string(5000, 'x')});\n  auto resp = Run({\"dump\", \"src\"});\n\n  string dump = resp.GetString();\n\n  // Let Dragonfly propagate max_memory_limit to shards. 
It does not have to be precise,\n  // the loop should have enough time for the internal processes to progress.\n  usleep(10000);\n  unsigned i = 0;\n  for (; i < 10000; ++i) {\n    resp = Run({\"restore\", absl::StrCat(\"dst\", i), \"0\", dump});\n    if (resp != \"OK\")\n      break;\n  }\n  ASSERT_LT(i, 10000);\n  EXPECT_THAT(resp, ErrArg(\"Out of memory\"));\n}\n\nTEST_F(GenericFamilyTest, Bug4466) {\n  auto resp = Run({\"SCAN\", \"9223372036854775808\"});  // an invalid cursor should not crash us.\n  EXPECT_THAT(resp, RespElementsAre(\"0\", RespElementsAre()));\n}\n\nTEST_F(GenericFamilyTest, Unlink) {\n  for (unsigned i = 0; i < 1000; ++i) {\n    unsigned start = i * 10;\n    vector<string> cmd = {\"SADD\", \"s1\"};\n    for (unsigned j = 0; j < 10; ++j) {\n      cmd.push_back(absl::StrCat(\"f\", start + j));\n    }\n    auto resp = Run(absl::MakeSpan(cmd));\n    ASSERT_THAT(resp, IntArg(10));\n    cmd[1] = \"s2\";\n    resp = Run(absl::MakeSpan(cmd));\n    ASSERT_THAT(resp, IntArg(10));\n  }\n  auto resp = Run({\"unlink\", \"s1\", \"s2\"});\n  EXPECT_THAT(resp, IntArg(2));\n}\n\nTEST_F(GenericFamilyTest, Copy) {\n  RespExpr resp;\n  string b_val(32, 'b');\n  string x_val(32, 'x');\n\n  resp = Run({\"mset\", \"x\", x_val, \"b\", b_val});\n  ASSERT_EQ(resp, \"OK\");\n  ASSERT_EQ(2, last_cmd_dbg_info_.shards_count);\n\n  resp = Run({\"COPY\", \"z\", \"b\"});\n  ASSERT_THAT(resp, IntArg(0));\n\n  resp = Run({\"COPY\", \"b\", \"c\"});\n  ASSERT_THAT(resp, IntArg(1));\n  ASSERT_EQ(b_val, Run({\"get\", \"c\"}));\n\n  resp = Run({\"COPY\", \"x\", \"b\", \"REPLACE\"});\n  ASSERT_THAT(resp, IntArg(1));\n\n  ASSERT_EQ(x_val, Run({\"get\", \"x\"}));\n  ASSERT_EQ(x_val, Run({\"get\", \"b\"}));\n  EXPECT_EQ(CheckedInt({\"exists\", \"x\", \"b\"}), 2);\n\n  const char* keys[2] = {\"b\", \"x\"};\n  auto ren_fb = pp_->at(0)->LaunchFiber([&] {\n    for (size_t i = 0; i < 200; ++i) {\n      int j = i % 2;\n      auto resp = Run({\"COPY\", keys[j], keys[1 - j], \"REPLACE\"});\n   
   ASSERT_THAT(resp, IntArg(1));\n    }\n  });\n\n  auto exist_fb = pp_->at(2)->LaunchFiber([&] {\n    for (size_t i = 0; i < 300; ++i) {\n      int64_t resp = CheckedInt({\"exists\", \"x\", \"b\"});\n      ASSERT_EQ(2, resp);\n    }\n  });\n\n  exist_fb.Join();\n  ren_fb.Join();\n}\n\nTEST_F(GenericFamilyTest, CopyNonString) {\n  EXPECT_EQ(1, CheckedInt({\"lpush\", \"x\", \"elem\"}));\n  auto resp = Run({\"COPY\", \"x\", \"b\"});\n  ASSERT_THAT(resp, IntArg(1));\n  ASSERT_EQ(2, last_cmd_dbg_info_.shards_count);\n\n  EXPECT_EQ(1, CheckedInt({\"del\", \"x\"}));\n  EXPECT_EQ(1, CheckedInt({\"del\", \"b\"}));\n}\n\nTEST_F(GenericFamilyTest, CopyBinary) {\n  const char kKey1[] = \"\\x01\\x02\\x03\\x04\";\n  const char kKey2[] = \"\\x05\\x06\\x07\\x08\";\n\n  Run({\"set\", kKey1, \"bar\"});\n  Run({\"COPY\", kKey1, kKey2});\n  EXPECT_EQ(Run({\"get\", kKey1}), \"bar\");\n  EXPECT_EQ(Run({\"get\", kKey2}), \"bar\");\n}\n\nTEST_F(GenericFamilyTest, CopyTTL) {\n  Run({\"setex\", \"k1\", \"10\", \"bar\"});\n\n  ASSERT_THAT(Run({\"COPY\", \"k1\", \"k2\"}), IntArg(1));\n  EXPECT_THAT(Run({\"ttl\", \"k2\"}), 10);\n}\n\nTEST_F(GenericFamilyTest, CopySameName) {\n  ASSERT_THAT(Run({\"COPY\", \"k1\", \"k1\"}), ErrArg(\"source and destination objects are the same\"));\n\n  ASSERT_EQ(Run({\"set\", \"k1\", \"v\"}), \"OK\");\n  ASSERT_THAT(Run({\"COPY\", \"k1\", \"k1\"}), ErrArg(\"source and destination objects are the same\"));\n}\n\nTEST_F(GenericFamilyTest, CopyToDB) {\n  // we don't support DB arg for now\n  ASSERT_THAT(Run({\"COPY\", \"k1\", \"k1\", \"DB\", \"SOME_DB\"}), ErrArg(\"syntax error\"));\n}\n\nTEST_F(GenericFamilyTest, CopyKeyExists) {\n  Run({\"set\", \"source\", \"value1\"});\n  Run({\"set\", \"destination\", \"value2\"});\n\n  ASSERT_THAT(Run({\"COPY\", \"source\", \"destination\"}), IntArg(0));\n\n  EXPECT_EQ(Run({\"get\", \"destination\"}), \"value2\");\n  EXPECT_EQ(Run({\"get\", \"source\"}), \"value1\");\n\n  ASSERT_THAT(Run({\"COPY\", \"source\", 
\"destination\", \"REPLACE\"}), IntArg(1));\n  EXPECT_EQ(Run({\"get\", \"destination\"}), \"value1\");\n}\n\nTEST_F(GenericFamilyTest, HashFieldExpiryDuringDeserialize) {\n  Run({\"HSETEX\", \"src\", \"1\", \"field1\", \"value1\"});\n\n  // Advance time past field TTL - now field is expired\n  AdvanceTime(2000);\n\n  Run({\"RENAME\", \"src\", \"dst\"});\n}\n\nTEST_F(GenericFamilyTest, SortNegativeLimit) {\n  Run({\"lpush\", \"list-neg\", \"1\", \"2\", \"3\", \"4\", \"5\"});\n\n  // Negative offset\n  auto resp = Run({\"sort\", \"list-neg\", \"LIMIT\", \"-1\", \"2\"});\n  ASSERT_THAT(resp, ErrArg(\"value is not an integer\"));\n\n  // Negative limit\n  resp = Run({\"sort\", \"list-neg\", \"LIMIT\", \"0\", \"-1\"});\n  ASSERT_THAT(resp, ErrArg(\"value is not an integer\"));\n\n  // Both negative\n  resp = Run({\"sort\", \"list-neg\", \"LIMIT\", \"-1\", \"-1\"});\n  ASSERT_THAT(resp, ErrArg(\"value is not an integer\"));\n}\n\nTEST_F(GenericFamilyTest, SortBy) {\n  Run({\"del\", \"list-1\"});\n  Run({\"lpush\", \"list-1\", \"1\", \"2\", \"3\"});\n  Run({\"set\", \"w_1\", \"30\"});\n  Run({\"set\", \"w_2\", \"20\"});\n  Run({\"set\", \"w_3\", \"10\"});\n\n  // standard sort\n  auto resp = Run({\"sort\", \"list-1\", \"BY\", \"w_*\"});\n  ASSERT_THAT(resp, RespElementsAre(\"3\", \"2\", \"1\"));\n\n  // desc\n  ASSERT_THAT(Run({\"sort\", \"list-1\", \"BY\", \"w_*\", \"DESC\"}), RespElementsAre(\"1\", \"2\", \"3\"));\n\n  // alpha\n  Run({\"set\", \"s_1\", \"c\"});\n  Run({\"set\", \"s_2\", \"b\"});\n  Run({\"set\", \"s_3\", \"a\"});\n  ASSERT_THAT(Run({\"sort\", \"list-1\", \"BY\", \"s_*\", \"ALPHA\"}), RespElementsAre(\"3\", \"2\", \"1\"));\n\n  // nosort, lpush reverses order, so 3, 2, 1 is insertion order (or close to it)\n  ASSERT_THAT(Run({\"sort\", \"list-1\", \"BY\", \"nosort\"}), RespElementsAre(\"3\", \"2\", \"1\"));\n\n  // missing keys -> 0\n  Run({\"del\", \"w_1\"});\n  ASSERT_THAT(Run({\"sort\", \"list-1\", \"BY\", \"w_*\"}), RespElementsAre(\"1\", \"3\", 
\"2\"));  // 0, 10, 20\n\n  // BY pattern with LIMIT - test pagination works correctly\n  Run({\"set\", \"w_1\", \"30\"});  // restore w_1\n  // Sorted order: 3 (w_3=10), 2 (w_2=20), 1 (w_1=30). LIMIT 1 2 skips first, returns next 2\n  ASSERT_THAT(Run({\"sort\", \"list-1\", \"BY\", \"w_*\", \"LIMIT\", \"1\", \"2\"}), RespElementsAre(\"2\", \"1\"));\n  // multiple asterisks should result in syntax error\n  ASSERT_THAT(Run({\"sort\", \"list-1\", \"BY\", \"w_*_*\"}), ErrArg(\"syntax error\"));\n}\n\nTEST_F(GenericFamilyTest, SortGet) {\n  // Setup test data\n  Run({\"del\", \"mylist\"});\n  Run({\"lpush\", \"mylist\", \"1\", \"2\", \"3\"});\n  Run({\"set\", \"obj_1\", \"first\"});\n  Run({\"set\", \"obj_2\", \"second\"});\n  Run({\"set\", \"obj_3\", \"third\"});\n  Run({\"set\", \"weight_1\", \"30\"});\n  Run({\"set\", \"weight_2\", \"20\"});\n  Run({\"set\", \"weight_3\", \"10\"});\n\n  // Test 1: Basic GET with single pattern (sorted numerically: 1,2,3)\n  auto resp = Run({\"sort\", \"mylist\", \"GET\", \"obj_*\"});\n  ASSERT_THAT(resp, RespElementsAre(\"first\", \"second\", \"third\"));\n\n  // Test 2: GET with special # pattern (returns element itself, sorted: 1,2,3)\n  resp = Run({\"sort\", \"mylist\", \"GET\", \"#\"});\n  ASSERT_THAT(resp, RespElementsAre(\"1\", \"2\", \"3\"));\n\n  // Test 3: Multiple GET patterns\n  resp = Run({\"sort\", \"mylist\", \"GET\", \"#\", \"GET\", \"obj_*\"});\n  ASSERT_THAT(resp, RespElementsAre(\"1\", \"first\", \"2\", \"second\", \"3\", \"third\"));\n\n  // Test 4: GET with BY pattern (sorted by weight: 3(10), 2(20), 1(30))\n  resp = Run({\"sort\", \"mylist\", \"BY\", \"weight_*\", \"GET\", \"obj_*\"});\n  ASSERT_THAT(resp, RespElementsAre(\"third\", \"second\", \"first\"));\n\n  // Test 5: Multiple GET patterns with BY\n  resp = Run({\"sort\", \"mylist\", \"BY\", \"weight_*\", \"GET\", \"#\", \"GET\", \"obj_*\"});\n  ASSERT_THAT(resp, RespElementsAre(\"3\", \"third\", \"2\", \"second\", \"1\", \"first\"));\n\n  // Test 6: GET 
with missing keys (should return empty strings, sorted: 1,2,3)\n  Run({\"del\", \"obj_2\"});\n  resp = Run({\"sort\", \"mylist\", \"GET\", \"obj_*\"});\n  ASSERT_THAT(resp, RespElementsAre(\"first\", \"\", \"third\"));\n\n  // Restore obj_2 for further tests\n  Run({\"set\", \"obj_2\", \"second\"});\n\n  // Test 7: GET with DESC (sorted DESC: 3,2,1)\n  resp = Run({\"sort\", \"mylist\", \"DESC\", \"GET\", \"obj_*\"});\n  ASSERT_THAT(resp, RespElementsAre(\"third\", \"second\", \"first\"));\n\n  // Test 8: GET with ALPHA\n  Run({\"del\", \"strlist\"});\n  Run({\"lpush\", \"strlist\", \"c\", \"b\", \"a\"});\n  Run({\"set\", \"obj_a\", \"alpha\"});\n  Run({\"set\", \"obj_b\", \"beta\"});\n  Run({\"set\", \"obj_c\", \"gamma\"});\n  resp = Run({\"sort\", \"strlist\", \"ALPHA\", \"GET\", \"obj_*\"});\n  ASSERT_THAT(resp, RespElementsAre(\"alpha\", \"beta\", \"gamma\"));\n\n  // Test 9: GET with LIMIT\n  resp = Run({\"sort\", \"mylist\", \"GET\", \"#\", \"GET\", \"obj_*\", \"LIMIT\", \"1\", \"2\"});\n  ASSERT_THAT(resp, RespElementsAre(\"2\", \"second\", \"3\", \"third\"));\n\n  // Test 10: GET with STORE\n  resp = Run({\"sort\", \"mylist\", \"GET\", \"#\", \"GET\", \"obj_*\", \"STORE\", \"result\"});\n  ASSERT_THAT(resp, IntArg(6));  // 3 elements * 2 GET patterns = 6 stored values\n  resp = Run({\"lrange\", \"result\", \"0\", \"-1\"});\n  ASSERT_THAT(resp, RespElementsAre(\"1\", \"first\", \"2\", \"second\", \"3\", \"third\"));\n\n  // Test 11: GET with BY nosort\n  resp = Run({\"sort\", \"mylist\", \"BY\", \"nosort\", \"GET\", \"obj_*\"});\n  ASSERT_THAT(resp, RespElementsAre(\"third\", \"second\", \"first\"));  // insertion order\n\n  // Test 12: GET pattern validation (multiple asterisks should error)\n  ASSERT_THAT(Run({\"sort\", \"mylist\", \"GET\", \"obj_*_*\"}), ErrArg(\"syntax error\"));\n\n  // Test 13: GET with empty list\n  Run({\"del\", \"emptylist\"});\n  Run({\"lpush\", \"emptylist\", \"placeholder\"});\n  Run({\"lpop\", \"emptylist\"});\n  resp = 
Run({\"sort\", \"emptylist\", \"GET\", \"obj_*\"});\n  ASSERT_THAT(resp, ArrLen(0));\n\n  // Test 14: GET with literal pattern (no asterisk)\n  Run({\"set\", \"fixed_key\", \"fixed_value\"});\n  resp = Run({\"sort\", \"mylist\", \"GET\", \"fixed_key\"});\n  ASSERT_THAT(resp, RespElementsAre(\"fixed_value\", \"fixed_value\", \"fixed_value\"));\n\n  // Test 15: SORT_RO with GET\n  resp = Run({\"sort_ro\", \"mylist\", \"GET\", \"#\", \"GET\", \"obj_*\"});\n  ASSERT_THAT(resp, RespElementsAre(\"1\", \"first\", \"2\", \"second\", \"3\", \"third\"));\n}\n\nTEST_F(GenericFamilyTest, Delex) {\n  // DELEX without condition behaves like DEL\n  Run({\"set\", \"key1\", \"value1\"});\n  EXPECT_EQ(1, CheckedInt({\"delex\", \"key1\"}));\n  EXPECT_THAT(Run({\"get\", \"key1\"}), ArgType(RespExpr::NIL));\n\n  // DELEX on non-existent key returns 0\n  EXPECT_EQ(0, CheckedInt({\"delex\", \"nonexistent\"}));\n\n  // DELEX IFEQ deletes when values match\n  Run({\"set\", \"key2\", \"value2\"});\n  EXPECT_EQ(1, CheckedInt({\"delex\", \"key2\", \"IFEQ\", \"value2\"}));\n  EXPECT_THAT(Run({\"get\", \"key2\"}), ArgType(RespExpr::NIL));\n\n  // DELEX IFEQ does not delete when values differ\n  Run({\"set\", \"key3\", \"value3\"});\n  EXPECT_EQ(0, CheckedInt({\"delex\", \"key3\", \"IFEQ\", \"wrongvalue\"}));\n  EXPECT_EQ(Run({\"get\", \"key3\"}), \"value3\");\n\n  // DELEX IFNE deletes when values differ\n  Run({\"set\", \"key4\", \"value4\"});\n  EXPECT_EQ(1, CheckedInt({\"delex\", \"key4\", \"IFNE\", \"differentvalue\"}));\n  EXPECT_THAT(Run({\"get\", \"key4\"}), ArgType(RespExpr::NIL));\n\n  // DELEX IFNE does not delete when values match\n  Run({\"set\", \"key5\", \"value5\"});\n  EXPECT_EQ(0, CheckedInt({\"delex\", \"key5\", \"IFNE\", \"value5\"}));\n  EXPECT_EQ(Run({\"get\", \"key5\"}), \"value5\");\n\n  // DELEX IFDEQ tests - get digest first and use it\n  Run({\"set\", \"key6\", \"value6\"});\n  auto digest = Run({\"digest\", \"key6\"});\n  string_view digest_str = 
ToSV(digest.GetBuf());\n  EXPECT_EQ(1, CheckedInt({\"delex\", \"key6\", \"IFDEQ\", string(digest_str)}));\n  EXPECT_THAT(Run({\"get\", \"key6\"}), ArgType(RespExpr::NIL));\n\n  // DELEX IFDEQ does not delete when digests differ\n  Run({\"set\", \"key7\", \"value7\"});\n  EXPECT_EQ(0, CheckedInt({\"delex\", \"key7\", \"IFDEQ\", \"0000000000000000\"}));\n  EXPECT_EQ(Run({\"get\", \"key7\"}), \"value7\");\n\n  // DELEX IFDNE deletes when digests differ\n  Run({\"set\", \"key8\", \"value8\"});\n  EXPECT_EQ(1, CheckedInt({\"delex\", \"key8\", \"IFDNE\", \"0000000000000000\"}));\n  EXPECT_THAT(Run({\"get\", \"key8\"}), ArgType(RespExpr::NIL));\n\n  // DELEX IFDNE does not delete when digests match\n  Run({\"set\", \"key9\", \"value9\"});\n  auto digest9 = Run({\"digest\", \"key9\"});\n  string_view digest9_str = ToSV(digest9.GetBuf());\n  EXPECT_EQ(0, CheckedInt({\"delex\", \"key9\", \"IFDNE\", string(digest9_str)}));\n  EXPECT_EQ(Run({\"get\", \"key9\"}), \"value9\");\n\n  Run({\"lpush\", \"list1\", \"item\"});\n  EXPECT_THAT(Run({\"delex\", \"list1\", \"IFEQ\", \"item\"}), ErrArg(\"WRONGTYPE\"));\n\n  // DELEX with invalid option returns syntax error\n  Run({\"set\", \"key10\", \"value10\"});\n  EXPECT_THAT(Run({\"delex\", \"key10\", \"INVALID\", \"value\"}), ErrArg(\"Unknown subcommand\"));\n\n  // DELEX with too many arguments returns error\n  EXPECT_THAT(Run({\"delex\", \"key\", \"IFEQ\", \"val\", \"extra\"}), ErrArg(\"wrong number of arguments\"));\n\n  EXPECT_THAT(Run({\"delex\", \"key11\", \"randomarg\"}), ErrArg(\"wrong number of arguments\"));\n  EXPECT_THAT(Run({\"delex\", \"key12\", \"IFEQ\"}), ErrArg(\"wrong number of arguments\"));\n  EXPECT_THAT(Run({\"delex\", \"key13\", \"xyz\"}), ErrArg(\"wrong number of arguments\"));\n}\n\nTEST_F(GenericFamilyTest, Rm) {\n  // Basic: RM 0 on empty db returns [0, 0]\n  auto resp = Run({\"rm\", \"0\"});\n  ASSERT_THAT(resp, ArrLen(2));\n  EXPECT_THAT(resp.GetVec()[0], \"0\");\n  EXPECT_THAT(resp.GetVec()[1], 
IntArg(0));\n\n  // With MATCH arg — still parses OK\n  resp = Run({\"rm\", \"0\", \"match\", \"foo*\"});\n  ASSERT_THAT(resp, ArrLen(2));\n  EXPECT_THAT(resp.GetVec()[1], IntArg(0));\n\n  // With TYPE arg — still parses OK\n  resp = Run({\"rm\", \"0\", \"type\", \"string\"});\n  ASSERT_THAT(resp, ArrLen(2));\n  EXPECT_THAT(resp.GetVec()[1], IntArg(0));\n\n  // With COUNT arg — still parses OK\n  resp = Run({\"rm\", \"0\", \"match\", \"foo*\", \"count\", \"100\"});\n  ASSERT_THAT(resp, ArrLen(2));\n\n  // Invalid cursor → error\n  resp = Run({\"rm\", \"notanumber\"});\n  EXPECT_THAT(resp, ErrArg(\"invalid cursor\"));\n\n  // Invalid options → syntax error\n  resp = Run({\"rm\", \"0\", \"badopt\"});\n  EXPECT_THAT(resp, ErrArg(\"syntax\"));\n}\n\nTEST_F(GenericFamilyTest, RmDeletesMatchingKeys) {\n  for (int i = 0; i < 10; ++i)\n    Run({\"set\", absl::StrCat(\"foo\", i), \"val\"});\n  for (int i = 0; i < 5; ++i)\n    Run({\"set\", absl::StrCat(\"bar\", i), \"val\"});\n\n  // Delete all foo* keys by iterating until cursor returns 0\n  uint32_t total_deleted = 0;\n  uint64_t cursor = 0;\n  do {\n    auto resp = Run({\"rm\", absl::StrCat(cursor), \"match\", \"foo*\", \"count\", \"100\"});\n    ASSERT_THAT(resp, ArrLen(2));\n    ASSERT_TRUE(absl::SimpleAtoi(resp.GetVec()[0].GetString(), &cursor));\n    total_deleted += resp.GetVec()[1].GetInt().value();\n  } while (cursor != 0);\n\n  EXPECT_EQ(total_deleted, 10u);\n\n  // foo* keys are gone, bar* keys remain\n  EXPECT_EQ(Run({\"exists\", \"foo0\"}), 0);\n  EXPECT_EQ(Run({\"exists\", \"bar0\"}), 1);\n  EXPECT_EQ(Run({\"dbsize\"}), 5);\n}\n\n}  // namespace dfly\n"
  },
  {
    "path": "src/server/geo_family.cc",
    "content": "// Copyright 2025, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#include <absl/strings/ascii.h>\n\nextern \"C\" {\n#include \"redis/geo.h\"\n#include \"redis/geohash.h\"\n#include \"redis/geohash_helper.h\"\n#include \"redis/redis_aux.h\"\n#include \"redis/util.h\"\n#include \"redis/zmalloc.h\"\n}\n\n#include \"base/logging.h\"\n#include \"core/sorted_map.h\"\n#include \"facade/cmd_arg_parser.h\"\n#include \"facade/error.h\"\n#include \"server/acl/acl_commands_def.h\"\n#include \"server/command_families.h\"\n#include \"server/command_registry.h\"\n#include \"server/conn_context.h\"\n#include \"server/engine_shard_set.h\"\n#include \"server/error.h\"\n#include \"server/family_utils.h\"\n#include \"server/transaction.h\"\n#include \"server/zset_family.h\"\n\nnamespace dfly {\n\nusing namespace std;\nusing namespace facade;\nusing absl::SimpleAtoi;\nnamespace {\n\nusing CI = CommandId;\n\nenum Errors {\n  INVALID_LONG_LAT = CmdArgParser::ErrorType::CUSTOM_ERROR,\n  INVALID_UNIT = INVALID_LONG_LAT + 1,\n};\n\nconst char kNxXxErr[] = \"XX and NX options at the same time are not compatible\";\nconst char kFromMemberLonglatErr[] =\n    \"FROMMEMBER and FROMLONLAT options at the same time are not compatible\";\nconst char kByRadiusBoxErr[] = \"BYRADIUS and BYBOX options at the same time are not compatible\";\nconst char kAscDescErr[] = \"ASC and DESC options at the same time are not compatible\";\nconst char kStoreTypeErr[] = \"STORE and STOREDIST options at the same time are not compatible\";\nconst char kStoreCompatRadErr[] =\n    \"STORE option in GEORADIUS is not compatible with WITHDIST, WITHHASH and WITHCOORDS options\";\nconst char kStoreCompatByMemberErr[] =\n    \"STORE option in GEORADIUSBYMEMBER is not compatible with WITHDIST, WITHHASH and WITHCOORDS \"\n    \"options\";\nconst char kMemberNotFound[] = \"could not decode requested zset member\";\nconst char kInvalidUnit[] = \"unsupported unit provided. 
please use M, KM, FT, MI\";\nconst char kCountError[] = \"ERR COUNT must be > 0\";\nconstexpr string_view kGeoAlphabet = \"0123456789bcdefghjkmnpqrstuvwxyz\"sv;\n\nenum class Type {\n  FROMMEMBER,\n  FROMLONLAT,\n  BYRADIUS,\n  BYBOX,\n  ASC,\n  DESC,\n  COUNT,\n  WITHCOORD,\n  WITHDIST,\n  WITHHASH,\n\n  STORE,\n  STOREDIST\n};\n\nusing MScoreResponse = std::vector<std::optional<double>>;\n\nusing ScoredMember = std::pair<std::string, double>;\nusing ScoredArray = std::vector<ScoredMember>;\nusing ScoredMemberView = std::pair<double, std::string_view>;\nusing ScoredMemberSpan = absl::Span<const ScoredMemberView>;\n\nstruct GeoPoint {\n  double longitude;\n  double latitude;\n  double dist;\n  double score;\n  std::string member;\n  GeoPoint() : longitude(0.0), latitude(0.0), dist(0.0), score(0.0){};\n  GeoPoint(double _longitude, double _latitude, double _dist, double _score,\n           const std::string& _member)\n      : longitude(_longitude), latitude(_latitude), dist(_dist), score(_score), member(_member){};\n};\nusing GeoArray = std::vector<GeoPoint>;\n\nenum class Sorting { kUnsorted, kAsc, kDesc, kError };\nenum class GeoStoreType { kNoStore, kStoreHash, kStoreDist, kError };\nstruct GeoSearchOpts {\n  double conversion = 0;\n  uint64_t count = std::numeric_limits<uint64_t>::max();\n  Sorting sorting = Sorting::kUnsorted;\n  bool any = 0;\n  bool withdist = 0;\n  bool withcoord = 0;\n  bool withhash = 0;\n  GeoStoreType store = GeoStoreType::kNoStore;\n  string_view store_key;\n\n  bool HasWithStatement() const {\n    return withdist || withcoord || withhash;\n  }\n};\n\nbool ValidateLongLat(double longitude, double latitude) {\n  return !(longitude < GEO_LONG_MIN || longitude > GEO_LONG_MAX || latitude < GEO_LAT_MIN ||\n           latitude > GEO_LAT_MAX);\n}\n\nvoid ParseLongLat(CmdArgParser* parser, double lonlat[2]) {\n  std::tie(lonlat[0], lonlat[1]) = parser->Next<double, double>();\n\n  if (!ValidateLongLat(lonlat[0], lonlat[1])) {\n    
parser->Report(Errors::INVALID_LONG_LAT);\n  }\n}\n\nbool ParseLongLat(string_view lon, string_view lat, std::pair<double, double>* res) {\n  if (!ParseDouble(lon, &res->first))\n    return false;\n\n  if (!ParseDouble(lat, &res->second))\n    return false;\n\n  return ValidateLongLat(res->first, res->second);\n}\n\nbool ScoreToLongLat(const std::optional<double>& val, double* xy) {\n  if (!val.has_value())\n    return false;\n\n  double score = *val;\n\n  GeoHashBits hash = {.bits = (uint64_t)score, .step = GEO_STEP_MAX};\n\n  return geohashDecodeToLongLatType(hash, xy) == 1;\n}\n\nbool ToAsciiGeoHash(const std::optional<double>& val, array<char, 12>* buf) {\n  if (!val.has_value())\n    return false;\n\n  double score = *val;\n\n  GeoHashBits hash = {.bits = (uint64_t)score, .step = GEO_STEP_MAX};\n\n  double xy[2];\n  if (!geohashDecodeToLongLatType(hash, xy)) {\n    return false;\n  }\n\n  /* Re-encode */\n  GeoHashRange r[2];\n  r[0].min = -180;\n  r[0].max = 180;\n  r[1].min = -90;\n  r[1].max = 90;\n\n  geohashEncode(&r[0], &r[1], xy[0], xy[1], 26, &hash);\n\n  for (int i = 0; i < 11; i++) {\n    int idx;\n    if (i == 10) {\n      /* We have just 52 bits, but the API used to output\n       * an 11 bytes geohash. For compatibility we assume\n       * zero. 
*/\n      idx = 0;\n    } else {\n      idx = (hash.bits >> (52 - ((i + 1) * 5))) % kGeoAlphabet.size();\n    }\n    (*buf)[i] = kGeoAlphabet[idx];\n  }\n  (*buf)[11] = '\\0';\n\n  return true;\n}\n\ndouble ExtractUnit(CmdArgParser* parser) {\n  auto unit = parser->TryMapNext(\"M\", 1.0, \"KM\", 1000.0, \"FT\", 0.3048, \"MI\", 1609.34);\n  if (!unit)\n    parser->Report(Errors::INVALID_UNIT);\n  return unit.value_or(-1);\n}\n\ndouble ExtractUnit(std::string_view arg) {\n  const string unit = absl::AsciiStrToUpper(arg);\n  if (unit == \"M\") {\n    return 1;\n  } else if (unit == \"KM\") {\n    return 1000;\n  } else if (unit == \"FT\") {\n    return 0.3048;\n  } else if (unit == \"MI\") {\n    return 1609.34;\n  } else {\n    return -1;\n  }\n}\n\nbool HandleGeoParserFinalize(const GeoShape& shape, CmdArgParser* parser,\n                             CommandContext* cmd_cntx) {\n  if (parser->Finalize()) {\n    return false;\n  }\n\n  auto error = parser->TakeError();\n  switch (error.type) {\n    case Errors::INVALID_LONG_LAT: {\n      string err =\n          absl::StrCat(\"-ERR invalid longitude,latitude pair \", shape.xy[0], \",\", shape.xy[1]);\n      cmd_cntx->SendError(err, kSyntaxErrType);\n      break;\n    }\n    case Errors::INVALID_UNIT:\n      cmd_cntx->SendError(\"Unsupported unit provided. 
please use M, KM, FT, MI\", kSyntaxErrType);\n      break;\n    default:\n      cmd_cntx->SendError(error.MakeReply());\n      break;\n  }\n\n  return true;\n}\n\nvoid CmdGeoAdd(CmdArgList args, CommandContext* cmd_cntx) {\n  string_view key = ArgS(args, 0);\n\n  ZSetFamily::ZParams zparams;\n  size_t i = 1;\n  for (; i < args.size(); ++i) {\n    string cur_arg = absl::AsciiStrToUpper(ArgS(args, i));\n\n    if (cur_arg == \"XX\") {\n      zparams.flags |= ZADD_IN_XX;  // update only\n    } else if (cur_arg == \"NX\") {\n      zparams.flags |= ZADD_IN_NX;  // add new only.\n    } else if (cur_arg == \"CH\") {\n      zparams.ch = true;\n    } else {\n      break;\n    }\n  }\n\n  auto* builder = cmd_cntx->rb();\n  args.remove_prefix(i);\n  if (args.empty() || args.size() % 3 != 0) {\n    builder->SendError(kSyntaxErr);\n    return;\n  }\n\n  if ((zparams.flags & ZADD_IN_NX) && (zparams.flags & ZADD_IN_XX)) {\n    builder->SendError(kNxXxErr);\n    return;\n  }\n\n  absl::InlinedVector<ScoredMemberView, 4> members;\n  for (i = 0; i < args.size(); i += 3) {\n    string_view longitude = ArgS(args, i);\n    string_view latitude = ArgS(args, i + 1);\n    string_view member = ArgS(args, i + 2);\n\n    pair<double, double> longlat;\n\n    if (!ParseLongLat(longitude, latitude, &longlat)) {\n      string err = absl::StrCat(\"-ERR invalid longitude,latitude pair \", longitude, \",\", latitude,\n                                \",\", member);\n\n      return builder->SendError(err, kSyntaxErrType);\n    }\n\n    /* Turn the coordinates into the score of the element. 
*/\n    GeoHashBits hash;\n    geohashEncodeWGS84(longlat.first, longlat.second, GEO_STEP_MAX, &hash);\n    GeoHashFix52Bits bits = geohashAlign52Bits(hash);\n\n    members.emplace_back(bits, member);\n  }\n  DCHECK(cmd_cntx->tx());\n\n  absl::Span memb_sp{members.data(), members.size()};\n  ZSetFamily::ZAddGeneric(key, zparams, memb_sp, cmd_cntx);\n}\n\nvoid CmdGeoHash(CmdArgList args, CommandContext* cmd_cntx) {\n  auto* rb = static_cast<RedisReplyBuilder*>(cmd_cntx->rb());\n\n  OpResult<MScoreResponse> result = ZSetFamily::ZGetMembers(args, cmd_cntx->tx(), rb);\n\n  if (result.status() == OpStatus::WRONG_TYPE) {\n    return rb->SendError(kWrongTypeErr);\n  }\n\n  RedisReplyBuilder::ArrayScope scope{rb, result->size()};\n  array<char, 12> buf;\n  for (const auto& p : result.value()) {\n    if (ToAsciiGeoHash(p, &buf)) {\n      rb->SendBulkString(string_view{buf.data(), buf.size() - 1});\n    } else {\n      rb->SendNull();\n    }\n  }\n}\n\nvoid CmdGeoPos(CmdArgList args, CommandContext* cmd_cntx) {\n  auto* rb = static_cast<RedisReplyBuilder*>(cmd_cntx->rb());\n\n  OpResult<MScoreResponse> result = ZSetFamily::ZGetMembers(args, cmd_cntx->tx(), rb);\n\n  if (result.status() != OpStatus::OK) {\n    return rb->SendError(result.status());\n  }\n\n  RedisReplyBuilder::ArrayScope scope{rb, result->size()};\n  double xy[2];\n  for (const auto& p : result.value()) {\n    if (ScoreToLongLat(p, xy)) {\n      rb->StartArray(2);\n      rb->SendDouble(xy[0]);\n      rb->SendDouble(xy[1]);\n    } else {\n      rb->SendNull();\n    }\n  }\n}\n\nvoid CmdGeoDist(CmdArgList args, CommandContext* cmd_cntx) {\n  double distance_multiplier = 1;\n  auto* rb = static_cast<RedisReplyBuilder*>(cmd_cntx->rb());\n\n  if (args.size() == 4) {\n    string_view unit = ArgS(args, 3);\n    distance_multiplier = ExtractUnit(unit);\n    args.remove_suffix(1);\n    if (distance_multiplier < 0) {\n      return rb->SendError(kInvalidUnit);\n    }\n  } else if (args.size() != 3) {\n    return 
rb->SendError(kSyntaxErr);\n  }\n\n  OpResult<MScoreResponse> result = ZSetFamily::ZGetMembers(args, cmd_cntx->tx(), rb);\n\n  if (result.status() != OpStatus::OK) {\n    return rb->SendError(result.status());\n  }\n\n  const MScoreResponse& arr = result.value();\n\n  if (arr.size() != 2) {\n    return rb->SendError(kSyntaxErr);\n  }\n\n  double xyxy[4];  // 2 pairs of score holding 2 locations\n  for (size_t i = 0; i < arr.size(); i++) {\n    if (!ScoreToLongLat(arr[i], xyxy + (i * 2))) {\n      return rb->SendNull();\n    }\n  }\n\n  return rb->SendDouble(geohashGetDistance(xyxy[0], xyxy[1], xyxy[2], xyxy[3]) /\n                        distance_multiplier);\n}\n\nnamespace {\nstd::vector<ZSetFamily::ZRangeSpec> GetGeoRangeSpec(const GeoHashRadius& n) {\n  array<GeoHashBits, 9> neighbors;\n  unsigned int last_processed = 0;\n\n  neighbors[0] = n.hash;\n  neighbors[1] = n.neighbors.north;\n  neighbors[2] = n.neighbors.south;\n  neighbors[3] = n.neighbors.east;\n  neighbors[4] = n.neighbors.west;\n  neighbors[5] = n.neighbors.north_east;\n  neighbors[6] = n.neighbors.north_west;\n  neighbors[7] = n.neighbors.south_east;\n  neighbors[8] = n.neighbors.south_west;\n\n  // Get range_specs for neighbors (*and* our own hashbox)\n  std::vector<ZSetFamily::ZRangeSpec> range_specs;\n  for (unsigned int i = 0; i < neighbors.size(); i++) {\n    if (HASHISZERO(neighbors[i])) {\n      continue;\n    }\n\n    // When a huge Radius (in the 5000 km range or more) is used,\n    // adjacent neighbors can be the same, leading to duplicated\n    // elements. 
Skip every range which is the same as the one\n    // processed previously.\n    if (last_processed && neighbors[i].bits == neighbors[last_processed].bits &&\n        neighbors[i].step == neighbors[last_processed].step) {\n      continue;\n    }\n\n    GeoHashFix52Bits min, max;\n    scoresOfGeoHashBox(neighbors[i], &min, &max);\n\n    ZSetFamily::ScoreInterval si;\n    si.first = ZSetFamily::Bound{static_cast<double>(min), false};\n    si.second = ZSetFamily::Bound{static_cast<double>(max), true};\n\n    ZSetFamily::RangeParams range_params;\n    range_params.interval_type = ZSetFamily::RangeParams::IntervalType::SCORE;\n    range_params.with_scores = true;\n    range_specs.emplace_back(si, range_params);\n\n    last_processed = i;\n  }\n  return range_specs;\n}\n\nvoid SortIfNeeded(GeoArray* ga, Sorting sorting, uint64_t count) {\n  if (sorting == Sorting::kUnsorted) {\n    if (count && ga->size() > count) {\n      ga->resize(count);\n    }\n    return;\n  }\n\n  auto comparator = [&](const GeoPoint& a, const GeoPoint& b) {\n    if (sorting == Sorting::kAsc) {\n      return a.dist < b.dist;\n    } else {\n      DCHECK(sorting == Sorting::kDesc);\n      return a.dist > b.dist;\n    }\n  };\n\n  if (count > 0) {\n    count = std::min(count, static_cast<uint64_t>(ga->size()));\n    std::partial_sort(ga->begin(), ga->begin() + count, ga->end(), comparator);\n    ga->resize(count);\n  } else {\n    std::sort(ga->begin(), ga->end(), comparator);\n  }\n}\n\nvoid GeoSearchStoreGeneric(Transaction* tx, facade::SinkReplyBuilder* builder,\n                           const GeoShape& shape_ref, string_view key, string_view member,\n                           const GeoSearchOpts& geo_ops) {\n  GeoShape* shape = &(const_cast<GeoShape&>(shape_ref));\n  auto* rb = static_cast<RedisReplyBuilder*>(builder);\n\n  ShardId from_shard = Shard(key, shard_set->size());\n\n  if (!member.empty()) {\n    // get shape.xy from member\n    OpResult<double> member_score;\n    auto cb = 
[&](Transaction* t, EngineShard* shard) {\n      if (shard->shard_id() == from_shard) {\n        member_score = ZSetFamily::OpScore(t->GetOpArgs(shard), key, member);\n      }\n      return OpStatus::OK;\n    };\n    tx->Execute(std::move(cb), false);\n    auto member_sts = member_score.status();\n    if (member_sts != OpStatus::OK) {\n      tx->Conclude();\n      switch (member_sts) {\n        case OpStatus::WRONG_TYPE:\n          return builder->SendError(kWrongTypeErr);\n        case OpStatus::KEY_NOTFOUND:\n          return rb->StartArray(0);\n        case OpStatus::MEMBER_NOTFOUND:\n          return builder->SendError(kMemberNotFound);\n        default:\n          return builder->SendError(member_sts);\n      }\n    }\n    ScoreToLongLat(*member_score, shape->xy);\n  } else {\n    // verify key is valid\n    OpResult<void> result;\n    auto cb = [&](Transaction* t, EngineShard* shard) {\n      if (shard->shard_id() == from_shard) {\n        result = ZSetFamily::OpKeyExisted(t->GetOpArgs(shard), key);\n      }\n      return OpStatus::OK;\n    };\n    tx->Execute(std::move(cb), false);\n    auto result_sts = result.status();\n    if (result_sts != OpStatus::OK) {\n      tx->Conclude();\n      switch (result_sts) {\n        case OpStatus::WRONG_TYPE:\n          return builder->SendError(kWrongTypeErr);\n        case OpStatus::KEY_NOTFOUND:\n          return rb->StartArray(0);\n        default:\n          return builder->SendError(result_sts);\n      }\n    }\n  }\n  DCHECK(shape->xy[0] >= -180.0 && shape->xy[0] <= 180.0);\n  DCHECK(shape->xy[1] >= -90.0 && shape->xy[1] <= 90.0);\n\n  // query\n  GeoHashRadius georadius = geohashCalculateAreasByShapeWGS84(shape);\n  GeoArray ga;\n  auto range_specs = GetGeoRangeSpec(georadius);\n  // get all the matching members and add them to the potential result list\n  vector<OpResult<vector<ScoredArray>>> result_arrays;\n  auto cb = [&](Transaction* t, EngineShard* shard) {\n    auto res_it = ZSetFamily::OpRanges(range_specs, 
t->GetOpArgs(shard), key);\n    if (res_it) {\n      result_arrays.emplace_back(res_it);\n    }\n    return OpStatus::OK;\n  };\n\n  tx->Execute(std::move(cb), geo_ops.store == GeoStoreType::kNoStore);\n\n  // filter potential result list\n  double xy[2];\n  double distance;\n  unsigned long limit = geo_ops.any ? geo_ops.count : 0;\n  for (auto& result_array : result_arrays) {\n    for (auto& arr : *result_array) {\n      for (auto& p : arr) {\n        if (geoWithinShape(shape, p.second, xy, &distance) == 0) {\n          ga.emplace_back(xy[0], xy[1], distance, p.second, p.first);\n          if (limit > 0 && ga.size() >= limit)\n            break;\n        }\n      }\n    }\n  }\n\n  // sort and trim by count\n  SortIfNeeded(&ga, geo_ops.sorting, geo_ops.count);\n\n  if (geo_ops.store == GeoStoreType::kNoStore) {\n    // case 1: read mode\n    // case 2: write mode, kNoStore\n    // generate reply array withdist, withcoords, withhash\n    int record_size = 1;\n    if (geo_ops.withdist) {\n      record_size++;\n    }\n    if (geo_ops.withhash) {\n      record_size++;\n    }\n    if (geo_ops.withcoord) {\n      record_size++;\n    }\n\n    RedisReplyBuilder::ArrayScope scope{rb, ga.size()};\n    for (const auto& p : ga) {\n      // [member, dist, x, y, hash]\n      if (geo_ops.HasWithStatement()) {\n        rb->StartArray(record_size);\n      }\n      rb->SendBulkString(p.member);\n      if (geo_ops.withdist) {\n        rb->SendDouble(p.dist / geo_ops.conversion);\n      }\n      if (geo_ops.withhash) {\n        rb->SendDouble(p.score);\n      }\n      if (geo_ops.withcoord) {\n        rb->StartArray(2);\n        rb->SendDouble(p.longitude);\n        rb->SendDouble(p.latitude);\n      }\n    }\n  } else {\n    // case 3: write mode, !kNoStore\n    DCHECK(geo_ops.store == GeoStoreType::kStoreDist || geo_ops.store == GeoStoreType::kStoreHash);\n    ShardId dest_shard = Shard(geo_ops.store_key, shard_set->size());\n    DVLOG(1) << \"store shard:\" << dest_shard << \", 
key \" << geo_ops.store_key;\n\n    OpResult<ZSetFamily::AddResult> add_result;\n    vector<ScoredMemberView> smvec;\n    for (const auto& p : ga) {\n      if (geo_ops.store == GeoStoreType::kStoreDist) {\n        smvec.emplace_back(p.dist / geo_ops.conversion, p.member);\n      } else {\n        DCHECK(geo_ops.store == GeoStoreType::kStoreHash);\n        smvec.emplace_back(p.score, p.member);\n      }\n    }\n\n    auto store_cb = [&](Transaction* t, EngineShard* shard) {\n      if (shard->shard_id() == dest_shard) {\n        ZSetFamily::ZParams zparams;\n        zparams.override = true;\n        add_result = ZSetFamily::OpAdd(t->GetOpArgs(shard), zparams, geo_ops.store_key,\n                                       ScoredMemberSpan{smvec})\n                         .value();\n      }\n      return OpStatus::OK;\n    };\n\n    tx->Execute(std::move(store_cb), true);\n\n    rb->SendLong(smvec.size());\n  }\n}\n\n}  // namespace\n\nvoid CmdGeoSearch(CmdArgList args, CommandContext* cmd_cntx) {\n  GeoShape shape = {};\n  GeoSearchOpts geo_ops;\n  string_view member;\n\n  // FROMMEMBER or FROMLONLAT is set\n  int from_set = 0;\n  // BYRADIUS or BYBOX is set\n  int by_set = 0;\n  auto* builder = cmd_cntx->rb();\n\n  CmdArgParser parser(args);\n  string_view key = parser.Next();\n\n  while (parser.HasNext()) {\n    auto type = parser.MapNext(\n        \"FROMMEMBER\", Type::FROMMEMBER, \"FROMLONLAT\", Type::FROMLONLAT, \"BYRADIUS\", Type::BYRADIUS,\n        \"BYBOX\", Type::BYBOX, \"ASC\", Type::ASC, \"DESC\", Type::DESC, \"COUNT\", Type::COUNT,\n        \"WITHCOORD\", Type::WITHCOORD, \"WITHDIST\", Type::WITHDIST, \"WITHHASH\", Type::WITHHASH);\n\n    switch (type) {\n      case Type::FROMMEMBER:\n        ++from_set;\n        member = parser.Next();\n        break;\n      case Type::FROMLONLAT: {\n        ++from_set;\n        ParseLongLat(&parser, shape.xy);\n        break;\n      }\n      case Type::BYRADIUS:\n        ++by_set;\n        shape.t.radius = 
parser.Next<double>();\n        shape.conversion = ExtractUnit(&parser);\n        geo_ops.conversion = shape.conversion;\n        shape.type = CIRCULAR_TYPE;\n        break;\n      case Type::BYBOX: {\n        ++by_set;\n        std::tie(shape.t.r.width, shape.t.r.height) = parser.Next<double, double>();\n        shape.conversion = ExtractUnit(&parser);\n        geo_ops.conversion = shape.conversion;\n        shape.type = RECTANGLE_TYPE;\n        break;\n      }\n      case Type::ASC:\n        geo_ops.sorting = geo_ops.sorting == Sorting::kUnsorted ? Sorting::kAsc : Sorting::kError;\n        break;\n      case Type::DESC:\n        geo_ops.sorting = geo_ops.sorting == Sorting::kUnsorted ? Sorting::kDesc : Sorting::kError;\n        break;\n      case Type::COUNT:\n        geo_ops.count = parser.Next<uint64_t>();\n        geo_ops.any = parser.Check(\"ANY\");\n        break;\n      case Type::WITHCOORD:\n        geo_ops.withcoord = true;\n        break;\n      case Type::WITHDIST:\n        geo_ops.withdist = true;\n        break;\n      case Type::WITHHASH:\n        geo_ops.withhash = true;\n        break;\n      default:\n        return builder->SendError(kSyntaxErr);\n    }\n  }\n\n  if (HandleGeoParserFinalize(shape, &parser, cmd_cntx)) {\n    return;\n  }\n\n  // check mandatory options\n  if (from_set == 0 || by_set == 0) {\n    return builder->SendError(kSyntaxErr);\n  } else if (from_set > 1) {\n    return builder->SendError(kFromMemberLonglatErr);\n  } else if (by_set > 1) {\n    return builder->SendError(kByRadiusBoxErr);\n  } else if (geo_ops.sorting == Sorting::kError) {\n    return builder->SendError(kAscDescErr);\n  } else if (geo_ops.count == 0) {\n    return builder->SendError(kCountError);\n  }\n\n  geo_ops.count = (geo_ops.count == UINT64_MAX) ? 
0 : geo_ops.count;\n  GeoSearchStoreGeneric(cmd_cntx->tx(), builder, shape, key, member, geo_ops);\n}\n\nvoid GeoRadiusByMemberGeneric(CmdArgList args, CommandContext* cmd_cntx, bool read_only) {\n  GeoShape shape = {};\n  GeoSearchOpts geo_ops;\n  // parse arguments\n  string_view key = ArgS(args, 0);\n  // member to latlong, set shape.xy\n  string_view member = ArgS(args, 1);\n\n  auto* builder = cmd_cntx->rb();\n  if (!ParseDouble(ArgS(args, 2), &shape.t.radius)) {\n    return builder->SendError(kInvalidFloatErr);\n  }\n  string_view unit = ArgS(args, 3);\n  shape.conversion = ExtractUnit(unit);\n  geo_ops.conversion = shape.conversion;\n  if (shape.conversion == -1) {\n    return builder->SendError(\"unsupported unit provided. please use M, KM, FT, MI\");\n  }\n  shape.type = CIRCULAR_TYPE;\n\n  for (size_t i = 4; i < args.size(); ++i) {\n    string cur_arg = absl::AsciiStrToUpper(ArgS(args, i));\n\n    if (cur_arg == \"ASC\") {\n      if (geo_ops.sorting != Sorting::kUnsorted) {\n        return builder->SendError(kAscDescErr);\n      }\n      geo_ops.sorting = Sorting::kAsc;\n    } else if (cur_arg == \"DESC\") {\n      if (geo_ops.sorting != Sorting::kUnsorted) {\n        return builder->SendError(kAscDescErr);\n      }\n      geo_ops.sorting = Sorting::kDesc;\n    } else if (cur_arg == \"COUNT\") {\n      if (i + 1 < args.size() && absl::SimpleAtoi(ArgS(args, i + 1), &geo_ops.count)) {\n        i++;\n        if (geo_ops.count == 0) {\n          return builder->SendError(kCountError);\n        }\n      } else {\n        return builder->SendError(kSyntaxErr);\n      }\n      if (i + 1 < args.size() && ArgS(args, i + 1) == \"ANY\") {\n        geo_ops.any = true;\n        i++;\n      }\n    } else if (cur_arg == \"WITHCOORD\") {\n      geo_ops.withcoord = true;\n    } else if (cur_arg == \"WITHDIST\") {\n      geo_ops.withdist = true;\n    } else if (cur_arg == \"WITHHASH\") {\n      geo_ops.withhash = true;\n    } else if (cur_arg == \"STORE\" && !read_only) 
{\n      if (geo_ops.store != GeoStoreType::kNoStore) {\n        return builder->SendError(kStoreTypeErr);\n      }\n      if (i + 1 < args.size()) {\n        geo_ops.store_key = ArgS(args, i + 1);\n        geo_ops.store = GeoStoreType::kStoreHash;\n        i++;\n      } else {\n        return builder->SendError(kSyntaxErr);\n      }\n    } else if (cur_arg == \"STOREDIST\" && !read_only) {\n      if (geo_ops.store != GeoStoreType::kNoStore) {\n        return builder->SendError(kStoreTypeErr);\n      }\n      if (i + 1 < args.size()) {\n        geo_ops.store_key = ArgS(args, i + 1);\n        geo_ops.store = GeoStoreType::kStoreDist;\n        i++;\n      } else {\n        return builder->SendError(kSyntaxErr);\n      }\n    } else {\n      return builder->SendError(kSyntaxErr);\n    }\n  }\n\n  if ((geo_ops.withcoord || geo_ops.withdist || geo_ops.withhash) &&\n      geo_ops.store != GeoStoreType::kNoStore) {\n    return builder->SendError(kStoreCompatByMemberErr);\n  }\n\n  geo_ops.count = (geo_ops.count == UINT64_MAX) ? 
0 : geo_ops.count;\n  GeoSearchStoreGeneric(cmd_cntx->tx(), builder, shape, key, member, geo_ops);\n}\n\nvoid GeoRadiusGeneric(CmdArgList args, CommandContext* cmd_cntx, bool read_only) {\n  GeoShape shape = {};\n  GeoSearchOpts geo_ops;\n\n  auto* builder = cmd_cntx->rb();\n\n  CmdArgParser parser(args);\n\n  string_view key = parser.Next();\n  ParseLongLat(&parser, shape.xy);\n  shape.t.radius = parser.Next<double>();\n  shape.conversion = ExtractUnit(&parser);\n  geo_ops.conversion = shape.conversion;\n  shape.type = CIRCULAR_TYPE;\n\n  while (parser.HasNext()) {\n    // try and parse for only RO options first\n    auto type =\n        parser.TryMapNext(\"ASC\", Type::ASC, \"DESC\", Type::DESC, \"COUNT\", Type::COUNT, \"WITHCOORD\",\n                          Type::WITHCOORD, \"WITHDIST\", Type::WITHDIST, \"WITHHASH\", Type::WITHHASH);\n    // if writing variant and there was a mapping failure, test for write variant arguments\n    if (!type && !read_only) {\n      type = parser.MapNext(\"STORE\", Type::STORE, \"STOREDIST\", Type::STOREDIST);\n    }\n\n    // could not map the argument to an argument for RO or write GEORADIUS\n    if (!type) {\n      return builder->SendError(\"syntax error\", kSyntaxErrType);\n    }\n\n    switch (*type) {\n      case Type::STORE:\n        geo_ops.store_key = parser.Next();\n        geo_ops.store = geo_ops.store == GeoStoreType::kNoStore ? GeoStoreType::kStoreHash\n                                                                : GeoStoreType::kError;\n        break;\n      case Type::STOREDIST:\n        geo_ops.store_key = parser.Next();\n        geo_ops.store = geo_ops.store == GeoStoreType::kNoStore ? GeoStoreType::kStoreDist\n                                                                : GeoStoreType::kError;\n        break;\n      case Type::ASC:\n        geo_ops.sorting = geo_ops.sorting == Sorting::kUnsorted ? 
Sorting::kAsc : Sorting::kError;\n        break;\n      case Type::DESC:\n        geo_ops.sorting = geo_ops.sorting == Sorting::kUnsorted ? Sorting::kDesc : Sorting::kError;\n        break;\n      case Type::COUNT:\n        geo_ops.count = parser.Next<uint64_t>();\n        geo_ops.any = parser.Check(\"ANY\");\n        break;\n      case Type::WITHCOORD:\n        geo_ops.withcoord = true;\n        break;\n      case Type::WITHDIST:\n        geo_ops.withdist = true;\n        break;\n      case Type::WITHHASH:\n        geo_ops.withhash = true;\n        break;\n      default:\n        // If MapNext failed, it means an unknown option was provided or\n        // an option requiring an argument was missing its argument.\n        // The parser has already recorded the error.\n        DCHECK(parser.HasError());\n        break;\n    }\n  }\n\n  if (HandleGeoParserFinalize(shape, &parser, cmd_cntx)) {\n    return;\n  }\n\n  if (geo_ops.sorting == Sorting::kError) {\n    return builder->SendError(kAscDescErr);\n  } else if (geo_ops.store == GeoStoreType::kError) {\n    return builder->SendError(kStoreTypeErr);\n  } else if (geo_ops.count == 0) {\n    return builder->SendError(kCountError);\n  }\n\n  if ((geo_ops.withcoord || geo_ops.withdist || geo_ops.withhash) &&\n      geo_ops.store != GeoStoreType::kNoStore) {\n    return builder->SendError(kStoreCompatRadErr);\n  }\n\n  geo_ops.count = (geo_ops.count == UINT64_MAX) ? 
0 : geo_ops.count;\n  GeoSearchStoreGeneric(cmd_cntx->tx(), builder, shape, key, \"\", geo_ops);\n}\n\nvoid CmdGeoRadiusByMember(CmdArgList args, CommandContext* cmd_cntx) {\n  GeoRadiusByMemberGeneric(args, cmd_cntx, false);\n}\n\nvoid CmdGeoRadiusByMemberRO(CmdArgList args, CommandContext* cmd_cntx) {\n  GeoRadiusByMemberGeneric(args, cmd_cntx, true);\n}\n\nvoid CmdGeoRadius(CmdArgList args, CommandContext* cmd_cntx) {\n  GeoRadiusGeneric(args, cmd_cntx, false);\n}\n\nvoid CmdGeoRadiusRO(CmdArgList args, CommandContext* cmd_cntx) {\n  GeoRadiusGeneric(args, cmd_cntx, true);\n}\n\n}  // namespace\n\n#define HFUNC(x) SetHandler(&Cmd##x)\n\nvoid RegisterGeoFamily(CommandRegistry* registry) {\n  registry->StartFamily(acl::GEO);\n  *registry << CI{\"GEOADD\", CO::JOURNALED | CO::DENYOOM, -5, 1, 1}.HFUNC(GeoAdd)\n            << CI{\"GEOHASH\", CO::READONLY, -2, 1, 1}.HFUNC(GeoHash)\n            << CI{\"GEOPOS\", CO::READONLY, -2, 1, 1}.HFUNC(GeoPos)\n            << CI{\"GEODIST\", CO::READONLY, -4, 1, 1}.HFUNC(GeoDist)\n            << CI{\"GEOSEARCH\", CO::READONLY, -7, 1, 1}.HFUNC(GeoSearch)\n            << CI{\"GEORADIUSBYMEMBER\", CO::JOURNALED | CO::STORE_LAST_KEY, -5, 1, 1}.HFUNC(\n                   GeoRadiusByMember)\n            << CI{\"GEORADIUSBYMEMBER_RO\", CO::READONLY, -5, 1, 1}.HFUNC(GeoRadiusByMemberRO)\n            << CI{\"GEORADIUS\", CO::JOURNALED | CO::STORE_LAST_KEY, -6, 1, 1}.HFUNC(GeoRadius)\n            << CI{\"GEORADIUS_RO\", CO::READONLY, -6, 1, 1}.HFUNC(GeoRadiusRO);\n}\n\n}  // namespace dfly\n"
  },
  {
    "path": "src/server/geo_family_test.cc",
    "content": "// Copyright 2025, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n#include \"base/gtest.h\"\n#include \"base/logging.h\"\n#include \"facade/facade_test.h\"\n#include \"server/test_utils.h\"\n\nusing namespace testing;\nusing namespace std;\nusing namespace util;\n\nnamespace dfly {\n\nclass GeoFamilyTest : public BaseFamilyTest {\n protected:\n};\n\nTEST_F(GeoFamilyTest, GeoAdd) {\n  EXPECT_EQ(2, CheckedInt({\"geoadd\", \"Sicily\", \"13.361389\", \"38.115556\", \"Palermo\", \"15.087269\",\n                           \"37.502669\", \"Catania\"}));\n  EXPECT_EQ(0, CheckedInt({\"geoadd\", \"Sicily\", \"13.361389\", \"38.115556\", \"Palermo\", \"15.087269\",\n                           \"37.502669\", \"Catania\"}));\n  auto resp = Run({\"geohash\", \"Sicily\", \"Palermo\", \"Catania\"});\n  EXPECT_THAT(resp, RespArray(ElementsAre(\"sqc8b49rny0\", \"sqdtr74hyu0\")));\n}\n\nTEST_F(GeoFamilyTest, GeoAddOptions) {\n  EXPECT_EQ(2, CheckedInt({\"geoadd\", \"Sicily\", \"13.361389\", \"38.115556\", \"Palermo\", \"15.087269\",\n                           \"37.502669\", \"Catania\"}));\n\n  // add 1 + update 1 + XX\n  EXPECT_EQ(0, CheckedInt({\"geoadd\", \"Sicily\", \"XX\", \"15.361389\", \"38.115556\", \"Palermo\",\n                           \"15.554167\", \"38.193611\", \"Messina\"}));\n  auto resp = Run({\"geopos\", \"Sicily\", \"Palermo\", \"Messina\"});\n  EXPECT_THAT(\n      resp, RespArray(ElementsAre(RespArray(ElementsAre(\"15.361389219760895\", \"38.1155563954963\")),\n                                  ArgType(RespExpr::NIL))));\n\n  // add 1 + update 1 + NX\n  EXPECT_EQ(1, CheckedInt({\"geoadd\", \"Sicily\", \"NX\", \"18.361389\", \"38.115556\", \"Palermo\", \"15.2875\",\n                           \"37.069167\", \"Syracuse\"}));\n  resp = Run({\"geopos\", \"Sicily\", \"Palermo\", \"Syracuse\"});\n  EXPECT_THAT(resp, RespArray(ElementsAre(\n                        RespArray(ElementsAre(\"15.361389219760895\", 
\"38.1155563954963\")),\n                        RespArray(ElementsAre(\"15.287499725818634\", \"37.06916773705567\")))));\n\n  // add 1 + update 1 CH\n  EXPECT_EQ(2, CheckedInt({\"geoadd\", \"Sicily\", \"CH\", \"18.361389\", \"38.115556\", \"Palermo\",\n                           \"12.434167\", \"37.798056\", \"Marsala\"}));\n  resp = Run({\"geopos\", \"Sicily\", \"Palermo\", \"Marsala\"});\n  EXPECT_THAT(resp, RespArray(ElementsAre(\n                        RespArray(ElementsAre(\"18.361386358737946\", \"38.1155563954963\")),\n                        RespArray(ElementsAre(\"12.43416577577591\", \"37.7980572230775\")))));\n\n  // update 1 + CH + XX\n  EXPECT_EQ(1, CheckedInt({\"geoadd\", \"Sicily\", \"CH\", \"XX\", \"10.361389\", \"38.115556\", \"Palermo\"}));\n  resp = Run({\"geopos\", \"Sicily\", \"Palermo\"});\n  EXPECT_THAT(resp, RespArray(ElementsAre(DoubleArg(10.361389), DoubleArg(38.115556))));\n\n  // add 1 + CH + NX\n  EXPECT_EQ(1, CheckedInt({\"geoadd\", \"Sicily\", \"CH\", \"NX\", \"14.25\", \"37.066667\", \"Gela\"}));\n  resp = Run({\"geopos\", \"Sicily\", \"Gela\"});\n  EXPECT_THAT(resp, RespArray(ElementsAre(DoubleArg(14.25), DoubleArg(37.066667))));\n\n  // add 1 + XX + NX\n  resp = Run({\"geoadd\", \"Sicily\", \"XX\", \"NX\", \"14.75\", \"36.933333\", \"Ragusa\"});\n  EXPECT_THAT(resp, ErrArg(\"XX and NX options at the same time are not compatible\"));\n\n  // incorrect number of args\n  resp = Run({\"geoadd\", \"Sicily\", \"14.75\", \"36.933333\", \"Ragusa\", \"10.23\"});\n  EXPECT_THAT(resp, ErrArg(\"syntax error\"));\n}\n\nTEST_F(GeoFamilyTest, GeoPos) {\n  EXPECT_EQ(1, CheckedInt({\"geoadd\", \"Sicily\", \"13.361389\", \"38.115556\", \"Palermo\"}));\n  auto resp = Run({\"geopos\", \"Sicily\", \"Palermo\", \"NonExisting\"});\n  EXPECT_THAT(\n      resp, RespArray(ElementsAre(RespArray(ElementsAre(\"13.361389338970184\", \"38.1155563954963\")),\n                                  ArgType(RespExpr::NIL))));\n}\n\nTEST_F(GeoFamilyTest, 
GeoPosWrongType) {\n  Run({\"set\", \"x\", \"value\"});\n  EXPECT_THAT(Run({\"geopos\", \"x\", \"Sicily\", \"Palermo\"}), ErrArg(\"WRONGTYPE\"));\n}\n\nTEST_F(GeoFamilyTest, GeoDist) {\n  EXPECT_EQ(2, CheckedInt({\"geoadd\", \"Sicily\", \"13.361389\", \"38.115556\", \"Palermo\", \"15.087269\",\n                           \"37.502669\", \"Catania\"}));\n  auto resp = Run({\"geodist\", \"Sicily\", \"Palermo\", \"Catania\"});\n  // Haswell+ CPUs use FMA instructions, yielding higher precision that breaks exact string\n  // matching. DoubleArg handles parsing safely and applies standard floating-point tolerance.\n  EXPECT_THAT(resp, DoubleArg(166274.15156960033));\n\n  resp = Run({\"geodist\", \"Sicily\", \"Palermo\", \"Catania\", \"km\"});\n  EXPECT_THAT(resp, DoubleArg(166.27415156960032));\n\n  resp = Run({\"geodist\", \"Sicily\", \"Palermo\", \"Catania\", \"MI\"});\n  EXPECT_THAT(resp, DoubleArg(103.31822459492733));\n\n  resp = Run({\"geodist\", \"Sicily\", \"Palermo\", \"Catania\", \"FT\"});\n  EXPECT_THAT(resp, DoubleArg(545518.8699790037));\n\n  resp = Run({\"geodist\", \"Sicily\", \"Foo\", \"Bar\"});\n  EXPECT_THAT(resp, ArgType(RespExpr::NIL));\n}\n\nTEST_F(GeoFamilyTest, GeoSearch) {\n  EXPECT_EQ(10, CheckedInt({\"geoadd\",  \"Europe\",    \"13.4050\", \"52.5200\", \"Berlin\",   \"3.7038\",\n                            \"40.4168\", \"Madrid\",    \"9.1427\",  \"38.7369\", \"Lisbon\",   \"2.3522\",\n                            \"48.8566\", \"Paris\",     \"16.3738\", \"48.2082\", \"Vienna\",   \"4.8952\",\n                            \"52.3702\", \"Amsterdam\", \"10.7522\", \"59.9139\", \"Oslo\",     \"23.7275\",\n                            \"37.9838\", \"Athens\",    \"19.0402\", \"47.4979\", \"Budapest\", \"6.2603\",\n                            \"53.3498\", \"Dublin\"}));\n\n  auto resp = Run({\"GEOSEARCH\", \"Europe\", \"FROMLONLAT\", \"13.4050\", \"52.5200\", \"BYRADIUS\", \"500\",\n                   \"KM\", \"WITHCOORD\", \"WITHDIST\", 
\"WITHHASH\"});\n  EXPECT_THAT(\n      resp,\n      RespArray(ElementsAre(\n          RespArray(ElementsAre(\"Berlin\", DoubleArg(0.00017343178521311378), \"3673983950397063\",\n                                RespArray(ElementsAre(DoubleArg(13.4050), DoubleArg(52.5200))))),\n          RespArray(ElementsAre(\"Dublin\", DoubleArg(487.5619030644293), \"3678981558208417\",\n                                RespArray(ElementsAre(DoubleArg(6.2603), DoubleArg(53.3498))))))));\n\n  resp = Run({\"GEOSEARCH\", \"invalid_key\", \"FROMMEMBER\", \"Madrid\", \"BYRADIUS\", \"700\", \"KM\",\n              \"WITHCOORD\", \"WITHDIST\"});\n  EXPECT_THAT(resp.GetVec().empty(), true);\n\n  resp = Run({\"GEOSEARCH\", \"Europe\", \"FROMMEMBER\", \"invalid_member\", \"BYRADIUS\", \"700\", \"KM\",\n              \"WITHCOORD\", \"WITHDIST\"});\n  EXPECT_THAT(resp, ErrArg(\"could not decode requested zset member\"));\n\n  resp = Run({\"GEOSEARCH\", \"America\", \"FROMLONLAT\", \"13.4050\", \"52.5200\", \"BYBOX\", \"1000\", \"1000\",\n              \"KM\", \"WITHCOORD\", \"WITHDIST\"});\n  EXPECT_THAT(resp.GetVec().empty(), true);\n\n  resp = Run({\"GEOSEARCH\", \"Europe\", \"FROMLONLAT\", \"130.4050\", \"52.5200\", \"BYBOX\", \"10\", \"10\", \"KM\",\n              \"WITHCOORD\", \"WITHDIST\"});\n  EXPECT_THAT(resp.GetVec().empty(), true);\n\n  resp = Run({\"GEOSEARCH\", \"Europe\", \"FROMLONLAT\", \"13.4050\", \"52.5200\", \"BYBOX\", \"1000\", \"1000\",\n              \"KM\", \"WITHCOORD\", \"WITHDIST\"});\n  EXPECT_THAT(\n      resp,\n      RespArray(ElementsAre(\n          RespArray(ElementsAre(\"Vienna\", DoubleArg(523.6926930553866),\n                                RespArray(ElementsAre(DoubleArg(16.3738), DoubleArg(48.2082))))),\n          RespArray(ElementsAre(\"Berlin\", DoubleArg(0.00017343178521311378),\n                                RespArray(ElementsAre(DoubleArg(13.4050), DoubleArg(52.5200))))),\n          RespArray(ElementsAre(\"Dublin\", DoubleArg(487.5619030644293),\n       
                         RespArray(ElementsAre(DoubleArg(6.2603), DoubleArg(53.3498))))))));\n\n  resp = Run({\"GEOSEARCH\", \"Europe\", \"FROMLONLAT\", \"13.4050\", \"52.5200\", \"BYRADIUS\", \"500\", \"KM\",\n              \"COUNT\", \"3\", \"WITHCOORD\", \"WITHDIST\"});\n  EXPECT_THAT(\n      resp,\n      RespArray(ElementsAre(\n          RespArray(ElementsAre(\"Berlin\", DoubleArg(0.00017343178521311378),\n                                RespArray(ElementsAre(DoubleArg(13.4050), DoubleArg(52.5200))))),\n          RespArray(ElementsAre(\"Dublin\", DoubleArg(487.5619030644293),\n                                RespArray(ElementsAre(DoubleArg(6.2603), DoubleArg(53.3498))))))));\n\n  resp = Run({\"GEOSEARCH\", \"Europe\", \"FROMLONLAT\", \"13.4050\", \"52.5200\", \"BYRADIUS\", \"500\", \"KM\",\n              \"DESC\", \"WITHCOORD\", \"WITHDIST\"});\n  EXPECT_THAT(\n      resp,\n      RespArray(ElementsAre(\n          RespArray(ElementsAre(\"Dublin\", DoubleArg(487.5619030644293),\n                                RespArray(ElementsAre(DoubleArg(6.2603), DoubleArg(53.3498))))),\n          RespArray(ElementsAre(\"Berlin\", DoubleArg(0.00017343178521311378),\n                                RespArray(ElementsAre(DoubleArg(13.4050), DoubleArg(52.5200))))))));\n\n  resp = Run({\"GEOSEARCH\", \"Europe\", \"FROMMEMBER\", \"Madrid\", \"BYRADIUS\", \"700\", \"KM\", \"WITHCOORD\",\n              \"WITHDIST\"});\n  EXPECT_THAT(\n      resp,\n      RespArray(ElementsAre(\n          // Use DoubleArg to tolerate floating-point precision differences on Haswell+ CPUs (e.g.,\n          // 0 becoming 5.7e-15 due to FMA).\n          RespArray(ElementsAre(\"Madrid\", DoubleArg(0),\n                                RespArray(ElementsAre(DoubleArg(3.7038), DoubleArg(40.4168))))),\n          RespArray(ElementsAre(\"Lisbon\", DoubleArg(502.20769462704106),\n                                RespArray(ElementsAre(DoubleArg(9.1427), DoubleArg(38.7369))))))));\n\n  resp = Run({\"GEOSEARCH\", 
\"Europe\", \"FROMMEMBER\", \"Madrid\", \"BYRADIUS\", \"700\", \"KM\"});\n  EXPECT_THAT(resp, RespArray(ElementsAre(\"Madrid\", \"Lisbon\")));\n}\n\nTEST_F(GeoFamilyTest, GeoRadiusByMember) {\n  EXPECT_EQ(10, CheckedInt({\"geoadd\",  \"Europe\",    \"13.4050\", \"52.5200\", \"Berlin\",   \"3.7038\",\n                            \"40.4168\", \"Madrid\",    \"9.1427\",  \"38.7369\", \"Lisbon\",   \"2.3522\",\n                            \"48.8566\", \"Paris\",     \"16.3738\", \"48.2082\", \"Vienna\",   \"4.8952\",\n                            \"52.3702\", \"Amsterdam\", \"10.7522\", \"59.9139\", \"Oslo\",     \"23.7275\",\n                            \"37.9838\", \"Athens\",    \"19.0402\", \"47.4979\", \"Budapest\", \"6.2603\",\n                            \"53.3498\", \"Dublin\"}));\n\n  auto resp = Run({\"GEORADIUSBYMEMBER\", \"invalid_key\", \"Madrid\", \"900\", \"KM\"});\n  EXPECT_THAT(resp.GetVec().empty(), true);\n\n  resp = Run({\"GEORADIUSBYMEMBER\", \"invalid_key\", \"Madrid\", \"900\", \"KM\", \"STORE\", \"store_key\"});\n  EXPECT_THAT(resp.GetVec().empty(), true);\n\n  resp = Run({\"GEORADIUSBYMEMBER\", \"Europe\", \"invalid_mem\", \"900\", \"KM\", \"STORE\", \"store_key\"});\n  EXPECT_THAT(resp, ErrArg(\"could not decode requested zset member\"));\n\n  resp = Run({\"GEORADIUSBYMEMBER\", \"Europe\", \"Madrid\", \"700\", \"KM\", \"WITHCOORD\", \"WITHDIST\"});\n  EXPECT_THAT(\n      resp,\n      RespArray(ElementsAre(\n          RespArray(ElementsAre(\"Madrid\", DoubleArg(0),\n                                RespArray(ElementsAre(DoubleArg(3.703801), DoubleArg(40.416799))))),\n          RespArray(\n              ElementsAre(\"Lisbon\", DoubleArg(502.207695),\n                          RespArray(ElementsAre(DoubleArg(9.142698), DoubleArg(38.736900))))))));\n\n  EXPECT_EQ(\n      2, CheckedInt({\"GEORADIUSBYMEMBER\", \"Europe\", \"Madrid\", \"700\", \"KM\", \"STORE\", \"store_key\"}));\n  resp = Run({\"ZRANGE\", \"store_key\", \"0\", \"-1\"});\n  
EXPECT_THAT(resp, RespArray(ElementsAre(\"Madrid\", \"Lisbon\")));\n  resp = Run({\"ZRANGE\", \"store_key\", \"0\", \"-1\", \"WITHSCORES\"});\n  EXPECT_THAT(resp,\n              RespArray(ElementsAre(\"Madrid\", \"3471766229222696\", \"Lisbon\", \"3473121093062745\")));\n\n  EXPECT_EQ(2, CheckedInt({\"GEORADIUSBYMEMBER\", \"Europe\", \"Madrid\", \"700\", \"KM\", \"STOREDIST\",\n                           \"store_dist_key\"}));\n  resp = Run({\"ZRANGE\", \"store_dist_key\", \"0\", \"-1\", \"WITHSCORES\"});\n  EXPECT_THAT(resp,\n              RespArray(ElementsAre(\"Madrid\", DoubleArg(0), \"Lisbon\", DoubleArg(502.207695))));\n\n  resp = Run(\n      {\"GEORADIUSBYMEMBER\", \"Europe\", \"Madrid\", \"900\", \"KM\", \"STORE\", \"store_key\", \"WITHCOORD\"});\n  EXPECT_THAT(resp, ErrArg(\"ERR STORE option in GEORADIUSBYMEMBER is not compatible with WITHDIST, \"\n                           \"WITHHASH and WITHCOORDS options\"));\n\n  // Do not remove this test case, it's not redundant.\n  // It's different from the one above because the arguments have\n  // different permutation which our code did not handle.\n  auto err =\n      \"ERR STORE option in GEORADIUSBYMEMBER is not compatible with WITHDIST, WITHHASH and WITHCOORDS options\"sv;\n  resp = Run(\"GEORADIUSBYMEMBER Sicily Agrigento 100 km WITHHASH store tmp\");\n  EXPECT_THAT(resp, ErrArg(err));\n\n  resp = Run(\"GEOADD t 13.361389 38.115556 a 13.3619 38.1159 b 13.3608 38.1152 c\");\n  resp = Run(\"GEOSEARCH t FROMLONLAT 13.361389 38.115556 BYRADIUS 1 KM COUNT 0\");\n  EXPECT_THAT(resp, ErrArg(\"ERR COUNT must be > 0\"));\n}\n\nTEST_F(GeoFamilyTest, GeoRadiusByMemberRO) {\n  EXPECT_EQ(10, CheckedInt({\"geoadd\",  \"Europe\",    \"13.4050\", \"52.5200\", \"Berlin\",   \"3.7038\",\n                            \"40.4168\", \"Madrid\",    \"9.1427\",  \"38.7369\", \"Lisbon\",   \"2.3522\",\n                            \"48.8566\", \"Paris\",     \"16.3738\", \"48.2082\", \"Vienna\",   \"4.8952\",\n                       
     \"52.3702\", \"Amsterdam\", \"10.7522\", \"59.9139\", \"Oslo\",     \"23.7275\",\n                            \"37.9838\", \"Athens\",    \"19.0402\", \"47.4979\", \"Budapest\", \"6.2603\",\n                            \"53.3498\", \"Dublin\"}));\n\n  auto resp =\n      Run({\"GEORADIUSBYMEMBER_RO\", \"Europe\", \"Madrid\", \"700\", \"KM\", \"WITHCOORD\", \"WITHDIST\"});\n  EXPECT_THAT(\n      resp,\n      RespArray(ElementsAre(\n          RespArray(ElementsAre(\"Madrid\", DoubleArg(0),\n                                RespArray(ElementsAre(DoubleArg(3.703801), DoubleArg(40.416799))))),\n          RespArray(\n              ElementsAre(\"Lisbon\", DoubleArg(502.207695),\n                          RespArray(ElementsAre(DoubleArg(9.142698), DoubleArg(38.736900))))))));\n\n  // GEORADIUSBYMEMBER_RO should not accept arguments for storing (writing data)\n  resp =\n      Run({\"GEORADIUSBYMEMBER_RO\", \"Europe\", \"Madrid\", \"700\", \"KM\", \"STOREDIST\", \"store_dist_key\"});\n  EXPECT_THAT(resp, ErrArg(\"syntax error\"));\n\n  resp = Run({\"GEORADIUSBYMEMBER_RO\", \"Europe\", \"Madrid\", \"700\", \"KM\", \"STORE\", \"store_key\"});\n  EXPECT_THAT(resp, ErrArg(\"syntax error\"));\n}\n\nTEST_F(GeoFamilyTest, GeoRadius) {\n  EXPECT_EQ(10, CheckedInt({\"geoadd\",  \"Europe\",    \"13.4050\", \"52.5200\", \"Berlin\",   \"3.7038\",\n                            \"40.4168\", \"Madrid\",    \"9.1427\",  \"38.7369\", \"Lisbon\",   \"2.3522\",\n                            \"48.8566\", \"Paris\",     \"16.3738\", \"48.2082\", \"Vienna\",   \"4.8952\",\n                            \"52.3702\", \"Amsterdam\", \"10.7522\", \"59.9139\", \"Oslo\",     \"23.7275\",\n                            \"37.9838\", \"Athens\",    \"19.0402\", \"47.4979\", \"Budapest\", \"6.2603\",\n                            \"53.3498\", \"Dublin\"}));\n\n  auto resp = Run({\"GEORADIUS\", \"invalid_key\", \"16.3738\", \"48.2082\", \"900\", \"KM\"});\n  EXPECT_THAT(resp.GetVec().empty(), true);\n\n  resp = 
Run({\"GEORADIUS\", \"America\", \"13.4050\", \"52.5200\", \"500\", \"KM\", \"WITHCOORD\", \"WITHDIST\"});\n  EXPECT_THAT(resp.GetVec().empty(), true);\n\n  resp = Run({\"GEORADIUS\", \"Europe\", \"130.4050\", \"52.5200\", \"10\", \"KM\", \"WITHCOORD\", \"WITHDIST\"});\n  EXPECT_THAT(resp.GetVec().empty(), true);\n\n  resp = Run({\"GEORADIUS\", \"Europe\", \"13.4050\", \"52.5200\", \"500\", \"KM\", \"COUNT\", \"3\", \"WITHCOORD\",\n              \"WITHDIST\"});\n  EXPECT_THAT(\n      resp,\n      RespArray(ElementsAre(\n          RespArray(ElementsAre(\"Berlin\", DoubleArg(0.00017343178521311378),\n                                RespArray(ElementsAre(DoubleArg(13.4050), DoubleArg(52.5200))))),\n          RespArray(ElementsAre(\"Dublin\", DoubleArg(487.5619030644293),\n                                RespArray(ElementsAre(DoubleArg(6.2603), DoubleArg(53.3498))))))));\n\n  resp = Run(\n      {\"GEORADIUS\", \"Europe\", \"13.4050\", \"52.5200\", \"500\", \"KM\", \"DESC\", \"WITHCOORD\", \"WITHDIST\"});\n  EXPECT_THAT(\n      resp,\n      RespArray(ElementsAre(\n          RespArray(ElementsAre(\"Dublin\", DoubleArg(487.5619030644293),\n                                RespArray(ElementsAre(DoubleArg(6.2603), DoubleArg(53.3498))))),\n          RespArray(ElementsAre(\"Berlin\", DoubleArg(0.00017343178521311378),\n                                RespArray(ElementsAre(DoubleArg(13.4050), DoubleArg(52.5200))))))));\n\n  EXPECT_EQ(2, CheckedInt({\"GEORADIUS\", \"Europe\", \"3.7038\", \"40.4168\", \"700\", \"KM\", \"STORE\",\n                           \"store_key\"}));\n  resp = Run({\"ZRANGE\", \"store_key\", \"0\", \"-1\"});\n\n  EXPECT_THAT(resp, RespArray(ElementsAre(\"Madrid\", \"Lisbon\")));\n  resp = Run({\"ZRANGE\", \"store_key\", \"0\", \"-1\", \"WITHSCORES\"});\n  EXPECT_THAT(resp,\n              RespArray(ElementsAre(\"Madrid\", \"3471766229222696\", \"Lisbon\", \"3473121093062745\")));\n\n  EXPECT_EQ(2, CheckedInt({\"GEORADIUS\", \"Europe\", \"3.7038\", 
\"40.4168\", \"700\", \"KM\", \"STOREDIST\",\n                           \"store_dist_key\"}));\n  resp = Run({\"ZRANGE\", \"store_dist_key\", \"0\", \"-1\", \"WITHSCORES\"});\n  EXPECT_THAT(resp,\n              RespArray(ElementsAre(\"Madrid\", DoubleArg(0), \"Lisbon\", DoubleArg(502.207694))));\n\n  // Test with STORE and other options\n  resp = Run({\"GEORADIUS\", \"key:poq6moq\\\\r\", \"111.38360132204588\", \"-71.17374967857494\",\n              \"69.77510489600115\", \"ft\", \"key\", \"WITHDIST\", \"COUNT\", \"key\", \"WITHCOORD\", \"count\",\n              \"WITHHASH\", \"STORE\"});\n  EXPECT_THAT(resp, ErrArg(\"syntax error\"));\n\n  Run(\"GEOADD Sicily 13.361389 38.115556 Palermo 15.087269 37.502669 Catania\");\n  resp = Run(\"GEORADIUS SICILY 15 37 200 KM COUNT 0\");\n  EXPECT_THAT(resp, ErrArg(\"ERR COUNT must be > 0\"));\n\n  Run(\"GEOADD Sicily 13.583333 37.316667 Agrigento\");\n  resp = Run(\"GEORADIUSBYMEMBER Sicily Agrigento 100 km COUNT 0\");\n  EXPECT_THAT(resp, ErrArg(\"ERR COUNT must be > 0\"));\n\n  resp = Run(\"GEORADIUS Sicily 15 37 200 km COUNT 1\");\n  EXPECT_THAT(resp, \"Agrigento\");\n\n  auto err =\n      \"ERR STORE option in GEORADIUS is not compatible with WITHDIST, WITHHASH and WITHCOORDS options\"sv;\n  resp = Run(\"GEORADIUS Sicily 15 37 200 km WITHDIST STORE result\");\n  EXPECT_THAT(resp, ErrArg(err));\n}\n\nTEST_F(GeoFamilyTest, GeoRadiusRO) {\n  EXPECT_EQ(10, CheckedInt({\"geoadd\",  \"Europe\",    \"13.4050\", \"52.5200\", \"Berlin\",   \"3.7038\",\n                            \"40.4168\", \"Madrid\",    \"9.1427\",  \"38.7369\", \"Lisbon\",   \"2.3522\",\n                            \"48.8566\", \"Paris\",     \"16.3738\", \"48.2082\", \"Vienna\",   \"4.8952\",\n                            \"52.3702\", \"Amsterdam\", \"10.7522\", \"59.9139\", \"Oslo\",     \"23.7275\",\n                            \"37.9838\", \"Athens\",    \"19.0402\", \"47.4979\", \"Budapest\", \"6.2603\",\n                            \"53.3498\", 
\"Dublin\"}));\n\n  // GEORADIUS_RO should not accept arguments for storing (writing data)\n  auto resp =\n      Run({\"GEORADIUS_RO\", \"Europe\", \"13.4050\", \"52.5200\", \"900\", \"KM\", \"STORE_DIST\", \"store_key\"});\n  EXPECT_THAT(resp, ErrArg(\"syntax error\"));\n\n  resp = Run({\"GEORADIUS_RO\", \"Europe\", \"13.4050\", \"52.5200\", \"900\", \"KM\", \"STORE\", \"store_key\"});\n  EXPECT_THAT(resp, ErrArg(\"syntax error\"));\n\n  resp = Run({\"GEORADIUS_RO\", \"Europe\", \"13.4050\", \"52.5200\", \"500\", \"KM\", \"COUNT\", \"3\",\n              \"WITHCOORD\", \"WITHDIST\"});\n  EXPECT_THAT(\n      resp,\n      RespArray(ElementsAre(\n          RespArray(ElementsAre(\"Berlin\", DoubleArg(0.00017343178521311378),\n                                RespArray(ElementsAre(DoubleArg(13.4050), DoubleArg(52.5200))))),\n          RespArray(ElementsAre(\"Dublin\", DoubleArg(487.5619030644293),\n                                RespArray(ElementsAre(DoubleArg(6.2603), DoubleArg(53.3498))))))));\n}\n\nTEST_F(GeoFamilyTest, GeoRadiusByMemberUb) {\n  Run({\"GEOADD\", \"geo\", \"-118.2437\", \"34.0522\", \"972\"});\n  Run({\"GEOADD\", \"geo\", \"-73.935242\", \"40.730610\", \"973\"});\n  Run({\"GEOADD\", \"geo\", \"-122.4194\", \"37.7749\", \"971\"});\n\n  auto resp = Run({\"GEORADIUSBYMEMBER\", \"geo\", \"971\", \"200\", \"mi\", \"WITHCOORD\", \"WITHDIST\", \"COUNT\",\n                   \"40\", \"ASC\"});\n  // Use DoubleArg(0) to tolerate tiny floating-point residuals (e.g. 5e-15) on AVX/FMA builds.\n  EXPECT_THAT(resp, RespArray(ElementsAre(\n                        \"971\", DoubleArg(0),\n                        RespArray(ElementsAre(\"-122.41940170526505\", \"37.77490001056578\")))));\n}\n\n}  // namespace dfly\n"
  },
  {
    "path": "src/server/hll_family.cc",
    "content": "// Copyright 2022, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\nextern \"C\" {\n#include \"redis/hyperloglog.h\"\n}\n\n#include \"base/logging.h\"\n#include \"base/stl_util.h\"\n#include \"facade/error.h\"\n#include \"facade/reply_builder.h\"\n#include \"server/acl/acl_commands_def.h\"\n#include \"server/command_registry.h\"\n#include \"server/conn_context.h\"\n#include \"server/container_utils.h\"\n#include \"server/db_slice.h\"\n#include \"server/engine_shard_set.h\"\n#include \"server/error.h\"\n#include \"server/transaction.h\"\n\nnamespace dfly {\n\nusing namespace std;\nusing namespace facade;\n\nnamespace {\n\ntemplate <typename T>\nvoid HandleOpValueResult(const OpResult<T>& result, SinkReplyBuilder* builder) {\n  static_assert(std::is_integral<T>::value,\n                \"we are only handling types that are integral types in the return types from \"\n                \"here\");\n  if (result) {\n    builder->SendLong(result.value());\n  } else {\n    switch (result.status()) {\n      case OpStatus::WRONG_TYPE:\n        builder->SendError(kWrongTypeErr);\n        break;\n      case OpStatus::OUT_OF_MEMORY:\n        builder->SendError(kOutOfMemory);\n        break;\n      case OpStatus::INVALID_VALUE:\n        builder->SendError(kInvalidHllError);\n        break;\n      case OpStatus::CORRUPTED_HLL:\n        builder->SendError(facade::StatusToMsg(OpStatus::CORRUPTED_HLL));\n        break;\n      default:\n        builder->SendLong(0);  // in case we don't have the value we should just send 0\n        break;\n    }\n  }\n}\n\nHllBufferPtr StringToHllPtr(string_view hll) {\n  return {.hll = (unsigned char*)hll.data(), .size = hll.size()};\n}\n\nbool ConvertToDenseIfNeeded(string* hll) {\n  int hll_validity = isValidHLL(StringToHllPtr(*hll));\n  if (hll_validity == HLL_VALID_SPARSE) {\n    string new_hll;\n    new_hll.resize(getDenseHllSize());\n    int result = 
convertSparseToDenseHll(StringToHllPtr(*hll), StringToHllPtr(new_hll));\n    if (result != 0) {\n      // Conversion failed - HLL data is corrupted\n      return false;\n    }\n    *hll = std::move(new_hll);\n    return true;\n  }\n  return hll_validity == HLL_VALID_DENSE;\n}\n\nOpResult<int> AddToHll(const OpArgs& op_args, string_view key, CmdArgList values) {\n  auto& db_slice = op_args.GetDbSlice();\n\n  string hll;\n\n  auto op_res = db_slice.AddOrFind(op_args.db_cntx, key, OBJ_STRING);\n  RETURN_ON_BAD_STATUS(op_res);\n  auto& res = *op_res;\n  if (res.is_new) {\n    hll.resize(getSparseHllInitSize());\n    initSparseHll(StringToHllPtr(hll));\n  } else {\n    res.it->second.GetString(&hll);\n  }\n  if (isValidHLL(StringToHllPtr(hll)) == HLL_INVALID) {\n    return OpStatus::INVALID_VALUE;\n  }\n\n  int updated = 0;\n  bool is_sparse = isValidHLL(StringToHllPtr(hll)) == HLL_VALID_SPARSE;\n  sds hll_sds;\n  if (is_sparse) {\n    hll_sds = sdsnewlen(hll.data(), hll.size());\n  }\n\n  for (const auto& value : values) {\n    int added;\n    if (is_sparse) {\n      // Inserting to sparse hll might extend it.\n      // We can't use std::string with sds\n      // `promoted` will be assigned 1 if sparse hll was promoted to dense\n      int promoted = 0;\n      added = pfadd_sparse(&hll_sds, (unsigned char*)value.data(), value.size(), &promoted);\n      if (promoted == 1) {\n        is_sparse = false;\n        hll = string{hll_sds, sdslen(hll_sds)};\n        sdsfree(hll_sds);\n        DCHECK_EQ(isValidHLL(StringToHllPtr(hll)), HLL_VALID_DENSE);\n      }\n    } else {\n      added = pfadd_dense(StringToHllPtr(hll), (unsigned char*)value.data(), value.size());\n    }\n    if (added < 0) {\n      return OpStatus::INVALID_VALUE;\n    }\n    updated += added;\n  }\n\n  if (is_sparse) {\n    hll = string{hll_sds, sdslen(hll_sds)};\n    sdsfree(hll_sds);\n  }\n  res.it->second.SetString(hll);\n  return std::min(updated, 1);\n}\n\nvoid PFAdd(CmdArgList args, CommandContext* 
cmd_cntx) {\n  string_view key = ArgS(args, 0);\n  args.remove_prefix(1);\n\n  auto cb = [&](Transaction* t, EngineShard* shard) {\n    return AddToHll(t->GetOpArgs(shard), key, args);\n  };\n\n  OpResult<int> res = cmd_cntx->tx()->ScheduleSingleHopT(std::move(cb));\n  HandleOpValueResult(res, cmd_cntx->rb());\n}\n\nOpResult<int64_t> CountHllsSingle(const OpArgs& op_args, string_view key) {\n  auto& db_slice = op_args.GetDbSlice();\n\n  auto it = db_slice.FindReadOnly(op_args.db_cntx, key, OBJ_STRING);\n  if (it.ok()) {\n    string hll;\n    string_view hll_view = it.value()->second.GetSlice(&hll);\n\n    switch (isValidHLL(StringToHllPtr(hll_view))) {\n      case HLL_VALID_DENSE:\n        break;\n      case HLL_VALID_SPARSE:\n        // Even in the case of a read - we still want to convert the hll to dense format, as it\n        // could originate in Redis (like in replication or rdb load).\n        hll = hll_view;\n        if (!ConvertToDenseIfNeeded(&hll)) {\n          return OpStatus::CORRUPTED_HLL;\n        }\n        hll_view = hll;\n        break;\n      case HLL_INVALID:\n      default:\n        return OpStatus::INVALID_VALUE;\n    }\n\n    return pfcountSingle(StringToHllPtr(hll_view));\n  } else if (it.status() == OpStatus::WRONG_TYPE) {\n    return it.status();\n  } else {\n    // Non existing keys count as 0.\n    return 0;\n  }\n}\n\nOpResult<vector<string>> ReadValues(const OpArgs& op_args, const ShardArgs& keys) {\n  try {\n    vector<string> values;\n    for (string_view key : keys) {\n      auto it = op_args.GetDbSlice().FindReadOnly(op_args.db_cntx, key, OBJ_STRING);\n      if (it.ok()) {\n        string hll;\n        it.value()->second.GetString(&hll);\n        if (!ConvertToDenseIfNeeded(&hll)) {\n          return OpStatus::CORRUPTED_HLL;\n        }\n        values.push_back(std::move(hll));\n      } else if (it.status() == OpStatus::WRONG_TYPE) {\n        return OpStatus::WRONG_TYPE;\n      }\n    }\n    return values;\n  } catch (const 
std::bad_alloc&) {\n    return OpStatus::OUT_OF_MEMORY;\n  }\n}\n\nvector<HllBufferPtr> ConvertShardVector(const vector<vector<string>>& hlls) {\n  vector<HllBufferPtr> ptrs;\n  ptrs.reserve(hlls.size());\n  for (auto& shard_hlls : hlls) {\n    for (auto& hll : shard_hlls) {\n      ptrs.push_back(StringToHllPtr(hll));\n    }\n  }\n  return ptrs;\n}\n\nOpResult<int64_t> PFCountMulti(CmdArgList args, CommandContext* cmd_cntx) {\n  vector<vector<string>> hlls;\n  hlls.resize(shard_set->size());\n\n  atomic<OpStatus> error_status{OpStatus::OK};\n  auto cb = [&](Transaction* t, EngineShard* shard) {\n    ShardId sid = shard->shard_id();\n    ShardArgs shard_args = t->GetShardArgs(shard->shard_id());\n    auto result = ReadValues(t->GetOpArgs(shard), shard_args);\n    if (result.ok()) {\n      hlls[sid] = std::move(result.value());\n    } else {\n      error_status.store(result.status(), memory_order_relaxed);\n    }\n    return OpStatus::OK;\n  };\n\n  OpStatus cb_status = cmd_cntx->tx()->ScheduleSingleHop(std::move(cb));\n  if (cb_status != OpStatus::OK) {\n    return cb_status;\n  }\n\n  OpStatus stored_error = error_status.load(memory_order_relaxed);\n  if (stored_error != OpStatus::OK) {\n    return stored_error;\n  }\n\n  vector<HllBufferPtr> ptrs = ConvertShardVector(hlls);\n  int64_t pf_count = pfcountMulti(ptrs.data(), ptrs.size());\n  if (pf_count < 0) {\n    return OpStatus::INVALID_VALUE;\n  } else {\n    return pf_count;\n  }\n}\n\nvoid PFCount(CmdArgList args, CommandContext* cmd_cntx) {\n  if (args.size() == 1) {\n    string_view key = ArgS(args, 0);\n    auto cb = [&](Transaction* t, EngineShard* shard) {\n      return CountHllsSingle(t->GetOpArgs(shard), key);\n    };\n\n    OpResult<int64_t> res = cmd_cntx->tx()->ScheduleSingleHopT(std::move(cb));\n    HandleOpValueResult(res, cmd_cntx->rb());\n  } else {\n    HandleOpValueResult(PFCountMulti(args, cmd_cntx), cmd_cntx->rb());\n  }\n}\n\nOpResult<int> PFMergeInternal(CmdArgList args, Transaction* tx, 
SinkReplyBuilder* builder) {\n  vector<vector<string>> hlls;\n  hlls.resize(shard_set->size());\n\n  atomic<OpStatus> error_status{OpStatus::OK};\n  auto cb = [&](Transaction* t, EngineShard* shard) {\n    ShardId sid = shard->shard_id();\n    ShardArgs shard_args = t->GetShardArgs(shard->shard_id());\n    auto result = ReadValues(t->GetOpArgs(shard), shard_args);\n    if (result.ok()) {\n      hlls[sid] = std::move(result.value());\n    } else {\n      error_status.store(result.status(), memory_order_relaxed);\n    }\n    return OpStatus::OK;\n  };\n\n  tx->Execute(std::move(cb), false);\n\n  OpStatus stored_error = error_status.load(memory_order_relaxed);\n  if (stored_error != OpStatus::OK) {\n    tx->Conclude();\n    return stored_error;\n  }\n\n  vector<HllBufferPtr> ptrs = ConvertShardVector(hlls);\n\n  string hll;\n  hll.resize(getDenseHllSize());\n  createDenseHll(StringToHllPtr(hll));\n  int result = pfmerge(ptrs.data(), ptrs.size(), StringToHllPtr(hll));\n\n  auto set_cb = [&](Transaction* t, EngineShard* shard) {\n    string_view key = ArgS(args, 0);\n    const OpArgs& op_args = t->GetOpArgs(shard);\n    auto& db_slice = op_args.GetDbSlice();\n    auto op_res = db_slice.AddOrFind(t->GetDbContext(), key, OBJ_STRING);\n    RETURN_ON_BAD_STATUS(op_res);\n    auto& res = *op_res;\n    res.it->second.SetString(hll);\n\n    if (op_args.shard->journal()) {\n      RecordJournal(op_args, \"SET\", ArgSlice{key, hll});\n    }\n\n    return OpStatus::OK;\n  };\n  tx->Execute(std::move(set_cb), true);\n\n  return result;\n}\n\nvoid PFMerge(CmdArgList args, CommandContext* cmd_cntx) {\n  auto* rb = static_cast<RedisReplyBuilder*>(cmd_cntx->rb());\n  OpResult<int> result = PFMergeInternal(args, cmd_cntx->tx(), rb);\n  if (result.ok()) {\n    if (result.value() == 0) {\n      rb->SendOk();\n    } else {\n      rb->SendError(kInvalidHllError);\n    }\n  } else {\n    HandleOpValueResult(result, rb);\n  }\n}\n\n}  // namespace\n\nvoid RegisterHllFamily(CommandRegistry* 
registry) {\n  using CI = CommandId;\n  registry->StartFamily(acl::HYPERLOGLOG);\n  *registry << CI{\"PFADD\", CO::FAST | CO::JOURNALED, -3, 1, 1}.SetHandler(PFAdd)\n            << CI{\"PFCOUNT\", CO::READONLY, -2, 1, -1}.SetHandler(PFCount)\n            << CI{\"PFMERGE\", CO::JOURNALED | CO::NO_AUTOJOURNAL, -2, 1, -1}.SetHandler(PFMerge);\n}\n\n}  // namespace dfly\n"
  },
  {
    "path": "src/server/hll_family_test.cc",
    "content": "// Copyright 2023, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n#include \"base/gtest.h\"\n#include \"base/logging.h\"\n#include \"facade/error.h\"\n#include \"facade/facade_test.h\"\n#include \"server/test_utils.h\"\n\nusing namespace testing;\nusing namespace std;\nusing namespace util;\nusing namespace facade;\n\nnamespace dfly {\n\nclass HllFamilyTest : public BaseFamilyTest {\n protected:\n  std::string GenerateUniqueValue(int index) {\n    return \"Value_{\" + std::to_string(index) + \"}\";\n  }\n};\n\nTEST_F(HllFamilyTest, Simple) {\n  EXPECT_EQ(CheckedInt({\"pfadd\", \"key\", \"1\"}), 1);\n  EXPECT_EQ(CheckedInt({\"pfadd\", \"key\", \"1\"}), 0);\n  EXPECT_EQ(CheckedInt({\"pfcount\", \"key\"}), 1);\n}\n\nTEST_F(HllFamilyTest, Promote) {\n  int unique_values = 20000;\n  // Sparse hll is promoted to dense at the 1660th+- insertion\n  // This value varies if any parameter in hyperloglog.c changes.\n  int promote_i = 1660;\n  // Keep consistent with hyperloglog.c\n  int kHllSparseMaxBytes = 3000;\n  int kHllDenseSize = 12304;\n  for (int i = 0; i < unique_values; ++i) {\n    std::string newkey = GenerateUniqueValue(i);\n    Run({\"pfadd\", \"key\", newkey});\n    if (i < promote_i) {\n      EXPECT_LT(CheckedInt({\"strlen\", \"key\"}), kHllSparseMaxBytes + 1);\n    } else {\n      EXPECT_EQ(CheckedInt({\"strlen\", \"key\"}), kHllDenseSize);\n    }\n  }\n  // HyperLogLog computations come with a\n  // margin of error, with a standard error rate of 0.81%.\n  // Set it to 5% so this test won't fail unless something went wrong badly.\n  EXPECT_LT(std::abs(CheckedInt({\"pfcount\", \"key\"}) - unique_values * 1.0) / unique_values, 0.05);\n}\n\nTEST_F(HllFamilyTest, MultipleValues) {\n  EXPECT_EQ(CheckedInt({\"pfadd\", \"key\", \"1\", \"2\", \"3\"}), 1);\n  EXPECT_EQ(CheckedInt({\"pfcount\", \"key\"}), 3);\n  EXPECT_EQ(CheckedInt({\"pfadd\", \"key\", \"1\", \"2\", \"3\"}), 0);\n  EXPECT_EQ(CheckedInt({\"pfcount\", 
\"key\"}), 3);\n  EXPECT_EQ(CheckedInt({\"pfadd\", \"key\", \"1\"}), 0);\n  EXPECT_EQ(CheckedInt({\"pfcount\", \"key\"}), 3);\n  EXPECT_EQ(CheckedInt({\"pfadd\", \"key\", \"2\"}), 0);\n  EXPECT_EQ(CheckedInt({\"pfcount\", \"key\"}), 3);\n  EXPECT_EQ(CheckedInt({\"pfadd\", \"key\", \"3\"}), 0);\n  EXPECT_EQ(CheckedInt({\"pfcount\", \"key\"}), 3);\n  EXPECT_EQ(CheckedInt({\"pfadd\", \"key\", \"3\", \"4\"}), 1);\n  EXPECT_EQ(CheckedInt({\"pfcount\", \"key\"}), 4);\n  EXPECT_EQ(CheckedInt({\"pfadd\", \"key\", \"5\"}), 1);\n  EXPECT_EQ(CheckedInt({\"pfcount\", \"key\"}), 5);\n  EXPECT_EQ(CheckedInt({\"pfadd\", \"key\", \"1\", \"2\", \"3\", \"4\", \"5\"}), 0);\n  EXPECT_EQ(CheckedInt({\"pfcount\", \"key\"}), 5);\n}\n\nTEST_F(HllFamilyTest, MultipleValues_random) {\n  int insertions = 20000;\n  int unique_values = 0;\n  std::random_device rd;\n  std::mt19937 gen(rd());\n  std::uniform_int_distribution<> dis(1, 20);\n  // cumulated pfadd result\n  for (int i = 0; i < insertions; ++i) {\n    // Number of values to insert\n    int num_values = dis(gen);\n    unique_values += num_values;\n\n    // Prepare the command\n    std::vector<std::string> values;\n    values.reserve(num_values + 2);\n    values.push_back(\"pfadd\");\n    values.push_back(\"key\");\n\n    // Generate and add unique values to the command\n    for (int j = 0; j < num_values; ++j) {\n      values.push_back(GenerateUniqueValue(i * 20 + j));\n    }\n\n    std::vector<std::string_view> commandViews;\n    for (const auto& val : values) {\n      commandViews.push_back(val);\n    }\n    Run(commandViews);\n  }\n  // HyperLogLog computations come with a\n  // margin of error, with a standard error rate of 0.81%.\n  // Set it to 5% so this test won't fail unless something went wrong badly.\n  EXPECT_LT(std::abs(CheckedInt({\"pfcount\", \"key\"}) - unique_values * 1.0) / unique_values, 0.05);\n}\n\nTEST_F(HllFamilyTest, AddInvalid) {\n  EXPECT_EQ(Run({\"set\", \"key\", \"...\"}), \"OK\");\n  
EXPECT_THAT(Run({\"pfadd\", \"key\", \"1\"}), ErrArg(kInvalidHllError));\n  EXPECT_THAT(Run({\"pfcount\", \"key\"}), ErrArg(kInvalidHllError));\n}\n\nTEST_F(HllFamilyTest, OtherType) {\n  Run({\"zadd\", \"key\", \"1\", \"a\"});\n  EXPECT_THAT(Run({\"pfadd\", \"key\", \"1\"}),\n              ErrArg(\"Operation against a key holding the wrong kind of value\"));\n  EXPECT_THAT(Run({\"pfcount\", \"key\"}),\n              ErrArg(\"Operation against a key holding the wrong kind of value\"));\n}\n\nTEST_F(HllFamilyTest, CountEmpty) {\n  EXPECT_EQ(CheckedInt({\"pfcount\", \"nonexisting\"}), 0);\n}\n\nTEST_F(HllFamilyTest, CountInvalid) {\n  EXPECT_EQ(Run({\"set\", \"key\", \"...\"}), \"OK\");\n  EXPECT_THAT(Run({\"pfcount\", \"key\"}), ErrArg(kInvalidHllError));\n}\n\nTEST_F(HllFamilyTest, CountMultiple) {\n  EXPECT_EQ(CheckedInt({\"pfadd\", \"key1\", \"1\", \"2\", \"3\"}), 1);\n  EXPECT_EQ(CheckedInt({\"pfcount\", \"key1\"}), 3);\n\n  EXPECT_EQ(CheckedInt({\"pfadd\", \"key2\", \"1\", \"2\", \"3\"}), 1);\n  EXPECT_EQ(CheckedInt({\"pfcount\", \"key2\"}), 3);\n\n  EXPECT_EQ(CheckedInt({\"pfadd\", \"key3\", \"2\", \"3\"}), 1);\n  EXPECT_EQ(CheckedInt({\"pfcount\", \"key3\"}), 2);\n\n  EXPECT_EQ(CheckedInt({\"pfadd\", \"key4\", \"4\", \"5\"}), 1);\n  EXPECT_EQ(CheckedInt({\"pfcount\", \"key4\"}), 2);\n\n  EXPECT_EQ(CheckedInt({\"pfcount\", \"key1\", \"key4\"}), 5);\n\n  EXPECT_EQ(CheckedInt({\"pfcount\", \"non-existing-key1\", \"non-existing-key2\"}), 0);\n\n  EXPECT_EQ(CheckedInt({\"pfcount\", \"key1\", \"non-existing-key\"}), 3);\n\n  EXPECT_EQ(CheckedInt({\"pfcount\", \"key1\", \"key2\"}), 3);\n  EXPECT_EQ(CheckedInt({\"pfcount\", \"key1\", \"key3\"}), 3);\n  EXPECT_EQ(CheckedInt({\"pfcount\", \"key1\", \"key2\", \"key3\"}), 3);\n  EXPECT_EQ(CheckedInt({\"pfcount\", \"key1\", \"key2\", \"key3\", \"key4\"}), 5);\n  EXPECT_EQ(CheckedInt({\"pfcount\", \"key1\", \"key2\", \"key3\", \"key4\", \"non-existing\"}), 5);\n  EXPECT_EQ(CheckedInt({\"pfcount\", \"key1\", \"key4\"}), 
5);\n}\n\nTEST_F(HllFamilyTest, CountMultipleWithWrongType) {\n  EXPECT_EQ(Run({\"set\", \"key1\", \"value1\"}), \"OK\");\n  EXPECT_EQ(CheckedInt({\"pfadd\", \"key\", \"value\"}), 1);\n  EXPECT_EQ(CheckedInt({\"pfadd\", \"list1 element1\", \"data\"}), 1);\n\n  EXPECT_THAT(Run({\"pfcount\", \"key1\", \"key\", \"list1 element1\"}),\n              ErrArg(\"INVALIDOBJ Corrupted HLL object detected.\"));\n}\n\nTEST_F(HllFamilyTest, MergeToNew) {\n  EXPECT_EQ(CheckedInt({\"pfadd\", \"key1\", \"1\", \"2\", \"3\"}), 1);\n  EXPECT_EQ(CheckedInt({\"pfadd\", \"key2\", \"4\", \"5\"}), 1);\n  EXPECT_EQ(Run({\"pfmerge\", \"key3\", \"key1\", \"key2\"}), \"OK\");\n  EXPECT_EQ(CheckedInt({\"pfcount\", \"key3\"}), 5);\n}\n\nTEST_F(HllFamilyTest, MergeToExisting) {\n  EXPECT_EQ(CheckedInt({\"pfadd\", \"key1\", \"1\", \"2\", \"3\"}), 1);\n  EXPECT_EQ(CheckedInt({\"pfadd\", \"key2\", \"4\", \"5\"}), 1);\n  EXPECT_EQ(Run({\"pfmerge\", \"key3\", \"key2\", \"key1\"}), \"OK\");\n  EXPECT_EQ(CheckedInt({\"pfcount\", \"key3\"}), 5);\n  EXPECT_EQ(Run({\"pfmerge\", \"key3\", \"key3\"}), \"OK\");\n  EXPECT_EQ(CheckedInt({\"pfcount\", \"key3\"}), 5);\n  EXPECT_EQ(Run({\"pfmerge\", \"key3\"}), \"OK\");\n  EXPECT_EQ(CheckedInt({\"pfcount\", \"key3\"}), 5);\n  EXPECT_EQ(CheckedInt({\"pfadd\", \"key4\", \"4\", \"5\", \"6\"}), 1);\n  EXPECT_EQ(Run({\"pfmerge\", \"key3\", \"key4\"}), \"OK\");\n  EXPECT_EQ(CheckedInt({\"pfcount\", \"key3\"}), 6);\n}\n\nTEST_F(HllFamilyTest, MergeNonExisting) {\n  EXPECT_EQ(CheckedInt({\"pfadd\", \"key1\", \"1\", \"2\", \"3\"}), 1);\n  EXPECT_EQ(Run({\"pfmerge\", \"key3\", \"key1\", \"key2\"}), \"OK\");\n  EXPECT_EQ(CheckedInt({\"pfcount\", \"key3\"}), 3);\n}\n\nTEST_F(HllFamilyTest, MergeOverlapping) {\n  EXPECT_EQ(CheckedInt({\"pfadd\", \"key1\", \"1\", \"2\", \"3\"}), 1);\n  EXPECT_EQ(CheckedInt({\"pfadd\", \"key2\", \"2\", \"3\"}), 1);\n  EXPECT_EQ(CheckedInt({\"pfadd\", \"key3\", \"1\", \"3\"}), 1);\n  EXPECT_EQ(CheckedInt({\"pfadd\", \"key4\", \"2\", \"3\"}), 
1);\n  EXPECT_EQ(CheckedInt({\"pfadd\", \"key5\", \"3\"}), 1);\n  EXPECT_EQ(Run({\"pfmerge\", \"key6\", \"key1\", \"key2\", \"key3\", \"key4\", \"key5\"}), \"OK\");\n  EXPECT_EQ(CheckedInt({\"pfcount\", \"key6\"}), 3);\n}\n\nTEST_F(HllFamilyTest, MergeInvalid) {\n  Run({\"exists\", \"key1\", \"key4\"});\n  ASSERT_EQ(GetDebugInfo().shards_count, 2);  // ensure 2 shards\n\n  EXPECT_EQ(CheckedInt({\"pfadd\", \"key1\", \"1\", \"2\", \"3\"}), 1);\n  EXPECT_EQ(Run({\"set\", \"key4\", \"...\"}), \"OK\");\n  EXPECT_THAT(Run({\"pfmerge\", \"key1\", \"key4\"}),\n              ErrArg(\"INVALIDOBJ Corrupted HLL object detected.\"));\n  EXPECT_EQ(CheckedInt({\"pfcount\", \"key1\"}), 3);\n}\n\nTEST_F(HllFamilyTest, MergeWithInvalidHllFormat) {\n  EXPECT_EQ(CheckedInt({\"pfadd\", \"complex@key \\\"weird!field\\\" \\\"value\\\\nwith\\\\tescape sequences\\\"\",\n                        \"some_element\"}),\n            1);\n  EXPECT_EQ(CheckedInt({\"append\", \"complex@key \\\"weird!field\\\" \\\"value\\\\nwith\\\\tescape sequences\\\"\",\n                        \"corrupt_data\"}),\n            33);\n  EXPECT_EQ(CheckedInt({\"pfadd\", \"\\\"key with \\\\\\\"quotes\\\\\\\"\\\" \\\"value with \\\\\\\\backslashes\\\\\\\\\\\"\",\n                        \"element1\"}),\n            1);\n  EXPECT_THAT(Run({\"pfmerge\", \"result_key\",\n                   \"complex@key \\\"weird!field\\\" \\\"value\\\\nwith\\\\tescape sequences\\\"\",\n                   \"\\\"key with \\\\\\\"quotes\\\\\\\"\\\" \\\"value with \\\\\\\\backslashes\\\\\\\\\\\"\"}),\n              ErrArg(\"INVALIDOBJ Corrupted HLL object detected.\"));\n}\n\n}  // namespace dfly\n"
  },
  {
    "path": "src/server/hset_family.cc",
    "content": "// Copyright 2022, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#include \"server/hset_family.h\"\n\n#include <absl/strings/ascii.h>\n\nextern \"C\" {\n#include \"redis/listpack.h\"\n#include \"redis/redis_aux.h\"\n#include \"redis/util.h\"\n#include \"redis/zmalloc.h\"\n}\n\n#include \"base/logging.h\"\n#include \"core/detail/listpack_wrap.h\"\n#include \"core/overloaded.h\"\n#include \"core/string_map.h\"\n#include \"facade/cmd_arg_parser.h\"\n#include \"server/acl/acl_commands_def.h\"\n#include \"server/command_registry.h\"\n#include \"server/conn_context.h\"\n#include \"server/container_utils.h\"\n#include \"server/db_slice.h\"\n#include \"server/engine_shard_set.h\"\n#include \"server/error.h\"\n#include \"server/family_utils.h\"\n#include \"server/tiered_storage.h\"\n#include \"server/tiering/decoders.h\"\n#include \"server/tiering/serialized_map.h\"\n#include \"server/transaction.h\"\n\nusing namespace std;\n\nnamespace dfly {\n\nusing namespace facade;\nusing absl::SimpleAtoi;\n\nnamespace {\n\nusing IncrByParam = std::variant<double, int64_t>;\nusing OptStr = std::optional<std::string>;\nenum GetAllMode : uint8_t { FIELDS = 1, VALUES = 2 };\n\nbool IsGoodForListpack(CmdArgList args, const uint8_t* lp) {\n  size_t sum = 0;\n  for (auto s : args) {\n    if (s.size() > server.max_map_field_len)\n      return false;\n    sum += s.size();\n  }\n\n  return lpBytes(const_cast<uint8_t*>(lp)) + sum < server.max_listpack_map_bytes;\n}\n\nusing container_utils::GetStringMap;\n\n// Generic wrapper for multiple underlying map <string, string> types\n// holding a variant of:\n// 1. Listpack\n// 2. StringMap\n// 3. 
SerializedMap (tiered)\nstruct HMapWrap {\n private:\n  template <typename F> decltype(auto) VisitRef(F f) const {  // Cast T* to T&\n    return std::visit(Overloaded{[&f](auto* s) { return f(*s); }, f}, impl_);\n  }\n\n  template <typename F> decltype(auto) VisitMut(F& f) {\n    auto serialized_bust = [&](tiering::SerializedMap* s) {\n      ABSL_UNREACHABLE();                          // Serialized maps should never be mutable\n      return f(static_cast<StringMap*>(nullptr));  // purely for same return type\n    };\n    return std::visit(Overloaded{f, serialized_bust}, impl_);\n  }\n\n public:\n  HMapWrap(const PrimeValue& pv, DbContext db_cntx) {\n    DCHECK(!pv.IsExternal() || pv.IsCool());\n    if (pv.Encoding() == kEncodingListPack)\n      impl_ = detail::ListpackWrap{static_cast<uint8_t*>(pv.RObjPtr())};\n    else\n      impl_ = GetStringMap(pv, db_cntx);\n  }\n\n  explicit HMapWrap(tiering::SerializedMap* sm) : impl_{sm} {\n  }\n\n  size_t Length() const {\n    Overloaded ov{\n        [](StringMap* s) { return s->UpperBoundSize(); },\n        [](const detail::ListpackWrap& lw) { return lw.size(); },\n        [](tiering::SerializedMap* s) { return s->size(); },\n    };\n    return visit(ov, impl_);\n  }\n\n  auto Find(std::string_view key) const {\n    using RT = optional<pair<string_view, string_view>>;\n    return VisitRef([key](auto& h) -> RT {\n      if (auto it = h.Find(key); it != h.end())\n        return *it;\n      return std::nullopt;\n    });\n  }\n\n  auto Range() const {\n    auto f = [](auto p) -> pair<string_view, string_view> { return p; };  // implicit conversion\n    using IT = base::it::CompoundIterator<decltype(f), detail::ListpackWrap::Iterator,\n                                          StringMap::iterator, tiering::SerializedMap::Iterator>;\n    auto cb = [f](auto& h) -> std::pair<IT, IT> {\n      return {{f, h.begin()}, {std::nullopt, h.end()}};\n    };\n    return base::it::Range(VisitRef(cb));\n  }\n\n  bool Erase(std::string_view 
key) {\n    Overloaded ov{[key](StringMap* s) { return s->Erase(key); },\n                  [key](detail::ListpackWrap& lw) { return lw.Delete(key); }};\n    return VisitMut(ov);\n  }\n\n  void AddOrUpdate(std::string_view key, std::string_view value) {\n    Overloaded ov{[&](StringMap* sm) { sm->AddOrUpdate(key, value, UINT32_MAX, true); },\n                  [&](detail::ListpackWrap& lw) { lw.Insert(key, value, false); }};\n    VisitMut(ov);\n  }\n\n  void Launder(PrimeValue& pv) {\n    Overloaded ov{\n        [](StringMap* s) {},\n        [&](detail::ListpackWrap& lw) { pv.SetRObjPtr(lw.GetPointer()); },\n    };\n    VisitMut(ov);\n  }\n\n  template <typename T> optional<T> Get() const {\n    if (holds_alternative<T>(impl_))\n      return get<T>(impl_);\n    return nullopt;\n  }\n\n private:\n  variant<StringMap*, tiering::SerializedMap*, detail::ListpackWrap> impl_;\n};  // namespace dfly\n\n// Delete if length is zero\nvoid DeleteHw(HMapWrap& hw, const OpArgs& op_args, std::string_view key) {\n  auto& db_slice = op_args.GetDbSlice();\n  if (auto del_it = db_slice.FindMutable(op_args.db_cntx, key, OBJ_HASH); del_it) {\n    del_it->post_updater.Run();\n    db_slice.Del(op_args.db_cntx, del_it->it);\n    if (op_args.shard->journal()) {\n      RecordJournal(op_args, \"DEL\"sv, {key});\n    }\n  }\n}\n\nauto KeyAndArgs(Transaction* t, EngineShard* es) {\n  return std::make_pair(t->GetShardArgs(es->shard_id()).Front(), t->GetOpArgs(es));\n}\n\n// A wrappable callback returns a OpResult<T> or the future version of it for tiered values.\n// Because the top-level value needs to be an OpResult, the variant is wrapped as an OpResult again.\n// However, we can take the \"result\" out of the bare value and keep it only on the top-level.\ntemplate <typename T> using CbVariant = std::variant<T, ::util::fb2::Future<OpResult<T>>>;\n\n// Unwrap possibly future result to a regular one\ntemplate <typename T> OpResult<T> Unwrap(OpResult<CbVariant<T>> result) {\n  if 
(!result.ok())\n    return result.status();\n\n  Overloaded ov{\n      [](T res) -> OpResult<T> { return res; },\n      [](util::fb2::Future<OpResult<T>> fut) -> OpResult<T> { return fut.Get(); },\n  };\n  return visit(ov, std::move(result).value());\n}\n\n// Execute callback on generic HMapWrap, possibly on offloaded value and waiting for result\ntemplate <typename F, typename T = typename std::invoke_result_t<F, HMapWrap>::Type>\nOpResult<T> ExecuteRO(Transaction* tx, F&& f) {\n  auto shard_cb = [f = std::forward<F>(f)](Transaction* t,\n                                           EngineShard* es) -> OpResult<CbVariant<T>> {\n    // Fetch value of hash type\n    auto [key, op_args] = KeyAndArgs(t, es);\n    auto it_res = op_args.GetDbSlice().FindReadOnly(op_args.db_cntx, key, OBJ_HASH);\n    RETURN_ON_BAD_STATUS(it_res);\n    auto& pv = (*it_res)->second;\n\n    // Enqueue read for future values\n    if (pv.IsExternal() && !pv.IsCool()) {\n      using D = tiering::SerializedMapDecoder;\n      util::fb2::Future<OpResult<T>> fut;\n      auto read_cb = [fut, f = std::move(f)](io::Result<D*> res) mutable {\n        HMapWrap hw{res.value()->Get()};\n        fut.Resolve(f(hw));\n      };\n\n      es->tiered_storage()->Read(op_args.db_cntx.db_index, key, pv.GetExternalSlice(), D{},\n                                 std::move(read_cb));\n      return CbVariant<T>{std::move(fut)};\n    }\n\n    HMapWrap hw{pv, op_args.db_cntx};\n    auto res = f(hw);\n\n    if (hw.Length() == 0)  // Expirations might have emptied it\n      DeleteHw(hw, op_args, key);\n\n    // Move result into variant or keep error status\n    RETURN_ON_BAD_STATUS(res);\n    return CbVariant<T>{std::move(res).value()};\n  };\n\n  return Unwrap(tx->ScheduleSingleHopT(std::move(shard_cb)));\n}\n\n// Wrap write handler\ntemplate <typename F> auto WrapW(F&& f) {\n  using RT = std::invoke_result_t<F, HMapWrap&>;\n  return [f = std::forward<F>(f)](Transaction* t, EngineShard* es) -> RT {\n    auto [key, op_args] 
= KeyAndArgs(t, es);\n\n    auto it_res = op_args.GetDbSlice().FindMutable(op_args.db_cntx, key, OBJ_HASH);\n    RETURN_ON_BAD_STATUS(it_res);\n    auto& pv = it_res->it->second;\n\n    // Remove document before modification\n    op_args.shard->search_indices()->RemoveDoc(key, op_args.db_cntx, pv);\n\n    HMapWrap hw{pv, op_args.db_cntx};\n    auto res = f(hw);\n    hw.Launder(pv);\n\n    // Run post updater\n    it_res->post_updater.Run();\n\n    if (hw.Length() == 0)\n      DeleteHw(hw, op_args, key);\n    else\n      op_args.shard->search_indices()->AddDoc(key, op_args.db_cntx, &pv);\n\n    return res;\n  };\n}\n\nsize_t EstimateListpackMinBytes(CmdArgList members) {\n  size_t bytes = 0;\n  for (const auto& member : members) {\n    bytes += (member.size() + 1);  // string + at least 1 byte for string header.\n  }\n  return bytes;\n}\n\nOpStatus IncrementValue(optional<string_view> prev_val, IncrByParam* param) {\n  if (holds_alternative<double>(*param)) {\n    double incr = get<double>(*param);\n    double value = 0;\n\n    if (prev_val) {\n      if (!ParseDouble(*prev_val, &value)) {\n        return OpStatus::INVALID_VALUE;\n      }\n    }\n    value += incr;\n    if (isnan(value) || isinf(value)) {\n      return OpStatus::NAN_OR_INF_DURING_INCR;\n    }\n\n    param->emplace<double>(value);\n\n    return OpStatus::OK;\n  }\n\n  // integer increment\n  long long old_val = 0;\n  if (prev_val) {\n    if (!string2ll(prev_val->data(), prev_val->size(), &old_val)) {\n      return OpStatus::INVALID_VALUE;\n    }\n  }\n\n  int64_t incr = get<int64_t>(*param);\n  if ((incr < 0 && old_val < 0 && incr < (LLONG_MIN - old_val)) ||\n      (incr > 0 && old_val > 0 && incr > (LLONG_MAX - old_val))) {\n    return OpStatus::OUT_OF_RANGE;\n  }\n\n  int64_t new_val = old_val + incr;\n  param->emplace<int64_t>(new_val);\n\n  return OpStatus::OK;\n}\n\nOpStatus OpIncrBy(const OpArgs& op_args, string_view key, string_view field, IncrByParam* param) {\n  auto& db_slice = 
op_args.GetDbSlice();\n  auto op_res = db_slice.AddOrFind(op_args.db_cntx, key, OBJ_HASH);\n  RETURN_ON_BAD_STATUS(op_res);\n\n  auto& add_res = *op_res;\n  PrimeValue& pv = add_res.it->second;\n  if (add_res.is_new) {\n    pv.InitRobj(OBJ_HASH, kEncodingListPack, lpNew(0));\n  } else {\n    op_args.shard->search_indices()->RemoveDoc(key, op_args.db_cntx, add_res.it->second);\n\n    if (pv.Encoding() == kEncodingListPack) {\n      uint8_t* lp = (uint8_t*)pv.RObjPtr();\n      size_t lpb = lpBytes(lp);\n\n      if (lpb >= server.max_listpack_map_bytes) {\n        StringMap* sm = HSetFamily::ConvertToStrMap(lp);\n        pv.InitRobj(OBJ_HASH, kEncodingStrMap2, sm);\n      }\n    }\n  }\n\n  HMapWrap hw{pv, op_args.db_cntx};\n  optional<string_view> res;\n  if (!add_res.is_new) {\n    if (auto it = hw.Find(field); it)\n      res = it->second;\n  }\n\n  if (OpStatus status = IncrementValue(res, param); status != OpStatus::OK)\n    return status;\n\n  if (holds_alternative<double>(*param)) {\n    double new_val = get<double>(*param);\n    char buf[128];\n    char* str = RedisReplyBuilder::FormatDouble(new_val, buf, sizeof(buf));\n    hw.AddOrUpdate(field, str);\n  } else {  // integer increment\n    int64_t new_val = get<int64_t>(*param);\n    absl::AlphaNum an(new_val);\n    hw.AddOrUpdate(field, an.Piece());\n  }\n\n  hw.Launder(pv);\n  op_args.shard->search_indices()->AddDoc(key, op_args.db_cntx, &pv);\n\n  return OpStatus::OK;\n}\n\nOpResult<StringVec> OpScan(const HMapWrap& hw, uint64_t* cursor, const ScanOpts& scan_op) {\n  /* We set the max number of iterations to ten times the specified\n   * COUNT, so if the hash table is in a pathological state (very\n   * sparsely populated) we avoid to block too much time at the cost\n   * of returning no or very few elements. (taken from redis code at db.c line 904 */\n  constexpr size_t INTERATION_FACTOR = 10;\n\n  StringVec res;\n  // If NOVALUES, we expect 1 element per match (key). 
Otherwise, 2 elements (key + value).\n  uint32_t count = scan_op.limit * (scan_op.novalues ? 1 : 2);\n\n  if (auto lw = hw.Get<detail::ListpackWrap>(); lw) {\n    // TODO: Optimize unnecessary value reads from iterator\n    for (const auto [key, value] : *lw) {\n      if (scan_op.Matches(key)) {\n        res.emplace_back(key);\n        if (!scan_op.novalues) {\n          res.emplace_back(value);\n        }\n      }\n    }\n    *cursor = 0;\n  } else {\n    StringMap* sm = *hw.Get<StringMap*>();\n\n    long max_iterations = count * INTERATION_FACTOR;\n\n    // note about this lambda - don't capture here! it should be convertible to C function!\n    auto scanCb = [&](const void* obj) {\n      sds val = (sds)obj;\n      size_t len = sdslen(val);\n      if (scan_op.Matches(string_view(val, len))) {\n        res.emplace_back(val, len);\n        if (!scan_op.novalues) {\n          val = StringMap::GetValue(val);\n          res.emplace_back(val, sdslen(val));\n        }\n      }\n    };\n\n    do {\n      *cursor = sm->Scan(*cursor, scanCb);\n    } while (*cursor && max_iterations-- && res.size() < count);\n  }\n\n  return res;\n}\n\nOpResult<vector<OptStr>> OpHMGet(const HMapWrap& hw, CmdArgList fields) {\n  DCHECK(!fields.empty());\n\n  std::vector<OptStr> result(fields.size());\n  if (auto lw = hw.Get<detail::ListpackWrap>(); lw) {\n    absl::flat_hash_map<string_view, absl::InlinedVector<size_t, 3>> reverse;\n    reverse.reserve(fields.size() + 1);\n    for (size_t i = 0; i < fields.size(); ++i) {\n      reverse[ArgS(fields, i)].push_back(i);  // map fields to their index.\n    }\n\n    for (const auto [key, value] : *lw) {\n      if (auto it = reverse.find(key); it != reverse.end()) {\n        for (size_t index : it->second) {\n          DCHECK_LT(index, result.size());\n          result[index].emplace(value);\n        }\n      }\n    }\n  } else {\n    StringMap* sm = *hw.Get<StringMap*>();\n    for (size_t i = 0; i < fields.size(); ++i) {\n      if (auto it = 
sm->Find(fields[i]); it != sm->end()) {\n        result[i].emplace(it->second, sdslen(it->second));\n      }\n    }\n  }\n\n  return result;\n}\n\nstruct OpSetParams {\n  bool skip_if_exists = false;\n  uint32_t ttl = UINT32_MAX;\n  bool keepttl = false;\n};\n\nOpResult<uint32_t> OpSet(const OpArgs& op_args, string_view key, CmdArgList values,\n                         const OpSetParams& op_sp = OpSetParams{}) {\n  DCHECK(!values.empty() && 0 == values.size() % 2);\n  VLOG(2) << \"OpSet(\" << key << \")\";\n\n  auto& db_slice = op_args.GetDbSlice();\n  auto op_res = db_slice.AddOrFind(op_args.db_cntx, key, OBJ_HASH);\n  RETURN_ON_BAD_STATUS(op_res);\n  auto& add_res = *op_res;\n\n  uint8_t* lp = nullptr;\n  auto& it = add_res.it;\n  PrimeValue& pv = it->second;\n\n  if (add_res.is_new) {\n    if (op_sp.ttl == UINT32_MAX) {\n      lp = lpNew(0);\n      pv.InitRobj(OBJ_HASH, kEncodingListPack, lp);\n    } else {\n      pv.InitRobj(OBJ_HASH, kEncodingStrMap2, CompactObj::AllocateMR<StringMap>());\n    }\n  } else {\n    op_args.shard->search_indices()->RemoveDoc(key, op_args.db_cntx, it->second);\n  }\n\n  if (pv.Encoding() == kEncodingListPack) {\n    lp = (uint8_t*)pv.RObjPtr();\n\n    if (op_sp.ttl != UINT32_MAX || !IsGoodForListpack(values, lp)) {\n      StringMap* sm = HSetFamily::ConvertToStrMap(lp);\n      pv.InitRobj(OBJ_HASH, kEncodingStrMap2, sm);\n      lp = nullptr;\n    }\n  }\n\n  unsigned created = 0;\n\n  if (lp) {\n    size_t malloc_reserved = zmalloc_size(lp);\n    size_t min_sz = EstimateListpackMinBytes(values);\n    if (min_sz > malloc_reserved) {\n      lp = (uint8_t*)zrealloc(lp, min_sz);\n    }\n    detail::ListpackWrap lw{lp};\n    for (size_t i = 0; i < values.size(); i += 2) {\n      created += lw.Insert(values[i], values[i + 1], op_sp.skip_if_exists);\n    }\n    pv.SetRObjPtr(lw.GetPointer());\n  } else {\n    DCHECK_EQ(kEncodingStrMap2, pv.Encoding());  // Dictionary\n    StringMap* sm = GetStringMap(pv, op_args.db_cntx);\n    
sm->Reserve(values.size() / 2);\n    bool added;\n\n    for (size_t i = 0; i < values.size(); i += 2) {\n      string_view field = values[i];\n      string_view value = values[i + 1];\n      if (op_sp.skip_if_exists)\n        added = sm->AddOrSkip(field, value, op_sp.ttl);\n      else\n        added = sm->AddOrUpdate(field, value, op_sp.ttl, op_sp.keepttl);\n\n      created += unsigned(added);\n    }\n  }\n\n  op_args.shard->search_indices()->AddDoc(key, op_args.db_cntx, &pv);\n\n  if (auto* ts = op_args.shard->tiered_storage(); ts) {\n    StashPrimeValue(op_args.db_cntx.db_index, key, &pv, ts, nullptr);\n  }\n\n  return created;\n}\n\nvoid HGetGeneric(CmdArgList args, uint8_t getall_mask, CommandContext* cmd_cntx) {\n  auto cb = [getall_mask](const HMapWrap& hw) -> OpResult<vector<string>> {\n    vector<string> res;\n    bool keyval = (getall_mask == (FIELDS | VALUES));\n    res.reserve(hw.Length() * (keyval ? 2 : 1));\n\n    for (const auto& [key, value] : hw.Range()) {\n      if (getall_mask & FIELDS)\n        res.emplace_back(key);\n      if (getall_mask & VALUES)\n        res.emplace_back(value);\n    }\n\n    return res;\n  };\n\n  OpResult<vector<string>> result = ExecuteRO(cmd_cntx->tx(), cb);\n  auto* rb = static_cast<RedisReplyBuilder*>(cmd_cntx->rb());\n  switch (result.status()) {\n    case OpStatus::OK:\n    case OpStatus::KEY_NOTFOUND: {\n      bool is_map = (getall_mask == (VALUES | FIELDS));\n      return rb->SendBulkStrArr(*result, is_map ? 
CollectionType::MAP : CollectionType::ARRAY);\n    }\n    default:\n      return cmd_cntx->SendError(result.status());\n  };\n}\n\nOpResult<vector<long>> OpHExpire(const OpArgs& op_args, string_view key, uint32_t ttl_sec,\n                                 ExpireFlags flags, CmdArgList values) {\n  auto& db_slice = op_args.GetDbSlice();\n  auto op_res = db_slice.FindMutable(op_args.db_cntx, key, OBJ_HASH);\n  RETURN_ON_BAD_STATUS(op_res);\n\n  PrimeValue* pv = &((*op_res).it->second);\n  auto res = HSetFamily::SetFieldsExpireTime(op_args, ttl_sec, flags, key, values, pv);\n\n  // If it is a hash which became empty after expiring fields, we must delete the key safely.\n  // We use DelMutable which consumes the iterator/updater to prevent the crash.\n  if (pv->Encoding() == kEncodingStrMap2) {\n    auto* sm = static_cast<StringMap*>(pv->RObjPtr());\n    if (sm->UpperBoundSize() == 0) {\n      db_slice.DelMutable(op_args.db_cntx, std::move(*op_res));\n    }\n  }\n\n  return res;\n}\n\n// HSETEX key [NX] [KEEPTTL] tll_sec field value field value ...\nvoid HSetEx(CmdArgList args, CommandContext* cmd_cntx) {\n  CmdArgParser parser{args};\n\n  string_view key = parser.Next();\n  OpSetParams op_sp;\n\n  const auto option_already_set = [&cmd_cntx] {\n    return cmd_cntx->SendError(WrongNumArgsError(cmd_cntx->cid()->name()), kSyntaxErrType);\n  };\n\n  while (true) {\n    if (parser.Check(\"NX\")) {\n      if (op_sp.skip_if_exists) {\n        return option_already_set();\n      }\n      op_sp.skip_if_exists = true;\n    } else if (parser.Check(\"KEEPTTL\")) {\n      if (op_sp.keepttl) {\n        return option_already_set();\n      }\n      op_sp.keepttl = true;\n    } else {\n      break;\n    }\n  }\n\n  op_sp.ttl = parser.Next<uint32_t>();\n  auto* rb = cmd_cntx->rb();\n  if (parser.HasError()) {\n    return cmd_cntx->SendError(parser.TakeError().MakeReply());\n  }\n\n  constexpr uint32_t kMaxTtl = (1UL << 26);\n  if (op_sp.ttl == 0 || op_sp.ttl > kMaxTtl) {\n    return 
cmd_cntx->SendError(kInvalidIntErr);\n  }\n\n  CmdArgList fields = parser.Tail();\n\n  if (fields.size() % 2 != 0) {\n    return cmd_cntx->SendError(facade::WrongNumArgsError(cmd_cntx->cid()->name()), kSyntaxErrType);\n  }\n\n  auto cb = [&](Transaction* t, EngineShard* shard) {\n    return OpSet(t->GetOpArgs(shard), key, fields, op_sp);\n  };\n\n  OpResult<uint32_t> result = cmd_cntx->tx()->ScheduleSingleHopT(std::move(cb));\n  if (result) {\n    rb->SendLong(*result);\n  } else {\n    cmd_cntx->SendError(result.status());\n  }\n}\n\nstruct HSetReplies {\n  void Send(OpResult<uint32_t> result) const {\n    switch (result.status()) {\n      case OpStatus::OK:\n      case OpStatus::KEY_NOTFOUND:\n        return cmd_cntx->SendLong(result.value_or(0));\n      default:\n        return cmd_cntx->SendError(result.status());\n    };\n  }\n\n  CommandContext* cmd_cntx;\n};\n\nvoid CmdHDel(CmdArgList args, CommandContext* cmd_cntx) {\n  auto cb = [&](HMapWrap& hw) -> OpResult<uint32_t> {\n    unsigned deleted = 0;\n    for (string_view s : args.subspan(1))\n      deleted += hw.Erase(s);\n    return deleted;\n  };\n  HSetReplies{cmd_cntx}.Send(cmd_cntx->tx()->ScheduleSingleHopT(WrapW(cb)));\n}\n\nvoid CmdHExpire(CmdArgList args, CommandContext* cmd_cntx) {\n  CmdArgParser parser{args};\n  using MinMaxTtl = FInt<0, (1 << 26)>;\n  auto [key, ttl_sec] = parser.Next<string_view, MinMaxTtl>();\n\n  ExpireFlags flags = parser\n                          .TryMapNext(\"NX\", ExpireFlags::EXPIRE_NX, \"XX\", ExpireFlags::EXPIRE_XX,\n                                      \"GT\", ExpireFlags::EXPIRE_GT, \"LT\", ExpireFlags::EXPIRE_LT)\n                          .value_or(ExpireFlags::EXPIRE_ALWAYS);\n\n  auto* rb = static_cast<RedisReplyBuilder*>(cmd_cntx->rb());\n  if (parser.HasError()) {\n    return cmd_cntx->SendError(parser.TakeError().MakeReply());\n  }\n  if (!parser.Check(\"FIELDS\"sv)) {\n    return cmd_cntx->SendError(\"Mandatory argument FIELDS is missing or not at the right 
position\",\n                               kSyntaxErrType);\n  }\n\n  uint32_t numFields = parser.Next<uint32_t>();\n\n  CmdArgList fields = parser.Tail();\n  if (fields.size() != numFields) {\n    return rb->SendError(\"The `numfields` parameter must match the number of arguments\",\n                         kSyntaxErrType);\n  }\n\n  RETURN_ON_PARSE_ERROR(parser, cmd_cntx);\n\n  auto cb = [&](Transaction* t, EngineShard* shard) {\n    return OpHExpire(t->GetOpArgs(shard), key, ttl_sec, flags, fields);\n  };\n  OpResult<vector<long>> result = cmd_cntx->tx()->ScheduleSingleHopT(std::move(cb));\n\n  switch (result.status()) {\n    case OpStatus::OK:\n      return rb->SendLongArr(absl::MakeConstSpan(result.value()));\n    case OpStatus::KEY_NOTFOUND:\n      return rb->SendLongArr(absl::MakeConstSpan(vector<long>(numFields, -2)));\n    default:\n      return cmd_cntx->SendError(result.status());\n  };\n}\n\nOpResult<vector<long>> OpHTtl(Transaction* t, EngineShard* shard, string_view key,\n                              CmdArgList fields) {\n  auto& db_slice = t->GetDbSlice(shard->shard_id());\n  const DbContext& db_cntx = t->GetDbContext();\n  auto it_res = db_slice.FindReadOnly(db_cntx, key, OBJ_HASH);\n  RETURN_ON_BAD_STATUS(it_res);\n\n  const PrimeValue& pv = (*it_res)->second;\n  vector<long> res;\n  res.reserve(fields.size());\n\n  for (auto field : fields) {\n    int32_t exp_time = HSetFamily::FieldExpireTime(db_cntx, pv, field);\n    if (exp_time <= 0) {\n      // -3 from FieldExpireTime means field not found -> HTTL returns -2\n      // -1 means no expiry -> stays -1\n      res.push_back(exp_time == -3 ? 
-2 : exp_time);\n    } else {\n      res.push_back(int32_t(exp_time - MemberTimeSeconds(db_cntx.time_now_ms)));\n    }\n  }\n\n  return res;\n}\n\nvoid CmdHTtl(CmdArgList args, CommandContext* cmd_cntx) {\n  CmdArgParser parser{args};\n  string_view key = parser.Next();\n\n  auto* rb = static_cast<RedisReplyBuilder*>(cmd_cntx->rb());\n  if (parser.HasError()) {\n    return cmd_cntx->SendError(parser.TakeError().MakeReply());\n  }\n  if (!parser.Check(\"FIELDS\"sv)) {\n    return cmd_cntx->SendError(\"Mandatory argument FIELDS is missing or not at the right position\",\n                               kSyntaxErrType);\n  }\n\n  uint32_t numFields = parser.Next<uint32_t>();\n\n  CmdArgList fields = parser.Tail();\n  if (fields.size() != numFields) {\n    return rb->SendError(\"The `numfields` parameter must match the number of arguments\",\n                         kSyntaxErrType);\n  }\n\n  RETURN_ON_PARSE_ERROR(parser, cmd_cntx);\n\n  auto cb = [&](Transaction* t, EngineShard* shard) { return OpHTtl(t, shard, key, fields); };\n  OpResult<vector<long>> result = cmd_cntx->tx()->ScheduleSingleHopT(std::move(cb));\n\n  switch (result.status()) {\n    case OpStatus::OK:\n      return rb->SendLongArr(absl::MakeConstSpan(result.value()));\n    case OpStatus::KEY_NOTFOUND:\n      return rb->SendLongArr(absl::MakeConstSpan(vector<long>(numFields, -2)));\n    default:\n      return cmd_cntx->SendError(result.status());\n  };\n}\n\nvoid CmdHGet(CmdArgList args, CommandContext* cmd_cntx) {\n  auto cb = [field = args[1]](const HMapWrap& hw) -> OpResult<string> {\n    if (auto it = hw.Find(field); it)\n      return string{it->second};\n    return OpStatus::KEY_NOTFOUND;\n  };\n\n  OpResult<string> result = ExecuteRO(cmd_cntx->tx(), cb);\n  auto* rb = static_cast<RedisReplyBuilder*>(cmd_cntx->rb());\n  switch (result.status()) {\n    case OpStatus::OK:\n      return rb->SendBulkString(*result);\n    case OpStatus::KEY_NOTFOUND:\n      return rb->SendNull();\n    default:\n      
return cmd_cntx->SendError(result.status());\n  };\n}\n\nvoid CmdHMGet(CmdArgList args, CommandContext* cmd_cntx) {\n  auto fields = args.subspan(1);\n  auto cb = [fields](const HMapWrap& hw) { return OpHMGet(hw, fields); };\n\n  OpResult<vector<OptStr>> result = ExecuteRO(cmd_cntx->tx(), cb);\n  auto* rb = static_cast<RedisReplyBuilder*>(cmd_cntx->rb());\n  switch (result.status()) {\n    case OpStatus::OK:\n    case OpStatus::KEY_NOTFOUND: {\n      RedisReplyBuilder::ArrayScope scope{rb, fields.size()};\n      for (size_t i = 0; i < fields.size(); i++) {\n        if (result.ok() && (*result)[i].has_value())\n          rb->SendBulkString(*(*result)[i]);\n        else\n          rb->SendNull();\n      }\n    } break;\n    default:\n      cmd_cntx->SendError(result.status());\n  };\n}\n\nvoid CmdHStrLen(CmdArgList args, CommandContext* cmd_cntx) {\n  auto cb = [field = ArgS(args, 1)](const HMapWrap& hw) -> OpResult<uint32_t> {\n    if (auto it = hw.Find(field); it)\n      return it->second.length();\n    return OpStatus::KEY_NOTFOUND;\n  };\n  HSetReplies{cmd_cntx}.Send(ExecuteRO(cmd_cntx->tx(), cb));\n}\n\nvoid CmdHLen(CmdArgList args, CommandContext* cmd_cntx) {\n  auto cb = [](const HMapWrap& hw) -> OpResult<uint32_t> { return hw.Length(); };\n  HSetReplies{cmd_cntx}.Send(ExecuteRO(cmd_cntx->tx(), cb));\n}\n\nvoid CmdHExists(CmdArgList args, CommandContext* cmd_cntx) {\n  auto cb = [field = args[1]](const HMapWrap& hw) -> OpResult<uint32_t> {\n    return hw.Find(field) ? 
1 : 0;\n  };\n  HSetReplies{cmd_cntx}.Send(ExecuteRO(cmd_cntx->tx(), cb));\n}\n\nvoid CmdHIncrBy(CmdArgList args, CommandContext* cmd_cntx) {\n  string_view key = ArgS(args, 0);\n  string_view field = ArgS(args, 1);\n  string_view incrs = ArgS(args, 2);\n  int64_t ival = 0;\n\n  if (!absl::SimpleAtoi(incrs, &ival)) {\n    return cmd_cntx->SendError(kInvalidIntErr);\n  }\n\n  IncrByParam param{ival};\n\n  auto cb = [&](Transaction* t, EngineShard* shard) {\n    return OpIncrBy(t->GetOpArgs(shard), key, field, &param);\n  };\n\n  OpStatus status = cmd_cntx->tx()->ScheduleSingleHop(std::move(cb));\n\n  if (status == OpStatus::OK) {\n    cmd_cntx->SendLong(get<int64_t>(param));\n  } else {\n    switch (status) {\n      case OpStatus::INVALID_VALUE:\n        cmd_cntx->SendError(\"hash value is not an integer\");\n        break;\n      case OpStatus::OUT_OF_RANGE:\n        cmd_cntx->SendError(kIncrOverflow);\n        break;\n      default:\n        cmd_cntx->SendError(status);\n        break;\n    }\n  }\n}\n\nvoid CmdHIncrByFloat(CmdArgList args, CommandContext* cmd_cntx) {\n  string_view key = ArgS(args, 0);\n  string_view field = ArgS(args, 1);\n  string_view incrs = ArgS(args, 2);\n  double dval = 0;\n\n  if (!absl::SimpleAtod(incrs, &dval)) {\n    return cmd_cntx->SendError(kInvalidFloatErr);\n  }\n\n  if (isnan(dval) || isinf(dval)) {\n    return cmd_cntx->SendError(kNanOrInfDuringIncr);\n  }\n\n  IncrByParam param{dval};\n\n  auto cb = [&](Transaction* t, EngineShard* shard) {\n    return OpIncrBy(t->GetOpArgs(shard), key, field, &param);\n  };\n\n  OpStatus status = cmd_cntx->tx()->ScheduleSingleHop(std::move(cb));\n\n  if (status == OpStatus::OK) {\n    auto* rb = static_cast<RedisReplyBuilder*>(cmd_cntx->rb());\n    rb->SendDouble(get<double>(param));\n  } else {\n    switch (status) {\n      case OpStatus::INVALID_VALUE:\n        cmd_cntx->SendError(\"hash value is not a float\");\n        break;\n      default:\n        cmd_cntx->SendError(status);\n        
break;\n    }\n  }\n}\n\nvoid CmdHKeys(CmdArgList args, CommandContext* cmd_cntx) {\n  HGetGeneric(args, FIELDS, cmd_cntx);\n}\n\nvoid CmdHVals(CmdArgList args, CommandContext* cmd_cntx) {\n  HGetGeneric(args, VALUES, cmd_cntx);\n}\n\nvoid CmdHGetAll(CmdArgList args, CommandContext* cmd_cntx) {\n  HGetGeneric(args, GetAllMode::FIELDS | GetAllMode::VALUES, cmd_cntx);\n}\n\nvoid CmdHScan(CmdArgList args, CommandContext* cmd_cntx) {\n  std::string_view token = ArgS(args, 1);\n  uint64_t cursor = 0;\n  auto* rb = static_cast<RedisReplyBuilder*>(cmd_cntx->rb());\n  if (!absl::SimpleAtoi(token, &cursor)) {\n    return rb->SendError(\"invalid cursor\");\n  }\n\n  // HSCAN key cursor [MATCH pattern] [COUNT count] [NOVALUES]\n  if (args.size() > 7) {\n    DVLOG(1) << \"got \" << args.size() << \" this is more than it should be\";\n    return rb->SendError(kSyntaxErr);\n  }\n\n  OpResult<ScanOpts> ops = ScanOpts::TryFrom(args.subspan(2), true);\n  if (!ops) {\n    DVLOG(1) << \"HScan invalid args - return \" << ops << \" to the user\";\n    return cmd_cntx->SendError(ops.status());\n  }\n\n  const ScanOpts& scan_op = ops.value();\n  auto cb = [&](const HMapWrap& hw) { return OpScan(hw, &cursor, scan_op); };\n\n  OpResult<StringVec> result = ExecuteRO(cmd_cntx->tx(), cb);\n  switch (result.status()) {\n    case OpStatus::KEY_NOTFOUND:\n      cursor = 0;\n      [[fallthrough]];\n    case OpStatus::OK: {\n      RedisReplyBuilder::ArrayScope scope{rb, 2};\n      rb->SendBulkString(absl::StrCat(cursor));\n      rb->SendBulkStrArr(*result);\n      break;\n    }\n    default:\n      cmd_cntx->SendError(result.status());\n  }\n}\n\nvoid CmdHSet(CmdArgList args, CommandContext* cmd_cntx) {\n  string_view key = ArgS(args, 0);\n\n  string_view cmd{cmd_cntx->cid()->name()};\n  auto* rb = cmd_cntx->rb();\n  if (args.size() % 2 != 1) {\n    return rb->SendError(facade::WrongNumArgsError(cmd), kSyntaxErrType);\n  }\n\n  args.remove_prefix(1);\n  auto cb = [&](Transaction* t, EngineShard* 
shard) {\n    return OpSet(t->GetOpArgs(shard), key, args);\n  };\n\n  OpResult<uint32_t> result = cmd_cntx->tx()->ScheduleSingleHopT(std::move(cb));\n\n  if (result && cmd == \"HSET\") {\n    rb->SendLong(*result);\n  } else {\n    cmd_cntx->SendError(result.status());\n  }\n}\n\nvoid CmdHSetNx(CmdArgList args, CommandContext* cmd_cntx) {\n  string_view key = ArgS(args, 0);\n\n  auto cb = [&](Transaction* t, EngineShard* shard) {\n    return OpSet(t->GetOpArgs(shard), key, args.subspan(1), OpSetParams{.skip_if_exists = true});\n  };\n  HSetReplies{cmd_cntx}.Send(cmd_cntx->tx()->ScheduleSingleHopT(cb));\n}\n\nvoid StrVecEmplaceBack(StringVec& str_vec, const listpackEntry& lp) {\n  if (lp.sval) {\n    str_vec.emplace_back(reinterpret_cast<char*>(lp.sval), lp.slen);\n    return;\n  }\n  str_vec.emplace_back(absl::StrCat(lp.lval));\n}\n\nvoid CmdHRandField(CmdArgList args, CommandContext* cmd_cntx) {\n  auto* rb = static_cast<RedisReplyBuilder*>(cmd_cntx->rb());\n  if (args.size() > 3) {\n    DVLOG(1) << \"Wrong number of command arguments: \" << args.size();\n    return rb->SendError(kSyntaxErr);\n  }\n\n  string_view key = ArgS(args, 0);\n  int32_t count;\n  bool with_values = false;\n\n  if ((args.size() > 1) && (!SimpleAtoi(ArgS(args, 1), &count))) {\n    return rb->SendError(\"count value is not an integer\", kSyntaxErrType);\n  }\n\n  if (args.size() == 3) {\n    string arg = absl::AsciiStrToUpper(ArgS(args, 2));\n    if (arg != \"WITHVALUES\")\n      return rb->SendError(kSyntaxErr);\n    else\n      with_values = true;\n  }\n\n  auto cb = [&](Transaction* t, EngineShard* shard) -> OpResult<StringVec> {\n    auto& db_slice = t->GetDbSlice(shard->shard_id());\n    DbContext db_context = t->GetDbContext();\n    auto it_res = db_slice.FindReadOnly(db_context, key, OBJ_HASH);\n\n    if (!it_res)\n      return it_res.status();\n\n    const PrimeValue& pv = it_res.value()->second;\n    StringVec str_vec;\n\n    if (pv.Encoding() == kEncodingStrMap2) {\n      
StringMap* string_map = GetStringMap(pv, db_context);\n\n      if (args.size() == 1) {\n        auto opt_pair = string_map->RandomPair();\n        if (opt_pair.has_value()) {\n          auto [key, value] = *opt_pair;\n          str_vec.emplace_back(key, sdslen(key));\n        }\n      } else {\n        size_t actual_count =\n            (count >= 0) ? std::min(size_t(count), string_map->UpperBoundSize()) : abs(count);\n        std::vector<sds> keys, vals;\n        if (count >= 0) {\n          string_map->RandomPairsUnique(actual_count, keys, vals, with_values);\n        } else {\n          string_map->RandomPairs(actual_count, keys, vals, with_values);\n        }\n        for (size_t i = 0; i < actual_count; ++i) {\n          str_vec.emplace_back(keys[i], sdslen(keys[i]));\n          if (with_values) {\n            str_vec.emplace_back(vals[i], sdslen(vals[i]));\n          }\n        }\n      }\n\n      if (string_map->Empty()) {  // Can happen if we use a TTL on hash members.\n        auto res_it = db_slice.FindMutable(db_context, key, OBJ_HASH);\n        if (res_it) {\n          db_slice.DelMutable(db_context, std::move(*res_it));\n        }\n        return facade::OpStatus::KEY_NOTFOUND;\n      }\n    } else if (pv.Encoding() == kEncodingListPack) {\n      uint8_t* lp = (uint8_t*)pv.RObjPtr();\n      size_t lplen = lpLength(lp);\n      CHECK(lplen > 0 && lplen % 2 == 0);\n      size_t hlen = lplen / 2;\n      if (args.size() == 1) {\n        listpackEntry key;\n        lpRandomPair(lp, hlen, &key, NULL);\n        StrVecEmplaceBack(str_vec, key);\n      } else {\n        size_t actual_count = (count >= 0) ? 
std::min(size_t(count), hlen) : abs(count);\n        std::unique_ptr<listpackEntry[]> keys = nullptr, vals = nullptr;\n        keys = std::make_unique<listpackEntry[]>(actual_count);\n        if (with_values)\n          vals = std::make_unique<listpackEntry[]>(actual_count);\n\n        // count has been specified.\n        if (count >= 0)\n          // always returns unique entries.\n          lpRandomPairsUnique(lp, actual_count, keys.get(), vals.get());\n        else\n          // allows non-unique entries.\n          lpRandomPairs(lp, actual_count, keys.get(), vals.get());\n\n        for (size_t i = 0; i < actual_count; ++i) {\n          StrVecEmplaceBack(str_vec, keys[i]);\n          if (with_values) {\n            StrVecEmplaceBack(str_vec, vals[i]);\n          }\n        }\n      }\n    } else {\n      LOG(FATAL) << \"Invalid encoding \" << pv.Encoding();\n    }\n    return str_vec;\n  };\n\n  OpResult<StringVec> result = cmd_cntx->tx()->ScheduleSingleHopT(std::move(cb));\n  if (result) {\n    if (result->size() == 1 && args.size() == 1)\n      rb->SendBulkString(result->front());\n    else if (with_values) {\n      const auto result_size = result->size();\n      DCHECK(result_size % 2 == 0)\n          << \"unexpected size of strings \" << result_size << \", expected pairs\";\n      SinkReplyBuilder::ReplyScope scope{rb};\n      const bool is_resp3 = rb->IsResp3();\n      rb->StartArray(is_resp3 ? 
result_size / 2 : result_size);\n      for (size_t i = 0; i < result_size; i += 2) {\n        if (is_resp3)\n          rb->StartArray(2);\n        rb->SendBulkString((*result)[i]);\n        rb->SendBulkString((*result)[i + 1]);\n      }\n    } else\n      rb->SendBulkStrArr(*result, CollectionType::ARRAY);\n  } else if (result.status() == OpStatus::KEY_NOTFOUND) {\n    if (args.size() == 1)\n      rb->SendNull();\n    else\n      rb->SendEmptyArray();\n  } else {\n    cmd_cntx->SendError(result.status());\n  }\n}\n\n}  // namespace\n\nusing CI = CommandId;\n\n#define HFUNC(x) SetHandler(&Cmd##x)\n\nvoid HSetFamily::Register(CommandRegistry* registry) {\n  registry->StartFamily(acl::HASH);\n  *registry << CI{\"HDEL\", CO::FAST | CO::JOURNALED, -3, 1, 1}.HFUNC(HDel)\n            << CI{\"HLEN\", CO::FAST | CO::READONLY, 2, 1, 1}.HFUNC(HLen)\n            << CI{\"HEXISTS\", CO::FAST | CO::READONLY, 3, 1, 1}.HFUNC(HExists)\n            << CI{\"HGET\", CO::FAST | CO::READONLY, 3, 1, 1}.HFUNC(HGet)\n            << CI{\"HGETALL\", CO::FAST | CO::READONLY, 2, 1, 1}.HFUNC(HGetAll)\n            << CI{\"HMGET\", CO::FAST | CO::READONLY, -3, 1, 1}.HFUNC(HMGet)\n            << CI{\"HMSET\", CO::JOURNALED | CO::FAST | CO::DENYOOM, -4, 1, 1}.HFUNC(HSet)\n            << CI{\"HINCRBY\", CO::JOURNALED | CO::DENYOOM | CO::FAST, 4, 1, 1}.HFUNC(HIncrBy)\n            << CI{\"HINCRBYFLOAT\", CO::JOURNALED | CO::DENYOOM | CO::FAST, 4, 1, 1}.HFUNC(\n                   HIncrByFloat)\n            << CI{\"HKEYS\", CO::READONLY, 2, 1, 1}.HFUNC(HKeys)\n            << CI{\"HEXPIRE\", CO::JOURNALED | CO::FAST | CO::DENYOOM, -5, 1, 1}.HFUNC(HExpire)\n            << CI{\"HTTL\", CO::READONLY | CO::FAST, -4, 1, 1}.HFUNC(HTtl)\n            << CI{\"HRANDFIELD\", CO::READONLY, -2, 1, 1}.HFUNC(HRandField)\n            << CI{\"HSCAN\", CO::READONLY, -3, 1, 1}.HFUNC(HScan)\n            << CI{\"HSET\", CO::JOURNALED | CO::FAST | CO::DENYOOM, -4, 1, 1}.HFUNC(HSet)\n            << CI{\"HSETEX\", CO::JOURNALED 
| CO::FAST | CO::DENYOOM, -5, 1, 1}.SetHandler(HSetEx)\n            << CI{\"HSETNX\", CO::JOURNALED | CO::DENYOOM | CO::FAST, 4, 1, 1}.HFUNC(HSetNx)\n            << CI{\"HSTRLEN\", CO::READONLY | CO::FAST, 3, 1, 1}.HFUNC(HStrLen)\n            << CI{\"HVALS\", CO::READONLY, 2, 1, 1}.HFUNC(HVals);\n}\n\nauto HSetFamily::LoadZiplistBlob(std::string_view blob, PrimeValue* pv) -> LoadBlobResult {\n  unsigned char* lp = lpNew(blob.size());\n  if (!ZiplistPairsConvertAndValidateIntegrity((const uint8_t*)blob.data(), blob.size(), &lp)) {\n    LOG(ERROR) << \"Hash ziplist integrity check failed.\";\n    zfree(lp);\n    return LoadBlobResult::kCorrupted;\n  }\n\n  if (lpLength(lp) == 0) {\n    lpFree(lp);\n    return LoadBlobResult::kEmpty;\n  }\n\n  if (lpBytes(lp) > server.max_listpack_map_bytes) {\n    StringMap* sm = ConvertToStrMap(lp);\n    lpFree(lp);\n    pv->InitRobj(OBJ_HASH, kEncodingStrMap2, sm);\n  } else {\n    lp = lpShrinkToFit(lp);\n    pv->InitRobj(OBJ_HASH, kEncodingListPack, lp);\n  }\n\n  return LoadBlobResult::kSuccess;\n}\n\nauto HSetFamily::LoadListpackBlob(std::string_view blob, PrimeValue* pv) -> LoadBlobResult {\n  if (!lpValidateIntegrity((uint8_t*)blob.data(), blob.size(), 0, nullptr, nullptr)) {\n    LOG(ERROR) << \"Hash listpack integrity check failed.\";\n    return LoadBlobResult::kCorrupted;\n  }\n\n  unsigned char* lp = lpNew(blob.size());\n  std::memcpy(lp, blob.data(), blob.size());\n\n  if (lpLength(lp) == 0) {\n    lpFree(lp);\n    return LoadBlobResult::kEmpty;\n  }\n\n  if (lpBytes(lp) > server.max_listpack_map_bytes) {\n    StringMap* sm = ConvertToStrMap(lp);\n    lpFree(lp);\n    pv->InitRobj(OBJ_HASH, kEncodingStrMap2, sm);\n  } else {\n    lp = lpShrinkToFit(lp);\n    pv->InitRobj(OBJ_HASH, kEncodingListPack, lp);\n  }\n\n  return LoadBlobResult::kSuccess;\n}\n\nStringMap* HSetFamily::ConvertToStrMap(uint8_t* lp) {\n  StringMap* sm = CompactObj::AllocateMR<StringMap>();\n\n  detail::ListpackWrap lw{lp};\n  
sm->Reserve(lw.size());\n  for (const auto [key, value] : lw)\n    LOG_IF(ERROR, !sm->AddOrUpdate(key, value)) << \"Internal error: duplicate key \" << key;\n  return sm;\n}\n\n// returns -1 if no expiry is associated with the field, -3 if no field is found.\nint32_t HSetFamily::FieldExpireTime(const DbContext& db_context, const PrimeValue& pv,\n                                    std::string_view field) {\n  DCHECK_EQ(OBJ_HASH, pv.ObjType());\n\n  if (pv.Encoding() == kEncodingListPack) {\n    detail::ListpackWrap lw{static_cast<uint8_t*>(pv.RObjPtr())};\n    return lw.Find(field) == lw.end() ? -3 : -1;\n  } else {\n    StringMap* string_map = (StringMap*)pv.RObjPtr();\n    string_map->set_time(MemberTimeSeconds(db_context.time_now_ms));\n    auto it = string_map->Find(field);\n    if (it == string_map->end())\n      return -3;\n    return it.HasExpiry() ? it.ExpiryTime() : -1;\n  }\n}\n\n// returns vector of results for each field in values:\n// -2 if the provided key does not exist.\n// 0 if the specified NX | XX | GT | LT condition has not been met.\n// 1 if the expiration time was set/updated.\n// 2 when HEXPIRE/HPEXPIRE is called with 0 seconds and the field is deleted.\nstatic std::vector<long> UpdateTTL(facade::CmdArgList values, uint32_t ttl_sec, ExpireFlags flags,\n                                   StringMap* owner) {\n  std::vector<long> res;\n  res.reserve(values.size());\n\n  for (size_t i = 0; i < values.size(); i++) {\n    std::string_view field = facade::ToSV(values[i]);\n    auto it = owner->Find(field);\n    if (it != owner->end()) {\n      switch (flags) {\n        case ExpireFlags::EXPIRE_NX:\n          if (it.HasExpiry()) {\n            res.emplace_back(0);\n            continue;\n          }\n          break;\n        case ExpireFlags::EXPIRE_XX:\n          if (!it.HasExpiry()) {\n            res.emplace_back(0);\n            continue;\n          }\n          break;\n        case ExpireFlags::EXPIRE_GT:\n          if (it.ExpiryTime() - 
owner->time_now() >= ttl_sec) {\n            res.emplace_back(0);\n            continue;\n          }\n          break;\n        case ExpireFlags::EXPIRE_LT:\n          if (it.ExpiryTime() - owner->time_now() <= ttl_sec) {\n            res.emplace_back(0);\n            continue;\n          }\n          break;\n        case ExpireFlags::EXPIRE_ALWAYS:\n          break;\n      }\n      if (ttl_sec == 0) {\n        owner->Erase(field);\n        res.emplace_back(2);\n      } else {\n        it.SetExpiryTime(ttl_sec);\n        res.emplace_back(1);\n      }\n    } else {\n      res.emplace_back(-2);\n    }\n  }\n\n  return res;\n}\n\nvector<long> HSetFamily::SetFieldsExpireTime(const OpArgs& op_args, uint32_t ttl_sec,\n                                             ExpireFlags flags, string_view key, CmdArgList values,\n                                             PrimeValue* pv) {\n  DCHECK_EQ(OBJ_HASH, pv->ObjType());\n  op_args.shard->search_indices()->RemoveDoc(key, op_args.db_cntx, *pv);\n\n  if (pv->Encoding() == kEncodingListPack) {\n    // a valid result can never be a listpack, since it doesnt keep ttl\n    uint8_t* lp = (uint8_t*)pv->RObjPtr();\n    StringMap* sm = HSetFamily::ConvertToStrMap(lp);\n    pv->InitRobj(OBJ_HASH, kEncodingStrMap2, sm);\n  }\n\n  // This needs to be explicitly fetched again since the pv might have changed.\n  StringMap* sm = container_utils::GetStringMap(*pv, op_args.db_cntx);\n  vector<long> res = UpdateTTL(values, ttl_sec, flags, sm);\n  op_args.shard->search_indices()->AddDoc(key, op_args.db_cntx, pv);\n  return res;\n}\n\n}  // namespace dfly\n"
  },
  {
    "path": "src/server/hset_family.h",
    "content": "// Copyright 2022, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#pragma once\n\n#include <optional>\n\n#include \"facade/op_status.h\"\n#include \"server/common.h\"\n#include \"server/table.h\"\nnamespace dfly {\n\nclass StringMap;\n\nusing facade::OpResult;\nusing facade::OpStatus;\n\nclass HSetFamily {\n public:\n  static void Register(CommandRegistry* registry);\n\n  static LoadBlobResult LoadZiplistBlob(std::string_view blob, PrimeValue* pv);\n  static LoadBlobResult LoadListpackBlob(std::string_view blob, PrimeValue* pv);\n\n  // Does not free lp.\n  static StringMap* ConvertToStrMap(uint8_t* lp);\n\n  static int32_t FieldExpireTime(const DbContext& db_context, const PrimeValue& pv,\n                                 std::string_view field);\n\n  static std::vector<long> SetFieldsExpireTime(const OpArgs& op_args, uint32_t ttl_sec,\n                                               ExpireFlags flags, std::string_view key,\n                                               CmdArgList values, PrimeValue* pv);\n};\n\n}  // namespace dfly\n"
  },
  {
    "path": "src/server/hset_family_test.cc",
    "content": "// Copyright 2022, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#include \"server/hset_family.h\"\n\n#include <absl/cleanup/cleanup.h>\n\n#include <tuple>\n\nextern \"C\" {\n#include \"redis/listpack.h\"\n#include \"redis/sds.h\"\n}\n\n#include \"base/gtest.h\"\n#include \"base/logging.h\"\n#include \"core/detail/gen_utils.h\"\n#include \"facade/facade_test.h\"\n#include \"server/test_utils.h\"\n\nusing namespace testing;\nusing namespace std;\nusing namespace util;\nusing namespace facade;\n\nnamespace dfly {\n\nclass HSetFamilyTest : public BaseFamilyTest {\n protected:\n};\n\nclass HestFamilyTestProtocolVersioned : public HSetFamilyTest,\n                                        public ::testing::WithParamInterface<string> {\n protected:\n};\n\nINSTANTIATE_TEST_SUITE_P(HestFamilyTestProtocolVersioned, HestFamilyTestProtocolVersioned,\n                         ::testing::Values(\"2\", \"3\"));\n\nTEST_F(HSetFamilyTest, Basic) {\n  auto resp = Run({\"hset\", \"x\", \"a\"});\n  EXPECT_THAT(resp, ErrArg(\"wrong number\"));\n\n  EXPECT_THAT(Run({\"HSET\", \"hs\", \"key1\", \"val1\", \"key2\"}), ErrArg(\"wrong number\"));\n\n  EXPECT_EQ(1, CheckedInt({\"hset\", \"x\", \"a\", \"b\"}));\n  EXPECT_EQ(1, CheckedInt({\"hlen\", \"x\"}));\n\n  EXPECT_EQ(1, CheckedInt({\"hexists\", \"x\", \"a\"}));\n  EXPECT_EQ(0, CheckedInt({\"hexists\", \"x\", \"b\"}));\n  EXPECT_EQ(0, CheckedInt({\"hexists\", \"y\", \"a\"}));\n\n  EXPECT_EQ(0, CheckedInt({\"hset\", \"x\", \"a\", \"b\"}));\n  EXPECT_EQ(0, CheckedInt({\"hset\", \"x\", \"a\", \"c\"}));\n  EXPECT_EQ(0, CheckedInt({\"hset\", \"x\", \"a\", \"\"}));\n\n  EXPECT_EQ(2, CheckedInt({\"hset\", \"y\", \"a\", \"c\", \"d\", \"e\"}));\n  EXPECT_EQ(2, CheckedInt({\"hdel\", \"y\", \"a\", \"d\"}));\n\n  EXPECT_THAT(Run({\"hdel\", \"nokey\", \"a\"}), IntArg(0));\n}\n\nTEST_F(HSetFamilyTest, HSet) {\n  // Simulate HSET on mirror map\n  {\n    absl::flat_hash_map<string, string> mirror;  
// mirror\n\n    // Generate HSET commands and check how many new entries were added\n    absl::InsecureBitGen gen{};\n    while (mirror.size() < 600) {\n      vector<string> cmd = {\"HSET\", \"hash\"};\n      size_t new_values = 0;\n      for (int i = 0; i < 20; i++) {\n        string key = GetRandomHex(gen, 3);\n        string value = GetRandomHex(gen, 20, 10);\n        new_values += mirror.contains(key) ? 0 : 1;\n        mirror[key] = value;\n\n        cmd.emplace_back(key);\n        cmd.emplace_back(value);\n      }\n\n      EXPECT_THAT(Run(cmd), IntArg(new_values));\n    }\n\n    // Verify consistency\n    EXPECT_THAT(Run({\"HLEN\", \"hash\"}), IntArg(mirror.size()));\n    for (const auto& [key, value] : mirror)\n      EXPECT_EQ(Run({\"HGET\", \"hash\", key}), mirror[key]);\n  }\n\n  // HSet with same key twice\n  Run({\"HSET\", \"hash\", \"key1\", \"value1\", \"key1\", \"value2\"});\n  EXPECT_EQ(Run({\"HGET\", \"hash\", \"key1\"}), \"value2\");\n\n  // Wrong value cases\n  EXPECT_THAT(Run({\"HSET\", \"key\"}), ErrArg(\"wrong number of arguments\"));\n  EXPECT_THAT(Run({\"HSET\", \"key\", \"key\"}), ErrArg(\"wrong number of arguments\"));\n  EXPECT_THAT(Run({\"HSET\", \"key\", \"key\", \"value\", \"key2\"}), ErrArg(\"wrong number of arguments\"));\n}\n\nTEST_F(HSetFamilyTest, HSetNX) {\n  // Should create new field\n  EXPECT_THAT(Run({\"HSETNX\", \"hash\", \"key1\", \"value1\"}), IntArg(1));\n  EXPECT_EQ(Run({\"HGET\", \"hash\", \"key1\"}), \"value1\");\n\n  // Should not overwrite\n  EXPECT_THAT(Run({\"HSETNX\", \"hash\", \"key1\", \"value2\"}), IntArg(0));\n  EXPECT_EQ(Run({\"HGET\", \"hash\", \"key1\"}), \"value1\");\n\n  // Wrong value cases\n  EXPECT_THAT(Run({\"HSETNX\", \"key\"}), ErrArg(\"wrong number of arguments\"));\n  EXPECT_THAT(Run({\"HSET\", \"key\", \"key\"}), ErrArg(\"wrong number of arguments\"));\n}\n\n// Listpack handles integers separately, so create a mix of different types\nTEST_F(HSetFamilyTest, MixedTypes) {\n  
absl::flat_hash_set<string> str_keys, int_keys;\n  for (int i = 0; i < 100; i++) {\n    auto key1 = absl::StrCat(\"s\", i);\n    auto key2 = absl::StrCat(\"i\", i);\n    Run({\"HSET\", \"hash\", key1, \"VALUE\", key2, \"123456\"});\n    str_keys.emplace(key1);\n    int_keys.emplace(key2);\n  }\n\n  for (string_view key : str_keys)\n    EXPECT_EQ(Run({{\"HGET\", \"hash\", key}}), \"VALUE\");\n\n  for (string_view key : int_keys) {\n    EXPECT_EQ(Run({{\"HGET\", \"hash\", key}}), \"123456\");\n    EXPECT_EQ(CheckedInt({\"hincrby\", \"hash\", key, \"1\"}), 123456 + 1);\n  }\n}\n\nTEST_P(HestFamilyTestProtocolVersioned, Get) {\n  auto resp = Run({\"hello\", GetParam()});\n  EXPECT_THAT(resp.GetVec()[6], \"proto\");\n  EXPECT_THAT(resp.GetVec()[7], IntArg(atoi(GetParam().c_str())));\n\n  resp = Run({\"hset\", \"x\", \"a\", \"1\", \"b\", \"2\", \"c\", \"3\"});\n  EXPECT_THAT(resp, IntArg(3));\n\n  resp = Run({\"hmget\", \"unkwn\", \"a\", \"c\"});\n  ASSERT_THAT(resp, ArgType(RespExpr::ARRAY));\n  EXPECT_THAT(resp.GetVec(), ElementsAre(ArgType(RespExpr::NIL), ArgType(RespExpr::NIL)));\n\n  resp = Run({\"hkeys\", \"x\"});\n  ASSERT_THAT(resp, ArgType(RespExpr::ARRAY));\n  EXPECT_THAT(resp.GetVec(), UnorderedElementsAre(\"a\", \"b\", \"c\"));\n\n  resp = Run({\"hvals\", \"x\"});\n  ASSERT_THAT(resp, ArgType(RespExpr::ARRAY));\n  EXPECT_THAT(resp.GetVec(), UnorderedElementsAre(\"1\", \"2\", \"3\"));\n\n  resp = Run({\"hmget\", \"x\", \"a\", \"c\", \"d\"});\n  ASSERT_THAT(resp, ArgType(RespExpr::ARRAY));\n  EXPECT_THAT(resp.GetVec(), ElementsAre(\"1\", \"3\", ArgType(RespExpr::NIL)));\n\n  resp = Run({\"hmget\", \"x\", \"a\", \"c\", \"d\", \"d\", \"c\", \"a\"});\n  ASSERT_THAT(resp, ArgType(RespExpr::ARRAY));\n  EXPECT_THAT(resp.GetVec(),\n              ElementsAre(\"1\", \"3\", ArgType(RespExpr::NIL), ArgType(RespExpr::NIL), \"3\", \"1\"));\n\n  resp = Run({\"hgetall\", \"x\"});\n  ASSERT_THAT(resp, ArgType(RespExpr::ARRAY));\n  EXPECT_THAT(resp.GetVec(), ElementsAre(\"a\", 
\"1\", \"b\", \"2\", \"c\", \"3\"));\n}\n\nTEST_F(HSetFamilyTest, HIncrBy) {\n  int total = 10;\n  // Check new field is created\n  EXPECT_EQ(CheckedInt({\"hincrby\", \"key\", \"field\", \"10\"}), 10);\n  EXPECT_EQ(Run({\"hget\", \"key\", \"field\"}), \"10\");\n  // Simulate multiple additions\n  for (int i = -100; i < 100; i += 7) {\n    total += i;\n    EXPECT_EQ(CheckedInt({\"hincrby\", \"key\", \"field\", to_string(i)}), total);\n  }\n\n  // Overflow\n  Run({\"hset\", \"key\", \"field2\", to_string(numeric_limits<int64_t>::max() - 1)});\n  EXPECT_THAT(Run({\"hincrby\", \"key\", \"field2\", \"2\"}), ErrArg(\"would overflow\"));\n\n  // Error case\n  Run({\"hset\", \"key\", \"a\", \" 1\"});\n  auto resp = Run({\"hincrby\", \"key\", \"a\", \"10\"});\n  EXPECT_THAT(resp, ErrArg(\"hash value is not an integer\"));\n}\n\nTEST_F(HSetFamilyTest, HIncrRespected) {\n  Run({\"hset\", \"key\", \"a\", \"1\"});\n  EXPECT_EQ(11, CheckedInt({\"hincrby\", \"key\", \"a\", \"10\"}));\n  EXPECT_EQ(11, CheckedInt({\"hget\", \"key\", \"a\"}));\n}\n\nTEST_F(HSetFamilyTest, HIncrCmdsPreserveTtl) {\n  Run({\"hsetex\", \"key\", \"5\", \"a\", \"1\"});\n  EXPECT_EQ(5, CheckedInt({\"fieldttl\", \"key\", \"a\"}));\n  EXPECT_EQ(2, CheckedInt({\"hincrby\", \"key\", \"a\", \"1\"}));\n  EXPECT_EQ(5, CheckedInt({\"fieldttl\", \"key\", \"a\"}));\n\n  // If the field has already expired by the time hincrby runs, the TTL is default\n  AdvanceTime(5 * 1000);\n  EXPECT_EQ(1, CheckedInt({\"hincrby\", \"key\", \"a\", \"1\"}));\n  EXPECT_EQ(-1, CheckedInt({\"fieldttl\", \"key\", \"a\"}));\n\n  Run({\"hsetex\", \"key\", \"5\", \"fl\", \"1.1\"});\n  EXPECT_EQ(5, CheckedInt({\"fieldttl\", \"key\", \"fl\"}));\n  EXPECT_EQ(\"2.2\", Run({\"hincrbyfloat\", \"key\", \"fl\", \"1.1\"}));\n}\n\nTEST_F(HSetFamilyTest, HScan) {\n  auto resp = Run(\"hscan non-existing-key 100 count 5\");\n  ASSERT_THAT(resp, ArgType(RespExpr::ARRAY));\n  ASSERT_THAT(resp.GetVec(), ElementsAre(ArgType(RespExpr::STRING), 
ArgType(RespExpr::ARRAY)));\n  EXPECT_EQ(ToSV(resp.GetVec()[0].GetBuf()), \"0\");\n  EXPECT_EQ(StrArray(resp.GetVec()[1]).size(), 0);\n\n  for (int i = 0; i < 10; i++) {\n    Run({\"HSET\", \"myhash\", absl::StrCat(\"Field-\", i), absl::StrCat(\"Value-\", i)});\n  }\n\n  // Note that even though this limit by 4, it would return more because\n  // all fields are on listpack\n  resp = Run({\"hscan\", \"myhash\", \"0\", \"count\", \"4\"});\n  EXPECT_THAT(resp, ArrLen(2));\n  auto vec = StrArray(resp.GetVec()[1]);\n  EXPECT_EQ(vec.size(), 20);\n  EXPECT_THAT(vec, Each(AnyOf(StartsWith(\"Field\"), StartsWith(\"Value\"))));\n\n  // Now run with filter on the results - we are expecting to not getting\n  // any result at this point\n  resp = Run({\"hscan\", \"myhash\", \"0\", \"match\", \"*x*\"});  // nothing should match this\n  EXPECT_THAT(resp, ArrLen(2));\n  vec = StrArray(resp.GetVec()[1]);\n  EXPECT_EQ(vec.size(), 0);\n\n  // now we will do a positive match - anything that has 1 on it\n  resp = Run({\"hscan\", \"myhash\", \"0\", \"match\", \"*1*\"});\n  EXPECT_THAT(resp, ArrLen(2));\n  vec = StrArray(resp.GetVec()[1]);\n  EXPECT_EQ(vec.size(), 2);  // key/value = 2\n\n  // Test with large hash to see that count limit the number of entries\n  for (int i = 0; i < 200; i++) {\n    Run({\"HSET\", \"largehash\", absl::StrCat(\"KeyNum-\", i), absl::StrCat(\"KeyValue-\", i)});\n  }\n  resp = Run({\"hscan\", \"largehash\", \"0\", \"count\", \"20\"});\n  EXPECT_THAT(resp, ArrLen(2));\n  vec = StrArray(resp.GetVec()[1]);\n\n  // See https://redis.io/commands/scan/ --> \"The COUNT option\", for why this cannot be exact\n  EXPECT_GE(vec.size(), 40);  // This should be larger than (20 * 2) and less than about 50\n  EXPECT_LT(vec.size(), 60);\n\n  // Test NOVALUES option on 'myhash' (which has 10 items)\n  resp = Run({\"hscan\", \"myhash\", \"0\", \"NOVALUES\"});\n  EXPECT_THAT(resp, ArrLen(2));\n  vec = StrArray(resp.GetVec()[1]);\n  EXPECT_EQ(vec.size(), 10);\n  EXPECT_THAT(vec, 
Each(StartsWith(\"Field\")));  // Should contain \"Field-X\", but never \"Value-X\"\n}\n\n// Verifies that the NOVALUES flag functions correctly when combined with other arguments\n// like MATCH and COUNT, ensuring values are suppressed even during filtered or limited scans.\nTEST_F(HSetFamilyTest, HScan_NoValuesCombinations) {\n  Run({\"HSET\", \"h_combos\", \"user:1\", \"v1\", \"user:2\", \"v2\", \"admin:1\", \"v3\"});\n\n  // case 1: MATCH + NOVALUES\n  // We want only keys starting with \"user*\", and NO values.\n  auto resp = Run({\"HSCAN\", \"h_combos\", \"0\", \"MATCH\", \"user:*\", \"NOVALUES\"});\n  ASSERT_THAT(resp, ArrLen(2));\n  auto vec = StrArray(resp.GetVec()[1]);\n\n  // Should find: \"user:1\", \"user:2\" (2 items)\n  // Should NOT find: \"admin:1\" (filtered out)\n  // Should NOT find: \"v1\", \"v2\" (values suppressed)\n  EXPECT_EQ(vec.size(), 2);\n  EXPECT_THAT(vec, UnorderedElementsAre(\"user:1\", \"user:2\"));\n\n  // case 2: COUNT + NOVALUES\n  // Populate a larger hash to force scanning behavior, verify no values and only key present\n  for (int i = 0; i < 50; ++i) {\n    Run({\"HSET\", \"h_large\", absl::StrCat(\"k\", i), \"v\"});\n  }\n  resp = Run({\"HSCAN\", \"h_large\", \"0\", \"COUNT\", \"10\", \"NOVALUES\"});\n  vec = StrArray(resp.GetVec()[1]);\n  EXPECT_GT(vec.size(), 0);\n  EXPECT_THAT(vec, Not(Contains(\"v\")));\n  EXPECT_THAT(vec, Each(StartsWith(\"k\")));\n}\n\nTEST_F(HSetFamilyTest, HScanLpMatchBug) {\n  Run({\"HSET\", \"key\", \"1\", \"2\"});\n  auto resp = Run({\"hscan\", \"key\", \"0\", \"match\", \"1\"});\n  EXPECT_THAT(resp, ArrLen(2));\n}\n\nTEST_F(HSetFamilyTest, HincrbyFloat) {\n  Run({\"hincrbyfloat\", \"k\", \"a\", \"1.5\"});\n  EXPECT_EQ(Run({\"hget\", \"k\", \"a\"}), \"1.5\");\n\n  Run({\"hincrbyfloat\", \"k\", \"a\", \"1.5\"});\n  EXPECT_EQ(Run({\"hget\", \"k\", \"a\"}), \"3\");\n\n  for (size_t i = 0; i < 500; ++i) {\n    Run({\"hincrbyfloat\", \"k\", absl::StrCat(\"v\", i), \"1.5\"});\n  }\n\n  for (size_t i = 0; 
i < 500; ++i) {\n    EXPECT_EQ(Run({\"hget\", \"k\", absl::StrCat(\"v\", i)}), \"1.5\");\n  }\n}\n\nTEST_F(HSetFamilyTest, HincrbyFloatCornerCases) {\n  Run({\"hset\", \"k\", \"mhv\", \"-1.8E+308\", \"phv\", \"1.8E+308\", \"nd\", \"-+-inf\", \"+inf\", \"+inf\", \"nan\",\n       \"nan\", \"-inf\", \"-inf\"});\n  // we don't support long doubles, so in all next cases we should return errors\n  EXPECT_THAT(Run({\"hincrbyfloat\", \"k\", \"mhv\", \"-1\"}), ErrArg(\"ERR hash value is not a float\"));\n  EXPECT_THAT(Run({\"hincrbyfloat\", \"k\", \"phv\", \"1\"}), ErrArg(\"ERR hash value is not a float\"));\n  EXPECT_THAT(Run({\"hincrbyfloat\", \"k\", \"nd\", \"1\"}), ErrArg(\"ERR hash value is not a float\"));\n  EXPECT_THAT(Run({\"hincrbyfloat\", \"k\", \"+inf\", \"1\"}),\n              ErrArg(\"increment would produce NaN or Infinity\"));\n  EXPECT_THAT(Run({\"hincrbyfloat\", \"k\", \"nan\", \"1\"}), ErrArg(\"ERR hash value is not a float\"));\n  EXPECT_THAT(Run({\"hincrbyfloat\", \"k\", \"-inf\", \"1\"}),\n              ErrArg(\"increment would produce NaN or Infinity\"));\n}\n\nTEST_F(HSetFamilyTest, HRandFloat) {\n  Run({\"HSET\", \"k\", \"1\", \"2\"});\n\n  EXPECT_EQ(Run({\"hrandfield\", \"k\"}), \"1\");\n\n  for (size_t i = 0; i < 500; ++i) {\n    Run({\"hincrbyfloat\", \"k\", absl::StrCat(\"v\", i), \"1.1\"});\n  }\n\n  Run({\"hrandfield\", \"k\"});\n}\n\nTEST_F(HSetFamilyTest, HRandField) {\n  // exercise Redis' listpack encoding\n  Run({\"HSET\", \"k\", \"a\", \"0\", \"b\", \"1\", \"c\", \"2\"});\n\n  EXPECT_THAT(Run({\"hrandfield\", \"k\"}), AnyOf(\"a\", \"b\", \"c\"));\n\n  EXPECT_THAT(Run({\"hrandfield\", \"k\", \"2\"}).GetVec(), IsSubsetOf({\"a\", \"b\", \"c\"}));\n\n  EXPECT_THAT(Run({\"hrandfield\", \"k\", \"3\"}).GetVec(), UnorderedElementsAre(\"a\", \"b\", \"c\"));\n\n  EXPECT_THAT(Run({\"hrandfield\", \"k\", \"4\"}).GetVec(), UnorderedElementsAre(\"a\", \"b\", \"c\"));\n\n  auto resp = Run({\"hrandfield\", \"k\", \"4\", \"withvalues\"});\n  
EXPECT_THAT(resp, ArrLen(6));\n  auto vec = resp.GetVec();\n\n  std::vector<RespExpr> k, v;\n  for (unsigned int i = 0; i < vec.size(); ++i) {\n    if (i % 2 == 1)\n      v.push_back(vec[i]);\n    else\n      k.push_back(vec[i]);\n  }\n\n  EXPECT_THAT(v, UnorderedElementsAre(\"0\", \"1\", \"2\"));\n  EXPECT_THAT(k, UnorderedElementsAre(\"a\", \"b\", \"c\"));\n\n  resp = Run({\"hrandfield\", \"k\", \"-4\", \"withvalues\"});\n  EXPECT_THAT(resp, ArrLen(8));\n  vec = resp.GetVec();\n  k.clear();\n  v.clear();\n  for (unsigned int i = 0; i < vec.size(); ++i) {\n    if (i % 2 == 0) {\n      if (vec[i] == \"a\")\n        EXPECT_EQ(vec[i + 1], \"0\");\n      else if (vec[i] == \"b\")\n        EXPECT_EQ(vec[i + 1], \"1\");\n      else if (vec[i] == \"c\")\n        EXPECT_EQ(vec[i + 1], \"2\");\n      else\n        ADD_FAILURE();\n    }\n  }\n\n  // exercise Dragonfly's string map encoding\n  int num_entries = 500;\n  for (int i = 0; i < num_entries; i++) {\n    Run({\"HSET\", \"largehash\", std::to_string(i), std::to_string(i * 10)});\n  }\n\n  resp = Run({\"hrandfield\", \"largehash\"});\n  EXPECT_LE(stoi(resp.GetString()), num_entries - 1);\n  EXPECT_GE(stoi(resp.GetString()), 0);\n\n  resp = Run({\"hrandfield\", \"largehash\", std::to_string(num_entries / 2)});\n  vec = resp.GetVec();\n  std::vector<std::string> string_vec;\n  for (auto v : vec) {\n    string_vec.push_back(v.GetString());\n  }\n\n  sort(string_vec.begin(), string_vec.end());\n  auto it = std::unique(string_vec.begin(), string_vec.end());\n  bool is_unique = (it == string_vec.end());\n  EXPECT_TRUE(is_unique);\n\n  for (const auto& str : string_vec) {\n    EXPECT_LE(stoi(str), num_entries - 1);\n    EXPECT_GE(stoi(str), 0);\n  }\n\n  resp = Run({\"hrandfield\", \"largehash\", std::to_string(num_entries * -1 - 1)});\n  EXPECT_THAT(resp, ArrLen(num_entries + 1));\n  vec = resp.GetVec();\n\n  string_vec.clear();\n  for (auto v : vec) {\n    string_vec.push_back(v.GetString());\n    int i = 
stoi(v.GetString());\n    EXPECT_LE(i, num_entries - 1);\n    EXPECT_GE(i, 0);\n  }\n\n  sort(string_vec.begin(), string_vec.end());\n  it = std::unique(string_vec.begin(), string_vec.end());\n  is_unique = (it == string_vec.end());\n  EXPECT_FALSE(is_unique);\n\n  resp = Run({\"hrandfield\", \"largehash\", std::to_string(num_entries * -1 - 1), \"withvalues\"});\n  EXPECT_THAT(resp, ArrLen((num_entries + 1) * 2));\n  vec = resp.GetVec();\n\n  string_vec.clear();\n  for (unsigned int i = 0; i < vec.size(); ++i) {\n    if (i % 2 == 0) {\n      int k = stoi(vec[i].GetString());\n      EXPECT_LE(k, num_entries - 1);\n      EXPECT_GE(k, 0);\n      int v = stoi(vec[i + 1].GetString());\n      EXPECT_EQ(v, k * 10);\n      string_vec.push_back(vec[i].GetString());\n    }\n  }\n\n  sort(string_vec.begin(), string_vec.end());\n  it = std::unique(string_vec.begin(), string_vec.end());\n  is_unique = (it == string_vec.end());\n  EXPECT_FALSE(is_unique);\n}\n\nTEST_F(HSetFamilyTest, HSetEx) {\n  TEST_current_time_ms = kMemberExpiryBase * 1000;  // to reset to test time.\n\n  auto resp = Run({\"HSETEX\", \"k\", \"1\", \"f\", \"v\"});\n  EXPECT_THAT(resp, IntArg(1));\n\n  AdvanceTime(500);\n  EXPECT_THAT(Run({\"HGET\", \"k\", \"f\"}), \"v\");\n\n  AdvanceTime(500);\n  EXPECT_THAT(Run({\"HGET\", \"k\", \"f\"}), ArgType(RespExpr::NIL));\n\n  const std::string_view long_time = \"100\"sv;\n\n  resp = Run({\"HSETEX\", \"k\", long_time, \"field1\", \"value\"});\n  EXPECT_THAT(resp, IntArg(1));\n\n  resp = Run({\"HSETEX\", \"k\", long_time, \"field1\", \"new_value\"});\n  EXPECT_THAT(resp, IntArg(0));\n\n  resp = Run({\"HGET\", \"k\", \"field1\"});\n  EXPECT_THAT(resp, \"new_value\");  // HSETEX without NX option; value was replaced by new_value\n\n  resp = Run({\"HSETEX\", \"k\", long_time, \"field2\", \"value\"});\n  EXPECT_THAT(resp, IntArg(1));\n\n  resp = Run({\"HSETEX\", \"k\", \"NX\", long_time, \"field2\", \"new_value\"});\n  EXPECT_THAT(resp, IntArg(0));\n\n  resp = 
Run({\"HGET\", \"k\", \"field2\"});\n  EXPECT_THAT(resp, \"value\");  // HSETEX with NX option; value was NOT replaced by new_value\n\n  const std::string_view short_time = \"1\"sv;\n\n  resp = Run({\"HSETEX\", \"k\", long_time, \"field3\", \"value\"});\n  EXPECT_THAT(resp, IntArg(1));\n\n  resp = Run({\"HSETEX\", \"k\", short_time, \"field3\", \"value\"});\n  EXPECT_THAT(resp, IntArg(0));\n\n  AdvanceTime(1000);\n  resp = Run({\"HGET\", \"k\", \"field3\"});\n  EXPECT_THAT(resp, ArgType(RespExpr::NIL));\n  // HSETEX without NX option; old expiration time was replaced by a new one\n\n  resp = Run({\"HSETEX\", \"k\", long_time, \"field4\", \"value\"});\n  EXPECT_THAT(resp, IntArg(1));\n\n  resp = Run({\"HSETEX\", \"k\", \"NX\", short_time, \"field4\", \"value\"});\n  EXPECT_THAT(resp, IntArg(0));\n\n  AdvanceTime(1100);\n  resp = Run({\"HGET\", \"k\", \"field4\"});\n  EXPECT_THAT(resp,\n              \"value\");  // HSETEX with NX option; old expiration time was NOT replaced by a new one\n\n  // KEEPTTL related asserts\n  EXPECT_THAT(Run({\"HSETEX\", \"k\", long_time, \"kttlfield\", \"value\"}), IntArg(1));\n  EXPECT_EQ(Run({\"HGET\", \"k\", \"kttlfield\"}), \"value\");\n  EXPECT_EQ(CheckedInt({\"FIELDTTL\", \"k\", \"kttlfield\"}), 100);\n\n  // KEEPTTL resets value of kttlfield, but preserves its TTL. 
afield is added with TTL=1\n  EXPECT_THAT(Run({\"HSETEX\", \"k\", \"KEEPTTL\", \"1\", \"kttlfield\", \"resetvalue\", \"afield\", \"aval\"}),\n              IntArg(1));\n  EXPECT_EQ(CheckedInt({\"FIELDTTL\", \"k\", \"kttlfield\"}), 100);\n  EXPECT_EQ(Run({\"FIELDTTL\", \"k\", \"afield\"}).GetInt(), 1);\n  EXPECT_EQ(Run({\"HGET\", \"k\", \"afield\"}), \"aval\");\n  // make afield expire\n  AdvanceTime(1000);\n  EXPECT_THAT(Run({\"HGET\", \"k\", \"afield\"}), ArgType(RespExpr::NIL));\n\n  // kttlfield is still present although with updated value\n  EXPECT_EQ(Run({\"HGET\", \"k\", \"kttlfield\"}), \"resetvalue\");\n  EXPECT_EQ(Run({\"FIELDTTL\", \"k\", \"kttlfield\"}).GetInt(), 99);\n\n  // If NX is supplied, with or without KEEPTTL neither expiry nor value is updated\n  EXPECT_THAT(Run({\"HSETEX\", \"k\", \"NX\", \"KEEPTTL\", \"1\", \"kttlfield\", \"value\"}), IntArg(0));\n\n  // No updates\n  EXPECT_EQ(Run({\"HGET\", \"k\", \"kttlfield\"}), \"resetvalue\");\n  EXPECT_EQ(Run({\"FIELDTTL\", \"k\", \"kttlfield\"}).GetInt(), 99);\n\n  EXPECT_THAT(Run({\"HSETEX\", \"k\", \"NX\", \"1\", \"kttlfield\", \"value\"}), IntArg(0));\n  // No updates\n  EXPECT_EQ(Run({\"HGET\", \"k\", \"kttlfield\"}), \"resetvalue\");\n  EXPECT_EQ(Run({\"FIELDTTL\", \"k\", \"kttlfield\"}).GetInt(), 99);\n\n  // Invalid TTL handling\n  EXPECT_THAT(Run({\"HSETEX\", \"k\", \"NX\", \"zero\", \"kttlfield\", \"value\"}),\n              ErrArg(\"ERR value is not an integer or out of range\"));\n\n  // Exercise the code path where a field is added without TTL, but then we set a new expiration AND\n  // provide KEEPTTL. 
Since there was no old expiry, the new TTL should be applied.\n  EXPECT_EQ(Run({\"HSET\", \"k\", \"nottl\", \"val\"}), 1);\n  EXPECT_EQ(Run({\"HSETEX\", \"k\", \"KEEPTTL\", long_time, \"nottl\", \"newval\"}), 0);\n  EXPECT_EQ(Run({\"FIELDTTL\", \"k\", \"nottl\"}).GetInt(), 100);\n\n  EXPECT_THAT(Run({\"HSETEX\", \"k\", \"NX\", \"KEEPTTL\", \"NX\", \"1\", \"v\", \"v2\"}),\n              ErrArg(\"ERR wrong number of arguments for 'hsetex' command\"));\n  EXPECT_THAT(Run({\"HSETEX\", \"k\", \"KEEPTTL\", \"KEEPTTL\", \"1\", \"v\", \"v2\"}),\n              ErrArg(\"ERR wrong number of arguments for 'hsetex' command\"));\n}\n\nTEST_F(HSetFamilyTest, TriggerConvertToStrMap) {\n  const int kElements = 200;\n  // Enough for IsGoodForListpack to become false\n  for (size_t i = 0; i < kElements; i++) {\n    auto k = absl::StrCat(100500700u + i);\n    Run({\"HSET\", \"hk\", k, \"100500700\"});\n  }\n  EXPECT_THAT(Run({\"HLEN\", \"hk\"}), IntArg(kElements));\n}\n\nTEST_F(HSetFamilyTest, Issue1140) {\n  Run({\"HSET\", \"CaseKey\", \"Foo\", \"Bar\"});\n\n  EXPECT_EQ(\"Bar\", Run({\"HGET\", \"CaseKey\", \"Foo\"}));\n}\n\nTEST_F(HSetFamilyTest, Issue2102) {\n  // Set key with element that will expire after 1s\n  EXPECT_EQ(CheckedInt({\"HSETEX\", \"key\", \"10\", \"k1\", \"v1\"}), 1);\n  AdvanceTime(10'000);\n  EXPECT_THAT(Run({\"HGETALL\", \"key\"}), RespArray(ElementsAre()));\n}\n\nTEST_F(HSetFamilyTest, HExpire) {\n  EXPECT_EQ(CheckedInt({\"HSET\", \"key\", \"k0\", \"v0\", \"k1\", \"v1\", \"k2\", \"v2\"}), 3);\n  EXPECT_THAT(Run({\"HEXPIRE\", \"key\", \"10\", \"FIELDS\", \"3\", \"k0\", \"k1\", \"k2\"}),\n              RespArray(ElementsAre(IntArg(1), IntArg(1), IntArg(1))));\n  AdvanceTime(10'000);\n  EXPECT_THAT(Run({\"HGETALL\", \"key\"}), RespArray(ElementsAre()));\n\n  EXPECT_EQ(CheckedInt({\"HSETEX\", \"key2\", \"60\", \"k0\", \"v0\", \"k1\", \"v2\"}), 2);\n  EXPECT_THAT(Run({\"HEXPIRE\", \"key2\", \"10\", \"FIELDS\", \"2\", \"k0\", \"k1\"}),\n              
RespArray(ElementsAre(IntArg(1), IntArg(1))));\n  AdvanceTime(10'000);\n  EXPECT_THAT(Run({\"HGETALL\", \"key2\"}), RespArray(ElementsAre()));\n\n  EXPECT_EQ(CheckedInt({\"HSET\", \"key3\", \"k0\", \"v0\", \"k1\", \"v1\", \"k2\", \"v2\", \"k3\", \"v3\", \"k4\", \"v4\",\n                        \"k5\", \"v5\"}),\n            6);\n  EXPECT_THAT(Run({\"HEXPIRE\", \"key3\", \"10\", \"XX\", \"FIELDS\", \"1\", \"k0\"}), IntArg(0));\n  EXPECT_THAT(Run({\"HEXPIRE\", \"key3\", \"10\", \"NX\", \"FIELDS\", \"1\", \"k0\"}), IntArg(1));\n  EXPECT_THAT(Run({\"HEXPIRE\", \"key3\", \"10\", \"NX\", \"FIELDS\", \"1\", \"k0\"}), IntArg(0));\n  EXPECT_THAT(Run({\"HEXPIRE\", \"key3\", \"10\", \"XX\", \"FIELDS\", \"1\", \"k0\"}), IntArg(1));\n  EXPECT_THAT(Run({\"HEXPIRE\", \"key3\", \"10\", \"NX\", \"FIELDS\", \"3\", \"k1\", \"k2\", \"k3\"}),\n              RespArray(ElementsAre(IntArg(1), IntArg(1), IntArg(1))));\n  EXPECT_THAT(Run({\"HEXPIRE\", \"key3\", \"8\", \"GT\", \"FIELDS\", \"1\", \"k2\"}), IntArg(0));\n  EXPECT_THAT(Run({\"HEXPIRE\", \"key3\", \"12\", \"GT\", \"FIELDS\", \"1\", \"k2\"}), IntArg(1));\n  EXPECT_THAT(Run({\"HEXPIRE\", \"key3\", \"8\", \"LT\", \"FIELDS\", \"1\", \"k3\"}), IntArg(1));\n  EXPECT_THAT(Run({\"HEXPIRE\", \"key3\", \"12\", \"LT\", \"FIELDS\", \"1\", \"k3\"}), IntArg(0));\n  EXPECT_THAT(Run({\"HEXPIRE\", \"key3\", \"10\", \"GT\", \"FIELDS\", \"1\", \"k4\"}), IntArg(0));\n  EXPECT_THAT(Run({\"HEXPIRE\", \"key3\", \"10\", \"LT\", \"FIELDS\", \"1\", \"k5\"}), IntArg(1));\n  AdvanceTime(8'000);\n  EXPECT_THAT(\n      Run({\"HGETALL\", \"key3\"}),\n      RespArray(UnorderedElementsAre(\"k0\", \"v0\", \"k1\", \"v1\", \"k2\", \"v2\", \"k4\", \"v4\", \"k5\", \"v5\")));\n  AdvanceTime(2'000);\n  EXPECT_THAT(Run({\"HGETALL\", \"key3\"}), RespArray(UnorderedElementsAre(\"k2\", \"v2\", \"k4\", \"v4\")));\n  AdvanceTime(2'000);\n  EXPECT_THAT(Run({\"HGETALL\", \"key3\"}), RespArray(ElementsAre(\"k4\", \"v4\")));\n\n  EXPECT_THAT(Run({\"HEXPIRE\", \"key3\", \"10\", 
\"FIELDS\", \"1\", \"k4\"}), IntArg(1));\n  EXPECT_THAT(Run({\"HEXPIRE\", \"key3\", \"0\", \"XX\", \"FIELDS\", \"1\", \"k4\"}), IntArg(2));\n  EXPECT_THAT(Run({\"HGETALL\", \"key3\"}), RespArray(ElementsAre()));\n\n  EXPECT_EQ(\n      CheckedInt({\"HSET\", \"key4\", \"k0\", \"v0\", \"k1\", \"v1\", \"k2\", \"v2\", \"k3\", \"v3\", \"k4\", \"v4\"}), 5);\n  EXPECT_THAT(Run({\"HEXPIRE\", \"key4\", \"0\", \"NX\", \"FIELDS\", \"2\", \"k0\", \"k1\"}),\n              RespElementsAre(IntArg(2), IntArg(2)));\n  EXPECT_THAT(Run({\"HEXPIRE\", \"key4\", \"0\", \"LT\", \"FIELDS\", \"2\", \"k2\", \"k3\"}),\n              RespElementsAre(IntArg(2), IntArg(2)));\n\n  EXPECT_THAT(Run({\"HEXPIRE\", \"key4\", \"0\", \"XX\", \"FIELDS\", \"1\", \"k4\"}), IntArg(0));\n  EXPECT_THAT(Run({\"HEXPIRE\", \"key4\", \"10\", \"NX\", \"FIELDS\", \"1\", \"k4\"}), IntArg(1));\n  EXPECT_THAT(Run({\"HEXPIRE\", \"key4\", \"0\", \"NX\", \"FIELDS\", \"1\", \"k4\"}), IntArg(0));\n  EXPECT_THAT(Run({\"HEXPIRE\", \"key4\", \"0\", \"GT\", \"FIELDS\", \"1\", \"k4\"}), IntArg(0));\n  EXPECT_THAT(Run({\"HEXPIRE\", \"key4\", \"0\", \"FIELDS\", \"1\", \"k4\"}), IntArg(2));\n  EXPECT_THAT(Run({\"HGETALL\", \"key4\"}), RespArray(ElementsAre()));\n}\n\nTEST_F(HSetFamilyTest, HExpireNoExpireEarly) {\n  EXPECT_EQ(CheckedInt({\"HSET\", \"key\", \"k0\", \"v0\", \"k1\", \"v1\"}), 2);\n  EXPECT_THAT(Run({\"HEXPIRE\", \"key\", \"10\", \"FIELDS\", \"2\", \"k0\", \"k1\"}),\n              RespArray(ElementsAre(IntArg(1), IntArg(1))));\n  AdvanceTime(9'000);\n  EXPECT_THAT(Run({\"HGETALL\", \"key\"}), RespArray(UnorderedElementsAre(\"k0\", \"v0\", \"k1\", \"v1\")));\n}\n\nTEST_F(HSetFamilyTest, HExpireNoSuchField) {\n  EXPECT_EQ(CheckedInt({\"HSET\", \"key\", \"k0\", \"v0\"}), 1);\n  EXPECT_THAT(Run({\"HEXPIRE\", \"key\", \"10\", \"FIELDS\", \"2\", \"k0\", \"k1\"}),\n              RespArray(ElementsAre(IntArg(1), IntArg(-2))));\n}\n\nTEST_F(HSetFamilyTest, HExpireNoSuchKey) {\n  EXPECT_THAT(Run({\"HEXPIRE\", \"key\", \"10\", 
\"FIELDS\", \"2\", \"k0\", \"k1\"}),\n              RespArray(ElementsAre(IntArg(-2), IntArg(-2))));\n}\n\nTEST_F(HSetFamilyTest, HExpireNoAddNew) {\n  Run({\"HEXPIRE\", \"key\", \"10\", \"FIELDS\", \"1\", \"k0\"});\n  EXPECT_THAT(Run({\"HGETALL\", \"key\"}), RespArray(ElementsAre()));\n}\n\nTEST_F(HSetFamilyTest, HExpireWithNullChar) {\n  string val_with_null(\"test\\0test\", 9);\n  Run({\"HSET\", \"hash\", \"field\", val_with_null});\n  string expected_val(\"test\\0test\", 9);\n  EXPECT_EQ(ToSV(Run({\"HGET\", \"hash\", \"field\"}).GetBuf()), expected_val);\n  Run({\"HEXPIRE\", \"hash\", \"15\", \"FIELDS\", \"1\", \"field\"});\n  EXPECT_EQ(ToSV(Run({\"HGET\", \"hash\", \"field\"}).GetBuf()), expected_val);\n}\n\nTEST_F(HSetFamilyTest, HTtl) {\n  // Non-existent key returns -2 for all fields\n  EXPECT_THAT(Run({\"HTTL\", \"nokey\", \"FIELDS\", \"2\", \"f1\", \"f2\"}),\n              RespArray(ElementsAre(IntArg(-2), IntArg(-2))));\n\n  // Fields without TTL return -1, non-existent fields return -2\n  EXPECT_EQ(CheckedInt({\"HSET\", \"key\", \"k0\", \"v0\", \"k1\", \"v1\"}), 2);\n  EXPECT_THAT(Run({\"HTTL\", \"key\", \"FIELDS\", \"3\", \"k0\", \"k1\", \"nosuch\"}),\n              RespArray(ElementsAre(IntArg(-1), IntArg(-1), IntArg(-2))));\n\n  // Set expiry and verify TTL\n  EXPECT_THAT(Run({\"HEXPIRE\", \"key\", \"10\", \"FIELDS\", \"1\", \"k0\"}), IntArg(1));\n  EXPECT_THAT(Run({\"HTTL\", \"key\", \"FIELDS\", \"2\", \"k0\", \"k1\"}),\n              RespArray(ElementsAre(IntArg(10), IntArg(-1))));\n\n  // Advance time and verify TTL decreases\n  AdvanceTime(3000);\n  EXPECT_THAT(Run({\"HTTL\", \"key\", \"FIELDS\", \"1\", \"k0\"}), IntArg(7));\n\n  // Wrong type\n  Run({\"SET\", \"strkey\", \"val\"});\n  EXPECT_THAT(Run({\"HTTL\", \"strkey\", \"FIELDS\", \"1\", \"f\"}), ErrArg(\"WRONGTYPE\"));\n\n  // Syntax errors\n  EXPECT_THAT(Run({\"HTTL\", \"key\", \"1\", \"k0\"}), ErrArg(\"Mandatory argument FIELDS\"));\n  EXPECT_THAT(Run({\"HTTL\", \"key\", \"FIELDS\", 
\"2\", \"k0\"}), ErrArg(\"numfields\"));\n}\n\nTEST_F(HSetFamilyTest, RandomFieldAllExpired) {\n  for (int i = 0; i < 10; ++i) {\n    EXPECT_EQ(CheckedInt({\"HSETEX\", \"key\", \"10\", absl::StrCat(\"k\", i), \"v\"}), 1);\n  }\n  AdvanceTime(10'000);\n  EXPECT_THAT(Run({\"HRANDFIELD\", \"key\"}), ArgType(RespExpr::NIL));\n}\n\nTEST_F(HSetFamilyTest, RandomField1NotExpired) {\n  for (int i = 0; i < 10; ++i) {\n    EXPECT_EQ(CheckedInt({\"HSETEX\", \"key\", \"10\", absl::StrCat(\"k\", i), \"v\"}), 1);\n  }\n  EXPECT_EQ(CheckedInt({\"HSET\", \"key\", \"keep\", \"v\"}), 1);\n\n  AdvanceTime(10'000);\n  EXPECT_THAT(Run({\"HRANDFIELD\", \"key\"}), \"keep\");\n}\n\nTEST_F(HSetFamilyTest, EmptyHashBug) {\n  EXPECT_THAT(Run({\"HSET\", \"foo\", \"a_field\", \"a_value\"}), IntArg(1));\n  EXPECT_THAT(Run({\"HSETEX\", \"foo\", \"1\", \"b_field\", \"b_value\"}), IntArg(1));\n  EXPECT_THAT(Run({\"HDEL\", \"foo\", \"a_field\"}), IntArg(1));\n\n  AdvanceTime(4000);\n\n  EXPECT_THAT(Run({\"HGETALL\", \"foo\"}), RespArray(ElementsAre()));\n  EXPECT_THAT(Run({\"EXISTS\", \"foo\"}), IntArg(0));\n}\n\nTEST_F(HSetFamilyTest, ScanAfterExpireSet) {\n  EXPECT_THAT(Run({\"HSET\", \"aset\", \"afield\", \"avalue\"}), IntArg(1));\n  EXPECT_THAT(Run({\"HEXPIRE\", \"aset\", \"1\", \"FIELDS\", \"1\", \"afield\"}), IntArg(1));\n\n  const auto resp = Run({\"HSCAN\", \"aset\", \"0\", \"count\", \"100\"});\n  EXPECT_THAT(resp, ArrLen(2));\n\n  const auto vec = StrArray(resp.GetVec()[1]);\n  EXPECT_EQ(vec.size(), 2);\n\n  EXPECT_THAT(vec, Contains(\"afield\").Times(1));\n  EXPECT_THAT(vec, Contains(\"avalue\").Times(1));\n}\n\nTEST_F(HSetFamilyTest, KeyRemovedWhenEmpty) {\n  auto test_cmd = [&](const std::function<void()>& f, const std::string_view tag) {\n    EXPECT_THAT(Run({\"HSET\", \"a\", \"afield\", \"avalue\"}), IntArg(1));\n    EXPECT_THAT(Run({\"HEXPIRE\", \"a\", \"1\", \"FIELDS\", \"1\", \"afield\"}), IntArg(1));\n    AdvanceTime(1000);\n\n    EXPECT_THAT(Run({\"EXISTS\", \"a\"}), 
IntArg(1));\n    f();\n    EXPECT_THAT(Run({\"EXISTS\", \"a\"}), IntArg(0)) << \"failed when testing \" << tag;\n  };\n\n  test_cmd([&] { EXPECT_THAT(Run({\"HGET\", \"a\", \"afield\"}), ArgType(RespExpr::NIL)); }, \"HGET\");\n  test_cmd([&] { EXPECT_THAT(Run({\"HGETALL\", \"a\"}), RespArray(ElementsAre())); }, \"HGETALL\");\n  test_cmd([&] { EXPECT_THAT(Run({\"HDEL\", \"a\", \"afield\"}), IntArg(0)); }, \"HDEL\");\n  test_cmd([&] { EXPECT_THAT(Run({\"HSCAN\", \"a\", \"0\"}).GetVec()[0], \"0\"); }, \"HSCAN\");\n  test_cmd([&] { EXPECT_THAT(Run({\"HMGET\", \"a\", \"afield\"}), ArgType(RespExpr::NIL)); }, \"HMGET\");\n  test_cmd([&] { EXPECT_THAT(Run({\"HEXISTS\", \"a\", \"afield\"}), IntArg(0)); }, \"HEXISTS\");\n  test_cmd([&] { EXPECT_THAT(Run({\"HSTRLEN\", \"a\", \"afield\"}), IntArg(0)); }, \"HSTRLEN\");\n}\n\nTEST_F(HSetFamilyTest, HRandFieldRespFormat) {\n  absl::flat_hash_map<std::string, std::string> expected{\n      {\"a\", \"1\"},\n      {\"b\", \"2\"},\n      {\"c\", \"3\"},\n  };\n  Run({\"HELLO\", \"3\"});\n  EXPECT_THAT(Run({\"HSET\", \"key\", \"a\", \"1\", \"b\", \"2\", \"c\", \"3\"}), IntArg(3));\n  auto resp = Run({\"HRANDFIELD\", \"key\", \"3\", \"WITHVALUES\"});\n  EXPECT_THAT(resp, ArrLen(3));\n  for (const auto& v : resp.GetVec()) {\n    EXPECT_THAT(v, ArrLen(2));\n    const auto& kv = v.GetVec();\n    EXPECT_THAT(kv[0], AnyOf(\"a\", \"b\", \"c\"));\n    EXPECT_THAT(kv[1], expected[kv[0].GetView()]);\n  }\n\n  Run({\"HELLO\", \"2\"});\n  resp = Run({\"HRANDFIELD\", \"key\", \"3\", \"WITHVALUES\"});\n  EXPECT_THAT(resp, ArrLen(6));\n  const auto& vec = resp.GetVec();\n  for (size_t i = 0; i < vec.size(); i += 2) {\n    EXPECT_THAT(vec[i], AnyOf(\"a\", \"b\", \"c\"));\n    EXPECT_THAT(vec[i + 1], expected[vec[i].GetView()]);\n  }\n}\n\n// Make sure no \"Zombie Key\": HEXPIRE with TTL 0 must delete the key\n// if the hash becomes empty. 
If the key remains (zombie), saving the RDB or running\n// commands like EXISTS against it may lead to crashes or other incorrect behavior.\nTEST_F(HSetFamilyTest, HExpireZeroTTL_DeletesKey) {\n  constexpr auto kRdbFile = \"zombie_test.rdb\";\n  auto cleanup = absl::MakeCleanup([kRdbFile] { std::ignore = remove(kRdbFile); });\n  Run({\"HSET\", \"zombie\", \"f\", \"v\"});\n  auto resp = Run({\"HEXPIRE\", \"zombie\", \"0\", \"FIELDS\", \"1\", \"f\"});\n  EXPECT_THAT(resp, IntArg(2));\n  EXPECT_EQ(0, CheckedInt({\"EXISTS\", \"zombie\"}));\n  EXPECT_EQ(Run({\"SAVE\", \"RDB\", kRdbFile}), \"OK\");\n}\n\n// HINCRBYFLOAT with NaN on a non-existing key must not create a zombie empty hash.\n// Before the fix, the key was left in the DB with an empty listpack, causing HRANDFIELD\n// to crash with CHECK(lplen > 0 && lplen % 2 == 0).\nTEST_F(HSetFamilyTest, HIncrByFloatNaNDoesNotCreateKey) {\n  EXPECT_THAT(Run({\"HINCRBYFLOAT\", \"key\", \"field\", \"nan\"}),\n              ErrArg(\"increment would produce NaN or Infinity\"));\n  EXPECT_EQ(0, CheckedInt({\"EXISTS\", \"key\"}));\n  EXPECT_THAT(Run({\"HRANDFIELD\", \"key\"}), ArgType(RespExpr::NIL));\n}\n\n}  // namespace dfly\n"
  },
  {
    "path": "src/server/http_api.cc",
    "content": "// Copyright 2024, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#include \"server/http_api.h\"\n\n#include \"base/logging.h\"\n#include \"core/flatbuffers.h\"\n#include \"facade/conn_context.h\"\n#include \"facade/reply_capture.h\"\n#include \"server/conn_context.h\"\n#include \"server/main_service.h\"\n#include \"util/http/http_common.h\"\n\nnamespace dfly {\nusing namespace util;\nusing namespace std;\nnamespace h2 = boost::beast::http;\nnamespace payload = facade::payload;\nnamespace {\n\nbool IsVectorOfStrings(flexbuffers::Reference req) {\n  if (!req.IsVector()) {\n    return false;\n  }\n\n  auto vec = req.AsVector();\n  if (vec.size() == 0) {\n    return false;\n  }\n\n  for (size_t i = 0; i < vec.size(); ++i) {\n    if (!vec[i].IsString()) {\n      return false;\n    }\n  }\n  return true;\n}\n\n// Escape a string so that it is legal to print it in JSON text.\nstd::string JsonEscape(string_view input) {\n  auto hex_digit = [](unsigned c) -> char {\n    DCHECK_LT(c, 0xFu);\n    return c < 10 ? 
c + '0' : c - 10 + 'a';\n  };\n\n  string out;\n  out.reserve(input.size() + 2);\n  out.push_back('\\\"');\n\n  auto p = input.begin();\n  auto e = input.end();\n\n  while (p < e) {\n    uint8_t c = *p;\n    if (c == '\\\\' || c == '\\\"') {\n      out.push_back('\\\\');\n      out.push_back(*p++);\n    } else if (c <= 0x1f) {\n      switch (c) {\n        case '\\b':\n          out.append(\"\\\\b\");\n          p++;\n          break;\n        case '\\f':\n          out.append(\"\\\\f\");\n          p++;\n          break;\n        case '\\n':\n          out.append(\"\\\\n\");\n          p++;\n          break;\n        case '\\r':\n          out.append(\"\\\\r\");\n          p++;\n          break;\n        case '\\t':\n          out.append(\"\\\\t\");\n          p++;\n          break;\n        default:\n          // this condition captures non readable chars with value < 32,\n          // so size = 1 byte (e.g control chars).\n          out.append(\"\\\\u00\");\n          out.push_back(hex_digit((c & 0xf0) >> 4));\n          out.push_back(hex_digit(c & 0xf));\n          p++;\n      }\n    } else {\n      out.push_back(*p++);\n    }\n  }\n\n  out.push_back('\\\"');\n  return out;\n}\n\nstruct CaptureVisitor {\n  CaptureVisitor() {\n    str = R\"({\"result\":)\";\n  }\n\n  void operator()(monostate) {\n  }\n\n  void operator()(long v) {\n    absl::StrAppend(&str, v);\n  }\n\n  void operator()(double v) {\n    absl::StrAppend(&str, v);\n  }\n\n  void operator()(const payload::SimpleString& ss) {\n    absl::StrAppend(&str, \"\\\"\", ss, \"\\\"\");\n  }\n\n  void operator()(const payload::BulkString& bs) {\n    absl::StrAppend(&str, JsonEscape(bs));\n  }\n\n  void operator()(payload::Null) {\n    absl::StrAppend(&str, \"null\");\n  }\n\n  void operator()(const payload::Error& err) {\n    str = absl::StrCat(R\"({\"error\": \")\", err->first, \"\\\"\");\n  }\n\n  void operator()(facade::OpStatus status) {\n    absl::StrAppend(&str, \"\\\"\", facade::StatusToMsg(status), 
\"\\\"\");\n  }\n\n  void operator()(unique_ptr<payload::CollectionPayload> cp) {\n    if (!cp) {\n      absl::StrAppend(&str, \"null\");\n      return;\n    }\n    if (cp->len == 0 && cp->type == facade::CollectionType::ARRAY) {\n      absl::StrAppend(&str, \"[]\");\n      return;\n    }\n    absl::StrAppend(&str, \"[\");\n    bool append_delimiter = false;\n    for (auto& pl : cp->arr) {\n      if (append_delimiter) {\n        absl::StrAppend(&str, \",\");\n      }\n      append_delimiter = true;\n      visit(*this, std::move(pl));\n    }\n    absl::StrAppend(&str, \"]\");\n  }\n  string str;\n};\n\n}  // namespace\n\nvoid HttpAPI(const http::QueryArgs& args, HttpRequest&& req, Service* service,\n             HttpContext* http_cntx) {\n  auto& body = req.body();\n\n  flexbuffers::Builder fbb;\n  flatbuffers::Parser parser;\n  flexbuffers::Reference doc;\n  bool success = parser.ParseFlexBuffer(body.c_str(), nullptr, &fbb);\n  if (success) {\n    fbb.Finish();\n    doc = flexbuffers::GetRoot(fbb.GetBuffer());\n    if (!IsVectorOfStrings(doc)) {\n      success = false;\n    }\n  }\n\n  // TODO: to add a content-type/json check.\n  if (!success) {\n    VLOG(1) << \"Invalid body \" << body;\n    auto response = http::MakeStringResponse(h2::status::bad_request);\n    http::SetMime(http::kTextMime, &response);\n    response.body() = \"Failed to parse json\\r\\n\";\n    http_cntx->Invoke(std::move(response));\n    return;\n  }\n\n  flexbuffers::Vector vec = doc.AsVector();\n\n  facade::ConnectionContext* context = (facade::ConnectionContext*)http_cntx->user_data();\n  DCHECK(context);\n\n  facade::CapturingReplyBuilder reply_builder;\n\n  // TODO: to finish this.\n\n  CommandContext cmd_cntx;\n\n  cmd_cntx.Init(&reply_builder, context);\n  for (size_t i = 0; i < vec.size(); ++i) {\n    cmd_cntx.PushArg(vec[i].AsString().c_str());\n  }\n  service->DispatchCommand(facade::ParsedArgs{cmd_cntx}, &cmd_cntx,\n                           facade::AsyncPreference::ONLY_SYNC);\n  
facade::CapturingReplyBuilder::Payload payload = reply_builder.Take();\n\n  auto response = http::MakeStringResponse();\n  http::SetMime(http::kJsonMime, &response);\n\n  CaptureVisitor visitor;\n  std::visit(visitor, std::move(payload));\n  visitor.str.append(\"}\\r\\n\");\n  response.body() = visitor.str;\n  http_cntx->Invoke(std::move(response));\n}\n\n}  // namespace dfly\n"
  },
  {
    "path": "src/server/http_api.h",
    "content": "// Copyright 2024, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#pragma once\n\n#include \"util/http/http_handler.h\"\n\nnamespace dfly {\nclass Service;\nusing HttpRequest = util::HttpListenerBase::RequestType;\n\n/**\n * @brief The main handler function for dispatching commands via HTTP.\n *\n * @param args - query arguments. Currently not used.\n * @param req  - full http request including the body that should consist of a json array\n *               representing a Dragonfly command, e.g. `[\"set\", \"foo\", \"bar\"]`\n * @param service - a pointer to the dfly::Service object.\n * @param http_cntxt - a pointer to the http context object which provides Dragonfly context\n *                     information via user_data() and allows replying with HTTP responses.\n */\nvoid HttpAPI(const util::http::QueryArgs& args, HttpRequest&& req, Service* service,\n             util::HttpContext* http_cntxt);\n\n}  // namespace dfly\n"
  },
  {
    "path": "src/server/journal/CMakeLists.txt",
    "content": "SET(DF_JOURNAL_SRCS\n    journal/cmd_serializer.cc journal/tx_executor.cc namespaces.cc\n    journal/journal.cc journal/types.cc journal/journal_slice.cc\n    journal/serializer.cc journal/executor.cc journal/streamer.cc\n    PARENT_SCOPE)\n"
  },
  {
    "path": "src/server/journal/cmd_serializer.cc",
    "content": "// Copyright 2024, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#include \"server/journal/cmd_serializer.h\"\n\n#include \"server/container_utils.h\"\n#include \"server/db_slice.h\"\n#include \"server/engine_shard.h\"\n#include \"server/journal/serializer.h\"\n#include \"server/rdb_save.h\"\n#include \"server/tiered_storage.h\"\n\nnamespace dfly {\n\nnamespace {\nusing namespace std;\n\nclass CommandAggregator {\n public:\n  using WriteCmdCallback = std::function<void(absl::Span<const string_view>)>;\n\n  CommandAggregator(string_view key, WriteCmdCallback cb, size_t max_agg_bytes)\n      : key_(key), cb_(std::move(cb)), max_aggragation_bytes_(max_agg_bytes) {\n  }\n\n  ~CommandAggregator() {\n    CommitPending();\n  }\n\n  enum class CommitMode : uint8_t { kAuto, kNoCommit };\n\n  // Returns whether CommitPending() was called\n  bool AddArg(string arg, CommitMode commit_mode = CommitMode::kAuto) {\n    agg_bytes_ += arg.size();\n    members_.push_back(std::move(arg));\n\n    if (commit_mode != CommitMode::kNoCommit && agg_bytes_ >= max_aggragation_bytes_) {\n      CommitPending();\n      return true;\n    }\n\n    return false;\n  }\n\n private:\n  void CommitPending() {\n    if (members_.empty()) {\n      return;\n    }\n\n    args_.clear();\n    args_.reserve(members_.size() + 1);\n    args_.push_back(key_);\n    for (string_view member : members_) {\n      args_.push_back(member);\n    }\n    cb_(args_);\n    members_.clear();\n  }\n\n  string_view key_;\n  WriteCmdCallback cb_;\n  vector<string> members_;\n  absl::InlinedVector<string_view, 5> args_;\n  size_t agg_bytes_ = 0;\n  size_t max_aggragation_bytes_;\n};\n\n}  // namespace\n\nCmdSerializer::CmdSerializer(DbSlice* db_slice, FlushSerialized cb,\n                             size_t max_serialization_buffer_size)\n    : db_slice_(db_slice),\n      cb_(std::move(cb)),\n      max_serialization_buffer_size_(max_serialization_buffer_size) {\n  
serializer_ = std::make_unique<RdbSerializer>(GetDefaultCompressionMode());\n}\n\nsize_t CmdSerializer::SerializeEntry(string_view key, const PrimeKey& pk, const PrimeValue& pv,\n                                     uint64_t expire_ms) {\n  // We send RESTORE commands objects we don't support breaking.\n  bool use_restore_serialization = true;\n  size_t commands = 1;\n  switch (pv.ObjType()) {\n    case OBJ_SET:\n      commands = SerializeSet(key, pv);\n      use_restore_serialization = false;\n      break;\n    case OBJ_ZSET:\n      commands = SerializeZSet(key, pv);\n      use_restore_serialization = false;\n      break;\n    case OBJ_HASH:\n      commands = SerializeHash(key, pv);\n      use_restore_serialization = false;\n      break;\n    case OBJ_LIST:\n      commands = SerializeList(key, pv);\n      use_restore_serialization = false;\n      break;\n    case OBJ_STRING:\n      commands = SerializeString(key, pv, expire_ms);\n      use_restore_serialization = false;\n      // reset expire_ms to skip it in SerializeExpireIfNeeded\n      expire_ms = 0;\n      break;\n    case OBJ_STREAM:\n    case OBJ_JSON:\n    case OBJ_SBF:\n    default:\n      // These types are unsupported wrt splitting huge values to multiple commands, so we send\n      // them as a RESTORE command.\n      break;\n  }\n\n  if (use_restore_serialization) {\n    // RESTORE sets STICK and EXPIRE as part of the command.\n    SerializeRestore(key, pk, pv, expire_ms);\n  } else {\n    SerializeStickIfNeeded(key, pk);\n    SerializeExpireIfNeeded(key, expire_ms);\n  }\n  return commands;\n}\n\nsize_t CmdSerializer::SerializeDelayedEntries(bool force,\n                                              absl::flat_hash_set<std::string>* tiered_keys) {\n  size_t serialized = 0;\n  for (auto it = delayed_entries_.begin(); it != delayed_entries_.end();) {\n    auto& entry = it->second;\n    // Skip unresolved entries unless force is true\n    if (!force && !entry->value.IsResolved()) {\n      ++it;\n      
continue;\n    }\n\n    // If tiered_keys filter is provided, only serialize matching keys\n    // Compare the string key from the map with the keys in tiered_keys set\n    if (tiered_keys && !tiered_keys->contains(it->first)) {\n      ++it;\n      continue;\n    }\n\n    // Get the value from the future (blocks if not resolved and force=true)\n    auto res = entry->value.Get();\n    if (!res.has_value()) {\n      LOG(ERROR) << \"Failed to read delayed entry for key \" << entry->key.ToString();\n      it++;\n      continue;\n    }\n\n    // Serialize the entry and remove it from delayed_entries_\n    PrimeValue pv{*res};\n    serialized += SerializeEntry(entry->key.ToString(), entry->key, pv, entry->expire);\n    delayed_entries_.erase(it++);\n  }\n  return serialized;\n}\n\nvoid CmdSerializer::SerializeCommand(string_view cmd, absl::Span<const string_view> args) {\n  journal::Entry entry(0,                     // txid\n                       journal::Op::COMMAND,  // single command\n                       0,                     // db index\n                       0,                     // slot-id, but it is ignored at this level\n                       journal::Entry::Payload(cmd, ArgSlice(args)));\n\n  // Serialize into a string\n  io::StringSink cmd_sink;\n  JournalWriter writer{&cmd_sink};\n  writer.Write(entry);\n\n  cb_(std::move(cmd_sink).str());\n}\n\nvoid CmdSerializer::SerializeStickIfNeeded(string_view key, const PrimeKey& pk) {\n  if (!pk.IsSticky()) {\n    return;\n  }\n\n  SerializeCommand(\"STICK\", {key});\n}\n\nvoid CmdSerializer::SerializeExpireIfNeeded(string_view key, uint64_t expire_ms) {\n  if (expire_ms == 0) {\n    return;\n  }\n\n  SerializeCommand(\"PEXPIREAT\", {key, absl::StrCat(expire_ms)});\n}\n\nsize_t CmdSerializer::SerializeSet(string_view key, const PrimeValue& pv) {\n  CommandAggregator aggregator(\n      key, [&](absl::Span<const string_view> args) { SerializeCommand(\"SADD\", args); },\n      max_serialization_buffer_size_);\n\n 
 size_t commands = 0;\n  container_utils::IterateSet(pv, [&](container_utils::ContainerEntry ce) {\n    commands += aggregator.AddArg(ce.ToString());\n    return true;\n  });\n  return commands;\n}\n\nsize_t CmdSerializer::SerializeZSet(string_view key, const PrimeValue& pv) {\n  CommandAggregator aggregator(\n      key, [&](absl::Span<const string_view> args) { SerializeCommand(\"ZADD\", args); },\n      max_serialization_buffer_size_);\n\n  size_t commands = 0;\n  container_utils::IterateSortedSet(\n      pv,\n      [&](container_utils::ContainerEntry ce, double score) {\n        aggregator.AddArg(absl::StrCat(score), CommandAggregator::CommitMode::kNoCommit);\n        commands += aggregator.AddArg(ce.ToString());\n        return true;\n      },\n      /*start=*/0, /*end=*/SIZE_MAX, /*reverse=*/false, /*use_score=*/true);\n  return commands;\n}\n\nsize_t CmdSerializer::SerializeHash(string_view key, const PrimeValue& pv) {\n  CommandAggregator aggregator(\n      key, [&](absl::Span<const string_view> args) { SerializeCommand(\"HSET\", args); },\n      max_serialization_buffer_size_);\n\n  size_t commands = 0;\n  container_utils::IterateMap(\n      pv, [&](container_utils::ContainerEntry k, container_utils::ContainerEntry v) {\n        aggregator.AddArg(k.ToString(), CommandAggregator::CommitMode::kNoCommit);\n        commands += aggregator.AddArg(v.ToString());\n        return true;\n      });\n  return commands;\n}\n\nsize_t CmdSerializer::SerializeList(string_view key, const PrimeValue& pv) {\n  CommandAggregator aggregator(\n      key, [&](absl::Span<const string_view> args) { SerializeCommand(\"RPUSH\", args); },\n      max_serialization_buffer_size_);\n\n  size_t commands = 0;\n  container_utils::IterateList(pv, [&](container_utils::ContainerEntry ce) {\n    commands += aggregator.AddArg(ce.ToString());\n    return true;\n  });\n  return commands;\n}\n\nsize_t CmdSerializer::SerializeString(string_view key, const PrimeValue& pv, uint64_t expire_ms) {\n  
string str;\n  if (pv.IsExternal()) {\n    if (pv.IsCool()) {\n      pv.GetCool().record->value.GetString(&str);\n    } else {\n      SerializeExternal(key, pv, expire_ms);\n      return 0;\n    }\n  } else {\n    pv.GetString(&str);\n  }\n\n  if (expire_ms) {\n    std::string expire_ms_str = to_string(expire_ms);\n    std::string_view args[] = {key, string_view(str), \"PXAT\", string_view(expire_ms_str)};\n    SerializeCommand(\"SET\", args);\n  } else {\n    std::string_view args[] = {key, string_view(str)};\n    SerializeCommand(\"SET\", args);\n  }\n\n  return 1;\n}\n\nvoid CmdSerializer::SerializeRestore(string_view key, const PrimeKey& pk, const PrimeValue& pv,\n                                     uint64_t expire_ms) {\n  absl::InlinedVector<string_view, 5> args;\n  args.push_back(key);\n\n  string expire_str = absl::StrCat(expire_ms);\n  args.push_back(expire_str);\n\n  // TODO we already ignore CRC in the load rdb code during migration, we need to provide ignore_crc\n  // = true when we are sure that all shards ignore crc during migration process\n  std::string value_dump = RdbSerializerBase::DumpValue(serializer_.get(), pv, false);\n  args.push_back(value_dump);\n\n  args.push_back(\"ABSTTL\");  // Means expire string is since epoch\n\n  if (pk.IsSticky()) {\n    args.push_back(\"STICK\");\n  }\n\n  SerializeCommand(\"RESTORE\", args);\n}\n\nvoid CmdSerializer::SerializeExternal(std::string_view key, const PrimeValue& pv,\n                                      time_t expire_time) {\n  // In cluster mode, db_id is always 0\n  constexpr DbIndex kClusterDbId = 0;\n  auto future = ReadTieredString(kClusterDbId, key, pv, EngineShard::tlocal()->tiered_storage());\n  PrimeKey prime_key{key};\n  uint32_t mc_flags = pv.HasFlag() ? 
db_slice_->GetMCFlag(kClusterDbId, prime_key) : 0;\n  auto entry = std::make_unique<TieredDelayedEntry>(kClusterDbId, std::move(prime_key),\n                                                    std::move(future), expire_time, mc_flags);\n  delayed_entries_.emplace(key, std::move(entry));\n}\n\n}  // namespace dfly\n"
  },
  {
    "path": "src/server/journal/cmd_serializer.h",
    "content": "// Copyright 2024, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#pragma once\n\n#include <absl/types/span.h>\n\n#include <string>\n#include <string_view>\n\n#include \"server/table.h\"\n#include \"server/tiered_storage.h\"\n#include \"server/tx_base.h\"\n\nnamespace dfly {\n\nclass RdbSerializer;\n\n// CmdSerializer serializes DB entries (key+value) into command(s) in RESP format string.\n// Small entries are serialized as RESTORE commands, while bigger ones (see\n// serialization_max_chunk_size) are split into multiple commands (like rpush, hset, etc).\n// Expiration and stickiness are also serialized into commands.\nclass CmdSerializer {\n public:\n  using FlushSerialized = std::function<void(std::string)>;\n\n  explicit CmdSerializer(DbSlice* db_slice, FlushSerialized cb,\n                         size_t max_serialization_buffer_size);\n\n  // Returns how many commands we broke this entry into (like multiple HSETs etc)\n  size_t SerializeEntry(std::string_view key, const PrimeKey& pk, const PrimeValue& pv,\n                        uint64_t expire_ms);\n\n  // Serialize delayed entries. 
If force is true, blocks until all are resolved.\n  // If force is false, only serializes entries whose futures are already resolved.\n  // If tiered_keys is provided, only serializes entries whose keys are in the set.\n  size_t SerializeDelayedEntries(bool force, absl::flat_hash_set<std::string>* tiered_keys);\n\n private:\n  void SerializeCommand(std::string_view cmd, absl::Span<const std::string_view> args);\n  void SerializeStickIfNeeded(std::string_view key, const PrimeKey& pk);\n  void SerializeExpireIfNeeded(std::string_view key, uint64_t expire_ms);\n\n  size_t SerializeSet(std::string_view key, const PrimeValue& pv);\n  size_t SerializeZSet(std::string_view key, const PrimeValue& pv);\n  size_t SerializeHash(std::string_view key, const PrimeValue& pv);\n  size_t SerializeList(std::string_view key, const PrimeValue& pv);\n  size_t SerializeString(std::string_view key, const PrimeValue& pv, uint64_t expire_ms);\n  void SerializeRestore(std::string_view key, const PrimeKey& pk, const PrimeValue& pv,\n                        uint64_t expire_ms);\n  void SerializeExternal(std::string_view key, const PrimeValue& pv, time_t expire_time);\n\n  DbSlice* db_slice_;\n  FlushSerialized cb_;\n  size_t max_serialization_buffer_size_;\n  std::unique_ptr<RdbSerializer> serializer_;\n  absl::flat_hash_map<std::string, std::unique_ptr<TieredDelayedEntry>> delayed_entries_;\n};\n\n}  // namespace dfly\n"
  },
  {
    "path": "src/server/journal/executor.cc",
    "content": "// Copyright 2022, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#include \"server/journal/executor.h\"\n\n#include <absl/strings/str_cat.h>\n#include <absl/strings/str_split.h>\n\n#include <algorithm>\n#include <memory>\n\n#include \"base/logging.h\"\n#include \"facade/reply_capture.h\"\n#include \"facade/service_interface.h\"\n#include \"server/main_service.h\"\n#include \"server/namespaces.h\"\n\nusing namespace std;\n\nnamespace dfly {\n\nnamespace {\n// Build a CmdData from parts passed to absl::StrCat.\ntemplate <typename... Ts> void BuildFromParts(cmn::BackedArguments* dest, Ts... parts) {\n  vector<string> raw_parts{absl::StrCat(std::forward<Ts>(parts))...};\n\n  dest->Assign(raw_parts.begin(), raw_parts.end(), raw_parts.size());\n}\n\n}  // namespace\n\nJournalExecutor::JournalExecutor(Service* service)\n    : service_{service},\n      reply_builder_{new facade::CapturingReplyBuilder{facade::ReplyMode::NONE}},\n      conn_context_{nullptr, acl::UserCredentials{}} {\n  conn_context_.is_replicating = true;\n  conn_context_.journal_emulated = true;\n  conn_context_.skip_acl_validation = true;\n  conn_context_.ns = &namespaces->GetDefaultNamespace();\n}\n\nJournalExecutor::~JournalExecutor() {\n}\n\nfacade::DispatchResult JournalExecutor::Execute(DbIndex dbid, journal::ParsedEntry::CmdData& cmd) {\n  SelectDb(dbid);\n  CommandContext cntx_cmd;\n  cntx_cmd.Init(reply_builder_.get(), &conn_context_);\n\n  // TODO: we should improve interfaces in callers (replica and rdb_load) so that we pass\n  // CommandContext directly and avoid this swap.\n  cntx_cmd.SwapArgs(cmd);\n  return Execute(&cntx_cmd);\n}\n\nvoid JournalExecutor::FlushAll() {\n  CommandContext cmd;\n  cmd.Init(reply_builder_.get(), &conn_context_);\n  BuildFromParts(&cmd, \"FLUSHALL\");\n  std::ignore = Execute(&cmd);\n}\n\nvoid JournalExecutor::FlushSlots(const cluster::SlotRange& slot_range) {\n  CommandContext cmd;\n  
cmd.Init(reply_builder_.get(), &conn_context_);\n  BuildFromParts(&cmd, \"DFLYCLUSTER\", \"FLUSHSLOTS\", slot_range.start, slot_range.end);\n  std::ignore = Execute(&cmd);\n}\n\nfacade::DispatchResult JournalExecutor::Execute(CommandContext* cmd_cntx) {\n  return service_->DispatchCommand(facade::ParsedArgs{*cmd_cntx}, cmd_cntx,\n                                   facade::AsyncPreference::ONLY_SYNC);\n}\n\nvoid JournalExecutor::SelectDb(DbIndex dbid) {\n  if (ensured_dbs_.size() <= dbid)\n    ensured_dbs_.resize(dbid + 1);\n\n  if (!ensured_dbs_[dbid]) {\n    CommandContext cmd;\n\n    cmd.Init(reply_builder_.get(), &conn_context_);\n    BuildFromParts(&cmd, \"SELECT\", dbid);\n    std::ignore = Execute(&cmd);\n    ensured_dbs_[dbid] = true;\n\n    // TODO: This is a temporary fix for #4146.\n    // For some reason without this the replication breaks in regtests.\n    auto cb = [](EngineShard* shard) { return OpStatus::OK; };\n    shard_set->RunBriefInParallel(std::move(cb));\n  } else {\n    conn_context_.conn_state.db_index = dbid;\n  }\n}\n\n}  // namespace dfly\n"
  },
  {
    "path": "src/server/journal/executor.h",
    "content": "// Copyright 2022, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#pragma once\n\n#include <absl/types/span.h>\n\n#include \"facade/service_interface.h\"\n#include \"server/cluster/cluster_defs.h\"\n#include \"server/conn_context.h\"\n#include \"server/journal/types.h\"\n\nnamespace facade {\nclass CapturingReplyBuilder;\n}  // namespace facade\n\nnamespace dfly {\n\nclass Service;\n\n// JournalExecutor allows executing journal entries.\nclass JournalExecutor {\n public:\n  explicit JournalExecutor(Service* service);\n  ~JournalExecutor();\n\n  JournalExecutor(JournalExecutor&&) = delete;\n\n  // Returns the result of Service::DispatchCommand\n  facade::DispatchResult Execute(DbIndex dbid, journal::ParsedEntry::CmdData& cmd);\n\n  void FlushAll();  // Execute FLUSHALL.\n  void FlushSlots(const cluster::SlotRange& slot_range);\n\n  ConnectionContext* connection_context() {\n    return &conn_context_;\n  }\n\n private:\n  facade::DispatchResult Execute(CommandContext* cmd_cntx);\n\n  // Select database. Ensure it exists if accessed for first time.\n  void SelectDb(DbIndex dbid);\n\n  Service* service_;\n  std::unique_ptr<facade::CapturingReplyBuilder> reply_builder_;\n  ConnectionContext conn_context_;\n\n  std::vector<bool> ensured_dbs_;\n};\n\n}  // namespace dfly\n"
  },
  {
    "path": "src/server/journal/journal.cc",
    "content": "// Copyright 2022, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#include \"server/journal/journal.h\"\n\n#include \"base/logging.h\"\n#include \"server/engine_shard_set.h\"\n#include \"server/journal/journal_slice.h\"\n\nnamespace dfly {\nnamespace journal {\n\nusing namespace std;\nusing namespace util;\n\nnamespace {\n\n// Active only in shard threads.\nthread_local JournalSlice journal_slice;\n\n}  // namespace\n\nvoid StartInThread() {\n  journal_slice.Init();\n\n  EngineShard* shard = EngineShard::tlocal();\n  shard->set_journal(true);\n}\n\nvoid StartInThreadAtLsn(LSN lsn) {\n  StartInThread();\n  journal_slice.ResetRingBuffer();\n  journal_slice.SetStartingLSN(lsn);\n}\n\nerror_code Close() {\n  VLOG(1) << \"Journal::Close\";\n\n  auto close_cb = [&](auto* shard) {\n    journal_slice.ResetRingBuffer();\n    shard->set_journal(false);\n  };\n\n  shard_set->RunBriefInParallel(close_cb);\n\n  return {};\n}\n\nbool HasRegisteredCallbacks() {\n  return journal_slice.HasRegisteredCallbacks();\n}\n\nbool IsLSNInBuffer(LSN lsn) {\n  return journal_slice.IsLSNInBuffer(lsn);\n}\n\nstd::string_view GetEntry(LSN lsn) {\n  return journal_slice.GetEntry(lsn);\n}\n\nuint32_t RegisterConsumer(JournalConsumerInterface* consumer) {\n  return journal_slice.RegisterOnChange(consumer);\n}\n\nvoid UnregisterConsumer(uint32_t id) {\n  journal_slice.UnregisterOnChange(id);\n}\n\nLSN GetLsn() {\n  return journal_slice.cur_lsn();\n}\n\nvoid RecordEntry(TxId txid, Op opcode, DbIndex dbid, std::optional<SlotId> slot,\n                 Entry::Payload payload) {\n  journal_slice.AddLogRecord(Entry{txid, opcode, dbid, slot, std::move(payload)});\n}\n\nvoid SetFlushMode(bool allow_flush) {\n  journal_slice.SetFlushMode(allow_flush);\n}\n\nsize_t LsnBufferSize() {\n  return journal_slice.GetRingBufferSize();\n}\n\nsize_t LsnBufferBytes() {\n  return journal_slice.GetRingBufferBytes();\n}\n\nsize_t thread_local 
DisableFlushGuard::counter_ = 0;\n\n}  // namespace journal\n}  // namespace dfly\n"
  },
  {
    "path": "src/server/journal/journal.h",
    "content": "// Copyright 2026, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#pragma once\n#include \"server/journal/types.h\"\n#include \"util/fibers/detail/fiber_interface.h\"\n\nnamespace dfly {\n\nnamespace journal {\n\nvoid StartInThread();\n\n// Starts the journal at the specified LSN.\n// Also drops (resets) the partial sync buffers.\nvoid StartInThreadAtLsn(LSN lsn);\n\nstd::error_code Close();\n\n//******* The following functions must be called in the context of the owning shard *********//\n\nbool HasRegisteredCallbacks();\n\nbool IsLSNInBuffer(LSN lsn);\n\nstd::string_view GetEntry(LSN lsn);\n\nLSN GetLsn();\nuint32_t RegisterConsumer(JournalConsumerInterface* consumer);\nvoid UnregisterConsumer(uint32_t id);\n\nvoid RecordEntry(TxId txid, Op opcode, DbIndex dbid, std::optional<SlotId> slot,\n                 Entry::Payload payload);\n\nsize_t LsnBufferSize();\nsize_t LsnBufferBytes();\n\nvoid SetFlushMode(bool allow_flush);\n\nclass DisableFlushGuard {\n public:\n  explicit DisableFlushGuard(bool j) : journal_(j) {\n    if (journal_ && counter_ == 0) {\n      SetFlushMode(false);\n    }\n    util::fb2::detail::EnterFiberAtomicSection();\n    ++counter_;\n  }\n\n  ~DisableFlushGuard() {\n    util::fb2::detail::LeaveFiberAtomicSection();\n    --counter_;\n    if (journal_ && counter_ == 0) {\n      SetFlushMode(true);  // Restore the state on destruction\n    }\n  }\n\n  DisableFlushGuard(const DisableFlushGuard&) = delete;\n  DisableFlushGuard& operator=(const DisableFlushGuard&) = delete;\n\n private:\n  bool journal_;\n  static size_t thread_local counter_;\n};\n\n}  // namespace journal\n}  // namespace dfly\n"
  },
  {
    "path": "src/server/journal/journal_slice.cc",
    "content": "// Copyright 2022, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#include \"server/journal/journal_slice.h\"\n\n#include <absl/container/inlined_vector.h>\n#include <absl/flags/flag.h>\n#include <absl/strings/escaping.h>\n#include <absl/strings/str_cat.h>\n#include <fcntl.h>\n\n#include <filesystem>\n\n#include \"base/function2.hpp\"\n#include \"base/logging.h\"\n#include \"server/journal/serializer.h\"\n#include \"util/fibers/fibers.h\"\n\nABSL_FLAG(uint32_t, shard_repl_backlog_len, 8192,\n          \"The length of the circular replication log per shard\");\n\nnamespace dfly {\nnamespace journal {\nusing namespace std;\nusing namespace util;\n\nJournalSlice::JournalSlice() {\n}\n\nJournalSlice::~JournalSlice() {\n}\n\nvoid JournalSlice::Init() {\n  // calling this function multiple times is allowed and it's a no-op.\n  if (ring_buffer_.capacity() > 0)\n    return;\n\n  ring_buffer_.set_capacity(absl::GetFlag(FLAGS_shard_repl_backlog_len));\n  ring_buffer_bytes_ = ring_buffer_.capacity() * sizeof(JournalItem);\n}\n\nbool JournalSlice::IsLSNInBuffer(LSN lsn) const {\n  DCHECK(ring_buffer_.capacity() > 0);\n\n  if (ring_buffer_.empty()) {\n    return false;\n  }\n\n  if (ring_buffer_.size() == 1) {\n    return ring_buffer_.front().lsn == lsn;\n  }\n\n  return ring_buffer_.front().lsn <= lsn && lsn <= ring_buffer_.back().lsn;\n}\n\nstd::string_view JournalSlice::GetEntry(LSN lsn) const {\n  DCHECK(ring_buffer_.capacity() > 0 && IsLSNInBuffer(lsn));\n\n  auto start = ring_buffer_.front().lsn;\n  DCHECK(ring_buffer_[lsn - start].lsn == lsn);\n  return ring_buffer_[lsn - start].data;\n}\n\nvoid JournalSlice::SetFlushMode(bool allow_flush) {\n  DCHECK(allow_flush != enable_journal_flush_);\n  enable_journal_flush_ = allow_flush;\n  if (allow_flush) {\n    // This lock is never blocking because it contends with UnregisterOnChange, which is cpu only.\n    // Hence this lock prevents the UnregisterOnChange to start 
running in the middle of\n    // SetFlushMode.\n    std::shared_lock lk(cb_mu_);\n    for (auto k_v : journal_consumers_arr_) {\n      k_v.second->ThrottleIfNeeded();\n    }\n  }\n}\n\nvoid JournalSlice::AddLogRecord(const Entry& entry) {\n  DCHECK(ring_buffer_.capacity() > 0);\n\n  JournalChangeItem item;\n\n  {\n    FiberAtomicGuard fg;\n    item.journal_item.lsn = lsn_++;\n\n    // only used by RestoreStreamer\n    item.cmd = entry.payload.cmd;\n    item.slot = entry.slot;\n\n    io::StringSink sink;\n    JournalWriter writer{&sink};\n    writer.Write(entry);\n\n    std::move(sink).str().swap(item.journal_item.data);\n\n    if (item.journal_item.data.size() > 32) {\n      // for non-SSO strings capacity should not be much higher than size.\n      DCHECK_LE(item.journal_item.data.capacity(), item.journal_item.data.size() * 2);\n    }\n    VLOG(2) << \"Writing item [\" << item.journal_item.lsn << \"]: \" << entry.ToString();\n  }\n\n  CallOnChange(&item);\n}\n\nvoid JournalSlice::CallOnChange(JournalChangeItem* change_item) {\n  // This lock is never blocking because it contends with UnregisterOnChange, which is cpu only.\n  // Hence this lock prevents the UnregisterOnChange to start running in the middle of CallOnChange.\n  // CallOnChange is atomic if JournalSlice::SetFlushMode(false) is called before.\n  std::shared_lock lk(cb_mu_);\n  for (auto k_v : journal_consumers_arr_) {\n    k_v.second->ConsumeJournalChange(*change_item);\n  }\n  auto& item = change_item->journal_item;\n\n  // We preserve order here. 
After ConsumeJournalChange there can be reordering\n  if (ring_buffer_.size() == ring_buffer_.capacity()) {\n    const size_t bytes_removed = ring_buffer_.front().data.capacity();\n    DCHECK_GE(ring_buffer_bytes_, bytes_removed);\n    ring_buffer_bytes_ -= bytes_removed;\n  }\n  if (!ring_buffer_.empty()) {\n    DCHECK(item.lsn == ring_buffer_.back().lsn + 1);\n  }\n  ring_buffer_.push_back(std::move(item));\n  auto& data = ring_buffer_.back().data;\n\n  // Small strings assignment keep the existing capacity intact due to SSO.\n  // Shrink strings in this case to prevent excessive memory usage.\n  if (data.size() < 32 && data.capacity() > 64) {\n    data.shrink_to_fit();\n  }\n  ring_buffer_bytes_ += data.capacity();\n\n  if (enable_journal_flush_) {\n    for (auto k_v : journal_consumers_arr_) {\n      k_v.second->ThrottleIfNeeded();\n    }\n  }\n}\n\nuint32_t JournalSlice::RegisterOnChange(JournalConsumerInterface* consumer) {\n  // mutex lock isn't needed because iterators are not invalidated\n  uint32_t id = next_cb_id_++;\n  journal_consumers_arr_.emplace_back(id, consumer);\n  return id;\n}\n\nvoid JournalSlice::UnregisterOnChange(uint32_t id) {\n  // we need to wait until callback is finished before removing it\n  lock_guard lk(cb_mu_);\n  auto it = find_if(journal_consumers_arr_.begin(), journal_consumers_arr_.end(),\n                    [id](const auto& e) { return e.first == id; });\n  CHECK(it != journal_consumers_arr_.end());\n  journal_consumers_arr_.erase(it);\n}\n\n}  // namespace journal\n}  // namespace dfly\n"
  },
  {
    "path": "src/server/journal/journal_slice.h",
    "content": "// Copyright 2022, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#pragma once\n\n#include <boost/circular_buffer.hpp>\n#include <optional>\n#include <shared_mutex>\n#include <string_view>\n\n#include \"server/journal/types.h\"\n#include \"util/fibers/synchronization.h\"\n\nnamespace dfly {\nnamespace journal {\n\n// Journal slice is present for both shards and io threads.\nclass JournalSlice {\n public:\n  JournalSlice();\n  ~JournalSlice();\n\n  void Init();\n\n  // This is always the LSN of the *next* journal entry.\n  LSN cur_lsn() const {\n    return lsn_;\n  }\n\n  std::error_code status() const {\n    return status_ec_;\n  }\n\n  void AddLogRecord(const Entry& entry);\n\n  // Register a callback that will be called every time a new entry is\n  // added to the journal.\n  // The callback receives the entry and a boolean that indicates whether\n  // awaiting (to apply backpressure) is allowed.\n  uint32_t RegisterOnChange(JournalConsumerInterface* consumer);\n  void UnregisterOnChange(uint32_t);\n\n  bool HasRegisteredCallbacks() const {\n    return !journal_consumers_arr_.empty();\n  }\n\n  /// Returns whether the journal entry with this LSN is available\n  /// from the buffer.\n  bool IsLSNInBuffer(LSN lsn) const;\n  std::string_view GetEntry(LSN lsn) const;\n  // SetFlushMode with allow_flush=false is used to disable preemptions during\n  // subsequent calls to AddLogRecord.\n  // SetFlushMode with allow_flush=true flushes all log records aggregated\n  // since the last call with allow_flush=false. 
This call may preempt.\n  // The caller must ensure that no preemptions occur between the initial call\n  // with allow_flush=false and the subsequent call with allow_flush=true.\n  void SetFlushMode(bool allow_flush);\n\n  size_t GetRingBufferSize() const {\n    return ring_buffer_.size();\n  }\n\n  size_t GetRingBufferBytes() const {\n    return ring_buffer_bytes_;\n  }\n\n  void ResetRingBuffer() {\n    ring_buffer_.clear();\n    ring_buffer_bytes_ = ring_buffer_.capacity() * sizeof(JournalItem);\n  }\n\n  void SetStartingLSN(LSN lsn) {\n    lsn_ = lsn;\n  }\n\n private:\n  void CallOnChange(JournalChangeItem* item);\n  boost::circular_buffer<JournalItem> ring_buffer_;\n\n  mutable util::fb2::SharedMutex cb_mu_;  // to prevent removing callback during call\n  std::list<std::pair<uint32_t, JournalConsumerInterface*>> journal_consumers_arr_;\n\n  LSN lsn_ = 1;\n\n  uint32_t next_cb_id_ = 1;\n  std::error_code status_ec_;\n  bool enable_journal_flush_ = true;\n\n  size_t ring_buffer_bytes_ = 0;\n};\n\n}  // namespace journal\n}  // namespace dfly\n"
  },
  {
    "path": "src/server/journal/journal_test.cc",
    "content": "#include <boost/circular_buffer.hpp>\n#include <random>\n#include <string>\n\n#include \"base/gtest.h\"\n#include \"base/logging.h\"\n#include \"core/detail/gen_utils.h\"\n#include \"server/common.h\"\n#include \"server/journal/pending_buf.h\"\n#include \"server/journal/serializer.h\"\n#include \"server/journal/types.h\"\n#include \"server/serializer_commons.h\"\n#include \"util/fibers/fibers.h\"\n\nusing namespace testing;\nusing namespace std;\nusing namespace util;\n\nnamespace dfly {\nnamespace journal {\ntemplate <typename T> string ConCat(const T& list) {\n  string res;\n  for (auto arg : list) {\n    res += string_view{arg.data(), arg.size()};\n    res += ' ';\n  }\n  return res;\n}\n\ntemplate <> string ConCat(const CmdArgList& list) {\n  string res;\n  for (auto arg : list) {\n    res += facade::ToSV(arg);\n    res += ' ';\n  }\n  return res;\n}\n\nstruct EntryPayloadVisitor {\n  void operator()(const Entry::Payload& p) {\n    out->append(p.cmd).append(\" \");\n    *out += visit([](const auto& args) { return ConCat(args); }, p.args);\n  }\n\n  string* out;\n};\n\n// Extract payload from entry in string form.\nstd::string ExtractPayload(ParsedEntry& entry) {\n  std::string out = ConCat(entry.cmd);\n\n  if (!out.empty())\n    out.pop_back();\n\n  return out;\n}\n\nstd::string ExtractPayload(Entry& entry) {\n  std::string out;\n  EntryPayloadVisitor visitor{&out};\n  visitor(entry.payload);\n\n  if (!out.empty())\n    out.pop_back();\n\n  return out;\n}\n\n// Mock non-owned types with underlying storage.\nusing StoredSlices = vector<vector<string_view>>;\nusing StoredLists = vector<pair<vector<string>, CmdArgVec>>;\n\ntemplate <typename... Ss> ArgSlice StoreSlice(StoredSlices* vec, Ss... strings) {\n  vec->emplace_back(initializer_list<string_view>{strings...});\n  return ArgSlice{vec->back().data(), vec->back().size()};\n}\n\ntemplate <typename... Ss> CmdArgList StoreList(StoredLists* vec, Ss... 
strings) {\n  vector<string> stored_strings{strings...};\n  CmdArgVec out;\n  for (auto& s : stored_strings) {\n    out.emplace_back(s.data(), s.size());\n  }\n\n  vec->emplace_back(std::move(stored_strings), std::move(out));\n  auto& arg_vec = vec->back().second;\n  return CmdArgList{arg_vec.data(), arg_vec.size()};\n}\n\n// Test serializing and de-serializing entries.\nTEST(Journal, WriteRead) {\n  StoredSlices slices{};\n  StoredLists lists{};\n\n  auto slice = [v = &slices](auto... ss) { return StoreSlice(v, ss...); };\n  auto list = [v = &lists](auto... ss) { return StoreList(v, ss...); };\n  using Payload = Entry::Payload;\n\n  std::vector<Entry> test_entries = {\n      {0, Op::COMMAND, 0, nullopt, Payload(\"MSET\", slice(\"A\", \"1\", \"B\", \"2\"))},\n      {0, Op::COMMAND, 0, nullopt, Payload(\"MSET\", slice(\"C\", \"3\"))},\n      {1, Op::COMMAND, 0, nullopt, Payload(\"DEL\", list(\"A\", \"B\"))},\n      {2, Op::COMMAND, 1, nullopt, Payload(\"LPUSH\", list(\"l\", \"v1\", \"v2\"))},\n      {3, Op::COMMAND, 0, nullopt, Payload(\"MSET\", slice(\"D\", \"4\"))},\n      {4, Op::COMMAND, 1, nullopt, Payload(\"DEL\", list(\"l1\"))},\n      {5, Op::COMMAND, 2, nullopt, Payload(\"DEL\", list(\"E\", \"2\"))}};\n\n  // Write all entries to a buffer.\n  base::IoBuf buf;\n  io::BufSink sink{&buf};\n\n  JournalWriter writer{&sink};\n  for (const auto& entry : test_entries) {\n    writer.Write(entry);\n  }\n\n  // Read them back.\n  io::BufSource source{&buf};\n  JournalReader reader{&source, 0};\n\n  ParsedEntry res;\n  for (unsigned i = 0; i < test_entries.size(); i++) {\n    auto& expected = test_entries[i];\n\n    auto ec = reader.ReadEntry(&res);\n    ASSERT_FALSE(ec);\n\n    ASSERT_EQ(expected.opcode, res.opcode);\n    ASSERT_EQ(expected.txid, res.txid);\n    ASSERT_EQ(expected.dbid, res.dbid);\n    ASSERT_EQ(ExtractPayload(expected), ExtractPayload(res));\n  }\n}\n\nTEST(Journal, PendingBuf) {\n  PendingBuf pbuf;\n\n  ASSERT_TRUE(pbuf.Empty());\n  
ASSERT_EQ(pbuf.Size(), 0);\n\n  pbuf.Push(\"one\");\n  pbuf.Push(\" smallllllllllllllllllllllllllllllll\");\n  pbuf.Push(\" test\");\n\n  ASSERT_FALSE(pbuf.Empty());\n  ASSERT_EQ(pbuf.Size(), 44);\n\n  {\n    auto& sending_buf = pbuf.PrepareSendingBuf();\n    ASSERT_EQ(sending_buf.buf.size(), 3);\n    ASSERT_EQ(sending_buf.mem_size, 44);\n\n    ASSERT_EQ(sending_buf.buf[0], \"one\");\n    ASSERT_EQ(sending_buf.buf[1], \" smallllllllllllllllllllllllllllllll\");\n    ASSERT_EQ(sending_buf.buf[2], \" test\");\n  }\n\n  const size_t string_num = PendingBuf::Buf::kMaxBufSize + 1000;\n  std::vector<std::string> test_data;\n  test_data.reserve(string_num);\n\n  absl::InsecureBitGen gen;\n\n  for (size_t i = 0; i < string_num; ++i) {\n    auto str = GetRandomHex(gen, 10, 90);\n    test_data.push_back(str);\n    pbuf.Push(std::move(str));\n  }\n\n  const size_t test_data_size =\n      std::accumulate(test_data.begin(), test_data.end(), 0,\n                      [](size_t size, const auto& s) { return s.size() + size; });\n\n  ASSERT_FALSE(pbuf.Empty());\n  ASSERT_EQ(pbuf.Size(), 44 + test_data_size);\n\n  pbuf.Pop();\n\n  ASSERT_FALSE(pbuf.Empty());\n  ASSERT_EQ(pbuf.Size(), test_data_size);\n\n  {\n    auto& sending_buf = pbuf.PrepareSendingBuf();\n\n    const size_t send_buf_size =\n        std::accumulate(test_data.begin(), test_data.begin() + PendingBuf::Buf::kMaxBufSize, 0,\n                        [](size_t size, const auto& s) { return s.size() + size; });\n\n    ASSERT_EQ(sending_buf.buf.size(), PendingBuf::Buf::kMaxBufSize);\n    ASSERT_EQ(sending_buf.mem_size, send_buf_size);\n\n    for (size_t i = 0; i < sending_buf.buf.size(); ++i) {\n      ASSERT_EQ(sending_buf.buf[i], test_data[i]);\n    }\n  }\n\n  pbuf.Pop();\n\n  test_data.erase(test_data.begin(), test_data.begin() + PendingBuf::Buf::kMaxBufSize);\n\n  const size_t last_buf_size =\n      std::accumulate(test_data.begin(), test_data.end(), 0,\n                      [](size_t size, const auto& s) { return 
s.size() + size; });\n\n  ASSERT_FALSE(pbuf.Empty());\n  ASSERT_EQ(pbuf.Size(), last_buf_size);\n\n  {\n    auto& sending_buf = pbuf.PrepareSendingBuf();\n\n    ASSERT_EQ(sending_buf.buf.size(), 1000);\n    ASSERT_EQ(sending_buf.mem_size, last_buf_size);\n\n    for (size_t i = 0; i < sending_buf.buf.size(); ++i) {\n      ASSERT_EQ(sending_buf.buf[i], test_data[i]);\n    }\n  }\n\n  pbuf.Pop();\n\n  ASSERT_TRUE(pbuf.Empty());\n  ASSERT_EQ(pbuf.Size(), 0);\n}\n\nTEST(Journal, CircularMemory) {\n  boost::circular_buffer<string> ring_buffer(1024);\n  for (int i = 0; i < 2000; ++i) {\n    ring_buffer.push_back(string(512, 'a'));\n  }\n\n  size_t cap = 0;\n  for (size_t i = 0; i < ring_buffer.size(); ++i) {\n    cap += ring_buffer[i].capacity();\n  }\n  LOG(INFO) << \"Total capacity: \" << cap;\n  for (size_t i = 0; i < 2000; ++i) {\n    ring_buffer.push_back(string(16, 'a'));\n  }\n  cap = 0;\n  for (size_t i = 0; i < ring_buffer.size(); ++i) {\n    cap += ring_buffer[i].capacity();\n  }\n  LOG(INFO) << \"Total capacity after push: \" << cap;\n\n  string tmp(1 << 16, 'x');\n  tmp = string(4, 'a');\n  LOG(INFO) << \"Tmp string capacity: \" << tmp.capacity();\n  tmp = string(32, 'a');\n  LOG(INFO) << \"Tmp string capacity: \" << tmp.capacity();\n}\n\n}  // namespace journal\n}  // namespace dfly\n"
  },
  {
    "path": "src/server/journal/pending_buf.h",
    "content": "// Copyright 2024, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#pragma once\n\n#include <absl/container/inlined_vector.h>\n\n#include <cassert>\n#include <deque>\n#include <numeric>\n\nnamespace dfly {\n\nclass PendingBuf {\n public:\n  struct Buf {\n    size_t mem_size = 0;\n    absl::InlinedVector<std::string, 8> buf;\n\n#ifdef UIO_MAXIOV\n    static constexpr size_t kMaxBufSize = UIO_MAXIOV;\n#else\n    static constexpr size_t kMaxBufSize = 1024;\n#endif\n  };\n\n  PendingBuf() : bufs_(1) {\n  }\n\n  bool Empty() const {\n    return std::all_of(bufs_.begin(), bufs_.end(), [](const auto& b) { return b.buf.empty(); });\n  }\n\n  void Push(std::string str) {\n    assert(!bufs_.empty());\n    if (bufs_.back().buf.size() == Buf::kMaxBufSize) {\n      bufs_.emplace_back();\n    }\n    auto& front_buf = bufs_.back();\n    front_buf.mem_size += str.size();\n    front_buf.buf.push_back(std::move(str));\n  }\n\n  // should be called to get the next buffer for sending\n  const Buf& PrepareSendingBuf() {\n    // Adding to the buffer ensures that future `Push()`es will not modify the in-flight buffer\n    if (bufs_.size() == 1) {\n      bufs_.emplace_back();\n    }\n    return bufs_.front();\n  }\n\n  size_t FrontBufSize() const {\n    return bufs_.front().mem_size;\n  }\n\n  // should be called when the buf from PrepareSendingBuf() method was sent\n  void Pop() {\n    assert(bufs_.size() >= 2);\n    bufs_.pop_front();\n  }\n\n  size_t Size() const {\n    return std::accumulate(bufs_.begin(), bufs_.end(), 0,\n                           [](size_t s, const auto& b) { return s + b.mem_size; });\n  }\n\n private:\n  std::deque<Buf> bufs_;\n};\n\n}  // namespace dfly\n"
  },
  {
    "path": "src/server/journal/serializer.cc",
    "content": "// Copyright 2022, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#include \"server/journal/serializer.h\"\n\n#include <system_error>\n\n#include \"base/logging.h\"\n#include \"glog/logging.h\"\n#include \"io/io.h\"\n#include \"io/io_buf.h\"\n#include \"server/error.h\"\n#include \"server/journal/types.h\"\n#include \"server/main_service.h\"\n#include \"server/serializer_commons.h\"\n#include \"server/transaction.h\"\n\nusing namespace std;\n\nnamespace dfly {\n\nJournalWriter::JournalWriter(io::Sink* sink) : sink_{sink} {\n}\n\nvoid JournalWriter::Write(uint64_t v) {\n  uint8_t buf[10];\n  unsigned len = WritePackedUInt(v, buf);\n  sink_->Write(io::Bytes{buf}.first(len));\n}\n\nvoid JournalWriter::Write(std::string_view sv) {\n  Write(sv.size());\n  if (!sv.empty())  // arguments can be empty strings\n    sink_->Write(io::Buffer(sv));\n}\n\nvoid JournalWriter::Write(const journal::Entry::Payload& payload) {\n  if (payload.cmd.empty())\n    return;\n\n  size_t num_elems = 0, size = 0;\n  for (string_view str : base::it::Wrap(cmn::kToSV, payload.args)) {\n    num_elems++;\n    size += str.size();\n  };\n\n  Write(1 + num_elems);\n\n  size_t cmd_size = payload.cmd.size() + size;\n  Write(cmd_size);\n  Write(payload.cmd);\n\n  for (string_view str : base::it::Wrap(cmn::kToSV, payload.args))\n    this->Write(str);\n}\n\nvoid JournalWriter::Write(const journal::Entry& entry) {\n  // Check if entry has a new db index and we need to emit a SELECT entry.\n  if (entry.opcode != journal::Op::SELECT && entry.opcode != journal::Op::LSN &&\n      entry.opcode != journal::Op::PING && (!cur_dbid_ || entry.dbid != *cur_dbid_)) {\n    Write(journal::Entry{journal::Op::SELECT, entry.dbid, entry.slot});\n    cur_dbid_ = entry.dbid;\n  }\n\n  VLOG(1) << \"Writing entry \" << entry.ToString();\n\n  Write(uint8_t(entry.opcode));\n\n  switch (entry.opcode) {\n    case journal::Op::SELECT:\n      return Write(entry.dbid);\n    case 
journal::Op::LSN:\n      return Write(entry.lsn);\n    case journal::Op::PING:\n      return;\n    case journal::Op::COMMAND:\n      Write(entry.txid);\n      Write(1u);  // deprecated field, kept for backward compatibility.\n      Write(entry.payload);\n      break;\n    default:\n      LOG(FATAL) << \"Unknown journal opcode: \" << static_cast<int>(entry.opcode);\n      break;\n  };\n}\n\nJournalReader::JournalReader(io::Source* source, DbIndex dbid)\n    : source_{source}, buf_{4096}, dbid_{dbid} {\n}\n\nvoid JournalReader::SetSource(io::Source* source) {\n  CHECK_EQ(buf_.InputLen(), 0ULL);\n  source_ = source;\n}\n\nstd::error_code JournalReader::EnsureRead(size_t num) {\n  // Check if we already have enough.\n  if (buf_.InputLen() >= num)\n    return {};\n\n  uint64_t remainder = num - buf_.InputLen();\n  buf_.EnsureCapacity(remainder);\n\n  // Try reading at least how much we need, but possibly more\n  uint64_t read;\n  SET_OR_RETURN(source_->ReadAtLeast(buf_.AppendBuffer(), remainder), read);\n\n  // Happens on end of stream (for example, a too-small string buffer or a closed socket)\n  if (read < remainder) {\n    return make_error_code(errc::io_error);\n  }\n\n  buf_.CommitWrite(read);\n  return {};\n}\n\ntemplate <typename UT> io::Result<UT> JournalReader::ReadUInt() {\n  // Determine type and number of following bytes.\n  if (auto ec = EnsureRead(1); ec)\n    return make_unexpected(ec);\n  PackedUIntMeta meta{buf_.InputBuffer()[0]};\n  buf_.ConsumeInput(1);\n\n  if (auto ec = EnsureRead(meta.ByteSize()); ec)\n    return make_unexpected(ec);\n\n  // Read and check integer.\n  uint64_t res;\n  SET_OR_UNEXPECT(ReadPackedUInt(meta, buf_.InputBuffer()), res);\n  buf_.ConsumeInput(meta.ByteSize());\n\n  if (res > std::numeric_limits<UT>::max())\n    return make_unexpected(make_error_code(errc::result_out_of_range));\n  return static_cast<UT>(res);\n}\n\ntemplate io::Result<uint8_t> JournalReader::ReadUInt<uint8_t>();\ntemplate io::Result<uint16_t> 
JournalReader::ReadUInt<uint16_t>();\ntemplate io::Result<uint32_t> JournalReader::ReadUInt<uint32_t>();\ntemplate io::Result<uint64_t> JournalReader::ReadUInt<uint64_t>();\n\nstd::error_code JournalReader::ReadString(io::MutableBytes buffer) {\n  size_t size = buffer.size();\n  uint64_t available = std::min(size, buf_.InputLen());\n  uint64_t remainder = 0;\n\n  if (available < size) {\n    remainder = size - available;\n  }\n\n  buf_.ReadAndConsume(available, buffer.data());\n\n  // If remainder of string is bigger than threshold - read and populate directly\n  // output buffer otherwise use intermediate io_buf.\n  bool is_short_remainder = remainder < (buf_.Capacity() / 2);\n\n  auto remainder_buf_pos = buffer.data() + available;\n\n  if (remainder) {\n    if (is_short_remainder) {\n      if (auto ec = EnsureRead(remainder); ec)\n        return ec;\n      buf_.ReadAndConsume(remainder, remainder_buf_pos);\n    } else {\n      uint64_t read;\n      SET_OR_RETURN(source_->Read({remainder_buf_pos, remainder}), read);\n      if (read < remainder) {\n        return make_error_code(errc::io_error);\n      }\n    }\n  }\n\n  return {};\n}\n\nstd::error_code JournalReader::ReadCommand(journal::ParsedEntry::CmdData* data) {\n  size_t num_strings = 0;\n  SET_OR_RETURN(ReadUInt<uint64_t>(), num_strings);\n\n  size_t cmd_size = 0;\n  SET_OR_RETURN(ReadUInt<uint64_t>(), cmd_size);\n\n  data->Reserve(num_strings, cmd_size + num_strings /* +\\0 char*/);\n\n  // Read all strings consecutively.\n  for (size_t i = 0; i < num_strings; ++i) {\n    size_t size = 0;\n    SET_OR_RETURN(ReadUInt<uint64_t>(), size);\n    if (size > cmd_size) {  // corrupted entry\n      return make_error_code(errc::io_error);\n    }\n    data->PushArg(size);\n    uint8_t* ptr = reinterpret_cast<uint8_t*>(data->data(i));\n    if (auto ec = ReadString({ptr, size}); ec)\n      return ec;\n\n    ptr[size] = '\\0';  // null terminate\n\n    cmd_size -= size;\n  }\n\n  return {};\n}\n\nstd::error_code 
JournalReader::ReadEntry(journal::ParsedEntry* dest) {\n  uint8_t int_op;\n  SET_OR_RETURN(ReadUInt<uint8_t>(), int_op);\n  journal::Op opcode = static_cast<journal::Op>(int_op);\n\n  if (opcode == journal::Op::SELECT) {\n    SET_OR_RETURN(ReadUInt<uint16_t>(), dbid_);\n    return ReadEntry(dest);\n  }\n\n  dest->dbid = dbid_;\n  dest->opcode = opcode;\n  dest->cmd.clear();\n  if (opcode == journal::Op::PING) {\n    return {};\n  }\n\n  if (opcode == journal::Op::LSN) {\n    SET_OR_RETURN(ReadUInt<uint64_t>(), dest->lsn);\n    return {};\n  }\n\n  SET_OR_RETURN(ReadUInt<uint64_t>(), dest->txid);\n  [[maybe_unused]] uint32_t unused;\n\n  SET_OR_RETURN(ReadUInt<uint32_t>(), unused);\n\n  VLOG(1) << \"Read entry \" << dest->ToString();\n\n  return ReadCommand(&dest->cmd);\n}\n\n}  // namespace dfly\n"
  },
  {
    "path": "src/server/journal/serializer.h",
    "content": "// Copyright 2022, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#pragma once\n\n#include <optional>\n#include <string>\n\n#include \"io/io.h\"\n#include \"io/io_buf.h\"\n#include \"server/journal/types.h\"\n\nnamespace dfly {\n\n// JournalWriter serializes journal entries to a sink.\n// It automatically keeps track of the current database index.\nclass JournalWriter {\n public:\n  JournalWriter(io::Sink* sink);\n\n  // Write single entry to sink.\n  void Write(const journal::Entry& entry);\n  void Write(uint64_t v);  // Write packed unsigned integer.\n\n private:\n  void Write(std::string_view sv);  // Write string.\n  void Write(const journal::Entry::Payload& payload);\n\n private:\n  io::Sink* sink_;\n  std::optional<DbIndex> cur_dbid_{};\n};\n\n// JournalReader allows deserializing journal entries from a source.\n// Like the writer, it automatically keeps track of the database index.\nstruct JournalReader {\n public:\n  // Initialize start database index.\n  JournalReader(io::Source* source, DbIndex dbid);\n\n  // Overwrite current source and ensure there is no leftover from previous.\n  void SetSource(io::Source* source);\n\n  // Try reading entry from source.\n  std::error_code ReadEntry(journal::ParsedEntry* dest);\n\n private:\n  // Read from source until buffer contains at least num bytes.\n  std::error_code EnsureRead(size_t num);\n\n  // Read unsigned integer in packed encoding.\n  template <typename UT> io::Result<UT> ReadUInt();\n\n  // Reads exactly buffer.size() bytes and copies them to buffer.\n  std::error_code ReadString(io::MutableBytes buffer);\n\n  // Read argument array into string buffer.\n  std::error_code ReadCommand(journal::ParsedEntry::CmdData* entry);\n\n private:\n  io::Source* source_;\n  base::IoBuf buf_;\n  DbIndex dbid_;\n};\n\n}  // namespace dfly\n"
  },
  {
    "path": "src/server/journal/streamer.cc",
    "content": "// Copyright 2024, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#include \"server/journal/streamer.h\"\n\n#include <absl/functional/bind_front.h>\n#include <sys/socket.h>\n\n#include <chrono>\n\n#ifdef __linux__\n#include <netinet/tcp.h>\n#endif\n\n#include \"base/flags.h\"\n#include \"base/logging.h\"\n#include \"server/db_slice.h\"\n#include \"server/engine_shard.h\"\n#include \"server/journal/cmd_serializer.h\"\n#include \"server/journal/serializer.h\"\n#include \"server/rdb_save.h\"\n#include \"server/server_state.h\"\n#include \"util/fibers/synchronization.h\"\n\nusing namespace facade;\n\nABSL_FLAG(uint32_t, replication_timeout, 30000,\n          \"Time in milliseconds to wait for the replication writes being stuck.\");\n\nABSL_FLAG(uint32_t, replication_stream_output_limit, 1_MB,\n          \"Time to wait for the replication output buffer go below the throttle limit\");\n\nABSL_FLAG(uint32_t, migration_buckets_serialization_threshold, 10,\n          \"The Number of buckets to serialize on each iteration before yielding\");\nABSL_FLAG(uint32_t, migration_buckets_sleep_usec, 500,\n          \"Sleep time in microseconds after each time we reach \"\n          \"migration_buckets_serialization_threshold\");\n\nABSL_FLAG(float, migration_buckets_cpu_budget, 0.2,\n          \"How much CPU budget to use for migration buckets serialization\");\n\nABSL_FLAG(uint32_t, replication_dispatch_threshold, 1500,\n          \"Number of bytes to aggregate before replication\");\n\nnamespace dfly {\nusing namespace util;\nusing namespace journal;\nusing namespace std;\nnamespace {\n\niovec IoVec(io::Bytes src) {\n  return iovec{const_cast<uint8_t*>(src.data()), src.size()};\n}\n\nuint32_t replication_stream_output_limit_cached = 64_KB;\nuint32_t migration_buckets_serialization_threshold_cached = 100;\nuint32_t migration_buckets_sleep_usec_cached = 100;\nuint32_t replication_dispatch_threshold = 1500;\nuint32_t 
stalled_writer_base_period_ms = 10;\n\nvoid LogTcpSocketDiagnostics(util::FiberSocketBase* dest) {\n  if (!dest) {\n    return;\n  }\n\n#ifdef __linux__\n  // On Linux, we can get TCP diagnostics using getsockopt.\n  int sockfd = dest->native_handle();\n  if (sockfd < 0) {\n    return;\n  }\n\n  struct tcp_info info;\n  socklen_t info_len = sizeof(info);\n  if (getsockopt(sockfd, IPPROTO_TCP, TCP_INFO, &info, &info_len) == 0) {\n    LOG_EVERY_T(INFO, 1) << \"TCP socket diagnostics - \"\n                         << \"state: \" << static_cast<int>(info.tcpi_state)\n                         << \", ca_state: \" << static_cast<int>(info.tcpi_ca_state)\n                         << \", retransmits: \" << static_cast<int>(info.tcpi_retransmits)\n                         << \", probes: \" << static_cast<int>(info.tcpi_probes)\n                         << \", backoff: \" << static_cast<int>(info.tcpi_backoff)\n                         << \", options: \" << static_cast<int>(info.tcpi_options)\n                         << \", snd_wscale: \" << static_cast<int>(info.tcpi_snd_wscale)\n                         << \", rcv_wscale: \" << static_cast<int>(info.tcpi_rcv_wscale)\n                         << \", rto: \" << info.tcpi_rto << \", ato: \" << info.tcpi_ato\n                         << \", snd_mss: \" << info.tcpi_snd_mss << \", rcv_mss: \" << info.tcpi_rcv_mss\n                         << \", unacked: \" << info.tcpi_unacked << \", sacked: \" << info.tcpi_sacked\n                         << \", lost: \" << info.tcpi_lost << \", retrans: \" << info.tcpi_retrans\n                         << \", fackets: \" << info.tcpi_fackets\n                         << \", last_data_sent: \" << info.tcpi_last_data_sent\n                         << \", last_ack_sent: \" << info.tcpi_last_ack_sent\n                         << \", last_data_recv: \" << info.tcpi_last_data_recv\n                         << \", last_ack_recv: \" << info.tcpi_last_ack_recv\n                         << \", pmtu: 
\" << info.tcpi_pmtu\n                         << \", rcv_ssthresh: \" << info.tcpi_rcv_ssthresh\n                         << \", rtt: \" << info.tcpi_rtt << \", rttvar: \" << info.tcpi_rttvar\n                         << \", snd_ssthresh: \" << info.tcpi_snd_ssthresh\n                         << \", snd_cwnd: \" << info.tcpi_snd_cwnd << \", advmss: \" << info.tcpi_advmss\n                         << \", reordering: \" << info.tcpi_reordering\n                         << \", rcv_rtt: \" << info.tcpi_rcv_rtt\n                         << \", rcv_space: \" << info.tcpi_rcv_space\n                         << \", total_retrans: \" << info.tcpi_total_retrans;\n  } else {\n    LOG_EVERY_T(INFO, 1) << \"Failed to get TCP socket info: \" << strerror(errno);\n  }\n#endif\n}\n\n}  // namespace\n\nJournalStreamer::JournalStreamer(ExecutionState* cntx, JournalStreamer::Config config)\n    : cntx_(cntx), config_(config) {\n  // cache the flag to avoid accessing it later.\n  replication_stream_output_limit_cached = absl::GetFlag(FLAGS_replication_stream_output_limit);\n  migration_buckets_sleep_usec_cached = absl::GetFlag(FLAGS_migration_buckets_sleep_usec);\n  replication_dispatch_threshold = absl::GetFlag(FLAGS_replication_dispatch_threshold);\n  last_async_write_time_ = fb2::ProactorBase::GetMonotonicTimeNs() / 1000000;\n}\n\nJournalStreamer::~JournalStreamer() {\n  if (!cntx_->IsError()) {\n    DCHECK_EQ(in_flight_bytes_, 0u);\n  }\n  VLOG(1) << \"~JournalStreamer\";\n}\n\nvoid JournalStreamer::ConsumeJournalChange(const JournalChangeItem& item) {\n  if (!ShouldWrite(item)) {\n    return;\n  }\n\n  DCHECK_GT(item.journal_item.lsn, last_lsn_writen_);\n  Write(item.journal_item.data);\n  time_t now = time(nullptr);\n  last_lsn_writen_ = item.journal_item.lsn;\n  // TODO: to chain it to the previous Write call.\n  if (config_.should_sent_lsn && now - last_lsn_time_ > 3) {\n    last_lsn_time_ = now;\n    io::StringSink sink;\n    JournalWriter writer(&sink);\n    
writer.Write(Entry{journal::Op::LSN, last_lsn_writen_});\n    Write(std::move(sink).str());\n  }\n}\n\nvoid JournalStreamer::Start(util::FiberSocketBase* dest) {\n  CHECK(dest_ == nullptr && dest != nullptr);\n  dest_ = dest;\n  // For partial sync we first catch up from journal replication buffer and only then register.\n  if (config_.start_partial_sync_at == 0) {\n    journal_cb_id_ = journal::RegisterConsumer(this);\n  }\n  StartStalledDataWriterFiber();\n}\n\nbool JournalStreamer::Cancel() {\n  VLOG(1) << \"JournalStreamer::Cancel \" << cntx_->IsCancelled();\n  waker_.notifyAll();\n  bool res = false;\n  if (journal_cb_id_) {\n    auto cb_id = journal_cb_id_;\n    journal_cb_id_ = 0;  // Reset to prevent double unregistration in another fiber\n    journal::UnregisterConsumer(cb_id);\n    res = true;\n  }\n  StopStalledDataWriterFiber();\n  WaitForInflightToComplete(false);\n  return res;\n}\n\nsize_t JournalStreamer::UsedBytes() const {\n  return pending_buf_.Size();\n}\n\nstd::string JournalStreamer::FormatInternalState() const {\n  return absl::StrCat(\n      \"pending_buf_size:\", pending_buf_.Size(), \" in_flight_bytes:\", in_flight_bytes_,\n      \" total_sent:\", total_sent_, \" throttle_count:\", throttle_count_,\n      \" total_throttle_wait_usec:\", total_throttle_wait_usec_,\n      \" throttle_waiters:\", throttle_waiters_, \" last_async_write_time_ms:\", last_async_write_time_,\n      \" last_lsn_time_s:\", last_lsn_time_, \" last_lsn_writen_:\", last_lsn_writen_);\n}\n\nvoid JournalStreamer::Write(std::string str) {\n  DCHECK(!str.empty());\n  DVLOG(3) << \"Writing \" << str.size() << \" bytes\";\n\n  pending_buf_.Push(std::move(str));\n  AsyncWrite(false);\n}\n\nvoid JournalStreamer::StartStalledDataWriterFiber() {\n  if (config_.init_from_stable_sync && !stalled_data_writer_.IsJoinable()) {\n    auto pb = fb2::ProactorBase::me();\n    std::chrono::milliseconds period_us(stalled_writer_base_period_ms);\n    stalled_data_writer_ = MakeFiber([this, 
index = pb->GetPoolIndex(), period_us]() mutable {\n      ThisFiber::SetName(absl::StrCat(\"fiber_periodic_journal_writer_\", index));\n      this->StalledDataWriterFiber(period_us, &stalled_data_writer_done_);\n    });\n  }\n}\n\nbool JournalStreamer::MaybePartialStreamLSNs() {\n  // Same algorithm as SwitchIncrementalFb. The only difference is that we don't sent\n  // the old LSN\"s via a snapshot but rather as journal changes.\n  if (config_.start_partial_sync_at > 0) {\n    LSN lsn = config_.start_partial_sync_at;\n    DCHECK_LE(lsn, journal::GetLsn()) << \"The replica tried to sync from the future.\";\n\n    LOG(INFO) << \"Starting partial sync from lsn: \" << lsn;\n    // The replica sends the LSN of the next entry is wants to receive.\n    while (cntx_->IsRunning() && journal::IsLSNInBuffer(lsn)) {\n      JournalChangeItem item;\n      item.journal_item.data = journal::GetEntry(lsn);\n      item.journal_item.lsn = lsn;\n      ConsumeJournalChange(item);\n      lsn++;\n    }\n\n    if (!cntx_->IsRunning()) {\n      return false;\n    }\n\n    if (journal::GetLsn() != lsn) {\n      // We stopped but we didn't manage to send the whole stream.\n      cntx_->ReportError(\n          std::make_error_code(errc::state_not_recoverable),\n          absl::StrCat(\"Partial sync was unsuccessful because entry #\", lsn,\n                       \" was dropped from the buffer. 
Current lsn=\", journal::GetLsn()));\n      return false;\n    }\n\n    // We are done, register back to the journal so we don't miss any changes\n    journal_cb_id_ = journal::RegisterConsumer(this);\n\n    LOG(INFO) << \"Last LSN sent in partial sync was \" << (lsn - 1);\n    // flush pending\n    if (pending_buf_.Size() != 0) {\n      AsyncWrite(true);\n    }\n  }\n  return true;\n}\n\nvoid JournalStreamer::StalledDataWriterFiber(std::chrono::milliseconds period_ms,\n                                             util::fb2::Done* waiter) {\n  if (!MaybePartialStreamLSNs()) {\n    // Either context got cancelled, or partial sync failed because the lsn's stalled.\n    return;\n  }\n\n  while (cntx_->IsRunning()) {\n    if (waiter->WaitFor(period_ms)) {\n      if (!cntx_->IsRunning()) {\n        return;\n      }\n    }\n\n    // We don't want to force async write to replicate if last data\n    // was written recent. Data needs to be stalled for period_ms duration.\n    if (!pending_buf_.Size() || in_flight_bytes_ > 0 ||\n        ((last_async_write_time_ + period_ms.count()) >\n         (fb2::ProactorBase::GetMonotonicTimeNs() / 1000000))) {\n      continue;\n    }\n\n    AsyncWrite(true);\n  }\n}\n\nvoid JournalStreamer::AsyncWrite(bool force_send) {\n  // Stable sync or RestoreStreamer replication can't write data until\n  // previous AsyncWriter finished.\n  if (in_flight_bytes_ > 0) {\n    return;\n  }\n\n  // Writing in stable sync and outside of fiber needs to check\n  // threshold before writing data.\n  if (config_.init_from_stable_sync && !force_send &&\n      pending_buf_.FrontBufSize() < replication_dispatch_threshold) {\n    return;\n  }\n\n  const auto& cur_buf = pending_buf_.PrepareSendingBuf();\n\n  in_flight_bytes_ = cur_buf.mem_size;\n  total_sent_ += in_flight_bytes_;\n  last_async_write_time_ = fb2::ProactorBase::GetMonotonicTimeNs() / 1000000;\n\n  const auto v_size = cur_buf.buf.size();\n  absl::InlinedVector<iovec, 8> v(v_size);\n\n  for (size_t 
i = 0; i < v_size; ++i) {\n    const auto* uptr = reinterpret_cast<const uint8_t*>(cur_buf.buf[i].data());\n    v[i] = IoVec(io::Bytes(uptr, cur_buf.buf[i].size()));\n  }\n\n  dest_->AsyncWrite(v.data(), v.size(),\n                    [this, len = in_flight_bytes_](std::error_code ec) { OnCompletion(ec, len); });\n}\n\nvoid JournalStreamer::OnCompletion(std::error_code ec, size_t len) {\n  DCHECK_EQ(in_flight_bytes_, len);\n\n  DVLOG(3) << \"Completing \" << in_flight_bytes_;\n  in_flight_bytes_ = 0;\n  pending_buf_.Pop();\n  if (cntx_->IsRunning()) {\n    if (ec) {\n      // Enhanced error logging with socket diagnostics for master disconnects\n      LOG_EVERY_T(INFO, 1) << \"JournalStreamer write error: \" << ec.message()\n                           << \" (code: \" << ec.value() << \", category: \" << ec.category().name()\n                           << \")\";\n\n      LogTcpSocketDiagnostics(dest_);\n\n      cntx_->ReportError(ec);\n    } else if (!pending_buf_.Empty()) {\n      AsyncWrite(false);\n    }\n  }\n\n  // notify ThrottleIfNeeded or WaitForInflightToComplete that waits\n  // for all the completions to finish.\n  // ThrottleIfNeeded can run from multiple fibers in the journal thread.\n  // For example, from Heartbeat calling TriggerJournalWriteToSink to flush potential\n  // expiration deletions and there are other cases as well.\n  waker_.notifyAll();\n}\n\nvoid JournalStreamer::ThrottleIfNeeded() {\n  if (!cntx_->IsRunning() || !IsStalled())\n    return;\n\n  ++throttle_count_;\n  ++throttle_waiters_;\n\n  const auto start = chrono::steady_clock::now();\n  const auto next = start + chrono::milliseconds(absl::GetFlag(FLAGS_replication_timeout));\n  auto log_start = start;\n  size_t inflight_start = in_flight_bytes_;\n  size_t sent_start = total_sent_;\n\n  // Please note that ThrottleIfNeeded is unfair. 
Specifically with several producers pushing data\n  // to this JournalStreamer, one of them may be stalled and the other will be able to\n  // progress indefinitely. The stalled producer will be woken up only to verify again that the\n  // other one succeeded to push data before it.\n  // We currently do not solve this problem, but at least we will be more verbose about it.\n  std::cv_status status = waker_.await_until(\n      [&] {\n        bool finished = !IsStalled() || !cntx_->IsRunning();\n        if (finished)\n          return finished;\n\n        // Log every second that we are stalled and for how long.\n        auto current = chrono::steady_clock::now();\n        if (current - log_start > 1000ms) {\n          log_start = current;\n          LOG(WARNING) << \"Waiting for \"\n                       << chrono::duration_cast<chrono::milliseconds>(current - start).count()\n                       << \"ms \" << ThisFiber::GetName();\n        }\n\n        return false;\n      },\n      next);\n\n  --throttle_waiters_;\n  total_throttle_wait_usec_ +=\n      chrono::duration_cast<chrono::microseconds>(chrono::steady_clock::now() - start).count();\n  if (status == std::cv_status::timeout) {\n    LOG(WARNING) << \"Stream timed out, inflight bytes/sent start: \" << inflight_start << \"/\"\n                 << sent_start << \", end: \" << in_flight_bytes_ << \"/\" << total_sent_;\n    cntx_->ReportError(\"JournalStreamer write operation timeout\");\n  }\n}\n\nvoid JournalStreamer::WaitForInflightToComplete(bool with_timeout) {\n  const auto start = chrono::steady_clock::now();\n  const auto max_timeout = start + chrono::milliseconds(absl::GetFlag(FLAGS_replication_timeout));\n  while (in_flight_bytes_) {\n    auto next = chrono::steady_clock::now() + 1s;\n    std::cv_status status =\n        waker_.await_until([this] { return this->in_flight_bytes_ == 0; }, next);\n    LOG_IF(WARNING, status == std::cv_status::timeout)\n        << \"Waiting for inflight bytes \" << 
in_flight_bytes_;\n\n    if (next >= max_timeout) {\n      if (with_timeout) {\n        cntx_->ReportError(\"JournalStreamer write operation timeout\");\n        break;\n      } else {\n        LOG(WARNING) << \"WaitForInflightToComplete timed out with \" << in_flight_bytes_\n                     << \" inflight bytes remaining\";\n      }\n    }\n  }\n}\n\nvoid JournalStreamer::StopStalledDataWriterFiber() {\n  if (config_.init_from_stable_sync && stalled_data_writer_.IsJoinable()) {\n    stalled_data_writer_done_.Notify();\n    if (stalled_data_writer_.IsJoinable()) {\n      stalled_data_writer_.Join();\n    }\n  }\n}\n\nbool JournalStreamer::IsStalled() const {\n  return pending_buf_.Size() >= replication_stream_output_limit_cached;\n}\n\nRestoreStreamer::RestoreStreamer(DbSlice* slice, cluster::SlotSet slots, ExecutionState* cntx)\n    : JournalStreamer(cntx, {}), db_slice_(slice), my_slots_(std::move(slots)) {\n  DCHECK(slice != nullptr);\n  migration_buckets_serialization_threshold_cached =\n      absl::GetFlag(FLAGS_migration_buckets_serialization_threshold);\n  db_array_ = slice->databases();  // Inc ref to make sure DB isn't deleted while we use it\n\n  cmd_serializer_ = std::make_unique<CmdSerializer>(\n      db_slice_,\n      [&](std::string s) {\n        Write(std::move(s));\n        ThrottleIfNeeded();\n      },\n      ServerState::tlocal()->serialization_max_chunk_size);\n}\n\nvoid RestoreStreamer::Start(util::FiberSocketBase* dest) {\n  if (!cntx_->IsRunning())\n    return;\n\n  VLOG(1) << \"RestoreStreamer start\";\n  auto db_cb = absl::bind_front(&RestoreStreamer::OnDbChange, this);\n  snapshot_version_ = db_slice_->RegisterOnChange(std::move(db_cb));\n\n  JournalStreamer::Start(dest);\n}\n\nvoid RestoreStreamer::Run() {\n  VLOG(1) << \"RestoreStreamer run\";\n\n  PrimeTable::Cursor cursor;\n  uint64_t last_yield = 0;\n\n  // Explicitly copy table smart pointer to keep reference count up (flushall drops it)\n  boost::intrusive_ptr<DbTable> table = 
db_array_.front();\n  PrimeTable* pt = &table->prime;\n\n  do {\n    if (!cntx_->IsRunning())\n      return;\n\n    // If someone else throtles due to huge pending_buf_, give it priority.\n    // Apparently, continue goes through the loop by checking the condition below, so we check\n    // cursor here as well.\n    // In addition if bucket writing was too intensive on CPU and we are overloaded.\n    // Note that we account for CPU time from OnDbChange and here as well (inside WriteBucket).\n    // But we only throttle here, so if we migrated lots of slots during mutations, we\n    // won't progress here but if we have not, then this fiber will progress withing the\n    // CPU budget we defined for it.\n    bool should_stall =\n        throttle_waiters_ > 0 ||\n        (pending_buf_.Size() >= replication_stream_output_limit_cached / 3) ||\n        cpu_aggregator_.IsOverloaded(absl::GetFlag(FLAGS_migration_buckets_cpu_budget));\n    if (cursor && should_stall) {\n      ThisFiber::SleepFor(300us);\n\n      // We have a design bug in RealTimeAggregator that resets it measurements only when\n      // the next sample is taken. So we add this sample to ensure cpu_aggregator_\n      // refreshes its state.\n      base::CpuTimeGuard guard(&cpu_aggregator_);\n      stats_.iter_skips++;\n      continue;\n    }\n\n    cursor = pt->TraverseBuckets(cursor, [&](PrimeTable::bucket_iterator it) {\n      if (!cntx_->IsRunning())  // Could be cancelled any time as Traverse may preempt\n        return;\n\n      db_slice_->FlushChangeToEarlierCallbacks(0 /*db_id always 0 for cluster*/,\n                                               DbSlice::Iterator::FromPrime(it), snapshot_version_);\n\n      if (!cntx_->IsRunning())  // Could have been cancelled in above call too\n        return;\n\n      // Do not progress if we are stalled.\n      ThrottleIfNeeded();\n\n      std::lock_guard guard(big_value_mu_);\n\n      {\n        // Locking this never preempts. 
See snapshot.cc for why we need it.\n        auto* blocking_counter = db_slice_->GetLatch();\n        lock_guard blocking_counter_guard(*blocking_counter);\n\n        stats_.buckets_loop += WriteBucket(it, false);\n      }\n\n      // We could have delayed entries that are watiting so we want to flush them\n      cmd_serializer_->SerializeDelayedEntries(false, nullptr);\n    });\n\n    // TODO: FLAGS_migration_buckets_cpu_budget should eventually be a single configurable\n    // setting that controls how agressive we are with migration pace.\n    // Once we gain confidence with FLAGS_migration_buckets_cpu_budget we should retire\n    // migration_buckets_serialization_threshold and migration_buckets_sleep_usec.\n    if (++last_yield >= migration_buckets_serialization_threshold_cached) {\n      ThisFiber::SleepFor(chrono::microseconds(migration_buckets_sleep_usec_cached));\n      last_yield = 0;\n    }\n  } while (cursor);\n\n  // Force serialize of all delayed entries.\n  {\n    std::lock_guard guard(big_value_mu_);\n    cmd_serializer_->SerializeDelayedEntries(true, nullptr);\n  }\n\n  VLOG(1) << \"RestoreStreamer finished loop of \" << my_slots_.ToSlotRanges().ToString()\n          << \", shard \" << db_slice_->shard_id() << \". Buckets looped \" << stats_.buckets_loop;\n}\n\nvoid RestoreStreamer::SendFinalize(long attempt) {\n  VLOG(1) << \"RestoreStreamer LSN of \" << my_slots_.ToSlotRanges().ToString() << \", shard \"\n          << db_slice_->shard_id() << \" attempt \" << attempt << \" with \" << stats_.commands\n          << \" commands. Buckets looped \" << stats_.buckets_loop << \", buckets on_db_update \"\n          << stats_.buckets_on_db_update << \", buckets skipped \" << stats_.buckets_skipped\n          << \", buckets written \" << stats_.buckets_written << \". 
Keys skipped \"\n          << stats_.keys_skipped << \", keys written \" << stats_.keys_written\n          << \" throttle count: \" << throttle_count_\n          << \", throttle on db update: \" << stats_.throttle_on_db_update\n          << \", throttle usec on db update: \" << stats_.throttle_usec_on_db_update\n          << \", iter_skips: \" << stats_.iter_skips;\n\n  // Drain all pending journal data before sending the finalize marker.\n  // At this point client pause is active, so no new entries can arrive.\n  WaitForInflightToComplete(true);\n\n  journal::Entry entry(journal::Op::LSN, attempt);\n\n  io::StringSink sink;\n  JournalWriter writer{&sink};\n  writer.Write(entry);\n  Write(std::move(sink).str());\n\n  // DFLYMIGRATE ACK command has a timeout so we want to send it only when LSN is ready to be sent\n  ThrottleIfNeeded();\n}\n\nRestoreStreamer::~RestoreStreamer() {\n}\n\nbool RestoreStreamer::Cancel() {\n  auto sver = snapshot_version_;\n  snapshot_version_ = 0;  // to prevent double cancel in another fiber\n  cntx_->Cancel();\n  if (sver != 0) {\n    db_slice_->UnregisterOnChange(sver);\n  }\n  bool res = JournalStreamer::Cancel();\n  LOG_IF(WARNING, res != (sver != 0)) << \"Journal and DBSlice unregister state mismatch in \"\n                                         \"RestoreStreamer Cancel. 
DBSlice unregister state: \"\n                                      << (sver != 0) << \", Journal unregister state: \" << res;\n  return res && (sver != 0);\n}\n\nbool RestoreStreamer::ShouldWrite(const journal::JournalChangeItem& item) const {\n  if (item.cmd == \"FLUSHALL\" || item.cmd == \"FLUSHDB\") {\n    // On FLUSH* we restart the migration\n    CHECK(dest_ != nullptr);\n    cntx_->ReportError(\"FLUSH command during migration\");\n    std::ignore = dest_->Shutdown(SHUT_RDWR);\n    return false;\n  }\n\n  if (!item.slot.has_value()) {\n    return false;\n  }\n\n  return ShouldWrite(*item.slot);\n}\n\nbool RestoreStreamer::ShouldWrite(std::string_view key) const {\n  return ShouldWrite(KeySlot(key));\n}\n\nbool RestoreStreamer::ShouldWrite(SlotId slot_id) const {\n  return my_slots_.Contains(slot_id);\n}\n\nbool RestoreStreamer::WriteBucket(PrimeTable::bucket_iterator it, bool on_db_change_cb) {\n  auto& shard_stats = EngineShard::tlocal()->stats();\n  bool written = false;\n  absl::flat_hash_set<string> tiered_keys;\n  string key_buffer;  // we can reuse it\n\n  // Only track tiered keys when needed and flush delayed entries\n  // 1. When we have tiered storage\n  // 2. We're called from a OnDbChange callback\n  //\n  // We need to track all keys in bucket with tiering. Even if they are not set as external. There\n  // is situation when we request externalization of key and key is read - marking it as not\n  // external but not yet flushed. 
When OnDbChange callback is called we need to flush it and than\n  // write journal changes - so we cannot realy on IsExternal flag and need to track all keys.\n  const bool track_tiered_keys =\n      on_db_change_cb && EngineShard::tlocal()->tiered_storage() != nullptr;\n\n  if (!it.is_done() && it.GetVersion() < snapshot_version_) {\n    base::CpuTimeGuard guard(&cpu_aggregator_);\n    stats_.buckets_written++;\n    it.SetVersion(snapshot_version_);\n    for (it.AdvanceIfNotOccupied(); !it.is_done(); ++it) {\n      const auto& pv = it->second;\n      string_view key = it->first.GetSlice(&key_buffer);\n      if (ShouldWrite(key)) {\n        ++stats_.keys_written;\n        ++shard_stats.total_migrated_keys;\n        uint64_t expire = it->first.GetExpireTime();\n        // Track tiered keys that will need delayed entry flushing\n        if (track_tiered_keys) {\n          tiered_keys.emplace(key);\n        }\n        WriteEntry(key, it->first, pv, expire);\n        written = true;\n      } else {\n        stats_.keys_skipped++;\n      }\n    }\n  } else {\n    // Bucket already serialized, but we may still need to track tiered keys\n    // for force-flushing their delayed entries\n    if (track_tiered_keys) {\n      for (it.AdvanceIfNotOccupied(); !it.is_done(); ++it) {\n        string_view key = it->first.GetSlice(&key_buffer);\n        if (ShouldWrite(key)) {\n          tiered_keys.emplace(key);\n        }\n      }\n    }\n    stats_.buckets_skipped++;\n  }\n\n  // Force serialized entries for keys that are tiered and were updated during migration.\n  // Unfortunately we cannot be selective here and need to flush all delayed entreis that we\n  // collected while traversing bucket.\n  // TODO: change interface so we forcefully flush only single entry.\n  if (tiered_keys.size()) {\n    cmd_serializer_->SerializeDelayedEntries(true, &tiered_keys);\n  }\n\n  // we don't need throttle here, because we throttle after every entry written\n\n  return written;\n}\n\n// 
Ordering invariant (PIT mode, slot migration):\n//   Same as SliceSnapshot::OnDbChange — for any key K the baseline must be sent before any\n//   journal entry that mutates K. RestoreStreamer always uses PIT mode (snapshot_version_ != 0)\n//   and serializes-before-mutate via CVCUponInsert (inserts) or WriteBucket (updates).\n//   big_value_mu_ prevents interleaving with the traversal fiber's WriteBucket.\nvoid RestoreStreamer::OnDbChange(DbIndex db_index, const DbSlice::ChangeReq& req) {\n  std::lock_guard guard(big_value_mu_);\n  DCHECK_EQ(db_index, 0) << \"Restore migration only allowed in cluster mode in db0\";\n\n  PrimeTable* table = db_slice_->GetTables(0).first;\n  uint64_t throttle_start = throttle_count_;\n  uint64_t throttle_usec_start = total_throttle_wait_usec_;\n  if (const PrimeTable::bucket_iterator* bit = req.update()) {\n    if (snapshot_version_ == 0) {\n      // If snapshot_version_ is 0, it means that Cancel() was called and we shouldn't proceed.\n      return;\n    }\n    stats_.buckets_on_db_update += WriteBucket(*bit, true);\n  } else {\n    string_view key = get<string_view>(req.change);\n    table->CVCUponInsert(snapshot_version_, key, [&](PrimeTable::bucket_iterator it) {\n      if (snapshot_version_ != 0) {  // we need this check because lambda can be called several\n                                     // times and we can preempt in WriteBucket\n        DCHECK_LT(it.GetVersion(), snapshot_version_);\n        stats_.buckets_on_db_update += WriteBucket(it, true);\n      }\n    });\n  }\n  stats_.throttle_on_db_update += throttle_count_ - throttle_start;\n  stats_.throttle_usec_on_db_update += total_throttle_wait_usec_ - throttle_usec_start;\n}\n\nvoid RestoreStreamer::WriteEntry(string_view key, const PrimeKey& pk, const PrimeValue& pv,\n                                 uint64_t expire_ms) {\n  stats_.commands += cmd_serializer_->SerializeEntry(key, pk, pv, expire_ms);\n}\n\n}  // namespace dfly\n"
  },
  {
    "path": "src/server/journal/streamer.h",
    "content": "// Copyright 2024, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#pragma once\n\n#include \"base/cycle_clock.h\"\n#include \"server/cluster/slot_set.h\"\n#include \"server/common_types.h\"\n#include \"server/execution_state.h\"\n#include \"server/journal/journal.h\"\n#include \"server/journal/pending_buf.h\"\n#include \"server/synchronization.h\"\n#include \"util/fiber_socket_base.h\"\n\nnamespace dfly {\n\n// Buffered single-shard journal streamer that listens for journal changes with a\n// journal listener and writes them to a destination sink in a separate fiber.\nclass JournalStreamer : public journal::JournalConsumerInterface {\n public:\n  struct Config {\n    bool should_sent_lsn = false;\n    bool init_from_stable_sync = false;\n    LSN start_partial_sync_at = 0;\n  };\n\n  JournalStreamer(ExecutionState* cntx, Config config);\n\n  virtual ~JournalStreamer();\n\n  // Self referential.\n  JournalStreamer(const JournalStreamer& other) = delete;\n  JournalStreamer(JournalStreamer&& other) = delete;\n\n  // Register journal listener and start writer in fiber.\n  virtual void Start(util::FiberSocketBase* dest);\n\n  void ConsumeJournalChange(const journal::JournalChangeItem& item);\n\n  // Must be called on context cancellation for unblocking\n  // and manual cleanup. If it unregistered a listener, returns true.\n  virtual bool Cancel();\n\n  size_t UsedBytes() const;\n\n  // For debugging purposes. Return string with formatted internal state.\n  std::string FormatInternalState() const;\n\n protected:\n  // TODO: we copy the string on each write because JournalItem may be passed to multiple\n  // streamers so we can not move it. 
However, if we would either wrap JournalItem in shared_ptr\n  // or wrap JournalItem::data in shared_ptr, we can avoid the cost of copying strings.\n  // Also, for small strings it's more peformant to copy to the intermediate buffer than\n  // to issue an io operation.\n  void Write(std::string str);\n\n  // Blocks the if the consumer if not keeping up.\n  void ThrottleIfNeeded() final;\n\n  virtual bool ShouldWrite(const journal::JournalChangeItem& item) const {\n    return cntx_->IsRunning();\n  }\n\n  void WaitForInflightToComplete(bool with_timeout);\n\n  size_t inflight_bytes() const {\n    return in_flight_bytes_;\n  }\n\n  util::FiberSocketBase* dest_ = nullptr;\n  ExecutionState* cntx_;\n  uint64_t throttle_count_ = 0;\n  uint64_t total_throttle_wait_usec_ = 0;\n  uint32_t throttle_waiters_ = 0;\n\n  PendingBuf pending_buf_;\n\n private:\n  // Return true if all lsn's from config_.start_partial_sync_at were sent (or if started from 0).\n  // Return false if not all lsn's were sent (stalled) in time. 
Cancels the context with error.\n  bool MaybePartialStreamLSNs();\n\n  void AsyncWrite(bool force_send);\n  void OnCompletion(std::error_code ec, size_t len);\n\n  bool IsStalled() const;\n\n  util::fb2::Fiber stalled_data_writer_;\n  util::fb2::Done stalled_data_writer_done_;\n  void StartStalledDataWriterFiber();\n  void StopStalledDataWriterFiber();\n  void StalledDataWriterFiber(std::chrono::milliseconds period_ms, util::fb2::Done* waiter);\n\n  const Config config_;\n  // If we are replication in stable sync we can aggregate data before sending\n  size_t in_flight_bytes_ = 0, total_sent_ = 0;\n  // Last time that send data in milliseconds\n  uint64_t last_async_write_time_ = 0;\n  time_t last_lsn_time_ = 0;\n  LSN last_lsn_writen_ = 0;\n  util::fb2::EventCount waker_;\n  uint32_t journal_cb_id_{0};\n};\n\nclass CmdSerializer;\n\n// Serializes existing DB as RESTORE commands, and sends updates as regular commands.\n// Only handles relevant slots, while ignoring all others.\nclass RestoreStreamer : public JournalStreamer {\n public:\n  RestoreStreamer(DbSlice* slice, cluster::SlotSet slots, ExecutionState* cntx);\n  ~RestoreStreamer() override;\n\n  void Start(util::FiberSocketBase* dest) override;\n\n  void Run();\n\n  // Cancel() must be called if Start() is called\n  bool Cancel() override;\n\n  void SendFinalize(long attempt);\n\n private:\n  void OnDbChange(DbIndex db_index, const ChangeReq& req);\n  bool ShouldWrite(const journal::JournalChangeItem& item) const override;\n  bool ShouldWrite(std::string_view key) const;\n  bool ShouldWrite(SlotId slot_id) const;\n\n  // Returns true if any entry was actually written\n  bool WriteBucket(PrimeTable::bucket_iterator it, bool on_db_change);\n\n  void WriteEntry(std::string_view key, const PrimeKey& pk, const PrimeValue& pv,\n                  uint64_t expire_ms);\n\n  struct Stats {\n    uint64_t buckets_skipped = 0;\n    uint64_t buckets_written = 0;\n    uint64_t buckets_loop = 0;\n    uint64_t 
buckets_on_db_update = 0;\n    uint64_t throttle_on_db_update = 0;\n    uint64_t throttle_usec_on_db_update = 0;\n    uint64_t keys_written = 0;\n    uint64_t keys_skipped = 0;\n    uint64_t commands = 0;\n    uint64_t iter_skips = 0;\n  };\n\n  DbSlice* db_slice_;\n  DbTableArray db_array_;\n  uint64_t snapshot_version_ = 0;\n  cluster::SlotSet my_slots_;\n\n  std::unique_ptr<CmdSerializer> cmd_serializer_;\n\n  ThreadLocalMutex big_value_mu_;\n  Stats stats_;\n  base::RealTimeAggregator cpu_aggregator_;\n};\n\n}  // namespace dfly\n"
  },
  {
    "path": "src/server/journal/tx_executor.cc",
    "content": "// Copyright 2024, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#include \"tx_executor.h\"\n\n#include <absl/strings/match.h>\n\n#include \"base/logging.h\"\n#include \"server/execution_state.h\"\n#include \"server/journal/serializer.h\"\n\nusing namespace std;\nusing namespace facade;\n\nnamespace dfly {\n\nbool MultiShardExecution::InsertTxToSharedMap(TxId txid, uint32_t shard_cnt) {\n  std::unique_lock lk(map_mu);\n  auto [it, was_insert] = tx_sync_execution.emplace(txid, shard_cnt);\n  lk.unlock();\n\n  VLOG(2) << \"txid: \" << txid << \" unique_shard_cnt_: \" << shard_cnt\n          << \" was_insert: \" << was_insert;\n  it->second.block->Dec();\n\n  return was_insert;\n}\n\nMultiShardExecution::TxExecutionSync& MultiShardExecution::Find(TxId txid) {\n  std::lock_guard lk(map_mu);\n  VLOG(2) << \"Execute txid: \" << txid;\n  auto it = tx_sync_execution.find(txid);\n  DCHECK(it != tx_sync_execution.end());\n  return it->second;\n}\n\nvoid MultiShardExecution::Erase(TxId txid) {\n  std::lock_guard lg{map_mu};\n  tx_sync_execution.erase(txid);\n}\n\nvoid MultiShardExecution::CancelAllBlockingEntities() {\n  lock_guard lk{map_mu};\n  for (auto& tx_data : tx_sync_execution) {\n    tx_data.second.barrier.Cancel();\n    tx_data.second.block->Cancel();\n  }\n}\n\nvoid TransactionData::AddEntry(journal::ParsedEntry&& entry) {\n  opcode = entry.opcode;\n\n  switch (entry.opcode) {\n    case journal::Op::LSN:\n      lsn = entry.lsn;\n      return;\n    case journal::Op::PING:\n      return;\n    case journal::Op::EXPIRED:\n    case journal::Op::COMMAND:\n      command = std::move(entry.cmd);\n      dbid = entry.dbid;\n      txid = entry.txid;\n      return;\n    default:\n      DCHECK(false) << \"Unsupported opcode\";\n  }\n}\n\nbool TransactionData::IsGlobalCmd() const {\n  if (command.empty()) {\n    return false;\n  }\n\n  string_view front = command.Front();\n\n  if (absl::EqualsIgnoreCase(front, 
\"FLUSHDB\"sv) || absl::EqualsIgnoreCase(front, \"FLUSHALL\"sv))\n    return true;\n\n  if (command.size() > 1 && absl::EqualsIgnoreCase(front, \"DFLYCLUSTER\"sv) &&\n      absl::EqualsIgnoreCase(command[1], \"FLUSHSLOTS\"sv)) {\n    return true;\n  }\n\n  return false;\n}\n\nbool TransactionReader::NextTxData(JournalReader* reader, ExecutionState* cntx,\n                                   TransactionData* dest) {\n  if (!cntx->IsRunning()) {\n    return false;\n  }\n  journal::ParsedEntry entry;\n  if (auto ec = reader->ReadEntry(&entry); ec) {\n    cntx->ReportError(ec);\n    return false;\n  }\n\n  // When LSN opcode is sent master does not increase journal lsn.\n  if (lsn_.has_value() && entry.opcode != journal::Op::LSN) {\n    ++*lsn_;\n    VLOG(2) << \"read lsn: \" << *lsn_;\n  }\n\n  dest->command.clear();\n  dest->AddEntry(std::move(entry));\n\n  if (lsn_.has_value() && dest->opcode == journal::Op::LSN) {\n    DCHECK_NE(dest->lsn, 0u);\n    LOG_IF_EVERY_N(WARNING, dest->lsn != *lsn_, 10000)\n        << \"master lsn:\" << dest->lsn << \" replica lsn\" << *lsn_;\n    DCHECK_EQ(dest->lsn, *lsn_);\n  }\n  return true;\n}\n\n}  // namespace dfly\n"
  },
  {
    "path": "src/server/journal/tx_executor.h",
    "content": "// Copyright 2022, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n#pragma once\n\n#include <unordered_map>\n\n#include \"server/execution_state.h\"\n#include \"server/journal/types.h\"\n#include \"util/fibers/synchronization.h\"\n\nnamespace dfly {\n\nstruct JournalReader;\n\n// Coordinator for multi shard execution.\nclass MultiShardExecution {\n public:\n  struct TxExecutionSync {\n    util::fb2::Barrier barrier;\n    std::atomic_uint32_t counter;\n    util::fb2::BlockingCounter block;\n\n    explicit TxExecutionSync(uint32_t counter)\n        : barrier(counter), counter(counter), block(counter) {\n    }\n  };\n\n  bool InsertTxToSharedMap(TxId txid, uint32_t shard_cnt);\n  TxExecutionSync& Find(TxId txid);\n  void Erase(TxId txid);\n  void CancelAllBlockingEntities();\n\n private:\n  util::fb2::Mutex map_mu;\n  std::unordered_map<TxId, TxExecutionSync> tx_sync_execution;\n};\n\n// This class holds the commands of transaction in single shard.\n// Once all commands were received, the transaction can be executed.\nstruct TransactionData {\n  // Update the data from ParsedEntry\n  void AddEntry(journal::ParsedEntry&& entry);\n\n  bool IsGlobalCmd() const;\n\n  TxId txid{0};\n  DbIndex dbid{0};\n  journal::ParsedEntry::CmdData command;\n\n  journal::Op opcode;\n  uint64_t lsn = 0;\n};\n\n// Utility for reading TransactionData from a journal reader.\n// The journal stream can contain interleaved data for multiple multi transactions,\n// expiries and out of order executed transactions that need to be grouped on the replica side.\nstruct TransactionReader {\n  TransactionReader(std::optional<uint64_t> lsn = std::nullopt) : lsn_(lsn) {\n  }\n\n  bool NextTxData(JournalReader* reader, ExecutionState* cntx, TransactionData* dest);\n\n private:\n  std::optional<uint64_t> lsn_ = 0;\n};\n\n}  // namespace dfly\n"
  },
  {
    "path": "src/server/journal/types.cc",
    "content": "// Copyright 2023, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#include \"server/journal/types.h\"\n\n#include <absl/strings/str_join.h>\n\nnamespace dfly::journal {\n\nusing namespace std;\n\nvoid AppendPrefix(string_view cmd, string* dest) {\n  absl::StrAppend(dest, \", cmd='\");\n  absl::StrAppend(dest, cmd);\n  absl::StrAppend(dest, \"', args=[\");\n}\n\nvoid AppendSuffix(string* dest) {\n  if (dest->back() == ',')\n    dest->pop_back();\n  absl::StrAppend(dest, \"]\");\n}\n\nstring Entry::ToString() const {\n  string rv = absl::StrCat(\"{op=\", opcode, \", dbid=\", dbid);\n\n  if (HasPayload()) {\n    AppendPrefix(payload.cmd, &rv);\n    for (string_view arg : base::it::Wrap(cmn::kToSV, payload.args))\n      absl::StrAppend(&rv, \"'\", cmn::ToSV(arg), \"',\");\n    AppendSuffix(&rv);\n  } else {\n    absl::StrAppend(&rv, \", empty\");\n  }\n\n  rv += \"}\";\n  return rv;\n}\n\nstring ParsedEntry::ToString() const {\n  string rv = absl::StrCat(\"{op=\", opcode, \", dbid=\", dbid, \", cmd='\");\n  for (string_view arg : cmd) {\n    absl::StrAppend(&rv, arg, \" \");\n  }\n  rv.pop_back();\n  rv += \"'}\";\n  return rv;\n}\n\n}  // namespace dfly::journal\n"
  },
  {
    "path": "src/server/journal/types.h",
    "content": "// Copyright 2022, Roman Gershman.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n#pragma once\n\n#include <optional>\n#include <string>\n#include <variant>\n\n#include \"common/backed_args.h\"\n#include \"server/common_types.h\"\n#include \"server/table.h\"\n\nnamespace dfly {\nnamespace journal {\n\nenum class Op : uint8_t { SELECT = 6, EXPIRED = 9 /* sunset*/, COMMAND = 10, PING = 13, LSN = 15 };\n\nstruct EntryBase {\n  TxId txid;\n  Op opcode;\n  DbIndex dbid;\n  std::optional<SlotId> slot;\n  LSN lsn{0};\n};\n\n// This struct represents a single journal entry.\n// Those are either control instructions or commands.\nstruct Entry : public EntryBase {\n  // Payload represents a non-owning view into a command executed on the shard.\n  struct Payload {\n    std::string_view cmd;\n    std::variant<ShardArgs,  // Shard parts.\n                 ArgSlice>   // Parts of a full command.\n        args;\n\n    Payload() = default;\n\n    Payload(std::string_view c, const ShardArgs& a) : cmd(c), args(a) {\n    }\n    Payload(std::string_view c, ArgSlice a) : cmd(c), args(a) {\n    }\n  };\n\n  Entry(TxId txid, Op opcode, DbIndex dbid, std::optional<SlotId> slot_id, Payload pl)\n      : EntryBase{txid, opcode, dbid, slot_id}, payload{std::move(pl)} {\n  }\n\n  Entry(journal::Op opcode, DbIndex dbid, std::optional<SlotId> slot_id)\n      : EntryBase{0, opcode, dbid, slot_id, 0} {\n  }\n\n  Entry(journal::Op opcode, LSN lsn) : EntryBase{0, opcode, 0, std::nullopt, lsn} {\n  }\n\n  Entry(TxId txid, journal::Op opcode, DbIndex dbid, std::optional<SlotId> slot_id)\n      : EntryBase{txid, opcode, dbid, slot_id, 0} {\n  }\n\n  bool HasPayload() const {\n    return !payload.cmd.empty();\n  }\n\n  std::string ToString() const;\n\n  Payload payload;\n};\n\nstruct ParsedEntry : public EntryBase {\n  using CmdData = cmn::BackedArguments;\n  CmdData cmd;\n\n  ParsedEntry(const ParsedEntry&) = delete;\n  ParsedEntry() = default;\n\n  std::string 
ToString() const;\n};\n\nstruct JournalItem {\n  LSN lsn;\n  std::string data;\n};\n\nstruct JournalChangeItem {\n  JournalItem journal_item;\n\n  std::string_view cmd;\n  std::optional<SlotId> slot;\n};\n\nstruct JournalConsumerInterface {\n  virtual ~JournalConsumerInterface() = default;\n\n  // Receives a journal change for serializing\n  virtual void ConsumeJournalChange(const JournalChangeItem& item) = 0;\n  // Waits for writing the serialized data\n  virtual void ThrottleIfNeeded() = 0;\n};\n\n}  // namespace journal\n}  // namespace dfly\n"
  },
  {
    "path": "src/server/json_family.cc",
    "content": "// Copyright 2022, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#include <absl/strings/match.h>\n#include <absl/strings/str_cat.h>\n#include <absl/strings/str_join.h>\n#include <absl/strings/str_split.h>\n\n#include <type_traits>\n\n#include \"absl/cleanup/cleanup.h\"\n#include \"base/flags.h\"\n#include \"base/logging.h\"\n#include \"core/flatbuffers.h\"\n#include \"core/json/json_object.h\"\n#include \"core/json/path.h\"\n#include \"core/mi_memory_resource.h\"\n#include \"facade/cmd_arg_parser.h\"\n#include \"facade/op_status.h\"\n#include \"facade/reply_builder.h\"\n#include \"server/acl/acl_commands_def.h\"\n#include \"server/command_families.h\"\n#include \"server/command_registry.h\"\n#include \"server/conn_context.h\"\n#include \"server/db_slice.h\"\n#include \"server/detail/wrapped_json_path.h\"\n#include \"server/engine_shard_set.h\"\n#include \"server/error.h\"\n#include \"server/execution_state.h\"\n#include \"server/journal/journal.h\"\n#include \"server/search/doc_index.h\"\n#include \"server/sharding.h\"\n#include \"server/tiered_storage.h\"\n#include \"server/transaction.h\"\n\n// clang-format off\n#include <jsoncons_ext/jsonpatch/jsonpatch.hpp>\n#include <jsoncons_ext/jsonpointer/jsonpointer.hpp>\n#include <jsoncons_ext/mergepatch/mergepatch.hpp>\n// clang-format on\n\nABSL_DECLARE_FLAG(bool, jsonpathv2);\n\nnamespace dfly {\n\nusing namespace std;\nusing namespace jsoncons;\nusing facade::CmdArgParser;\nusing facade::kSyntaxErrType;\nusing facade::RedisReplyBuilder;\nusing facade::SinkReplyBuilder;\n\nusing JsonExpression = jsonpath::jsonpath_expression<JsonType>;\nusing CI = CommandId;\n\nnamespace {\n\nstruct JsonAutoUpdaterOptions {\n  bool disable_indexing = false;  // If true, the key will not be removed or added to the indexes\n  bool update_on_delete = false;  // If true, SetJsonSize will be called on destruction\n};\n\n/* Helper class which must be initialized before any mutate 
operations on json.\n  It will track the memory usage of the json object and update the size in the CompactObj.\n  It also contains indexes updates, post update operations on the iterator. */\nclass JsonAutoUpdater {\n public:\n  JsonAutoUpdater(const OpArgs& op_args, string_view key, DbSlice::ItAndUpdater it,\n                  JsonAutoUpdaterOptions options = {})\n      : op_args_(op_args), key_(key), it_(std::move(it)), options_(options) {\n    if (!options_.disable_indexing) {\n      op_args.shard->search_indices()->RemoveDoc(key, op_args.db_cntx, it.it->second);\n    }\n\n    /* We need to initialize start memory usage after RemoveDoc because internally RemoveDoc has\n    static cache that can allocate/deallocate memory. Because of this, we will\n    overestimate/underestimate memory usage for json object. */\n    start_size_ = GetMemoryUsage();\n  }\n\n  JsonAutoUpdater(const JsonAutoUpdater&) = delete;\n  JsonAutoUpdater& operator=(const JsonAutoUpdater&) = delete;\n\n  JsonAutoUpdater(JsonAutoUpdater&&) = default;\n  JsonAutoUpdater& operator=(JsonAutoUpdater&&) = delete;\n\n  void SetJsonSize() {\n    set_size_was_called_ = true;\n\n    ShrinkJsonIfNeeded();\n\n    const size_t current = GetMemoryUsage();\n    int64_t diff = static_cast<int64_t>(current) - static_cast<int64_t>(start_size_);\n\n    GetPrimeValue().SetJsonSize(diff);\n\n    // Under any flow we must not end up with this special value.\n    DCHECK(GetPrimeValue().MallocUsed() != 0);\n  }\n\n  void AddDocToIndexes() {\n    op_args_.shard->search_indices()->AddDoc(key_, op_args_.db_cntx, &GetPrimeValue());\n  }\n\n  ~JsonAutoUpdater() {\n    if (was_released_) {\n      return;  // Skip all cleanup if iterator was released\n    }\n\n    if (options_.update_on_delete && !set_size_was_called_) {\n      SetJsonSize();\n    } else if (!set_size_was_called_) {\n      LOG(WARNING) << \"JsonAutoUpdater destructor called without SetJsonSize() being called. 
This \"\n                      \"may lead to memory tracking issues.\";\n    }\n\n    it_.post_updater.Run();\n\n    /* We need to call AddDoc after SetJsonSize because internally AddDoc has static cache that can\n    allocate/deallocate memory. Because of this, we will overestimate/underestimate memory usage for\n    json object. */\n    if (!options_.disable_indexing) {\n      AddDocToIndexes();\n    }\n  }\n\n  PrimeValue& GetPrimeValue() {\n    return it_.it->second;\n  }\n\n  JsonType* GetJson() {\n    return GetPrimeValue().GetJson();\n  }\n\n  const DbSlice::Iterator& GetIterator() const {\n    return it_.it;\n  }\n\n  // Releases ownership of the iterator. After calling this, the destructor becomes a noop.\n  // Used when we need to delete the entry manually (e.g., on error paths for newly created keys).\n  DbSlice::ItAndUpdater Release() {\n    was_released_ = true;\n    return std::move(it_);\n  }\n\n private:\n  size_t GetMemoryUsage() const {\n    return static_cast<MiMemoryResource*>(CompactObj::memory_resource())->used();\n  }\n\n  /* Shrinks the json object to fit its current size.\n     Sometimes after mutating the json object, it may have more capacity than needed.\n     This method will reduce the capacity to fit the current size. 
*/\n  void ShrinkJsonIfNeeded() {\n    auto json = GetJson();\n    if (json->size() * 2 < json->capacity()) {\n      json->shrink_to_fit();\n    }\n  }\n\n  const OpArgs& op_args_;\n  string_view key_;\n  DbSlice::ItAndUpdater it_;\n  JsonAutoUpdaterOptions options_;\n\n  // Used to track the memory usage of the json object\n  size_t start_size_{0};\n  bool set_size_was_called_{false};\n  bool was_released_{false};\n};\n\ntemplate <typename T> using ParseResult = io::Result<T, std::string>;\n\nParseResult<JsonExpression> ParseJsonPathAsExpression(std::string_view path) {\n  std::error_code ec;\n  JsonExpression res = MakeJsonPathExpr(path, ec);\n  if (ec)\n    return nonstd::make_unexpected(kSyntaxErr);\n  return res;\n}\n\nParseResult<WrappedJsonPath> ParseJsonPath(StringOrView path, JsonPathType path_type) {\n  if (absl::GetFlag(FLAGS_jsonpathv2)) {\n    auto path_result = json::ParsePath(path.view());\n    if (!path_result) {\n      VLOG(1) << \"Invalid Json path: \" << path << ' ' << path_result.error();\n      return nonstd::make_unexpected(kSyntaxErr);\n    }\n    return WrappedJsonPath{std::move(path_result).value(), std::move(path), path_type};\n  }\n\n  auto expr_result = ParseJsonPathAsExpression(path.view());\n  if (!expr_result) {\n    VLOG(1) << \"Invalid Json path: \" << path << ' ' << expr_result.error();\n    return nonstd::make_unexpected(kSyntaxErr);\n  }\n  return WrappedJsonPath{std::move(expr_result).value(), std::move(path), path_type};\n}\n\nParseResult<WrappedJsonPath> ParseJsonPathV1(std::string_view path) {\n  if (path.empty() || path == WrappedJsonPath::kV1PathRootElement) {\n    return ParseJsonPath(StringOrView::FromView(WrappedJsonPath::kV2PathRootElement),\n                         JsonPathType::kLegacy);\n  }\n\n  std::string v2_path = absl::StrCat(\n      WrappedJsonPath::kV2PathRootElement, path.front() != '.' && path.front() != '[' ? 
\".\" : \"\",\n      path);  // Convert to V2 path; TODO(path.front() != all kinds of symbols)\n  return ParseJsonPath(StringOrView::FromString(std::move(v2_path)), JsonPathType::kLegacy);\n}\n\nParseResult<WrappedJsonPath> ParseJsonPathV2(std::string_view path) {\n  return ParseJsonPath(StringOrView::FromView(path), JsonPathType::kV2);\n}\n\nbool IsJsonPathV2(std::string_view path) {\n  return !path.empty() && path.front() == '$';\n}\n\nParseResult<WrappedJsonPath> ParseJsonPath(std::string_view path) {\n  return IsJsonPathV2(path) ? ParseJsonPathV2(path) : ParseJsonPathV1(path);\n}\n\nnamespace reply_generic {\n\ntemplate <typename I> void Send(I begin, I end, CommandContext* cmd_cntx);\n\ninline RedisReplyBuilder* RB(CommandContext* cmd_cntx) {\n  return static_cast<RedisReplyBuilder*>(cmd_cntx->rb());\n}\n\nvoid Send(bool value, CommandContext* cmd_cntx) {\n  RB(cmd_cntx)->SendBulkString(value ? \"true\"sv : \"false\"sv);\n}\n\nvoid Send(long value, CommandContext* cmd_cntx) {\n  RB(cmd_cntx)->SendLong(value);\n}\n\nvoid Send(size_t value, CommandContext* cmd_cntx) {\n  RB(cmd_cntx)->SendLong(value);\n}\n\nvoid Send(double value, CommandContext* cmd_cntx) {\n  RB(cmd_cntx)->SendDouble(value);\n}\n\nvoid Send(const std::string& value, CommandContext* cmd_cntx) {\n  RB(cmd_cntx)->SendBulkString(value);\n}\n\nvoid Send(const std::vector<std::string>& vec, CommandContext* cmd_cntx) {\n  Send(vec.begin(), vec.end(), cmd_cntx);\n}\n\ntemplate <typename Allocator>\nvoid Send(const JsonWithAllocator<Allocator>& value, CommandContext* cmd_cntx) {\n  auto* rb = RB(cmd_cntx);\n  if (value.is_double()) {\n    Send(value.as_double(), cmd_cntx);\n  } else if (value.is_number()) {\n    Send(value.template as_integer<long>(), cmd_cntx);\n  } else if (value.is_bool()) {\n    rb->SendSimpleString(value.as_bool() ? 
\"true\" : \"false\");\n  } else if (value.is_null()) {\n    rb->SendNull();\n  } else if (value.is_string()) {\n    rb->SendBulkString(value.as_string_view());\n  } else if (value.is_object()) {\n    rb->StartArray(value.size() + 1);\n    rb->SendSimpleString(\"{\");\n    for (const auto& item : value.object_range()) {\n      rb->StartArray(2);\n      rb->SendBulkString(item.key());\n      Send(item.value(), cmd_cntx);\n    }\n  } else if (value.is_array()) {\n    if (rb->IsResp3()) {\n      rb->StartArray(value.size());\n      for (const auto& item : value.array_range()) {\n        Send(item, cmd_cntx);\n      }\n    } else {\n      rb->StartArray(value.size() + 1);\n      rb->SendSimpleString(\"[\");\n      for (const auto& item : value.array_range()) {\n        Send(item, cmd_cntx);\n      }\n    }\n  }\n}\n\ntemplate <typename T> void Send(const std::optional<T>& opt, CommandContext* cmd_cntx) {\n  if (opt.has_value()) {\n    Send(opt.value(), cmd_cntx);\n  } else {\n    RB(cmd_cntx)->SendNull();\n  }\n}\n\ntemplate <typename I> void Send(I begin, I end, CommandContext* cmd_cntx) {\n  RedisReplyBuilder* rb = RB(cmd_cntx);\n  RedisReplyBuilder::ReplyScope scope{rb};\n  if (begin == end) {\n    rb->SendEmptyArray();\n  } else {\n    if constexpr (is_same_v<decltype(*begin), const string>) {\n      rb->SendBulkStrArr(cmn::OwnedArgSlice{begin, end});\n    } else {\n      rb->StartArray(end - begin);\n      for (auto i = begin; i != end; ++i) {\n        Send(*i, cmd_cntx);\n      }\n    }\n  }\n}\n\ntemplate <typename T> void Send(const JsonCallbackResult<T>& result, CommandContext* cmd_cntx) {\n  RedisReplyBuilder* rb = RB(cmd_cntx);\n  if (result.ShouldSendNil())\n    return rb->SendNull();\n  if (result.ShouldSendWrongType())\n    return cmd_cntx->SendError(OpStatus::WRONG_JSON_TYPE);\n\n  if (result.IsV1()) {\n    /* The specified path was restricted (JSON legacy mode), then the result consists only of a\n     * single value */\n    if (rb->IsResp3()) {\n      
rb->StartArray(1);\n    }\n    Send(result.AsV1(), cmd_cntx);\n  } else {\n    /* The specified path was enhanced (starts with '$'), then the result is an array of multiple\n     * values */\n    const auto& arr = result.AsV2();\n    if (rb->IsResp3()) {\n      rb->StartArray(arr.size());\n      for (const auto& item : arr) {\n        // For JSON.TYPE (std::string), preserve nested array behavior for compatibility\n        if constexpr (std::is_same_v<T, std::string>) {\n          rb->StartArray(1);\n        }\n        Send(item, cmd_cntx);\n      }\n    } else {\n      Send(arr.begin(), arr.end(), cmd_cntx);\n    }\n  }\n}\n\ntemplate <typename T> void Send(const OpResult<T>& result, CommandContext* cmd_cntx) {\n  if (result) {\n    RedisReplyBuilder::ReplyScope scope{cmd_cntx->rb()};\n    Send(result.value(), cmd_cntx);\n  } else {\n    cmd_cntx->SendError(result.status());\n  }\n}\n\nvoid SendJsonString(const OpResult<string>& result, CommandContext* cmd_cntx) {\n  if (result) {\n    RedisReplyBuilder::ReplyScope scope{cmd_cntx->rb()};\n    RedisReplyBuilder* rb = RB(cmd_cntx);\n    const string& json_str = result.value();\n    if (rb->IsResp3()) {\n      if (const std::optional<TmpJson> parsed_json = JsonFromString(json_str)) {\n        Send(parsed_json.value(), cmd_cntx);\n        return;\n      }\n    }\n    Send(json_str, cmd_cntx);\n  } else {\n    cmd_cntx->SendError(result.status());\n  }\n}\n\n}  // namespace reply_generic\n\nusing OptSize = optional<size_t>;\nusing SavingOrder = CallbackResultOptions::SavingOrder;\nusing OnEmpty = CallbackResultOptions::OnEmpty;\n\nstruct JsonGetParams {\n  std::optional<std::string> indent;\n  std::optional<std::string> new_line;\n  std::optional<std::string> space;\n  bool no_escape = false;  // Flag for NOESCAPE option\n  std::vector<std::pair<std::string_view, WrappedJsonPath>> paths;\n};\n\nstd::optional<JsonGetParams> ParseJsonGetParams(CmdArgParser* parser, SinkReplyBuilder* builder) {\n  JsonGetParams 
parsed_args;\n  while (parser->HasNext()) {\n    if (parser->Check(\"NOESCAPE\")) {\n      parsed_args.no_escape = true;\n    } else if (parser->Check(\"SPACE\")) {\n      parsed_args.space = parser->Next();\n    } else if (parser->Check(\"NEWLINE\")) {\n      parsed_args.new_line = parser->Next();\n    } else if (parser->Check(\"INDENT\")) {\n      parsed_args.indent = parser->Next();\n    } else {\n      std::string_view path_str = parser->Next();\n\n      auto json_path = ParseJsonPath(path_str);\n      if (!json_path) {\n        builder->SendError(json_path.error());\n        return std::nullopt;\n      }\n\n      parsed_args.paths.emplace_back(path_str, std::move(json_path).value());\n    }\n  }\n  return parsed_args;\n}\n\n// This method makes a comparison of json considering their types\n// For example, 3 != 3.0 because json_type::int64_value != json_type::double_value\nbool JsonAreEquals(const JsonType& lhs, const JsonType& rhs) {\n  if (lhs.type() != rhs.type()) {\n    return false;\n  }\n  switch (lhs.type()) {\n    case json_type::array_value: {\n      if (lhs.size() != rhs.size()) {\n        return false;\n      }\n\n      auto rhs_array = rhs.array_range();\n      for (auto l_it = lhs.array_range().begin(), r_it = rhs_array.begin(); r_it != rhs_array.end();\n           ++r_it, ++l_it) {\n        if (!JsonAreEquals(*l_it, *r_it)) {\n          return false;\n        }\n      }\n      return true;\n    }\n\n    case json_type::object_value: {\n      if (lhs.size() != rhs.size()) {\n        return false;\n      }\n      return std::all_of(\n          lhs.object_range().begin(), lhs.object_range().end(), [&](const auto& l_it) {\n            auto r_it = rhs.find(l_it.key());\n            return r_it != rhs.object_range().end() && JsonAreEquals(l_it.value(), r_it->value());\n          });\n    }\n\n    default:\n      return lhs == rhs;\n  }\n}\n\n/* Converts a JSONPath to a JSONPointer.\n   E.g. $[a][b][0] -> /a/b/0.\n   V1 JSONPath is not supported. 
*/\nstd::optional<std::string> ConvertJsonPathToJsonPointer(string_view json_path) {\n  auto parsed_path = json::ParsePath(json_path);\n\n  if (!parsed_path) {\n    VLOG(2) << \"Error during conversion of JSONPath to JSONPointer: \" << json_path\n            << \". Invalid JSONPath.\";\n    return std::nullopt;\n  }\n\n  std::string pointer;\n  const auto& path = parsed_path.value();\n  for (const auto& node : path) {\n    const auto& type = node.type();\n    if (type == json::SegmentType::IDENTIFIER) {\n      absl::StrAppend(&pointer, \"/\"sv, node.identifier());\n    } else if (type == json::SegmentType::INDEX) {\n      const auto& index = node.index();\n\n      if (index.first != index.second) {\n        VLOG(2) << \"Error during conversion of JSONPath to JSONPointer: \" << json_path\n                << \". Index range is not supported.\";\n        return std::nullopt;\n      }\n\n      absl::StrAppend(&pointer, \"/\"sv, node.index().first);\n    } else {\n      VLOG(2) << \"Error during conversion of JSONPath to JSONPointer: \" << json_path\n              << \". Unsupported segment type.\";\n      return std::nullopt;\n    }\n  }\n\n  return pointer;\n}\n\n/* Use this method on the shard thread\n\n   If you do memory tracking, make sure to initialize it before calling this method, and reset the\n   result before invoking SetJsonSize. Note that even after calling std::move on an optional, it may\n   still hold the JSON value, which can lead to incorrect memory tracking. 
*/\nstd::optional<JsonType> ShardJsonFromString(std::string_view input) {\n  return ParseJsonUsingShardHeap(input);\n}\n\nOpStatus SetFullJson(const OpArgs& op_args, string_view key, string_view json_str) {\n  // We check the type of the object later, because we allow both OBJ_JSON and OBJ_STRING here\n  auto it_res = op_args.GetDbSlice().AddOrFind(op_args.db_cntx, key, std::nullopt);\n  RETURN_ON_BAD_STATUS(it_res);\n\n  auto type = it_res->it->second.ObjType();\n  if (type == OBJ_JSON) {\n    // If it is JSON, we need to remove the old JSON object from the indexes\n    op_args.shard->search_indices()->RemoveDoc(key, op_args.db_cntx, it_res->it->second);\n  } else if (type != OBJ_STRING) {\n    // The object is not a JSON object and not a string, so we cannot set a full JSON value\n    return OpStatus::WRONG_TYPE;\n  }\n\n  const bool is_new_key = it_res->is_new;\n\n  // AddOrFind for Add case has type == OBJ_STRING.\n  // We either added a new key (is_new_key is true) or found a pre-existing (string).\n  // For both cases we must reset the object before we set up the JsonAutoUpdater.\n  // *note* that ShardJsonFromString is called twice. 
This *parses and allocates* the\n  // same JSON object twice and might impact performance of large json strings.\n  if (type != OBJ_JSON) {\n    if (!ShardJsonFromString(json_str)) {\n      if (is_new_key) {\n        // Delete the key if it was created during this operation to avoid\n        // an orphan (leftover empty key).\n        auto& db_slice = op_args.GetDbSlice();\n        db_slice.DelMutable(op_args.db_cntx, std::move(*it_res));\n      }\n      VLOG(1) << \"got invalid JSON string '\" << json_str << \"' cannot be saved\";\n      return OpStatus::INVALID_JSON;\n    }\n    it_res->it->second.Reset();\n  }\n\n  JsonAutoUpdater updater(op_args, key, *std::move(it_res),\n                          {.disable_indexing = true, .update_on_delete = false});\n\n  {\n    std::optional<JsonType> parsed_json = ShardJsonFromString(json_str);\n    if (!parsed_json) {\n      VLOG(1) << \"got invalid JSON string '\" << json_str << \"' cannot be saved\";\n      if (type == OBJ_JSON) {\n        // We need to add the document to the indexes, because we removed it before\n        op_args.shard->search_indices()->AddDoc(key, op_args.db_cntx, &updater.GetPrimeValue());\n      }\n      if (is_new_key) {\n        auto& db_slice = op_args.GetDbSlice();\n        db_slice.DelMutable(op_args.db_cntx, updater.Release());\n      }\n      return OpStatus::INVALID_JSON;\n    }\n\n    op_args.GetDbSlice().RemoveExpire(op_args.db_cntx.db_index, updater.GetIterator());\n\n    if (JsonEnconding() == kEncodingJsonFlat) {\n      flexbuffers::Builder fbb;\n      json::FromJsonType(*parsed_json, &fbb);\n      fbb.Finish();\n      const auto& buf = fbb.GetBuffer();\n      updater.GetPrimeValue().SetJson(buf.data(), buf.size());\n    } else {\n      updater.GetPrimeValue().SetJson(std::move(*parsed_json));\n    }\n\n    // We should reset parsed_json before setting the size of the json, because\n    // std::optional still holds the value and it will be deallocated\n  }\n  updater.SetJsonSize();\n\n  
// We need to manually run add document here\n  op_args.shard->search_indices()->AddDoc(key, op_args.db_cntx, &updater.GetPrimeValue());\n\n  return OpStatus::OK;\n}\n\n/* Sets a partial JSON value at the specified path.\n   True means that the value was set, false means that the value was not set. */\nOpResult<bool> SetPartialJson(const OpArgs& op_args, string_view key,\n                              const WrappedJsonPath& json_path, string_view json_str,\n                              bool is_nx_condition, bool is_xx_condition) {\n  auto it_res = op_args.GetDbSlice().FindMutable(op_args.db_cntx, key, OBJ_JSON);\n  RETURN_ON_BAD_STATUS(it_res);\n\n  JsonAutoUpdater updater(op_args, key, *std::move(it_res));\n\n  /* This method would use copy for parsed_json and not move!\n     The reason being, that we are applying this multiple times for each match we found.\n     So for example if we have an array that this expression will match each entry in it then the\n     assign here is called N times. 
*/\n  std::optional<JsonType> parsed_json = ShardJsonFromString(json_str);\n  if (!parsed_json) {\n    VLOG(1) << \"got invalid JSON string '\" << json_str << \"' cannot be saved\";\n    return OpStatus::INVALID_JSON;\n  }\n\n  bool path_exists = false;\n  bool value_was_set = false;\n\n  // If the path exists, this callback will be called\n  auto mutate_cb = [&](std::optional<std::string_view>, JsonType* val) -> MutateCallbackResult<> {\n    path_exists = true;\n    if (!is_nx_condition) {\n      value_was_set = true;\n      *val = JsonType(parsed_json.value(), StatelessAllocator<char>{});\n    }\n    return {};\n  };\n\n  auto mutate_res = json_path.ExecuteMutateCallback<Nothing>(\n      updater.GetJson(), mutate_cb, CallbackResultOptions::DefaultMutateOptions());\n\n  // Set a new value if the path doesn't exist and the xx condition is not set.\n  if (mutate_res && !path_exists && !is_xx_condition) {\n    auto pointer = ConvertJsonPathToJsonPointer(json_path.Path());\n    if (!pointer) {\n      return OpStatus::SYNTAX_ERR;\n    }\n\n    std::error_code ec;\n    jsoncons::jsonpointer::add(*updater.GetJson(), pointer.value(), std::move(parsed_json).value(),\n                               ec);\n    if (ec) {\n      VLOG(1) << \"Failed to add a JSON value to the following path: \" << json_str\n              << \" with the error: \" << ec.message();\n      return OpStatus::SYNTAX_ERR;\n    }\n\n    value_was_set = true;\n  }\n\n  if (value_was_set) {\n    // We should do reset before setting the size of the json, because\n    // std::optional still holds the value and it will be deallocated\n    parsed_json.reset();\n    updater.SetJsonSize();\n  }\n\n  return value_was_set;\n}\n\nsize_t NormalizeNegativeIndex(int index, size_t size) {\n  if (index >= 0) {\n    return index;\n  }\n\n  if (static_cast<size_t>(-index) > size) {\n    return 0;\n  }\n  return size + index;\n}\n\nauto GetJsonArrayIterator(JsonType* val, size_t index) {\n  return 
std::next(val->array_range().begin(), static_cast<ptrdiff_t>(index));\n}\n\nauto GetJsonArrayIterator(const JsonType& val, size_t index) {\n  return std::next(val.array_range().begin(), static_cast<ptrdiff_t>(index));\n}\n\nstring JsonTypeToName(const JsonType& val) {\n  using namespace std::string_literals;\n\n  if (val.is_null()) {\n    return \"null\"s;\n  } else if (val.is_bool()) {\n    return \"boolean\"s;\n  } else if (val.is_string()) {\n    return \"string\"s;\n  } else if (val.is_int64() || val.is_uint64()) {\n    return \"integer\"s;\n  } else if (val.is_number()) {\n    return \"number\"s;\n  } else if (val.is_object()) {\n    return \"object\"s;\n  } else if (val.is_array()) {\n    return \"array\"s;\n  }\n\n  return std::string{};\n}\n\n// Returns the index of the next right bracket\nOptSize GetNextIndex(string_view str) {\n  size_t current_idx = 0;\n  while (current_idx + 1 < str.size()) {\n    // ignore escaped character after the backslash (e.g. \\').\n    if (str[current_idx] == '\\\\') {\n      current_idx += 2;\n    } else if (str[current_idx] == '\\'' && str[current_idx + 1] == ']') {\n      return current_idx;\n    } else {\n      current_idx++;\n    }\n  }\n\n  return nullopt;\n}\n\n// Encodes special characters when appending token to JSONPointer\nstruct JsonPointerFormatter {\n  void operator()(std::string* out, string_view token) const {\n    for (size_t i = 0; i < token.size(); i++) {\n      char ch = token[i];\n      if (ch == '~') {\n        out->append(\"~0\");\n      } else if (ch == '/') {\n        out->append(\"~1\");\n      } else if (ch == '\\\\') {\n        // backslash for encoded another character should remove.\n        if (i + 1 < token.size() && token[i + 1] == '\\\\') {\n          out->append(1, '\\\\');\n          i++;\n        }\n      } else {\n        out->append(1, ch);\n      }\n    }\n  }\n};\n\n// Returns the JsonPointer of a JsonPath\n// e.g. 
$[a][b][0] -> /a/b/0\nstring ConvertToJsonPointer(string_view json_path) {\n  if (json_path.empty() || json_path[0] != '$') {\n    LOG(FATAL) << \"Unexpected JSONPath syntax: \" << json_path;\n  }\n\n  // remove prefix\n  json_path.remove_prefix(1);\n\n  // except the supplied string is compatible with JSONPath syntax.\n  // Each item in the string is a left bracket followed by\n  // numeric or '<key>' and then a right bracket.\n  vector<string_view> parts;\n  bool invalid_syntax = false;\n  while (!json_path.empty()) {\n    bool is_array = false;\n    bool is_object = false;\n\n    // check string size is sufficient enough for at least one item.\n    if (2 >= json_path.size()) {\n      invalid_syntax = true;\n      break;\n    }\n\n    if (json_path[0] == '[') {\n      if (json_path[1] == '\\'') {\n        is_object = true;\n        json_path.remove_prefix(2);\n      } else if (isdigit(json_path[1])) {\n        is_array = true;\n        json_path.remove_prefix(1);\n      } else {\n        invalid_syntax = true;\n        break;\n      }\n    } else {\n      invalid_syntax = true;\n      break;\n    }\n\n    if (is_array) {\n      size_t end_val_idx = json_path.find(']');\n      if (end_val_idx == string::npos) {\n        invalid_syntax = true;\n        break;\n      }\n\n      parts.emplace_back(json_path.substr(0, end_val_idx));\n      json_path.remove_prefix(end_val_idx + 1);\n    } else if (is_object) {\n      OptSize end_val_idx = GetNextIndex(json_path);\n      if (!end_val_idx) {\n        invalid_syntax = true;\n        break;\n      }\n\n      parts.emplace_back(json_path.substr(0, *end_val_idx));\n      json_path.remove_prefix(*end_val_idx + 2);\n    } else {\n      invalid_syntax = true;\n      break;\n    }\n  }\n\n  if (invalid_syntax) {\n    LOG(FATAL) << \"Unexpected JSONPath syntax: \" << json_path;\n  }\n\n  string result{\"/\"};  // initialize with a leading slash\n  result += absl::StrJoin(parts, \"/\", JsonPointerFormatter());\n  return 
result;\n}\n\nsize_t CountJsonFields(const JsonType& j) {\n  size_t res = 0;\n  json_type type = j.type();\n  if (type == json_type::array_value) {\n    res += j.size();\n    for (const auto& item : j.array_range()) {\n      if (item.type() == json_type::array_value || item.type() == json_type::object_value) {\n        res += CountJsonFields(item);\n      }\n    }\n\n  } else if (type == json_type::object_value) {\n    res += j.size();\n    for (const auto& item : j.object_range()) {\n      if (item.value().type() == json_type::array_value ||\n          item.value().type() == json_type::object_value) {\n        res += CountJsonFields(item.value());\n      }\n    }\n\n  } else {\n    res += 1;\n  }\n\n  return res;\n}\n\nstruct ReadOnlyOperationOptions {\n  bool return_nil_if_key_not_found = false;\n  CallbackResultOptions cb_result_options = CallbackResultOptions::DefaultReadOnlyOptions();\n};\n\ntemplate <typename T>\nOpResult<JsonCallbackResult<T>> JsonReadOnlyOperation(const OpArgs& op_args, std::string_view key,\n                                                      const WrappedJsonPath& json_path,\n                                                      JsonPathReadOnlyCallback<T> cb,\n                                                      ReadOnlyOperationOptions options = {}) {\n  auto it_res = op_args.GetDbSlice().FindReadOnly(op_args.db_cntx, key, OBJ_JSON);\n\n  if (!it_res) {\n    if (options.return_nil_if_key_not_found && it_res == OpStatus::KEY_NOTFOUND) {\n      return JsonCallbackResult<T>{{CallbackResultOptions::OnEmpty::kSendNil,\n                                    options.cb_result_options.saving_order,\n                                    JsonPathType::kLegacy}};  // set legacy mode to return nil\n    }\n    return it_res.status();\n  }\n\n  JsonType* json_val = it_res.value()->second.GetJson();\n  DCHECK(json_val) << \"should have a valid JSON object for key \" << key;\n\n  return json_path.ExecuteReadOnlyCallback<T>(json_val, cb, 
options.cb_result_options);\n}\n\ntemplate <typename T>\nOpResult<JsonCallbackResult<optional<T>>> JsonMutateOperation(\n    const OpArgs& op_args, std::string_view key, const WrappedJsonPath& json_path,\n    JsonPathMutateCallback<T> cb,\n    CallbackResultOptions cb_result_options = CallbackResultOptions::DefaultMutateOptions()) {\n  auto it_res = op_args.GetDbSlice().FindMutable(op_args.db_cntx, key, OBJ_JSON);\n  RETURN_ON_BAD_STATUS(it_res);\n\n  JsonAutoUpdater updater(op_args, key, *std::move(it_res));\n\n  auto mutate_res = json_path.ExecuteMutateCallback(updater.GetJson(), cb, cb_result_options);\n\n  updater.SetJsonSize();\n\n  return mutate_res;\n}\n\nbool LegacyModeIsEnabled(const std::vector<std::pair<std::string_view, WrappedJsonPath>>& paths) {\n  return std::all_of(paths.begin(), paths.end(),\n                     [](auto& parsed_path) { return parsed_path.second.IsLegacyModePath(); });\n}\n\nOpResult<std::string> OpJsonGet(const OpArgs& op_args, string_view key,\n                                const JsonGetParams& params) {\n  // We don't use OBJ_JSON here because we want to support both JSON and STRING types.\n  // If the key is not OBJ_JSON and not OBJ_STRING, we return WRONG_TYPE.\n  auto it = op_args.GetDbSlice().FindReadOnly(op_args.db_cntx, key).it;\n  if (!IsValid(it))\n    return OpStatus::KEY_NOTFOUND;\n\n  const JsonType* json_ptr = nullptr;\n  JsonType json;\n  if (it->second.ObjType() == OBJ_JSON) {\n    json_ptr = it->second.GetJson();\n  } else if (it->second.ObjType() == OBJ_STRING) {\n    string tmp;\n    it->second.GetString(&tmp);\n    auto parsed_json = ShardJsonFromString(tmp);\n    if (!parsed_json) {\n      return OpStatus::WRONG_TYPE;\n    }\n    json.swap(*parsed_json);\n    json_ptr = &json;\n  } else {\n    return OpStatus::WRONG_TYPE;\n  }\n\n  const auto& paths = params.paths;\n  const JsonType& json_entry = *json_ptr;\n\n  if (paths.empty()) {\n    // this implicitly means that we're using . 
which\n    // means we just return the whole document\n    return json_entry.to_string();\n  }\n\n  json_options options;\n  options.spaces_around_comma(spaces_option::no_spaces)\n      .spaces_around_colon(spaces_option::no_spaces)\n      .object_array_line_splits(line_split_kind::multi_line)\n      .indent_size(0)\n      .new_line_chars(\"\");\n\n  if (params.indent) {\n    options.indent_size(1);\n    options.indent_chars(params.indent.value());\n  }\n\n  if (params.new_line) {\n    options.new_line_chars(params.new_line.value());\n  }\n\n  if (params.space) {\n    options.after_key_chars(params.space.value());\n  }\n\n  auto cb = [](std::string_view, const JsonType& val) { return val; };\n\n  const bool legacy_mode_is_enabled = LegacyModeIsEnabled(paths);\n  CallbackResultOptions cb_options = CallbackResultOptions::DefaultReadOnlyOptions();\n  cb_options.path_type = legacy_mode_is_enabled ? JsonPathType::kLegacy : JsonPathType::kV2;\n\n  auto eval_wrapped = [&](const WrappedJsonPath& json_path) -> std::optional<JsonType> {\n    auto eval_result = json_path.ExecuteReadOnlyCallback<JsonType>(&json_entry, cb, cb_options);\n\n    DCHECK(legacy_mode_is_enabled == eval_result.IsV1());\n\n    if (eval_result.IsV1()) {\n      if (eval_result.Empty())\n        return nullopt;\n      return eval_result.AsV1();\n    }\n\n    return JsonType{eval_result.AsV2()};\n  };\n\n  JsonType out{\n      jsoncons::json_object_arg};  // see https://github.com/danielaparker/jsoncons/issues/482\n  if (paths.size() == 1) {\n    auto eval_result = eval_wrapped(paths[0].second);\n    if (!eval_result) {\n      return OpStatus::INVALID_JSON_PATH;\n    }\n    out = std::move(eval_result).value();  // TODO(Print not existing path to the user)\n  } else {\n    for (const auto& [path_str, path] : paths) {\n      auto eval_result = eval_wrapped(path);\n      if (legacy_mode_is_enabled && !eval_result) {\n        return OpStatus::INVALID_JSON_PATH;\n      }\n      out[path_str] = 
std::move(eval_result).value();  // TODO(Print not existing path to the user)\n    }\n  }\n\n  jsoncons::json_printable jp(out, options, jsoncons::indenting::indent);\n  std::stringstream ss;\n  jp.dump(ss);\n  return ss.str();\n}\n\nauto OpType(const OpArgs& op_args, string_view key, const WrappedJsonPath& json_path) {\n  auto cb = [](const string_view&, const JsonType& val) -> std::string {\n    return JsonTypeToName(val);\n  };\n  return JsonReadOnlyOperation<std::string>(op_args, key, json_path, std::move(cb), {true});\n}\n\nOpResult<JsonCallbackResult<OptSize>> OpStrLen(const OpArgs& op_args, string_view key,\n                                               const WrappedJsonPath& json_path) {\n  auto cb = [](const string_view&, const JsonType& val) -> OptSize {\n    if (val.is_string()) {\n      return val.as_string_view().size();\n    } else {\n      return nullopt;\n    }\n  };\n  return JsonReadOnlyOperation<OptSize>(\n      op_args, key, json_path, std::move(cb),\n      {json_path.IsLegacyModePath(),\n       CallbackResultOptions::DefaultReadOnlyOptions(SavingOrder::kSaveFirst)});\n}\n\nOpResult<JsonCallbackResult<OptSize>> OpObjLen(const OpArgs& op_args, string_view key,\n                                               const WrappedJsonPath& json_path) {\n  auto cb = [](const string_view&, const JsonType& val) -> optional<size_t> {\n    if (val.is_object()) {\n      return val.size();\n    } else {\n      return nullopt;\n    }\n  };\n  return JsonReadOnlyOperation<OptSize>(\n      op_args, key, json_path, std::move(cb),\n      {json_path.IsLegacyModePath(),\n       CallbackResultOptions::DefaultReadOnlyOptions(SavingOrder::kSaveFirst)});\n}\n\nOpResult<JsonCallbackResult<OptSize>> OpArrLen(const OpArgs& op_args, string_view key,\n                                               const WrappedJsonPath& json_path) {\n  auto cb = [](const string_view&, const JsonType& val) -> OptSize {\n    if (val.is_array()) {\n      return val.size();\n    } else {\n      
return std::nullopt;\n    }\n  };\n  return JsonReadOnlyOperation<OptSize>(\n      op_args, key, json_path, std::move(cb),\n      {true, CallbackResultOptions::DefaultReadOnlyOptions(SavingOrder::kSaveFirst)});\n}\n\ntemplate <typename T>\nauto OpToggle(const OpArgs& op_args, string_view key,\n              const WrappedJsonPath& json_path) {  // TODO(change the output type for enhanced path)\n  auto cb = [](std::optional<std::string_view>,\n               JsonType* val) -> MutateCallbackResult<std::optional<T>> {\n    if (val->is_bool()) {\n      bool next_val = val->as_bool() ^ true;\n      *val = next_val;\n      return {false, next_val};\n    }\n    return {};\n  };\n  return JsonMutateOperation<std::optional<T>>(op_args, key, json_path, std::move(cb));\n}\n\ntemplate <typename T>\nauto ExecuteToggle(string_view key, const WrappedJsonPath& json_path, CommandContext* cmd_cntx) {\n  auto cb = [&](Transaction* t, EngineShard* shard) {\n    return OpToggle<T>(t->GetOpArgs(shard), key, json_path);\n  };\n\n  auto result = cmd_cntx->tx()->ScheduleSingleHopT(std::move(cb));\n  reply_generic::Send(result, cmd_cntx);\n}\n\nenum ArithmeticOpType : uint8_t { OP_ADD, OP_MULTIPLY };\n\nvoid BinOpApply(double num, bool num_is_double, ArithmeticOpType op, JsonType* val,\n                bool* overflow) {\n  double result = 0;\n  switch (op) {\n    case OP_ADD:\n      result = val->as<double>() + num;\n      break;\n    case OP_MULTIPLY:\n      result = val->as<double>() * num;\n      break;\n  }\n\n  if (isinf(result)) {\n    *overflow = true;\n    return;\n  }\n\n  if (val->is_double() || num_is_double) {\n    *val = result;\n  } else {\n    *val = static_cast<uint64_t>(result);\n  }\n  *overflow = false;\n}\n\n// Tmp solution with struct CallbackResult, because MutateCallbackResult<std::optional<JsonType>>\n// does not compile\nstruct DoubleArithmeticCallbackResult {\n  explicit DoubleArithmeticCallbackResult(bool legacy_mode_is_enabled_)\n      : 
legacy_mode_is_enabled(legacy_mode_is_enabled_) {\n    if (!legacy_mode_is_enabled) {\n      json_value.emplace(jsoncons::json_array_arg);\n    }\n  }\n\n  void AddValue(JsonType val) {\n    if (legacy_mode_is_enabled) {\n      json_value = std::move(val);\n    } else {\n      json_value->emplace_back(std::move(val));\n    }\n  }\n\n  void AddEmptyValue() {\n    if (!legacy_mode_is_enabled) {\n      json_value->emplace_back(JsonType::null());\n    }\n  }\n\n  std::optional<JsonType> json_value;\n  bool legacy_mode_is_enabled;\n};\n\nOpResult<string> OpDoubleArithmetic(const OpArgs& op_args, string_view key,\n                                    const WrappedJsonPath& json_path, string_view num,\n                                    ArithmeticOpType op_type) {\n  bool has_fractional_part = num.find('.') != string::npos;\n  double double_value = 0;\n\n  if (!ParseDouble(num, &double_value)) {\n    VLOG(2) << \"Failed to parse number as double: \" << num;\n    return OpStatus::WRONG_TYPE;\n  }\n\n  bool is_result_overflow = false;\n\n  DoubleArithmeticCallbackResult result{json_path.IsLegacyModePath()};\n  auto cb = [&](std::optional<std::string_view>, JsonType* val) -> MutateCallbackResult<> {\n    if (val->is_number()) {\n      bool res = false;\n      BinOpApply(double_value, has_fractional_part, op_type, val, &res);\n      if (res) {\n        is_result_overflow = true;\n      } else {\n        result.AddValue(*val);\n        return {};\n      }\n    }\n    result.AddEmptyValue();\n    return {};\n  };\n\n  auto res = JsonMutateOperation<Nothing>(op_args, key, json_path, std::move(cb));\n\n  if (is_result_overflow)\n    return OpStatus::INVALID_NUMERIC_RESULT;\n\n  RETURN_ON_BAD_STATUS(res);\n\n  if (!result.json_value) {\n    return OpStatus::WRONG_JSON_TYPE;\n  }\n  return result.json_value->as_string();\n}\n\n// Deletes items specified by the expression/path.\nOpResult<long> OpDel(const OpArgs& op_args, string_view key, string_view path,\n                     
const WrappedJsonPath& json_path) {\n  if (json_path.RefersToRootElement()) {\n    auto& db_slice = op_args.GetDbSlice();\n    auto res_it = db_slice.FindMutable(op_args.db_cntx, key, OBJ_JSON);\n\n    // For JSON.DEL, if key doesn't exist, return 0 instead of error\n    if (res_it.status() == OpStatus::KEY_NOTFOUND) {\n      return 0;\n    }\n\n    RETURN_ON_BAD_STATUS(res_it);\n\n    if (IsValid(res_it->it)) {\n      db_slice.DelMutable(op_args.db_cntx, std::move(*res_it));\n      return 1;\n    }\n    return 0;\n  }\n\n  // FindMutable because we need to run the AutoUpdater at the end which will account\n  // the deltas calculated from the MemoryTracker\n  auto it_res = op_args.GetDbSlice().FindMutable(op_args.db_cntx, key, OBJ_JSON);\n  if (!it_res) {\n    return 0;\n  }\n\n  if (json_path.HoldsJsonPath()) {\n    JsonAutoUpdater updater(op_args, key, *std::move(it_res),\n                            {.disable_indexing = false, .update_on_delete = true});\n    const json::Path& path = json_path.AsJsonPath();\n    long deletions = json::DeletePath(path, updater.GetJson());\n    return deletions;\n  }\n\n  // Allocates memory for the deletion_items.\n  // So we need to initialize JsonAutoUpdater after this callback\n  vector<string> deletion_items;\n  auto cb = [&deletion_items](string_view path, const JsonType& val) -> Nothing {\n    deletion_items.emplace_back(path);\n    return {};\n  };\n\n  auto res = json_path.ExecuteReadOnlyCallback<Nothing>(\n      it_res->it->second.GetJson(), cb, CallbackResultOptions::DefaultReadOnlyOptions());\n  if (deletion_items.empty()) {\n    return 0;\n  }\n\n  long total_deletions = 0;\n  JsonType patch(jsoncons::json_array_arg, {});\n  reverse(deletion_items.begin(), deletion_items.end());  // deletion should finish at root keys.\n  for (const auto& item : deletion_items) {\n    string pointer = ConvertToJsonPointer(item);\n    total_deletions++;\n    JsonType patch_item(jsoncons::json_object_arg, {{\"op\", \"remove\"}, 
{\"path\", pointer}});\n    patch.emplace_back(patch_item);\n  }\n\n  JsonAutoUpdater updater(op_args, key, *std::move(it_res));\n\n  std::error_code ec;\n  jsoncons::jsonpatch::apply_patch(*updater.GetJson(), patch, ec);\n  if (ec) {\n    VLOG(1) << \"Failed to apply patch on json with error: \" << ec.message();\n    return 0;\n  }\n\n  updater.SetJsonSize();\n\n  return total_deletions;\n}\n\n// Returns a vector of string vectors,\n// keys within the same object are stored in the same string vector.\nauto OpObjKeys(const OpArgs& op_args, string_view key, const WrappedJsonPath& json_path) {\n  auto cb = [](const string_view& path, const JsonType& val) {\n    // Aligned with ElastiCache flavor.\n    DVLOG(2) << \"path: \" << path << \" val: \" << val.to_string();\n\n    StringVec vec;\n    if (val.is_object()) {\n      for (const auto& member : val.object_range()) {\n        vec.emplace_back(member.key());\n      }\n    }\n    return vec;\n  };\n  return JsonReadOnlyOperation<StringVec>(\n      op_args, key, json_path, std::move(cb),\n      {json_path.IsLegacyModePath(),\n       CallbackResultOptions::DefaultReadOnlyOptions(SavingOrder::kSaveFirst)});\n}\n\nOpResult<JsonCallbackResult<OptSize>> OpStrAppend(const OpArgs& op_args, string_view key,\n                                                  const WrappedJsonPath& path, string_view value) {\n  auto cb = [&](optional<string_view>, JsonType* val) -> MutateCallbackResult<size_t> {\n    if (!val->is_string())\n      return {};\n\n    string new_val = absl::StrCat(val->as_string_view(), value);\n    size_t len = new_val.size();\n    *val = std::move(new_val);\n    return {false, len};  // do not delete, new value len\n  };\n  return JsonMutateOperation<size_t>(op_args, key, path, std::move(cb));\n}\n\n// Returns the numbers of values cleared.\n// Clears containers(arrays or objects) and zeroing numbers.\nOpResult<long> OpClear(const OpArgs& op_args, string_view key, const WrappedJsonPath& path) {\n  long clear_items 
= 0;\n\n  auto cb = [&clear_items](std::optional<std::string_view>,\n                           JsonType* val) -> MutateCallbackResult<> {\n    if (!(val->is_object() || val->is_array() || val->is_number())) {\n      return {};\n    }\n\n    if (val->is_object()) {\n      val->erase(val->object_range().begin(), val->object_range().end());\n    } else if (val->is_array()) {\n      val->erase(val->array_range().begin(), val->array_range().end());\n    } else if (val->is_number()) {\n      *val = 0;\n    }\n\n    clear_items += 1;\n    return {};\n  };\n\n  auto res = JsonMutateOperation<Nothing>(op_args, key, path, std::move(cb));\n  RETURN_ON_BAD_STATUS(res);\n  return clear_items;\n}\n\n// Returns string vector that represents the pop out values.\nauto OpArrPop(const OpArgs& op_args, string_view key, WrappedJsonPath& path, int index) {\n  auto cb = [index](std::optional<std::string_view>,\n                    JsonType* val) -> MutateCallbackResult<std::string> {\n    if (!val->is_array() || val->empty()) {\n      return {};\n    }\n\n    size_t array_size = val->size();\n    size_t removal_index = std::min(NormalizeNegativeIndex(index, array_size), array_size - 1);\n\n    auto it = GetJsonArrayIterator(val, removal_index);\n    string str;\n    error_code ec;\n    it->dump(str, {}, ec);\n    if (ec) {\n      LOG(ERROR) << \"Failed to dump JSON to string with the error: \" << ec.message();\n      return {};\n    }\n\n    val->erase(it);\n    return {false, std::move(str)};\n  };\n  return JsonMutateOperation<std::string>(op_args, key, path, std::move(cb),\n                                          CallbackResultOptions{OnEmpty::kSendNil});\n}\n\n// Returns numeric vector that represents the new length of the array at each path.\nauto OpArrTrim(const OpArgs& op_args, string_view key, const WrappedJsonPath& path, int start_index,\n               int stop_index) {\n  auto cb = [&](optional<string_view>, JsonType* val) -> MutateCallbackResult<size_t> {\n    if 
(!val->is_array()) {\n      return {};\n    }\n\n    if (val->empty()) {\n      return {false, 0};\n    }\n\n    size_t array_size = val->size();\n\n    size_t trim_start_index = NormalizeNegativeIndex(start_index, array_size);\n    size_t trim_end_index = NormalizeNegativeIndex(stop_index, array_size);\n\n    if (trim_start_index >= array_size || trim_start_index > trim_end_index) {\n      val->erase(val->array_range().begin(), val->array_range().end());\n      return {false, 0};\n    }\n\n    trim_end_index = std::min(trim_end_index, array_size);\n\n    auto trim_start_it = GetJsonArrayIterator(val, trim_start_index);\n    auto trim_end_it = val->array_range().end();\n    if (trim_end_index < val->size()) {\n      trim_end_it = GetJsonArrayIterator(val, trim_end_index + 1);\n    }\n\n    *val = jsoncons::json_array<JsonType>(trim_start_it, trim_end_it);\n    return {false, val->size()};\n  };\n  return JsonMutateOperation<size_t>(op_args, key, path, std::move(cb));\n}\n\n// Returns numeric vector that represents the new length of the array at each path.\nOpResult<JsonCallbackResult<OptSize>> OpArrInsert(const OpArgs& op_args, string_view key,\n                                                  const WrappedJsonPath& json_path, int index,\n                                                  const vector<string_view>& new_values) {\n  vector<JsonType> parsed_values;\n  parsed_values.reserve(new_values.size());\n\n  for (const auto& nv : new_values) {\n    optional<JsonType> v = ShardJsonFromString(nv);\n    if (!v) {\n      return OpStatus::SYNTAX_ERR;\n    }\n\n    parsed_values.emplace_back(std::move(*v));\n  }\n\n  bool out_of_boundaries_encountered = false;\n\n  // Insert user-supplied value into the supplied index that should be valid.\n  // If at least one index isn't valid within an array in the json doc, the operation is discarded.\n  // Negative indexes start from the end of the array.\n  auto cb = [&](std::optional<std::string_view>, JsonType* val) -> 
MutateCallbackResult<size_t> {\n    if (out_of_boundaries_encountered || !val->is_array()) {\n      return {};\n    }\n\n    size_t array_size = val->size();\n    size_t insert_before_index;\n\n    if (index < 0) {\n      if (static_cast<size_t>(-index) > array_size) {\n        out_of_boundaries_encountered = true;\n        return {};\n      }\n      insert_before_index = array_size + index;\n    } else {\n      if (static_cast<size_t>(index) > val->size()) {\n        out_of_boundaries_encountered = true;\n        return {};\n      }\n      insert_before_index = index;\n    }\n\n    auto it = GetJsonArrayIterator(val, insert_before_index);\n    for (auto& new_val : parsed_values) {\n      it = val->insert(it, new_val);\n      it++;\n    }\n    return {false, val->size()};\n  };\n\n  auto res = JsonMutateOperation<size_t>(op_args, key, json_path, std::move(cb));\n  if (out_of_boundaries_encountered) {\n    return OpStatus::OUT_OF_RANGE;\n  }\n  return res;\n}\n\nOpResult<JsonCallbackResult<optional<optional<unsigned long>>>> OpArrAppend(\n    const OpArgs& op_args, string_view key, const WrappedJsonPath& path,\n    const vector<string_view>& append_values) {\n  vector<JsonType> parsed_values;\n  parsed_values.reserve(append_values.size());\n\n  for (const auto& v : append_values) {\n    optional<JsonType> parsed = ShardJsonFromString(v);\n    if (!parsed) {\n      return OpStatus::SYNTAX_ERR;\n    }\n    parsed_values.emplace_back(std::move(*parsed));\n  }\n\n  auto cb = [&](std::optional<std::string_view>,\n                JsonType* val) -> MutateCallbackResult<std::optional<std::size_t>> {\n    if (!val->is_array()) {\n      return {};\n    }\n    for (auto& new_val : parsed_values) {\n      val->emplace_back(new_val);\n    }\n    return {false, val->size()};\n  };\n  return JsonMutateOperation<std::optional<std::size_t>>(op_args, key, path, std::move(cb));\n}\n\n// Returns a numeric vector representing each JSON value first index of the JSON scalar.\n// An index 
value of -1 represents unfound in the array.\n// JSON scalar has types of string, boolean, null, and number.\nOpResult<JsonCallbackResult<optional<long>>> OpArrIndex(const OpArgs& op_args, string_view key,\n                                                        const WrappedJsonPath& json_path,\n                                                        string_view search_val, int start_index,\n                                                        int end_index) {\n  const optional<JsonType> search_value_json = ShardJsonFromString(search_val);\n  if (!search_value_json) {\n    return OpStatus::SYNTAX_ERR;\n  }\n\n  auto cb = [&](const string_view&, const JsonType& val) -> std::optional<long> {\n    if (!val.is_array()) {\n      return std::nullopt;\n    }\n\n    if (val.empty()) {\n      return -1;\n    }\n\n    size_t array_size = val.size();\n\n    if (start_index < 0 && static_cast<size_t>(-start_index) > array_size) {\n      return -1;\n    }\n\n    size_t pos_start_index = NormalizeNegativeIndex(start_index, array_size);\n    size_t pos_end_index =\n        end_index == 0 ? 
array_size : NormalizeNegativeIndex(end_index, array_size);\n\n    if (pos_start_index >= array_size && pos_end_index < array_size) {\n      return -1;\n    }\n\n    pos_start_index = std::min(pos_start_index, array_size - 1);\n    pos_end_index = std::min(pos_end_index, array_size - 1);\n\n    if (pos_start_index > pos_end_index) {\n      return -1;\n    }\n\n    size_t pos = -1;\n    auto it = GetJsonArrayIterator(val, pos_start_index);\n    while (it != val.array_range().end()) {\n      if (JsonAreEquals(search_value_json, *it)) {\n        pos = pos_start_index;\n        break;\n      }\n\n      if (pos_start_index == pos_end_index) {\n        break;\n      }\n\n      ++it;\n      pos_start_index++;\n    }\n\n    return pos;\n  };\n\n  return JsonReadOnlyOperation<std::optional<long>>(\n      op_args, key, json_path, std::move(cb),\n      {false, CallbackResultOptions{CallbackResultOptions::OnEmpty::kSendWrongType}});\n}\n\n// Returns string vector that represents the query result of each supplied key.\nstd::vector<std::optional<std::string>> OpJsonMGet(const WrappedJsonPath& json_path,\n                                                   const Transaction* t, EngineShard* shard) {\n  ShardArgs args = t->GetShardArgs(shard->shard_id());\n  DCHECK(!args.Empty());\n  std::vector<std::optional<std::string>> response(args.Size());\n\n  auto& db_slice = t->GetDbSlice(shard->shard_id());\n  unsigned index = 0;\n  for (string_view key : args) {\n    auto it_res = db_slice.FindReadOnly(t->GetDbContext(), key, OBJ_JSON);\n    auto& dest = response[index++];\n    if (!it_res.ok())\n      continue;\n\n    JsonType* json_val = it_res.value()->second.GetJson();\n    DCHECK(json_val) << \"should have a valid JSON object for key \" << key;\n\n    auto cb = [](std::string_view, const JsonType& val) { return val; };\n\n    auto eval_wrapped = [&json_val,\n                         &cb](const WrappedJsonPath& json_path) -> std::optional<JsonType> {\n      auto eval_result = 
json_path.ExecuteReadOnlyCallback<JsonType>(\n          json_val, std::move(cb), CallbackResultOptions::DefaultReadOnlyOptions());\n\n      if (eval_result.IsV1()) {\n        if (eval_result.Empty())\n          return nullopt;\n        return eval_result.AsV1();\n      }\n\n      return JsonType{eval_result.AsV2()};\n    };\n\n    auto eval_result = eval_wrapped(json_path);\n\n    if (!eval_result) {\n      continue;\n    }\n\n    std::string str;\n    std::error_code ec;\n    eval_result->dump(str, {}, ec);\n    if (ec) {\n      VLOG(1) << \"Failed to dump JSON array to string with the error: \" << ec.message();\n    }\n\n    dest = std::move(str);\n  }\n\n  return response;\n}\n\n// Returns numeric vector that represents the number of fields of JSON value at each path.\nauto OpFields(const OpArgs& op_args, string_view key, const WrappedJsonPath& json_path) {\n  auto cb = [](const string_view&, const JsonType& val) -> std::optional<std::size_t> {\n    return CountJsonFields(val);\n  };\n  return JsonReadOnlyOperation<std::optional<std::size_t>>(op_args, key, json_path, std::move(cb));\n}\n\n// Returns numeric vector that represents the memory size in bytes of JSON value at each path.\nauto OpMemory(const OpArgs& op_args, string_view key, const WrappedJsonPath& json_path) {\n  auto cb = [](const string_view&, const JsonType& val) -> std::optional<std::size_t> {\n    return ComputeMemorySize(val);\n  };\n  return JsonReadOnlyOperation<std::optional<std::size_t>>(\n      op_args, key, json_path, std::move(cb),\n      ReadOnlyOperationOptions{false, CallbackResultOptions::DefaultReadOnlyOptions()});\n}\n\n// Returns json vector that represents the result of the json query. A shard local\n// heap allocated JSON cannot be copied and then destroyed on another shard because we use stateless\n// allocators which forward all requests to thread local memory resource. 
This resource is\n// initialized by the engine shard, and it is possible that the coordinator thread may not have this\n// resource initialized. So the value is first copied to the std allocator-backed type TmpJson.\nOpResult<JsonCallbackResult<TmpJson>> OpResp(const OpArgs& op_args, string_view key,\n                                             const WrappedJsonPath& json_path) {\n  auto cb = [](const string_view&, const JsonType& val) {\n    string s;\n    val.dump(s);\n    return JsonFromString(s);\n  };\n  return JsonReadOnlyOperation<TmpJson>(op_args, key, json_path, std::move(cb));\n}\n\n// Returns boolean that represents the result of the operation.\nOpResult<bool> OpSet(const OpArgs& op_args, string_view key, string_view path,\n                     const WrappedJsonPath& json_path, std::string_view json_str,\n                     bool is_nx_condition, bool is_xx_condition) {\n  // The whole key should be replaced.\n  // NOTE: unlike in Redis, we are overriding the value when the path is \"$\"\n  // this is regardless of the current key type. 
In redis if the key exists\n  // and its not JSON, it would return an error.\n  if (json_path.RefersToRootElement()) {\n    if (is_nx_condition || is_xx_condition) {\n      auto it_res = op_args.GetDbSlice().FindReadOnly(op_args.db_cntx, key, OBJ_JSON);\n      bool key_exists = (it_res.status() != OpStatus::KEY_NOTFOUND);\n      if (is_nx_condition && key_exists) {\n        return false;\n      }\n\n      if (is_xx_condition && !key_exists) {\n        return false;\n      }\n    }\n\n    OpStatus result = SetFullJson(op_args, key, json_str);\n    if (result == OpStatus::OK) {\n      return true;\n    }\n    return result;\n  }\n\n  return SetPartialJson(op_args, key, json_path, json_str, is_nx_condition, is_xx_condition);\n}\n\nOpResult<bool> OpSet(const OpArgs& op_args, string_view key, string_view path,\n                     std::string_view json_str, bool is_nx_condition, bool is_xx_condition) {\n  auto res_json_path = ParseJsonPath(path);\n  if (!res_json_path) {\n    return OpStatus::SYNTAX_ERR;  // TODO(Return initial error)\n  }\n  return OpSet(op_args, key, path, res_json_path.value(), json_str, is_nx_condition,\n               is_xx_condition);\n}\n\nOpStatus OpMSet(const OpArgs& op_args, const ShardArgs& args) {\n  DCHECK_EQ(args.Size() % 3, 0u);\n\n  OpStatus result = OpStatus::OK;\n  size_t stored = 0;\n  for (auto it = args.begin(); it != args.end();) {\n    string_view key = *(it++);\n    string_view path = *(it++);\n    string_view value = *(it++);\n    if (auto res = OpSet(op_args, key, path, value, false, false); !res.ok()) {\n      result = res.status();\n      break;\n    }\n\n    stored++;\n  }\n\n  // Replicate custom journal, see OpMSet\n  if (auto journal = op_args.shard->journal(); journal) {\n    if (stored * 3 == args.Size()) {\n      RecordJournal(op_args, \"JSON.MSET\", args, op_args.tx->GetUniqueShardCnt());\n      DCHECK_EQ(result, OpStatus::OK);\n      return result;\n    }\n\n    string_view cmd = stored == 0 ? 
\"PING\" : \"JSON.MSET\";\n    vector<string_view> store_args(args.begin(), args.end());\n    store_args.resize(stored * 3);\n    RecordJournal(op_args, cmd, store_args, op_args.tx->GetUniqueShardCnt());\n  }\n\n  return result;\n}\n\n// Note that currently OpMerge works only with jsoncons and json::Path support has not been\n// implemented yet.\nOpStatus OpMerge(const OpArgs& op_args, string_view key, string_view path,\n                 const WrappedJsonPath& json_path, std::string_view json_str) {\n  auto it_res = op_args.GetDbSlice().FindMutable(op_args.db_cntx, key, OBJ_JSON);\n  OpStatus res_status = it_res.status();\n\n  if (res_status == OpStatus::OK) {\n    JsonAutoUpdater updater(op_args, key, *std::move(it_res));\n\n    std::optional<JsonType> parsed_json = ShardJsonFromString(json_str);\n    if (!parsed_json) {\n      VLOG(1) << \"got invalid JSON string '\" << json_str << \"' cannot be saved\";\n      return OpStatus::INVALID_JSON;\n    }\n\n    auto cb = [&](std::optional<std::string_view> cur_path,\n                  JsonType* val) -> MutateCallbackResult<> {\n      string_view strpath = cur_path ? 
*cur_path : string_view{};\n      DVLOG(2) << \"Handling \" << strpath << \" \" << val->to_string();\n\n      // https://datatracker.ietf.org/doc/html/rfc7386#section-2\n      try {\n        mergepatch::apply_merge_patch(*val, *parsed_json);\n      } catch (const std::exception& e) {\n        LOG_EVERY_T(ERROR, 1) << \"Exception in OpMerge: \" << e.what() << \" with obj: \" << *val\n                              << \" and patch: \" << *parsed_json << \", path: \" << strpath;\n      }\n\n      return {};\n    };\n\n    auto opts = CallbackResultOptions::DefaultMutateOptions();\n    auto res = json_path.ExecuteMutateCallback<Nothing>(updater.GetJson(), cb, opts);\n    parsed_json.reset();\n    updater.SetJsonSize();\n\n    res_status = res.status();\n  }\n\n  if (res_status != OpStatus::KEY_NOTFOUND)\n    return res_status;\n\n  if (json_path.RefersToRootElement()) {\n    return OpSet(op_args, key, path, json_path, json_str, false, false).status();\n  }\n  return OpStatus::SYNTAX_ERR;\n}\n\nvoid CmdSet(CmdArgList args, CommandContext* cmd_cntx) {\n  CmdArgParser parser{args};\n  auto [key, path, json_str] = parser.Next<string_view, string_view, string_view>();\n  auto* builder = static_cast<RedisReplyBuilder*>(cmd_cntx->rb());\n  WrappedJsonPath json_path = GET_OR_SEND_UNEXPECTED(ParseJsonPath(path));\n\n  auto res = parser.TryMapNext(\"NX\", 1, \"XX\", 2);\n  bool is_xx_condition = (res == 2), is_nx_condition = (res == 1);\n\n  if (parser.TakeError() || parser.HasNext())  // also clear the parser error dcheck\n    return builder->SendError(kSyntaxErr);\n\n  auto cb = [&, &key = key, &path = path, &json_str = json_str](Transaction* t,\n                                                                EngineShard* shard) {\n    return OpSet(t->GetOpArgs(shard), key, path, json_path, json_str, is_nx_condition,\n                 is_xx_condition);\n  };\n\n  OpResult<bool> result = cmd_cntx->tx()->ScheduleSingleHopT(std::move(cb));\n\n  if (result) {\n    if (*result) {\n  
    builder->SendOk();\n    } else {\n      builder->SendNull();\n    }\n  } else {\n    cmd_cntx->SendError(result.status());\n  }\n}\n\n// JSON.MSET key path value [key path value ...]\nvoid CmdMSet(CmdArgList args, CommandContext* cmd_cntx) {\n  DCHECK_GE(args.size(), 3u);\n\n  auto* builder = static_cast<RedisReplyBuilder*>(cmd_cntx->rb());\n  if (args.size() % 3 != 0) {\n    return builder->SendError(facade::WrongNumArgsError(\"json.mset\"));\n  }\n\n  AggregateStatus status;\n  auto cb = [&status](Transaction* t, EngineShard* shard) {\n    auto op_args = t->GetOpArgs(shard);\n    ShardArgs args = t->GetShardArgs(shard->shard_id());\n    if (auto result = OpMSet(op_args, args); result != OpStatus::OK)\n      status = result;\n    return OpStatus::OK;\n  };\n\n  cmd_cntx->tx()->ScheduleSingleHop(cb);\n\n  if (*status != OpStatus::OK)\n    return cmd_cntx->SendError(*status);\n  builder->SendOk();\n}\n\n// JSON.MERGE key path value\n// Based on https://datatracker.ietf.org/doc/html/rfc7386 spec\nvoid CmdMerge(CmdArgList args, CommandContext* cmd_cntx) {\n  CmdArgParser parser{args};\n  string_view key = parser.Next();\n  string_view path = parser.Next();\n  string_view value = parser.Next();\n\n  auto* builder = static_cast<RedisReplyBuilder*>(cmd_cntx->rb());\n  WrappedJsonPath json_path = GET_OR_SEND_UNEXPECTED(ParseJsonPath(path));\n\n  auto cb = [&](Transaction* t, EngineShard* shard) {\n    return OpMerge(t->GetOpArgs(shard), key, path, json_path, value);\n  };\n\n  OpStatus status = cmd_cntx->tx()->ScheduleSingleHop(std::move(cb));\n  if (status == OpStatus::OK)\n    return builder->SendOk();\n  cmd_cntx->SendError(status);\n}\n\nvoid CmdResp(CmdArgList args, CommandContext* cmd_cntx) {\n  CmdArgParser parser{args};\n  string_view key = parser.Next();\n  string_view path = parser.NextOrDefault();\n\n  auto* builder = static_cast<RedisReplyBuilder*>(cmd_cntx->rb());\n  WrappedJsonPath json_path = GET_OR_SEND_UNEXPECTED(ParseJsonPath(path));\n\n  auto cb = 
[&](Transaction* t, EngineShard* shard) {\n    return OpResp(t->GetOpArgs(shard), key, json_path);\n  };\n\n  auto result = cmd_cntx->tx()->ScheduleSingleHopT(std::move(cb));\n  reply_generic::Send(result, cmd_cntx);\n}\n\nvoid CmdDebug(CmdArgList args, CommandContext* cmd_cntx) {\n  CmdArgParser parser{args};\n  string_view command = parser.Next();\n\n  auto* builder = static_cast<RedisReplyBuilder*>(cmd_cntx->rb());\n\n  if (absl::EqualsIgnoreCase(command, \"help\")) {\n    builder->StartArray(3);\n    builder->SendBulkString(\n        \"JSON.DEBUG MEMORY <key> [path] - report memory size (bytes) of the JSON element. \"\n        \"Path defaults to root if not provided.\");\n    builder->SendBulkString(\n        \"JSON.DEBUG FIELDS <key> [path] - report number of fields in the JSON element. \"\n        \"Path defaults to root if not provided.\");\n    builder->SendBulkString(\"JSON.DEBUG HELP - print help message.\");\n    return;\n  }\n\n  if (absl::EqualsIgnoreCase(command, \"memory\")) {\n    // JSON.DEBUG MEMORY\n    string_view key = parser.Next();\n    string_view path = parser.NextOrDefault();\n\n    WrappedJsonPath json_path = GET_OR_SEND_UNEXPECTED(ParseJsonPath(path));\n\n    ShardId sid = Shard(key, shard_set->size());\n    ConnectionContext* cntx = cmd_cntx->server_conn_cntx();\n    auto cb = [&]() {\n      EngineShard* shard = EngineShard::tlocal();\n      DbContext db_cntx{cntx->ns, cntx->conn_state.db_index};\n      OpArgs op_args{shard, nullptr, db_cntx};\n      return OpMemory(op_args, key, json_path);\n    };\n\n    auto result = shard_set->Await(sid, std::move(cb));\n    reply_generic::Send(result, cmd_cntx);\n    return;\n  }\n\n  if (absl::EqualsIgnoreCase(command, \"fields\")) {\n    // JSON.DEBUG FIELDS\n    string_view key = parser.Next();\n    string_view path = parser.NextOrDefault();\n\n    WrappedJsonPath json_path = GET_OR_SEND_UNEXPECTED(ParseJsonPath(path));\n\n    ShardId sid = Shard(key, shard_set->size());\n    ConnectionContext* 
cntx = cmd_cntx->server_conn_cntx();\n    auto cb = [&]() {\n      EngineShard* shard = EngineShard::tlocal();\n      DbContext db_cntx{cntx->ns, cntx->conn_state.db_index};\n      OpArgs op_args{shard, nullptr, db_cntx};\n      return OpFields(op_args, key, json_path);\n    };\n\n    auto result = shard_set->Await(sid, std::move(cb));\n    reply_generic::Send(result, cmd_cntx);\n    return;\n  }\n\n  builder->SendError(facade::UnknownSubCmd(command, \"JSON.DEBUG\"), facade::kSyntaxErrType);\n}\n\nvoid CmdMGet(CmdArgList args, CommandContext* cmd_cntx) {\n  DCHECK_GE(args.size(), 1U);\n\n  string_view path = ArgS(args, args.size() - 1);\n\n  auto* builder = static_cast<RedisReplyBuilder*>(cmd_cntx->rb());\n  WrappedJsonPath json_path = GET_OR_SEND_UNEXPECTED(ParseJsonPath(path));\n\n  unsigned shard_count = shard_set->size();\n  std::vector<std::vector<std::optional<std::string>>> mget_resp(shard_count);\n\n  auto cb = [&](Transaction* t, EngineShard* shard) {\n    ShardId sid = shard->shard_id();\n    mget_resp[sid] = OpJsonMGet(json_path, t, shard);\n    return OpStatus::OK;\n  };\n\n  OpStatus result = cmd_cntx->tx()->ScheduleSingleHop(std::move(cb));\n  CHECK_EQ(OpStatus::OK, result);\n\n  std::vector<std::optional<std::string>> results(args.size() - 1);\n  for (ShardId sid = 0; sid < shard_count; ++sid) {\n    if (!cmd_cntx->tx()->IsActive(sid))\n      continue;\n\n    std::vector<std::optional<std::string>>& res = mget_resp[sid];\n    ShardArgs shard_args = cmd_cntx->tx()->GetShardArgs(sid);\n    unsigned src_index = 0;\n    for (auto it = shard_args.begin(); it != shard_args.end(); ++it, ++src_index) {\n      if (!res[src_index])\n        continue;\n\n      uint32_t dst_indx = it.index();\n      results[dst_indx] = std::move(res[src_index]);\n    }\n  }\n\n  reply_generic::Send(results.begin(), results.end(), cmd_cntx);\n}\n\nvoid CmdArrIndex(CmdArgList args, CommandContext* cmd_cntx) {\n  CmdArgParser parser{args};\n  string_view key = parser.Next();\n  
string_view path = parser.Next();\n\n  auto* builder = static_cast<RedisReplyBuilder*>(cmd_cntx->rb());\n  WrappedJsonPath json_path = GET_OR_SEND_UNEXPECTED(ParseJsonPath(path));\n\n  string_view search_value = parser.Next();\n\n  int start_index = 0;\n  if (parser.HasNext()) {\n    if (!absl::SimpleAtoi(parser.Next(), &start_index)) {\n      VLOG(1) << \"Failed to convert the start index to numeric\" << ArgS(args, 3);\n      builder->SendError(kInvalidIntErr);\n      return;\n    }\n  }\n\n  int end_index = 0;\n  if (parser.HasNext()) {\n    if (!absl::SimpleAtoi(parser.Next(), &end_index)) {\n      VLOG(1) << \"Failed to convert the stop index to numeric\" << ArgS(args, 4);\n      builder->SendError(kInvalidIntErr);\n      return;\n    }\n  }\n\n  auto cb = [&](Transaction* t, EngineShard* shard) {\n    return OpArrIndex(t->GetOpArgs(shard), key, json_path, search_value, start_index, end_index);\n  };\n\n  auto result = cmd_cntx->tx()->ScheduleSingleHopT(std::move(cb));\n  reply_generic::Send(result, cmd_cntx);\n}\n\nvoid CmdArrInsert(CmdArgList args, CommandContext* cmd_cntx) {\n  string_view key = ArgS(args, 0);\n  string_view path = ArgS(args, 1);\n  int index = -1;\n\n  auto* builder = static_cast<RedisReplyBuilder*>(cmd_cntx->rb());\n  if (!absl::SimpleAtoi(ArgS(args, 2), &index)) {\n    VLOG(1) << \"Failed to convert the following value to numeric: \" << ArgS(args, 2);\n    builder->SendError(kInvalidIntErr);\n    return;\n  }\n\n  WrappedJsonPath json_path = GET_OR_SEND_UNEXPECTED(ParseJsonPath(path));\n\n  vector<string_view> new_values;\n  for (size_t i = 3; i < args.size(); i++) {\n    new_values.emplace_back(ArgS(args, i));\n  }\n\n  auto cb = [&](Transaction* t, EngineShard* shard) {\n    return OpArrInsert(t->GetOpArgs(shard), key, json_path, index, new_values);\n  };\n\n  auto result = cmd_cntx->tx()->ScheduleSingleHopT(std::move(cb));\n  reply_generic::Send(result, cmd_cntx);\n}\n\nvoid CmdArrAppend(CmdArgList args, CommandContext* cmd_cntx) {\n  
string_view key = ArgS(args, 0);\n  string_view path = ArgS(args, 1);\n\n  auto* builder = static_cast<RedisReplyBuilder*>(cmd_cntx->rb());\n  WrappedJsonPath json_path = GET_OR_SEND_UNEXPECTED(ParseJsonPath(path));\n\n  vector<string_view> append_values;\n  for (size_t i = 2; i < args.size(); ++i) {\n    append_values.emplace_back(ArgS(args, i));\n  }\n\n  auto cb = [&](Transaction* t, EngineShard* shard) {\n    return OpArrAppend(t->GetOpArgs(shard), key, json_path, append_values);\n  };\n\n  auto result = cmd_cntx->tx()->ScheduleSingleHopT(std::move(cb));\n  reply_generic::Send(result, cmd_cntx);\n}\n\nvoid CmdArrTrim(CmdArgList args, CommandContext* cmd_cntx) {\n  string_view key = ArgS(args, 0);\n  string_view path = ArgS(args, 1);\n  int start_index;\n  int stop_index;\n\n  auto* builder = static_cast<RedisReplyBuilder*>(cmd_cntx->rb());\n  if (!absl::SimpleAtoi(ArgS(args, 2), &start_index)) {\n    VLOG(1) << \"Failed to parse array start index\";\n    builder->SendError(kInvalidIntErr);\n    return;\n  }\n\n  if (!absl::SimpleAtoi(ArgS(args, 3), &stop_index)) {\n    VLOG(1) << \"Failed to parse array stop index\";\n    builder->SendError(kInvalidIntErr);\n    return;\n  }\n\n  WrappedJsonPath json_path = GET_OR_SEND_UNEXPECTED(ParseJsonPath(path));\n\n  auto cb = [&](Transaction* t, EngineShard* shard) {\n    return OpArrTrim(t->GetOpArgs(shard), key, json_path, start_index, stop_index);\n  };\n\n  auto result = cmd_cntx->tx()->ScheduleSingleHopT(std::move(cb));\n  reply_generic::Send(result, cmd_cntx);\n}\n\nvoid CmdArrPop(CmdArgList args, CommandContext* cmd_cntx) {\n  CmdArgParser parser{args};\n  string_view key = parser.Next();\n  string_view path = parser.NextOrDefault();\n  int index = parser.NextOrDefault<int>(-1);\n\n  auto* builder = static_cast<RedisReplyBuilder*>(cmd_cntx->rb());\n  RETURN_ON_PARSE_ERROR(parser, cmd_cntx);\n\n  WrappedJsonPath json_path = GET_OR_SEND_UNEXPECTED(ParseJsonPath(path));\n\n  auto cb = [&](Transaction* t, EngineShard* 
shard) {\n    return OpArrPop(t->GetOpArgs(shard), key, json_path, index);\n  };\n\n  auto result = cmd_cntx->tx()->ScheduleSingleHopT(std::move(cb));\n  reply_generic::Send(result, cmd_cntx);\n}\n\nvoid CmdClear(CmdArgList args, CommandContext* cmd_cntx) {\n  CmdArgParser parser{args};\n  string_view key = parser.Next();\n  string_view path = parser.NextOrDefault();\n\n  auto* builder = static_cast<RedisReplyBuilder*>(cmd_cntx->rb());\n  WrappedJsonPath json_path = GET_OR_SEND_UNEXPECTED(ParseJsonPath(path));\n\n  auto cb = [&](Transaction* t, EngineShard* shard) {\n    return OpClear(t->GetOpArgs(shard), key, json_path);\n  };\n\n  OpResult<long> result = cmd_cntx->tx()->ScheduleSingleHopT(std::move(cb));\n  reply_generic::Send(result, cmd_cntx);\n}\n\nvoid CmdStrAppend(CmdArgList args, CommandContext* cmd_cntx) {\n  string_view key = ArgS(args, 0);\n  string_view path = ArgS(args, 1);\n  string_view value = ArgS(args, 2);\n\n  auto* builder = static_cast<RedisReplyBuilder*>(cmd_cntx->rb());\n  WrappedJsonPath json_path = GET_OR_SEND_UNEXPECTED(ParseJsonPath(path));\n\n  // We try parsing the value into json string object first.\n  optional<TmpJson> parsed_json = JsonFromString(value);\n  if (!parsed_json || !parsed_json->is_string()) {\n    return builder->SendError(\"expected string value\", kSyntaxErrType);\n  };\n\n  string_view json_string = parsed_json->as_string_view();\n  auto cb = [&](Transaction* t, EngineShard* shard) {\n    return OpStrAppend(t->GetOpArgs(shard), key, json_path, json_string);\n  };\n\n  auto result = cmd_cntx->tx()->ScheduleSingleHopT(std::move(cb));\n  reply_generic::Send(result, cmd_cntx);\n}\n\nvoid CmdObjKeys(CmdArgList args, CommandContext* cmd_cntx) {\n  CmdArgParser parser{args};\n  string_view key = parser.Next();\n  string_view path = parser.NextOrDefault();\n\n  auto* builder = static_cast<RedisReplyBuilder*>(cmd_cntx->rb());\n  WrappedJsonPath json_path = GET_OR_SEND_UNEXPECTED(ParseJsonPath(path));\n\n  auto cb = 
[&](Transaction* t, EngineShard* shard) {\n    return OpObjKeys(t->GetOpArgs(shard), key, json_path);\n  };\n\n  auto result = cmd_cntx->tx()->ScheduleSingleHopT(std::move(cb));\n  reply_generic::Send(result, cmd_cntx);\n}\n\nvoid CmdDel(CmdArgList args, CommandContext* cmd_cntx) {\n  CmdArgParser parser{args};\n  string_view key = parser.Next();\n  string_view path = parser.NextOrDefault();\n\n  auto* builder = static_cast<RedisReplyBuilder*>(cmd_cntx->rb());\n  WrappedJsonPath json_path = GET_OR_SEND_UNEXPECTED(ParseJsonPath(path));\n\n  auto cb = [&](Transaction* t, EngineShard* shard) {\n    return OpDel(t->GetOpArgs(shard), key, path, json_path);\n  };\n\n  OpResult<long> result = cmd_cntx->tx()->ScheduleSingleHopT(std::move(cb));\n  reply_generic::Send(result, cmd_cntx);\n}\n\nvoid CmdNumIncrBy(CmdArgList args, CommandContext* cmd_cntx) {\n  string_view key = ArgS(args, 0);\n  string_view path = ArgS(args, 1);\n  string_view num = ArgS(args, 2);\n\n  auto* builder = static_cast<RedisReplyBuilder*>(cmd_cntx->rb());\n  WrappedJsonPath json_path = GET_OR_SEND_UNEXPECTED(ParseJsonPath(path));\n\n  auto cb = [&](Transaction* t, EngineShard* shard) {\n    return OpDoubleArithmetic(t->GetOpArgs(shard), key, json_path, num, OP_ADD);\n  };\n\n  OpResult<string> result = cmd_cntx->tx()->ScheduleSingleHopT(std::move(cb));\n  reply_generic::SendJsonString(result, cmd_cntx);\n}\n\nvoid CmdNumMultBy(CmdArgList args, CommandContext* cmd_cntx) {\n  string_view key = ArgS(args, 0);\n  string_view path = ArgS(args, 1);\n  string_view num = ArgS(args, 2);\n\n  auto* builder = static_cast<RedisReplyBuilder*>(cmd_cntx->rb());\n  WrappedJsonPath json_path = GET_OR_SEND_UNEXPECTED(ParseJsonPath(path));\n\n  auto cb = [&](Transaction* t, EngineShard* shard) {\n    return OpDoubleArithmetic(t->GetOpArgs(shard), key, json_path, num, OP_MULTIPLY);\n  };\n\n  OpResult<string> result = cmd_cntx->tx()->ScheduleSingleHopT(std::move(cb));\n  reply_generic::SendJsonString(result, 
cmd_cntx);\n}\n\nvoid CmdToggle(CmdArgList args, CommandContext* cmd_cntx) {\n  CmdArgParser parser{args};\n  string_view key = parser.Next();\n  string_view path = parser.NextOrDefault();\n\n  auto* builder = static_cast<RedisReplyBuilder*>(cmd_cntx->rb());\n  WrappedJsonPath json_path = GET_OR_SEND_UNEXPECTED(ParseJsonPath(path));\n\n  if (json_path.IsLegacyModePath()) {\n    ExecuteToggle<bool>(key, json_path, cmd_cntx);\n  } else {\n    ExecuteToggle<long>(key, json_path, cmd_cntx);\n  }\n}\n\nvoid CmdType(CmdArgList args, CommandContext* cmd_cntx) {\n  CmdArgParser parser{args};\n  string_view key = parser.Next();\n  string_view path = parser.NextOrDefault();\n\n  auto* builder = static_cast<RedisReplyBuilder*>(cmd_cntx->rb());\n  WrappedJsonPath json_path = GET_OR_SEND_UNEXPECTED(ParseJsonPath(path));\n\n  auto cb = [&](Transaction* t, EngineShard* shard) {\n    return OpType(t->GetOpArgs(shard), key, json_path);\n  };\n\n  auto result = cmd_cntx->tx()->ScheduleSingleHopT(std::move(cb));\n  reply_generic::Send(result, cmd_cntx);\n}\n\nvoid CmdArrLen(CmdArgList args, CommandContext* cmd_cntx) {\n  CmdArgParser parser{args};\n  string_view key = parser.Next();\n  string_view path = parser.NextOrDefault();\n\n  auto* builder = static_cast<RedisReplyBuilder*>(cmd_cntx->rb());\n  WrappedJsonPath json_path = GET_OR_SEND_UNEXPECTED(ParseJsonPath(path));\n\n  auto cb = [&](Transaction* t, EngineShard* shard) {\n    return OpArrLen(t->GetOpArgs(shard), key, json_path);\n  };\n\n  auto result = cmd_cntx->tx()->ScheduleSingleHopT(std::move(cb));\n  reply_generic::Send(result, cmd_cntx);\n}\n\nvoid CmdObjLen(CmdArgList args, CommandContext* cmd_cntx) {\n  CmdArgParser parser{args};\n  string_view key = parser.Next();\n  string_view path = parser.NextOrDefault();\n\n  auto* builder = static_cast<RedisReplyBuilder*>(cmd_cntx->rb());\n  WrappedJsonPath json_path = GET_OR_SEND_UNEXPECTED(ParseJsonPath(path));\n\n  auto cb = [&](Transaction* t, EngineShard* shard) {\n    
return OpObjLen(t->GetOpArgs(shard), key, json_path);\n  };\n\n  auto result = cmd_cntx->tx()->ScheduleSingleHopT(std::move(cb));\n  reply_generic::Send(result, cmd_cntx);\n}\n\nvoid CmdStrLen(CmdArgList args, CommandContext* cmd_cntx) {\n  CmdArgParser parser{args};\n  string_view key = parser.Next();\n  string_view path = parser.NextOrDefault();\n\n  auto* builder = static_cast<RedisReplyBuilder*>(cmd_cntx->rb());\n  WrappedJsonPath json_path = GET_OR_SEND_UNEXPECTED(ParseJsonPath(path));\n\n  auto cb = [&](Transaction* t, EngineShard* shard) {\n    return OpStrLen(t->GetOpArgs(shard), key, json_path);\n  };\n\n  auto result = cmd_cntx->tx()->ScheduleSingleHopT(std::move(cb));\n  reply_generic::Send(result, cmd_cntx);\n}\n\nvoid CmdGet(CmdArgList args, CommandContext* cmd_cntx) {\n  DCHECK_GE(args.size(), 1U);\n\n  facade::CmdArgParser parser{args};\n  string_view key = parser.Next();\n  auto* builder = static_cast<RedisReplyBuilder*>(cmd_cntx->rb());\n\n  auto params = ParseJsonGetParams(&parser, builder);\n  if (!params) {\n    return;  // ParseJsonGetParams should have already sent an error\n  }\n\n  RETURN_ON_PARSE_ERROR(parser, cmd_cntx);\n\n  auto cb = [&](Transaction* t, EngineShard* shard) {\n    return OpJsonGet(t->GetOpArgs(shard), key, params.value());\n  };\n\n  OpResult<string> result = cmd_cntx->tx()->ScheduleSingleHopT(std::move(cb));\n  auto* rb = static_cast<RedisReplyBuilder*>(builder);\n\n  if (result == OpStatus::KEY_NOTFOUND) {\n    rb->SendNull();  // Match Redis\n  } else {\n    reply_generic::Send(result, cmd_cntx);\n  }\n}\n\n}  // namespace\n\n#define HFUNC(x) SetHandler(&Cmd##x)\n\n// Redis modules do not have acl categories, therefore they can not be used by default.\n// However, we do not implement those as modules and therefore we can define our own\n// sensible defaults.\n// For now I introduced only the JSON category which will be the default.\n// TODO: Add sensible defaults/categories to json commands\n\nvoid 
RegisterJsonFamily(CommandRegistry* registry) {\n  constexpr size_t kMsetFlags = CO::JOURNALED | CO::DENYOOM | CO::FAST | CO::NO_AUTOJOURNAL;\n  registry->StartFamily();\n  *registry << CI{\"JSON.GET\", CO::READONLY | CO::FAST, -2, 1, 1, acl::JSON}.HFUNC(Get);\n  *registry << CI{\"JSON.MGET\", CO::READONLY | CO::FAST, -3, 1, -2, acl::JSON}.HFUNC(MGet);\n  *registry << CI{\"JSON.TYPE\", CO::READONLY | CO::FAST, -2, 1, 1, acl::JSON}.HFUNC(Type);\n  *registry << CI{\"JSON.STRLEN\", CO::READONLY | CO::FAST, -2, 1, 1, acl::JSON}.HFUNC(StrLen);\n  *registry << CI{\"JSON.OBJLEN\", CO::READONLY | CO::FAST, -2, 1, 1, acl::JSON}.HFUNC(ObjLen);\n  *registry << CI{\"JSON.ARRLEN\", CO::READONLY | CO::FAST, -2, 1, 1, acl::JSON}.HFUNC(ArrLen);\n  *registry << CI{\"JSON.TOGGLE\", CO::JOURNALED | CO::FAST, 3, 1, 1, acl::JSON}.HFUNC(Toggle);\n  *registry << CI{\"JSON.NUMINCRBY\", CO::JOURNALED | CO::FAST, 4, 1, 1, acl::JSON}.HFUNC(NumIncrBy);\n  *registry << CI{\"JSON.NUMMULTBY\", CO::JOURNALED | CO::FAST, 4, 1, 1, acl::JSON}.HFUNC(NumMultBy);\n  *registry << CI{\"JSON.DEL\", CO::JOURNALED, -2, 1, 1, acl::JSON}.HFUNC(Del);\n  *registry << CI{\"JSON.FORGET\", CO::JOURNALED, -2, 1, 1, acl::JSON}.HFUNC(\n      Del);  // An alias of JSON.DEL.\n  *registry << CI{\"JSON.OBJKEYS\", CO::READONLY | CO::FAST, -2, 1, 1, acl::JSON}.HFUNC(ObjKeys);\n  *registry << CI{\"JSON.STRAPPEND\", CO::JOURNALED | CO::DENYOOM | CO::FAST, 4, 1, 1, acl::JSON}\n                   .HFUNC(StrAppend);\n  *registry << CI{\"JSON.CLEAR\", CO::JOURNALED | CO::FAST, -2, 1, 1, acl::JSON}.HFUNC(Clear);\n  *registry << CI{\"JSON.ARRPOP\", CO::JOURNALED | CO::FAST, -2, 1, 1, acl::JSON}.HFUNC(ArrPop);\n  *registry << CI{\"JSON.ARRTRIM\", CO::JOURNALED | CO::FAST, 5, 1, 1, acl::JSON}.HFUNC(ArrTrim);\n  *registry << CI{\"JSON.ARRINSERT\", CO::JOURNALED | CO::DENYOOM | CO::FAST, -4, 1, 1, acl::JSON}\n                   .HFUNC(ArrInsert);\n  *registry << CI{\"JSON.ARRAPPEND\", CO::JOURNALED | CO::DENYOOM | CO::FAST, -4, 1, 1, 
acl::JSON}\n                   .HFUNC(ArrAppend);\n  *registry << CI{\"JSON.ARRINDEX\", CO::READONLY | CO::FAST, -4, 1, 1, acl::JSON}.HFUNC(ArrIndex);\n  *registry\n      << CI{\"JSON.DEBUG\", CO::READONLY | CO::FAST, -2, 0, 0, acl::JSON}.HFUNC(Debug)\n      << CI{\"JSON.RESP\", CO::READONLY | CO::FAST, -2, 1, 1, acl::JSON}.HFUNC(Resp)\n      << CI{\"JSON.SET\", CO::JOURNALED | CO::DENYOOM | CO::FAST, -4, 1, 1, acl::JSON}.HFUNC(Set)\n      << CI{\"JSON.MSET\", kMsetFlags, -4, 1, -1, acl::JSON}.HFUNC(MSet)\n      << CI{\"JSON.MERGE\", CO::JOURNALED | CO::DENYOOM | CO::FAST, 4, 1, 1, acl::JSON}.HFUNC(Merge);\n}\n\n}  // namespace dfly\n"
  },
  {
    "path": "src/server/json_family_memory_test.cc",
    "content": "// Copyright 2025, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#include \"base/gtest.h\"\n#include \"base/logging.h\"\n#include \"facade/facade_test.h\"\n#include \"server/test_utils.h\"\n\nusing namespace testing;\nusing namespace std;\nusing namespace util;\n\nABSL_DECLARE_FLAG(bool, jsonpathv2);\n\nnamespace dfly {\n\nclass JsonFamilyMemoryTest : public BaseFamilyTest {\n public:\n  static MiMemoryResource* GetMemoryResource() {\n    thread_local mi_heap_t* heap = mi_heap_new();\n    thread_local MiMemoryResource memory_resource{heap};\n    return &memory_resource;\n  }\n\n protected:\n  void SetUp() override {\n    BaseFamilyTest::SetUp();\n    // Make the core running the thread use the same resource as the rest of the test. Although\n    // BaseFamilyTest initializes the heap on shards serving transactions, the core running the test\n    // needs this initialized explicitly.\n    InitTLStatelessAllocMR(GetMemoryResource());\n    detail::InternedString::ResetPool();\n  }\n\n  auto GetJsonMemoryUsageFromDb(std::string_view key) {\n    return Run({\"MEMORY\", \"USAGE\", key, \"WITHOUTKEY\"});\n  }\n};\n\n// Single-thread fixture so all keys land on the same shard and share the same\n// thread-local InternedBlobPool. 
Required to reproduce interned string sharing bugs.\nclass JsonFamilyMemoryTestSingleThread : public JsonFamilyMemoryTest {\n public:\n  JsonFamilyMemoryTestSingleThread() {\n    num_threads_ = 1;\n  }\n};\n\nsize_t GetMemoryUsage() {\n  return JsonFamilyMemoryTest::GetMemoryResource()->used();\n}\n\nsize_t GetJsonMemoryUsageFromString(std::string_view json_str, bool include_root = true) {\n  size_t start = GetMemoryUsage();\n  auto json = ParseJsonUsingShardHeap(json_str);\n  if (!json) {\n    return 0;\n  }\n\n  // The same behaviour as in CompactObj\n  void* ptr =\n      JsonFamilyMemoryTest::GetMemoryResource()->allocate(sizeof(JsonType), alignof(JsonType));\n  JsonType* json_on_heap = new (ptr) JsonType(std::move(json).value());\n  DCHECK(json_on_heap);\n\n  size_t result = GetMemoryUsage() - start;\n  if (!include_root)\n    result -= mi_usable_size(ptr);\n\n  // Free the memory\n  json_on_heap->~JsonType();\n  JsonFamilyMemoryTest::GetMemoryResource()->deallocate(json_on_heap, sizeof(JsonType),\n                                                        alignof(JsonType));\n  return result;\n}\n\nTEST_F(JsonFamilyMemoryTest, SimpleSet) {\n  std::string_view big_json = R\"({\"a\":\"some big string asdkasdkasdfkkasjdkfjka\"})\";\n  size_t start_size = GetJsonMemoryUsageFromString(big_json);\n\n  auto resp = Run({\"JSON.SET\", \"j1\", \"$\", big_json});\n  EXPECT_EQ(resp, \"OK\");\n  resp = GetJsonMemoryUsageFromDb(\"j1\");\n  EXPECT_THAT(resp, IntArg(start_size));\n\n  std::string_view small_json = R\"({\"a\":\" \"})\";\n  size_t next_size = GetJsonMemoryUsageFromString(small_json);\n\n  resp = Run({\"JSON.SET\", \"j1\", \"$\", small_json});\n  EXPECT_EQ(resp, \"OK\");\n  resp = GetJsonMemoryUsageFromDb(\"j1\");\n  EXPECT_THAT(resp, IntArg(next_size));\n\n  // Again set big json\n  resp = Run({\"JSON.SET\", \"j1\", \"$\", big_json});\n  EXPECT_EQ(resp, \"OK\");\n  resp = GetJsonMemoryUsageFromDb(\"j1\");\n  EXPECT_THAT(resp, 
IntArg(start_size));\n}\n\nTEST_F(JsonFamilyMemoryTest, PartialSet) {\n  std::string_view start_json = R\"({\"a\":\"some text\", \"b\":\" \"})\";\n  size_t start_size = GetJsonMemoryUsageFromString(start_json);\n\n  auto resp = Run({\"JSON.SET\", \"j1\", \"$\", start_json});\n  EXPECT_EQ(resp, \"OK\");\n  resp = GetJsonMemoryUsageFromDb(\"j1\");\n  EXPECT_THAT(resp, IntArg(start_size));\n\n  std::string_view json_after_set = R\"({\"a\":\"some text\", \"b\":\"some another text\"})\";\n  size_t size_after_set = GetJsonMemoryUsageFromString(json_after_set);\n\n  resp = Run({\"JSON.SET\", \"j1\", \"$.b\", \"\\\"some another text\\\"\"});\n  EXPECT_EQ(resp, \"OK\");\n  resp = GetJsonMemoryUsageFromDb(\"j1\");\n  EXPECT_THAT(resp, IntArg(size_after_set));\n\n  // Again set start json\n  resp = Run({\"JSON.SET\", \"j1\", \"$.b\", \"\\\" \\\"\"});\n  EXPECT_EQ(resp, \"OK\");\n  resp = GetJsonMemoryUsageFromDb(\"j1\");\n  EXPECT_THAT(resp, IntArg(start_size));\n}\n\n/* Tests how works memory usage after deleting json object in jsoncons */\nTEST_F(JsonFamilyMemoryTest, JsonConsDelTest) {\n  std::string_view start_json = R\"({\"a\":\"some text\", \"b\":\" \"})\";\n\n  size_t start = GetMemoryUsage();\n\n  auto json = ParseJsonUsingShardHeap(start_json);\n  void* ptr = GetMemoryResource()->allocate(sizeof(JsonType), alignof(JsonType));\n  JsonType* json_on_heap = new (ptr) JsonType(std::move(json).value());\n\n  size_t memory_usage_before_erase = GetMemoryUsage() - start;\n\n  json_on_heap->erase(\"a\");\n  /* To deallocate memory we should use shrink_to_fit */\n  json_on_heap->shrink_to_fit();\n\n  size_t memory_usage_after_erase = GetMemoryUsage() - start;\n\n  EXPECT_GT(memory_usage_before_erase, memory_usage_after_erase);\n  // b is interned, parsing it again will just reuse the same object and not use extra memory. 
to\n  // force a realistic comparison use a new character.\n  EXPECT_EQ(memory_usage_after_erase, GetJsonMemoryUsageFromString(R\"({\"x\":\" \"})\"));\n}\n\nTEST_F(JsonFamilyMemoryTest, SimpleDel) {\n  std::string_view start_json = R\"({\"a\":\"some text\", \"b\":\" \"})\";\n  size_t start_size = GetJsonMemoryUsageFromString(start_json);\n\n  auto resp = Run({\"JSON.SET\", \"j1\", \"$\", start_json});\n  EXPECT_EQ(resp, \"OK\");\n  resp = GetJsonMemoryUsageFromDb(\"j1\");\n  EXPECT_THAT(resp, IntArg(start_size));\n  // Use non-interned key to get accurate usage\n  std::string_view json_after_del = R\"({\"k\":\" \"})\";\n  size_t size_after_del = GetJsonMemoryUsageFromString(json_after_del);\n\n  // Test that raw memory usage is correct\n  resp = Run({\"JSON.SET\", \"j2\", \"$\", json_after_del});\n  EXPECT_EQ(resp, \"OK\");\n  resp = GetJsonMemoryUsageFromDb(\"j2\");\n  EXPECT_THAT(resp, IntArg(size_after_del));\n\n  // Test that after deletion memory usage is correct\n  resp = Run({\"JSON.DEL\", \"j1\", \"$.a\"});\n  EXPECT_THAT(resp, IntArg(1));\n  resp = Run({\"JSON.GET\", \"j1\"});\n  EXPECT_EQ(resp, R\"({\"b\":\" \"})\");\n  resp = GetJsonMemoryUsageFromDb(\"j1\");\n\n  /* We still expect the initial size here, because after deletion we do not call shrink_to_fit on\n     the JSON object. As a result, the memory will not be deallocated. Check\n     JsonFamilyMemoryTest::JsonConsDelTest for example. 
*/\n  const size_t size_after_delete = [start_size] {\n    const detail::InternedString dropped(\"a\");\n    return start_size - dropped.MemUsed();\n  }();\n  EXPECT_THAT(resp, IntArg(size_after_delete));\n\n  // Again set start json\n  resp = Run({\"JSON.SET\", \"j1\", \"$.a\", \"\\\"some text\\\"\"});\n  EXPECT_EQ(resp, \"OK\");\n  resp = GetJsonMemoryUsageFromDb(\"j1\");\n  EXPECT_THAT(resp, IntArg(start_size));\n}\n\nTEST_F(JsonFamilyMemoryTest, JsonShrinking) {\n  std::string_view start_json = R\"({\"a\":\"some text\",\"b\":\"some another text\",\"c\":\" \"})\";\n  size_t start_size = GetJsonMemoryUsageFromString(start_json);\n\n  auto resp = Run({\"JSON.SET\", \"j1\", \"$\", start_json});\n  EXPECT_EQ(resp, \"OK\");\n  resp = GetJsonMemoryUsageFromDb(\"j1\");\n  EXPECT_THAT(resp, IntArg(start_size));\n\n  // Change key but keep length so that interned key \"c\" does not throw off calculation\n  std::string_view json_after_del = R\"({\"z\":\" \"})\";\n  size_t size_after_del = GetJsonMemoryUsageFromString(json_after_del);\n\n  // Test that raw memory usage is correct\n  resp = Run({\"JSON.SET\", \"j2\", \"$\", json_after_del});\n  EXPECT_EQ(resp, \"OK\");\n  resp = GetJsonMemoryUsageFromDb(\"j2\");\n  EXPECT_THAT(resp, IntArg(size_after_del));\n\n  // Test that after deletion memory usage decreases\n  resp = Run({\"JSON.DEL\", \"j1\", \"$.a\"});\n  EXPECT_THAT(resp, IntArg(1));\n  resp = Run({\"JSON.DEL\", \"j1\", \"$.b\"});\n  EXPECT_THAT(resp, IntArg(1));\n  resp = Run({\"JSON.GET\", \"j1\"});\n  EXPECT_EQ(resp, R\"({\"c\":\" \"})\");\n  resp = GetJsonMemoryUsageFromDb(\"j1\");\n  // Now we expect the size to be smaller, because shrink_to_fit was called\n  EXPECT_THAT(resp, IntArg(size_after_del));\n\n  // Again set start json\n  resp = Run({\"JSON.SET\", \"j1\", \"$.a\", \"\\\"some text\\\"\"});\n  EXPECT_EQ(resp, \"OK\");\n  resp = Run({\"JSON.SET\", \"j1\", \"$.b\", \"\\\"some another text\\\"\"});\n  EXPECT_EQ(resp, \"OK\");\n  resp = Run({\"JSON.GET\", 
\"j1\"});\n  EXPECT_EQ(resp, start_json);\n  resp = GetJsonMemoryUsageFromDb(\"j1\");\n\n  // Jsoncons will allocate more memory for the new json that needed.\n  // This is totally fine, because we will not call shrink_to_fit.\n  // Different compilers may allocate different amounts, so check reasonable range\n  auto final_size = get<int64_t>(resp.u);\n  EXPECT_GT(final_size, start_size);      // Should be larger than initial\n  EXPECT_LT(final_size, start_size * 2);  // But not unreasonably large\n}\n\nTEST_F(JsonFamilyMemoryTest, ShortKeyAccounting) {\n  const std::string value(128, 'v');\n  std::string json = \"{\";\n  for (int i = 0; i < 512; ++i) {\n    if (i)\n      json += \",\";\n    json += absl::StrFormat(R\"(\"k%d\":\"%s\")\", i, value);\n  }\n  json += \"}\";\n\n  auto resp = Run({\"JSON.SET\", \"j1\", \"$\", json});\n  EXPECT_EQ(resp, \"OK\");\n\n  resp = Run({\"JSON.DEBUG\", \"MEMORY\", \"j1\"});\n\n  const auto actual = get<int64_t>(resp.u);\n  const auto expected = static_cast<int64_t>(GetJsonMemoryUsageFromString(json, false));\n\n  EXPECT_LE(std::llabs(actual - expected), 64);\n}\n\nTEST_F(JsonFamilyMemoryTest, MergeMemoryTrackingCrash) {\n  Run(\"JSON.SET key $ {\\\"x\\\":1}\");\n\n  auto resp = Run(\"JSON.MERGE key $ {\\\"y\\\":2}\");\n  ASSERT_THAT(resp, \"OK\");\n\n  resp = Run(\"JSON.MERGE key $ null\");\n  ASSERT_THAT(resp, \"OK\");\n\n  resp = Run(\"JSON.GET key\");\n  ASSERT_THAT(resp, \"null\");\n}\n\nTEST_F(JsonFamilyMemoryTestSingleThread, InternedStringSharedBlobAccounting) {\n  // ref count for x = 1\n  ASSERT_THAT(Run(\"json.set foo $ {\\\"x\\\":3}\"), \"OK\");\n  // ref count for x = 2\n  ASSERT_THAT(Run(\"json.set bar $ {\\\"x\\\":5}\"), \"OK\");\n  Run(\"del foo\");\n  Run(\"json.merge bar $ null\");\n}\n\n}  // namespace dfly\n"
  },
  {
    "path": "src/server/json_family_test.cc",
    "content": "// Copyright 2022, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n#include <absl/flags/flag.h>\n#include <absl/strings/str_replace.h>\n\n#include \"base/gtest.h\"\n#include \"base/logging.h\"\n#include \"facade/error.h\"\n#include \"facade/facade_test.h\"\n#include \"server/test_utils.h\"\n\nusing namespace testing;\nusing namespace std;\nusing namespace util;\n\nABSL_DECLARE_FLAG(bool, jsonpathv2);\n\nnamespace dfly {\n\nclass JsonFamilyTest : public BaseFamilyTest {\n protected:\n};\n\nMATCHER_P(ElementsAreArraysMatcher, matchers, \"\") {\n  const auto& vec = arg.GetVec();\n  const size_t expected_size = std::tuple_size<decltype(matchers)>::value;\n\n  if (vec.size() != expected_size) {\n    *result_listener << \"size mismatch: expected \" << expected_size << \" but got \" << vec.size();\n    return false;\n  }\n\n  bool result = true;\n  size_t index = 0;\n\n  auto check_matcher = [&](const auto& matcher) {\n    if (!ExplainMatchResult(matcher, vec[index].GetVec(), result_listener)) {\n      *result_listener << \" at index \" << index;\n      result = false;\n    }\n    index++;\n  };\n\n  std::apply([&check_matcher](const auto&... matchers) { (check_matcher(matchers), ...); },\n             matchers);\n\n  return result;\n}\n\ntemplate <typename... Matchers> auto ElementsAreArrays(Matchers&&... matchers) {\n  return ElementsAreArraysMatcher(std::make_tuple(std::forward<Matchers>(matchers)...));\n}\n\nTEST_F(JsonFamilyTest, SetGetBasic) {\n  string json = R\"(\n    {\n       \"store\": {\n        \"book\": [\n         {\n           \"category\": \"Fantasy\",\n           \"author\": \"J. K. 
Rowling\",\n           \"title\": \"Harry Potter and the Philosopher's Stone\",\n           \"isbn\": 9780747532743,\n           \"price\": 5.99\n         }\n       ]\n      }\n    }\n)\";\n\n  string xml = R\"(\n    <?xml version=\"1.0\" encoding=\"UTF-8\" ?>\n    <store>\n      <book>\n        <category>Fantasy</category>\n        <author>J. K. Rowling</author>\n        <title>Harry Potter and the Philosopher&#x27;s Stone</title>\n        <isbn>9780747532743</isbn>\n        <price>5.99</price>\n      </book>\n    </store>\n)\";\n\n  auto resp = Run({\"JSON.SET\", \"json\", \".\", json});\n  ASSERT_THAT(resp, \"OK\");\n\n  resp = Run({\"JSON.GET\", \"json\", \"$..*\"});\n  ASSERT_THAT(resp, ArgType(RespExpr::STRING));\n\n  resp = Run({\"JSON.GET\", \"json\", \"$..book[0].price\"});\n  EXPECT_THAT(resp, ArgType(RespExpr::STRING));\n\n  resp = Run({\"JSON.GET\", \"json\", \"//*\"});\n  EXPECT_THAT(resp, ArgType(RespExpr::ERROR));\n\n  resp = Run({\"JSON.GET\", \"json\", \"//book[0]\"});\n  EXPECT_THAT(resp, ArgType(RespExpr::ERROR));\n\n  resp = Run({\"JSON.GET\", \"json\", \"store.book[0].category\"});\n  EXPECT_EQ(resp, \"\\\"Fantasy\\\"\");\n\n  resp = Run({\"JSON.GET\", \"json\", \".store.book[0].category\"});\n  EXPECT_EQ(resp, \"\\\"Fantasy\\\"\");\n\n  resp = Run({\"SET\", \"xml\", xml});\n  ASSERT_THAT(resp, \"OK\");\n\n  resp = Run({\"JSON.GET\", \"xml\", \"$..*\"});\n  EXPECT_THAT(resp, ArgType(RespExpr::ERROR));\n}\n\nTEST_F(JsonFamilyTest, GetLegacy) {\n  string json = R\"({\"name\":\"Leonard Cohen\",\"lastSeen\":1478476800,\"loggedOut\": true})\";\n\n  auto resp = Run({\"JSON.SET\", \"json\", \"$\", json});\n  ASSERT_THAT(resp, \"OK\");\n\n  resp = Run({\"JSON.GET\", \"json\"});  // V1 Response\n  ASSERT_THAT(resp, \"{\\\"lastSeen\\\":1478476800,\\\"loggedOut\\\":true,\\\"name\\\":\\\"Leonard Cohen\\\"}\");\n\n  resp = Run({\"JSON.GET\", \"json\", \".\"});  // V1 Response\n  ASSERT_THAT(resp, 
\"{\\\"lastSeen\\\":1478476800,\\\"loggedOut\\\":true,\\\"name\\\":\\\"Leonard Cohen\\\"}\");\n\n  resp = Run({\"JSON.GET\", \"json\", \"$\"});  // V2 Response\n  ASSERT_THAT(resp, \"[{\\\"lastSeen\\\":1478476800,\\\"loggedOut\\\":true,\\\"name\\\":\\\"Leonard Cohen\\\"}]\");\n\n  resp = Run({\"JSON.GET\", \"json\", \".name\"});  // V1 Response\n  ASSERT_THAT(resp, \"\\\"Leonard Cohen\\\"\");\n\n  resp = Run({\"JSON.GET\", \"json\", \"$.name\"});  // V2 Response\n  ASSERT_THAT(resp, \"[\\\"Leonard Cohen\\\"]\");\n\n  resp = Run({\"JSON.GET\", \"json\", \".name\", \"$.lastSeen\"});  // V2 Response\n  ASSERT_THAT(resp, \"{\\\"$.lastSeen\\\":[1478476800],\\\".name\\\":[\\\"Leonard Cohen\\\"]}\");\n\n  resp = Run({\"JSON.GET\", \"json\", \".name\", \".lastSeen\"});  // V1 Response\n  ASSERT_THAT(resp, \"{\\\".lastSeen\\\":1478476800,\\\".name\\\":\\\"Leonard Cohen\\\"}\");\n\n  resp = Run({\"JSON.GET\", \"json\", \"$.name\", \"$.lastSeen\"});  // V2 Response\n  ASSERT_THAT(resp, \"{\\\"$.lastSeen\\\":[1478476800],\\\"$.name\\\":[\\\"Leonard Cohen\\\"]}\");\n\n  json = R\"(\n    {\"a\":\"first\",\"b\":{\"field\":\"second\"},\"c\":{\"field\":\"third\"}}\n  )\";\n\n  resp = Run({\"JSON.SET\", \"json\", \"$\", json});\n  ASSERT_THAT(resp, \"OK\");\n\n  resp = Run({\"JSON.GET\", \"json\", \"bar\"});  // V1 Response\n  ASSERT_THAT(resp, ErrArg(\"ERR invalid JSON path\"));\n\n  resp = Run({\"JSON.GET\", \"json\", \".\", \"bar\"});  // V1 Response\n  ASSERT_THAT(resp, ErrArg(\"ERR invalid JSON path\"));\n\n  resp = Run({\"JSON.GET\", \"json\", \".a\", \"bar\", \"foo\", \"third\", \".\"});  // V1 Response\n  ASSERT_THAT(resp, ErrArg(\"ERR invalid JSON path\"));\n\n  resp = Run({\"JSON.GET\", \"json\", \"$.bar\"});  // V2 Response\n  ASSERT_THAT(resp, \"[]\");\n\n  resp = Run({\"JSON.GET\", \"json\", \"bar\", \"$.a\"});  // V2 Response\n  ASSERT_THAT(resp, R\"({\"$.a\":[\"first\"],\"bar\":[]})\");\n\n  resp = Run({\"JSON.GET\", \"json\", \"$.bar\"});  // V2 Response\n  
ASSERT_THAT(resp, \"[]\");\n}\n\nstatic const string PhonebookJson = R\"(\n    {\n      \"firstName\":\"John\",\n      \"lastName\":\"Smith\",\n      \"age\":27,\n      \"weight\":135.25,\n      \"isAlive\":true,\n      \"address\":{\n          \"street\":\"21 2nd Street\",\n          \"city\":\"New York\",\n          \"state\":\"NY\",\n          \"zipcode\":\"10021-3100\"\n      },\n      \"phoneNumbers\":[\n          {\n            \"type\":\"home\",\n            \"number\":\"212 555-1234\"\n          },\n          {\n            \"type\":\"office\",\n            \"number\":\"646 555-4567\"\n          }\n      ],\n      \"children\":[\n\n      ],\n      \"spouse\":null\n    }\n  )\";\n\nTEST_F(JsonFamilyTest, SetGetFromPhonebook) {\n  auto resp = Run({\"JSON.SET\", \"json\", \".\", PhonebookJson});\n  ASSERT_THAT(resp, \"OK\");\n\n  auto compact_json = jsoncons::json::parse(PhonebookJson).as_string();\n\n  resp = Run({\"JSON.GET\", \"json\", \".\"});\n  EXPECT_EQ(resp, compact_json);\n\n  resp = Run({\"JSON.GET\", \"json\", \"$\"});\n  EXPECT_EQ(resp, \"[\" + compact_json + \"]\");\n\n  resp = Run({\"JSON.GET\", \"json\", \"$.address.*\"});\n  EXPECT_EQ(resp, R\"([\"New York\",\"NY\",\"21 2nd Street\",\"10021-3100\"])\");\n\n  resp = Run({\"JSON.GET\", \"json\", \"$.firstName\", \"$.age\", \"$.lastName\"});\n  EXPECT_EQ(resp, R\"({\"$.age\":[27],\"$.firstName\":[\"John\"],\"$.lastName\":[\"Smith\"]})\");\n\n  resp = Run({\"JSON.GET\", \"json\", \"$.spouse.*\"});\n  EXPECT_EQ(resp, \"[]\");\n\n  resp = Run({\"JSON.GET\", \"json\", \"$.children.*\"});\n  EXPECT_EQ(resp, \"[]\");\n\n  resp = Run({\"JSON.GET\", \"json\", \"$..phoneNumbers[1].*\"});\n  EXPECT_EQ(resp, R\"([\"646 555-4567\",\"office\"])\");\n\n  resp = Run({\"JSON.GET\", \"json\", \"$.address.*\", \"INDENT\", \"indent\", \"NEWLINE\", \"newline\"});\n  EXPECT_EQ(\n      resp,\n      R\"([newlineindent\"New York\",newlineindent\"NY\",newlineindent\"21 2nd 
Street\",newlineindent\"10021-3100\"newline])\");\n\n  resp = Run({\"JSON.GET\", \"json\", \"$.address\", \"SPACE\", \"space\"});\n  EXPECT_EQ(\n      resp,\n      R\"([{\"city\":space\"New York\",\"state\":space\"NY\",\"street\":space\"21 2nd Street\",\"zipcode\":space\"10021-3100\"}])\");\n\n  resp = Run({\"JSON.GET\", \"json\", \"$.firstName\", \"$.age\", \"$.lastName\", \"INDENT\", \"indent\",\n              \"NEWLINE\", \"newline\", \"SPACE\", \"space\"});\n  EXPECT_EQ(\n      resp,\n      R\"({newlineindent\"$.age\":space[newlineindentindent27newlineindent],newlineindent\"$.firstName\":space[newlineindentindent\"John\"newlineindent],newlineindent\"$.lastName\":space[newlineindentindent\"Smith\"newlineindent]newline})\");\n\n  resp =\n      Run({\"JSON.GET\", \"json\", \"$..phoneNumbers.*\", \"INDENT\", \"t\", \"NEWLINE\", \"s\", \"SPACE\", \"s\"});\n  EXPECT_EQ(\n      resp,\n      R\"([st{stt\"number\":s\"212 555-1234\",stt\"type\":s\"home\"st},st{stt\"number\":s\"646 555-4567\",stt\"type\":s\"office\"st}s])\");\n}\n\nTEST_F(JsonFamilyTest, GetBrackets) {\n  string json = R\"(\n    {\"a\":\"first\", \"b\":{\"a\":\"second\"}}\n  )\";\n\n  auto resp = Run({\"JSON.SET\", \"json\", \".\", json});\n  ASSERT_THAT(resp, \"OK\");\n\n  resp = Run({\"JSON.GET\", \"json\", \"$[\\\"a\\\"]\"});\n  ASSERT_THAT(resp, \"[\\\"first\\\"]\");\n\n  resp = Run({\"JSON.GET\", \"json\", \"$..[\\\"a\\\"]\"});\n  ASSERT_THAT(resp, R\"([\"first\",\"second\"])\");\n\n  resp = Run({\"JSON.GET\", \"json\", \"$.b[\\\"a\\\"]\"});\n  ASSERT_THAT(resp, \"[\\\"second\\\"]\");\n\n  resp = Run({\"JSON.GET\", \"json\", \"[\\\"a\\\"]\"});\n  ASSERT_THAT(resp, \"\\\"first\\\"\");\n\n  resp = Run({\"JSON.GET\", \"json\", \"..[\\\"a\\\"]\"});\n  ASSERT_THAT(resp, \"\\\"second\\\"\");\n\n  json = R\"(\n    [\"first\", [\"second\"]]\n  )\";\n\n  resp = Run({\"JSON.SET\", \"json\", \".\", json});\n  ASSERT_THAT(resp, \"OK\");\n\n  resp = Run({\"JSON.GET\", \"json\", \"$[0]\"});\n  ASSERT_THAT(resp, 
\"[\\\"first\\\"]\");\n\n  resp = Run({\"JSON.GET\", \"json\", \"$..[0]\"});\n  ASSERT_THAT(resp, R\"([\"first\",\"second\"])\");\n\n  resp = Run({\"JSON.GET\", \"json\", \"[0]\"});\n  ASSERT_THAT(resp, \"\\\"first\\\"\");\n\n  resp = Run({\"JSON.GET\", \"json\", \"..[0]\"});\n  ASSERT_THAT(resp, \"\\\"second\\\"\");\n\n  resp = Run({\"JSON.GET\", \"json\", \"$[\\\"first\\\"]\"});\n  ASSERT_THAT(resp, \"[]\");\n\n  json = R\"(\n    {\"a\":{\"b\":{\"c\":\"first\"}}, \"b\":{\"b\":{\"c\":\"second\"}}, \"c\":{\"b\":{\"c\":\"third\"}}}\n  )\";\n\n  resp = Run({\"JSON.SET\", \"json\", \".\", json});\n  ASSERT_THAT(resp, \"OK\");\n\n  resp = Run({\"JSON.GET\", \"json\", R\"($[\"a\"]['b'][\"c\"])\"});\n  ASSERT_THAT(resp, \"[\\\"first\\\"]\");\n\n  resp = Run({\"JSON.GET\", \"json\", R\"($[\"a\"].b['c'])\"});\n  ASSERT_THAT(resp, \"[\\\"first\\\"]\");\n\n  resp = Run({\"JSON.GET\", \"json\", R\"($..['b'][\"c\"])\"});\n  ASSERT_THAT(resp, R\"([\"first\",\"second\",\"third\"])\");\n\n  resp = Run({\"JSON.GET\", \"json\", R\"($.c['b'][\"c\"])\"});\n  ASSERT_THAT(resp, \"[\\\"third\\\"]\");\n}\n\nTEST_F(JsonFamilyTest, GetWithNoEscape) {\n  string json = R\"({\"key\": \"value with special characters: \\n \\t \\\" \\\"\"})\";\n  auto resp = Run({\"JSON.SET\", \"json\", \".\", json});\n  ASSERT_THAT(resp, \"OK\");\n\n  // Test without NOESCAPE option\n  resp = Run({\"JSON.GET\", \"json\", \".\"});\n  EXPECT_EQ(resp, \"{\\\"key\\\":\\\"value with special characters: \\\\n \\\\t \\\\\\\" \\\\\\\"\\\"}\");\n\n  // Test with NOESCAPE option\n  resp = Run({\"JSON.GET\", \"json\", \".\", \"NOESCAPE\"});\n  EXPECT_EQ(resp, \"{\\\"key\\\":\\\"value with special characters: \\\\n \\\\t \\\\\\\" \\\\\\\"\\\"}\");  // No changes\n}\n\nTEST_F(JsonFamilyTest, Type) {\n  string json = R\"(\n    [1, 2.3, \"foo\", true, null, {}, []]\n  )\";\n\n  auto resp = Run({\"JSON.SET\", \"json\", \".\", json});\n  ASSERT_THAT(resp, \"OK\");\n\n  resp = Run({\"JSON.TYPE\", \"json\", \"$[*]\"});\n  
ASSERT_THAT(resp, RespArray(ElementsAre(\"integer\", \"number\", \"string\", \"boolean\", \"null\",\n                                          \"object\", \"array\")));\n\n  resp = Run({\"JSON.TYPE\", \"json\", \"$[10]\"});\n  EXPECT_THAT(resp, ArrLen(0));\n\n  resp = Run({\"JSON.TYPE\", \"not_exist_key\", \"$[10]\"});\n  EXPECT_THAT(resp, ArgType(RespExpr::NIL));\n}\n\nTEST_F(JsonFamilyTest, TypeLegacy) {\n  string json = R\"(\n    {\n      \"firstName\":\"John\",\n      \"lastName\":\"Smith\",\n      \"age\":27,\n      \"weight\":135.25,\n      \"isAlive\":true,\n      \"address\":{\"street\":\"21 2nd Street\",\"city\":\"New York\",\"state\":\"NY\",\"zipcode\":\"10021-3100\"},\n      \"phoneNumbers\":[{\"type\":\"home\",\"number\":\"212 555-1234\"},{\"type\":\"office\",\"number\":\"646 555-4567\"}],\n      \"children\":[],\n      \"spouse\":null\n    }\n  )\";\n\n  auto resp = Run({\"JSON.SET\", \"json\", \".\", json});\n  ASSERT_THAT(resp, \"OK\");\n\n  resp = Run({\"JSON.TYPE\", \"json\"});\n  EXPECT_EQ(resp, \"object\");\n\n  resp = Run({\"JSON.TYPE\", \"json\", \".children\"});\n  EXPECT_EQ(resp, \"array\");\n\n  resp = Run({\"JSON.TYPE\", \"json\", \".firstName\"});\n  EXPECT_EQ(resp, \"string\");\n\n  resp = Run({\"JSON.TYPE\", \"json\", \".age\"});\n  EXPECT_EQ(resp, \"integer\");\n\n  resp = Run({\"JSON.TYPE\", \"json\", \".weight\"});\n  EXPECT_EQ(resp, \"number\");\n\n  resp = Run({\"JSON.TYPE\", \"json\", \".isAlive\"});\n  EXPECT_EQ(resp, \"boolean\");\n\n  resp = Run({\"JSON.TYPE\", \"json\", \".spouse\"});\n  EXPECT_EQ(resp, \"null\");\n\n  resp = Run({\"JSON.TYPE\", \"not_exist_key\", \".some_field\"});\n  EXPECT_THAT(resp, ArgType(RespExpr::NIL));\n}\n\nTEST_F(JsonFamilyTest, StrLen) {\n  string json = R\"(\n    {\"a\":{\"a\":\"a\"}, \"b\":{\"a\":\"a\", \"b\":1}, \"c\":{\"a\":\"a\", \"b\":\"bb\"}, \"d\":{\"a\":1, \"b\":\"b\", \"c\":3}}\n  )\";\n\n  auto resp = Run({\"JSON.SET\", \"json\", \".\", json});\n  ASSERT_THAT(resp, \"OK\");\n\n  /* Test 
simple response from only one value */\n\n  resp = Run({\"JSON.STRLEN\", \"json\", \"$.a.a\"});\n  EXPECT_THAT(resp, IntArg(1));\n\n  resp = Run({\"JSON.STRLEN\", \"json\", \"$.a\"});\n  EXPECT_THAT(resp, ArgType(RespExpr::NIL));\n\n  resp = Run({\"JSON.STRLEN\", \"json\", \"$.a.*\"});\n  EXPECT_THAT(resp, IntArg(1));\n\n  resp = Run({\"JSON.STRLEN\", \"json\", \"$.c.b\"});\n  EXPECT_THAT(resp, IntArg(2));\n\n  resp = Run({\"JSON.STRLEN\", \"non_existent_key\", \"$.c.b\"});\n  EXPECT_THAT(resp, ErrArg(\"no such key\"));\n\n  resp = Run({\"JSON.STRLEN\", \"non_existent_key\", \"$\"});\n  EXPECT_THAT(resp, ErrArg(\"no such key\"));\n\n  /*\n  Test response from several possible values\n  In JSON V2, the response is an array of all possible values\n  */\n\n  resp = Run({\"JSON.STRLEN\", \"json\", \"$.c.*\"});\n  ASSERT_THAT(resp, ArgType(RespExpr::ARRAY));\n  EXPECT_THAT(resp.GetVec(), ElementsAre(IntArg(1), IntArg(2)));\n\n  resp = Run({\"JSON.STRLEN\", \"json\", \"$.d.*\"});\n  ASSERT_THAT(resp, ArgType(RespExpr::ARRAY));\n  EXPECT_THAT(resp.GetVec(),\n              ElementsAre(ArgType(RespExpr::NIL), IntArg(1), ArgType(RespExpr::NIL)));\n}\n\nTEST_F(JsonFamilyTest, StrLenLegacy) {\n  string json = R\"(\n    {\"a\":{\"a\":\"a\"}, \"b\":{\"a\":\"a\", \"b\":1}, \"c\":{\"a\":\"a\", \"b\":\"bb\"}, \"d\":{\"a\":1, \"b\":\"b\", \"c\":3}}\n  )\";\n\n  auto resp = Run({\"JSON.SET\", \"json\", \".\", json});\n  ASSERT_THAT(resp, \"OK\");\n\n  /* Test simple response from only one value */\n\n  resp = Run({\"JSON.STRLEN\", \"json\"});\n  EXPECT_THAT(resp, ErrArg(\"wrong JSON type of path value\"));\n\n  resp = Run({\"JSON.STRLEN\", \"json\", \".a.a\"});\n  EXPECT_THAT(resp, IntArg(1));\n\n  resp = Run({\"JSON.STRLEN\", \"json\", \".a\"});\n  EXPECT_THAT(resp, ErrArg(\"wrong JSON type of path value\"));\n\n  resp = Run({\"JSON.STRLEN\", \"json\", \".a.*\"});\n  EXPECT_THAT(resp, IntArg(1));\n\n  resp = Run({\"JSON.STRLEN\", \"json\", \".c.b\"});\n  EXPECT_THAT(resp, 
IntArg(2));\n\n  resp = Run({\"JSON.STRLEN\", \"non_existent_key\", \".c.b\"});\n  EXPECT_THAT(resp, ArgType(RespExpr::NIL));\n\n  /*\n  Test response from several possible values\n  In JSON legacy mode, the response contains only one value - the first string's length.\n  */\n\n  resp = Run({\"JSON.STRLEN\", \"json\", \".c.*\"});\n  EXPECT_THAT(resp, IntArg(1));\n\n  resp = Run({\"JSON.STRLEN\", \"json\", \".d.*\"});\n  EXPECT_THAT(resp, IntArg(1));\n}\n\nTEST_F(JsonFamilyTest, ObjLen) {\n  string json = R\"(\n    {\"a\":{}, \"b\":{\"a\":\"a\"}, \"c\":{\"a\":\"a\", \"b\":\"bb\"}, \"d\":{\"a\":1, \"b\":\"b\", \"c\":{\"a\":3,\"b\":4}}, \"e\":1}\n  )\";\n\n  auto resp = Run({\"JSON.SET\", \"json\", \".\", json});\n  ASSERT_THAT(resp, \"OK\");\n\n  /* Test simple response from only one value */\n\n  resp = Run({\"JSON.OBJLEN\", \"json\", \"$.a\"});\n  EXPECT_THAT(resp, IntArg(0));\n\n  resp = Run({\"JSON.OBJLEN\", \"json\", \"$.a.*\"});\n  EXPECT_THAT(resp.GetVec(), IsEmpty());\n\n  resp = Run({\"JSON.OBJLEN\", \"json\", \"$.b\"});\n  EXPECT_THAT(resp, IntArg(1));\n\n  resp = Run({\"JSON.OBJLEN\", \"json\", \"$.b.*\"});\n  EXPECT_THAT(resp, ArgType(RespExpr::NIL));\n\n  resp = Run({\"JSON.OBJLEN\", \"json\", \"$.c\"});\n  EXPECT_THAT(resp, IntArg(2));\n\n  resp = Run({\"JSON.OBJLEN\", \"json\", \"$.d\"});\n  EXPECT_THAT(resp, IntArg(3));\n\n  resp = Run({\"JSON.OBJLEN\", \"non_existent_key\", \"$.a\"});\n  EXPECT_THAT(resp, ErrArg(\"no such key\"));\n\n  /*\n  Test response from several possible values\n  In JSON V2, the response is an array of all possible values\n  */\n\n  resp = Run({\"JSON.OBJLEN\", \"json\", \"$.c.*\"});\n  ASSERT_THAT(resp, ArgType(RespExpr::ARRAY));\n  EXPECT_THAT(resp.GetVec(), ElementsAre(ArgType(RespExpr::NIL), ArgType(RespExpr::NIL)));\n\n  resp = Run({\"JSON.OBJLEN\", \"json\", \"$.d.*\"});\n  ASSERT_THAT(resp, ArgType(RespExpr::ARRAY));\n  EXPECT_THAT(resp.GetVec(),\n              ElementsAre(ArgType(RespExpr::NIL), ArgType(RespExpr::NIL), 
IntArg(2)));\n\n  resp = Run({\"JSON.OBJLEN\", \"json\", \"$.*\"});\n  ASSERT_THAT(resp, ArgType(RespExpr::ARRAY));\n  EXPECT_THAT(resp.GetVec(),\n              ElementsAre(IntArg(0), IntArg(1), IntArg(2), IntArg(3), ArgType(RespExpr::NIL)));\n}\n\nTEST_F(JsonFamilyTest, ObjLenLegacy) {\n  string json = R\"(\n    {\"a\":{}, \"b\":{\"a\":\"a\"}, \"c\":{\"a\":\"a\", \"b\":\"bb\"}, \"d\":{\"a\":1, \"b\":\"b\", \"c\":{\"a\":3,\"b\":4}}, \"e\":1}\n  )\";\n\n  auto resp = Run({\"JSON.SET\", \"json\", \".\", json});\n  ASSERT_THAT(resp, \"OK\");\n\n  /* Test simple response from only one value */\n  resp = Run({\"JSON.STRLEN\", \"json\"});\n  EXPECT_THAT(resp, ErrArg(\"wrong JSON type of path value\"));\n\n  resp = Run({\"JSON.OBJLEN\", \"json\", \".a\"});\n  EXPECT_THAT(resp, IntArg(0));\n\n  resp = Run({\"JSON.OBJLEN\", \"json\", \".a.*\"});\n  EXPECT_THAT(resp, ArgType(RespExpr::NIL));\n\n  resp = Run({\"JSON.OBJLEN\", \"json\", \".b\"});\n  EXPECT_THAT(resp, IntArg(1));\n\n  resp = Run({\"JSON.OBJLEN\", \"json\", \".b.*\"});\n  EXPECT_THAT(resp, ErrArg(\"wrong JSON type of path value\"));\n\n  resp = Run({\"JSON.OBJLEN\", \"json\", \".c\"});\n  EXPECT_THAT(resp, IntArg(2));\n\n  resp = Run({\"JSON.OBJLEN\", \"json\", \".d\"});\n  EXPECT_THAT(resp, IntArg(3));\n\n  resp = Run({\"JSON.OBJLEN\", \"non_existent_key\", \".a\"});\n  EXPECT_THAT(resp, ArgType(RespExpr::NIL));\n\n  resp = Run({\"JSON.OBJLEN\", \"json\", \".none\"});\n  EXPECT_THAT(resp, ArgType(RespExpr::NIL));\n\n  /*\n  Test response from several possible values\n  In JSON legacy mode, the response contains only one value - the first object's length.\n  */\n\n  resp = Run({\"JSON.OBJLEN\", \"json\", \".c.*\"});\n  EXPECT_THAT(resp, ErrArg(\"wrong JSON type of path value\"));\n\n  resp = Run({\"JSON.OBJLEN\", \"json\", \".d.*\"});\n  EXPECT_THAT(resp, IntArg(2));\n\n  resp = Run({\"JSON.OBJLEN\", \"json\", \".*\"});\n  EXPECT_THAT(resp, IntArg(0));\n}\n\nTEST_F(JsonFamilyTest, ArrLen) {\n  string json = 
R\"(\n    [[], [\"a\"], [\"a\", \"b\"], [\"a\", \"b\", \"c\"]]\n  )\";\n\n  auto resp = Run({\"JSON.SET\", \"json\", \".\", json});\n  ASSERT_THAT(resp, \"OK\");\n\n  resp = Run({\"JSON.ARRLEN\", \"json\", \"$[*]\"});\n  ASSERT_THAT(resp, ArgType(RespExpr::ARRAY));\n  EXPECT_THAT(resp.GetVec(), ElementsAre(IntArg(0), IntArg(1), IntArg(2), IntArg(3)));\n\n  json = R\"(\n    [[], \"a\", [\"a\", \"b\"], [\"a\", \"b\", \"c\"], 4]\n  )\";\n\n  resp = Run({\"JSON.SET\", \"json\", \".\", json});\n  ASSERT_THAT(resp, \"OK\");\n\n  resp = Run({\"JSON.ARRLEN\", \"json\", \"$[*]\"});\n  ASSERT_THAT(resp, ArgType(RespExpr::ARRAY));\n  EXPECT_THAT(resp.GetVec(), ElementsAre(IntArg(0), ArgType(RespExpr::NIL), IntArg(2), IntArg(3),\n                                         ArgType(RespExpr::NIL)));\n\n  resp = Run({\"JSON.OBJLEN\", \"non_existent_key\", \"$[*]\"});\n  EXPECT_THAT(resp, ErrArg(\"no such key\"));\n}\n\nTEST_F(JsonFamilyTest, ArrLenLegacy) {\n  string json = R\"(\n    [[], [\"a\"], [\"a\", \"b\"], [\"a\", \"b\", \"c\"]]\n  )\";\n\n  auto resp = Run({\"JSON.SET\", \"json\", \".\", json});\n  ASSERT_THAT(resp, \"OK\");\n\n  resp = Run({\"JSON.ARRLEN\", \"json\"});\n  EXPECT_THAT(resp, IntArg(4));\n\n  resp = Run({\"JSON.ARRLEN\", \"json\", \"[*]\"});\n  EXPECT_THAT(resp, IntArg(0));\n\n  resp = Run({\"JSON.ARRLEN\", \"json\", \"[3]\"});\n  EXPECT_THAT(resp, IntArg(3));\n\n  json = R\"(\n    [[], \"a\", [\"a\", \"b\"], [\"a\", \"b\", \"c\"], 4]\n  )\";\n\n  resp = Run({\"JSON.SET\", \"json\", \".\", json});\n  ASSERT_THAT(resp, \"OK\");\n\n  resp = Run({\"JSON.ARRLEN\", \"json\", \"[*]\"});\n  EXPECT_THAT(resp, IntArg(0));\n\n  resp = Run({\"JSON.ARRLEN\", \"json\", \"[1]\"});\n  EXPECT_THAT(resp, ErrArg(\"wrong JSON type of path value\"));\n\n  resp = Run({\"JSON.ARRLEN\", \"json\", \"[2]\"});\n  EXPECT_THAT(resp, IntArg(2));\n\n  resp = Run({\"JSON.OBJLEN\", \"non_existent_key\", \"[*]\"});\n  EXPECT_THAT(resp, ArgType(RespExpr::NIL));\n}\n\nTEST_F(JsonFamilyTest, 
Toggle) {\n  string json = R\"(\n    {\"a\":true, \"b\":false, \"c\":1, \"d\":null, \"e\":\"foo\", \"f\":[], \"g\":{}}\n  )\";\n\n  auto resp = Run({\"JSON.SET\", \"json\", \".\", json});\n  ASSERT_THAT(resp, \"OK\");\n\n  resp = Run({\"JSON.TOGGLE\", \"json\", \"$.*\"});\n  ASSERT_THAT(resp, ArgType(RespExpr::ARRAY));\n  EXPECT_THAT(resp.GetVec(),\n              ElementsAre(IntArg(0), IntArg(1), ArgType(RespExpr::NIL), ArgType(RespExpr::NIL),\n                          ArgType(RespExpr::NIL), ArgType(RespExpr::NIL), ArgType(RespExpr::NIL)));\n\n  resp = Run({\"JSON.GET\", \"json\", \"$.*\"});\n  EXPECT_EQ(resp, R\"([false,true,1,null,\"foo\",[],{}])\");\n\n  resp = Run({\"JSON.TOGGLE\", \"json\", \"$.*\"});\n  ASSERT_THAT(resp, ArgType(RespExpr::ARRAY));\n  EXPECT_THAT(resp.GetVec(),\n              ElementsAre(IntArg(1), IntArg(0), ArgType(RespExpr::NIL), ArgType(RespExpr::NIL),\n                          ArgType(RespExpr::NIL), ArgType(RespExpr::NIL), ArgType(RespExpr::NIL)));\n\n  resp = Run({\"JSON.GET\", \"json\", \"$.*\"});\n  EXPECT_EQ(resp, R\"([true,false,1,null,\"foo\",[],{}])\");\n}\n\nTEST_F(JsonFamilyTest, ToggleLegacy) {\n  string json = R\"(\n    {\"a\":true, \"b\":false, \"c\":1, \"d\":null, \"e\":\"foo\", \"f\":[], \"g\":{}}\n  )\";\n\n  auto resp = Run({\"JSON.SET\", \"json\", \".\", json});\n  ASSERT_THAT(resp, \"OK\");\n\n  resp = Run({\"JSON.TOGGLE\", \"json\"});\n  EXPECT_THAT(resp, ErrArg(\"wrong number of arguments\"));\n\n  resp = Run({\"JSON.TOGGLE\", \"json\", \".*\"});\n  EXPECT_EQ(resp, \"true\");\n\n  resp = Run({\"JSON.TOGGLE\", \"json\", \".*\"});\n  EXPECT_EQ(resp, \"false\");\n\n  resp = Run({\"JSON.GET\", \"json\", \"$.*\"});\n  EXPECT_EQ(R\"([true,false,1,null,\"foo\",[],{}])\", resp);\n\n  resp = Run({\"JSON.SET\", \"json\", \".\", \"true\"});\n  ASSERT_THAT(resp, \"OK\");\n\n  resp = Run({\"JSON.TOGGLE\", \"json\", \".\"});\n  EXPECT_EQ(resp, \"false\");\n\n  resp = Run({\"JSON.TOGGLE\", \"json\", \".\"});\n  EXPECT_EQ(resp, 
\"true\");\n\n  json = R\"(\n    {\"isAvailable\": false}\n  )\";\n\n  resp = Run({\"JSON.SET\", \"json\", \".\", json});\n  ASSERT_THAT(resp, \"OK\");\n\n  resp = Run({\"JSON.TOGGLE\", \"json\", \".isAvailable\"});\n  EXPECT_EQ(resp, \"true\");\n\n  resp = Run({\"JSON.TOGGLE\", \"json\", \".isAvailable\"});\n  EXPECT_EQ(resp, \"false\");\n}\n\nTEST_F(JsonFamilyTest, NumIncrBy) {\n  string json = R\"(\n    {\"e\":1.5,\"a\":1}\n  )\";\n\n  auto resp = Run({\"JSON.SET\", \"json\", \".\", json});\n  ASSERT_THAT(resp, \"OK\");\n\n  resp = Run({\"JSON.NUMINCRBY\", \"json\", \"$.a\", \"1.1\"});\n  EXPECT_EQ(resp, \"[2.1]\");\n\n  resp = Run({\"JSON.NUMINCRBY\", \"json\", \"$.e\", \"1\"});\n  EXPECT_EQ(resp, \"[2.5]\");\n\n  resp = Run({\"JSON.NUMINCRBY\", \"json\", \"$.e\", \"inf\"});\n  EXPECT_THAT(resp, ErrArg(\"ERR result is not a number\"));\n\n  resp = Run({\"JSON.SET\", \"json\", \".\", json});\n  ASSERT_THAT(resp, \"OK\");\n\n  resp = Run({\"JSON.NUMINCRBY\", \"json\", \"$.e\", \"1.7e308\"});\n  EXPECT_EQ(resp, \"[1.7e+308]\");\n\n  resp = Run({\"JSON.NUMINCRBY\", \"json\", \"$.e\", \"1.7e308\"});\n  EXPECT_THAT(resp, ErrArg(\"ERR result is not a number\"));\n\n  resp = Run({\"JSON.GET\", \"json\", \"$.*\"});\n  EXPECT_EQ(resp, R\"([1,1.7e+308])\");\n\n  json = R\"(\n    {\"a\":[], \"b\":[1], \"c\":[1,2], \"d\":[1,2,3]}\n  )\";\n\n  resp = Run({\"JSON.SET\", \"json\", \".\", json});\n  ASSERT_THAT(resp, \"OK\");\n\n  resp = Run({\"JSON.NUMINCRBY\", \"json\", \"$.d[*]\", \"10\"});\n  EXPECT_EQ(resp, \"[11,12,13]\");\n\n  resp = Run({\"JSON.GET\", \"json\", \"$.d[*]\"});\n  EXPECT_EQ(resp, \"[11,12,13]\");\n\n  resp = Run({\"JSON.SET\", \"json\", \".\", json});\n  ASSERT_THAT(resp, \"OK\");\n\n  resp = Run({\"JSON.NUMINCRBY\", \"json\", \"$.a[*]\", \"1\"});\n  EXPECT_EQ(resp, \"[]\");\n\n  resp = Run({\"JSON.NUMINCRBY\", \"json\", \"$.b[*]\", \"1\"});\n  EXPECT_EQ(resp, \"[2]\");\n\n  resp = Run({\"JSON.NUMINCRBY\", \"json\", \"$.c[*]\", \"1\"});\n  EXPECT_EQ(resp, 
\"[2,3]\");\n\n  resp = Run({\"JSON.NUMINCRBY\", \"json\", \"$.d[*]\", \"1\"});\n  EXPECT_EQ(resp, \"[2,3,4]\");\n\n  resp = Run({\"JSON.NUMINCRBY\", \"json\", \"$.d[2]\", \"1\"});\n  EXPECT_EQ(resp, \"[5]\");\n\n  resp = Run({\"JSON.GET\", \"json\"});\n  EXPECT_EQ(resp, R\"({\"a\":[],\"b\":[2],\"c\":[2,3],\"d\":[2,3,5]})\");\n\n  json = R\"(\n    {\"a\":{}, \"b\":{\"a\":1}, \"c\":{\"a\":1, \"b\":2}, \"d\":{\"a\":1, \"b\":2, \"c\":3}}\n  )\";\n\n  resp = Run({\"JSON.SET\", \"json\", \".\", json});\n  ASSERT_THAT(resp, \"OK\");\n\n  resp = Run({\"JSON.NUMINCRBY\", \"json\", \"$.a.*\", \"1\"});\n  EXPECT_EQ(resp, \"[]\");\n\n  resp = Run({\"JSON.NUMINCRBY\", \"json\", \"$.b.*\", \"1\"});\n  EXPECT_EQ(resp, \"[2]\");\n\n  resp = Run({\"JSON.NUMINCRBY\", \"json\", \"$.c.*\", \"1\"});\n  EXPECT_EQ(resp, \"[2,3]\");\n\n  resp = Run({\"JSON.NUMINCRBY\", \"json\", \"$.d.*\", \"1\"});\n  EXPECT_EQ(resp, \"[2,3,4]\");\n\n  resp = Run({\"JSON.GET\", \"json\"});\n  EXPECT_EQ(resp, R\"({\"a\":{},\"b\":{\"a\":2},\"c\":{\"a\":2,\"b\":3},\"d\":{\"a\":2,\"b\":3,\"c\":4}})\");\n\n  json = R\"(\n    {\"a\":{\"a\":\"a\"}, \"b\":{\"a\":\"a\", \"b\":1}, \"c\":{\"a\":\"a\", \"b\":\"b\"}, \"d\":{\"a\":1, \"b\":\"b\", \"c\":3}}\n  )\";\n\n  resp = Run({\"JSON.SET\", \"json\", \".\", json});\n  ASSERT_THAT(resp, \"OK\");\n\n  resp = Run({\"JSON.NUMINCRBY\", \"json\", \"$.a.*\", \"1\"});\n  EXPECT_EQ(resp, \"[null]\");\n\n  resp = Run({\"JSON.NUMINCRBY\", \"json\", \"$.b.*\", \"1\"});\n  EXPECT_EQ(resp, \"[null,2]\");\n\n  resp = Run({\"JSON.NUMINCRBY\", \"json\", \"$.c.*\", \"1\"});\n  EXPECT_EQ(resp, \"[null,null]\");\n\n  resp = Run({\"JSON.NUMINCRBY\", \"json\", \"$.d.*\", \"1\"});\n  EXPECT_EQ(resp, \"[2,null,4]\");\n\n  resp = Run({\"JSON.GET\", \"json\"});\n  EXPECT_EQ(\n      resp,\n      R\"({\"a\":{\"a\":\"a\"},\"b\":{\"a\":\"a\",\"b\":2},\"c\":{\"a\":\"a\",\"b\":\"b\"},\"d\":{\"a\":2,\"b\":\"b\",\"c\":4}})\");\n}\n\nTEST_F(JsonFamilyTest, NumIncrByLegacy) {\n  string json = R\"(\n 
   {\"e\":1.5,\"a\":1}\n  )\";\n\n  auto resp = Run({\"JSON.SET\", \"json\", \".\", json});\n  ASSERT_THAT(resp, \"OK\");\n\n  resp = Run({\"JSON.NUMINCRBY\", \"json\", \".a\", \"1.1\"});\n  EXPECT_EQ(resp, \"2.1\");\n\n  resp = Run({\"JSON.NUMINCRBY\", \"json\", \".e\", \"1\"});\n  EXPECT_EQ(resp, \"2.5\");\n\n  resp = Run({\"JSON.NUMINCRBY\", \"json\", \".e\", \"inf\"});\n  EXPECT_THAT(resp, ErrArg(\"ERR result is not a number\"));\n\n  resp = Run({\"JSON.SET\", \"json\", \".\", json});\n  ASSERT_THAT(resp, \"OK\");\n\n  resp = Run({\"JSON.NUMINCRBY\", \"json\", \".e\", \"1.7e308\"});\n  EXPECT_EQ(resp, \"1.7e+308\");\n\n  resp = Run({\"JSON.NUMINCRBY\", \"json\", \".e\", \"1.7e308\"});\n  EXPECT_THAT(resp, ErrArg(\"ERR result is not a number\"));\n\n  resp = Run({\"JSON.GET\", \"json\", \"$.*\"});\n  EXPECT_EQ(resp, R\"([1,1.7e+308])\");\n\n  json = R\"(\n    {\"a\":[], \"b\":[1], \"c\":[1,2], \"d\":[1,2,3]}\n  )\";\n\n  resp = Run({\"JSON.SET\", \"json\", \".\", json});\n  ASSERT_THAT(resp, \"OK\");\n\n  resp = Run({\"JSON.NUMINCRBY\", \"json\", \".d[*]\", \"10\"});\n  EXPECT_EQ(resp, \"13\");\n\n  resp = Run({\"JSON.GET\", \"json\", \"$.d[*]\"});\n  EXPECT_EQ(resp, \"[11,12,13]\");\n\n  resp = Run({\"JSON.SET\", \"json\", \".\", json});\n  ASSERT_THAT(resp, \"OK\");\n\n  resp = Run({\"JSON.NUMINCRBY\", \"json\", \".a[*]\", \"1\"});\n  EXPECT_THAT(resp, ErrArg(\"wrong JSON type of path value\"));\n\n  resp = Run({\"JSON.NUMINCRBY\", \"json\", \".b[*]\", \"1\"});\n  EXPECT_EQ(resp, \"2\");\n\n  resp = Run({\"JSON.NUMINCRBY\", \"json\", \".c[*]\", \"1\"});\n  EXPECT_EQ(resp, \"3\");\n\n  resp = Run({\"JSON.NUMINCRBY\", \"json\", \".d[*]\", \"1\"});\n  EXPECT_EQ(resp, \"4\");\n\n  resp = Run({\"JSON.NUMINCRBY\", \"json\", \".d[2]\", \"1\"});\n  EXPECT_EQ(resp, \"5\");\n\n  resp = Run({\"JSON.GET\", \"json\"});\n  EXPECT_EQ(resp, R\"({\"a\":[],\"b\":[2],\"c\":[2,3],\"d\":[2,3,5]})\");\n\n  json = R\"(\n    {\"a\":{}, \"b\":{\"a\":1}, \"c\":{\"a\":1, \"b\":2}, 
\"d\":{\"a\":1, \"b\":2, \"c\":3}}\n  )\";\n\n  resp = Run({\"JSON.SET\", \"json\", \".\", json});\n  ASSERT_THAT(resp, \"OK\");\n\n  resp = Run({\"JSON.NUMINCRBY\", \"json\", \".a.*\", \"1\"});\n  EXPECT_THAT(resp, ErrArg(\"wrong JSON type of path value\"));\n\n  resp = Run({\"JSON.NUMINCRBY\", \"json\", \".b.*\", \"1\"});\n  EXPECT_EQ(resp, \"2\");\n\n  resp = Run({\"JSON.NUMINCRBY\", \"json\", \".c.*\", \"1\"});\n  EXPECT_EQ(resp, \"3\");\n\n  resp = Run({\"JSON.NUMINCRBY\", \"json\", \".d.*\", \"1\"});\n  EXPECT_EQ(resp, \"4\");\n\n  resp = Run({\"JSON.GET\", \"json\"});\n  EXPECT_EQ(resp, R\"({\"a\":{},\"b\":{\"a\":2},\"c\":{\"a\":2,\"b\":3},\"d\":{\"a\":2,\"b\":3,\"c\":4}})\");\n\n  json = R\"(\n    {\"a\":{\"a\":\"a\"}, \"b\":{\"a\":\"a\", \"b\":1}, \"c\":{\"a\":\"a\", \"b\":\"b\"}, \"d\":{\"a\":1, \"b\":\"b\", \"c\":3}}\n  )\";\n\n  resp = Run({\"JSON.SET\", \"json\", \".\", json});\n  ASSERT_THAT(resp, \"OK\");\n\n  resp = Run({\"JSON.NUMINCRBY\", \"json\", \".a.*\", \"1\"});\n  EXPECT_THAT(resp, ErrArg(\"wrong JSON type of path value\"));\n\n  resp = Run({\"JSON.NUMINCRBY\", \"json\", \".b.*\", \"1\"});\n  EXPECT_EQ(resp, \"2\");\n\n  resp = Run({\"JSON.NUMINCRBY\", \"json\", \".c.*\", \"1\"});\n  EXPECT_THAT(resp, ErrArg(\"wrong JSON type of path value\"));\n\n  resp = Run({\"JSON.NUMINCRBY\", \"json\", \".d.*\", \"1\"});\n  EXPECT_EQ(resp, \"4\");\n\n  resp = Run({\"JSON.GET\", \"json\"});\n  EXPECT_EQ(\n      resp,\n      R\"({\"a\":{\"a\":\"a\"},\"b\":{\"a\":\"a\",\"b\":2},\"c\":{\"a\":\"a\",\"b\":\"b\"},\"d\":{\"a\":2,\"b\":\"b\",\"c\":4}})\");\n}\n\nTEST_F(JsonFamilyTest, NumMultBy) {\n  string json = R\"(\n    {\"a\":[], \"b\":[1], \"c\":[1,2], \"d\":[1,2,3]}\n  )\";\n\n  auto resp = Run({\"JSON.SET\", \"json\", \".\", json});\n  ASSERT_THAT(resp, \"OK\");\n\n  resp = Run({\"JSON.NUMMULTBY\", \"json\", \"$.d[*]\", \"2\"});\n  EXPECT_EQ(resp, \"[2,4,6]\");\n\n  resp = Run({\"JSON.GET\", \"json\", \"$.d[*]\"});\n  EXPECT_EQ(resp, R\"([2,4,6])\");\n\n 
 resp = Run({\"JSON.SET\", \"json\", \".\", json});\n  ASSERT_THAT(resp, \"OK\");\n\n  resp = Run({\"JSON.NUMMULTBY\", \"json\", \"$.a[*]\", \"2\"});\n  EXPECT_EQ(resp, \"[]\");\n\n  resp = Run({\"JSON.NUMMULTBY\", \"json\", \"$.b[*]\", \"2\"});\n  EXPECT_EQ(resp, \"[2]\");\n\n  resp = Run({\"JSON.NUMMULTBY\", \"json\", \"$.c[*]\", \"2\"});\n  EXPECT_EQ(resp, \"[2,4]\");\n\n  resp = Run({\"JSON.NUMMULTBY\", \"json\", \"$.d[*]\", \"2\"});\n  EXPECT_EQ(resp, \"[2,4,6]\");\n\n  resp = Run({\"JSON.GET\", \"json\"});\n  EXPECT_EQ(resp, R\"({\"a\":[],\"b\":[2],\"c\":[2,4],\"d\":[2,4,6]})\");\n\n  json = R\"(\n    {\"a\":{}, \"b\":{\"a\":1}, \"c\":{\"a\":1, \"b\":2}, \"d\":{\"a\":1, \"b\":2, \"c\":3}}\n  )\";\n\n  resp = Run({\"JSON.SET\", \"json\", \".\", json});\n  ASSERT_THAT(resp, \"OK\");\n\n  resp = Run({\"JSON.NUMMULTBY\", \"json\", \"$.a.*\", \"2\"});\n  EXPECT_EQ(resp, \"[]\");\n\n  resp = Run({\"JSON.NUMMULTBY\", \"json\", \"$.b.*\", \"2\"});\n  EXPECT_EQ(resp, \"[2]\");\n\n  resp = Run({\"JSON.NUMMULTBY\", \"json\", \"$.c.*\", \"2\"});\n  EXPECT_EQ(resp, \"[2,4]\");\n\n  resp = Run({\"JSON.NUMMULTBY\", \"json\", \"$.d.*\", \"2\"});\n  EXPECT_EQ(resp, \"[2,4,6]\");\n\n  resp = Run({\"JSON.GET\", \"json\"});\n  EXPECT_EQ(resp, R\"({\"a\":{},\"b\":{\"a\":2},\"c\":{\"a\":2,\"b\":4},\"d\":{\"a\":2,\"b\":4,\"c\":6}})\");\n\n  json = R\"(\n    {\"a\":{\"a\":\"a\"}, \"b\":{\"a\":\"a\", \"b\":1}, \"c\":{\"a\":\"a\", \"b\":\"b\"}, \"d\":{\"a\":1, \"b\":\"b\", \"c\":3}}\n  )\";\n\n  resp = Run({\"JSON.SET\", \"json\", \".\", json});\n  ASSERT_THAT(resp, \"OK\");\n\n  resp = Run({\"JSON.NUMMULTBY\", \"json\", \"$.a.*\", \"2\"});\n  EXPECT_EQ(resp, \"[null]\");\n\n  resp = Run({\"JSON.NUMMULTBY\", \"json\", \"$.b.*\", \"2\"});\n  EXPECT_EQ(resp, \"[null,2]\");\n\n  resp = Run({\"JSON.NUMMULTBY\", \"json\", \"$.c.*\", \"2\"});\n  EXPECT_EQ(resp, \"[null,null]\");\n\n  resp = Run({\"JSON.NUMMULTBY\", \"json\", \"$.d.*\", \"2\"});\n  EXPECT_EQ(resp, \"[2,null,6]\");\n\n  resp 
= Run({\"JSON.GET\", \"json\"});\n  EXPECT_EQ(\n      resp,\n      R\"({\"a\":{\"a\":\"a\"},\"b\":{\"a\":\"a\",\"b\":2},\"c\":{\"a\":\"a\",\"b\":\"b\"},\"d\":{\"a\":2,\"b\":\"b\",\"c\":6}})\");\n}\n\nTEST_F(JsonFamilyTest, NumMultByLegacy) {\n  string json = R\"(\n    {\"a\":[], \"b\":[1], \"c\":[1,2], \"d\":[1,2,3]}\n  )\";\n\n  auto resp = Run({\"JSON.SET\", \"json\", \".\", json});\n  ASSERT_THAT(resp, \"OK\");\n\n  resp = Run({\"JSON.NUMMULTBY\", \"json\", \".d[*]\", \"2\"});\n  EXPECT_EQ(resp, \"6\");\n\n  resp = Run({\"JSON.GET\", \"json\", \"$.d[*]\"});\n  EXPECT_EQ(resp, R\"([2,4,6])\");\n\n  resp = Run({\"JSON.SET\", \"json\", \".\", json});\n  ASSERT_THAT(resp, \"OK\");\n\n  resp = Run({\"JSON.NUMMULTBY\", \"json\", \".a[*]\", \"2\"});\n  EXPECT_THAT(resp, ErrArg(\"wrong JSON type of path value\"));\n\n  resp = Run({\"JSON.NUMMULTBY\", \"json\", \".b[*]\", \"2\"});\n  EXPECT_EQ(resp, \"2\");\n\n  resp = Run({\"JSON.NUMMULTBY\", \"json\", \".c[*]\", \"2\"});\n  EXPECT_EQ(resp, \"4\");\n\n  resp = Run({\"JSON.NUMMULTBY\", \"json\", \".d[*]\", \"2\"});\n  EXPECT_EQ(resp, \"6\");\n\n  resp = Run({\"JSON.GET\", \"json\"});\n  EXPECT_EQ(resp, R\"({\"a\":[],\"b\":[2],\"c\":[2,4],\"d\":[2,4,6]})\");\n\n  json = R\"(\n    {\"a\":{}, \"b\":{\"a\":1}, \"c\":{\"a\":1, \"b\":2}, \"d\":{\"a\":1, \"b\":2, \"c\":3}}\n  )\";\n\n  resp = Run({\"JSON.SET\", \"json\", \".\", json});\n  ASSERT_THAT(resp, \"OK\");\n\n  resp = Run({\"JSON.NUMMULTBY\", \"json\", \".a.*\", \"2\"});\n  EXPECT_THAT(resp, ErrArg(\"wrong JSON type of path value\"));\n\n  resp = Run({\"JSON.NUMMULTBY\", \"json\", \".b.*\", \"2\"});\n  EXPECT_EQ(resp, \"2\");\n\n  resp = Run({\"JSON.NUMMULTBY\", \"json\", \".c.*\", \"2\"});\n  EXPECT_EQ(resp, \"4\");\n\n  resp = Run({\"JSON.NUMMULTBY\", \"json\", \".d.*\", \"2\"});\n  EXPECT_EQ(resp, \"6\");\n\n  resp = Run({\"JSON.GET\", \"json\"});\n  EXPECT_EQ(resp, R\"({\"a\":{},\"b\":{\"a\":2},\"c\":{\"a\":2,\"b\":4},\"d\":{\"a\":2,\"b\":4,\"c\":6}})\");\n\n  json 
= R\"(\n    {\"a\":{\"a\":\"a\"}, \"b\":{\"a\":\"a\", \"b\":1}, \"c\":{\"a\":\"a\", \"b\":\"b\"}, \"d\":{\"a\":1, \"b\":\"b\", \"c\":3}}\n  )\";\n\n  resp = Run({\"JSON.SET\", \"json\", \".\", json});\n  ASSERT_THAT(resp, \"OK\");\n\n  resp = Run({\"JSON.NUMMULTBY\", \"json\", \".a.*\", \"2\"});\n  EXPECT_THAT(resp, ErrArg(\"wrong JSON type of path value\"));\n\n  resp = Run({\"JSON.NUMMULTBY\", \"json\", \".b.*\", \"2\"});\n  EXPECT_EQ(resp, \"2\");\n\n  resp = Run({\"JSON.NUMMULTBY\", \"json\", \".c.*\", \"2\"});\n  EXPECT_THAT(resp, ErrArg(\"wrong JSON type of path value\"));\n\n  resp = Run({\"JSON.NUMMULTBY\", \"json\", \".d.*\", \"2\"});\n  EXPECT_EQ(resp, \"6\");\n\n  resp = Run({\"JSON.GET\", \"json\"});\n  EXPECT_EQ(\n      resp,\n      R\"({\"a\":{\"a\":\"a\"},\"b\":{\"a\":\"a\",\"b\":2},\"c\":{\"a\":\"a\",\"b\":\"b\"},\"d\":{\"a\":2,\"b\":\"b\",\"c\":6}})\");\n}\n\nTEST_F(JsonFamilyTest, NumericOperationsWithConversions) {\n  auto resp = Run({\"JSON.SET\", \"json\", \".\", R\"({\"a\":2.0})\"});\n  ASSERT_THAT(resp, \"OK\");\n\n  resp = Run({\"JSON.NUMINCRBY\", \"json\", \"$.a\", \"1\"});\n  EXPECT_EQ(resp, \"[3.0]\");\n\n  resp = Run({\"JSON.NUMINCRBY\", \"json\", \"$.a\", \"1.0\"});\n  EXPECT_EQ(resp, \"[4.0]\");\n\n  resp = Run({\"JSON.NUMMULTBY\", \"json\", \"$.a\", \"2\"});\n  EXPECT_EQ(resp, \"[8.0]\");\n\n  resp = Run({\"JSON.NUMMULTBY\", \"json\", \"$.a\", \"2.0\"});\n  EXPECT_EQ(resp, \"[16.0]\");\n\n  resp = Run({\"JSON.GET\", \"json\"});\n  EXPECT_EQ(resp, R\"({\"a\":16.0})\");\n\n  resp = Run({\"JSON.SET\", \"json\", \".\", R\"({\"a\":2})\"});\n  ASSERT_THAT(resp, \"OK\");\n\n  resp = Run({\"JSON.NUMINCRBY\", \"json\", \"$.a\", \"1\"});\n  EXPECT_EQ(resp, \"[3]\");\n\n  resp = Run({\"JSON.GET\", \"json\"});\n  EXPECT_EQ(resp, R\"({\"a\":3})\");  // Is still integer\n\n  resp = Run({\"JSON.NUMINCRBY\", \"json\", \"$.a\", \"1.0\"});\n  EXPECT_EQ(resp, \"[4.0]\");\n\n  resp = Run({\"JSON.GET\", \"json\"});\n  EXPECT_EQ(resp, R\"({\"a\":4.0})\");  
// Is converted to double\n\n  resp = Run({\"JSON.SET\", \"json\", \".\", R\"({\"a\":2})\"});\n  ASSERT_THAT(resp, \"OK\");\n\n  resp = Run({\"JSON.NUMMULTBY\", \"json\", \"$.a\", \"2\"});\n  EXPECT_EQ(resp, \"[4]\");\n\n  resp = Run({\"JSON.GET\", \"json\"});\n  EXPECT_EQ(resp, R\"({\"a\":4})\");  // Is still integer\n\n  resp = Run({\"JSON.NUMMULTBY\", \"json\", \"$.a\", \"2.0\"});\n  EXPECT_EQ(resp, \"[8.0]\");\n\n  resp = Run({\"JSON.GET\", \"json\"});\n  EXPECT_EQ(resp, R\"({\"a\":8.0})\");  // Is converted to double\n}\n\nTEST_F(JsonFamilyTest, NumericOperationsWithConversionsLegacy) {\n  auto resp = Run({\"JSON.SET\", \"json\", \".\", R\"({\"a\":2.0})\"});\n  ASSERT_THAT(resp, \"OK\");\n\n  resp = Run({\"JSON.NUMINCRBY\", \"json\", \".a\", \"1\"});\n  EXPECT_EQ(resp, \"3.0\");\n\n  resp = Run({\"JSON.NUMINCRBY\", \"json\", \".a\", \"1.0\"});\n  EXPECT_EQ(resp, \"4.0\");\n\n  resp = Run({\"JSON.NUMMULTBY\", \"json\", \".a\", \"2\"});\n  EXPECT_EQ(resp, \"8.0\");\n\n  resp = Run({\"JSON.NUMMULTBY\", \"json\", \".a\", \"2.0\"});\n  EXPECT_EQ(resp, \"16.0\");\n\n  resp = Run({\"JSON.GET\", \"json\"});\n  EXPECT_EQ(resp, R\"({\"a\":16.0})\");\n\n  resp = Run({\"JSON.SET\", \"json\", \".\", R\"({\"a\":2})\"});\n  ASSERT_THAT(resp, \"OK\");\n\n  resp = Run({\"JSON.NUMINCRBY\", \"json\", \".a\", \"1\"});\n  EXPECT_EQ(resp, \"3\");\n\n  resp = Run({\"JSON.GET\", \"json\"});\n  EXPECT_EQ(resp, R\"({\"a\":3})\");  // Is still integer\n\n  resp = Run({\"JSON.NUMINCRBY\", \"json\", \".a\", \"1.0\"});\n  EXPECT_EQ(resp, \"4.0\");\n\n  resp = Run({\"JSON.GET\", \"json\"});\n  EXPECT_EQ(resp, R\"({\"a\":4.0})\");  // Is converted to double\n\n  resp = Run({\"JSON.SET\", \"json\", \".\", R\"({\"a\":2})\"});\n  ASSERT_THAT(resp, \"OK\");\n\n  resp = Run({\"JSON.NUMMULTBY\", \"json\", \".a\", \"2\"});\n  EXPECT_EQ(resp, \"4\");\n\n  resp = Run({\"JSON.GET\", \"json\"});\n  EXPECT_EQ(resp, R\"({\"a\":4})\");  // Is still integer\n\n  resp = Run({\"JSON.NUMMULTBY\", \"json\", 
\".a\", \"2.0\"});\n  EXPECT_EQ(resp, \"8.0\");\n\n  resp = Run({\"JSON.GET\", \"json\"});\n  EXPECT_EQ(resp, R\"({\"a\":8.0})\");  // Is converted to double\n}\n\nTEST_F(JsonFamilyTest, NumericOperationsResp2Resp3) {\n  // Test RESP2 behavior\n  Run({\"HELLO\", \"2\"});\n\n  auto resp = Run({\"JSON.SET\", \"a\", \"$\", \"1\"});\n  ASSERT_THAT(resp, \"OK\");\n\n  resp = Run({\"JSON.NUMINCRBY\", \"a\", \"$\", \"1\"});\n  EXPECT_EQ(resp, \"[2]\");  // Currently returns string \"[2]\"\n\n  resp = Run({\"JSON.TYPE\", \"a\", \"$\"});\n  EXPECT_EQ(resp, \"integer\");\n\n  resp = Run({\"JSON.TYPE\", \"a\", \".\"});\n  EXPECT_EQ(resp, \"integer\");\n\n  resp = Run({\"JSON.NUMMULTBY\", \"a\", \"$\", \"2\"});\n  EXPECT_EQ(resp, \"[4]\");  // Currently returns string \"[4]\"\n\n  // Test RESP3 behavior\n  Run({\"HELLO\", \"3\"});\n  Run({\"FLUSHALL\"});\n\n  resp = Run({\"JSON.SET\", \"a\", \"$\", \"1\"});\n  ASSERT_THAT(resp, \"OK\");\n\n  resp = Run({\"JSON.NUMINCRBY\", \"a\", \"$\", \"1\"});\n  // In RESP3, this should return a proper array with integer: 1) (integer) 2\n  EXPECT_THAT(resp, IntArg(2));\n\n  resp = Run({\"JSON.TYPE\", \"a\", \"$\"});\n  EXPECT_THAT(resp, RespArray(ElementsAre(\"integer\")));\n\n  resp = Run({\"JSON.TYPE\", \"a\", \".\"});\n  EXPECT_EQ(resp, \"integer\");\n\n  resp = Run({\"JSON.NUMMULTBY\", \"a\", \"$\", \"2\"});\n  // In RESP3, this should return a proper array with integer: 1) (integer) 4\n  EXPECT_THAT(resp, IntArg(4));\n}\n\nTEST_F(JsonFamilyTest, Del) {\n  string json = R\"(\n    {\"a\":{}, \"b\":{\"a\":1}, \"c\":{\"a\":1, \"b\":2}, \"d\":{\"a\":1, \"b\":2, \"c\":3}, \"e\": [1,2,3,4,5]}}\n  )\";\n\n  auto resp = Run({\"JSON.SET\", \"json\", \".\", json});\n  ASSERT_THAT(resp, \"OK\");\n\n  resp = Run({\"JSON.DEL\", \"json\", \"$.d.*\"});\n  EXPECT_THAT(resp, IntArg(3));\n\n  resp = Run({\"JSON.GET\", \"json\"});\n  EXPECT_EQ(resp, R\"({\"a\":{},\"b\":{\"a\":1},\"c\":{\"a\":1,\"b\":2},\"d\":{},\"e\":[1,2,3,4,5]})\");\n\n  resp = 
Run({\"JSON.DEL\", \"json\", \"$.e[*]\"});\n  EXPECT_THAT(resp, IntArg(5));\n\n  resp = Run({\"JSON.GET\", \"json\"});\n  EXPECT_EQ(resp, R\"({\"a\":{},\"b\":{\"a\":1},\"c\":{\"a\":1,\"b\":2},\"d\":{},\"e\":[]})\");\n\n  resp = Run({\"JSON.DEL\", \"json\", \"$..*\"});\n\n  // TODO: legacy jsoncons implementation returns 8, but in practice it should return 5.\n  // redis-stack returns 5 as well.\n  // Once we drop jsoncons path, we can enforce equality here.\n  EXPECT_GE(resp.GetInt(), 5);\n\n  resp = Run({\"JSON.GET\", \"json\"});\n  EXPECT_EQ(resp, R\"({})\");\n\n  resp = Run({\"JSON.DEL\", \"json\"});\n  EXPECT_THAT(resp, IntArg(1));\n\n  resp = Run({\"GET\", \"json\"});  // This is legal since the key was removed\n  EXPECT_THAT(resp, ArgType(RespExpr::NIL));\n\n  resp = Run({\"JSON.GET\", \"json\"});\n  EXPECT_THAT(resp, ArgType(RespExpr::NIL));\n\n  json = R\"(\n    {\"a\":[{\"b\": [1,2,3]}], \"b\": [{\"c\": 2}], \"c']\":[1,2,3]}\n  )\";\n\n  resp = Run({\"JSON.SET\", \"json\", \".\", json});\n  ASSERT_THAT(resp, \"OK\");\n\n  resp = Run({\"JSON.DEL\", \"json\", \"$.a[0].b[0]\"});\n  EXPECT_THAT(resp, IntArg(1));\n\n  resp = Run({\"GET\", \"json\"});  // not a legal type\n  EXPECT_THAT(resp, ErrArg(\"Operation against a key holding the wrong kind of value\"));\n\n  resp = Run({\"JSON.GET\", \"json\"});\n  EXPECT_EQ(resp, R\"({\"a\":[{\"b\":[2,3]}],\"b\":[{\"c\":2}],\"c']\":[1,2,3]})\");\n\n  resp = Run({\"JSON.DEL\", \"json\", \"$.b[0].c\"});\n  EXPECT_THAT(resp, IntArg(1));\n\n  resp = Run({\"JSON.GET\", \"json\"});\n  EXPECT_EQ(resp, R\"({\"a\":[{\"b\":[2,3]}],\"b\":[{}],\"c']\":[1,2,3]})\");\n\n  resp = Run({\"JSON.DEL\", \"json\", \"$.*\"});\n  EXPECT_THAT(resp, IntArg(3));\n\n  resp = Run({\"JSON.GET\", \"json\"});\n  EXPECT_EQ(resp, R\"({})\");\n\n  resp = Run({\"JSON.SET\", \"json\", \"$\", R\"({\"a\": 1})\"});\n  ASSERT_THAT(resp, \"OK\");\n\n  resp = Run({\"JSON.DEL\", \"json\", \"$\"});\n  EXPECT_THAT(resp, IntArg(1));\n\n  resp = Run({\"JSON.GET\", 
\"json\"});\n  EXPECT_THAT(resp, ArgType(RespExpr::NIL));\n\n  if (absl::GetFlag(FLAGS_jsonpathv2)) {\n    // Test recursive delete with $..a path\n    resp = Run({\"JSON.SET\", \"doc2\", \"$\",\n                R\"({\"a\": {\"a\": 2, \"b\": 3}, \"b\": [\"a\", \"b\"], \"nested\": {\"b\": [true, \"a\", \"b\"]}})\"});\n    ASSERT_THAT(resp, \"OK\");\n\n    resp = Run({\"JSON.GET\", \"doc2\"});\n    EXPECT_EQ(resp, R\"({\"a\":{\"a\":2,\"b\":3},\"b\":[\"a\",\"b\"],\"nested\":{\"b\":[true,\"a\",\"b\"]}})\");\n\n    // JSON.DEL with $..a should find and delete the key \"a\" at root level\n    // but not string values \"a\" inside arrays\n    resp = Run({\"JSON.DEL\", \"doc2\", \"$..a\"});\n    EXPECT_THAT(resp, IntArg(1));\n\n    resp = Run({\"JSON.GET\", \"doc2\"});\n    EXPECT_EQ(resp, R\"({\"b\":[\"a\",\"b\"],\"nested\":{\"b\":[true,\"a\",\"b\"]}})\");\n  }\n}\n\nTEST_F(JsonFamilyTest, DelLegacy) {\n  string json = R\"(\n    {\"a\":{}, \"b\":{\"a\":1}, \"c\":{\"a\":1, \"b\":2}, \"d\":{\"a\":1, \"b\":2, \"c\":3}, \"e\": [1,2,3,4,5]}}\n  )\";\n\n  auto resp = Run({\"JSON.SET\", \"json\", \".\", json});\n  ASSERT_THAT(resp, \"OK\");\n\n  resp = Run({\"JSON.DEL\", \"json\", \".d.*\"});\n  EXPECT_THAT(resp, IntArg(3));\n\n  resp = Run({\"JSON.GET\", \"json\"});\n  EXPECT_EQ(resp, R\"({\"a\":{},\"b\":{\"a\":1},\"c\":{\"a\":1,\"b\":2},\"d\":{},\"e\":[1,2,3,4,5]})\");\n\n  resp = Run({\"JSON.DEL\", \"json\", \".e[*]\"});\n  EXPECT_THAT(resp, IntArg(5));\n\n  resp = Run({\"JSON.GET\", \"json\"});\n  EXPECT_EQ(resp, R\"({\"a\":{},\"b\":{\"a\":1},\"c\":{\"a\":1,\"b\":2},\"d\":{},\"e\":[]})\");\n\n  resp = Run({\"JSON.DEL\", \"json\", \"..*\"});\n  EXPECT_GE(resp.GetInt(), 5);\n\n  resp = Run({\"JSON.GET\", \"json\"});\n  EXPECT_EQ(resp, R\"({})\");\n\n  resp = Run({\"JSON.DEL\", \"json\"});\n  EXPECT_THAT(resp, IntArg(1));\n  resp = Run({\"GET\", \"json\"});  // This is legal since the key was removed\n  EXPECT_THAT(resp, ArgType(RespExpr::NIL));\n\n  resp = Run({\"JSON.GET\", 
\"json\"});\n  EXPECT_THAT(resp, ArgType(RespExpr::NIL));\n\n  json = R\"(\n    {\"a\":[{\"b\": [1,2,3]}], \"b\": [{\"c\": 2}], \"c']\":[1,2,3]}\n  )\";\n\n  resp = Run({\"JSON.SET\", \"json\", \".\", json});\n  ASSERT_THAT(resp, \"OK\");\n\n  resp = Run({\"JSON.DEL\", \"json\", \".a[0].b[0]\"});\n  EXPECT_THAT(resp, IntArg(1));\n\n  resp = Run({\"GET\", \"json\"});  // not a legal type\n  EXPECT_THAT(resp, ErrArg(\"Operation against a key holding the wrong kind of value\"));\n\n  resp = Run({\"JSON.GET\", \"json\"});\n  EXPECT_EQ(resp, R\"({\"a\":[{\"b\":[2,3]}],\"b\":[{\"c\":2}],\"c']\":[1,2,3]})\");\n\n  resp = Run({\"JSON.DEL\", \"json\", \".b[0].c\"});\n  EXPECT_THAT(resp, IntArg(1));\n\n  resp = Run({\"JSON.GET\", \"json\"});\n  EXPECT_EQ(resp, R\"({\"a\":[{\"b\":[2,3]}],\"b\":[{}],\"c']\":[1,2,3]})\");\n\n  resp = Run({\"JSON.DEL\", \"json\", \".*\"});\n  EXPECT_THAT(resp, IntArg(3));\n\n  resp = Run({\"JSON.GET\", \"json\"});\n  EXPECT_EQ(resp, R\"({})\");\n\n  resp = Run({\"JSON.SET\", \"json\", \".\", R\"({\"a\": 1})\"});\n  ASSERT_THAT(resp, \"OK\");\n\n  resp = Run({\"JSON.DEL\", \"json\", \".\"});\n  EXPECT_THAT(resp, IntArg(1));\n\n  resp = Run({\"JSON.GET\", \"json\"});\n  EXPECT_THAT(resp, ArgType(RespExpr::NIL));\n\n  resp = Run({\"JSON.SET\", \"json\", \".\", json});\n  ASSERT_THAT(resp, \"OK\");\n\n  resp = Run({\"JSON.DEL\", \"json\"});\n  EXPECT_THAT(resp, IntArg(1));\n\n  resp = Run({\"JSON.GET\", \"json\"});\n  EXPECT_THAT(resp, ArgType(RespExpr::NIL));\n}\n\nTEST_F(JsonFamilyTest, ObjKeys) {\n  string json = R\"(\n    {\"a\":{}, \"b\":{\"a\":\"a\"}, \"c\":{\"a\":\"a\", \"b\":\"bb\"}, \"d\":{\"a\":1, \"b\":\"b\", \"c\":{\"a\":3,\"b\":4}}, \"e\":1}\n  )\";\n\n  auto resp = Run({\"JSON.SET\", \"json\", \".\", json});\n  ASSERT_THAT(resp, \"OK\");\n\n  resp = Run({\"JSON.OBJKEYS\", \"json\", \"$\"});\n  EXPECT_THAT(resp.GetVec(), ElementsAre(\"a\", \"b\", \"c\", \"d\", \"e\"));\n\n  resp = Run({\"JSON.OBJKEYS\", \"json\", \"$.a\"});\n  
EXPECT_THAT(resp.GetVec(), IsEmpty());\n\n  resp = Run({\"JSON.OBJKEYS\", \"json\", \"$.b\"});\n  EXPECT_THAT(resp.GetVec(), ElementsAre(\"a\"));\n\n  resp = Run({\"JSON.OBJKEYS\", \"json\", \"$.*\"});\n  EXPECT_THAT(resp, ElementsAreArrays(IsEmpty(), ElementsAre(\"a\"), ElementsAre(\"a\", \"b\"),\n                                      ElementsAre(\"a\", \"b\", \"c\"), IsEmpty()));\n\n  resp = Run({\"JSON.OBJKEYS\", \"json\", \"$.notfound\"});\n  EXPECT_THAT(resp.GetVec(), IsEmpty());\n\n  json = R\"(\n     {\"a\":[7], \"inner\": {\"a\": {\"b\": 2, \"c\": 1337}}}\n   )\";\n\n  resp = Run({\"JSON.SET\", \"json\", \".\", json});\n  ASSERT_THAT(resp, \"OK\");\n\n  resp = Run({\"JSON.OBJKEYS\", \"json\", \"$..a\"});\n  EXPECT_THAT(resp, ElementsAreArrays(IsEmpty(), ElementsAre(\"b\", \"c\")));\n\n  json = R\"(\n     {\"a\":{}, \"b\":{\"c\":{\"d\": {\"e\": 1337}}}}\n   )\";\n\n  resp = Run({\"JSON.SET\", \"json\", \".\", json});\n  ASSERT_THAT(resp, \"OK\");\n\n  resp = Run({\"JSON.OBJKEYS\", \"json\", \"$..*\"});\n  EXPECT_THAT(resp, ElementsAreArrays(IsEmpty(), ElementsAre(\"c\"), ElementsAre(\"d\"),\n                                      ElementsAre(\"e\"), IsEmpty()));\n}\n\nTEST_F(JsonFamilyTest, ObjKeysLegacy) {\n  string json = R\"(\n    {\"a\":{}, \"b\":{\"a\":\"a\"}, \"c\":{\"a\":\"a\", \"b\":\"bb\"}, \"d\":{\"a\":1, \"b\":\"b\", \"c\":{\"a\":3,\"b\":4}}, \"e\":1}\n  )\";\n\n  auto resp = Run({\"JSON.SET\", \"json\", \".\", json});\n  ASSERT_THAT(resp, \"OK\");\n\n  resp = Run({\"JSON.OBJKEYS\", \"json\"});\n  EXPECT_THAT(resp.GetVec(), ElementsAre(\"a\", \"b\", \"c\", \"d\", \"e\"));\n\n  resp = Run({\"JSON.OBJKEYS\", \"json\", \".\"});\n  EXPECT_THAT(resp.GetVec(), ElementsAre(\"a\", \"b\", \"c\", \"d\", \"e\"));\n\n  resp = Run({\"JSON.OBJKEYS\", \"json\", \".a\"});\n  EXPECT_THAT(resp.GetVec(), IsEmpty());\n\n  resp = Run({\"JSON.OBJKEYS\", \"json\", \".b\"});\n  EXPECT_THAT(resp, \"a\");\n\n  resp = Run({\"JSON.OBJKEYS\", \"json\", \".*\"});\n  
EXPECT_THAT(resp.GetVec(), IsEmpty());\n\n  resp = Run({\"JSON.OBJKEYS\", \"json\", \".notfound\"});\n  EXPECT_THAT(resp, ArgType(RespExpr::NIL));\n\n  json = R\"(\n     {\"a\":[7], \"inner\": {\"a\": {\"b\": 2, \"c\": 1337}}}\n   )\";\n\n  resp = Run({\"JSON.SET\", \"json\", \".\", json});\n  ASSERT_THAT(resp, \"OK\");\n\n  resp = Run({\"JSON.OBJKEYS\", \"json\", \"..a\"});\n  EXPECT_THAT(resp.GetVec(), IsEmpty());\n\n  json = R\"(\n     {\"a\":{}, \"b\":{\"c\":{\"d\": {\"e\": 1337}}}}\n   )\";\n\n  resp = Run({\"JSON.SET\", \"json\", \".\", json});\n  ASSERT_THAT(resp, \"OK\");\n\n  resp = Run({\"JSON.OBJKEYS\", \"json\", \"..*\"});\n  EXPECT_THAT(resp.GetVec(), IsEmpty());\n}\n\nTEST_F(JsonFamilyTest, StrAppend) {\n  string json = R\"(\n    {\"a\":{\"a\":\"a\"}, \"b\":{\"a\":\"a\", \"b\":1}, \"c\":{\"a\":\"a\", \"b\":\"bb\"}, \"d\":{\"a\":1, \"b\":\"b\", \"c\":3}}\n  )\";\n\n  auto resp = Run({\"JSON.SET\", \"json\", \".\", json});\n  ASSERT_THAT(resp, \"OK\");\n\n  /* Test simple response from only one value */\n\n  resp = Run({\"JSON.STRAPPEND\", \"json\", \"$.a.a\", \"\\\"ab\\\"\"});\n  EXPECT_THAT(resp, IntArg(3));\n\n  resp = Run({\"JSON.GET\", \"json\"});\n  EXPECT_EQ(\n      resp,\n      R\"({\"a\":{\"a\":\"aab\"},\"b\":{\"a\":\"a\",\"b\":1},\"c\":{\"a\":\"a\",\"b\":\"bb\"},\"d\":{\"a\":1,\"b\":\"b\",\"c\":3}})\");\n\n  const char kVal[] = \"\\\"a\\\"\";\n\n  resp = Run({\"JSON.STRAPPEND\", \"json\", \"$.a.*\", kVal});\n  EXPECT_THAT(resp, IntArg(4));\n\n  resp = Run({\"JSON.GET\", \"json\"});\n  EXPECT_EQ(\n      resp,\n      R\"({\"a\":{\"a\":\"aaba\"},\"b\":{\"a\":\"a\",\"b\":1},\"c\":{\"a\":\"a\",\"b\":\"bb\"},\"d\":{\"a\":1,\"b\":\"b\",\"c\":3}})\");\n\n  resp = Run({\"JSON.STRAPPEND\", \"json\", \"$.c.b\", kVal});\n  EXPECT_THAT(resp, IntArg(3));\n\n  resp = Run({\"JSON.GET\", \"json\"});\n  EXPECT_EQ(\n      resp,\n      
R\"({\"a\":{\"a\":\"aaba\"},\"b\":{\"a\":\"a\",\"b\":1},\"c\":{\"a\":\"a\",\"b\":\"bba\"},\"d\":{\"a\":1,\"b\":\"b\",\"c\":3}})\");\n\n  /*\n  Test response from several possible values\n  In JSON V2, the response is an array of all possible values\n  */\n\n  resp = Run({\"JSON.STRAPPEND\", \"json\", \"$.b.*\", kVal});\n  ASSERT_THAT(resp, ArgType(RespExpr::ARRAY));\n  EXPECT_THAT(resp.GetVec(), ElementsAre(IntArg(2), ArgType(RespExpr::NIL)));\n\n  resp = Run({\"JSON.GET\", \"json\"});\n  EXPECT_EQ(\n      resp,\n      R\"({\"a\":{\"a\":\"aaba\"},\"b\":{\"a\":\"aa\",\"b\":1},\"c\":{\"a\":\"a\",\"b\":\"bba\"},\"d\":{\"a\":1,\"b\":\"b\",\"c\":3}})\");\n\n  resp = Run({\"JSON.STRAPPEND\", \"json\", \"$.c.*\", kVal});\n  ASSERT_THAT(resp, ArgType(RespExpr::ARRAY));\n  EXPECT_THAT(resp.GetVec(), ElementsAre(IntArg(2), IntArg(4)));\n\n  resp = Run({\"JSON.GET\", \"json\"});\n  EXPECT_EQ(\n      resp,\n      R\"({\"a\":{\"a\":\"aaba\"},\"b\":{\"a\":\"aa\",\"b\":1},\"c\":{\"a\":\"aa\",\"b\":\"bbaa\"},\"d\":{\"a\":1,\"b\":\"b\",\"c\":3}})\");\n\n  resp = Run({\"JSON.STRAPPEND\", \"json\", \"$.d.*\", kVal});\n  ASSERT_THAT(resp, ArgType(RespExpr::ARRAY));\n  EXPECT_THAT(resp.GetVec(),\n              ElementsAre(ArgType(RespExpr::NIL), IntArg(2), ArgType(RespExpr::NIL)));\n\n  resp = Run({\"JSON.GET\", \"json\"});\n  EXPECT_EQ(\n      resp,\n      R\"({\"a\":{\"a\":\"aaba\"},\"b\":{\"a\":\"aa\",\"b\":1},\"c\":{\"a\":\"aa\",\"b\":\"bbaa\"},\"d\":{\"a\":1,\"b\":\"ba\",\"c\":3}})\");\n\n  json = R\"(\n    {\"a\":{\"a\":\"a\", \"b\":\"aa\", \"c\":\"aaa\"}, \"b\":{\"a\":\"aaa\", \"b\":\"aa\", \"c\":\"a\"}}\n  )\";\n\n  resp = Run({\"JSON.SET\", \"json\", \".\", json});\n  EXPECT_EQ(resp, \"OK\");\n\n  resp = Run({\"JSON.STRAPPEND\", \"json\", \"$.a.*\", kVal});\n  ASSERT_THAT(resp, ArgType(RespExpr::ARRAY));\n  EXPECT_THAT(resp.GetVec(), ElementsAre(IntArg(2), IntArg(3), IntArg(4)));\n\n  resp = Run({\"JSON.GET\", \"json\"});\n  EXPECT_EQ(resp, 
R\"({\"a\":{\"a\":\"aa\",\"b\":\"aaa\",\"c\":\"aaaa\"},\"b\":{\"a\":\"aaa\",\"b\":\"aa\",\"c\":\"a\"}})\");\n\n  resp = Run({\"JSON.STRAPPEND\", \"json\", \"$.b.*\", kVal});\n  ASSERT_THAT(resp, ArgType(RespExpr::ARRAY));\n  EXPECT_THAT(resp.GetVec(), ElementsAre(IntArg(4), IntArg(3), IntArg(2)));\n\n  resp = Run({\"JSON.GET\", \"json\"});\n  EXPECT_EQ(resp, R\"({\"a\":{\"a\":\"aa\",\"b\":\"aaa\",\"c\":\"aaaa\"},\"b\":{\"a\":\"aaaa\",\"b\":\"aaa\",\"c\":\"aa\"}})\");\n\n  json = R\"(\n    {\"a\":{\"a\":\"a\", \"b\":\"aa\", \"c\":[\"aaaaa\", \"aaaaa\"]}, \"b\":{\"a\":\"aaa\", \"b\":[\"aaaaa\", \"aaaaa\"], \"c\":\"a\"}}\n  )\";\n\n  resp = Run({\"JSON.SET\", \"json\", \".\", json});\n  EXPECT_EQ(resp, \"OK\");\n\n  resp = Run({\"JSON.STRAPPEND\", \"json\", \"$.a.*\", kVal});\n  ASSERT_THAT(resp, ArgType(RespExpr::ARRAY));\n  EXPECT_THAT(resp.GetVec(), ElementsAre(IntArg(2), IntArg(3), ArgType(RespExpr::NIL)));\n\n  resp = Run({\"JSON.GET\", \"json\"});\n  EXPECT_EQ(\n      resp,\n      R\"({\"a\":{\"a\":\"aa\",\"b\":\"aaa\",\"c\":[\"aaaaa\",\"aaaaa\"]},\"b\":{\"a\":\"aaa\",\"b\":[\"aaaaa\",\"aaaaa\"],\"c\":\"a\"}})\");\n\n  resp = Run({\"JSON.STRAPPEND\", \"json\", \"$.b.*\", kVal});\n  ASSERT_THAT(resp, ArgType(RespExpr::ARRAY));\n  EXPECT_THAT(resp.GetVec(), ElementsAre(IntArg(4), ArgType(RespExpr::NIL), IntArg(2)));\n\n  resp = Run({\"JSON.GET\", \"json\"});\n  EXPECT_EQ(\n      resp,\n      R\"({\"a\":{\"a\":\"aa\",\"b\":\"aaa\",\"c\":[\"aaaaa\",\"aaaaa\"]},\"b\":{\"a\":\"aaaa\",\"b\":[\"aaaaa\",\"aaaaa\"],\"c\":\"aa\"}})\");\n\n  json = R\"(\n    {\"a\":{\"a\":\"a\", \"b\":\"aa\", \"c\":{\"c\": \"aaaaa\"}}, \"b\":{\"a\":\"aaa\", \"b\":{\"b\": \"aaaaa\"}, \"c\":\"a\"}}\n  )\";\n\n  resp = Run({\"JSON.SET\", \"json\", \".\", json});\n  EXPECT_EQ(resp, \"OK\");\n\n  resp = Run({\"JSON.STRAPPEND\", \"json\", \"$.a.*\", kVal});\n  ASSERT_THAT(resp, ArgType(RespExpr::ARRAY));\n  EXPECT_THAT(resp.GetVec(), ElementsAre(IntArg(2), IntArg(3), ArgType(RespExpr::NIL)));\n\n 
 resp = Run({\"JSON.GET\", \"json\"});\n  EXPECT_EQ(\n      resp,\n      R\"({\"a\":{\"a\":\"aa\",\"b\":\"aaa\",\"c\":{\"c\":\"aaaaa\"}},\"b\":{\"a\":\"aaa\",\"b\":{\"b\":\"aaaaa\"},\"c\":\"a\"}})\");\n\n  resp = Run({\"JSON.STRAPPEND\", \"json\", \"$.b.*\", kVal});\n  ASSERT_THAT(resp, ArgType(RespExpr::ARRAY));\n  EXPECT_THAT(resp.GetVec(), ElementsAre(IntArg(4), ArgType(RespExpr::NIL), IntArg(2)));\n\n  resp = Run({\"JSON.GET\", \"json\"});\n  EXPECT_EQ(\n      resp,\n      R\"({\"a\":{\"a\":\"aa\",\"b\":\"aaa\",\"c\":{\"c\":\"aaaaa\"}},\"b\":{\"a\":\"aaaa\",\"b\":{\"b\":\"aaaaa\"},\"c\":\"aa\"}})\");\n\n  json = R\"(\n    {\"a\":\"foo\", \"inner\": {\"a\": \"bye\"}, \"inner1\": {\"a\": 7}}\n  )\";\n\n  resp = Run({\"JSON.SET\", \"json\", \".\", json});\n  EXPECT_EQ(resp, \"OK\");\n\n  resp = Run({\"JSON.STRAPPEND\", \"json\", \"$..a\", \"\\\"bar\\\"\"});\n  ASSERT_THAT(resp, ArgType(RespExpr::ARRAY));\n  EXPECT_THAT(resp.GetVec(), ElementsAre(IntArg(6), IntArg(6), ArgType(RespExpr::NIL)));\n\n  resp = Run({\"JSON.GET\", \"json\"});\n  EXPECT_EQ(resp, R\"({\"a\":\"foobar\",\"inner\":{\"a\":\"byebar\"},\"inner1\":{\"a\":7}})\");\n}\n\nTEST_F(JsonFamilyTest, StrAppendLegacyMode) {\n  string json = R\"(\n    {\"a\":{\"a\":\"a\"}, \"b\":{\"a\":\"a\", \"b\":1}, \"c\":{\"a\":\"a\", \"b\":\"bb\"}, \"d\":{\"a\":1, \"b\":\"b\", \"c\":3}}\n  )\";\n\n  auto resp = Run({\"JSON.SET\", \"json\", \".\", json});\n  ASSERT_THAT(resp, \"OK\");\n\n  /* Test simple response from only one value */\n\n  resp = Run({\"JSON.STRAPPEND\", \"json\", \".a.a\", \"\\\"ab\\\"\"});\n  ASSERT_THAT(resp, ArgType(RespExpr::INT64));\n  EXPECT_THAT(resp, IntArg(3));\n\n  resp = Run({\"JSON.GET\", \"json\"});\n  EXPECT_THAT(\n      resp,\n      R\"({\"a\":{\"a\":\"aab\"},\"b\":{\"a\":\"a\",\"b\":1},\"c\":{\"a\":\"a\",\"b\":\"bb\"},\"d\":{\"a\":1,\"b\":\"b\",\"c\":3}})\");\n\n  const char kVal[] = \"\\\"a\\\"\";\n\n  resp = Run({\"JSON.STRAPPEND\", \"json\", \".a.*\", kVal});\n  ASSERT_THAT(resp, 
ArgType(RespExpr::INT64));\n  EXPECT_THAT(resp, IntArg(4));\n\n  resp = Run({\"JSON.GET\", \"json\"});\n  EXPECT_THAT(\n      resp,\n      R\"({\"a\":{\"a\":\"aaba\"},\"b\":{\"a\":\"a\",\"b\":1},\"c\":{\"a\":\"a\",\"b\":\"bb\"},\"d\":{\"a\":1,\"b\":\"b\",\"c\":3}})\");\n\n  resp = Run({\"JSON.STRAPPEND\", \"json\", \".c.b\", kVal});\n  ASSERT_THAT(resp, ArgType(RespExpr::INT64));\n  EXPECT_THAT(resp, IntArg(3));\n\n  resp = Run({\"JSON.GET\", \"json\"});\n  EXPECT_THAT(\n      resp,\n      R\"({\"a\":{\"a\":\"aaba\"},\"b\":{\"a\":\"a\",\"b\":1},\"c\":{\"a\":\"a\",\"b\":\"bba\"},\"d\":{\"a\":1,\"b\":\"b\",\"c\":3}})\");\n\n  /*\n  Test response from several possible values\n  In JSON legacy mode, the response contains only one value - the new length of the last updated\n  string.\n  */\n\n  resp = Run({\"JSON.STRAPPEND\", \"json\", \".b.*\", kVal});\n  EXPECT_THAT(resp, IntArg(2));\n\n  resp = Run({\"JSON.GET\", \"json\"});\n  EXPECT_THAT(\n      resp,\n      R\"({\"a\":{\"a\":\"aaba\"},\"b\":{\"a\":\"aa\",\"b\":1},\"c\":{\"a\":\"a\",\"b\":\"bba\"},\"d\":{\"a\":1,\"b\":\"b\",\"c\":3}})\");\n\n  resp = Run({\"JSON.STRAPPEND\", \"json\", \".c.*\", kVal});\n  ASSERT_THAT(resp, ArgType(RespExpr::INT64));\n  EXPECT_THAT(resp, IntArg(4));\n\n  resp = Run({\"JSON.GET\", \"json\"});\n  EXPECT_THAT(\n      resp,\n      R\"({\"a\":{\"a\":\"aaba\"},\"b\":{\"a\":\"aa\",\"b\":1},\"c\":{\"a\":\"aa\",\"b\":\"bbaa\"},\"d\":{\"a\":1,\"b\":\"b\",\"c\":3}})\");\n\n  resp = Run({\"JSON.STRAPPEND\", \"json\", \".d.*\", kVal});\n  ASSERT_THAT(resp, ArgType(RespExpr::INT64));\n  EXPECT_THAT(resp, IntArg(2));\n\n  resp = Run({\"JSON.GET\", \"json\"});\n  EXPECT_THAT(\n      resp,\n      R\"({\"a\":{\"a\":\"aaba\"},\"b\":{\"a\":\"aa\",\"b\":1},\"c\":{\"a\":\"aa\",\"b\":\"bbaa\"},\"d\":{\"a\":1,\"b\":\"ba\",\"c\":3}})\");\n\n  json = R\"(\n    {\"a\":{\"a\":\"a\", \"b\":\"aa\", \"c\":\"aaa\"}, \"b\":{\"a\":\"aaa\", \"b\":\"aa\", \"c\":\"a\"}}\n  )\";\n\n  resp = Run({\"JSON.SET\", \"json\", 
\".\", json});\n  EXPECT_EQ(resp, \"OK\");\n\n  resp = Run({\"JSON.STRAPPEND\", \"json\", \".a.*\", kVal});\n  ASSERT_THAT(resp, ArgType(RespExpr::INT64));\n  EXPECT_THAT(resp, IntArg(4));\n\n  resp = Run({\"JSON.GET\", \"json\"});\n  EXPECT_EQ(resp, R\"({\"a\":{\"a\":\"aa\",\"b\":\"aaa\",\"c\":\"aaaa\"},\"b\":{\"a\":\"aaa\",\"b\":\"aa\",\"c\":\"a\"}})\");\n\n  resp = Run({\"JSON.STRAPPEND\", \"json\", \".b.*\", kVal});\n  ASSERT_THAT(resp, ArgType(RespExpr::INT64));\n  EXPECT_THAT(resp, IntArg(2));\n\n  resp = Run({\"JSON.GET\", \"json\"});\n  EXPECT_EQ(resp, R\"({\"a\":{\"a\":\"aa\",\"b\":\"aaa\",\"c\":\"aaaa\"},\"b\":{\"a\":\"aaaa\",\"b\":\"aaa\",\"c\":\"aa\"}})\");\n\n  json = R\"(\n    {\"a\":{\"a\":\"a\", \"b\":\"aa\", \"c\":[\"aaaaa\", \"aaaaa\"]}, \"b\":{\"a\":\"aaa\", \"b\":[\"aaaaa\", \"aaaaa\"], \"c\":\"a\"}}\n  )\";\n\n  resp = Run({\"JSON.SET\", \"json\", \".\", json});\n  EXPECT_EQ(resp, \"OK\");\n\n  resp = Run({\"JSON.STRAPPEND\", \"json\", \".a.*\", kVal});\n  ASSERT_THAT(resp, ArgType(RespExpr::INT64));\n  EXPECT_THAT(resp, IntArg(3));\n\n  resp = Run({\"JSON.GET\", \"json\"});\n  EXPECT_EQ(\n      resp,\n      R\"({\"a\":{\"a\":\"aa\",\"b\":\"aaa\",\"c\":[\"aaaaa\",\"aaaaa\"]},\"b\":{\"a\":\"aaa\",\"b\":[\"aaaaa\",\"aaaaa\"],\"c\":\"a\"}})\");\n\n  resp = Run({\"JSON.STRAPPEND\", \"json\", \".b.*\", kVal});\n  ASSERT_THAT(resp, ArgType(RespExpr::INT64));\n  EXPECT_THAT(resp, IntArg(2));\n\n  resp = Run({\"JSON.GET\", \"json\"});\n  EXPECT_EQ(\n      resp,\n      R\"({\"a\":{\"a\":\"aa\",\"b\":\"aaa\",\"c\":[\"aaaaa\",\"aaaaa\"]},\"b\":{\"a\":\"aaaa\",\"b\":[\"aaaaa\",\"aaaaa\"],\"c\":\"aa\"}})\");\n\n  json = R\"(\n    {\"a\":{\"a\":\"a\", \"b\":\"aa\", \"c\":{\"c\": \"aaaaa\"}}, \"b\":{\"a\":\"aaa\", \"b\":{\"b\": \"aaaaa\"}, \"c\":\"a\"}}\n  )\";\n\n  resp = Run({\"JSON.SET\", \"json\", \".\", json});\n  EXPECT_EQ(resp, \"OK\");\n\n  resp = Run({\"JSON.STRAPPEND\", \"json\", \".a.*\", kVal});\n  ASSERT_THAT(resp, ArgType(RespExpr::INT64));\n  
EXPECT_THAT(resp, IntArg(3));\n\n  resp = Run({\"JSON.GET\", \"json\"});\n  EXPECT_EQ(\n      resp,\n      R\"({\"a\":{\"a\":\"aa\",\"b\":\"aaa\",\"c\":{\"c\":\"aaaaa\"}},\"b\":{\"a\":\"aaa\",\"b\":{\"b\":\"aaaaa\"},\"c\":\"a\"}})\");\n\n  resp = Run({\"JSON.STRAPPEND\", \"json\", \".b.*\", kVal});\n  ASSERT_THAT(resp, ArgType(RespExpr::INT64));\n  EXPECT_THAT(resp, IntArg(2));\n\n  resp = Run({\"JSON.GET\", \"json\"});\n  EXPECT_EQ(\n      resp,\n      R\"({\"a\":{\"a\":\"aa\",\"b\":\"aaa\",\"c\":{\"c\":\"aaaaa\"}},\"b\":{\"a\":\"aaaa\",\"b\":{\"b\":\"aaaaa\"},\"c\":\"aa\"}})\");\n\n  json = R\"(\n    {\"a\":\"foo\", \"inner\": {\"a\": \"bye\"}, \"inner1\": {\"a\": 7}}\n  )\";\n\n  resp = Run({\"JSON.SET\", \"json\", \".\", json});\n  EXPECT_EQ(resp, \"OK\");\n\n  resp = Run({\"JSON.STRAPPEND\", \"json\", \"..a\", \"\\\"bar\\\"\"});\n  ASSERT_THAT(resp, ArgType(RespExpr::INT64));\n  EXPECT_THAT(resp, IntArg(6));\n\n  resp = Run({\"JSON.GET\", \"json\"});\n  EXPECT_EQ(resp, R\"({\"a\":\"foobar\",\"inner\":{\"a\":\"byebar\"},\"inner1\":{\"a\":7}})\");\n}\n\nTEST_F(JsonFamilyTest, Clear) {\n  string json = R\"(\n    [[], [0], [0,1], [0,1,2], 1, true, null, \"d\"]\n  )\";\n\n  auto resp = Run({\"JSON.SET\", \"json\", \".\", json});\n  ASSERT_THAT(resp, \"OK\");\n\n  resp = Run({\"JSON.CLEAR\", \"json\", \"$[*]\"});\n  EXPECT_THAT(resp, IntArg(5));\n\n  resp = Run({\"JSON.GET\", \"json\"});\n  EXPECT_EQ(resp, R\"([[],[],[],[],0,true,null,\"d\"])\");\n\n  resp = Run({\"JSON.CLEAR\", \"json\", \"$\"});\n  EXPECT_THAT(resp, IntArg(1));\n\n  resp = Run({\"JSON.GET\", \"json\"});\n  EXPECT_EQ(resp, R\"([])\");\n\n  json = R\"(\n    {\"children\": [\"Yossi\", \"Rafi\", \"Benni\", \"Avraham\", \"Yehoshua\", \"Moshe\"]}\n  )\";\n\n  resp = Run({\"JSON.SET\", \"json\", \".\", json});\n  ASSERT_THAT(resp, \"OK\");\n\n  resp = Run({\"JSON.CLEAR\", \"json\", \"$.children\"});\n  EXPECT_THAT(resp, IntArg(1));\n\n  resp = Run({\"JSON.GET\", \"json\"});\n  EXPECT_EQ(resp, 
R\"({\"children\":[]})\");\n\n  resp = Run({\"JSON.CLEAR\", \"json\", \"$\"});\n  EXPECT_THAT(resp, IntArg(1));\n\n  resp = Run({\"JSON.GET\", \"json\"});\n  EXPECT_EQ(resp, R\"({})\");\n}\n\nTEST_F(JsonFamilyTest, ClearLegacy) {\n  string json = R\"(\n    [[], [0], [0,1], [0,1,2], 1, true, null, \"d\"]\n  )\";\n\n  auto resp = Run({\"JSON.SET\", \"json\", \".\", json});\n  ASSERT_THAT(resp, \"OK\");\n\n  resp = Run({\"JSON.CLEAR\", \"json\", \"[*]\"});\n  EXPECT_THAT(resp, IntArg(5));\n\n  resp = Run({\"JSON.GET\", \"json\"});\n  EXPECT_EQ(resp, R\"([[],[],[],[],0,true,null,\"d\"])\");\n\n  resp = Run({\"JSON.CLEAR\", \"json\", \".\"});\n  EXPECT_THAT(resp, IntArg(1));\n\n  resp = Run({\"JSON.GET\", \"json\"});\n  EXPECT_EQ(resp, R\"([])\");\n\n  json = R\"(\n    {\"children\": [\"Yossi\", \"Rafi\", \"Benni\", \"Avraham\", \"Yehoshua\", \"Moshe\"]}\n  )\";\n\n  resp = Run({\"JSON.SET\", \"json\", \".\", json});\n  ASSERT_THAT(resp, \"OK\");\n\n  resp = Run({\"JSON.CLEAR\", \"json\", \".children\"});\n  EXPECT_THAT(resp, IntArg(1));\n\n  resp = Run({\"JSON.GET\", \"json\"});\n  EXPECT_EQ(resp, R\"({\"children\":[]})\");\n\n  resp = Run({\"JSON.CLEAR\", \"json\", \".\"});\n  EXPECT_THAT(resp, IntArg(1));\n\n  resp = Run({\"JSON.GET\", \"json\"});\n  EXPECT_EQ(resp, R\"({})\");\n\n  resp = Run({\"JSON.SET\", \"json\", \".\", json});\n  ASSERT_THAT(resp, \"OK\");\n\n  resp = Run({\"JSON.CLEAR\", \"json\"});\n  EXPECT_THAT(resp, IntArg(1));\n\n  resp = Run({\"JSON.GET\", \"json\"});\n  EXPECT_EQ(resp, R\"({})\");\n}\n\nTEST_F(JsonFamilyTest, ArrPop) {\n  string json = R\"(\n    [[6,1,6], [7,2,7], [8,3,8]]\n  )\";\n\n  auto resp = Run({\"JSON.SET\", \"json\", \".\", json});\n  ASSERT_THAT(resp, \"OK\");\n\n  resp = Run({\"JSON.ARRPOP\", \"json\", \"$[*]\", \"-2\"});\n  ASSERT_EQ(RespExpr::ARRAY, resp.type);\n  EXPECT_THAT(resp.GetVec(), ElementsAre(\"1\", \"2\", \"3\"));\n\n  resp = Run({\"JSON.GET\", \"json\"});\n  EXPECT_EQ(resp, R\"([[6,6],[7,7],[8,8]])\");\n\n  json 
= R\"(\n    [[], [\"a\"], [\"a\", \"b\"]]\n  )\";\n\n  resp = Run({\"JSON.SET\", \"json\", \".\", json});\n  ASSERT_THAT(resp, \"OK\");\n\n  resp = Run({\"JSON.ARRPOP\", \"json\", \"$[*]\"});\n  ASSERT_EQ(RespExpr::ARRAY, resp.type);\n  EXPECT_THAT(resp.GetVec(), ElementsAre(ArgType(RespExpr::NIL), R\"(\"a\")\", R\"(\"b\")\"));\n\n  resp = Run({\"JSON.GET\", \"json\"});\n  EXPECT_EQ(resp, R\"([[],[],[\"a\"]])\");\n}\n\nTEST_F(JsonFamilyTest, ArrPopLegacy) {\n  string json = R\"(\n    [[6,1,6], [7,2,7], [8,3,8]]\n  )\";\n\n  auto resp = Run({\"JSON.SET\", \"json\", \".\", json});\n  ASSERT_THAT(resp, \"OK\");\n\n  resp = Run({\"JSON.ARRPOP\", \"json\", \"[*]\", \"-2\"});\n  EXPECT_EQ(resp, R\"(3)\");\n\n  resp = Run({\"JSON.GET\", \"json\"});\n  EXPECT_EQ(resp, R\"([[6,6],[7,7],[8,8]])\");\n\n  json = R\"(\n    [[], [\"a\"], [\"a\", \"b\"]]\n  )\";\n\n  resp = Run({\"JSON.SET\", \"json\", \".\", json});\n  ASSERT_THAT(resp, \"OK\");\n\n  resp = Run({\"JSON.ARRPOP\", \"json\", \".\"});\n  EXPECT_EQ(resp, R\"([\"a\",\"b\"])\");\n\n  resp = Run({\"JSON.GET\", \"json\"});\n  EXPECT_EQ(resp, R\"([[],[\"a\"]])\");\n\n  resp = Run({\"JSON.SET\", \"json\", \".\", json});\n  ASSERT_THAT(resp, \"OK\");\n\n  resp = Run({\"JSON.ARRPOP\", \"json\", \".\", \"0\"});\n  EXPECT_EQ(resp, \"[]\");\n\n  resp = Run({\"JSON.GET\", \"json\"});\n  EXPECT_EQ(resp, R\"([[\"a\"],[\"a\",\"b\"]])\");\n\n  resp = Run({\"JSON.ARRPOP\", \"json\"});\n  EXPECT_EQ(resp, R\"([\"a\",\"b\"])\");\n\n  json = R\"(\n    {\"a\":\"b\"}\n  )\";\n\n  resp = Run({\"JSON.SET\", \"json\", \".\", json});\n  ASSERT_THAT(resp, \"OK\");\n\n  resp = Run({\"JSON.ARRPOP\", \"json\", \".\"});\n  EXPECT_THAT(resp, ArgType(RespExpr::NIL));\n\n  resp = Run({\"JSON.SET\", \"json\", \".\", \"[]\"});\n  ASSERT_THAT(resp, \"OK\");\n\n  resp = Run({\"JSON.ARRPOP\", \"json\", \".\"});\n  EXPECT_THAT(resp, ArgType(RespExpr::NIL));\n}\n\nTEST_F(JsonFamilyTest, ArrPopOutOfRange) {\n  string json = R\"(\n    [0,1,2,3,4,5]\n  )\";\n\n 
 auto resp = Run({\"JSON.SET\", \"arr\", \"$\", json});\n  ASSERT_THAT(resp, \"OK\");\n\n  resp = Run({\"JSON.ARRPOP\", \"arr\", \"$\", \"-55\"});\n  EXPECT_EQ(resp, \"0\");\n\n  resp = Run({\"JSON.SET\", \"arr\", \"$\", json});\n  ASSERT_THAT(resp, \"OK\");\n\n  resp = Run({\"JSON.ARRPOP\", \"arr\", \"$\", \"55\"});\n  EXPECT_EQ(resp, \"5\");\n\n  // Test legacy mode\n  resp = Run({\"JSON.SET\", \"arr\", \".\", json});\n  ASSERT_THAT(resp, \"OK\");\n\n  resp = Run({\"JSON.ARRPOP\", \"arr\", \".\", \"-55\"});\n  EXPECT_EQ(resp, \"0\");\n\n  resp = Run({\"JSON.SET\", \"arr\", \".\", json});\n  ASSERT_THAT(resp, \"OK\");\n\n  resp = Run({\"JSON.ARRPOP\", \"arr\", \".\", \"55\"});\n  EXPECT_EQ(resp, \"5\");\n}\n\nTEST_F(JsonFamilyTest, ArrTrim) {\n  string json = R\"(\n    [[], [\"a\"], [\"a\", \"b\"], [\"a\", \"b\", \"c\"]]\n  )\";\n\n  auto resp = Run({\"JSON.SET\", \"json\", \".\", json});\n  ASSERT_THAT(resp, \"OK\");\n\n  resp = Run({\"JSON.ARRTRIM\", \"json\", \"$[*]\", \"0\", \"1\"});\n  ASSERT_EQ(RespExpr::ARRAY, resp.type);\n  EXPECT_THAT(resp.GetVec(), ElementsAre(IntArg(0), IntArg(1), IntArg(2), IntArg(2)));\n\n  resp = Run({\"JSON.GET\", \"json\"});\n  EXPECT_EQ(resp, R\"([[],[\"a\"],[\"a\",\"b\"],[\"a\",\"b\"]])\");\n\n  json = R\"(\n    {\"a\":[], \"nested\": {\"a\": [1,4]}}\n  )\";\n\n  resp = Run({\"JSON.SET\", \"json\", \".\", json});\n  ASSERT_THAT(resp, \"OK\");\n\n  resp = Run({\"JSON.ARRTRIM\", \"json\", \"$..a\", \"0\", \"1\"});\n  ASSERT_EQ(RespExpr::ARRAY, resp.type);\n  EXPECT_THAT(resp.GetVec(), ElementsAre(IntArg(0), IntArg(2)));\n\n  resp = Run({\"JSON.GET\", \"json\"});\n  EXPECT_EQ(resp, R\"({\"a\":[],\"nested\":{\"a\":[1,4]}})\");\n\n  json = R\"(\n    {\"a\":[1,2,3,2], \"nested\": {\"a\": false}}\n  )\";\n\n  resp = Run({\"JSON.SET\", \"json\", \".\", json});\n  ASSERT_THAT(resp, \"OK\");\n\n  resp = Run({\"JSON.ARRTRIM\", \"json\", \"$..a\", \"1\", \"2\"});\n  ASSERT_EQ(RespExpr::ARRAY, resp.type);\n  EXPECT_THAT(resp.GetVec(), 
ElementsAre(IntArg(2), ArgType(RespExpr::NIL)));\n\n  resp = Run({\"JSON.GET\", \"json\"});\n  EXPECT_EQ(resp, R\"({\"a\":[2,3],\"nested\":{\"a\":false}})\");\n\n  json = R\"(\n    [1,2,3,4,5,6,7]\n  )\";\n\n  resp = Run({\"JSON.SET\", \"json\", \"$\", json});\n  ASSERT_THAT(resp, \"OK\");\n\n  resp = Run({\"JSON.ARRTRIM\", \"json\", \"$\", \"2\", \"3\"});\n  EXPECT_THAT(resp, IntArg(2));\n\n  resp = Run({\"JSON.GET\", \"json\"});\n  EXPECT_EQ(resp, R\"([3,4])\");\n}\n\nTEST_F(JsonFamilyTest, ArrTrimLegacy) {\n  string json = R\"(\n    [[], [\"a\"], [\"a\", \"b\"], [\"a\", \"b\", \"c\"]]\n  )\";\n\n  auto resp = Run({\"JSON.SET\", \"json\", \".\", json});\n  ASSERT_THAT(resp, \"OK\");\n\n  resp = Run({\"JSON.ARRTRIM\", \"json\", \"[*]\", \"0\", \"1\"});\n  EXPECT_THAT(resp, IntArg(2));\n\n  resp = Run({\"JSON.GET\", \"json\"});\n  EXPECT_EQ(resp, R\"([[],[\"a\"],[\"a\",\"b\"],[\"a\",\"b\"]])\");\n\n  json = R\"(\n    {\"a\":[], \"nested\": {\"a\": [1,4]}}\n  )\";\n\n  resp = Run({\"JSON.SET\", \"json\", \".\", json});\n  ASSERT_THAT(resp, \"OK\");\n\n  resp = Run({\"JSON.ARRTRIM\", \"json\", \"..a\", \"0\", \"1\"});\n  EXPECT_THAT(resp, IntArg(2));\n\n  resp = Run({\"JSON.GET\", \"json\"});\n  EXPECT_EQ(resp, R\"({\"a\":[],\"nested\":{\"a\":[1,4]}})\");\n\n  json = R\"(\n    {\"a\":[1,2,3,2], \"nested\": {\"a\": false}}\n  )\";\n\n  resp = Run({\"JSON.SET\", \"json\", \".\", json});\n  ASSERT_THAT(resp, \"OK\");\n\n  resp = Run({\"JSON.ARRTRIM\", \"json\", \"..a\", \"1\", \"2\"});\n  EXPECT_THAT(resp, IntArg(2));\n\n  resp = Run({\"JSON.GET\", \"json\"});\n  EXPECT_EQ(resp, R\"({\"a\":[2,3],\"nested\":{\"a\":false}})\");\n\n  json = R\"(\n    [1,2,3,4,5,6,7]\n  )\";\n\n  resp = Run({\"JSON.SET\", \"json\", \"$\", json});\n  ASSERT_THAT(resp, \"OK\");\n\n  resp = Run({\"JSON.ARRTRIM\", \"json\", \".\", \"2\", \"3\"});\n  EXPECT_THAT(resp, IntArg(2));\n\n  resp = Run({\"JSON.GET\", \"json\"});\n  EXPECT_EQ(resp, R\"([3,4])\");\n\n  json = R\"(\n    {\"a\":\"b\"}\n  
)\";\n\n  resp = Run({\"JSON.SET\", \"json\", \".\", json});\n  ASSERT_THAT(resp, \"OK\");\n\n  resp = Run({\"JSON.ARRTRIM\", \"json\", \".\", \"0\", \"0\"});\n  EXPECT_THAT(resp, ErrArg(\"wrong JSON type of path value\"));\n}\n\nTEST_F(JsonFamilyTest, ArrTrimOutOfRange) {\n  string arr = R\"(\n    [0,1,2,3,4]\n  )\";\n\n  auto resp = Run({\"JSON.SET\", \"arr\", \"$\", arr});\n  ASSERT_THAT(resp, \"OK\");\n\n  resp = Run({\"JSON.ARRTRIM\", \"arr\", \"$\", \"-1\", \"3\"});\n  EXPECT_THAT(resp, IntArg(0));\n  EXPECT_EQ(Run({\"JSON.GET\", \"arr\"}), \"[]\");\n\n  resp = Run({\"JSON.SET\", \"arr\", \"$\", arr});\n  ASSERT_THAT(resp, \"OK\");\n\n  resp = Run({\"JSON.ARRTRIM\", \"arr\", \"$\", \"54\", \"55\"});\n  EXPECT_THAT(resp, IntArg(0));\n  EXPECT_EQ(Run({\"JSON.GET\", \"arr\"}), \"[]\");\n\n  resp = Run({\"JSON.SET\", \"arr\", \"$\", arr});\n  ASSERT_THAT(resp, \"OK\");\n\n  resp = Run({\"JSON.ARRTRIM\", \"arr\", \"$\", \"56\", \"55\"});\n  EXPECT_THAT(resp, IntArg(0));\n  EXPECT_EQ(Run({\"JSON.GET\", \"arr\"}), \"[]\");\n\n  resp = Run({\"JSON.SET\", \"arr\", \"$\", arr});\n  ASSERT_THAT(resp, \"OK\");\n\n  resp = Run({\"JSON.ARRTRIM\", \"arr\", \"$\", \"-55\", \"-55\"});\n  EXPECT_THAT(resp, IntArg(1));\n  EXPECT_EQ(Run({\"JSON.GET\", \"arr\"}), \"[0]\");\n\n  resp = Run({\"JSON.SET\", \"arr\", \"$\", arr});\n  ASSERT_THAT(resp, \"OK\");\n\n  resp = Run({\"JSON.ARRTRIM\", \"arr\", \"$\", \"-2\", \"-1\"});\n  EXPECT_THAT(resp, IntArg(2));\n  EXPECT_EQ(Run({\"JSON.GET\", \"arr\"}), \"[3,4]\");\n\n  resp = Run({\"JSON.SET\", \"arr\", \"$\", arr});\n  ASSERT_THAT(resp, \"OK\");\n\n  resp = Run({\"JSON.ARRTRIM\", \"arr\", \"$\", \"-1\", \"-2\"});\n  EXPECT_THAT(resp, IntArg(0));\n  EXPECT_EQ(Run({\"JSON.GET\", \"arr\"}), \"[]\");\n}\n\nTEST_F(JsonFamilyTest, ArrInsert) {\n  string json = R\"(\n    [[], [\"a\"], [\"a\", \"b\"]]\n  )\";\n\n  auto resp = Run({\"JSON.SET\", \"json\", \".\", json});\n  ASSERT_THAT(resp, \"OK\");\n\n  resp = Run({\"JSON.ARRINSERT\", 
\"json\", \"$[*]\", \"0\", R\"(\"a\")\"});\n  ASSERT_EQ(RespExpr::ARRAY, resp.type);\n  EXPECT_THAT(resp.GetVec(), ElementsAre(IntArg(1), IntArg(2), IntArg(3)));\n\n  resp = Run({\"JSON.GET\", \"json\"});\n  EXPECT_EQ(resp, R\"([[\"a\"],[\"a\",\"a\"],[\"a\",\"a\",\"b\"]])\");\n\n  resp = Run({\"JSON.ARRINSERT\", \"json\", \"$[*]\", \"-1\", R\"(\"b\")\"});\n  ASSERT_EQ(RespExpr::ARRAY, resp.type);\n  EXPECT_THAT(resp.GetVec(), ElementsAre(IntArg(2), IntArg(3), IntArg(4)));\n\n  resp = Run({\"JSON.GET\", \"json\"});\n  EXPECT_EQ(resp, R\"([[\"b\",\"a\"],[\"a\",\"b\",\"a\"],[\"a\",\"a\",\"b\",\"b\"]])\");\n\n  resp = Run({\"JSON.ARRINSERT\", \"json\", \"$[*]\", \"1\", R\"(\"c\")\"});\n  ASSERT_EQ(RespExpr::ARRAY, resp.type);\n  EXPECT_THAT(resp.GetVec(), ElementsAre(IntArg(3), IntArg(4), IntArg(5)));\n\n  resp = Run({\"JSON.GET\", \"json\"});\n  EXPECT_EQ(resp, R\"([[\"b\",\"c\",\"a\"],[\"a\",\"c\",\"b\",\"a\"],[\"a\",\"c\",\"a\",\"b\",\"b\"]])\");\n\n  json = R\"(\n    {\"a\":{\"b\":\"c\"}, \"b\":[[\"a\"], [\"a\", \"b\"]]}\n  )\";\n\n  resp = Run({\"JSON.SET\", \"json\", \".\", json});\n  ASSERT_THAT(resp, \"OK\");\n\n  resp = Run({\"JSON.ARRINSERT\", \"json\", \"$.a\", \"0\", R\"(\"c\")\"});\n  EXPECT_THAT(resp, ArgType(RespExpr::NIL));\n}\n\nTEST_F(JsonFamilyTest, ArrInsertLegacy) {\n  string json = R\"(\n    [[], [\"a\"], [\"a\", \"b\"]]\n  )\";\n\n  auto resp = Run({\"JSON.SET\", \"json\", \".\", json});\n  ASSERT_THAT(resp, \"OK\");\n\n  resp = Run({\"JSON.ARRINSERT\", \"json\", \"[*]\", \"0\", R\"(\"c\")\"});\n  EXPECT_THAT(resp, IntArg(3));\n\n  resp = Run({\"JSON.ARRINSERT\", \"json\", \".\", \"0\", R\"(\"c\")\"});\n  EXPECT_THAT(resp, IntArg(4));\n\n  resp = Run({\"JSON.GET\", \"json\"});\n  EXPECT_EQ(resp, R\"([\"c\",[\"c\"],[\"c\",\"a\"],[\"c\",\"a\",\"b\"]])\");\n\n  json = R\"(\n    {\"a\":{\"b\":\"c\"}, \"b\":[[\"a\"], [\"a\", \"b\"]]}\n  )\";\n\n  resp = Run({\"JSON.SET\", \"json\", \".\", json});\n  ASSERT_THAT(resp, \"OK\");\n\n  resp = 
Run({\"JSON.ARRINSERT\", \"json\", \".a\", \"0\", R\"(\"c\")\"});\n  EXPECT_THAT(resp, ErrArg(\"wrong JSON type of path value\"));\n}\n\nTEST_F(JsonFamilyTest, ArrInsertOutOfRange) {\n  string json = R\"(\n    [0,1,2,3,4,5]\n  )\";\n\n  auto resp = Run({\"JSON.SET\", \"arr\", \".\", json});\n  ASSERT_THAT(resp, \"OK\");\n\n  resp = Run({\"JSON.ARRINSERT\", \"arr\", \"$\", \"-55\", \"6\"});\n  EXPECT_THAT(resp, ErrArg(\"index out of range\"));\n\n  resp = Run({\"JSON.ARRINSERT\", \"arr\", \"$\", \"55\", \"6\"});\n  EXPECT_THAT(resp, ErrArg(\"index out of range\"));\n\n  resp = Run({\"JSON.ARRINSERT\", \"arr\", \".\", \"-55\", \"6\"});  // Legacy mode\n  EXPECT_THAT(resp, ErrArg(\"index out of range\"));\n\n  resp = Run({\"JSON.ARRINSERT\", \"arr\", \".\", \"55\", \"6\"});  // Legacy mode\n  EXPECT_THAT(resp, ErrArg(\"index out of range\"));\n\n  resp = Run({\"JSON.SET\", \"arr\", \".\", \"[]\"});\n  ASSERT_THAT(resp, \"OK\");\n\n  resp = Run({\"JSON.ARRINSERT\", \"arr\", \"$\", \"-1\", \"2\"});\n  EXPECT_THAT(resp, ErrArg(\"index out of range\"));\n\n  resp = Run({\"JSON.ARRINSERT\", \"arr\", \"$\", \"1\", \"2\"});\n  EXPECT_THAT(resp, ErrArg(\"index out of range\"));\n\n  resp = Run({\"JSON.ARRINSERT\", \"arr\", \"$\", \"0\", \"2\"});\n  EXPECT_THAT(resp, IntArg(1));\n\n  resp = Run({\"JSON.GET\", \"arr\"});\n  EXPECT_EQ(resp, \"[2]\");\n}\n\nTEST_F(JsonFamilyTest, ArrAppend) {\n  string json = R\"(\n    [[], [\"a\"], [\"a\", \"b\"]]\n  )\";\n\n  auto resp = Run({\"JSON.SET\", \"json\", \".\", json});\n  ASSERT_THAT(resp, \"OK\");\n\n  resp = Run({\"JSON.ARRAPPEND\", \"json\", \"$[*]\", R\"(\"a\")\"});\n  ASSERT_EQ(RespExpr::ARRAY, resp.type);\n  EXPECT_THAT(resp.GetVec(), ElementsAre(IntArg(1), IntArg(2), IntArg(3)));\n\n  resp = Run({\"JSON.ARRAPPEND\", \"json\", \"$[*]\", R\"(\"b\")\"});\n  ASSERT_EQ(RespExpr::ARRAY, resp.type);\n  EXPECT_THAT(resp.GetVec(), ElementsAre(IntArg(2), IntArg(3), IntArg(4)));\n\n  json = R\"(\n    {\"a\": [1], \"nested\": {\"a\": 
[1,2], \"nested2\": {\"a\": 42}}}\n  )\";\n  resp = Run({\"JSON.SET\", \"json\", \".\", json});\n  ASSERT_THAT(resp, \"OK\");\n\n  resp = Run({\"JSON.ARRAPPEND\", \"json\", \"$..a\", \"3\"});\n  ASSERT_EQ(RespExpr::ARRAY, resp.type);\n  EXPECT_THAT(resp.GetVec(), ElementsAre(IntArg(2), IntArg(3), ArgType(RespExpr::NIL)));\n\n  resp = Run({\"JSON.GET\", \"json\"});\n  EXPECT_EQ(resp, R\"({\"a\":[1,3],\"nested\":{\"a\":[1,2,3],\"nested2\":{\"a\":42}}})\");\n}\n\nTEST_F(JsonFamilyTest, ArrAppendLegacy) {\n  string json = R\"(\n    [[], [\"a\"], [\"a\", \"b\"]]\n  )\";\n\n  auto resp = Run({\"JSON.SET\", \"json\", \".\", json});\n  ASSERT_THAT(resp, \"OK\");\n\n  resp = Run({\"JSON.ARRAPPEND\", \"json\", \"[-1]\", R\"(\"c\")\"});\n  EXPECT_THAT(resp, IntArg(3));\n\n  resp = Run({\"JSON.ARRAPPEND\", \"json\", \".*\", R\"(\"c\")\"});\n  EXPECT_THAT(resp, IntArg(4));\n\n  resp = Run({\"JSON.GET\", \"json\"});\n  EXPECT_EQ(resp, R\"([[\"c\"],[\"a\",\"c\"],[\"a\",\"b\",\"c\",\"c\"]])\");\n\n  json = R\"(\n    {\"a\":\"b\"}\n  )\";\n\n  resp = Run({\"JSON.SET\", \"json\", \".\", json});\n  ASSERT_THAT(resp, \"OK\");\n\n  resp = Run({\"JSON.ARRAPPEND\", \"json\", \".\", R\"(\"c\")\"});\n  EXPECT_THAT(resp, ErrArg(\"wrong JSON type of path value\"));\n}\n\nTEST_F(JsonFamilyTest, ArrIndex) {\n  string json = R\"(\n    [[], [\"a\"], [\"a\", \"b\"], [\"a\", \"b\", \"c\"]]\n  )\";\n\n  auto resp = Run({\"JSON.SET\", \"json\", \".\", json});\n  ASSERT_THAT(resp, \"OK\");\n\n  resp = Run({\"JSON.ARRINDEX\", \"json\", \"$[*]\", R\"(\"b\")\"});\n  ASSERT_EQ(RespExpr::ARRAY, resp.type);\n  EXPECT_THAT(resp.GetVec(), ElementsAre(IntArg(-1), IntArg(-1), IntArg(1), IntArg(1)));\n\n  json = R\"(\n    {\"a\":[\"a\",\"b\",\"c\",\"d\"], \"nested\": {\"a\": [\"c\",\"d\"]}}\n  )\";\n\n  resp = Run({\"JSON.SET\", \"json\", \".\", json});\n  ASSERT_THAT(resp, \"OK\");\n\n  resp = Run({\"JSON.ARRINDEX\", \"json\", \"$..a\", R\"(\"b\")\"});\n  ASSERT_EQ(RespExpr::ARRAY, resp.type);\n  
EXPECT_THAT(resp.GetVec(), ElementsAre(IntArg(1), IntArg(-1)));\n\n  json = R\"(\n    {\"a\":[\"a\",\"b\",\"c\",\"d\"], \"nested\": {\"a\": false}}\n  )\";\n\n  resp = Run({\"JSON.SET\", \"json\", \".\", json});\n  ASSERT_THAT(resp, \"OK\");\n\n  resp = Run({\"JSON.ARRINDEX\", \"json\", \"$..a\", R\"(\"b\")\"});\n  ASSERT_EQ(RespExpr::ARRAY, resp.type);\n  EXPECT_THAT(resp.GetVec(), ElementsAre(IntArg(1), ArgType(RespExpr::NIL)));\n\n  resp = Run(\n      {\"JSON.SET\", \"json\", \".\", R\"({\"key\" : [\"Alice\", \"Bob\", \"Carol\", \"David\", \"Eve\", \"Frank\"]})\"});\n  ASSERT_EQ(resp, \"OK\");\n  resp = Run({\"JSON.ARRINDEX\", \"json\", \"$.key\", R\"(\"Bob\")\"});\n  EXPECT_THAT(resp, IntArg(1));\n\n  resp = Run({\"JSON.ARRINDEX\", \"json\", \"$.key\", R\"(\"Bob\")\", \"1\", \"2\"});\n  EXPECT_THAT(resp, IntArg(1));\n}\n\nTEST_F(JsonFamilyTest, ArrIndexLegacy) {\n  string json = R\"(\n    {\"children\": [\"John\", \"Jack\", \"Tom\", \"Bob\", \"Mike\"]}\n  )\";\n\n  auto resp = Run({\"JSON.SET\", \"json\", \".\", json});\n  ASSERT_THAT(resp, \"OK\");\n\n  resp = Run({\"JSON.ARRINDEX\", \"json\", \".children\", R\"(\"Tom\")\"});\n  EXPECT_THAT(resp, IntArg(2));\n\n  resp = Run({\"JSON.ARRINDEX\", \"json\", \".children\", R\"(\"DoesNotExist\")\"});\n  EXPECT_THAT(resp, IntArg(-1));\n\n  resp = Run({\"JSON.ARRINDEX\", \"json\", \".children.[0].notexist\", \"3\"});\n  EXPECT_THAT(resp.type, RespExpr::ERROR);\n\n  json = R\"(\n    {\"a\":\"b\"}\n  )\";\n\n  resp = Run({\"JSON.SET\", \"json\", \".\", json});\n  ASSERT_THAT(resp, \"OK\");\n\n  resp = Run({\"JSON.ARRINDEX\", \"json\", \".\", R\"(\"Tom\")\"});\n  EXPECT_THAT(resp, ErrArg(\"wrong JSON type of path value\"));\n}\n\nTEST_F(JsonFamilyTest, ArrIndexWithNumericValues) {\n  string json = R\"(\n    [2, 3.0, 3]\n  )\";\n\n  auto resp = Run({\"JSON.SET\", \"json\", \"$\", json});\n  ASSERT_THAT(resp, \"OK\");\n\n  resp = Run({\"JSON.ARRINDEX\", \"json\", \"$\", \"3\"});\n  EXPECT_THAT(resp, IntArg(2));\n\n  resp = 
Run({\"JSON.ARRINDEX\", \"json\", \"$\", \"3.0\"});\n  EXPECT_THAT(resp, IntArg(1));\n\n  json = R\"(\n    [[1, 2, 3], [1.0, 2.0, 3.0], 2.0, [1,2,3]]\n  )\";\n\n  resp = Run({\"JSON.SET\", \"json\", \"$\", json});\n  ASSERT_THAT(resp, \"OK\");\n\n  resp = Run({\"JSON.ARRINDEX\", \"json\", \"$\", \"[1,2,3]\"});\n  EXPECT_THAT(resp, IntArg(0));\n\n  resp = Run({\"JSON.ARRINDEX\", \"json\", \"$\", \"[1.0,2.0,3.0]\"});\n  EXPECT_THAT(resp, IntArg(1));\n\n  json = R\"(\n    [{\"a\":2},{\"a\":2.0},2.0]\n  )\";\n\n  resp = Run({\"JSON.SET\", \"json\", \"$\", json});\n  ASSERT_THAT(resp, \"OK\");\n\n  resp = Run({\"JSON.ARRINDEX\", \"json\", \"$\", R\"({\"a\":2})\"});\n  EXPECT_THAT(resp, IntArg(0));\n\n  resp = Run({\"JSON.ARRINDEX\", \"json\", \"$\", R\"({\"a\":2.0})\"});\n  EXPECT_THAT(resp, IntArg(1));\n\n  json = R\"(\n    [{\"arr\":[1,2,3],\"number\":2},{\"arr\":[1.0,2.0,3.0],\"number\":2.0},2]\n  )\";\n\n  resp = Run({\"JSON.SET\", \"json\", \"$\", json});\n  ASSERT_THAT(resp, \"OK\");\n\n  resp = Run({\"JSON.ARRINDEX\", \"json\", \"$\", R\"({\"arr\":[1,2,3],\"number\":2})\"});\n  EXPECT_THAT(resp, IntArg(0));\n\n  resp = Run({\"JSON.ARRINDEX\", \"json\", \"$\", R\"({\"arr\":[1.0,2.0,3.0],\"number\":2.0})\"});\n  EXPECT_THAT(resp, IntArg(1));\n\n  resp = Run({\"JSON.ARRINDEX\", \"json\", \"$\", R\"({\"arr\":[1,2,3],\"number\":2.0})\"});\n  EXPECT_THAT(resp, IntArg(-1));\n\n  resp = Run({\"JSON.ARRINDEX\", \"json\", \"$\", R\"({\"arr\":[1.0,2.0,3.0],\"number\":2})\"});\n  EXPECT_THAT(resp, IntArg(-1));\n}\n\nTEST_F(JsonFamilyTest, ArrIndexWithNumericValuesLegacy) {\n  string json = R\"(\n    [2, 3.0, 3]\n  )\";\n\n  auto resp = Run({\"JSON.SET\", \"json\", \".\", json});\n  ASSERT_THAT(resp, \"OK\");\n\n  resp = Run({\"JSON.ARRINDEX\", \"json\", \".\", \"3\"});\n  EXPECT_THAT(resp, IntArg(2));\n\n  resp = Run({\"JSON.ARRINDEX\", \"json\", \".\", \"3.0\"});\n  EXPECT_THAT(resp, IntArg(1));\n\n  json = R\"(\n    
[{\"arr\":[1,2,3],\"number\":2},{\"arr\":[1.0,2.0,3.0],\"number\":2.0},2]\n  )\";\n\n  resp = Run({\"JSON.SET\", \"json\", \".\", json});\n  ASSERT_THAT(resp, \"OK\");\n\n  resp = Run({\"JSON.ARRINDEX\", \"json\", \".\", R\"({\"arr\":[1,2,3],\"number\":2})\"});\n  EXPECT_THAT(resp, IntArg(0));\n\n  resp = Run({\"JSON.ARRINDEX\", \"json\", \".\", R\"({\"arr\":[1.0,2.0,3.0],\"number\":2.0})\"});\n  EXPECT_THAT(resp, IntArg(1));\n\n  resp = Run({\"JSON.ARRINDEX\", \"json\", \".\", R\"({\"arr\":[1,2,3],\"number\":2.0})\"});\n  EXPECT_THAT(resp, IntArg(-1));\n\n  resp = Run({\"JSON.ARRINDEX\", \"json\", \".\", R\"({\"arr\":[1.0,2.0,3.0],\"number\":2})\"});\n  EXPECT_THAT(resp, IntArg(-1));\n}\n\nTEST_F(JsonFamilyTest, ArrIndexOutOfRange) {\n  auto resp = Run({\"JSON.SET\", \"arr\", \".\", R\"([1,1,1,1,1])\"});\n  ASSERT_THAT(resp, \"OK\");\n\n  resp = Run({\"JSON.ARRINDEX\", \"arr\", \"$\", \"1\", \"-55\", \"-55\"});\n  EXPECT_THAT(resp, IntArg(-1));\n\n  resp = Run({\"JSON.ARRINDEX\", \"arr\", \"$\", \"1\", \"-55\", \"-56\"});\n  EXPECT_THAT(resp, IntArg(-1));\n\n  resp = Run({\"JSON.ARRINDEX\", \"arr\", \"$\", \"1\", \"-55\", \"-54\"});\n  EXPECT_THAT(resp, IntArg(-1));\n\n  resp = Run({\"JSON.ARRINDEX\", \"arr\", \"$\", \"1\", \"-2\"});\n  EXPECT_THAT(resp, IntArg(3));\n\n  resp = Run({\"JSON.ARRINDEX\", \"arr\", \"$\", \"1\", \"-2\", \"-1\"});\n  EXPECT_THAT(resp, IntArg(3));\n\n  resp = Run({\"JSON.ARRINDEX\", \"arr\", \"$\", \"1\", \"-2\", \"-3\"});\n  EXPECT_THAT(resp, IntArg(-1));\n\n  resp = Run({\"JSON.ARRINDEX\", \"arr\", \"$\", \"1\", \"55\", \"56\"});\n  EXPECT_THAT(resp, IntArg(4));\n\n  resp = Run({\"JSON.ARRINDEX\", \"arr\", \"$\", \"1\", \"55\", \"54\"});\n  EXPECT_THAT(resp, IntArg(4));\n\n  resp = Run({\"JSON.ARRINDEX\", \"arr\", \"$\", \"1\", \"5\", \"4\"});\n  EXPECT_THAT(resp, IntArg(-1));\n}\n\nTEST_F(JsonFamilyTest, MGet) {\n  string json[] = {\n      R\"(\n    {\"address\":{\"street\":\"14 Imber 
Street\",\"city\":\"Petah-Tikva\",\"country\":\"Israel\",\"zipcode\":\"49511\"}}\n  )\",\n      R\"(\n    {\"address\":{\"street\":\"Oranienburger Str. 27\",\"city\":\"Berlin\",\"country\":\"Germany\",\"zipcode\":\"10117\"}}\n  )\",\n      R\"(\n    {\"a\":1, \"b\": 2, \"nested\": {\"a\": 3}, \"c\": null}\n  )\",\n      R\"(\n    {\"a\":4, \"b\": 5, \"nested\": {\"a\": 6}, \"c\": null}\n  )\"};\n\n  auto resp = Run({\"JSON.SET\", \"json1\", \".\", json[0]});\n  ASSERT_THAT(resp, \"OK\");\n\n  resp = Run({\"JSON.SET\", \"json2\", \".\", json[1]});\n  ASSERT_THAT(resp, \"OK\");\n\n  resp = Run({\"JSON.MGET\", \"json1\", \"??INNNNVALID??\"});\n  EXPECT_THAT(resp, ErrArg(\"ERR syntax error\"));\n\n  resp = Run({\"JSON.MGET\", \"json1\", \"json2\", \"json3\", \"$.address.country\"});\n  ASSERT_EQ(RespExpr::ARRAY, resp.type);\n  EXPECT_THAT(resp.GetVec(),\n              ElementsAre(R\"([\"Israel\"])\", R\"([\"Germany\"])\", ArgType(RespExpr::NIL)));\n\n  resp = Run({\"JSON.SET\", \"json3\", \".\", json[2]});\n  ASSERT_THAT(resp, \"OK\");\n\n  resp = Run({\"JSON.SET\", \"json4\", \".\", json[3]});\n  ASSERT_THAT(resp, \"OK\");\n\n  resp = Run({\"JSON.MGET\", \"json3\", \"json4\", \"$..a\"});\n  ASSERT_EQ(RespExpr::ARRAY, resp.type);\n  EXPECT_THAT(resp.GetVec(), ElementsAre(R\"([1,3])\", R\"([4,6])\"));\n}\n\nTEST_F(JsonFamilyTest, MGetLegacy) {\n  string json[] = {\n      R\"(\n    {\"address\":{\"street\":\"14 Imber Street\",\"city\":\"Petah-Tikva\",\"country\":\"Israel\",\"zipcode\":\"49511\"}}\n  )\",\n      R\"(\n    {\"address\":{\"street\":\"Oranienburger Str. 
27\",\"city\":\"Berlin\",\"country\":\"Germany\",\"zipcode\":\"10117\"}}\n  )\",\n      R\"(\n    {\"a\":1, \"b\": 2, \"nested\": {\"a\": 3}, \"c\": null}\n  )\",\n      R\"(\n    {\"a\":4, \"b\": 5, \"nested\": {\"a\": 6}, \"c\": null}\n  )\"};\n\n  auto resp = Run({\"JSON.SET\", \"json1\", \".\", json[0]});\n  ASSERT_THAT(resp, \"OK\");\n\n  resp = Run({\"JSON.SET\", \"json2\", \".\", json[1]});\n  ASSERT_THAT(resp, \"OK\");\n\n  resp = Run({\"JSON.MGET\", \"json1\", \"json2\", \"json3\", \".address.country\"});\n  ASSERT_EQ(RespExpr::ARRAY, resp.type);\n  EXPECT_THAT(resp.GetVec(), ElementsAre(R\"(\"Israel\")\", R\"(\"Germany\")\", ArgType(RespExpr::NIL)));\n\n  resp = Run({\"JSON.MGET\", \"json1\", \"json2\", \".[0]\"});\n  if (auto jsonpathv2 = absl::GetFlag(FLAGS_jsonpathv2); jsonpathv2) {\n    ASSERT_EQ(RespExpr::ARRAY, resp.type);\n    EXPECT_THAT(resp.GetVec(), ElementsAre(ArgType(RespExpr::NIL), ArgType(RespExpr::NIL)));\n  } else {\n    EXPECT_THAT(resp, ErrArg(\"ERR syntax error\"));\n  }\n\n  resp = Run({\"JSON.SET\", \"json3\", \".\", json[2]});\n  ASSERT_THAT(resp, \"OK\");\n\n  resp = Run({\"JSON.SET\", \"json4\", \".\", json[3]});\n  ASSERT_THAT(resp, \"OK\");\n\n  resp = Run({\"JSON.MGET\", \"json3\", \"json4\", \"..a\"});\n  ASSERT_EQ(RespExpr::ARRAY, resp.type);\n  EXPECT_THAT(resp.GetVec(), ElementsAre(R\"(3)\", R\"(6)\"));\n}\n\nTEST_F(JsonFamilyTest, DebugHelp) {\n  auto resp = Run({\"JSON.DEBUG\", \"HELP\"});\n  ASSERT_EQ(RespExpr::ARRAY, resp.type);\n  EXPECT_EQ(resp.GetVec().size(), 3);\n\n  EXPECT_THAT(resp.GetVec()[0].GetString(), HasSubstr(\"MEMORY\"));\n  EXPECT_THAT(resp.GetVec()[1].GetString(), HasSubstr(\"FIELDS\"));\n  EXPECT_THAT(resp.GetVec()[2].GetString(), HasSubstr(\"HELP\"));\n}\n\nTEST_F(JsonFamilyTest, DebugFields) {\n  string json = R\"(\n    [1, 2.3, \"foo\", true, null, {}, [], {\"a\":1, \"b\":2}, [1,2,3]]\n  )\";\n\n  auto resp = Run({\"JSON.SET\", \"json1\", \".\", json});\n  ASSERT_THAT(resp, \"OK\");\n\n  resp = 
Run({\"JSON.DEBUG\", \"fields\", \"json1\", \"$[*]\"});\n  ASSERT_EQ(RespExpr::ARRAY, resp.type);\n  EXPECT_THAT(resp.GetVec(), ElementsAre(IntArg(1), IntArg(1), IntArg(1), IntArg(1), IntArg(1),\n                                         IntArg(0), IntArg(0), IntArg(2), IntArg(3)));\n\n  resp = Run({\"JSON.DEBUG\", \"fields\", \"json1\", \"$\"});\n  EXPECT_THAT(resp, IntArg(14));\n\n  json = R\"(\n    [[1,2,3, [4,5,6,[6,7,8]]], {\"a\": {\"b\": {\"c\": 1337}}}]\n  )\";\n\n  resp = Run({\"JSON.SET\", \"json1\", \".\", json});\n  ASSERT_THAT(resp, \"OK\");\n\n  resp = Run({\"JSON.DEBUG\", \"fields\", \"json1\", \"$[*]\"});\n  ASSERT_EQ(RespExpr::ARRAY, resp.type);\n  EXPECT_THAT(resp.GetVec(), ElementsAre(IntArg(11), IntArg(3)));\n\n  resp = Run({\"JSON.DEBUG\", \"fields\", \"json1\", \"$\"});\n  EXPECT_THAT(resp, IntArg(16));\n\n  json = R\"({\"a\":1, \"b\":2, \"c\":{\"k1\":1,\"k2\":2}})\";\n\n  resp = Run({\"JSON.SET\", \"obj_doc\", \"$\", json});\n  ASSERT_THAT(resp, \"OK\");\n\n  resp = Run({\"JSON.DEBUG\", \"FIELDS\", \"obj_doc\", \"$.a\"});\n  EXPECT_THAT(resp, IntArg(1));\n\n  resp = Run({\"JSON.DEBUG\", \"fields\", \"obj_doc\", \"$.a\"});\n  EXPECT_THAT(resp, IntArg(1));\n}\n\nTEST_F(JsonFamilyTest, DebugFieldsLegacy) {\n  string json = R\"(\n    [1, 2.3, \"foo\", true, null, {}, [], {\"a\":1, \"b\":2}, [1,2,3]]\n  )\";\n\n  auto resp = Run({\"JSON.SET\", \"json1\", \".\", json});\n  ASSERT_THAT(resp, \"OK\");\n\n  resp = Run({\"JSON.DEBUG\", \"fields\", \"json1\", \"[*]\"});\n  EXPECT_THAT(resp, IntArg(3));\n\n  resp = Run({\"JSON.DEBUG\", \"fields\", \"json1\", \".\"});\n  EXPECT_THAT(resp, IntArg(14));\n\n  resp = Run({\"JSON.DEBUG\", \"fields\", \"json1\"});\n  EXPECT_THAT(resp, IntArg(14));\n\n  json = R\"(\n    [[1,2,3, [4,5,6,[6,7,8]]], {\"a\": {\"b\": {\"c\": 1337}}}]\n  )\";\n\n  resp = Run({\"JSON.SET\", \"json1\", \".\", json});\n  ASSERT_THAT(resp, \"OK\");\n\n  resp = Run({\"JSON.DEBUG\", \"fields\", \"json1\", \"[*]\"});\n  EXPECT_THAT(resp, 
IntArg(3));\n\n  resp = Run({\"JSON.DEBUG\", \"fields\", \"json1\", \".\"});\n  EXPECT_THAT(resp, IntArg(16));\n\n  json = R\"({\"a\":1, \"b\":2, \"c\":{\"k1\":1,\"k2\":2}})\";\n\n  resp = Run({\"JSON.SET\", \"obj_doc\", \".\", json});\n  ASSERT_THAT(resp, \"OK\");\n\n  resp = Run({\"JSON.DEBUG\", \"FIELDS\", \"obj_doc\", \".a\"});\n  EXPECT_THAT(resp, IntArg(1));\n\n  resp = Run({\"JSON.DEBUG\", \"fields\", \"obj_doc\", \".a\"});\n  EXPECT_THAT(resp, IntArg(1));\n}\n\nTEST_F(JsonFamilyTest, DebugMemory) {\n  auto resp = Run({\"JSON.SET\", \"json1\", \"$\",\n                   R\"([1, 2.3, \"foo\", true, null, {}, [], {\"a\":1, \"b\":2}, [1,2,3]])\"});\n  EXPECT_EQ(resp, \"OK\");\n\n  resp = Run({\"JSON.DEBUG\", \"memory\", \"json1\", \"$[*]\"});\n  EXPECT_EQ(resp.type, RespExpr::ARRAY);\n  EXPECT_EQ(resp.GetVec().size(), 9);\n  EXPECT_EQ(resp.GetVec()[0].GetInt(), 0);\n  EXPECT_EQ(resp.GetVec()[1].GetInt(), 0);\n  EXPECT_EQ(resp.GetVec()[2].GetInt(), 0);\n  EXPECT_EQ(resp.GetVec()[3].GetInt(), 0);\n  EXPECT_EQ(resp.GetVec()[4].GetInt(), 0);\n  EXPECT_GE(resp.GetVec()[5].GetInt(), 0);\n  EXPECT_GE(resp.GetVec()[6].GetInt(), 0);\n  EXPECT_GT(resp.GetVec()[7].GetInt(), 0);\n  EXPECT_GT(resp.GetVec()[8].GetInt(), 0);\n\n  resp = Run({\"JSON.DEBUG\", \"memory\", \"json1\", \"$\"});\n  EXPECT_GT(resp.GetInt(), 0);\n\n  resp = Run({\"JSON.SET\", \"bigstr\", \"$\",\n              R\"({\"text\":\"This is a longer string that should definitely exceed SSO buffer\"})\"});\n  EXPECT_EQ(resp, \"OK\");\n  resp = Run({\"JSON.DEBUG\", \"memory\", \"bigstr\", \"$.text\"});\n  EXPECT_GT(resp.GetInt(), 0);\n\n  resp = Run({\"JSON.SET\", \"obj_doc\", \"$\", R\"({\"num\":42, \"obj\":{\"k1\":1,\"k2\":2}})\"});\n  EXPECT_EQ(resp, \"OK\");\n  resp = Run({\"JSON.DEBUG\", \"MEMORY\", \"obj_doc\", \"$.num\"});\n  EXPECT_EQ(resp.GetInt(), 0);\n  resp = Run({\"JSON.DEBUG\", \"memory\", \"obj_doc\", \"$.obj\"});\n  EXPECT_GT(resp.GetInt(), 0);\n}\n\nTEST_F(JsonFamilyTest, DebugMemoryLegacy) {\n 
 auto resp = Run({\"JSON.SET\", \"json1\", \"$\",\n                   R\"([1, 2.3, \"foo\", true, null, {}, [], {\"a\":1, \"b\":2}, [1,2,3]])\"});\n  EXPECT_EQ(resp, \"OK\");\n\n  resp = Run({\"JSON.DEBUG\", \"memory\", \"json1\", \".\"});\n  EXPECT_EQ(resp.type, RespExpr::INT64);\n  EXPECT_GT(resp.GetInt(), 0);\n\n  resp = Run({\"JSON.DEBUG\", \"memory\", \"json1\"});\n  EXPECT_EQ(resp.type, RespExpr::INT64);\n  EXPECT_GT(resp.GetInt(), 0);\n\n  resp = Run({\"JSON.SET\", \"primitives\", \"$\", R\"({\"num\":42, \"bool\":true, \"null\":null})\"});\n  EXPECT_EQ(resp, \"OK\");\n  resp = Run({\"JSON.DEBUG\", \"memory\", \"primitives\", \".num\"});\n  EXPECT_EQ(resp.GetInt(), 0);\n  resp = Run({\"JSON.DEBUG\", \"memory\", \"primitives\", \".bool\"});\n  EXPECT_EQ(resp.GetInt(), 0);\n  resp = Run({\"JSON.DEBUG\", \"memory\", \"primitives\", \".null\"});\n  EXPECT_EQ(resp.GetInt(), 0);\n\n  resp = Run({\"JSON.SET\", \"obj_doc\", \"$\",\n              R\"({\"longstring\":\"This is a very long string that definitely exceeds SSO buffer\"})\"});\n  EXPECT_EQ(resp, \"OK\");\n  resp = Run({\"JSON.DEBUG\", \"MEMORY\", \"obj_doc\", \".longstring\"});\n  EXPECT_GT(resp.GetInt(), 0);\n\n  resp = Run({\"JSON.SET\", \"arr\", \"$\", R\"([1,2,3,4,5,6,7,8,9,10])\"});\n  EXPECT_EQ(resp, \"OK\");\n  resp = Run({\"JSON.DEBUG\", \"memory\", \"arr\", \".\"});\n  EXPECT_GT(resp.GetInt(), 0);\n\n  resp = Run({\"JSON.SET\", \"obj\", \"$\", R\"({\"a\":1, \"b\":2, \"c\":3})\"});\n  EXPECT_EQ(resp, \"OK\");\n  resp = Run({\"JSON.DEBUG\", \"memory\", \"obj\", \".\"});\n  EXPECT_GT(resp.GetInt(), 0);\n}\n\nTEST_F(JsonFamilyTest, Resp) {\n  auto resp = Run({\"JSON.SET\", \"json\", \".\", PhonebookJson});\n  ASSERT_THAT(resp, \"OK\");\n\n  resp = Run({\"JSON.RESP\", \"json\", \"$\"});\n  ASSERT_EQ(RespExpr::ARRAY, resp.type);\n\n  resp = Run({\"JSON.RESP\", \"json\", \"$.address.*\"});\n  ASSERT_EQ(RespExpr::ARRAY, resp.type);\n  EXPECT_THAT(resp.GetVec(), ElementsAre(\"New York\", \"NY\", \"21 2nd 
Street\", \"10021-3100\"));\n\n  resp = Run({\"JSON.RESP\", \"json\", \"$.isAlive\"});\n  EXPECT_THAT(resp, \"true\");\n\n  resp = Run({\"JSON.RESP\", \"json\", \"$.age\"});\n  EXPECT_THAT(resp, IntArg(27));\n\n  resp = Run({\"JSON.RESP\", \"json\", \"$.weight\"});\n  EXPECT_THAT(resp, \"135.25\");\n}\n\nTEST_F(JsonFamilyTest, RespLegacy) {\n  auto resp = Run({\"JSON.SET\", \"json\", \".\", PhonebookJson});\n  ASSERT_THAT(resp, \"OK\");\n\n  resp = Run({\"JSON.RESP\", \"json\"});\n  ASSERT_EQ(RespExpr::ARRAY, resp.type);\n\n  resp = Run({\"JSON.RESP\", \"json\", \".address.*\"});\n  EXPECT_THAT(resp, \"10021-3100\");\n\n  resp = Run({\"JSON.RESP\", \"json\", \".isAlive\"});\n  EXPECT_THAT(resp, \"true\");\n\n  resp = Run({\"JSON.RESP\", \"json\", \".age\"});\n  EXPECT_THAT(resp, IntArg(27));\n\n  resp = Run({\"JSON.RESP\", \"json\", \".weight\"});\n  EXPECT_THAT(resp, \"135.25\");\n}\n\nTEST_F(JsonFamilyTest, Set) {\n  string json = R\"(\n    {\"a\":{\"a\":1, \"b\":2, \"c\":3}}\n  )\";\n\n  auto resp = Run({\"JSON.SET\", \"json1\", \".\", json});\n  EXPECT_THAT(resp, \"OK\");\n\n  resp = Run({\"JSON.SET\", \"json1\", \"$.a.*\", \"0\"});\n  EXPECT_THAT(resp, \"OK\");\n\n  resp = Run({\"JSON.GET\", \"json1\"});\n  EXPECT_EQ(resp, R\"({\"a\":{\"a\":0,\"b\":0,\"c\":0}})\");\n\n  json = R\"(\n    {\"a\": [1,2,3,4,5]}\n  )\";\n\n  resp = Run({\"JSON.SET\", \"json2\", \".\", json});\n  EXPECT_THAT(resp, \"OK\");\n\n  resp = Run({\"JSON.SET\", \"json2\", \"$.a[*]\", \"0\"});\n  EXPECT_THAT(resp, \"OK\");\n\n  resp = Run({\"JSON.GET\", \"json2\"});\n  EXPECT_EQ(resp, R\"({\"a\":[0,0,0,0,0]})\");\n\n  json = R\"(\n    {\"a\": 2}\n  )\";\n\n  resp = Run({\"JSON.SET\", \"json3\", \"$\", json});\n  EXPECT_THAT(resp, \"OK\");\n\n  resp = Run({\"JSON.SET\", \"json3\", \"$.b\", \"8\"});\n  EXPECT_THAT(resp, \"OK\");\n\n  resp = Run({\"JSON.SET\", \"json3\", \"$.c\", \"[1,2,3]\"});\n  EXPECT_THAT(resp, \"OK\");\n\n  resp = Run({\"JSON.SET\", \"json3\", \"$.z\", \"3\", \"XX\"});\n  
EXPECT_THAT(resp, ArgType(RespExpr::NIL));\n\n  resp = Run({\"JSON.SET\", \"json3\", \"$.b\", \"4\", \"NX\"});\n  EXPECT_THAT(resp, ArgType(RespExpr::NIL));\n\n  resp = Run({\"JSON.GET\", \"json3\"});\n  EXPECT_EQ(resp, R\"({\"a\":2,\"b\":8,\"c\":[1,2,3]})\");\n}\n\nTEST_F(JsonFamilyTest, SetLegacy) {\n  string json = R\"(\n    {\"a\":{\"a\":1, \"b\":2, \"c\":3}}\n  )\";\n\n  auto resp = Run({\"JSON.SET\", \"json1\", \".\", json});\n  EXPECT_THAT(resp, \"OK\");\n\n  resp = Run({\"JSON.SET\", \"json1\", \".a.*\", \"0\"});\n  EXPECT_THAT(resp, \"OK\");\n\n  resp = Run({\"JSON.GET\", \"json1\"});\n  EXPECT_EQ(resp, R\"({\"a\":{\"a\":0,\"b\":0,\"c\":0}})\");\n\n  json = R\"(\n    {\"a\": [1,2,3,4,5]}\n  )\";\n\n  resp = Run({\"JSON.SET\", \"json2\", \".\", json});\n  EXPECT_THAT(resp, \"OK\");\n\n  resp = Run({\"JSON.SET\", \"json2\", \".a[*]\", \"0\"});\n  EXPECT_THAT(resp, \"OK\");\n\n  resp = Run({\"JSON.GET\", \"json2\"});\n  EXPECT_EQ(resp, R\"({\"a\":[0,0,0,0,0]})\");\n\n  json = R\"(\n    {\"a\": 2}\n  )\";\n\n  resp = Run({\"JSON.SET\", \"json3\", \".\", json});\n  EXPECT_THAT(resp, \"OK\");\n\n  resp = Run({\"JSON.SET\", \"json3\", \".b\", \"8\"});\n  EXPECT_THAT(resp, \"OK\");\n\n  resp = Run({\"JSON.SET\", \"json3\", \".c\", \"[1,2,3]\"});\n  EXPECT_THAT(resp, \"OK\");\n\n  resp = Run({\"JSON.SET\", \"json3\", \".z\", \"3\", \"XX\"});\n  EXPECT_THAT(resp, ArgType(RespExpr::NIL));\n\n  resp = Run({\"JSON.SET\", \"json3\", \".z\", \"3\"});\n  EXPECT_THAT(resp, \"OK\");\n\n  resp = Run({\"JSON.SET\", \"json3\", \".z\", \"4\", \"XX\"});\n  EXPECT_THAT(resp, \"OK\");\n\n  resp = Run({\"JSON.SET\", \"json3\", \".b\", \"4\", \"NX\"});\n  EXPECT_THAT(resp, ArgType(RespExpr::NIL));\n\n  resp = Run({\"JSON.SET\", \"json3\", \".b\", \"5\"});\n  EXPECT_THAT(resp, \"OK\");\n\n  resp = Run({\"JSON.SET\", \"json3\", \".\", \"[]\", \"NX\"});\n  EXPECT_THAT(resp, ArgType(RespExpr::NIL));\n\n  resp = Run({\"JSON.GET\", \"json3\"});\n  EXPECT_EQ(resp, 
R\"({\"a\":2,\"b\":5,\"c\":[1,2,3],\"z\":4})\");\n\n  json = R\"(\n    {\"foo\": \"bar\"}\n  )\";\n\n  resp = Run({\"JSON.SET\", \"json4\", \".\", json});\n  EXPECT_THAT(resp, \"OK\");\n\n  resp = Run({\"JSON.SET\", \"json4\", \"foo\", \"\\\"baz\\\"\", \"XX\"});\n  EXPECT_THAT(resp, \"OK\");\n\n  resp = Run({\"JSON.SET\", \"json4\", \"foo2\", \"\\\"qaz\\\"\", \"NX\"});\n  EXPECT_THAT(resp, \"OK\");\n}\n\nTEST_F(JsonFamilyTest, MSet) {\n  string json1 = R\"({\"a\":{\"a\":1,\"b\":2,\"c\":3}})\";\n  string json2 = R\"({\"a\":{\"a\":4,\"b\":5,\"c\":6}})\";\n\n  auto resp = Run({\"JSON.MSET\", \"j1\", \"$\"});\n  EXPECT_THAT(resp, ErrArg(\"wrong number\"));\n  resp = Run({\"JSON.MSET\", \"j1\", \"$\", json1, \"j3\", \"$\"});\n  EXPECT_THAT(resp, ErrArg(\"wrong number\"));\n\n  resp = Run({\"JSON.MSET\", \"j1\", \"$\", json1, \"j2\", \"$\", json2, \"j3\", \"$\", json1, \"j4\", \"$\", json2});\n  EXPECT_EQ(resp, \"OK\");\n\n  resp = Run({\"JSON.MGET\", \"j1\", \"j2\", \"j3\", \"j4\", \"$\"});\n  EXPECT_THAT(resp.GetVec(), ElementsAre(\"[\" + json1 + \"]\", \"[\" + json2 + \"]\", \"[\" + json1 + \"]\",\n                                         \"[\" + json2 + \"]\"));\n}\n\nTEST_F(JsonFamilyTest, MSetLegacy) {\n  string json1 = R\"({\"a\":{\"a\":1,\"b\":2,\"c\":3}})\";\n  string json2 = R\"({\"a\":{\"a\":4,\"b\":5,\"c\":6}})\";\n\n  auto resp = Run({\"JSON.MSET\", \"j1\", \".\"});\n  EXPECT_THAT(resp, ErrArg(\"wrong number\"));\n  resp = Run({\"JSON.MSET\", \"j1\", \".\", json1, \"j3\", \".\"});\n  EXPECT_THAT(resp, ErrArg(\"wrong number\"));\n\n  resp = Run({\"JSON.MSET\", \"j1\", \".\", json1, \"j2\", \".\", json2, \"j3\", \".\", json1, \"j4\", \".\", json2});\n  EXPECT_EQ(resp, \"OK\");\n\n  resp = Run({\"JSON.MGET\", \"j1\", \"j2\", \"j3\", \"j4\", \"$\"});\n  EXPECT_THAT(resp.GetVec(), ElementsAre(\"[\" + json1 + \"]\", \"[\" + json2 + \"]\", \"[\" + json1 + \"]\",\n                                         \"[\" + json2 + \"]\"));\n}\n\nTEST_F(JsonFamilyTest, Merge) 
{\n  string json = R\"(\n  { \"a\": \"b\",\n    \"c\": {\n      \"d\": \"e\",\n      \"f\": \"g\"\n    }\n  }\n  )\";\n\n  auto resp = Run({\"JSON.SET\", \"j1\", \"$\", json});\n  EXPECT_EQ(resp, \"OK\");\n\n  string patch = R\"(\n    {\n      \"a\":\"z\",\n      \"c\": {\n      \"f\": null\n      }\n    }\n  )\";\n\n  resp = Run({\"JSON.MERGE\", \"new\", \"$\", patch});\n  EXPECT_EQ(resp, \"OK\");\n\n  resp = Run({\"JSON.GET\", \"new\"});\n  EXPECT_EQ(resp, R\"({\"a\":\"z\",\"c\":{\"f\":null}})\");\n\n  resp = Run({\"JSON.MERGE\", \"j1\", \"$\", patch});\n  EXPECT_EQ(resp, \"OK\");\n  resp = Run({\"JSON.GET\", \"j1\"});\n  EXPECT_EQ(resp, R\"({\"a\":\"z\",\"c\":{\"d\":\"e\"}})\");\n\n  resp = Run({\"JSON.SET\", \"foo\", \"$\", R\"(\"{\"f1\":1, \"common\":2}\")\"});\n  EXPECT_EQ(resp, \"OK\");\n  resp = Run({\"JSON.MERGE\", \"foo\", \"$\", R\"({\"f2\":2, \"common\":4})\"});\n  EXPECT_EQ(resp, \"OK\");\n  resp = Run({\"JSON.GET\", \"foo\"});\n  EXPECT_EQ(resp, R\"({\"common\":4,\"f2\":2})\");\n\n  json = R\"({\n  \"ans\": {\n    \"x\": {\n      \"y\" : {\n        \"doubled\": false,\n        \"answers\": [\n          \"foo\",\n          \"bar\"\n        ]\n      }\n    }\n  }\n  })\";\n  resp = Run({\"JSON.SET\", \"j2\", \"$\", json});\n  ASSERT_EQ(resp, \"OK\");\n\n  patch = R\"(\n    {\"z\": {\n      \"doubled\": false,\n      \"answers\": [\"xxx\",  \"yyy\"]\n     },\n     \"y\": { \"doubled\": true}\n     })\";\n\n  resp = Run({\"JSON.MERGE\", \"j2\", \"$.ans.x\", patch});\n\n  EXPECT_EQ(resp, \"OK\");\n  resp = Run({\"JSON.GET\", \"j2\"});\n  EXPECT_EQ(resp, R\"({\"ans\":{\"x\":{\"y\":{\"answers\":[\"foo\",\"bar\"],\"doubled\":true},)\"\n                  R\"(\"z\":{\"answers\":[\"xxx\",\"yyy\"],\"doubled\":false}}}})\");\n\n  // Test not existing entry\n  resp = Run({\"JSON.MERGE\", \"j3\", \"$\", patch});\n  EXPECT_EQ(resp, \"OK\");\n  resp = Run({\"JSON.GET\", \"j3\"});\n  EXPECT_EQ(resp, 
R\"({\"y\":{\"doubled\":true},\"z\":{\"answers\":[\"xxx\",\"yyy\"],\"doubled\":false}})\");\n}\n\nTEST_F(JsonFamilyTest, MergeLegacy) {\n  string json = R\"(\n  { \"a\": \"b\",\n    \"c\": {\n      \"d\": \"e\",\n      \"f\": \"g\"\n    }\n  }\n  )\";\n\n  auto resp = Run({\"JSON.SET\", \"j1\", \"$\", json});\n  EXPECT_EQ(resp, \"OK\");\n\n  string patch = R\"(\n    {\n      \"a\":\"z\",\n      \"c\": {\n      \"f\": null\n      }\n    }\n  )\";\n\n  resp = Run({\"JSON.MERGE\", \"new\", \".\", patch});\n  EXPECT_EQ(resp, \"OK\");\n\n  resp = Run({\"JSON.GET\", \"new\"});\n  EXPECT_EQ(resp, R\"({\"a\":\"z\",\"c\":{\"f\":null}})\");\n\n  resp = Run({\"JSON.MERGE\", \"j1\", \".\", patch});\n  EXPECT_EQ(resp, \"OK\");\n  resp = Run({\"JSON.GET\", \"j1\"});\n  EXPECT_EQ(resp, R\"({\"a\":\"z\",\"c\":{\"d\":\"e\"}})\");\n\n  resp = Run({\"JSON.SET\", \"foo\", \"$\", R\"(\"{\"f1\":1, \"common\":2}\")\"});\n  EXPECT_EQ(resp, \"OK\");\n  resp = Run({\"JSON.MERGE\", \"foo\", \".\", R\"({\"f2\":2, \"common\":4})\"});\n  EXPECT_EQ(resp, \"OK\");\n  resp = Run({\"JSON.GET\", \"foo\"});\n  EXPECT_EQ(resp, R\"({\"common\":4,\"f2\":2})\");\n\n  json = R\"({\n  \"ans\": {\n    \"x\": {\n      \"y\" : {\n        \"doubled\": false,\n        \"answers\": [\n          \"foo\",\n          \"bar\"\n        ]\n      }\n    }\n  }\n  })\";\n  resp = Run({\"JSON.SET\", \"j2\", \"$\", json});\n  ASSERT_EQ(resp, \"OK\");\n\n  patch = R\"(\n    {\"z\": {\n      \"doubled\": false,\n      \"answers\": [\"xxx\",  \"yyy\"]\n     },\n     \"y\": { \"doubled\": true}\n     })\";\n\n  resp = Run({\"JSON.MERGE\", \"j2\", \".ans.x\", patch});\n\n  EXPECT_EQ(resp, \"OK\");\n  resp = Run({\"JSON.GET\", \"j2\"});\n  EXPECT_EQ(resp, R\"({\"ans\":{\"x\":{\"y\":{\"answers\":[\"foo\",\"bar\"],\"doubled\":true},)\"\n                  R\"(\"z\":{\"answers\":[\"xxx\",\"yyy\"],\"doubled\":false}}}})\");\n\n  // Test not existing entry\n  resp = Run({\"JSON.MERGE\", \"j3\", \".\", patch});\n  EXPECT_EQ(resp, 
\"OK\");\n  resp = Run({\"JSON.GET\", \"j3\"});\n  EXPECT_EQ(resp, R\"({\"y\":{\"doubled\":true},\"z\":{\"answers\":[\"xxx\",\"yyy\"],\"doubled\":false}})\");\n}\n\nTEST_F(JsonFamilyTest, GetString) {\n  string json = R\"(\n  { \"a\": \"b\",\n    \"c\": {\n      \"d\": \"e\",\n      \"f\": \"g\"\n    }\n  }\n  )\";\n\n  auto resp = Run({\"SET\", \"json\", json});\n  EXPECT_THAT(resp, \"OK\");\n  resp = Run({\"JSON.GET\", \"json\", \"$.c\"});\n  EXPECT_EQ(resp, R\"([{\"d\":\"e\",\"f\":\"g\"}])\");\n  Run({\"SET\", \"not_json\", \"not_json\"});\n  resp = Run({\"JSON.GET\", \"not_json\", \"$.c\"});\n  EXPECT_THAT(resp, ErrArg(\"WRONGTYPE\"));\n}\n\nTEST_F(JsonFamilyTest, MaxNestingJsonDepth) {\n  auto generate_nested_json = [](int depth) -> std::string {\n    std::string json = \"{\";\n    for (int i = 0; i < depth - 1; ++i) {\n      json += R\"(\"key\": {)\";\n    }\n    json += R\"(\"key\": \"value\")\";  // Innermost value\n    for (int i = 0; i < depth - 1; ++i) {\n      json += \"}\";\n    }\n    json += \"}\";\n    return json;\n  };\n\n  // Generate JSON with maximum allowed depth (256)\n  /* std::string valid_json = generate_nested_json(255);\n\n  // Test with valid JSON at depth 256\n  auto resp = Run({\"JSON.SET\", \"valid_json\",  \".\", valid_json});\n  EXPECT_THAT(resp, \"OK\"); */\n\n  // Generate JSON exceeding maximum depth (257)\n  std::string invalid_json = generate_nested_json(257);\n\n  // Test with invalid JSON at depth 257\n  auto resp = Run({\"JSON.SET\", \"invalid_json\", \".\", invalid_json});\n  EXPECT_THAT(resp, ErrArg(\"failed to parse JSON\"));\n}\n\nTEST_F(JsonFamilyTest, SetNestedFields) {\n  auto resp = Run({\"JSON.SET\", \"json\", \"$\", \"{}\"});\n  EXPECT_THAT(resp, \"OK\");\n\n  resp = Run({\"JSON.SET\", \"json\", \"$['field1']\", \"1\"});\n  EXPECT_THAT(resp, \"OK\");\n\n  resp = Run({\"JSON.GET\", \"json\"});\n  EXPECT_EQ(resp, R\"({\"field1\":1})\");\n\n  resp = Run({\"JSON.SET\", \"json\", \"$['-field2']\", \"2\"});\n  
EXPECT_THAT(resp, \"OK\");\n\n  resp = Run({\"JSON.GET\", \"json\"});\n  EXPECT_EQ(resp, R\"({\"-field2\":2,\"field1\":1})\");\n}\n\nTEST_F(JsonFamilyTest, ArrPopWithFormatParameter) {\n  auto resp = Run({\"JSON.ARRPOP\", \"test_resp3\", \"FORMAT\", \"EXPAND\", \"$.a\"});\n  ASSERT_THAT(resp, ErrArg(\"value is not an integer or out of range\"));\n}\n\nTEST_F(JsonFamilyTest, DepthLimitExceeded) {\n  string deep_json =\n      R\"({\"jdiqr\":{\"nro\":{\"uzuf\":{\"bq\":{\"yc\":{\"zodmw\":{\"zbbq\":{\"sf\":{\"oule\":{\"j\":{\"mjsss\":{\"tap\":{\"bh\":{\"f\":{\"zlwgu\":{\"s\":{\"kt\":{\"fnmo\":{\"hub\":{\"xj\":{\"jo\":{\"ofara\":{\"kx\":{\"uw\":{\"z\":{\"mwvk\":{\"jo\":{\"qqz\":{\"b\":{\"tbp\":{\"esx\":{\"g\":{\"p\":{\"tpzk\":{\"i\":{\"azq\":{\"ttcd\":{\"wl\":{\"zo\":{\"l\":{\"nsq\":{\"tulso\":{\"uk\":{\"imfzw\":{\"vlub\":{\"k\":{\"ypml\":{\"voack\":{\"sosd\":{\"f\":{\"x\":{\"usv\":{\"hnw\":{\"ax\":{\"e\":{\"ozi\":{\"doi\":{\"k\":{\"bz\":{\"vxhp\":{\"e\":{\"vnpv\":{\"rhs\":{\"j\":{\"esp\":{\"f\":{\"ykyvy\":{\"xvmhg\":{\"eks\":{\"oijy\":{\"sjk\":{\"a\":{\"sejgy\":{\"msd\":{\"acyo\":{\"yxss\":{\"slbf\":{\"ssuns\":{\"c\":{\"kv\":{\"i\":{\"y\":{\"ubqz\":{\"uam\":{\"igaq\":{\"jl\":{\"vy\":{\"zlu\":{\"gscx\":{\"mb\":{\"idca\":{\"k\":{\"twx\":{\"ngjs\":{\"k\":{\"xcx\":{\"sxc\":{\"ye\":{\"fty\":{\"pho\":{\"lrn\":{\"wmv\":{\"h\":{\"sfuk\":{\"ilwzy\":{\"nlofv\":{\"mpcms\":{\"bg\":{\"jykgm\":{\"x\":{\"nbe\":{\"ixbyh\":{\"tmus\":{\"nqulr\":{\"cqxdw\":{\"wwpi\":{\"kj\":{\"udb\":{\"oct\":{\"tqkv\":{\"r\":{\"zev\":{\"rsu\":{\"gs\":{\"pyzm\":{\"au\":{\"__leaf\":42}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}})\";\n\n  auto resp = Run({\"JSON.SET\", \"test\", \"$\", deep_json});\n  ASSERT_THAT(resp, ErrArg(\"ERR failed to parse JSON\"));\n}\n\nTEST_F(JsonFamilyTest, JsonCommandsWorkingWithOtherTypesBug) {\n  std::string_view wrong_type_err{kWrongTypeErr};\n  wrong_type_err.remove_prefix(1);  // Remove the leading 
- character\n\n  auto resp = Run({\"HSET\", \"k1\", \"field\", \"value\"});\n  EXPECT_THAT(resp, IntArg(1));\n\n  // First bug: JSON.SET should return an error\n  resp = Run({\"JSON.SET\", \"k1\", \"$\", R\"({\"a\":\"b\"})\"});\n  ASSERT_THAT(resp, ErrArg(wrong_type_err));\n\n  // Second bug: JSON.DEL should not delete the hash\n  resp = Run({\"HSET\", \"k2\", \"field\", \"value\"});\n  EXPECT_THAT(resp, IntArg(1));\n\n  resp = Run({\"JSON.DEL\", \"k2\"});\n  ASSERT_THAT(resp, ErrArg(wrong_type_err));\n\n  resp = Run({\"HGET\", \"k2\", \"field\"});\n  EXPECT_THAT(resp, \"value\");\n}\n\nTEST_F(JsonFamilyTest, ResetStringKeyWithSetGet) {\n  auto resp = Run({\"JSON.SET\", \"key\", \"$\", R\"({\"a\":\"b\"})\"});\n  ASSERT_THAT(resp, \"OK\");\n\n  resp = Run({\"JSON.GET\", \"key\"});\n  EXPECT_THAT(resp, R\"({\"a\":\"b\"})\");\n\n  // Resetting the key with a string value\n  resp = Run({\"SET\", \"key\", R\"({\"a\":\"b\"})\"});\n  ASSERT_THAT(resp, \"OK\");\n\n  resp = Run({\"GET\", \"key\"});\n  EXPECT_THAT(resp, R\"({\"a\":\"b\"})\");\n\n  // JSON.GET should still work after resetting the key with a string value\n  resp = Run({\"JSON.GET\", \"key\"});\n  EXPECT_THAT(resp, R\"({\"a\":\"b\"})\");\n\n  // Resetting the key again with JSON.SET\n  // This should not cause any issues\n  resp = Run({\"JSON.SET\", \"key\", \"$\", R\"({\"a\":\"b\"})\"});\n  ASSERT_THAT(resp, \"OK\");\n\n  resp = Run({\"JSON.GET\", \"key\"});\n  EXPECT_THAT(resp, R\"({\"a\":\"b\"})\");\n}\n\nTEST_F(JsonFamilyTest, DelNonExistingKey) {\n  auto resp = Run({\"EXISTS\", \"nonexisting_key\"});\n  EXPECT_THAT(resp, IntArg(0));\n\n  resp = Run({\"JSON.DEL\", \"nonexisting_key\", \".\"});\n  EXPECT_THAT(resp, IntArg(0));\n\n  resp = Run({\"JSON.DEL\", \"nonexisting_key\", \"$\"});\n  EXPECT_THAT(resp, IntArg(0));\n\n  resp = Run({\"JSON.DEL\", \"nonexisting_key\"});\n  EXPECT_THAT(resp, IntArg(0));\n}\n\nTEST_F(JsonFamilyTest, JsonKeysWithDots) {\n  auto resp = Run(\n      {\"JSON.SET\", 
\"OFFERS:DBX-AGG1611-IGN\", \"$\",\n       R\"({\"Gallery\": {\"Images\": {\"bdz1xjm.jpeg\": \"some_value\", \"bdz1xjm\": \"another_value\"}}})\"});\n  EXPECT_THAT(resp, \"OK\");\n\n  resp = Run({\"JSON.GET\", \"OFFERS:DBX-AGG1611-IGN\", \"$['Gallery']['Images']['bdz1xjm']\"});\n  EXPECT_THAT(resp, \"[\\\"another_value\\\"]\");\n\n  resp = Run({\"JSON.GET\", \"OFFERS:DBX-AGG1611-IGN\", \"$['Gallery']['Images']['bdz1xjm.jpeg']\"});\n  EXPECT_THAT(resp, \"[\\\"some_value\\\"]\");\n}\n\nTEST_F(JsonFamilyTest, JsonSetDeleteExpiryOfExistingKey) {\n  auto resp = Run(\"SET key foo EX 1000\");\n  ASSERT_THAT(resp, \"OK\");\n  resp = Run(\"JSON.SET key $ {}\");\n  ASSERT_THAT(resp, \"OK\");\n  resp = Run(\"TTL key\");\n  ASSERT_THAT(resp, IntArg(-1));\n  resp = Run(\"EXPIRE key 100\");\n  ASSERT_THAT(resp, IntArg(1));\n  resp = Run(\"TTL key\");\n  EXPECT_THAT(resp.GetInt(), 100);\n}\n\nTEST_F(JsonFamilyTest, JsonIntPathTest) {\n  auto resp = Run(\n      R\"(JSON.SET test:images $ {\"images\":[{\"id\":1,\"sizes\":{\"1\":\"small.jpg\",\"10\":\"medium.jpg\",\"14\":\"large.jpg\",\"8\":\"thumb.jpg\"}}]})\");\n  ASSERT_THAT(resp, \"OK\");\n  resp = Run(R\"(JSON.GET test:images $.images[0].sizes.10)\");\n  EXPECT_THAT(resp, \"[\\\"medium.jpg\\\"]\");\n  resp = Run(R\"(JSON.GET test:images $.images[0].sizes[\"10\"])\");\n  EXPECT_THAT(resp, \"[\\\"medium.jpg\\\"]\");\n  resp = Run(R\"(JSON.GET test:images $.images[0].sizes['10'])\");\n  EXPECT_THAT(resp, \"[\\\"medium.jpg\\\"]\");\n  resp = Run(R\"(JSON.GET test:images $.images[0][\"sizes\"][\"10\"])\");\n  EXPECT_THAT(resp, \"[\\\"medium.jpg\\\"]\");\n  resp = Run(R\"(JSON.GET test:images $.images[0].sizes.8)\");\n  EXPECT_THAT(resp, \"[\\\"thumb.jpg\\\"]\");\n  resp = Run(R\"(JSON.GET test:images $.images[0].sizes.14)\");\n  EXPECT_THAT(resp, \"[\\\"large.jpg\\\"]\");\n  resp = Run(R\"(JSON.GET test:images $.images[0].sizes[\"8\"])\");\n  EXPECT_THAT(resp, \"[\\\"thumb.jpg\\\"]\");\n  resp = Run(R\"(JSON.GET test:images 
$.images[0].sizes[\"14\"])\");\n  EXPECT_THAT(resp, \"[\\\"large.jpg\\\"]\");\n}\n\nTEST_F(JsonFamilyTest, ARRLEN_RESP3NestedArrayBug) {\n  Run({\"HELLO\", \"3\"});\n\n  string json = R\"({\"a\":[1], \"b\":{\"a\":[1,2,3]}, \"c\":{\"x\":\"not_a\"}})\";\n  auto resp = Run({\"JSON.SET\", \"doc\", \".\", json});\n  ASSERT_THAT(resp, \"OK\");\n\n  // In RESP3 mode, this should return [1, 3] (direct integers)\n  // NOT [[1], [3]] (integers wrapped in arrays)\n  resp = Run({\"JSON.ARRLEN\", \"doc\", \"$..a\"});\n  ASSERT_THAT(resp, ArgType(RespExpr::ARRAY));\n  EXPECT_EQ(resp.GetVec().size(), 2);\n\n  // The bug: each element is wrapped in array when it shouldn't be\n  // Check that elements are NOT arrays themselves\n  EXPECT_THAT(resp.GetVec()[0], Not(ArgType(RespExpr::ARRAY)));  // Should be integer, not array\n  EXPECT_THAT(resp.GetVec()[1], Not(ArgType(RespExpr::ARRAY)));  // Should be integer, not array\n\n  // Verify the actual values\n  EXPECT_THAT(resp.GetVec()[0], IntArg(1));\n  EXPECT_THAT(resp.GetVec()[1], IntArg(3));\n}\n\nTEST_F(JsonFamilyTest, ARRAPPEND_RESP3NestedArrayBug) {\n  Run({\"HELLO\", \"3\"});\n\n  auto resp = Run({\"JSON.SET\", \"doc\", \".\", R\"({\"a\":[1], \"b\":{\"a\":[1,2,3]}})\"});\n  ASSERT_THAT(resp, \"OK\");\n\n  resp = Run({\"JSON.ARRAPPEND\", \"doc\", \"$..a\", \"2\"});\n  ASSERT_THAT(resp, ArgType(RespExpr::ARRAY));\n  ASSERT_EQ(resp.GetVec().size(), 2);\n  EXPECT_THAT(resp.GetVec()[0], Not(ArgType(RespExpr::ARRAY)));\n  EXPECT_THAT(resp.GetVec()[1], Not(ArgType(RespExpr::ARRAY)));\n  EXPECT_THAT(resp.GetVec()[0], IntArg(2));\n  EXPECT_THAT(resp.GetVec()[1], IntArg(4));\n}\n\nTEST_F(JsonFamilyTest, ARRINDEX_RESP3NestedArrayBug) {\n  Run({\"HELLO\", \"3\"});\n\n  auto resp = Run({\"JSON.SET\", \"doc\", \".\", R\"({\"a\":[\"x\",\"y\"], \"b\":{\"a\":[\"y\",\"z\"]}})\"});\n  ASSERT_THAT(resp, \"OK\");\n\n  resp = Run({\"JSON.ARRINDEX\", \"doc\", \"$..a\", R\"(\"y\")\"});\n  ASSERT_THAT(resp, ArgType(RespExpr::ARRAY));\n  
ASSERT_EQ(resp.GetVec().size(), 2);\n  EXPECT_THAT(resp.GetVec()[0], Not(ArgType(RespExpr::ARRAY)));\n  EXPECT_THAT(resp.GetVec()[1], Not(ArgType(RespExpr::ARRAY)));\n  EXPECT_THAT(resp.GetVec()[0], IntArg(1));\n  EXPECT_THAT(resp.GetVec()[1], IntArg(0));\n}\n\nTEST_F(JsonFamilyTest, ARRPOP_RESP3NestedArrayBug) {\n  Run({\"HELLO\", \"3\"});\n\n  auto resp = Run({\"JSON.SET\", \"doc\", \".\", R\"({\"a\":[7], \"b\":{\"a\":[8]}})\"});\n  ASSERT_THAT(resp, \"OK\");\n\n  resp = Run({\"JSON.ARRPOP\", \"doc\", \"$..a\"});\n  ASSERT_THAT(resp, ArgType(RespExpr::ARRAY));\n  ASSERT_EQ(resp.GetVec().size(), 2);\n  EXPECT_THAT(resp.GetVec()[0], Not(ArgType(RespExpr::ARRAY)));\n  EXPECT_THAT(resp.GetVec()[1], Not(ArgType(RespExpr::ARRAY)));\n}\n\nTEST_F(JsonFamilyTest, ARRTRIM_RESP3NestedArrayBug) {\n  Run({\"HELLO\", \"3\"});\n\n  auto resp = Run({\"JSON.SET\", \"doc\", \".\", R\"({\"a\":[1,2], \"b\":{\"a\":[3,4,5]}})\"});\n  ASSERT_THAT(resp, \"OK\");\n\n  resp = Run({\"JSON.ARRTRIM\", \"doc\", \"$..a\", \"0\", \"0\"});\n  ASSERT_THAT(resp, ArgType(RespExpr::ARRAY));\n  ASSERT_EQ(resp.GetVec().size(), 2);\n  EXPECT_THAT(resp.GetVec()[0], Not(ArgType(RespExpr::ARRAY)));\n  EXPECT_THAT(resp.GetVec()[1], Not(ArgType(RespExpr::ARRAY)));\n  EXPECT_THAT(resp.GetVec()[0], IntArg(1));\n  EXPECT_THAT(resp.GetVec()[1], IntArg(1));\n}\n\nTEST_F(JsonFamilyTest, STRLEN_RESP3NestedArrayBug) {\n  Run({\"HELLO\", \"3\"});\n\n  auto resp = Run({\"JSON.SET\", \"doc\", \".\", R\"({\"s\":\"hi\", \"b\":{\"s\":\"abc\"}})\"});\n  ASSERT_THAT(resp, \"OK\");\n\n  resp = Run({\"JSON.STRLEN\", \"doc\", \"$..s\"});\n  ASSERT_THAT(resp, ArgType(RespExpr::ARRAY));\n  ASSERT_EQ(resp.GetVec().size(), 2);\n  EXPECT_THAT(resp.GetVec()[0], Not(ArgType(RespExpr::ARRAY)));\n  EXPECT_THAT(resp.GetVec()[1], Not(ArgType(RespExpr::ARRAY)));\n  EXPECT_THAT(resp.GetVec()[0], IntArg(2));\n  EXPECT_THAT(resp.GetVec()[1], IntArg(3));\n}\n\nTEST_F(JsonFamilyTest, OBJLEN_RESP3NestedArrayBug) {\n  Run({\"HELLO\", 
\"3\"});\n\n  auto resp = Run({\"JSON.SET\", \"doc\", \".\", R\"({\"o\":{\"k\":1}, \"b\":{\"o\":{\"k\":1,\"m\":2}}})\"});\n  ASSERT_THAT(resp, \"OK\");\n\n  resp = Run({\"JSON.OBJLEN\", \"doc\", \"$..o\"});\n  ASSERT_THAT(resp, ArgType(RespExpr::ARRAY));\n  ASSERT_EQ(resp.GetVec().size(), 2);\n  EXPECT_THAT(resp.GetVec()[0], Not(ArgType(RespExpr::ARRAY)));\n  EXPECT_THAT(resp.GetVec()[1], Not(ArgType(RespExpr::ARRAY)));\n  EXPECT_THAT(resp.GetVec()[0], IntArg(1));\n  EXPECT_THAT(resp.GetVec()[1], IntArg(2));\n}\n\nTEST_F(JsonFamilyTest, OBJKEYS_RESP3NestedArrayBug) {\n  Run({\"HELLO\", \"3\"});\n\n  auto resp = Run({\"JSON.SET\", \"doc\", \".\", R\"({\"o\":{\"k\":1}, \"b\":{\"o\":{\"k\":1,\"m\":2}}})\"});\n  ASSERT_THAT(resp, \"OK\");\n\n  resp = Run({\"JSON.OBJKEYS\", \"doc\", \"$..o\"});\n  ASSERT_THAT(resp, ArgType(RespExpr::ARRAY));\n  ASSERT_EQ(resp.GetVec().size(), 2);\n  // Each element should be array of keys, not array wrapped again\n  auto& el0 = resp.GetVec()[0];\n  auto& el1 = resp.GetVec()[1];\n  ASSERT_THAT(el0, ArgType(RespExpr::ARRAY));\n  ASSERT_THAT(el1, ArgType(RespExpr::ARRAY));\n  EXPECT_THAT(el0.GetVec(), ElementsAre(\"k\"));\n  // Order of keys in objects is not guaranteed, so check size only for the second\n  EXPECT_EQ(el1.GetVec().size(), 2);\n}\n\nTEST_F(JsonFamilyTest, STRAPPEND_RESP3NestedArrayBug) {\n  Run({\"HELLO\", \"3\"});\n\n  auto resp = Run({\"JSON.SET\", \"doc\", \".\", R\"({\"s\":\"a\", \"b\":{\"s\":\"zz\"}})\"});\n  ASSERT_THAT(resp, \"OK\");\n\n  resp = Run({\"JSON.STRAPPEND\", \"doc\", \"$..s\", R\"(\"b\")\"});\n  ASSERT_THAT(resp, ArgType(RespExpr::ARRAY));\n  ASSERT_EQ(resp.GetVec().size(), 2);\n  EXPECT_THAT(resp.GetVec()[0], Not(ArgType(RespExpr::ARRAY)));\n  EXPECT_THAT(resp.GetVec()[1], Not(ArgType(RespExpr::ARRAY)));\n  EXPECT_THAT(resp.GetVec()[0], IntArg(2));\n  EXPECT_THAT(resp.GetVec()[1], IntArg(3));\n}\n\nTEST_F(JsonFamilyTest, TOGGLE_RESP3NestedArrayBug) {\n  Run({\"HELLO\", \"3\"});\n\n  auto resp = 
Run({\"JSON.SET\", \"doc\", \".\", R\"({\"b\":true, \"x\":{\"b\":false}})\"});\n  ASSERT_THAT(resp, \"OK\");\n\n  resp = Run({\"JSON.TOGGLE\", \"doc\", \"$..b\"});\n  ASSERT_THAT(resp, ArgType(RespExpr::ARRAY));\n  ASSERT_EQ(resp.GetVec().size(), 2);\n  EXPECT_THAT(resp.GetVec()[0], Not(ArgType(RespExpr::ARRAY)));\n  EXPECT_THAT(resp.GetVec()[1], Not(ArgType(RespExpr::ARRAY)));\n  EXPECT_THAT(resp.GetVec()[0], IntArg(0));\n  EXPECT_THAT(resp.GetVec()[1], IntArg(1));\n}\n\nTEST_F(JsonFamilyTest, SetOverLargeStringKey) {\n  // Create a key with a large string value (must be heap-allocated, >16 bytes).\n  string large_value(16000, 'x');\n  Run({\"SET\", \"key\", large_value});\n\n  // Overwrite the string key with a small JSON using root path.\n  // Without the fix, freeing the old string inside SetJson caused a negative\n  // memory diff in JsonAutoUpdater::SetJsonSize while bytes_used was 0.\n  auto resp = Run({\"JSON.SET\", \"key\", \"$\", \"1\"});\n  ASSERT_THAT(resp, \"OK\");\n\n  resp = Run({\"JSON.GET\", \"key\"});\n  EXPECT_EQ(resp, \"1\");\n}\n\nTEST_F(JsonFamilyTest, SetFullJsonInvalidOnNewKey) {\n  // Try to set invalid JSON on a non-existent key\n  auto resp = Run(\"JSON.SET newkey $ {invalid}\");\n  EXPECT_THAT(resp, ErrArg(\"failed to parse JSON\"));\n\n  // Verify the key was NOT created (proper cleanup)\n  resp = Run(\"EXISTS newkey\");\n  EXPECT_THAT(resp, IntArg(0));\n}\n\n}  // namespace dfly\n"
  },
  {
    "path": "src/server/list_family.cc",
    "content": "// Copyright 2022, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\nextern \"C\" {\n#include \"redis/sds.h\"\n}\n\n#include <absl/functional/overload.h>\n#include <absl/strings/numbers.h>\n\n#include \"base/flags.h\"\n#include \"base/logging.h\"\n#include \"core/detail/listpack.h\"\n#include \"core/qlist.h\"\n#include \"facade/cmd_arg_parser.h\"\n#include \"server/acl/acl_commands_def.h\"\n#include \"server/blocking_controller.h\"\n#include \"server/cluster/cluster_defs.h\"\n#include \"server/command_families.h\"\n#include \"server/command_registry.h\"\n#include \"server/conn_context.h\"\n#include \"server/container_utils.h\"\n#include \"server/db_slice.h\"\n#include \"server/engine_shard_set.h\"\n#include \"server/error.h\"\n#include \"server/family_utils.h\"\n#include \"server/namespaces.h\"\n#include \"server/transaction.h\"\n\n/**\n * The number of entries allowed per internal list node can be specified\n * as a fixed maximum size or a maximum number of elements.\n * For a fixed maximum size, use -5 through -1, meaning:\n * -5: max size: 64 Kb  <-- not recommended for normal workloads\n * -4: max size: 32 Kb  <-- not recommended\n * -3: max size: 16 Kb  <-- probably not recommended\n * -2: max size: 8 Kb   <-- good\n * -1: max size: 4 Kb   <-- good\n * Positive numbers mean store up to _exactly_ that number of elements\n * per list node.\n * The highest performing option is usually -2 (8 Kb size) or -1 (4 Kb size),\n * but if your use case is unique, adjust the settings as necessary.\n *\n */\nABSL_FLAG(int32_t, list_max_listpack_size, -2, \"Maximum listpack size, default is 8kb\");\n\n/**\n * Lists may also be compressed.\n * Compress depth is the number of quicklist listpack nodes from *each* side of\n * the list to *exclude* from compression.  The head and tail of the list\n * are always uncompressed for fast push/pop operations.  
Settings are:\n * 0: disable all list compression\n * 1: depth 1 means \"don't start compressing until after 1 node into the list,\n *    going from either the head or tail\"\n *    So: [head]->node->node->...->node->[tail]\n *    [head], [tail] will always be uncompressed; inner nodes will compress.\n * 2: [head]->[next]->node->node->...->node->[prev]->[tail]\n *    2 here means: don't compress head or head->next or tail->prev or tail,\n *    but compress all nodes between them.\n * 3: [head]->[next]->[next]->node->node->...->node->[prev]->[prev]->[tail]\n * etc.\n *\n */\n\nABSL_FLAG(int32_t, list_compress_depth, 0, \"Compress depth of the list. Default is no compression\");\nABSL_FLAG(unsigned, list_tiering_threshold, 0,\n          \"Tiering threshold for lists. Default - no tiering.\");\n\nnamespace dfly {\n\nusing namespace std;\n\nusing namespace facade;\nusing absl::GetFlag;\nusing absl::Overload;\nusing time_point = Transaction::time_point;\n\nnamespace {\n\nclass ListWrapper {\n  using LP = detail::ListPack;\n\n  std::variant<QList*, LP> impl_;\n\n  template <typename F> decltype(auto) VisitRef(F f) const {  // Cast T* to T&\n    return std::visit(Overload{[&f](auto* s) { return f(*s); }, f}, impl_);\n  }\n\n  template <typename F> decltype(auto) VisitMut(F f) {  // Cast T* to T&\n    return std::visit(Overload{[&f](auto* s) { return f(*s); }, f}, impl_);\n  }\n\n  static QList* PromoteToQLIfNeeded(LP lp, size_t additional_size) {\n    size_t sz = lp.BytesSize();\n    if (ShouldStoreAsListPack(sz + additional_size)) {\n      return nullptr;\n    }\n    QList* ql = CompactObj::AllocateMR<QList>(GetFlag(FLAGS_list_max_listpack_size),\n                                              GetFlag(FLAGS_list_compress_depth));\n    if (GetFlag(FLAGS_list_tiering_threshold) > 0) {\n      ql->SetTieringParams(\n          QList::TieringParams{.node_depth_threshold = GetFlag(FLAGS_list_tiering_threshold)});\n    }\n    if (lp.Size() > 0) {\n      
ql->AppendListpack(lp.GetPointer());\n    }\n    return ql;\n  }\n\n  void PushInternal(string_view value, QList::Where where, QList& ql) {\n    ql.Push(value, where);\n  }\n\n  void PushInternal(string_view value, QList::Where where, LP& lp) {\n    if (QList* ql = PromoteToQLIfNeeded(lp, value.size()); ql) {\n      if (lp.Size() == 0) {  // otherwise we already appended it in PromoteToQLIfNeeded.\n        lpFree(lp.GetPointer());\n      }\n      ql->Push(value, where);\n      impl_ = ql;\n    } else {\n      lp.Push(value, where);\n    }\n  }\n\n  bool InsertInternal(string_view pivot, string_view elem, QList::InsertOpt insert_opt, QList& ql) {\n    return ql.Insert(pivot, elem, insert_opt);\n  }\n\n  bool InsertInternal(string_view pivot, string_view elem, QList::InsertOpt insert_opt, LP& lp) {\n    uint8_t* p = lp.Find(pivot);\n    if (!p)\n      return false;\n\n    if (QList* ql = PromoteToQLIfNeeded(lp, elem.size()); ql) {\n      DCHECK_GT(ql->Size(), 0u);  // otherwise we would not Find the pivot.\n      impl_ = ql;\n      return ql->Insert(pivot, elem, insert_opt);\n    }\n\n    lp.Insert(p, elem, insert_opt);\n    return true;\n  }\n\n  bool ReplaceInternal(long index, string_view elem, QList& ql) {\n    return ql.Replace(index, elem);\n  }\n\n  bool ReplaceInternal(long index, string_view elem, LP& lp) {\n    uint8_t* p = lp.Seek(index);\n    if (!p)\n      return false;\n\n    if (QList* ql = PromoteToQLIfNeeded(lp, elem.size()); ql) {\n      DCHECK_GT(ql->Size(), 0u);  // otherwise we would not seek\n      impl_ = ql;\n      return ql->Replace(index, elem);\n    }\n    lp.Replace(p, elem);\n    return true;\n  }\n\n public:\n  template <typename T> explicit ListWrapper(T t) : impl_(std::forward<T>(t)) {\n  }\n\n  size_t Size() const {\n    return VisitRef([](auto& list) { return list.Size(); });\n  }\n\n  string Pop(QList::Where where) {\n    return VisitMut([where](auto& list) { return list.Pop(where); });\n  }\n\n  void Push(string_view value, 
QList::Where where) {\n    VisitMut([&](auto& list) { PushInternal(value, where, list); });\n  }\n\n  string First(QList::Where where) const {\n    return visit(Overload{[&](QList* ql) {\n                            auto it = ql->GetIterator(where);\n                            CHECK(it.Valid());\n                            return it.Get().to_string();\n                          },\n                          [&](const LP& lp) { return lp.First(where); }},\n                 impl_);\n  }\n\n  std::optional<string> At(long index) const {\n    return visit(Overload{[&](QList* ql) -> optional<string> {\n                            auto it = ql->GetIterator(index);\n                            if (!it.Valid())\n                              return nullopt;\n                            return it.Get().to_string();\n                          },\n                          [&](const LP& lp) { return lp.At(index); }},\n                 impl_);\n  }\n\n  vector<uint32_t> Pos(string_view element, uint32_t rank, uint32_t count, uint32_t max_len,\n                       QList::Where where) const;\n\n  bool Insert(string_view pivot, string_view elem, QList::InsertOpt insert_opt) {\n    return VisitMut([&](auto& list) { return InsertInternal(pivot, elem, insert_opt, list); });\n  }\n\n  unsigned Remove(string_view elem, unsigned count, QList::Where where);\n\n  bool Replace(long index, string_view elem) {\n    return VisitMut([&](auto& list) { return ReplaceInternal(index, elem, list); });\n  }\n\n  void Erase(long start, long count) {\n    VisitMut([&](auto& list) { list.Erase(start, count); });\n  }\n\n  void Launder(PrimeValue* pv) {\n    if (auto* lp = std::get_if<LP>(&impl_)) {\n      pv->SetRObjPtr(lp->GetPointer());\n    } else if (pv->Encoding() != kEncodingQL2) {\n      // We promoted to QList but the PrimeValue is not updated.\n      pv->SetRObjPtr(nullptr);\n      auto* ql = std::get<QList*>(impl_);\n      pv->InitRobj(OBJ_LIST, kEncodingQL2, ql);\n    }\n  
}\n};\n\nvector<uint32_t> ListWrapper::Pos(string_view element, uint32_t rank, uint32_t count,\n                                  uint32_t max_len, QList::Where where) const {\n  DCHECK_GT(rank, 0u);\n\n  if (auto* lp = std::get_if<LP>(&impl_)) {\n    return lp->Pos(element, rank, count, max_len, where);\n  }\n\n  vector<uint32_t> matches;\n\n  auto* ql = std::get<QList*>(impl_);\n  auto it = ql->GetIterator(where);\n  if (!it.Valid())\n    return matches;\n\n  unsigned index = 0;\n  while (max_len == 0 || index < max_len) {\n    if (it.Get() == element) {\n      if (rank == 1) {\n        auto k = (where == QList::HEAD) ? index : ql->Size() - index - 1;\n        matches.push_back(k);\n        if (count && matches.size() >= count)\n          break;\n      } else {\n        rank--;\n      }\n    }\n    index++;\n    if (!it.Next())\n      break;\n  }\n  return matches;\n}\n\nunsigned ListWrapper::Remove(string_view elem, unsigned count, QList::Where where) {\n  // try parsing the element into an integer.\n  int64_t ival;\n  int is_int = lpStringToInt64(elem.data(), elem.size(), &ival);\n  CollectionEntry collection_elem(elem.data(), elem.size());\n  if (is_int) {\n    collection_elem = CollectionEntry{ival};\n  }\n\n  if (auto* lp = std::get_if<LP>(&impl_)) {\n    return lp->Remove(collection_elem, count, where);\n  }\n\n  auto* ql = std::get<QList*>(impl_);\n  auto it = ql->GetIterator(where);\n  auto is_match = [&](const QList::Entry& entry) {\n    return is_int ? 
entry.is_int() && entry.ival() == ival : entry == elem;\n  };\n\n  unsigned removed = 0;\n  while (it.Valid()) {\n    QList::Entry entry = it.Get();\n    if (is_match(entry)) {\n      it = ql->Erase(it);\n      removed++;\n      if (count && removed == count)\n        break;\n    } else {\n      it.Next();\n    }\n  }\n  return removed;\n}\n\nListWrapper GetLW(const PrimeValue& mv) {\n  if (mv.Encoding() == kEncodingQL2) {\n    return ListWrapper{static_cast<QList*>(mv.RObjPtr())};\n  }\n  return ListWrapper{detail::ListPack(static_cast<uint8_t*>(mv.RObjPtr()))};\n}\n\nenum class ListDir : uint8_t { LEFT, RIGHT };\n\nQList::Where ToWhere(ListDir dir) {\n  return dir == ListDir::LEFT ? QList::HEAD : QList::TAIL;\n}\n\nListDir ParseDir(facade::CmdArgParser* parser) {\n  return parser->MapNext(\"LEFT\", ListDir::LEFT, \"RIGHT\", ListDir::RIGHT);\n}\n\nclass BPopPusher {\n public:\n  BPopPusher(string_view pop_key, string_view push_key, ListDir popdir, ListDir pushdir);\n\n  // Returns WRONG_TYPE, OK.\n  // If OK is returned then use result() to fetch the value.\n  OpResult<string> Run(unsigned limit_ms, Transaction* tx, ConnectionContext* cntx);\n\n private:\n  OpResult<string> RunSingle(time_point tp, Transaction* tx, ConnectionContext* cntx);\n  OpResult<string> RunPair(time_point tp, Transaction* tx, ConnectionContext* cntx);\n\n  string_view pop_key_, push_key_;\n  ListDir popdir_, pushdir_;\n};\n\n// Called as a callback from BPopGeneric after we've determined which key to pop.\nstd::string OpBPop(Transaction* t, EngineShard* shard, std::string_view key, ListDir dir) {\n  DVLOG(2) << \"popping from \" << key << \" \" << t->DebugId();\n\n  auto& db_slice = t->GetDbSlice(shard->shard_id());\n  auto it_res = db_slice.FindMutable(t->GetDbContext(), key, OBJ_LIST);\n\n  CHECK(it_res) << t->DebugId() << \" \" << key;  // must exist and must be ok.\n\n  auto it = it_res->it;\n  std::string value;\n  size_t len;\n\n  ListWrapper lw = GetLW(it->second);\n  QList::Where 
where = ToWhere(dir);\n  value = lw.Pop(where);\n  lw.Launder(&it->second);\n  len = lw.Size();\n\n  it_res->post_updater.Run();\n\n  OpArgs op_args = t->GetOpArgs(shard);\n  if (len == 0) {\n    DVLOG(1) << \"deleting key \" << key << \" \" << t->DebugId();\n    op_args.GetDbSlice().Del(op_args.db_cntx, it);\n  }\n\n  if (op_args.shard->journal()) {\n    string command = dir == ListDir::LEFT ? \"LPOP\" : \"RPOP\";\n    RecordJournal(op_args, command, ArgSlice{key}, 1);\n  }\n\n  return value;\n}\n\nListWrapper CreateOrGet(const OpArgs& op_args, string_view key, bool create, PrimeValue* pv) {\n  if (create) {\n    auto blocking_controller = op_args.db_cntx.ns->GetBlockingController(op_args.shard->shard_id());\n    if (blocking_controller) {\n      blocking_controller->Awaken(op_args.db_cntx.db_index, key);\n    }\n\n    uint8_t* lp = lpNew(0);\n    pv->InitRobj(OBJ_LIST, kEncodingListPack, lp);\n    return ListWrapper{detail::ListPack(lp)};\n  }\n\n  return GetLW(*pv);\n}\n\nOpResult<string> OpMoveSingleShard(const OpArgs& op_args, string_view src, string_view dest,\n                                   ListDir src_dir, ListDir dest_dir) {\n  auto& db_slice = op_args.GetDbSlice();\n  auto src_res = db_slice.FindMutable(op_args.db_cntx, src, OBJ_LIST);\n  if (!src_res)\n    return src_res.status();\n\n  auto src_it = src_res->it;\n  string val;\n  ListWrapper srcql_v2 = GetLW(src_it->second);\n  size_t prev_len = srcql_v2.Size();\n\n  if (src == dest) {  // simple case.\n    val = srcql_v2.Pop(ToWhere(src_dir));\n    srcql_v2.Push(val, ToWhere(dest_dir));\n    srcql_v2.Launder(&src_it->second);\n    return val;\n  }\n\n  src_res->post_updater.Run();\n\n  auto op_res = db_slice.AddOrFind(op_args.db_cntx, dest, OBJ_LIST);\n  RETURN_ON_BAD_STATUS(op_res);\n  auto& dest_res = *op_res;\n\n  // Insertion of dest could invalidate src_it. 
Find it again.\n  src_res = db_slice.FindMutable(op_args.db_cntx, src, OBJ_LIST);\n  src_it = src_res->it;\n\n  ListWrapper dest_lw = CreateOrGet(op_args, dest, dest_res.is_new, &dest_res.it->second);\n\n  val = srcql_v2.Pop(ToWhere(src_dir));\n  srcql_v2.Launder(&src_it->second);\n\n  dest_lw.Push(val, ToWhere(dest_dir));\n  dest_lw.Launder(&dest_res.it->second);\n\n  src_res->post_updater.Run();\n  dest_res.post_updater.Run();\n\n  if (prev_len == 1) {\n    db_slice.Del(op_args.db_cntx, src_it);\n  }\n\n  return val;\n}\n\n// Read-only peek operation that determines whether the list exists and optionally\n// returns the first from left/right value without popping it from the list.\nOpResult<string> Peek(const OpArgs& op_args, string_view key, ListDir dir, bool fetch) {\n  auto it_res = op_args.GetDbSlice().FindReadOnly(op_args.db_cntx, key, OBJ_LIST);\n  if (!it_res) {\n    return it_res.status();\n  }\n\n  if (!fetch)\n    return OpStatus::OK;\n\n  const PrimeValue& pv = it_res.value()->second;\n  DCHECK_GT(pv.Size(), 0u);  // should be not-empty.\n\n  ListWrapper lw = GetLW(pv);\n  return lw.First(ToWhere(dir));\n}\n\nOpResult<uint32_t> OpPush(const OpArgs& op_args, std::string_view key, ListDir dir,\n                          bool skip_notexist, const facade::ArgRange& vals, bool journal_rewrite) {\n  DbSlice::ItAndUpdater res;\n\n  if (skip_notexist) {\n    auto tmp_res = op_args.GetDbSlice().FindMutable(op_args.db_cntx, key, OBJ_LIST);\n    if (tmp_res == OpStatus::KEY_NOTFOUND)\n      return 0;  // Redis returns 0 for nonexisting keys for the *PUSHX actions.\n    RETURN_ON_BAD_STATUS(tmp_res);\n    res = std::move(*tmp_res);\n  } else {\n    auto op_res = op_args.GetDbSlice().AddOrFind(op_args.db_cntx, key, OBJ_LIST);\n    RETURN_ON_BAD_STATUS(op_res);\n    res = std::move(*op_res);\n  }\n\n  size_t len = 0;\n  DVLOG(1) << \"OpPush \" << key << \" new_key \" << res.is_new;\n  ListWrapper lw = CreateOrGet(op_args, key, res.is_new, &res.it->second);\n\n  
QList::Where where = ToWhere(dir);\n  for (string_view v : vals) {\n    lw.Push(v, where);\n  }\n  lw.Launder(&res.it->second);\n  len = lw.Size();\n\n  if (journal_rewrite && op_args.shard->journal()) {\n    string command = dir == ListDir::LEFT ? \"LPUSH\" : \"RPUSH\";\n    vector<string_view> mapped(vals.Size() + 1);\n    mapped[0] = key;\n    std::copy(vals.begin(), vals.end(), mapped.begin() + 1);\n    RecordJournal(op_args, command, mapped, 2);\n  }\n\n  return len;\n}\n\nOpResult<StringVec> OpPop(const OpArgs& op_args, string_view key, ListDir dir, uint32_t count,\n                          bool return_results, bool journal_rewrite) {\n  auto& db_slice = op_args.GetDbSlice();\n  auto it_res = db_slice.FindMutable(op_args.db_cntx, key, OBJ_LIST);\n  if (!it_res)\n    return it_res.status();\n\n  if (count == 0)\n    return StringVec{};\n\n  auto it = it_res->it;\n  size_t prev_len = 0;\n  StringVec res;\n\n  ListWrapper lw = GetLW(it->second);\n  prev_len = lw.Size();\n\n  if (prev_len < count) {\n    count = prev_len;\n  }\n\n  if (return_results) {\n    res.reserve(count);\n  }\n\n  QList::Where where = ToWhere(dir);\n  for (unsigned i = 0; i < count; ++i) {\n    string val = lw.Pop(where);\n    if (return_results) {\n      res.push_back(std::move(val));\n    }\n  }\n  lw.Launder(&it->second);\n\n  it_res->post_updater.Run();\n\n  if (count == prev_len) {\n    db_slice.Del(op_args.db_cntx, it);\n  }\n\n  if (op_args.shard->journal() && journal_rewrite) {\n    string command = dir == ListDir::LEFT ? 
\"LPOP\" : \"RPOP\";\n    RecordJournal(op_args, command, ArgSlice{key}, 2);\n  }\n  return res;\n}\n\nOpResult<string> MoveTwoShards(Transaction* trans, string_view src, string_view dest,\n                               ListDir src_dir, ListDir dest_dir, bool conclude_on_error) {\n  DCHECK_EQ(2u, trans->GetUniqueShardCnt());\n\n  OpResult<string> find_res[2];\n  OpResult<string> result;\n\n  // Transaction is comprised of 2 hops:\n  // 1 - check for entries existence, their types and if possible -\n  //     read the value we may move from the source list.\n  // 2.  If everything is ok, pop from source and push the peeked value into\n  //     the destination.\n  //\n  auto cb = [&](Transaction* t, EngineShard* shard) {\n    auto args = t->GetShardArgs(shard->shard_id());\n    DCHECK_EQ(1u, args.Size());\n    bool is_dest = args.Front() == dest;\n    find_res[is_dest] = Peek(t->GetOpArgs(shard), args.Front(), src_dir, !is_dest);\n    return OpStatus::OK;\n  };\n\n  trans->Execute(std::move(cb), false);\n\n  if (!find_res[0] || find_res[1].status() == OpStatus::WRONG_TYPE) {\n    result = find_res[0] ? 
find_res[1] : find_res[0];\n    if (conclude_on_error)\n      trans->Conclude();\n  } else {\n    // Everything is ok, lets proceed with the mutations.\n    auto cb = [&](Transaction* t, EngineShard* shard) {\n      auto args = t->GetShardArgs(shard->shard_id());\n      auto key = args.Front();\n      bool is_dest = (key == dest);\n      OpArgs op_args = t->GetOpArgs(shard);\n\n      if (is_dest) {\n        string_view val{find_res[0].value()};\n        DVLOG(1) << \"Pushing value: \" << val << \" to list: \" << dest;\n\n        OpPush(op_args, key, dest_dir, false, ArgSlice{val}, true);\n\n        // blocking_controller does not have to be set with non-blocking transactions.\n        auto blocking_controller = t->GetNamespace().GetBlockingController(shard->shard_id());\n        if (blocking_controller) {\n          IndexSlice slice(0, 1);\n          ShardArgs sa{absl::MakeSpan(&src, 1), absl::MakeSpan(&slice, 1)};\n\n          // hack, again. since we hacked which queue we are waiting on (see RunPair)\n          // we must clean-up src key here manually. 
See RunPair why we do this.\n          // in short- we suspended on \"src\" on both shards.\n          blocking_controller->RemovedWatched(sa, t);\n        }\n      } else {\n        DVLOG(1) << \"Popping value from list: \" << key;\n        OpPop(op_args, key, src_dir, 1, false, true);\n      }\n\n      return OpStatus::OK;\n    };\n    trans->Execute(std::move(cb), true);\n    result = std::move(find_res[0].value());\n  }\n\n  return result;\n}\n\nOpResult<uint32_t> OpLen(const OpArgs& op_args, std::string_view key) {\n  auto res = op_args.GetDbSlice().FindReadOnly(op_args.db_cntx, key, OBJ_LIST);\n  if (!res)\n    return res.status();\n\n  ListWrapper lw = GetLW(res.value()->second);\n  return lw.Size();\n}\n\nOpResult<string> OpIndex(const OpArgs& op_args, std::string_view key, long index) {\n  auto res = op_args.GetDbSlice().FindReadOnly(op_args.db_cntx, key, OBJ_LIST);\n  if (!res)\n    return res.status();\n\n  ListWrapper lw = GetLW(res.value()->second);\n  optional elem = lw.At(index);\n  if (!elem)\n    return OpStatus::KEY_NOTFOUND;\n  return std::move(*elem);\n}\n\nOpResult<vector<uint32_t>> OpPos(const OpArgs& op_args, string_view key, string_view element,\n                                 int rank, uint32_t count, uint32_t max_len) {\n  DCHECK(key.data() && element.data());\n  DCHECK_NE(rank, 0);\n\n  auto it_res = op_args.GetDbSlice().FindReadOnly(op_args.db_cntx, key, OBJ_LIST);\n  if (!it_res.ok())\n    return it_res.status();\n\n  const PrimeValue& pv = (*it_res)->second;\n  ListWrapper lw = GetLW(pv);\n\n  QList::Where where = QList::HEAD;\n  if (rank < 0) {\n    rank = -rank;\n    where = QList::TAIL;\n  }\n\n  return lw.Pos(element, rank, count, max_len, where);\n}\n\nOpResult<int> OpInsert(const OpArgs& op_args, string_view key, string_view pivot, string_view elem,\n                       QList::InsertOpt insert_opt) {\n  DCHECK(key.data() && pivot.data() && elem.data());\n\n  auto& db_slice = op_args.GetDbSlice();\n  auto it_res = 
db_slice.FindMutable(op_args.db_cntx, key, OBJ_LIST);\n  if (!it_res)\n    return it_res.status();\n\n  ListWrapper lw = GetLW(it_res->it->second);\n\n  int res = -1;\n\n  if (lw.Insert(pivot, elem, insert_opt)) {\n    lw.Launder(&it_res->it->second);\n    res = int(lw.Size());\n  }\n\n  return res;\n}\n\nOpResult<uint32_t> OpRem(const OpArgs& op_args, string_view key, string_view elem, long count) {\n  auto& db_slice = op_args.GetDbSlice();\n  auto it_res = db_slice.FindMutable(op_args.db_cntx, key, OBJ_LIST);\n  if (!it_res)\n    return it_res.status();\n\n  ListWrapper lw = GetLW(it_res->it->second);\n\n  QList::Where where = QList::HEAD;\n  if (count < 0) {\n    count = -count;\n    where = QList::TAIL;\n  }\n\n  unsigned removed = lw.Remove(elem, count, where);\n  size_t len = lw.Size();\n  lw.Launder(&it_res->it->second);\n  it_res->post_updater.Run();\n\n  if (len == 0) {\n    db_slice.Del(op_args.db_cntx, it_res->it);\n  }\n\n  return removed;\n}\n\nOpStatus OpSet(const OpArgs& op_args, string_view key, string_view elem, long index) {\n  auto& db_slice = op_args.GetDbSlice();\n  auto it_res = db_slice.FindMutable(op_args.db_cntx, key, OBJ_LIST);\n  if (!it_res)\n    return it_res.status();\n\n  ListWrapper lw = GetLW(it_res->it->second);\n  OpStatus status = OpStatus::OUT_OF_RANGE;\n  if (lw.Replace(index, elem)) {\n    lw.Launder(&it_res->it->second);\n    status = OpStatus::OK;\n  }\n  return status;\n}\n\nOpStatus OpTrim(const OpArgs& op_args, string_view key, long start, long end) {\n  auto& db_slice = op_args.GetDbSlice();\n  auto it_res = db_slice.FindMutable(op_args.db_cntx, key, OBJ_LIST);\n  if (!it_res)\n    return it_res.status();\n\n  auto it = it_res->it;\n\n  long llen = long(it->second.Size());\n\n  /* convert negative indexes */\n  if (start < 0)\n    start = llen + start;\n  if (end < 0)\n    end = llen + end;\n  if (start < 0)\n    start = 0;\n\n  long ltrim, rtrim;\n\n  /* Invariant: start >= 0, so this test will be true when end < 0.\n   
* The range is empty when start > end or start >= length. */\n  if (start > end || start >= llen) {\n    /* Out of range start or start > end result in empty list */\n    ltrim = llen;\n    rtrim = 0;\n  } else {\n    if (end >= llen)\n      end = llen - 1;\n    ltrim = start;\n    rtrim = llen - end - 1;\n  }\n\n  ListWrapper lw = GetLW(it->second);\n  lw.Erase(0, ltrim);\n  lw.Erase(-rtrim, rtrim);\n  lw.Launder(&it->second);\n\n  it_res->post_updater.Run();\n\n  if (it->second.Size() == 0) {\n    db_slice.Del(op_args.db_cntx, it);\n  }\n  return OpStatus::OK;\n}\n\nOpResult<StringVec> OpRange(const OpArgs& op_args, std::string_view key, long start, long end) {\n  auto res = op_args.GetDbSlice().FindReadOnly(op_args.db_cntx, key, OBJ_LIST);\n  if (!res)\n    return res.status();\n\n  const PrimeValue& pv = (*res)->second;\n  long llen = pv.Size();\n\n  /* convert negative indexes */\n  if (start < 0)\n    start = llen + start;\n  if (end < 0)\n    end = llen + end;\n  if (start < 0)\n    start = 0;\n\n  /* Invariant: start >= 0, so this test will be true when end < 0.\n   * The range is empty when start > end or start >= length. 
*/\n  if (start > end || start >= llen) {\n    /* Out of range start or start > end result in empty list */\n    return StringVec{};\n  }\n\n  StringVec str_vec;\n  container_utils::IterateList(\n      pv,\n      [&str_vec](container_utils::ContainerEntry ce) {\n        str_vec.emplace_back(ce.ToString());\n        return true;\n      },\n      start, end);\n  return str_vec;\n}\n\nvoid MoveGeneric(string_view src, string_view dest, ListDir src_dir, ListDir dest_dir,\n                 Transaction* tx, SinkReplyBuilder* builder) {\n  OpResult<string> result;\n\n  if (tx->GetUniqueShardCnt() == 1) {\n    auto cb = [&](Transaction* t, EngineShard* shard) {\n      OpArgs op_args = t->GetOpArgs(shard);\n      auto op_res = OpMoveSingleShard(op_args, src, dest, src_dir, dest_dir);\n      if (op_res) {\n        if (op_args.shard->journal()) {\n          std::string_view cmd = src_dir == ListDir::LEFT ? \"LPOP\" : \"RPOP\";\n          RecordJournal(op_args, cmd, ArgSlice{src}, 1);\n          cmd = dest_dir == ListDir::LEFT ? 
\"LPUSH\" : \"RPUSH\";\n          RecordJournal(op_args, cmd, ArgSlice{dest, op_res.value()}, 1);\n        }\n      }\n      return op_res;\n    };\n    result = tx->ScheduleSingleHopT(std::move(cb));\n  } else {\n    result = MoveTwoShards(tx, src, dest, src_dir, dest_dir, true);\n  }\n\n  auto* rb = static_cast<RedisReplyBuilder*>(builder);\n  if (result) {\n    return rb->SendBulkString(*result);\n  }\n\n  switch (result.status()) {\n    case OpStatus::KEY_NOTFOUND:\n      rb->SendNull();\n      break;\n\n    default:\n      builder->SendError(result.status());\n      break;\n  }\n}\n\nvoid RPopLPush(CmdArgList args, CommandContext* cmd_cntx) {\n  string_view src = ArgS(args, 0);\n  string_view dest = ArgS(args, 1);\n\n  MoveGeneric(src, dest, ListDir::RIGHT, ListDir::LEFT, cmd_cntx->tx(), cmd_cntx->rb());\n}\n\nvoid BRPopLPush(CmdArgList args, CommandContext* cmd_cntx) {\n  facade::CmdArgParser parser{args};\n  auto [src, dest] = parser.Next<string_view, string_view>();\n  float timeout = parser.Next<float>();\n  auto* builder = static_cast<RedisReplyBuilder*>(cmd_cntx->rb());\n  if (auto err = parser.TakeError(); err)\n    return cmd_cntx->SendError(err.MakeReply());\n\n  if (timeout < 0)\n    return cmd_cntx->SendError(\"timeout is negative\");\n\n  BPopPusher bpop_pusher(src, dest, ListDir::RIGHT, ListDir::LEFT);\n  OpResult<string> op_res =\n      bpop_pusher.Run(unsigned(timeout * 1000), cmd_cntx->tx(), cmd_cntx->server_conn_cntx());\n\n  if (op_res) {\n    return builder->SendBulkString(*op_res);\n  }\n\n  switch (op_res.status()) {\n    case OpStatus::CANCELLED:\n    case OpStatus::TIMED_OUT:\n      return builder->SendNull();\n      break;\n\n    default:\n      return builder->SendError(op_res.status());\n      break;\n  }\n}\n\nvoid BLMove(CmdArgList args, CommandContext* cmd_cntx) {\n  facade::CmdArgParser parser{args};\n  auto [src, dest] = parser.Next<string_view, string_view>();\n  ListDir src_dir = ParseDir(&parser);\n  ListDir dest_dir = 
ParseDir(&parser);\n  float timeout = parser.Next<float>();\n  auto* builder = static_cast<RedisReplyBuilder*>(cmd_cntx->rb());\n  if (auto err = parser.TakeError(); err)\n    return cmd_cntx->SendError(err.MakeReply());\n\n  if (timeout < 0)\n    return cmd_cntx->SendError(\"timeout is negative\");\n\n  BPopPusher bpop_pusher(src, dest, src_dir, dest_dir);\n  OpResult<string> op_res =\n      bpop_pusher.Run(unsigned(timeout * 1000), cmd_cntx->tx(), cmd_cntx->server_conn_cntx());\n\n  if (op_res) {\n    return builder->SendBulkString(*op_res);\n  }\n\n  switch (op_res.status()) {\n    case OpStatus::CANCELLED:\n    case OpStatus::TIMED_OUT:\n      return builder->SendNull();\n      break;\n\n    default:\n      return builder->SendError(op_res.status());\n      break;\n  }\n}\n\nBPopPusher::BPopPusher(string_view pop_key, string_view push_key, ListDir popdir, ListDir pushdir)\n    : pop_key_(pop_key), push_key_(push_key), popdir_(popdir), pushdir_(pushdir) {\n}\n\nOpResult<string> BPopPusher::Run(unsigned limit_ms, Transaction* tx, ConnectionContext* cntx) {\n  time_point tp =\n      limit_ms ? chrono::steady_clock::now() + chrono::milliseconds(limit_ms) : time_point::max();\n\n  if (tx->GetUniqueShardCnt() == 1) {\n    return RunSingle(tp, tx, cntx);\n  }\n\n  return RunPair(tp, tx, cntx);\n}\n\nOpResult<string> BPopPusher::RunSingle(time_point tp, Transaction* tx, ConnectionContext* cntx) {\n  OpResult<string> op_res;\n  bool is_multi = tx->IsMulti();\n  auto cb_move = [&](Transaction* t, EngineShard* shard) {\n    OpArgs op_args = t->GetOpArgs(shard);\n    op_res = OpMoveSingleShard(op_args, pop_key_, push_key_, popdir_, pushdir_);\n    if (op_res) {\n      if (op_args.shard->journal()) {\n        std::string_view cmd = popdir_ == ListDir::LEFT ? \"LPOP\" : \"RPOP\";\n        RecordJournal(op_args, cmd, ArgSlice{pop_key_}, 1);\n        cmd = pushdir_ == ListDir::LEFT ? 
\"LPUSH\" : \"RPUSH\";\n        RecordJournal(op_args, cmd, ArgSlice{push_key_, op_res.value()}, 1);\n      }\n    }\n    return OpStatus::OK;\n  };\n  tx->Execute(cb_move, false);\n\n  if (is_multi || op_res.status() != OpStatus::KEY_NOTFOUND) {\n    if (op_res.status() == OpStatus::KEY_NOTFOUND) {\n      op_res = OpStatus::TIMED_OUT;\n    }\n    tx->Conclude();\n    return op_res;\n  }\n\n  const auto key_checker = [](EngineShard* owner, const DbContext& context, Transaction*,\n                              std::string_view key) -> bool {\n    return context.GetDbSlice(owner->shard_id()).FindReadOnly(context, key, OBJ_LIST).ok();\n  };\n\n  // Block\n  auto status = tx->WaitOnWatch(tp, pop_key_, key_checker, &(cntx->blocked), &(cntx->paused));\n  if (status != OpStatus::OK)\n    return status;\n\n  tx->Execute(cb_move, true);\n  return op_res;\n}\n\nOpResult<string> BPopPusher::RunPair(time_point tp, Transaction* tx, ConnectionContext* cntx) {\n  bool is_multi = tx->IsMulti();\n  OpResult<string> op_res = MoveTwoShards(tx, pop_key_, push_key_, popdir_, pushdir_, false);\n\n  if (is_multi || op_res.status() != OpStatus::KEY_NOTFOUND) {\n    if (op_res.status() == OpStatus::KEY_NOTFOUND) {\n      op_res = OpStatus::TIMED_OUT;\n    }\n    tx->Conclude();\n    return op_res;\n  }\n\n  const auto key_checker = [](EngineShard* owner, const DbContext& context, Transaction*,\n                              std::string_view key) -> bool {\n    return context.GetDbSlice(owner->shard_id()).FindReadOnly(context, key, OBJ_LIST).ok();\n  };\n\n  // a hack: we watch in both shards for pop_key but only in the source shard it's relevant.\n  // Therefore we follow the regular flow of watching the key but for the destination shard it\n  // will never be triggerred.\n  // This allows us to run Transaction::Execute on watched transactions in both shards.\n  if (auto status = tx->WaitOnWatch(tp, pop_key_, key_checker, &cntx->blocked, &cntx->paused);\n      status != OpStatus::OK)\n    
return status;\n\n  return MoveTwoShards(tx, pop_key_, push_key_, popdir_, pushdir_, true);\n}\n\nvoid PushGeneric(ListDir dir, bool skip_notexists, CmdArgList args, CommandContext* cmd_cntx) {\n  std::string_view key = ArgS(args, 0);\n\n  auto cb = [&](Transaction* t, EngineShard* shard) {\n    return OpPush(t->GetOpArgs(shard), key, dir, skip_notexists, args.subspan(1), false);\n  };\n\n  OpResult<uint32_t> result = cmd_cntx->tx()->ScheduleSingleHopT(std::move(cb));\n  if (result) {\n    return cmd_cntx->SendLong(result.value());\n  }\n\n  return cmd_cntx->SendError(result.status());\n}\n\nvoid PopGeneric(ListDir dir, CmdArgList args, CommandContext* cmd_cntx) {\n  facade::CmdArgParser parser{args};\n  string_view key = parser.Next();\n\n  uint32_t count = 1;\n  bool return_arr = false;\n  if (parser.HasNext()) {\n    count = parser.Next<uint32_t>();\n    return_arr = true;\n  }\n\n  RETURN_ON_PARSE_ERROR(parser, cmd_cntx);\n\n  auto cb = [&](Transaction* t, EngineShard* shard) {\n    return OpPop(t->GetOpArgs(shard), key, dir, count, true, false);\n  };\n\n  OpResult<StringVec> result = cmd_cntx->tx()->ScheduleSingleHopT(std::move(cb));\n  auto* rb = static_cast<RedisReplyBuilder*>(cmd_cntx->rb());\n  switch (result.status()) {\n    case OpStatus::KEY_NOTFOUND:\n      return rb->SendNull();\n    case OpStatus::WRONG_TYPE:\n      return cmd_cntx->SendError(kWrongTypeErr);\n    default:;\n  }\n\n  if (return_arr) {\n    rb->SendBulkStrArr(*result);\n  } else {\n    DCHECK_EQ(1u, result->size());\n    rb->SendBulkString(result->front());\n  }\n}\n\nvoid BPopGeneric(ListDir dir, CmdArgList args, CommandContext* cmd_cntx) {\n  DCHECK_GE(args.size(), 2u);\n\n  float timeout;\n  auto timeout_str = ArgS(args, args.size() - 1);\n  if (!absl::SimpleAtof(timeout_str, &timeout)) {\n    return cmd_cntx->SendError(\"timeout is not a float or out of range\");\n  }\n  if (timeout < 0) {\n    return cmd_cntx->SendError(\"timeout is negative\");\n  }\n  VLOG(1) << \"BPop 
timeout(\" << timeout << \")\";\n\n  std::string popped_value;\n  auto cb = [dir, &popped_value](Transaction* t, EngineShard* shard, std::string_view key) {\n    popped_value = OpBPop(t, shard, key, dir);\n  };\n\n  auto* cntx = cmd_cntx->server_conn_cntx();\n  Transaction* tx = cmd_cntx->tx();\n  OpResult<string> popped_key = container_utils::RunCbOnFirstNonEmptyBlocking(\n      tx, OBJ_LIST, std::move(cb), unsigned(timeout * 1000), &cntx->blocked, &cntx->paused);\n\n  auto* rb = static_cast<RedisReplyBuilder*>(cmd_cntx->rb());\n  if (popped_key) {\n    DVLOG(1) << \"BPop \" << tx->DebugId() << \" popped from key \" << popped_key;  // key.\n    std::string_view str_arr[2] = {*popped_key, popped_value};\n    return rb->SendBulkStrArr(str_arr);\n  }\n\n  DVLOG(1) << \"result for \" << tx->DebugId() << \" is \" << popped_key.status();\n\n  switch (popped_key.status()) {\n    case OpStatus::WRONG_TYPE:\n      return cmd_cntx->SendError(kWrongTypeErr);\n    case OpStatus::CANCELLED:\n    case OpStatus::TIMED_OUT:\n      return rb->SendNullArray();\n    case OpStatus::KEY_MOVED: {\n      auto error = cluster::SlotOwnershipError(*tx->GetUniqueSlotId());\n      CHECK(!error.status.has_value() || error.status.value() != facade::OpStatus::OK);\n      return cmd_cntx->SendError(error);\n    }\n    default:\n      LOG(ERROR) << \"Unexpected error \" << popped_key.status();\n  }\n  return rb->SendNullArray();\n}\n\n// Returns the first non-empty key found in the shard arguments along with its type validity.\n// Returns a pair of (key, is_valid_type) where is_valid_type is true if the key exists\n// and has the correct type (LIST). 
If a wrong type is found, returns that key with false.\n// Returns nullopt if no suitable key is found.\noptional<pair<string_view, bool>> GetFirstNonEmptyKeyFound(EngineShard* shard, Transaction* t) {\n  ShardArgs keys = t->GetShardArgs(shard->shard_id());\n  DCHECK(!keys.Empty());\n\n  auto& db_slice = t->GetDbSlice(shard->shard_id());\n  optional<pair<string_view, bool>> result;\n\n  for (string_view key : keys) {\n    auto res = db_slice.FindReadOnly(t->GetDbContext(), key, OBJ_LIST);\n    if (res) {\n      result = {key, true};\n      break;\n    }\n\n    // If the key is not found, check if it's a wrong type error\n    if (res.status() == OpStatus::WRONG_TYPE) {\n      result = {key, false};\n      break;\n    }\n  }\n\n  return result;\n}\n\nvoid CmdLMPop(CmdArgList args, CommandContext* cmd_cntx) {\n  auto* response_builder = static_cast<RedisReplyBuilder*>(cmd_cntx->rb());\n\n  CmdArgParser parser{args};\n  parser.Skip(parser.Next<size_t>());  // skip numkeys and keys\n\n  ListDir dir = parser.MapNext(\"LEFT\", ListDir::LEFT, \"RIGHT\", ListDir::RIGHT);\n  size_t pop_count = 1;\n  if (parser.Check(\"COUNT\"))\n    pop_count = parser.Next<size_t>();\n\n  if (!parser.Finalize())\n    return cmd_cntx->SendError(parser.TakeError().MakeReply());\n\n  // Create a vector to store first found key for each shard\n  vector<optional<pair<string_view, bool>>> found_keys_per_shard(shard_set->size());\n\n  auto cb = [&](Transaction* t, EngineShard* shard) {\n    // Each shard writes results to its own space\n    found_keys_per_shard[shard->shard_id()] = GetFirstNonEmptyKeyFound(shard, t);\n    return OpStatus::OK;\n  };\n\n  cmd_cntx->tx()->Execute(std::move(cb), false /* followed by another hop */);\n\n  // Find the first existing key from command arguments\n  optional<string_view> key_to_pop;\n  bool found_wrong_type = false;\n  size_t min_index = numeric_limits<size_t>::max();\n\n  // Iterate over each shard to find the key with the smallest index\n  for (ShardId sid 
= 0; sid < found_keys_per_shard.size(); ++sid) {\n    if (!found_keys_per_shard[sid])\n      continue;\n\n    const auto& [found_key, is_valid_type] = *found_keys_per_shard[sid];\n    ShardArgs shard_args = cmd_cntx->tx()->GetShardArgs(sid);\n\n    for (auto it = shard_args.begin(); it != shard_args.end(); ++it) {\n      if (found_key == *it && it.index() < min_index) {\n        min_index = it.index();\n        key_to_pop = found_key;\n        found_wrong_type = !is_valid_type;\n        break;\n      }\n    }\n  }\n\n  // Handle errors and empty cases first\n  if (!key_to_pop || found_wrong_type) {\n    cmd_cntx->tx()->Conclude();\n    if (found_wrong_type) {\n      response_builder->SendError(kWrongTypeErr);\n    } else {\n      response_builder->SendNull();\n    }\n    return;\n  }\n\n  // Pop values from the found key\n  optional<ShardId> key_shard = Shard(*key_to_pop, shard_set->size());\n  OpResult<StringVec> result;\n\n  auto cb_pop = [dir, pop_count, key_shard, &result, key = *key_to_pop](Transaction* t,\n                                                                        EngineShard* shard) {\n    if (*key_shard == shard->shard_id()) {\n      result = OpPop(t->GetOpArgs(shard), key, dir, pop_count, true, true);\n    }\n    return OpStatus::OK;\n  };\n\n  cmd_cntx->tx()->Execute(std::move(cb_pop), true);\n\n  if (result) {\n    response_builder->StartArray(2);\n    response_builder->SendBulkString(*key_to_pop);\n    response_builder->SendBulkStrArr(*result);\n  } else {\n    response_builder->SendNull();\n  }\n}\n\nvoid CmdBLMPop(CmdArgList args, CommandContext* cmd_cntx) {\n  auto* response_builder = static_cast<RedisReplyBuilder*>(cmd_cntx->rb());\n\n  CmdArgParser parser{args};\n  float timeout = parser.Next<float>();\n  if (auto err = parser.TakeError(); err)\n    return cmd_cntx->SendError(err.MakeReply());\n\n  if (timeout < 0)\n    return cmd_cntx->SendError(\"timeout is negative\");\n\n  parser.Skip(parser.Next<size_t>());  // Skip numkeys and 
keys\n  ListDir dir = parser.MapNext(\"LEFT\", ListDir::LEFT, \"RIGHT\", ListDir::RIGHT);\n\n  size_t pop_count = 1;\n  if (parser.Check(\"COUNT\"))\n    pop_count = parser.Next<size_t>();\n\n  if (!parser.Finalize())\n    return cmd_cntx->SendError(parser.TakeError().MakeReply());\n\n  OpResult<StringVec> result;\n  auto cb = [&](Transaction* t, EngineShard* shard, string_view key) {\n    result = OpPop(t->GetOpArgs(shard), key, dir, pop_count, true, true);\n    return result.status();\n  };\n\n  ConnectionContext* conn_cntx = cmd_cntx->server_conn_cntx();\n  OpResult<string> popped_key = container_utils::RunCbOnFirstNonEmptyBlocking(\n      cmd_cntx->tx(), OBJ_LIST, std::move(cb), unsigned(timeout * 1000), &conn_cntx->blocked,\n      &conn_cntx->paused);\n\n  if (popped_key.ok()) {\n    response_builder->StartArray(2);\n    response_builder->SendBulkString(*popped_key);\n    response_builder->SendBulkStrArr(*result);\n  } else {\n    response_builder->SendNull();\n  }\n}\n\nvoid CmdLPush(CmdArgList args, CommandContext* cmd_cntx) {\n  return PushGeneric(ListDir::LEFT, false, args, cmd_cntx);\n}\n\nvoid CmdLPushX(CmdArgList args, CommandContext* cmd_cntx) {\n  return PushGeneric(ListDir::LEFT, true, args, cmd_cntx);\n}\n\nvoid CmdLPop(CmdArgList args, CommandContext* cmd_cntx) {\n  return PopGeneric(ListDir::LEFT, args, cmd_cntx);\n}\n\nvoid CmdRPush(CmdArgList args, CommandContext* cmd_cntx) {\n  return PushGeneric(ListDir::RIGHT, false, args, cmd_cntx);\n}\n\nvoid CmdRPushX(CmdArgList args, CommandContext* cmd_cntx) {\n  return PushGeneric(ListDir::RIGHT, true, args, cmd_cntx);\n}\n\nvoid CmdRPop(CmdArgList args, CommandContext* cmd_cntx) {\n  return PopGeneric(ListDir::RIGHT, args, cmd_cntx);\n}\n\nvoid CmdLLen(CmdArgList args, CommandContext* cmd_cntx) {\n  auto key = ArgS(args, 0);\n  auto cb = [&](Transaction* t, EngineShard* shard) { return OpLen(t->GetOpArgs(shard), key); };\n  OpResult<uint32_t> result = 
cmd_cntx->tx()->ScheduleSingleHopT(std::move(cb));\n  if (result) {\n    cmd_cntx->SendLong(result.value());\n  } else if (result.status() == OpStatus::KEY_NOTFOUND) {\n    cmd_cntx->SendLong(0);\n  } else {\n    cmd_cntx->SendError(result.status());\n  }\n}\n\nvoid CmdLPos(CmdArgList args, CommandContext* cmd_cntx) {\n  facade::CmdArgParser parser{args};\n  auto [key, elem] = parser.Next<string_view, string_view>();\n\n  int rank = 1;\n  uint32_t count = 1;\n  uint32_t max_len = 0;\n  bool skip_count = true;\n\n  while (parser.HasNext()) {\n    if (parser.Check(\"RANK\")) {\n      rank = parser.Next<int>();\n      continue;\n    }\n\n    if (parser.Check(\"COUNT\")) {\n      count = parser.Next<uint32_t>();\n      skip_count = false;\n      continue;\n    }\n\n    if (parser.Check(\"MAXLEN\")) {\n      max_len = parser.Next<uint32_t>();\n      continue;\n    }\n\n    parser.Skip(1);\n  }\n\n  auto* rb = static_cast<RedisReplyBuilder*>(cmd_cntx->rb());\n  if (rank == 0)\n    return rb->SendError(kInvalidIntErr);\n\n  RETURN_ON_PARSE_ERROR(parser, cmd_cntx);\n\n  auto cb = [&, &key = key, &elem = elem](Transaction* t, EngineShard* shard) {\n    return OpPos(t->GetOpArgs(shard), key, elem, rank, count, max_len);\n  };\n\n  Transaction* trans = cmd_cntx->tx();\n  auto result = trans->ScheduleSingleHopT(std::move(cb));\n\n  if (result.status() == OpStatus::WRONG_TYPE) {\n    return rb->SendError(result.status());\n  } else if (result.status() == OpStatus::INVALID_VALUE) {\n    return rb->SendError(result.status());\n  }\n\n  if (skip_count) {\n    if (result->empty()) {\n      rb->SendNull();\n    } else {\n      rb->SendLong((*result)[0]);\n    }\n  } else {\n    rb->SendLongArr(absl::MakeConstSpan(result.value()));\n  }\n}\n\nvoid CmdLIndex(CmdArgList args, CommandContext* cmd_cntx) {\n  std::string_view key = ArgS(args, 0);\n  std::string_view index_str = ArgS(args, 1);\n  int32_t index;\n  auto* rb = static_cast<RedisReplyBuilder*>(cmd_cntx->rb());\n\n  if 
(!absl::SimpleAtoi(index_str, &index)) {\n    rb->SendError(kInvalidIntErr);\n    return;\n  }\n\n  auto cb = [&](Transaction* t, EngineShard* shard) {\n    return OpIndex(t->GetOpArgs(shard), key, index);\n  };\n\n  OpResult<string> result = cmd_cntx->tx()->ScheduleSingleHopT(std::move(cb));\n  if (result) {\n    rb->SendBulkString(result.value());\n  } else if (result.status() == OpStatus::WRONG_TYPE) {\n    rb->SendError(result.status());\n  } else {\n    rb->SendNull();\n  }\n}\n\n/* LINSERT <key> (BEFORE|AFTER) <pivot> <element> */\nvoid CmdLInsert(CmdArgList args, CommandContext* cmd_cntx) {\n  facade::CmdArgParser parser{args};\n  string_view key = parser.Next();\n  QList::InsertOpt ins_opt = parser.MapNext(\"AFTER\", QList::AFTER, \"BEFORE\", QList::BEFORE);\n  auto [pivot, elem] = parser.Next<string_view, string_view>();\n  auto* rb = static_cast<RedisReplyBuilder*>(cmd_cntx->rb());\n\n  RETURN_ON_PARSE_ERROR(parser, cmd_cntx);\n\n  DCHECK(pivot.data() && elem.data());\n\n  auto cb = [&, &pivot = pivot, &elem = elem](Transaction* t, EngineShard* shard) {\n    return OpInsert(t->GetOpArgs(shard), key, pivot, elem, ins_opt);\n  };\n\n  OpResult<int> result = cmd_cntx->tx()->ScheduleSingleHopT(std::move(cb));\n  if (result || result == OpStatus::KEY_NOTFOUND) {\n    return rb->SendLong(result.value_or(0));\n  }\n\n  rb->SendError(result.status());\n}\n\nvoid CmdLTrim(CmdArgList args, CommandContext* cmd_cntx) {\n  string_view key = ArgS(args, 0);\n  string_view s_str = ArgS(args, 1);\n  string_view e_str = ArgS(args, 2);\n  int32_t start, end;\n\n  if (!absl::SimpleAtoi(s_str, &start) || !absl::SimpleAtoi(e_str, &end)) {\n    cmd_cntx->SendError(kInvalidIntErr);\n    return;\n  }\n\n  auto cb = [&](Transaction* t, EngineShard* shard) {\n    return OpTrim(t->GetOpArgs(shard), key, start, end);\n  };\n  OpStatus st = cmd_cntx->tx()->ScheduleSingleHop(std::move(cb));\n  if (st == OpStatus::KEY_NOTFOUND)\n    st = OpStatus::OK;\n  
cmd_cntx->SendError(st);\n}\n\nvoid CmdLRange(CmdArgList args, CommandContext* cmd_cntx) {\n  std::string_view key = ArgS(args, 0);\n  std::string_view s_str = ArgS(args, 1);\n  std::string_view e_str = ArgS(args, 2);\n  int32_t start, end;\n\n  auto* rb = static_cast<RedisReplyBuilder*>(cmd_cntx->rb());\n  if (!absl::SimpleAtoi(s_str, &start) || !absl::SimpleAtoi(e_str, &end)) {\n    rb->SendError(kInvalidIntErr);\n    return;\n  }\n\n  auto cb = [&](Transaction* t, EngineShard* shard) {\n    return OpRange(t->GetOpArgs(shard), key, start, end);\n  };\n\n  auto res = cmd_cntx->tx()->ScheduleSingleHopT(std::move(cb));\n  if (!res && res.status() != OpStatus::KEY_NOTFOUND) {\n    return rb->SendError(res.status());\n  }\n\n  rb->SendBulkStrArr(*res);\n}\n\n// lrem key 5 foo, will remove foo elements from the list if exists at most 5 times.\nvoid CmdLRem(CmdArgList args, CommandContext* cmd_cntx) {\n  std::string_view key = ArgS(args, 0);\n  std::string_view index_str = ArgS(args, 1);\n  std::string_view elem = ArgS(args, 2);\n  int32_t count;\n\n  if (!absl::SimpleAtoi(index_str, &count)) {\n    cmd_cntx->SendError(kInvalidIntErr);\n    return;\n  }\n\n  auto cb = [&](Transaction* t, EngineShard* shard) {\n    return OpRem(t->GetOpArgs(shard), key, elem, count);\n  };\n  OpResult<uint32_t> result = cmd_cntx->tx()->ScheduleSingleHopT(std::move(cb));\n  if (result || result == OpStatus::KEY_NOTFOUND) {\n    return cmd_cntx->SendLong(result.value_or(0));\n  }\n  cmd_cntx->SendError(result.status());\n}\n\nvoid CmdLSet(CmdArgList args, CommandContext* cmd_cntx) {\n  std::string_view key = ArgS(args, 0);\n  std::string_view index_str = ArgS(args, 1);\n  std::string_view elem = ArgS(args, 2);\n  int32_t count;\n\n  if (!absl::SimpleAtoi(index_str, &count)) {\n    cmd_cntx->SendError(kInvalidIntErr);\n    return;\n  }\n\n  auto cb = [&](Transaction* t, EngineShard* shard) {\n    return OpSet(t->GetOpArgs(shard), key, elem, count);\n  };\n  OpResult<void> result = 
cmd_cntx->tx()->ScheduleSingleHop(std::move(cb));\n  if (result) {\n    cmd_cntx->rb()->SendOk();\n  } else {\n    cmd_cntx->SendError(result.status());\n  }\n}\n\nvoid CmdBLPop(CmdArgList args, CommandContext* cmd_cntx) {\n  BPopGeneric(ListDir::LEFT, args, cmd_cntx);\n}\n\nvoid CmdBRPop(CmdArgList args, CommandContext* cmd_cntx) {\n  BPopGeneric(ListDir::RIGHT, args, cmd_cntx);\n}\n\nvoid CmdLMove(CmdArgList args, CommandContext* cmd_cntx) {\n  facade::CmdArgParser parser{args};\n  auto [src, dest] = parser.Next<string_view, string_view>();\n  ListDir src_dir = ParseDir(&parser);\n  ListDir dest_dir = ParseDir(&parser);\n\n  if (auto err = parser.TakeError(); err)\n    return cmd_cntx->SendError(err.MakeReply());\n\n  MoveGeneric(src, dest, src_dir, dest_dir, cmd_cntx->tx(), cmd_cntx->rb());\n}\n\n}  // namespace\n\nusing CI = CommandId;\n\n#define HFUNC(x) SetHandler(&Cmd##x)\n\nvoid RegisterListFamily(CommandRegistry* registry) {\n  registry->StartFamily(acl::LIST);\n  *registry\n      << CI{\"LPUSH\", CO::JOURNALED | CO::FAST | CO::DENYOOM, -3, 1, 1}.HFUNC(LPush)\n      << CI{\"LPUSHX\", CO::JOURNALED | CO::FAST | CO::DENYOOM, -3, 1, 1}.HFUNC(LPushX)\n      << CI{\"LPOP\", CO::JOURNALED | CO::FAST, -2, 1, 1}.HFUNC(LPop)\n      << CI{\"LMPOP\", CO::JOURNALED | CO::VARIADIC_KEYS | CO::NO_AUTOJOURNAL, -4, 2, 2}.HFUNC(LMPop)\n      << CI{\"BLMPOP\", CO::JOURNALED | CO::BLOCKING | CO::VARIADIC_KEYS | CO::NO_AUTOJOURNAL, -5, 3,\n            3}\n             .HFUNC(BLMPop)\n      << CI{\"RPUSH\", CO::JOURNALED | CO::FAST | CO::DENYOOM, -3, 1, 1}.HFUNC(RPush)\n      << CI{\"RPUSHX\", CO::JOURNALED | CO::FAST | CO::DENYOOM, -3, 1, 1}.HFUNC(RPushX)\n      << CI{\"RPOP\", CO::JOURNALED | CO::FAST, -2, 1, 1}.HFUNC(RPop)\n      << CI{\"RPOPLPUSH\", CO::JOURNALED | CO::NO_AUTOJOURNAL, 3, 1, 2}.SetHandler(RPopLPush)\n      << CI{\"BRPOPLPUSH\", CO::JOURNALED | CO::NOSCRIPT | CO::BLOCKING | CO::NO_AUTOJOURNAL, 4, 1, 2}\n             .SetHandler(BRPopLPush)\n      << 
CI{\"BLPOP\", CO::JOURNALED | CO::NOSCRIPT | CO::BLOCKING | CO::NO_AUTOJOURNAL, -3, 1, -2}\n             .HFUNC(BLPop)\n      << CI{\"BRPOP\", CO::JOURNALED | CO::NOSCRIPT | CO::BLOCKING | CO::NO_AUTOJOURNAL, -3, 1, -2}\n             .HFUNC(BRPop)\n      << CI{\"LLEN\", CO::READONLY | CO::FAST, 2, 1, 1}.HFUNC(LLen)\n      << CI{\"LPOS\", CO::READONLY, -3, 1, 1}.HFUNC(LPos)\n      << CI{\"LINDEX\", CO::READONLY, 3, 1, 1}.HFUNC(LIndex)\n      << CI{\"LINSERT\", CO::JOURNALED | CO::DENYOOM, 5, 1, 1}.HFUNC(LInsert)\n      << CI{\"LRANGE\", CO::READONLY, 4, 1, 1}.HFUNC(LRange)\n      << CI{\"LSET\", CO::JOURNALED | CO::DENYOOM, 4, 1, 1}.HFUNC(LSet)\n      << CI{\"LTRIM\", CO::JOURNALED, 4, 1, 1}.HFUNC(LTrim)\n      << CI{\"LREM\", CO::JOURNALED, 4, 1, 1}.HFUNC(LRem)\n      << CI{\"LMOVE\", CO::JOURNALED | CO::NO_AUTOJOURNAL, 5, 1, 2}.HFUNC(LMove)\n      << CI{\"BLMOVE\", CO::JOURNALED | CO::NO_AUTOJOURNAL | CO::BLOCKING, 6, 1, 2}.SetHandler(\n             BLMove);\n}\n\n}  // namespace dfly\n"
  },
  {
    "path": "src/server/list_family_test.cc",
    "content": "// Copyright 2022, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#include <absl/strings/match.h>\n\n#include <random>\n\n#include \"base/gtest.h\"\n#include \"base/logging.h\"\n#include \"facade/facade_test.h\"\n#include \"server/blocking_controller.h\"\n#include \"server/conn_context.h\"\n#include \"server/engine_shard_set.h\"\n#include \"server/test_utils.h\"\n#include \"server/transaction.h\"\n#include \"util/fibers/fibers.h\"\n\nusing namespace testing;\nusing namespace std;\nusing namespace util;\nusing absl::StrCat;\n\nnamespace dfly {\n\nclass ListFamilyTest : public BaseFamilyTest {\n protected:\n  ListFamilyTest() {\n    num_threads_ = 4;\n  }\n\n  static unsigned NumWatched() {\n    atomic_uint32_t sum{0};\n\n    auto ns = &namespaces->GetDefaultNamespace();\n    shard_set->RunBriefInParallel([&](EngineShard* es) {\n      auto* bc = ns->GetBlockingController(es->shard_id());\n      if (bc)\n        sum.fetch_add(bc->NumWatched(0), memory_order_relaxed);\n    });\n\n    return sum.load();\n  }\n\n  static bool HasAwakened() {\n    atomic_uint32_t sum{0};\n    auto ns = &namespaces->GetDefaultNamespace();\n    shard_set->RunBriefInParallel([&](EngineShard* es) {\n      auto* bc = ns->GetBlockingController(es->shard_id());\n      if (bc)\n        sum.fetch_add(bc->HasAwakedTransaction(), memory_order_relaxed);\n    });\n\n    return sum.load() > 0;\n  }\n};\n\nconst char kKey1[] = \"x\";\nconst char kKey2[] = \"b\";\nconst char kKey3[] = \"c\";\n\nTEST_F(ListFamilyTest, Basic) {\n  auto resp = Run({\"lpush\", kKey1, \"1\"});\n  EXPECT_THAT(resp, IntArg(1));\n  resp = Run({\"lpush\", kKey2, \"2\"});\n  ASSERT_THAT(resp, IntArg(1));\n  resp = Run({\"llen\", kKey1});\n  ASSERT_THAT(resp, IntArg(1));\n}\n\nTEST_F(ListFamilyTest, Expire) {\n  auto resp = Run({\"lpush\", kKey1, \"1\"});\n  EXPECT_THAT(resp, IntArg(1));\n\n  resp = Run({\"expire\", kKey1, \"1\"});\n  EXPECT_THAT(resp, IntArg(1));\n\n  
AdvanceTime(1000);\n\n  resp = Run({\"lpush\", kKey1, \"1\"});\n  EXPECT_THAT(resp, IntArg(1));\n}\n\nTEST_F(ListFamilyTest, BLMPopNonblocking) {\n  auto resp = Run({\"lpush\", kKey1, \"1\", \"2\", \"3\", \"4\"});\n  EXPECT_THAT(resp, IntArg(4));\n\n  resp = Run({\"blmpop\", \"0.01\", \"2\", kKey2, kKey1, \"LEFT\"});\n  EXPECT_THAT(resp, RespElementsAre(kKey1, RespElementsAre(\"4\")));\n\n  resp = Run({\"blmpop\", \"0.01\", \"2\", kKey2, kKey1, \"RIGHT\", \"COUNT\", \"2\"});\n  EXPECT_THAT(resp, RespElementsAre(kKey1, RespElementsAre(\"1\", \"2\")));\n\n  // If the count exceeds the size of the key's values (but the key is non-empty) then return all of\n  // the key's values\n  resp = Run({\"blmpop\", \"0.01\", \"1\", kKey1, \"RIGHT\", \"COUNT\", \"10\"});\n  EXPECT_THAT(resp, RespElementsAre(kKey1, RespElementsAre(\"3\")));\n}\n\nTEST_F(ListFamilyTest, BLMPopInvalidSyntax) {\n  // Not enough arguments\n  auto resp = Run({\"blmpop\", \"0.1\", \"1\", kKey1});\n  EXPECT_THAT(resp, ErrArg(\"wrong number of arguments\"));\n\n  // Timeout is not a float\n  resp = Run({\"blmpop\", \"foo\", \"1\", kKey1, \"LEFT\", \"COUNT\", \"1\"});\n  EXPECT_THAT(resp, ErrArg(\"value is not a valid float\"));\n\n  // Negative timeout\n  resp = Run({\"blmpop\", \"-0.01\", \"1\", kKey1, \"LEFT\", \"COUNT\", \"1\"});\n  EXPECT_THAT(resp, ErrArg(\"timeout is negative\"));\n\n  // Zero keys\n  resp = Run({\"blmpop\", \"0.01\", \"0\", \"LEFT\", \"COUNT\", \"1\"});\n  EXPECT_THAT(resp, ErrArg(\"at least 1 input key is needed\"));\n\n  // Number of keys is not uint\n  resp = Run({\"blmpop\", \"0.01\", \"aa\", kKey1, \"LEFT\"});\n  EXPECT_THAT(resp, ErrArg(\"value is not an integer or out of range\"));\n\n  // Missing LEFT/RIGHT\n  resp = Run({\"blmpop\", \"0.01\", \"1\", kKey1, \"COUNT\", \"1\"});\n  EXPECT_THAT(resp, ErrArg(\"syntax error\"));\n\n  // Wrong number of keys\n  resp = Run({\"blmpop\", \"0.01\", \"1\", kKey1, kKey2, \"LEFT\"});\n  EXPECT_THAT(resp, ErrArg(\"syntax error\"));\n\n  
// COUNT without number\n  resp = Run({\"blmpop\", \"0.01\", \"1\", kKey1, \"LEFT\", \"COUNT\"});\n  EXPECT_THAT(resp, ErrArg(\"syntax error\"));\n\n  // COUNT is not uint\n  resp = Run({\"blmpop\", \"0.01\", \"1\", kKey1, \"LEFT\", \"COUNT\", \"boo\"});\n  EXPECT_THAT(resp, ErrArg(\"value is not an integer or out of range\"));\n\n  // Too many arguments\n  resp = Run({\"blmpop\", \"0.01\", \"1\", \"c\", \"LEFT\", \"COUNT\", \"2\", \"foo\"});\n  EXPECT_THAT(resp, ErrArg(\"syntax error\"));\n}\n\nTEST_F(ListFamilyTest, BLMPopBlocking) {\n  // attempting to pop from empty key results in blocking and returns\n  // null if no values are pushed to the key.\n  RespExpr resp;\n  auto fb0 = pp_->at(0)->LaunchFiber(Launch::dispatch, [&] {\n    resp = Run({\"blmpop\", \"0.1\", \"1\", kKey1, \"LEFT\"});\n  });\n  ThisFiber::SleepFor(1ms);\n  ASSERT_TRUE(IsLocked(0, kKey1));\n\n  fb0.Join();\n  ASSERT_FALSE(IsLocked(0, kKey1));\n  EXPECT_THAT(resp, ArgType(RespExpr::NIL));\n\n  // BLMPOP should not block if there is a non-empty key available\n  resp = Run({\"lpush\", kKey1, \"0\"});\n  EXPECT_THAT(resp, IntArg(1));\n\n  auto fb1 = pp_->at(1)->LaunchFiber(Launch::dispatch, [&] {\n    resp = Run({\"blmpop\", \"0.1\", \"1\", kKey1, \"LEFT\"});\n  });\n  ThisFiber::SleepFor(1ms);\n  // shouldn't need to lock the key just pop immediately\n  ASSERT_FALSE(IsLocked(0, kKey1));\n  fb1.Join();\n\n  // should block until a key is available and then immediately unblock\n  auto fb2 = pp_->at(2)->LaunchFiber(Launch::dispatch, [&] {\n    resp = Run({\"blmpop\", \"0.1\", \"1\", kKey1, \"LEFT\"});\n  });\n\n  // key should be locked while waiting\n  WaitUntilLocked(0, kKey1);\n  ASSERT_TRUE(IsLocked(0, kKey1));\n\n  auto push_resp = Run({\"lpush\", kKey1, \"1\"});\n  EXPECT_THAT(push_resp, IntArg(1));\n\n  // key should be unlocked after being inserted to\n  fb2.Join();\n  ASSERT_FALSE(IsLocked(0, kKey1));\n  EXPECT_THAT(resp, RespElementsAre(kKey1, 
RespElementsAre(\"1\")));\n}\n\nTEST_F(ListFamilyTest, BLPopUnblocking) {\n  auto resp = Run({\"lpush\", kKey1, \"1\"});\n  EXPECT_THAT(resp, IntArg(1));\n  resp = Run({\"lpush\", kKey2, \"2\"});\n  ASSERT_THAT(resp, IntArg(1));\n\n  resp = Run({\"blpop\", kKey1, kKey2});  // missing \"0\" delimiter.\n  ASSERT_THAT(resp, ErrArg(\"timeout is not a float\"));\n\n  resp = Run({\"blpop\", kKey1, kKey2, \"0\"});\n  ASSERT_EQ(2, GetDebugInfo().shards_count);\n  ASSERT_THAT(resp, ArrLen(2));\n  EXPECT_THAT(resp.GetVec(), ElementsAre(kKey1, \"1\"));\n\n  resp = Run({\"blpop\", kKey1, kKey2, \"0\"});\n  ASSERT_THAT(resp, ArrLen(2));\n  EXPECT_THAT(resp.GetVec(), ElementsAre(kKey2, \"2\"));\n\n  resp = Run({\"set\", \"z\", \"1\"});\n  ASSERT_EQ(resp, \"OK\");\n\n  resp = Run({\"blpop\", \"z\", \"0\"});\n  ASSERT_THAT(resp, ErrArg(\"WRONGTYPE \"));\n\n  ASSERT_FALSE(IsLocked(0, \"x\"));\n  ASSERT_FALSE(IsLocked(0, \"y\"));\n  ASSERT_FALSE(IsLocked(0, \"z\"));\n}\n\nTEST_F(ListFamilyTest, BLPopBlocking) {\n  RespExpr resp0, resp1;\n\n  // Run the fiber at creation.\n  auto fb0 = pp_->at(0)->LaunchFiber(Launch::dispatch, [&] {\n    resp0 = Run({\"blpop\", \"x\", \"0\"});\n    LOG(INFO) << \"pop0\";\n  });\n\n  ThisFiber::SleepFor(50us);\n  auto fb1 = pp_->at(1)->LaunchFiber([&] {\n    resp1 = Run({\"blpop\", \"x\", \"0\"});\n    LOG(INFO) << \"pop1\";\n  });\n  ThisFiber::SleepFor(30us);\n\n  RespExpr resp = pp_->at(1)->Await([&] { return Run(\"B1\", {\"lpush\", \"x\", \"2\", \"1\"}); });\n  ASSERT_THAT(resp, IntArg(2));\n\n  fb0.Join();\n  fb1.Join();\n\n  // fb0 should start first and be the first transaction blocked. 
Therefore, it should pop '1'.\n  // sometimes order is switched, need to think how to fix it.\n  int64_t epoch0 = GetDebugInfo(\"IO0\").clock;\n  int64_t epoch1 = GetDebugInfo(\"IO1\").clock;\n  ASSERT_LT(epoch0, epoch1);\n  ASSERT_THAT(resp0, ArrLen(2));\n  EXPECT_THAT(resp0.GetVec(), ElementsAre(\"x\", \"1\"));\n  ASSERT_FALSE(IsLocked(0, \"x\"));\n  ASSERT_EQ(0, NumWatched());\n}\n\nTEST_F(ListFamilyTest, BLPopMultiple) {\n  RespExpr resp0, resp1;\n\n  resp0 = Run({\"blpop\", kKey1, kKey2, \"0.01\"});  // timeout\n  EXPECT_THAT(resp0, ArgType(RespExpr::NIL_ARRAY));\n  ASSERT_EQ(2, GetDebugInfo().shards_count);\n\n  ASSERT_FALSE(IsLocked(0, kKey1));\n  ASSERT_FALSE(IsLocked(0, kKey2));\n\n  auto fb1 = pp_->at(0)->LaunchFiber(Launch::dispatch, [&] {\n    resp0 = Run({\"blpop\", kKey1, kKey2, \"0\"});\n  });\n\n  pp_->at(1)->Await([&] { Run({\"lpush\", kKey1, \"1\", \"2\", \"3\"}); });\n  fb1.Join();\n\n  ASSERT_THAT(resp0, ArrLen(2));\n  EXPECT_THAT(resp0.GetVec(), ElementsAre(kKey1, \"3\"));\n  ASSERT_FALSE(IsLocked(0, kKey1));\n  ASSERT_FALSE(IsLocked(0, kKey2));\n  ASSERT_EQ(0, NumWatched());\n}\n\nTEST_F(ListFamilyTest, BLPopTimeout) {\n  RespExpr resp = Run({\"blpop\", kKey1, kKey2, kKey3, \"0.01\"});\n  EXPECT_THAT(resp, ArgType(RespExpr::NIL_ARRAY));\n  EXPECT_EQ(3, GetDebugInfo().shards_count);\n  ASSERT_FALSE(IsLocked(0, kKey1));\n\n  // Under Multi\n  resp = Run({\"multi\"});\n  ASSERT_EQ(resp, \"OK\");\n\n  Run({\"blpop\", kKey1, \"0\"});\n  resp = Run({\"exec\"});\n\n  EXPECT_THAT(resp, ArgType(RespExpr::NIL_ARRAY));\n  ASSERT_FALSE(IsLocked(0, kKey1));\n  ASSERT_EQ(0, NumWatched());\n}\n\nTEST_F(ListFamilyTest, BLPopTimeout2) {\n  Run({\"BLPOP\", \"blist1\", \"blist2\", \"0.1\"});\n\n  Run({\"RPUSH\", \"blist2\", \"d\"});\n  Run({\"RPUSH\", \"blist2\", \"hello\"});\n\n  auto resp = Run({\"BLPOP\", \"blist1\", \"blist2\", \"1\"});\n  ASSERT_THAT(resp, ArrLen(2));\n  ASSERT_THAT(resp.GetVec(), ElementsAre(\"blist2\", \"d\"));\n\n  Run({\"RPUSH\", 
\"blist1\", \"a\"});\n  Run({\"DEL\", \"blist2\"});\n  Run({\"RPUSH\", \"blist2\", \"d\"});\n  Run({\"BLPOP\", \"blist1\", \"blist2\", \"1\"});\n  ASSERT_EQ(0, NumWatched());\n}\n\nTEST_F(ListFamilyTest, BLPopMultiPush) {\n  Run({\"exists\", kKey1, kKey2, kKey3});\n  ASSERT_EQ(3, GetDebugInfo().shards_count);\n  RespExpr blpop_resp;\n  auto pop_fb = pp_->at(0)->LaunchFiber(Launch::dispatch, [&] {\n    blpop_resp = Run({\"blpop\", kKey1, kKey2, kKey3, \"0\"});\n  });\n\n  WaitUntilLocked(0, kKey1);\n\n  auto p1_fb = pp_->at(1)->LaunchFiber([&] {\n    for (unsigned i = 0; i < 100; ++i) {\n      // a filler command to create scheduling queue.\n      Run({\"exists\", kKey1, kKey2, kKey3});\n    }\n  });\n\n  auto p2_fb = pp_->at(2)->LaunchFiber([&] {\n    Run({\"multi\"});\n    Run({\"lpush\", kKey3, \"C\"});\n    Run({\"exists\", kKey2});\n    Run({\"lpush\", kKey2, \"B\"});\n    Run({\"exists\", kKey1});\n    Run({\"lpush\", kKey1, \"A\"});\n    Run({\"exists\", kKey1, kKey2, kKey3});\n    auto resp = Run({\"exec\"});\n    ASSERT_THAT(resp, ArrLen(6));\n  });\n\n  p1_fb.Join();\n  p2_fb.Join();\n\n  pop_fb.Join();\n\n  // We can't determine what key was popped, so only check result presence.\n  // It might not be first kKey3 \"C\" because of squashing and re-ordering.\n  ASSERT_THAT(blpop_resp, ArrLen(2));\n  ASSERT_THAT(Run({\"exists\", kKey1, kKey2, kKey3}), IntArg(2));\n  ASSERT_EQ(0, NumWatched());\n}\n\nTEST_F(ListFamilyTest, WrongTypeDoesNotWake) {\n  RespExpr blpop_resp;\n\n  auto pop_fb = pp_->at(0)->LaunchFiber(Launch::dispatch, [&] {\n    blpop_resp = Run({\"blpop\", kKey1, \"0\"});\n  });\n\n  WaitUntilLocked(0, kKey1);\n\n  auto p1_fb = pp_->at(1)->LaunchFiber([&] {\n    Run({\"multi\"});\n    Run({\"lpush\", kKey1, \"A\"});\n    Run({\"set\", kKey1, \"foo\"});\n\n    auto resp = Run({\"exec\"});\n    EXPECT_THAT(resp.GetVec(), ElementsAre(IntArg(1), \"OK\"));\n\n    Run({\"del\", kKey1});\n    Run({\"lpush\", kKey1, \"B\"});\n  });\n\n  p1_fb.Join();\n  
pop_fb.Join();\n  ASSERT_THAT(blpop_resp, ArrLen(2));\n  EXPECT_THAT(blpop_resp.GetVec(), ElementsAre(kKey1, \"B\"));\n}\n\nTEST_F(ListFamilyTest, BPopSameKeyTwice) {\n  RespExpr blpop_resp;\n\n  auto pop_fb = pp_->at(0)->LaunchFiber(Launch::dispatch, [&] {\n    blpop_resp = Run({\"blpop\", kKey1, kKey2, kKey2, kKey1, \"0\"});\n    EXPECT_EQ(0, NumWatched());\n  });\n\n  WaitUntilLocked(0, kKey1);\n\n  pp_->at(1)->Await([&] { EXPECT_EQ(1, CheckedInt({\"lpush\", kKey1, \"bar\"})); });\n  pop_fb.Join();\n\n  ASSERT_THAT(blpop_resp, ArrLen(2));\n  EXPECT_THAT(blpop_resp.GetVec(), ElementsAre(kKey1, \"bar\"));\n\n  pop_fb = pp_->at(0)->LaunchFiber(Launch::dispatch, [&] {\n    blpop_resp = Run({\"blpop\", kKey1, kKey2, kKey2, kKey1, \"0\"});\n  });\n\n  WaitUntilLocked(0, kKey1);\n\n  pp_->at(1)->Await([&] { EXPECT_EQ(1, CheckedInt({\"lpush\", kKey2, \"bar\"})); });\n  pop_fb.Join();\n\n  ASSERT_THAT(blpop_resp, ArrLen(2));\n  EXPECT_THAT(blpop_resp.GetVec(), ElementsAre(kKey2, \"bar\"));\n}\n\nTEST_F(ListFamilyTest, BPopTwoKeysSameShard) {\n  Run({\"exists\", \"x\", \"y\"});\n  ASSERT_EQ(1, GetDebugInfo().shards_count);\n  RespExpr blpop_resp;\n\n  auto pop_fb = pp_->at(0)->LaunchFiber(Launch::dispatch, [&] {\n    blpop_resp = Run({\"blpop\", \"x\", \"y\", \"0\"});\n    EXPECT_FALSE(IsLocked(0, \"y\"));\n    ASSERT_EQ(0, NumWatched());\n  });\n\n  WaitUntilLocked(0, \"x\");\n\n  pp_->at(1)->Await([&] { EXPECT_EQ(1, CheckedInt({\"lpush\", \"x\", \"bar\"})); });\n  pop_fb.Join();\n\n  ASSERT_THAT(blpop_resp, ArrLen(2));\n  EXPECT_THAT(blpop_resp.GetVec(), ElementsAre(\"x\", \"bar\"));\n}\n\nTEST_F(ListFamilyTest, BPopRename) {\n  RespExpr blpop_resp;\n\n  Run({\"exists\", kKey1, kKey2});\n  ASSERT_EQ(2, GetDebugInfo().shards_count);\n\n  auto pop_fb = pp_->at(0)->LaunchFiber(Launch::dispatch, [&] {\n    blpop_resp = Run({\"blpop\", kKey1, \"0\"});\n  });\n\n  WaitUntilLocked(0, kKey1);\n\n  pp_->at(1)->Await([&] {\n    EXPECT_EQ(1, CheckedInt({\"lpush\", \"a\", 
\"bar\"}));\n    Run({\"rename\", \"a\", kKey1});\n  });\n  pop_fb.Join();\n\n  ASSERT_THAT(blpop_resp, ArrLen(2));\n  EXPECT_THAT(blpop_resp.GetVec(), ElementsAre(kKey1, \"bar\"));\n}\n\nTEST_F(ListFamilyTest, BPopFlush) {\n  RespExpr blpop_resp;\n  auto pop_fb = pp_->at(0)->LaunchFiber(Launch::dispatch, [&] {\n    blpop_resp = Run({\"blpop\", kKey1, \"0\"});\n  });\n\n  WaitUntilLocked(0, kKey1);\n\n  pp_->at(1)->Await([&] {\n    Run({\"flushdb\"});\n    EXPECT_EQ(1, CheckedInt({\"lpush\", kKey1, \"bar\"}));\n  });\n  pop_fb.Join();\n}\n\nTEST_F(ListFamilyTest, LRem) {\n  auto resp = Run({\"rpush\", kKey1, \"a\", \"b\", \"a\", \"c\"});\n  ASSERT_THAT(resp, IntArg(4));\n  resp = Run({\"lrem\", kKey1, \"2\", \"a\"});\n  ASSERT_THAT(resp, IntArg(2));\n\n  resp = Run({\"lrange\", kKey1, \"0\", \"1\"});\n  ASSERT_THAT(resp, ArrLen(2));\n  ASSERT_THAT(resp.GetVec(), ElementsAre(\"b\", \"c\"));\n\n  Run({\"set\", \"foo\", \"bar\"});\n  ASSERT_THAT(Run({\"lrem\", \"foo\", \"0\", \"elem\"}), ErrArg(\"WRONGTYPE\"));\n  ASSERT_THAT(Run({\"lrem\", \"nexists\", \"0\", \"elem\"}), IntArg(0));\n\n  // Triggers QUICKLIST_NODE_CONTAINER_PLAIN coverage\n  string val(10000, 'a');\n  Run({\"rpush\", kKey2, val, \"12345678\"});\n\n  ASSERT_THAT(Run({\"lrem\", kKey2, \"1\", \"12345678\"}), IntArg(1));\n  ASSERT_THAT(Run({\"lrem\", kKey2, \"1\", val}), IntArg(1));\n\n  ASSERT_THAT(Run({\"lpush\", kKey3, \"bar\", \"bar\", \"foo\"}), IntArg(3));\n  ASSERT_THAT(Run({\"lrem\", kKey3, \"-2\", \"bar\"}), IntArg(2));\n  resp = Run({\"lrange\", kKey3, \"0\", \"-1\"});\n  ASSERT_EQ(resp, \"foo\");\n}\n\nTEST_F(ListFamilyTest, DumpRestorePlain) {\n  const string kValue(10'000, '#');\n  EXPECT_EQ(CheckedInt({\"LPUSH\", kKey1, kValue}), 1);\n  auto buffer = Run({\"DUMP\", kKey1}).GetBuf();\n  EXPECT_EQ(Run({\"RESTORE\", kKey2, \"0\", ToSV(buffer)}), \"OK\");\n  EXPECT_EQ(CheckedInt({\"LLEN\", kKey2}), 1);\n  EXPECT_EQ(Run({\"LRANGE\", kKey2, \"0\", \"1\"}), kValue);\n}\n\nTEST_F(ListFamilyTest, 
LTrim) {\n  Run({\"rpush\", kKey1, \"a\", \"b\", \"c\", \"d\"});\n  ASSERT_EQ(Run({\"ltrim\", kKey1, \"-2\", \"-1\"}), \"OK\");\n  auto resp = Run({\"lrange\", kKey1, \"0\", \"1\"});\n  ASSERT_THAT(resp, ArrLen(2));\n  ASSERT_THAT(resp.GetVec(), ElementsAre(\"c\", \"d\"));\n  ASSERT_EQ(Run({\"ltrim\", kKey1, \"0\", \"0\"}), \"OK\");\n  ASSERT_EQ(Run({\"lrange\", kKey1, \"0\", \"1\"}), \"c\");\n  Run({\"set\", \"foo\", \"bar\"});\n  ASSERT_THAT(Run({\"ltrim\", \"foo\", \"0\", \"1\"}), ErrArg(\"WRONGTYPE\"));\n  ASSERT_EQ(Run({\"ltrim\", \"nexists\", \"0\", \"1\"}), \"OK\");\n}\n\nTEST_F(ListFamilyTest, LRange) {\n  auto resp = Run({\"lrange\", kKey1, \"0\", \"5\"});\n  ASSERT_THAT(resp, ArrLen(0));\n  Run({\"rpush\", kKey1, \"0\", \"1\", \"2\"});\n  resp = Run({\"lrange\", kKey1, \"-2\", \"-1\"});\n\n  ASSERT_THAT(resp, ArrLen(2));\n  ASSERT_THAT(resp.GetVec(), ElementsAre(\"1\", \"2\"));\n}\n\nTEST_F(ListFamilyTest, Lset) {\n  Run({\"rpush\", kKey1, \"0\", \"1\", \"2\"});\n  ASSERT_EQ(Run({\"lset\", kKey1, \"0\", \"bar\"}), \"OK\");\n  ASSERT_EQ(Run({\"lpop\", kKey1}), \"bar\");\n  ASSERT_EQ(Run({\"lset\", kKey1, \"-1\", \"foo\"}), \"OK\");\n  ASSERT_EQ(Run({\"rpop\", kKey1}), \"foo\");\n  Run({\"rpush\", kKey2, \"a\"});\n  ASSERT_THAT(Run({\"lset\", kKey2, \"1\", \"foo\"}), ErrArg(\"index out of range\"));\n}\n\nTEST_F(ListFamilyTest, LPop) {\n  Run({\"rpush\", \"foo\", \"bar\"});\n  auto resp = Run({\"lpop\", \"foo\", \"0\"});\n  EXPECT_THAT(resp, RespArray(ElementsAre()));\n  resp = Run({\"lpop\", \"bar\", \"0\"});\n  EXPECT_THAT(resp, ArgType(RespExpr::NIL));\n}\n\nTEST_F(ListFamilyTest, LPos) {\n  auto resp = Run({\"rpush\", kKey1, \"1\", \"a\", \"b\", \"1\", \"1\", \"a\", \"1\"});\n  ASSERT_THAT(resp, IntArg(7));\n\n  ASSERT_THAT(Run({\"lpos\", kKey1, \"1\"}), IntArg(0));\n\n  ASSERT_THAT(Run({\"lpos\", kKey1, \"f\"}), ArgType(RespExpr::NIL));\n  ASSERT_THAT(Run({\"lpos\", kKey1, \"1\", \"COUNT\", \"-1\"}), ArgType(RespExpr::ERROR));\n  
ASSERT_THAT(Run({\"lpos\", kKey1, \"1\", \"MAXLEN\", \"-1\"}), ArgType(RespExpr::ERROR));\n  ASSERT_THAT(Run({\"lpos\", kKey1, \"1\", \"RANK\", \"0\"}), ArgType(RespExpr::ERROR));\n\n  resp = Run({\"lpos\", kKey1, \"a\", \"RANK\", \"-1\", \"COUNT\", \"2\"});\n  ASSERT_THAT(resp.GetVec(), ElementsAre(IntArg(5), IntArg(1)));\n\n  resp = Run({\"lpos\", kKey1, \"1\", \"COUNT\", \"0\"});\n  ASSERT_THAT(resp.GetVec(), ElementsAre(IntArg(0), IntArg(3), IntArg(4), IntArg(6)));\n\n  resp = Run({\"lpos\", kKey1, \"1\", \"COUNT\", \"0\", \"MAXLEN\", \"5\"});\n  ASSERT_THAT(resp.GetVec(), ElementsAre(IntArg(0), IntArg(3), IntArg(4)));\n}\n\nTEST_F(ListFamilyTest, RPopLPush) {\n  // src and dest are different keys\n  auto resp = Run({\"rpush\", kKey1, \"1\", \"a\", \"b\", \"1\", \"2\", \"3\", \"4\"});\n  ASSERT_THAT(resp, IntArg(7));\n\n  resp = Run({\"rpoplpush\", kKey1, kKey2});\n  ASSERT_THAT(resp, \"4\");\n\n  resp = Run({\"rpoplpush\", kKey1, kKey2});\n  ASSERT_THAT(resp, \"3\");\n\n  resp = Run({\"rpoplpush\", kKey1, kKey2});\n  ASSERT_THAT(resp, \"2\");\n\n  resp = Run({\"rpoplpush\", kKey1, kKey2});\n  ASSERT_THAT(resp, \"1\");\n\n  resp = Run({\"lrange\", kKey1, \"0\", \"-1\"});\n  ASSERT_THAT(resp, ArrLen(3));\n  ASSERT_THAT(resp.GetVec(), ElementsAre(\"1\", \"a\", \"b\"));\n\n  resp = Run({\"lrange\", kKey2, \"0\", \"-1\"});\n  ASSERT_THAT(resp, ArrLen(4));\n  ASSERT_THAT(resp.GetVec(), ElementsAre(\"1\", \"2\", \"3\", \"4\"));\n\n  resp = Run({\"rpoplpush\", kKey1, kKey2});\n  ASSERT_THAT(resp, \"b\");\n\n  resp = Run({\"rpoplpush\", kKey1, kKey2});\n  ASSERT_THAT(resp, \"a\");\n\n  resp = Run({\"rpoplpush\", kKey1, kKey2});\n  ASSERT_THAT(resp, \"1\");\n\n  ASSERT_THAT(Run({\"lrange\", kKey1, \"0\", \"-1\"}), ArrLen(0));\n  EXPECT_THAT(Run({\"exists\", kKey1}), IntArg(0));\n  ASSERT_THAT(Run({\"rpoplpush\", kKey1, kKey2}), ArgType(RespExpr::NIL));\n\n  resp = Run({\"lrange\", kKey2, \"0\", \"-1\"});\n  ASSERT_THAT(resp, ArrLen(7));\n  ASSERT_THAT(resp.GetVec(), 
ElementsAre(\"1\", \"a\", \"b\", \"1\", \"2\", \"3\", \"4\"));\n\n  // src and dest are the same key\n  resp = Run({\"rpush\", kKey1, \"1\", \"a\", \"b\", \"1\", \"2\", \"3\", \"4\"});\n  ASSERT_THAT(resp, IntArg(7));\n\n  resp = Run({\"rpoplpush\", kKey1, kKey1});\n  ASSERT_THAT(resp, \"4\");\n\n  resp = Run({\"rpoplpush\", kKey1, kKey1});\n  ASSERT_THAT(resp, \"3\");\n\n  resp = Run({\"rpoplpush\", kKey1, kKey1});\n  ASSERT_THAT(resp, \"2\");\n\n  resp = Run({\"rpoplpush\", kKey1, kKey1});\n  ASSERT_THAT(resp, \"1\");\n\n  resp = Run({\"lrange\", kKey1, \"0\", \"-1\"});\n  ASSERT_THAT(resp, ArrLen(7));\n  ASSERT_THAT(resp.GetVec(), ElementsAre(\"1\", \"2\", \"3\", \"4\", \"1\", \"a\", \"b\"));\n\n  resp = Run({\"rpoplpush\", kKey1, kKey1});\n  ASSERT_THAT(resp, \"b\");\n\n  resp = Run({\"rpoplpush\", kKey1, kKey1});\n  ASSERT_THAT(resp, \"a\");\n\n  resp = Run({\"rpoplpush\", kKey1, kKey1});\n  ASSERT_THAT(resp, \"1\");\n\n  resp = Run({\"lrange\", kKey1, \"0\", \"-1\"});\n  ASSERT_THAT(resp, ArrLen(7));\n  ASSERT_THAT(resp.GetVec(), ElementsAre(\"1\", \"a\", \"b\", \"1\", \"2\", \"3\", \"4\"));\n}\n\nTEST_F(ListFamilyTest, LMove) {\n  // src and dest are different keys\n  auto resp = Run({\"rpush\", kKey1, \"1\", \"2\", \"3\", \"4\", \"5\"});\n  ASSERT_THAT(resp, IntArg(5));\n\n  resp = Run({\"lmove\", kKey1, kKey2, \"LEFT\", \"RIGHT\"});\n  ASSERT_THAT(resp, \"1\");\n  ASSERT_THAT(Run({\"llen\", kKey1}), IntArg(4));\n\n  resp = Run({\"lmove\", kKey1, kKey2, \"LEFT\", \"LEFT\"});\n  ASSERT_THAT(resp, \"2\");\n\n  resp = Run({\"lrange\", kKey2, \"0\", \"-1\"});\n  ASSERT_THAT(resp, ArrLen(2));\n  ASSERT_THAT(resp.GetVec(), ElementsAre(\"2\", \"1\"));\n\n  resp = Run({\"lmove\", kKey1, kKey2, \"RIGHT\", \"LEFT\"});\n  ASSERT_THAT(resp, \"5\");\n\n  resp = Run({\"lrange\", kKey2, \"0\", \"-1\"});\n  ASSERT_THAT(resp, ArrLen(3));\n  ASSERT_THAT(resp.GetVec(), ElementsAre(\"5\", \"2\", \"1\"));\n\n  resp = Run({\"lmove\", kKey1, kKey2, \"RIGHT\", \"RIGHT\"});\n  
ASSERT_THAT(resp, \"4\");\n\n  resp = Run({\"lrange\", kKey1, \"0\", \"-1\"});\n  ASSERT_EQ(resp, \"3\");\n\n  resp = Run({\"lrange\", kKey2, \"0\", \"-1\"});\n  ASSERT_THAT(resp, ArrLen(4));\n  ASSERT_THAT(resp.GetVec(), ElementsAre(\"5\", \"2\", \"1\", \"4\"));\n\n  resp = Run({\"lmove\", kKey1, kKey2, \"RIGHT\", \"RIGHT\"});\n  ASSERT_THAT(resp, \"3\");\n\n  ASSERT_THAT(Run({\"lrange\", kKey1, \"0\", \"-1\"}), ArrLen(0));\n  EXPECT_THAT(Run({\"exists\", kKey1}), IntArg(0));\n  ASSERT_THAT(Run({\"lmove\", kKey1, kKey2, \"LEFT\", \"RIGHT\"}), ArgType(RespExpr::NIL));\n  ASSERT_THAT(Run({\"lmove\", kKey1, kKey2, \"RIGHT\", \"RIGHT\"}), ArgType(RespExpr::NIL));\n\n  resp = Run({\"lrange\", kKey2, \"0\", \"-1\"});\n  ASSERT_THAT(resp, ArrLen(5));\n  ASSERT_THAT(resp.GetVec(), ElementsAre(\"5\", \"2\", \"1\", \"4\", \"3\"));\n\n  // src and dest are the same key\n  resp = Run({\"rpush\", kKey1, \"1\", \"2\", \"3\", \"4\", \"5\"});\n  ASSERT_THAT(resp, IntArg(5));\n\n  resp = Run({\"lmove\", kKey1, kKey1, \"LEFT\", \"RIGHT\"});\n  ASSERT_THAT(resp, \"1\");\n\n  resp = Run({\"lmove\", kKey1, kKey1, \"LEFT\", \"LEFT\"});\n  ASSERT_THAT(resp, \"2\");\n\n  resp = Run({\"lmove\", kKey1, kKey1, \"RIGHT\", \"LEFT\"});\n  ASSERT_THAT(resp, \"1\");\n\n  resp = Run({\"lmove\", kKey1, kKey1, \"RIGHT\", \"RIGHT\"});\n  ASSERT_THAT(resp, \"5\");\n\n  resp = Run({\"lmove\", kKey1, kKey1, \"LEFT\", \"RIGHT\"});\n  ASSERT_THAT(resp, \"1\");\n\n  resp = Run({\"lrange\", kKey1, \"0\", \"-1\"});\n  ASSERT_THAT(resp, ArrLen(5));\n  ASSERT_THAT(resp.GetVec(), ElementsAre(\"2\", \"3\", \"4\", \"5\", \"1\"));\n\n  resp = Run({\"lmove\", kKey1, kKey1, \"LEFT\", \"RIGHT\"});\n  ASSERT_THAT(resp, \"2\");\n\n  resp = Run({\"lmove\", kKey1, kKey1, \"LEFT\", \"RIGHT\"});\n  ASSERT_THAT(resp, \"3\");\n\n  resp = Run({\"lmove\", kKey1, kKey1, \"RIGHT\", \"RIGHT\"});\n  ASSERT_THAT(resp, \"3\");\n\n  resp = Run({\"lmove\", kKey1, kKey1, \"LEFT\", \"RIGHT\"});\n  ASSERT_THAT(resp, \"4\");\n\n  resp = 
Run({\"lrange\", kKey1, \"0\", \"-1\"});\n  ASSERT_THAT(resp, ArrLen(5));\n  ASSERT_THAT(resp.GetVec(), ElementsAre(\"5\", \"1\", \"2\", \"3\", \"4\"));\n\n  ASSERT_THAT(Run({\"lmove\", kKey1, kKey1, \"LEFT\", \"R\"}), ArgType(RespExpr::ERROR));\n}\n\nTEST_F(ListFamilyTest, TwoQueueBug451) {\n  // The bug was that if 2 push operations were queued together in the tx queue,\n  // and the first awoke pending blpop, then the PollExecution function would continue with the\n  // second push before switching to blpop, which contradicts the spec.\n  std::atomic_bool running{true};\n  std::atomic_int it_cnt{0};\n\n  auto pop_fiber = [&]() {\n    auto id = \"t-\" + std::to_string(it_cnt.fetch_add(1));\n    while (running.load()) {\n      Run(id, {\"blpop\", \"a\", \"0.1\"});\n    }\n  };\n\n  auto push_fiber = [&]() {\n    auto id = \"t-\" + std::to_string(it_cnt.fetch_add(1));\n    for (int i = 0; i < 300; i++) {\n      Run(id, {\"rpush\", \"a\", \"DATA\"});\n    }\n    ThisFiber::SleepFor(50ms);\n    running = false;\n  };\n\n  vector<Fiber> fbs;\n\n  // more likely to reproduce the bug if we start pop_fiber first.\n  for (int i = 0; i < 2; i++) {\n    fbs.push_back(pp_->at(i)->LaunchFiber(pop_fiber));\n  }\n\n  for (int i = 0; i < 2; i++) {\n    fbs.push_back(pp_->at(i)->LaunchFiber(push_fiber));\n  }\n\n  for (auto& f : fbs)\n    f.Join();\n  ASSERT_EQ(0, NumWatched());\n}\n\nTEST_F(ListFamilyTest, BRPopLPushSingleShard) {\n  EXPECT_THAT(Run({\"brpoplpush\", \"x\", \"y\", \"0.05\"}), ArgType(RespExpr::NIL));\n  ASSERT_EQ(0, NumWatched());\n\n  EXPECT_THAT(Run({\"lpush\", \"x\", \"val1\"}), IntArg(1));\n  EXPECT_EQ(Run({\"brpoplpush\", \"x\", \"y\", \"0.01\"}), \"val1\");\n  ASSERT_EQ(1, GetDebugInfo().shards_count);\n\n  EXPECT_THAT(Run({\n                  \"exists\",\n                  \"x\",\n              }),\n              IntArg(0));\n  Run({\"set\", \"x\", \"str\"});\n  EXPECT_THAT(Run({\"brpoplpush\", \"y\", \"x\", \"0.01\"}), ErrArg(\"wrong kind of 
value\"));\n\n  Run({\"del\", \"x\", \"y\"});\n  Run({\"multi\"});\n  Run({\"brpoplpush\", \"y\", \"x\", \"0\"});\n  RespExpr resp = Run({\"exec\"});\n  EXPECT_THAT(resp, ArgType(RespExpr::NIL));\n  ASSERT_FALSE(IsLocked(0, \"x\"));\n  ASSERT_FALSE(IsLocked(0, \"y\"));\n  ASSERT_EQ(0, NumWatched());\n}\n\nTEST_F(ListFamilyTest, BRPopLPushSingleShardBug2857) {\n  Run({\"lpush\", \"src\", \"val1\"});\n  RespExpr resp;\n  auto blpop = [&]() { resp = Run(\"id\", {\"blpop\", \"dest\", \"4\"}); };\n  auto f = pp_->at(1)->LaunchFiber(Launch::dispatch, blpop);\n  EXPECT_THAT(Run({\"brpoplpush\", \"src\", \"dest\", \"1\"}), \"val1\");\n  f.Join();\n  EXPECT_THAT(resp, ArgType(RespExpr::ARRAY));\n  EXPECT_THAT(resp.GetVec(), ElementsAre(\"dest\", \"val1\"));\n\n  // Timeout\n  f = pp_->at(1)->LaunchFiber(Launch::dispatch, blpop);\n  EXPECT_THAT(Run({\"brpoplpush\", \"src\", \"dest\", \"1\"}), ArgType(RespExpr::NIL));\n  f.Join();\n  EXPECT_THAT(resp, ArgType(RespExpr::NIL_ARRAY));\n}\n\nTEST_F(ListFamilyTest, BRPopLPushSingleShardBug4569) {\n  RespExpr resp;\n  auto fb0 = pp_->at(1)->LaunchFiber(Launch::dispatch, [&] { resp = Run({\"brpop\", \"x\", \"0\"}); });\n  WaitUntilLocked(0, \"x\");\n\n  ASSERT_TRUE(IsLocked(0, \"x\"));\n  Run({\"lpush\", \"y\", \"val\"});\n  Run({\"rpoplpush\", \"y\", \"x\"});\n  ASSERT_EQ(1, GetDebugInfo().shards_count);\n  fb0.Join();\n  EXPECT_THAT(resp, ArgType(RespExpr::ARRAY));\n  EXPECT_THAT(resp.GetVec(), ElementsAre(\"x\", \"val\"));\n  ASSERT_EQ(0, NumWatched());\n  ASSERT_FALSE(IsLocked(0, \"x\"));\n}\n\nTEST_F(ListFamilyTest, BRPopLPushSingleShardBlocking) {\n  RespExpr resp;\n\n  // Run the fiber at creation.\n  auto fb0 = pp_->at(0)->LaunchFiber(Launch::dispatch, [&] {\n    resp = Run({\"brpoplpush\", \"x\", \"y\", \"0\"});\n  });\n  ThisFiber::SleepFor(30us);\n  pp_->at(1)->Await([&] { Run(\"B1\", {\"lpush\", \"y\", \"2\"}); });\n\n  pp_->at(1)->Await([&] { Run(\"B1\", {\"lpush\", \"x\", \"1\"}); });\n  fb0.Join();\n  ASSERT_EQ(resp, 
\"1\");\n  ASSERT_FALSE(IsLocked(0, \"x\"));\n  ASSERT_FALSE(IsLocked(0, \"y\"));\n  ASSERT_EQ(0, NumWatched());\n}\n\nTEST_F(ListFamilyTest, BRPopContended) {\n  RespExpr resp;\n  atomic_bool done{false};\n  constexpr auto kNumFibers = 4;\n\n  // Run the fiber at creation.\n  Fiber fb[kNumFibers];\n  for (int i = 0; i < kNumFibers; i++) {\n    fb[i] = pp_->at(1)->LaunchFiber(Launch::dispatch, [&] {\n      string id = StrCat(\"id\", i);\n      while (!done) {\n        Run(id, {\"brpop\", \"k0\", \"k1\", \"k2\", \"k3\", \"k4\", \"0.1\"});\n      };\n    });\n  }\n\n  for (int i = 0; i < 500; i++) {\n    string key = absl::StrCat(\"k\", i % 3);\n    Run({\"lpush\", key, \"foo\"});\n  }\n\n  done = true;\n  for (int i = 0; i < kNumFibers; i++) {\n    fb[i].Join();\n  }\n  ASSERT_EQ(0, NumWatched());\n  ASSERT_FALSE(HasAwakened());\n}\n\nTEST_F(ListFamilyTest, BRPopLPushTwoShards) {\n  RespExpr resp;\n  EXPECT_THAT(Run({\"brpoplpush\", \"x\", \"z\", \"0.05\"}), ArgType(RespExpr::NIL));\n\n  ASSERT_EQ(0, NumWatched());\n\n  Run({\"lpush\", \"x\", \"val\"});\n  EXPECT_EQ(Run({\"brpoplpush\", \"x\", \"z\", \"0\"}), \"val\");\n  resp = Run({\"lrange\", \"z\", \"0\", \"-1\"});\n  ASSERT_EQ(resp, \"val\");\n  Run({\"del\", \"z\"});\n  ASSERT_EQ(0, NumWatched());\n\n  // Run the fiber at creation.\n  auto fb0 = pp_->at(0)->LaunchFiber(Launch::dispatch, [&] {\n    resp = Run({\"brpoplpush\", \"x\", \"z\", \"0\"});\n  });\n\n  ThisFiber::SleepFor(30us);\n  RespExpr resp_push = pp_->at(1)->Await([&] { return Run(\"B1\", {\"lpush\", \"z\", \"val2\"}); });\n  ASSERT_THAT(resp_push, IntArg(1));\n\n  resp_push = pp_->at(1)->Await([&] { return Run(\"B1\", {\"lpush\", \"x\", \"val1\"}); });\n  ASSERT_THAT(resp_push, IntArg(1));\n  fb0.Join();\n\n  // Result of brpoplpush above.\n  ASSERT_EQ(resp, \"val1\");\n\n  resp = Run({\"lrange\", \"z\", \"0\", \"-1\"});\n  ASSERT_THAT(resp, ArrLen(2));\n  ASSERT_THAT(resp.GetVec(), ElementsAre(\"val1\", \"val2\"));\n  ASSERT_FALSE(IsLocked(0, 
\"x\"));\n  ASSERT_FALSE(IsLocked(0, \"z\"));\n  ASSERT_EQ(0, NumWatched());\n  ASSERT_FALSE(HasAwakened());\n\n  // TODO: there is a bug here.\n  // we do not wake the dest shard, when source is awaked which prevents\n  // the atomicity and causes the first bug as well.\n}\n\nTEST_F(ListFamilyTest, BLMove) {\n  EXPECT_THAT(Run({\"blmove\", \"x\", \"y\", \"right\", \"right\", \"0.05\"}), ArgType(RespExpr::NIL));\n  ASSERT_EQ(0, NumWatched());\n\n  EXPECT_THAT(Run({\"lpush\", \"x\", \"val1\"}), IntArg(1));\n  EXPECT_THAT(Run({\"lpush\", \"y\", \"val2\"}), IntArg(1));\n\n  EXPECT_EQ(Run({\"blmove\", \"x\", \"y\", \"right\", \"left\", \"0.01\"}), \"val1\");\n  auto resp = Run({\"lrange\", \"y\", \"0\", \"-1\"});\n  ASSERT_THAT(resp, ArrLen(2));\n  ASSERT_THAT(resp.GetVec(), ElementsAre(\"val1\", \"val2\"));\n}\n\n// Wake two BLMOVEs on the same shard simultaneously\nTEST_F(ListFamilyTest, BLMoveSimultaneously) {\n  EXPECT_EQ(Shard(\"src1\", shard_set->size()),\n            Shard(\"src10\", shard_set->size()));  // wake on same shard\n  EXPECT_NE(Shard(\"dest110\", shard_set->size()),\n            Shard(\"src1\", shard_set->size()));  // Trigger MoveTwoShards\n\n  auto f1 = pp_->at(1)->LaunchFiber([this]() {\n    Run(\"c1\", {\"blmove\", \"src1\", \"dest110\", \"LEFT\", \"RIGHT\", \"0\"});\n  });\n  auto f2 = pp_->at(1)->LaunchFiber([this]() {\n    Run(\"c2\", {\"blmove\", \"src10\", \"dest110\", \"LEFT\", \"RIGHT\", \"0\"});\n  });\n\n  ThisFiber::SleepFor(5ms);\n  Run({\"multi\"});\n  Run({\"rpush\", \"src1\", \"v1\"});\n  Run({\"rpush\", \"src10\", \"v2\"});\n  Run({\"exec\"});\n\n  f1.Join();\n  f2.Join();\n\n  auto res = Run({\"lrange\", \"dest110\", \"0\", \"-1\"});\n  EXPECT_THAT(res.GetVec(), UnorderedElementsAre(\"v1\", \"v2\"));\n}\n\n// Move key five times in rings 0 -> 1 -> 2 ... 
-> 0\nTEST_F(ListFamilyTest, BLMoveRings) {\n  vector<fb2::Fiber> fibers;\n  for (int j = 0; j < 5; j++) {\n    for (int i = 0; i < 10; i++) {\n      fibers.emplace_back(pp_->at(i % pp_->size())->LaunchFiber([i, j, this]() {\n        auto key1 = to_string(i);\n        auto key2 = to_string((i + 1) % 10);\n        Run(key1 + to_string(j), {\"blmove\", key1, key2, \"LEFT\", \"RIGHT\", \"0\"});\n      }));\n    }\n  }\n\n  ThisFiber::SleepFor(5ms);\n\n  Run({\"lpush\", \"0\", \"v1\"});\n  for (auto& fiber : fibers)\n    fiber.Join();\n\n  for (int i = 1; i < 10; i++)\n    EXPECT_THAT(Run({\"llen\", to_string(i)}), IntArg(0));\n  EXPECT_EQ(Run({\"lrange\", \"0\", \"0\", \"-1\"}), \"v1\");\n}\n\n// Move in waves where each wave layer has a fixed set of \"vertices\" through which all values travel\nTEST_F(ListFamilyTest, BLMoveWaves) {\n  static constexpr int kFlow = 64;\n  vector<int> wave_sizes = {1 /* 0:0 */, kFlow, kFlow / 2, kFlow / 4, kFlow / 8, kFlow / 3,\n                            kFlow / 5,   1,     kFlow / 6, kFlow,     kFlow / 4, 1};\n\n  vector<fb2::Fiber> fibers;\n  for (size_t i = 1; i < wave_sizes.size(); i++) {\n    for (size_t j = 0; j < kFlow; j++) {\n      fibers.emplace_back(pp_->at(i % 3)->LaunchFiber([i, j, wave_sizes, this]() {\n        auto src = to_string(i - 1) + \":\" + to_string(j / (kFlow / wave_sizes[i - 1]));\n        auto dest = to_string(i) + \":\" + to_string(j / (kFlow / wave_sizes[i]));\n        Run(\"c\" + to_string(i * kFlow + j), {\"blmove\", src, dest, \"LEFT\", \"RIGHT\", \"0\"});\n      }));\n    }\n  }\n\n  vector<string> values(kFlow);\n  for (size_t i = 0; i < kFlow; i++)\n    values[i] = \"v\" + to_string(i);\n\n  Run({\"multi\"});\n  for (size_t i = 0; i < kFlow; i++)\n    Run({\"lpush\", \"0:0\", values[i]});\n  Run({\"exec\"});\n\n  for (auto& fiber : fibers)\n    fiber.Join();\n\n  auto res = Run({\"lrange\", to_string(wave_sizes.size() - 1) + \":0\", \"0\", \"-1\"});\n  EXPECT_THAT(res.GetVec(), 
UnorderedElementsAreArray(values));\n}\n\n// Move value back and forth between two lists, verfiy that atomic lookup of states catches it only\n// in one of two possible states\nTEST_F(ListFamilyTest, BLMovePendulum) {\n  GTEST_SKIP() << \"Blocking commands don't respect transactional ordering after waking up\";\n  // Suppose BLMOVE A -> B is running, then MULTI LLEN A LLEN B EXEC will\n  // 1. Run on shard B because it doesn't have \"blocking\" keys freely, so LLEN B = 0\n  // 2. Will run on shard A after BLMOVE A removed itself from the \"awakened\" set, so LLEN A = 0\n  // => we observe a theoretically impossible state and the execution order is not linearizable\n\n  vector<fb2::Fiber> fibers;\n\n  atomic_bool stopped = false;\n  auto swing = [this, &stopped](int i, string src, string dest) {\n    while (!stopped.load(std::memory_order_relaxed))\n      Run(src + dest + to_string(i), {\"blmove\", src, dest, \"LEFT\", \"RIGHT\", \"0\"});\n  };\n\n  for (int i = 0; i < 3; i++)\n    fibers.emplace_back(pp_->at(i % pp_->size())->LaunchFiber([=]() { swing(i, \"A\", \"B\"); }));\n\n  for (int i = 0; i < 3; i++)\n    fibers.emplace_back(pp_->at(i % pp_->size())->LaunchFiber([=]() { swing(i, \"B\", \"A\"); }));\n\n  Run({\"lpush\", \"A\", \"v\"});\n  ThisFiber::SleepFor(1ms);\n\n  for (int i = 0; i < 100; i++) {\n    Run({\"multi\"});\n    Run({\"llen\", \"A\"});\n    Run({\"llen\", \"B\"});\n    auto res = Run({\"EXEC\"});\n    int i1 = *res.GetVec()[0].GetInt();\n    int i2 = *res.GetVec()[1].GetInt();\n    ASSERT_EQ(i1 + i2, 1);\n  }\n\n  stopped = true;\n  Run({\"lpush\", \"A\", \"stop\"});\n  Run({\"lpush\", \"B\", \"stop\"});\n  for (auto& fiber : fibers)\n    fiber.Join();\n\n  int i1 = *Run({\"llen\", \"A\"}).GetInt();\n  int i2 = *Run({\"llen\", \"B\"}).GetInt();\n  ASSERT_EQ(i1 + i2, 3);  // v, stop, stop\n}\n\nTEST_F(ListFamilyTest, LPushX) {\n  // No push for 'lpushx' on nonexisting key.\n  EXPECT_THAT(Run({\"lpushx\", kKey1, \"val1\"}), IntArg(0));\n  
EXPECT_THAT(Run({\"llen\", kKey1}), IntArg(0));\n\n  EXPECT_THAT(Run({\"lpush\", kKey1, \"val1\"}), IntArg(1));\n  EXPECT_THAT(Run({\"lrange\", kKey1, \"0\", \"-1\"}), \"val1\");\n\n  EXPECT_THAT(Run({\"lpushx\", kKey1, \"val2\"}), IntArg(2));\n  EXPECT_THAT(Run({\"lrange\", kKey1, \"0\", \"-1\"}).GetVec(), ElementsAre(\"val2\", \"val1\"));\n}\n\nTEST_F(ListFamilyTest, RPushX) {\n  // No push for 'rpushx' on nonexisting key.\n  EXPECT_THAT(Run({\"rpushx\", kKey1, \"val1\"}), IntArg(0));\n  EXPECT_THAT(Run({\"llen\", kKey1}), IntArg(0));\n\n  EXPECT_THAT(Run({\"rpush\", kKey1, \"val1\"}), IntArg(1));\n  EXPECT_THAT(Run({\"lrange\", kKey1, \"0\", \"-1\"}), \"val1\");\n\n  EXPECT_THAT(Run({\"rpushx\", kKey1, \"val2\"}), IntArg(2));\n  EXPECT_THAT(Run({\"lrange\", kKey1, \"0\", \"-1\"}).GetVec(), ElementsAre(\"val1\", \"val2\"));\n}\n\nTEST_F(ListFamilyTest, LInsert) {\n  // List not found.\n  EXPECT_THAT(Run({\"linsert\", \"notfound\", \"before\", \"foo\", \"bar\"}), IntArg(0));\n\n  // Key is not a list.\n  Run({\"set\", \"notalist\", \"x\"});\n  EXPECT_THAT(Run({\"linsert\", \"notalist\", \"before\", \"foo\", \"bar\"}),\n              ErrArg(\"Operation against a key holding the wrong kind of value\"));\n\n  // Insert before.\n  Run({\"rpush\", \"mylist\", \"foo\"});\n  EXPECT_THAT(Run({\"linsert\", \"mylist\", \"before\", \"foo\", \"bar\"}), IntArg(2));\n  auto resp = Run({\"lrange\", \"mylist\", \"0\", \"1\"});\n  ASSERT_THAT(resp, ArrLen(2));\n  ASSERT_THAT(resp.GetVec(), ElementsAre(\"bar\", \"foo\"));\n\n  // Insert after.\n  EXPECT_THAT(Run({\"linsert\", \"mylist\", \"after\", \"foo\", \"car\"}), IntArg(3));\n  resp = Run({\"lrange\", \"mylist\", \"0\", \"2\"});\n  ASSERT_THAT(resp, ArrLen(3));\n  ASSERT_THAT(resp.GetVec(), ElementsAre(\"bar\", \"foo\", \"car\"));\n\n  // Insert before, pivot not found.\n  EXPECT_THAT(Run({\"linsert\", \"mylist\", \"before\", \"notfound\", \"x\"}), IntArg(-1));\n\n  // Insert after, pivot not found.\n  
EXPECT_THAT(Run({\"linsert\", \"mylist\", \"after\", \"notfound\", \"x\"}), IntArg(-1));\n\n  // insert empty\n  Run({\"rpush\", \"k\", \"a\"});\n  Run({\"linsert\", \"k\", \"before\", \"a\", \"\"});\n  resp = Run({\"lpop\", \"k\"});\n  EXPECT_EQ(resp, \"\");\n  resp = Run({\"linsert\", \"k\", \"before\", \"\", \"\"});\n  EXPECT_THAT(resp, IntArg(-1));\n}\n\nTEST_F(ListFamilyTest, BLPopUnwakesInScript) {\n  const string_view SCRIPT = R\"(\n    for i = 1, 1000 do\n      redis.call('MGET', 'a', 'b', 'c', 'd')\n      redis.call('LPUSH', 'l', tostring(i))\n    end\n  )\";\n\n  // Start blpop with without timeout\n  auto f1 = pp_->at(1)->LaunchFiber(Launch::dispatch, [&]() {\n    auto resp = Run(\"blpop\", {\"BLPOP\", \"l\", \"0\"});\n    // blpop should only be awakened after the script has completed, so the\n    // last element added in the script should be returned.\n    EXPECT_THAT(resp, ArgType(RespExpr::ARRAY));\n    EXPECT_THAT(resp.GetVec(), ElementsAre(\"l\", \"1000\"));\n  });\n\n  // Start long running script that intends to wake up blpop\n  auto f2 = pp_->at(2)->LaunchFiber([&] {\n    Run(\"script\", {\"EVAL\", SCRIPT, \"5\", \"a\", \"b\", \"c\", \"d\", \"l\"});\n  });\n\n  // Run blpop that times out\n  auto resp = Run({\"blpop\", \"g\", \"0.01\"});\n  EXPECT_THAT(resp, ArgType(RespExpr::NIL_ARRAY));\n\n  f1.Join();\n  f2.Join();\n}\n\nTEST_F(ListFamilyTest, OtherMultiWakesBLpop) {\n  const string_view SCRIPT = R\"(\n    redis.call('LPUSH', 'l', 'bad')\n    for i = 1, 1000 do\n      redis.call('MGET', 'a', 'b', 'c', 'd')\n    end\n    redis.call('LPUSH', 'l', 'good')\n  )\";\n\n  const string_view SCRIPT_SHORT = R\"(\n    redis.call('GET', KEYS[1])\n  )\";\n\n  // Start BLPOP with infinite timeout\n  auto f1 = pp_->at(1)->LaunchFiber(Launch::dispatch, [&] {\n    auto resp = Run(\"blpop\", {\"BLPOP\", \"l\", \"0\"});\n    // blpop should only be awakened after the script has completed, so the\n    // last element added in the script should be returned.\n    
EXPECT_THAT(resp, ArgType(RespExpr::ARRAY));\n    EXPECT_THAT(resp.GetVec(), ElementsAre(\"l\", \"good\"));\n  });\n\n  // Start long running script that accesses the list, but should wake up blpop only after it\n  // finished\n  auto f2 = pp_->at(2)->LaunchFiber(Launch::dispatch, [&] {\n    Run(\"script\", {\"EVAL\", SCRIPT, \"5\", \"a\", \"b\", \"c\", \"d\", \"l\"});\n  });\n\n  // Run quick multi transaction that concludes after one hop\n  Run({\"EVAL\", SCRIPT_SHORT, \"1\", \"y\"});\n\n  f1.Join();\n  f2.Join();\n}\n\nTEST_F(ListFamilyTest, ContendExpire) {\n  vector<fb2::Fiber> blpop_fibers;\n  for (unsigned i = 0; i < num_threads_; ++i) {\n    for (unsigned j = 0; j < 30; ++j) {\n      blpop_fibers.emplace_back(pp_->at(i)->LaunchFiber(Launch::post, [&, i, j] {\n        string keys[2] = {\"key0\", \"key1\"};\n        thread_local unsigned cur = 0;\n        for (unsigned n = 0; n < 30; n++) {\n          string k = keys[cur];\n          cur ^= 1;\n          Run(StrCat(\"push\", i, \"_\", j), {\"lpush\", k, \"foo\"});\n          Run(StrCat(\"blpop\", i, \"_\", j), {\"blpop\", keys[cur], \"a\", \"0.001\"});\n        }\n      }));\n    }\n  }\n\n  for (auto& f : blpop_fibers) {\n    f.Join();\n  }\n}\n\nTEST_F(ListFamilyTest, LMPopInvalidSyntax) {\n  // Not enough arguments\n  auto resp = Run({\"lmpop\", \"1\", \"a\"});\n  EXPECT_THAT(resp, ErrArg(\"wrong number of arguments\"));\n\n  // Zero keys\n  resp = Run({\"lmpop\", \"0\", \"LEFT\", \"COUNT\", \"1\"});\n  EXPECT_THAT(resp, ErrArg(\"at least 1 input key is needed\"));\n\n  // Number of keys is not uint\n  resp = Run({\"lmpop\", \"aa\", \"a\", \"LEFT\"});\n  EXPECT_THAT(resp, ErrArg(\"value is not an integer or out of range\"));\n\n  // Missing LEFT/RIGHT\n  resp = Run({\"lmpop\", \"1\", \"a\", \"COUNT\", \"1\"});\n  EXPECT_THAT(resp, ErrArg(\"syntax error\"));\n\n  // Wrong number of keys\n  resp = Run({\"lmpop\", \"1\", \"a\", \"b\", \"LEFT\"});\n  EXPECT_THAT(resp, ErrArg(\"syntax error\"));\n\n  // COUNT 
without number\n  resp = Run({\"lmpop\", \"1\", \"a\", \"LEFT\", \"COUNT\"});\n  EXPECT_THAT(resp, ErrArg(\"syntax error\"));\n\n  // COUNT is not uint\n  resp = Run({\"lmpop\", \"1\", \"a\", \"LEFT\", \"COUNT\", \"boo\"});\n  EXPECT_THAT(resp, ErrArg(\"value is not an integer or out of range\"));\n\n  // Too many arguments\n  resp = Run({\"lmpop\", \"1\", \"c\", \"LEFT\", \"COUNT\", \"2\", \"foo\"});\n  EXPECT_THAT(resp, ErrArg(\"syntax error\"));\n}\n\nTEST_F(ListFamilyTest, LMPop) {\n  // All lists are empty\n  auto resp = Run({\"lmpop\", \"1\", \"e\", \"LEFT\"});\n  EXPECT_THAT(resp, ArgType(RespExpr::NIL));\n\n  // LEFT operation\n  resp = Run({\"lpush\", \"a\", \"a1\", \"a2\"});\n  EXPECT_THAT(resp, IntArg(2));\n\n  resp = Run({\"lmpop\", \"1\", \"a\", \"LEFT\"});\n  EXPECT_THAT(resp, RespArray(ElementsAre(\"a\", RespArray(ElementsAre(\"a2\")))));\n\n  // RIGHT operation\n  resp = Run({\"lpush\", \"b\", \"b1\", \"b2\"});\n  EXPECT_THAT(resp, IntArg(2));\n\n  resp = Run({\"lmpop\", \"1\", \"b\", \"RIGHT\"});\n  EXPECT_THAT(resp, RespArray(ElementsAre(\"b\", RespArray(ElementsAre(\"b1\")))));\n\n  // COUNT > 1\n  resp = Run({\"lpush\", \"c\", \"c1\", \"c2\"});\n  EXPECT_THAT(resp, IntArg(2));\n\n  resp = Run({\"lmpop\", \"1\", \"c\", \"RIGHT\", \"COUNT\", \"2\"});\n  EXPECT_THAT(resp, RespArray(ElementsAre(\"c\", RespArray(ElementsAre(\"c1\", \"c2\")))));\n\n  resp = Run({\"llen\", \"c\"});\n  EXPECT_THAT(resp, IntArg(0));\n\n  // COUNT > number of elements in list\n  resp = Run({\"lpush\", \"d\", \"d1\", \"d2\"});\n  EXPECT_THAT(resp, IntArg(2));\n\n  resp = Run({\"lmpop\", \"1\", \"d\", \"RIGHT\", \"COUNT\", \"3\"});\n  EXPECT_THAT(resp, RespArray(ElementsAre(\"d\", RespArray(ElementsAre(\"d1\", \"d2\")))));\n\n  resp = Run({\"llen\", \"d\"});\n  EXPECT_THAT(resp, IntArg(0));\n\n  // First non-empty list is not the first list\n  resp = Run({\"lpush\", \"x\", \"x1\"});\n  EXPECT_THAT(resp, IntArg(1));\n\n  resp = Run({\"lpush\", \"y\", \"y1\"});\n  
EXPECT_THAT(resp, IntArg(1));\n\n  resp = Run({\"lmpop\", \"3\", \"empty\", \"x\", \"y\", \"RIGHT\"});\n  EXPECT_THAT(resp, RespArray(ElementsAre(\"x\", RespArray(ElementsAre(\"x1\")))));\n\n  resp = Run({\"llen\", \"x\"});\n  EXPECT_THAT(resp, IntArg(0));\n}\n\nTEST_F(ListFamilyTest, LMPopMultipleElements) {\n  // Test removing multiple elements from left end\n  Run({\"rpush\", \"list1\", \"a\", \"b\", \"c\", \"d\", \"e\"});\n  auto resp = Run({\"lmpop\", \"1\", \"list1\", \"LEFT\", \"COUNT\", \"3\"});\n  EXPECT_THAT(resp, RespArray(ElementsAre(\"list1\", RespArray(ElementsAre(\"a\", \"b\", \"c\")))));\n\n  resp = Run({\"lrange\", \"list1\", \"0\", \"-1\"});\n  EXPECT_THAT(resp.GetVec(), ElementsAre(\"d\", \"e\"));\n\n  // Test removing multiple elements from right end\n  Run({\"rpush\", \"list2\", \"v\", \"w\", \"x\", \"y\", \"z\"});\n  resp = Run({\"lmpop\", \"1\", \"list2\", \"RIGHT\", \"COUNT\", \"2\"});\n  EXPECT_THAT(resp, RespArray(ElementsAre(\"list2\", RespArray(ElementsAre(\"z\", \"y\")))));\n\n  resp = Run({\"lrange\", \"list2\", \"0\", \"-1\"});\n  EXPECT_THAT(resp.GetVec(), ElementsAre(\"v\", \"w\", \"x\"));\n}\n\nTEST_F(ListFamilyTest, LMPopMultipleLists) {\n  // Test finding first non-empty list\n  Run({\"rpush\", \"list1\", \"a\", \"b\"});\n  Run({\"rpush\", \"list2\", \"c\", \"d\"});\n  Run({\"rpush\", \"list3\", \"e\", \"f\"});\n\n  // Pop from first non-empty list\n  auto resp = Run({\"lmpop\", \"3\", \"list1\", \"list2\", \"list3\", \"LEFT\"});\n  EXPECT_THAT(resp, RespArray(ElementsAre(\"list1\", RespArray(ElementsAre(\"a\")))));\n\n  // Pop from second list after first becomes empty\n  Run({\"lmpop\", \"1\", \"list1\", \"LEFT\"});  // Empty list1\n  resp = Run({\"lmpop\", \"3\", \"list1\", \"list2\", \"list3\", \"RIGHT\", \"COUNT\", \"2\"});\n  EXPECT_THAT(resp, RespArray(ElementsAre(\"list2\", RespArray(ElementsAre(\"d\", \"c\")))));\n\n  // Verify third list remains untouched\n  resp = Run({\"lrange\", \"list3\", \"0\", \"-1\"});\n  
EXPECT_THAT(resp.GetVec(), ElementsAre(\"e\", \"f\"));\n}\n\nTEST_F(ListFamilyTest, LMPopEdgeCases) {\n  // Test with empty list\n  Run({\"rpush\", \"empty_list\", \"a\"});\n  Run({\"lpop\", \"empty_list\"});\n  auto resp = Run({\"lmpop\", \"1\", \"empty_list\", \"LEFT\"});\n  EXPECT_THAT(resp, ArgType(RespExpr::NIL));\n\n  // Test with non-existent list\n  resp = Run({\"lmpop\", \"1\", \"nonexistent\", \"LEFT\"});\n  EXPECT_THAT(resp, ArgType(RespExpr::NIL));\n\n  // Test with wrong type key\n  Run({\"set\", \"string_key\", \"value\"});\n  resp = Run({\"lmpop\", \"1\", \"string_key\", \"LEFT\"});\n  EXPECT_THAT(resp, ErrArg(\"WRONGTYPE Operation against a key holding the wrong kind of value\"));\n\n  // Test without COUNT parameter - should return 1 element by default\n  Run({\"rpush\", \"list\", \"a\", \"b\"});\n  resp = Run({\"lmpop\", \"1\", \"list\", \"LEFT\"});\n  EXPECT_THAT(resp,\n              RespArray(ElementsAre(\n                  \"list\", RespArray(ElementsAre(\"a\")))));  // Should return 1 element by default\n\n  // Test with COUNT = 0 - should return error\n  resp = Run({\"lmpop\", \"1\", \"list\", \"LEFT\", \"COUNT\", \"0\"});\n  EXPECT_THAT(resp, RespArray(ElementsAre(\"list\", RespArray(ElementsAre()))));\n\n  // Test with negative COUNT - should return error\n  resp = Run({\"lmpop\", \"1\", \"list\", \"LEFT\", \"COUNT\", \"-1\"});\n  EXPECT_THAT(resp, ErrArg(\"value is not an integer or out of range\"));\n}\n\nTEST_F(ListFamilyTest, LMPopDocExample) {\n  // Try to pop from non-existing lists\n  auto resp = Run({\"LMPOP\", \"2\", \"non1\", \"non2\", \"LEFT\", \"COUNT\", \"10\"});\n  EXPECT_THAT(resp, ArgType(RespExpr::NIL));\n\n  // Create first list and test basic pop\n  resp = Run({\"LPUSH\", \"mylist\", \"one\", \"two\", \"three\", \"four\", \"five\"});\n  EXPECT_THAT(resp, IntArg(5));\n\n  resp = Run({\"LMPOP\", \"1\", \"mylist\", \"LEFT\"});\n  EXPECT_THAT(resp, RespArray(ElementsAre(\"mylist\", RespArray(ElementsAre(\"five\")))));\n\n  
resp = Run({\"LRANGE\", \"mylist\", \"0\", \"-1\"});\n  EXPECT_THAT(resp.GetVec(), ElementsAre(\"four\", \"three\", \"two\", \"one\"));\n\n  // Test RIGHT pop with COUNT\n  resp = Run({\"LMPOP\", \"1\", \"mylist\", \"RIGHT\", \"COUNT\", \"10\"});\n  EXPECT_THAT(resp, RespArray(ElementsAre(\"mylist\",\n                                          RespArray(ElementsAre(\"one\", \"two\", \"three\", \"four\")))));\n\n  // Create two lists and test multi-key pop\n  resp = Run({\"LPUSH\", \"mylist\", \"one\", \"two\", \"three\", \"four\", \"five\"});\n  EXPECT_THAT(resp, IntArg(5));\n\n  resp = Run({\"LPUSH\", \"mylist2\", \"a\", \"b\", \"c\", \"d\", \"e\"});\n  EXPECT_THAT(resp, IntArg(5));\n\n  resp = Run({\"LMPOP\", \"2\", \"mylist\", \"mylist2\", \"RIGHT\", \"COUNT\", \"3\"});\n  EXPECT_THAT(resp,\n              RespArray(ElementsAre(\"mylist\", RespArray(ElementsAre(\"one\", \"two\", \"three\")))));\n\n  resp = Run({\"LRANGE\", \"mylist\", \"0\", \"-1\"});\n  EXPECT_THAT(resp.GetVec(), ElementsAre(\"five\", \"four\"));\n\n  resp = Run({\"LMPOP\", \"2\", \"mylist\", \"mylist2\", \"RIGHT\", \"COUNT\", \"5\"});\n  EXPECT_THAT(resp, RespArray(ElementsAre(\"mylist\", RespArray(ElementsAre(\"four\", \"five\")))));\n\n  resp = Run({\"LMPOP\", \"2\", \"mylist\", \"mylist2\", \"RIGHT\", \"COUNT\", \"10\"});\n  EXPECT_THAT(resp,\n              RespArray(ElementsAre(\"mylist2\", RespArray(ElementsAre(\"a\", \"b\", \"c\", \"d\", \"e\")))));\n\n  // Verify both lists are now empty\n  resp = Run({\"EXISTS\", \"mylist\", \"mylist2\"});\n  EXPECT_THAT(resp, IntArg(0));\n}\n\nTEST_F(ListFamilyTest, LMPopWrongType) {\n  // Setup: create a list and a hash\n  Run({\"lpush\", \"l1\", \"e1\"});\n  Run({\"hset\", \"foo\", \"k1\", \"v1\"});\n\n  // Test: first key is wrong type\n  auto resp = Run({\"lmpop\", \"2\", \"foo\", \"l1\", \"left\"});\n  EXPECT_THAT(resp, ErrArg(\"WRONGTYPE Operation against a key holding the wrong kind of value\"));\n\n  // Test: second key is wrong type but first 
doesn't exist\n  resp = Run({\"lmpop\", \"2\", \"nonexistent\", \"foo\", \"left\"});\n  EXPECT_THAT(resp, ErrArg(\"WRONGTYPE Operation against a key holding the wrong kind of value\"));\n\n  // Test: second key is wrong type but first is a valid list\n  resp = Run({\"lmpop\", \"2\", \"l1\", \"foo\", \"left\"});\n  EXPECT_THAT(resp, RespArray(ElementsAre(\"l1\", RespArray(ElementsAre(\"e1\")))));\n}\n\n// Blocking command wakeup is complicated by running multi transaction at the same time\nTEST_F(ListFamilyTest, AwakeMulti) {\n  auto f1 = pp_->at(1)->LaunchFiber(Launch::dispatch, [&] {\n    for (unsigned i = 0; i < 100; ++i) {\n      Run(\"CONSUMER\", {\"blmove\", \"src\", \"dest\", \"LEFT\", \"LEFT\", \"0\"});\n    };\n  });\n  auto f2 = pp_->at(1)->LaunchFiber([&] {\n    for (unsigned i = 0; i < 100; ++i) {\n      Run(\"PROD\", {\"lpush\", \"src\", \"a\"});\n      ThisFiber::SleepFor(50us);\n    };\n  });\n\n  auto f3 = pp_->at(2)->LaunchFiber([&] {\n    for (unsigned i = 0; i < 100; ++i) {\n      Run({\"multi\"});\n      for (unsigned j = 0; j < 8; ++j) {\n        Run({\"get\", StrCat(\"key\", j)});\n      };\n      Run({\"exec\"});\n    };\n  });\n\n  f1.Join();\n  f2.Join();\n  f3.Join();\n}\n\nTEST_F(ListFamilyTest, PressureBLMove) {\n#ifndef NDEBUG\n  GTEST_SKIP() << \"Requires release build to reproduce\";\n#endif\n\n  auto consumer = [this](string_view id, string_view src, string_view dest) {\n    for (unsigned i = 0; i < 1000; ++i) {\n      Run(id, {\"blmove\", src, dest, \"LEFT\", \"LEFT\", \"0\"});\n    };\n  };\n  auto producer = [this](string_view id, size_t delay, string_view src) {\n    for (unsigned i = 0; i < 1000; ++i) {\n      Run(id, {\"lpush\", src, \"a\"});\n      ThisFiber::SleepFor(1us * delay);\n    }\n  };\n\n  for (size_t delay : {1, 2, 5}) {\n    LOG(INFO) << \"Running with delay: \" << delay;\n    auto f1 = pp_->at(1)->LaunchFiber([=] { consumer(\"c1\", \"src\", \"dest\"); });\n    auto f2 = pp_->at(1)->LaunchFiber([=] { 
producer(\"p1\", delay, \"src\"); });\n\n    f1.Join();\n    f2.Join();\n  }\n}\n\nTEST_F(ListFamilyTest, AwakeDb1) {\n  const char* kDbId = \"1\";\n\n  auto f1 = pp_->at(1)->LaunchFiber(Launch::dispatch, [&] {\n    Run(\"C\", {\"SELECT\", kDbId});\n    Run(\"C\", {\"brpoplpush\", \"x\", \"y\", \"0\"});\n    ASSERT_EQ(GetDebugInfo(\"C\").shards_count, 1);\n  });\n  Run({\"SELECT\", kDbId});\n  Run({\"EVAL\", \"redis.call('LPUSH', KEYS[1], 'val'); return 1;\", \"1\", \"x\"});\n  f1.Join();\n}\n\n#pragma GCC diagnostic pop\n}  // namespace dfly\n"
  },
  {
    "path": "src/server/main_service.cc",
    "content": "// Copyright 2024, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#include \"server/main_service.h\"\n\n#include \"absl/strings/str_split.h\"\n#include \"facade/resp_expr.h\"\n#include \"util/fibers/detail/fiber_interface.h\"\n#include \"util/fibers/proactor_base.h\"\n#include \"util/fibers/synchronization.h\"\n\n#ifdef __FreeBSD__\n#include <pthread_np.h>\n#elif defined(__linux__)\n#include \"util/fibers/uring_proactor.h\"\n#endif\n\nextern \"C\" {\n#include \"redis/redis_aux.h\"\n}\n\n#include <absl/cleanup/cleanup.h>\n#include <absl/functional/bind_front.h>\n#include <absl/strings/ascii.h>\n#include <absl/strings/match.h>\n#include <absl/strings/str_format.h>\n#include <xxhash.h>\n\n#include <csignal>\n#include <filesystem>\n\n#include \"base/cycle_clock.h\"\n#include \"base/flag_utils.h\"\n#include \"base/flags.h\"\n#include \"base/logging.h\"\n#include \"core/search/vector_utils.h\"\n#include \"facade/dragonfly_connection.h\"\n#include \"facade/dragonfly_listener.h\"\n#include \"facade/error.h\"\n#include \"facade/reply_builder.h\"\n#include \"facade/reply_capture.h\"\n#include \"server/acl/acl_commands_def.h\"\n#include \"server/acl/acl_family.h\"\n#include \"server/acl/user_registry.h\"\n#include \"server/acl/validator.h\"\n#include \"server/channel_store.h\"\n#include \"server/cluster/cluster_family.h\"\n#include \"server/command_families.h\"\n#include \"server/dflycmd.h\"\n#include \"server/error.h\"\n#include \"server/generic_family.h\"\n#include \"server/hset_family.h\"\n#include \"server/http_api.h\"\n#include \"server/multi_command_squasher.h\"\n#include \"server/namespaces.h\"\n#include \"server/script_mgr.h\"\n#include \"server/search/search_family.h\"\n#include \"server/server_state.h\"\n#include \"server/set_family.h\"\n#include \"server/sharding.h\"\n#include \"server/stream_family.h\"\n#include \"server/tiered_storage.h\"\n#include \"server/transaction.h\"\n#include 
\"server/version.h\"\n#include \"server/zset_family.h\"\n#include \"strings/human_readable.h\"\n#include \"util/html/sorted_table.h\"\n#include \"util/varz.h\"\n\nusing namespace std;\nusing facade::ErrorReply;\n\nABSL_FLAG(int32_t, port, 6379,\n          \"Redis port. 0 disables the port, -1 will bind on a random available port.\");\n\nABSL_FLAG(uint16_t, announce_port, 0,\n          \"Port that Dragonfly announces to cluster clients and replication master\");\n\nABSL_FLAG(uint32_t, memcached_port, 0, \"Memcached port\");\n\nABSL_FLAG(uint32_t, num_shards, 0, \"Number of database shards, 0 - to choose automatically\");\n\nABSL_FLAG(bool, multi_exec_squash, true,\n          \"Whether multi exec will squash single shard commands to optimize performance\");\n\nABSL_FLAG(bool, lua_resp2_legacy_float, false,\n          \"Return rounded down integers instead of floats for lua scripts with RESP2\");\nABSL_FLAG(uint32_t, multi_eval_squash_buffer, 4096, \"Max buffer for squashed commands per script\");\n\nABSL_DECLARE_FLAG(bool, primary_port_http_enabled);\nABSL_FLAG(bool, admin_nopass, false,\n          \"If set, would enable open admin access to console on the assigned port, without \"\n          \"authorization needed.\");\n\nABSL_FLAG(bool, expose_http_api, false,\n          \"If set, will expose a POST /api handler for sending redis commands as json array.\");\n\nABSL_FLAG(strings::MemoryBytesFlag, maxmemory, strings::MemoryBytesFlag{},\n          \"Limit on maximum-memory that is used by the database, until data starts to be evicted \"\n          \"(according to eviction policy). With tiering, this value defines only the size in RAM, \"\n          \"and not the whole dataset (RAM + SSD). \"\n          \"Must be *at least* 256MiB per proactor thread. \"\n          \"Can be any human‑readable bytes values (supports K/M/G/T/P/E with optional B, \"\n          \"case‑insensitive, both 'GiB' & 'GB' possible). Examples: 300000000, 512MB, 2G, 1.25GiB. 
\"\n          \"0 - value will be automatically defined based on the env (ex: machine's capacity). \"\n          \"default: 0\");\n\nABSL_RETIRED_FLAG(\n    double, oom_deny_ratio, 1.1,\n    \"commands with flag denyoom will return OOM when the ratio between maxmemory and used \"\n    \"memory is above this value\");\n\nABSL_FLAG(uint32_t, shard_thread_busy_polling_usec, 0,\n          \"If non-zero, overrides the busy polling parameter for shard threads.\");\n\nABSL_FLAG(string, huffman_table, \"\",\n          \"a comma separated map: domain1:code1,domain2:code2,... where \"\n          \"domain can currently be only KEYS or STRINGS, code is a base64-encoded huffman table\"\n          \" exported via \"\n          \"DEBUG COMPRESSION EXPORT. if the flag is empty no huffman compression is applied.\");\n\nABSL_FLAG(bool, jsonpathv2, true,\n          \"If true uses Dragonfly jsonpath implementation, \"\n          \"otherwise uses legacy jsoncons implementation.\");\n\nABSL_FLAG(uint32_t, scheduler_background_budget, 50'000, \"Background fiber budget in nanoseconds\");\nABSL_FLAG(uint32_t, scheduler_background_sleep_prob, 50,\n          \"Sleep probability of background fibers on reaching budget\");\nABSL_FLAG(uint32_t, scheduler_background_warrant, 5,\n          \"Percentage of guaranteed cpu time for background fibers\");\n\nABSL_FLAG(uint32_t, squash_stats_latency_lower_limit, 0,\n          \"If set, will not track latency stats below this threshold (usec). 
\");\n\nnamespace {\n\nstruct ShutdownWatchdog {\n  util::fb2::Fiber watchdog_fb;\n  util::fb2::Done watchdog_done;\n  util::ProactorPool& pool;\n\n  explicit ShutdownWatchdog(util::ProactorPool& pp);\n  void Disarm();\n};\n\nShutdownWatchdog::ShutdownWatchdog(util::ProactorPool& pp) : pool{pp} {\n  watchdog_fb = pool.GetNextProactor()->LaunchFiber(\"shutdown_watchdog\", [&] {\n    if (!watchdog_done.WaitFor(20s)) {\n      LOG(ERROR) << \"Deadlock detected during shutdown\";\n      absl::SetFlag(&FLAGS_alsologtostderr, true);\n      util::fb2::Mutex m;\n      pool.AwaitFiberOnAll([&m](unsigned index, auto*) {\n        util::ThisFiber::SetName(absl::StrFormat(\"print_stack_fib_%u\", index));\n        std::unique_lock lk(m);\n        LOG(ERROR) << \"Proactor \" << index << \":\\n\";\n        util::fb2::detail::FiberInterface::PrintAllFiberStackTraces();\n      });\n    }\n  });\n}\n\nvoid ShutdownWatchdog::Disarm() {\n  watchdog_done.Notify();\n  watchdog_fb.JoinIfNeeded();\n}\n\nstd::optional<ShutdownWatchdog> shutdown_watchdog = std::nullopt;\n\n}  // namespace\n\nnamespace dfly {\n\n#if defined(__linux__)\n#if __GLIBC__ == 2 && __GLIBC_MINOR__ < 30\n#include <sys/syscall.h>\n#define gettid() syscall(SYS_gettid)\n#endif\n\n#elif defined(__FreeBSD__)\n\n#define gettid() pthread_getthreadid_np()\n\n#elif defined(__APPLE__)\n\ninline unsigned gettid() {\n  uint64_t tid;\n  pthread_threadid_np(NULL, &tid);\n  return tid;\n}\n\n#endif\n\nusing namespace util;\nusing absl::GetFlag;\nusing absl::StrCat;\nusing base::VarzValue;\nusing ::boost::intrusive_ptr;\nusing namespace facade;\nnamespace h2 = boost::beast::http;\n\nnamespace {\n\nstd::optional<VarzFunction> engine_varz;\n\nconstexpr size_t kMaxThreadSize = 1024;\n\n// Unwatch all keys for a connection and unregister from DbSlices.\n// Used by UNWATCH, DICARD and EXEC.\nvoid UnwatchAllKeys(Namespace* ns, ConnectionState::ExecInfo* exec_info) {\n  if (!exec_info->watched_keys.empty()) {\n    auto cb = [&](EngineShard* 
shard) {\n      ns->GetDbSlice(shard->shard_id())\n          .UnregisterConnectionWatches(exec_info->watched_keys, &exec_info->watched_dirty);\n    };\n    shard_set->RunBriefInParallel(std::move(cb));\n  }\n  exec_info->ClearWatched();\n}\n\nvoid MultiCleanup(ConnectionContext* cntx) {\n  auto& exec_info = cntx->conn_state.exec_info;\n  if (auto* borrowed = exec_info.preborrowed_interpreter; borrowed) {\n    ServerState::tlocal()->ReturnInterpreter(borrowed);\n    exec_info.preborrowed_interpreter = nullptr;\n  }\n  UnwatchAllKeys(cntx->ns, &exec_info);\n  exec_info.Clear();\n}\n\nvoid DeactivateMonitoring(ConnectionContext* server_ctx) {\n  if (server_ctx->monitor) {\n    // remove monitor on this connection\n    server_ctx->ChangeMonitor(false /*start*/);\n  }\n}\n\n// The format of the message that are sending is\n// +\"time of day\" [db-number <lua|unix:path|connection info] \"command\" \"arg1\" .. \"argM\"\nstd::string CreateMonitorTimestamp() {\n  timeval tv;\n\n  gettimeofday(&tv, nullptr);\n  return absl::StrCat(tv.tv_sec, \".\", tv.tv_usec, absl::kZeroPad6);\n}\n\nauto CmdEntryToMonitorFormat(std::string_view str) -> std::string {\n  // This code is based on Redis impl for it at sdscatrepr@sds.c\n  std::string result = absl::StrCat(\"\\\"\");\n\n  for (auto c : str) {\n    switch (c) {\n      case '\\\\':\n        absl::StrAppend(&result, \"\\\\\\\\\");\n        break;\n      case '\"':\n        absl::StrAppend(&result, \"\\\\\\\"\");\n        break;\n      case '\\n':\n        absl::StrAppend(&result, \"\\\\n\");\n        break;\n      case '\\r':\n        absl::StrAppend(&result, \"\\\\r\");\n        break;\n      case '\\t':\n        absl::StrAppend(&result, \"\\\\t\");\n        break;\n      case '\\a':\n        absl::StrAppend(&result, \"\\\\a\");\n        break;\n      case '\\b':\n        absl::StrAppend(&result, \"\\\\b\");\n        break;\n      default:\n        if (isprint(c)) {\n          result += c;\n        } else {\n          
absl::StrAppendFormat(&result, \"\\\\x%02x\", c);\n        }\n        break;\n    }\n  }\n  absl::StrAppend(&result, \"\\\"\");\n  return result;\n}\n\nstd::string MakeMonitorMessage(const ConnectionContext* cntx, const CommandId* cid,\n                               CmdArgList tail_args) {\n  std::string message = absl::StrCat(CreateMonitorTimestamp(), \" [\", cntx->conn_state.db_index);\n\n  string endpoint;\n  if (cntx->conn_state.script_info) {\n    endpoint = \"lua\";\n  } else if (const auto* conn = cntx->conn(); conn != nullptr) {\n    endpoint = conn->RemoteEndpointStr();\n  } else {\n    endpoint = \"REPLICATION:0\";\n  }\n  absl::StrAppend(&message, \" \", endpoint, \"] \");\n\n  absl::StrAppend(&message, \"\\\"\", cid->name(), \"\\\"\");\n\n  if (cid->name() == \"AUTH\")\n    return message;\n\n  for (auto arg : tail_args)\n    absl::StrAppend(&message, \" \", CmdEntryToMonitorFormat(facade::ToSV(arg)));\n\n  return message;\n}\n\nvoid DispatchMonitor(ConnectionContext* cntx, const CommandId* cid, CmdArgList tail_args) {\n  auto cb = [msg = MakeMonitorMessage(cntx, cid, tail_args)](unsigned idx, util::ProactorBase*) {\n    const auto& monitors = ServerState::tlocal()->Monitors().monitors();\n    if (monitors.empty())\n      return;\n\n    VLOG(2) << \"Sending command '\" << msg << \"' from \" << ProactorBase::me()->GetPoolIndex()\n            << \" to \" << monitors.size() << \" monitors\";\n    for (auto monitor_conn : monitors)\n      monitor_conn->SendMonitorMessageAsync(msg);\n  };\n  shard_set->pool()->DispatchBrief(std::move(cb));\n}\n\nclass InterpreterReplier : public RedisReplyBuilder {\n public:\n  explicit InterpreterReplier(ObjectExplorer* explr) : RedisReplyBuilder(nullptr), explr_(explr) {\n  }\n\n  void SendError(std::string_view str, std::string_view type) final;\n\n  void SendBulkString(std::string_view str) final;\n  void SendSimpleString(std::string_view str) final;\n\n  void SendNullArray() final;\n  void SendNull() final;\n  void 
SendLong(long val) final;\n  void SendDouble(double val) final;\n\n  void StartCollection(unsigned len, CollectionType type) final;\n\n private:\n  void PostItem();\n\n  ObjectExplorer* explr_;\n  vector<pair<unsigned, unsigned>> array_len_;\n  unsigned num_elems_ = 0;\n};\n\n// Serialized result of script invocation to Redis protocol\nclass EvalSerializer : public ObjectExplorer {\n public:\n  explicit EvalSerializer(RedisReplyBuilder* rb, bool float_as_int)\n      : rb_(rb), float_as_int_(float_as_int) {\n  }\n\n  void OnBool(bool b) final {\n    if (b) {\n      rb_->SendLong(1);\n    } else {\n      rb_->SendNull();\n    }\n  }\n\n  void OnString(string_view str) final {\n    rb_->SendBulkString(str);\n  }\n\n  void OnDouble(double d) final {\n    if (float_as_int_ || GetFlag(FLAGS_lua_resp2_legacy_float)) {\n      const long val = d >= 0 ? static_cast<long>(floor(d)) : static_cast<long>(ceil(d));\n      rb_->SendLong(val);\n    } else {\n      rb_->SendDouble(d);\n    }\n  }\n\n  void OnInt(int64_t val) final {\n    rb_->SendLong(val);\n  }\n\n  void OnArrayStart(unsigned len) final {\n    rb_->StartArray(len);\n  }\n\n  void OnArrayEnd() final {\n  }\n\n  void OnMapStart(unsigned len) final {\n    rb_->StartCollection(len, CollectionType::MAP);\n  }\n\n  void OnMapEnd() final {\n  }\n\n  void OnNil() final {\n    rb_->SendNull();\n  }\n\n  void OnStatus(string_view str) {\n    rb_->SendSimpleString(str);\n  }\n\n  void OnError(string_view str) {\n    if (!str.empty() && str.front() != '-') {\n      rb_->SendError(absl::StrCat(\"-\", str));\n    } else {\n      rb_->SendError(str);\n    }\n  }\n\n private:\n  RedisReplyBuilder* rb_;\n  bool float_as_int_;\n};\n\nvoid InterpreterReplier::PostItem() {\n  if (array_len_.empty()) {\n    DCHECK_EQ(0u, num_elems_);\n    ++num_elems_;\n  } else {\n    ++num_elems_;\n\n    while (num_elems_ == array_len_.back().second) {\n      num_elems_ = array_len_.back().first;\n      explr_->OnArrayEnd();\n\n      
array_len_.pop_back();\n      if (array_len_.empty())\n        break;\n    }\n  }\n}\n\nvoid InterpreterReplier::SendError(string_view str, std::string_view type) {\n  DCHECK(array_len_.empty());\n  DVLOG(1) << \"Lua/df_call error \" << str;\n  if (!str.empty() && str.front() != '-') {\n    explr_->OnError(absl::StrCat(\"-ERR \", str));\n  } else {\n    explr_->OnError(str);\n  }\n}\n\nvoid InterpreterReplier::SendSimpleString(string_view str) {\n  if (array_len_.empty())\n    explr_->OnStatus(str);\n  else\n    explr_->OnString(str);\n  PostItem();\n}\n\nvoid InterpreterReplier::SendNullArray() {\n  SendSimpleStrArr(ArgSlice{});\n  PostItem();\n}\n\nvoid InterpreterReplier::SendNull() {\n  explr_->OnNil();\n  PostItem();\n}\n\nvoid InterpreterReplier::SendLong(long val) {\n  explr_->OnInt(val);\n  PostItem();\n}\n\nvoid InterpreterReplier::SendDouble(double val) {\n  explr_->OnDouble(val);\n  PostItem();\n}\n\nvoid InterpreterReplier::SendBulkString(string_view str) {\n  explr_->OnString(str);\n  PostItem();\n}\n\nvoid InterpreterReplier::StartCollection(unsigned len, CollectionType type) {\n  if (type == CollectionType::MAP)\n    len *= 2;\n  explr_->OnArrayStart(len);\n\n  if (len == 0) {\n    explr_->OnArrayEnd();\n    PostItem();\n  } else {\n    array_len_.emplace_back(num_elems_ + 1, len);\n    num_elems_ = 0;\n  }\n}\n\nbool IsSHA(string_view str) {\n  return std::all_of(str.begin(), str.end(),\n                     [](unsigned char c) { return absl::ascii_isxdigit(c); });\n}\n\noptional<ErrorReply> EvalValidator(CmdArgList args) {\n  string_view num_keys_str = ArgS(args, 1);\n  int32_t num_keys;\n\n  if (!absl::SimpleAtoi(num_keys_str, &num_keys) || num_keys < 0)\n    return ErrorReply{facade::kInvalidIntErr};\n\n  if (unsigned(num_keys) > args.size() - 2)\n    return ErrorReply{\"Number of keys can't be greater than number of args\", kSyntaxErrType};\n\n  return nullopt;\n}\n\nenum class ExecScriptUse : uint8_t {\n  NONE = 0,\n  SCRIPT_LOAD = 1,\n  
SCRIPT_RUN = 2,\n};\n\nExecScriptUse DetermineScriptPresense(const std::vector<StoredCmd>& body) {\n  bool script_load = false;\n  for (const auto& scmd : body) {\n    if (scmd.Cid()->MultiControlKind() == CO::MultiControlKind::EVAL) {\n      return ExecScriptUse::SCRIPT_RUN;\n    }\n\n    if ((scmd.Cid()->name() == \"SCRIPT\") && (absl::AsciiStrToUpper(scmd.FirstArg()) == \"LOAD\")) {\n      script_load = true;\n    }\n  }\n\n  if (script_load)\n    return ExecScriptUse::SCRIPT_LOAD;\n\n  return ExecScriptUse::NONE;\n}\n\n// Returns the multi mode for that transaction. Returns NOT_DETERMINED if no scheduling\n// is required.\nTransaction::MultiMode DeduceExecMode(ExecScriptUse state,\n                                      const ConnectionState::ExecInfo& exec_info,\n                                      const ScriptMgr& script_mgr) {\n  // Check if script most LIKELY has global eval transactions\n  bool contains_global = false;\n  bool contains_admin_cmd = false;\n  Transaction::MultiMode multi_mode = Transaction::LOCK_AHEAD;\n\n  if (state == ExecScriptUse::SCRIPT_RUN) {\n    contains_global = script_mgr.AreGlobalByDefault();\n  }\n\n  bool transactional = contains_global;\n  if (!transactional) {\n    for (const auto& scmd : exec_info.body) {\n      // We can only tell if eval is transactional based on they keycount\n      if (absl::StartsWith(scmd.Cid()->name(), \"EVAL\")) {\n        CmdArgVec arg_vec{};\n        auto args = scmd.Slice(&arg_vec);\n        auto keys = DetermineKeys(scmd.Cid(), args);\n        transactional |= (keys && keys.value().NumArgs() > 0);\n      } else {\n        transactional |= scmd.Cid()->IsTransactional();\n      }\n      contains_global |= scmd.Cid()->opt_mask() & CO::GLOBAL_TRANS;\n      contains_admin_cmd |= scmd.Cid()->opt_mask() & CO::ADMIN;\n\n      // We can't run no-key-transactional commands in lock-ahead mode currently,\n      // because it means we have to schedule on all shards\n      if (scmd.Cid()->opt_mask() & 
CO::NO_KEY_TRANSACTIONAL)\n        contains_global = true;\n\n      if (contains_global)\n        break;\n    }\n  }\n\n  // multi/exec contains commands like ping that do not affect db state.\n  if (!transactional && exec_info.watched_keys.empty())\n    return Transaction::NOT_DETERMINED;\n\n  if (contains_admin_cmd) {\n    multi_mode = Transaction::NON_ATOMIC;\n  }\n  // Atomic modes fall back to GLOBAL if they contain global commands.\n  else if (contains_global && multi_mode == Transaction::LOCK_AHEAD) {\n    multi_mode = Transaction::GLOBAL;\n  }\n\n  return multi_mode;\n}\n\nstring CreateExecDescriptor(const std::vector<StoredCmd>& stored_cmds, unsigned num_uniq_shards) {\n  string result;\n  size_t max_len = std::min<size_t>(20u, stored_cmds.size());\n  absl::StrAppend(&result, \"EXEC/\", num_uniq_shards, \"/\", max_len);\n\n  return result;\n}\n\nstring ConnectionLogContext(const facade::Connection* conn) {\n  if (conn == nullptr) {\n    return \"(null-conn)\";\n  }\n  return absl::StrCat(\"(\", conn->RemoteEndpointStr(), \")\");\n}\n\nstring FailedCommandToString(std::string_view command, facade::CmdArgList args,\n                             std::string_view reason) {\n  string result;\n  absl::StrAppend(&result, \" \", command);\n\n  if (command != \"AUTH\" && command != \"ACL SETUSER\") {\n    for (auto arg : args) {\n      absl::StrAppend(&result, \" \", absl::CHexEscape(arg));\n    }\n  }\n\n  absl::StrAppend(&result, \" failed with reason: \", reason);\n\n  return result;\n}\n\nthread_local uint32_t squash_stats_latency_lower_limit_cached;\n\nvoid UpdateFromFlagsOnThread() {\n  if (uint32_t poll = GetFlag(FLAGS_shard_thread_busy_polling_usec);\n      poll > 0 && EngineShard::tlocal())\n    ProactorBase::me()->SetBusyPollUsec(poll);\n  squash_stats_latency_lower_limit_cached = GetFlag(FLAGS_squash_stats_latency_lower_limit);\n}\n\nstd::vector<std::string> GetMutableFlagNames() {\n  return base::GetFlagNames(FLAGS_shard_thread_busy_polling_usec,\n      
                      FLAGS_squash_stats_latency_lower_limit);\n}\n\nvoid UpdateSchedulerFlagsOnThread() {\n  using fb2::detail::Scheduler;\n  auto* sched = util::fb2::detail::FiberScheduler();\n  sched->UpdateConfig(&Scheduler::Config::budget_background_fib,\n                      GetFlag(FLAGS_scheduler_background_budget));\n  sched->UpdateConfig(&Scheduler::Config::background_sleep_prob,\n                      GetFlag(FLAGS_scheduler_background_sleep_prob));\n  sched->UpdateConfig(&Scheduler::Config::background_warrant_pct,\n                      GetFlag(FLAGS_scheduler_background_warrant));\n}\n\nvoid SetHuffmanTable(const std::string& huffman_table) {\n  if (huffman_table.empty())\n    return;\n  vector<string_view> parts = absl::StrSplit(huffman_table, ',');\n  for (const auto& part : parts) {\n    vector<string_view> kv = absl::StrSplit(part, ':');\n    if (kv.size() != 2 || kv[0].empty() || kv[1].empty()) {\n      LOG(ERROR) << \"Invalid huffman table entry\" << part;\n      continue;\n    }\n    string domain_str = absl::AsciiStrToUpper(kv[0]);\n    CompactObj::HuffmanDomain domain;\n\n    if (domain_str == \"KEYS\") {\n      domain = CompactObj::HUFF_KEYS;\n    } else if (domain_str == \"STRINGS\") {\n      domain = CompactObj::HUFF_STRING_VALUES;\n    } else {\n      LOG(ERROR) << \"Unknown huffman domain: \" << kv[0];\n      continue;\n    }\n\n    string unescaped;\n    if (!absl::Base64Unescape(kv[1], &unescaped)) {\n      LOG(ERROR) << \"Failed to decode base64 huffman table for domain \" << kv[0] << \" with value \"\n                 << kv[1];\n      continue;\n    }\n\n    atomic_bool success = true;\n    shard_set->RunBriefInParallel([&](auto* shard) {\n      if (!CompactObj::InitHuffmanThreadLocal(domain, unescaped)) {\n        success = false;\n      }\n    });\n    LOG_IF(ERROR, !success) << \"Failed to set huffman table for domain \" << kv[0] << \" with value \"\n                            << kv[1];\n  }\n}\n\nstring_view 
CommandOptName(CO::CommandOpt opt, bool enabled) {\n  using namespace CO;\n  if (!enabled) {\n    if (opt == FAST)\n      return \"SLOW\";\n    return \"\";\n  }\n\n  switch (opt) {\n    case JOURNALED:\n      return \"write\";\n    case READONLY:\n      return \"readonly\";\n    case DENYOOM:\n      return \"denyoom\";\n    case FAST:\n      return \"fast\";\n    case LOADING:\n      return \"loading\";\n    case DANGEROUS:\n      return \"dangerous\";\n    case ADMIN:\n      return \"admin\";\n    case NOSCRIPT:\n      return \"noscript\";\n    case BLOCKING:\n      return \"blocking\";\n    case HIDDEN:\n    case GLOBAL_TRANS:\n    case STORE_LAST_KEY:\n    case VARIADIC_KEYS:\n    case NO_AUTOJOURNAL:\n    case NO_KEY_TRANSACTIONAL:\n    case NO_KEY_TX_SPAN_ALL:\n    case IDEMPOTENT:\n      return \"\";\n  }\n  return \"\";\n}\n\nOpResult<void> OpTrackKeys(const OpArgs slice_args, const facade::Connection::WeakRef& conn_ref,\n                           const ShardArgs& args) {\n  if (conn_ref.IsExpired()) {\n    DVLOG(2) << \"Connection expired, exiting TrackKey function.\";\n    return OpStatus::OK;\n  }\n\n  DVLOG(2) << \"Start tracking keys for client ID: \" << conn_ref.GetClientId();\n\n  // TODO: There is a bug here that we track all arguments instead of tracking only keys.\n  auto& db_slice = slice_args.GetDbSlice();\n  for (auto key : args)\n    db_slice.TrackKey(conn_ref, key);\n\n  return OpStatus::OK;\n}\n\nvoid TrackIfNeeded(CommandContext* cmd_cntx) {\n  auto* cntx = cmd_cntx->server_conn_cntx();\n  auto& info = cntx->conn_state.tracking_info_;\n\n  if (!info.IsTrackingOn()) {\n    return;\n  }\n\n  if (auto* tx = cmd_cntx->tx(); tx) {\n    // Reset it, because in multi/exec the transaction pointer is the same and\n    // we will end up triggerring the callback on the following commands. 
To avoid this\n    // we reset it.\n    tx->SetTrackingCallback({});\n    if (cmd_cntx->cid()->IsReadOnly() && info.ShouldTrackKeys()) {\n      auto conn = cntx->conn()->Borrow();\n      tx->SetTrackingCallback([conn](Transaction* trans) {\n        auto* shard = EngineShard::tlocal();\n        OpTrackKeys(trans->GetOpArgs(shard), conn, trans->GetShardArgs(shard->shard_id()));\n      });\n    }\n  }\n}\n\n// Check CLIENT PAUSE state and block if needed\nvoid CheckPauseState(facade::Connection* conn, ConnectionContext* dfly_cntx, const CommandId* cid) {\n  auto& etl = *ServerState::tlocal();\n  if (etl.IsPaused() && !conn->IsPrivileged()) {\n    bool is_write = cid->IsJournaled();\n    is_write |= cid->name() == \"PUBLISH\" || cid->name() == \"EVAL\" || cid->name() == \"EVALSHA\";\n    is_write |= cid->name() == \"EXEC\" && dfly_cntx->conn_state.exec_info.is_write;\n\n    dfly_cntx->paused = true;\n    etl.AwaitPauseState(is_write);\n    dfly_cntx->paused = false;\n  }\n}\n\n// Prepare transaction for DispatchCommand.\n//\n// Return value:\n//   first  - newly created top-level transaction (or nullptr if none).\n//   second - result: overall status of preparation.\npair<intrusive_ptr<Transaction>, OpStatus> PrepareTransaction(const CommandId* cid,\n                                                              ArgSlice tail_args,\n                                                              CommandContext* cmd_ctx) {\n  auto* dfly_cntx = cmd_ctx->server_conn_cntx();\n  bool init = false;\n  intrusive_ptr<Transaction> res;\n  if (dfly_cntx->transaction) {  // Existing transaction context (e.g., MULTI/EXEC or script)\n    DCHECK(dfly_cntx->transaction->IsMulti());  // dispatching in multi\n    if (cid->IsTransactional()) {\n      dfly_cntx->transaction->MultiSwitchCmd(cid);\n      init = true;\n    }\n  } else {\n    if (cid->IsTransactional()) {\n      res.reset(new Transaction{cid});\n      init = !res->IsMulti();  // Multi command initialize themselves based on 
their mode\n    }\n    dfly_cntx->transaction = res.get();\n  }\n\n  cmd_ctx->SetupTx(cid, dfly_cntx->transaction);\n\n  if (init) {\n    DCHECK(cmd_ctx->tx());\n    if (auto st =\n            cmd_ctx->tx()->InitByArgs(dfly_cntx->ns, dfly_cntx->conn_state.db_index, tail_args);\n        st != OpStatus::OK) {\n      if (res) {\n        dfly_cntx->transaction = nullptr;\n      }\n      return {nullptr, st};\n    }\n\n    if (res)  // new transaction\n      dfly_cntx->last_command_debug.shards_count = cmd_ctx->tx()->GetUniqueShardCnt();\n  }\n\n  return {std::move(res), OpStatus::OK};\n}\n\nvoid StoreInMultiBlock(ConnectionContext* dfly_cntx, const CommandId* cid, ArgSlice tail_args) {\n  // TODO: protect against aggregating huge transactions.\n  auto& exec_info = dfly_cntx->conn_state.exec_info;\n  const size_t old_size = exec_info.GetStoredCmdBytes();\n  exec_info.AddStoredCmd(cid, tail_args);  // Deep copy of args.\n  ServerState::tlocal()->stats.stored_cmd_bytes += exec_info.GetStoredCmdBytes() - old_size;\n}\n\nbool ShouldLogError(const CommandId& cid, string_view reason, CmdArgList tail_args) {\n  if (absl::StartsWith(reason, \"-BUSYGROUP\"))\n    return false;\n\n  if (cid.name() != \"CLIENT\")\n    return true;\n  return tail_args.empty() || !absl::EqualsIgnoreCase(tail_args.front(), \"maint_notifications\");\n}\n\n}  // namespace\n\nService::Service(ProactorPool* pp)\n    : pp_(*pp),\n      acl_family_(&user_registry_, pp),\n      server_family_(this),\n      cluster_family_(&server_family_) {\n  CHECK(pp);\n  CHECK(shard_set == NULL);\n\n#ifdef PRINT_STACKTRACES_ON_SIGNAL\n  LOG(INFO) << \"PRINT STACKTRACES REGISTERED\";\n  ProactorBase::RegisterSignal({SIGUSR1}, pp_.GetNextProactor(), [this](int signal) {\n    LOG(INFO) << \"Received \" << strsignal(signal);\n    base::SetVLogLevel(\"uring_proactor\", 2);\n\n    util::fb2::Mutex m;\n    pp_.AwaitFiberOnAll([&m](unsigned index, util::ProactorBase* base) {\n      util::fb2::LockGuard lk(m);\n      
util::fb2::detail::FiberInterface::PrintAllFiberStackTraces();\n    });\n  });\n#endif\n\n  CHECK(shard_set == nullptr);\n  shard_set = new EngineShardSet(pp);\n\n  // We support less than 1024 threads and we support less than 1024 shards.\n  // For example, Scan uses 10 bits in cursor to encode shard id it currently traverses.\n  CHECK_LT(pp->size(), kMaxThreadSize);\n  RegisterCommands();\n\n  exec_cid_ = FindCmd(\"EXEC\");\n\n  engine_varz.emplace(\"engine\", [this] { return GetVarzStats(); });\n}\n\nService::~Service() {\n#ifdef PRINT_STACKTRACES_ON_SIGNAL\n  ProactorBase::ClearSignal({SIGUSR1}, true);\n#endif\n\n  delete shard_set;\n  shard_set = nullptr;\n}\n\nvoid RegisterMutableFlags(ConfigRegistry* reg, absl::Span<const std::string> names,\n                          std::function<void()> f) {\n  auto cb = [f](auto&&) {\n    shard_set->pool()->AwaitBrief([f](unsigned tid, auto*) { f(); });\n    return true;\n  };\n  for (std::string_view name : names)\n    reg->RegisterMutable(name, cb);\n}\n\nvoid Service::Init(util::AcceptServer* acceptor, std::vector<facade::Listener*> listeners) {\n  InitRedisTables();\n  facade::Connection::Init(pp_.size());\n\n#if defined(WITH_SEARCH)\n  // Initialize SimSIMD runtime if needed (explicit, avoids implicit static initializers)\n  dfly::search::InitSimSIMD();\n#endif\n\n  config_registry.RegisterMutable(\"dbfilename\");\n  config_registry.Register(\"dbnum\");  // equivalent to databases in redis.\n  config_registry.Register(\"dir\");\n  config_registry.RegisterMutable(\"enable_heartbeat_eviction\");\n  config_registry.RegisterMutable(\"enable_heartbeat_rss_eviction\");\n  config_registry.RegisterMutable(\"masterauth\");\n  config_registry.RegisterMutable(\"masteruser\");\n  config_registry.RegisterMutable(\"max_eviction_per_heartbeat\");\n  config_registry.RegisterMutable(\"max_segment_to_consider\");\n  config_registry.RegisterMutable(\"pipeline_squash\");\n  config_registry.RegisterMutable(\"lua_mem_gc_threshold\");\n  
config_registry.RegisterMutable(\"background_debug_jobs\");\n\n  // Register ServerState flags\n  RegisterMutableFlags(&config_registry, ServerState::GetMutableFlagNames(),\n                       []() { ServerState::tlocal()->UpdateFromFlags(); });\n  // Register Connection flags\n  RegisterMutableFlags(&config_registry, facade::Connection::GetMutableFlagNames(),\n                       []() { facade::Connection::UpdateFromFlags(); });\n  // Register tiered storage flags\n  RegisterMutableFlags(&config_registry, TieredStorage::GetMutableFlagNames(), []() {\n    if (auto* es = EngineShard::tlocal(); es && es->tiered_storage()) {\n      es->tiered_storage()->UpdateFromFlags();\n    }\n  });\n  // Register main service flags\n  RegisterMutableFlags(&config_registry, GetMutableFlagNames(),\n                       []() { UpdateFromFlagsOnThread(); });\n  // Register squsher flags\n  RegisterMutableFlags(&config_registry, MultiCommandSquasher::GetMutableFlagNames(),\n                       []() { MultiCommandSquasher::UpdateFromFlags(); });\n\n  // Register scheduler flags\n  RegisterMutableFlags(\n      &config_registry,\n      base::GetFlagNames(FLAGS_scheduler_background_budget, FLAGS_scheduler_background_sleep_prob,\n                         FLAGS_scheduler_background_warrant),\n      []() { UpdateSchedulerFlagsOnThread(); });\n\n  config_registry.RegisterSetter<strings::MemoryBytesFlag>(\n      \"maxmemory\", [](const strings::MemoryBytesFlag& flag) {\n        // TODO: reduce code reliance on constant direct access of max_memory_limit\n        max_memory_limit.store(flag.value, memory_order_relaxed);\n      });\n\n  config_registry.RegisterMutable(\"replica_partial_sync\");\n  config_registry.RegisterMutable(\"background_snapshotting\");\n  config_registry.RegisterMutable(\"replication_timeout\");\n  config_registry.RegisterMutable(\"migration_finalization_timeout_ms\");\n  config_registry.RegisterMutable(\"slot_migration_throttle_us\");\n  
config_registry.RegisterMutable(\"table_growth_margin\");\n  config_registry.RegisterMutable(\"tcp_keepalive\");\n  config_registry.RegisterMutable(\"timeout\");\n  config_registry.RegisterMutable(\"send_timeout\");\n  config_registry.RegisterMutable(\"managed_service_info\");\n#ifdef WITH_SEARCH\n  config_registry.RegisterMutable(\"MAXSEARCHRESULTS\");\n  config_registry.RegisterMutable(\"search_query_string_bytes\");\n#endif\n\n  config_registry.RegisterMutable(\n      \"notify_keyspace_events\", [pool = &pp_](const absl::CommandLineFlag& flag) {\n        auto res = flag.TryGet<std::string>();\n        if (!res.has_value() || (!res->empty() && !absl::EqualsIgnoreCase(*res, \"EX\"))) {\n          return false;\n        }\n\n        pool->AwaitBrief([&res](unsigned, auto*) {\n          auto* shard = EngineShard::tlocal();\n          if (shard) {\n            auto shard_id = shard->shard_id();\n            auto& db_slice = namespaces->GetDefaultNamespace().GetDbSlice(shard_id);\n            db_slice.SetNotifyKeyspaceEvents(*res);\n          }\n        });\n\n        return true;\n      });\n\n  config_registry.RegisterMutable(\"aclfile\");\n  config_registry.RegisterSetter<uint32_t>(\"acllog_max_len\", [](uint32_t val) {\n    shard_set->pool()->AwaitFiberOnAll(\n        [val](auto index, auto* context) { ServerState::tlocal()->acl_log.SetTotalEntries(val); });\n  });\n\n  uint32_t shard_num = GetFlag(FLAGS_num_shards);\n  if (shard_num == 0 || shard_num > pp_.size()) {\n    LOG_IF(WARNING, shard_num > pp_.size())\n        << \"Requested num_shards (\" << shard_num << \") is bigger than thread count (\" << pp_.size()\n        << \"), using num_shards=\" << pp_.size();\n    shard_num = pp_.size();\n  }\n\n  // We assume that listeners.front() is the main_listener\n  // see dfly_main RunEngine. In unit tests, listeners are empty.\n  facade::Listener* main_listener = listeners.empty() ? 
nullptr : listeners.front();\n\n  ChannelStore* cs = new ChannelStore{};\n  // Must initialize before the shard_set because EngineShard::Init references ServerState.\n  pp_.AwaitBrief([&](uint32_t index, ProactorBase* pb) {\n    tl_facade_stats = new FacadeStats;\n    ServerState::Init(index, shard_num, main_listener, &user_registry_);\n    ServerState::tlocal()->UpdateChannelStore(cs);\n  });\n\n  const auto tcp_disabled = GetFlag(FLAGS_port) == 0u;\n  // We assume that listeners.front() is the main_listener\n  // see dfly_main RunEngine\n  if (!tcp_disabled && main_listener) {\n    acl_family_.Init(main_listener, &user_registry_);\n  }\n\n  // Initialize shard_set with a callback running once in a while in the shard threads.\n  shard_set->Init(shard_num, [this] {\n    server_family_.GetDflyCmd()->BreakStalledFlowsInShard();\n    server_family_.UpdateMemoryGlobalStats();\n  });\n  // InitThreadLocals might block\n  pp_.AwaitFiberOnAll(\n      [&](uint32_t index, ProactorBase* pb) { sharding::InitThreadLocals(shard_set->size()); });\n  Transaction::Init(shard_num);\n\n  shard_set->pool()->AwaitBrief([](unsigned, auto*) {\n    facade::Connection::UpdateFromFlags();\n    UpdateFromFlagsOnThread();\n    UpdateSchedulerFlagsOnThread();\n  });\n  SetHuffmanTable(GetFlag(FLAGS_huffman_table));\n\n  // Requires that shard_set will be initialized before because server_family_.Init might\n  // load the snapshot.\n  server_family_.Init(acceptor, std::move(listeners));\n}\n\nvoid Service::Shutdown() {\n  VLOG(1) << \"Service::Shutdown\";\n\n  // We mark that we are shutting down. 
After this incoming requests will be\n  // rejected.\n  mu_.lock();\n  global_state_ = GlobalState::SHUTTING_DOWN;\n  mu_.unlock();\n\n  pp_.AwaitFiberOnAll([](ProactorBase* pb) {\n    ServerState::tlocal()->EnterLameDuck();\n    facade::Connection::ShutdownThreadLocal();\n  });\n\n  config_registry.Reset();\n\n  // to shutdown all the runtime components that depend on EngineShard\n  cluster_family_.Shutdown();\n  server_family_.Shutdown();\n\n  shutdown_watchdog.emplace(pp_);\n\n  engine_varz.reset();\n\n  ChannelStore::Destroy();\n\n  shard_set->PreShutdown();\n  shard_set->Shutdown();\n\n  Transaction::Shutdown();\n\n  pp_.AwaitFiberOnAll([](ProactorBase* pb) {\n#if defined(DFLY_USE_SSL)\n    // Explicitly release OpenSSL thread-local state here.\n    // This prevents a potential crash during thread exit where the allocator (e.g. mimalloc)\n    // might tear down the thread's heap before OpenSSL tries to free its internal state.\n    OPENSSL_thread_stop();\n#endif\n    ServerState::tlocal()->Destroy();\n  });\n\n  // wait for all the pending callbacks to stop.\n  ThisFiber::SleepFor(10ms);\n  facade::Connection::Shutdown();\n\n  shutdown_watchdog->Disarm();\n}\n\nOpResult<KeyIndex> Service::FindKeys(const CommandId* cid, CmdArgList args) {\n  // Sharded pub-sub acts as if it's sharded by its channel name (just for checks)\n  if (cid->PubSubKind() == CO::PubSubKind::SHARDED) {\n    // SPUBLISH has only one key, the rest is data\n    if (cid->name() == registry_.RenamedOrOriginal(\"SPUBLISH\"))\n      return KeyIndex(0, 1);\n    return {KeyIndex(0, args.size())};  // sub/unsub list of channels\n  }\n\n  return DetermineKeys(cid, args);\n}\n\noptional<ErrorReply> Service::CheckKeysOwnership(const CommandId& cid, CmdArgList args,\n                                                 const ConnectionContext& dfly_cntx) {\n  if (dfly_cntx.is_replicating) {\n    // Always allow commands on the replication port, as it might be for future-owned keys.\n    return nullopt;\n  
}\n\n  if (cid.first_key_pos() == 0 && cid.PubSubKind() != CO::PubSubKind::SHARDED) {\n    return nullopt;  // No key command.\n  }\n\n  OpResult<KeyIndex> key_index_res = FindKeys(&cid, args);\n\n  if (!key_index_res) {\n    return ErrorReply{key_index_res.status()};\n  }\n\n  const auto& key_index = *key_index_res;\n\n  UniqueSlotChecker slot_checker;\n  for (string_view key : key_index.Range(args)) {\n    slot_checker.Add(key);\n  }\n\n  if (slot_checker.IsCrossSlot()) {\n    return ErrorReply{kCrossSlotError};\n  }\n\n  optional<SlotId> keys_slot = slot_checker.GetUniqueSlotId();\n\n  if (keys_slot.has_value()) {\n    if (auto error = cluster::SlotOwnershipError(*keys_slot);\n        !error.status.has_value() || error.status.value() != facade::OpStatus::OK) {\n      return ErrorReply{std::move(error)};\n    }\n  }\n\n  return nullopt;\n}\n\n// TODO(kostas) refactor. Almost 1-1 with CheckKeyOwnership() above.\nstd::optional<facade::ErrorReply> Service::TakenOverSlotError(const CommandId& cid, CmdArgList args,\n                                                              const ConnectionContext& dfly_cntx) {\n  if (cid.first_key_pos() == 0 && cid.PubSubKind() != CO::PubSubKind::SHARDED) {\n    return nullopt;  // No key command.\n  }\n\n  OpResult<KeyIndex> key_index_res = FindKeys(&cid, args);\n\n  if (!key_index_res) {\n    return ErrorReply{key_index_res.status()};\n  }\n\n  const auto& key_index = *key_index_res;\n\n  UniqueSlotChecker slot_checker;\n  for (string_view key : key_index.Range(args)) {\n    slot_checker.Add(key);\n  }\n\n  if (slot_checker.IsCrossSlot()) {\n    return ErrorReply{kCrossSlotError};\n  }\n\n  optional<SlotId> keys_slot = slot_checker.GetUniqueSlotId();\n  if (!keys_slot.has_value()) {\n    return nullopt;\n  }\n\n  if (auto error = cluster::SlotOwnershipError(*keys_slot);\n      !error.status.has_value() || error.status.value() != facade::OpStatus::OK) {\n    return ErrorReply{std::move(error)};\n  }\n  const auto cluster_config = 
cluster::ClusterConfig::Current();\n  if (!cluster_config)\n    return facade::ErrorReply{facade::kClusterNotConfigured};\n\n  // Moved regardless, we have been taken over\n  cluster::ClusterNodeInfo redirect = cluster_config->GetMasterNodeForSlot(*keys_slot);\n  return facade::ErrorReply{\n      absl::StrCat(\"-MOVED \", *keys_slot, \" \", redirect.ip, \":\", redirect.port), \"MOVED\"};\n}\n\n// Return OK if all keys are allowed to be accessed: either declared in EVAL or\n// transaction is running in global or non-atomic mode.\noptional<ErrorReply> CheckKeysDeclared(const ConnectionState::ScriptInfo& eval_info,\n                                       const CommandId* cid, CmdArgList args,\n                                       Transaction::MultiMode multi_mode) {\n  // We either scheduled on all shards or re-schedule for each operation,\n  // so we are not restricted to any keys.\n  if (multi_mode == Transaction::GLOBAL || multi_mode == Transaction::NON_ATOMIC)\n    return nullopt;\n\n  OpResult<KeyIndex> key_index_res = DetermineKeys(cid, args);\n  if (!key_index_res)\n    return ErrorReply{key_index_res.status()};\n\n  // TODO: Switch to transaction internal locked keys once single hop multi transactions are merged\n  // const auto& locked_keys = trans->GetMultiKeys();\n  const auto& locked_tags = eval_info.lock_tags;\n  for (string_view key : key_index_res->Range(args)) {\n    if (!locked_tags.contains(LockTag{key})) {\n      return ErrorReply(absl::StrCat(kUndeclaredKeyErr, \", key: \", key));\n    }\n  }\n\n  return nullopt;\n}\n\nstatic optional<ErrorReply> VerifyConnectionAclStatus(const CommandId* cid,\n                                                      const ConnectionContext* cntx,\n                                                      string_view error_msg, ArgSlice tail_args) {\n  if (!acl::IsUserAllowedToInvokeCommand(*cntx, *cid, tail_args)) {\n    return ErrorReply(absl::StrCat(\"-NOPERM \", cntx->authed_username, \" \", error_msg));\n  }\n  
return nullopt;\n}\n\nbool ShouldDenyOnOOM(const CommandContext& cmd_cntx) {\n  DCHECK_NE(cmd_cntx.start_time_ns, 0u);\n  ServerState& etl = *ServerState::tlocal();\n  if ((cmd_cntx.cid()->opt_mask() & CO::DENYOOM) && etl.is_master) {\n    auto memory_stats = etl.GetMemoryUsage(cmd_cntx.start_time_ns);\n\n    size_t limit = max_memory_limit.load(memory_order_relaxed);\n    if (memory_stats.used_mem > limit ||\n        (etl.rss_oom_deny_ratio > 0 && memory_stats.rss_mem > (limit * etl.rss_oom_deny_ratio))) {\n      DLOG(WARNING) << \"Out of memory, used \" << memory_stats.used_mem << \" ,rss \"\n                    << memory_stats.rss_mem << \" ,limit \" << limit;\n      etl.stats.oom_error_cmd_cnt++;\n      return true;\n    }\n  }\n  return false;\n}\n\nstd::optional<ErrorReply> Service::VerifyCommandState(const CommandId& cid, CmdArgList tail_args,\n                                                      const ConnectionContext& dfly_cntx) {\n  ServerState& etl = *ServerState::tlocal();\n\n  // If there is no connection owner, it means the command it being called\n  // from another command or used internally, therefore is always permitted.\n  if (dfly_cntx.conn() != nullptr && !dfly_cntx.conn()->IsPrivileged() && cid.IsRestricted()) {\n    VLOG(1) << \"Non-admin attempt to execute \" << cid.name() << \" \" << tail_args << \" \"\n            << ConnectionLogContext(dfly_cntx.conn());\n    return ErrorReply{\"Cannot execute restricted command (admin only)\", kRestrictDenied};\n  }\n\n  if (auto err = cid.Validate(tail_args); err)\n    return err;\n\n  // Check if the command is allowed to execute under this global state\n  bool allowed_by_state = true;\n  const GlobalState gstate = etl.gstate();\n  switch (gstate) {\n    case GlobalState::LOADING:\n      allowed_by_state = dfly_cntx.journal_emulated || (cid.opt_mask() & CO::LOADING);\n      break;\n    case GlobalState::SHUTTING_DOWN:\n      allowed_by_state = false;\n      break;\n    case GlobalState::TAKEN_OVER:\n 
     // Only PING, admin commands, and all commands via admin connections are allowed\n      // we prohibit even read commands, because read commands running in pipeline can take a while\n      // to send all data to a client which leads to fail in takeover\n      allowed_by_state =\n          dfly_cntx.conn()->IsPrivileged() || (cid.opt_mask() & CO::ADMIN) || cid.name() == \"PING\";\n      break;\n    default:\n      break;\n  }\n\n  if (!allowed_by_state) {\n    VLOG(1) << \"Command \" << cid.name() << \" not executed because global state is \" << gstate;\n\n    if (gstate == GlobalState::LOADING) {\n      return ErrorReply(kLoadingErr);\n    }\n\n    if (gstate == GlobalState::TAKEN_OVER) {\n      if (IsClusterEnabled()) {\n        if (auto err = TakenOverSlotError(cid, tail_args, dfly_cntx); err) {\n          return err;\n        }\n      }\n      return ErrorReply(kLoadingErr);\n    }\n\n    return ErrorReply{StrCat(\"Can not execute during \", GlobalStateName(gstate))};\n  }\n\n  string_view cmd_name{cid.name()};\n\n  if (dfly_cntx.req_auth && !dfly_cntx.authenticated) {\n    if (cmd_name != \"AUTH\" && cmd_name != \"QUIT\" && cmd_name != \"HELLO\") {\n      return ErrorReply{\"-NOAUTH Authentication required.\", facade::kNoAuthErrType};\n    }\n  }\n\n  // only reset and quit are allow if this connection is used for monitoring\n  if (dfly_cntx.monitor && (cmd_name != \"RESET\" && cmd_name != \"QUIT\"))\n    return ErrorReply{\"Replica can't interact with the keyspace\"};\n\n  bool is_write_cmd = cid.IsJournaled();\n  bool is_trans_cmd = cid.MultiControlKind() == CO::MultiControlKind::EXEC;\n  bool under_script = dfly_cntx.conn_state.script_info != nullptr;\n  bool multi_active = dfly_cntx.conn_state.exec_info.IsCollecting() && !is_trans_cmd;\n\n  if (!etl.is_master && is_write_cmd && !dfly_cntx.is_replicating)\n    return ErrorReply{\"-READONLY You can't write against a read only replica.\"};\n\n  if (multi_active) {\n    if (cmd_name == \"WATCH\" || 
cmd_name == \"FLUSHALL\" || cmd_name == \"FLUSHDB\" ||\n        absl::EndsWith(cmd_name, \"SUBSCRIBE\"))\n      return ErrorReply{absl::StrCat(\"'\", cmd_name, \"' not allowed inside a transaction\")};\n  }\n\n  if (IsClusterEnabled()) {\n    if (auto err = CheckKeysOwnership(cid, tail_args, dfly_cntx); err)\n      return err;\n  }\n\n  if (under_script && (cid.opt_mask() & CO::NOSCRIPT))\n    return ErrorReply{\"This Redis command is not allowed from script\"};\n\n  if (under_script) {\n    auto* tx = dfly_cntx.transaction;\n    DCHECK(tx);\n    // The following commands access shards arbitrarily without having keys, so they can only be run\n    // non atomically or globally.\n    Transaction::MultiMode mode = tx->GetMultiMode();\n    bool shard_access = (cid.opt_mask()) & (CO::GLOBAL_TRANS | CO::NO_KEY_TRANSACTIONAL);\n    if (shard_access && (mode != Transaction::GLOBAL && mode != Transaction::NON_ATOMIC))\n      return ErrorReply(\"This Redis command is not allowed from script\");\n\n    if (cid.IsTransactional()) {\n      auto err = CheckKeysDeclared(*dfly_cntx.conn_state.script_info, &cid, tail_args, mode);\n\n      if (err.has_value()) {\n        VLOG(1) << \"CheckKeysDeclared failed with error \" << err->ToSv() << \" for command \"\n                << cid.name();\n        return err;\n      }\n    }\n\n    if (dfly_cntx.conn_state.script_info->read_only && is_write_cmd) {\n      return ErrorReply{\"Write commands are not allowed from read-only scripts\"};\n    }\n  }\n\n  return VerifyConnectionAclStatus(&cid, &dfly_cntx, \"has no ACL permissions\", tail_args);\n}\n\nDispatchResult Service::DispatchCommand(facade::ParsedArgs args, facade::ParsedCommand* parsed_cmd,\n                                        facade::AsyncPreference async_pref) {\n  DCHECK(!args.empty());\n  DCHECK_NE(0u, shard_set->size()) << \"Init was not called\";\n\n  // We must resolve the command ID (cid) before the guard block.\n  // The following switch statement relies on the 
command's metadata\n  // (e.g., SupportsAsync()) to evaluate execution preferences,\n  // making this lookup a hard dependency for the logic below.\n  string cmd = absl::AsciiStrToUpper(args.Front());\n  const auto [cid, args_no_cmd] = registry_.FindExtended(cmd, args.Tail());\n  if (cid == nullptr) {\n    if (async_pref != AsyncPreference::ONLY_SYNC) {\n      parsed_cmd->SetDeferredReply();\n    }\n    parsed_cmd->SendError(ReportUnknownCmd(cmd));\n    return DispatchResult::ERROR;\n  }\n\n  // Determine if command should run async\n  switch (async_pref) {\n    case AsyncPreference::ONLY_SYNC:\n      break;\n    case AsyncPreference::ONLY_ASYNC:\n      if (!cid->SupportsAsync())\n        return DispatchResult::WOULD_BLOCK;\n      [[fallthrough]];\n    case AsyncPreference::PREFER_ASYNC:\n      if (cid->SupportsAsync())\n        parsed_cmd->SetDeferredReply();\n      break;\n  };\n\n  CommandContext* cmd_cntx = static_cast<CommandContext*>(parsed_cmd);\n  ConnectionContext* dfly_cntx = cmd_cntx->server_conn_cntx();\n\n  if (dfly_cntx->async_dispatch && cid->IsBlocking()) {\n    ++ServerState::tlocal()->stats.blocking_commands_in_pipelines;\n    cmd_cntx->conn()->FlushReplies();\n  }\n\n  ArgSlice tail_args;\n  if (cmd_cntx->IsDeferredReply()) {\n    args_no_cmd.ToVec(&cmd_cntx->arg_slice_backing);  // Ensure lifetime\n    tail_args = cmd_cntx->arg_slice_backing;\n  } else {\n    tail_args = args_no_cmd.ToSlice(&cmd_cntx->arg_slice_backing);\n  }\n\n  // Block on CLIENT PAUSE if needed\n  if (auto* conn = cmd_cntx->conn(); conn /* replica context doesn't have an owner */) {\n    if (VLOG_IS_ON(2)) {\n      bool under_script = bool(dfly_cntx->conn_state.script_info);\n      LOG(INFO) << \"Got (\" << conn->GetClientId() << \"): \" << (under_script ? 
\"LUA \" : \"\")\n                << cid->name() << \" \" << tail_args << \" in dbid=\" << dfly_cntx->conn_state.db_index;\n    }\n\n    // Check pause state only if it is a top level transaction.\n    if (dfly_cntx->transaction == nullptr)\n      CheckPauseState(conn, dfly_cntx, cid);\n  }\n\n  // Verify command state\n  if (auto err = VerifyCommandState(*cid, tail_args, *dfly_cntx); err) {\n    LOG_IF(WARNING, dfly_cntx->replica_conn || !dfly_cntx->conn() /* no owner in replica context */)\n        << \"VerifyCommandState error: \" << err->ToSv();\n    if (auto& exec_info = dfly_cntx->conn_state.exec_info; exec_info.IsCollecting())\n      exec_info.state = ConnectionState::ExecInfo::EXEC_ERROR;\n\n    // We need to skip this because ACK's should not be replied to\n    // Bonus points because this allows to continue replication with ACL users who got\n    // their access revoked and reinstated\n\n    if (cid->name() == \"REPLCONF\") {\n      DCHECK_GE(args_no_cmd.size(), 1u);\n      // We should not reply to REPLCONF ACKS.\n      if (absl::EqualsIgnoreCase(args_no_cmd.Front(), \"ACK\")) {\n        server_family_.GetDflyCmd()->OnClose(\n            dfly_cntx->conn_state.replication_info.repl_session_id);\n        return DispatchResult::ERROR;\n      }\n    }\n    DCHECK(!err->status);\n    cmd_cntx->SendError(*err);\n    return DispatchResult::ERROR;\n  }\n\n  VLOG_IF(1, cid->opt_mask() & CO::CommandOpt::DANGEROUS)\n      << \"Executing dangerous command \" << cid->name() << \" \"\n      << ConnectionLogContext(dfly_cntx->conn());\n\n  // If inside MULTI block, store command\n  bool is_trans_cmd = cid->MultiControlKind() == CO::MultiControlKind::EXEC;\n  if (dfly_cntx->conn_state.exec_info.IsCollecting() && !is_trans_cmd) {\n    StoreInMultiBlock(dfly_cntx, cid, tail_args);\n    cmd_cntx->SendSimpleString(\"QUEUED\");\n    return DispatchResult::OK;\n  }\n\n  auto [dispatched_tx, status] = PrepareTransaction(cid, tail_args, cmd_cntx);\n  if (status != OpStatus::OK) 
{\n    DCHECK(!dispatched_tx);\n    cmd_cntx->SendError(StatusToMsg(status));\n    return DispatchResult::ERROR;\n  }\n\n  DispatchResult res = InvokeCmd(tail_args, cmd_cntx);\n  if (dispatched_tx) {\n    DCHECK(dfly_cntx->transaction == dispatched_tx.get());\n    dfly_cntx->transaction = nullptr;\n  }\n\n  if ((res != DispatchResult::OK) && (res != DispatchResult::OOM)) {\n    cmd_cntx->SendError(\"Internal Error\");\n    dfly_cntx->conn()->MarkForClose();\n  }\n\n  return res;\n}\n\nclass ReplyGuard {\n public:\n  explicit ReplyGuard(const CommandContext& cmd_cntx) {\n    const bool is_script = bool(cmd_cntx.server_conn_cntx()->conn_state.script_info);\n    cid_name_ = cmd_cntx.cid()->name();\n    const bool is_one_of = (cid_name_ == \"REPLCONF\" || cid_name_ == \"DFLY\");\n    bool is_mcache = cmd_cntx.mc_command() != nullptr;\n    const bool is_no_reply_memcache =\n        (is_mcache && cmd_cntx.mc_command()->cmd_flags.no_reply) || cid_name_ == \"QUIT\";\n    const bool should_dcheck = !is_one_of && !is_script && !is_no_reply_memcache;\n    if (should_dcheck) {\n      cmd_cntx_ = &cmd_cntx;\n      replies_recorded_ = cmd_cntx.rb()->RepliesRecorded();\n    }\n  }\n\n  ~ReplyGuard() {\n    if (cmd_cntx_ && !cmd_cntx_->IsDeferredReply()) {\n      auto* rb = cmd_cntx_->rb();\n      DCHECK_GT(rb->RepliesRecorded(), replies_recorded_) << cid_name_ << \" \" << typeid(*rb).name();\n    }\n  }\n\n private:\n  const CommandContext* cmd_cntx_ = nullptr;\n  size_t replies_recorded_ = 0;\n  std::string_view cid_name_;\n};\n\nDispatchResult Service::InvokeCmd(CmdArgList tail_args, CommandContext* cmd_cntx) {\n  auto* cid = cmd_cntx->cid();\n  DCHECK(cid);\n  DCHECK(!cid->Validate(tail_args));\n\n  cmd_cntx->start_time_ns = absl::GetCurrentTimeNanos();\n\n  ConnectionContext* cntx = cmd_cntx->server_conn_cntx();\n  auto* builder = cmd_cntx->rb();\n  DCHECK(builder);\n  DCHECK(cntx);\n\n  if (ShouldDenyOnOOM(*cmd_cntx)) {\n    
cmd_cntx->SendError(ErrorReply{OpStatus::OUT_OF_MEMORY});\n    return DispatchResult::OOM;\n  }\n\n  bool has_monitors = !ServerState::tlocal()->Monitors().Empty();\n  if (cid->CanBeMonitored() && has_monitors) {\n    DispatchMonitor(cntx, cid, tail_args);\n  }\n\n  ServerState::tlocal()->RecordCmd(cntx->has_main_or_memcache_listener);\n  TrackIfNeeded(cmd_cntx);\n  auto* tx = cmd_cntx->tx();\n\n  // For EVAL[] and EXEC/DISCARD, clean up state.\n  // We don't do it directly in commands to allow some introspection after execution (slowlog).\n  absl::Cleanup mck_cleanup = [cntx, cid, mck = cid->MultiControlKind()]() {\n    if (mck && *mck == CO::MultiControlKind::EXEC && cid->name() != \"MULTI\")\n      MultiCleanup(cntx);\n    else if (mck && *mck == CO::MultiControlKind::EVAL)\n      cntx->conn_state.script_info.reset();\n  };\n\n#ifndef NDEBUG\n  // Verifies that we reply to the client when needed.\n  ReplyGuard reply_guard(*cmd_cntx);\n#endif\n  builder->ConsumeLastError();  // throw away last error\n  try {\n    cid->Invoke(tail_args, cmd_cntx);\n  } catch (std::exception& e) {\n    LOG(ERROR) << \"Internal error, system probably unstable \" << e.what();\n    return DispatchResult::ERROR;\n  }\n\n  DispatchResult res = DispatchResult::OK;\n  if (std::string reason = builder->ConsumeLastError(); !reason.empty()) {\n    // Set flag if OOM reported\n    if (reason == kOutOfMemory) {\n      res = DispatchResult::OOM;\n    }\n    VLOG(2) << FailedCommandToString(cid->name(), tail_args, reason);\n    if (ShouldLogError(*cid, reason, tail_args)) {\n      LOG_EVERY_T(WARNING, 1) << FailedCommandToString(cid->name(), tail_args, reason);\n    }\n  }\n\n  if (cntx->conn_state.tracking_info_.IsTrackingOn()) {\n    if ((!tx && cid->name() != \"MULTI\") || (tx && !tx->IsMulti())) {\n      // Each time we execute a command we need to increase the sequence number in\n      // order to properly track clients when OPTIN is used.\n      // We don't do this for `multi/exec` because 
it would break the\n      // semantics, i.e, CACHING should stick for all commands following\n      // the CLIENT CACHING ON within a multi/exec block\n      cntx->conn_state.tracking_info_.IncrementSequenceNumber();\n    }\n  }\n\n  cmd_cntx->RecordLatency(tail_args);\n\n  if (tx && !cntx->conn_state.exec_info.IsRunning() && cntx->conn_state.script_info == nullptr) {\n    cntx->last_command_debug.clock = tx->txid();\n  }\n\n  return res;\n}\n\nDispatchManyResult Service::DispatchManyCommands(std::function<facade::ParsedArgs()> arg_gen,\n                                                 unsigned count, SinkReplyBuilder* builder,\n                                                 facade::ConnectionContext* cntx) {\n  ConnectionContext* dfly_cntx = static_cast<ConnectionContext*>(cntx);\n  DCHECK(!dfly_cntx->conn_state.exec_info.IsRunning());\n  DCHECK_EQ(builder->GetProtocol(), Protocol::REDIS);\n  DCHECK_GT(count, 1u);\n\n  auto* ss = dfly::ServerState::tlocal();\n  // Don't even start when paused. 
We can only continue if DispatchTracker is aware of us running.\n  if (ss->IsPaused())\n    return {.processed = 0, .account_in_stats = false};\n\n  vector<StoredCmd> stored_cmds;\n  intrusive_ptr<Transaction> dist_trans;\n  uint32_t dispatched = 0;\n  MultiCommandSquasher::Stats stats;\n\n  uint64_t start_cycles = base::CycleClock::Now();\n  CommandContext dummy_cmd_cntx;\n  dummy_cmd_cntx.Init(builder, dfly_cntx);\n\n  auto perform_squash = [&] {\n    if (stored_cmds.empty())\n      return;\n\n    if (!dist_trans) {\n      dist_trans.reset(new Transaction{exec_cid_});\n      dist_trans->StartMultiNonAtomic();\n    } else {\n      // Reset to original command id as it's changed during squashing\n      dist_trans->MultiSwitchCmd(exec_cid_);\n    }\n\n    dfly_cntx->transaction = dist_trans.get();\n    MultiCommandSquasher::Opts opts;\n    opts.verify_commands = true;\n    opts.max_squash_size = ss->max_squash_cmd_num;\n\n    stats += MultiCommandSquasher::Execute(absl::MakeSpan(stored_cmds),\n                                           static_cast<RedisReplyBuilder*>(builder), dfly_cntx,\n                                           this, opts);\n    dfly_cntx->transaction = nullptr;\n\n    dispatched += stored_cmds.size();\n    stored_cmds.clear();\n  };\n\n  for (unsigned i = 0; i < count; i++) {\n    ParsedArgs args = arg_gen();\n    string cmd = absl::AsciiStrToUpper(args.Front());\n    const auto [cid, tail_args] = registry_.FindExtended(cmd, args.Tail());\n\n    // MULTI...EXEC commands need to be collected into a single context, so squashing is not\n    // possible\n    const bool is_multi = dfly_cntx->conn_state.exec_info.IsCollecting() ||\n                          (cid != nullptr && cid->MultiControlKind() == CO::MultiControlKind::EXEC);\n\n    // Generally, executing any multi-transactions (including eval) is not possible because they\n    // might request a stricter multi mode than non-atomic which is used for squashing.\n    // TODO: By allowing promoting 
non-atomic multit transactions to lock-ahead for specific command\n    // invocations, we can potentially execute multiple eval in parallel, which is very powerful\n    // paired with shardlocal eval\n    const bool is_eval = cid != nullptr && cid->MultiControlKind() == CO::MultiControlKind::EVAL;\n    const bool is_blocking = cid != nullptr && cid->IsBlocking();\n\n    if (!is_multi && !is_eval && !is_blocking && cid != nullptr) {\n      stored_cmds.reserve(count);\n      stored_cmds.emplace_back(cid, tail_args);  // Shallow copy\n      continue;\n    }\n\n    // Squash accumulated commands\n    perform_squash();\n\n    // Stop accumulating when a pause is requested, fall back to regular dispatch\n    if (ss->IsPaused())\n      break;\n\n    // Dispatch non squashed command only after all squshed commands were executed and replied\n    DispatchCommand(args, &dummy_cmd_cntx, AsyncPreference::ONLY_SYNC);\n    dispatched++;\n  }\n\n  perform_squash();\n\n  if (dist_trans)\n    dist_trans->UnlockMulti();\n\n  uint64_t total_usec = base::CycleClock::ToUsec(base::CycleClock::Now() - start_cycles);\n  bool account_in_stats = total_usec > squash_stats_latency_lower_limit_cached;\n  if (account_in_stats) {\n    auto* ss = ServerState::tlocal();\n    ss->stats.multi_squash_exec_hop_usec += stats.hop_usec;\n    ss->stats.multi_squash_exec_reply_usec += stats.reply_usec;\n    ss->stats.multi_squash_hops += stats.hops;\n    ss->stats.squashed_commands += stats.squashed_commands;\n  } else {\n    ss->stats.squash_stats_ignored++;\n  }\n  return {.processed = dispatched, .account_in_stats = account_in_stats};\n}\n\nDispatchResult Service::DispatchMC(facade::ParsedCommand* parsed_cmd,\n                                   facade::AsyncPreference apref) {\n  CommandContext* cmd_ctx = static_cast<CommandContext*>(parsed_cmd);\n  const auto& cmd = *parsed_cmd->mc_command();\n\n  auto* cntx = cmd_ctx->server_conn_cntx();\n  DCHECK(cntx->transaction == nullptr);\n\n  string_view 
cmd_name, cmd_opt;\n  char buffer[absl::numbers_internal::kFastToBufferSize];\n\n  switch (cmd.type) {\n    case MemcacheParser::REPLACE:\n      cmd_name = \"SET\";\n      cmd_opt = \"XX\";\n      break;\n    case MemcacheParser::SET:\n      cmd_name = \"SET\";\n      break;\n    case MemcacheParser::ADD:\n      cmd_name = \"SET\";\n      cmd_opt = \"NX\";\n      break;\n    case MemcacheParser::DELETE:\n      cmd_name = \"DEL\";\n      break;\n    case MemcacheParser::INCR:\n      cmd_name = \"INCRBY\";\n      absl::numbers_internal::FastIntToBuffer(cmd.delta, buffer);\n      cmd_opt = buffer;\n      break;\n    case MemcacheParser::DECR:\n      cmd_name = \"DECRBY\";\n      absl::numbers_internal::FastIntToBuffer(cmd.delta, buffer);\n      cmd_opt = buffer;\n      break;\n    case MemcacheParser::APPEND:\n      cmd_name = \"APPEND\";\n      break;\n    case MemcacheParser::PREPEND:\n      cmd_name = \"PREPEND\";\n      break;\n    case MemcacheParser::GAT:\n    case MemcacheParser::GATS:\n      cmd_name = \"GAT\";\n      break;\n    case MemcacheParser::GET:\n    case MemcacheParser::GETS:\n      cmd_name = \"MGET\";\n      break;\n    case MemcacheParser::FLUSHALL:\n      cmd_name = \"FLUSHDB\";\n      break;\n    case MemcacheParser::QUIT:\n      cmd_name = \"QUIT\";\n      break;\n    case MemcacheParser::STATS:\n      if (apref == AsyncPreference::ONLY_ASYNC)\n        return DispatchResult::WOULD_BLOCK;\n      server_family_.StatsMC(cmd.key(), cmd_ctx);\n      return DispatchResult::OK;\n    case MemcacheParser::VERSION:\n      if (apref == AsyncPreference::ONLY_ASYNC)\n        return DispatchResult::WOULD_BLOCK;\n      cmd_ctx->SendSimpleString(\"VERSION 1.6.0 DF\");\n      return DispatchResult::OK;\n    default:\n      if (apref != AsyncPreference::ONLY_SYNC) {\n        parsed_cmd->SetDeferredReply();\n      }\n      cmd_ctx->SendSimpleString(\"CLIENT_ERROR bad command line format\");\n      return DispatchResult::ERROR;\n  }\n\n  
absl::InlinedVector<string_view, 8> args = {cmd_name};\n\n  bool is_store = MemcacheParser::IsStoreCmd(cmd.type);\n  bool is_read = !is_store && cmd.type < MemcacheParser::QUIT;\n  if (!is_read) {\n    if (!cmd.backed_args->empty())\n      args.emplace_back(cmd.key());\n\n    if (is_store)\n      args.emplace_back(cmd.value());\n    if (!cmd_opt.empty())\n      args.emplace_back(cmd_opt);\n\n    if (cmd.expire_ts && cmd_name == \"SET\") {\n      args.emplace_back(\"EXAT\");\n      absl::numbers_internal::FastIntToBuffer(cmd.expire_ts, buffer);\n      args.emplace_back(buffer);\n    }\n  } else {  // is_read\n    args.insert(args.end(), cmd.backed_args->begin(), cmd.backed_args->end());\n  }\n\n  return DispatchCommand(ParsedArgs{args}, parsed_cmd, apref);\n}\n\nErrorReply Service::ReportUnknownCmd(string_view cmd_name) {\n  constexpr uint8_t kMaxUknownCommands = 64;\n  constexpr uint8_t kMaxUknownCommandLength = 20;\n\n  lock_guard lk(mu_);\n  if (unknown_cmds_.size() <= kMaxUknownCommands && cmd_name.size() <= kMaxUknownCommandLength)\n    unknown_cmds_[cmd_name]++;\n\n  return ErrorReply{StrCat(\"unknown command `\", cmd_name, \"`\"), \"unknown_cmd\"};\n}\n\nbool RequirePrivilegedAuth() {\n  return !GetFlag(FLAGS_admin_nopass);\n}\n\nfacade::ConnectionContext* Service::CreateContext(facade::Connection* owner) {\n  auto cred = user_registry_.GetCredentials(\"default\");\n  ConnectionContext* res = new ConnectionContext{owner, std::move(cred)};\n  res->ns = &namespaces->GetOrInsert(\"\");\n\n  if (owner->socket()->IsUDS()) {\n    res->req_auth = false;\n    res->skip_acl_validation = true;\n  } else if (owner->IsPrivileged() && RequirePrivilegedAuth()) {\n    res->req_auth = !GetPassword().empty();\n  } else if (!owner->IsPrivileged()) {\n    // Memcached protocol doesn't support authentication, so we don't require it\n    if (owner->GetProtocol() == Protocol::MEMCACHE) {\n      res->req_auth = false;\n      res->authenticated = true;  // Automatically 
authenticated for Memcached protocol\n    } else {\n      res->req_auth = !user_registry_.AuthUser(\"default\", \"\");\n    }\n  }\n\n  // a bit of a hack. I set up breaker callback here for the owner.\n  // Should work though it's confusing to have it here.\n  owner->RegisterBreakHook([res](uint32_t) {\n    if (res->transaction)\n      res->transaction->CancelBlocking(nullptr);\n  });\n\n  return res;\n}\n\nfacade::ParsedCommand* Service::AllocateParsedCommand() {\n  return new CommandContext{};\n}\n\nconst CommandId* Service::FindCmd(std::string_view cmd) const {\n  return registry_.Find(registry_.RenamedOrOriginal(cmd));\n}\n\nbool Service::IsLocked(Namespace* ns, DbIndex db_index, std::string_view key) const {\n  ShardId sid = Shard(key, shard_count());\n  bool is_open = pp_.at(sid)->AwaitBrief([db_index, key, ns, sid] {\n    return ns->GetDbSlice(sid).CheckLock(IntentLock::EXCLUSIVE, db_index, key);\n  });\n  return !is_open;\n}\n\nbool Service::IsShardSetLocked() const {\n  std::atomic_uint res{0};\n\n  shard_set->RunBriefInParallel([&](EngineShard* shard) {\n    bool unlocked = shard->shard_lock()->Check(IntentLock::SHARED);\n    res.fetch_add(!unlocked, memory_order_relaxed);\n  });\n\n  return res.load() != 0;\n}\n\nabsl::flat_hash_map<std::string, unsigned> Service::UknownCmdMap() const {\n  lock_guard lk(mu_);\n  return unknown_cmds_;\n}\n\nvoid Service::Quit(CmdArgList args, CommandContext* cmd_cntx) {\n  if (cmd_cntx->rb()->GetProtocol() == Protocol::REDIS)\n    cmd_cntx->rb()->SendOk();\n\n  auto* cntx = cmd_cntx->server_conn_cntx();\n  DeactivateMonitoring(cntx);\n  cmd_cntx->conn()->MarkForClose();\n}\n\nvoid Service::Multi(CmdArgList args, CommandContext* cmd_cntx) {\n  auto& conn_state = cmd_cntx->server_conn_cntx()->conn_state;\n  if (conn_state.exec_info.IsCollecting()) {\n    return cmd_cntx->SendError(\"MULTI calls can not be nested\");\n  }\n  conn_state.exec_info.state = ConnectionState::ExecInfo::EXEC_COLLECT;\n  // TODO: to protect against 
huge exec transactions.\n  return cmd_cntx->rb()->SendOk();\n}\n\nvoid Service::Watch(CmdArgList args, CommandContext* cmd_cntx) {\n  auto* cntx = cmd_cntx->server_conn_cntx();\n  auto& exec_info = cntx->conn_state.exec_info;\n\n  // Skip if EXEC will already fail due previous WATCH.\n  if (exec_info.watched_dirty.load(memory_order_relaxed)) {\n    return cmd_cntx->rb()->SendOk();\n  }\n\n  atomic_uint32_t keys_existed = 0;\n  auto cb = [&](Transaction* t, EngineShard* shard) {\n    ShardId shard_id = shard->shard_id();\n    ShardArgs largs = t->GetShardArgs(shard_id);\n    for (auto k : largs) {\n      t->GetDbSlice(shard_id).RegisterWatchedKey(cntx->db_index(), k, &exec_info.watched_dirty);\n    }\n\n    auto res = GenericFamily::OpExists(t->GetOpArgs(shard), largs);\n    keys_existed.fetch_add(res.value_or(0), memory_order_relaxed);\n    return OpStatus::OK;\n  };\n  cmd_cntx->tx()->ScheduleSingleHop(std::move(cb));\n\n  // Duplicate keys are stored to keep correct count.\n  exec_info.watched_existed += keys_existed.load(memory_order_relaxed);\n  for (string_view key : args) {\n    exec_info.watched_keys.emplace_back(cntx->db_index(), key);\n  }\n\n  return cmd_cntx->rb()->SendOk();\n}\n\nvoid Service::Unwatch(CmdArgList args, CommandContext* cmd_cntx) {\n  auto* cntx = cmd_cntx->server_conn_cntx();\n  UnwatchAllKeys(cntx->ns, &cntx->conn_state.exec_info);\n  return cmd_cntx->rb()->SendOk();\n}\n\noptional<CapturingReplyBuilder::Payload> Service::FlushEvalAsyncCmds(ConnectionContext* cntx,\n                                                                     bool force) {\n  auto& info = cntx->conn_state.script_info;\n  auto* tx = cntx->transaction;\n  size_t used_mem = info->async_cmds_heap_mem + info->async_cmds.size() * sizeof(StoredCmd);\n\n  if ((info->async_cmds.empty() || !force) && used_mem < info->async_cmds_heap_limit)\n    return nullopt;\n\n  ++ServerState::tlocal()->stats.eval_squashed_flushes;\n\n  auto* eval_cid = registry_.Find(\"EVAL\");\n  
DCHECK(eval_cid);\n  tx->MultiSwitchCmd(eval_cid);\n\n  CapturingReplyBuilder crb{ReplyMode::ONLY_ERR};\n  MultiCommandSquasher::Opts opts;\n  opts.verify_commands = true;\n  opts.error_abort = true;\n  opts.max_squash_size = ServerState::tlocal()->max_squash_cmd_num;\n  MultiCommandSquasher::Execute(absl::MakeSpan(info->async_cmds), &crb, cntx, this, opts);\n\n  info->async_cmds_heap_mem = 0;\n  info->async_cmds.clear();\n\n  auto reply = crb.Take();\n  return CapturingReplyBuilder::TryExtractError(reply) ? make_optional(std::move(reply)) : nullopt;\n}\n\nvoid Service::CallFromScript(Interpreter::CallArgs& ca, CommandContext* cmd_cntx) {\n  auto* tx = cmd_cntx->tx();\n  DCHECK(tx);\n  auto* cntx = cmd_cntx->server_conn_cntx();\n  auto& info = cntx->conn_state.script_info;\n  info->stats.num_commands++;\n\n  InterpreterReplier replier(ca.translator);\n  optional<ErrorReply> findcmd_err;\n  if (ca.async) {\n    string cmd = absl::AsciiStrToUpper(ca.args[0]);\n\n    // Full command verification happens during squashed execution\n    if (auto* cid = registry_.Find(cmd); cid != nullptr) {\n      auto reply_mode = ca.error_abort ? 
ReplyMode::ONLY_ERR : ReplyMode::NONE;\n      info->async_cmds.emplace_back(cid, ca.args.subspan(1), reply_mode);\n      info->async_cmds_heap_mem += info->async_cmds.back().UsedMemory();\n    } else if (ca.error_abort) {  // If we don't abort on errors, we can ignore it completely\n      findcmd_err = ReportUnknownCmd(ca.args[0]);\n    }\n  }\n\n  if (auto err = FlushEvalAsyncCmds(cntx, !ca.async || findcmd_err.has_value()); err) {\n    CapturingReplyBuilder::Apply(std::move(*err), &replier);  // forward error to lua\n    *ca.requested_abort = true;\n    return;\n  }\n\n  if (findcmd_err.has_value()) {\n    auto* prev = cmd_cntx->SwapReplier(&replier);\n    cmd_cntx->SendError(*findcmd_err);\n    *ca.requested_abort |= ca.error_abort;\n    cmd_cntx->SwapReplier(prev);\n  }\n\n  if (ca.async)\n    return;\n\n  auto* prev = cmd_cntx->SwapReplier(&replier);\n  DispatchCommand(ParsedArgs{ca.args}, cmd_cntx, AsyncPreference::ONLY_SYNC);\n  cmd_cntx->SwapReplier(prev);\n}\n\nvoid Service::Eval(CmdArgList args, CommandContext* cmd_cntx, bool read_only) {\n  string_view body = ArgS(args, 0);\n\n  auto* rb = static_cast<RedisReplyBuilder*>(cmd_cntx->rb());\n  if (body.empty()) {\n    return rb->SendNull();\n  }\n\n  auto* cntx = cmd_cntx->server_conn_cntx();\n  BorrowedInterpreter interpreter{cmd_cntx->tx(), &cntx->conn_state};\n  auto res = server_family_.script_mgr()->Insert(body, interpreter);\n  if (!res)\n    return cmd_cntx->SendError(res.error().Format(), facade::kScriptErrType);\n\n  string sha{std::move(res.value())};\n\n  CallSHA(args, sha, interpreter, read_only, cmd_cntx);\n}\n\nvoid Service::EvalRo(CmdArgList args, CommandContext* cmd_cntx) {\n  Eval(args, cmd_cntx, true);\n}\n\nvoid Service::EvalSha(CmdArgList args, CommandContext* cmd_cntx, bool read_only) {\n  string sha = absl::AsciiStrToLower(ArgS(args, 0));\n  auto* cntx = cmd_cntx->server_conn_cntx();\n  BorrowedInterpreter interpreter{cmd_cntx->tx(), &cntx->conn_state};\n  CallSHA(args, sha, 
interpreter, read_only, cmd_cntx);\n}\n\nvoid Service::EvalShaRo(CmdArgList args, CommandContext* cmd_cntx) {\n  EvalSha(args, cmd_cntx, true);\n}\n\nvoid Service::CallSHA(CmdArgList args, string_view sha, Interpreter* interpreter, bool read_only,\n                      CommandContext* cmd_cntx) {\n  uint32_t num_keys;\n  CHECK(absl::SimpleAtoi(ArgS(args, 1), &num_keys));  // we already validated this\n\n  EvalArgs ev_args;\n  ev_args.sha = sha;\n  ev_args.keys = args.subspan(2, num_keys);\n  ev_args.args = args.subspan(2 + num_keys);\n\n  uint64_t start = absl::GetCurrentTimeNanos();\n  EvalInternal(args, ev_args, interpreter, read_only, cmd_cntx);\n\n  uint64_t end = absl::GetCurrentTimeNanos();\n  ServerState::tlocal()->RecordCallLatency(sha, (end - start) / 1000);\n}\n\nvoid LoadScript(string_view sha, ScriptMgr* script_mgr, Interpreter* interpreter) {\n  if (interpreter->Exists(sha))\n    return;\n\n  auto script_data = script_mgr->Find(sha);\n  if (!script_data) {\n    LOG(DFATAL) << \"Script \" << sha << \" not found in script mgr\";\n    return;\n  }\n\n  string err;\n  Interpreter::AddResult add_res = interpreter->AddFunction(sha, script_data->body, &err);\n  if (add_res != Interpreter::ADD_OK) {\n    LOG(DFATAL) << \"Error adding \" << sha << \" to database, err \" << err;\n  }\n}\n\n// Determine multi mode based on script params.\nTransaction::MultiMode DetermineMultiMode(ScriptMgr::ScriptParams params) {\n  if (params.atomic && params.undeclared_keys)\n    return Transaction::GLOBAL;\n  else if (params.atomic)\n    return Transaction::LOCK_AHEAD;\n  else\n    return Transaction::NON_ATOMIC;\n}\n\n// Starts multi transaction. 
Returns true if transaction was scheduled.\n// Skips scheduling if multi mode requires declaring keys, but no keys were declared.\nbool StartMulti(ConnectionContext* cntx, Transaction::MultiMode tx_mode, CmdArgList keys) {\n  Transaction* tx = cntx->transaction;\n  DCHECK(tx);\n  Namespace* ns = cntx->ns;\n  const DbIndex dbid = cntx->db_index();\n\n  switch (tx_mode) {\n    case Transaction::GLOBAL:\n      tx->StartMultiGlobal(ns, dbid);\n      return true;\n    case Transaction::LOCK_AHEAD:\n      if (keys.empty())\n        return false;\n      tx->StartMultiLockedAhead(ns, dbid, keys);\n      return true;\n    case Transaction::NON_ATOMIC:\n      tx->StartMultiNonAtomic();\n      return true;\n    default:\n      LOG(FATAL) << \"Invalid mode\";\n  };\n\n  return false;\n}\n\n// `multi_mode` is the deduced multi mode that is not yet set on the transaction\nstatic bool CanRunSingleShardMulti(bool one_shard, Transaction::MultiMode multi_mode,\n                                   const Transaction& tx) {\n  if (tx.GetMultiMode() != Transaction::NOT_DETERMINED) {\n    // We may be running EVAL under MULTI. Currently RunSingleShardMulti() will attempt to lock\n    // keys, in which case will be already locked by MULTI. 
We could optimize this path as well\n    // though.\n    return false;\n  }\n\n  // If we have only a single shard, we can run a global command without hops\n  if (shard_set->size() == 1 && multi_mode == Transaction::GLOBAL)\n    return true;\n\n  return one_shard && multi_mode == Transaction::LOCK_AHEAD;\n}\n\nvoid Service::EvalInternal(CmdArgList args, const EvalArgs& eval_args, Interpreter* interpreter,\n                           bool read_only, CommandContext* cmd_cntx) {\n  const static size_t kShaSize = 40;\n  static_assert(sizeof(ConnectionState::ScriptInfo::Stats::sha) == kShaSize);\n\n  // Sanitizing the input to avoid code injection.\n  if (eval_args.sha.size() != kShaSize || !IsSHA(eval_args.sha)) {\n    return cmd_cntx->SendError(facade::kScriptNotFound);\n  }\n\n  auto* ss = ServerState::tlocal();\n  auto params = ss->GetScriptParams(eval_args.sha);\n  if (!params) {\n    return cmd_cntx->SendError(facade::kScriptNotFound);\n  }\n\n  LoadScript(eval_args.sha, server_family_.script_mgr(), interpreter);\n\n  string error;\n  auto* conn_cntx = cmd_cntx->server_conn_cntx();\n  DCHECK(!conn_cntx->conn_state.script_info);  // we should not call eval from the script.\n\n  // TODO: to determine whether the script is RO by scanning all \"redis.p?call\" calls\n  // and checking whether all invocations consist of RO commands.\n  // we can do it once during script insertion into script mgr.\n  auto& sinfo = conn_cntx->conn_state.script_info;\n  sinfo = make_unique<ConnectionState::ScriptInfo>();\n  sinfo->lock_tags.reserve(eval_args.keys.size());\n  sinfo->read_only = read_only;\n  memcpy(sinfo->stats.sha, eval_args.sha.data(), eval_args.sha.size());\n\n  optional<ShardId> sid{nullopt};\n  UniqueSlotChecker slot_checker;\n  for (size_t i = 0; i < eval_args.keys.size(); ++i) {\n    string_view key = ArgS(eval_args.keys, i);\n    slot_checker.Add(key);\n    sinfo->lock_tags.insert(LockTag(key));\n\n    ShardId cur_sid = Shard(key, shard_count());\n    if (i == 0) 
{\n      sid = cur_sid;\n    }\n    if (sid.has_value() && *sid != cur_sid) {\n      sid = nullopt;\n    }\n  }\n\n  sinfo->async_cmds_heap_limit = GetFlag(FLAGS_multi_eval_squash_buffer);\n  Transaction* tx = cmd_cntx->tx();\n  CHECK(tx != nullptr);\n\n  Interpreter::RunResult result;\n  Transaction::MultiMode script_mode = DetermineMultiMode(*params);\n\n  interpreter->SetGlobalArray(\"KEYS\", eval_args.keys);\n  interpreter->SetGlobalArray(\"ARGV\", eval_args.args);\n\n  // Reset cid to EVAL[] as the context is reused during command dispatch\n  absl::Cleanup clean = [interpreter, cmd_cntx, cid = cmd_cntx->cid()]() {\n    interpreter->ResetStack();\n    cmd_cntx->SetupTx(cid, cmd_cntx->tx());\n  };\n\n  if (CanRunSingleShardMulti(sid.has_value(), script_mode, *tx)) {\n    sinfo->stats.tx_shards = 1;\n    // It might be that there are no declared keys, but there is only a single shard\n#pragma GCC diagnostic push\n#pragma GCC diagnostic ignored \"-Wmaybe-uninitialized\"\n    DCHECK(sid.has_value() || shard_set->size() == 1);\n    ShardId real_sid = sid.value_or(ShardId(0));\n#pragma GCC diagnostic pop\n\n    // If script runs on a single shard, we run it remotely to save hops.\n    interpreter->SetRedisFunc([cmd_cntx, this](Interpreter::CallArgs args) {\n      // Disable squashing, as we're using the squashing mechanism to run remotely.\n      args.async = false;\n      CallFromScript(args, cmd_cntx);\n    });\n\n    ++ss->stats.eval_shardlocal_coordination_cnt;\n    tx->PrepareSingleSquash(conn_cntx->ns, real_sid, conn_cntx->db_index(), eval_args.keys,\n                            script_mode);\n\n    tx->ScheduleSingleHop([&](Transaction*, EngineShard*) {\n      boost::intrusive_ptr<Transaction> stub_tx =\n          new Transaction{tx, real_sid, slot_checker.GetUniqueSlotId()};\n      conn_cntx->transaction = stub_tx.get();\n\n      result = interpreter->RunFunction(eval_args.sha, &error);\n\n      conn_cntx->transaction = tx;\n      return OpStatus::OK;\n    
});\n\n    // Migration only makes sense if there are distinct shards\n    if (sid.has_value() && *sid != ss->thread_index()) {\n      VLOG(2) << \"Migrating connection \" << conn_cntx->conn() << \" from \"\n              << ProactorBase::me()->GetPoolIndex() << \" to \" << real_sid;\n      conn_cntx->conn()->RequestAsyncMigration(shard_set->pool()->at(real_sid), false);\n    }\n  } else {\n    Transaction::MultiMode tx_mode = tx->GetMultiMode();\n    bool scheduled = false;\n\n    // Check if eval is already part of a running multi transaction\n    if (tx_mode != Transaction::NOT_DETERMINED) {\n      if (tx_mode > script_mode) {\n        string err = StrCat(\n            \"Multi mode conflict when running eval in multi transaction. Multi mode is: \", tx_mode,\n            \" eval mode is: \", script_mode);\n        return cmd_cntx->SendError(err);\n      }\n    } else {\n      scheduled = StartMulti(conn_cntx, script_mode, eval_args.keys);\n      sinfo->stats.tx_shards = tx->GetUniqueShardCnt();\n    }\n\n    ++ss->stats.eval_io_coordination_cnt;\n    interpreter->SetRedisFunc(\n        [cmd_cntx, this](Interpreter::CallArgs args) { CallFromScript(args, cmd_cntx); });\n\n    result = interpreter->RunFunction(eval_args.sha, &error);\n\n    if (auto err = FlushEvalAsyncCmds(conn_cntx, true); err) {\n      auto err_ref = CapturingReplyBuilder::TryExtractError(*err);\n      result = Interpreter::RUN_ERR;\n      error = absl::StrCat(err_ref->first);\n    }\n\n    // Conclude the transaction.\n    if (scheduled)\n      tx->UnlockMulti();\n  }\n\n  sinfo->stats.tx_mode = script_mode;\n\n  if (result == Interpreter::RUN_ERR) {\n    string resp = StrCat(\"Error running script (call to \", eval_args.sha, \"): \", error);\n    server_family_.script_mgr()->OnScriptError(eval_args.sha, error);\n    return cmd_cntx->SendError(resp, facade::kScriptErrType);\n  }\n\n  CHECK(result == Interpreter::RUN_OK);\n\n  // TODO(vlad): Investigate if using ReplyScope here is possible with a 
different serialization\n  // strategy due to currently SerializeResult destructuring a value while serializing\n  auto* builder = static_cast<RedisReplyBuilder*>(cmd_cntx->rb());\n  SinkReplyBuilder::ReplyAggregator agg(builder);\n  EvalSerializer ser{builder, params->float_as_int};\n  if (!interpreter->IsResultSafe()) {\n    builder->SendError(\"reached lua stack limit\");\n  } else {\n    interpreter->SerializeResult(&ser);\n  }\n}\n\nvoid Service::Discard(CmdArgList args, CommandContext* cmd_cntx) {\n  auto* rb = static_cast<RedisReplyBuilder*>(cmd_cntx->rb());\n  auto* cntx = cmd_cntx->server_conn_cntx();\n  if (!cntx->conn_state.exec_info.IsCollecting()) {\n    return rb->SendError(\"DISCARD without MULTI\");\n  }\n\n  rb->SendOk();\n}\n\n// Return true if non of the connections watched keys expired.\nbool CheckWatchedKeyExpiry(ConnectionContext* cntx, const CommandId* exists_cid,\n                           const CommandId* exec_cid) {\n  auto& exec_info = cntx->conn_state.exec_info;\n  auto* tx = cntx->transaction;\n\n  CmdArgVec str_list(exec_info.watched_keys.size());\n  for (size_t i = 0; i < str_list.size(); i++) {\n    auto& [db, s] = exec_info.watched_keys[i];\n    str_list[i] = MutableSlice{s.data(), s.size()};\n  }\n\n  atomic_uint32_t watch_exist_count{0};\n  auto cb = [&watch_exist_count](Transaction* t, EngineShard* shard) {\n    ShardArgs args = t->GetShardArgs(shard->shard_id());\n    auto res = GenericFamily::OpExists(t->GetOpArgs(shard), args);\n    watch_exist_count.fetch_add(res.value_or(0), memory_order_relaxed);\n\n    return OpStatus::OK;\n  };\n\n  tx->MultiSwitchCmd(exists_cid);\n  tx->InitByArgs(cntx->ns, cntx->conn_state.db_index, CmdArgList{str_list});\n  OpStatus status = tx->ScheduleSingleHop(std::move(cb));\n  CHECK_EQ(OpStatus::OK, status);\n\n  // Reset cid to EXEC as it was before\n  tx->MultiSwitchCmd(exec_cid);\n\n  // The comparison can still be true even if a key expired due to another one being created.\n  // So we have 
to check the watched_dirty flag, which is set if a key expired.\n  return watch_exist_count.load() == exec_info.watched_existed &&\n         !exec_info.watched_dirty.load(memory_order_relaxed);\n}\n\n// Check if exec_info watches keys on dbs other than db_indx.\nbool IsWatchingOtherDbs(DbIndex db_indx, const ConnectionState::ExecInfo& exec_info) {\n  return std::any_of(exec_info.watched_keys.begin(), exec_info.watched_keys.end(),\n                     [db_indx](const auto& pair) { return pair.first != db_indx; });\n}\n\ntemplate <typename F> void IterateAllKeys(const ConnectionState::ExecInfo* exec_info, F&& f) {\n  for (auto& [dbid, key] : exec_info->watched_keys)\n    f(MutableSlice{key.data(), key.size()});\n\n  CmdArgVec arg_vec{};\n\n  for (const auto& scmd : exec_info->body) {\n    if (!scmd.Cid()->IsTransactional())\n      continue;\n\n    auto args = scmd.Slice(&arg_vec);\n    auto key_res = DetermineKeys(scmd.Cid(), args);\n    if (!key_res.ok())\n      continue;\n\n    for (unsigned i : key_res->Range())\n      f(arg_vec[i]);\n  }\n}\n\nCmdArgVec CollectAllKeys(ConnectionState::ExecInfo* exec_info) {\n  CmdArgVec out;\n  out.reserve(exec_info->watched_keys.size() + exec_info->body.size());\n\n  IterateAllKeys(exec_info, [&out](MutableSlice key) { out.push_back(key); });\n\n  return out;\n}\n\nvoid Service::Exec(CmdArgList args, CommandContext* cmd_cntx) {\n  auto* rb = static_cast<RedisReplyBuilder*>(cmd_cntx->rb());\n  auto* cntx = cmd_cntx->server_conn_cntx();\n  auto& exec_info = cntx->conn_state.exec_info;\n\n  if (exec_info.state == ConnectionState::ExecInfo::EXEC_ERROR) {\n    return rb->SendError(\"-EXECABORT Transaction discarded because of previous errors\");\n  }\n\n  // Check basic invariants\n  if (!exec_info.IsCollecting()) {\n    return rb->SendError(\"EXEC without MULTI\");\n  }\n\n  if (IsWatchingOtherDbs(cntx->db_index(), exec_info)) {\n    return rb->SendError(\"Dragonfly does not allow WATCH and EXEC on different databases\");\n  }\n\n  
if (exec_info.watched_dirty.load(memory_order_relaxed)) {\n    return rb->SendNull();\n  }\n\n  auto keys = CollectAllKeys(&exec_info);\n  if (IsClusterEnabled()) {\n    UniqueSlotChecker slot_checker;\n    for (const auto& s : keys) {\n      slot_checker.Add(s);\n    }\n\n    if (slot_checker.IsCrossSlot()) {\n      return rb->SendError(kCrossSlotError);\n    }\n  }\n\n  // The transaction can contain script load script execution, determine their presence ahead to\n  // customize logic below.\n  ExecScriptUse state = DetermineScriptPresense(exec_info.body);\n\n  // We borrow a single interpreter for all the EVALs/Script load inside. Returned by MultiCleanup\n  if (state != ExecScriptUse::NONE) {\n    exec_info.preborrowed_interpreter =\n        BorrowedInterpreter(cmd_cntx->tx(), &cntx->conn_state).Release();\n  }\n\n  // Determine according multi mode, not only only flag, but based on presence of global commands\n  // and scripts\n  Transaction::MultiMode multi_mode = DeduceExecMode(state, exec_info, *script_mgr());\n\n  bool scheduled = false;\n  if (multi_mode != Transaction::NOT_DETERMINED) {\n    scheduled = StartMulti(cntx, multi_mode, keys);\n  }\n\n  // EXEC should not run if any of the watched keys expired.\n  if (!exec_info.watched_keys.empty() &&\n      !CheckWatchedKeyExpiry(cntx, registry_.Find(\"EXISTS\"), exec_cid_)) {\n    cmd_cntx->tx()->UnlockMulti();\n    return rb->SendNull();\n  }\n\n  exec_info.state = ConnectionState::ExecInfo::EXEC_RUNNING;\n\n  VLOG(2) << \"StartExec \" << exec_info.body.size();\n\n  // Make sure we flush whatever responses we aggregated in the reply builder.\n  SinkReplyBuilder::ReplyAggregator agg(rb);\n  rb->StartArray(exec_info.body.size());\n\n  if (!exec_info.body.empty()) {\n    string descr = CreateExecDescriptor(exec_info.body, cmd_cntx->tx()->GetUniqueShardCnt());\n    ServerState::tlocal()->exec_freq_count[descr]++;\n\n    if (GetFlag(FLAGS_multi_exec_squash) && state != ExecScriptUse::SCRIPT_RUN &&\n        
!cntx->conn_state.tracking_info_.IsTrackingOn()) {\n      MultiCommandSquasher::Opts opts;\n      opts.max_squash_size = ServerState::tlocal()->max_squash_cmd_num;\n      MultiCommandSquasher::Execute(absl::MakeSpan(exec_info.body), rb, cntx, this, opts);\n    } else {\n      CmdArgVec arg_vec;\n      DCHECK_EQ(cmd_cntx->cid(), exec_cid_);\n\n      for (const auto& scmd : exec_info.body) {\n        CmdArgList args = scmd.Slice(&arg_vec);\n\n        if (scmd.Cid()->IsTransactional()) {\n          cmd_cntx->tx()->MultiSwitchCmd(scmd.Cid());\n          OpStatus st = cmd_cntx->tx()->InitByArgs(cntx->ns, cntx->conn_state.db_index, args);\n          if (st != OpStatus::OK) {\n            cmd_cntx->SendError(st);\n            break;\n          }\n        }\n\n        // TODO: we will have to create a CommandContext per command if we want to support async\n        // execution inside exec.\n        cmd_cntx->UpdateCid(scmd.Cid());\n        auto invoke_res = InvokeCmd(args, cmd_cntx);\n        if ((invoke_res != DispatchResult::OK) ||\n            rb->GetError())  // checks for i/o error, not logical error.\n          break;\n      }\n      cmd_cntx->UpdateCid(exec_cid_);\n    }\n  }\n\n  if (scheduled) {\n    VLOG(2) << \"Exec unlocking \" << exec_info.body.size() << \" commands\";\n    cmd_cntx->tx()->UnlockMulti();\n  }\n\n  // Dispatch at the end manually to have (MULTI, cmds..., EXEC) order\n  if (!ServerState::tlocal()->Monitors().Empty()) {\n    LOG_IF(DFATAL, exec_cid_->opt_mask() & CO::ADMIN) << \"EXEC should be non admin command\";\n    DispatchMonitor(cntx, exec_cid_, args);\n  }\n\n  VLOG(2) << \"Exec completed\";\n}\n\nvoid Service::Publish(CmdArgList args, CommandContext* cmd_cntx) {\n  bool sharded = cmd_cntx->cid()->PubSubKind() == CO::PubSubKind::SHARDED;\n  if (!sharded && IsClusterEnabled())\n    return cmd_cntx->SendError(\"PUBLISH is not supported in cluster mode yet\");\n\n  string_view channel = ArgS(args, 0);\n  string_view messages[] = {ArgS(args, 
1)};\n\n  auto* cs = ServerState::tlocal()->channel_store();\n  cmd_cntx->SendLong(cs->SendMessages(channel, messages, sharded));\n}\n\nvoid Service::Subscribe(CmdArgList args, CommandContext* cmd_cntx) {\n  bool sharded = cmd_cntx->cid()->PubSubKind() == CO::PubSubKind::SHARDED;\n  if (!sharded && IsClusterEnabled())\n    return cmd_cntx->SendError(\"SUBSCRIBE is not supported in cluster mode yet\");\n\n  auto* rb = static_cast<RedisReplyBuilder*>(cmd_cntx->rb());\n  auto* conn_cntx = cmd_cntx->server_conn_cntx();\n  conn_cntx->ChangeSubscription(true /*add*/, true /* reply*/, sharded, args, rb);\n}\n\nvoid Service::Unsubscribe(CmdArgList args, CommandContext* cmd_cntx) {\n  bool sharded = cmd_cntx->cid()->PubSubKind() == CO::PubSubKind::SHARDED;\n  auto* rb = static_cast<RedisReplyBuilder*>(cmd_cntx->rb());\n  auto* conn_cntx = cmd_cntx->server_conn_cntx();\n  if (!sharded && IsClusterEnabled())\n    return rb->SendError(\"UNSUBSCRIBE is not supported in cluster mode yet\");\n\n  if (args.size() == 0) {\n    conn_cntx->UnsubscribeAll(true, rb);\n  } else {\n    conn_cntx->ChangeSubscription(false, true, sharded, args, rb);\n  }\n}\n\nvoid Service::PSubscribe(CmdArgList args, CommandContext* cmd_cntx) {\n  auto* rb = static_cast<RedisReplyBuilder*>(cmd_cntx->rb());\n\n  if (IsClusterEnabled()) {\n    return rb->SendError(\"PSUBSCRIBE is not supported in cluster mode yet\");\n  }\n  cmd_cntx->server_conn_cntx()->ChangePSubscription(true, true, args, rb);\n}\n\nvoid Service::PUnsubscribe(CmdArgList args, CommandContext* cmd_cntx) {\n  auto* rb = static_cast<RedisReplyBuilder*>(cmd_cntx->rb());\n  if (IsClusterEnabled()) {\n    return rb->SendError(\"PUNSUBSCRIBE is not supported in cluster mode yet\");\n  }\n  auto* conn_cntx = cmd_cntx->server_conn_cntx();\n  if (args.size() == 0) {\n    conn_cntx->PUnsubscribeAll(true, rb);\n  } else {\n    conn_cntx->ChangePSubscription(false, true, args, rb);\n  }\n}\n\n// Not a real implementation. 
Serves as a decorator to accept some function commands\n// for testing.\nvoid Service::Function(CmdArgList args, CommandContext* cmd_cntx) {\n  string sub_cmd = absl::AsciiStrToUpper(ArgS(args, 0));\n\n  if (sub_cmd == \"FLUSH\") {\n    return cmd_cntx->rb()->SendOk();\n  }\n\n  string err = UnknownSubCmd(sub_cmd, \"FUNCTION\");\n  return cmd_cntx->SendError(err, kSyntaxErrType);\n}\n\nvoid Service::PubsubChannels(string_view pattern, SinkReplyBuilder* builder) {\n  auto* rb = static_cast<RedisReplyBuilder*>(builder);\n  rb->SendBulkStrArr(ServerState::tlocal()->channel_store()->ListChannels(pattern));\n}\n\nvoid Service::PubsubPatterns(SinkReplyBuilder* builder) {\n  size_t pattern_count = ServerState::tlocal()->channel_store()->PatternCount();\n  builder->SendLong(pattern_count);\n}\n\nvoid Service::PubsubNumSub(CmdArgList args, SinkReplyBuilder* builder) {\n  auto* rb = static_cast<RedisReplyBuilder*>(builder);\n  rb->StartArray(args.size() * 2);\n  for (string_view channel : args) {\n    rb->SendBulkString(channel);\n    rb->SendLong(ServerState::tlocal()->channel_store()->FetchSubscribers(channel).size());\n  }\n}\n\nvoid Service::Monitor(CmdArgList args, CommandContext* cmd_cntx) {\n  VLOG(1) << \"starting monitor on this connection: \"\n          << cmd_cntx->server_conn_cntx()->conn()->GetClientId();\n  // we are registering the current connection for all threads so they will be aware of\n  // this connection, to send to it any command\n  cmd_cntx->rb()->SendOk();\n  cmd_cntx->server_conn_cntx()->ChangeMonitor(true /* start */);\n}\n\nvoid Service::Pubsub(CmdArgList args, CommandContext* cmd_cntx) {\n  auto* rb = static_cast<RedisReplyBuilder*>(cmd_cntx->rb());\n\n  if (args.size() < 1) {\n    rb->SendError(WrongNumArgsError(cmd_cntx->cid()->name()));\n    return;\n  }\n\n  string subcmd = absl::AsciiStrToUpper(ArgS(args, 0));\n\n  if (subcmd == \"HELP\") {\n    string_view help_arr[] = {\n        \"PUBSUB <subcommand> [<arg> [value] [opt] ...]. 
Subcommands are:\",\n        \"CHANNELS [<pattern>]\",\n        \"\\tReturn the currently active channels matching a <pattern> (default: '*').\",\n        \"NUMPAT\",\n        \"\\tReturn number of subscriptions to patterns.\",\n        \"NUMSUB [<channel> <channel...>]\",\n        \"\\tReturns the number of subscribers for the specified channels, excluding\",\n        \"\\tpattern subscriptions.\",\n        \"SHARDCHANNELS [pattern]\",\n        \"\\tReturns a list of active shard channels, optionally matching the specified pattern \",\n        \"(default: '*').\",\n        \"SHARDNUMSUB [<channel> <channel...>]\",\n        \"\\tReturns the number of subscribers for the specified shard channels, excluding\",\n        \"\\tpattern subscriptions.\",\n        \"HELP\",\n        \"\\tPrints this help.\"};\n\n    rb->SendSimpleStrArr(help_arr);\n    return;\n  }\n\n  // Don't allow SHARD subcommands in non cluster mode\n  if (!IsClusterEnabledOrEmulated() && ((subcmd == \"SHARDCHANNELS\") || (subcmd == \"SHARDNUMSUB\"))) {\n    auto err = absl::StrCat(\"PUBSUB \", subcmd, \" is not supported in non cluster mode\");\n    return rb->SendError(err);\n  }\n\n  if (subcmd == \"CHANNELS\" || subcmd == \"SHARDCHANNELS\") {\n    string_view pattern;\n    if (args.size() > 1) {\n      pattern = ArgS(args, 1);\n    }\n    PubsubChannels(pattern, rb);\n  } else if (subcmd == \"NUMPAT\") {\n    PubsubPatterns(rb);\n  } else if (subcmd == \"NUMSUB\" || subcmd == \"SHARDNUMSUB\") {\n    args.remove_prefix(1);\n    PubsubNumSub(args, rb);\n  } else {\n    rb->SendError(UnknownSubCmd(subcmd, \"PUBSUB\"));\n  }\n}\n\nvoid Service::Command(CmdArgList args, CommandContext* cmd_cntx) {\n  unsigned cmd_cnt = 0;\n  registry_.Traverse([&](string_view name, const CommandId& cd) {\n    if ((cd.opt_mask() & CO::HIDDEN) == 0) {\n      ++cmd_cnt;\n    }\n  });\n\n  auto* rb = static_cast<RedisReplyBuilder*>(cmd_cntx->rb());\n  auto serialize_command = [rb, this](string_view name, const CommandId& 
cid) {\n    rb->StartArray(7);\n    rb->SendSimpleString(cid.name());\n    rb->SendLong(cid.arity());\n\n    vector<string> opts;\n    for (uint32_t i = 0; i < 32; i++) {\n      unsigned obit = (1u << i);\n      if (auto name = CommandOptName(CO::CommandOpt{obit}, cid.opt_mask() & obit); !name.empty())\n        opts.emplace_back(name);\n    }\n    rb->SendSimpleStrArr(opts);\n\n    rb->SendLong(cid.first_key_pos());\n    rb->SendLong(cid.last_key_pos());\n    rb->SendLong(cid.interleaved_step() ? cid.interleaved_step() : 1);\n\n    {\n      const auto& table = acl_family_.GetRevTable();\n      vector<string> cats;\n      for (uint32_t i = 0; i < 32; i++) {\n        if (cid.acl_categories() & (1 << i)) {\n          cats.emplace_back(\"@\" + table[i]);\n        }\n      }\n      rb->SendSimpleStrArr(cats);\n    }\n  };\n\n  // If no arguments are specified, reply with all commands\n  if (args.empty()) {\n    rb->StartArray(cmd_cnt);\n    registry_.Traverse([&](string_view name, const CommandId& cid) {\n      if (cid.opt_mask() & CO::HIDDEN)\n        return;\n      serialize_command(name, cid);\n    });\n    return;\n  }\n\n  string subcmd = absl::AsciiStrToUpper(ArgS(args, 0));\n\n  // COUNT\n  if (subcmd == \"COUNT\") {\n    return rb->SendLong(cmd_cnt);\n  }\n\n  bool sufficient_args = (args.size() == 2);\n\n  // INFO [cmd]\n  if (subcmd == \"INFO\" && sufficient_args) {\n    string cmd = absl::AsciiStrToUpper(ArgS(args, 1));\n\n    if (const auto* cid = registry_.Find(cmd); cid) {\n      rb->StartArray(1);\n      serialize_command(cmd, *cid);\n    } else {\n      rb->SendNull();\n    }\n\n    return;\n  }\n\n  sufficient_args = (args.size() == 1);\n  if (subcmd == \"DOCS\" && sufficient_args) {\n    // Returning an error here forces the interactive CLI client to fall back to static hints and\n    // tab completion\n    return rb->SendError(\"COMMAND DOCS Not Implemented\");\n  }\n\n  if (subcmd == \"HELP\" && sufficient_args) {\n    // Return help information for 
supported COMMAND subcommands\n    constexpr string_view help[] = {\n        \"(no subcommand)\",\n        \"    Return details about all commands.\",\n        \"INFO command-name\",\n        \"    Return details about specified command.\",\n        \"COUNT\",\n        \"    Return the total number of commands in this server.\",\n    };\n    return rb->SendSimpleStrArr(help);\n  }\n\n  return rb->SendError(kSyntaxErr, kSyntaxErrType);\n}\n\nVarzValue::Map Service::GetVarzStats() {\n  VarzValue::Map res;\n\n  Metrics m = server_family_.GetMetrics(&namespaces->GetDefaultNamespace());\n  DbStats db_stats;\n  for (const auto& s : m.db_stats) {\n    db_stats += s;\n  }\n\n  res.emplace_back(\"keys\", VarzValue::FromInt(db_stats.key_count));\n  res.emplace_back(\"obj_mem_usage\", VarzValue::FromInt(db_stats.obj_memory_usage));\n  double load = double(db_stats.key_count) / (1 + db_stats.prime_capacity);\n  res.emplace_back(\"table_load_factor\", VarzValue::FromDouble(load));\n\n  return res;\n}\n\nGlobalState Service::SwitchState(GlobalState from, GlobalState to) {\n  util::fb2::LockGuard lk(mu_);\n  GlobalState prev = global_state_;\n  if (global_state_ != from) {\n    return prev;\n  }\n\n  VLOG(1) << \"Switching state from \" << from << \" to \" << to;\n  global_state_ = to;\n\n  pp_.Await([&](ProactorBase*) {\n    ServerState::tlocal()->set_gstate(to);\n    auto* es = EngineShard::tlocal();\n    if (es && to == GlobalState::ACTIVE) {\n      DbSlice& db = namespaces->GetDefaultNamespace().GetDbSlice(es->shard_id());\n      DCHECK(db.IsLoadRefCountZero());\n    }\n  });\n  return prev;\n}\n\nbool Service::RequestLoadingState() {\n  GlobalState prev = SwitchState(GlobalState::ACTIVE, GlobalState::LOADING);\n  if (prev == GlobalState::ACTIVE || prev == GlobalState::LOADING) {\n    util::fb2::LockGuard lk(mu_);\n    loading_state_counter_++;\n    return true;\n  }\n  return false;\n}\n\nvoid Service::RemoveLoadingState() {\n  bool switch_state = false;\n  {\n    
util::fb2::LockGuard lk(mu_);\n    CHECK_GT(loading_state_counter_, 0u);\n    --loading_state_counter_;\n    switch_state = loading_state_counter_ == 0;\n  }\n  if (switch_state) {\n    SwitchState(GlobalState::LOADING, GlobalState::ACTIVE);\n  }\n}\n\nbool Service::IsLoadingExclusively() {\n  util::fb2::LockGuard lk(mu_);\n  return global_state_ == GlobalState::LOADING && loading_state_counter_ == 0;\n}\n\nvoid Service::ConfigureHttpHandlers(util::HttpListenerBase* base, bool is_privileged) {\n  // We skip authentication on privileged listener if the flag admin_nopass is set\n  // We also skip authentication if requirepass is empty\n  const bool should_skip_auth =\n      (is_privileged && !RequirePrivilegedAuth()) || GetPassword().empty();\n  if (!should_skip_auth) {\n    base->SetAuthFunctor([pass = GetPassword()](std::string_view path, std::string_view username,\n                                                std::string_view password) {\n      if (path == \"/metrics\")\n        return true;\n      const bool pass_verified = pass.empty() ? 
true : password == pass;\n      return username == \"default\" && pass_verified;\n    });\n  }\n  server_family_.ConfigureMetrics(base);\n\n  if (GetFlag(FLAGS_expose_http_api)) {\n    base->RegisterCb(\"/api\",\n                     [this](const http::QueryArgs& args, HttpRequest&& req, HttpContext* send) {\n                       HttpAPI(args, std::move(req), this, send);\n                     });\n  }\n}\n\nvoid Service::OnConnectionClose(facade::ConnectionContext* cntx) {\n  ConnectionContext* server_cntx = static_cast<ConnectionContext*>(cntx);\n  ConnectionState& conn_state = server_cntx->conn_state;\n  VLOG_IF(1, conn_state.replication_info.repl_session_id)\n      << \"OnConnectionClose: \" << server_cntx->conn()->GetName()\n      << \", repl_session_id: \" << conn_state.replication_info.repl_session_id;\n\n  if (conn_state.subscribe_info) {  // Clean-ups related to PUBSUB\n    if (!conn_state.subscribe_info->channels.empty()) {\n      server_cntx->UnsubscribeAll(false, nullptr);\n    }\n\n    if (conn_state.subscribe_info) {\n      DCHECK(!conn_state.subscribe_info->patterns.empty());\n      server_cntx->PUnsubscribeAll(false, nullptr);\n    }\n\n    DCHECK(!conn_state.subscribe_info);\n  }\n\n  UnwatchAllKeys(server_cntx->ns, &conn_state.exec_info);\n\n  DeactivateMonitoring(server_cntx);\n\n  server_family_.OnClose(server_cntx);\n\n  conn_state.tracking_info_.SetClientTracking(false);\n}\n\nvoid Service::RegisterTieringFlags() {\n#ifdef WITH_TIERING\n  // TODO(vlad): Introduce templatable flag cache\n  auto update_tiered_storage = [](auto) {\n    shard_set->pool()->AwaitBrief([](unsigned, auto*) {\n      if (auto* es = EngineShard::tlocal(); es && es->tiered_storage()) {\n        es->tiered_storage()->UpdateFromFlags();\n      }\n    });\n  };\n  config_registry.RegisterSetter<bool>(\"tiered_experimental_cooling\", update_tiered_storage);\n  config_registry.RegisterSetter<unsigned>(\"tiered_storage_write_depth\", update_tiered_storage);\n  
config_registry.RegisterSetter<float>(\"tiered_offload_threshold\", update_tiered_storage);\n  config_registry.RegisterSetter<float>(\"tiered_upload_threshold\", update_tiered_storage);\n#endif\n}\n\nService::ContextInfo Service::GetContextInfo(facade::ConnectionContext* cntx) const {\n  ConnectionContext* server_cntx = static_cast<ConnectionContext*>(cntx);\n  return {.db_index = server_cntx->db_index(),\n          .async_dispatch = server_cntx->async_dispatch,\n          .conn_closing = server_cntx->conn_closing,\n          .subscribers = bool(server_cntx->conn_state.subscribe_info),\n          .blocked = server_cntx->blocked};\n}\n\n#define HFUNC(x) SetHandler(&Service::x)\n#define MFUNC(x) \\\n  SetHandler([this](CmdArgList sp, CommandContext* cntx) { this->x(std::move(sp), cntx); })\n\nnamespace acl {\nconstexpr uint32_t kQuit = FAST | CONNECTION;\nconstexpr uint32_t kMulti = FAST | TRANSACTION;\nconstexpr uint32_t kWatch = FAST | TRANSACTION;\nconstexpr uint32_t kUnwatch = FAST | TRANSACTION;\nconstexpr uint32_t kDiscard = FAST | TRANSACTION;\nconstexpr uint32_t kEval = SLOW | SCRIPTING;\nconstexpr uint32_t kEvalRo = SLOW | SCRIPTING;\nconstexpr uint32_t kEvalSha = SLOW | SCRIPTING;\nconstexpr uint32_t kEvalShaRo = SLOW | SCRIPTING;\nconstexpr uint32_t kExec = SLOW | TRANSACTION;\nconstexpr uint32_t kPublish = PUBSUB | FAST;\nconstexpr uint32_t kSubscribe = PUBSUB | SLOW;\nconstexpr uint32_t kUnsubscribe = PUBSUB | SLOW;\nconstexpr uint32_t kPSubscribe = PUBSUB | SLOW;\nconstexpr uint32_t kPUnsubsribe = PUBSUB | SLOW;\nconstexpr uint32_t kFunction = SLOW;\nconstexpr uint32_t kMonitor = ADMIN | SLOW | DANGEROUS;\nconstexpr uint32_t kPubSub = SLOW;\nconstexpr uint32_t kCommand = SLOW | CONNECTION;\n}  // namespace acl\n\nvoid Service::Register(CommandRegistry* registry) {\n  using CI = CommandId;\n  registry->StartFamily();\n  *registry\n      << CI{\"QUIT\", CO::FAST, 1, 0, 0, acl::kQuit}.HFUNC(Quit)\n      << CI{\"MULTI\", CO::NOSCRIPT | CO::FAST | 
CO::LOADING, 1, 0, 0, acl::kMulti}.HFUNC(Multi)\n      << CI{\"WATCH\", CO::LOADING, -2, 1, -1, acl::kWatch}.HFUNC(Watch)\n      << CI{\"UNWATCH\", CO::LOADING, 1, 0, 0, acl::kUnwatch}.HFUNC(Unwatch)\n      << CI{\"DISCARD\", CO::NOSCRIPT | CO::FAST | CO::LOADING, 1, 0, 0, acl::kDiscard}.MFUNC(Discard)\n      << CI{\"EVAL\", CO::NOSCRIPT | CO::VARIADIC_KEYS, -3, 3, 3, acl::kEval}\n             .MFUNC(Eval)\n             .SetValidator(&EvalValidator)\n      << CI{\"EVAL_RO\", CO::NOSCRIPT | CO::READONLY | CO::VARIADIC_KEYS, -3, 3, 3, acl::kEvalRo}\n             .MFUNC(EvalRo)\n             .SetValidator(&EvalValidator)\n      << CI{\"EVALSHA\", CO::NOSCRIPT | CO::VARIADIC_KEYS, -3, 3, 3, acl::kEvalSha}\n             .MFUNC(EvalSha)\n             .SetValidator(&EvalValidator)\n      << CI{\"EVALSHA_RO\",   CO::NOSCRIPT | CO::READONLY | CO::VARIADIC_KEYS, -3, 3, 3,\n            acl::kEvalShaRo}\n             .MFUNC(EvalShaRo)\n             .SetValidator(&EvalValidator)\n      << CI{\"EXEC\", CO::LOADING | CO::NOSCRIPT, 1, 0, 0, acl::kExec}.MFUNC(Exec)\n      << CI{\"PUBLISH\", CO::LOADING | CO::FAST, 3, 0, 0, acl::kPublish}.MFUNC(Publish)\n      << CI{\"SPUBLISH\", CO::LOADING | CO::FAST, 3, 0, 0, acl::kPublish}.MFUNC(Publish)\n      << CI{\"SUBSCRIBE\", CO::NOSCRIPT | CO::LOADING, -2, 0, 0, acl::kSubscribe}.MFUNC(Subscribe)\n      << CI{\"SSUBSCRIBE\", CO::NOSCRIPT | CO::LOADING, -2, 0, 0, acl::kSubscribe}.MFUNC(Subscribe)\n      << CI{\"UNSUBSCRIBE\", CO::NOSCRIPT | CO::LOADING, -1, 0, 0, acl::kUnsubscribe}.MFUNC(\n             Unsubscribe)\n      << CI{\"SUNSUBSCRIBE\", CO::NOSCRIPT | CO::LOADING, -1, 0, 0, acl::kUnsubscribe}.MFUNC(\n             Unsubscribe)\n      << CI{\"PSUBSCRIBE\", CO::NOSCRIPT | CO::LOADING, -2, 0, 0, acl::kPSubscribe}.MFUNC(PSubscribe)\n      << CI{\"PUNSUBSCRIBE\", CO::NOSCRIPT | CO::LOADING, -1, 0, 0, acl::kPUnsubsribe}.MFUNC(\n             PUnsubscribe)\n      << CI{\"FUNCTION\", CO::NOSCRIPT, 2, 0, 0, acl::kFunction}.MFUNC(Function)\n   
   << CI{\"MONITOR\", CO::ADMIN, 1, 0, 0, acl::kMonitor}.MFUNC(Monitor)\n      << CI{\"PUBSUB\", CO::LOADING | CO::FAST, -1, 0, 0, acl::kPubSub}.MFUNC(Pubsub)\n      << CI{\"COMMAND\", CO::LOADING | CO::NOSCRIPT, -1, 0, 0, acl::kCommand}.MFUNC(Command);\n}\n\nvoid Service::RegisterCommands() {\n  Register(&registry_);\n  server_family_.Register(&registry_);\n  GenericFamily::Register(&registry_);\n  RegisterListFamily(&registry_);\n  RegisterStringFamily(&registry_);\n\n#ifdef WITH_COLLECTION_CMDS\n  SetFamily::Register(&registry_);\n  HSetFamily::Register(&registry_);\n  ZSetFamily::Register(&registry_);\n  StreamFamily::Register(&registry_);\n#endif\n\n#ifdef WITH_EXTENSION_CMDS\n  RegisterGeoFamily(&registry_);\n  RegisterBitopsFamily(&registry_);\n  RegisterHllFamily(&registry_);\n  RegisterBloomFamily(&registry_);\n  RegisterCmsFamily(&registry_);\n  RegisterJsonFamily(&registry_);\n#endif\n\n#ifdef WITH_SEARCH\n  SearchFamily::Register(&registry_);\n#endif\n\n  cluster_family_.Register(&registry_);\n\n  // AclFamily should always be registered last\n  // If we add a new familly, register that first above and *not* below\n  acl_family_.Register(&registry_);\n\n  // Only after all the commands are registered\n  registry_.Init(pp_.size());\n\n  using CI = CommandId;\n  if (VLOG_IS_ON(2)) {\n    LOG(INFO) << \"Multi-key commands are: \";\n    registry_.Traverse([](std::string_view key, const CI& cid) {\n      if (cid.is_multi_key()) {\n        string key_len;\n        if (cid.last_key_pos() < 0)\n          key_len = \"unlimited\";\n        else\n          key_len = StrCat(cid.last_key_pos() - cid.first_key_pos() + 1);\n        LOG(INFO) << \"    \" << key << \": with \" << key_len << \" keys\";\n      }\n    });\n\n    LOG(INFO) << \"Non-transactional commands are: \";\n    registry_.Traverse([](std::string_view name, const CI& cid) {\n      if (cid.IsTransactional()) {\n        LOG(INFO) << \"    \" << name;\n      }\n    });\n  }\n}\n\nconst acl::AclFamily* 
Service::TestInit() {\n  acl_family_.Init(nullptr, &user_registry_);\n  return &acl_family_;\n}\n\n}  // namespace dfly\n"
  },
  {
    "path": "src/server/main_service.h",
    "content": "// Copyright 2022, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#pragma once\n\n#include \"base/varz_value.h\"\n#include \"core/interpreter.h\"\n#include \"facade/service_interface.h\"\n#include \"server/acl/acl_commands_def.h\"\n#include \"server/acl/acl_family.h\"\n#include \"server/acl/user_registry.h\"\n#include \"server/cluster/cluster_family.h\"\n#include \"server/command_registry.h\"\n#include \"server/config_registry.h\"\n#include \"server/engine_shard_set.h\"\n#include \"server/server_family.h\"\n\nnamespace util {\nclass AcceptServer;\n}  // namespace util\n\nnamespace dfly {\n\nusing facade::MemcacheParser;\n\nclass Service : public facade::ServiceInterface {\n public:\n  explicit Service(util::ProactorPool* pp);\n  ~Service();\n\n  void Init(util::AcceptServer* acceptor, std::vector<facade::Listener*> listeners);\n\n  void Shutdown();\n\n  // Prepare command execution, verify and execute, reply to context\n  facade::DispatchResult DispatchCommand(facade::ParsedArgs args, facade::ParsedCommand* parsed_cmd,\n                                         facade::AsyncPreference apref) final;\n\n  // Execute multiple consecutive commands, possibly in parallel by squashing\n  facade::DispatchManyResult DispatchManyCommands(std::function<facade::ParsedArgs()> arg_gen,\n                                                  unsigned count, facade::SinkReplyBuilder* builder,\n                                                  facade::ConnectionContext* cntx) final;\n\n  // Check OOM and invoke command with args\n  facade::DispatchResult InvokeCmd(CmdArgList tail_args, CommandContext* cmd_cntx);\n\n  // Verify command prepares execution in correct state.\n  // It's usually called before command execution. 
Only for multi/exec transactions it's checked\n  // when the command is queued for execution, not before the execution itself.\n  std::optional<facade::ErrorReply> VerifyCommandState(const CommandId& cid, ArgSlice tail_args,\n                                                       const ConnectionContext& cntx);\n\n  facade::DispatchResult DispatchMC(facade::ParsedCommand* parsed_cmd,\n                                    facade::AsyncPreference apref) final;\n\n  facade::ConnectionContext* CreateContext(facade::Connection* owner) final;\n  facade::ParsedCommand* AllocateParsedCommand() final;\n\n  const CommandId* FindCmd(std::string_view) const;\n\n  CommandRegistry* mutable_registry() {\n    return &registry_;\n  }\n\n  facade::ErrorReply ReportUnknownCmd(std::string_view cmd_name) ABSL_LOCKS_EXCLUDED(mu_);\n\n  // Attempts to switch global state from 'from' to 'to'.\n  // Returns the PREVIOUS global state (before the switch attempt).\n  // If from equals the previous state then the switch is performed and 'from' is returned.\n  // Otherwise, does not switch and returns the current (unchanged) state.\n  // Upon switch, updates cached global state in threadlocal ServerState struct.\n  GlobalState SwitchState(GlobalState from, GlobalState to) ABSL_LOCKS_EXCLUDED(mu_);\n\n  bool RequestLoadingState() ABSL_LOCKS_EXCLUDED(mu_);\n  void RemoveLoadingState() ABSL_LOCKS_EXCLUDED(mu_);\n\n  // Return true if state is LOADING and loading_state_counter_ == 0, that is,\n  // if no multiple operations require LOADING_STATE at the same time.\n  bool IsLoadingExclusively() ABSL_LOCKS_EXCLUDED(mu_);\n\n  void ConfigureHttpHandlers(util::HttpListenerBase* base, bool is_privileged) final;\n  void OnConnectionClose(facade::ConnectionContext* cntx) final;\n\n  Service::ContextInfo GetContextInfo(facade::ConnectionContext* cntx) const final;\n\n  uint32_t shard_count() const {\n    return shard_set->size();\n  }\n\n  // Used by tests.\n  bool IsLocked(Namespace* ns, DbIndex db_index, 
std::string_view key) const;\n  bool IsShardSetLocked() const;\n\n  util::ProactorPool& proactor_pool() {\n    return pp_;\n  }\n\n  absl::flat_hash_map<std::string, unsigned> UknownCmdMap() const;\n\n  ScriptMgr* script_mgr() {\n    return server_family_.script_mgr();\n  }\n\n  const ScriptMgr* script_mgr() const {\n    return server_family_.script_mgr();\n  }\n\n  ServerFamily& server_family() {\n    return server_family_;\n  }\n\n  cluster::ClusterFamily& cluster_family() {\n    return cluster_family_;\n  }\n\n  // Utility function used in unit tests\n  // Do not use in production, only meant to be used by unit tests\n  const acl::AclFamily* TestInit();\n\n private:\n  using SinkReplyBuilder = facade::SinkReplyBuilder;\n\n  static void Quit(CmdArgList args, CommandContext* cmd_cntx);\n  static void Multi(CmdArgList args, CommandContext* cmd_cntx);\n\n  static void Watch(CmdArgList args, CommandContext* cmd_cntx);\n  static void Unwatch(CmdArgList args, CommandContext* cmd_cntx);\n\n  void Discard(CmdArgList args, CommandContext* cmd_cntx);\n  void Eval(CmdArgList args, CommandContext* cmd_cntx, bool read_only = false);\n  void EvalRo(CmdArgList args, CommandContext* cmd_cntx);\n  void EvalSha(CmdArgList args, CommandContext* cmd_cntx, bool read_only = false);\n  void EvalShaRo(CmdArgList args, CommandContext* cmd_cntx);\n  void Exec(CmdArgList args, CommandContext* cmd_cntx);\n  void Publish(CmdArgList args, CommandContext* cmd_cntx);\n  void Subscribe(CmdArgList args, CommandContext* cmd_cntx);\n  void Unsubscribe(CmdArgList args, CommandContext* cmd_cntx);\n  void PSubscribe(CmdArgList args, CommandContext* cmd_cntx);\n  void PUnsubscribe(CmdArgList args, CommandContext* cmd_cntx);\n  void Function(CmdArgList args, CommandContext* cmd_cntx);\n  void Monitor(CmdArgList args, CommandContext* cmd_cntx);\n  void Pubsub(CmdArgList args, CommandContext* cmd_cntx);\n  void Command(CmdArgList args, CommandContext* cmd_cntx);\n\n  void PubsubChannels(std::string_view 
pattern, SinkReplyBuilder* builder);\n  void PubsubPatterns(SinkReplyBuilder* builder);\n  void PubsubNumSub(CmdArgList channels, SinkReplyBuilder* builder);\n\n  struct EvalArgs {\n    std::string_view sha;  // only one of them is defined.\n    CmdArgList keys, args;\n  };\n\n  // Return error if not all keys are owned by the server when running in cluster mode\n  std::optional<facade::ErrorReply> CheckKeysOwnership(const CommandId& cid, CmdArgList args,\n                                                       const ConnectionContext& dfly_cntx);\n\n  // Return moved error if we *own* the slot. This function is used from flows that assume our\n  // state is TAKEN_OVER which happens after a replica takeover.\n  std::optional<facade::ErrorReply> TakenOverSlotError(const CommandId& cid, CmdArgList args,\n                                                       const ConnectionContext& dfly_cntx);\n\n  void EvalInternal(CmdArgList args, const EvalArgs& eval_args, Interpreter* interpreter,\n                    bool read_only, CommandContext* cmd_cntx);\n  void CallSHA(CmdArgList args, std::string_view sha, Interpreter* interpreter, bool read_only,\n               CommandContext* cmd_cntx);\n\n  // Return optional payload - first received error that occured when executing commands.\n  std::optional<facade::payload::Payload> FlushEvalAsyncCmds(ConnectionContext* cntx,\n                                                             bool force = false);\n\n  void CallFromScript(Interpreter::CallArgs& args, CommandContext* cmd_cntx);\n\n  OpResult<KeyIndex> FindKeys(const CommandId* cid, CmdArgList args);\n\n  void RegisterCommands();\n  void Register(CommandRegistry* registry);\n  // Helper for registering tiering flags\n  void RegisterTieringFlags();\n\n  base::VarzValue::Map GetVarzStats();\n\n  util::ProactorPool& pp_;\n\n  acl::UserRegistry user_registry_;\n  acl::AclFamily acl_family_;\n  ServerFamily server_family_;\n  cluster::ClusterFamily cluster_family_;\n  
CommandRegistry registry_;\n  absl::flat_hash_map<std::string, unsigned> unknown_cmds_;\n\n  const CommandId* exec_cid_;  // command id of EXEC command for pipeline squashing\n\n  mutable util::fb2::Mutex mu_;\n  GlobalState global_state_ ABSL_GUARDED_BY(mu_) = GlobalState::ACTIVE;\n  uint32_t loading_state_counter_ ABSL_GUARDED_BY(mu_) = 0;\n};\n\n}  // namespace dfly\n"
  },
  {
    "path": "src/server/memory_cmd.cc",
    "content": "// Copyright 2024, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#include \"server/memory_cmd.h\"\n\n#include <absl/strings/ascii.h>\n#include <absl/strings/str_cat.h>\n\n#ifdef __linux__\n#include <malloc.h>\n#endif\n\n#include <mimalloc.h>\n\n#include \"base/flags.h\"\n#include \"core/allocation_tracker.h\"\n#include \"facade/cmd_arg_parser.h\"\n#include \"facade/dragonfly_connection.h\"\n#include \"facade/dragonfly_listener.h\"\n#include \"facade/error.h\"\n#include \"facade/reply_builder.h\"\n#include \"io/io_buf.h\"\n#include \"server/engine_shard_set.h\"\n#include \"server/namespaces.h\"\n#include \"server/server_family.h\"\n#include \"server/server_state.h\"\n\nusing namespace std;\nusing namespace facade;\n\nABSL_DECLARE_FLAG(float, mem_defrag_page_utilization_threshold);\n\nnamespace dfly {\n\nnamespace {\n\nvoid MiStatsCallback(const char* msg, void* arg) {\n  string* str = (string*)arg;\n  absl::StrAppend(str, msg);\n}\n\n// blocksize, reserved, committed, used.\nusing BlockKey = std::tuple<size_t, size_t, size_t, size_t>;\nusing BlockMap = absl::flat_hash_map<BlockKey, uint64_t>;\n\nbool MiArenaVisit(const mi_heap_t* heap, const mi_heap_area_t* area, void* block, size_t block_size,\n                  void* arg) {\n  BlockMap* bmap = (BlockMap*)arg;\n  BlockKey bkey{block_size, area->reserved, area->committed, area->used * block_size};\n  (*bmap)[bkey]++;\n\n  return true;\n}\n\nstruct BlockSummary {\n  size_t reserved = 0;\n  size_t committed = 0;\n  size_t used = 0;\n};\n\nusing BlockSummaryMap = absl::flat_hash_map<size_t, BlockSummary>;\n\nbool MiArenaVisitSummary(const mi_heap_t*, const mi_heap_area_t* area, void*, size_t block_size,\n                         void* arg) {\n  BlockSummaryMap* bsm = static_cast<BlockSummaryMap*>(arg);\n  BlockSummary& block_stats = (*bsm)[block_size];\n  block_stats.committed += area->committed;\n  block_stats.reserved += area->reserved;\n  block_stats.used += 
area->used * block_size;\n  return true;\n}\n\nBlockSummaryMap CollectSummary(bool backing) {\n  BlockSummaryMap summary;\n  const mi_heap_t* data_heap = backing ? mi_heap_get_backing() : ServerState::tlocal()->data_heap();\n  mi_heap_visit_blocks(data_heap, false, MiArenaVisitSummary, &summary);\n  return summary;\n}\n\nvector<BlockSummaryMap> CollectSummaries(bool backing) {\n  std::vector<BlockSummaryMap> summaries(shard_set->size());\n  shard_set->RunBriefInParallel([&summaries, backing](EngineShard* shard) {\n    summaries[shard->shard_id()] = CollectSummary(backing);\n  });\n  return summaries;\n}\n\nvoid FormatSummary(std::string* str, const BlockSummaryMap& summary) {\n  absl::StrAppend(str, absl::StrFormat(\"%10s %10s %10s %10s %10s %8s\\n\", \"BlockSize\", \"Reserved\",\n                                       \"Committed\", \"Used\", \"Wasted\", \"Waste%\"));\n  std::vector<std::pair<size_t, BlockSummary>> entries{summary.begin(), summary.end()};\n  std::ranges::sort(entries, {}, [](const auto& entry) {\n    const BlockSummary& stats = entry.second;\n    return stats.committed > stats.used ? stats.committed - stats.used : 0;\n  });\n\n  size_t total_reserved = 0;\n  size_t total_committed = 0;\n  size_t total_used = 0;\n\n  for (const auto& [size, block_summary] : entries) {\n    const size_t wasted = block_summary.committed > block_summary.used\n                              ? 
block_summary.committed - block_summary.used\n                              : 0;\n    const double waste_pct = 100.0 * wasted / std::max<size_t>(1UL, block_summary.committed);\n    absl::StrAppend(str, absl::StrFormat(\"%10zu %10zu %10zu %10zu %10zu %8.2f%%\\n\", size,\n                                         block_summary.reserved, block_summary.committed,\n                                         block_summary.used, wasted, waste_pct));\n    total_reserved += block_summary.reserved;\n    total_committed += block_summary.committed;\n    total_used += block_summary.used;\n  }\n\n  const size_t wasted = total_committed > total_used ? total_committed - total_used : 0;\n  absl::StrAppend(str, absl::StrFormat(\"%10s %10zu %10zu %10zu %10zu %8.2f%%\\n\", \"Total:\",\n                                       total_reserved, total_committed, total_used, wasted,\n                                       100.0 * wasted / std::max<size_t>(1UL, total_committed)));\n}\n\nstring FormatSummaries(const vector<BlockSummaryMap>& summaries) {\n  string str;\n  BlockSummaryMap machine_wide;\n  for (size_t i = 0; i < summaries.size(); ++i) {\n    absl::StrAppend(&str, \"\\nArena statistics for thread \", i, \":\\n\");\n    FormatSummary(&str, summaries[i]);\n    for (const auto& [size, block_summary] : summaries[i]) {\n      BlockSummary& machine_block = machine_wide[size];\n      machine_block.reserved += block_summary.reserved;\n      machine_block.committed += block_summary.committed;\n      machine_block.used += block_summary.used;\n    }\n  }\n\n  absl::StrAppend(&str, \"\\nArena statistics for machine:\\n\");\n  FormatSummary(&str, machine_wide);\n\n  return str;\n}\n\nstd::string MallocStatsCb(bool backing, unsigned tid) {\n  string str;\n\n  uint64_t start = absl::GetCurrentTimeNanos();\n\n  absl::StrAppend(&str, \"\\nArena statistics from thread:\", tid, \"\\n\");\n\n  mi_heap_t* data_heap = backing ? 
mi_heap_get_backing() : ServerState::tlocal()->data_heap();\n\n  BlockMap block_map;\n\n  mi_heap_visit_blocks(data_heap, false /* visit all blocks*/, MiArenaVisit, &block_map);\n  uint64_t reserved = 0, committed = 0, used = 0;\n  absl::StrAppend(&str, \"Count BlockSize Reserved Committed Used\\n\");\n  for (const auto& k_v : block_map) {\n    uint64_t count = k_v.second;\n    absl::StrAppend(&str, count, \" \", get<0>(k_v.first), \" \", get<1>(k_v.first), \" \",\n                    get<2>(k_v.first), \" \", get<3>(k_v.first), \"\\n\");\n    reserved += count * get<1>(k_v.first);\n    committed += count * get<2>(k_v.first);\n    used += count * get<3>(k_v.first);\n  }\n\n  absl::StrAppend(\n      &str, \"total reserved: \", reserved, \", committed: \", committed, \", used: \", used,\n      \" fragmentation waste: \",\n      100.0 * (committed > used ? committed - used : 0) / std::max<size_t>(1UL, committed), \"%\\n\");\n  const uint64_t delta = (absl::GetCurrentTimeNanos() - start) / 1000;\n  absl::StrAppend(&str, \"--- End mimalloc statistics, took \", delta, \"us ---\\n\");\n\n  return str;\n}\n\nsize_t MemoryUsage(PrimeIterator it, bool account_key_memory_usage) {\n  size_t key_size = account_key_memory_usage ? it->first.MallocUsed() : 0;\n  return key_size + it->second.MallocUsed(true);\n}\n\n}  // namespace\n\nMemoryCmd::MemoryCmd(ServerFamily* owner, CommandContext* cmd_cntx)\n    : cmd_cntx_(cmd_cntx), owner_(owner) {\n}\n\nvoid MemoryCmd::Run(CmdArgList args) {\n  CmdArgParser parser(args);\n\n  if (parser.Check(\"HELP\")) {\n    string_view help_arr[] = {\n        \"MEMORY <subcommand> [<arg> ...]. Subcommands are:\",\n        \"STATS\",\n        \"    Shows breakdown of memory.\",\n        \"MALLOC-STATS\",\n        \"    Show global malloc stats as provided by allocator libraries\",\n        \"ARENA [SUMMARY] [BACKING] [thread-id]\",\n        \"    Show mimalloc arena stats for a heap residing in specified thread-id. 
0 by default.\",\n        \"    If SUMMARY is specified, show stats summarized by block size\",\n        \"        per thread summary, followed by machine wide summary\",\n        \"        thread-id is ignored for summary output.\",\n        \"    If BACKING is specified, show stats for the backing heap.\",\n        \"ARENA SHOW\",\n        \"    Prints the arena summary report for the entire process.\",\n        \"    Requires MIMALLOC_VERBOSE=1 environment to be set. The output goes to stdout\",\n        \"USAGE <key> [WITHOUTKEY]\",\n        \"    Show memory usage of a key.\",\n        \"    If WITHOUTKEY is specified, the key itself is not accounted.\",\n        \"DECOMMIT\",\n        \"    Force decommit the memory freed by the server back to OS.\",\n        \"TRACK\",\n        \"    Allow tracking of memory allocation via `new` and `delete` based on input criteria.\",\n        \"    USE WITH CAUTIOUS! This command is designed for Dragonfly developers.\",\n        \"    ADD <lower-bound> <upper-bound> <sample-odds>\",\n        \"        Sets up tracking memory allocations in the (inclusive) range [lower, upper]\",\n        \"        sample-odds indicates how many of the allocations will be logged, there 0 means \"\n        \"none, 1 means all, and everything in between is linear\",\n        \"        There could be at most 4 tracking placed in parallel\",\n        \"    REMOVE <lower-bound> <upper-bound>\",\n        \"        Removes all memory tracking added which match bounds\",\n        \"        Could remove 0, 1 or more\",\n        \"    CLEAR\",\n        \"        Removes all memory tracking\",\n        \"    GET\",\n        \"        Returns an array with all active tracking\",\n        \"    ADDRESS <address>\",\n        \"        Returns whether <address> is known to be allocated internally by any of the \"\n        \"backing heaps\",\n        \"DEFRAGMENT [threshold]\",\n        \"    Tries to free memory by moving allocations around from sparsely 
used memory pages.\",\n        \"    If a threshold is supplied, it is used to determine if data will be moved from the \"\n        \"page.\",\n        \"    Pages used less than the threshold percentage (default 0.8) are targeted for moving \"\n        \"out data.\",\n    };\n    auto* rb = static_cast<RedisReplyBuilder*>(cmd_cntx_->rb());\n    return rb->SendSimpleStrArr(help_arr);\n  };\n\n  if (parser.Check(\"STATS\")) {\n    return Stats();\n  }\n\n  if (parser.Check(\"USAGE\")) {\n    if (!parser.HasNext()) {\n      return cmd_cntx_->SendError(kSyntaxErr);\n    }\n    string_view key = parser.Next();\n    bool account_key_memory_usage = !parser.Check(\"WITHOUTKEY\");\n    return Usage(key, account_key_memory_usage);\n  }\n\n  if (parser.Check(\"DECOMMIT\")) {\n    shard_set->pool()->AwaitBrief(\n        [](unsigned, auto* pb) { ServerState::tlocal()->DecommitMemory(ServerState::kAllMemory); });\n    return cmd_cntx_->rb()->SendSimpleString(\"OK\");\n  }\n\n  if (parser.Check(\"MALLOC-STATS\")) {\n    return MallocStats();\n  }\n\n  if (parser.Check(\"ARENA\")) {\n    return ArenaStats(args);\n  }\n\n  if (parser.Check(\"TRACK\")) {\n    args.remove_prefix(1);\n    return Track(args);\n  }\n\n  if (parser.Check(\"DEFRAGMENT\")) {\n    static const float default_threshold =\n        absl::GetFlag(FLAGS_mem_defrag_page_utilization_threshold);\n    const float threshold = parser.NextOrDefault(default_threshold);\n\n    std::vector<CollectedPageStats> results(shard_set->size());\n    shard_set->pool()->AwaitFiberOnAll([threshold, &results](util::ProactorBase*) {\n      if (auto* shard = EngineShard::tlocal(); shard) {\n        PageUsage page_usage{CollectPageStats::YES, threshold,\n                             CycleQuota{CycleQuota::kDefaultDefragQuota}};\n        if (auto shard_res = shard->DoDefrag(&page_usage); shard_res.has_value()) {\n          results[shard->shard_id()] = std::move(shard_res.value());\n        }\n      }\n    });\n\n    const 
CollectedPageStats merged = CollectedPageStats::Merge(std::move(results), threshold);\n    auto* rb = static_cast<RedisReplyBuilder*>(cmd_cntx_->rb());\n    return rb->SendVerbatimString(merged.ToString());\n  }\n\n  string err = UnknownSubCmd(parser.Next(), \"MEMORY\");\n  return cmd_cntx_->SendError(err, kSyntaxErrType);\n}\n\nnamespace {\n\nstruct ConnectionMemoryUsage {\n  size_t connection_size = 0;\n  size_t replication_connection_count = 0;\n  size_t replication_connection_size = 0;\n};\n\nConnectionMemoryUsage GetConnectionMemoryUsage(ServerFamily* server) {\n  vector<ConnectionMemoryUsage> mems(shard_set->pool()->size());\n\n  for (auto* listener : server->GetListeners()) {\n    listener->TraverseConnections([&](unsigned thread_index, util::Connection* conn) {\n      if (conn == nullptr) {\n        return;\n      }\n\n      auto* dfly_conn = static_cast<facade::Connection*>(conn);\n      auto* cntx = static_cast<ConnectionContext*>(dfly_conn->cntx());\n\n      size_t usage = dfly_conn->GetMemoryUsage();\n      if (cntx == nullptr || cntx->master_repl_flow == nullptr) {\n        mems[thread_index].connection_size += usage;\n      } else {\n        mems[thread_index].replication_connection_count++;\n        mems[thread_index].replication_connection_size += usage;\n      }\n    });\n  }\n\n  ConnectionMemoryUsage mem;\n  for (const auto& m : mems) {\n    mem.connection_size += m.connection_size;\n    mem.replication_connection_count += m.replication_connection_count;\n    mem.replication_connection_size += m.replication_connection_size;\n  }\n  return mem;\n}\n\n}  // namespace\n\nvoid MemoryCmd::Stats() {\n  vector<pair<string, size_t>> stats;\n  stats.reserve(25);\n  ConnectionMemoryUsage connection_memory = GetConnectionMemoryUsage(owner_);\n\n  // Connection stats, excluding replication connections\n  stats.push_back({\"connections.direct_bytes\", connection_memory.connection_size});\n\n  // Replication connection stats\n  stats.push_back(\n      
{\"replication.connections_count\", connection_memory.replication_connection_count});\n  stats.push_back({\"replication.direct_bytes\", connection_memory.replication_connection_size});\n\n  auto* rb = static_cast<RedisReplyBuilder*>(cmd_cntx_->rb());\n  rb->StartCollection(stats.size(), CollectionType::MAP);\n  for (const auto& [k, v] : stats) {\n    rb->SendBulkString(k);\n    rb->SendLong(v);\n  }\n}\n\nvoid MemoryCmd::MallocStats() {\n  string report;\n\n#if __GLIBC__  // MUSL/alpine do not have mallinfo routines.\n#if __GLIBC__ > 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ >= 33)\n  struct mallinfo2 malloc_info = mallinfo2();\n#else\n  struct mallinfo malloc_info = mallinfo();  // buggy because 32-bit stats may overflow.\n#endif\n\n  absl::StrAppend(&report, \"___ Begin malloc stats ___\\n\");\n  absl::StrAppend(&report, \"arena: \", malloc_info.arena, \", ordblks: \", malloc_info.ordblks,\n                  \", smblks: \", malloc_info.smblks, \"\\n\");\n  absl::StrAppend(&report, \"hblks: \", malloc_info.hblks, \", hblkhd: \", malloc_info.hblkhd,\n                  \", usmblks: \", malloc_info.usmblks, \"\\n\");\n  absl::StrAppend(&report, \"fsmblks: \", malloc_info.fsmblks, \", uordblks: \", malloc_info.uordblks,\n                  \", fordblks: \", malloc_info.fordblks, \", keepcost: \", malloc_info.keepcost, \"\\n\");\n  absl::StrAppend(&report, \"___ End malloc stats ___\\n\\n\");\n#endif\n\n  absl::StrAppend(&report, \"___ Begin mimalloc stats ___\\n\");\n  mi_stats_print_out(MiStatsCallback, &report);\n  absl::StrAppend(&report, \"___ End mimalloc stats ___\\n\\n\");\n\n  auto* rb = static_cast<RedisReplyBuilder*>(cmd_cntx_->rb());\n  return rb->SendVerbatimString(report);\n}\n\nvoid MemoryCmd::ArenaStats(CmdArgList args) {\n  uint32_t tid = 0;\n  bool backing = false;\n  bool show_arenas = false;\n  bool summarize = false;\n\n  if (args.size() >= 2) {\n    string sub_cmd = absl::AsciiStrToUpper(ArgS(args, 1));\n\n    if (sub_cmd == \"SHOW\") {\n      if 
(args.size() != 2)\n        return cmd_cntx_->SendError(kSyntaxErr, kSyntaxErrType);\n      show_arenas = true;\n    } else {\n      unsigned tid_indx = 1;\n\n      if (sub_cmd == \"SUMMARY\") {\n        ++tid_indx;\n        summarize = true;\n\n        if (args.size() > tid_indx) {\n          sub_cmd = absl::AsciiStrToUpper(ArgS(args, tid_indx));\n        }\n      }\n\n      if (sub_cmd == \"BACKING\") {\n        ++tid_indx;\n        backing = true;\n      }\n\n      if (summarize && args.size() > tid_indx) {\n        return cmd_cntx_->SendError(kSyntaxErr, kSyntaxErrType);\n      }\n\n      if (args.size() > tid_indx && !absl::SimpleAtoi(ArgS(args, tid_indx), &tid)) {\n        return cmd_cntx_->SendError(kInvalidIntErr);\n      }\n    }\n  }\n\n  if (show_arenas) {\n    mi_debug_show_arenas();\n    return cmd_cntx_->rb()->SendOk();\n  }\n\n  if (summarize) {\n    const uint64_t start = absl::GetCurrentTimeNanos();\n    const auto summaries = CollectSummaries(backing);\n    string report = FormatSummaries(summaries);\n    const uint64_t delta = (absl::GetCurrentTimeNanos() - start) / 1000;\n    absl::StrAppend(&report, \"\\n--- End mimalloc statistics, took \", delta, \"us ---\\n\");\n    auto* rb = static_cast<RedisReplyBuilder*>(cmd_cntx_->rb());\n    return rb->SendVerbatimString(report);\n  }\n\n  if (backing && tid >= shard_set->pool()->size()) {\n    return cmd_cntx_->SendError(\n        absl::StrCat(\"Thread id must be less than \", shard_set->pool()->size()));\n  }\n\n  if (!backing && tid >= shard_set->size()) {\n    return cmd_cntx_->SendError(absl::StrCat(\"Thread id must be less than \", shard_set->size()));\n  }\n\n  const string mi_malloc_info =\n      shard_set->pool()->at(tid)->AwaitBrief([=] { return MallocStatsCb(backing, tid); });\n\n  auto* rb = static_cast<RedisReplyBuilder*>(cmd_cntx_->rb());\n  return rb->SendVerbatimString(mi_malloc_info);\n}\n\nvoid MemoryCmd::Usage(std::string_view key, bool account_key_memory_usage) {\n  ShardId sid = 
Shard(key, shard_set->size());\n  ssize_t memory_usage = shard_set->pool()->at(sid)->AwaitBrief(\n      [key, account_key_memory_usage, this, sid]() -> ssize_t {\n        auto& db_slice = cmd_cntx_->server_conn_cntx()->ns->GetDbSlice(sid);\n        auto [pt, exp_t] = db_slice.GetTables(cmd_cntx_->server_conn_cntx()->db_index());\n        PrimeIterator it = pt->Find(key);\n        if (IsValid(it)) {\n          return MemoryUsage(it, account_key_memory_usage);\n        } else {\n          return -1;\n        }\n      });\n\n  auto* rb = static_cast<RedisReplyBuilder*>(cmd_cntx_->rb());\n  if (memory_usage < 0)\n    return rb->SendNull();\n  rb->SendLong(memory_usage);\n}\n\nvoid MemoryCmd::Track(CmdArgList args) {\n#ifndef DFLY_ENABLE_MEMORY_TRACKING\n  return cmd_cntx_->SendError(\"MEMORY TRACK must be enabled at build time.\");\n#endif\n\n  CmdArgParser parser(args);\n\n  if (parser.Check(\"ADD\")) {\n    AllocationTracker::TrackingInfo tracking_info;\n    std::tie(tracking_info.lower_bound, tracking_info.upper_bound, tracking_info.sample_odds) =\n        parser.Next<size_t, size_t, double>();\n    if (parser.HasError()) {\n      return cmd_cntx_->SendError(parser.TakeError().MakeReply());\n    }\n\n    atomic_bool error{false};\n    shard_set->pool()->AwaitBrief([&](unsigned index, auto*) {\n      if (!AllocationTracker::Get().Add(tracking_info)) {\n        error.store(true);\n      }\n    });\n\n    if (error.load()) {\n      return cmd_cntx_->SendError(\"Unable to add tracker\");\n    } else {\n      return cmd_cntx_->rb()->SendOk();\n    }\n  }\n\n  if (parser.Check(\"REMOVE\")) {\n    auto [lower_bound, upper_bound] = parser.Next<size_t, size_t>();\n    if (parser.HasError()) {\n      return cmd_cntx_->SendError(parser.TakeError().MakeReply());\n    }\n\n    atomic_bool error{false};\n    shard_set->pool()->AwaitBrief([&, lo = lower_bound, hi = upper_bound](unsigned index, auto*) {\n      if (!AllocationTracker::Get().Remove(lo, hi)) {\n        
error.store(true);\n      }\n    });\n\n    if (error.load()) {\n      return cmd_cntx_->SendError(\"Unable to remove tracker\");\n    } else {\n      return cmd_cntx_->rb()->SendOk();\n    }\n  }\n\n  if (parser.Check(\"CLEAR\")) {\n    shard_set->pool()->AwaitBrief([&](unsigned index, auto*) { AllocationTracker::Get().Clear(); });\n    return cmd_cntx_->rb()->SendOk();\n  }\n\n  if (parser.Check(\"GET\")) {\n    auto ranges = AllocationTracker::Get().GetRanges();\n    auto* rb = static_cast<facade::RedisReplyBuilder*>(cmd_cntx_->rb());\n    rb->StartArray(ranges.size());\n    for (const auto& range : ranges) {\n      rb->SendSimpleString(\n          absl::StrCat(range.lower_bound, \",\", range.upper_bound, \",\", range.sample_odds));\n    }\n    return;\n  }\n\n  if (parser.Check(\"ADDRESS\")) {\n    string_view ptr_str = parser.Next();\n    if (parser.HasError()) {\n      return cmd_cntx_->SendError(parser.TakeError().MakeReply());\n    }\n\n    size_t ptr = 0;\n    if (!absl::SimpleHexAtoi(ptr_str, &ptr)) {\n      return cmd_cntx_->SendError(\"Address must be hex number\");\n    }\n\n    atomic_bool found{false};\n    shard_set->pool()->AwaitBrief([&](unsigned index, auto*) {\n      if (mi_heap_check_owned(mi_heap_get_backing(), (void*)ptr)) {\n        found.store(true);\n      }\n    });\n\n    return cmd_cntx_->rb()->SendSimpleString(found.load() ? \"FOUND\" : \"NOT-FOUND\");\n  }\n\n  return cmd_cntx_->SendError(kSyntaxErrType);\n}\n\n}  // namespace dfly\n"
  },
  {
    "path": "src/server/memory_cmd.h",
    "content": "// Copyright 2022, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#pragma once\n\n#include \"server/conn_context.h\"\n\nnamespace dfly {\n\nclass ServerFamily;\n\nclass MemoryCmd {\n public:\n  MemoryCmd(ServerFamily* owner, CommandContext* cmd_cntx);\n\n  void Run(CmdArgList args);\n\n private:\n  void Stats();\n  void MallocStats();\n  void ArenaStats(CmdArgList args);\n  void Usage(std::string_view key, bool account_key_memory_usage);\n  void Track(CmdArgList args);\n\n  CommandContext* cmd_cntx_;\n  ServerFamily* owner_;\n};\n\n}  // namespace dfly\n"
  },
  {
    "path": "src/server/multi_command_squasher.cc",
    "content": "// Copyright 2023, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#include \"server/multi_command_squasher.h\"\n\n#include <absl/container/inlined_vector.h>\n\n#include \"base/cycle_clock.h\"\n#include \"base/flag_utils.h\"\n#include \"base/flags.h\"\n#include \"base/logging.h\"\n#include \"core/overloaded.h\"\n#include \"facade/dragonfly_connection.h\"\n#include \"server/command_registry.h\"\n#include \"server/conn_context.h\"\n#include \"server/engine_shard_set.h\"\n#include \"server/transaction.h\"\n#include \"server/tx_base.h\"\n\nABSL_FLAG(uint32_t, max_busy_squash_usec, 1000,\n          \"Maximum time in microseconds to execute squashed commands before yielding.\");\n\nABSL_FLAG(uint32_t, log_squash_info_threshold_usec, 1 << 31,\n          \"Threshold in microseconds above which to log squashing timings.\");\n\nnamespace dfly {\n\nusing namespace std;\nusing namespace facade;\nusing namespace util;\nusing base::CycleClock;\n\nnamespace {\n\nthread_local uint64_t max_busy_squash_cycles_cached = 1ULL << 32;\nthread_local uint32_t log_squash_threshold_cached = 1ULL << 31;\n\nsize_t Size(const CapturingReplyBuilder::Payload& payload) {\n  size_t payload_size = sizeof(CapturingReplyBuilder::Payload);\n  return payload_size +\n         visit(Overloaded{[](const payload::SimpleString& data) { return data.size(); },\n                          [](const payload::BulkString& data) { return data.size(); },\n                          [](const payload::Error& data) {\n                            return data->first.size() + data->second.size();\n                          },\n                          [](const unique_ptr<payload::CollectionPayload>& data) {\n                            if (!data || (data->len == 0 && data->type == CollectionType::ARRAY)) {\n                              return 0ul;\n                            }\n                            size_t res = 0;\n                            for (const auto& 
pl : data->arr) {\n                              res += Size(pl);\n                            }\n                            return res;\n                          },\n                          // Other payload types are small\n                          [](const auto&) { return 0ul; }},\n               payload);\n}\n\n}  // namespace\n\nMultiCommandSquasher::Stats& MultiCommandSquasher::Stats::operator+=(const Stats& o) {\n  squashed_commands += o.squashed_commands;\n  hop_usec += o.hop_usec;\n  reply_usec += o.reply_usec;\n  hops += o.hops;\n  yields += o.yields;\n\n  return *this;\n}\n\nMultiCommandSquasher::MultiCommandSquasher(absl::Span<StoredCmd> cmds, ConnectionContext* cntx,\n                                           Service* service, const Opts& opts)\n    : cmds_{cmds}, cntx_{cntx}, service_{service}, base_cid_{nullptr}, opts_{opts} {\n  auto mode = cntx->transaction->GetMultiMode();\n  base_cid_ = cntx->transaction->GetCId();\n  atomic_ = mode != Transaction::NON_ATOMIC;\n}\n\nMultiCommandSquasher::ShardExecInfo& MultiCommandSquasher::PrepareShardInfo(ShardId sid) {\n  if (sharded_.empty()) {\n    sharded_.resize(shard_set->size());\n    for (size_t i = 0; i < sharded_.size(); i++) {\n      sharded_[i].reply_size_total_ptr = &tl_facade_stats->reply_stats.squashing_current_reply_size;\n    }\n  }\n\n  auto& sinfo = sharded_[sid];\n  if (!sinfo.local_tx) {\n    if (IsAtomic()) {\n      sinfo.local_tx = new Transaction{cntx_->transaction, sid, nullopt};\n    } else {\n      // Non-atomic squashing does not use the transactional framework for fan out, so local\n      // transactions have to be fully standalone, check locks and release them immediately.\n      sinfo.local_tx = new Transaction{base_cid_};\n      sinfo.local_tx->StartMultiNonAtomic();\n    }\n    num_shards_++;\n  }\n\n  return sinfo;\n}\n\nMultiCommandSquasher::SquashResult MultiCommandSquasher::TrySquash(const StoredCmd* cmd) {\n  DCHECK(cmd->Cid());\n\n  const CommandId& cid = 
*cmd->Cid();\n  if (!cid.IsTransactional() || (cid.opt_mask() & CO::BLOCKING) ||\n      (cid.opt_mask() & CO::GLOBAL_TRANS))\n    return SquashResult::NOT_SQUASHED;\n\n  if (cid.name() == \"CLIENT\" || cntx_->conn_state.tracking_info_.IsTrackingOn()) {\n    return SquashResult::NOT_SQUASHED;\n  }\n\n  auto args = cmd->Slice(&tmp_keylist_);\n  if (args.empty())\n    return SquashResult::NOT_SQUASHED;\n\n  // Instead of returning an error, we treat command as non-squashable, allowing the\n  // standalone execution path to handle it.\n  // Validate returns an optional ErrorReply\n  if (cid.Validate(args).has_value())\n    return SquashResult::NOT_SQUASHED;\n\n  auto keys = DetermineKeys(&cid, args);\n  if (!keys.ok() || keys->NumArgs() == 0)\n    return SquashResult::NOT_SQUASHED;\n\n  // Check if all command keys belong to one shard\n  ShardId last_sid = kInvalidSid;\n\n  for (string_view key : keys->Range(args)) {\n    ShardId sid = Shard(key, shard_set->size());\n    if (last_sid == kInvalidSid || last_sid == sid)\n      last_sid = sid;\n    else\n      return SquashResult::NOT_SQUASHED;  // at least two shards\n  }\n\n  auto& sinfo = PrepareShardInfo(last_sid);\n\n  sinfo.dispatched.push_back({.cmd = cmd, .reply = {}});\n  order_.push_back(last_sid);\n\n  bool need_flush = sinfo.dispatched.size() >= opts_.max_squash_size;\n  return need_flush ? 
SquashResult::SQUASHED_FULL : SquashResult::SQUASHED;\n}\n\nbool MultiCommandSquasher::ExecuteStandalone(RedisReplyBuilder* rb, const StoredCmd* cmd) {\n  DCHECK(order_.empty());  // check no squashed chain is interrupted\n\n  auto args = cmd->Slice(&tmp_keylist_);\n\n  if (opts_.verify_commands) {\n    if (auto err = service_->VerifyCommandState(*cmd->Cid(), args, *cntx_); err) {\n      rb->SendError(std::move(*err));\n      return !opts_.error_abort;\n    }\n  }\n\n  auto* tx = cntx_->transaction;\n  if (cmd->Cid()->IsTransactional()) {\n    tx->MultiSwitchCmd(cmd->Cid());\n    auto status = tx->InitByArgs(cntx_->ns, cntx_->conn_state.db_index, args);\n    if (status != OpStatus::OK) {\n      rb->SendError(status);\n      return !opts_.error_abort;\n    }\n  }\n  CommandContext cmd_cntx{rb, cntx_};\n  cmd_cntx.SetupTx(cmd->Cid(), tx);\n  service_->InvokeCmd(args, &cmd_cntx);\n  return true;\n}\n\nOpStatus MultiCommandSquasher::SquashedHopCb(EngineShard* es, RespVersion resp_v) {\n  auto& sinfo = sharded_[es->shard_id()];\n  DCHECK(!sinfo.dispatched.empty());\n\n  auto* local_tx = sinfo.local_tx.get();\n  CapturingReplyBuilder crb(ReplyMode::FULL, resp_v);\n  CmdArgVec arg_vec;\n  CommandContext cmd_cntx{&crb, cntx_};\n  cmd_cntx.SetupTx(nullptr, local_tx);\n\n  auto move_reply = [&sinfo](CapturingReplyBuilder::Payload&& src,\n                             CapturingReplyBuilder::Payload* dst) {\n    *dst = std::move(src);\n    size_t sz = Size(*dst);\n    sinfo.reply_size_delta += sz;\n    sinfo.reply_size_total_ptr->fetch_add(sz, std::memory_order_relaxed);\n  };\n\n  for (auto& dispatched : sinfo.dispatched) {\n    auto args = dispatched.cmd->Slice(&arg_vec);\n    if (opts_.verify_commands) {\n      // The shared context is used for state verification, the local one is only for replies\n      if (auto err = service_->VerifyCommandState(*dispatched.cmd->Cid(), args, *cntx_); err) {\n        crb.SendError(std::move(*err));\n        move_reply(crb.Take(), 
&dispatched.reply);\n        continue;\n      }\n    }\n\n    crb.SetReplyMode(dispatched.cmd->ReplyMode());\n\n    local_tx->MultiSwitchCmd(dispatched.cmd->Cid());\n    auto status = local_tx->InitByArgs(cntx_->ns, cntx_->conn_state.db_index, args);\n    if (status != OpStatus::OK) {\n      crb.SendError(status);\n    } else {\n      cmd_cntx.UpdateCid(dispatched.cmd->Cid());\n      service_->InvokeCmd(args, &cmd_cntx);\n    }\n    move_reply(crb.Take(), &dispatched.reply);\n  }\n\n  return OpStatus::OK;\n}\n\nbool MultiCommandSquasher::ExecuteSquashed(facade::RedisReplyBuilder* rb) {\n  DCHECK(!cntx_->conn_state.exec_info.IsCollecting());\n\n  if (order_.empty())\n    return true;\n\n  unsigned num_shards = 0;\n  for (auto& sd : sharded_) {\n    if (!sd.dispatched.empty())\n      ++num_shards;\n  }\n\n  Transaction* tx = cntx_->transaction;\n  ServerState::tlocal()->stats.squash_width_freq_arr[num_shards - 1]++;\n  uint64_t start = CycleClock::Now();\n  atomic_uint64_t max_sched_cycles{0}, max_exec_cycles{0};\n  base::SpinLock lock;\n  uint64_t fiber_running_cycles{0}, proactor_running_cycles{0};\n  uint32_t max_sched_thread_id{0}, max_sched_seq_num{0};\n\n  // Atomic transactions (that have all keys locked) perform hops and run squashed commands via\n  // stubs, non-atomic ones just run the commands in parallel.\n  if (IsAtomic()) {\n    auto cb = [this](ShardId sid) { return !sharded_[sid].dispatched.empty(); };\n    tx->PrepareSquashedMultiHop(base_cid_, cb);\n    tx->ScheduleSingleHop(\n        [this, rb](auto* tx, auto* es) { return SquashedHopCb(es, rb->GetRespVersion()); });\n  } else {\n    fb2::BlockingCounter bc(num_shards);\n    DVLOG(1) << \"Squashing \" << num_shards << \" \" << tx->DebugId();\n\n    // Saves work in case logging is disable (i.e. 
log_squash_threshold_cached is high).\n    const uint64_t min_threshold_cycles = CycleClock::FromUsec(log_squash_threshold_cached / 5);\n    auto cb = [&, bc, rb]() mutable {\n      uint64_t sched_time = CycleClock::Now() - start;\n\n      // Update max_sched_cycles in lock-free fashion, to avoid contention\n      uint64_t current = max_sched_cycles.load(memory_order_relaxed);\n      while (sched_time > min_threshold_cycles && sched_time > current) {\n        if (max_sched_cycles.compare_exchange_weak(current, sched_time, memory_order_relaxed,\n                                                   memory_order_relaxed)) {\n          lock_guard<base::SpinLock> g(lock);\n\n          // If it is still the longest scheduling time\n          if (max_sched_cycles.load(memory_order_relaxed) == sched_time) {\n            // Store the stats from the callback with longest scheduling time.\n            fiber_running_cycles = ThisFiber::GetRunningTimeCycles();\n            proactor_running_cycles = ProactorBase::me()->GetCurrentBusyCycles();\n            max_sched_thread_id = ProactorBase::me()->GetPoolIndex();\n            max_sched_seq_num = fb2::GetFiberRunSeq();\n          }\n          break;\n        }\n        // current is updated to the current value of max_sched_cycles, so the loop will retry\n        // with the new value if sched_time is still greater than it.\n      }\n\n      if (ThisFiber::GetRunningTimeCycles() > max_busy_squash_cycles_cached) {\n        ThisFiber::Yield();\n        stats_.yields++;\n      }\n      this->SquashedHopCb(EngineShard::tlocal(), rb->GetRespVersion());\n      uint64_t exec_time = CycleClock::Now() - start;\n      current = max_exec_cycles.load(memory_order_relaxed);\n      while (exec_time > current) {\n        if (max_exec_cycles.compare_exchange_weak(current, exec_time, memory_order_relaxed,\n                                                  memory_order_relaxed))\n          break;\n      }\n\n      bc->Dec();  // Release barrier: Must 
be the last one in the callback.\n    };\n    for (unsigned i = 0; i < sharded_.size(); ++i) {\n      if (!sharded_[i].dispatched.empty())\n        shard_set->AddL2(i, cb);\n    }\n    bc->Wait();\n  }\n\n  uint64_t after_hop = CycleClock::Now();\n  bool aborted = false;\n\n  size_t total_reply_size = 0;\n  for (auto& sinfo : sharded_) {\n    total_reply_size += sinfo.reply_size_delta;\n  }\n\n  for (auto idx : order_) {\n    auto& sinfo = sharded_[idx];\n    DCHECK_LT(sinfo.reply_id, sinfo.dispatched.size());\n\n    auto& reply = sinfo.dispatched[sinfo.reply_id++].reply;\n    aborted |= opts_.error_abort && CapturingReplyBuilder::TryExtractError(reply);\n\n    CapturingReplyBuilder::Apply(std::move(reply), rb);\n    if (aborted)\n      break;\n  }\n\n  uint64_t after_reply = CycleClock::Now();\n  uint64_t total_usec = CycleClock::ToUsec(after_reply - start);\n  stats_.hop_usec += total_usec;\n  stats_.reply_usec += CycleClock::ToUsec(after_reply - after_hop);\n  stats_.hops++;\n  stats_.squashed_commands += order_.size();\n\n  if (total_usec > log_squash_threshold_cached) {\n    uint64_t max_sched_usec = CycleClock::ToUsec(max_sched_cycles.load());\n    uint64_t fiber_running_usec = CycleClock::ToUsec(fiber_running_cycles);\n    uint64_t proactor_running_usec = CycleClock::ToUsec(proactor_running_cycles);\n    uint64_t max_exec_usec = CycleClock::ToUsec(max_exec_cycles.load());\n\n    LOG_EVERY_T(INFO, 0.1)\n        << \"Squashed \" << order_.size() << \" commands. 
\"\n        << \"Total/Fanout/MaxSchedTime/ThreadCbTime/ThreadId/FiberCbTime/FiberSeq/\"\n        << \"MaxExecTime: \" << total_usec << \"/\" << num_shards_ << \"/\" << max_sched_usec << \"/\"\n        << proactor_running_usec << \"/\" << max_sched_thread_id << \"/\" << fiber_running_usec << \"/\"\n        << \"/\" << max_sched_seq_num << \"/\" << max_exec_usec << \"\\ncoordinator thread running time: \"\n        << CycleClock::ToUsec(ProactorBase::me()->GetCurrentBusyCycles());\n  }\n\n  tl_facade_stats->reply_stats.squashing_current_reply_size.fetch_sub(total_reply_size,\n                                                                      std::memory_order_release);\n  for (auto& sinfo : sharded_) {\n    sinfo.dispatched.clear();\n    sinfo.reply_id = 0;\n  }\n\n  order_.clear();\n  return !aborted;\n}\n\nvoid MultiCommandSquasher::Run(RedisReplyBuilder* rb) {\n  DVLOG(1) << \"Trying to squash \" << cmds_.size() << \" commands for transaction \"\n           << cntx_->transaction->DebugId();\n\n  for (auto& cmd : cmds_) {\n    auto res = TrySquash(&cmd);\n\n    if (res == SquashResult::NOT_SQUASHED || res == SquashResult::SQUASHED_FULL) {\n      if (!ExecuteSquashed(rb))\n        break;\n\n      // if the last command was not added - we squash it separately.\n      if (res == SquashResult::NOT_SQUASHED) {\n        if (!ExecuteStandalone(rb, &cmd))\n          break;\n      }\n    }\n  }\n\n  ExecuteSquashed(rb);  // Flush leftover\n\n  // Set last txid.\n  cntx_->last_command_debug.clock = cntx_->transaction->txid();\n\n  // UnlockMulti is a no-op for non-atomic multi transactions,\n  // still called for correctness and future changes\n  if (!IsAtomic()) {\n    for (auto& sd : sharded_) {\n      if (sd.local_tx)\n        sd.local_tx->UnlockMulti();\n    }\n  }\n\n  VLOG(1) << \"Handled \" << cmds_.size() << \" commands, max fanout: \" << num_shards_\n          << \", atomic: \" << atomic_;\n}\n\nbool MultiCommandSquasher::IsAtomic() const {\n  return 
atomic_;\n}\n\nvoid MultiCommandSquasher::UpdateFromFlags() {\n  max_busy_squash_cycles_cached = CycleClock::FromUsec(absl::GetFlag(FLAGS_max_busy_squash_usec));\n  log_squash_threshold_cached = absl::GetFlag(FLAGS_log_squash_info_threshold_usec);\n}\n\nvector<string> MultiCommandSquasher::GetMutableFlagNames() {\n  return base::GetFlagNames(FLAGS_max_busy_squash_usec, FLAGS_log_squash_info_threshold_usec);\n}\n\n}  // namespace dfly\n"
  },
  {
    "path": "src/server/multi_command_squasher.h",
    "content": "// Copyright 2023, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#pragma once\n\n#include \"facade/reply_capture.h\"\n#include \"server/conn_context.h\"\n#include \"server/main_service.h\"\n\nnamespace dfly {\n\n// MultiCommandSquasher allows executing a series of commands under a multi transaction\n// and squashing multiple consecutive single-shard commands into one hop whenever it's possible,\n// thus parallelizing command execution and greatly decreasing the dispatch overhead for them.\n//\n// Single shard commands are executed in small batches over multiple shards.\n// For atomic multi transactions (global & locking ahead), the batch is executed with a regular hop\n// of the multi transaction. Each shard contains a \"stub\" transaction to mimic the regular\n// transactional api for commands. Non atomic multi transactions use regular shard_set dispatches\n// instead of hops for executing batches. This allows avoiding locking many keys at once. 
Each shard\n// contains a non-atomic multi transaction to execute squashed commands.\nclass MultiCommandSquasher {\n public:\n  struct Opts {\n    bool verify_commands = false;   // Whether commands need to be verified before execution\n    bool error_abort = false;       // Abort upon receiving error\n    unsigned max_squash_size = 32;  // How many commands to squash at once\n  };\n\n  struct Stats {\n    uint32_t squashed_commands = 0;  // Total number of squashed commands\n    uint32_t hop_usec = 0;           // Total time spent in hops (microseconds)\n    uint32_t reply_usec = 0;         // Total time spent in replies (microseconds)\n    uint32_t hops = 0;               // Total number of hops executed\n    uint32_t yields = 0;\n    Stats& operator+=(const Stats& o);\n  };\n\n  // Returns number of processed commands.\n  static Stats Execute(absl::Span<StoredCmd> cmds, facade::RedisReplyBuilder* rb,\n                       ConnectionContext* cntx, Service* service, const Opts& opts) {\n    MultiCommandSquasher sq{cmds, cntx, service, opts};\n    sq.Run(rb);\n    return sq.stats_;\n  }\n\n  static void UpdateFromFlags();\n  static std::vector<std::string> GetMutableFlagNames();\n\n private:\n  // Per-shard execution info.\n  struct ShardExecInfo {\n    ShardExecInfo() : local_tx{nullptr} {\n    }\n\n    struct Command {\n      const StoredCmd* cmd;\n      facade::CapturingReplyBuilder::Payload reply;\n    };\n    std::vector<Command> dispatched;  // Dispatched commands\n    unsigned reply_id = 0;\n\n    std::atomic<size_t>* reply_size_total_ptr;   // Total size of replies on the IO thread\n    size_t reply_size_delta = 0;                 // Size of replies for this shard\n    boost::intrusive_ptr<Transaction> local_tx;  // stub-mode tx for use inside shard\n  };\n\n  enum class SquashResult : uint8_t { SQUASHED, SQUASHED_FULL, NOT_SQUASHED };\n\n  MultiCommandSquasher(absl::Span<StoredCmd> cmds, ConnectionContext* cntx, Service* Service,\n                       
const Opts& opts);\n\n  // Lazy initialize shard info.\n  ShardExecInfo& PrepareShardInfo(ShardId sid);\n\n  // Return squash flags\n  SquashResult TrySquash(const StoredCmd* cmd);\n\n  // Execute separate non-squashed cmd. Return false if aborting on error.\n  bool ExecuteStandalone(facade::RedisReplyBuilder* rb, const StoredCmd* cmd);\n\n  // Callback that runs on shards during squashed hop.\n  facade::OpStatus SquashedHopCb(EngineShard* es, facade::RespVersion resp_v);\n\n  // Execute all currently squashed commands. Return false if aborting on error.\n  bool ExecuteSquashed(facade::RedisReplyBuilder* rb);\n\n  void Run(facade::RedisReplyBuilder* rb);\n\n  bool IsAtomic() const;\n\n  absl::Span<StoredCmd> cmds_;  // Input range of stored commands\n  ConnectionContext* cntx_;     // Underlying context\n  Service* service_;\n\n  bool atomic_;                // Whether working in any of the atomic modes\n  const CommandId* base_cid_;  // underlying cid (exec or eval) for executing batch hops\n\n  Opts opts_;\n\n  std::vector<ShardExecInfo> sharded_;\n  std::vector<ShardId> order_;  // reply order for squashed cmds\n\n  size_t num_shards_ = 0;\n\n  std::vector<MutableSlice> tmp_keylist_;\n  Stats stats_;\n};\n\n}  // namespace dfly\n"
  },
  {
    "path": "src/server/multi_test.cc",
    "content": "// Copyright 2022, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#include <absl/flags/reflection.h>\n#include <absl/strings/str_cat.h>\n#include <absl/strings/str_replace.h>\n#include <gmock/gmock.h>\n\n#include \"base/flags.h\"\n#include \"base/gtest.h\"\n#include \"base/logging.h\"\n#include \"core/interpreter.h\"\n#include \"facade/facade_test.h\"\n#include \"server/conn_context.h\"\n#include \"server/main_service.h\"\n#include \"server/test_utils.h\"\n#include \"server/transaction.h\"\n\nABSL_DECLARE_FLAG(uint32_t, num_shards);\nABSL_DECLARE_FLAG(bool, multi_exec_squash);\nABSL_DECLARE_FLAG(bool, lua_auto_async);\nABSL_DECLARE_FLAG(bool, lua_allow_undeclared_auto_correct);\nABSL_DECLARE_FLAG(std::string, default_lua_flags);\nABSL_DECLARE_FLAG(std::vector<std::string>, lua_force_atomicity_shas);\n\nnamespace dfly {\n\nusing namespace std;\nusing namespace util;\nusing absl::StrCat;\nusing ::io::Result;\nusing testing::_;\nusing testing::ElementsAre;\nusing testing::HasSubstr;\n\nnamespace {\n\nconstexpr unsigned kPoolThreadCount = 4;\n\nconst char kKey1[] = \"x\";\nconst char kKey2[] = \"b\";\nconst char kKey3[] = \"c\";\nconst char kKey4[] = \"y\";\n\nconst char kKeySid0[] = \"x\";\nconst char kKeySid1[] = \"c\";\nconst char kKeySid2[] = \"b\";\n\n}  // namespace\n\n// This test is responsible for server and main service\n// (connection, transaction etc) families.\nclass MultiTest : public BaseFamilyTest {\n protected:\n  MultiTest() : BaseFamilyTest() {\n    num_threads_ = kPoolThreadCount;\n  }\n};\n\nclass SingleShardMultiTest : public BaseFamilyTest {\n protected:\n  SingleShardMultiTest() : BaseFamilyTest() {\n    num_threads_ = 5;\n    absl::SetFlag(&FLAGS_num_shards, 1);\n  }\n\n  absl::FlagSaver saver_;\n};\n\nstruct MultiTxTest : public MultiTest {};\n\n// Check constants are valid.\nTEST_F(MultiTest, VerifyConstants) {\n  Run({\"mget\", kKeySid0, kKeySid1, kKeySid2});\n  ASSERT_EQ(3, 
GetDebugInfo().shards_count);\n}\n\nTEST_F(MultiTest, MultiAndFlush) {\n  RespExpr resp = Run({\"multi\"});\n  ASSERT_EQ(resp, \"OK\");\n\n  resp = Run({\"get\", kKey1});\n  ASSERT_EQ(resp, \"QUEUED\");\n\n  EXPECT_THAT(Run({\"FLUSHALL\"}), ErrArg(\"not allowed inside a transaction\"));\n}\n\nTEST_F(MultiTest, MultiWithError) {\n  EXPECT_THAT(Run({\"exec\"}), ErrArg(\"EXEC without MULTI\"));\n  EXPECT_THAT(Run({\"multi\"}), \"OK\");\n  EXPECT_THAT(Run({\"set\", \"x\", \"y\"}), \"QUEUED\");\n  EXPECT_THAT(Run({\"set\", \"x\"}), ErrArg(\"wrong number of arguments for 'set' command\"));\n  EXPECT_THAT(Run({\"exec\"}), ErrArg(\"EXECABORT Transaction discarded because of previous errors\"));\n\n  EXPECT_THAT(Run({\"multi\"}), \"OK\");\n  EXPECT_THAT(Run({\"set\", \"z\", \"y\"}), \"QUEUED\");\n  EXPECT_THAT(Run({\"exec\"}), \"OK\");\n\n  EXPECT_THAT(Run({\"get\", \"x\"}), ArgType(RespExpr::NIL));\n  EXPECT_THAT(Run({\"get\", \"z\"}), \"y\");\n}\n\nTEST_F(MultiTest, Multi) {\n  RespExpr resp = Run({\"multi\"});\n  ASSERT_EQ(resp, \"OK\");\n\n  resp = Run({\"get\", kKey1});\n  ASSERT_EQ(resp, \"QUEUED\");\n\n  resp = Run({\"get\", kKey4});\n  ASSERT_EQ(resp, \"QUEUED\");\n\n  resp = Run({\"exec\"});\n  ASSERT_THAT(resp, ArrLen(2));\n  ASSERT_THAT(resp.GetVec(), ElementsAre(ArgType(RespExpr::NIL), ArgType(RespExpr::NIL)));\n\n  atomic_bool tx_empty = true;\n  shard_set->RunBriefInParallel([&](EngineShard* shard) {\n    if (!shard->txq()->Empty())\n      tx_empty.store(false);\n  });\n  EXPECT_TRUE(tx_empty);\n\n  resp = Run({\"get\", kKey4});\n  ASSERT_THAT(resp, ArgType(RespExpr::NIL));\n\n  ASSERT_FALSE(IsLocked(0, kKey1));\n  ASSERT_FALSE(IsLocked(0, kKey4));\n  ASSERT_FALSE(service_->IsShardSetLocked());\n}\n\nTEST_F(MultiTxTest, MultiUnlock) {\n  auto* exec_cid = service_->FindCmd(\"EXEC\");\n  boost::intrusive_ptr<Transaction> tx(new Transaction{exec_cid});\n\n  auto* ns = &namespaces->GetDefaultNamespace();\n  string_view keys[4] = {kKey1, kKey2, kKey3, kKey4};\n\n  
pp_->at(0)->Await([&] { tx->StartMultiLockedAhead(ns, 0, keys); });\n\n  for (auto key : keys)\n    EXPECT_TRUE(IsLocked(0, key));\n\n  pp_->at(0)->Await([&] { tx->UnlockMulti(true); });\n\n  for (auto key : keys)\n    EXPECT_FALSE(IsLocked(0, key));\n}\n\nTEST_F(MultiTest, MultiGlobalCommands) {\n  ASSERT_THAT(Run({\"set\", \"key\", \"val\"}), \"OK\");\n\n  ASSERT_THAT(Run({\"multi\"}), \"OK\");\n  ASSERT_THAT(Run({\"move\", \"key\", \"2\"}), \"QUEUED\");\n  ASSERT_THAT(Run({\"save\"}), \"QUEUED\");\n\n  RespExpr resp = Run({\"exec\"});\n  ASSERT_THAT(resp, ArrLen(2));\n\n  ASSERT_THAT(Run({\"get\", \"key\"}), ArgType(RespExpr::NIL));\n\n  ASSERT_THAT(Run({\"select\", \"2\"}), \"OK\");\n  ASSERT_THAT(Run({\"get\", \"key\"}), \"val\");\n\n  ASSERT_FALSE(IsLocked(0, \"key\"));\n  ASSERT_FALSE(IsLocked(2, \"key\"));\n}\n\nTEST_F(MultiTest, HitMissStats) {\n  RespExpr resp = Run({\"set\", \"Key1\", \"VAL\"});\n  ASSERT_EQ(resp, \"OK\");\n\n  resp = Run({\"get\", \"Key1\"});\n  ASSERT_EQ(resp, \"VAL\");\n\n  resp = Run({\"get\", \"Key2\"});\n  ASSERT_THAT(resp, ArgType(RespExpr::NIL));\n\n  auto metrics = GetMetrics();\n  EXPECT_THAT(metrics.events.hits, 1);\n  EXPECT_THAT(metrics.events.misses, 1);\n}\n\nTEST_F(MultiTest, PerDbHitMissStats) {\n  Run({\"SELECT\", \"0\"});\n  ASSERT_EQ(Run({\"SET\", \"key1\", \"val1\"}), \"OK\");\n  ASSERT_EQ(Run({\"GET\", \"key1\"}), \"val1\");\n  ASSERT_THAT(Run({\"GET\", \"nonexistent1\"}), ArgType(RespExpr::NIL));\n\n  Run({\"SELECT\", \"1\"});\n  ASSERT_EQ(Run({\"SET\", \"key2\", \"val2\"}), \"OK\");\n  ASSERT_EQ(Run({\"GET\", \"key2\"}), \"val2\");\n  ASSERT_THAT(Run({\"GET\", \"nonexistent2\"}), ArgType(RespExpr::NIL));\n\n  auto metrics = GetMetrics();\n\n  EXPECT_GE(metrics.db_stats.size(), 2u);\n  EXPECT_EQ(metrics.db_stats[0].events.hits, 1u);\n  EXPECT_EQ(metrics.db_stats[0].events.misses, 1u);\n  EXPECT_EQ(metrics.db_stats[1].events.hits, 1u);\n  EXPECT_EQ(metrics.db_stats[1].events.misses, 1u);\n\n  
EXPECT_EQ(metrics.events.hits, 2u);\n  EXPECT_EQ(metrics.events.misses, 2u);\n}\n\nTEST_F(MultiTest, PerDbHitMissStatsReset) {\n  Run({\"SELECT\", \"0\"});\n  Run({\"SET\", \"key1\", \"val1\"});\n  Run({\"GET\", \"key1\"});\n  Run({\"GET\", \"key2\"});\n\n  auto before = GetMetrics();\n  ASSERT_GT(before.db_stats[0].events.hits, 0u);\n  ASSERT_GT(before.db_stats[0].events.misses, 0u);\n\n  EXPECT_EQ(\"OK\", Run({\"CONFIG\", \"RESETSTAT\"}));\n\n  auto after = GetMetrics();\n  EXPECT_EQ(after.db_stats[0].events.hits, 0u);\n  EXPECT_EQ(after.db_stats[0].events.misses, 0u);\n}\n\nTEST_F(MultiTest, PerDbHitMissInfoOutput) {\n  Run({\"SELECT\", \"0\"});\n  Run({\"SET\", \"testkey\", \"testval\"});\n  Run({\"GET\", \"testkey\"});\n  Run({\"GET\", \"missing\"});\n\n  auto info_resp = Run({\"INFO\", \"keyspace\"});\n  ASSERT_TRUE(info_resp.type == RespExpr::STRING);\n  string info_str = info_resp.GetString();\n  EXPECT_THAT(info_str, HasSubstr(\"hits=1\"));\n  EXPECT_THAT(info_str, HasSubstr(\"misses=1\"));\n  EXPECT_THAT(info_str, HasSubstr(\"hit_ratio=50.00\"));\n}\n\nTEST_F(MultiTest, MultiEmpty) {\n  RespExpr resp = Run({\"multi\"});\n  ASSERT_EQ(resp, \"OK\");\n  resp = Run({\"exec\"});\n  EXPECT_THAT(resp, ArrLen(0));\n  EXPECT_FALSE(service_->IsShardSetLocked());\n\n  Run({\"multi\"});\n  ASSERT_EQ(Run({\"ping\", \"foo\"}), \"QUEUED\");\n  resp = Run({\"exec\"});\n  EXPECT_EQ(resp, \"foo\");\n\n  Run({\"multi\"});\n  Run({\"set\", \"a\", \"\"});\n  resp = Run({\"exec\"});\n  EXPECT_EQ(resp, \"OK\");\n\n  resp = Run({\"get\", \"a\"});\n  EXPECT_EQ(resp, \"\");\n}\n\nTEST_F(MultiTest, MultiSeq) {\n  RespExpr resp = Run({\"multi\"});\n  ASSERT_EQ(resp, \"OK\");\n\n  resp = Run({\"set\", kKey1, absl::StrCat(1)});\n  ASSERT_EQ(resp, \"QUEUED\");\n  resp = Run({\"get\", kKey1});\n  ASSERT_EQ(resp, \"QUEUED\");\n  resp = Run({\"mget\", kKey1, kKey4});\n  ASSERT_EQ(resp, \"QUEUED\");\n  resp = Run({\"exec\"});\n\n  ASSERT_FALSE(IsLocked(0, kKey1));\n  
ASSERT_FALSE(IsLocked(0, kKey4));\n  ASSERT_FALSE(service_->IsShardSetLocked());\n\n  ASSERT_THAT(resp, ArrLen(3));\n  const auto& arr = resp.GetVec();\n  EXPECT_THAT(arr, ElementsAre(\"OK\", \"1\", ArrLen(2)));\n\n  ASSERT_THAT(arr[2].GetVec(), ElementsAre(\"1\", ArgType(RespExpr::NIL)));\n}\n\nTEST_F(MultiTest, MultiConsistent) {\n  Run({\"mset\", kKey1, \"base\", kKey4, \"base\"});\n\n  auto mset_fb = pp_->at(0)->LaunchFiber([&] {\n    for (size_t i = 1; i < 10; ++i) {\n      string base = StrCat(i * 900);\n      RespExpr resp = Run({\"mset\", kKey1, base, kKey4, base});\n      ASSERT_EQ(resp, \"OK\");\n    }\n  });\n\n  auto fb = pp_->at(1)->LaunchFiber([&] {\n    RespExpr resp = Run({\"multi\"});\n    ASSERT_EQ(resp, \"OK\");\n    ThisFiber::SleepFor(1ms);\n\n    resp = Run({\"get\", kKey1});\n    ASSERT_EQ(resp, \"QUEUED\");\n\n    resp = Run({\"get\", kKey4});\n    ASSERT_EQ(resp, \"QUEUED\");\n\n    resp = Run({\"mget\", kKey4, kKey1});\n    ASSERT_EQ(resp, \"QUEUED\");\n\n    resp = Run({\"exec\"});\n    ASSERT_THAT(resp, ArrLen(3));\n    const RespVec& resp_arr = resp.GetVec();\n    ASSERT_THAT(resp_arr, ElementsAre(ArgType(RespExpr::STRING), ArgType(RespExpr::STRING),\n                                      ArgType(RespExpr::ARRAY)));\n    ASSERT_EQ(resp_arr[0].GetBuf(), resp_arr[1].GetBuf());\n    const RespVec& sub_arr = resp_arr[2].GetVec();\n    EXPECT_THAT(sub_arr, ElementsAre(ArgType(RespExpr::STRING), ArgType(RespExpr::STRING)));\n    EXPECT_EQ(sub_arr[0].GetBuf(), sub_arr[1].GetBuf());\n    EXPECT_EQ(sub_arr[0].GetBuf(), resp_arr[0].GetBuf());\n  });\n\n  mset_fb.Join();\n  fb.Join();\n\n  ASSERT_FALSE(IsLocked(0, kKey1));\n  ASSERT_FALSE(IsLocked(0, kKey4));\n  ASSERT_FALSE(service_->IsShardSetLocked());\n}\n\nTEST_F(MultiTest, MultiConsistent2) {\n  const int kKeyCount = 50;\n  const int kRuns = 50;\n  const int kJobs = 20;\n\n  vector<string> all_keys(kKeyCount);\n  for (size_t i = 0; i < kKeyCount; i++)\n    all_keys[i] = absl::StrCat(\"key\", 
i);\n\n  auto cb = [&](string id) {\n    for (size_t r = 0; r < kRuns; r++) {\n      size_t num_keys = (rand() % 5) + 1;\n      set<string_view> keys;\n      for (size_t i = 0; i < num_keys; i++)\n        keys.insert(all_keys[rand() % kKeyCount]);\n\n      Run(id, {\"MULTI\"});\n      for (auto key : keys)\n        Run(id, {\"INCR\", key});\n      for (auto key : keys)\n        Run(id, {\"DECR\", key});\n      auto resp = Run(id, {\"EXEC\"});\n\n      ASSERT_EQ(resp.GetVec().size(), keys.size() * 2);\n      for (size_t i = 0; i < keys.size(); i++) {\n        EXPECT_EQ(resp.GetVec()[i].GetInt(), optional<int64_t>(1));\n        EXPECT_EQ(resp.GetVec()[i + keys.size()].GetInt(), optional<int64_t>(0));\n      }\n    }\n  };\n\n  vector<Fiber> fbs(kJobs);\n  for (size_t i = 0; i < kJobs; i++) {\n    fbs[i] = pp_->at(i % pp_->size())->LaunchFiber([i, cb]() { cb(absl::StrCat(\"worker\", i)); });\n  }\n\n  for (auto& fb : fbs)\n    fb.Join();\n}\n\nTEST_F(MultiTest, MultiConsistent3) {\n  GTEST_SKIP() << \"Known consistency bug\";\n\n  absl::SetFlag(&FLAGS_multi_exec_squash, false);\n  vector<Fiber> fbs;\n\n  auto run_multi = [this](string_view client) {\n    Run(client, {\"multi\"});\n    Run(client, {\"incr\", kKeySid0});\n    Run(client, {\"incr\", kKeySid1});\n    Run(client, {\"incr\", kKeySid2});\n    Run(client, {\"exec\"});\n  };\n\n  auto run_mget = [this](string_view client) {\n    auto resp = Run(client, {\"mget\", kKeySid0, kKeySid1, kKeySid2});\n    const auto& elems = resp.GetVec();\n    EXPECT_EQ(elems[0].GetString(), elems[1].GetString());\n    EXPECT_EQ(elems[1].GetString(), elems[2].GetString());\n  };\n\n  for (size_t i = 0; i < 10; i++) {\n    auto fb = pp_->at(i % pp_->size())->LaunchFiber([i, run_mget, run_multi] {\n      auto client = absl::StrCat(\"c\", i);\n      for (size_t j = 0; j < 1000; j++) {\n        if (j % 2)\n          run_mget(client);\n        else\n          run_multi(client);\n        size_t sleep = 30 + j / 10 + 5 * i;\n        
ThisFiber::SleepFor(chrono::microseconds(sleep));\n      }\n    });\n    fbs.emplace_back(std::move(fb));\n  }\n\n  for (auto& fb : fbs)\n    fb.JoinIfNeeded();\n\n  auto metrics = GetMetrics();\n  EXPECT_GT(metrics.shard_stats.tx_optimistic_total, 100);\n}\n\nTEST_F(MultiTest, MultiRename) {\n  RespExpr resp = Run({\"mget\", kKey1, kKey4});\n  ASSERT_EQ(1, GetDebugInfo().shards_count);\n\n  resp = Run({\"multi\"});\n  ASSERT_EQ(resp, \"OK\");\n  Run({\"set\", kKey1, \"1\"});\n\n  resp = Run({\"rename\", kKey1, kKey4});\n  ASSERT_EQ(resp, \"QUEUED\");\n  resp = Run({\"exec\"});\n\n  ASSERT_THAT(resp, ArrLen(2));\n  EXPECT_THAT(resp.GetVec(), ElementsAre(\"OK\", \"OK\"));\n\n  // Now rename with keys spawning multiple shards.\n  Run({\"mget\", kKey4, kKey2});\n  ASSERT_EQ(2, GetDebugInfo().shards_count);\n\n  Run({\"multi\"});\n  resp = Run({\"rename\", kKey4, kKey2});\n  ASSERT_EQ(resp, \"QUEUED\");\n  resp = Run({\"exec\"});\n  EXPECT_EQ(resp, \"OK\");\n\n  EXPECT_FALSE(IsLocked(0, kKey1));\n  EXPECT_FALSE(IsLocked(0, kKey2));\n  EXPECT_FALSE(IsLocked(0, kKey4));\n  EXPECT_FALSE(service_->IsShardSetLocked());\n}\n\n// Run multi without transactional commands\nTEST_F(MultiTest, MultiWithoutTx) {\n  Run({\"multi\"});\n  Run({\"ping\"});\n  auto resp = Run({\"exec\"});\n  EXPECT_EQ(resp, \"PONG\");\n\n  // EVAL without keys and default script flags should be non-transactional\n  Run({\"multi\"});\n  Run({\"eval\", \"return 'OK1'\", \"0\"});\n  Run({\"ping\"});\n  Run({\"eval\", \"return 'OK2'\", \"0\", \"not-a-key\"});\n  Run({\"ping\"});\n  Run({\"eval\", \"return 'OK3'\", \"0\", \"not-a-key\", \"as-well\"});\n  Run({\"ping\"});\n  resp = Run({\"exec\"});\n  EXPECT_EQ(resp.GetVec()[2], \"OK2\");\n  EXPECT_EQ(resp.GetVec()[4], \"OK3\");\n}\n\nTEST_F(MultiTest, MultiCommandsWithBonusKeys) {\n  absl::FlagSaver fs;\n  absl::SetFlag(&FLAGS_multi_exec_squash, true);\n\n  EXPECT_EQ(Shard(\"za\", shard_set->size()), Shard(\"zb\", shard_set->size()));\n  
EXPECT_EQ(Shard(\"zb\", shard_set->size()), Shard(\"ze\", shard_set->size()));\n\n  // Check bonus keys are correctly processed with squashing\n  Run({\"multi\"});\n  Run({\"zadd\", \"za\", \"1\", \"a\", \"2\", \"b\"});\n  Run({\"zadd\", \"zb\", \"2\", \"b\", \"3\", \"c\"});\n  Run({\"zinterstore\", \"ze\", \"2\", \"za\", \"zb\"});\n  auto resp = Run({\"exec\"});\n  EXPECT_THAT(resp.GetVec()[2], IntArg(1));\n  EXPECT_THAT(Run({\"zcard\", \"ze\"}), IntArg(1));\n\n  // Check squashing correctly pre-validates commands\n  Run({\"multi\"});\n  Run({\"zinterstore\", \"ze\", \"2\", \"za\", \"zb\", \"z one extra\"});\n  resp = Run({\"exec\"});\n  EXPECT_THAT(resp, ErrArg(\"syntax error\"));\n}\n\nTEST_F(MultiTest, MultiHop) {\n  Run({\"set\", kKey1, \"1\"});\n\n  auto p1_fb = pp_->at(1)->LaunchFiber([&] {\n    for (int i = 0; i < 100; ++i) {\n      auto resp = Run({\"rename\", kKey1, kKey2});\n      ASSERT_EQ(resp, \"OK\");\n      EXPECT_EQ(2, GetDebugInfo(\"IO1\").shards_count);\n\n      resp = Run({\"rename\", kKey2, kKey1});\n      ASSERT_EQ(resp, \"OK\");\n    }\n  });\n\n  // mset should be executed either as ooo or via tx-queue because previous transactions\n  // have been unblocked and executed as well. 
In other words, this mset should never block\n  // on serializability constraints.\n  auto p2_fb = pp_->at(2)->LaunchFiber([&] {\n    for (int i = 0; i < 100; ++i) {\n      Run({\"mset\", kKey3, \"1\", kKey4, \"2\"});\n    }\n  });\n\n  p1_fb.Join();\n  p2_fb.Join();\n}\n\nTEST_F(MultiTest, FlushDb) {\n  Run({\"mset\", kKey1, \"1\", kKey4, \"2\"});\n  auto resp = Run({\"flushdb\"});\n  ASSERT_EQ(resp, \"OK\");\n\n  auto fb0 = pp_->at(0)->LaunchFiber([&] {\n    for (unsigned i = 0; i < 100; ++i) {\n      Run({\"flushdb\"});\n    }\n  });\n\n  pp_->at(1)->Await([&] {\n    for (unsigned i = 0; i < 100; ++i) {\n      Run({\"mset\", kKey1, \"1\", kKey4, \"2\"});\n      int64_t ival = CheckedInt({\"exists\", kKey1, kKey4});\n      ASSERT_TRUE(ival == 0 || ival == 2) << i << \" \" << ival;\n    }\n  });\n\n  fb0.Join();\n\n  ASSERT_FALSE(IsLocked(0, kKey1));\n  ASSERT_FALSE(IsLocked(0, kKey4));\n  ASSERT_FALSE(service_->IsShardSetLocked());\n}\n\n// Triggers a false possitive and therefore we turn it off\n// There seem not to be a good solution to handle these false positives\n// since sanitizers work well with u_context which is *very* slow\nTEST_F(MultiTest, Eval) {\n  if (auto config = absl::GetFlag(FLAGS_default_lua_flags); config != \"\") {\n    GTEST_SKIP() << \"Skipped Eval test because default_lua_flags is set\";\n    return;\n  }\n  absl::FlagSaver saver;\n  absl::SetFlag(&FLAGS_lua_allow_undeclared_auto_correct, true);\n\n  RespExpr resp;\n\n  resp = Run({\"incrby\", \"foo\", \"42\"});\n  EXPECT_THAT(resp, IntArg(42));\n\n  // first time running the script will return error and will change the script flag to allow\n  // undeclared\n  resp = Run({\"eval\", \"return redis.call('get', 'foo')\", \"0\"});\n  EXPECT_THAT(resp, ErrArg(\"undeclared\"));\n\n  // running the same script the second time will succeed\n  resp = Run({\"eval\", \"return redis.call('get', 'foo')\", \"0\"});\n  EXPECT_THAT(resp, \"42\");\n\n  Run({\"script\", \"flush\"});  // Reset global flag 
due to lua_allow_undeclared_auto_correct effect\n\n  resp = Run({\"eval\", \"return redis.call('get', 'foo')\", \"1\", \"bar\"});\n  EXPECT_THAT(resp, ErrArg(\"undeclared\"));\n  ASSERT_FALSE(IsLocked(0, \"foo\"));\n\n  Run({\"script\", \"flush\"});  // Reset global flag from autocorrect\n\n  resp = Run({\"eval\", \"return redis.call('get', 'foo')\", \"1\", \"foo\"});\n  EXPECT_THAT(resp, \"42\");\n  ASSERT_FALSE(IsLocked(0, \"foo\"));\n\n  resp = Run({\"eval\", \"return redis.call('get', KEYS[1])\", \"1\", \"foo\"});\n  EXPECT_THAT(resp, \"42\");\n  ASSERT_FALSE(IsLocked(0, \"foo\"));\n  ASSERT_FALSE(service_->IsShardSetLocked());\n\n  resp = Run({\"eval\", \"return 77\", \"2\", \"foo\", \"zoo\"});\n  EXPECT_THAT(resp, IntArg(77));\n\n  // a,b important here to spawn multiple shards.\n  resp = Run({\"eval\", \"return redis.call('exists', KEYS[2])\", \"2\", \"a\", \"b\"});\n  // EXPECT_EQ(2, GetDebugInfo().shards_count);\n  EXPECT_THAT(resp, IntArg(0));\n\n  resp = Run({\"eval\", \"return redis.call('hmset', KEYS[1], 'f1', '2222')\", \"1\", \"hmap\"});\n  EXPECT_EQ(resp, \"OK\");\n\n  resp = Run({\"hvals\", \"hmap\"});\n  EXPECT_EQ(resp, \"2222\");\n\n  Run({\"sadd\", \"s1\", \"a\", \"b\"});\n  Run({\"sadd\", \"s2\", \"a\", \"c\"});\n  resp = Run({\"eval\", \"return redis.call('SUNION', KEYS[1], KEYS[2])\", \"2\", \"s1\", \"s2\"});\n  ASSERT_THAT(resp, ArrLen(3));\n  const auto& arr = resp.GetVec();\n  EXPECT_THAT(arr, ElementsAre(\"a\", \"b\", \"c\"));\n\n  Run({\"zadd\", \"z1\", \"123\", \"a\", \"12345678912345\", \"b\", \"12.5\", \"c\"});\n  const char* kGetScore = \"return redis.call('ZSCORE', KEYS[1], ARGV[1]) .. 
'-works'\";\n\n  resp = Run({\"eval\", kGetScore, \"1\", \"z1\", \"a\"});\n  EXPECT_EQ(resp, \"123-works\");\n  resp = Run({\"eval\", kGetScore, \"1\", \"z1\", \"b\"});\n  EXPECT_EQ(resp, \"12345678912345-works\");\n  resp = Run({\"eval\", kGetScore, \"1\", \"z1\", \"c\"});\n  EXPECT_EQ(resp, \"12.5-works\");\n\n  // Multiple calls in a Lua script\n  EXPECT_EQ(Run({\"eval\",\n                 R\"(redis.call('set', 'foo', '42')\n                    return redis.call('get', 'foo'))\",\n                 \"1\", \"foo\"}),\n            \"42\");\n\n  auto condition = [&]() { return IsLocked(0, \"foo\"); };\n  auto fb = ExpectConditionWithSuspension(condition);\n  EXPECT_EQ(Run({\"eval\",\n                 R\"(redis.call('set', 'foo', '42')\n                    return redis.call('get', 'foo'))\",\n                 \"1\", \"foo\"}),\n            \"42\");\n  fb.Join();\n\n  // Call multi-shard command scan from single shard mode\n  resp = Run({\"eval\", \"return redis.call('scan', '0'); \", \"1\", \"key\"});\n  EXPECT_EQ(resp.GetVec()[0], \"0\");\n  EXPECT_EQ(resp.GetVec()[1].type, RespExpr::Type::ARRAY);\n}\n\nTEST_F(MultiTest, Watch) {\n  auto kExecFail = ArgType(RespExpr::NIL);\n  auto kExecSuccess = ArgType(RespExpr::ARRAY);\n\n  // Check watch doesn't run in multi.\n  Run({\"multi\"});\n  ASSERT_THAT(Run({\"watch\", \"a\"}), ErrArg(\"not allowed inside a transaction\"));\n  Run({\"discard\"});\n\n  // Check watch on existing key.\n  Run({\"set\", \"a\", \"1\"});\n  EXPECT_EQ(Run({\"watch\", \"a\"}), \"OK\");\n  Run({\"set\", \"a\", \"2\"});\n  Run({\"multi\"});\n  ASSERT_THAT(Run({\"exec\"}), kExecFail);\n\n  // Check watch with nonempty exec body\n  EXPECT_EQ(Run({\"watch\", \"a\"}), \"OK\");\n  Run({\"multi\"});\n  Run({\"get\", \"a\"});\n  Run({\"get\", \"b\"});\n  Run({\"get\", \"c\"});\n  ASSERT_THAT(Run({\"exec\"}), kExecSuccess);\n\n  // Check watch data cleared after EXEC.\n  Run({\"set\", \"a\", \"1\"});\n  Run({\"multi\"});\n  ASSERT_THAT(Run({\"exec\"}), 
kExecSuccess);\n\n  // Check watch on non-existent key.\n  Run({\"del\", \"b\"});\n  EXPECT_EQ(Run({\"watch\", \"b\"}), \"OK\");  // didn't exist yet\n  Run({\"set\", \"b\", \"1\"});\n  Run({\"multi\"});\n  ASSERT_THAT(Run({\"exec\"}), kExecFail);\n\n  // Check EXEC doesn't miss watched key expiration.\n  Run({\"watch\", \"a\"});\n  Run({\"expire\", \"a\", \"1\"});\n  AdvanceTime(1000);\n  Run({\"multi\"});\n  Run({\"get\", \"a\"});\n  ASSERT_THAT(Run({\"exec\"}), kExecFail);\n\n  // Check unwatch.\n  Run({\"watch\", \"a\"});\n  Run({\"unwatch\"});\n  Run({\"set\", \"a\", \"3\"});\n  Run({\"multi\"});\n  ASSERT_THAT(Run({\"exec\"}), kExecSuccess);\n\n  // Check double expire\n  Run({\"watch\", \"a\", \"b\"});\n  Run({\"set\", \"a\", \"2\"});\n  Run({\"set\", \"b\", \"2\"});\n  Run({\"multi\"});\n  ASSERT_THAT(Run({\"exec\"}), kExecFail);\n\n  // Check EXPIRE + new key.\n  Run({\"set\", \"a\", \"1\"});\n  Run({\"del\", \"c\"});\n  Run({\"watch\", \"c\"});  // didn't exist yet\n  Run({\"watch\", \"a\"});\n  Run({\"set\", \"c\", \"1\"});\n  Run({\"expire\", \"a\", \"1\"});  // a existed\n\n  AdvanceTime(1000);\n\n  Run({\"multi\"});\n  ASSERT_THAT(Run({\"exec\"}), kExecFail);\n\n  // Check FLUSHDB touches watched keys\n  Run({\"select\", \"1\"});\n  Run({\"set\", \"a\", \"1\"});\n  Run({\"watch\", \"a\"});\n  Run({\"flushdb\"});\n  Run({\"multi\"});\n  ASSERT_THAT(Run({\"exec\"}), kExecFail);\n\n  // Check multi db watches are not supported.\n  Run({\"select\", \"1\"});\n  Run({\"set\", \"a\", \"1\"});\n  Run({\"watch\", \"a\"});\n  Run({\"select\", \"0\"});\n  Run({\"multi\"});\n  ASSERT_THAT(Run({\"exec\"}), ArgType(RespExpr::ERROR));\n\n  // Check watch keys are isolated between databases.\n  Run({\"set\", \"a\", \"1\"});\n  Run({\"watch\", \"a\"});\n  Run({\"select\", \"1\"});\n  Run({\"set\", \"a\", \"2\"});  // changing a on db 1\n  Run({\"select\", \"0\"});\n  Run({\"multi\"});\n  ASSERT_THAT(Run({\"exec\"}), kExecSuccess);\n}\n\nTEST_F(MultiTest, MultiOOO) {\n 
 GTEST_SKIP() << \"Command squashing breaks stats\";\n\n  auto fb0 = pp_->at(0)->LaunchFiber([&] {\n    for (unsigned i = 0; i < 100; i++) {\n      Run({\"multi\"});\n      Run({\"rpush\", \"a\", \"bar\"});\n      Run({\"exec\"});\n    }\n  });\n\n  pp_->at(1)->Await([&] {\n    for (unsigned i = 0; i < 100; ++i) {\n      Run({\"multi\"});\n      Run({\"rpush\", \"b\", \"bar\"});\n      Run({\"exec\"});\n    }\n  });\n\n  fb0.Join();\n  auto metrics = GetMetrics();\n\n  // OOO works in LOCK_AHEAD mode.\n  EXPECT_EQ(200, metrics.shard_stats.tx_ooo_total);\n}\n\n// Lua scripts lock their keys ahead and thus can run out of order.\nTEST_F(MultiTest, EvalOOO) {\n  if (auto config = absl::GetFlag(FLAGS_default_lua_flags); config != \"\") {\n    GTEST_SKIP() << \"Skipped EvalOOO test because default_lua_flags is set\";\n    return;\n  }\n\n  // Assign to prevent asyc optimization.\n  const char* kScript = \"local r = redis.call('MGET', unpack(KEYS)); return 'OK'\";\n\n  // Check single call.\n  {\n    auto resp = Run({\"eval\", kScript, \"3\", kKey1, kKey2, kKey3});\n    ASSERT_EQ(resp, \"OK\");\n  }\n\n  const int kTimes = 10;\n  // Check scripts running on different shards don't block each other.\n  {\n    auto run = [this, kScript](auto key) {\n      for (int i = 0; i < kTimes; i++)\n        Run({\"eval\", kScript, \"1\", key});\n    };\n\n    auto f1 = pp_->at(0)->LaunchFiber([&]() { run(kKeySid0); });\n    auto f2 = pp_->at(1)->LaunchFiber([&]() { run(kKeySid1); });\n\n    f1.Join();\n    f2.Join();\n  }\n\n  auto metrics = GetMetrics();\n  auto sum = metrics.coordinator_stats.eval_io_coordination_cnt +\n             metrics.coordinator_stats.eval_shardlocal_coordination_cnt;\n  EXPECT_EQ(1 + 2 * kTimes, sum);\n}\n\n// Run MULTI/EXEC commands in parallel, where each command is:\n//        MULTI - SET k1 v - SET k2 v - SET k3 v - EXEC\n// but the order of the commands inside appears in any permutation.\nTEST_F(MultiTest, MultiContendedPermutatedKeys) {\n  constexpr int 
kRounds = 5;\n\n  auto run = [this](vector<string> keys, bool reversed) {\n    int i = 0;\n    do {\n      Run({\"multi\"});\n      auto apply = [this](auto key) { Run({\"set\", key, \"v\"}); };\n\n      if (reversed)\n        for_each(keys.rbegin(), keys.rend(), apply);\n      else\n        for_each(keys.begin(), keys.end(), apply);\n\n      Run({\"exec\"});\n    } while (next_permutation(keys.begin(), keys.end()) || i++ < kRounds);\n  };\n\n  vector<string> keys = {kKeySid0, kKeySid1, kKey3};\n\n  auto f1 = pp_->at(1)->LaunchFiber([run, keys]() { run(keys, false); });\n  auto f2 = pp_->at(2)->LaunchFiber([run, keys]() { run(keys, true); });\n\n  f1.Join();\n  f2.Join();\n}\n\nTEST_F(MultiTest, MultiCauseUnblocking) {\n  const int kRounds = 10;\n  vector<string> keys = {kKeySid0, kKeySid1, kKeySid2};\n\n  auto push = [this, keys]() mutable {\n    int i = 0;\n    do {\n      Run({\"multi\"});\n      for (auto k : keys)\n        Run({\"lpush\", k, \"v\"});\n      Run({\"exec\"});\n    } while (next_permutation(keys.begin(), keys.end()) || i++ < kRounds);\n  };\n\n  auto pop = [this, keys]() mutable {\n    int i = 0;\n    do {\n      for (int j = keys.size() - 1; j >= 0; j--)\n        ASSERT_THAT(Run({\"blpop\", keys[j], \"0\"}), ArrLen(2));\n    } while (next_permutation(keys.begin(), keys.end()) || i++ < kRounds);\n  };\n\n  auto f1 = pp_->at(1)->LaunchFiber([push]() mutable { push(); });\n  auto f2 = pp_->at(2)->LaunchFiber([pop]() mutable { pop(); });\n\n  f1.Join();\n  f2.Join();\n}\n\nTEST_F(MultiTest, ExecGlobalFallback) {\n  Run({\"multi\"});\n  Run({\"set\", \"a\", \"1\"});  // won't run ooo, because it became part of global\n  Run({\"move\", \"a\", \"1\"});\n  Run({\"exec\"});\n  EXPECT_EQ(1, GetMetrics().coordinator_stats.tx_global_cnt);\n}\n\nTEST_F(MultiTest, ScriptFlagsCommand) {\n  if (auto flags = absl::GetFlag(FLAGS_default_lua_flags); flags != \"\") {\n    GTEST_SKIP() << \"Skipped ScriptFlagsCommand test because default_lua_flags is set\";\n    
return;\n  }\n\n  const char* kUndeclared1 = \"return redis.call('GET', 'random-key-1');\";\n  const char* kUndeclared2 = \"return redis.call('GET', 'random-key-2');\";\n\n  Run({\"set\", \"random-key-1\", \"works\"});\n  Run({\"set\", \"random-key-2\", \"works\"});\n\n  // Check SCRIPT FLAGS is applied correctly to loaded scripts.\n  {\n    auto sha_resp = Run({\"script\", \"load\", kUndeclared1});\n    auto sha = facade::ToSV(sha_resp.GetBuf());\n\n    EXPECT_THAT(Run({\"evalsha\", sha, \"0\"}), ErrArg(\"undeclared\"));\n\n    EXPECT_EQ(Run({\"script\", \"flags\", sha, \"allow-undeclared-keys\"}), \"OK\");\n\n    EXPECT_THAT(Run({\"evalsha\", sha, \"0\"}), \"works\");\n  }\n\n  // Check SCRIPT FLAGS can be applied by sha before loading.\n  {\n    char sha_buf[41];\n    Interpreter::FuncSha1(kUndeclared2, sha_buf);\n    string_view sha{sha_buf, 40};\n\n    EXPECT_THAT(Run({\"script\", \"flags\", sha, \"allow-undeclared-keys\"}), \"OK\");\n\n    EXPECT_THAT(Run({\"eval\", kUndeclared2, \"0\"}), \"works\");\n  }\n}\n\nTEST_F(MultiTest, ScriptFlagsInvalidSha) {\n  EXPECT_THAT(Run({\"script\", \"flags\", \"short\", \"allow-undeclared-keys\"}), ErrArg(\"\"));\n}\n\nTEST_F(MultiTest, ScriptFlagsEmbedded) {\n  const char* s1 = R\"(\n  --!df flags=allow-undeclared-keys\n  return redis.call('GET', 'random-key');\n)\";\n\n  // Check eval finds script flags.\n  Run({\"set\", \"random-key\", \"works\"});\n  EXPECT_EQ(Run({\"eval\", s1, \"0\"}), \"works\");\n\n  const char* s2 = R\"(\n  --!df flags=this-is-an-error\n  redis.call('SET', 'random-key', 'failed')\n  )\";\n\n  EXPECT_THAT(Run({\"eval\", s2, \"0\"}), ErrArg(\"Invalid flag: this-is-an-error\"));\n}\n\nTEST_F(MultiTest, UndeclaredKeyFlag) {\n  absl::FlagSaver fs;  // lua_undeclared_keys_shas changed via CONFIG cmd below\n\n  const char* script = \"return redis.call('GET', 'random-key');\";\n  Run({\"set\", \"random-key\", \"works\"});\n\n  // Get SHA for script in a persistent way\n  string sha = Run({\"script\", 
\"load\", script}).GetString();\n\n  // Make sure we can't run the script before setting the flag\n  EXPECT_THAT(Run({\"evalsha\", sha, \"0\"}), ErrArg(\"undeclared\"));\n  EXPECT_THAT(Run({\"eval\", script, \"0\"}), ErrArg(\"undeclared\"));\n\n  // Clear all Lua scripts so we can configure the cache\n  EXPECT_THAT(Run({\"script\", \"flush\"}), \"OK\");\n  EXPECT_THAT(Run({\"script\", \"exists\", sha}), IntArg(0));\n\n  EXPECT_THAT(\n      Run({\"config\", \"set\", \"lua_undeclared_keys_shas\", absl::StrCat(sha, \",NON-EXISTING-HASH\")}),\n      \"OK\");\n\n  // Check eval finds script flags.\n  EXPECT_EQ(Run({\"eval\", script, \"0\"}), \"works\");\n  EXPECT_EQ(Run({\"evalsha\", sha, \"0\"}), \"works\");\n}\n\nTEST_F(MultiTest, LegacyFloatFlag) {\n  const char* script_with_flag = R\"(\n  --!df flags=legacy-float\n  return 42.9\n)\";\n  EXPECT_THAT(Run({\"eval\", script_with_flag, \"0\"}), IntArg(42));\n\n  const char* script_negative = R\"(\n  --!df flags=legacy-float\n  return -3.8\n)\";\n  EXPECT_THAT(Run({\"eval\", script_negative, \"0\"}), IntArg(-3));\n\n  EXPECT_THAT(Run({\"eval\", \"return 42.9\", \"0\"}), DoubleArg(42.9));\n\n  const char* script = \"return 42.9\";\n  char sha_buf[41];\n  Interpreter::FuncSha1(script, sha_buf);\n  string_view sha{sha_buf, 40};\n\n  EXPECT_EQ(Run({\"script\", \"flags\", string(sha), \"legacy-float\"}), \"OK\");\n\n  EXPECT_THAT(Run({\"eval\", script, \"0\"}), IntArg(42));\n}\n\nTEST_F(MultiTest, LegacyFloatShaFlag) {\n  absl::FlagSaver fs;\n\n  const char* script = \"return 42.9\";\n  string sha = Run({\"script\", \"load\", script}).GetString();\n\n  EXPECT_THAT(Run({\"evalsha\", sha, \"0\"}), DoubleArg(42.9));\n\n  Run({\"script\", \"flush\"});\n  EXPECT_THAT(Run({\"config\", \"set\", \"lua_float_as_int_shas\", sha}), \"OK\");\n\n  EXPECT_THAT(Run({\"eval\", script, \"0\"}), IntArg(42));\n}\n\nTEST_F(MultiTest, CjsonDecodeIntegerBehavior) {\n  // cjson.decode always returns integers for whole numbers (Redis/Lua 5.1 
compatible)\n  const char* script_cjson = R\"(\n    local obj = cjson.decode('{\"value\": 42}')\n    return tostring(obj.value)\n  )\";\n  EXPECT_EQ(Run({\"eval\", script_cjson, \"0\"}), \"42\");\n\n  // Floats with fractional parts remain as floats\n  const char* script_cjson_float = R\"(\n    local obj = cjson.decode('{\"value\": 42.5}')\n    return tostring(obj.value)\n  )\";\n  EXPECT_EQ(Run({\"eval\", script_cjson_float, \"0\"}), \"42.5\");\n}\n\nTEST_F(MultiTest, ScriptBadCommand) {\n  const char* s1 = \"redis.call('FLUSHALL')\";\n  const char* s2 = \"redis.call('FLUSHALL'); redis.set(KEYS[1], ARGS[1]);\";\n  const char* s3 = \"redis.acall('FLUSHALL'); redis.set(KEYS[1], ARGS[1]);\";\n  const char* s4 = R\"(\n    --!df flags=disable-atomicity\n    redis.call('FLUSHALL');\n    return \"OK\";\n  )\";\n\n  auto resp = Run({\"eval\", s1, \"0\"});  // tx won't be scheduled at all\n  EXPECT_THAT(resp, ErrArg(\"This Redis command is not allowed from script\"));\n\n  resp = Run({\"eval\", s2, \"1\", \"works\", \"false\"});  // will be scheduled as lock ahead\n  EXPECT_THAT(resp, ErrArg(\"This Redis command is not allowed from script\"));\n\n  resp = Run({\"eval\", s3, \"1\", \"works\", \"false\"});  // also async call will happen\n  EXPECT_THAT(resp, ErrArg(\"This Redis command is not allowed from script\"));\n\n  resp = Run({\"eval\", s4, \"0\"});\n  EXPECT_EQ(resp, \"OK\");\n}\n\nTEST_F(MultiTest, MultiSquash) {\n  string_view script = R\"(\nredis.call('APPEND', KEYS[1], ARGV[1]);\nredis.call('GET', KEYS[1]);\nredis.call('APPEND', KEYS[1], ARGV[2])\nreturn 'OK';\n)\";\n\n  auto resp = Run({\"EVAL\", script, \"1\", \"A\", \"works\", \"reliably\"});\n  EXPECT_EQ(resp, \"OK\");\n\n  resp = Run({\"EVAL\", script, \"1\", \"A\", \"once\", \"again\"});\n  EXPECT_EQ(resp, \"OK\");\n\n  auto metrics = GetMetrics();\n  EXPECT_EQ(metrics.coordinator_stats.eval_shardlocal_coordination_cnt, 2u);\n  // EXPECT_EQ(metrics.shard_stats.tx_ooo_total, 2u);\n\n  auto a_expect = 
absl::StrCat(\"works\", \"reliably\", \"once\", \"again\");\n  EXPECT_EQ(Run({\"GET\", \"A\"}), a_expect);\n}\n\n// Check that single shard script running with allow-undeclared-keys (i.e. global)\n// running on a single shard setup can be squashed with \"shardlocal\" execution\nTEST_F(SingleShardMultiTest, MultiSquashGlobalSingleShard) {\n  string_view script = R\"(\n--!df flags=allow-undeclared-keys\nredis.call('SET', 'first', 'works');\nredis.call('SET', 'second', 'too');\nredis.call('SET', 'third', 'as well');\nreturn 'OK';\n)\";\n\n  auto resp = Run({\"EVAL\", script, \"0\"});\n  EXPECT_EQ(resp, \"OK\");\n\n  // Check call was shardlocal and out of order\n  auto metrics = GetMetrics();\n  EXPECT_EQ(metrics.coordinator_stats.eval_shardlocal_coordination_cnt, 1u);\n\n  EXPECT_EQ(Run({\"GET\", \"first\"}), \"works\");\n  EXPECT_EQ(Run({\"GET\", \"second\"}), \"too\");\n  EXPECT_EQ(Run({\"GET\", \"third\"}), \"as well\");\n}\n\nTEST_F(MultiTest, MultiEvalModeConflict) {\n  const char* s1 = R\"(\n  --!df flags=allow-undeclared-keys\n  return redis.call('GET', 'random-key');\n)\";\n\n  EXPECT_EQ(Run({\"multi\"}), \"OK\");\n  // Check eval finds script flags.\n  EXPECT_EQ(Run({\"set\", \"random-key\", \"works\"}), \"QUEUED\");\n  EXPECT_EQ(Run({\"eval\", s1, \"0\"}), \"QUEUED\");\n  EXPECT_THAT(Run({\"exec\"}),\n              RespArray(ElementsAre(\n                  \"OK\", ErrArg(\"Multi mode conflict when running eval in multi transaction\"))));\n}\n\n// Run multi-exec transactions that move values from a source list\n// to destination list through two contended channels.\nTEST_F(MultiTest, ContendedList) {\n  constexpr int listSize = 50;\n  constexpr int stepSize = 5;\n\n  auto run = [this](string_view src, string_view dest) {\n    for (int i = 0; i < listSize / stepSize; i++) {\n      Run({\"multi\"});\n      Run({\"sort\", src});\n      for (int j = 0; j < stepSize; j++)\n        Run({\"lmove\", src, j % 2 ? 
\"chan-1\" : \"chan-2\", \"RIGHT\", \"RIGHT\"});\n      for (int j = 0; j < stepSize; j++)\n        Run({\"lmove\", j % 2 ? \"chan-1\" : \"chan-2\", dest, \"LEFT\", \"RIGHT\"});\n      Run({\"exec\"});\n    }\n  };\n\n  for (int i = 0; i < listSize; i++) {\n    Run({\"lpush\", \"l1\", \"a\"});\n    Run({\"lpush\", \"l2\", \"b\"});\n  }\n\n  auto f1 = pp_->at(1)->LaunchFiber([run]() mutable { run(\"l1\", \"l1-out\"); });\n  auto f2 = pp_->at(2)->LaunchFiber([run]() mutable { run(\"l2\", \"l2-out\"); });\n\n  f1.Join();\n  f2.Join();\n\n  for (int i = 0; i < listSize; i++) {\n    EXPECT_EQ(Run({\"lpop\", \"l1-out\"}), \"a\");\n    EXPECT_EQ(Run({\"lpop\", \"l2-out\"}), \"b\");\n  }\n\n  EXPECT_THAT(Run({\"llen\", \"chan-1\"}), IntArg(0));\n  EXPECT_THAT(Run({\"llen\", \"chan-2\"}), IntArg(0));\n}\n\n// Test that squashing makes single-key ops atomic withing a non-atomic tx\n// because it runs them within one hop.\nTEST_F(MultiTest, TestSquashing) {\n  absl::FlagSaver fs;\n  absl::SetFlag(&FLAGS_multi_exec_squash, true);\n\n  const char* keys[] = {kKeySid0, kKeySid1, kKeySid2};\n\n  atomic_bool done{false};\n  auto f1 = pp_->at(1)->LaunchFiber([this, keys, &done]() {\n    while (!done.load()) {\n      for (auto key : keys)\n        ASSERT_THAT(Run({\"llen\", key}), IntArg(0));\n    }\n  });\n\n  for (unsigned times = 0; times < 10; times++) {\n    Run({\"multi\"});\n    for (auto key : keys)\n      Run({\"lpush\", key, \"works\"});\n    for (auto key : keys)\n      Run({\"lpop\", key});\n    Run({\"exec\"});\n  }\n\n  done.store(true);\n  f1.Join();\n\n  // Test some more unusual commands\n  Run({\"multi\"});\n  Run({\"mget\", \"x1\", \"x2\", \"x3\"});\n  Run({\"mget\", \"x4\"});\n  Run({\"mget\", \"x5\", \"x6\", \"x7\", \"x8\"});\n  Run({\"ft.search\", \"i1\", \"*\"});\n  Run({\"exec\"});\n}\n\nTEST_F(MultiTest, MultiLeavesTxQueue) {\n  // Tests the scenario, where the OOO multi-tx is scheduled into tx queue and there is another\n  // tx (mget) after it that runs and 
tests for atomicity.\n  absl::FlagSaver fs;\n  absl::SetFlag(&FLAGS_multi_exec_squash, false);\n\n  for (unsigned i = 0; i < 20; ++i) {\n    string key = StrCat(\"x\", i);\n    LOG(INFO) << key << \": shard \" << Shard(key, shard_set->size());\n  }\n\n  Run({\"mget\", \"x5\", \"x8\", \"x9\", \"x13\", \"x16\", \"x17\"});\n  ASSERT_EQ(1, GetDebugInfo().shards_count);\n\n  auto fb1 = pp_->at(1)->LaunchFiber(Launch::post, [&] {\n    // Runs multi on shard0 1000 times.\n    for (unsigned j = 0; j < 1000; ++j) {\n      Run({\"multi\"});\n      Run({\"incrby\", \"x13\", \"1\"});\n      Run({\"incrby\", \"x16\", \"1\"});\n      Run({\"incrby\", \"x17\", \"1\"});\n      Run({\"exec\"});\n    }\n  });\n\n  auto fb2 = pp_->at(2)->LaunchFiber(Launch::dispatch, [&] {\n    // Runs multi on shard0 1000 times.\n    for (unsigned j = 0; j < 1000; ++j) {\n      Run({\"multi\"});\n      Run({\"incrby\", \"x5\", \"1\"});\n      Run({\"incrby\", \"x8\", \"1\"});\n      Run({\"incrby\", \"x9\", \"1\"});\n      Run({\"exec\"});\n    }\n  });\n\n  auto check_triple = [](const RespExpr::Vec& arr, unsigned start) {\n    if (arr[start].type != arr[start + 1].type || arr[start + 1].type != arr[start + 2].type) {\n      return false;\n    }\n\n    if (arr[start].type == RespExpr::STRING) {\n      string s0 = arr[start].GetString();\n      string s1 = arr[start + 1].GetString();\n      string s2 = arr[start + 2].GetString();\n      if (s0 != s1 || s1 != s2) {\n        return false;\n      }\n    }\n    return true;\n  };\n\n  bool success = pp_->at(0)->Await([&]() -> bool {\n    for (unsigned j = 0; j < 1000; ++j) {\n      auto resp = Run({\"mget\", \"x5\", \"x8\", \"x9\", \"x13\", \"x16\", \"x17\"});\n      const RespExpr::Vec& arr = resp.GetVec();\n      CHECK_EQ(6u, arr.size());\n\n      if (!check_triple(arr, 0)) {\n        LOG(ERROR) << \"inconsistent \" << arr[0] << \" \" << arr[1] << \" \" << arr[2];\n        return false;\n      }\n      if (!check_triple(arr, 3)) {\n        LOG(ERROR) 
<< \"inconsistent \" << arr[3] << \" \" << arr[4] << \" \" << arr[5];\n        return false;\n      }\n    }\n    return true;\n  });\n\n  fb1.Join();\n  fb2.Join();\n  ASSERT_TRUE(success);\n}\n\nTEST_F(MultiTest, TestLockedKeys) {\n  auto condition = [&]() { return IsLocked(0, \"key1\") && IsLocked(0, \"key2\"); };\n  auto fb = ExpectConditionWithSuspension(condition);\n\n  EXPECT_EQ(Run({\"multi\"}), \"OK\");\n  EXPECT_EQ(Run({\"set\", \"key1\", \"val1\"}), \"QUEUED\");\n  EXPECT_EQ(Run({\"set\", \"key2\", \"val2\"}), \"QUEUED\");\n  EXPECT_EQ(Run({\"mset\", \"key1\", \"val3\", \"key1\", \"val4\"}), \"QUEUED\");\n  EXPECT_THAT(Run({\"exec\"}), RespArray(ElementsAre(\"OK\", \"OK\", \"OK\")));\n  fb.Join();\n  EXPECT_FALSE(IsLocked(0, \"key1\"));\n  EXPECT_FALSE(IsLocked(0, \"key2\"));\n}\n\nTEST_F(MultiTest, EvalExpiration) {\n  // Make sure expiration is correctly set even from Lua scripts\n  if (auto config = absl::GetFlag(FLAGS_default_lua_flags); config != \"\") {\n    GTEST_SKIP() << \"Skipped Eval test because default_lua_flags is set\";\n    return;\n  }\n\n  Run({\"eval\", \"redis.call('set', 'x', 0, 'ex', 5, 'nx')\", \"1\", \"x\"});\n  EXPECT_LE(CheckedInt({\"pttl\", \"x\"}), 5000);\n}\n\nTEST_F(MultiTest, MemoryInScript) {\n  EXPECT_EQ(Run({\"set\", \"x\", \"y\"}), \"OK\");\n\n  auto resp = Run({\"eval\", \"return redis.call('MEMORY', 'USAGE', KEYS[1])\", \"1\", \"x\"});\n  EXPECT_THAT(resp, IntArg(0));\n}\n\nTEST_F(MultiTest, NoKeyTransactional) {\n  Run({\"multi\"});\n  Run({\"ft._list\"});\n  Run({\"exec\"});\n}\n\nTEST_F(MultiTest, NoKeyTransactionalMany) {\n  vector<vector<string>> cmds;\n  cmds.push_back({\"rename\", \"x\", \"z\"});\n  cmds.push_back({\"ft._list\"});\n  RunMany(cmds);\n}\n\nclass MultiEvalTest : public BaseFamilyTest {\n protected:\n  MultiEvalTest() : BaseFamilyTest() {\n    num_threads_ = kPoolThreadCount;\n    absl::SetFlag(&FLAGS_default_lua_flags, \"allow-undeclared-keys\");\n  }\n\n  absl::FlagSaver 
fs_;\n};\n\nTEST_F(MultiEvalTest, MultiAllEval) {\n  RespExpr brpop_resp;\n\n  // Run the fiber at creation.\n  auto fb0 = pp_->at(1)->LaunchFiber(Launch::dispatch, [&] {\n    brpop_resp = Run({\"brpop\", \"x\", \"1\"});\n  });\n  Run({\"multi\"});\n  Run({\"eval\", \"return redis.call('lpush', 'x', 'y')\", \"0\"});\n  Run({\"eval\", \"return redis.call('lpop', 'x')\", \"0\"});\n  RespExpr exec_resp = Run({\"exec\"});\n  fb0.Join();\n\n  EXPECT_THAT(exec_resp.GetVec(), ElementsAre(IntArg(1), \"y\"));\n\n  EXPECT_THAT(brpop_resp, ArgType(RespExpr::NIL_ARRAY));\n}\n\nTEST_F(MultiEvalTest, MultiSomeEval) {\n  RespExpr brpop_resp;\n\n  // Run the fiber at creation.\n  auto fb0 = pp_->at(1)->LaunchFiber(Launch::dispatch, [&] {\n    brpop_resp = Run({\"brpop\", \"x\", \"1\"});\n  });\n  Run({\"multi\"});\n  Run({\"eval\", \"return redis.call('lpush', 'x', 'y')\", \"0\"});\n  Run({\"lpop\", \"x\"});\n  RespExpr exec_resp = Run({\"exec\"});\n  fb0.Join();\n\n  EXPECT_THAT(exec_resp.GetVec(), ElementsAre(IntArg(1), \"y\"));\n\n  EXPECT_THAT(brpop_resp, ArgType(RespExpr::NIL_ARRAY));\n}\n\nTEST_F(MultiEvalTest, ScriptSquashingUknownCmd) {\n  absl::FlagSaver fs;\n  absl::SetFlag(&FLAGS_lua_auto_async, true);\n\n  // The script below contains two commands for which execution can't even be prepared\n  // (FIRST/SECOND WRONG). 
The first is issued with pcall, so its error should be completely\n  // ignored, the second one should cause an abort and no further commands should be executed\n  string_view s = R\"(\n    redis.pcall('INCR', 'A')\n    redis.pcall('FIRST WRONG')\n    redis.pcall('INCR', 'A')\n    redis.call('SECOND WRONG')\n    redis.pcall('INCR', 'A')\n  )\";\n\n  EXPECT_THAT(Run({\"EVAL\", s, \"1\", \"A\"}), ErrArg(\"unknown command `SECOND WRONG`\"));\n  EXPECT_EQ(Run({\"get\", \"A\"}), \"2\");\n}\n\nTEST_F(MultiEvalTest, MultiAndEval) {\n  // We had a bug in borrowing interpreters which caused a crash in this scenario\n  Run({\"multi\"});\n  Run({\"eval\", \"return redis.call('set', 'x', 'y1')\", \"1\", \"x\"});\n  Run({\"exec\"});\n\n  Run({\"eval\", \"return redis.call('set', 'x', 'y1')\", \"1\", \"x\"});\n\n  Run({\"multi\"});\n  Run({\"eval\", \"return 'OK';\", \"0\"});\n  auto resp = Run({\"exec\"});\n  EXPECT_EQ(resp, \"OK\");\n\n  // We had a bug running script load inside multi\n  Run({\"multi\"});\n  Run({\"script\", \"load\", \"return '5'\"});\n  Run({\"exec\"});\n\n  Run({\"multi\"});\n  Run({\"script\", \"load\", \"return '5'\"});\n  Run({\"get\", \"x\"});\n  Run({\"exec\"});\n\n  Run({\"multi\"});\n  Run({\"script\", \"load\", \"return '5'\"});\n  Run({\"mset\", \"x1\", \"y1\", \"x2\", \"y2\"});\n  Run({\"exec\"});\n\n  Run({\"multi\"});\n  Run({\"script\", \"load\", \"return '5'\"});\n  Run({\"eval\", \"return redis.call('set', 'x', 'y')\", \"1\", \"x\"});\n  Run({\"get\", \"x\"});\n  Run({\"exec\"});\n\n  Run({\"get\", \"x\"});\n}\n\nTEST_F(MultiTest, MultiTypes) {\n  // we had a bug with namespaces for type command in multi/exec\n  EXPECT_THAT(Run({\"multi\"}), \"OK\");\n  EXPECT_THAT(Run({\"type\", \"sdfx3\"}), \"QUEUED\");\n  EXPECT_THAT(Run({\"type\", \"asdasd2\"}), \"QUEUED\");\n  EXPECT_THAT(Run({\"type\", \"wer124\"}), \"QUEUED\");\n  EXPECT_THAT(Run({\"type\", \"asafdasd\"}), \"QUEUED\");\n  EXPECT_THAT(Run({\"type\", \"dsfgser\"}), \"QUEUED\");\n  
EXPECT_THAT(Run({\"type\", \"erg2\"}), \"QUEUED\");\n  EXPECT_THAT(Run({\"exec\"}),\n              RespArray(ElementsAre(\"none\", \"none\", \"none\", \"none\", \"none\", \"none\")));\n}\n\nTEST_F(MultiTest, EvalRo) {\n  RespExpr resp;\n\n  resp = Run({\"set\", \"foo\", \"bar\"});\n  EXPECT_THAT(resp, \"OK\");\n\n  resp = Run({\"eval_ro\", \"return redis.call('get', KEYS[1])\", \"1\", \"foo\"});\n  EXPECT_THAT(resp, \"bar\");\n\n  resp = Run({\"eval_ro\", \"return redis.call('set', KEYS[1], 'car')\", \"1\", \"foo\"});\n  EXPECT_THAT(resp, ErrArg(\"Write commands are not allowed from read-only scripts\"));\n}\n\nTEST_F(MultiTest, EvalShaRo) {\n  RespExpr resp;\n\n  const char* read_script = \"return redis.call('get', KEYS[1]);\";\n  const char* write_script = \"return redis.call('set', KEYS[1], 'car');\";\n\n  auto sha_resp = Run({\"script\", \"load\", read_script});\n  auto read_sha = facade::ToSV(sha_resp.GetBuf());\n  sha_resp = Run({\"script\", \"load\", write_script});\n  auto write_sha = facade::ToSV(sha_resp.GetBuf());\n\n  resp = Run({\"set\", \"foo\", \"bar\"});\n  EXPECT_THAT(resp, \"OK\");\n\n  resp = Run({\"evalsha_ro\", read_sha, \"1\", \"foo\"});\n  EXPECT_THAT(resp, \"bar\");\n\n  resp = Run({\"evalsha_ro\", write_sha, \"1\", \"foo\"});\n  EXPECT_THAT(resp, ErrArg(\"Write commands are not allowed from read-only scripts\"));\n}\n\nTEST_F(MultiTest, EvalSelect) {\n  string_view script = R\"(--!df flags=X\nredis.call('SET', 'A', ARGV[1])\nredis.call('SELECT', '1')\nredis.call('SET', 'A', ARGV[2])\nreturn 'OK';\n)\";\n  auto script_global = absl::StrReplaceAll(script, {{\"X\", \"allow-undeclared-keys\"}});\n  auto resp = Run({\"EVAL\", script_global, \"0\", \"G1\", \"G2\"});\n  EXPECT_EQ(resp, \"OK\");\n\n  Run({\"SELECT\", \"0\"});\n  EXPECT_EQ(Run({\"GET\", \"A\"}), \"G1\");\n  Run({\"SELECT\", \"1\"});\n  EXPECT_EQ(Run({\"GET\", \"A\"}), \"G2\");\n  Run({\"SELECT\", \"0\"});\n\n  auto script_nonatomic = absl::StrReplaceAll(script, {{\"X\", 
\"disable-atomicity\"}});\n  resp = Run({\"EVAL\", script_nonatomic, \"0\", \"G3\", \"G4\"});\n  EXPECT_EQ(resp, \"OK\");\n\n  Run({\"SELECT\", \"0\"});\n  EXPECT_EQ(Run({\"GET\", \"A\"}), \"G3\");\n  Run({\"SELECT\", \"1\"});\n  EXPECT_EQ(Run({\"GET\", \"A\"}), \"G4\");\n  Run({\"SELECT\", \"0\"});\n\n  // Don't allow in regular transactions\n  string_view script_fail = R\"(\nredis.call('SET', KEYS[1], ARGV[1])\nredis.call('SELECT', '1')\nredis.call('SET', KEYS[1], ARGV[1])\n)\";\n  resp = Run({\"EVAL\", script_fail, \"1\", \"A\", \"wont-work\"});\n  EXPECT_THAT(resp, ErrArg(\"SELECT is not allowed in regular\"));\n}\n\nTEST_F(MultiTest, StoredCmdBytesMetric) {\n  ASSERT_EQ(GetMetrics().coordinator_stats.stored_cmd_bytes, 0);\n\n  RespExpr resp = Run({\"multi\"});\n  ASSERT_EQ(resp, \"OK\");\n\n  for (auto i = 0; i < 100; ++i) {\n    ASSERT_EQ(Run({\"get\", kKey1}), \"QUEUED\");\n  }\n\n  ASSERT_GT(GetMetrics().coordinator_stats.stored_cmd_bytes, 0);\n\n  resp = Run({\"exec\"});\n  ASSERT_THAT(resp, ArrLen(100));\n  ASSERT_THAT(resp.GetVec(), Contains(ArgType(RespExpr::NIL)).Times(100));\n  ASSERT_EQ(GetMetrics().coordinator_stats.stored_cmd_bytes, 0);\n}\n\n// Verify that lazy expiration works inside EVAL running in global mode.\n// Previously, the shard_lock()->Check(EXCLUSIVE) guard in ExpireIfNeeded\n// prevented lazy expiry while a global transaction held the shard lock,\n// causing expired keys to be returned as if they were still alive.\nTEST_F(MultiTest, EvalGlobalLazyExpire) {\n  // Set key with TTL, advance time past expiry, then read via global EVAL.\n  // The global shard lock blocks heartbeat during EVAL, so active expiry\n  // cannot delete the key — only lazy expiry inside GET can.\n  Run({\"set\", \"key\", \"val\", \"px\", \"10\"});\n  AdvanceTime(100);\n\n  constexpr char kScript[] = R\"(\n--!df flags=allow-undeclared-keys\nreturn redis.call('GET', KEYS[1])\n)\";\n\n  auto resp = Run({\"eval\", kScript, \"1\", \"key\"});\n  ASSERT_THAT(resp, 
ArgType(RespExpr::NIL));\n}\n\n}  // namespace dfly\n"
  },
  {
    "path": "src/server/namespaces.cc",
    "content": "// Copyright 2024, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#include \"server/namespaces.h\"\n\n#include \"base/flags.h\"\n#include \"base/logging.h\"\n#include \"server/blocking_controller.h\"\n#include \"server/db_slice.h\"\n#include \"server/engine_shard_set.h\"\n\nABSL_DECLARE_FLAG(bool, cache_mode);\n\nnamespace dfly {\n\nusing namespace std;\n\nNamespace::Namespace() {\n  shard_db_slices_.resize(shard_set->size());\n  shard_blocking_controller_.resize(shard_set->size());\n  shard_set->RunBriefInParallel([&](EngineShard* es) {\n    CHECK(es != nullptr);\n    ShardId sid = es->shard_id();\n    shard_db_slices_[sid] = make_unique<DbSlice>(sid, absl::GetFlag(FLAGS_cache_mode), es);\n    shard_db_slices_[sid]->UpdateExpireBase(absl::GetCurrentTimeNanos() / 1000000, 0);\n  });\n}\n\nDbSlice& Namespace::GetCurrentDbSlice() {\n  EngineShard* es = EngineShard::tlocal();\n  CHECK(es != nullptr);\n  return GetDbSlice(es->shard_id());\n}\n\nDbSlice& Namespace::GetDbSlice(ShardId sid) {\n  CHECK_LT(sid, shard_db_slices_.size());\n  return *shard_db_slices_[sid];\n}\n\nBlockingController* Namespace::GetOrAddBlockingController(EngineShard* shard) {\n  if (!shard_blocking_controller_[shard->shard_id()]) {\n    shard_blocking_controller_[shard->shard_id()] = make_unique<BlockingController>(shard, this);\n  }\n\n  return shard_blocking_controller_[shard->shard_id()].get();\n}\n\nBlockingController* Namespace::GetBlockingController(ShardId sid) {\n  return shard_blocking_controller_[sid].get();\n}\n\nNamespaces::Namespaces() {\n  default_namespace_ = &GetOrInsert(\"\");\n}\n\nNamespaces::~Namespaces() {\n  Clear();\n}\n\nvoid Namespaces::Clear() {\n  util::fb2::LockGuard guard(mu_);\n\n  default_namespace_ = nullptr;\n\n  if (namespaces_.empty()) {\n    return;\n  }\n\n  shard_set->RunBriefInParallel([&](EngineShard* es) {\n    CHECK(es != nullptr);\n    for (auto& ns : ABSL_TS_UNCHECKED_READ(namespaces_)) {\n      
ns.second.shard_db_slices_[es->shard_id()].reset();\n    }\n  });\n\n  namespaces_.clear();\n}\n\nNamespace& Namespaces::GetDefaultNamespace() const {\n  CHECK(default_namespace_ != nullptr);\n  return *default_namespace_;\n}\n\nNamespace& Namespaces::GetOrInsert(std::string_view ns) {\n  {\n    // Try to look up under a shared lock\n    dfly::SharedLock guard(mu_);\n    auto it = namespaces_.find(ns);\n    if (it != namespaces_.end()) {\n      return it->second;\n    }\n  }\n\n  {\n    // Key was not found, so we create create it under unique lock\n    util::fb2::LockGuard guard(mu_);\n    return namespaces_[ns];\n  }\n}\n\n}  // namespace dfly\n"
  },
  {
    "path": "src/server/namespaces.h",
    "content": "// Copyright 2024, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#pragma once\n\n#include <absl/container/node_hash_map.h>\n\n#include <memory>\n#include <string>\n#include <vector>\n\n#include \"server/common_types.h\"\n#include \"util/fibers/synchronization.h\"\n\nnamespace dfly {\n\nclass BlockingController;\nclass DbSlice;\nclass EngineShard;\n\n// A Namespace is a way to separate and isolate different databases in a single instance.\n// It can be used to allow multiple tenants to use the same server without hacks of using a common\n// prefix, or SELECT-ing a different database.\n// Each Namespace contains per-shard DbSlice, as well as a BlockingController.\nclass Namespace {\n public:\n  Namespace();\n\n  DbSlice& GetCurrentDbSlice();\n\n  DbSlice& GetDbSlice(ShardId sid);\n  BlockingController* GetOrAddBlockingController(EngineShard* shard);\n  BlockingController* GetBlockingController(ShardId sid);\n\n private:\n  std::vector<std::unique_ptr<DbSlice>> shard_db_slices_;\n  std::vector<std::unique_ptr<BlockingController>> shard_blocking_controller_;\n\n  friend class Namespaces;\n};\n\n// Namespaces is a registry and container for Namespace instances.\n// Each Namespace has a unique string name, which identifies it in the store.\n// Any attempt to access a non-existing Namespace will first create it, add it to the internal map\n// and will then return it.\n// It is currently impossible to remove a Namespace after it has been created.\n// The default Namespace can be accessed via either GetDefaultNamespace() (which guarantees not to\n// yield), or via the GetOrInsert() with an empty string.\n// The initialization order of this class with the engine shards is slightly subtle, as they have\n// mutual dependencies.\nclass Namespaces {\n public:\n  Namespaces();\n  ~Namespaces();\n\n  void Clear() ABSL_LOCKS_EXCLUDED(mu_);  // Thread unsafe, use in tear-down or tests\n\n  Namespace& GetDefaultNamespace() 
const;  // No locks\n  Namespace& GetOrInsert(std::string_view ns) ABSL_LOCKS_EXCLUDED(mu_);\n\n private:\n  util::fb2::SharedMutex mu_{};\n  absl::node_hash_map<std::string, Namespace> namespaces_ ABSL_GUARDED_BY(mu_);\n  Namespace* default_namespace_ = nullptr;\n};\n\n}  // namespace dfly\n"
  },
  {
    "path": "src/server/protocol_client.cc",
    "content": "// Copyright 2022, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n#include \"server/protocol_client.h\"\n\n#include \"facade/tls_helpers.h\"\n\nextern \"C\" {\n#include \"redis/rdb.h\"\n}\n\n#include <absl/cleanup/cleanup.h>\n#include <absl/flags/flag.h>\n#include <absl/functional/bind_front.h>\n#include <absl/strings/escaping.h>\n#include <absl/strings/str_cat.h>\n#include <absl/strings/strip.h>\n\n#include <boost/asio/ip/tcp.hpp>\n#include <string>\n\n#include \"base/logging.h\"\n#include \"facade/dragonfly_connection.h\"\n#include \"facade/redis_parser.h\"\n#include \"facade/reply_builder.h\"\n#include \"facade/socket_utils.h\"\n#include \"server/error.h\"\n#include \"server/journal/executor.h\"\n#include \"server/journal/serializer.h\"\n#include \"server/main_service.h\"\n#include \"server/rdb_load.h\"\n#include \"strings/human_readable.h\"\n#include \"util/fibers/dns_resolve.h\"\n\n#ifdef DFLY_USE_SSL\n#include \"util/tls/tls_socket.h\"\n#endif\n\nABSL_FLAG(std::string, masteruser, \"\", \"username for authentication with master\");\nABSL_FLAG(std::string, masterauth, \"\", \"password for authentication with master\");\nABSL_FLAG(bool, tls_replication, false, \"Enable TLS on replication\");\n\nABSL_DECLARE_FLAG(std::string, tls_cert_file);\nABSL_DECLARE_FLAG(std::string, tls_key_file);\nABSL_DECLARE_FLAG(std::string, tls_ca_cert_file);\nABSL_DECLARE_FLAG(std::string, tls_ca_cert_dir);\n\nnamespace dfly {\n\nusing namespace std;\nusing namespace util;\nusing namespace boost::asio;\nusing namespace facade;\nusing absl::GetFlag;\nusing absl::StrCat;\n\nerror_code ProtocolClient::Recv(FiberSocketBase* input, base::IoBuf* dest) {\n  auto buf = dest->AppendBuffer();\n  io::Result<size_t> exp_size = input->Recv(buf);\n  if (!exp_size) {\n    LOG(WARNING) << \"Socket error \" << exp_size.error();\n    return exp_size.error();\n  }\n\n  if (*exp_size == 0) {\n    VLOG(1) << \"Connection closed by peer\";\n    return 
make_error_code(errc::connection_aborted);\n  }\n\n  TouchIoTime();\n\n  dest->CommitWrite(*exp_size);\n  return error_code{};\n}\n\nstd::string ProtocolClient::ServerContext::Description() const {\n  return absl::StrCat(host, \":\", port);\n}\n\nvoid ValidateClientTlsFlags() {\n  if (!GetFlag(FLAGS_tls_replication)) {\n    return;\n  }\n\n  bool has_auth = false;\n\n  if (!GetFlag(FLAGS_tls_key_file).empty()) {\n    if (GetFlag(FLAGS_tls_cert_file).empty()) {\n      LOG(ERROR) << \"tls_cert_file flag should be set\";\n      exit(1);\n    }\n    has_auth = true;\n  }\n\n  if (!GetFlag(FLAGS_masterauth).empty())\n    has_auth = true;\n\n  if (!has_auth) {\n    LOG(ERROR) << \"No authentication method configured!\";\n    exit(1);\n  }\n}\n\n#ifdef DFLY_USE_SSL\nvoid ProtocolClient::MaybeInitSslCtx() {\n  if (GetFlag(FLAGS_tls_replication)) {\n    ssl_ctx_ = CreateSslCntx(facade::TlsContextRole::CLIENT);\n  }\n}\n#endif\n\nProtocolClient::ProtocolClient(string host, uint16_t port) {\n  server_context_.host = std::move(host);\n  server_context_.port = port;\n#ifdef DFLY_USE_SSL\n  MaybeInitSslCtx();\n#endif\n}\nProtocolClient::ProtocolClient(ServerContext context) : server_context_(std::move(context)) {\n#ifdef DFLY_USE_SSL\n  MaybeInitSslCtx();\n#endif\n}\n\nProtocolClient::~ProtocolClient() {\n  exec_st_.JoinErrorHandler();\n\n#ifdef DFLY_USE_SSL\n  if (ssl_ctx_) {\n    SSL_CTX_free(ssl_ctx_);\n  }\n#endif\n}\n\nerror_code ProtocolClient::ResolveHostDns() {\n  char ip_addr[INET6_ADDRSTRLEN];\n\n  // IPv6 address can be enclosed in square brackets.\n  // https://www.rfc-editor.org/rfc/rfc2732#section-2\n  // We need to remove the brackets before resolving the DNS.\n  // Enclosed IPv6 addresses can't be resolved by the DNS resolver.\n  std::string host = server_context_.host;\n  if (!host.empty() && host.front() == '[' && host.back() == ']') {\n    host = host.substr(1, host.size() - 2);\n  }\n\n  auto ec = util::fb2::DnsResolve(host, 0, ip_addr, ProactorBase::me());\n 
 if (ec) {\n    LOG(ERROR) << \"Dns error \" << ec << \", host: \" << server_context_.host;\n    return make_error_code(errc::host_unreachable);\n  }\n\n  LOG_IF(INFO, std::string(ip_addr) != server_context_.host)\n      << \"Resolved endpoint \" << server_context_.Description() << \" to \" << ip_addr << \":\"\n      << server_context_.port;\n  server_context_.endpoint = {ip::make_address(ip_addr), server_context_.port};\n\n  return error_code{};\n}\n\nerror_code ProtocolClient::ConnectAndAuth(std::chrono::milliseconds connect_timeout_ms,\n                                          ExecutionState* cntx) {\n  ProactorBase* mythread = ProactorBase::me();\n  CHECK(mythread);\n  {\n    unique_lock lk(sock_mu_);\n    // The context closes sock_. So if the context error handler has already\n    // run we must not create a new socket. sock_mu_ syncs between the two\n    // functions.\n    if (cntx->IsRunning()) {\n      if (sock_) {\n        LOG_IF(WARNING, sock_->Close()) << \"Error closing socket\";\n        sock_.reset(nullptr);\n      }\n\n      if (ssl_ctx_) {\n#ifdef DFLY_USE_SSL\n        auto tls_sock = std::make_unique<tls::TlsSocket>(mythread->CreateSocket());\n        tls_sock->InitSSL(ssl_ctx_);\n        sock_ = std::move(tls_sock);\n#endif\n      } else {\n        sock_.reset(mythread->CreateSocket());\n      }\n    } else {\n      return cntx->GetError();\n    }\n  }\n\n  // We set this timeout because this call blocks other REPLICAOF commands. 
We don't need it for the\n  // rest of the sync.\n  {\n    uint32_t timeout = sock_->timeout();\n    sock_->set_timeout(connect_timeout_ms.count());\n    RETURN_ON_ERR(sock_->Connect(server_context_.endpoint));\n    sock_->set_timeout(timeout);\n  }\n\n  // For idle connections we enable TCP keepalive to prevent disconnects.\n  int yes = 1;\n  if (setsockopt(sock_->native_handle(), SOL_SOCKET, SO_KEEPALIVE, &yes, sizeof(yes)) == 0) {\n    int intv = 300;\n#ifdef __APPLE__\n    setsockopt(sock_->native_handle(), IPPROTO_TCP, TCP_KEEPALIVE, &intv, sizeof(intv));\n#else\n    setsockopt(sock_->native_handle(), IPPROTO_TCP, TCP_KEEPIDLE, &intv, sizeof(intv));\n#endif\n\n    intv /= 3;\n    setsockopt(sock_->native_handle(), IPPROTO_TCP, TCP_KEEPINTVL, &intv, sizeof(intv));\n\n    intv = 3;\n    setsockopt(sock_->native_handle(), IPPROTO_TCP, TCP_KEEPCNT, &intv, sizeof(intv));\n  }\n\n  // CHECK_EQ(0, setsockopt(sock_->native_handle(), IPPROTO_TCP, TCP_NODELAY, &yes, sizeof(yes)));\n\n  auto masterauth = GetFlag(FLAGS_masterauth);\n  auto masteruser = GetFlag(FLAGS_masteruser);\n  ResetParser(RedisParser::Mode::CLIENT);\n  if (!masterauth.empty()) {\n    auto cmd = masteruser.empty() ? 
StrCat(\"AUTH \", masterauth)\n                                  : StrCat(\"AUTH \", masteruser, \" \", masterauth);\n    RETURN_ON_ERR(SendCommandAndReadResponse(cmd));\n    last_cmd_ = \"AUTH\";  // Make sure the password is not printed to logs\n    PC_RETURN_ON_BAD_RESPONSE(CheckRespIsSimpleReply(\"OK\"));\n  }\n  return error_code{};\n}\n\nvoid ProtocolClient::ShutdownSocketImpl(bool should_close) {\n  unique_lock lk(sock_mu_);\n  if (sock_) {\n    sock_->proactor()->Await([this, should_close] {\n      if (sock_->IsOpen()) {\n        auto ec = sock_->Shutdown(SHUT_RDWR);\n        LOG_IF(ERROR, ec) << \"Could not shutdown socket \" << ec;\n      }\n      if (should_close) {\n        auto ec = sock_->Close();  // Quietly close.\n        LOG_IF(WARNING, ec) << \"Error closing socket \" << ec << \"/\" << ec.message();\n      }\n    });\n  }\n}\n\nvoid ProtocolClient::CloseSocket() {\n  return ShutdownSocketImpl(true);\n}\n\nvoid ProtocolClient::ShutdownSocket() {\n  return ShutdownSocketImpl(false);\n}\n\nvoid ProtocolClient::DefaultErrorHandler(const GenericError& err) {\n  LOG(WARNING) << \"Socket error: \" << err.Format() << \" in \" << server_context_.Description()\n               << \", socket info: \" << GetSocketInfo(sock_ ? 
sock_->native_handle() : -1);\n  ShutdownSocket();\n}\n\nio::Result<ProtocolClient::ReadRespRes> ProtocolClient::ReadRespReply(base::IoBuf* buffer,\n                                                                      bool copy_msg) {\n  DCHECK(parser_);\n\n  error_code ec;\n  if (!buffer) {\n    buffer = &resp_buf_;\n    buffer->Clear();\n  }\n  last_resp_ = \"\";\n\n  uint32_t processed_bytes = 0;\n\n  RedisParser::Result result = RedisParser::OK;\n  while (!ec) {\n    uint32_t consumed;\n    if (buffer->InputLen() == 0 || result == RedisParser::INPUT_PENDING) {\n      DCHECK_GT(buffer->AppendLen(), 0u);\n\n      ec = Recv(sock_.get(), buffer);\n      if (ec) {\n        return nonstd::make_unexpected(ec);\n      }\n    }\n\n    result = parser_->Parse(buffer->InputBuffer(), &consumed, &resp_args_);\n    processed_bytes += consumed;\n    if (copy_msg)\n      last_resp_ +=\n          std::string_view(reinterpret_cast<char*>(buffer->InputBuffer().data()), consumed);\n\n    if (result == RedisParser::OK) {\n      return ReadRespRes{processed_bytes, consumed};  // success path\n    }\n\n    buffer->ConsumeInput(consumed);\n\n    if (result != RedisParser::INPUT_PENDING) {\n      LOG(ERROR) << \"Invalid parser status \" << result << \" for response \" << last_resp_;\n      return nonstd::make_unexpected(std::make_error_code(std::errc::bad_message));\n    }\n\n    // We need to read more data. 
Check that we have enough space.\n    if (buffer->AppendLen() < 64u) {\n      buffer->EnsureCapacity(buffer->Capacity() * 2);\n    }\n  }\n\n  return nonstd::make_unexpected(ec);\n}\n\nio::Result<ProtocolClient::ReadRespRes> ProtocolClient::ReadRespReply(uint32_t timeout) {\n  auto prev_timeout = sock_->timeout();\n  sock_->set_timeout(timeout);\n  auto res = ReadRespReply();\n  sock_->set_timeout(prev_timeout);\n  return res;\n}\n\nio::Result<dfly::RESPObj> ProtocolClient::TakeRespReply(uint32_t timeout, base::IoBuf* buffer,\n                                                        bool copy_msg) {\n  auto prev_timeout = sock_->timeout();\n  sock_->set_timeout(timeout);\n  absl::Cleanup on_exit([this, prev_timeout]() { sock_->set_timeout(prev_timeout); });\n\n  error_code ec;\n  if (!buffer) {\n    buffer = &resp_buf_;\n  }\n\n  last_resp_ = \"\";\n\n  uint32_t processed_bytes = 0;\n  std::optional<dfly::RESPObj> resp;\n\n  do {\n    resp = resp_parser_.Feed(nullptr, 0);  // check if previous data produced a reply\n    if (resp && !resp->Empty()) {\n      VLOG(2) << \"return reply from previous data read\";\n      return std::move(resp).value();  // success path\n    }\n    if (buffer->InputLen() == 0) {\n      DCHECK_GT(buffer->AppendLen(), 0u);\n      ec = Recv(sock_.get(), buffer);\n      if (ec) {\n        VLOG(2) << \"error socket reading reply: \" << ec;\n        return nonstd::make_unexpected(ec);\n      }\n    }\n\n    auto input_buf = buffer->InputBuffer();\n    resp = resp_parser_.Feed(reinterpret_cast<char*>(input_buf.data()), input_buf.size());\n    processed_bytes += input_buf.size();\n    if (copy_msg)\n      last_resp_ +=\n          std::string_view(reinterpret_cast<char*>(buffer->InputBuffer().data()), input_buf.size());\n\n    buffer->ConsumeInput(input_buf.size());\n    if (resp && !resp->Empty()) {\n      VLOG(2) << \"successfully parsed readed reply\";\n      return std::move(resp).value();  // success path\n    }\n\n    // We need to read more 
data. Check that we have enough space.\n    if (buffer->AppendLen() < 64u) {\n      buffer->EnsureCapacity(buffer->Capacity() * 2);\n    }\n  } while (resp);\n\n  VLOG(2) << \"protocol issue\";\n  return nonstd::make_unexpected(std::make_error_code(std::errc::bad_message));\n}\n\nerror_code ProtocolClient::ReadLine(base::IoBuf* io_buf, string_view* line) {\n  size_t eol_pos;\n  std::string_view input_str = ToSV(io_buf->InputBuffer());\n\n  // consume whitespace.\n  while (true) {\n    auto it = find_if_not(input_str.begin(), input_str.end(), absl::ascii_isspace);\n    size_t ws_len = it - input_str.begin();\n    io_buf->ConsumeInput(ws_len);\n    input_str = ToSV(io_buf->InputBuffer());\n    if (!input_str.empty())\n      break;\n\n    RETURN_ON_ERR(Recv(sock_.get(), io_buf));\n    input_str = ToSV(io_buf->InputBuffer());\n  };\n\n  // find eol.\n  while (true) {\n    eol_pos = input_str.find('\\n');\n\n    if (eol_pos != std::string_view::npos) {\n      DCHECK_GT(eol_pos, 0u);  // can not be 0 because then would be consumed as a whitespace.\n      if (input_str[eol_pos - 1] != '\\r') {\n        break;\n      }\n      *line = input_str.substr(0, eol_pos - 1);\n      return error_code{};\n    }\n\n    RETURN_ON_ERR(Recv(sock_.get(), io_buf));\n    input_str = ToSV(io_buf->InputBuffer());\n  }\n\n  LOG(ERROR) << \"Bad replication header: \" << input_str;\n  return std::make_error_code(std::errc::illegal_byte_sequence);\n}\n\nbool ProtocolClient::CheckRespIsSimpleReply(string_view reply) const {\n  return resp_args_.size() == 1 && resp_args_.front().type == RespExpr::STRING &&\n         ToSV(resp_args_.front().GetBuf()) == reply;\n}\n\nbool ProtocolClient::CheckRespSimpleError(string_view error) const {\n  return resp_args_.size() == 1 && resp_args_.front().type == RespExpr::ERROR &&\n         ToSV(resp_args_.front().GetBuf()) == error;\n}\n\nbool ProtocolClient::CheckRespFirstTypes(initializer_list<RespExpr::Type> types) const {\n  unsigned i = 0;\n  for 
(RespExpr::Type type : types) {\n    if (i >= resp_args_.size() || resp_args_[i].type != type)\n      return false;\n    ++i;\n  }\n  return true;\n}\n\nerror_code ProtocolClient::SendCommand(string_view command) {\n  string formatted_command = RedisReplyBuilderBase::SerializeCommand(command);\n  DCHECK(sock_->proactor() == ProactorBase::me());\n  auto ec = sock_->Write(io::Buffer(formatted_command));\n  if (!ec)\n    TouchIoTime();\n  return ec;\n}\n\nerror_code ProtocolClient::SendCommandAndReadResponse(string_view command) {\n  last_cmd_ = command;\n  if (auto ec = SendCommand(command); ec)\n    return ec;\n  auto response_res = ReadRespReply();\n  return response_res.has_value() ? error_code{} : response_res.error();\n}\n\nvoid ProtocolClient::ResetParser(RedisParser::Mode mode) {\n  // We accept any length for the parser because it has been approved by the master.\n  parser_.reset(new RedisParser(mode));\n}\n\nuint64_t ProtocolClient::LastIoTime() const {\n  return last_io_time_.load(std::memory_order_relaxed);\n}\n\nvoid ProtocolClient::TouchIoTime() {\n  last_io_time_.store(Proactor()->GetMonotonicTimeNs(), std::memory_order_relaxed);\n}\n\n}  // namespace dfly\n"
  },
  {
    "path": "src/server/protocol_client.h",
    "content": "// Copyright 2022, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n#pragma once\n\n#include <absl/strings/escaping.h>\n\n#include <queue>\n#include <variant>\n\n#include \"facade/facade_types.h\"\n#include \"facade/redis_parser.h\"\n#include \"facade/resp_parser.h\"\n#include \"io/io_buf.h\"\n#include \"server/execution_state.h\"\n#include \"server/version.h\"\n#include \"util/fiber_socket_base.h\"\n\n#ifdef DFLY_USE_SSL\n#include <openssl/ssl.h>\n#endif\n\nnamespace dfly {\n\nclass Service;\nclass ConnectionContext;\nclass JournalExecutor;\nstruct JournalReader;\n\nvoid ValidateClientTlsFlags();\n\n// A helper class for implementing a Redis client that talks to a redis server.\n// This class should be inherited from.\nclass ProtocolClient {\n public:\n#ifdef DFLY_USE_SSL\n  using SSL_CTX = struct ssl_ctx_st;\n#endif\n\n  ProtocolClient(std::string master_host, uint16_t port);\n  virtual ~ProtocolClient();\n\n  // First Shutdown() the socket and immediately Close() it.\n  // Any attempt for IO in the socket after Close() will crash with CHECK fail.\n  void CloseSocket();\n\n  // Shutdown the underline socket but do not Close() it. 
By decoupling this, API\n  // callers can shutdown the socket, wait for the relevant flows to gracefully exit\n  // (by observing during an IO operation that the socket was shut down) and then finally\n  // Close() the socket.\n  void ShutdownSocket();\n\n  uint64_t LastIoTime() const;\n  void TouchIoTime();\n\n  const std::string& GetHost() const {\n    return server().host;\n  };\n\n  uint16_t GetPort() const {\n    return server().port;\n  };\n\n protected:\n  struct ServerContext {\n    std::string host;\n    uint16_t port;\n    boost::asio::ip::tcp::endpoint endpoint;\n\n    std::string Description() const;\n  };\n\n  // Constructing from a fully initialized ServerContext allows skipping\n  // the DNS resolution step.\n  explicit ProtocolClient(ServerContext context);\n\n  std::error_code ResolveHostDns();\n  // Connect to master and authenticate if needed.\n  std::error_code ConnectAndAuth(std::chrono::milliseconds connect_timeout_ms,\n                                 ExecutionState* cntx);\n\n  void DefaultErrorHandler(const GenericError& err);\n\n  struct ReadRespRes {\n    uint32_t total_read;\n    uint32_t left_in_buffer;\n  };\n\n  // This function uses parser_ and resp_args_ in order to consume a single response\n  // from the sock_. 
The output will reside in resp_args_.\n  // For error reporting purposes, the parsed command would be in last_resp_ if copy_msg is true.\n  // If buffer is not given, an internal temporary buffer will be used.\n  // It is the responsibility of the caller to call buffer->ConsumeInput(rv.left_in_buffer) when it\n  // is done with the result of the call; Calling ConsumeInput may invalidate the data in the result\n  // if the buffer relocates.\n  // TODO these functions contain bugs related to partial reads and parser state management.\n  io::Result<ReadRespRes> ReadRespReply(base::IoBuf* buffer = nullptr, bool copy_msg = true);\n  io::Result<ReadRespRes> ReadRespReply(uint32_t timeout);\n\n  io::Result<facade::RESPObj> TakeRespReply(uint32_t timeout, base::IoBuf* buffer = nullptr,\n                                            bool copy_msg = true);\n\n  std::error_code ReadLine(base::IoBuf* io_buf, std::string_view* line);\n\n  // Check if resp_args contains a simple reply.\n  bool CheckRespIsSimpleReply(std::string_view reply) const;\n\n  // Check if resp_args contains a simple error\n  bool CheckRespSimpleError(std::string_view error) const;\n\n  // Check resp_args contains the following types at front.\n  bool CheckRespFirstTypes(std::initializer_list<facade::RespExpr::Type> types) const;\n\n  // Send command, update last_io_time, return error.\n  std::error_code SendCommand(std::string_view command);\n  // Send command, read response into resp_args_.\n  std::error_code SendCommandAndReadResponse(std::string_view command);\n\n  const ServerContext& server() const {\n    return server_context_;\n  }\n\n  void ResetParser(facade::RedisParser::Mode mode);\n\n  // TODO can return invalid results if response answer was bigger than provided buffer into\n  // ReadRespReply\n  auto& LastResponseArgs() {\n    return resp_args_;\n  }\n\n  auto* Proactor() const {\n    return sock_->proactor();\n  }\n\n  util::FiberSocketBase* Sock() const {\n    return sock_.get();\n  }\n\n 
private:\n  std::error_code Recv(util::FiberSocketBase* input, base::IoBuf* dest);\n\n  void ShutdownSocketImpl(bool should_close);\n\n  ServerContext server_context_;\n\n  std::unique_ptr<facade::RedisParser> parser_;\n  facade::RespVec resp_args_;\n  base::IoBuf resp_buf_;\n\n  facade::RESPParser resp_parser_;\n\n  std::unique_ptr<util::FiberSocketBase> sock_;\n  util::fb2::Mutex sock_mu_;\n\n protected:\n  ExecutionState exec_st_;  // context for tasks in replica.\n\n  std::string last_cmd_;\n  std::string last_resp_;\n\n  std::atomic<uint64_t> last_io_time_ = 0;  // in ns, monotonic clock.\n\n#ifdef DFLY_USE_SSL\n\n  void MaybeInitSslCtx();\n\n  SSL_CTX* ssl_ctx_{nullptr};\n#else\n  void* ssl_ctx_{nullptr};\n#endif\n};\n\n}  // namespace dfly\n\n/**\n * A convenience macro to use with ProtocolClient instances for protocol input validation.\n */\n#define PC_RETURN_ON_BAD_RESPONSE_T(T, x)                                                      \\\n  do {                                                                                         \\\n    if (!(x)) {                                                                                \\\n      LOG(ERROR) << \"Bad response to \\\"\" << last_cmd_ << \"\\\": \\\"\" << absl::CEscape(last_resp_) \\\n                 << \"\\\"\";                                                                      \\\n      return (T)(std::make_error_code(errc::bad_message));                                     \\\n    }                                                                                          \\\n  } while (false)\n\n#define PC_RETURN_ON_BAD_RESPONSE(x) PC_RETURN_ON_BAD_RESPONSE_T(std::error_code, x)\n"
  },
  {
    "path": "src/server/rdb_extensions.h",
    "content": "// Copyright 2022, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#pragma once\n\nextern \"C\" {\n#include \"redis/rdb.h\"\n}\n\n//  Custom types: Range 30-35 is used by DF RDB types.\nconstexpr uint8_t RDB_TYPE_JSON = 30;\nconstexpr uint8_t RDB_TYPE_HASH_WITH_EXPIRY = 31;\nconstexpr uint8_t RDB_TYPE_SET_WITH_EXPIRY = 32;\nconstexpr uint8_t RDB_TYPE_SBF = 33;\nconstexpr uint8_t RDB_TYPE_SBF2 = 34;\nconstexpr uint8_t RDB_TYPE_CMS = 35;\n\nconstexpr bool rdbIsObjectTypeDF(uint8_t type) {\n  return __rdbIsObjectType(type) || (type == RDB_TYPE_JSON) ||\n         (type == RDB_TYPE_HASH_WITH_EXPIRY) || (type == RDB_TYPE_SET_WITH_EXPIRY) ||\n         (type == RDB_TYPE_SBF) || (type == RDB_TYPE_SBF2) || (type == RDB_TYPE_CMS);\n}\n\n//  Opcodes: Range 200-240 is used by DF extensions.\n\n// This opcode is sent by the master Dragonfly instance to a replica\n// to notify that it finished streaming static data and is ready\n// to switch to the stable state replication phase.\nconstexpr uint8_t RDB_OPCODE_FULLSYNC_END = 200;\n\nconstexpr uint8_t RDB_OPCODE_COMPRESSED_ZSTD_BLOB_START = 201;\nconstexpr uint8_t RDB_OPCODE_COMPRESSED_LZ4_BLOB_START = 202;\nconstexpr uint8_t RDB_OPCODE_COMPRESSED_BLOB_END = 203;\n\nconstexpr uint8_t RDB_OPCODE_JOURNAL_BLOB = 210;\n\n// A full sync will continue to send information in journal blobs until the replica\n// sends a `DFLY STARTSTABLE` to the master.\n// We use this opcode to synchronize the journal offsets at the end of the full sync,\n// so it is always sent at the end of the RDB stream.\nconstexpr uint8_t RDB_OPCODE_JOURNAL_OFFSET = 211;\n\nconstexpr uint8_t RDB_OPCODE_DF_MASK = 220; /* Mask for key properties */\n\n// RDB_OPCODE_DF_MASK define 4byte field with next flags\nconstexpr uint32_t DF_MASK_FLAG_STICKY = (1 << 0);\nconstexpr uint32_t DF_MASK_FLAG_MC_FLAGS = (1 << 1);\n\n// Opcode to store HNSW vector index node data for global indices\n// Format: [index_name, 
elements_number, internal_id, global_id, level, zero_level_links_num,\n// zero_level_links,\n//          higher_level_links_num (only if level > 0), higher_level_links (only if level > 0)]\nconstexpr uint8_t RDB_OPCODE_VECTOR_INDEX = 222;\n\n// Opcode to store ShardDocIndex key-to-DocId mapping for search indices\n// Format: [shard_id, index_name, mapping_count, then for each mapping: key_string, doc_id]\nconstexpr uint8_t RDB_OPCODE_SHARD_DOC_INDEX = 223;\n"
  },
  {
    "path": "src/server/rdb_load.cc",
    "content": "// Copyright 2022, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#include \"server/rdb_load.h\"\n\n#include \"absl/strings/escaping.h\"\n#include \"server/search/global_hnsw_index.h\"\n#include \"server/tiered_storage.h\"\n\nextern \"C\" {\n#include \"redis/intset.h\"\n#include \"redis/listpack.h\"\n#include \"redis/lzfP.h\" /* LZF compression library */\n#include \"redis/stream.h\"\n#include \"redis/util.h\"\n#include \"redis/ziplist.h\"\n#include \"redis/zmalloc.h\"\n}\n#include <absl/cleanup/cleanup.h>\n#include <absl/strings/match.h>\n#include <absl/strings/str_cat.h>\n#include <absl/strings/str_split.h>\n\n#include <cstring>\n\n#include \"base/endian.h\"\n#include \"base/flags.h\"\n#include \"base/logging.h\"\n#include \"core/bloom.h\"\n#include \"core/cms.h\"\n#include \"core/detail/listpack_wrap.h\"\n#include \"core/json/json_object.h\"\n#include \"core/qlist.h\"\n#include \"core/sorted_map.h\"\n#include \"core/string_map.h\"\n#include \"core/string_set.h\"\n#include \"server/cluster/cluster_config.h\"\n#include \"server/engine_shard_set.h\"\n#include \"server/error.h\"\n#include \"server/family_utils.h\"\n#include \"server/hset_family.h\"\n#include \"server/journal/executor.h\"\n#include \"server/journal/serializer.h\"\n#include \"server/main_service.h\"\n#include \"server/namespaces.h\"\n#include \"server/rdb_extensions.h\"\n#include \"server/script_mgr.h\"\n#include \"server/search/doc_index.h\"\n#include \"server/search/global_hnsw_index.h\"\n#include \"server/serializer_commons.h\"\n#include \"server/server_state.h\"\n#include \"server/set_family.h\"\n#include \"server/stream_family.h\"\n#include \"server/transaction.h\"\n#include \"server/zset_family.h\"\n#include \"strings/human_readable.h\"\n\nABSL_DECLARE_FLAG(int32_t, list_max_listpack_size);\nABSL_DECLARE_FLAG(int32_t, list_compress_depth);\nABSL_DECLARE_FLAG(uint32_t, dbnum);\nABSL_FLAG(bool, deserialize_hnsw_index, false, \"Deserialize 
HNSW vector index graph structure\");\nABSL_FLAG(bool, rdb_load_dry_run, false, \"Dry run RDB load without applying changes\");\nABSL_FLAG(bool, rdb_ignore_expiry, false, \"Ignore Key Expiry when loding from RDB snapshot\");\n\nnamespace dfly {\n\nusing namespace std;\nusing base::IoBuf;\nusing nonstd::make_unexpected;\nusing namespace util;\nusing absl::GetFlag;\nusing rdb::errc;\nusing namespace tiering::literals;\n\nnamespace {\n\nint64_t LpGetIntegerIfValid(unsigned char* ele, int* valid) {\n  int64_t v = 0;\n  *valid = lpGetInteger(ele, &v);\n  return v;\n}\n\n// Returns 1 if the stream listpack entries structure is valid, 0 otherwise.\nint StreamValidateListpackIntegrity(unsigned char* lp, size_t size) {\n  int valid_record;\n  unsigned char *p, *next;\n\n  if (!lpValidateIntegrity(lp, size, 0, NULL, NULL))\n    return 0;\n\n  next = p = lpValidateFirst(lp);\n  if (!lpValidateNext(lp, &next, size))\n    return 0;\n  if (!p)\n    return 0;\n\n  LpGetIntegerIfValid(p, &valid_record);\n  if (!valid_record)\n    return 0;\n  p = next;\n  if (!lpValidateNext(lp, &next, size))\n    return 0;\n\n  LpGetIntegerIfValid(p, &valid_record);\n  if (!valid_record)\n    return 0;\n  p = next;\n  if (!lpValidateNext(lp, &next, size))\n    return 0;\n\n  LpGetIntegerIfValid(p, &valid_record);\n  if (!valid_record)\n    return 0;\n  p = next;\n  if (!lpValidateNext(lp, &next, size))\n    return 0;\n  return 1;\n}\n\n// Maximum length of each LoadTrace segment.\n//\n// Note kMaxBlobLen must be a multiple of 6 to avoid truncating elements\n// containing 2 or 3 items.\nconstexpr size_t kMaxBlobLen = 4092;\n\ninline auto Unexpected(errc ev) {\n  return make_unexpected(RdbError(ev));\n}\n\nconst error_code kOk;\n\n/* callback for ziplistValidateIntegrity.\n * The ziplist element pointed by 'p' will be converted and stored into listpack. 
*/\nint ziplistEntryConvertAndValidate(unsigned char* p, unsigned int head_count, void* userdata) {\n  unsigned char* str;\n  unsigned int slen;\n  long long vll;\n  unsigned char** lp = (unsigned char**)userdata;\n\n  if (!ziplistGet(p, &str, &slen, &vll))\n    return 0;\n\n  if (str)\n    *lp = lpAppend(*lp, (unsigned char*)str, slen);\n  else\n    *lp = lpAppendInteger(*lp, vll);\n\n  return 1;\n}\nstring ModuleTypeName(uint64_t module_id) {\n  static const char ModuleNameSet[] =\n      \"ABCDEFGHIJKLMNOPQRSTUVWXYZ\"\n      \"abcdefghijklmnopqrstuvwxyz\"\n      \"0123456789-_\";\n\n  char name[10];\n\n  name[9] = '\\0';\n  char* p = name + 8;\n  module_id >>= 10;\n  for (int j = 0; j < 9; j++) {\n    *p-- = ModuleNameSet[module_id & 63];\n    module_id >>= 6;\n  }\n\n  return string{name};\n}\n\nbool RdbTypeAllowedEmpty(int type) {\n  return type == RDB_TYPE_STRING || type == RDB_TYPE_JSON || type == RDB_TYPE_SBF ||\n         type == RDB_TYPE_STREAM_LISTPACKS || type == RDB_TYPE_SET_WITH_EXPIRY ||\n         type == RDB_TYPE_HASH_WITH_EXPIRY || type == RDB_TYPE_SBF2 || type == RDB_TYPE_CMS;\n}\n\nDbSlice& GetCurrentDbSlice() {\n  return namespaces->GetDefaultNamespace().GetCurrentDbSlice();\n}\n\n}  // namespace\n\nclass RdbLoaderBase::OpaqueObjLoader {\n public:\n  OpaqueObjLoader(int rdb_type, PrimeValue* pv, LoadConfig config)\n      : rdb_type_(rdb_type), pv_(pv), config_(config) {\n  }\n\n  void operator()(long long val) {\n    pv_->SetInt(val);\n  }\n\n  void operator()(const base::PODArray<char>& str);\n  void operator()(const LzfString& lzfstr);\n  void operator()(const unique_ptr<LoadTrace>& ptr);\n  void operator()(const RdbSBF& src);\n  void operator()(const RdbCMS& src);\n\n  std::error_code ec() const {\n    return ec_;\n  }\n\n private:\n  using ScratchBuf = base::PODArray<char>;\n\n  void CreateSet(const LoadTrace* ltrace);\n  void CreateHMap(const LoadTrace* ltrace);\n  void CreateList(const LoadTrace* ltrace);\n  void CreateZSet(const LoadTrace* 
ltrace);\n  void CreateStream(const LoadTrace* ltrace);\n\n  void HandleBlob(string_view blob);\n\n  string_view ToSV(const RdbVariant& obj, ScratchBuf* buf);\n\n  // Returns whether pv_ has the given object type and encoding. If not ec_\n  // is set to the error.\n  bool EnsureObjEncoding(CompactObjType type, unsigned encoding);\n\n  template <typename F> static void Iterate(const LoadTrace& ltrace, F&& f) {\n    for (const auto& blob : ltrace.arr) {\n      if (!f(blob)) {\n        return;\n      }\n    }\n  }\n\n  std::error_code ec_;\n  int rdb_type_;\n  ScratchBuf buf1_, buf2_, buf3_;\n  PrimeValue* pv_;\n  LoadConfig config_;\n};\n\nRdbLoaderBase::RdbLoaderBase() : origin_mem_buf_{16_KB} {\n  mem_buf_ = &origin_mem_buf_;\n}\n\nRdbLoaderBase::~RdbLoaderBase() {\n}\n\nvoid RdbLoaderBase::OpaqueObjLoader::operator()(const base::PODArray<char>& str) {\n  string_view sv(str.data(), str.size());\n  HandleBlob(sv);\n}\n\nvoid RdbLoaderBase::OpaqueObjLoader::operator()(const LzfString& lzfstr) {\n  string tmp(lzfstr.uncompressed_len, '\\0');\n  if (lzf_decompress(lzfstr.compressed_blob.data(), lzfstr.compressed_blob.size(), tmp.data(),\n                     tmp.size()) == 0) {\n    LOG(ERROR) << \"Invalid LZF compressed string\";\n    ec_ = RdbError(errc::rdb_file_corrupted);\n    return;\n  }\n  HandleBlob(tmp);\n}\n\nvoid RdbLoaderBase::OpaqueObjLoader::operator()(const unique_ptr<LoadTrace>& ptr) {\n  switch (rdb_type_) {\n    case RDB_TYPE_SET:\n    case RDB_TYPE_SET_WITH_EXPIRY:\n      CreateSet(ptr.get());\n      break;\n    case RDB_TYPE_HASH:\n    case RDB_TYPE_HASH_WITH_EXPIRY:\n      CreateHMap(ptr.get());\n      break;\n    case RDB_TYPE_LIST_QUICKLIST:\n    case RDB_TYPE_LIST_QUICKLIST_2:\n      CreateList(ptr.get());\n      break;\n    case RDB_TYPE_ZSET:\n    case RDB_TYPE_ZSET_2:\n      CreateZSet(ptr.get());\n      break;\n    case RDB_TYPE_STREAM_LISTPACKS:\n    case RDB_TYPE_STREAM_LISTPACKS_2:\n    case RDB_TYPE_STREAM_LISTPACKS_3:\n      
CreateStream(ptr.get());\n      break;\n    default:\n      LOG(FATAL) << \"Unsupported rdb type \" << rdb_type_;\n  }\n}\n\nvoid RdbLoaderBase::OpaqueObjLoader::operator()(const RdbSBF& src) {\n  SBF* sbf =\n      CompactObj::AllocateMR<SBF>(src.grow_factor, src.fp_prob, src.max_capacity, src.prev_size,\n                                  src.current_size, CompactObj::memory_resource());\n  for (unsigned i = 0; i < src.filters.size(); ++i) {\n    sbf->AddFilter(src.filters[i].blob, src.filters[i].hash_cnt);\n  }\n  pv_->SetSBF(sbf);\n}\n\nvoid RdbLoaderBase::OpaqueObjLoader::operator()(const RdbCMS& src) {\n  CMS* cms = CompactObj::AllocateMR<CMS>(src.width, src.depth, CompactObj::memory_resource());\n  DCHECK_EQ(src.counters.size(), cms->NumCounters());\n  cms->Load(src.total_incr_count, src.counters.data());\n  pv_->SetCMS(cms);\n}\n\nvoid RdbLoaderBase::OpaqueObjLoader::CreateSet(const LoadTrace* ltrace) {\n  size_t len = ltrace->arr.size();\n\n  bool is_intset = true;\n  if (!config_.chunked && rdb_type_ == RDB_TYPE_SET &&\n      ltrace->arr.size() <= SetFamily::MaxIntsetEntries()) {\n    Iterate(*ltrace, [&](const LoadBlob& blob) {\n      if (!holds_alternative<long long>(blob.rdb_var)) {\n        is_intset = false;\n        return false;\n      }\n      return true;\n    });\n  } else {\n    /* Use a regular set when there are too many entries, or when the\n     * set is being chunked. 
*/\n    is_intset = false;\n  }\n\n  sds sdsele = nullptr;\n  void* inner_obj = nullptr;\n\n  auto cleanup = absl::MakeCleanup([&] {\n    if (sdsele)\n      sdsfree(sdsele);\n    if (inner_obj) {\n      if (is_intset) {\n        zfree(inner_obj);\n      } else {\n        CompactObj::DeleteMR<StringSet>(inner_obj);\n      }\n    }\n  });\n\n  if (is_intset) {\n    inner_obj = intsetNew();\n\n    long long llval;\n    Iterate(*ltrace, [&](const LoadBlob& blob) {\n      llval = get<long long>(blob.rdb_var);\n      uint8_t success;\n      inner_obj = intsetAdd((intset*)inner_obj, llval, &success);\n      if (!success) {\n        LOG(ERROR) << \"Duplicate set members detected\";\n        ec_ = RdbError(errc::duplicate_key);\n        return false;\n      }\n      return true;\n    });\n  } else {\n    StringSet* set;\n    if (config_.append) {\n      // Note we always use StringSet when the object is being chunked.\n      if (!EnsureObjEncoding(OBJ_SET, kEncodingStrMap2)) {\n        return;\n      }\n      set = static_cast<StringSet*>(pv_->RObjPtr());\n    } else {\n      set = CompactObj::AllocateMR<StringSet>();\n      set->set_time(MemberTimeSeconds(GetCurrentTimeMs()));\n      inner_obj = set;\n\n      // Expand the set up front to avoid rehashing.\n      set->Reserve((config_.reserve > len) ? 
config_.reserve : len);\n    }\n\n    size_t increment = 1;\n    if (rdb_type_ == RDB_TYPE_SET_WITH_EXPIRY) {\n      increment = 2;\n    }\n\n    bool values_expired = false;\n\n    for (size_t i = 0; i < ltrace->arr.size(); i += increment) {\n      string_view element = ToSV(ltrace->arr[i].rdb_var, &buf1_);\n\n      uint32_t ttl_sec = UINT32_MAX;\n      if (increment == 2) {\n        int64_t ttl_time = -1;\n        string_view ttl_str = ToSV(ltrace->arr[i + 1].rdb_var, &buf2_);\n        if (!absl::SimpleAtoi(ttl_str, &ttl_time)) {\n          LOG(ERROR) << \"Can't parse set TTL \" << ttl_str;\n          ec_ = RdbError(errc::rdb_file_corrupted);\n          return;\n        }\n\n        if (ttl_time != -1) {\n          if (ttl_time <= set->time_now()) {\n            values_expired = true;\n            continue;\n          }\n\n          ttl_sec = ttl_time - set->time_now();\n        }\n      }\n      if (!set->Add(element, ttl_sec)) {\n        LOG(ERROR) << \"Duplicate set members detected \" << absl::CHexEscape(element) << \" with TTL \"\n                   << ttl_sec << \" \" << rdb_type_ << \" \" << set->ExpirationUsed() << \" \"\n                   << config_.append;\n        ec_ = RdbError(errc::duplicate_key);\n        return;\n      }\n    }\n    if (set->Empty() && values_expired) {\n      ec_ = RdbError(errc::value_expired);\n    }\n  }\n\n  if (ec_)\n    return;\n\n  if (!config_.append) {\n    pv_->InitRobj(OBJ_SET, is_intset ? kEncodingIntSet : kEncodingStrMap2, inner_obj);\n  }\n  std::move(cleanup).Cancel();\n}\n\nvoid RdbLoaderBase::OpaqueObjLoader::CreateHMap(const LoadTrace* ltrace) {\n  size_t increment = 2;\n  if (rdb_type_ == RDB_TYPE_HASH_WITH_EXPIRY)\n    increment = 3;\n\n  size_t len = ltrace->arr.size() / increment;\n\n  /* Too many entries? Use a hash table right from the start. 
*/\n  bool keep_lp = !config_.chunked && (len <= 64) && (rdb_type_ != RDB_TYPE_HASH_WITH_EXPIRY);\n\n  size_t lp_size = 0;\n  if (keep_lp) {\n    Iterate(*ltrace, [&](const LoadBlob& blob) {\n      size_t str_len = StrLen(blob.rdb_var);\n      lp_size += str_len;\n\n      if (str_len > server.max_map_field_len) {\n        keep_lp = false;\n        return false;\n      }\n      return true;\n    });\n  }\n\n  if (keep_lp) {\n    uint8_t* lp = lpNew(lp_size);\n\n    CHECK(ltrace->arr.size() % 2 == 0);\n    for (size_t i = 0; i < ltrace->arr.size(); i += 2) {\n      /* Add pair to listpack */\n      string_view sv = ToSV(ltrace->arr[i].rdb_var, &buf1_);\n      lp = lpAppend(lp, reinterpret_cast<const uint8_t*>(sv.data()), sv.size());\n\n      sv = ToSV(ltrace->arr[i + 1].rdb_var, &buf1_);\n      lp = lpAppend(lp, reinterpret_cast<const uint8_t*>(sv.data()), sv.size());\n    }\n\n    if (ec_) {\n      lpFree(lp);\n      return;\n    }\n\n    lp = lpShrinkToFit(lp);\n    pv_->InitRobj(OBJ_HASH, kEncodingListPack, lp);\n  } else {\n    StringMap* string_map;\n    if (config_.append) {\n      // Note we always use StringMap when the object is being streamed.\n      if (!EnsureObjEncoding(OBJ_HASH, kEncodingStrMap2)) {\n        return;\n      }\n\n      string_map = static_cast<StringMap*>(pv_->RObjPtr());\n    } else {\n      string_map = CompactObj::AllocateMR<StringMap>();\n      string_map->set_time(MemberTimeSeconds(GetCurrentTimeMs()));\n\n      // Expand the map up front to avoid rehashing.\n      string_map->Reserve((config_.reserve > len) ? 
config_.reserve : len);\n    }\n\n    auto cleanup = absl::MakeCleanup([&] {\n      if (!config_.append) {\n        CompactObj::DeleteMR<StringMap>(string_map);\n      }\n    });\n    bool values_expired = false;\n    for (size_t i = 0; i < ltrace->arr.size(); i += increment) {\n      string_view key = ToSV(ltrace->arr[i].rdb_var, &buf1_);\n      string_view val = ToSV(ltrace->arr[i + 1].rdb_var, &buf2_);\n\n      if (ec_)\n        return;\n\n      uint32_t ttl_sec = UINT32_MAX;\n      if (increment == 3) {\n        int64_t ttl_time = -1;\n        string_view ttl_str = ToSV(ltrace->arr[i + 2].rdb_var, &buf3_);\n        if (!absl::SimpleAtoi(ttl_str, &ttl_time)) {\n          LOG(ERROR) << \"Can't parse hashmap TTL for \" << key << \", ttl='\" << ttl_str\n                     << \"', val=\" << val;\n          ec_ = RdbError(errc::rdb_file_corrupted);\n          return;\n        }\n\n        if (ttl_time != -1) {\n          if (ttl_time <= string_map->time_now()) {\n            values_expired = true;\n            continue;\n          }\n\n          ttl_sec = ttl_time - string_map->time_now();\n        }\n      }\n\n      if (!string_map->AddOrSkip(key, val, ttl_sec)) {\n        LOG(ERROR) << \"Duplicate hash fields detected for field \" << key;\n        ec_ = RdbError(errc::rdb_file_corrupted);\n        return;\n      }\n    }\n    if (string_map->Empty() && values_expired) {\n      ec_ = RdbError(errc::value_expired);\n      return;\n    }\n    if (!config_.append) {\n      pv_->InitRobj(OBJ_HASH, kEncodingStrMap2, string_map);\n    }\n    std::move(cleanup).Cancel();\n  }\n}\n\nvoid RdbLoaderBase::OpaqueObjLoader::CreateList(const LoadTrace* ltrace) {\n  QList* qlv2 = nullptr;\n  if (config_.append) {\n    if (pv_->ObjType() != OBJ_LIST) {\n      ec_ = RdbError(errc::invalid_rdb_type);\n      return;\n    }\n    DCHECK_EQ(pv_->Encoding(), kEncodingQL2);\n    qlv2 = static_cast<QList*>(pv_->RObjPtr());\n  } else {\n    qlv2 = 
CompactObj::AllocateMR<QList>(GetFlag(FLAGS_list_max_listpack_size),\n                                         GetFlag(FLAGS_list_compress_depth));\n  }\n\n  auto cleanup = absl::Cleanup([&] {\n    if (!config_.append) {\n      CompactObj::DeleteMR<QList>(qlv2);\n    }\n  });\n\n  Iterate(*ltrace, [&](const LoadBlob& blob) {\n    unsigned container = blob.encoding;\n    string_view sv = ToSV(blob.rdb_var, &buf1_);\n\n    if (ec_)\n      return false;\n\n    uint8_t* lp = nullptr;\n    if (container == QUICKLIST_NODE_CONTAINER_PLAIN) {\n      lp = (uint8_t*)zmalloc(sv.size());\n      ::memcpy(lp, (uint8_t*)sv.data(), sv.size());\n      qlv2->AppendPlain(lp, sv.size());\n\n      return true;\n    }\n\n    if (rdb_type_ == RDB_TYPE_LIST_QUICKLIST_2) {\n      uint8_t* src = (uint8_t*)sv.data();\n      if (!lpValidateIntegrity(src, sv.size(), 0, nullptr, nullptr)) {\n        LOG(ERROR) << \"Listpack integrity check failed.\";\n        ec_ = RdbError(errc::rdb_file_corrupted);\n        return false;\n      }\n\n      if (lpLength(src) == 0) {\n        return true;\n      }\n\n      lp = (uint8_t*)zmalloc(sv.size());\n      ::memcpy(lp, src, sv.size());\n    } else {\n      lp = lpNew(sv.size());\n      if (!ziplistValidateIntegrity((uint8_t*)sv.data(), sv.size(), 1,\n                                    ziplistEntryConvertAndValidate, &lp)) {\n        LOG(ERROR) << \"Ziplist integrity check failed: \" << sv.size();\n        zfree(lp);\n        ec_ = RdbError(errc::rdb_file_corrupted);\n        return false;\n      }\n\n      /* Silently skip empty ziplists, if we'll end up with empty quicklist we'll fail later. 
*/\n      if (lpLength(lp) == 0) {\n        zfree(lp);\n        return true;\n      }\n\n      lp = lpShrinkToFit(lp);\n    }\n\n    qlv2->AppendListpack(lp);\n    return true;\n  });\n\n  if (ec_)\n    return;\n  if (qlv2 && qlv2->Size() == 0) {\n    ec_ = RdbError(errc::empty_key);\n    return;\n  }\n\n  std::move(cleanup).Cancel();\n\n  if (!config_.append) {\n    // Try to convert to listpack if it's a single-node quicklist\n    if (uint8_t* lp = qlv2->TryExtractListpack()) {\n      CompactObj::DeleteMR<QList>(qlv2);\n      pv_->InitRobj(OBJ_LIST, kEncodingListPack, lp);\n    } else {\n      pv_->InitRobj(OBJ_LIST, kEncodingQL2, qlv2);\n    }\n  }\n}\n\nvoid RdbLoaderBase::OpaqueObjLoader::CreateZSet(const LoadTrace* ltrace) {\n  size_t zsetlen = ltrace->arr.size();\n\n  unsigned encoding = OBJ_ENCODING_SKIPLIST;\n  detail::SortedMap* zs;\n  if (config_.append) {\n    // Note we always use SortedMap when the object is being chunked.\n    if (!EnsureObjEncoding(OBJ_ZSET, OBJ_ENCODING_SKIPLIST)) {\n      return;\n    }\n\n    zs = static_cast<detail::SortedMap*>(pv_->RObjPtr());\n  } else {\n    zs = CompactObj::AllocateMR<detail::SortedMap>();\n\n    size_t reserve = (config_.reserve > zsetlen) ? config_.reserve : zsetlen;\n    if (reserve > 2 && !zs->Reserve(reserve)) {\n      LOG(ERROR) << \"OOM in dictTryExpand \" << zsetlen;\n      ec_ = RdbError(errc::out_of_memory);\n      return;\n    }\n  }\n\n  auto cleanup = absl::MakeCleanup([&] {\n    if (!config_.append) {\n      CompactObj::DeleteMR<detail::SortedMap>(zs);\n    }\n  });\n\n  size_t maxelelen = 0, totelelen = 0;\n\n  Iterate(*ltrace, [&](const LoadBlob& blob) {\n    string_view sv = ToSV(blob.rdb_var, &buf1_);\n\n    double score = blob.score;\n\n    /* Don't care about integer-encoded strings. 
*/\n    if (sv.size() > maxelelen)\n      maxelelen = sv.size();\n    totelelen += sv.size();\n\n    if (!zs->InsertNew(score, sv)) {\n      LOG(ERROR) << \"Duplicate zset fields detected\";\n      ec_ = RdbError(errc::rdb_file_corrupted);\n      return false;\n    }\n\n    return true;\n  });\n\n  if (ec_)\n    return;\n\n  void* inner = zs;\n  if (!config_.chunked && zs->Size() <= ZSET_MAX_LISTPACK_ENTRIES &&\n      maxelelen <= ZSET_MAX_LISTPACK_VALUE && lpSafeToAdd(NULL, totelelen)) {\n    encoding = OBJ_ENCODING_LISTPACK;\n    inner = zs->ToListPack();\n    CompactObj::DeleteMR<detail::SortedMap>(zs);\n  }\n\n  std::move(cleanup).Cancel();\n\n  if (!config_.append) {\n    pv_->InitRobj(OBJ_ZSET, encoding, inner);\n  }\n}\n\nvoid RdbLoaderBase::OpaqueObjLoader::CreateStream(const LoadTrace* ltrace) {\n  stream* s;\n  StreamMemTracker mem_tracker;\n  if (config_.append) {\n    if (!EnsureObjEncoding(OBJ_STREAM, OBJ_ENCODING_STREAM)) {\n      return;\n    }\n\n    s = static_cast<stream*>(pv_->RObjPtr());\n  } else {\n    s = streamNew();\n  }\n\n  auto cleanup = absl::Cleanup([&] {\n    if (!config_.append) {\n      freeStream(s);\n    }\n  });\n\n  for (size_t i = 0; i < ltrace->arr.size(); i += 2) {\n    string_view nodekey = ToSV(ltrace->arr[i].rdb_var, &buf1_);\n    string_view data = ToSV(ltrace->arr[i + 1].rdb_var, &buf2_);\n\n    uint8_t* lp = (uint8_t*)data.data();\n\n    if (!StreamValidateListpackIntegrity(lp, data.size())) {\n      LOG(ERROR) << \"Stream listpack integrity check failed.\";\n      ec_ = RdbError(errc::rdb_file_corrupted);\n      return;\n    }\n    CHECK(lpFirst(lp) != NULL);\n    uint8_t* copy_lp = (uint8_t*)zmalloc(data.size());\n    ::memcpy(copy_lp, lp, data.size());\n    /* Insert the key in the radix tree. 
*/\n    int retval =\n        raxTryInsert(s->rax, (unsigned char*)nodekey.data(), nodekey.size(), copy_lp, NULL);\n    if (!retval) {\n      zfree(copy_lp);\n      LOG(ERROR) << \"Listpack re-added with existing key\";\n      ec_ = RdbError(errc::rdb_file_corrupted);\n      return;\n    }\n  }\n\n  // We only load the stream metadata and consumer groups (stream_trace) on\n  // the final read (when reading the stream in increments). Therefore if\n  // stream_trace is null add the partial stream, then stream_trace will be\n  // loaded later.\n  if (!ltrace->stream_trace) {\n    if (!config_.append) {\n      pv_->InitRobj(OBJ_STREAM, OBJ_ENCODING_STREAM, s);\n    }\n    std::move(cleanup).Cancel();\n    return;\n  }\n\n  s->length = ltrace->stream_trace->stream_len;\n  CopyStreamId(ltrace->stream_trace->last_id, &s->last_id);\n  CopyStreamId(ltrace->stream_trace->first_id, &s->first_id);\n  CopyStreamId(ltrace->stream_trace->max_deleted_entry_id, &s->max_deleted_entry_id);\n  s->entries_added = ltrace->stream_trace->entries_added;\n\n  if (rdb_type_ == RDB_TYPE_STREAM_LISTPACKS) {\n    /* Since the rax is already loaded, we can find the first entry's\n     * ID. 
*/\n    streamGetEdgeID(s, 1, 1, &s->first_id);\n  }\n\n  for (const auto& cg : ltrace->stream_trace->cgroup) {\n    streamCG* cgroup = nullptr;\n    {\n      string_view cgname = ToSV(cg.name, &buf1_);\n      streamID cg_id;\n      cg_id.ms = cg.ms;\n      cg_id.seq = cg.seq;\n\n      uint64_t entries_read = cg.entries_read;\n      if (rdb_type_ == RDB_TYPE_STREAM_LISTPACKS) {\n        entries_read = streamEstimateDistanceFromFirstEverEntry(s, &cg_id);\n      }\n\n      cgroup = streamCreateCG(s, cgname.data(), cgname.size(), &cg_id, entries_read);\n      if (cgroup == NULL) {\n        LOG(ERROR) << \"Duplicated consumer group name \" << cgname;\n        ec_ = RdbError(errc::duplicate_key);\n        return;\n      }\n    }\n    for (const auto& pel : cg.pel_arr) {\n      streamNACK* nack = reinterpret_cast<streamNACK*>(zmalloc(sizeof(*nack)));\n      nack->delivery_time = pel.delivery_time;\n      nack->delivery_count = pel.delivery_count;\n      nack->consumer = nullptr;\n\n      if (!raxTryInsert(cgroup->pel, const_cast<uint8_t*>(pel.rawid.data()), pel.rawid.size(), nack,\n                        NULL)) {\n        LOG(ERROR) << \"Duplicated global PEL entry loading stream consumer group\";\n        ec_ = RdbError(errc::duplicate_key);\n        streamFreeNACK(nack);\n        return;\n      }\n    }\n\n    for (const auto& cons : cg.cons_arr) {\n      streamConsumer* consumer = StreamCreateConsumer(\n          cgroup, ToSV(cons.name, &buf1_), cons.seen_time, SCC_NO_NOTIFY | SCC_NO_DIRTIFY);\n      if (!consumer) {\n        LOG(ERROR) << \"Duplicate stream consumer detected.\";\n        ec_ = RdbError(errc::duplicate_key);\n        return;\n      }\n\n      consumer->active_time = cons.active_time;\n      /* Create the PEL (pending entries list) about entries owned by this specific\n       * consumer. 
*/\n      for (const auto& rawid : cons.nack_arr) {\n        uint8_t* ptr = const_cast<uint8_t*>(rawid.data());\n        streamNACK* nack = nullptr;\n        int fres = raxFind(cgroup->pel, ptr, rawid.size(), (void**)&nack);\n        if (fres == 0) {\n          LOG(ERROR) << \"Consumer entry not found in group global PEL\";\n          ec_ = RdbError(errc::rdb_file_corrupted);\n          return;\n        }\n\n        /* Set the NACK consumer, that was left to NULL when\n         * loading the global PEL. Then set the same shared\n         * NACK structure also in the consumer-specific PEL. */\n        nack->consumer = consumer;\n        if (!raxTryInsert(consumer->pel, ptr, rawid.size(), nack, NULL)) {\n          LOG(ERROR) << \"Duplicated consumer PEL entry loading a stream consumer group\";\n          streamFreeNACK(nack);\n          ec_ = RdbError(errc::duplicate_key);\n          return;\n        }\n      }\n    }\n  }\n\n  std::move(cleanup).Cancel();\n  if (!config_.append) {\n    pv_->InitRobj(OBJ_STREAM, OBJ_ENCODING_STREAM, s);\n  }\n  mem_tracker.UpdateStreamSize(*pv_);\n}\n\nvoid RdbLoaderBase::OpaqueObjLoader::HandleBlob(string_view blob) {\n  auto handle_load_result = [&](LoadBlobResult load_result) {\n    switch (load_result) {\n      case LoadBlobResult::kCorrupted:\n        LOG(ERROR) << \"Corrupted blob detected with size \" << blob.size() << \" for rdb type \"\n                   << rdb_type_;\n        ec_ = RdbError(errc::rdb_file_corrupted);\n        break;\n      case LoadBlobResult::kOutOfMemory:\n        LOG(ERROR) << \"OOM in LoadBlob \" << blob.size();\n        ec_ = RdbError(errc::out_of_memory);\n        break;\n      case LoadBlobResult::kEmpty:\n        ec_ = RdbError(errc::empty_key);\n        break;\n      default:\n        break;\n    }\n  };\n\n  if (rdb_type_ == RDB_TYPE_STRING) {\n    if (config_.append) {\n      pv_->AppendString(blob);\n    } else if (config_.reserve) {\n      pv_->ReserveString(config_.reserve);\n      
pv_->AppendString(blob);\n    } else {\n      pv_->SetString(blob);\n    }\n    return;\n  }\n\n  if (rdb_type_ == RDB_TYPE_SET_INTSET || rdb_type_ == RDB_TYPE_SET_LISTPACK) {\n    LoadBlobResult load_result = rdb_type_ == RDB_TYPE_SET_INTSET\n                                     ? SetFamily::LoadIntSetBlob(blob, pv_)\n                                     : SetFamily::LoadLPSetBlob(blob, pv_);\n    handle_load_result(load_result);\n    return;\n  }\n\n  if (rdb_type_ == RDB_TYPE_HASH_ZIPLIST || rdb_type_ == RDB_TYPE_HASH_LISTPACK) {\n    LoadBlobResult load_result = rdb_type_ == RDB_TYPE_HASH_ZIPLIST\n                                     ? HSetFamily::LoadZiplistBlob(blob, pv_)\n                                     : HSetFamily::LoadListpackBlob(blob, pv_);\n    handle_load_result(load_result);\n    return;\n  }\n\n  if (rdb_type_ == RDB_TYPE_ZSET_ZIPLIST || rdb_type_ == RDB_TYPE_ZSET_LISTPACK) {\n    LoadBlobResult load_result = rdb_type_ == RDB_TYPE_ZSET_ZIPLIST\n                                     ? 
ZSetFamily::LoadZiplistBlob(blob, pv_)\n                                     : ZSetFamily::LoadListpackBlob(blob, pv_);\n    handle_load_result(load_result);\n    return;\n  } else if (rdb_type_ == RDB_TYPE_JSON) {\n    size_t start_size = static_cast<MiMemoryResource*>(CompactObj::memory_resource())->used();\n    {\n      if (auto json = ParseJsonUsingShardHeap(blob)) {\n        pv_->SetJson(std::move(*json));\n      } else {\n        LOG(INFO) << \"Invalid JSON string during rdb load of JSON object: \" << blob;\n        ec_ = RdbError(errc::bad_json_string);\n        return;\n      }\n    }\n    size_t end_size = static_cast<MiMemoryResource*>(CompactObj::memory_resource())->used();\n    DCHECK(end_size > start_size);\n    pv_->SetJsonSize(end_size - start_size);\n  } else {\n    LOG(FATAL) << \"Unsupported rdb type \" << rdb_type_;\n  }\n}\n\nstring_view RdbLoaderBase::OpaqueObjLoader::ToSV(const RdbVariant& obj, ScratchBuf* buf) {\n  if (holds_alternative<long long>(obj)) {\n    buf->resize(absl::numbers_internal::kFastToBufferSize);\n    auto val = get<long long>(obj);\n    char* next = absl::numbers_internal::FastIntToBuffer(val, buf->data());\n    return string_view{buf->data(), size_t(next - buf->data())};\n  }\n\n  const base::PODArray<char>* ch_arr = get_if<base::PODArray<char>>(&obj);\n  if (ch_arr) {\n    // pass non-null pointer to avoid UB with lp API.\n    return ch_arr->empty() ? 
\"\"sv : string_view{ch_arr->data(), ch_arr->size()};\n  }\n\n  const LzfString* lzf = get_if<LzfString>(&obj);\n  if (lzf) {\n    buf->resize(lzf->uncompressed_len);\n    if (lzf_decompress(lzf->compressed_blob.data(), lzf->compressed_blob.size(), buf->data(),\n                       lzf->uncompressed_len) == 0) {\n      LOG(ERROR) << \"Invalid LZF compressed string\";\n      ec_ = RdbError(errc::rdb_file_corrupted);\n      return {buf->data(), 0};  // important to return non-null pointer to avoid UB with lp API.\n    }\n    return {buf->data(), buf->size()};\n  }\n\n  LOG(FATAL) << \"Unexpected variant\";\n  return {};\n}\n\nbool RdbLoaderBase::OpaqueObjLoader::EnsureObjEncoding(CompactObjType type, unsigned encoding) {\n  if (pv_->ObjType() != type) {\n    LOG(DFATAL) << \"Invalid RDB type \" << pv_->ObjType() << \"; expected \" << type;\n    ec_ = RdbError(errc::invalid_rdb_type);\n    return false;\n  }\n  if (pv_->Encoding() != encoding) {\n    LOG(DFATAL) << \"Invalid encoding \" << pv_->Encoding() << \"; expected \" << encoding;\n    ec_ = RdbError(errc::invalid_encoding);\n    return false;\n  }\n\n  return true;\n}\n\nstd::error_code RdbLoaderBase::FetchBuf(size_t size, void* dest) {\n  if (size == 0)\n    return kOk;\n\n  uint8_t* next = (uint8_t*)dest;\n  size_t bytes_read;\n\n  size_t to_copy = std::min(mem_buf_->InputLen(), size);\n  DVLOG(3) << \"Copying \" << to_copy << \" bytes\";\n\n  ::memcpy(next, mem_buf_->InputBuffer().data(), to_copy);\n  mem_buf_->ConsumeInput(to_copy);\n  size -= to_copy;\n  if (size == 0)\n    return kOk;\n\n  next += to_copy;\n\n  if (size + bytes_read_ > source_limit_) {\n    LOG(ERROR) << \"Out of bound read \" << size + bytes_read_ << \" vs \" << source_limit_;\n\n    return RdbError(errc::rdb_file_corrupted);\n  }\n\n  if (size > 512) {  // Worth reading directly into next.\n    io::MutableBytes mb{next, size};\n\n    SET_OR_RETURN(src_->Read(mb), bytes_read);\n    if (bytes_read < size)\n      return 
RdbError(errc::rdb_file_corrupted);\n\n    bytes_read_ += bytes_read;\n    DCHECK_LE(bytes_read_, source_limit_);\n\n    return kOk;\n  }\n\n  io::MutableBytes mb = mem_buf_->AppendBuffer();\n\n  // Must be because mem_buf_ is be empty.\n  DCHECK_GT(mb.size(), size);\n\n  if (bytes_read_ + mb.size() > source_limit_) {\n    mb = mb.subspan(0, source_limit_ - bytes_read_);\n  }\n\n  SET_OR_RETURN(src_->ReadAtLeast(mb, size), bytes_read);\n\n  if (bytes_read < size)\n    return RdbError(errc::rdb_file_corrupted);\n  bytes_read_ += bytes_read;\n\n  DCHECK_LE(bytes_read_, source_limit_);\n\n  mem_buf_->CommitWrite(bytes_read);\n  ::memcpy(next, mem_buf_->InputBuffer().data(), size);\n  mem_buf_->ConsumeInput(size);\n\n  return kOk;\n}\n\nsize_t RdbLoaderBase::StrLen(const RdbVariant& tset) {\n  const base::PODArray<char>* arr = get_if<base::PODArray<char>>(&tset);\n  if (arr)\n    return arr->size();\n\n  if (holds_alternative<long long>(tset)) {\n    auto val = get<long long>(tset);\n    char buf[32];\n    char* next = absl::numbers_internal::FastIntToBuffer(val, buf);\n    return (next - buf);\n  }\n\n  const LzfString* lzf = get_if<LzfString>(&tset);\n  if (lzf)\n    return lzf->uncompressed_len;\n\n  LOG(DFATAL) << \"should not reach\";\n  return 0;\n}\n\nauto RdbLoaderBase::FetchGenericString() -> io::Result<string> {\n  bool isencoded;\n  size_t len;\n\n  SET_OR_UNEXPECT(LoadLen(&isencoded), len);\n\n  if (isencoded) {\n    switch (len) {\n      case RDB_ENC_INT8:\n      case RDB_ENC_INT16:\n      case RDB_ENC_INT32:\n        return FetchIntegerObject(len);\n      case RDB_ENC_LZF:\n        return FetchLzfStringObject();\n      default:\n        LOG(ERROR) << \"Unknown RDB string encoding len \" << len;\n        return Unexpected(errc::rdb_file_corrupted);\n    }\n  }\n\n  string res;\n\n  if (len > 0) {\n    res.resize(len);\n    error_code ec = FetchBuf(len, res.data());\n    if (ec) {\n      return make_unexpected(ec);\n    }\n  }\n\n  return res;\n}\n\nauto 
RdbLoaderBase::FetchLzfStringObject() -> io::Result<string> {\n  bool zerocopy_decompress = true;\n\n  const uint8_t* cbuf = NULL;\n  uint64_t clen, len;\n\n  SET_OR_UNEXPECT(LoadLen(NULL), clen);\n  SET_OR_UNEXPECT(LoadLen(NULL), len);\n\n  // TODO serialization and deserialization for data > 512 MB should be done via chunks\n  if (len <= clen || clen == 0) {\n    LOG(ERROR) << \"Bad compressed string\";\n    return Unexpected(rdb::rdb_file_corrupted);\n  }\n\n  if (mem_buf_->InputLen() >= clen) {\n    cbuf = mem_buf_->InputBuffer().data();\n  } else {\n    compr_buf_.resize(clen);\n    zerocopy_decompress = false;\n\n    /* Load the compressed representation and uncompress it to target. */\n    error_code ec = FetchBuf(clen, compr_buf_.data());\n    if (ec) {\n      return make_unexpected(ec);\n    }\n    cbuf = compr_buf_.data();\n  }\n\n  string res(len, 0);\n\n  if (lzf_decompress(cbuf, clen, res.data(), len) == 0) {\n    LOG(ERROR) << \"Invalid LZF compressed string\";\n    return Unexpected(errc::rdb_file_corrupted);\n  }\n\n  // FetchBuf consumes the input but if we have not went through that path\n  // we need to consume now.\n  if (zerocopy_decompress)\n    mem_buf_->ConsumeInput(clen);\n\n  return res;\n}\n\nauto RdbLoaderBase::FetchIntegerObject(int enctype) -> io::Result<string> {\n  io::Result<long long> val = ReadIntObj(enctype);\n\n  if (!val.has_value()) {\n    return val.get_unexpected();\n  }\n\n  char buf[32];\n  absl::numbers_internal::FastIntToBuffer(*val, buf);\n\n  return string(buf);\n}\n\nio::Result<double> RdbLoaderBase::FetchBinaryDouble() {\n  union {\n    uint64_t val;\n    double d;\n  } u;\n\n  static_assert(sizeof(u) == sizeof(uint64_t));\n  auto ec = EnsureRead(8);\n  if (ec)\n    return make_unexpected(ec);\n\n  uint8_t buf[8];\n  mem_buf_->ReadAndConsume(8, buf);\n  u.val = base::LE::LoadT<uint64_t>(buf);\n  return u.d;\n}\n\nio::Result<double> RdbLoaderBase::FetchDouble() {\n  uint8_t len;\n\n  
SET_OR_UNEXPECT(FetchInt<uint8_t>(), len);\n  constexpr double kInf = std::numeric_limits<double>::infinity();\n  switch (len) {\n    case 255:\n      return -kInf;\n    case 254:\n      return kInf;\n    case 253:\n      return std::numeric_limits<double>::quiet_NaN();\n    default:;\n  }\n  char buf[256];\n  error_code ec = FetchBuf(len, buf);\n  if (ec)\n    return make_unexpected(ec);\n  buf[len] = '\\0';\n  double val;\n  if (sscanf(buf, \"%lg\", &val) != 1)\n    return Unexpected(errc::rdb_file_corrupted);\n  return val;\n}\n\nauto RdbLoaderBase::ReadKey() -> io::Result<string> {\n  return FetchGenericString();\n}\n\nerror_code RdbLoaderBase::ReadObj(int rdbtype, OpaqueObj* dest) {\n  io::Result<OpaqueObj> iores;\n\n  switch (rdbtype) {\n    case RDB_TYPE_SET:\n    case RDB_TYPE_SET_WITH_EXPIRY:\n      iores = ReadSet(rdbtype);\n      break;\n    case RDB_TYPE_SET_INTSET:\n      iores = ReadIntSet();\n      break;\n    case RDB_TYPE_HASH_ZIPLIST:\n    case RDB_TYPE_HASH_LISTPACK:\n    case RDB_TYPE_ZSET_LISTPACK:\n    case RDB_TYPE_ZSET_ZIPLIST:\n    case RDB_TYPE_STRING:\n    case RDB_TYPE_JSON:\n      iores = ReadGeneric(rdbtype);\n      break;\n    case RDB_TYPE_HASH:\n    case RDB_TYPE_HASH_WITH_EXPIRY:\n      iores = ReadHMap(rdbtype);\n      break;\n    case RDB_TYPE_ZSET:\n    case RDB_TYPE_ZSET_2:\n      iores = ReadZSet(rdbtype);\n      break;\n    case RDB_TYPE_LIST_QUICKLIST:\n    case RDB_TYPE_LIST_QUICKLIST_2:\n      iores = ReadListQuicklist(rdbtype);\n      break;\n    case RDB_TYPE_STREAM_LISTPACKS:\n    case RDB_TYPE_STREAM_LISTPACKS_2:\n    case RDB_TYPE_STREAM_LISTPACKS_3:\n      iores = ReadStreams(rdbtype);\n      break;\n    case RDB_TYPE_SET_LISTPACK:\n      // We need to deal with protocol versions 9 and older because in these\n      // RDB_TYPE_JSON == 20. 
On newer versions > 9 we bumped up RDB_TYPE_JSON to 30\n      // because it overlapped with the new type RDB_TYPE_SET_LISTPACK\n      if (rdb_version_ < 10) {\n        // consider it RDB_TYPE_JSON_OLD (20)\n        iores = ReadGeneric(RDB_TYPE_JSON);\n      } else {\n        iores = ReadGeneric(rdbtype);\n      }\n      break;\n    case RDB_TYPE_MODULE_2:\n      iores = ReadRedisJson();\n      break;\n    case RDB_TYPE_SBF:\n      iores = ReadSBF();\n      break;\n    case RDB_TYPE_SBF2:\n      iores = ReadSBF2();\n      break;\n    case RDB_TYPE_CMS:\n      iores = ReadCMS();\n      break;\n    default:\n      LOG(ERROR) << \"Unsupported rdb type \" << rdbtype;\n\n      return RdbError(errc::invalid_encoding);\n  }\n\n  if (!iores)\n    return iores.error();\n  *dest = std::move(*iores);\n  return error_code{};\n}\n\nstatic const size_t kMaxStringSize = 200_KB;\n\nerror_code RdbLoaderBase::ReadStringObj(RdbVariant* dest, bool big_string_split) {\n  bool isencoded = false;\n  size_t len;\n  SET_OR_RETURN(LoadLen(&isencoded), len);\n\n  if (isencoded) {\n    switch (len) {\n      case RDB_ENC_INT8:\n      case RDB_ENC_INT16:\n      case RDB_ENC_INT32: {\n        io::Result<long long> io_int = ReadIntObj(len);\n        if (!io_int)\n          return io_int.error();\n        dest->emplace<long long>(*io_int);\n        return error_code{};\n      }\n      case RDB_ENC_LZF: {\n        io::Result<LzfString> lzf = ReadLzf();\n        if (!lzf)\n          return lzf.error();\n\n        dest->emplace<LzfString>(std::move(lzf.value()));\n        return error_code{};\n      }\n      default:\n        LOG(ERROR) << \"Unknown RDB string encoding \" << len;\n        return RdbError(errc::rdb_file_corrupted);\n    }\n  }\n\n  if (big_string_split && len > kMaxStringSize) {\n    pending_read_.remaining = len - kMaxStringSize;\n    pending_read_.reserve = len;\n    len = kMaxStringSize;\n  }\n\n  auto& blob = dest->emplace<base::PODArray<char>>();\n  blob.resize(len);\n  return 
FetchBuf(len, blob.data());\n}\n\nerror_code RdbLoaderBase::ReadRemainingString(RdbVariant* dest) {\n  size_t read_len = std::min(pending_read_.remaining, kMaxStringSize);\n  pending_read_.remaining = pending_read_.remaining - read_len;\n\n  auto& blob = dest->emplace<base::PODArray<char>>();\n  blob.resize(read_len);\n  return FetchBuf(read_len, blob.data());\n}\n\nio::Result<long long> RdbLoaderBase::ReadIntObj(int enctype) {\n  long long val;\n\n  if (enctype == RDB_ENC_INT8) {\n    SET_OR_UNEXPECT(FetchInt<int8_t>(), val);\n  } else if (enctype == RDB_ENC_INT16) {\n    SET_OR_UNEXPECT(FetchInt<int16_t>(), val);\n  } else if (enctype == RDB_ENC_INT32) {\n    SET_OR_UNEXPECT(FetchInt<int32_t>(), val);\n  } else {\n    return Unexpected(errc::invalid_encoding);\n  }\n  return val;\n}\n\nauto RdbLoaderBase::ReadLzf() -> io::Result<LzfString> {\n  uint64_t clen;\n  LzfString res;\n\n  SET_OR_UNEXPECT(LoadLen(NULL), clen);\n  SET_OR_UNEXPECT(LoadLen(NULL), res.uncompressed_len);\n\n  if (res.uncompressed_len > 1ULL << 29) {\n    LOG(ERROR) << \"Uncompressed length is too big \" << res.uncompressed_len;\n    return Unexpected(errc::rdb_file_corrupted);\n  }\n\n  res.compressed_blob.resize(clen);\n  /* Load the compressed representation and uncompress it to target. 
*/\n  error_code ec = FetchBuf(clen, res.compressed_blob.data());\n  if (ec) {\n    return make_unexpected(ec);\n  }\n\n  return res;\n}\n\nauto RdbLoaderBase::ReadSet(int rdbtype) -> io::Result<OpaqueObj> {\n  size_t len;\n  if (pending_read_.remaining > 0) {\n    len = pending_read_.remaining;\n  } else {\n    SET_OR_UNEXPECT(LoadLen(NULL), len);\n    if (rdbtype == RDB_TYPE_SET_WITH_EXPIRY) {\n      len *= 2;\n    }\n    pending_read_.reserve = len;\n  }\n\n  // Limit each read to kMaxBlobLen elements.\n  unique_ptr<LoadTrace> load_trace(new LoadTrace);\n  size_t n = std::min(len, kMaxBlobLen);\n  load_trace->arr.resize(n);\n  for (size_t i = 0; i < n; i++) {\n    error_code ec = ReadStringObj(&load_trace->arr[i].rdb_var);\n    if (ec) {\n      return make_unexpected(ec);\n    }\n  }\n\n  // If there are still unread elements, cache the number of remaining\n  // elements, or clear if the full object has been read.\n  if (len > n) {\n    pending_read_.remaining = len - n;\n  } else if (pending_read_.remaining > 0) {\n    pending_read_.remaining = 0;\n  }\n\n  return OpaqueObj{std::move(load_trace), rdbtype};\n}\n\nauto RdbLoaderBase::ReadIntSet() -> io::Result<OpaqueObj> {\n  RdbVariant obj;\n  error_code ec = ReadStringObj(&obj);\n  if (ec) {\n    return make_unexpected(ec);\n  }\n\n  const LzfString* lzf = get_if<LzfString>(&obj);\n  const base::PODArray<char>* arr = get_if<base::PODArray<char>>(&obj);\n\n  if (lzf) {\n    if (lzf->uncompressed_len == 0 || lzf->compressed_blob.empty())\n      return Unexpected(errc::rdb_file_corrupted);\n  } else if (arr) {\n    if (arr->empty())\n      return Unexpected(errc::rdb_file_corrupted);\n  } else {\n    return Unexpected(errc::rdb_file_corrupted);\n  }\n\n  return OpaqueObj{std::move(obj), RDB_TYPE_SET_INTSET};\n}\n\nauto RdbLoaderBase::ReadGeneric(int rdbtype) -> io::Result<OpaqueObj> {\n  bool is_string_type = RDB_TYPE_STRING == rdbtype;\n  RdbVariant str_obj;\n  error_code ec;\n  if (pending_read_.remaining) {\n   
 ec = ReadRemainingString(&str_obj);\n  } else {\n    ec = ReadStringObj(&str_obj, is_string_type);\n  }\n  if (ec)\n    return make_unexpected(ec);\n\n  if (!is_string_type && StrLen(str_obj) == 0) {\n    return Unexpected(errc::rdb_file_corrupted);\n  }\n\n  return OpaqueObj{std::move(str_obj), rdbtype};\n}\n\nauto RdbLoaderBase::ReadHMap(int rdbtype) -> io::Result<OpaqueObj> {\n  size_t len;\n  if (pending_read_.remaining > 0) {\n    len = pending_read_.remaining;\n  } else {\n    SET_OR_UNEXPECT(LoadLen(NULL), len);\n\n    if (rdbtype == RDB_TYPE_HASH) {\n      len *= 2;\n    } else {\n      DCHECK_EQ(rdbtype, RDB_TYPE_HASH_WITH_EXPIRY);\n      len *= 3;\n    }\n\n    pending_read_.reserve = len;\n  }\n\n  // Limit each read to kMaxBlobLen elements.\n  unique_ptr<LoadTrace> load_trace(new LoadTrace);\n  size_t n = std::min<size_t>(len, kMaxBlobLen);\n  load_trace->arr.resize(n);\n  for (size_t i = 0; i < n; ++i) {\n    error_code ec = ReadStringObj(&load_trace->arr[i].rdb_var);\n    if (ec)\n      return make_unexpected(ec);\n  }\n\n  // If there are still unread elements, cache the number of remaining\n  // elements, or clear if the full object has been read.\n  if (len > n) {\n    pending_read_.remaining = len - n;\n  } else if (pending_read_.remaining > 0) {\n    pending_read_.remaining = 0;\n  }\n\n  return OpaqueObj{std::move(load_trace), rdbtype};\n}\n\nauto RdbLoaderBase::ReadZSet(int rdbtype) -> io::Result<OpaqueObj> {\n  uint64_t zsetlen;\n  if (pending_read_.remaining > 0) {\n    zsetlen = pending_read_.remaining;\n  } else {\n    SET_OR_UNEXPECT(LoadLen(nullptr), zsetlen);\n    pending_read_.reserve = zsetlen;\n  }\n\n  if (zsetlen == 0)\n    return Unexpected(errc::empty_key);\n\n  double score;\n\n  // Limit each read to kMaxBlobLen elements.\n  unique_ptr<LoadTrace> load_trace(new LoadTrace);\n  size_t n = std::min<size_t>(zsetlen, kMaxBlobLen);\n  load_trace->arr.resize(n);\n  for (size_t i = 0; i < n; ++i) {\n    error_code ec = 
ReadStringObj(&load_trace->arr[i].rdb_var);\n    if (ec)\n      return make_unexpected(ec);\n    if (rdbtype == RDB_TYPE_ZSET_2) {\n      SET_OR_UNEXPECT(FetchBinaryDouble(), score);\n    } else {\n      SET_OR_UNEXPECT(FetchDouble(), score);\n    }\n    if (isnan(score)) {\n      LOG(ERROR) << \"Zset with NAN score detected\";\n      return Unexpected(errc::rdb_file_corrupted);\n    }\n    load_trace->arr[i].score = score;\n  }\n\n  // If there are still unread elements, cache the number of remaining\n  // elements, or clear if the full object has been read.\n  if (zsetlen > n) {\n    pending_read_.remaining = zsetlen - n;\n  } else if (pending_read_.remaining > 0) {\n    pending_read_.remaining = 0;\n  }\n\n  return OpaqueObj{std::move(load_trace), rdbtype};\n}\n\nauto RdbLoaderBase::ReadListQuicklist(int rdbtype) -> io::Result<OpaqueObj> {\n  size_t len;\n  if (pending_read_.remaining > 0) {\n    len = pending_read_.remaining;\n  } else {\n    SET_OR_UNEXPECT(LoadLen(NULL), len);\n    pending_read_.reserve = len;\n  }\n\n  if (len == 0)\n    return Unexpected(errc::empty_key);\n\n  unique_ptr<LoadTrace> load_trace(new LoadTrace);\n  // Lists pack multiple entries into each list node (8Kb by default),\n  // therefore using a smaller segment length than kMaxBlobLen.\n  size_t n = std::min<size_t>(len, 512);\n  load_trace->arr.resize(n);\n  for (size_t i = 0; i < n; ++i) {\n    uint64_t container = QUICKLIST_NODE_CONTAINER_PACKED;\n    if (rdbtype == RDB_TYPE_LIST_QUICKLIST_2) {\n      SET_OR_UNEXPECT(LoadLen(nullptr), container);\n\n      if (container != QUICKLIST_NODE_CONTAINER_PACKED &&\n          container != QUICKLIST_NODE_CONTAINER_PLAIN) {\n        LOG(ERROR) << \"Quicklist integrity check failed.\";\n        return Unexpected(errc::rdb_file_corrupted);\n      }\n    }\n\n    RdbVariant var;\n    error_code ec = ReadStringObj(&var);\n    if (ec)\n      return make_unexpected(ec);\n\n    if (StrLen(var) == 0) {\n      return 
Unexpected(errc::rdb_file_corrupted);\n    }\n    load_trace->arr[i].rdb_var = std::move(var);\n    load_trace->arr[i].encoding = container;\n  }\n\n  // If there are still unread elements, cache the number of remaining\n  // elements, or clear if the full object has been read.\n  if (len > n) {\n    pending_read_.remaining = len - n;\n  } else if (pending_read_.remaining > 0) {\n    pending_read_.remaining = 0;\n  }\n\n  return OpaqueObj{std::move(load_trace), rdbtype};\n}\n\nauto RdbLoaderBase::ReadStreams(int rdbtype) -> io::Result<OpaqueObj> {\n  size_t listpacks;\n  if (pending_read_.remaining > 0) {\n    listpacks = pending_read_.remaining;\n  } else {\n    SET_OR_UNEXPECT(LoadLen(NULL), listpacks);\n  }\n\n  unique_ptr<LoadTrace> load_trace(new LoadTrace);\n  // Streams pack multiple entries into each stream node (4Kb or 100\n  // entries), therefore using a smaller segment length than kMaxBlobLen.\n  size_t n = std::min<size_t>(listpacks, 512);\n  load_trace->arr.resize(n * 2);\n\n  error_code ec;\n  for (size_t i = 0; i < n; ++i) {\n    /* Get the master ID, the one we'll use as key of the radix tree\n     * node: the entries inside the listpack itself are delta-encoded\n     * relatively to this ID. 
*/\n    RdbVariant stream_id, blob;\n    ec = ReadStringObj(&stream_id);\n    if (ec)\n      return make_unexpected(ec);\n    if (StrLen(stream_id) != sizeof(streamID)) {\n      LOG(ERROR) << \"Stream node key entry is not the size of a stream ID\";\n\n      return Unexpected(errc::rdb_file_corrupted);\n    }\n\n    ec = ReadStringObj(&blob);\n    if (ec)\n      return make_unexpected(ec);\n    if (StrLen(blob) == 0) {\n      LOG(ERROR) << \"Stream listpacks loading failed\";\n      return Unexpected(errc::rdb_file_corrupted);\n    }\n\n    load_trace->arr[2 * i].rdb_var = std::move(stream_id);\n    load_trace->arr[2 * i + 1].rdb_var = std::move(blob);\n  }\n\n  // If there are still unread elements, cache the number of remaining\n  // elements, or clear if the full object has been read.\n  //\n  // We only load the stream metadata and consumer groups in the final read,\n  // so if there are still unread elements return the partial stream.\n  if (listpacks > n) {\n    pending_read_.remaining = listpacks - n;\n    return OpaqueObj{std::move(load_trace), rdbtype};\n  }\n\n  pending_read_.remaining = 0;\n\n  // Load stream metadata.\n  load_trace->stream_trace.reset(new StreamTrace);\n\n  /* Load total number of items inside the stream. */\n  SET_OR_UNEXPECT(LoadLen(nullptr), load_trace->stream_trace->stream_len);\n\n  /* Load the last entry ID. */\n  SET_OR_UNEXPECT(LoadLen(nullptr), load_trace->stream_trace->last_id.ms);\n  SET_OR_UNEXPECT(LoadLen(nullptr), load_trace->stream_trace->last_id.seq);\n\n  if (rdbtype >= RDB_TYPE_STREAM_LISTPACKS_2) {\n    /* Load the first entry ID. */\n    SET_OR_UNEXPECT(LoadLen(nullptr), load_trace->stream_trace->first_id.ms);\n    SET_OR_UNEXPECT(LoadLen(nullptr), load_trace->stream_trace->first_id.seq);\n\n    /* Load the maximal deleted entry ID. 
*/\n    SET_OR_UNEXPECT(LoadLen(nullptr), load_trace->stream_trace->max_deleted_entry_id.ms);\n    SET_OR_UNEXPECT(LoadLen(nullptr), load_trace->stream_trace->max_deleted_entry_id.seq);\n\n    /* Load the offset. */\n    SET_OR_UNEXPECT(LoadLen(nullptr), load_trace->stream_trace->entries_added);\n  } else {\n    /* During migration the offset can be initialized to the stream's\n     * length. At this point, we also don't care about tombstones\n     * because CG offsets will be later initialized as well. */\n    load_trace->stream_trace->entries_added = load_trace->stream_trace->stream_len;\n  }\n\n  /* Consumer groups loading */\n  uint64_t cgroups_count;\n  SET_OR_UNEXPECT(LoadLen(nullptr), cgroups_count);\n  load_trace->stream_trace->cgroup.resize(cgroups_count);\n\n  for (size_t i = 0; i < cgroups_count; ++i) {\n    auto& cgroup = load_trace->stream_trace->cgroup[i];\n    /* Get the consumer group name and ID. We can then create the\n     * consumer group ASAP and populate its structure as\n     * we read more data. */\n\n    // sds cgname;\n    RdbVariant cgname;\n    ec = ReadStringObj(&cgname);\n    if (ec)\n      return make_unexpected(ec);\n    cgroup.name = std::move(cgname);\n\n    SET_OR_UNEXPECT(LoadLen(nullptr), cgroup.ms);\n    SET_OR_UNEXPECT(LoadLen(nullptr), cgroup.seq);\n\n    cgroup.entries_read = 0;\n    if (rdbtype >= RDB_TYPE_STREAM_LISTPACKS_2) {\n      SET_OR_UNEXPECT(LoadLen(nullptr), cgroup.entries_read);\n    }\n\n    /* Load the global PEL for this consumer group, however we'll\n     * not yet populate the NACK structures with the message\n     * owner, since consumers for this group and their messages will\n     * be read as a next step. So for now leave them not resolved\n     * and later populate it. 
*/\n    uint64_t pel_size;\n    SET_OR_UNEXPECT(LoadLen(nullptr), pel_size);\n\n    cgroup.pel_arr.resize(pel_size);\n\n    for (size_t j = 0; j < pel_size; ++j) {\n      auto& pel = cgroup.pel_arr[j];\n      error_code ec = FetchBuf(pel.rawid.size(), pel.rawid.data());\n      if (ec) {\n        LOG(ERROR) << \"Stream PEL ID loading failed.\";\n        return make_unexpected(ec);\n      }\n\n      SET_OR_UNEXPECT(FetchInt<int64_t>(), pel.delivery_time);\n      SET_OR_UNEXPECT(LoadLen(nullptr), pel.delivery_count);\n    }\n\n    /* Now that we loaded our global PEL, we need to load the\n     * consumers and their local PELs. */\n    uint64_t consumers_num;\n    SET_OR_UNEXPECT(LoadLen(nullptr), consumers_num);\n    cgroup.cons_arr.resize(consumers_num);\n\n    for (size_t j = 0; j < consumers_num; ++j) {\n      auto& consumer = cgroup.cons_arr[j];\n      ec = ReadStringObj(&consumer.name);\n      if (ec)\n        return make_unexpected(ec);\n\n      SET_OR_UNEXPECT(FetchInt<int64_t>(), consumer.seen_time);\n\n      if (rdbtype >= RDB_TYPE_STREAM_LISTPACKS_3) {\n        SET_OR_UNEXPECT(FetchInt<int64_t>(), consumer.active_time);\n      } else {\n        /* That's the best estimate we got */\n        consumer.active_time = consumer.seen_time;\n      }\n\n      /* Load the PEL about entries owned by this specific\n       * consumer. 
*/\n      SET_OR_UNEXPECT(LoadLen(nullptr), pel_size);\n      consumer.nack_arr.resize(pel_size);\n      for (size_t k = 0; k < pel_size; ++k) {\n        auto& nack = consumer.nack_arr[k];\n        // unsigned char rawid[sizeof(streamID)];\n        error_code ec = FetchBuf(nack.size(), nack.data());\n        if (ec) {\n          LOG(ERROR) << \"Stream PEL ID loading failed.\";\n          return make_unexpected(ec);\n        }\n        /*streamNACK* nack = (streamNACK*)raxFind(cgroup->pel, rawid, sizeof(rawid));\n        if (nack == raxNotFound) {\n          LOG(ERROR) << \"Consumer entry not found in group global PEL\";\n          return Unexpected(errc::rdb_file_corrupted);\n        }*/\n\n        /* Set the NACK consumer, that was left to NULL when\n         * loading the global PEL. Then set the same shared\n         * NACK structure also in the consumer-specific PEL. */\n        /*\n        nack->consumer = consumer;\n        if (!raxTryInsert(consumer->pel, rawid, sizeof(rawid), nack, NULL)) {\n          LOG(ERROR) << \"Duplicated consumer PEL entry loading a stream consumer group\";\n          streamFreeNACK(nack);\n          return Unexpected(errc::duplicate_key);\n        }*/\n      }\n    }  // while (consumers_num)\n  }    // while (cgroup_num)\n\n  return OpaqueObj{std::move(load_trace), RDB_TYPE_STREAM_LISTPACKS};\n}\n\nauto RdbLoaderBase::ReadRedisJson() -> io::Result<OpaqueObj> {\n  auto json_magic_number = LoadLen(nullptr);\n  if (!json_magic_number) {\n    return Unexpected(errc::rdb_file_corrupted);\n  }\n\n  constexpr string_view kJsonModule = \"ReJSON-RL\"sv;\n  string module_name = ModuleTypeName(*json_magic_number);\n  if (module_name != kJsonModule) {\n    LOG(ERROR) << \"Unsupported module: \" << module_name;\n    return Unexpected(errc::unsupported_operation);\n  }\n\n  int encver = *json_magic_number & 1023;\n  if (encver != 3) {\n    LOG(ERROR) << \"Unsupported ReJSON version: \" << encver;\n    return 
Unexpected(errc::unsupported_operation);\n  }\n\n  auto opcode = FetchInt<uint8_t>();\n  if (!opcode || *opcode != RDB_MODULE_OPCODE_STRING) {\n    return Unexpected(errc::rdb_file_corrupted);\n  }\n\n  RdbVariant dest;\n  error_code ec = ReadStringObj(&dest);\n  if (ec) {\n    return make_unexpected(ec);\n  }\n\n  opcode = FetchInt<uint8_t>();\n  if (!opcode || *opcode != RDB_MODULE_OPCODE_EOF) {\n    return Unexpected(errc::rdb_file_corrupted);\n  }\n\n  return OpaqueObj{std::move(dest), RDB_TYPE_JSON};\n}\n\nauto RdbLoaderBase::ReadSBFImpl(bool chunking) -> io::Result<OpaqueObj> {\n  RdbSBF res;\n  uint64_t options;\n  SET_OR_UNEXPECT(LoadLen(nullptr), options);\n  if (options != 0)\n    return Unexpected(errc::rdb_file_corrupted);\n  SET_OR_UNEXPECT(FetchBinaryDouble(), res.grow_factor);\n  SET_OR_UNEXPECT(FetchBinaryDouble(), res.fp_prob);\n  if (res.fp_prob <= 0 || res.fp_prob > 0.5) {\n    return Unexpected(errc::rdb_file_corrupted);\n  }\n  SET_OR_UNEXPECT(LoadLen(nullptr), res.prev_size);\n  SET_OR_UNEXPECT(LoadLen(nullptr), res.current_size);\n  SET_OR_UNEXPECT(LoadLen(nullptr), res.max_capacity);\n\n  unsigned num_filters = 0;\n  SET_OR_UNEXPECT(LoadLen(nullptr), num_filters);\n  auto is_power2 = [](size_t n) { return (n & (n - 1)) == 0; };\n\n  for (unsigned i = 0; i < num_filters; ++i) {\n    unsigned hash_cnt;\n    string filter_data;\n    SET_OR_UNEXPECT(LoadLen(nullptr), hash_cnt);\n\n    if (chunking) {\n      size_t total_size = 0;\n      SET_OR_UNEXPECT(LoadLen(nullptr), total_size);\n      if (total_size == 0) {\n        return Unexpected(errc::rdb_file_corrupted);\n      }\n\n      filter_data.resize(total_size);\n      size_t offset = 0;\n      while (offset < total_size) {\n        size_t chunk_size = 0;\n        SET_OR_UNEXPECT(LoadLen(nullptr), chunk_size);\n        if (chunk_size == 0 || chunk_size > total_size - offset) {\n          return Unexpected(errc::rdb_file_corrupted);\n        }\n        error_code ec = FetchBuf(chunk_size, 
filter_data.data() + offset);\n        if (ec) {\n          return make_unexpected(ec);\n        }\n\n        offset += chunk_size;\n      }\n    } else {\n      SET_OR_UNEXPECT(FetchGenericString(), filter_data);\n    }\n\n    size_t bit_len = filter_data.size() * 8;\n    if (!is_power2(bit_len)) {  // must be power of two\n      return Unexpected(errc::rdb_file_corrupted);\n    }\n    res.filters.emplace_back(hash_cnt, std::move(filter_data));\n  }\n  return OpaqueObj{std::move(res), RDB_TYPE_SBF};\n}\n\nauto RdbLoaderBase::ReadSBF() -> io::Result<OpaqueObj> {\n  return ReadSBFImpl(false);\n}\n\nauto RdbLoaderBase::ReadSBF2() -> io::Result<OpaqueObj> {\n  return ReadSBFImpl(true);\n}\n\nio::Result<RdbLoaderBase::OpaqueObj> RdbLoaderBase::ReadCMS() {\n  RdbCMS res;\n\n  SET_OR_UNEXPECT(LoadLen(nullptr), res.width);\n  SET_OR_UNEXPECT(LoadLen(nullptr), res.depth);\n  SET_OR_UNEXPECT(LoadLen(nullptr), res.total_incr_count);\n\n  const size_t num_counters = res.width * res.depth;\n  res.counters.resize(num_counters);\n  for (size_t i = 0; i < num_counters; ++i) {\n    uint64_t raw;\n    auto ec = FetchBuf(sizeof(raw), &raw);\n    if (ec)\n      return make_unexpected(ec);\n    res.counters[i] = static_cast<int64_t>(base::LE::LoadT<uint64_t>(&raw));\n  }\n\n  return OpaqueObj{std::move(res), RDB_TYPE_CMS};\n}\n\ntemplate <typename T> io::Result<T> RdbLoaderBase::FetchInt() {\n  auto ec = EnsureRead(sizeof(T));\n  if (ec)\n    return make_unexpected(ec);\n\n  char buf[16];\n  mem_buf_->ReadAndConsume(sizeof(T), buf);\n\n  return base::LE::LoadT<std::make_unsigned_t<T>>(buf);\n}\n\nio::Result<uint8_t> RdbLoaderBase::FetchType() {\n  return FetchInt<uint8_t>();\n}\n\n// -------------- RdbLoader   ----------------------------\n\nstruct RdbLoader::ObjSettings {\n  long long now;           // current epoch time in ms.\n  int64_t expiretime = 0;  // expire epoch time in ms\n  uint32_t mc_flags = 0;\n\n  bool has_expired = false;\n\n  bool is_sticky = false;\n  bool 
has_mc_flags = false;\n\n  void Reset() {\n    mc_flags = expiretime = 0;\n    has_expired = false;\n    is_sticky = false;\n    has_mc_flags = false;\n  }\n\n  void SetExpire(int64_t val) {\n    expiretime = val;\n    has_expired = (val <= now);\n  }\n\n  void SetMCFlags(uint32_t flags) {\n    has_mc_flags = true;\n    mc_flags = flags;\n  }\n\n  ObjSettings() = default;\n};\n\nRdbLoader::RdbLoader(Service* service, RdbLoadContext* load_context, std::string snapshot_id)\n    : service_{service},\n      load_context_(load_context),\n      snapshot_id_(std::move(snapshot_id)),\n      rdb_ignore_expiry_{GetFlag(FLAGS_rdb_ignore_expiry)},\n      deserialize_hnsw_index_{GetFlag(FLAGS_deserialize_hnsw_index)},\n      script_mgr_{service == nullptr ? nullptr : service->script_mgr()},\n      shard_buf_{shard_set->size()} {\n}\n\nRdbLoader::~RdbLoader() {\n  while (true) {\n    Item* item = item_queue_.Pop();\n    if (item == nullptr)\n      break;\n    delete item;\n  }\n\n  // Decommit local memory.\n  // We create an RdbLoader for each thread, so each one will Decommit for itself after\n  // full sync ends (since we explicitly reset the RdbLoader).\n  auto* tlocal = ServerState::tlocal();\n  tlocal->DecommitMemory(ServerState::kAllMemory);\n}\n\nerror_code RdbLoader::Load(io::Source* src) {\n  CHECK(!src_ && src);\n\n  is_tiered_enabled_ =\n      shard_set->Await(0, [] { return EngineShard::tlocal()->tiered_storage() != nullptr; });\n\n  absl::Time start = absl::Now();\n  src_ = src;\n\n  IoBuf::Bytes bytes = mem_buf_->AppendBuffer();\n  io::Result<size_t> read_sz = src_->ReadAtLeast(bytes, 9);\n  if (!read_sz)\n    return read_sz.error();\n\n  bytes_read_ = *read_sz;\n  if (bytes_read_ < 9) {\n    return RdbError(errc::wrong_signature);\n  }\n\n  mem_buf_->CommitWrite(bytes_read_);\n\n  {\n    auto cb = mem_buf_->InputBuffer();\n\n    if (memcmp(cb.data(), \"REDIS\", 5) != 0) {\n      VLOG(1) << \"Bad header: \" << absl::CHexEscape(facade::ToSV(cb));\n      return 
RdbError(errc::wrong_signature);\n    }\n\n    char buf[64] = {0};\n    ::memcpy(buf, cb.data() + 5, 4);\n\n    rdb_version_ = atoi(buf);\n    if (rdb_version_ < 5 || rdb_version_ > RDB_VERSION) {  // We accept starting from 5.\n      LOG(ERROR) << \"RDB Version \" << rdb_version_ << \" is not supported\";\n      return RdbError(errc::bad_version);\n    }\n\n    mem_buf_->ConsumeInput(9);\n  }\n\n  int type;\n\n  /* Key-specific attributes, set by opcodes before the key type. */\n  ObjSettings settings;\n  settings.now = GetCurrentTimeMs();\n  size_t keys_loaded = 0;\n\n  auto cleanup = absl::Cleanup([&] { FinishLoad(start, &keys_loaded); });\n\n  // Increment local one if it exists\n  if (EngineShard* es = EngineShard::tlocal(); es) {\n    GetCurrentDbSlice().IncrLoadInProgress();\n  }\n\n  while (!stop_early_.load(memory_order_relaxed)) {\n    if (pause_) {\n      ThisFiber::SleepFor(100ms);\n      continue;\n    }\n\n    /* Read type. */\n    SET_OR_RETURN(FetchType(), type);\n\n    DVLOG(3) << \"Opcode type: \" << type;\n\n    /* Handle special types. */\n    if (type == RDB_OPCODE_EXPIRETIME) {\n      LOG(ERROR) << \"opcode RDB_OPCODE_EXPIRETIME not supported\";\n\n      return RdbError(errc::invalid_encoding);\n    }\n\n    if (type == RDB_OPCODE_EXPIRETIME_MS) {\n      int64_t val;\n      /* EXPIRETIME_MS: milliseconds precision expire times introduced\n       * with RDB v3. Like EXPIRETIME but with more precision. */\n      SET_OR_RETURN(FetchInt<int64_t>(), val);\n      if (!rdb_ignore_expiry_) {\n        settings.SetExpire(val);\n      }\n      continue; /* Read next opcode. 
*/\n    }\n\n    if (type == RDB_OPCODE_DF_MASK) {\n      uint32_t mask;\n      SET_OR_RETURN(FetchInt<uint32_t>(), mask);\n      settings.is_sticky = mask & DF_MASK_FLAG_STICKY;\n      settings.has_mc_flags = mask & DF_MASK_FLAG_MC_FLAGS;\n      if (settings.has_mc_flags) {\n        SET_OR_RETURN(FetchInt<uint32_t>(), settings.mc_flags);\n      }\n      continue; /* Read next opcode. */\n    }\n\n    if (type == RDB_OPCODE_FREQ) {\n      /* FREQ: LFU frequency. */\n      FetchInt<uint8_t>();  // IGNORE\n      continue;             /* Read next opcode. */\n    }\n\n    if (type == RDB_OPCODE_IDLE) {\n      /* IDLE: LRU idle time. */\n      uint64_t idle;\n      SET_OR_RETURN(LoadLen(nullptr), idle);  // ignore\n      (void)idle;\n      continue; /* Read next opcode. */\n    }\n\n    if (type == RDB_OPCODE_EOF) {\n      /* EOF: End of file, exit the main loop. */\n      break;\n    }\n\n    if (type == RDB_OPCODE_FULLSYNC_END) {\n      VLOG(1) << \"Read RDB_OPCODE_FULLSYNC_END\";\n      RETURN_ON_ERR(EnsureRead(8));\n      mem_buf_->ConsumeInput(8);  // ignore 8 bytes\n\n      if (full_sync_cut_cb) {\n        FlushAllShards();  // Flush as the handler awakes post load handlers\n        full_sync_cut_cb();\n      }\n      continue;\n    }\n\n    if (type == RDB_OPCODE_JOURNAL_OFFSET) {\n      VLOG(1) << \"Read RDB_OPCODE_JOURNAL_OFFSET\";\n      uint64_t journal_offset;\n      SET_OR_RETURN(FetchInt<uint64_t>(), journal_offset);\n      VLOG(1) << \"Got offset \" << journal_offset;\n      journal_offset_ = journal_offset;\n      continue;\n    }\n\n    if (type == RDB_OPCODE_SELECTDB) {\n      unsigned dbid = 0;\n\n      /* SELECTDB: Select the specified database. */\n      SET_OR_RETURN(LoadLen(nullptr), dbid);\n\n      if (dbid > GetFlag(FLAGS_dbnum)) {\n        LOG(WARNING) << \"database id \" << dbid << \" exceeds dbnum limit. 
Try increasing the flag.\";\n\n        return RdbError(errc::bad_db_index);\n      }\n\n      DVLOG(2) << \"Select DB: \" << dbid;\n      for (unsigned i = 0; i < shard_set->size(); ++i) {\n        // we should flush pending items before switching dbid.\n        FlushShardAsync(i);\n\n        // Activate the database if it did not exist before.\n        shard_set->Add(i, [dbid] { GetCurrentDbSlice().ActivateDb(dbid); });\n      }\n\n      cur_db_index_ = dbid;\n      if (EngineShard::tlocal()) {  // because we sometimes create entries inline.\n        GetCurrentDbSlice().ActivateDb(dbid);\n      }\n      continue; /* Read next opcode. */\n    }\n\n    if (type == RDB_OPCODE_RESIZEDB) {\n      /* RESIZEDB: Hint about the size of the keys in the currently\n       * selected data base, in order to avoid useless rehashing. */\n      uint64_t db_size, expires_size;\n      SET_OR_RETURN(LoadLen(nullptr), db_size);\n      SET_OR_RETURN(LoadLen(nullptr), expires_size);\n\n      VLOG(1) << \"RESIZEDB: db_size=\" << db_size << \", expires_size=\" << expires_size;\n\n      // We do not use this information because it is not possible to easily preallocate\n      // dash tables based on this information. Moreover, number of shards can change\n      // between the original shard set and the loading server.\n      continue; /* Read next opcode. */\n    }\n\n    if (type == RDB_OPCODE_AUX) {\n      RETURN_ON_ERR(HandleAux());\n      continue; /* Read type again. 
*/\n    }\n\n    if (type == RDB_OPCODE_MODULE_AUX) {\n      uint64_t module_id;\n      SET_OR_RETURN(LoadLen(nullptr), module_id);\n      string module_name = ModuleTypeName(module_id);\n\n      LOG(WARNING) << \"WARNING: Skipping data for module \" << module_name;\n      RETURN_ON_ERR(SkipModuleData());\n      continue;\n    }\n\n    if (type == RDB_OPCODE_COMPRESSED_ZSTD_BLOB_START ||\n        type == RDB_OPCODE_COMPRESSED_LZ4_BLOB_START) {\n      RETURN_ON_ERR(HandleCompressedBlob(type));\n      continue;\n    }\n\n    if (type == RDB_OPCODE_COMPRESSED_BLOB_END) {\n      RETURN_ON_ERR(HandleCompressedBlobFinish());\n      continue;\n    }\n\n    if (type == RDB_OPCODE_JOURNAL_BLOB) {\n      FlushAllShards();  // Always flush before applying incremental on top\n      RETURN_ON_ERR(HandleJournalBlob(service_));\n      continue;\n    }\n\n    if (type == RDB_OPCODE_SLOT_INFO) {\n      [[maybe_unused]] uint64_t slot_id;\n      SET_OR_RETURN(LoadLen(nullptr), slot_id);\n      [[maybe_unused]] uint64_t slot_size;\n      SET_OR_RETURN(LoadLen(nullptr), slot_size);\n      [[maybe_unused]] uint64_t expires_slot_size;\n      SET_OR_RETURN(LoadLen(nullptr), expires_slot_size);\n      continue;\n    }\n\n    if (type == RDB_OPCODE_VECTOR_INDEX) {\n      // HNSW vector index graph data.\n      // Binary format: [index_key, elements_number,\n      //   then for each node (little-endian):\n      //     internal_id (4 bytes), global_id (8 bytes), level (4 bytes),\n      //     for each level (0 to level): links_num (4 bytes) + links (4 bytes each)]\n      string index_key;\n      SET_OR_RETURN(FetchGenericString(), index_key);\n\n      uint64_t elements_number;\n      SET_OR_RETURN(LoadLen(nullptr), elements_number);\n\n      if (!deserialize_hnsw_index_) {\n        RETURN_ON_ERR(SkipVectorIndex(index_key, elements_number));\n      } else {\n        DCHECK_GT(shard_count_, 0u);\n        // Parse \"index_name:field_name\" from the composite key.\n        size_t colon_pos = 
index_key.rfind(':');\n        string_view index_name{index_key.data(),\n                               colon_pos != string::npos ? colon_pos : index_key.size()};\n        string_view field_name = colon_pos != string::npos\n                                     ? string_view{index_key.data() + colon_pos + 1}\n                                     : string_view{};\n\n        if (shard_count_ == shard_set->size()) {\n          // Same shard count: restore directly.\n          RETURN_ON_ERR(RestoreVectorIndex(index_key, index_name, field_name, elements_number));\n        } else {\n          // Different shard count: load nodes and defer restoration.\n          // Global_ids will be remapped in PerformPostLoad after all key mappings are collected.\n          PendingHnswNodes pending{std::string(index_name), std::string(field_name), {}};\n          RETURN_ON_ERR(LoadVectorIndexNodes(elements_number, &pending.nodes));\n          LOG(INFO) << \"Deferred HNSW index restore for \" << index_key << \" with \"\n                    << pending.nodes.size() << \" nodes (shard count mismatch: \" << shard_count_\n                    << \" vs \" << shard_set->size() << \")\";\n          load_context_->AddPendingHnswNodes(std::move(pending));\n        }\n      }\n      continue;\n    }\n\n    if (type == RDB_OPCODE_SHARD_DOC_INDEX) {\n      // Load ShardDocIndex key-to-DocId mapping\n      // Format: [shard_id, index_name, mapping_count, then for each mapping: key_string, doc_id]\n      PendingIndexMapping pim;\n      uint32_t shard_id;\n      SET_OR_RETURN(LoadLen(nullptr), shard_id);\n\n      SET_OR_RETURN(FetchGenericString(), pim.index_name);\n\n      uint64_t mapping_count;\n      SET_OR_RETURN(LoadLen(nullptr), mapping_count);\n      pim.mappings.reserve(mapping_count);\n\n      for (uint64_t i = 0; i < mapping_count; ++i) {\n        string key;\n        SET_OR_RETURN(FetchGenericString(), key);\n        uint64_t doc_id;\n        SET_OR_RETURN(LoadLen(nullptr), doc_id);\n        
pim.mappings.emplace_back(std::move(key), static_cast<search::DocId>(doc_id));\n      }\n\n      if (!deserialize_hnsw_index_) {\n        continue;\n      }\n      DCHECK_GT(shard_count_, 0u);\n\n      VLOG(2) << \"Loaded index mapping for shard \" << shard_id << \" with \" << mapping_count\n              << \" entries\";\n\n      // Always store mappings. When shard counts differ, PerformPostLoad will redistribute\n      // keys to replica shards and remap global_ids accordingly.\n      load_context_->AddPendingIndexMapping(shard_id, std::move(pim));\n      continue;\n    }\n\n    if (!rdbIsObjectTypeDF(type)) {\n      LOG(ERROR) << \"Unrecognized rdb object type: \" << type;\n      LOG(ERROR) << \"Last iteration: \";\n      LOG(ERROR) << \"key loaded: \" << absl::CHexEscape(last_key_loaded_);\n      LOG(ERROR) << \"pending_read_.remaining: \" << pending_read_.remaining\n                 << \"\\npending_read_.reserve: \" << pending_read_.reserve;\n      // In case we encounter an error, it might worth peeking the InputBuffer()\n      return RdbError(errc::invalid_rdb_type);\n    }\n\n    ++keys_loaded;\n    RETURN_ON_ERR(LoadKeyValPair(type, &settings));\n    settings.Reset();\n  }  // main load loop\n\n  DVLOG(1) << \"RdbLoad loop finished\";\n\n  if (stop_early_) {\n    return *ec_;\n  }\n\n  /* Verify the checksum if RDB version is >= 5 */\n  RETURN_ON_ERR(VerifyChecksum());\n\n  return kOk;\n}\n\nvoid RdbLoader::FinishLoad(absl::Time start_time, size_t* keys_loaded) {\n  BlockingCounter bc(shard_set->size());\n  for (unsigned i = 0; i < shard_set->size(); ++i) {\n    // Flush the remaining items.\n    FlushShardAsync(i);\n\n    // Send sentinel callbacks to ensure that all previous messages have been processed.\n    shard_set->Add(i, [bc]() mutable { bc->Dec(); });\n  }\n  bc->Wait();  // wait for sentinels to report.\n  // Decrement local one if it exists\n  if (EngineShard* es = EngineShard::tlocal(); es) {\n    GetCurrentDbSlice().DecrLoadInProgress();\n  
}\n\n  now_chunked_.clear();\n\n  absl::Duration dur = absl::Now() - start_time;\n  load_time_ = double(absl::ToInt64Milliseconds(dur)) / 1000;\n  keys_loaded_ = *keys_loaded;\n}\n\nstd::error_code RdbLoaderBase::EnsureRead(size_t min_sz) {\n  // In the flow of reading compressed data, we store the uncompressed data in an uncompressed\n  // buffer. When parsing entries we call EnsureRead with 9 bytes to read the length of\n  // key/value. If the key/value is very small (less than 9 bytes) the remaining data in the\n  // uncompressed buffer might contain less than 9 bytes. We need to make sure that we don't read\n  // from the source into the uncompressed buffer, and therefore in this flow we return here.\n  if (mem_buf_ != &origin_mem_buf_)\n    return std::error_code{};\n  if (mem_buf_->InputLen() >= min_sz)\n    return std::error_code{};\n  return EnsureReadInternal(min_sz);\n}\n\nerror_code RdbLoaderBase::EnsureReadInternal(size_t min_to_read) {\n  // We need to include what we already read inside Input buffer. 
Otherwise we might expect to read\n  // more than the minimum\n  const size_t min_sz = min_to_read - mem_buf_->InputLen();\n\n  auto out_buf = mem_buf_->AppendBuffer();\n  CHECK_GT(out_buf.size(), min_sz);\n\n  // If limit was applied we do not want to read more than needed\n  // important when reading from sockets.\n  if (bytes_read_ + out_buf.size() > source_limit_) {\n    out_buf = out_buf.subspan(0, source_limit_ - bytes_read_);\n  }\n\n  io::Result<size_t> res = src_->ReadAtLeast(out_buf, min_sz);\n  if (!res) {\n    VLOG(1) << \"Error reading from source: \" << res.error() << \" \" << min_sz << \" bytes\";\n    return res.error();\n  }\n  if (*res < min_sz)\n    return RdbError(errc::rdb_file_corrupted);\n  DVLOG(2) << \"EnsureRead \" << *res << \" bytes\";\n  bytes_read_ += *res;\n\n  DCHECK_LE(bytes_read_, source_limit_);\n  mem_buf_->CommitWrite(*res);\n\n  return kOk;\n}\n\nio::Result<uint64_t> RdbLoaderBase::LoadLen(bool* is_encoded) {\n  if (is_encoded)\n    *is_encoded = false;\n\n  // Every RDB file with rdbver >= 5 has 8-bytes checksum at the end,\n  // so we can ensure we have 9 bytes to read up until that point.\n  error_code ec = EnsureRead(9);\n  if (ec)\n    return make_unexpected(ec);\n\n  // Read integer meta info.\n  auto bytes = mem_buf_->InputBuffer();\n  PackedUIntMeta meta{bytes[0]};\n  bytes.remove_prefix(1);\n\n  // Read integer.\n  uint64_t res;\n  SET_OR_UNEXPECT(ReadPackedUInt(meta, bytes), res);\n\n  if (meta.Type() == RDB_ENCVAL && is_encoded)\n    *is_encoded = true;\n\n  mem_buf_->ConsumeInput(1 + meta.ByteSize());\n\n  return res;\n}\n\nerror_code RdbLoaderBase::AllocateDecompressOnce(int op_type) {\n  if (decompress_impl_) {\n    return {};\n  }\n\n  if (op_type == RDB_OPCODE_COMPRESSED_ZSTD_BLOB_START) {\n    decompress_impl_ = detail::DecompressImpl::CreateZstd();\n  } else if (op_type == RDB_OPCODE_COMPRESSED_LZ4_BLOB_START) {\n    decompress_impl_ = detail::DecompressImpl::CreateLZ4();\n  } else {\n    return 
RdbError(errc::unsupported_operation);\n  }\n  return {};\n}\n\nerror_code RdbLoaderBase::SkipModuleData() {\n  uint64_t opcode;\n  SET_OR_RETURN(LoadLen(nullptr), opcode);  // ignore field 'when_opcode'\n  if (opcode != RDB_MODULE_OPCODE_UINT)\n    return RdbError(errc::rdb_file_corrupted);\n  SET_OR_RETURN(LoadLen(nullptr), opcode);  // ignore field 'when'\n\n  while (true) {\n    SET_OR_RETURN(LoadLen(nullptr), opcode);\n\n    switch (opcode) {\n      case RDB_MODULE_OPCODE_EOF:\n        return kOk;  // Module data end\n\n      case RDB_MODULE_OPCODE_SINT:\n      case RDB_MODULE_OPCODE_UINT: {\n        [[maybe_unused]] uint64_t _;\n        SET_OR_RETURN(LoadLen(nullptr), _);\n        break;\n      }\n\n      case RDB_MODULE_OPCODE_STRING: {\n        RdbVariant dest;\n        error_code ec = ReadStringObj(&dest);\n        if (ec) {\n          return ec;\n        }\n        break;\n      }\n\n      case RDB_MODULE_OPCODE_DOUBLE: {\n        [[maybe_unused]] double _;\n        SET_OR_RETURN(FetchBinaryDouble(), _);\n        break;\n      }\n\n      default:\n        // TODO: handle RDB_MODULE_OPCODE_FLOAT\n        LOG(ERROR) << \"Unsupported module section: \" << opcode;\n        return RdbError(errc::rdb_file_corrupted);\n    }\n  }\n}\n\nerror_code RdbLoaderBase::HandleCompressedBlob(int op_type) {\n  DVLOG(2) << \"HandleCompressedBlob: \" << op_type;\n  RETURN_ON_ERR(AllocateDecompressOnce(op_type));\n\n  // Fetch uncompress blob\n  string res;\n  SET_OR_RETURN(FetchGenericString(), res);\n\n  // Decompress blob and switch membuf pointer\n  // Last type in the compressed blob is RDB_OPCODE_COMPRESSED_BLOB_END\n  // in which we will switch back to the origin membuf (HandleCompressedBlobFinish)\n  SET_OR_RETURN(decompress_impl_->Decompress(res), mem_buf_);\n\n  return kOk;\n}\n\nerror_code RdbLoaderBase::HandleCompressedBlobFinish() {\n  DVLOG(2) << \"HandleCompressedBlobFinish\";\n\n  CHECK_NE(&origin_mem_buf_, mem_buf_);\n  CHECK_EQ(mem_buf_->InputLen(), 
size_t(0));\n  mem_buf_ = &origin_mem_buf_;\n  return kOk;\n}\n\nerror_code RdbLoaderBase::HandleJournalBlob(Service* service) {\n  // Read the number of entries in the journal blob.\n  size_t num_entries;\n  bool _encoded;\n  SET_OR_RETURN(LoadLen(&_encoded), num_entries);\n\n  // Read the journal blob.\n  string journal_blob;\n  SET_OR_RETURN(FetchGenericString(), journal_blob);\n\n  io::BytesSource bs{io::Buffer(journal_blob)};\n  journal_reader_.SetSource(&bs);\n\n  // Parse and execute in a loop.\n  size_t done = 0;\n  JournalExecutor ex{service};\n  while (done < num_entries) {\n    journal::ParsedEntry entry;\n    auto ec = journal_reader_.ReadEntry(&entry);\n    if (ec)\n      return ec;\n\n    done++;\n\n    if (entry.cmd.empty()) {\n      if (entry.opcode == journal::Op::PING) {\n        continue;\n      }\n      return RdbError(errc::rdb_file_corrupted);\n    }\n\n    if (absl::EqualsIgnoreCase(entry.cmd[0], \"FLUSHALL\") ||\n        absl::EqualsIgnoreCase(entry.cmd[0], \"FLUSHDB\")) {\n      // Applying a flush* operation in the middle of a load can cause out-of-sync deletions of\n      // data that should not be deleted, see https://github.com/dragonflydb/dragonfly/issues/1231\n      // By returning an error we are effectively restarting the replication.\n      return RdbError(errc::unsupported_operation);\n    }\n\n    DVLOG(2) << \"Executing item: \" << entry.ToString();\n    ex.Execute(entry.dbid, entry.cmd);\n  }\n\n  return std::error_code{};\n}\n\nerror_code RdbLoader::HandleAux() {\n  /* AUX: generic string-string fields. Use to add state to RDB\n   * which is backward compatible. Implementations of RDB loading\n   * are required to skip AUX fields they don't understand.\n   *\n   * An AUX field is composed of two strings: key and value. 
*/\n  string auxkey, auxval;\n\n  SET_OR_RETURN(FetchGenericString(), auxkey);\n  SET_OR_RETURN(FetchGenericString(), auxval);\n\n  if (!auxkey.empty() && auxkey[0] == '%') {\n    /* All the fields with a name starting with '%' are considered\n     * information fields and are logged at startup with a log\n     * level of NOTICE. */\n    LOG(INFO) << \"RDB '\" << auxkey << \"': \" << auxval;\n  } else if (auxkey == \"snapshot-id\") {\n    if (snapshot_id_.empty()) {\n      snapshot_id_ = auxval;\n    } else if (snapshot_id_ != auxval) {\n      return RdbError(errc::incorrect_snapshot_id);\n    }\n  } else if (auxkey == \"repl-stream-db\") {\n    // TODO\n  } else if (auxkey == \"repl-id\") {\n    // TODO\n  } else if (auxkey == \"repl-offset\") {\n    // TODO\n  } else if (auxkey == \"lua\") {\n    LoadScriptFromAux(std::move(auxval));\n  } else if (auxkey == \"redis-ver\") {\n    VLOG(1) << \"Loading RDB produced by Redis version \" << auxval;\n  } else if (auxkey == \"df-ver\") {\n    VLOG(1) << \"Loading RDB produced by Dragonfly version \" << auxval;\n  } else if (auxkey == \"ctime\") {\n    int64_t ctime;\n    if (absl::SimpleAtoi(auxval, &ctime)) {\n      time_t age = time(NULL) - ctime;\n      if (age < 0)\n        age = 0;\n      VLOG(1) << \"RDB age \" << strings::HumanReadableElapsedTime(age);\n    }\n  } else if (auxkey == \"used-mem\") {\n    int64_t usedmem;\n    if (absl::SimpleAtoi(auxval, &usedmem)) {\n      VLOG(1) << \"RDB memory usage when created \" << strings::HumanReadableNumBytes(usedmem);\n      // We allow 5% tolerance for snapshot used memory\n      if (usedmem > (max_memory_limit * 1.05)) {\n        if (IsClusterEnabled()) {\n          LOG(INFO) << \"Allowing to load a snapshot of size \" << usedmem\n                    << \", despite memory limit of \" << max_memory_limit << \" due to cluster mode\";\n        } else if (is_tiered_enabled_) {\n          LOG(INFO) << \"Allowing to load a snapshot of size \" << usedmem\n                    
<< \", despite memory limit of \" << max_memory_limit << \" due to tiered storage\";\n        } else {\n          LOG(WARNING) << \"Could not load snapshot - its used memory is \" << usedmem\n                       << \" but the limit is \" << max_memory_limit;\n          return RdbError(errc::out_of_memory);\n        }\n      }\n    }\n  } else if (auxkey == \"aof-preamble\") {\n    long long haspreamble;\n    if (absl::SimpleAtoi(auxval, &haspreamble) && haspreamble) {\n      VLOG(1) << \"RDB has an AOF tail\";\n    }\n  } else if (auxkey == \"redis-bits\") {\n    /* Just ignored. */\n  } else if (auxkey == \"search-index\") {\n    LoadSearchIndexDefFromAux(std::move(auxval));\n  } else if (auxkey == \"hnsw-index-metadata\") {\n    LoadHnswIndexMetadataFromAux(std::move(auxval));\n  } else if (auxkey == \"search-synonyms\") {\n    LoadSearchSynonymsFromAux(std::move(auxval));\n  } else if (auxkey == \"shard-count\") {\n    uint32_t shard_count;\n    if (absl::SimpleAtoi(auxval, &shard_count)) {\n      shard_count_ = shard_count;\n      load_context_->SetMasterShardCount(shard_count);\n    }\n  } else if (auxkey == \"shard-id\") {\n    uint32_t shard_id;\n    if (absl::SimpleAtoi(auxval, &shard_id)) {\n      shard_id_ = shard_id;\n    }\n  } else if (auxkey == \"table-mem\") {\n    size_t mem;\n    if (absl::SimpleAtoi(auxval, &mem)) {\n      table_used_memory_ = mem;\n    }\n  } else {\n    /* We ignore fields we don't understand, as by AUX field\n     * contract. 
*/\n    LOG(WARNING) << \"Unrecognized RDB AUX field: '\" << auxkey << \"'\";\n  }\n\n  return kOk;\n}\n\nerror_code RdbLoader::VerifyChecksum() {\n  uint64_t expected;\n\n  SET_OR_RETURN(FetchInt<uint64_t>(), expected);\n\n  io::Bytes cur_buf = mem_buf_->InputBuffer();\n\n  VLOG(1) << \"VerifyChecksum: input buffer len \" << cur_buf.size() << \", expected \" << expected;\n\n  return kOk;\n}\n\nvoid RdbLoader::FlushShardAsync(ShardId sid) {\n  auto& out_buf = shard_buf_[sid];\n  if (out_buf.empty())\n    return;\n\n  auto cb = [indx = this->cur_db_index_, this, ib = std::move(out_buf)] {\n    auto& db_slice = GetCurrentDbSlice();\n\n    // Before we start loading, increment LoadInProgress.\n    // This is required because FlushShardAsync dispatches to multiple shards, and those shards\n    // might not yet have had their state (load in progress) incremented.\n    db_slice.IncrLoadInProgress();\n    this->LoadItemsBuffer(indx, ib);\n    db_slice.DecrLoadInProgress();\n  };\n\n  bool preempted = shard_set->Add(sid, std::move(cb));\n  VLOG_IF(2, preempted) << \"FlushShardAsync was throttled\";\n}\n\nvoid RdbLoader::FlushAllShards() {\n  for (ShardId i = 0; i < shard_set->size(); i++)\n    FlushShardAsync(i);\n}\n\nstd::error_code RdbLoaderBase::FromOpaque(const OpaqueObj& opaque, LoadConfig config,\n                                          PrimeValue* pv) {\n  OpaqueObjLoader visitor(opaque.rdb_type, pv, config);\n  std::visit(visitor, opaque.obj);\n\n  return visitor.ec();\n}\n\nvoid RdbLoaderBase::CopyStreamId(const StreamID& src, struct streamID* dest) {\n  dest->ms = src.ms;\n  dest->seq = src.seq;\n}\n\nvoid RdbLoader::CreateObjectOnShard(const DbContext& db_cntx, const Item* item, DbSlice* db_slice) {\n  PrimeValue pv;\n  PrimeValue* pv_ptr = &pv;\n  DbIndex db_ind = db_cntx.db_index;\n\n  auto error_msg = [](const auto* item, auto db_ind) {\n    return absl::StrCat(\"Found empty key: \", item->key, \" in DB \", db_ind, \" rdb_type \",\n                        
item->val.rdb_type);\n  };\n\n  LoadConfig config_copy = item->load_config;\n  if (item->load_config.chunked && item->load_config.append) {\n    std::unique_lock lk{now_chunked_mu_};\n    if (auto it = now_chunked_.find(item->key); it != now_chunked_.end()) {\n      pv_ptr = it->second.get();\n    } else {\n      // Sets and hashes are deleted when all their entries are expired.\n      // If that's the case, reset the append flag and start from scratch.\n      bool key_is_not_expired = item->expire_ms == 0 || db_cntx.time_now_ms < item->expire_ms;\n      bool is_set_expiry_type = item->val.rdb_type == RDB_TYPE_HASH_WITH_EXPIRY ||\n                                item->val.rdb_type == RDB_TYPE_SET_WITH_EXPIRY;\n      if (!is_set_expiry_type && key_is_not_expired) {\n        LOG(ERROR) << \"Count not to find append key '\" << item->key << \"' in DB \" << db_ind;\n        return;\n      }\n      config_copy.append = false;\n    }\n  }\n\n  if (auto ec = FromOpaque(item->val, config_copy, pv_ptr); ec) {\n    if (ec.value() == errc::value_expired) {\n      // hmap and sset values can expire and we are ok with it,\n      // so we don't set ec_ in this case\n      return;\n    }\n    ec_ = ec;\n    if (ec.value() == errc::empty_key) {\n      auto error = error_msg(item, db_ind);\n      if (RdbTypeAllowedEmpty(item->val.rdb_type)) {\n        LOG(WARNING) << error;\n      } else {\n        LOG(ERROR) << error;\n      }\n      return;\n    }\n    LOG(ERROR) << \"Could not load value for key '\" << absl::CHexEscape(item->key) << \"' in DB \"\n               << db_ind << \" \" << item->load_config.chunked << \" \" << item->load_config.append\n               << \" \" << item->val.rdb_type;\n    stop_early_ = true;\n    return;\n  }\n\n  if (item->load_config.chunked) {\n    std::unique_lock lk{now_chunked_mu_};\n    if (!now_chunked_.contains(item->key))\n      now_chunked_.emplace(item->key, make_unique<PrimeValue>(std::move(pv)));\n\n    if (!item->load_config.finalize)\n      
return;\n\n    pv = std::move(*now_chunked_.extract(item->key).mapped());\n  }\n\n  // We need this extra check because we don't return empty_key\n  if (!pv.TagAllowsEmptyValue() && pv.Size() == 0) {\n    LOG(WARNING) << error_msg(item, db_ind);\n    return;\n  }\n\n  if (item->expire_ms > 0 && db_cntx.time_now_ms >= item->expire_ms) {\n    VLOG(2) << \"Expire key on load: \" << item->key;\n    return;\n  }\n\n  auto op_res = db_slice->AddOrUpdate(db_cntx, item->key, std::move(pv), item->expire_ms);\n  if (!op_res) {\n    LOG(ERROR) << \"OOM failed to add key '\" << item->key << \"' in DB \" << db_ind;\n    ec_ = RdbError(errc::out_of_memory);\n    stop_early_ = true;\n    return;\n  }\n\n  DbSlice::ItAndUpdater& updater = *op_res;\n  updater.it->first.SetSticky(item->is_sticky);\n  if (item->has_mc_flags) {\n    updater.it->second.SetFlag(true);\n    db_slice->SetMCFlag(db_cntx.db_index, updater.it->first, item->mc_flags);\n  }\n\n  if (!override_existing_keys_ && !updater.is_new) {\n    LOG(WARNING) << \"RDB has duplicated key '\" << item->key << \"' in DB \" << db_ind << \" of type \"\n                 << updater.it->second.ObjType();\n  }\n\n  if (auto* ts = db_slice->shard_owner()->tiered_storage(); ts) {\n    // Finalize the AutoUpdater before stashing. The stash callback may complete\n    // (e.g. during the SleepFor yield below) and transform the PrimeValue to external,\n    // changing MallocUsed(). 
If the AutoUpdater ran after that, it would compute a\n    // bogus negative memory delta and crash in AccountObjectMemory.\n    auto it = updater.it;\n    updater.post_updater.Run();\n    StashPrimeValue(db_cntx.db_index, item->key, &it->second, ts, nullptr);\n\n    // Block, if tiered storage is active, but can't keep up\n    while (db_slice->shard_owner()->ShouldThrottleForTiering())\n      ThisFiber::SleepFor(100us);\n  }\n}\n\nvoid RdbLoader::LoadItemsBuffer(DbIndex db_ind, const ItemsBuf& ib) {\n  EngineShard* es = EngineShard::tlocal();\n  DbContext db_cntx{&namespaces->GetDefaultNamespace(), db_ind, GetCurrentTimeMs()};\n  DbSlice& db_slice = db_cntx.GetDbSlice(es->shard_id());\n\n  DCHECK(!db_slice.IsCacheMode());\n\n  for (const auto* item : ib) {\n    CreateObjectOnShard(db_cntx, item, &db_slice);\n    if (stop_early_) {\n      return;\n    }\n  }\n\n  for (auto* item : ib) {\n    item_queue_.Push(item);\n  }\n}\n\n// Loads the next key/val pair.\n//\n// Huge objects may be loaded in parts, where only a subset of elements are\n// loaded at a time. 
This reduces the memory required to load huge objects and\n// prevents LoadItemsBuffer blocking.\nerror_code RdbLoader::LoadKeyValPair(int type, ObjSettings* settings) {\n  std::string key;\n  int64_t start = absl::GetCurrentTimeNanos();\n\n  SET_OR_RETURN(ReadKey(), key);\n  last_key_loaded_ = key;\n\n  bool dry_run = absl::GetFlag(FLAGS_rdb_load_dry_run);\n  bool streamed = false;\n  do {\n    // If there is a cached Item in the free pool, take it, otherwise allocate\n    // a new Item (LoadItemsBuffer returns free items).\n    Item* item = item_queue_.Pop();\n    if (item == nullptr) {\n      item = new Item;\n    }\n    // Delete the item if we fail to load the key/val pair.\n    auto cleanup = absl::Cleanup([item] { delete item; });\n\n    item->load_config.append = pending_read_.remaining > 0;\n\n    error_code ec = ReadObj(type, &item->val);\n    if (ec) {\n      VLOG(2) << \"ReadObj error \" << ec << \" for key \" << key;\n      return ec;\n    }\n\n    // If the key can be discarded, we must still continue to read the\n    // object from the RDB so we can read the next key.\n    if (ShouldDiscardKey(key, *settings)) {\n      pending_read_.reserve = 0;\n      continue;\n    }\n\n    if (dry_run)\n      continue;\n\n    item->load_config.finalize = pending_read_.remaining == 0;\n    if (!item->load_config.finalize) {\n      item->key = key;\n      streamed = true;\n    } else {\n      // Avoid copying the key if this is the last read of the object.\n      item->key = std::move(key);\n    }\n\n    item->load_config.chunked = streamed;\n    item->load_config.reserve = pending_read_.reserve;\n    // Clear 'reserve' as we must only set when the object is first\n    // initialized.\n    pending_read_.reserve = 0;\n\n    item->is_sticky = settings->is_sticky;\n    item->has_mc_flags = settings->has_mc_flags;\n    item->mc_flags = settings->mc_flags;\n    item->expire_ms = settings->expiretime;\n\n    std::move(cleanup).Cancel();\n    ShardId sid = Shard(item->key, 
shard_set->size());\n    EngineShard* es = EngineShard::tlocal();\n\n    if (es && es->shard_id() == sid) {\n      DbContext db_cntx{&namespaces->GetDefaultNamespace(), cur_db_index_, GetCurrentTimeMs()};\n      CreateObjectOnShard(db_cntx, item, &db_cntx.GetDbSlice(sid));\n      item_queue_.Push(item);\n    } else {\n      auto& out_buf = shard_buf_[sid];\n\n      out_buf.emplace_back(item);\n\n      constexpr size_t kBufSize = 64;\n      if (out_buf.size() >= kBufSize) {\n        // Despite being async, this function can block if the shard queue is full.\n        FlushShardAsync(sid);\n      }\n    }\n  } while (pending_read_.remaining > 0 && !stop_early_.load(memory_order_relaxed));\n\n  int delta_ms = (absl::GetCurrentTimeNanos() - start) / 1000'000;\n  LOG_IF(INFO, delta_ms > 1000) << \"Took \" << delta_ms << \" ms to load rdb_type \" << type;\n\n  return kOk;\n}\n\nbool RdbLoader::ShouldDiscardKey(std::string_view key, const ObjSettings& settings) const {\n  if (!load_unowned_slots_ && IsClusterEnabled()) {\n    const auto cluster_config = cluster::ClusterConfig::Current();\n    if (cluster_config && !cluster_config->IsMySlot(key)) {\n      return true;\n    }\n  }\n\n  /* Check if the key already expired. This function is used when loading\n   * an RDB file from disk, either at startup, or when an RDB was\n   * received from the master. In the latter case, the master is\n   * responsible for key expiry. If we would expire keys here, the\n   * snapshot taken by the master may not be reflected on the slave.\n   * Similarly if the RDB is the preamble of an AOF file, we want to\n   * load all the keys as they are, since the log of operations later\n   * assume to work in an exact keyspace state. 
*/\n  if (ServerState::tlocal()->is_master && (settings.has_expired)) {\n    VLOG(3) << \"Expire key on read: \" << key;\n    return true;\n  }\n\n  return false;\n}\n\nvoid RdbLoader::LoadScriptFromAux(string&& body) {\n  ServerState* ss = ServerState::tlocal();\n  auto interpreter = ss->BorrowInterpreter();\n  absl::Cleanup clean = [ss, interpreter] { ss->ReturnInterpreter(interpreter); };\n\n  if (script_mgr_) {\n    auto res = script_mgr_->Insert(body, interpreter);\n    if (!res)\n      LOG(ERROR) << \"Error compiling script\";\n  }\n}\n\nvoid RdbLoader::LoadSearchIndexDefFromAux(string&& def) {\n  LoadSearchCommandFromAux(service_, std::move(def), \"FT.CREATE\", \"index definition\", true);\n}\n\nvoid RdbLoader::LoadHnswIndexMetadataFromAux(string&& def) {\n  try {\n    auto json_opt = JsonFromString(def);\n    if (!json_opt) {\n      LOG(ERROR) << \"Invalid HNSW index metadata JSON: \" << def;\n      return;\n    }\n    const auto& json = *json_opt;\n\n    PendingHnswMetadata phm;\n    phm.index_name = json[\"index_name\"].as<string>();\n    phm.field_name = json[\"field_name\"].as<string>();\n    phm.metadata.max_elements = json[\"max_elements\"].as<size_t>();\n    phm.metadata.cur_element_count = json[\"cur_element_count\"].as<size_t>();\n    phm.metadata.maxlevel = json[\"maxlevel\"].as<int>();\n    phm.metadata.enterpoint_node = json[\"enterpoint_node\"].as<size_t>();\n\n    LOG(INFO) << \"Loaded HNSW metadata for index=\" << phm.index_name << \" field=\" << phm.field_name\n              << \" elements=\" << phm.metadata.cur_element_count;\n\n    load_context_->AddPendingHnswMetadata(std::move(phm));\n  } catch (const std::exception& e) {\n    LOG(ERROR) << \"Failed to parse HNSW index metadata JSON: \" << e.what() << \" def: \" << def;\n  }\n}\n\nerror_code RdbLoader::LoadVectorIndexNodes(uint64_t elements_number,\n                                           std::vector<search::HnswNodeData>* nodes) {\n  nodes->reserve(elements_number);\n  for (uint64_t 
elem = 0; elem < elements_number; ++elem) {\n    search::HnswNodeData node;\n    SET_OR_RETURN(FetchInt<uint32_t>(), node.internal_id);\n    SET_OR_RETURN(FetchInt<uint64_t>(), node.global_id);\n    uint32_t raw_level;\n    SET_OR_RETURN(FetchInt<uint32_t>(), raw_level);\n    node.level = static_cast<int>(raw_level);\n\n    node.levels_links.resize(node.level + 1);\n    for (int lvl = 0; lvl <= node.level; ++lvl) {\n      uint32_t links_num;\n      SET_OR_RETURN(FetchInt<uint32_t>(), links_num);\n      node.levels_links[lvl].resize(links_num);\n      for (uint32_t i = 0; i < links_num; ++i) {\n        SET_OR_RETURN(FetchInt<uint32_t>(), node.levels_links[lvl][i]);\n      }\n    }\n    nodes->push_back(std::move(node));\n  }\n  return {};\n}\n\nerror_code RdbLoader::RestoreVectorIndex(string_view index_key, string_view index_name,\n                                         string_view field_name, uint64_t elements_number) {\n#ifdef WITH_SEARCH\n  // Look up the HNSW index in the global registry. 
It should exist from FT.CREATE in aux.\n  auto hnsw_index = GlobalHnswIndexRegistry::Instance().Get(index_name, field_name);\n  if (!hnsw_index) {\n    LOG(ERROR) << \"HNSW index not found for restoration: \" << index_key;\n    return SkipVectorIndex(index_key, elements_number);\n  }\n\n  std::vector<search::HnswNodeData> nodes;\n  RETURN_ON_ERR(LoadVectorIndexNodes(elements_number, &nodes));\n\n  if (!nodes.empty()) {\n    auto metadata = load_context_->FindHnswMetadata(index_name, field_name);\n    DCHECK(metadata) << \"HNSW metadata missing for \" << index_key;\n\n    hnsw_index->RestoreFromNodes(nodes, *metadata);\n    LOG(INFO) << \"Restored HNSW index \" << index_key << \" with \" << nodes.size() << \" nodes\";\n  }\n  return {};\n#else\n  return SkipVectorIndex(index_key, elements_number);\n#endif\n}\n\nerror_code RdbLoader::SkipVectorIndex(string_view index_key, uint64_t elements_number) {\n  for (uint64_t elem = 0; elem < elements_number; ++elem) {\n    SET_OR_RETURN(FetchInt<uint32_t>(), std::ignore);  // internal_id\n    SET_OR_RETURN(FetchInt<uint64_t>(), std::ignore);  // global_id\n    uint32_t raw_level;\n    SET_OR_RETURN(FetchInt<uint32_t>(), raw_level);\n    int level = static_cast<int>(raw_level);\n\n    for (int lvl = 0; lvl <= level; ++lvl) {\n      uint32_t links_num;\n      SET_OR_RETURN(FetchInt<uint32_t>(), links_num);\n      for (uint32_t i = 0; i < links_num; ++i) {\n        SET_OR_RETURN(FetchInt<uint32_t>(), std::ignore);\n      }\n    }\n  }\n\n  if (elements_number > 0) {\n    LOG(INFO) << \"Skipping HNSW vector index restore: \" << index_key\n              << \" elements_number=\" << elements_number << \" shard_count_=\" << shard_count_\n              << \" current_shards=\" << shard_set->size() << \". Index will be rebuilt from data.\";\n  }\n  return {};\n}\n\nvoid RdbLoader::LoadSearchSynonymsFromAux(string&& def) {\n  load_context_->AddPendingSynonymCommand(std::move(def));\n}\n\n}  // namespace dfly\n"
  },
  {
    "path": "src/server/rdb_load.h",
    "content": "// Copyright 2022, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n#pragma once\n\n#include <system_error>\n\nextern \"C\" {\n#include \"redis/rdb.h\"\n}\n\n#include <absl/container/flat_hash_map.h>\n#include <absl/container/flat_hash_set.h>\n\n#include \"base/mpsc_intrusive_queue.h\"\n#include \"base/pod_array.h\"\n#include \"core/search/base.h\"\n#include \"core/search/hnsw_index.h\"\n#include \"io/io.h\"\n#include \"io/io_buf.h\"\n#include \"server/detail/decompress.h\"\n#include \"server/execution_state.h\"\n#include \"server/journal/serializer.h\"\n#include \"server/rdb_load_context.h\"\n\nstruct streamID;\n\nnamespace dfly {\n\nclass EngineShardSet;\nclass ScriptMgr;\nclass CompactObj;\nclass Service;\n\nusing RdbVersion = std::uint16_t;\n\nclass RdbLoaderBase {\n protected:\n  RdbLoaderBase();\n  ~RdbLoaderBase();\n\n  struct LoadTrace;\n  using MutableBytes = ::io::MutableBytes;\n\n  struct LzfString {\n    base::PODArray<uint8_t> compressed_blob;\n    uint64_t uncompressed_len;\n  };\n\n  struct RdbSBF {\n    double grow_factor, fp_prob;\n    size_t prev_size, current_size;\n    size_t max_capacity;\n\n    struct Filter {\n      unsigned hash_cnt;\n      std::string blob;\n      Filter(unsigned h, std::string b) : hash_cnt(h), blob(std::move(b)) {\n      }\n    };\n    std::vector<Filter> filters;\n  };\n\n  struct RdbCMS {\n    uint32_t width, depth;\n    int64_t total_incr_count;\n    std::vector<int64_t> counters;\n  };\n\n  using RdbVariant = std::variant<long long, base::PODArray<char>, LzfString,\n                                  std::unique_ptr<LoadTrace>, RdbSBF, RdbCMS>;\n\n  struct OpaqueObj {\n    RdbVariant obj;\n    int rdb_type{0};\n  };\n\n  struct LoadBlob {\n    RdbVariant rdb_var;\n    union {\n      unsigned encoding;\n      double score;\n    };\n  };\n\n  struct StreamPelTrace {\n    std::array<uint8_t, 16> rawid;\n    int64_t delivery_time;\n    uint64_t delivery_count;\n  };\n\n  
struct StreamConsumerTrace {\n    RdbVariant name;\n    int64_t seen_time;\n    int64_t active_time;\n    std::vector<std::array<uint8_t, 16>> nack_arr;\n  };\n\n  struct StreamID {\n    uint64_t ms = 0;\n    uint64_t seq = 0;\n  };\n\n  struct StreamCGTrace {\n    RdbVariant name;\n    uint64_t ms;\n    uint64_t seq;\n    uint64_t entries_read;\n    std::vector<StreamPelTrace> pel_arr;\n    std::vector<StreamConsumerTrace> cons_arr;\n  };\n\n  struct StreamTrace {\n    size_t lp_len;\n    size_t stream_len;\n    StreamID last_id;\n    StreamID first_id;             /* The first non-tombstone entry, zero if empty. */\n    StreamID max_deleted_entry_id; /* The maximal ID that was deleted. */\n    uint64_t entries_added = 0;    /* All time count of elements added. */\n    std::vector<StreamCGTrace> cgroup;\n  };\n\n  struct LoadTrace {\n    std::vector<LoadBlob> arr;\n    std::unique_ptr<StreamTrace> stream_trace;\n  };\n\n  // Contains the state of a pending partial read.\n  //\n  // This is used to load huge objects in parts (only loading a subset of\n  // elements at a time) (see LoadKeyValPair).\n  struct PendingRead {\n    // Number of elements in the object to reserve.\n    //\n    // Used to reserve the elements in a huge object up front, then append\n    // in next loads.\n    size_t reserve = 0;\n\n    // Number of elements remaining in the object.\n    size_t remaining = 0;\n  };\n\n  struct LoadConfig {\n    bool chunked = false;   // Big value streamed incrementally\n    size_t reserve = 0;     // Number of elements to reserve to optimize big value load\n    bool append = false;    // Append chunk to existing object\n    bool finalize = false;  // Last portion of chunked stream, finalize object\n  };\n\n  class OpaqueObjLoader;\n\n  io::Result<uint8_t> FetchType();\n\n  template <typename T> io::Result<T> FetchInt();\n\n  static std::error_code FromOpaque(const OpaqueObj& opaque, LoadConfig config, PrimeValue* pv);\n\n  io::Result<uint64_t> LoadLen(bool* 
is_encoded);\n  std::error_code FetchBuf(size_t size, void* dest);\n\n  io::Result<std::string> FetchGenericString();\n  io::Result<std::string> FetchLzfStringObject();\n  io::Result<std::string> FetchIntegerObject(int enctype);\n\n  io::Result<double> FetchBinaryDouble();\n  io::Result<double> FetchDouble();\n\n  ::io::Result<std::string> ReadKey();\n\n  std::error_code ReadObj(int rdbtype, OpaqueObj* dest);\n  std::error_code ReadStringObj(RdbVariant* rdb_variant, bool big_string_split = false);\n  std::error_code ReadRemainingString(RdbVariant* dest);\n  ::io::Result<long long> ReadIntObj(int encoding);\n  ::io::Result<LzfString> ReadLzf();\n\n  ::io::Result<OpaqueObj> ReadSet(int rdbtype);\n  ::io::Result<OpaqueObj> ReadIntSet();\n  ::io::Result<OpaqueObj> ReadGeneric(int rdbtype);\n  ::io::Result<OpaqueObj> ReadHMap(int rdbtype);\n  ::io::Result<OpaqueObj> ReadZSet(int rdbtype);\n  ::io::Result<OpaqueObj> ReadListQuicklist(int rdbtype);\n  ::io::Result<OpaqueObj> ReadStreams(int rdbtype);\n  ::io::Result<OpaqueObj> ReadRedisJson();\n  ::io::Result<OpaqueObj> ReadSBFImpl(bool chunking);\n  ::io::Result<OpaqueObj> ReadSBF();\n  ::io::Result<OpaqueObj> ReadSBF2();\n  ::io::Result<OpaqueObj> ReadCMS();\n\n  std::error_code SkipModuleData();\n  std::error_code HandleCompressedBlob(int op_type);\n  std::error_code HandleCompressedBlobFinish();\n  std::error_code AllocateDecompressOnce(int op_type);\n\n  std::error_code HandleJournalBlob(Service* service);\n\n  static size_t StrLen(const RdbVariant& tset);\n\n  std::error_code EnsureRead(size_t min_sz);\n\n  std::error_code EnsureReadInternal(size_t min_to_read);\n\n  static void CopyStreamId(const StreamID& src, struct streamID* dest);\n\n  base::IoBuf* mem_buf_ = nullptr;\n  base::IoBuf origin_mem_buf_;\n  ::io::Source* src_ = nullptr;\n\n  size_t bytes_read_ = 0;\n  size_t source_limit_ = SIZE_MAX;\n  base::PODArray<uint8_t> compr_buf_;\n  std::unique_ptr<detail::DecompressImpl> decompress_impl_;\n  JournalReader 
journal_reader_{nullptr, 0};\n  std::optional<uint64_t> journal_offset_ = std::nullopt;\n  RdbVersion rdb_version_ = RDB_VERSION;\n  PendingRead pending_read_;\n};\n\nclass RdbLoader : protected RdbLoaderBase {\n public:\n  // load_context is shared across all RdbLoader instances in a load session.\n  explicit RdbLoader(Service* service, RdbLoadContext* load_context, std::string snapshot_id = {});\n\n  ~RdbLoader();\n\n  void SetOverrideExistingKeys(bool override) {\n    override_existing_keys_ = override;\n  }\n\n  void SetLoadUnownedSlots(bool load_unowned) {\n    load_unowned_slots_ = load_unowned;\n  }\n\n  // Sets shard count of the snapshot being loaded.\n  // Does not necessarily match the shard count of the current instance.\n  void SetShardCount(uint32_t shard_cnt) {\n    shard_count_ = shard_cnt;\n  }\n\n  std::error_code Load(::io::Source* src);\n\n  void set_source_limit(size_t n) {\n    source_limit_ = n;\n  }\n\n  ::io::Bytes Leftover() const {\n    return mem_buf_->InputBuffer();\n  }\n\n  size_t bytes_read() const {\n    return bytes_read_;\n  }\n\n  size_t keys_loaded() const {\n    return keys_loaded_;\n  }\n\n  // returns time in seconds.\n  double load_time() const {\n    return load_time_;\n  }\n\n  void stop() {\n    stop_early_.store(true);\n  }\n\n  void Pause(bool pause) {\n    pause_ = pause;\n  }\n\n  const std::string& GetSnapshotId() const {\n    return snapshot_id_;\n  }\n\n  // Return the offset that was received with a RDB_OPCODE_JOURNAL_OFFSET command,\n  // or 0 if no offset was received.\n  std::optional<uint64_t> journal_offset() const {\n    return journal_offset_;\n  }\n\n  // Set callback for receiving RDB_OPCODE_FULLSYNC_END.\n  // This opcode is used by a master instance to notify it finished streaming static data\n  // and is ready to switch to stable state sync.\n  void SetFullSyncCutCb(std::function<void()> cb) {\n    full_sync_cut_cb = std::move(cb);\n  }\n\n  uint32_t shard_id() const {\n    return shard_id_;\n  }\n\n  
uint32_t shard_count() const {\n    return shard_count_;\n  }\n\n private:\n  struct Item {\n    std::string key;\n    OpaqueObj val;\n    uint64_t expire_ms;\n    std::atomic<Item*> next;\n    bool is_sticky = false;\n    bool has_mc_flags = false;\n    uint32_t mc_flags = 0;\n\n    LoadConfig load_config;\n\n    friend void MPSC_intrusive_store_next(Item* dest, Item* nxt) {\n      dest->next.store(nxt, std::memory_order_release);\n    }\n\n    friend Item* MPSC_intrusive_load_next(const Item& src) {\n      return src.next.load(std::memory_order_acquire);\n    }\n  };\n\n  using ItemsBuf = std::vector<Item*>;\n\n  struct ObjSettings;\n\n  std::error_code LoadKeyValPair(int type, ObjSettings* settings);\n  // Returns whether to discard the read key pair.\n  bool ShouldDiscardKey(std::string_view key, const ObjSettings& settings) const;\n\n  std::error_code HandleAux();\n\n  std::error_code VerifyChecksum();\n\n  void FinishLoad(absl::Time start_time, size_t* keys_loaded);\n\n  void FlushShardAsync(ShardId sid);\n  void FlushAllShards();\n\n  void LoadItemsBuffer(DbIndex db_ind, const ItemsBuf& ib);\n\n  void CreateObjectOnShard(const DbContext& db_cntx, const Item* item, DbSlice* db_slice);\n\n  void LoadScriptFromAux(std::string&& value);\n\n  // Load index definition from RESP string describing it in FT.CREATE format,\n  // issues an FT.CREATE call, but does not start indexing\n  void LoadSearchIndexDefFromAux(std::string&& value);\n\n  // Load HNSW index metadata from JSON, sets metadata on the GlobalHnswIndexRegistry\n  void LoadHnswIndexMetadataFromAux(std::string&& value);\n\n  // Load synonyms from RESP string and issue FT.SYNUPDATE call\n  void LoadSearchSynonymsFromAux(std::string&& value);\n\n  // Restore HNSW vector index graph from serialized node data.\n  std::error_code RestoreVectorIndex(std::string_view index_key, std::string_view index_name,\n                                     std::string_view field_name, uint64_t elements_number);\n\n  // Load 
HNSW vector index nodes into a vector for deferred restoration.\n  std::error_code LoadVectorIndexNodes(uint64_t elements_number,\n                                       std::vector<search::HnswNodeData>* nodes);\n\n  // Skip over serialized HNSW vector index node data without restoring.\n  std::error_code SkipVectorIndex(std::string_view index_key, uint64_t elements_number);\n\n  Service* service_;\n  RdbLoadContext* load_context_;\n\n  std::string snapshot_id_;\n  bool override_existing_keys_ = false;\n  bool load_unowned_slots_ = false;\n  bool rdb_ignore_expiry_;\n  const bool deserialize_hnsw_index_;\n  uint32_t shard_id_ = UINT32_MAX;\n  uint32_t shard_count_ = 0;\n  size_t table_used_memory_ = 0;\n  ScriptMgr* script_mgr_;\n  std::vector<ItemsBuf> shard_buf_;\n\n  size_t keys_loaded_ = 0;\n  double load_time_ = 0;\n\n  DbIndex cur_db_index_ = 0;\n  bool pause_ = false;\n  bool is_tiered_enabled_ = false;\n  AggregateError ec_;\n\n  // We use atomics here because shard threads can notify RdbLoader fiber from another thread\n  // that it should stop early.\n  std::atomic_bool stop_early_{false};\n\n  // Callback when receiving RDB_OPCODE_FULLSYNC_END\n  std::function<void()> full_sync_cut_cb;\n\n  // A free pool of allocated unused items.\n  base::MPSCIntrusiveQueue<Item> item_queue_;\n\n  // Map of currently chunked big values\n  std::unordered_map<std::string, std::unique_ptr<PrimeValue>> now_chunked_;\n  base::SpinLock now_chunked_mu_;  // guards now_chunked_\n\n  std::string last_key_loaded_;\n};\n\n}  // namespace dfly\n"
  },
  {
    "path": "src/server/rdb_load_context.cc",
    "content": "// Copyright 2026, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#include \"server/rdb_load_context.h\"\n\n#include <absl/container/flat_hash_set.h>\n#include <absl/strings/match.h>\n\n#include <algorithm>\n#include <limits>\n\n#include \"base/logging.h\"\n#include \"facade/redis_parser.h\"\n#include \"facade/reply_capture.h\"\n#include \"server/conn_context.h\"\n#include \"server/engine_shard_set.h\"\n#include \"server/main_service.h\"\n#include \"server/namespaces.h\"\n#include \"server/search/doc_index.h\"\n#include \"server/search/global_hnsw_index.h\"\n#include \"server/sharding.h\"\n\nnamespace dfly {\n\nnamespace {\n\nconstexpr search::GlobalDocId kInvalidRemapGid = std::numeric_limits<search::GlobalDocId>::max();\n\n// index_name -> master_shard_id -> new_global_ids indexed by old doc_id\nusing HnswRemapTable =\n    absl::flat_hash_map<std::string,\n                        absl::flat_hash_map<uint32_t, std::vector<search::GlobalDocId>>>;\n\n// vector indexed by shard_id; per-shard map from index_name to keys in doc_id order\nusing PerShardMappings = std::vector<absl::flat_hash_map<std::string, std::vector<std::string>>>;\n\n// Assigns new global_ids to each (key, old_doc_id) pair, distributing keys to their target\n// shards. 
Returns a table mapping old (index, master_shard, old_doc_id) -> new_global_id.\nHnswRemapTable BuildRemapTable(\n    const absl::flat_hash_map<uint32_t, std::vector<PendingIndexMapping>>& index_mappings,\n    ShardId new_shard_count) {\n  HnswRemapTable remap_table;\n#ifdef WITH_SEARCH\n  absl::flat_hash_map<std::string, absl::flat_hash_map<uint32_t, search::DocId>> doc_id_counters;\n\n  for (const auto& [master_shard_id, pim_vec] : index_mappings) {\n    for (const auto& pim : pim_vec) {\n      auto& vec = remap_table[pim.index_name][master_shard_id];\n      auto& counters = doc_id_counters[pim.index_name];\n\n      // Pre-allocate to max old_doc_id in one shot, avoiding O(N²) repeated resizes when\n      // doc_ids arrive in increasing order.\n      search::DocId max_id = 0;\n      for (const auto& [key, old_doc_id] : pim.mappings) {\n        max_id = std::max(max_id, old_doc_id);\n      }\n      vec.assign(max_id + 1, kInvalidRemapGid);\n\n      for (const auto& [key, old_doc_id] : pim.mappings) {\n        ShardId new_shard_id = Shard(key, new_shard_count);\n        // Counter starts at 0 for each (index, shard) — equivalent to DocKeyIndex::Add() on a\n        // fresh index (free_ids_ empty → id = last_id_++). 
DocKeyIndex::Restore() is later called\n        // with these exact keys in doc_id order, so the key_index stays consistent with the\n        // global_ids stored in the remapped HNSW graph.\n        search::DocId new_doc_id = counters[new_shard_id]++;\n        vec[old_doc_id] = search::CreateGlobalDocId(new_shard_id, new_doc_id);\n      }\n    }\n  }\n#endif\n  return remap_table;\n}\n\n// Remaps global_ids in deferred HNSW nodes and restores the graphs.\n// Returns the set of index names that failed restoration (to be excluded from key mappings).\nabsl::flat_hash_set<std::string> RemapAndRestoreHnswGraphs(\n    std::vector<PendingHnswNodes>& pending_nodes,\n    const std::vector<PendingHnswMetadata>& hnsw_metadata, const HnswRemapTable& remap_table) {\n  absl::flat_hash_set<std::string> failed_indices;\n#ifdef WITH_SEARCH\n  for (auto& pn : pending_nodes) {\n    auto remap_it = remap_table.find(pn.index_name);\n\n    auto hnsw_index = GlobalHnswIndexRegistry::Instance().Get(pn.index_name, pn.field_name);\n    if (!hnsw_index) {\n      LOG(ERROR) << \"HNSW index not found for deferred restoration: \" << pn.index_name << \":\"\n                 << pn.field_name << \". Will rebuild from scratch.\";\n      failed_indices.insert(pn.index_name);\n      continue;\n    }\n\n    if (remap_it == remap_table.end()) {\n      LOG(WARNING) << \"No remap table for index \" << pn.index_name << \":\" << pn.field_name\n                   << \" (no key mappings). 
Will rebuild from scratch.\";\n      failed_indices.insert(pn.index_name);\n      continue;\n    }\n\n    size_t remapped = 0;\n    for (auto& node : pn.nodes) {\n      auto [shard_id, doc_id] = search::DecomposeGlobalDocId(node.global_id);\n      auto shard_it = remap_it->second.find(shard_id);\n      if (shard_it != remap_it->second.end() && doc_id < shard_it->second.size()) {\n        search::GlobalDocId new_gid = shard_it->second[doc_id];\n        if (new_gid != kInvalidRemapGid) {\n          node.global_id = new_gid;\n          ++remapped;\n        }\n      }\n    }\n\n    if (remapped != pn.nodes.size()) {\n      LOG(WARNING) << \"Incomplete remap for HNSW index \" << pn.index_name << \":\" << pn.field_name\n                   << \" (\" << remapped << \"/\" << pn.nodes.size()\n                   << \" nodes). Will rebuild from scratch.\";\n      failed_indices.insert(pn.index_name);\n      continue;\n    }\n\n    const PendingHnswMetadata* phm_ptr = nullptr;\n    for (const auto& phm : hnsw_metadata) {\n      if (phm.index_name == pn.index_name && phm.field_name == pn.field_name) {\n        phm_ptr = &phm;\n        break;\n      }\n    }\n    DCHECK(phm_ptr) << \"HNSW metadata missing for \" << pn.index_name << \":\" << pn.field_name;\n\n    hnsw_index->RestoreFromNodes(pn.nodes, phm_ptr->metadata);\n    LOG(INFO) << \"Restored HNSW index \" << pn.index_name << \":\" << pn.field_name << \" with \"\n              << pn.nodes.size() << \" nodes (\" << remapped << \" global_ids remapped)\";\n  }\n#endif\n  return failed_indices;\n}\n\n// Uses the remap table to distribute keys to their target shards.\n// Each shard's entry maps index_name -> keys in new doc_id order (vector index = doc_id),\n// matching the order assigned by BuildRemapTable (same iteration over index_mappings).\nPerShardMappings PreDistributeKeyMappings(\n    const absl::flat_hash_map<uint32_t, std::vector<PendingIndexMapping>>& index_mappings,\n    const HnswRemapTable& remap_table, ShardId 
new_shard_count) {\n  PerShardMappings per_shard(new_shard_count);\n\n  for (const auto& [master_shard_id, pim_vec] : index_mappings) {\n    for (const auto& pim : pim_vec) {\n      auto idx_it = remap_table.find(pim.index_name);\n      if (idx_it == remap_table.end())\n        continue;\n      auto shard_it = idx_it->second.find(master_shard_id);\n      if (shard_it == idx_it->second.end())\n        continue;\n      const auto& remap_vec = shard_it->second;\n\n      for (const auto& [key, old_doc_id] : pim.mappings) {\n        if (old_doc_id >= remap_vec.size())\n          continue;\n        search::GlobalDocId new_gid = remap_vec[old_doc_id];\n        if (new_gid == kInvalidRemapGid)\n          continue;\n        ShardId new_shard_id = search::DecomposeGlobalDocId(new_gid).first;\n        per_shard[new_shard_id][pim.index_name].push_back(key);\n      }\n    }\n  }\n\n  return per_shard;\n}\n\n}  // namespace\n\nvoid LoadSearchCommandFromAux(Service* service, std::string&& def, std::string_view command_name,\n                              std::string_view error_context, bool add_NX) {\n  facade::CapturingReplyBuilder crb;\n\n  ConnectionContext cntx{nullptr, acl::UserCredentials{}};\n  cntx.is_replicating = true;\n  cntx.journal_emulated = true;\n  cntx.skip_acl_validation = true;\n  cntx.ns = &namespaces->GetDefaultNamespace();\n\n  uint32_t consumed = 0;\n  facade::RespVec resp_vec;\n  facade::RedisParser parser;\n\n  // Prepend a whitespace so names starting with ':' are treated as names, not RESP tokens.\n  def.insert(def.begin(), ' ');\n\n  // Add resp terminator\n  constexpr std::string_view kRespTerminator = \"\\r\\n\";\n  def += kRespTerminator;\n\n  std::string_view printable_def{def.data(), def.size() - kRespTerminator.size()};\n\n  io::MutableBytes buffer{reinterpret_cast<uint8_t*>(def.data()), def.size()};\n  auto res = parser.Parse(buffer, &consumed, &resp_vec);\n\n  if (res != facade::RedisParser::Result::OK) {\n    LOG(ERROR) << \"Bad \" << 
error_context << \": \" << printable_def;\n    return;\n  }\n\n  // Temporary migration fix for backwards compatibility with old snapshots where TAG fields were\n  // serialized as \"TAG SORTABLE SEPARATOR x\" but parser expects \"TAG SEPARATOR x SORTABLE\".\n  // Reorder arguments if needed.\n  // TODO: Remove this workaround after Apr 2026.\n  for (size_t i = 0; i + 2 < resp_vec.size(); ++i) {\n    std::string_view cur = resp_vec[i].GetView();\n    std::string_view next = resp_vec[i + 1].GetView();\n    if (absl::EqualsIgnoreCase(cur, \"SORTABLE\") && absl::EqualsIgnoreCase(next, \"SEPARATOR\")) {\n      // SORTABLE SEPARATOR x -> SEPARATOR x SORTABLE\n      std::swap(resp_vec[i], resp_vec[i + 1]);      // SEPARATOR SORTABLE x\n      std::swap(resp_vec[i + 1], resp_vec[i + 2]);  // SEPARATOR x SORTABLE\n    }\n  }\n\n  // Prepend command name (FT.CREATE or FT.SYNUPDATE)\n  CommandContext cntx_cmd;\n  cntx_cmd.Init(&crb, &cntx);\n\n  cntx_cmd.PushArg(command_name);\n  cntx_cmd.PushArg(resp_vec[0].GetView());  // index name\n  if (add_NX) {\n    cntx_cmd.PushArg(\"NX\");\n  }\n  for (unsigned i = 1; i < resp_vec.size(); i++) {\n    cntx_cmd.PushArg(resp_vec[i].GetView());\n  }\n  service->DispatchCommand(facade::ParsedArgs{cntx_cmd}, &cntx_cmd,\n                           facade::AsyncPreference::ONLY_SYNC);\n\n  auto response = crb.Take();\n  if (auto err = facade::CapturingReplyBuilder::TryExtractError(response); err) {\n    LOG(ERROR) << \"Bad \" << error_context << \": \" << def << \" \" << err->first;\n  }\n}\n\nvoid RdbLoadContext::AddPendingSynonymCommand(std::string cmd) {\n  util::fb2::LockGuard<util::fb2::Mutex> lk(mu_);\n  pending_synonym_cmds_.push_back(std::move(cmd));\n}\n\nvoid RdbLoadContext::AddPendingIndexMapping(uint32_t shard_id, PendingIndexMapping mapping) {\n  util::fb2::LockGuard<util::fb2::Mutex> lk(mu_);\n  pending_index_mappings_[shard_id].emplace_back(std::move(mapping));\n}\n\nvoid 
RdbLoadContext::AddPendingHnswMetadata(PendingHnswMetadata metadata) {\n  util::fb2::LockGuard<util::fb2::Mutex> lk(mu_);\n  pending_hnsw_metadata_.emplace_back(std::move(metadata));\n}\n\nvoid RdbLoadContext::AddPendingHnswNodes(PendingHnswNodes nodes) {\n  util::fb2::LockGuard<util::fb2::Mutex> lk(mu_);\n  pending_hnsw_nodes_.emplace_back(std::move(nodes));\n}\n\nvoid RdbLoadContext::SetMasterShardCount(uint32_t count) {\n  master_shard_count_ = count;\n}\n\nstd::optional<search::HnswIndexMetadata> RdbLoadContext::FindHnswMetadata(\n    std::string_view index_name, std::string_view field_name) const {\n  util::fb2::LockGuard<util::fb2::Mutex> lk(mu_);\n  for (const auto& phm : pending_hnsw_metadata_) {\n    if (phm.index_name == index_name && phm.field_name == field_name) {\n      return phm.metadata;\n    }\n  }\n  return std::nullopt;\n}\n\nstd::vector<std::string> RdbLoadContext::TakePendingSynonymCommands() {\n  util::fb2::LockGuard<util::fb2::Mutex> lk(mu_);\n  std::vector<std::string> result;\n  result.swap(pending_synonym_cmds_);\n  return result;\n}\n\nabsl::flat_hash_map<uint32_t, std::vector<PendingIndexMapping>>\nRdbLoadContext::TakePendingIndexMappings() {\n  util::fb2::LockGuard<util::fb2::Mutex> lk(mu_);\n  decltype(pending_index_mappings_) result;\n  std::swap(result, pending_index_mappings_);\n  return result;\n}\n\nstd::vector<PendingHnswNodes> RdbLoadContext::TakePendingHnswNodes() {\n  util::fb2::LockGuard<util::fb2::Mutex> lk(mu_);\n  return std::move(pending_hnsw_nodes_);\n}\n\nRdbLoadContext::PerShardMappings RdbLoadContext::RemapHnswForDifferentShardCount(\n    const absl::flat_hash_map<uint32_t, std::vector<PendingIndexMapping>>& index_mappings,\n    std::vector<PendingHnswNodes>& pending_nodes,\n    const std::vector<PendingHnswMetadata>& hnsw_metadata) {\n  const ShardId new_shard_count = shard_set->size();\n\n  // Build remap table: index_name -> master_shard_id -> new_global_ids indexed by old doc_id.\n  // Freed when this function 
returns.\n  HnswRemapTable remap_table = BuildRemapTable(index_mappings, new_shard_count);\n\n  // Remap global_ids, restore HNSW graphs; failed indices are excluded from key mappings.\n  auto failed = RemapAndRestoreHnswGraphs(pending_nodes, hnsw_metadata, remap_table);\n  for (const auto& name : failed) {\n    remap_table.erase(name);\n  }\n\n  // Pre-distribute key mappings per target shard; keys in doc_id order (index = doc_id).\n  return PreDistributeKeyMappings(index_mappings, remap_table, new_shard_count);\n}\n\nvoid RdbLoadContext::PerformPostLoad(Service* service, bool is_error) {\n#ifdef WITH_SEARCH\n  const CommandId* cmd = service->FindCmd(\"FT.CREATE\");\n  if (cmd == nullptr)  // In case search module is disabled\n    return;\n\n  std::vector<std::string> synonym_cmds = TakePendingSynonymCommands();\n  auto index_mappings = TakePendingIndexMappings();\n  auto pending_nodes = TakePendingHnswNodes();\n\n  // Extract remaining shared state under lock. After this, no member access is needed.\n  std::vector<PendingHnswMetadata> hnsw_metadata;\n  {\n    util::fb2::LockGuard<util::fb2::Mutex> lk(mu_);\n    hnsw_metadata.swap(pending_hnsw_metadata_);\n  }\n  uint32_t master_shards = master_shard_count_;\n\n  bool has_hnsw_restore = !hnsw_metadata.empty();\n\n  if (is_error)\n    return;\n\n  // When shard counts differ, remap HNSW global_ids and redistribute key mappings on-the-fly.\n  bool shard_count_differs = master_shards != 0 && master_shards != shard_set->size();\n\n  if (shard_count_differs && !index_mappings.empty()) {\n    // Remaps HNSW global_ids, restores HNSW graphs, and pre-distributes key mappings by target\n    // shard. 
The internal remap table is local to the function and freed when it returns.\n    auto per_shard_mappings =\n        RemapHnswForDifferentShardCount(index_mappings, pending_nodes, hnsw_metadata);\n\n    // Each shard reads only its own pre-built slice — no per-shard filtering of all N keys.\n    shard_set->AwaitRunningOnShardQueue([&per_shard_mappings](EngineShard* es) {\n      for (const auto& [name, keys] : per_shard_mappings[es->shard_id()]) {\n        if (auto* index = es->search_indices()->GetIndex(name); index) {\n          index->RestoreKeyIndex(keys);\n          VLOG(1) << \"Restored \" << keys.size() << \" key mappings for index \" << name\n                  << \" on shard \" << es->shard_id();\n        }\n      }\n    });\n  } else {\n    if (shard_count_differs && !pending_nodes.empty()) {\n      LOG(WARNING) << \"Have \" << pending_nodes.size()\n                   << \" deferred HNSW node sets but no key mappings for remapping. \"\n                      \"Affected indices will be rebuilt from scratch.\";\n    }\n\n    if (!index_mappings.empty()) {\n      shard_set->AwaitRunningOnShardQueue([&index_mappings](EngineShard* es) {\n        auto it = index_mappings.find(es->shard_id());\n        if (it == index_mappings.end())\n          return;\n        for (const auto& pim : it->second) {\n          if (auto* index = es->search_indices()->GetIndex(pim.index_name); index) {\n            index->RestoreKeyIndex(pim.mappings);\n            VLOG(1) << \"Restored \" << pim.mappings.size() << \" key mappings for index \"\n                    << pim.index_name << \" on shard \" << es->shard_id();\n          }\n        }\n      });\n    }\n  }\n  // RestoreKeyIndex (above) and RebuildAllIndices (below) run in separate sequential\n  // AwaitRunningOnShardQueue calls, so there is no parallel index build that could interfere\n  // with the doc_ids assigned during key mapping restoration.\n  shard_set->AwaitRunningOnShardQueue([has_hnsw_restore](EngineShard* es) {\n    
OpArgs op_args{es, nullptr,\n                   DbContext{&namespaces->GetDefaultNamespace(), 0, GetCurrentTimeMs()}};\n    es->search_indices()->RebuildAllIndices(op_args, has_hnsw_restore);\n  });\n\n  // Now execute all pending synonym commands after indices are rebuilt\n  for (auto& syn_cmd : synonym_cmds) {\n    LoadSearchCommandFromAux(service, std::move(syn_cmd), \"FT.SYNUPDATE\", \"synonym definition\");\n  }\n\n  // Wait until index building ends\n  shard_set->RunBlockingInParallel(\n      [](EngineShard* es) { es->search_indices()->BlockUntilConstructionEnd(); });\n#endif\n}\n\n}  // namespace dfly\n"
  },
  {
    "path": "src/server/rdb_load_context.h",
    "content": "// Copyright 2026, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n#pragma once\n\n#include <absl/container/flat_hash_map.h>\n\n#include <optional>\n#include <string>\n#include <vector>\n\n#include \"core/search/base.h\"\n#include \"core/search/hnsw_index.h\"\n#include \"util/fibers/synchronization.h\"\n\nnamespace dfly {\n\nclass Service;\n\n// Dispatches a search command (FT.CREATE / FT.SYNUPDATE) from a serialized AUX string.\nvoid LoadSearchCommandFromAux(Service* service, std::string&& def, std::string_view command_name,\n                              std::string_view error_context, bool add_NX = false);\n\n// Pending index key-to-DocId mappings to apply after indices are created.\nstruct PendingIndexMapping {\n  std::string index_name;\n  std::vector<std::pair<std::string, search::DocId>> mappings;\n};\n\n// HNSW metadata loaded from \"hnsw-index-metadata\" AUX fields.\nstruct PendingHnswMetadata {\n  std::string index_name;\n  std::string field_name;\n  search::HnswIndexMetadata metadata;\n};\n\n// Deferred HNSW graph nodes for restoration when shard counts differ.\nstruct PendingHnswNodes {\n  std::string index_name;\n  std::string field_name;\n  std::vector<search::HnswNodeData> nodes;\n};\n\n// Shared context for collecting search-related state across multiple RdbLoader instances\n// during a single load session. 
Consumed by PerformPostLoad after all loaders finish.\n//\n// Thread-safe: all mutating methods lock internally.\nclass RdbLoadContext {\n public:\n  RdbLoadContext() = default;\n\n  RdbLoadContext(const RdbLoadContext&) = delete;\n  RdbLoadContext& operator=(const RdbLoadContext&) = delete;\n\n  void AddPendingSynonymCommand(std::string cmd);\n  void AddPendingIndexMapping(uint32_t shard_id, PendingIndexMapping mapping);\n  void AddPendingHnswMetadata(PendingHnswMetadata metadata);\n  void AddPendingHnswNodes(PendingHnswNodes nodes);\n  void SetMasterShardCount(uint32_t count);\n\n  std::optional<search::HnswIndexMetadata> FindHnswMetadata(std::string_view index_name,\n                                                            std::string_view field_name) const;\n\n  // Performs post load procedures while still remaining in global LOADING state.\n  // Called once immediately after loading the snapshot / full sync succeeded from the coordinator.\n  void PerformPostLoad(Service* service, bool is_error = false);\n\n private:\n  std::vector<std::string> TakePendingSynonymCommands();\n  absl::flat_hash_map<uint32_t, std::vector<PendingIndexMapping>> TakePendingIndexMappings();\n  std::vector<PendingHnswNodes> TakePendingHnswNodes();\n\n  // Pre-distributed key mappings indexed by target shard_id.\n  // Per-shard: index_name -> keys in doc_id order (vector index = doc_id).\n  using PerShardMappings = std::vector<absl::flat_hash_map<std::string, std::vector<std::string>>>;\n\n  // Remaps HNSW node global_ids, restores HNSW graphs, and pre-distributes key mappings by\n  // target shard. 
The internal remap table is local and freed when this function returns.\n  // Failed indices are excluded from the returned mappings so they fall back to a full rebuild.\n  PerShardMappings RemapHnswForDifferentShardCount(\n      const absl::flat_hash_map<uint32_t, std::vector<PendingIndexMapping>>& index_mappings,\n      std::vector<PendingHnswNodes>& pending_nodes,\n      const std::vector<PendingHnswMetadata>& hnsw_metadata);\n\n  mutable util::fb2::Mutex mu_;\n  std::vector<std::string> pending_synonym_cmds_ ABSL_GUARDED_BY(mu_);\n  absl::flat_hash_map<uint32_t, std::vector<PendingIndexMapping>> pending_index_mappings_\n      ABSL_GUARDED_BY(mu_);\n  std::vector<PendingHnswMetadata> pending_hnsw_metadata_ ABSL_GUARDED_BY(mu_);\n  std::vector<PendingHnswNodes> pending_hnsw_nodes_ ABSL_GUARDED_BY(mu_);\n  uint32_t master_shard_count_ = 0;  // Set identically by all loaders from AUX field.\n};\n\n}  // namespace dfly\n"
  },
  {
    "path": "src/server/rdb_save.cc",
    "content": "// Copyright 2022, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#include \"server/rdb_save.h\"\n\n#include <absl/cleanup/cleanup.h>\n#include <absl/strings/str_cat.h>\n#include <absl/strings/str_format.h>\n\n#include <queue>\n\nextern \"C\" {\n#include \"redis/crc64.h\"\n#include \"redis/intset.h\"\n#include \"redis/listpack.h\"\n#include \"redis/rdb.h\"\n#include \"redis/stream.h\"\n#include \"redis/util.h\"\n#include \"redis/zmalloc.h\"\n}\n\n#include \"base/flags.h\"\n#include \"base/logging.h\"\n#include \"core/bloom.h\"\n#include \"core/cms.h\"\n#include \"core/json/json_object.h\"\n#include \"core/qlist.h\"\n#include \"core/search/hnsw_index.h\"\n#include \"core/size_tracking_channel.h\"\n#include \"core/sorted_map.h\"\n#include \"core/string_map.h\"\n#include \"core/string_set.h\"\n#include \"server/engine_shard_set.h\"\n#include \"server/error.h\"\n#include \"server/main_service.h\"\n#include \"server/namespaces.h\"\n#include \"server/rdb_extensions.h\"\n#include \"server/search/doc_index.h\"\n#include \"server/search/global_hnsw_index.h\"\n#include \"server/serializer_commons.h\"\n#include \"server/snapshot.h\"\n#include \"server/tiered_storage.h\"\n#include \"server/tiering/common.h\"\n#include \"util/fibers/simple_channel.h\"\n\nABSL_FLAG(dfly::CompressionMode, compression_mode, dfly::CompressionMode::MULTI_ENTRY_LZ4,\n          \"set 0 for no compression,\"\n          \"set 1 for single entry lzf compression,\"\n          \"set 2 for multi entry zstd compression on df snapshot and single entry on rdb snapshot,\"\n          \"set 3 for multi entry lz4 compression on df snapshot and single entry on rdb snapshot\");\n\n// Flip this value to 'true' in March 2026.\nABSL_FLAG(bool, rdb_sbf_chunked, false, \"Enable new save format for saving SBFs in chunks.\");\n\nnamespace dfly {\n\nusing namespace std;\nusing base::IoBuf;\nusing io::Bytes;\n\nusing namespace tiering::literals;\n\nnamespace {\n\n/* 
Encodes the \"value\" argument as integer when it fits in the supported ranges\n * for encoded types. If the function successfully encodes the integer, the\n * representation is stored in the buffer pointer to by \"enc\" and the string\n * length is returned. Otherwise 0 is returned. */\nunsigned EncodeInteger(long long value, uint8_t* enc) {\n  if (value >= -(1 << 7) && value <= (1 << 7) - 1) {\n    enc[0] = (RDB_ENCVAL << 6) | RDB_ENC_INT8;\n    enc[1] = value & 0xFF;\n    return 2;\n  }\n\n  if (value >= -(1 << 15) && value <= (1 << 15) - 1) {\n    enc[0] = (RDB_ENCVAL << 6) | RDB_ENC_INT16;\n    enc[1] = value & 0xFF;\n    enc[2] = (value >> 8) & 0xFF;\n    return 3;\n  }\n\n  constexpr long long k31 = (1LL << 31);\n  if (value >= -k31 && value <= k31 - 1) {\n    enc[0] = (RDB_ENCVAL << 6) | RDB_ENC_INT32;\n    enc[1] = value & 0xFF;\n    enc[2] = (value >> 8) & 0xFF;\n    enc[3] = (value >> 16) & 0xFF;\n    enc[4] = (value >> 24) & 0xFF;\n    return 5;\n  }\n\n  return 0;\n}\n\n/* String objects in the form \"2391\" \"-100\" without any space and with a\n * range of values that can fit in an 8, 16 or 32 bit signed value can be\n * encoded as integers to save space */\nunsigned TryIntegerEncoding(string_view input, uint8_t* dest) {\n  long long value;\n\n  /* Check if it's possible to encode this value as a number */\n  if (!absl::SimpleAtoi(input, &value))\n    return 0;\n  absl::AlphaNum alpha(value);\n\n  /* If the number converted back into a string is not identical\n   * then it's not possible to encode the string as integer */\n  if (alpha.size() != input.size() || alpha.Piece() != input)\n    return 0;\n\n  return EncodeInteger(value, dest);\n}\n\nconstexpr size_t kBufLen = 64_KB;\nconstexpr size_t kAmask = 4_KB - 1;\nconstexpr uint32_t kChannelLen = 2;\n\n}  // namespace\n\nbool AbslParseFlag(std::string_view in, dfly::CompressionMode* flag, std::string* err) {\n  if (in == \"0\" || in == \"NONE\") {\n    *flag = dfly::CompressionMode::NONE;\n    return 
true;\n  }\n  if (in == \"1\" || in == \"SINGLE_ENTRY\") {\n    *flag = dfly::CompressionMode::SINGLE_ENTRY;\n    return true;\n  }\n  if (in == \"2\" || in == \"MULTI_ENTRY_ZSTD\") {\n    *flag = dfly::CompressionMode::MULTI_ENTRY_ZSTD;\n    return true;\n  }\n  if (in == \"3\" || in == \"MULTI_ENTRY_LZ4\") {\n    *flag = dfly::CompressionMode::MULTI_ENTRY_LZ4;\n    return true;\n  }\n\n  *err = absl::StrCat(\"Unknown value \", in, \" for compression_mode flag\");\n  return false;\n}\n\nstd::string AbslUnparseFlag(dfly::CompressionMode flag) {\n  switch (flag) {\n    case dfly::CompressionMode::NONE:\n      return \"NONE\";\n    case dfly::CompressionMode::SINGLE_ENTRY:\n      return \"SINGLE_ENTRY\";\n    case dfly::CompressionMode::MULTI_ENTRY_ZSTD:\n      return \"MULTI_ENTRY_ZSTD\";\n    case dfly::CompressionMode::MULTI_ENTRY_LZ4:\n      return \"MULTI_ENTRY_LZ4\";\n  }\n  DCHECK(false) << \"Unknown compression_mode flag value \" << int(flag);\n  return \"NONE\";\n}\n\ndfly::CompressionMode GetDefaultCompressionMode() {\n  return absl::GetFlag(FLAGS_compression_mode);\n}\n\nuint8_t RdbObjectType(const CompactObj& pv) {\n  unsigned type = pv.ObjType();\n  unsigned compact_enc = pv.Encoding();\n  switch (type) {\n    case OBJ_STRING:\n      return RDB_TYPE_STRING;\n    case OBJ_LIST:\n      return RDB_TYPE_LIST_QUICKLIST_2;\n      break;\n    case OBJ_SET:\n      if (compact_enc == kEncodingIntSet)\n        return RDB_TYPE_SET_INTSET;\n      else if (compact_enc == kEncodingStrMap2) {\n        if (((StringSet*)pv.RObjPtr())->ExpirationUsed())\n          return RDB_TYPE_SET_WITH_EXPIRY;\n        else\n          return RDB_TYPE_SET;\n      }\n      break;\n    case OBJ_ZSET:\n      if (compact_enc == OBJ_ENCODING_LISTPACK)\n        return RDB_TYPE_ZSET_LISTPACK;\n      else if (compact_enc == OBJ_ENCODING_SKIPLIST)\n        return RDB_TYPE_ZSET_2;\n      break;\n    case OBJ_HASH:\n      if (compact_enc == kEncodingListPack)\n        return 
RDB_TYPE_HASH_LISTPACK;\n      else if (compact_enc == kEncodingStrMap2) {\n        if (((StringMap*)pv.RObjPtr())->ExpirationUsed())\n          return RDB_TYPE_HASH_WITH_EXPIRY;  // Incompatible with Redis\n        else\n          return RDB_TYPE_HASH;\n      }\n      break;\n    case OBJ_STREAM:\n      return RDB_TYPE_STREAM_LISTPACKS_3;\n    case OBJ_MODULE:\n      return RDB_TYPE_MODULE_2;\n    case OBJ_JSON:\n      return RDB_TYPE_JSON;\n    case OBJ_SBF:\n      return absl::GetFlag(FLAGS_rdb_sbf_chunked) ? RDB_TYPE_SBF2 : RDB_TYPE_SBF;\n    case OBJ_CMS:\n      return RDB_TYPE_CMS;\n  }\n  LOG(FATAL) << \"Unknown encoding \" << compact_enc << \" for type \" << type;\n  return 0; /* avoid warning */\n}\n\nRdbSerializerBase::RdbSerializerBase(CompressionMode compression_mode)\n    : compression_mode_(compression_mode), mem_buf_{4_KB}, tmp_buf_(nullptr) {\n}\n\nRdbSerializer::RdbSerializer(CompressionMode compression_mode, ConsumeFun consume_fun,\n                             size_t flush_threshold)\n    : RdbSerializerBase(compression_mode),\n      consume_fun_(std::move(consume_fun)),\n      flush_threshold_(flush_threshold) {\n}\n\nRdbSerializer::~RdbSerializer() {\n  VLOG(2) << \"compression mode: \" << uint32_t(compression_mode_);\n  if (compression_stats_) {\n    VLOG(2) << \"compression not effective: \" << compression_stats_->compression_no_effective;\n    VLOG(2) << \"string compression skipped: \" << compression_stats_->size_skip_count;\n    VLOG(2) << \"compression failed: \" << compression_stats_->compression_failed;\n    VLOG(2) << \"compressed blobs:\" << compression_stats_->compressed_blobs;\n  }\n}\n\nstd::error_code RdbSerializer::SaveValue(const PrimeValue& pv) {\n  std::error_code ec;\n  if (pv.ObjType() == OBJ_STRING) {\n    auto opt_int = pv.TryGetInt();\n    if (opt_int) {\n      ec = SaveLongLongAsString(*opt_int);\n    } else {\n      if (pv.IsExternal()) {\n        if (pv.IsCool()) {\n          return 
SaveValue(pv.GetCool().record->value);\n        }\n        LOG(FATAL) << \"External string not supported yet\";\n      } else {\n        ec = SaveString(pv.GetSlice(&tmp_str_));\n      }\n    }\n  } else {\n    ec = SaveObject(pv);\n  }\n  return ec;\n}\n\nerror_code RdbSerializer::SelectDb(uint32_t dbid) {\n  if (dbid == last_entry_db_index_) {\n    return error_code{};\n  }\n  last_entry_db_index_ = dbid;\n  uint8_t buf[16];\n  buf[0] = RDB_OPCODE_SELECTDB;\n  unsigned enclen = WritePackedUInt(dbid, io::MutableBytes{buf}.subspan(1));\n  return WriteRaw(Bytes{buf, enclen + 1});\n}\n\n// Called by snapshot\nio::Result<uint8_t> RdbSerializer::SaveEntry(const PrimeKey& pk, const PrimeValue& pv,\n                                             uint64_t expire_ms, uint32_t mc_flags, DbIndex dbid) {\n  if (!pv.TagAllowsEmptyValue() && pv.Size() == 0) {\n    string_view key = pk.GetSlice(&tmp_str_);\n    LOG(DFATAL) << \"SaveEntry skipped empty PrimeValue with key: \" << key << \" with tag \"\n                << static_cast<int>(pv.Tag());\n    return 0;\n  }\n\n  DVLOG(3) << \"Selecting \" << dbid << \" previous: \" << last_entry_db_index_;\n  auto ec = SelectDb(dbid);\n  if (ec) {\n    return make_unexpected(ec);\n  }\n\n  /* Save the expire time */\n  if (expire_ms > 0) {\n    uint8_t buf[16] = {RDB_OPCODE_EXPIRETIME_MS};\n    absl::little_endian::Store64(buf + 1, expire_ms);\n    if (auto ec = WriteRaw(Bytes{buf, 9}); ec)\n      return make_unexpected(ec);\n  }\n\n  /* Save the key poperties */\n  uint32_t df_mask_flags = pk.IsSticky() ? DF_MASK_FLAG_STICKY : 0;\n  df_mask_flags |= pv.HasFlag() ? 
DF_MASK_FLAG_MC_FLAGS : 0;\n  if (df_mask_flags != 0) {\n    uint8_t buf[9] = {RDB_OPCODE_DF_MASK};\n    absl::little_endian::Store32(buf + 1, df_mask_flags);\n    size_t buf_size = 5;\n    if (df_mask_flags & DF_MASK_FLAG_MC_FLAGS) {\n      absl::little_endian::Store32(buf + buf_size, mc_flags);\n      buf_size += 4;\n    }\n    if (auto ec = WriteRaw(Bytes{buf, buf_size}); ec)\n      return make_unexpected(ec);\n  }\n\n  uint8_t rdb_type = RdbObjectType(pv);\n\n  string_view key = pk.GetSlice(&tmp_str_);\n  DVLOG(3) << ((void*)this) << \": Saving key/val start \" << key << \" in dbid=\" << dbid;\n\n  if (auto ec = WriteOpcode(rdb_type); ec)\n    return make_unexpected(ec);\n\n  if (auto ec = SaveString(key); ec)\n    return make_unexpected(ec);\n\n  if (auto ec = SaveValue(pv); ec) {\n    LOG(ERROR) << \"Problems saving value for key \" << key << \" in dbid=\" << dbid;\n    return make_unexpected(ec);\n  }\n\n  // We flush here because if the next element in the bucket we are serializing is a container,\n  // it will first serialize the first entry and then flush the internal buffer, even if\n  // crossed the limit.\n  PushToConsumerIfNeeded(FlushState::kFlushEndEntry);\n  return rdb_type;\n}\n\nerror_code RdbSerializer::SaveObject(const PrimeValue& pv) {\n  unsigned obj_type = pv.ObjType();\n  CHECK_NE(obj_type, OBJ_STRING);\n\n  if (obj_type == OBJ_LIST) {\n    return SaveListObject(pv);\n  }\n\n  if (obj_type == OBJ_SET) {\n    return SaveSetObject(pv);\n  }\n\n  if (obj_type == OBJ_HASH) {\n    return SaveHSetObject(pv);\n  }\n\n  if (obj_type == OBJ_ZSET) {\n    return SaveZSetObject(pv);\n  }\n\n  if (obj_type == OBJ_STREAM) {\n    return SaveStreamObject(pv);\n  }\n\n  if (obj_type == OBJ_JSON) {\n    return SaveJsonObject(pv);\n  }\n\n  if (obj_type == OBJ_SBF) {\n    return SaveSBFObject(pv);\n  }\n\n  if (obj_type == OBJ_CMS) {\n    return SaveCMSObject(pv);\n  }\n\n  LOG(ERROR) << \"Not implemented \" << obj_type;\n  return 
make_error_code(errc::function_not_supported);\n}\n\nerror_code RdbSerializer::SaveListObject(const PrimeValue& pv) {\n  /* Save a list value */\n  if (pv.Encoding() == kEncodingListPack) {\n    uint8_t* lp = (uint8_t*)pv.RObjPtr();\n    size_t len = 1;  // 1 node\n    RETURN_ON_ERR(SaveLen(len));\n\n    // Node 1\n    RETURN_ON_ERR(SaveLen(QUICKLIST_NODE_CONTAINER_PACKED));\n    size_t lp_bytes = lpBytes(lp);\n    RETURN_ON_ERR(SaveString(lp, lp_bytes));\n\n    PushToConsumerIfNeeded(FlushState::kFlushEndEntry);\n    return error_code{};\n  }\n\n  DCHECK_EQ(pv.Encoding(), kEncodingQL2);\n  QList* ql = reinterpret_cast<QList*>(pv.RObjPtr());\n  const QList::Node* node = ql->Head();\n  size_t len = ql->node_count();\n\n  RETURN_ON_ERR(SaveLen(len));\n\n  while (node) {\n    DVLOG(3) << \"QL node (encoding/container/sz): \" << node->encoding << \"/\" << node->container\n             << \"/\" << node->sz;\n\n    // Use listpack encoding\n    RETURN_ON_ERR(SaveLen(node->container));\n    if (node->IsCompressed()) {\n      void* data;\n      size_t compress_len = node->GetLZF(&data);\n      // TODO: LZ4 compression mode is not enabled for list objects yet.\n      // If it will be enabled in the future, we need to adjust here accordingly.\n      RETURN_ON_ERR(SaveLzfBlob(Bytes{reinterpret_cast<uint8_t*>(data), compress_len}, node->sz));\n    } else {\n      RETURN_ON_ERR(SaveString(node->entry, node->sz));\n      FlushState flush_state = FlushState::kFlushMidEntry;\n      if (node->next == nullptr)\n        flush_state = FlushState::kFlushEndEntry;\n      PushToConsumerIfNeeded(flush_state);\n    }\n    node = node->next;\n  }\n  return error_code{};\n}\n\nerror_code RdbSerializer::SaveSetObject(const PrimeValue& obj) {\n  if (obj.Encoding() == kEncodingStrMap2) {\n    StringSet* set = (StringSet*)obj.RObjPtr();\n\n    // We don't expire any data during serialization\n    set->set_time(0);\n\n    // due to we avoid expiring we can use UpperBoundSize() instead of 
SlowSize()\n    RETURN_ON_ERR(SaveLen(set->UpperBoundSize()));\n    for (auto it = set->begin(); it != set->end();) {\n      RETURN_ON_ERR(SaveString(string_view{*it, sdslen(*it)}));\n      if (set->ExpirationUsed()) {\n        int64_t expiry = -1;\n        if (it.HasExpiry())\n          expiry = it.ExpiryTime();\n        RETURN_ON_ERR(SaveLongLongAsString(expiry));\n      }\n      ++it;\n      FlushState flush_state = FlushState::kFlushMidEntry;\n      if (it == set->end())\n        flush_state = FlushState::kFlushEndEntry;\n      PushToConsumerIfNeeded(flush_state);\n    }\n    set->set_time(MemberTimeSeconds(GetCurrentTimeMs()));\n  } else {\n    CHECK_EQ(obj.Encoding(), kEncodingIntSet);\n    intset* is = (intset*)obj.RObjPtr();\n    size_t len = intsetBlobLen(is);\n\n    RETURN_ON_ERR(SaveString(string_view{(char*)is, len}));\n  }\n\n  return error_code{};\n}\n\nerror_code RdbSerializer::SaveHSetObject(const PrimeValue& pv) {\n  DCHECK_EQ(OBJ_HASH, pv.ObjType());\n\n  if (pv.Encoding() == kEncodingStrMap2) {\n    StringMap* string_map = (StringMap*)pv.RObjPtr();\n\n    // We don't expire any data during serialization\n    string_map->set_time(0);\n\n    // due to we avoid expiring we can use UpperBoundSize() instead of SlowSize()\n    RETURN_ON_ERR(SaveLen(string_map->UpperBoundSize()));\n\n    for (auto it = string_map->begin(); it != string_map->end();) {\n      const auto& [k, v] = *it;\n      RETURN_ON_ERR(SaveString(string_view{k, sdslen(k)}));\n      RETURN_ON_ERR(SaveString(string_view{v, sdslen(v)}));\n      if (string_map->ExpirationUsed()) {\n        int64_t expiry = -1;\n        if (it.HasExpiry())\n          expiry = it.ExpiryTime();\n        RETURN_ON_ERR(SaveLongLongAsString(expiry));\n      }\n      ++it;\n      FlushState flush_state = FlushState::kFlushMidEntry;\n      if (it == string_map->end())\n        flush_state = FlushState::kFlushEndEntry;\n      PushToConsumerIfNeeded(flush_state);\n    }\n\n    
string_map->set_time(MemberTimeSeconds(GetCurrentTimeMs()));\n  } else {\n    CHECK_EQ(kEncodingListPack, pv.Encoding());\n\n    uint8_t* lp = (uint8_t*)pv.RObjPtr();\n    size_t lp_bytes = lpBytes(lp);\n    RETURN_ON_ERR(SaveString((uint8_t*)lp, lp_bytes));\n  }\n\n  return error_code{};\n}\n\nerror_code RdbSerializer::SaveZSetObject(const PrimeValue& pv) {\n  DCHECK_EQ(OBJ_ZSET, pv.ObjType());\n  if (pv.Encoding() == OBJ_ENCODING_SKIPLIST) {\n    auto* zs = static_cast<detail::SortedMap*>(pv.RObjPtr());\n\n    RETURN_ON_ERR(SaveLen(zs->Size()));\n    std::error_code ec;\n\n    const size_t total = zs->Size();\n    size_t count = 0;\n\n    // Iterate over the sorted map and save the key and score.\n    // The order is important (from smallest to biggest) - so that the loader\n    // will load the entries faster.\n    zs->Iterate(0, total, false, [&](sds ele, double score) mutable {\n      ec = SaveString(string_view{ele, sdslen(ele)});\n      if (ec)\n        return false;\n      ec = SaveBinaryDouble(score);\n      if (ec)\n        return false;\n      ++count;\n      FlushState flush_state = FlushState::kFlushMidEntry;\n      if (count == total)\n        flush_state = FlushState::kFlushEndEntry;\n\n      PushToConsumerIfNeeded(flush_state);\n      return true;\n    });\n  } else {\n    CHECK_EQ(pv.Encoding(), unsigned(OBJ_ENCODING_LISTPACK));\n    uint8_t* lp = (uint8_t*)pv.RObjPtr();\n    size_t lp_bytes = lpBytes(lp);\n\n    RETURN_ON_ERR(SaveString((uint8_t*)lp, lp_bytes));\n  }\n\n  return error_code{};\n}\n\nerror_code RdbSerializer::SaveStreamObject(const PrimeValue& pv) {\n  /* Store how many listpacks we have inside the radix tree. */\n  stream* s = (stream*)pv.RObjPtr();\n  const size_t rax_size = raxSize(s->rax);\n\n  RETURN_ON_ERR(SaveLen(rax_size));\n\n  /* Serialize all the listpacks inside the radix tree as they are,\n   * when loading back, we'll use the first entry of each listpack\n   * to insert it back into the radix tree. 
*/\n  raxIterator ri;\n  raxStart(&ri, s->rax);\n  raxSeek(&ri, \"^\", NULL, 0);\n\n  auto stop_listpacks_rax = absl::MakeCleanup([&] { raxStop(&ri); });\n\n  for (size_t i = 0; raxNext(&ri); i++) {\n    uint8_t* lp = (uint8_t*)ri.data;\n    size_t lp_bytes = lpBytes(lp);\n\n    RETURN_ON_ERR(SaveString((uint8_t*)ri.key, ri.key_len));\n    RETURN_ON_ERR(SaveString(lp, lp_bytes));\n\n    PushToConsumerIfNeeded(FlushState::kFlushMidEntry);\n  }\n\n  std::move(stop_listpacks_rax).Invoke();\n\n  /* Save the number of elements inside the stream. We cannot obtain\n   * this easily later, since our macro nodes should be checked for\n   * number of items: not a great CPU / space tradeoff. */\n\n  RETURN_ON_ERR(SaveLen(s->length));\n\n  /* Save the last entry ID. */\n  RETURN_ON_ERR(SaveLen(s->last_id.ms));\n  RETURN_ON_ERR(SaveLen(s->last_id.seq));\n\n  uint8_t rdb_type = RdbObjectType(pv);\n\n  // 'first_id', 'max_deleted_entry_id' and 'entries_added' are added\n  // in RDB_TYPE_STREAM_LISTPACKS_2\n  if (rdb_type >= RDB_TYPE_STREAM_LISTPACKS_2) {\n    /* Save the first entry ID. */\n    RETURN_ON_ERR(SaveLen(s->first_id.ms));\n    RETURN_ON_ERR(SaveLen(s->first_id.seq));\n\n    /* Save the maximal tombstone ID. */\n    RETURN_ON_ERR(SaveLen(s->max_deleted_entry_id.ms));\n    RETURN_ON_ERR(SaveLen(s->max_deleted_entry_id.seq));\n\n    /* Save the offset. */\n    RETURN_ON_ERR(SaveLen(s->entries_added));\n  }\n  /* The consumer groups and their clients are part of the stream\n   * type, so serialize every consumer group. */\n\n  /* Save the number of groups. */\n  size_t num_cgroups = s->cgroups ? raxSize(s->cgroups) : 0;\n  RETURN_ON_ERR(SaveLen(num_cgroups));\n\n  if (num_cgroups) {\n    /* Serialize each consumer group. */\n    raxStart(&ri, s->cgroups);\n    raxSeek(&ri, \"^\", NULL, 0);\n\n    auto stop_cgroups_rax = absl::MakeCleanup([&] { raxStop(&ri); });\n\n    while (raxNext(&ri)) {\n      streamCG* cg = (streamCG*)ri.data;\n\n      /* Save the group name. 
*/\n      RETURN_ON_ERR(SaveString((uint8_t*)ri.key, ri.key_len));\n\n      /* Last ID. */\n      RETURN_ON_ERR(SaveLen(cg->last_id.ms));\n\n      RETURN_ON_ERR(SaveLen(cg->last_id.seq));\n\n      if (rdb_type >= RDB_TYPE_STREAM_LISTPACKS_2) {\n        /* Save the group's logical reads counter. */\n        RETURN_ON_ERR(SaveLen(cg->entries_read));\n      }\n\n      /* Save the global PEL. */\n      RETURN_ON_ERR(SaveStreamPEL(cg->pel, true));\n\n      /* Save the consumers of this group. */\n      RETURN_ON_ERR(SaveStreamConsumers(rdb_type >= RDB_TYPE_STREAM_LISTPACKS_3, cg));\n    }\n  }\n\n  PushToConsumerIfNeeded(FlushState::kFlushEndEntry);\n\n  return error_code{};\n}\n\nerror_code RdbSerializer::SaveJsonObject(const PrimeValue& pv) {\n  auto json_string = pv.GetJson()->to_string();\n  return SaveString(json_string);\n}\n\nstd::error_code RdbSerializer::SaveSBFObject(const PrimeValue& pv) {\n  SBF* sbf = pv.GetSBF();\n\n  // options to allow format mutations in the future.\n  RETURN_ON_ERR(SaveLen(0));  // options - reserved\n  RETURN_ON_ERR(SaveBinaryDouble(sbf->grow_factor()));\n  RETURN_ON_ERR(SaveBinaryDouble(sbf->fp_probability()));\n  RETURN_ON_ERR(SaveLen(sbf->prev_size()));\n  RETURN_ON_ERR(SaveLen(sbf->current_size()));\n  RETURN_ON_ERR(SaveLen(sbf->max_capacity()));\n  RETURN_ON_ERR(SaveLen(sbf->num_filters()));\n\n  for (unsigned i = 0; i < sbf->num_filters(); ++i) {\n    RETURN_ON_ERR(SaveLen(sbf->hashfunc_cnt(i)));\n\n    string_view blob = sbf->data(i);\n    if (absl::GetFlag(FLAGS_rdb_sbf_chunked)) {\n      RETURN_ON_ERR(SaveLen(blob.size()));\n\n      for (size_t offset = 0; offset < blob.size(); offset += kFilterChunkSize) {\n        size_t chunk_len = std::min(kFilterChunkSize, blob.size() - offset);\n        RETURN_ON_ERR(SaveString(blob.substr(offset, chunk_len)));\n      }\n    } else {\n      RETURN_ON_ERR(SaveString(blob));\n    }\n\n    FlushState flush_state = FlushState::kFlushMidEntry;\n    if ((i + 1) == sbf->num_filters())\n      
flush_state = FlushState::kFlushEndEntry;\n    PushToConsumerIfNeeded(flush_state);\n  }\n\n  return {};\n}\n\nstd::error_code RdbSerializer::SaveCMSObject(const PrimeValue& pv) {\n  CMS* cms = pv.GetCMS();\n\n  RETURN_ON_ERR(SaveLen(cms->width()));\n  RETURN_ON_ERR(SaveLen(cms->depth()));\n  RETURN_ON_ERR(SaveLen(cms->total_count()));\n\n  size_t num_counters = cms->NumCounters();\n  const int64_t* data = cms->Data();\n\n  // Serialize counters as little-endian 64-bit values\n  std::vector<uint64_t> buf(num_counters);\n  for (size_t i = 0; i < num_counters; ++i) {\n    absl::little_endian::Store64(&buf[i], static_cast<uint64_t>(data[i]));\n  }\n  RETURN_ON_ERR(\n      WriteRaw(Bytes{reinterpret_cast<const uint8_t*>(buf.data()), buf.size() * sizeof(uint64_t)}));\n\n  return {};\n}\n\n/* Save a long long value as either an encoded string or a string. */\nerror_code RdbSerializer::SaveLongLongAsString(int64_t value) {\n  uint8_t buf[32];\n  unsigned enclen = EncodeInteger(value, buf);\n  if (enclen > 0) {\n    return WriteRaw(Bytes{buf, enclen});\n  }\n\n  /* Encode as string */\n  enclen = ll2string((char*)buf, 32, value);\n  DCHECK_LT(enclen, 32u);\n\n  RETURN_ON_ERR(SaveLen(enclen));\n  return WriteRaw(Bytes{buf, enclen});\n}\n\n/* Saves a double for RDB 8 or greater, where IE754 binary64 format is assumed.\n * We just make sure the integer is always stored in little endian, otherwise\n * the value is copied verbatim from memory to disk.\n *\n * Return -1 on error, the size of the serialized value on success. */\nerror_code RdbSerializer::SaveBinaryDouble(double val) {\n  static_assert(sizeof(val) == 8);\n  const uint64_t* src = reinterpret_cast<const uint64_t*>(&val);\n  uint8_t buf[8];\n  absl::little_endian::Store64(buf, *src);\n\n  return WriteRaw(Bytes{buf, sizeof(buf)});\n}\n\nerror_code RdbSerializer::SaveStreamPEL(rax* pel, bool nacks) {\n  /* Number of entries in the PEL. */\n\n  RETURN_ON_ERR(SaveLen(raxSize(pel)));\n\n  /* Save each entry. 
*/\n  raxIterator ri;\n  raxStart(&ri, pel);\n  raxSeek(&ri, \"^\", NULL, 0);\n  auto cleanup = absl::MakeCleanup([&] { raxStop(&ri); });\n\n  while (raxNext(&ri)) {\n    /* We store IDs in raw form as 128 big big endian numbers, like\n     * they are inside the radix tree key. */\n    RETURN_ON_ERR(WriteRaw(Bytes{ri.key, sizeof(streamID)}));\n\n    if (nacks) {\n      streamNACK* nack = (streamNACK*)ri.data;\n      uint8_t buf[8];\n      absl::little_endian::Store64(buf, nack->delivery_time);\n      RETURN_ON_ERR(WriteRaw(buf));\n      RETURN_ON_ERR(SaveLen(nack->delivery_count));\n\n      /* We don't save the consumer name: we'll save the pending IDs\n       * for each consumer in the consumer PEL, and resolve the consumer\n       * at loading time. */\n    }\n  }\n\n  return error_code{};\n}\n\nerror_code RdbSerializer::SaveStreamConsumers(bool save_active, streamCG* cg) {\n  /* Number of consumers in this consumer group. */\n\n  RETURN_ON_ERR(SaveLen(raxSize(cg->consumers)));\n\n  /* Save each consumer. */\n  raxIterator ri;\n  raxStart(&ri, cg->consumers);\n  raxSeek(&ri, \"^\", NULL, 0);\n  auto cleanup = absl::MakeCleanup([&] { raxStop(&ri); });\n  uint8_t buf[8];\n\n  while (raxNext(&ri)) {\n    streamConsumer* consumer = (streamConsumer*)ri.data;\n\n    /* Consumer name. */\n    RETURN_ON_ERR(SaveString(ri.key, ri.key_len));\n\n    /* seen time. */\n    absl::little_endian::Store64(buf, consumer->seen_time);\n    RETURN_ON_ERR(WriteRaw(buf));\n\n    if (save_active) {\n      /* Active time. */\n      absl::little_endian::Store64(buf, consumer->active_time);\n      RETURN_ON_ERR(WriteRaw(buf));\n    }\n    /* Consumer PEL, without the ACKs (see last parameter of the function\n     * passed with value of 0), at loading time we'll lookup the ID\n     * in the consumer group global PEL and will put a reference in the\n     * consumer local PEL. 
*/\n\n    RETURN_ON_ERR(SaveStreamPEL(consumer->pel, false));\n  }\n\n  return error_code{};\n}\n\nerror_code RdbSerializer::SendEofAndChecksum() {\n  VLOG(2) << \"SendEof\";\n  /* EOF opcode */\n  RETURN_ON_ERR(WriteOpcode(RDB_OPCODE_EOF));\n\n  /* CRC64 checksum. It will be zero if checksum computation is disabled, the\n   * loading code skips the check in this case. */\n  uint8_t buf[8];\n  uint64_t chksum = 0;\n\n  absl::little_endian::Store64(buf, chksum);\n  return WriteRaw(buf);\n}\n\nerror_code RdbSerializer::SendJournalOffset(uint64_t journal_offset) {\n  VLOG(2) << \"SendJournalOffset\";\n  RETURN_ON_ERR(WriteOpcode(RDB_OPCODE_JOURNAL_OFFSET));\n  uint8_t buf[sizeof(uint64_t)];\n  absl::little_endian::Store64(buf, journal_offset);\n  return WriteRaw(buf);\n}\n\nerror_code RdbSerializer::SaveHNSWEntry(const search::HnswNodeData& node,\n                                        absl::Span<uint8_t> tmp_buf) {\n  // Binary format using little-endian encoding for efficiency:\n  // - internal_id: 4 bytes (uint32_t)\n  // - global_id: 8 bytes (uint64_t)\n  // - level: 4 bytes (int)\n  // - for each level (0 to level): links_num (4 bytes) + links (4 bytes each)\n\n  size_t total_size = node.TotalSize();\n  DCHECK_LE(total_size, tmp_buf.size());\n  uint8_t* ptr = tmp_buf.data();\n\n  absl::little_endian::Store32(ptr, static_cast<uint32_t>(node.internal_id));\n  ptr += 4;\n  absl::little_endian::Store64(ptr, node.global_id);\n  ptr += 8;\n  absl::little_endian::Store32(ptr, static_cast<uint32_t>(node.level));\n  ptr += 4;\n\n  for (const auto& level_links : node.levels_links) {\n    absl::little_endian::Store32(ptr, static_cast<uint32_t>(level_links.size()));\n    ptr += 4;\n    for (uint32_t link : level_links) {\n      absl::little_endian::Store32(ptr, link);\n      ptr += 4;\n    }\n  }\n\n  return WriteRaw(Bytes{tmp_buf.data(), total_size});\n}\n\nerror_code RdbSerializerBase::SendFullSyncCut() {\n  VLOG(1) << \"SendFullSyncCut\";\n  
RETURN_ON_ERR(WriteOpcode(RDB_OPCODE_FULLSYNC_END));\n\n  // RDB_OPCODE_FULLSYNC_END followed by 8 bytes of 0.\n  // The reason for this is that some opcodes require to have at least 8 bytes of data\n  // in the read buffer when consuming the rdb data, and since RDB_OPCODE_FULLSYNC_END is one of\n  // the last opcodes sent to replica, we respect this requirement by sending a blob of 8 bytes.\n  uint8_t buf[8] = {0};\n  return WriteRaw(buf);\n}\n\nstd::error_code RdbSerializerBase::WriteOpcode(uint8_t opcode) {\n  return WriteRaw(::io::Bytes{&opcode, 1});\n}\n\nsize_t RdbSerializerBase::GetBufferCapacity() const {\n  return mem_buf_.Capacity();\n}\n\nsize_t RdbSerializerBase::GetTempBufferSize() const {\n  return tmp_buf_.size();\n}\n\nerror_code RdbSerializerBase::WriteRaw(const io::Bytes& buf) {\n  mem_buf_.Reserve(mem_buf_.InputLen() + buf.size());\n  IoBuf::Bytes dest = mem_buf_.AppendBuffer();\n  memcpy(dest.data(), buf.data(), buf.size());\n  mem_buf_.CommitWrite(buf.size());\n  return error_code{};\n}\n\nstring RdbSerializerBase::Flush(RdbSerializerBase::FlushState flush_state) {\n  auto bytes = PrepareFlush(flush_state);\n  if (bytes.empty())\n    return {};\n\n  if (bytes.size() > serialization_peak_bytes_) {\n    serialization_peak_bytes_ = bytes.size();\n  }\n\n  DVLOG(2) << \"FlushToSink \" << bytes.size() << \" bytes\";\n\n  string result(io::View(bytes));\n\n  mem_buf_.ConsumeInput(bytes.size());\n\n  return result;\n}\n\nstring RdbSerializer::Flush(FlushState flush_state) {\n  string res = RdbSerializerBase::Flush(flush_state);\n\n  // After every flush we should write the DB index again because the blobs in the channel are\n  // interleaved and multiple savers can correspond to a single writer (in case of single file rdb\n  // snapshot)\n  last_entry_db_index_ = kInvalidDbId;\n\n  return res;\n}\n\nnamespace {\nusing VersionBuffer = std::array<char, sizeof(uint16_t)>;\nusing CrcBuffer = std::array<char, sizeof(uint64_t)>;\n\nVersionBuffer 
MakeRdbVersion() {\n  VersionBuffer buf;\n  buf[0] = RDB_SER_VERSION & 0xff;\n  buf[1] = (RDB_SER_VERSION >> 8) & 0xff;\n  return buf;\n}\n\nCrcBuffer MakeCheckSum(std::string_view dump_res, bool ignore_crc) {\n  uint64_t chksum =\n      ignore_crc ? 0 : crc64(0, reinterpret_cast<const uint8_t*>(dump_res.data()), dump_res.size());\n  CrcBuffer buf;\n  absl::little_endian::Store64(buf.data(), chksum);\n  return buf;\n}\n\nvoid AppendFooter(bool ignore_crc, string* dest) {\n  auto to_bytes = [dest](const auto& buf) { dest->append(buf.data(), buf.size()); };\n\n  /* Write the footer, this is how it looks like:\n   * ----------------+---------------------+---------------+\n   * ... RDB payload | 2 bytes RDB version | 8 bytes CRC64 |\n   * ----------------+---------------------+---------------+\n   * RDB version and CRC are both in little endian.\n   */\n  const auto ver = MakeRdbVersion();\n  to_bytes(ver);\n  const auto crc = MakeCheckSum(*dest, ignore_crc);\n  to_bytes(crc);\n}\n}  // namespace\n\nstring RdbSerializerBase::DumpValue(RdbSerializer* serializer, const PrimeValue& obj,\n                                    bool ignore_crc) {\n  CompressionMode serializer_used_compression_mode = serializer->compression_mode_;\n  if (serializer_used_compression_mode != CompressionMode::NONE) {\n    serializer->SetCompressionMode(CompressionMode::SINGLE_ENTRY);\n  }\n\n  // According to Redis code we need to\n  // 1. Save the value itself - without the key\n  // 2. 
Save footer: this include the RDB version and the CRC value for the message\n  auto type = RdbObjectType(obj);\n  DVLOG(2) << \"We are going to dump object type: \" << int(type);\n\n  std::error_code ec = serializer->WriteOpcode(type);\n  CHECK(!ec);\n  ec = serializer->SaveValue(obj);\n  CHECK(!ec);  // make sure that fully was successful\n  string res = serializer->Flush(RdbSerializerBase::FlushState::kFlushMidEntry);\n  CHECK(!res.empty());             // make sure that fully was successful\n  AppendFooter(ignore_crc, &res);  // version and crc\n  CHECK_GT(res.size(), 10u);\n\n  serializer->SetCompressionMode(serializer_used_compression_mode);\n  return res;\n}\n\nstring RdbSerializerBase::DumpValue(const PrimeValue& obj, bool ignore_crc) {\n  RdbSerializer serializer(GetDefaultCompressionMode());\n  return DumpValue(&serializer, obj, ignore_crc);\n}\n\nsize_t RdbSerializerBase::SerializedLen() const {\n  return mem_buf_.InputLen();\n}\n\nio::Bytes RdbSerializerBase::PrepareFlush(RdbSerializerBase::FlushState flush_state) {\n  size_t sz = mem_buf_.InputLen();\n  if (sz == 0)\n    return {};\n\n  bool is_last_chunk = flush_state == FlushState::kFlushEndEntry;\n  VLOG(2) << \"PrepareFlush:\" << is_last_chunk << \" \" << number_of_chunks_;\n  if (is_last_chunk && number_of_chunks_ == 0) {\n    if (compression_mode_ == CompressionMode::MULTI_ENTRY_ZSTD ||\n        compression_mode_ == CompressionMode::MULTI_ENTRY_LZ4) {\n      CompressBlob();\n    }\n  }\n\n  number_of_chunks_ = is_last_chunk ? 
0 : (number_of_chunks_ + 1);\n\n  return mem_buf_.InputBuffer();\n}\n\nerror_code RdbSerializerBase::WriteJournalEntry(std::string_view serialized_entry) {\n  VLOG(2) << \"WriteJournalEntry\";\n  RETURN_ON_ERR(WriteOpcode(RDB_OPCODE_JOURNAL_BLOB));\n  RETURN_ON_ERR(SaveLen(1));\n  RETURN_ON_ERR(SaveString(serialized_entry));\n  return error_code{};\n}\n\nerror_code RdbSerializerBase::SaveString(string_view val) {\n  /* Try integer encoding */\n  if (val.size() <= 11) {\n    uint8_t buf[16];\n\n    unsigned enclen = TryIntegerEncoding(val, buf);\n    if (enclen > 0) {\n      return WriteRaw(Bytes{buf, unsigned(enclen)});\n    }\n  }\n\n  /* Try LZF compression - under 20 bytes it's unable to compress even\n   * aaaaaaaaaaaaaaaaaa so skip it */\n  size_t len = val.size();\n  if ((compression_mode_ == CompressionMode::SINGLE_ENTRY) && (len > 20)) {\n    size_t comprlen, outlen = len;\n    tmp_buf_.resize(outlen + 1);\n\n    // Due to stack constraints im fibers we can not allow large arrays on stack.\n    // Therefore I am lazily allocating it on heap. It's not fixed in quicklist.\n    if (!lzf_) {\n      lzf_.reset(new LZF_HSLOT[1 << HLOG]);\n    }\n\n    /* We require at least 8 bytes compression for this to be worth it */\n    comprlen = lzf_compress(val.data(), len, tmp_buf_.data(), outlen, lzf_.get());\n    if (comprlen > 0 && comprlen < len - 8 && comprlen < size_t(len * 0.85)) {\n      return SaveLzfBlob(Bytes{tmp_buf_.data(), comprlen}, len);\n    }\n  }\n\n  /* Store verbatim */\n  RETURN_ON_ERR(SaveLen(len));\n  if (len > 0) {\n    Bytes b{reinterpret_cast<const uint8_t*>(val.data()), val.size()};\n    RETURN_ON_ERR(WriteRaw(b));\n  }\n  return error_code{};\n}\n\nerror_code RdbSerializerBase::SaveLen(size_t len) {\n  uint8_t buf[16];\n  unsigned enclen = WritePackedUInt(len, buf);\n  return WriteRaw(Bytes{buf, enclen});\n}\n\nerror_code RdbSerializerBase::SaveLzfBlob(const io::Bytes& src, size_t uncompressed_len) {\n  /* Data compressed! 
Let's save it on disk */\n  uint8_t opcode = (RDB_ENCVAL << 6) | RDB_ENC_LZF;\n  RETURN_ON_ERR(WriteOpcode(opcode));\n  RETURN_ON_ERR(SaveLen(src.size()));\n  RETURN_ON_ERR(SaveLen(uncompressed_len));\n  RETURN_ON_ERR(WriteRaw(src));\n\n  return error_code{};\n}\n\nAlignedBuffer::AlignedBuffer(size_t cap, ::io::Sink* upstream)\n    : capacity_(cap), upstream_(upstream) {\n  aligned_buf_ = (char*)mi_malloc_aligned(kBufLen, 4_KB);\n}\n\nAlignedBuffer::~AlignedBuffer() {\n  mi_free(aligned_buf_);\n}\n\nio::Result<size_t> AlignedBuffer::WriteSome(const iovec* v, uint32_t len) {\n  size_t total_len = 0;\n  uint32_t vindx = 0;\n\n  for (; vindx < len; ++vindx) {\n    auto item = v[vindx];\n    total_len += item.iov_len;\n\n    while (buf_offs_ + item.iov_len > capacity_) {\n      size_t to_write = capacity_ - buf_offs_;\n      memcpy(aligned_buf_ + buf_offs_, item.iov_base, to_write);\n      iovec ivec{.iov_base = aligned_buf_, .iov_len = capacity_};\n      error_code ec = upstream_->Write(&ivec, 1);\n      if (ec)\n        return nonstd::make_unexpected(ec);\n\n      item.iov_len -= to_write;\n      item.iov_base = reinterpret_cast<char*>(item.iov_base) + to_write;\n      buf_offs_ = 0;\n    }\n\n    DCHECK_GT(item.iov_len, 0u);\n    memcpy(aligned_buf_ + buf_offs_, item.iov_base, item.iov_len);\n    buf_offs_ += item.iov_len;\n  }\n\n  return total_len;\n}\n\n// Note that it may write more than AlignedBuffer has at this point since it rounds up the length\n// to the nearest page boundary.\nerror_code AlignedBuffer::Flush() {\n  size_t len = (buf_offs_ + kAmask) & (~kAmask);\n  if (len == 0)\n    return error_code{};\n\n  iovec ivec{.iov_base = aligned_buf_, .iov_len = len};\n  buf_offs_ = 0;\n\n  return upstream_->Write(&ivec, 1);\n}\n\n// Ensures SliceSnapshot is destroyed on its owning shard thread.\nstruct OwnerThreadDeleter {\n  ShardId owner_sid;\n\n  OwnerThreadDeleter() : owner_sid(0) {\n  }\n\n  explicit OwnerThreadDeleter(ShardId sid) : owner_sid(sid) {\n  
}\n\n  static OwnerThreadDeleter FromShard(EngineShard* shard) {\n    return OwnerThreadDeleter(shard->shard_id());\n  }\n\n  void operator()(SliceSnapshot* ptr) const {\n    if (!ptr)\n      return;\n\n    if (EngineShard::tlocal() && EngineShard::tlocal()->shard_id() == owner_sid) {\n      delete ptr;\n      return;\n    }\n\n    shard_set->Await(owner_sid, [ptr] { delete ptr; });\n  }\n};\n\nusing SnapshotPtr = std::unique_ptr<SliceSnapshot, OwnerThreadDeleter>;\n\nclass RdbSaver::Impl final : public SliceSnapshot::SnapshotDataConsumerInterface {\n private:\n  void CleanShardSnapshots();\n  SnapshotPtr CreateSliceSnapshot(EngineShard* shard, DbSlice* db_slice, ExecutionState* cntx);\n\n public:\n  // We pass K=sz to say how many producers are pushing data in order to maintain\n  // correct closing semantics - channel is closing when K producers marked it as closed.\n  Impl(bool align_writes, unsigned producers_len, CompressionMode compression_mode,\n       SaveMode save_mode, io::Sink* sink, DflyVersion replica_dfly_version);\n\n  ~Impl();\n\n  void StartSnapshotting(bool stream_journal, ExecutionState* cntx, EngineShard* shard);\n\n  void StopSnapshotting(EngineShard* shard);\n  void WaitForSnapshottingFinish(EngineShard* shard);\n\n  // Pushes snapshot data. Called from SliceSnapshot\n  void ConsumeData(std::string data, ExecutionState* cntx) override;\n  // Finalizes the snapshot writing. Called from SliceSnapshot\n  void Finalize() override;\n\n  // used only for legacy rdb save flows.\n  error_code ConsumeChannel(const ExecutionState* cll);\n\n  void FillFreqMap(RdbTypeFreqMap* dest) const;\n\n  error_code SaveAuxFieldStrStr(string_view key, string_view val);\n\n  void CancelInShard(EngineShard* shard);\n\n  size_t GetTotalBuffersSize() const;\n\n  RdbSaver::SnapshotStats GetCurrentSnapshotProgress() const;\n\n  error_code FlushSerializer();\n\n  error_code FlushSink() {\n    return aligned_buf_ ? 
aligned_buf_->Flush() : error_code{};\n  }\n\n  size_t Size() const {\n    return shard_snapshots_.size();\n  }\n\n  RdbSerializer* serializer() {\n    return &meta_serializer_;\n  }\n\n  int64_t last_write_ts() const {\n    return last_write_time_ns_;\n  }\n\n private:\n  error_code WriteRecord(io::Bytes src);\n\n  SnapshotPtr& GetSnapshot(EngineShard* shard);\n\n  io::Sink* sink_;\n  int64_t last_write_time_ns_ = -1;  // last write call.\n  vector<SnapshotPtr> shard_snapshots_;\n\n  // used for serializing non-body components in the calling fiber.\n  RdbSerializer meta_serializer_;\n  using RecordChannel = SizeTrackingChannel<string, base::mpmc_bounded_queue<string>>;\n  std::optional<RecordChannel> channel_;\n  std::optional<AlignedBuffer> aligned_buf_;\n\n  // Single entry compression is compatible with redis rdb snapshot\n  // Multi entry compression is available only on df snapshot, this will\n  // make snapshot size smaller and opreation faster.\n  CompressionMode compression_mode_;\n  SaveMode save_mode_;\n  DflyVersion replica_dfly_version_ = DflyVersion::CURRENT_VER;\n};\n\n// We pass K=sz to say how many producers are pushing data in order to maintain\n// correct closing semantics - channel is closing when K producers marked it as closed.\nRdbSaver::Impl::Impl(bool align_writes, unsigned producers_len, CompressionMode compression_mode,\n                     SaveMode sm, io::Sink* sink, DflyVersion replica_dfly_version)\n    : sink_(sink),\n      shard_snapshots_(producers_len),\n      meta_serializer_(CompressionMode::NONE),  // Note: I think there is not need for compression\n                                                // at all in meta serializer\n      compression_mode_(compression_mode) {\n  if (align_writes) {\n    aligned_buf_.emplace(kBufLen, sink);\n    sink_ = &aligned_buf_.value();\n  }\n  if (sm == SaveMode::RDB) {\n    channel_.emplace(kChannelLen, producers_len);\n  }\n  save_mode_ = sm;\n  replica_dfly_version_ = 
replica_dfly_version;\n}\n\nvoid RdbSaver::Impl::CleanShardSnapshots() {\n  // Deleter dispatches destruction to the owning shard thread when needed\n  shard_snapshots_.clear();\n}\n\nRdbSaver::Impl::~Impl() {\n  CleanShardSnapshots();\n}\n\nerror_code RdbSaver::Impl::SaveAuxFieldStrStr(string_view key, string_view val) {\n  auto& ser = meta_serializer_;\n  RETURN_ON_ERR(ser.WriteOpcode(RDB_OPCODE_AUX));\n  RETURN_ON_ERR(ser.SaveString(key));\n  RETURN_ON_ERR(ser.SaveString(val));\n\n  return error_code{};\n}\n\nerror_code RdbSaver::Impl::ConsumeChannel(const ExecutionState* es) {\n  error_code io_error;\n  string record;\n\n  auto& stats = ServerState::tlocal()->stats;\n  DCHECK(channel_.has_value());\n  // we can not exit on io-error since we spawn fibers that push data.\n  // TODO: we may signal them to stop processing and exit asap in case of the error.\n  while (channel_->Pop(record)) {\n    if (io_error || (!es->IsRunning()))\n      continue;\n\n    do {\n      if (!es->IsRunning())\n        continue;\n\n      auto start = absl::GetCurrentTimeNanos();\n      io_error = WriteRecord(io::Buffer(record));\n      if (io_error) {\n        break;  // from the inner TryPop loop.\n      }\n\n      auto delta_usec = (absl::GetCurrentTimeNanos() - start) / 1'000;\n      stats.rdb_save_usec += delta_usec;\n      stats.rdb_save_count++;\n    } while ((channel_->TryPop(record)));\n  }  // while (channel_.Pop())\n\n  for (auto& ptr : shard_snapshots_) {\n    ptr->WaitSnapshotting();\n  }\n  VLOG(1) << \"ConsumeChannel finished \" << io_error;\n\n  DCHECK(!channel_->TryPop(record));\n\n  return io_error;\n}\n\nerror_code RdbSaver::Impl::WriteRecord(io::Bytes src) {\n  // For huge values, we break them up into chunks of upto several MBs to send in a single call,\n  // so we could be more responsive.\n  error_code ec;\n  size_t start_size = src.size();\n  last_write_time_ns_ = absl::GetCurrentTimeNanos();\n  do {\n    io::Bytes part = src.subspan(0, 8_MB);\n    
src.remove_prefix(part.size());\n\n    ec = sink_->Write(part);\n\n    int64_t now = absl::GetCurrentTimeNanos();\n    unsigned delta_ms = (now - last_write_time_ns_) / 1000'000;\n    last_write_time_ns_ = now;\n\n    // Log extreme timings into the log for visibility.\n    LOG_IF(INFO, delta_ms > 1000) << \"Channel write took \" << delta_ms << \" ms while writing \"\n                                  << part.size() << \"/\" << start_size;\n    if (ec) {\n      LOG(INFO) << \"Error writing to rdb sink \" << ec.message();\n      break;\n    }\n  } while (!src.empty());\n  last_write_time_ns_ = -1;\n  return ec;\n}\n\nvoid RdbSaver::Impl::StartSnapshotting(bool stream_journal, ExecutionState* cntx,\n                                       EngineShard* shard) {\n  auto& s = GetSnapshot(shard);\n  auto& db_slice = namespaces->GetDefaultNamespace().GetDbSlice(shard->shard_id());\n\n  s = CreateSliceSnapshot(shard, &db_slice, cntx);\n\n  const auto allow_flush = (save_mode_ != SaveMode::RDB) ? 
SliceSnapshot::SnapshotFlush::kAllow\n                                                         : SliceSnapshot::SnapshotFlush::kDisallow;\n\n  s->Start(stream_journal, allow_flush);\n}\n\nSnapshotPtr RdbSaver::Impl::CreateSliceSnapshot(EngineShard* shard, DbSlice* db_slice,\n                                                ExecutionState* cntx) {\n  return SnapshotPtr(\n      new SliceSnapshot(compression_mode_, db_slice, this, cntx, replica_dfly_version_),\n      OwnerThreadDeleter::FromShard(shard));\n}\n\n// called on save flow\nvoid RdbSaver::Impl::WaitForSnapshottingFinish(EngineShard* shard) {\n  auto& snapshot = GetSnapshot(shard);\n  CHECK(snapshot);\n  snapshot->WaitSnapshotting();\n}\n\nvoid RdbSaver::Impl::ConsumeData(std::string data, ExecutionState* cntx) {\n  if (!cntx->IsRunning()) {\n    return;\n  }\n  if (channel_) {  // Rdb write to channel\n    channel_->Push(std::move(data));\n  } else {  // Write directly to socket\n    auto ec = WriteRecord(io::Buffer(data));\n    if (ec) {\n      cntx->ReportError(ec);\n    }\n  }\n}\n\nvoid RdbSaver::Impl::Finalize() {\n  if (channel_) {\n    channel_->StartClosing();\n  }\n}\n\n// called from replication flow\nvoid RdbSaver::Impl::StopSnapshotting(EngineShard* shard) {\n  auto& snapshot = GetSnapshot(shard);\n  CHECK(snapshot);\n  snapshot->FinalizeJournalStream(false);\n}\n\nvoid RdbSaver::Impl::CancelInShard(EngineShard* shard) {\n  auto& snapshot = GetSnapshot(shard);\n  if (snapshot) {  // Cancel can be called before snapshotting started.\n    snapshot->FinalizeJournalStream(true);\n  }\n}\n\n// This function is called from connection thread when info command is invoked.\n// All accessed variableds must be thread safe, as they are fetched not from the rdb saver thread.\nsize_t RdbSaver::Impl::GetTotalBuffersSize() const {\n  std::atomic<size_t> channel_bytes{0};\n  std::atomic<size_t> serializer_bytes{0};\n\n  auto cb = [this, &channel_bytes, &serializer_bytes](ShardId sid) {\n    auto& snapshot = 
shard_snapshots_[sid];\n    // before create a snapshot we save header so shard_snapshots_ are vector of nullptr until we\n    // start snapshots saving\n    if (!snapshot)\n      return;\n    if (channel_.has_value())\n      channel_bytes.fetch_add(channel_->GetSize(), memory_order_relaxed);\n    serializer_bytes.store(snapshot->GetBufferCapacity() + snapshot->GetTempBuffersSize(),\n                           memory_order_relaxed);\n  };\n\n  if (shard_snapshots_.size() == 1) {\n    cb(0);\n  } else {\n    shard_set->RunBriefInParallel([&](EngineShard* es) { cb(es->shard_id()); });\n  }\n\n  VLOG(2) << \"channel_bytes:\" << channel_bytes.load(memory_order_relaxed)\n          << \" serializer_bytes: \" << serializer_bytes.load(memory_order_relaxed);\n  return channel_bytes.load(memory_order_relaxed) + serializer_bytes.load(memory_order_relaxed);\n}\n\nRdbSaver::SnapshotStats RdbSaver::Impl::GetCurrentSnapshotProgress() const {\n  std::vector<RdbSaver::SnapshotStats> results(shard_snapshots_.size());\n\n  auto cb = [this, &results](ShardId sid) {\n    auto& snapshot = shard_snapshots_[sid];\n    // before create a snapshot we save header so shard_snapshots_ are vector of nullptr until we\n    // start snapshots saving\n    if (!snapshot)\n      return;\n    results[sid] = snapshot->GetCurrentSnapshotProgress();\n  };\n\n  if (shard_snapshots_.size() == 1) {\n    cb(0);\n    return results[0];\n  }\n\n  shard_set->RunBriefInParallel([&](EngineShard* es) { cb(es->shard_id()); });\n  RdbSaver::SnapshotStats init{0, 0};\n  return std::accumulate(\n      results.begin(), results.end(), init, [](auto init, auto pr) -> RdbSaver::SnapshotStats {\n        return {init.current_keys + pr.current_keys, init.total_keys + pr.total_keys};\n      });\n}\n\nerror_code RdbSaver::Impl::FlushSerializer() {\n  last_write_time_ns_ = absl::GetCurrentTimeNanos();\n  string blob = serializer()->Flush(RdbSerializerBase::FlushState::kFlushMidEntry);\n  error_code ec;\n  if (!blob.empty()) {\n 
   ec = sink_->Write(io::Buffer(blob));\n  }\n  last_write_time_ns_ = -1;\n  return ec;\n}\n\nnamespace {\n\n// Collect search index definitions and optionally HNSW metadata.\n// search_indices always gets simple \"index_name cmd\" restore commands.\n// For summary shards, hnsw_index_metadata gets JSON with HNSW graph metadata,\n// and search_synonyms gets synonym group restore commands.\nvoid CollectSearchIndices([[maybe_unused]] const EngineShard& shard,\n                          [[maybe_unused]] StringVec* search_indices,\n                          [[maybe_unused]] StringVec* search_synonyms,\n                          [[maybe_unused]] StringVec* hnsw_index_metadata,\n                          [[maybe_unused]] bool is_summary) {\n#ifdef WITH_SEARCH\n  auto* indices = shard.search_indices();\n  for (const auto& index_name : indices->GetIndexNames()) {\n    auto* index = indices->GetIndex(index_name);\n    auto index_info = index->GetInfo();\n\n    // Always store the simple restore command format\n    std::string restore_cmd = absl::StrCat(index_name, \" \", index_info.BuildRestoreCommand());\n    search_indices->emplace_back(std::move(restore_cmd));\n\n    if (!is_summary)\n      continue;\n\n    // Collect HNSW metadata for vector field (first one found), for now we don't support multiple\n    // vector fields per index serialization\n    for (const auto& [fident, finfo] : index_info.base_index.schema.fields) {\n      if (finfo.type == search::SchemaField::VECTOR &&\n          !(finfo.flags & search::SchemaField::NOINDEX)) {\n        if (auto hnsw_index = GlobalHnswIndexRegistry::Instance().Get(index_name, finfo.short_name);\n            hnsw_index) {\n          auto meta = hnsw_index->GetMetadata();\n          TmpJson meta_json;\n          meta_json[\"index_name\"] = index_name;\n          meta_json[\"field_name\"] = finfo.short_name;\n          meta_json[\"max_elements\"] = meta.max_elements;\n          meta_json[\"cur_element_count\"] = 
meta.cur_element_count;\n          meta_json[\"maxlevel\"] = meta.maxlevel;\n          meta_json[\"enterpoint_node\"] = meta.enterpoint_node;\n          hnsw_index_metadata->emplace_back(meta_json.to_string());\n          break;\n        }\n      }\n    }\n\n    // Save synonym groups\n    const auto& synonym_groups = index->GetSynonyms().GetGroups();\n    for (const auto& [group_id, terms] : synonym_groups) {\n      if (!terms.empty()) {\n        std::string syn_cmd =\n            absl::StrCat(index_name, \" \", group_id, \" \", absl::StrJoin(terms, \" \"));\n        search_synonyms->emplace_back(std::move(syn_cmd));\n      }\n    }\n  }\n#endif\n}\n\n}  // namespace\n\nRdbSaver::GlobalData RdbSaver::GetGlobalData(const Service* service, bool is_summary) {\n  StringVec script_bodies, search_indices, search_synonyms, hnsw_index_metadata;\n  size_t table_mem_result = 0;\n\n  if (!is_summary) {\n    shard_set->RunBriefInParallel([&](EngineShard* shard) {\n      if (shard->shard_id() == 0)\n        CollectSearchIndices(*shard, &search_indices, &search_synonyms, &hnsw_index_metadata,\n                             is_summary);\n    });\n    return RdbSaver::GlobalData{std::move(script_bodies), std::move(search_indices),\n                                std::move(search_synonyms), std::move(hnsw_index_metadata),\n                                table_mem_result};\n  }\n  {\n    // For summary file: collect all global data\n    auto scripts = service->script_mgr()->GetAll();\n    script_bodies.reserve(scripts.size());\n    for (auto& [sha, data] : scripts)\n      script_bodies.push_back(std::move(data.body));\n  }\n\n  atomic<size_t> table_mem{0};\n  shard_set->RunBriefInParallel([&](EngineShard* shard) {\n    if (shard->shard_id() == 0)\n      CollectSearchIndices(*shard, &search_indices, &search_synonyms, &hnsw_index_metadata,\n                           is_summary);\n\n    auto& db_slice = namespaces->GetDefaultNamespace().GetDbSlice(shard->shard_id());\n    size_t 
shard_table_mem = 0;\n    for (size_t db_id = 0; db_id < db_slice.db_array_size(); ++db_id) {\n      auto* db_table = db_slice.GetDBTable(db_id);\n      if (db_table) {\n        shard_table_mem += db_table->table_memory();\n      }\n    }\n    table_mem.fetch_add(shard_table_mem, memory_order_relaxed);\n  });\n\n  return RdbSaver::GlobalData{std::move(script_bodies), std::move(search_indices),\n                              std::move(search_synonyms), std::move(hnsw_index_metadata),\n                              table_mem.load(memory_order_relaxed)};\n}\n\nvoid RdbSaver::Impl::FillFreqMap(RdbTypeFreqMap* dest) const {\n  for (auto& ptr : shard_snapshots_) {\n    const RdbTypeFreqMap& src_map = ptr->freq_map();\n    for (const auto& k_v : src_map)\n      (*dest)[k_v.first] += k_v.second;\n  }\n}\n\nSnapshotPtr& RdbSaver::Impl::GetSnapshot(EngineShard* shard) {\n  // For single shard configuration, we maintain only one snapshot,\n  // so we do not have to map it via shard_id.\n  unsigned sid = shard_snapshots_.size() == 1 ? 
0 : shard->shard_id();\n  CHECK(sid < shard_snapshots_.size());\n  return shard_snapshots_[sid];\n}\n\nRdbSaver::RdbSaver(::io::Sink* sink, SaveMode save_mode, bool align_writes, std::string snapshot_id,\n                   DflyVersion replica_dfly_version)\n    : replica_dfly_version_(replica_dfly_version), snapshot_id_(std::move(snapshot_id)) {\n  CHECK_NOTNULL(sink);\n  CompressionMode compression_mode = GetDefaultCompressionMode();\n  int producer_count = 0;\n  switch (save_mode) {\n    case SaveMode::SUMMARY:\n      producer_count = 0;\n      if (compression_mode >= CompressionMode::SINGLE_ENTRY) {\n        compression_mode_ = CompressionMode::SINGLE_ENTRY;\n      } else {\n        compression_mode_ = CompressionMode::NONE;\n      }\n      break;\n    case SaveMode::SINGLE_SHARD:\n    case SaveMode::SINGLE_SHARD_WITH_SUMMARY:\n      producer_count = 1;\n      compression_mode_ = compression_mode;\n      break;\n    case SaveMode::RDB:\n      producer_count = shard_set->size();\n      if (compression_mode >= CompressionMode::SINGLE_ENTRY) {\n        compression_mode_ = CompressionMode::SINGLE_ENTRY;\n      } else {\n        compression_mode_ = CompressionMode::NONE;\n      }\n      break;\n  }\n  VLOG(1) << \"Rdb save using compression mode:\" << uint32_t(compression_mode_);\n  impl_.reset(new Impl(align_writes, producer_count, compression_mode_, save_mode, sink,\n                       replica_dfly_version_));\n  save_mode_ = save_mode;\n}\n\nRdbSaver::~RdbSaver() {\n  // Decommit local memory.\n  // We create an RdbSaver for each thread, so each one will Decommit for itself.\n  auto* tlocal = ServerState::tlocal();\n  tlocal->DecommitMemory(ServerState::kAllMemory);\n}\n\nvoid RdbSaver::StartSnapshotInShard(bool stream_journal, ExecutionState* cntx, EngineShard* shard) {\n  impl_->StartSnapshotting(stream_journal, cntx, shard);\n}\n\nerror_code RdbSaver::WaitSnapshotInShard(EngineShard* shard) {\n  impl_->WaitForSnapshottingFinish(shard);\n  return 
SaveEpilog();\n}\n\nerror_code RdbSaver::StopFullSyncInShard(EngineShard* shard) {\n  impl_->StopSnapshotting(shard);\n  return SaveEpilog();\n}\n\nerror_code RdbSaver::SaveHeader(const GlobalData& glob_state) {\n  char magic[16];\n  // We should use RDB_VERSION here from rdb.h when we ditch redis 6 support\n  // For now we serialize to an older version.\n  size_t sz = absl::SNPrintF(magic, sizeof(magic), \"REDIS%04d\", RDB_SER_VERSION);\n  CHECK_EQ(9u, sz);\n\n  RETURN_ON_ERR(impl_->serializer()->WriteRaw(Bytes{reinterpret_cast<uint8_t*>(magic), sz}));\n  RETURN_ON_ERR(SaveAux(glob_state));  // Should be first after magic\n  RETURN_ON_ERR(impl_->FlushSerializer());\n  return error_code{};\n}\n\nerror_code RdbSaver::SaveBody(const ExecutionState& cntx) {\n  RETURN_ON_ERR(impl_->FlushSerializer());\n\n  if (save_mode_ == SaveMode::RDB) {\n    VLOG(1) << \"SaveBody , snapshots count: \" << impl_->Size();\n    error_code io_error = impl_->ConsumeChannel(&cntx);\n    if (io_error) {\n      return io_error;\n    }\n    if (cntx.GetError()) {\n      return cntx.GetError();\n    }\n  } else {\n    DCHECK(save_mode_ == SaveMode::SUMMARY);\n  }\n\n  return SaveEpilog();\n}\n\nvoid RdbSaver::FillFreqMap(RdbTypeFreqMap* freq_map) {\n  freq_map->clear();\n  impl_->FillFreqMap(freq_map);\n}\n\nerror_code RdbSaver::SaveAux(const GlobalData& glob_state) {\n  // Should be first\n  if (!snapshot_id_.empty()) {\n    RETURN_ON_ERR(impl_->SaveAuxFieldStrStr(\"snapshot-id\", snapshot_id_));\n  }\n\n  /* Add a few fields about the state when the RDB was created. 
*/\n  RETURN_ON_ERR(impl_->SaveAuxFieldStrStr(\"redis-ver\", REDIS_VERSION));\n  RETURN_ON_ERR(impl_->SaveAuxFieldStrStr(\"df-ver\", GetVersion()));\n  RETURN_ON_ERR(SaveAuxFieldStrInt(\"redis-bits\", 64));\n\n  RETURN_ON_ERR(SaveAuxFieldStrInt(\"ctime\", time(NULL)));\n  auto used_mem = used_mem_current.load(memory_order_relaxed);\n  VLOG(1) << \"Used memory during save: \" << used_mem;\n  RETURN_ON_ERR(SaveAuxFieldStrInt(\"used-mem\", used_mem));\n  RETURN_ON_ERR(SaveAuxFieldStrInt(\"aof-preamble\", 0));\n\n  // Save lua scripts only in rdb or summary file\n  DCHECK(save_mode_ != SaveMode::SINGLE_SHARD || glob_state.lua_scripts.empty());\n  for (const string& s : glob_state.lua_scripts)\n    RETURN_ON_ERR(impl_->SaveAuxFieldStrStr(\"lua\", s));\n\n  if (save_mode_ == SaveMode::RDB) {\n    if (!glob_state.search_indices.empty())\n      LOG(WARNING) << \"Dragonfly search index data is incompatible with the RDB format\";\n  } else {\n    // Search index definitions - for non-summary shards only sent to replicas >= VER6,\n    // since older replicas only expect search-index from the summary shard.\n    bool send_search_index =\n        (save_mode_ != SaveMode::SINGLE_SHARD) || (replica_dfly_version_ >= DflyVersion::VER6);\n    if (send_search_index) {\n      for (const string& s : glob_state.search_indices)\n        RETURN_ON_ERR(impl_->SaveAuxFieldStrStr(\"search-index\", s));\n    }\n\n    // HNSW index metadata (JSON, summary only) - only for replicas >= VER6\n    if (replica_dfly_version_ >= DflyVersion::VER6) {\n      for (const string& s : glob_state.hnsw_index_metadata)\n        RETURN_ON_ERR(impl_->SaveAuxFieldStrStr(\"hnsw-index-metadata\", s));\n    }\n\n    // Save synonyms only in summary file\n    DCHECK(save_mode_ != SaveMode::SINGLE_SHARD || glob_state.search_synonyms.empty());\n    for (const string& s : glob_state.search_synonyms)\n      RETURN_ON_ERR(impl_->SaveAuxFieldStrStr(\"search-synonyms\", s));\n\n    if (save_mode_ == 
SaveMode::SINGLE_SHARD_WITH_SUMMARY || save_mode_ == SaveMode::SUMMARY) {\n      // We save the shard id in the summary file, so that we can restore it later.\n      RETURN_ON_ERR(SaveAuxFieldStrInt(\"shard-count\", shard_set->size()));\n      RETURN_ON_ERR(SaveAuxFieldStrInt(\"table-mem\", glob_state.table_used_memory));\n    }\n    if (EngineShard* shard = EngineShard::tlocal(); shard) {\n      RETURN_ON_ERR(SaveAuxFieldStrInt(\"shard-id\", shard->shard_id()));\n    }\n  }\n\n  // TODO: \"repl-stream-db\", \"repl-id\", \"repl-offset\"\n  return error_code{};\n}\n\nerror_code RdbSaver::SaveEpilog() {\n  RETURN_ON_ERR(impl_->serializer()->SendEofAndChecksum());\n\n  RETURN_ON_ERR(impl_->FlushSerializer());\n\n  return impl_->FlushSink();\n}\n\nerror_code RdbSaver::SaveAuxFieldStrInt(string_view key, int64_t val) {\n  char buf[LONG_STR_SIZE];\n  int vlen = ll2string(buf, sizeof(buf), val);\n  return impl_->SaveAuxFieldStrStr(key, string_view(buf, vlen));\n}\n\nvoid RdbSaver::CancelInShard(EngineShard* shard) {\n  impl_->CancelInShard(shard);\n}\n\nsize_t RdbSaver::GetTotalBuffersSize() const {\n  return impl_->GetTotalBuffersSize();\n}\n\nRdbSaver::SnapshotStats RdbSaver::GetCurrentSnapshotProgress() const {\n  return impl_->GetCurrentSnapshotProgress();\n}\n\nint64_t RdbSaver::GetLastWriteTime() const {\n  return impl_->last_write_ts();\n}\n\nvoid RdbSerializerBase::AllocateCompressorOnce() {\n  if (compressor_impl_) {\n    return;\n  }\n  if (compression_mode_ == CompressionMode::MULTI_ENTRY_ZSTD) {\n    compressor_impl_ = detail::CompressorImpl::CreateZstd();\n  } else if (compression_mode_ == CompressionMode::MULTI_ENTRY_LZ4) {\n    compressor_impl_ = detail::CompressorImpl::CreateLZ4();\n  } else {\n    LOG(FATAL) << \"Invalid compression mode \" << unsigned(compression_mode_);\n  }\n}\n\nvoid RdbSerializerBase::CompressBlob() {\n  if (!compression_stats_) {\n    compression_stats_.emplace(CompressionStats{});\n  }\n  Bytes blob_to_compress = 
mem_buf_.InputBuffer();\n  VLOG(2) << \"CompressBlob size \" << blob_to_compress.size();\n  size_t blob_size = blob_to_compress.size();\n\n  if (blob_size < kMinStrSizeToCompress || blob_size > kMaxStrSizeToCompress) {\n    ++compression_stats_->size_skip_count;\n    return;\n  }\n\n  AllocateCompressorOnce();\n\n  // Compress the data. We copy compressed data once into the internal buffer of compressor_impl_\n  // and then we copy it again into the mem_buf_.\n  //\n  // TODO: it is possible to avoid double copying here by changing the compressor interface,\n  // so that the compressor will accept the output buffer and return the final size. This requires\n  // exposing the additional compress bound interface as well.\n  io::Result<io::Bytes> res = compressor_impl_->Compress(blob_to_compress);\n  if (!res) {\n    ++compression_stats_->compression_failed;\n    return;\n  }\n\n  Bytes compressed_blob = *res;\n  if (compressed_blob.length() > blob_size * kMinCompressionReductionPrecentage) {\n    ++compression_stats_->compression_no_effective;\n    return;\n  }\n\n  // Clear membuf and write the compressed blob to it\n  mem_buf_.ConsumeInput(blob_size);\n  mem_buf_.Reserve(compressed_blob.length() + 1 + 9);  // reserve space for blob + opcode + len\n\n  // First write opcode for compressed string\n  auto dest = mem_buf_.AppendBuffer();\n  uint8_t opcode = compression_mode_ == CompressionMode::MULTI_ENTRY_ZSTD\n                       ? 
RDB_OPCODE_COMPRESSED_ZSTD_BLOB_START\n                       : RDB_OPCODE_COMPRESSED_LZ4_BLOB_START;\n  dest[0] = opcode;\n  mem_buf_.CommitWrite(1);\n\n  // Write encoded compressed blob len\n  dest = mem_buf_.AppendBuffer();\n  unsigned enclen = WritePackedUInt(compressed_blob.length(), dest);\n  mem_buf_.CommitWrite(enclen);\n\n  // Write compressed blob\n  dest = mem_buf_.AppendBuffer();\n  memcpy(dest.data(), compressed_blob.data(), compressed_blob.length());\n  mem_buf_.CommitWrite(compressed_blob.length());\n  ++compression_stats_->compressed_blobs;\n  auto& stats = ServerState::tlocal()->stats;\n  ++stats.compressed_blobs;\n}\n\nsize_t RdbSerializer::GetTempBufferSize() const {\n  return RdbSerializerBase::GetTempBufferSize() + tmp_str_.size();\n}\n\nvoid RdbSerializer::PushToConsumerIfNeeded(RdbSerializerBase::FlushState flush_state) {\n  if (consume_fun_ && SerializedLen() > flush_threshold_) {\n    string blob = Flush(flush_state);\n    DCHECK(!blob.empty());  // SerializedLen() > 0.\n    consume_fun_(std::move(blob));\n  }\n}\n\n}  // namespace dfly\n"
  },
  {
    "path": "src/server/rdb_save.h",
    "content": "// Copyright 2022, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n#pragma once\n\n#include <absl/container/flat_hash_map.h>\n#include <absl/types/span.h>\n\nextern \"C\" {\n#include \"redis/lzfP.h\"\n}\n\n#include <optional>\n\n#include \"base/pod_array.h\"\n#include \"io/io.h\"\n#include \"io/io_buf.h\"\n#include \"server/detail/compressor.h\"\n#include \"server/execution_state.h\"\n#include \"server/journal/serializer.h\"\n#include \"server/journal/types.h\"\n#include \"server/table.h\"\n#include \"server/version.h\"\n\ntypedef struct rax rax;\ntypedef struct streamCG streamCG;\n\nnamespace dfly::search {\nstruct HnswNodeData;\n}  // namespace dfly::search\n\nnamespace dfly {\n\n// keys are RDB_TYPE_xxx constants.\nusing RdbTypeFreqMap = absl::flat_hash_map<unsigned, size_t>;\n\nuint8_t RdbObjectType(const CompactObj& pv);\n\nclass EngineShard;\nclass Service;\n\nclass AlignedBuffer : public ::io::Sink {\n public:\n  using io::Sink::Write;\n\n  AlignedBuffer(size_t cap, ::io::Sink* upstream);\n  ~AlignedBuffer();\n\n  std::error_code Write(std::string_view buf) {\n    return Write(io::Buffer(buf));\n  }\n\n  io::Result<size_t> WriteSome(const iovec* v, uint32_t len) final;\n\n  std::error_code Flush();\n\n  ::io::Sink* upstream() {\n    return upstream_;\n  }\n\n private:\n  size_t capacity_;\n  ::io::Sink* upstream_;\n  char* aligned_buf_ = nullptr;\n\n  off_t buf_offs_ = 0;\n};\n\n// SaveMode for snapshot. Used by RdbSaver to adjust internals.\nenum class SaveMode {\n  SUMMARY,                    // Save only header values (summary.dfs). Expected to read no shards.\n  SINGLE_SHARD,               // Save single shard values (XXXX.dfs). Expected to read one shard.\n  SINGLE_SHARD_WITH_SUMMARY,  // Save single shard value with the global summary. Used in the\n                              // replication's fully sync stage.\n  RDB,                        // Save .rdb file. 
Expected to read all shards.\n};\n\nenum class CompressionMode : uint8_t { NONE, SINGLE_ENTRY, MULTI_ENTRY_ZSTD, MULTI_ENTRY_LZ4 };\n\nCompressionMode GetDefaultCompressionMode();\n\nusing StringVec = std::vector<std::string>;\n\nclass RdbSaver {\n public:\n  // Global data which doesn't belong to shards and is serialized in header\n  struct GlobalData {\n    const StringVec lua_scripts;          // bodies of lua scripts\n    const StringVec search_indices;       // ft.create commands to re-create search indices\n    const StringVec search_synonyms;      // ft.synupdate commands to restore synonyms\n    const StringVec hnsw_index_metadata;  // HNSW metadata JSON (summary only)\n    size_t table_used_memory = 0;         // total memory used by all tables in all shards\n  };\n\n  // single_shard - true means that we run RdbSaver on a single shard and we do not use\n  // to snapshot all the datastore shards.\n  // single_shard - false, means we capture all the data using a single RdbSaver instance\n  // (corresponds to legacy, redis compatible mode)\n  // if align_writes is true - writes data in aligned chunks of 4KB to fit direct I/O requirements.\n  // snapshot_id - allows to identify that group of files belongs to the same snapshot\n  // replica_dfly_version - upper bound for conditional serialization of new features.\n  explicit RdbSaver(::io::Sink* sink, SaveMode save_mode, bool align_writes,\n                    std::string snapshot_id, DflyVersion replica_dfly_version);\n\n  ~RdbSaver();\n\n  // Initiates the serialization in the shard's thread.\n  // cll allows breaking in the middle.\n  void StartSnapshotInShard(bool stream_journal, ExecutionState* cntx, EngineShard* shard);\n\n  // Stops full-sync serialization for replication in the shard's thread.\n  std::error_code StopFullSyncInShard(EngineShard* shard);\n\n  // Wait for snapshotting finish in shard thread. 
Called from save flows in shard thread.\n  std::error_code WaitSnapshotInShard(EngineShard* shard);\n\n  // Stores auxiliary (meta) values and header_info\n  std::error_code SaveHeader(const GlobalData& header_info);\n\n  // Writes the RDB file into sink. Waits for the serialization to finish.\n  // Called only for save rdb flow and save df on summary file.\n  std::error_code SaveBody(const ExecutionState& cntx);\n\n  // Fills freq_map with the histogram of rdb types.\n  void FillFreqMap(RdbTypeFreqMap* freq_map);\n\n  void CancelInShard(EngineShard* shard);\n\n  SaveMode Mode() const {\n    return save_mode_;\n  }\n\n  // Get total size of all rdb serializer buffers and items currently placed in channel\n  size_t GetTotalBuffersSize() const;\n\n  struct SnapshotStats {\n    size_t current_keys = 0;\n    size_t total_keys = 0;\n    size_t big_value_preemptions = 0;\n  };\n\n  SnapshotStats GetCurrentSnapshotProgress() const;\n\n  // Fetch global data to be serialized in snapshot.\n  // is_summary: true for summary file (full data with JSON search indices),\n  //             false for per-shard files (only simple search index restore commands)\n  static GlobalData GetGlobalData(const Service* service, bool is_summary);\n\n  // Returns time in nanos of start of the last pending write interaction.\n  // Returns -1 if no write operations are currently pending.\n  int64_t GetLastWriteTime() const;\n\n private:\n  class Impl;\n\n  std::error_code SaveEpilog();\n\n  std::error_code SaveAux(const GlobalData&);\n  std::error_code SaveAuxFieldStrInt(std::string_view key, int64_t val);\n\n  std::unique_ptr<Impl> impl_;\n  SaveMode save_mode_;\n  CompressionMode compression_mode_;\n  DflyVersion replica_dfly_version_ = DflyVersion::CURRENT_VER;\n  std::string snapshot_id_;\n};\n\nclass RdbSerializer;\nclass RdbSerializerBase {\n public:\n  enum class FlushState : uint8_t { kFlushMidEntry, kFlushEndEntry };\n\n  explicit RdbSerializerBase(CompressionMode compression_mode);\n  
virtual ~RdbSerializerBase() = default;\n\n  // Dumps `obj` in DUMP command format into `out`. Uses default compression mode.\n  static std::string DumpValue(const PrimeValue& obj, bool ignore_crc = false);\n  static std::string DumpValue(RdbSerializer* serializer, const PrimeValue& obj,\n                               bool ignore_crc = false);\n\n  // Internal buffer size. Might shrink after flush due to compression.\n  size_t SerializedLen() const;\n\n  // Flush internal buffer and return serialized blob.\n  virtual std::string Flush(FlushState flush_state);\n\n  size_t GetBufferCapacity() const;\n  virtual size_t GetTempBufferSize() const;\n\n  std::error_code WriteRaw(const ::io::Bytes& buf);\n\n  // Write journal entry as an embedded journal blob.\n  std::error_code WriteJournalEntry(std::string_view entry);\n\n  // Send FULL_SYNC_CUT opcode to notify that all static data was sent.\n  std::error_code SendFullSyncCut();\n\n  std::error_code WriteOpcode(uint8_t opcode);\n\n  std::error_code SaveLen(size_t len);\n  std::error_code SaveString(std::string_view val);\n  std::error_code SaveString(const uint8_t* buf, size_t len) {\n    return SaveString(io::View(io::Bytes{buf, len}));\n  }\n\n  uint64_t GetSerializationPeakBytes() const {\n    return serialization_peak_bytes_;\n  }\n\n  void SetCompressionMode(CompressionMode mode) {\n    compression_mode_ = mode;\n  }\n\n protected:\n  // Prepare internal buffer for flush. 
Compress it.\n  io::Bytes PrepareFlush(FlushState flush_state);\n\n  // If membuf data is compressable use compression impl to compress the data and write it to membuf\n  void CompressBlob();\n  void AllocateCompressorOnce();\n\n  std::error_code SaveLzfBlob(const ::io::Bytes& src, size_t uncompressed_len);\n\n  CompressionMode compression_mode_;\n  io::IoBuf mem_buf_;\n  std::unique_ptr<detail::CompressorImpl> compressor_impl_;\n\n  static constexpr size_t kFilterChunkSize = 1ULL << 26;\n  static constexpr size_t kMinStrSizeToCompress = 256;\n  static constexpr size_t kMaxStrSizeToCompress = 1 * 1024 * 1024;\n  static constexpr double kMinCompressionReductionPrecentage = 0.95;\n  struct CompressionStats {\n    uint32_t compression_no_effective = 0;\n    uint32_t size_skip_count = 0;\n    uint32_t compression_failed = 0;\n    uint32_t compressed_blobs = 0;\n  };\n  std::optional<CompressionStats> compression_stats_;\n  base::PODArray<uint8_t> tmp_buf_;\n  std::unique_ptr<LZF_HSLOT[]> lzf_;\n  size_t number_of_chunks_ = 0;\n\n  uint64_t serialization_peak_bytes_ = 0;\n};\n\nclass RdbSerializer : public RdbSerializerBase {\n public:\n  // ConsumeFun is called when internal buffer exceeds flush_threshold.\n  // The callback receives the extracted data.\n  using ConsumeFun = std::function<void(std::string)>;\n\n  explicit RdbSerializer(CompressionMode compression_mode, ConsumeFun consume_fun = {},\n                         size_t flush_threshold = 0);\n\n  ~RdbSerializer();\n\n  std::string Flush(FlushState flush_state) override;\n  std::error_code SelectDb(uint32_t dbid);\n\n  // Must be called in the thread to which `it` belongs.\n  // Returns the serialized rdb_type or the error.\n  // expire_ms = 0 means no expiry.\n  // This function might preempt if flush_fun_ is used.\n  io::Result<uint8_t> SaveEntry(const PrimeKey& pk, const PrimeValue& pv, uint64_t expire_ms,\n                                uint32_t mc_flags, DbIndex dbid);\n\n  // This would work for either 
string or an object.\n  // The arg pv is taken from it->second if accessing\n  // this by finding the key. This function is used\n  // for the dump command - thus it is public function.\n  // This function might preempt if flush_fun_ is used.\n  std::error_code SaveValue(const PrimeValue& pv);\n\n  std::error_code SendJournalOffset(uint64_t journal_offset);\n\n  // Save HNSW index entry using provided tmp_buf for serialization to avoid repeated allocations.\n  std::error_code SaveHNSWEntry(const search::HnswNodeData& node, absl::Span<uint8_t> tmp_buf);\n\n  size_t GetTempBufferSize() const override;\n  std::error_code SendEofAndChecksum();\n\n private:\n  // Might preempt if flush_fun_ is used\n  std::error_code SaveObject(const PrimeValue& pv);\n  std::error_code SaveListObject(const PrimeValue& pv);\n  std::error_code SaveSetObject(const PrimeValue& pv);\n  std::error_code SaveHSetObject(const PrimeValue& pv);\n  std::error_code SaveZSetObject(const PrimeValue& pv);\n  std::error_code SaveStreamObject(const PrimeValue& obj);\n  std::error_code SaveJsonObject(const PrimeValue& pv);\n  std::error_code SaveSBFObject(const PrimeValue& pv);\n  std::error_code SaveCMSObject(const PrimeValue& pv);\n\n  std::error_code SaveLongLongAsString(int64_t value);\n  std::error_code SaveBinaryDouble(double val);\n  std::error_code SaveStreamPEL(rax* pel, bool nacks);\n  std::error_code SaveStreamConsumers(bool save_active, streamCG* cg);\n\n  // Might preempt\n  void PushToConsumerIfNeeded(FlushState flush_state);\n\n  std::string tmp_str_;\n  DbIndex last_entry_db_index_ = kInvalidDbId;\n  ConsumeFun consume_fun_;\n  size_t flush_threshold_ = 0;\n};\n\n}  // namespace dfly\n"
  },
  {
    "path": "src/server/rdb_test.cc",
    "content": "// Copyright 2022, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n#include <gmock/gmock.h>\n\nextern \"C\" {\n#include \"redis/crc64.h\"\n#include \"redis/listpack.h\"\n#include \"redis/redis_aux.h\"\n#include \"redis/zmalloc.h\"\n}\n\n#include <absl/flags/reflection.h>\n#include <mimalloc.h>\n\n#include \"base/flags.h\"\n#include \"base/gtest.h\"\n#include \"base/logging.h\"\n#include \"facade/facade_test.h\"  // needed to find operator== for RespExpr.\n#include \"io/file.h\"\n#include \"server/engine_shard_set.h\"\n#include \"server/rdb_load.h\"\n#include \"server/rdb_save.h\"\n#include \"server/test_utils.h\"\n\nusing namespace testing;\nusing namespace std;\nusing namespace util;\nusing namespace facade;\nusing absl::SetFlag;\nusing absl::StrCat;\n\nABSL_DECLARE_FLAG(int32, list_compress_depth);\nABSL_DECLARE_FLAG(int32, list_max_listpack_size);\nABSL_DECLARE_FLAG(dfly::CompressionMode, compression_mode);\nABSL_DECLARE_FLAG(bool, rdb_ignore_expiry);\nABSL_DECLARE_FLAG(uint32_t, num_shards);\nABSL_DECLARE_FLAG(bool, rdb_sbf_chunked);\nABSL_DECLARE_FLAG(bool, serialize_hnsw_index);\nABSL_DECLARE_FLAG(bool, deserialize_hnsw_index);\n\nnamespace dfly {\n\nstatic const auto kMatchNil = ArgType(RespExpr::NIL);\n\nclass RdbTest : public BaseFamilyTest {\n protected:\n  void SetUp();\n\n  io::FileSource GetSource(string name);\n\n  std::error_code LoadRdb(const string& filename) {\n    return pp_->at(0)->Await([&] {\n      io::FileSource fs = GetSource(filename);\n\n      RdbLoadContext load_context;\n      RdbLoader loader(service_.get(), &load_context);\n      return loader.Load(&fs);\n    });\n  }\n};\n\nvoid RdbTest::SetUp() {\n  // Setting max_memory_limit must be before calling  InitWithDbFilename\n  max_memory_limit = 40000000;\n  absl::SetFlag(&FLAGS_serialize_hnsw_index, true);\n  absl::SetFlag(&FLAGS_deserialize_hnsw_index, true);\n  InitWithDbFilename();\n  CHECK_EQ(zmalloc_used_memory_tl, 
0);\n}\n\ninline const uint8_t* to_byte(const void* s) {\n  return reinterpret_cast<const uint8_t*>(s);\n}\n\nio::FileSource RdbTest::GetSource(string name) {\n  string rdb_file = base::ProgramRunfile(\"testdata/\" + name);\n  auto open_res = io::OpenRead(rdb_file, io::ReadonlyFile::Options{});\n  CHECK(open_res) << rdb_file;\n\n  return io::FileSource(*open_res);\n}\n\nstatic string FloatToBytes(float f) {\n  return string(reinterpret_cast<const char*>(&f), sizeof(float));\n}\n\nTEST_F(RdbTest, SnapshotIdTest) {\n  absl::SetFlag(&FLAGS_num_shards, num_threads_);\n  ResetService();\n\n  EXPECT_EQ(Run({\"mset\", \"0\", \"1\", \"2\", \"3\", \"4\", \"5\", \"6\", \"7\", \"8\", \"9\", \"10\", \"11\"}), \"OK\");\n\n  Run({\"save\", \"df\", \"test_dump\"});\n\n  absl::SetFlag(&FLAGS_num_shards, num_threads_ - 1);\n  ResetService();\n\n  EXPECT_EQ(Run({\"mset\", \"test1\", \"val1\", \"test2\", \"val2\"}), \"OK\");\n\n  Run({\"save\", \"df\", \"test_dump\"});\n\n  ResetService();\n\n  EXPECT_EQ(Run({\"dfly\", \"load\", \"test_dump-summary.dfs\"}), \"OK\");\n\n  auto resp = Run({\"keys\", \"*\"});\n  EXPECT_THAT(resp.GetVec(), UnorderedElementsAre(\"test1\", \"test2\"));\n}\n\nTEST_F(RdbTest, Crc) {\n  std::string_view s{\"TEST\"};\n\n  uint64_t c = crc64(0, to_byte(s.data()), s.size());\n  ASSERT_NE(c, 0);\n\n  uint64_t c2 = crc64(c, to_byte(s.data()), s.size());\n  EXPECT_NE(c, c2);\n\n  uint64_t c3 = crc64(c, to_byte(&c), sizeof(c));\n  EXPECT_EQ(c3, 0);\n\n  s = \"COOLTEST\";\n  c = crc64(0, to_byte(s.data()), 8);\n  c2 = crc64(0, to_byte(s.data()), 4);\n  c3 = crc64(c2, to_byte(s.data() + 4), 4);\n  EXPECT_EQ(c, c3);\n\n  c2 = crc64(0, to_byte(s.data() + 4), 4);\n  c3 = crc64(c2, to_byte(s.data()), 4);\n  EXPECT_NE(c, c3);\n}\n\nTEST_F(RdbTest, LoadEmpty) {\n  auto ec = LoadRdb(\"empty.rdb\");\n  ASSERT_FALSE(ec) << ec;\n}\n\nTEST_F(RdbTest, LoadSmall6) {\n  // The rdb file contians keys that already expired, we want to continue loading them in this test.\n  
absl::FlagSaver fs;\n  SetTestFlag(\"rdb_ignore_expiry\", \"true\");\n\n  auto ec = LoadRdb(\"redis6_small.rdb\");\n\n  ASSERT_FALSE(ec) << ec.message();\n\n  auto resp = Run({\"scan\", \"0\"});\n\n  ASSERT_THAT(resp, ArrLen(2));\n  EXPECT_THAT(StrArray(resp.GetVec()[1]),\n              UnorderedElementsAre(\"list1\", \"hset_zl\", \"list2\", \"zset_sl\", \"intset\", \"set1\",\n                                   \"zset_zl\", \"hset_ht\", \"intkey\", \"strkey\"));\n  EXPECT_THAT(Run({\"get\", \"intkey\"}), \"1234567\");\n  EXPECT_THAT(Run({\"get\", \"strkey\"}), \"abcdefghjjjjjjjjjj\");\n\n  resp = Run({\"smembers\", \"intset\"});\n  ASSERT_THAT(resp, ArgType(RespExpr::ARRAY));\n  EXPECT_THAT(resp.GetVec(),\n              UnorderedElementsAre(\"111\", \"222\", \"1234\", \"3333\", \"4444\", \"67899\", \"76554\"));\n\n  // TODO: when we implement PEXPIRETIME we will be able to do it directly.\n  int ttl = CheckedInt({\"ttl\", \"set1\"});    // should expire at 1747008000.\n  EXPECT_GT(ttl + time(NULL), 1747007000);  // left 1000 seconds margin in case the clock is off.\n\n  Run({\"select\", \"1\"});\n  ASSERT_EQ(10, CheckedInt({\"dbsize\"}));\n  ASSERT_EQ(128, CheckedInt({\"strlen\", \"longggggggggggggggkeyyyyyyyyyyyyy:9\"}));\n  resp = Run({\"script\", \"exists\", \"4ca238f611c9d0ae4e9a75a5dbac22aedc379801\",\n              \"282297a0228f48cd3fc6a55de6316f31422f5d17\"});\n  ASSERT_THAT(resp, ArrLen(2));\n  EXPECT_THAT(resp.GetVec(), ElementsAre(IntArg(1), IntArg(1)));\n}\n\nTEST_F(RdbTest, Stream) {\n  auto ec = LoadRdb(\"redis6_stream.rdb\");\n\n  ASSERT_FALSE(ec) << ec.message();\n\n  auto resp = Run({\"type\", \"key:10\"});\n  EXPECT_EQ(resp, \"stream\");\n\n  resp = Run({\"xinfo\", \"groups\", \"key:0\"});\n  EXPECT_THAT(resp, ArrLen(2));\n  EXPECT_THAT(resp.GetVec()[0],\n              RespElementsAre(\"name\", \"g1\", \"consumers\", 0, \"pending\", 0, \"last-delivered-id\",\n                              \"1655444851524-3\", \"entries-read\", 128, \"lag\", 0));\n 
 EXPECT_THAT(resp.GetVec()[1],\n              RespElementsAre(\"name\", \"g2\", \"consumers\", 1, \"pending\", 0, \"last-delivered-id\",\n                              \"1655444851523-1\", \"entries-read\", kMatchNil, \"lag\", kMatchNil));\n\n  resp = Run({\"xinfo\", \"groups\", \"key:1\"});  // test dereferences array of size 1\n  EXPECT_THAT(resp, RespElementsAre(\"name\", \"g2\", \"consumers\", IntArg(0), \"pending\", IntArg(0),\n                                    \"last-delivered-id\", \"1655444851523-1\", \"entries-read\",\n                                    kMatchNil, \"lag\", kMatchNil));\n\n  resp = Run({\"xinfo\", \"groups\", \"key:2\"});\n  EXPECT_THAT(resp, ArrLen(0));\n\n  Run({\"save\"});\n}\n\nTEST_F(RdbTest, ComressionModeSaveDragonflyAndReload) {\n  Run({\"debug\", \"populate\", \"50000\"});\n  ASSERT_EQ(50000, CheckedInt({\"dbsize\"}));\n  // Check keys inserted are lower than 50,000.\n  auto resp = Run({\"keys\", \"key:[5-9][0-9][0-9][0-9][0-9]*\"});\n  EXPECT_EQ(resp.GetVec().size(), 0);\n\n  for (auto mode : {CompressionMode::NONE, CompressionMode::SINGLE_ENTRY,\n                    CompressionMode::MULTI_ENTRY_ZSTD, CompressionMode::MULTI_ENTRY_LZ4}) {\n    SetFlag(&FLAGS_compression_mode, mode);\n    RespExpr resp = Run({\"save\", \"df\"});\n    ASSERT_EQ(resp, \"OK\");\n\n    if (mode == CompressionMode::MULTI_ENTRY_ZSTD || mode == CompressionMode::MULTI_ENTRY_LZ4) {\n      EXPECT_GE(GetMetrics().coordinator_stats.compressed_blobs, 1);\n    }\n\n    auto save_info = service_->server_family().GetLastSaveInfo();\n    resp = Run({\"dfly\", \"load\", save_info.file_name});\n    ASSERT_EQ(resp, \"OK\");\n    ASSERT_EQ(50000, CheckedInt({\"dbsize\"}));\n  }\n}\n\nTEST_F(RdbTest, RdbLoaderOnReadCompressedDataShouldNotEnterEnsureReadFlow) {\n  SetFlag(&FLAGS_compression_mode, CompressionMode::MULTI_ENTRY_ZSTD);\n  for (int i = 0; i < 1000; ++i) {\n    Run({\"set\", StrCat(i), \"1\"});\n  }\n  RespExpr resp = Run({\"save\", \"df\"});\n  
ASSERT_EQ(resp, \"OK\");\n\n  auto save_info = service_->server_family().GetLastSaveInfo();\n  resp = Run({\"dfly\", \"load\", save_info.file_name});\n  ASSERT_EQ(resp, \"OK\");\n}\n\nTEST_F(RdbTest, SaveLoadSticky) {\n  Run({\"set\", \"a\", \"1\"});\n  Run({\"set\", \"b\", \"2\"});\n  Run({\"set\", \"c\", \"3\"});\n  Run({\"stick\", \"a\", \"b\"});\n  RespExpr resp = Run({\"save\", \"df\"});\n  ASSERT_EQ(resp, \"OK\");\n\n  resp = Run({\"debug\", \"reload\"});\n  ASSERT_EQ(resp, \"OK\");\n  EXPECT_THAT(Run({\"get\", \"a\"}), \"1\");\n  EXPECT_THAT(Run({\"get\", \"b\"}), \"2\");\n  EXPECT_THAT(Run({\"get\", \"c\"}), \"3\");\n  EXPECT_THAT(Run({\"stick\", \"a\", \"b\"}), IntArg(0));\n  EXPECT_THAT(Run({\"stick\", \"c\"}), IntArg(1));\n}\n\nTEST_F(RdbTest, ReloadSetSmallStringBug) {\n  auto str = absl::StrCat(std::string(32, 'X'));\n  Run({\"set\", \"small_key\", str});\n  auto resp = Run({\"debug\", \"reload\"});\n  ASSERT_EQ(resp, \"OK\");\n}\n\nTEST_F(RdbTest, Reload) {\n  absl::FlagSaver fs;\n\n  SetFlag(&FLAGS_list_compress_depth, 1);\n  SetFlag(&FLAGS_list_max_listpack_size, 1);  // limit listpack to a single element.\n\n  Run({\"set\", \"string_key\", \"val\"});\n  Run({\"set\", \"large_key\", string(511, 'L')});\n  Run({\"set\", \"huge_key\", string((1 << 17) - 10, 'H')});\n\n  Run({\"sadd\", \"set_key1\", \"val1\", \"val2\"});\n  Run({\"sadd\", \"intset_key\", \"1\", \"2\", \"3\"});\n  Run({\"hset\", \"small_hset\", \"field1\", \"val1\", \"field2\", \"val2\"});\n  Run({\"hset\", \"large_hset\", \"field1\", string(510, 'V'), string(120, 'F'), \"val2\"});\n\n  Run({\"rpush\", \"list_key1\", \"val\", \"val2\"});\n  Run({\"rpush\", \"list_key2\", \"head\", string(511, 'a'), string(500, 'b'), \"tail\"});\n\n  Run({\"zadd\", \"zs1\", \"1.1\", \"a\", \"-1.1\", \"b\"});\n  Run({\"zadd\", \"zs2\", \"1.1\", string(510, 'a'), \"-1.1\", string(502, 'b')});\n\n  Run({\"hset\", \"large_keyname\", string(240, 'X'), \"-5\"});\n  Run({\"hset\", \"large_keyname\", string(240, 
'Y'), \"-500\"});\n  Run({\"hset\", \"large_keyname\", string(240, 'Z'), \"-50000\"});\n\n  auto resp = Run({\"debug\", \"reload\"});\n  ASSERT_EQ(resp, \"OK\");\n\n  EXPECT_EQ(2, CheckedInt({\"scard\", \"set_key1\"}));\n  EXPECT_EQ(3, CheckedInt({\"scard\", \"intset_key\"}));\n  EXPECT_EQ(2, CheckedInt({\"hlen\", \"small_hset\"}));\n  EXPECT_EQ(2, CheckedInt({\"hlen\", \"large_hset\"}));\n  EXPECT_EQ(4, CheckedInt({\"LLEN\", \"list_key2\"}));\n  EXPECT_EQ(2, CheckedInt({\"ZCARD\", \"zs1\"}));\n  EXPECT_EQ(2, CheckedInt({\"ZCARD\", \"zs2\"}));\n\n  EXPECT_EQ(-5, CheckedInt({\"hget\", \"large_keyname\", string(240, 'X')}));\n  EXPECT_EQ(-500, CheckedInt({\"hget\", \"large_keyname\", string(240, 'Y')}));\n  EXPECT_EQ(-50000, CheckedInt({\"hget\", \"large_keyname\", string(240, 'Z')}));\n}\n\nTEST_F(RdbTest, ReloadTtl) {\n  Run({\"set\", \"key\", \"val\"});\n  Run({\"expire\", \"key\", \"1000\"});\n  Run({\"debug\", \"reload\"});\n  EXPECT_LT(990, CheckedInt({\"ttl\", \"key\"}));\n}\n\nTEST_F(RdbTest, ReloadExpired) {\n  Run({\"set\", \"key\", \"val\"});\n  Run({\"expire\", \"key\", \"2\"});\n  RespExpr resp = Run({\"save\", \"df\"});\n  ASSERT_EQ(resp, \"OK\");\n  auto save_info = service_->server_family().GetLastSaveInfo();\n  AdvanceTime(2000);\n  resp = Run({\"dfly\", \"load\", save_info.file_name});\n  ASSERT_EQ(resp, \"OK\");\n  resp = Run({\"get\", \"key\"});\n  ASSERT_THAT(resp, ArgType(RespExpr::NIL));\n}\n\nTEST_F(RdbTest, HashmapExpiry) {\n  // Add non-expiring elements\n  Run({\"hset\", \"key\", \"key1\", \"val1\", \"key2\", \"val2\"});\n  Run({\"debug\", \"reload\"});\n  EXPECT_THAT(Run({\"hgetall\", \"key\"}),\n              RespArray(UnorderedElementsAre(\"key1\", \"val1\", \"key2\", \"val2\")));\n\n  // Add expiring elements\n  Run({\"hsetex\", \"key\", \"5\", \"key3\", \"val3\", \"key4\", \"val4\"});\n  Run({\"debug\", \"reload\"});  // Reload before expiration\n  EXPECT_THAT(Run({\"hgetall\", \"key\"}),\n              
RespArray(UnorderedElementsAre(\"key1\", \"val1\", \"key2\", \"val2\", \"key3\", \"val3\", \"key4\",\n                                             \"val4\")));\n  AdvanceTime(10'000);\n  EXPECT_THAT(Run({\"hgetall\", \"key\"}),\n              RespArray(UnorderedElementsAre(\"key1\", \"val1\", \"key2\", \"val2\")));\n\n  Run({\"hsetex\", \"key\", \"5\", \"key5\", \"val5\", \"key6\", \"val6\"});\n  EXPECT_THAT(Run({\"hgetall\", \"key\"}),\n              RespArray(UnorderedElementsAre(\"key1\", \"val1\", \"key2\", \"val2\", \"key5\", \"val5\", \"key6\",\n                                             \"val6\")));\n  AdvanceTime(10'000);\n  Run({\"debug\", \"reload\"});  // Reload after expiration\n  EXPECT_THAT(Run({\"hgetall\", \"key\"}),\n              RespArray(UnorderedElementsAre(\"key1\", \"val1\", \"key2\", \"val2\")));\n}\n\nTEST_F(RdbTest, SaveLoadExpiredValuesHmap) {\n  // Add expiring elements\n  Run({\"hsetex\", \"hkey\", \"1\", \"key3\", \"val3\", \"key4\", \"val4\"});\n\n  RespExpr resp = Run({\"TYPE\", \"hkey\"});\n  ASSERT_EQ(resp, \"hash\");\n\n  AdvanceTime(10'000);\n  resp = Run({\"save\", \"RDB\"});\n  ASSERT_EQ(resp, \"OK\");\n\n  resp = Run({\"TYPE\", \"hkey\"});\n  ASSERT_EQ(resp, \"hash\");\n\n  Run({\"debug\", \"reload\"});\n\n  resp = Run({\"TYPE\", \"hkey\"});\n  ASSERT_EQ(resp, \"none\");\n}\n\nTEST_F(RdbTest, SaveLoadExpiredValuesHugeHmap) {\n  constexpr auto keys_num = 10000;\n  for (int i = 0; i < keys_num; ++i) {\n    Run({\"hsetex\", \"hkey\", \"1\", absl::StrCat(\"key\", i), \"val\"});\n  }\n\n  ASSERT_EQ(keys_num, CheckedInt({\"hlen\", \"hkey\"}));\n\n  AdvanceTime(10'000);\n\n  Run({\"debug\", \"reload\"});\n\n  ASSERT_EQ(Run({\"TYPE\", \"hkey\"}), \"none\");\n\n  // with one value that isn't expired\n  for (int i = 0; i < keys_num; ++i) {\n    Run({\"hsetex\", \"hkey\", \"1\", absl::StrCat(\"key\", i), \"val\"});\n  }\n\n  Run({\"hset\", \"hkey\", base::RandStr(20), \"val\"});\n\n  ASSERT_EQ(keys_num + 1, CheckedInt({\"hlen\", 
\"hkey\"}));\n\n  AdvanceTime(10'000);\n\n  Run({\"debug\", \"reload\"});\n\n  ASSERT_EQ(1, CheckedInt({\"hlen\", \"hkey\"}));\n}\n\nTEST_F(RdbTest, SaveLoadExpiredValuesSSet) {\n  // Add expiring elements\n  Run({\"saddex\", \"skey\", \"1\", \"key3\", \"key4\"});\n\n  RespExpr resp = Run({\"TYPE\", \"skey\"});\n  ASSERT_EQ(resp, \"set\");\n\n  AdvanceTime(10'000);\n  resp = Run({\"save\", \"RDB\"});\n  ASSERT_EQ(resp, \"OK\");\n\n  resp = Run({\"TYPE\", \"skey\"});\n  ASSERT_EQ(resp, \"set\");\n\n  Run({\"debug\", \"reload\"});\n\n  resp = Run({\"TYPE\", \"skey\"});\n  ASSERT_EQ(resp, \"none\");\n}\n\nTEST_F(RdbTest, SaveLoadExpiredValuesHugeSet) {\n  constexpr auto keys_num = 10000;\n  for (int i = 0; i < keys_num; ++i) {\n    Run({\"saddex\", \"skey\", \"1\", absl::StrCat(\"key\", i)});\n  }\n\n  ASSERT_EQ(keys_num, CheckedInt({\"scard\", \"skey\"}));\n\n  AdvanceTime(10'000);\n\n  Run({\"debug\", \"reload\"});\n\n  ASSERT_EQ(Run({\"TYPE\", \"skey\"}), \"none\");\n\n  // with one value that isn't expired\n  for (int i = 0; i < keys_num; ++i) {\n    Run({\"saddex\", \"skey\", \"1\", absl::StrCat(\"key\", i)});\n  }\n  Run({\"sadd\", \"skey\", base::RandStr(20)});\n\n  ASSERT_EQ(keys_num + 1, CheckedInt({\"scard\", \"skey\"}));\n\n  AdvanceTime(10'000);\n\n  Run({\"debug\", \"reload\"});\n\n  ASSERT_EQ(1, CheckedInt({\"scard\", \"skey\"}));\n}\n\nTEST_F(RdbTest, SetExpiry) {\n  // Add non-expiring elements\n  Run({\"sadd\", \"key\", \"key1\", \"key2\"});\n  Run({\"debug\", \"reload\"});\n  EXPECT_THAT(Run({\"smembers\", \"key\"}), RespArray(UnorderedElementsAre(\"key1\", \"key2\")));\n\n  // Add expiring elements\n  Run({\"saddex\", \"key\", \"5\", \"key3\", \"key4\"});\n  Run({\"debug\", \"reload\"});  // Reload before expiration\n  EXPECT_THAT(Run({\"smembers\", \"key\"}),\n              RespArray(UnorderedElementsAre(\"key1\", \"key2\", \"key3\", \"key4\")));\n  AdvanceTime(10'000);\n  EXPECT_THAT(Run({\"smembers\", \"key\"}), 
RespArray(UnorderedElementsAre(\"key1\", \"key2\")));\n\n  Run({\"saddex\", \"key\", \"5\", \"key5\", \"key6\"});\n  EXPECT_THAT(Run({\"smembers\", \"key\"}),\n              RespArray(UnorderedElementsAre(\"key1\", \"key2\", \"key5\", \"key6\")));\n  AdvanceTime(10'000);\n  Run({\"debug\", \"reload\"});  // Reload after expiration\n  EXPECT_THAT(Run({\"smembers\", \"key\"}), RespArray(UnorderedElementsAre(\"key1\", \"key2\")));\n}\n\n// Tests that integer elements in sets with expiry are not corrupted during RDB load.\n// This test covers the bug where ToSV() internal buffer was being reused,\n// causing string corruption when loading integer elements.\nTEST_F(RdbTest, SetExpiryInteger) {\n  // Add integer elements with expiry - integers trigger ToSV() buffer reuse\n  Run({\"saddex\", \"s1\", \"10\", \"1\", \"2\", \"3\", \"12345\", \"67890\"});\n\n  // Verify elements are added correctly\n  EXPECT_EQ(5, CheckedInt({\"scard\", \"s1\"}));\n  EXPECT_THAT(Run({\"smembers\", \"s1\"}),\n              RespArray(UnorderedElementsAre(\"1\", \"2\", \"3\", \"12345\", \"67890\")));\n\n  // Reload from RDB - this would trigger the corruption bug\n  Run({\"debug\", \"reload\"});\n\n  // Verify integers were loaded correctly without corruption\n  EXPECT_EQ(5, CheckedInt({\"scard\", \"s1\"}));\n  EXPECT_THAT(Run({\"smembers\", \"s1\"}),\n              RespArray(UnorderedElementsAre(\"1\", \"2\", \"3\", \"12345\", \"67890\")));\n\n  // Verify all elements are actually in the set (no duplicates from corruption)\n  EXPECT_THAT(Run({\"sismember\", \"s1\", \"1\"}), IntArg(1));\n  EXPECT_THAT(Run({\"sismember\", \"s1\", \"2\"}), IntArg(1));\n  EXPECT_THAT(Run({\"sismember\", \"s1\", \"3\"}), IntArg(1));\n  EXPECT_THAT(Run({\"sismember\", \"s1\", \"12345\"}), IntArg(1));\n  EXPECT_THAT(Run({\"sismember\", \"s1\", \"67890\"}), IntArg(1));\n}\n\nTEST_F(RdbTest, SaveFlush) {\n  Run({\"debug\", \"populate\", \"500000\"});\n\n  auto save_fb = pp_->at(1)->LaunchFiber([&] {\n    RespExpr resp = 
Run({\"save\"});\n    ASSERT_EQ(resp, \"OK\");\n  });\n\n  do {\n    usleep(10);\n  } while (!service_->server_family().TEST_IsSaving());\n\n  Run({\"flushdb\"});\n  save_fb.Join();\n  auto save_info = service_->server_family().GetLastSaveInfo();\n  ASSERT_EQ(1, save_info.freq_map.size());\n  auto& k_v = save_info.freq_map.front();\n  EXPECT_EQ(\"string\", k_v.first);\n  EXPECT_EQ(500000, k_v.second);\n}\n\nTEST_F(RdbTest, SaveManyDbs) {\n  Run({\"debug\", \"populate\", \"50000\"});\n  pp_->at(1)->Await([&] {\n    Run({\"select\", \"1\"});\n    Run({\"debug\", \"populate\", \"10000\"});\n  });\n\n  auto metrics = GetMetrics();\n  ASSERT_EQ(2, metrics.db_stats.size());\n  EXPECT_EQ(50000, metrics.db_stats[0].key_count);\n  EXPECT_EQ(10000, metrics.db_stats[1].key_count);\n\n  auto save_fb = pp_->at(0)->LaunchFiber([&] {\n    RespExpr resp = Run({\"save\"});\n    ASSERT_EQ(resp, \"OK\");\n  });\n\n  do {\n    usleep(10);\n  } while (!service_->server_family().TEST_IsSaving());\n\n  pp_->at(1)->Await([&] {\n    Run({\"select\", \"1\"});\n    for (unsigned i = 0; i < 1000; ++i) {\n      Run({\"set\", StrCat(\"abc\", i), \"bar\"});\n    }\n  });\n\n  save_fb.Join();\n\n  auto save_info = service_->server_family().GetLastSaveInfo();\n  ASSERT_EQ(1, save_info.freq_map.size());\n  auto& k_v = save_info.freq_map.front();\n\n  EXPECT_EQ(\"string\", k_v.first);\n  EXPECT_EQ(60000, k_v.second);\n  auto resp = Run({\"debug\", \"reload\", \"NOSAVE\"});\n  EXPECT_EQ(resp, \"OK\");\n\n  metrics = GetMetrics();\n  ASSERT_EQ(2, metrics.db_stats.size());\n  EXPECT_EQ(50000, metrics.db_stats[0].key_count);\n  EXPECT_EQ(10000, metrics.db_stats[1].key_count);\n  if (metrics.db_stats[1].key_count != 10000) {\n    Run({\"select\", \"1\"});\n    resp = Run({\"scan\", \"0\", \"match\", \"ab*\"});\n    StringVec vec = StrArray(resp.GetVec()[1]);\n    for (const auto& s : vec) {\n      LOG(ERROR) << \"Bad key: \" << s;\n    }\n  }\n}\n\nTEST_F(RdbTest, HMapBugs) {\n  // Force kEncodingStrMap2 
encoding.\n  server.max_map_field_len = 0;\n  Run({\"hset\", \"hmap1\", \"key1\", \"val\", \"key2\", \"val2\"});\n  Run({\"hset\", \"hmap2\", \"key1\", string(690557, 'a')});\n\n  server.max_map_field_len = 32;\n  Run({\"debug\", \"reload\"});\n  EXPECT_EQ(2, CheckedInt({\"hlen\", \"hmap1\"}));\n}\n\nTEST_F(RdbTest, Issue1305) {\n  /***************\n   * The code below crashes because of the weird listpack API that assumes that lpInsert\n   * pointers are null then it should do deletion :(. See lpInsert comments for more info.\n\n     uint8_t* lp = lpNew(128);\n     lpAppend(lp, NULL, 0);\n     lpFree(lp);\n\n  */\n\n  // Force kEncodingStrMap2 encoding.\n  server.max_map_field_len = 0;\n  Run({\"hset\", \"hmap\", \"key1\", \"val\", \"key2\", \"\"});\n\n  server.max_map_field_len = 32;\n  Run({\"debug\", \"reload\"});\n  EXPECT_EQ(2, CheckedInt({\"hlen\", \"hmap\"}));\n}\n\nTEST_F(RdbTest, JsonTest) {\n  string_view data[] = {\n      R\"({\"a\":1})\"sv,                          //\n      R\"([1,2,3,4,5,6])\"sv,                    //\n      R\"({\"a\":1.0,\"b\":[1,2],\"c\":\"value\"})\"sv,  //\n      R\"({\"a\":{\"a\":{\"a\":{\"a\":1}}}})\"sv         //\n  };\n\n  for (auto test : data) {\n    Run({\"json.set\", \"doc\", \"$\", test});\n    auto dump = Run({\"dump\", \"doc\"});\n    Run({\"del\", \"doc\"});\n    Run({\"restore\", \"doc\", \"0\", facade::ToSV(dump.GetBuf())});\n    auto res = Run({\"json.get\", \"doc\"});\n    ASSERT_EQ(res, test);\n  }\n}\n\n// hll.rdb has 2 keys: \"key-dense\" and \"key-sparse\", both are HLL with a single added value \"1\".\nclass HllRdbTest : public RdbTest, public testing::WithParamInterface<string> {};\n\nTEST_P(HllRdbTest, Hll) {\n  LOG(INFO) << \" max memory: \" << max_memory_limit\n            << \" used_mem_current: \" << used_mem_current.load();\n  auto ec = LoadRdb(\"hll.rdb\");\n\n  ASSERT_FALSE(ec) << ec.message();\n\n  EXPECT_EQ(CheckedInt({\"pfcount\", GetParam()}), 1);\n\n  EXPECT_EQ(CheckedInt({\"pfcount\", 
GetParam(), \"non-existing\"}), 1);\n\n  EXPECT_EQ(CheckedInt({\"pfadd\", \"key2\", \"2\"}), 1);\n  EXPECT_EQ(CheckedInt({\"pfcount\", GetParam(), \"key2\"}), 2);\n\n  EXPECT_EQ(CheckedInt({\"pfadd\", GetParam(), \"2\"}), 1);\n  EXPECT_EQ(CheckedInt({\"pfcount\", GetParam()}), 2);\n\n  EXPECT_EQ(Run({\"pfmerge\", \"key3\", GetParam(), \"key2\"}), \"OK\");\n  EXPECT_EQ(CheckedInt({\"pfcount\", \"key3\"}), 2);\n}\n\nINSTANTIATE_TEST_SUITE_P(HllRdbTest, HllRdbTest, Values(\"key-sparse\", \"key-dense\"));\n\nTEST_F(RdbTest, LoadSmall7) {\n  // Contains 4 keys\n  // 1. A list called my-list encoded as RDB_TYPE_LIST_QUICKLIST_2\n  // 2. A hashtable called my-hset encoded as RDB_TYPE_HASH_LISTPACK\n  // 3. A set called my-set encoded as RDB_TYPE_SET_LISTPACK\n  // 4. A zset called zset encoded as RDB_TYPE_ZSET_LISTPACK\n  auto ec = LoadRdb(\"redis7_small.rdb\");\n\n  ASSERT_FALSE(ec) << ec.message();\n\n  auto resp = Run({\"scan\", \"0\"});\n\n  ASSERT_THAT(resp, ArrLen(2));\n\n  EXPECT_THAT(StrArray(resp.GetVec()[1]),\n              UnorderedElementsAre(\"my-set\", \"my-hset\", \"my-list\", \"zset\"));\n\n  resp = Run({\"smembers\", \"my-set\"});\n  ASSERT_THAT(resp, ArgType(RespExpr::ARRAY));\n  EXPECT_THAT(resp.GetVec(), UnorderedElementsAre(\"redis\", \"acme\"));\n\n  resp = Run({\"hgetall\", \"my-hset\"});\n  ASSERT_THAT(resp, ArgType(RespExpr::ARRAY));\n  EXPECT_THAT(resp.GetVec(), UnorderedElementsAre(\"acme\", \"44\", \"field\", \"22\"));\n\n  resp = Run({\"lrange\", \"my-list\", \"0\", \"-1\"});\n  ASSERT_THAT(resp, ArgType(RespExpr::ARRAY));\n  EXPECT_THAT(resp.GetVec(), UnorderedElementsAre(\"list1\", \"list2\"));\n\n  resp = Run({\"zrange\", \"zset\", \"0\", \"-1\"});\n  ASSERT_THAT(resp, ArgType(RespExpr::ARRAY));\n  EXPECT_THAT(resp.GetVec(), ElementsAre(\"einstein\", \"schrodinger\"));\n}\n\nTEST_F(RdbTest, RedisJson) {\n  // RDB file generated via:\n  // ./redis-server --save \"\" --appendonly no --loadmodule ../lib/rejson.so\n  // and then:\n  // 
JSON.SET json-str $ '\"hello\"'\n  // JSON.SET json-arr $ \"[1, true, \\\"hello\\\", 3.14]\"\n  // JSON.SET json-obj $\n  // '{\"company\":\"DragonflyDB\",\"product\":\"Dragonfly\",\"website\":\"https://dragondlydb.io\",\"years-active\":[2021,2022,2023,2024,\"and\n  // more!\"]}'\n  auto ec = LoadRdb(\"redis_json.rdb\");\n\n  ASSERT_FALSE(ec) << ec.message();\n\n  EXPECT_EQ(Run({\"JSON.GET\", \"json-str\"}), \"\\\"hello\\\"\");\n  EXPECT_EQ(Run({\"JSON.GET\", \"json-arr\"}), \"[1,true,\\\"hello\\\",3.14]\");\n  EXPECT_EQ(Run({\"JSON.GET\", \"json-obj\"}),\n            \"{\\\"company\\\":\\\"DragonflyDB\\\",\\\"product\\\":\\\"Dragonfly\\\",\\\"website\\\":\\\"https://\"\n            \"dragondlydb.io\\\",\\\"years-active\\\":[2021,2022,2023,2024,\\\"and more!\\\"]}\");\n}\n\nTEST_F(RdbTest, SBF) {\n  EXPECT_THAT(Run({\"BF.ADD\", \"k\", \"1\"}), IntArg(1));\n  Run({\"debug\", \"reload\"});\n  EXPECT_EQ(Run({\"type\", \"k\"}), \"MBbloom--\");\n  EXPECT_THAT(Run({\"BF.EXISTS\", \"k\", \"1\"}), IntArg(1));\n}\n\nTEST_F(RdbTest, SBFLargeFilterChunking) {\n  absl::SetFlag(&FLAGS_rdb_sbf_chunked, true);\n  max_memory_limit = 200000000;\n\n  // Using this set of parameters for the BF.RESERVE command resulted in a\n  // filter size large enough to require chunking (> 64 MB).\n  const double error_rate = 0.001;\n  const size_t capacity = 50'000'000;\n  const size_t num_items = 100;\n\n  size_t collisions = 0;\n\n  Run({\"BF.RESERVE\", \"large_key\", std::to_string(error_rate), std::to_string(capacity)});\n  for (size_t i = 0; i < num_items; i++) {\n    auto res = Run({\"BF.ADD\", \"large_key\", absl::StrCat(\"item\", i)});\n    if (*res.GetInt() == 0)\n      collisions++;\n  }\n  EXPECT_LT(static_cast<double>(collisions) / num_items, error_rate);\n\n  Run({\"debug\", \"reload\"});\n  EXPECT_EQ(Run({\"type\", \"large_key\"}), \"MBbloom--\");\n\n  for (size_t i = 0; i < num_items; i++) {\n    EXPECT_THAT(Run({\"BF.EXISTS\", \"large_key\", absl::StrCat(\"item\", i)}), 
IntArg(1));\n  }\n}\n\nTEST_F(RdbTest, RestoreSearchIndexNameStartingWithColon) {\n  // Create an index with a name that starts with ':' and add a sample document\n  EXPECT_EQ(Run({\"FT.CREATE\", \":Order:index\", \"ON\", \"HASH\", \"PREFIX\", \"1\", \":Order:\", \"SCHEMA\",\n                 \"customer_name\", \"AS\", \"customer_name\", \"TEXT\", \"status\", \"AS\", \"status\", \"TAG\"}),\n            \"OK\");\n\n  EXPECT_THAT(Run({\"HSET\", \":Order:1\", \"customer_name\", \"John\", \"status\", \"new\"}), IntArg(2));\n\n  // Save and reload to ensure the index definition is persisted and restored\n  EXPECT_EQ(Run({\"save\", \"df\"}), \"OK\");\n  EXPECT_EQ(Run({\"debug\", \"reload\"}), \"OK\");\n\n  // Verify a basic search works on the restored index\n  auto search = Run({\"FT.SEARCH\", \":Order:index\", \"John\"});\n  ASSERT_THAT(search, ArgType(RespExpr::ARRAY));\n  const auto& v = search.GetVec();\n  ASSERT_FALSE(v.empty());\n  EXPECT_THAT(v.front(), IntArg(1));\n}\n\n// Parametrized test for RestoreVectorSearchIndexHnsw with varying document counts\nclass HnswRestoreTest : public RdbTest, public testing::WithParamInterface<int> {};\n\nTEST_P(HnswRestoreTest, RestoreVectorSearchIndexHnsw) {\n  int num_docs = GetParam();\n\n  EXPECT_EQ(\n      Run({\"FT.CREATE\", \"only_vec_idx\", \"ON\", \"HASH\", \"PREFIX\", \"1\", \"doc:\", \"SCHEMA\", \"embedding\",\n           \"VECTOR\", \"HNSW\", \"6\", \"TYPE\", \"FLOAT32\", \"DIM\", \"2\", \"DISTANCE_METRIC\", \"L2\"}),\n      \"OK\");\n\n  EXPECT_EQ(Run({\"FT.CREATE\", \"vec_idx\", \"ON\",   \"HASH\",      \"PREFIX\",          \"1\",    \"doc:\",\n                 \"SCHEMA\",    \"name\",    \"TEXT\", \"embedding\", \"VECTOR\",          \"HNSW\", \"6\",\n                 \"TYPE\",      \"FLOAT32\", \"DIM\",  \"2\",         \"DISTANCE_METRIC\", \"L2\"}),\n            \"OK\");\n\n  // Insert documents with incrementing vectors\n  for (int i = 1; i <= num_docs; ++i) {\n    float x = static_cast<float>(i * 2 - 1);\n    
float y = static_cast<float>(i * 2);\n    Run({\"HSET\", StrCat(\"doc:\", i), \"name\", StrCat(\"doc\", i), \"embedding\",\n         StrCat(FloatToBytes(x), FloatToBytes(y))});\n  }\n\n  LOG(INFO) << \"Created \" << num_docs << \" documents with vector embeddings\";\n\n  EXPECT_EQ(Run({\"save\", \"df\"}), \"OK\");\n  auto save_info = service_->server_family().GetLastSaveInfo();\n\n  // Reload from the saved file - this should restore the HNSW index, not rebuild it\n  // Look for \"Restored HNSW index\" in logs to verify restoration vs rebuild\n  LOG(INFO) << \"Reloading from \" << save_info.file_name << \" - expecting HNSW index restoration\";\n  EXPECT_EQ(Run({\"dfly\", \"load\", save_info.file_name}), \"OK\");\n\n  // Wait for async index building to complete on both indices\n  auto is_indexing_done = [this](string_view idx_name) {\n    auto resp = Run({\"FT.INFO\", idx_name});\n    auto arr = resp.GetVec();\n    auto it = std::find_if(arr.begin(), arr.end(), [](const auto& e) { return e == \"indexing\"; });\n    return it != arr.end() && (++it)->GetInt() == 0;\n  };\n\n  ASSERT_TRUE(WaitUntilCondition([&] { return is_indexing_done(\"vec_idx\"); },\n                                 std::chrono::milliseconds(10000)));\n  ASSERT_TRUE(WaitUntilCondition([&] { return is_indexing_done(\"only_vec_idx\"); },\n                                 std::chrono::milliseconds(10000)));\n\n  // Verify text search still works on the restored index\n  auto search = Run({\"FT.SEARCH\", \"vec_idx\", \"doc1\"});\n  ASSERT_THAT(search, ArgType(RespExpr::ARRAY));\n  const auto& v = search.GetVec();\n  ASSERT_FALSE(v.empty());\n  EXPECT_THAT(v.front(), IntArg(1));\n\n  // Verify KNN vector search works on the restored index\n  // Query vector close to (1.0, 2.0) should find doc:1 as nearest\n  string query_vec = StrCat(FloatToBytes(1.1f), FloatToBytes(2.1f));\n  auto knn_search = Run({\"FT.SEARCH\", \"vec_idx\", \"*=>[KNN 2 @embedding $vec]\", \"PARAMS\", \"2\", \"vec\",\n                
         query_vec, \"RETURN\", \"1\", \"name\"});\n  ASSERT_THAT(knn_search, ArgType(RespExpr::ARRAY));\n  EXPECT_GE(knn_search.GetVec().front().GetInt(), 1);\n\n  // The same check for another index with only vector field\n  knn_search = Run({\"FT.SEARCH\", \"only_vec_idx\", \"*=>[KNN 2 @embedding $vec]\", \"PARAMS\", \"2\", \"vec\",\n                    query_vec, \"RETURN\", \"1\", \"name\"});\n  ASSERT_THAT(knn_search, ArgType(RespExpr::ARRAY));\n  EXPECT_GE(knn_search.GetVec().front().GetInt(), 1);\n\n  // Verify total document count matches\n  EXPECT_EQ(CheckedInt({\"dbsize\"}), num_docs);\n\n  LOG(INFO) << \"Successfully verified HNSW index restoration with \" << num_docs << \" documents\";\n}\n\nINSTANTIATE_TEST_SUITE_P(HnswRestoreTest, HnswRestoreTest, Values(5, 50, 500, 1000),\n                         [](const testing::TestParamInfo<int>& info) {\n                           return StrCat(\"Docs\", info.param);\n                         });\n\nTEST_F(RdbTest, DflyLoadAppend) {\n  // Create an RDB with (k1,1) value in it saved as `filename`\n  EXPECT_EQ(Run({\"set\", \"k1\", \"1\"}), \"OK\");\n  EXPECT_EQ(Run({\"save\", \"df\"}), \"OK\");\n  string filename = service_->server_family().GetLastSaveInfo().file_name;\n\n  // Without APPEND option - db should be flushed\n  EXPECT_EQ(Run({\"set\", \"k1\", \"TO-BE-FLUSHED\"}), \"OK\");\n  EXPECT_EQ(Run({\"set\", \"k2\", \"TO-BE-FLUSHED\"}), \"OK\");\n  EXPECT_EQ(Run({\"dfly\", \"load\", filename}), \"OK\");\n  EXPECT_THAT(Run({\"dbsize\"}), IntArg(1));\n  EXPECT_EQ(Run({\"get\", \"k1\"}), \"1\");\n\n  // With APPEND option - db shouldn't be flushed, but k1 should be overridden\n  EXPECT_EQ(Run({\"set\", \"k1\", \"TO-BE-OVERRIDDEN\"}), \"OK\");\n  EXPECT_EQ(Run({\"set\", \"k2\", \"2\"}), \"OK\");\n  EXPECT_EQ(Run({\"dfly\", \"load\", filename, \"append\"}), \"OK\");\n  EXPECT_THAT(Run({\"dbsize\"}), IntArg(2));\n  EXPECT_EQ(Run({\"get\", \"k1\"}), \"1\");\n  EXPECT_EQ(Run({\"get\", \"k2\"}), \"2\");\n}\n\n// 
Tests loading a huge set, where the set is loaded in multiple partial reads.\nTEST_F(RdbTest, LoadHugeSet) {\n  // Add 2 sets with 100k elements each (note must have more than kMaxBlobLen\n  // elements to test partial reads).\n  Run({\"debug\", \"populate\", \"2\", \"test\", \"100\", \"rand\", \"type\", \"set\", \"elements\", \"100000\"});\n  ASSERT_EQ(100000, CheckedInt({\"scard\", \"test:0\"}));\n  ASSERT_EQ(100000, CheckedInt({\"scard\", \"test:1\"}));\n\n  RespExpr resp = Run({\"save\", \"df\"});\n  ASSERT_EQ(resp, \"OK\");\n\n  auto save_info = service_->server_family().GetLastSaveInfo();\n  resp = Run({\"dfly\", \"load\", save_info.file_name});\n  ASSERT_EQ(resp, \"OK\");\n\n  ASSERT_EQ(100000, CheckedInt({\"scard\", \"test:0\"}));\n  ASSERT_EQ(100000, CheckedInt({\"scard\", \"test:1\"}));\n  auto metrics = GetMetrics();\n  EXPECT_GT(metrics.db_stats[0].obj_memory_usage, 24'000'000u);\n}\n\n// Tests loading a huge hmap, where the map is loaded in multiple partial\n// reads.\nTEST_F(RdbTest, LoadHugeHMap) {\n  // Add 2 hmaps with 100k elements each (note must have more than kMaxBlobLen\n  // elements to test partial reads).\n  Run({\"debug\", \"populate\", \"2\", \"test\", \"100\", \"rand\", \"type\", \"hash\", \"elements\", \"100000\"});\n  ASSERT_EQ(100000, CheckedInt({\"hlen\", \"test:0\"}));\n  ASSERT_EQ(100000, CheckedInt({\"hlen\", \"test:1\"}));\n\n  RespExpr resp = Run({\"save\", \"df\"});\n  ASSERT_EQ(resp, \"OK\");\n\n  auto save_info = service_->server_family().GetLastSaveInfo();\n  resp = Run({\"dfly\", \"load\", save_info.file_name});\n  ASSERT_EQ(resp, \"OK\");\n\n  ASSERT_EQ(100000, CheckedInt({\"hlen\", \"test:0\"}));\n  ASSERT_EQ(100000, CheckedInt({\"hlen\", \"test:1\"}));\n  auto metrics = GetMetrics();\n  EXPECT_GT(metrics.db_stats[0].obj_memory_usage, 29'000'000u);\n}\n\n// Tests loading a huge zset, where the zset is loaded in multiple partial\n// reads.\nTEST_F(RdbTest, LoadHugeZSet) {\n  // Add 2 zsets with 100k elements each (note must 
have more than kMaxBlobLen\n  // elements to test partial reads).\n  Run({\"debug\", \"populate\", \"2\", \"test\", \"100\", \"rand\", \"type\", \"zset\", \"elements\", \"100000\"});\n  ASSERT_EQ(100000, CheckedInt({\"zcard\", \"test:0\"}));\n  ASSERT_EQ(100000, CheckedInt({\"zcard\", \"test:1\"}));\n\n  RespExpr resp = Run({\"save\", \"df\"});\n  ASSERT_EQ(resp, \"OK\");\n\n  auto save_info = service_->server_family().GetLastSaveInfo();\n  resp = Run({\"dfly\", \"load\", save_info.file_name});\n  ASSERT_EQ(resp, \"OK\");\n\n  ASSERT_EQ(100000, CheckedInt({\"zcard\", \"test:0\"}));\n  ASSERT_EQ(100000, CheckedInt({\"zcard\", \"test:1\"}));\n  auto metrics = GetMetrics();\n  EXPECT_GT(metrics.db_stats[0].obj_memory_usage, 26'000'000u);\n}\n\n// Tests loading a huge list, where the list is loaded in multiple partial\n// reads.\nTEST_F(RdbTest, LoadHugeList) {\n  // Add 2 lists with 100k elements each (note must have more than 512*8Kb\n  // elements to test partial reads).\n  Run({\"debug\", \"populate\", \"2\", \"test\", \"100\", \"rand\", \"type\", \"list\", \"elements\", \"100000\"});\n  ASSERT_EQ(100000, CheckedInt({\"llen\", \"test:0\"}));\n  ASSERT_EQ(100000, CheckedInt({\"llen\", \"test:1\"}));\n\n  RespExpr resp = Run({\"save\", \"df\"});\n  ASSERT_EQ(resp, \"OK\");\n\n  auto save_info = service_->server_family().GetLastSaveInfo();\n  resp = Run({\"dfly\", \"load\", save_info.file_name});\n  ASSERT_EQ(resp, \"OK\");\n\n  ASSERT_EQ(100000, CheckedInt({\"llen\", \"test:0\"}));\n  ASSERT_EQ(100000, CheckedInt({\"llen\", \"test:1\"}));\n  auto metrics = GetMetrics();\n  EXPECT_GT(metrics.db_stats[0].obj_memory_usage, 20'000'000u);\n}\n\n// Tests loading a huge stream, where the stream is loaded in multiple partial\n// reads.\nTEST_F(RdbTest, LoadHugeStream) {\n  TEST_current_time_ms = 1000;\n\n  // Add a huge stream (test:0) with 2000 entries, and 4 1k elements per entry\n  // (note must be more than 512*4kb elements to test partial reads).\n  // We add 2000 
entries to the stream to ensure that the stream, because populate stream\n  // adds only a single entry at a time, with multiple elements in it.\n\n  Run({\"debug\", \"populate\", \"1\", \"test\", \"2000\", \"rand\", \"type\", \"stream\", \"elements\", \"8000\"});\n\n  ASSERT_EQ(2000, CheckedInt({\"xlen\", \"test:0\"}));\n  Run({\"XGROUP\", \"CREATE\", \"test:0\", \"grp1\", \"0\"});\n  Run({\"XGROUP\", \"CREATE\", \"test:0\", \"grp2\", \"0\"});\n  Run({\"XREADGROUP\", \"GROUP\", \"grp1\", \"Alice\", \"COUNT\", \"1\", \"STREAMS\", \"test:0\", \">\"});\n  Run({\"XREADGROUP\", \"GROUP\", \"grp2\", \"Alice\", \"COUNT\", \"1\", \"STREAMS\", \"test:0\", \">\"});\n\n  auto resp = Run({\"xinfo\", \"stream\", \"test:0\"});\n\n  EXPECT_THAT(\n      resp, RespElementsAre(\"length\", 2000, \"radix-tree-keys\", 2000, \"radix-tree-nodes\", 2010,\n                            \"last-generated-id\", \"1000-1999\", \"max-deleted-entry-id\", \"0-0\",\n                            \"entries-added\", 2000, \"recorded-first-entry-id\", \"1000-0\", \"groups\", 2,\n                            \"first-entry\", ArrLen(2), \"last-entry\", ArrLen(2)));\n\n  resp = Run({\"save\", \"df\"});\n  ASSERT_EQ(resp, \"OK\");\n\n  auto save_info = service_->server_family().GetLastSaveInfo();\n  resp = Run({\"dfly\", \"load\", save_info.file_name});\n  ASSERT_EQ(resp, \"OK\");\n\n  ASSERT_EQ(2000, CheckedInt({\"xlen\", \"test:0\"}));\n  resp = Run({\"xinfo\", \"stream\", \"test:0\"});\n  EXPECT_THAT(\n      resp, RespElementsAre(\"length\", 2000, \"radix-tree-keys\", 2000, \"radix-tree-nodes\", 2010,\n                            \"last-generated-id\", \"1000-1999\", \"max-deleted-entry-id\", \"0-0\",\n                            \"entries-added\", 2000, \"recorded-first-entry-id\", \"1000-0\", \"groups\", 2,\n                            \"first-entry\", ArrLen(2), \"last-entry\", ArrLen(2)));\n  resp = Run({\"xinfo\", \"groups\", \"test:0\"});\n  EXPECT_THAT(resp, 
RespElementsAre(RespElementsAre(\"name\", \"grp1\", \"consumers\", 1, \"pending\", 1,\n                                                    \"last-delivered-id\", \"1000-0\", \"entries-read\",\n                                                    1, \"lag\", 1999),\n                                    _));\n}\n\nTEST_F(RdbTest, LoadStream2) {\n  auto ec = LoadRdb(\"RDB_TYPE_STREAM_LISTPACKS_2.rdb\");\n  ASSERT_FALSE(ec) << ec.message();\n  auto res = Run({\"XINFO\", \"STREAM\", \"mystream\"});\n  ASSERT_THAT(res.GetVec(),\n              ElementsAre(\"length\", 2, \"radix-tree-keys\", 1, \"radix-tree-nodes\", 2,\n                          \"last-generated-id\", \"1732613360686-0\", \"max-deleted-entry-id\", \"0-0\",\n                          \"entries-added\", 2, \"recorded-first-entry-id\", \"1732613352350-0\",\n                          \"groups\", 1, \"first-entry\", RespElementsAre(\"1732613352350-0\", _),\n                          \"last-entry\", RespElementsAre(\"1732613360686-0\", _)));\n}\n\nTEST_F(RdbTest, LoadStream3) {\n  auto ec = LoadRdb(\"RDB_TYPE_STREAM_LISTPACKS_3.rdb\");\n  ASSERT_FALSE(ec) << ec.message();\n  auto res = Run({\"XINFO\", \"STREAM\", \"mystream\"});\n  ASSERT_THAT(\n      res.GetVec(),\n      ElementsAre(\"length\", 2, \"radix-tree-keys\", 1, \"radix-tree-nodes\", 2, \"last-generated-id\",\n                  \"1732614679549-0\", \"max-deleted-entry-id\", \"0-0\", \"entries-added\", 2,\n                  \"recorded-first-entry-id\", \"1732614676541-0\", \"groups\", 1, \"first-entry\",\n                  ArgType(RespExpr::ARRAY), \"last-entry\", ArgType(RespExpr::ARRAY)));\n}\n\nTEST_F(RdbTest, SnapshotTooBig) {\n  // Run({\"debug\", \"populate\", \"10000\", \"foo\", \"1000\"});\n  //  usleep(5000);  // let the stats to sync\n  max_memory_limit = 100000;\n  used_mem_current = 1000000;\n  auto resp = Run({\"debug\", \"reload\"});\n  ASSERT_THAT(resp, ErrArg(\"Out of memory\"));\n}\n\nTEST_F(RdbTest, HugeKeyIssue4497) {\n  
SetTestFlag(\"cache_mode\", \"true\");\n  ResetService();\n\n  EXPECT_EQ(Run({\"flushall\"}), \"OK\");\n  EXPECT_EQ(Run({\"debug\", \"populate\", \"1\", \"k\", \"1000\", \"rand\", \"type\", \"set\", \"elements\", \"5000\"}),\n            \"OK\");\n  EXPECT_EQ(Run({\"save\", \"rdb\", \"hugekey.rdb\"}), \"OK\");\n  EXPECT_EQ(Run({\"dfly\", \"load\", \"hugekey.rdb\"}), \"OK\");\n  EXPECT_EQ(Run({\"flushall\"}), \"OK\");\n}\n\nTEST_F(RdbTest, HugeKeyIssue4554) {\n  SetTestFlag(\"cache_mode\", \"true\");\n  // We need to stress one flow/shard such that the others finish early. Lock on hashtags allows\n  // that.\n  SetTestFlag(\"lock_on_hashtags\", \"true\");\n  ResetService();\n\n  EXPECT_EQ(\n      Run({\"debug\", \"populate\", \"20\", \"{tmp}\", \"20\", \"rand\", \"type\", \"set\", \"elements\", \"10000\"}),\n      \"OK\");\n  EXPECT_EQ(Run({\"save\", \"df\", \"hugekey\"}), \"OK\");\n  EXPECT_EQ(Run({\"dfly\", \"load\", \"hugekey-summary.dfs\"}), \"OK\");\n  EXPECT_EQ(Run({\"flushall\"}), \"OK\");\n}\n\n// ignore_expiry.rdb contains 2 keys, both of which are expired.\n// This test case verifies whether the rdb_ignore_expiry flag works as expected.\nTEST_F(RdbTest, RDBIgnoreExpiryFlag) {\n  absl::FlagSaver fs;\n\n  SetTestFlag(\"rdb_ignore_expiry\", \"true\");\n  auto ec = LoadRdb(\"ignore_expiry.rdb\");\n\n  ASSERT_FALSE(ec) << ec.message();\n\n  auto resp = Run({\"scan\", \"0\"});\n\n  ASSERT_THAT(resp, ArrLen(2));\n\n  EXPECT_THAT(StrArray(resp.GetVec()[1]), UnorderedElementsAre(\"test\", \"test2\"));\n\n  EXPECT_THAT(Run({\"get\", \"test\"}), \"expkey\");\n  EXPECT_THAT(Run({\"get\", \"test2\"}), \"expkey\");\n\n  int ttl = CheckedInt({\"ttl\", \"test\"});  // should ignore expiry for key\n  EXPECT_EQ(ttl, -1);\n\n  int ttl2 = CheckedInt({\"ttl\", \"test2\"});  // should ignore expiry for key\n  EXPECT_EQ(ttl2, -1);\n}\n\nTEST_F(RdbTest, CmsSerialization) {\n  Run(\"cms.initbydim cms 1000 5\");\n  Run(\"cms.incrby cms foo 5 bar 3 baz 9\");\n\n  auto resp = 
Run(\"cms.query cms foo bar baz\");\n  EXPECT_THAT(resp, RespArray(ElementsAre(IntArg(5), IntArg(3), IntArg(9))));\n\n  Run(\"save df cms\");\n  Run(\"flushall\");\n  EXPECT_EQ(Run(\"dfly load cms-summary.dfs\"), \"OK\");\n\n  resp = Run(\"cms.query cms foo bar baz\");\n  EXPECT_THAT(resp, RespArray(ElementsAre(IntArg(5), IntArg(3), IntArg(9))));\n}\n\n}  // namespace dfly\n"
  },
  {
    "path": "src/server/replica.cc",
    "content": "// Copyright 2022, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n#include \"server/replica.h\"\n\n#include <chrono>\n\n#include \"absl/strings/match.h\"\n#include \"facade/service_interface.h\"\n#include \"server/engine_shard.h\"\n\nextern \"C\" {\n#include \"redis/rdb.h\"\n}\n\n#include <absl/cleanup/cleanup.h>\n#include <absl/flags/flag.h>\n#include <absl/functional/bind_front.h>\n#include <absl/strings/escaping.h>\n#include <absl/strings/str_cat.h>\n#include <absl/strings/strip.h>\n\n#include <boost/asio/ip/tcp.hpp>\n#include <memory>\n#include <utility>\n\n#include \"base/logging.h\"\n#include \"facade/redis_parser.h\"\n#include \"facade/reply_capture.h\"\n#include \"facade/socket_utils.h\"\n#include \"server/error.h\"\n#include \"server/journal/executor.h\"\n#include \"server/journal/journal.h\"\n#include \"server/journal/serializer.h\"\n#include \"server/main_service.h\"\n#include \"server/namespaces.h\"\n#include \"server/rdb_load.h\"\n#include \"strings/human_readable.h\"\n\n#define LOG_REPL_ERROR(msg)                                         \\\n  do {                                                              \\\n    if (state_mask_ & R_ENABLED) {                                  \\\n      if ((state_mask_ & R_SYNCING) || (state_mask_ & R_SYNC_OK)) { \\\n        LOG(WARNING) << msg;                                        \\\n      } else {                                                      \\\n        LOG(ERROR) << msg;                                          \\\n      }                                                             \\\n    } else {                                                        \\\n      VLOG(1) << msg;                                               \\\n    }                                                               \\\n  } while (0)\n\nABSL_FLAG(int, replication_acks_interval, 1000, \"Interval between acks in milliseconds.\");\nABSL_FLAG(int, master_connect_timeout_ms, 
20000,\n          \"Timeout for establishing connection to a replication master\");\nABSL_FLAG(int, master_reconnect_timeout_ms, 1000,\n          \"Timeout for re-establishing connection to a replication master\");\nABSL_FLAG(bool, replica_partial_sync, true,\n          \"Use partial sync to reconnect when a replica connection is interrupted.\");\nABSL_FLAG(bool, break_replication_on_master_restart, false,\n          \"When in replica mode, and master restarts, break replication from master to avoid \"\n          \"flushing the replica's data.\");\nABSL_FLAG(std::string, replica_announce_ip, \"\",\n          \"IP address that Dragonfly announces to replication master\");\nABSL_DECLARE_FLAG(int32_t, port);\nABSL_DECLARE_FLAG(uint16_t, announce_port);\nABSL_FLAG(\n    int, replica_priority, 100,\n    \"Published by info command for sentinel to pick replica based on score during a failover\");\nABSL_FLAG(bool, experimental_replicaof_v2, true,\n          \"Use ReplicaOfV2 algorithm for initiating replication\");\n\nnamespace dfly {\n\nusing namespace std;\nusing namespace util;\nusing namespace facade;\nusing absl::StrCat;\n\nnamespace {\n\nconstexpr unsigned kRdbEofMarkSize = 40;\n\n// Distribute flow indices over all available threads (shard_set pool size).\nvector<vector<unsigned>> Partition(unsigned num_flows) {\n  vector<vector<unsigned>> partition(shard_set->pool()->size());\n  for (unsigned i = 0; i < num_flows; ++i) {\n    partition[i % partition.size()].push_back(i);\n  }\n  return partition;\n}\n\n}  // namespace\n\nReplica::Replica(string host, uint16_t port, Service* se, std::string_view id,\n                 std::optional<cluster::SlotRange> slot_range)\n    : ProtocolClient(std::move(host), port), service_(*se), id_{id}, slot_range_(slot_range) {\n  proactor_ = ProactorBase::me();\n}\n\nReplica::~Replica() {\n  sync_fb_.JoinIfNeeded();\n  acks_fb_.JoinIfNeeded();\n}\n\nstatic const char kConnErr[] = \"could not connect to master: \";\n\nGenericError 
Replica::Start() {\n  VLOG(1) << \"Starting replication \" << this;\n  ProactorBase* mythread = ProactorBase::me();\n  CHECK(mythread);\n  DCHECK(proactor_ == mythread);\n\n  auto check_connection_error = [this](error_code ec, const char* msg) -> GenericError {\n    if (!exec_st_.IsRunning()) {\n      CloseSocket();\n      return {\"replication cancelled\"};\n    }\n    if (ec) {\n      CloseSocket();\n      exec_st_.ReportCancelError();\n      return {absl::StrCat(msg, ec.message())};\n    }\n    return ec;\n  };\n\n  // 0. Set basic error handler that is responsible for cleaning up on errors.\n  // Can return an error only if replication was cancelled immediately.\n  auto err = exec_st_.SwitchErrorHandler([this](const auto& ge) { this->DefaultErrorHandler(ge); });\n  RETURN_ON_GENERIC_ERR(check_connection_error(err, \"replication cancelled\"));\n\n  // 1. Resolve dns.\n  VLOG(1) << \"Resolving master DNS\";\n  error_code ec = ResolveHostDns();\n  RETURN_ON_GENERIC_ERR(check_connection_error(ec, \"could not resolve master dns\"));\n\n  // 2. Connect socket.\n  VLOG(1) << \"Connecting to master\";\n  ec = ConnectAndAuth(absl::GetFlag(FLAGS_master_connect_timeout_ms) * 1ms, &exec_st_);\n  RETURN_ON_GENERIC_ERR(check_connection_error(ec, kConnErr));\n\n  // 3. 
Greet.\n  VLOG(1) << \"Greeting\";\n  state_mask_ = R_ENABLED | R_TCP_CONNECTED;\n  ec = Greet();\n  RETURN_ON_ERR(check_connection_error(ec, \"could not greet master \"));\n\n  return {};\n}\n\nvoid Replica::StartMainReplicationFiber(std::optional<LastMasterSyncData> last_master_sync_data) {\n  sync_fb_ = fb2::Fiber(\"main_replication\", &Replica::MainReplicationFb, this,\n                        std::move(last_master_sync_data));\n}\n\nvoid Replica::EnableReplication() {\n  VLOG(1) << \"Enabling replication\";\n\n  state_mask_ = R_ENABLED;                                           // set replica state to enabled\n  sync_fb_ = MakeFiber(&Replica::MainReplicationFb, this, nullopt);  // call replication fiber\n}\n\nstd::optional<Replica::LastMasterSyncData> Replica::Stop() {\n  VLOG(1) << \"Stopping replication \" << this;\n  // Stops the loop in MainReplicationFb.\n\n  proactor_->Await([this] {\n    state_mask_ = 0;               // Specifically ~R_ENABLED.\n    exec_st_.ReportCancelError();  // Context is fully responsible for cleanup.\n  });\n\n  // Make sure the replica fully stopped and did all cleanup,\n  // so we can freely release resources (connections).\n  sync_fb_.JoinIfNeeded();\n  CloseSocket();\n  DVLOG(1) << \"MainReplicationFb stopped \" << this;\n  acks_fb_.JoinIfNeeded();\n  for (auto& flow : shard_flows_) {\n    flow.reset();\n  }\n\n  if (last_journal_LSNs_.has_value()) {\n    return LastMasterSyncData{master_context_.master_repl_id, last_journal_LSNs_.value()};\n  }\n  return nullopt;\n}\n\nvoid Replica::Pause(bool pause) {\n  VLOG(1) << \"Pausing replication\";\n  Proactor()->Await([&] {\n    is_paused_ = pause;\n    if (shard_flows_.empty())\n      return;\n\n    auto cb = [&](unsigned index, auto*) {\n      for (auto id : thread_flow_map_[index]) {\n        shard_flows_[id]->Pause(pause);\n      }\n    };\n    shard_set->pool()->AwaitBrief(cb);\n  });\n}\n\nstd::error_code Replica::TakeOver(unsigned timeout_sec, bool save_flag) {\n  VLOG(1) << 
\"Taking over \" << timeout_sec << \" seconds, save_flag=\" << save_flag;\n\n  std::error_code ec;\n  auto takeOverCmd = absl::StrCat(\"TAKEOVER \", timeout_sec, (save_flag ? \" SAVE\" : \"\"));\n  Proactor()->Await([this, &ec, cmd = std::move(takeOverCmd), timeout_sec] {\n    // Set socket timeout to prevent hanging on unresponsive master\n    // Add buffer time for master processing (timeout + 10 seconds)\n    auto prev_timeout = Sock()->timeout();\n    Sock()->set_timeout((timeout_sec + 10) * 1000);  // milliseconds\n\n    ec = SendNextPhaseRequest(cmd);\n\n    Sock()->set_timeout(prev_timeout);\n  });\n\n  // If we successfully taken over, return and let server_family stop the replication.\n  return ec;\n}\n\nvoid Replica::MainReplicationFb(std::optional<LastMasterSyncData> last_master_sync_data) {\n  VLOG(1) << \"Main replication fiber started \" << this;\n  // Switch shard states to replication.\n  SetShardStates(true);\n\n  error_code ec;\n  while (state_mask_ & R_ENABLED) {\n    // Discard all previous errors and set default error handler.\n    exec_st_.Reset([this](const GenericError& ge) { this->DefaultErrorHandler(ge); });\n    // 1. 
Connect socket.\n    if ((state_mask_ & R_TCP_CONNECTED) == 0) {\n      ThisFiber::SleepFor(500ms);\n      if (is_paused_)\n        continue;\n\n      ec = ResolveHostDns();\n      if (ec) {\n        LOG(ERROR) << \"Error resolving dns to \" << server().host << \" (phase: \" << GetCurrentPhase()\n                   << \"): \" << ec;\n        continue;\n      }\n\n      // Give a lower timeout for connect, because we're\n      reconnect_count_++;\n      ec = ConnectAndAuth(absl::GetFlag(FLAGS_master_reconnect_timeout_ms) * 1ms, &exec_st_);\n      if (ec) {\n        LOG(WARNING) << \"Error connecting to \" << server().Description()\n                     << \" (phase: \" << GetCurrentPhase() << \"): \" << ec\n                     << \", reason: \" << ec.message();\n        continue;\n      }\n      VLOG(1) << \"Replica socket connected\";\n      state_mask_ |= R_TCP_CONNECTED;\n      continue;\n    }\n\n    DCHECK(Proactor() == proactor_);\n\n    // 2. Greet.\n    if ((state_mask_ & R_GREETED) == 0) {\n      ec = Greet();\n      if (ec) {\n        LOG(WARNING) << \"Error greeting \" << server().Description()\n                     << \" (phase: \" << GetCurrentPhase() << \"): \" << ec << \" \" << ec.message()\n                     << \", socket state: \" + GetSocketInfo(Sock()->native_handle());\n        state_mask_ &= R_ENABLED;\n        continue;\n      }\n      state_mask_ |= R_GREETED;\n      continue;\n    }\n\n    // 3. 
Initiate full sync\n    if ((state_mask_ & R_SYNC_OK) == 0) {\n      if (HasDflyMaster()) {\n        ec = InitiateDflySync(std::exchange(last_master_sync_data, nullopt));\n      } else\n        ec = InitiatePSync();\n\n      if (ec) {\n        LOG(WARNING) << \"Error syncing with \" << server().Description()\n                     << \" (phase: \" << GetCurrentPhase() << \"): \" << ec << \" \" << ec.message()\n                     << \", socket state: \" + GetSocketInfo(Sock()->native_handle());\n        state_mask_ &= R_ENABLED;  // reset all flags besides R_ENABLED\n        continue;\n      }\n      state_mask_ |= R_SYNC_OK;\n      continue;\n    }\n\n    // 4. Start stable state sync.\n    DCHECK(state_mask_ & R_SYNC_OK);\n\n    if (HasDflyMaster())\n      ec = ConsumeDflyStream();\n    else\n      ec = ConsumeRedisStream();\n\n    state_mask_ &= R_ENABLED;\n    if (state_mask_ & R_ENABLED) {  // replication was not stopped.\n      LOG(WARNING) << \"Error stable sync with \" << server().Description()\n                   << \" (phase: \" << GetCurrentPhase() << \"): \" << ec << \" \" << ec.message()\n                   << \", socket state: \" + GetSocketInfo(Sock()->native_handle());\n    }\n  }\n\n  // Wait for unblocking cleanup to finish.\n  exec_st_.JoinErrorHandler();\n\n  // Revert shard states to normal state.\n  SetShardStates(false);\n\n  VLOG(1) << \"Main replication fiber finished\";\n}\n\nerror_code Replica::Greet() {\n  ResetParser(RedisParser::Mode::CLIENT);\n  VLOG(1) << \"greeting message handling\";\n  // Corresponds to server.repl_state == REPL_STATE_CONNECTING state in redis\n  RETURN_ON_ERR(SendCommandAndReadResponse(\"PING\"));  // optional.\n  PC_RETURN_ON_BAD_RESPONSE(CheckRespIsSimpleReply(\"PONG\"));\n\n  // Corresponds to server.repl_state == REPL_STATE_SEND_HANDSHAKE condition in replication.c\n  uint16_t port = absl::GetFlag(FLAGS_announce_port);\n  if (port == 0) {\n    port = static_cast<uint16_t>(absl::GetFlag(FLAGS_port));\n  }\n  
RETURN_ON_ERR(SendCommandAndReadResponse(StrCat(\"REPLCONF listening-port \", port)));\n  PC_RETURN_ON_BAD_RESPONSE(CheckRespIsSimpleReply(\"OK\"));\n\n  auto announce_ip = absl::GetFlag(FLAGS_replica_announce_ip);\n  if (!announce_ip.empty()) {\n    RETURN_ON_ERR(SendCommandAndReadResponse(StrCat(\"REPLCONF ip-address \", announce_ip)));\n    LOG_IF(WARNING, !CheckRespIsSimpleReply(\"OK\"))\n        << \"Master did not OK announced IP address, perhaps it is using an old version\";\n  }\n\n  // Corresponds to server.repl_state == REPL_STATE_SEND_CAPA\n  RETURN_ON_ERR(SendCommandAndReadResponse(\"REPLCONF capa eof capa psync2\"));\n  PC_RETURN_ON_BAD_RESPONSE(CheckRespIsSimpleReply(\"OK\"));\n\n  // Announce that we are the dragonfly client.\n  // Note that we currently do not support dragonfly->redis replication.\n  RETURN_ON_ERR(SendCommandAndReadResponse(\"REPLCONF capa dragonfly\"));\n  PC_RETURN_ON_BAD_RESPONSE(CheckRespFirstTypes({RespExpr::STRING}));\n\n  if (LastResponseArgs().size() == 1) {  // Redis\n    PC_RETURN_ON_BAD_RESPONSE(CheckRespIsSimpleReply(\"OK\"));\n  } else if (LastResponseArgs().size() >= 3) {  // it's dragonfly master.\n    PC_RETURN_ON_BAD_RESPONSE(!HandleCapaDflyResp());\n    if (auto ec = ConfigureDflyMaster(); ec)\n      return ec;\n  } else {\n    PC_RETURN_ON_BAD_RESPONSE(false);\n  }\n\n  state_mask_ |= R_GREETED;\n  return error_code{};\n}\n\nstd::error_code Replica::HandleCapaDflyResp() {\n  // Response is: <master_repl_id, syncid, num_shards [, version]>\n  if (!CheckRespFirstTypes({RespExpr::STRING, RespExpr::STRING, RespExpr::INT64}) ||\n      LastResponseArgs()[0].GetBuf().size() != CONFIG_RUN_ID_SIZE)\n    return make_error_code(errc::bad_message);\n\n  int64 param_num_flows = get<int64_t>(LastResponseArgs()[2].u);\n  if (param_num_flows <= 0 || param_num_flows > 1024) {\n    // sanity check, we support upto 1024 shards.\n    // It's not that we can not support more but it's probably highly unlikely that someone\n    // will 
run dragonfly with more than 1024 cores.\n    LOG(ERROR) << \"Invalid flow count \" << param_num_flows;\n    return make_error_code(errc::bad_message);\n  }\n\n  DCHECK(proactor_ == Proactor());\n\n  // If we're syncing a different replication ID, drop the saved LSNs.\n  string_view master_repl_id = ToSV(LastResponseArgs()[0].GetBuf());\n\n  // If we tried to replicate from ourself return an error\n  if (master_repl_id == id_) {\n    LOG(WARNING) << \"Can't connect to myself\";\n    return make_error_code(errc::connection_aborted);\n  }\n\n  if (master_context_.master_repl_id != master_repl_id) {\n    if (absl::GetFlag(FLAGS_break_replication_on_master_restart) &&\n        !master_context_.master_repl_id.empty()) {\n      LOG(ERROR) << \"Encountered different master repl id (\" << master_repl_id << \" vs \"\n                 << master_context_.master_repl_id << \")\";\n      state_mask_ = 0;\n      return make_error_code(errc::connection_aborted);\n    }\n    last_journal_LSNs_.reset();\n  }\n  master_context_.master_repl_id = master_repl_id;\n  master_context_.dfly_session_id = ToSV(LastResponseArgs()[1].GetBuf());\n  master_context_.num_flows = param_num_flows;\n\n  if (LastResponseArgs().size() >= 4) {\n    PC_RETURN_ON_BAD_RESPONSE(LastResponseArgs()[3].type == RespExpr::INT64);\n    master_context_.version = DflyVersion(get<int64_t>(LastResponseArgs()[3].u));\n  }\n  VLOG(1) << \"Master id: \" << master_context_.master_repl_id\n          << \", sync id: \" << master_context_.dfly_session_id\n          << \", num journals: \" << param_num_flows\n          << \", version: \" << unsigned(master_context_.version);\n\n  return error_code{};\n}\n\nstd::error_code Replica::ConfigureDflyMaster() {\n  // We need to send this because we may require to use this for cluster commands.\n  // the reason to send this here is that in other contexts we can get an error reply\n  // since we are busy with the replication\n  
RETURN_ON_ERR(SendCommandAndReadResponse(StrCat(\"REPLCONF CLIENT-ID \", id_)));\n  if (!CheckRespIsSimpleReply(\"OK\")) {\n    LOG(WARNING) << \"Bad REPLCONF CLIENT-ID response\";\n  }\n\n  RETURN_ON_ERR(\n      SendCommandAndReadResponse(StrCat(\"REPLCONF CLIENT-VERSION \", DflyVersion::CURRENT_VER)));\n  PC_RETURN_ON_BAD_RESPONSE(CheckRespIsSimpleReply(\"OK\"));\n\n  return error_code{};\n}\n\nerror_code Replica::InitiatePSync() {\n  base::IoBuf io_buf{128};\n\n  // Corresponds to server.repl_state == REPL_STATE_SEND_PSYNC\n  string id(\"?\");  // corresponds to null master id and null offset\n  int64_t offs = -1;\n  if (!master_context_.master_repl_id.empty()) {  // in case we synced before\n    id = master_context_.master_repl_id;          // provide the replication offset and master id\n    // TBD: for incremental sync send repl_offs_, not supported yet.\n    // offs = repl_offs_;\n  }\n\n  RETURN_ON_ERR(SendCommand(StrCat(\"PSYNC \", id, \" \", offs)));\n\n  // Master may delay sync response with \"repl_diskless_sync_delay\"\n  PSyncResponse repl_header;\n\n  RETURN_ON_ERR(ParseReplicationHeader(&io_buf, &repl_header));\n\n  string* token = absl::get_if<string>(&repl_header.fullsync);\n  size_t snapshot_size = SIZE_MAX;\n  if (!token) {\n    snapshot_size = absl::get<size_t>(repl_header.fullsync);\n  }\n  TouchIoTime();\n\n  // we get token for diskless redis replication. 
For disk based replication\n  // we get the snapshot size.\n  if (snapshot_size || token != nullptr) {\n    LOG(INFO) << \"Starting full sync with Redis master\";\n\n    state_mask_ |= R_SYNCING;\n\n    io::PrefixSource ps{io_buf.InputBuffer(), Sock()};\n\n    // Set LOADING state.\n    if (!service_.RequestLoadingState()) {\n      return exec_st_.ReportError(std::make_error_code(errc::state_not_recoverable),\n                                  \"Failed to enter LOADING state\");\n    }\n\n    absl::Cleanup cleanup = [this]() { service_.RemoveLoadingState(); };\n\n    if (slot_range_.has_value()) {\n      JournalExecutor{&service_}.FlushSlots(slot_range_.value());\n    } else {\n      JournalExecutor{&service_}.FlushAll();\n    }\n\n    RdbLoadContext load_context;\n    RdbLoader loader(NULL, &load_context);\n    loader.SetLoadUnownedSlots(true);\n    loader.set_source_limit(snapshot_size);\n    // TODO: to allow registering callbacks within loader to send '\\n' pings back to master.\n    // Also to allow updating last_io_time_.\n    error_code ec = loader.Load(&ps);\n    RETURN_ON_ERR(ec);\n    VLOG(1) << \"full sync completed\";\n\n    if (token) {\n      uint8_t buf[kRdbEofMarkSize];\n      io::PrefixSource chained(loader.Leftover(), &ps);\n      VLOG(1) << \"Before reading from chained stream\";\n      io::Result<size_t> eof_res = chained.Read(io::MutableBytes{buf});\n      CHECK(eof_res && *eof_res == kRdbEofMarkSize);\n\n      VLOG(1) << \"Comparing token \" << ToSV(buf);\n\n      // TODO: handle gracefully...\n      CHECK_EQ(0, memcmp(token->data(), buf, kRdbEofMarkSize));\n      CHECK(chained.UnusedPrefix().empty());\n    } else {\n      CHECK_EQ(0u, loader.Leftover().size());\n      CHECK_EQ(snapshot_size, loader.bytes_read());\n    }\n\n    CHECK(ps.UnusedPrefix().empty());\n    io_buf.ConsumeInput(io_buf.InputLen());\n    TouchIoTime();\n  } else {\n    LOG(INFO) << \"Re-established sync with Redis master with ID=\" << id;\n  }\n\n  state_mask_ &= 
~R_SYNCING;\n  state_mask_ |= R_SYNC_OK;\n\n  // There is a data race condition in Redis-master code, where \"ACK 0\" handler may be\n  // triggered before Redis is ready to transition to the streaming state and it silently ignores\n  // \"ACK 0\". We reduce the chance it happens with this delay.\n  ThisFiber::SleepFor(50ms);\n\n  return error_code{};\n}\n\n// Initialize and start sub-replica for each flow.\nerror_code Replica::InitiateDflySync(std::optional<LastMasterSyncData> last_master_sync_data) {\n  auto start_time = absl::Now();\n\n  // Initialize MultiShardExecution.\n  multi_shard_exe_.reset(new MultiShardExecution());\n\n  auto load_context = std::make_shared<RdbLoadContext>();\n\n  // Initialize shard flows.\n  shard_flows_.resize(master_context_.num_flows);\n  DCHECK(!shard_flows_.empty());\n  for (unsigned i = 0; i < shard_flows_.size(); ++i) {\n    // Transfer LSN state for partial sync\n    uint64_t partial_sync_lsn = 0;\n    if (shard_flows_[i]) {\n      partial_sync_lsn = shard_flows_[i]->JournalExecutedCount();\n    }\n    shard_flows_[i].reset(new DflyShardReplica(server(), master_context_, i, &service_,\n                                               multi_shard_exe_, load_context.get()));\n    if (partial_sync_lsn > 0) {\n      shard_flows_[i]->SetRecordsExecuted(partial_sync_lsn);\n    }\n  }\n  thread_flow_map_ = Partition(shard_flows_.size());\n\n  // Blocked on until all flows got full sync cut.\n  BlockingCounter sync_block{unsigned(shard_flows_.size())};\n\n  // Switch to new error handler that closes flow sockets.\n  auto err_handler = [this, sync_block](const auto& ge) mutable {\n    // Unblock this function.\n    sync_block->Cancel();\n\n    // Make sure the flows are not in a state transition\n    lock_guard lk{flows_op_mu_};\n\n    // Unblock all sockets.\n    DefaultErrorHandler(ge);\n    for (auto& flow : shard_flows_)\n      flow->Cancel();\n  };\n\n  RETURN_ON_ERR(exec_st_.SwitchErrorHandler(std::move(err_handler)));\n\n  // Start 
full sync flows.\n  state_mask_ |= R_SYNCING;\n\n  std::string_view sync_type;\n  absl::Cleanup cleanup = [this, &sync_type]() {\n    // We do the following operations regardless of outcome.\n    JoinDflyFlows();\n    if (sync_type == \"full\") {\n      service_.RemoveLoadingState();\n    }\n    state_mask_ &= ~R_SYNCING;\n    last_journal_LSNs_.reset();\n  };\n\n  {\n    unsigned num_df_flows = shard_flows_.size();\n    if (last_master_sync_data && num_df_flows != last_master_sync_data->last_journal_LSNs.size()) {\n      LOG(WARNING) << \"last master has different flow size: \"\n                   << last_master_sync_data->last_journal_LSNs.size()\n                   << \" than current: \" << num_df_flows;\n      last_master_sync_data = std::nullopt;\n    }\n\n    // Going out of the way to avoid using std::vector<bool>...\n    auto is_full_sync = std::make_unique<bool[]>(num_df_flows);\n    // The elements of this bool array are not always initialized but we call std::accumulate below\n    // unconditionally. For some cases this will accumulate whatever junk that uninitialized memory\n    // cell contain. Do not remove the memset below.\n    std::memset(is_full_sync.get(), 0, num_df_flows);\n    DCHECK(!last_journal_LSNs_ || last_journal_LSNs_->size() == num_df_flows);\n    auto shard_cb = [&](unsigned index, auto*) {\n      for (auto id : thread_flow_map_[index]) {\n        auto ec = shard_flows_[id]->StartSyncFlow(sync_block, &exec_st_,\n                                                  last_journal_LSNs_.has_value()\n                                                      ? 
std::optional((*last_journal_LSNs_)[id])\n                                                      : std::nullopt,\n                                                  last_master_sync_data);\n        if (ec.has_value())\n          is_full_sync[id] = ec.value();\n        else\n          exec_st_.ReportError(ec.error());\n      }\n    };\n\n    if (last_journal_LSNs_) {\n      ++psync_attempts_;\n    }\n\n    // Lock to prevent the error handler from running instantly\n    // while the flows are in a mixed state.\n    lock_guard lk{flows_op_mu_};\n\n    shard_set->pool()->AwaitFiberOnAll(std::move(shard_cb));\n    if (last_journal_LSNs_) {\n      ++psync_attempts_;\n    }\n\n    last_journal_LSNs_.reset();\n    size_t num_full_flows =\n        std::accumulate(is_full_sync.get(), is_full_sync.get() + num_df_flows, 0);\n\n    if (num_full_flows == num_df_flows) {\n      // Make sure we're in LOADING state.\n      if (!service_.RequestLoadingState()) {\n        return exec_st_.ReportError(std::make_error_code(errc::state_not_recoverable),\n                                    \"Failed to enter LOADING state\");\n      }\n      sync_type = \"full\";\n\n      DVLOG(1) << \"Calling Flush on all slots \" << this;\n\n      passed_full_sync_ = false;\n      if (slot_range_.has_value()) {\n        JournalExecutor{&service_}.FlushSlots(slot_range_.value());\n      } else {\n        JournalExecutor{&service_}.FlushAll();\n      }\n      DVLOG(1) << \"Flush on all slots ended \" << this;\n    } else if (num_full_flows == 0) {\n      sync_type = \"partial\";\n    } else {\n      exec_st_.ReportError(std::make_error_code(errc::state_not_recoverable),\n                           \"Won't do a partial sync: some flows must fully resync\");\n    }\n  }\n\n  RETURN_ON_ERR(exec_st_.GetError());\n\n  LOG(INFO) << \"Started \" << sync_type << \" sync with \" << server().Description();\n\n  // We skip full sync if we can do partial\n  if (sync_type != \"partial\") {\n    // Send DFLY SYNC.\n    
if (auto ec = SendNextPhaseRequest(\"SYNC\"); ec) {\n      return exec_st_.ReportError(ec);\n    }\n\n    // Wait for all flows to receive full sync cut.\n    // In case of an error, this is unblocked by the error handler.\n    VLOG(1) << \"Waiting for all full sync cut confirmations\";\n    sync_block->Wait();\n\n    // Check if we woke up due to cancellation.\n    if (!exec_st_.IsRunning()) {\n      load_context->PerformPostLoad(&service_, true);\n      return exec_st_.GetError();\n    }\n\n    load_context->PerformPostLoad(&service_);\n  }\n\n  passed_full_sync_ = true;\n\n  // Send DFLY STARTSTABLE.\n  if (auto ec = SendNextPhaseRequest(\"STARTSTABLE\"); ec) {\n    return exec_st_.ReportError(ec);\n  }\n\n  if (sync_type == \"partial\") {\n    ++psync_successes_;\n  }\n\n  // Joining flows and resetting state is done by cleanup.\n  double seconds = double(absl::ToInt64Milliseconds(absl::Now() - start_time)) / 1000;\n  LOG(INFO) << sync_type << \" sync finished in \" << strings::HumanReadableElapsedTime(seconds);\n\n  return exec_st_.GetError();\n}\n\nerror_code Replica::ConsumeRedisStream() {\n  base::IoBuf io_buf(16_KB);\n  ConnectionContext conn_context{nullptr, {}};\n  conn_context.is_replicating = true;\n  conn_context.journal_emulated = true;\n  conn_context.skip_acl_validation = true;\n  conn_context.ns = &namespaces->GetDefaultNamespace();\n\n  // we never reply back on the commands.\n  facade::CapturingReplyBuilder null_builder{facade::ReplyMode::NONE};\n  ResetParser(RedisParser::Mode::SERVER);\n\n  // Master waits for this command in order to start sending replication stream.\n  RETURN_ON_ERR(SendCommand(\"REPLCONF ACK 0\"));\n\n  VLOG(1) << \"Before reading repl-log\";\n\n  // Redis sends either pings every \"repl_ping_slave_period\" time inside replicationCron().\n  // or, alternatively, write commands stream coming from propagate() function.\n  // Replica connection must send \"REPLCONF ACK xxx\" in order to make sure that master replication\n  // 
buffer gets disposed of already processed commands, this is done in a separate fiber.\n  error_code ec;\n  LOG(INFO) << \"Transitioned into stable sync\";\n\n  // Set new error handler.\n  auto err_handler = [this](const auto& ge) {\n    // Trigger ack-fiber\n    replica_waker_.notifyAll();\n    DefaultErrorHandler(ge);\n  };\n  RETURN_ON_ERR(exec_st_.SwitchErrorHandler(std::move(err_handler)));\n\n  acks_fb_ = fb2::Fiber(\"redis_acks\", &Replica::RedisStreamAcksFb, this);\n\n  CommandContext cmnd_ctx;\n  cmnd_ctx.Init(&null_builder, &conn_context);\n  while (true) {\n    // Yield if the fiber has been running for long.\n    if (base::CycleClock::ToUsec(ThisFiber::GetRunningTimeCycles()) > 1000) {  // 1ms\n      ThisFiber::Yield();\n    }\n\n    // If the acks-fb or something else triggered a shutdown, then do not attempt to read from the\n    // stream.\n    if (!exec_st_.IsRunning()) {\n      DCHECK(exec_st_.IsError());\n      LOG_REPL_ERROR(\"Stopping stream consumer in phase \"\n                     << GetCurrentPhase()\n                     << \" because of external error: \" << exec_st_.GetError().Format());\n      acks_fb_.JoinIfNeeded();\n      return exec_st_.GetError();\n    }\n\n    auto response = ReadRespReply(&io_buf, /*copy_msg=*/false);\n    if (!response.has_value()) {\n      LOG_REPL_ERROR(\"Error in Redis Stream at phase \"\n                     << GetCurrentPhase() << \" with \" << server().Description()\n                     << \", error: \" << response.error()\n                     << \", socket state: \" + GetSocketInfo(Sock()->native_handle()));\n      exec_st_.ReportError(response.error());\n      acks_fb_.JoinIfNeeded();\n      return response.error();\n    }\n\n    const auto& last_args = LastResponseArgs();\n    if (!last_args.empty()) {\n      string cmd = absl::CHexEscape(last_args[0].GetView());\n\n      // Valkey and Redis may send MULTI and EXEC as part of their replication commands.\n      // Dragonfly disallows some commands, such 
as SELECT, inside of MULTI/EXEC, so here we simply\n      // ignore MULTI/EXEC and execute their inner commands individually.\n      if (!absl::EqualsIgnoreCase(cmd, \"MULTI\") && !absl::EqualsIgnoreCase(cmd, \"EXEC\")) {\n        VLOG(2) << \"Got command \" << cmd << \"\\n consumed: \" << response->total_read;\n\n        if (LastResponseArgs()[0].GetBuf()[0] == '\\r') {\n          for (const auto& arg : LastResponseArgs()) {\n            LOG(INFO) << absl::CHexEscape(ToSV(arg.GetBuf()));\n          }\n        }\n\n        FillBackedArgs(last_args, &cmnd_ctx);\n        service_.DispatchCommand(facade::ParsedArgs{cmnd_ctx}, &cmnd_ctx,\n                                 facade::AsyncPreference::ONLY_SYNC);\n      }\n    }\n\n    io_buf.ConsumeInput(response->left_in_buffer);\n    repl_offs_ += response->total_read;\n    replica_waker_.notify();  // Notify to trigger ACKs.\n  }\n}\n\nerror_code Replica::ConsumeDflyStream() {\n  // Set new error handler that closes flow sockets.\n  auto err_handler = [this](const auto& ge) {\n    // Make sure the flows are not in a state transition\n    lock_guard lk{flows_op_mu_};\n\n    LOG_REPL_ERROR(\"Replication error in phase \"\n                   << GetCurrentPhase() << \" with \" << server().Description() << \", error: \"\n                   << ge.Format() << \", socket state: \" + GetSocketInfo(Sock()->native_handle()));\n\n    DefaultErrorHandler(ge);\n    for (auto& flow : shard_flows_) {\n      flow->Cancel();\n    }\n    multi_shard_exe_->CancelAllBlockingEntities();\n  };\n  RETURN_ON_ERR(exec_st_.SwitchErrorHandler(std::move(err_handler)));\n\n  LOG(INFO) << \"Transitioned into stable sync\";\n  // Transition flows into stable sync.\n  {\n    auto shard_cb = [&](unsigned index, auto*) {\n      const auto& local_ids = thread_flow_map_[index];\n\n      for (unsigned id : local_ids) {\n        auto ec = shard_flows_[id]->StartStableSyncFlow(&exec_st_);\n        if (ec)\n          exec_st_.ReportError(ec);\n      }\n    
};\n\n    // Lock to prevent error handler from running on mixed state.\n    lock_guard lk{flows_op_mu_};\n    shard_set->pool()->AwaitFiberOnAll(std::move(shard_cb));\n  }\n\n  JoinDflyFlows();\n\n  last_journal_LSNs_.emplace();\n  for (auto& flow : shard_flows_) {\n    last_journal_LSNs_->push_back(flow->JournalExecutedCount());\n  }\n\n  LOG(INFO) << \"Exit stable sync\";\n  // The only option to unblock is to cancel the context.\n  CHECK(exec_st_.GetError());\n\n  return exec_st_.GetError();\n}\n\nvoid Replica::JoinDflyFlows() {\n  for (auto& flow : shard_flows_) {\n    flow->JoinFlow();\n  }\n}\n\nvoid Replica::SetShardStates(bool replica) {\n  shard_set->RunBriefInParallel([replica](EngineShard* shard) { shard->SetReplica(replica); });\n}\n\nerror_code Replica::SendNextPhaseRequest(string_view kind) {\n  // Ask master to start sending replication stream\n  string request = StrCat(\"DFLY \", kind, \" \", master_context_.dfly_session_id);\n\n  VLOG(1) << \"Sending: \" << request;\n  RETURN_ON_ERR(SendCommandAndReadResponse(request));\n\n  PC_RETURN_ON_BAD_RESPONSE(CheckRespIsSimpleReply(\"OK\"));\n\n  return std::error_code{};\n}\n\nio::Result<bool> DflyShardReplica::StartSyncFlow(\n    BlockingCounter sb, ExecutionState* cntx, std::optional<LSN> lsn,\n    std::optional<Replica::LastMasterSyncData> last_master_data) {\n  using nonstd::make_unexpected;\n  DCHECK(!master_context_.master_repl_id.empty() && !master_context_.dfly_session_id.empty());\n  proactor_index_ = ProactorBase::me()->GetPoolIndex();\n\n  RETURN_ON_ERR_T(make_unexpected,\n                  ConnectAndAuth(absl::GetFlag(FLAGS_master_connect_timeout_ms) * 1ms, &exec_st_));\n\n  VLOG(1) << \"Sending on flow \" << master_context_.master_repl_id << \" \"\n          << master_context_.dfly_session_id << \" \" << flow_id_ << \" lsn: \" << lsn.value_or(-1);\n\n  // DFLY FLOW <master_id> <session_id> <flow_id> [lsn] [last_master_id lsn-vec]\n  std::string cmd = StrCat(\"DFLY FLOW \", 
master_context_.master_repl_id, \" \",\n                           master_context_.dfly_session_id, \" \", flow_id_);\n  // Try to negotiate a partial sync if possible.\n  if (lsn.has_value() && master_context_.version > DflyVersion::VER1 &&\n      absl::GetFlag(FLAGS_replica_partial_sync)) {\n    absl::StrAppend(&cmd, \" \", *lsn);\n  }\n  if (last_master_data && master_context_.version >= DflyVersion::VER5 &&\n      absl::GetFlag(FLAGS_replica_partial_sync)) {\n    string lsn_str = absl::StrJoin(last_master_data.value().last_journal_LSNs, \"-\");\n    absl::StrAppend(&cmd, \" \", last_master_data.value().id, \" \", lsn_str);\n    VLOG(1) << \"Sending last master sync flow \" << last_master_data.value().id << \" \" << lsn_str;\n  }\n\n  ResetParser(RedisParser::Mode::CLIENT);\n  leftover_buf_.emplace(128);\n  RETURN_ON_ERR_T(make_unexpected, SendCommand(cmd));\n  auto read_resp = ReadRespReply(&*leftover_buf_);\n  if (!read_resp.has_value()) {\n    return make_unexpected(read_resp.error());\n  }\n\n  PC_RETURN_ON_BAD_RESPONSE_T(make_unexpected,\n                              CheckRespFirstTypes({RespExpr::STRING, RespExpr::STRING}));\n\n  string_view flow_directive = ToSV(LastResponseArgs()[0].GetBuf());\n\n  string eof_token;\n  PC_RETURN_ON_BAD_RESPONSE_T(make_unexpected,\n                              flow_directive == \"FULL\" || flow_directive == \"PARTIAL\");\n  bool is_full_sync = flow_directive == \"FULL\";\n\n  eof_token = ToSV(LastResponseArgs()[1].GetBuf());\n\n  leftover_buf_->ConsumeInput(read_resp->left_in_buffer);\n\n  // Skip full sync if we are doing partial. Clean up will take care mixed state, e.g,\n  // some flows receive partial while others receive full.\n  if (is_full_sync) {\n    // We can not discard io_buf because it may contain data\n    // besides the response we parsed. 
Therefore we pass it further to ReplicateDFFb.\n    sync_fb_ = fb2::Fiber(\"shard_full_sync\", &DflyShardReplica::FullSyncDflyFb, this,\n                          std::move(eof_token), sb, cntx);\n  } else if (last_master_data) {\n    // Only needed when we are rotating masters.\n    SetRecordsExecuted(last_master_data->last_journal_LSNs[flow_id_]);\n  }\n\n  return is_full_sync;\n}\n\nerror_code DflyShardReplica::StartStableSyncFlow(ExecutionState* cntx) {\n  DCHECK(!master_context_.master_repl_id.empty() && !master_context_.dfly_session_id.empty());\n  ProactorBase* mythread = ProactorBase::me();\n  CHECK(mythread);\n\n  if (!Sock()->IsOpen()) {\n    return std::make_error_code(errc::io_error);\n  }\n  rdb_loader_.reset();  // we do not need it anymore.\n  sync_fb_ =\n      fb2::Fiber(\"shard_stable_sync_read\", &DflyShardReplica::StableSyncDflyReadFb, this, cntx);\n\n  return std::error_code{};\n}\n\nvoid DflyShardReplica::FullSyncDflyFb(std::string eof_token, BlockingCounter bc,\n                                      ExecutionState* cntx) {\n  DCHECK(leftover_buf_);\n  io::PrefixSource ps{leftover_buf_->InputBuffer(), Sock()};\n\n  rdb_loader_->SetFullSyncCutCb([bc, ran = false]() mutable {\n    if (!ran) {\n      bc->Dec();\n      ran = true;\n    }\n  });\n\n  // In the no point-in-time replication flow, it's possible to serialize a journal change\n  // before serializing the bucket that the key was updated in on the master side. As a result,\n  // when loading the serialized bucket data on the replica, it may overwrite the earlier entry\n  // added by the journal change. 
This is an expected and valid scenario, so to avoid unnecessary\n  // warnings, we enable SetOverrideExistingKeys(true).\n  rdb_loader_->SetOverrideExistingKeys(true);\n\n  // Load incoming rdb stream.\n  if (std::error_code ec = rdb_loader_->Load(&ps); ec) {\n    cntx->ReportError(ec, \"Error loading rdb format\");\n    return;\n  }\n\n  // Try finding eof token.\n  io::PrefixSource chained_tail{rdb_loader_->Leftover(), &ps};\n  if (!eof_token.empty()) {\n    unique_ptr<uint8_t[]> buf{new uint8_t[eof_token.size()]};\n\n    io::Result<size_t> res =\n        chained_tail.ReadAtLeast(io::MutableBytes{buf.get(), eof_token.size()}, eof_token.size());\n\n    if (!res || *res != eof_token.size()) {\n      cntx->ReportError(std::make_error_code(errc::protocol_error),\n                        \"Error finding eof token in stream\");\n      return;\n    }\n  }\n\n  // Keep loader leftover.\n  io::Bytes unused = chained_tail.UnusedPrefix();\n  if (!unused.empty()) {\n    leftover_buf_.emplace(unused.size());\n    leftover_buf_->WriteAndCommit(unused.data(), unused.size());\n  } else {\n    leftover_buf_.reset();\n  }\n\n  if (auto jo = rdb_loader_->journal_offset(); jo.has_value()) {\n    this->journal_rec_executed_.store(*jo);\n  } else {\n    cntx->ReportError(std::make_error_code(errc::protocol_error),\n                      \"Error finding journal offset in stream\");\n  }\n  VLOG(1) << \"FullSyncDflyFb finished after reading \" << rdb_loader_->bytes_read() << \" bytes\";\n}\n\nvoid DflyShardReplica::StableSyncDflyReadFb(ExecutionState* cntx) {\n  DCHECK_EQ(proactor_index_, ProactorBase::me()->GetPoolIndex());\n\n  // Check leftover from full sync.\n  io::Bytes prefix{};\n  if (leftover_buf_ && leftover_buf_->InputLen() > 0) {\n    prefix = leftover_buf_->InputBuffer();\n  }\n\n  io::PrefixSource ps{prefix, Sock()};\n\n  JournalReader reader{&ps, 0};\n  DCHECK_GE(journal_rec_executed_, 1u);\n  TransactionReader 
tx_reader{journal_rec_executed_.load(std::memory_order_relaxed) - 1};\n\n  acks_fb_ = fb2::Fiber(\"shard_acks\", &DflyShardReplica::StableSyncDflyAcksFb, this, cntx);\n  TransactionData tx_data;\n  while (tx_reader.NextTxData(&reader, cntx, &tx_data)) {\n    DVLOG(3) << \"Lsn: \" << tx_data.lsn;\n\n    last_io_time_ = Proactor()->GetMonotonicTimeNs();\n    if (tx_data.opcode == journal::Op::LSN) {\n      //  Do nothing\n    } else if (tx_data.opcode == journal::Op::PING) {\n      force_ping_ = true;\n      journal_rec_executed_.fetch_add(1, std::memory_order_relaxed);\n      if (EngineShard::tlocal() && EngineShard::tlocal()->journal()) {\n        // We must register this entry to the journal to allow partial sync\n        // if journal is active.\n        journal::RecordEntry(0, journal::Op::PING, 0, nullopt, {});\n      }\n    } else {\n      const bool is_successful = ExecuteTx(std::move(tx_data), cntx);\n      if (is_successful) {\n        // We only increment upon successful execution of the transaction.\n        // The reason for this is that during partial sync we sent this\n        // number as the lsn number to resume from. However, if for example\n        // we increment this when a command fails (because the context\n        // got cancelled, e.g, replication connection broke), we will get\n        // inconsistent data because the replica will resume from the next\n        // lsn of the master and this lsn entry will be lost.\n        journal_rec_executed_.fetch_add(1, std::memory_order_relaxed);\n      } else {\n        // We only report DFATAL:\n        // 1. Context is running\n        // 2. 
We are ACTIVE global state\n        if (cntx->IsRunning() && ((*ServerState::tlocal()).gstate() == GlobalState::ACTIVE)) {\n          LOG(DFATAL) << \"ExecuteTx() on replica should be successful.\";\n        }\n      }\n    }\n\n    shard_replica_waker_.notifyAll();\n  }\n}\n\nvoid Replica::RedisStreamAcksFb() {\n  constexpr size_t kAckRecordMaxInterval = 1024;\n  std::chrono::duration ack_time_max_interval =\n      1ms * absl::GetFlag(FLAGS_replication_acks_interval);\n  std::string ack_cmd;\n  auto next_ack_tp = std::chrono::steady_clock::now();\n\n  while (exec_st_.IsRunning()) {\n    VLOG(2) << \"Sending an ACK with offset=\" << repl_offs_;\n    ack_cmd = absl::StrCat(\"REPLCONF ACK \", repl_offs_);\n    next_ack_tp = std::chrono::steady_clock::now() + ack_time_max_interval;\n    if (auto ec = SendCommand(ack_cmd); ec) {\n      exec_st_.ReportError(ec);\n      break;\n    }\n    ack_offs_ = repl_offs_;\n\n    replica_waker_.await_until(\n        [&]() { return repl_offs_ > ack_offs_ + kAckRecordMaxInterval || (!exec_st_.IsRunning()); },\n        next_ack_tp);\n  }\n}\n\nvoid DflyShardReplica::StableSyncDflyAcksFb(ExecutionState* cntx) {\n  DCHECK_EQ(proactor_index_, ProactorBase::me()->GetPoolIndex());\n\n  constexpr size_t kAckRecordMaxInterval = 1024;\n  std::chrono::duration ack_time_max_interval =\n      1ms * absl::GetFlag(FLAGS_replication_acks_interval);\n  std::string ack_cmd;\n  auto next_ack_tp = std::chrono::steady_clock::now();\n\n  uint64_t current_offset;\n  while (cntx->IsRunning()) {\n    // Handle ACKs with the master. 
PING opcodes from the master mean we should immediately\n    // answer.\n    current_offset = journal_rec_executed_.load(std::memory_order_relaxed);\n    VLOG(1) << \"Sending an ACK with offset=\" << current_offset << \" forced=\" << force_ping_;\n    ack_cmd = absl::StrCat(\"REPLCONF ACK \", current_offset);\n    force_ping_ = false;\n    next_ack_tp = std::chrono::steady_clock::now() + ack_time_max_interval;\n    if (auto ec = SendCommand(ack_cmd); ec) {\n      cntx->ReportError(ec);\n      break;\n    }\n    ack_offs_ = current_offset;\n\n    shard_replica_waker_.await_until(\n        [&]() {\n          return journal_rec_executed_.load(std::memory_order_relaxed) >\n                     ack_offs_ + kAckRecordMaxInterval ||\n                 force_ping_ || (!cntx->IsRunning());\n        },\n        next_ack_tp);\n  }\n}\n\nDflyShardReplica::DflyShardReplica(ServerContext server_context, MasterContext master_context,\n                                   uint32_t flow_id, Service* service,\n                                   std::shared_ptr<MultiShardExecution> multi_shard_exe,\n                                   RdbLoadContext* load_context)\n    : ProtocolClient(server_context),\n      service_(*service),\n      master_context_(master_context),\n      multi_shard_exe_(multi_shard_exe),\n      flow_id_(flow_id) {\n  executor_ = std::make_unique<JournalExecutor>(service);\n  rdb_loader_ = std::make_unique<RdbLoader>(&service_, load_context);\n  rdb_loader_->SetLoadUnownedSlots(true);\n  rdb_loader_->SetShardCount(master_context.num_flows);\n}\n\nDflyShardReplica::~DflyShardReplica() {\n  CloseSocket();\n  JoinFlow();\n}\n\nbool DflyShardReplica::ExecuteTx(TransactionData&& tx_data, ExecutionState* cntx) {\n  if (!cntx->IsRunning()) {\n    return false;\n  }\n\n  if (!tx_data.IsGlobalCmd()) {\n    VLOG(3) << \"Execute cmd without sync between shards. 
txid: \" << tx_data.txid;\n    return executor_->Execute(tx_data.dbid, tx_data.command) == facade::DispatchResult::OK;\n  }\n\n  bool inserted_by_me =\n      multi_shard_exe_->InsertTxToSharedMap(tx_data.txid, master_context_.num_flows);\n\n  auto& multi_shard_data = multi_shard_exe_->Find(tx_data.txid);\n\n  VLOG(2) << \"Execute txid: \" << tx_data.txid << \" waiting for data in all shards\";\n  // Wait until shards flows got transaction data and inserted to map.\n  // This step enforces that replica will execute multi shard commands that finished on master\n  // and replica recieved all the commands from all shards.\n  multi_shard_data.block->Wait();\n  // Check if we woke up due to cancellation.\n  if (!exec_st_.IsRunning())\n    return false;\n  VLOG(2) << \"Execute txid: \" << tx_data.txid << \" block wait finished\";\n\n  VLOG(2) << \"Execute txid: \" << tx_data.txid << \" global command execution\";\n  // Wait until all shards flows get to execution step of this transaction.\n  multi_shard_data.barrier.Wait();\n  // Check if we woke up due to cancellation.\n  if (!exec_st_.IsRunning())\n    return false;\n  // Global command will be executed only from one flow fiber. 
This ensure corectness of data in\n  // replica.\n  bool execution_res = true;\n  if (inserted_by_me) {\n    execution_res = executor_->Execute(tx_data.dbid, tx_data.command) == facade::DispatchResult::OK;\n  }\n  // Wait until exection is done, to make sure we done execute next commands while the global is\n  // executed.\n  multi_shard_data.barrier.Wait();\n  // Check if we woke up due to cancellation.\n  if (!exec_st_.IsRunning())\n    return false;\n\n  // Erase from map can be done only after all flow fibers executed the transaction commands.\n  // The last fiber which will decrease the counter to 0 will be the one to erase the data from\n  // map\n  auto val = multi_shard_data.counter.fetch_sub(1, std::memory_order_relaxed);\n  VLOG(2) << \"txid: \" << tx_data.txid << \" counter: \" << val;\n  if (val == 1) {\n    multi_shard_exe_->Erase(tx_data.txid);\n  }\n  return execution_res;\n}\n\nerror_code Replica::ParseReplicationHeader(base::IoBuf* io_buf, PSyncResponse* dest) {\n  std::string_view str;\n\n  RETURN_ON_ERR(ReadLine(io_buf, &str));\n\n  DCHECK(!str.empty());\n\n  std::string_view header;\n  bool valid = false;\n\n  auto bad_header = [str]() {\n    LOG(ERROR) << \"Bad replication header: \" << str;\n    return std::make_error_code(std::errc::illegal_byte_sequence);\n  };\n\n  // non-empty lines\n  if (str[0] != '+') {\n    return bad_header();\n  }\n\n  header = str.substr(1);\n  VLOG(1) << \"header: \" << header;\n  if (absl::ConsumePrefix(&header, \"FULLRESYNC \")) {\n    // +FULLRESYNC db7bd45bf68ae9b1acac33acb 123\\r\\n\n    //             master_id  repl_offset\n    size_t pos = header.find(' ');\n    if (pos != std::string_view::npos) {\n      if (absl::SimpleAtoi(header.substr(pos + 1), &repl_offs_)) {\n        master_context_.master_repl_id = string(header.substr(0, pos));\n        valid = true;\n        VLOG(1) << \"master repl_id \" << master_context_.master_repl_id << \" / \" << repl_offs_;\n      }\n    }\n\n    if (!valid)\n      return 
bad_header();\n\n    io_buf->ConsumeInput(str.size() + 2);\n    RETURN_ON_ERR(ReadLine(io_buf, &str));  // Read the next line parsed below.\n\n    // Readline checks for non ws character first before searching for eol\n    // so str must be non empty.\n    DCHECK(!str.empty());\n\n    if (str[0] != '$') {\n      return bad_header();\n    }\n\n    std::string_view token = str.substr(1);\n    VLOG(1) << \"token: \" << token;\n    if (absl::ConsumePrefix(&token, \"EOF:\")) {\n      CHECK_EQ(kRdbEofMarkSize, token.size()) << token;\n      dest->fullsync.emplace<string>(token);\n      VLOG(1) << \"Token: \" << token;\n    } else {\n      size_t rdb_size = 0;\n      if (!absl::SimpleAtoi(token, &rdb_size))\n        return std::make_error_code(std::errc::illegal_byte_sequence);\n\n      VLOG(1) << \"rdb size \" << rdb_size;\n      dest->fullsync.emplace<size_t>(rdb_size);\n    }\n    io_buf->ConsumeInput(str.size() + 2);\n  } else if (absl::ConsumePrefix(&header, \"CONTINUE\")) {\n    // we send psync2 so we should get master replid.\n    // That could change due to redis failovers.\n    // TODO: part sync\n    dest->fullsync.emplace<size_t>(0);\n    LOG(ERROR) << \"Partial replication not supported yet\";\n    return std::make_error_code(std::errc::not_supported);\n  } else {\n    LOG(ERROR) << \"Unknown replication header\";\n    return bad_header();\n  }\n\n  return error_code{};\n}\n\nauto Replica::GetSummary() const -> Summary {\n  auto f = [this]() {\n    auto last_io_time = LastIoTime();\n\n    for (const auto& flow : shard_flows_) {\n      last_io_time = std::max(last_io_time, flow->LastIoTime());\n    }\n\n    Summary res;\n    res.host = server().host;\n    res.port = server().port;\n    res.master_link_established = (state_mask_ & R_TCP_CONNECTED);\n    res.full_sync_in_progress = (state_mask_ & R_SYNCING);\n    res.full_sync_done = (state_mask_ & R_SYNC_OK);\n\n    uint64_t current_time = ProactorBase::GetMonotonicTimeNs();\n    // last_io_time is derived 
above by reading last_io_time_ from all the flows,\n    // by accessing them from a foreign thread, see the loop above. As a result some\n    // threads may have last_io_time_ bigger than our current time, so we fix it here.\n    if (last_io_time > current_time) {\n      res.master_last_io_sec = 0;\n    } else {\n      res.master_last_io_sec = (current_time - last_io_time) / 1000000000UL;\n    }\n\n    res.master_id = master_context_.master_repl_id;\n    res.reconnect_count = reconnect_count_;\n    res.repl_offset_sum = 0;\n    for (uint64_t offs : GetReplicaOffset()) {\n      res.repl_offset_sum += offs;\n    }\n    res.psync_successes = psync_successes_;\n    res.psync_attempts = psync_attempts_;\n    res.passed_full_sync = passed_full_sync_;\n    return res;\n  };\n\n  return proactor_->AwaitBrief(f);\n}\n\nstd::vector<uint64_t> Replica::GetReplicaOffset() const {\n  std::vector<uint64_t> flow_rec_count;\n  flow_rec_count.resize(shard_flows_.size());\n  for (const auto& flow : shard_flows_) {\n    uint32_t flow_id = flow->FlowId();\n    uint64_t rec_count = flow->JournalExecutedCount();\n    DCHECK_LT(flow_id, shard_flows_.size());\n    flow_rec_count[flow_id] = rec_count;\n  }\n  return flow_rec_count;\n}\n\nstd::string Replica::GetSyncId() const {\n  return master_context_.dfly_session_id;\n}\n\nstd::string Replica::GetCurrentPhase() const {\n  if (!(state_mask_ & R_ENABLED))\n    return \"DISABLED\";\n  if (!(state_mask_ & R_TCP_CONNECTED))\n    return \"TCP_CONNECTING\";\n  if (!(state_mask_ & R_GREETED))\n    return \"GREETING\";\n  if (!(state_mask_ & R_SYNC_OK))\n    return \"INITIAL_SYNC\";\n  if (state_mask_ & R_SYNCING)\n    return \"FULL_SYNC_IN_PROGRESS\";\n\n  return \"STABLE_SYNC\";\n}\n\nstd::vector<unsigned> Replica::GetFlowMapAtIndex(size_t index) const {\n  // Not all proactors have flows\n  if (index >= thread_flow_map_.size()) {\n    return {};\n  }\n  return thread_flow_map_[index];\n}\n\nsize_t Replica::GetRecCountExecutedPerShard(const 
std::vector<unsigned>& indexes) const {\n  size_t total_shard_lsn = 0;\n  for (auto index : indexes) {\n    total_shard_lsn += shard_flows_[index]->JournalExecutedCount();\n  }\n  // Journal always starts at pos 1\n  return std::max<size_t>(1UL, total_shard_lsn);\n}\n\nuint32_t DflyShardReplica::FlowId() const {\n  return flow_id_;\n}\n\nvoid DflyShardReplica::Pause(bool pause) {\n  if (rdb_loader_) {\n    rdb_loader_->Pause(pause);\n  }\n}\n\nvoid DflyShardReplica::JoinFlow() {\n  sync_fb_.JoinIfNeeded();\n  acks_fb_.JoinIfNeeded();\n}\n\nvoid DflyShardReplica::Cancel() {\n  if (rdb_loader_)\n    rdb_loader_->stop();\n  ShutdownSocket();\n  shard_replica_waker_.notifyAll();\n}\n\n}  // namespace dfly\n"
  },
  {
    "path": "src/server/replica.h",
    "content": "// Copyright 2022, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n#pragma once\n\n#include <absl/container/inlined_vector.h>\n\n#include <atomic>\n#include <boost/fiber/barrier.hpp>\n#include <queue>\n#include <variant>\n\n#include \"facade/facade_types.h\"\n#include \"facade/redis_parser.h\"\n#include \"io/io_buf.h\"\n#include \"server/cluster/cluster_defs.h\"\n#include \"server/execution_state.h\"\n#include \"server/journal/tx_executor.h\"\n#include \"server/journal/types.h\"\n#include \"server/protocol_client.h\"\n#include \"server/replica_types.h\"\n#include \"server/version.h\"\n#include \"util/fiber_socket_base.h\"\n\nnamespace dfly {\n\nclass Service;\nclass ConnectionContext;\nclass JournalExecutor;\nstruct JournalReader;\nclass DflyShardReplica;\n\n// The attributes of the master we are connecting to.\nstruct MasterContext {\n  std::string master_repl_id;\n  std::string dfly_session_id;  // Sync session id for dfly sync.\n  unsigned num_flows = 0;\n  DflyVersion version = DflyVersion::VER1;\n};\n\n// This class manages replication from both Dragonfly and Redis masters.\nclass Replica : ProtocolClient {\n private:\n  // The flow is : R_ENABLED -> R_TCP_CONNECTED -> (R_SYNCING) -> R_SYNC_OK.\n  // SYNCING means that the initial ack succeeded. It may be optional if we can still load from\n  // the journal offset.\n  enum State : unsigned {\n    R_ENABLED = 1,  // Replication mode is enabled. 
Serves for signaling shutdown.\n    R_TCP_CONNECTED = 2,\n    R_GREETED = 4,     // Initial handshake with the master is done.\n    R_SYNCING = 8,     // In process of full sync with the master.\n    R_SYNC_OK = 0x10,  // Signals successful ending of full-sync state, exclusive with R_SYNCING.\n  };\n\n public:\n  Replica(std::string master_host, uint16_t port, Service* se, std::string_view id,\n          std::optional<cluster::SlotRange> slot_range);\n  ~Replica();\n\n  // Spawns a fiber that runs until link with master is broken or the replication is stopped.\n  // Returns true if initial link with master has been established or\n  // false if it has failed.\n  GenericError Start();\n  using LastMasterSyncData = dfly::LastMasterSyncData;\n  void StartMainReplicationFiber(std::optional<LastMasterSyncData> data);\n\n  // Sets the server state to have replication enabled.\n  // It is like Start(), but does not attempt to establish\n  // a connection right-away, but instead lets MainReplicationFb do the work.\n  void EnableReplication();\n\n  std::optional<LastMasterSyncData> Stop();  // thread-safe\n\n  void Pause(bool pause);\n\n  std::error_code TakeOver(unsigned timeout, bool save_flag);\n\n  bool IsContextCancelled() const {\n    return !exec_st_.IsRunning();\n  }\n\n private: /* Main standalone mode functions */\n  // Coordinate state transitions. 
Spawned by start.\n  void MainReplicationFb(std::optional<LastMasterSyncData> data);\n\n  std::error_code Greet();  // Send PING and REPLCONF.\n\n  std::error_code HandleCapaDflyResp();\n  std::error_code ConfigureDflyMaster();\n\n  std::error_code InitiatePSync();                                           // Redis full sync.\n  std::error_code InitiateDflySync(std::optional<LastMasterSyncData> data);  // Dragonfly full sync.\n\n  std::error_code ConsumeRedisStream();  // Redis stable state.\n  std::error_code ConsumeDflyStream();   // Dragonfly stable state.\n\n  void RedisStreamAcksFb();\n\n  // Joins all the flows when doing sharded replication. This is called in two\n  // places: Once at the end of full sync to join the full sync fibers, and twice\n  // if a stable sync is interrupted to join the cancelled stable sync fibers.\n  void JoinDflyFlows();\n  void SetShardStates(bool replica);  // Call SetReplica(replica) on all shards.\n\n  // Send DFLY ${kind} to the master instance.\n  std::error_code SendNextPhaseRequest(std::string_view kind);\n\n private: /* Utility */\n  struct PSyncResponse {\n    // string - end of sync token (diskless)\n    // size_t - size of the full sync blob (disk-based).\n    // if fullsync is 0, it means that master can continue with partial replication.\n    std::variant<std::string, size_t> fullsync;\n  };\n\n  std::error_code ParseReplicationHeader(base::IoBuf* io_buf, PSyncResponse* dest);\n\n public: /* Utility */\n  using Summary = ReplicaSummary;\n\n  Summary GetSummary() const;  // thread-safe, blocks fiber, makes a hop.\n\n  bool HasDflyMaster() const {\n    return !master_context_.dfly_session_id.empty();\n  }\n\n  std::vector<uint64_t> GetReplicaOffset() const;\n  std::string GetSyncId() const;\n\n  // Get the current replication phase based on state_mask_\n  std::string GetCurrentPhase() const;\n\n  // Used *only* in TakeOver flow and replicaof no one. 
There is small data race if\n  // thread_flow_map_ gets written by the MainReplicationFiber thread but\n  // the chances for that are extremely rare.\n  std::vector<unsigned> GetFlowMapAtIndex(size_t index) const;\n\n  size_t GetRecCountExecutedPerShard(const std::vector<unsigned>& indexes) const;\n\n private:\n  util::fb2::ProactorBase* proactor_ = nullptr;\n  Service& service_;\n  MasterContext master_context_;\n\n  // In redis replication mode.\n  util::fb2::Fiber sync_fb_;\n  util::fb2::Fiber acks_fb_;\n  util::fb2::EventCount replica_waker_;\n\n  std::vector<std::unique_ptr<DflyShardReplica>> shard_flows_;\n  std::vector<std::vector<unsigned>> thread_flow_map_;  // a map from proactor id to flow list.\n\n  // A vector of the last executer LSNs when a replication is interrupted.\n  // Allows partial sync on reconnects.\n  std::optional<std::vector<LSN>> last_journal_LSNs_;\n  std::shared_ptr<MultiShardExecution> multi_shard_exe_;\n\n  // Guard operations where flows might be in a mixed state (transition/setup)\n  util::fb2::Mutex flows_op_mu_;\n\n  // repl_offs - till what offset we've already read from the master.\n  // ack_offs_ last acknowledged offset.\n  size_t repl_offs_ = 0, ack_offs_ = 0;\n  unsigned state_mask_ = 0;  // see State enum above.\n\n  // When replica starts full sync it is set to false and true when it completes the full sync.\n  // Disconnects do not reset this, so this variable is still true if the master\n  // is not connected and the state_mask_ is cleared.\n  // Furthermore, on reconnects that enter full sync\n  // again this variable is set to false until full sync completes.\n  // Therefore, we have a consistent view of the replica:\n  // 1. True. Replica passed full sync even if master disconnects. In fact, once a\n  // node reached stable, the deltas from journal are the only missing items.\n  // 2. False. 
Replica has not passed full sync or a disconnect started full sync again.\n  bool passed_full_sync_ = false;\n\n  bool is_paused_ = false;\n  std::string id_;\n\n  std::optional<cluster::SlotRange> slot_range_;\n\n  uint32_t reconnect_count_ = 0;\n  size_t psync_attempts_ = 0;\n  size_t psync_successes_ = 0;\n};\n\nclass RdbLoader;\n// This class implements a single shard replication flow from a Dragonfly master instance.\n// Multiple DflyShardReplica objects are managed by a Replica object.\nclass DflyShardReplica : public ProtocolClient {\n public:\n  DflyShardReplica(ServerContext server_context, MasterContext master_context, uint32_t flow_id,\n                   Service* service, std::shared_ptr<MultiShardExecution> multi_shard_exe,\n                   class RdbLoadContext* load_context);\n  ~DflyShardReplica();\n\n  void Cancel();\n  void JoinFlow();\n\n  // Start replica initialized as dfly flow.\n  // Sets is_full_sync when successful.\n  io::Result<bool> StartSyncFlow(util::fb2::BlockingCounter block, ExecutionState* cntx,\n                                 std::optional<LSN>,\n                                 std::optional<Replica::LastMasterSyncData> data);\n\n  // Transition into stable state mode as dfly flow.\n  std::error_code StartStableSyncFlow(ExecutionState* cntx);\n\n  // Single flow full sync fiber spawned by StartFullSyncFlow.\n  void FullSyncDflyFb(std::string eof_token, util::fb2::BlockingCounter block,\n                      ExecutionState* cntx);\n\n  // Single flow stable state sync fiber spawned by StartStableSyncFlow.\n  void StableSyncDflyReadFb(ExecutionState* cntx);\n\n  void StableSyncDflyAcksFb(ExecutionState* cntx);\n\n  // Return true if the transaction executed successfully. 
On error,\n  // or on context cancellation return false.\n  bool ExecuteTx(TransactionData&& tx_data, ExecutionState* cntx);\n\n  uint32_t FlowId() const;\n\n  uint64_t JournalExecutedCount() const {\n    return journal_rec_executed_.load(std::memory_order_relaxed);\n  }\n\n  uint64_t SetRecordsExecuted(uint64_t value) {\n    return journal_rec_executed_ = value;\n  }\n\n  // Can be called from any thread.\n  void Pause(bool pause);\n\n private:\n  Service& service_;\n  MasterContext master_context_;\n\n  std::optional<base::IoBuf> leftover_buf_;\n\n  util::fb2::EventCount shard_replica_waker_;  // waker for trans_data_queue_\n\n  std::unique_ptr<JournalExecutor> executor_;\n  std::unique_ptr<RdbLoader> rdb_loader_;\n\n  // The master instance has a LSN for each journal record. This counts\n  // the number of journal records executed in this flow plus the initial\n  // journal offset that we received in the transition from full sync\n  // to stable sync.\n  // Note: This is not 1-to-1 the LSN in the master, because this counts\n  // **executed** records, which might be received interleaved when commands\n  // run out-of-order on the master instance.\n  // Atomic, because JournalExecutedCount() can be called from any thread.\n  std::atomic_uint64_t journal_rec_executed_ = 1;\n\n  util::fb2::Fiber sync_fb_, acks_fb_;\n  size_t ack_offs_ = 0;\n  int proactor_index_ = -1;\n  bool force_ping_ = false;\n\n  std::shared_ptr<MultiShardExecution> multi_shard_exe_;\n  uint32_t flow_id_ = UINT32_MAX;  // Flow id if replica acts as a dfly flow.\n};\n\n}  // namespace dfly\n"
  },
  {
    "path": "src/server/replica_types.h",
    "content": "// Copyright 2025, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#pragma once\n\n#include <string>\n#include <vector>\n\n#include \"server/common_types.h\"\n\nnamespace dfly {\n\nstruct ReplicaSummary {\n  std::string host;\n  uint16_t port;\n  bool master_link_established;\n  bool full_sync_in_progress;\n  bool full_sync_done;\n  time_t master_last_io_sec;  // monotonic clock.\n  std::string master_id;\n  uint32_t reconnect_count;\n\n  // sum of the offsets on all the flows.\n  uint64_t repl_offset_sum;\n  size_t psync_attempts;\n  size_t psync_successes;\n  // We can't rely on full_sync_done or full_sync_in_progress because\n  // on disconnects the replica state mask is cleared. We use this variable\n  // to track if the replica reached full sync. When master disconnects,\n  // we use this variable to print the journal offsets in info command even\n  // when the link is down. It's reset whenever a full sync is initiated again.\n  bool passed_full_sync;\n};\n\nstruct LastMasterSyncData {\n  std::string id;\n  std::vector<LSN> last_journal_LSNs;  // lsn for each master shard.\n};\n\n}  // namespace dfly\n"
  },
  {
    "path": "src/server/script_mgr.cc",
    "content": "// Copyright 2022, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#include \"server/script_mgr.h\"\n\n#include <absl/cleanup/cleanup.h>\n#include <absl/strings/ascii.h>\n#include <absl/strings/match.h>\n#include <absl/strings/numbers.h>\n#include <absl/strings/str_cat.h>\n#include <absl/strings/str_split.h>\n\n#include <regex>\n#include <string>\n\n#include \"base/flags.h\"\n#include \"base/logging.h\"\n#include \"core/interpreter.h\"\n#include \"facade/error.h\"\n#include \"facade/reply_builder.h\"\n#include \"server/conn_context.h\"\n#include \"server/engine_shard_set.h\"\n#include \"server/server_state.h\"\n#include \"server/transaction.h\"\n\nABSL_FLAG(std::string, default_lua_flags, \"\",\n          \"Configure default flags for running Lua scripts: \\n - Use 'allow-undeclared-keys' to \"\n          \"allow accessing undeclared keys, \\n - Use 'disable-atomicity' to allow \"\n          \"running scripts non-atomically, \\n - Use 'legacy-float' to return floats as integers.\\n\"\n          \"Specify multiple values separated by space, for example 'allow-undeclared-keys \"\n          \"disable-atomicity' runs scripts non-atomically and allows accessing undeclared keys\");\n\nABSL_FLAG(\n    bool, lua_auto_async, false,\n    \"If enabled, call/pcall with discarded values are automatically replaced with acall/apcall.\");\n\nABSL_FLAG(bool, lua_allow_undeclared_auto_correct, false,\n          \"If enabled, when a script that is not allowed to run with undeclared keys is trying to \"\n          \"access undeclared keys, automaticaly set the script flag to be able to run with \"\n          \"undeclared key.\");\n\nABSL_FLAG(\n    std::vector<std::string>, lua_undeclared_keys_shas, {},\n    \"Comma-separated list of Lua script SHAs which are allowed to access undeclared keys. 
SHAs are \"\n    \"only looked at when loading the script, and new values do not affect already-loaded script.\");\n\nABSL_FLAG(std::vector<std::string>, lua_float_as_int_shas, {},\n          \"Comma-separated list of Lua script SHAs which should return floats as integers. \"\n          \"SHAs are only looked at when loading the script.\");\n\nnamespace dfly {\nusing namespace std;\nusing namespace facade;\nusing namespace util;\n\nScriptMgr::ScriptMgr() {\n  // Build default script flags\n  string flags = absl::GetFlag(FLAGS_default_lua_flags);\n\n  static_assert(ScriptParams{}.atomic && !ScriptParams{}.undeclared_keys &&\n                !ScriptParams{}.float_as_int);\n\n  auto err = ScriptParams::ApplyFlags(flags, &default_params_);\n  CHECK(!err) << err.Format();\n}\n\nScriptMgr::ScriptKey::ScriptKey(string_view sha) : array{} {\n  DCHECK_EQ(sha.size(), size());\n  memcpy(data(), sha.data(), size());\n}\n\nvoid ScriptMgr::Run(CmdArgList args, Transaction* tx, SinkReplyBuilder* builder,\n                    ConnectionContext* cntx) {\n  string subcmd = absl::AsciiStrToUpper(ArgS(args, 0));\n\n  if (subcmd == \"HELP\") {\n    string_view kHelp[] = {\n        \"SCRIPT <subcommand> [<arg> [value] [opt] ...]\",\n        \"Subcommands are:\",\n        \"EXISTS <sha1> [<sha1> ...]\",\n        \"   Return information about the existence of the scripts in the script cache.\",\n        \"FLUSH\",\n        \"   Flush the Lua scripts cache. Very dangerous on replicas.\",\n        \"LOAD <script>\",\n        \"   Load a script into the scripts cache without executing it.\",\n        \"FLAGS <sha> [flags ...]\",\n        \"   Set specific flags for script. 
Can be called before the sript is loaded.\",\n        \"   The following flags are possible: \",\n        \"      - Use 'allow-undeclared-keys' to allow accessing undeclared keys\",\n        \"      - Use 'disable-atomicity' to allow running scripts non-atomically\",\n        \"      - Use 'legacy-float' to return floats as integers\",\n        \"LIST\",\n        \"   Lists loaded scripts.\",\n        \"LATENCY\",\n        \"   Prints latency histograms in usec for every called function.\",\n        \"GC\",\n        \"   Invokes garbage collection on all unused interpreter instances.\",\n        \"HELP\",\n        \"   Prints this help.\"};\n    auto rb = static_cast<RedisReplyBuilder*>(builder);\n    return rb->SendSimpleStrArr(kHelp);\n  }\n\n  if (subcmd == \"EXISTS\" && args.size() > 1)\n    return ExistsCmd(args, tx, builder);\n\n  if (subcmd == \"FLUSH\")\n    return FlushCmd(args, tx, builder);\n\n  if (subcmd == \"LIST\")\n    return ListCmd(tx, builder);\n\n  if (subcmd == \"LATENCY\")\n    return LatencyCmd(tx, builder);\n\n  if (subcmd == \"LOAD\" && args.size() == 2)\n    return LoadCmd(args, tx, builder, cntx);\n\n  if (subcmd == \"FLAGS\" && args.size() > 2)\n    return ConfigCmd(args, tx, builder);\n\n  if (subcmd == \"GC\")\n    return GCCmd(tx, builder);\n\n  string err = absl::StrCat(\"Unknown subcommand or wrong number of arguments for '\", subcmd,\n                            \"'. 
Try SCRIPT HELP.\");\n  builder->SendError(err, kSyntaxErrType);\n}\n\nvoid ScriptMgr::ExistsCmd(CmdArgList args, Transaction* tx, SinkReplyBuilder* builder) const {\n  vector<uint8_t> res(args.size() - 1, 0);\n  for (size_t i = 1; i < args.size(); ++i) {\n    if (string_view sha = ArgS(args, i); Find(sha)) {\n      res[i - 1] = 1;\n    }\n  }\n\n  auto rb = static_cast<RedisReplyBuilder*>(builder);\n  rb->StartArray(res.size());\n  for (uint8_t v : res) {\n    rb->SendLong(v);\n  }\n}\n\nvoid ScriptMgr::FlushCmd(CmdArgList args, Transaction* tx, SinkReplyBuilder* builder) {\n  FlushAllScript();\n\n  return builder->SendOk();\n}\n\nvoid ScriptMgr::LoadCmd(CmdArgList args, Transaction* tx, SinkReplyBuilder* builder,\n                        ConnectionContext* cntx) {\n  string_view body = ArgS(args, 1);\n  auto rb = static_cast<RedisReplyBuilder*>(builder);\n  if (body.empty()) {\n    char sha[41];\n    Interpreter::FuncSha1(body, sha);\n    return rb->SendBulkString(sha);\n  }\n\n  BorrowedInterpreter interpreter{tx, &cntx->conn_state};\n\n  auto res = Insert(body, interpreter);\n  if (!res)\n    return builder->SendError(res.error().Format());\n\n  // Schedule empty callback inorder to journal command via transaction framework.\n  tx->ScheduleSingleHop([](auto* t, auto* shard) { return OpStatus::OK; });\n\n  return rb->SendBulkString(res.value());\n}\n\nvoid ScriptMgr::ConfigCmd(CmdArgList args, Transaction* tx, SinkReplyBuilder* builder) {\n  string_view sha = ArgS(args, 1);\n  if (sha.size() != ScriptKey{}.size()) {\n    return builder->SendError(kSyntaxErr);\n  }\n\n  lock_guard lk{mu_};\n  ScriptKey key{sha};\n  auto& data = db_[key];\n\n  for (auto flag : args.subspan(2)) {\n    if (auto err = ScriptParams::ApplyFlags(facade::ToSV(flag), &data); err)\n      return builder->SendError(\"Invalid config format: \" + err.Format());\n  }\n\n  UpdateScriptCaches(key, data);\n\n  // Schedule empty callback inorder to journal command via transaction framework.\n  
tx->ScheduleSingleHop([](auto* t, auto* shard) { return OpStatus::OK; });\n\n  return builder->SendOk();\n}\n\nvoid ScriptMgr::ListCmd(Transaction* tx, SinkReplyBuilder* builder) const {\n  vector<pair<string, ScriptData>> scripts = GetAll();\n  auto rb = static_cast<RedisReplyBuilder*>(builder);\n  rb->StartArray(scripts.size());\n  for (const auto& [sha, data] : scripts) {\n    rb->StartArray(2);\n    rb->SendBulkString(sha);\n    rb->SendBulkString(data.body);\n  }\n}\n\nvoid ScriptMgr::LatencyCmd(Transaction* tx, SinkReplyBuilder* builder) const {\n  absl::flat_hash_map<std::string, base::Histogram> result;\n  fb2::Mutex mu;\n\n  shard_set->pool()->AwaitFiberOnAll([&](auto* pb) {\n    auto* ss = ServerState::tlocal();\n    mu.lock();\n    for (const auto& k_v : ss->call_latency_histos()) {\n      result[k_v.first].Merge(k_v.second);\n    }\n    mu.unlock();\n  });\n\n  auto rb = static_cast<RedisReplyBuilder*>(builder);\n  rb->StartArray(result.size());\n  for (const auto& k_v : result) {\n    rb->StartArray(2);\n    rb->SendBulkString(k_v.first);\n    rb->SendVerbatimString(k_v.second.ToString());\n  }\n}\n\nvoid ScriptMgr::GCCmd(Transaction* tx, SinkReplyBuilder* builder) const {\n  auto cb = [](Interpreter* ir) {\n    ir->RunGC();\n    ThisFiber::Yield();\n  };\n  shard_set->pool()->AwaitFiberOnAll(\n      [cb](auto* pb) { ServerState::tlocal()->AlterInterpreters(cb); });\n  return builder->SendOk();\n}\n\n// Check if script starts with lua flags instructions (--df flags=...).\nio::Result<optional<ScriptMgr::ScriptParams>, GenericError> DeduceParams(string_view body) {\n  static const regex kRegex{R\"(^\\s*?--!df flags=([^\\s\\n\\r]*)[\\s\\n\\r])\"};\n  cmatch matches;\n\n  if (!regex_search(body.data(), matches, kRegex))\n    return nullopt;\n\n  ScriptMgr::ScriptParams params;\n  if (auto err = ScriptMgr::ScriptParams::ApplyFlags(matches.str(1), &params); err)\n    return nonstd::make_unexpected(err);\n\n  return params;\n}\n\nunique_ptr<char[]> 
CharBufFromSV(string_view sv) {\n  auto ptr = make_unique<char[]>(sv.size() + 1);\n  memcpy(ptr.get(), sv.data(), sv.size());\n  ptr[sv.size()] = '\\0';\n  return ptr;\n}\n\nnonstd::expected<string, GenericError> ScriptMgr::Insert(string_view body,\n                                                         Interpreter* interpreter) {\n  char sha_buf[64];\n  Interpreter::FuncSha1(body, sha_buf);\n  string_view sha{sha_buf, std::strlen(sha_buf)};\n\n  if (interpreter->Exists(sha)) {\n    return string{sha};\n  }\n\n  auto params_opt = DeduceParams(body);\n  if (!params_opt)\n    return params_opt.get_unexpected();\n  auto params = params_opt->value_or(default_params_);\n\n  if (!params.atomic) {\n    // override atomicity for a specific buggy script.\n    constexpr string_view sha_4522 =\n        \"f8133be7f04abd9dfefa83c3b29a9d837cfbda86\"sv;  // Sidekiq, see #4522\n    if (sha == sha_4522) {\n      params.atomic = true;\n    }\n  }\n\n  const char* kUndeclaredShas[] = {\n      \"351130589c64523cb98978dc32c64173a31244f3\",  // Sidekiq, see #2442\n      \"6ae15ef4678593dc61f991c9953722d67d822776\",  // Sidekiq, see #2442\n      \"34b1048274c8e50a0cc587a3ed9c383a82bb78c5\",  // Sidekiq\n      \"b725ca33e5b36f318ab1150b8ac955a3d997c872\",  // Sentry, see #5495\n      \"8c4dafdf9b6b7bcf511a0d1ec0518bed9260e16d\",  // django-cacheops see #6119\n      \"3fc258d735c924d5652fceb90b41bea1f1f29e4b\",  // django-cacheops see #6119\n      \"43d401bd2bd0ad864c3ca221512cda1b6215ec23\",  // django-cacheops see #272\n      // Cm_Cache_Backend_Redis (Magento) - until\n      // https://github.com/colinmollenhour/Cm_Cache_Backend_Redis/pull/186 is merged\n      \"1617c9fb2bda7d790bb1aaa320c1099d81825e64\",  // Cm_Cache_Backend_Redis LUA_SAVE\n      \"39383dcf36d2e71364a666b2a806bc8219cd332d\",  // Cm_Cache_Backend_Redis LUA_CLEAN\n      \"6990147f5d1999b936dac3b6f7e5d2071908bcf3\",  // Cm_Cache_Backend_Redis LUA_GC\n  };\n\n  if (find(begin(kUndeclaredShas), end(kUndeclaredShas), sha) 
!= end(kUndeclaredShas)) {\n    params.undeclared_keys = true;\n  } else {\n    auto undeclared_shas = absl::GetFlag(FLAGS_lua_undeclared_keys_shas);\n    if (find(undeclared_shas.begin(), undeclared_shas.end(), sha) != undeclared_shas.end()) {\n      params.undeclared_keys = true;\n    }\n  }\n\n  auto float_as_int_shas = absl::GetFlag(FLAGS_lua_float_as_int_shas);\n  if (find(float_as_int_shas.begin(), float_as_int_shas.end(), sha) != float_as_int_shas.end()) {\n    params.float_as_int = true;\n  }\n\n  // If the script is atomic, check for possible squashing optimizations.\n  // For non atomic modes, squashing increases the time locks are held, which\n  // can decrease throughput with frequently accessed keys.\n  optional<string> async_body;\n  if (params.atomic && absl::GetFlag(FLAGS_lua_auto_async)) {\n    if (async_body = Interpreter::DetectPossibleAsyncCalls(body); async_body)\n      body = *async_body;\n  }\n\n  string result;\n  Interpreter::AddResult add_result = interpreter->AddFunction(sha, body, &result);\n  if (add_result == Interpreter::COMPILE_ERR)\n    return nonstd::make_unexpected(GenericError{std::move(result)});\n\n  lock_guard lk{mu_};\n  auto [it, _] = db_.emplace(sha, InternalScriptData{params, nullptr});\n\n  if (!it->second.body) {\n    it->second.body = CharBufFromSV(body);\n  }\n\n  UpdateScriptCaches(sha, it->second);\n\n  return string{sha};\n}\n\noptional<ScriptMgr::ScriptData> ScriptMgr::Find(std::string_view sha) const {\n  if (sha.size() != ScriptKey{}.size())\n    return std::nullopt;\n\n  lock_guard lk{mu_};\n  if (auto it = db_.find(sha); it != db_.end() && it->second.body)\n    return ScriptData{it->second, it->second.body.get()};\n\n  return std::nullopt;\n}\n\nvoid ScriptMgr::OnScriptError(std::string_view sha, std::string_view error) {\n  ++tl_facade_stats->reply_stats.script_error_count;\n\n  // Log script errors at most 5 times a second.\n  LOG_EVERY_T(WARNING, 0.2) << \"Error running script (call to \" << sha << \"): \" 
<< error;\n\n  // If script has undeclared_keys and was not flaged to run in this mode we will change the\n  // script flag - this will make script next run to not fail but run as global.\n  if (absl::GetFlag(FLAGS_lua_allow_undeclared_auto_correct)) {\n    size_t pos = error.rfind(kUndeclaredKeyErr);\n    lock_guard lk{mu_};\n    auto it = db_.find(sha);\n    if (it == db_.end()) {\n      return;\n    }\n\n    if (pos != string::npos) {\n      it->second.undeclared_keys = true;\n      LOG(WARNING) << \"Setting undeclared_keys flag for script with sha : (\" << sha << \")\";\n      UpdateScriptCaches(sha, it->second);\n    }\n  }\n}\n\nvoid ScriptMgr::FlushAllScript() {\n  lock_guard lk{mu_};\n  db_.clear();\n\n  shard_set->pool()->AwaitFiberOnAll([](auto* pb) {\n    ServerState* ss = ServerState::tlocal();\n    ss->FlushScriptCache();\n  });\n}\n\nvector<pair<string, ScriptMgr::ScriptData>> ScriptMgr::GetAll() const {\n  vector<pair<string, ScriptData>> res;\n\n  lock_guard lk{mu_};\n  res.reserve(db_.size());\n  for (const auto& [sha, data] : db_) {\n    string body = data.body ? 
string{data.body.get()} : string{};\n    res.emplace_back(string{sha.data(), sha.size()}, ScriptData{data, std::move(body)});\n  }\n\n  return res;\n}\n\nvoid ScriptMgr::UpdateScriptCaches(ScriptKey sha, ScriptParams params) const {\n  shard_set->pool()->AwaitBrief([&sha, &params](auto index, auto* pb) {\n    ServerState::tlocal()->SetScriptParams(sha, params);\n  });\n}\n\nbool ScriptMgr::AreGlobalByDefault() const {\n  return default_params_.undeclared_keys && default_params_.atomic;\n}\n\nGenericError ScriptMgr::ScriptParams::ApplyFlags(string_view config, ScriptParams* params) {\n  auto parts = absl::StrSplit(config, absl::ByAnyChar(\",; \"), absl::SkipEmpty());\n  for (auto flag : parts) {\n    if (flag == \"disable-atomicity\") {\n      params->atomic = false;\n      continue;\n    }\n\n    if (flag == \"allow-undeclared-keys\") {\n      params->undeclared_keys = true;\n      continue;\n    }\n\n    if (flag == \"legacy-float\") {\n      params->float_as_int = true;\n      continue;\n    }\n\n    if (flag == \"no-writes\") {  // Used by Redis.\n      // TODO: lock read-only.\n      continue;\n    }\n\n    return GenericError{\"Invalid flag: \"s + string{flag}};\n  }\n\n  return {};\n}\n\n}  // namespace dfly\n"
  },
  {
    "path": "src/server/script_mgr.h",
    "content": "// Copyright 2022, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#pragma once\n\n#include <absl/container/flat_hash_map.h>\n\n#include <array>\n#include <nonstd/expected.hpp>\n#include <optional>\n\n#include \"server/common_types.h\"\n#include \"server/execution_state.h\"\n\nnamespace facade {\nclass SinkReplyBuilder;\n}  // namespace facade\n\nnamespace dfly {\n\nusing facade::CmdArgList;\n\nclass EngineShardSet;\nclass Interpreter;\n\n// This class has a state through the lifetime of a server because it manipulates scripts\nclass ScriptMgr {\n public:\n  struct ScriptParams {\n    bool atomic = true;            // Whether script must run atomically.\n    bool undeclared_keys = false;  // Whether script accesses undeclared keys.\n    bool float_as_int = false;     // Whether to return floats as integers.\n\n    // Return GenericError if some flag was invalid.\n    // Valid flags are:\n    // - allow-undeclared-keys -> undeclared_keys=true\n    // - disable-atomicity     -> atomic=false\n    // - legacy-float          -> float_as_int=true\n    static GenericError ApplyFlags(std::string_view flags, ScriptParams* params);\n  };\n\n  struct ScriptData : public ScriptParams {\n    std::string body;  // script source code present in lua interpreter\n  };\n\n  struct ScriptKey : public std::array<char, 40> {\n    ScriptKey() = default;\n    ScriptKey(std::string_view sha);\n  };\n\n public:\n  using SinkReplyBuilder = facade::SinkReplyBuilder;\n\n  ScriptMgr();\n\n  void Run(CmdArgList args, Transaction* tx, SinkReplyBuilder* builder, ConnectionContext* cntx);\n\n  // Insert script and return sha. 
Get possible error from compilation or parsing script flags.\n  nonstd::expected<std::string, GenericError> Insert(std::string_view body,\n                                                     Interpreter* interpreter);\n\n  // Get script body by sha, returns nullptr if not found.\n  std::optional<ScriptData> Find(std::string_view sha) const;\n\n  // Returns a list of all scripts in the database with their sha and body.\n  std::vector<std::pair<std::string, ScriptData>> GetAll() const;\n\n  void FlushAllScript();\n\n  // Returns if scripts run as global transactions by default\n  bool AreGlobalByDefault() const;\n\n  void OnScriptError(std::string_view sha, std::string_view error);\n\n private:\n  void ExistsCmd(CmdArgList args, Transaction* tx, SinkReplyBuilder* builder) const;\n  void FlushCmd(CmdArgList args, Transaction* tx, SinkReplyBuilder* builder);\n  void LoadCmd(CmdArgList args, Transaction* tx, SinkReplyBuilder* builder,\n               ConnectionContext* cntx);\n  void ConfigCmd(CmdArgList args, Transaction* tx, SinkReplyBuilder* builder);\n  void ListCmd(Transaction* tx, SinkReplyBuilder* builder) const;\n  void LatencyCmd(Transaction* tx, SinkReplyBuilder* builder) const;\n  void GCCmd(Transaction* tx, SinkReplyBuilder* builder) const;\n\n  void UpdateScriptCaches(ScriptKey sha, ScriptParams params) const;\n\n private:\n  struct InternalScriptData : public ScriptParams {\n    std::unique_ptr<char[]> body{};\n    std::unique_ptr<char[]> orig_body{};\n  };\n\n  ScriptParams default_params_;\n\n  absl::flat_hash_map<ScriptKey, InternalScriptData> db_;\n  mutable util::fb2::Mutex mu_;\n};\n\n}  // namespace dfly\n"
  },
  {
    "path": "src/server/search/CMakeLists.txt",
    "content": "if (NOT WITH_SEARCH)\n  SET(DF_SEARCH_SRCS search/doc_index_fallback.cc PARENT_SCOPE)\nelse()\n  SET(DF_SEARCH_SRCS\n    search/aggregator.cc\n    search/doc_accessors.cc\n    search/doc_index.cc\n    search/search_family.cc\n    search/index_join.cc\n    search/global_hnsw_index.cc\n    search/index_builder.cc\n    PARENT_SCOPE)\nendif()\n"
  },
  {
    "path": "src/server/search/aggregator.cc",
    "content": "// Copyright 2023, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#include \"server/search/aggregator.h\"\n\n#include \"base/logging.h\"\n#include \"server/search/doc_index.h\"\n\nnamespace dfly::aggregate {\n\nnamespace {\n\nusing ValuesList = absl::FixedArray<Value>;\n\nValuesList ExtractFieldsValues(const DocValues& dv, absl::Span<const std::string> fields) {\n  ValuesList out(fields.size());\n  for (size_t i = 0; i < fields.size(); i++) {\n    auto it = dv.find(fields[i]);\n    out[i] = (it != dv.end()) ? it->second : Value{};\n  }\n  return out;\n}\n\nDocValues PackFields(ValuesList values, absl::Span<const std::string> fields) {\n  DCHECK_EQ(values.size(), fields.size());\n  DocValues out;\n  for (size_t i = 0; i < fields.size(); i++)\n    out[fields[i]] = std::move(values[i]);\n  return out;\n}\n\nconst Value kEmptyValue = Value{};\n\n}  // namespace\n\nvoid Aggregator::DoGroup(absl::Span<const std::string> fields, absl::Span<const Reducer> reducers) {\n  // Separate items into groups\n  absl::flat_hash_map<ValuesList, std::vector<DocValues>> groups;\n  for (auto& value : result.values) {\n    groups[ExtractFieldsValues(value, fields)].push_back(std::move(value));\n  }\n\n  // Restore DocValues and apply reducers\n  auto& values = result.values;\n  values.clear();\n  values.reserve(groups.size());\n  while (!groups.empty()) {\n    auto node = groups.extract(groups.begin());\n    DocValues doc = PackFields(std::move(node.key()), fields);\n    for (auto& reducer : reducers) {\n      doc[reducer.result_field] = reducer.func({reducer.source_field, node.mapped()});\n    }\n    values.push_back(std::move(doc));\n  }\n\n  auto& fields_to_print = result.fields_to_print;\n  fields_to_print.clear();\n  fields_to_print.reserve(fields.size() + reducers.size());\n\n  for (auto& field : fields) {\n    fields_to_print.insert(field);\n  }\n  for (auto& reducer : reducers) {\n    
fields_to_print.insert(reducer.result_field);\n  }\n}\n\nvoid Aggregator::DoSort(const SortParams& sort_params) {\n  /*\n    Comparator for sorting DocValues by fields.\n    If some of the fields is not present in the DocValues, comparator returns:\n    1. l_it == l.end() && r_it != r.end()\n      asc -> false\n      desc -> false\n    2. l_it != l.end() && r_it == r.end()\n      asc -> true\n      desc -> true\n    3. l_it == l.end() && r_it == r.end()\n      asc -> false\n      desc -> false\n  */\n  auto comparator = [&](const DocValues& l, const DocValues& r) {\n    for (const auto& [field, order] : sort_params.fields) {\n      auto l_it = l.find(field);\n      auto r_it = r.find(field);\n\n      // If some of the values is not present\n      if (l_it == l.end() || r_it == r.end()) {\n        if (l_it == l.end() && r_it == r.end()) {\n          continue;\n        }\n        return l_it != l.end();\n      }\n\n      const auto& lv = l_it->second;\n      const auto& rv = r_it->second;\n      if (lv == rv) {\n        continue;\n      }\n      return order == SortOrder::ASC ? lv < rv : lv > rv;\n    }\n    return false;\n  };\n\n  auto& values = result.values;\n  if (sort_params.SortAll()) {\n    std::sort(values.begin(), values.end(), comparator);\n  } else {\n    DCHECK_GE(sort_params.max, 0);\n    const size_t limit = std::min(values.size(), size_t(sort_params.max));\n    std::partial_sort(values.begin(), values.begin() + limit, values.end(), comparator);\n    values.resize(limit);\n  }\n\n  for (auto& field : sort_params.fields) {\n    result.fields_to_print.insert(field.first);\n  }\n}\n\nvoid Aggregator::DoLimit(size_t offset, size_t num) {\n  auto& values = result.values;\n  values.erase(values.begin(), values.begin() + std::min(offset, values.size()));\n  values.resize(std::min(num, values.size()));\n}\n\nconst Value& ValueIterator::operator*() const {\n  auto it = values_.front().find(field_);\n  return it == values_.front().end() ? 
kEmptyValue : it->second;\n}\n\nValueIterator& ValueIterator::operator++() {\n  values_.remove_prefix(1);\n  return *this;\n}\n\nReducer::Func FindReducerFunc(ReducerFunc name) {\n  const static auto kCountReducer = [](ValueIterator it) -> double {\n    return std::distance(it, it.end());\n  };\n\n  const static auto kSumReducer = [](ValueIterator it) -> double {\n    double sum = 0;\n    for (; it != it.end(); ++it)\n      sum += std::holds_alternative<double>(*it) ? std::get<double>(*it) : 0.0;\n    return sum;\n  };\n\n  switch (name) {\n    case ReducerFunc::COUNT:\n      return [](ValueIterator it) -> Value { return kCountReducer(it); };\n    case ReducerFunc::COUNT_DISTINCT:\n      return [](ValueIterator it) -> Value {\n        return double(std::unordered_set<Value>(it, it.end()).size());\n      };\n    case ReducerFunc::SUM:\n      return [](ValueIterator it) -> Value { return kSumReducer(it); };\n    case ReducerFunc::AVG:\n      return [](ValueIterator it) -> Value { return kSumReducer(it) / kCountReducer(it); };\n    case ReducerFunc::MAX:\n      return [](ValueIterator it) -> Value { return *std::max_element(it, it.end()); };\n    case ReducerFunc::MIN:\n      return [](ValueIterator it) -> Value { return *std::min_element(it, it.end()); };\n  }\n\n  return nullptr;\n}\n\nAggregationStep MakeGroupStep(std::vector<std::string> fields, std::vector<Reducer> reducers) {\n  return [fields = std::move(fields), reducers = std::move(reducers)](Aggregator* aggregator) {\n    aggregator->DoGroup(fields, reducers);\n  };\n}\n\nAggregationStep MakeSortStep(SortParams sort_params) {\n  return [params = std::move(sort_params)](Aggregator* aggregator) { aggregator->DoSort(params); };\n}\n\nAggregationStep MakeLimitStep(size_t offset, size_t num) {\n  return [=](Aggregator* aggregator) { aggregator->DoLimit(offset, num); };\n}\n\nAggregationResult Process(std::vector<DocValues> values,\n                          absl::Span<const std::string_view> fields_to_print,\n    
                      absl::Span<const AggregationStep> steps) {\n  Aggregator aggregator{std::move(values), {fields_to_print.begin(), fields_to_print.end()}};\n  for (auto& step : steps) {\n    step(&aggregator);\n  }\n  return aggregator.result;\n}\n\n}  // namespace dfly::aggregate\n"
  },
  {
    "path": "src/server/search/aggregator.h",
    "content": "// Copyright 2023, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#pragma once\n\n#include <absl/container/flat_hash_map.h>\n#include <absl/container/flat_hash_set.h>\n#include <absl/types/span.h>\n\n#include <string>\n#include <variant>\n\n#include \"core/search/base.h\"\n#include \"facade/reply_builder.h\"\n#include \"io/io.h\"\n\nnamespace dfly {\nenum class SortOrder;\n}\n\nnamespace dfly::aggregate {\n\nstruct Reducer;\n\nusing Value = ::dfly::search::SortableValue;\n\n// DocValues sent through the pipeline\n// TODO: Replace DocValues with compact linear search map instead of hash map\nusing DocValues = absl::flat_hash_map<std::string, Value>;\n\nstruct AggregationResult {\n  // Values to be passed to the next step\n  std::vector<DocValues> values;\n\n  // Fields from values to be printed\n  absl::flat_hash_set<std::string_view> fields_to_print;\n};\n\nstruct SortParams {\n  constexpr static int64_t kSortAll = -1;\n\n  bool SortAll() const {\n    return max == kSortAll;\n  }\n\n  /* Fields to sort by. If multiple fields are provided, sorting works hierarchically:\n     - First, the i-th field is compared.\n     - If the i-th field values are equal, the (i + 1)-th field is compared, and so on. */\n  absl::InlinedVector<std::pair<std::string, SortOrder>, 2> fields;\n  /* Max number of elements to include in the sorted result.\n     If set, only the first [max] elements are fully sorted using partial_sort. 
*/\n  int64_t max = kSortAll;\n};\n\nstruct Aggregator {\n  void DoGroup(absl::Span<const std::string> fields, absl::Span<const Reducer> reducers);\n  void DoSort(const SortParams& sort_params);\n  void DoLimit(size_t offset, size_t num);\n\n  AggregationResult result;\n};\n\nusing AggregationStep = std::function<void(Aggregator*)>;  // Group, Sort, etc.\n\n// Iterator over Span<DocValues> that yields doc[field] or monostate if not present.\n// Extra clumsy for STL compatibility!\nstruct ValueIterator {\n  using iterator_category = std::forward_iterator_tag;\n  using difference_type = std::ptrdiff_t;\n  using value_type = const Value;\n  using pointer = const Value*;\n  using reference = const Value&;\n\n  ValueIterator(std::string_view field, absl::Span<const DocValues> values)\n      : field_{field}, values_{values} {\n  }\n\n  const Value& operator*() const;\n\n  ValueIterator& operator++();\n\n  bool operator==(const ValueIterator& other) const {\n    return values_.size() == other.values_.size();\n  }\n\n  bool operator!=(const ValueIterator& other) const {\n    return !operator==(other);\n  }\n\n  static ValueIterator end() {\n    return ValueIterator{};\n  }\n\n private:\n  ValueIterator() = default;\n\n  std::string_view field_;\n  absl::Span<const DocValues> values_;\n};\n\nstruct Reducer {\n  using Func = Value (*)(ValueIterator);\n  std::string source_field, result_field;\n  Func func;\n};\n\nenum class ReducerFunc { COUNT, COUNT_DISTINCT, SUM, AVG, MAX, MIN };\n\n// Find reducer function by uppercase name (COUNT, MAX, etc...), empty functor if not found\nReducer::Func FindReducerFunc(ReducerFunc name);\n\n// Make `GROUPBY [fields...]`  with REDUCE step\nAggregationStep MakeGroupStep(std::vector<std::string> fields, std::vector<Reducer> reducers);\n\n// Make `SORTBY field [DESC]` step\nAggregationStep MakeSortStep(SortParams sort_params);\n\n// Make `LIMIT offset num` step\nAggregationStep MakeLimitStep(size_t offset, size_t num);\n\n// Process values 
with given steps\nAggregationResult Process(std::vector<DocValues> values,\n                          absl::Span<const std::string_view> fields_to_print,\n                          absl::Span<const AggregationStep> steps);\n\n}  // namespace dfly::aggregate\n"
  },
  {
    "path": "src/server/search/aggregator_test.cc",
    "content": "// Copyright 2023, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#include \"server/search/aggregator.h\"\n\n#include \"base/gtest.h\"\n#include \"server/search/doc_index.h\"\n\nnamespace dfly::aggregate {\n\nusing namespace std::string_literals;\n\nusing StepsList = std::vector<AggregationStep>;\n\nTEST(AggregatorTest, Sort) {\n  std::vector<DocValues> values = {\n      DocValues{{\"a\", 1.0}},\n      DocValues{{\"a\", 0.5}},\n      DocValues{{\"a\", 1.5}},\n  };\n\n  SortParams params;\n  params.fields.emplace_back(\"a\", SortOrder::ASC);\n  StepsList steps = {MakeSortStep(std::move(params))};\n\n  auto result = Process(values, {\"a\"}, steps);\n\n  EXPECT_EQ(result.values[0][\"a\"], Value(0.5));\n  EXPECT_EQ(result.values[1][\"a\"], Value(1.0));\n  EXPECT_EQ(result.values[2][\"a\"], Value(1.5));\n}\n\nTEST(AggregatorTest, Limit) {\n  std::vector<DocValues> values = {\n      DocValues{{\"i\", 1.0}},\n      DocValues{{\"i\", 2.0}},\n      DocValues{{\"i\", 3.0}},\n      DocValues{{\"i\", 4.0}},\n  };\n\n  StepsList steps = {MakeLimitStep(1, 2)};\n\n  auto result = Process(values, {\"i\"}, steps);\n\n  EXPECT_EQ(result.values.size(), 2);\n  EXPECT_EQ(result.values[0][\"i\"], Value(2.0));\n  EXPECT_EQ(result.values[1][\"i\"], Value(3.0));\n}\n\nTEST(AggregatorTest, SimpleGroup) {\n  std::vector<DocValues> values = {\n      DocValues{{\"i\", 1.0}, {\"tag\", \"odd\"}},\n      DocValues{{\"i\", 2.0}, {\"tag\", \"even\"}},\n      DocValues{{\"i\", 3.0}, {\"tag\", \"odd\"}},\n      DocValues{{\"i\", 4.0}, {\"tag\", \"even\"}},\n  };\n\n  std::vector<std::string> fields = {\"tag\"};\n  StepsList steps = {MakeGroupStep(std::move(fields), {})};\n\n  auto result = Process(values, {\"i\", \"tag\"}, steps);\n  EXPECT_EQ(result.values.size(), 2);\n\n  EXPECT_EQ(result.values[0].size(), 1);\n  std::set<Value> groups{result.values[0][\"tag\"], result.values[1][\"tag\"]};\n  std::set<Value> expected{\"even\", \"odd\"};\n  
EXPECT_EQ(groups, expected);\n}\n\nTEST(AggregatorTest, GroupWithReduce) {\n  std::vector<DocValues> values;\n  // range from 0 to 9 inclusive\n  for (size_t i = 0; i < 10; i++) {\n    values.push_back(DocValues{\n        {\"i\", double(i)},\n        {\"half-i\", double(i / 4)},\n        {\"tag\", i % 2 == 0 ? \"even\" : \"odd\"},\n    });\n  }\n\n  std::vector<std::string> fields = {\"tag\"};\n  std::vector<Reducer> reducers = {\n      Reducer{\"\", \"count\", FindReducerFunc(ReducerFunc::COUNT)},\n      Reducer{\"i\", \"sum-i\", FindReducerFunc(ReducerFunc::SUM)},\n      Reducer{\"half-i\", \"distinct-hi\", FindReducerFunc(ReducerFunc::COUNT_DISTINCT)},\n      Reducer{\"null-field\", \"distinct-null\", FindReducerFunc(ReducerFunc::COUNT_DISTINCT)}};\n\n  StepsList steps = {MakeGroupStep(std::move(fields), std::move(reducers))};\n\n  auto result = Process(values, {\"i\", \"half-i\", \"tag\"}, steps);\n  EXPECT_EQ(result.values.size(), 2);\n\n  // Reorder even first\n  if (result.values[0].at(\"tag\") == Value(\"odd\"))\n    std::swap(result.values[0], result.values[1]);\n\n  // Even\n  EXPECT_EQ(result.values[0].at(\"count\"), Value{(double)5});\n  EXPECT_EQ(result.values[0].at(\"sum-i\"), Value{(double)2 + 4 + 6 + 8});\n  EXPECT_EQ(result.values[0].at(\"distinct-hi\"), Value{(double)3});\n  EXPECT_EQ(result.values[0].at(\"distinct-null\"), Value{(double)1});\n\n  // Odd\n  EXPECT_EQ(result.values[1].at(\"count\"), Value{(double)5});\n  EXPECT_EQ(result.values[1].at(\"sum-i\"), Value{(double)1 + 3 + 5 + 7 + 9});\n  EXPECT_EQ(result.values[1].at(\"distinct-hi\"), Value{(double)3});\n  EXPECT_EQ(result.values[1].at(\"distinct-null\"), Value{(double)1});\n}\n\n}  // namespace dfly::aggregate\n"
  },
  {
    "path": "src/server/search/doc_accessors.cc",
    "content": "// Copyright 2023, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n// GCC yields a spurious warning about uninitialized data in DocumentAccessor::StringList.\n\n#ifndef __clang__\n#pragma GCC diagnostic ignored \"-Wmaybe-uninitialized\"\n#endif\n\n#include \"server/search/doc_accessors.h\"\n\n#include <absl/functional/any_invocable.h>\n#include <absl/strings/str_cat.h>\n#include <absl/strings/str_join.h>\n\n#include \"base/flags.h\"\n#include \"core/detail/listpack_wrap.h\"\n#include \"core/json/path.h\"\n#include \"core/overloaded.h\"\n#include \"core/search/search.h\"\n#include \"core/search/vector_utils.h\"\n#include \"core/string_map.h\"\n#include \"server/container_utils.h\"\n\nextern \"C\" {\n#include \"redis/listpack.h\"\n};\n\nABSL_DECLARE_FLAG(bool, jsonpathv2);\n\nnamespace dfly {\n\nusing namespace std;\n\nnamespace {\n\nstring_view SdsToSafeSv(sds str) {\n  return str != nullptr ? string_view{str, sdslen(str)} : \"\"sv;\n}\n\nusing FieldValue = std::optional<search::SortableValue>;\n\nFieldValue ToSortableValue(search::SchemaField::FieldType type, string_view value) {\n  if (value.empty()) {\n    return std::nullopt;\n  }\n\n  if (type == search::SchemaField::NUMERIC) {\n    auto value_as_double = search::ParseNumericField(value);\n    if (!value_as_double) {  // temporary convert to double\n      return std::nullopt;\n    }\n    return value_as_double.value();\n  }\n  if (type == search::SchemaField::VECTOR) {\n    auto opt_vector = search::BytesToFtVectorSafe(value);\n    if (!opt_vector) {\n      return std::nullopt;\n    }\n    auto& [ptr, size] = opt_vector.value();\n    return absl::StrCat(\"[\", absl::StrJoin(absl::Span<const float>{ptr.get(), size}, \",\"), \"]\");\n  }\n  return string{value};\n}\n\nFieldValue ExtractSortableValue(const search::Schema& schema, string_view key, string_view value) {\n  auto it = schema.fields.find(key);\n  if (it == schema.fields.end())\n    return 
ToSortableValue(search::SchemaField::TEXT, value);\n  return ToSortableValue(it->second.type, value);\n}\n\nFieldValue ExtractSortableValueFromJson(const search::Schema& schema, string_view key,\n                                        const JsonType& json) {\n  if (json.is_null()) {\n    return std::monostate{};\n  }\n  auto json_as_string = json.as_string();\n  return ExtractSortableValue(schema, key, json_as_string);\n}\n\n/* Returns true if json elements were successfully processed. */\nbool ProcessJsonElements(const std::vector<JsonType>& json_elements,\n                         absl::FunctionRef<bool(const JsonType&)> cb) {\n  auto process = [&cb](const auto& json_range) -> bool {\n    for (const auto& json : json_range) {\n      if (!json.is_null() && !cb(json)) {\n        return false;\n      }\n    }\n    return true;\n  };\n\n  if (!json_elements[0].is_array()) {\n    return process(json_elements);\n  }\n  return json_elements.size() == 1 && process(json_elements[0].array_range());\n}\n\n}  // namespace\n\nSearchDocData BaseAccessor::Serialize(const search::Schema& schema,\n                                      absl::Span<const FieldReference> fields) const {\n  SearchDocData out{};\n  for (const auto& field : fields) {\n    string_view fident = field.Identifier(schema, false);\n    auto field_value =\n        ExtractSortableValue(schema, fident, absl::StrJoin(GetStrings(fident).value(), \",\"));\n    if (field_value) {\n      out[field.OutputName()] = std::move(field_value).value();\n    }\n  }\n  return out;\n}\n\nstd::optional<BaseAccessor::VectorInfo> BaseAccessor::GetVector(std::string_view active_field,\n                                                                size_t dim) const {\n  auto strings_list = GetStrings(active_field);\n  if (strings_list) {\n    if (!strings_list->empty()) {\n      auto value = strings_list->front();\n      if ((value.size() % sizeof(float)) || (value.size() / sizeof(float) != dim)) {\n        return std::nullopt;\n 
     }\n      return value.data();\n    } else {\n      return nullptr;\n    }\n  }\n  return std::nullopt;\n}\n\nstd::optional<BaseAccessor::NumsList> BaseAccessor::GetNumbers(\n    std::string_view active_field) const {\n  auto strings_list = GetStrings(active_field);\n  if (!strings_list) {\n    return std::nullopt;\n  }\n\n  NumsList nums_list;\n  nums_list.reserve(strings_list->size());\n  for (auto str : strings_list.value()) {\n    auto num = search::ParseNumericField(str);\n    if (!num) {\n      return std::nullopt;\n    }\n    nums_list.push_back(num.value());\n  }\n  return nums_list;\n}\n\nstd::optional<BaseAccessor::StringList> BaseAccessor::GetTags(std::string_view active_field) const {\n  return GetStrings(active_field);\n}\n\nstd::optional<BaseAccessor::StringList> ListPackAccessor::GetStrings(\n    string_view active_field) const {\n  auto it = lw_.Find(active_field);\n  return it != lw_.end() ? StringList{(*it).second} : StringList{};\n}\n\nSearchDocData ListPackAccessor::Serialize(const search::Schema& schema) const {\n  SearchDocData out{};\n  for (const auto [key, value] : lw_) {\n    if (auto field_value = ExtractSortableValue(schema, key, value); field_value) {\n      out[key] = std::move(field_value).value();\n    }\n  }\n  return out;\n}\n\nstd::optional<BaseAccessor::StringList> StringMapAccessor::GetStrings(\n    string_view active_field) const {\n  auto it = hset_->Find(active_field);\n  return it != hset_->end() ? 
StringList{SdsToSafeSv(it->second)} : StringList{};\n}\n\nSearchDocData StringMapAccessor::Serialize(const search::Schema& schema) const {\n  SearchDocData out{};\n  for (const auto& [kptr, vptr] : *hset_) {\n    auto field_value = ExtractSortableValue(schema, SdsToSafeSv(kptr), SdsToSafeSv(vptr));\n    if (field_value) {\n      out[SdsToSafeSv(kptr)] = std::move(field_value).value();\n    }\n  }\n  return out;\n}\n\nstruct JsonAccessor::JsonPathContainer {\n  vector<JsonType> Evaluate(const JsonType& json) const {\n    vector<JsonType> res;\n\n    visit(Overloaded{[&](const json::Path& path) {\n                       json::EvaluatePath(path, json,\n                                          [&](auto, const JsonType& v) { res.push_back(v); });\n                     },\n                     [&](const jsoncons::jsonpath::jsonpath_expression<JsonType>& path) {\n                       auto json_arr = path.evaluate(json);\n                       for (const auto& v : json_arr.array_range())\n                         res.push_back(v);\n                     }},\n          val);\n\n    return res;\n  }\n\n  variant<json::Path, jsoncons::jsonpath::jsonpath_expression<JsonType>> val;\n};\n\nstd::optional<BaseAccessor::StringList> JsonAccessor::GetStrings(std::string_view field) const {\n  return GetStrings(field, false);\n}\n\nstd::optional<BaseAccessor::StringList> JsonAccessor::GetTags(std::string_view active_field) const {\n  return GetStrings(active_field, true);\n}\n\nstd::optional<BaseAccessor::StringList> JsonAccessor::GetStrings(std::string_view field,\n                                                                 bool accept_boolean_values) const {\n  auto* path = GetPath(field);\n  if (!path)\n    return search::EmptyAccessResult<StringList>();\n\n  auto path_res = path->Evaluate(json_);\n  if (path_res.empty())\n    return search::EmptyAccessResult<StringList>();\n\n  auto is_convertible_to_string = [](bool accept_boolean_values) -> bool (*)(const JsonType& json) 
{\n    if (accept_boolean_values) {\n      return [](const JsonType& json) -> bool { return json.is_string() || json.is_bool(); };\n    } else {\n      return [](const JsonType& json) -> bool { return json.is_string(); };\n    }\n  }(accept_boolean_values);\n\n  if (path_res.size() == 1 && !path_res[0].is_array()) {\n    if (path_res[0].is_null())\n      return StringList{};\n    if (!is_convertible_to_string(path_res[0]))\n      return std::nullopt;\n\n    buf_ = path_res[0].as_string();\n    return StringList{buf_};\n  }\n\n  buf_.clear();\n\n  // First, grow buffer and compute string sizes\n  vector<size_t> sizes;\n  sizes.reserve(path_res.size());\n\n  // Returns true if json element is convertiable to string\n  auto add_json_element_to_buf = [&](const JsonType& json) -> bool {\n    if (!is_convertible_to_string(json))\n      return false;\n\n    size_t start = buf_.size();\n    buf_ += json.as_string();\n    sizes.push_back(buf_.size() - start);\n    return true;\n  };\n\n  if (!ProcessJsonElements(path_res, std::move(add_json_element_to_buf))) {\n    return std::nullopt;\n  }\n\n  // Reposition start pointers to the most recent allocation of buf\n  StringList out(sizes.size());\n\n  size_t start = 0;\n  for (size_t i = 0; i < out.size(); i++) {\n    out[i] = string_view{buf_}.substr(start, sizes[i]);\n    start += sizes[i];\n  }\n\n  return out;\n}\n\nstd::optional<BaseAccessor::VectorInfo> JsonAccessor::GetVector(string_view active_field,\n                                                                size_t dim) const {\n  auto* path = GetPath(active_field);\n  if (!path)\n    return VectorInfo{};\n\n  auto res = path->Evaluate(json_);\n  if (res.empty() || res[0].is_null())\n    return VectorInfo{};\n\n  if (!res[0].is_array())\n    return std::nullopt;\n\n  size_t size = res[0].size();\n\n  if (size != dim)\n    return std::nullopt;\n\n  auto ptr = make_unique<float[]>(size);\n\n  size_t i = 0;\n  for (const auto& v : res[0].array_range()) {\n    if 
(!v.is_number()) {\n      return std::nullopt;\n    }\n    ptr[i++] = v.as<float>();\n  }\n\n  return search::OwnedFtVector{std::move(ptr), size};\n}\n\nstd::optional<BaseAccessor::NumsList> JsonAccessor::GetNumbers(string_view active_field) const {\n  auto* path = GetPath(active_field);\n  if (!path)\n    return search::EmptyAccessResult<NumsList>();\n\n  auto path_res = path->Evaluate(json_);\n  if (path_res.empty())\n    return search::EmptyAccessResult<NumsList>();\n\n  NumsList nums_list;\n  nums_list.reserve(path_res.size());\n\n  // Returns true if json element is convertiable to number\n  auto add_json_element = [&](const JsonType& json) -> bool {\n    if (!json.is_number())\n      return false;\n    nums_list.push_back(json.as<double>());\n    return true;\n  };\n\n  if (!ProcessJsonElements(path_res, std::move(add_json_element))) {\n    return std::nullopt;\n  }\n  return nums_list;\n}\n\nJsonAccessor::JsonPathContainer* JsonAccessor::GetPath(std::string_view field) const {\n  if (auto it = path_cache_.find(field); it != path_cache_.end()) {\n    return it->second.get();\n  }\n\n  string ec_msg;\n  unique_ptr<JsonPathContainer> ptr;\n  if (absl::GetFlag(FLAGS_jsonpathv2)) {\n    auto path_expr = json::ParsePath(field);\n    if (path_expr) {\n      ptr.reset(new JsonPathContainer{std::move(path_expr.value())});\n    } else {\n      ec_msg = path_expr.error();\n    }\n  } else {\n    error_code ec;\n    auto path_expr = MakeJsonPathExpr(field, ec);\n    if (ec) {\n      ec_msg = ec.message();\n    } else {\n      ptr.reset(new JsonPathContainer{std::move(path_expr)});\n    }\n  }\n\n  if (!ptr) {\n    // This can occur for fields that are not actual JSON paths but are computed aliases\n    // (e.g., 'vector_distance' from a KNN search clause in FT.SEARCH RETURN).\n    // Such fields are valid for return but won't be found as paths in the document.\n    VLOG(1) << \"Invalid Json path: \" << field << ' ' << ec_msg;\n    return nullptr;\n  }\n\n  
JsonPathContainer* path = ptr.get();\n  path_cache_[field] = std::move(ptr);\n  return path;\n}\n\nSearchDocData JsonAccessor::Serialize(const search::Schema& schema,\n                                      absl::Span<const FieldReference> fields) const {\n  SearchDocData out{};\n  for (const auto& field : fields) {\n    string_view ident = field.Identifier(schema, true);\n    if (auto* path = GetPath(ident); path) {\n      if (auto res = path->Evaluate(json_); !res.empty()) {\n        auto field_value = ExtractSortableValueFromJson(schema, ident, res[0]);\n        if (field_value) {\n          out[field.OutputName()] = std::move(field_value).value();\n        }\n      }\n    }\n  }\n  return out;\n}\n\nSearchDocData JsonAccessor::Serialize(const search::Schema& schema) const {\n  return {{\"$\", json_.to_string()}};\n}\n\nvoid JsonAccessor::RemoveFieldFromCache(string_view field) {\n  path_cache_.erase(field);\n}\n\nthread_local absl::flat_hash_map<std::string, std::unique_ptr<JsonAccessor::JsonPathContainer>>\n    JsonAccessor::path_cache_;\n\nunique_ptr<BaseAccessor> GetAccessor(const DbContext& db_cntx, const PrimeValue& pv) {\n  DCHECK(pv.ObjType() == OBJ_HASH || pv.ObjType() == OBJ_JSON);\n\n  if (pv.ObjType() == OBJ_JSON) {\n    DCHECK(pv.GetJson());\n    return make_unique<JsonAccessor>(pv.GetJson());\n  }\n\n  if (pv.Encoding() == kEncodingListPack) {\n    auto ptr = reinterpret_cast<uint8_t*>(pv.RObjPtr());\n    return make_unique<ListPackAccessor>(ptr);\n  } else {\n    auto* sm = container_utils::GetStringMap(pv, db_cntx);\n    return make_unique<StringMapAccessor>(sm);\n  }\n}\n\n}  // namespace dfly\n"
  },
  {
    "path": "src/server/search/doc_accessors.h",
    "content": "// Copyright 2023, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#pragma once\n\n#include <absl/container/flat_hash_map.h>\n#include <absl/types/span.h>\n\n#include <string>\n\n#include \"core/detail/listpack_wrap.h\"\n#include \"core/json/json_object.h\"\n#include \"core/search/search.h\"\n#include \"core/search/vector_utils.h\"\n#include \"server/search/doc_index.h\"\n#include \"server/table.h\"\n\nnamespace dfly {\n\nclass StringMap;\n\n// Document accessors allow different types (json/hset) to be hidden\n// behind a document interface for quering fields and serializing.\n// Field string_view's are only valid until the next is requested.\nstruct BaseAccessor : public search::DocumentAccessor {\n  // Serialize all fields\n  virtual SearchDocData Serialize(const search::Schema& schema) const = 0;\n\n  // Serialize selected fields\n  virtual SearchDocData Serialize(const search::Schema& schema,\n                                  absl::Span<const FieldReference> fields) const;\n\n  // Default implementation uses GetStrings\n  virtual std::optional<VectorInfo> GetVector(std::string_view active_field,\n                                              size_t dim) const override;\n  virtual std::optional<NumsList> GetNumbers(std::string_view active_field) const override;\n  virtual std::optional<StringList> GetTags(std::string_view active_field) const override;\n};\n\n// Accessor for hashes stored with listpack\nstruct ListPackAccessor : public BaseAccessor {\n  explicit ListPackAccessor(uint8_t* ptr /* listpack ptr */) : lw_{ptr} {\n  }\n\n  std::optional<StringList> GetStrings(std::string_view field) const override;\n  SearchDocData Serialize(const search::Schema& schema) const override;\n\n private:\n  detail::ListpackWrap lw_;\n};\n\n// Accessor for hashes stored with StringMap\nstruct StringMapAccessor : public BaseAccessor {\n  explicit StringMapAccessor(StringMap* hset) : hset_{hset} {\n  }\n\n  
std::optional<StringList> GetStrings(std::string_view field) const override;\n  SearchDocData Serialize(const search::Schema& schema) const override;\n\n private:\n  StringMap* hset_;\n};\n\n// Accessor for json values\nstruct JsonAccessor : public BaseAccessor {\n  struct JsonPathContainer;  // contains jsoncons::jsonpath::jsonpath_expression\n\n  explicit JsonAccessor(const JsonType* json) : json_{*json} {\n  }\n\n  std::optional<StringList> GetStrings(std::string_view field) const override;\n  std::optional<VectorInfo> GetVector(std::string_view field, size_t dim) const override;\n  std::optional<NumsList> GetNumbers(std::string_view active_field) const override;\n  std::optional<StringList> GetTags(std::string_view active_field) const override;\n\n  // The JsonAccessor works with structured types and not plain strings, so an overload is needed\n  SearchDocData Serialize(const search::Schema& schema,\n                          absl::Span<const FieldReference> fields) const override;\n  SearchDocData Serialize(const search::Schema& schema) const override;\n\n  static void RemoveFieldFromCache(std::string_view field);\n\n private:\n  /* If accept_boolean_values is true, then json boolean values are converted to strings */\n  std::optional<StringList> GetStrings(std::string_view field, bool accept_boolean_values) const;\n\n  /// Parses `field` into a JSON path. Caches the results internally.\n  JsonPathContainer* GetPath(std::string_view field) const;\n\n  const JsonType& json_;\n  mutable std::string buf_;\n\n  // Contains built json paths to avoid parsing them repeatedly\n  static thread_local absl::flat_hash_map<std::string, std::unique_ptr<JsonPathContainer>>\n      path_cache_;\n};\n\n// Get accessor for value\nstd::unique_ptr<BaseAccessor> GetAccessor(const DbContext& db_cntx, const PrimeValue& pv);\n\n}  // namespace dfly\n"
  },
  {
    "path": "src/server/search/doc_index.cc",
    "content": "// Copyright 2023, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#include \"server/search/doc_index.h\"\n\n#include <absl/strings/str_join.h>\n\n#include <memory>\n#include <queue>\n#include <ranges>\n\n#include \"absl/strings/str_cat.h\"\n#include \"base/logging.h\"\n#include \"core/overloaded.h\"\n#include \"core/search/indices.h\"\n#include \"core/search/stateless_allocator.h\"\n#include \"server/db_slice.h\"\n#include \"server/engine_shard_set.h\"\n#include \"server/family_utils.h\"\n#include \"server/search/doc_accessors.h\"\n#include \"server/search/global_hnsw_index.h\"\n#include \"server/search/index_builder.h\"\n#include \"server/server_state.h\"\n#include \"util/fibers/fibers.h\"\n\nnamespace dfly {\n\nusing namespace std;\nusing facade::ErrorReply;\nusing nonstd::make_unexpected;\n\nnamespace {\n\ntemplate <typename F>\nvoid TraverseAllMatching(const DocIndex& index, const OpArgs& op_args, F&& f) {\n  auto& db_slice = op_args.GetDbSlice();\n  DCHECK(db_slice.IsDbValid(op_args.db_cntx.db_index));\n  auto [prime_table, _] = db_slice.GetTables(op_args.db_cntx.db_index);\n\n  string scratch;\n  auto cb = [&](PrimeTable::iterator it) {\n    PrimeValue& pv = it->second;\n    string_view key = it->first.GetSlice(&scratch);\n\n    if (!index.Matches(key, pv.ObjType()))\n      return;\n\n    f(key, op_args.db_cntx, pv);\n  };\n\n  PrimeTable::Cursor cursor;\n  do {\n    cursor = prime_table->Traverse(cursor, cb);\n    // Yield if the fiber has been running for long.\n    if (base::CycleClock::ToUsec(util::ThisFiber::GetRunningTimeCycles()) > 500) {  // 500us\n      util::ThisFiber::Yield();\n    }\n  } while (cursor);\n}\n\nbool IsSortableField(std::string_view field_identifier, const search::Schema& schema) {\n  auto it = schema.fields.find(field_identifier);\n  return it != schema.fields.end() && (it->second.flags & search::SchemaField::SORTABLE);\n}\n\nusing SortIndiciesFieldsList =\n    
std::vector<std::pair<string_view /*identifier*/, string_view /*alias*/>>;\n\nstd::pair<std::vector<FieldReference>, SortIndiciesFieldsList> PreprocessAggregateFields(\n    const search::Schema& schema, const AggregateParams& params,\n    const std::optional<std::vector<FieldReference>>& load_fields) {\n  absl::flat_hash_map<std::string_view, FieldReference> fields_by_identifier;\n  absl::flat_hash_map<std::string_view, std::string_view> sort_indicies_aliases;\n  fields_by_identifier.reserve(schema.field_names.size());\n  sort_indicies_aliases.reserve(schema.field_names.size());\n\n  for (const auto& [fname, fident] : schema.field_names) {\n    if (!IsSortableField(fident, schema)) {\n      fields_by_identifier.emplace(fident, FieldReference{fident, fname});\n    } else {\n      sort_indicies_aliases[fident] = fname;\n    }\n  }\n\n  for (const auto& field : load_fields.value_or(vector<FieldReference>{})) {\n    string_view fident = field.Identifier(schema, false);\n    if (!IsSortableField(fident, schema)) {\n      fields_by_identifier.insert_or_assign(fident, field);\n    } else {\n      sort_indicies_aliases[fident] = field.OutputName();\n    }\n  }\n\n  vector<FieldReference> fields;\n  fields.reserve(fields_by_identifier.size());\n  for (auto& [_, field] : fields_by_identifier) {\n    fields.emplace_back(field);\n  }\n\n  return {std::move(fields), {sort_indicies_aliases.begin(), sort_indicies_aliases.end()}};\n}\n\n/* Separate fields into basic and sortable. The second vector contains flags indicating\n   whether the field at the same index in the first vector is sortable or not. 
*/\nstd::pair<std::vector<FieldReference>, std::vector<bool>> GetBasicFields(\n    absl::Span<const std::string_view> fields, const search::Schema& schema) {\n  const size_t fields_count = fields.size();\n  std::vector<bool> is_sortable_field(fields_count);\n  std::vector<FieldReference> basic_fields;\n  basic_fields.reserve(fields_count);\n  for (size_t i = 0; i < fields_count; ++i) {\n    bool is_sortable = IsSortableField(fields[i], schema);\n    is_sortable_field[i] = is_sortable;\n    if (!is_sortable) {\n      basic_fields.emplace_back(fields[i]);\n    }\n  }\n  return {std::move(basic_fields), std::move(is_sortable_field)};\n}\n\nauto GetIndexedHnswFields(const search::Schema& schema) {\n  return schema.fields |\n         std::views::filter([](const auto& item) { return item.second.IsIndexableHnswField(); });\n}\n}  // namespace\n\nbool FieldReference::IsJsonPath(std::string_view name) {\n  if (name.size() < 2) {\n    return false;\n  }\n  return name.front() == '$' && (name[1] == '.' || name[1] == '[');\n}\n\nbool SearchParams::ShouldReturnField(std::string_view alias) const {\n  auto cb = [alias](const auto& entry) { return entry.OutputName() == alias; };\n  return !return_fields || any_of(return_fields->begin(), return_fields->end(), cb);\n}\n\nstring_view SearchFieldTypeToString(search::SchemaField::FieldType type) {\n  switch (type) {\n    case search::SchemaField::TAG:\n      return \"TAG\";\n    case search::SchemaField::TEXT:\n      return \"TEXT\";\n    case search::SchemaField::NUMERIC:\n      return \"NUMERIC\";\n    case search::SchemaField::VECTOR:\n      return \"VECTOR\";\n    case search::SchemaField::GEO:\n      return \"GEO\";\n  }\n  ABSL_UNREACHABLE();\n  return \"\";\n}\n\nstring DocIndexInfo::BuildRestoreCommand() const {\n  std::string out;\n\n  // ON HASH/JSON\n  absl::StrAppend(&out, \"ON\", \" \", base_index.type == DocIndex::HASH ? 
\"HASH\" : \"JSON\");\n\n  // optional PREFIX count *prefix1* *prefix2* ...\n  if (!base_index.prefixes.empty()) {\n    absl::StrAppend(&out, \" PREFIX\", \" \", base_index.prefixes.size());\n    for (const auto& prefix : base_index.prefixes) {\n      absl::StrAppend(&out, \" \", prefix);\n    }\n  }\n\n  // STOPWORDS\n  absl::StrAppend(&out, \" STOPWORDS \", base_index.options.stopwords.size());\n  for (const auto& sw : base_index.options.stopwords)\n    absl::StrAppend(&out, \" \", sw);\n\n  absl::StrAppend(&out, \" SCHEMA\");\n  for (const auto& [fident, finfo] : base_index.schema.fields) {\n    // Store field name, alias and type\n    absl::StrAppend(&out, \" \", fident, \" AS \", finfo.short_name, \" \",\n                    SearchFieldTypeToString(finfo.type));\n\n    // Store specific params\n    Overloaded info{\n        [](monostate) {},\n        [out = &out](const search::SchemaField::VectorParams& params) {\n          auto sim = params.sim == search::VectorSimilarity::L2   ? \"L2\"\n                     : params.sim == search::VectorSimilarity::IP ? \"IP\"\n                                                                  : \"COSINE\";\n          absl::StrAppend(out, \" \", params.use_hnsw ? 
\"HNSW\" : \"FLAT\", \" 6 \", \"DIM \", params.dim,\n                          \" DISTANCE_METRIC \", sim, \" INITIAL_CAP \", params.capacity);\n        },\n        [out = &out](const search::SchemaField::TagParams& params) {\n          absl::StrAppend(out, \" \", \"SEPARATOR\", \" \", string{params.separator});\n          if (params.case_sensitive)\n            absl::StrAppend(out, \" \", \"CASESENSITIVE\");\n        },\n        [out = &out](const search::SchemaField::TextParams& params) {\n          if (params.with_suffixtrie)\n            absl::StrAppend(out, \" \", \"WITH_SUFFIXTRIE\");\n        },\n        [out = &out](const search::SchemaField::NumericParams& params) {\n          absl::StrAppend(out, \" \", \"BLOCKSIZE\", \" \", std::to_string(params.block_size));\n        }};\n    visit(info, finfo.special_params);\n\n    // Store shared field flags\n    if (finfo.flags & search::SchemaField::SORTABLE)\n      absl::StrAppend(&out, \" SORTABLE\");\n\n    if (finfo.flags & search::SchemaField::NOINDEX)\n      absl::StrAppend(&out, \" NOINDEX\");\n  }\n\n  return out;\n}\n\nShardDocIndex::DocId ShardDocIndex::DocKeyIndex::Add(string_view key) {\n  DCHECK_EQ(ids_.count(key), 0u);\n\n  DocId id;\n  if (!free_ids_.empty()) {\n    id = free_ids_.back();\n    free_ids_.pop_back();\n    keys_[id] = key;\n  } else {\n    id = last_id_++;\n    DCHECK_EQ(keys_.size(), id);\n    keys_.emplace_back(key);\n  }\n\n  ids_[key] = id;\n  return id;\n}\n\nShardDocIndex::DocId ShardDocIndex::DocKeyIndex::AddNew(string_view key) {\n  DCHECK_EQ(ids_.count(key), 0u);\n\n  DocId id = last_id_++;\n  if (id < keys_.size()) {\n    keys_[id] = key;\n  } else {\n    DCHECK_EQ(keys_.size(), id);\n    keys_.emplace_back(key);\n  }\n\n  ids_[key] = id;\n  return id;\n}\nstd::optional<ShardDocIndex::DocId> ShardDocIndex::DocKeyIndex::Find(string_view key) const {\n  auto it = ids_.find(key);\n  return it != ids_.end() ? 
std::make_optional(it->second) : std::nullopt;\n}\n\nvoid ShardDocIndex::DocKeyIndex::Remove(DocId id) {\n  ids_.extract(keys_[id]);\n  keys_[id] = \"\";\n  free_ids_.push_back(id);\n}\n\nstring_view ShardDocIndex::DocKeyIndex::Get(DocId id) const {\n  DCHECK_LT(id, keys_.size());\n  // Check that this id was not removed\n  DCHECK(id < last_id_ && std::find(free_ids_.begin(), free_ids_.end(), id) == free_ids_.end());\n\n  return keys_[id];\n}\n\nbool ShardDocIndex::DocKeyIndex::IsValid(DocId id) const {\n  if (id >= last_id_ || id >= keys_.size())\n    return false;\n  // Check if the key at this slot is still tracked in the reverse map with the same id.\n  // This correctly handles empty keys: freed slots have their key extracted from ids_,\n  // while valid empty-key docs still have ids_[\"\"] == id.\n  auto it = ids_.find(keys_[id]);\n  return it != ids_.end() && it->second == id;\n}\n\nsize_t ShardDocIndex::DocKeyIndex::Size() const {\n  return ids_.size();\n}\n\nstd::vector<std::pair<std::string, search::DocId>> ShardDocIndex::DocKeyIndex::Serialize() const {\n  std::vector<std::pair<std::string, search::DocId>> result;\n  result.reserve(ids_.size());\n  for (search::DocId id = 0; id < keys_.size(); ++id) {\n    if (!keys_[id].empty()) {\n      result.emplace_back(keys_[id], id);\n    }\n  }\n  return result;\n}\n\nvoid ShardDocIndex::DocKeyIndex::Restore(\n    const std::vector<std::pair<std::string, search::DocId>>& mappings) {\n  DCHECK(ids_.empty()) << \"Restore should only be called on an empty DocKeyIndex\";\n  // Find max doc_id to size the keys_ vector appropriately\n  DocId max_id = 0;\n  for (const auto& [key, doc_id] : mappings) {\n    max_id = std::max(max_id, doc_id);\n  }\n\n  // Resize keys_ to accommodate all doc_ids\n  keys_.resize(max_id + 1);\n  last_id_ = max_id + 1;\n\n  // Restore the mappings\n  for (const auto& [key, doc_id] : mappings) {\n    keys_[doc_id] = key;\n    ids_[key] = doc_id;\n  }\n\n  // Build free_ids_ list for any gaps 
in the id sequence\n  for (DocId id = 0; id <= max_id; ++id) {\n    if (keys_[id].empty()) {\n      free_ids_.push_back(id);\n    }\n  }\n}\n\nvoid ShardDocIndex::DocKeyIndex::Restore(const std::vector<std::string>& keys) {\n  DCHECK(ids_.empty()) << \"Restore should only be called on an empty DocKeyIndex\";\n  keys_.resize(keys.size());\n  for (DocId id = 0; id < static_cast<DocId>(keys.size()); ++id) {\n    keys_[id] = keys[id];\n    ids_[keys[id]] = id;\n  }\n  last_id_ = static_cast<DocId>(keys.size());\n}\n\nuint8_t DocIndex::GetObjCode() const {\n  return type == JSON ? OBJ_JSON : OBJ_HASH;\n}\n\nbool DocIndex::Matches(string_view key, unsigned obj_code) const {\n  if (obj_code != GetObjCode())\n    return false;\n\n  // Empty prefixes means match all keys\n  if (prefixes.empty())\n    return true;\n\n  for (const auto& prefix : prefixes) {\n    if (key.rfind(prefix, 0) == 0)\n      return true;\n  }\n  return false;\n}\n\nShardDocIndex::ShardDocIndex(shared_ptr<const DocIndex> index)\n    : base_{std::move(index)}, key_index_{} {\n}\n\nShardDocIndex::~ShardDocIndex() {\n  CancelBuilder();\n}\n\nvoid ShardDocIndex::Rebuild(const OpArgs& op_args, PMR_NS::memory_resource* mr, bool is_restored) {\n  CancelBuilder();\n\n  // When restoring, preserve key_index_ populated by RestoreKeyIndex() so that DocIds\n  // match the GlobalDocIds stored in the serialized HNSW graph. CursorLoop will use\n  // the existing DocIds to add documents to the regular indices.\n  if (!is_restored) {\n    key_index_ = DocKeyIndex{};\n    // Full rebuild handles all documents — discard any buffered state from LOADING.\n    is_restoring_vectors_ = false;\n    pending_vector_updates_.clear();\n  } else {\n    // Restored path: VectorLoop will call RestoreGlobalVectorIndices which drains\n    // the buffers. 
Until then, buffer any journal-driven mutations.\n    is_restoring_vectors_ = true;\n  }\n\n  indices_.emplace(base_->schema, base_->options, mr, &synonyms_);\n\n  // Create builder and start indexing\n  builder_ = std::make_unique<search::IndexBuilder>(this);\n  builder_->Start(op_args, is_restored, [this] {\n    VLOG(1) << \"Indexed \" << key_index_.Size()\n            << \" docs on prefixes: \" << absl::StrJoin(base_->prefixes, \", \");\n    builder_.reset();\n  });\n}\n\nvoid ShardDocIndex::CancelBuilder() {\n  if (builder_) {\n    builder_->Cancel();\n    builder_.reset();\n  }\n}\n\nvoid ShardDocIndex::RebuildForGroup(const OpArgs& op_args, const std::string_view& group_id,\n                                    const std::vector<std::string_view>& terms) {\n  if (!indices_)\n    return;\n\n  absl::flat_hash_set<DocId> docs_to_rebuild;\n  std::vector<search::TextIndex*> text_indices = indices_->GetAllTextIndices();\n\n  // Find all documents containing any term from the synonyms group\n  for (auto* text_index : text_indices) {\n    for (const auto& term : terms) {\n      if (const auto* container = text_index->Matching(term)) {\n        for (DocId doc_id : *container) {\n          docs_to_rebuild.insert(doc_id);\n        }\n      }\n    }\n  }\n\n  auto& db_slice = op_args.GetDbSlice();\n  DCHECK(db_slice.IsDbValid(op_args.db_cntx.db_index));\n\n  auto update_indices = [&](bool remove) {\n    for (DocId doc_id : docs_to_rebuild) {\n      std::string_view key = key_index_.Get(doc_id);\n      auto it = db_slice.FindReadOnly(op_args.db_cntx, key, base_->GetObjCode());\n\n      if (!it || !IsValid(*it)) {\n        continue;\n      }\n\n      auto accessor = GetAccessor(op_args.db_cntx, (*it)->second);\n      if (remove) {\n        indices_->Remove(doc_id, *accessor);\n      } else {\n        // Add in this case always succeeds, because we are adding the same document again\n        [[maybe_unused]] bool res = indices_->Add(doc_id, *accessor);\n        
DCHECK(res);\n      }\n    }\n  };\n\n  update_indices(true);\n  synonyms_.UpdateGroup(group_id, terms);\n  update_indices(false);\n}\n\nstd::optional<ShardDocIndex::DocId> ShardDocIndex::GetDocId(std::string_view key,\n                                                            const DbContext& db_cntx) {\n  if (!indices_)\n    return std::nullopt;\n\n  // Only handle documents from database 0\n  if (db_cntx.db_index != 0)\n    return std::nullopt;\n\n  return key_index_.Find(key);\n}\n\nstd::optional<ShardDocIndex::DocId> ShardDocIndex::AddDoc(string_view key, const DbContext& db_cntx,\n                                                          const PrimeValue& pv) {\n  if (!indices_)\n    return std::nullopt;\n\n  // Only index documents from database 0\n  if (db_cntx.db_index != 0)\n    return std::nullopt;\n\n  // Don't add document again if it exists. TODO: Try add?\n  if (key_index_.Find(key))\n    return std::nullopt;\n\n  auto accessor = GetAccessor(db_cntx, pv);\n  DocId id = key_index_.Add(key);\n  if (!indices_->Add(id, *accessor)) {\n    key_index_.Remove(id);\n    return std::nullopt;\n  }\n\n  return id;\n}\n\nvoid ShardDocIndex::RemoveDoc(DocId id, const DbContext& db_cntx, const PrimeValue& pv) {\n  auto accessor = GetAccessor(db_cntx, pv);\n  key_index_.Remove(id);\n  indices_->Remove(id, *accessor);\n}\n\nvoid ShardDocIndex::AddDocToGlobalVectorIndex(ShardDocIndex::DocId doc_id, const DbContext& db_cntx,\n                                              PrimeValue* pv) {\n  if (is_restoring_vectors_) {\n    // Buffer the key — will be re-applied after RestoreGlobalVectorIndices completes.\n    std::string_view key = key_index_.Get(doc_id);\n    pending_vector_updates_.emplace(key);\n    return;\n  }\n\n  auto accessor = GetAccessor(db_cntx, *pv);\n  GlobalDocId global_id = search::CreateGlobalDocId(EngineShard::tlocal()->shard_id(), doc_id);\n\n  for (const auto& [field_ident, field_info] : GetIndexedHnswFields(base_->schema)) {\n    if (auto index 
= GlobalHnswIndexRegistry::Instance().Get(base_->name, field_info.short_name);\n        index) {\n      bool added = index->Add(global_id, *accessor, field_ident);\n      if (added && !index->IsVectorCopied()) {\n        pv->SetOmitDefrag(true);\n      }\n    }\n  }\n}\n\nvoid ShardDocIndex::RemoveDocFromGlobalVectorIndex(ShardDocIndex::DocId doc_id,\n                                                   const DbContext& db_cntx, const PrimeValue& pv) {\n  if (is_restoring_vectors_) {\n    // Buffer the key — will be re-applied after RestoreGlobalVectorIndices completes.\n    std::string_view key = key_index_.Get(doc_id);\n    pending_vector_updates_.emplace(key);\n    return;\n  }\n\n  auto accessor = GetAccessor(db_cntx, pv);\n  GlobalDocId global_id = search::CreateGlobalDocId(EngineShard::tlocal()->shard_id(), doc_id);\n\n  for (const auto& [field_ident, field_info] : GetIndexedHnswFields(base_->schema)) {\n    if (auto index = GlobalHnswIndexRegistry::Instance().Get(base_->name, field_info.short_name);\n        index) {\n      index->Remove(global_id, *accessor, field_ident);\n    }\n  }\n}\n\nvoid ShardDocIndex::RemoveFromAllHnswIndices(search::DocId doc_id) {\n  GlobalDocId global_id = search::CreateGlobalDocId(EngineShard::tlocal()->shard_id(), doc_id);\n  for (const auto& [field_ident, field_info] : GetIndexedHnswFields(base_->schema)) {\n    if (auto index = GlobalHnswIndexRegistry::Instance().Get(base_->name, field_info.short_name);\n        index) {\n      index->Remove(global_id);\n    }\n  }\n}\n\nvoid ShardDocIndex::RestoreGlobalVectorIndices(std::string_view index_name, const OpArgs& op_args) {\n  // Don't run loop if no vector fields are present\n  if (std::ranges::empty(GetIndexedHnswFields(base_->schema)))\n    return;\n\n  LOG(INFO) << \"Restoring vector index '\" << index_name << \"' from serialized graph on shard \"\n            << EngineShard::tlocal()->shard_id();\n\n  auto& db_slice = op_args.GetDbSlice();\n  
DCHECK(db_slice.IsDbValid(op_args.db_cntx.db_index));\n\n  size_t processed = 0;\n  size_t successful_updates = 0;\n  size_t failed_updates = 0;\n  size_t missing_documents = 0;\n\n  // Collect missing document IDs to remove after the loop (can't modify key_index_ during\n  // iteration over the snapshot). Store the key too so we can re-validate: concurrent fibers\n  // may free and reuse the DocId during Yield(), making the original local_id stale.\n  struct MissingDoc {\n    std::string key;\n    DocId local_id;\n    GlobalDocId global_id;\n  };\n  std::vector<MissingDoc> missing_doc_ids;\n\n  // Snapshot the map: Yield() inside the loop lets other fibers run (e.g. FullSyncDflyFb\n  // finishing its RDB load), which may mutate key_index_ via doc_del_cb_ and invalidate\n  // flat_hash_map iterators.\n  auto doc_keys_snapshot = key_index_.GetDocKeysMap();\n\n  for (const auto& [key, local_id] : doc_keys_snapshot) {\n    auto it = db_slice.FindMutable(op_args.db_cntx, key, base_->GetObjCode());\n    if (!it || !IsValid(it->it)) {\n      ++missing_documents;\n      GlobalDocId global_id =\n          search::CreateGlobalDocId(EngineShard::tlocal()->shard_id(), local_id);\n      missing_doc_ids.push_back({std::string(key), local_id, global_id});\n      continue;\n    }\n\n    PrimeValue& pv = it->it->second;\n    auto doc = GetAccessor(op_args.db_cntx, pv);\n    GlobalDocId global_id = search::CreateGlobalDocId(EngineShard::tlocal()->shard_id(), local_id);\n\n    for (const auto& [field_ident, field_info] : GetIndexedHnswFields(base_->schema)) {\n      if (auto index = GlobalHnswIndexRegistry::Instance().Get(index_name, field_info.short_name);\n          index) {\n        bool success = index->UpdateVectorData(global_id, *doc, field_ident);\n        if (success) {\n          ++successful_updates;\n          if (!index->IsVectorCopied()) {\n            pv.SetOmitDefrag(true);\n          }\n        } else {\n          // Node not in restored HNSW graph (new doc added 
during full sync via journal\n          // events before index was created). Fall back to Add.\n          bool added = index->Add(global_id, *doc, field_ident);\n          if (added) {\n            ++successful_updates;\n            if (!index->IsVectorCopied()) {\n              pv.SetOmitDefrag(true);\n            }\n          } else {\n            ++failed_updates;\n          }\n        }\n      }\n    }\n\n    // Yield periodically to avoid blocking the fiber\n    if (++processed % 1000 == 0) {\n      util::ThisFiber::Yield();\n    }\n  }\n\n  // Remove HNSW nodes for documents that no longer exist in DB (deleted before or during\n  // restoration). Without this, stale nodes remain in the graph with no vector data, causing\n  // inconsistent KNN search results compared to the master.\n  // Re-validate each entry: concurrent fibers may have freed and reused the DocId.\n  for (const auto& [key, local_id, global_id] : missing_doc_ids) {\n    for (const auto& [field_ident, field_info] : GetIndexedHnswFields(base_->schema)) {\n      if (auto index = GlobalHnswIndexRegistry::Instance().Get(index_name, field_info.short_name);\n          index) {\n        index->Remove(global_id);\n      }\n    }\n    // Only remove from key_index_ if the mapping still matches the snapshot.\n    if (key_index_.Find(key) == local_id) {\n      key_index_.Remove(local_id);\n    }\n  }\n\n  // Log summary of vector restoration\n  size_t total_docs = doc_keys_snapshot.size();\n  if (failed_updates > 0 || missing_documents > 0) {\n    LOG(WARNING) << \"Restored vectors for index \" << index_name << \": \" << successful_updates\n                 << \" successful, \" << failed_updates << \" failed (missing vector field), \"\n                 << missing_documents << \" missing documents out of \" << total_docs << \" total\";\n  } else {\n    VLOG(1) << \"Restored vectors for index \" << index_name << \": \" << successful_updates << \"/\"\n            << total_docs << \" documents\";\n  }\n\n  // 
Drain pending vector updates that arrived via journal during the LOADING window.\n  // Clear the flag BEFORE draining so that AddDoc/AddDocToGlobalVectorIndex work normally.\n  is_restoring_vectors_ = false;\n\n  if (!pending_vector_updates_.empty()) {\n    LOG(INFO) << \"Draining \" << pending_vector_updates_.size()\n              << \" pending vector updates for index '\" << index_name << \"' on shard \"\n              << EngineShard::tlocal()->shard_id();\n\n    for (const auto& key : pending_vector_updates_) {\n      auto local_id = key_index_.Find(key);\n      auto it = db_slice.FindMutable(op_args.db_cntx, key, base_->GetObjCode());\n\n      if (it && IsValid(it->it)) {\n        // Key exists in DB — ensure it's properly indexed with current data.\n        PrimeValue& pv = it->it->second;\n\n        if (local_id) {\n          // Already in key_index_ (from snapshot). Remove old HNSW node and re-add\n          // with current vector data to match master state.\n          RemoveFromAllHnswIndices(*local_id);\n          AddDocToGlobalVectorIndex(*local_id, op_args.db_cntx, &pv);\n        } else {\n          // New document not in key_index_ (added during full sync).\n          auto doc_id = AddDoc(key, op_args.db_cntx, pv);\n          if (doc_id) {\n            AddDocToGlobalVectorIndex(*doc_id, op_args.db_cntx, &pv);\n          }\n        }\n      } else if (local_id) {\n        // Key absent from DB — remove stale HNSW node and key_index_ entry.\n        RemoveFromAllHnswIndices(*local_id);\n        key_index_.Remove(*local_id);\n      }\n    }\n    pending_vector_updates_.clear();\n  }\n}\n\nShardDocIndex::SerializedEntryWithKey ShardDocIndex::SerializeDocWithKey(\n    search::DocId id, const OpArgs& op_args, const search::Schema& schema,\n    const std::optional<std::vector<FieldReference>>& return_fields) {\n  auto entry = LoadEntry(id, op_args);\n  if (entry) {\n    if (return_fields) {\n      return std::optional<std::pair<std::string_view, 
SearchDocData>>{\n          std::make_pair(entry->first, entry->second->Serialize(schema, *return_fields))};\n    } else {\n      return std::optional<std::pair<std::string_view, SearchDocData>>{\n          std::make_pair(entry->first, entry->second->Serialize(schema))};\n    }\n  }\n  return std::nullopt;\n}\n\nbool ShardDocIndex::Matches(string_view key, unsigned obj_code) const {\n  return base_->Matches(key, obj_code);\n}\n\noptional<ShardDocIndex::LoadedEntry> ShardDocIndex::LoadEntry(DocId id,\n                                                              const OpArgs& op_args) const {\n  auto& db_slice = op_args.GetDbSlice();\n  string_view key = key_index_.Get(id);\n  auto it = db_slice.FindReadOnly(op_args.db_cntx, key, base_->GetObjCode());\n  if (!it || !IsValid(*it))\n    return std::nullopt;\n\n  return {{key, GetAccessor(op_args.db_cntx, (*it)->second)}};\n}\n\nvector<search::SortableValue> ShardDocIndex::KeepTopKSorted(vector<DocId>* ids, size_t limit,\n                                                            const SearchParams::SortOption& sort,\n                                                            const OpArgs& op_args) const {\n  DCHECK_GT(limit, 0u) << \"Limit=0 still has O(ids->size()) complexity\";\n\n  auto comp = [order = sort.order](const auto& lhs, const auto& rhs) {\n    return order == SortOrder::ASC ? 
lhs < rhs : lhs > rhs;\n  };\n  // Priority queue keeps top-k values in reverse order (to compare against top - worst value)\n  using QPair = std::pair<search::SortableValue, DocId>;\n  std::priority_queue<QPair, std::vector<QPair>, decltype(comp)> q(comp);\n\n  // Iterate over all documents, extract sortable field and update the queue\n  for (DocId id : *ids) {\n    auto entry = LoadEntry(id, op_args);\n    if (!entry)\n      continue;\n\n    auto result = entry->second->Serialize(base_->schema, {sort.field});\n    if (result.empty())\n      continue;\n\n    // Check if the extracted value is better than the worst (q.top())\n    if (q.size() < limit || comp(result.begin()->second, q.top().first)) {\n      if (q.size() >= limit)\n        q.pop();\n      q.emplace(std::move(result.begin()->second), id);\n    }\n  }\n\n  // Reorder ids and collect scores\n  vector<search::SortableValue> out(q.size());\n  for (int i = 0; !q.empty(); i++) {\n    auto [v, id] = q.top();\n    (*ids)[i] = id;\n    out[i] = std::move(v);\n    q.pop();\n  }\n  return out;\n}\n\nSearchResult ShardDocIndex::Search(const OpArgs& op_args, const SearchParams& params,\n                                   search::SearchAlgorithm* search_algo,\n                                   bool is_knn_prefilter) const {\n  size_t limit = params.limit_offset + params.limit_total;\n\n  // If we don't sort the documents, we don't need to copy more ids than are requested\n  // Also for HNSW KNN search we don't cut results at the search stage.\n  bool can_cut = !params.sort_option && !search_algo->GetKnnScoreSortOption() && !is_knn_prefilter;\n  size_t id_cutoff_limit = can_cut ? 
limit : numeric_limits<size_t>::max();\n\n  auto result = search_algo->Search(&*indices_, id_cutoff_limit);\n  if (!result.error.empty())\n    return {facade::ErrorReply(std::move(result.error))};\n\n  if (limit == 0)\n    return {result.total, {}, std::move(result.profile)};\n\n  // Tune sort for KNN: Skip if it's on the knn field, otherwise extend the limit if needed\n  bool skip_sort = false;\n  if (auto ko = search_algo->GetKnnScoreSortOption(); ko) {\n    skip_sort = !params.sort_option || params.sort_option->IsSame(*ko);\n    if (!skip_sort)\n      limit = max(limit, ko->limit);\n  }\n\n  // We don't apply limit if this is prefilter HNSW KNN search\n  if (is_knn_prefilter) {\n    limit = std::numeric_limits<size_t>::max();\n  }\n\n  auto return_fields = params.return_fields.value_or(vector<FieldReference>{});\n\n  // Apply SORTBY\n  // TODO(vlad): Write profiling up to here\n  vector<search::SortableValue> sort_scores;\n  if (params.sort_option && !skip_sort) {\n    const auto& so = *params.sort_option;\n    auto fident = so.field.Identifier(base_->schema, false);\n    if (IsSortableField(fident, base_->schema)) {\n      auto* idx = indices_->GetSortIndex(fident);\n      sort_scores = idx->Sort(&result.ids, limit, so.order == SortOrder::DESC);\n    } else {\n      sort_scores = KeepTopKSorted(&result.ids, limit, so, op_args);\n      // KeepTopKSorted only fills the first sort_scores.size() entries of result.ids;\n      // trim the rest to avoid out-of-bounds access on sort_scores in the loop below.\n      if (!sort_scores.empty())\n        result.ids.resize(sort_scores.size());\n      if (params.ShouldReturnAllFields())\n        return_fields.push_back(so.field);\n    }\n\n    // If we sorted with knn_scores present, rearrange them\n    if (!sort_scores.empty() && !result.knn_scores.empty()) {\n      unordered_map<DocId, size_t> score_lookup(result.knn_scores.begin(), result.knn_scores.end());\n      for (size_t i = 0; i < min(limit, result.ids.size()); 
i++)\n        result.knn_scores[i] = {result.ids[i], score_lookup[result.ids[i]]};\n    }\n  }\n\n  // Cut off unnecessary items\n  result.ids.resize(min(result.ids.size(), limit));\n\n  // Serialize documents\n  vector<SerializedSearchDoc> out;\n  out.reserve(min(limit, result.ids.size()));\n\n  size_t expired_count = 0;\n  for (size_t i = 0; i < result.ids.size(); i++) {\n    float knn_score = result.knn_scores.empty() ? 0 : result.knn_scores[i].second;\n    auto sort_score = sort_scores.empty() ? std::monostate{} : std::move(sort_scores[i]);\n\n    // Don't load entry if we need only its key. Ignore expiration.\n    if (params.IdsOnly()) {\n      string_view key = key_index_.Get(result.ids[i]);\n      out.push_back({result.ids[i], string{key}, {}, knn_score, sort_score});\n      continue;\n    }\n\n    auto entry = LoadEntry(result.ids[i], op_args);\n    if (!entry) {\n      expired_count++;\n      continue;\n    }\n\n    auto& [key, accessor] = *entry;\n\n    // Load all specified fields from document\n    SearchDocData fields{};\n    if (params.ShouldReturnAllFields())\n      fields = accessor->Serialize(base_->schema);\n\n    auto more_fields = accessor->Serialize(base_->schema, return_fields);\n    fields.insert(make_move_iterator(more_fields.begin()), make_move_iterator(more_fields.end()));\n    out.push_back({result.ids[i], string{key}, std::move(fields), knn_score, sort_score});\n  }\n\n  return {result.total - expired_count, std::move(out), std::move(result.profile)};\n}\n\nvector<SearchDocData> ShardDocIndex::SearchForAggregator(\n    const OpArgs& op_args, const AggregateParams& params,\n    search::SearchAlgorithm* search_algo) const {\n  auto search_results = search_algo->Search(&*indices_);\n\n  if (!search_results.error.empty())\n    return {};\n\n  auto [fields_to_load, sort_indicies] =\n      PreprocessAggregateFields(base_->schema, params, params.load_fields);\n\n  vector<absl::flat_hash_map<string, search::SortableValue>> out;\n  for (DocId doc 
: search_results.ids) {\n    auto entry = LoadEntry(doc, op_args);\n    if (!entry)\n      continue;\n    auto& [_, accessor] = *entry;\n\n    SearchDocData extracted_sort_indicies;\n    extracted_sort_indicies.reserve(sort_indicies.size());\n    for (const auto& [fident, fname] : sort_indicies) {\n      extracted_sort_indicies[fname] = indices_->GetSortIndexValue(doc, fident);\n    }\n\n    SearchDocData loaded = accessor->Serialize(base_->schema, fields_to_load);\n\n    out.emplace_back(make_move_iterator(extracted_sort_indicies.begin()),\n                     make_move_iterator(extracted_sort_indicies.end()));\n    out.back().insert(make_move_iterator(loaded.begin()), make_move_iterator(loaded.end()));\n  }\n\n  return out;\n}\n\njoin::Vector<join::OwnedEntry> ShardDocIndex::PreagregateDataForJoin(\n    const OpArgs& op_args, absl::Span<const std::string_view> join_fields,\n    search::SearchAlgorithm* search_algo) const {\n  auto search_results = search_algo->Search(&*indices_);\n\n  const size_t fields_count = join_fields.size();\n  const auto [basic_fields, is_sortable_field] = GetBasicFields(join_fields, base_->schema);\n\n  join::Vector<join::OwnedEntry> result;\n  result.reserve(search_results.ids.size());\n\n  const ShardId shard_id = op_args.shard->shard_id();\n  for (DocId doc : search_results.ids) {\n    auto entry = LoadEntry(doc, op_args);\n    if (!entry)\n      continue;\n\n    auto& [key, accessor] = *entry;\n\n    SearchDocData loaded_basic_fields = accessor->Serialize(base_->schema, basic_fields);\n\n    bool insert_key = true;\n    join::Vector<join::OwnedJoinableValue> join_fields_values(fields_count);\n    for (size_t i = 0; i < fields_count; ++i) {\n      search::SortableValue value;\n      if (is_sortable_field[i]) {\n        value = indices_->GetSortIndexValue(doc, join_fields[i]);\n      } else {\n        value = loaded_basic_fields[join_fields[i]];\n      }\n\n      auto copy = [&](auto&& v) {\n        using T = 
std::decay_t<decltype(v)>;\n        if constexpr (!std::is_same_v<T, std::monostate>) {\n          join_fields_values[i] = v;\n        } else {\n          // If the value is nil, we skip this key\n          insert_key = false;\n        }\n      };\n\n      std::visit(std::move(copy), value);\n    }\n\n    if (insert_key) {\n      result.emplace_back(std::piecewise_construct, std::forward_as_tuple(shard_id, doc),\n                          std::forward_as_tuple(std::make_move_iterator(join_fields_values.begin()),\n                                                std::make_move_iterator(join_fields_values.end())));\n    }\n  }\n\n  return result;\n}\n\nShardDocIndex::FieldsValuesPerDocId ShardDocIndex::LoadKeysData(\n    const OpArgs& op_args, const absl::flat_hash_set<search::DocId>& doc_ids,\n    absl::Span<const std::string_view> fields_to_load) const {\n  const size_t fields_count = fields_to_load.size();\n  const auto [basic_fields, is_sortable_field] = GetBasicFields(fields_to_load, base_->schema);\n\n  FieldsValuesPerDocId result;\n  result.reserve(doc_ids.size());\n\n  for (DocId doc : doc_ids) {\n    auto entry = LoadEntry(doc, op_args);\n    if (!entry)\n      continue;\n\n    auto& [key, accessor] = *entry;\n\n    SearchDocData loaded_basic_fields = accessor->Serialize(base_->schema, basic_fields);\n\n    FieldsValues fields_values(fields_count);\n    for (size_t i = 0; i < fields_count; ++i) {\n      if (is_sortable_field[i]) {\n        fields_values[i] = indices_->GetSortIndexValue(doc, fields_to_load[i]);\n      } else {\n        fields_values[i] = loaded_basic_fields[fields_to_load[i]];\n      }\n    }\n\n    result.emplace(std::piecewise_construct, std::forward_as_tuple(doc),\n                   std::forward_as_tuple(std::make_move_iterator(fields_values.begin()),\n                                         std::make_move_iterator(fields_values.end())));\n  }\n\n  return result;\n}\n\nDocIndexInfo ShardDocIndex::GetInfo() const {\n  return {.base_index = 
*base_,\n          .num_docs = key_index_.Size(),\n          .indexing = bool(builder_),\n          .percent_indexed = bool(builder_) ? 0.5f : 1.0f,  // no estimation for now\n          .hnsw_metadata = nullopt};\n}\n\nio::Result<StringVec, ErrorReply> ShardDocIndex::GetTagVals(string_view field) const {\n  search::BaseIndex* base_index = indices_->GetIndex(field);\n  if (base_index == nullptr) {\n    return make_unexpected(ErrorReply{\"-No such field\"});\n  }\n\n  search::TagIndex* tag_index = dynamic_cast<search::TagIndex*>(base_index);\n  if (tag_index == nullptr) {\n    return make_unexpected(ErrorReply{\"-Not a tag field\"});\n  }\n\n  return tag_index->GetTerms();\n}\n\nShardDocIndices::ShardDocIndices() : local_mr_{ServerState::tlocal()->data_heap()} {\n  InitTLSearchMR(&local_mr_);\n}\n\nShardDocIndex* ShardDocIndices::GetIndex(string_view name) {\n  auto it = indices_.find(name);\n  return it != indices_.end() ? it->second.get() : nullptr;\n}\n\nvoid ShardDocIndices::InitIndex(const OpArgs& op_args, std::string_view name,\n                                shared_ptr<const DocIndex> index_ptr) {\n  auto shard_index = make_unique<ShardDocIndex>(std::move(index_ptr));\n  auto [it, _] = indices_.emplace(name, std::move(shard_index));\n\n  // Don't build while loading, shutting down, etc.\n  // After loading, indices are rebuilt separately\n  if (ServerState::tlocal()->gstate() == GlobalState::ACTIVE)\n    it->second->Rebuild(op_args, &local_mr_);\n\n  op_args.GetDbSlice().SetDocDeletionCallback(\n      [this](string_view key, const DbContext& cntx, const PrimeValue& pv) {\n        RemoveDoc(key, cntx, pv);\n      });\n}\n\nunique_ptr<ShardDocIndex> ShardDocIndices::DropIndex(string_view name) {\n  auto it = indices_.find(name);\n  if (it == indices_.end())\n    return nullptr;\n\n  DropIndexCache(*it->second);\n  auto index = std::move(it->second);\n  indices_.erase(it);\n\n  return index;\n}\n\nvoid ShardDocIndices::DropAllIndices() {\n  for (const auto& [_, 
idx] : indices_)\n    DropIndexCache(*idx);\n  indices_.clear();\n  GlobalHnswIndexRegistry::Instance().Reset();\n}\n\nvoid ShardDocIndices::DropIndexCache(const dfly::ShardDocIndex& shard_doc_index) {\n  auto info = shard_doc_index.GetInfo();\n  for (const auto& [fident, field] : info.base_index.schema.fields)\n    JsonAccessor::RemoveFieldFromCache(fident);\n}\n\nvoid ShardDocIndices::RebuildAllIndices(const OpArgs& op_args, bool is_restored) {\n  for (auto& [index_name, ptr] : indices_) {\n    // Only use the restore path for indices that have populated key mappings.\n    // When shard counts differ, PerformPostLoad remaps the mappings; if remapping fails,\n    // the mappings are removed so the index falls back to full rebuild here.\n    bool index_restored = is_restored && ptr->key_index_.Size() > 0;\n    ptr->Rebuild(op_args, &local_mr_, index_restored);\n  }\n}\n\nvoid ShardDocIndices::BlockUntilConstructionEnd() {\n  bool indexing = false;\n  do {\n    indexing = false;\n    for (const auto& [_, ptr] : indices_)\n      indexing |= ptr->GetInfo().indexing;\n\n    if (indexing)\n      util::ThisFiber::SleepFor(5ms);\n  } while (indexing);\n}\n\nvector<string> ShardDocIndices::GetIndexNames() const {\n  vector<string> names{};\n  names.reserve(indices_.size());\n  for (const auto& [name, ptr] : indices_)\n    names.push_back(name);\n  return names;\n}\n\nvoid ShardDocIndices::AddDoc(string_view key, const DbContext& db_cntx, PrimeValue* pv) {\n  DCHECK(IsIndexedKeyType(*pv));\n  for (auto& [index_name, index] : indices_) {\n    if (index->Matches(key, pv->ObjType())) {\n      std::optional<search::DocId> doc_id = index->AddDoc(key, db_cntx, *pv);\n      if (doc_id) {\n        index->AddDocToGlobalVectorIndex(*doc_id, db_cntx, pv);\n      }\n    }\n  }\n}\n\nvoid ShardDocIndices::RemoveDoc(string_view key, const DbContext& db_cntx, const PrimeValue& pv) {\n  DCHECK(IsIndexedKeyType(pv));\n  for (auto& [index_name, index] : indices_) {\n    if 
(index->Matches(key, pv.ObjType())) {\n      std::optional<search::DocId> doc_id = index->GetDocId(key, db_cntx);\n      if (doc_id) {\n        index->RemoveDocFromGlobalVectorIndex(*doc_id, db_cntx, pv);\n        index->RemoveDoc(*doc_id, db_cntx, pv);\n      }\n    }\n  }\n}\n\nsize_t ShardDocIndices::GetUsedMemory() const {\n  return local_mr_.used();\n}\n\nSearchStats ShardDocIndices::GetStats() const {\n  size_t total_entries = 0;\n  for (const auto& [_, index] : indices_)\n    total_entries += index->GetInfo().num_docs;\n\n  return {GetUsedMemory(), indices_.size(), total_entries};\n}\n\nsearch::DefragmentResult ShardDocIndices::Defragment(PageUsage* page_usage) {\n  // In case of resumed defragmentation, iteration order may change in case there were insertions\n  // after the last defragment operation completed, so there is no guarantee that an entry will only\n  // be defragmented once per cycle. This will only happen in case of a new index being added\n  // though, so it is an acceptable anomaly.\n  search::DefragmentMap dm{indices_, &next_defrag_index_};\n  return dm.Defragment(page_usage);\n}\n\n}  // namespace dfly\n"
  },
  {
    "path": "src/server/search/doc_index.h",
    "content": "// Copyright 2023, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#pragma once\n\n#include <absl/container/flat_hash_map.h>\n#include <absl/container/flat_hash_set.h>\n\n#include <memory>\n#include <optional>\n#include <string>\n#include <utility>\n#include <vector>\n\n#include \"base/pmr/memory_resource.h\"\n#include \"core/mi_memory_resource.h\"\n#include \"core/search/base.h\"\n#include \"core/search/hnsw_index.h\"\n#include \"core/search/search.h\"\n#include \"core/search/synonyms.h\"\n#include \"server/search/aggregator.h\"\n#include \"server/search/index_join.h\"\n#include \"server/stats.h\"\n#include \"server/table.h\"\n\nnamespace dfly {\n\nusing StringVec = std::vector<std::string>;\n\nnamespace search {\nstruct IndexBuilder;\n}  // namespace search\n\nstruct BaseAccessor;\n\nusing SearchDocData = absl::flat_hash_map<std::string /*field*/, search::SortableValue /*value*/>;\nusing Synonyms = search::Synonyms;\n\nstd::string_view SearchFieldTypeToString(search::SchemaField::FieldType);\n\nstruct SerializedSearchDoc {\n  search::DocId id;\n  std::string key;\n  SearchDocData values;\n  float knn_score;\n  search::SortableValue sort_score;\n};\n\nstruct SearchResult {\n  SearchResult() = default;\n\n  SearchResult(size_t total_hits, std::vector<SerializedSearchDoc> docs,\n               std::optional<search::AlgorithmProfile> profile)\n      : total_hits{total_hits}, docs{std::move(docs)}, profile{std::move(profile)} {\n  }\n\n  SearchResult(facade::ErrorReply error) : error{std::move(error)} {\n  }\n\n  size_t total_hits;\n  std::vector<SerializedSearchDoc> docs;\n  std::optional<search::AlgorithmProfile> profile;\n\n  std::optional<facade::ErrorReply> error;\n};\n\n// Field reference with optional alias as parsed from RETURN [field AS alias], LOAD, etc...\nstruct FieldReference {\n  explicit FieldReference(std::string_view name, std::string_view alias = \"\")\n      : name_{name}, alias_{alias} {\n  
}\n\n  std::string_view Identifier(const search::Schema& schema, bool is_json) const {\n    return (is_json && IsJsonPath(name_)) ? name_ : schema.LookupAlias(name_);\n  }\n\n  std::string_view Name() const {\n    return name_;\n  }\n\n  std::string_view OutputName() const {\n    return alias_.empty() ? name_ : alias_;\n  }\n\n private:\n  static bool IsJsonPath(std::string_view name);\n\n  std::string_view name_, alias_;\n};\n\nenum class SortOrder { ASC, DESC };\n\nstruct SearchParams {\n  struct SortOption {\n    FieldReference field;\n    SortOrder order = SortOrder::ASC;\n\n    bool IsSame(const search::KnnScoreSortOption& knn_sort) const {\n      return knn_sort.score_field_alias == field.OutputName();\n    }\n  };\n\n  // Parameters for \"LIMIT offset total\": select total amount documents with a specific offset from\n  // the whole result set\n  size_t limit_offset = 0;\n  size_t limit_total = 10;\n\n  bool with_sortkeys = false;\n\n  /*\n  1. If not set -> return all fields\n  2. If set but empty -> no fields should be returned\n  3. 
If set and not empty -> return only these fields\n  */\n  std::optional<std::vector<FieldReference>> return_fields;\n\n  /*\n    Fields that should be also loaded from the document.\n\n    Only one of load_fields and return_fields should be set.\n  */\n  std::optional<std::vector<FieldReference>> load_fields;\n\n  std::optional<SortOption> sort_option;\n\n  search::OptionalFilters optional_filters;\n\n  search::QueryParams query_params;\n\n  bool ShouldReturnAllFields() const {\n    return !return_fields.has_value();\n  }\n\n  bool IdsOnly() const {\n    return return_fields && return_fields->empty();\n  }\n\n  bool ShouldReturnField(std::string_view alias) const;\n};\n\nstruct AggregateParams {\n  struct JoinParams {\n    // First field is the index name, second is the field name.\n    using Field = std::pair<std::string, std::string>;\n\n    struct Condition {\n      Condition(std::string_view field_, std::string_view foreign_index_,\n                std::string_view foreign_field_)\n          : field{field_}, foreign_field{Field{foreign_index_, foreign_field_}} {\n      }\n\n      std::string field;\n      Field foreign_field;\n    };\n\n    std::string index;\n    std::string index_alias;\n    std::vector<Condition> conditions;\n    std::string query = \"*\";\n  };\n\n  /* Can have 3 scenarios:\n      1. No joins - then this is ignored\n      2. Has joins and SORTBY ... LIMIT option - then this is used to sort/limit right after join\n      3. Has joins and LIMIT option - then this is used to limit right after join.\n     Next aggregation steps after first LIMIT or first SORTBY will be applied on the final result,\n     after loading the data for all joined documents. 
*/\n  struct JoinAggregateParams {\n    static constexpr size_t kDefaultLimit = std::numeric_limits<size_t>::max();\n\n    bool HasLimit() const {\n      return limit_total != kDefaultLimit;\n    }\n\n    bool HasValue() const {\n      return HasLimit() || sort.has_value();\n    }\n\n    size_t limit_offset = 0;\n    size_t limit_total = kDefaultLimit;\n    std::optional<aggregate::SortParams> sort;\n  };\n\n  std::string_view index, query;\n  search::QueryParams params;\n\n  std::vector<JoinParams> joins;\n  JoinAggregateParams join_agg_params;\n\n  std::optional<std::vector<FieldReference>> load_fields;\n  std::vector<aggregate::AggregationStep> steps;\n};\n\n// Stores basic info about a document index.\nstruct DocIndex {\n  enum DataType : uint8_t { HASH, JSON };\n\n  // Get numeric OBJ_ code\n  uint8_t GetObjCode() const;\n\n  // Return true if the following document (key, obj_code) is tracked by this index.\n  bool Matches(std::string_view key, unsigned obj_code) const;\n\n  std::string name;\n  search::Schema schema;\n  search::IndicesOptions options;\n  std::vector<std::string> prefixes;\n  DataType type{HASH};\n};\n\nstruct DocIndexInfo {\n  DocIndex base_index;\n  size_t num_docs = 0;\n\n  bool indexing = false;\n  float percent_indexed = 1;\n\n  // HNSW metadata for vector index (if present)\n  // TODO: move to schema\n  std::optional<search::HnswIndexMetadata> hnsw_metadata = std::nullopt;\n\n  // Build original ft.create command that can be used to re-create this index\n  std::string BuildRestoreCommand() const;\n};\n\nclass ShardDocIndices;\n\n// Stores internal search indices for documents of a document index on a specific shard.\nclass ShardDocIndex {\n  friend class ShardDocIndices;\n  friend struct search::IndexBuilder;\n\n  using DocId = search::DocId;\n  using GlobalDocId = search::GlobalDocId;\n\n  // Used in FieldsValuesPerDocId to store values for each field per document\n  using FieldsValues = absl::InlinedVector<search::SortableValue, 
4>;\n\n  // DocKeyIndex manages mapping document keys to ids and vice versa through a simple interface.\n  struct DocKeyIndex {\n    DocId Add(std::string_view key);\n\n    // Like Add but always allocates a fresh DocId, never reusing free_ids_.\n    // Used during restored CursorLoop to avoid colliding with HNSW node ids.\n    DocId AddNew(std::string_view key);\n\n    void Remove(DocId id);\n\n    std::string_view Get(DocId id) const;\n    bool IsValid(DocId id) const;\n    std::optional<DocId> Find(std::string_view key) const;\n    size_t Size() const;\n\n    // Get const reference to the internal ids map\n    const absl::flat_hash_map<std::string, DocId>& GetDocKeysMap() const {\n      return ids_;\n    }\n\n    // Serialization: returns pairs of (key, doc_id) for all active mappings\n    std::vector<std::pair<std::string, DocId>> Serialize() const;\n\n    // Restore key-to-docId mappings from serialized data (RDB load)\n    void Restore(const std::vector<std::pair<std::string, search::DocId>>& mappings);\n\n    // Restore from remapped keys in doc_id order (vector index = doc_id).\n    void Restore(const std::vector<std::string>& keys);\n\n   private:\n    absl::flat_hash_map<std::string, DocId> ids_;\n    std::vector<std::string> keys_;\n    std::vector<DocId> free_ids_;\n    DocId last_id_ = 0;\n  };\n\n public:\n  // Index must be rebuilt at least once after intialization\n  explicit ShardDocIndex(std::shared_ptr<const DocIndex> index);\n\n  // Possibly blocking to stop indexing job\n  ~ShardDocIndex();\n\n  // Perform search on all indexed documents and return results.\n  SearchResult Search(const OpArgs& op_args, const SearchParams& params,\n                      search::SearchAlgorithm* search_algo, bool is_knn_prefilter) const;\n\n  // Perform search and load requested values - note params might be interpreted differently.\n  std::vector<SearchDocData> SearchForAggregator(const OpArgs& op_args,\n                                                 const 
AggregateParams& params,\n                                                 search::SearchAlgorithm* search_algo) const;\n\n  // Methods needed for join operation\n  join::Vector<join::OwnedEntry> PreagregateDataForJoin(\n      const OpArgs& op_args, absl::Span<const std::string_view> join_fields,\n      search::SearchAlgorithm* search_algo) const;\n\n  using FieldsValuesPerDocId = absl::flat_hash_map<DocId, FieldsValues>;\n  FieldsValuesPerDocId LoadKeysData(const OpArgs& op_args,\n                                    const absl::flat_hash_set<search::DocId>& doc_ids,\n                                    absl::Span<const std::string_view> fields_to_load) const;\n\n  // Return whether base index matches\n  bool Matches(std::string_view key, unsigned obj_code) const;\n\n  std::optional<ShardDocIndex::DocId> GetDocId(std::string_view key, const DbContext& db_cntx);\n\n  std::optional<ShardDocIndex::DocId> AddDoc(std::string_view key, const DbContext& db_cntx,\n                                             const PrimeValue& pv);\n\n  void RemoveDoc(DocId id, const DbContext& db_cntx, const PrimeValue& pv);\n\n  DocIndexInfo GetInfo() const;\n\n  io::Result<StringVec, facade::ErrorReply> GetTagVals(std::string_view field) const;\n\n  // Get synonym manager for this shard\n  const Synonyms& GetSynonyms() const {\n    return synonyms_;\n  }\n\n  Synonyms& GetSynonyms() {\n    return synonyms_;\n  }\n\n  // Rebuild indices only for documents containing terms from the updated synonym group\n  void RebuildForGroup(const OpArgs& op_args, const std::string_view& group_id,\n                       const std::vector<std::string_view>& terms);\n\n  // Public access to key index for direct operations (e.g., when dropping index with DD)\n  // TODO: replace with keys() view\n  const DocKeyIndex& key_index() const {\n    return key_index_;\n  }\n\n  void AddDocToGlobalVectorIndex(ShardDocIndex::DocId doc_id, const DbContext& db_cntx,\n                                 PrimeValue* pv);\n  
void RemoveDocFromGlobalVectorIndex(ShardDocIndex::DocId doc_id, const DbContext& db_cntx,\n                                      const PrimeValue& pv);\n\n  // Rebuild global vector indices from restored key index, updating vector data\n  // for nodes whose graph structure was already restored from RDB.\n  void RestoreGlobalVectorIndices(std::string_view index_name, const OpArgs& op_args);\n\n  // Serialize doc and return with key name\n  using SerializedEntryWithKey = std::optional<std::pair<std::string_view, SearchDocData>>;\n  SerializedEntryWithKey SerializeDocWithKey(\n      search::DocId id, const OpArgs& op_args, const search::Schema& schema,\n      const std::optional<std::vector<FieldReference>>& return_fields);\n\n  search::DefragmentResult Defragment(PageUsage* page_usage) {\n    if (indices_) {\n      return indices_->Defragment(page_usage);\n    }\n    return search::DefragmentResult{false, 0};\n  }\n\n  std::vector<std::pair<std::string, DocId>> SerializeKeyIndex() const {\n    return key_index_.Serialize();\n  }\n\n  // Restore key-to-docId mappings from serialized data (RDB load)\n  void RestoreKeyIndex(const std::vector<std::pair<std::string, search::DocId>>& mappings) {\n    key_index_.Restore(mappings);\n  }\n\n  // Restore from remapped keys in doc_id order (vector index = doc_id).\n  void RestoreKeyIndex(const std::vector<std::string>& keys) {\n    key_index_.Restore(keys);\n  }\n\n private:\n  // Clears internal data. 
Traverses all matching documents and assigns ids.\n  void Rebuild(const OpArgs& op_args, PMR_NS::memory_resource* mr, bool is_restored = false);\n\n  // Cancel builder if in progress\n  void CancelBuilder();\n\n  using LoadedEntry = std::pair<std::string_view, std::unique_ptr<BaseAccessor>>;\n  std::optional<LoadedEntry> LoadEntry(search::DocId id, const OpArgs& op_args) const;\n\n  // Behaviour identical to SortIndex::Sort for non-sortable fields that need to be fetched first\n  std::vector<search::SortableValue> KeepTopKSorted(std::vector<DocId>* ids, size_t limit,\n                                                    const SearchParams::SortOption& sort,\n                                                    const OpArgs& op_args) const;\n\n  // Remove a DocId from all HNSW indices for this index.\n  void RemoveFromAllHnswIndices(search::DocId doc_id);\n\n private:\n  std::shared_ptr<const DocIndex> base_;\n  std::optional<search::FieldIndices> indices_;\n  DocKeyIndex key_index_;\n  Synonyms synonyms_;\n\n  std::unique_ptr<search::IndexBuilder> builder_;\n\n  // Buffered state for journal events arriving while HNSW vector indices\n  // are being restored from serialized graph data (is_restoring_vectors_ == true).\n  // Drained by RestoreGlobalVectorIndices after the graph is fully restored.\n  absl::flat_hash_set<std::string> pending_vector_updates_;\n  bool is_restoring_vectors_ = false;\n};\n\n// Stores shard doc indices by name on a specific shard.\nclass ShardDocIndices {\n public:\n  ShardDocIndices();\n\n  // Get sharded document index by its name or nullptr if not found\n  ShardDocIndex* GetIndex(std::string_view name);\n\n  // Init index: create shard local state for given index with given name.\n  // Build if instance is in active state.\n  void InitIndex(const OpArgs& op_args, std::string_view name,\n                 std::shared_ptr<const DocIndex> index);\n\n  // Drop index, return the dropped index if it existed or nullptr otherwise\n  
std::unique_ptr<ShardDocIndex> DropIndex(std::string_view name);\n\n  // Drop all indices\n  void DropAllIndices();\n\n  // Rebuild all indices\n  void RebuildAllIndices(const OpArgs& op_args, bool is_restored);\n\n  // Block until construction of all indices finishes\n  void BlockUntilConstructionEnd();\n\n  std::vector<std::string> GetIndexNames() const;\n\n  /* Use AddDoc and RemoveDoc only if pv object type is json or hset */\n  void AddDoc(std::string_view key, const DbContext& db_cnt, PrimeValue* pv);\n  void RemoveDoc(std::string_view key, const DbContext& db_cnt, const PrimeValue& pv);\n\n  size_t GetUsedMemory() const;\n  SearchStats GetStats() const;  // combines stats for all indices\n\n  search::DefragmentResult Defragment(PageUsage* page_usage);\n\n private:\n  // Clean caches that might have data from this index\n  void DropIndexCache(const dfly::ShardDocIndex& shard_doc_index);\n\n private:\n  MiMemoryResource local_mr_;\n  absl::flat_hash_map<std::string, std::unique_ptr<ShardDocIndex>> indices_;\n\n  std::string next_defrag_index_;\n};\n\n}  // namespace dfly\n"
  },
  {
    "path": "src/server/search/doc_index_fallback.cc",
    "content": "// Copyright 2025, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#ifndef WITH_SEARCH\n#include \"core/page_usage/page_usage_stats.h\"\n#include \"core/search/base.h\"\n#include \"server/search/doc_index.h\"\n#include \"server/search/index_builder.h\"\n\nnamespace dfly {\n\nusing namespace std;\n\nShardDocIndices::ShardDocIndices() : local_mr_(nullptr) {\n}\n\nvoid ShardDocIndices::AddDoc(std::string_view key, const DbContext& db_cnt, PrimeValue* pv) {\n}\nvoid ShardDocIndices::RemoveDoc(std::string_view key, const DbContext& db_cnt,\n                                const PrimeValue& pv) {\n}\n\nvoid ShardDocIndices::DropAllIndices() {\n}\nvoid ShardDocIndices::RebuildAllIndices(const OpArgs& op_args, bool is_restored) {\n}\nvoid ShardDocIndices::BlockUntilConstructionEnd() {\n}\n\nsize_t ShardDocIndices::GetUsedMemory() const {\n  return 0;\n}\nSearchStats ShardDocIndices::GetStats() const {\n  return {};\n}\n\nsearch::DefragmentResult ShardDocIndices::Defragment(PageUsage*) {\n  return search::DefragmentResult{};\n}\n\nShardDocIndex::~ShardDocIndex() {\n}\n\n}  // namespace dfly\n#endif\n"
  },
  {
    "path": "src/server/search/global_hnsw_index.cc",
    "content": "// Copyright 2025, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#include \"server/search/global_hnsw_index.h\"\n\n#include <absl/strings/str_cat.h>\n\n#include \"base/logging.h\"\n#include \"core/search/ast_expr.h\"\n#include \"core/search/base.h\"\n#include \"core/search/index_result.h\"\n#include \"core/search/indices.h\"\n#include \"core/search/vector_utils.h\"\n#include \"server/engine_shard.h\"\n#include \"server/engine_shard_set.h\"\n#include \"server/search/doc_accessors.h\"\n#include \"server/search/doc_index.h\"\n#include \"server/transaction.h\"\n#include \"server/tx_base.h\"\n\nnamespace dfly {\n\n// Global index registry implementation\n\nGlobalHnswIndexRegistry& GlobalHnswIndexRegistry::Instance() {\n  static GlobalHnswIndexRegistry instance;\n  return instance;\n}\n\nbool GlobalHnswIndexRegistry::Create(std::string_view index_name, std::string_view field_name,\n                                     const search::SchemaField::VectorParams& params,\n                                     DocIndex::DataType data_type) {\n  std::string key = MakeKey(index_name, field_name);\n\n  std::unique_lock<std::shared_mutex> lock(registry_mutex_);\n\n  auto it = indices_.find(key);\n\n  if (it != indices_.end())\n    return false;\n\n  // We make a copy of vector data when:\n  // 1. Data type is JSON. This is because JSON object is not represented as contiguous memory.\n  // 2. Data type is HASH and vector data memory size is smaller than threshold for listpack\n  // encoding.\n  //    We use pesimistic approach for decision and expect that ONLY VECTOR data field is used.\n  //    When HSET object is created function `IsGoodForListpack` decides if object should be encoded\n  //    as listpack or StringMap. 
Problem with listpack encoding is that vector memory, if\n  //    referenced, can have wrong alignment for vector distance operations.\n  const bool copy_vector =\n      (data_type == DocIndex::JSON) || (params.dim * 4 < server.max_listpack_map_bytes);\n\n  indices_[key] = std::make_shared<search::HnswVectorIndex>(params, copy_vector);\n\n  return true;\n}\n\nbool GlobalHnswIndexRegistry::Remove(std::string_view index_name, std::string_view field_name) {\n  std::string key = MakeKey(index_name, field_name);\n  std::unique_lock<std::shared_mutex> lock(registry_mutex_);\n  return bool(indices_.erase(key));\n}\n\nstd::shared_ptr<search::HnswVectorIndex> GlobalHnswIndexRegistry::Get(\n    std::string_view index_name, std::string_view field_name) const {\n  std::string key = MakeKey(index_name, field_name);\n  std::shared_lock<std::shared_mutex> lock(registry_mutex_);\n  auto it = indices_.find(key);\n  return it != indices_.end() ? it->second : nullptr;\n}\n\nbool GlobalHnswIndexRegistry::Exist(std::string_view index_name,\n                                    std::string_view field_name) const {\n  std::string key = MakeKey(index_name, field_name);\n  std::shared_lock<std::shared_mutex> lock(registry_mutex_);\n  return indices_.find(key) != indices_.end();\n}\n\nvoid GlobalHnswIndexRegistry::Reset() {\n  std::unique_lock<std::shared_mutex> lock(registry_mutex_);\n  indices_.clear();\n}\n\nabsl::flat_hash_set<std::string> GlobalHnswIndexRegistry::GetIndexNames() const {\n  std::shared_lock<std::shared_mutex> lock(registry_mutex_);\n  absl::flat_hash_set<std::string> index_names;\n  for (const auto& [key, _] : indices_) {\n    // Keys are in format \"index_name:field_name\", extract index_name.\n    // Use rfind because index names may legally contain ':' (e.g. 
\":Order:index\").\n    size_t pos = key.rfind(':');\n    if (pos != std::string::npos) {\n      index_names.insert(key.substr(0, pos));\n    }\n  }\n  return index_names;\n}\n\nstd::string GlobalHnswIndexRegistry::MakeKey(std::string_view index_name,\n                                             std::string_view field_name) const {\n  return absl::StrCat(index_name, \":\", field_name);\n}\n\n}  // namespace dfly\n"
  },
  {
    "path": "src/server/search/global_hnsw_index.h",
    "content": "// Copyright 2025, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#pragma once\n\n#include <absl/container/flat_hash_map.h>\n#include <absl/container/flat_hash_set.h>\n\n#include <memory>\n#include <shared_mutex>\n#include <string>\n#include <string_view>\n#include <vector>\n\n#include \"core/search/base.h\"\n#include \"core/search/hnsw_index.h\"\n#include \"core/search/search.h\"\n#include \"server/search/doc_index.h\"\n\nnamespace dfly {\nclass GlobalHnswIndexRegistry {\n public:\n  static GlobalHnswIndexRegistry& Instance();\n\n  bool Create(std::string_view index_name, std::string_view field_name,\n              const search::SchemaField::VectorParams& params, DocIndex::DataType data_type);\n\n  bool Remove(std::string_view index_name, std::string_view field_name);\n\n  std::shared_ptr<search::HnswVectorIndex> Get(std::string_view index_name,\n                                               std::string_view field_name) const;\n\n  bool Exist(std::string_view index_name, std::string_view field_name) const;\n\n  absl::flat_hash_map<std::string, std::shared_ptr<search::HnswVectorIndex>> GetAll() const {\n    std::shared_lock<std::shared_mutex> lock(registry_mutex_);\n    return indices_;\n  }\n\n  // Returns unique index names from all registered HNSW indices\n  absl::flat_hash_set<std::string> GetIndexNames() const;\n\n  void Reset();\n\n private:\n  GlobalHnswIndexRegistry() = default;\n  std::string MakeKey(std::string_view index_name, std::string_view field_name) const;\n\n  mutable std::shared_mutex registry_mutex_;\n  absl::flat_hash_map<std::string, std::shared_ptr<search::HnswVectorIndex>> indices_;\n};\n\n}  // namespace dfly\n"
  },
  {
    "path": "src/server/search/index_builder.cc",
    "content": "// Copyright 2026, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#include \"server/search/index_builder.h\"\n\n#include <ranges>\n\n#include \"server/db_slice.h\"\n#include \"server/engine_shard_set.h\"\n#include \"server/search/doc_accessors.h\"\n#include \"server/search/global_hnsw_index.h\"\n\nnamespace dfly::search {\n\nvoid IndexBuilder::Start(const OpArgs& op_args, bool is_restored,\n                         std::function<void()> on_complete) {\n  using namespace util::fb2;\n  auto table = op_args.GetDbSlice().CopyDBTablePtr(op_args.db_cntx.db_index);\n  DCHECK(table.get());\n\n  is_restored_ = is_restored;\n\n  auto cb = [this, table, db_cntx = op_args.db_cntx, on_complete = std::move(on_complete)] {\n    CursorLoop(table.get(), db_cntx);\n    VectorLoop(table.get(), db_cntx);\n\n    // TODO: make it step by step + wire cancellation inside\n    if (state_.IsRunning())\n      index_->indices_->FinalizeInitialization();\n\n    // Finish by clearing the fiber reference and calling on_complete as its last action\n    {\n      util::FiberAtomicGuard guard{};  // preserve cancellation\n      fiber_.Detach();                 // builder is now safely deleteable\n      if (!state_.IsCancelled())\n        on_complete();\n    }\n  };\n\n  fiber_ = Fiber{std::move(cb)};\n}\n\nvoid IndexBuilder::Cancel() {\n  state_.Cancel();\n  util::fb2::Fiber{std::move(fiber_)}.JoinIfNeeded();  // steal and wait for finish\n}\n\nutil::fb2::Fiber IndexBuilder::Worker() {\n  return std::move(fiber_);\n}\n\nvoid IndexBuilder::CursorLoop(dfly::DbTable* table, DbContext db_cntx) {\n  auto cb = [this, db_cntx, scratch = std::string{}](PrimeTable::iterator it) mutable {\n    PrimeValue& pv = it->second;\n    std::string_view key = it->first.GetSlice(&scratch);\n\n    if (!index_->Matches(key, pv.ObjType()))\n      return;\n\n    // TODO: make it a parameter of SharDocIndex::AddDoc()\n    if (is_restored_) {\n      // Use existing 
DocIds from the restored key_index_ to keep them aligned with\n      // GlobalDocIds stored in the serialized HNSW graph. Only add to regular indices\n      // (text/tag/numeric); vector indices are handled separately by VectorLoop.\n      if (auto doc_id = index_->key_index().Find(key); doc_id) {\n        auto accessor = GetAccessor(db_cntx, pv);\n        if (!index_->indices_->Add(*doc_id, *accessor)) {\n          LOG(WARNING) << \"Failed to restore index entry for key: \" << key\n                       << \", removing from key index\";\n          index_->key_index_.Remove(*doc_id);\n        }\n      } else {\n        // New document not in the restored key_index_ (added by journal events during\n        // full sync before the index was created). Use AddNew to allocate a fresh DocId\n        // that won't collide with serialized HNSW node ids from freed slots.\n        auto accessor = GetAccessor(db_cntx, pv);\n        DocId id = index_->key_index_.AddNew(key);\n        if (!index_->indices_->Add(id, *accessor)) {\n          index_->key_index_.Remove(id);\n        }\n      }\n    } else {\n      index_->AddDoc(key, db_cntx, pv);\n    }\n  };\n\n  PrimeTable::Cursor cursor;\n  do {\n    cursor = table->prime.Traverse(cursor, cb);\n    if (base::CycleClock::ToUsec(util::ThisFiber::GetRunningTimeCycles()) > 500)\n      util::ThisFiber::Yield();\n  } while (cursor && state_.IsRunning());\n}\n\nvoid IndexBuilder::VectorLoop(dfly::DbTable* table, DbContext db_cntx) {\n  bool any_vector = std::ranges::any_of(index_->base_->schema.fields, [](const auto& item) {\n    return item.second.IsIndexableHnswField();\n  });\n  if (!any_vector || !state_.IsRunning())\n    return;\n\n  // If any HNSW index was restored from RDB, use UpdateVectorData instead of Add.\n  if (is_restored_) {\n    // TODO: Add support for concurrent modifications\n    OpArgs op_args{EngineShard::tlocal(), nullptr, db_cntx};\n    index_->RestoreGlobalVectorIndices(index_->base_->name, op_args);\n    
return;\n  }\n\n  // Non-restored path: rebuilding HNSW from scratch. Clear the restoring flag and discard\n  // any pending updates — the full table traversal below will pick up all current documents.\n  index_->is_restoring_vectors_ = false;\n  index_->pending_vector_updates_.clear();\n\n  auto cb = [this, db_cntx, scratch = std::string{}](PrimeTable::iterator it) mutable {\n    PrimeValue& pv = it->second;\n    std::string_view key = it->first.GetSlice(&scratch);\n\n    if (auto local_id = index_->key_index().Find(key); local_id)\n      index_->AddDocToGlobalVectorIndex(*local_id, db_cntx, &pv);\n  };\n\n  // Because order of acquiring mutexes for global vector indices is not determined, we must run\n  // all accesses on a single thread through the shard queue to have a single linear order\n  // TODO: this prevents asynchronous indexing for vector fields\n  auto shard_cb = [&] {\n    PrimeTable::Cursor cursor;\n    do {\n      cursor = table->prime.Traverse(cursor, cb);\n      if (base::CycleClock::ToUsec(util::ThisFiber::GetRunningTimeCycles()) > 500)\n        util::ThisFiber::Yield();\n    } while (cursor && state_.IsRunning());\n  };\n  shard_set->Await(EngineShard::tlocal()->shard_id(), std::move(shard_cb));\n}\n\n}  // namespace dfly::search\n"
  },
  {
    "path": "src/server/search/index_builder.h",
    "content": "// Copyright 2026, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#pragma once\n\n#include <functional>\n\n#include \"server/execution_state.h\"\n#include \"server/tx_base.h\"\n\nnamespace dfly {\nstruct DbTable;\nclass ShardDocIndex;\n}  // namespace dfly\n\nnamespace dfly::search {\n\n// Asynchronous index builder\nstruct IndexBuilder {\n  explicit IndexBuilder(ShardDocIndex* index) : index_{index} {\n  }\n\n  // Start building and call `on_complete` on finish from worker fiber.\n  // If `is_restored` is true, VectorLoop will use UpdateVectorData instead of Add\n  // for HNSW indices (restored from RDB). This flag is passed from PerformPostLoad.\n  void Start(const OpArgs& op_args, bool is_restored, std::function<void()> on_complete);\n\n  // Cancel building and wait for worker to finish. Safe to delete after\n  // TODO: Maybe implement nonblocking version?\n  void Cancel();\n\n  // Get fiber reference. Temporary to polyfill sync construction places\n  util::fb2::Fiber Worker();\n\n private:\n  // Loop with cursor over table and add entries to regular index\n  void CursorLoop(DbTable* table, DbContext db_cntx);\n\n  // Loop with cursor over table and add entries to global HNSW vector indices\n  void VectorLoop(DbTable* table, DbContext db_cntx);\n\n  dfly::ExecutionState state_;\n  ShardDocIndex* index_;\n  bool is_restored_ = false;\n  util::fb2::Fiber fiber_;\n};\n\n}  // namespace dfly::search\n"
  },
  {
    "path": "src/server/search/index_join.cc",
    "content": "// Copyright 2025, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#include \"server/search/index_join.h\"\n\nnamespace dfly::join {\n\nnamespace {\n// Joins joined_entries with new index entries using join_expressions.\n// It uses hash joining algorithm to find matching entries.\nstd::vector<KeyIndexes> JoinWithNewIndex(\n    EntriesPerIndex indexes_entries, absl::Span<const KeyIndexes> joined_entries,\n    size_t new_index,  // represented as index in indexes_entries\n    absl::Span<const JoinExpression> join_expressions) {\n  /* We fill join_map with values sets from joined entries.\n     In join_map we store {set of field values} to indexes in joined_entries that match this set of\n     field values. So, then we can go over new_index entries and match their values with\n     joined_entries using this.\n     TODO: use hash map for the smallest set (new_index or joined_entries) */\n  using ValuesSet = Vector<JoinableValue>;\n  using JoinEntriesIndexes = absl::InlinedVector<size_t, 1>;\n  absl::flat_hash_map<ValuesSet, JoinEntriesIndexes> join_map;\n  join_map.reserve(joined_entries.size());\n\n  // Now we need to initialize join_map with values of joined entries.\n  for (size_t i = 0; i < joined_entries.size(); ++i) {\n    const auto& joined_entry_keys = joined_entries[i];\n\n    ValuesSet values_set;\n    values_set.reserve(join_expressions.size());\n\n    // Go over all join expressions and get field values using foreign index and field.\n    for (const auto& join_expression : join_expressions) {\n      size_t index = join_expression.foreign_index;\n      size_t field_index = join_expression.foreign_field;\n\n      // Now we need to get value of this field from joined key in this index\n      DCHECK_LT(index, joined_entry_keys.size()) << \"Join order broken, index out of range\";\n      KeyIndex key_index = joined_entry_keys[index];\n      const JoinableValue& field_value = 
indexes_entries[index][key_index].second[field_index];\n\n      // Add value to the set\n      values_set.push_back(field_value);\n    }\n\n    // That means that this set of values corresponds to joined entry i\n    join_map[values_set].push_back(i);\n  }\n\n  std::vector<KeyIndexes> result;\n  result.reserve(join_map.size());\n\n  // Now we store all possible sets of values in joined_entries that match this set.\n  // We can iterate over new index and find entries with the same set of values.\n  const auto& new_index_entries = indexes_entries[new_index];\n  for (size_t i = 0; i < new_index_entries.size(); ++i) {\n    const auto& index_entries = new_index_entries[i].second;\n\n    ValuesSet values_set;\n    values_set.reserve(join_expressions.size());\n    // Go over all join expressions and get field values for this entry\n    for (const auto& join_expression : join_expressions) {\n      const JoinableValue& field_value = index_entries[join_expression.field];\n      values_set.push_back(field_value);\n    }\n\n    // Now we need to find this set in the join_map\n    auto it = join_map.find(values_set);\n    if (it == join_map.end()) {\n      continue;\n    }\n\n    // This entry in new index matches some joined entries,\n    // we need to go over all entries with the same set of values\n    // and add them to the result\n    for (size_t joined_entry_index : it->second) {\n      result.push_back(joined_entries[joined_entry_index]);\n      // Add new index entry to the joined entry\n      result.back().push_back(i);\n    }\n  }\n\n  return result;\n}\n\n}  // anonymous namespace\n\nVector<Vector<Key>> JoinAllIndexes(\n    EntriesPerIndex indexes_entries, IndexesJoinExpressions joins,\n    absl::FunctionRef<void(std::vector<KeyIndexes>*)> aggregate_after_join) {\n  if (indexes_entries.empty()) {\n    return {};\n  }\n\n  // Will used to initialize joined entries\n  const auto& first_index_entries = indexes_entries[0];\n\n  /* Store current result of joins\n     Each 
entry is vector of indexes, that referce to one key in the index\n     For example, {1, 0, 4} means that key with index 1 in the first index,\n     key with index 0 in the second index and key with index 4 in the third index were joined to\n     single entry. */\n  std::vector<KeyIndexes> joined_entries(first_index_entries.size(), KeyIndexes(1));\n\n  // At the first step all keys from the first index are joined\n  for (size_t i = 0; i < first_index_entries.size(); ++i) {\n    joined_entries[i][0] = i;\n  }\n\n  DCHECK(joins[0].empty()) << \"Base index must be first and have no joins\";\n\n  /* Now we need to iterate over all indexes and the joins\n     Using joins for the new index, we will find matching entries in the current result\n     (joined_entries) with the entries in the new index. */\n  for (size_t i = 1; i < indexes_entries.size(); ++i) {\n    joined_entries = JoinWithNewIndex(indexes_entries, joined_entries, i, joins[i]);\n  }\n\n  // Apply aggregation after join if needed\n  // It can change size of joined_entries\n  aggregate_after_join(&joined_entries);\n\n  const size_t result_size = joined_entries.size();\n  const size_t indexes_count = indexes_entries.size();\n  // Now we have joined entries, we need to build JoinResult\n  Vector<Vector<Key>> result(result_size, Vector<Key>(indexes_count));\n\n  for (size_t i = 0; i < result_size; ++i) {\n    auto& result_entry = result[i];\n\n    for (size_t index = 0; index < indexes_count; ++index) {\n      // Index of joined key in the current index\n      KeyIndex key_index = joined_entries[i][index];\n      // Find key by the key_index\n      const auto& key = indexes_entries[index][key_index].first;\n\n      // Add key to the result\n      // That means that this key from this index was joined\n      result_entry[index] = key;\n    }\n  }\n\n  return result;\n}\n\nVector<Vector<Key>> JoinAllIndexes(EntriesPerIndex indexes_entries, IndexesJoinExpressions joins) {\n  return JoinAllIndexes(indexes_entries, 
joins, [](std::vector<KeyIndexes>*) {});\n}\n\n}  // namespace dfly::join\n"
  },
  {
    "path": "src/server/search/index_join.h",
    "content": "// Copyright 2025, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#pragma once\n\n#include <vector>\n\n#include \"base/logging.h\"\n#include \"core/linear_search_map.h\"\n#include \"core/search/base.h\"\n#include \"server/common_types.h\"\n\nnamespace dfly::join {\n\ntemplate <typename T> using Vector = absl::InlinedVector<T, 4>;\n\n/* Represents field value.\n   Same as search::SortableValue, but do not have monostate and stores string_view instead of\n   std::string. */\nusing JoinableValue = std::variant<double, std::string_view>;\n\n/* Each index has its own set of fields used for joins.\n   Additionally, each index contains multiple keys/documents it has indexed, and each document\n   includes several fields.\n\n   For example:\n    JOIN index2 ON index2.field1 = other_index.field2 AND index2.field3 = other_index.field4\n\n    So, index2 uses field1 and field3 for joins. It also indexed docs key1, key2, key3:\n    EntriesPerIndex will store something like:\n                            [{\"key1\", {\"field1\" : value, \"field3\" : value}},\n                             {\"key2\", {\"field1\" : value, \"field3\" : value}},\n                             {\"key3\", {\"field1\" : value, \"field3\" : value}}].\n    But to make join algorithm more efficient, we store it as raw vectors,\n    instead of field_name as string, we use indexes;\n    instead of key names we use shard id and doc id.\n*/\nusing Key = std::pair<ShardId, search::DocId>;\nusing Entry = std::pair<Key, Vector<JoinableValue> /*fields values of this key*/>;\nusing EntriesPerIndex = absl::Span<const Vector<Entry> /*one index can store several keys*/>;\n\n// TODO: comments\nusing OwnedJoinableValue = std::variant<double, std::string>;\nusing OwnedEntry = std::pair<Key, Vector<OwnedJoinableValue>>;\n\n// Stores data for single join expression,\n// e.g. 
index1.field1 = index2.field2:\n// field - \"field1\", foreign_index - \"index2\", foreign_field - \"field2\"\nstruct JoinExpression {\n  size_t field;          // field is represented as index in the Entry.second array\n  size_t foreign_index;  // foreign_index is represented as index in the EntriesPerIndex array\n  size_t foreign_field;  // foreign_field is also represented as index in the Entry.second array\n};\n\nusing JoinExpressionsVec = Vector<JoinExpression>;\n\n/* Each index can have several join expressions, e.g.:\n   JOIN index1 ON index1.field1 = other_index.field2 AND index1.field3 = other_index.field4\n   will result in:\n   {\"index1\", {{\"field1\", \"other_index\", \"field2\"}, {\"field3\", \"other_index\", \"field4\"}}} */\nusing IndexesJoinExpressions = absl::Span<const JoinExpressionsVec>;\n\nusing KeyIndex = size_t;\nusing KeyIndexes = Vector<KeyIndex>;\n\n/* Joins all indexes in indexes_entries using the join expressions in joins.\n   The join algorithm used is hash join. */\nVector<Vector<Key>> JoinAllIndexes(\n    EntriesPerIndex indexes_entries, IndexesJoinExpressions joins,\n    absl::FunctionRef<void(std::vector<KeyIndexes>*)> aggregate_after_join);\n\nVector<Vector<Key>> JoinAllIndexes(EntriesPerIndex indexes_entries, IndexesJoinExpressions joins);\n\n}  // namespace dfly::join\n"
  },
  {
    "path": "src/server/search/index_join_test.cc",
    "content": "// Copyright 2025, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#include \"server/search/index_join.h\"\n\n#include <absl/container/flat_hash_set.h>\n#include <gmock/gmock.h>\n#include <gtest/gtest.h>\n\n#include <utility>\n\n#include \"base/gtest.h\"\n#include \"base/logging.h\"\n\nnamespace dfly {\n\nusing namespace join;\n\nclass IndexJoinTest : public testing::Test {\n protected:\n};\n\nstruct TestIndexData {\n  struct FieldData {\n    std::string_view name;\n    JoinableValue value;\n  };\n\n  struct KeyData {\n    std::string_view key;\n    std::vector<FieldData> fields;\n  };\n\n  std::string_view index_name;\n  std::vector<KeyData> entries;\n};\n\nstruct TestJoinExpression {\n  std::string_view field;\n  std::string_view foreign_index;\n  std::string_view foreign_field;\n};\n\nstruct PreprocessedIndexData {\n  Vector<Vector<Entry>> entries;\n  std::unordered_map<std::string_view, size_t> index_name_to_index;\n  std::unordered_map<std::string_view, Key> key_name_to_key;\n  std::vector<std::unordered_map<std::string_view, size_t>> field_names_to_index;\n};\n\nMATCHER_P(IsJoinResultMatcher, expected, \"\") {\n  std::vector<testing::Matcher<std::vector<join::Key>>> matchers;\n  for (const auto& entry : expected) {\n    std::vector<join::Key> keys;\n    for (auto field : entry) {\n      keys.push_back(field);\n    }\n    matchers.push_back(testing::ElementsAreArray(keys));\n  }\n\n  std::vector<std::vector<join::Key>> result;\n  for (size_t index = 0; index < arg.size(); ++index) {\n    std::vector<join::Key> entry;\n    for (const auto& key : arg[index]) {\n      entry.push_back(key);\n    }\n    result.push_back(std::move(entry));\n  }\n  return testing::ExplainMatchResult(testing::UnorderedElementsAreArray(matchers), result,\n                                     result_listener);\n}\n\ntemplate <typename... 
Args>\nauto IsJoinResult(const PreprocessedIndexData& data,\n                  std::vector<std::vector<std::string_view>> joined_data) {\n  std::vector<std::vector<join::Key>> joined_keys(joined_data.size());\n  for (size_t i = 0; i < joined_data.size(); ++i) {\n    for (const auto& entry : joined_data[i]) {\n      auto it = data.key_name_to_key.find(entry);\n      DCHECK(it != data.key_name_to_key.end()) << \"Key not found in index data: \" << entry;\n      joined_keys[i].push_back(it->second);\n    }\n  }\n\n  return IsJoinResultMatcher(std::move(joined_keys));\n}\n\nPreprocessedIndexData PreprocessIndexesData(std::vector<TestIndexData> indexes_data) {\n  PreprocessedIndexData data;\n\n  auto contains = [](const auto& set, const auto& key) { return set.find(key) != set.end(); };\n\n  search::DocId doc_id = 0;\n  for (size_t index = 0; index < indexes_data.size(); index++) {\n    const auto& [index_name, index_data] = indexes_data[index];\n    DCHECK(!contains(data.index_name_to_index, index_name))\n        << \"Duplicate index name: \" << index_name;\n    data.index_name_to_index[index_name] = index;\n\n    data.field_names_to_index.emplace_back();\n    auto& field_names_map = data.field_names_to_index.back();\n\n    if (!index_data.empty()) {\n      for (size_t i = 0; i < index_data[0].fields.size(); ++i) {\n        const auto& field = index_data[0].fields[i];\n        DCHECK(!contains(field_names_map, field.name))\n            << \"Duplicate field name in index: \" << field.name;\n        field_names_map[field.name] = i;\n      }\n    }\n\n    Vector<Entry> index_entries;\n    index_entries.reserve(index_data.size());\n\n    for (size_t i = 0; i < index_data.size(); ++i) {\n      const auto& [key, fields] = index_data[i];\n      DCHECK(!contains(data.key_name_to_key, key)) << \"Duplicate key name in index: \" << key;\n\n      Key key_for_join = {0 /*in tests we are using 0 for ShardId*/, doc_id++};\n      data.key_name_to_key[key] = key_for_join;\n\n      Entry 
entry = {key_for_join, Vector<JoinableValue>(field_names_map.size())};\n      std::set<std::string_view> fields_set;\n      for (const auto& [field_name, field_value] : fields) {\n        DCHECK(contains(field_names_map, field_name));\n        DCHECK(!contains(fields_set, field_name)) << \"Duplicate field name in key: \" << field_name;\n\n        entry.second[field_names_map[field_name]] = field_value;\n        fields_set.insert(field_name);\n      }\n\n      DCHECK_EQ(fields_set.size(), field_names_map.size())\n          << \"Not all fields are set for key: \" << key;\n\n      index_entries.emplace_back(std::move(entry));\n    }\n\n    data.entries.emplace_back(std::move(index_entries));\n  }\n\n  return data;\n}\n\njoin::Vector<JoinExpressionsVec> BuildJoinExpressions(\n    const PreprocessedIndexData& index_data,\n    std::initializer_list<std::pair<std::string_view, std::initializer_list<TestJoinExpression>>>\n        data) {\n  join::Vector<JoinExpressionsVec> join_expressions(1);\n\n  auto contains = [](const auto& set, const auto& key) { return set.find(key) != set.end(); };\n\n  std::set<std::string_view> index_names_set;\n  for (const auto& [index_name, expressions] : data) {\n    DCHECK(contains(index_data.index_name_to_index, index_name))\n        << \"Index not found in join expressions: \" << index_name;\n    DCHECK(!contains(index_names_set, index_name))\n        << \"Duplicate index name in join expressions: \" << index_name;\n\n    index_names_set.insert(index_name);\n    size_t current_index = index_data.index_name_to_index.at(index_name);\n\n    JoinExpressionsVec exprs;\n    for (const auto& expr : expressions) {\n      DCHECK(contains(index_data.field_names_to_index[current_index], expr.field))\n          << \"Field not found in index: \" << expr.field;\n      size_t field_index = index_data.field_names_to_index[current_index].at(expr.field);\n\n      DCHECK(contains(index_data.index_name_to_index, expr.foreign_index))\n          << \"Foreign 
index not found in join expressions: \" << expr.foreign_index;\n      size_t foreign_index = index_data.index_name_to_index.at(expr.foreign_index);\n\n      DCHECK(contains(index_data.field_names_to_index[foreign_index], expr.foreign_field))\n          << \"Foreign field not found in foreign index: \" << expr.foreign_field;\n      size_t foreign_field_index =\n          index_data.field_names_to_index[foreign_index].at(expr.foreign_field);\n\n      exprs.emplace_back(JoinExpression{field_index, foreign_index, foreign_field_index});\n    }\n\n    join_expressions.emplace_back(std::move(exprs));\n  }\n\n  return join_expressions;\n}\n\nTEST_F(IndexJoinTest, SimpleJoin) {\n  auto data = PreprocessIndexesData({{\"index1\",\n                                      {{\"key1\", {{\"field1\", 1.0}, {\"field2\", \"value1\"}}},\n                                       {\"key2\", {{\"field1\", 2.0}, {\"field2\", \"value2\"}}}}},\n                                     {\"index2\",\n                                      {{\"key3\", {{\"field3\", 1.0}, {\"field4\", \"value3\"}}},\n                                       {\"key4\", {{\"field3\", 2.0}, {\"field4\", \"value4\"}}}}}});\n\n  auto joins = BuildJoinExpressions(data, {{\"index2\", {{\"field3\", \"index1\", \"field1\"}}}});\n\n  auto result = JoinAllIndexes(data.entries, joins);\n  EXPECT_THAT(result, IsJoinResult(data, {{\"key1\", \"key3\"}, {\"key2\", \"key4\"}}));\n}\n\nTEST_F(IndexJoinTest, MultipleJoins) {\n  auto data = PreprocessIndexesData({{\"index1\",\n                                      {{\"key1\", {{\"field1\", 1.0}, {\"field2\", \"value1\"}}},\n                                       {\"key2\", {{\"field1\", 2.0}, {\"field2\", \"value2\"}}}}},\n                                     {\"index2\",\n                                      {{\"key3\", {{\"field3\", 1.0}, {\"field4\", \"value3\"}}},\n                                       {\"key4\", {{\"field3\", 2.0}, {\"field4\", \"value4\"}}}}},\n                      
               {\"index3\",\n                                      {{\"key5\", {{\"field5\", 1.0}, {\"field6\", \"value5\"}}},\n                                       {\"key6\", {{\"field5\", 2.0}, {\"field6\", \"value6\"}}}}}});\n\n  auto joins = BuildJoinExpressions(data, {{\"index2\", {{\"field3\", \"index1\", \"field1\"}}},\n                                           {\"index3\", {{\"field5\", \"index2\", \"field3\"}}}});\n\n  auto result = JoinAllIndexes(data.entries, joins);\n  EXPECT_THAT(result, IsJoinResult(data, {{\"key1\", \"key3\", \"key5\"}, {\"key2\", \"key4\", \"key6\"}}));\n}\n\nTEST_F(IndexJoinTest, NoMatches) {\n  // Different values\n  auto data = PreprocessIndexesData({{\"index1\",\n                                      {{\"key1\", {{\"field1\", 1.0}, {\"field2\", \"value1\"}}},\n                                       {\"key2\", {{\"field1\", 2.0}, {\"field2\", \"value2\"}}}}},\n                                     {\"index2\",\n                                      {{\"key3\", {{\"field3\", 3.0}, {\"field4\", \"value3\"}}},\n                                       {\"key4\", {{\"field3\", 4.0}, {\"field4\", \"value4\"}}}}}});\n\n  auto joins = BuildJoinExpressions(data, {{\"index2\", {{\"field3\", \"index1\", \"field1\"}}}});\n\n  auto result = JoinAllIndexes(data.entries, joins);\n  EXPECT_TRUE(result.empty());\n\n  // Different types\n  auto data2 = PreprocessIndexesData({{\"index1\",\n                                       {{\"key1\", {{\"field1\", 1.0}, {\"field2\", \"value1\"}}},\n                                        {\"key2\", {{\"field1\", 2.0}, {\"field2\", \"value2\"}}}}},\n                                      {\"index2\",\n                                       {{\"key3\", {{\"field3\", \"value3\"}, {\"field4\", \"value4\"}}},\n                                        {\"key4\", {{\"field3\", \"value5\"}, {\"field4\", \"value6\"}}}}}});\n\n  auto joins2 = BuildJoinExpressions(data2, {{\"index2\", {{\"field3\", \"index1\", 
\"field1\"}}}});\n\n  result = JoinAllIndexes(data2.entries, joins2);\n  EXPECT_TRUE(result.empty());\n}\n\nTEST_F(IndexJoinTest, JoinWithMultipleFields) {\n  auto data = PreprocessIndexesData({{\"index1\",\n                                      {{\"key1\", {{\"field1\", 1.0}, {\"field2\", \"value1\"}}},\n                                       {\"key2\", {{\"field1\", 2.0}, {\"field2\", \"value2\"}}}}},\n                                     {\"index2\",\n                                      {{\"key3\", {{\"field3\", 1.0}, {\"field4\", \"value1\"}}},\n                                       {\"key4\", {{\"field3\", 2.0}, {\"field4\", \"value2\"}}}}},\n                                     {\"index3\",\n                                      {{\"key5\", {{\"field5\", 1.0}, {\"field6\", \"value1\"}}},\n                                       {\"key6\", {{\"field5\", 2.0}, {\"field6\", \"value2\"}}}}}});\n\n  auto joins = BuildJoinExpressions(\n      data, {{\"index2\", {{\"field3\", \"index1\", \"field1\"}, {\"field4\", \"index1\", \"field2\"}}},\n             {\"index3\", {{\"field5\", \"index2\", \"field3\"}, {\"field6\", \"index2\", \"field4\"}}}});\n\n  auto result = JoinAllIndexes(data.entries, joins);\n  EXPECT_THAT(result, IsJoinResult(data, {{\"key1\", \"key3\", \"key5\"}, {\"key2\", \"key4\", \"key6\"}}));\n}\n\nTEST_F(IndexJoinTest, JoinWithSeveralCopiesOfSameKey) {\n  auto data = PreprocessIndexesData({{\"index1\",\n                                      {{\"key1\", {{\"field1\", 1.0}, {\"field2\", \"value1\"}}},\n                                       {\"key2\", {{\"field1\", 2.0}, {\"field2\", \"value2\"}}},\n                                       {\"key3\", {{\"field1\", 1.0}, {\"field2\", \"value1\"}}},\n                                       {\"key4\", {{\"field1\", 2.0}, {\"field2\", \"value2\"}}}}},\n                                     {\"index2\",\n                                      {{\"key5\", {{\"field3\", 1.0}, {\"field4\", \"value1\"}}},\n       
                                {\"key6\", {{\"field3\", 2.0}, {\"field4\", \"value2\"}}}}},\n                                     {\"index3\",\n                                      {{\"key7\", {{\"field5\", 1.0}, {\"field6\", \"value1\"}}},\n                                       {\"key8\", {{\"field5\", 2.0}, {\"field6\", \"value2\"}}},\n                                       {\"key9\", {{\"field5\", 1.0}, {\"field6\", \"value1\"}}},\n                                       {\"key10\", {{\"field5\", 2.0}, {\"field6\", \"value2\"}}},\n                                       {\"key11\", {{\"field5\", 11.0}, {\"field6\", \"value2\"}}}}}});\n\n  auto joins = BuildJoinExpressions(\n      data, {{\"index2\", {{\"field3\", \"index1\", \"field1\"}, {\"field4\", \"index1\", \"field2\"}}},\n             {\"index3\", {{\"field5\", \"index2\", \"field3\"}, {\"field6\", \"index2\", \"field4\"}}}});\n\n  auto result = JoinAllIndexes(data.entries, joins);\n  EXPECT_THAT(result, IsJoinResult(data, {{\"key1\", \"key5\", \"key7\"},\n                                          {\"key2\", \"key6\", \"key8\"},\n                                          {\"key3\", \"key5\", \"key7\"},\n                                          {\"key4\", \"key6\", \"key8\"},\n                                          {\"key1\", \"key5\", \"key9\"},\n                                          {\"key2\", \"key6\", \"key10\"},\n                                          {\"key3\", \"key5\", \"key9\"},\n                                          {\"key4\", \"key6\", \"key10\"}}));\n}\n\n}  // namespace dfly\n"
  },
  {
    "path": "src/server/search/search_family.cc",
    "content": "// Copyright 2022, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#include \"server/search/search_family.h\"\n\n#include <absl/container/flat_hash_map.h>\n#include <absl/flags/flag.h>\n#include <absl/strings/match.h>\n#include <absl/strings/str_format.h>\n#include <absl/strings/str_join.h>\n#include <absl/strings/str_split.h>\n\n#include <atomic>\n#include <variant>\n#include <vector>\n\n#include \"base/logging.h\"\n#include \"core/search/indices.h\"\n#include \"core/search/query_driver.h\"\n#include \"core/search/search.h\"\n#include \"core/search/vector_utils.h\"\n#include \"facade/cmd_arg_parser.h\"\n#include \"facade/error.h\"\n#include \"facade/reply_builder.h\"\n#include \"server/acl/acl_commands_def.h\"\n#include \"server/cluster/cluster_config.h\"\n#include \"server/cluster/coordinator.h\"\n#include \"server/command_registry.h\"\n#include \"server/config_registry.h\"\n#include \"server/conn_context.h\"\n#include \"server/container_utils.h\"\n#include \"server/db_slice.h\"\n#include \"server/engine_shard_set.h\"\n#include \"server/namespaces.h\"\n#include \"server/search/aggregator.h\"\n#include \"server/search/doc_index.h\"\n#include \"server/search/global_hnsw_index.h\"\n#include \"server/transaction.h\"\n#include \"src/core/overloaded.h\"\n\nABSL_FLAG(bool, search_reject_legacy_field, true, \"FT.AGGREGATE: Reject legacy field names.\");\nABSL_FLAG(bool, cluster_search, false,\n          \"Enable search commands for cross-shard search. 
turned off by default for safety.\");\n\nABSL_FLAG(size_t, MAXSEARCHRESULTS, 1000000, \"Maximum number of results from ft.search command\");\n\nABSL_FLAG(size_t, search_query_string_bytes, 10240,\n          \"Maximum number of bytes in search query string\");\n\nABSL_FLAG(size_t, subset_knn_search_threshold, 8192,\n          \"If prefilter results are below this threshold, we will do exact subset search \"\n          \"instead of HNSW graph search\");\n\nnamespace dfly {\n\nusing namespace std;\nusing namespace facade;\n\nnamespace {\n// we use it to find which flags are belong to search\nconst std::string kCurrentFile = std::filesystem::path(__FILE__).filename().string();\n\nusing nonstd::make_unexpected;\n\ntemplate <typename T> using ParseResult = io::Result<T, ErrorReply>;\n\nnonstd::unexpected_type<ErrorReply> CreateSyntaxError(std::string message) {\n  return make_unexpected(ErrorReply{std::move(message), kSyntaxErrType});\n}\n\nnonstd::unexpected_type<ErrorReply> CreateSyntaxError(std::string_view message) {\n  return make_unexpected(ErrorReply{message, kSyntaxErrType});\n}\n\nstring IndexNotFoundMsg(string_view index_name) {\n  return absl::StrCat(\"Index with name '\", index_name, \"' not found\");\n}\n\n// Send error from parser or result\n// Returns false if no errors occured\ntemplate <typename T>\nbool SendErrorIfOccurred(const ParseResult<T>& result, CmdArgParser* parser,\n                         CommandContext* cmd_cntx) {\n  if (auto err = parser->TakeError(); err || !result) {\n    cmd_cntx->SendError(!result ? 
result.error() : err.MakeReply());\n    return true;\n  }\n\n  return false;\n}\n\nbool IsValidJsonPath(string_view path) {\n  error_code ec;\n  MakeJsonPathExpr<TmpJson>(path, ec);\n  return !ec;\n}\n\nsearch::SchemaField::VectorParams ParseVectorParams(CmdArgParser* parser) {\n  search::SchemaField::VectorParams params{};\n\n  params.use_hnsw = parser->MapNext(\"HNSW\", true, \"FLAT\", false);\n  const size_t num_args = parser->Next<size_t>();\n\n  for (size_t i = 0; i * 2 < num_args; i++) {\n    if (parser->Check(\"DIM\", &params.dim)) {\n    } else if (parser->Check(\"DISTANCE_METRIC\")) {\n      params.sim =\n          parser->MapNext(\"L2\", search::VectorSimilarity::L2, \"IP\", search::VectorSimilarity::IP,\n                          \"COSINE\", search::VectorSimilarity::COSINE);\n    } else if (parser->Check(\"INITIAL_CAP\", &params.capacity)) {\n    } else if (parser->Check(\"M\", &params.hnsw_m)) {\n    } else if (parser->Check(\"EF_CONSTRUCTION\", &params.hnsw_ef_construction)) {\n    } else if (parser->Check(\"EF_RUNTIME\")) {\n      parser->Next<size_t>();\n      LOG(WARNING) << \"EF_RUNTIME not supported\";\n    } else if (parser->Check(\"EPSILON\")) {\n      parser->Next<double>();\n      LOG(WARNING) << \"EPSILON not supported\";\n    } else {\n      parser->Skip(2);\n    }\n  }\n\n  return params;\n}\n\nParseResult<search::SchemaField::TagParams> ParseTagParams(CmdArgParser* parser) {\n  search::SchemaField::TagParams params{};\n  while (parser->HasNext()) {\n    if (parser->Check(\"SEPARATOR\")) {\n      std::string_view separator = parser->NextOrDefault();\n\n      if (separator.size() != 1) {\n        return CreateSyntaxError(\n            absl::StrCat(\"Tag separator must be a single character. 
Got `\"sv, separator, \"`\"sv));\n      }\n\n      params.separator = separator.front();\n      continue;\n    }\n\n    if (parser->Check(\"CASESENSITIVE\")) {\n      params.case_sensitive = true;\n      continue;\n    }\n\n    if (parser->Check(\"WITHSUFFIXTRIE\")) {\n      params.with_suffixtrie = true;\n      continue;\n    }\n\n    break;\n  }\n  return params;\n}\n\nParseResult<search::SchemaField::TextParams> ParseTextParams(CmdArgParser* parser) {\n  search::SchemaField::TextParams params{};\n  params.with_suffixtrie = parser->Check(\"WITHSUFFIXTRIE\");\n  return params;\n}\n\nsearch::SchemaField::NumericParams ParseNumericParams(CmdArgParser* parser) {\n  search::SchemaField::NumericParams params{};\n  if (parser->Check(\"BLOCKSIZE\")) {\n    params.block_size = parser->Next<size_t>();\n  }\n  return params;\n}\n\n// breaks on ParamsVariant initialization\n#ifndef __clang__\n#pragma GCC diagnostic push\n#pragma GCC diagnostic ignored \"-Wmaybe-uninitialized\"\n#endif\n\nusing ParsedSchemaField =\n    ParseResult<std::pair<search::SchemaField::FieldType, search::SchemaField::ParamsVariant>>;\n\n// Tag fields include: [separator char] [casesensitive]\nParsedSchemaField ParseTag(CmdArgParser* parser) {\n  auto tag_params = ParseTagParams(parser);\n  if (!tag_params) {\n    return make_unexpected(tag_params.error());\n  }\n  return std::make_pair(search::SchemaField::TAG, std::move(tag_params).value());\n}\n\nParsedSchemaField ParseText(CmdArgParser* parser) {\n  auto text_params = ParseTextParams(parser);\n  if (!text_params)\n    return make_unexpected(text_params.error());\n  return std::make_pair(search::SchemaField::TEXT, std::move(text_params).value());\n}\n\nParsedSchemaField ParseNumeric(CmdArgParser* parser) {\n  return std::make_pair(search::SchemaField::NUMERIC, ParseNumericParams(parser));\n}\n\n// Vector fields include: {algorithm} num_args args...\nParsedSchemaField ParseVector(CmdArgParser* parser) {\n  auto vector_params = 
ParseVectorParams(parser);\n\n  if (parser->HasError()) {\n    auto err = parser->TakeError();\n    VLOG(1) << \"Could not parse vector param \" << err.index;\n    return CreateSyntaxError(\"Parse error of vector parameters\"sv);\n  }\n\n  if (vector_params.dim == 0) {\n    return CreateSyntaxError(\"Knn vector dimension cannot be zero\"sv);\n  }\n  return std::make_pair(search::SchemaField::VECTOR, vector_params);\n}\n\nParsedSchemaField ParseGeo(CmdArgParser* parser) {\n  return std::make_pair(search::SchemaField::GEO, std::monostate{});\n}\n\n// ON HASH | JSON\nParseResult<bool> ParseOnOption(CmdArgParser* parser, DocIndex* index) {\n  index->type = parser->MapNext(\"HASH\"sv, DocIndex::HASH, \"JSON\"sv, DocIndex::JSON);\n  return true;\n}\n\n// PREFIX count prefix [prefix ...]\nParseResult<bool> ParsePrefix(CmdArgParser* parser, DocIndex* index) {\n  size_t count = parser->Next<size_t>();\n  index->prefixes.reserve(count);\n  for (size_t i = 0; i < count; i++) {\n    index->prefixes.push_back(parser->Next<std::string>());\n  }\n  return true;\n}\n\n// STOPWORDS count [words...]\nParseResult<bool> ParseStopwords(CmdArgParser* parser, DocIndex* index) {\n  index->options.stopwords.clear();\n  for (size_t num = parser->Next<size_t>(); num > 0; num--) {\n    index->options.stopwords.emplace(parser->Next());\n  }\n  return true;\n}\n\nconstexpr std::array<const std::string_view, 4> kIgnoredOptions = {\n    \"UNF\"sv, \"NOSTEM\"sv, \"INDEXMISSING\"sv, \"INDEXEMPTY\"sv};\nconstexpr std::array<const std::string_view, 3> kIgnoredOptionsWithArg = {\"WEIGHT\"sv, \"PHONETIC\"sv};\n\n// SCHEMA field [AS alias] type [flags...]\nParseResult<bool> ParseSchema(CmdArgParser* parser, DocIndex* index) {\n  auto& schema = index->schema;\n\n  if (!parser->HasNext()) {\n    return CreateSyntaxError(\"Fields arguments are missing\"sv);\n  }\n\n  while (parser->HasNext()) {\n    string_view field = parser->Next();\n    string_view field_alias = field;\n\n    // Verify json path is 
correct\n    if (index->type == DocIndex::JSON && !IsValidJsonPath(field)) {\n      return CreateSyntaxError(absl::StrCat(\"Bad json path: \"sv, field));\n    }\n\n    // AS [alias]\n    parser->Check(\"AS\", &field_alias);\n\n    if (schema.field_names.contains(field_alias)) {\n      return CreateSyntaxError(absl::StrCat(\"Duplicate field in schema - \"sv, field_alias));\n    }\n\n    // Determine type\n    using search::SchemaField;\n    auto params_parser =\n        parser->TryMapNext(\"TAG\"sv, &ParseTag, \"TEXT\"sv, &ParseText, \"NUMERIC\"sv, &ParseNumeric,\n                           \"VECTOR\"sv, &ParseVector, \"GEO\", &ParseGeo);\n    if (!params_parser) {\n      return CreateSyntaxError(\n          absl::StrCat(\"Field type \"sv, parser->Next(), \" is not supported\"sv));\n    }\n\n    auto parsed_params = params_parser.value()(parser);\n    if (!parsed_params) {\n      return make_unexpected(parsed_params.error());\n    }\n\n    auto [field_type, params] = std::move(parsed_params).value();\n\n    // Flags: check for SORTABLE and NOINDEX\n    uint8_t flags = 0;\n    while (parser->HasNext()) {\n      auto flag = parser->TryMapNext(\"NOINDEX\", search::SchemaField::NOINDEX, \"SORTABLE\",\n                                     search::SchemaField::SORTABLE);\n      if (!flag) {\n        std::string_view option = parser->Peek();\n        if (std::find(kIgnoredOptions.begin(), kIgnoredOptions.end(), option) !=\n            kIgnoredOptions.end()) {\n          LOG_IF(WARNING, option != \"INDEXMISSING\"sv && option != \"INDEXEMPTY\"sv)\n              << \"Ignoring unsupported field option in FT.CREATE: \" << option;\n          // Ignore these options\n          parser->Skip(1);\n          continue;\n        }\n        if (std::find(kIgnoredOptionsWithArg.begin(), kIgnoredOptionsWithArg.end(), option) !=\n            kIgnoredOptionsWithArg.end()) {\n          LOG(WARNING) << \"Ignoring unsupported field option in FT.CREATE: \" << option;\n          // Ignore these 
options with argument\n          parser->Skip(2);\n          continue;\n        }\n        break;\n      }\n\n      flags |= *flag;\n    }\n\n    schema.fields[field] = {field_type, flags, string{field_alias}, params};\n    schema.field_names[field_alias] = field;\n  }\n\n  return false;\n}\n\n#ifndef __clang__\n#pragma GCC diagnostic pop\n#endif\n\nParseResult<DocIndex> CreateDocIndex(std::string_view name, CmdArgParser* parser) {\n  DocIndex index{};\n  index.name = name;\n\n  while (parser->HasNext()) {\n    auto option_parser =\n        parser->TryMapNext(\"ON\"sv, &ParseOnOption, \"PREFIX\"sv, &ParsePrefix, \"STOPWORDS\"sv,\n                           &ParseStopwords, \"SCHEMA\"sv, &ParseSchema);\n\n    if (!option_parser) {\n      // Unsupported parameters are ignored for now\n      parser->Skip(1);\n      continue;\n    }\n\n    auto parse_result = option_parser.value()(parser, &index);\n    if (!parse_result) {\n      return make_unexpected(parse_result.error());\n    }\n    if (!parse_result.value()) {\n      break;\n    }\n  }\n\n  return index;\n}\n\nstd::string_view ParseField(CmdArgParser* parser) {\n  std::string_view field = parser->Next();\n  if (absl::StartsWith(field, \"@\"sv)) {\n    field.remove_prefix(1);  // remove leading @ if exists\n  }\n  return field;\n}\n\nstd::optional<std::string_view> ParseFieldWithAtSign(CmdArgParser* parser) {\n  std::string_view field = parser->Next();\n  if (absl::StartsWith(field, \"@\"sv)) {\n    field.remove_prefix(1);  // remove leading @\n  } else {\n    if (absl::GetFlag(FLAGS_search_reject_legacy_field)) {\n      return std::nullopt;\n    }\n  }\n  return field;\n}\n\nvoid ParseNumericFilter(CmdArgParser* parser, SearchParams* params) {\n  auto field = ParseField(parser);\n  size_t lo = parser->Next<size_t>();\n  size_t hi = parser->Next<size_t>();\n  if (auto it = params->optional_filters.find(field); it != params->optional_filters.end()) {\n    search::OptionalNumericFilter* numeric_filter =\n        
dynamic_cast<search::OptionalNumericFilter*>(it->second.get());\n    numeric_filter->AddRange(lo, hi);\n  } else {\n    params->optional_filters.emplace(field,\n                                     std::make_unique<search::OptionalNumericFilter>(lo, hi));\n  }\n}\n\nstd::vector<FieldReference> ParseLoadOrReturnFields(CmdArgParser* parser, bool is_load) {\n  // TODO: Change to num_strings. In Redis strings number is expected. For example: LOAD 3 $.a AS a\n  std::vector<FieldReference> fields;\n  size_t num_fields = parser->Next<size_t>();\n\n  while (parser->HasNext() && num_fields--) {\n    string_view field = is_load ? ParseField(parser) : parser->Next();\n    string_view alias;\n    parser->Check(\"AS\", &alias);\n    fields.emplace_back(field, alias);\n  }\n  return fields;\n}\n\nsearch::QueryParams ParseQueryParams(CmdArgParser* parser) {\n  search::QueryParams params;\n  size_t num_args = parser->Next<size_t>();\n  while (parser->HasNext() && params.Size() * 2 < num_args) {\n    auto [k, v] = parser->Next<string_view, string_view>();\n    params[k] = v;\n  }\n  return params;\n}\n\nParseResult<SearchParams> ParseSearchParams(CmdArgParser* parser) {\n  SearchParams params;\n\n  const size_t max_results = absl::GetFlag(FLAGS_MAXSEARCHRESULTS);\n\n  while (parser->HasNext()) {\n    // [LIMIT offset total]\n    if (parser->Check(\"LIMIT\")) {\n      params.limit_offset = parser->Next<size_t>();\n      params.limit_total = parser->Next<size_t>();\n      if (params.limit_total > max_results) {\n        return CreateSyntaxError(absl::StrFormat(\"LIMIT exceeds maximum of %d\", max_results));\n      }\n    } else if (parser->Check(\"LOAD\")) {\n      if (params.return_fields) {\n        return CreateSyntaxError(\"LOAD cannot be applied after RETURN\"sv);\n      }\n\n      params.load_fields = ParseLoadOrReturnFields(parser, true);\n    } else if (parser->Check(\"RETURN\")) {\n      if (params.load_fields) {\n        return CreateSyntaxError(\"RETURN cannot be applied 
after LOAD\"sv);\n      }\n      if (!params.return_fields)  // after NOCONTENT it's silently ignored\n        params.return_fields = ParseLoadOrReturnFields(parser, false);\n    } else if (parser->Check(\"NOCONTENT\")) {  // NOCONTENT\n      params.return_fields.emplace();\n    } else if (parser->Check(\"PARAMS\")) {  // [PARAMS num(ignored) name(ignored) knn_vector]\n      params.query_params = ParseQueryParams(parser);\n    } else if (parser->Check(\"SORTBY\")) {\n      FieldReference field{ParseField(parser)};\n      params.sort_option =\n          SearchParams::SortOption{field, parser->Check(\"DESC\") ? SortOrder::DESC : SortOrder::ASC};\n    } else if (parser->Check(\"FILTER\")) {\n      ParseNumericFilter(parser, &params);\n    } else if (parser->Check(\"WITHSORTKEYS\")) {\n      params.with_sortkeys = true;\n    } else {\n      // Unsupported parameters are ignored for now\n      parser->Skip(1);\n    }\n  }\n\n  params.limit_total = std::min(params.limit_total, max_results);\n\n  return params;\n}\n\nParseResult<aggregate::SortParams> ParseAggregatorSortParams(CmdArgParser* parser) {\n  size_t strings_num = parser->Next<size_t>();\n\n  aggregate::SortParams sort_params;\n  sort_params.fields.reserve(strings_num / 2);\n\n  while (parser->HasNext() && strings_num > 0) {\n    std::string_view potential_field =\n        parser->Peek();  // Peek to get the field name for potential error message\n    std::optional<std::string_view> parsed_field = ParseFieldWithAtSign(parser);\n    if (!parsed_field) {\n      return CreateSyntaxError(\n          absl::StrCat(\"SORTBY field name '\", potential_field, \"' must start with '@'\"));\n    }\n    strings_num--;\n\n    SortOrder sord_order = SortOrder::ASC;\n    if (strings_num > 0) {\n      auto order = parser->TryMapNext(\"ASC\", SortOrder::ASC, \"DESC\", SortOrder::DESC);\n      if (order) {\n        sord_order = order.value();\n        strings_num--;\n      }\n    }\n\n    
sort_params.fields.emplace_back(*parsed_field, sord_order);\n  }\n\n  if (strings_num) {\n    return CreateSyntaxError(\"bad arguments for SORTBY: specified invalid number of strings\"sv);\n  }\n\n  if (parser->Check(\"MAX\")) {\n    sort_params.max = parser->Next<size_t>();\n  }\n\n  return sort_params;\n}\n\nstd::pair<std::string_view, std::string_view> Split(std::string_view s, char delim) {\n  return absl::StrSplit(s, absl::MaxSplits(absl::ByChar(delim), 1));\n}\n\n// Example: LOAD_FROM index AS alias num_conditions condition [condition ...] [QUERY query]\n// condition is in the form index.field=foreign_index.field or foreign_index.field=index.field\nParseResult<AggregateParams::JoinParams> ParseAggregatorJoinParams(\n    CmdArgParser* parser, absl::flat_hash_set<std::string>* known_indexes) {\n  AggregateParams::JoinParams join_params;\n  join_params.index = parser->Next<std::string>();\n  if (parser->Check(\"AS\")) {\n    join_params.index_alias = parser->Next<std::string>();\n  } else {\n    join_params.index_alias = join_params.index;\n  }\n\n  if (known_indexes->contains(join_params.index_alias)) {\n    return CreateSyntaxError(\n        absl::StrCat(\"Duplicate index alias in LOAD_FROM: '\", join_params.index_alias, \"'\"));\n  }\n\n  // Validate index name\n  known_indexes->insert(join_params.index_alias);\n\n  size_t num_fields = parser->Next<size_t>();\n  join_params.conditions.reserve(num_fields);\n  // Conditions are in the form index.field=foreign_index.field or foreign_index.field=index.field\n  while (parser->HasNext() && num_fields > 0) {\n    auto [left, right] = Split(parser->Next(), '=');\n    auto [l_index, l_field] = Split(left, '.');\n    auto [r_index, r_field] = Split(right, '.');\n\n    if (right.empty() || l_field.empty() || r_field.empty()) {\n      return CreateSyntaxError(\n          \"bad arguments for LOAD_FROM: expected 'index.field=foreign_index.field'\"sv);\n    }\n\n    if (!known_indexes->contains(l_index) || 
!known_indexes->contains(r_index)) {\n      return CreateSyntaxError(absl::StrCat(\"bad arguments for LOAD_FROM: unknown index '\",\n                                            known_indexes->contains(l_index) ? r_index : l_index,\n                                            \"'\"));\n    }\n\n    if (l_index == join_params.index_alias) {\n      join_params.conditions.emplace_back(l_field, r_index, r_field);\n    } else if (r_index == join_params.index_alias) {\n      join_params.conditions.emplace_back(r_field, l_index, l_field);\n    } else {\n      return CreateSyntaxError(absl::StrCat(\n          \"bad arguments for LOAD_FROM: one of the field must be from the current index '\",\n          join_params.index_alias, \"'. Got '\", left, \"' and '\", right, \"'\"));\n    }\n\n    num_fields--;\n  }\n\n  parser->Check(\"QUERY\", &join_params.query);\n\n  return join_params;\n}\n\nParseResult<AggregateParams> ParseAggregatorParams(CmdArgParser* parser) {\n  AggregateParams params;\n  tie(params.index, params.query) = parser->Next<string_view, string_view>();\n\n  // Parse LOAD count field [field ...]\n  // LOAD options are at the beginning of the query, so we need to parse them first\n  while (parser->HasNext() && parser->Check(\"LOAD\")) {\n    auto fields = ParseLoadOrReturnFields(parser, true);\n    if (!params.load_fields.has_value())\n      params.load_fields = std::move(fields);\n    else\n      params.load_fields->insert(params.load_fields->end(), make_move_iterator(fields.begin()),\n                                 make_move_iterator(fields.end()));\n  }\n\n  // Used for join params\n  absl::flat_hash_set<std::string> current_known_indexes;\n  current_known_indexes.insert(std::string{params.index});\n  while (parser->HasNext() && parser->Check(\"LOAD_FROM\")) {\n    auto join_params = ParseAggregatorJoinParams(parser, &current_known_indexes);\n    if (!join_params) {\n      return make_unexpected(join_params.error());\n    }\n    
params.joins.emplace_back(std::move(join_params).value());\n  }\n  const bool joining_enabled = !params.joins.empty();\n\n  while (parser->HasNext()) {\n    // GROUPBY nargs property [property ...]\n    if (parser->Check(\"GROUPBY\")) {\n      size_t num_fields = parser->Next<size_t>();\n\n      std::vector<std::string> fields;\n      fields.reserve(num_fields);\n      while (parser->HasNext() && num_fields > 0) {\n        auto parsed_field = ParseFieldWithAtSign(parser);\n        if (!parsed_field) {\n          return CreateSyntaxError(\"bad arguments: Field name should start with '@'\"sv);\n        }\n\n        fields.emplace_back(*parsed_field);\n        num_fields--;\n      }\n\n      vector<aggregate::Reducer> reducers;\n      while (parser->Check(\"REDUCE\")) {\n        using RF = aggregate::ReducerFunc;\n        auto func_name =\n            parser->TryMapNext(\"COUNT\", RF::COUNT, \"COUNT_DISTINCT\", RF::COUNT_DISTINCT, \"SUM\",\n                               RF::SUM, \"AVG\", RF::AVG, \"MAX\", RF::MAX, \"MIN\", RF::MIN);\n\n        if (!func_name) {\n          return CreateSyntaxError(absl::StrCat(\"reducer function \", parser->Next(), \" not found\"));\n        }\n\n        auto func = aggregate::FindReducerFunc(*func_name);\n        auto nargs = parser->Next<size_t>();\n\n        string source_field;\n        if (nargs > 0) {\n          source_field = ParseField(parser);\n        }\n\n        parser->ExpectTag(\"AS\");\n        string result_field = parser->Next<string>();\n\n        reducers.push_back(\n            aggregate::Reducer{std::move(source_field), std::move(result_field), func});\n      }\n\n      params.steps.push_back(aggregate::MakeGroupStep(std::move(fields), std::move(reducers)));\n      continue;\n    }\n\n    // SORTBY nargs\n    if (parser->Check(\"SORTBY\")) {\n      auto sort_params = ParseAggregatorSortParams(parser);\n      if (!sort_params) {\n        return make_unexpected(sort_params.error());  // Propagate the specific 
error\n      }\n\n      if (!joining_enabled || params.join_agg_params.HasValue()) {\n        params.steps.push_back(aggregate::MakeSortStep(std::move(sort_params).value()));\n      } else {\n        params.join_agg_params.sort = std::move(sort_params).value();\n      }\n      continue;\n    }\n\n    // LIMIT\n    if (parser->Check(\"LIMIT\")) {\n      auto [offset, num] = parser->Next<size_t, size_t>();\n      if (!joining_enabled || params.join_agg_params.HasLimit()) {\n        params.steps.push_back(aggregate::MakeLimitStep(offset, num));\n      } else {\n        params.join_agg_params.limit_offset = offset;\n        params.join_agg_params.limit_total = num;\n      }\n      continue;\n    }\n\n    // PARAMS\n    if (parser->Check(\"PARAMS\")) {\n      params.params = ParseQueryParams(parser);\n      continue;\n    }\n\n    if (parser->Check(\"LOAD\")) {\n      return CreateSyntaxError(\"LOAD cannot be applied after projectors or reducers\"sv);\n    }\n\n    if (parser->Check(\"LOAD_FROM\")) {\n      return CreateSyntaxError(\"LOAD_FROM cannot be applied after projectors or reducers\"sv);\n    }\n\n    return CreateSyntaxError(absl::StrCat(\"Unknown clause: \", parser->Peek()));\n  }\n\n  return params;\n}\n\n// Data that we need at the first step of join\nstruct PreprocessedJoinData {\n  struct SortParam {\n    size_t index;\n    size_t field_index;\n    SortOrder order;\n  };\n\n  explicit PreprocessedJoinData(size_t n)\n      : indexes(n), needed_fields(n), joins_per_index(n), fields_to_load_per_index(n) {\n  }\n\n  // Index names\n  join::Vector<std::string_view> indexes;\n  // Maps index alias to its index in the indexes vector\n  absl::flat_hash_map<std::string_view, size_t> alias_to_index;\n\n  // For each index we store the fields that are needed for the join\n  join::Vector<join::Vector<std::string_view>> needed_fields;\n  // For each index we store the join expressions that are used to join this index\n  join::Vector<join::JoinExpressionsVec> 
joins_per_index;\n  // For each index we store the fields that should be loaded from the document after the join\n  join::Vector<join::Vector<std::string_view>> fields_to_load_per_index;\n  // One entry per SORTBY field: the position of its index in `indexes`, the position of the field\n  // in that index's needed_fields vector, and the sort order\n  join::Vector<SortParam> sort_params;\n};\n\nio::Result<PreprocessedJoinData, ErrorReply> PreprocessDataForJoin(std::string_view index,\n                                                                   const AggregateParams& params) {\n  DCHECK(!params.joins.empty());\n\n  const size_t n = params.joins.size();\n  PreprocessedJoinData result(n + 1);\n\n  // Collect aliases and initialize result.indexes\n  result.alias_to_index.reserve(n);\n  result.alias_to_index[index] = 0;\n  result.indexes[0] = index;\n  for (size_t i = 0; i < n; ++i) {\n    result.alias_to_index[params.joins[i].index_alias] = i + 1;\n    result.indexes[i + 1] = params.joins[i].index;\n  }\n\n  // Collect needed fields for joins for each index\n  // needed_fields[i] contains fields needed for index i\n  // for each field name we store its index\n  // Also collect joins for each index\n  std::vector<absl::flat_hash_map<std::string_view, size_t>> needed_fields(n + 1);\n\n  auto insert = [&](std::string_view field, auto* map) -> size_t {\n    auto it = map->find(field);\n    if (it == map->end()) {\n      const size_t field_index = map->size();\n      map->emplace(field, field_index);\n      return field_index;\n    }\n    return it->second;\n  };\n\n  for (size_t i = 0; i < n; ++i) {\n    const auto& join = params.joins[i];\n    for (const auto& condition : join.conditions) {\n      size_t field_index = insert(condition.field, &needed_fields[i + 1]);\n\n      DCHECK(result.alias_to_index.contains(condition.foreign_field.first))\n          << \"Unknown foreign index alias: \" << condition.foreign_field.first;\n      size_t foreign_index = result.alias_to_index[condition.foreign_field.first];\n      DCHECK_LE(foreign_index, i) << 
\"Foreign index alias out of range: \"\n                                  << condition.foreign_field.first;\n\n      size_t foreign_field_index =\n          insert(condition.foreign_field.second, &needed_fields[foreign_index]);\n\n      // Update joins for this index\n      result.joins_per_index[i + 1].emplace_back(\n          join::JoinExpression{field_index, foreign_index, foreign_field_index});\n    }\n  }\n\n  // Collect fields needed for sorting\n  // Max option will be temprorary ignored\n  if (params.join_agg_params.sort) {\n    for (const auto& sort_field : params.join_agg_params.sort.value().fields) {\n      auto [index_alias, field_name] = Split(sort_field.first, '.');\n\n      auto it = result.alias_to_index.find(index_alias);\n      if (it == result.alias_to_index.end()) {\n        return CreateSyntaxError(absl::StrCat(\"Unknown index alias '\", index_alias,\n                                              \"' in the SORTBY option. Field: '\", field_name, \"'\"));\n      }\n\n      size_t index = it->second;\n      size_t field_index = insert(field_name, &needed_fields[index]);\n      result.sort_params.push_back(\n          PreprocessedJoinData::SortParam{index, field_index, sort_field.second});\n    }\n  }\n\n  // Map them to the result.needed_fields\n  for (size_t i = 0; i <= n; ++i) {\n    auto& from = needed_fields[i];\n    auto& to = result.needed_fields[i];\n\n    to.resize(from.size());\n    for (const auto& [field_name, field_index] : from) {\n      to[field_index] = field_name;\n    }\n  }\n\n  // Initialize fields_to_load_per_index\n  for (const auto& field : params.load_fields.value_or(std::vector<FieldReference>{})) {\n    auto [index_alias, field_name] = Split(field.Name(), '.');\n\n    auto it = result.alias_to_index.find(index_alias);\n    if (it == result.alias_to_index.end()) {\n      return CreateSyntaxError(absl::StrCat(\"Unknown index alias '\", index_alias,\n                                            \"' in the LOAD option. 
Field: '\", field_name, \"'\"));\n    }\n\n    result.fields_to_load_per_index[it->second].emplace_back(field_name);\n  }\n\n  return result;\n}\n\n// Merge preaggregated results from all shards for each index\njoin::Vector<join::Vector<join::Entry>> MergePreaggregatedShardJoinData(\n    absl::Span<const std::vector<join::Vector<join::OwnedEntry>>> preaggregated_shard_data) {\n  if (preaggregated_shard_data.empty()) {\n    return {};\n  }\n\n  // indexes_entries[i] contains the preaggregated data for index i\n  const size_t indexes_count = preaggregated_shard_data[0].size();\n  join::Vector<join::Vector<join::Entry>> indexes_entries(indexes_count);\n  for (size_t i = 0; i < indexes_count; ++i) {\n    auto& entries = indexes_entries[i];\n\n    size_t num_docs = 0;\n    for (size_t j = 0; j < shard_set->size(); ++j) {\n      num_docs += preaggregated_shard_data[j][i].size();\n    }\n\n    entries.reserve(num_docs);\n    for (size_t j = 0; j < shard_set->size(); ++j) {\n      for (const auto& entry : preaggregated_shard_data[j][i]) {\n        join::Vector<join::JoinableValue> field_values;\n        field_values.reserve(entry.second.size());\n\n        auto insert_copy = [&field_values](const auto& field_value) {\n          field_values.emplace_back(field_value);\n        };\n\n        for (const auto& field_value : entry.second) {\n          std::visit(insert_copy, field_value);\n        }\n\n        entries.emplace_back(entry.first, std::move(field_values));\n      }\n    }\n  }\n\n  return indexes_entries;\n}\n\njoin::Vector<join::Vector<join::Key>> DoJoin(\n    absl::Span<const std::vector<join::Vector<join::OwnedEntry>>> preaggregated_shard_data,\n    const AggregateParams& params, const PreprocessedJoinData& join_data) {\n  using join::KeyIndexes;\n\n  auto indexes_entries = MergePreaggregatedShardJoinData(preaggregated_shard_data);\n\n  auto sort_and_limit = [&](std::vector<KeyIndexes>* joined_entries) {\n    const size_t offset = 
params.join_agg_params.limit_offset;\n    const size_t total = params.join_agg_params.limit_total;\n    if (offset >= joined_entries->size()) {\n      joined_entries->clear();\n      return;\n    }\n\n    const auto& sort_params = join_data.sort_params;\n    auto comparator = [&](const KeyIndexes& l, const KeyIndexes& r) {\n      for (const auto& sort_param : sort_params) {\n        size_t index = sort_param.index;\n        const join::JoinableValue& l_value =\n            indexes_entries[index][l[index]].second[sort_param.field_index];\n        const join::JoinableValue& r_value =\n            indexes_entries[index][r[index]].second[sort_param.field_index];\n\n        if (l_value == r_value) {\n          continue;\n        }\n        return sort_param.order == SortOrder::ASC ? l_value < r_value : l_value > r_value;\n      }\n      return false;\n    };\n\n    size_t limit = offset + total;\n    if (!sort_params.empty()) {\n      if (limit >= joined_entries->size()) {\n        std::sort(joined_entries->begin(), joined_entries->end(), std::move(comparator));\n      } else {\n        std::partial_sort(joined_entries->begin(), joined_entries->begin() + limit,\n                          joined_entries->end(), std::move(comparator));\n        joined_entries->resize(limit);\n      }\n    }\n\n    size_t new_limit = std::min(limit, joined_entries->size());\n    if (offset) {\n      for (size_t i = offset; i < new_limit; ++i) {\n        auto& dest = (*joined_entries)[i - offset];\n        auto& src = (*joined_entries)[i];\n        DCHECK(dest.size() == src.size());\n        dest = std::move(src);\n      }\n    }\n\n    size_t new_size = std::min(total, joined_entries->size() - offset);\n    joined_entries->resize(new_size);\n  };\n\n  return join::JoinAllIndexes(indexes_entries, join_data.joins_per_index, sort_and_limit);\n}\n\nstd::vector<aggregate::DocValues> MergeJoinedKeysWithData(\n    const AggregateParams& agg_params, const PreprocessedJoinData& join_data,\n    
absl::Span<const join::Vector<join::Key>> joined_entries,\n    absl::Span<const std::vector<ShardDocIndex::FieldsValuesPerDocId>> shard_keys_data) {\n  std::vector<aggregate::DocValues> merged_data;\n  merged_data.reserve(joined_entries.size());\n\n  const size_t indexes_count = join_data.indexes.size();\n  const auto& fields_per_index = join_data.fields_to_load_per_index;\n\n  for (const auto& entry : joined_entries) {\n    aggregate::DocValues doc_values;\n\n    // First reserve space for the total number of fields\n    size_t docs_count = 0;\n    for (size_t i = 0; i < indexes_count; ++i) {\n      docs_count += fields_per_index[i].size();\n    }\n    doc_values.reserve(docs_count);\n\n    for (size_t i = 0; i < indexes_count; ++i) {\n      std::string_view index_alias =\n          (i == 0) ? agg_params.index : agg_params.joins[i - 1].index_alias;\n\n      const auto [shard_id, doc_id] = entry[i];\n      const auto& field_values_per_doc_id = shard_keys_data[shard_id][i];\n\n      auto it = field_values_per_doc_id.find(doc_id);\n      if (it == field_values_per_doc_id.end()) {\n        /* This doc id was joined but not found on the second step. This can happen due to\n         * expiration for example. 
For now, just skip it */\n        continue;\n      }\n\n      const auto& field_values = it->second;\n\n      for (size_t j = 0; j < fields_per_index[i].size(); ++j) {\n        std::string_view field_alias = fields_per_index[i][j];  // tmp alias is identifier\n        doc_values.emplace(absl::StrCat(index_alias, \".\"sv, field_alias), field_values[j]);\n      }\n    }\n\n    merged_data.push_back(std::move(doc_values));\n  }\n  return merged_data;\n}\n\nauto SortableValueSender(RedisReplyBuilder* rb) {\n  return Overloaded{\n      [rb](monostate) { rb->SendNull(); },\n      [rb](double d) { rb->SendDouble(d); },\n      [rb](const string& s) { rb->SendBulkString(s); },\n  };\n}\n\nvoid SendSerializedDoc(const SerializedSearchDoc& doc, SinkReplyBuilder* builder) {\n  auto* rb = static_cast<RedisReplyBuilder*>(builder);\n  auto sortable_value_sender = SortableValueSender(rb);\n\n  rb->StartCollection(doc.values.size(), CollectionType::MAP);\n  for (const auto& [k, v] : doc.values) {\n    rb->SendBulkString(k);\n    visit(sortable_value_sender, v);\n  }\n}\n\ntemplate <typename T>\nvoid PartialSort(absl::Span<SerializedSearchDoc*> docs, size_t limit, SortOrder order,\n                 T SerializedSearchDoc::*field) {\n  auto cb = [order, field](SerializedSearchDoc* l, SerializedSearchDoc* r) {\n    return order == SortOrder::ASC ? 
l->*field < r->*field : r->*field < l->*field;\n  };\n  partial_sort(docs.begin(), docs.begin() + min(limit, docs.size()), docs.end(), cb);\n}\n\nvoid SearchReply(const SearchParams& params,\n                 std::optional<search::KnnScoreSortOption> knn_sort_option,\n                 absl::Span<SearchResult> results, SinkReplyBuilder* builder, bool is_css) {\n  size_t total_hits = 0;\n  absl::InlinedVector<SerializedSearchDoc*, 5> docs;\n  docs.reserve(results.size());\n  for (auto& shard_results : results) {\n    total_hits += shard_results.total_hits;\n    for (auto& doc : shard_results.docs) {\n      docs.push_back(&doc);\n    }\n  }\n\n  // Reorder and cut KNN results before applying SORT and LIMIT\n  optional<string> knn_score_ret_field;\n  bool ignore_sort = false;\n  if (knn_sort_option) {\n    total_hits = min(total_hits, knn_sort_option->limit);\n    PartialSort(absl::MakeSpan(docs), total_hits, SortOrder::ASC, &SerializedSearchDoc::knn_score);\n    docs.resize(min(docs.size(), knn_sort_option->limit));\n\n    ignore_sort = !params.sort_option || params.sort_option->IsSame(*knn_sort_option);\n    if (params.ShouldReturnField(knn_sort_option->score_field_alias))\n      knn_score_ret_field = knn_sort_option->score_field_alias;\n  }\n\n  // Apply LIMIT\n  size_t offset = 0;\n  size_t limit = 0;\n  if (is_css) {\n    limit = std::min(docs.size(), params.limit_total + params.limit_offset);\n  } else {\n    offset = std::min(params.limit_offset, docs.size());\n    limit = std::min(docs.size() - offset, params.limit_total);\n  }\n  const size_t end = limit + offset;\n\n  // Apply SORTBY if it's different from the KNN sort\n  if (params.sort_option && !ignore_sort)\n    PartialSort(absl::MakeSpan(docs), end, params.sort_option->order,\n                &SerializedSearchDoc::sort_score);\n\n  const bool reply_with_ids_only = params.IdsOnly();\n  auto* rb = static_cast<RedisReplyBuilder*>(builder);\n  const size_t items_per_field = (reply_with_ids_only ? 
1 : 2) + params.with_sortkeys;\n  RedisReplyBuilder::ArrayScope scope{rb, limit * items_per_field + 1};\n\n  Overloaded sortable_value_sender{\n      [rb](monostate) { rb->SendNull(); },\n      [rb](double d) { rb->SendBulkString(absl::StrCat(\"#\", d)); },\n      [rb](const string& s) { rb->SendBulkString(\"$\" + s); },\n  };\n\n  rb->SendLong(total_hits);\n  for (size_t i = offset; i < end; i++) {\n    rb->SendBulkString(docs[i]->key);\n    if (params.with_sortkeys) {\n      visit(sortable_value_sender, docs[i]->sort_score);\n    }\n\n    if (!reply_with_ids_only) {\n      if (knn_score_ret_field)\n        docs[i]->values[*knn_score_ret_field] = docs[i]->knn_score;\n\n      SendSerializedDoc(*docs[i], builder);\n    }\n  }\n}\n\n// Warms up the query parser to avoid first-call slowness\nvoid WarmupQueryParser() {\n  static std::once_flag warmed_up;\n  std::call_once(warmed_up, []() {\n    search::QueryParams params;\n    search::QueryDriver driver{};\n    driver.SetParams(&params);\n    driver.SetInput(std::string{\"\"});\n    (void)search::Parser (&driver)();\n  });\n}\n\nvector<SearchResult> SearchGlobalHnswIndex(\n    const search::AstKnnNode* knn, const shared_ptr<search::HnswVectorIndex>& index,\n    const std::string_view index_name,\n    const std::optional<search::KnnScoreSortOption>& knn_score_option,\n    const std::vector<SearchResult>& sharded_prefilter_docs, const SearchParams& params,\n    const CommandContext& cmd_cntx) {\n  std::vector<SearchResult> results(1);\n\n  std::optional<std::vector<search::GlobalDocId>> prefilter_global_docs_ids = std::nullopt;\n\n  // Quick lookup to match global id to serialized doc\n  std::map<search::GlobalDocId, const SerializedSearchDoc*> prefilter_docs_lookup;\n\n  const bool has_prefilter_docs = knn->HasPreFilter();\n  const ShardId shard_size = sharded_prefilter_docs.size();\n\n  // We have pre filter docs so all documents should already be fetched\n  if (has_prefilter_docs) {\n    
std::vector<search::GlobalDocId> global_doc_ids;\n    for (size_t shard_id = 0; shard_id < shard_size; shard_id++) {\n      for (auto& doc : sharded_prefilter_docs[shard_id].docs) {\n        auto global_doc_id = search::CreateGlobalDocId(shard_id, doc.id);\n        global_doc_ids.emplace_back(global_doc_id);\n        prefilter_docs_lookup[global_doc_id] = &doc;\n      }\n    }\n    prefilter_global_docs_ids = std::move(global_doc_ids);\n  }\n\n  // Search HNSW index\n  std::vector<std::pair<float, search::GlobalDocId>> knn_results;\n\n  if (prefilter_global_docs_ids) {\n    VLOG(1) << \"Searching HNSW index with prefilter size: \" << prefilter_global_docs_ids->size();\n    if (prefilter_global_docs_ids->size() < absl::GetFlag(FLAGS_subset_knn_search_threshold)) {\n      knn_results = index->SubsetKnn(knn->vec.first.get(), knn->limit, *prefilter_global_docs_ids);\n    } else {\n      knn_results =\n          index->Knn(knn->vec.first.get(), knn->limit, knn->ef_runtime, *prefilter_global_docs_ids);\n    }\n  } else {\n    knn_results = index->Knn(knn->vec.first.get(), knn->limit, knn->ef_runtime);\n  }\n\n  std::vector<SerializedSearchDoc> knn_search_serialized_docs;\n  knn_search_serialized_docs.reserve(knn_results.size());\n\n  // Serialized docs for each shard\n  std::vector<std::vector<SerializedSearchDoc>> shard_docs(shard_size);\n\n  for (const auto& [score, global_doc_id] : knn_results) {\n    if (has_prefilter_docs) {\n      knn_search_serialized_docs.emplace_back(*prefilter_docs_lookup[global_doc_id]);\n      knn_search_serialized_docs.back().knn_score = score;\n    } else {\n      // Create SerializedSearchDoc and fill only knn information\n      auto [shard_id, local_doc_id] = search::DecomposeGlobalDocId(global_doc_id);\n      SerializedSearchDoc doc;\n      doc.id = local_doc_id;\n      doc.knn_score = score;\n      shard_docs[shard_id].emplace_back(doc);\n    }\n  }\n\n  // If we have prefilter docs we don't need to fetch docs so can return early\n  if 
(has_prefilter_docs) {\n    results[0].total_hits = knn_search_serialized_docs.size();\n    results[0].docs = std::move(knn_search_serialized_docs);\n    return results;\n  }\n\n  // Do we need to set sort score\n  bool set_sort_score = params.sort_option && !params.sort_option->IsSame(*knn_score_option);\n\n  // Do we need to remove sort field from response\n  bool remove_sort_field = false;\n\n  std::optional<std::vector<FieldReference>> return_fields = params.return_fields;\n\n  // If we don't return all fields\n  if (return_fields) {\n    // We have sort_option and it's different than knn score\n    if (set_sort_score) {\n      bool found_sort_return_field = false;\n      for (const auto& return_field : *return_fields) {\n        if (params.sort_option->field.Name() == return_field.Name()) {\n          found_sort_return_field = true;\n          break;\n        }\n      }\n      // Sort return field is not found so we need to add it for request and\n      // remove this field in response\n      if (!found_sort_return_field) {\n        (*return_fields).push_back(params.sort_option->field);\n        remove_sort_field = true;\n      }\n    }\n  }\n\n  // Indicator if we serialized document on shard\n  std::vector<std::vector<bool>> shard_docs_serialized_indicator(shard_size);\n\n  // Fetch all docs from shards\n  cmd_cntx.tx()->ScheduleSingleHop([&](Transaction* t, EngineShard* es) {\n    auto* index = es->search_indices()->GetIndex(index_name);\n\n    // No index found or no docs on this shard\n    if (!index || shard_docs[es->shard_id()].empty()) {\n      return OpStatus::OK;\n    }\n\n    const auto& schema = index->GetInfo().base_index.schema;\n\n    // Resize shard with default `true` value\n    shard_docs_serialized_indicator[es->shard_id()].resize(shard_docs[es->shard_id()].size(), true);\n\n    for (size_t i = 0; i < shard_docs[es->shard_id()].size(); i++) {\n      auto& shard_doc = shard_docs[es->shard_id()][i];\n      if (auto doc =\n              
index->SerializeDocWithKey(shard_doc.id, t->GetOpArgs(es), schema, return_fields);\n          doc) {\n        auto& [key, fields] = *doc;\n\n        // Handle sort_score and remove field if we don't need it\n        search::SortableValue sort_score = std::monostate{};\n        if (set_sort_score) {\n          sort_score = fields[params.sort_option->field.Name()];\n          if (remove_sort_field) {\n            fields.erase(params.sort_option->field.Name());\n          }\n        }\n        shard_doc.key = std::string{key};\n        shard_doc.values = std::move(fields);\n        shard_doc.sort_score = sort_score;\n      } else {\n        // If we couldn't serialize requested doc\n        shard_docs_serialized_indicator[es->shard_id()][i] = false;\n      }\n    }\n    return OpStatus::OK;\n  });\n\n  // Transform shard results back to a single flat list, skipping docs that failed to serialize\n  size_t shard_id = 0;\n  std::for_each(shard_docs.begin(), shard_docs.end(),\n                [&](const std::vector<SerializedSearchDoc>& shard) {\n                  for (size_t doc_index = 0; doc_index < shard.size(); ++doc_index) {\n                    // Check if we serialized doc\n                    if (shard_docs_serialized_indicator[shard_id][doc_index]) {\n                      knn_search_serialized_docs.push_back(shard[doc_index]);\n                    }\n                  }\n                  shard_id++;\n                });\n\n  results[0].total_hits = knn_search_serialized_docs.size();\n  results[0].docs = std::move(knn_search_serialized_docs);\n\n  return results;\n}\n\n// Search HNSW index for all documents within the given radius.\n// Similar to SearchGlobalHnswIndex but uses RangeQuery instead of Knn.\nvector<SearchResult> SearchGlobalHnswIndexRange(\n    const search::AstVectorRangeNode* range, const shared_ptr<search::HnswVectorIndex>& index,\n    string_view index_name, const std::optional<search::KnnScoreSortOption>& knn_score_option,\n    const SearchParams& params, const CommandContext& cmd_cntx) {\n  
std::vector<SearchResult> results(1);\n  const ShardId shard_size = shard_set->size();\n\n  auto range_results = index->RangeQuery(range->vec.first.get(), static_cast<float>(range->radius));\n\n  std::vector<std::vector<SerializedSearchDoc>> shard_docs(shard_size);\n  for (const auto& [score, global_doc_id] : range_results) {\n    auto [shard_id, local_doc_id] = search::DecomposeGlobalDocId(global_doc_id);\n    SerializedSearchDoc doc;\n    doc.id = local_doc_id;\n    doc.knn_score = score;\n    shard_docs[shard_id].emplace_back(doc);\n  }\n\n  bool set_sort_score =\n      params.sort_option && (!knn_score_option || !params.sort_option->IsSame(*knn_score_option));\n  bool remove_sort_field = false;\n  std::optional<std::vector<FieldReference>> return_fields = params.return_fields;\n\n  if (set_sort_score && return_fields) {\n    bool found_sort_field = false;\n    for (const auto& rf : *return_fields) {\n      if (rf.Name() == params.sort_option->field.Name()) {\n        found_sort_field = true;\n        break;\n      }\n    }\n    if (!found_sort_field) {\n      return_fields->push_back(params.sort_option->field);\n      remove_sort_field = true;\n    }\n  }\n\n  cmd_cntx.tx()->ScheduleSingleHop([&](Transaction* t, EngineShard* es) {\n    auto* idx = es->search_indices()->GetIndex(index_name);\n    if (!idx || shard_docs[es->shard_id()].empty())\n      return OpStatus::OK;\n    const auto& schema = idx->GetInfo().base_index.schema;\n    for (auto& shard_doc : shard_docs[es->shard_id()]) {\n      if (auto doc =\n              idx->SerializeDocWithKey(shard_doc.id, t->GetOpArgs(es), schema, return_fields);\n          doc) {\n        auto& [key, fields] = *doc;\n        search::SortableValue sort_score = std::monostate{};\n        if (set_sort_score) {\n          sort_score = fields[params.sort_option->field.Name()];\n          if (remove_sort_field)\n            fields.erase(params.sort_option->field.Name());\n        }\n        shard_doc.key = std::string{key};\n   
     shard_doc.values = std::move(fields);\n        shard_doc.sort_score = sort_score;\n      }\n    }\n    return OpStatus::OK;\n  });\n\n  std::vector<SerializedSearchDoc> serialized_docs;\n  serialized_docs.reserve(range_results.size());\n  for (const auto& shard : shard_docs) {\n    for (const auto& doc : shard) {\n      if (!doc.key.empty())\n        serialized_docs.push_back(doc);\n    }\n  }\n\n  results[0].total_hits = serialized_docs.size();\n  results[0].docs = std::move(serialized_docs);\n  return results;\n}\n\n// Try creating global hnsw indices for given fields and return true on success\nbool CreateHnswIndices(std::string_view idx_name, const DocIndex& index) {\n  std::vector<std::string> created_vector_indices;\n  for (const auto& [field_ident, field_info] : index.schema.fields) {\n    if (!field_info.IsIndexableHnswField())\n      continue;\n\n    const auto& vparams = std::get<search::SchemaField::VectorParams>(field_info.special_params);\n\n    bool success = GlobalHnswIndexRegistry::Instance().Create(idx_name, field_info.short_name,\n                                                              vparams, index.type);\n    if (!success) {\n      // Clean created indices\n      for (const auto& cfname : created_vector_indices)\n        GlobalHnswIndexRegistry::Instance().Remove(idx_name, cfname);\n      return false;\n    }\n\n    created_vector_indices.emplace_back(field_info.short_name);\n  }\n  return true;\n}\n\n}  // namespace\n\nvoid CmdFtCreate(CmdArgList args, CommandContext* cmd_cntx) {\n  WarmupQueryParser();\n\n  auto* builder = cmd_cntx->rb();\n  if (cmd_cntx->server_conn_cntx()->conn_state.db_index != 0) {\n    return builder->SendError(\"Cannot create index on db != 0\"sv);\n  }\n\n  CmdArgParser parser{args};\n  string_view idx_name = parser.Next();\n\n  // Parse optional NX (Only create if not exists) parameter for internal usage\n  bool is_NX = parser.Check(\"NX\");\n\n  bool is_cross_shard = parser.Check(\"CSS\");\n\n  auto 
parsed_index = CreateDocIndex(idx_name, &parser);\n  if (SendErrorIfOccurred(parsed_index, &parser, cmd_cntx)) {\n    return;\n  }\n\n  // Check if index already exists\n  atomic_uint exists_cnt = 0;\n  cmd_cntx->tx()->Execute(\n      [idx_name, &exists_cnt](auto* tx, auto* es) {\n        if (es->search_indices()->GetIndex(idx_name) != nullptr)\n          exists_cnt.fetch_add(1, std::memory_order_relaxed);\n        return OpStatus::OK;\n      },\n      false);\n\n  DCHECK(exists_cnt == 0u || exists_cnt == shard_set->size());\n\n  if (exists_cnt.load(memory_order_relaxed) > 0) {\n    cmd_cntx->tx()->Conclude();\n    return is_NX ? builder->SendOk() : builder->SendError(\"Index already exists\");\n  }\n\n  if (absl::GetFlag(FLAGS_cluster_search) && !is_cross_shard && IsClusterEnabled()) {\n    std::string args_str = absl::StrJoin(args.subspan(1), \" \");\n    std::string cmd = absl::StrCat(\"FT.CREATE \", idx_name, \" CSS \", args_str);\n\n    // TODO add processing of the reply to make sure index was created successfully on all shards,\n    // and prevent simultaneous creation of the same index.\n    auto req_future = cluster::Coordinator::Current().DispatchAll(cmd, [](const RESPObj&) {});\n    // TODO add error handling\n    CHECK(!req_future.Get());\n  }\n\n  if (!CreateHnswIndices(idx_name, *parsed_index)) {\n    cmd_cntx->tx()->Conclude();\n    return builder->SendError(\"Index already exists\");\n  }\n\n  auto idx_ptr = make_shared<DocIndex>(std::move(parsed_index).value());\n  cmd_cntx->tx()->Execute(\n      [idx_name, idx_ptr](auto* tx, auto* es) {\n        es->search_indices()->InitIndex(tx->GetOpArgs(es), idx_name, idx_ptr);\n        return OpStatus::OK;\n      },\n      true);\n\n  builder->SendOk();\n}\n\nvoid CmdFtAlter(CmdArgList args, CommandContext* cmd_cntx) {\n  CmdArgParser parser{args};\n  string_view idx_name = parser.Next();\n  parser.ExpectTag(\"SCHEMA\");\n  parser.ExpectTag(\"ADD\");\n  auto* builder = cmd_cntx->rb();\n  
RETURN_ON_PARSE_ERROR(parser, cmd_cntx);\n\n  // First, extract existing index info\n  shared_ptr<DocIndex> index_info;\n  auto idx_cb = [idx_name, &index_info](auto* tx, EngineShard* es) {\n    if (es->shard_id() > 0)  // all shards have the same data, fetch from first\n      return OpStatus::OK;\n\n    if (auto* idx = es->search_indices()->GetIndex(idx_name); idx != nullptr)\n      index_info = make_shared<DocIndex>(idx->GetInfo().base_index);\n    return OpStatus::OK;\n  };\n  cmd_cntx->tx()->Execute(idx_cb, false);\n\n  if (!index_info) {\n    cmd_cntx->tx()->Conclude();\n    return cmd_cntx->SendError(\"Index not found\");\n  }\n\n  // Parse additional schema\n  DocIndex new_index{};\n  new_index.type = index_info->type;\n  auto parse_result = ParseSchema(&parser, &new_index);\n  if (SendErrorIfOccurred(parse_result, &parser, cmd_cntx)) {\n    cmd_cntx->tx()->Conclude();\n    return;\n  }\n\n  auto& new_fields = new_index.schema;\n\n  // For logging we copy the whole schema\n  // TODO: Use a more efficient way for logging\n  LOG(INFO) << \"Adding \"\n            << DocIndexInfo{.base_index = new_index, .hnsw_metadata = {}}.BuildRestoreCommand();\n\n  // Merge schemas\n  search::Schema& schema = index_info->schema;\n  schema.fields.insert(new_fields.fields.begin(), new_fields.fields.end());\n  schema.field_names.insert(new_fields.field_names.begin(), new_fields.field_names.end());\n\n  // Rebuild index\n  // TODO: Introduce partial rebuild\n  auto upd_cb = [idx_name, index_info](Transaction* tx, EngineShard* es) {\n    (void)es->search_indices()->DropIndex(idx_name);\n    es->search_indices()->InitIndex(tx->GetOpArgs(es), idx_name, index_info);\n    return OpStatus::OK;\n  };\n  cmd_cntx->tx()->Execute(upd_cb, true);\n\n  builder->SendOk();\n}\n\nvoid CmdFtDropIndex(CmdArgList args, CommandContext* cmd_cntx) {\n  string_view idx_name = ArgS(args, 0);\n\n  // Parse optional DD (Delete Documents) parameter\n  bool delete_docs = args.size() > 1 && 
absl::EqualsIgnoreCase(args[1], \"DD\");\n\n  shared_ptr<DocIndex> index_info;\n  atomic_uint num_deleted{0};\n\n  auto cb = [&](Transaction* t, EngineShard* es) {\n    // Get index info from first shard for global cleanup\n    if (es->shard_id() == 0) {\n      if (auto* idx = es->search_indices()->GetIndex(idx_name); idx != nullptr) {\n        index_info = make_shared<DocIndex>(idx->GetInfo().base_index);\n      }\n    }\n    // Drop the index and get its pointer\n    auto index = es->search_indices()->DropIndex(idx_name);\n    if (!index)\n      return OpStatus::OK;\n\n    num_deleted.fetch_add(1);\n\n    // If DD is set, delete all documents that were in the index\n    if (delete_docs) {\n      // Get const reference to document keys map (index will be destroyed after this scope)\n      const auto& doc_keys = index->key_index().GetDocKeysMap();\n\n      auto op_args = t->GetOpArgs(es);\n      auto& db_slice = op_args.GetDbSlice();\n\n      for (const auto& [key, doc_id] : doc_keys) {\n        auto it = db_slice.FindMutable(op_args.db_cntx, key).it;\n        if (IsValid(it)) {\n          db_slice.Del(op_args.db_cntx, it);\n        }\n      }\n    }\n\n    return OpStatus::OK;\n  };\n\n  cmd_cntx->tx()->Execute(cb, true);\n\n  if (index_info) {\n    for (const auto& [field_ident, field_info] : index_info->schema.fields) {\n      if (field_info.type == search::SchemaField::VECTOR &&\n          !(field_info.flags & search::SchemaField::NOINDEX)) {\n        if (GlobalHnswIndexRegistry::Instance().Remove(idx_name, field_info.short_name)) {\n          num_deleted.fetch_add(1);\n        }\n      }\n    }\n  }\n\n  if (num_deleted == 0u)\n    return cmd_cntx->SendError(IndexNotFoundMsg(idx_name));\n  return cmd_cntx->rb()->SendOk();\n}\n\nvoid CmdFtInfo(CmdArgList args, CommandContext* cmd_cntx) {\n  string_view idx_name = ArgS(args, 0);\n\n  vector<DocIndexInfo> infos(shard_set->size());\n\n  cmd_cntx->tx()->ScheduleSingleHop([&](Transaction* t, EngineShard* es) {\n    
auto* index = es->search_indices()->GetIndex(idx_name);\n    if (index != nullptr)\n      infos[es->shard_id()] = index->GetInfo();\n    return OpStatus::OK;\n  });\n\n  // Count how many shards didn't find the index by checking empty entries.\n  size_t num_notfound = std::count_if(infos.begin(), infos.end(), [](const DocIndexInfo& info) {\n    return info.base_index.schema.fields.empty();\n  });\n\n  DCHECK(num_notfound == 0u || num_notfound == shard_set->size());\n  auto* rb = static_cast<RedisReplyBuilder*>(cmd_cntx->rb());\n\n  if (num_notfound > 0u)\n    return rb->SendError(IndexNotFoundMsg(idx_name));\n\n  DCHECK(infos.front().base_index.schema.fields.size() ==\n         infos.back().base_index.schema.fields.size());\n\n  bool indexing = false;\n  float percent_indexed = 1.0;\n  size_t total_num_docs = 0;\n  for (const auto& info : infos) {\n    total_num_docs += info.num_docs;\n    indexing |= info.indexing;\n    percent_indexed = std::min(percent_indexed, info.percent_indexed);\n  }\n\n  const auto& info = infos.front();\n  const auto& schema = info.base_index.schema;\n\n  rb->StartCollection(7, CollectionType::MAP);\n\n  rb->SendSimpleString(\"index_name\");\n  rb->SendSimpleString(idx_name);\n\n  rb->SendSimpleString(\"index_definition\");\n  {\n    rb->StartCollection(3, CollectionType::MAP);\n    rb->SendSimpleString(\"key_type\");\n    rb->SendSimpleString(info.base_index.type == DocIndex::JSON ? 
\"JSON\" : \"HASH\");\n    rb->SendSimpleString(\"prefixes\");\n    rb->StartArray(info.base_index.prefixes.size());\n    for (const auto& prefix : info.base_index.prefixes) {\n      rb->SendBulkString(prefix);\n    }\n    rb->SendSimpleString(\"default_score\");\n    rb->SendLong(1);\n  }\n\n  rb->SendSimpleString(\"index_options\");\n  rb->SendEmptyArray();\n\n  rb->SendSimpleString(\"attributes\");\n  rb->StartArray(schema.fields.size());\n  for (const auto& [field_ident, field_info] : schema.fields) {\n    vector<string> info;\n\n    string_view base[] = {\"identifier\"sv, string_view{field_ident},\n                          \"attribute\"sv,  field_info.short_name,\n                          \"type\"sv,       SearchFieldTypeToString(field_info.type)};\n    info.insert(info.end(), base, base + ABSL_ARRAYSIZE(base));\n\n    if (field_info.flags & search::SchemaField::NOINDEX)\n      info.emplace_back(\"NOINDEX\"sv);\n\n    if (field_info.flags & search::SchemaField::SORTABLE)\n      info.emplace_back(\"SORTABLE\"sv);\n\n    if (field_info.type == search::SchemaField::NUMERIC) {\n      auto& numeric_params =\n          std::get<search::SchemaField::NumericParams>(field_info.special_params);\n      info.emplace_back(\"blocksize\"sv);\n      info.emplace_back(std::to_string(numeric_params.block_size));\n    }\n\n    rb->SendSimpleStrArr(info);\n  }\n\n  rb->SendSimpleString(\"num_docs\");\n  rb->SendLong(total_num_docs);\n\n  rb->SendSimpleString(\"indexing\");\n  rb->SendLong(indexing ? 
1 : 0);\n\n  rb->SendSimpleString(\"percent_indexed\");\n  rb->SendDouble(percent_indexed);\n}\n\nvoid CmdFtList(CmdArgList args, CommandContext* cmd_cntx) {\n  atomic_int first{0};\n  vector<string> names;\n\n  cmd_cntx->tx()->ScheduleSingleHop([&](Transaction* t, EngineShard* es) {\n    // Using `first` to assign `names` only once without a race\n    if (first.fetch_add(1) == 0)\n      names = es->search_indices()->GetIndexNames();\n    return OpStatus::OK;\n  });\n  auto* rb = static_cast<RedisReplyBuilder*>(cmd_cntx->rb());\n  rb->SendBulkStrArr(names);\n}\n\nstatic vector<SearchResult> FtSearchCSS(std::string_view idx, std::string_view query,\n                                        std::string_view args_str, const SearchParams& params) {\n  vector<SearchResult> results;\n  const bool sorted = params.sort_option.has_value();\n  const std::string_view with_sortkeys = sorted && !params.with_sortkeys ? \" WITHSORTKEYS\"sv : \"\"sv;\n  std::string cmd = absl::StrCat(\"FT.SEARCH \", idx, \" \", query, \" CSS \", args_str, with_sortkeys);\n\n  util::fb2::Mutex mu_;\n  auto req_future = cluster::Coordinator::Current().DispatchAll(cmd, [&](const RESPObj& resp_obj) {\n    RESPIterator it{resp_obj};\n    const auto size = it.Next<uint64_t>();\n\n    std::lock_guard lock{mu_};\n    auto& res = results.emplace_back();\n    results.back().total_hits = size;\n\n    while (it.HasNext()) {\n      auto& search_doc = res.docs.emplace_back();\n      search_doc.key = it.Next<std::string>();\n      if (sorted) {\n        auto sort_score = it.Next<std::string_view>();\n        if (sort_score.empty() || (sort_score[0] != '#' && sort_score[0] != '$')) {\n          it.SetError();\n          break;\n        }\n        if (sort_score[0] == '#') {  // It's a double\n          double sort_res = 0;\n          if (ParseDouble(sort_score.substr(1), &sort_res)) {\n            search_doc.sort_score = sort_res;\n          } else {\n            it.SetError();\n            break;\n          }\n   
     } else {  // It's a string\n          search_doc.sort_score = std::string(sort_score.substr(1));\n        }\n      }\n\n      for (auto arr_fields = it.Next<RESPIterator>(); arr_fields.HasNext();) {\n        auto [key, value] = arr_fields.Next<std::string, std::string>();\n        search_doc.values.emplace(std::move(key), std::move(value));\n      }\n    }\n    if (it.HasError()) {\n      LOG(ERROR) << \"FT.SEARCH CSS reply parsing error: \" << resp_obj;\n    }\n  });\n  // TODO add error handling\n  CHECK(!req_future.Get());\n  return results;\n}\n\nvoid CmdFtSearch(CmdArgList args, CommandContext* cmd_cntx) {\n  CmdArgParser parser{args};\n  string_view index_name = parser.Next();\n  string_view query_str = parser.Next();\n\n  bool is_cross_shard = parser.Check(\"CSS\");\n\n  auto* builder = cmd_cntx->rb();\n  auto params = ParseSearchParams(&parser);\n  if (SendErrorIfOccurred(params, &parser, cmd_cntx))\n    return;\n\n  // Check query string length limit\n  size_t max_query_bytes = absl::GetFlag(FLAGS_search_query_string_bytes);\n  if (query_str.size() > max_query_bytes) {\n    return builder->SendError(\n        absl::StrCat(\"Query string is too long, max length is \", max_query_bytes, \" bytes\"));\n  }\n\n  vector<SearchResult> css_docs;\n  if (absl::GetFlag(FLAGS_cluster_search) && !is_cross_shard && IsClusterEnabled()) {\n    std::string args_str = absl::StrJoin(args.subspan(2), \" \");\n\n    css_docs = FtSearchCSS(index_name, query_str, args_str, *params);\n  }\n\n  search::SearchAlgorithm search_algo;\n  if (!search_algo.Init(query_str, &params->query_params, &params->optional_filters))\n    return builder->SendError(\"Query syntax error\");\n\n  std::unique_ptr<search::AstNode> knn_node;\n  search::AstKnnNode* knn = nullptr;\n\n  if (search_algo.IsKnnQuery()) {\n    // Check if it is HNSW node\n    if (GlobalHnswIndexRegistry::Instance().Exist(index_name, search_algo.GetKnnNode()->field)) {\n      knn_node = search_algo.PopKnnNode();\n      knn 
= std::get_if<search::AstKnnNode>(knn_node.get());\n    }\n  }\n\n  // Check for HNSW vector range query (mutually exclusive with KNN)\n  const search::AstVectorRangeNode* hnsw_range = nullptr;\n  if (!knn) {\n    if (auto* vr = search_algo.GetVectorRangeNode(); vr != nullptr) {\n      if (GlobalHnswIndexRegistry::Instance().Exist(index_name, vr->field))\n        hnsw_range = vr;\n    }\n  }\n\n  // Because our coordinator thread may not have a shard, we can't check ahead if the index exists.\n  atomic<bool> index_not_found{false};\n  vector<SearchResult> docs(shard_set->size());\n\n  const bool knn_has_prefilter = knn && knn->HasPreFilter();\n  bool empty_prefilter_result = true;\n\n  // If the query does not contain knn component, or it is a hybrid query.\n  // HNSW vector range has no prefilter, so skip per-shard search entirely.\n  if ((!knn || knn_has_prefilter) && !hnsw_range) {\n    cmd_cntx->tx()->ScheduleSingleHop([&](Transaction* t, EngineShard* es) {\n      if (auto* index = es->search_indices()->GetIndex(index_name); index)\n        docs[es->shard_id()] =\n            index->Search(t->GetOpArgs(es), *params, &search_algo, knn_has_prefilter);\n      else\n        index_not_found.store(true, memory_order_relaxed);\n      return OpStatus::OK;\n    });\n\n    if (index_not_found.load(memory_order_relaxed))\n      return cmd_cntx->SendError(string{index_name} + \": no such index\");\n\n    for (const auto& res : docs) {\n      empty_prefilter_result &= res.docs.empty();\n      if (res.error)\n        return cmd_cntx->SendError(*res.error);\n    }\n  }\n\n  if (knn_node && (!knn_has_prefilter || !empty_prefilter_result)) {\n    auto hnsw_index = GlobalHnswIndexRegistry::Instance().Get(index_name, knn->field);\n    if (!hnsw_index) {\n      return builder->SendError(string{index_name} + \": no such global hnsw index\");\n    }\n    docs = SearchGlobalHnswIndex(knn, hnsw_index, index_name, search_algo.GetKnnScoreSortOption(),\n                                 
docs, *params, *cmd_cntx);\n  }\n\n  auto knn_sort_option = search_algo.GetKnnScoreSortOption();\n\n  if (hnsw_range) {\n    auto hnsw_index = GlobalHnswIndexRegistry::Instance().Get(index_name, hnsw_range->field);\n    if (!hnsw_index) {\n      return builder->SendError(string{index_name} + \": no such global hnsw index\");\n    }\n    if (hnsw_range->vec.second == 0) {\n      return builder->SendError(\"Parse error of vector parameters\");\n    }\n    if (hnsw_range->radius < 0 || std::isnan(hnsw_range->radius)) {\n      return builder->SendError(\n          absl::StrCat(\"VECTOR_RANGE radius must be non-negative, got: \", hnsw_range->radius));\n    }\n    if (hnsw_index->GetDim() != hnsw_range->vec.second) {\n      return builder->SendError(\n          absl::StrCat(\"Wrong vector index dimensions, got: \", hnsw_range->vec.second,\n                       \", expected: \", hnsw_index->GetDim()));\n    }\n    if (!hnsw_range->score_alias.empty())\n      knn_sort_option =\n          search::KnnScoreSortOption{hnsw_range->score_alias, std::numeric_limits<size_t>::max()};\n    docs = SearchGlobalHnswIndexRange(hnsw_range, hnsw_index, index_name, knn_sort_option, *params,\n                                      *cmd_cntx);\n  }\n\n  // TODO add merging of CSS results with local results (SORT, LIMIT, etc)\n  docs.insert(docs.end(), std::make_move_iterator(css_docs.begin()),\n              std::make_move_iterator(css_docs.end()));\n\n  SearchReply(*params, knn_sort_option, absl::MakeSpan(docs), builder, is_cross_shard);\n}\n\nvoid CmdFtProfile(CmdArgList args, CommandContext* cmd_cntx) {\n  CmdArgParser parser{args};\n\n  string_view index_name = parser.Next();\n  auto* rb = static_cast<RedisReplyBuilder*>(cmd_cntx->rb());\n\n  if (!parser.Check(\"SEARCH\") && !parser.Check(\"AGGREGATE\")) {\n    return rb->SendError(\"no `SEARCH` or `AGGREGATE` provided\");\n  }\n\n  parser.Check(\"LIMITED\");  // TODO: Implement limited profiling\n  parser.ExpectTag(\"QUERY\");\n\n  
string_view query_str = parser.Next();\n\n  auto params = ParseSearchParams(&parser);\n  if (SendErrorIfOccurred(params, &parser, cmd_cntx))\n    return;\n\n  search::SearchAlgorithm search_algo;\n  if (!search_algo.Init(query_str, &params->query_params))\n    return cmd_cntx->SendError(\"query syntax error\");\n\n  search_algo.EnableProfiling();\n\n  absl::Time start = absl::Now();\n  const size_t shards_count = shard_set->size();\n\n  // Because our coordinator thread may not have a shard, we can't check ahead if the index exists.\n  std::atomic<bool> index_not_found{false};\n  std::vector<SearchResult> search_results(shards_count);\n  std::vector<absl::Duration> profile_results(shards_count);\n\n  cmd_cntx->tx()->ScheduleSingleHop([&](Transaction* t, EngineShard* es) {\n    auto* index = es->search_indices()->GetIndex(index_name);\n    if (!index) {\n      index_not_found.store(true, memory_order_relaxed);\n      return OpStatus::OK;\n    }\n\n    const ShardId shard_id = es->shard_id();\n\n    auto shard_start = absl::Now();\n    search_results[shard_id] = index->Search(t->GetOpArgs(es), *params, &search_algo, false);\n    profile_results[shard_id] = {absl::Now() - shard_start};\n\n    return OpStatus::OK;\n  });\n\n  if (index_not_found.load())\n    return rb->SendError(std::string{index_name} + \": no such index\");\n\n  auto took = absl::Now() - start;\n\n  bool result_is_empty = false;\n  size_t total_docs = 0;\n  size_t total_serialized = 0;\n  for (const auto& result : search_results) {\n    if (!result.error) {\n      total_docs += result.total_hits;\n      total_serialized += result.docs.size();\n    } else {\n      result_is_empty = true;\n    }\n  }\n\n  // First element -> Result of the search command\n  // Second element -> Profile information\n  rb->StartArray(2);\n\n  // Result of the search command\n  if (!result_is_empty) {\n    SearchReply(*params, search_algo.GetKnnScoreSortOption(), absl::MakeSpan(search_results), rb,\n                
false);\n  } else {\n    rb->StartArray(1);\n    rb->SendLong(0);\n  }\n\n  // Profile information\n  rb->StartArray(shards_count + 1);\n\n  // General stats\n  rb->StartCollection(3, CollectionType::MAP);\n  rb->SendBulkString(\"took\");\n  rb->SendLong(absl::ToInt64Microseconds(took));\n  rb->SendBulkString(\"hits\");\n  rb->SendLong(static_cast<long>(total_docs));\n  rb->SendBulkString(\"serialized\");\n  rb->SendLong(static_cast<long>(total_serialized));\n\n  // Per-shard stats\n  for (size_t shard_id = 0; shard_id < shards_count; shard_id++) {\n    rb->StartCollection(2, CollectionType::MAP);\n    rb->SendBulkString(\"took\");\n    rb->SendLong(absl::ToInt64Microseconds(profile_results[shard_id]));\n    rb->SendBulkString(\"tree\");\n\n    const auto& search_result = search_results[shard_id];\n    if (search_result.error || !search_result.profile || search_result.profile->events.empty()) {\n      rb->SendEmptyArray();\n      continue;\n    }\n\n    const auto& events = search_result.profile->events;\n    for (size_t i = 0; i < events.size(); i++) {\n      const auto& event = events[i];\n\n      size_t children = 0;\n      size_t children_micros = 0;\n      for (size_t j = i + 1; j < events.size(); j++) {\n        if (events[j].depth == event.depth)\n          break;\n        if (events[j].depth == event.depth + 1) {\n          children++;\n          children_micros += events[j].micros;\n        }\n      }\n\n      rb->StartCollection(4 + (children > 0), CollectionType::MAP);\n      rb->SendSimpleString(\"total_time\");\n      rb->SendLong(event.micros);\n      rb->SendSimpleString(\"operation\");\n      rb->SendSimpleString(event.descr);\n      rb->SendSimpleString(\"self_time\");\n      rb->SendLong(event.micros - children_micros);\n      rb->SendSimpleString(\"procecssed\");\n      rb->SendLong(event.num_processed);\n\n      if (children > 0) {\n        rb->SendSimpleString(\"children\");\n        rb->StartArray(children);\n      }\n    }\n  }\n}\n\nvoid 
CmdFtTagVals(CmdArgList args, CommandContext* cmd_cntx) {\n  string_view index_name = ArgS(args, 0);\n  string_view field_name = ArgS(args, 1);\n  VLOG(1) << \"FtTagVals: \" << index_name << \" \" << field_name;\n\n  vector<io::Result<StringVec, ErrorReply>> shard_results(shard_set->size(), StringVec{});\n\n  cmd_cntx->tx()->ScheduleSingleHop([&](Transaction* t, EngineShard* es) {\n    if (auto* index = es->search_indices()->GetIndex(index_name); index)\n      shard_results[es->shard_id()] = index->GetTagVals(field_name);\n    else\n      shard_results[es->shard_id()] =\n          nonstd::make_unexpected(ErrorReply(IndexNotFoundMsg(index_name)));\n\n    return OpStatus::OK;\n  });\n\n  absl::flat_hash_set<string> result_set;\n  auto* rb = static_cast<RedisReplyBuilder*>(cmd_cntx->rb());\n\n  // Check first if either shard had errors. Also merge the results into a single set.\n  for (auto& res : shard_results) {\n    if (res) {\n      result_set.insert(make_move_iterator(res->begin()), make_move_iterator(res->end()));\n    } else {\n      res.error().kind = facade::kSearchErrType;\n      return cmd_cntx->SendError(res.error());\n    }\n  }\n\n  shard_results.clear();\n  vector<string> vec(result_set.begin(), result_set.end());\n\n  rb->SendBulkStrArr(vec, CollectionType::SET);\n}\n\nvoid CmdFtAggregate(CmdArgList args, CommandContext* cmd_cntx) {\n  CmdArgParser parser{args};\n  auto* builder = cmd_cntx->rb();\n\n  const auto params = ParseAggregatorParams(&parser);\n  if (SendErrorIfOccurred(params, &parser, cmd_cntx))\n    return;\n\n  // Check query string length limit\n  size_t max_query_bytes = absl::GetFlag(FLAGS_search_query_string_bytes);\n  if (params->query.size() > max_query_bytes) {\n    return builder->SendError(\n        absl::StrCat(\"Query string is too long, max length is \", max_query_bytes, \" bytes\"));\n  }\n\n  std::vector<aggregate::DocValues> values;\n\n  if (params->joins.empty()) {\n    search::SearchAlgorithm search_algo;\n    if 
(!search_algo.Init(params->query, &params->params))\n      return builder->SendError(\"Query syntax error\");\n\n    using ResultContainer = decltype(declval<ShardDocIndex>().SearchForAggregator(\n        declval<OpArgs>(), params.value(), &search_algo));\n\n    vector<ResultContainer> query_results(shard_set->size());\n\n    cmd_cntx->tx()->ScheduleSingleHop([&](Transaction* t, EngineShard* es) {\n      if (auto* index = es->search_indices()->GetIndex(params->index); index) {\n        query_results[es->shard_id()] =\n            index->SearchForAggregator(t->GetOpArgs(es), params.value(), &search_algo);\n      }\n      return OpStatus::OK;\n    });\n\n    // ResultContainer is absl::flat_hash_map<std::string, search::SortableValue>\n    // DocValues is absl::flat_hash_map<std::string_view, SortableValue>\n    // Keys of values should point to the keys of the query_results\n    size_t total_values = 0;\n    for (const auto& sub_results : query_results) {\n      total_values += sub_results.size();\n    }\n\n    values.reserve(total_values);\n    for (auto& sub_results : query_results) {\n      for (auto& docs : sub_results) {\n        aggregate::DocValues doc_value;\n        for (auto& doc : docs) {\n          doc_value[doc.first] = std::move(doc.second);\n        }\n        values.emplace_back(std::move(doc_value));\n      }\n    }\n  } else {\n    const size_t indexes_count = params->joins.size() + 1;\n\n    std::vector<search::SearchAlgorithm> search_algos(indexes_count);\n    if (!search_algos[0].Init(params->query, &params->params)) {\n      return builder->SendError(\"Query syntax error\");\n    }\n\n    for (size_t i = 0; i < params->joins.size(); ++i) {\n      // Check join query string length limit\n      if (params->joins[i].query.size() > max_query_bytes) {\n        return cmd_cntx->SendError(absl::StrCat(\"Join query string is too long, max length is \",\n                                                max_query_bytes, \" bytes\"));\n      }\n\n      
search::QueryParams empty_params;\n      if (!search_algos[i + 1].Init(params->joins[i].query, &empty_params)) {\n        return cmd_cntx->SendError(\"Query syntax error in JOIN\");\n      }\n    }\n\n    auto data_for_join = PreprocessDataForJoin(params->index, *params);\n    if (!data_for_join) {\n      return cmd_cntx->SendError(data_for_join.error());\n    }\n\n    // preaggregated_shard_data is preaggregation results per index per shard\n    // preaggregated_shard_data[shard_id][i] is the results of index i on shard shard_id\n    using JoinDataVector = join::Vector<join::OwnedEntry>;\n    std::vector<std::vector<JoinDataVector>> preaggregated_shard_data(\n        shard_set->size(), std::vector<JoinDataVector>(indexes_count));\n    cmd_cntx->tx()->Execute(\n        [&](Transaction* t, EngineShard* es) {\n          auto& shard_data = preaggregated_shard_data[es->shard_id()];\n          for (size_t i = 0; i < indexes_count; ++i) {\n            if (auto* index = es->search_indices()->GetIndex(data_for_join->indexes[i]); index) {\n              shard_data[i] = index->PreagregateDataForJoin(\n                  t->GetOpArgs(es), data_for_join->needed_fields[i], &search_algos[i]);\n            }\n          }\n          return OpStatus::OK;\n        },\n        false);\n\n    // Do join\n    auto joined_entries = DoJoin(preaggregated_shard_data, *params, *data_for_join);\n\n    // Collect doc_ids per index that were joined\n    // Each shard stores set of doc_ids per each index that was joined\n    using DocIdsSet = absl::flat_hash_set<search::DocId>;\n    std::vector<std::vector<DocIdsSet>> doc_ids_per_shard(shard_set->size(),\n                                                          std::vector<DocIdsSet>(indexes_count));\n    for (const auto& entry : joined_entries) {\n      for (size_t index = 0; index < indexes_count; index++) {\n        const auto [shard_id, doc_id] = entry[index];\n        doc_ids_per_shard[shard_id][index].insert(doc_id);\n      }\n    }\n\n   
 // Load fields for keys that were joined\n    std::vector<std::vector<ShardDocIndex::FieldsValuesPerDocId>> shard_keys_data_per_index(\n        shard_set->size(), std::vector<ShardDocIndex::FieldsValuesPerDocId>(indexes_count));\n    cmd_cntx->tx()->Execute(\n        [&](Transaction* t, EngineShard* es) {\n          const ShardId shard_id = es->shard_id();\n          auto& shard_keys_data = shard_keys_data_per_index[shard_id];\n          const auto& doc_ids_per_index = doc_ids_per_shard[shard_id];\n\n          for (size_t i = 0; i < indexes_count; ++i) {\n            if (auto* index = es->search_indices()->GetIndex(data_for_join->indexes[i]); index) {\n              shard_keys_data[i] = index->LoadKeysData(t->GetOpArgs(es), doc_ids_per_index[i],\n                                                       data_for_join->fields_to_load_per_index[i]);\n            }\n          }\n          return OpStatus::OK;\n        },\n        true);\n\n    // Now we have sets of keys that were joined and keys data.\n    // We need to build DocValues for each joined set.\n    values =\n        MergeJoinedKeysWithData(*params, *data_for_join, joined_entries, shard_keys_data_per_index);\n  }\n\n  std::vector<std::string_view> load_fields;\n  if (params->load_fields) {\n    load_fields.reserve(params->load_fields->size());\n    for (const auto& field : params->load_fields.value()) {\n      load_fields.push_back(field.OutputName());\n    }\n  }\n\n  auto agg_results = aggregate::Process(std::move(values), load_fields, params->steps);\n\n  auto* rb = static_cast<RedisReplyBuilder*>(cmd_cntx->rb());\n  auto sortable_value_sender = SortableValueSender(rb);\n\n  const size_t result_size = agg_results.values.size();\n  RedisReplyBuilder::ArrayScope scope{rb, result_size + 1};\n  rb->SendLong(result_size);\n\n  for (const auto& value : agg_results.values) {\n    size_t fields_count = 0;\n    for (const auto& field : agg_results.fields_to_print) {\n      if (value.find(field) != value.end()) 
{\n        fields_count++;\n      }\n    }\n\n    rb->StartArray(fields_count * 2);\n    for (const auto& field : agg_results.fields_to_print) {\n      auto it = value.find(field);\n      if (it != value.end()) {\n        rb->SendBulkString(field);\n        std::visit(sortable_value_sender, it->second);\n      }\n    }\n  }\n}\n\nvoid CmdFtSynDump(CmdArgList args, CommandContext* cmd_cntx) {\n  string_view index_name = ArgS(args, 0);\n  auto* rb = static_cast<RedisReplyBuilder*>(cmd_cntx->rb());\n\n  atomic_bool index_not_found{true};\n  // Store per-shard synonym data\n  vector<absl::flat_hash_map<std::string, absl::flat_hash_set<std::string>>> shard_term_groups(\n      shard_set->size());\n\n  // Collect synonym data from all shards\n  cmd_cntx->tx()->Execute(\n      [&](Transaction* t, EngineShard* es) {\n        auto* index = es->search_indices()->GetIndex(index_name);\n        if (!index)\n          return OpStatus::OK;\n\n        index_not_found.store(false, std::memory_order_relaxed);\n\n        // Get synonym data from current shard\n        const auto& groups = index->GetSynonyms().GetGroups();\n\n        // Build term -> group_ids mapping for this shard\n        auto& term_groups = shard_term_groups[es->shard_id()];\n        for (const auto& [group_id, group] : groups) {\n          for (const auto& term : group) {\n            term_groups[term].insert(group_id);\n          }\n        }\n\n        return OpStatus::OK;\n      },\n      true);\n\n  if (index_not_found.load(std::memory_order_relaxed))\n    return rb->SendError(\"Unknown index name\");\n\n  // Merge data from all shards into a single map\n  absl::flat_hash_map<std::string, absl::flat_hash_set<std::string>> merged_term_groups;\n  for (auto& shard_groups : shard_term_groups) {\n    for (auto& [term, group_ids] : shard_groups) {\n      auto& merged_ids = merged_term_groups[term];\n      merged_ids.merge(group_ids);\n    }\n  }\n\n  // Format response according to Redis protocol:\n  // Array of 
term + array of group ids pairs\n  rb->StartArray(merged_term_groups.size() * 2);\n  for (const auto& [term, group_ids] : merged_term_groups) {\n    rb->SendBulkString(term);\n    rb->StartArray(group_ids.size());\n\n    // Sort group_ids before sending\n    std::vector<std::string> sorted_ids(group_ids.begin(), group_ids.end());\n    std::sort(sorted_ids.begin(), sorted_ids.end());\n\n    for (const auto& id : sorted_ids) {\n      rb->SendBulkString(id);\n    }\n  }\n}\n\nvoid FtConfigHelp(CmdArgParser* parser, CommandContext* cmd_cntx) {\n  string_view param = parser->Next();\n\n  vector<string> names = config_registry.List(param);\n  vector<absl::CommandLineFlag*> res;\n\n  for (const auto& name : names) {\n    auto* flag = config_registry.GetFlag(name);\n    DCHECK(flag);\n    if (flag && flag->Filename().find(kCurrentFile) != std::string::npos) {\n      res.push_back(flag);\n    }\n  }\n\n  auto* rb = static_cast<RedisReplyBuilder*>(cmd_cntx->rb());\n  rb->StartArray(res.size());\n  for (const auto& flag : res) {\n    rb->StartArray(5);\n    rb->SendBulkString(flag->Name());\n    rb->SendBulkString(\"Description\"sv);\n    rb->SendBulkString(flag->Help());\n    rb->SendBulkString(\"Value\"sv);\n    rb->SendBulkString(flag->CurrentValue());\n  }\n}\n\nvoid FtConfigGet(CmdArgParser* parser, CommandContext* cmd_cntx) {\n  string_view param = parser->Next();\n  vector<string> names = config_registry.List(param);\n\n  vector<string> res;\n\n  for (const auto& name : names) {\n    auto* flag = config_registry.GetFlag(name);\n    DCHECK(flag);\n    if (flag && flag->Filename().find(kCurrentFile) != std::string::npos) {\n      // Convert internal name (search_query_string_bytes) back to user-facing format\n      // (search.query-string-bytes)\n      string display_name = DenormalizeConfigName(name);\n      res.push_back(display_name);\n      res.push_back(flag->CurrentValue());\n    }\n  }\n  auto* rb = static_cast<RedisReplyBuilder*>(cmd_cntx->rb());\n  return 
rb->SendBulkStrArr(res, CollectionType::MAP);\n}\n\nvoid FtConfigSet(CmdArgParser* parser, CommandContext* cmd_cntx) {\n  auto [param, value] = parser->Next<string_view, string_view>();\n\n  if (!parser->Finalize()) {\n    cmd_cntx->SendError(parser->TakeError().MakeReply());\n    return;\n  }\n\n  vector<string> names = config_registry.List(param);\n  if (names.size() != 1 ||\n      config_registry.GetFlag(names[0])->Filename().find(kCurrentFile) == std::string::npos) {\n    return cmd_cntx->SendError(\"Invalid option name\");\n  }\n\n  ConfigRegistry::SetResult result = config_registry.Set(param, value);\n\n  const char kErrPrefix[] = \"FT.CONFIG SET failed (possibly related to argument '\";\n  switch (result) {\n    case ConfigRegistry::SetResult::OK:\n      return cmd_cntx->SendOk();\n    case ConfigRegistry::SetResult::UNKNOWN:\n      return cmd_cntx->SendError(\n          absl::StrCat(\"Unknown option or number of arguments for CONFIG SET - '\", param, \"'\"),\n          kConfigErrType);\n\n    case ConfigRegistry::SetResult::READONLY:\n      return cmd_cntx->SendError(absl::StrCat(kErrPrefix, param, \"') - can't set immutable config\"),\n                                 kConfigErrType);\n\n    case ConfigRegistry::SetResult::INVALID:\n      return cmd_cntx->SendError(absl::StrCat(kErrPrefix, param, \"') - argument can not be set\"),\n                                 kConfigErrType);\n  }\n  ABSL_UNREACHABLE();\n}\n\nvoid CmdFtConfig(CmdArgList args, CommandContext* cmd_cntx) {\n  CmdArgParser parser{args};\n  auto func = parser.MapNext(\"GET\", &FtConfigGet, \"SET\", &FtConfigSet, \"HELP\", &FtConfigHelp);\n\n  if (auto err = parser.TakeError(); err) {\n    cmd_cntx->SendError(\"Unknown subcommand\");\n    return;\n  }\n  func(&parser, cmd_cntx);\n}\n\nvoid CmdFtSynUpdate(CmdArgList args, CommandContext* cmd_cntx) {\n  facade::CmdArgParser parser{args};\n  auto [index_name, group_id] = parser.Next<string_view, string>();\n\n  // Redis ignores this parameter. 
Checked on redis_version:6.2.13\n  [[maybe_unused]] bool skip_initial_scan = parser.Check(\"SKIPINITIALSCAN\");\n\n  // Collect terms\n  std::vector<std::string_view> terms;\n  while (parser.HasNext()) {\n    terms.emplace_back(parser.Next());\n  }\n\n  if (terms.empty()) {\n    return cmd_cntx->SendError(\"No terms specified\");\n  }\n\n  if (!parser.Finalize()) {\n    return cmd_cntx->SendError(parser.TakeError().MakeReply());\n  }\n\n  std::atomic_bool index_not_found{true};\n\n  // Update synonym groups in all shards\n  cmd_cntx->tx()->Execute(\n      [&](Transaction* t, EngineShard* es) {\n        auto* index = es->search_indices()->GetIndex(index_name);\n        if (!index)\n          return OpStatus::OK;\n\n        index_not_found.store(false, std::memory_order_relaxed);\n\n        // Rebuild indices only for documents containing terms from the updated group\n        index->RebuildForGroup(\n            OpArgs{es, nullptr,\n                   DbContext{&namespaces->GetDefaultNamespace(), 0, GetCurrentTimeMs()}},\n            group_id, terms);\n\n        return OpStatus::OK;\n      },\n      true);\n\n  if (index_not_found.load(std::memory_order_relaxed))\n    return cmd_cntx->SendError(string{index_name} + \": no such index\");\n\n  cmd_cntx->rb()->SendOk();\n}\n\nvoid CmdFtDebug(CmdArgList args, CommandContext* cmd_cntx) {\n  // FT._DEBUG command stub for test compatibility\n  // This command is used by integration tests to control internal behavior\n  CmdArgParser parser{args};\n  auto* rb = static_cast<RedisReplyBuilder*>(cmd_cntx->rb());\n\n  if (args.empty() || parser.Check(\"HELP\")) {\n    rb->SendSimpleString(\"FT._DEBUG - Debug command stub (not fully implemented)\");\n    return;\n  }\n\n  // Handle CONTROLLED_VARIABLE subcommand used by tests\n  if (parser.Check(\"CONTROLLED_VARIABLE\")) {\n    if (parser.Check(\"SET\")) {\n      // Consume variable name and value - these are required by the command\n      parser.Next();  // variable name\n      
parser.Next();  // variable value\n\n      RETURN_ON_PARSE_ERROR(parser, cmd_cntx);\n\n      // Just acknowledge the command\n      rb->SendOk();\n      return;\n    }\n  }\n\n  // For any other subcommand, just return OK\n  rb->SendOk();\n}\n\n#define HFUNC(x) SetHandler(&Cmd##x)\n\n// Redis search is a module. Therefore we introduce dragonfly extension search\n// to set as the default for the search family of commands. More sensible defaults,\n// should also be considered in the future\n\nvoid SearchFamily::Register(CommandRegistry* registry) {\n  using CI = CommandId;\n\n  // Disable journaling, because no-key-transactional enables it by default\n  const uint32_t kReadOnlyMask =\n      CO::NO_KEY_TRANSACTIONAL | CO::NO_KEY_TX_SPAN_ALL | CO::NO_AUTOJOURNAL | CO::IDEMPOTENT;\n\n  registry->StartFamily();\n  *registry\n      << CI{\"FT.CREATE\", CO::JOURNALED | CO::GLOBAL_TRANS, -2, 0, 0, acl::FT_SEARCH}.HFUNC(FtCreate)\n      << CI{\"FT.ALTER\", CO::JOURNALED | CO::GLOBAL_TRANS, -3, 0, 0, acl::FT_SEARCH}.HFUNC(FtAlter)\n      << CI{\"FT.DROPINDEX\", CO::JOURNALED | CO::GLOBAL_TRANS, -2, 0, 0, acl::FT_SEARCH}.HFUNC(\n             FtDropIndex)\n      << CI{\"FT.INFO\", CO::NO_KEY_TRANSACTIONAL | CO::NO_KEY_TX_SPAN_ALL | CO::NO_AUTOJOURNAL,\n            -2,        0,\n            0,         acl::FT_SEARCH}\n             .HFUNC(FtInfo)\n      << CI{\"FT.CONFIG\", CO::ADMIN | CO::LOADING | CO::DANGEROUS, -3, 0, 0, acl::FT_SEARCH}.HFUNC(\n             FtConfig)\n      // Underscore same as in RediSearch because it's \"temporary\" (long time already)\n      << CI{\"FT._LIST\", kReadOnlyMask, 1, 0, 0, acl::FT_SEARCH}.HFUNC(FtList)\n      << CI{\"FT.SEARCH\", kReadOnlyMask, -3, 0, 0, acl::FT_SEARCH}.HFUNC(FtSearch)\n      << CI{\"FT.AGGREGATE\", kReadOnlyMask, -3, 0, 0, acl::FT_SEARCH}.HFUNC(FtAggregate)\n      << CI{\"FT.PROFILE\", kReadOnlyMask, -4, 0, 0, acl::FT_SEARCH}.HFUNC(FtProfile)\n      << CI{\"FT.TAGVALS\", kReadOnlyMask, 3, 0, 0, 
acl::FT_SEARCH}.HFUNC(FtTagVals)\n      << CI{\"FT.SYNDUMP\", kReadOnlyMask, 2, 0, 0, acl::FT_SEARCH}.HFUNC(FtSynDump)\n      << CI{\"FT.SYNUPDATE\", CO::JOURNALED | CO::GLOBAL_TRANS, -4, 0, 0, acl::FT_SEARCH}.HFUNC(\n             FtSynUpdate)\n      << CI{\"FT._DEBUG\", kReadOnlyMask, -1, 0, 0, acl::FT_SEARCH}.HFUNC(FtDebug);\n}\n\nvoid SearchFamily::Shutdown() {\n  shard_set->RunBlockingInParallel([](EngineShard* es) { es->search_indices()->DropAllIndices(); });\n}\n\n}  // namespace dfly\n"
  },
  {
    "path": "src/server/search/search_family.h",
    "content": "// Copyright 2022, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#pragma once\n\nnamespace dfly {\nclass CommandRegistry;\n\nclass SearchFamily {\n public:\n  static void Register(CommandRegistry* registry);\n  static void Shutdown();\n};\n\n}  // namespace dfly\n"
  },
  {
    "path": "src/server/search/search_family_test.cc",
    "content": "// Copyright 2023, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#include \"server/search/search_family.h\"\n\n#include <absl/flags/flag.h>\n#include <absl/strings/str_format.h>\n\n#include <algorithm>\n#include <string_view>\n\n#include \"base/gtest.h\"\n#include \"base/logging.h\"\n#include \"core/detail/gen_utils.h\"\n#include \"facade/error.h\"\n#include \"facade/facade_test.h\"\n#include \"facade/resp_parser.h\"\n#include \"server/test_utils.h\"\n\nusing namespace testing;\nusing namespace std;\nusing namespace util;\nusing namespace facade;\n\nABSL_DECLARE_FLAG(bool, search_reject_legacy_field);\nABSL_DECLARE_FLAG(size_t, search_query_string_bytes);\n\nnamespace {\n\n// Verify and extract score field from vector search result\nauto vector_score = [](std::string_view score_name, const RespExpr::Vec& score_field) -> float {\n  EXPECT_THAT(score_field.size(), 2);\n  EXPECT_THAT(score_field[0].GetString(), score_name);\n  float score;\n  bool success = absl::SimpleAtof(score_field[1].GetView(), &score);\n  EXPECT_TRUE(success);\n  return score;\n};\n\n// Helper to convert float array to binary format\nauto Vec3ToBytes = [](float x, float y, float z) -> string {\n  string result;\n  result.append(reinterpret_cast<const char*>(&x), sizeof(float));\n  result.append(reinterpret_cast<const char*>(&y), sizeof(float));\n  result.append(reinterpret_cast<const char*>(&z), sizeof(float));\n  return result;\n};\n\n}  // namespace\n\nnamespace dfly {\n\nclass SearchFamilyTest : public BaseFamilyTest {\n protected:\n};\n\nconst auto kNoResults = IntArg(0);  // tests auto destruct single element arrays\n\n/* Asserts that response is array of two arrays. 
Used to test FT.PROFILE response */\n::testing::AssertionResult AssertArrayOfTwoArrays(const RespExpr& resp) {\n  if (resp.GetVec().size() != 2) {\n    return ::testing::AssertionFailure()\n           << \"Expected response array length to be 2, but was \" << resp.GetVec().size();\n  }\n\n  const auto& vec = resp.GetVec();\n  if (vec[0].type != RespExpr::ARRAY) {\n    return ::testing::AssertionFailure()\n           << \"Expected resp[0] to be an array, but was \" << vec[0].type;\n  }\n  if (vec[1].type != RespExpr::ARRAY) {\n    return ::testing::AssertionFailure()\n           << \"Expected resp[1] to be an array, but was \" << vec[1].type;\n  }\n  return ::testing::AssertionSuccess();\n}\n\n#define ASSERT_ARRAY_OF_TWO_ARRAYS(resp) ASSERT_PRED1(AssertArrayOfTwoArrays, resp)\n\nMATCHER_P2(DocIds, total, arg_ids, \"\") {\n  if (arg_ids.empty()) {\n    if (auto res = arg.GetInt(); !res || *res != 0) {\n      *result_listener << \"Expected single zero\";\n      return false;\n    }\n    return true;\n  }\n\n  if (arg.type != RespExpr::ARRAY) {\n    *result_listener << \"Wrong response type: \" << int(arg.type);\n    return false;\n  }\n\n  auto results = arg.GetVec();\n  if (results.size() != arg_ids.size() * 2 + 1) {\n    *result_listener << \"Wrong resp vec size: \" << results.size();\n    return false;\n  }\n\n  if (auto num_results = results[0].GetInt(); !num_results || size_t(*num_results) != total) {\n    *result_listener << \"Bad total count in reply: \" << num_results.value_or(-1);\n    return false;\n  }\n\n  vector<string> received_ids;\n  for (size_t i = 1; i < results.size(); i += 2)\n    received_ids.push_back(results[i].GetString());\n\n  vector<string> expected_ids = arg_ids;\n  sort(received_ids.begin(), received_ids.end());\n  sort(expected_ids.begin(), expected_ids.end());\n\n  return expected_ids == received_ids;\n}\n\ntemplate <typename... Args> auto AreDocIds(Args... 
args) {\n  return DocIds(sizeof...(args), vector<string>{args...});\n}\n\ntemplate <typename... Args> auto IsArray(Args... args) {\n  return RespArray(ElementsAre(std::forward<Args>(args)...));\n}\n\ntemplate <typename... Args> auto IsUnordArray(Args... args) {\n  return RespArray(UnorderedElementsAre(std::forward<Args>(args)...));\n}\ntemplate <typename Expected, size_t... Is>\nvoid BuildKvMatchers(std::vector<Matcher<std::pair<std::string, RespExpr>>>& kv_matchers,\n                     const Expected& expected, std::index_sequence<Is...>) {\n  (kv_matchers.emplace_back(Pair(std::get<Is * 2>(expected), std::get<Is * 2 + 1>(expected))), ...);\n}\n\nMATCHER_P(IsMapMatcher, expected, \"\") {\n  if (arg.type != RespExpr::ARRAY) {\n    *result_listener << \"Wrong response type: \" << arg.type;\n    return false;\n  }\n\n  constexpr size_t expected_size = std::tuple_size<decltype(expected)>::value;\n  constexpr size_t exprected_pairs_number = expected_size / 2;\n\n  auto result = arg.GetVec();\n  if (result.size() != expected_size) {\n    *result_listener << \"Wrong resp array size: \" << result.size();\n    return false;\n  }\n\n  std::vector<std::pair<std::string, RespExpr>> received_pairs;\n  for (size_t i = 0; i < result.size(); i += 2) {\n    received_pairs.emplace_back(result[i].GetString(), result[i + 1]);\n  }\n\n  std::vector<Matcher<std::pair<std::string, RespExpr>>> kv_matchers;\n  BuildKvMatchers(kv_matchers, expected, std::make_index_sequence<exprected_pairs_number>{});\n\n  return ExplainMatchResult(UnorderedElementsAreArray(kv_matchers), received_pairs,\n                            result_listener);\n}\n\ntemplate <typename... Args> auto IsMap(Args... 
args) {\n  return IsMapMatcher(std::make_tuple(args...));\n}\n\nMATCHER_P(IsMapWithSizeMatcher, expected, \"\") {\n  if (arg.type != RespExpr::ARRAY) {\n    *result_listener << \"Wrong response type: \" << arg.type;\n    return false;\n  }\n  constexpr size_t expected_size = std::tuple_size<decltype(expected)>::value;\n  constexpr size_t exprected_pairs_number = expected_size / 2;\n\n  auto result = arg.GetVec();\n  if (result.size() != expected_size + 1 || result.size() % 2 != 1) {\n    *result_listener << \"Wrong resp array size: \" << result.size();\n    return false;\n  }\n\n  if (result[0].GetInt() != exprected_pairs_number) {\n    *result_listener << \"Wrong pairs count: \" << result[0].GetInt().value_or(-1);\n    return false;\n  }\n\n  std::vector<std::pair<std::string, RespExpr>> received_pairs;\n  for (size_t i = 1; i < result.size(); i += 2) {\n    received_pairs.emplace_back(result[i].GetString(), result[i + 1]);\n  }\n\n  std::vector<Matcher<std::pair<std::string, RespExpr>>> kv_matchers;\n  BuildKvMatchers(kv_matchers, expected, std::make_index_sequence<exprected_pairs_number>{});\n\n  return ExplainMatchResult(UnorderedElementsAreArray(kv_matchers), received_pairs,\n                            result_listener);\n}\n\ntemplate <typename... Args> auto IsMapWithSize(Args... 
args) {\n  return IsMapWithSizeMatcher(std::make_tuple(args...));\n}\n\nMATCHER_P(IsUnordArrayWithSizeMatcher, expected, \"\") {\n  if (arg.type != RespExpr::ARRAY) {\n    *result_listener << \"Wrong response type: \" << arg.type;\n    return false;\n  }\n\n  auto result = arg.GetVec();\n  size_t expected_size = std::tuple_size<decltype(expected)>::value;\n  if (result.size() != expected_size + 1) {\n    *result_listener << \"Wrong resp array size: \" << result.size();\n    return false;\n  }\n\n  if (result[0].GetInt() != expected_size) {\n    *result_listener << \"Wrong elements count: \" << result[0].GetInt().value_or(-1);\n    return false;\n  }\n\n  std::vector<RespExpr> received_elements(result.begin() + 1, result.end());\n\n  // Create a vector of matchers from the tuple\n  std::vector<Matcher<RespExpr>> matchers;\n  std::apply([&matchers](auto&&... args) { ((matchers.push_back(args)), ...); }, expected);\n\n  return ExplainMatchResult(UnorderedElementsAreArray(matchers), received_elements,\n                            result_listener);\n}\n\ntemplate <typename... Matchers> auto IsUnordArrayWithSize(Matchers... 
matchers) {\n  return IsUnordArrayWithSizeMatcher(std::make_tuple(matchers...));\n}\n\nTEST_F(SearchFamilyTest, CreateDropListIndex) {\n  EXPECT_EQ(Run({\"ft.create\", \"idx-1\", \"ON\", \"HASH\", \"PREFIX\", \"1\", \"prefix-1\"}), \"OK\");\n  EXPECT_EQ(Run({\"ft.create\", \"idx-2\", \"ON\", \"JSON\", \"PREFIX\", \"1\", \"prefix-2\"}), \"OK\");\n  EXPECT_EQ(Run({\"ft.create\", \"idx-3\", \"ON\", \"JSON\", \"PREFIX\", \"1\", \"prefix-3\"}), \"OK\");\n\n  EXPECT_THAT(Run({\"ft._list\"}).GetVec(), testing::UnorderedElementsAre(\"idx-1\", \"idx-2\", \"idx-3\"));\n\n  EXPECT_EQ(Run({\"ft.dropindex\", \"idx-2\"}), \"OK\");\n  EXPECT_THAT(Run({\"ft._list\"}).GetVec(), testing::UnorderedElementsAre(\"idx-1\", \"idx-3\"));\n\n  EXPECT_THAT(Run({\"ft.create\", \"idx-1\"}), ErrArg(\"Index already exists\"));\n\n  EXPECT_THAT(Run({\"ft.dropindex\", \"idx-100\"}), ErrArg(\"Index with name 'idx-100' not found\"));\n\n  EXPECT_EQ(Run({\"ft.dropindex\", \"idx-1\"}), \"OK\");\n  EXPECT_EQ(Run({\"ft._list\"}), \"idx-3\");\n}\n\nTEST_F(SearchFamilyTest, CreateDropDifferentDatabases) {\n  // Create index on db 0\n  auto resp =\n      Run({\"ft.create\", \"idx-1\", \"ON\", \"HASH\", \"PREFIX\", \"1\", \"doc-\", \"SCHEMA\", \"name\", \"TEXT\"});\n  EXPECT_EQ(resp, \"OK\");\n\n  // Add some data on database 0 (only db 0 is indexed)\n  Run({\"hset\", \"doc-0\", \"name\", \"Name of 0\"});\n\n  // Verify search works on db 0\n  resp = Run({\"ft.search\", \"idx-1\", \"*\"});\n  EXPECT_THAT(resp, IsMapWithSize(\"doc-0\", IsMap(\"name\", \"Name of 0\")));\n\n  EXPECT_EQ(Run({\"select\", \"1\"}), \"OK\");  // change database\n\n  // Creating an index on non zero database must fail\n  resp = Run({\"ft.create\", \"idx-2\", \"ON\", \"JSON\", \"PREFIX\", \"1\", \"prefix-2\"});\n  EXPECT_THAT(resp, ErrArg(\"ERR Cannot create index on db != 0\"));\n\n  // Search from db 1 should return 0 results (only db 0 is indexed)\n  resp = Run({\"ft.search\", \"idx-1\", \"*\"});\n  EXPECT_THAT(resp, 
IntArg(0));\n\n  // ft.dropindex must work from another database\n  EXPECT_EQ(Run({\"ft.dropindex\", \"idx-1\"}), \"OK\");\n  EXPECT_THAT(Run({\"ft.info\", \"idx-1\"}), ErrArg(\"Index with name 'idx-1' not found\"));\n}\n\nTEST_F(SearchFamilyTest, AlterIndex) {\n  Run({\"hset\", \"d:1\", \"color\", \"blue\", \"cost\", \"150\"});\n  Run({\"hset\", \"d:2\", \"color\", \"green\", \"cost\", \"200\"});\n\n  Run({\"ft.create\", \"idx-1\", \"ON\", \"HASH\"});\n\n  EXPECT_EQ(Run({\"ft.alter\", \"idx-1\", \"schema\", \"add\", \"color\", \"tag\"}), \"OK\");\n  EXPECT_THAT(Run({\"ft.search\", \"idx-1\", \"@color:{blue}\"}), AreDocIds(\"d:1\"));\n  EXPECT_THAT(Run({\"ft.search\", \"idx-1\", \"@color:{green}\"}), AreDocIds(\"d:2\"));\n\n  EXPECT_EQ(Run({\"ft.alter\", \"idx-1\", \"schema\", \"add\", \"cost\", \"numeric\"}), \"OK\");\n  EXPECT_THAT(Run({\"ft.search\", \"idx-1\", \"@cost:[0 100]\"}), kNoResults);\n  EXPECT_THAT(Run({\"ft.search\", \"idx-1\", \"@cost:[100 300]\"}), AreDocIds(\"d:1\", \"d:2\"));\n\n  EXPECT_THAT(Run({\"ft.alter\", \"idx-2\", \"schema\", \"add\", \"price\", \"numeric\"}),\n              ErrArg(\"Index not found\"));\n}\n\nTEST_F(SearchFamilyTest, SuffixPrefixSearch) {\n  Run({\"ft.create\", \"idx\", \"SCHEMA\", \"name\", \"TEXT\"});\n  Run({\"hset\", \"d:1\", \"name\", \"apple\"});\n  Run({\"hset\", \"d:2\", \"name\", \"carrot\"});\n\n  EXPECT_THAT(Run({\"FT.SEARCH\", \"idx\", \"app*\"}), AreDocIds(\"d:1\"));\n  EXPECT_THAT(Run({\"FT.SEARCH\", \"idx\", \"@name:app*\"}), AreDocIds(\"d:1\"));\n  EXPECT_THAT(Run({\"FT.SEARCH\", \"idx\", \"*le\"}), AreDocIds(\"d:1\"));\n  EXPECT_THAT(Run({\"FT.SEARCH\", \"idx\", \"@name:*le\"}), AreDocIds(\"d:1\"));\n  EXPECT_THAT(Run({\"FT.SEARCH\", \"idx\", \"*pl*\"}), AreDocIds(\"d:1\"));\n  EXPECT_THAT(Run({\"FT.SEARCH\", \"idx\", \"@name:*pl*\"}), AreDocIds(\"d:1\"));\n}\n\nTEST_F(SearchFamilyTest, InfoIndex) {\n  EXPECT_EQ(\n      Run({\"ft.create\", \"idx-1\", \"ON\", \"HASH\", \"PREFIX\", \"1\", \"doc-\", 
\"SCHEMA\", \"name\", \"TEXT\"}),\n      \"OK\");\n\n  for (size_t i = 0; i < 15; i++) {\n    Run({\"hset\", absl::StrCat(\"doc-\", i), \"name\", absl::StrCat(\"Name of\", i)});\n  }\n\n  auto info = Run({\"ft.info\", \"idx-1\"});\n\n  auto descriptor_matcher =\n      IsArray(\"key_type\", \"HASH\", \"prefixes\", IsArray(\"doc-\"), \"default_score\", 1);\n  auto schema_matcher = IsArray(IsArray(\"identifier\", \"name\", \"attribute\", \"name\", \"type\", \"TEXT\"));\n\n  EXPECT_THAT(info, IsArray(_, _, _, descriptor_matcher, \"index_options\", RespArray(IsEmpty()),\n                            \"attributes\", schema_matcher, \"num_docs\", IntArg(15), \"indexing\",\n                            IntArg(0), \"percent_indexed\", \"1\"));\n}\n\nTEST_F(SearchFamilyTest, Stats) {\n  EXPECT_EQ(\n      Run({\"ft.create\", \"idx-1\", \"ON\", \"HASH\", \"PREFIX\", \"1\", \"doc1-\", \"SCHEMA\", \"name\", \"TEXT\"}),\n      \"OK\");\n\n  EXPECT_EQ(\n      Run({\"ft.create\", \"idx-2\", \"ON\", \"HASH\", \"PREFIX\", \"1\", \"doc2-\", \"SCHEMA\", \"name\", \"TEXT\"}),\n      \"OK\");\n\n  for (size_t i = 0; i < 50; i++) {\n    Run({\"hset\", absl::StrCat(\"doc1-\", i), \"name\", absl::StrCat(\"Name of\", i)});\n    Run({\"hset\", absl::StrCat(\"doc2-\", i), \"name\", absl::StrCat(\"Name of\", i)});\n  }\n\n  auto metrics = GetMetrics();\n  EXPECT_EQ(metrics.search_stats.num_indices, 2);\n  EXPECT_EQ(metrics.search_stats.num_entries, 50 * 2);\n\n  size_t expected_usage = 2 * (50 + 3 /* number of distinct words*/) * (24 + 48 /* kv size */) +\n                          50 * 2 * 1 /* posting list entries */;\n  EXPECT_GE(metrics.search_stats.used_memory, expected_usage);\n  EXPECT_LE(metrics.search_stats.used_memory, 3 * expected_usage);\n}\n\n// Test how asynchronous indexing indexes documents and reports its progress\nTEST_F(SearchFamilyTest, Indexing) {\n  // Create documents\n#ifdef NDEBUG\n  constexpr size_t kNumDocs = 10'000;\n#else\n  constexpr size_t kNumDocs = 
1'000;\n#endif\n\n  for (size_t i = 0; i < kNumDocs; i++) {\n    Run({\"hset\", absl::StrCat(\"doc-\", i), \"t\", absl::StrCat(\"some long text at \", i), \"v1\",\n         absl::StrCat(i / 10), \"v2\", absl::StrCat(i / 1000)});\n  }\n\n  string_view create_cmd[] = {\"ft.create\", \"i1\", \"schema\", \"v1\", \"numeric\", \"t\", \"text\"};\n\n  // Drop immediately to check cancel\n  {\n    Run(create_cmd);\n    for (size_t i = 0; i < 3; i++)\n      ThisFiber::Yield();\n    Run({\"ft.dropindex\", \"i1\"});\n  }\n\n  // Update with ft.alter to check restart\n  {\n    Run(create_cmd);\n    for (size_t i = 0; i < 5; i++)\n      ThisFiber::Yield();\n    Run({\"ft.alter\", \"i1\", \"schema\", \"add\", \"v2\", \"numeric\"});\n  }\n\n  // loop and wait for index construction\n  absl::Time deadline = absl::Now() + absl::Seconds(10);\n  size_t iterations = 0;\n  bool seen_full = false;\n  while (true) {\n    auto resp = Run({\"ft.info\", \"i1\"});\n    auto arr = resp.GetVec();\n\n    auto find_field = [&arr](string_view field) {\n      return ++std::find_if(arr.begin(), arr.end(), [field](const auto& i) { return i == field; });\n    };\n\n    auto num_docs = find_field(\"num_docs\");\n    auto indexing = find_field(\"indexing\");\n    auto percent_indexed = find_field(\"percent_indexed\");\n\n    if (indexing->GetInt() == 0) {\n      EXPECT_THAT(*num_docs, IntArg(kNumDocs));\n      EXPECT_EQ(*percent_indexed, \"1\");\n      break;\n    }\n\n    // Check basic invariants\n    EXPECT_FALSE(seen_full);\n    seen_full |= num_docs->GetInt() == kNumDocs;\n    EXPECT_THAT(*indexing, IntArg(1));\n    EXPECT_NE(*percent_indexed, \"1\");  // change once we have estimations\n\n    // Check search doesn't return any errors\n    resp = Run({\"ft.search\", \"i1\", \"@v1:[10 20]\"});\n    EXPECT_THAT(resp, Not(ErrArg(\"\")));\n\n    iterations++;\n    ASSERT_LE(absl::Now(), deadline);\n  }\n\n  EXPECT_GT(iterations, 0u);  // ensure we observed indexing-in-progress state at least once\n\n  
auto resp = Run({\"ft.search\", \"i1\", \"@v1:[10 20]\", \"LIMIT\", \"0\", \"0\"});\n  EXPECT_THAT(resp, IntArg(110));\n\n  // check added with alter field v2 is fully indexed\n  resp = Run({\"ft.search\", \"i1\", \"@v2:[0 10000]\", \"LIMIT\", \"0\", \"0\"});\n  EXPECT_THAT(resp, IntArg(kNumDocs));\n}\n\nTEST_F(SearchFamilyTest, Simple) {\n  Run({\"hset\", \"d:1\", \"foo\", \"baz\", \"k\", \"v\"});\n  Run({\"hset\", \"d:2\", \"foo\", \"bar\", \"k\", \"v\"});\n  Run({\"hset\", \"d:3\", \"foo\", \"bad\", \"k\", \"v\"});\n\n  EXPECT_EQ(Run({\"ft.create\", \"i1\", \"PREFIX\", \"1\", \"d:\", \"SCHEMA\", \"foo\", \"TEXT\", \"k\", \"TEXT\"}),\n            \"OK\");\n\n  EXPECT_THAT(Run({\"ft.search\", \"i1\", \"@foo:bar\"}), AreDocIds(\"d:2\"));\n  EXPECT_THAT(Run({\"ft.search\", \"i1\", \"@foo:bar | @foo:baz\"}), AreDocIds(\"d:1\", \"d:2\"));\n  EXPECT_THAT(Run({\"ft.search\", \"i1\", \"@foo:(bar|baz|bad)\"}), AreDocIds(\"d:1\", \"d:2\", \"d:3\"));\n\n  EXPECT_THAT(Run({\"ft.search\", \"i1\", \"@foo:none\"}), kNoResults);\n\n  EXPECT_THAT(Run({\"ft.search\", \"iNone\", \"@foo:bar\"}), ErrArg(\"iNone: no such index\"));\n  EXPECT_THAT(Run({\"ft.search\", \"i1\", \"@@NOTAQUERY@@\"}), ErrArg(\"Query syntax error\"));\n\n  // w: prefix is not part of index\n  Run({\"hset\", \"w:2\", \"foo\", \"this\", \"k\", \"v\"});\n  EXPECT_THAT(Run({\"ft.search\", \"i1\", \"@foo:this\"}), kNoResults);\n}\n\nTEST_F(SearchFamilyTest, Errors) {\n  Run({\"ft.create\", \"i1\", \"PREFIX\", \"1\", \"d:\", \"SCHEMA\", \"foo\", \"TAG\", \"bar\", \"TEXT\"});\n\n  // Wrong field\n  EXPECT_THAT(Run({\"ft.search\", \"i1\", \"@whoami:lol\"}), ErrArg(\"Invalid field: whoami\"));\n\n  // Wrong field type\n  EXPECT_THAT(Run({\"ft.search\", \"i1\", \"@foo:lol\"}), ErrArg(\"Wrong access type for field: foo\"));\n\n  // ft.create index on json schema $.sometag AS sometag TAG SEPARATOR\n  EXPECT_THAT(Run({\"ft.create\", \"i2\", \"ON\", \"JSON\", \"SCHEMA\", \"$.sometag\", \"AS\", \"sometag\", \"TAG\",\n       
            \"SEPARATOR\"}),\n              ErrArg(\"Tag separator must be a single character. Got ``\"));\n}\n\nTEST_F(SearchFamilyTest, NoPrefix) {\n  Run({\"hset\", \"d:1\", \"a\", \"one\", \"k\", \"v\"});\n  Run({\"hset\", \"d:2\", \"a\", \"two\", \"k\", \"v\"});\n  Run({\"hset\", \"d:3\", \"a\", \"three\", \"k\", \"v\"});\n\n  EXPECT_EQ(Run({\"ft.create\", \"i1\", \"schema\", \"a\", \"text\", \"k\", \"text\"}), \"OK\");\n\n  EXPECT_THAT(Run({\"ft.search\", \"i1\", \"one | three\"}), AreDocIds(\"d:1\", \"d:3\"));\n}\n\nTEST_F(SearchFamilyTest, Json) {\n  Run({\"json.set\", \"k1\", \".\", R\"({\"a\": \"small test\", \"b\": \"some details\"})\"});\n  Run({\"json.set\", \"k2\", \".\", R\"({\"a\": \"another test\", \"b\": \"more details\"})\"});\n  Run({\"json.set\", \"k3\", \".\", R\"({\"a\": \"last test\", \"b\": \"secret details\"})\"});\n\n  EXPECT_EQ(Run({\"ft.create\", \"i1\", \"on\", \"json\", \"schema\", \"$.a\", \"as\", \"a\", \"text\", \"$.b\", \"as\",\n                 \"b\", \"text\"}),\n            \"OK\");\n\n  EXPECT_THAT(Run({\"ft.search\", \"i1\", \"some|more\"}), AreDocIds(\"k1\", \"k2\"));\n  EXPECT_THAT(Run({\"ft.search\", \"i1\", \"some|more|secret\"}), AreDocIds(\"k1\", \"k2\", \"k3\"));\n\n  EXPECT_THAT(Run({\"ft.search\", \"i1\", \"@a:last @b:details\"}), AreDocIds(\"k3\"));\n  EXPECT_THAT(Run({\"ft.search\", \"i1\", \"@a:(another|small)\"}), AreDocIds(\"k1\", \"k2\"));\n  EXPECT_THAT(Run({\"ft.search\", \"i1\", \"@a:(another|small|secret)\"}), AreDocIds(\"k1\", \"k2\"));\n\n  EXPECT_THAT(Run({\"ft.search\", \"i1\", \"none\"}), kNoResults);\n  EXPECT_THAT(Run({\"ft.search\", \"i1\", \"@a:small @b:secret\"}), kNoResults);\n}\n\nTEST_F(SearchFamilyTest, JsonAttributesPaths) {\n  Run({\"json.set\", \"k1\", \".\", R\"(   {\"nested\": {\"value\": \"no\"}} )\"});\n  Run({\"json.set\", \"k2\", \".\", R\"(   {\"nested\": {\"value\": \"yes\"}} )\"});\n  Run({\"json.set\", \"k3\", \".\", R\"(   {\"nested\": {\"value\": \"maybe\"}} )\"});\n\n  
EXPECT_EQ(\n      Run({\"ft.create\", \"i1\", \"on\", \"json\", \"schema\", \"$.nested.value\", \"as\", \"value\", \"text\"}),\n      \"OK\");\n\n  EXPECT_THAT(Run({\"ft.search\", \"i1\", \"yes\"}), AreDocIds(\"k2\"));\n}\n\nTEST_F(SearchFamilyTest, JsonIdentifierWithBrackets) {\n  Run({\"json.set\", \"k1\", \".\", R\"({\"name\":\"London\",\"population\":8.8,\"continent\":\"Europe\"})\"});\n  Run({\"json.set\", \"k2\", \".\", R\"({\"name\":\"Athens\",\"population\":3.1,\"continent\":\"Europe\"})\"});\n  Run({\"json.set\", \"k3\", \".\", R\"({\"name\":\"Tel-Aviv\",\"population\":1.3,\"continent\":\"Asia\"})\"});\n  Run({\"json.set\", \"k4\", \".\", R\"({\"name\":\"Hyderabad\",\"population\":9.8,\"continent\":\"Asia\"})\"});\n\n  EXPECT_EQ(Run({\"ft.create\", \"i1\", \"on\", \"json\", \"schema\", \"$[\\\"name\\\"]\", \"as\", \"name\", \"tag\",\n                 \"$[\\\"population\\\"]\", \"as\", \"population\", \"numeric\", \"sortable\", \"$[\\\"continent\\\"]\",\n                 \"as\", \"continent\", \"tag\"}),\n            \"OK\");\n\n  EXPECT_THAT(Run({\"ft.search\", \"i1\", \"(@continent:{Europe})\"}), AreDocIds(\"k1\", \"k2\"));\n}\n\nTEST_F(SearchFamilyTest, JsonArrayValues) {\n  string_view D1 = R\"(\n{\n  \"name\": \"Alex\",\n  \"plays\" : [\n    {\"game\": \"Pacman\", \"score\": 10},\n    {\"game\": \"Tetris\", \"score\": 15}\n  ],\n  \"areas\": [\"EU-west\", \"EU-central\"]\n}\n)\";\n  string_view D2 = R\"(\n{\n  \"name\": \"Bob\",\n  \"plays\" : [\n    {\"game\": \"Pacman\", \"score\": 15},\n    {\"game\": \"Mario\", \"score\": 7}\n  ],\n  \"areas\": [\"US-central\"]\n}\n)\";\n  string_view D3 = R\"(\n{\n  \"name\": \"Caren\",\n  \"plays\" : [\n    {\"game\": \"Mario\", \"score\": 9},\n    {\"game\": \"Doom\", \"score\": 20}\n  ],\n  \"areas\": [\"EU-central\", \"EU-east\"]\n}\n)\";\n\n  Run({\"json.set\", \"k1\", \".\", D1});\n  Run({\"json.set\", \"k2\", \".\", D2});\n  Run({\"json.set\", \"k3\", \".\", D3});\n\n  Run({\"ft.create\", \"i1\",\n       
\"on\",        \"json\",\n       \"schema\",    \"$.name\",\n       \"as\",        \"name\",\n       \"text\",      \"$.plays[*].game\",\n       \"as\",        \"games\",\n       \"tag\",       \"$.plays[*].score\",\n       \"as\",        \"scores\",\n       \"numeric\",   \"$.areas[*]\",\n       \"as\",        \"areas\",\n       \"tag\"});\n\n  EXPECT_THAT(Run({\"ft.search\", \"i1\", \"*\"}), AreDocIds(\"k1\", \"k2\", \"k3\"));\n\n  // Find players by games\n  EXPECT_THAT(Run({\"ft.search\", \"i1\", \"@games:{Tetris | Mario | Doom}\"}),\n              AreDocIds(\"k1\", \"k2\", \"k3\"));\n  EXPECT_THAT(Run({\"ft.search\", \"i1\", \"@games:{Pacman}\"}), AreDocIds(\"k1\", \"k2\"));\n  EXPECT_THAT(Run({\"ft.search\", \"i1\", \"@games:{Mario}\"}), AreDocIds(\"k2\", \"k3\"));\n\n  // Find players by scores\n  EXPECT_THAT(Run({\"ft.search\", \"i1\", \"@scores:[15 15]\"}), AreDocIds(\"k1\", \"k2\"));\n  EXPECT_THAT(Run({\"ft.search\", \"i1\", \"@scores:[0 (10]\"}), AreDocIds(\"k2\", \"k3\"));\n  EXPECT_THAT(Run({\"ft.search\", \"i1\", \"@scores:[(15 20]\"}), AreDocIds(\"k3\"));\n\n  // Find platers by areas\n  EXPECT_THAT(Run({\"ft.search\", \"i1\", \"@areas:{'EU-central'}\"}), AreDocIds(\"k1\", \"k3\"));\n  EXPECT_THAT(Run({\"ft.search\", \"i1\", \"@areas:{'US-central'}\"}), AreDocIds(\"k2\"));\n\n  // Test complicated RETURN expression\n  auto res = Run(\n      {\"ft.search\", \"i1\", \"@name:bob\", \"return\", \"1\", \"max($.plays[*].score)\", \"as\", \"max-score\"});\n  EXPECT_THAT(res, IsMapWithSize(\"k2\", IsMap(\"max-score\", \"15\")));\n\n  // Test invalid json path expression omits that field\n  res = Run({\"ft.search\", \"i1\", \"@name:alex\", \"return\", \"1\", \"::??INVALID??::\", \"as\", \"retval\"});\n  EXPECT_THAT(res, IsMapWithSize(\"k1\", IsMap()));\n}\n\nTEST_F(SearchFamilyTest, Tags) {\n  Run({\"hset\", \"d:1\", \"color\", \"red, green\"});\n  Run({\"hset\", \"d:2\", \"color\", \"green, blue\"});\n  Run({\"hset\", \"d:3\", \"color\", \"blue, red\"});\n  
Run({\"hset\", \"d:4\", \"color\", \"red\"});\n  Run({\"hset\", \"d:5\", \"color\", \"green\"});\n  Run({\"hset\", \"d:6\", \"color\", \"blue\"});\n\n  EXPECT_EQ(Run({\"ft.create\", \"i1\", \"on\", \"hash\", \"schema\", \"color\", \"tag\", \"dummy\", \"numeric\"}),\n            \"OK\");\n  EXPECT_THAT(Run({\"ft.tagvals\", \"i2\", \"color\"}), ErrArg(\"Index with name 'i2' not found\"));\n  EXPECT_THAT(Run({\"ft.tagvals\", \"i1\", \"foo\"}), ErrArg(\"No such field\"));\n  EXPECT_THAT(Run({\"ft.tagvals\", \"i1\", \"dummy\"}), ErrArg(\"Not a tag field\"));\n  auto resp = Run({\"ft.tagvals\", \"i1\", \"color\"});\n  ASSERT_THAT(resp, IsUnordArray(\"red\", \"blue\", \"green\"));\n\n  // Tags don't participate in full text search\n  EXPECT_THAT(Run({\"ft.search\", \"i1\", \"red\"}), kNoResults);\n\n  EXPECT_THAT(Run({\"ft.search\", \"i1\", \"@color:{ red }\"}), AreDocIds(\"d:1\", \"d:3\", \"d:4\"));\n  EXPECT_THAT(Run({\"ft.search\", \"i1\", \"@color:{green}\"}), AreDocIds(\"d:1\", \"d:2\", \"d:5\"));\n  EXPECT_THAT(Run({\"ft.search\", \"i1\", \"@color:{blue}\"}), AreDocIds(\"d:2\", \"d:3\", \"d:6\"));\n\n  EXPECT_THAT(Run({\"ft.search\", \"i1\", \"@color:{red | green}\"}),\n              AreDocIds(\"d:1\", \"d:2\", \"d:3\", \"d:4\", \"d:5\"));\n  EXPECT_THAT(Run({\"ft.search\", \"i1\", \"@color:{blue | green}\"}),\n              AreDocIds(\"d:1\", \"d:2\", \"d:3\", \"d:5\", \"d:6\"));\n\n  EXPECT_EQ(Run({\"ft.create\", \"i2\", \"on\", \"hash\", \"schema\", \"c1\", \"as\", \"c2\", \"tag\"}), \"OK\");\n\n  // TODO: there is a discrepancy here between redis stack and Dragonfly,\n  // we accept the original field when it has alias, while redis stack does not.\n  //\n  // EXPECT_THAT(Run({\"ft.tagvals\", \"i2\", \"c1\"}), ErrArg(\"No such field\"));\n  EXPECT_THAT(Run({\"ft.tagvals\", \"i2\", \"c2\"}), ArrLen(0));\n}\n\nTEST_F(SearchFamilyTest, TagOptions) {\n  Run({\"hset\", \"d:1\", \"color\", \"    red/   green // bLUe   \"});\n  Run({\"hset\", \"d:2\", \"color\", \"blue  
 /// GReeN   \"});\n  Run({\"hset\", \"d:3\", \"color\", \"grEEn // yellow   //\"});\n  Run({\"hset\", \"d:4\", \"color\", \"  /blue/green/  \"});\n\n  EXPECT_EQ(Run({\"ft.create\", \"i1\", \"on\", \"hash\", \"schema\", \"color\", \"tag\", \"casesensitive\",\n                 \"separator\", \"/\"}),\n            \"OK\");\n\n  EXPECT_THAT(Run({\"ft.search\", \"i1\", \"@color:{green}\"}), AreDocIds(\"d:1\", \"d:4\"));\n  EXPECT_THAT(Run({\"ft.search\", \"i1\", \"@color:{GReeN}\"}), AreDocIds(\"d:2\"));\n  EXPECT_THAT(Run({\"ft.search\", \"i1\", \"@color:{blue}\"}), AreDocIds(\"d:2\", \"d:4\"));\n}\n\nTEST_F(SearchFamilyTest, SymbolsInTag) {\n  Run({\"FT.CREATE\", \"demo_idx\", \"ON\", \"HASH\", \"PREFIX\", \"1\", \"doc:\", \"SCHEMA\", \"tags\", \"TAG\"});\n  Run({\"HSET\", \"doc:1\", \"name\", \"First Item\", \"tags\", \"@first\"});\n  Run({\"HSET\", \"doc:2\", \"name\", \"Second Item\", \"tags\", \"?second\"});\n  Run({\"HSET\", \"doc:3\", \"name\", \"Third Item\", \"tags\", \":third\"});\n  Run({\"HSET\", \"doc:4\", \"name\", \"Fourth Item\", \"tags\", \"\\\"fourth\"});\n  EXPECT_THAT(Run({\"FT.SEARCH\", \"demo_idx\", R\"(@tags:{\\?second})\"}), AreDocIds(\"doc:2\"));\n  EXPECT_THAT(Run({\"FT.SEARCH\", \"demo_idx\", R\"(@tags:{\\@first})\"}), AreDocIds(\"doc:1\"));\n  EXPECT_THAT(Run({\"FT.SEARCH\", \"demo_idx\", R\"(@tags:{\\:third})\"}), AreDocIds(\"doc:3\"));\n  EXPECT_THAT(Run({\"FT.SEARCH\", \"demo_idx\", R\"(@tags:{\\\"fourth})\"}), AreDocIds(\"doc:4\"));\n}\n\nTEST_F(SearchFamilyTest, TagNumbers) {\n  Run({\"hset\", \"d:1\", \"number\", \"1\"});\n  Run({\"hset\", \"d:2\", \"number\", \"2\"});\n  Run({\"hset\", \"d:3\", \"number\", \"3\"});\n\n  EXPECT_EQ(Run({\"ft.create\", \"i1\", \"on\", \"hash\", \"schema\", \"number\", \"tag\"}), \"OK\");\n\n  EXPECT_THAT(Run({\"ft.search\", \"i1\", \"@number:{1}\"}), AreDocIds(\"d:1\"));\n  EXPECT_THAT(Run({\"ft.search\", \"i1\", \"@number:{1|2}\"}), AreDocIds(\"d:1\", \"d:2\"));\n  EXPECT_THAT(Run({\"ft.search\", 
\"i1\", \"@number:{1|2|3}\"}), AreDocIds(\"d:1\", \"d:2\", \"d:3\"));\n\n  EXPECT_THAT(Run({\"ft.search\", \"i1\", \"@number:{1.0|2|3.0}\"}), AreDocIds(\"d:2\"));\n  EXPECT_THAT(Run({\"ft.search\", \"i1\", \"@number:{1|2|3.0}\"}), AreDocIds(\"d:1\", \"d:2\"));\n  EXPECT_THAT(Run({\"ft.search\", \"i1\", \"@number:{1|hello|2}\"}), AreDocIds(\"d:1\", \"d:2\"));\n}\n\nTEST_F(SearchFamilyTest, TagEscapeCharacters) {\n  EXPECT_EQ(Run({\"ft.create\", \"item_idx\", \"ON\", \"JSON\", \"PREFIX\", \"1\", \"p\", \"SCHEMA\", \"$.name\",\n                 \"AS\", \"name\", \"TAG\"}),\n            \"OK\");\n  EXPECT_EQ(Run({\"json.set\", \"p:1\", \"$\", \"{\\\"name\\\":\\\"escape-error\\\"}\"}), \"OK\");\n\n  auto resp = Run({\"ft.search\", \"item_idx\", \"@name:{escape\\\\-err*}\"});\n  EXPECT_THAT(resp, AreDocIds(\"p:1\"));\n}\n\nTEST_F(SearchFamilyTest, Numbers) {\n  EXPECT_EQ(Run({\"ft.create\", \"i1\", \"schema\", \"i\", \"numeric\", \"j\", \"numeric\"}), \"OK\");\n\n  for (unsigned i = 0; i <= 10; i++) {\n    for (unsigned j = 0; j <= 10; j++) {\n      auto key = absl::StrCat(\"i\", i, \"j\", j);\n      Run({\"hset\", key, \"i\", absl::StrCat(i), \"j\", absl::StrCat(j)});\n    }\n  }\n\n  // Test simple ranges:\n  EXPECT_THAT(Run({\"ft.search\", \"i1\", \"@i:[5 5] @j:[5 5]\"}), AreDocIds(\"i5j5\"));\n\n  EXPECT_THAT(Run({\"ft.search\", \"i1\", \"@i:[0 1] @j:[9 10]\"}),\n              AreDocIds(\"i0j9\", \"i0j10\", \"i1j9\", \"i1j10\"));\n\n  EXPECT_THAT(Run({\"ft.search\", \"i1\", \"@i:[7 8] @j:[2 3]\"}),\n              AreDocIds(\"i7j2\", \"i7j3\", \"i8j2\", \"i8j3\"));\n\n  // Test union of ranges:\n  EXPECT_THAT(Run({\"ft.search\", \"i1\", \"(@i:[1 2] | @i:[6 6]) @j:[7 7]\"}),\n              AreDocIds(\"i1j7\", \"i2j7\", \"i6j7\"));\n\n  EXPECT_THAT(Run({\"ft.search\", \"i1\", \"(@i:[1 5] | @i:[1 3] | @i:[3 5]) @j:[7 7]\"}),\n              AreDocIds(\"i1j7\", \"i2j7\", \"i3j7\", \"i4j7\", \"i5j7\"));\n\n  // Test intersection of ranges:\n  EXPECT_THAT(Run({\"ft.search\", 
\"i1\", \"(@i:[9 9]) (@j:[5 7] @j:[6 8])\"}),\n              AreDocIds(\"i9j6\", \"i9j7\"));\n\n  EXPECT_THAT(Run({\"ft.search\", \"i1\", \"@i:[9 9] (@j:[4 6] @j:[1 5] @j:[5 10])\"}),\n              AreDocIds(\"i9j5\"));\n\n  EXPECT_THAT(Run({\"ft.search\", \"i1\", \"@i:[9 9] (@j:[4 6] @j:[1 5] @j:[5 10])\"}),\n              AreDocIds(\"i9j5\"));\n\n  // Test negation of ranges:\n  EXPECT_THAT(Run({\"ft.search\", \"i1\", \"@i:[9 9] -@j:[1 10]\"}), AreDocIds(\"i9j0\"));\n  EXPECT_THAT(Run({\"ft.search\", \"i1\", \"-@i:[0 9] -@j:[1 10]\"}), AreDocIds(\"i10j0\"));\n\n  // Test empty range\n  EXPECT_THAT(Run({\"ft.search\", \"i1\", \"@i:[9 1]\"}), AreDocIds());\n  EXPECT_THAT(Run({\"ft.search\", \"i1\", \"@j:[5 0]\"}), AreDocIds());\n  EXPECT_THAT(Run({\"ft.search\", \"i1\", \"@i:[7 1] @j:[6 2]\"}), AreDocIds());\n}\n\nTEST_F(SearchFamilyTest, TestLimit) {\n  Run({\"ft.create\", \"i1\", \"SCHEMA\", \"match\", \"text\"});\n\n  for (unsigned i = 0; i < 20; i++)\n    Run({\"hset\", to_string(i), \"match\", \"all\"});\n\n  // Default limit is 10\n  auto resp = Run({\"ft.search\", \"i1\", \"all\"});\n  EXPECT_THAT(resp, ArrLen(10 * 2 + 1));\n\n  resp = Run({\"ft.search\", \"i1\", \"all\", \"limit\", \"0\", \"0\"});\n  EXPECT_THAT(resp, IntArg(20));\n\n  resp = Run({\"ft.search\", \"i1\", \"all\", \"limit\", \"0\", \"5\"});\n  EXPECT_THAT(resp, ArrLen(5 * 2 + 1));\n\n  resp = Run({\"ft.search\", \"i1\", \"all\", \"limit\", \"17\", \"5\"});\n  EXPECT_THAT(resp, ArrLen(3 * 2 + 1));\n}\n\nstring_view FloatSV(const float* f) {\n  return {reinterpret_cast<const char*>(f), sizeof(float)};\n}\n\nauto MatchEntry = [](string key, auto... 
fields) { return IsMapWithSize(key, IsMap(fields...)); };\n\nTEST_F(SearchFamilyTest, ReturnOption) {\n  for (unsigned i = 0; i < 20; i++) {\n    const float score = i;\n    Run({\"hset\", \"k\"s + to_string(i), \"longA\", to_string(i), \"longB\", to_string(i + 1), \"longC\",\n         to_string(i + 2), \"secret\", to_string(i + 3), \"vector\", FloatSV(&score)});\n  }\n\n  Run({\"ft.create\", \"i1\",     \"SCHEMA\", \"longA\",   \"AS\",    \"justA\", \"TEXT\",\n       \"longB\",     \"AS\",     \"justB\",  \"NUMERIC\", \"longC\", \"AS\",    \"justC\",\n       \"NUMERIC\",   \"vector\", \"VECTOR\", \"FLAT\",    \"2\",     \"DIM\",   \"1\"});\n\n  // Check all fields are returned\n  auto resp = Run({\"ft.search\", \"i1\", \"@justA:0\"});\n  EXPECT_THAT(resp, MatchEntry(\"k0\", \"longA\", \"0\", \"longB\", \"1\", \"longC\", \"2\", \"secret\", \"3\",\n                               \"vector\", \"[0]\"));\n\n  // Check no fields are returned\n  resp = Run({\"ft.search\", \"i1\", \"@justA:0\", \"return\", \"0\"});\n  EXPECT_THAT(resp, IsArray(IntArg(1), \"k0\"));\n\n  resp = Run({\"ft.search\", \"i1\", \"@justA:0\", \"nocontent\"});\n  EXPECT_THAT(resp, IsArray(IntArg(1), \"k0\"));\n\n  // Check only one field is returned (and with original identifier)\n  resp = Run({\"ft.search\", \"i1\", \"@justA:0\", \"return\", \"1\", \"longA\"});\n  EXPECT_THAT(resp, MatchEntry(\"k0\", \"longA\", \"0\"));\n\n  // Check only one field is returned with right alias\n  resp = Run({\"ft.search\", \"i1\", \"@justA:0\", \"return\", \"1\", \"longB\", \"as\", \"madeupname\"});\n  EXPECT_THAT(resp, MatchEntry(\"k0\", \"madeupname\", \"1\"));\n\n  // Check two fields\n  resp = Run({\"ft.search\", \"i1\", \"@justA:0\", \"return\", \"2\", \"longB\", \"as\", \"madeupname\", \"longC\"});\n  EXPECT_THAT(resp, MatchEntry(\"k0\", \"madeupname\", \"1\", \"longC\", \"2\"));\n\n  // Check non-existing field\n  resp = Run({\"ft.search\", \"i1\", \"@justA:0\", \"return\", \"1\", \"nothere\"});\n  
EXPECT_THAT(resp, MatchEntry(\"k0\"));\n\n  // Checl implcit __vector_score is provided\n  float score = 20;\n  resp = Run({\"ft.search\", \"i1\", \"@justA:0 => [KNN 20 @vector $vector]\", \"SORTBY\", \"__vector_score\",\n              \"DESC\", \"RETURN\", \"1\", \"longA\", \"PARAMS\", \"2\", \"vector\", FloatSV(&score)});\n  EXPECT_THAT(resp, MatchEntry(\"k0\", \"longA\", \"0\"));\n\n  // Check sort doesn't shadow knn return alias\n  score = 20;\n  resp = Run({\"ft.search\", \"i1\", \"@justA:0 => [KNN 20 @vector $vector AS vec_return]\", \"SORTBY\",\n              \"vec_return\", \"DESC\", \"RETURN\", \"1\", \"vec_return\", \"PARAMS\", \"2\", \"vector\",\n              FloatSV(&score)});\n  EXPECT_THAT(resp, MatchEntry(\"k0\", \"vec_return\", \"20\"));\n}\n\nTEST_F(SearchFamilyTest, ReturnOptionJson) {\n  const string_view j =\n      R\"({\"actions\":[\"fly\",\"sleep\"],\"name\":\"dragon\",\"not_indexed\":true,\"size\":3})\";\n  Run({\"json.set\", \"k1\", \".\", j});\n  Run({\"ft.create\", \"i1\", \"on\", \"json\", \"schema\", \"$.name\", \"as\", \"name\", \"text\", \"$.actions[0]\",\n       \"as\", \"primary_action\", \"tag\", \"$.size\", \"as\", \"size\", \"numeric\"});\n\n  // Return whole document as a single field by default\n  EXPECT_THAT(Run({\"ft.search\", \"i1\", \"*\"}), MatchEntry(\"k1\", \"$\", j));\n\n  // RETURN 0\n  EXPECT_THAT(Run({\"ft.search\", \"i1\", \"*\", \"return\", \"0\"}), IsArray(IntArg(1), \"k1\"));\n\n  // RETURN by full path\n  EXPECT_THAT(Run({\"ft.search\", \"i1\", \"*\", \"return\", \"1\", \"$.name\"}),\n              MatchEntry(\"k1\", \"$.name\", \"dragon\"));\n  EXPECT_THAT(Run({\"ft.search\", \"i1\", \"*\", \"return\", \"1\", \"$.actions\"}),\n              MatchEntry(\"k1\", \"$.actions\", \"[\\\"fly\\\",\\\"sleep\\\"]\"));\n\n  // RETURN by full path with alias\n  EXPECT_THAT(Run({\"ft.search\", \"i1\", \"*\", \"return\", \"1\", \"$.name\", \"as\", \"n\"}),\n              MatchEntry(\"k1\", \"n\", \"dragon\"));\n\n  // RETURN 
by schema alias\n  EXPECT_THAT(Run({\"ft.search\", \"i1\", \"*\", \"return\", \"1\", \"name\"}),\n              MatchEntry(\"k1\", \"name\", \"dragon\"));\n  EXPECT_THAT(Run({\"ft.search\", \"i1\", \"*\", \"return\", \"1\", \"primary_action\"}),\n              MatchEntry(\"k1\", \"primary_action\", \"fly\"));\n\n  // RETURN by schema alias with new alias\n  EXPECT_THAT(Run({\"ft.search\", \"i1\", \"*\", \"return\", \"1\", \"name\", \"as\", \"n\"}),\n              MatchEntry(\"k1\", \"n\", \"dragon\"));\n  EXPECT_THAT(Run({\"ft.search\", \"i1\", \"*\", \"return\", \"1\", \"primary_action\", \"as\", \"pa\"}),\n              MatchEntry(\"k1\", \"pa\", \"fly\"));\n\n  // Whole document with SORTBY includes sortable field as return field\n  EXPECT_THAT(Run({\"ft.search\", \"i1\", \"*\", \"sortby\", \"size\"}),\n              MatchEntry(\"k1\", \"$\", j, \"size\", \"3\"));\n\n  // RETURN with SORTBY doesn't include sortable field\n  EXPECT_THAT(Run({\"ft.search\", \"i1\", \"*\", \"sortby\", \"size\", \"return\", \"1\", \"name\"}),\n              MatchEntry(\"k1\", \"name\", \"dragon\"));\n}\n\nTEST_F(SearchFamilyTest, TestStopWords) {\n  Run({\"ft.create\", \"i1\", \"STOPWORDS\", \"3\", \"red\", \"green\", \"blue\", \"SCHEMA\", \"title\", \"TEXT\"});\n\n  Run({\"hset\", \"d:1\", \"title\", \"ReD? parrot flies away\"});\n  Run({\"hset\", \"d:2\", \"title\", \"GrEEn crocodile eats you\"});\n  Run({\"hset\", \"d:3\", \"title\", \"BLUe. 
Whale surfes the sea\"});\n\n  EXPECT_THAT(Run({\"ft.search\", \"i1\", \"red\"}), kNoResults);\n  EXPECT_THAT(Run({\"ft.search\", \"i1\", \"green\"}), kNoResults);\n  EXPECT_THAT(Run({\"ft.search\", \"i1\", \"blue\"}), kNoResults);\n\n  EXPECT_THAT(Run({\"ft.search\", \"i1\", \"parrot\"}), AreDocIds(\"d:1\"));\n  EXPECT_THAT(Run({\"ft.search\", \"i1\", \"crocodile\"}), AreDocIds(\"d:2\"));\n  EXPECT_THAT(Run({\"ft.search\", \"i1\", \"whale\"}), AreDocIds(\"d:3\"));\n}\n\nTEST_F(SearchFamilyTest, SimpleUpdates) {\n  EXPECT_EQ(Run({\"ft.create\", \"i1\", \"schema\", \"title\", \"text\", \"visits\", \"numeric\"}), \"OK\");\n\n  Run({\"hset\", \"d:1\", \"title\", \"Dragonfly article\", \"visits\", \"100\"});\n  Run({\"hset\", \"d:2\", \"title\", \"Butterfly observations\", \"visits\", \"50\"});\n  Run({\"hset\", \"d:3\", \"title\", \"Bumblebee studies\", \"visits\", \"30\"});\n\n  // Check values above were added to the index\n  EXPECT_THAT(Run({\"ft.search\", \"i1\", \"article | observations | studies\"}),\n              AreDocIds(\"d:1\", \"d:2\", \"d:3\"));\n\n  // Update title - text value\n  {\n    Run({\"hset\", \"d:2\", \"title\", \"Butterfly studies\"});\n    EXPECT_THAT(Run({\"ft.search\", \"i1\", \"observations\"}), kNoResults);\n    EXPECT_THAT(Run({\"ft.search\", \"i1\", \"studies\"}), AreDocIds(\"d:2\", \"d:3\"));\n\n    Run({\"hset\", \"d:1\", \"title\", \"Upcoming Dragonfly presentation\"});\n    EXPECT_THAT(Run({\"ft.search\", \"i1\", \"article\"}), kNoResults);\n    EXPECT_THAT(Run({\"ft.search\", \"i1\", \"upcoming presentation\"}), AreDocIds(\"d:1\"));\n\n    Run({\"hset\", \"d:3\", \"title\", \"Secret bumblebee research\"});\n    EXPECT_THAT(Run({\"ft.search\", \"i1\", \"studies\"}), AreDocIds(\"d:2\"));\n    EXPECT_THAT(Run({\"ft.search\", \"i1\", \"secret research\"}), AreDocIds(\"d:3\"));\n  }\n\n  // Update visits - numeric value\n  {\n    EXPECT_THAT(Run({\"ft.search\", \"i1\", \"@visits:[50 1000]\"}), AreDocIds(\"d:1\", \"d:2\"));\n\n    
Run({\"hset\", \"d:3\", \"visits\", \"75\"});\n    EXPECT_THAT(Run({\"ft.search\", \"i1\", \"@visits:[0 49]\"}), kNoResults);\n    EXPECT_THAT(Run({\"ft.search\", \"i1\", \"@visits:[50 1000]\"}), AreDocIds(\"d:1\", \"d:2\", \"d:3\"));\n\n    Run({\"hset\", \"d:1\", \"visits\", \"125\"});\n    Run({\"hset\", \"d:2\", \"visits\", \"150\"});\n    EXPECT_THAT(Run({\"ft.search\", \"i1\", \"@visits:[100 1000]\"}), AreDocIds(\"d:1\", \"d:2\"));\n\n    Run({\"hset\", \"d:3\", \"visits\", \"175\"});\n    EXPECT_THAT(Run({\"ft.search\", \"i1\", \"@visits:[0 100]\"}), kNoResults);\n    EXPECT_THAT(Run({\"ft.search\", \"i1\", \"@visits:[150 1000]\"}), AreDocIds(\"d:2\", \"d:3\"));\n  }\n\n  // Delete documents\n  {\n    Run({\"del\", \"d:2\", \"d:3\"});\n    EXPECT_THAT(Run({\"ft.search\", \"i1\", \"dragonfly\"}), AreDocIds(\"d:1\"));\n    EXPECT_THAT(Run({\"ft.search\", \"i1\", \"butterfly | bumblebee\"}), kNoResults);\n  }\n}\n\nTEST_F(SearchFamilyTest, Unicode) {\n  EXPECT_EQ(Run({\"ft.create\", \"i1\", \"schema\", \"title\", \"text\", \"visits\", \"numeric\"}), \"OK\");\n\n  // Explicitly using screaming uppercase to check utf-8 to lowercase functionality\n  Run({\"hset\", \"d:1\", \"title\", \"Веселая СТРЕКОЗА Иван\", \"visits\", \"400\"});\n  Run({\"hset\", \"d:2\", \"title\", \"Die fröhliche Libelle Günther\", \"visits\", \"300\"});\n  Run({\"hset\", \"d:3\", \"title\", \"השפירית המהירה יעקב\", \"visits\", \"200\"});\n  Run({\"hset\", \"d:4\", \"title\", \"πανίσχυρη ΛΙΒΕΛΛΟΎΛΗ Δίας\", \"visits\", \"100\"});\n\n  // Check we find our dragonfly in all languages\n  EXPECT_THAT(Run({\"ft.search\", \"i1\", \"стРекоЗа|liBellE|השפירית|λΙβελλοΎλη\"}),\n              AreDocIds(\"d:1\", \"d:2\", \"d:3\", \"d:4\"));\n\n  // Check the result is valid\n  auto resp = Run({\"ft.search\", \"i1\", \"λιβελλούλη\"});\n  EXPECT_THAT(resp,\n              IsMapWithSize(\"d:4\", IsMap(\"visits\", \"100\", \"title\", \"πανίσχυρη ΛΙΒΕΛΛΟΎΛΗ Δίας\")));\n\n  // Repeat with tags\n  
Run({\"ft.create\", \"i2\", \"schema\", \"color\", \"tag\", \"separator\", \"/\"});\n\n  Run({\"hset\", \"d:5\", \"color\", \"зеЛеный/żółtY\"});\n  Run({\"hset\", \"d:6\", \"color\", \"κόκκινος/Білий\"});\n\n  auto tagvals = Run({\"ft.tagvals\", \"i2\", \"color\"});\n  EXPECT_THAT(tagvals.GetVec(), UnorderedElementsAre(\"зеленый\", \"żółty\", \"κόκκινος\", \"білий\"));\n  EXPECT_THAT(Run({\"ft.search\", \"i2\", \"@color:{зеленый|білий}\"}), AreDocIds(\"d:5\", \"d:6\"));\n}\n\nTEST_F(SearchFamilyTest, UnicodeWords) {\n  EXPECT_EQ(Run({\"ft.create\", \"i1\", \"schema\", \"title\", \"text\"}), \"OK\");\n\n  Run({\"hset\", \"d:1\", \"title\",\n       \"WORD!!! Одно слово? Zwei Wörter. Comma before ,sentence, \"\n       \"Τρεις λέξεις: χελώνα-σκύλου-γάτας. !זה עובד\",\n       \"visits\", \"400\"});\n\n  // Make sure it includes ALL those words\n  EXPECT_THAT(Run({\"ft.search\", \"i1\", \"word слово wörter sentence λέξεις γάτας עובד\"}),\n              AreDocIds(\"d:1\"));\n}\n\nTEST_F(SearchFamilyTest, PrefixSuffixInfixTrie) {\n  Run({\"ft.create\", \"i1\", \"schema\", \"title\", \"text\", \"withsuffixtrie\"});\n\n  Run({\"hset\", \"d:1\", \"title\", \"CaspIAn SeA\"});\n  Run({\"hset\", \"d:2\", \"title\", \"GreAt LakEs\"});\n  Run({\"hset\", \"d:3\", \"title\", \"Lake VictorIA\"});\n  Run({\"hset\", \"d:4\", \"title\", \"LaKE Como\"});\n\n  EXPECT_THAT(Run({\"ft.search\", \"i1\", \"*ea*\"}), AreDocIds(\"d:1\", \"d:2\"));\n  EXPECT_THAT(Run({\"ft.search\", \"i1\", \"*ia*\"}), AreDocIds(\"d:1\", \"d:3\"));\n  EXPECT_THAT(Run({\"ft.search\", \"i1\", \"lake*\"}), AreDocIds(\"d:2\", \"d:3\", \"d:4\"));\n  EXPECT_THAT(Run({\"ft.search\", \"i1\", \"*lake\"}), AreDocIds(\"d:3\", \"d:4\"));\n}\n\nstruct SortTest : SearchFamilyTest, public testing::WithParamInterface<bool /* sortable */> {};\n\nTEST_P(SortTest, BasicSort) {\n  auto AreRange = [](size_t total, size_t l, size_t r, string_view prefix) {\n    vector<string> out;\n    for (size_t i = min(l, r); i < max(l, r); i++)\n  
    out.push_back(absl::StrCat(prefix, i));\n    if (l > r)\n      reverse(out.begin(), out.end());\n    return DocIds(total, out);\n  };\n\n  vector<string_view> params{\"ft.create\", \"i1\", \"prefix\", \"1\", \"d:\", \"schema\", \"ord\", \"numeric\"};\n  if (GetParam())\n    params.emplace_back(\"sortable\");\n  Run(params);\n\n  size_t num_docs = 100;\n  for (size_t i = 0; i < num_docs; i++)\n    Run({\"hset\", absl::StrCat(\"d:\", i), \"ord\", absl::StrCat(i)});\n\n  // Check SORTBY in ASC and DESC mode with different LIMIT parameters\n  for (int take = 17; take < 35; take += 7) {\n    for (size_t i = 0; i < num_docs - take; i++)\n      EXPECT_THAT(\n          Run({\"ft.search\", \"i1\", \"*\", \"SORTBY\", \"ord\", \"LIMIT\", to_string(i), to_string(take)}),\n          AreRange(num_docs, i, i + take, \"d:\"));\n\n    for (size_t i = 0; i < num_docs - take; i++)\n      EXPECT_THAT(Run({\"ft.search\", \"i1\", \"*\", \"SORTBY\", \"ord\", \"DESC\", \"LIMIT\", to_string(i),\n                       to_string(take)}),\n                  AreRange(num_docs, num_docs - i, num_docs - i - take, \"d:\"));\n  }\n\n  params = {\"ft.create\", \"i2\", \"prefix\", \"1\", \"d2:\", \"schema\", \"name\", \"text\"};\n  if (GetParam())\n    params.emplace_back(\"sortable\");\n  Run(params);\n\n  absl::InsecureBitGen gen;\n  vector<string> random_strs;\n  for (size_t i = 0; i < 10; i++)\n    random_strs.emplace_back(GetRandomHex(gen, 7));\n  sort(random_strs.begin(), random_strs.end());\n\n  for (size_t i = 0; i < 10; i++)\n    Run({\"hset\", absl::StrCat(\"d2:\", i), \"name\", random_strs[i]});\n\n  for (size_t i = 0; i < 7; i++)\n    EXPECT_THAT(Run({\"ft.search\", \"i2\", \"*\", \"SORTBY\", \"name\", \"DESC\", \"LIMIT\", to_string(i), \"3\"}),\n                AreRange(10, 10 - i, 10 - i - 3, \"d2:\"));\n}\n\nINSTANTIATE_TEST_SUITE_P(Sortable, SortTest, testing::Values(true));\nINSTANTIATE_TEST_SUITE_P(NotSortable, SortTest, testing::Values(false));\n\nTEST_F(SearchFamilyTest, 
FtProfile) {\n  Run({\"ft.create\", \"i1\", \"schema\", \"name\", \"text\"});\n\n  auto resp = Run({\"ft.profile\", \"i1\", \"search\", \"query\", \"(a | b) c d\"});\n  ASSERT_ARRAY_OF_TWO_ARRAYS(resp);\n\n  const auto& top_level = resp.GetVec();\n  EXPECT_THAT(top_level[0], IsMapWithSize());\n\n  const auto& profile_result = top_level[1].GetVec();\n  EXPECT_EQ(profile_result.size(), shard_set->size() + 1);\n\n  EXPECT_THAT(profile_result[0].GetVec(), ElementsAre(\"took\", _, \"hits\", _, \"serialized\", _));\n\n  for (size_t sid = 0; sid < shard_set->size(); sid++) {\n    const auto& shard_resp = profile_result[sid + 1].GetVec();\n    EXPECT_THAT(shard_resp, ElementsAre(\"took\", _, \"tree\", _));\n\n    const auto& tree = shard_resp[3].GetVec();\n    EXPECT_EQ(tree[3].GetString() /* operation */, \"Logical{n=3,o=and}\"s);\n    EXPECT_GT(tree[1].GetInt() /* total time*/, tree[5].GetInt() /* self time */);\n    EXPECT_EQ(tree[7].GetInt() /* processed */, 0);\n  }\n\n  // Test LIMITED throws no errors\n  resp = Run({\"ft.profile\", \"i1\", \"search\", \"limited\", \"query\", \"(a | b) c d\"});\n  ASSERT_ARRAY_OF_TWO_ARRAYS(resp);\n}\n\nTEST_F(SearchFamilyTest, FtProfileInvalidQuery) {\n  Run({\"json.set\", \"j1\", \".\", R\"({\"id\":\"1\"})\"});\n  Run({\"ft.create\", \"i1\", \"on\", \"json\", \"schema\", \"$.id\", \"as\", \"id\", \"tag\"});\n\n  auto resp = Run({\"ft.profile\", \"i1\", \"search\", \"query\", \"@id:[1 1]\"});\n  ASSERT_ARRAY_OF_TWO_ARRAYS(resp);\n\n  EXPECT_THAT(resp.GetVec()[0], IsMapWithSize());\n\n  resp = Run({\"ft.profile\", \"i1\", \"search\", \"query\", \"@{invalid13289}\"});\n  EXPECT_THAT(resp, ErrArg(\"query syntax error\"));\n}\n\nTEST_F(SearchFamilyTest, FtProfileErrorReply) {\n  Run({\"ft.create\", \"i1\", \"schema\", \"name\", \"text\"});\n\n  auto resp = Run({\"ft.profile\", \"i1\", \"not_search\", \"query\", \"(a | b) c d\"});\n  EXPECT_THAT(resp, ErrArg(\"no `SEARCH` or `AGGREGATE` provided\"));\n\n  resp = Run({\"ft.profile\", 
\"i1\", \"search\", \"not_query\", \"(a | b) c d\"});\n  EXPECT_THAT(resp, ErrArg(kSyntaxErr));\n\n  resp = Run({\"ft.profile\", \"non_existent_key\", \"search\", \"query\", \"(a | b) c d\"});\n  EXPECT_THAT(resp, ErrArg(\"non_existent_key: no such index\"));\n}\n\nTEST_F(SearchFamilyTest, SimpleExpiry) {\n  EXPECT_EQ(Run({\"ft.create\", \"i1\", \"schema\", \"title\", \"text\", \"expires-in\", \"numeric\"}), \"OK\");\n\n  Run({\"hset\", \"d:1\", \"title\", \"never to expire\", \"expires-in\", \"100500\"});\n\n  Run({\"hset\", \"d:2\", \"title\", \"first to expire\", \"expires-in\", \"50\"});\n  Run({\"pexpire\", \"d:2\", \"50\"});\n\n  Run({\"hset\", \"d:3\", \"title\", \"second to expire\", \"expires-in\", \"100\"});\n  Run({\"pexpire\", \"d:3\", \"100\"});\n\n  EXPECT_THAT(Run({\"ft.search\", \"i1\", \"*\"}), AreDocIds(\"d:1\", \"d:2\", \"d:3\"));\n\n  AdvanceTime(60);\n  ThisFiber::SleepFor(5ms);  // Give heartbeat time to delete expired doc\n  EXPECT_THAT(Run({\"ft.search\", \"i1\", \"*\"}), AreDocIds(\"d:1\", \"d:3\"));\n\n  AdvanceTime(60);\n  Run({\"HGETALL\", \"d:3\"});  // Trigger expiry by access\n  EXPECT_THAT(Run({\"ft.search\", \"i1\", \"*\"}), AreDocIds(\"d:1\"));\n\n  Run({\"flushall\"});\n}\n\nTEST_F(SearchFamilyTest, DocsEditing) {\n  auto resp = Run({\"JSON.SET\", \"k1\", \".\", R\"({\"a\":\"1\"})\"});\n  EXPECT_EQ(resp, \"OK\");\n\n  resp = Run({\"FT.CREATE\", \"index\", \"ON\", \"JSON\", \"SCHEMA\", \"$.a\", \"AS\", \"a\", \"TEXT\"});\n  EXPECT_EQ(resp, \"OK\");\n\n  resp = Run({\"FT.SEARCH\", \"index\", \"*\"});\n  EXPECT_THAT(resp, IsMapWithSize(\"k1\", IsMap(\"$\", R\"({\"a\":\"1\"})\")));\n\n  // Test dump and restore\n  resp = Run({\"DUMP\", \"k1\"});\n  auto dump = resp.GetBuf();\n\n  resp = Run({\"DEL\", \"k1\"});\n  EXPECT_THAT(resp, IntArg(1));\n\n  resp = Run({\"RESTORE\", \"k1\", \"0\", ToSV(dump)});\n  EXPECT_EQ(resp, \"OK\");\n\n  resp = Run({\"FT.SEARCH\", \"index\", \"*\"});\n  EXPECT_THAT(resp, IsMapWithSize(\"k1\", IsMap(\"$\", 
R\"({\"a\":\"1\"})\")));\n\n  // Test renaming a key\n  EXPECT_EQ(Run({\"RENAME\", \"k1\", \"new_k1\"}), \"OK\");\n\n  resp = Run({\"FT.SEARCH\", \"index\", \"*\"});\n  EXPECT_THAT(resp, IsMapWithSize(\"new_k1\", IsMap(\"$\", R\"({\"a\":\"1\"})\")));\n\n  EXPECT_EQ(Run({\"RENAME\", \"new_k1\", \"k1\"}), \"OK\");\n\n  resp = Run({\"FT.SEARCH\", \"index\", \"*\"});\n  EXPECT_THAT(resp, IsMapWithSize(\"k1\", IsMap(\"$\", R\"({\"a\":\"1\"})\")));\n}\n\nTEST_F(SearchFamilyTest, AggregateGroupBy) {\n  auto resp = Run(\n      {\"ft.create\", \"i1\", \"ON\", \"HASH\", \"SCHEMA\", \"word\", \"TAG\", \"foo\", \"NUMERIC\", \"text\", \"TEXT\"});\n  EXPECT_EQ(resp, \"OK\");\n\n  Run({\"hset\", \"key:1\", \"word\", \"item1\", \"foo\", \"10\", \"text\", \"\\\"first key\\\"\", \"non_indexed_value\",\n       \"1\"});\n  Run({\"hset\", \"key:2\", \"word\", \"item2\", \"foo\", \"20\", \"text\", \"\\\"second key\\\"\", \"non_indexed_value\",\n       \"2\"});\n  Run({\"hset\", \"key:3\", \"word\", \"item1\", \"foo\", \"40\", \"text\", \"\\\"third key\\\"\", \"non_indexed_value\",\n       \"3\"});\n\n  resp = Run(\n      {\"ft.aggregate\", \"i1\", \"*\", \"GROUPBY\", \"1\", \"@word\", \"REDUCE\", \"COUNT\", \"0\", \"AS\", \"count\"});\n  EXPECT_THAT(resp, IsUnordArrayWithSize(IsMap(\"count\", \"2\", \"word\", \"item1\"),\n                                         IsMap(\"word\", \"item2\", \"count\", \"1\")));\n\n  resp = Run({\"ft.aggregate\", \"i1\", \"*\", \"GROUPBY\", \"1\", \"@word\", \"REDUCE\", \"SUM\", \"1\", \"@foo\",\n              \"AS\", \"foo_total\"});\n  EXPECT_THAT(resp, IsUnordArrayWithSize(IsMap(\"foo_total\", \"50\", \"word\", \"item1\"),\n                                         IsMap(\"foo_total\", \"20\", \"word\", \"item2\")));\n\n  resp = Run({\"ft.aggregate\", \"i1\", \"*\", \"GROUPBY\", \"1\", \"@word\", \"REDUCE\", \"AVG\", \"1\", \"@foo\",\n              \"AS\", \"foo_average\"});\n  EXPECT_THAT(resp, IsUnordArrayWithSize(IsMap(\"foo_average\", \"20\", 
\"word\", \"item2\"),\n                                         IsMap(\"foo_average\", \"25\", \"word\", \"item1\")));\n\n  resp = Run({\"ft.aggregate\", \"i1\", \"*\", \"GROUPBY\", \"2\", \"@word\", \"@text\", \"REDUCE\", \"SUM\", \"1\",\n              \"@foo\", \"AS\", \"foo_total\"});\n  EXPECT_THAT(resp, IsUnordArrayWithSize(\n                        IsMap(\"foo_total\", \"10\", \"word\", \"item1\", \"text\", \"\\\"first key\\\"\"),\n                        IsMap(\"foo_total\", \"40\", \"word\", \"item1\", \"text\", \"\\\"third key\\\"\"),\n                        IsMap(\"foo_total\", \"20\", \"word\", \"item2\", \"text\", \"\\\"second key\\\"\")));\n\n  resp = Run({\"ft.aggregate\", \"i1\", \"*\", \"LOAD\", \"2\", \"foo\", \"word\", \"GROUPBY\", \"1\", \"@word\",\n              \"REDUCE\", \"SUM\", \"1\", \"@foo\", \"AS\", \"foo_total\"});\n  EXPECT_THAT(resp, IsUnordArrayWithSize(IsMap(\"foo_total\", \"20\", \"word\", \"item2\"),\n                                         IsMap(\"foo_total\", \"50\", \"word\", \"item1\")));\n\n  resp = Run({\"ft.aggregate\", \"i1\", \"*\", \"LOAD\", \"2\", \"foo\", \"text\", \"GROUPBY\", \"2\", \"@word\",\n              \"@text\", \"REDUCE\", \"SUM\", \"1\", \"@foo\", \"AS\", \"foo_total\"});\n  EXPECT_THAT(resp, IsUnordArrayWithSize(\n                        IsMap(\"foo_total\", \"40\", \"word\", \"item1\", \"text\", \"\\\"third key\\\"\"),\n                        IsMap(\"foo_total\", \"20\", \"word\", \"item2\", \"text\", \"\\\"second key\\\"\"),\n                        IsMap(\"foo_total\", \"10\", \"word\", \"item1\", \"text\", \"\\\"first key\\\"\")));\n}\n\nTEST_F(SearchFamilyTest, JsonAggregateGroupBy) {\n  auto resp =\n      Run({\"FT.CREATE\", \"json_index\", \"ON\", \"JSON\", \"SCHEMA\", \"$.name\", \"AS\", \"name\", \"TEXT\",\n           \"$.price\", \"AS\", \"price\", \"NUMERIC\", \"$.quantity\", \"AS\", \"quantity\", \"NUMERIC\"});\n  EXPECT_EQ(resp, \"OK\");\n\n  Run({\"JSON.SET\", \"product:1\", \"$\", 
R\"({\"name\": \"Product A\", \"price\": 10, \"quantity\": 2})\"});\n  Run({\"JSON.SET\", \"product:2\", \"$\", R\"({\"name\": \"Product B\", \"price\": 20, \"quantity\": 3})\"});\n  Run({\"JSON.SET\", \"product:3\", \"$\", R\"({\"name\": \"Product C\", \"price\": 30, \"quantity\": 5})\"});\n\n  resp = Run({\"FT.AGGREGATE\", \"json_index\", \"*\", \"GROUPBY\", \"0\", \"REDUCE\", \"SUM\", \"1\", \"price\",\n              \"AS\", \"total_price\"});\n  EXPECT_THAT(resp, IsUnordArrayWithSize(IsMap(\"total_price\", \"60\")));\n\n  resp = Run({\"FT.AGGREGATE\", \"json_index\", \"*\", \"GROUPBY\", \"0\", \"REDUCE\", \"AVG\", \"1\", \"price\",\n              \"AS\", \"avg_price\"});\n  EXPECT_THAT(resp, IsUnordArrayWithSize(IsMap(\"avg_price\", \"20\")));\n}\n\nTEST_F(SearchFamilyTest, JsonAggregateGroupByWithoutAtSign) {\n  auto resp =\n      Run({\"FT.CREATE\", \"index\", \"ON\", \"HASH\", \"SCHEMA\", \"group\", \"TAG\", \"value\", \"NUMERIC\"});\n  EXPECT_EQ(resp, \"OK\");\n\n  absl::FlagSaver fs;\n  Run({\"HSET\", \"h1\", \"group\", \"first\", \"value\", \"1\"});\n  Run({\"HSET\", \"h2\", \"group\", \"second\", \"value\", \"2\"});\n  Run({\"HSET\", \"h3\", \"group\", \"first\", \"value\", \"3\"});\n\n  absl::SetFlag(&FLAGS_search_reject_legacy_field, false);\n  resp = Run({\"FT.AGGREGATE\", \"index\", \"*\", \"GROUPBY\", \"1\", \"group\", \"REDUCE\", \"COUNT\", \"0\", \"AS\",\n              \"count\"});\n  EXPECT_THAT(resp, IsUnordArrayWithSize(IsMap(\"count\", \"2\", \"group\", \"first\"),\n                                         IsMap(\"group\", \"second\", \"count\", \"1\")));\n  absl::SetFlag(&FLAGS_search_reject_legacy_field, true);\n  resp = Run({\"FT.AGGREGATE\", \"index\", \"*\", \"GROUPBY\", \"1\", \"group\", \"REDUCE\", \"COUNT\", \"0\", \"AS\",\n              \"count\"});\n  EXPECT_THAT(resp, ErrArg(\"bad arguments: Field name should start with '@'\"));\n}\n\nTEST_F(SearchFamilyTest, AggregateGroupByReduceSort) {\n  Run({\"ft.create\", \"i1\", \"schema\", 
\"even\", \"tag\", \"sortable\", \"value\", \"numeric\", \"sortable\"});\n  for (size_t i = 0; i < 101; i++) {  // 51 even, 50 odd\n    Run({\"hset\", absl::StrCat(\"k\", i), \"even\", (i % 2 == 0) ? \"true\" : \"false\", \"value\",\n         absl::StrCat(i)});\n  }\n\n  absl::FlagSaver fs;\n  absl::SetFlag(&FLAGS_search_reject_legacy_field, false);\n  // clang-format off\n  auto resp = Run({\"ft.aggregate\", \"i1\", \"*\",\n                  \"GROUPBY\", \"1\", \"@even\",\n                      \"REDUCE\", \"count\", \"0\", \"as\", \"count\",\n                      \"REDUCE\", \"count_distinct\", \"1\", \"even\", \"as\", \"distinct_tags\",\n                      \"REDUCE\", \"count_distinct\", \"1\", \"value\", \"as\", \"distinct_vals\",\n                      \"REDUCE\", \"max\", \"1\", \"value\", \"as\", \"max_val\",\n                      \"REDUCE\", \"min\", \"1\", \"value\", \"as\", \"min_val\",\n                  \"SORTBY\", \"1\", \"count\"});\n  // clang-format on\n\n  EXPECT_THAT(resp,\n              IsUnordArrayWithSize(IsMap(\"even\", \"false\", \"count\", \"50\", \"distinct_tags\", \"1\",\n                                         \"distinct_vals\", \"50\", \"max_val\", \"99\", \"min_val\", \"1\"),\n                                   IsMap(\"even\", \"true\", \"count\", \"51\", \"distinct_tags\", \"1\",\n                                         \"distinct_vals\", \"51\", \"max_val\", \"100\", \"min_val\", \"0\")));\n  absl::SetFlag(&FLAGS_search_reject_legacy_field, true);\n  // clang-format off\n  resp = Run({\"ft.aggregate\", \"i1\", \"*\",\n                  \"GROUPBY\", \"1\", \"@even\",\n                      \"REDUCE\", \"count\", \"0\", \"as\", \"count\",\n                      \"REDUCE\", \"count_distinct\", \"1\", \"even\", \"as\", \"distinct_tags\",\n                      \"REDUCE\", \"count_distinct\", \"1\", \"value\", \"as\", \"distinct_vals\",\n                      \"REDUCE\", \"max\", \"1\", \"value\", \"as\", \"max_val\",\n              
        \"REDUCE\", \"min\", \"1\", \"value\", \"as\", \"min_val\",\n                  \"SORTBY\", \"1\", \"count\"});\n  // clang-format on\n\n  EXPECT_THAT(resp, ErrArg(\"SORTBY field name 'count' must start with '@'\"));\n}\n\nTEST_F(SearchFamilyTest, AggregateLoadGroupBy) {\n  for (size_t i = 0; i < 101; i++) {  // 51 even, 50 odd\n    Run({\"hset\", absl::StrCat(\"k\", i), \"even\", (i % 2 == 0) ? \"true\" : \"false\", \"value\",\n         absl::StrCat(i)});\n  }\n  Run({\"ft.create\", \"i1\", \"schema\", \"value\", \"numeric\", \"sortable\"});\n\n  // clang-format off\n  auto resp = Run({\"ft.aggregate\", \"i1\", \"*\",\n                  \"LOAD\", \"1\", \"even\",\n                  \"GROUPBY\", \"1\", \"@even\"});\n  // clang-format on\n\n  EXPECT_THAT(resp, IsUnordArrayWithSize(IsMap(\"even\", \"false\"), IsMap(\"even\", \"true\")));\n}\n\nTEST_F(SearchFamilyTest, AggregateLoad) {\n  Run({\"hset\", \"key:1\", \"word\", \"item1\", \"foo\", \"10\"});\n  Run({\"hset\", \"key:2\", \"word\", \"item2\", \"foo\", \"20\"});\n  Run({\"hset\", \"key:3\", \"word\", \"item1\", \"foo\", \"30\"});\n\n  auto resp = Run({\"ft.create\", \"index\", \"ON\", \"HASH\", \"SCHEMA\", \"word\", \"TAG\", \"foo\", \"NUMERIC\"});\n  EXPECT_EQ(resp, \"OK\");\n\n  // ft.aggregate index \"*\" LOAD 1 @word LOAD 1 @foo\n  resp = Run({\"ft.aggregate\", \"index\", \"*\", \"LOAD\", \"1\", \"@word\", \"LOAD\", \"1\", \"@foo\"});\n  EXPECT_THAT(resp, IsUnordArrayWithSize(IsMap(\"word\", \"item1\", \"foo\", \"30\"),\n                                         IsMap(\"word\", \"item2\", \"foo\", \"20\"),\n                                         IsMap(\"word\", \"item1\", \"foo\", \"10\")));\n\n  // ft.aggregate index \"*\" GROUPBY 1 @word REDUCE SUM 1 @foo AS foo_total LOAD 1 foo_total\n  resp = Run({\"ft.aggregate\", \"index\", \"*\", \"GROUPBY\", \"1\", \"@word\", \"REDUCE\", \"SUM\", \"1\", \"@foo\",\n              \"AS\", \"foo_total\", \"LOAD\", \"1\", \"foo_total\"});\n  EXPECT_THAT(resp, 
ErrArg(\"LOAD cannot be applied after projectors or reducers\"));\n}\n\nTEST_F(SearchFamilyTest, Vector) {\n  auto resp = Run({\"ft.create\", \"ann\", \"ON\", \"HASH\", \"SCHEMA\", \"vector\", \"VECTOR\", \"HNSW\", \"8\",\n                   \"TYPE\", \"FLOAT32\", \"DIM\", \"100\", \"distance_metric\", \"cosine\", \"M\", \"64\"});\n  EXPECT_EQ(resp, \"OK\");\n}\n\nTEST_F(SearchFamilyTest, EscapedSymbols) {\n  Run({\"ft.create\", \"i1\", \"ON\", \"HASH\", \"SCHEMA\", \"color\", \"tag\"});\n\n  // TODO ',' is separator, we need to check should next request work or not\n  // In redis it works for JSON but not for HASH\n  // Run({\"hset\", \"i1\", \"color\", R\"(blue,1\\$+)\"});\n  // EXPECT_THAT(Run({\"ft.search\", \"i1\", R\"(@color:{blue\\,1\\\\\\$\\+})\"}), AreDocIds(\"i1\"));\n  // EXPECT_THAT(Run({\"ft.search\", \"i1\", \"@color:{blue}\"}), kNoResults);\n\n  Run({\"hset\", \"i1\", \"color\", \"blue.1\\\"%=\"});\n  EXPECT_THAT(Run({\"ft.search\", \"i1\", \"@color:{blue\\\\.1\\\\\\\"\\\\%\\\\=}\"}), AreDocIds(\"i1\"));\n  EXPECT_THAT(Run({\"ft.search\", \"i1\", \"@color:{blue}\"}), kNoResults);\n\n  Run({\"hset\", \"i1\", \"color\", \"blue<1'^~\"});\n  EXPECT_THAT(Run({\"ft.search\", \"i1\", \"@color:{blue\\\\<1\\\\'\\\\^\\\\~}\"}), AreDocIds(\"i1\"));\n  EXPECT_THAT(Run({\"ft.search\", \"i1\", \"@color:{blue}\"}), kNoResults);\n\n  Run({\"hset\", \"i1\", \"color\", \"blue>1:&/\"});\n  EXPECT_THAT(Run({\"ft.search\", \"i1\", \"@color:{blue\\\\>1\\\\:\\\\&\\\\/}\"}), AreDocIds(\"i1\"));\n  EXPECT_THAT(Run({\"ft.search\", \"i1\", \"@color:{blue}\"}), kNoResults);\n\n  Run({\"hset\", \"i1\", \"color\", \"blue{1;* \"});\n  EXPECT_THAT(Run({\"ft.search\", \"i1\", \"@color:{blue\\\\{1\\\\;\\\\*\\\\ }\"}), AreDocIds(\"i1\"));\n  EXPECT_THAT(Run({\"ft.search\", \"i1\", \"@color:{blue}\"}), kNoResults);\n\n  Run({\"hset\", \"i1\", \"color\", \"blue}1!(\"});\n  EXPECT_THAT(Run({\"ft.search\", \"i1\", \"@color:{blue\\\\}1\\\\!\\\\(}\"}), AreDocIds(\"i1\"));\n  
EXPECT_THAT(Run({\"ft.search\", \"i1\", \"@color:{blue}\"}), kNoResults);\n\n  Run({\"hset\", \"i1\", \"color\", \"blue[1@)\"});\n  EXPECT_THAT(Run({\"ft.search\", \"i1\", \"@color:{blue\\\\[1\\\\@\\\\)}\"}), AreDocIds(\"i1\"));\n  EXPECT_THAT(Run({\"ft.search\", \"i1\", \"@color:{blue}\"}), kNoResults);\n\n  Run({\"hset\", \"i1\", \"color\", \"blue]1#-\"});\n  EXPECT_THAT(Run({\"ft.search\", \"i1\", \"@color:{blue\\\\]1\\\\#\\\\-}\"}), AreDocIds(\"i1\"));\n  EXPECT_THAT(Run({\"ft.search\", \"i1\", \"@color:{blue}\"}), kNoResults);\n}\n\nTEST_F(SearchFamilyTest, FlushSearchIndices) {\n  auto resp =\n      Run({\"FT.CREATE\", \"json\", \"ON\", \"JSON\", \"SCHEMA\", \"$.nested.value\", \"AS\", \"value\", \"TEXT\"});\n  EXPECT_EQ(resp, \"OK\");\n\n  EXPECT_EQ(Run({\"FLUSHALL\"}), \"OK\");\n\n  // Test that the index was removed\n  resp = Run({\"FT.CREATE\", \"json\", \"ON\", \"JSON\", \"SCHEMA\", \"$.another.nested.value\", \"AS\", \"value\",\n              \"TEXT\"});\n  EXPECT_EQ(resp, \"OK\");\n\n  EXPECT_EQ(Run({\"FLUSHDB\"}), \"OK\");\n\n  // Test that the index was removed\n  resp = Run({\"FT.CREATE\", \"json\", \"ON\", \"JSON\", \"SCHEMA\", \"$.another.nested.value\", \"AS\", \"value\",\n              \"TEXT\"});\n  EXPECT_EQ(resp, \"OK\");\n\n  EXPECT_EQ(Run({\"select\", \"1\"}), \"OK\");\n  EXPECT_EQ(Run({\"FLUSHDB\"}), \"OK\");\n  EXPECT_EQ(Run({\"select\", \"0\"}), \"OK\");\n\n  // Test that index was not removed\n  resp = Run({\"FT.CREATE\", \"json\", \"ON\", \"JSON\", \"SCHEMA\", \"$.another.nested.value\", \"AS\", \"value\",\n              \"TEXT\"});\n  EXPECT_THAT(resp, ErrArg(\"ERR Index already exists\"));\n}\n\nTEST_F(SearchFamilyTest, AggregateWithLoadOptionHard) {\n  // Test HASH\n  Run({\"HSET\", \"h1\", \"word\", \"item1\", \"foo\", \"10\", \"text\", \"first key\"});\n  Run({\"HSET\", \"h2\", \"word\", \"item2\", \"foo\", \"20\", \"text\", \"second key\"});\n\n  auto resp = Run(\n      {\"FT.CREATE\", \"i1\", \"ON\", \"HASH\", \"SCHEMA\", 
\"word\", \"TAG\", \"foo\", \"NUMERIC\", \"text\", \"TEXT\"});\n  EXPECT_EQ(resp, \"OK\");\n\n  resp = Run({\"FT.AGGREGATE\", \"i1\", \"*\", \"LOAD\", \"2\", \"foo\", \"text\", \"GROUPBY\", \"2\", \"@word\",\n              \"@text\", \"REDUCE\", \"SUM\", \"1\", \"@foo\", \"AS\", \"foo_total\"});\n  EXPECT_THAT(resp,\n              IsUnordArrayWithSize(IsMap(\"foo_total\", \"20\", \"word\", \"item2\", \"text\", \"second key\"),\n                                   IsMap(\"foo_total\", \"10\", \"word\", \"item1\", \"text\", \"first key\")));\n\n  resp = Run({\"FT.AGGREGATE\", \"i1\", \"*\", \"LOAD\", \"1\", \"@word\", \"GROUPBY\", \"1\", \"@word\", \"REDUCE\",\n              \"SUM\", \"1\", \"@foo\", \"AS\", \"foo_total\"});\n  EXPECT_THAT(resp, IsUnordArrayWithSize(IsMap(\"foo_total\", \"20\", \"word\", \"item2\"),\n                                         IsMap(\"foo_total\", \"10\", \"word\", \"item1\")));\n\n  resp = Run({\"FT.CREATE\", \"i2\", \"ON\", \"JSON\", \"SCHEMA\", \"$.word\", \"AS\", \"word\", \"TAG\", \"$.foo\",\n              \"AS\", \"foo\", \"NUMERIC\", \"$.text\", \"AS\", \"text\", \"TEXT\"});\n  EXPECT_EQ(resp, \"OK\");\n\n  // Test JSON\n  Run({\"JSON.SET\", \"j1\", \".\", R\"({\"word\":\"item1\",\"foo\":10,\"text\":\"first key\"})\"});\n  Run({\"JSON.SET\", \"j2\", \".\", R\"({\"word\":\"item2\",\"foo\":20,\"text\":\"second key\"})\"});\n\n  resp = Run({\"FT.AGGREGATE\", \"i2\", \"*\", \"LOAD\", \"2\", \"foo\", \"text\", \"GROUPBY\", \"2\", \"@word\",\n              \"@text\", \"REDUCE\", \"SUM\", \"1\", \"@foo\", \"AS\", \"foo_total\"});\n  EXPECT_THAT(resp,\n              IsUnordArrayWithSize(IsMap(\"foo_total\", \"20\", \"word\", \"item2\", \"text\", \"second key\"),\n                                   IsMap(\"foo_total\", \"10\", \"word\", \"item1\", \"text\", \"first key\")));\n\n  resp = Run({\"FT.AGGREGATE\", \"i2\", \"*\", \"LOAD\", \"1\", \"@word\", \"GROUPBY\", \"1\", \"@word\", \"REDUCE\",\n              \"SUM\", \"1\", \"@foo\", 
\"AS\", \"foo_total\"});\n  EXPECT_THAT(resp, IsUnordArrayWithSize(IsMap(\"foo_total\", \"20\", \"word\", \"item2\"),\n                                         IsMap(\"foo_total\", \"10\", \"word\", \"item1\")));\n}\n\nTEST_F(SearchFamilyTest, WrongFieldTypeJson) {\n  EXPECT_EQ(Run({\"FT.CREATE\", \"i1\", \"ON\", \"JSON\", \"SCHEMA\", \"$.value\", \"AS\", \"value\", \"NUMERIC\",\n                 \"SORTABLE\"}),\n            \"OK\");\n\n  EXPECT_EQ(Run({\"FT.CREATE\", \"i2\", \"ON\", \"JSON\", \"SCHEMA\", \"$.value\", \"AS\", \"value\", \"NUMERIC\"}),\n            \"OK\");\n\n  auto resp =\n      Run({\"FT.CREATE\", \"i3\", \"ON\", \"JSON\", \"SCHEMA\", \"$.arr[*].id\", \"AS\", \"id\", \"NUMERIC\"});\n  EXPECT_EQ(resp, \"OK\");\n\n  resp = Run({\"FT.CREATE\", \"i4\", \"ON\", \"JSON\", \"SCHEMA\", \"$.arr[*].id\", \"AS\", \"id\", \"NUMERIC\",\n              \"SORTABLE\"});\n  EXPECT_EQ(resp, \"OK\");\n\n  // Test simple\n  Run({\"JSON.SET\", \"j1\", \".\", R\"({\"value\":\"one\"})\"});\n  Run({\"JSON.SET\", \"j2\", \".\", R\"({\"value\":1})\"});\n\n  resp = Run({\"FT.SEARCH\", \"i1\", \"*\"});\n  EXPECT_THAT(resp, AreDocIds(\"j2\"));\n\n  resp = Run({\"FT.AGGREGATE\", \"i1\", \"*\", \"LOAD\", \"1\", \"$.value\"});\n  EXPECT_THAT(resp, IsUnordArrayWithSize(IsMap(\"$.value\", \"1\")));\n\n  // Test with two fields. 
One is loading\n  Run({\"JSON.SET\", \"j3\", \".\", R\"({\"value\":\"two\",\"another_value\":1})\"});\n  Run({\"JSON.SET\", \"j4\", \".\", R\"({\"value\":2,\"another_value\":2})\"});\n\n  absl::FlagSaver fs;\n  absl::SetFlag(&FLAGS_search_reject_legacy_field, false);\n  resp = Run({\"FT.AGGREGATE\", \"i2\", \"*\", \"LOAD\", \"2\", \"$.value\", \"$.another_value\", \"GROUPBY\", \"2\",\n              \"$.value\", \"$.another_value\", \"REDUCE\", \"COUNT\", \"0\", \"AS\", \"count\"});\n  EXPECT_THAT(resp,\n              IsUnordArrayWithSize(\n                  IsMap(\"$.value\", \"1\", \"$.another_value\", ArgType(RespExpr::NIL), \"count\", \"1\"),\n                  IsMap(\"$.value\", \"2\", \"$.another_value\", \"2\", \"count\", \"1\")));\n  absl::SetFlag(&FLAGS_search_reject_legacy_field, true);\n\n  resp = Run({\"FT.AGGREGATE\", \"i2\", \"*\", \"LOAD\", \"2\", \"$.value\", \"$.another_value\", \"GROUPBY\", \"2\",\n              \"$.value\", \"$.another_value\", \"REDUCE\", \"COUNT\", \"0\", \"AS\", \"count\"});\n  EXPECT_THAT(resp, ErrArg(\"bad arguments: Field name should start with '@'\"));\n\n  // Test multiple field values\n  Run({\"JSON.SET\", \"j5\", \".\", R\"({\"arr\":[{\"id\":1},{\"id\":\"two\"}]})\"});\n  Run({\"JSON.SET\", \"j6\", \".\", R\"({\"arr\":[{\"id\":1},{\"id\":2}]})\"});\n  Run({\"JSON.SET\", \"j7\", \".\", R\"({\"arr\":[]})\"});\n\n  resp = Run({\"FT.SEARCH\", \"i3\", \"*\"});\n  EXPECT_THAT(resp, AreDocIds(\"j1\", \"j2\", \"j3\", \"j4\", \"j6\", \"j7\"));  // Only j5 fails\n\n  resp = Run({\"FT.SEARCH\", \"i4\", \"*\"});\n  EXPECT_THAT(resp, AreDocIds(\"j1\", \"j2\", \"j3\", \"j4\", \"j6\", \"j7\"));  // Only j5 fails\n}\n\nTEST_F(SearchFamilyTest, WrongFieldTypeHash) {\n  EXPECT_EQ(Run({\"FT.CREATE\", \"i1\", \"ON\", \"HASH\", \"SCHEMA\", \"value\", \"NUMERIC\", \"SORTABLE\"}), \"OK\");\n  EXPECT_EQ(Run({\"FT.CREATE\", \"i2\", \"ON\", \"HASH\", \"SCHEMA\", \"value\", \"NUMERIC\"}), \"OK\");\n\n  // Test simple\n  Run({\"HSET\", \"h1\", 
\"value\", \"one\"});\n  Run({\"HSET\", \"h2\", \"value\", \"1\"});\n\n  auto resp = Run({\"FT.SEARCH\", \"i1\", \"*\"});\n  EXPECT_THAT(resp, IsMapWithSize(\"h2\", IsMap(\"value\", \"1\")));\n\n  resp = Run({\"FT.AGGREGATE\", \"i1\", \"*\", \"LOAD\", \"1\", \"@value\"});\n  EXPECT_THAT(resp, IsUnordArrayWithSize(IsMap(\"value\", \"1\")));\n\n  // Test with two fields. One is loading\n  Run({\"HSET\", \"h3\", \"value\", \"two\", \"another_value\", \"1\"});\n  Run({\"HSET\", \"h4\", \"value\", \"2\", \"another_value\", \"2\"});\n\n  resp = Run({\"FT.SEARCH\", \"i2\", \"*\", \"LOAD\", \"1\", \"@another_value\"});\n  EXPECT_THAT(resp, IsMapWithSize(\"h2\", IsMap(\"value\", \"1\"), \"h4\",\n                                  IsMap(\"value\", \"2\", \"another_value\", \"2\")));\n\n  resp = Run({\"FT.AGGREGATE\", \"i2\", \"*\", \"LOAD\", \"2\", \"@value\", \"@another_value\", \"GROUPBY\", \"2\",\n              \"@value\", \"@another_value\", \"REDUCE\", \"COUNT\", \"0\", \"AS\", \"count\"});\n  EXPECT_THAT(resp, IsUnordArrayWithSize(\n                        IsMap(\"value\", \"1\", \"another_value\", ArgType(RespExpr::NIL), \"count\", \"1\"),\n                        IsMap(\"value\", \"2\", \"another_value\", \"2\", \"count\", \"1\")));\n}\n\nTEST_F(SearchFamilyTest, WrongFieldTypeHardJson) {\n  auto resp = Run({\"FT.CREATE\", \"i1\", \"ON\", \"JSON\", \"SCHEMA\", \"$.data\", \"AS\", \"data\", \"NUMERIC\"});\n  EXPECT_EQ(resp, \"OK\");\n\n  resp = Run(\n      {\"FT.CREATE\", \"i2\", \"ON\", \"JSON\", \"SCHEMA\", \"$.data\", \"AS\", \"data\", \"NUMERIC\", \"SORTABLE\"});\n  EXPECT_EQ(resp, \"OK\");\n\n  resp = Run({\"FT.CREATE\", \"i3\", \"ON\", \"JSON\", \"SCHEMA\", \"$.data\", \"AS\", \"data\", \"TAG\"});\n  EXPECT_EQ(resp, \"OK\");\n\n  resp =\n      Run({\"FT.CREATE\", \"i4\", \"ON\", \"JSON\", \"SCHEMA\", \"$.data\", \"AS\", \"data\", \"TAG\", \"SORTABLE\"});\n  EXPECT_EQ(resp, \"OK\");\n\n  resp = Run({\"FT.CREATE\", \"i5\", \"ON\", \"JSON\", \"SCHEMA\", \"$.data\", 
\"AS\", \"data\", \"TEXT\"});\n  EXPECT_EQ(resp, \"OK\");\n\n  resp =\n      Run({\"FT.CREATE\", \"i6\", \"ON\", \"JSON\", \"SCHEMA\", \"$.data\", \"AS\", \"data\", \"TEXT\", \"SORTABLE\"});\n  EXPECT_EQ(resp, \"OK\");\n\n  resp = Run({\"FT.CREATE\", \"i7\", \"ON\", \"JSON\", \"SCHEMA\", \"$.data\", \"AS\", \"data\", \"VECTOR\", \"FLAT\",\n              \"6\", \"TYPE\", \"FLOAT32\", \"DIM\", \"3\", \"DISTANCE_METRIC\", \"L2\"});\n  EXPECT_EQ(resp, \"OK\");\n\n  Run({\"JSON.SET\", \"j1\", \".\", R\"({\"data\":1,\"name\":\"doc_with_int\"})\"});\n  Run({\"JSON.SET\", \"j2\", \".\", R\"({\"data\":\"1\",\"name\":\"doc_with_int_as_string\"})\"});\n  Run({\"JSON.SET\", \"j3\", \".\", R\"({\"data\":\"string\",\"name\":\"doc_with_string\"})\"});\n  Run({\"JSON.SET\", \"j4\", \".\",\n       R\"({\"data\":[\"first\", \"second\", \"third\"],\"name\":\"doc_with_strings\"})\"});\n  Run({\"JSON.SET\", \"j5\", \".\", R\"({\"name\":\"no_data\"})\"});\n  Run({\"JSON.SET\", \"j6\", \".\", R\"({\"data\":[5,4,3],\"name\":\"doc_with_vector\"})\"});\n  Run({\"JSON.SET\", \"j7\", \".\", R\"({\"data\":\"[5,4,3]\",\"name\":\"doc_with_vector_as_string\"})\"});\n  Run({\"JSON.SET\", \"j8\", \".\", R\"({\"data\":null,\"name\":\"doc_with_null\"})\"});\n  Run({\"JSON.SET\", \"j9\", \".\", R\"({\"data\":[null, null, null],\"name\":\"doc_with_nulls\"})\"});\n  Run({\"JSON.SET\", \"j10\", \".\", R\"({\"data\":true,\"name\":\"doc_with_boolean\"})\"});\n  Run({\"JSON.SET\", \"j11\", \".\", R\"({\"data\":[true, false, true],\"name\":\"doc_with_booleans\"})\"});\n\n  resp = Run({\"FT.SEARCH\", \"i1\", \"*\"});\n  EXPECT_THAT(resp, AreDocIds(\"j1\", \"j5\", \"j6\", \"j8\", \"j9\"));\n\n  resp = Run({\"FT.SEARCH\", \"i2\", \"*\"});\n  EXPECT_THAT(resp, AreDocIds(\"j1\", \"j5\", \"j6\", \"j8\", \"j9\"));\n\n  resp = Run({\"FT.SEARCH\", \"i3\", \"*\"});\n  EXPECT_THAT(resp, AreDocIds(\"j2\", \"j3\", \"j4\", \"j5\", \"j7\", \"j8\", \"j9\", \"j10\", \"j11\"));\n\n  resp = Run({\"FT.SEARCH\", \"i4\", 
\"*\"});\n  EXPECT_THAT(resp, AreDocIds(\"j2\", \"j3\", \"j4\", \"j5\", \"j7\", \"j8\", \"j9\", \"j10\", \"j11\"));\n\n  resp = Run({\"FT.SEARCH\", \"i5\", \"*\"});\n  EXPECT_THAT(resp, AreDocIds(\"j2\", \"j3\", \"j4\", \"j5\", \"j7\", \"j8\", \"j9\"));\n\n  resp = Run({\"FT.SEARCH\", \"i6\", \"*\"});\n  EXPECT_THAT(resp, AreDocIds(\"j2\", \"j3\", \"j4\", \"j5\", \"j7\", \"j8\", \"j9\"));\n\n  resp = Run({\"FT.SEARCH\", \"i7\", \"*\"});\n  EXPECT_THAT(resp, AreDocIds(\"j5\", \"j6\", \"j8\"));\n}\n\nTEST_F(SearchFamilyTest, WrongFieldTypeHardHash) {\n  auto resp = Run({\"FT.CREATE\", \"i1\", \"ON\", \"HASH\", \"SCHEMA\", \"data\", \"NUMERIC\"});\n  EXPECT_EQ(resp, \"OK\");\n\n  resp = Run({\"FT.CREATE\", \"i2\", \"ON\", \"HASH\", \"SCHEMA\", \"data\", \"NUMERIC\", \"SORTABLE\"});\n  EXPECT_EQ(resp, \"OK\");\n\n  resp = Run({\"FT.CREATE\", \"i3\", \"ON\", \"HASH\", \"SCHEMA\", \"data\", \"TAG\"});\n  EXPECT_EQ(resp, \"OK\");\n\n  resp = Run({\"FT.CREATE\", \"i4\", \"ON\", \"HASH\", \"SCHEMA\", \"data\", \"TAG\", \"SORTABLE\"});\n  EXPECT_EQ(resp, \"OK\");\n\n  resp = Run({\"FT.CREATE\", \"i5\", \"ON\", \"HASH\", \"SCHEMA\", \"data\", \"TEXT\"});\n  EXPECT_EQ(resp, \"OK\");\n\n  resp = Run({\"FT.CREATE\", \"i6\", \"ON\", \"HASH\", \"SCHEMA\", \"data\", \"TEXT\", \"SORTABLE\"});\n  EXPECT_EQ(resp, \"OK\");\n\n  resp = Run({\"FT.CREATE\", \"i7\", \"ON\", \"HASH\", \"SCHEMA\", \"data\", \"VECTOR\", \"FLAT\", \"6\", \"TYPE\",\n              \"FLOAT32\", \"DIM\", \"3\", \"DISTANCE_METRIC\", \"L2\"});\n  EXPECT_EQ(resp, \"OK\");\n\n  Run({\"HSET\", \"j1\", \"data\", \"1\", \"name\", \"doc_with_int\"});\n  Run({\"HSET\", \"j2\", \"data\", \"1\", \"name\", \"doc_with_int_as_string\"});\n  Run({\"HSET\", \"j3\", \"data\", \"string\", \"name\", \"doc_with_string\"});\n  Run({\"HSET\", \"j4\", \"name\", \"no_data\"});\n  Run({\"HSET\", \"j5\", \"data\", \"5,4,3\", \"name\", \"doc_with_fake_vector\"});\n  Run({\"HSET\", \"j6\", \"data\", \"[5,4,3]\", \"name\", 
\"doc_with_fake_vector_as_string\"});\n\n  // Vector [1, 2, 3]\n  std::string vector = std::string(\"\\x3f\\x80\\x00\\x00\\x40\\x00\\x00\\x00\\x40\\x40\\x00\\x00\", 12);\n  Run({\"HSET\", \"j7\", \"data\", vector, \"name\", \"doc_with_vector [1, 2, 3]\"});\n\n  resp = Run({\"FT.SEARCH\", \"i1\", \"*\"});\n  EXPECT_THAT(resp, AreDocIds(\"j2\", \"j1\", \"j4\"));\n\n  resp = Run({\"FT.SEARCH\", \"i2\", \"*\"});\n  EXPECT_THAT(resp, AreDocIds(\"j2\", \"j1\", \"j4\"));\n\n  resp = Run({\"FT.SEARCH\", \"i3\", \"*\"});\n  EXPECT_THAT(resp, AreDocIds(\"j2\", \"j7\", \"j3\", \"j6\", \"j1\", \"j4\", \"j5\"));\n\n  resp = Run({\"FT.SEARCH\", \"i4\", \"*\"});\n  EXPECT_THAT(resp, AreDocIds(\"j2\", \"j7\", \"j3\", \"j6\", \"j1\", \"j4\", \"j5\"));\n\n  resp = Run({\"FT.SEARCH\", \"i5\", \"*\"});\n  EXPECT_THAT(resp, AreDocIds(\"j4\", \"j2\", \"j7\", \"j3\", \"j6\", \"j1\", \"j5\"));\n\n  resp = Run({\"FT.SEARCH\", \"i6\", \"*\"});\n  EXPECT_THAT(resp, AreDocIds(\"j4\", \"j2\", \"j7\", \"j3\", \"j6\", \"j1\", \"j5\"));\n\n  resp = Run({\"FT.SEARCH\", \"i7\", \"*\"});\n  EXPECT_THAT(resp, AreDocIds(\"j4\", \"j7\"));\n}\n\nTEST_F(SearchFamilyTest, WrongVectorFieldType) {\n  auto resp =\n      Run({\"FT.CREATE\", \"index\", \"ON\", \"JSON\", \"SCHEMA\", \"$.vector_field\", \"AS\", \"vector_field\",\n           \"VECTOR\", \"FLAT\", \"6\", \"TYPE\", \"FLOAT32\", \"DIM\", \"3\", \"DISTANCE_METRIC\", \"L2\"});\n  EXPECT_EQ(resp, \"OK\");\n\n  Run({\"JSON.SET\", \"j1\", \".\",\n       R\"({\"vector_field\": [0.1, 0.2, 0.3], \"name\": \"doc_with_correct_dim\"})\"});\n  Run({\"JSON.SET\", \"j2\", \".\", R\"({\"vector_field\": [0.1, 0.2], \"name\": \"doc_with_small_dim\"})\"});\n  Run({\"JSON.SET\", \"j3\", \".\",\n       R\"({\"vector_field\": [0.1, 0.2, 0.3, 0.4], \"name\": \"doc_with_large_dim\"})\"});\n  Run({\"JSON.SET\", \"j4\", \".\", R\"({\"vector_field\": [1, 2, 3], \"name\": \"doc_with_int_values\"})\"});\n  Run({\"JSON.SET\", \"j5\", \".\",\n       
R\"({\"vector_field\":\"not_vector\", \"name\":\"doc_with_incorrect_field_type\"})\"});\n  Run({\"JSON.SET\", \"j6\", \".\", R\"({\"name\":\"doc_with_no_field\"})\"});\n  Run({\"JSON.SET\", \"j7\", \".\",\n       R\"({\"vector_field\": [999999999999999999999999999999999999999, -999999999999999999999999999999999999999, 500000000000000000000000000000000000000], \"name\": \"doc_with_out_of_range_values\"})\"});\n  Run({\"JSON.SET\", \"j8\", \".\", R\"({\"vector_field\":null, \"name\": \"doc_with_null\"})\"});\n  Run({\"JSON.SET\", \"j9\", \".\", R\"({\"vector_field\":[null, null, null], \"name\": \"doc_with_nulls\"})\"});\n  Run({\"JSON.SET\", \"j10\", \".\", R\"({\"vector_field\":true, \"name\": \"doc_with_boolean\"})\"});\n  Run({\"JSON.SET\", \"j11\", \".\",\n       R\"({\"vector_field\":[true, false, true], \"name\": \"doc_with_booleans\"})\"});\n  Run({\"JSON.SET\", \"j12\", \".\", R\"({\"vector_field\":1, \"name\": \"doc_with_int\"})\"});\n\n  resp = Run({\"FT.SEARCH\", \"index\", \"*\"});\n  EXPECT_THAT(resp, AreDocIds(\"j6\", \"j7\", \"j1\", \"j4\", \"j8\"));\n}\n\n// Test that FT.AGGREGATE prints only needed fields\nTEST_F(SearchFamilyTest, AggregateResultFields) {\n  auto resp = Run({\"FT.CREATE\", \"i1\", \"ON\", \"JSON\", \"SCHEMA\", \"$.a\", \"AS\", \"a\", \"TEXT\", \"SORTABLE\",\n                   \"$.b\", \"AS\", \"b\", \"TEXT\", \"$.c\", \"AS\", \"c\", \"TEXT\"});\n  EXPECT_EQ(resp, \"OK\");\n\n  resp = Run({\"FT.CREATE\", \"i2\", \"ON\", \"JSON\", \"SCHEMA\", \"$.id\", \"AS\", \"id\", \"NUMERIC\", \"$.number\",\n              \"AS\", \"number\", \"NUMERIC\"});\n  EXPECT_EQ(resp, \"OK\");\n\n  Run({\"JSON.SET\", \"j1\", \".\", R\"({\"a\":\"1\",\"b\":\"2\",\"c\":\"3\"})\"});\n  Run({\"JSON.SET\", \"j2\", \".\", R\"({\"a\":\"4\",\"b\":\"5\",\"c\":\"6\"})\"});\n  Run({\"JSON.SET\", \"j3\", \".\", R\"({\"a\":\"7\",\"b\":\"8\",\"c\":\"9\"})\"});\n\n  resp = Run({\"FT.AGGREGATE\", \"i1\", \"*\"});\n  EXPECT_THAT(resp, IsUnordArrayWithSize(IsMap(), IsMap(), 
IsMap()));\n\n  absl::FlagSaver fs;\n  absl::SetFlag(&FLAGS_search_reject_legacy_field, false);\n  resp = Run({\"FT.AGGREGATE\", \"i1\", \"*\", \"SORTBY\", \"1\", \"a\"});\n  EXPECT_THAT(resp, IsUnordArrayWithSize(IsMap(\"a\", \"1\"), IsMap(\"a\", \"4\"), IsMap(\"a\", \"7\")));\n  absl::SetFlag(&FLAGS_search_reject_legacy_field, true);\n  resp = Run({\"FT.AGGREGATE\", \"i1\", \"*\", \"SORTBY\", \"1\", \"a\"});\n  EXPECT_THAT(resp, ErrArg(\"SORTBY field name 'a' must start with '@'\"));\n\n  absl::SetFlag(&FLAGS_search_reject_legacy_field, false);\n  resp = Run({\"FT.AGGREGATE\", \"i1\", \"*\", \"LOAD\", \"1\", \"@b\", \"SORTBY\", \"1\", \"a\"});\n  EXPECT_THAT(resp, IsUnordArrayWithSize(IsMap(\"b\", \"2\", \"a\", \"1\"), IsMap(\"b\", \"5\", \"a\", \"4\"),\n                                         IsMap(\"b\", \"8\", \"a\", \"7\")));\n  absl::SetFlag(&FLAGS_search_reject_legacy_field, true);\n  resp = Run({\"FT.AGGREGATE\", \"i1\", \"*\", \"LOAD\", \"1\", \"@b\", \"SORTBY\", \"1\", \"a\"});\n  EXPECT_THAT(resp, ErrArg(\"SORTBY field name 'a' must start with '@'\"));\n\n  absl::SetFlag(&FLAGS_search_reject_legacy_field, false);\n  resp = Run({\"FT.AGGREGATE\", \"i1\", \"*\", \"SORTBY\", \"1\", \"a\", \"GROUPBY\", \"2\", \"@b\", \"@a\", \"REDUCE\",\n              \"COUNT\", \"0\", \"AS\", \"count\"});\n  EXPECT_THAT(resp, IsUnordArrayWithSize(IsMap(\"b\", \"8\", \"a\", \"7\", \"count\", \"1\"),\n                                         IsMap(\"b\", \"2\", \"a\", \"1\", \"count\", \"1\"),\n                                         IsMap(\"b\", \"5\", \"a\", \"4\", \"count\", \"1\")));\n  absl::SetFlag(&FLAGS_search_reject_legacy_field, true);\n  resp = Run({\"FT.AGGREGATE\", \"i1\", \"*\", \"SORTBY\", \"1\", \"a\", \"GROUPBY\", \"2\", \"@b\", \"@a\", \"REDUCE\",\n              \"COUNT\", \"0\", \"AS\", \"count\"});\n  EXPECT_THAT(resp, ErrArg(\"SORTBY field name 'a' must start with '@'\"));\n\n  Run({\"JSON.SET\", \"j4\", \".\", R\"({\"id\":1, \"number\":4})\"});\n  
Run({\"JSON.SET\", \"j5\", \".\", R\"({\"id\":2})\"});\n\n  resp = Run({\"FT.AGGREGATE\", \"i2\", \"*\", \"LOAD\", \"2\", \"@id\", \"@number\"});\n  EXPECT_THAT(resp, IsUnordArrayWithSize(IsMap(\"id\", \"1\", \"number\", \"4\"), IsMap(\"id\", \"2\"), IsMap(),\n                                         IsMap(), IsMap()));\n}\n\nTEST_F(SearchFamilyTest, AggregateSortByJson) {\n  Run({\"FT.CREATE\", \"index\", \"ON\", \"JSON\", \"SCHEMA\", \"$.name\", \"AS\", \"name\", \"TEXT\", \"$.number\",\n       \"AS\", \"number\", \"NUMERIC\", \"$.group\", \"AS\", \"group\", \"TAG\"});\n  Run({\"JSON.SET\", \"j1\", \"$\", R\"({\"name\": \"first\", \"number\": 1200, \"group\": \"first\"})\"});\n  Run({\"JSON.SET\", \"j2\", \"$\", R\"({\"name\": \"second\", \"number\": 800, \"group\": \"first\"})\"});\n  Run({\"JSON.SET\", \"j3\", \"$\", R\"({\"name\": \"third\", \"number\": 300, \"group\": \"first\"})\"});\n  Run({\"JSON.SET\", \"j4\", \"$\", R\"({\"name\": \"fourth\", \"number\": 400, \"group\": \"second\"})\"});\n  Run({\"JSON.SET\", \"j5\", \"$\", R\"({\"name\": \"fifth\", \"number\": 900, \"group\": \"second\"})\"});\n  Run({\"JSON.SET\", \"j6\", \"$\", R\"({\"name\": \"sixth\", \"number\": 300, \"group\": \"first\"})\"});\n  Run({\"JSON.SET\", \"j7\", \"$\", R\"({\"name\": \"seventh\", \"number\": 400, \"group\": \"second\"})\"});\n  Run({\"JSON.SET\", \"j8\", \"$\", R\"({\"name\": \"eighth\", \"group\": \"first\"})\"});\n  Run({\"JSON.SET\", \"j9\", \"$\", R\"({\"name\": \"ninth\", \"group\": \"second\"})\"});\n\n  // Test sorting by name (DESC) and number (ASC)\n  auto resp = Run({\"FT.AGGREGATE\", \"index\", \"*\", \"SORTBY\", \"4\", \"@name\", \"DESC\", \"@number\", \"ASC\"});\n  EXPECT_THAT(\n      resp, IsUnordArrayWithSize(\n                IsMap(\"name\", \"third\", \"number\", \"300\"), IsMap(\"name\", \"sixth\", \"number\", \"300\"),\n                IsMap(\"name\", \"seventh\", \"number\", \"400\"), IsMap(\"name\", \"second\", \"number\", \"800\"),\n                
IsMap(\"name\", \"ninth\"), IsMap(\"name\", \"fourth\", \"number\", \"400\"),\n                IsMap(\"name\", \"first\", \"number\", \"1200\"), IsMap(\"name\", \"fifth\", \"number\", \"900\"),\n                IsMap(\"name\", \"eighth\")));\n\n  // Test sorting by name (ASC) and number (DESC)\n  resp = Run({\"FT.AGGREGATE\", \"index\", \"*\", \"SORTBY\", \"4\", \"@name\", \"ASC\", \"@number\", \"DESC\"});\n  EXPECT_THAT(\n      resp, IsUnordArrayWithSize(\n                IsMap(\"name\", \"eighth\"), IsMap(\"name\", \"fifth\", \"number\", \"900\"),\n                IsMap(\"name\", \"first\", \"number\", \"1200\"), IsMap(\"name\", \"fourth\", \"number\", \"400\"),\n                IsMap(\"name\", \"ninth\"), IsMap(\"name\", \"second\", \"number\", \"800\"),\n                IsMap(\"name\", \"seventh\", \"number\", \"400\"), IsMap(\"name\", \"sixth\", \"number\", \"300\"),\n                IsMap(\"name\", \"third\", \"number\", \"300\")));\n\n  // Test sorting by group (ASC), number (DESC), and name\n  resp = Run(\n      {\"FT.AGGREGATE\", \"index\", \"*\", \"SORTBY\", \"5\", \"@group\", \"ASC\", \"@number\", \"DESC\", \"@name\"});\n  EXPECT_THAT(resp,\n              IsUnordArrayWithSize(IsMap(\"group\", \"first\", \"number\", \"1200\", \"name\", \"first\"),\n                                   IsMap(\"group\", \"first\", \"number\", \"800\", \"name\", \"second\"),\n                                   IsMap(\"group\", \"first\", \"number\", \"300\", \"name\", \"sixth\"),\n                                   IsMap(\"group\", \"first\", \"number\", \"300\", \"name\", \"third\"),\n                                   IsMap(\"group\", \"first\", \"name\", \"eighth\"),\n                                   IsMap(\"group\", \"second\", \"number\", \"900\", \"name\", \"fifth\"),\n                                   IsMap(\"group\", \"second\", \"number\", \"400\", \"name\", \"fourth\"),\n                                   IsMap(\"group\", \"second\", \"number\", \"400\", \"name\", 
\"seventh\"),\n                                   IsMap(\"group\", \"second\", \"name\", \"ninth\")));\n\n  // Test sorting by number (ASC), group (DESC), and name\n  resp = Run(\n      {\"FT.AGGREGATE\", \"index\", \"*\", \"SORTBY\", \"5\", \"@number\", \"ASC\", \"@group\", \"DESC\", \"@name\"});\n  EXPECT_THAT(resp,\n              IsUnordArrayWithSize(IsMap(\"number\", \"300\", \"group\", \"first\", \"name\", \"sixth\"),\n                                   IsMap(\"number\", \"300\", \"group\", \"first\", \"name\", \"third\"),\n                                   IsMap(\"number\", \"400\", \"group\", \"second\", \"name\", \"fourth\"),\n                                   IsMap(\"number\", \"400\", \"group\", \"second\", \"name\", \"seventh\"),\n                                   IsMap(\"number\", \"800\", \"group\", \"first\", \"name\", \"second\"),\n                                   IsMap(\"number\", \"900\", \"group\", \"second\", \"name\", \"fifth\"),\n                                   IsMap(\"number\", \"1200\", \"group\", \"first\", \"name\", \"first\"),\n                                   IsMap(\"group\", \"second\", \"name\", \"ninth\"),\n                                   IsMap(\"group\", \"first\", \"name\", \"eighth\")));\n\n  // Test sorting with MAX 3\n  resp = Run({\"FT.AGGREGATE\", \"index\", \"*\", \"SORTBY\", \"1\", \"@number\", \"MAX\", \"3\"});\n  EXPECT_THAT(resp, IsUnordArrayWithSize(IsMap(\"number\", \"300\"), IsMap(\"number\", \"300\"),\n                                         IsMap(\"number\", \"400\")));\n\n  // Test sorting with MAX 3\n  resp = Run({\"FT.AGGREGATE\", \"index\", \"*\", \"SORTBY\", \"2\", \"@number\", \"DESC\", \"MAX\", \"3\"});\n  EXPECT_THAT(resp, IsUnordArrayWithSize(IsMap(\"number\", \"1200\"), IsMap(\"number\", \"900\"),\n                                         IsMap(\"number\", \"800\")));\n\n  // Test sorting by number (ASC) with MAX 999\n  resp = Run({\"FT.AGGREGATE\", \"index\", \"*\", \"SORTBY\", \"1\", 
\"@number\", \"MAX\", \"999\"});\n  EXPECT_THAT(resp, IsUnordArrayWithSize(IsMap(\"number\", \"300\"), IsMap(\"number\", \"300\"),\n                                         IsMap(\"number\", \"400\"), IsMap(\"number\", \"400\"),\n                                         IsMap(\"number\", \"800\"), IsMap(\"number\", \"900\"),\n                                         IsMap(\"number\", \"1200\"), IsMap(), IsMap()));\n\n  // Test sorting by name and number (DESC)\n  resp = Run({\"FT.AGGREGATE\", \"index\", \"*\", \"SORTBY\", \"3\", \"@name\", \"@number\", \"DESC\"});\n  EXPECT_THAT(\n      resp, IsUnordArrayWithSize(\n                IsMap(\"name\", \"eighth\"), IsMap(\"name\", \"fifth\", \"number\", \"900\"),\n                IsMap(\"name\", \"first\", \"number\", \"1200\"), IsMap(\"name\", \"fourth\", \"number\", \"400\"),\n                IsMap(\"name\", \"ninth\"), IsMap(\"name\", \"second\", \"number\", \"800\"),\n                IsMap(\"name\", \"seventh\", \"number\", \"400\"), IsMap(\"name\", \"sixth\", \"number\", \"300\"),\n                IsMap(\"name\", \"third\", \"number\", \"300\")));\n\n  // Test SORTBY with MAX, GROUPBY, and REDUCE COUNT\n  resp = Run({\"FT.AGGREGATE\", \"index\", \"*\", \"SORTBY\", \"1\", \"@name\", \"MAX\", \"3\", \"GROUPBY\", \"1\",\n              \"@number\", \"REDUCE\", \"COUNT\", \"0\", \"AS\", \"count\"});\n  EXPECT_THAT(resp, IsUnordArrayWithSize(IsMap(\"number\", \"900\", \"count\", \"1\"),\n                                         IsMap(\"number\", ArgType(RespExpr::NIL), \"count\", \"1\"),\n                                         IsMap(\"number\", \"1200\", \"count\", \"1\")));\n\n  // Test SORTBY with MAX, GROUPBY (0 fields), and REDUCE COUNT\n  resp = Run({\"FT.AGGREGATE\", \"index\", \"*\", \"SORTBY\", \"1\", \"@name\", \"MAX\", \"3\", \"GROUPBY\", \"0\",\n              \"REDUCE\", \"COUNT\", \"0\", \"AS\", \"count\"});\n  EXPECT_THAT(resp, IsUnordArrayWithSize(IsMap(\"count\", \"3\")));\n}\n\nTEST_F(SearchFamilyTest, 
AggregateSortByParsingErrors) {\n  Run({\"FT.CREATE\", \"index\", \"ON\", \"JSON\", \"SCHEMA\", \"$.name\", \"AS\", \"name\", \"TEXT\", \"$.number\",\n       \"AS\", \"number\", \"NUMERIC\", \"$.group\", \"AS\", \"group\", \"TAG\"});\n  Run({\"JSON.SET\", \"j1\", \"$\", R\"({\"name\": \"first\", \"number\": 1200, \"group\": \"first\"})\"});\n  Run({\"JSON.SET\", \"j2\", \"$\", R\"({\"name\": \"second\", \"number\": 800, \"group\": \"first\"})\"});\n  Run({\"JSON.SET\", \"j3\", \"$\", R\"({\"name\": \"third\", \"number\": 300, \"group\": \"first\"})\"});\n  Run({\"JSON.SET\", \"j4\", \"$\", R\"({\"name\": \"fourth\", \"number\": 400, \"group\": \"second\"})\"});\n  Run({\"JSON.SET\", \"j5\", \"$\", R\"({\"name\": \"fifth\", \"number\": 900, \"group\": \"second\"})\"});\n  Run({\"JSON.SET\", \"j6\", \"$\", R\"({\"name\": \"sixth\", \"number\": 300, \"group\": \"first\"})\"});\n  Run({\"JSON.SET\", \"j7\", \"$\", R\"({\"name\": \"seventh\", \"number\": 400, \"group\": \"second\"})\"});\n  Run({\"JSON.SET\", \"j8\", \"$\", R\"({\"name\": \"eighth\", \"group\": \"first\"})\"});\n  Run({\"JSON.SET\", \"j9\", \"$\", R\"({\"name\": \"ninth\", \"group\": \"second\"})\"});\n\n  // Test SORTBY with invalid argument count\n  auto resp = Run({\"FT.AGGREGATE\", \"index\", \"*\", \"SORTBY\", \"999\", \"@name\", \"@number\", \"DESC\"});\n  EXPECT_THAT(resp, ErrArg(\"bad arguments for SORTBY: specified invalid number of strings\"));\n\n  // Test SORTBY with negative argument count\n  resp = Run({\"FT.AGGREGATE\", \"index\", \"*\", \"SORTBY\", \"-3\", \"@name\", \"@number\", \"DESC\"});\n  EXPECT_THAT(resp, ErrArg(kInvalidIntErr));\n\n  // Test MAX with invalid value\n  resp = Run({\"FT.AGGREGATE\", \"index\", \"*\", \"SORTBY\", \"1\", \"@name\", \"MAX\", \"-10\"});\n  EXPECT_THAT(resp, ErrArg(kInvalidIntErr));\n\n  // Test MAX without a value\n  resp = Run({\"FT.AGGREGATE\", \"index\", \"*\", \"SORTBY\", \"1\", \"@name\", \"MAX\"});\n  EXPECT_THAT(resp, ErrArg(kSyntaxErr));\n\n  // 
Test SORTBY with a non-existing field\n  /* Temporary unsupported\n  resp = Run({\"FT.AGGREGATE\", \"index\", \"*\", \"SORTBY\", \"1\", \"@nonexistingfield\"});\n  EXPECT_THAT(resp, ErrArg(\"Property `nonexistingfield` not loaded nor in schema\")); */\n\n  // Test SORTBY with an invalid value\n  resp = Run({\"FT.AGGREGATE\", \"index\", \"*\", \"SORTBY\", \"notvalue\", \"@name\"});\n  EXPECT_THAT(resp, ErrArg(kInvalidIntErr));\n}\n\nTEST_F(SearchFamilyTest, AggregateSortByParsingErrorsWithoutAt) {\n  Run({\"FT.CREATE\", \"index\", \"ON\", \"JSON\", \"SCHEMA\", \"$.name\", \"AS\", \"name\", \"TEXT\", \"$.number\",\n       \"AS\", \"number\", \"NUMERIC\", \"$.group\", \"AS\", \"group\", \"TAG\"});\n\n  Run({\"JSON.SET\", \"j1\", \"$\", R\"({\"name\": \"first\", \"number\": 1200, \"group\": \"first\"})\"});\n\n  // Test SORTBY with field name without '@'\n  auto resp = Run({\"FT.AGGREGATE\", \"index\", \"*\", \"SORTBY\", \"1\", \"name\"});\n  EXPECT_THAT(resp, ErrArg(\"SORTBY field name 'name' must start with '@'\"));\n\n  // Test SORTBY with field name without '@' and multiple sort fields\n  resp = Run({\"FT.AGGREGATE\", \"index\", \"*\", \"SORTBY\", \"3\", \"name\", \"@number\", \"DESC\"});\n  EXPECT_THAT(resp, ErrArg(\"SORTBY field name 'name' must start with '@'\"));\n\n  // Test SORTBY with field name without '@' and MAX option\n  resp = Run({\"FT.AGGREGATE\", \"index\", \"*\", \"SORTBY\", \"1\", \"name\", \"MAX\", \"1\"});\n  EXPECT_THAT(resp, ErrArg(\"SORTBY field name 'name' must start with '@'\"));\n\n  // Check that the old error still works for wrong number of args\n  resp = Run({\"FT.AGGREGATE\", \"index\", \"*\", \"SORTBY\", \"2\", \"@name\"});\n  EXPECT_THAT(resp, ErrArg(\"bad arguments for SORTBY: specified invalid number of strings\"));\n}\n\nTEST_F(SearchFamilyTest, InvalidSearchOptions) {\n  Run({\"FT.CREATE\", \"idx\", \"ON\", \"JSON\", \"SCHEMA\", \"$.field1\", \"AS\", \"field1\", \"TEXT\", \"$.field2\",\n       \"AS\", \"field2\", \"TEXT\"});\n\n  
Run({\"JSON.SET\", \"j1\", \".\", R\"({\"field1\":\"first\",\"field2\":\"second\"})\"});\n\n  /* Test with an empty query and LOAD. TODO: Add separate test for query syntax\n  auto resp = Run({\"FT.SEARCH\", \"idx\", \"\", \"LOAD\", \"1\", \"@field1\"});\n  EXPECT_THAT(resp, IsMapWithSize()); */\n\n  // Test with LIMIT missing arguments\n  auto resp = Run({\"FT.SEARCH\", \"idx\", \"*\", \"LIMIT\", \"0\"});\n  EXPECT_THAT(resp, ErrArg(kSyntaxErr));\n\n  // Test with LIMIT exceeding the maximum allowed value\n  resp = Run({\"FT.SEARCH\", \"idx\", \"*\", \"LIMIT\", \"0\", \"100000000000000000000\"});\n  EXPECT_THAT(resp, ErrArg(kInvalidIntErr));\n\n  // Test with LIMIT and negative arguments\n  resp = Run({\"FT.SEARCH\", \"idx\", \"*\", \"LIMIT\", \"-1\", \"10\"});\n  EXPECT_THAT(resp, ErrArg(kInvalidIntErr));\n\n  // Test with LIMIT and invalid argument types\n  resp = Run({\"FT.SEARCH\", \"idx\", \"*\", \"LIMIT\", \"start\", \"count\"});\n  EXPECT_THAT(resp, ErrArg(kInvalidIntErr));\n\n  // Test with invalid RETURN syntax (missing count)\n  resp = Run({\"FT.SEARCH\", \"idx\", \"*\", \"RETURN\", \"@field1\", \"@field2\"});\n  EXPECT_THAT(resp, ErrArg(kInvalidIntErr));\n\n  // Test with RETURN having duplicate fields\n  resp = Run({\"FT.SEARCH\", \"idx\", \"*\", \"RETURN\", \"4\", \"field1\", \"field1\", \"field2\", \"field2\"});\n  EXPECT_THAT(resp, IsMapWithSize(\"j1\", IsMap(\"field1\", \"first\", \"field2\", \"second\")));\n\n  // Test with RETURN exceeding maximum allowed count\n  resp = Run({\"FT.SEARCH\", \"idx\", \"*\", \"RETURN\", \"100000000000000000000\", \"@field1\", \"@field2\"});\n  EXPECT_THAT(resp, ErrArg(kInvalidIntErr));\n\n  // Test with NOCONTENT and RETURN\n  resp = Run({\"FT.SEARCH\", \"idx\", \"*\", \"NOCONTENT\", \"RETURN\", \"2\", \"@field1\", \"@field2\"});\n  EXPECT_THAT(resp, IsArray(IntArg(1), \"j1\"));\n}\n\nTEST_F(SearchFamilyTest, KnnSearchOptions) {\n  auto resp = Run({\"FT.CREATE\", \"my_index\", \"ON\",  \"JSON\",   \"PREFIX\",       
   \"1\",     \"doc:\",\n                   \"SCHEMA\",    \"$.vector\", \"AS\",  \"vector\", \"VECTOR\",          \"FLAT\",  \"6\",\n                   \"TYPE\",      \"FLOAT32\",  \"DIM\", \"4\",      \"DISTANCE_METRIC\", \"COSINE\"});\n  EXPECT_EQ(resp, \"OK\");\n\n  Run({\"JSON.SET\", \"doc:1\", \".\", R\"({\"vector\": [0.1, 0.2, 0.3, 0.4]})\"});\n  Run({\"JSON.SET\", \"doc:2\", \".\", R\"({\"vector\": [0.5, 0.6, 0.7, 0.8]})\"});\n  Run({\"JSON.SET\", \"doc:3\", \".\", R\"({\"vector\": [0.9, 0.1, 0.4, 0.3]})\"});\n\n  std::string query_vector(\"\\x00\\x00\\x00\\x3f\\x00\\x00\\x00\\x40\\x00\\x00\\x00\\x41\\x00\\x00\\x80\\x42\", 16);\n\n  // KNN 2\n  resp = Run({\"FT.SEARCH\", \"my_index\", \"*=>[KNN 2 @vector $query_vector]\", \"PARAMS\", \"2\",\n              \"query_vector\", query_vector});\n  EXPECT_THAT(resp, AreDocIds(\"doc:1\", \"doc:2\"));\n\n  // KNN 11929939\n  resp = Run({\"FT.SEARCH\", \"my_index\", \"*=>[KNN 11929939 @vector $query_vector]\", \"PARAMS\", \"2\",\n              \"query_vector\", query_vector});\n  EXPECT_THAT(resp, AreDocIds(\"doc:1\", \"doc:2\", \"doc:3\"));\n\n  // KNN 11929939, LIMIT 4 2\n  resp = Run({\"FT.SEARCH\", \"my_index\", \"*=>[KNN 11929939 @vector $query_vector]\", \"PARAMS\", \"2\",\n              \"query_vector\", query_vector, \"LIMIT\", \"4\", \"2\"});\n  EXPECT_THAT(resp, IntArg(3));\n\n  // KNN 11929939, LIMIT 0 10\n  resp = Run({\"FT.SEARCH\", \"my_index\", \"*=>[KNN 11929939 @vector $query_vector]\", \"PARAMS\", \"2\",\n              \"query_vector\", query_vector, \"LIMIT\", \"0\", \"10\"});\n  EXPECT_THAT(resp, AreDocIds(\"doc:1\", \"doc:2\", \"doc:3\"));\n\n  // KNN 1, LIMIT 0 2\n  resp = Run({\"FT.SEARCH\", \"my_index\", \"*=>[KNN 1 @vector $query_vector]\", \"PARAMS\", \"2\",\n              \"query_vector\", query_vector, \"LIMIT\", \"0\", \"2\"});\n  EXPECT_THAT(resp, AreDocIds(\"doc:1\"));\n\n  // Parenthesized star - used by LangChain for KNN queries (issue #6342)\n  resp = Run({\"FT.SEARCH\", 
\"my_index\", \"(*)=>[KNN 2 @vector $query_vector]\", \"PARAMS\", \"2\",\n              \"query_vector\", query_vector});\n  EXPECT_THAT(resp, AreDocIds(\"doc:1\", \"doc:2\"));\n\n  // Double parenthesized star\n  resp = Run({\"FT.SEARCH\", \"my_index\", \"((*))=>[KNN 2 @vector $query_vector]\", \"PARAMS\", \"2\",\n              \"query_vector\", query_vector});\n  EXPECT_THAT(resp, AreDocIds(\"doc:1\", \"doc:2\"));\n}\n\nTEST_F(SearchFamilyTest, KnnWithSortBy) {\n  Run({\"FT.CREATE\", \"i1\",      \"ON\",     \"JSON\", \"PREFIX\",          \"1\",    \"d:\",\n       \"SCHEMA\",    \"$.v\",     \"AS\",     \"v\",    \"VECTOR\",          \"FLAT\", \"6\",\n       \"TYPE\",      \"FLOAT32\", \"DIM\",    \"1\",    \"DISTANCE_METRIC\", \"L2\",   \"$.d\",\n       \"AS\",        \"d\",       \"NUMERIC\"});\n\n  vector<string> doc_ids(100);\n  for (size_t i = 0; i < doc_ids.size(); i++) {\n    doc_ids[i] = absl::StrCat(\"d:\", i);\n    auto v = absl::StrFormat(R\"({\"v\": [%d.0], \"d\": %d})\", i, i);\n    Run({\"JSON.SET\", doc_ids[i], \".\", v});\n  }\n\n  // We first select knn_limit closest values and then sort in REVERSE by distance\n  // on a non-sortable field. 
The result should be first cut off by knn_limit and then sorted\n  for (size_t knn_limit = 8; knn_limit < 47; knn_limit += 3) {\n    vector<string> expect_ids(doc_ids.begin() + knn_limit - min<size_t>(knn_limit, 10u),\n                              doc_ids.begin() + knn_limit);\n    reverse(expect_ids.begin(), expect_ids.end());\n\n    const float qpoint = 0.0f;\n    std::string q = absl::StrFormat(\"*=>[KNN %d @v $query_vector]\", knn_limit);\n    auto resp = Run({\"ft.search\", \"i1\", q, \"SORTBY\", \"d\", \"DESC\", \"PARAMS\", \"2\", \"query_vector\",\n                     FloatSV(&qpoint), \"LIMIT\", \"0\", \"10\", \"RETURN\", \"1\", \"d\"});\n    EXPECT_THAT(resp, DocIds(knn_limit, expect_ids)) << knn_limit;\n  }\n}\n\nTEST_F(SearchFamilyTest, InvalidAggregateOptions) {\n  Run({\"FT.CREATE\", \"idx\", \"ON\", \"JSON\", \"SCHEMA\", \"$.field1\", \"AS\", \"field1\", \"TEXT\", \"$.field2\",\n       \"AS\", \"field2\", \"TEXT\"});\n\n  Run({\"JSON.SET\", \"j1\", \".\", R\"({\"field1\":\"first\",\"field2\":\"second\"})\"});\n\n  // Test GROUPBY with no arguments\n  auto resp = Run({\"FT.AGGREGATE\", \"idx\", \"*\", \"GROUPBY\"});\n  EXPECT_THAT(resp, ErrArg(kSyntaxErr));\n\n  // Test GROUPBY with invalid count\n  resp = Run({\"FT.AGGREGATE\", \"idx\", \"*\", \"GROUPBY\", \"-1\", \"@field1\"});\n  EXPECT_THAT(resp, ErrArg(kInvalidIntErr));\n\n  resp =\n      Run({\"FT.AGGREGATE\", \"idx\", \"*\", \"GROUPBY\", \"100000000000000000000\", \"@field1\", \"@field2\"});\n  EXPECT_THAT(resp, ErrArg(kInvalidIntErr));\n\n  // Test REDUCE with no REDUCE function\n  resp = Run({\"FT.AGGREGATE\", \"idx\", \"*\", \"GROUPBY\", \"1\", \"@field1\", \"REDUCE\"});\n  EXPECT_THAT(resp, ErrArg(\"reducer function  not found\"));\n\n  /* // Test REDUCE with COUNT function\n  resp = Run({\"FT.AGGREGATE\", \"idx\", \"*\", \"GROUPBY\", \"1\", \"@field1\", \"REDUCE\", \"COUNT\", \"0\"});\n  EXPECT_THAT(resp, IsMapWithSize(\"__generated_aliascount\", \"1\", \"field1\", \"first\")); */\n\n  // 
Test REDUCE with invalid function\n  resp = Run({\"FT.AGGREGATE\", \"idx\", \"*\", \"GROUPBY\", \"1\", \"@field1\", \"REDUCE\", \"INVALIDFUNC\", \"0\",\n              \"AS\", \"result\"});\n  EXPECT_THAT(resp, ErrArg(\"reducer function INVALIDFUNC not found\"));\n\n  // Test SORTBY with no arguments\n  resp = Run({\"FT.AGGREGATE\", \"idx\", \"*\", \"SORTBY\"});\n  EXPECT_THAT(resp, ErrArg(kSyntaxErr));\n\n  // Test SORTBY with invalid count\n  resp = Run({\"FT.AGGREGATE\", \"idx\", \"*\", \"SORTBY\", \"-1\", \"@field1\"});\n  EXPECT_THAT(resp, ErrArg(kInvalidIntErr));\n\n  resp = Run({\"FT.AGGREGATE\", \"idx\", \"*\", \"SORTBY\", \"100000000000000000000\", \"@field1\"});\n  EXPECT_THAT(resp, ErrArg(kInvalidIntErr));\n\n  // Test LIMIT with invalid arguments\n  resp = Run({\"FT.AGGREGATE\", \"idx\", \"*\", \"LIMIT\", \"0\"});\n  EXPECT_THAT(resp, ErrArg(kSyntaxErr));\n\n  resp = Run({\"FT.AGGREGATE\", \"idx\", \"*\", \"LIMIT\", \"-1\", \"10\"});\n  EXPECT_THAT(resp, ErrArg(kInvalidIntErr));\n\n  resp = Run({\"FT.AGGREGATE\", \"idx\", \"*\", \"LIMIT\", \"0\", \"100000000000000000000\"});\n  EXPECT_THAT(resp, ErrArg(kInvalidIntErr));\n\n  // Test LOAD with invalid arguments\n  resp = Run({\"FT.AGGREGATE\", \"idx\", \"*\", \"LOAD\", \"@field1\", \"@field2\"});\n  EXPECT_THAT(resp, ErrArg(kInvalidIntErr));\n\n  resp = Run({\"FT.AGGREGATE\", \"idx\", \"*\", \"LOAD\", \"-1\", \"@field1\"});\n  EXPECT_THAT(resp, ErrArg(kInvalidIntErr));\n\n  resp = Run({\"FT.AGGREGATE\", \"idx\", \"*\", \"LOAD\", \"100000000000000000000\", \"@field1\", \"@field2\"});\n  EXPECT_THAT(resp, ErrArg(kInvalidIntErr));\n}\n\nTEST_F(SearchFamilyTest, InvalidCreateOptions) {\n  // Test with a duplicate field in the schema\n  auto resp = Run({\"FT.CREATE\", \"index\", \"ON\", \"HASH\", \"SCHEMA\", \"title\", \"TEXT\", \"title\", \"TEXT\"});\n  EXPECT_THAT(resp, ErrArg(\"Duplicate field in schema - title\"));\n\n  // Test with no fields in the schema\n  resp = Run({\"FT.CREATE\", \"index\", \"ON\", 
\"HASH\", \"SCHEMA\"});\n  EXPECT_THAT(resp, ErrArg(\"Fields arguments are missing\"));\n\n  // Test with an invalid field type\n  resp = Run({\"FT.CREATE\", \"index\", \"ON\", \"HASH\", \"SCHEMA\", \"title\", \"UNKNOWN_TYPE\"});\n  EXPECT_THAT(resp, ErrArg(\"Field type UNKNOWN_TYPE is not supported\"));\n\n  // Test with an invalid STOPWORDS argument\n  resp = Run({\"FT.CREATE\", \"index\", \"ON\", \"HASH\", \"STOPWORDS\", \"10\", \"the\", \"and\", \"of\", \"SCHEMA\",\n              \"title\", \"TEXT\"});\n  EXPECT_THAT(resp, ErrArg(kSyntaxErr));\n\n  resp = Run({\"FT.CREATE\", \"index\", \"ON\", \"HASH\", \"STOPWORDS\", \"99999999999999999999\", \"the\", \"and\",\n              \"of\", \"SCHEMA\", \"title\", \"TEXT\"});\n  EXPECT_THAT(resp, ErrArg(kInvalidIntErr));\n\n  resp = Run({\"FT.CREATE\", \"index\", \"ON\", \"HASH\", \"STOPWORDS\", \"-1\", \"the\", \"and\", \"of\", \"SCHEMA\",\n              \"title\", \"TEXT\"});\n  EXPECT_THAT(resp, ErrArg(kInvalidIntErr));\n\n  resp = Run({\"FT.CREATE\", \"index\", \"ON\", \"HASH\", \"STOPWORDS\", \"not_a_number\", \"the\", \"and\", \"of\",\n              \"SCHEMA\", \"title\", \"TEXT\"});\n  EXPECT_THAT(resp, ErrArg(kInvalidIntErr));\n}\n\nTEST_F(SearchFamilyTest, SynonymManagement) {\n  // Create index with prefix\n  EXPECT_EQ(\n      Run({\"FT.CREATE\", \"my_idx\", \"ON\", \"HASH\", \"PREFIX\", \"1\", \"doc:\", \"SCHEMA\", \"title\", \"TEXT\"}),\n      \"OK\");\n\n  // Add first group of synonyms\n  EXPECT_EQ(Run({\"FT.SYNUPDATE\", \"my_idx\", \"1\", \"cat\", \"feline\", \"kitty\"}), \"OK\");\n\n  // Add second group of synonyms\n  EXPECT_EQ(Run({\"FT.SYNUPDATE\", \"my_idx\", \"2\", \"kitty\", \"cute\", \"adorable\"}), \"OK\");\n\n  // Add third group of synonyms\n  EXPECT_EQ(Run({\"FT.SYNUPDATE\", \"my_idx\", \"3\", \"kitty\", \"tiger\", \"cub\"}), \"OK\");\n\n  // Check the dump output\n  auto resp = Run({\"FT.SYNDUMP\", \"my_idx\"});\n  EXPECT_THAT(resp, IsUnordArray(\"cub\", IsArray(\"3\"), \"cute\", 
IsArray(\"2\"), \"adorable\",\n                                 IsArray(\"2\"), \"kitty\", IsArray(\"1\", \"2\", \"3\"), \"feline\",\n                                 IsArray(\"1\"), \"tiger\", IsArray(\"3\"), \"cat\", IsArray(\"1\")));\n}\n\nTEST_F(SearchFamilyTest, SynonymsSearch) {\n  // Create search index\n  auto resp =\n      Run({\"FT.CREATE\", \"myIndex\", \"ON\", \"HASH\", \"PREFIX\", \"1\", \"doc:\", \"SCHEMA\", \"title\", \"TEXT\"});\n  EXPECT_EQ(resp, \"OK\");\n\n  // Add documents\n  EXPECT_THAT(Run({\"HSET\", \"doc:1\", \"title\", \"car\"}), IntArg(1));\n  EXPECT_THAT(Run({\"HSET\", \"doc:2\", \"title\", \"automobile\"}), IntArg(1));\n  EXPECT_THAT(Run({\"HSET\", \"doc:3\", \"title\", \"vehicle\"}), IntArg(1));\n\n  // Add synonyms \"car\" and \"automobile\" to group 1\n  resp = Run({\"FT.SYNUPDATE\", \"myIndex\", \"1\", \"car\", \"automobile\"});\n  EXPECT_EQ(resp, \"OK\");\n\n  // Check synonyms list\n  resp = Run({\"FT.SYNDUMP\", \"myIndex\"});\n  ASSERT_THAT(resp, ArrLen(4));\n\n  // Search for \"car\" (should find both \"car\" and \"automobile\")\n  resp = Run({\"FT.SEARCH\", \"myIndex\", \"car\"});\n  EXPECT_THAT(resp, AreDocIds(\"doc:1\", \"doc:2\"));\n\n  // Search for \"automobile\" (should find both \"car\" and \"automobile\")\n  resp = Run({\"FT.SEARCH\", \"myIndex\", \"automobile\"});\n  EXPECT_THAT(resp, AreDocIds(\"doc:1\", \"doc:2\"));\n\n  // Add \"vehicle\" to the synonym group\n  resp = Run({\"FT.SYNUPDATE\", \"myIndex\", \"1\", \"vehicle\"});\n  EXPECT_EQ(resp, \"OK\");\n\n  // Search for \"vehicle\" (should find all three documents)\n  resp = Run({\"FT.SEARCH\", \"myIndex\", \"vehicle\"});\n  EXPECT_THAT(resp, AreDocIds(\"doc:1\", \"doc:2\", \"doc:3\"));\n}\n\n// Test for case-insensitive synonyms\nTEST_F(SearchFamilyTest, CaseInsensitiveSynonyms) {\n  // Create an index\n  EXPECT_EQ(Run({\"FT.CREATE\", \"case_idx\", \"ON\", \"HASH\", \"PREFIX\", \"1\", \"doc:\", \"SCHEMA\", \"title\",\n                 \"TEXT\"}),\n            
\"OK\");\n\n  // Add documents with different case words\n  EXPECT_THAT(Run({\"HSET\", \"doc:1\", \"title\", \"The cat is sleeping\"}), IntArg(1));\n  EXPECT_THAT(Run({\"HSET\", \"doc:2\", \"title\", \"A feline hunter\"}), IntArg(1));\n  EXPECT_THAT(Run({\"HSET\", \"doc:3\", \"title\", \"The dog is barking\"}), IntArg(1));\n  EXPECT_THAT(Run({\"HSET\", \"doc:4\", \"title\", \"A Canine friend\"}), IntArg(1));\n\n  // Add synonym groups with text IDs\n  EXPECT_EQ(Run({\"FT.SYNUPDATE\", \"case_idx\", \"my_synonyms_group0\", \"cat\", \"feline\"}), \"OK\");\n  EXPECT_EQ(Run({\"FT.SYNUPDATE\", \"case_idx\", \"my_synonyms_group1\", \"dog\", \"canine\"}), \"OK\");\n\n  // Check synonym output\n  auto resp = Run({\"FT.SYNDUMP\", \"case_idx\"});\n  EXPECT_THAT(resp, ArrLen(8));  // 4 terms, each with a list of groups\n\n  // Synonym search is case-insensitive\n  // Search for \"cat\" should find \"cat\" and \"feline\"\n  resp = Run({\"FT.SEARCH\", \"case_idx\", \"cat\"});\n  EXPECT_THAT(resp, AreDocIds(\"doc:1\", \"doc:2\"));\n\n  // Search for \"feline\" should find \"feline\" and \"cat\"\n  resp = Run({\"FT.SEARCH\", \"case_idx\", \"feline\"});\n  EXPECT_THAT(resp, AreDocIds(\"doc:2\", \"doc:1\"));\n\n  // Search for \"dog\" should find \"dog\" and \"canine\"\n  resp = Run({\"FT.SEARCH\", \"case_idx\", \"dog\"});\n  EXPECT_THAT(resp, AreDocIds(\"doc:3\", \"doc:4\"));\n\n  // Search for \"canine\" should find \"canine\" and \"dog\"\n  resp = Run({\"FT.SEARCH\", \"case_idx\", \"canine\"});\n  EXPECT_THAT(resp, AreDocIds(\"doc:4\", \"doc:3\"));\n\n  // Search with different case\n  // Search for \"Cat\" (uppercase) should find \"cat\" and \"feline\"\n  resp = Run({\"FT.SEARCH\", \"case_idx\", \"Cat\"});\n  EXPECT_THAT(resp, AreDocIds(\"doc:1\", \"doc:2\"));\n\n  // Search for \"FELINE\" (uppercase) should find \"feline\" and \"cat\"\n  resp = Run({\"FT.SEARCH\", \"case_idx\", \"FELINE\"});\n  EXPECT_THAT(resp, AreDocIds(\"doc:2\", \"doc:1\"));\n\n  // Search for \"DoG\" 
(mixed case) should find \"dog\" and \"canine\"\n  resp = Run({\"FT.SEARCH\", \"case_idx\", \"DoG\"});\n  EXPECT_THAT(resp, AreDocIds(\"doc:3\", \"doc:4\"));\n\n  // Search for \"cAnInE\" (mixed case) should find \"canine\" and \"dog\"\n  resp = Run({\"FT.SEARCH\", \"case_idx\", \"cAnInE\"});\n  EXPECT_THAT(resp, AreDocIds(\"doc:4\", \"doc:3\"));\n}\n\nTEST_F(SearchFamilyTest, SynonymsWithSpaces) {\n  EXPECT_EQ(Run({\"FT.CREATE\", \"my_index\", \"ON\", \"HASH\", \"PREFIX\", \"1\", \"doc:\", \"SCHEMA\", \"field\",\n                 \"TEXT\"}),\n            \"OK\");\n\n  EXPECT_EQ(Run({\"FT.SYNUPDATE\", \"my_index\", \"syn_group\", \"word1\", \"word2\"}), \"OK\");\n\n  EXPECT_THAT(Run({\"HSET\", \"doc:1\", \"field\", \" syn_group\"}), IntArg(1));\n  EXPECT_THAT(Run({\"HSET\", \"doc:2\", \"field\", \"syn_group\"}), IntArg(1));\n  EXPECT_THAT(Run({\"HSET\", \"doc:3\", \"field\", \"word1\"}), IntArg(1));\n  EXPECT_THAT(Run({\"HSET\", \"doc:4\", \"field\", \"word2\"}), IntArg(1));\n  EXPECT_THAT(Run({\"HSET\", \"doc:5\", \"field\", R\"(\\ syn_group)\"}), IntArg(1));\n\n  auto resp = Run({\"FT.SEARCH\", \"my_index\", \"word1\"});\n  EXPECT_THAT(resp, AreDocIds(\"doc:3\", \"doc:4\"));\n\n  resp = Run({\"FT.SEARCH\", \"my_index\", \"word2\"});\n  EXPECT_THAT(resp, AreDocIds(\"doc:4\", \"doc:3\"));\n\n  resp = Run({\"FT.SEARCH\", \"my_index\", \"syn_group\"});\n  EXPECT_THAT(resp, AreDocIds(\"doc:2\", \"doc:1\", \"doc:5\"));\n\n  // FT.SEARCH my_index \"\\ syn_group\"\n  // FT.SEARCH my_index \" syn_group\"\n  // The both transform to \" syn_group\" after syntax analysis\n  // \" syn_group\" passes to query_str in FtSearch\n  resp = Run({\"FT.SEARCH\", \"my_index\", \" syn_group\"});\n  EXPECT_THAT(resp, AreDocIds(\"doc:1\", \"doc:2\", \"doc:5\"));\n}\n\nTEST_F(SearchFamilyTest, SynonymsWithLeadingSpaces) {\n  EXPECT_EQ(Run({\"FT.CREATE\", \"my_index\", \"ON\", \"HASH\", \"PREFIX\", \"1\", \"doc:\", \"SCHEMA\", \"title\",\n                 \"TEXT\"}),\n            
\"OK\");\n\n  EXPECT_EQ(Run({\"FT.SYNUPDATE\", \"my_index\", \"group1\", \"word\", \"    several_spaces_synonym\"}),\n            \"OK\");\n\n  auto resp = Run({\"FT.SYNDUMP\", \"my_index\"});\n  EXPECT_THAT(resp, IsUnordArray(\"    several_spaces_synonym\", IsArray(\"group1\"), \"word\",\n                                 IsArray(\"group1\")));\n\n  EXPECT_THAT(Run({\"HSET\", \"doc:1\", \"title\", \"word\"}), IntArg(1));\n  EXPECT_THAT(Run({\"HSET\", \"doc:2\", \"title\", \"several_spaces_synonym\"}), IntArg(1));\n\n  resp = Run({\"FT.SEARCH\", \"my_index\", \"word\"});\n  EXPECT_THAT(resp, AreDocIds(\"doc:1\"));\n\n  resp = Run({\"FT.SEARCH\", \"my_index\", \"several_spaces_synonym\"});\n  EXPECT_THAT(resp, AreDocIds(\"doc:2\"));\n\n  EXPECT_THAT(Run({\"HSET\", \"doc:3\", \"title\", \"    several_spaces_synonym\"}), IntArg(1));\n\n  resp = Run({\"FT.SEARCH\", \"my_index\", \"word\"});\n  EXPECT_THAT(resp, AreDocIds(\"doc:1\"));\n}\n\n// Test to verify prefix search works correctly with synonyms\nTEST_F(SearchFamilyTest, PrefixSearchWithSynonyms) {\n  // Create search index\n  EXPECT_EQ(Run({\"FT.CREATE\", \"prefix_index\", \"ON\", \"HASH\", \"PREFIX\", \"1\", \"doc:\", \"SCHEMA\",\n                 \"title\", \"TEXT\"}),\n            \"OK\");\n\n  // Add documents with words that start with the same prefix\n  EXPECT_THAT(Run({\"HSET\", \"doc:1\", \"title\", \"apple\"}), IntArg(1));\n  EXPECT_THAT(Run({\"HSET\", \"doc:2\", \"title\", \"application\"}), IntArg(1));\n  EXPECT_THAT(Run({\"HSET\", \"doc:3\", \"title\", \"banana\"}), IntArg(1));\n  EXPECT_THAT(Run({\"HSET\", \"doc:4\", \"title\", \"appetizer\"}), IntArg(1));\n  EXPECT_THAT(Run({\"HSET\", \"doc:5\", \"title\", \"pineapple\"}), IntArg(1));\n  EXPECT_THAT(Run({\"HSET\", \"doc:6\", \"title\", \"macintosh\"}), IntArg(1));\n\n  // Check prefix search before adding synonyms\n  auto resp = Run({\"FT.SEARCH\", \"prefix_index\", \"app*\"});\n  EXPECT_THAT(resp, AreDocIds(\"doc:1\", \"doc:2\", \"doc:4\"));\n\n  // 
Add synonym: apple <-> macintosh\n  EXPECT_EQ(Run({\"FT.SYNUPDATE\", \"prefix_index\", \"1\", \"apple\", \"macintosh\"}), \"OK\");\n\n  // Verify prefix search still works after adding synonyms\n  resp = Run({\"FT.SEARCH\", \"prefix_index\", \"app*\"});\n  EXPECT_THAT(resp, AreDocIds(\"doc:1\", \"doc:2\", \"doc:4\"));\n\n  // Check exact term search for terms that are now synonyms\n  resp = Run({\"FT.SEARCH\", \"prefix_index\", \"apple\"});\n  EXPECT_THAT(resp, AreDocIds(\"doc:1\", \"doc:6\"));  // Should find both apple and macintosh\n\n  resp = Run({\"FT.SEARCH\", \"prefix_index\", \"macintosh\"});\n  EXPECT_THAT(resp, AreDocIds(\"doc:6\", \"doc:1\"));  // Should find both macintosh and apple\n\n  // Check that prefix search for mac* only finds macintosh, not apple\n  resp = Run({\"FT.SEARCH\", \"prefix_index\", \"mac*\"});\n  EXPECT_THAT(resp, AreDocIds(\"doc:6\"));  // Should only find macintosh\n}\n\nTEST_F(SearchFamilyTest, SearchSortByOptionNonSortableFieldJson) {\n  Run({\"JSON.SET\", \"json1\", \"$\", R\"({\"text\":\"2\"})\"});\n  Run({\"JSON.SET\", \"json2\", \"$\", R\"({\"text\":\"1\"})\"});\n\n  auto resp = Run({\"FT.CREATE\", \"index\", \"ON\", \"JSON\", \"SCHEMA\", \"$.text\", \"AS\", \"text\", \"TEXT\"});\n  EXPECT_EQ(resp, \"OK\");\n\n  auto expect_expr = [](std::string_view text_field) {\n    return IsArray(2, \"json2\", IsMap(text_field, \"1\", \"$\", R\"({\"text\":\"1\"})\"), \"json1\",\n                   IsMap(text_field, \"2\", \"$\", R\"({\"text\":\"2\"})\"));\n  };\n\n  resp = Run({\"FT.SEARCH\", \"index\", \"*\", \"SORTBY\", \"text\"});\n  EXPECT_THAT(resp, expect_expr(\"text\"sv));\n}\n\nTEST_F(SearchFamilyTest, SearchNonNullFields) {\n  // Basic schema with text, tag, and numeric fields\n  EXPECT_EQ(Run({\"ft.create\", \"i1\", \"schema\", \"title\", \"text\", \"tags\", \"tag\", \"score\", \"numeric\",\n                 \"sortable\"}),\n            \"OK\");\n\n  EXPECT_EQ(Run({\"ft.create\", \"i2\", \"on\", \"json\", \"schema\", 
\"$.title\", \"as\", \"title\", \"text\",\n                 \"$.meta.tags\", \"as\", \"tags\", \"tag\", \"$.meta.score\", \"as\", \"score\", \"numeric\"}),\n            \"OK\");\n\n  EXPECT_EQ(Run({\"ft.create\", \"text_idx\", \"ON\", \"HASH\", \"PREFIX\", \"1\", \"text:\", \"SCHEMA\", \"content\",\n                 \"TEXT\"}),\n            \"OK\");\n\n  EXPECT_EQ(Run({\"ft.create\", \"tag_idx\", \"ON\", \"HASH\", \"PREFIX\", \"1\", \"tag:\", \"SCHEMA\",\n                 \"categories\", \"TAG\", \"SEPARATOR\", \",\"}),\n            \"OK\");\n\n  EXPECT_EQ(Run({\"ft.create\", \"num_idx\", \"ON\", \"HASH\", \"PREFIX\", \"1\", \"num:\", \"SCHEMA\", \"price\",\n                 \"NUMERIC\", \"SORTABLE\"}),\n            \"OK\");\n\n  Run({\"hset\", \"d:1\", \"title\", \"Document with title and tags\", \"tags\", \"tag1,tag2\"});\n  Run({\"hset\", \"d:2\", \"title\", \"Document with title and score\", \"score\", \"75\"});\n  Run({\"hset\", \"d:3\", \"title\", \"Document with all fields\", \"tags\", \"tag2,tag3\", \"score\", \"100\"});\n  Run({\"hset\", \"d:4\", \"tags\", \"Document with only tags\", \"score\", \"50\"});\n\n  // Testing non-null field searches with @field:* syntax\n  EXPECT_THAT(Run({\"ft.search\", \"i1\", \"@title:*\"}), AreDocIds(\"d:1\", \"d:2\", \"d:3\"));\n  EXPECT_THAT(Run({\"ft.search\", \"i1\", \"@tags:*\"}), AreDocIds(\"d:1\", \"d:3\", \"d:4\"));\n  EXPECT_THAT(Run({\"ft.search\", \"i1\", \"@score:*\"}), AreDocIds(\"d:2\", \"d:3\", \"d:4\"));\n\n  // Testing combinations of non-null field searches\n  EXPECT_THAT(Run({\"ft.search\", \"i1\", \"@title:* @tags:*\"}), AreDocIds(\"d:1\", \"d:3\"));\n  EXPECT_THAT(Run({\"ft.search\", \"i1\", \"@title:* @score:*\"}), AreDocIds(\"d:2\", \"d:3\"));\n  EXPECT_THAT(Run({\"ft.search\", \"i1\", \"@tags:* @score:*\"}), AreDocIds(\"d:3\", \"d:4\"));\n  EXPECT_THAT(Run({\"ft.search\", \"i1\", \"@title:* @tags:* @score:*\"}), AreDocIds(\"d:3\"));\n\n  // Testing non-null field searches with sorting\n  auto result 
= Run({\"ft.search\", \"i1\", \"@score:*\", \"SORTBY\", \"score\", \"DESC\"});\n  ASSERT_EQ(result.GetVec().size(), 7);\n  EXPECT_EQ(result.GetVec()[1].GetString(), \"d:3\");  // Highest score (100) first\n  EXPECT_EQ(result.GetVec()[3].GetString(), \"d:2\");  // Middle score (75)\n  EXPECT_EQ(result.GetVec()[5].GetString(), \"d:4\");  // Lowest score (50) last\n\n  // Testing non-null field searches with JSON\n  Run({\"json.set\", \"j:1\", \".\",\n       R\"({\"title\": \"JSON document\", \"meta\": {\"tags\": [\"tag1\", \"tag2\"]}})\"});\n  Run({\"json.set\", \"j:2\", \".\", R\"({\"meta\": {\"score\": 100}})\"});\n  Run({\"json.set\", \"j:3\", \".\",\n       R\"({\"title\": \"Full JSON\", \"meta\": {\"tags\": [\"tag3\"], \"score\": 80}})\"});\n\n  EXPECT_THAT(Run({\"ft.search\", \"i2\", \"@title:*\"}), AreDocIds(\"j:1\", \"j:3\"));\n  EXPECT_THAT(Run({\"ft.search\", \"i2\", \"@tags:*\"}), AreDocIds(\"j:1\", \"j:3\"));\n  EXPECT_THAT(Run({\"ft.search\", \"i2\", \"@score:*\"}), AreDocIds(\"j:2\", \"j:3\"));\n  EXPECT_THAT(Run({\"ft.search\", \"i2\", \"@title:* @tags:* @score:*\"}), AreDocIds(\"j:3\"));\n\n  // Testing text indices with star query\n  Run({\"hset\", \"text:1\", \"content\", \"apple banana\"});\n  Run({\"hset\", \"text:2\", \"content\", \"cherry date\"});\n  Run({\"hset\", \"text:3\", \"content\", \"elephant fig\"});\n\n  EXPECT_THAT(Run({\"ft.search\", \"text_idx\", \"*\"}), AreDocIds(\"text:1\", \"text:2\", \"text:3\"));\n\n  // Testing tag indices with star query\n  Run({\"hset\", \"tag:1\", \"categories\", \"fruit,food\"});\n  Run({\"hset\", \"tag:2\", \"categories\", \"drink,beverage\"});\n  Run({\"hset\", \"tag:3\", \"categories\", \"tech,gadget\"});\n\n  EXPECT_THAT(Run({\"ft.search\", \"tag_idx\", \"*\"}), AreDocIds(\"tag:1\", \"tag:2\", \"tag:3\"));\n\n  // Testing numeric indices with star query\n  Run({\"hset\", \"num:1\", \"price\", \"10.5\"});\n  Run({\"hset\", \"num:2\", \"price\", \"20.75\"});\n  Run({\"hset\", \"num:3\", \"price\", 
\"30.99\"});\n\n  EXPECT_THAT(Run({\"ft.search\", \"num_idx\", \"*\"}), AreDocIds(\"num:1\", \"num:2\", \"num:3\"));\n\n  // Testing vector indices with star query\n  string vector1 = R\"(\\x00\\x00\\x80\\x3f\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00)\";  // [1,0,0]\n  string vector2 = R\"(\\x00\\x00\\x00\\x00\\x00\\x00\\x80\\x3f\\x00\\x00\\x00\\x00)\";  // [0,1,0]\n  string vector3 = R\"(\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x80\\x3f)\";  // [0,0,1]\n\n  Run({\"hset\", \"vec:1\", \"embedding\", vector1});\n  Run({\"hset\", \"vec:2\", \"embedding\", vector2});\n  Run({\"hset\", \"vec:3\", \"embedding\", vector3});\n\n  // Testing star query with result limit\n  auto limit_result = Run({\"ft.search\", \"text_idx\", \"*\", \"LIMIT\", \"0\", \"2\"});\n\n  // No sorting, so results returned are in random order (implementation-dependent).\n  EXPECT_THAT(limit_result, RespElementsAre(IntArg(3), _, _, _, _));\n\n  // Testing star query with sorting\n  auto price_desc_result = Run({\"ft.search\", \"num_idx\", \"*\", \"SORTBY\", \"price\", \"DESC\"});\n  ASSERT_EQ(price_desc_result.GetVec().size(), 7);\n  EXPECT_EQ(price_desc_result.GetVec()[1].GetString(), \"num:3\");  // Most expensive item first\n  EXPECT_EQ(price_desc_result.GetVec()[3].GetString(), \"num:2\");\n  EXPECT_EQ(price_desc_result.GetVec()[5].GetString(), \"num:1\");  // Cheapest item last\n\n  auto price_asc_result = Run({\"ft.search\", \"num_idx\", \"*\", \"SORTBY\", \"price\", \"ASC\"});\n  ASSERT_EQ(price_asc_result.GetVec().size(), 7);\n  EXPECT_EQ(price_asc_result.GetVec()[1].GetString(), \"num:1\");  // Cheapest item first\n  EXPECT_EQ(price_asc_result.GetVec()[3].GetString(), \"num:2\");\n  EXPECT_EQ(price_asc_result.GetVec()[5].GetString(), \"num:3\");  // Most expensive item last\n}\n\nTEST_F(SearchFamilyTest, SortIndexBasicOperations) {\n  // Create an index with a numeric field and a text field, both SORTABLE\n  EXPECT_EQ(Run({\"ft.create\", \"sort_idx\", \"SCHEMA\", \"num_field\", 
\"NUMERIC\", \"SORTABLE\", \"str_field\",\n                 \"TEXT\", \"SORTABLE\"}),\n            \"OK\");\n\n  // Add documents with different field values - only with both fields for test simplification\n  Run({\"hset\", \"doc:1\", \"num_field\", \"10\", \"str_field\", \"apple\"});\n  Run({\"hset\", \"doc:2\", \"num_field\", \"20\", \"str_field\", \"banana\"});\n  Run({\"hset\", \"doc:3\", \"num_field\", \"5\", \"str_field\", \"cherry\"});\n  Run({\"hset\", \"doc:4\", \"num_field\", \"15\", \"str_field\", \"date\"});\n\n  // Test search with star (* - all documents)\n  EXPECT_THAT(Run({\"ft.search\", \"sort_idx\", \"*\"}), AreDocIds(\"doc:1\", \"doc:2\", \"doc:3\", \"doc:4\"));\n\n  // Test search by field presence\n  EXPECT_THAT(Run({\"ft.search\", \"sort_idx\", \"@num_field:*\"}),\n              AreDocIds(\"doc:1\", \"doc:2\", \"doc:3\", \"doc:4\"));\n  EXPECT_THAT(Run({\"ft.search\", \"sort_idx\", \"@str_field:*\"}),\n              AreDocIds(\"doc:1\", \"doc:2\", \"doc:3\", \"doc:4\"));\n\n  // Test sorting by numeric field (ascending)\n  auto num_asc_result = Run({\"ft.search\", \"sort_idx\", \"*\", \"SORTBY\", \"num_field\", \"ASC\"});\n\n  // Check the overall order, not specific indices\n  ASSERT_GE(num_asc_result.GetVec().size(), 9);  // 4 documents * 2 + 1\n\n  // Collect document IDs in the order they appear in the result\n  std::vector<std::string> sorted_ids;\n  for (size_t i = 1; i < num_asc_result.GetVec().size(); i += 2) {\n    sorted_ids.push_back(num_asc_result.GetVec()[i].GetString());\n  }\n\n  // Verify that the numeric field sorting order is correct\n  ASSERT_EQ(sorted_ids.size(), 4);\n  EXPECT_EQ(sorted_ids[0], \"doc:3\");  // 5\n  EXPECT_EQ(sorted_ids[1], \"doc:1\");  // 10\n  EXPECT_EQ(sorted_ids[2], \"doc:4\");  // 15\n  EXPECT_EQ(sorted_ids[3], \"doc:2\");  // 20\n\n  // Sorting by text field (descending)\n  auto str_desc_result = Run({\"ft.search\", \"sort_idx\", \"*\", \"SORTBY\", \"str_field\", \"DESC\"});\n\n  // Check the overall 
order of text sorting\n  sorted_ids.clear();\n  for (size_t i = 1; i < str_desc_result.GetVec().size(); i += 2) {\n    sorted_ids.push_back(str_desc_result.GetVec()[i].GetString());\n  }\n\n  ASSERT_EQ(sorted_ids.size(), 4);\n  EXPECT_EQ(sorted_ids[0], \"doc:4\");  // date\n  EXPECT_EQ(sorted_ids[1], \"doc:3\");  // cherry\n  EXPECT_EQ(sorted_ids[2], \"doc:2\");  // banana\n  EXPECT_EQ(sorted_ids[3], \"doc:1\");  // apple\n\n  // Update a document\n  Run({\"hset\", \"doc:3\", \"num_field\", \"30\"});  // 5 -> 30\n\n  // Check the updated sorting\n  auto updated_result = Run({\"ft.search\", \"sort_idx\", \"*\", \"SORTBY\", \"num_field\", \"ASC\"});\n  sorted_ids.clear();\n  for (size_t i = 1; i < updated_result.GetVec().size(); i += 2) {\n    sorted_ids.push_back(updated_result.GetVec()[i].GetString());\n  }\n\n  ASSERT_EQ(sorted_ids.size(), 4);\n  EXPECT_EQ(sorted_ids[0], \"doc:1\");  // 10\n  EXPECT_EQ(sorted_ids[1], \"doc:4\");  // 15\n  EXPECT_EQ(sorted_ids[2], \"doc:2\");  // 20\n  EXPECT_EQ(sorted_ids[3], \"doc:3\");  // 30\n\n  // Test document deletion\n  Run({\"del\", \"doc:2\"});\n  auto after_delete_result = Run({\"ft.search\", \"sort_idx\", \"*\"});\n  EXPECT_THAT(after_delete_result, AreDocIds(\"doc:1\", \"doc:3\", \"doc:4\"));\n}\n\n// Separate test for documents with missing fields during sorting\nTEST_F(SearchFamilyTest, SortIndexWithNullFields) {\n  EXPECT_EQ(Run({\"ft.create\", \"null_sort_idx\", \"SCHEMA\", \"num_field\", \"NUMERIC\", \"SORTABLE\"}),\n            \"OK\");\n\n  // Documents with and without numeric field\n  Run({\"hset\", \"doc:1\", \"num_field\", \"10\"});\n  Run({\"hset\", \"doc:2\", \"num_field\", \"20\"});\n  Run({\"hset\", \"doc:3\", \"other_field\", \"value\"});  // no numeric field\n\n  // Verify that all documents are indexed\n  EXPECT_THAT(Run({\"ft.search\", \"null_sort_idx\", \"*\"}), AreDocIds(\"doc:1\", \"doc:2\", \"doc:3\"));\n\n  // Verify that only documents with numeric field are found by @num_field:* query\n  
EXPECT_THAT(Run({\"ft.search\", \"null_sort_idx\", \"@num_field:*\"}), AreDocIds(\"doc:1\", \"doc:2\"));\n\n  // When sorting, documents without the field should be at the end (but exact order may vary)\n  auto sort_result = Run({\"ft.search\", \"null_sort_idx\", \"*\", \"SORTBY\", \"num_field\", \"ASC\"});\n\n  // Collect results\n  std::vector<std::string> sorted_ids;\n  for (size_t i = 1; i < sort_result.GetVec().size(); i += 2) {\n    sorted_ids.push_back(sort_result.GetVec()[i].GetString());\n  }\n\n  // Verify that documents with numeric fields are in the correct order,\n  // and the document without a numeric field is either at the end or not included (depends on\n  // implementation)\n  ASSERT_GE(sorted_ids.size(), 2);\n\n  // Check only documents with known field values\n  auto doc1_pos = std::find(sorted_ids.begin(), sorted_ids.end(), \"doc:1\");\n  auto doc2_pos = std::find(sorted_ids.begin(), sorted_ids.end(), \"doc:2\");\n\n  ASSERT_NE(doc1_pos, sorted_ids.end());\n  ASSERT_NE(doc2_pos, sorted_ids.end());\n\n  // doc:1 (10) should be before doc:2 (20) in ascending sort\n  EXPECT_LT(std::distance(sorted_ids.begin(), doc1_pos),\n            std::distance(sorted_ids.begin(), doc2_pos));\n}\n\nTEST_F(SearchFamilyTest, VectorIndexOperations) {\n  // Create an index with a vector field\n  EXPECT_EQ(Run({\"ft.create\", \"vector_idx\", \"SCHEMA\", \"vec\", \"VECTOR\", \"FLAT\", \"6\", \"TYPE\",\n                 \"FLOAT32\", \"DIM\", \"3\", \"DISTANCE_METRIC\", \"L2\", \"name\", \"TEXT\"}),\n            \"OK\");\n\n  // Function to convert float vectors to binary representation\n  auto FloatsToBytes = [](const std::vector<float>& floats) -> std::string {\n    return std::string(reinterpret_cast<const char*>(floats.data()), floats.size() * sizeof(float));\n  };\n\n  // Prepare vector data in binary format\n  std::string vec1 = FloatsToBytes({1.0f, 0.0f, 0.0f});\n  std::string vec2 = FloatsToBytes({0.0f, 1.0f, 0.0f});\n  std::string vec3 = FloatsToBytes({0.0f, 
0.0f, 1.0f});\n  std::string vec4 = FloatsToBytes({0.5f, 0.5f, 0.0f});\n  std::string vec5 = FloatsToBytes({0.3f, 0.3f, 0.3f});\n\n  // Add documents with vector data in binary format\n  Run({\"hset\", \"vec:1\", \"vec\", vec1, \"name\", \"vector1\"});\n  Run({\"hset\", \"vec:2\", \"vec\", vec2, \"name\", \"vector2\"});\n  Run({\"hset\", \"vec:3\", \"vec\", vec3, \"name\", \"vector3\"});\n  Run({\"hset\", \"vec:4\", \"vec\", vec4, \"name\", \"vector4\"});\n  Run({\"hset\", \"vec:5\", \"vec\", vec5, \"name\", \"vector5\"});\n\n  // Basic star search\n  auto star_search = Run({\"ft.search\", \"vector_idx\", \"*\"});\n  EXPECT_THAT(star_search, AreDocIds(\"vec:1\", \"vec:2\", \"vec:3\", \"vec:4\", \"vec:5\"));\n\n  // Search by vector field presence\n  auto vec_field_search = Run({\"ft.search\", \"vector_idx\", \"@vec:*\"});\n  EXPECT_THAT(vec_field_search, AreDocIds(\"vec:1\", \"vec:2\", \"vec:3\", \"vec:4\", \"vec:5\"));\n}\n\n// Test to verify that @field:* syntax works with sortable fields\nTEST_F(SearchFamilyTest, SortIndexGetAllResults) {\n  // Create an index with a numeric field that is SORTABLE but not indexed as a regular field\n  EXPECT_EQ(Run({\"ft.create\", \"sort_only_idx\", \"SCHEMA\", \"sort_field\", \"NUMERIC\", \"SORTABLE\"}),\n            \"OK\");\n\n  // Add documents with and without the sortable field\n  Run({\"hset\", \"doc:1\", \"sort_field\", \"10\", \"other_field\", \"value1\"});\n  Run({\"hset\", \"doc:2\", \"sort_field\", \"20\", \"other_field\", \"value2\"});\n  Run({\"hset\", \"doc:3\", \"sort_field\", \"30\", \"other_field\", \"value3\"});\n  Run({\"hset\", \"doc:4\", \"other_field\", \"value4\"});  // no sort_field\n  Run({\"hset\", \"doc:5\", \"other_field\", \"value5\"});  // no sort_field\n\n  // Test that all documents are indexed\n  EXPECT_THAT(Run({\"ft.search\", \"sort_only_idx\", \"*\"}),\n              AreDocIds(\"doc:1\", \"doc:2\", \"doc:3\", \"doc:4\", \"doc:5\"));\n\n  // Test that @field:* search works for sortable field\n 
 // This should only return documents that have the sort_field\n  EXPECT_THAT(Run({\"ft.search\", \"sort_only_idx\", \"@sort_field:*\"}),\n              AreDocIds(\"doc:1\", \"doc:2\", \"doc:3\"));\n\n  // Test sorting with @field:* query\n  auto sort_result =\n      Run({\"ft.search\", \"sort_only_idx\", \"@sort_field:*\", \"SORTBY\", \"sort_field\", \"DESC\"});\n\n  // Collect document IDs in order\n  std::vector<std::string> sorted_ids;\n  for (size_t i = 1; i < sort_result.GetVec().size(); i += 2) {\n    sorted_ids.push_back(sort_result.GetVec()[i].GetString());\n  }\n\n  // Verify correct order\n  ASSERT_EQ(sorted_ids.size(), 3);\n  EXPECT_EQ(sorted_ids[0], \"doc:3\");  // 30\n  EXPECT_EQ(sorted_ids[1], \"doc:2\");  // 20\n  EXPECT_EQ(sorted_ids[2], \"doc:1\");  // 10\n}\n\nTEST_F(SearchFamilyTest, JsonWithNullFields) {\n  // Create indices for text, tag, and numeric fields (non-sortable)\n  EXPECT_EQ(Run({\"FT.CREATE\", \"idx:regular\", \"ON\", \"JSON\", \"SCHEMA\", \"$.text_field\", \"AS\",\n                 \"text_field\", \"TEXT\", \"$.tag_field\", \"AS\", \"tag_field\", \"TAG\", \"$.num_field\", \"AS\",\n                 \"num_field\", \"NUMERIC\"}),\n            \"OK\");\n\n  // Create indices for text, tag, and numeric fields (sortable)\n  EXPECT_EQ(Run({\"FT.CREATE\",    \"idx:sortable\", \"ON\",         \"JSON\",    \"SCHEMA\",\n                 \"$.text_field\", \"AS\",           \"text_field\", \"TEXT\",    \"SORTABLE\",\n                 \"$.tag_field\",  \"AS\",           \"tag_field\",  \"TAG\",     \"SORTABLE\",\n                 \"$.num_field\",  \"AS\",           \"num_field\",  \"NUMERIC\", \"SORTABLE\"}),\n            \"OK\");\n\n  // Create JSON documents with null values in different field types\n  Run({\"JSON.SET\", \"doc:1\", \".\",\n       R\"({\"text_field\": \"sample text\", \"tag_field\": \"tag1,tag2\", \"num_field\": 100})\"});\n  Run({\"JSON.SET\", \"doc:2\", \".\", R\"({\"text_field\": null, \"tag_field\": \"tag3\", \"num_field\": 
200})\"});\n  Run({\"JSON.SET\", \"doc:3\", \".\",\n       R\"({\"text_field\": \"another text\", \"tag_field\": null, \"num_field\": 300})\"});\n  Run({\"JSON.SET\", \"doc:4\", \".\",\n       R\"({\"text_field\": \"more text\", \"tag_field\": \"tag4,tag5\", \"num_field\": null})\"});\n  Run({\"JSON.SET\", \"doc:5\", \".\", R\"({\"text_field\": null, \"tag_field\": null, \"num_field\": null})\"});\n  Run({\"JSON.SET\", \"doc:6\", \".\", R\"({\"other_field\": \"not indexed field\"})\"});\n\n  // Test @field:* searches on non-sortable index\n  EXPECT_THAT(Run({\"FT.SEARCH\", \"idx:regular\", \"@text_field:*\"}),\n              AreDocIds(\"doc:1\", \"doc:3\", \"doc:4\"));\n  EXPECT_THAT(Run({\"FT.SEARCH\", \"idx:regular\", \"@tag_field:*\"}),\n              AreDocIds(\"doc:1\", \"doc:2\", \"doc:4\"));\n  EXPECT_THAT(Run({\"FT.SEARCH\", \"idx:regular\", \"@num_field:*\"}),\n              AreDocIds(\"doc:1\", \"doc:2\", \"doc:3\"));\n\n  // Test @field:* searches on sortable index\n  EXPECT_THAT(Run({\"FT.SEARCH\", \"idx:sortable\", \"@text_field:*\"}),\n              AreDocIds(\"doc:1\", \"doc:3\", \"doc:4\"));\n  EXPECT_THAT(Run({\"FT.SEARCH\", \"idx:sortable\", \"@tag_field:*\"}),\n              AreDocIds(\"doc:1\", \"doc:2\", \"doc:4\"));\n  EXPECT_THAT(Run({\"FT.SEARCH\", \"idx:sortable\", \"@num_field:*\"}),\n              AreDocIds(\"doc:1\", \"doc:2\", \"doc:3\"));\n\n  // Test search for documents with non-null values for all fields\n  EXPECT_THAT(Run({\"FT.SEARCH\", \"idx:regular\", \"@text_field:* @tag_field:* @num_field:*\"}),\n              AreDocIds(\"doc:1\"));\n  EXPECT_THAT(Run({\"FT.SEARCH\", \"idx:sortable\", \"@text_field:* @tag_field:* @num_field:*\"}),\n              AreDocIds(\"doc:1\"));\n\n  // Test combined queries\n  EXPECT_THAT(Run({\"FT.SEARCH\", \"idx:regular\", \"@text_field:* @tag_field:*\"}),\n              AreDocIds(\"doc:1\", \"doc:4\"));\n  EXPECT_THAT(Run({\"FT.SEARCH\", \"idx:regular\", \"@text_field:* @num_field:*\"}),\n            
  AreDocIds(\"doc:1\", \"doc:3\"));\n  EXPECT_THAT(Run({\"FT.SEARCH\", \"idx:regular\", \"@tag_field:* @num_field:*\"}),\n              AreDocIds(\"doc:1\", \"doc:2\"));\n}\n\nTEST_F(SearchFamilyTest, TestHsetDeleteDocumentHnswSchemaCrash) {\n  EXPECT_EQ(Run({\"FT.CREATE\", \"idx\", \"SCHEMA\", \"n\", \"NUMERIC\", \"v\", \"VECTOR\", \"HNSW\", \"8\", \"TYPE\",\n                 \"FLOAT16\", \"DIM\", \"4\", \"DISTANCE_METRIC\", \"L2\", \"M\", \"65536\"}),\n            \"OK\");\n\n  auto res = Run({\"HSET\", \"doc\", \"n\", \"0\"});\n  EXPECT_EQ(res, 1);\n\n  res = Run({\"DEL\", \"doc\"});\n  EXPECT_EQ(res, 1);\n}\n\nTEST_F(SearchFamilyTest, RenameDocumentBetweenIndices) {\n  absl::FlagSaver fs;\n\n  SetTestFlag(\"cluster_mode\", \"emulated\");\n  ResetService();\n\n  EXPECT_EQ(Run({\"ft.create\", \"idx1\", \"prefix\", \"1\", \"idx1\", \"filter\", \"@index==\\\"yes\\\"\", \"schema\",\n                 \"t\", \"text\"}),\n            \"OK\");\n  EXPECT_EQ(Run({\"ft.create\", \"idx2\", \"prefix\", \"1\", \"idx2\", \"filter\", \"@index==\\\"yes\\\"\", \"schema\",\n                 \"t\", \"text\"}),\n            \"OK\");\n\n  Run({\"hset\", \"idx1:{doc}1\", \"t\", \"foo1\", \"index\", \"yes\"});\n\n  EXPECT_EQ(Run({\"rename\", \"idx1:{doc}1\", \"idx2:{doc}1\"}), \"OK\");\n  EXPECT_EQ(Run({\"rename\", \"idx2:{doc}1\", \"idx1:{doc}1\"}), \"OK\");\n}\n\nTEST_F(SearchFamilyTest, JsonSetIndexesBug) {\n  auto resp = Run(\n      {\"FT.CREATE\", \"index\", \"ON\", \"json\", \"SCHEMA\", \"$.text\", \"AS\", \"text\", \"TEXT\", \"SORTABLE\"});\n  EXPECT_THAT(resp, \"OK\");\n\n  resp = Run({\"JSON.SET\", \"j1\", \"$\", R\"({\"text\":\"some text\"})\"});\n  EXPECT_THAT(resp, \"OK\");\n\n  resp = Run({\"JSON.SET\", \"j1\", \"$\", R\"({\"asd}\"})\"});\n  EXPECT_THAT(resp, ErrArg(\"ERR failed to parse JSON\"));\n\n  resp = Run({\"FT.AGGREGATE\", \"index\", \"*\", \"GROUPBY\", \"1\", \"@text\"});\n  EXPECT_THAT(resp, IsUnordArrayWithSize(IsMap(\"text\", \"some 
text\")));\n}\n\nTEST_F(SearchFamilyTest, SearchReindexWriteSearchRace) {\n  const std::string kIndexName = \"myRaceIdx\";\n  const int kWriterOps = 200;\n  const int kSearcherOps = 200;\n  const int kReindexerOps = 200;\n\n  auto writer_fiber = pp_->at(0)->LaunchFiber([&] {\n    for (int i = 1; i <= kWriterOps; ++i) {\n      std::string doc_key = absl::StrCat(\"doc:\", i);\n      std::string content = absl::StrCat(\"text data item \", i, \" for race condition test\");\n      std::string tags_val = absl::StrCat(\"tagA,tagB,\", (i % 10));\n      std::string numeric_field_val = std::to_string(i);\n      Run({\"hset\", doc_key, \"content\", content, \"tags\", tags_val, \"numeric_field\",\n           numeric_field_val});\n    }\n  });\n\n  auto searcher_fiber = pp_->at(1)->LaunchFiber([&] {\n    for (int i = 1; i <= kSearcherOps; ++i) {\n      int random_val_content = 1 + (i % kWriterOps);\n      std::string query_content = absl::StrCat(\"@content:item\", random_val_content);\n      Run({\"ft.search\", kIndexName, query_content});\n    }\n  });\n\n  auto reindexer_fiber = pp_->at(2)->LaunchFiber([&] {\n    for (int i = 1; i <= kReindexerOps; ++i) {\n      Run({\"ft.create\", kIndexName, \"ON\", \"HASH\", \"PREFIX\", \"1\", \"doc:\", \"SCHEMA\", \"content\",\n           \"TEXT\", \"SORTABLE\", \"tags\", \"TAG\", \"SORTABLE\", \"numeric_field\", \"NUMERIC\", \"SORTABLE\"});\n      Run({\"ft.dropindex\", kIndexName});\n    }\n  });\n\n  // Join fibers\n  writer_fiber.Join();\n  searcher_fiber.Join();\n  reindexer_fiber.Join();\n\n  ASSERT_FALSE(service_->IsShardSetLocked());\n}\n\nTEST_F(SearchFamilyTest, IgnoredOptionsInFtCreate) {\n  GTEST_SKIP() << \"The usage of ignored options is now wrong - it skips supported ones!\";\n\n  // Create an index with various options, some of which should be ignored\n  // INDEXMISSING and INDEXEMPTY are supported by default\n  auto resp = Run({\"FT.CREATE\",\n                   \"idx\",\n                   \"ON\",\n                   
\"HASH\",\n                   \"SCHEMA\",\n                   \"title\",\n                   \"TEXT\",\n                   \"UNF\",\n                   \"NOSTEM\",\n                   \"CASESENSITIVE\",\n                   \"WITHSUFFIXTRIE\",\n                   \"INDEXMISSING\",\n                   \"INDEXEMPTY\",\n                   \"WEIGHT\",\n                   \"1\",\n                   \"SEPARATOR\",\n                   \"|\",\n                   \"PHONETIC\",\n                   \"dm:en\",\n                   \"SORTABLE\"});\n\n  // Check that the response is OK, indicating the index was created successfully\n  EXPECT_THAT(resp, \"OK\");\n\n  Run({\"HSET\", \"doc:1\", \"title\", \"Test Document\"});\n\n  // Verify that the index was created correctly\n  resp = Run({\"FT.SEARCH\", \"idx\", \"*\"});\n  EXPECT_THAT(resp, AreDocIds(\"doc:1\"));\n}\n\nTEST_F(SearchFamilyTest, JsonDelIndexesBug) {\n  auto resp = Run(\n      {\"FT.CREATE\", \"index\", \"ON\", \"json\", \"SCHEMA\", \"$.text\", \"AS\", \"text\", \"TEXT\", \"SORTABLE\"});\n  EXPECT_THAT(resp, \"OK\");\n\n  resp = Run({\"JSON.SET\", \"j1\", \"$\", R\"({\"text\":\"some text\"})\"});\n  EXPECT_THAT(resp, \"OK\");\n\n  resp = Run({\"JSON.DEL\", \"j1\", \"$.text\"});\n  EXPECT_THAT(resp, IntArg(1));\n\n  resp = Run({\"FT.AGGREGATE\", \"index\", \"*\", \"GROUPBY\", \"1\", \"@text\"});\n  EXPECT_THAT(resp, IsUnordArrayWithSize(IsMap(\"text\", ArgType(RespExpr::NIL))));\n}\n\nTEST_F(SearchFamilyTest, SearchStatsInfoRace) {\n  auto index_ops_fiber = pp_->at(0)->LaunchFiber([&] {\n    for (int i = 1; i <= 5; ++i) {\n      std::string idx_name = absl::StrCat(\"idx\", i);\n      std::string prefix = absl::StrCat(\"prefix\", i, \":\");\n      Run({\"FT.CREATE\", idx_name, \"ON\", \"HASH\", \"PREFIX\", \"1\", prefix});\n      Run({\"FT.DROPINDEX\", idx_name});\n    }\n  });\n\n  auto info_ops_fiber = pp_->at(1)->LaunchFiber([&] {\n    for (int i = 1; i <= 10; ++i) {\n      Run({\"INFO\"});\n    }\n  });\n\n  
index_ops_fiber.Join();\n  info_ops_fiber.Join();\n\n  ASSERT_FALSE(service_->IsShardSetLocked());\n}\n\nTEST_F(SearchFamilyTest, EmptyKeyBug) {\n  auto resp = Run({\"FT.CREATE\", \"index\", \"ON\", \"HASH\", \"SCHEMA\", \"field\", \"TEXT\"});\n  EXPECT_THAT(resp, \"OK\");\n\n  resp = Run({\"HSET\", \"\", \"field\", \"value\"});\n  EXPECT_THAT(resp, IntArg(1));\n\n  resp = Run({\"FT.SEARCH\", \"index\", \"*\"});\n  EXPECT_THAT(resp, AreDocIds(\"\"));\n}\n\nTEST_F(SearchFamilyTest, SetDoesNotUpdateIndexesBug) {\n  auto resp = Run({\"FT.CREATE\", \"index\", \"ON\", \"HASH\", \"SCHEMA\", \"field\", \"TEXT\"});\n  EXPECT_THAT(resp, \"OK\");\n\n  resp = Run({\"HSET\", \"k1\", \"field\", \"value\"});\n  EXPECT_THAT(resp, IntArg(1));\n\n  // Here we are changing the type of k1 from HASH to STRING.\n  // This should affect the index, the hset value should not be indexed anymore.\n  resp = Run({\"SET\", \"k1\", \"anothervalue\"});\n  EXPECT_EQ(resp, \"OK\");\n\n  resp = Run({\"RENAME\", \"k1\", \"anotherkey\"});\n  EXPECT_EQ(resp, \"OK\");\n\n  /* Here we should see that the value is indexed again.\n     We have checks in indexes that prove that the key was not present in the index.\n     The bug was, that this check was failing for this operation because it was not removed from the\n     index during the SET operation */\n  resp = Run({\"HSET\", \"k1\", \"field\", \"value\"});\n  EXPECT_THAT(resp, IntArg(1));\n\n  resp = Run({\"FT.SEARCH\", \"index\", \"*\"});\n  EXPECT_THAT(resp, AreDocIds(\"k1\"));\n}\n\nTEST_F(SearchFamilyTest, SortStoreDoesNotUpdateIndexesBug) {\n  // Create an index over HASH\n  auto resp = Run({\"FT.CREATE\", \"index\", \"ON\", \"HASH\", \"SCHEMA\", \"field\", \"TEXT\"});\n  EXPECT_THAT(resp, \"OK\");\n\n  // Index a HASH document under k1\n  resp = Run({\"HSET\", \"k1\", \"field\", \"value\"});\n  EXPECT_THAT(resp, IntArg(1));\n\n  // Prepare a source list to sort and store into k1 (overwriting k1 to LIST)\n  EXPECT_THAT(Run({\"RPUSH\", \"lst\", 
\"b\", \"a\"}), IntArg(2));\n  // SORT lst STORE k1 -> changes type of k1 from HASH to LIST\n  Run({\"SORT\", \"lst\", \"ALPHA\", \"STORE\", \"k1\"});\n\n  // Rename away and recreate k1 as HASH again\n  EXPECT_EQ(Run({\"RENAME\", \"k1\", \"anotherkey\"}), \"OK\");\n  EXPECT_THAT(Run({\"HSET\", \"k1\", \"field\", \"value\"}), IntArg(1));\n\n  // If SORT/STORE failed to remove k1 from indexes, the re-index here should crash.\n  // Successful run should contain only the new k1 document in the index.\n  resp = Run({\"FT.SEARCH\", \"index\", \"*\"});\n  EXPECT_THAT(resp, AreDocIds(\"k1\"));\n}\n\nTEST_F(SearchFamilyTest, BlockSizeOptionFtCreate) {\n  // Create an index with a block size option\n  auto resp = Run({\"FT.CREATE\", \"index\", \"ON\", \"HASH\", \"SCHEMA\", \"number1\", \"NUMERIC\", \"BLOCKSIZE\",\n                   \"2\", \"number2\", \"NUMERIC\", \"BLOCKSIZE\", \"1024\"});\n  EXPECT_THAT(resp, \"OK\");\n\n  // Verify that the index was created successfully\n  resp = Run({\"FT.INFO\", \"index\"});\n  EXPECT_THAT(resp, IsArray(_, _, _, _, _, _, \"attributes\",\n                            IsUnordArray(IsArray(\"identifier\", \"number1\", \"attribute\", \"number1\",\n                                                 \"type\", \"NUMERIC\", \"blocksize\", \"2\"),\n                                         IsArray(\"identifier\", \"number2\", \"attribute\", \"number2\",\n                                                 \"type\", \"NUMERIC\", \"blocksize\", \"1024\")),\n                            \"num_docs\", IntArg(0), _, _, _, _));\n\n  // Add a document to the index\n  for (int i = 1; i <= 5; ++i) {\n    Run({\"HSET\", absl::StrCat(\"doc:\", i), \"number1\", std::to_string(i), \"number2\",\n         std::to_string(i * 10)});\n  }\n\n  // Search the index\n  resp = Run({\"FT.SEARCH\", \"index\", \"@number1:[1 3] @number2:[10 30]\", \"SORTBY\", \"number1\", \"ASC\"});\n  EXPECT_THAT(resp, AreDocIds(\"doc:1\", \"doc:2\", 
\"doc:3\"));\n}\n\nTEST_F(SearchFamilyTest, AggregateWithLoadFromJoinSimple) {\n  Run({\"ft.create\", \"idx1\", \"ON\", \"HASH\", \"SCHEMA\", \"num1\", \"NUMERIC\", \"num2\", \"NUMERIC\"});\n  Run({\"ft.create\", \"idx2\", \"ON\", \"HASH\", \"SCHEMA\", \"num3\", \"NUMERIC\", \"num4\", \"NUMERIC\"});\n\n  Run({\"hset\", \"k1\", \"num1\", \"0\", \"num2\", \"1\"});\n  Run({\"hset\", \"k2\", \"num1\", \"1\", \"num2\", \"2\"});\n\n  Run({\"hset\", \"k3\", \"num3\", \"0\", \"num4\", \"3\"});\n  Run({\"hset\", \"k4\", \"num3\", \"1\", \"num4\", \"4\"});\n\n  auto resp = Run({\"ft.aggregate\", \"idx1\", \"*\", \"LOAD\", \"4\", \"idx1.num1\", \"idx1.num2\", \"idx2.num3\",\n                   \"idx2.num4\", \"LOAD_FROM\", \"idx2\", \"1\", \"idx2.num3=idx1.num1\"});\n\n  EXPECT_THAT(resp,\n              IsUnordArrayWithSize(\n                  IsMap(\"idx1.num1\", \"1\", \"idx1.num2\", \"2\", \"idx2.num3\", \"1\", \"idx2.num4\", \"4\"),\n                  IsMap(\"idx1.num1\", \"0\", \"idx1.num2\", \"1\", \"idx2.num3\", \"0\", \"idx2.num4\", \"3\")));\n}\n\nTEST_F(SearchFamilyTest, AggregateWithLoadFromJoinMultipleJoins) {\n  Run({\"ft.create\", \"idx1\", \"ON\", \"HASH\", \"SCHEMA\", \"num1\", \"NUMERIC\", \"str1\", \"TEXT\"});\n  Run({\"ft.create\", \"idx2\", \"ON\", \"HASH\", \"SCHEMA\", \"num2\", \"NUMERIC\", \"str2\", \"TAG\"});\n  Run({\"ft.create\", \"idx3\", \"ON\", \"HASH\", \"SCHEMA\", \"num3\", \"NUMERIC\", \"str3\", \"TAG\"});\n  Run({\"ft.create\", \"idx4\", \"ON\", \"HASH\", \"SCHEMA\", \"num4\", \"NUMERIC\", \"str4\", \"TEXT\"});\n\n  Run({\"hset\", \"k1\", \"num1\", \"0\", \"str1\", \"value1\"});\n  Run({\"hset\", \"k2\", \"num1\", \"1\", \"str1\", \"value2\"});\n\n  Run({\"hset\", \"k3\", \"num2\", \"0\", \"str2\", \"value3\"});\n  Run({\"hset\", \"k4\", \"num2\", \"1\", \"str2\", \"value4\"});\n\n  Run({\"hset\", \"k5\", \"num3\", \"2\", \"str3\", \"value1\"});\n  Run({\"hset\", \"k6\", \"num3\", \"3\", \"str3\", \"value2\"});\n\n  Run({\"hset\", \"k7\", 
\"num4\", \"2\", \"str4\", \"value3\"});\n  Run({\"hset\", \"k8\", \"num4\", \"3\", \"str4\", \"value4\"});\n\n  auto resp = Run({\"ft.aggregate\",\n                   \"idx1\",\n                   \"*\",\n                   \"LOAD\",\n                   \"8\",\n                   \"idx1.num1\",\n                   \"idx1.str1\",\n                   \"idx2.num2\",\n                   \"idx2.str2\",\n                   \"idx3.num3\",\n                   \"idx3.str3\",\n                   \"idx4.num4\",\n                   \"idx4.str4\",\n                   \"LOAD_FROM\",\n                   \"idx2\",\n                   \"1\",\n                   \"idx2.num2=idx1.num1\",\n                   \"LOAD_FROM\",\n                   \"idx3\",\n                   \"1\",\n                   \"idx3.str3=idx1.str1\",\n                   \"LOAD_FROM\",\n                   \"idx4\",\n                   \"1\",\n                   \"idx4.str4=idx2.str2\"});\n\n  EXPECT_THAT(\n      resp,\n      IsUnordArrayWithSize(\n          IsMap(\"idx1.num1\", \"1\", \"idx1.str1\", \"value2\", \"idx2.num2\", \"1\", \"idx2.str2\", \"value4\",\n                \"idx3.num3\", \"3\", \"idx3.str3\", \"value2\", \"idx4.num4\", \"3\", \"idx4.str4\", \"value4\"),\n          IsMap(\"idx1.num1\", \"0\", \"idx1.str1\", \"value1\", \"idx2.num2\", \"0\", \"idx2.str2\", \"value3\",\n                \"idx3.num3\", \"2\", \"idx3.str3\", \"value1\", \"idx4.num4\", \"2\", \"idx4.str4\", \"value3\")));\n\n  // Simple requests\n  resp = Run({\"ft.aggregate\", \"idx1\", \"*\", \"LOAD\", \"4\", \"idx1.num1\", \"idx1.str1\", \"idx2.num2\",\n              \"idx2.str2\", \"LOAD_FROM\", \"idx2\", \"1\", \"idx2.num2=idx1.num1\"});\n  EXPECT_THAT(\n      resp,\n      IsUnordArrayWithSize(\n          IsMap(\"idx1.num1\", \"1\", \"idx1.str1\", \"value2\", \"idx2.num2\", \"1\", \"idx2.str2\", \"value4\"),\n          IsMap(\"idx1.num1\", \"0\", \"idx1.str1\", \"value1\", \"idx2.num2\", \"0\", \"idx2.str2\", \"value3\")));\n\n 
 resp = Run({\"ft.aggregate\", \"idx1\", \"*\", \"LOAD\", \"4\", \"idx1.num1\", \"idx1.str1\", \"idx3.num3\",\n              \"idx3.str3\", \"LOAD_FROM\", \"idx3\", \"1\", \"idx3.str3=idx1.str1\"});\n  EXPECT_THAT(\n      resp,\n      IsUnordArrayWithSize(\n          IsMap(\"idx1.num1\", \"1\", \"idx1.str1\", \"value2\", \"idx3.num3\", \"3\", \"idx3.str3\", \"value2\"),\n          IsMap(\"idx1.num1\", \"0\", \"idx1.str1\", \"value1\", \"idx3.num3\", \"2\", \"idx3.str3\", \"value1\")));\n\n  resp = Run({\"ft.aggregate\", \"idx2\", \"*\", \"LOAD\", \"4\", \"idx2.num2\", \"idx2.str2\", \"idx4.num4\",\n              \"idx4.str4\", \"LOAD_FROM\", \"idx4\", \"1\", \"idx4.str4=idx2.str2\"});\n  EXPECT_THAT(\n      resp,\n      IsUnordArrayWithSize(\n          IsMap(\"idx2.num2\", \"1\", \"idx2.str2\", \"value4\", \"idx4.num4\", \"3\", \"idx4.str4\", \"value4\"),\n          IsMap(\"idx2.num2\", \"0\", \"idx2.str2\", \"value3\", \"idx4.num4\", \"2\", \"idx4.str4\", \"value3\")));\n\n  resp = Run({\"ft.aggregate\", \"idx3\", \"*\", \"LOAD\", \"4\", \"idx3.num3\", \"idx3.str3\", \"idx4.num4\",\n              \"idx4.str4\", \"LOAD_FROM\", \"idx4\", \"1\", \"idx3.num3=idx4.num4\"});\n  EXPECT_THAT(\n      resp,\n      IsUnordArrayWithSize(\n          IsMap(\"idx3.num3\", \"3\", \"idx3.str3\", \"value2\", \"idx4.num4\", \"3\", \"idx4.str4\", \"value4\"),\n          IsMap(\"idx3.num3\", \"2\", \"idx3.str3\", \"value1\", \"idx4.num4\", \"2\", \"idx4.str4\", \"value3\")));\n}\n\nTEST_F(SearchFamilyTest, AggregateWithLoadFromMultipleFields) {\n  Run({\"ft.create\", \"idx1\", \"ON\", \"HASH\", \"SCHEMA\", \"num1\", \"NUMERIC\", \"str1\", \"TEXT\", \"num2\",\n       \"NUMERIC\"});\n  Run({\"ft.create\", \"idx2\", \"ON\", \"HASH\", \"SCHEMA\", \"num2\", \"NUMERIC\", \"str2\", \"TAG\", \"num3\",\n       \"NUMERIC\"});\n  Run({\"ft.create\", \"idx3\", \"ON\", \"HASH\", \"SCHEMA\", \"num3\", \"NUMERIC\", \"str3\", \"TEXT\", \"num4\",\n       \"NUMERIC\"});\n\n  Run({\"hset\", \"k1\", 
\"num1\", \"0\", \"str1\", \"value1\", \"num2\", \"5\"});\n  Run({\"hset\", \"k2\", \"num1\", \"1\", \"str1\", \"value2\", \"num2\", \"10\"});\n\n  Run({\"hset\", \"k3\", \"num2\", \"1\", \"str2\", \"value3\", \"num3\", \"10\"});\n  Run({\"hset\", \"k4\", \"num2\", \"0\", \"str2\", \"value4\", \"num3\", \"5\"});\n\n  Run({\"hset\", \"k5\", \"num3\", \"2\", \"str3\", \"value4\", \"num4\", \"5\"});\n  Run({\"hset\", \"k6\", \"num3\", \"3\", \"str3\", \"value3\", \"num4\", \"10\"});\n\n  auto resp = Run({\"ft.aggregate\",\n                   \"idx1\",\n                   \"*\",\n                   \"LOAD\",\n                   \"9\",\n                   \"idx1.num1\",\n                   \"idx1.str1\",\n                   \"idx1.num2\",\n                   \"idx2.num2\",\n                   \"idx2.str2\",\n                   \"idx2.num3\",\n                   \"idx3.num3\",\n                   \"idx3.str3\",\n                   \"idx3.num4\",\n                   \"LOAD_FROM\",\n                   \"idx2\",\n                   \"2\",\n                   \"idx1.num1=idx2.num2\",\n                   \"idx1.num2=idx2.num3\",\n                   \"LOAD_FROM\",\n                   \"idx3\",\n                   \"3\",\n                   \"idx1.num2=idx3.num4\",\n                   \"idx2.num3=idx3.num4\",\n                   \"idx2.str2=idx3.str3\"});\n\n  EXPECT_THAT(\n      resp, IsUnordArrayWithSize(IsMap(\"idx1.num1\", \"1\", \"idx1.str1\", \"value2\", \"idx1.num2\", \"10\",\n                                       \"idx2.num2\", \"1\", \"idx2.str2\", \"value3\", \"idx2.num3\", \"10\",\n                                       \"idx3.num3\", \"3\", \"idx3.str3\", \"value3\", \"idx3.num4\", \"10\"),\n                                 IsMap(\"idx1.num1\", \"0\", \"idx1.str1\", \"value1\", \"idx1.num2\", \"5\",\n                                       \"idx2.num2\", \"0\", \"idx2.str2\", \"value4\", \"idx2.num3\", \"5\",\n                                       \"idx3.num3\", 
\"2\", \"idx3.str3\", \"value4\", \"idx3.num4\", \"5\")));\n}\n\nTEST_F(SearchFamilyTest, AggregateWithLoadFromSeveralCopiesOfSameKey) {\n  Run({\"ft.create\", \"idx1\", \"ON\", \"HASH\", \"SCHEMA\", \"num1\", \"NUMERIC\", \"str1\", \"TEXT\", \"num2\",\n       \"NUMERIC\"});\n  Run({\"ft.create\", \"idx2\", \"ON\", \"HASH\", \"SCHEMA\", \"num2\", \"NUMERIC\", \"str2\", \"TAG\", \"num3\",\n       \"NUMERIC\"});\n  Run({\"ft.create\", \"idx3\", \"ON\", \"HASH\", \"SCHEMA\", \"num3\", \"NUMERIC\", \"str3\", \"TEXT\", \"num4\",\n       \"NUMERIC\"});\n\n  Run({\"hset\", \"k1\", \"num1\", \"0\", \"str1\", \"value1\", \"num2\", \"5\"});\n  Run({\"hset\", \"k2\", \"num1\", \"1\", \"str1\", \"value2\", \"num2\", \"10\"});\n\n  Run({\"hset\", \"k3\", \"num2\", \"1\", \"str2\", \"value3\", \"num3\", \"10\"});\n  Run({\"hset\", \"k4\", \"num2\", \"0\", \"str2\", \"value4\", \"num3\", \"5\"});\n\n  Run({\"hset\", \"k5\", \"num3\", \"2\", \"str3\", \"value1\", \"num4\", \"15\"});\n  Run({\"hset\", \"k6\", \"num3\", \"3\", \"str3\", \"value1\", \"num4\", \"20\"});\n  Run({\"hset\", \"k7\", \"num3\", \"4\", \"str3\", \"value2\", \"num4\", \"25\"});\n  Run({\"hset\", \"k8\", \"num3\", \"5\", \"str3\", \"value2\", \"num4\", \"30\"});\n\n  auto resp = Run({\"ft.aggregate\",\n                   \"idx1\",\n                   \"*\",\n                   \"LOAD\",\n                   \"9\",\n                   \"idx1.num1\",\n                   \"idx1.str1\",\n                   \"idx1.num2\",\n                   \"idx2.num2\",\n                   \"idx2.str2\",\n                   \"idx2.num3\",\n                   \"idx3.num3\",\n                   \"idx3.str3\",\n                   \"idx3.num4\",\n                   \"LOAD_FROM\",\n                   \"idx2\",\n                   \"2\",\n                   \"idx1.num1=idx2.num2\",\n                   \"idx1.num2=idx2.num3\",\n                   \"LOAD_FROM\",\n                   \"idx3\",\n                   \"1\",  // Multiple copies 
of the same key\n                   \"idx1.str1=idx3.str3\"});\n\n  EXPECT_THAT(resp, IsUnordArrayWithSize(\n                        IsMap(\"idx1.num1\", \"0\", \"idx1.str1\", \"value1\", \"idx1.num2\", \"5\",\n                              \"idx2.num2\", \"0\", \"idx2.str2\", \"value4\", \"idx2.num3\", \"5\",\n                              \"idx3.num3\", \"2\", \"idx3.str3\", \"value1\", \"idx3.num4\", \"15\"),\n                        IsMap(\"idx1.num1\", \"0\", \"idx1.str1\", \"value1\", \"idx1.num2\", \"5\",\n                              \"idx2.num2\", \"0\", \"idx2.str2\", \"value4\", \"idx2.num3\", \"5\",\n                              \"idx3.num3\", \"3\", \"idx3.str3\", \"value1\", \"idx3.num4\", \"20\"),\n                        IsMap(\"idx1.num1\", \"1\", \"idx1.str1\", \"value2\", \"idx1.num2\", \"10\",\n                              \"idx2.num2\", \"1\", \"idx2.str2\", \"value3\", \"idx2.num3\", \"10\",\n                              \"idx3.num3\", \"4\", \"idx3.str3\", \"value2\", \"idx3.num4\", \"25\"),\n                        IsMap(\"idx1.num1\", \"1\", \"idx1.str1\", \"value2\", \"idx1.num2\", \"10\",\n                              \"idx2.num2\", \"1\", \"idx2.str2\", \"value3\", \"idx2.num3\", \"10\",\n                              \"idx3.num3\", \"5\", \"idx3.str3\", \"value2\", \"idx3.num4\", \"30\")));\n}\n\nTEST_F(SearchFamilyTest, AggregateWithLoadFromNoMatches) {\n  Run({\"ft.create\", \"idx1\", \"ON\", \"HASH\", \"SCHEMA\", \"num1\", \"NUMERIC\", \"str1\", \"TEXT\"});\n  Run({\"ft.create\", \"idx2\", \"ON\", \"HASH\", \"SCHEMA\", \"num2\", \"NUMERIC\", \"str2\", \"TEXT\"});\n\n  Run({\"hset\", \"k1\", \"num1\", \"0\", \"str1\", \"value1\"});\n  Run({\"hset\", \"k2\", \"num1\", \"1\", \"str1\", \"value2\"});\n\n  Run({\"hset\", \"k3\", \"num2\", \"0\", \"str2\", \"value3\"});\n  Run({\"hset\", \"k4\", \"num2\", \"1\", \"str2\", \"value4\"});\n\n  auto resp =\n      Run({\"ft.aggregate\", \"idx1\", \"*\", \"LOAD\", \"4\", \"idx1.num1\", 
\"idx1.str1\", \"idx2.num2\",\n           \"idx2.str2\", \"LOAD_FROM\", \"idx2\", \"2\", \"idx2.num2=idx1.num1\", \"idx2.str2=idx1.str1\"});\n\n  EXPECT_THAT(resp, IntArg(0));  // No matches, so result should be empty\n}\n\nTEST_F(SearchFamilyTest, AggregateWithLoadFromQueries) {\n  Run({\"ft.create\", \"idx1\", \"ON\", \"HASH\", \"SCHEMA\", \"num1\", \"NUMERIC\", \"str1\", \"TAG\"});\n  Run({\"ft.create\", \"idx2\", \"ON\", \"HASH\", \"SCHEMA\", \"num2\", \"NUMERIC\", \"str2\", \"TEXT\"});\n\n  // Another case\n  Run({\"ft.create\", \"idx3\", \"ON\", \"HASH\", \"SCHEMA\", \"num3\", \"NUMERIC\", \"str3\", \"TAG\"});\n  Run({\"ft.create\", \"idx4\", \"ON\", \"HASH\", \"SCHEMA\", \"num4\", \"NUMERIC\", \"str4\", \"TAG\"});\n\n  std::vector<::testing::Matcher<RespExpr>> matchers;\n  for (int i = 0; i < 100; ++i) {\n    // For even i str1 and str2 should match, for odd i they should not\n    std::string str1 = absl::StrCat(\"tag\", i);\n    std::string str2 = i % 2 == 0 ? str1 : absl::StrCat(\"text\", i);\n    Run({\"hset\", absl::StrCat(\"k1:\", i), \"num1\", std::to_string(i), \"str1\", str1});\n    Run({\"hset\", absl::StrCat(\"k2:\", i), \"num2\", std::to_string(i), \"str2\", str2});\n\n    if (i % 2 == 0 && i >= 35 && i <= 57) {\n      matchers.emplace_back(IsMap(\"idx1.num1\", std::to_string(i), \"idx1.str1\", str1, \"idx2.num2\",\n                                  std::to_string(i), \"idx2.str2\", str2));\n    }\n  }\n  matchers.insert(matchers.begin(), IntArg(matchers.size()));\n\n  auto resp = Run({\"ft.aggregate\", \"idx1\", \"@num1:[35 57]\", \"LOAD\", \"4\", \"idx1.num1\", \"idx1.str1\",\n                   \"idx2.num2\", \"idx2.str2\", \"LOAD_FROM\", \"idx2\", \"1\", \"idx2.str2=idx1.str1\",\n                   \"QUERY\", \"@num2:[35 57]\"});\n\n  EXPECT_THAT(resp.GetVec(), UnorderedElementsAreArray(matchers));\n\n  size_t num3 = 1;\n  size_t num4 = 5;\n\n  std::vector<std::string> tag_values = {\"tag1\", \"tag2\", \"tag3\", \"tag4\"};\n  
matchers.clear();\n  for (size_t i = 0; i < 100; ++i) {\n    std::string str = tag_values[i % tag_values.size()];\n    const size_t num3_actual = i * 100 + num3;\n    const size_t num4_actual = i * 100 + num4;\n\n    Run({\"hset\", absl::StrCat(\"k3:\", i), \"num3\", std::to_string(num3_actual), \"str3\", str});\n    Run({\"hset\", absl::StrCat(\"k4:\", i), \"num4\", std::to_string(num4_actual), \"str4\", str});\n\n    if ((str == \"tag1\" || str == \"tag4\") && num3 == num4) {\n      matchers.emplace_back(IsMap(\"idx3.num3\", std::to_string(num3_actual), \"idx3.str3\", str,\n                                  \"idx4.num4\", std::to_string(num4_actual), \"idx4.str4\", str));\n    }\n\n    num3 = (num3 + 3) % 12;\n    num4 = (num4 + 7) % 12;\n  }\n  DCHECK(!matchers.empty());\n  matchers.insert(matchers.begin(), IntArg(matchers.size()));\n\n  resp = Run({\"ft.aggregate\", \"idx3\", \"@str3:{tag1|tag4}\", \"LOAD\", \"4\", \"idx3.num3\", \"idx3.str3\",\n              \"idx4.num4\", \"idx4.str4\", \"LOAD_FROM\", \"idx4\", \"1\", \"idx4.num4=idx3.num3\", \"QUERY\",\n              \"@str4:{tag1|tag4}\"});\n  EXPECT_THAT(resp.GetVec(), UnorderedElementsAreArray(matchers));\n}\n\nTEST_F(SearchFamilyTest, AggregateWithLoadFromSyntaxErrors) {\n  Run({\"ft.create\", \"idx1\", \"ON\", \"HASH\", \"SCHEMA\", \"num1\", \"NUMERIC\", \"str1\", \"TEXT\"});\n  Run({\"ft.create\", \"idx2\", \"ON\", \"HASH\", \"SCHEMA\", \"num2\", \"NUMERIC\", \"str2\", \"TEXT\"});\n  Run({\"ft.create\", \"idx3\", \"ON\", \"HASH\", \"SCHEMA\", \"num3\", \"NUMERIC\", \"str3\", \"TEXT\"});\n\n  Run({\"hset\", \"k1\", \"num1\", \"0\", \"str1\", \"str\"});\n  Run({\"hset\", \"k2\", \"num2\", \"0\", \"str2\", \"str\"});\n  Run({\"hset\", \"k3\", \"num3\", \"0\", \"str3\", \"str\"});\n\n  // Test when index does not exist\n  EXPECT_THAT(Run({\"ft.aggregate\", \"idx1\", \"*\", \"LOAD\", \"2\", \"idx1.num1\", \"idx1.str1\", \"LOAD_FROM\",\n                   \"idx4\", \"1\", \"idx4.num2=idx1.num1\"}),\n         
     IntArg(0));\n\n  // Test when index exists but no LOAD_FROM is specified\n  EXPECT_THAT(Run({\"ft.aggregate\", \"idx1\", \"*\", \"LOAD\", \"2\", \"idx1.num1\", \"idx1.str1\", \"LOAD_FROM\",\n                   \"idx3\", \"1\", \"idx3.num3=idx2.num2\"}),\n              ErrArg(\"bad arguments for LOAD_FROM: unknown index 'idx2'\"));\n\n  // Test when index exists but was specified after it was used\n  EXPECT_THAT(\n      Run({\"ft.aggregate\", \"idx1\", \"*\", \"LOAD\", \"2\", \"idx1.num1\", \"idx1.str1\", \"LOAD_FROM\", \"idx2\",\n           \"1\", \"idx2.num2=idx3.num3\", \"LOAD_FROM\", \"idx3\", \"1\", \"idx3.str3=idx1.str1\"}),\n      ErrArg(\"bad arguments for LOAD_FROM: unknown index 'idx3'\"));\n\n  // Test when LOAD_FROM is not using fields of current index\n  EXPECT_THAT(\n      Run({\"ft.aggregate\", \"idx1\", \"*\", \"LOAD\", \"2\", \"idx1.num1\", \"idx1.str1\", \"LOAD_FROM\", \"idx2\",\n           \"1\", \"idx2.str2=idx1.str1\", \"LOAD_FROM\", \"idx3\", \"1\", \"idx2.str2=idx1.str1\"}),\n      ErrArg(\"bad arguments for LOAD_FROM: one of the field must be from the current index 'idx3'. 
\"\n             \"Got 'idx2.str2' and 'idx1.str1'\"));\n\n  // Test when field of index does not exist\n  EXPECT_THAT(Run({\"ft.aggregate\", \"idx1\", \"*\", \"LOAD\", \"2\", \"idx1.num1\", \"idx1.str1\", \"LOAD_FROM\",\n                   \"idx2\", \"1\", \"idx2.num2=idx1.nonexistent_field\"}),\n              IntArg(0));\n  EXPECT_THAT(Run({\"ft.aggregate\", \"idx1\", \"*\", \"LOAD\", \"2\", \"idx1.num1\", \"idx1.str1\", \"LOAD_FROM\",\n                   \"idx2\", \"1\", \"idx2.nonexistent_field=idx1.num1\"}),\n              IntArg(0));\n\n  // Test when field in QUERY does not exist in index\n  EXPECT_THAT(Run({\"ft.aggregate\", \"idx1\", \"*\", \"LOAD\", \"2\", \"idx1.num1\", \"idx1.str1\", \"LOAD_FROM\",\n                   \"idx2\", \"1\", \"idx2.num2=idx1.num1\", \"QUERY\", \"@nonexistent_tag:{tag1|tag2}\"}),\n              IntArg(0));\n\n  // Test when field in LOAD does not exist in index\n  EXPECT_THAT(Run({\"ft.aggregate\", \"idx1\", \"*\", \"LOAD\", \"2\", \"idx1.num1\", \"idx1.non_existent_field\",\n                   \"LOAD_FROM\", \"idx2\", \"1\", \"idx2.num2=idx1.num1\"}),\n              IsUnordArrayWithSize(\n                  IsMap(\"idx1.num1\", \"0\", \"idx1.non_existent_field\", ArgType(RespExpr::NIL))));\n\n  // Test index aliases\n  EXPECT_THAT(Run({\"ft.aggregate\", \"idx1\", \"*\", \"LOAD\", \"4\", \"idx1.num1\", \"idx1.str1\", \"alias.num2\",\n                   \"alias.str2\", \"LOAD_FROM\", \"idx2\", \"AS\", \"alias\", \"1\", \"alias.num2=idx1.num1\"}),\n              IsUnordArrayWithSize(IsMap(\"idx1.num1\", \"0\", \"idx1.str1\", \"str\", \"alias.num2\", \"0\",\n                                         \"alias.str2\", \"str\")));\n  EXPECT_THAT(Run({\"ft.aggregate\", \"idx1\", \"*\", \"LOAD\", \"4\", \"idx1.num1\", \"idx1.str1\", \"idx2.num2\",\n                   \"idx2.str2\", \"LOAD_FROM\", \"idx2\", \"AS\", \"alias\", \"1\", \"alias.num2=idx1.num1\"}),\n              ErrArg(\"Unknown index alias 'idx2' in the LOAD option. 
Field: 'num2'\"));\n\n  // Test same index used multiple times\n  EXPECT_THAT(Run({\"ft.aggregate\", \"idx1\", \"*\", \"LOAD\", \"4\", \"idx1.num1\", \"idx1.str1\", \"idx2.num2\",\n                   \"idx2.str2\", \"LOAD_FROM\", \"idx2\", \"1\", \"idx2.num2=idx1.num1\", \"LOAD_FROM\",\n                   \"idx2\", \"1\", \"idx2.str2=idx1.str1\"}),\n              ErrArg(\"Duplicate index alias in LOAD_FROM: 'idx2'\"));\n}\n\nTEST_F(SearchFamilyTest, AggregateWithLoadFromSortingAndLimiting) {\n  Run({\"ft.create\", \"idx1\", \"ON\", \"HASH\", \"SCHEMA\", \"num1\", \"NUMERIC\", \"str1\", \"TEXT\"});\n  Run({\"ft.create\", \"idx2\", \"ON\", \"HASH\", \"SCHEMA\", \"num2\", \"NUMERIC\", \"str2\", \"TEXT\"});\n\n  std::vector<::testing::Matcher<RespExpr>> matchers;\n  for (int i = 0; i < 100; ++i) {\n    const std::string num_value = std::to_string(i);\n    const std::string str_value = absl::StrCat(\"value\", i);\n    Run({\"hset\", absl::StrCat(\"k1:\", i), \"num1\", num_value, \"str1\", str_value});\n    Run({\"hset\", absl::StrCat(\"k2:\", i), \"num2\", num_value, \"str2\", str_value});\n\n    if (i > 79 && i <= 89) {\n      // Insert to beginning because we will sort DESCENDING\n      matchers.emplace(matchers.begin(), IsMap(\"idx1.num1\", num_value, \"idx1.str1\", str_value,\n                                               \"idx2.num2\", num_value, \"idx2.str2\", str_value));\n    }\n  }\n  DCHECK_EQ(matchers.size(), 10u);\n  matchers.insert(matchers.begin(), IntArg(10));\n\n  auto resp = Run({\"ft.aggregate\",\n                   \"idx1\",\n                   \"*\",\n                   \"LOAD\",\n                   \"4\",\n                   \"idx1.num1\",\n                   \"idx1.str1\",\n                   \"idx2.num2\",\n                   \"idx2.str2\",\n                   \"LOAD_FROM\",\n                   \"idx2\",\n                   \"1\",\n                   \"idx2.num2=idx1.num1\",\n                   \"SORTBY\",\n                   \"2\",\n             
      \"@idx1.num1\",\n                   \"DESC\",\n                   \"LIMIT\",\n                   \"10\",\n                   \"10\"});\n\n  EXPECT_THAT(resp.GetVec(), ElementsAreArray(matchers));\n}\n\nTEST_F(SearchFamilyTest, AggregateWithLoadFromSortBySeveralFields) {\n  Run({\"ft.create\", \"idx1\", \"ON\", \"HASH\", \"SCHEMA\", \"num1\", \"NUMERIC\", \"str1\", \"TEXT\", \"num3\",\n       \"NUMERIC\"});\n  Run({\"ft.create\", \"idx2\", \"ON\", \"HASH\", \"SCHEMA\", \"num2\", \"NUMERIC\", \"str2\", \"TEXT\", \"num4\",\n       \"NUMERIC\"});\n\n  std::vector<std::pair<int, std::string>> expected;\n  for (int i = 0; i < 100; ++i) {\n    const std::string num_value = std::to_string(i % 10);  // Only 10 distinct values\n    const std::string str_value = absl::StrCat(\"value\", i);\n    Run({\"hset\", absl::StrCat(\"k1:\", i), \"num1\", num_value, \"str1\", str_value, \"num3\",\n         std::to_string(i)});\n    Run({\"hset\", absl::StrCat(\"k2:\", i), \"num2\", num_value, \"str2\", str_value, \"num4\",\n         std::to_string(i)});\n\n    expected.emplace_back(i % 10, str_value);\n  }\n\n  // Sort by num1 ASC, str1 DESC\n  std::sort(expected.begin(), expected.end(), [](const auto& a, const auto& b) {\n    if (a.first != b.first) {\n      return a.first < b.first;  // Ascending order for num1\n    }\n    return a.second > b.second;  // Descending order for str1\n  });\n\n  std::vector<::testing::Matcher<RespExpr>> matchers;\n  matchers.push_back(IntArg(20));\n  for (size_t i = 50; i < 70; ++i) {\n    const auto& [num, str] = expected[i];\n    matchers.emplace_back(IsMap(\"idx1.num1\", std::to_string(num), \"idx1.str1\", str, \"idx2.num2\",\n                                std::to_string(num), \"idx2.str2\", str));\n  }\n\n  auto resp = Run({\"ft.aggregate\",\n                   \"idx1\",\n                   \"*\",\n                   \"LOAD\",\n                   \"4\",\n                   \"idx1.num1\",\n                   \"idx1.str1\",\n                   
\"idx2.num2\",\n                   \"idx2.str2\",\n                   \"LOAD_FROM\",\n                   \"idx2\",\n                   \"1\",\n                   \"idx2.num4=idx1.num3\",\n                   \"SORTBY\",\n                   \"4\",\n                   \"@idx1.num1\",\n                   \"ASC\",\n                   \"@idx1.str1\",\n                   \"DESC\",\n                   \"LIMIT\",\n                   \"50\",\n                   \"20\"});\n\n  EXPECT_THAT(resp.GetVec(), ElementsAreArray(matchers));\n}\n\nTEST_F(SearchFamilyTest, NumericFilter) {\n  // Index name, age, height\n  Run({\"FT.CREATE\", \"i1\", \"ON\", \"HASH\", \"SCHEMA\", \"name\", \"TEXT\", \"age\", \"NUMERIC\", \"height\",\n       \"NUMERIC\"});\n\n  // Index name, age\n  Run({\"FT.CREATE\", \"i2\", \"ON\", \"HASH\", \"SCHEMA\", \"name\", \"TEXT\", \"age\", \"NUMERIC\"});\n\n  Run({\"HSET\", \"id:1\", \"name\", \"John\", \"age\", \"28\", \"height\", \"184\"});\n  Run({\"HSET\", \"id:2\", \"name\", \"Ivan\", \"age\", \"30\", \"height\", \"180\"});\n  Run({\"HSET\", \"id:3\", \"name\", \"Jon\", \"age\", \"25\", \"height\", \"182\"});\n  Run({\"HSET\", \"id:4\", \"name\", \"Juan\", \"age\", \"32\", \"height\", \"186\"});\n  Run({\"HSET\", \"id:5\", \"name\", \"Ioan\", \"age\", \"35\", \"height\", \"181\"});\n\n  // Filter with non-star query\n  auto res = Run({\"FT.SEARCH\", \"i1\", \"I*\", \"FILTER\", \"age\", \"31\", \"40\"});\n  EXPECT_THAT(res, AreDocIds(\"id:5\"));\n\n  // Filter on ONE NUMERIC index\n  res = Run({\"FT.SEARCH\", \"i1\", \"*\", \"FILTER\", \"age\", \"25\", \"28\"});\n  EXPECT_THAT(res, AreDocIds(\"id:1\", \"id:3\"));\n\n  // Filter on TWO NUMERIC indexes\n  res =\n      Run({\"FT.SEARCH\", \"i1\", \"*\", \"FILTER\", \"age\", \"25\", \"28\", \"FILTER\", \"height\", \"180\", \"182\"});\n  EXPECT_THAT(res, AreDocIds(\"id:3\"));\n\n  // Filter on TWO NUMERIC indexes where second filtering produce empty result\n  res =\n      Run({\"FT.SEARCH\", \"i1\", \"*\", 
\"FILTER\", \"age\", \"25\", \"28\", \"FILTER\", \"height\", \"200\", \"300\"});\n  EXPECT_THAT(res, AreDocIds());\n\n  // Filter on index which doesn't exists\n  res = Run({\"FT.SEARCH\", \"i2\", \"*\", \"FILTER\", \"height\", \"180\", \"190\"});\n  EXPECT_THAT(res, ErrArg(\"Invalid field: height\"));\n\n  // Two filters on same field\n  res = Run({\"FT.SEARCH\", \"i1\", \"J*\", \"FILTER\", \"age\", \"25\", \"30\", \"FILTER\", \"age\", \"28\", \"32\"});\n  EXPECT_THAT(res, AreDocIds(\"id:1\"));\n\n  Run({\"FLUSHALL\"});\n}\n\nTEST_F(SearchFamilyTest, MAXSEARCHRESULTS) {\n  EXPECT_EQ(Run({\"HSET\", \"s1\", \"phrase\", \"hello world\"}), 1);\n  EXPECT_EQ(Run({\"HSET\", \"s2\", \"phrase\", \"hello simple world\"}), 1);\n  EXPECT_EQ(Run({\"HSET\", \"s3\", \"phrase\", \"hello somewhat less simple world\"}), 1);\n  EXPECT_EQ(Run({\"FT.CREATE\", \"memes\", \"SCHEMA\", \"phrase\", \"TEXT\"}), \"OK\");\n\n  auto resp = Run({\"FT.CONFIG\", \"GET\", \"MAXSEARCHRESULTS\"});\n  EXPECT_THAT(resp, IsArray(\"MAXSEARCHRESULTS\", \"1000000\"));\n\n  resp = Run({\"FT.SEARCH\", \"memes\", \"@phrase:(hello world)\", \"NOCONTENT\"});\n  EXPECT_THAT(resp, RespElementsAre(IntArg(3), _, _, _));\n\n  resp = Run({\"FT.CONFIG\", \"SET\", \"MAXSEARCHRESULTS\", \"1\"});\n  EXPECT_EQ(resp, \"OK\");\n\n  resp = Run({\"FT.SEARCH\", \"memes\", \"@phrase:(hello world)\", \"NOCONTENT\"});\n  EXPECT_THAT(resp, RespElementsAre(IntArg(3), _));\n\n  resp = Run({\"FT.SEARCH\", \"memes\", \"@phrase:(hello world)\", \"NOCONTENT\", \"LIMIT\", \"0\", \"1\"});\n  EXPECT_THAT(resp, RespElementsAre(IntArg(3), _));\n\n  resp = Run({\"FT.SEARCH\", \"memes\", \"@phrase:(hello world)\", \"NOCONTENT\", \"LIMIT\", \"0\", \"3\"});\n  EXPECT_THAT(resp, ErrArg(\"LIMIT exceeds maximum of 1\"));\n\n  resp = Run({\"FT.CONFIG\", \"GET\", \"MAXSEARCHRESULTS\"});\n  EXPECT_THAT(resp, IsArray(\"MAXSEARCHRESULTS\", \"1\"));\n\n  resp = Run({\"FT.CONFIG\", \"HELP\", \"MAXSEARCHRESULTS\"});\n  EXPECT_THAT(resp, 
IsArray(\"MAXSEARCHRESULTS\", \"Description\",\n                            \"Maximum number of results from ft.search command\", \"Value\", \"1\"));\n\n  resp = Run({\"FT.CONFIG\", \"GET\", \"*\"});\n  // Should contain MAXSEARCHRESULTS among other search config parameters\n  EXPECT_THAT(resp, RespArray(Contains(\"MAXSEARCHRESULTS\")));\n  EXPECT_THAT(resp, RespArray(Contains(\"1\")));\n\n  resp = Run({\"FT.CONFIG\", \"HELP\", \"*\"});\n  // Should contain MAXSEARCHRESULTS description among other search configs\n  EXPECT_THAT(resp.GetVec(),\n              Contains(IsArray(\"MAXSEARCHRESULTS\", \"Description\",\n                               \"Maximum number of results from ft.search command\", \"Value\", \"1\")));\n\n  // restore normal value for other tests\n  Run({\"FT.CONFIG\", \"SET\", \"MAXSEARCHRESULTS\", \"1000000\"});\n}\n\nTEST_F(SearchFamilyTest, InvalidConfigOptions) {\n  // Test with an invalid argument\n  auto resp = Run({\"FT.CONFIG\", \"INVALIDARG\", \"INVLIDARG\"});\n  EXPECT_THAT(resp, ErrArg(\"Unknown subcommand\"));\n\n  // Test with an invalid argument\n  resp = Run({\"FT.CONFIG\", \"GET\", \"INVALIDARG\"});\n  EXPECT_THAT(resp, IsArray());\n\n  // Test with an invalid argument\n  resp = Run({\"FT.CONFIG\", \"SET\", \"INVALIDARG\"});\n  EXPECT_THAT(resp, ErrArg(kSyntaxErr));\n\n  // Test with an invalid argument\n  resp = Run({\"FT.CONFIG\", \"SET\", \"INVALIDARG\", \"5\"});\n  EXPECT_THAT(resp, ErrArg(\"Invalid option\"));\n\n  // Test with an invalid value\n  resp = Run({\"FT.CONFIG\", \"SET\", \"MAXSEARCHRESULTS\", \"not_a_number\"});\n  EXPECT_THAT(resp, ErrArg(\"ERR FT.CONFIG SET failed (possibly related to argument \"\n                           \"'MAXSEARCHRESULTS') - argument can not be set\"));\n\n  // Test with an invalid argument\n  resp = Run({\"FT.CONFIG\", \"HELP\", \"INVALIDARG\"});\n  EXPECT_THAT(resp, IsArray());\n}\n\nTEST_F(SearchFamilyTest, DropIndexWithDD) {\n  // Create an index on HASH documents\n  Run({\"FT.CREATE\", 
\"idx\", \"ON\", \"HASH\", \"PREFIX\", \"1\", \"doc:\", \"SCHEMA\", \"name\", \"TEXT\"});\n\n  // Add some documents\n  Run({\"HSET\", \"doc:1\", \"name\", \"Alice\"});\n  Run({\"HSET\", \"doc:2\", \"name\", \"Bob\"});\n  Run({\"HSET\", \"doc:3\", \"name\", \"Charlie\"});\n\n  // Verify documents exist\n  auto resp = Run({\"EXISTS\", \"doc:1\", \"doc:2\", \"doc:3\"});\n  EXPECT_THAT(resp, IntArg(3));\n\n  // Verify index works\n  resp = Run({\"FT.SEARCH\", \"idx\", \"*\"});\n  EXPECT_THAT(resp, AreDocIds(\"doc:1\", \"doc:2\", \"doc:3\"));\n\n  // Drop index WITHOUT DD - documents should remain\n  Run({\"FT.DROPINDEX\", \"idx\"});\n  resp = Run({\"EXISTS\", \"doc:1\", \"doc:2\", \"doc:3\"});\n  EXPECT_THAT(resp, IntArg(3));\n\n  // Create index again\n  Run({\"FT.CREATE\", \"idx\", \"ON\", \"HASH\", \"PREFIX\", \"1\", \"doc:\", \"SCHEMA\", \"name\", \"TEXT\"});\n  ThisFiber::Yield();\n\n  // Verify index works again\n  resp = Run({\"FT.SEARCH\", \"idx\", \"*\"});\n  EXPECT_THAT(resp, AreDocIds(\"doc:1\", \"doc:2\", \"doc:3\"));\n\n  // Drop index WITH DD - documents should be deleted\n  Run({\"FT.DROPINDEX\", \"idx\", \"DD\"});\n  resp = Run({\"EXISTS\", \"doc:1\", \"doc:2\", \"doc:3\"});\n  EXPECT_THAT(resp, IntArg(0));\n}\n\nTEST_F(SearchFamilyTest, DropIndexWithDDJson) {\n  // Create an index on JSON documents\n  Run({\"FT.CREATE\", \"jidx\", \"ON\", \"JSON\", \"PREFIX\", \"1\", \"jdoc:\", \"SCHEMA\", \"$.name\", \"AS\", \"name\",\n       \"TEXT\"});\n\n  // Add some JSON documents\n  Run({\"JSON.SET\", \"jdoc:1\", \"$\", R\"({\"name\": \"Alice\"})\"});\n  Run({\"JSON.SET\", \"jdoc:2\", \"$\", R\"({\"name\": \"Bob\"})\"});\n  Run({\"JSON.SET\", \"jdoc:3\", \"$\", R\"({\"name\": \"Charlie\"})\"});\n\n  // Verify documents exist\n  auto resp = Run({\"EXISTS\", \"jdoc:1\", \"jdoc:2\", \"jdoc:3\"});\n  EXPECT_THAT(resp, IntArg(3));\n\n  // Verify index works\n  resp = Run({\"FT.SEARCH\", \"jidx\", \"*\"});\n  EXPECT_THAT(resp, AreDocIds(\"jdoc:1\", \"jdoc:2\", 
\"jdoc:3\"));\n\n  // Drop index WITH DD - documents should be deleted\n  Run({\"FT.DROPINDEX\", \"jidx\", \"DD\"});\n  resp = Run({\"EXISTS\", \"jdoc:1\", \"jdoc:2\", \"jdoc:3\"});\n  EXPECT_THAT(resp, IntArg(0));\n}\n\nTEST_F(SearchFamilyTest, DropIndexWithInvalidOption) {\n  // Create an index\n  Run({\"FT.CREATE\", \"idx\", \"ON\", \"HASH\", \"PREFIX\", \"1\", \"doc:\", \"SCHEMA\", \"name\", \"TEXT\"});\n  Run({\"HSET\", \"doc:1\", \"name\", \"test\"});\n\n  // Drop with unrecognized option (should be ignored, index dropped but documents remain)\n  auto resp = Run({\"FT.DROPINDEX\", \"idx\", \"INVALID\"});\n  EXPECT_THAT(resp, \"OK\");\n\n  // Document should still exist\n  resp = Run({\"EXISTS\", \"doc:1\"});\n  EXPECT_THAT(resp, IntArg(1));\n\n  // Clean up\n  Run({\"DEL\", \"doc:1\"});\n}\n\nTEST_F(SearchFamilyTest, ZsetStoreCommandsOverwriteIndexedHash) {\n  Run({\"FT.CREATE\", \"idx\", \"ON\", \"HASH\", \"SCHEMA\", \"field\", \"TEXT\"});\n  EXPECT_THAT(Run({\"ZADD\", \"zset1\", \"1\", \"a\", \"2\", \"b\"}), IntArg(2));\n  EXPECT_THAT(Run({\"ZADD\", \"zset2\", \"1.5\", \"a\", \"3\", \"c\"}), IntArg(2));\n\n  // Test ZINTERSTORE\n  EXPECT_THAT(Run({\"HSET\", \"dest\", \"field\", \"value\"}), IntArg(1));\n  EXPECT_THAT(Run({\"ZINTERSTORE\", \"dest\", \"2\", \"zset1\", \"zset2\"}), IntArg(1));\n  EXPECT_EQ(Run({\"RENAME\", \"dest\", \"x\"}), \"OK\");\n\n  // Test ZUNIONSTORE\n  EXPECT_THAT(Run({\"HSET\", \"dest\", \"field\", \"value\"}), IntArg(1));\n  EXPECT_THAT(Run({\"ZUNIONSTORE\", \"dest\", \"2\", \"zset1\", \"zset2\"}), IntArg(3));\n  EXPECT_EQ(Run({\"RENAME\", \"dest\", \"y\"}), \"OK\");\n}\n\nTEST_F(SearchFamilyTest, SetStoreCommandsOverwriteIndexedHash) {\n  Run({\"FT.CREATE\", \"idx\", \"ON\", \"HASH\", \"SCHEMA\", \"field\", \"TEXT\"});\n  EXPECT_THAT(Run({\"SADD\", \"set1\", \"a\", \"b\", \"c\"}), IntArg(3));\n  EXPECT_THAT(Run({\"SADD\", \"set2\", \"b\", \"c\", \"d\"}), IntArg(3));\n\n  // Test SINTERSTORE\n  EXPECT_THAT(Run({\"HSET\", \"dest\", 
\"field\", \"value\"}), IntArg(1));\n  EXPECT_THAT(Run({\"SINTERSTORE\", \"dest\", \"set1\", \"set2\"}), IntArg(2));\n  EXPECT_EQ(Run({\"RENAME\", \"dest\", \"x\"}), \"OK\");\n\n  // Test SUNIONSTORE\n  EXPECT_THAT(Run({\"HSET\", \"dest\", \"field\", \"value\"}), IntArg(1));\n  EXPECT_THAT(Run({\"SUNIONSTORE\", \"dest\", \"set1\", \"set2\"}), IntArg(4));\n  EXPECT_EQ(Run({\"RENAME\", \"dest\", \"y\"}), \"OK\");\n\n  // Test SDIFFSTORE\n  EXPECT_THAT(Run({\"HSET\", \"dest\", \"field\", \"value\"}), IntArg(1));\n  EXPECT_THAT(Run({\"SDIFFSTORE\", \"dest\", \"set1\", \"set2\"}), IntArg(1));\n  EXPECT_EQ(Run({\"RENAME\", \"dest\", \"z\"}), \"OK\");\n}\n\nTEST_F(SearchFamilyTest, HsetOnDifferentDatabasesCrash) {\n  // This test verifies that creating documents with the same key on different databases\n  // doesn't crash. Only database 0 is indexed.\n  Run({\"FT.CREATE\", \"idx\", \"ON\", \"HASH\", \"SCHEMA\", \"field1\", \"TEXT\"});\n\n  // Create document on database 0 - should be indexed\n  EXPECT_THAT(Run({\"HSET\", \"hash1\", \"field1\", \"value1\"}), IntArg(1));\n  EXPECT_THAT(Run({\"FT.SEARCH\", \"idx\", \"value1\"}), AreDocIds(\"hash1\"));\n\n  // Switch to database 1\n  EXPECT_THAT(Run({\"SELECT\", \"1\"}), \"OK\");\n\n  // Create document with same key on database 1 - should NOT crash\n  EXPECT_THAT(Run({\"HSET\", \"hash1\", \"field1\", \"another_value\"}), IntArg(1));\n\n  // Search on database 1 should return no results (only db 0 is indexed)\n  auto resp = Run({\"FT.SEARCH\", \"idx\", \"another_value\"});\n  EXPECT_THAT(resp, IntArg(0));\n\n  // Switch back to database 0\n  EXPECT_THAT(Run({\"SELECT\", \"0\"}), \"OK\");\n\n  // Search on database 0 should still find the original document\n  EXPECT_THAT(Run({\"FT.SEARCH\", \"idx\", \"value1\"}), AreDocIds(\"hash1\"));\n}\n\nTEST_F(SearchFamilyTest, QueryStringBytesLimit) {\n  EXPECT_EQ(Run({\"ft.create\", \"idx\", \"ON\", \"HASH\", \"SCHEMA\", \"name\", \"TEXT\", \"age\", \"NUMERIC\"}),\n            
\"OK\");\n\n  Run({\"hset\", \"doc1\", \"name\", \"alice\", \"age\", \"30\"});\n  Run({\"hset\", \"doc2\", \"name\", \"bob\", \"age\", \"25\"});\n\n  absl::FlagSaver fs;\n\n  string query = \"@name:alice @age:[25 30]\";\n  size_t query_len = query.size();\n\n  // Set limit to query_len - 1 (just below query length)\n  absl::SetFlag(&FLAGS_search_query_string_bytes, query_len - 1);\n\n  auto resp = Run({\"ft.search\", \"idx\", query});\n  EXPECT_THAT(resp, ErrArg(absl::StrCat(\"Query string is too long, max length is \", query_len - 1,\n                                        \" bytes\")));\n\n  absl::SetFlag(&FLAGS_search_query_string_bytes, query_len);\n\n  resp = Run({\"ft.search\", \"idx\", query});\n  EXPECT_THAT(resp, AreDocIds(\"doc1\"));\n\n  // Test FT.AGGREGATE with same query\n  absl::SetFlag(&FLAGS_search_query_string_bytes, query_len - 1);\n\n  resp = Run({\"ft.aggregate\", \"idx\", query, \"LOAD\", \"1\", \"name\"});\n  EXPECT_THAT(resp, ErrArg(absl::StrCat(\"Query string is too long, max length is \", query_len - 1,\n                                        \" bytes\")));\n\n  absl::SetFlag(&FLAGS_search_query_string_bytes, query_len);\n\n  resp = Run({\"ft.aggregate\", \"idx\", query, \"LOAD\", \"1\", \"name\"});\n  EXPECT_THAT(resp, IsUnordArrayWithSize(IsMap(\"name\", \"alice\")));\n}\n\nTEST_F(SearchFamilyTest, KnnHnsw) {\n  // Create an index with a vector field using HASH documents\n  auto resp = Run({\"FT.CREATE\", \"knn_idx\", \"ON\", \"HASH\", \"SCHEMA\", \"even\", \"TAG\", \"pos\", \"VECTOR\",\n                   \"HNSW\", \"6\", \"TYPE\", \"FLOAT32\", \"DIM\", \"1\", \"DISTANCE_METRIC\", \"L2\"});\n  EXPECT_EQ(resp, \"OK\");\n\n  // Helper to convert float to binary format\n  auto FloatToBytes = [](float f) -> string {\n    return string(reinterpret_cast<const char*>(&f), sizeof(float));\n  };\n\n  // Add some test documents with vector data\n  Run({\"HSET\", \"doc1\", \"even\", \"yes\", \"pos\", FloatToBytes(1.0f)});\n  Run({\"HSET\", 
\"doc2\", \"even\", \"no\", \"pos\", FloatToBytes(2.0f)});\n  Run({\"HSET\", \"doc3\", \"even\", \"yes\", \"pos\", FloatToBytes(3.0f)});\n\n  // Add documents without the vector field\n  Run({\"HSET\", \"doc4\", \"even\", \"yes\"});\n  Run({\"HSET\", \"doc5\", \"even\", \"maybe\"});\n\n  // Query vector (2.0f - should find doc2 closest, but filtered to \"yes\" docs)\n  string query_vec = FloatToBytes(2.0f);\n\n  // Perform KNN search with tag filter\n  resp = Run({\"FT.SEARCH\", \"knn_idx\", \"@even:{yes} => [KNN 3 @pos $vec]\", \"PARAMS\", \"2\", \"vec\",\n              query_vec});\n  // Should return documents with \"even\": \"yes\" sorted by vector distance to 2.0\n  EXPECT_THAT(resp, AreDocIds(\"doc3\", \"doc1\"));\n\n  // Verify that document without field is added to tag but not in hnsw vector index\n  resp = Run({\"FT.SEARCH\", \"knn_idx\", \"@even:{maybe}\"});\n  EXPECT_THAT(resp, AreDocIds(\"doc5\"));\n\n  resp = Run({\"FT.SEARCH\", \"knn_idx\", \"@even:{maybe} => [KNN 3 @pos $vec]\", \"PARAMS\", \"2\", \"vec\",\n              query_vec});\n  EXPECT_THAT(resp, IntArg(0));\n\n  // Verify that empty prefilter return zero results\n  resp = Run({\"FT.SEARCH\", \"knn_idx\", \"@even:{non_existing} => [KNN 3 @pos $vec]\", \"PARAMS\", \"2\",\n              \"vec\", query_vec});\n  EXPECT_THAT(resp, IntArg(0));\n}\n\nTEST_F(SearchFamilyTest, KnnHnswCosineDistanceCalculation) {\n  // Create index with 3D vectors using COSINE distance metric with HNSW\n  auto resp = Run({\"FT.CREATE\", \"cosine_idx\", \"ON\", \"HASH\", \"SCHEMA\", \"vec\", \"VECTOR\", \"HNSW\", \"6\",\n                   \"TYPE\", \"FLOAT32\", \"DIM\", \"3\", \"DISTANCE_METRIC\", \"COSINE\"});\n  EXPECT_EQ(resp, \"OK\");\n\n  // Query vector will be [1, 0, 0]\n  // Cosine distance = 1 - cosine_similarity = 1 - (dot_product / (norm1 * norm2))\n\n  // doc1: [1, 0, 0] - identical to query, distance = 0\n  Run({\"HSET\", \"doc1\", \"vec\", Vec3ToBytes(1.0f, 0.0f, 0.0f)});\n\n  // doc2: [0, 1, 0] - 
orthogonal (y-axis), distance = 1\n  Run({\"HSET\", \"doc2\", \"vec\", Vec3ToBytes(0.0f, 1.0f, 0.0f)});\n\n  // doc3: [0, 0, 1] - orthogonal (z-axis), distance = 1\n  Run({\"HSET\", \"doc3\", \"vec\", Vec3ToBytes(0.0f, 0.0f, 1.0f)});\n\n  // doc4: [-1, 0, 0] - opposite direction, distance = 2\n  Run({\"HSET\", \"doc4\", \"vec\", Vec3ToBytes(-1.0f, 0.0f, 0.0f)});\n\n  // doc5: [2, 0, 0] - same direction, 2x magnitude, distance = 0 (cosine is magnitude-invariant)\n  Run({\"HSET\", \"doc5\", \"vec\", Vec3ToBytes(2.0f, 0.0f, 0.0f)});\n\n  // doc6: [0, 0, 0] - EDGE CASE: zero vector (undefined cosine, implementation-dependent)\n  Run({\"HSET\", \"doc6\", \"vec\", Vec3ToBytes(0.0f, 0.0f, 0.0f)});\n\n  // doc7: [1, 1, 0] - 45° angle in xy-plane, cos_sim = 1/√2 ≈ 0.707, distance ≈ 0.293\n  Run({\"HSET\", \"doc7\", \"vec\", Vec3ToBytes(1.0f, 1.0f, 0.0f)});\n\n  // doc8: [1, 1, 1] - equal components, cos_sim = 1/√3 ≈ 0.577, distance ≈ 0.423\n  Run({\"HSET\", \"doc8\", \"vec\", Vec3ToBytes(1.0f, 1.0f, 1.0f)});\n\n  // doc9: [0.1, 0, 0] - EDGE CASE: very small magnitude, same direction, distance = 0\n  Run({\"HSET\", \"doc9\", \"vec\", Vec3ToBytes(0.1f, 0.0f, 0.0f)});\n\n  // doc10: [10, 0, 0] - EDGE CASE: very large magnitude, same direction, distance = 0\n  Run({\"HSET\", \"doc10\", \"vec\", Vec3ToBytes(10.0f, 0.0f, 0.0f)});\n\n  // Query with [1, 0, 0]\n  string query_vec = Vec3ToBytes(1.0f, 0.0f, 0.0f);\n\n  // Test: Verify all distance scores\n  resp = Run({\"FT.SEARCH\", \"cosine_idx\", \"*=>[KNN 10 @vec $query_vec AS score]\", \"PARAMS\", \"2\",\n              \"query_vec\", query_vec, \"RETURN\", \"1\", \"score\", \"SORTBY\", \"score\"});\n\n  ASSERT_THAT(resp, ArgType(RespExpr::ARRAY));\n  auto results = resp.GetVec();\n  ASSERT_GE(results.size(), 3);  // At least count + 1 doc\n\n  // Gather all scores\n  std::map<string, double> doc_scores;\n  for (size_t i = 1; i < results.size(); i += 2) {\n    string doc_id = results[i].GetString();\n    double score = 
vector_score(\"score\", results[i + 1].GetVec());\n    doc_scores[doc_id] = score;\n  }\n\n  // Verify expected distances (with tolerance for floating-point)\n  // doc1, doc5, doc9, doc10 should all have distance ≈ 0 (same direction, magnitude-invariant)\n  if (doc_scores.contains(\"doc1\")) {\n    EXPECT_LT(doc_scores[\"doc1\"], 0.01);\n  }\n\n  if (doc_scores.contains(\"doc5\")) {\n    EXPECT_LT(doc_scores[\"doc5\"], 0.01);\n  }\n\n  if (doc_scores.contains(\"doc9\")) {\n    EXPECT_LT(doc_scores[\"doc9\"], 0.01);\n  }\n\n  if (doc_scores.contains(\"doc10\")) {\n    EXPECT_LT(doc_scores[\"doc10\"], 0.01);\n  }\n\n  // doc7: 45° angle, distance ≈ 1 - 1/√2 ≈ 0.293\n  if (doc_scores.contains(\"doc7\")) {\n    EXPECT_GT(doc_scores[\"doc7\"], 0.25);\n    EXPECT_LT(doc_scores[\"doc7\"], 0.35);\n  }\n\n  // doc8: distance ≈ 1 - 1/√3 ≈ 0.423\n  if (doc_scores.contains(\"doc8\")) {\n    EXPECT_GT(doc_scores[\"doc8\"], 0.38);\n    EXPECT_LT(doc_scores[\"doc8\"], 0.47);\n  }\n\n  // doc2, doc3: orthogonal, distance = 1\n  if (doc_scores.contains(\"doc2\")) {\n    EXPECT_GT(doc_scores[\"doc2\"], 0.95);\n    EXPECT_LT(doc_scores[\"doc2\"], 1.05);\n  }\n\n  if (doc_scores.contains(\"doc3\")) {\n    EXPECT_GT(doc_scores[\"doc3\"], 0.95);\n    EXPECT_LT(doc_scores[\"doc3\"], 1.05);\n  }\n\n  // doc4: opposite direction, distance = 2\n  if (doc_scores.contains(\"doc4\")) {\n    EXPECT_GT(doc_scores[\"doc4\"], 1.95);\n    EXPECT_LT(doc_scores[\"doc4\"], 2.05);\n  }\n\n  // doc6: zero vector - EDGE CASE, behavior is implementation-dependent\n  // Most implementations treat it as maximum distance or handle specially\n}\n\nTEST_F(SearchFamilyTest, KnnHnswL2DistanceCalculation) {\n  // Create index with 3D vectors using L2 (Euclidean) distance metric with HNSW\n  auto resp = Run({\"FT.CREATE\", \"l2_idx\", \"ON\", \"HASH\", \"SCHEMA\", \"vec\", \"VECTOR\", \"HNSW\", \"6\",\n                   \"TYPE\", \"FLOAT32\", \"DIM\", \"3\", \"DISTANCE_METRIC\", \"L2\"});\n  EXPECT_EQ(resp, 
\"OK\");\n\n  // Query vector will be [1, 0, 0]\n  // L2_distance = sqrt(sum((a[i] - b[i])^2))\n\n  // doc1: [1, 0, 0] - identical to query, distance = 0\n  Run({\"HSET\", \"doc1\", \"vec\", Vec3ToBytes(1.0f, 0.0f, 0.0f)});\n\n  // doc2: [0, 1, 0] - orthogonal, distance = sqrt(1 + 1 + 0) = √2 ≈ 1.414\n  Run({\"HSET\", \"doc2\", \"vec\", Vec3ToBytes(0.0f, 1.0f, 0.0f)});\n\n  // doc3: [0, 0, 1] - orthogonal, distance = sqrt(1 + 0 + 1) = √2 ≈ 1.414\n  Run({\"HSET\", \"doc3\", \"vec\", Vec3ToBytes(0.0f, 0.0f, 1.0f)});\n\n  // doc4: [-1, 0, 0] - opposite direction, distance = sqrt(4 + 0 + 0) = 2\n  Run({\"HSET\", \"doc4\", \"vec\", Vec3ToBytes(-1.0f, 0.0f, 0.0f)});\n\n  // doc5: [2, 0, 0] - same direction, 2x magnitude, distance = sqrt(1 + 0 + 0) = 1\n  Run({\"HSET\", \"doc5\", \"vec\", Vec3ToBytes(2.0f, 0.0f, 0.0f)});\n\n  // doc6: [0, 0, 0] - EDGE CASE: zero vector, distance = sqrt(1 + 0 + 0) = 1\n  Run({\"HSET\", \"doc6\", \"vec\", Vec3ToBytes(0.0f, 0.0f, 0.0f)});\n\n  // doc7: [1, 1, 0] - distance = sqrt(0 + 1 + 0) = 1\n  Run({\"HSET\", \"doc7\", \"vec\", Vec3ToBytes(1.0f, 1.0f, 0.0f)});\n\n  // doc8: [1, 1, 1] - distance = sqrt(0 + 1 + 1) = √2 ≈ 1.414\n  Run({\"HSET\", \"doc8\", \"vec\", Vec3ToBytes(1.0f, 1.0f, 1.0f)});\n\n  // doc9: [0.1, 0, 0] - EDGE CASE: very small magnitude, distance = sqrt(0.81 + 0 + 0) = 0.9\n  Run({\"HSET\", \"doc9\", \"vec\", Vec3ToBytes(0.1f, 0.0f, 0.0f)});\n\n  // doc10: [10, 0, 0] - EDGE CASE: very large magnitude, distance = sqrt(81 + 0 + 0) = 9\n  Run({\"HSET\", \"doc10\", \"vec\", Vec3ToBytes(10.0f, 0.0f, 0.0f)});\n\n  // Query with [1, 0, 0]\n  string query_vec = Vec3ToBytes(1.0f, 0.0f, 0.0f);\n\n  // Test: Verify all distance scores\n  resp = Run({\"FT.SEARCH\", \"l2_idx\", \"*=>[KNN 10 @vec $query_vec AS score]\", \"PARAMS\", \"2\",\n              \"query_vec\", query_vec, \"RETURN\", \"1\", \"score\", \"SORTBY\", \"score\"});\n\n  ASSERT_THAT(resp, ArgType(RespExpr::ARRAY));\n  auto results = resp.GetVec();\n  
ASSERT_GE(results.size(), 3);  // At least count + 1 doc\n\n  // Gather all scores\n  std::map<string, double> doc_scores;\n  for (size_t i = 1; i < results.size(); i += 2) {\n    string doc_id = results[i].GetString();\n    double score = vector_score(\"score\", results[i + 1].GetVec());\n    doc_scores[doc_id] = score;\n  }\n\n  // Verify expected distances (with tolerance for floating-point)\n  // doc1: distance = 0 (identical)\n  if (doc_scores.contains(\"doc1\")) {\n    EXPECT_LT(doc_scores[\"doc1\"], 0.01);\n  }\n\n  // doc9: distance = 0.9 (small magnitude, same direction)\n  if (doc_scores.contains(\"doc9\")) {\n    EXPECT_GT(doc_scores[\"doc9\"], 0.85);\n    EXPECT_LT(doc_scores[\"doc9\"], 0.95);\n  }\n\n  // doc5, doc6, doc7: distance = 1\n  if (doc_scores.contains(\"doc5\")) {\n    EXPECT_GT(doc_scores[\"doc5\"], 0.95);\n    EXPECT_LT(doc_scores[\"doc5\"], 1.05);\n  }\n\n  if (doc_scores.contains(\"doc6\")) {\n    EXPECT_GT(doc_scores[\"doc6\"], 0.95);\n    EXPECT_LT(doc_scores[\"doc6\"], 1.05);\n  }\n\n  if (doc_scores.contains(\"doc7\")) {\n    EXPECT_GT(doc_scores[\"doc7\"], 0.95);\n    EXPECT_LT(doc_scores[\"doc7\"], 1.05);\n  }\n\n  // doc2, doc3, doc8: distance = √2 ≈ 1.414\n  if (doc_scores.contains(\"doc2\")) {\n    EXPECT_GT(doc_scores[\"doc2\"], 1.37);\n    EXPECT_LT(doc_scores[\"doc2\"], 1.46);\n  }\n\n  if (doc_scores.contains(\"doc3\")) {\n    EXPECT_GT(doc_scores[\"doc3\"], 1.37);\n    EXPECT_LT(doc_scores[\"doc3\"], 1.46);\n  }\n\n  if (doc_scores.contains(\"doc8\")) {\n    EXPECT_GT(doc_scores[\"doc8\"], 1.37);\n    EXPECT_LT(doc_scores[\"doc8\"], 1.46);\n  }\n\n  // doc4: distance = 2 (opposite direction)\n  if (doc_scores.contains(\"doc4\")) {\n    EXPECT_GT(doc_scores[\"doc4\"], 1.95);\n    EXPECT_LT(doc_scores[\"doc4\"], 2.05);\n  }\n\n  // doc10: distance = 9 (large magnitude, same direction)\n  if (doc_scores.contains(\"doc10\")) {\n    EXPECT_GT(doc_scores[\"doc10\"], 8.95);\n    EXPECT_LT(doc_scores[\"doc10\"], 9.05);\n  
}\n}\n\nTEST_F(SearchFamilyTest, KnnHnswIPDistanceCalculation) {\n  // Create index with 3D vectors using IP (Inner Product) distance metric with HNSW\n  auto resp = Run({\"FT.CREATE\", \"ip_idx\", \"ON\", \"HASH\", \"SCHEMA\", \"vec\", \"VECTOR\", \"HNSW\", \"6\",\n                   \"TYPE\", \"FLOAT32\", \"DIM\", \"3\", \"DISTANCE_METRIC\", \"IP\"});\n  EXPECT_EQ(resp, \"OK\");\n\n  // Comprehensive test cases with edge cases - SAME VECTORS as other tests\n  // Query vector will be [1, 0, 0]\n  // IP_distance = 1 - dot_product(a, b)\n\n  // doc1: [1, 0, 0] - dot = 1, distance = 0\n  Run({\"HSET\", \"doc1\", \"vec\", Vec3ToBytes(1.0f, 0.0f, 0.0f)});\n\n  // doc2: [0, 1, 0] - dot = 0, distance = 1\n  Run({\"HSET\", \"doc2\", \"vec\", Vec3ToBytes(0.0f, 1.0f, 0.0f)});\n\n  // doc3: [0, 0, 1] - dot = 0, distance = 1\n  Run({\"HSET\", \"doc3\", \"vec\", Vec3ToBytes(0.0f, 0.0f, 1.0f)});\n\n  // doc4: [-1, 0, 0] - dot = -1, distance = 2\n  Run({\"HSET\", \"doc4\", \"vec\", Vec3ToBytes(-1.0f, 0.0f, 0.0f)});\n\n  // doc5: [2, 0, 0] - dot = 2, distance = -1 (NOT magnitude-invariant like cosine)\n  Run({\"HSET\", \"doc5\", \"vec\", Vec3ToBytes(2.0f, 0.0f, 0.0f)});\n\n  // doc6: [0, 0, 0] - EDGE CASE: zero vector, dot = 0, distance = 1\n  Run({\"HSET\", \"doc6\", \"vec\", Vec3ToBytes(0.0f, 0.0f, 0.0f)});\n\n  // doc7: [1, 1, 0] - dot = 1, distance = 0\n  Run({\"HSET\", \"doc7\", \"vec\", Vec3ToBytes(1.0f, 1.0f, 0.0f)});\n\n  // doc8: [1, 1, 1] - dot = 1, distance = 0\n  Run({\"HSET\", \"doc8\", \"vec\", Vec3ToBytes(1.0f, 1.0f, 1.0f)});\n\n  // doc9: [0.1, 0, 0] - EDGE CASE: dot = 0.1, distance = 0.9\n  Run({\"HSET\", \"doc9\", \"vec\", Vec3ToBytes(0.1f, 0.0f, 0.0f)});\n\n  // doc10: [10, 0, 0] - EDGE CASE: dot = 10, distance = -9\n  Run({\"HSET\", \"doc10\", \"vec\", Vec3ToBytes(10.0f, 0.0f, 0.0f)});\n\n  // Query with [1, 0, 0]\n  string query_vec = Vec3ToBytes(1.0f, 0.0f, 0.0f);\n\n  // Test: Verify all distance scores\n  // For IP, lower distance means higher dot product 
(better match)\n  resp = Run({\"FT.SEARCH\", \"ip_idx\", \"*=>[KNN 10 @vec $query_vec AS score]\", \"PARAMS\", \"2\",\n              \"query_vec\", query_vec, \"RETURN\", \"1\", \"score\", \"SORTBY\", \"score\"});\n\n  ASSERT_THAT(resp, ArgType(RespExpr::ARRAY));\n  auto results = resp.GetVec();\n  ASSERT_GE(results.size(), 3);  // At least count + 1 doc\n\n  // Gather all scores\n  std::map<string, double> doc_scores;\n  for (size_t i = 1; i < results.size(); i += 2) {\n    string doc_id = results[i].GetString();\n    double score = vector_score(\"score\", results[i + 1].GetVec());\n    doc_scores[doc_id] = score;\n  }\n\n  // Verify expected distances (with tolerance for floating-point)\n  // doc10: distance = -9 (dot = 10, EDGE CASE: large magnitude advantage)\n  if (doc_scores.contains(\"doc10\")) {\n    EXPECT_GT(doc_scores[\"doc10\"], -9.05);\n    EXPECT_LT(doc_scores[\"doc10\"], -8.95);\n  }\n\n  // doc5: distance = -1 (dot = 2, shows magnitude matters for IP unlike cosine)\n  if (doc_scores.contains(\"doc5\")) {\n    EXPECT_GT(doc_scores[\"doc5\"], -1.05);\n    EXPECT_LT(doc_scores[\"doc5\"], -0.95);\n  }\n\n  // doc1, doc7, doc8: distance = 0 (dot = 1)\n  if (doc_scores.contains(\"doc1\")) {\n    EXPECT_GT(doc_scores[\"doc1\"], -0.05);\n    EXPECT_LT(doc_scores[\"doc1\"], 0.05);\n  }\n\n  if (doc_scores.contains(\"doc7\")) {\n    EXPECT_GT(doc_scores[\"doc7\"], -0.05);\n    EXPECT_LT(doc_scores[\"doc7\"], 0.05);\n  }\n\n  if (doc_scores.contains(\"doc8\")) {\n    EXPECT_GT(doc_scores[\"doc8\"], -0.05);\n    EXPECT_LT(doc_scores[\"doc8\"], 0.05);\n  }\n\n  // doc9: distance = 0.9 (dot = 0.1, EDGE CASE: small magnitude penalty)\n  if (doc_scores.contains(\"doc9\")) {\n    EXPECT_GT(doc_scores[\"doc9\"], 0.85);\n    EXPECT_LT(doc_scores[\"doc9\"], 0.95);\n  }\n\n  // doc2, doc3, doc6: distance = 1 (dot = 0)\n  if (doc_scores.contains(\"doc2\")) {\n    EXPECT_GT(doc_scores[\"doc2\"], 0.95);\n    EXPECT_LT(doc_scores[\"doc2\"], 1.05);\n  }\n\n  if 
(doc_scores.contains(\"doc3\")) {\n    EXPECT_GT(doc_scores[\"doc3\"], 0.95);\n    EXPECT_LT(doc_scores[\"doc3\"], 1.05);\n  }\n\n  if (doc_scores.contains(\"doc6\")) {\n    EXPECT_GT(doc_scores[\"doc6\"], 0.95);\n    EXPECT_LT(doc_scores[\"doc6\"], 1.05);\n  }\n\n  // doc4: distance = 2 (dot = -1, opposite direction is worst)\n  if (doc_scores.contains(\"doc4\")) {\n    EXPECT_GT(doc_scores[\"doc4\"], 1.95);\n    EXPECT_LT(doc_scores[\"doc4\"], 2.05);\n  }\n}\n\nTEST_F(SearchFamilyTest, ParseCSSResponse) {\n  using Fields = std::map<std::string, std::string>;\n  using Docs = std::map<std::string, Fields>;\n\n  std::string msg1 =\n      \"*17\\r\\n:8\\r\\n$2\\r\\ns0\\r\\n*2\\r\\n$5\\r\\ntitle\\r\\n$6\\r\\ntest \"\n      \"0\\r\\n$2\\r\\ns3\\r\\n*2\\r\\n$5\\r\\ntitle\\r\\n$6\\r\\ntest \"\n      \"3\\r\\n$2\\r\\ns7\\r\\n*2\\r\\n$5\\r\\ntitle\\r\\n$6\\r\\ntest \"\n      \"7\\r\\n$2\\r\\ns8\\r\\n*2\\r\\n$5\\r\\ntitle\\r\\n$6\\r\\ntest \"\n      \"8\\r\\n$2\\r\\ns4\\r\\n*2\\r\\n$5\\r\\ntitle\\r\\n$6\\r\\ntest \"\n      \"4\\r\\n$2\\r\\ns9\\r\\n*2\\r\\n$5\\r\\ntitle\\r\\n$6\\r\\ntest 9\\r\\n\";\n\n  std::string msg2 =\n      \"$2\\r\\ns1\\r\\n*2\\r\\n$5\\r\\ntitle\\r\\n$6\\r\\ntest \"\n      \"1\\r\\n$2\\r\\ns5\\r\\n*2\\r\\n$5\\r\\ntitle\\r\\n$6\\r\\ntest 5\\r\\n\";\n\n  RESPParser reader;\n  auto reply = reader.Feed(msg1.c_str(), msg1.size());\n  ASSERT_TRUE(reply->Empty());\n\n  reply = reader.Feed(msg2.c_str(), msg2.size());\n  ASSERT_FALSE(reply->Empty());\n\n  EXPECT_EQ(reply->GetType(), RESPObj::Type::ARRAY);\n  auto array = *reply->As<RESPArray>();\n  EXPECT_GE(array.Size(), 1);\n  EXPECT_EQ(array[0].GetType(), RESPObj::Type::INTEGER);\n\n  Docs search_results;\n  for (size_t i = 1; i < array.Size(); i += 2) {\n    auto& fields = search_results[*array[i].As<std::string>()];\n\n    auto field_array = *array[i + 1].As<RESPArray>();\n\n    for (size_t j = 0; j < field_array.Size(); j += 2) {\n      std::string field_name = *field_array[j].As<std::string>();\n      
std::string field_value = *field_array[j + 1].As<std::string>();\n\n      fields[field_name] = field_value;\n    }\n  }\n\n  EXPECT_EQ(search_results.size(), 8);\n\n  EXPECT_EQ(search_results[\"s0\"][\"title\"], \"test 0\");\n  EXPECT_EQ(search_results[\"s1\"][\"title\"], \"test 1\");\n  EXPECT_EQ(search_results[\"s3\"][\"title\"], \"test 3\");\n  EXPECT_EQ(search_results[\"s4\"][\"title\"], \"test 4\");\n  EXPECT_EQ(search_results[\"s5\"][\"title\"], \"test 5\");\n  EXPECT_EQ(search_results[\"s7\"][\"title\"], \"test 7\");\n  EXPECT_EQ(search_results[\"s8\"][\"title\"], \"test 8\");\n  EXPECT_EQ(search_results[\"s9\"][\"title\"], \"test 9\");\n}\n\nTEST_F(SearchFamilyTest, WithSortKeysOption) {\n  EXPECT_EQ(Run({\"ft.create\", \"users\", \"SCHEMA\", \"first_name\", \"TEXT\", \"SORTABLE\", \"last_name\",\n                 \"TEXT\", \"age\", \"NUMERIC\", \"SORTABLE\"}),\n            \"OK\");\n\n  Run({\"HSET\", \"user1\", \"first_name\", \"alice\", \"last_name\", \"jones\", \"age\", \"35\"});\n  Run({\"HSET\", \"user2\", \"first_name\", \"bob\", \"last_name\", \"jones\", \"age\", \"36\"});\n\n  EXPECT_THAT(Run({\"FT.SEARCH\", \"users\", \"jones\", \"SORTBY\", \"age\", \"WITHSORTKEYS\", \"NOCONTENT\"}),\n              IsArray(IntArg(2), \"user1\", \"#35\", \"user2\", \"#36\"));\n\n  EXPECT_THAT(\n      Run({\"FT.SEARCH\", \"users\", \"jones\", \"SORTBY\", \"first_name\", \"WITHSORTKEYS\", \"NOCONTENT\"}),\n      IsArray(IntArg(2), \"user1\", \"$alice\", \"user2\", \"$bob\"));\n\n  EXPECT_THAT(Run({\"FT.SEARCH\", \"users\", \"jones\", \"WITHSORTKEYS\", \"NOCONTENT\"}),\n              IsArray(IntArg(2), \"user1\", ArgType(RespExpr::NIL), \"user2\", ArgType(RespExpr::NIL)));\n\n  EXPECT_THAT(\n      Run({\"FT.SEARCH\", \"users\", \"jones\", \"SORTBY\", \"last_name\", \"WITHSORTKEYS\"}),\n      IsUnordArray(IntArg(2),\n\n                   \"user2\", \"$jones\", IsMap(\"last_name\", \"jones\", \"first_name\", \"bob\", \"age\", \"36\"),\n                   \"user1\", 
\"$jones\",\n                   IsMap(\"last_name\", \"jones\", \"first_name\", \"alice\", \"age\", \"35\")));\n}\n\n// GEO index tests for FT.SEARCH with HASH and JSON documents\n\nTEST_F(SearchFamilyTest, GeoSearchHash) {\n  auto resp =\n      Run({\"FT.CREATE\", \"geo_idx\", \"ON\", \"HASH\", \"SCHEMA\", \"name\", \"TEXT\", \"location\", \"GEO\"});\n  EXPECT_EQ(resp, \"OK\");\n\n  // Add documents with geo coordinates as \"lon,lat\" or \"lon lat\" format\n  Run({\"HSET\", \"city:1\", \"name\", \"Mountain View\", \"location\", \"-122.08, 37.386\"});\n  Run({\"HSET\", \"city:2\", \"name\", \"Palo Alto\", \"location\", \"-122.143, 37.444\"});\n  Run({\"HSET\", \"city:3\", \"name\", \"San Jose\", \"location\", \"-121.886, 37.338\"});\n  Run({\"HSET\", \"city:4\", \"name\", \"San Francisco\", \"location\", \"-122.419, 37.774\"});\n\n  // Search within 30 miles of Mountain View - should find nearby cities\n  resp = Run({\"FT.SEARCH\", \"geo_idx\", \"@location:[-122.08 37.386 30 mi]\"});\n  EXPECT_THAT(resp, AreDocIds(\"city:1\", \"city:2\", \"city:3\"));\n\n  // Search within 50 miles - should include San Francisco\n  resp = Run({\"FT.SEARCH\", \"geo_idx\", \"@location:[-122.08 37.386 50 mi]\"});\n  EXPECT_THAT(resp, AreDocIds(\"city:1\", \"city:2\", \"city:3\", \"city:4\"));\n\n  // Search with very small radius - only exact match\n  resp = Run({\"FT.SEARCH\", \"geo_idx\", \"@location:[-122.08 37.386 1 km]\"});\n  EXPECT_THAT(resp, AreDocIds(\"city:1\"));\n\n  // Search with wildcard - return all geo indexed docs\n  resp = Run({\"FT.SEARCH\", \"geo_idx\", \"@location:*\"});\n  EXPECT_THAT(resp, AreDocIds(\"city:1\", \"city:2\", \"city:3\", \"city:4\"));\n\n  // Combine geo search with text search\n  resp = Run({\"FT.SEARCH\", \"geo_idx\", \"San* @location:[-122.08 37.386 50 mi]\"});\n  EXPECT_THAT(resp, AreDocIds(\"city:3\", \"city:4\"));\n}\n\nTEST_F(SearchFamilyTest, GeoSearchJson) {\n  auto resp = Run({\"FT.CREATE\", \"geo_idx\", \"ON\", \"JSON\", \"SCHEMA\", 
\"$.name\", \"AS\", \"name\", \"TEXT\",\n                   \"$.location\", \"AS\", \"location\", \"GEO\"});\n  EXPECT_EQ(resp, \"OK\");\n\n  // Add JSON documents with geo coordinates\n  Run({\"JSON.SET\", \"city:1\", \".\", R\"({\"name\":\"Mountain View\",\"location\":\"-122.08, 37.386\"})\"});\n  Run({\"JSON.SET\", \"city:2\", \".\", R\"({\"name\":\"Palo Alto\",\"location\":\"-122.143, 37.444\"})\"});\n  Run({\"JSON.SET\", \"city:3\", \".\", R\"({\"name\":\"San Jose\",\"location\":\"-121.886, 37.338\"})\"});\n  Run({\"JSON.SET\", \"city:4\", \".\", R\"({\"name\":\"San Francisco\",\"location\":\"-122.419, 37.774\"})\"});\n\n  // Search within 30 miles of Mountain View\n  resp = Run({\"FT.SEARCH\", \"geo_idx\", \"@location:[-122.08 37.386 30 mi]\"});\n  EXPECT_THAT(resp, AreDocIds(\"city:1\", \"city:2\", \"city:3\"));\n\n  // Search within 50 miles - should include San Francisco\n  resp = Run({\"FT.SEARCH\", \"geo_idx\", \"@location:[-122.08 37.386 50 mi]\"});\n  EXPECT_THAT(resp, AreDocIds(\"city:1\", \"city:2\", \"city:3\", \"city:4\"));\n\n  // Search with kilometers\n  resp = Run({\"FT.SEARCH\", \"geo_idx\", \"@location:[-122.08 37.386 50 km]\"});\n  EXPECT_THAT(resp, AreDocIds(\"city:1\", \"city:2\", \"city:3\"));\n\n  // Search with wildcard\n  resp = Run({\"FT.SEARCH\", \"geo_idx\", \"@location:*\"});\n  EXPECT_THAT(resp, AreDocIds(\"city:1\", \"city:2\", \"city:3\", \"city:4\"));\n}\n\nTEST_F(SearchFamilyTest, GeoSearchInvalidValues) {\n  auto resp =\n      Run({\"FT.CREATE\", \"geo_idx\", \"ON\", \"HASH\", \"SCHEMA\", \"name\", \"TEXT\", \"location\", \"GEO\"});\n  EXPECT_EQ(resp, \"OK\");\n\n  // Test documents with invalid geo values are excluded from index\n  Run({\"HSET\", \"d:1\", \"name\", \"valid\", \"location\", \"-122.08, 37.386\"});\n  Run({\"HSET\", \"d:2\", \"name\", \"invalid_text\", \"location\", \"not a coordinate\"});\n  Run({\"HSET\", \"d:3\", \"name\", \"missing_lon\", \"location\", \", 37.386\"});\n  Run({\"HSET\", \"d:4\", \"name\", 
\"missing_lat\", \"location\", \"-122.08,\"});\n  Run({\"HSET\", \"d:7\", \"name\", \"empty\", \"location\", \"\"});\n  Run({\"HSET\", \"d:8\", \"name\", \"no_location\"});\n  Run({\"HSET\", \"d:9\", \"name\", \"space_format\", \"location\", \"-122.08,  37.386\"});\n\n  // Only valid coordinates should be indexed (d:1 and d:9)\n  resp = Run({\"FT.SEARCH\", \"geo_idx\", \"@location:*\"});\n  EXPECT_THAT(resp, AreDocIds(\"d:1\", \"d:9\"));\n\n  // Search should only find valid documents\n  resp = Run({\"FT.SEARCH\", \"geo_idx\", \"@location:[-122.08 37.386 100 mi]\"});\n  EXPECT_THAT(resp, AreDocIds(\"d:1\", \"d:9\"));\n\n  // All documents should still be searchable by other fields\n  // TODO: failed to add - silent skip?\n  // resp = Run({\"FT.SEARCH\", \"geo_idx\", \"*\"});\n  // EXPECT_THAT(resp, AreDocIds(\"d:1\", \"d:2\", \"d:3\", \"d:4\", \"d:5\", \"d:6\", \"d:7\", \"d:8\", \"d:9\"));\n}\n\nTEST_F(SearchFamilyTest, GeoSearchInvalidValuesJson) {\n  auto resp = Run({\"FT.CREATE\", \"geo_idx\", \"ON\", \"JSON\", \"SCHEMA\", \"$.name\", \"AS\", \"name\", \"TEXT\",\n                   \"$.location\", \"AS\", \"location\", \"GEO\"});\n  EXPECT_EQ(resp, \"OK\");\n\n  // Test JSON documents with various invalid geo values\n  Run({\"JSON.SET\", \"j:1\", \".\", R\"({\"name\":\"valid\",\"location\":\"-122.08, 37.386\"})\"});\n  Run({\"JSON.SET\", \"j:2\", \".\", R\"({\"name\":\"invalid_text\",\"location\":\"not a coordinate\"})\"});\n  Run({\"JSON.SET\", \"j:3\", \".\", R\"({\"name\":\"number\",\"location\":12345})\"});\n  Run({\"JSON.SET\", \"j:4\", \".\", R\"({\"name\":\"null_value\",\"location\":null})\"});\n  Run({\"JSON.SET\", \"j:5\", \".\", R\"({\"name\":\"array\",\"location\":[\"-122.08\", \"37.386\"]})\"});\n  Run({\"JSON.SET\", \"j:6\", \".\", R\"({\"name\":\"no_location\"})\"});\n  Run({\"JSON.SET\", \"j:7\", \".\", R\"({\"name\":\"empty_string\",\"location\":\"\"})\"});\n  Run({\"JSON.SET\", \"j:8\", \".\", R\"({\"name\":\"valid 2\",\"location\":\"-122.08, 
37.386\"})\"});\n\n  // Only valid coordinates should be indexed\n  resp = Run({\"FT.SEARCH\", \"geo_idx\", \"@location:*\"});\n  EXPECT_THAT(resp, AreDocIds(\"j:1\", \"j:8\"));\n\n  // All documents should still be searchable via full-text\n  // TODO: failed to add - silent skip?\n  // resp = Run({\"FT.SEARCH\", \"geo_idx\", \"*\"});\n  // EXPECT_THAT(resp, AreDocIds(\"j:1\", \"j:2\", \"j:3\", \"j:4\", \"j:5\", \"j:6\", \"j:7\", \"j:8\"));\n}\n\nTEST_F(SearchFamilyTest, GeoSearchUnits) {\n  auto resp = Run({\"FT.CREATE\", \"geo_idx\", \"ON\", \"HASH\", \"SCHEMA\", \"location\", \"GEO\"});\n  EXPECT_EQ(resp, \"OK\");\n\n  // Test different distance units: m, km, mi, ft\n  // TODO: support lowercase\n  // TODO: support query with without dot for coord (i.e.) 0.0 0.0\n  Run({\"HSET\", \"p:1\", \"location\", \"0, 0\"});      // Origin\n  Run({\"HSET\", \"p:2\", \"location\", \"0.001, 0\"});  // ~111 meters east\n  Run({\"HSET\", \"p:3\", \"location\", \"0.01, 0\"});   // ~1.11 km east\n  Run({\"HSET\", \"p:4\", \"location\", \"0.1, 0\"});    // ~11.1 km east\n\n  // Test meters\n  resp = Run({\"FT.SEARCH\", \"geo_idx\", \"@location:[0.0 0.0 200 M]\"});\n  EXPECT_THAT(resp, AreDocIds(\"p:1\", \"p:2\"));\n\n  // Test kilometers\n  resp = Run({\"FT.SEARCH\", \"geo_idx\", \"@location:[0.0 0.0 2 KM]\"});\n  EXPECT_THAT(resp, AreDocIds(\"p:1\", \"p:2\", \"p:3\"));\n\n  // Test miles\n  resp = Run({\"FT.SEARCH\", \"geo_idx\", \"@location:[0.0 0.0 10 MI]\"});\n  EXPECT_THAT(resp, AreDocIds(\"p:1\", \"p:2\", \"p:3\", \"p:4\"));\n\n  // Test feet\n  resp = Run({\"FT.SEARCH\", \"geo_idx\", \"@location:[0.0 0.0 500 FT]\"});\n  EXPECT_THAT(resp, AreDocIds(\"p:1\", \"p:2\"));\n}\n\nTEST_F(SearchFamilyTest, HnswVectorRange) {\n  auto FloatToBytes = [](float f) -> string {\n    return string(reinterpret_cast<const char*>(&f), sizeof(float));\n  };\n\n  // 1-D HNSW index with an extra numeric field for SORTBY testing\n  Run({\"FT.CREATE\", \"idx\", \"ON\", \"HASH\", \"SCHEMA\", 
\"pos\", \"VECTOR\", \"HNSW\", \"6\", \"TYPE\", \"FLOAT32\",\n       \"DIM\", \"1\", \"DISTANCE_METRIC\", \"L2\", \"val\", \"NUMERIC\"});\n\n  // 10 docs at positions 0..9, val = i*10\n  for (int i = 0; i < 10; i++) {\n    Run({\"HSET\", absl::StrFormat(\"k%d\", i), \"pos\", FloatToBytes(static_cast<float>(i)), \"val\",\n         absl::StrFormat(\"%d\", i * 10)});\n  }\n\n  string query_vec = FloatToBytes(5.0f);\n\n  // Basic range: query at 5.0, radius 1.5 → k4 (dist=1), k5 (dist=0), k6 (dist=1)\n  auto resp = Run({\"FT.SEARCH\", \"idx\", \"@pos:[VECTOR_RANGE 1.5 $vec]=>{$YIELD_DISTANCE_AS: dist}\",\n                   \"PARAMS\", \"2\", \"vec\", query_vec, \"LIMIT\", \"0\", \"10\"});\n  EXPECT_THAT(resp, AreDocIds(\"k4\", \"k5\", \"k6\"));\n\n  // Score alias is returned in each document by default\n  resp = Run({\"FT.SEARCH\", \"idx\", \"@pos:[VECTOR_RANGE 1.5 $vec]=>{$YIELD_DISTANCE_AS: dist}\",\n              \"PARAMS\", \"2\", \"vec\", query_vec, \"RETURN\", \"1\", \"dist\"});\n  ASSERT_THAT(resp, ArgType(RespExpr::ARRAY));\n  // Response: [total, key1, [field, val, ...], ...]\n  // Each doc should have \"dist\" in its fields\n  auto& arr = resp.GetVec();\n  ASSERT_GE(arr.size(), 3u);\n  for (size_t i = 2; i < arr.size(); i += 2) {\n    auto fields = arr[i].GetVec();\n    ASSERT_GE(fields.size(), 2u);\n    EXPECT_EQ(fields[0].GetString(), \"dist\");\n  }\n\n  // Large radius — all 10 docs returned\n  resp = Run({\"FT.SEARCH\", \"idx\", \"@pos:[VECTOR_RANGE 100 $vec]=>{$YIELD_DISTANCE_AS: dist}\",\n              \"PARAMS\", \"2\", \"vec\", query_vec, \"LIMIT\", \"0\", \"20\"});\n  ASSERT_THAT(resp, ArgType(RespExpr::ARRAY));\n  EXPECT_EQ(resp.GetVec()[0].GetInt(), 10);\n\n  // SORTBY val ASC — tests that sort_score is populated for non-score SORTBY\n  resp = Run({\"FT.SEARCH\", \"idx\", \"@pos:[VECTOR_RANGE 1.5 $vec]=>{$YIELD_DISTANCE_AS: dist}\",\n              \"PARAMS\", \"2\", \"vec\", query_vec, \"SORTBY\", \"val\", \"ASC\", \"RETURN\", \"1\", \"val\",\n  
            \"LIMIT\", \"0\", \"10\"});\n  ASSERT_THAT(resp, ArgType(RespExpr::ARRAY));\n  auto& asc_arr = resp.GetVec();\n  // Extract val values from response: [total, key, [val, v1], key, [val, v2], ...]\n  vector<int> vals_asc;\n  for (size_t i = 2; i < asc_arr.size(); i += 2) {\n    auto fields = asc_arr[i].GetVec();\n    ASSERT_GE(fields.size(), 2u);\n    vals_asc.push_back(stoi(fields[1].GetString()));\n  }\n  EXPECT_THAT(vals_asc, ElementsAre(40, 50, 60));\n\n  // SORTBY val DESC\n  resp = Run({\"FT.SEARCH\", \"idx\", \"@pos:[VECTOR_RANGE 1.5 $vec]=>{$YIELD_DISTANCE_AS: dist}\",\n              \"PARAMS\", \"2\", \"vec\", query_vec, \"SORTBY\", \"val\", \"DESC\", \"RETURN\", \"1\", \"val\",\n              \"LIMIT\", \"0\", \"10\"});\n  ASSERT_THAT(resp, ArgType(RespExpr::ARRAY));\n  auto& desc_arr = resp.GetVec();\n  vector<int> vals_desc;\n  for (size_t i = 2; i < desc_arr.size(); i += 2) {\n    auto fields = desc_arr[i].GetVec();\n    ASSERT_GE(fields.size(), 2u);\n    vals_desc.push_back(stoi(fields[1].GetString()));\n  }\n  EXPECT_THAT(vals_desc, ElementsAre(60, 50, 40));\n}\n\nTEST_F(SearchFamilyTest, GeoIndexFieldValidation) {\n  // Test 1: Correct geo field definition and usage with HASH\n  auto resp =\n      Run({\"FT.CREATE\", \"idx_hash\", \"ON\", \"HASH\", \"SCHEMA\", \"name\", \"TEXT\", \"coords\", \"GEO\"});\n  EXPECT_EQ(resp, \"OK\");\n\n  // Documents with correct geo fields\n  Run({\"HSET\", \"h:1\", \"name\", \"Location_A\", \"coords\", \"-122.4194, 37.7749\"});\n  Run({\"HSET\", \"h:2\", \"name\", \"Location_B\", \"coords\", \"-118.2437, 34.0522\"});\n\n  // Verify correct geo fields are indexed properly\n  resp = Run({\"FT.SEARCH\", \"idx_hash\", \"@coords:*\"});\n  EXPECT_THAT(resp, AreDocIds(\"h:1\", \"h:2\"));\n\n  // Test geo search with correct fields\n  resp = Run({\"FT.SEARCH\", \"idx_hash\", \"@coords:[-122.4194 37.7749 50 mi]\"});\n  EXPECT_THAT(resp, AreDocIds(\"h:1\"));\n\n  // Test 2: Missing geo fields\n  Run({\"HSET\", 
\"h:3\", \"name\", \"No_Coords\"});  // Missing coords field entirely\n\n  // Documents with missing geo fields should not appear in geo queries\n  resp = Run({\"FT.SEARCH\", \"idx_hash\", \"@coords:*\"});\n  EXPECT_THAT(resp, AreDocIds(\"h:1\", \"h:2\"));\n\n  // But should still be searchable by text fields\n  resp = Run({\"FT.SEARCH\", \"idx_hash\", \"@name:No_Coords\"});\n  EXPECT_THAT(resp, AreDocIds(\"h:3\"));\n\n  // Test 3: Incorrect geo field formats\n  Run({\"HSET\", \"h:4\", \"name\", \"Empty_Coords\", \"coords\", \"\"});  // Empty coords field\n  Run({\"HSET\", \"h:5\", \"name\", \"Invalid_Text\", \"coords\", \"not a coordinate\"});\n  Run({\"HSET\", \"h:6\", \"name\", \"Out_of_Range_Lat\", \"coords\", \"-122.0, 91.0\"});  // Lat > 90\n  Run({\"HSET\", \"h:7\", \"name\", \"Out_of_Range_Lon\", \"coords\", \"181.0, 45.0\"});   // Lon > 180\n  Run({\"HSET\", \"h:8\", \"name\", \"Missing_Lon\", \"coords\", \", 37.7749\"});\n  Run({\"HSET\", \"h:9\", \"name\", \"Missing_Lat\", \"coords\", \"-122.4194,\"});\n  Run({\"HSET\", \"h:10\", \"name\", \"Single_Value\", \"coords\", \"-122.4194\"});\n  Run({\"HSET\", \"h:11\", \"name\", \"Too_Many_Values\", \"coords\", \"-122.4194, 37.7749, 100\"});\n  Run({\"HSET\", \"h:12\", \"name\", \"Special_Chars\", \"coords\", \"abc#@!, xyz!@#\"});\n\n  // Verify incorrect formats are not indexed in geo field\n  resp = Run({\"FT.SEARCH\", \"idx_hash\", \"@coords:*\"});\n  EXPECT_THAT(resp, AreDocIds(\"h:1\", \"h:2\"));\n\n  // Verify incorrect formats are not indexed at all\n  resp = Run({\"FT.SEARCH\", \"idx_hash\", \"*\"});\n  EXPECT_THAT(resp, AreDocIds(\"h:1\", \"h:2\", \"h:3\"));\n\n  // Test 4: Correct geo field definition with JSON\n  resp = Run({\"FT.CREATE\", \"idx_json\", \"ON\", \"JSON\", \"SCHEMA\", \"$.name\", \"AS\", \"name\", \"TEXT\",\n              \"$.location\", \"AS\", \"location\", \"GEO\"});\n  EXPECT_EQ(resp, \"OK\");\n\n  // JSON documents with correct geo fields\n  Run({\"JSON.SET\", \"j:1\", \".\", 
R\"({\"name\":\"City_A\",\"location\":\"-122.4194, 37.7749\"})\"});\n  Run({\"JSON.SET\", \"j:2\", \".\", R\"({\"name\":\"City_B\",\"location\":\"-118.2437, 34.0522\"})\"});\n\n  // Verify correct geo fields are indexed\n  resp = Run({\"FT.SEARCH\", \"idx_json\", \"@location:*\"});\n  EXPECT_THAT(resp, AreDocIds(\"j:1\", \"j:2\"));\n\n  // Test 5: JSON documents with missing geo fields\n  Run({\"JSON.SET\", \"j:3\", \".\", R\"({\"name\":\"No_Location\"})\"});  // Missing location field\n  Run({\"JSON.SET\", \"j:4\", \".\", R\"({\"name\":\"Null_Location\",\"location\":null})\"});  // Null value\n\n  // Missing/null geo fields should not appear in geo queries\n  resp = Run({\"FT.SEARCH\", \"idx_json\", \"@location:*\"});\n  EXPECT_THAT(resp, AreDocIds(\"j:1\", \"j:2\"));\n\n  // But should be searchable by text\n  resp = Run({\"FT.SEARCH\", \"idx_json\", \"@name:*Location\"});\n  EXPECT_THAT(resp, AreDocIds(\"j:3\", \"j:4\"));\n\n  // Test 6: JSON documents with incorrect geo field types/formats\n  Run({\"JSON.SET\", \"j:5\", \".\", R\"({\"name\":\"Empty_Location\",\"location\":\"\"})\"});  // Empty string\n  Run({\"JSON.SET\", \"j:6\", \".\", R\"({\"name\":\"Number_Type\",\"location\":12345})\"});\n  Run({\"JSON.SET\", \"j:7\", \".\", R\"({\"name\":\"Boolean_Type\",\"location\":true})\"});\n  Run({\"JSON.SET\", \"j:8\", \".\", R\"({\"name\":\"Array_Type\",\"location\":[\"-122.4\", \"37.7\"]})\"});\n  Run({\"JSON.SET\", \"j:9\", \".\", R\"({\"name\":\"Object_Type\",\"location\":{\"lon\":-122.4,\"lat\":37.7}})\"});\n  Run({\"JSON.SET\", \"j:10\", \".\", R\"({\"name\":\"Invalid_Format\",\"location\":\"invalid coords\"})\"});\n  Run({\"JSON.SET\", \"j:11\", \".\", R\"({\"name\":\"Out_of_Range\",\"location\":\"200, 100\"})\"});\n\n  // Verify incorrect types/formats are not indexed as geo\n  resp = Run({\"FT.SEARCH\", \"idx_json\", \"@location:*\"});\n  EXPECT_THAT(resp, AreDocIds(\"j:1\", \"j:2\"));\n\n  // Documents with incorrect geo formats should still be searchable 
by text\n  resp = Run({\"FT.SEARCH\", \"idx_json\", \"@name:*\"});\n  EXPECT_THAT(resp, AreDocIds(\"j:1\", \"j:2\", \"j:3\", \"j:4\"));\n\n  // Test 7: Adding multiple locations for same document should index all locations\n  Run({\"JSON.SET\", \"j:12\", \".\",\n       R\"({\"name\":\"Multi_Locations\",\"location\":[\"-123.00, 12.00\", \"-124.0, 12.0\"]})\"});\n\n  resp = Run({\"FT.SEARCH\", \"idx_json\", \"@location:[-123.00 12.00 1 m]\"});\n  EXPECT_THAT(resp, AreDocIds(\"j:12\"));\n\n  resp = Run({\"FT.SEARCH\", \"idx_json\", \"@location:[-124.00 12.00 1 m]\"});\n  EXPECT_THAT(resp, AreDocIds(\"j:12\"));\n\n  // Check that we return only one document even if multiple locations match\n  resp = Run({\"FT.SEARCH\", \"idx_json\", \"@location:*\"});\n  EXPECT_THAT(resp, AreDocIds(\"j:1\", \"j:2\", \"j:12\"));\n\n  resp = Run({\"FT.SEARCH\", \"idx_json\", \"@location:[-124.00 12.00 1000 km]\"});\n  EXPECT_THAT(resp, AreDocIds(\"j:12\"));\n\n  // Deleting multi location document should remove all locations\n  Run({\"JSON.DEL\", \"j:12\"});\n  resp = Run({\"FT.SEARCH\", \"idx_json\", \"@location:*\"});\n  EXPECT_THAT(resp, AreDocIds(\"j:1\", \"j:2\"));\n}\n\nTEST_F(SearchFamilyTest, VectorFieldWrongSizeDoesNotCrash) {\n  // DIM=1 FLOAT32 expects exactly 4 bytes per value.\n  Run({\"FT.CREATE\", \"idx\", \"ON\", \"HASH\", \"SCHEMA\", \"pos\", \"VECTOR\", \"HNSW\", \"6\", \"TYPE\", \"FLOAT32\",\n       \"DIM\", \"1\", \"DISTANCE_METRIC\", \"L2\"});\n\n  // Insert values with wrong byte lengths (6 and 7 bytes instead of 4).\n  Run({\"HSET\", \"k1\", \"pos\", \"AAAAAAA\"});  // 7 bytes\n  Run({\"HSET\", \"k2\", \"pos\", \"AQAAAA\"});   // 6 bytes\n  Run({\"HSET\", \"k3\", \"pos\", \"AgAAAA\"});   // 6 bytes\n\n  // FT.SEARCH must not crash when serializing the wrong-sized vector fields.\n  auto resp = Run({\"FT.SEARCH\", \"idx\", \"*\", \"PARAMS\", \"2\", \"vec\", \"AQAAAA\", \"LIMIT\", \"0\", \"10\"});\n  EXPECT_THAT(resp, Not(ErrArg(\"\")));\n\n  // Same scenario with 
10-byte values and multiple keys.\n  Run({\"FT.CREATE\", \"idx2\", \"ON\", \"HASH\", \"SCHEMA\", \"v\", \"VECTOR\", \"HNSW\", \"6\", \"TYPE\", \"FLOAT32\",\n       \"DIM\", \"1\", \"DISTANCE_METRIC\", \"L2\"});\n  Run({\"HSET\", \"a1\", \"v\", \"aaaaaaaaaa\"});  // 10 bytes\n  Run({\"HSET\", \"a2\", \"v\", \"bbbbbbbbbb\"});\n  Run({\"HSET\", \"a3\", \"v\", \"cccccccccc\"});\n  Run({\"HSET\", \"a4\", \"v\", \"dddddddddd\"});\n  Run({\"HSET\", \"a5\", \"v\", \"eeeeeeeeee\"});\n\n  resp = Run({\"FT.SEARCH\", \"idx2\", \"*\", \"PARAMS\", \"2\", \"vec\", \"aaaaaaaaaa\", \"LIMIT\", \"0\", \"100\"});\n  EXPECT_THAT(resp, Not(ErrArg(\"\")));\n}\n\nTEST_F(SearchFamilyTest, SortBySkipsDocsWithoutSortField) {\n  // KeepTopKSorted skips docs that don't have the sort field, returning fewer sort scores\n  // than result.ids.size(). The loop then accesses sort_scores[i] out-of-bounds.\n  Run({\"FT.CREATE\", \"idx\", \"ON\", \"HASH\", \"SCHEMA\", \"val\", \"NUMERIC\"});\n\n  Run({\"HSET\", \"valid:1\", \"val\", \"123\"});\n  Run({\"HSET\", \"valid:2\", \"val\", \"456\"});\n  Run({\"HSET\", \"valid:3\", \"val\", \"789\"});\n\n  // These docs are indexed (no prefix restriction) but lack the sort field.\n  // They appear in '*' search results but are skipped by KeepTopKSorted.\n  for (int i = 0; i < 97; i++)\n    Run({\"HSET\", absl::StrCat(\"nofield:\", i), \"txt\", \"garbage\"});\n\n  auto resp = Run({\"FT.SEARCH\", \"idx\", \"*\", \"SORTBY\", \"val\", \"LIMIT\", \"0\", \"100\"});\n  auto vec = resp.GetVec();\n\n  // Extract doc keys from the response (indices 1, 3, 5, ...).\n  vector<string> keys;\n  for (size_t i = 1; i < vec.size(); i += 2)\n    keys.push_back(vec[i].GetString());\n\n  EXPECT_THAT(keys, ElementsAre(\"valid:1\", \"valid:2\", \"valid:3\"));\n}\n\nTEST_F(SearchFamilyTest, NumericIndexRejectsNonFiniteValues) {\n  // Regression test: HSET with inf/nan values on a NUMERIC field used to crash with\n  // DCHECK(std::isfinite(value)) in RangeTree::Add, because 
absl::SimpleAtod accepts\n  // \"inf\", \"-inf\", \"nan\" etc. as valid doubles.\n  Run({\"FT.CREATE\", \"idx\", \"ON\", \"HASH\", \"SCHEMA\", \"val\", \"NUMERIC\"});\n\n  Run({\"HSET\", \"doc:1\", \"val\", \"inf\"});\n  Run({\"HSET\", \"doc:2\", \"val\", \"-inf\"});\n  Run({\"HSET\", \"doc:3\", \"val\", \"+inf\"});\n  Run({\"HSET\", \"doc:4\", \"val\", \"nan\"});\n  Run({\"HSET\", \"doc:5\", \"val\", \"42\"});  // finite — must still be indexed\n\n  // Non-finite docs are not in the numeric index; only doc:5 should match the range query.\n  auto resp = Run({\"FT.SEARCH\", \"idx\", \"@val:[-inf +inf]\"});\n  EXPECT_THAT(resp, RespArray(ElementsAre(IntArg(1), \"doc:5\", _)));\n}\n\n}  // namespace dfly\n"
  },
  {
    "path": "src/server/serializer_base.cc",
    "content": "// Copyright 2026, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#include \"server/serializer_base.h\"\n\n#include \"base/logging.h\"\n\nnamespace dfly {\n\nSerializerBase::SerializerBase(DbSlice* slice) : db_slice_(slice) {\n}\n\nSerializerBase::~SerializerBase() {\n}\n\nuint64_t SerializerBase::RegisterChangeListener() {\n  DCHECK(db_slice_);\n  auto cb = [this](DbIndex db_index, const DbSlice::ChangeReq& req) {\n    HandleChangeReq(db_index, req);\n  };\n  snapshot_version_ = db_slice_->RegisterOnChange(std::move(cb));\n  return snapshot_version_;\n}\n\nvoid SerializerBase::UnregisterChangeListener() {\n  if (snapshot_version_ == 0)\n    return;\n  DCHECK(db_slice_);\n  db_slice_->UnregisterOnChange(snapshot_version_);\n  snapshot_version_ = 0;\n}\n\nvoid SerializerBase::MarkBucketSerializing(BucketIdentity bid) {\n  DCHECK(!bucket_states_.contains(bid)) << \"Bucket already in transient state\";\n  bucket_states_.emplace(bid, BucketState{BucketPhase::kSerializing, {}});\n}\n\nvoid SerializerBase::FinishBucketIteration(BucketIdentity bid,\n                                           std::vector<TieredDelayedEntry> delayed) {\n  auto it = bucket_states_.find(bid);\n  DCHECK(it != bucket_states_.end() && it->second.phase == BucketPhase::kSerializing);\n\n  if (delayed.empty()) {\n    // Serializing -> Covered\n    bucket_states_.erase(it);\n  } else {\n    // Serializing -> DelayedPending\n    it->second.phase = BucketPhase::kDelayedPending;\n    it->second.delayed = std::move(delayed);\n  }\n}\n\nvoid SerializerBase::CompleteBucketDelayed(BucketIdentity bid) {\n  auto it = bucket_states_.find(bid);\n  DCHECK(it != bucket_states_.end() && it->second.phase == BucketPhase::kDelayedPending);\n  bucket_states_.erase(it);\n}\n\nvoid SerializerBase::OnChange(DbIndex db_index, PrimeTable::bucket_iterator it) {\n  std::lock_guard guard(big_value_mu_);\n\n  if (it.is_done() || it.GetVersion() >= snapshot_version_) {\n  
  ++stats_.buckets_skipped;\n    return;\n  }\n\n  BucketIdentity bid = it.bucket_address();\n  if (bucket_states_.contains(bid)) {\n    ++stats_.change_during_serialization;\n    return;\n  }\n\n  it.SetVersion(snapshot_version_);\n  MarkBucketSerializing(bid);\n  DoSerializeBucket(db_index, it);\n  FinishBucketIteration(bid, {});\n  ++stats_.buckets_on_change;\n}\n\nvoid SerializerBase::OnInsert(DbIndex db_index, std::string_view key) {\n  DCHECK(db_slice_);\n  PrimeTable* table = db_slice_->GetTables(db_index).first;\n  table->CVCUponInsert(snapshot_version_, key, [this, db_index](PrimeTable::bucket_iterator bit) {\n    DCHECK_LT(bit.GetVersion(), snapshot_version_);\n    OnChange(db_index, bit);\n  });\n}\n\nvoid SerializerBase::HandleChangeReq(DbIndex db_index, const DbSlice::ChangeReq& req) {\n  if (auto update = req.update(); update) {\n    OnChange(db_index, *update);\n  } else {\n    OnInsert(db_index, std::get<std::string_view>(req.change));\n  }\n}\n\n}  // namespace dfly\n"
  },
  {
    "path": "src/server/serializer_base.h",
    "content": "// Copyright 2026, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#pragma once\n\n#include <absl/container/flat_hash_map.h>\n\n#include <vector>\n\n#include \"server/db_slice.h\"\n#include \"server/synchronization.h\"\n#include \"server/table.h\"\n#include \"server/tiered_storage.h\"\n\nnamespace dfly {\n\n// Opaque identity for a physical DashTable bucket — its memory address.\n// Unique across all databases/segments for the lifetime of a serialization.\nusing BucketIdentity = uintptr_t;\n\n// SerializerBase owns the DbSlice change-listener registration and a per-bucket\n// state machine that tracks each bucket through:\n//\n//   NotVisited  ->  Serializing  ->  (DelayedPending  ->)  Covered\n//\n// NotVisited and Covered are implicit (bucket absent from the map).\n// Only transient states (Serializing, DelayedPending) are stored in the map.\n//\n// State tracking is purely observational in early PRs: it drives DCHECKs and\n// stats but does not alter the serialization control flow.\nclass SerializerBase {\n public:\n  // Aggregated counters for observability.\n  struct Stats {\n    uint64_t buckets_loop = 0;       // main traversal loop\n    uint64_t buckets_on_change = 0;  // OnChange callback fired\n    uint64_t buckets_skipped = 0;    // already Covered when seen\n    uint64_t keys_serialized = 0;\n    uint64_t change_during_serialization = 0;  // change hit an in-flight bucket\n  };\n\n  explicit SerializerBase(DbSlice* slice);\n  virtual ~SerializerBase();\n\n  // Registers a ChangeCallback with DbSlice.  Returns the snapshot version\n  // (version upper-bound for entries that must be saved).\n  uint64_t RegisterChangeListener();\n\n  // Unregisters the callback.  
Safe to call if already unregistered.\n  void UnregisterChangeListener();\n\n  uint64_t snapshot_version() const {\n    return snapshot_version_;\n  }\n\n  const Stats& GetStats() const {\n    return stats_;\n  }\n\n protected:\n  // Phase of an in-flight bucket (only stored while transient).\n  enum class BucketPhase : uint8_t {\n    kSerializing,     // bucket is being iterated by the main loop / OnChange\n    kDelayedPending,  // all entries serialized but tiered reads still in-flight\n  };\n\n  struct BucketState {\n    BucketPhase phase;\n    std::vector<TieredDelayedEntry> delayed;\n  };\n\n  // --- Bucket state machine ---\n\n  // Transition bucket from NotVisited -> Serializing.\n  // Must be called before DoSerializeBucket.  Caller is responsible for\n  // stamping the bucket version to snapshot_version_ first.\n  void MarkBucketSerializing(BucketIdentity bid);\n\n  // Transition bucket from Serializing -> Covered (empty delayed) or\n  // Serializing -> DelayedPending (non-empty delayed).\n  // Takes ownership of the delayed entries.\n  void FinishBucketIteration(BucketIdentity bid, std::vector<TieredDelayedEntry> delayed);\n\n  // Transition bucket from DelayedPending -> Covered.\n  void CompleteBucketDelayed(BucketIdentity bid);\n\n  // --- Subclass serialization hook ---\n\n  // Serialize a single bucket.  
Returns the number of entries serialized.\n  // Called while big_value_mu_ is held.\n  virtual unsigned DoSerializeBucket(DbIndex db_index, PrimeTable::bucket_iterator it) = 0;\n\n  // --- Change callbacks ---\n\n  // Called when an existing bucket is about to be mutated.\n  // Default: if unvisited, stamps version, MarkBucketSerializing, DoSerializeBucket,\n  //          FinishBucketIteration.\n  //          If in-flight, increments change_during_serialization (mutex barrier\n  //          preserves the existing serialization behaviour).\n  // Holds big_value_mu_ while running.\n  virtual void OnChange(DbIndex db_index, PrimeTable::bucket_iterator it);\n\n  // Called when a new key is about to be inserted.\n  // Default: CVCUponInsert -> OnChange for every touched bucket.\n  virtual void OnInsert(DbIndex db_index, std::string_view key);\n\n  // --- Shared members (to be moved from subclasses in later PRs) ---\n\n  DbSlice* db_slice_;\n  DbTableArray db_array_;\n  uint64_t snapshot_version_ = 0;\n  ThreadLocalMutex big_value_mu_;\n  Stats stats_;\n\n private:\n  // Called by DbSlice when a change is detected.\n  void HandleChangeReq(DbIndex db_index, const DbSlice::ChangeReq& req);\n\n  absl::flat_hash_map<BucketIdentity, BucketState> bucket_states_;\n  uint64_t change_cb_id_ = 0;\n\n  // For unit-test only.\n  size_t BucketStateCountForTesting() const {\n    return bucket_states_.size();\n  }\n  friend class SerializerBaseTest;\n};\n\n}  // namespace dfly\n"
  },
  {
    "path": "src/server/serializer_base_test.cc",
    "content": "// Copyright 2026, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#include \"server/serializer_base.h\"\n\n#include \"base/gtest.h\"\n#include \"base/logging.h\"\n#include \"server/test_utils.h\"\n\nnamespace dfly {\n\nclass SerializerBaseTest : public BaseFamilyTest, public SerializerBase {\n public:\n  SerializerBaseTest() : SerializerBase(nullptr) {\n  }\n\n protected:\n  using SerializerBase::BucketPhase;\n  using SerializerBase::CompleteBucketDelayed;\n  using SerializerBase::FinishBucketIteration;\n  using SerializerBase::MarkBucketSerializing;\n\n  size_t BucketCount() const {\n    return BucketStateCountForTesting();\n  }\n\n  unsigned DoSerializeBucket(DbIndex /*db_index*/, PrimeTable::bucket_iterator /*it*/) override {\n    return 0;\n  }\n};\n\n// --- State-machine tests ---\n\nTEST_F(SerializerBaseTest, MarkThenFinishNoneDelayed) {\n  constexpr BucketIdentity bid = 0x1000;\n\n  EXPECT_EQ(0u, BucketCount());\n  MarkBucketSerializing(bid);\n  EXPECT_EQ(1u, BucketCount());\n\n  FinishBucketIteration(bid, {});\n  EXPECT_EQ(0u, BucketCount());\n}\n\nTEST_F(SerializerBaseTest, MarkThenFinishWithDelayedThenComplete) {\n  constexpr BucketIdentity bid = 0x2000;\n\n  MarkBucketSerializing(bid);\n  EXPECT_EQ(1u, BucketCount());\n\n  // Simulate one delayed (tiered) entry.\n  std::vector<TieredDelayedEntry> delayed;\n  delayed.push_back({});\n  FinishBucketIteration(bid, std::move(delayed));\n\n  EXPECT_EQ(1u, BucketCount());\n\n  CompleteBucketDelayed(bid);\n  EXPECT_EQ(0u, BucketCount());\n}\n\nTEST_F(SerializerBaseTest, MultipleBucketsIndependent) {\n  constexpr BucketIdentity bid1 = 0x1000;\n  constexpr BucketIdentity bid2 = 0x2000;\n  constexpr BucketIdentity bid3 = 0x3000;\n\n  MarkBucketSerializing(bid1);\n  MarkBucketSerializing(bid2);\n  MarkBucketSerializing(bid3);\n  EXPECT_EQ(3u, BucketCount());\n\n  FinishBucketIteration(bid2, {});\n  EXPECT_EQ(2u, BucketCount());\n\n  
std::vector<TieredDelayedEntry> d;\n  d.push_back({});\n  FinishBucketIteration(bid1, std::move(d));\n  EXPECT_EQ(2u, BucketCount());\n\n  FinishBucketIteration(bid3, {});\n  EXPECT_EQ(1u, BucketCount());\n\n  CompleteBucketDelayed(bid1);\n  EXPECT_EQ(0u, BucketCount());\n}\n\n}  // namespace dfly\n"
  },
  {
    "path": "src/server/serializer_commons.cc",
    "content": "#include \"server/serializer_commons.h\"\n\nextern \"C\" {\n#include \"redis/rdb.h\"\n}\n\n#include <absl/base/internal/endian.h>\n\n#include <system_error>\n\n#include \"base/logging.h\"\n\nusing namespace std;\n\nnamespace dfly {\n\nint PackedUIntMeta::Type() const {\n  return (first_byte & 0xC0) >> 6;\n}\n\nunsigned PackedUIntMeta::ByteSize() const {\n  switch (Type()) {\n    case RDB_ENCVAL:\n    case RDB_6BITLEN:\n      return 0;\n    case RDB_14BITLEN:\n      return 1;\n  };\n  switch (first_byte) {\n    case RDB_32BITLEN:\n      return 4;\n    case RDB_64BITLEN:\n      return 8;\n  };\n  return 0;\n}\n\n/* Saves an encoded unsigned integer. The first two bits in the first byte are used to\n * hold the encoding type. See the RDB_* definitions for more information\n * on the types of encoding. buf must be at least 9 bytes.\n * */\nunsigned WritePackedUInt(uint64_t value, io::MutableBytes buf) {\n  if (value < (1 << 6)) {\n    /* Save a 6 bit value */\n    buf[0] = (value & 0xFF) | (RDB_6BITLEN << 6);\n    return 1;\n  }\n\n  if (value < (1 << 14)) {\n    /* Save a 14 bit value */\n    buf[0] = ((value >> 8) & 0xFF) | (RDB_14BITLEN << 6);\n    buf[1] = value & 0xFF;\n    return 2;\n  }\n\n  if (value <= UINT32_MAX) {\n    /* Save a 32 bit value */\n    buf[0] = RDB_32BITLEN;\n    absl::big_endian::Store32(buf.data() + 1, value);\n    return 1 + 4;\n  }\n\n  /* Save a 64 bit value */\n  buf[0] = RDB_64BITLEN;\n  absl::big_endian::Store64(buf.data() + 1, value);\n  return 1 + 8;\n}\n\nio::Result<uint64_t> ReadPackedUInt(PackedUIntMeta meta, io::Bytes bytes) {\n  DCHECK(meta.ByteSize() <= bytes.size());\n  switch (meta.Type()) {\n    case RDB_ENCVAL:\n    case RDB_6BITLEN:\n      return meta.first_byte & 0x3F;\n    case RDB_14BITLEN:\n      return ((meta.first_byte & 0x3F) << 8) | bytes[0];\n  };\n  switch (meta.first_byte) {\n    case RDB_32BITLEN:\n      return absl::big_endian::Load32(bytes.data());\n    case RDB_64BITLEN:\n      return 
absl::big_endian::Load64(bytes.data());\n  };\n  return make_unexpected(make_error_code(errc::illegal_byte_sequence));\n}\n\n}  // namespace dfly\n"
  },
  {
    "path": "src/server/serializer_commons.h",
    "content": "// Copyright 2022, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#include <nonstd/expected.hpp>\n#include <system_error>\n\n#include \"io/io.h\"\n#include \"server/error.h\"\n\nnamespace dfly {\n\nusing nonstd::make_unexpected;\n\n#define SET_OR_RETURN(expr, dest)              \\\n  do {                                         \\\n    auto exp_val = (expr);                     \\\n    if (!exp_val) {                            \\\n      VLOG(1) << \"Error while calling \" #expr; \\\n      return exp_val.error();                  \\\n    }                                          \\\n    dest = std::move(exp_val.value());         \\\n  } while (0)\n\n#define SET_OR_UNEXPECT(expr, dest)            \\\n  {                                            \\\n    auto exp_res = (expr);                     \\\n    if (!exp_res)                              \\\n      return make_unexpected(exp_res.error()); \\\n    dest = std::move(exp_res.value());         \\\n  }\n\n// Represents meta information for an encoded packed unsigned integer.\nstruct PackedUIntMeta {\n  // Initialize by first byte in sequence.\n  PackedUIntMeta(uint8_t first_byte) : first_byte{first_byte} {\n  }\n\n  // Get underlying RDB type.\n  int Type() const;\n\n  // Get additional size in bytes (excluding first one).\n  unsigned ByteSize() const;\n\n  uint8_t first_byte;\n};\n\n// Saves an packed unsigned integer. The first two bits in the first byte are used to\n// hold the encoding type. See the RDB_* definitions for more information\n// on the types of encoding. buf must be at least 9 bytes.\nunsigned WritePackedUInt(uint64_t value, io::MutableBytes dest);\n\n// Deserialize packed unsigned integer.\nio::Result<uint64_t> ReadPackedUInt(PackedUIntMeta meta, io::Bytes source);\n\n}  // namespace dfly\n"
  },
  {
    "path": "src/server/server_family.cc",
    "content": "// Copyright 2022, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#include \"server/server_family.h\"\n\n#include <absl/cleanup/cleanup.h>\n#include <absl/random/random.h>  // for master_replid_ generation.\n#include <absl/strings/match.h>\n#include <absl/strings/str_join.h>\n#include <absl/strings/str_replace.h>\n#include <absl/strings/strip.h>\n#include <croncpp.h>  // cron::cronexpr\n#include <fcntl.h>    // for mkstemp\n#include <hdr/hdr_histogram.h>\n#include <sys/resource.h>\n#include <sys/stat.h>  // for fchmod\n#include <sys/utsname.h>\n#include <unistd.h>  // for getpid(), write(), close(), unlink(), fsync()\n\n#include <algorithm>\n#include <chrono>\n#include <filesystem>\n#include <fstream>\n#include <optional>\n#include <unordered_map>\n#include <unordered_set>\n\n#include \"absl/strings/ascii.h\"\n#include \"core/detail/gen_utils.h\"\n#include \"facade/error.h\"\n#include \"server/common.h\"\n#include \"server/slowlog.h\"\n\nextern \"C\" {\n#include \"redis/redis_aux.h\"\n}\n\n#include \"base/flags.h\"\n#include \"base/histogram.h\"\n#include \"base/logging.h\"\n#include \"core/compact_object.h\"\n#include \"core/dense_set.h\"\n#include \"facade/cmd_arg_parser.h\"\n#include \"facade/dragonfly_connection.h\"\n#include \"facade/dragonfly_listener.h\"\n#include \"facade/reply_builder.h\"\n#include \"io/file_util.h\"\n#include \"io/proc_reader.h\"\n#include \"search/doc_index.h\"\n#include \"server/acl/acl_commands_def.h\"\n#include \"server/acl/user_registry.h\"\n#include \"server/command_registry.h\"\n#include \"server/conn_context.h\"\n#include \"server/debugcmd.h\"\n#include \"server/detail/save_stages_controller.h\"\n#include \"server/detail/snapshot_storage.h\"\n#include \"server/dflycmd.h\"\n#include \"server/engine_shard_set.h\"\n#include \"server/error.h\"\n#include \"server/generic_family.h\"\n#include \"server/journal/journal.h\"\n#include \"server/main_service.h\"\n#include 
\"server/memory_cmd.h\"\n#include \"server/multi_command_squasher.h\"\n#include \"server/namespaces.h\"\n#include \"server/rdb_load.h\"\n#include \"server/rdb_save.h\"\n#include \"server/replica.h\"\n#include \"server/script_mgr.h\"\n#include \"server/search/search_family.h\"\n#include \"server/server_state.h\"\n#include \"server/snapshot.h\"\n#include \"server/tiered_storage.h\"\n#include \"server/transaction.h\"\n#include \"server/version.h\"\n#include \"strings/human_readable.h\"\n#include \"util/accept_server.h\"\n#include \"util/aws/aws.h\"\n\nusing namespace std;\n\nstruct ReplicaOfFlag {\n  string host;\n  string port;\n\n  bool has_value() const {\n    return !host.empty() && !port.empty();\n  }\n};\n\nstatic bool AbslParseFlag(std::string_view in, ReplicaOfFlag* flag, std::string* err);\nstatic std::string AbslUnparseFlag(const ReplicaOfFlag& flag);\n\nstruct CronExprFlag {\n  static constexpr std::string_view kCronPrefix = \"0 \"sv;\n  std::optional<cron::cronexpr> cron_expr;\n};\n\nstatic bool AbslParseFlag(std::string_view in, CronExprFlag* flag, std::string* err);\nstatic std::string AbslUnparseFlag(const CronExprFlag& flag);\n\nABSL_FLAG(string, dir, \"\", \"working directory\");\nABSL_FLAG(string, dbfilename, \"dump-{timestamp}\",\n          \"the filename to save/load the DB, instead of/with {timestamp} can be used {Y}, {m}, and \"\n          \"{d} macros\");\nABSL_FLAG(string, requirepass, \"\",\n          \"password for AUTH authentication. 
\"\n          \"If empty can also be set with DFLY_PASSWORD environment variable.\");\nABSL_FLAG(uint32_t, maxclients, 64000, \"Maximum number of concurrent clients allowed.\");\n\nABSL_FLAG(string, save_schedule, \"\", \"the flag is deprecated, please use snapshot_cron instead\");\nABSL_FLAG(CronExprFlag, snapshot_cron, {},\n          \"cron expression for the time to save a snapshot, crontab style\");\nABSL_FLAG(bool, df_snapshot_format, true,\n          \"if true, save in dragonfly-specific snapshotting format\");\nABSL_FLAG(int, epoll_file_threads, 0,\n          \"thread size for file workers when running in epoll mode, default is hardware concurrent \"\n          \"threads\");\nABSL_FLAG(ReplicaOfFlag, replicaof, ReplicaOfFlag{},\n          \"Specifies a host and port which point to a target master \"\n          \"to replicate. \"\n          \"Format should be <IPv4>:<PORT> or host:<PORT> or [<IPv6>]:<PORT>\");\nABSL_FLAG(int32_t, slowlog_log_slower_than, 10000,\n          \"Add commands slower than this threshold to slow log. The value is expressed in \"\n          \"microseconds and if it's negative - disables the slowlog.\");\nABSL_FLAG(uint32_t, slowlog_max_len, 20, \"Slow log maximum length.\");\n\nABSL_FLAG(uint32_t, pause_wait_timeout, 1,\n          \"Timeout in seconds, to set up the pause for all connections for CLIENT PAUSE command \"\n          \"and cluster slot migration finalization procedure.\");\n\nABSL_FLAG(string, s3_endpoint, \"\", \"endpoint for s3 snapshots, default uses aws regional endpoint\");\nABSL_FLAG(bool, s3_use_https, true, \"whether to use https for s3 endpoints\");\n// Disable EC2 metadata by default, or if a users credentials are invalid the\n// AWS client will spent 30s trying to connect to inaccessable EC2 endpoints\n// to load the credentials.\nABSL_FLAG(bool, s3_ec2_metadata, false,\n          \"whether to load credentials and configuration from EC2 metadata\");\n// Enables S3 payload signing over HTTP. 
This reduces the latency and resource\n// usage when writing snapshots to S3, at the expense of security.\nABSL_FLAG(bool, s3_sign_payload, true,\n          \"whether to sign the s3 request payload when uploading snapshots\");\n\nABSL_FLAG(bool, info_replication_valkey_compatible, true,\n          \"when true - output valkey compatible values for info-replication\");\n\nABSL_FLAG(bool, managed_service_info, false,\n          \"Hides some implementation details from users when true (i.e. in managed service env)\");\n\nABSL_FLAG(string, availability_zone, \"\",\n          \"server availability zone, used by clients to read from local-zone replicas\");\n\nABSL_FLAG(bool, keep_legacy_memory_metrics, true, \"legacy metrics format\");\n// TODO deprecate when flipped in production\nABSL_FLAG(bool, replicaof_no_one_start_journal, true,\n          \"when set, preserves journal offsets after REPLICAOF NO ONE\");\n\nABSL_DECLARE_FLAG(int32_t, port);\nABSL_DECLARE_FLAG(bool, cache_mode);\nABSL_DECLARE_FLAG(int32_t, hz);\nABSL_DECLARE_FLAG(bool, tls);\nABSL_DECLARE_FLAG(string, tls_ca_cert_file);\nABSL_DECLARE_FLAG(string, tls_ca_cert_dir);\nABSL_DECLARE_FLAG(int, replica_priority);\nABSL_DECLARE_FLAG(double, rss_oom_deny_ratio);\nABSL_DECLARE_FLAG(bool, experimental_replicaof_v2);\n\nbool AbslParseFlag(std::string_view in, ReplicaOfFlag* flag, std::string* err) {\n#define RETURN_ON_ERROR(cond, m)                                           \\\n  do {                                                                     \\\n    if ((cond)) {                                                          \\\n      *err = m;                                                            \\\n      LOG(WARNING) << \"Error in parsing arguments for --replicaof: \" << m; \\\n      return false;                                                        \\\n    }                                                                      \\\n  } while (0)\n\n  if (in.empty()) {  // on empty flag \"parse\" 
nothing. If we return false then DF exits.\n    *flag = ReplicaOfFlag{};\n    return true;\n  }\n\n  auto pos = in.find_last_of(':');\n  RETURN_ON_ERROR(pos == string::npos, \"missing ':'.\");\n\n  string_view ip = in.substr(0, pos);\n  flag->port = in.substr(pos + 1);\n\n  RETURN_ON_ERROR(ip.empty() || flag->port.empty(), \"IP/host or port are empty.\");\n\n  // For IPv6: ip1.front == '[' AND ip1.back == ']'\n  // For IPv4: ip1.front != '[' AND ip1.back != ']'\n  // Together, this ip1.front == '[' iff ip1.back == ']', which can be implemented as XNOR (NOT XOR)\n  RETURN_ON_ERROR(((ip.front() == '[') ^ (ip.back() == ']')), \"unclosed brackets.\");\n\n  if (ip.front() == '[') {\n    // shortest possible IPv6 is '::1' (loopback)\n    RETURN_ON_ERROR(ip.length() <= 2, \"IPv6 host name is too short\");\n\n    flag->host = ip.substr(1, ip.length() - 2);\n  } else {\n    flag->host = ip;\n  }\n\n  VLOG(1) << \"--replicaof: Received \" << flag->host << \" :  \" << flag->port;\n  return true;\n#undef RETURN_ON_ERROR\n}\n\nstd::string AbslUnparseFlag(const ReplicaOfFlag& flag) {\n  return (flag.has_value()) ? 
absl::StrCat(flag.host, \":\", flag.port) : \"\";\n}\n\nbool AbslParseFlag(std::string_view in, CronExprFlag* flag, std::string* err) {\n  if (in.empty()) {\n    flag->cron_expr = std::nullopt;\n    return true;\n  }\n  if (absl::StartsWith(in, \"\\\"\")) {\n    *err = absl::StrCat(\"Could it be that you put quotes in the flagfile?\");\n\n    return false;\n  }\n\n  std::string raw_cron_expr = absl::StrCat(CronExprFlag::kCronPrefix, in);\n  try {\n    VLOG(1) << \"creating cron from: '\" << raw_cron_expr << \"'\";\n    flag->cron_expr = cron::make_cron(raw_cron_expr);\n    return true;\n  } catch (const cron::bad_cronexpr& ex) {\n    *err = ex.what();\n  }\n  return false;\n}\n\nstd::string AbslUnparseFlag(const CronExprFlag& flag) {\n  if (flag.cron_expr) {\n    auto str_expr = to_cronstr(*flag.cron_expr);\n    DCHECK(absl::StartsWith(str_expr, CronExprFlag::kCronPrefix));\n    return str_expr.substr(CronExprFlag::kCronPrefix.size());\n  }\n  return \"\";\n}\n\nnamespace dfly {\n\nusing absl::GetFlag;\nusing absl::StrCat;\nusing namespace facade;\nusing namespace util;\nusing detail::SaveStagesController;\nusing http::StringResponse;\nusing strings::HumanReadableNumBytes;\n\nusing EngineFunc = void (ServerFamily::*)(CmdArgList args, CommandContext*);\n\ninline CommandId::Handler HandlerFunc(ServerFamily* se, EngineFunc f) {\n  return [=](CmdArgList args, CommandContext* cntx) { return (se->*f)(args, cntx); };\n}\n\nnamespace {\n\n// TODO these should be configurable as command line flag and at runtime via config set\nconstexpr std::array<double, 3> kLatencyPercentiles = {50.0, 99.0, 99.9};\n\nbool is_histogram_empty(const hdr_histogram* h) {\n  return hdr_min(h) == std::numeric_limits<int64_t>::max();\n}\n\nconst auto kRedisVersion = \"7.4.0\";\n\n// Captured memory peaks\nstruct {\n  std::atomic<size_t> used = 0;\n  std::atomic<size_t> rss = 0;\n} glob_memory_peaks;\n\nsize_t FetchRssMemory(const io::StatusData& sdata) {\n  return sdata.vm_rss + 
sdata.hugetlb_pages;\n}\n\nusing CI = CommandId;\n\nstruct CmdArgListFormatter {\n  void operator()(std::string* out, MutableSlice arg) const {\n    out->append(absl::StrCat(\"`\", std::string_view(arg.data(), arg.size()), \"`\"));\n  }\n};\n\nstring UnknownCmd(string cmd, CmdArgList args) {\n  return absl::StrCat(\"unknown command '\", cmd, \"' with args beginning with: \",\n                      absl::StrJoin(args.begin(), args.end(), \", \", CmdArgListFormatter()));\n}\n\nstd::shared_ptr<detail::SnapshotStorage> CreateCloudSnapshotStorage(std::string_view uri) {\n  if (detail::IsS3Path(uri)) {\n#ifdef WITH_AWS\n    shard_set->pool()->GetNextProactor()->Await([&] { util::aws::Init(); });\n    return std::make_shared<detail::AwsS3SnapshotStorage>(\n        absl::GetFlag(FLAGS_s3_endpoint), absl::GetFlag(FLAGS_s3_use_https),\n        absl::GetFlag(FLAGS_s3_ec2_metadata), absl::GetFlag(FLAGS_s3_sign_payload));\n#else\n    LOG(ERROR) << \"Compiled without AWS support\";\n    exit(1);\n#endif\n  } else if (detail::IsGCSPath(uri)) {\n#ifdef WITH_GCP\n    auto gcs = std::make_shared<detail::GcsSnapshotStorage>();\n    auto ec = shard_set->pool()->GetNextProactor()->Await([&] { return gcs->Init(3000); });\n    if (ec) {\n      LOG(ERROR) << \"Failed to initialize GCS snapshot storage: \" << ec.message();\n      exit(1);\n    }\n    return gcs;\n#else\n    LOG(ERROR) << \"Compiled without GCP support\";\n    exit(1);\n#endif\n  } else {\n    LOG(ERROR) << \"Uknown cloud storage \" << uri;\n    exit(1);\n  }\n}\n\n// Check that if TLS is used at least one form of client authentication is\n// enabled. 
That means either using a password or giving a root\n// certificate for authenticating client certificates which will\n// be required.\nbool ValidateServerTlsFlags() {\n  if (!absl::GetFlag(FLAGS_tls)) {\n    return true;\n  }\n\n  bool has_auth = false;\n\n  if (!dfly::GetPassword().empty()) {\n    has_auth = true;\n  }\n\n  if (!(absl::GetFlag(FLAGS_tls_ca_cert_file).empty() &&\n        absl::GetFlag(FLAGS_tls_ca_cert_dir).empty())) {\n    has_auth = true;\n  }\n\n  if (!has_auth) {\n    LOG(ERROR) << \"TLS configured but no authentication method is used!\";\n    return false;\n  }\n\n  return true;\n}\n\ntemplate <typename T> void UpdateMax(T* maxv, T current) {\n  *maxv = std::max(*maxv, current);\n}\n\nvoid SetMasterFlagOnAllThreads(bool is_master) {\n  auto cb = [is_master](unsigned, auto*) { ServerState::tlocal()->is_master = is_master; };\n  shard_set->pool()->AwaitBrief(cb);\n}\n\nstd::optional<cron::cronexpr> InferSnapshotCronExpr() {\n  string save_time = GetFlag(FLAGS_save_schedule);\n  auto cron_expr = GetFlag(FLAGS_snapshot_cron);\n\n  if (!save_time.empty()) {\n    LOG(ERROR) << \"save_schedule flag is deprecated, please use snapshot_cron instead\";\n    exit(1);\n  }\n\n  if (cron_expr.cron_expr) {\n    return std::move(cron_expr.cron_expr);\n  }\n\n  return std::nullopt;\n}\n\nvoid ClientSetName(CmdArgList args, CommandContext* cmd_cntx) {\n  if (args.size() == 1) {\n    cmd_cntx->conn()->SetName(string{ArgS(args, 0)});\n    return cmd_cntx->rb()->SendOk();\n  }\n  return cmd_cntx->SendError(facade::kSyntaxErr);\n}\n\nvoid ClientGetName(CmdArgList args, CommandContext* cmd_cntx) {\n  if (!args.empty()) {\n    return cmd_cntx->SendError(facade::kSyntaxErr);\n  }\n  auto* rb = static_cast<RedisReplyBuilder*>(cmd_cntx->rb());\n  if (auto name = cmd_cntx->conn()->GetName(); !name.empty()) {\n    return rb->SendBulkString(name);\n  } else {\n    return rb->SendNull();\n  }\n}\n\nvoid ClientInfo(CmdArgList args, CommandContext* cmd_cntx) {\n  if 
(!args.empty()) {\n    return cmd_cntx->SendError(facade::kSyntaxErr);\n  }\n  auto* conn = cmd_cntx->conn();\n  string info = conn->GetClientInfo();\n\n  // redis-py (5expects these fields. We append dummy values to keep the output parsable.\n  absl::StrAppend(&info, \" db=\", cmd_cntx->server_conn_cntx()->db_index(), \"\\r\\n\");\n  auto* rb = static_cast<RedisReplyBuilder*>(cmd_cntx->rb());\n  return rb->SendBulkString(info);\n}\n\nvoid ClientList(CmdArgList args, absl::Span<facade::Listener*> listeners,\n                CommandContext* cmd_cntx) {\n  if (!args.empty()) {\n    return cmd_cntx->SendError(facade::kSyntaxErr);\n  }\n\n  vector<string> client_info;\n  absl::base_internal::SpinLock mu;\n\n  // we can not preempt the connection traversal, so we need to use a spinlock.\n  // alternatively we could lock when mutating the connection list, but it seems not important.\n  auto cb = [&](unsigned thread_index, util::Connection* conn) {\n    facade::Connection* dcon = static_cast<facade::Connection*>(conn);\n    string info = dcon->GetClientInfo(thread_index);\n    absl::base_internal::SpinLockHolder l(&mu);\n    client_info.push_back(std::move(info));\n  };\n\n  for (auto* listener : listeners) {\n    listener->TraverseConnections(cb);\n  }\n\n  string result = absl::StrJoin(client_info, \"\\n\");\n  result.append(\"\\n\");\n  auto* rb = static_cast<RedisReplyBuilder*>(cmd_cntx->rb());\n  return rb->SendVerbatimString(result);\n}\n\nvoid ClientTracking(CmdArgList args, CommandContext* cmd_cntx) {\n  auto* rb = static_cast<RedisReplyBuilder*>(cmd_cntx->rb());\n  if (!rb->IsResp3())\n    return cmd_cntx->SendError(\n        \"Client tracking is currently not supported for RESP2. 
Please use RESP3.\");\n\n  CmdArgParser parser{args};\n  if (!parser.HasAtLeast(1) || args.size() > 3)\n    return cmd_cntx->SendError(kSyntaxErr);\n\n  bool is_on = false;\n  using Tracking = ConnectionState::ClientTracking;\n  Tracking::Options option = Tracking::NONE;\n  if (parser.Check(\"ON\")) {\n    is_on = true;\n  } else if (!parser.Check(\"OFF\")) {\n    return cmd_cntx->SendError(kSyntaxErr);\n  }\n\n  bool noloop = false;\n\n  if (parser.HasNext()) {\n    if (parser.Check(\"OPTIN\")) {\n      option = Tracking::OPTIN;\n    } else if (parser.Check(\"OPTOUT\")) {\n      option = Tracking::OPTOUT;\n    } else if (parser.Check(\"NOLOOP\")) {\n      noloop = true;\n    } else {\n      return cmd_cntx->SendError(kSyntaxErr);\n    }\n  }\n\n  if (parser.HasNext()) {\n    if (!noloop && parser.Check(\"NOLOOP\")) {\n      noloop = true;\n    } else {\n      return cmd_cntx->SendError(kSyntaxErr);\n    }\n  }\n\n  auto* conn_cntx = cmd_cntx->server_conn_cntx();\n  if (is_on) {\n    ++conn_cntx->subscriptions;\n  }\n\n  conn_cntx->conn_state.tracking_info_.SetClientTracking(is_on);\n  conn_cntx->conn_state.tracking_info_.SetOption(option);\n  conn_cntx->conn_state.tracking_info_.SetNoLoop(noloop);\n  return cmd_cntx->rb()->SendOk();\n}\n\nvoid ClientCaching(CmdArgList args, CommandContext* cmd_cntx) {\n  auto* rb = static_cast<RedisReplyBuilder*>(cmd_cntx->rb());\n  if (!rb->IsResp3())\n    return cmd_cntx->SendError(\n        \"Client caching is currently not supported for RESP2. 
Please use RESP3.\");\n\n  if (args.size() != 1) {\n    return cmd_cntx->SendError(kSyntaxErr);\n  }\n\n  auto* cntx = cmd_cntx->server_conn_cntx();\n  if (!cntx->conn_state.tracking_info_.IsTrackingOn()) {\n    return cmd_cntx->SendError(\n        \"CLIENT CACHING can be called only when the client is in tracking mode with OPTIN or \"\n        \"OPTOUT mode enabled\");\n  }\n\n  using Tracking = ConnectionState::ClientTracking;\n  CmdArgParser parser{args};\n\n  if (parser.Check(\"YES\")) {\n    if (!cntx->conn_state.tracking_info_.HasOption(Tracking::OPTIN)) {\n      return cmd_cntx->SendError(\n          \"CLIENT CACHING YES is only valid when tracking is enabled in OPTIN mode\");\n    }\n  } else if (parser.Check(\"NO\")) {\n    if (!cntx->conn_state.tracking_info_.HasOption(Tracking::OPTOUT)) {\n      return cmd_cntx->SendError(\n          \"CLIENT CACHING NO is only valid when tracking is enabled in OPTOUT mode\");\n    }\n    cntx->conn_state.tracking_info_.ResetCachingSequenceNumber();\n  } else {\n    return cmd_cntx->SendError(kSyntaxErr);\n  }\n\n  bool is_multi = cmd_cntx->tx() && cmd_cntx->tx()->IsMulti();\n  cntx->conn_state.tracking_info_.SetCachingSequenceNumber(is_multi);\n  cmd_cntx->rb()->SendOk();\n}\n\nvoid ClientSetInfo(CmdArgList args, CommandContext* cmd_cntx) {\n  if (args.size() != 2) {\n    return cmd_cntx->SendError(kSyntaxErr);\n  }\n\n  auto* conn = cmd_cntx->conn();\n  if (conn == nullptr) {\n    return cmd_cntx->SendError(\"No connection\");\n  }\n\n  string type = absl::AsciiStrToUpper(ArgS(args, 0));\n  string_view val = ArgS(args, 1);\n\n  if (type == \"LIB-NAME\") {\n    conn->SetLibName(string(val));\n  } else if (type == \"LIB-VER\") {\n    conn->SetLibVersion(string(val));\n  } else {\n    return cmd_cntx->SendError(kSyntaxErr);\n  }\n\n  cmd_cntx->rb()->SendOk();\n}\n\nvoid ClientId(CmdArgList args, CommandContext* cmd_cntx) {\n  if (args.size() != 0) {\n    return cmd_cntx->SendError(kSyntaxErr);\n  }\n\n  return 
cmd_cntx->rb()->SendLong(cmd_cntx->conn()->GetClientId());\n}\n\nvoid ClientKill(CmdArgList args, absl::Span<facade::Listener*> listeners,\n                CommandContext* cmd_cntx) {\n  std::function<bool(facade::Connection * conn)> evaluator;\n\n  if (args.size() == 1) {\n    string_view ip_port = ArgS(args, 0);\n    if (ip_port.find(':') != ip_port.npos) {\n      evaluator = [ip_port](facade::Connection* conn) {\n        return conn->RemoteEndpointStr() == ip_port;\n      };\n    }\n  } else if (args.size() == 2) {\n    string filter_type = absl::AsciiStrToUpper(ArgS(args, 0));\n    string_view filter_value = ArgS(args, 1);\n    if (filter_type == \"ADDR\") {\n      evaluator = [filter_value](facade::Connection* conn) {\n        return conn->RemoteEndpointStr() == filter_value;\n      };\n    } else if (filter_type == \"LADDR\") {\n      evaluator = [filter_value](facade::Connection* conn) {\n        return conn->LocalBindStr() == filter_value;\n      };\n    } else if (filter_type == \"ID\") {\n      uint32_t id;\n      if (absl::SimpleAtoi(filter_value, &id)) {\n        evaluator = [id](facade::Connection* conn) { return conn->GetClientId() == id; };\n      }\n    }\n    // TODO: Add support for KILL USER/TYPE/SKIPME\n  }\n\n  if (!evaluator) {\n    return cmd_cntx->SendError(kSyntaxErr);\n  }\n\n  const bool is_admin_request = cmd_cntx->conn()->IsPrivileged();\n\n  atomic<uint32_t> killed_connections = 0;\n  atomic<uint32_t> kill_errors = 0;\n\n  auto cb = [&](unsigned idx, ProactorBase* p) mutable {\n    // Step 1 aggregate the per thread connections from all listeners\n    std::vector<facade::Connection::WeakRef> connections;\n    auto traverse_cb = [&](unsigned idx, util::Connection* conn) {\n      facade::Connection* dconn = static_cast<facade::Connection*>(conn);\n      if (evaluator(dconn)) {\n        if (is_admin_request || !dconn->IsPrivileged()) {\n          connections.push_back(dconn->Borrow());\n        } else {\n          
kill_errors.fetch_add(1);\n        }\n      }\n    };\n    for (auto* listener : listeners) {\n      listener->TraverseConnectionsOnThread(traverse_cb, UINT32_MAX, nullptr);\n    }\n\n    // Step 2 kill the clients\n    for (auto& tcon : connections) {\n      facade::Connection* conn = tcon.Get();\n      if (conn && conn->socket()->proactor()->GetPoolIndex() == p->GetPoolIndex()) {\n        conn->ShutdownSelfBlocking();\n        killed_connections.fetch_add(1);\n      }\n    }\n  };\n\n  shard_set->pool()->AwaitFiberOnAll(cb);\n\n  if (kill_errors.load() == 0) {\n    return cmd_cntx->rb()->SendLong(killed_connections.load());\n  } else {\n    return cmd_cntx->SendError(absl::StrCat(\"Killed \", killed_connections.load(),\n                                            \" client(s), but unable to kill \", kill_errors.load(),\n                                            \" admin client(s).\"));\n  }\n}\n\nvoid ClientMigrate(CmdArgList args, absl::Span<facade::Listener*> listeners,\n                   CommandContext* cmd_cntx) {\n  if (args.size() != 2) {\n    return cmd_cntx->SendError(kSyntaxErr);\n  }\n\n  uint32_t id;\n  if (!absl::SimpleAtoi(args[0], &id)) {\n    return cmd_cntx->SendError(\"Invalid client id\");\n  }\n\n  uint32_t tid = 0;\n  if (!absl::SimpleAtoi(args[1], &tid) || tid >= shard_set->pool()->size()) {\n    return cmd_cntx->SendError(\"Invalid thread id\");\n  }\n\n  unsigned migrated = 0;\n  auto cb_brief = [&](unsigned current_tid, ProactorBase* p) {\n    if (current_tid == tid) {\n      return;  // we should not migrate to the same thread\n    }\n\n    auto traverse_cb = [&](unsigned, util::Connection* conn) {\n      facade::Connection* dconn = static_cast<facade::Connection*>(conn);\n      if (dconn->GetClientId() == id) {\n        ++migrated;\n        dconn->RequestAsyncMigration(shard_set->pool()->at(tid), true /* force */);\n      }\n    };\n\n    for (auto* listener : listeners) {\n      if (listener->IsPrivilegedInterface())\n        
continue;  // skip privileged interfaces\n\n      listener->TraverseConnectionsOnThread(traverse_cb, UINT32_MAX, nullptr);\n    }\n  };\n\n  shard_set->pool()->AwaitBrief(cb_brief);\n\n  return cmd_cntx->rb()->SendLong(migrated);\n}\n\nstd::string_view GetOSString() {\n  // Call uname() only once since it can be expensive. Cache the final result in a static string.\n  static string os_string = []() {\n    utsname os_name;\n    uname(&os_name);\n    return StrCat(os_name.sysname, \" \", os_name.release, \" \", os_name.machine);\n  }();\n\n  return os_string;\n}\n\nstring_view GetRedisMode() {\n  return IsClusterEnabledOrEmulated() ? \"cluster\"sv : \"standalone\"sv;\n}\n\nstruct ReplicaOfArgs {\n  string host;\n  uint16_t port;\n  std::optional<cluster::SlotRange> slot_range;\n  static nonstd::expected<ReplicaOfArgs, ErrorReply> FromCmdArgs(CmdArgList args);\n  bool IsReplicaOfNoOne() const {\n    return port == 0;\n  }\n  friend std::ostream& operator<<(std::ostream& os, const ReplicaOfArgs& args) {\n    if (args.IsReplicaOfNoOne()) {\n      return os << \"NO ONE\";\n    }\n    os << args.host << \":\" << args.port;\n    if (args.slot_range.has_value()) {\n      os << \" SLOTS [\" << args.slot_range.value().start << \"-\" << args.slot_range.value().end\n         << \"]\";\n    }\n    return os;\n  }\n};\n\nnonstd::expected<ReplicaOfArgs, ErrorReply> ReplicaOfArgs::FromCmdArgs(CmdArgList args) {\n  ReplicaOfArgs replicaof_args;\n  CmdArgParser parser(args);\n\n  if (parser.Check(\"NO\")) {\n    parser.ExpectTag(\"ONE\");\n    replicaof_args.port = 0;\n  } else {\n    replicaof_args.host = parser.Next<string>();\n    replicaof_args.port = parser.Next<uint16_t>();\n    if (auto err = parser.TakeError(); err || replicaof_args.port < 1) {\n      return nonstd::make_unexpected(ErrorReply(\"port is out of range\"));\n    }\n    if (parser.HasNext()) {\n      auto [slot_start, slot_end] = parser.Next<SlotId, SlotId>();\n      replicaof_args.slot_range = 
cluster::SlotRange{slot_start, slot_end};\n      if (auto err = parser.TakeError(); err || !replicaof_args.slot_range->IsValid()) {\n        return nonstd::make_unexpected(ErrorReply(\"Invalid slot range\"));\n      }\n    }\n  }\n\n  if (auto err = parser.TakeError(); err) {\n    return nonstd::make_unexpected(err.MakeReply());\n  }\n  return replicaof_args;\n}\n\nuint64_t GetDelayMs(uint64_t ts) {\n  uint64_t now_ns = fb2::ProactorBase::GetMonotonicTimeNs();\n  uint64_t delay_ns = 0;\n  if (ts < now_ns - 1000000) {  // if more than 1ms has passed between ts and now_ns\n    delay_ns = (now_ns - ts) / 1000000;\n  }\n  return delay_ns;\n}\n\nbool ReadProcStats(io::StatusData* sdata) {\n#ifdef __linux__\n  io::Result<io::StatusData> sdata_res = io::ReadStatusInfo();\n  if (!sdata_res) {\n    LOG_FIRST_N(ERROR, 10) << \"Error fetching /proc/self/status stats. error \"\n                           << sdata_res.error().message();\n    return false;\n  }\n\n  size_t total_rss = FetchRssMemory(*sdata_res);\n  rss_mem_current.store(total_rss, memory_order_relaxed);\n  if (total_rss > glob_memory_peaks.rss.load(memory_order_relaxed))\n    glob_memory_peaks.rss.store(total_rss, memory_order_relaxed);\n\n  *sdata = *sdata_res;\n  return true;\n#else\n  return false;\n#endif\n}\n\n// Rewrite the configuration file with runtime modified settings\nGenericError RewriteConfigFile() {\n  absl::CommandLineFlag* flagfile_flag = absl::FindCommandLineFlag(\"flagfile\");\n  if (!flagfile_flag || flagfile_flag->CurrentValue().empty()) {\n    return GenericError(\"The server is running without a config file\");\n  }\n\n  std::string config_file_path = flagfile_flag->CurrentValue();\n\n  // Read original config file\n  std::ifstream file(config_file_path);\n  if (!file.is_open()) {\n    return GenericError(\"Cannot read config file\");\n  }\n\n  std::string original_content;\n  std::string line;\n  std::unordered_set<std::string> existing_flags;\n  std::vector<std::string> updated_lines;\n  
bool in_generated_section = false;\n  bool had_generated_section = false;\n\n  // Get only runtime modified flag values (not startup config)\n  std::unordered_map<std::string, std::string> current_flags;\n  auto all_flags = absl::GetAllFlags();\n  for (const auto& [flag_name, flag_ptr] : all_flags) {\n    // Only include flags that were modified at runtime via CONFIG SET\n    // We exclude 'flagfile' and other startup-only configs\n    if (flag_ptr->CurrentValue() != flag_ptr->DefaultValue() && flag_name != \"flagfile\") {\n      // Additional check: only include if the config is known to ConfigRegistry\n      // This ensures we only write configs that can be modified at runtime\n      auto config_names = config_registry.List(flag_name);\n      if (!config_names.empty()) {\n        current_flags[std::string(flag_name)] = flag_ptr->CurrentValue();\n      }\n    }\n  }\n\n  // Process original file line by line\n  while (std::getline(file, line)) {\n    std::string trimmed = line;\n    trimmed.erase(0, trimmed.find_first_not_of(\" \\t\"));\n\n    // Skip generated section from previous rewrites\n    if (trimmed == \"# Generated by CONFIG REWRITE\") {\n      in_generated_section = true;\n      had_generated_section = true;\n      break;\n    }\n\n    if (!in_generated_section) {\n      // Check if this line is a flag definition\n      if (!trimmed.empty() && trimmed[0] == '-' && trimmed[1] == '-') {\n        size_t eq_pos = trimmed.find('=');\n        if (eq_pos != std::string::npos) {\n          std::string flag_name = trimmed.substr(2, eq_pos - 2);\n          if (current_flags.count(flag_name)) {\n            // Update existing flag with current value\n            updated_lines.push_back(absl::StrCat(\"--\", flag_name, \"=\", current_flags[flag_name]));\n            existing_flags.insert(flag_name);\n          } else {\n            // Keep original line if flag is not in current active flags\n            updated_lines.push_back(line);\n          }\n        } else 
{\n          // Keep original line as-is\n          updated_lines.push_back(line);\n        }\n      } else {\n        // Keep comments and other lines as-is\n        updated_lines.push_back(line);\n      }\n    }\n  }\n  file.close();\n\n  // Collect new flags that weren't in original config\n  std::vector<std::string> new_flags;\n  for (const auto& [flag_name, flag_value] : current_flags) {\n    if (existing_flags.find(flag_name) == existing_flags.end()) {\n      new_flags.push_back(absl::StrCat(\"--\", flag_name, \"=\", flag_value));\n    }\n  }\n\n  // Build final content\n  std::string final_content;\n  for (const auto& line : updated_lines) {\n    final_content += line + \"\\n\";\n  }\n\n  // Add new flags section if there are any\n  if (!new_flags.empty()) {\n    if (!final_content.empty() && final_content.back() != '\\n') {\n      final_content += \"\\n\";\n    }\n    // Only add extra spacing if this is the first time adding generated section\n    if (!had_generated_section) {\n      final_content += \"\\n# Generated by CONFIG REWRITE\\n\";\n    } else {\n      final_content += \"# Generated by CONFIG REWRITE\\n\";\n    }\n    for (const auto& new_flag : new_flags) {\n      final_content += new_flag + \"\\n\";\n    }\n  }\n\n  // Atomic write using mkstemp + rename\n  std::string tmp_template = config_file_path + \".tmpXXXXXX\";\n  int fd = mkstemp(tmp_template.data());\n  if (fd == -1) {\n    return GenericError(\"Failed to create temporary file\");\n  }\n\n  size_t off = 0;\n  while (off < final_content.size()) {\n    ssize_t n = write(fd, final_content.c_str() + off, final_content.size() - off);\n    if (n <= 0) {\n      close(fd);\n      unlink(tmp_template.data());\n      return GenericError(\"Failed to write config file\");\n    }\n    off += n;\n  }\n\n  fsync(fd);\n  fchmod(fd, 0644);\n  close(fd);\n\n  if (rename(tmp_template.data(), config_file_path.c_str()) == -1) {\n    unlink(tmp_template.data());\n    return GenericError(\"Failed to rewrite 
config file\");\n  }\n\n  return {};\n}\n\nbool IsMaster() {\n  // We call this function on startup where tlocal() == nullptr. We handle\n  // this case below.\n  if (!ServerState::tlocal()) {\n    return true;\n  }\n  return ServerState::tlocal()->is_master;\n}\n\n}  // namespace\n\nvoid SlowLogGet(dfly::CmdArgList args, std::string_view sub_cmd, util::ProactorPool* pp,\n                CommandContext* cmd_cntx) {\n  size_t requested_slow_log_length = UINT32_MAX;\n  size_t argc = args.size();\n  if (argc >= 3) {\n    return cmd_cntx->SendError(facade::UnknownSubCmd(sub_cmd, \"SLOWLOG\"), facade::kSyntaxErrType);\n  } else if (argc == 2) {\n    string_view length = facade::ArgS(args, 1);\n    int64_t num;\n    if ((!absl::SimpleAtoi(length, &num)) || (num < -1)) {\n      return cmd_cntx->SendError(\"count should be greater than or equal to -1\",\n                                 facade::kSyntaxErrType);\n    }\n    if (num >= 0) {\n      requested_slow_log_length = num;\n    }\n  }\n\n  // gather all the individual slowlogs from all the fibers and sort them by their timestamp\n  std::vector<boost::circular_buffer<SlowLogEntry>> entries(pp->size());\n  pp->AwaitFiberOnAll([&](auto index, auto* context) {\n    auto shard_entries = ServerState::tlocal()->GetSlowLog().Entries();\n    entries[index] = shard_entries;\n  });\n\n  std::vector<std::pair<SlowLogEntry, unsigned>> merged_slow_log;\n  for (size_t i = 0; i < entries.size(); ++i) {\n    for (const auto& log_item : entries[i]) {\n      merged_slow_log.emplace_back(log_item, i);\n    }\n  }\n\n  std::sort(merged_slow_log.begin(), merged_slow_log.end(), [](const auto& e1, const auto& e2) {\n    return e1.first.unix_ts_usec > e2.first.unix_ts_usec;\n  });\n\n  requested_slow_log_length = std::min(merged_slow_log.size(), requested_slow_log_length);\n\n  auto* rb = static_cast<facade::RedisReplyBuilder*>(cmd_cntx->rb());\n  rb->StartArray(requested_slow_log_length);\n  for (size_t i = 0; i < requested_slow_log_length; 
++i) {\n    const auto& entry = merged_slow_log[i].first;\n    const auto& args = entry.cmd_args;\n\n    rb->StartArray(6);\n\n    rb->SendLong(entry.entry_id * pp->size() + merged_slow_log[i].second);\n    rb->SendLong(entry.unix_ts_usec / 1000000);\n    rb->SendLong(entry.exec_time_usec);\n\n    // if we truncated the args, there is one pseudo-element containing the number of truncated\n    // args that we must add, so the result length is increased by 1\n    size_t len = args.size() + int(args.size() < entry.original_length);\n\n    rb->StartArray(len);\n\n    for (const auto& arg : args) {\n      if (arg.second > 0) {\n        auto suffix = absl::StrCat(\"... (\", arg.second, \" more bytes)\");\n        auto cmd_arg = arg.first.substr(0, kMaximumSlowlogArgLength - suffix.length());\n        rb->SendBulkString(absl::StrCat(cmd_arg, suffix));\n      } else {\n        rb->SendBulkString(arg.first);\n      }\n    }\n    // if we truncated arguments - add a special string to indicate that.\n    if (args.size() < entry.original_length) {\n      rb->SendBulkString(\n          absl::StrCat(\"... (\", entry.original_length - args.size(), \" more arguments)\"));\n    }\n\n    rb->SendBulkString(entry.client_ip);\n    rb->SendBulkString(entry.client_name);\n  }\n}\n\nstd::optional<fb2::Fiber> Pause(std::vector<facade::Listener*> listeners, Namespace* ns,\n                                facade::Connection* conn, ClientPause pause_state,\n                                std::function<bool()> is_pause_in_progress,\n                                std::function<void()> maybe_cleanup) {\n  // Track connections and set pause state to be able to wait until all running transactions read\n  // the new pause state. Exclude already paused commands from the busy count. Exclude tracking\n  // blocked connections because: a) If the connection is blocked it is paused. 
b) We read pause\n  // state after waking from blocking so if the transaction was woken by another running\n  //    command that did not pause on the new state yet we will pause after waking up.\n  DispatchTracker tracker{listeners, conn, true /* ignore paused commands */,\n                          true /*ignore blocking*/};\n  shard_set->pool()->AwaitFiberOnAll([&tracker, pause_state](unsigned, util::ProactorBase*) {\n    // Commands don't suspend before checking the pause state, so\n    // it's impossible to deadlock on waiting for a command that will be paused.\n    tracker.TrackOnThread();\n    ServerState::tlocal()->SetPauseState(pause_state, true);\n  });\n\n  // Wait for all busy commands to finish running before replying to guarantee\n  // that no more (write) operations will occur.\n  const absl::Duration kDispatchTimeout = absl::Seconds(absl::GetFlag(FLAGS_pause_wait_timeout));\n  if (!tracker.Wait(kDispatchTimeout)) {\n    LOG(WARNING) << \"Couldn't wait for commands to finish dispatching in \" << kDispatchTimeout;\n    shard_set->pool()->AwaitBrief([pause_state](unsigned, util::ProactorBase*) {\n      ServerState::tlocal()->SetPauseState(pause_state, false);\n    });\n    return std::nullopt;\n  }\n\n  // We should not expire/evict keys while clients are paused.\n  shard_set->RunBriefInParallel(\n      [ns](EngineShard* shard) { ns->GetDbSlice(shard->shard_id()).SetExpireAllowed(false); });\n\n  return fb2::Fiber(\"client_pause\",\n                    [is_pause_in_progress, pause_state, ns, maybe_cleanup]() mutable {\n                      // On server shutdown we sleep 10ms to make sure all running tasks finish,\n                      // therefore 10ms steps ensure this fiber will not be left hanging.\n                      constexpr auto step = 10ms;\n                      while (is_pause_in_progress()) {\n                        ThisFiber::SleepFor(step);\n                      }\n\n                      ServerState& etl = *ServerState::tlocal();\n      
                if (etl.gstate() != GlobalState::SHUTTING_DOWN) {\n                        shard_set->pool()->AwaitFiberOnAll([pause_state](util::ProactorBase* pb) {\n                          ServerState::tlocal()->SetPauseState(pause_state, false);\n                        });\n                        shard_set->RunBriefInParallel([ns](EngineShard* shard) {\n                          ns->GetDbSlice(shard->shard_id()).SetExpireAllowed(true);\n                        });\n                      }\n                      if (maybe_cleanup) {\n                        maybe_cleanup();\n                      }\n                    });\n}\n\nServerFamily::ServerFamily(Service* service) : service_(*service) {\n  start_time_ = time(NULL);\n  thread_safe_save_info_.Update([this](SaveInfoData* data) { data->save_time = start_time_; });\n  script_mgr_.reset(new ScriptMgr());\n\n  {\n    absl::InsecureBitGen eng;\n    master_replid_ = GetRandomHex(eng, CONFIG_RUN_ID_SIZE);\n    DCHECK_EQ(CONFIG_RUN_ID_SIZE, master_replid_.size());\n  }\n\n  if (auto ec =\n          detail::ValidateFilename(GetFlag(FLAGS_dbfilename), GetFlag(FLAGS_df_snapshot_format));\n      ec) {\n    LOG(ERROR) << ec.Format();\n    exit(1);\n  }\n\n  if (!ValidateServerTlsFlags()) {\n    exit(1);\n  }\n  ValidateClientTlsFlags();\n  dfly_cmd_ = make_unique<DflyCmd>(this);\n  legacy_format_metrics_ = GetFlag(FLAGS_keep_legacy_memory_metrics);\n}\n\nServerFamily::~ServerFamily() {\n}\n\nvoid SetMaxClients(std::vector<facade::Listener*>& listeners, uint32_t maxclients) {\n  for (auto* listener : listeners) {\n    if (!listener->IsPrivilegedInterface()) {\n      listener->socket()->proactor()->Await(\n          [listener, maxclients]() { listener->SetMaxClients(maxclients); });\n    }\n  }\n}\n\nvoid SetSlowLogMaxLen(util::ProactorPool& pool, uint32_t val) {\n  pool.AwaitFiberOnAll(\n      [&val](auto index, auto* context) { ServerState::tlocal()->GetSlowLog().ChangeLength(val); });\n}\n\nvoid 
SetSlowLogThreshold(util::ProactorPool& pool, int32_t val) {\n  pool.AwaitFiberOnAll([val](auto index, auto* context) {\n    ServerState::tlocal()->log_slower_than_usec = val < 0 ? UINT32_MAX : uint32_t(val);\n  });\n}\n\nvoid ServerFamily::Init(util::AcceptServer* acceptor, std::vector<facade::Listener*> listeners) {\n  CHECK(acceptor_ == nullptr);\n  acceptor_ = acceptor;\n  listeners_ = std::move(listeners);\n\n  auto os_string = GetOSString();\n  LOG_FIRST_N(INFO, 1) << \"Host OS: \" << os_string << \" with \" << shard_set->pool()->size()\n                       << \" threads\";\n  SetMaxClients(listeners_, absl::GetFlag(FLAGS_maxclients));\n  config_registry.RegisterSetter<uint32_t>(\n      \"maxclients\", [this](uint32_t val) { SetMaxClients(listeners_, val); });\n\n  SetSlowLogThreshold(service_.proactor_pool(), absl::GetFlag(FLAGS_slowlog_log_slower_than));\n  config_registry.RegisterMutable(\"slowlog_log_slower_than\",\n                                  [this](const absl::CommandLineFlag& flag) {\n                                    auto res = flag.TryGet<int32_t>();\n                                    if (res.has_value())\n                                      SetSlowLogThreshold(service_.proactor_pool(), res.value());\n                                    return res.has_value();\n                                  });\n  SetSlowLogMaxLen(service_.proactor_pool(), absl::GetFlag(FLAGS_slowlog_max_len));\n  config_registry.RegisterSetter<uint32_t>(\n      \"slowlog_max_len\", [this](uint32_t val) { SetSlowLogMaxLen(service_.proactor_pool(), val); });\n\n  // We only reconfigure TLS when the 'tls' config key changes. 
Therefore to\n  // update TLS certs, first update tls_cert_file, then set 'tls true'.\n  config_registry.RegisterMutable(\"tls\", [this](const absl::CommandLineFlag& flag) {\n    if (!ValidateServerTlsFlags()) {\n      return false;\n    }\n    for (facade::Listener* l : listeners_) {\n      // Must reconfigure in the listener proactor to avoid a race.\n      if (!l->socket()->proactor()->Await([l] { return l->ReconfigureTLS(); })) {\n        return false;\n      }\n    }\n    return true;\n  });\n  config_registry.RegisterMutable(\"tls_cert_file\");\n  config_registry.RegisterMutable(\"tls_key_file\");\n  config_registry.RegisterMutable(\"tls_ca_cert_file\");\n  config_registry.RegisterMutable(\"tls_ca_cert_dir\");\n  config_registry.RegisterMutable(\"replica_priority\");\n  config_registry.RegisterMutable(\"lua_undeclared_keys_shas\");\n  config_registry.RegisterMutable(\"lua_float_as_int_shas\");\n  config_registry.RegisterMutable(\"point_in_time_snapshot\");\n\n  pb_task_ = shard_set->pool()->GetNextProactor();\n  if (pb_task_->GetKind() == ProactorBase::EPOLL) {\n    fq_threadpool_.reset(new fb2::FiberQueueThreadPool(absl::GetFlag(FLAGS_epoll_file_threads)));\n  }\n\n  string flag_dir = GetFlag(FLAGS_dir);\n\n  if (detail::IsCloudPath(flag_dir)) {\n    snapshot_storage_ = CreateCloudSnapshotStorage(flag_dir);\n  } else if (fq_threadpool_) {\n    snapshot_storage_ = std::make_shared<detail::FileSnapshotStorage>(fq_threadpool_.get());\n  } else {\n    snapshot_storage_ = std::make_shared<detail::FileSnapshotStorage>(nullptr);\n  }\n\n  // check for '--replicaof' before loading anything\n  if (ReplicaOfFlag flag = GetFlag(FLAGS_replicaof); flag.has_value()) {\n    service_.proactor_pool().GetNextProactor()->Await(\n        [this, &flag]() { this->Replicate(flag.host, flag.port); });\n  } else {  // load from snapshot only if --replicaof is empty\n    LoadFromSnapshot();\n  }\n\n  const auto create_snapshot_schedule_fb = [this] {\n    snapshot_schedule_fb_ =\n     
   service_.proactor_pool().GetNextProactor()->LaunchFiber([this] { SnapshotScheduling(); });\n  };\n  config_registry.RegisterMutable(\n      \"snapshot_cron\", [this, create_snapshot_schedule_fb](const absl::CommandLineFlag& flag) {\n        JoinSnapshotSchedule();\n        create_snapshot_schedule_fb();\n        return true;\n      });\n  create_snapshot_schedule_fb();\n}\n\nvoid ServerFamily::LoadFromSnapshot() {\n  {\n    util::fb2::LockGuard lk{loading_stats_mu_};\n    loading_stats_.restore_count++;\n  }\n\n  const auto load_path_result =\n      snapshot_storage_->LoadPath(GetFlag(FLAGS_dir), GetFlag(FLAGS_dbfilename));\n\n  if (load_path_result) {\n    const std::string& load_path = *load_path_result;\n    if (!load_path.empty()) {\n      auto future = Load(load_path, LoadExistingKeys::kFail);\n      load_fiber_ = service_.proactor_pool().GetNextProactor()->LaunchFiber([future]() mutable {\n        // Wait for load to finish in a dedicated fiber.\n        // Failure to load on start causes Dragonfly to exit with an error code.\n        if (!future.has_value() || future->Get()) {\n          // Error was already printed to log at this point.\n          exit(1);\n        }\n      });\n    }\n  } else {\n    if (std::error_code(load_path_result.error()) == std::errc::no_such_file_or_directory) {\n      LOG(WARNING) << \"Load snapshot: No snapshot found\";\n    } else {\n      loading_stats_mu_.lock();\n      loading_stats_.failed_restore_count++;\n      loading_stats_mu_.unlock();\n      LOG(ERROR) << \"Failed to load snapshot with error: \" << load_path_result.error().Format();\n      exit(1);\n    }\n  }\n}\n\nvoid ServerFamily::JoinSnapshotSchedule() {\n  schedule_done_.Notify();\n  snapshot_schedule_fb_.JoinIfNeeded();\n  schedule_done_.Reset();\n}\n\nvoid ServerFamily::Shutdown() {\n  VLOG(1) << \"ServerFamily::Shutdown\";\n\n  load_fiber_.JoinIfNeeded();\n\n  JoinSnapshotSchedule();\n\n  bg_save_fb_.JoinIfNeeded();\n\n  if (save_on_shutdown_ && 
!absl::GetFlag(FLAGS_dbfilename).empty()) {\n    shard_set->pool()->GetNextProactor()->Await([this]() ABSL_LOCKS_EXCLUDED(loading_stats_mu_) {\n      GenericError ec = DoSave();\n\n      util::fb2::LockGuard lk{loading_stats_mu_};\n      loading_stats_.backup_count++;\n\n      if (ec) {\n        loading_stats_.failed_backup_count++;\n        LOG(WARNING) << \"Failed to perform snapshot \" << ec.Format();\n      }\n    });\n  }\n\n  client_pause_ec_.await([this] { return active_pauses_.load() == 0; });\n\n  pb_task_->Await([this] {\n    auto ec = journal::Close();\n    LOG_IF(ERROR, ec) << \"Error closing journal \" << ec;\n\n    util::fb2::LockGuard lk(replicaof_mu_);\n    if (replica_) {\n      replica_->Stop();\n    }\n    StopAllClusterReplicas();\n\n    dfly_cmd_->Shutdown();\n    DebugCmd::Shutdown();\n#ifdef WITH_SEARCH\n    SearchFamily::Shutdown();\n#endif\n  });\n}\n\nbool ServerFamily::HasPrivilegedInterface() {\n  return any_of(listeners_.begin(), listeners_.end(),\n                [](auto* l) { return l->IsPrivilegedInterface(); });\n}\n\nvoid ServerFamily::UpdateMemoryGlobalStats() {\n  // Called from all shards, but one updates global stats below\n  if (EngineShard::tlocal()->shard_id() > 0)\n    return;\n\n  // Update used memory peak\n  uint64_t mem_current = used_mem_current.load(std::memory_order_relaxed);\n  if (mem_current > glob_memory_peaks.used.load(memory_order_relaxed))\n    glob_memory_peaks.used.store(mem_current, memory_order_relaxed);\n\n  io::StatusData status_data;\n  bool success = ReadProcStats(&status_data);  // updates glob_memory_peaks.rss\n  if (!success)\n    return;\n\n  size_t total_rss = FetchRssMemory(status_data);\n\n  // Decide on stopping or accepting new connections based on oom deny ratio\n  double rss_oom_deny_ratio = ServerState::tlocal()->rss_oom_deny_ratio;\n  if (rss_oom_deny_ratio > 0) {\n    size_t memory_limit = max_memory_limit.load(memory_order_relaxed) * rss_oom_deny_ratio;\n    if (total_rss > memory_limit 
&& accepting_connections_ && HasPrivilegedInterface()) {\n      LOG_EVERY_T(WARNING, 10)\n          << \"Accepting connections stopped, used memory over limit: total_rss \" << total_rss\n          << \" > memory_limit \" << memory_limit;\n      ChangeConnectionAccept(false);\n    } else if (total_rss < memory_limit && !accepting_connections_) {\n      LOG_EVERY_T(INFO, 10) << \"Accepting connections again, used memory below limit\";\n      ChangeConnectionAccept(true);\n    }\n  }\n}\n\nstruct AggregateLoadResult {\n  AggregateError first_error;\n  std::atomic<size_t> keys_read;\n};\n\nvoid ServerFamily::FlushAll(Namespace* ns) {\n  const CommandId* cid = service_.FindCmd(\"FLUSHALL\");\n  boost::intrusive_ptr<Transaction> flush_trans(new Transaction{cid});\n  flush_trans->InitByArgs(ns, 0, {});\n  VLOG(1) << \"Performing flush\";\n  Drakarys(flush_trans.get(), DbSlice::kDbAll, false);\n}\n\n// Load starts as many fibers as there are files to load each one separately.\n// It starts one more fiber that waits for all load fibers to finish and returns the first\n// error (if any occurred) with a future.\nstd::optional<fb2::Future<GenericError>> ServerFamily::Load(const std::string& path,\n                                                            LoadExistingKeys existing_keys) {\n  DCHECK(!path.empty());\n  DCHECK_GT(shard_count(), 0u);\n\n  // TODO: to move it to helio.\n  auto immediate = [](auto val) {\n    fb2::Future<GenericError> future;\n    future.Resolve(std::move(val));\n    return future;\n  };\n\n  if (!IsMaster()) {\n    return immediate(string(\"Replica cannot load data\"));\n  }\n\n  auto expand_result = snapshot_storage_->ExpandSnapshot(path);\n  if (!expand_result) {\n    LOG(ERROR) << \"Failed to load snapshot: \" << expand_result.error().Format();\n\n    return immediate(expand_result.error());\n  }\n\n  auto prev_state = service_.SwitchState(GlobalState::ACTIVE, GlobalState::LOADING);\n  if (prev_state != GlobalState::ACTIVE) {\n    LOG(WARNING) 
<< prev_state << \" in progress, ignored\";\n    return {};\n  }\n\n  // Reset state on error\n  absl::Cleanup reset_state{\n      [this]() { service_.SwitchState(GlobalState::LOADING, GlobalState::ACTIVE); }};\n\n  auto& pool = service_.proactor_pool();\n\n  const vector<string>& paths = *expand_result;\n\n  LOG(INFO) << \"Loading \" << path;\n\n  vector<fb2::Fiber> load_fibers;\n  load_fibers.reserve(paths.size());\n\n  LoadOptions load_opts;\n  auto load_context = std::make_unique<RdbLoadContext>();\n  if (absl::EndsWith(path, \"summary.dfs\")) {\n    // we read summary first to get snapshot_id and load data correctly\n    error_code load_ec = pool.GetNextProactor()->Await(\n        [&] { return LoadRdb(path, existing_keys, &load_opts, load_context.get()); });\n    if (load_ec)\n      return immediate(load_ec);\n  }\n\n  auto aggregated_result = std::make_shared<AggregateLoadResult>();\n\n  for (const auto& file : paths) {\n    // we have already read summary so we skip it now\n    if (absl::EndsWith(file, \"summary.dfs\"))\n      continue;\n\n    // For single file, choose thread that does not handle shards if possible.\n    // This will balance out the CPU during the load.\n    ProactorBase* proactor;\n    if (paths.size() == 1 && shard_count() < pool.size()) {\n      proactor = pool.at(shard_count());\n    } else {\n      proactor = pool.GetNextProactor();\n    }\n\n    auto load_func = [file, existing_keys, load_opts, aggregated_result,\n                      load_context = load_context.get(), this]() mutable {\n      error_code load_ec = LoadRdb(file, existing_keys, &load_opts, load_context);\n      if (load_ec) {\n        aggregated_result->first_error = load_ec;\n      } else {\n        aggregated_result->keys_read.fetch_add(load_opts.num_loaded_keys, memory_order_relaxed);\n      }\n    };\n    load_fibers.push_back(proactor->LaunchFiber(std::move(load_func)));\n  }\n\n  fb2::Future<GenericError> future;\n\n  // Run fiber that empties the channel and 
sets ec_promise.\n  auto load_join_func = [this, aggregated_result, load_fibers = std::move(load_fibers),\n                         load_context = std::move(load_context), future]() mutable {\n    for (auto& fiber : load_fibers) {\n      fiber.Join();\n    }\n\n    if (aggregated_result->first_error) {\n      load_context->PerformPostLoad(&service_, true);\n      LOG(ERROR) << \"Rdb load failed: \" << (*aggregated_result->first_error).message();\n    } else {\n      load_context->PerformPostLoad(&service_);\n      LOG(INFO) << \"Load finished, num keys read: \" << aggregated_result->keys_read;\n    }\n\n    service_.SwitchState(GlobalState::LOADING, GlobalState::ACTIVE);\n    future.Resolve(*(aggregated_result->first_error));\n  };\n  pool.GetNextProactor()->Dispatch(std::move(load_join_func));\n\n  std::move(reset_state).Cancel();  // load_join_func resets state after loading\n  return future;\n}\n\nvoid ServerFamily::SnapshotScheduling() {\n  const std::optional<cron::cronexpr> cron_expr = InferSnapshotCronExpr();\n  if (!cron_expr) {\n    return;\n  }\n\n  ServerState* ss = ServerState::tlocal();\n  do {\n    if (schedule_done_.WaitFor(100ms)) {\n      return;\n    }\n  } while (ss->gstate() == GlobalState::LOADING);\n\n  while (true) {\n    const std::chrono::time_point now = std::chrono::system_clock::now();\n    const std::chrono::time_point next = cron::cron_next(cron_expr.value(), now);\n\n    if (schedule_done_.WaitFor(next - now)) {\n      break;\n    };\n\n    GenericError ec = DoSave();\n\n    util::fb2::LockGuard lk{loading_stats_mu_};\n    loading_stats_.backup_count++;\n\n    if (ec) {\n      loading_stats_.failed_backup_count++;\n      LOG(WARNING) << \"Failed to perform snapshot \" << ec.Format();\n    }\n  }\n}\n\nstd::error_code ServerFamily::LoadRdb(const std::string& rdb_file, LoadExistingKeys existing_keys,\n                                      LoadOptions* load_opts, RdbLoadContext* load_context) {\n  DCHECK(load_opts);\n  VLOG(1) << 
\"Loading data from \" << rdb_file;\n  CHECK(fb2::ProactorBase::IsProactorThread()) << \"must be called from proactor thread\";\n\n  const std::string& filt_snapshot_id = load_opts->snapshot_id;\n\n  ProactorBase* proactor = fb2::ProactorBase::me();\n  error_code result;\n  auto fb = proactor->LaunchFiber([&] {\n    io::ReadonlyFileOrError res = snapshot_storage_->OpenReadFile(rdb_file);\n    if (!res) {\n      result = res.error();\n      return;\n    }\n\n    io::FileSource fs(*res);\n\n    RdbLoader loader{&service_, load_context, filt_snapshot_id};\n    loader.SetShardCount(load_opts->shard_count);\n    if (existing_keys == LoadExistingKeys::kOverride) {\n      loader.SetOverrideExistingKeys(true);\n    }\n\n    auto ec = loader.Load(&fs);\n    if (ec) {\n      // We ignore incorrect_snapshot_id, it means we try to load file from incorrect snapshot.\n      if (ec.value() != rdb::errc::incorrect_snapshot_id)\n        result = ec;\n    } else {\n      VLOG(1) << \"Done loading RDB from \" << rdb_file << \", keys loaded: \" << loader.keys_loaded();\n      VLOG(1) << \"Loading finished after \" << strings::HumanReadableElapsedTime(loader.load_time());\n      load_opts->num_loaded_keys = loader.keys_loaded();\n      load_opts->snapshot_id = loader.GetSnapshotId();\n      load_opts->shard_count = loader.shard_count();\n    }\n  });\n\n  fb.Join();\n  return result;\n}\n\nenum class MetricType : uint8_t { COUNTER, GAUGE, SUMMARY, HISTOGRAM };\n\nconst char* MetricTypeName(MetricType type) {\n  switch (type) {\n    case MetricType::COUNTER:\n      return \"counter\";\n    case MetricType::GAUGE:\n      return \"gauge\";\n    case MetricType::SUMMARY:\n      return \"summary\";\n    case MetricType::HISTOGRAM:\n      return \"histogram\";\n  }\n  return \"unknown\";\n}\n\ninline string GetMetricFullName(string_view metric_name) {\n  return StrCat(\"dragonfly_\", metric_name);\n}\n\nvoid AppendMetricHeader(string_view metric_name, string_view metric_help, MetricType 
type,\n                        string* dest) {\n  const auto full_metric_name = GetMetricFullName(metric_name);\n  absl::StrAppend(dest, \"# HELP \", full_metric_name, \" \", metric_help, \"\\n\");\n  absl::StrAppend(dest, \"# TYPE \", full_metric_name, \" \", MetricTypeName(type), \"\\n\");\n}\n\nvoid AppendLabelTupple(absl::Span<const string_view> label_names,\n                       absl::Span<const string_view> label_values, string* dest) {\n  if (label_names.empty())\n    return;\n\n  absl::StrAppend(dest, \"{\");\n  for (size_t i = 0; i < label_names.size(); ++i) {\n    if (i > 0) {\n      absl::StrAppend(dest, \", \");\n    }\n    absl::StrAppend(dest, label_names[i], \"=\\\"\", label_values[i], \"\\\"\");\n  }\n\n  absl::StrAppend(dest, \"}\");\n}\n\nvoid AppendMetricValue(string_view metric_name, const absl::AlphaNum& value,\n                       absl::Span<const string_view> label_names,\n                       absl::Span<const string_view> label_values, string* dest) {\n  absl::StrAppend(dest, GetMetricFullName(metric_name));\n  AppendLabelTupple(label_names, label_values, dest);\n  absl::StrAppend(dest, \" \", value, \"\\n\");\n}\n\nvoid AppendMetricWithoutLabels(string_view name, string_view help, const absl::AlphaNum& value,\n                               MetricType type, string* dest) {\n  AppendMetricHeader(name, help, type, dest);\n  AppendMetricValue(name, value, {}, {}, dest);\n}\n\nvoid AppendPipelineLatencySummary(string_view name, string_view help, const base::Histogram& hist,\n                                  uint64_t total_count, double total_sum_usec, string* dest) {\n  AppendMetricHeader(name, help, MetricType::SUMMARY, dest);\n  const string full_name = GetMetricFullName(name);\n  if (hist.count() > 0) {\n    auto [p95, p99] = hist.Percentiles(95, 99);\n    AppendMetricValue(name, p95 * 1e-6, {\"quantile\"}, {\"0.95\"}, dest);\n    AppendMetricValue(name, p99 * 1e-6, {\"quantile\"}, {\"0.99\"}, dest);\n  }\n  // Use monotonically 
increasing counters for _sum/_count so that Prometheus\n  // rate()/irate() functions work correctly even though the histogram is decayed.\n  absl::StrAppend(dest, full_name, \"_sum \", total_sum_usec * 1e-6, \"\\n\");\n  absl::StrAppend(dest, full_name, \"_count \", total_count, \"\\n\");\n}\n\nvoid PrintPrometheusMetrics(uint64_t uptime, const Metrics& m, DflyCmd* dfly_cmd,\n                            StringResponse* resp, bool legacy) {\n  // Server metrics\n  AppendMetricHeader(\"version\", \"\", MetricType::GAUGE, &resp->body());\n  AppendMetricValue(\"version\", 1, {\"version\"}, {GetVersion()}, &resp->body());\n\n  AppendMetricWithoutLabels(\"master\", \"1 if master 0 if replica\", IsMaster() ? 1 : 0,\n                            MetricType::GAUGE, &resp->body());\n  AppendMetricWithoutLabels(\"uptime_in_seconds\", \"\", uptime, MetricType::COUNTER, &resp->body());\n\n  // Clients metrics\n  const auto& conn_stats = m.facade_stats.conn_stats;\n  AppendMetricWithoutLabels(\"max_clients\", \"Maximal number of clients\", GetFlag(FLAGS_maxclients),\n                            MetricType::GAUGE, &resp->body());\n  AppendMetricHeader(\"connected_clients\", \"\", MetricType::GAUGE, &resp->body());\n  AppendMetricValue(\"connected_clients\", conn_stats.num_conns_main, {\"listener\"}, {\"main\"},\n                    &resp->body());\n  AppendMetricValue(\"connected_clients\", conn_stats.num_conns_other, {\"listener\"}, {\"other\"},\n                    &resp->body());\n  AppendMetricWithoutLabels(\"blocked_clients\", \"\", conn_stats.num_blocked_clients,\n                            MetricType::GAUGE, &resp->body());\n  AppendMetricWithoutLabels(\"pipeline_queue_length\", \"\", conn_stats.pipeline_queue_entries,\n                            MetricType::GAUGE, &resp->body());\n  AppendMetricWithoutLabels(\"send_delay_seconds\", \"\",\n                            double(GetDelayMs(m.oldest_pending_send_ts)) / 1000.0,\n                            MetricType::GAUGE, 
&resp->body());\n\n  AppendMetricWithoutLabels(\"pipeline_throttle_total\", \"\", conn_stats.pipeline_throttle_count,\n                            MetricType::COUNTER, &resp->body());\n  AppendMetricWithoutLabels(\"pipeline_commands_total\", \"\", conn_stats.pipelined_cmd_cnt,\n                            MetricType::COUNTER, &resp->body());\n  AppendMetricWithoutLabels(\"pipeline_dispatch_calls_total\", \"\", conn_stats.pipeline_dispatch_calls,\n                            MetricType::COUNTER, &resp->body());\n  AppendMetricWithoutLabels(\"pipeline_dispatch_commands_total\", \"\",\n                            conn_stats.pipeline_dispatch_commands, MetricType::COUNTER,\n                            &resp->body());\n  AppendMetricWithoutLabels(\"pipeline_dispatch_skip_flush_total\", \"\",\n                            conn_stats.skip_pipeline_flushing, MetricType::COUNTER, &resp->body());\n  AppendMetricWithoutLabels(\"pipeline_dispatch_flush_duration_seconds\", \"\",\n                            conn_stats.pipeline_dispatch_flush_usec * 1e-6, MetricType::COUNTER,\n                            &resp->body());\n\n  AppendMetricWithoutLabels(\"pipeline_commands_duration_seconds\", \"\",\n                            conn_stats.pipelined_cmd_latency * 1e-6, MetricType::COUNTER,\n                            &resp->body());\n  AppendMetricWithoutLabels(\"pipeline_queue_wait_duration_seconds\", \"\",\n                            conn_stats.pipelined_wait_latency * 1e-6, MetricType::COUNTER,\n                            &resp->body());\n  AppendMetricWithoutLabels(\"pipeline_blocking_commands_total\", \"\",\n                            m.coordinator_stats.blocking_commands_in_pipelines, MetricType::COUNTER,\n                            &resp->body());\n\n  // pipelined_cmd_cnt/pipelined_cmd_latency are monotonically increasing counters used for\n  // Prometheus _count/_sum; the histogram is decayed and therefore not monotonic.\n  
AppendPipelineLatencySummary(\"pipeline_latency_seconds\", \"Pipeline command latency distribution\",\n                               conn_stats.pipelined_latency_hist, conn_stats.pipelined_cmd_cnt,\n                               conn_stats.pipelined_cmd_latency, &resp->body());\n\n  AppendMetricWithoutLabels(\"cmd_squash_stats_ignored_total\", \"\",\n                            m.coordinator_stats.squash_stats_ignored, MetricType::COUNTER,\n                            &resp->body());\n\n  AppendMetricWithoutLabels(\"cmd_squash_hop_total\", \"\", m.coordinator_stats.multi_squash_hops,\n                            MetricType::COUNTER, &resp->body());\n\n  AppendMetricWithoutLabels(\"cmd_squash_commands_total\", \"\", m.coordinator_stats.squashed_commands,\n                            MetricType::COUNTER, &resp->body());\n\n  AppendMetricWithoutLabels(\"cmd_squash_hop_duration_seconds\", \"\",\n                            m.coordinator_stats.multi_squash_exec_hop_usec * 1e-6,\n                            MetricType::COUNTER, &resp->body());\n  AppendMetricWithoutLabels(\"cmd_squash_hop_reply_seconds\", \"\",\n                            m.coordinator_stats.multi_squash_exec_reply_usec * 1e-6,\n                            MetricType::COUNTER, &resp->body());\n\n  string connections_libs;\n  AppendMetricHeader(\"connections_libs\", \"Total number of connections by libname:ver\",\n                     MetricType::GAUGE, &connections_libs);\n  for (const auto& [lib, count] : m.connections_lib_name_ver_map) {\n    AppendMetricValue(\"connections_libs\", count, {\"lib\"}, {lib}, &connections_libs);\n  }\n  absl::StrAppend(&resp->body(), connections_libs);\n\n  // Memory metrics\n  io::StatusData sdata;\n  bool success = ReadProcStats(&sdata);\n  AppendMetricWithoutLabels(\"memory_used_bytes\", \"\", m.heap_used_bytes, MetricType::GAUGE,\n                            &resp->body());\n  AppendMetricWithoutLabels(\"memory_used_peak_bytes\", \"\", m.used_mem_peak, 
MetricType::GAUGE,\n                            &resp->body());\n  AppendMetricWithoutLabels(\"fibers_count\", \"\", m.worker_fiber_count, MetricType::GAUGE,\n                            &resp->body());\n  AppendMetricWithoutLabels(\"blocked_tasks\", \"\", m.blocked_tasks, MetricType::GAUGE, &resp->body());\n\n  AppendMetricWithoutLabels(\"memory_max_bytes\", \"\", max_memory_limit.load(memory_order_relaxed),\n                            MetricType::GAUGE, &resp->body());\n\n  if (m.events.insertion_rejections | m.coordinator_stats.oom_error_cmd_cnt) {\n    AppendMetricHeader(\"oom_errors_total\", \"Rejected requests due to out of memory errors\",\n                       MetricType::COUNTER, &resp->body());\n    AppendMetricValue(\"oom_errors_total\", m.events.insertion_rejections, {\"type\"}, {\"insert\"},\n                      &resp->body());\n    AppendMetricValue(\"oom_errors_total\", m.coordinator_stats.oom_error_cmd_cnt, {\"type\"}, {\"cmd\"},\n                      &resp->body());\n  }\n  if (success) {\n    size_t rss = FetchRssMemory(sdata);\n    AppendMetricWithoutLabels(\"used_memory_rss_bytes\", \"\", rss, MetricType::GAUGE, &resp->body());\n    AppendMetricWithoutLabels(\"swap_memory_bytes\", \"\", sdata.vm_swap, MetricType::GAUGE,\n                              &resp->body());\n  }\n\n  DbStats total;\n  for (const auto& db_stats : m.db_stats) {\n    total += db_stats;\n  }\n\n  {\n    string type_used_memory_metric;\n    bool added = false;\n    AppendMetricHeader(\"type_used_memory\", \"Memory used per type\", MetricType::GAUGE,\n                       &type_used_memory_metric);\n\n    for (unsigned type = 0; type < total.memory_usage_by_type.size(); type++) {\n      size_t mem = total.memory_usage_by_type[type];\n      if (mem > 0) {\n        AppendMetricValue(\"type_used_memory\", mem, {\"type\"}, {ObjTypeToString(type)},\n                          &type_used_memory_metric);\n        added = true;\n      }\n    }\n    if (added)\n      
absl::StrAppend(&resp->body(), type_used_memory_metric);\n  }\n\n  // Stats metrics\n  AppendMetricWithoutLabels(\"connections_received_total\", \"\", conn_stats.conn_received_cnt,\n                            MetricType::COUNTER, &resp->body());\n\n  AppendMetricHeader(\"commands_processed_total\", \"\", MetricType::COUNTER, &resp->body());\n  AppendMetricValue(\"commands_processed_total\", conn_stats.command_cnt_main, {\"listener\"}, {\"main\"},\n                    &resp->body());\n  AppendMetricValue(\"commands_processed_total\", conn_stats.command_cnt_other, {\"listener\"},\n                    {\"other\"}, &resp->body());\n  AppendMetricWithoutLabels(\"keyspace_hits_total\", \"\", m.events.hits, MetricType::COUNTER,\n                            &resp->body());\n  AppendMetricWithoutLabels(\"keyspace_misses_total\", \"\", m.events.misses, MetricType::COUNTER,\n                            &resp->body());\n  AppendMetricWithoutLabels(\"keyspace_mutations_total\", \"\", m.events.mutations, MetricType::COUNTER,\n                            &resp->body());\n  AppendMetricWithoutLabels(\"lua_interpreter_cnt\", \"\", m.lua_stats.interpreter_cnt,\n                            MetricType::GAUGE, &resp->body());\n\n  AppendMetricWithoutLabels(\"freed_memory_lua\", \"\", m.lua_stats.gc_freed_memory,\n                            MetricType::COUNTER, &resp->body());\n  AppendMetricWithoutLabels(\"lua_blocked_total\", \"\", m.lua_stats.blocked_cnt, MetricType::COUNTER,\n                            &resp->body());\n  AppendMetricWithoutLabels(\"lua_gc_interpreter_return\", \"\", m.lua_stats.interpreter_return,\n                            MetricType::COUNTER, &resp->body());\n  AppendMetricWithoutLabels(\"lua_force_gc_calls\", \"\", m.lua_stats.force_gc_calls,\n                            MetricType::COUNTER, &resp->body());\n  AppendMetricWithoutLabels(\"lua_gc_duration_total_sec\", \"\", m.lua_stats.gc_duration_ns * 1e-9,\n                            MetricType::COUNTER, 
&resp->body());\n\n  AppendMetricWithoutLabels(\"backups_total\", \"\", m.loading_stats.backup_count, MetricType::COUNTER,\n                            &resp->body());\n  AppendMetricWithoutLabels(\"failed_backups_total\", \"\", m.loading_stats.failed_backup_count,\n                            MetricType::COUNTER, &resp->body());\n  AppendMetricWithoutLabels(\"restores_total\", \"\", m.loading_stats.restore_count,\n                            MetricType::COUNTER, &resp->body());\n  AppendMetricWithoutLabels(\"failed_restores_total\", \"\", m.loading_stats.failed_restore_count,\n                            MetricType::COUNTER, &resp->body());\n\n  // Net metrics\n  AppendMetricWithoutLabels(\"net_input_recv_total\", \"\", conn_stats.io_read_cnt, MetricType::COUNTER,\n                            &resp->body());\n  AppendMetricWithoutLabels(\"net_read_yields_total\", \"\", conn_stats.num_read_yields,\n                            MetricType::COUNTER, &resp->body());\n\n  AppendMetricWithoutLabels(\"net_input_bytes_total\", \"\", conn_stats.io_read_bytes,\n                            MetricType::COUNTER, &resp->body());\n\n  AppendMetricWithoutLabels(\"net_output_bytes_total\", \"\", m.facade_stats.reply_stats.io_write_bytes,\n                            MetricType::COUNTER, &resp->body());\n  {\n    AppendMetricWithoutLabels(\"reply_duration_seconds\", \"\",\n                              m.facade_stats.reply_stats.send_stats.total_duration * 1e-9,\n                              MetricType::COUNTER, &resp->body());\n    AppendMetricWithoutLabels(\"reply_total\", \"\", m.facade_stats.reply_stats.send_stats.count,\n                              MetricType::COUNTER, &resp->body());\n  }\n\n  AppendMetricWithoutLabels(\"script_error_total\", \"\", m.facade_stats.reply_stats.script_error_count,\n                            MetricType::COUNTER, &resp->body());\n\n  AppendMetricHeader(\"listener_accept_error_total\", \"Listener accept errors\", MetricType::COUNTER,\n          
           &resp->body());\n  AppendMetricValue(\"listener_accept_error_total\", m.refused_conn_max_clients_reached_count,\n                    {\"reason\"}, {\"limit_reached\"}, &resp->body());\n  AppendMetricValue(\"listener_accept_error_total\", m.facade_stats.conn_stats.tls_accept_disconnects,\n                    {\"reason\"}, {\"tls_error\"}, &resp->body());\n\n  // Per-DB expired/evicted totals\n  {\n    string exp_str, evict_str;\n    for (size_t i = 0; i < m.db_stats.size(); ++i) {\n      const auto& s = m.db_stats[i];\n      if (s.events.expired_keys > 0)\n        AppendMetricValue(\"expired_keys_total\", s.events.expired_keys, {\"db\"}, {StrCat(\"db\", i)},\n                          &exp_str);\n      if (s.events.evicted_keys > 0)\n        AppendMetricValue(\"evicted_keys_total\", s.events.evicted_keys, {\"db\"}, {StrCat(\"db\", i)},\n                          &evict_str);\n    }\n    AppendMetricHeader(\"expired_keys_total\", \"\", MetricType::COUNTER, &resp->body());\n    absl::StrAppend(&resp->body(), exp_str);\n    AppendMetricHeader(\"evicted_keys_total\", \"\", MetricType::COUNTER, &resp->body());\n    absl::StrAppend(&resp->body(), evict_str);\n  }\n\n  // Memory stats\n  if (legacy) {\n    AppendMetricWithoutLabels(\"memory_fiberstack_vms_bytes\",\n                              \"virtual memory size used by all the fibers\",\n                              m.worker_fiber_stack_size, MetricType::GAUGE, &resp->body());\n\n    AppendMetricWithoutLabels(\n        \"commands_squashing_replies_bytes\", \"\",\n        m.facade_stats.reply_stats.squashing_current_reply_size.load(memory_order_relaxed),\n        MetricType::GAUGE, &resp->body());\n\n    AppendMetricWithoutLabels(\"tls_bytes\", \"\", m.tls_bytes, MetricType::GAUGE, &resp->body());\n    AppendMetricWithoutLabels(\"snapshot_serialization_bytes\", \"\", m.serialization_bytes,\n                              MetricType::GAUGE, &resp->body());\n\n    AppendMetricWithoutLabels(\"used_memory_lua\", 
\"\", m.lua_stats.used_bytes, MetricType::GAUGE,\n                              &resp->body());\n\n    AppendMetricWithoutLabels(\"client_read_buffer_bytes\", \"\", conn_stats.read_buf_capacity,\n                              MetricType::GAUGE, &resp->body());\n    AppendMetricWithoutLabels(\"dispatch_queue_bytes\", \"\", conn_stats.dispatch_queue_bytes,\n                              MetricType::GAUGE, &resp->body());\n    AppendMetricWithoutLabels(\"pipeline_queue_bytes\", \"\", conn_stats.pipeline_queue_bytes,\n                              MetricType::GAUGE, &resp->body());\n    AppendMetricWithoutLabels(\"pipeline_cmd_cache_bytes\", \"\", conn_stats.pipeline_cmd_cache_bytes,\n                              MetricType::GAUGE, &resp->body());\n  }\n\n  string memory_by_class_bytes;\n  AppendMetricHeader(\"memory_by_class_bytes\", \"Memory metrics\", MetricType::GAUGE,\n                     &memory_by_class_bytes);\n\n  AppendMetricValue(\"memory_by_class_bytes\", m.lua_stats.used_bytes, {\"class\"}, {\"used_lua\"},\n                    &memory_by_class_bytes);\n\n  AppendMetricValue(\"memory_by_class_bytes\", m.worker_fiber_stack_size, {\"class\"},\n                    {\"fiberstack_vms\"}, &memory_by_class_bytes);\n\n  AppendMetricValue(\"memory_by_class_bytes\", m.tls_bytes, {\"class\"}, {\"tls\"},\n                    &memory_by_class_bytes);\n\n  const size_t squashed =\n      m.facade_stats.reply_stats.squashing_current_reply_size.load(memory_order_relaxed);\n\n  AppendMetricValue(\"memory_by_class_bytes\", squashed, {\"class\"}, {\"commands_squashing_replies\"},\n                    &memory_by_class_bytes);\n\n  AppendMetricValue(\"memory_by_class_bytes\", conn_stats.pipeline_cmd_cache_bytes, {\"class\"},\n                    {\"pipeline_cmd_cache\"}, &memory_by_class_bytes);\n\n  AppendMetricValue(\"memory_by_class_bytes\", conn_stats.pipeline_queue_bytes, {\"class\"},\n                    {\"pipeline_queue\"}, &memory_by_class_bytes);\n\n  
AppendMetricValue(\"memory_by_class_bytes\", conn_stats.dispatch_queue_bytes, {\"class\"},\n                    {\"dispatch_queue\"}, &memory_by_class_bytes);\n\n  AppendMetricValue(\"memory_by_class_bytes\", conn_stats.read_buf_capacity, {\"class\"},\n                    {\"client_read_buffer\"}, &memory_by_class_bytes);\n\n  AppendMetricValue(\"memory_by_class_bytes\", total.table_mem_usage, {\"class\"}, {\"table_used\"},\n                    &memory_by_class_bytes);\n\n  AppendMetricValue(\"memory_by_class_bytes\", total.obj_memory_usage, {\"class\"}, {\"object_used\"},\n                    &memory_by_class_bytes);\n\n  AppendMetricValue(\"memory_by_class_bytes\", m.coordinator_stats.stored_cmd_bytes, {\"class\"},\n                    {\"conn_stored_commands\"}, &memory_by_class_bytes);\n\n  AppendMetricValue(\"memory_by_class_bytes\", m.search_stats.used_memory, {\"class\"}, {\"search_used\"},\n                    &memory_by_class_bytes);\n\n  AppendMetricValue(\"memory_by_class_bytes\", m.interned_string_stats.pool_bytes, {\"class\"},\n                    {\"interned_string_pool\"}, &memory_by_class_bytes);\n\n  AppendMetricValue(\"memory_by_class_bytes\", m.interned_string_stats.pool_table_bytes, {\"class\"},\n                    {\"interned_string_table\"}, &memory_by_class_bytes);\n\n  // Interned string stats\n  AppendMetricWithoutLabels(\"interned_string_entries\", \"Number of unique interned strings\",\n                            m.interned_string_stats.pool_entries, MetricType::GAUGE, &resp->body());\n  AppendMetricWithoutLabels(\"interned_string_hits_total\", \"Interned string pool hits\",\n                            m.interned_string_stats.hits, MetricType::COUNTER, &resp->body());\n  AppendMetricWithoutLabels(\"interned_string_misses_total\", \"Interned string pool misses\",\n                            m.interned_string_stats.misses, MetricType::COUNTER, &resp->body());\n  AppendMetricWithoutLabels(\"interned_string_entries_dedup_factor\",\n       
                     \"Deduplication achieved by interned strings\",\n                            m.interned_string_stats.pool_entries == 0\n                                ? 0.0\n                                : static_cast<double>(m.interned_string_stats.live_references) /\n                                      static_cast<double>(m.interned_string_stats.pool_entries),\n                            MetricType::GAUGE, &resp->body());\n\n  // Command stats\n  if (!m.cmd_stats_map.empty()) {\n    string command_metrics;\n\n    AppendMetricHeader(\"commands_total\", \"Total number of commands executed\", MetricType::COUNTER,\n                       &command_metrics);\n    for (const auto& [name, stat] : m.cmd_stats_map) {\n      const auto calls = stat.first;\n      AppendMetricValue(\"commands_total\", calls, {\"cmd\"}, {name}, &command_metrics);\n    }\n\n    AppendMetricHeader(\"commands_duration_seconds\", \"Duration of commands in seconds\",\n                       MetricType::COUNTER, &command_metrics);\n    for (const auto& [name, stat] : m.cmd_stats_map) {\n      const double duration_seconds = stat.second * 1e-6;\n      AppendMetricValue(\"commands_duration_seconds\", duration_seconds, {\"cmd\"}, {name},\n                        &command_metrics);\n    }\n\n    absl::StrAppend(&resp->body(), command_metrics);\n  }\n\n  if (m.replica_side_info) {  // replica side\n    const auto reconnect_count = m.replica_side_info->summary.reconnect_count;\n    AppendMetricWithoutLabels(\"replica_reconnect_count\", \"Number of replica reconnects\",\n                              reconnect_count, MetricType::COUNTER, &resp->body());\n  } else {  // Master side\n    string replication_lag_metrics;\n    vector<ReplicaRoleInfo> replicas_info = dfly_cmd->GetReplicasRoleInfo();\n    ReplicationMemoryStats repl_mem;\n    dfly_cmd->GetReplicationMemoryStats(&repl_mem);\n    if (legacy) {\n      AppendMetricWithoutLabels(\n          \"replication_streaming_bytes\", \"Stable sync 
replication memory usage\",\n          repl_mem.streamer_buf_capacity_bytes, MetricType::GAUGE, &resp->body());\n      AppendMetricWithoutLabels(\"replication_full_sync_bytes\", \"Full sync memory usage\",\n                                repl_mem.full_sync_buf_bytes, MetricType::GAUGE, &resp->body());\n    }\n    AppendMetricValue(\"memory_by_class_bytes\", repl_mem.streamer_buf_capacity_bytes, {\"class\"},\n                      {\"replication_streaming\"}, &memory_by_class_bytes);\n    AppendMetricValue(\"memory_by_class_bytes\", repl_mem.full_sync_buf_bytes, {\"class\"},\n                      {\"replication_full_sync\"}, &memory_by_class_bytes);\n\n    AppendMetricWithoutLabels(\"replication_psync_count\", \"Psync count\",\n                              m.coordinator_stats.psync_requests_total, MetricType::COUNTER,\n                              &resp->body());\n    AppendMetricHeader(\"connected_replica_lag_records\", \"Lag in records of a connected replica.\",\n                       MetricType::GAUGE, &replication_lag_metrics);\n    for (const auto& replica : replicas_info) {\n      AppendMetricValue(\"connected_replica_lag_records\", replica.lsn_lag,\n                        {\"replica_ip\", \"replica_port\", \"replica_state\"},\n                        {replica.address, absl::StrCat(replica.listening_port), replica.state},\n                        &replication_lag_metrics);\n    }\n    absl::StrAppend(&resp->body(), replication_lag_metrics);\n  }\n\n  AppendMetricWithoutLabels(\"fiber_switch_total\", \"\", m.fiber_switch_cnt, MetricType::COUNTER,\n                            &resp->body());\n  double delay_seconds = m.fiber_switch_delay_usec * 1e-6;\n  AppendMetricWithoutLabels(\"fiber_switch_delay_seconds_total\", \"\", delay_seconds,\n                            MetricType::COUNTER, &resp->body());\n\n  AppendMetricWithoutLabels(\"fiber_longrun_total\", \"\", m.fiber_longrun_cnt, MetricType::COUNTER,\n                            &resp->body());\n  double 
longrun_seconds = m.fiber_longrun_usec * 1e-6;\n  AppendMetricWithoutLabels(\"fiber_longrun_seconds\", \"\", longrun_seconds, MetricType::COUNTER,\n                            &resp->body());\n  AppendMetricWithoutLabels(\"tx_queue_len\", \"\", m.tx_queue_len, MetricType::GAUGE, &resp->body());\n\n  {\n    bool added = false;\n    string str;\n    AppendMetricHeader(\"transaction_widths_total\", \"Transaction counts by their widths\",\n                       MetricType::COUNTER, &str);\n\n    for (unsigned width = 0; width < shard_set->size(); ++width) {\n      uint64_t count = m.coordinator_stats.tx_width_freq_arr[width];\n\n      if (count > 0) {\n        AppendMetricValue(\"transaction_widths_total\", count, {\"width\"}, {StrCat(\"w\", width + 1)},\n                          &str);\n        added = true;\n      }\n    }\n    if (added)\n      absl::StrAppend(&resp->body(), str);\n  }\n\n  if (IsClusterEnabled()) {\n    string migration_errors_str;\n    AppendMetricHeader(\"migration_errors_total\", \"Total error numbers of current migrations\",\n                       MetricType::GAUGE, &migration_errors_str);\n    AppendMetricValue(\"migration_errors_total\", m.migration_errors_total, {\"num\"},\n                      {\"migration errors\"}, &migration_errors_str);\n    absl::StrAppend(&resp->body(), migration_errors_str);\n\n    string moved_errors_str;\n    uint64_t moved_total_errors = 0;\n    if (m.facade_stats.reply_stats.err_count.contains(\"MOVED\")) {\n      moved_total_errors = m.facade_stats.reply_stats.err_count.at(\"MOVED\");\n    }\n    AppendMetricHeader(\"moved_errors_total\", \"Total number of moved slot errors\",\n                       MetricType::COUNTER, &moved_errors_str);\n    AppendMetricValue(\"moved_errors_total\", moved_total_errors, {\"num\"}, {\"moved errors\"},\n                      &moved_errors_str);\n    absl::StrAppend(&resp->body(), moved_errors_str);\n  }\n\n  string db_key_metrics, db_key_expire_metrics, 
db_capacity_metrics;\n\n  AppendMetricHeader(\"db_keys\", \"Total number of keys by DB\", MetricType::GAUGE, &db_key_metrics);\n  AppendMetricHeader(\"db_capacity\", \"Table capacity by DB\", MetricType::GAUGE,\n                     &db_capacity_metrics);\n\n  AppendMetricHeader(\"db_keys_expiring\", \"Total number of expiring keys by DB\", MetricType::GAUGE,\n                     &db_key_expire_metrics);\n\n  for (size_t i = 0; i < m.db_stats.size(); ++i) {\n    AppendMetricValue(\"db_keys\", m.db_stats[i].key_count, {\"db\"}, {StrCat(\"db\", i)},\n                      &db_key_metrics);\n    AppendMetricValue(\"db_capacity\", m.db_stats[i].prime_capacity, {\"db\"}, {StrCat(\"db\", i)},\n                      &db_capacity_metrics);\n\n    AppendMetricValue(\"db_keys_expiring\", m.db_stats[i].expire_count, {\"db\"}, {StrCat(\"db\", i)},\n                      &db_key_expire_metrics);\n\n    AppendMetricValue(\"keyspace_hits_total\", m.db_stats[i].events.hits, {\"db\"}, {StrCat(\"db\", i)},\n                      &resp->body());\n    AppendMetricValue(\"keyspace_misses_total\", m.db_stats[i].events.misses, {\"db\"},\n                      {StrCat(\"db\", i)}, &resp->body());\n  }\n\n  absl::StrAppend(&resp->body(), db_key_metrics, db_key_expire_metrics, db_capacity_metrics,\n                  memory_by_class_bytes);\n\n  AppendMetricWithoutLabels(\"defrag_invocations\", \"Defrag invocations\",\n                            m.shard_stats.defrag_task_invocation_total, MetricType::COUNTER,\n                            &resp->body());\n  AppendMetricWithoutLabels(\"defrag_attempts\", \"Objects examined\",\n                            m.shard_stats.defrag_attempt_total, MetricType::COUNTER, &resp->body());\n  AppendMetricWithoutLabels(\"defrag_objects_moved\", \"Objects moved\",\n                            m.shard_stats.defrag_realloc_total, MetricType::COUNTER, &resp->body());\n\n  AppendMetricHeader(\"defrag_skipped_total\", \"Defrag tasks skipped\", 
MetricType::COUNTER,\n                     &resp->body());\n  AppendMetricValue(\"defrag_skipped_total\", m.shard_stats.defrag_skipped_mem_under_threshold,\n                    {\"reason\"}, {\"mem_under_threshold\"}, &resp->body());\n  AppendMetricValue(\"defrag_skipped_total\", m.shard_stats.defrag_skipped_within_check_interval,\n                    {\"reason\"}, {\"within_check_interval\"}, &resp->body());\n  AppendMetricValue(\"defrag_skipped_total\", m.shard_stats.defrag_skipped_not_enough_fragmentation,\n                    {\"reason\"}, {\"not_enough_fragmentation\"}, &resp->body());\n\n  AppendMetricWithoutLabels(\"huffman_tables_built\", \"Huffman tables built\",\n                            m.shard_stats.huffman_tables_built, MetricType::COUNTER, &resp->body());\n\n  AppendMetricHeader(\"list_reads\", \"List Reads Patterns\", MetricType::COUNTER, &resp->body());\n  AppendMetricValue(\"list_reads\", m.qlist_stats.total_node_reads, {\"type\"}, {\"total\"},\n                    &resp->body());\n  AppendMetricValue(\"list_reads\", m.qlist_stats.interior_node_reads, {\"type\"}, {\"interior\"},\n                    &resp->body());\n\n  // Tiered metrics\n  {\n    AppendMetricWithoutLabels(\"tiered_entries\", \"Tiered entries\", total.tiered_entries,\n                              MetricType::GAUGE, &resp->body());\n\n    // Bytes: used, allocated, capacity\n    AppendMetricHeader(\"tiered_bytes\", \"Tiered bytes\", MetricType::GAUGE, &resp->body());\n    AppendMetricValue(\"tiered_bytes\", total.tiered_used_bytes, {\"type\"}, {\"used\"}, &resp->body());\n    AppendMetricValue(\"tiered_bytes\", m.tiered_stats.cold_storage_bytes, {\"type\"}, {\"cold\"},\n                      &resp->body());\n    AppendMetricValue(\"tiered_bytes\", m.tiered_stats.allocated_bytes, {\"type\"}, {\"allocated\"},\n                      &resp->body());\n    AppendMetricValue(\"tiered_bytes\", m.tiered_stats.capacity_bytes, {\"type\"}, {\"capacity\"},\n                      
&resp->body());\n\n    // Events: stash, fetch, upload, cancel\n    AppendMetricHeader(\"tiered_events\", \"Tiered events\", MetricType::COUNTER, &resp->body());\n    AppendMetricValue(\"tiered_events\", m.tiered_stats.total_stashes, {\"type\"}, {\"stash\"},\n                      &resp->body());\n    AppendMetricValue(\"tiered_events\", m.tiered_stats.total_fetches, {\"type\"}, {\"fetch\"},\n                      &resp->body());\n    AppendMetricValue(\"tiered_events\", m.tiered_stats.total_uploads, {\"type\"}, {\"upload\"},\n                      &resp->body());\n    AppendMetricValue(\"tiered_events\", m.tiered_stats.total_cancels, {\"type\"}, {\"cancel\"},\n                      &resp->body());\n    AppendMetricValue(\"tiered_events\", m.tiered_stats.total_deletes, {\"type\"}, {\"delete\"},\n                      &resp->body());\n\n    // Hits: ram, cool, missed\n    AppendMetricHeader(\"tiered_hits\", \"Tiered hits\", MetricType::COUNTER, &resp->body());\n    AppendMetricValue(\"tiered_hits\", m.events.ram_hits, {\"type\"}, {\"ram\"}, &resp->body());\n    AppendMetricValue(\"tiered_hits\", m.events.ram_cool_hits, {\"type\"}, {\"cool\"}, &resp->body());\n    AppendMetricValue(\"tiered_hits\", m.events.ram_misses, {\"type\"}, {\"disk\"}, &resp->body());\n\n    // Potential problems due to overloading system\n    AppendMetricHeader(\"tiered_overload\", \"Potential problems due to overloading\",\n                       MetricType::COUNTER, &resp->body());\n    AppendMetricValue(\"tiered_overload\", m.tiered_stats.total_clients_throttled, {\"type\"},\n                      {\"client throttling\"}, &resp->body());\n    AppendMetricValue(\"tiered_overload\", m.tiered_stats.total_stash_overflows, {\"type\"},\n                      {\"stash overflows\"}, &resp->body());\n\n    AppendMetricHeader(\"tiered_list_events\", \"Tiered List Events\", MetricType::COUNTER,\n                       &resp->body());\n    AppendMetricValue(\"tiered_list_events\", 
m.qlist_stats.offload_requests, {\"type\"}, {\"offload\"},\n                      &resp->body());\n    AppendMetricValue(\"tiered_list_events\", m.qlist_stats.onload_requests, {\"type\"}, {\"onload\"},\n                      &resp->body());\n  }\n\n  // Stream access pattern metrics\n  if (m.shard_stats.stream_sequential_accesses || m.shard_stats.stream_random_accesses ||\n      m.shard_stats.stream_fetch_all_accesses) {\n    AppendMetricHeader(\"stream_accesses_total\", \"Total stream accesses by type\",\n                       MetricType::COUNTER, &resp->body());\n    AppendMetricValue(\"stream_accesses_total\", m.shard_stats.stream_sequential_accesses,\n                      {\"access_type\"}, {\"sequential\"}, &resp->body());\n    AppendMetricValue(\"stream_accesses_total\", m.shard_stats.stream_random_accesses,\n                      {\"access_type\"}, {\"random\"}, &resp->body());\n    AppendMetricValue(\"stream_accesses_total\", m.shard_stats.stream_fetch_all_accesses,\n                      {\"access_type\"}, {\"fetch_all\"}, &resp->body());\n  }\n}\n\nvoid ServerFamily::ConfigureMetrics(util::HttpListenerBase* http_base) {\n  // The naming of the metrics should be compatible with redis_exporter, see\n  // https://github.com/oliver006/redis_exporter/blob/master/exporter/exporter.go#L111\n\n  auto cb = [this](const util::http::QueryArgs& args, util::HttpContext* send) {\n    StringResponse resp = util::http::MakeStringResponse(boost::beast::http::status::ok);\n    util::http::SetMime(util::http::kTextMime, &resp);\n    uint64_t uptime = time(NULL) - start_time_;\n    PrintPrometheusMetrics(uptime, GetMetrics(&namespaces->GetDefaultNamespace()), dfly_cmd_.get(),\n                           &resp, legacy_format_metrics_);\n    return send->Invoke(std::move(resp));\n  };\n\n  http_base->RegisterCb(\"/metrics\", cb);\n}\n\nvoid ServerFamily::PauseReplication(bool pause) {\n  util::fb2::LockGuard lk(replicaof_mu_);\n\n  // Pausing only applies when this instance is a replica.\n  if 
(!IsMaster()) {\n    auto repl_ptr = replica_;\n    CHECK(repl_ptr);\n    repl_ptr->Pause(pause);\n  }\n}\n\nstd::optional<ReplicaOffsetInfo> ServerFamily::GetReplicaOffsetInfo() {\n  util::fb2::LockGuard lk(replicaof_mu_);\n\n  // Offset info is only returned when this instance is a replica.\n  if (!IsMaster()) {\n    auto repl_ptr = replica_;\n    CHECK(repl_ptr);\n    return ReplicaOffsetInfo{repl_ptr->GetSyncId(), repl_ptr->GetReplicaOffset()};\n  }\n  return nullopt;\n}\n\nvector<facade::Listener*> ServerFamily::GetNonPriviligedListeners() const {\n  std::vector<facade::Listener*> listeners;\n  listeners.reserve(listeners_.size());\n  for (facade::Listener* listener : listeners_) {\n    if (!listener->IsPrivilegedInterface()) {\n      listeners.push_back(listener);\n    }\n  }\n  return listeners;\n}\n\nbool ServerFamily::AreAllReplicasInStableSync() const {\n  auto roles = dfly_cmd_->GetReplicasRoleInfo();\n  if (roles.empty()) {\n    return true;\n  }\n  auto match = SyncStateName(DflyCmd::SyncState::STABLE_SYNC);\n  return std::all_of(roles.begin(), roles.end(),\n                     [&match](auto& elem) { return elem.state == match; });\n}\n\noptional<Metrics::ReplicaInfo> ServerFamily::GetReplicaSummary() const {\n  util::fb2::LockGuard lk(replicaof_mu_);\n  if (replica_ == nullptr) {\n    return nullopt;\n  }\n\n  Metrics::ReplicaInfo info;\n  info.summary = replica_->GetSummary();\n  for (const auto& cl_repl : cluster_replicas_) {\n    info.cl_repl_summary.push_back(cl_repl->GetSummary());\n  }\n\n  return info;\n}\n\nvoid ServerFamily::OnClose(ConnectionContext* cntx) {\n  dfly_cmd_->OnClose(cntx->conn_state.replication_info.repl_session_id);\n}\n\nvoid ServerFamily::StatsMC(std::string_view section, CommandContext* cmd_ctx) {\n  if (!section.empty()) {\n    return cmd_ctx->SendError(\"\");\n  }\n  string info;\n\n#define ADD_LINE(name, val) absl::StrAppend(&info, \"STAT \" #name \" \", val, \"\\r\\n\")\n\n  time_t now = time(NULL);\n  struct rusage ru;\n  getrusage(RUSAGE_SELF, &ru);\n\n  
auto dbl_time = [](const timeval& tv) -> double {\n    return tv.tv_sec + double(tv.tv_usec) / 1000000.0;\n  };\n\n  double utime = dbl_time(ru.ru_utime);\n  double systime = dbl_time(ru.ru_stime);\n  auto kind = ProactorBase::me()->GetKind();\n  const char* multiplex_api = (kind == ProactorBase::IOURING) ? \"iouring\" : \"epoll\";\n\n  Metrics m = GetMetrics(&namespaces->GetDefaultNamespace());\n  uint64_t uptime = time(NULL) - start_time_;\n\n  const uint32_t total_conns =\n      m.facade_stats.conn_stats.num_conns_main + m.facade_stats.conn_stats.num_conns_other;\n  ADD_LINE(pid, getpid());\n  ADD_LINE(uptime, uptime);\n  ADD_LINE(time, now);\n  ADD_LINE(version, kGitTag);\n  ADD_LINE(libevent, multiplex_api);\n  ADD_LINE(pointer_size, sizeof(void*));\n  ADD_LINE(rusage_user, utime);\n  ADD_LINE(rusage_system, systime);\n  ADD_LINE(max_connections, -1);\n  ADD_LINE(curr_connections, total_conns);\n  ADD_LINE(total_connections, -1);\n  ADD_LINE(rejected_connections, -1);\n  ADD_LINE(bytes_read, m.facade_stats.conn_stats.io_read_bytes);\n  ADD_LINE(bytes_written, m.facade_stats.reply_stats.io_write_bytes);\n  ADD_LINE(limit_maxbytes, -1);\n\n  absl::StrAppend(&info, \"END\\r\\n\");\n\n  MCReplyBuilder* mc_builder = static_cast<MCReplyBuilder*>(cmd_ctx->rb());\n  mc_builder->SendRaw(info);\n\n#undef ADD_LINE\n}\n\nGenericError ServerFamily::DoSave(bool ignore_state) {\n  const CommandId* cid = service().FindCmd(\"SAVE\");\n  CHECK_NOTNULL(cid);\n  boost::intrusive_ptr<Transaction> trans(new Transaction{cid});\n  trans->InitByArgs(&namespaces->GetDefaultNamespace(), 0, {});\n  return DoSave(SaveCmdOptions{absl::GetFlag(FLAGS_df_snapshot_format), {}, {}}, trans.get(),\n                ignore_state);\n}\n\nGenericError ServerFamily::DoSaveCheckAndStart(const SaveCmdOptions& save_cmd_opts,\n                                               Transaction* trans, DoSaveCheckAndStartOpts opts) {\n  auto [ignore_state, bg_save] = opts;\n  auto state = 
ServerState::tlocal()->gstate();\n\n  // In some cases we want to create a snapshot even if server is not active, f.e in takeover\n  if (!ignore_state && (state != GlobalState::ACTIVE && state != GlobalState::SHUTTING_DOWN)) {\n    return GenericError{make_error_code(errc::operation_in_progress),\n                        StrCat(GlobalStateName(state), \" - can not save database\")};\n  }\n\n  std::shared_ptr<SaveStagesController> controller;\n  {\n    util::fb2::LockGuard lk(save_mu_);\n    if (save_controller_) {\n      return GenericError{make_error_code(errc::operation_in_progress),\n                          \"SAVING - can not save database\"};\n    }\n\n    auto snapshot_storage = save_cmd_opts.cloud_uri.empty()\n                                ? snapshot_storage_\n                                : CreateCloudSnapshotStorage(save_cmd_opts.cloud_uri);\n\n    controller = make_shared<SaveStagesController>(detail::SaveStagesInputs{\n        save_cmd_opts.new_version, save_cmd_opts.cloud_uri, save_cmd_opts.basename, trans,\n        &service_, fq_threadpool_.get(), snapshot_storage, opts.bg_save});\n    save_controller_ = controller;\n  }\n\n  // Initialize resources outside of mutex (this may take time for S3 operations)\n  auto res = controller->Init();\n  if (res) {\n    DCHECK_EQ(res->error, true);\n    thread_safe_save_info_.Update([&](SaveInfoData* data) {\n      data->last_error = res->error;\n      data->last_error_time = res->save_time;\n      data->failed_duration_sec = res->duration_sec;\n      if (bg_save) {\n        data->last_bgsave_status = false;\n      }\n    });\n\n    // Reset the controller under lock if initialization failed.\n    util::fb2::LockGuard lk(save_mu_);\n    if (save_controller_ == controller) {\n      save_controller_.reset();\n    }\n    return res->error;\n  }\n\n  // Success - update state\n  controller->Start();\n  thread_safe_save_info_.Update(\n      [bg_save](SaveInfoData* data) { data->bgsave_in_progress = bg_save; });\n\n  
return {};\n}\n\nGenericError ServerFamily::WaitUntilSaveFinished(Transaction* trans, bool ignore_state) {\n  std::shared_ptr<SaveStagesController> controller;\n  {\n    util::fb2::LockGuard lk(save_mu_);\n    controller = save_controller_;\n  }\n\n  if (!controller) {\n    return GenericError{make_error_code(errc::operation_not_supported), \"Save not in progress\"};\n  }\n\n  controller->WaitAllSnapshots();\n  detail::SaveInfo save_info;\n\n  VLOG(1) << \"Before WaitUntilSaveFinished::Finalize\";\n  bool is_bg_save;\n  {\n    util::fb2::LockGuard lk(save_mu_);\n    // It's possible that another save was initiated and the controller has changed.\n    // We only finalize and reset if it's still the same one we were waiting for.\n    if (save_controller_ == controller) {\n      save_info = save_controller_->Finalize();\n      is_bg_save = save_controller_->IsBgSave();\n      save_controller_.reset();\n    } else {\n      // Another save has started. The old one is already finalized by the new one.\n      // We just need to get the info.\n      return GenericError(\"Save operation was superseded by another save\");\n    }\n  }\n\n  thread_safe_save_info_.Update([&](SaveInfoData* data) {\n    if (is_bg_save) {\n      data->bgsave_in_progress = false;\n      data->last_bgsave_status = !save_info.error;\n    }\n\n    if (save_info.error) {\n      data->last_error = save_info.error;\n      data->last_error_time = save_info.save_time;\n      data->failed_duration_sec = save_info.duration_sec;\n    } else {\n      data->save_time = save_info.save_time;\n      data->success_duration_sec = save_info.duration_sec;\n      data->file_name = save_info.file_name;\n      data->freq_map = save_info.freq_map;\n    }\n  });\n\n  return save_info.error;\n}\n\nGenericError ServerFamily::DoSave(const SaveCmdOptions& save_cmd_opts, Transaction* trans,\n                                  bool ignore_state) {\n  DoSaveCheckAndStartOpts opts{.ignore_state = ignore_state};\n  if (auto ec = 
DoSaveCheckAndStart(save_cmd_opts, trans, opts); ec) {\n    return ec;\n  }\n\n  return WaitUntilSaveFinished(trans, ignore_state);\n}\n\nbool ServerFamily::TEST_IsSaving() const {\n  std::atomic_bool is_saving{false};\n  shard_set->pool()->AwaitFiberOnAll([&](auto*) {\n    if (SliceSnapshot::IsSnaphotInProgress())\n      is_saving.store(true, std::memory_order_relaxed);\n  });\n  return is_saving.load(std::memory_order_relaxed);\n}\n\nvoid ServerFamily::Drakarys(Transaction* transaction, DbIndex db_ind, bool wait) {\n  VLOG(1) << \"Drakarys\";\n\n  vector<fb2::Fiber> fibers(shard_set->size());\n  transaction->Execute(\n      [db_ind, &fibers](Transaction* t, EngineShard* shard) {\n        fibers[shard->shard_id()] = t->GetDbSlice(shard->shard_id()).FlushDb(db_ind);\n        return OpStatus::OK;\n      },\n      true);\n\n  auto action = wait ? &fb2::Fiber::JoinIfNeeded : &fb2::Fiber::Detach;\n  for (auto& f : fibers)\n    (f.*action)();\n}\n\nSaveInfoData ServerFamily::GetLastSaveInfo() const {\n  return thread_safe_save_info_.Get();\n}\n\nvoid ServerFamily::DbSize(CmdArgList args, CommandContext* cmd_cntx) {\n  atomic_ulong num_keys{0};\n\n  auto* cntx = cmd_cntx->server_conn_cntx();\n  shard_set->RunBriefInParallel(\n      [&](EngineShard* shard) {\n        auto db_size = cntx->ns->GetDbSlice(shard->shard_id()).DbSize(cntx->conn_state.db_index);\n        num_keys.fetch_add(db_size, memory_order_relaxed);\n      },\n      [](ShardId) { return true; });\n\n  return cmd_cntx->rb()->SendLong(num_keys.load(memory_order_relaxed));\n}\n\nvoid ServerFamily::CancelBlockingOnThread(std::function<OpStatus(ArgSlice)> status_cb) {\n  auto cb = [status_cb](unsigned thread_index, util::Connection* conn) {\n    if (auto fcntx = static_cast<facade::Connection*>(conn)->cntx(); fcntx) {\n      auto* cntx = static_cast<ConnectionContext*>(fcntx);\n      if (cntx->transaction) {\n        cntx->transaction->CancelBlocking(status_cb);\n      }\n    }\n  };\n\n  for (auto* listener : 
listeners_) {\n    listener->TraverseConnectionsOnThread(cb, UINT32_MAX, nullptr);\n  }\n}\n\nstring GetPassword() {\n  string flag = GetFlag(FLAGS_requirepass);\n  if (!flag.empty()) {\n    return flag;\n  }\n\n  const char* env_var = getenv(\"DFLY_PASSWORD\");\n  if (env_var) {\n    return env_var;\n  }\n\n  return \"\";\n}\n\nvoid ServerFamily::SendInvalidationMessages() const {\n  // send invalidation message (caused by flushdb) to all the clients which\n  // turned on client tracking\n  auto cb = [](unsigned thread_index, util::Connection* conn) {\n    facade::ConnectionContext* fc = static_cast<facade::Connection*>(conn)->cntx();\n    if (fc) {\n      ConnectionContext* cntx = static_cast<ConnectionContext*>(fc);\n      if (cntx->conn_state.tracking_info_.IsTrackingOn()) {\n        facade::Connection::InvalidationMessage x;\n        x.invalidate_due_to_flush = true;\n        cntx->conn()->SendInvalidationMessageAsync(x);\n      }\n    }\n  };\n  for (auto* listener : listeners_) {\n    listener->TraverseConnections(cb);\n  }\n}\n\nvoid ServerFamily::FlushDb(CmdArgList args, CommandContext* cmd_cntx) {\n  if (args.size() > 1)\n    return cmd_cntx->SendError(kSyntaxErr);\n\n  bool sync = CmdArgParser{args}.Check(\"SYNC\");\n  string_view cmd_name = cmd_cntx->tx()->GetCId()->name();\n  DbIndex index = cmd_name == \"FLUSHALL\" ? 
DbSlice::kDbAll : cmd_cntx->tx()->GetDbIndex();\n  Drakarys(cmd_cntx->tx(), index, sync);\n  SendInvalidationMessages();\n  cmd_cntx->rb()->SendOk();\n}\n\nbool ServerFamily::DoAuth(ConnectionContext* cntx, std::string_view username,\n                          std::string_view password) {\n  const auto* registry = ServerState::tlocal()->user_registry;\n  CHECK(registry);\n  const bool is_authorized = registry->AuthUser(username, password);\n  if (is_authorized) {\n    cntx->authed_username = username;\n    auto cred = registry->GetCredentials(username);\n    cntx->acl_commands = cred.acl_commands;\n    cntx->keys = std::move(cred.keys);\n    cntx->pub_sub = std::move(cred.pub_sub);\n    cntx->ns = &namespaces->GetOrInsert(cred.ns);\n    cntx->authenticated = true;\n    cntx->acl_db_idx = cred.db;\n    if (cred.db == std::numeric_limits<size_t>::max()) {\n      cntx->conn_state.db_index = 0;\n    } else {\n      auto cb = [ns = cntx->ns, index = cred.db](EngineShard* shard) {\n        auto& db_slice = ns->GetDbSlice(shard->shard_id());\n        db_slice.ActivateDb(index);\n        return OpStatus::OK;\n      };\n      shard_set->RunBriefInParallel(std::move(cb));\n      cntx->conn_state.db_index = cred.db;\n    }\n  }\n  return is_authorized;\n}\n\nvoid ServerFamily::Auth(CmdArgList args, CommandContext* cmd_cntx) {\n  if (args.size() > 2) {\n    return cmd_cntx->SendError(kSyntaxErr);\n  }\n\n  ConnectionContext* cntx = cmd_cntx->server_conn_cntx();\n  // non admin port auth\n  if (!cntx->conn()->IsPrivileged()) {\n    const bool one_arg = args.size() == 1;\n    std::string_view username = one_arg ? \"default\" : facade::ToSV(args[0]);\n    const size_t index = one_arg ? 
0 : 1;\n    std::string_view password = facade::ToSV(args[index]);\n    if (DoAuth(cntx, username, password)) {\n      return cmd_cntx->rb()->SendOk();\n    }\n    auto& log = ServerState::tlocal()->acl_log;\n    using Reason = acl::AclLog::Reason;\n    log.Add(*cntx, \"AUTH\", Reason::AUTH, std::string(username));\n    return cmd_cntx->SendError(facade::kAuthRejected, facade::kNoAuthErrType);\n  }\n\n  if (!cntx->req_auth) {\n    return cmd_cntx->SendError(\n        \"AUTH <password> called without any password configured for \"\n        \"admin port. Are you sure your configuration is correct?\");\n  }\n\n  string_view pass = ArgS(args, 0);\n  if (pass == GetPassword()) {\n    cntx->authenticated = true;\n    cmd_cntx->rb()->SendOk();\n  } else {\n    return cmd_cntx->SendError(facade::kAuthRejected, facade::kNoAuthErrType);\n  }\n}\n\nvoid ServerFamily::ClientUnPauseCmd(CmdArgList args, CommandContext* cmd_cntx) {\n  if (!args.empty()) {\n    return cmd_cntx->SendError(facade::kSyntaxErr);\n  }\n  is_c_pause_in_progress_.store(false, std::memory_order_relaxed);\n  cmd_cntx->rb()->SendOk();\n}\n\nvoid ServerFamily::ChangeConnectionAccept(bool accept) {\n  DCHECK_NE(accept, accepting_connections_);\n  auto h = accept ? &ListenerInterface::resume_accepting : &ListenerInterface::pause_accepting;\n  for (auto* listener : GetNonPriviligedListeners())\n    listener->socket()->proactor()->Await([listener, h]() { (listener->*h)(); });\n  accepting_connections_ = accept;\n}\n\nvoid ClientHelp(SinkReplyBuilder* builder) {\n  string_view help_arr[] = {\n      \"CLIENT <subcommand> [<arg> [value] [opt] ...]. 
Subcommands are:\",\n      \"CACHING (YES|NO)\",\n      \"    Enable/disable tracking of the keys for next command in OPTIN/OPTOUT modes.\",\n      \"GETNAME\",\n      \"    Return the name of the current connection.\",\n      \"ID\",\n      \"    Return the ID of the current connection.\",\n      \"KILL <ip:port>\",\n      \"    Kill connection made from <ip:port>.\",\n      \"KILL <option> <value> [<option> <value> [...]]\",\n      \"    Kill connections. Options are:\",\n      \"    * ADDR (<ip:port>|<unixsocket>:0)\",\n      \"      Kill connections made from the specified address\",\n      \"    * LADDR (<ip:port>|<unixsocket>:0)\",\n      \"      Kill connections made to specified local address\",\n      \"    * ID <client-id>\",\n      \"      Kill connections by client id.\",\n      \"INFO\",\n      \"    Return information about the current client connection.\",\n      \"LIST\",\n      \"    Return information about client connections.\",\n      \"UNPAUSE\",\n      \"    Stop the current client pause, resuming traffic.\",\n      \"PAUSE <timeout> [WRITE|ALL]\",\n      \"    Suspend all, or just write, clients for <timeout> milliseconds.\",\n      \"SETNAME <name>\",\n      \"    Assign the name <name> to the current connection.\",\n      \"SETINFO <option> <value>\",\n      \"Set client meta attr. 
Options are:\",\n      \"    * LIB-NAME: the client lib name.\",\n      \"    * LIB-VER: the client lib version.\",\n      \"TRACKING (ON|OFF) [OPTIN] [OPTOUT] [NOLOOP]\",\n      \"    Control server assisted client side caching.\",\n      \"MIGRATE <client-id> <tid>\",\n      \"    Migrates connection specified by client-id to the specified thread id.\",\n      \"HELP\",\n      \"    Print this help.\"};\n  auto* rb = static_cast<RedisReplyBuilder*>(builder);\n  return rb->SendSimpleStrArr(help_arr);\n}\n\nvoid ServerFamily::Client(CmdArgList args, CommandContext* cmd_cntx) {\n  string sub_cmd = absl::AsciiStrToUpper(ArgS(args, 0));\n  CmdArgList sub_args = args.subspan(1);\n  auto* builder = cmd_cntx->rb();\n\n  if (sub_cmd == \"SETNAME\") {\n    return ClientSetName(sub_args, cmd_cntx);\n  } else if (sub_cmd == \"GETNAME\") {\n    return ClientGetName(sub_args, cmd_cntx);\n  } else if (sub_cmd == \"INFO\") {\n    return ClientInfo(sub_args, cmd_cntx);\n  } else if (sub_cmd == \"LIST\") {\n    return ClientList(sub_args, absl::MakeSpan(listeners_), cmd_cntx);\n  } else if (sub_cmd == \"PAUSE\") {\n    return ClientPauseCmd(sub_args, cmd_cntx);\n  } else if (sub_cmd == \"UNPAUSE\") {\n    return ClientUnPauseCmd(sub_args, cmd_cntx);\n  } else if (sub_cmd == \"TRACKING\") {\n    return ClientTracking(sub_args, cmd_cntx);\n  } else if (sub_cmd == \"KILL\") {\n    return ClientKill(sub_args, absl::MakeSpan(listeners_), cmd_cntx);\n  } else if (sub_cmd == \"CACHING\") {\n    return ClientCaching(sub_args, cmd_cntx);\n  } else if (sub_cmd == \"SETINFO\") {\n    return ClientSetInfo(sub_args, cmd_cntx);\n  } else if (sub_cmd == \"ID\") {\n    return ClientId(sub_args, cmd_cntx);\n  } else if (sub_cmd == \"MIGRATE\") {\n    return ClientMigrate(sub_args, absl::MakeSpan(listeners_), cmd_cntx);\n  } else if (sub_cmd == \"HELP\") {\n    return ClientHelp(builder);\n  }\n\n  return cmd_cntx->SendError(UnknownSubCmd(sub_cmd, \"CLIENT\"), kSyntaxErrType);\n}\n\nvoid 
ServerFamily::Config(CmdArgList args, CommandContext* cmd_cntx) {\n  string sub_cmd = absl::AsciiStrToUpper(ArgS(args, 0));\n\n  auto* builder = static_cast<RedisReplyBuilder*>(cmd_cntx->rb());\n  if (sub_cmd == \"HELP\") {\n    string_view help_arr[] = {\n        \"CONFIG <subcommand> [<arg> [value] [opt] ...]. Subcommands are:\",\n        \"GET <pattern>\",\n        \"    Return parameters matching the glob-like <pattern> and their values.\",\n        \"SET <directive> <value>\",\n        \"    Set the configuration <directive> to <value>.\",\n        \"RESETSTAT\",\n        \"    Reset statistics reported by the INFO command.\",\n        \"REWRITE\",\n        \"    Rewrite the configuration file with the current configuration.\",\n        \"HELP\",\n        \"    Prints this help.\",\n    };\n\n    return builder->SendSimpleStrArr(help_arr);\n  }\n\n  if (sub_cmd == \"SET\") {\n    if (args.size() != 3) {\n      return cmd_cntx->SendError(WrongNumArgsError(\"config|set\"), kConfigErrType);\n    }\n\n    string param = absl::AsciiStrToLower(ArgS(args, 1));\n\n    ConfigRegistry::SetResult result = config_registry.Set(param, ArgS(args, 2));\n\n    const char kErrPrefix[] = \"CONFIG SET failed (possibly related to argument '\";\n    switch (result) {\n      case ConfigRegistry::SetResult::OK:\n        return builder->SendOk();\n      case ConfigRegistry::SetResult::UNKNOWN:\n        return cmd_cntx->SendError(\n            absl::StrCat(\"Unknown option or number of arguments for CONFIG SET - '\", param, \"'\"),\n            kConfigErrType);\n\n      case ConfigRegistry::SetResult::READONLY:\n        return cmd_cntx->SendError(\n            absl::StrCat(kErrPrefix, param, \"') - can't set immutable config\"), kConfigErrType);\n      case ConfigRegistry::SetResult::INVALID:\n        return cmd_cntx->SendError(absl::StrCat(kErrPrefix, param, \"') - argument can not be set\"),\n                                   kConfigErrType);\n    }\n    ABSL_UNREACHABLE();\n  }\n\n 
 if (sub_cmd == \"GET\" && args.size() == 2) {\n    vector<string> res;\n    string_view param = ArgS(args, 1);\n\n    // Support 'databases' for backward compatibility.\n    if (param == \"databases\") {\n      res.emplace_back(param);\n      res.push_back(absl::StrCat(absl::GetFlag(FLAGS_dbnum)));\n    } else {\n      vector<string> names = config_registry.List(param);\n\n      for (const auto& name : names) {\n        auto value = config_registry.Get(name);\n        DCHECK(value.has_value());\n        if (value.has_value()) {\n          // Convert internal name (search_query_string_bytes) back to user-facing format\n          // (search.query-string-bytes)\n          string display_name = DenormalizeConfigName(name);\n          res.push_back(display_name);\n          res.push_back(*value);\n        }\n      }\n    }\n    auto* rb = static_cast<RedisReplyBuilder*>(builder);\n    return rb->SendBulkStrArr(res, CollectionType::MAP);\n  }\n\n  if (sub_cmd == \"REWRITE\") {\n    if (auto ec = RewriteConfigFile(); ec) {\n      return cmd_cntx->SendError(ec.Format(), kConfigErrType);\n    }\n    return builder->SendOk();\n  }\n\n  if (sub_cmd == \"RESETSTAT\") {\n    ResetStat(cmd_cntx->server_conn_cntx()->ns);\n    return builder->SendOk();\n  } else {\n    return cmd_cntx->SendError(UnknownSubCmd(sub_cmd, \"CONFIG\"), kSyntaxErrType);\n  }\n}\n\nvoid ServerFamily::Debug(CmdArgList args, CommandContext* cmd_cntx) {\n  DebugCmd dbg_cmd{this, &service_.cluster_family(), cmd_cntx->server_conn_cntx()};\n\n  return dbg_cmd.Run(args, cmd_cntx);\n}\n\nvoid ServerFamily::Memory(CmdArgList args, CommandContext* cmd_cntx) {\n  MemoryCmd mem_cmd{this, cmd_cntx};\n\n  return mem_cmd.Run(args);\n}\n\nvoid ServerFamily::Shrink(CmdArgList args, CommandContext* cmd_cntx) {\n  string_view key = ArgS(args, 0);\n  auto* rb = static_cast<RedisReplyBuilder*>(cmd_cntx->rb());\n\n  auto cb = [key](Transaction* t, EngineShard* shard) -> OpResult<int64_t> {\n    auto& db_slice = 
t->GetDbSlice(shard->shard_id());\n    auto it = db_slice.FindReadOnly(t->GetDbContext(), key).it;\n    if (!IsValid(it)) {\n      return OpStatus::KEY_NOTFOUND;\n    }\n\n    const PrimeValue& pv = it->second;\n    unsigned encoding = pv.Encoding();\n    unsigned obj_type = pv.ObjType();\n\n    // Only DenseSet-based structures (set or hash with kEncodingStrMap2)\n    if (encoding != kEncodingStrMap2 || (obj_type != OBJ_SET && obj_type != OBJ_HASH)) {\n      return OpStatus::WRONG_TYPE;\n    }\n\n    DenseSet* ds = static_cast<DenseSet*>(pv.RObjPtr());\n    ds->set_time(MemberTimeSeconds(t->GetDbContext().time_now_ms));\n    size_t current_size = ds->UpperBoundSize();\n    size_t bucket_count = ds->BucketCount();\n\n    if (current_size == 0 || bucket_count == 0) {\n      return 0;\n    }\n\n    size_t optimal_size = std::max(size_t(8), absl::bit_ceil(current_size));\n    if (optimal_size >= bucket_count) {\n      return 0;\n    }\n\n    size_t bucket_bytes_before = bucket_count * sizeof(void*);\n    ds->Shrink(optimal_size);\n    size_t bucket_bytes_after = ds->BucketCount() * sizeof(void*);\n\n    return bucket_bytes_before - bucket_bytes_after;\n  };\n\n  OpResult<int64_t> result = cmd_cntx->tx()->ScheduleSingleHopT(std::move(cb));\n\n  if (result.status() == OpStatus::KEY_NOTFOUND) {\n    return rb->SendNull();\n  }\n  if (result.status() == OpStatus::WRONG_TYPE) {\n    return cmd_cntx->SendError(\"WRONGTYPE Key is not a set or hash with DenseSet encoding\");\n  }\n  if (!result) {\n    return cmd_cntx->SendError(result.status());\n  }\n\n  rb->SendLong(*result);\n}\n\nvoid ServerFamily::BgSaveFb(boost::intrusive_ptr<Transaction> trans) {\n  GenericError ec = WaitUntilSaveFinished(trans.get());\n  if (ec) {\n    LOG(INFO) << \"Error in BgSaveFb: \" << ec.Format();\n  }\n}\n\nstd::optional<SaveCmdOptions> ServerFamily::GetSaveCmdOpts(CmdArgList args,\n                                                           CommandContext* cmd_cntx) {\n  if (args.size() > 3) 
{\n    cmd_cntx->SendError(kSyntaxErr);\n    return {};\n  }\n\n  SaveCmdOptions save_cmd_opts;\n  save_cmd_opts.new_version = absl::GetFlag(FLAGS_df_snapshot_format);\n\n  if (args.size() >= 1) {\n    string sub_cmd = absl::AsciiStrToUpper(ArgS(args, 0));\n    if (sub_cmd == \"DF\") {\n      save_cmd_opts.new_version = true;\n    } else if (sub_cmd == \"RDB\") {\n      save_cmd_opts.new_version = false;\n    } else {\n      cmd_cntx->SendError(UnknownSubCmd(sub_cmd, \"SAVE\"), kSyntaxErrType);\n      return {};\n    }\n  }\n\n  if (args.size() >= 2) {\n    if (detail::IsS3Path(ArgS(args, 1))) {\n#ifdef WITH_AWS\n      save_cmd_opts.cloud_uri = ArgS(args, 1);\n#else\n      LOG(ERROR) << \"Compiled without AWS support\";\n      exit(1);\n#endif\n    } else if (detail::IsGCSPath(ArgS(args, 1))) {\n      save_cmd_opts.cloud_uri = ArgS(args, 1);\n    } else {\n      // no cloud_uri get basename and return\n      save_cmd_opts.basename = ArgS(args, 1);\n      return save_cmd_opts;\n    }\n    // cloud_uri is set so get basename if provided\n    if (args.size() == 3) {\n      save_cmd_opts.basename = ArgS(args, 2);\n    }\n  }\n\n  return save_cmd_opts;\n}\n\n// BGSAVE [DF|RDB] [CLOUD_URI] [BASENAME]\n// TODO add missing [SCHEDULE]\nvoid ServerFamily::BgSave(CmdArgList args, CommandContext* cmd_cntx) {\n  auto maybe_res = GetSaveCmdOpts(args, cmd_cntx);\n  if (!maybe_res) {\n    return;\n  }\n\n  DoSaveCheckAndStartOpts opts{.bg_save = true};\n  if (auto ec = DoSaveCheckAndStart(*maybe_res, cmd_cntx->tx(), opts); ec) {\n    return cmd_cntx->SendError(ec.Format());\n  }\n  bg_save_fb_.JoinIfNeeded();\n  bg_save_fb_ = fb2::Fiber(\"bg_save_fiber\", &ServerFamily::BgSaveFb, this,\n                           boost::intrusive_ptr<Transaction>(cmd_cntx->tx()));\n  cmd_cntx->rb()->SendOk();\n}\n\n// SAVE [DF|RDB] [CLOUD_URI] [BASENAME]\n// Allows saving the snapshot of the dataset on disk, potentially overriding the format\n// and the snapshot name.\nvoid 
ServerFamily::Save(CmdArgList args, CommandContext* cmd_cntx) {\n  auto* rb = static_cast<RedisReplyBuilder*>(cmd_cntx->rb());\n  auto maybe_res = GetSaveCmdOpts(args, cmd_cntx);\n  if (!maybe_res) {\n    return;\n  }\n\n  GenericError ec = DoSave(*maybe_res, cmd_cntx->tx());\n  if (ec) {\n    return cmd_cntx->SendError(ec.Format());\n  } else {\n    rb->SendOk();\n  }\n}\n\nstatic void MergeDbSliceStats(const DbSlice::Stats& src, Metrics* dest) {\n  if (src.db_stats.size() > dest->db_stats.size())\n    dest->db_stats.resize(src.db_stats.size());\n\n  for (size_t i = 0; i < src.db_stats.size(); ++i)\n    dest->db_stats[i] += src.db_stats[i];\n\n  dest->events += src.events;\n  dest->small_string_bytes += src.small_string_bytes;\n}\n\nvoid ServerFamily::ResetStat(Namespace* ns) {\n  shard_set->pool()->AwaitBrief(\n      [registry = service_.mutable_registry(), ns](unsigned index, auto*) {\n        registry->ResetCallStats(index);\n        EngineShard* shard = EngineShard::tlocal();\n        if (shard) {\n          auto& db_slice = ns->GetDbSlice(shard->shard_id());\n          db_slice.ResetEvents();\n        }\n        facade::ResetStats();\n        ServerState::tlocal()->exec_freq_count.clear();\n\n        auto reset_cb = [](uint64_t) -> uint64_t { return 0u; };\n        ServerState::tlocal()->stats.tx_width_freq_arr.apply(reset_cb);\n        ServerState::tlocal()->stats.squash_width_freq_arr.apply(reset_cb);\n      });\n}\n\nMetrics ServerFamily::GetMetrics(Namespace* ns) const {\n  Metrics result;\n  util::fb2::Mutex mu;\n\n  uint64_t start = absl::GetCurrentTimeNanos();\n\n  auto cmd_stat_cb = [&dest = result.cmd_stats_map](string_view name, const CmdCallStats& stat) {\n    auto& [calls, sum] = dest[absl::AsciiStrToLower(name)];\n    calls += stat.first;\n    sum += stat.second;\n  };\n\n  auto cb = [&](unsigned index, ProactorBase* pb) {\n    EngineShard* shard = EngineShard::tlocal();\n    ServerState* ss = ServerState::tlocal();\n\n    lock_guard lk(mu);\n\n  
  result.fiber_switch_cnt += fb2::FiberSwitchEpoch();\n    result.fiber_switch_delay_usec += fb2::FiberSwitchDelayUsec();\n    result.fiber_longrun_cnt += fb2::FiberLongRunCnt();\n    result.fiber_longrun_usec += fb2::FiberLongRunSumUsec();\n    result.worker_fiber_stack_size += fb2::WorkerFibersStackSize();\n    result.worker_fiber_count += fb2::WorkerFibersCount();\n    result.blocked_tasks += TaskQueue::blocked_submitters();\n\n    result.coordinator_stats.Add(ss->stats);\n\n    result.qps += uint64_t(ss->MovingSum6());\n    result.facade_stats += *tl_facade_stats;\n    result.serialization_bytes += SliceSnapshot::GetThreadLocalMemoryUsage();\n\n    if (shard) {\n      result.heap_used_bytes += shard->UsedMemory();\n      MergeDbSliceStats(ns->GetDbSlice(shard->shard_id()).GetStats(), &result);\n      result.shard_stats += shard->stats();\n\n      if (shard->tiered_storage()) {\n        result.tiered_stats += shard->tiered_storage()->GetStats();\n      }\n\n      if (shard->search_indices()) {\n        result.search_stats += shard->search_indices()->GetStats();\n      }\n\n      result.qlist_stats += QList::stats;\n\n      result.traverse_ttl_per_sec += shard->GetMovingSum6(EngineShard::TTL_TRAVERSE);\n      result.delete_ttl_per_sec += shard->GetMovingSum6(EngineShard::TTL_DELETE);\n      if (result.tx_queue_len < shard->txq()->size())\n        result.tx_queue_len = shard->txq()->size();\n\n      if (shard->journal()) {\n        result.lsn_buffer_size += journal::LsnBufferSize();\n        result.lsn_buffer_bytes += journal::LsnBufferBytes();\n      }\n    }  // if (shard)\n\n    result.tls_bytes += Listener::TLSUsedMemoryThreadLocal();\n    result.refused_conn_max_clients_reached_count += Listener::RefusedConnectionMaxClientsCount();\n\n    result.lua_stats += InterpreterManager::tl_stats();\n\n    auto connections_lib_name_ver_map = facade::Connection::GetLibStatsTL();\n    for (auto& [k, v] : connections_lib_name_ver_map) {\n      
result.connections_lib_name_ver_map[k] += v;\n    }\n\n    auto& send_list = facade::SinkReplyBuilder::pending_list;\n    if (!send_list.empty()) {\n      DCHECK(std::is_sorted(send_list.begin(), send_list.end(),\n                            [](const auto& left, const auto& right) {\n                              return left.timestamp_ns < right.timestamp_ns;\n                            }));\n\n      auto& oldest_member = send_list.front();\n      result.oldest_pending_send_ts =\n          min<uint64_t>(result.oldest_pending_send_ts, oldest_member.timestamp_ns);\n    }\n    service_.mutable_registry()->MergeCallStats(index, cmd_stat_cb);\n    result.interned_string_stats += GetInternedStringStats();\n  };  // cb\n\n  service_.proactor_pool().AwaitFiberOnAll(std::move(cb));\n\n  uint64_t after_cb = absl::GetCurrentTimeNanos();\n\n  // Normalize moving average stats\n  result.qps /= 6;\n  result.traverse_ttl_per_sec /= 6;\n  result.delete_ttl_per_sec /= 6;\n\n  if (!IsMaster()) {\n    result.replica_side_info = GetReplicaSummary();\n  }\n\n  {\n    util::fb2::LockGuard lk{loading_stats_mu_};\n    result.loading_stats = loading_stats_;\n  }\n\n  result.migration_errors_total = service_.cluster_family().MigrationsErrorsCount();\n\n  // Update peak stats. We rely on the fact that GetMetrics is called frequently enough to\n  // update peak_stats_ from it.\n  {\n    util::fb2::LockGuard lk{peak_stats_mu_};\n    // Note: PeakStats::conn_dispatch_queue_bytes is a legacy name. 
It now tracks the combined\n    // server-wide total of dispatch_queue_bytes and pipeline_queue_bytes for ALL connections.\n    UpdateMax(&peak_stats_.conn_dispatch_queue_bytes,\n              result.facade_stats.conn_stats.dispatch_queue_bytes +\n                  result.facade_stats.conn_stats.pipeline_queue_bytes);\n    UpdateMax(&peak_stats_.conn_read_buf_capacity,\n              result.facade_stats.conn_stats.read_buf_capacity);\n    result.peak_stats = peak_stats_;\n  }\n\n  result.peak_stats = peak_stats_;\n  result.cmd_latency_map = service_.mutable_registry()->LatencyMap();\n  result.used_mem_peak = glob_memory_peaks.used.load(memory_order_relaxed);\n  result.used_mem_rss_peak = glob_memory_peaks.rss.load(memory_order_relaxed);\n\n  uint64_t delta_ms = (absl::GetCurrentTimeNanos() - start) / 1'000'000;\n  if (delta_ms > 30) {\n    uint64_t cb_dur = (after_cb - start) / 1'000'000;\n    LOG(INFO) << \"GetMetrics took \" << delta_ms << \" ms, out of which callback took \" << cb_dur\n              << \" ms\";\n  }\n  return result;\n}\n\nstring ServerFamily::FormatInfoMetrics(const Metrics& m, std::string_view section,\n                                       bool priveleged) const {\n  string info;\n  DbStats total;\n\n  for (const auto& db_stats : m.db_stats)\n    total += db_stats;\n\n  auto should_enter = [&](string_view name, bool hidden = false) {\n    if ((!hidden && section.empty()) || section == \"ALL\" || section == name) {\n      auto normalized_name = string{name.substr(0, 1)} + absl::AsciiStrToLower(name.substr(1));\n      absl::StrAppend(&info, info.empty() ? 
\"\" : \"\\r\\n\", \"# \", normalized_name, \"\\r\\n\");\n      return true;\n    }\n    return false;\n  };\n\n  auto append = [&info](const absl::AlphaNum& a1, const absl::AlphaNum& a2) {\n    absl::StrAppend(&info, a1, \":\", a2, \"\\r\\n\");\n  };\n\n  bool show_managed_info = priveleged || !absl::GetFlag(FLAGS_managed_service_info);\n\n  // For some reason on some distributions (like Fedora and OpenSuse) each call to append\n  // increase the stack usage of this function. So we use the lambda trick to avoid this.\n  // Also, it's more readable.\n  auto add_server_info = [&] {\n    ProactorBase* proactor = ProactorBase::me();\n\n    // proactor might be null in tests.\n    auto kind = proactor ? ProactorBase::me()->GetKind() : ProactorBase::EPOLL;\n    const char* multiplex_api = (kind == ProactorBase::IOURING) ? \"iouring\" : \"epoll\";\n\n    append(\"redis_version\", kRedisVersion);\n    append(\"dragonfly_version\", GetVersion());\n    append(\"redis_mode\", GetRedisMode());\n    append(\"arch_bits\", 64);\n    // Add process_id for Redis compatibility (same order as Redis INFO output).\n    append(\"process_id\", getpid());\n\n    if (show_managed_info) {\n      append(\"os\", GetOSString());\n      append(\"thread_count\", service_.proactor_pool().size());\n    }\n    append(\"multiplexing_api\", multiplex_api);\n    append(\"tcp_port\", GetFlag(FLAGS_port));\n\n    // Add availability_zone if it's not empty\n    const auto& az = GetFlag(FLAGS_availability_zone);\n    if (!az.empty()) {\n      append(\"availability_zone\", az);\n    }\n\n    uint64_t uptime = time(NULL) - start_time_;\n    append(\"uptime_in_seconds\", uptime);\n    append(\"uptime_in_days\", uptime / (3600 * 24));\n\n    append(\"hz\", GetFlag(FLAGS_hz));\n    append(\"executable\", base::kProgramName);\n    absl::CommandLineFlag* flagfile_flag = absl::FindCommandLineFlag(\"flagfile\");\n    append(\"config_file\", flagfile_flag->CurrentValue());\n  };\n\n  auto add_clients_info = [&] 
{\n    append(\"connected_clients\",\n           m.facade_stats.conn_stats.num_conns_main + m.facade_stats.conn_stats.num_conns_other);\n    append(\"max_clients\", GetFlag(FLAGS_maxclients));\n    append(\"client_read_buffer_bytes\", m.facade_stats.conn_stats.read_buf_capacity);\n    append(\"blocked_clients\", m.facade_stats.conn_stats.num_blocked_clients);\n    append(\"pipeline_queue_length\", m.facade_stats.conn_stats.pipeline_queue_entries);\n    append(\"send_delay_ms\", GetDelayMs(m.oldest_pending_send_ts));\n    append(\"timeout_disconnects\", m.coordinator_stats.conn_timeout_events);\n  };\n\n  auto add_mem_info = [&] {\n    append(\"used_memory\", m.heap_used_bytes);\n    append(\"used_memory_human\", HumanReadableNumBytes(m.heap_used_bytes));\n    append(\"used_memory_peak\", m.used_mem_peak);\n    append(\"used_memory_peak_human\", HumanReadableNumBytes(m.used_mem_peak));\n\n    // Virtual memory size, upper bound estimation on the RSS memory used by the fiber stacks.\n    append(\"fibers_stack_vms\", m.worker_fiber_stack_size);\n    append(\"fibers_count\", m.worker_fiber_count);\n\n    io::StatusData sdata;\n    bool success = ReadProcStats(&sdata);\n    size_t rss = FetchRssMemory(sdata);\n    if (success) {\n      append(\"used_memory_rss\", rss);\n      append(\"used_memory_rss_human\", HumanReadableNumBytes(rss));\n    }\n    append(\"used_memory_peak_rss\", glob_memory_peaks.used.load(memory_order_relaxed));\n\n    size_t limit = max_memory_limit.load(memory_order_relaxed);\n    append(\"maxmemory\", limit);\n    append(\"maxmemory_human\", HumanReadableNumBytes(limit));\n\n    append(\"used_memory_lua\", m.lua_stats.used_bytes);\n\n    // Blob - all these cases where the key/objects are represented by a single blob allocated on\n    // heap. For example, strings or intsets. members of lists, sets, zsets etc\n    // are not accounted for to avoid complex computations. 
In some cases, when number of members\n    // is known we approximate their allocations by taking 16 bytes per member.\n    append(\"object_used_memory\", total.obj_memory_usage);\n\n    for (unsigned type = 0; type < total.memory_usage_by_type.size(); type++) {\n      size_t mem = total.memory_usage_by_type[type];\n      if (mem > 0) {\n        append(absl::StrCat(\"type_used_memory_\", ObjTypeToString(type)), mem);\n      }\n    }\n    append(\"table_used_memory\", total.table_mem_usage);\n    append(\"prime_capacity\", total.prime_capacity);\n    append(\"num_entries\", total.key_count);\n    append(\"inline_keys\", total.inline_keys);\n    append(\"small_string_bytes\", m.small_string_bytes);\n    append(\"pipeline_cache_bytes\", m.facade_stats.conn_stats.pipeline_cmd_cache_bytes);\n    append(\"dispatch_queue_bytes\", m.facade_stats.conn_stats.dispatch_queue_bytes);\n    append(\"pipeline_queue_bytes\", m.facade_stats.conn_stats.pipeline_queue_bytes);\n    append(\"dispatch_queue_subscriber_bytes\",\n           m.facade_stats.conn_stats.dispatch_queue_subscriber_bytes);\n    append(\"dispatch_queue_peak_bytes\", m.peak_stats.conn_dispatch_queue_bytes);\n    append(\"client_read_buffer_peak_bytes\", m.peak_stats.conn_read_buf_capacity);\n    append(\"tls_bytes\", m.tls_bytes);\n    append(\"snapshot_serialization_bytes\", m.serialization_bytes);\n    append(\"commands_squashing_replies_bytes\",\n           m.facade_stats.reply_stats.squashing_current_reply_size.load(memory_order_relaxed));\n    append(\"psync_buffer_size\", m.lsn_buffer_size);\n    append(\"psync_buffer_bytes\", m.lsn_buffer_bytes);\n\n    if (GetFlag(FLAGS_cache_mode)) {\n      append(\"cache_mode\", \"cache\");\n      // PHP Symphony needs this field to work.\n      append(\"maxmemory_policy\", \"eviction\");\n    } else {\n      append(\"cache_mode\", \"store\");\n      // Compatible with redis based frameworks.\n      append(\"maxmemory_policy\", \"noeviction\");\n    }\n\n    // master\n   
 if (!m.replica_side_info) {\n      ReplicationMemoryStats repl_mem;\n      dfly_cmd_->GetReplicationMemoryStats(&repl_mem);\n      append(\"replication_streaming_buffer_bytes\", repl_mem.streamer_buf_capacity_bytes);\n      append(\"replication_full_sync_buffer_bytes\", repl_mem.full_sync_buf_bytes);\n    }\n\n    if (auto controller_copy = GetSaveController()) {\n      append(\"save_buffer_bytes\", controller_copy->GetSaveBuffersSize());\n    }\n  };\n\n  auto add_stats_info = [&] {\n    auto& conn_stats = m.facade_stats.conn_stats;\n    auto& reply_stats = m.facade_stats.reply_stats;\n\n    append(\"total_connections_received\", conn_stats.conn_received_cnt);\n    append(\"total_handshakes_started\", conn_stats.handshakes_started);\n    append(\"total_handshakes_completed\", conn_stats.handshakes_completed);\n    append(\"total_commands_processed\", conn_stats.command_cnt_main + conn_stats.command_cnt_other);\n    append(\"instantaneous_ops_per_sec\", m.qps);\n    append(\"total_pipelined_commands\", conn_stats.pipelined_cmd_cnt);\n    append(\"pipeline_throttle_total\", conn_stats.pipeline_throttle_count);\n    append(\"pipelined_latency_usec\", conn_stats.pipelined_cmd_latency);\n    append(\"total_net_input_bytes\", conn_stats.io_read_bytes);\n    append(\"connection_migrations\", conn_stats.num_migrations);\n    append(\"connection_recv_provided_calls\", conn_stats.num_recv_provided_calls);\n    append(\"total_net_output_bytes\", reply_stats.io_write_bytes);\n    append(\"rdb_save_usec\", m.coordinator_stats.rdb_save_usec);\n    append(\"rdb_save_count\", m.coordinator_stats.rdb_save_count);\n    append(\"big_value_preemptions\", m.coordinator_stats.big_value_preemptions);\n    append(\"compressed_blobs\", m.coordinator_stats.compressed_blobs);\n    append(\"instantaneous_input_kbps\", -1);\n    append(\"instantaneous_output_kbps\", -1);\n    append(\"rejected_connections\", -1);\n    append(\"expired_keys\", m.events.expired_keys);\n    
append(\"evicted_keys\", m.events.evicted_keys);\n    append(\"total_heartbeat_expired_keys\", m.shard_stats.total_heartbeat_expired_keys);\n    append(\"total_heartbeat_expired_bytes\", m.shard_stats.total_heartbeat_expired_bytes);\n    append(\"total_heartbeat_expired_calls\", m.shard_stats.total_heartbeat_expired_calls);\n    append(\"hard_evictions\", m.events.hard_evictions);\n    append(\"garbage_checked\", m.events.garbage_checked);\n    append(\"garbage_collected\", m.events.garbage_collected);\n    append(\"bump_ups\", m.events.bumpups);\n    append(\"stash_unloaded\", m.events.stash_unloaded);\n    append(\"oom_rejections\", m.events.insertion_rejections + m.coordinator_stats.oom_error_cmd_cnt);\n    append(\"traverse_ttl_sec\", m.traverse_ttl_per_sec);\n    append(\"delete_ttl_sec\", m.delete_ttl_per_sec);\n    append(\"keyspace_hits\", m.events.hits);\n    append(\"keyspace_misses\", m.events.misses);\n    append(\"keyspace_mutations\", m.events.mutations);\n    append(\"total_reads_processed\", conn_stats.io_read_cnt);\n    append(\"total_writes_processed\", reply_stats.io_write_cnt);\n    append(\"huffenc_attempt_total\", m.events.huff_encode_total);\n    append(\"huffenc_success_total\", m.events.huff_encode_success);\n    append(\"defrag_attempt_total\", m.shard_stats.defrag_attempt_total);\n    append(\"defrag_realloc_total\", m.shard_stats.defrag_realloc_total);\n    append(\"defrag_task_invocation_total\", m.shard_stats.defrag_task_invocation_total);\n\n    // Number of connections that are currently blocked on grabbing interpreter.\n    append(\"blocked_on_interpreter\", m.coordinator_stats.blocked_on_interpreter);\n    append(\"lua_interpreter_cnt\", m.lua_stats.interpreter_cnt);\n\n    // Total number of events of when a connection was blocked on grabbing interpreter.\n    append(\"lua_blocked_total\", m.lua_stats.blocked_cnt);\n\n    append(\"lua_interpreter_return\", m.lua_stats.interpreter_return);\n    append(\"lua_force_gc_calls\", 
m.lua_stats.force_gc_calls);\n    append(\"lua_gc_freed_memory_total\", m.lua_stats.gc_freed_memory);\n    append(\"lua_gc_duration_total_sec\", m.lua_stats.gc_duration_ns * 1e-9);\n  };\n\n  auto add_tiered_info = [&] {\n    append(\"tiered_entries\", total.tiered_entries);\n    append(\"tiered_entries_bytes\", total.tiered_used_bytes);\n    append(\"tiered_entries_bytes_human\", HumanReadableNumBytes(total.tiered_used_bytes));\n\n    append(\"tiered_total_stashes\", m.tiered_stats.total_stashes);\n    append(\"tiered_total_fetches\", m.tiered_stats.total_fetches);\n    append(\"tiered_total_cancels\", m.tiered_stats.total_cancels);\n    append(\"tiered_total_deletes\", m.tiered_stats.total_deletes);\n    append(\"tiered_total_uploads\", m.tiered_stats.total_uploads);\n    append(\"tiered_total_stash_overflows\", m.tiered_stats.total_stash_overflows);\n    append(\"tiered_heap_buf_allocations\", m.tiered_stats.total_heap_buf_allocs);\n    append(\"tiered_registered_buf_allocations\", m.tiered_stats.total_registered_buf_allocs);\n\n    append(\"tiered_allocated_bytes\", m.tiered_stats.allocated_bytes);\n    append(\"tiered_capacity_bytes\", m.tiered_stats.capacity_bytes);\n\n    append(\"tiered_pending_read_cnt\", m.tiered_stats.pending_read_cnt);\n    append(\"tiered_pending_stash_cnt\", m.tiered_stats.pending_stash_cnt);\n\n    append(\"tiered_small_bins_cnt\", m.tiered_stats.small_bins_cnt);\n    append(\"tiered_small_bins_entries_cnt\", m.tiered_stats.small_bins_entries_cnt);\n    append(\"tiered_small_bins_filling_bytes\", m.tiered_stats.small_bins_filling_bytes);\n    append(\"tiered_cold_storage_bytes\", m.tiered_stats.cold_storage_bytes);\n    append(\"tiered_offloading_steps\", m.tiered_stats.total_offloading_steps);\n    append(\"tiered_offloading_stashes\", m.tiered_stats.total_offloading_stashes);\n    append(\"tiered_ram_hits\", m.events.ram_hits);\n    append(\"tiered_ram_cool_hits\", m.events.ram_cool_hits);\n    append(\"tiered_ram_misses\", 
m.events.ram_misses);\n\n    append(\"tiered_clients_throttled\", m.tiered_stats.clients_throttled);\n    append(\"tiered_total_clients_throttled\", m.tiered_stats.total_clients_throttled);\n  };\n\n  auto add_persistence_info = [&] {\n    size_t current_snap_keys = 0;\n    size_t total_snap_keys = 0;\n    double perc = 0;\n    bool is_saving = false;\n    uint32_t curent_durration_sec = 0;\n    if (auto controller_copy = GetSaveController()) {\n      is_saving = true;\n      curent_durration_sec = controller_copy->GetCurrentSaveDuration();\n      auto res = controller_copy->GetCurrentSnapshotProgress();\n      if (res.total_keys != 0) {\n        current_snap_keys = res.current_keys;\n        total_snap_keys = res.total_keys;\n        perc = (static_cast<double>(current_snap_keys) / total_snap_keys) * 100;\n      }\n    }\n\n    append(\"current_snapshot_perc\", perc);\n    append(\"current_save_keys_processed\", current_snap_keys);\n    append(\"current_save_keys_total\", total_snap_keys);\n\n    auto save_info = GetLastSaveInfo();\n    // when last success save\n    append(\"last_success_save\", save_info.save_time);\n    append(\"last_saved_file\", save_info.file_name);\n    append(\"last_success_save_duration_sec\", save_info.success_duration_sec);\n\n    ServerState* ss = ServerState::tlocal();\n\n    // ss can be null in tests.\n    unsigned is_loading = ss && (ss->gstate() == GlobalState::LOADING);\n    append(\"loading\", is_loading);\n    append(\"saving\", is_saving);\n    append(\"current_save_duration_sec\", curent_durration_sec);\n\n    for (const auto& k_v : save_info.freq_map) {\n      append(StrCat(\"rdb_\", k_v.first), k_v.second);\n    }\n    append(\"rdb_changes_since_last_success_save\", m.events.update);\n\n    append(\"rdb_bgsave_in_progress\", static_cast<int>(save_info.bgsave_in_progress));\n    std::string val = save_info.last_bgsave_status ? 
\"ok\" : \"err\";\n    append(\"rdb_last_bgsave_status\", val);\n\n    // when last failed save\n    append(\"last_failed_save\", save_info.last_error_time);\n    append(\"last_error\", save_info.last_error.Format());\n    append(\"last_failed_save_duration_sec\", save_info.failed_duration_sec);\n  };\n\n  auto add_tx_info = [&] {\n    append(\"tx_shard_polls\", m.shard_stats.poll_execution_total);\n    append(\"tx_shard_optimistic_total\", m.shard_stats.tx_optimistic_total);\n    append(\"tx_shard_ooo_total\", m.shard_stats.tx_ooo_total);\n    append(\"tx_global_total\", m.coordinator_stats.tx_global_cnt);\n    append(\"tx_normal_total\", m.coordinator_stats.tx_normal_cnt);\n    append(\"tx_inline_runs_total\", m.coordinator_stats.tx_inline_runs);\n    append(\"tx_schedule_cancel_total\", m.coordinator_stats.tx_schedule_cancel_cnt);\n    append(\"tx_batch_scheduled_items_total\", m.shard_stats.tx_batch_scheduled_items_total);\n    append(\"tx_batch_schedule_calls_total\", m.shard_stats.tx_batch_schedule_calls_total);\n    append(\"tx_with_freq\", absl::StrJoin(m.coordinator_stats.tx_width_freq_arr, \",\"));\n    append(\"squash_with_freq\", absl::StrJoin(m.coordinator_stats.squash_width_freq_arr, \",\"));\n    append(\"tx_queue_len\", m.tx_queue_len);\n\n    append(\"eval_io_coordination_total\", m.coordinator_stats.eval_io_coordination_cnt);\n    append(\"eval_shardlocal_coordination_total\",\n           m.coordinator_stats.eval_shardlocal_coordination_cnt);\n    append(\"eval_squashed_flushes\", m.coordinator_stats.eval_squashed_flushes);\n  };\n\n  auto add_repl_info = [&] {\n    if (!m.replica_side_info) {\n      vector<ReplicaRoleInfo> replicas_info = dfly_cmd_->GetReplicasRoleInfo();\n      append(\"role\", \"master\");\n      append(\"connected_slaves\", replicas_info.size());\n\n      if (show_managed_info) {\n        for (size_t i = 0; i < replicas_info.size(); i++) {\n          auto& r = replicas_info[i];\n          // e.g. 
slave0:ip=172.19.0.3,port=6379,state=full_sync\n          append(StrCat(\"slave\", i), StrCat(\"ip=\", r.address, \",port=\", r.listening_port,\n                                            \",state=\", r.state, \",lag=\", r.lsn_lag));\n        }\n      }\n      append(\"master_replid\", master_replid_);\n    } else {\n      append(\"role\", GetFlag(FLAGS_info_replication_valkey_compatible) ? \"slave\" : \"replica\");\n\n      auto replication_info_cb = [&](const Replica::Summary& rinfo) {\n        append(\"master_host\", rinfo.host);\n        append(\"master_port\", rinfo.port);\n\n        const char* link = rinfo.master_link_established ? \"up\" : \"down\";\n        append(\"master_link_status\", link);\n        append(\"master_last_io_seconds_ago\", rinfo.master_last_io_sec);\n        append(\"master_sync_in_progress\", rinfo.full_sync_in_progress);\n        append(\"master_replid\", rinfo.master_id);\n        if (rinfo.full_sync_done || (rinfo.passed_full_sync && !rinfo.master_link_established))\n          append(\"slave_repl_offset\", rinfo.repl_offset_sum);\n        append(\"slave_priority\", GetFlag(FLAGS_replica_priority));\n        append(\"slave_read_only\", 1);\n        append(\"psync_attempts\", rinfo.psync_attempts);\n        append(\"psync_successes\", rinfo.psync_successes);\n      };\n\n      const auto& info = *m.replica_side_info;\n\n      replication_info_cb(info.summary);\n      // Special case, when multiple masters replicate to a single replica.\n      for (const auto& summary : info.cl_repl_summary) {\n        replication_info_cb(summary);\n      }\n    }\n  };\n\n  auto add_cmdstats = [&] {\n    auto append_sorted = [&append](string_view prefix, auto display) {\n      sort(display.begin(), display.end());\n      for (const auto& k_v : display) {\n        append(StrCat(prefix, k_v.first), k_v.second);\n      }\n    };\n\n    vector<pair<string_view, string>> commands;\n    for (const auto& [name, stats] : m.cmd_stats_map) {\n      const auto 
calls = stats.first, sum = stats.second;\n      commands.push_back(\n          {name, absl::StrJoin({absl::StrCat(\"calls=\", calls), absl::StrCat(\"usec=\", sum),\n                                absl::StrCat(\"usec_per_call=\", static_cast<double>(sum) / calls)},\n                               \",\")});\n    }\n\n    auto unknown_cmd = service_.UknownCmdMap();\n\n    append_sorted(\"cmdstat_\", std::move(commands));\n    append_sorted(\"unknown_\",\n                  vector<pair<string_view, uint64_t>>(unknown_cmd.cbegin(), unknown_cmd.cend()));\n  };\n\n  if (should_enter(\"SERVER\")) {\n    add_server_info();\n  }\n\n  if (should_enter(\"CLIENTS\")) {\n    add_clients_info();\n  }\n\n  if (should_enter(\"MEMORY\")) {\n    add_mem_info();\n  }\n\n  if (should_enter(\"STATS\")) {\n    add_stats_info();\n  }\n\n  if (should_enter(\"TIERED\", true)) {\n    add_tiered_info();\n  }\n\n  if (should_enter(\"PERSISTENCE\", true)) {\n    add_persistence_info();\n  }\n\n  if (should_enter(\"TRANSACTION\", true)) {\n    add_tx_info();\n  }\n\n  if (should_enter(\"REPLICATION\")) {\n    add_repl_info();\n  }\n\n  if (should_enter(\"COMMANDSTATS\", true)) {\n    add_cmdstats();\n  }\n\n  if (should_enter(\"MODULES\")) {\n    append(\"module\",\n           \"name=ReJSON,ver=20000,api=1,filters=0,usedby=[search],using=[],options=[handle-io-\"\n           \"errors]\");\n    append(\"module\",\n           \"name=search,ver=20000,api=1,filters=0,usedby=[],using=[ReJSON],options=[handle-io-\"\n           \"errors]\");\n  }\n\n#ifdef WITH_SEARCH\n  if (should_enter(\"SEARCH\", true)) {\n    append(\"search_memory\", m.search_stats.used_memory);\n    append(\"search_num_indices\", m.search_stats.num_indices);\n    append(\"search_num_entries\", m.search_stats.num_entries);\n  }\n#endif\n\n  if (should_enter(\"ERRORSTATS\", true)) {\n    for (const auto& k_v : m.facade_stats.reply_stats.err_count) {\n      append(k_v.first, k_v.second);\n    }\n  }\n\n  if 
(should_enter(\"KEYSPACE\")) {\n    for (size_t i = 0; i < m.db_stats.size(); ++i) {\n      const auto& stats = m.db_stats[i];\n      bool show = (i == 0) || (stats.key_count > 0);\n      if (show) {\n        size_t total = stats.events.hits + stats.events.misses;\n        double hit_ratio =\n            (total > 0) ? static_cast<double>(stats.events.hits) / (total)*100.0 : 0.0;\n        string val = StrCat(\"keys=\", stats.key_count, \",expires=\", stats.expire_count,\n                            \",hits=\", stats.events.hits, \",misses=\", stats.events.misses,\n                            \",hit_ratio=\", absl::StrFormat(\"%.2f\", hit_ratio),\n                            \",avg_ttl=-1\");  // TODO\n        append(StrCat(\"db\", i), val);\n      }\n    }\n  }\n\n#ifndef __APPLE__\n  if (should_enter(\"CPU\")) {\n    struct rusage ru, cu, tu;\n    getrusage(RUSAGE_SELF, &ru);\n    getrusage(RUSAGE_CHILDREN, &cu);\n    getrusage(RUSAGE_THREAD, &tu);\n    append(\"used_cpu_sys\", StrCat(ru.ru_stime.tv_sec, \".\", ru.ru_stime.tv_usec));\n    append(\"used_cpu_user\", StrCat(ru.ru_utime.tv_sec, \".\", ru.ru_utime.tv_usec));\n    append(\"used_cpu_sys_children\", StrCat(cu.ru_stime.tv_sec, \".\", cu.ru_stime.tv_usec));\n    append(\"used_cpu_user_children\", StrCat(cu.ru_utime.tv_sec, \".\", cu.ru_utime.tv_usec));\n    append(\"used_cpu_sys_main_thread\", StrCat(tu.ru_stime.tv_sec, \".\", tu.ru_stime.tv_usec));\n    append(\"used_cpu_user_main_thread\", StrCat(tu.ru_utime.tv_sec, \".\", tu.ru_utime.tv_usec));\n  }\n#endif\n\n  if (should_enter(\"CLUSTER\")) {\n    append(\"cluster_enabled\", IsClusterEnabledOrEmulated());\n    append(\"migration_errors_total\", service_.cluster_family().MigrationsErrorsCount());\n    append(\"total_migrated_keys\", m.shard_stats.total_migrated_keys);\n  }\n\n  if (should_enter(\"LATENCYSTATS\")) {\n    for (const auto& [cmd_name, hist] : m.cmd_latency_map) {\n      if (!hist) {\n        continue;\n      }\n\n      if 
(is_histogram_empty(hist)) {\n        continue;\n      }\n\n      absl::InlinedVector<std::string, 4> stats;\n      for (const auto percentile : kLatencyPercentiles) {\n        const auto value = hdr_value_at_percentile(hist, percentile);\n        // If the percentile is an integer, print it as an integer, otherwise print it as a double\n        if (std::trunc(percentile) == percentile) {\n          stats.emplace_back(absl::StrFormat(\"p%d=%d\", static_cast<int64_t>(percentile), value));\n        } else {\n          stats.emplace_back(absl::StrFormat(\"p%g=%d\", percentile, value));\n        }\n      }\n\n      append(absl::StrFormat(\"latency_percentiles_usec_%s\", cmd_name), absl::StrJoin(stats, \",\"));\n    }\n  }\n\n  return info;\n}\n\nvoid ServerFamily::Info(CmdArgList args, CommandContext* cmd_cntx) {\n  std::vector<std::string> sections;\n  bool need_metrics{false};  // Save time - do not fetch metrics if we don't need them.\n  Metrics metrics;\n\n  sections.reserve(args.size());\n  for (const auto& arg : args) {\n    sections.emplace_back(absl::AsciiStrToUpper(arg));\n    const auto& section = sections.back();\n    if (!need_metrics && (section != \"SERVER\") && (section != \"REPLICATION\")) {\n      need_metrics = true;\n    }\n  }\n\n  if (need_metrics || sections.empty()) {\n    metrics = GetMetrics(cmd_cntx->server_conn_cntx()->ns);\n  } else if (!IsMaster()) {\n    metrics.replica_side_info = GetReplicaSummary();\n  }\n\n  std::string info;\n  bool is_priveleged = cmd_cntx->conn()->IsPrivileged();\n  // For multiple requested sections, invalid section names are ignored (not included in the\n  // output). The command does not abort or return an error if some sections are invalid. 
This\n  // matches Valkey behavior.\n  if (sections.empty()) {  // No sections: default to all sections.\n    info = FormatInfoMetrics(metrics, \"\", is_priveleged);\n  } else if (sections.size() == 1) {  // Single section\n    info = FormatInfoMetrics(metrics, sections[0], is_priveleged);\n  } else {  // Multiple sections: concatenate results for each requested section.\n    for (const auto& section : sections) {\n      const std::string section_str = FormatInfoMetrics(metrics, section, is_priveleged);\n      if (!section_str.empty()) {\n        if (!info.empty()) {\n          absl::StrAppend(&info, \"\\r\\n\", section_str);\n        } else {\n          info = section_str;\n        }\n      }\n    }\n  }\n\n  auto* rb = static_cast<RedisReplyBuilder*>(cmd_cntx->rb());\n  rb->SendVerbatimString(info);\n}\n\nvoid ServerFamily::Hello(CmdArgList args, CommandContext* cmd_cntx) {\n  // If no arguments are provided default to RESP2.\n  bool is_resp3 = false;\n  bool has_auth = false;\n  bool has_setname = false;\n  string_view username;\n  string_view password;\n  string_view clientname;\n\n  auto* rb = static_cast<RedisReplyBuilder*>(cmd_cntx->rb());\n  if (!args.empty()) {\n    string_view proto_version = ArgS(args, 0);\n    is_resp3 = proto_version == \"3\";\n    bool valid_proto_version = proto_version == \"2\" || is_resp3;\n    if (!valid_proto_version) {\n      cmd_cntx->SendError(UnknownCmd(\"HELLO\", args));\n      return;\n    }\n\n    for (uint32_t i = 1; i < args.size(); i++) {\n      auto sub_cmd = ArgS(args, i);\n      auto moreargs = args.size() - 1 - i;\n      if (absl::EqualsIgnoreCase(sub_cmd, \"AUTH\") && moreargs >= 2) {\n        has_auth = true;\n        username = ArgS(args, i + 1);\n        password = ArgS(args, i + 2);\n        i += 2;\n      } else if (absl::EqualsIgnoreCase(sub_cmd, \"SETNAME\") && moreargs > 0) {\n        has_setname = true;\n        clientname = ArgS(args, i + 1);\n        i += 1;\n      } else {\n        
cmd_cntx->SendError(kSyntaxErr);\n        return;\n      }\n    }\n  }\n\n  ConnectionContext* cntx = cmd_cntx->server_conn_cntx();\n  if (has_auth && !DoAuth(cntx, username, password)) {\n    return cmd_cntx->SendError(facade::kAuthRejected, facade::kNoAuthErrType);\n  }\n\n  if (cntx->req_auth && !cntx->authenticated) {\n    cmd_cntx->SendError(\n        \"-NOAUTH HELLO must be called with the client already \"\n        \"authenticated, otherwise the HELLO <proto> AUTH <user> <pass> \"\n        \"option can be used to authenticate the client and \"\n        \"select the RESP protocol version at the same time\",\n        facade::kNoAuthErrType);\n    return;\n  }\n\n  if (has_setname) {\n    cntx->conn()->SetName(string{clientname});\n  }\n\n  int proto_version = 2;\n  if (is_resp3) {\n    proto_version = 3;\n    rb->SetRespVersion(RespVersion::kResp3);\n  } else {\n    // Issuing hello 2 again is valid and should switch back to RESP2\n    rb->SetRespVersion(RespVersion::kResp2);\n  }\n\n  // Define number of fields in the response - add availability_zone if flag is not empty\n  const auto& az = GetFlag(FLAGS_availability_zone);\n  const int fields_count = az.empty() ? 7 : 8;\n\n  SinkReplyBuilder::ReplyAggregator agg(rb);\n  rb->StartCollection(fields_count, CollectionType::MAP);\n  rb->SendBulkString(\"server\");\n  rb->SendBulkString(\"redis\");\n  rb->SendBulkString(\"version\");\n  rb->SendBulkString(kRedisVersion);\n  rb->SendBulkString(\"dragonfly_version\");\n  rb->SendBulkString(GetVersion());\n  rb->SendBulkString(\"proto\");\n  rb->SendLong(proto_version);\n  rb->SendBulkString(\"id\");\n  rb->SendLong(cntx->conn()->GetClientId());\n  rb->SendBulkString(\"mode\");\n  rb->SendBulkString(GetRedisMode());\n  rb->SendBulkString(\"role\");\n  rb->SendBulkString(IsMaster() ? 
\"master\" : \"slave\");\n\n  // Add availability_zone to the response if flag is explicitly set and not empty\n  if (!az.empty()) {\n    rb->SendBulkString(\"availability_zone\");\n    rb->SendBulkString(az);\n  }\n}\n\nvoid ServerFamily::AddReplicaOf(CmdArgList args, CommandContext* cmd_cntx) {\n  util::fb2::LockGuard lk(replicaof_mu_);\n  auto* rb = static_cast<RedisReplyBuilder*>(cmd_cntx->rb());\n  if (IsMaster()) {\n    return cmd_cntx->SendError(\n        \"Calling ADDREPLICAOFF allowed only after server is already a replica\");\n  }\n  CHECK(replica_);\n\n  auto replicaof_args = ReplicaOfArgs::FromCmdArgs(args);\n  if (!replicaof_args.has_value()) {\n    return cmd_cntx->SendError(replicaof_args.error());\n  }\n  if (replicaof_args->IsReplicaOfNoOne()) {\n    return cmd_cntx->SendError(\"ADDREPLICAOF does not support no one\");\n  }\n  LOG(INFO) << \"Add Replica \" << *replicaof_args;\n\n  auto add_replica = make_unique<Replica>(replicaof_args->host, replicaof_args->port, &service_,\n                                          master_replid(), replicaof_args->slot_range);\n  GenericError ec = add_replica->Start();\n  if (ec) {\n    return cmd_cntx->SendError(ec.Format());\n  }\n  add_replica->StartMainReplicationFiber(nullopt);\n  cluster_replicas_.push_back(std::move(add_replica));\n  rb->SendOk();\n}\n\nvoid ServerFamily::ReplicaOfInternal(CmdArgList args, CommandContext* cmd_cntx,\n                                     ActionOnConnectionFail on_err) {\n  std::shared_ptr<Replica> new_replica;\n  std::optional<Replica::LastMasterSyncData> last_master_data;\n  {\n    util::fb2::LockGuard lk(replicaof_mu_);  // Only one REPLICAOF command can run at a time\n\n    // We should not execute replica of command while loading from snapshot.\n    ServerState* ss = ServerState::tlocal();\n    if (ss->is_master && ss->gstate() == GlobalState::LOADING) {\n      cmd_cntx->SendError(kLoadingErr);\n      return;\n    }\n\n    auto replicaof_args = 
ReplicaOfArgs::FromCmdArgs(args);\n    if (!replicaof_args.has_value()) {\n      cmd_cntx->SendError(replicaof_args.error());\n      return;\n    }\n\n    LOG(INFO) << \"Replicating \" << *replicaof_args;\n\n    // If NO ONE was supplied, just stop the current replica (if it exists)\n    if (replicaof_args->IsReplicaOfNoOne()) {\n      if (!ss->is_master) {\n        CHECK(replica_);\n\n        SetMasterFlagOnAllThreads(true);  // Flip flag before clearing replica\n        // No partial sync for NO ONE flow\n        replica_->Stop();\n        replica_.reset();\n\n        StopAllClusterReplicas();\n      }\n\n      // May not switch to ACTIVE if the process is, for example, shutting down at the same time.\n      service_.SwitchState(GlobalState::LOADING, GlobalState::ACTIVE);\n\n      return cmd_cntx->rb()->SendOk();\n    }\n\n    // If any replication is in progress, stop it, cancellation should kick in immediately\n\n    if (replica_)\n      last_master_data = replica_->Stop();\n    StopAllClusterReplicas();\n\n    const GlobalState gstate = ServerState::tlocal()->gstate();\n    if (gstate == GlobalState::TAKEN_OVER) {\n      service_.SwitchState(GlobalState::TAKEN_OVER, GlobalState::LOADING);\n    } else if (auto prev_state = service_.SwitchState(GlobalState::ACTIVE, GlobalState::LOADING);\n               prev_state != GlobalState::ACTIVE) {\n      LOG(WARNING) << prev_state << \" in progress, ignored\";\n      cmd_cntx->SendError(\"Invalid state\");\n      return;\n    }\n\n    // Create a new replica and assign it\n    new_replica = make_shared<Replica>(replicaof_args->host, replicaof_args->port, &service_,\n                                       master_replid(), replicaof_args->slot_range);\n\n    replica_ = new_replica;\n\n    // TODO: disconnect pending blocked clients (pubsub, blocking commands)\n    SetMasterFlagOnAllThreads(false);  // Flip flag after assiging replica\n\n  }  // release the lock, lk.unlock()\n  // We proceed connecting below without the 
lock to allow interrupting the replica immediately.\n  // From this point and onward, it should be highly responsive.\n\n  GenericError ec{};\n  switch (on_err) {\n    case ActionOnConnectionFail::kReturnOnError:\n      ec = new_replica->Start();\n      break;\n    case ActionOnConnectionFail::kContinueReplication:\n      new_replica->EnableReplication();\n      break;\n  };\n\n  // If the replication attempt failed, clean up global state. The replica should have stopped\n  // internally.\n  util::fb2::LockGuard lk(replicaof_mu_);  // Only one REPLICAOF command can run at a time\n\n  // If there was an error above during Start we must not start the main replication fiber.\n  // However, it could be the case that Start() above connected succefully and by the time\n  // we acquire the lock, the context got cancelled because another ReplicaOf command\n  // executed and acquired the replicaof_mu_ before us.\n  const bool cancelled = new_replica->IsContextCancelled();\n  if (ec || cancelled) {\n    if (replica_ == new_replica) {\n      service_.SwitchState(GlobalState::LOADING, GlobalState::ACTIVE);\n      SetMasterFlagOnAllThreads(true);\n      replica_.reset();\n    }\n    cmd_cntx->SendError(ec ? 
ec.Format() : \"replication cancelled\");\n    return;\n  }\n  // Successfully connected now we flush\n  // If we are called by \"Replicate\", tx will be null but we do not need\n  // to flush anything.\n  if (on_err == ActionOnConnectionFail::kReturnOnError) {\n    new_replica->StartMainReplicationFiber(last_master_data);\n  }\n  cmd_cntx->rb()->SendOk();\n}\n\nvoid ServerFamily::StopAllClusterReplicas() {\n  // Stop all cluster replication.\n  for (auto& replica : cluster_replicas_) {\n    replica->Stop();\n    replica.reset();\n  }\n  cluster_replicas_.clear();\n}\n\nvoid ServerFamily::ReplicaOf(CmdArgList args, CommandContext* cmd_cntx) {\n  const bool use_replica_of_v2 = absl::GetFlag(FLAGS_experimental_replicaof_v2);\n  if (use_replica_of_v2) {\n    ReplicaOfInternalV2(args, cmd_cntx, ActionOnConnectionFail::kReturnOnError);\n    return;\n  }\n  ReplicaOfInternal(args, cmd_cntx, ActionOnConnectionFail::kReturnOnError);\n}\n\nvoid ServerFamily::Replicate(string_view host, string_view port) {\n  StringVec replicaof_params{string(host), string(port)};\n\n  CmdArgVec args_vec;\n  for (auto& s : replicaof_params) {\n    args_vec.emplace_back(MutableSlice{s.data(), s.size()});\n  }\n  CmdArgList args_list = absl::MakeSpan(args_vec);\n  io::NullSink sink;\n  facade::RedisReplyBuilder rb(&sink);\n  const bool use_replica_of_v2 = absl::GetFlag(FLAGS_experimental_replicaof_v2);\n  CommandContext cmd_cntx{&rb, nullptr};\n  if (use_replica_of_v2) {\n    ReplicaOfInternalV2(args_list, &cmd_cntx, ActionOnConnectionFail::kContinueReplication);\n    return;\n  }\n  ReplicaOfInternal(args_list, &cmd_cntx, ActionOnConnectionFail::kContinueReplication);\n}\n\nvoid ServerFamily::StartJournalInShardThreads(Replica* repl_ptr) {\n  shard_set->RunBriefInParallel([this, repl_ptr](auto* shard) {\n    size_t index = shard->shard_id();\n    auto flow_map = repl_ptr->GetFlowMapAtIndex(index);\n    size_t rec_executed = repl_ptr->GetRecCountExecutedPerShard(flow_map);\n    LOG(INFO) << 
\"Shard \" << index << \" starts journal at: \" << rec_executed;\n    journal::StartInThreadAtLsn(rec_executed);\n  });\n}\n\nvoid ServerFamily::ReplicaOfNoOne(SinkReplyBuilder* builder) {\n  util::fb2::LockGuard lk(replicaof_mu_);\n\n  if (!IsMaster()) {\n    CHECK(replica_);\n\n    auto repl_ptr = replica_;\n    if (absl::GetFlag(FLAGS_replicaof_no_one_start_journal)) {\n      // Start journal and keep offsets.\n      StartJournalInShardThreads(repl_ptr.get());\n    }\n    // flip flag before clearing replica_\n    SetMasterFlagOnAllThreads(true);\n\n    last_master_data_ = replica_->Stop();\n    replica_.reset();\n    StopAllClusterReplicas();\n  }\n\n  // May not switch to ACTIVE if the process is, for example, shutting down at the same time.\n  service_.SwitchState(GlobalState::LOADING, GlobalState::ACTIVE);\n\n  return builder->SendOk();\n}\n\nvoid ServerFamily::ReplicaOfInternalV2(CmdArgList args, CommandContext* cmd_cntx,\n                                       ActionOnConnectionFail on_error)\n    ABSL_LOCKS_EXCLUDED(replicaof_mu_) {\n  auto replicaof_args = ReplicaOfArgs::FromCmdArgs(args);\n  if (!replicaof_args.has_value()) {\n    return cmd_cntx->SendError(replicaof_args.error());\n  }\n\n  LOG(INFO) << \"Initiate replication with: \" << *replicaof_args;\n  // This is a \"weak\" check. For example, if the node is already a replica,\n  // it could be the case that one of the flows disconnects. The MainReplicationFiber\n  // will then loop and if it can't partial sync it will enter LOADING state because of\n  // full sync. Note that the fiber is not aware of the replicaof_mu_ so even\n  // if that mutex is locked below before any state check we can't really enforce\n  // that the old replication fiber won't try to full sync and update the state to LOADING.\n  // What is more here is that we always call `replica->Stop()`. So even if we end up in the\n  // scenario described, the semantics are well defined. 
First, cancel the old replica and\n  // move on with the new one. Cancelation will be slower and ReplicaOf() will\n  // induce higher latency -- but that's ok because it's a highly improbable flow with\n  // well defined semantics.\n  ServerState* ss = ServerState::tlocal();\n\n  if (IsMaster() && ss->gstate() == GlobalState::LOADING) {\n    return cmd_cntx->SendError(kLoadingErr);\n  }\n\n  // replicaof no one\n  if (replicaof_args->IsReplicaOfNoOne()) {\n    return ReplicaOfNoOne(cmd_cntx->rb());\n  }\n\n  auto new_replica = make_shared<Replica>(replicaof_args->host, replicaof_args->port, &service_,\n                                          master_replid(), replicaof_args->slot_range);\n  GenericError ec;\n  switch (on_error) {\n    case ActionOnConnectionFail::kReturnOnError:\n      ec = new_replica->Start();\n      break;\n    case ActionOnConnectionFail::kContinueReplication:\n      new_replica->EnableReplication();\n      break;\n  };\n\n  if (ec || new_replica->IsContextCancelled()) {\n    return cmd_cntx->SendError(ec ? ec.Format() : \"replication cancelled\");\n  }\n\n  // Critical section.\n  // 1. Stop the old replica_ if it exists\n  // 2. Update all the pointers to the new replica and update master flag\n  // 3. Start the main replication fiber\n  // 4. Send OK\n  util::fb2::LockGuard lk(replicaof_mu_);\n  std::optional<Replica::LastMasterSyncData> last_master_data;\n  if (replica_)\n    last_master_data = replica_->Stop();\n\n  StopAllClusterReplicas();\n\n  if (ServerState::tlocal()->gstate() == GlobalState::TAKEN_OVER)\n    service_.SwitchState(GlobalState::TAKEN_OVER, GlobalState::LOADING);\n\n  // TODO Update thread locals. 
That way INFO never blocks\n  replica_ = new_replica;\n  SetMasterFlagOnAllThreads(false);\n\n  if (on_error == ActionOnConnectionFail::kReturnOnError) {\n    replica_->StartMainReplicationFiber(last_master_data);\n  }\n\n  cmd_cntx->rb()->SendOk();\n}\n\n// REPLTAKEOVER <seconds> [SAVE]\n// SAVE is used only by tests.\nvoid ServerFamily::ReplTakeOver(CmdArgList args, CommandContext* cmd_cntx) {\n  VLOG(1) << \"ReplTakeOver start\";\n\n  CmdArgParser parser{args};\n\n  int timeout_sec = parser.Next<int>();\n  bool save_flag = static_cast<bool>(parser.Check(\"SAVE\"));\n\n  auto* builder = cmd_cntx->rb();\n  if (parser.HasNext())\n    return cmd_cntx->SendError(absl::StrCat(\"Unsupported option:\", string_view(parser.Next())));\n\n  RETURN_ON_PARSE_ERROR(parser, cmd_cntx);\n\n  // We allow zero timeouts for tests.\n  if (timeout_sec < 0) {\n    return cmd_cntx->SendError(\"timeout is negative\");\n  }\n\n  // We return OK, to support idempotency semantics.\n  if (IsMaster())\n    return builder->SendOk();\n\n  util::fb2::LockGuard lk(replicaof_mu_);\n\n  auto repl_ptr = replica_;\n  CHECK(repl_ptr);\n\n  // Start journal to allow partial sync from same source master\n  StartJournalInShardThreads(repl_ptr.get());\n\n  auto info = replica_->GetSummary();\n  if (!info.full_sync_done) {\n    return cmd_cntx->SendError(\"Full sync not done\");\n  }\n\n  std::error_code res = replica_->TakeOver(timeout_sec, save_flag);\n  if (res) {\n    LOG(WARNING) << \"Takeover failed with error: \" << res << \" - \" << res.message();\n    return cmd_cntx->SendError(absl::StrCat(\"Couldn't execute takeover: \", res.message()));\n  }\n\n  LOG(INFO) << \"Takeover successful, promoting this instance to master.\";\n\n  if (IsClusterEnabled()) {\n    service().cluster_family().ReconcileReplicaSlots();\n  }\n\n  last_master_data_ = replica_->Stop();\n  replica_.reset();\n\n  SetMasterFlagOnAllThreads(true);\n  return builder->SendOk();\n}\n\nvoid ServerFamily::ReplConf(CmdArgList args, 
CommandContext* cmd_cntx) {\n  auto* builder = cmd_cntx->rb();\n  {\n    util::fb2::LockGuard lk(replicaof_mu_);\n    if (!IsMaster()) {\n      return cmd_cntx->SendError(\"Replicating a replica is unsupported\");\n    }\n  }\n\n  auto err_cb = [&]() mutable {\n    LOG(ERROR) << \"Error in receiving command: \" << args;\n    cmd_cntx->SendError(kSyntaxErr);\n  };\n\n  if (args.size() % 2 == 1)\n    return err_cb();\n\n  ConnectionContext* cntx = cmd_cntx->server_conn_cntx();\n  for (unsigned i = 0; i < args.size(); i += 2) {\n    DCHECK_LT(i + 1, args.size());\n\n    string cmd = absl::AsciiStrToUpper(ArgS(args, i));\n    std::string_view arg = ArgS(args, i + 1);\n    if (cmd == \"CAPA\") {\n      if (arg == \"dragonfly\" && args.size() == 2 && i == 0) {\n        auto [sid, flow_count] = dfly_cmd_->CreateSyncSession(&cntx->conn_state);\n        cntx->conn()->SetName(absl::StrCat(\"repl_ctrl_\", sid));\n\n        string sync_id = absl::StrCat(\"SYNC\", sid);\n        cntx->conn_state.replication_info.repl_session_id = sid;\n\n        cntx->replica_conn = true;\n\n        // The response for 'capa dragonfly' is: <masterid> <syncid> <numthreads> <version>\n        auto* rb = static_cast<RedisReplyBuilder*>(builder);\n        rb->StartArray(4);\n        rb->SendSimpleString(master_replid_);\n        rb->SendSimpleString(sync_id);\n        rb->SendLong(flow_count);\n        rb->SendLong(unsigned(DflyVersion::CURRENT_VER));\n        return;\n      }\n    } else if (cmd == \"LISTENING-PORT\") {\n      uint32_t replica_listening_port;\n      if (!absl::SimpleAtoi(arg, &replica_listening_port)) {\n        return cmd_cntx->SendError(kInvalidIntErr);\n      }\n      cntx->conn_state.replication_info.repl_listening_port = replica_listening_port;\n      // We set a default value of ip_address here, because LISTENING-PORT is a mandatory field\n      // but IP-ADDRESS is optional\n      if (cntx->conn_state.replication_info.repl_ip_address.empty()) {\n        
cntx->conn_state.replication_info.repl_ip_address = cntx->conn()->RemoteEndpointAddress();\n      }\n    } else if (cmd == \"IP-ADDRESS\") {\n      cntx->conn_state.replication_info.repl_ip_address = arg;\n    } else if (cmd == \"CLIENT-ID\" && args.size() == 2) {\n      auto info = dfly_cmd_->GetReplicaInfoFromConnection(&cntx->conn_state);\n      DCHECK(info != nullptr);\n      if (info) {\n        info->id = arg;\n      }\n    } else if (cmd == \"CLIENT-VERSION\" && args.size() == 2) {\n      unsigned version;\n      if (!absl::SimpleAtoi(arg, &version)) {\n        return cmd_cntx->SendError(kInvalidIntErr);\n      }\n      dfly_cmd_->SetDflyClientVersion(&cntx->conn_state, DflyVersion(version));\n    } else if (cmd == \"ACK\" && args.size() == 2) {\n      // Don't send error/Ok back through the socket, because we don't want to interleave with\n      // the journal writes that we write into the same socket.\n\n      if (!cntx->master_repl_flow) {\n        LOG(ERROR) << \"No replication flow assigned\";\n        return;\n      }\n\n      uint64_t ack;\n      if (!absl::SimpleAtoi(arg, &ack)) {\n        LOG(ERROR) << \"Bad int in REPLCONF ACK command! arg=\" << arg;\n        return;\n      }\n      VLOG(2) << \"Received client ACK=\" << ack;\n      cntx->master_repl_flow->last_acked_lsn = ack;\n      return;\n    } else {\n      VLOG(1) << \"Error \" << cmd << \" \" << arg << \" \" << args.size();\n      return err_cb();\n    }\n  }\n\n  return builder->SendOk();\n}\n\nvoid ServerFamily::Role(CmdArgList args, CommandContext* cmd_cntx) {\n  auto* rb = static_cast<RedisReplyBuilder*>(cmd_cntx->rb());\n  util::fb2::LockGuard lk(replicaof_mu_);\n  // Thread local var is_master is updated under mutex replicaof_mu_ together with replica_,\n  // ensuring eventual consistency of is_master. When determining if the server is a replica and\n  // accessing the replica_ object, we must lock replicaof_mu_. 
Using is_master alone is\n  // insufficient in this scenario.\n  if (!replica_) {\n    rb->StartArray(2);\n    rb->SendBulkString(\"master\");\n    auto vec = dfly_cmd_->GetReplicasRoleInfo();\n    rb->StartArray(vec.size());\n    for (auto& data : vec) {\n      rb->StartArray(3);\n      rb->SendBulkString(data.address);\n      rb->SendBulkString(absl::StrCat(data.listening_port));\n      rb->SendBulkString(data.state);\n    }\n\n  } else {\n    rb->StartArray(4 + cluster_replicas_.size() * 3);\n    rb->SendBulkString(GetFlag(FLAGS_info_replication_valkey_compatible) ? \"slave\" : \"replica\");\n\n    auto send_replica_info = [rb](const Replica::Summary& rinfo) {\n      rb->SendBulkString(rinfo.host);\n      rb->SendBulkString(absl::StrCat(rinfo.port));\n      if (rinfo.full_sync_done) {\n        rb->SendBulkString(GetFlag(FLAGS_info_replication_valkey_compatible) ? \"online\"\n                                                                             : \"stable_sync\");\n      } else if (rinfo.full_sync_in_progress) {\n        rb->SendBulkString(\"full_sync\");\n      } else if (rinfo.master_link_established) {\n        rb->SendBulkString(\"preparation\");\n      } else {\n        rb->SendBulkString(\"connecting\");\n      }\n    };\n    send_replica_info(replica_->GetSummary());\n    for (const auto& replica : cluster_replicas_) {\n      send_replica_info(replica->GetSummary());\n    }\n  }\n}\n\nvoid ServerFamily::Script(CmdArgList args, CommandContext* cmd_cntx) {\n  script_mgr_->Run(args, cmd_cntx->tx(), cmd_cntx->rb(), cmd_cntx->server_conn_cntx());\n}\n\nvoid ServerFamily::LastSave(CmdArgList args, CommandContext* cmd_cntx) {\n  auto info = thread_safe_save_info_.Get();\n  cmd_cntx->rb()->SendLong(info.save_time);\n}\n\nvoid ServerFamily::Latency(CmdArgList args, CommandContext* cmd_cntx) {\n  auto* rb = static_cast<RedisReplyBuilder*>(cmd_cntx->rb());\n  string sub_cmd = absl::AsciiStrToUpper(ArgS(args, 0));\n\n  if (sub_cmd == \"LATEST\" || sub_cmd == 
\"HISTOGRAM\") {\n    return rb->SendEmptyArray();\n  }\n\n  return cmd_cntx->SendError(UnknownSubCmd(sub_cmd, \"LATENCY\"), kSyntaxErrType);\n}\n\nvoid ServerFamily::ShutdownCmd(CmdArgList args, CommandContext* cmd_cntx) {\n  // Supported options (case-insensitive):\n  // SAVE | NOSAVE, NOW, FORCE, ABORT, SAFE (Valkey-specific, the same as SAVE in Dragonfly)\n  enum ShutBits : uint32_t {\n    SB_SAVE = 1u << 0,\n    SB_NOSAVE = 1u << 1,\n    SB_NOW = 1u << 2,\n    SB_FORCE = 1u << 3,\n    SB_ABORT = 1u << 4,\n  };\n\n  uint32_t sb = 0;\n\n  CmdArgParser parser(args);\n  while (parser.HasNext()) {\n    // Map SAFE to SAVE directly (fallthrough behavior)\n    ShutBits opt = parser.MapNext(\"SAVE\", SB_SAVE, \"NOSAVE\", SB_NOSAVE, \"NOW\", SB_NOW, \"FORCE\",\n                                  SB_FORCE, \"ABORT\", SB_ABORT, \"SAFE\", SB_SAVE);\n    sb |= static_cast<uint32_t>(opt);\n  }\n\n  RETURN_ON_PARSE_ERROR(parser, cmd_cntx);\n\n  // Conflicting toggles\n  if ((sb & SB_SAVE) && (sb & SB_NOSAVE)) {\n    return cmd_cntx->SendError(kSyntaxErr);\n  }\n\n  if (sb & SB_ABORT) {\n    // We currently do not support aborting an in-progress shutdown sequence.\n    return cmd_cntx->SendError(\"SHUTDOWN ABORT is not supported\");\n  }\n\n  // Configure save behavior on shutdown according to options.\n  if (sb & SB_FORCE) {\n    // FORCE implies no snapshot on shutdown regardless of SAVE/SAFE\n    save_on_shutdown_ = false;\n  } else if (sb & SB_NOSAVE) {\n    save_on_shutdown_ = false;\n  } else if (sb & SB_SAVE) {\n    save_on_shutdown_ = true;\n  }\n\n  // Wire NOW/FORCE to a single fast-shutdown flag for listeners.\n  facade::g_shutdown_fast.store((sb & (SB_NOW | SB_FORCE)) != 0, std::memory_order_seq_cst);\n\n  CHECK_NOTNULL(acceptor_)->Stop();\n  cmd_cntx->rb()->SendOk();\n\n  // Reset flag for any subsequent restarts (mainly for tests).\n  facade::g_shutdown_fast.store(false, std::memory_order_seq_cst);\n}\n\nvoid ServerFamily::Dfly(CmdArgList args, CommandContext* 
cmd_cntx) {\n  dfly_cmd_->Run(args, cmd_cntx);\n}\n\nvoid ServerFamily::SlowLog(CmdArgList args, CommandContext* cmd_cntx) {\n  string sub_cmd = absl::AsciiStrToUpper(ArgS(args, 0));\n  auto* rb = static_cast<RedisReplyBuilder*>(cmd_cntx->rb());\n  if (sub_cmd == \"HELP\") {\n    string_view help[] = {\n        \"SLOWLOG <subcommand> [<arg> [value] [opt] ...]. Subcommands are:\",\n        \"GET [<count>]\",\n        \"    Return top <count> entries from the slowlog (default: 10, -1 mean all).\",\n        \"    Entries are made of:\",\n        \"    id, timestamp, time in microseconds, arguments array, client IP and port,\",\n        \"    client name\",\n        \"LEN\",\n        \"    Return the length of the slowlog.\",\n        \"RESET\",\n        \"    Reset the slowlog.\",\n        \"HELP\",\n        \"    Prints this help.\",\n    };\n\n    rb->SendSimpleStrArr(help);\n    return;\n  }\n\n  if (sub_cmd == \"LEN\") {\n    vector<int> lengths(service_.proactor_pool().size());\n    service_.proactor_pool().AwaitFiberOnAll([&lengths](auto index, auto* context) {\n      lengths[index] = ServerState::tlocal()->GetSlowLog().Length();\n    });\n    int sum = std::accumulate(lengths.begin(), lengths.end(), 0);\n    return rb->SendLong(sum);\n  }\n\n  if (sub_cmd == \"RESET\") {\n    service_.proactor_pool().AwaitFiberOnAll(\n        [](auto index, auto* context) { ServerState::tlocal()->GetSlowLog().Reset(); });\n    return rb->SendOk();\n  }\n\n  if (sub_cmd == \"GET\") {\n    return SlowLogGet(args, sub_cmd, &service_.proactor_pool(), cmd_cntx);\n  }\n  cmd_cntx->SendError(UnknownSubCmd(sub_cmd, \"SLOWLOG\"), kSyntaxErrType);\n}\n\nvoid ServerFamily::Module(CmdArgList args, CommandContext* cmd_cntx) {\n  string sub_cmd = absl::AsciiStrToUpper(ArgS(args, 0));\n  auto* rb = static_cast<RedisReplyBuilder*>(cmd_cntx->rb());\n\n  if (sub_cmd != \"LIST\")\n    return cmd_cntx->SendError(kSyntaxErr);\n\n  rb->StartArray(2);\n\n  // Json\n  rb->StartCollection(2, 
CollectionType::MAP);\n  rb->SendSimpleString(\"name\");\n  rb->SendSimpleString(\"ReJSON\");\n  rb->SendSimpleString(\"ver\");\n  rb->SendLong(20'808);\n\n  // Search\n  rb->StartCollection(2, CollectionType::MAP);\n  rb->SendSimpleString(\"name\");\n  rb->SendSimpleString(\"search\");\n  rb->SendSimpleString(\"ver\");\n  rb->SendLong(21'015);  // we target v2\n}\n\nvoid ServerFamily::ClientPauseCmd(CmdArgList args, CommandContext* cmd_cntx) {\n  CmdArgParser parser(args);\n  auto listeners = GetNonPriviligedListeners();\n\n  auto timeout = parser.Next<uint64_t>();\n  ClientPause pause_state = ClientPause::ALL;\n  if (parser.HasNext()) {\n    pause_state = parser.MapNext(\"WRITE\", ClientPause::WRITE, \"ALL\", ClientPause::ALL);\n  }\n  RETURN_ON_PARSE_ERROR(parser, cmd_cntx);\n\n  const auto timeout_ms = timeout * 1ms;\n  auto is_pause_in_progress = [this, end_time = chrono::steady_clock::now() + timeout_ms] {\n    return ServerState::tlocal()->gstate() != GlobalState::SHUTTING_DOWN &&\n           chrono::steady_clock::now() < end_time && is_c_pause_in_progress_.load();\n  };\n\n  auto cleanup = [this] {\n    active_pauses_.fetch_sub(1);\n    client_pause_ec_.notify();\n  };\n\n  ConnectionContext* cntx = cmd_cntx->server_conn_cntx();\n  if (auto pause_fb_opt = Pause(listeners, cntx->ns, cntx->conn(), pause_state,\n                                std::move(is_pause_in_progress), cleanup);\n      pause_fb_opt) {\n    is_c_pause_in_progress_.store(true);\n    active_pauses_.fetch_add(1);\n    pause_fb_opt->Detach();\n    return cmd_cntx->rb()->SendOk();\n  }\n  cmd_cntx->SendError(\"Failed to pause all running clients\");\n}\n\n#define HFUNC(x) SetHandler(HandlerFunc(this, &ServerFamily::x))\n\nnamespace acl {\nconstexpr uint32_t kAuth = FAST | CONNECTION;\nconstexpr uint32_t kBGSave = ADMIN | SLOW | DANGEROUS;\nconstexpr uint32_t kClient = SLOW | CONNECTION;\nconstexpr uint32_t kConfig = ADMIN | SLOW | DANGEROUS;\nconstexpr uint32_t kDbSize = KEYSPACE | READ | 
FAST;\nconstexpr uint32_t kDebug = ADMIN | SLOW | DANGEROUS;\nconstexpr uint32_t kFlushDB = KEYSPACE | WRITE | SLOW | DANGEROUS;\nconstexpr uint32_t kFlushAll = KEYSPACE | WRITE | SLOW | DANGEROUS;\nconstexpr uint32_t kInfo = SLOW | DANGEROUS;\nconstexpr uint32_t kHello = FAST | CONNECTION;\nconstexpr uint32_t kLastSave = ADMIN | FAST | DANGEROUS;\nconstexpr uint32_t kLatency = ADMIN | SLOW | DANGEROUS;\nconstexpr uint32_t kMemory = READ | SLOW;\nconstexpr uint32_t kSave = ADMIN | SLOW | DANGEROUS;\nconstexpr uint32_t kShutDown = ADMIN | SLOW | DANGEROUS;\nconstexpr uint32_t kSlaveOf = ADMIN | SLOW | DANGEROUS;\nconstexpr uint32_t kReplicaOf = ADMIN | SLOW | DANGEROUS;\nconstexpr uint32_t kReplTakeOver = DANGEROUS;\nconstexpr uint32_t kReplConf = ADMIN | SLOW | DANGEROUS;\nconstexpr uint32_t kRole = ADMIN | FAST | DANGEROUS;\nconstexpr uint32_t kSlowLog = ADMIN | SLOW | DANGEROUS;\nconstexpr uint32_t kScript = SLOW | SCRIPTING;\nconstexpr uint32_t kModule = ADMIN | SLOW | DANGEROUS;\n// TODO(check this)\nconstexpr uint32_t kDfly = ADMIN;\n}  // namespace acl\n\nvoid ServerFamily::Register(CommandRegistry* registry) {\n  constexpr auto kReplicaOpts = CO::LOADING | CO::ADMIN | CO::GLOBAL_TRANS;\n  constexpr auto kMemOpts = CO::LOADING | CO::READONLY | CO::FAST;\n  registry->StartFamily();\n  *registry\n      << CI{\"AUTH\", CO::NOSCRIPT | CO::FAST | CO::LOADING, -2, 0, 0, acl::kAuth}.HFUNC(Auth)\n      << CI{\"BGSAVE\", CO::ADMIN | CO::GLOBAL_TRANS, -1, 0, 0, acl::kBGSave}.HFUNC(BgSave)\n      << CI{\"CLIENT\", CO::NOSCRIPT | CO::LOADING, -2, 0, 0, acl::kClient}.HFUNC(Client)\n      << CI{\"CONFIG\", CO::ADMIN | CO::LOADING | CO::DANGEROUS, -2, 0, 0, acl::kConfig}.HFUNC(Config)\n      << CI{\"DBSIZE\", CO::READONLY | CO::FAST | CO::LOADING, 1, 0, 0, acl::kDbSize}.HFUNC(DbSize)\n      << CI{\"DEBUG\", CO::ADMIN | CO::LOADING, -2, 0, 0, acl::kDebug}.HFUNC(Debug)\n      << CI{\"FLUSHDB\", CO::JOURNALED | CO::GLOBAL_TRANS | CO::DANGEROUS, -1, 0, 0, acl::kFlushDB}\n       
      .HFUNC(FlushDb)\n      << CI{\"FLUSHALL\", CO::JOURNALED | CO::GLOBAL_TRANS | CO::DANGEROUS, -1, 0, 0, acl::kFlushAll}\n             .HFUNC(FlushDb)\n      << CI{\"INFO\", CO::LOADING, -1, 0, 0, acl::kInfo}.HFUNC(Info)\n      << CI{\"HELLO\", CO::LOADING, -1, 0, 0, acl::kHello}.HFUNC(Hello)\n      << CI{\"LASTSAVE\", CO::LOADING | CO::FAST, 1, 0, 0, acl::kLastSave}.HFUNC(LastSave)\n      << CI{\"LATENCY\", CO::NOSCRIPT | CO::LOADING | CO::FAST, -2, 0, 0, acl::kLatency}.HFUNC(\n             Latency)\n      << CI{\"MEMORY\", kMemOpts, -2, 0, 0, acl::kMemory}.HFUNC(Memory)\n      << CI{\"SHRINK\", CO::JOURNALED | CO::FAST, 2, 1, 1, acl::kMemory}.HFUNC(Shrink)\n      << CI{\"SAVE\", CO::ADMIN | CO::GLOBAL_TRANS, -1, 0, 0, acl::kSave}.HFUNC(Save)\n      << CI{\"SHUTDOWN\",    CO::ADMIN | CO::NOSCRIPT | CO::LOADING | CO::DANGEROUS, -1, 0, 0,\n            acl::kShutDown}\n             .HFUNC(ShutdownCmd)\n      << CI{\"SLAVEOF\", kReplicaOpts, 3, 0, 0, acl::kSlaveOf}.HFUNC(ReplicaOf)\n      << CI{\"REPLICAOF\", kReplicaOpts, -3, 0, 0, acl::kReplicaOf}.HFUNC(ReplicaOf)\n      << CI{\"ADDREPLICAOF\", kReplicaOpts, 5, 0, 0, acl::kReplicaOf}.HFUNC(AddReplicaOf)\n      << CI{\"REPLTAKEOVER\", CO::ADMIN | CO::GLOBAL_TRANS, -2, 0, 0, acl::kReplTakeOver}.HFUNC(\n             ReplTakeOver)\n      << CI{\"REPLCONF\", CO::ADMIN | CO::LOADING, -1, 0, 0, acl::kReplConf}.HFUNC(ReplConf)\n      << CI{\"ROLE\", CO::LOADING | CO::FAST | CO::NOSCRIPT, 1, 0, 0, acl::kRole}.HFUNC(Role)\n      << CI{\"SLOWLOG\", CO::ADMIN | CO::FAST, -2, 0, 0, acl::kSlowLog}.HFUNC(SlowLog)\n      << CI{\"SCRIPT\", CO::NOSCRIPT | CO::NO_KEY_TRANSACTIONAL, -2, 0, 0, acl::kScript}.HFUNC(Script)\n      << CI{\"DFLY\", CO::ADMIN | CO::GLOBAL_TRANS | CO::HIDDEN, -2, 0, 0, acl::kDfly}.HFUNC(Dfly)\n      << CI{\"MODULE\", CO::ADMIN, 2, 0, 0, acl::kModule}.HFUNC(Module);\n}\n\n}  // namespace dfly\n"
  },
  {
    "path": "src/server/server_family.h",
    "content": "// Copyright 2022, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#pragma once\n\n#include <functional>\n#include <memory>\n#include <optional>\n#include <string>\n\n#include \"core/qlist.h\"\n#include \"facade/facade_stats.h\"\n#include \"facade/facade_types.h\"\n#include \"server/db_slice.h\"\n#include \"server/engine_shard_set.h\"\n#include \"server/replica_types.h\"\n#include \"server/server_state.h\"\n#include \"server/stats.h\"\n#include \"util/fibers/fiberqueue_threadpool.h\"\n#include \"util/fibers/future.h\"\n\nstruct hdr_histogram;\n\nnamespace facade {\nclass Listener;\n}  // namespace facade\n\nnamespace util {\n\nclass AcceptServer;\nclass HttpListenerBase;\n\n}  // namespace util\n\nnamespace dfly {\n\nnamespace detail {\n\nstruct SaveStagesController;\nclass SnapshotStorage;\n\n}  // namespace detail\n\nstd::string GetPassword();\n\nclass CommandContext;\nclass CommandRegistry;\nclass DflyCmd;\nclass Replica;\nclass Service;\nclass ScriptMgr;\nclass RdbLoadContext;\n\nstruct ReplicaRoleInfo {\n  std::string id;\n  std::string address;\n  uint32_t listening_port;\n  std::string_view state;\n  uint64_t lsn_lag;\n};\n\nstruct ReplicationMemoryStats {\n  size_t streamer_buf_capacity_bytes = 0;  // total capacities of streamer buffers\n  size_t full_sync_buf_bytes = 0;          // total bytes used for full sync buffers\n};\n\nstruct LoadingStats {\n  size_t restore_count = 0;\n  size_t failed_restore_count = 0;\n\n  size_t backup_count = 0;\n  size_t failed_backup_count = 0;\n};\n\n// Global peak stats recorded after aggregating metrics over all shards.\n// Note that those values are only updated during GetMetrics calls.\nstruct PeakStats {\n  size_t conn_dispatch_queue_bytes = 0;  // peak value of conn_stats.dispatch_queue_bytes\n  size_t conn_read_buf_capacity = 0;     // peak of total read buf capcacities\n};\n\n// Aggregated metrics over multiple sources on all shards\nstruct Metrics {\n  
SliceEvents events;              // general keyspace stats\n  std::vector<DbStats> db_stats;   // dbsize stats\n  EngineShard::Stats shard_stats;  // per-shard stats\n\n  facade::FacadeStats facade_stats;  // client stats and buffer sizes\n  TieredStats tiered_stats;\n\n  SearchStats search_stats;\n  ServerState::Stats coordinator_stats;  // stats on transaction running\n  PeakStats peak_stats;\n  QList::Stats qlist_stats;\n\n  size_t qps = 0;\n\n  size_t used_mem_peak = 0;\n  size_t used_mem_rss_peak = 0;\n\n  size_t heap_used_bytes = 0;\n  size_t small_string_bytes = 0;\n  uint32_t traverse_ttl_per_sec = 0;\n  uint32_t delete_ttl_per_sec = 0;\n  uint64_t hoffman_encode_total = 0, hoffman_encode_success = 0;\n  uint64_t fiber_switch_cnt = 0;\n  uint64_t fiber_switch_delay_usec = 0;\n  uint64_t tls_bytes = 0;\n  uint64_t refused_conn_max_clients_reached_count = 0;\n  uint64_t serialization_bytes = 0;\n\n  // Statistics about fibers running for a long time (more than 1ms).\n  uint64_t fiber_longrun_cnt = 0;\n  uint64_t fiber_longrun_usec = 0;\n\n  // Max length of all the tx shard-queues.\n  uint32_t tx_queue_len = 0;\n  uint32_t worker_fiber_count = 0;\n  uint32_t blocked_tasks = 0;\n  size_t worker_fiber_stack_size = 0;\n\n  size_t lsn_buffer_size = 0;\n  size_t lsn_buffer_bytes = 0;\n\n  // monotonic timestamp (ProactorBase::GetMonotonicTimeNs) of the connection stuck on send\n  // for the longest time.\n  uint64_t oldest_pending_send_ts = uint64_t(-1);\n\n  InterpreterManager::Stats lua_stats;\n\n  // command call frequencies (count, aggregated latency in usec).\n  std::map<std::string, std::pair<uint64_t, uint64_t>> cmd_stats_map;\n\n  absl::flat_hash_map<std::string, uint64_t> connections_lib_name_ver_map;\n\n  struct ReplicaInfo {\n    ReplicaSummary summary;\n\n    // cluster\n    std::vector<ReplicaSummary> cl_repl_summary;\n  };\n\n  // Replica reconnect stats on the replica side. 
Undefined for master\n  std::optional<ReplicaInfo> replica_side_info;\n\n  size_t migration_errors_total;\n\n  LoadingStats loading_stats;\n\n  absl::flat_hash_map<std::string, hdr_histogram*> cmd_latency_map;\n\n  InternedStringStats interned_string_stats;\n};\n\n// Contains the state of the last save operation.\n// This object is immutable.\nstruct SaveInfoData {\n  time_t save_time = 0;  // epoch time in seconds.\n  uint32_t success_duration_sec = 0;\n  std::string file_name;\n  std::vector<std::pair<std::string_view, size_t>> freq_map;  // RDB_TYPE_xxx -> count mapping.\n\n  // last error save info\n  GenericError last_error;\n  time_t last_error_time = 0;      // epoch time in seconds.\n  time_t failed_duration_sec = 0;  // epoch time in seconds.\n\n  // false if last attempt failed\n  bool last_bgsave_status = true;\n  bool bgsave_in_progress = false;\n};\n\n// A thread-safe wrapper for SaveInfoData using the Copy-on-Write pattern.\nclass ThreadSafeSaveInfo {\n public:\n  // Returns a snapshot of the current save info.\n  SaveInfoData Get() const {\n    std::lock_guard<util::fb2::Mutex> lock(data_mutex_);\n    return data_;\n  }\n\n  // The modifier function is called under a lock.\n  void Update(std::function<void(SaveInfoData*)> modifier) {\n    std::lock_guard<util::fb2::Mutex> lock(writer_mutex_);\n    SaveInfoData new_data(Get());\n    modifier(&new_data);\n    UpdateData(new_data);\n  }\n\n private:\n  void UpdateData(const SaveInfoData& new_data) {\n    std::lock_guard<util::fb2::Mutex> lock(data_mutex_);\n    data_ = new_data;\n  }\n\n  mutable util::fb2::Mutex writer_mutex_;\n  mutable util::fb2::Mutex data_mutex_;\n  SaveInfoData data_;\n};\n\nstruct SnapshotSpec {\n  std::string hour_spec;\n  std::string minute_spec;\n};\n\nstruct ReplicaOffsetInfo {\n  std::string sync_id;\n  std::vector<uint64_t> flow_offsets;\n};\n\nstruct SaveCmdOptions {\n  // if new_version is true, saves DF specific, non redis compatible snapshot.\n  bool new_version;\n  // 
cloud storage URI\n  std::string_view cloud_uri;\n  // if basename is not empty it will override dbfilename flag\n  std::string_view basename;\n};\n\nclass ServerFamily {\n  using SinkReplyBuilder = facade::SinkReplyBuilder;\n\n public:\n  explicit ServerFamily(Service* service);\n  ~ServerFamily();\n\n  void Init(util::AcceptServer* acceptor, std::vector<facade::Listener*> listeners);\n  void Register(CommandRegistry* registry);\n  void Shutdown() ABSL_LOCKS_EXCLUDED(replicaof_mu_);\n\n  // Public because is used by DflyCmd.\n  void ShutdownCmd(CmdArgList args, CommandContext* cmd_cntx);\n\n  Service& service() {\n    return service_;\n  }\n\n  void ResetStat(Namespace* ns);\n\n  Metrics GetMetrics(Namespace* ns) const;\n\n  std::string FormatInfoMetrics(const Metrics& metrics, std::string_view section,\n                                bool priveleged) const;\n\n  ScriptMgr* script_mgr() {\n    return script_mgr_.get();\n  }\n\n  const ScriptMgr* script_mgr() const {\n    return script_mgr_.get();\n  }\n\n  void StatsMC(std::string_view section, CommandContext* cmd_ctx);\n\n  GenericError DoSave(const SaveCmdOptions& save_cmd_opts, Transaction* transaction,\n                      bool ignore_state = false);\n\n  // Calls DoSave with a default generated transaction and with the format\n  // specified in --df_snapshot_format\n  GenericError DoSave(bool ignore_state = false);\n\n  // Burns down and destroy all the data from the database.\n  // if kDbAll is passed, burns all the databases to the ground.\n  // `wait` makes it wait for all fibers to finish and decommit\n  void Drakarys(Transaction* transaction, DbIndex db_ind, bool wait);\n\n  SaveInfoData GetLastSaveInfo() const;\n\n  void FlushAll(Namespace* ns);\n\n  // Load snapshot from file (.rdb file or summary.dfs file) and return\n  // future with error_code.\n  enum class LoadExistingKeys : uint8_t { kFail, kOverride };\n  std::optional<util::fb2::Future<GenericError>> Load(const std::string& file_name,\n      
                                                LoadExistingKeys existing_keys);\n\n  bool TEST_IsSaving() const;\n\n  void ConfigureMetrics(util::HttpListenerBase* listener);\n\n  void PauseReplication(bool pause) ABSL_LOCKS_EXCLUDED(replicaof_mu_);\n  std::optional<ReplicaOffsetInfo> GetReplicaOffsetInfo() ABSL_LOCKS_EXCLUDED(replicaof_mu_);\n\n  const std::string& master_replid() const {\n    return master_replid_;\n  }\n\n  DflyCmd* GetDflyCmd() const {\n    return dfly_cmd_.get();\n  }\n\n  std::optional<LastMasterSyncData> GetLastMasterData() const {\n    return last_master_data_;\n  }\n\n  absl::Span<facade::Listener* const> GetListeners() const {\n    return listeners_;\n  }\n\n  std::vector<facade::Listener*> GetNonPriviligedListeners() const;\n\n  // Replica-side method. Returns replication summary if this server is a replica,\n  // nullopt otherwise.\n  std::optional<Metrics::ReplicaInfo> GetReplicaSummary() const;\n\n  void OnClose(ConnectionContext* cntx);\n\n  void CancelBlockingOnThread(std::function<facade::OpStatus(facade::ArgSlice)> = {});\n\n  // Sets the server to replicate another instance. 
Does not flush the database beforehand!\n  void Replicate(std::string_view host, std::string_view port);\n\n  void UpdateMemoryGlobalStats();\n\n  // Return true if no replicas are registered or if all replicas reached stable sync\n  // Used in debug populate to DCHECK inconsistent flows that violate transaction guarantees\n  bool AreAllReplicasInStableSync() const;\n\n private:\n  // Helper to safely get save controller copy\n  std::shared_ptr<detail::SaveStagesController> GetSaveController() const {\n    util::fb2::LockGuard lk{save_mu_};\n    return save_controller_;\n  }\n\n  bool HasPrivilegedInterface();\n  void JoinSnapshotSchedule();\n  void LoadFromSnapshot() ABSL_LOCKS_EXCLUDED(loading_stats_mu_);\n\n  uint32_t shard_count() const {\n    return shard_set->size();\n  }\n\n  void Auth(CmdArgList args, CommandContext* cmd_cntx);\n  void Client(CmdArgList args, CommandContext* cmd_cntx);\n  void Config(CmdArgList args, CommandContext* cmd_cntx);\n  void DbSize(CmdArgList args, CommandContext* cmd_cntx);\n  void Debug(CmdArgList args, CommandContext* cmd_cntx);\n  void Dfly(CmdArgList args, CommandContext* cmd_cntx);\n  void Memory(CmdArgList args, CommandContext* cmd_cntx);\n  void Shrink(CmdArgList args, CommandContext* cmd_cntx);\n  void FlushDb(CmdArgList args, CommandContext* cmd_cntx);\n  void Info(CmdArgList args, CommandContext* cmd_cntx) ABSL_LOCKS_EXCLUDED(replicaof_mu_);\n  void Hello(CmdArgList args, CommandContext* cmd_cntx);\n  void LastSave(CmdArgList args, CommandContext* cmd_cntx);\n  void Latency(CmdArgList args, CommandContext* cmd_cntx);\n  void ReplicaOf(CmdArgList args, CommandContext* cmd_cntx);\n  void AddReplicaOf(CmdArgList args, CommandContext* cmd_cntx);\n  void ReplTakeOver(CmdArgList args, CommandContext* cmd_cntx) ABSL_LOCKS_EXCLUDED(replicaof_mu_);\n  void ReplConf(CmdArgList args, CommandContext* cmd_cntx);\n  void Role(CmdArgList args, CommandContext* cmd_cntx) ABSL_LOCKS_EXCLUDED(replicaof_mu_);\n  void Save(CmdArgList args, 
CommandContext* cmd_cntx);\n  void BgSave(CmdArgList args, CommandContext* cmd_cntx);\n  void Script(CmdArgList args, CommandContext* cmd_cntx);\n  void SlowLog(CmdArgList args, CommandContext* cmd_cntx);\n  void Module(CmdArgList args, CommandContext* cmd_cntx);\n\n  void SyncGeneric(std::string_view repl_master_id, uint64_t offs, ConnectionContext* cntx);\n\n  enum ActionOnConnectionFail {\n    kReturnOnError,        // if we fail to connect to master, return to err\n    kContinueReplication,  // continue attempting to connect to master, regardless of initial\n                           // failure\n  };\n\n  // REPLICAOF implementation. See arguments above\n  void ReplicaOfInternal(CmdArgList args, CommandContext* cmnd_cntx,\n                         ActionOnConnectionFail on_error) ABSL_LOCKS_EXCLUDED(replicaof_mu_);\n\n  void StartJournalInShardThreads(Replica* repl_ptr);\n\n  void ReplicaOfNoOne(SinkReplyBuilder* builder) ABSL_LOCKS_EXCLUDED(replicaof_mu_);\n\n  // REPLICAOF implementation without two phase locking.\n  void ReplicaOfInternalV2(CmdArgList args, CommandContext* cmnd_cntx,\n                           ActionOnConnectionFail on_error) ABSL_LOCKS_EXCLUDED(replicaof_mu_);\n\n  struct LoadOptions {\n    std::string snapshot_id;\n    uint32_t shard_count = 0;      // Shard count of the snapshot being loaded.\n    uint64_t num_loaded_keys = 0;  // Number of keys loaded.\n  };\n\n  // Updates LoadOptions if successful. 
If snapshot_id and shard_count are passed in,\n  // may use them for consistency checks.\n  std::error_code LoadRdb(const std::string& rdb_file, LoadExistingKeys existing_keys,\n                          LoadOptions* load_opts, RdbLoadContext* load_context);\n\n  void SnapshotScheduling() ABSL_LOCKS_EXCLUDED(loading_stats_mu_);\n\n  void SendInvalidationMessages() const;\n\n  std::optional<SaveCmdOptions> GetSaveCmdOpts(CmdArgList args, CommandContext* cmd_cntx);\n\n  void BgSaveFb(boost::intrusive_ptr<Transaction> trans);\n\n  struct DoSaveCheckAndStartOpts {\n    bool ignore_state = false;\n    bool bg_save = false;\n  };\n\n  GenericError DoSaveCheckAndStart(const SaveCmdOptions& save_cmd_opts, Transaction* trans,\n                                   DoSaveCheckAndStartOpts opts) ABSL_LOCKS_EXCLUDED(save_mu_);\n\n  GenericError WaitUntilSaveFinished(Transaction* trans,\n                                     bool ignore_state = false) ABSL_NO_THREAD_SAFETY_ANALYSIS;\n  void StopAllClusterReplicas() ABSL_EXCLUSIVE_LOCKS_REQUIRED(replicaof_mu_);\n\n  static bool DoAuth(ConnectionContext* cntx, std::string_view username, std::string_view password);\n\n  void ClientPauseCmd(CmdArgList args, CommandContext* cmd_cntx);\n  void ClientUnPauseCmd(CmdArgList args, CommandContext* cmd_cntx);\n\n  // Set accepting_connections_ and update listeners according to it\n  void ChangeConnectionAccept(bool accept);\n\n  util::fb2::Fiber snapshot_schedule_fb_;\n  util::fb2::Fiber load_fiber_;\n\n  Service& service_;\n\n  util::AcceptServer* acceptor_ = nullptr;\n  std::vector<facade::Listener*> listeners_;\n  bool accepting_connections_ = true;  // reject connections near oom\n  util::ProactorBase* pb_task_ = nullptr;\n\n  mutable util::fb2::Mutex replicaof_mu_, save_mu_;\n  std::shared_ptr<Replica> replica_ ABSL_GUARDED_BY(replicaof_mu_);\n  std::vector<std::unique_ptr<Replica>> cluster_replicas_\n      ABSL_GUARDED_BY(replicaof_mu_);  // used for replicating multiple nodes to single 
dragonfly\n\n  std::unique_ptr<ScriptMgr> script_mgr_;\n  std::unique_ptr<DflyCmd> dfly_cmd_;\n\n  std::string master_replid_;\n  std::optional<LastMasterSyncData> last_master_data_;\n\n  time_t start_time_ = 0;  // in seconds, epoch time.\n\n  ThreadSafeSaveInfo thread_safe_save_info_;\n  std::shared_ptr<detail::SaveStagesController> save_controller_ ABSL_GUARDED_BY(save_mu_);\n\n  // Used to override save on shutdown behavior that is usually set\n  // by --dbfilename.\n  bool save_on_shutdown_{true};\n\n  util::fb2::Done schedule_done_;\n  std::unique_ptr<util::fb2::FiberQueueThreadPool> fq_threadpool_;\n  std::shared_ptr<detail::SnapshotStorage> snapshot_storage_;\n\n  std::atomic<bool> is_c_pause_in_progress_ = false;\n  // We need this because if dragonfly shuts down during pause, ServerState will destruct\n  // before the detached fiber Pause() causing a seg fault.\n  std::atomic<size_t> active_pauses_ = 0;\n  util::fb2::EventCount client_pause_ec_;\n\n  // protected by save_mu_\n  util::fb2::Fiber bg_save_fb_;\n\n  mutable util::fb2::Mutex peak_stats_mu_;\n  mutable PeakStats peak_stats_;\n\n  mutable util::fb2::Mutex loading_stats_mu_;\n  LoadingStats loading_stats_ ABSL_GUARDED_BY(loading_stats_mu_);\n\n  bool legacy_format_metrics_ = true;\n};\n\n// Reusable CLIENT PAUSE implementation that blocks while polling is_pause_in_progress\nstd::optional<util::fb2::Fiber> Pause(std::vector<facade::Listener*> listeners, Namespace* ns,\n                                      facade::Connection* conn, ClientPause pause_state,\n                                      std::function<bool()> is_pause_in_progress,\n                                      std::function<void()> maybe_cleanup = {});\n\n}  // namespace dfly\n"
  },
  {
    "path": "src/server/server_family_test.cc",
    "content": "// Copyright 2022, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#include \"server/server_family.h\"\n\n#include <absl/strings/match.h>\n\n#include \"absl/strings/str_cat.h\"\n#include \"base/flags.h\"\n#include \"base/gtest.h\"\n#include \"base/logging.h\"\n#include \"facade/facade_test.h\"\n#include \"facade/socket_utils.h\"\n#include \"server/test_utils.h\"\n\nusing namespace testing;\nusing namespace std;\nusing namespace util;\nusing namespace boost;\n\nABSL_DECLARE_FLAG(string, cluster_mode);\n\nnamespace dfly {\n\nclass ServerFamilyTest : public BaseFamilyTest {\n protected:\n};\n\n#ifdef __linux__\nTEST_F(ServerFamilyTest, ReadTcpInfo) {\n  // Create a TCP socket\n  int sockfd = socket(AF_INET, SOCK_STREAM, 0);\n  ASSERT_GT(sockfd, 0) << \"Failed to create socket\";\n\n  // We'll create a socket in LISTEN state\n  struct sockaddr_in server_addr;\n  memset(&server_addr, 0, sizeof(server_addr));\n  server_addr.sin_family = AF_INET;\n  server_addr.sin_addr.s_addr = INADDR_ANY;\n  server_addr.sin_port = 0;  // Let the system choose a free port\n\n  // Bind to the port\n  ASSERT_EQ(bind(sockfd, (struct sockaddr*)&server_addr, sizeof(server_addr)), 0)\n      << \"Failed to bind socket: \" << strerror(errno);\n\n  // Start listening\n  ASSERT_EQ(listen(sockfd, 1), 0) << \"Failed to listen on socket: \" << strerror(errno);\n\n  // Get socket info\n  std::string socket_info = GetSocketInfo(sockfd);\n  std::cout << \"Socket info for valid socket: \" << socket_info << std::endl;\n  EXPECT_FALSE(socket_info.empty()) << \"Socket info should not be empty\";\n\n  // The socket info should contain some recognizable patterns\n  // For a listening socket, it should contain information about the local address\n  EXPECT_NE(socket_info.find(\"State: LISTEN\"), std::string::npos)\n      << \"Socket info doesn't contain expected local address pattern\";\n\n  // Close the socket\n  close(sockfd);\n\n  // Test invalid 
socket\n  socket_info = GetSocketInfo(-1);\n  EXPECT_EQ(socket_info, \"invalid socket\");\n}\n\nTEST_F(ServerFamilyTest, GetTcpSocketInfoIPv6) {\n  // Create an IPv6 TCP socket\n  int sockfd = socket(AF_INET6, SOCK_STREAM, 0);\n  ASSERT_GT(sockfd, 0) << \"Failed to create IPv6 socket\";\n\n  // We'll create a socket in LISTEN state\n  struct sockaddr_in6 server_addr;\n  memset(&server_addr, 0, sizeof(server_addr));\n  server_addr.sin6_family = AF_INET6;\n  server_addr.sin6_addr = in6addr_any;\n  server_addr.sin6_port = 0;  // Let the system choose a free port\n\n  // Bind to the port\n  ASSERT_EQ(bind(sockfd, (struct sockaddr*)&server_addr, sizeof(server_addr)), 0)\n      << \"Failed to bind IPv6 socket: \" << strerror(errno);\n\n  // Start listening\n  ASSERT_EQ(listen(sockfd, 1), 0) << \"Failed to listen on IPv6 socket: \" << strerror(errno);\n\n  // Get socket info\n  std::string socket_info = GetSocketInfo(sockfd);\n  std::cout << \"Socket info for valid IPv6 socket: \" << socket_info << std::endl;\n  EXPECT_FALSE(socket_info.empty()) << \"IPv6 socket info should not be empty\";\n\n  // The socket info should contain some recognizable patterns\n  // For a listening IPv6 socket, it should contain information about the local address\n  EXPECT_NE(socket_info.find(\"State: LISTEN\"), std::string::npos)\n      << \"IPv6 socket info doesn't contain expected LISTEN state\";\n\n  // If IPv6 support works correctly, the socket info should indicate an IPv6 address format\n  EXPECT_NE(socket_info.find(\"Local: [\"), std::string::npos)\n      << \"IPv6 socket info doesn't use IPv6 address format\";\n\n  // Close the socket\n  close(sockfd);\n}\n#endif\n\nTEST_F(ServerFamilyTest, SlowLogTruncation) {\n  auto resp = Run({\"config\", \"set\", \"slowlog_max_len\", \"3\"});\n  EXPECT_THAT(resp.GetString(), \"OK\");\n  resp = Run({\"config\", \"set\", \"slowlog_log_slower_than\", \"0\"});\n  EXPECT_THAT(resp.GetString(), \"OK\");\n\n  // Test args count truncation: 32 args (no 
truncation) vs 33 args (truncated)\n  std::vector<std::string> cmd_args = {\"LPUSH\", \"mykey\"};\n  for (int i = 1; i <= 30; ++i) {\n    cmd_args.push_back(std::to_string(i));\n  }\n  resp = Run(absl::Span<std::string>(cmd_args));\n  EXPECT_THAT(resp.GetInt(), 30);\n  resp = Run({\"slowlog\", \"get\"});\n  auto slowlog = resp.GetVec();\n  EXPECT_THAT(slowlog[0].GetVec()[3].GetVec(), ElementsAreArray(cmd_args));\n\n  cmd_args.push_back(\"31\");\n  resp = Run(absl::Span<std::string>(cmd_args));\n  EXPECT_THAT(resp.GetInt(), 61);\n  resp = Run({\"slowlog\", \"get\"});\n  slowlog = resp.GetVec();\n  auto commands = slowlog[0].GetVec()[3].GetVec();\n  EXPECT_THAT(commands.size(), 32);\n  EXPECT_THAT(commands[31].GetString(), \"... (2 more arguments)\");\n\n  // Test args length truncation: 128 bytes (no truncation) vs 129 bytes (truncated)\n  std::string at_limit = std::string(128, 'A');\n  resp = Run({\"lpush\", \"key1\", at_limit});\n  resp = Run({\"slowlog\", \"get\"});\n  slowlog = resp.GetVec();\n  EXPECT_THAT(slowlog[0].GetVec()[3].GetVec()[2].GetString(), at_limit);\n\n  std::string over_limit = std::string(129, 'A');\n  resp = Run({\"lpush\", \"key2\", over_limit});\n  resp = Run({\"slowlog\", \"get\"});\n  slowlog = resp.GetVec();\n  auto truncated = slowlog[0].GetVec()[3].GetVec()[2].GetString();\n  EXPECT_THAT(truncated, std::string(110, 'A') + \"... 
(1 more bytes)\");\n}\n\nTEST_F(ServerFamilyTest, SlowLogMaxLengthZero) {\n  auto resp = Run({\"config\", \"set\", \"slowlog_max_len\", \"0\"});\n  EXPECT_THAT(resp.GetString(), \"OK\");\n  resp = Run({\"config\", \"set\", \"slowlog_log_slower_than\", \"0\"});\n  EXPECT_THAT(resp.GetString(), \"OK\");\n  Run({\"slowlog\", \"reset\"});\n\n  // issue an arbitrary command\n  resp = Run({\"set\", \"foo\", \"bar\"});\n  EXPECT_THAT(resp.GetString(), \"OK\");\n  resp = Run({\"slowlog\", \"get\"});\n\n  // slowlog should be empty since max_len is 0\n  EXPECT_THAT(resp.GetVec().size(), 0);\n}\n\nTEST_F(ServerFamilyTest, SlowLogGetLen) {\n  auto resp = Run({\"config\", \"set\", \"slowlog_max_len\", \"3\"});\n  EXPECT_THAT(resp.GetString(), \"OK\");\n  resp = Run({\"config\", \"set\", \"slowlog_log_slower_than\", \"0\"});\n  EXPECT_THAT(resp.GetString(), \"OK\");\n\n  for (int i = 1; i <= 3; ++i) {\n    resp = Run({\"lpush\", \"mykey\", std::to_string(i)});\n    EXPECT_THAT(resp.GetInt(), i);\n  }\n\n  // Test GET 0 - returns empty\n  resp = Run({\"slowlog\", \"get\", \"0\"});\n  EXPECT_THAT(resp.GetVec().size(), 0);\n\n  // Test GET -1 - returns all entries\n  resp = Run({\"slowlog\", \"get\", \"-1\"});\n  EXPECT_THAT(resp.GetVec().size(), 3);\n\n  // Test GET < -1 - returns error\n  resp = Run({\"slowlog\", \"get\", \"-2\"});\n  EXPECT_THAT(resp.GetString(), \"ERR count should be greater than or equal to -1\");\n}\n\nTEST_F(ServerFamilyTest, SlowLogLen) {\n  auto resp = Run({\"config\", \"set\", \"slowlog_max_len\", \"3\"});\n  EXPECT_THAT(resp.GetString(), \"OK\");\n  resp = Run({\"config\", \"set\", \"slowlog_log_slower_than\", \"0\"});\n  EXPECT_THAT(resp.GetString(), \"OK\");\n  Run({\"slowlog\", \"reset\"});\n\n  for (int i = 1; i < 4; ++i) {\n    resp = Run({\"lpush\", \"mykey\", std::to_string(i)});\n    EXPECT_THAT(resp.GetInt(), i);\n  }\n\n  resp = Run({\"slowlog\", \"len\"});\n  EXPECT_THAT(resp.GetInt(), 3);\n}\n\nTEST_F(ServerFamilyTest, 
SlowLogMinusOneDisabled) {\n  auto resp = Run({\"config\", \"set\", \"slowlog_max_len\", \"3\"});\n  EXPECT_THAT(resp.GetString(), \"OK\");\n  resp = Run({\"config\", \"set\", \"slowlog_log_slower_than\", \"-1\"});\n  EXPECT_THAT(resp.GetString(), \"OK\");\n  Run({\"slowlog\", \"reset\"});\n\n  // issue some commands\n  for (int i = 1; i < 4; ++i) {\n    resp = Run({\"lpush\", \"mykey\", std::to_string(i)});\n    EXPECT_THAT(resp.GetInt(), i);\n  }\n\n  // slowlog is still empty\n  resp = Run({\"slowlog\", \"get\"});\n  EXPECT_THAT(resp.GetVec().size(), 0);\n  resp = Run({\"slowlog\", \"len\"});\n  EXPECT_THAT(resp.GetInt(), 0);\n}\n\n// Test how slowlog captures additional information about heavy commands\nTEST_F(ServerFamilyTest, SlowLogExecEval) {\n  Run({\"config\", \"set\", \"slowlog_max_len\", \"20\"});\n  Run({\"config\", \"set\", \"slowlog_log_slower_than\", \"0\"});\n\n  // Run EXEC\n  {\n    Run({\"multi\"});\n    Run({\"set\", \"first\", \"ok\"});\n    Run({\"set\", \"second2\", \"ok\"});\n    Run({\"get\", \"third3\"});\n    Run({\"exec\"});\n  }\n\n  // Run EVAL\n  {\n    const std::string_view script = R\"(\nfor i, key in ipairs(KEYS) do\n  redis.call('GET', key)\nend\nfor i, key in ipairs(KEYS) do\n  redis.call('SET', key, 'some-data')\nend\nreturn 'OK';\n    )\";\n    auto resp = Run({\"EVAL\", script, \"3\", \"first\", \"second2\", \"third3\", \"second2\"});\n    EXPECT_EQ(resp, \"OK\");\n  }\n\n  size_t found = 0;\n  auto resp = Run({\"slowlog\", \"get\"});\n  for (const auto& entry : resp.GetVec()) {\n    const auto& args = entry.GetVec()[3].GetVec();\n    if (args[0] == \"EXEC\") {\n      EXPECT_THAT(args, ElementsAreArray({\"EXEC\", \"num_cmds: 3\", \"is_write: 1\"}));\n      found++;\n    } else if (args[0] == \"EVAL\") {\n      const auto sha = \"41e84cf7973712deda6c1737a69bd1365eeb060f\";\n      EXPECT_THAT(args, ElementsAreArray({\"EVAL\", sha, \"num_cmds: 6\", \"slow_cmds: 6\", \"tx_mode: 2\",\n                                          
\"tx_shards: 2\", \"is_write: 1\", \"lock_tags: 3\", \"3\",\n                                          \"first\", \"second2\", \"third3\", \"second2\"}));\n      found++;\n    }\n  }\n\n  EXPECT_EQ(found, 2);\n}\n\nTEST_F(ServerFamilyTest, ClientPause) {\n  auto start = absl::Now();\n  Run({\"CLIENT\", \"PAUSE\", \"50\"});\n\n  Run({\"get\", \"key\"});\n  EXPECT_GT((absl::Now() - start), absl::Milliseconds(50));\n\n  start = absl::Now();\n\n  Run({\"CLIENT\", \"PAUSE\", \"50\", \"WRITE\"});\n\n  Run({\"get\", \"key\"});\n  EXPECT_LT((absl::Now() - start), absl::Milliseconds(10));\n  Run({\"set\", \"key\", \"value2\"});\n  EXPECT_GT((absl::Now() - start), absl::Milliseconds(50));\n}\n\nTEST_F(ServerFamilyTest, ClientTrackingOnAndOff) {\n  // case 1. can't use the feature for resp2\n  auto resp = Run({\"CLIENT\", \"TRACKING\", \"ON\"});\n  EXPECT_THAT(resp.GetString(),\n              \"ERR Client tracking is currently not supported for RESP2. Please use RESP3.\");\n\n  // case 2. allows when resp3 is used\n  Run({\"HELLO\", \"3\"});\n  resp = Run({\"CLIENT\", \"TRACKING\", \"ON\"});\n  EXPECT_THAT(resp.GetString(), \"OK\");\n\n  resp = Run({\"CLIENT\", \"CACHING\", \"YES\"});\n  EXPECT_THAT(\n      resp, ErrArg(\"ERR CLIENT CACHING YES is only valid when tracking is enabled in OPTIN mode\"));\n\n  resp = Run({\"CLIENT\", \"CACHING\", \"NO\"});\n  EXPECT_THAT(\n      resp, ErrArg(\"ERR CLIENT CACHING NO is only valid when tracking is enabled in OPTOUT mode\"));\n\n  // case 3. 
turn off client tracking\n  resp = Run({\"CLIENT\", \"TRACKING\", \"OFF\"});\n  EXPECT_THAT(resp.GetString(), \"OK\");\n\n  resp = Run({\"CLIENT\", \"CACHING\", \"YES\"});\n  EXPECT_THAT(\n      resp,\n      ErrArg(\"CLIENT CACHING can be called only when the client is in tracking mode with OPTIN or \"\n             \"OPTOUT mode enabled\"));\n}\n\nTEST_F(ServerFamilyTest, ToggleTrackingOnAndOff) {\n  Run(\"HELLO 3\");\n  // seq = 0\n  auto resp = Run(\"CLIENT TRACKING ON OPTIN\");\n  // seq = 1\n  EXPECT_THAT(resp.GetString(), \"OK\");\n\n  resp = Run(\"CLIENT CACHING YES\");\n  // seq = 2, caching = 1\n  EXPECT_THAT(resp.GetString(), \"OK\");\n\n  resp = Run(\"CLIENT TRACKING OFF\");\n  resp = Run(\"CLIENT TRACKING ON OPTIN\");\n  // seq = 3, caching = 1\n  EXPECT_THAT(resp.GetString(), \"OK\");\n  // seq(3) != (caching(1) + 1)\n  resp = Run(\"GET foo\");\n  resp = Run(\"SET foo tmp\");\n  EXPECT_THAT(resp.GetString(), \"OK\");\n\n  EXPECT_EQ(InvalidationMessagesLen(\"IO0\"), 0);\n}\n\nTEST_F(ServerFamilyTest, ClientTrackingReadKey) {\n  // case 1. 
only read the keys doesn't trigger any notification.\n  Run({\"HELLO\", \"3\"});\n  Run({\"CLIENT\", \"TRACKING\", \"ON\"});\n\n  Run({\"SET\", \"FOO\", \"10\"});\n  Run({\"GET\", \"FOO\"});\n  EXPECT_EQ(InvalidationMessagesLen(\"IO0\"), 0);\n\n  Run({\"GET\", \"BAR\"});\n  EXPECT_EQ(InvalidationMessagesLen(\"IO0\"), 0);\n}\n\nTEST_F(ServerFamilyTest, ClientTrackingOptin) {\n  Run({\"HELLO\", \"3\"});\n  Run({\"CLIENT\", \"TRACKING\", \"ON\", \"OPTIN\"});\n\n  Run({\"GET\", \"FOO\"});\n  Run({\"SET\", \"FOO\", \"10\"});\n  EXPECT_EQ(InvalidationMessagesLen(\"IO0\"), 0);\n  Run({\"GET\", \"FOO\"});\n  EXPECT_EQ(InvalidationMessagesLen(\"IO0\"), 0);\n\n  Run({\"CLIENT\", \"CACHING\", \"YES\"});\n  // Start tracking once\n  Run({\"GET\", \"FOO\"});\n  Run({\"SET\", \"FOO\", \"20\"});\n  Run({\"GET\", \"FOO\"});\n  EXPECT_EQ(InvalidationMessagesLen(\"IO0\"), 1);\n\n  Run({\"GET\", \"BAR\"});\n  Run({\"SET\", \"BAR\", \"20\"});\n  Run({\"GET\", \"BAR\"});\n  EXPECT_EQ(InvalidationMessagesLen(\"IO0\"), 1);\n\n  // Start tracking once\n  Run({\"CLIENT\", \"CACHING\", \"YES\"});\n  Run({\"GET\", \"BAR\"});\n  Run({\"SET\", \"BAR\", \"20\"});\n  Run({\"GET\", \"BAR\"});\n  EXPECT_EQ(InvalidationMessagesLen(\"IO0\"), 2);\n}\n\nTEST_F(ServerFamilyTest, ClientTrackingMulti) {\n  Run({\"HELLO\", \"3\"});\n  Run({\"CLIENT\", \"TRACKING\", \"ON\"});\n  Run({\"MULTI\"});\n  Run({\"GET\", \"FOO\"});\n  Run({\"SET\", \"TMP\", \"10\"});\n  Run({\"GET\", \"FOOBAR\"});\n  Run({\"EXEC\"});\n\n  Run({\"SET\", \"FOO\", \"10\"});\n  Run({\"SET\", \"FOOBAR\", \"10\"});\n  EXPECT_EQ(InvalidationMessagesLen(\"IO0\"), 2);\n}\n\nTEST_F(ServerFamilyTest, ClientTrackingCompatibilityMulti) {\n  // Compatibility Test, all CLIENT commands should be allowed in MULTI\n  Run({\"HELLO\", \"3\"});\n  Run({\"MULTI\"});\n  auto resp = Run({\"CLIENT\", \"TRACKING\", \"ON\"});\n  EXPECT_THAT(resp.GetString(), \"QUEUED\");\n  // Used by sentinel in MULTI/EXEC blocks\n  resp = Run({\"CLIENT\", \"KILL\", 
\"127.0.0.1:6380\"});\n  EXPECT_THAT(resp.GetString(), \"QUEUED\");\n  resp = Run({\"CLIENT\", \"SETNAME\", \"YO\"});\n  EXPECT_THAT(resp.GetString(), \"QUEUED\");\n  resp = Run({\"CLIENT\", \"GETNAME\"});\n  EXPECT_THAT(resp.GetString(), \"QUEUED\");\n  Run({\"EXEC\"});\n\n  Run({\"GET\", \"FOO\"});\n  Run({\"SET\", \"FOO\", \"10\"});\n  EXPECT_EQ(InvalidationMessagesLen(\"IO0\"), 1);\n\n  Run({\"MULTI\"});\n  resp = Run({\"CLIENT\", \"PAUSE\", \"0\", \"WRITE\"});\n  EXPECT_THAT(resp.GetString(), \"QUEUED\");\n  Run({\"EXEC\"});\n}\n\nTEST_F(ServerFamilyTest, ClientTrackingMultiOptin) {\n  Run({\"HELLO\", \"3\"});\n  // Check stickiness\n  Run({\"CLIENT\", \"TRACKING\", \"ON\", \"OPTIN\"});\n  Run({\"CLIENT\", \"CACHING\", \"YES\"});\n  Run({\"MULTI\"});\n  Run({\"GET\", \"FOO\"});\n  Run({\"SET\", \"TMP\", \"10\"});\n  Run({\"GET\", \"FOOBAR\"});\n  Run({\"DISCARD\"});\n\n  Run({\"SET\", \"FOO\", \"10\"});\n  EXPECT_EQ(InvalidationMessagesLen(\"IO0\"), 0);\n\n  Run({\"CLIENT\", \"CACHING\", \"YES\"});\n  Run({\"MULTI\"});\n  Run({\"GET\", \"FOO\"});\n  Run({\"SET\", \"TMP\", \"10\"});\n  Run({\"GET\", \"FOOBAR\"});\n  Run({\"EXEC\"});\n\n  Run({\"SET\", \"FOO\", \"10\"});\n  Run({\"SET\", \"FOOBAR\", \"10\"});\n  EXPECT_EQ(InvalidationMessagesLen(\"IO0\"), 2);\n\n  // CACHING enclosed in MULTI\n  Run({\"MULTI\"});\n  Run({\"GET\", \"TMP\"});\n  Run({\"GET\", \"TMP_TMP\"});\n  Run({\"SET\", \"TMP\", \"10\"});\n  Run({\"CLIENT\", \"CACHING\", \"YES\"});\n  Run({\"GET\", \"FOO\"});\n  Run({\"GET\", \"FOOBAR\"});\n  Run({\"EXEC\"});\n\n  EXPECT_EQ(InvalidationMessagesLen(\"IO0\"), 2);\n  Run({\"SET\", \"TMP\", \"10\"});\n  EXPECT_EQ(InvalidationMessagesLen(\"IO0\"), 2);\n  Run({\"SET\", \"FOO\", \"10\"});\n  EXPECT_EQ(InvalidationMessagesLen(\"IO0\"), 3);\n  Run({\"SET\", \"FOOBAR\", \"10\"});\n  EXPECT_EQ(InvalidationMessagesLen(\"IO0\"), 4);\n\n  // CACHING enclosed in MULTI, ON/OFF\n  Run({\"MULTI\"});\n  Run({\"GET\", \"TMP\"});\n  Run({\"SET\", \"TMP\", 
\"10\"});\n  Run({\"CLIENT\", \"CACHING\", \"YES\"});\n  Run({\"GET\", \"FOO\"});\n  Run({\"GET\", \"BAR\"});\n  Run({\"EXEC\"});\n\n  EXPECT_EQ(InvalidationMessagesLen(\"IO0\"), 4);\n  Run({\"SET\", \"FOO\", \"10\"});\n  Run({\"GET\", \"FOO\"});\n  EXPECT_EQ(InvalidationMessagesLen(\"IO0\"), 5);\n  Run({\"SET\", \"BAR\", \"10\"});\n  Run({\"GET\", \"BAR\"});\n  EXPECT_EQ(InvalidationMessagesLen(\"IO0\"), 6);\n}\n\nTEST_F(ServerFamilyTest, ClientTrackingOptout) {\n  Run({\"HELLO\", \"3\"});\n  // Check stickiness\n  Run({\"CLIENT\", \"TRACKING\", \"ON\", \"OPTOUT\"});\n  Run({\"GET\", \"FOO\"});\n  Run({\"SET\", \"FOO\", \"BAR\"});\n  Run({\"GET\", \"BAR\"});\n  Run({\"SET\", \"BAR\", \"FOO\"});\n  EXPECT_EQ(InvalidationMessagesLen(\"IO0\"), 2);\n\n  // Switch off tracking for a single command\n  Run({\"CLIENT\", \"CACHING\", \"NO\"});\n  Run({\"GET\", \"FOO\"});\n  Run({\"SET\", \"FOO\", \"BAR\"});\n  EXPECT_EQ(InvalidationMessagesLen(\"IO0\"), 2);\n}\n\nTEST_F(ServerFamilyTest, ClientTrackingMultiOptout) {\n  Run({\"HELLO\", \"3\"});\n  // Check stickiness\n  Run({\"CLIENT\", \"TRACKING\", \"ON\", \"OPTOUT\"});\n\n  Run({\"MULTI\"});\n  Run({\"GET\", \"FOO\"});\n  Run({\"SET\", \"TMP\", \"10\"});\n  Run({\"GET\", \"FOOBAR\"});\n  Run({\"EXEC\"});\n\n  Run({\"SET\", \"FOO\", \"10\"});\n  Run({\"SET\", \"FOOBAR\", \"10\"});\n  EXPECT_EQ(InvalidationMessagesLen(\"IO0\"), 2);\n\n  // CACHING enclosed in MULTI\n  Run({\"MULTI\"});\n  Run({\"CLIENT\", \"CACHING\", \"NO\"});\n  Run({\"GET\", \"TMP\"});\n  Run({\"GET\", \"TMP_TMP\"});\n  Run({\"SET\", \"TMP\", \"10\"});\n  Run({\"SET\", \"TMP_TMP\", \"10\"});\n  Run({\"EXEC\"});\n\n  EXPECT_EQ(InvalidationMessagesLen(\"IO0\"), 2);\n}\n\nTEST_F(ServerFamilyTest, ClientTrackingUpdateKey) {\n  Run({\"HELLO\", \"3\"});\n  Run({\"CLIENT\", \"TRACKING\", \"ON\"});\n\n  Run({\"GET\", \"FOO\"});\n  Run({\"SET\", \"FOO\", \"10\"});\n  const auto& msg = GetInvalidationMessage(\"IO0\", 0);\n  EXPECT_EQ(msg.key, \"FOO\");\n\n  // 
make sure invalidation message only gets sent once.\n  Run({\"GET\", \"FOO\"});\n  EXPECT_EQ(InvalidationMessagesLen(\"IO0\"), 1);\n\n  // update string from another connection\n  // need to do another read to re-initialize the tracking of the key.\n  Run({\"GET\", \"FOO\"});\n  pp_->at(1)->Await([&] { return Run({\"SET\", \"FOO\", \"30\"}); });\n  pp_->AwaitFiberOnAll([](ProactorBase* pb) {});\n  const auto& msg2 = GetInvalidationMessage(\"IO0\", 1);\n  EXPECT_EQ(msg2.key, \"FOO\");\n\n  // case 4. test multi command\n  Run({\"MGET\", \"X1\", \"X2\", \"X3\", \"X4\", \"Y1\", \"Y2\", \"Y3\", \"Y4\", \"Z1\", \"Z2\", \"Z3\", \"Z4\"});\n  pp_->at(1)->Await([&] { return Run({\"MSET\", \"X1\", \"1\", \"Y3\", \"2\", \"Z2\", \"3\", \"Z4\", \"5\"}); });\n  pp_->AwaitFiberOnAll([](ProactorBase* pb) {});\n  EXPECT_EQ(InvalidationMessagesLen(\"IO0\"), 6);\n  std::vector<std::string_view> keys_invalidated;\n  for (unsigned int i = 2; i < 6; ++i)\n    keys_invalidated.push_back(GetInvalidationMessage(\"IO0\", i).key);\n  ASSERT_THAT(keys_invalidated, UnorderedElementsAre(\"X1\", \"Y3\", \"Z2\", \"Z4\"));\n\n  Run({\"FLUSHDB\"});\n}\n\nTEST_F(ServerFamilyTest, ClientTrackingDeleteKey) {\n  Run({\"HELLO\", \"3\"});\n  Run({\"CLIENT\", \"TRACKING\", \"ON\"});\n  Run({\"SET\", \"FOO\", \"10\"});\n  Run({\"GET\", \"FOO\"});\n  pp_->at(1)->Await([&] { return Run({\"DEL\", \"FOO\"}); });\n  pp_->AwaitFiberOnAll([](ProactorBase* pb) {});\n  EXPECT_EQ(GetInvalidationMessage(\"IO0\", 0).key, \"FOO\");\n}\n\nTEST_F(ServerFamilyTest, ClientTrackingRenameKey) {\n  Run({\"HELLO\", \"3\"});\n  Run({\"CLIENT\", \"TRACKING\", \"ON\"});\n  Run({\"SET\", \"FOO\", \"10\"});\n  Run({\"GET\", \"FOO\"});\n  pp_->at(1)->Await([&] { return Run({\"RENAME\", \"FOO\", \"BAR\"}); });\n  pp_->AwaitFiberOnAll([](ProactorBase* pb) {});\n  EXPECT_EQ(GetInvalidationMessage(\"IO0\", 0).key, \"FOO\");\n}\n\nTEST_F(ServerFamilyTest, ClientTrackingExpireKey) {\n  Run({\"HELLO\", \"3\"});\n  Run({\"CLIENT\", 
\"TRACKING\", \"ON\"});\n  Run({\"SET\", \"C\", \"10\"});\n  Run({\"GET\", \"C\"});\n  Run({\"EXPIRE\", \"C\", \"1\"});\n  AdvanceTime(1000);\n  auto resp = Run({\"GET\", \"C\"});\n  EXPECT_THAT(resp, ArgType(RespExpr::NIL));\n  EXPECT_EQ(InvalidationMessagesLen(\"IO0\"), 1);\n  EXPECT_EQ(GetInvalidationMessage(\"IO0\", 0).key, \"C\");\n}\n\nTEST_F(ServerFamilyTest, ClientTrackingSelectDB) {\n  Run({\"HELLO\", \"3\"});\n  Run({\"CLIENT\", \"TRACKING\", \"ON\"});\n  Run({\"SET\", \"C\", \"10\"});\n  Run({\"GET\", \"C\"});\n  pp_->at(1)->Await([&] { return Run({\"SELECT\", \"2\"}); });\n  pp_->at(1)->Await([&] { return Run({\"SET\", \"C\", \"1000\"}); });\n  pp_->AwaitFiberOnAll([](ProactorBase* pb) {});\n  EXPECT_EQ(InvalidationMessagesLen(\"IO0\"), 1);\n  EXPECT_EQ(GetInvalidationMessage(\"IO0\", 0).key, \"C\");\n}\n\nTEST_F(ServerFamilyTest, ClientTrackingNonTransactionalBug) {\n  Run({\"HELLO\", \"3\"});\n  Run({\"CLIENT\", \"TRACKING\", \"ON\"});\n\n  Run({\"CLUSTER\", \"SLOTS\"});\n}\n\nTEST_F(ServerFamilyTest, ClientTrackingLuaBug) {\n  Run({\"HELLO\", \"3\"});\n  // Check stickiness\n  Run({\"CLIENT\", \"TRACKING\", \"ON\"});\n  using namespace std::string_literals;\n  std::string eval = R\"(redis.call('get', 'foo'); redis.call('set', 'foo', 'bar'); )\";\n  Run({\"EVAL\", absl::StrCat(eval, \"return 1\"), \"1\", \"foo\"});\n  Run({\"PING\"});\n\n  EXPECT_EQ(InvalidationMessagesLen(\"IO0\"), 1);\n  absl::StrAppend(&eval, R\"(redis.call('get', 'oof'); redis.call('set', 'oof', 'bar'); return 1)\");\n  Run({\"EVAL\", eval, \"2\", \"foo\", \"oof\"});\n  Run({\"PING\"});\n  EXPECT_EQ(InvalidationMessagesLen(\"IO0\"), 3);\n}\n\nTEST_F(ServerFamilyTest, ConfigNormalization) {\n  // TODO: Ideally we'd also test that INFO REPLICATION returns the value set in the config, but\n  // there is no way currently to setup a mock replica in unit tests.\n\n  absl::FlagSaver fs;  // Restores the flag to default value after test finishes\n\n  // Default value\n  
EXPECT_THAT(Run({\"config\", \"get\", \"replica-priority\"}),\n              RespArray(ElementsAre(\"replica_priority\", \"100\")));\n  EXPECT_THAT(Run({\"config\", \"get\", \"replica_priority\"}),\n              RespArray(ElementsAre(\"replica_priority\", \"100\")));\n\n  // Set with dash\n  EXPECT_THAT(Run({\"config\", \"set\", \"replica-priority\", \"7\"}), \"OK\");\n\n  EXPECT_THAT(Run({\"config\", \"get\", \"replica-priority\"}),\n              RespArray(ElementsAre(\"replica_priority\", \"7\")));\n  EXPECT_THAT(Run({\"config\", \"get\", \"replica_priority\"}),\n              RespArray(ElementsAre(\"replica_priority\", \"7\")));\n\n  // Set with underscore\n  EXPECT_THAT(Run({\"config\", \"set\", \"replica_priority\", \"13\"}), \"OK\");\n\n  EXPECT_THAT(Run({\"config\", \"get\", \"replica-priority\"}),\n              RespArray(ElementsAre(\"replica_priority\", \"13\")));\n  EXPECT_THAT(Run({\"config\", \"get\", \"replica_priority\"}),\n              RespArray(ElementsAre(\"replica_priority\", \"13\")));\n}\n\n// Verify CONFIG GET returns numeric bytes for memory configs (Redis/Valkey compatibility).\nTEST_F(ServerFamilyTest, ConfigGetMemoryBytes) {\n  absl::FlagSaver fs;\n\n  // Set maxmemory using human-readable format\n  EXPECT_THAT(Run({\"config\", \"set\", \"maxmemory\", \"1GB\"}), \"OK\");\n\n  // CONFIG GET should return numeric bytes, not human-readable format\n  EXPECT_THAT(Run({\"config\", \"get\", \"maxmemory\"}),\n              RespArray(ElementsAre(\"maxmemory\", \"1073741824\")));\n\n  // Test another value\n  EXPECT_THAT(Run({\"config\", \"set\", \"maxmemory\", \"512MB\"}), \"OK\");\n  EXPECT_THAT(Run({\"config\", \"get\", \"maxmemory\"}),\n              RespArray(ElementsAre(\"maxmemory\", \"536870912\")));\n}\n\nTEST_F(ServerFamilyTest, CommandDocsOk) {\n  EXPECT_THAT(Run({\"command\", \"docs\"}), ErrArg(\"COMMAND DOCS Not Implemented\"));\n}\n\nTEST_F(ServerFamilyTest, PubSubCommandErr) {\n  // Check conditions only in non cluster mode\n  if 
(auto cluster_mode = absl::GetFlag(FLAGS_cluster_mode); cluster_mode == \"\") {\n    EXPECT_THAT(Run({\"PUBSUB\", \"SHARDCHANNELS\"}),\n                ErrArg(\"PUBSUB SHARDCHANNELS is not supported in non cluster mode\"));\n    EXPECT_THAT(Run({\"PUBSUB\", \"SHARDNUMSUB\"}),\n                ErrArg(\"PUBSUB SHARDNUMSUB is not supported in non cluster mode\"));\n  }\n  EXPECT_THAT(Run({\"PUBSUB\", \"INVALIDSUBCOMMAND\"}),\n              ErrArg(\"Unknown subcommand or wrong number of arguments for 'INVALIDSUBCOMMAND'. Try \"\n                     \"PUBSUB HELP.\"));\n}\n\nTEST_F(ServerFamilyTest, InfoMultipleSections) {\n  // Check that when querying multiple valid sections, both are returned non empty.\n  Run({\"set\", \"foo\", \"bar\"});  // set some data\n  auto resp = Run({\"info\", \"replication\", \"persistence\"});\n  auto info = resp.GetString();\n  EXPECT_NE(info.find(\"# Replication\"), std::string::npos);\n  EXPECT_NE(info.find(\"# Persistence\"), std::string::npos);\n}\n\nTEST_F(ServerFamilyTest, InfoMultipleSectionsInvalid) {\n  // Check that when querying a valid and an invalid section, only the valid section is returned.\n  Run({\"set\", \"foo\", \"bar\"});  // set some data\n  auto resp = Run({\"info\", \"replication\", \"invalidsection\"});\n  auto info = resp.GetString();\n  EXPECT_NE(info.find(\"# Replication\"), std::string::npos);\n  EXPECT_EQ(info.find(\"# invalidsection\"), std::string::npos);\n}\n\n// DEBUG POPULATE with val_size=0 caused SIGFPE (division by zero) in DoPopulateBatch.\nTEST_F(ServerFamilyTest, DebugPopulateZeroValSize) {\n  // val_size=0 with the default element count (1) must not crash the server.\n  auto resp = Run({\"DEBUG\", \"POPULATE\", \"1\", \"key\", \"0\"});\n  EXPECT_THAT(resp, ErrArg(\"val_size must be positive\"));\n}\n\nTEST_F(ServerFamilyTest, MemoryArenaSummary) {\n  auto resp = Run({\"MEMORY\", \"ARENA\", \"SUMMARY\"});\n  const auto response = resp.GetString();\n\n  EXPECT_THAT(response, 
HasSubstr(\"BlockSize\"));\n\n  for (const auto shard_id : std::views::iota(0UL, shard_set->size())) {\n    EXPECT_THAT(response, HasSubstr(\"Arena statistics for thread \" + std::to_string(shard_id)));\n  }\n\n  EXPECT_THAT(response, HasSubstr(\"Arena statistics for machine\"));\n\n  resp = Run({\"MEMORY\", \"ARENA\", \"SUMMARY\", \"0\"});\n  EXPECT_THAT(resp, ErrArg(\"syntax error\"));\n\n  resp = Run({\"MEMORY\", \"ARENA\", \"SUMMARY\", \"X\"});\n  EXPECT_THAT(resp, ErrArg(\"syntax error\"));\n\n  resp = Run({\"MEMORY\", \"ARENA\", \"SUMMARY\", \"BACKING\"});\n  EXPECT_THAT(resp.GetString(), HasSubstr(\"BlockSize\"));\n\n  resp = Run({\"MEMORY\", \"ARENA\", \"SUMMARY\", \"BACKING\", \"0\"});\n  EXPECT_THAT(resp, ErrArg(\"syntax error\"));\n\n  resp = Run({\"MEMORY\", \"ARENA\"});\n  EXPECT_THAT(resp.GetString(), HasSubstr(\"Count\"));\n}\n\n}  // namespace dfly\n"
  },
  {
    "path": "src/server/server_state.cc",
    "content": "// Copyright 2022, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#include \"server/server_state.h\"\n\n#include <mimalloc.h>\n\nextern \"C\" {\n#include \"redis/zmalloc.h\"\n}\n\n#include \"base/flag_utils.h\"\n#include \"base/flags.h\"\n#include \"base/logging.h\"\n#include \"facade/conn_context.h\"\n#include \"facade/dragonfly_connection.h\"\n#include \"facade/facade_stats.h\"\n#include \"server/common.h\"\n#include \"server/journal/journal.h\"\n#include \"util/listener_interface.h\"\n\nusing facade::operator\"\"_KB;\n\nABSL_FLAG(uint32_t, interpreter_per_thread, 10, \"Lua interpreters per thread\");\nABSL_FLAG(uint32_t, timeout, 0,\n          \"Close the connection after it is idle for N seconds (0 to disable)\");\nABSL_FLAG(uint32_t, send_timeout, 0,\n          \"Close the connection after it is stuck on send for N seconds (0 to disable)\");\n\nABSL_FLAG(double, rss_oom_deny_ratio, 1.25,\n          \"When the ratio between maxmemory and RSS memory exceeds this value, commands marked as \"\n          \"DENYOOM will fail with OOM error and new connections to non-admin port will be \"\n          \"rejected. Negative value disables this feature.\");\n\nABSL_FLAG(size_t, serialization_max_chunk_size, 64_KB,\n          \"Maximum size of a value that may be serialized at once during snapshotting or full \"\n          \"sync. Values bigger than this threshold will be serialized using streaming \"\n          \"serialization. 
0 - to disable streaming mode\");\nABSL_FLAG(uint32_t, max_squashed_cmd_num, 100,\n          \"Max number of commands squashed in a single shard during squash optimizaiton\");\n\nnamespace dfly {\n\nusing namespace std;\nusing namespace std::chrono_literals;\n\n__thread ServerState* ServerState::state_ = nullptr;\n\nfacade::ConnectionStats* ServerState::tl_connection_stats() {\n  return &facade::tl_facade_stats->conn_stats;\n}\n\nServerState::Stats::Stats(unsigned num_shards)\n    : tx_width_freq_arr(num_shards), squash_width_freq_arr(num_shards) {\n}\n\nServerState::Stats& ServerState::Stats::Add(const ServerState::Stats& other) {\n  static_assert(sizeof(Stats) == 26 * 8, \"Stats size mismatch\");\n\n#define ADD(x) this->x += (other.x)\n\n  ADD(eval_io_coordination_cnt);\n\n  ADD(eval_shardlocal_coordination_cnt);\n  ADD(eval_squashed_flushes);\n\n  ADD(tx_global_cnt);\n  ADD(tx_normal_cnt);\n  ADD(tx_inline_runs);\n  ADD(tx_schedule_cancel_cnt);\n\n  ADD(multi_squash_hops);\n  ADD(multi_squash_exec_hop_usec);\n  ADD(multi_squash_exec_reply_usec);\n  ADD(squashed_commands);\n  ADD(squash_stats_ignored);\n  ADD(blocking_commands_in_pipelines);\n  ADD(blocked_on_interpreter);\n  ADD(rdb_save_usec);\n  ADD(rdb_save_count);\n\n  ADD(big_value_preemptions);\n  ADD(compressed_blobs);\n\n  ADD(oom_error_cmd_cnt);\n  ADD(conn_timeout_events);\n  ADD(psync_requests_total);\n\n  if (this->tx_width_freq_arr.size() > 0) {\n    DCHECK_EQ(this->tx_width_freq_arr.size(), other.tx_width_freq_arr.size());\n    this->tx_width_freq_arr += other.tx_width_freq_arr;\n  } else {\n    this->tx_width_freq_arr = other.tx_width_freq_arr;\n  }\n  if (this->squash_width_freq_arr.size() > 0) {\n    DCHECK_EQ(this->squash_width_freq_arr.size(), other.squash_width_freq_arr.size());\n    this->squash_width_freq_arr += other.squash_width_freq_arr;\n  } else {\n    this->squash_width_freq_arr = other.squash_width_freq_arr;\n  }\n\n  ADD(stored_cmd_bytes);\n  return *this;\n#undef ADD\n}\n\nvoid 
MonitorsRepo::Add(facade::Connection* connection) {\n  VLOG(1) << \"register connection \"\n          << \" at address 0x\" << std::hex << (const void*)connection << \" for thread \"\n          << util::ProactorBase::me()->GetPoolIndex();\n\n  monitors_.push_back(connection);\n}\n\nvoid MonitorsRepo::Remove(const facade::Connection* conn) {\n  auto it = std::find_if(monitors_.begin(), monitors_.end(),\n                         [&conn](const auto& val) { return val == conn; });\n  if (it != monitors_.end()) {\n    VLOG(1) << \"removing connection 0x\" << std::hex << conn << \" releasing token\";\n    monitors_.erase(it);\n  } else {\n    VLOG(1) << \"no connection 0x\" << std::hex << conn << \" found in the registered list here\";\n  }\n}\n\nvoid MonitorsRepo::NotifyChangeCount(bool added) {\n  if (added) {\n    ++global_count_;\n  } else {\n    DCHECK(global_count_ > 0);\n    --global_count_;\n  }\n}\n\nServerState::ServerState() : interpreter_mgr_{absl::GetFlag(FLAGS_interpreter_per_thread)} {\n  CHECK(mi_heap_get_backing() == mi_heap_get_default());\n\n  mi_heap_t* tlh = mi_heap_new();\n  init_zmalloc_threadlocal(tlh);\n  data_heap_ = tlh;\n\n  UpdateFromFlags();\n}\n\nServerState::~ServerState() {\n  watcher_fiber_.JoinIfNeeded();\n}\n\nvoid ServerState::Init(uint32_t thread_index, uint32_t num_shards,\n                       util::ListenerInterface* main_listener, acl::UserRegistry* registry) {\n  state_ = new ServerState();\n  state_->gstate_ = GlobalState::ACTIVE;\n  state_->thread_index_ = thread_index;\n  state_->user_registry = registry;\n  state_->stats = Stats(num_shards);\n  if (main_listener) {\n    state_->watcher_fiber_ = util::fb2::Fiber(\n        util::fb2::Launch::post, \"ConnectionsWatcher\",\n        [state = state_, main_listener] { state->ConnectionsWatcherFb(main_listener); });\n  }\n}\n\nvoid ServerState::Destroy() {\n  delete state_;\n  state_ = nullptr;\n}\n\nvoid ServerState::EnterLameDuck() {\n  gstate_ = GlobalState::SHUTTING_DOWN;\n  
watcher_cv_.notify_all();\n}\n\nServerState::MemoryUsageStats ServerState::GetMemoryUsage(uint64_t now_ns) {\n  static constexpr uint64_t kCacheEveryNs = 1000;\n  if (now_ns > used_mem_last_update_ + kCacheEveryNs) {\n    used_mem_last_update_ = now_ns;\n    memory_stats_cached_.used_mem = used_mem_current.load(std::memory_order_relaxed);\n    memory_stats_cached_.rss_mem = rss_mem_current.load(std::memory_order_relaxed);\n  }\n  return memory_stats_cached_;\n}\n\nbool ServerState::AllowInlineScheduling() const {\n  // We can't allow inline scheduling during a full sync, because then journaling transactions\n  // will be scheduled before RdbLoader::LoadItemsBuffer is finished. We can't use the regular\n  // locking mechanism because RdbLoader is not using transactions.\n  if (gstate_ == GlobalState::LOADING)\n    return false;\n\n  // Journal callbacks can preempt; This means we have to disallow inline scheduling\n  // because then we might interleave the callbacks loop from an inlined-scheduled command\n  // and a normally-scheduled command.\n  // The problematic loop is in JournalSlice::AddLogRecord, going over all the callbacks.\n\n  if (journal::HasRegisteredCallbacks())\n    return false;\n\n  return true;\n}\n\nvoid ServerState::SetPauseState(ClientPause state, bool start) {\n  client_pauses_[int(state)] += (start ? 1 : -1);\n  if (!client_pauses_[int(state)]) {\n    client_pause_ec_.notifyAll();\n  }\n}\n\nvoid ServerState::AwaitPauseState(bool is_write) {\n  client_pause_ec_.await([is_write, this]() {\n    return client_pauses_[int(ClientPause::ALL)] == 0 &&\n           (!is_write || client_pauses_[int(ClientPause::WRITE)] == 0);\n  });\n}\n\nvoid ServerState::DecommitMemory(uint8_t flags) {\n  if (flags & kDataHeap) {\n    mi_heap_collect(data_heap(), true);\n  }\n  if (flags & kBackingHeap) {\n    mi_heap_collect(mi_heap_get_backing(), true);\n  }\n\n  if (flags & kGlibcmalloc) {\n    // trims the memory (reduces RSS usage) from the malloc allocator. 
It is not present in\n    // MUSL lib.\n#ifdef __GLIBC__\n// There is an issue with malloc_trim and sanitizers because asan replaces malloc but is not\n// aware of malloc_trim, which causes malloc_trim to segfault because it's not initialized properly\n#ifndef ABSL_HAVE_ADDRESS_SANITIZER\n    malloc_trim(0);\n#endif\n#endif\n  }\n}\n\nvoid ServerState::UpdateFromFlags() {\n  rss_oom_deny_ratio = absl::GetFlag(FLAGS_rss_oom_deny_ratio);\n  serialization_max_chunk_size = absl::GetFlag(FLAGS_serialization_max_chunk_size);\n  max_squash_cmd_num = absl::GetFlag(FLAGS_max_squashed_cmd_num);\n}\n\nvector<string> ServerState::GetMutableFlagNames() {\n  return base::GetFlagNames(FLAGS_rss_oom_deny_ratio, FLAGS_serialization_max_chunk_size,\n                            FLAGS_max_squashed_cmd_num);\n}\n\nInterpreter* ServerState::BorrowInterpreter() {\n  stats.blocked_on_interpreter++;\n  auto* ptr = interpreter_mgr_.Get();\n  stats.blocked_on_interpreter--;\n  return ptr;\n}\n\nvoid ServerState::ReturnInterpreter(Interpreter* ir) {\n  interpreter_mgr_.Return(ir);\n}\n\nvoid ServerState::FlushScriptCache() {\n  cached_script_params_.clear();\n  interpreter_mgr_.Reset();\n}\n\nvoid ServerState::AlterInterpreters(std::function<void(Interpreter*)> modf) {\n  interpreter_mgr_.Alter(std::move(modf));\n}\n\nServerState* ServerState::SafeTLocal() {\n  // https://stackoverflow.com/a/75622732\n  asm volatile(\"\");\n  return state_;\n}\n\nbool ServerState::ShouldLogSlowCmd(unsigned latency_usec) const {\n  return slow_log_shard_.IsEnabled() && latency_usec >= log_slower_than_usec;\n}\n\nvoid ServerState::ConnectionsWatcherFb(util::ListenerInterface* main) {\n  optional<facade::Connection::WeakRef> last_reference;\n\n  while (true) {\n    util::fb2::NoOpLock noop;\n    if (watcher_cv_.wait_for(noop, 1s, [this] { return gstate_ == GlobalState::SHUTTING_DOWN; })) {\n      break;\n    }\n\n    const uint32_t timeout = absl::GetFlag(FLAGS_timeout);\n    const uint32_t send_timeout = 
absl::GetFlag(FLAGS_send_timeout);\n    VLOG(1) << \"ConnectionsWatcherFb: timeout=\" << timeout << \", send_timeout=\" << send_timeout;\n\n    if (timeout == 0 && send_timeout == 0) {\n      continue;\n    }\n\n    facade::Connection* from = nullptr;\n    if (last_reference && !last_reference->IsExpired()) {\n      from = last_reference->Get();\n    }\n\n    // We use weak refs, because ShutdownSelf below can potentially block the fiber,\n    // and during this time some of the connections might be destroyed. Weak refs allow checking\n    // validity of each connection.\n    vector<facade::Connection::WeakRef> conn_refs;\n\n    auto cb = [&](unsigned thread_index, util::Connection* conn) {\n      facade::Connection* dfly_conn = static_cast<facade::Connection*>(conn);\n      using Phase = facade::Connection::Phase;\n      auto phase = dfly_conn->phase();\n      bool is_replica = true;\n      if (dfly_conn->cntx()) {\n        is_replica = dfly_conn->cntx()->replica_conn;\n      }\n\n      bool idle_read = timeout != 0 && !is_replica && phase == Phase::READ_SOCKET &&\n                       dfly_conn->idle_time() > timeout;\n      bool stuck_sending = send_timeout != 0 && !is_replica && dfly_conn->IsSending() &&\n                           dfly_conn->GetSendWaitTimeSec() > send_timeout;\n\n      VLOG(2) << \"Connection check: \" << dfly_conn->GetClientInfo()\n              << \", phase=\" << static_cast<int>(phase) << \", idle_time=\" << dfly_conn->idle_time()\n              << \", is_replica=\" << is_replica << \", is_sending=\" << dfly_conn->IsSending()\n              << \", idle_read=\" << idle_read << \", stuck_sending=\" << stuck_sending;\n\n      if (idle_read || stuck_sending) {\n        conn_refs.push_back(dfly_conn->Borrow());\n      }\n    };\n\n    util::Connection* next = main->TraverseConnectionsOnThread(cb, 100, from);\n    if (next) {\n      last_reference = static_cast<facade::Connection*>(next)->Borrow();\n    } else {\n      
last_reference.reset();\n    }\n\n    VLOG(1) << \"Found \" << conn_refs.size() << \" connections to close due to timeout\";\n    for (auto& ref : conn_refs) {\n      facade::Connection* conn = ref.Get();\n      if (conn) {\n        VLOG(1) << \"Closing connection due to timeout: \" << conn->GetClientInfo();\n        conn->ShutdownSelfBlocking();\n        stats.conn_timeout_events++;\n      }\n    }\n  }\n}\n\nvoid ServerState::UnsubscribeSlotsAndUpdateChannelStore(const ChannelStore::ChannelsSubMap& sub_map,\n                                                        ChannelStore* replacement) {\n  channel_store_->UnsubscribeConnectionsFromDeletedSlots(sub_map, thread_index_);\n  channel_store_ = replacement;\n}\n\nvoid ServerState::RecordCmd(bool is_main_conn) {\n  if (is_main_conn) {\n    ++tl_connection_stats()->command_cnt_main;\n  } else {\n    ++tl_connection_stats()->command_cnt_other;\n  }\n  qps_.Inc();\n}\n}  // end of namespace dfly\n"
  },
  {
    "path": "src/server/server_state.h",
    "content": "// Copyright 2022, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#pragma once\n\n#include <optional>\n#include <valarray>\n#include <vector>\n\n#include \"base/histogram.h\"\n#include \"core/interpreter.h\"\n#include \"server/acl/acl_log.h\"\n#include \"server/channel_store.h\"\n#include \"server/common_types.h\"\n#include \"server/script_mgr.h\"\n#include \"server/slowlog.h\"\n#include \"util/sliding_counter.h\"\n\ntypedef struct mi_heap_s mi_heap_t;\n\nnamespace facade {\nclass Connection;\nstruct ConnectionStats;\n}  // namespace facade\n\nnamespace util {\nclass ListenerInterface;\n}\n\nnamespace dfly {\n\nnamespace acl {\nclass UserRegistry;\n}  // namespace acl\n\n// This would be used as a thread local storage of sending\n// monitor messages.\n// Each thread will have its own list of all the connections that are\n// used for monitoring. When a connection is set to monitor it would register\n// itself to this list on all i/o threads. When a new command is dispatched,\n// and this list is not empty, it would send in the same thread context as then\n// thread that registered here the command.\n// Note about performance: we are assuming that we would not have many connections\n// that are registered here. This is not pub sub where it must be high performance\n// and may support many to many with tens or more of connections. It is assumed that\n// since monitoring is for debugging only, we would have less than 1 in most cases.\n// Also note that we holding this list on the thread level since this is the context\n// at which this would run. It also minimized the number of copied for this list.\nclass MonitorsRepo {\n public:\n  using MonitorVec = std::vector<facade::Connection*>;\n\n  // This function adds a new connection to be monitored. This function only add\n  // new connection that belong to this thread! 
Must not be called outside of this\n  // thread context\n  void Add(facade::Connection* conn);\n\n  // This function removes a connection that was monitored. This function only removes\n  // a connection that belongs to this thread! Must not be called outside of this\n  // thread context\n  void Remove(const facade::Connection* conn);\n\n  // We have for each thread the total number of monitors in the application.\n  // So this call is thread safe since we hold a copy of this for each thread.\n  // If this returns true, then we don't need to run the monitor operation at all.\n  bool Empty() const {\n    return global_count_ == 0u;\n  }\n\n  // This function is run on all threads to either increment or decrement the \"shared\" counter\n  // of the monitors - it must be called as part of removing a monitor (for example\n  // when a connection is closed).\n  void NotifyChangeCount(bool added);\n\n  std::size_t Size() const {\n    return monitors_.size();\n  }\n\n  const MonitorVec& monitors() const {\n    return monitors_;\n  }\n\n private:\n  MonitorVec monitors_;            // save connections belonging to this thread only!\n  unsigned int global_count_ = 0;  // \"global\" means that we count the monitors of all threads\n};\n\nenum class ClientPause { WRITE, ALL };\n\n// Present in every server thread. This class differs from EngineShard. 
The latter manages\n// state around engine shards while the former represents coordinator/connection state.\n// There may be threads that handle engine shards but not IO, there may be threads that handle IO\n// but not engine shards and there can be threads that handle both.\n// Instances of ServerState are present only for threads that handle\n// IO and manage incoming connections.\nclass ServerState {  // public struct - to allow initialization.\n  ServerState(const ServerState&) = delete;\n  void operator=(const ServerState&) = delete;\n\n public:\n  struct Stats {\n    Stats(unsigned num_shards = 0);  // Default initialization should be valid for Add()\n\n    Stats(Stats&& other) = default;\n    Stats& operator=(Stats&& other) = default;\n    Stats(const Stats&) = delete;\n    Stats& operator=(const Stats& other) = delete;\n\n    Stats& Add(const Stats& other);\n\n    uint64_t tx_global_cnt = 0;\n    uint64_t tx_normal_cnt = 0;\n    uint64_t tx_inline_runs = 0;\n    uint64_t tx_schedule_cancel_cnt = 0;\n\n    uint64_t eval_io_coordination_cnt = 0;\n    uint64_t eval_shardlocal_coordination_cnt = 0;\n    uint64_t eval_squashed_flushes = 0;\n\n    uint64_t multi_squash_hops = 0;\n    uint64_t multi_squash_exec_hop_usec = 0;\n    uint64_t multi_squash_exec_reply_usec = 0;\n    uint64_t squashed_commands = 0;\n    uint64_t squash_stats_ignored = 0;\n    uint64_t blocking_commands_in_pipelines = 0;\n    uint64_t blocked_on_interpreter = 0;\n\n    uint64_t rdb_save_usec = 0;\n    uint64_t rdb_save_count = 0;\n\n    uint64_t big_value_preemptions = 0;\n    uint64_t compressed_blobs = 0;\n\n    // Number of times we rejected command dispatch due to OOM condition.\n    uint64_t oom_error_cmd_cnt = 0;\n    uint32_t conn_timeout_events = 0;\n    uint64_t psync_requests_total = 0;\n    std::valarray<uint64_t> tx_width_freq_arr, squash_width_freq_arr;\n\n    // Memory size of stored commands during multi-exec in connections\n    size_t stored_cmd_bytes = 0;\n  };\n\n  // 
Unsafe version.\n  // Do not use after fiber migration because it can cause a data race.\n  static ServerState* tlocal() {\n    return state_;\n  }\n\n  // Safe version.\n  // Calls to tlocal() before and after a fiber migrates to a different thread may both\n  // return the thread local of the thread that ran the fiber before the migration. Use this\n  // function to avoid this and access the correct thread local after the migration.\n  static ServerState* __attribute__((noinline)) SafeTLocal();\n\n  static facade::ConnectionStats* tl_connection_stats();\n\n  ServerState();\n  ~ServerState();\n\n  static void Init(uint32_t thread_index, uint32_t num_shards,\n                   util::ListenerInterface* main_listener, acl::UserRegistry* registry);\n  static void Destroy();\n\n  void EnterLameDuck();\n\n  void TxCountInc() {\n    ++live_transactions_;\n  }\n\n  void TxCountDec() {\n    --live_transactions_;  // can go negative since we can start on one thread and end on another.\n  }\n\n  int64_t live_transactions() const {\n    return live_transactions_;\n  }\n\n  GlobalState gstate() const {\n    return gstate_;\n  }\n\n  void set_gstate(GlobalState s) {\n    gstate_ = s;\n  }\n\n  struct MemoryUsageStats {\n    uint64_t used_mem = 0;\n    uint64_t rss_mem = 0;\n  };\n\n  MemoryUsageStats GetMemoryUsage(uint64_t now_ns);\n\n  bool AllowInlineScheduling() const;\n\n  // Borrow interpreter from interpreter pool, return it with ReturnInterpreter.\n  // Will block if no interpreters are available. Use with caution!\n  Interpreter* BorrowInterpreter();\n\n  // Return interpreter to internal manager to be re-used.\n  void ReturnInterpreter(Interpreter*);\n\n  void FlushScriptCache();\n\n  // Invoke function on all free interpreters. 
They are marked atomically as\n  // used and the function is allowed to suspend.\n  void AlterInterpreters(std::function<void(Interpreter*)> modf);\n\n  // Returns sum of all requests in the last 6 seconds\n  // (not including the current one).\n  uint32_t MovingSum6() const {\n    return qps_.SumTail();\n  }\n\n  void RecordCmd(bool is_main_conn);\n\n  // data heap used by zmalloc and shards.\n  mi_heap_t* data_heap() {\n    return data_heap_;\n  }\n\n  constexpr MonitorsRepo& Monitors() {\n    return monitors_;\n  }\n\n  const absl::flat_hash_map<std::string, base::Histogram>& call_latency_histos() const {\n    return call_latency_histos_;\n  }\n\n  void RecordCallLatency(std::string_view sha, uint64_t latency_usec) {\n    call_latency_histos_[sha].Add(latency_usec);\n  }\n\n  void SetScriptParams(const ScriptMgr::ScriptKey& key, ScriptMgr::ScriptParams params) {\n    cached_script_params_[key] = params;\n  }\n\n  std::optional<ScriptMgr::ScriptParams> GetScriptParams(const ScriptMgr::ScriptKey& key) {\n    auto it = cached_script_params_.find(key);\n    return it != cached_script_params_.end() ? std::optional{it->second} : std::nullopt;\n  }\n\n  uint32_t thread_index() const {\n    return thread_index_;\n  }\n\n  ChannelStore* channel_store() const {\n    return channel_store_;\n  }\n\n  void UpdateChannelStore(ChannelStore* replacement) {\n    channel_store_ = replacement;\n  }\n\n  void UnsubscribeSlotsAndUpdateChannelStore(const ChannelStore::ChannelsSubMap& sub_map,\n                                             ChannelStore* replacement);\n\n  bool ShouldLogSlowCmd(unsigned latency_usec) const;\n\n  Stats stats;\n\n  bool is_master = true;\n  uint32_t log_slower_than_usec = UINT32_MAX;\n  uint32_t max_squash_cmd_num = 32;\n\n  acl::UserRegistry* user_registry;\n\n  acl::AclLog acl_log;\n\n  // Starts or ends a `CLIENT PAUSE` command. 
@state controls whether\n  // this is pausing only writes or every command, @start controls\n  // whether this is starting or ending the pause.\n  void SetPauseState(ClientPause state, bool start);\n\n  // Awaits until the pause is over and the command can execute.\n  // @is_write controls whether the command is a write command or not.\n  void AwaitPauseState(bool is_write);\n\n  bool IsPaused() const {\n    return (client_pauses_[0] + client_pauses_[1]) > 0;\n  }\n\n  SlowLogShard& GetSlowLog() {\n    return slow_log_shard_;\n  };\n\n  // Tries to return as much RSS memory as possible to the OS.\n  // Decommits 3 possible heaps according to the flags.\n  // For kGlibcmalloc the heap is global for the process, for others it's specific only\n  // for this thread.\n  enum : uint8_t {\n    kDataHeap = 1,\n    kBackingHeap = 2,\n    kGlibcmalloc = 4,\n    kAllMemory = kDataHeap | kBackingHeap | kGlibcmalloc\n  };\n  void DecommitMemory(uint8_t flags);\n\n  void UpdateFromFlags();                                 // Update configuration from flags\n  static std::vector<std::string> GetMutableFlagNames();  // Dependencies of UpdateFromFlags\n\n  // Exec descriptor frequency count for this thread.\n  absl::flat_hash_map<std::string, unsigned> exec_freq_count;\n  double rss_oom_deny_ratio;\n  size_t serialization_max_chunk_size;\n\n private:\n  // A fiber constantly watching connections on the main listener.\n  void ConnectionsWatcherFb(util::ListenerInterface* main);\n\n  int64_t live_transactions_ = 0;\n  SlowLogShard slow_log_shard_;\n  mi_heap_t* data_heap_;\n\n  InterpreterManager interpreter_mgr_;\n  absl::flat_hash_map<ScriptMgr::ScriptKey, ScriptMgr::ScriptParams> cached_script_params_;\n\n  ChannelStore* channel_store_;\n\n  GlobalState gstate_ = GlobalState::ACTIVE;\n\n  // To support concurrent `CLIENT PAUSE` commands correctly, we store the amount\n  // of current CLIENT PAUSE commands that are in effect. 
Blocked execution fibers\n  // should subscribe to `client_pause_ec_` through `AwaitPauseState` to be\n  // notified when the break is over.\n  int client_pauses_[2] = {};\n  util::fb2::EventCount client_pause_ec_;\n\n  // Monitors connections. Currently responsible for closing timed out connections.\n  util::fb2::Fiber watcher_fiber_;\n  util::fb2::CondVarAny watcher_cv_;\n\n  using Counter = util::SlidingCounter<7>;\n  Counter qps_;\n\n  MonitorsRepo monitors_;\n\n  absl::flat_hash_map<std::string, base::Histogram> call_latency_histos_;\n  uint32_t thread_index_ = 0;\n\n  uint64_t used_mem_last_update_ = 0;\n  MemoryUsageStats memory_stats_cached_;  // thread local cache of used and rss memory current\n\n  static __thread ServerState* state_;\n};\n\n}  // namespace dfly\n"
  },
  {
    "path": "src/server/set_family.cc",
    "content": "// Copyright 2022, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#include \"server/set_family.h\"\n\n#include \"server/family_utils.h\"\n\nextern \"C\" {\n#include \"redis/intset.h\"\n#include \"redis/redis_aux.h\"\n#include \"redis/util.h\"  // for string2ll\n}\n\n#include \"base/cycle_clock.h\"\n#include \"base/logging.h\"\n#include \"base/stl_util.h\"\n#include \"core/detail/listpack_wrap.h\"\n#include \"core/string_set.h\"\n#include \"facade/cmd_arg_parser.h\"\n#include \"server/acl/acl_commands_def.h\"\n#include \"server/command_registry.h\"\n#include \"server/conn_context.h\"\n#include \"server/container_utils.h\"\n#include \"server/db_slice.h\"\n#include \"server/engine_shard_set.h\"\n#include \"server/error.h\"\n#include \"server/journal/journal.h\"\n#include \"server/transaction.h\"\n\nnamespace dfly {\n\nusing namespace facade;\n\nusing namespace std;\n\nusing ResultStringVec = vector<OpResult<StringVec>>;\nusing ResultSetView = OpResult<absl::flat_hash_set<std::string_view>>;\nusing SvArray = vector<std::string_view>;\nusing SetType = pair<void*, unsigned>;\n\nnamespace {\n\n// Possible sources of new set entries\nusing NewEntries = std::variant<ArgSlice, absl::flat_hash_set<std::string_view>>;\n\nauto EntriesRange(const NewEntries& entries) {\n  return base::it::Wrap(cmn::kToSV, entries);\n}\n\nconstexpr uint32_t kMaxIntSetEntries = 256;\n\nbool IsDenseEncoding(const CompactObj& co) {\n  return co.Encoding() == kEncodingStrMap2;\n}\n\nintset* IntsetAddSafe(string_view val, intset* is, bool* success, bool* added) {\n  long long llval;\n  *added = false;\n  if (!string2ll(val.data(), val.size(), &llval)) {\n    *success = false;\n    return is;\n  }\n\n  uint8_t inserted = 0;\n  is = intsetAdd(is, llval, &inserted);\n  if (inserted) {\n    *added = true;\n    *success = intsetLen(is) <= kMaxIntSetEntries;\n  } else {\n    *added = false;\n    *success = true;\n  }\n\n  return is;\n}\n\nstruct 
StringSetWrapper {\n  StringSetWrapper(const CompactObj& obj, const DbContext& db_cntx)\n      : StringSetWrapper(obj.RObjPtr(), db_cntx.time_now_ms) {\n    DCHECK(IsDenseEncoding(obj));\n  }\n\n  StringSetWrapper(const SetType& st, const DbContext& db_cntx)\n      : StringSetWrapper(st.first, db_cntx.time_now_ms) {\n    DCHECK_EQ(st.second, kEncodingStrMap2);\n  }\n\n  static void Init(CompactObj* obj) {\n    obj->InitRobj(OBJ_SET, kEncodingStrMap2, CompactObj::AllocateMR<StringSet>());\n  }\n\n  unsigned Add(const NewEntries& entries, uint32_t ttl_sec, bool keepttl) const {\n    unsigned res = 0;\n    string_view members[StringSet::kMaxBatchLen];\n    size_t entries_len = std::visit([](const auto& e) { return e.size(); }, entries);\n    unsigned len = 0;\n    if (ss->BucketCount() < entries_len) {\n      ss->Reserve(entries_len);\n    }\n    for (string_view member : EntriesRange(entries)) {\n      members[len++] = member;\n      if (len == StringSet::kMaxBatchLen) {\n        res += ss->AddMany(absl::MakeSpan(members, StringSet::kMaxBatchLen), ttl_sec, keepttl);\n        len = 0;\n      }\n    }\n\n    if (len) {\n      res += ss->AddMany(absl::MakeSpan(members, len), ttl_sec, keepttl);\n    }\n\n    return res;\n  }\n\n  pair<unsigned, bool> Remove(const facade::ArgRange& entries) const {\n    unsigned removed = 0;\n    for (string_view member : entries)\n      removed += ss->Erase(member);\n    return {removed, ss->Empty()};\n  }\n\n  uint64_t Scan(uint64_t curs, const ScanOpts& scan_op, StringVec* res) const {\n    uint32_t count = scan_op.limit;\n    long maxiterations = count * 10;\n\n    const auto start_cycles = base::CycleClock::Now();\n    // Approximately 100usec\n    const uint64_t timeout_cycles = base::CycleClock::Now() + base::CycleClock::Frequency() / 10000;\n\n    do {\n      auto scan_callback = [&](sds ptr) {\n        if (string_view str{ptr, sdslen(ptr)}; scan_op.Matches(str))\n          res->emplace_back(str);\n      };\n      curs = 
ss->Scan(curs, scan_callback);\n    } while (curs && maxiterations-- && res->size() < count &&\n             (base::CycleClock::Now() - start_cycles) < timeout_cycles);\n    return curs;\n  }\n\n  explicit operator StringSet*() const {\n    return ss;\n  }\n\n  StringSet* operator->() const {\n    return ss;\n  }\n\n  auto Range() const {\n    auto transform = [](sds ptr) { return string_view{ptr, sdslen(ptr)}; };\n    return base::it::Transform(transform, base::it::Range(ss->begin(), ss->end()));\n  }\n\n private:\n  StringSetWrapper(void* robj_ptr, uint64_t now_ms) : ss(static_cast<StringSet*>(robj_ptr)) {\n    ss->set_time(MemberTimeSeconds(now_ms));\n  }\n\n  StringSet* const ss;\n};\n\n// returns (removed, isempty)\npair<unsigned, bool> RemoveSet(const DbContext& db_context, const facade::ArgRange& vals,\n                               CompactObj* set) {\n  if (set->Encoding() == kEncodingIntSet) {\n    intset* is = (intset*)set->RObjPtr();\n    long long llval;\n\n    unsigned removed = 0;\n    for (string_view val : vals) {\n      if (!string2ll(val.data(), val.size(), &llval)) {\n        continue;\n      }\n\n      int is_removed = 0;\n      is = intsetRemove(is, llval, &is_removed);\n      removed += is_removed;\n    }\n    set->SetRObjPtr(is);\n\n    return {removed, intsetLen(is) == 0};\n  } else {\n    return StringSetWrapper{*set, db_context}.Remove(vals);\n  }\n}\n\nvoid InitSet(const NewEntries& vals, CompactObj* set) {\n  bool int_set = true;\n  long long intv;\n\n  for (string_view v : EntriesRange(vals)) {\n    if (!string2ll(v.data(), v.size(), &intv)) {\n      int_set = false;\n      break;\n    }\n  }\n\n  if (int_set) {\n    intset* is = intsetNew();\n    set->InitRobj(OBJ_SET, kEncodingIntSet, is);\n  } else {\n    StringSetWrapper::Init(set);\n  }\n}\n\nuint32_t SetTypeLen(const DbContext& db_context, const SetType& set) {\n  if (set.second == kEncodingIntSet) {\n    return intsetLen((const intset*)set.first);\n  } else {\n    return 
StringSetWrapper(set, db_context)->UpperBoundSize();\n  }\n}\n\nbool IsInSet(const DbContext& db_context, const SetType& st, int64_t val) {\n  if (st.second == kEncodingIntSet)\n    return intsetFind((intset*)st.first, val);\n\n  char buf[32];\n  char* next = absl::numbers_internal::FastIntToBuffer(val, buf);\n  string_view str{buf, size_t(next - buf)};\n\n  return StringSetWrapper(st, db_context)->Contains(str);\n}\n\nbool IsInSet(const DbContext& db_context, const SetType& st, string_view member) {\n  if (st.second == kEncodingIntSet) {\n    long long llval;\n    if (!string2ll(member.data(), member.size(), &llval))\n      return false;\n\n    return intsetFind((intset*)st.first, llval);\n  } else {\n    return StringSetWrapper(st, db_context)->Contains(member);\n  }\n}\n\n// returns -3 if member is not found, -1 if no ttl is associated with this member.\nint32_t GetExpiry(const DbContext& db_context, const SetType& st, string_view member) {\n  if (st.second == kEncodingIntSet) {\n    long long llval;\n    if (!string2ll(member.data(), member.size(), &llval))\n      return -3;\n\n    return -1;\n  } else {\n    StringSetWrapper ss{st, db_context};\n    auto it = ss->Find(member);\n    if (it == ss->end())\n      return -3;\n\n    return it.HasExpiry() ? 
it.ExpiryTime() : -1;\n  }\n}\n\n// Removes arg from result.\nvoid DiffStrSet(const DbContext& db_context, const SetType& st,\n                absl::flat_hash_set<string>* result) {\n  for (string_view entry : StringSetWrapper{st, db_context}.Range())\n    result->erase(entry);\n}\n\nvoid InterStrSet(const DbContext& db_context, const vector<SetType>& vec, StringVec* result) {\n  for (string_view str : StringSetWrapper{vec.front(), db_context}.Range()) {\n    size_t j = 1;\n    for (j = 1; j < vec.size(); ++j) {\n      if (vec[j].first != vec.front().first && !IsInSet(db_context, vec[j], str)) {\n        break;\n      }\n    }\n\n    if (j == vec.size()) {\n      result->emplace_back(str);\n    }\n  }\n}\n\ntemplate <typename C = absl::flat_hash_set<string>>\nStringVec RandMemberStrSetPicky(StringSet* strset, size_t count) {\n  C picks;\n  picks.reserve(count);\n\n  size_t tries = 0;\n  while (picks.size() < count && tries++ < count * 2) {\n    auto member = *strset->GetRandomMember();\n    picks.insert(picks.end(), {member, sdslen(member)});\n  }\n\n  if constexpr (is_same_v<StringVec, C>)\n    return picks;\n  return StringVec{make_move_iterator(picks.begin()), make_move_iterator(picks.end())};\n}\n\nStringVec RandMemberStrSet(const DbContext& db_context, const CompactObj& co,\n                           PicksGenerator& generator, size_t picks_count) {\n  CHECK(IsDenseEncoding(co));\n  StringSetWrapper strset{co, db_context};\n\n  // If the set is small, extract entries with StringSet::GetRandomMember\n  if (picks_count * 5 < strset->UpperBoundSize()) {\n    StringSet* ss(strset);\n    if (bool unique = (dynamic_cast<UniquePicksGenerator*>(&generator) != nullptr); unique)\n      return RandMemberStrSetPicky(ss, picks_count);\n    else\n      return RandMemberStrSetPicky<StringVec>(ss, picks_count);\n  }\n\n  std::unordered_map<RandomPick, std::uint32_t> times_index_is_picked;\n  for (std::size_t i = 0; i < picks_count; i++) {\n    
times_index_is_picked[generator.Generate()]++;\n  }\n\n  StringVec result;\n  result.reserve(picks_count);\n\n  std::uint32_t ss_entry_index = 0;\n  for (string_view str : strset.Range()) {\n    auto it = times_index_is_picked.find(ss_entry_index++);\n    if (it != times_index_is_picked.end()) {\n      while (it->second--)\n        result.emplace_back(str);\n    }\n  }\n  /* Equal elements in the result are always successive. So, it is necessary to shuffle them */\n  absl::BitGen gen;\n  std::shuffle(result.begin(), result.end(), gen);\n\n  return result;\n}\n\nStringVec RandMemberSet(const DbContext& db_context, const CompactObj& co,\n                        PicksGenerator& generator, std::size_t picks_count) {\n  if (co.Encoding() == kEncodingIntSet) {\n    intset* is = static_cast<intset*>(co.RObjPtr());\n\n    StringVec result;\n    result.reserve(picks_count);\n\n    for (std::size_t i = 0; i < picks_count; i++) {\n      const std::size_t picked_index = generator.Generate();\n\n      int64_t value = 0;\n      CHECK_GT(intsetGet(is, picked_index, &value), std::uint8_t(0));\n\n      result.push_back(absl::StrCat(value));\n    }\n    return result;\n  }\n  return RandMemberStrSet(db_context, co, generator, picks_count);\n}\n\nvector<string> ToVec(absl::flat_hash_set<string>&& set) {\n  vector<string> result(set.size());\n  size_t i = 0;\n\n  // extract invalidates current iterator. 
therefore, we increment it first before extracting.\n  // hence the weird loop.\n  for (auto it = set.begin(); it != set.end();) {\n    result[i] = std::move(set.extract(it++).value());\n    ++i;\n  }\n\n  return result;\n}\n\nResultSetView UnionResultVec(const ResultStringVec& result_vec) {\n  absl::flat_hash_set<std::string_view> uniques;\n\n  for (const auto& val : result_vec) {\n    if (val || val.status() == OpStatus::SKIPPED) {\n      for (const string& s : val.value()) {\n        uniques.emplace(s);\n      }\n      continue;\n    }\n\n    if (val.status() != OpStatus::KEY_NOTFOUND) {\n      return val.status();\n    }\n  }\n\n  return uniques;\n}\n\nResultSetView DiffResultVec(const ResultStringVec& result_vec, ShardId src_shard) {\n  for (const auto& res : result_vec) {\n    if (res.status() == OpStatus::WRONG_TYPE)\n      return res.status();\n  }\n\n  absl::flat_hash_set<std::string_view> uniques;\n\n  for (const auto& val : result_vec[src_shard].value()) {\n    uniques.emplace(val);\n  }\n\n  for (unsigned i = 0; i < result_vec.size(); ++i) {\n    if (i == src_shard)\n      continue;\n\n    if (result_vec[i]) {\n      for (const string& s : result_vec[i].value()) {\n        uniques.erase(s);\n      }\n    }\n  }\n  return uniques;\n}\n\nOpResult<SvArray> InterResultVec(const ResultStringVec& result_vec, unsigned required_shard_cnt,\n                                 unsigned limit = 0) {\n  absl::flat_hash_map<std::string_view, unsigned> uniques;\n\n  for (const auto& res : result_vec) {\n    if (!res && !base::_in(res.status(), {OpStatus::SKIPPED, OpStatus::KEY_NOTFOUND}))\n      return res.status();\n  }\n\n  for (const auto& res : result_vec) {\n    if (res.status() == OpStatus::KEY_NOTFOUND)\n      return OpStatus::OK;  // empty set.\n  }\n\n  std::vector<const StringVec*> sorted_vec;\n  for (const auto& res : result_vec) {\n    if (res.status() == OpStatus::SKIPPED)\n      continue;\n    DCHECK(res);  // we handled it above.\n    
sorted_vec.push_back(&res.value());\n  }\n\n  // Sort the per shard-sorted sets\n  if (!sorted_vec.empty()) {\n    std::sort(sorted_vec.begin(), sorted_vec.end(),\n              [](const auto* lhs, const auto* rhs) { return lhs->size() < rhs->size(); });\n\n    for (const string& s : *sorted_vec[0]) {\n      uniques.emplace(s, 1);\n    }\n    // Remove the smallest\n    sorted_vec.erase(sorted_vec.begin());\n\n    for (const auto& res : sorted_vec) {\n      for (const string& s : *res) {\n        auto it = uniques.find(s);\n        if (it != uniques.end()) {\n          ++it->second;\n        }\n      }\n    }\n  }\n\n  SvArray result;\n  result.reserve(uniques.size());\n\n  for (const auto& k_v : uniques) {\n    if (k_v.second == required_shard_cnt) {\n      if (limit != 0 && result.size() >= limit)\n        return result;\n      result.push_back(k_v.first);\n    }\n  }\n\n  return result;\n}\n\nSvArray ToSvArray(const absl::flat_hash_set<std::string_view>& set) {\n  SvArray result;\n  result.reserve(set.size());\n  copy(set.begin(), set.end(), back_inserter(result));\n  return result;\n}\n\n// if overwrite is true then OpAdd writes vals into the key and discards its previous value.\nOpResult<uint32_t> OpAdd(const OpArgs& op_args, std::string_view key, const NewEntries& vals,\n                         bool overwrite, bool journal_update) {\n  auto& db_slice = op_args.GetDbSlice();\n  auto vals_it = EntriesRange(vals);\n\n  VLOG(2) << \"OpAdd(\" << key << \")\";\n\n  // overwrite - meaning we run in the context of 2-hop operation and we want\n  // to overwrite the key. 
However, if the set is empty it means we should delete the\n  // key if it exists.\n  if (overwrite && (vals_it.begin() == vals_it.end())) {\n    auto res_it = db_slice.FindMutable(op_args.db_cntx, key, OBJ_SET);\n    if (res_it) {\n      db_slice.DelMutable(op_args.db_cntx, std::move(*res_it));\n      if (journal_update && op_args.shard->journal()) {\n        RecordJournal(op_args, \"DEL\"sv, ArgSlice{key});\n      }\n    }\n    return OpStatus::OK;\n  }\n\n  // We can use std::nullopt here because we check the type later.\n  // If the overwrite is true, we will call InitSet that calles SetMeta\n  auto op_res = db_slice.AddOrFind(op_args.db_cntx, key, std::nullopt);\n  RETURN_ON_BAD_STATUS(op_res);\n  auto& add_res = *op_res;\n\n  PrimeValue& co = add_res.it->second;\n\n  if (!add_res.is_new) {\n    // for non-overwrite case it must be set.\n    if (!overwrite && co.ObjType() != OBJ_SET)\n      return OpStatus::WRONG_TYPE;\n\n    if (overwrite)  // Overwriting the value removes expiration\n      db_slice.RemoveExpire(op_args.db_cntx.db_index, add_res.it);\n  }\n\n  if (add_res.is_new || overwrite) {\n    // If we're overwriting an existing key (not a new one), we need to remove it from\n    // search indexes first. 
This prevents crashes when the key is indexed (e.g., HASH or JSON).\n    if (!add_res.is_new && overwrite) {\n      RemoveKeyFromIndexesIfNeeded(key, op_args.db_cntx, co, op_args.shard);\n    }\n\n    // does not store the values, merely sets the encoding.\n    // TODO: why not store the values as well?\n    InitSet(vals, &co);\n  }\n\n  uint32_t res = 0;\n\n  if (co.Encoding() == kEncodingIntSet) {\n    intset* is = (intset*)co.RObjPtr();\n    bool success = true;\n\n    for (auto val : vals_it) {\n      bool added = false;\n      is = IntsetAddSafe(val, is, &success, &added);\n      res += added;\n\n      if (!success) {\n        co.SetRObjPtr(is);\n\n        StringSet* ss = SetFamily::ConvertToStrSet(is, intsetLen(is));\n        if (!ss) {\n          return OpStatus::OUT_OF_MEMORY;\n        }\n\n        // frees 'is' on a way.\n        co.InitRobj(OBJ_SET, kEncodingStrMap2, ss);\n        break;\n      }\n    }\n\n    if (success)\n      co.SetRObjPtr(is);\n  }\n\n  if (co.Encoding() != kEncodingIntSet) {\n    res = StringSetWrapper{co, op_args.db_cntx}.Add(vals, UINT32_MAX, false);\n  }\n\n  // TODO: consider optimization to record real command if the replica is in stable_sync state\n  // and there is no slot migration process going on.\n  if (journal_update && op_args.shard->journal()) {\n    if (overwrite) {\n      RecordJournal(op_args, \"DEL\"sv, ArgSlice{key});\n    }\n    size_t size = visit([](auto& c) { return c.size(); }, vals);\n    vector<string_view> mapped(size + 1);\n    mapped[0] = key;\n    std::copy(vals_it.begin(), vals_it.end(), mapped.begin() + 1);\n    RecordJournal(op_args, \"SADD\"sv, mapped);\n  }\n  return res;\n}\n\nOpResult<uint32_t> OpAddEx(const OpArgs& op_args, string_view key, uint32_t ttl_sec,\n                           const NewEntries& vals, bool keepttl) {\n  auto& db_slice = op_args.GetDbSlice();\n\n  auto op_res = db_slice.AddOrFind(op_args.db_cntx, key, OBJ_SET);\n  RETURN_ON_BAD_STATUS(op_res);\n  auto& add_res = 
*op_res;\n\n  CompactObj& co = add_res.it->second;\n\n  if (add_res.is_new) {\n    StringSetWrapper::Init(&co);\n  } else {\n    // Update stats and trigger any handle the old value if needed.\n    if (co.Encoding() == kEncodingIntSet) {\n      intset* is = (intset*)co.RObjPtr();\n      StringSet* ss = SetFamily::ConvertToStrSet(is, intsetLen(is));\n      if (!ss) {\n        return OpStatus::OUT_OF_MEMORY;\n      }\n      co.InitRobj(OBJ_SET, kEncodingStrMap2, ss);\n    }\n\n    CHECK(IsDenseEncoding(co));\n  }\n\n  return StringSetWrapper{co, op_args.db_cntx}.Add(vals, ttl_sec, keepttl);\n}\n\nOpResult<uint32_t> OpRem(const OpArgs& op_args, string_view key, const facade::ArgRange& vals,\n                         bool journal_rewrite) {\n  auto& db_slice = op_args.GetDbSlice();\n  auto find_res = db_slice.FindMutable(op_args.db_cntx, key, OBJ_SET);\n  if (!find_res) {\n    return find_res.status();\n  }\n\n  CompactObj& co = find_res->it->second;\n  auto [removed, isempty] = RemoveSet(op_args.db_cntx, vals, &co);\n\n  find_res->post_updater.Run();\n\n  if (isempty) {\n    db_slice.Del(op_args.db_cntx, find_res->it);\n  }\n  if (removed && journal_rewrite && op_args.shard->journal()) {\n    vector<string_view> mapped(vals.Size() + 1);\n    mapped[0] = key;\n    std::copy(vals.begin(), vals.end(), mapped.begin() + 1);\n    RecordJournal(op_args, \"SREM\"sv, mapped);\n  }\n\n  return removed;\n}\n\n// For SMOVE. 
Comprised of 2 transactional steps: Find and Commit.\n// After Find Mover decides on the outcome of the operation, applies it in commit\n// and reports the result.\nclass Mover {\n public:\n  Mover(string_view src, string_view dest, string_view member, bool journal_rewrite)\n      : src_(src), dest_(dest), member_(member), journal_rewrite_(journal_rewrite) {\n  }\n\n  void Find(Transaction* t);\n  OpResult<unsigned> Commit(Transaction* t);\n\n private:\n  OpStatus OpFind(Transaction* t, EngineShard* es);\n  OpStatus OpMutate(Transaction* t, EngineShard* es);\n\n  string_view src_, dest_, member_;\n  OpResult<bool> found_[2];\n  bool journal_rewrite_;\n};\n\nOpStatus Mover::OpFind(Transaction* t, EngineShard* es) {\n  auto& db_slice = t->GetDbSlice(es->shard_id());\n  ShardArgs largs = t->GetShardArgs(es->shard_id());\n\n  // In case both src and dest are in the same shard, largs size will be 2.\n  DCHECK_LE(largs.Size(), 2u);\n\n  for (auto k : largs) {\n    unsigned index = (k == src_) ? 
0 : 1;\n    auto res = db_slice.FindReadOnly(t->GetDbContext(), k, OBJ_SET);\n    if (res && index == 0) {  // successful src find.\n      DCHECK(!res->is_done());\n      const CompactObj& val = res.value()->second;\n      SetType st{val.RObjPtr(), val.Encoding()};\n      found_[0] = IsInSet(t->GetDbContext(), st, member_);\n    } else {\n      found_[index] = res.status();\n    }\n  }\n\n  return OpStatus::OK;\n}\n\nOpStatus Mover::OpMutate(Transaction* t, EngineShard* es) {\n  ShardArgs largs = t->GetShardArgs(es->shard_id());\n  DCHECK_LE(largs.Size(), 2u);\n\n  OpArgs op_args = t->GetOpArgs(es);\n  for (auto k : largs) {\n    if (k == src_) {\n      CHECK_EQ(1u,\n               OpRem(op_args, k, ArgSlice{member_}, journal_rewrite_).value());  // must succeed.\n    } else {\n      DCHECK_EQ(k, dest_);\n      OpAdd(op_args, k, ArgSlice(&member_, 1), false, journal_rewrite_);\n    }\n  }\n\n  return OpStatus::OK;\n}\n\nvoid Mover::Find(Transaction* t) {\n  // non-concluding step.\n  t->Execute([this](Transaction* t, EngineShard* es) { return this->OpFind(t, es); }, false);\n}\n\nOpResult<unsigned> Mover::Commit(Transaction* t) {\n  OpResult<unsigned> res;\n  bool noop = false;\n\n  if (found_[0].status() == OpStatus::WRONG_TYPE || found_[1].status() == OpStatus::WRONG_TYPE) {\n    res = OpStatus::WRONG_TYPE;\n    noop = true;\n  } else if (!found_[0].value_or(false)) {\n    res = 0;\n    noop = true;\n  } else {\n    res = 1;\n    noop = (src_ == dest_);\n  }\n\n  if (noop) {\n    t->Conclude();\n  } else {\n    t->Execute([this](Transaction* t, EngineShard* es) { return this->OpMutate(t, es); }, true);\n  }\n\n  return res;\n}\n\n// Read-only OpUnion op on sets.\nOpResult<StringVec> OpUnion(const OpArgs& op_args, ShardArgs::Iterator start,\n                            ShardArgs::Iterator end) {\n  DCHECK(start != end);\n  absl::flat_hash_set<string> uniques;\n\n  for (; start != end; ++start) {\n    auto find_res = 
op_args.GetDbSlice().FindReadOnly(op_args.db_cntx, *start, OBJ_SET);\n    if (find_res) {\n      const PrimeValue& pv = find_res.value()->second;\n      if (IsDenseEncoding(pv)) {\n        StringSet* ss = (StringSet*)pv.RObjPtr();\n        ss->set_time(MemberTimeSeconds(op_args.db_cntx.time_now_ms));\n      }\n      container_utils::IterateSet(pv, [&uniques](container_utils::ContainerEntry ce) {\n        uniques.emplace(ce.ToString());\n        return true;\n      });\n      continue;\n    }\n\n    if (find_res.status() != OpStatus::KEY_NOTFOUND) {\n      return find_res.status();\n    }\n  }\n\n  return ToVec(std::move(uniques));\n}\n\n// Read-only OpDiff op on sets.\nOpResult<StringVec> OpDiff(const OpArgs& op_args, ShardArgs::Iterator start,\n                           ShardArgs::Iterator end) {\n  auto& db_slice = op_args.GetDbSlice();\n  DCHECK(start != end);\n  DVLOG(1) << \"OpDiff from \" << *start;\n  auto find_res = db_slice.FindReadOnly(op_args.db_cntx, *start, OBJ_SET);\n\n  if (!find_res) {\n    return find_res.status();\n  }\n\n  absl::flat_hash_set<string> uniques;\n  const PrimeValue& pv = find_res.value()->second;\n  if (IsDenseEncoding(pv)) {\n    StringSet* ss = (StringSet*)pv.RObjPtr();\n    ss->set_time(MemberTimeSeconds(op_args.db_cntx.time_now_ms));\n  }\n\n  container_utils::IterateSet(pv, [&uniques](container_utils::ContainerEntry ce) {\n    uniques.emplace(ce.ToString());\n    return true;\n  });\n\n  DCHECK(!uniques.empty());  // otherwise the key would not exist.\n\n  for (++start; start != end; ++start) {\n    auto diff_res = db_slice.FindReadOnly(op_args.db_cntx, *start, OBJ_SET);\n    if (!diff_res) {\n      if (diff_res.status() == OpStatus::WRONG_TYPE) {\n        return OpStatus::WRONG_TYPE;\n      }\n      continue;  // KEY_NOTFOUND\n    }\n\n    SetType st2{diff_res.value()->second.RObjPtr(), diff_res.value()->second.Encoding()};\n    if (st2.second == kEncodingIntSet) {\n      int ii = 0;\n      intset* is = (intset*)st2.first;\n  
    int64_t intele;\n      char buf[32];\n\n      while (intsetGet(is, ii++, &intele)) {\n        char* next = absl::numbers_internal::FastIntToBuffer(intele, buf);\n        uniques.erase(string_view{buf, size_t(next - buf)});\n      }\n    } else {\n      DiffStrSet(op_args.db_cntx, st2, &uniques);\n    }\n  }\n\n  return ToVec(std::move(uniques));\n}\n\n// Read-only OpInter op on sets.\nOpResult<StringVec> OpInter(const Transaction* t, EngineShard* es, bool remove_first) {\n  auto& db_slice = t->GetDbSlice(es->shard_id());\n  ShardArgs args = t->GetShardArgs(es->shard_id());\n  auto it = args.begin();\n  if (remove_first) {\n    ++it;\n  }\n  DCHECK(it != args.end());\n\n  StringVec result;\n  if (args.Size() == 1 + unsigned(remove_first)) {\n    auto find_res = db_slice.FindReadOnly(t->GetDbContext(), *it, OBJ_SET);\n    if (!find_res)\n      return find_res.status();\n\n    const PrimeValue& pv = find_res.value()->second;\n    if (IsDenseEncoding(pv)) {\n      StringSet* ss = (StringSet*)pv.RObjPtr();\n      ss->set_time(MemberTimeSeconds(t->GetDbContext().time_now_ms));\n    }\n\n    result.reserve(pv.Size());\n    container_utils::IterateSet(find_res.value()->second,\n                                [&result](container_utils::ContainerEntry ce) {\n                                  result.push_back(ce.ToString());\n                                  return true;\n                                });\n    return result;\n  }\n\n  vector<SetType> sets(args.Size() - int(remove_first));\n\n  OpStatus status = OpStatus::OK;\n  unsigned index = 0;\n  for (; it != args.end(); ++it) {\n    auto& dest = sets[index++];\n    auto find_res = db_slice.FindReadOnly(t->GetDbContext(), *it, OBJ_SET);\n    if (!find_res) {\n      if (status == OpStatus::OK || status == OpStatus::KEY_NOTFOUND ||\n          find_res.status() != OpStatus::KEY_NOTFOUND) {\n        status = find_res.status();\n      }\n      continue;\n    }\n    const PrimeValue& pv = find_res.value()->second;\n    
void* ptr = pv.RObjPtr();\n    dest = make_pair(ptr, pv.Encoding());\n  }\n\n  if (status != OpStatus::OK)\n    return status;\n\n  auto comp = [db_contx = t->GetDbContext()](const SetType& left, const SetType& right) {\n    return SetTypeLen(db_contx, left) < SetTypeLen(db_contx, right);\n  };\n\n  std::sort(sets.begin(), sets.end(), comp);\n\n  int encoding = sets.front().second;\n  result.reserve(SetTypeLen(t->GetDbContext(), sets.front()));\n  if (encoding == kEncodingIntSet) {\n    int ii = 0;\n    intset* is = (intset*)sets.front().first;\n    int64_t intele;\n\n    while (intsetGet(is, ii++, &intele)) {\n      size_t j = 1;\n      for (j = 1; j < sets.size(); j++) {\n        if (sets[j].first != is && !IsInSet(t->GetDbContext(), sets[j], intele))\n          break;\n      }\n\n      /* Only take action when all sets contain the member */\n      if (j == sets.size()) {\n        result.push_back(absl::StrCat(intele));\n      }\n    }\n  } else {\n    InterStrSet(t->GetDbContext(), sets, &result);\n  }\n\n  return result;\n}\n\nOpResult<StringVec> OpRandMember(const OpArgs& op_args, std::string_view key, int count) {\n  auto find_res = op_args.GetDbSlice().FindReadOnly(op_args.db_cntx, key, OBJ_SET);\n  if (!find_res)\n    return find_res.status();\n\n  const CompactObj& co = find_res.value()->second;\n\n  const std::uint32_t size = co.Size();\n  const bool picks_are_unique = count >= 0;\n  const std::uint32_t picks_count =\n      picks_are_unique ? 
std::min(static_cast<std::uint32_t>(count), size) : std::abs(count);\n\n  auto generator = [picks_are_unique, picks_count, size]() -> std::unique_ptr<PicksGenerator> {\n    if (picks_are_unique) {\n      return std::make_unique<UniquePicksGenerator>(picks_count, size);\n    } else {\n      return std::make_unique<NonUniquePicksGenerator>(size);\n    }\n  }();\n\n  return RandMemberSet(op_args.db_cntx, co, *generator, picks_count);\n}\n\n// count - how many elements to pop.\nOpResult<StringVec> OpPop(const OpArgs& op_args, string_view key, unsigned count) {\n  auto& db_cntx = op_args.db_cntx;\n  auto& db_slice = op_args.GetDbSlice();\n  auto find_res = db_slice.FindMutable(db_cntx, key, OBJ_SET);\n  if (!find_res) {\n    return find_res.status();\n  }\n\n  PrimeValue& co = find_res->it->second;\n\n  const std::uint32_t size = co.Size();\n  const std::uint32_t picks_count = std::min(count, size);\n\n  /* CASE 1:\n   * The number of requested elements is greater than or equal to\n   * the number of elements inside the set: simply return the whole set. */\n  if (count >= size) {\n    if (IsDenseEncoding(co)) {\n      StringSet* ss = (StringSet*)co.RObjPtr();\n      ss->set_time(MemberTimeSeconds(op_args.db_cntx.time_now_ms));\n    }\n\n    StringVec result;\n    result.reserve(picks_count);\n\n    container_utils::IterateSet(co, [&result](container_utils::ContainerEntry ce) {\n      result.push_back(ce.ToString());\n      return true;\n    });\n\n    // Delete the set as it is now empty\n    db_slice.DelMutable(op_args.db_cntx, std::move(*find_res));\n\n    // Replicate as DEL.\n    if (op_args.shard->journal()) {\n      RecordJournal(op_args, \"DEL\"sv, ArgSlice{key});\n    }\n    return result;\n  }\n\n  /* CASE 2:\n   * The number of requested elements is less than the number of elements inside the set.\n   * In this case, we need to select random members from the set and then remove them. 
*/\n  UniquePicksGenerator generator{picks_count, size};\n\n  // Select random members\n  StringVec result = RandMemberSet(db_cntx, co, generator, picks_count);\n\n  // Remove selected members\n  auto [removed, is_empty] = RemoveSet(db_cntx, result, &co);\n  find_res->post_updater.Run();\n\n  CHECK(!is_empty);\n\n  // Replicate as SREM with removed keys, because SPOP is not deterministic.\n  if (removed && op_args.shard->journal()) {\n    vector<string_view> mapped(result.size() + 1);\n    mapped[0] = key;\n    copy(result.begin(), result.end(), mapped.begin() + 1);\n    RecordJournal(op_args, \"SREM\"sv, mapped);\n  }\n\n  return result;\n}\n\nOpResult<StringVec> OpScan(const OpArgs& op_args, string_view key, uint64_t* cursor,\n                           const ScanOpts& scan_op) {\n  auto find_res = op_args.GetDbSlice().FindReadOnly(op_args.db_cntx, key, OBJ_SET);\n\n  if (!find_res) {\n    *cursor = 0;\n    return find_res.status();\n  }\n\n  auto it = find_res.value();\n  StringVec res;\n\n  if (it->second.Encoding() == kEncodingIntSet) {\n    intset* is = (intset*)it->second.RObjPtr();\n    int64_t intele;\n    uint32_t pos = 0;\n    while (intsetGet(is, pos++, &intele)) {\n      std::string int_str = absl::StrCat(intele);\n      if (scan_op.Matches(int_str)) {\n        res.push_back(int_str);\n      }\n    }\n    *cursor = 0;\n  } else {\n    *cursor = StringSetWrapper{it->second, op_args.db_cntx}.Scan(*cursor, scan_op, &res);\n  }\n\n  return res;\n}\n\nvoid SendNumeric(OpResult<uint32_t> result, CommandContext* cmd_cntx) {\n  switch (result.status()) {\n    case OpStatus::OK:\n      return cmd_cntx->SendLong(result.value());\n    case OpStatus::WRONG_TYPE:\n      return cmd_cntx->SendError(kWrongTypeErr);\n    default:\n      return cmd_cntx->SendLong(0);\n  }\n}\n\nstruct SetReplies {\n  explicit SetReplies(CommandContext* cntx)\n      : cmd_cntx(cntx), script(cntx->server_conn_cntx()->conn_state.script_info) {\n  }\n\n  template <typename T> void 
Send(vector<T> sv) {\n    if (script)  // output is sorted under scripts\n      sort(sv.begin(), sv.end());\n    auto replier = [vec = std::move(sv)](facade::SinkReplyBuilder* builder) {\n      auto* rb = static_cast<RedisReplyBuilder*>(builder);\n      rb->SendBulkStrArr(vec, CollectionType::SET);\n    };\n    cmd_cntx->ReplyWith(std::move(replier));\n  }\n\n  void Send(const ResultSetView& rsv) {\n    if (!rsv)\n      return cmd_cntx->SendError(rsv.status());\n\n    SvArray arr = ToSvArray(rsv.value());\n    Send(std::move(arr));\n  }\n\n  CommandContext* cmd_cntx;\n  bool script;\n};\n\nvoid CmdSAdd(CmdArgList args, CommandContext* cmd_cntx) {\n  string_view key = ArgS(args, 0);\n  auto values = args.subspan(1);\n\n  auto cb = [key, values](Transaction* t, EngineShard* shard) {\n    return OpAdd(t->GetOpArgs(shard), key, values, false, false);\n  };\n\n  OpResult<uint32_t> result = cmd_cntx->tx()->ScheduleSingleHopT(std::move(cb));\n  if (result) {\n    return cmd_cntx->SendLong(result.value());\n  }\n\n  cmd_cntx->SendError(result.status());\n}\n\nvoid CmdSIsMember(CmdArgList args, CommandContext* cmd_cntx) {\n  string_view key = ArgS(args, 0);\n  string_view val = ArgS(args, 1);\n\n  auto cb = [&](Transaction* t, EngineShard* shard) {\n    auto find_res = t->GetDbSlice(shard->shard_id()).FindReadOnly(t->GetDbContext(), key, OBJ_SET);\n\n    if (find_res) {\n      SetType st{find_res.value()->second.RObjPtr(), find_res.value()->second.Encoding()};\n      return IsInSet(t->GetDbContext(), st, val) ? OpStatus::OK : OpStatus::KEY_NOTFOUND;\n    }\n\n    return find_res.status();\n  };\n\n  OpResult<void> result = cmd_cntx->tx()->ScheduleSingleHop(std::move(cb));\n  SendNumeric(result ? 
OpResult<uint32_t>(1) : result.status(), cmd_cntx);\n}\n\nvoid CmdSMIsMember(CmdArgList args, CommandContext* cmd_cntx) {\n  string_view key = ArgS(args, 0);\n  auto members = args.subspan(1);\n\n  vector<int32_t> memberships(members.size());\n\n  auto cb = [&](Transaction* t, EngineShard* shard) {\n    DbContext db_cntx = t->GetDbContext();\n    auto find_res = t->GetDbSlice(shard->shard_id()).FindReadOnly(db_cntx, key, OBJ_SET);\n    if (find_res) {\n      SetType st{(*find_res)->second.RObjPtr(), find_res.value()->second.Encoding()};\n      for (size_t i = 0; i < members.size(); ++i)\n        memberships[i] = IsInSet(db_cntx, st, ToSV(members[i]));\n      ;\n      return OpStatus::OK;\n    }\n    return find_res.status();\n  };\n\n  OpResult<void> result = cmd_cntx->tx()->ScheduleSingleHop(std::move(cb));\n\n  auto replier = [result, memberships = std::move(memberships)](facade::SinkReplyBuilder* builder) {\n    auto* rb = static_cast<RedisReplyBuilder*>(builder);\n    if (result || result == OpStatus::KEY_NOTFOUND) {\n      rb->SendLongArr(absl::MakeConstSpan(memberships));\n    } else {\n      rb->SendError(result.status());\n    }\n  };\n  cmd_cntx->ReplyWith(std::move(replier));\n}\n\nvoid CmdSMove(CmdArgList args, CommandContext* cmd_cntx) {\n  string_view src = ArgS(args, 0);\n  string_view dest = ArgS(args, 1);\n  string_view member = ArgS(args, 2);\n\n  Mover mover{src, dest, member, true};\n  mover.Find(cmd_cntx->tx());\n\n  OpResult<unsigned> result = mover.Commit(cmd_cntx->tx());\n  if (!result) {\n    return cmd_cntx->SendError(result.status());\n  }\n\n  cmd_cntx->SendLong(result.value());\n}\n\nvoid CmdSRem(CmdArgList args, CommandContext* cmd_cntx) {\n  string_view key = ArgS(args, 0);\n  auto vals = args.subspan(1);\n\n  auto cb = [key, vals](Transaction* t, EngineShard* shard) {\n    return OpRem(t->GetOpArgs(shard), key, vals, false);\n  };\n\n  OpResult<uint32_t> result = cmd_cntx->tx()->ScheduleSingleHopT(std::move(cb));\n  
SendNumeric(result, cmd_cntx);\n}\n\nvoid CmdSCard(CmdArgList args, CommandContext* cmd_cntx) {\n  string_view key = ArgS(args, 0);\n\n  auto cb = [&](Transaction* t, EngineShard* shard) -> OpResult<uint32_t> {\n    auto find_res = t->GetDbSlice(shard->shard_id()).FindReadOnly(t->GetDbContext(), key, OBJ_SET);\n    if (!find_res) {\n      return find_res.status();\n    }\n\n    return find_res.value()->second.Size();\n  };\n\n  OpResult<uint32_t> result = cmd_cntx->tx()->ScheduleSingleHopT(std::move(cb));\n  SendNumeric(result, cmd_cntx);\n}\n\nvoid CmdSPop(CmdArgList args, CommandContext* cmd_cntx) {\n  string_view key = ArgS(args, 0);\n  unsigned count = 1;\n  if (args.size() > 1) {\n    string_view arg = ArgS(args, 1);\n    if (!absl::SimpleAtoi(arg, &count)) {\n      cmd_cntx->SendError(kInvalidIntErr);\n      return;\n    }\n  }\n\n  auto cb = [&](Transaction* t, EngineShard* shard) {\n    return OpPop(t->GetOpArgs(shard), key, count);\n  };\n\n  OpResult<StringVec> result = cmd_cntx->tx()->ScheduleSingleHopT(std::move(cb));\n  auto replier = [result = std::move(result),\n                  pop_single = (args.size() == 1)](facade::SinkReplyBuilder* builder) {\n    auto* rb = static_cast<RedisReplyBuilder*>(builder);\n    if (result || result.status() == OpStatus::KEY_NOTFOUND) {\n      if (pop_single) {  // SPOP key\n        if (result.status() == OpStatus::KEY_NOTFOUND) {\n          rb->SendNull();\n        } else {\n          DCHECK_EQ(1u, result.value().size());\n          rb->SendBulkString(result.value().front());\n        }\n      } else {  // SPOP key cnt\n        rb->SendBulkStrArr(*result, CollectionType::SET);\n      }\n      return;\n    }\n\n    rb->SendError(result.status());\n  };\n  cmd_cntx->ReplyWith(std::move(replier));\n}\n\nvoid CmdSDiff(CmdArgList args, CommandContext* cmd_cntx) {\n  ResultStringVec result_set(shard_set->size(), OpStatus::SKIPPED);\n  string_view src_key = ArgS(args, 0);\n  ShardId src_shard = Shard(src_key, 
result_set.size());\n\n  auto cb = [&](Transaction* t, EngineShard* shard) {\n    ShardArgs largs = t->GetShardArgs(shard->shard_id());\n    if (shard->shard_id() == src_shard) {\n      CHECK_EQ(src_key, largs.Front());\n      result_set[shard->shard_id()] = OpDiff(t->GetOpArgs(shard), largs.begin(), largs.end());\n    } else {\n      result_set[shard->shard_id()] = OpUnion(t->GetOpArgs(shard), largs.begin(), largs.end());\n    }\n\n    return OpStatus::OK;\n  };\n\n  cmd_cntx->tx()->ScheduleSingleHop(std::move(cb));\n  ResultSetView rsv = DiffResultVec(result_set, src_shard);\n  SetReplies{cmd_cntx}.Send(rsv);\n}\n\nvoid CmdSDiffStore(CmdArgList args, CommandContext* cmd_cntx) {\n  ResultStringVec result_set(shard_set->size(), OpStatus::SKIPPED);\n  string_view dest_key = ArgS(args, 0);\n  ShardId dest_shard = Shard(dest_key, result_set.size());\n  string_view src_key = ArgS(args, 1);\n  ShardId src_shard = Shard(src_key, result_set.size());\n\n  VLOG(1) << \"SDiffStore \" << src_key << \" \" << src_shard;\n\n  // read-only op\n  auto diff_cb = [&](Transaction* t, EngineShard* shard) {\n    ShardArgs largs = t->GetShardArgs(shard->shard_id());\n    OpArgs op_args = t->GetOpArgs(shard);\n    DCHECK(!largs.Empty());\n    ShardArgs::Iterator start = largs.begin();\n    ShardArgs::Iterator end = largs.end();\n    if (shard->shard_id() == dest_shard) {\n      CHECK_EQ(*start, dest_key);\n      ++start;\n      if (start == end)\n        return OpStatus::OK;\n    }\n\n    if (shard->shard_id() == src_shard) {\n      CHECK_EQ(src_key, *start);\n      result_set[shard->shard_id()] = OpDiff(op_args, start, end);  // Diff\n    } else {\n      result_set[shard->shard_id()] = OpUnion(op_args, start, end);  // Union\n    }\n\n    return OpStatus::OK;\n  };\n\n  cmd_cntx->tx()->Execute(std::move(diff_cb), false);\n  ResultSetView rsv = DiffResultVec(result_set, src_shard);\n  if (!rsv) {\n    cmd_cntx->tx()->Conclude();\n    cmd_cntx->SendError(rsv.status());\n    return;\n  
}\n\n  size_t result_size = rsv.value().size();\n  auto store_cb = [&](Transaction* t, EngineShard* shard) {\n    if (shard->shard_id() == dest_shard) {\n      OpAdd(t->GetOpArgs(shard), dest_key, std::move(rsv.value()), true, true);\n    }\n\n    return OpStatus::OK;\n  };\n\n  cmd_cntx->tx()->Execute(std::move(store_cb), true);\n  cmd_cntx->SendLong(result_size);\n}\n\nvoid CmdSMembers(CmdArgList args, CommandContext* cmd_cntx) {\n  auto cb = [](Transaction* t, EngineShard* shard) { return OpInter(t, shard, false); };\n\n  OpResult<StringVec> result = cmd_cntx->tx()->ScheduleSingleHopT(std::move(cb));\n\n  if (result || result.status() == OpStatus::KEY_NOTFOUND) {\n    SetReplies{cmd_cntx}.Send(std::move(*result));\n  } else {\n    cmd_cntx->SendError(result.status());\n  }\n}\n\nvoid CmdSRandMember(CmdArgList args, CommandContext* cmd_cntx) {\n  CmdArgParser parser{args};\n  string_view key = parser.Next();\n\n  bool is_count = parser.HasNext();\n  int count = is_count ? parser.Next<int>() : 1;\n\n  if (parser.HasNext())\n    return cmd_cntx->SendError(WrongNumArgsError(\"SRANDMEMBER\"));\n\n  if (auto err = parser.TakeError(); err)\n    return cmd_cntx->SendError(err.MakeReply());\n\n  const auto cb = [&](Transaction* t, EngineShard* shard) -> OpResult<StringVec> {\n    return OpRandMember(t->GetOpArgs(shard), key, count);\n  };\n\n  OpResult<StringVec> result = cmd_cntx->tx()->ScheduleSingleHopT(cb);\n\n  auto replier = [is_count, result = std::move(result)](facade::SinkReplyBuilder* builder) {\n    auto* rb = static_cast<RedisReplyBuilder*>(builder);\n    if (result || result == OpStatus::KEY_NOTFOUND) {\n      if (is_count) {\n        rb->SendBulkStrArr(*result, CollectionType::SET);\n      } else if (result->size()) {\n        rb->SendBulkString(result->front());\n      } else {\n        rb->SendNull();\n      }\n      return;\n    }\n    rb->SendError(result.status());\n  };\n  cmd_cntx->ReplyWith(std::move(replier));\n}\n\nvoid CmdSInter(CmdArgList args, 
CommandContext* cmd_cntx) {\n  ResultStringVec result_set(shard_set->size(), OpStatus::SKIPPED);\n\n  auto cb = [&](Transaction* t, EngineShard* shard) {\n    result_set[shard->shard_id()] = OpInter(t, shard, false);\n\n    return OpStatus::OK;\n  };\n\n  cmd_cntx->tx()->ScheduleSingleHop(std::move(cb));\n  OpResult<SvArray> result = InterResultVec(result_set, cmd_cntx->tx()->GetUniqueShardCnt());\n  if (result) {\n    SetReplies{cmd_cntx}.Send(std::move(*result));\n  } else {\n    cmd_cntx->SendError(result.status());\n  }\n}\n\nvoid CmdSInterStore(CmdArgList args, CommandContext* cmd_cntx) {\n  ResultStringVec result_set(shard_set->size(), OpStatus::SKIPPED);\n  string_view dest_key = ArgS(args, 0);\n  ShardId dest_shard = Shard(dest_key, result_set.size());\n  atomic_uint32_t inter_shard_cnt{0};\n\n  auto inter_cb = [&](Transaction* t, EngineShard* shard) {\n    ShardArgs largs = t->GetShardArgs(shard->shard_id());\n    if (shard->shard_id() == dest_shard) {\n      CHECK_EQ(largs.Front(), dest_key);\n      if (largs.Size() == 1)\n        return OpStatus::OK;\n    }\n    inter_shard_cnt.fetch_add(1, memory_order_relaxed);\n    result_set[shard->shard_id()] = OpInter(t, shard, shard->shard_id() == dest_shard);\n    return OpStatus::OK;\n  };\n\n  cmd_cntx->tx()->Execute(std::move(inter_cb), false);\n\n  OpResult<SvArray> result = InterResultVec(result_set, inter_shard_cnt.load(memory_order_relaxed));\n  if (!result) {\n    cmd_cntx->tx()->Conclude();\n    cmd_cntx->SendError(result.status());\n    return;\n  }\n\n  auto store_cb = [&](Transaction* t, EngineShard* shard) {\n    if (shard->shard_id() == dest_shard) {\n      OpAdd(t->GetOpArgs(shard), dest_key, result.value(), true, true);\n    }\n\n    return OpStatus::OK;\n  };\n\n  cmd_cntx->tx()->Execute(std::move(store_cb), true);\n  cmd_cntx->SendLong(result->size());\n}\n\nvoid CmdSInterCard(CmdArgList args, CommandContext* cmd_cntx) {\n  unsigned num_keys;\n  if (!absl::SimpleAtoi(ArgS(args, 0), &num_keys))\n 
   return cmd_cntx->SendError(kSyntaxErr);\n\n  unsigned limit = 0;\n  if (args.size() == (num_keys + 3) && ArgS(args, 1 + num_keys) == \"LIMIT\") {\n    if (!absl::SimpleAtoi(ArgS(args, num_keys + 2), &limit))\n      return cmd_cntx->SendError(\"limit can't be negative\");\n  } else if (args.size() > (num_keys + 1))\n    return cmd_cntx->SendError(kSyntaxErr);\n\n  ResultStringVec result_set(shard_set->size(), OpStatus::SKIPPED);\n  auto cb = [&](Transaction* t, EngineShard* shard) {\n    result_set[shard->shard_id()] = OpInter(t, shard, false);\n    return OpStatus::OK;\n  };\n\n  cmd_cntx->tx()->ScheduleSingleHop(std::move(cb));\n  OpResult<SvArray> result = InterResultVec(result_set, cmd_cntx->tx()->GetUniqueShardCnt(), limit);\n\n  if (result) {\n    return cmd_cntx->SendLong(result->size());\n  }\n  cmd_cntx->SendError(result.status());\n}\n\nvoid CmdSUnion(CmdArgList args, CommandContext* cmd_cntx) {\n  ResultStringVec result_set(shard_set->size());\n\n  auto cb = [&](Transaction* t, EngineShard* shard) {\n    ShardArgs largs = t->GetShardArgs(shard->shard_id());\n    result_set[shard->shard_id()] = OpUnion(t->GetOpArgs(shard), largs.begin(), largs.end());\n    return OpStatus::OK;\n  };\n\n  cmd_cntx->tx()->ScheduleSingleHop(std::move(cb));\n\n  ResultSetView unionset = UnionResultVec(result_set);\n  SetReplies{cmd_cntx}.Send(unionset);\n}\n\nvoid CmdSUnionStore(CmdArgList args, CommandContext* cmd_cntx) {\n  ResultStringVec result_set(shard_set->size(), OpStatus::SKIPPED);\n  string_view dest_key = ArgS(args, 0);\n  ShardId dest_shard = Shard(dest_key, result_set.size());\n\n  auto union_cb = [&](Transaction* t, EngineShard* shard) {\n    ShardArgs largs = t->GetShardArgs(shard->shard_id());\n    ShardArgs::Iterator start = largs.begin(), end = largs.end();\n    if (shard->shard_id() == dest_shard) {\n      CHECK_EQ(*start, dest_key);\n      ++start;\n      if (start == end)\n        return OpStatus::OK;\n    }\n    result_set[shard->shard_id()] = 
OpUnion(t->GetOpArgs(shard), start, end);\n    return OpStatus::OK;\n  };\n\n  cmd_cntx->tx()->Execute(std::move(union_cb), false);\n\n  ResultSetView unionset = UnionResultVec(result_set);\n  if (!unionset) {\n    cmd_cntx->tx()->Conclude();\n    cmd_cntx->SendError(unionset.status());\n    return;\n  }\n\n  size_t result_size = unionset.value().size();\n  auto store_cb = [&](Transaction* t, EngineShard* shard) {\n    if (shard->shard_id() == dest_shard) {\n      OpAdd(t->GetOpArgs(shard), dest_key, std::move(unionset.value()), true, true);\n    }\n\n    return OpStatus::OK;\n  };\n\n  cmd_cntx->tx()->Execute(std::move(store_cb), true);\n  cmd_cntx->SendLong(result_size);\n}\n\nvoid CmdSScan(CmdArgList args, CommandContext* cmd_cntx) {\n  string_view key = ArgS(args, 0);\n  string_view token = ArgS(args, 1);\n\n  uint64_t cursor = 0;\n\n  if (!absl::SimpleAtoi(token, &cursor)) {\n    return cmd_cntx->SendError(\"invalid cursor\");\n  }\n\n  // SSCAN key cursor [MATCH pattern] [COUNT count]\n  if (args.size() > 6) {\n    DVLOG(1) << \"got \" << args.size() << \" this is more than it should be\";\n    return cmd_cntx->SendError(kSyntaxErr);\n  }\n\n  OpResult<ScanOpts> ops = ScanOpts::TryFrom(args.subspan(2));\n  if (!ops) {\n    DVLOG(1) << \"SScan invalid args - return \" << ops << \" to the user\";\n    return cmd_cntx->SendError(ops.status());\n  }\n\n  const ScanOpts& scan_op = ops.value();\n\n  auto cb = [&](Transaction* t, EngineShard* shard) {\n    return OpScan(t->GetOpArgs(shard), key, &cursor, scan_op);\n  };\n\n  OpResult<StringVec> result = cmd_cntx->tx()->ScheduleSingleHopT(std::move(cb));\n  if (result.status() != OpStatus::WRONG_TYPE) {\n    auto replier = [cursor, result = std::move(result)](facade::SinkReplyBuilder* builder) {\n      auto* rb = static_cast<RedisReplyBuilder*>(builder);\n      RedisReplyBuilder::ArrayScope scope{rb, 2};\n      rb->SendBulkString(absl::StrCat(cursor));\n      rb->SendBulkStrArr(*result);\n    };\n    
cmd_cntx->ReplyWith(std::move(replier));\n  } else {\n    cmd_cntx->SendError(result.status());\n  }\n}\n\n// Syntax: saddex key [KEEPTTL] ttl_sec member [member...]\nvoid CmdSAddEx(CmdArgList args, CommandContext* cmd_cntx) {\n  CmdArgParser parser(args);\n\n  const std::string_view key = parser.Next<std::string_view>();\n  const bool keepttl = parser.Check(\"KEEPTTL\");\n  const uint32_t ttl_sec = parser.Next<uint32_t>();\n\n  if (auto err = parser.TakeError(); err) {\n    return cmd_cntx->SendError(err.MakeReply());\n  }\n  constexpr uint32_t kMaxTtl = (1UL << 26);\n  if (ttl_sec == 0 || ttl_sec > kMaxTtl) {\n    return cmd_cntx->SendError(kInvalidIntErr);\n  }\n\n  CmdArgList vals = parser.Tail();\n  if (vals.empty()) {\n    return cmd_cntx->SendError(WrongNumArgsError(\"SADDEX\"));\n  }\n\n  auto cb = [&](Transaction* t, EngineShard* shard) {\n    return OpAddEx(t->GetOpArgs(shard), key, ttl_sec, vals, keepttl);\n  };\n\n  OpResult<uint32_t> result = cmd_cntx->tx()->ScheduleSingleHopT(std::move(cb));\n  if (result) {\n    return cmd_cntx->SendLong(result.value());\n  }\n\n  cmd_cntx->SendError(result.status());\n}\n\n}  // namespace\n\nauto SetFamily::LoadIntSetBlob(std::string_view blob, PrimeValue* pv) -> LoadBlobResult {\n  if (!intsetValidateIntegrity((const uint8_t*)blob.data(), blob.size(), 0)) {\n    LOG(ERROR) << \"Intset integrity check failed.\";\n    return LoadBlobResult::kCorrupted;\n  }\n\n  const intset* is = (const intset*)blob.data();\n\n  unsigned len = intsetLen(is);\n\n  if (len > SetFamily::MaxIntsetEntries()) {\n    StringSet* set = SetFamily::ConvertToStrSet(is, len);\n\n    if (!set) {\n      LOG(ERROR) << \"OOM in ConvertToStrSet \" << len;\n      return LoadBlobResult::kOutOfMemory;\n    }\n    pv->InitRobj(OBJ_SET, kEncodingStrMap2, set);\n  } else {\n    intset* mine = reinterpret_cast<intset*>(CompactObj::memory_resource()->allocate(blob.size()));\n    ::memcpy(mine, blob.data(), blob.size());\n    pv->InitRobj(OBJ_SET, 
kEncodingIntSet, mine);\n  }\n\n  return LoadBlobResult::kSuccess;\n}\n\nauto SetFamily::LoadLPSetBlob(std::string_view blob, PrimeValue* pv) -> LoadBlobResult {\n  if (!lpValidateIntegrity((uint8_t*)blob.data(), blob.size(), 0, nullptr, nullptr)) {\n    LOG(ERROR) << \"ListPack integrity check failed.\";\n    return LoadBlobResult::kCorrupted;\n  }\n\n  unsigned char* lp = (unsigned char*)blob.data();\n  StringSet* set = CompactObj::AllocateMR<StringSet>();\n  for (unsigned char* cur = lpFirst(lp); cur != nullptr; cur = lpNext(lp, cur)) {\n    unsigned char field_buf[LP_INTBUF_SIZE];\n    string_view elem = detail::ListpackWrap::GetView(cur, field_buf);\n    if (!set->Add(elem)) {\n      LOG(ERROR) << \"Duplicate member \" << elem;\n      CompactObj::DeleteMR<StringSet>(set);\n      return LoadBlobResult::kCorrupted;\n    }\n  }\n  pv->InitRobj(OBJ_SET, kEncodingStrMap2, set);\n  return LoadBlobResult::kSuccess;\n}\n\nStringSet* SetFamily::ConvertToStrSet(const intset* is, size_t expected_len) {\n  int64_t intele;\n  char buf[32];\n  int ii = 0;\n\n  StringSet* ss = CompactObj::AllocateMR<StringSet>();\n  if (expected_len) {\n    ss->Reserve(expected_len);\n  }\n\n  while (intsetGet(const_cast<intset*>(is), ii++, &intele)) {\n    char* next = absl::numbers_internal::FastIntToBuffer(intele, buf);\n    string_view str{buf, size_t(next - buf)};\n    CHECK(ss->Add(str));\n  }\n\n  return ss;\n}\n\nusing CI = CommandId;\n\n#define HFUNC(x) SetHandler(&Cmd##x)\n\nvoid SetFamily::Register(CommandRegistry* registry) {\n  registry->StartFamily(acl::SET);\n  *registry << CI{\"SADD\", CO::JOURNALED | CO::FAST | CO::DENYOOM, -3, 1, 1}.HFUNC(SAdd)\n            << CI{\"SDIFF\", CO::READONLY, -2, 1, -1}.HFUNC(SDiff)\n            << CI{\"SDIFFSTORE\", CO::JOURNALED | CO::DENYOOM | CO::NO_AUTOJOURNAL, -3, 1, -1}.HFUNC(\n                   SDiffStore)\n            << CI{\"SINTER\", CO::READONLY, -2, 1, -1}.HFUNC(SInter)\n            << CI{\"SINTERSTORE\", CO::JOURNALED | 
CO::DENYOOM | CO::NO_AUTOJOURNAL, -3, 1, -1}.HFUNC(\n                   SInterStore)\n            << CI{\"SINTERCARD\", CO::READONLY | CO::VARIADIC_KEYS, -3, 2, 2}.HFUNC(SInterCard)\n            << CI{\"SMEMBERS\", CO::READONLY, 2, 1, 1}.HFUNC(SMembers)\n            << CI{\"SISMEMBER\", CO::FAST | CO::READONLY, 3, 1, 1}.HFUNC(SIsMember)\n            << CI{\"SMISMEMBER\", CO::FAST | CO::READONLY, -3, 1, 1}.HFUNC(SMIsMember)\n            << CI{\"SMOVE\", CO::FAST | CO::JOURNALED | CO::NO_AUTOJOURNAL, 4, 1, 2}.HFUNC(SMove)\n            << CI{\"SREM\", CO::JOURNALED | CO::FAST, -3, 1, 1}.HFUNC(SRem)\n            << CI{\"SCARD\", CO::READONLY | CO::FAST, 2, 1, 1}.HFUNC(SCard)\n            << CI{\"SPOP\", CO::JOURNALED | CO::FAST | CO::NO_AUTOJOURNAL, -2, 1, 1}.HFUNC(SPop)\n            << CI{\"SRANDMEMBER\", CO::READONLY, -2, 1, 1}.HFUNC(SRandMember)\n            << CI{\"SUNION\", CO::READONLY, -2, 1, -1}.HFUNC(SUnion)\n            << CI{\"SUNIONSTORE\", CO::JOURNALED | CO::DENYOOM | CO::NO_AUTOJOURNAL, -3, 1, -1}.HFUNC(\n                   SUnionStore)\n            << CI{\"SSCAN\", CO::READONLY, -3, 1, 1}.HFUNC(SScan)\n            << CI{\"SADDEX\", CO::JOURNALED | CO::FAST | CO::DENYOOM, -4, 1, 1}.HFUNC(SAddEx);\n}\n\nuint32_t SetFamily::MaxIntsetEntries() {\n  return kMaxIntSetEntries;\n}\n\nint32_t SetFamily::FieldExpireTime(const DbContext& db_context, const PrimeValue& pv,\n                                   std::string_view field) {\n  DCHECK_EQ(OBJ_SET, pv.ObjType());\n\n  SetType st{pv.RObjPtr(), pv.Encoding()};\n  return GetExpiry(db_context, st, field);\n}\n\nvector<long> SetFamily::SetFieldsExpireTime(const OpArgs& op_args, uint32_t ttl_sec,\n                                            CmdArgList values, PrimeValue* pv) {\n  DCHECK_EQ(OBJ_SET, pv->ObjType());\n\n  if (pv->Encoding() == kEncodingIntSet) {\n    // a valid result can never be a intset, since it doesnt keep ttl\n    intset* is = (intset*)pv->RObjPtr();\n    StringSet* ss = 
SetFamily::ConvertToStrSet(is, intsetLen(is));\n    if (!ss) {\n      std::vector<long> out(values.size(), -2);\n      return out;\n    }\n    pv->InitRobj(OBJ_SET, kEncodingStrMap2, ss);\n  }\n\n  auto ss = static_cast<StringSet*>(pv->RObjPtr());\n  ss->set_time(MemberTimeSeconds(op_args.db_cntx.time_now_ms));\n  return ExpireElements(ss, values, ttl_sec);\n}\n\n}  // namespace dfly\n"
  },
  {
    "path": "src/server/set_family.h",
    "content": "// Copyright 2022, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#pragma once\n\n#include \"facade/facade_types.h\"\n#include \"server/table.h\"\n#include \"server/tx_base.h\"\n\ntypedef struct intset intset;\n\nnamespace dfly {\n\nusing facade::OpResult;\n\nclass StringSet;\n\nclass SetFamily {\n public:\n  static void Register(CommandRegistry* registry);\n\n  static LoadBlobResult LoadIntSetBlob(std::string_view blob, PrimeValue* pv);\n  static LoadBlobResult LoadLPSetBlob(std::string_view blob, PrimeValue* pv);\n\n  static uint32_t MaxIntsetEntries();\n\n  // Returns nullptr on OOM.\n  static StringSet* ConvertToStrSet(const intset* is, size_t expected_len);\n\n  // returns expiry time in seconds since kMemberExpiryBase date.\n  // returns -3 if field was not found, -1 if no ttl is associated with the item.\n  static int32_t FieldExpireTime(const DbContext& db_context, const PrimeValue& pv,\n                                 std::string_view field);\n\n  static std::vector<long> SetFieldsExpireTime(const OpArgs& op_args, uint32_t ttl_sec,\n                                               facade::CmdArgList values, PrimeValue* pv);\n};\n\n}  // namespace dfly\n"
  },
  {
    "path": "src/server/set_family_test.cc",
    "content": "// Copyright 2022, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#include \"server/set_family.h\"\n\n#include \"absl/flags/flag.h\"\n#include \"base/gtest.h\"\n#include \"base/logging.h\"\n#include \"facade/facade_test.h\"\n#include \"server/test_utils.h\"\n\nextern \"C\" {\n#include \"redis/intset.h\"\n#include \"redis/zmalloc.h\"\n}\n\nABSL_DECLARE_FLAG(std::string, shard_round_robin_prefix);\n\nusing namespace testing;\nusing namespace std;\nusing namespace util;\nusing namespace boost;\n\nnamespace dfly {\n\nclass SetFamilyTest : public BaseFamilyTest {\n protected:\n};\n\nMATCHER_P(ConsistsOfMatcher, elements, \"\") {\n  auto vec = arg.GetVec();\n  for (const auto& x : vec) {\n    if (elements.find(x.GetString()) == elements.end()) {\n      return false;\n    }\n  }\n  return true;\n}\n\nauto ConsistsOf(std::initializer_list<std::string> elements) {\n  return ConsistsOfMatcher(std::unordered_set<std::string>{elements});\n}\n\nTEST_F(SetFamilyTest, SAdd) {\n  auto resp = Run({\"sadd\", \"x\", \"1\", \"2\", \"3\"});\n  EXPECT_THAT(resp, IntArg(3));\n  resp = Run({\"sadd\", \"x\", \"2\", \"3\"});\n  EXPECT_THAT(resp, IntArg(0));\n  Run({\"set\", \"a\", \"foo\"});\n  resp = Run({\"sadd\", \"a\", \"b\"});\n  EXPECT_THAT(resp, ErrArg(\"WRONGTYPE \"));\n  resp = Run({\"type\", \"x\"});\n  EXPECT_EQ(resp, \"set\");\n}\n\nTEST_F(SetFamilyTest, IntConv) {\n  auto resp = Run({\"sadd\", \"x\", \"134\"});\n  EXPECT_THAT(resp, IntArg(1));\n  resp = Run({\"sadd\", \"x\", \"abc\"});\n  EXPECT_THAT(resp, IntArg(1));\n  resp = Run({\"sadd\", \"x\", \"134\"});\n  EXPECT_THAT(resp, IntArg(0));\n}\n\nTEST_F(SetFamilyTest, SUnionStore) {\n  auto resp = Run({\"sadd\", \"b\", \"1\", \"2\", \"3\"});\n  Run({\"sadd\", \"c\", \"10\", \"11\"});\n  Run({\"set\", \"a\", \"foo\"});\n  resp = Run({\"sunionstore\", \"a\", \"b\", \"c\"});\n\n  EXPECT_THAT(resp, IntArg(5));\n  resp = Run({\"type\", \"a\"});\n  ASSERT_EQ(resp, 
\"set\");\n\n  resp = Run({\"smembers\", \"a\"});\n  ASSERT_THAT(resp, ArgType(RespExpr::ARRAY));\n  EXPECT_THAT(resp.GetVec(), UnorderedElementsAre(\"11\", \"10\", \"1\", \"2\", \"3\"));\n}\n\n// Check that SUNIONSTORE overwrites a value including resetting its expiration\nTEST_F(SetFamilyTest, SUnionStoreExpiration) {\n  Run({\"sadd\", \"s1\", \"a\", \"b\"});\n  Run({\"sadd\", \"s2\", \"c\", \"d\"});\n\n  Run({\"set\", \"target\", \"some-value\"});\n  EXPECT_THAT(Run({\"expire\", \"target\", \"1010\"}), IntArg(1));\n  EXPECT_THAT(Run({\"ttl\", \"target\"}), IntArg(1010));\n\n  EXPECT_THAT(Run({\"sunionstore\", \"target\", \"s1\", \"s2\"}), IntArg(4));\n  EXPECT_THAT(Run({\"scard\", \"target\"}), IntArg(4));\n  EXPECT_THAT(Run({\"ttl\", \"target\"}), IntArg(-1));\n}\n\nTEST_F(SetFamilyTest, SDiff) {\n  auto resp = Run({\"sadd\", \"b\", \"1\", \"2\", \"3\"});\n  Run({\"sadd\", \"c\", \"10\", \"11\"});\n  Run({\"set\", \"a\", \"foo\"});\n\n  resp = Run({\"sdiff\", \"b\", \"c\"});\n  ASSERT_THAT(resp, ArgType(RespExpr::ARRAY));\n  EXPECT_THAT(resp.GetVec(), UnorderedElementsAre(\"1\", \"2\", \"3\"));\n\n  resp = Run({\"sdiffstore\", \"a\", \"b\", \"c\"});\n  EXPECT_THAT(resp, IntArg(3));\n\n  Run({\"set\", \"str\", \"foo\"});\n  EXPECT_THAT(Run({\"sdiff\", \"b\", \"str\"}), ErrArg(\"WRONGTYPE \"));\n\n  Run({\"sadd\", \"bar\", \"x\", \"a\", \"b\", \"c\"});\n  Run({\"sadd\", \"foo\", \"c\"});\n  Run({\"sadd\", \"car\", \"a\", \"d\"});\n  EXPECT_EQ(2, CheckedInt({\"SDIFFSTORE\", \"tar\", \"bar\", \"foo\", \"car\"}));\n}\n\nTEST_F(SetFamilyTest, SInter) {\n  auto resp = Run({\"sadd\", \"a\", \"1\", \"2\", \"3\", \"4\"});\n  Run({\"sadd\", \"b\", \"3\", \"5\", \"6\", \"2\"});\n  resp = Run({\"sinterstore\", \"d\", \"a\", \"b\"});\n  EXPECT_THAT(resp, IntArg(2));\n  resp = Run({\"smembers\", \"d\"});\n  ASSERT_THAT(resp, ArgType(RespExpr::ARRAY));\n  EXPECT_THAT(resp.GetVec(), UnorderedElementsAre(\"3\", \"2\"));\n\n  Run({\"set\", \"y\", \"\"});\n  resp = 
Run({\"sinter\", \"x\", \"y\"});\n  ASSERT_EQ(1, GetDebugInfo(\"IO0\").shards_count);\n  EXPECT_THAT(resp, ErrArg(\"WRONGTYPE Operation against a key\"));\n  resp = Run({\"sinterstore\", \"none1\", \"none2\"});\n  EXPECT_THAT(resp, IntArg(0));\n\n  EXPECT_THAT(Run({\"sinter\"}), ErrArg(\"wrong number of arguments\"));\n}\n\nTEST_F(SetFamilyTest, SInterCard) {\n  Run({\"sadd\", \"s1\", \"2\", \"b\", \"1\", \"a\"});\n  Run({\"sadd\", \"s2\", \"3\", \"c\", \"2\", \"b\"});\n  Run({\"sadd\", \"s3\", \"2\", \"b\", \"3\", \"c\"});\n\n  EXPECT_EQ(2, CheckedInt({\"sintercard\", \"2\", \"s1\", \"s2\"}));\n  EXPECT_EQ(0, CheckedInt({\"sintercard\", \"2\", \"s1\", \"s4\"}));\n  EXPECT_EQ(2, CheckedInt({\"sintercard\", \"2\", \"s2\", \"s3\", \"LIMIT\", \"2\"}));\n  EXPECT_EQ(4, CheckedInt({\"sintercard\", \"1\", \"s1\"}));\n\n  auto resp = Run({\"sintercard\", \"a\", \"s1\", \"s2\"});\n  // redis does not throw this message, but SimpleAtoi does\n  EXPECT_THAT(resp, ErrArg(\"value is not an integer or out of range\"));\n  resp = Run({\"sintercard\", \"2\", \"s1\", \"s2\", \"LIMIT\"});\n  EXPECT_THAT(resp, ErrArg(\"syntax error\"));\n  resp = Run({\"sintercard\", \"2\", \"s1\", \"s2\", \"LIMIT\", \"a\"});\n  EXPECT_THAT(resp, ErrArg(\"limit can't be negative\"));\n  resp = Run({\"sintercard\", \"2\", \"s1\", \"s2\", \"LIMIT\", \"-1\"});\n  EXPECT_THAT(resp, ErrArg(\"limit can't be negative\"));\n  resp = Run({\"sintercard\", \"2\", \"s1\"});\n  EXPECT_THAT(resp, ErrArg(\"syntax error\"));\n  resp = Run({\"sintercard\", \"-1\", \"s1\"});\n  EXPECT_THAT(resp, ErrArg(\"value is not an integer or out of range\"));\n}\n\nTEST_F(SetFamilyTest, SMove) {\n  auto resp = Run({\"sadd\", \"a\", \"1\", \"2\", \"3\", \"4\"});\n  Run({\"sadd\", \"b\", \"3\", \"5\", \"6\", \"2\"});\n  resp = Run({\"smove\", \"a\", \"b\", \"1\"});\n  EXPECT_THAT(resp, IntArg(1));\n\n  Run({\"sadd\", \"x\", \"a\", \"b\", \"c\"});\n  Run({\"sadd\", \"y\", \"c\"});\n  EXPECT_THAT(Run({\"smove\", \"x\", \"y\", 
\"c\"}), IntArg(1));\n}\n\nTEST_F(SetFamilyTest, SPop) {\n  auto resp = Run({\"sadd\", \"x\", \"1\", \"2\", \"3\"});\n  resp = Run({\"spop\", \"x\", \"3\"});\n  ASSERT_THAT(resp, ArgType(RespExpr::ARRAY));\n  EXPECT_THAT(resp.GetVec(), UnorderedElementsAre(\"1\", \"2\", \"3\"));\n  resp = Run({\"type\", \"x\"});\n  EXPECT_EQ(resp, \"none\");\n\n  Run({\"sadd\", \"x\", \"1\", \"2\", \"3\"});\n  resp = Run({\"spop\", \"x\", \"2\"});\n\n  ASSERT_THAT(resp, ArrLen(2));\n  EXPECT_THAT(resp.GetVec(), IsSubsetOf({\"1\", \"2\", \"3\"}));\n\n  resp = Run({\"scard\", \"x\"});\n  EXPECT_THAT(resp, IntArg(1));\n\n  Run({\"sadd\", \"y\", \"a\", \"b\", \"c\"});\n  resp = Run({\"spop\", \"y\", \"1\"});\n  EXPECT_THAT(resp, ArgType(RespExpr::STRING));\n  EXPECT_THAT(resp, testing::AnyOf(\"a\", \"b\", \"c\"));\n\n  resp = Run({\"smembers\", \"y\"});\n  ASSERT_THAT(resp, ArrLen(2));\n  EXPECT_THAT(resp.GetVec(), IsSubsetOf({\"a\", \"b\", \"c\"}));\n\n  // Test POP on large set with small pop count\n  vector<string> xlarge{\"sadd\", \"xlarge\"};\n  for (size_t i = 0; i < 100; i++)\n    xlarge.push_back(to_string(i));\n  Run(absl::MakeSpan(xlarge));\n\n  resp = Run({\"spop\", \"xlarge\", \"2\"});\n  {\n    auto elems = resp.GetVec();\n    EXPECT_NE(elems[0].GetString(), elems[1].GetString());\n  }\n\n  resp = Run({\"scard\", \"xlarge\"});\n  EXPECT_THAT(resp, IntArg(98));\n}\n\nTEST_F(SetFamilyTest, SRandMember) {\n  // Test IntSet\n  Run({\"sadd\", \"x\", \"1\", \"2\", \"3\"});\n\n  // Test if count > 0 (IntSet)\n  auto resp = Run({\"SRandMember\", \"x\"});\n  ASSERT_THAT(resp, ArgType(RespExpr::STRING));\n  EXPECT_THAT(resp, AnyOf(\"1\", \"2\", \"3\"));\n\n  resp = Run({\"SRandMember\", \"x\", \"1\"});\n  ASSERT_THAT(resp, ArgType(RespExpr::STRING));\n  EXPECT_THAT(resp, AnyOf(\"1\", \"2\", \"3\"));\n\n  resp = Run({\"SRandMember\", \"x\", \"2\"});\n  ASSERT_THAT(resp, ArrLen(2));\n  EXPECT_THAT(resp.GetVec(), IsSubsetOf({\"1\", \"2\", \"3\"}));\n\n  resp = Run({\"SRandMember\", 
\"x\", \"3\"});\n  ASSERT_THAT(resp, ArrLen(3));\n  EXPECT_THAT(resp.GetVec(), UnorderedElementsAre(\"1\", \"2\", \"3\"));\n\n  // Test if count is larger than the size of the IntSet\n  resp = Run({\"SRandMember\", \"x\", \"25\"});\n  ASSERT_THAT(resp, ArrLen(3));\n  EXPECT_THAT(resp.GetVec(), UnorderedElementsAre(\"1\", \"2\", \"3\"));\n\n  // Test if count < 0 (IntSet)\n  resp = Run({\"SRandMember\", \"x\", \"-1\"});\n  ASSERT_THAT(resp, ArgType(RespExpr::STRING));\n  EXPECT_THAT(resp, AnyOf(\"1\", \"2\", \"3\"));\n\n  resp = Run({\"SRandMember\", \"x\", \"-2\"});\n  ASSERT_THAT(resp, ArrLen(2));\n  EXPECT_THAT(resp, ConsistsOf({\"1\", \"2\", \"3\"}));\n\n  resp = Run({\"SRandMember\", \"x\", \"-3\"});\n  ASSERT_THAT(resp, ArrLen(3));\n  EXPECT_THAT(resp, ConsistsOf({\"1\", \"2\", \"3\"}));\n\n  // Test if count < 0, but the absolute value is larger than the size of the IntSet\n  resp = Run({\"SRandMember\", \"x\", \"-25\"});\n  ASSERT_THAT(resp, ArrLen(25));\n  EXPECT_THAT(resp, ConsistsOf({\"1\", \"2\", \"3\"}));\n\n  // Test StrSet\n  Run({\"sadd\", \"y\", \"a\", \"b\", \"c\"});\n\n  // Test if count > 0 (StrSet)\n  resp = Run({\"SRandMember\", \"y\"});\n  ASSERT_THAT(resp, ArgType(RespExpr::STRING));\n  EXPECT_THAT(resp, AnyOf(\"a\", \"b\", \"c\"));\n\n  resp = Run({\"SRandMember\", \"y\", \"1\"});\n  ASSERT_THAT(resp, ArgType(RespExpr::STRING));\n  EXPECT_THAT(resp, AnyOf(\"a\", \"b\", \"c\"));\n\n  resp = Run({\"SRandMember\", \"y\", \"2\"});\n  ASSERT_THAT(resp, ArrLen(2));\n  EXPECT_THAT(resp.GetVec(), IsSubsetOf({\"a\", \"b\", \"c\"}));\n\n  resp = Run({\"SRandMember\", \"y\", \"3\"});\n  ASSERT_THAT(resp, ArrLen(3));\n  EXPECT_THAT(resp.GetVec(), UnorderedElementsAre(\"a\", \"b\", \"c\"));\n\n  // Test if count is larger than the size of the StrSet\n  resp = Run({\"SRandMember\", \"y\", \"25\"});\n  ASSERT_THAT(resp, ArrLen(3));\n  EXPECT_THAT(resp.GetVec(), UnorderedElementsAre(\"a\", \"b\", \"c\"));\n\n  // Test if count < 0 (StrSet)\n  resp = 
Run({\"SRandMember\", \"y\", \"-1\"});\n  ASSERT_THAT(resp, ArgType(RespExpr::STRING));\n  EXPECT_THAT(resp, AnyOf(\"a\", \"b\", \"c\"));\n\n  resp = Run({\"SRandMember\", \"y\", \"-2\"});\n  ASSERT_THAT(resp, ArrLen(2));\n  EXPECT_THAT(resp, ConsistsOf({\"a\", \"b\", \"c\"}));\n\n  resp = Run({\"SRandMember\", \"y\", \"-3\"});\n  ASSERT_THAT(resp, ArrLen(3));\n  EXPECT_THAT(resp, ConsistsOf({\"a\", \"b\", \"c\"}));\n\n  // Test if count < 0, but the absolute value is larger than the size of the StrSet\n  resp = Run({\"SRandMember\", \"y\", \"-25\"});\n  ASSERT_THAT(resp, ArrLen(25));\n  EXPECT_THAT(resp, ConsistsOf({\"a\", \"b\", \"c\"}));\n\n  // Test if count is 0\n  ASSERT_THAT(Run({\"SRandMember\", \"x\", \"0\"}), ArrLen(0));\n\n  // Test if set is empty\n  EXPECT_THAT(Run({\"SAdd\", \"empty::set\", \"1\"}), IntArg(1));\n  EXPECT_THAT(Run({\"SRem\", \"empty::set\", \"1\"}), IntArg(1));\n  ASSERT_THAT(Run({\"SRandMember\", \"empty::set\", \"0\"}), ArrLen(0));\n  ASSERT_THAT(Run({\"SRandMember\", \"empty::set\", \"3\"}), ArrLen(0));\n  ASSERT_THAT(Run({\"SRandMember\", \"empty::set\", \"-4\"}), ArrLen(0));\n\n  // Test if key does not exist\n  ASSERT_THAT(Run({\"SRandMember\", \"unknown::set\"}), ArgType(RespExpr::NIL));\n  ASSERT_THAT(Run({\"SRandMember\", \"unknown::set\", \"0\"}), ArrLen(0));\n\n  // Test wrong arguments\n  resp = Run({\"SRandMember\", \"x\", \"5\", \"3\"});\n  EXPECT_THAT(resp, ErrArg(\"wrong number of arguments\"));\n}\n\nTEST_F(SetFamilyTest, SMIsMember) {\n  Run({\"sadd\", \"foo\", \"a\"});\n  Run({\"sadd\", \"foo\", \"b\"});\n\n  auto resp = Run({\"smismember\", \"foo\"});\n  EXPECT_THAT(resp, ErrArg(\"wrong number of arguments\"));\n\n  resp = Run({\"smismember\", \"foo1\", \"a\", \"b\"});\n  EXPECT_THAT(resp, RespArray(ElementsAre(IntArg(0), IntArg(0))));\n\n  resp = Run({\"smismember\", \"foo\", \"a\", \"c\"});\n  EXPECT_THAT(resp, RespArray(ElementsAre(IntArg(1), IntArg(0))));\n\n  resp = Run({\"smismember\", \"foo\", \"a\", 
\"b\"});\n  EXPECT_THAT(resp, RespArray(ElementsAre(IntArg(1), IntArg(1))));\n\n  resp = Run({\"smismember\", \"foo\", \"d\", \"e\"});\n  EXPECT_THAT(resp, RespArray(ElementsAre(IntArg(0), IntArg(0))));\n\n  resp = Run({\"smismember\", \"foo\", \"b\"});\n  EXPECT_THAT(resp, IntArg(1));\n\n  resp = Run({\"smismember\", \"foo\", \"x\"});\n  EXPECT_THAT(resp, IntArg(0));\n}\n\nTEST_F(SetFamilyTest, Empty) {\n  auto resp = Run({\"smembers\", \"x\"});\n  ASSERT_THAT(resp, ArrLen(0));\n}\n\nTEST_F(SetFamilyTest, SScan) {\n  auto resp = Run(\"sscan non-existing-key 100 count 5\");\n  ASSERT_THAT(resp, ArgType(RespExpr::ARRAY));\n  ASSERT_THAT(resp.GetVec(), ElementsAre(ArgType(RespExpr::STRING), ArgType(RespExpr::ARRAY)));\n  EXPECT_EQ(ToSV(resp.GetVec()[0].GetBuf()), \"0\");\n  EXPECT_EQ(StrArray(resp.GetVec()[1]).size(), 0);\n\n  // Test for int set\n  for (int i = 0; i < 15; i++) {\n    Run({\"sadd\", \"myintset\", absl::StrCat(i)});\n  }\n\n  // Note that even though this limit by 4, it would return more because\n  // all fields are on intlist\n  resp = Run({\"sscan\", \"myintset\", \"0\", \"count\", \"4\"});\n  auto vec = StrArray(resp.GetVec()[1]);\n  EXPECT_THAT(vec.size(), 15);\n\n  resp = Run({\"sscan\", \"myintset\", \"0\", \"match\", \"1*\"});\n  vec = StrArray(resp.GetVec()[1]);\n  EXPECT_THAT(vec, UnorderedElementsAre(\"1\", \"10\", \"11\", \"12\", \"13\", \"14\"));\n\n  // test string set\n  for (int i = 0; i < 15; i++) {\n    Run({\"sadd\", \"mystrset\", absl::StrCat(\"str-\", i)});\n  }\n\n  resp = Run({\"sscan\", \"mystrset\", \"0\", \"count\", \"5\"});\n  vec = StrArray(resp.GetVec()[1]);\n  EXPECT_THAT(vec.size(), 5);\n\n  resp = Run({\"sscan\", \"mystrset\", \"0\", \"match\", \"str-1*\"});\n  vec = StrArray(resp.GetVec()[1]);\n  EXPECT_THAT(vec, UnorderedElementsAre(\"str-1\", \"str-10\", \"str-11\", \"str-12\", \"str-13\", \"str-14\"));\n\n  resp = Run({\"sscan\", \"mystrset\", \"0\", \"match\", \"str-1*\", \"count\", \"3\"});\n  vec = 
StrArray(resp.GetVec()[1]);\n  EXPECT_THAT(vec, IsSubsetOf({\"str-1\", \"str-10\", \"str-11\", \"str-12\", \"str-13\", \"str-14\"}));\n  EXPECT_EQ(vec.size(), 3);\n\n  // nothing should match this\n  resp = Run({\"sscan\", \"mystrset\", \"0\", \"match\", \"1*\"});\n  vec = StrArray(resp.GetVec()[1]);\n  EXPECT_THAT(vec.size(), 0);\n}\n\nTEST_F(SetFamilyTest, HugeSScan) {\n  for (int i = 0; i < 60000; i += 5) {\n    Run({\"sadd\", \"myintset\", absl::StrCat(i), absl::StrCat(i + 1), absl::StrCat(i + 2),\n         absl::StrCat(i + 3), absl::StrCat(i + 4)});\n  }\n\n  auto resp = Run({\"sscan\", \"myintset\", \"0\", \"count\", \"50000\"});\n  auto vec = StrArray(resp.GetVec()[1]);\n  EXPECT_GE(vec.size(), 50000);\n}\n\nTEST_F(SetFamilyTest, IntSetMemcpy) {\n  // This logic is used in CompactObject::DefragIntSet\n  intset* original = intsetNew();\n  uint8_t success = 0;\n  for (int i = 0; i < 250; ++i) {\n    original = intsetAdd(original, i, &success);\n    ASSERT_THAT(success, 1);\n  }\n  const size_t blob_len = intsetBlobLen(original);\n  intset* replacement = (intset*)zmalloc(blob_len);\n  memcpy(replacement, original, blob_len);\n\n  ASSERT_THAT(original->encoding, replacement->encoding);\n  ASSERT_THAT(original->length, replacement->length);\n\n  for (int i = 0; i < 250; ++i) {\n    int64_t value;\n    ASSERT_THAT(intsetGet(replacement, i, &value), 1);\n    ASSERT_THAT(value, i);\n  }\n\n  zfree(original);\n  zfree(replacement);\n}\n\nTEST_F(SetFamilyTest, SAddEx) {\n  TEST_current_time_ms = kMemberExpiryBase * 1000;\n  EXPECT_THAT(Run({\"saddex\", \"key\", \"2\", \"val\"}), IntArg(1));\n  AdvanceTime(1500);\n  EXPECT_THAT(Run({\"saddex\", \"key\", \"2\", \"val\"}), IntArg(0));\n  AdvanceTime(1000);\n  EXPECT_EQ(1, CheckedInt({\"sismember\", \"key\", \"val\"}));\n\n  auto resp = Run({\"saddex\", \"k\", \"one\", \"v\"});\n  EXPECT_THAT(resp, ErrArg(\"value is not an integer or out of range\"));\n\n  // KEEPTTL support. 
add field orig with TTL=10\n  EXPECT_THAT(Run({\"saddex\", \"key\", \"10\", \"orig\"}), IntArg(1));\n\n  // add fields new and orig with TTL=1 and KEEPTTL=true. orig ttl should be preserved\n  EXPECT_THAT(Run({\"saddex\", \"key\", \"KEEPTTL\", \"1\", \"orig\", \"new\"}), IntArg(1));\n  EXPECT_LE(CheckedInt({\"fieldttl\", \"key\", \"new\"}), 1);\n\n  // The expiry for orig should be unchanged, at least greater than 5 at this point given some time\n  // has passed since we set it to 10\n  EXPECT_GT(CheckedInt({\"fieldttl\", \"key\", \"orig\"}), 5);\n\n  // without KEEPTTL the TTL should be overwritten\n  EXPECT_THAT(Run({\"saddex\", \"key\", \"2\", \"orig\", \"new\"}), IntArg(0));\n  EXPECT_LE(CheckedInt({\"fieldttl\", \"key\", \"orig\"}), 2);\n\n  // At least one arg is expected\n  EXPECT_THAT(Run({\"saddex\", \"key\", \"KEEPTTL\", \"2\"}), ErrArg(\"wrong number of arguments\"));\n}\n\nTEST_F(SetFamilyTest, CheckSetLinkExpiryTransfer) {\n  for (int i = 0; i < 10; i++) {\n    EXPECT_THAT(Run(absl::StrCat(\"SADDEX key 5 \", i)), IntArg(1));\n  }\n  for (int i = 0; i < 9; i++) {\n    Run(absl::StrCat(\"SREM key \", i));\n  }\n  EXPECT_THAT(Run(\"SCARD key\"), IntArg(1));\n  AdvanceTime(6000);\n  Run(\"SMEMBERS key\");\n  EXPECT_THAT(Run(\"SCARD key\"), IntArg(0));\n}\n\nTEST_F(SetFamilyTest, SetInter_5590) {\n  absl::FlagSaver fs;\n  SetTestFlag(\"num_shards\", \"2\");\n  num_threads_ = 3;\n  SetTestFlag(\"shard_round_robin_prefix\", \"prefix-\");\n  ResetService();\n\n  Run(\"DEBUG POPULATE 1 prefix- 5 RAND ELEMENTS 5000 TYPE SET\");\n  Run(\"SADD prefix-:0 common\");\n  // shard 0 has 1 key\n  EXPECT_THAT(GetShardKeyCount(), Contains(Pair(0, 1)));\n\n  Run(\"SADD prefix-foo bar hello common\");\n  // shard 1 has 1 key\n  EXPECT_THAT(GetShardKeyCount(), Contains(Pair(0, 1)));\n  EXPECT_THAT(GetShardKeyCount(), Contains(Pair(1, 1)));\n\n  int64_t start = absl::GetCurrentTimeNanos();\n  Run(\"SINTER prefix-foo prefix-:0\");\n  int64_t end = 
absl::GetCurrentTimeNanos();\n  // Less than 100 ms. Before the fix it took 3 seconds.\n  EXPECT_LE(end - start, 100000000);\n}\n\n}  // namespace dfly\n"
  },
  {
    "path": "src/server/sharding.cc",
    "content": "// Copyright 2024, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#include \"server/sharding.h\"\n\n#include <xxhash.h>\n\n#include \"absl/strings/match.h\"\n#include \"base/flags.h\"\n#include \"base/logging.h\"\n#include \"server/cluster_support.h\"\n#include \"server/common.h\"\n#include \"util/fibers/synchronization.h\"\n\nusing namespace std;\n\nABSL_FLAG(string, shard_round_robin_prefix, \"\", \"Deprecated -- will be removed\");\n\nnamespace dfly {\nnamespace {\n// RoundRobinSharder implements a way to distribute keys that begin with some prefix.\n// Round-robin is disabled by default. It is not a general use-case optimization, but instead only\n// reasonable when there are a few highly contended keys, which we'd like to spread between the\n// shards evenly.\n// When enabled, the distribution is done via hash table: the hash of the key is used to look into\n// a pre-allocated vector. This means that collisions are possible, but are very unlikely if only\n// a few keys are used.\n// Thread safe.\nclass RoundRobinSharder {\n public:\n  static void Init(uint32_t shard_set_size) {\n    round_robin_prefix_ = absl::GetFlag(FLAGS_shard_round_robin_prefix);\n    shard_set_size_ = shard_set_size;\n\n    if (IsEnabled()) {\n      LOG(WARNING) << \"shard_round_robin_prefix is deprecated and will be removed in new versions\";\n      // ~100k entries will consume 200kb per thread, and will allow 100 keys with < 2.5% collision\n      // probability. Since this has a considerable footprint, we only allocate when enabled. 
We're\n      // using a prime number close to 100k for better utilization.\n      constexpr size_t kRoundRobinSize = 100'003;\n      round_robin_shards_tl_cache_.resize(kRoundRobinSize);\n      std::fill(round_robin_shards_tl_cache_.begin(), round_robin_shards_tl_cache_.end(),\n                kInvalidSid);\n\n      util::fb2::LockGuard guard(mutex_);\n      if (round_robin_shards_.empty()) {\n        round_robin_shards_ = round_robin_shards_tl_cache_;\n      }\n    }\n  }\n\n  static bool IsEnabled() {\n    return !round_robin_prefix_.empty();\n  }\n\n  static optional<ShardId> TryGetShardId(string_view key, XXH64_hash_t key_hash) {\n    DCHECK(!round_robin_shards_tl_cache_.empty());\n\n    if (!absl::StartsWith(key, round_robin_prefix_)) {\n      return nullopt;\n    }\n\n    size_t index = key_hash % round_robin_shards_tl_cache_.size();\n    ShardId sid = round_robin_shards_tl_cache_[index];\n\n    if (sid == kInvalidSid) {\n      util::fb2::LockGuard guard(mutex_);\n      sid = round_robin_shards_[index];\n      if (sid == kInvalidSid) {\n        sid = next_shard_;\n        round_robin_shards_[index] = sid;\n        next_shard_ = (next_shard_ + 1) % shard_set_size_;\n      }\n      round_robin_shards_tl_cache_[index] = sid;\n    }\n\n    return sid;\n  }\n\n private:\n  static thread_local string round_robin_prefix_;\n  static thread_local vector<ShardId> round_robin_shards_tl_cache_;\n  static thread_local uint32_t shard_set_size_;\n  static vector<ShardId> round_robin_shards_ ABSL_GUARDED_BY(mutex_);\n  static ShardId next_shard_ ABSL_GUARDED_BY(mutex_);\n  static util::fb2::Mutex mutex_;\n};\n\n}  // namespace\n\nthread_local string RoundRobinSharder::round_robin_prefix_;\nthread_local uint32_t RoundRobinSharder::shard_set_size_;\nthread_local vector<ShardId> RoundRobinSharder::round_robin_shards_tl_cache_;\nvector<ShardId> RoundRobinSharder::round_robin_shards_;\nShardId RoundRobinSharder::next_shard_;\nutil::fb2::Mutex RoundRobinSharder::mutex_;\n\nShardId 
Shard(string_view v, ShardId shard_num) {\n  // This cluster sharding is not necessary and may degrade keys distribution among shard threads.\n  // For example, if we have 3 shards, then no single-char keys will be assigned to shard 2 and\n  // 32 single char keys in range ['_' - '~'] will be assigned to shard 0.\n  // Yes, SlotId function does not have great distribution properties.\n  // On the other side, slot based sharding may help with pipeline squashing optimizations,\n  // because they rely on commands being single-sharded.\n  // TODO: once we improve our squashing logic, we can remove this.\n  if (IsClusterShardedBySlot()) {\n    return KeySlot(v) % shard_num;\n  }\n\n  if (IsClusterShardedByTag()) {\n    v = LockTagOptions::instance().Tag(v);\n  }\n\n  XXH64_hash_t hash = XXH64(v.data(), v.size(), 120577240643ULL);\n\n  if (RoundRobinSharder::IsEnabled()) {\n    auto round_robin = RoundRobinSharder::TryGetShardId(v, hash);\n    if (round_robin.has_value()) {\n      return *round_robin;\n    }\n  }\n\n  return hash % shard_num;\n}\n\nnamespace sharding {\nvoid InitThreadLocals(uint32_t shard_set_size) {\n  RoundRobinSharder::Init(shard_set_size);\n}\n}  // namespace sharding\n\n}  // namespace dfly\n"
  },
  {
    "path": "src/server/sharding.h",
    "content": "// Copyright 2025, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#pragma once\n\n#include <string_view>\n\n#include \"server/common_types.h\"\n\nnamespace dfly {\n\nShardId Shard(std::string_view v, ShardId shard_num);\n\nnamespace sharding {\nvoid InitThreadLocals(uint32_t shard_set_size);\n}\n\n}  // namespace dfly\n"
  },
  {
    "path": "src/server/slowlog.cc",
    "content": "// Copyright 2022, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#include \"server/slowlog.h\"\n\n#include \"base/logging.h\"\n#include \"facade/facade_types.h\"\n\nnamespace dfly {\n\nusing namespace std;\n\nvoid SlowLogShard::ChangeLength(const size_t new_length) {\n  log_entries_.set_capacity(new_length);\n}\n\nvoid SlowLogShard::Reset() {\n  log_entries_.clear();\n}\n\nvoid SlowLogShard::Add(const string_view command_name, CmdArgList args,\n                       const string_view client_name, const string_view client_ip,\n                       uint64_t exec_time_usec, uint64_t unix_ts_usec) {\n  DCHECK_GT(log_entries_.capacity(), 0u);\n\n  vector<pair<string, uint32_t>> slowlog_args;\n  size_t slowlog_effective_length = args.size();\n  if (args.size() > kMaximumSlowlogArgCount) {\n    // we store one argument fewer because the last argument is \"wasted\"\n    // for telling how many further arguments there are\n    slowlog_effective_length = kMaximumSlowlogArgCount - 1;\n  }\n  slowlog_args.reserve(slowlog_effective_length);\n  slowlog_args.emplace_back(command_name, 0);\n\n  for (size_t i = 0; i < slowlog_effective_length; ++i) {\n    string_view arg = facade::ArgS(args, i);\n    size_t extra_bytes = 0;\n    // If any of the arguments is deemed too long, it will be truncated\n    // and the truncated string will be suffixed by the number of truncated bytes in\n    // this format: \"... 
(n more bytes)\"\n    size_t extra_bytes_suffix_length = 0;\n    if (arg.size() > kMaximumSlowlogArgLength) {\n      extra_bytes = arg.size() - kMaximumSlowlogArgLength;\n    }\n    slowlog_args.emplace_back(arg.substr(0, kMaximumSlowlogArgLength - extra_bytes_suffix_length),\n                              extra_bytes);\n  }\n\n  log_entries_.push_back(SlowLogEntry{slowlog_entry_id_++, unix_ts_usec, exec_time_usec,\n                                      /* +1 for the command */ args.size() + 1,\n                                      std::move(slowlog_args), string(client_ip),\n                                      string(client_name)});\n}\n\n}  // namespace dfly\n"
  },
  {
    "path": "src/server/slowlog.h",
    "content": "// Copyright 2022, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#pragma once\n\n#include <boost/circular_buffer.hpp>\n#include <string>\n#include <vector>\n\n#include \"base/integral_types.h\"\n#include \"facade/facade_types.h\"\n\nnamespace dfly {\n\nusing facade::CmdArgList;\n\nconstexpr size_t kMaximumSlowlogArgCount = 31;  // 32 - 1 for the command name\nconstexpr size_t kMaximumSlowlogArgLength = 128;\n\nstruct SlowLogEntry {\n  uint32_t entry_id;\n  uint64_t unix_ts_usec;\n  uint64_t exec_time_usec;\n  size_t original_length;\n  // a vector of pairs of argument and extra bytes if the argument was truncated\n  std::vector<std::pair<std::string, uint32_t>> cmd_args;\n  std::string client_ip;\n  std::string client_name;\n};\n\nclass SlowLogShard {\n public:\n  boost::circular_buffer<SlowLogEntry>& Entries() {\n    return log_entries_;\n  }\n\n  void Add(const std::string_view command_name, CmdArgList args, const std::string_view client_name,\n           const std::string_view client_ip, uint64_t exec_time_usec, uint64_t unix_ts_usec);\n  void Reset();\n  void ChangeLength(size_t new_length);\n\n  size_t Length() const {\n    return log_entries_.size();\n  }\n\n  size_t IsEnabled() const {\n    return log_entries_.capacity() > 0;\n  }\n\n private:\n  uint32_t slowlog_entry_id_ = 0;\n\n  // TODO: to replace with base::RingBuffer because circular_buffer does not seem to support\n  // move semantics.\n  boost::circular_buffer<SlowLogEntry> log_entries_;\n};\n}  // namespace dfly\n"
  },
  {
    "path": "src/server/snapshot.cc",
    "content": "// Copyright 2024, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#include \"server/snapshot.h\"\n\n#include <absl/strings/str_cat.h>\n\n#include <mutex>\n\n#include \"base/cycle_clock.h\"\n#include \"base/flags.h\"\n#include \"base/logging.h\"\n#include \"core/search/base.h\"\n#include \"server/db_slice.h\"\n#include \"server/engine_shard_set.h\"\n#include \"server/execution_state.h\"\n#include \"server/journal/journal.h\"\n#include \"server/rdb_extensions.h\"\n#include \"server/rdb_save.h\"\n#include \"server/search/global_hnsw_index.h\"\n#include \"server/server_state.h\"\n#include \"server/tiered_storage.h\"\n#include \"util/fibers/stacktrace.h\"\n#include \"util/fibers/synchronization.h\"\n\nABSL_FLAG(bool, point_in_time_snapshot, true, \"If true replication uses point in time snapshoting\");\nABSL_FLAG(bool, background_snapshotting, false, \"Whether to run snapshot as a background fiber\");\nABSL_FLAG(bool, serialize_hnsw_index, false, \"Serialize HNSW vector index graph structure\");\n\nnamespace dfly {\n\nusing namespace std;\nusing namespace util;\nusing namespace chrono_literals;\n\nusing facade::operator\"\"_KB;\n\nnamespace {\nthread_local absl::flat_hash_set<SliceSnapshot*> tl_slice_snapshots;\n\n// Controls the chunks size for pushing serialized data. 
The larger the chunk the more CPU\n// it may require (especially with compression), and less responsive the server may be.\nconstexpr size_t kMinBlobSize = 8_KB;\n\n}  // namespace\n\nSliceSnapshot::SliceSnapshot(CompressionMode compression_mode, DbSlice* slice,\n                             SnapshotDataConsumerInterface* consumer, ExecutionState* cntx,\n                             DflyVersion replica_dfly_version)\n    : db_slice_(slice),\n      db_array_(slice->databases()),\n      compression_mode_(compression_mode),\n      replica_dfly_version_(replica_dfly_version),\n      consumer_(consumer),\n      cntx_(cntx) {\n  tl_slice_snapshots.insert(this);\n}\n\nSliceSnapshot::~SliceSnapshot() {\n  DCHECK(db_slice_->shard_owner()->IsMyThread());\n  tl_slice_snapshots.erase(this);\n}\n\nsize_t SliceSnapshot::GetThreadLocalMemoryUsage() {\n  size_t mem = 0;\n  for (SliceSnapshot* snapshot : tl_slice_snapshots) {\n    mem += snapshot->GetBufferCapacity();\n  }\n  return mem;\n}\n\nbool SliceSnapshot::IsSnaphotInProgress() {\n  return !tl_slice_snapshots.empty();\n}\n\nvoid SliceSnapshot::Start(bool stream_journal, SnapshotFlush allow_flush) {\n  DCHECK(!snapshot_fb_.IsJoinable());\n\n  auto db_cb = [this](DbIndex db_index, const DbSlice::ChangeReq& req) {\n    OnDbChange(db_index, req);\n  };\n\n  use_background_mode_ = absl::GetFlag(FLAGS_background_snapshotting);\n  snapshot_version_ = db_slice_->RegisterOnChange(std::move(db_cb));\n\n  if (stream_journal) {\n    use_snapshot_version_ = absl::GetFlag(FLAGS_point_in_time_snapshot);\n    journal_cb_id_ = journal::RegisterConsumer(this);\n    if (!use_snapshot_version_) {\n      auto moved_cb = [this](DbIndex db_index, const DbSlice::MovedItemsVec& items) {\n        OnMoved(db_index, items);\n      };\n      moved_cb_id_ = db_slice_->RegisterOnMove(std::move(moved_cb));\n    }\n  }\n\n  size_t flush_threshold = 0;\n  RdbSerializer::ConsumeFun consume_fun;\n  if (allow_flush == SnapshotFlush::kAllow) {\n    
flush_threshold = ServerState::tlocal()->serialization_max_chunk_size;\n    if (flush_threshold != 0) {\n      // The callback receives data directly from the serializer, no need to call back into it.\n      consume_fun = [this](std::string data) {\n        HandleFlushData(std::move(data));\n        VLOG(2) << \"HandleFlushData via callback\";\n        ++ServerState::tlocal()->stats.big_value_preemptions;\n      };\n    }\n  }\n  serializer_ = std::make_unique<RdbSerializer>(compression_mode_, consume_fun, flush_threshold);\n\n  VLOG(1) << \"DbSaver::Start - saving entries with version less than \" << snapshot_version_;\n\n  fb2::Fiber::Opts opts{.priority = use_background_mode_ ? fb2::FiberPriority::BACKGROUND\n                                                         : fb2::FiberPriority::NORMAL,\n                        .name = absl::StrCat(\"SliceSnapshot-\", ProactorBase::me()->GetPoolIndex())};\n  snapshot_fb_ = fb2::Fiber(opts, [this, stream_journal] {\n    // TODO add error processing for index serialization\n    SerializeIndexMappings();\n    SerializeGlobalHnswIndices();\n    this->IterateBucketsFb(stream_journal);\n    db_slice_->UnregisterOnChange(snapshot_version_);\n    if (!use_snapshot_version_) {\n      db_slice_->UnregisterOnMoved(moved_cb_id_);\n    }\n    consumer_->Finalize();\n    VLOG(1) << \"Serialization peak bytes: \" << serializer_->GetSerializationPeakBytes();\n  });\n}\n\n// Called only for replication use-case.\nvoid SliceSnapshot::FinalizeJournalStream(bool cancel) {\n  VLOG(1) << \"FinalizeJournalStream\";\n  DCHECK(db_slice_->shard_owner()->IsMyThread());\n  if (!journal_cb_id_) {  // Finalize only once.\n    // In case of incremental snapshotting in StartIncremental, if an error is encountered,\n    // journal_cb_id_ may not be set, but the snapshot fiber is still running.\n    snapshot_fb_.JoinIfNeeded();\n    return;\n  }\n  uint32_t cb_id = journal_cb_id_;\n  journal_cb_id_ = 0;\n\n  // Wait for serialization to finish in any 
case.\n  snapshot_fb_.JoinIfNeeded();\n\n  journal::UnregisterConsumer(cb_id);\n  if (!cancel) {\n    // always succeeds because serializer_ flushes to string.\n    VLOG(1) << \"FinalizeJournalStream lsn: \" << journal::GetLsn();\n    std::ignore = serializer_->SendJournalOffset(journal::GetLsn());\n    PushSerialized(true);\n  }\n}\n\n// The algorithm is to go over all the buckets and serialize those with\n// version < snapshot_version_. In order to serialize each physical bucket exactly once we update\n// bucket version to snapshot_version_ once it has been serialized.\n// We handle serialization at physical bucket granularity.\n// To further complicate things, Table::Traverse covers a logical bucket that may consist of\n// several physical buckets in dash table. For example, items belonging to logical bucket 0\n// can reside in buckets 0,1 and stash buckets 56-59.\n// PrimeTable::Traverse guarantees an atomic traversal of a single logical bucket,\n// it also guarantees 100% coverage of all items that existed when the traversal started\n// and survived until it finished.\n\nvoid SliceSnapshot::SerializeIndexMapping(\n    uint32_t shard_id, std::string_view index_name,\n    const std::vector<std::pair<std::string, search::DocId>>& mappings) {\n  // Format: [RDB_OPCODE_SHARD_DOC_INDEX, shard_id, index_name, mapping_count,\n  //          then for each mapping: key_string, doc_id]\n  if (auto ec = serializer_->WriteOpcode(RDB_OPCODE_SHARD_DOC_INDEX); ec)\n    return;\n  if (auto ec = serializer_->SaveLen(shard_id); ec)\n    return;\n  if (auto ec = serializer_->SaveString(index_name); ec)\n    return;\n  if (auto ec = serializer_->SaveLen(mappings.size()); ec)\n    return;\n\n  for (const auto& [key, doc_id] : mappings) {\n    if (auto ec = serializer_->SaveString(key); ec)\n      return;\n    if (auto ec = serializer_->SaveLen(doc_id); ec)\n      return;\n  }\n  PushSerialized(false);\n}\n\nvoid SliceSnapshot::SerializeIndexMappings() {\n#ifdef WITH_SEARCH\n  if 
(SaveMode() == dfly::SaveMode::RDB || !absl::GetFlag(FLAGS_serialize_hnsw_index) ||\n      replica_dfly_version_ < DflyVersion::VER6) {\n    return;\n  }\n\n  // Get all HNSW index names from the global registry\n  absl::flat_hash_set<std::string> hnsw_index_names =\n      GlobalHnswIndexRegistry::Instance().GetIndexNames();\n\n  auto* indices = db_slice_->shard_owner()->search_indices();\n  uint32_t shard_id = db_slice_->shard_owner()->shard_id();\n\n  for (const auto& index_name : hnsw_index_names) {\n    auto* index = indices->GetIndex(index_name);\n    if (!index) {\n      continue;\n    }\n\n    auto mappings = index->SerializeKeyIndex();\n    if (mappings.empty()) {\n      continue;\n    }\n\n    SerializeIndexMapping(shard_id, index_name, mappings);\n  }\n#endif\n}\n\nvoid SliceSnapshot::SerializeGlobalHnswIndices() {\n#ifdef WITH_SEARCH\n  // Serialize HNSW global indices for shard 0 only\n  if (db_slice_->shard_owner()->shard_id() != 0 || SaveMode() == dfly::SaveMode::RDB ||\n      !absl::GetFlag(FLAGS_serialize_hnsw_index) || replica_dfly_version_ < DflyVersion::VER6) {\n    return;\n  }\n\n  auto all_indices = GlobalHnswIndexRegistry::Instance().GetAll();\n\n  // Preallocate buffer for HNSW entry serialization.\n  std::vector<uint8_t> tmp_buf;\n\n  for (const auto& [index_key, index] : all_indices) {\n    {\n      // Acquire a read lock to ensure a consistent snapshot of the graph.\n      // While held, Add/Remove calls will defer into the adapter's internal list\n      // and will be replayed automatically on the next write operation.\n      auto read_lock = index->GetReadLock();\n\n      // Format: [RDB_OPCODE_VECTOR_INDEX, index_name, elements_number,\n      //          then for each node: binary encoded entry via SaveHNSWEntry]\n      if (auto ec = serializer_->WriteOpcode(RDB_OPCODE_VECTOR_INDEX); ec) {\n        continue;\n      }\n      if (auto ec = serializer_->SaveString(index_key); ec) {\n        continue;\n      }\n\n      size_t node_count = 
index->GetNodeCount();\n      if (auto ec = serializer_->SaveLen(node_count); ec) {\n        continue;\n      }\n\n      constexpr size_t kBatchSize = 1000;\n      for (size_t i = 0; i < node_count; i += kBatchSize) {\n        size_t batch_end = std::min(i + kBatchSize, node_count);\n        auto nodes = index->GetNodesRange(i, batch_end);\n        for (const auto& node : nodes) {\n          tmp_buf.resize(node.TotalSize());\n          if (auto ec = serializer_->SaveHNSWEntry(node, absl::MakeSpan(tmp_buf)); ec)\n            break;\n        }\n      }\n    }  // read_lock released here\n\n    // Flush after completing entire index to avoid splitting HNSW data across compressed blobs.\n    // The HNSW loader expects all nodes for an index to be readable in one pass.\n    PushSerialized(false);\n  }\n#endif\n}\n\n// Serializes all the entries with version less than snapshot_version_.\nvoid SliceSnapshot::IterateBucketsFb(bool send_full_sync_cut) {\n  const uint64_t kCyclesPerJiffy = base::CycleClock::Frequency() >> 16;  // ~15usec.\n\n  for (DbIndex db_indx = 0; db_indx < db_array_.size(); ++db_indx) {\n    stats_.keys_total += db_slice_->DbSize(db_indx);\n  }\n\n  for (DbIndex snapshot_db_index_ = 0; snapshot_db_index_ < db_array_.size();\n       ++snapshot_db_index_) {\n    if (!cntx_->IsRunning())\n      return;\n\n    if (!db_array_[snapshot_db_index_])\n      continue;\n\n    PrimeTable* pt = &db_array_[snapshot_db_index_]->prime;\n    VLOG(1) << \"Start traversing \" << pt->size() << \" items for index \" << snapshot_db_index_;\n\n    do {\n      if (!cntx_->IsRunning()) {\n        return;\n      }\n\n      snapshot_cursor_ = pt->TraverseBuckets(\n          snapshot_cursor_,\n          [this, &snapshot_db_index_](auto it) { return BucketSaveCb(snapshot_db_index_, it); });\n\n      if (use_background_mode_) {\n        // Yielding for background fibers has low overhead if the time slice isn't used up.\n        // Do it after every bucket for maximum 
responsiveness.\n        DCHECK(ThisFiber::Priority() == fb2::FiberPriority::BACKGROUND);\n        ThisFiber::Yield();\n        PushSerialized(false);\n      } else {\n        if (!PushSerialized(false)) {\n          if (!use_background_mode_ && ThisFiber::GetRunningTimeCycles() > kCyclesPerJiffy) {\n            ThisFiber::Yield();\n          }\n        }\n      }\n    } while (snapshot_cursor_);\n\n    DVLOG(2) << \"after loop \" << ThisFiber::GetName();\n    // Wait for all the outstanding delayed entries and serialize them as well.\n    PushDelayedEntries(true, nullptr);\n    PushSerialized(true);\n  }  // for (dbindex)\n\n  CHECK(!serialize_bucket_running_);\n  if (send_full_sync_cut) {\n    CHECK(!serializer_->SendFullSyncCut());\n    PushSerialized(true);\n  }\n\n  // serialized + side_saved must be equal to the total saved.\n  VLOG(1) << \"Exit SnapshotSerializer loop_serialized: \" << stats_.loop_serialized\n          << \", side_saved \" << stats_.side_saved << \", cbcalls \" << stats_.savecb_calls\n          << \", journal_saved \" << stats_.jounal_changes << \", moved_saved \" << stats_.moved_saved\n          << \", flushed_under_lock \" << stats_.flushed_under_lock;\n}\n\nbool SliceSnapshot::BucketSaveCb(DbIndex db_index, PrimeTable::bucket_iterator it) {\n  std::lock_guard guard(big_value_mu_);\n\n  ++stats_.savecb_calls;\n\n  if (use_snapshot_version_) {\n    if (it.GetVersion() >= snapshot_version_) {\n      // either has been already serialized or added after snapshotting started.\n      DVLOG(3) << \"Skipped \" << it.segment_id() << \":\" << it.bucket_id() << \" at \"\n               << it.GetVersion();\n      ++stats_.skipped;\n      return false;\n    }\n\n    db_slice_->FlushChangeToEarlierCallbacks(db_index, DbSlice::Iterator::FromPrime(it),\n                                             snapshot_version_);\n  }\n\n  auto* latch = db_slice_->GetLatch();\n\n  // Locking this never preempts. 
We merely increment the underlying counter such that\n  // if SerializeBucket preempts, Heartbeat() won't run because the blocking counter is not\n  // zero.\n  std::lock_guard latch_guard(*latch);\n\n  stats_.loop_serialized += SerializeBucket(db_index, it, false);\n\n  return false;\n}\n\nunsigned SliceSnapshot::SerializeBucket(DbIndex db_index, PrimeTable::bucket_iterator it,\n                                        bool push_tiered) {\n  if (use_snapshot_version_) {\n    DCHECK_LT(it.GetVersion(), snapshot_version_);\n    it.SetVersion(snapshot_version_);\n  }\n\n  // traverse physical bucket and write it into string file.\n  serialize_bucket_running_ = true;\n\n  unsigned result = 0;\n\n  std::vector<TieredDelayEntryKey> bucket_tiered_keys;\n  const bool tiering_enabled = EngineShard::tlocal()->tiered_storage() != nullptr;\n  const bool track_tiered_keys = push_tiered && tiering_enabled;\n\n  for (it.AdvanceIfNotOccupied(); !it.is_done(); ++it) {\n    ++result;\n    // might preempt due to big value serialization.\n    SerializeEntry(db_index, it->first, it->second);\n    // Track tiered keys to push them with priority after the loop, but only for callbacks.\n    if (track_tiered_keys && it->second.IsExternal()) {\n      bucket_tiered_keys.emplace_back(db_index, it->first.ToString());\n    }\n  }\n\n  if (tiering_enabled) {\n    // Push tracked tiered keys forcefully. If there are too many delayed entries\n    // accumulated we should also push them forcefully.\n    const size_t kMaxDelayedEntries = 512;\n    PushDelayedEntries(delayed_entries_.size() > kMaxDelayedEntries,\n                       track_tiered_keys ? 
&bucket_tiered_keys : nullptr);\n  }\n\n  serialize_bucket_running_ = false;\n  return result;\n}\n\nvoid SliceSnapshot::SerializeEntry(DbIndex db_indx, const PrimeKey& pk, const PrimeValue& pv) {\n  if (pv.IsExternal() && pv.IsCool())\n    return SerializeEntry(db_indx, pk, pv.GetCool().record->value);\n\n  time_t expire_time = pk.GetExpireTime();\n  uint32_t mc_flags = pv.HasFlag() ? db_slice_->GetMCFlag(db_indx, pk) : 0;\n\n  if (pv.IsExternal()) {\n    // TODO: we lose the stickiness attribute by cloning like this PrimeKey.\n    SerializeExternal(db_indx, PrimeKey{pk.ToString()}, pv, expire_time, mc_flags);\n  } else {\n    io::Result<uint8_t> res = serializer_->SaveEntry(pk, pv, expire_time, mc_flags, db_indx);\n    CHECK(res);\n    ++type_freq_map_[*res];\n  }\n}\n\nvoid SliceSnapshot::HandleFlushData(std::string data) {\n  if (data.empty())\n    return;\n\n  if (big_value_mu_.is_locked()) {\n    ++stats_.flushed_under_lock;\n  }\n  size_t serialized = data.size();\n  uint64_t id = rec_id_++;\n\n  if (use_background_mode_) {\n    // Yield after possibly long cpu slice due to compression and serialization\n    // before possible suspension of ConsumeData resets the cpu time of the last slice\n    if (ThisFiber::Priority() == fb2::FiberPriority::BACKGROUND)\n      ThisFiber::Yield();\n    // else: This function is invoked from the journal with regular priority as well.\n    // TODO: Maybe Sleep() to provide write backpressure in advance?\n  }\n\n  uint64_t running_cycles = ThisFiber::GetRunningTimeCycles();\n\n  fb2::NoOpLock lk;\n  // We create a critical section here that ensures that records are pushed in sequential order.\n  // As a result, it is not possible for two fiber producers to push concurrently.\n  // If A.id = 5, and then B.id = 6, and both are blocked here, it means that last_pushed_id_ < 4.\n  // Once last_pushed_id_ = 4, A will be unblocked, while B will wait until A finishes pushing and\n  // update last_pushed_id_ to 5.\n  seq_cond_.wait(lk, 
[&] { return id == this->last_pushed_id_ + 1; });\n\n  // Blocking point.\n  consumer_->ConsumeData(std::move(data), cntx_);\n\n  DCHECK_EQ(last_pushed_id_ + 1, id);\n  last_pushed_id_ = id;\n  seq_cond_.notify_all();\n\n  if (!use_background_mode_) {\n    // serializer_->Flush can be quite slow for large values or due to compression, therefore\n    // we counter-balance CPU over-usage by sleeping.\n    // We measure running_cycles before the preemption points, because they reset the counter.\n    uint64_t sleep_usec = (running_cycles * 1000'000 / base::CycleClock::Frequency()) / 2;\n    ThisFiber::SleepFor(chrono::microseconds(std::min<uint64_t>(sleep_usec, 2000ul)));\n  }\n\n  VLOG(2) << \"Pushed with Serialize() \" << serialized;\n}\n\nsize_t SliceSnapshot::FlushSerialized() {\n  std::string blob = serializer_->Flush(RdbSerializerBase::FlushState::kFlushEndEntry);\n  size_t serialized = blob.size();\n  HandleFlushData(std::move(blob));\n  return serialized;\n}\n\nbool SliceSnapshot::PushSerialized(bool force) {\n  if (!force && serializer_->SerializedLen() < kMinBlobSize)\n    return false;\n  return FlushSerialized();\n}\n\nvoid SliceSnapshot::PushDelayedEntries(bool force,\n                                       std::vector<TieredDelayEntryKey>* bucket_tiered_keys) {\n  using DelayedEntryIt = decltype(delayed_entries_)::iterator;\n\n  // Serializes a single delayed entry. 
Resolves the tiered read future, writes the\n  // key/value and removes the entry from the map.\n  auto serialize_entry = [this](DelayedEntryIt it) {\n    auto& entry = it->second;\n    auto value = entry->value.Get();\n\n    if (!value.has_value()) {\n      cntx_->ReportError(make_error_code(errc::io_error),\n                         absl::StrCat(\"Failed to read tiered key: \", entry->key.ToString()));\n      return;\n    }\n\n    PrimeValue pv{*value};\n    auto res = serializer_->SaveEntry(entry->key, pv, entry->expire, entry->mc_flags, entry->dbid);\n    CHECK(res);\n\n    delayed_entries_.erase(it);\n\n    // If we have serialized enough data we should push it to avoid building\n    // up a large blob in memory.\n    PushSerialized(false);\n  };\n\n  // When tiered_keys are provided, we should serialize the entries matching the keys.\n  if (bucket_tiered_keys) {\n    for (const auto& key : *bucket_tiered_keys) {\n      if (auto it = delayed_entries_.find(key); it != delayed_entries_.end())\n        serialize_entry(it);\n    }\n  }\n\n  // Serialize the delayed entries that are resolved, or all if force is true.\n  for (auto it = delayed_entries_.begin(); it != delayed_entries_.end();) {\n    if (!force && !it->second->value.IsResolved()) {\n      ++it;\n      continue;\n    }\n    serialize_entry(it++);\n  }\n\n  // If we need to serialize all entries (force=true), we should push\n  // leftover serialized data after the loop.\n  PushSerialized(force);\n}\n\nvoid SliceSnapshot::SerializeExternal(DbIndex db_index, PrimeKey pk, const PrimeValue& pv,\n                                      time_t expire_time, uint32_t mc_flags) {\n  // We prefer to avoid blocking, so we just schedule a tiered read and append\n  // it to the delayed entries.\n  auto key = pk.ToString();\n  auto future = ReadTieredString(db_index, key, pv, EngineShard::tlocal()->tiered_storage());\n  auto entry = std::make_unique<TieredDelayedEntry>(db_index, std::move(pk), std::move(future),\n          
                                          expire_time, mc_flags);\n  delayed_entries_.emplace(std::make_pair(db_index, key), std::move(entry));\n  ++type_freq_map_[RDB_TYPE_STRING];\n}\n\n// Ordering invariant (both modes):\n//   For any key K, the replica must receive K's baseline value strictly before any journal entry\n//   that mutates K. This is required for baseline-dependent journal entries (e.g., HSET, LPUSH)\n//   which cannot be replayed without the prior value.\n//\n// PIT mode: enforced by serialize-before-mutate. OnDbChange serializes the bucket before the\n//   mutation commits; ConsumeJournalChange runs after the mutation on the same fiber, so the\n//   baseline is always first. big_value_mu_ prevents interleaving with the traversal fiber's\n//   SerializeBucket (which can preempt via consume_fun_).\n//\n// Non-PIT mode: OnDbChange only acquires big_value_mu_ as a barrier — no serialization. The\n//   mutex prevents journaling mutations from slipping in the middle of bucket serialization\n//   on the traversal fiber — see ConsumeJournalChange for details. 
OnMoved handles items\n//   displaced across the traversal cursor.\nvoid SliceSnapshot::OnDbChange(DbIndex db_index, const DbSlice::ChangeReq& req) {\n  std::lock_guard guard(big_value_mu_);\n  if (use_snapshot_version_) {\n    PrimeTable* table = db_slice_->GetTables(db_index).first;\n    const PrimeTable::bucket_iterator* bit = req.update();\n\n    if (bit) {\n      if (!bit->is_done() && bit->GetVersion() < snapshot_version_) {\n        stats_.side_saved += SerializeBucket(db_index, *bit, true);\n      }\n    } else {\n      string_view key = get<string_view>(req.change);\n      table->CVCUponInsert(snapshot_version_, key,\n                           [this, db_index](PrimeTable::bucket_iterator it) {\n                             DCHECK_LT(it.GetVersion(), snapshot_version_);\n                             stats_.side_saved += SerializeBucket(db_index, it, true);\n                           });\n    }\n  }\n}\n\nbool SliceSnapshot::IsPositionSerialized(DbIndex id, PrimeTable::Cursor cursor) {\n  uint8_t depth = db_slice_->GetTables(id).first->depth();\n\n  return id < snapshot_db_index_ ||\n         (id == snapshot_db_index_ &&\n          (cursor.bucket_id() < snapshot_cursor_.bucket_id() ||\n           (cursor.bucket_id() == snapshot_cursor_.bucket_id() &&\n            cursor.segment_id(depth) < snapshot_cursor_.segment_id(depth))));\n}\n\nvoid SliceSnapshot::OnMoved(DbIndex id, const DbSlice::MovedItemsVec& items) {\n  std::lock_guard barrier(big_value_mu_);\n  DCHECK(!use_snapshot_version_);\n  for (const auto& item_cursors : items) {\n    // If item was moved from a bucket that was serialized to a bucket that was not serialized\n    // serialize the moved item.\n    const PrimeTable::Cursor& dest = item_cursors.second;\n    const PrimeTable::Cursor& source = item_cursors.first;\n    if (IsPositionSerialized(id, dest) && !IsPositionSerialized(id, source)) {\n      PrimeTable::bucket_iterator bit = db_slice_->GetTables(id).first->CursorToBucketIt(dest);\n      
++stats_.moved_saved;\n      SerializeBucket(id, bit, true);\n    }\n  }\n}\n\n// big_value_mu_ prevents expiry/eviction DEL journal entries from interleaving with an\n// in-progress SaveEntry for a large value. SaveEntry may yield mid-entry (emitting chunks\n// across multiple scheduler turns); expiry paths emit DEL via RecordDelete directly,\n// bypassing OnDbChange. Without the lock, such a DEL could be written between two chunks\n// of the same entry, producing an invalid wire format for the downstream consumer.\n//\n// Note: even if the protocol were extended to support interleaved chunks, the lock would\n// still be required semantically: a DEL journal entry must not be applied on the replica\n// while the entry's baseline is still being loaded. The delayed deletion queue proposal\n// in the design doc addresses this without a shard-wide lock.\n//\n// Note: for transaction-driven mutations, baseline-before-journal ordering is already\n// guaranteed by call order on the mutation fiber (OnDbChange precedes ConsumeJournalChange);\n// big_value_mu_ is not needed for that ordering.\nvoid SliceSnapshot::ConsumeJournalChange(const journal::JournalChangeItem& item) {\n  std::lock_guard barrier(big_value_mu_);\n\n  // remove when we support interleaving chunks.\n  LOG_IF(DFATAL, serialize_bucket_running_)\n      << \"Internal error: can not run interleave journal and bucket serialization\";\n  std::ignore = serializer_->WriteJournalEntry(item.journal_item.data);\n  ++stats_.jounal_changes;\n}\n\nvoid SliceSnapshot::ThrottleIfNeeded() {\n  PushSerialized(false);\n}\n\nsize_t SliceSnapshot::GetBufferCapacity() const {\n  if (serializer_ == nullptr) {\n    return 0;\n  }\n\n  return serializer_->GetBufferCapacity();\n}\n\nsize_t SliceSnapshot::GetTempBuffersSize() const {\n  if (serializer_ == nullptr) {\n    return 0;\n  }\n\n  return serializer_->GetTempBufferSize();\n}\n\nRdbSaver::SnapshotStats SliceSnapshot::GetCurrentSnapshotProgress() const {\n  return 
{stats_.loop_serialized + stats_.side_saved, stats_.keys_total};\n}\n\n}  // namespace dfly\n"
  },
  {
    "path": "src/server/snapshot.h",
    "content": "// Copyright 2024, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#pragma once\n\n#include <deque>\n\n#include \"server/db_slice.h\"\n#include \"server/rdb_save.h\"\n#include \"server/synchronization.h\"\n#include \"server/table.h\"\n#include \"server/tiered_storage.h\"\n\nnamespace dfly {\n\nclass ExecutionState;\n\nnamespace journal {\nstruct Entry;\n}  // namespace journal\n\nnamespace search {\nusing DocId = uint32_t;\n}  // namespace search\n\n// ┌────────────────┐   ┌─────────────┐\n// │IterateBucketsFb│   │  OnDbChange │\n// └──────┬─────────┘   └─┬───────────┘\n//        │               │            OnDbChange forces whole bucket to be\n//        ▼               ▼            serialized if iterate didn't reach it yet\n// ┌──────────────────────────┐\n// │     SerializeBucket      │        Both might fall back to a temporary serializer\n// └────────────┬─────────────┘        if default is used on another db index\n//              │\n//              |                      Socket is left open in journal streaming mode\n//              ▼\n// ┌──────────────────────────┐          ┌─────────────────────────┐\n// │     SerializeEntry       │          │  ConsumeJournalChange   │\n// └─────────────┬────────────┘          └────────────┬────────────┘\n//               │                                    │\n//         PushBytes                                  │   into serializer buffer)\n//               │                                    ▼\n//               ▼                        ┌──────────────────────────┐\n//               ▼                        │     WriteJournalEntry    │\n// ┌──────────────────────────────┐       │  (appends journal entry  │\n// │     push_cb(buffer)          │       │   into serializer buffer)│\n// └──────────────────────────────┘       └──────────────────────────┘\n\n// SliceSnapshot is used for iterating over a shard at a specified point-in-time\n// and submitting all values to an 
output sink.\n// In journal streaming mode, the snapshot continues submitting changes\n// over the sink until explicitly stopped.\nclass SliceSnapshot : public journal::JournalConsumerInterface {\n public:\n  // Represents a target sink for receiving snapshot data. Specifically designed\n  // to send data to RdbSaver wrapping up a file shard or a socket.\n  struct SnapshotDataConsumerInterface {\n    virtual ~SnapshotDataConsumerInterface() = default;\n\n    // Receives a chunk of snapshot data for processing\n    virtual void ConsumeData(std::string data, ExecutionState* cntx) = 0;\n\n    // Finalizes the snapshot writing\n    virtual void Finalize() = 0;\n  };\n\n  SliceSnapshot(CompressionMode compression_mode, DbSlice* slice,\n                SnapshotDataConsumerInterface* consumer, ExecutionState* cntx,\n                DflyVersion replica_dfly_version);\n  ~SliceSnapshot();\n\n  static size_t GetThreadLocalMemoryUsage();\n  static bool IsSnaphotInProgress();\n\n  // Initialize snapshot, start bucket iteration fiber, register listeners.\n  // In journal streaming mode it needs to be stopped by either Stop or Cancel.\n  enum class SnapshotFlush : uint8_t { kAllow, kDisallow };\n\n  void Start(bool stream_journal, SnapshotFlush allow_flush = SnapshotFlush::kDisallow);\n\n  // Finalizes journal streaming writes. Only called for replication.\n  // Blocking. Must be called from the Snapshot thread.\n  void FinalizeJournalStream(bool cancel);\n\n  // Waits for a regular, non journal snapshot to finish.\n  // Called only for non-replication, backups usecases.\n  void WaitSnapshotting() {\n    snapshot_fb_.JoinIfNeeded();\n  }\n\n  const RdbTypeFreqMap& freq_map() const {\n    return type_freq_map_;\n  }\n\n  // Get different sizes, in bytes. 
All disjoint.\n  size_t GetBufferCapacity() const;\n  size_t GetTempBuffersSize() const;\n\n  RdbSaver::SnapshotStats GetCurrentSnapshotProgress() const;\n\n  // Journal listener\n  void ConsumeJournalChange(const journal::JournalChangeItem& item);\n  void ThrottleIfNeeded();\n\n private:\n  [[maybe_unused]] void SerializeIndexMapping(\n      uint32_t shard_id, std::string_view index_name,\n      const std::vector<std::pair<std::string, search::DocId>>& mappings);\n\n  // Serialize ShardDocIndex key-to-DocId mappings for all search indices on this shard\n  void SerializeIndexMappings();\n\n  // Serialize HNSW global indices for shard 0 only\n  void SerializeGlobalHnswIndices();\n\n  // Main snapshotting fiber that iterates over all buckets in the db slice\n  // and submits them to SerializeBucket.\n  void IterateBucketsFb(bool send_full_sync_cut);\n\n  // Called on traversing cursor by IterateBucketsFb.\n  bool BucketSaveCb(DbIndex db_index, PrimeTable::bucket_iterator it);\n\n  // Serialize single bucket.\n  // Returns number of serialized entries, updates bucket version to snapshot version.\n  unsigned SerializeBucket(DbIndex db_index, PrimeTable::bucket_iterator bucket_it,\n                           bool push_tracked_tiered_keys);\n\n  // Serialize entry into passed serializer.\n  void SerializeEntry(DbIndex db_index, const PrimeKey& pk, const PrimeValue& pv);\n\n  // DbChange listener\n  void OnDbChange(DbIndex db_index, const DbSlice::ChangeReq& req);\n\n  // DbSlice moved listener\n  void OnMoved(DbIndex db_index, const DbSlice::MovedItemsVec& items);\n  bool IsPositionSerialized(DbIndex db_index, PrimeTable::Cursor cursor);\n\n  // Push serializer's internal buffer.\n  // Push regardless of buffer size if force is true.\n  // Return true if pushed. Can block. 
Is called from the snapshot thread.\n  bool PushSerialized(bool force);\n  void SerializeExternal(DbIndex db_index, PrimeKey pk, const PrimeValue& pv, time_t expire_time,\n                         uint32_t mc_flags);\n\n  // Handles data provided by RdbSerializer when its internal buffer exceeds the threshold\n  // during big value serialization (e.g. huge sets/lists or large strings).\n  // The data has already been extracted from the serializer and is owned here, ensuring correct\n  // plumbing and making it safe to move.\n  void HandleFlushData(std::string data);\n\n  // Used for explicit flushes at safe points (e.g. between entries). Can block.\n  size_t FlushSerialized();\n\n  // Tuple <db_index, key> is used as a key to uniquely identify tiered entry on shard.\n  using TieredDelayEntryKey = std::pair<DbIndex, std::string>;\n\n  // Serialize delayed entries.\n  // If bucket_tiered_keys is provided we should serialize these keys forcefully.\n  // Other entries can be serialized if they are resolved, but we don't wait for them unless force\n  // is true.\n  void PushDelayedEntries(bool force, std::vector<TieredDelayEntryKey>* bucket_tiered_keys);\n\n  DbSlice* db_slice_;\n  const DbTableArray db_array_;\n  PrimeTable::Cursor snapshot_cursor_;\n  DbIndex snapshot_db_index_ = 0;\n\n  std::unique_ptr<RdbSerializer> serializer_;\n\n  // Delayed entries that are waiting for tiered storage reads to complete before they can be\n  // serialized.\n  absl::flat_hash_map<TieredDelayEntryKey, std::unique_ptr<TieredDelayedEntry>> delayed_entries_;\n\n  // Used for sanity checks.\n  bool serialize_bucket_running_ = false;\n\n  util::fb2::Fiber snapshot_fb_;  // IterateEntriesFb\n  util::fb2::CondVarAny seq_cond_;\n\n  const CompressionMode compression_mode_;\n  RdbTypeFreqMap type_freq_map_;\n\n  // version upper bound for entries that should be saved (not included).\n  uint64_t snapshot_version_;\n  uint64_t moved_cb_id_ = 0;\n  uint32_t journal_cb_id_ = 0;\n  uint32_t 
moved_cb_id = 0;\n\n  bool use_background_mode_ = false;\n  bool use_snapshot_version_ = true;\n  DflyVersion replica_dfly_version_ = DflyVersion::CURRENT_VER;\n\n  uint64_t rec_id_ = 1, last_pushed_id_ = 0;\n\n  struct Stats {\n    size_t loop_serialized = 0;\n    size_t skipped = 0;\n    size_t side_saved = 0;\n    size_t savecb_calls = 0;\n    size_t keys_total = 0;\n    size_t jounal_changes = 0;\n    size_t moved_saved = 0;\n    size_t flushed_under_lock = 0;\n  } stats_;\n\n  ThreadLocalMutex big_value_mu_;\n\n  SnapshotDataConsumerInterface* consumer_;\n  ExecutionState* cntx_;\n};\n\n}  // namespace dfly\n"
  },
  {
    "path": "src/server/stats.cc",
    "content": "// Copyright 2024, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#include \"server/stats.h\"\n\n#include <algorithm>\n\nnamespace dfly {\n\n#define ADD(x) (x) += o.x\n\nTieredStats& TieredStats::operator+=(const TieredStats& o) {\n  static_assert(sizeof(TieredStats) == 168);\n\n  ADD(total_stashes);\n  ADD(total_fetches);\n  ADD(total_cancels);\n  ADD(total_deletes);\n  ADD(total_defrags);\n  ADD(total_uploads);\n  ADD(total_heap_buf_allocs);\n  ADD(total_registered_buf_allocs);\n\n  ADD(allocated_bytes);\n  ADD(capacity_bytes);\n\n  ADD(pending_read_cnt);\n  ADD(pending_stash_cnt);\n\n  ADD(small_bins_cnt);\n  ADD(small_bins_entries_cnt);\n  ADD(small_bins_filling_bytes);\n  ADD(small_bins_filling_entries_cnt);\n\n  ADD(total_stash_overflows);\n  ADD(cold_storage_bytes);\n  ADD(total_offloading_steps);\n  ADD(total_offloading_stashes);\n\n  ADD(clients_throttled);\n  ADD(total_clients_throttled);\n  return *this;\n}\n\nSearchStats& SearchStats::operator+=(const SearchStats& o) {\n  static_assert(sizeof(SearchStats) == 24);\n  ADD(used_memory);\n  ADD(num_entries);\n\n  // Different shards could have inconsistent num_indices values during concurrent operations.\n  // This can happen on concurrent index creation.\n  // We use max to ensure that the total num_indices is the maximum of all shards.\n  num_indices = std::max(num_indices, o.num_indices);\n  return *this;\n}\n\n#undef ADD\n\n}  // namespace dfly\n"
  },
  {
    "path": "src/server/stats.h",
    "content": "// Copyright 2024, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#pragma once\n\n#include <cstddef>\n#include <cstdint>\n\nnamespace dfly {\n\nstruct TieredStats {\n  uint64_t total_stashes = 0;\n  uint64_t total_fetches = 0;\n  uint64_t total_cancels = 0;\n  uint64_t total_deletes = 0;\n  uint64_t total_defrags = 0;\n  uint64_t total_uploads = 0;\n  uint64_t total_registered_buf_allocs = 0;\n  uint64_t total_heap_buf_allocs = 0;\n\n  // How many times the system did not perform Stash call due to overloaded disk write queue\n  // (disjoint with total_stashes).\n  uint64_t total_stash_overflows = 0;\n  uint64_t total_offloading_steps = 0;\n  uint64_t total_offloading_stashes = 0;\n\n  size_t allocated_bytes = 0;\n  size_t capacity_bytes = 0;\n\n  uint32_t pending_read_cnt = 0;\n  uint32_t pending_stash_cnt = 0;\n\n  uint64_t small_bins_cnt = 0;\n  uint64_t small_bins_entries_cnt = 0;\n  size_t small_bins_filling_bytes = 0;\n  size_t small_bins_filling_entries_cnt = 0;\n  size_t cold_storage_bytes = 0;\n\n  uint64_t clients_throttled = 0;        // current number of throttled clients\n  uint64_t total_clients_throttled = 0;  // total number of throttles\n\n  TieredStats& operator+=(const TieredStats&);\n};\n\nstruct SearchStats {\n  size_t used_memory = 0;\n  size_t num_indices = 0;\n  size_t num_entries = 0;\n\n  SearchStats& operator+=(const SearchStats&);\n};\n\n}  // namespace dfly\n"
  },
  {
    "path": "src/server/stream_family.cc",
    "content": "// Copyright 2022, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#include \"server/stream_family.h\"\n\n#include <absl/cleanup/cleanup.h>\n#include <absl/strings/ascii.h>\n#include <absl/strings/str_cat.h>\n\nextern \"C\" {\n#include \"redis/redis_aux.h\"\n#include \"redis/stream.h\"\n#include \"redis/zmalloc.h\"\n}\n\n#include \"base/logging.h\"\n#include \"facade/cmd_arg_parser.h\"\n#include \"server/acl/acl_commands_def.h\"\n#include \"server/blocking_controller.h\"\n#include \"server/command_registry.h\"\n#include \"server/conn_context.h\"\n#include \"server/db_slice.h\"\n#include \"server/engine_shard_set.h\"\n#include \"server/error.h\"\n#include \"server/execution_state.h\"\n#include \"server/family_utils.h\"\n#include \"server/namespaces.h\"\n#include \"server/transaction.h\"\n\nnamespace dfly {\n\nusing namespace facade;\nusing namespace std;\n\nStreamMemTracker::StreamMemTracker() {\n  start_size_ = zmalloc_used_memory_tl;\n}\n\nvoid StreamMemTracker::UpdateStreamSize(PrimeValue& pv) const {\n  const size_t current = zmalloc_used_memory_tl;\n  int64_t diff = static_cast<int64_t>(current) - static_cast<int64_t>(start_size_);\n  pv.AddStreamSize(diff);\n  // Under any flow we must not end up with this special value.\n  DCHECK(pv.MallocUsed() != 0);\n}\n\nnamespace {\n\n// ---------------------------------------------------------------------------\n// Stream helper functions (only used within stream_family)\n// ---------------------------------------------------------------------------\n\n/* Set 'id' to be its successor stream ID.\n * If 'id' is the maximal possible id, it is wrapped around to 0-0 and C_ERR\n * is returned. 
*/\nint StreamIncrID(streamID* id) {\n  int ret = C_OK;\n  if (id->seq == UINT64_MAX) {\n    if (id->ms == UINT64_MAX) {\n      id->ms = id->seq = 0;\n      ret = C_ERR;\n    } else {\n      id->ms++;\n      id->seq = 0;\n    }\n  } else {\n    id->seq++;\n  }\n  return ret;\n}\n\n/* Set 'id' to be its predecessor stream ID.\n * If 'id' is the minimal possible id, it resets to UINT64_MAX and C_ERR is returned. */\nint StreamDecrID(streamID* id) {\n  int ret = C_OK;\n  if (id->seq == 0) {\n    if (id->ms == 0) {\n      id->ms = id->seq = UINT64_MAX;\n      ret = C_ERR;\n    } else {\n      id->ms--;\n      id->seq = UINT64_MAX;\n    }\n  } else {\n    id->seq--;\n  }\n  return ret;\n}\n\n/* Returns non-zero if the ID is 0-0. */\nint StreamIDEqZero(streamID* id) {\n  return !(id->ms || id->seq);\n}\n\n/* Returns non-zero if the range from 'start' to 'end' contains a tombstone. */\nint StreamRangeHasTombstones(stream* s, streamID* start, streamID* end) {\n  streamID start_id, end_id;\n\n  if (!s->length || StreamIDEqZero(&s->max_deleted_entry_id)) {\n    return 0;\n  }\n\n  if (start) {\n    start_id = *start;\n  } else {\n    start_id.ms = 0;\n    start_id.seq = 0;\n  }\n\n  if (end) {\n    end_id = *end;\n  } else {\n    end_id.ms = UINT64_MAX;\n    end_id.seq = UINT64_MAX;\n  }\n\n  if (streamCompareID(&start_id, &s->max_deleted_entry_id) <= 0 &&\n      streamCompareID(&s->max_deleted_entry_id, &end_id) <= 0) {\n    return 1;\n  }\n  return 0;\n}\n\nint64_t StreamTrim(stream* s, streamAddTrimArgs* args);  // defined below\n\n/* Trims a stream by length. Returns the number of deleted items. */\nint64_t StreamTrimByLength(stream* s, long long maxlen, int approx) {\n  streamAddTrimArgs args = {};\n  args.trim_strategy = TRIM_STRATEGY_MAXLEN;\n  args.approx_trim = approx;\n  args.limit = approx ? 100 * server.stream_node_max_entries : 0;\n  args.maxlen = maxlen;\n  return StreamTrim(s, &args);\n}\n\n/* Trims a stream by minimum ID. Returns the number of deleted items. 
*/\nint64_t StreamTrimByID(stream* s, streamID minid, int approx) {\n  streamAddTrimArgs args = {};\n  args.trim_strategy = TRIM_STRATEGY_MINID;\n  args.approx_trim = approx;\n  args.limit = approx ? 100 * server.stream_node_max_entries : 0;\n  args.minid = minid;\n  return StreamTrim(s, &args);\n}\n\n/* Return 1 if 'id' exists in 's' (and not marked as deleted). */\nint StreamEntryExists(stream* s, streamID* id) {\n  streamIterator si;\n  streamIteratorStart(&si, s, id, id, 0);\n  streamID myid;\n  int64_t numfields;\n  int found = streamIteratorGetID(&si, &myid, &numfields);\n  streamIteratorStop(&si);\n  if (!found)\n    return 0;\n  serverAssert(streamCompareID(id, &myid) == 0);\n  return 1;\n}\n\nint64_t LpGetInteger(uint8_t* ele) {\n  int64_t v = 0;\n  int res = lpGetInteger(ele, &v);\n  DCHECK(res != 0);\n  return v;\n}\n\nvoid StreamIteratorRemoveEntry(streamIterator* si, streamID* current) {\n  uint8_t* lp = static_cast<uint8_t*>(si->lp);\n  int64_t aux;\n\n  int64_t flags = LpGetInteger(si->lp_flags);\n  flags |= STREAM_ITEM_FLAG_DELETED;\n  lp = lpReplaceInteger(lp, &si->lp_flags, flags);\n\n  uint8_t* p = lpFirst(lp);\n  aux = LpGetInteger(p);\n\n  if (aux == 1) {\n    lpFree(lp);\n    checkedRaxRemove(si->stream->rax, si->ri.key, si->ri.key_len, NULL);\n  } else {\n    lp = lpReplaceInteger(lp, &p, aux - 1);\n    p = lpNext(lp, p);\n    aux = LpGetInteger(p);\n    lp = lpReplaceInteger(lp, &p, aux + 1);\n    if (si->lp != lp)\n      raxInsert(si->stream->rax, si->ri.key, si->ri.key_len, lp, NULL);\n    CHECK_GT(lpBytes(lp), 0u);\n  }\n\n  si->stream->length--;\n\n  streamID start, end;\n  if (si->rev) {\n    streamDecodeID(si->start_key, &start);\n    end = *current;\n  } else {\n    start = *current;\n    streamDecodeID(si->end_key, &end);\n  }\n  streamIteratorStop(si);\n  streamIteratorStart(si, si->stream, &start, &end, si->rev);\n}\n\n/* Delete the specified item ID from the stream, returning 1 if deleted. 
*/\nint StreamDeleteItem(stream* s, streamID* id) {\n  int deleted = 0;\n  streamIterator si;\n  streamIteratorStart(&si, s, id, id, 0);\n  streamID myid;\n  int64_t numfields;\n  if (streamIteratorGetID(&si, &myid, &numfields)) {\n    StreamIteratorRemoveEntry(&si, &myid);\n    deleted = 1;\n  }\n  streamIteratorStop(&si);\n  return deleted;\n}\n\n/* Get the last valid (non-tombstone) streamID of 's'. */\nvoid StreamLastValidID(stream* s, streamID* maxid) {\n  streamIterator si;\n  streamIteratorStart(&si, s, NULL, NULL, 1);\n  int64_t numfields;\n  if (!streamIteratorGetID(&si, maxid, &numfields) && s->length)\n    serverPanic(\"Corrupt stream, length is %llu, but no max id\", (unsigned long long)s->length);\n  streamIteratorStop(&si);\n}\n\n/* Calculate the lag for a consumer group. */\nlong long StreamCGLag(stream* s, streamCG* cg) {\n  int valid = 0;\n  long long lag = 0;\n\n  if (!s->entries_added) {\n    lag = 0;\n    valid = 1;\n  } else if (cg->entries_read != SCG_INVALID_ENTRIES_READ &&\n             !StreamRangeHasTombstones(s, &cg->last_id, NULL)) {\n    lag = (long long)s->entries_added - cg->entries_read;\n    valid = 1;\n  } else {\n    long long entries_read = streamEstimateDistanceFromFirstEverEntry(s, &cg->last_id);\n    if (entries_read != SCG_INVALID_ENTRIES_READ) {\n      lag = (long long)s->entries_added - entries_read;\n      valid = 1;\n    }\n  }\n\n  if (valid) {\n    return lag;\n  }\n  return SCG_INVALID_LAG;\n}\n\n/* Lookup the consumer group in the specified stream. */\nstreamCG* StreamLookupCG(stream* s, sds groupname) {\n  if (s->cgroups == NULL)\n    return NULL;\n  void* cg = NULL;\n  raxFind(s->cgroups, (unsigned char*)groupname, sdslen(groupname), &cg);\n  return static_cast<streamCG*>(cg);\n}\n\n/* Lookup a consumer by name in the group 'cg'. 
*/\nstreamConsumer* StreamLookupConsumer(streamCG* cg, sds name) {\n  if (cg == NULL)\n    return NULL;\n  void* consumer = NULL;\n  raxFind(cg->consumers, (unsigned char*)name, sdslen(name), &consumer);\n  return static_cast<streamConsumer*>(consumer);\n}\n\n/* Delete the specified consumer from consumer group 'cg'. */\nvoid StreamDelConsumer(streamCG* cg, streamConsumer* consumer) {\n  raxIterator ri;\n  raxStart(&ri, consumer->pel);\n  raxSeek(&ri, \"^\", NULL, 0);\n  while (raxNext(&ri)) {\n    streamNACK* nack = static_cast<streamNACK*>(ri.data);\n    raxRemove(cg->pel, ri.key, ri.key_len, NULL);\n    streamFreeNACK(nack);\n  }\n  raxStop(&ri);\n\n  raxRemove(cg->consumers, (unsigned char*)consumer->name, sdslen(consumer->name), NULL);\n  raxFree(consumer->pel);\n  sdsfree(consumer->name);\n  zfree(consumer);\n}\n\n/* Get the stream ID of the edge (first or last) entry in a listpack node.\n * Returns 1 if found, 0 if the listpack is empty or invalid. */\nint LpGetEdgeStreamID(uint8_t* lp, int first, streamID* master_id, streamID* edge_id) {\n  if (lp == NULL)\n    return 0;\n\n  uint8_t* lp_ele;\n  if (first) {\n    lp_ele = lpFirst(lp);\n    lp_ele = lpNext(lp, lp_ele);  // skip entry count\n    lp_ele = lpNext(lp, lp_ele);  // skip deleted count\n    int64_t master_fields_count = LpGetInteger(lp_ele);\n    lp_ele = lpNext(lp, lp_ele);  // seek first field\n    for (int64_t i = 0; i < master_fields_count; i++)\n      lp_ele = lpNext(lp, lp_ele);\n    lp_ele = lpNext(lp, lp_ele);\n    if (lp_ele == NULL)\n      return 0;\n  } else {\n    lp_ele = lpLast(lp);\n    int64_t lp_count = LpGetInteger(lp_ele);\n    if (lp_count == 0)\n      return 0;\n    while (lp_count--)\n      lp_ele = lpPrev(lp, lp_ele);\n  }\n\n  lp_ele = lpNext(lp, lp_ele);  // seek ID (lp_ele points to 'flags')\n  streamID id = *master_id;\n  id.ms += LpGetInteger(lp_ele);\n  lp_ele = lpNext(lp, lp_ele);\n  id.seq += LpGetInteger(lp_ele);\n  *edge_id = id;\n  return 1;\n}\n\n/* Trim the 
stream 's' according to args->trim_strategy, and return the\n * number of elements removed from the stream. The 'approx' option, if non-zero,\n * specifies that the trimming must be performed in an approximated way in\n * order to maximize performance. This means that the stream may contain\n * entries with IDs < 'id' in case of MINID (or more elements than 'maxlen'\n * in case of MAXLEN), and elements are only removed if we can remove\n * a *whole* node of the radix tree. The elements are removed from the head\n * of the stream (older elements).\n *\n * The function may return zero if:\n *\n * 1) The minimal entry ID of the stream is already >= 'id' (MINID); or\n * 2) The stream is already shorter or equal to the specified max length (MAXLEN); or\n * 3) The 'approx' option is true and the head node did not have enough elements\n *    to be deleted.\n *\n * args->limit is the maximum number of entries to delete. The purpose is to\n * prevent this function from taking too long.\n * If 'limit' is 0 then we do not limit the number of deleted entries.\n * Much like the 'approx', if 'limit' is smaller than the number of entries\n * that should be trimmed, there is a chance we will still have entries with\n * IDs < 'id' (or number of elements >= maxlen in case of MAXLEN).\n */\nint64_t StreamTrim(stream* s, streamAddTrimArgs* args) {\n  size_t maxlen = args->maxlen;\n  streamID* id = &args->minid;\n  int approx = args->approx_trim;\n  int64_t limit = args->limit;\n  int trim_strategy = args->trim_strategy;\n\n  if (trim_strategy == TRIM_STRATEGY_NONE)\n    return 0;\n\n  raxIterator ri;\n  raxStart(&ri, s->rax);\n  raxSeek(&ri, \"^\", NULL, 0);\n\n  int64_t deleted = 0;\n  while (raxNext(&ri)) {\n    if (trim_strategy == TRIM_STRATEGY_MAXLEN && s->length <= maxlen)\n      break;\n\n    uint8_t* lp = static_cast<uint8_t*>(ri.data);\n    CHECK_GT(lpBytes(lp), 0u);\n    uint8_t* p = lpFirst(lp);\n    int64_t entries = LpGetInteger(p);\n\n    if (limit && (deleted + entries) > 
limit)\n      break;\n\n    int remove_node;\n    streamID master_id = {0, 0};\n    if (trim_strategy == TRIM_STRATEGY_MAXLEN) {\n      remove_node = s->length - entries >= maxlen;\n    } else {\n      streamDecodeID(ri.key, &master_id);\n      streamID last_id = {0, 0};\n      LpGetEdgeStreamID(lp, 0, &master_id, &last_id);\n      remove_node = streamCompareID(&last_id, id) < 0;\n    }\n\n    if (remove_node) {\n      lpFree(lp);\n      checkedRaxRemove(s->rax, ri.key, ri.key_len, NULL);\n      raxSeek(&ri, \">=\", ri.key, ri.key_len);\n      s->length -= entries;\n      deleted += entries;\n      continue;\n    }\n\n    if (approx)\n      break;\n\n    int64_t deleted_from_lp = 0;\n    p = lpNext(lp, p);  // skip deleted field\n    p = lpNext(lp, p);  // skip num-of-fields\n\n    int64_t master_fields_count = LpGetInteger(p);\n    p = lpNext(lp, p);\n    for (int64_t j = 0; j < master_fields_count; j++)\n      p = lpNext(lp, p);\n    p = lpNext(lp, p);  // skip zero master entry terminator\n\n    while (p) {\n      uint8_t* pcopy = p;\n      int64_t flags = LpGetInteger(p);\n      p = lpNext(lp, p);\n      int64_t to_skip;\n\n      int64_t ms_delta = LpGetInteger(p);\n      p = lpNext(lp, p);\n      int64_t seq_delta = LpGetInteger(p);\n      p = lpNext(lp, p);\n\n      streamID currid = {0, 0};\n      if (trim_strategy == TRIM_STRATEGY_MINID) {\n        currid.ms = master_id.ms + ms_delta;\n        currid.seq = master_id.seq + seq_delta;\n      }\n\n      int stop;\n      if (trim_strategy == TRIM_STRATEGY_MAXLEN) {\n        stop = s->length <= maxlen;\n      } else {\n        stop = streamCompareID(&currid, id) >= 0;\n      }\n      if (stop)\n        break;\n\n      if (flags & STREAM_ITEM_FLAG_SAMEFIELDS) {\n        to_skip = master_fields_count;\n      } else {\n        to_skip = LpGetInteger(p);\n        p = lpNext(lp, p);\n        to_skip *= 2;\n      }\n\n      while (to_skip--)\n        p = lpNext(lp, p);\n      p = lpNext(lp, p);\n\n      if (!(flags & 
STREAM_ITEM_FLAG_DELETED)) {\n        intptr_t delta = p - lp;\n        flags |= STREAM_ITEM_FLAG_DELETED;\n        lp = lpReplaceInteger(lp, &pcopy, flags);\n        deleted_from_lp++;\n        s->length--;\n        p = lp + delta;\n      }\n    }\n    deleted += deleted_from_lp;\n\n    p = lpFirst(lp);\n    lp = lpReplaceInteger(lp, &p, entries - deleted_from_lp);\n    p = lpNext(lp, p);\n    int64_t marked_deleted = LpGetInteger(p);\n    lp = lpReplaceInteger(lp, &p, marked_deleted + deleted_from_lp);\n\n    raxInsert(s->rax, ri.key, ri.key_len, lp, NULL);\n    CHECK_GT(lpBytes(lp), 0u);\n    break;\n  }\n  raxStop(&ri);\n\n  if (s->length == 0) {\n    s->first_id.ms = 0;\n    s->first_id.seq = 0;\n  } else if (deleted) {\n    streamGetEdgeID(s, 1, 1, &s->first_id);\n  }\n\n  return deleted;\n}\n\nvoid FreeConsumerVoid(void* sc_) {\n  streamConsumer* sc = static_cast<streamConsumer*>(sc_);\n  raxFree(sc->pel);\n  sdsfree(sc->name);\n  zfree(sc);\n}\n\nvoid StreamFreeCG(streamCG* cg) {\n  raxFreeWithCallback(cg->pel, zfree);\n  raxFreeWithCallback(cg->consumers, FreeConsumerVoid);\n  zfree(cg);\n}\n\n// ---------------------------------------------------------------------------\n\nstruct Record {\n  streamID id;\n  vector<pair<string, string>> kv_arr;\n  uint64_t delivery_time = 0;\n};\n\nusing RecordVec = vector<Record>;\n\nusing nonstd::make_unexpected;\n\ntemplate <typename T> using ParseResult = io::Result<T, ErrorReply>;\n\nnonstd::unexpected_type<ErrorReply> CreateSyntaxError(std::string_view message) {\n  return make_unexpected(ErrorReply{message, kSyntaxErrType});\n}\n\nstruct ParsedStreamId {\n  streamID val;\n\n  // Was an ID different than \"ms-*\" specified? for XADD only.\n  bool has_seq = false;\n  // Was an ID different than \"*\" specified? for XADD only.\n  bool id_given = false;\n\n  // Whether to lookup messages after the last ID in the stream. 
Used for XREAD\n  // when using ID '$'.\n  bool resolve_last_id = false;\n};\n\nstruct RangeId {\n  ParsedStreamId parsed_id;\n  bool exclude = false;\n};\n\nstruct TrimOpts {\n  static constexpr int32_t kNoTrimLimit = -1;\n\n  bool HasLimit() const {\n    return limit != kNoTrimLimit;\n  }\n\n  bool IsMaxLen() const {\n    return std::holds_alternative<uint32_t>(length_or_id);\n  }\n\n  uint32_t AsMaxLen() const {\n    return std::get<uint32_t>(length_or_id);\n  }\n\n  const ParsedStreamId& AsMinId() const {\n    return std::get<ParsedStreamId>(length_or_id);\n  }\n\n  // First is MaxLen, second is MinId.\n  std::variant<uint32_t, ParsedStreamId> length_or_id;\n  int32_t limit = kNoTrimLimit;\n  bool approx = false;\n};\n\nstruct AddOpts {\n  std::optional<TrimOpts> trim_opts;\n  ParsedStreamId parsed_id;\n  bool no_mkstream = false;\n};\n\n/* Used to journal the XADD command.\n   The actual stream ID assigned after adding may differ from the one specified in the command.\n   So, for the replica, we need to specify the exact ID that was actually added. 
*/\nstruct AddArgsJournaler {\n  void SetStreamId(std::string_view stream_id) {\n    add_args[stream_id_index] = stream_id;\n  }\n\n  CmdArgVec add_args;\n  size_t stream_id_index;\n};\n\nstruct NACKInfo {\n  streamID pel_id;\n  string consumer_name;\n  size_t delivery_time;\n  size_t delivery_count;\n};\n\nstruct ConsumerInfo {\n  string name;\n  mstime_t seen_time;\n  mstime_t active_time;\n  size_t pel_count;\n  vector<NACKInfo> pending;\n  size_t idle;\n};\n\nstruct GroupInfo {\n  string name;\n  size_t consumer_size;\n  size_t pending_size;\n  streamID last_id;\n  int64_t entries_read;\n  int64_t lag;\n  vector<NACKInfo> stream_nack_vec;\n  vector<ConsumerInfo> consumer_info_vec;\n};\n\nusing GroupInfoVec = vector<GroupInfo>;\n\nstruct StreamInfo {\n  size_t length;\n  size_t radix_tree_keys;\n  size_t radix_tree_nodes;\n  size_t groups;\n  streamID recorded_first_entry_id;\n  streamID last_generated_id;\n  streamID max_deleted_entry_id;\n  size_t entries_added;\n  Record first_entry;\n  Record last_entry;\n  vector<Record> entries;\n  GroupInfoVec cgroups;\n};\n\nenum class StreamAccessKind { kNone, kSequential, kRandom, kFetchAll };\n\nstruct RangeOpts {\n  ParsedStreamId start;\n  ParsedStreamId end;\n  bool is_rev = false;\n  uint32_t count = kuint32max;\n\n  // readgroup range fields\n  streamCG* group = nullptr;\n  streamConsumer* consumer = nullptr;\n  bool noack = false;\n\n  StreamAccessKind access_kind = StreamAccessKind::kRandom;\n};\n\nvoid RecordStreamAccess(const OpArgs& op_args, StreamAccessKind kind) {\n  auto& stats = op_args.shard->stats();\n  switch (kind) {\n    case StreamAccessKind::kNone:\n      // No-op: skip metrics recording for internal calls\n      break;\n    case StreamAccessKind::kSequential:\n      stats.stream_sequential_accesses++;\n      break;\n    case StreamAccessKind::kRandom:\n      stats.stream_random_accesses++;\n      break;\n    case StreamAccessKind::kFetchAll:\n      stats.stream_fetch_all_accesses++;\n      
break;\n  }\n}\n\nstruct StreamIDsItem {\n  ParsedStreamId id;\n\n  // Readgroup fields - id and group-consumer pair is exclusive.\n  streamCG* group = nullptr;\n  streamConsumer* consumer = nullptr;\n  bool serve_history = false;\n  bool is_consumer_new = false;\n};\n\nstruct ReadOpts {\n  // Contains a mapping from stream name to the starting stream ID.\n  unordered_map<string_view, StreamIDsItem> stream_ids;\n  // Contains the maximum number of entries to return for each stream.\n  uint32_t count = kuint32max;\n  // Contains the time to block waiting for entries, or -1 if should not block.\n  int64_t timeout = -1;\n  size_t streams_arg = 0;\n\n  // readgroup fields\n  bool read_group = false;\n  string_view group_name;\n  string_view consumer_name;\n  bool noack = false;\n};\n\nconst char kTrimOptionConflictErr[] =\n    \"MAXLEN and MINID options at the same time are not compatible\";\nconst char kInvalidStreamId[] = \"Invalid stream ID specified as stream command argument\";\nconst char kXGroupKeyNotFound[] =\n    \"The XGROUP subcommand requires the key to exist. 
\"\n    \"Note that for CREATE you may want to use the MKSTREAM option to create \"\n    \"an empty stream automatically.\";\nconst char kSameStreamFound[] = \"Same stream specified multiple time\";\n\nconst uint32_t STREAM_LISTPACK_MAX_SIZE = 1 << 30;\nconst uint32_t kStreamNodeMaxBytes = 4096;\nconst uint32_t kStreamNodeMaxEntries = 100;\nconst uint32_t STREAM_LISTPACK_MAX_PRE_ALLOCATE = 4096;\n\nstring StreamIdRepr(const streamID& id) {\n  return absl::StrCat(id.ms, \"-\", id.seq);\n};\n\nfacade::ErrorReply NoGroupError(string_view key, string_view cgroup) {\n  return facade::ErrorReply(\n      absl::StrCat(\"-NOGROUP No such consumer group '\", cgroup, \"' for key name '\", key, \"'\"),\n      kNoGroupErrType);\n}\n\nfacade::ErrorReply NoGroupOrKey(string_view key, string_view cgroup, string_view suffix = \"\") {\n  return facade::ErrorReply(\n      absl::StrCat(\"-NOGROUP No such key '\", key, \"'\", \" or consumer group '\", cgroup, \"'\", suffix),\n      kNoGroupErrType);\n}\n\nstring LeqTopIdError(string_view cmd_name) {\n  return absl::StrCat(\"The ID specified in \", cmd_name,\n                      \" is equal or smaller than the target stream top item\");\n}\n\ninline const uint8_t* SafePtr(MutableSlice field) {\n  return field.empty() ? reinterpret_cast<const uint8_t*>(\"\")\n                       : reinterpret_cast<const uint8_t*>(field.data());\n}\n\nbool ParseID(string_view strid, bool strict, uint64_t missing_seq, ParsedStreamId* dest) {\n  if (strid.empty() || strid.size() > 127)\n    return false;\n\n  if (strid == \"*\")\n    return true;\n\n  dest->id_given = true;\n  dest->has_seq = true;\n\n  /* Handle the \"-\" and \"+\" special cases. 
*/\n  if (strid == \"-\" || strid == \"+\") {\n    if (strict)\n      return false;\n\n    if (strid == \"-\") {\n      dest->val.ms = 0;\n      dest->val.seq = 0;\n      return true;\n    }\n\n    dest->val.ms = UINT64_MAX;\n    dest->val.seq = UINT64_MAX;\n    return true;\n  }\n\n  /* Parse <ms>-<seq> form. */\n  streamID result{.ms = 0, .seq = missing_seq};\n\n  size_t dash_pos = strid.find('-');\n  if (!absl::SimpleAtoi(strid.substr(0, dash_pos), &result.ms))\n    return false;\n\n  if (dash_pos != string_view::npos) {\n    if (dash_pos + 1 == strid.size())\n      return false;\n\n    if (dash_pos + 2 == strid.size() && strid[dash_pos + 1] == '*') {\n      result.seq = 0;\n      dest->has_seq = false;\n    } else if (!absl::SimpleAtoi(strid.substr(dash_pos + 1), &result.seq)) {\n      return false;\n    }\n  }\n\n  dest->val = result;\n\n  return true;\n}\n\nenum class RangeBoundary { kStart, kEnd };\nbool ParseRangeId(string_view id, RangeBoundary type, RangeId* dest) {\n  if (id.empty())\n    return false;\n  if (id[0] == '(') {\n    dest->exclude = true;\n    id.remove_prefix(1);\n  }\n  uint64 missing_seq = type == RangeBoundary::kStart ? 0 : -1;\n  return ParseID(id, dest->exclude, missing_seq, &dest->parsed_id);\n}\n\n/* This is a wrapper function for lpGet() to directly get an integer value\n * from the listpack (that may store numbers as a string), converting\n * the string if needed.\n * The `valid` argument is an optional output parameter to get an indication\n * if the record was valid, when this parameter is NULL, the function will\n * fail with an assertion. */\nstatic inline int64_t lpGetIntegerIfValid(unsigned char* ele, int* valid) {\n  int64_t v;\n  unsigned char* e = lpGet(ele, &v, NULL);\n  if (e == NULL) {\n    if (valid)\n      *valid = 1;\n    return v;\n  }\n  /* The following code path should never be used for how listpacks work:\n   * they should always be able to store an int64_t value in integer\n   * encoded form. 
However the implementation may change. */\n  long long ll;\n  int ret = string2ll((char*)e, v, &ll);\n  if (valid)\n    *valid = ret;\n  else\n    serverAssert(ret != 0);\n  v = ll;\n  return v;\n}\n\nint64_t lpGetInteger(unsigned char* ele) {\n  return lpGetIntegerIfValid(ele, NULL);\n}\n\n/* Generate the next stream item ID given the previous one. If the current\n * milliseconds Unix time is greater than the previous one, just use this\n * as time part and start with sequence part of zero. Otherwise we use the\n * previous time (and never go backward) and increment the sequence. */\nvoid StreamNextID(uint64_t now_ms, const streamID* last_id, streamID* new_id) {\n  if (now_ms > last_id->ms) {\n    new_id->ms = now_ms;\n    new_id->seq = 0;\n  } else {\n    *new_id = *last_id;\n    StreamIncrID(new_id);\n  }\n}\n\n/* Convert the specified stream entry ID as a 128 bit big endian number, so\n * that the IDs can be sorted lexicographically. */\ninline void StreamEncodeID(uint8_t* buf, const streamID& id) {\n  absl::big_endian::Store64(buf, id.ms);\n  absl::big_endian::Store64(buf + 8, id.seq);\n}\n\n/* Adds a new item into the stream 's' having the specified number of\n * field-value pairs as specified in 'numfields' and stored into 'argv'.\n * Returns the new entry ID populating the 'added_id' structure.\n *\n * If 'use_id' is not NULL, the ID is not auto-generated by the function,\n * but instead the passed ID is used to add the new entry. In this case\n * adding the entry may fail as specified later in this comment.\n *\n * When 'use_id' is used alongside with a zero 'seq-given', the sequence\n * part of the passed ID is ignored and the function will attempt to use an\n * auto-generated sequence.\n *\n * The function returns 0 if the item was added, this is always true\n * if the ID was generated by the function. However the function may return\n * errors in several cases:\n * 1. 
If an ID was given via 'use_id', but adding it failed since the\n *    current top ID is greater or equal, it returns EDOM.\n * 2. If a size of a single element or the sum of the elements is too big to\n *    be stored into the stream. it returns ERANGE. */\nint StreamAppendItem(stream* s, CmdArgList fields, uint64_t now_ms, streamID* added_id,\n                     streamID* use_id, int seq_given) {\n  /* Generate the new entry ID. */\n  streamID id;\n  if (use_id) {\n    if (seq_given) {\n      id = *use_id;\n    } else {\n      /* The automatically generated sequence can be either zero (new\n       * timestamps) or the incremented sequence of the last ID. In the\n       * latter case, we need to prevent an overflow/advancing forward\n       * in time. */\n      if (s->last_id.ms == use_id->ms) {\n        if (s->last_id.seq == UINT64_MAX) {\n          return EDOM;\n        }\n        id = s->last_id;\n        id.seq++;\n      } else {\n        id = *use_id;\n      }\n    }\n  } else {\n    StreamNextID(now_ms, &s->last_id, &id);\n  }\n\n  /* Check that the new ID is greater than the last entry ID\n   * or return an error. Automatically generated IDs might\n   * overflow (and wrap-around) when incrementing the sequence\n     part. */\n  if (streamCompareID(&id, &s->last_id) <= 0) {\n    return EDOM;\n  }\n\n  /* Avoid overflow when trying to add an element to the stream (listpack\n   * can only host up to 32bit length strings, and also a total listpack size\n   * can't be bigger than 32bit length. */\n  size_t totelelen = 0;\n  for (size_t i = 0; i < fields.size(); i++) {\n    totelelen += fields[i].size();\n  }\n\n  if (totelelen > STREAM_LISTPACK_MAX_SIZE) {\n    return ERANGE;\n  }\n\n  /* Add the new entry. */\n  raxIterator ri;\n  raxStart(&ri, s->rax);\n  raxSeek(&ri, \"$\", NULL, 0);\n\n  size_t lp_bytes = 0;      /* Total bytes in the tail listpack. */\n  unsigned char* lp = NULL; /* Tail listpack pointer. 
*/\n\n  /* We have to add the key into the radix tree in lexicographic order,\n   * to do so we consider the ID as a single 128 bit number written in\n   * big endian, so that the most significant bytes are the first ones. */\n  uint8_t rax_key[16]; /* Key in the radix tree containing the listpack.*/\n  streamID master_id;  /* ID of the master entry in the listpack. */\n\n  if (!raxEOF(&ri)) {\n    /* Get a reference to the tail node listpack. */\n    lp = (uint8_t*)ri.data;\n    lp_bytes = lpBytes(lp);\n    CHECK_GT(lp_bytes, 0U);\n    DCHECK(ri.key_len == sizeof(rax_key));\n    memcpy(rax_key, ri.key, sizeof(rax_key));\n  }\n\n  /* Create a new listpack and radix tree node if needed. Note that when\n   * a new listpack is created, we populate it with a \"master entry\". This\n   * is just a set of fields that is taken as references in order to compress\n   * the stream entries that we'll add inside the listpack.\n   *\n   * Note that while we use the first added entry fields to create\n   * the master entry, the first added entry is NOT represented in the master\n   * entry, which is a stand alone object. But of course, the first entry\n   * will compress well because it's used as reference.\n   *\n   * The master entry is composed like in the following example:\n   *\n   * +-------+---------+------------+---------+--/--+---------+---------+-+\n   * | count | deleted | num-fields | field_1 | field_2 | ... | field_N |0|\n   * +-------+---------+------------+---------+--/--+---------+---------+-+\n   *\n   * count and deleted just represent respectively the total number of\n   * entries inside the listpack that are valid, and marked as deleted\n   * (deleted flag in the entry flags set). 
So the total number of items\n   * actually inside the listpack (both deleted and not) is count+deleted.\n   *\n   * The real entries will be encoded with an ID that is just the\n   * millisecond and sequence difference compared to the key stored at\n   * the radix tree node containing the listpack (delta encoding), and\n   * if the fields of the entry are the same as the master entry fields, the\n   * entry flags will specify this fact and the entry fields and number\n   * of fields will be omitted (see later in the code of this function).\n   *\n   * The \"0\" entry at the end is the same as the 'lp-count' entry in the\n   * regular stream entries (see below), and marks the fact that there are\n   * no more entries, when we scan the stream from right to left. */\n\n  /* First of all, check if we can append to the current macro node or\n   * if we need to switch to the next one. 'lp' will be set to NULL if\n   * the current node is full. */\n  if (lp != NULL) {\n    int new_node = 0;\n    size_t node_max_bytes = kStreamNodeMaxBytes;\n    if (node_max_bytes == 0 || node_max_bytes > STREAM_LISTPACK_MAX_SIZE)\n      node_max_bytes = STREAM_LISTPACK_MAX_SIZE;\n    if (lp_bytes + totelelen >= node_max_bytes) {\n      new_node = 1;\n    } else if (kStreamNodeMaxEntries) {\n      unsigned char* lp_ele = lpFirst(lp);\n      /* Count both live entries and deleted ones. 
*/\n      int64_t count = lpGetInteger(lp_ele) + lpGetInteger(lpNext(lp, lp_ele));\n      if (count >= kStreamNodeMaxEntries) {\n        new_node = 1;\n      }\n    }\n\n    if (new_node) {\n      /* Shrink extra pre-allocated memory */\n      lp = lpShrinkToFit(lp);\n      if (ri.key_len != sizeof(rax_key) || memcmp(ri.key, rax_key, sizeof(rax_key)) != 0) {\n        LOG(DFATAL) << \"StreamAppendItem: Key mismatch\";\n      }\n      if (ri.data != lp)\n        raxInsert(s->rax, ri.key, ri.key_len, lp, NULL);\n      lp = NULL;\n    }\n  }\n\n  int flags = 0;\n  unsigned numfields = fields.size() / 2;\n  uint8_t* old_lp = lp;\n  if (lp == NULL) {\n    master_id = id;\n    StreamEncodeID(rax_key, id);\n    /* Create the listpack having the master entry ID and fields.\n     * Pre-allocate some bytes when creating listpack to avoid realloc on\n     * every XADD. Since listpack.c uses malloc_size, it'll grow in steps,\n     * and won't realloc on every XADD.\n     * When listpack reaches max number of entries, we'll shrink the\n     * allocation to fit the data. */\n    size_t prealloc = STREAM_LISTPACK_MAX_PRE_ALLOCATE;\n\n    lp = lpNew(prealloc);\n    lp = lpAppendInteger(lp, 1); /* One item, the one we are adding. */\n    lp = lpAppendInteger(lp, 0); /* Zero deleted so far. */\n    lp = lpAppendInteger(lp, numfields);\n    for (int64_t i = 0; i < numfields; i++) {\n      MutableSlice field = fields[i * 2];\n\n      lp = lpAppend(lp, SafePtr(field), field.size());\n    }\n    lp = lpAppendInteger(lp, 0); /* Master entry zero terminator. */\n    raxInsert(s->rax, (unsigned char*)&rax_key, sizeof(rax_key), lp, NULL);\n    old_lp = lp;\n    /* The first entry we insert, has obviously the same fields of the\n     * master entry. 
*/\n    flags |= STREAM_ITEM_FLAG_SAMEFIELDS;\n  } else {  // lp != NULL\n    if (ri.key_len != sizeof(rax_key) || memcmp(ri.key, rax_key, sizeof(rax_key)) != 0) {\n      LOG(DFATAL) << \"StreamAppendItem: Key mismatch\";\n    }\n\n    /* Read the master ID from the radix tree key. */\n    streamDecodeID(rax_key, &master_id);\n    unsigned char* lp_ele = lpFirst(lp);\n\n    /* Update count and skip the deleted fields. */\n    int64_t count = lpGetInteger(lp_ele);\n    lp = lpReplaceInteger(lp, &lp_ele, count + 1);\n    lp_ele = lpNext(lp, lp_ele); /* seek deleted. */\n    lp_ele = lpNext(lp, lp_ele); /* seek master entry num fields. */\n\n    /* Check if the entry we are adding, have the same fields\n     * as the master entry. */\n    int64_t master_fields_count = lpGetInteger(lp_ele);\n    lp_ele = lpNext(lp, lp_ele);\n    if (numfields == master_fields_count) {\n      int64_t i;\n      for (i = 0; i < master_fields_count; i++) {\n        MutableSlice field = fields[i * 2];\n        int64_t e_len;\n        unsigned char buf[LP_INTBUF_SIZE];\n        unsigned char* e = lpGet(lp_ele, &e_len, buf);\n        /* Stop if there is a mismatch. */\n        if (field.size() != (size_t)e_len || memcmp(e, field.data(), e_len) != 0)\n          break;\n        lp_ele = lpNext(lp, lp_ele);\n      }\n      /* All fields are the same! We can compress the field names\n       * setting a single bit in the flags. */\n      if (i == master_fields_count)\n        flags |= STREAM_ITEM_FLAG_SAMEFIELDS;\n    }\n  }\n\n  /* Populate the listpack with the new entry. 
We use the following\n   * encoding:\n   *\n   * +-----+--------+----------+-------+-------+-/-+-------+-------+--------+\n   * |flags|entry-id|num-fields|field-1|value-1|...|field-N|value-N|lp-count|\n   * +-----+--------+----------+-------+-------+-/-+-------+-------+--------+\n   *\n   * However if the SAMEFIELD flag is set, we have just to populate\n   * the entry with the values, so it becomes:\n   *\n   * +-----+--------+-------+-/-+-------+--------+\n   * |flags|entry-id|value-1|...|value-N|lp-count|\n   * +-----+--------+-------+-/-+-------+--------+\n   *\n   * The entry-id field is actually two separated fields: the ms\n   * and seq difference compared to the master entry.\n   *\n   * The lp-count field is a number that states the number of listpack pieces\n   * that compose the entry, so that it's possible to travel the entry\n   * in reverse order: we can just start from the end of the listpack, read\n   * the entry, and jump back N times to seek the \"flags\" field to read\n   * the stream full entry. */\n  lp = lpAppendInteger(lp, flags);\n  lp = lpAppendInteger(lp, id.ms - master_id.ms);\n  lp = lpAppendInteger(lp, id.seq - master_id.seq);\n  if (!(flags & STREAM_ITEM_FLAG_SAMEFIELDS))\n    lp = lpAppendInteger(lp, numfields);\n  for (int64_t i = 0; i < numfields; i++) {\n    MutableSlice field = fields[i * 2], value = fields[i * 2 + 1];\n    if (!(flags & STREAM_ITEM_FLAG_SAMEFIELDS))\n      lp = lpAppend(lp, SafePtr(field), field.size());\n    lp = lpAppend(lp, SafePtr(value), value.size());\n  }\n  /* Compute and store the lp-count field. */\n  int64_t lp_count = numfields;\n  lp_count += 3; /* Add the 3 fixed fields flags + ms-diff + seq-diff. */\n  if (!(flags & STREAM_ITEM_FLAG_SAMEFIELDS)) {\n    /* If the item is not compressed, it also has the fields other than\n     * the values, and an additional num-fields field. 
*/\n    lp_count += numfields + 1;\n  }\n  lp = lpAppendInteger(lp, lp_count);\n\n  /* Insert back into the tree in order to update the listpack pointer. */\n  if (old_lp != lp) {\n    raxInsert(s->rax, (unsigned char*)&rax_key, sizeof(rax_key), lp, NULL);\n  }\n  s->length++;\n  s->entries_added++;\n  s->last_id = id;\n\n  // Must find the last entry as we just inserted it.\n  CHECK_EQ(1, raxSeek(&ri, \"$\", NULL, 0));\n  lp_bytes = lpBytes((uint8_t*)ri.data);\n  CHECK_GT(lp_bytes, 0U);\n  raxStop(&ri);\n\n  if (s->length == 1)\n    s->first_id = id;\n  if (added_id)\n    *added_id = id;\n\n  return 0;\n}\n\n/* Create a NACK entry setting the delivery count to 1 and the delivery\n * time to the current time or test-hooked time. The NACK consumer will be\n * set to the one specified as argument of the function. */\nstreamNACK* StreamCreateNACK(streamConsumer* consumer, uint64_t now_ms) {\n  streamNACK* nack = reinterpret_cast<streamNACK*>(zmalloc(sizeof(*nack)));\n  nack->delivery_time = now_ms;\n  nack->delivery_count = 1;\n  nack->consumer = consumer;\n  return nack;\n}\n\nstd::string StreamsIdToString(streamID id) {\n  return absl::StrCat(id.ms, \"-\", id.seq);\n}\n\n/* Return value represents the number of deleted items. 
*/\nint64_t TrimStream(const TrimOpts& opts, stream* s) {\n  if (!opts.HasLimit()) {\n    if (opts.IsMaxLen()) {\n      return StreamTrimByLength(s, opts.AsMaxLen(), opts.approx);\n    } else {\n      const auto& min_id = opts.AsMinId().val;\n      return StreamTrimByID(s, min_id, opts.approx);\n    }\n  }\n\n  streamAddTrimArgs trim_args = {};\n  trim_args.approx_trim = opts.approx;\n  trim_args.limit = opts.limit;\n\n  if (opts.IsMaxLen()) {\n    trim_args.trim_strategy = TRIM_STRATEGY_MAXLEN;\n    trim_args.maxlen = opts.AsMaxLen();\n  } else {\n    trim_args.trim_strategy = TRIM_STRATEGY_MINID;\n    trim_args.minid = opts.AsMinId().val;\n  }\n\n  return StreamTrim(s, &trim_args);\n}\n\nbool JournalAsMinId(const TrimOpts& opts) {\n  return opts.approx || opts.IsMaxLen();\n}\n\nOpResult<streamID> OpAdd(const OpArgs& op_args, string_view key, const AddOpts& opts,\n                         CmdArgList args, AddArgsJournaler journaler) {\n  DCHECK(!args.empty() && args.size() % 2 == 0);\n\n  auto& db_slice = op_args.GetDbSlice();\n\n  DbSlice::ItAndUpdater add_res;\n  if (opts.no_mkstream) {\n    auto res_it = db_slice.FindMutable(op_args.db_cntx, key, OBJ_STREAM);\n    RETURN_ON_BAD_STATUS(res_it);\n    add_res = std::move(*res_it);\n  } else {\n    auto op_res = db_slice.AddOrFind(op_args.db_cntx, key, OBJ_STREAM);\n    RETURN_ON_BAD_STATUS(op_res);\n    add_res = std::move(*op_res);\n  }\n\n  auto& it = add_res.it;\n\n  StreamMemTracker mem_tracker;\n  absl::Cleanup on_exit([it, &mem_tracker]() mutable { mem_tracker.UpdateStreamSize(it->second); });\n\n  if (add_res.is_new) {\n    stream* s = streamNew();\n    it->second.InitRobj(OBJ_STREAM, OBJ_ENCODING_STREAM, s);\n  }\n\n  stream* stream_inst = (stream*)it->second.RObjPtr();\n\n  streamID result_id;\n  const auto& parsed_id = opts.parsed_id;\n  streamID passed_id = parsed_id.val;\n  int res = StreamAppendItem(stream_inst, args, op_args.db_cntx.time_now_ms, &result_id,\n                             
parsed_id.id_given ? &passed_id : nullptr, parsed_id.has_seq);\n\n  if (res != 0) {\n    if (add_res.is_new) {\n      std::move(on_exit).Cancel();\n      db_slice.DelMutable(op_args.db_cntx, std::move(add_res));\n    }\n    if (res == ERANGE)\n      return OpStatus::OUT_OF_RANGE;\n    if (res == EDOM)\n      return OpStatus::STREAM_ID_SMALL;\n\n    return OpStatus::OUT_OF_MEMORY;\n  }\n\n  if (opts.trim_opts) {\n    int64_t deleted_items_number = TrimStream(opts.trim_opts.value(), stream_inst);\n    VLOG(2) << \"Trimmed \" << deleted_items_number << \" items from stream \" << key\n            << \" during the XADD command\";\n  }\n\n  if (op_args.shard->journal()) {\n    std::string result_id_as_string = StreamsIdToString(result_id);\n    const bool stream_is_empty = stream_inst->length == 0;\n\n    if (opts.trim_opts && (stream_is_empty || JournalAsMinId(opts.trim_opts.value()))) {\n      std::string last_id;\n\n      CmdArgVec journal_args = {key};\n      journal_args.reserve(args.size() + 4);\n\n      if (stream_is_empty) {\n        // We need remove the whole stream in replica\n        journal_args.emplace_back(\"MAXLEN\"sv);\n        journal_args.emplace_back(\"0\"sv);\n      } else {\n        // We need to set exact MinId in the journal.\n        // For this we are using new first_id from the stream\n        last_id = StreamsIdToString(stream_inst->first_id);\n        journal_args.emplace_back(\"MINID\"sv);\n        journal_args.emplace_back(last_id);\n      }\n\n      if (opts.no_mkstream) {\n        journal_args.emplace_back(\"NOMKSTREAM\"sv);\n      }\n\n      journal_args.emplace_back(result_id_as_string);\n\n      for (size_t i = 0; i < args.size(); i++) {\n        journal_args.emplace_back(args[i]);\n      }\n\n      RecordJournal(op_args, \"XADD\"sv, journal_args);\n    } else {\n      journaler.SetStreamId(result_id_as_string);\n      RecordJournal(op_args, \"XADD\"sv, journaler.add_args);\n    }\n  }\n\n  RecordStreamAccess(op_args, 
StreamAccessKind::kSequential);\n\n  auto blocking_controller = op_args.db_cntx.ns->GetBlockingController(op_args.shard->shard_id());\n  if (blocking_controller) {\n    blocking_controller->Awaken(op_args.db_cntx.db_index, key);\n  }\n\n  return result_id;\n}\n\nOpResult<RecordVec> OpRange(const OpArgs& op_args, string_view key, const RangeOpts& opts) {\n  // It's write because we add a NACK. Relevant to XReadGroup only\n  const bool is_write_command = opts.group;\n  auto& db_slice = op_args.GetDbSlice();\n  DbSlice::ItAndUpdater it;\n  const CompactObj* cobj;\n  if (is_write_command) {\n    auto res = db_slice.FindMutable(op_args.db_cntx, key, OBJ_STREAM);\n    if (!res)\n      return res.status();\n    it = std::move(*res);\n    cobj = &it.it->second;\n  } else {\n    auto res = db_slice.FindReadOnly(op_args.db_cntx, key, OBJ_STREAM);\n    if (!res)\n      return res.status();\n    cobj = &(*res)->second;\n  }\n\n  RecordVec result;\n\n  if (opts.count == 0)\n    return result;\n\n  streamIterator si;\n  int64_t numfields;\n  streamID id;\n  stream* s = (stream*)cobj->RObjPtr();\n  streamID sstart = opts.start.val, send = opts.end.val;\n\n  // Classify access pattern: fetch-all if start <= first_id and end is MAX.\n  StreamAccessKind effective_kind = opts.access_kind;\n  if (effective_kind != StreamAccessKind::kNone && s->length > 0 &&\n      streamCompareID(&sstart, &s->first_id) <= 0 && send.ms == UINT64_MAX &&\n      send.seq == UINT64_MAX) {\n    effective_kind = StreamAccessKind::kFetchAll;\n  }\n  RecordStreamAccess(op_args, effective_kind);\n\n  streamIteratorStart(&si, s, &sstart, &send, opts.is_rev);\n  while (streamIteratorGetID(&si, &id, &numfields)) {\n    Record rec;\n    rec.id = id;\n    rec.kv_arr.reserve(numfields);\n    if (opts.group && streamCompareID(&id, &opts.group->last_id) > 0) {\n      if (opts.group->entries_read != SCG_INVALID_ENTRIES_READ &&\n          streamCompareID(&opts.group->last_id, &s->first_id) >= 0 &&\n          
!StreamRangeHasTombstones(s, &opts.group->last_id, NULL)) {\n        /* A valid counter and no tombstones in the group's last-delivered-id and the stream's\n         * last-generated-id, we can increment the read counter to keep tracking the group's\n         * progress. */\n        opts.group->entries_read++;\n      } else if (s->entries_added) {\n        /* The group's counter may be invalid, so we try to obtain it. */\n        opts.group->entries_read = streamEstimateDistanceFromFirstEverEntry(s, &id);\n      }\n      opts.group->last_id = id;\n    }\n\n    /* Emit the field-value pairs. */\n    while (numfields--) {\n      unsigned char *key, *value;\n      int64_t key_len, value_len;\n      streamIteratorGetField(&si, &key, &value, &key_len, &value_len);\n      string skey(reinterpret_cast<char*>(key), key_len);\n      string sval(reinterpret_cast<char*>(value), value_len);\n\n      rec.kv_arr.emplace_back(std::move(skey), std::move(sval));\n    }\n\n    result.push_back(std::move(rec));\n\n    // Only relevant for XREADGROUP flow. Should not trigger on XREAD which is READ only.\n    if (is_write_command && !opts.noack) {\n      StreamMemTracker mem_track;\n      unsigned char buf[sizeof(streamID)];\n      StreamEncodeID(buf, id);\n      uint64_t now_ms = op_args.db_cntx.time_now_ms;\n\n      /* Try to add a new NACK. Most of the time this will work and\n       * will not require extra lookups. We'll fix the problem later\n       * if we find that there is already an entry for this ID. */\n      streamNACK* nack = StreamCreateNACK(opts.consumer, now_ms);\n      int group_inserted = raxTryInsert(opts.group->pel, buf, sizeof(buf), nack, nullptr);\n\n      int consumer_inserted = raxTryInsert(opts.consumer->pel, buf, sizeof(buf), nack, nullptr);\n\n      /* Now we can check if the entry was already busy, and\n       * in that case reassign the entry to the new consumer,\n       * or update it if the consumer is the same as before. 
*/\n      if (group_inserted == 0) {\n        streamFreeNACK(nack);\n        int fres = raxFind(opts.group->pel, buf, sizeof(buf), (void**)&nack);\n        DCHECK(fres);\n        raxRemove(nack->consumer->pel, buf, sizeof(buf), NULL);\n        LOG_IF(DFATAL, nack->consumer->pel->numnodes == 0) << \"Invalid rax state\";\n\n        /* Update the consumer and NACK metadata. */\n        nack->consumer = opts.consumer;\n        nack->delivery_time = now_ms;\n        nack->delivery_count = 1;\n        /* Add the entry in the new consumer local PEL. */\n        raxInsert(opts.consumer->pel, buf, sizeof(buf), nack, NULL);\n      } else if (group_inserted == 1 && consumer_inserted == 0) {\n        LOG(DFATAL) << \"Internal error\";\n        return OpStatus::SKIPPED;  // (\"NACK half-created. Should not be possible.\");\n      }\n      opts.consumer->active_time = now_ms;\n      result.back().delivery_time = now_ms;\n      mem_track.UpdateStreamSize(it.it->second);\n    }\n    if (opts.count == result.size())\n      break;\n  }\n\n  streamIteratorStop(&si);\n\n  return result;\n}\n\nOpResult<RecordVec> OpRangeFromConsumerPEL(const OpArgs& op_args, string_view key,\n                                           const RangeOpts& opts) {\n  RecordVec result;\n\n  if (opts.count == 0)\n    return result;\n\n  RecordStreamAccess(op_args, StreamAccessKind::kRandom);\n\n  unsigned char start_key[sizeof(streamID)];\n  unsigned char end_key[sizeof(streamID)];\n  auto sstart = opts.start.val;\n  auto send = opts.end.val;\n\n  StreamEncodeID(start_key, sstart);\n  StreamEncodeID(end_key, send);\n  raxIterator ri;\n\n  raxStart(&ri, opts.consumer->pel);\n  raxSeek(&ri, \">=\", start_key, sizeof(start_key));\n  size_t ecount = 0;\n  while (raxNext(&ri) && (!opts.count || ecount < opts.count)) {\n    if (memcmp(ri.key, &send, ri.key_len) > 0)\n      break;\n    streamID id;\n\n    streamDecodeID(ri.key, &id);\n    RangeOpts ropts;\n    ropts.start.val = id;\n    ropts.end.val = id;\n    
ropts.access_kind =\n        StreamAccessKind::kNone;  // Prevent per-entry counting; already recorded above\n    auto op_result = OpRange(op_args, key, ropts);\n    if (!op_result || !op_result.value().size()) {\n      Record rec;\n      rec.id = id;\n      result.push_back(rec);\n    } else {\n      streamNACK* nack = static_cast<streamNACK*>(ri.data);\n      nack->delivery_time = op_args.db_cntx.time_now_ms;\n      nack->delivery_count++;\n      result.push_back(std::move(op_result.value()[0]));\n    }\n    ecount++;\n  }\n  raxStop(&ri);\n  return result;\n}\n\nnamespace {\n// Our C-API doesn't use const, so we have to const cast.\n// Only intended for read-only functions.\nstream* GetReadOnlyStream(const CompactObj& cobj) {\n  return const_cast<stream*>((const stream*)cobj.RObjPtr());\n}\n\n// Reassigns a pending NACK entry to a new consumer, updating the PELs of both the old and new\n// consumer. If the NACK already belongs to the target consumer, this is a no-op for the PELs.\nvoid ReassignNACKToConsumer(streamNACK* nack, streamConsumer* consumer, uint8_t* key_buf,\n                            size_t key_len, uint64_t now_ms) {\n  if (nack->consumer != consumer) {\n    if (nack->consumer) {\n      raxRemove(nack->consumer->pel, key_buf, key_len, nullptr);\n      LOG_IF(DFATAL, nack->consumer->pel->numnodes == 0) << \"Invalid rax state\";\n    }\n    raxInsert(consumer->pel, key_buf, key_len, nack, nullptr);\n    nack->consumer = consumer;\n  }\n  consumer->active_time = now_ms;\n}\n\n}  // namespace\n// Returns the range response for each stream on this shard in order of\n// GetShardArgs.\nvector<RecordVec> OpRead(const OpArgs& op_args, const ShardArgs& shard_args, const ReadOpts& opts) {\n  DCHECK(!shard_args.Empty());\n\n  RangeOpts range_opts;\n  range_opts.count = opts.count;\n  range_opts.end = ParsedStreamId{.val = streamID{\n                                      .ms = UINT64_MAX,\n                                      .seq = UINT64_MAX,\n              
                    }};\n\n  vector<RecordVec> response(shard_args.Size());\n  unsigned index = 0;\n  for (string_view key : shard_args) {\n    const auto& sitem = opts.stream_ids.at(key);\n    auto& dest = response[index++];\n\n    // We skip, group can be empty after waking up from a blocked read\n    if (!sitem.group && opts.read_group) {\n      continue;\n    }\n\n    range_opts.start = sitem.id;\n    range_opts.group = sitem.group;\n    range_opts.consumer = sitem.consumer;\n    range_opts.noack = opts.noack;\n    // XREAD/XREADGROUP new deliveries are sequential (fetch-all detected in OpRange).\n    range_opts.access_kind = StreamAccessKind::kSequential;\n\n    OpResult<RecordVec> range_res;\n\n    if (sitem.serve_history)\n      range_res = OpRangeFromConsumerPEL(op_args, key, range_opts);\n    else\n      range_res = OpRange(op_args, key, range_opts);\n    if (range_res) {\n      dest = std::move(range_res.value());\n    }\n  }\n\n  return response;\n}\n\nOpResult<uint32_t> OpLen(const OpArgs& op_args, string_view key) {\n  auto& db_slice = op_args.GetDbSlice();\n  auto res_it = db_slice.FindReadOnly(op_args.db_cntx, key, OBJ_STREAM);\n  RETURN_ON_BAD_STATUS(res_it);\n  const CompactObj& cobj = (*res_it)->second;\n  stream* s = (stream*)cobj.RObjPtr();\n  return s->length;\n}\n\nOpResult<vector<GroupInfo>> OpListGroups(const DbContext& db_cntx, string_view key,\n                                         EngineShard* shard) {\n  auto& db_slice = db_cntx.GetDbSlice(shard->shard_id());\n  auto res_it = db_slice.FindReadOnly(db_cntx, key, OBJ_STREAM);\n  RETURN_ON_BAD_STATUS(res_it);\n\n  vector<GroupInfo> result;\n  const CompactObj& cobj = (*res_it)->second;\n  stream* s = (stream*)cobj.RObjPtr();\n\n  if (s->cgroups) {\n    result.reserve(raxSize(s->cgroups));\n\n    raxIterator ri;\n    raxStart(&ri, s->cgroups);\n    raxSeek(&ri, \"^\", NULL, 0);\n    while (raxNext(&ri)) {\n      streamCG* cg = (streamCG*)ri.data;\n      GroupInfo ginfo;\n      
ginfo.name.assign(reinterpret_cast<char*>(ri.key), ri.key_len);\n      ginfo.consumer_size = raxSize(cg->consumers);\n      ginfo.pending_size = raxSize(cg->pel);\n      ginfo.last_id = cg->last_id;\n      ginfo.entries_read = cg->entries_read;\n      ginfo.lag = StreamCGLag(s, cg);\n      result.push_back(std::move(ginfo));\n    }\n    raxStop(&ri);\n  }\n\n  return result;\n}\n\nvector<Record> GetStreamRecords(stream* s, streamID start, streamID end, bool reverse,\n                                size_t count) {\n  streamIterator si;\n  int64_t numfields;\n  streamID id;\n  size_t arraylen = 0;\n  vector<Record> records;\n\n  streamIteratorStart(&si, s, &start, &end, reverse);\n  while (streamIteratorGetID(&si, &id, &numfields)) {\n    Record rec;\n    rec.id = id;\n    rec.kv_arr.reserve(numfields);\n\n    while (numfields--) {\n      unsigned char *key, *value;\n      int64_t key_len, value_len;\n      streamIteratorGetField(&si, &key, &value, &key_len, &value_len);\n      string skey(reinterpret_cast<char*>(key), key_len);\n      string sval(reinterpret_cast<char*>(value), value_len);\n\n      rec.kv_arr.emplace_back(std::move(skey), std::move(sval));\n    }\n    records.push_back(std::move(rec));\n    arraylen++;\n    if (count && count == arraylen)\n      break;\n  }\n\n  streamIteratorStop(&si);\n\n  return records;\n}\n\nvoid GetGroupPEL(stream* s, streamCG* cg, long long count, GroupInfo* ginfo) {\n  vector<NACKInfo> nack_info_vec;\n  long long arraylen_cg_pel = 0;\n  raxIterator ri_cg_pel;\n  raxStart(&ri_cg_pel, cg->pel);\n  raxSeek(&ri_cg_pel, \"^\", NULL, 0);\n  while (raxNext(&ri_cg_pel) && (!count || arraylen_cg_pel < count)) {\n    streamNACK* nack = static_cast<streamNACK*>(ri_cg_pel.data);\n    NACKInfo nack_info;\n\n    streamID id;\n    streamDecodeID(ri_cg_pel.key, &id);\n    nack_info.pel_id = id;\n    nack_info.consumer_name = nack->consumer->name;\n    nack_info.delivery_time = nack->delivery_time;\n    nack_info.delivery_count = 
nack->delivery_count;\n\n    nack_info_vec.push_back(nack_info);\n    arraylen_cg_pel++;\n  }\n  raxStop(&ri_cg_pel);\n  ginfo->stream_nack_vec = std::move(nack_info_vec);\n}\n\nvoid GetConsumers(stream* s, streamCG* cg, long long count, GroupInfo* ginfo) {\n  vector<ConsumerInfo> consumer_info_vec;\n  raxIterator ri_consumers;\n  raxStart(&ri_consumers, cg->consumers);\n  raxSeek(&ri_consumers, \"^\", NULL, 0);\n  while (raxNext(&ri_consumers)) {\n    ConsumerInfo consumer_info;\n    streamConsumer* consumer = static_cast<streamConsumer*>(ri_consumers.data);\n\n    LOG_IF(DFATAL, consumer->pel->numnodes == 0) << \"Invalid rax state\";\n\n    consumer_info.name = consumer->name;\n    consumer_info.seen_time = consumer->seen_time;\n    consumer_info.active_time = consumer->active_time;\n    consumer_info.pel_count = raxSize(consumer->pel);\n\n    /* Consumer PEL */\n    long long arraylen_cpel = 0;\n    raxIterator ri_cpel;\n    raxStart(&ri_cpel, consumer->pel);\n    raxSeek(&ri_cpel, \"^\", NULL, 0);\n    vector<NACKInfo> consumer_pel_vec;\n    while (raxNext(&ri_cpel) && (!count || arraylen_cpel < count)) {\n      NACKInfo nack_info;\n      streamNACK* nack = static_cast<streamNACK*>(ri_cpel.data);\n\n      streamID id;\n      streamDecodeID(ri_cpel.key, &id);\n      nack_info.pel_id = id;\n      nack_info.delivery_time = nack->delivery_time;\n      nack_info.delivery_count = nack->delivery_count;\n\n      consumer_pel_vec.push_back(nack_info);\n      arraylen_cpel++;\n    }\n    consumer_info.pending = consumer_pel_vec;\n    consumer_info_vec.push_back(consumer_info);\n    raxStop(&ri_cpel);\n  }\n  raxStop(&ri_consumers);\n  ginfo->consumer_info_vec = std::move(consumer_info_vec);\n}\n\nOpResult<StreamInfo> OpStreams(const DbContext& db_cntx, string_view key, EngineShard* shard,\n                               int full, size_t count) {\n  auto& db_slice = db_cntx.GetDbSlice(shard->shard_id());\n  auto res_it = db_slice.FindReadOnly(db_cntx, key, OBJ_STREAM);\n  
RETURN_ON_BAD_STATUS(res_it);\n\n  // Record access only after successful key validation\n  if (full) {\n    shard->stats().stream_fetch_all_accesses++;\n  } else {\n    shard->stats().stream_sequential_accesses++;\n  }\n\n  vector<StreamInfo> result;\n  const CompactObj& cobj = (*res_it)->second;\n  stream* s = (stream*)cobj.RObjPtr();\n\n  StreamInfo sinfo;\n  sinfo.length = s->length;\n\n  sinfo.radix_tree_keys = raxSize(s->rax);\n  sinfo.radix_tree_nodes = s->rax->numnodes;\n  sinfo.last_generated_id = s->last_id;\n  sinfo.max_deleted_entry_id = s->max_deleted_entry_id;\n  sinfo.entries_added = s->entries_added;\n  sinfo.recorded_first_entry_id = s->first_id;\n  sinfo.groups = s->cgroups ? raxSize(s->cgroups) : 0;\n  sinfo.entries = GetStreamRecords(s, s->first_id, s->last_id, false, count);\n\n  if (full) {\n    if (s->cgroups) {\n      GroupInfoVec group_info_vec;\n\n      raxIterator ri_cgroups;\n      raxStart(&ri_cgroups, s->cgroups);\n      raxSeek(&ri_cgroups, \"^\", NULL, 0);\n      while (raxNext(&ri_cgroups)) {\n        streamCG* cg = (streamCG*)ri_cgroups.data;\n        GroupInfo ginfo;\n        ginfo.name.assign(reinterpret_cast<char*>(ri_cgroups.key), ri_cgroups.key_len);\n        ginfo.last_id = cg->last_id;\n        ginfo.consumer_size = raxSize(cg->consumers);\n        ginfo.pending_size = raxSize(cg->pel);\n        ginfo.entries_read = cg->entries_read;\n        ginfo.lag = StreamCGLag(s, cg);\n        GetGroupPEL(s, cg, count, &ginfo);\n        GetConsumers(s, cg, count, &ginfo);\n\n        group_info_vec.push_back(ginfo);\n      }\n      raxStop(&ri_cgroups);\n\n      sinfo.cgroups = group_info_vec;\n    }\n  } else {\n    vector<Record> first_entry_vector = GetStreamRecords(s, s->first_id, s->last_id, false, 1);\n    if (first_entry_vector.size() != 0) {\n      sinfo.first_entry = first_entry_vector.at(0);\n    }\n    vector<Record> last_entry_vector = GetStreamRecords(s, s->first_id, s->last_id, true, 1);\n    if (last_entry_vector.size() 
!= 0) {\n      sinfo.last_entry = last_entry_vector.at(0);\n    }\n  }\n\n  return sinfo;\n}\n\nOpResult<vector<ConsumerInfo>> OpConsumers(const DbContext& db_cntx, EngineShard* shard,\n                                           string_view stream_name, string_view group_name) {\n  auto& db_slice = db_cntx.GetDbSlice(shard->shard_id());\n  auto res_it = db_slice.FindReadOnly(db_cntx, stream_name, OBJ_STREAM);\n  RETURN_ON_BAD_STATUS(res_it);\n\n  vector<ConsumerInfo> result;\n  const CompactObj& cobj = (*res_it)->second;\n  stream* s = GetReadOnlyStream(cobj);\n  streamCG* cg = StreamLookupCG(s, WrapSds(group_name));\n  if (cg == NULL) {\n    return OpStatus::INVALID_VALUE;\n  }\n  result.reserve(raxSize(s->cgroups));\n\n  raxIterator ri;\n  raxStart(&ri, cg->consumers);\n  raxSeek(&ri, \"^\", NULL, 0);\n  mstime_t now = db_cntx.time_now_ms;\n  while (raxNext(&ri)) {\n    ConsumerInfo consumer_info;\n    streamConsumer* consumer = (streamConsumer*)ri.data;\n    mstime_t idle = now - consumer->seen_time;\n    if (idle < 0)\n      idle = 0;\n\n    consumer_info.name = consumer->name;\n    consumer_info.pel_count = raxSize(consumer->pel);\n    consumer_info.idle = idle;\n    consumer_info.active_time = consumer->active_time;\n    result.push_back(std::move(consumer_info));\n  }\n  raxStop(&ri);\n  return result;\n}\n\nconstexpr uint8_t kCreateOptMkstream = 1 << 0;\n\nstruct CreateOpts {\n  string_view gname;\n  string_view id;\n  uint8_t flags = 0;\n};\n\nOpStatus OpCreate(const OpArgs& op_args, string_view key, const CreateOpts& opts) {\n  auto& db_slice = op_args.GetDbSlice();\n  auto res_it = db_slice.FindMutable(op_args.db_cntx, key, OBJ_STREAM);\n  int64_t entries_read = SCG_INVALID_ENTRIES_READ;\n  StreamMemTracker mem_tracker;\n  bool stream_created_by_mkstream = false;\n  if (!res_it) {\n    if (opts.flags & kCreateOptMkstream) {\n      // MKSTREAM is enabled, so create the stream\n      res_it = db_slice.AddNew(op_args.db_cntx, key, PrimeValue{}, 0);\n      
if (!res_it)\n        return res_it.status();\n\n      stream* s = streamNew();\n      res_it->it->second.InitRobj(OBJ_STREAM, OBJ_ENCODING_STREAM, s);\n      stream_created_by_mkstream = true;\n    } else {\n      return res_it.status();\n    }\n  }\n\n  CompactObj& cobj = res_it->it->second;\n  stream* s = (stream*)cobj.RObjPtr();\n\n  streamID id;\n  ParsedStreamId parsed_id;\n  if (opts.id == \"$\") {\n    id = s->last_id;\n  } else {\n    if (ParseID(opts.id, true, 0, &parsed_id)) {\n      id = parsed_id.val;\n    } else {\n      if (stream_created_by_mkstream) {\n        db_slice.DelMutable(op_args.db_cntx, std::move(*res_it));\n      }\n      return OpStatus::SYNTAX_ERR;\n    }\n  }\n\n  streamCG* cg = streamCreateCG(s, opts.gname.data(), opts.gname.size(), &id, entries_read);\n  mem_tracker.UpdateStreamSize(res_it->it->second);\n  return cg ? OpStatus::OK : OpStatus::BUSY_GROUP;\n}\n\nstruct FindGroupResult {\n  stream* s = nullptr;\n  streamCG* cg = nullptr;\n  DbSlice::ItAndUpdater it;\n};\n\nOpResult<FindGroupResult> FindGroup(const OpArgs& op_args, string_view key, string_view gname,\n                                    bool skip_group = true) {\n  auto& db_slice = op_args.GetDbSlice();\n  auto res_it = db_slice.FindMutable(op_args.db_cntx, key, OBJ_STREAM);\n  RETURN_ON_BAD_STATUS(res_it);\n\n  CompactObj& cobj = res_it->it->second;\n  auto* s = static_cast<stream*>(cobj.RObjPtr());\n  auto* cg = StreamLookupCG(s, WrapSds(gname));\n  if (skip_group && !cg)\n    return OpStatus::SKIPPED;\n\n  return FindGroupResult{s, cg, std::move(*res_it)};\n}\n\n// Try to get the consumer. If not found, create a new one.\nstreamConsumer* FindOrAddConsumer(string_view name, streamCG* cg, uint64_t now_ms,\n                                  bool* is_consumer_new) {\n  // Try to get the consumer. 
If not found, create a new one.\n  auto cname = WrapSds(name);\n  streamConsumer* consumer = StreamLookupConsumer(cg, cname);\n  if (consumer) {\n    consumer->seen_time = now_ms;\n  } else {\n    // TODO: notify xgroup-createconsumer event once we support stream events.\n    if (is_consumer_new) {\n      *is_consumer_new = true;\n    }\n    consumer = StreamCreateConsumer(cg, name, now_ms, SCC_DEFAULT);\n  }\n\n  return consumer;\n}\n\nconstexpr uint8_t kClaimForce = 1 << 0;\nconstexpr uint8_t kClaimJustID = 1 << 1;\nconstexpr uint8_t kClaimLastID = 1 << 2;\n\nstruct ClaimOpts {\n  string_view group;\n  string_view consumer;\n  int64 min_idle_time;\n  int64 delivery_time = -1;\n  int retry = -1;\n  uint8_t flags = 0;\n  int32_t count = 100;      // only for XAUTOCLAIM\n  streamID start = {0, 0};  // only for XAUTOCLAIM\n  streamID last_id;\n};\n\nstruct ClaimInfo {\n  bool justid = false;\n  vector<streamID> ids;\n  RecordVec records;\n  streamID end_id = {0, 0};      // only for XAUTOCLAIM\n  vector<streamID> deleted_ids;  // only for XAUTOCLAIM\n};\n\nvoid AppendClaimResultItem(ClaimInfo& result, stream* s, streamID id) {\n  int64_t numfields;\n  if (result.justid) {\n    result.ids.push_back(id);\n    return;\n  }\n  streamIterator it;\n  streamID cid;\n  streamIteratorStart(&it, s, &id, &id, 0);\n  while (streamIteratorGetID(&it, &cid, &numfields)) {\n    Record rec;\n    rec.id = cid;\n    rec.kv_arr.reserve(numfields);\n\n    /* Emit the field-value pairs. 
*/\n    while (numfields--) {\n      unsigned char *key, *value;\n      int64_t key_len, value_len;\n      streamIteratorGetField(&it, &key, &value, &key_len, &value_len);\n      string skey(reinterpret_cast<char*>(key), key_len);\n      string sval(reinterpret_cast<char*>(value), value_len);\n\n      rec.kv_arr.emplace_back(std::move(skey), std::move(sval));\n    }\n    result.records.push_back(std::move(rec));\n  }\n  streamIteratorStop(&it);\n}\n\n// XCLAIM key group consumer min-idle-time id\nOpResult<ClaimInfo> OpClaim(const OpArgs& op_args, string_view key, const ClaimOpts& opts,\n                            absl::Span<streamID> ids) {\n  auto cgr_res = FindGroup(op_args, key, opts.group);\n  RETURN_ON_BAD_STATUS(cgr_res);\n  RecordStreamAccess(op_args, StreamAccessKind::kRandom);\n\n  uint64_t now_ms = op_args.db_cntx.time_now_ms;\n  ClaimInfo result;\n  result.justid = (opts.flags & kClaimJustID);\n\n  streamID last_id = opts.last_id;\n  if (opts.flags & kClaimLastID) {\n    if (streamCompareID(&last_id, &cgr_res->cg->last_id) > 0) {\n      cgr_res->cg->last_id = last_id;\n    }\n  }\n\n  StreamMemTracker tracker;\n\n  streamConsumer* consumer = FindOrAddConsumer(opts.consumer, cgr_res->cg, now_ms, nullptr);\n\n  for (streamID id : ids) {\n    std::array<uint8_t, sizeof(streamID)> buf;\n    StreamEncodeID(buf.begin(), id);\n\n    streamNACK* nack = nullptr;\n    int fres = raxFind(cgr_res->cg->pel, buf.begin(), sizeof(buf), (void**)&nack);\n    if (!StreamEntryExists(cgr_res->s, &id)) {\n      if (fres) {\n        /* Release the NACK */\n        raxRemove(cgr_res->cg->pel, buf.begin(), sizeof(buf), nullptr);\n        raxRemove(nack->consumer->pel, buf.begin(), sizeof(buf), nullptr);\n        LOG_IF(DFATAL, nack->consumer->pel->numnodes == 0) << \"Invalid rax state\";\n        streamFreeNACK(nack);\n      }\n      continue;\n    }\n\n    // We didn't find a nack but the FORCE option is given.\n    // Create the NACK forcefully.\n    if ((opts.flags & 
kClaimForce) && fres == 0) {\n      /* Create the NACK. */\n      nack = StreamCreateNACK(nullptr, now_ms);\n      raxInsert(cgr_res->cg->pel, buf.begin(), sizeof(buf), nack, nullptr);\n    }\n\n    // We found the nack, continue.\n    if (nack) {\n      // First check if the entry id exceeds the `min_idle_time`.\n      if (nack->consumer && opts.min_idle_time) {\n        mstime_t this_idle = now_ms - nack->delivery_time;\n        if (this_idle < opts.min_idle_time) {\n          continue;\n        }\n      }\n\n      // Set the delivery time for the entry.\n      nack->delivery_time = opts.delivery_time;\n      /* Set the delivery attempts counter if given, otherwise\n       * autoincrement unless JUSTID option provided */\n      if (opts.retry >= 0) {\n        nack->delivery_count = opts.retry;\n      } else if (!(opts.flags & kClaimJustID)) {\n        nack->delivery_count++;\n      }\n      // Note: nack->consumer is NULL if we created the NACK above because of the FORCE option.\n      ReassignNACKToConsumer(nack, consumer, buf.begin(), sizeof(buf), now_ms);\n\n      /* Send the reply for this entry. 
*/\n      AppendClaimResultItem(result, cgr_res->s, id);\n      // TODO: propagate this change with streamPropagateXCLAIM\n    }\n  }\n  tracker.UpdateStreamSize(cgr_res->it.it->second);\n  return result;\n}\n\n// XGROUP DESTROY key groupname\nOpStatus OpDestroyGroup(const OpArgs& op_args, string_view key, string_view gname) {\n  auto cgr_res = FindGroup(op_args, key, gname);\n  RETURN_ON_BAD_STATUS(cgr_res);\n  StreamMemTracker mem_tracker;\n\n  raxRemove(cgr_res->s->cgroups, (uint8_t*)(gname.data()), gname.size(), NULL);\n  StreamFreeCG(cgr_res->cg);\n\n  mem_tracker.UpdateStreamSize(cgr_res->it.it->second);\n\n  // Awake readers blocked on this group\n  auto blocking_controller = op_args.db_cntx.ns->GetBlockingController(op_args.shard->shard_id());\n  if (blocking_controller) {\n    blocking_controller->Awaken(op_args.db_cntx.db_index, key);\n  }\n\n  return OpStatus::OK;\n}\n\nstruct GroupConsumerPair {\n  streamCG* group;\n  streamConsumer* consumer;\n};\n\nstruct GroupConsumerPairOpts {\n  string_view group;\n  string_view consumer;\n};\n\n// XGROUP CREATECONSUMER key groupname consumername\nOpResult<uint32_t> OpCreateConsumer(const OpArgs& op_args, string_view key, string_view gname,\n                                    string_view consumer_name) {\n  auto cgroup_res = FindGroup(op_args, key, gname);\n  RETURN_ON_BAD_STATUS(cgroup_res);\n\n  StreamMemTracker mem_tracker;\n\n  streamConsumer* consumer = StreamCreateConsumer(\n      cgroup_res->cg, consumer_name, op_args.db_cntx.time_now_ms, SCC_NO_NOTIFY | SCC_NO_DIRTIFY);\n\n  mem_tracker.UpdateStreamSize(cgroup_res->it.it->second);\n  return consumer ? 
OpStatus::OK : OpStatus::KEY_EXISTS;\n}\n\n// XGROUP DELCONSUMER key groupname consumername\nOpResult<uint32_t> OpDelConsumer(const OpArgs& op_args, string_view key, string_view gname,\n                                 string_view consumer_name) {\n  auto cgroup_res = FindGroup(op_args, key, gname);\n  RETURN_ON_BAD_STATUS(cgroup_res);\n  StreamMemTracker mem_tracker;\n\n  long long pending = 0;\n  streamConsumer* consumer = StreamLookupConsumer(cgroup_res->cg, WrapSds(consumer_name));\n  if (consumer) {\n    pending = raxSize(consumer->pel);\n    StreamDelConsumer(cgroup_res->cg, consumer);\n  }\n\n  mem_tracker.UpdateStreamSize(cgroup_res->it.it->second);\n  return pending;\n}\n\nOpStatus OpSetId(const OpArgs& op_args, string_view key, string_view gname, string_view id,\n                 std::optional<int64_t> entries_read) {\n  auto cgr_res = FindGroup(op_args, key, gname);\n  RETURN_ON_BAD_STATUS(cgr_res);\n\n  streamID sid;\n  ParsedStreamId parsed_id;\n  if (id == \"$\") {\n    sid = cgr_res->s->last_id;\n  } else {\n    if (ParseID(id, true, 0, &parsed_id)) {\n      sid = parsed_id.val;\n    } else {\n      return OpStatus::SYNTAX_ERR;\n    }\n  }\n  cgr_res->cg->last_id = sid;\n  if (entries_read) {\n    cgr_res->cg->entries_read = *entries_read;\n  }\n\n  return OpStatus::OK;\n}\n\nErrorReply OpXSetId(const OpArgs& op_args, string_view key, const streamID& sid) {\n  auto& db_slice = op_args.GetDbSlice();\n  auto res_it = db_slice.FindMutable(op_args.db_cntx, key, OBJ_STREAM);\n  if (!res_it)\n    return res_it.status();\n\n  StreamMemTracker mem_tracker;\n\n  PrimeValue& pv = res_it->it->second;\n  stream* stream_inst = (stream*)pv.RObjPtr();\n  streamID max_xdel_id{0, 0};\n  streamID id = sid;\n\n  if (streamCompareID(&id, &stream_inst->max_deleted_entry_id) < 0) {\n    return ErrorReply{\"The ID specified in XSETID is smaller than current max_deleted_entry_id\",\n                      \"stream_smaller_deleted\"};\n  }\n\n  /* If the stream has at least 
one item, we want to check that the user\n   * is setting a last ID that is equal or greater than the current top\n   * item, otherwise the fundamental ID monotonicity assumption is violated. */\n  if (stream_inst->length > 0) {\n    streamID maxid;\n    StreamLastValidID(stream_inst, &maxid);\n\n    if (streamCompareID(&id, &maxid) < 0) {\n      return OpStatus::STREAM_ID_SMALL;\n    }\n  }\n\n  stream_inst->last_id = sid;\n\n  raxIterator ri;\n  raxStart(&ri, stream_inst->rax);\n  raxSeek(&ri, \"$\", NULL, 0);\n\n  if (!raxEOF(&ri)) {\n    /* Get a reference to the tail node listpack. */\n    size_t lp_bytes = lpBytes((uint8_t*)ri.data);\n    CHECK_GT(lp_bytes, 0U);\n  }\n  raxStop(&ri);\n\n  if (!StreamIDEqZero(&max_xdel_id))\n    stream_inst->max_deleted_entry_id = max_xdel_id;\n\n  RecordStreamAccess(op_args, StreamAccessKind::kSequential);\n\n  mem_tracker.UpdateStreamSize(pv);\n\n  return OpStatus::OK;\n}\n\nOpResult<uint32_t> OpDel(const OpArgs& op_args, string_view key, absl::Span<streamID> ids) {\n  auto& db_slice = op_args.GetDbSlice();\n  auto res_it = db_slice.FindMutable(op_args.db_cntx, key, OBJ_STREAM);\n  RETURN_ON_BAD_STATUS(res_it);\n\n  PrimeValue& pv = res_it->it->second;\n  stream* stream_inst = (stream*)pv.RObjPtr();\n\n  uint32_t deleted = 0;\n  bool first_entry = false;\n\n  StreamMemTracker tracker;\n\n  // Capture last_id before deletion loop for heuristic (deletion can change it)\n  streamID original_last_id = stream_inst->last_id;\n\n  for (size_t j = 0; j < ids.size(); j++) {\n    streamID id = ids[j];\n    if (!StreamDeleteItem(stream_inst, &id))\n      continue;\n\n    /* We want to know if the first entry in the stream was deleted\n     * so we can later set the new one. */\n    if (streamCompareID(&id, &stream_inst->first_id) == 0) {\n      first_entry = 1;\n    }\n    /* Update the stream's maximal tombstone if needed. 
*/\n    if (streamCompareID(&id, &stream_inst->max_deleted_entry_id) > 0) {\n      stream_inst->max_deleted_entry_id = id;\n    }\n    deleted++;\n  }\n\n  /* Update the stream's first ID. */\n  if (deleted) {\n    if (stream_inst->length == 0) {\n      stream_inst->first_id.ms = 0;\n      stream_inst->first_id.seq = 0;\n    } else if (first_entry) {\n      streamGetEdgeID(stream_inst, 1, 1, &stream_inst->first_id);\n    }\n    // Only update size tracking if we actually deleted something.\n    // This avoids issues with memory tracking noise from other operations\n    // in the same thread.\n    tracker.UpdateStreamSize(pv);\n  }\n\n  // Heuristic: if any deleted ID shares ms with original last_id, it's a tail delete (sequential).\n  bool is_sequential = false;\n  for (size_t j = 0; j < ids.size(); j++) {\n    if (ids[j].ms == original_last_id.ms) {\n      is_sequential = true;\n      break;\n    }\n  }\n  RecordStreamAccess(op_args,\n                     is_sequential ? StreamAccessKind::kSequential : StreamAccessKind::kRandom);\n\n  return deleted;\n}\n\n// XACK key groupname id [id ...]\nOpResult<uint32_t> OpAck(const OpArgs& op_args, string_view key, string_view gname,\n                         absl::Span<streamID> ids) {\n  auto res = FindGroup(op_args, key, gname, false);\n  RETURN_ON_BAD_STATUS(res);\n\n  if (res->cg == nullptr || res->s == nullptr) {\n    return 0;\n  }\n\n  int acknowledged = 0;\n  StreamMemTracker mem_tracker;\n  for (auto& id : ids) {\n    unsigned char buf[sizeof(streamID)];\n    StreamEncodeID(buf, id);\n\n    // From Redis' xackCommand's implemenation\n    // Lookup the ID in the group PEL: it will have a reference to the\n    // NACK structure that will have a reference to the consumer, so that\n    // we are able to remove the entry from both PELs.\n    streamNACK* nack = nullptr;\n    int fres = raxFind(res->cg->pel, buf, sizeof(buf), (void**)&nack);\n    if (fres) {\n      raxRemove(res->cg->pel, buf, sizeof(buf), nullptr);\n     
 raxRemove(nack->consumer->pel, buf, sizeof(buf), nullptr);\n      streamFreeNACK(nack);\n      acknowledged++;\n    }\n  }\n  mem_tracker.UpdateStreamSize(res->it.it->second);\n  return acknowledged;\n}\n\nOpResult<ClaimInfo> OpAutoClaim(const OpArgs& op_args, string_view key, const ClaimOpts& opts) {\n  auto cgr_res = FindGroup(op_args, key, opts.group, false);\n  RETURN_ON_BAD_STATUS(cgr_res);\n  RecordStreamAccess(op_args, StreamAccessKind::kRandom);\n\n  stream* stream = cgr_res->s;\n  streamCG* group = cgr_res->cg;\n\n  if (stream == nullptr || group == nullptr) {\n    return OpStatus::KEY_NOTFOUND;\n  }\n\n  StreamMemTracker mem_tracker;\n\n  // from Redis spec on XAutoClaim:\n  // https://redis.io/commands/xautoclaim/\n  // The maximum number of pending entries that the command scans is the product of\n  // multiplying <count>'s value by 10 (hard-coded).\n  int64_t attempts = opts.count * 10;\n\n  unsigned char start_key[sizeof(streamID)];\n  streamID start_id = opts.start;\n  StreamEncodeID(start_key, start_id);\n  raxIterator ri;\n  raxStart(&ri, group->pel);\n  raxSeek(&ri, \">=\", start_key, sizeof(start_key));\n\n  ClaimInfo result;\n  result.justid = (opts.flags & kClaimJustID);\n\n  uint64_t now_ms = op_args.db_cntx.time_now_ms;\n  int count = opts.count;\n\n  streamConsumer* consumer = FindOrAddConsumer(opts.consumer, group, now_ms, nullptr);\n\n  while (attempts-- && count && raxNext(&ri)) {\n    streamNACK* nack = (streamNACK*)ri.data;\n\n    streamID id;\n    streamDecodeID(ri.key, &id);\n\n    if (!StreamEntryExists(stream, &id)) {\n      // TODO: to propagate this change to replica as XCLAIM command\n      // - since we delete it from NACK. 
See streamPropagateXCLAIM call.\n      raxRemove(group->pel, ri.key, ri.key_len, nullptr);\n      raxRemove(nack->consumer->pel, ri.key, ri.key_len, nullptr);\n      streamFreeNACK(nack);\n      result.deleted_ids.push_back(id);\n      raxSeek(&ri, \">=\", ri.key, ri.key_len);\n\n      count--; /* Count is a limit of the command response size. */\n      continue;\n    }\n\n    if (opts.min_idle_time) {\n      mstime_t this_idle = now_ms - nack->delivery_time;\n      if (this_idle < opts.min_idle_time)\n        continue;\n    }\n\n    nack->delivery_time = now_ms;\n    if (!result.justid) {\n      nack->delivery_count++;\n    }\n    ReassignNACKToConsumer(nack, consumer, ri.key, ri.key_len, now_ms);\n    AppendClaimResultItem(result, stream, id);\n    count--;\n    // TODO: propagate xclaim to replica\n  }\n\n  raxNext(&ri);\n  streamID end_id;\n  if (raxEOF(&ri)) {\n    end_id.ms = end_id.seq = 0;\n  } else {\n    streamDecodeID(ri.key, &end_id);\n  }\n  raxStop(&ri);\n  result.end_id = end_id;\n\n  mem_tracker.UpdateStreamSize(cgr_res->it.it->second);\n\n  return result;\n}\n\nstruct PendingOpts {\n  string_view group_name;\n  string_view consumer_name;\n  ParsedStreamId start;\n  ParsedStreamId end;\n  int64_t min_idle_time = 0;\n  int64_t count = -1;\n};\n\nstruct PendingReducedResult {\n  uint64_t count = 0;\n  streamID start;\n  streamID end;\n  vector<pair<string_view, uint64_t /* size of consumer pending list*/>> consumer_list;\n};\n\nstruct PendingExtendedResult {\n  streamID start;\n  string_view consumer_name;\n  uint64_t delivery_count;\n  mstime_t elapsed;\n};\n\nusing PendingExtendedResultList = std::vector<PendingExtendedResult>;\nusing PendingResult = std::variant<PendingReducedResult, PendingExtendedResultList>;\n\nPendingReducedResult GetPendingReducedResult(streamCG* cg) {\n  PendingReducedResult result;\n  result.count = raxSize(cg->pel);\n  if (!result.count) {\n    return result;\n  }\n\n  raxIterator ri;\n\n  raxStart(&ri, cg->pel);\n  
raxSeek(&ri, \"^\", nullptr, 0);\n  raxNext(&ri);\n  streamDecodeID(ri.key, &result.start);\n\n  raxSeek(&ri, \"$\", nullptr, 0);\n  raxNext(&ri);\n  streamDecodeID(ri.key, &result.end);\n\n  raxStart(&ri, cg->consumers);\n  raxSeek(&ri, \"^\", nullptr, 0);\n  while (raxNext(&ri)) {\n    streamConsumer* consumer = static_cast<streamConsumer*>(ri.data);\n    uint64_t pel_size = raxSize(consumer->pel);\n    if (!pel_size)\n      continue;\n\n    pair<string_view, uint64_t> item;\n    item.first = string_view{consumer->name, sdslen(consumer->name)};\n    item.second = pel_size;\n    result.consumer_list.push_back(item);\n  }\n  raxStop(&ri);\n  return result;\n}\n\nPendingExtendedResultList GetPendingExtendedResult(uint64_t now_ms, streamCG* cg,\n                                                   streamConsumer* consumer,\n                                                   const PendingOpts& opts) {\n  PendingExtendedResultList result;\n  rax* pel = consumer ? consumer->pel : cg->pel;\n  streamID sstart = opts.start.val, send = opts.end.val;\n  unsigned char start_key[sizeof(streamID)];\n  unsigned char end_key[sizeof(streamID)];\n  raxIterator ri;\n\n  StreamEncodeID(start_key, sstart);\n  StreamEncodeID(end_key, send);\n  raxStart(&ri, pel);\n  raxSeek(&ri, \">=\", start_key, sizeof(start_key));\n\n  auto count = opts.count;\n  while (count && raxNext(&ri)) {\n    if (memcmp(ri.key, end_key, ri.key_len) > 0) {\n      break;\n    }\n    streamNACK* nack = static_cast<streamNACK*>(ri.data);\n\n    if (opts.min_idle_time) {\n      mstime_t this_idle = now_ms - nack->delivery_time;\n      if (this_idle < opts.min_idle_time) {\n        continue;\n      }\n    }\n\n    count--;\n\n    /* Entry ID. */\n    streamID id;\n    streamDecodeID(ri.key, &id);\n\n    /* Milliseconds elapsed since last delivery. 
*/\n    mstime_t elapsed = now_ms - nack->delivery_time;\n    if (elapsed < 0) {\n      elapsed = 0;\n    }\n\n    PendingExtendedResult item = {.start = id,\n                                  .consumer_name = nack->consumer->name,\n                                  .delivery_count = nack->delivery_count,\n                                  .elapsed = elapsed};\n    result.push_back(item);\n  }\n  raxStop(&ri);\n  return result;\n}\n\nOpResult<PendingResult> OpPending(const OpArgs& op_args, string_view key, const PendingOpts& opts) {\n  auto cgroup_res = FindGroup(op_args, key, opts.group_name);\n  RETURN_ON_BAD_STATUS(cgroup_res);\n\n  streamConsumer* consumer = nullptr;\n  if (!opts.consumer_name.empty()) {\n    consumer = StreamLookupConsumer(cgroup_res->cg, WrapSds(opts.consumer_name));\n  }\n\n  PendingResult result;\n\n  if (opts.count == -1) {\n    result = GetPendingReducedResult(cgroup_res->cg);\n  } else {\n    result = GetPendingExtendedResult(op_args.db_cntx.time_now_ms, cgroup_res->cg, consumer, opts);\n  }\n  return result;\n}\n\nvoid CreateGroup(facade::CmdArgParser* parser, CommandContext* cmd_cntx) {\n  auto key = parser->Next();\n\n  CreateOpts opts;\n  std::tie(opts.gname, opts.id) = parser->Next<string_view, string_view>();\n  if (parser->Check(\"MKSTREAM\")) {\n    opts.flags |= kCreateOptMkstream;\n  }\n\n  RETURN_ON_PARSE_ERROR(*parser, cmd_cntx);\n\n  auto cb = [&](Transaction* t, EngineShard* shard) {\n    return OpCreate(t->GetOpArgs(shard), key, opts);\n  };\n\n  OpStatus result = cmd_cntx->tx()->ScheduleSingleHop(std::move(cb));\n  switch (result) {\n    case OpStatus::KEY_NOTFOUND:\n      return cmd_cntx->SendError(kXGroupKeyNotFound);\n    default:\n      cmd_cntx->SendError(result);\n  }\n}\n\nvoid DestroyGroup(facade::CmdArgParser* parser, CommandContext* cmd_cntx) {\n  auto [key, gname] = parser->Next<string_view, string_view>();\n\n  RETURN_ON_PARSE_ERROR(*parser, cmd_cntx);\n\n  if (parser->HasNext())\n    return 
cmd_cntx->SendError(UnknownSubCmd(\"DESTROY\", \"XGROUP\"));\n\n  auto cb = [&, &key = key, &gname = gname](Transaction* t, EngineShard* shard) {\n    return OpDestroyGroup(t->GetOpArgs(shard), key, gname);\n  };\n\n  OpStatus result = cmd_cntx->tx()->ScheduleSingleHop(std::move(cb));\n  switch (result) {\n    case OpStatus::OK:\n      return cmd_cntx->SendLong(1);\n    case OpStatus::SKIPPED:\n      return cmd_cntx->SendLong(0);\n    case OpStatus::KEY_NOTFOUND:\n      return cmd_cntx->SendError(kXGroupKeyNotFound);\n    default:\n      cmd_cntx->SendError(result);\n  }\n}\n\nvoid CreateConsumer(facade::CmdArgParser* parser, CommandContext* cmd_cntx) {\n  auto [key, gname, consumer] = parser->Next<string_view, string_view, string_view>();\n\n  RETURN_ON_PARSE_ERROR(*parser, cmd_cntx);\n\n  if (parser->HasNext())\n    return cmd_cntx->SendError(UnknownSubCmd(\"CREATECONSUMER\", \"XGROUP\"));\n\n  auto cb = [&, &key = key, &gname = gname, &consumer = consumer](Transaction* t,\n                                                                  EngineShard* shard) {\n    return OpCreateConsumer(t->GetOpArgs(shard), key, gname, consumer);\n  };\n  OpResult<uint32_t> result = cmd_cntx->tx()->ScheduleSingleHopT(cb);\n\n  switch (result.status()) {\n    case OpStatus::OK:\n      return cmd_cntx->SendLong(1);\n    case OpStatus::KEY_EXISTS:\n      return cmd_cntx->SendLong(0);\n    case OpStatus::SKIPPED:\n      return cmd_cntx->SendError(NoGroupError(key, gname));\n    case OpStatus::KEY_NOTFOUND:\n      return cmd_cntx->SendError(kXGroupKeyNotFound);\n    default:\n      cmd_cntx->SendError(result.status());\n  }\n}\n\nvoid DelConsumer(facade::CmdArgParser* parser, CommandContext* cmd_cntx) {\n  auto [key, gname, consumer] = parser->Next<string_view, string_view, string_view>();\n\n  RETURN_ON_PARSE_ERROR(*parser, cmd_cntx);\n\n  if (parser->HasNext())\n    return cmd_cntx->SendError(UnknownSubCmd(\"DELCONSUMER\", \"XGROUP\"));\n\n  auto cb = [&, &key = key, &gname = 
gname, &consumer = consumer](Transaction* t,\n                                                                  EngineShard* shard) {\n    return OpDelConsumer(t->GetOpArgs(shard), key, gname, consumer);\n  };\n\n  OpResult<uint32_t> result = cmd_cntx->tx()->ScheduleSingleHopT(cb);\n\n  switch (result.status()) {\n    case OpStatus::OK:\n      return cmd_cntx->SendLong(*result);\n    case OpStatus::SKIPPED:\n      return cmd_cntx->SendError(NoGroupError(key, gname));\n    case OpStatus::KEY_NOTFOUND:\n      return cmd_cntx->SendError(kXGroupKeyNotFound);\n    default:\n      cmd_cntx->SendError(result.status());\n  }\n}\n\nvoid SetId(facade::CmdArgParser* parser, CommandContext* cmd_cntx) {\n  auto [key, gname, id] = parser->Next<string_view, string_view, string_view>();\n  std::optional<int64_t> entries_read;\n\n  while (parser->HasNext()) {\n    if (parser->Check(\"ENTRIESREAD\") && parser->HasAtLeast(1)) {\n      entries_read = parser->Next<int64>();\n      if (parser->HasError() || *entries_read < SCG_INVALID_ENTRIES_READ) {\n        return cmd_cntx->SendError(kSyntaxErr);\n      }\n    } else {\n      return cmd_cntx->SendError(kSyntaxErr);\n    }\n  }\n\n  RETURN_ON_PARSE_ERROR(*parser, cmd_cntx);\n\n  auto cb = [&, &key = key, &gname = gname, &id = id](Transaction* t, EngineShard* shard) {\n    return OpSetId(t->GetOpArgs(shard), key, gname, id, entries_read);\n  };\n\n  OpStatus result = cmd_cntx->tx()->ScheduleSingleHop(std::move(cb));\n  switch (result) {\n    case OpStatus::SKIPPED:\n      return cmd_cntx->SendError(NoGroupError(key, gname));\n    case OpStatus::KEY_NOTFOUND:\n      return cmd_cntx->SendError(kXGroupKeyNotFound);\n    default:\n      cmd_cntx->SendError(result);\n  }\n}\n\nvoid XGroupHelp(CmdArgList args, CommandContext* cmd_cntx) {\n  string_view help_arr[] = {\"XGROUP <subcommand> [<arg> [value] [opt] ...]. 
Subcommands are:\",\n                            \"CREATE <key> <groupname> <id|$> [option]\",\n                            \"    Create a new consumer group. Options are:\",\n                            \"    * MKSTREAM\",\n                            \"      Create the empty stream if it does not exist.\",\n                            \"    * ENTRIESREAD entries_read\",\n                            \"      Set the group's entries_read counter (internal use).\",\n                            \"CREATECONSUMER <key> <groupname> <consumer>\",\n                            \"    Create a new consumer in the specified group.\",\n                            \"DELCONSUMER <key> <groupname> <consumer>\",\n                            \"    Remove the specified consumer.\",\n                            \"DESTROY <key> <groupname>\",\n                            \"    Remove the specified group.\",\n                            \"SETID <key> <groupname> <id|$> [ENTRIESREAD entries_read]\",\n                            \"    Set the current group ID and entries_read counter.\",\n                            \"HELP\",\n                            \"    Print this help.\"};\n  auto* rb = static_cast<RedisReplyBuilder*>(cmd_cntx->rb());\n  return rb->SendSimpleStrArr(help_arr);\n}\n\nOpResult<int64_t> OpTrim(const OpArgs& op_args, std::string_view key, const TrimOpts& opts,\n                         bool journal_as_minid) {\n  auto& db_slice = op_args.GetDbSlice();\n  auto res_it = db_slice.FindMutable(op_args.db_cntx, key, OBJ_STREAM);\n  if (!res_it) {\n    if (res_it.status() == OpStatus::KEY_NOTFOUND) {\n      return 0;\n    }\n    return res_it.status();\n  }\n\n  PrimeValue& pv = res_it->it->second;\n  stream* s = (stream*)pv.RObjPtr();\n\n  StreamMemTracker mem_tracker;\n\n  int64_t deleted_items_number = TrimStream(opts, s);\n\n  RecordStreamAccess(op_args, StreamAccessKind::kSequential);\n\n  mem_tracker.UpdateStreamSize(pv);\n\n  if (op_args.shard->journal() && 
journal_as_minid) {\n    const bool stream_is_empty = s->length == 0;\n    if (stream_is_empty) {\n      // We need remove the whole stream in replica\n      RecordJournal(op_args, \"XTRIM\"sv, ArgSlice{key, \"MAXLEN\"sv, \"0\"sv});\n    } else {\n      // We need to set exact MinId in the journal.\n      // For this we are using new first_id from the stream\n      std::string last_id = StreamsIdToString(s->first_id);\n      RecordJournal(op_args, \"XTRIM\"sv, ArgSlice{key, \"MINID\"sv, last_id});\n    }\n  }\n\n  return deleted_items_number;\n}\n\nParseResult<TrimOpts> ParseTrimOpts(bool max_len, CmdArgParser* parser) {\n  TrimOpts opts;\n  opts.approx = parser->Check(\"~\");\n  if (!opts.approx) {\n    parser->Check(\"=\");\n  }\n\n  if (max_len) {\n    opts.length_or_id = parser->Next<uint32_t>();\n  } else {\n    ParsedStreamId parsed_id;\n    if (!ParseID(parser->Next(), false, 0, &parsed_id)) {\n      return CreateSyntaxError(kSyntaxErr);\n    }\n\n    opts.length_or_id = parsed_id;  // trivial copy\n  }\n\n  if (parser->Check(\"LIMIT\")) {\n    if (!opts.approx) {\n      return CreateSyntaxError(kSyntaxErr);\n    }\n\n    opts.limit = parser->Next<uint32_t>();\n  }\n\n  return opts;\n}\n\nParseResult<TrimOpts> ParseTrimOpts(CmdArgParser* parser) {\n  bool max_len = parser->Check(\"MAXLEN\");\n  if (!max_len) {\n    parser->ExpectTag(\"MINID\");\n  }\n\n  auto res = ParseTrimOpts(max_len, parser);\n\n  if (parser->Check(\"MAXLEN\") || parser->Check(\"MINID\")) {\n    return CreateSyntaxError(kTrimOptionConflictErr);\n  }\n\n  return res;\n}\n\nParseResult<AddOpts> ParseAddOpts(CmdArgParser* parser) {\n  AddOpts opts;\n  while (parser->HasNext()) {\n    if (parser->Check(\"NOMKSTREAM\")) {\n      opts.no_mkstream = true;\n      continue;\n    }\n\n    bool max_len = parser->Check(\"MAXLEN\");\n    if (max_len || parser->Check(\"MINID\")) {\n      if (opts.trim_opts) {\n        return CreateSyntaxError(kTrimOptionConflictErr);\n      }\n\n      auto trim_opts = 
ParseTrimOpts(max_len, parser);\n      if (!trim_opts) {\n        return make_unexpected(trim_opts.error());\n      }\n\n      opts.trim_opts = trim_opts.value();  // trivial copy\n    } else {\n      // It is StreamId\n      std::string_view id = parser->Next();\n      if (!ParseID(id, true, 0, &opts.parsed_id)) {\n        return CreateSyntaxError(kInvalidStreamId);\n      }\n      break;\n    }\n  }\n\n  return opts;\n}\n\nstruct StreamReplies {\n  explicit StreamReplies(SinkReplyBuilder* rb) : rb{static_cast<RedisReplyBuilder*>(rb)} {\n    DCHECK(dynamic_cast<RedisReplyBuilder*>(rb));\n  }\n\n  void SendRecord(const Record& record) const {\n    RedisReplyBuilder::ArrayScope scope{rb, 2};\n    rb->SendBulkString(StreamIdRepr(record.id));\n    rb->StartArray(record.kv_arr.size() * 2);\n    for (const auto& k_v : record.kv_arr) {\n      rb->SendBulkString(k_v.first);\n      rb->SendBulkString(k_v.second);\n    }\n  }\n\n  void SendIDs(absl::Span<const streamID> ids) const {\n    RedisReplyBuilder::ArrayScope scope{rb, ids.size()};\n    for (auto id : ids)\n      rb->SendBulkString(StreamIdRepr(id));\n  }\n\n  void SendRecords(absl::Span<const Record> records) const {\n    RedisReplyBuilder::ArrayScope scope{rb, records.size()};\n    for (const auto& record : records)\n      SendRecord(record);\n  }\n\n  void SendStreamRecords(string_view key, absl::Span<const Record> records) const {\n    rb->SendBulkString(key);\n    SendRecords(records);\n  }\n\n  void SendClaimInfo(const ClaimInfo& ci) const {\n    if (ci.justid) {\n      SendIDs(ci.ids);\n    } else {\n      SendRecords(ci.records);\n    }\n  }\n\n  RedisReplyBuilder* rb;\n};\n\nstd::optional<ReadOpts> ParseReadArgsOrReply(CmdArgList args, bool read_group,\n                                             SinkReplyBuilder* builder) {\n  size_t streams_count = 0;\n\n  ReadOpts opts;\n  opts.read_group = read_group;\n  size_t id_indx = 0;\n\n  if (opts.read_group) {\n    string arg = absl::AsciiStrToUpper(ArgS(args, 
id_indx));\n\n    if (arg.size() - 1 < 2) {\n      builder->SendError(kSyntaxErr);\n      return std::nullopt;\n    }\n\n    if (arg != \"GROUP\") {\n      const auto m = \"Missing 'GROUP' in 'XREADGROUP' command\";\n      builder->SendError(m, kSyntaxErr);\n      return std::nullopt;\n    }\n    id_indx++;\n    opts.group_name = ArgS(args, id_indx);\n    opts.consumer_name = ArgS(args, ++id_indx);\n    if (opts.consumer_name.empty()) {\n      builder->SendError(\"consumer name can't be empty\", kSyntaxErrType);\n      return std::nullopt;\n    }\n    id_indx++;\n  }\n\n  for (; id_indx < args.size(); ++id_indx) {\n    string arg = absl::AsciiStrToUpper(ArgS(args, id_indx));\n\n    bool remaining_args = args.size() - id_indx - 1 > 0;\n    if (arg == \"BLOCK\" && remaining_args) {\n      id_indx++;\n      arg = ArgS(args, id_indx);\n      if (!absl::SimpleAtoi(arg, &opts.timeout)) {\n        builder->SendError(kInvalidIntErr);\n        return std::nullopt;\n      }\n    } else if (arg == \"COUNT\" && remaining_args) {\n      id_indx++;\n      arg = ArgS(args, id_indx);\n      if (!absl::SimpleAtoi(arg, &opts.count)) {\n        builder->SendError(kInvalidIntErr);\n        return std::nullopt;\n      }\n    } else if (opts.read_group && arg == \"NOACK\") {\n      opts.noack = true;\n    } else if (arg == \"STREAMS\" && remaining_args) {\n      opts.streams_arg = id_indx + 1;\n\n      size_t pair_count = args.size() - opts.streams_arg;\n      if ((pair_count % 2) != 0) {\n        const char* cmd_name = read_group ? \"xreadgroup\" : \"xread\";\n        const char* symbol = read_group ? 
\">\" : \"$\";\n        const auto msg = absl::StrCat(\"Unbalanced '\", cmd_name,\n                                      \"' list of streams: for each stream key an ID or '\", symbol,\n                                      \"' must be specified\");\n        builder->SendError(msg, kSyntaxErr);\n        return std::nullopt;\n      }\n      streams_count = pair_count / 2;\n      break;\n    } else {\n      builder->SendError(kSyntaxErr);\n      return std::nullopt;\n    }\n  }\n\n  // STREAMS option is required.\n  if (opts.streams_arg == 0) {\n    builder->SendError(kSyntaxErr);\n    return std::nullopt;\n  }\n\n  // Parse the stream IDs.\n  for (size_t i = opts.streams_arg + streams_count; i < args.size(); i++) {\n    string_view key = ArgS(args, i - streams_count);\n    string_view idstr = ArgS(args, i);\n\n    StreamIDsItem sitem;\n    ParsedStreamId id;\n\n    if (idstr == \"$\") {\n      // Set ID to 0 so if the ID cannot be resolved (when the stream doesn't\n      // exist) it takes the first entry added.\n      if (opts.read_group) {\n        builder->SendError(\"The $ can be specified only when calling XREAD.\", kSyntaxErr);\n        return std::nullopt;\n      }\n      id.val.ms = 0;\n      id.val.seq = 0;\n      id.resolve_last_id = true;\n      sitem.id = id;\n      auto [_, is_inserted] = opts.stream_ids.emplace(key, sitem);\n      if (!is_inserted) {\n        builder->SendError(kSameStreamFound);\n        return std::nullopt;\n      }\n      continue;\n    }\n\n    if (idstr == \">\") {\n      if (!opts.read_group) {\n        builder->SendError(\n            \"The > ID can be specified only when calling XREADGROUP using the GROUP <group> \"\n            \"<consumer> option.\",\n            kSyntaxErr);\n        return std::nullopt;\n      }\n      id.val.ms = UINT64_MAX;\n      id.val.seq = UINT64_MAX;\n      sitem.id = id;\n      auto [_, is_inserted] = opts.stream_ids.emplace(key, sitem);\n      if (!is_inserted) {\n        
builder->SendError(kSameStreamFound);\n        return std::nullopt;\n      }\n      continue;\n    }\n\n    if (!ParseID(idstr, true, 0, &id)) {\n      builder->SendError(kInvalidStreamId, kSyntaxErrType);\n      return std::nullopt;\n    }\n\n    // We only include messages with IDs greater than start so increment the\n    // starting ID.\n    StreamIncrID(&id.val);\n    sitem.id = id;\n    auto [_, is_inserted] = opts.stream_ids.emplace(key, sitem);\n    if (!is_inserted) {\n      builder->SendError(kSameStreamFound);\n      return std::nullopt;\n    }\n  }\n  return opts;\n}\n\nvoid XRangeGeneric(std::string_view key, std::string_view start, std::string_view end,\n                   CmdArgList args, bool is_rev, CommandContext* cmd_cntx) {\n  RangeOpts range_opts;\n  RangeId rs, re;\n\n  if (!ParseRangeId(start, RangeBoundary::kStart, &rs) ||\n      !ParseRangeId(end, RangeBoundary::kEnd, &re)) {\n    return cmd_cntx->SendError(kInvalidStreamId, kSyntaxErrType);\n  }\n\n  if (rs.exclude && StreamIncrID(&rs.parsed_id.val) != C_OK) {\n    return cmd_cntx->SendError(\"invalid start ID for the interval\", kSyntaxErrType);\n  }\n\n  if (re.exclude && StreamDecrID(&re.parsed_id.val) != C_OK) {\n    return cmd_cntx->SendError(\"invalid end ID for the interval\", kSyntaxErrType);\n  }\n\n  if (!args.empty()) {\n    if (args.size() != 2) {\n      return cmd_cntx->SendError(WrongNumArgsError(\"XRANGE\"), kSyntaxErrType);\n    }\n\n    string opt = absl::AsciiStrToUpper(ArgS(args, 0));\n    string_view val = ArgS(args, 1);\n\n    if (opt != \"COUNT\" || !absl::SimpleAtoi(val, &range_opts.count)) {\n      return cmd_cntx->SendError(kSyntaxErr);\n    }\n  }\n\n  range_opts.start = rs.parsed_id;\n  range_opts.end = re.parsed_id;\n  range_opts.is_rev = is_rev;\n\n  auto cb = [&](Transaction* t, EngineShard* shard) {\n    return OpRange(t->GetOpArgs(shard), key, range_opts);\n  };\n\n  OpResult<RecordVec> result = cmd_cntx->tx()->ScheduleSingleHopT(cb);\n  auto* rb = 
static_cast<RedisReplyBuilder*>(cmd_cntx->rb());\n  if (result) {\n    SinkReplyBuilder::ReplyAggregator agg(rb);\n    StreamReplies{rb}.SendRecords(*result);\n    return;\n  }\n\n  if (result.status() == OpStatus::KEY_NOTFOUND) {\n    return rb->SendEmptyArray();\n  }\n  return cmd_cntx->SendError(result.status());\n}\n\nvoid JournalConsumerCreationIfNeeded(OpArgs op_args, const ReadOpts& opts, std::string_view key) {\n  const bool is_consumer_new = opts.stream_ids.at(key).is_consumer_new;\n\n  if (!op_args.shard->journal() || !is_consumer_new) {\n    return;\n  }\n\n  CmdArgVec args = {\"CREATECONSUMER\", key, opts.group_name, opts.consumer_name};\n  RecordJournal(op_args, \"XGROUP\"sv, args);\n}\n\n// Valkey 7.2.11:\n// --------------\n// If the consumer was created but nothing was read the consumer is *not* deleted\n// and XINFO should show it. If NOACK is used, consumer creation is replicated\n// but ignored when NOACK is omitted.\n// Journal rewrites for when reading via `>`:\n// * without noack -> xclaim + xgroup setid\n// * with noack -> xgroup createconsumer +  xgroup setid\n//\n// Redis 7.0.15:\n// --------------\n// Redis deletes the consumer in case the stream is empty and nothing\n// was read even if the command blocks. 
In the latter case, after\n// unblocking, the consumer is created again and its side effects are\n// replicated similarly to what is described above.\n//\n// Dragonfly simply propagates consumer creation but does not roll back consumer\n// creation.\nvoid JournalXReadGroupIfNeeded(OpArgs op_args, const ReadOpts& opts, const RecordVec& records,\n                               std::string_view key) {\n  if (!op_args.shard->journal()) {\n    return;\n  }\n\n  const bool serve_history = opts.stream_ids.at(key).serve_history;\n\n  if (serve_history) {\n    return;\n  }\n\n  // Reading from >\n  auto journal_xgroup = [&opts, op_args](const auto& records, std::string_view key) {\n    if (!records.empty()) {\n      const auto& sitem = opts.stream_ids.at(key);\n      auto id = absl::StrCat(records.back().id.ms, \"-\", records.back().id.seq);\n      auto entries_read = absl::StrCat(sitem.group->entries_read);\n      CmdArgVec journal_args = {\"SETID\", key, opts.group_name, id, \"ENTRIESREAD\", entries_read};\n      RecordJournal(op_args, \"XGROUP\"sv, journal_args);\n    }\n  };\n\n  // If NOACK is *not* set we add entries to PEL. Consumer is created as a side\n  // effect of XCLAIM.\n  if (!opts.noack) {\n    for (auto& record : records) {\n      auto id = absl::StrCat(record.id.ms, \"-\", record.id.seq);\n      auto deliv_time = absl::StrCat(record.delivery_time);\n      CmdArgVec journal_args = {\n          key, opts.group_name, opts.consumer_name, \"0\",      id, \"TIME\", deliv_time, \"RETRYCOUNT\",\n          \"1\", \"FORCE\",         \"JUSTID\",           \"LASTID\", id};\n\n      RecordJournal(op_args, \"XCLAIM\"sv, journal_args);\n    }\n    journal_xgroup(records, key);\n    return;\n  }\n\n  journal_xgroup(records, key);\n}\n\n// Set is_consumer_new to true if the consumer is created. 
Only relevant\n// when XReadBlock is called from the XREADGROUP command.\nvoid XReadBlock(ReadOpts* opts, Transaction* tx, SinkReplyBuilder* builder,\n                ConnectionContext* cntx) {\n  // If BLOCK is not set just return a null array as there are no resolvable\n  // entries.\n  auto* rb = static_cast<RedisReplyBuilder*>(builder);\n  if (opts->timeout == -1 || tx->IsMulti()) {\n    // Close the transaction and release locks.\n    tx->Conclude();\n    return rb->SendNullArray();\n  }\n\n  auto tp = (opts->timeout) ? chrono::steady_clock::now() + chrono::milliseconds(opts->timeout)\n                            : Transaction::time_point::max();\n\n  const auto key_checker = [opts](EngineShard* owner, const DbContext& context, Transaction* tx,\n                                  std::string_view key) -> bool {\n    auto& db_slice = context.GetDbSlice(owner->shard_id());\n    auto res_it = db_slice.FindReadOnly(context, key, OBJ_STREAM);\n    if (!res_it.ok())\n      return false;\n\n    StreamIDsItem& sitem = opts->stream_ids.at(key);\n    if (sitem.id.val.ms != UINT64_MAX && sitem.id.val.seq != UINT64_MAX)\n      return true;\n\n    const CompactObj& cobj = (*res_it)->second;\n    stream* s = GetReadOnlyStream(cobj);\n    streamID last_id = s->last_id;\n    if (s->length) {\n      StreamLastValidID(s, &last_id);\n    }\n\n    // Update group pointer and check its validity\n    if (opts->read_group) {\n      sitem.group = StreamLookupCG(s, WrapSds(opts->group_name));\n      if (!sitem.group)\n        return true;  // abort\n    }\n\n    return streamCompareID(&last_id, &sitem.group->last_id) > 0;\n  };\n\n  if (auto status =\n          tx->WaitOnWatch(tp, Transaction::kShardArgs, key_checker, &cntx->blocked, &cntx->paused);\n      status != OpStatus::OK)\n    return rb->SendNullArray();\n\n  // Resolve the entry in the woken key. 
Note this must not use OpRead since\n  // only the shard that contains the woken key blocks for the awoken\n  // transaction to proceed.\n  OpResult<RecordVec> result;\n  std::string key;\n  auto range_cb = [&](Transaction* t, EngineShard* shard) {\n    if (auto wake_key = t->GetWakeKey(shard->shard_id()); wake_key) {\n      RangeOpts range_opts;\n      range_opts.end = ParsedStreamId{.val = streamID{\n                                          .ms = UINT64_MAX,\n                                          .seq = UINT64_MAX,\n                                      }};\n      StreamIDsItem& sitem = opts->stream_ids.at(*wake_key);\n      range_opts.start = sitem.id;\n\n      // Expect group to exist? No guarantees from transactional framework\n      if (opts->read_group && !sitem.group) {\n        result = OpStatus::INVALID_VALUE;\n        return OpStatus::OK;\n      }\n\n      if (sitem.id.val.ms == UINT64_MAX || sitem.id.val.seq == UINT64_MAX) {\n        range_opts.start.val = sitem.group->last_id;  // only for '>'\n        StreamIncrID(&range_opts.start.val);\n      }\n\n      range_opts.group = sitem.group;\n\n      // Update consumer, only for XReadGroup path\n      std::optional<StreamMemTracker> tracker;\n      if (sitem.group) {\n        tracker = StreamMemTracker{};\n        sitem.is_consumer_new = false;\n        range_opts.consumer = FindOrAddConsumer(opts->consumer_name, sitem.group,\n                                                GetCurrentTimeMs(), &sitem.is_consumer_new);\n        sitem.consumer = range_opts.consumer;\n        if (!sitem.consumer) {\n          return OpStatus::OUT_OF_MEMORY;\n        }\n\n        if (sitem.consumer->pel->numnodes == 0) {\n          LOG(DFATAL) << \"Internal error when accessing consumer data, seen_time \"\n                      << sitem.consumer->seen_time;\n          result = OpStatus::CANCELLED;\n          return OpStatus::OK;\n        }\n      }\n\n      key = *wake_key;\n\n      if (tracker) {\n        auto op_args = 
t->GetOpArgs(shard);\n        auto& db_slice = op_args.GetDbSlice();\n        auto it = db_slice.FindMutable(op_args.db_cntx, key, OBJ_STREAM);\n        DCHECK(it);\n        if (it) {\n          tracker->UpdateStreamSize(it->it->second);\n        }\n      }\n\n      range_opts.noack = opts->noack;\n      range_opts.access_kind = StreamAccessKind::kSequential;\n\n      result = OpRange(t->GetOpArgs(shard), *wake_key, range_opts);\n      if (result) {\n        JournalConsumerCreationIfNeeded(t->GetOpArgs(shard), *opts, *wake_key);\n        JournalXReadGroupIfNeeded(t->GetOpArgs(shard), *opts, *result, *wake_key);\n      }\n    }\n    return OpStatus::OK;\n  };\n  tx->Execute(std::move(range_cb), true);\n\n  if (result) {\n    SinkReplyBuilder::ReplyAggregator agg(rb);\n    if (opts->read_group && rb->IsResp3()) {\n      rb->StartCollection(1, CollectionType::MAP);\n    } else {\n      rb->StartArray(1);\n      rb->StartArray(2);\n    }\n    return StreamReplies{rb}.SendStreamRecords(key, *result);\n  } else if (result.status() == OpStatus::INVALID_VALUE) {\n    return rb->SendError(\"-NOGROUP the consumer group this client was blocked on no longer exists\");\n  }\n  return rb->SendNullArray();\n}\n\nvoid XReadGeneric2(CmdArgList args, bool read_group, CommandContext* cmd_cntx) {\n  optional<ReadOpts> opts = ParseReadArgsOrReply(args, read_group, cmd_cntx->rb());\n  if (!opts)\n    return;\n\n  // Determine if streams have entries or any error occurred\n  AggregateValue<optional<facade::ErrorReply>> err;\n  atomic_bool have_entries = false;\n  auto* tx = cmd_cntx->tx();\n  // With a single shard we can call OpRead in a single hop, falling back to\n  // avoid concluding if no entries are available.\n  const bool is_single_shard = tx->GetUniqueShardCnt() == 1;\n  vector<RecordVec> fastread_prefetched;\n\n  auto cb = [&](auto* tx, auto* es) -> Transaction::RunnableResult {\n    auto op_args = tx->GetOpArgs(es);\n    for (string_view skey : 
tx->GetShardArgs(es->shard_id())) {\n      if (auto res = HasEntries2(op_args, skey, &*opts); holds_alternative<facade::ErrorReply>(res))\n        err = get<facade::ErrorReply>(res);\n      else if (holds_alternative<bool>(res) && get<bool>(res))\n        have_entries.store(true, memory_order_relaxed);\n    }\n\n    if (is_single_shard) {\n      if (have_entries.load(memory_order_relaxed)) {\n        fastread_prefetched = OpRead(tx->GetOpArgs(es), tx->GetShardArgs(es->shard_id()), *opts);\n        if (read_group) {\n          size_t index = 0;\n          for (auto key : tx->GetShardArgs(es->shard_id())) {\n            // We can batch here to improve journal writes\n            JournalConsumerCreationIfNeeded(op_args, *opts, key);\n            JournalXReadGroupIfNeeded(op_args, *opts, fastread_prefetched[index++], key);\n          }\n        }\n      } else {\n        // We didn't read any entries but we might have added new consumers\n        for (auto key : tx->GetShardArgs(es->shard_id())) {\n          JournalConsumerCreationIfNeeded(op_args, *opts, key);\n        }\n        return {OpStatus::OK, Transaction::RunnableResult::AVOID_CONCLUDING};\n      }\n    }\n    return OpStatus::OK;\n  };\n  tx->Execute(cb, is_single_shard);\n\n  if (err) {\n    tx->Conclude();\n    return cmd_cntx->SendError(**err);\n  }\n\n  if (!have_entries.load(memory_order_relaxed))\n    return XReadBlock(&*opts, tx, cmd_cntx->rb(), cmd_cntx->server_conn_cntx());\n\n  vector<vector<RecordVec>> xread_resp;\n  if (is_single_shard && have_entries.load(memory_order_relaxed)) {\n    xread_resp = {std::move(fastread_prefetched)};\n  } else {\n    xread_resp.resize(shard_set->size());\n    auto read_cb = [&](Transaction* t, EngineShard* shard) {\n      ShardId sid = shard->shard_id();\n      auto op_args = tx->GetOpArgs(shard);\n      xread_resp[sid] = OpRead(op_args, t->GetShardArgs(sid), *opts);\n      if (read_group) {\n        size_t index = 0;\n        for (auto key : tx->GetShardArgs(sid)) {\n  
        JournalConsumerCreationIfNeeded(op_args, *opts, key);\n          JournalXReadGroupIfNeeded(op_args, *opts, xread_resp[sid][index++], key);\n        }\n      }\n      return OpStatus::OK;\n    };\n    tx->Execute(std::move(read_cb), true);\n  }\n\n  // Count number of streams and merge final results in correct order\n  int resolved_streams = 0;\n  vector<RecordVec> results(opts->stream_ids.size());\n  for (size_t i = 0; i < xread_resp.size(); i++) {\n    vector<RecordVec>& sub_results = xread_resp[i];\n    ShardId sid = xread_resp.size() < shard_set->size() ? tx->GetUniqueShard() : i;\n    if (!tx->IsActive(sid)) {\n      DCHECK(sub_results.empty());\n      continue;\n    }\n\n    ShardArgs shard_args = tx->GetShardArgs(sid);\n    DCHECK_EQ(shard_args.Size(), sub_results.size());\n\n    auto shard_args_it = shard_args.begin();\n    for (size_t j = 0; j < sub_results.size(); j++, ++shard_args_it) {\n      if (sub_results[j].empty())\n        continue;\n\n      resolved_streams++;\n      results[shard_args_it.index() - opts->streams_arg] = std::move(sub_results[j]);\n    }\n  }\n\n  // Send all results back\n  auto* rb = static_cast<RedisReplyBuilder*>(cmd_cntx->rb());\n  SinkReplyBuilder::ReplyScope scope(rb);\n  if (opts->read_group) {\n    if (rb->IsResp3()) {\n      rb->StartCollection(opts->stream_ids.size(), CollectionType::MAP);\n      for (size_t i = 0; i < opts->stream_ids.size(); i++) {\n        string_view key = ArgS(args, i + opts->streams_arg);\n        StreamReplies{rb}.SendStreamRecords(key, results[i]);\n      }\n    } else {\n      rb->StartArray(opts->stream_ids.size());\n      for (size_t i = 0; i < opts->stream_ids.size(); i++) {\n        string_view key = ArgS(args, i + opts->streams_arg);\n        rb->StartArray(2);\n        StreamReplies{rb}.SendStreamRecords(key, results[i]);\n      }\n    }\n  } else {\n    if (rb->IsResp3()) {\n      rb->StartCollection(resolved_streams, CollectionType::MAP);\n      for (size_t i = 0; i < 
results.size(); ++i) {\n        if (results[i].empty()) {\n          continue;\n        }\n        string_view key = ArgS(args, i + opts->streams_arg);\n        StreamReplies{rb}.SendStreamRecords(key, results[i]);\n      }\n    } else {\n      rb->StartArray(resolved_streams);\n      for (size_t i = 0; i < results.size(); i++) {\n        if (results[i].empty())\n          continue;\n        string_view key = ArgS(args, i + opts->streams_arg);\n        rb->StartArray(2);\n        StreamReplies{rb}.SendStreamRecords(key, results[i]);\n      }\n    }\n  }\n}\n\nvoid HelpSubCmd(facade::CmdArgParser* parser, CommandContext* cmd_cntx) {\n  XGroupHelp(parser->Tail(), cmd_cntx);\n}\n\nbool ParseXpendingOptions(CmdArgList& args, PendingOpts& opts, SinkReplyBuilder* builder) {\n  size_t id_indx = 0;\n  string arg = absl::AsciiStrToUpper(ArgS(args, id_indx));\n\n  if (arg == \"IDLE\" && args.size() > 4) {\n    id_indx++;\n    if (!absl::SimpleAtoi(ArgS(args, id_indx), &opts.min_idle_time)) {\n      builder->SendError(kInvalidIntErr, kSyntaxErrType);\n      return false;\n    }\n    // Ignore negative min_idle_time\n    opts.min_idle_time = std::max(opts.min_idle_time, static_cast<int64_t>(0));\n    args.remove_prefix(2);\n    id_indx = 0;\n  }\n  if (args.size() < 3) {\n    builder->SendError(WrongNumArgsError(\"XPENDING\"), kSyntaxErrType);\n    return false;\n  }\n\n  // Parse start and end\n  RangeId rs, re;\n  string_view start = ArgS(args, id_indx);\n  id_indx++;\n  string_view end = ArgS(args, id_indx);\n  if (!ParseRangeId(start, RangeBoundary::kStart, &rs) ||\n      !ParseRangeId(end, RangeBoundary::kEnd, &re)) {\n    builder->SendError(kInvalidStreamId, kSyntaxErrType);\n    return false;\n  }\n\n  if (rs.exclude && StreamIncrID(&rs.parsed_id.val) != C_OK) {\n    builder->SendError(\"invalid start ID for the interval\", kSyntaxErrType);\n    return false;\n  }\n\n  if (re.exclude && StreamDecrID(&re.parsed_id.val) != C_OK) {\n    builder->SendError(\"invalid end ID 
for the interval\", kSyntaxErrType);\n    return false;\n  }\n  id_indx++;\n  opts.start = rs.parsed_id;\n  opts.end = re.parsed_id;\n\n  // Parse count\n  if (!absl::SimpleAtoi(ArgS(args, id_indx), &opts.count)) {\n    builder->SendError(kInvalidIntErr, kSyntaxErrType);\n    return false;\n  }\n\n  // Ignore negative count value\n  opts.count = std::max(opts.count, static_cast<int64_t>(0));\n  if (args.size() - id_indx - 1) {\n    id_indx++;\n    opts.consumer_name = ArgS(args, id_indx);\n  }\n  return true;\n}\n\n}  // namespace\n\nvoid CmdXAdd(CmdArgList args, CommandContext* cmd_cntx) {\n  CmdArgParser parser{args};\n  auto* rb = static_cast<RedisReplyBuilder*>(cmd_cntx->rb());\n\n  string_view key = parser.Next();\n\n  auto parsed_add_opts = ParseAddOpts(&parser);\n\n  if (auto err = parser.TakeError(); err || !parsed_add_opts) {\n    cmd_cntx->SendError(!parsed_add_opts ? parsed_add_opts.error() : err.MakeReply());\n    return;\n  }\n\n  // Save the index of the stream ID in the arguments list.\n  // We need this during journaling\n  // It is (parser.GetCurrentIndex() - 1) because the stream id is the last parsed argument in the\n  // ParseAddOpts\n  const size_t stream_id_index_in_args = parser.GetCurrentIndex() - 1;\n  AddArgsJournaler journaler{{args.begin(), args.end()}, stream_id_index_in_args};\n\n  CmdArgList fields = parser.Tail();\n  if (fields.empty() || fields.size() % 2 != 0) {\n    return rb->SendError(WrongNumArgsError(\"XADD\"), kSyntaxErrType);\n  }\n\n  auto cb = [&](Transaction* t, EngineShard* shard) {\n    return OpAdd(t->GetOpArgs(shard), key, parsed_add_opts.value(), fields, journaler);\n  };\n\n  OpResult<streamID> add_result = cmd_cntx->tx()->ScheduleSingleHopT(cb);\n\n  if (add_result) {\n    rb->SendBulkString(StreamIdRepr(*add_result));\n  } else {\n    if (add_result == OpStatus::KEY_NOTFOUND) {\n      rb->SendNull();\n    } else if (add_result == OpStatus::STREAM_ID_SMALL) {\n      cmd_cntx->SendError(LeqTopIdError(\"XADD\"));\n   
 } else {\n      cmd_cntx->SendError(add_result.status());\n    }\n  }\n}\n\nabsl::InlinedVector<streamID, 8> GetXclaimIds(CmdArgList& args) {\n  size_t i;\n  absl::InlinedVector<streamID, 8> ids;\n  for (i = 0; i < args.size(); ++i) {\n    ParsedStreamId parsed_id;\n    string_view str_id = ArgS(args, i);\n    if (!ParseID(str_id, true, 0, &parsed_id)) {\n      if (i > 0) {\n        break;\n      }\n      return ids;\n    }\n    ids.push_back(parsed_id.val);\n  }\n  args.remove_prefix(i);\n  return ids;\n}\n\nbool ParseXclaimOptions(CmdArgList args, ClaimOpts& opts, CommandContext* cmd_cntx) {\n  for (size_t i = 0; i < args.size(); ++i) {\n    string arg = absl::AsciiStrToUpper(ArgS(args, i));\n    bool remaining_args = args.size() - i - 1 > 0;\n\n    if (remaining_args) {\n      if (arg == \"IDLE\") {\n        arg = ArgS(args, ++i);\n        if (!absl::SimpleAtoi(arg, &opts.delivery_time)) {\n          cmd_cntx->SendError(kInvalidIntErr);\n          return false;\n        }\n        continue;\n      } else if (arg == \"TIME\") {\n        arg = ArgS(args, ++i);\n        if (!absl::SimpleAtoi(arg, &opts.delivery_time)) {\n          cmd_cntx->SendError(kInvalidIntErr);\n          return false;\n        }\n        continue;\n      } else if (arg == \"RETRYCOUNT\") {\n        arg = ArgS(args, ++i);\n        if (!absl::SimpleAtoi(arg, &opts.retry)) {\n          cmd_cntx->SendError(kInvalidIntErr);\n          return false;\n        }\n        continue;\n      } else if (arg == \"LASTID\") {\n        opts.flags |= kClaimLastID;\n        arg = ArgS(args, ++i);\n        ParsedStreamId parsed_id;\n        if (ParseID(arg, true, 0, &parsed_id)) {\n          opts.last_id = parsed_id.val;\n        } else {\n          cmd_cntx->SendError(kInvalidStreamId, kSyntaxErrType);\n          return false;\n        }\n        continue;\n      }\n    }\n    if (arg == \"FORCE\") {\n      opts.flags |= kClaimForce;\n    } else if (arg == \"JUSTID\") {\n      opts.flags |= kClaimJustID;\n   
 } else {\n      cmd_cntx->SendError(\"Unknown argument given for XCLAIM command\", kSyntaxErr);\n      return false;\n    }\n  }\n  return true;\n}\n\nvoid CmdXClaim(CmdArgList args, CommandContext* cmd_cntx) {\n  ClaimOpts opts;\n  string_view key = ArgS(args, 0);\n  opts.group = ArgS(args, 1);\n  opts.consumer = ArgS(args, 2);\n\n  if (opts.group.empty() || opts.consumer.empty()) {\n    return cmd_cntx->SendError(kSyntaxErr);\n  }\n\n  if (!absl::SimpleAtoi(ArgS(args, 3), &opts.min_idle_time)) {\n    return cmd_cntx->SendError(kSyntaxErr);\n  }\n  // Ignore negative min-idle-time\n  opts.min_idle_time = std::max(opts.min_idle_time, static_cast<int64>(0));\n  args.remove_prefix(4);\n\n  auto ids = GetXclaimIds(args);\n  if (ids.empty()) {\n    // No ids given.\n    return cmd_cntx->SendError(kInvalidStreamId, kSyntaxErrType);\n  }\n\n  // parse the options\n  if (!ParseXclaimOptions(args, opts, cmd_cntx))\n    return;\n\n  uint64_t now = cmd_cntx->tx()->GetDbContext().time_now_ms;\n  DCHECK_GT(now, 0u);\n\n  if (opts.delivery_time < 0 || static_cast<uint64_t>(opts.delivery_time) > now)\n    opts.delivery_time = now;\n\n  auto cb = [&](Transaction* t, EngineShard* shard) {\n    return OpClaim(t->GetOpArgs(shard), key, opts, absl::Span{ids.data(), ids.size()});\n  };\n  OpResult<ClaimInfo> result = cmd_cntx->tx()->ScheduleSingleHopT(std::move(cb));\n  if (!result) {\n    if (result.status() == OpStatus::SKIPPED) {\n      // Return empty result when operation is skipped\n      StreamReplies{cmd_cntx->rb()}.SendClaimInfo(ClaimInfo{});\n      return;\n    }\n    cmd_cntx->SendError(result.status());\n    return;\n  }\n\n  StreamReplies{cmd_cntx->rb()}.SendClaimInfo(result.value());\n}\n\nvoid CmdXDel(CmdArgList args, CommandContext* cmd_cntx) {\n  string_view key = ArgS(args, 0);\n  args.remove_prefix(1);\n\n  absl::InlinedVector<streamID, 8> ids(args.size());\n\n  for (size_t i = 0; i < args.size(); ++i) {\n    ParsedStreamId parsed_id;\n    string_view str_id = 
ArgS(args, i);\n    if (!ParseID(str_id, true, 0, &parsed_id)) {\n      return cmd_cntx->SendError(kInvalidStreamId, kSyntaxErrType);\n    }\n    ids[i] = parsed_id.val;\n  }\n\n  auto cb = [&](Transaction* t, EngineShard* shard) {\n    return OpDel(t->GetOpArgs(shard), key, absl::Span{ids.data(), ids.size()});\n  };\n\n  OpResult<uint32_t> result = cmd_cntx->tx()->ScheduleSingleHopT(cb);\n  if (result || result.status() == OpStatus::KEY_NOTFOUND) {\n    return cmd_cntx->SendLong(*result);\n  }\n\n  cmd_cntx->SendError(result.status());\n}\n\nvoid CmdXGroup(CmdArgList args, CommandContext* cmd_cntx) {\n  facade::CmdArgParser parser{args};\n\n  auto sub_cmd_func = parser.MapNext(\"HELP\", &HelpSubCmd, \"CREATE\", &CreateGroup, \"DESTROY\",\n                                     &DestroyGroup, \"CREATECONSUMER\", &CreateConsumer,\n                                     \"DELCONSUMER\", &DelConsumer, \"SETID\", &SetId);\n\n  if (auto err = parser.TakeError(); err)\n    return cmd_cntx->SendError(err.MakeReply());\n\n  sub_cmd_func(&parser, cmd_cntx);\n}\n\nvoid CmdXInfo(CmdArgList args, CommandContext* cmd_cntx) {\n  auto* rb = static_cast<RedisReplyBuilder*>(cmd_cntx->rb());\n  string sub_cmd = absl::AsciiStrToUpper(ArgS(args, 0));\n\n  if (sub_cmd == \"HELP\") {\n    string_view help_arr[] = {\"CONSUMERS <key> <groupname>\",\n                              \"    Show consumers of <groupname>.\",\n                              \"GROUPS <key>\",\n                              \"    Show the stream consumer groups.\",\n                              \"STREAM <key> [FULL [COUNT <count>]\",\n                              \"    Show information about the stream.\",\n                              \"HELP\",\n                              \"    Prints this help.\"};\n    return rb->SendSimpleStrArr(help_arr);\n  }\n\n  ConnectionContext* cntx = cmd_cntx->server_conn_cntx();\n  if (args.size() >= 2) {\n    string_view key = ArgS(args, 1);\n    ShardId sid = Shard(key, 
shard_set->size());\n\n    if (sub_cmd == \"GROUPS\") {\n      // We do not use transactional semantics for xinfo since it's an informational command.\n      auto cb = [&]() {\n        EngineShard* shard = EngineShard::tlocal();\n        DbContext db_context{cntx->ns, cntx->db_index(), GetCurrentTimeMs()};\n        return OpListGroups(db_context, key, shard);\n      };\n\n      OpResult<vector<GroupInfo>> result = shard_set->Await(sid, std::move(cb));\n      if (result) {\n        rb->StartArray(result->size());\n        for (const auto& ginfo : *result) {\n          string last_id = StreamIdRepr(ginfo.last_id);\n\n          rb->StartCollection(6, CollectionType::MAP);\n          rb->SendBulkString(\"name\");\n          rb->SendBulkString(ginfo.name);\n          rb->SendBulkString(\"consumers\");\n          rb->SendLong(ginfo.consumer_size);\n          rb->SendBulkString(\"pending\");\n          rb->SendLong(ginfo.pending_size);\n          rb->SendBulkString(\"last-delivered-id\");\n          rb->SendBulkString(last_id);\n          rb->SendBulkString(\"entries-read\");\n          if (ginfo.entries_read != SCG_INVALID_ENTRIES_READ) {\n            rb->SendLong(ginfo.entries_read);\n          } else {\n            rb->SendNull();\n          }\n          rb->SendBulkString(\"lag\");\n          if (ginfo.lag != SCG_INVALID_LAG) {\n            rb->SendLong(ginfo.lag);\n          } else {\n            rb->SendNull();\n          }\n        }\n        return;\n      }\n      return cmd_cntx->SendError(result.status());\n    } else if (sub_cmd == \"STREAM\") {\n      int full = 0;\n      size_t count = 10;  // default count for xinfo streams\n\n      if (args.size() == 4 || args.size() > 5) {\n        return rb->SendError(\n            \"unknown subcommand or wrong number of arguments for 'STREAM'. 
Try XINFO HELP.\");\n      }\n\n      if (args.size() >= 3) {\n        full = 1;\n        string full_arg = absl::AsciiStrToUpper(ArgS(args, 2));\n        if (full_arg != \"FULL\") {\n          return rb->SendError(\n              \"unknown subcommand or wrong number of arguments for 'STREAM'. Try XINFO HELP.\");\n        }\n        if (args.size() > 3) {\n          string count_arg = absl::AsciiStrToUpper(ArgS(args, 3));\n          string_view count_value_arg = ArgS(args, 4);\n          if (count_arg != \"COUNT\") {\n            return rb->SendError(\n                \"unknown subcommand or wrong number of arguments for 'STREAM'. Try XINFO HELP.\");\n          }\n\n          if (!absl::SimpleAtoi(count_value_arg, &count)) {\n            return rb->SendError(kInvalidIntErr);\n          }\n        }\n      }\n\n      auto cb = [&]() {\n        EngineShard* shard = EngineShard::tlocal();\n        return OpStreams(DbContext{cntx->ns, cntx->db_index(), GetCurrentTimeMs()}, key, shard,\n                         full, count);\n      };\n\n      OpResult<StreamInfo> sinfo = shard_set->Await(sid, std::move(cb));\n      if (sinfo) {\n        if (full) {\n          rb->StartCollection(9, CollectionType::MAP);\n        } else {\n          rb->StartCollection(10, CollectionType::MAP);\n        }\n\n        rb->SendBulkString(\"length\");\n        rb->SendLong(sinfo->length);\n\n        rb->SendBulkString(\"radix-tree-keys\");\n        rb->SendLong(sinfo->radix_tree_keys);\n\n        rb->SendBulkString(\"radix-tree-nodes\");\n        rb->SendLong(sinfo->radix_tree_nodes);\n\n        rb->SendBulkString(\"last-generated-id\");\n        rb->SendBulkString(StreamIdRepr(sinfo->last_generated_id));\n\n        rb->SendBulkString(\"max-deleted-entry-id\");\n        rb->SendBulkString(StreamIdRepr(sinfo->max_deleted_entry_id));\n\n        rb->SendBulkString(\"entries-added\");\n        rb->SendLong(sinfo->entries_added);\n\n        rb->SendBulkString(\"recorded-first-entry-id\");\n      
  rb->SendBulkString(StreamIdRepr(sinfo->recorded_first_entry_id));\n\n        if (full) {\n          rb->SendBulkString(\"entries\");\n          StreamReplies{rb}.SendRecords(sinfo->entries);\n\n          rb->SendBulkString(\"groups\");\n          rb->StartArray(sinfo->cgroups.size());\n          for (const auto& ginfo : sinfo->cgroups) {\n            rb->StartCollection(7, CollectionType::MAP);\n\n            rb->SendBulkString(\"name\");\n            rb->SendBulkString(ginfo.name);\n\n            rb->SendBulkString(\"last-delivered-id\");\n            rb->SendBulkString(StreamIdRepr(ginfo.last_id));\n\n            rb->SendBulkString(\"entries-read\");\n            if (ginfo.entries_read != SCG_INVALID_ENTRIES_READ) {\n              rb->SendLong(ginfo.entries_read);\n            } else {\n              rb->SendNull();\n            }\n            rb->SendBulkString(\"lag\");\n            if (ginfo.lag != SCG_INVALID_LAG) {\n              rb->SendLong(ginfo.lag);\n            } else {\n              rb->SendNull();\n            }\n\n            rb->SendBulkString(\"pel-count\");\n            rb->SendLong(ginfo.pending_size);\n\n            rb->SendBulkString(\"pending\");\n            rb->StartArray(ginfo.stream_nack_vec.size());\n            for (const auto& pending_info : ginfo.stream_nack_vec) {\n              rb->StartArray(4);\n              rb->SendBulkString(StreamIdRepr(pending_info.pel_id));\n              rb->SendBulkString(pending_info.consumer_name);\n              rb->SendLong(pending_info.delivery_time);\n              rb->SendLong(pending_info.delivery_count);\n            }\n\n            rb->SendBulkString(\"consumers\");\n            rb->StartArray(ginfo.consumer_info_vec.size());\n            for (const auto& consumer_info : ginfo.consumer_info_vec) {\n              rb->StartCollection(5, CollectionType::MAP);\n\n              rb->SendBulkString(\"name\");\n              rb->SendBulkString(consumer_info.name);\n\n              
rb->SendBulkString(\"seen-time\");\n              rb->SendLong(consumer_info.seen_time);\n\n              rb->SendBulkString(\"active-time\");\n              rb->SendLong(consumer_info.active_time);\n\n              rb->SendBulkString(\"pel-count\");\n              rb->SendLong(consumer_info.pel_count);\n\n              rb->SendBulkString(\"pending\");\n              if (consumer_info.pending.size() == 0) {\n                rb->SendEmptyArray();\n              } else {\n                rb->StartArray(consumer_info.pending.size());\n              }\n              for (const auto& pending : consumer_info.pending) {\n                rb->StartArray(3);\n\n                rb->SendBulkString(StreamIdRepr(pending.pel_id));\n                rb->SendLong(pending.delivery_time);\n                rb->SendLong(pending.delivery_count);\n              }\n            }\n          }\n        } else {\n          rb->SendBulkString(\"groups\");\n          rb->SendLong(sinfo->groups);\n\n          rb->SendBulkString(\"first-entry\");\n          if (sinfo->first_entry.kv_arr.size() != 0) {\n            StreamReplies{rb}.SendRecord(sinfo->first_entry);\n          } else {\n            rb->SendNullArray();\n          }\n\n          rb->SendBulkString(\"last-entry\");\n          if (sinfo->last_entry.kv_arr.size() != 0) {\n            StreamReplies{rb}.SendRecord(sinfo->last_entry);\n          } else {\n            rb->SendNullArray();\n          }\n        }\n        return;\n      }\n      return cmd_cntx->SendError(sinfo.status());\n    } else if (sub_cmd == \"CONSUMERS\") {\n      if (args.size() < 3) {\n        return cmd_cntx->SendError(kSyntaxErr);\n      }\n      string_view stream_name = ArgS(args, 1);\n      string_view group_name = ArgS(args, 2);\n      auto cb = [&]() {\n        return OpConsumers(DbContext{cntx->ns, cntx->db_index(), GetCurrentTimeMs()},\n                           EngineShard::tlocal(), stream_name, group_name);\n      };\n\n      
OpResult<vector<ConsumerInfo>> result = shard_set->Await(sid, std::move(cb));\n      if (result) {\n        rb->StartArray(result->size());\n        int64_t now_ms = GetCurrentTimeMs();\n        for (const auto& consumer_info : *result) {\n          int64_t active = consumer_info.active_time;\n          int64_t inactive = active != -1 ? now_ms - active : -1;\n\n          rb->StartCollection(4, CollectionType::MAP);\n          rb->SendBulkString(\"name\");\n          rb->SendBulkString(consumer_info.name);\n          rb->SendBulkString(\"pending\");\n          rb->SendLong(consumer_info.pel_count);\n          rb->SendBulkString(\"idle\");\n          rb->SendLong(consumer_info.idle);\n          rb->SendBulkString(\"inactive\");\n          rb->SendLong(inactive);\n        }\n        return;\n      }\n      if (result.status() == OpStatus::INVALID_VALUE) {\n        return rb->SendError(NoGroupError(stream_name, group_name));\n      }\n      return cmd_cntx->SendError(result.status());\n    }\n  }\n  return cmd_cntx->SendError(UnknownSubCmd(sub_cmd, \"XINFO\"));\n}\n\nvoid CmdXLen(CmdArgList args, CommandContext* cmd_cntx) {\n  string_view key = ArgS(args, 0);\n  auto cb = [&](Transaction* t, EngineShard* shard) { return OpLen(t->GetOpArgs(shard), key); };\n\n  OpResult<uint32_t> result = cmd_cntx->tx()->ScheduleSingleHopT(cb);\n  if (result || result.status() == OpStatus::KEY_NOTFOUND) {\n    return cmd_cntx->SendLong(*result);\n  }\n\n  return cmd_cntx->SendError(result.status());\n}\n\nvoid CmdXPending(CmdArgList args, CommandContext* cmd_cntx) {\n  string_view key = ArgS(args, 0);\n  PendingOpts opts;\n  opts.group_name = ArgS(args, 1);\n  args.remove_prefix(2);\n\n  auto* rb = static_cast<RedisReplyBuilder*>(cmd_cntx->rb());\n  if (!args.empty() && !ParseXpendingOptions(args, opts, rb)) {\n    return;\n  }\n\n  auto cb = [&](Transaction* t, EngineShard* shard) {\n    return OpPending(t->GetOpArgs(shard), key, opts);\n  };\n  OpResult<PendingResult> op_result = 
cmd_cntx->tx()->ScheduleSingleHopT(cb);\n  if (!op_result) {\n    if (op_result.status() == OpStatus::SKIPPED)\n      return cmd_cntx->SendError(NoGroupError(key, opts.group_name));\n    return cmd_cntx->SendError(op_result.status());\n  }\n  const PendingResult& result = op_result.value();\n\n  SinkReplyBuilder::ReplyScope scope{rb};\n  if (std::holds_alternative<PendingReducedResult>(result)) {\n    const auto& res = std::get<PendingReducedResult>(result);\n    rb->StartArray(4);\n    rb->SendLong(res.count);\n    if (res.count) {\n      rb->SendBulkString(StreamIdRepr(res.start));\n      rb->SendBulkString(StreamIdRepr(res.end));\n      rb->StartArray(res.consumer_list.size());\n\n      for (auto& [consumer_name, count] : res.consumer_list) {\n        rb->StartArray(2);\n        rb->SendBulkString(consumer_name);\n        rb->SendLong(count);\n      }\n    } else {\n      for (unsigned j = 0; j < 3; ++j)\n        rb->SendNull();\n    }\n  } else {\n    const auto& res = std::get<PendingExtendedResultList>(result);\n    if (!res.size()) {\n      return rb->SendEmptyArray();\n    }\n\n    rb->StartArray(res.size());\n    for (auto& item : res) {\n      rb->StartArray(4);\n      rb->SendBulkString(StreamIdRepr(item.start));\n      rb->SendBulkString(item.consumer_name);\n      rb->SendLong(item.elapsed);\n      rb->SendLong(item.delivery_count);\n    }\n  }\n}\n\nvoid CmdXRange(CmdArgList args, CommandContext* cmd_cntx) {\n  string_view key = args[0];\n  string_view start = args[1];\n  string_view end = args[2];\n\n  XRangeGeneric(key, start, end, args.subspan(3), false, cmd_cntx);\n}\n\nvoid CmdXRevRange(CmdArgList args, CommandContext* cmd_cntx) {\n  string_view key = args[0];\n  string_view start = args[1];\n  string_view end = args[2];\n\n  XRangeGeneric(key, end, start, args.subspan(3), true, cmd_cntx);\n}\n\n// If opts.read_group is true then this is a WRITE command. 
We don't however journal the consumer\n// creation, only the side effects later on from the scheduled callbacks.\nvariant<bool, facade::ErrorReply> HasEntries2(const OpArgs& op_args, string_view skey,\n                                              ReadOpts* opts) {\n  const bool is_write_command = opts->read_group;\n  auto& db_slice = op_args.GetDbSlice();\n\n  DbSlice::ItAndUpdater it;\n  const CompactObj* cobj;\n\n  auto error = [&](auto res_it) -> variant<bool, facade::ErrorReply> {\n    if (res_it.status() == OpStatus::WRONG_TYPE)\n      return facade::ErrorReply{res_it.status()};\n    else if (res_it.status() == OpStatus::KEY_NOTFOUND && opts->read_group)\n      return facade::ErrorReply{\n          NoGroupOrKey(skey, opts->group_name, \" in XREADGROUP with GROUP option\")};\n    return false;\n  };\n\n  if (is_write_command) {\n    auto res = db_slice.FindMutable(op_args.db_cntx, skey, OBJ_STREAM);\n    if (!res)\n      return error(std::move(res));\n    it = std::move(*res);\n    cobj = &it.it->second;\n  } else {\n    auto res = db_slice.FindReadOnly(op_args.db_cntx, skey, OBJ_STREAM);\n    if (!res)\n      return error(res);\n    cobj = &(*res)->second;\n  }\n\n  stream* s = GetReadOnlyStream(*cobj);\n\n  // Fetch last id\n  streamID last_id = s->last_id;\n  if (s->length)\n    StreamLastValidID(s, &last_id);\n\n  // Check requested\n  auto& requested_sitem = opts->stream_ids.at(skey);\n\n  // Look up group consumer if needed\n  streamCG* group = nullptr;\n  streamConsumer* consumer = nullptr;\n  if (is_write_command) {\n    group = StreamLookupCG(s, WrapSds(opts->group_name));\n    if (!group)\n      return facade::ErrorReply{\n          NoGroupOrKey(skey, opts->group_name, \" in XREADGROUP with GROUP option\")};\n\n    StreamMemTracker tracker;\n    requested_sitem.is_consumer_new = false;\n    consumer = FindOrAddConsumer(opts->consumer_name, group, op_args.db_cntx.time_now_ms,\n                                 &requested_sitem.is_consumer_new);\n    
tracker.UpdateStreamSize(it.it->second);\n\n    requested_sitem.group = group;\n    requested_sitem.consumer = consumer;\n\n    // If '>' is not provided, consumer PEL is used. So don't need to block.\n    if (requested_sitem.id.val.ms != UINT64_MAX || requested_sitem.id.val.seq != UINT64_MAX) {\n      requested_sitem.serve_history = true;\n      return true;\n    }\n\n    // we know the requested last_id only when we already have it\n    if (streamCompareID(&last_id, &requested_sitem.group->last_id) > 0) {\n      requested_sitem.id.val = requested_sitem.group->last_id;\n      StreamIncrID(&requested_sitem.id.val);\n    }\n  } else {\n    // Resolve $ to the last ID in the stream.\n    if (requested_sitem.id.resolve_last_id) {\n      requested_sitem.id.val = last_id;\n      StreamIncrID(&requested_sitem.id.val);  // include id's strictly greater\n      requested_sitem.id.resolve_last_id = false;\n      return false;\n    }\n  }\n\n  return streamCompareID(&last_id, &requested_sitem.id.val) >= 0;\n}\n\nvoid CmdXRead(CmdArgList args, CommandContext* cmd_cntx) {\n  XReadGeneric2(args, false, cmd_cntx);\n}\n\nvoid CmdXReadGroup(CmdArgList args, CommandContext* cmd_cntx) {\n  XReadGeneric2(args, true, cmd_cntx);\n}\n\nvoid CmdXSetId(CmdArgList args, CommandContext* cmd_cntx) {\n  string_view key = ArgS(args, 0);\n  string_view idstr = ArgS(args, 1);\n\n  ParsedStreamId parsed_id;\n  if (!ParseID(idstr, true, 0, &parsed_id)) {\n    return cmd_cntx->SendError(kInvalidStreamId, kSyntaxErrType);\n  }\n\n  facade::ErrorReply reply(OpStatus::OK);\n  auto cb = [&](Transaction* t, EngineShard* shard) {\n    reply = OpXSetId(t->GetOpArgs(shard), key, parsed_id.val);\n    return OpStatus::OK;\n  };\n\n  cmd_cntx->tx()->ScheduleSingleHop(std::move(cb));\n  if (reply.status == OpStatus::STREAM_ID_SMALL) {\n    return cmd_cntx->SendError(LeqTopIdError(\"XSETID\"));\n  }\n  return cmd_cntx->SendError(reply);\n}\n\nvoid CmdXTrim(CmdArgList args, CommandContext* cmd_cntx) {\n  
CmdArgParser parser{args};\n  auto* rb = static_cast<RedisReplyBuilder*>(cmd_cntx->rb());\n\n  std::string_view key = parser.Next();\n\n  auto parsed_trim_opts = ParseTrimOpts(&parser);\n  if (!parser.Finalize() || !parsed_trim_opts) {\n    auto err = parser.TakeError();\n    cmd_cntx->SendError(!parsed_trim_opts ? parsed_trim_opts.error() : err.MakeReply());\n    return;\n  }\n\n  auto& trim_opts = parsed_trim_opts.value();\n\n  // We can auto-journal if we are not trimming approximately or by maxlen\n  const bool enable_auto_journaling = !JournalAsMinId(trim_opts);\n  if (enable_auto_journaling) {\n    cmd_cntx->tx()->ReviveAutoJournal();\n  }\n\n  auto cb = [&](Transaction* t, EngineShard* shard) {\n    return OpTrim(t->GetOpArgs(shard), key, trim_opts, !enable_auto_journaling);\n  };\n\n  OpResult<int64_t> trim_result = cmd_cntx->tx()->ScheduleSingleHopT(cb);\n  if (trim_result) {\n    rb->SendLong(*trim_result);\n  } else {\n    cmd_cntx->SendError(trim_result.status());\n  }\n}\n\nvoid CmdXAck(CmdArgList args, CommandContext* cmd_cntx) {\n  string_view key = ArgS(args, 0);\n  string_view group = ArgS(args, 1);\n  args.remove_prefix(2);\n  absl::InlinedVector<streamID, 8> ids(args.size());\n\n  for (size_t i = 0; i < args.size(); ++i) {\n    ParsedStreamId parsed_id;\n    string_view str_id = ArgS(args, i);\n    if (!ParseID(str_id, true, 0, &parsed_id)) {\n      return cmd_cntx->SendError(kInvalidStreamId, kSyntaxErrType);\n    }\n    ids[i] = parsed_id.val;\n  }\n\n  auto cb = [&](Transaction* t, EngineShard* shard) {\n    return OpAck(t->GetOpArgs(shard), key, group, absl::Span{ids.data(), ids.size()});\n  };\n\n  OpResult<uint32_t> result = cmd_cntx->tx()->ScheduleSingleHopT(cb);\n  if (result || result.status() == OpStatus::KEY_NOTFOUND) {\n    return cmd_cntx->SendLong(*result);\n  }\n\n  cmd_cntx->SendError(result.status());\n}\n\nvoid CmdXAutoClaim(CmdArgList args, CommandContext* cmd_cntx) {\n  ClaimOpts opts;\n  string_view key = ArgS(args, 0);\n  
opts.group = ArgS(args, 1);\n  opts.consumer = ArgS(args, 2);\n  auto* rb = static_cast<RedisReplyBuilder*>(cmd_cntx->rb());\n\n  if (opts.group.empty() || opts.consumer.empty()) {\n    return cmd_cntx->SendError(kSyntaxErr);\n  }\n\n  if (!absl::SimpleAtoi(ArgS(args, 3), &opts.min_idle_time)) {\n    return rb->SendError(kSyntaxErr);\n  }\n\n  opts.min_idle_time = std::max((int64)0, opts.min_idle_time);\n\n  string_view start = ArgS(args, 4);\n  RangeId rs;\n\n  if (!ParseRangeId(start, RangeBoundary::kStart, &rs)) {\n    return rb->SendError(kSyntaxErr);\n  }\n\n  if (rs.exclude && StreamDecrID(&rs.parsed_id.val) != C_OK) {\n    return rb->SendError(\"invalid start ID for the interval\", kSyntaxErrType);\n  }\n  opts.start = rs.parsed_id.val;\n\n  for (size_t i = 5; i < args.size(); ++i) {\n    string arg = absl::AsciiStrToUpper(ArgS(args, i));\n\n    bool remaining_args = args.size() - i - 1 > 0;\n\n    if (remaining_args) {\n      if (arg == \"COUNT\") {\n        arg = ArgS(args, ++i);\n        if (!absl::SimpleAtoi(arg, &opts.count)) {\n          return rb->SendError(kInvalidIntErr);\n        }\n        if (opts.count <= 0 || opts.count >= (1L << 18)) {\n          return rb->SendError(\"COUNT must be > 0 and less than 2^18\");\n        }\n        continue;\n      }\n    }\n    if (arg == \"JUSTID\") {\n      opts.flags |= kClaimJustID;\n    } else {\n      return cmd_cntx->SendError(\"Unknown argument given for XAUTOCLAIM command\", kSyntaxErr);\n    }\n  }\n\n  auto cb = [&](Transaction* t, EngineShard* shard) {\n    return OpAutoClaim(t->GetOpArgs(shard), key, opts);\n  };\n  OpResult<ClaimInfo> result = cmd_cntx->tx()->ScheduleSingleHopT(std::move(cb));\n\n  if (result.status() == OpStatus::KEY_NOTFOUND) {\n    rb->SendError(NoGroupOrKey(key, opts.group));\n    return;\n  }\n\n  if (!result) {\n    cmd_cntx->SendError(result.status());\n    return;\n  }\n\n  const ClaimInfo& cresult = result.value();\n\n  rb->StartArray(3);\n  
rb->SendBulkString(StreamIdRepr(cresult.end_id));\n  StreamReplies{rb}.SendClaimInfo(cresult);\n  StreamReplies{rb}.SendIDs(cresult.deleted_ids);\n}\n\n#define HFUNC(x) SetHandler(&Cmd##x)\n\nnamespace acl {\nconstexpr uint32_t kXAdd = WRITE | STREAM | FAST;\nconstexpr uint32_t kXClaim = WRITE | FAST;\nconstexpr uint32_t kXDel = WRITE | STREAM | FAST;\nconstexpr uint32_t kXGroup = SLOW;\nconstexpr uint32_t kXInfo = SLOW;\nconstexpr uint32_t kXLen = READ | STREAM | FAST;\nconstexpr uint32_t kXPending = READ | STREAM;\nconstexpr uint32_t kXRange = READ | STREAM | SLOW;\nconstexpr uint32_t kXRevRange = READ | STREAM | SLOW;\nconstexpr uint32_t kXRead = READ | STREAM | SLOW | BLOCKING;\nconstexpr uint32_t kXReadGroup = WRITE | STREAM | SLOW | BLOCKING;\nconstexpr uint32_t kXSetId = WRITE | STREAM | SLOW;\nconstexpr uint32_t kXTrim = WRITE | STREAM | SLOW;\nconstexpr uint32_t kXGroupHelp = READ | STREAM | SLOW;\nconstexpr uint32_t kXAck = WRITE | STREAM | FAST;\nconstexpr uint32_t kXAutoClaim = WRITE | STREAM | FAST;\n}  // namespace acl\n\nvoid StreamFamily::Register(CommandRegistry* registry) {\n  using CI = CommandId;\n  registry->StartFamily();\n  constexpr auto kReadFlags = CO::READONLY | CO::BLOCKING | CO::VARIADIC_KEYS;\n  *registry\n      << CI{\"XADD\",    CO::JOURNALED | CO::DENYOOM | CO::FAST | CO::NO_AUTOJOURNAL, -5, 1, 1,\n            acl::kXAdd}\n             .HFUNC(XAdd)\n      << CI{\"XCLAIM\", CO::JOURNALED | CO::FAST, -6, 1, 1, acl::kXClaim}.HFUNC(XClaim)\n      << CI{\"XDEL\", CO::JOURNALED | CO::FAST, -3, 1, 1, acl::kXDel}.HFUNC(XDel)\n      << CI{\"XGROUP\", CO::JOURNALED | CO::DENYOOM, -3, 2, 2, acl::kXGroup}.HFUNC(XGroup)\n      << CI{\"XINFO\", CO::READONLY, -2, 0, 0, acl::kXInfo}.HFUNC(XInfo)\n      << CI{\"XLEN\", CO::READONLY | CO::FAST, 2, 1, 1, acl::kXLen}.HFUNC(XLen)\n      << CI{\"XPENDING\", CO::READONLY, -3, 1, 1, acl::kXPending}.HFUNC(XPending)\n      << CI{\"XRANGE\", CO::READONLY, -4, 1, 1, acl::kXRange}.HFUNC(XRange)\n      << 
CI{\"XREVRANGE\", CO::READONLY, -4, 1, 1, acl::kXRevRange}.HFUNC(XRevRange)\n      << CI{\"XREAD\", kReadFlags, -3, 3, 3, acl::kXRead}.HFUNC(XRead)\n      << CI{\"XREADGROUP\",\n            CO::VARIADIC_KEYS | CO::BLOCKING | CO::JOURNALED | CO::NO_AUTOJOURNAL,\n            -6,\n            6,\n            6,\n            acl::kXReadGroup}\n             .HFUNC(XReadGroup)\n      << CI{\"XSETID\", CO::JOURNALED, 3, 1, 1, acl::kXSetId}.HFUNC(XSetId)\n      << CI{\"XTRIM\", CO::JOURNALED | CO::FAST | CO::NO_AUTOJOURNAL, -4, 1, 1, acl::kXTrim}.HFUNC(\n             XTrim)\n      << CI{\"_XGROUP_HELP\", CO::NOSCRIPT | CO::HIDDEN, 2, 0, 0, acl::kXGroupHelp}.SetHandler(\n             XGroupHelp)\n      << CI{\"XACK\", CO::JOURNALED | CO::FAST, -4, 1, 1, acl::kXAck}.HFUNC(XAck)\n      << CI{\"XAUTOCLAIM\", CO::JOURNALED | CO::FAST, -6, 1, 1, acl::kXAutoClaim}.HFUNC(XAutoClaim);\n}\n\n}  // namespace dfly\n"
  },
  {
    "path": "src/server/stream_family.h",
    "content": "// Copyright 2022, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#pragma once\n\n#include <cstddef>\n\nnamespace dfly {\n\nclass CommandRegistry;\nstruct CompactValue;\n\nclass StreamMemTracker {\n public:\n  StreamMemTracker();\n\n  void UpdateStreamSize(CompactValue& pv) const;\n\n private:\n  size_t start_size_{0};\n};\n\nclass StreamFamily {\n public:\n  static void Register(CommandRegistry* registry);\n};\n\n}  // namespace dfly\n"
  },
  {
    "path": "src/server/stream_family_test.cc",
    "content": "// Copyright 2022, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#include \"server/stream_family.h\"\n\n#include \"base/flags.h\"\n#include \"base/gtest.h\"\n#include \"base/logging.h\"\n#include \"facade/facade_test.h\"\n#include \"server/test_utils.h\"\n\nusing namespace testing;\nusing namespace std;\nusing namespace util;\n\nnamespace dfly {\n\nconst auto kMatchNil = ArgType(RespExpr::NIL);\n\nclass StreamFamilyTest : public BaseFamilyTest {\n protected:\n};\n\nTEST_F(StreamFamilyTest, Add) {\n  auto resp = Run({\"xadd\", \"key\", \"*\", \"field\", \"value\"});\n  ASSERT_THAT(resp, ArgType(RespExpr::STRING));\n  string id = string(ToSV(resp.GetBuf()));\n  EXPECT_THAT(id, EndsWith(\"-0\"));\n\n  resp = Run({\"xrange\", \"null\", \"-\", \"+\"});\n  EXPECT_THAT(resp, ArrLen(0));\n\n  resp = Run({\"xrange\", \"key\", \"-\", \"+\"});\n  EXPECT_THAT(resp, ArrLen(2));\n  auto sub_arr = resp.GetVec();\n  EXPECT_THAT(sub_arr, ElementsAre(id, ArrLen(2)));\n\n  resp = Run({\"xlen\", \"key\"});\n  EXPECT_THAT(resp, IntArg(1));\n\n  resp = Run({\"xadd\", \"key\", \"badid\", \"f1\", \"val1\"});\n  EXPECT_THAT(resp, ErrArg(\"Invalid stream ID\"));\n\n  resp = Run({\"xadd\", \"key\", \"nomkstream\", \"*\", \"field2\", \"value2\"});\n  ASSERT_THAT(resp, ArgType(RespExpr::STRING));\n\n  resp = Run({\"xadd\", \"noexist\", \"nomkstream\", \"*\", \"field\", \"value\"});\n  EXPECT_THAT(resp, kMatchNil);\n}\n\nTEST_F(StreamFamilyTest, AddExtended) {\n  auto resp0 = Run({\"xadd\", \"key\", \"5\", \"f1\", \"v1\", \"f2\", \"v2\"});\n  EXPECT_EQ(resp0, \"5-0\");\n  resp0 = Run({\"xrange\", \"key\", \"5-0\", \"5-0\"});\n  EXPECT_THAT(resp0, ArrLen(2));\n  auto sub_arr = resp0.GetVec();\n  EXPECT_THAT(sub_arr, ElementsAre(\"5-0\", ArrLen(4)));\n  sub_arr = sub_arr[1].GetVec();\n  EXPECT_THAT(sub_arr, ElementsAre(\"f1\", \"v1\", \"f2\", \"v2\"));\n\n  auto resp1 = Run({\"xadd\", \"key\", \"maxlen\", \"1\", \"*\", \"field1\", 
\"val1\"});\n  string id1 = string(ToSV(resp1.GetBuf()));\n\n  auto resp2 = Run({\"xadd\", \"key\", \"maxlen\", \"1\", \"*\", \"field2\", \"val2\"});\n  string id2 = string(ToSV(resp2.GetBuf()));\n\n  EXPECT_THAT(Run({\"xlen\", \"key\"}), IntArg(1));\n  EXPECT_THAT(Run({\"xrange\", \"key\", id1, id1}), ArrLen(0));\n\n  auto resp3 = Run({\"xadd\", \"key\", id2, \"f1\", \"val1\"});\n  EXPECT_THAT(resp3, ErrArg(\"equal or smaller than\"));\n\n  Run({\"xadd\", \"key2\", \"5-0\", \"field\", \"val\"});\n  Run({\"xadd\", \"key2\", \"6-0\", \"field1\", \"val1\"});\n  Run({\"xadd\", \"key2\", \"7-0\", \"field2\", \"val2\"});\n  auto resp = Run({\"xadd\", \"key2\", \"minid\", \"6\", \"*\", \"field3\", \"val3\"});\n  EXPECT_THAT(Run({\"xlen\", \"key2\"}), IntArg(3));\n  EXPECT_THAT(Run({\"xrange\", \"key2\", \"5-0\", \"5-0\"}), ArrLen(0));\n\n  for (int i = 0; i < 700; i++) {\n    Run({\"xadd\", \"key3\", \"*\", \"field\", \"val\"});\n  }\n  resp = Run({\"xadd\", \"key3\", \"maxlen\", \"~\", \"500\", \"*\", \"field\", \"val\"});\n  EXPECT_THAT(Run({\"xlen\", \"key3\"}), IntArg(501));\n  for (int i = 0; i < 700; i++) {\n    Run({\"xadd\", \"key4\", \"*\", \"field\", \"val\"});\n  }\n  resp = Run({\"xadd\", \"key4\", \"maxlen\", \"~\", \"500\", \"limit\", \"100\", \"*\", \"field\", \"val\"});\n  EXPECT_THAT(Run({\"xlen\", \"key4\"}), IntArg(601));\n}\n\nTEST_F(StreamFamilyTest, XrangeRangeAutocomplete) {\n  Run({\"xadd\", \"mystream\", \"1609459200000-0\", \"0\", \"0\"});\n  Run({\"xadd\", \"mystream\", \"1609459200001-0\", \"1\", \"1\"});\n  Run({\"xadd\", \"mystream\", \"1609459200001-1\", \"2\", \"2\"});\n  Run({\"xadd\", \"mystream\", \"1609459200002-0\", \"3\", \"3\"});\n  auto resp = Run({\"xrange\", \"mystream\", \"1609459200000\", \"1609459200001\"});\n  EXPECT_THAT(resp, RespElementsAre(RespElementsAre(\"1609459200000-0\", RespElementsAre(\"0\", \"0\")),\n                                    RespElementsAre(\"1609459200001-0\", RespElementsAre(\"1\", \"1\")),\n          
                          RespElementsAre(\"1609459200001-1\", RespElementsAre(\"2\", \"2\"))));\n  resp = Run({\"xrange\", \"mystream\", \"1609459200000\", \"(1609459200001\"});\n  EXPECT_THAT(resp, RespElementsAre(RespElementsAre(\"1609459200000-0\", RespElementsAre(\"0\", \"0\")),\n                                    RespElementsAre(\"1609459200001-0\", RespElementsAre(\"1\", \"1\")),\n                                    RespElementsAre(\"1609459200001-1\", RespElementsAre(\"2\", \"2\"))));\n}\n\nTEST_F(StreamFamilyTest, Range) {\n  Run({\"xadd\", \"key\", \"1-*\", \"f1\", \"v1\"});\n  Run({\"xadd\", \"key\", \"1-*\", \"f2\", \"v2\"});\n  auto resp = Run({\"xrange\", \"key\", \"-\", \"+\"});\n  EXPECT_THAT(resp, ArrLen(2));\n  auto sub_arr = resp.GetVec();\n  EXPECT_THAT(sub_arr, ElementsAre(ArrLen(2), ArrLen(2)));\n  auto sub0 = sub_arr[0].GetVec();\n  auto sub1 = sub_arr[1].GetVec();\n  EXPECT_THAT(sub0, ElementsAre(\"1-0\", ArrLen(2)));\n  EXPECT_THAT(sub1, ElementsAre(\"1-1\", ArrLen(2)));\n\n  resp = Run({\"xrevrange\", \"key\", \"+\", \"-\"});\n  sub_arr = resp.GetVec();\n  sub0 = sub_arr[0].GetVec();\n  sub1 = sub_arr[1].GetVec();\n  EXPECT_THAT(sub0, ElementsAre(\"1-1\", ArrLen(2)));\n  EXPECT_THAT(sub1, ElementsAre(\"1-0\", ArrLen(2)));\n}\n\nTEST_F(StreamFamilyTest, GroupCreate) {\n  auto resp = Run({\"xadd\", \"key\", \"1-*\", \"f1\", \"v1\"});\n  EXPECT_EQ(resp, \"1-0\");\n  resp = Run({\"xgroup\", \"create\", \"key\", \"grname\", \"1\"});\n  EXPECT_EQ(resp, \"OK\");\n  resp = Run({\"xgroup\", \"create\", \"test\", \"test\", \"0\"});\n  EXPECT_THAT(resp, ErrArg(\"requires the key to exist\"));\n  resp = Run({\"xgroup\", \"create\", \"test\", \"test\", \"0\", \"MKSTREAM\"});\n  EXPECT_THAT(resp, \"OK\");\n  resp = Run({\"xgroup\", \"create\", \"test\", \"test\", \"0\", \"MKSTREAM\"});\n  EXPECT_THAT(resp, ErrArg(\"BUSYGROUP\"));\n}\n\nTEST_F(StreamFamilyTest, XRead) {\n  Run({\"xadd\", \"foo\", \"1-*\", \"k1\", \"v1\"});\n  Run({\"xadd\", \"foo\", 
\"1-*\", \"k2\", \"v2\"});\n  Run({\"xadd\", \"foo\", \"1-*\", \"k3\", \"v3\"});\n  Run({\"xadd\", \"bar\", \"1-*\", \"k4\", \"v4\"});\n  EXPECT_EQ(GetMetrics().shard_stats.tx_optimistic_total, 4u);\n\n  // Receive all records from a single stream, in a single hop\n  auto resp = Run({\"xread\", \"streams\", \"foo\", \"0\"});\n  EXPECT_THAT(resp.GetVec(), ElementsAre(\"foo\", ArrLen(3)));\n  EXPECT_EQ(GetMetrics().shard_stats.tx_optimistic_total, 5u);\n\n  // Receive all records from both streams.\n  resp = Run({\"xread\", \"streams\", \"foo\", \"bar\", \"0\", \"0\"});\n\n  // 2 results\n  ASSERT_THAT(resp, RespArray(ElementsAre(ArrLen(2), ArrLen(2))));\n  ASSERT_THAT(resp.GetVec()[0], RespArray(ElementsAre(\"foo\", ArrLen(3))));\n  ASSERT_THAT(resp.GetVec()[1], RespArray(ElementsAre(\"bar\", ArrLen(1))));\n\n  // Order of the requested streams is maintained.\n  resp = Run({\"xread\", \"streams\", \"bar\", \"foo\", \"0\", \"0\"});\n  ASSERT_THAT(resp, RespArray(ElementsAre(ArrLen(2), ArrLen(2))));\n  ASSERT_THAT(resp.GetVec()[0], RespArray(ElementsAre(\"bar\", ArrLen(1))));\n  ASSERT_THAT(resp.GetVec()[1], RespArray(ElementsAre(\"foo\", ArrLen(3))));\n\n  // Limit count.\n  resp = Run({\"xread\", \"count\", \"1\", \"streams\", \"foo\", \"bar\", \"0\", \"0\"});\n  ASSERT_THAT(resp, RespArray(ElementsAre(ArrLen(2), ArrLen(2))));\n  ASSERT_THAT(resp.GetVec()[0], RespArray(ElementsAre(\"foo\", ArrLen(1))));\n  ASSERT_THAT(resp.GetVec()[1], RespArray(ElementsAre(\"bar\", ArrLen(1))));\n\n  // Read from ID.\n  resp = Run({\"xread\", \"count\", \"10\", \"streams\", \"foo\", \"bar\", \"1-1\", \"2-0\"});\n  // Note when the response has length 1, Run returns the first element.\n  EXPECT_THAT(resp.GetVec(), ElementsAre(\"foo\", ArrLen(1)));\n  EXPECT_THAT(resp.GetVec()[1].GetVec()[0].GetVec(), ElementsAre(\"1-2\", ArrLen(2)));\n\n  // Stream not found.\n  resp = Run({\"xread\", \"streams\", \"foo\", \"notfound\", \"0\", \"0\"});\n  // Note when the response has length 1, Run 
returns the first element.\n  EXPECT_THAT(resp.GetVec(), ElementsAre(\"foo\", ArrLen(3)));\n\n  // Not found.\n  resp = Run({\"xread\", \"streams\", \"notfound\", \"0\"});\n  EXPECT_THAT(resp, ArgType(RespExpr::NIL_ARRAY));\n\n  // XREAD returns a map response on RESP3\n  Run({\"HELLO\", \"3\"});\n  resp = Run({\"xread\", \"streams\", \"foo\", \"bar\", \"0\", \"0\"});\n  ASSERT_THAT(resp, RespArray(ElementsAre(\"foo\", ArrLen(3), \"bar\", ArrLen(1))));\n\n  const auto foo_resp = resp.GetVec()[1];\n  ASSERT_THAT(foo_resp, RespArray(ElementsAre(ArrLen(2), ArrLen(2), ArrLen(2))));\n\n  const auto first_kv_entry = foo_resp.GetVec()[0];\n  const auto expected = RespArray(ElementsAre(\"k1\", \"v1\"));\n  ASSERT_THAT(first_kv_entry, RespArray(ElementsAre(\"1-0\", expected)));\n}\n\nTEST_F(StreamFamilyTest, XReadGroup) {\n  Run({\"xadd\", \"foo\", \"1-*\", \"k1\", \"v1\"});\n  Run({\"xadd\", \"foo\", \"1-*\", \"k2\", \"v2\"});\n  Run({\"xadd\", \"foo\", \"1-*\", \"k3\", \"v3\"});\n  Run({\"xadd\", \"bar\", \"1-*\", \"k4\", \"v4\"});\n\n  Run({\"xadd\", \"mystream\", \"1-*\", \"k1\", \"v1\"});\n  Run({\"xadd\", \"mystream\", \"1-*\", \"k2\", \"v2\"});\n  Run({\"xadd\", \"mystream\", \"1-*\", \"k3\", \"v3\"});\n\n  Run({\"xgroup\", \"create\", \"foo\", \"group\", \"0\"});\n  Run({\"xgroup\", \"create\", \"bar\", \"group\", \"0\"});\n\n  // consumer PEL is empty, so resp should have empty list\n  auto resp = Run({\"xreadgroup\", \"group\", \"group\", \"alice\", \"streams\", \"foo\", \"0\"});\n  EXPECT_THAT(resp, RespArray(ElementsAre(\"foo\", ArrLen(0))));\n\n  // should return unread entries with key \"foo\"\n  resp = Run({\"xreadgroup\", \"group\", \"group\", \"alice\", \"streams\", \"foo\", \">\"});\n  // only \"foo\" key entries are read\n  EXPECT_THAT(resp, RespArray(ElementsAre(\"foo\", ArrLen(3))));\n\n  Run({\"xadd\", \"foo\", \"1-*\", \"k5\", \"v5\"});\n  resp = Run({\"xreadgroup\", \"group\", \"group\", \"alice\", \"streams\", \"bar\", \"foo\", \">\", \">\"});\n  
EXPECT_THAT(resp, RespArray(ElementsAre(ArrLen(2), ArrLen(2))));\n\n  EXPECT_THAT(resp.GetVec()[0].GetVec()[1].GetVec()[0], RespArray(ElementsAre(\"1-0\", ArrLen(2))));\n  EXPECT_THAT(resp.GetVec()[1].GetVec()[1].GetVec()[0], RespArray(ElementsAre(\"1-3\", ArrLen(2))));\n\n  // now we can specify id for \"foo\" and it fetches from alice's consumer PEL\n  resp = Run({\"xreadgroup\", \"group\", \"group\", \"alice\", \"streams\", \"foo\", \"0\"});\n  EXPECT_THAT(resp.GetVec()[1], ArrLen(4));\n\n  // now \">\" gives nil\n  resp = Run({\"xreadgroup\", \"group\", \"group\", \"alice\", \"streams\", \"foo\", \">\"});\n  EXPECT_THAT(resp, ArgType(RespExpr::NIL_ARRAY));\n\n  // count limits the fetched entries\n  resp = Run(\n      {\"xreadgroup\", \"group\", \"group\", \"alice\", \"count\", \"2\", \"streams\", \"foo\", \"bar\", \"0\", \"0\"});\n  EXPECT_THAT(resp, RespArray(ElementsAre(ArrLen(2), ArrLen(2))));\n  EXPECT_THAT(resp.GetVec()[0].GetVec(), ElementsAre(\"foo\", ArrLen(2)));\n  EXPECT_THAT(resp.GetVec()[1].GetVec(), ElementsAre(\"bar\", ArrLen(1)));\n\n  // bob will not get entries of alice\n  resp = Run({\"xreadgroup\", \"group\", \"group\", \"bob\", \"streams\", \"foo\", \"0\"});\n  EXPECT_THAT(resp, RespArray(ElementsAre(\"foo\", ArrLen(0))));\n\n  resp = Run({\"xinfo\", \"groups\", \"foo\"});\n  // 2 consumers created\n  EXPECT_THAT(resp.GetVec()[3], IntArg(2));\n  // check last_delivery_id\n  EXPECT_THAT(resp.GetVec()[7], \"1-3\");\n\n  // Noack\n  Run({\"xadd\", \"foo\", \"1-*\", \"k6\", \"v6\"});\n  resp = Run({\"xreadgroup\", \"group\", \"group\", \"bob\", \"noack\", \"streams\", \"foo\", \">\"});\n  // check basic results\n  EXPECT_THAT(resp, ArrLen(2));\n  EXPECT_THAT(resp.GetVec(), ElementsAre(\"foo\", ArrLen(1)));\n  // Entry is not inserted in Bob's consumer PEL.\n  resp = Run({\"xreadgroup\", \"group\", \"group\", \"bob\", \"streams\", \"foo\", \"0\"});\n  EXPECT_THAT(resp, RespArray(ElementsAre(\"foo\", ArrLen(0))));\n\n  // No Group\n  resp = 
Run({\"xreadgroup\", \"group\", \"nogroup\", \"alice\", \"streams\", \"foo\", \"0\"});\n  EXPECT_THAT(\n      resp,\n      ErrArg(\"No such key 'foo' or consumer group 'nogroup' in XREADGROUP with GROUP option\"));\n\n  // '>' returns the no-group error if the group doesn't exist for the stream\n  resp = Run({\"xreadgroup\", \"group\", \"group\", \"alice\", \"streams\", \"mystream\", \">\"});\n  EXPECT_THAT(\n      resp,\n      ErrArg(\"No such key 'mystream' or consumer group 'group' in XREADGROUP with GROUP option\"));\n\n  Run({\"xadd\", \"foo\", \"1-*\", \"k7\", \"v7\"});\n  resp = Run({\"xreadgroup\", \"group\", \"group\", \"alice\", \"streams\", \"mystream\", \"foo\", \">\", \">\"});\n  // returns no group error as \"group\" was not created for mystream.\n  EXPECT_THAT(\n      resp,\n      ErrArg(\"No such key 'mystream' or consumer group 'group' in XREADGROUP with GROUP option\"));\n\n  // returns the no-group error when the key doesn't exist;\n  // this is how Redis behaves\n  resp = Run({\"xreadgroup\", \"group\", \"group\", \"consumer\", \"count\", \"10\", \"block\", \"5000\", \"streams\",\n              \"nostream\", \">\"});\n  EXPECT_THAT(\n      resp,\n      ErrArg(\"No such key 'nostream' or consumer group 'group' in XREADGROUP with GROUP option\"));\n\n  // block on empty stream via xgroup create.\n  Run({\"xgroup\", \"create\", \"emptystream\", \"group\", \"0\", \"mkstream\"});\n  auto before = absl::Now();\n  resp = Run({\"xreadgroup\", \"group\", \"group\", \"consumer\", \"count\", \"10\", \"block\", \"1000\", \"streams\",\n              \"emptystream\", \">\"});\n  EXPECT_GE(absl::Now() - before, absl::Seconds(1));\n  EXPECT_THAT(resp, ArgType(RespExpr::NIL_ARRAY));\n}\n\nTEST_F(StreamFamilyTest, XReadBlock) {\n  Run({\"xadd\", \"foo\", \"1-*\", \"k1\", \"v1\"});\n  Run({\"xadd\", \"foo\", \"1-*\", \"k2\", \"v2\"});\n  Run({\"xadd\", \"foo\", \"1-*\", \"k3\", \"v3\"});\n  Run({\"xadd\", \"bar\", \"1-*\", \"k4\", \"v4\"});\n\n  // Receive all records from both streams.\n  
auto resp = Run({\"xread\", \"block\", \"100\", \"streams\", \"foo\", \"bar\", \"0\", \"0\"});\n  EXPECT_THAT(resp, ArrLen(2));\n  EXPECT_THAT(resp.GetVec()[0].GetVec(), ElementsAre(\"foo\", ArrLen(3)));\n  EXPECT_THAT(resp.GetVec()[1].GetVec(), ElementsAre(\"bar\", ArrLen(1)));\n\n  // Timeout.\n  resp = Run({\"xread\", \"block\", \"1\", \"streams\", \"foo\", \"$\"});\n  EXPECT_THAT(resp, ArgType(RespExpr::NIL_ARRAY));\n\n  // Timeout again, on two steams\n  resp = Run({\"xread\", \"block\", \"1\", \"streams\", \"foo\", \"bar\", \"$\", \"$\"});\n  EXPECT_THAT(resp, ArgType(RespExpr::NIL_ARRAY));\n\n  // Run XREAD BLOCK from 2 fibers.\n  RespExpr resp0, resp1;\n  auto fb0 = pp_->at(0)->LaunchFiber(Launch::dispatch, [&] {\n    resp0 = Run({\"xread\", \"block\", \"0\", \"streams\", \"foo\", \"$\"});\n  });\n  auto fb1 = pp_->at(1)->LaunchFiber(Launch::dispatch, [&] {\n    resp1 = Run({\"xread\", \"block\", \"0\", \"streams\", \"foo\", \"bar\", \"$\", \"$\"});\n  });\n  ThisFiber::SleepFor(50us);\n\n  resp = pp_->at(1)->Await([&] { return Run(\"xadd\", {\"xadd\", \"foo\", \"1-*\", \"k5\", \"v5\"}); });\n\n  fb0.Join();\n  fb1.Join();\n\n  // Both xread calls should have been unblocked.\n  //\n  // Note when the response has length 1, Run returns the first element.\n  EXPECT_THAT(resp0.GetVec(), ElementsAre(\"foo\", ArrLen(1)));\n  EXPECT_THAT(resp1.GetVec(), ElementsAre(\"foo\", ArrLen(1)));\n}\n\nTEST_F(StreamFamilyTest, XReadGroupBlockwithoutBlock) {\n  Run({\"xadd\", \"foo\", \"1-*\", \"k1\", \"v1\"});\n  Run({\"xadd\", \"foo\", \"1-*\", \"k2\", \"v2\"});\n  Run({\"xadd\", \"foo\", \"1-*\", \"k3\", \"v3\"});\n  Run({\"xadd\", \"bar\", \"1-*\", \"k4\", \"v4\"});\n\n  Run({\"xgroup\", \"create\", \"foo\", \"group\", \"0\"});\n  Run({\"xgroup\", \"create\", \"bar\", \"group\", \"0\"});\n\n  // Receive all records from both streams.\n  auto resp = Run(\n      {\"xreadgroup\", \"group\", \"group\", \"alice\", \"block\", \"100\", \"streams\", \"foo\", \"bar\", \">\", 
\">\"});\n  EXPECT_THAT(resp, RespArray(ElementsAre(ArrLen(2), ArrLen(2))));\n  EXPECT_THAT(resp.GetVec()[0].GetVec(), ElementsAre(\"foo\", ArrLen(3)));\n  EXPECT_THAT(resp.GetVec()[1].GetVec(), ElementsAre(\"bar\", ArrLen(1)));\n}\n\nTEST_F(StreamFamilyTest, XReadGroupBlock) {\n  Run({\"xgroup\", \"create\", \"foo\", \"group\", \"0\", \"MKSTREAM\"});\n  Run({\"xgroup\", \"create\", \"bar\", \"group\", \"0\", \"MKSTREAM\"});\n\n  // Timeout\n  auto resp = Run(\n      {\"xreadgroup\", \"group\", \"group\", \"alice\", \"block\", \"1\", \"streams\", \"foo\", \"bar\", \">\", \">\"});\n  EXPECT_THAT(resp, ArgType(RespExpr::NIL_ARRAY));\n\n  // Run XREADGROUP BLOCK from 2 fibers.\n  RespExpr resp0, resp1;\n  auto fb0 = pp_->at(0)->LaunchFiber(Launch::dispatch, [&] {\n    resp0 = Run(\n        {\"xreadgroup\", \"group\", \"group\", \"alice\", \"block\", \"0\", \"streams\", \"foo\", \"bar\", \">\", \">\"});\n  });\n  auto fb1 = pp_->at(1)->LaunchFiber(Launch::dispatch, [&] {\n    resp1 = Run(\n        {\"xreadgroup\", \"group\", \"group\", \"alice\", \"block\", \"0\", \"streams\", \"foo\", \"bar\", \">\", \">\"});\n  });\n  ThisFiber::SleepFor(50us);\n\n  pp_->at(1)->Await([&] { return Run(\"xadd\", {\"xadd\", \"foo\", \"1-*\", \"k5\", \"v5\"}); });\n  // Only one xreadgroup call should have been unblocked.\n\n  ThisFiber::SleepFor(50us);\n  pp_->at(1)->Await([&] { return Run(\"xadd\", {\"xadd\", \"bar\", \"1-*\", \"k5\", \"v5\"}); });\n  // The second one should be unblocked\n  ThisFiber::SleepFor(50us);\n\n  fb0.Join();\n  fb1.Join();\n\n  if (resp0.GetVec()[0].GetString() == \"foo\") {\n    EXPECT_THAT(resp0.GetVec(), ElementsAre(\"foo\", ArrLen(1)));\n    EXPECT_THAT(resp1.GetVec(), ElementsAre(\"bar\", ArrLen(1)));\n  } else {\n    EXPECT_THAT(resp1.GetVec(), ElementsAre(\"foo\", ArrLen(1)));\n    EXPECT_THAT(resp0.GetVec(), ElementsAre(\"bar\", ArrLen(1)));\n  }\n\n  // Call XGROUP DESTROY while blocking\n  Run({\"xgroup\", \"create\", \"to-delete\", \"to-delete\", 
\"0\", \"MKSTREAM\"});\n  fb0 = pp_->at(1)->LaunchFiber(Launch::dispatch, [&] {\n    resp0 = Run({\"xreadgroup\", \"group\", \"to-delete\", \"consumer\", \"block\", \"0\", \"streams\",\n                 \"to-delete\", \">\"});\n  });\n\n  Run({\"xgroup\", \"destroy\", \"to-delete\", \"to-delete\"});\n  fb0.Join();\n  EXPECT_THAT(resp0, ErrArg(\"consumer group this client was blocked on no longer exists\"));\n}\n\nTEST_F(StreamFamilyTest, XReadGroupBlockDelconsumer) {\n  Run({\"XGROUP\", \"CREATE\", \"foo\", \"group\", \"0\", \"MKSTREAM\"});\n\n  RespExpr resp0;\n  auto fb0 = pp_->at(1)->LaunchFiber(Launch::dispatch, [&] {\n    resp0 = Run({\"XREADGROUP\", \"GROUP\", \"group\", \"alice\", \"BLOCK\", \"0\", \"streams\", \"foo\", \">\"});\n  });\n  ThisFiber::SleepFor(50us);\n\n  // Del consumer while it's blocked\n  RespExpr resp_del_consumer = Run({\"XGROUP\", \"DELCONSUMER\", \"foo\", \"group\", \"alice\"});\n\n  pp_->at(1)->Await([&] { return Run(\"xadd\", {\"XADD\", \"foo\", \"1-0\", \"k1\", \"v1\"}); });\n  fb0.Join();\n\n  EXPECT_THAT(resp0.GetVec(), ElementsAre(\"foo\", ArrLen(1)));\n  EXPECT_THAT(resp_del_consumer, IntArg(0));\n}\n\nTEST_F(StreamFamilyTest, XReadInvalidArgs) {\n  // Invalid COUNT value.\n  auto resp = Run({\"xread\", \"count\", \"invalid\", \"streams\", \"s1\", \"s2\", \"0\", \"0\"});\n  EXPECT_THAT(resp, ErrArg(\"not an integer or out of range\"));\n\n  // Missing COUNT value.\n  resp = Run({\"xread\", \"count\"});\n  EXPECT_THAT(resp, ErrArg(\"wrong number of arguments for 'xread' command\"));\n\n  // Invalid BLOCK value.\n  resp = Run({\"xread\", \"block\", \"invalid\", \"streams\", \"s1\", \"s2\", \"0\", \"0\"});\n  EXPECT_THAT(resp, ErrArg(\"not an integer or out of range\"));\n\n  // Missing BLOCK value.\n  resp = Run({\"xread\", \"block\", \"streams\", \"s1\", \"s2\", \"0\", \"0\"});\n  EXPECT_THAT(resp, ErrArg(\"not an integer or out of range\"));\n\n  // Missing STREAMS.\n  resp = Run({\"xread\", \"count\", \"5\"});\n  
EXPECT_THAT(resp, ErrArg(\"syntax error\"));\n\n  // Unbalanced list of streams.\n  resp = Run({\"xread\", \"count\", \"invalid\", \"streams\", \"s1\", \"s2\", \"0\", \"0\"});\n  EXPECT_THAT(resp, ErrArg(\"value is not an integer\"));\n\n  // Wrong type.\n  Run({\"set\", \"foo\", \"v\"});\n  resp = Run({\"xread\", \"streams\", \"foo\", \"0\"});\n  EXPECT_THAT(resp, ErrArg(\"key holding the wrong kind of value\"));\n}\n\nTEST_F(StreamFamilyTest, XReadGroupInvalidArgs) {\n  Run({\"xgroup\", \"create\", \"group\", \"foo\", \"0\", \"mkstream\"});\n  // Invalid COUNT value.\n  auto resp =\n      Run({\"xreadgroup\", \"group\", \"group\", \"alice\", \"count\", \"invalid\", \"streams\", \"foo\", \"0\"});\n  EXPECT_THAT(resp, ErrArg(\"not an integer or out of range\"));\n\n  // Invalid \"stream\" instead of GROUP.\n  resp = Run({\"xreadgroup\", \"stream\", \"group\", \"alice\", \"count\", \"1\", \"streams\", \"foo\", \"0\"});\n  EXPECT_THAT(resp, ErrArg(\"Missing 'GROUP' in 'XREADGROUP' command\"));\n\n  // Missing streams.\n  resp = Run({\"xreadgroup\", \"group\", \"group\", \"alice\", \"streams\"});\n  EXPECT_THAT(resp, ErrArg(\"wrong number of arguments for 'xreadgroup' command\"));\n\n  // Missing consumer.\n  resp = Run({\"xreadgroup\", \"group\", \"group\", \"streams\", \"foo\", \"0\"});\n  EXPECT_THAT(resp, ErrArg(\"syntax error\"));\n\n  // Missing block value.\n  resp = Run({\"xreadgroup\", \"group\", \"group\", \"alice\", \"block\", \"streams\", \"foo\", \"0\"});\n  EXPECT_THAT(resp, ErrArg(\"not an integer or out of range\"));\n\n  // Invalid block value.\n  resp = Run({\"xreadgroup\", \"group\", \"group\", \"alice\", \"block\", \"invalid\", \"streams\", \"foo\", \"0\"});\n  EXPECT_THAT(resp, ErrArg(\"not an integer or out of range\"));\n\n  // Unbalanced list of streams.\n  resp = Run({\"xreadgroup\", \"group\", \"group\", \"alice\", \"streams\", \"s1\", \"s2\", \"s3\", \"0\", \"0\"});\n  EXPECT_THAT(resp, ErrArg(\"Unbalanced 'xreadgroup' list of streams: for 
each stream key an ID or \"\n                           \"'>' must be specified\"));\n\n  resp = Run({\"XREAD\", \"COUNT\", \"1\", \"STREAMS\", \"mystream\"});\n  ASSERT_THAT(resp, ErrArg(\"Unbalanced 'xread' list of streams: for each stream key an ID or '$' \"\n                           \"must be specified\"));\n}\n\nTEST_F(StreamFamilyTest, XReadGroupEmpty) {\n  Run({\"XADD\", \"stream\", \"*\", \"foo\", \"bar\"});\n  Run({\"XGROUP\", \"CREATE\", \"stream\", \"group\", \"0\"});\n  auto resp = Run({\"XREADGROUP\", \"GROUP\", \"group\", \"consumer1\", \"STREAMS\", \"stream\", \"0\"});\n  EXPECT_THAT(resp, ArrLen(2));\n}\n\nTEST_F(StreamFamilyTest, Issue854) {\n  auto resp = Run({\"xgroup\", \"help\"});\n  EXPECT_THAT(resp, ArgType(RespExpr::ARRAY));\n\n  resp = Run({\"eval\", \"redis.call('xgroup', 'help')\", \"0\"});\n  EXPECT_THAT(resp, ErrArg(\"is not allowed\"));\n}\n\nTEST_F(StreamFamilyTest, XGroupConsumer) {\n  Run({\"xgroup\", \"create\", \"foo\", \"group\", \"$\", \"MKSTREAM\"});\n  auto resp = Run({\"xgroup\", \"createconsumer\", \"foo\", \"group\", \"bob\"});\n  EXPECT_THAT(resp, IntArg(1));\n  Run({\"xgroup\", \"createconsumer\", \"foo\", \"group\", \"alice\"});\n  resp = Run({\"xinfo\", \"groups\", \"foo\"});\n  EXPECT_THAT(resp.GetVec()[3], IntArg(2));\n  Run({\"xgroup\", \"delconsumer\", \"foo\", \"group\", \"alice\"});\n  resp = Run({\"xinfo\", \"groups\", \"foo\"});\n  EXPECT_THAT(resp.GetVec()[3], IntArg(1));\n\n  resp = Run({\"xgroup\", \"createconsumer\", \"foo\", \"group\", \"alice\"});\n  EXPECT_THAT(resp, IntArg(1));\n\n  // ensure createconsumer doesn't create consumer that already exists\n  resp = Run({\"xgroup\", \"createconsumer\", \"foo\", \"group\", \"alice\"});\n  EXPECT_THAT(resp, IntArg(0));\n\n  // nogrouperror\n  resp = Run({\"xgroup\", \"createconsumer\", \"foo\", \"not-exists\", \"alice\"});\n  EXPECT_THAT(resp, ErrArg(\"NOGROUP\"));\n}\n\nTEST_F(StreamFamilyTest, Xclaim) {\n  Run({\"xadd\", \"foo\", \"1-0\", \"k1\", \"v1\"});\n 
 Run({\"xadd\", \"foo\", \"1-1\", \"k2\", \"v2\"});\n  Run({\"xadd\", \"foo\", \"1-2\", \"k3\", \"v3\"});\n  Run({\"xadd\", \"foo\", \"1-3\", \"k4\", \"v4\"});\n\n  // create a group for foo stream\n  Run({\"xgroup\", \"create\", \"foo\", \"group\", \"0\"});\n  // alice consume all the stream entries\n  Run({\"xreadgroup\", \"group\", \"group\", \"alice\", \"streams\", \"foo\", \">\"});\n\n  // bob claims alice's two pending stream entries\n  auto resp = Run({\"xclaim\", \"foo\", \"group\", \"bob\", \"0\", \"1-2\", \"1-3\"});\n  EXPECT_THAT(resp, RespArray(ElementsAre(\n                        RespArray(ElementsAre(\"1-2\", RespArray(ElementsAre(\"k3\", \"v3\")))),\n                        RespArray(ElementsAre(\"1-3\", RespArray(ElementsAre(\"k4\", \"v4\")))))));\n\n  // bob really have these claimed entries\n  resp = Run({\"xreadgroup\", \"group\", \"group\", \"bob\", \"streams\", \"foo\", \"0\"});\n  EXPECT_THAT(resp,\n              RespArray(ElementsAre(\n                  \"foo\", RespArray(ElementsAre(\n                             RespArray(ElementsAre(\"1-2\", RespArray(ElementsAre(\"k3\", \"v3\")))),\n                             RespArray(ElementsAre(\"1-3\", RespArray(ElementsAre(\"k4\", \"v4\")))))))));\n\n  // alice no longer have those entries\n  resp = Run({\"xreadgroup\", \"group\", \"group\", \"alice\", \"streams\", \"foo\", \"0\"});\n  EXPECT_THAT(resp,\n              RespArray(ElementsAre(\n                  \"foo\", RespArray(ElementsAre(\n                             RespArray(ElementsAre(\"1-0\", RespArray(ElementsAre(\"k1\", \"v1\")))),\n                             RespArray(ElementsAre(\"1-1\", RespArray(ElementsAre(\"k2\", \"v2\")))))))));\n\n  // xclaim ensures that entries before the min-idle-time are not claimed by bob\n  resp = Run({\"xclaim\", \"foo\", \"group\", \"bob\", \"3600000\", \"1-0\"});\n  EXPECT_THAT(resp, ArrLen(0));\n  resp = Run({\"xreadgroup\", \"group\", \"group\", \"alice\", \"streams\", \"foo\", \"0\"});\n  
EXPECT_THAT(resp,\n              RespArray(ElementsAre(\n                  \"foo\", RespArray(ElementsAre(\n                             RespArray(ElementsAre(\"1-0\", RespArray(ElementsAre(\"k1\", \"v1\")))),\n                             RespArray(ElementsAre(\"1-1\", RespArray(ElementsAre(\"k2\", \"v2\")))))))));\n\n  Run({\"xadd\", \"foo\", \"1-4\", \"k5\", \"v5\"});\n  Run({\"xreadgroup\", \"group\", \"group\", \"alice\", \"streams\", \"foo\", \">\"});\n  // xclaim returns only claimed ids when justid is set\n  resp = Run({\"xclaim\", \"foo\", \"group\", \"bob\", \"0\", \"1-0\", \"1-4\", \"justid\"});\n  EXPECT_THAT(resp.GetVec(), ElementsAre(\"1-0\", \"1-4\"));\n\n  Run({\"xadd\", \"foo\", \"1-5\", \"k6\", \"v6\"});\n  // bob should claim the id forcefully even if it is not yet present in group pel\n  resp = Run({\"xclaim\", \"foo\", \"group\", \"bob\", \"0\", \"1-5\", \"force\", \"justid\"});\n  EXPECT_THAT(resp.GetString(), \"1-5\");\n  resp = Run({\"xreadgroup\", \"group\", \"group\", \"bob\", \"streams\", \"foo\", \"0\"});\n  EXPECT_THAT(resp.GetVec()[1].GetVec()[4].GetVec(),\n              ElementsAre(\"1-5\", RespArray(ElementsAre(\"k6\", \"v6\"))));\n\n  TEST_current_time_ms += 2000;\n  resp = Run({\"xclaim\", \"foo\", \"group\", \"alice\", \"0\", \"1-4\", \"TIME\",\n              absl::StrCat(TEST_current_time_ms - 500), \"justid\"});\n  EXPECT_THAT(resp.GetString(), \"1-4\");\n\n  // min idle time is exceeded for this entry\n  resp = Run({\"xclaim\", \"foo\", \"group\", \"bob\", \"600\", \"1-4\"});\n  ASSERT_THAT(resp, ArrLen(0));\n\n  resp = Run({\"xclaim\", \"foo\", \"group\", \"bob\", \"400\", \"1-4\", \"justid\"});\n  EXPECT_THAT(resp.GetString(), \"1-4\");\n\n  //  test RETRYCOUNT\n  Run({\"xadd\", \"foo\", \"1-6\", \"k7\", \"v7\"});\n  resp = Run({\"xclaim\", \"foo\", \"group\", \"bob\", \"0\", \"1-6\", \"force\", \"justid\", \"retrycount\", \"5\"});\n  EXPECT_THAT(resp.GetString(), \"1-6\");\n  resp = Run({\"xpending\", \"foo\", \"group\", 
\"1-6\", \"1-6\", \"1\"});\n  EXPECT_THAT(resp.GetVec(), ElementsAre(\"1-6\", \"bob\", ArgType(RespExpr::INT64), IntArg(5)));\n\n  // test LASTID\n  Run({\"xreadgroup\", \"group\", \"group\", \"bob\", \"count\", \"2\", \"streams\", \"foo\", \">\"});\n  Run({\"xclaim\", \"foo\", \"group\", \"alice\", \"0\", \"1-6\", \"LASTID\", \"1-4\"});\n  resp = Run({\"xinfo\", \"groups\", \"foo\"});\n  EXPECT_EQ(resp.GetVec()[7], \"1-6\");\n\n  Run({\"xclaim\", \"foo\", \"group\", \"bob\", \"0\", \"1-6\", \"LASTID\", \"1-9\"});\n  resp = Run({\"xinfo\", \"groups\", \"foo\"});\n  EXPECT_EQ(resp.GetVec()[7], \"1-9\");\n}\n\nTEST_F(StreamFamilyTest, XTrim) {\n  Run({\"xadd\", \"foo\", \"1-*\", \"k\", \"v\"});\n  Run({\"xadd\", \"foo\", \"1-*\", \"k\", \"v\"});\n  Run({\"xadd\", \"foo\", \"1-*\", \"k\", \"v\"});\n  Run({\"xadd\", \"foo\", \"1-*\", \"k\", \"v\"});\n\n  // Trim to maxlen 2, 2 entries should have been deleted with 2 entries remaining.\n  auto resp = Run({\"xtrim\", \"foo\", \"maxlen\", \"2\"});\n  EXPECT_THAT(resp, IntArg(2));\n  resp = Run({\"xlen\", \"foo\"});\n  EXPECT_THAT(resp, IntArg(2));\n\n  Run({\"xadd\", \"foo\", \"1-*\", \"k\", \"v\"});\n  Run({\"xadd\", \"foo\", \"1-*\", \"k\", \"v\"});\n\n  // Trim messages whose ID is before 1-4, 2 entries should have been deleted with\n  // 2 entries remaining.\n  resp = Run({\"xtrim\", \"foo\", \"minid\", \"1-4\"});\n  EXPECT_THAT(resp, IntArg(2));\n  resp = Run({\"xlen\", \"foo\"});\n  EXPECT_THAT(resp, IntArg(2));\n\n  // Trim no changes needed.\n  resp = Run({\"xtrim\", \"foo\", \"maxlen\", \"5\"});\n  EXPECT_THAT(resp, IntArg(0));\n  resp = Run({\"xlen\", \"foo\"});\n  EXPECT_THAT(resp, IntArg(2));\n\n  Run({\"xadd\", \"foo\", \"1-*\", \"k\", \"v\"});\n  Run({\"xadd\", \"foo\", \"1-*\", \"k\", \"v\"});\n\n  // Trim exact.\n  resp = Run({\"xtrim\", \"foo\", \"maxlen\", \"=\", \"2\"});\n  EXPECT_THAT(resp, IntArg(2));\n  resp = Run({\"xlen\", \"foo\"});\n  EXPECT_THAT(resp, IntArg(2));\n\n  Run({\"xadd\", \"foo\", 
\"1-*\", \"k\", \"v\"});\n  Run({\"xadd\", \"foo\", \"1-*\", \"k\", \"v\"});\n\n  // Trim approx.\n  resp = Run({\"xtrim\", \"foo\", \"maxlen\", \"~\", \"2\"});\n  EXPECT_THAT(resp, IntArg(0));\n  resp = Run({\"xlen\", \"foo\"});\n  EXPECT_THAT(resp, IntArg(4));\n\n  // Trim stream not found should return no entries.\n  resp = Run({\"xtrim\", \"notfound\", \"maxlen\", \"5\"});\n  EXPECT_THAT(resp, IntArg(0));\n}\n\nTEST_F(StreamFamilyTest, XTrimInvalidArgs) {\n  // Missing threshold.\n  auto resp = Run({\"xtrim\", \"foo\"});\n  EXPECT_THAT(resp, ErrArg(\"wrong number of arguments\"));\n  resp = Run({\"xtrim\", \"foo\", \"maxlen\"});\n  EXPECT_THAT(resp, ErrArg(\"wrong number of arguments\"));\n  resp = Run({\"xtrim\", \"foo\", \"minid\"});\n  EXPECT_THAT(resp, ErrArg(\"wrong number of arguments\"));\n\n  // Invalid threshold.\n  resp = Run({\"xtrim\", \"foo\", \"maxlen\", \"nan\"});\n  EXPECT_THAT(resp, ErrArg(\"not an integer or out of range\"));\n  resp = Run({\"xtrim\", \"foo\", \"maxlen\", \"-1\"});\n  EXPECT_THAT(resp, ErrArg(\"not an integer or out of range\"));\n  resp = Run({\"xtrim\", \"foo\", \"minid\", \"nan\"});\n  EXPECT_THAT(resp, ErrArg(\"syntax error\"));\n\n  // Limit with non-approx.\n  resp = Run({\"xtrim\", \"foo\", \"maxlen\", \"2\", \"limit\", \"5\"});\n  EXPECT_THAT(resp, ErrArg(\"syntax error\"));\n\n  // Include both maxlen and minid.\n  resp = Run({\"xtrim\", \"foo\", \"maxlen\", \"2\", \"minid\", \"1-1\"});\n  EXPECT_THAT(resp, ErrArg(\"MAXLEN and MINID options at the same time are not compatible\"));\n  resp = Run({\"xtrim\", \"foo\", \"minid\", \"1-1\", \"maxlen\", \"2\"});\n  EXPECT_THAT(resp, ErrArg(\"MAXLEN and MINID options at the same time are not compatible\"));\n\n  // Invalid limit.\n  resp = Run({\"xtrim\", \"foo\", \"maxlen\", \"~\", \"2\", \"limit\", \"nan\"});\n  EXPECT_THAT(resp, ErrArg(\"value is not an integer or out of range\"));\n}\n\nTEST_F(StreamFamilyTest, XTrimWrongSyntax) {\n  auto resp = Run({\"xtrim\", \"-992\", 
\"k1 \\\"v1\\\" k2 \\\"v2 with spaces\\\" \\\"k3 with spaces\\\" \\\"v3\\\"\",\n                   \"list1 element1\"});\n  EXPECT_THAT(resp, ErrArg(\"syntax error\"));\n}\n\nTEST_F(StreamFamilyTest, XPending) {\n  Run({\"xadd\", \"foo\", \"1-0\", \"k1\", \"v1\"});\n  Run({\"xadd\", \"foo\", \"1-1\", \"k2\", \"v2\"});\n  Run({\"xadd\", \"foo\", \"1-2\", \"k3\", \"v3\"});\n\n  // create a group for foo stream\n  Run({\"xgroup\", \"create\", \"foo\", \"group\", \"0\"});\n  // alice consume all the stream entries\n  Run({\"xreadgroup\", \"group\", \"group\", \"alice\", \"streams\", \"foo\", \">\"});\n  // bob doesn't have pending entries\n  Run({\"xgroup\", \"createconsumer\", \"foo\", \"group\", \"bob\"});\n\n  // XPending should print 4 entries\n  auto resp = Run({\"xpending\", \"foo\", \"group\"});\n  EXPECT_THAT(resp, RespArray(ElementsAre(\n                        IntArg(3), \"1-0\", \"1-2\",\n                        RespArray(ElementsAre(RespArray(ElementsAre(\"alice\", IntArg(3))))))));\n\n  resp = Run({\"xpending\", \"foo\", \"group\", \"-\", \"+\", \"10\"});\n  EXPECT_THAT(resp,\n              RespArray(ElementsAre(\n                  RespArray(ElementsAre(\"1-0\", \"alice\", ArgType(RespExpr::INT64), IntArg(1))),\n                  RespArray(ElementsAre(\"1-1\", \"alice\", ArgType(RespExpr::INT64), IntArg(1))),\n                  RespArray(ElementsAre(\"1-2\", \"alice\", ArgType(RespExpr::INT64), IntArg(1))))));\n\n  // only return a single entry\n  resp = Run({\"xpending\", \"foo\", \"group\", \"-\", \"+\", \"1\"});\n  EXPECT_THAT(resp.GetVec(), ElementsAre(\"1-0\", \"alice\", ArgType(RespExpr::INT64), IntArg(1)));\n\n  // Bob read a new entry\n  Run({\"xadd\", \"foo\", \"1-3\", \"k4\", \"v4\"});\n  Run({\"xreadgroup\", \"group\", \"group\", \"bob\", \"streams\", \"foo\", \">\"});\n  // Bob now has` an entry in his pending list\n  resp = Run({\"xpending\", \"foo\", \"group\", \"-\", \"+\", \"10\", \"bob\"});\n  EXPECT_THAT(resp.GetVec(), 
ElementsAre(\"1-3\", \"bob\", ArgType(RespExpr::INT64), IntArg(1)));\n\n  Run({\"xadd\", \"foo\", \"1-4\", \"k5\", \"v5\"});\n  TEST_current_time_ms = 100;\n  Run({\"xreadgroup\", \"group\", \"group\", \"bob\", \"streams\", \"foo\", \">\"});\n  TEST_current_time_ms += 3000;\n\n  // min-idle-time is exceeding the delivery time of last inserted entry\n  resp = Run({\"xpending\", \"foo\", \"group\", \"IDLE\", \"4000\", \"-\", \"+\", \"10\"});\n  EXPECT_THAT(resp, ArrLen(0));\n}\n\nTEST_F(StreamFamilyTest, XPendingMissingGroup) {\n  auto resp = Run({\"xpending\", \"?\"});\n  EXPECT_THAT(resp, ErrArg(\"wrong number of arguments\"));\n}\n\nTEST_F(StreamFamilyTest, XReadGroupEmptyConsumer) {\n  Run({\"xadd\", \"s\", \"*\", \"x\", \"y\"});\n  Run({\"xgroup\", \"create\", \"s\", \"g\", \"0\"});\n  auto resp = Run({\"xreadgroup\", \"group\", \"g\", \"\", \"streams\", \"s\", \">\"});\n  EXPECT_THAT(resp, ErrArg(\"consumer name can't be empty\"));\n}\n\nTEST_F(StreamFamilyTest, XPendingInvalidArgs) {\n  Run({\"xadd\", \"foo\", \"1-0\", \"k1\", \"v1\"});\n  Run({\"xadd\", \"foo\", \"1-1\", \"k2\", \"v2\"});\n\n  auto resp = Run({\"xpending\", \"unknown\", \"group\"});\n  EXPECT_THAT(resp, ErrArg(\"no such key\"));\n\n  // group doesn't exist\n  resp = Run({\"xpending\", \"foo\", \"group\"});\n  EXPECT_THAT(resp, ErrArg(\"NOGROUP\"));\n\n  Run({\"xgroup\", \"create\", \"foo\", \"group\", \"0\"});\n  // start end count not provided\n  resp = Run({\"xpending\", \"foo\", \"group\", \"IDLE\", \"0\"});\n  EXPECT_THAT(resp, ErrArg(\"wrong number of arguments\"));\n\n  // count not provided\n  resp = Run({\"xpending\", \"foo\", \"group\", \"-\", \"+\"});\n  EXPECT_THAT(resp, ErrArg(\"wrong number of arguments\"));\n}\n\nTEST_F(StreamFamilyTest, XPendingEmpty) {\n  Run({\"XADD\", \"stream\", \"*\", \"foo\", \"bar\"});\n  Run({\"XADD\", \"stream\", \"*\", \"foo\", \"bar\"});\n  Run({\"XGROUP\", \"CREATE\", \"stream\", \"group\", \"0\"});\n  auto resp = Run({\"XPENDING\", \"stream\", 
\"group\"});\n  EXPECT_THAT(resp, RespArray(ElementsAre(IntArg(0), kMatchNil, kMatchNil, kMatchNil)));\n}\n\nTEST_F(StreamFamilyTest, XAck) {\n  Run({\"xadd\", \"foo\", \"1-0\", \"k0\", \"v0\"});\n  Run({\"xadd\", \"foo\", \"1-1\", \"k1\", \"v1\"});\n  Run({\"xadd\", \"foo\", \"1-2\", \"k2\", \"v2\"});\n  Run({\"xadd\", \"foo\", \"1-3\", \"k3\", \"v3\"});\n  Run({\"xgroup\", \"create\", \"foo\", \"cgroup\", \"0\"});\n  Run({\"xreadgroup\", \"group\", \"cgroup\", \"consumer\", \"count\", \"4\", \"streams\", \"foo\", \">\"});\n\n  // PEL of cgroup now has 4 messages.\n  // Acknowledge a message that exists.\n  auto resp = Run({\"xack\", \"foo\", \"cgroup\", \"1-0\"});\n  EXPECT_THAT(resp, IntArg(1));\n\n  // acknowledge a message from non-existing stream.\n  resp = Run({\"xack\", \"nosuchstream\", \"cgroup\", \"1-0\"});\n  EXPECT_THAT(resp, IntArg(0));\n\n  // acknowledge a message for a non-existing consumer group.\n  resp = Run({\"xack\", \"foo\", \"nosuchcgroup\", \"1-0\"});\n  EXPECT_THAT(resp, IntArg(0));\n\n  // Verifies message id 1-0 gets removed from PEL.\n  resp = Run({\"xreadgroup\", \"group\", \"cgroup\", \"consumer\", \"streams\", \"foo\", \"0\"});\n  EXPECT_THAT(resp,\n              RespArray(ElementsAre(\n                  \"foo\", RespArray(ElementsAre(\n                             RespArray(ElementsAre(\"1-1\", RespArray(ElementsAre(\"k1\", \"v1\")))),\n                             RespArray(ElementsAre(\"1-2\", RespArray(ElementsAre(\"k2\", \"v2\")))),\n                             RespArray(ElementsAre(\"1-3\", RespArray(ElementsAre(\"k3\", \"v3\")))))))));\n\n  // acknowledge a message that doesn't exist\n  resp = Run({\"xack\", \"foo\", \"cgroup\", \"1-9\"});\n  EXPECT_THAT(resp, IntArg(0));\n\n  // Verifies no message gets removed from PEL.\n  resp = Run({\"xreadgroup\", \"group\", \"cgroup\", \"consumer\", \"streams\", \"foo\", \"0\"});\n  EXPECT_THAT(resp,\n              RespArray(ElementsAre(\n                  \"foo\", 
RespArray(ElementsAre(\n                             RespArray(ElementsAre(\"1-1\", RespArray(ElementsAre(\"k1\", \"v1\")))),\n                             RespArray(ElementsAre(\"1-2\", RespArray(ElementsAre(\"k2\", \"v2\")))),\n                             RespArray(ElementsAre(\"1-3\", RespArray(ElementsAre(\"k3\", \"v3\")))))))));\n\n  // acknowledge another message that exists and one non-existing message.\n  resp = Run({\"xack\", \"foo\", \"cgroup\", \"1-3\", \"1-9\"});\n  EXPECT_THAT(resp, IntArg(1));\n\n  // Verifies only \"1-3\" gets removed from PEL.\n  resp = Run({\"xreadgroup\", \"group\", \"cgroup\", \"consumer\", \"streams\", \"foo\", \"0\"});\n  EXPECT_THAT(resp,\n              RespArray(ElementsAre(\n                  \"foo\", RespArray(ElementsAre(\n                             RespArray(ElementsAre(\"1-1\", RespArray(ElementsAre(\"k1\", \"v1\")))),\n                             RespArray(ElementsAre(\"1-2\", RespArray(ElementsAre(\"k2\", \"v2\")))))))));\n\n  // acknowledge all the existing messages left.\n  resp = Run({\"xack\", \"foo\", \"cgroup\", \"1-1\", \"1-2\"});\n  EXPECT_THAT(resp, IntArg(2));\n\n  // Verifies that PEL is empty.\n  resp = Run({\"xreadgroup\", \"group\", \"cgroup\", \"consumer\", \"streams\", \"foo\", \"0\"});\n  EXPECT_THAT(resp, RespArray(ElementsAre(\"foo\", ArrLen(0))));\n}\n\nTEST_F(StreamFamilyTest, XInfoGroups) {\n  Run({\"del\", \"mystream\"});\n  Run({\"xgroup\", \"create\", \"mystream\", \"mygroup\", \"$\", \"MKSTREAM\"});\n\n  // non-existent-stream\n  auto resp = Run({\"xinfo\", \"groups\", \"non-existent-stream\"});\n  EXPECT_THAT(resp, ErrArg(\"no such key\"));\n\n  // group with no consumers\n  resp = Run({\"xinfo\", \"groups\", \"mystream\"});\n  EXPECT_THAT(resp, ArrLen(12));\n  EXPECT_THAT(resp.GetVec(),\n              ElementsAre(\"name\", \"mygroup\", \"consumers\", IntArg(0), \"pending\", IntArg(0),\n                          \"last-delivered-id\", \"0-0\", \"entries-read\", kMatchNil, \"lag\", 
IntArg(0)));\n\n  // group with multiple consumers\n  Run({\"xgroup\", \"createconsumer\", \"mystream\", \"mygroup\", \"consumer1\"});\n  Run({\"xgroup\", \"createconsumer\", \"mystream\", \"mygroup\", \"consumer2\"});\n  resp = Run({\"xinfo\", \"groups\", \"mystream\"});\n  EXPECT_THAT(resp, ArrLen(12));\n  EXPECT_THAT(resp.GetVec()[3], IntArg(2));\n\n  // group with lag\n  Run({\"xadd\", \"mystream\", \"1-0\", \"test-field-1\", \"test-value-1\"});\n  Run({\"xadd\", \"mystream\", \"2-0\", \"test-field-2\", \"test-value-2\"});\n  resp = Run({\"xinfo\", \"groups\", \"mystream\"});\n  EXPECT_THAT(resp.GetVec()[11], IntArg(2));\n  EXPECT_THAT(resp.GetVec()[7], \"0-0\");\n\n  // group with no lag, before ack\n  Run({\"xreadgroup\", \"group\", \"mygroup\", \"consumer1\", \"STREAMS\", \"mystream\", \">\"});\n  resp = Run({\"xinfo\", \"groups\", \"mystream\"});\n  EXPECT_THAT(resp.GetVec(),\n              ElementsAre(\"name\", \"mygroup\", \"consumers\", IntArg(2), \"pending\", IntArg(2),\n                          \"last-delivered-id\", \"2-0\", \"entries-read\", IntArg(2), \"lag\", IntArg(0)));\n\n  // after ack\n  Run({\"xack\", \"mystream\", \"mygroup\", \"1-0\"});\n  Run({\"xack\", \"mystream\", \"mygroup\", \"2-0\"});\n  resp = Run({\"xinfo\", \"groups\", \"mystream\"});\n  EXPECT_THAT(resp.GetVec(),\n              ElementsAre(\"name\", \"mygroup\", \"consumers\", IntArg(2), \"pending\", IntArg(0),\n                          \"last-delivered-id\", \"2-0\", \"entries-read\", IntArg(2), \"lag\", IntArg(0)));\n}\n\nTEST_F(StreamFamilyTest, XInfoConsumers) {\n  Run({\"del\", \"mystream\"});\n  Run({\"xgroup\", \"create\", \"mystream\", \"mygroup\", \"$\", \"MKSTREAM\"});\n\n  // no consumer\n  auto resp = Run({\"xinfo\", \"consumers\", \"mystream\", \"mygroup\"});\n  EXPECT_THAT(resp, ArrLen(0));\n\n  // invalid key\n  resp = Run({\"xinfo\", \"consumers\", \"non-existent-stream\", \"mygroup\"});\n  EXPECT_THAT(resp, ErrArg(\"no such key\"));\n\n  // invalid group\n  
resp = Run({\"xinfo\", \"consumers\", \"mystream\", \"non-existent-group\"});\n  EXPECT_THAT(resp, ErrArg(\"NOGROUP\"));\n\n  Run({\"xgroup\", \"createconsumer\", \"mystream\", \"mygroup\", \"first-consumer\"});\n  Run({\"xgroup\", \"createconsumer\", \"mystream\", \"mygroup\", \"second-consumer\"});\n  resp = Run({\"xinfo\", \"consumers\", \"mystream\", \"mygroup\"});\n  EXPECT_THAT(resp, ArrLen(2));\n  EXPECT_THAT(resp.GetVec()[0], ArrLen(8));\n  EXPECT_THAT(resp.GetVec()[1], ArrLen(8));\n  EXPECT_THAT(resp.GetVec()[0].GetVec()[1], \"first-consumer\");\n  EXPECT_THAT(resp.GetVec()[1].GetVec()[1], \"second-consumer\");\n\n  Run({\"xadd\", \"mystream\", \"1-0\", \"test-field-1\", \"test-value-1\"});\n  Run({\"xreadgroup\", \"group\", \"mygroup\", \"consumer1\", \"STREAMS\", \"mystream\", \">\"});\n  resp = Run({\"xinfo\", \"consumers\", \"mystream\", \"mygroup\"});\n  // pending for first-consumer\n  EXPECT_THAT(resp.GetVec()[0].GetVec()[3], IntArg(1));\n  // pending for second-consumer\n  EXPECT_THAT(resp.GetVec()[1].GetVec()[3], IntArg(0));\n}\n\nTEST_F(StreamFamilyTest, XAutoClaim) {\n  Run({\"xadd\", \"foo\", \"1-0\", \"k1\", \"v1\"});\n  Run({\"xadd\", \"foo\", \"1-1\", \"k2\", \"v2\"});\n  Run({\"xadd\", \"foo\", \"1-2\", \"k3\", \"v3\"});\n  Run({\"xadd\", \"foo\", \"1-3\", \"k4\", \"v4\"});\n\n  // create a group for foo stream\n  Run({\"xgroup\", \"create\", \"foo\", \"group\", \"0\"});\n  // alice consume all the stream entries\n  Run({\"xreadgroup\", \"group\", \"group\", \"alice\", \"streams\", \"foo\", \">\"});\n\n  // bob claims alice's two pending stream entries\n  // testing the mandatory command options.\n  auto resp = Run({\"xautoclaim\", \"foo\", \"group\", \"bob\", \"0\", \"1-2\"});\n  EXPECT_THAT(\n      resp,\n      RespArray(ElementsAre(\n          \"0-0\",\n          RespArray(ElementsAre(RespArray(ElementsAre(\"1-2\", RespArray(ElementsAre(\"k3\", \"v3\")))),\n                                RespArray(ElementsAre(\"1-3\", 
RespArray(ElementsAre(\"k4\", \"v4\")))))),\n          RespArray(ElementsAre()))));\n\n  // bob really has these claimed entries\n  resp = Run({\"xreadgroup\", \"group\", \"group\", \"bob\", \"streams\", \"foo\", \"0\"});\n  EXPECT_THAT(resp,\n              RespArray(ElementsAre(\n                  \"foo\", RespArray(ElementsAre(\n                             RespArray(ElementsAre(\"1-2\", RespArray(ElementsAre(\"k3\", \"v3\")))),\n                             RespArray(ElementsAre(\"1-3\", RespArray(ElementsAre(\"k4\", \"v4\")))))))));\n\n  // alice no longer have those entries\n  resp = Run({\"xreadgroup\", \"group\", \"group\", \"alice\", \"streams\", \"foo\", \"0\"});\n  EXPECT_THAT(resp,\n              RespArray(ElementsAre(\n                  \"foo\", RespArray(ElementsAre(\n                             RespArray(ElementsAre(\"1-0\", RespArray(ElementsAre(\"k1\", \"v1\")))),\n                             RespArray(ElementsAre(\"1-1\", RespArray(ElementsAre(\"k2\", \"v2\")))))))));\n\n  // xautoclaim ensures that entries before the min-idle-time are not claimed by bob\n  resp = Run({\"xautoclaim\", \"foo\", \"group\", \"bob\", \"3600000\", \"0-0\"});\n  EXPECT_THAT(resp,\n              RespArray(ElementsAre(\"0-0\", RespArray(ElementsAre()), RespArray(ElementsAre()))));\n\n  Run({\"xadd\", \"foo\", \"1-4\", \"k5\", \"v5\"});\n  Run({\"xreadgroup\", \"group\", \"group\", \"alice\", \"streams\", \"foo\", \">\"});\n  // xautoclaim returns only claimed ids when justid is set\n  resp = Run({\"xautoclaim\", \"foo\", \"group\", \"bob\", \"0\", \"0-0\", \"justid\"});\n  EXPECT_THAT(\n      resp, RespArray(ElementsAre(\"0-0\", RespArray(ElementsAre(\"1-0\", \"1-1\", \"1-2\", \"1-3\", \"1-4\")),\n                                  RespArray(ElementsAre()))));\n\n  Run({\"xadd\", \"foo\", \"1-5\", \"k6\", \"v6\"});\n  Run({\"xadd\", \"foo\", \"1-6\", \"k7\", \"v7\"});\n  Run({\"xreadgroup\", \"group\", \"group\", \"alice\", \"streams\", \"foo\", \">\"});\n  // test count 
and end_id\n  resp = Run({\"xautoclaim\", \"foo\", \"group\", \"bob\", \"0\", \"1-5\", \"count\", \"1\", \"justid\"});\n  EXPECT_THAT(\n      resp, RespArray(ElementsAre(\"1-6\", RespArray(ElementsAre(\"1-5\")), RespArray(ElementsAre()))));\n\n  resp = Run({\"xautoclaim\", \"foo\", \"group\", \"bob\", \"0\", \"1-6\", \"count\", \"1\", \"justid\"});\n  EXPECT_THAT(\n      resp, RespArray(ElementsAre(\"0-0\", RespArray(ElementsAre(\"1-6\")), RespArray(ElementsAre()))));\n\n  resp = Run({\"xautoclaim\", \"foo\", \"group\", \"bob\", \"0\", \"1-10\", \"count\", \"1\", \"justid\"});\n  EXPECT_THAT(resp,\n              RespArray(ElementsAre(\"0-0\", RespArray(ElementsAre()), RespArray(ElementsAre()))));\n\n  // if a message being claimed is deleted, it should be listed separately.\n  Run({\"xdel\", \"foo\", \"1-2\", \"1-4\"});\n  resp = Run({\"xautoclaim\", \"foo\", \"group\", \"alice\", \"0\", \"0-0\", \"justid\"});\n  EXPECT_THAT(\n      resp, RespArray(ElementsAre(\"0-0\", RespArray(ElementsAre(\"1-0\", \"1-1\", \"1-3\", \"1-5\", \"1-6\")),\n                                  RespArray(ElementsAre(\"1-2\", \"1-4\")))));\n}\n\nTEST_F(StreamFamilyTest, XInfoStream) {\n  Run({\"del\", \"mystream\"});\n  Run({\"xgroup\", \"create\", \"mystream\", \"mygroup\", \"$\", \"MKSTREAM\"});\n  Run({\"xgroup\", \"createconsumer\", \"mystream\", \"mygroup\", \"first-consumer\"});\n\n  // invalid key\n  auto resp = Run({\"xinfo\", \"stream\", \"non-existent-stream\"});\n  EXPECT_THAT(resp, ErrArg(\"no such key\"));\n\n  // invalid args\n  resp = Run({\"xinfo\", \"stream\", \"mystream\", \"extra-arg\"});\n  EXPECT_THAT(\n      resp,\n      ErrArg(\"unknown subcommand or wrong number of arguments for 'STREAM'. Try XINFO HELP.\"));\n  resp = Run({\"xinfo\", \"stream\", \"mystream\", \"full\", \"count\"});\n  EXPECT_THAT(\n      resp,\n      ErrArg(\"unknown subcommand or wrong number of arguments for 'STREAM'. 
Try XINFO HELP.\"));\n  resp = Run({\"xinfo\", \"stream\", \"mystream\", \"full\", \"count\", \"a\"});\n  EXPECT_THAT(resp, ErrArg(\"value is not an integer or out of range\"));\n\n  // no message in stream\n  resp = Run({\"xinfo\", \"stream\", \"mystream\"});\n  EXPECT_THAT(resp, ArrLen(20));\n  EXPECT_THAT(\n      resp.GetVec(),\n      ElementsAre(\"length\", IntArg(0), \"radix-tree-keys\", IntArg(0), \"radix-tree-nodes\", IntArg(1),\n                  \"last-generated-id\", \"0-0\", \"max-deleted-entry-id\", \"0-0\", \"entries-added\",\n                  IntArg(0), \"recorded-first-entry-id\", \"0-0\", \"groups\", IntArg(1), \"first-entry\",\n                  ArgType(RespExpr::NIL_ARRAY), \"last-entry\", ArgType(RespExpr::NIL_ARRAY)));\n\n  Run({\"xadd\", \"mystream\", \"1-1\", \"message\", \"one\"});\n  Run({\"xadd\", \"mystream\", \"2-1\", \"message\", \"two\"});\n  Run({\"xadd\", \"mystream\", \"3-1\", \"message\", \"three\"});\n  Run({\"xadd\", \"mystream\", \"4-1\", \"message\", \"four\"});\n  Run({\"xadd\", \"mystream\", \"5-1\", \"message\", \"five\"});\n  Run({\"xadd\", \"mystream\", \"6-1\", \"message\", \"six\"});\n  Run({\"xadd\", \"mystream\", \"7-1\", \"message\", \"seven\"});\n  Run({\"xadd\", \"mystream\", \"8-1\", \"message\", \"eight\"});\n  Run({\"xadd\", \"mystream\", \"9-1\", \"message\", \"nine\"});\n  Run({\"xadd\", \"mystream\", \"10-1\", \"message\", \"ten\"});\n  Run({\"xadd\", \"mystream\", \"11-1\", \"message\", \"eleven\"});\n  resp = Run({\"xinfo\", \"stream\", \"mystream\"});\n  EXPECT_THAT(resp.GetVec(),\n              ElementsAre(\"length\", IntArg(11), \"radix-tree-keys\", IntArg(1), \"radix-tree-nodes\",\n                          IntArg(2), \"last-generated-id\", \"11-1\", \"max-deleted-entry-id\", \"0-0\",\n                          \"entries-added\", IntArg(11), \"recorded-first-entry-id\", \"1-1\", \"groups\",\n                          IntArg(1), \"first-entry\", ArrLen(2), \"last-entry\", ArrLen(2)));\n  
EXPECT_THAT(resp.GetVec()[17].GetVec()[0], \"1-1\");\n  EXPECT_THAT(resp.GetVec()[17].GetVec()[1].GetVec(), ElementsAre(\"message\", \"one\"));\n  EXPECT_THAT(resp.GetVec()[19].GetVec()[0], \"11-1\");\n  EXPECT_THAT(resp.GetVec()[19].GetVec()[1].GetVec(), ElementsAre(\"message\", \"eleven\"));\n\n  // full - default\n  resp = Run({\"xinfo\", \"stream\", \"mystream\", \"full\"});\n  EXPECT_THAT(resp, ArrLen(18));\n  EXPECT_THAT(resp.GetVec()[15], ArrLen(10));\n  EXPECT_THAT(resp.GetVec()[17], ArrLen(1));\n  EXPECT_THAT(resp.GetVec()[17].GetVec()[0], ArrLen(14));\n  EXPECT_THAT(resp.GetVec(),\n              ElementsAre(\"length\", IntArg(11), \"radix-tree-keys\", IntArg(1), \"radix-tree-nodes\",\n                          IntArg(2), \"last-generated-id\", \"11-1\", \"max-deleted-entry-id\", \"0-0\",\n                          \"entries-added\", IntArg(11), \"recorded-first-entry-id\", \"1-1\", \"entries\",\n                          ArrLen(10), \"groups\", ArrLen(1)));\n  EXPECT_THAT(resp.GetVec()[17].GetVec()[0].GetVec(),\n              ElementsAre(\"name\", \"mygroup\", \"last-delivered-id\", \"0-0\", \"entries-read\", kMatchNil,\n                          \"lag\", IntArg(11), \"pel-count\", IntArg(0), \"pending\", ArrLen(0),\n                          \"consumers\", ArrLen(1)));\n  EXPECT_THAT(resp.GetVec()[17].GetVec()[0].GetVec()[13].GetVec()[0].GetVec(),\n              ElementsAre(\"name\", \"first-consumer\", \"seen-time\", ArgType(RespExpr::INT64),\n                          \"active-time\", IntArg(-1), \"pel-count\", IntArg(0), \"pending\", ArrLen(0)));\n\n  // full with count less than number of messages in stream\n  resp = Run({\"xinfo\", \"stream\", \"mystream\", \"full\", \"count\", \"5\"});\n  EXPECT_THAT(resp.GetVec()[15], ArrLen(5));\n\n  // full with count exceeding number of messages in stream\n  resp = Run({\"xinfo\", \"stream\", \"mystream\", \"full\", \"count\", \"12\"});\n  EXPECT_THAT(resp.GetVec()[15], ArrLen(11));\n\n  // full - all 
messages\n  resp = Run({\"xinfo\", \"stream\", \"mystream\", \"full\", \"count\", \"0\"});\n  EXPECT_THAT(resp.GetVec()[15], ArrLen(11));\n\n  // read message\n  Run({\"xreadgroup\", \"group\", \"mygroup\", \"first-consumer\", \"STREAMS\", \"mystream\", \">\"});\n  resp = Run({\"xinfo\", \"stream\", \"mystream\", \"full\", \"count\", \"0\"});\n  EXPECT_THAT(resp.GetVec()[15], ArrLen(11));\n  // group\n  EXPECT_THAT(resp.GetVec()[17].GetVec()[0].GetVec()[5], IntArg(11));   // entries-read\n  EXPECT_THAT(resp.GetVec()[17].GetVec()[0].GetVec()[7], IntArg(0));    // lag\n  EXPECT_THAT(resp.GetVec()[17].GetVec()[0].GetVec()[9], IntArg(11));   // pel-count\n  EXPECT_THAT(resp.GetVec()[17].GetVec()[0].GetVec()[11], ArrLen(11));  // pending list\n  // consumer\n  EXPECT_THAT(resp.GetVec()[17].GetVec()[0].GetVec()[13].GetVec()[0].GetVec()[7],\n              IntArg(11));  // pel-count\n  EXPECT_THAT(resp.GetVec()[17].GetVec()[0].GetVec()[13].GetVec()[0].GetVec()[9],\n              ArrLen(11));  // pending list\n\n  // delete message\n  Run({\"xdel\", \"mystream\", \"1-1\"});\n  resp = Run({\"xinfo\", \"stream\", \"mystream\"});\n  EXPECT_THAT(resp.GetVec(),\n              ElementsAre(\"length\", IntArg(10), \"radix-tree-keys\", IntArg(1), \"radix-tree-nodes\",\n                          IntArg(2), \"last-generated-id\", \"11-1\", \"max-deleted-entry-id\", \"1-1\",\n                          \"entries-added\", IntArg(11), \"recorded-first-entry-id\", \"2-1\", \"groups\",\n                          IntArg(1), \"first-entry\", ArrLen(2), \"last-entry\", ArrLen(2)));\n  EXPECT_THAT(resp.GetVec()[17].GetVec()[0], \"2-1\");\n  EXPECT_THAT(resp.GetVec()[17].GetVec()[1].GetVec(), ElementsAre(\"message\", \"two\"));\n  EXPECT_THAT(resp.GetVec()[19].GetVec()[0], \"11-1\");\n  EXPECT_THAT(resp.GetVec()[19].GetVec()[1].GetVec(), ElementsAre(\"message\", \"eleven\"));\n\n  resp = Run({\"xinfo\", \"stream\", \"mystream\", \"full\", \"count\", \"0\"});\n  EXPECT_THAT(resp.GetVec()[15], 
ArrLen(10));\n  EXPECT_THAT(resp.GetVec(),\n              ElementsAre(\"length\", IntArg(10), \"radix-tree-keys\", IntArg(1), \"radix-tree-nodes\",\n                          IntArg(2), \"last-generated-id\", \"11-1\", \"max-deleted-entry-id\", \"1-1\",\n                          \"entries-added\", IntArg(11), \"recorded-first-entry-id\", \"2-1\", \"entries\",\n                          ArrLen(10), \"groups\", ArrLen(1)));\n  EXPECT_THAT(resp.GetVec()[17].GetVec()[0].GetVec(),\n              ElementsAre(\"name\", \"mygroup\", \"last-delivered-id\", \"11-1\", \"entries-read\",\n                          IntArg(11), \"lag\", IntArg(0), \"pel-count\", IntArg(11), \"pending\",\n                          ArrLen(11), \"consumers\", ArrLen(1)));\n  EXPECT_THAT(\n      resp.GetVec()[17].GetVec()[0].GetVec()[13].GetVec()[0].GetVec(),\n      ElementsAre(\"name\", \"first-consumer\", \"seen-time\", ArgType(RespExpr::INT64), \"active-time\",\n                  ArgType(RespExpr::INT64), \"pel-count\", IntArg(11), \"pending\", ArrLen(11)));\n}\n\nTEST_F(StreamFamilyTest, AutoClaimPelItemsFromAnotherConsumer) {\n  auto resp = Run({\"xadd\", \"mystream\", \"*\", \"a\", \"1\"});\n  string id1 = resp.GetString();\n  resp = Run({\"xadd\", \"mystream\", \"*\", \"b\", \"2\"});\n  string id2 = resp.GetString();\n  resp = Run({\"xadd\", \"mystream\", \"*\", \"c\", \"3\"});\n  string id3 = resp.GetString();\n  resp = Run({\"xadd\", \"mystream\", \"*\", \"d\", \"4\"});\n  string id4 = resp.GetString();\n\n  Run({\"XGROUP\", \"CREATE\", \"mystream\", \"mygroup\", \"0\"});\n\n  // Consumer 1 reads item 1 from the stream without acknowledgements.\n  // Consumer 2 then claims pending item 1 from the PEL of consumer 1\n  resp = Run(\n      {\"XREADGROUP\", \"GROUP\", \"mygroup\", \"consumer1\", \"COUNT\", \"1\", \"STREAMS\", \"mystream\", \">\"});\n\n  auto match_a1 = RespElementsAre(\"a\", \"1\");\n  ASSERT_THAT(resp, RespElementsAre(\"mystream\", RespElementsAre(RespElementsAre(id1, 
match_a1))));\n\n  AdvanceTime(200);  // Advance time to greater time than the idle time in the autoclaim (10)\n  resp = Run({\"XAUTOCLAIM\", \"mystream\", \"mygroup\", \"consumer2\", \"10\", \"-\", \"COUNT\", \"1\"});\n\n  EXPECT_THAT(resp, RespElementsAre(\"0-0\", ArrLen(1), ArrLen(0)));\n  EXPECT_THAT(resp.GetVec()[1], RespElementsAre(RespElementsAre(id1, match_a1)));\n\n  Run({\"XREADGROUP\", \"GROUP\", \"mygroup\", \"consumer1\", \"COUNT\", \"3\", \"STREAMS\", \"mystream\", \">\"});\n  AdvanceTime(200);\n\n  // Delete item 2 from the stream.Now consumer 1 has PEL that contains\n  // only item 3. Try to use consumer 2 to claim the deleted item 2\n  // from the PEL of consumer 1, this should return nil\n  resp = Run({\"XDEL\", \"mystream\", id2});\n  ASSERT_THAT(resp, IntArg(1));\n\n  // id1 and id3 are self - claimed here but not id2('count' was set to 3)\n  // we make sure id2 is indeed skipped(the cursor points to id4)\n  resp = Run({\"XAUTOCLAIM\", \"mystream\", \"mygroup\", \"consumer2\", \"10\", \"-\", \"COUNT\", \"3\"});\n  auto match_id1_a1 = RespElementsAre(id1, match_a1);\n  auto match_id3_c3 = RespElementsAre(id3, RespElementsAre(\"c\", \"3\"));\n  ASSERT_THAT(resp, RespElementsAre(id4, RespElementsAre(match_id1_a1, match_id3_c3),\n                                    RespElementsAre(id2)));\n  // Delete item 3 from the stream.Now consumer 1 has PEL that is empty.\n  // Try to use consumer 2 to claim the deleted item 3 from the PEL\n  // of consumer 1, this should return nil\n  AdvanceTime(200);\n\n  ASSERT_THAT(Run({\"XDEL\", \"mystream\", id4}), IntArg(1));\n\n  // id1 and id3 are self - claimed here but not id2 and id4('count' is default 100)\n  // we also test the JUSTID modifier here.note that, when using JUSTID,\n  // deleted entries are returned in reply(consistent with XCLAIM).\n  resp = Run({\"XAUTOCLAIM\", \"mystream\", \"mygroup\", \"consumer2\", \"10\", \"-\", \"JUSTID\"});\n  ASSERT_THAT(resp, RespElementsAre(\"0-0\", RespElementsAre(id1, 
id3), RespElementsAre(id4)));\n}\n\nTEST_F(StreamFamilyTest, AutoClaimDelCount) {\n  Run({\"xadd\", \"x\", \"1-0\", \"f\", \"v\"});\n  Run({\"xadd\", \"x\", \"2-0\", \"f\", \"v\"});\n  Run({\"xadd\", \"x\", \"3-0\", \"f\", \"v\"});\n  Run({\"XGROUP\", \"CREATE\", \"x\", \"grp\", \"0\"});\n  auto resp = Run({\"XREADGROUP\", \"GROUP\", \"grp\", \"Alice\", \"STREAMS\", \"x\", \">\"});\n\n  auto m1 = RespElementsAre(\"1-0\", _);\n  auto m2 = RespElementsAre(\"2-0\", _);\n  auto m3 = RespElementsAre(\"3-0\", _);\n  EXPECT_THAT(resp, RespElementsAre(\"x\", RespElementsAre(m1, m2, m3)));\n\n  EXPECT_THAT(Run({\"XDEL\", \"x\", \"1-0\"}), IntArg(1));\n  EXPECT_THAT(Run({\"XDEL\", \"x\", \"2-0\"}), IntArg(1));\n\n  resp = Run({\"XAUTOCLAIM\", \"x\", \"grp\", \"Bob\", \"0\", \"0-0\", \"COUNT\", \"1\"});\n  EXPECT_THAT(resp, RespElementsAre(\"2-0\", ArrLen(0), RespElementsAre(\"1-0\")));\n\n  resp = Run({\"XAUTOCLAIM\", \"x\", \"grp\", \"Bob\", \"0\", \"2-0\", \"COUNT\", \"1\"});\n  EXPECT_THAT(resp, RespElementsAre(\"3-0\", ArrLen(0), RespElementsAre(\"2-0\")));\n\n  resp = Run({\"XAUTOCLAIM\", \"x\", \"grp\", \"Bob\", \"0\", \"3-0\", \"COUNT\", \"1\"});\n  EXPECT_THAT(resp, RespElementsAre(\n                        \"0-0\", RespElementsAre(RespElementsAre(\"3-0\", RespElementsAre(\"f\", \"v\"))),\n                        ArrLen(0)));\n  resp = Run({\"xpending\", \"x\", \"grp\", \"-\", \"+\", \"10\", \"Alice\"});\n  EXPECT_THAT(resp, ArrLen(0));\n\n  resp = Run({\"XAUTOCLAIM\", \"x\", \"grp\", \"Bob\", \"0\", \"3-0\", \"COUNT\", \"704505322\"});\n  EXPECT_THAT(resp, ErrArg(\"COUNT\"));\n}\n\nTEST_F(StreamFamilyTest, XAddMaxSeq) {\n  Run({\"XADD\", \"x\", \"1-18446744073709551615\", \"f1\", \"v1\"});\n  auto resp = Run({\"XADD\", \"x\", \"1-*\", \"f2\", \"v2\"});\n  EXPECT_THAT(resp, ErrArg(\"The ID specified in XADD is equal or smaller\"));\n}\n\nTEST_F(StreamFamilyTest, XsetIdSmallerMaxDeleted) {\n  Run({\"XADD\", \"x\", \"1-1\", \"a\", \"1\"});\n  Run({\"XADD\", \"x\", 
\"1-2\", \"b\", \"2\"});\n  Run({\"XADD\", \"x\", \"1-3\", \"c\", \"3\"});\n  Run({\"XDEL\", \"x\", \"1-2\"});\n  Run({\"XDEL\", \"x\", \"1-3\"});\n  auto resp = Run({\"XINFO\", \"stream\", \"x\"});\n  ASSERT_THAT(resp, ArgType(RespExpr::ARRAY));\n  auto vec = resp.GetVec();\n  string max_del_id;\n  for (unsigned i = 0; i < vec.size(); i += 2) {\n    if (vec[i] == \"max-deleted-entry-id\") {\n      max_del_id = vec[i + 1].GetString();\n      break;\n    }\n  }\n  EXPECT_EQ(max_del_id, \"1-3\");\n\n  resp = Run({\"XSETID\", \"x\", \"1-2\"});\n  ASSERT_THAT(resp, ErrArg(\"smaller\"));\n}\n\nTEST_F(StreamFamilyTest, SeenActiveTime) {\n  TEST_current_time_ms = 1000;\n\n  Run({\"XGROUP\", \"CREATE\", \"mystream\", \"mygroup\", \"$\", \"MKSTREAM\"});\n  Run({\"XREADGROUP\", \"GROUP\", \"mygroup\", \"Alice\", \"COUNT\", \"1\", \"STREAMS\", \"mystream\", \">\"});\n  AdvanceTime(100);\n  auto resp = Run({\"xinfo\", \"consumers\", \"mystream\", \"mygroup\"});\n  EXPECT_THAT(resp, RespElementsAre(\"name\", \"Alice\", \"pending\", IntArg(0), \"idle\", IntArg(100),\n                                    \"inactive\", IntArg(-1)));\n\n  Run({\"XADD\", \"mystream\", \"*\", \"f\", \"v\"});\n  Run({\"XREADGROUP\", \"GROUP\", \"mygroup\", \"Alice\", \"COUNT\", \"1\", \"STREAMS\", \"mystream\", \">\"});\n  AdvanceTime(50);\n\n  resp = Run({\"xinfo\", \"consumers\", \"mystream\", \"mygroup\"});\n  EXPECT_THAT(resp, RespElementsAre(\"name\", \"Alice\", \"pending\", IntArg(1), \"idle\", IntArg(50),\n                                    \"inactive\", IntArg(50)));\n  AdvanceTime(100);\n  resp = Run({\"XREADGROUP\", \"GROUP\", \"mygroup\", \"Alice\", \"COUNT\", \"1\", \"STREAMS\", \"mystream\", \">\"});\n  EXPECT_THAT(resp, ArgType(RespExpr::NIL_ARRAY));\n\n  resp = Run({\"xinfo\", \"consumers\", \"mystream\", \"mygroup\"});\n\n  // Idle is 0 because XREADGROUP just run, but inactive continues clocking because nothing was\n  // read.\n  EXPECT_THAT(resp, RespElementsAre(\"name\", \"Alice\", 
\"pending\", IntArg(1), \"idle\", IntArg(0),\n                                    \"inactive\", IntArg(150)));\n\n  // Serialize/deserialize.\n  resp = Run({\"XINFO\", \"STREAM\", \"mystream\", \"FULL\"});\n  auto groups = resp.GetVec()[17];\n  auto consumers = groups.GetVec()[0].GetVec()[13].GetVec()[0];\n  EXPECT_THAT(consumers, RespElementsAre(\"name\", \"Alice\", \"seen-time\", IntArg(1250), \"active-time\",\n                                         IntArg(1100), \"pel-count\", IntArg(1), \"pending\", _));\n\n  resp = Run({\"DUMP\", \"mystream\"});\n  Run({\"del\", \"mystream\"});\n  resp = Run({\"RESTORE\", \"mystream\", \"0\", resp.GetString()});\n  EXPECT_EQ(resp, \"OK\");\n  resp = Run({\"XINFO\", \"STREAM\", \"mystream\", \"FULL\"});\n  groups = resp.GetVec()[17];\n  consumers = groups.GetVec()[0].GetVec()[13].GetVec()[0];\n  EXPECT_THAT(consumers, RespElementsAre(\"name\", \"Alice\", \"seen-time\", IntArg(1250), \"active-time\",\n                                         IntArg(1100), \"pel-count\", IntArg(1), \"pending\", _));\n}\n\nTEST_F(StreamFamilyTest, XClaimWithNonExistentGroup) {\n  Run({\"xadd\", \"mystream\", \"1-0\", \"field1\", \"value1\"});\n  Run({\"xadd\", \"mystream\", \"1-1\", \"field2\", \"value2\"});\n\n  auto resp = Run({\"xclaim\", \"mystream\", \"nonexistent-group\", \"consumer1\", \"0\", \"1-0\"});\n\n  EXPECT_THAT(resp, ArrLen(0));\n\n  resp = Run({\"xclaim\", \"mystream\", \"nonexistent-group\", \"consumer1\", \"0\", \"1-0\", \"1-1\"});\n  EXPECT_THAT(resp, ArrLen(0));\n\n  resp = Run({\"xclaim\", \"mystream\", \"nonexistent-group\", \"consumer1\", \"0\", \"1-0\", \"justid\"});\n  EXPECT_THAT(resp, ArrLen(0));\n}\n\nTEST_F(StreamFamilyTest, XDelNonExistentId) {\n  string key = R\"(k1 \"v1\" k2 \"v2 with spaces\" \"k3 with spaces\" \"v3\")\";\n  Run({\"XADD\", key, \"0\", \"set1\", \"member1\"});\n\n  // Try to delete a non-existent ID - should not crash (issue #5202)\n  auto resp = Run({\"XDEL\", key, \"46-867\"});\n  
EXPECT_THAT(resp, IntArg(0));  // Nothing deleted\n}\n\n// Test consumer group lag when tombstone created after last_id\nTEST_F(StreamFamilyTest, ConsumerGroupLagWithTombstoneAfterLastId) {\n  Run(\"DEL x\");\n  Run(\"XADD x 1-0 data a\");\n  Run(\"XADD x 2-0 data b\");\n  Run(\"XADD x 3-0 data c\");\n  Run(\"XADD x 4-0 data d\");\n  Run(\"XADD x 5-0 data e\");\n  Run(\"XADD x 6-0 data f\");\n  Run(\"XDEL x 3-0\");\n  Run(\"XGROUP CREATE x g1 0\");\n\n  // Read all messages (5 actual entries since 3-0 was deleted, but entries_added is 6)\n  Run(\"XREADGROUP GROUP g1 c11 COUNT 10 STREAMS x >\");\n  auto resp = Run(\"XINFO STREAM x FULL\");\n  EXPECT_THAT(resp.GetVec()[17].GetVec()[0].GetVec(),\n              ElementsAre(\"name\", \"g1\", \"last-delivered-id\", \"6-0\", \"entries-read\", IntArg(6),\n                          \"lag\", IntArg(0), \"pel-count\", _, \"pending\", _, \"consumers\", _));\n\n  // Add more messages\n  Run(\"XADD x 7-0 data g\");\n  Run(\"XADD x 8-0 data h\");\n  Run(\"XADD x 9-0 data i\");\n  Run(\"XADD x 10-0 data j\");\n  resp = Run(\"XINFO STREAM x FULL\");\n  EXPECT_THAT(resp.GetVec()[17].GetVec()[0].GetVec(),\n              ElementsAre(\"name\", \"g1\", \"last-delivered-id\", \"6-0\", \"entries-read\", IntArg(6),\n                          \"lag\", IntArg(4), \"pel-count\", _, \"pending\", _, \"consumers\", _));\n\n  // Read 3 more messages (COUNT 3 will read 7-0, 8-0, 9-0)\n  Run(\"XREADGROUP GROUP g1 c11 COUNT 3 STREAMS x >\");\n  Run(\"XDEL x 9-0\");\n  // Now there is a tombstone in the stream after the consumer group last_id\n  // so the lag can't be calculated\n  resp = Run(\"XINFO STREAM x FULL\");\n  EXPECT_THAT(resp.GetVec()[17].GetVec()[0].GetVec(),\n              ElementsAre(\"name\", \"g1\", \"last-delivered-id\", \"9-0\", \"entries-read\", IntArg(9),\n                          \"lag\", kMatchNil, \"pel-count\", _, \"pending\", _, \"consumers\", _));\n\n  // Read one more message to catch up\n  Run(\"XREADGROUP GROUP g1 c12 
COUNT 1 STREAMS x >\");\n  resp = Run(\"XINFO STREAM x FULL\");\n  EXPECT_THAT(resp.GetVec()[17].GetVec()[0].GetVec(),\n              ElementsAre(\"name\", \"g1\", \"last-delivered-id\", \"10-0\", \"entries-read\", IntArg(10),\n                          \"lag\", IntArg(0), \"pel-count\", _, \"pending\", _, \"consumers\", _));\n}\n\n// Test consumer group lag with XTRIM\nTEST_F(StreamFamilyTest, ConsumerGroupLagWithXTrim) {\n  Run(\"DEL x\");\n  Run(\"XADD x 1-0 data a\");\n  Run(\"XADD x 2-0 data b\");\n  Run(\"XADD x 3-0 data c\");\n  Run(\"XADD x 4-0 data d\");\n  Run(\"XADD x 5-0 data e\");\n  Run(\"XDEL x 3-0\");\n  Run(\"XGROUP CREATE x g1 0\");\n  Run(\"XGROUP CREATE x g2 0\");\n\n  auto resp = Run(\"XINFO STREAM x FULL\");\n  EXPECT_THAT(resp.GetVec()[17].GetVec()[0].GetVec(),\n              ElementsAre(\"name\", \"g1\", \"last-delivered-id\", \"0-0\", \"entries-read\", kMatchNil,\n                          \"lag\", kMatchNil, \"pel-count\", _, \"pending\", _, \"consumers\", _));\n\n  // Read messages one by one\n  Run(\"XREADGROUP GROUP g1 c11 COUNT 1 STREAMS x >\");\n  resp = Run(\"XINFO STREAM x FULL\");\n  EXPECT_THAT(resp.GetVec()[17].GetVec()[0].GetVec(),\n              ElementsAre(\"name\", \"g1\", \"last-delivered-id\", \"1-0\", \"entries-read\", kMatchNil,\n                          \"lag\", kMatchNil, \"pel-count\", _, \"pending\", _, \"consumers\", _));\n\n  Run(\"XREADGROUP GROUP g1 c11 COUNT 1 STREAMS x >\");\n  resp = Run(\"XINFO STREAM x FULL\");\n  EXPECT_THAT(resp.GetVec()[17].GetVec()[0].GetVec(),\n              ElementsAre(\"name\", \"g1\", \"last-delivered-id\", \"2-0\", \"entries-read\", kMatchNil,\n                          \"lag\", kMatchNil, \"pel-count\", _, \"pending\", _, \"consumers\", _));\n\n  Run(\"XREADGROUP GROUP g1 c11 COUNT 1 STREAMS x >\");\n  resp = Run(\"XINFO STREAM x FULL\");\n  EXPECT_THAT(resp.GetVec()[17].GetVec()[0].GetVec(),\n              ElementsAre(\"name\", \"g1\", \"last-delivered-id\", \"4-0\", 
\"entries-read\", kMatchNil,\n                          \"lag\", kMatchNil, \"pel-count\", _, \"pending\", _, \"consumers\", _));\n\n  Run(\"XREADGROUP GROUP g1 c11 COUNT 1 STREAMS x >\");\n  resp = Run(\"XINFO STREAM x FULL\");\n  EXPECT_THAT(resp.GetVec()[17].GetVec()[0].GetVec(),\n              ElementsAre(\"name\", \"g1\", \"last-delivered-id\", \"5-0\", \"entries-read\", IntArg(5),\n                          \"lag\", IntArg(0), \"pel-count\", _, \"pending\", _, \"consumers\", _));\n\n  // Add more messages\n  Run(\"XADD x 6-0 data f\");\n  Run(\"XADD x 7-0 data g\");\n  resp = Run(\"XINFO STREAM x FULL\");\n  EXPECT_THAT(resp.GetVec()[17].GetVec()[0].GetVec(),\n              ElementsAre(\"name\", \"g1\", \"last-delivered-id\", \"5-0\", \"entries-read\", IntArg(5),\n                          \"lag\", IntArg(2), \"pel-count\", _, \"pending\", _, \"consumers\", _));\n\n  // XTRIM\n  Run(\"XTRIM x MINID = 7-0\");\n  resp = Run(\"XINFO STREAM x FULL\");\n  EXPECT_THAT(resp.GetVec()[17].GetVec()[0].GetVec(),\n              ElementsAre(\"name\", \"g1\", \"last-delivered-id\", \"5-0\", \"entries-read\", IntArg(5),\n                          \"lag\", IntArg(2), \"pel-count\", _, \"pending\", _, \"consumers\", _));\n  EXPECT_THAT(resp.GetVec()[17].GetVec()[1].GetVec(),\n              ElementsAre(\"name\", \"g2\", \"last-delivered-id\", \"0-0\", \"entries-read\", kMatchNil,\n                          \"lag\", IntArg(1), \"pel-count\", _, \"pending\", _, \"consumers\", _));\n\n  // Read all remaining with g1\n  Run(\"XREADGROUP GROUP g1 c11 STREAMS x >\");\n  resp = Run(\"XINFO STREAM x FULL\");\n  EXPECT_THAT(resp.GetVec()[17].GetVec()[0].GetVec(),\n              ElementsAre(\"name\", \"g1\", \"last-delivered-id\", \"7-0\", \"entries-read\", IntArg(7),\n                          \"lag\", IntArg(0), \"pel-count\", _, \"pending\", _, \"consumers\", _));\n}\n\n// Test consumer group lag with XADD trimming\nTEST_F(StreamFamilyTest, ConsumerGroupLagWithXAddTrimming) {\n  
Run(\"DEL x\");\n  Run(\"XADD x 1-0 data a\");\n  Run(\"XADD x 2-0 data b\");\n  Run(\"XADD x 3-0 data c\");\n  Run(\"XADD x 4-0 data d\");\n  Run(\"XADD x 5-0 data e\");\n  Run(\"XDEL x 3-0\");\n  Run(\"XGROUP CREATE x g1 0\");\n  Run(\"XGROUP CREATE x g2 0\");\n\n  auto resp = Run(\"XINFO STREAM x FULL\");\n  EXPECT_THAT(resp.GetVec()[17].GetVec()[0].GetVec(),\n              ElementsAre(\"name\", \"g1\", \"last-delivered-id\", \"0-0\", \"entries-read\", kMatchNil,\n                          \"lag\", kMatchNil, \"pel-count\", _, \"pending\", _, \"consumers\", _));\n\n  // Read messages one by one\n  Run(\"XREADGROUP GROUP g1 c11 COUNT 1 STREAMS x >\");\n  resp = Run(\"XINFO STREAM x FULL\");\n  EXPECT_THAT(resp.GetVec()[17].GetVec()[0].GetVec(),\n              ElementsAre(\"name\", \"g1\", \"last-delivered-id\", \"1-0\", \"entries-read\", kMatchNil,\n                          \"lag\", kMatchNil, \"pel-count\", _, \"pending\", _, \"consumers\", _));\n\n  Run(\"XREADGROUP GROUP g1 c11 COUNT 1 STREAMS x >\");\n  resp = Run(\"XINFO STREAM x FULL\");\n  EXPECT_THAT(resp.GetVec()[17].GetVec()[0].GetVec(),\n              ElementsAre(\"name\", \"g1\", \"last-delivered-id\", \"2-0\", \"entries-read\", kMatchNil,\n                          \"lag\", kMatchNil, \"pel-count\", _, \"pending\", _, \"consumers\", _));\n\n  Run(\"XREADGROUP GROUP g1 c11 COUNT 1 STREAMS x >\");\n  resp = Run(\"XINFO STREAM x FULL\");\n  EXPECT_THAT(resp.GetVec()[17].GetVec()[0].GetVec(),\n              ElementsAre(\"name\", \"g1\", \"last-delivered-id\", \"4-0\", \"entries-read\", kMatchNil,\n                          \"lag\", kMatchNil, \"pel-count\", _, \"pending\", _, \"consumers\", _));\n\n  Run(\"XREADGROUP GROUP g1 c11 COUNT 1 STREAMS x >\");\n  resp = Run(\"XINFO STREAM x FULL\");\n  EXPECT_THAT(resp.GetVec()[17].GetVec()[0].GetVec(),\n              ElementsAre(\"name\", \"g1\", \"last-delivered-id\", \"5-0\", \"entries-read\", IntArg(5),\n                          \"lag\", IntArg(0), 
\"pel-count\", _, \"pending\", _, \"consumers\", _));\n\n  // Add more messages\n  Run(\"XADD x 6-0 data f\");\n  Run(\"XADD x 7-0 data g\");\n  resp = Run(\"XINFO STREAM x FULL\");\n  EXPECT_THAT(resp.GetVec()[17].GetVec()[0].GetVec(),\n              ElementsAre(\"name\", \"g1\", \"last-delivered-id\", \"5-0\", \"entries-read\", IntArg(5),\n                          \"lag\", IntArg(2), \"pel-count\", _, \"pending\", _, \"consumers\", _));\n\n  // XADD with MINID trimming\n  Run(\"XADD x MINID = 7-0 8-0 data h\");\n  resp = Run(\"XINFO STREAM x FULL\");\n  EXPECT_THAT(resp.GetVec()[17].GetVec()[0].GetVec(),\n              ElementsAre(\"name\", \"g1\", \"last-delivered-id\", \"5-0\", \"entries-read\", IntArg(5),\n                          \"lag\", IntArg(3), \"pel-count\", _, \"pending\", _, \"consumers\", _));\n  EXPECT_THAT(resp.GetVec()[17].GetVec()[1].GetVec(),\n              ElementsAre(\"name\", \"g2\", \"last-delivered-id\", \"0-0\", \"entries-read\", kMatchNil,\n                          \"lag\", IntArg(2), \"pel-count\", _, \"pending\", _, \"consumers\", _));\n\n  // Read all remaining with g1\n  Run(\"XREADGROUP GROUP g1 c11 STREAMS x >\");\n  resp = Run(\"XINFO STREAM x FULL\");\n  EXPECT_THAT(resp.GetVec()[17].GetVec()[0].GetVec(),\n              ElementsAre(\"name\", \"g1\", \"last-delivered-id\", \"8-0\", \"entries-read\", IntArg(8),\n                          \"lag\", IntArg(0), \"pel-count\", _, \"pending\", _, \"consumers\", _));\n}\n\nTEST_F(StreamFamilyTest, XTrimCrashWithMallocUsedZero) {\n  auto resp = Run(\"xadd mystream 0-0 field1 value1\");\n  EXPECT_THAT(\n      resp, ErrArg(\"The ID specified in XADD is equal or smaller than the target stream top item\"));\n\n  // Without the fix we would have crashed here with check failed MallocUsed() != 0\n  Run(\"XTRIM mystream MAXLEN 0\");\n}\n\nTEST_F(StreamFamilyTest, XReadGroupMultipleStreams) {\n  Run(\"XGROUP CREATE mystream1 mygroup $ MKSTREAM\");\n  Run(\"XGROUP CREATE mystream mygroup $ 
MKSTREAM\");\n\n  Run(\"XADD mystream 2000-0 field1 value1\");\n  Run(\"XADD mystream 2000-1 field1 value1\");\n  Run(\"XADD mystream 2000-2 field1 value1\");\n\n  Run(\"XADD mystream1 2000-0 field1 value1\");\n  Run(\"XADD mystream1 2000-1 field1 value1\");\n  Run(\"XADD mystream1 2000-2 field1 value1\");\n\n  auto resp = Run(\"XREADGROUP GROUP mygroup myconsumer STREAMS mystream mystream1 > 2000-0\");\n\n  EXPECT_THAT(resp, RespArray(ElementsAre(ArrLen(2), ArrLen(2))));\n\n  const auto& vec = resp.GetVec();\n\n  auto first_stream = vec[0];\n  EXPECT_THAT(first_stream, RespArray(ElementsAre(\"mystream\", ArrLen(3))));\n  auto entries = first_stream.GetVec()[1].GetVec();\n  EXPECT_THAT(entries[0], RespArray(ElementsAre(\"2000-0\", RespElementsAre(\"field1\", \"value1\"))));\n  EXPECT_THAT(entries[1], RespArray(ElementsAre(\"2000-1\", RespElementsAre(\"field1\", \"value1\"))));\n  EXPECT_THAT(entries[2], RespArray(ElementsAre(\"2000-2\", RespElementsAre(\"field1\", \"value1\"))));\n\n  auto second_stream = vec[1];\n  EXPECT_THAT(second_stream, RespArray(ElementsAre(\"mystream1\", ArrLen(0))));\n}\n\nTEST_F(StreamFamilyTest, XGroupSetIdEntriesRead) {\n  Run(\"XGROUP CREATE mystream mygroup $ MKSTREAM\");\n  Run(\"XADD mystream 2000-0 key val\");\n  Run(\"XGROUP SETID mystream mygroup 2000-0 ENTRIESREAD 100\");\n\n  auto resp = Run(\"XINFO GROUPS mystream\");\n  EXPECT_THAT(resp.GetVec(), ElementsAre(\"name\", \"mygroup\", \"consumers\", IntArg(0), \"pending\",\n                                         IntArg(0), \"last-delivered-id\", \"2000-0\", \"entries-read\",\n                                         IntArg(100), \"lag\", IntArg(-99)));\n\n  Run(\"XGROUP SETID mystream mygroup 2000-0 ENTRIESREAD -1\");\n  resp = Run(\"XINFO GROUPS mystream\");\n  EXPECT_THAT(resp.GetVec(), ElementsAre(\"name\", \"mygroup\", \"consumers\", IntArg(0), \"pending\",\n                                         IntArg(0), \"last-delivered-id\", \"2000-0\", \"entries-read\",\n            
                             kMatchNil, \"lag\", IntArg(0)));\n}\n\nTEST_F(StreamFamilyTest, XInfoConsumersArityCrash) {\n  Run(\"XGROUP CREATE mystream mygroup $ MKSTREAM\");\n  auto resp = Run(\"XINFO CONSUMERS mystream\");\n  EXPECT_THAT(resp, ErrArg(\"syntax error\"));\n}\n\nTEST_F(StreamFamilyTest, GroupCreateInvalidIdMemoryTracking) {\n  auto resp = Run({\"xgroup\", \"create\", \"mystream\", \"mygroup\", \"notanumber\", \"MKSTREAM\"});\n  EXPECT_THAT(resp, ErrArg(\"syntax error\"));\n\n  // Verify the stream was not created (no orphan stream after the error)\n  resp = Run({\"exists\", \"mystream\"});\n  EXPECT_THAT(resp, IntArg(0));\n}\n\nTEST_F(StreamFamilyTest, XAddOnOrphanedStreamMemoryTracking) {\n  auto resp = Run({\"xgroup\", \"create\", \"mystream\", \"mygroup\", \"invalid_id\", \"MKSTREAM\"});\n  EXPECT_THAT(resp, ErrArg(\"syntax error\"));\n\n  resp = Run({\"xadd\", \"mystream\", \"0-0\", \"field\", \"value\"});\n  EXPECT_THAT(resp, ErrArg(\"equal or smaller\"));\n\n  resp = Run({\"exists\", \"mystream\"});\n  EXPECT_THAT(resp, IntArg(0));\n}\n\nTEST_F(StreamFamilyTest, XAutoClaimEmptyConsumer) {\n  Run({\"xadd\", \"stream4\", \"*\", \"field\", \"val1\"});\n  Run({\"xgroup\", \"create\", \"stream4\", \"group2\", \"0\"});\n  auto resp = Run({\"xautoclaim\", \"stream4\", \"group2\", \"\", \"0\", \"0-0\"});\n  EXPECT_THAT(resp, AnyOf(ErrArg(\"\"), ArgType(RespExpr::ARRAY)));\n}\n\n}  // namespace dfly\n"
  },
  {
    "path": "src/server/string_family.cc",
    "content": "// Copyright 2022, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#include <absl/container/inlined_vector.h>\n#include <absl/strings/match.h>\n#include <absl/strings/str_cat.h>\n\n#include <algorithm>\n#include <array>\n#include <chrono>\n#include <cstdint>\n#include <variant>\n\n#include \"base/flags.h\"\n#include \"base/logging.h\"\n#include \"base/stl_util.h\"\n#include \"core/overloaded.h\"\n#include \"facade/cmd_arg_parser.h\"\n#include \"facade/op_status.h\"\n#include \"facade/reply_builder.h\"\n#include \"facade/reply_capture.h\"\n#include \"redis/redis_aux.h\"\n#include \"server/acl/acl_commands_def.h\"\n#include \"server/cmd_support.h\"\n#include \"server/command_families.h\"\n#include \"server/command_registry.h\"\n#include \"server/conn_context.h\"\n#include \"server/db_slice.h\"\n#include \"server/engine_shard_set.h\"\n#include \"server/error.h\"\n#include \"server/execution_state.h\"\n#include \"server/family_utils.h\"\n#include \"server/generic_family.h\"\n#include \"server/journal/journal.h\"\n#include \"server/search/doc_index.h\"\n#include \"server/table.h\"\n#include \"server/tiered_storage.h\"\n#include \"server/transaction.h\"\n#include \"util/fibers/future.h\"\n\nABSL_FLAG(bool, mget_dedup_keys, false, \"If true, MGET will deduplicate keys\");\n\nnamespace dfly {\n\nnamespace {\n\nusing namespace std;\nusing namespace facade;\nusing namespace util;\n\nusing CI = CommandId;\n\nenum class ExpT { EX, PX, EXAT, PXAT };\n\nconstexpr uint32_t kMaxStrLen = 1 << 28;\n\n// Either immediately available value or tiering future + result\ntemplate <typename T> using TResultOrT = variant<T, TieredStorage::TResult<T>>;\nusing StringResult = TResultOrT<string>;\n\nStringResult ReadString(DbIndex dbid, string_view key, const PrimeValue& pv, EngineShard* es) {\n  return pv.IsExternal() ? 
StringResult{ReadTieredString(dbid, key, pv, es->tiered_storage())}\n                         : StringResult{pv.ToString()};\n}\n\n// Helper for performing SET operations with various options\nclass SetCmd {\n public:\n  explicit SetCmd(OpArgs op_args, bool explicit_journal)\n      : op_args_(op_args), explicit_journal_{explicit_journal} {\n  }\n\n  enum SetFlags {\n    SET_ALWAYS = 0,\n    SET_IF_NOTEXIST = 1 << 0,     /* NX: Set if key not exists. */\n    SET_IF_EXISTS = 1 << 1,       /* XX: Set if key exists. */\n    SET_KEEP_EXPIRE = 1 << 2,     /* KEEPTTL: Set and keep the ttl */\n    SET_GET = 1 << 3,             /* GET: Set if want to get key before set */\n    SET_EXPIRE_AFTER_MS = 1 << 4, /* EX,PX,EXAT,PXAT: Expire after ms. */\n    SET_STICK = 1 << 5,           /* Set STICK flag */\n  };\n\n  struct SetParams {\n    uint16_t flags = SET_ALWAYS;\n    uint32_t memcache_flags = 0;\n    uint64_t expire_after_ms = 0;  // Relative value based on now. 0 means no expiration.\n    optional<StringResult>* prev_val = nullptr;  // if set, previous value will be stored if found\n    BackPressureFuture* backpressure = nullptr;\n\n    constexpr bool IsConditionalSet() const {\n      return flags & SET_IF_NOTEXIST || flags & SET_IF_EXISTS;\n    }\n  };\n\n  OpStatus Set(const SetParams& params, std::string_view key, std::string_view value);\n\n private:\n  OpStatus SetExisting(const SetParams& params, std::string_view value,\n                       DbSlice::ItAndUpdater* it_upd);\n\n  void AddNew(const SetParams& params, const DbSlice::Iterator& it, std::string_view key,\n              std::string_view value);\n\n  // Called at the end of AddNew of SetExisting\n  void PostEdit(const SetParams& params, std::string_view key, std::string_view value,\n                PrimeValue* pv);\n\n  void RecordJournal(const SetParams& params, std::string_view key, std::string_view value);\n\n  OpStatus CachePrevIfNeeded(const SetParams& params, DbSlice::Iterator it);\n\n  const OpArgs 
op_args_;\n  bool explicit_journal_;  // call RecordJournal (auto journaling disabled)\n};\n\nsize_t SetRangeInternal(std::string* value, size_t start, std::string_view range) {\n  value->resize(max(value->size(), start + range.size()));\n  memcpy(value->data() + start, range.data(), range.size());\n  return value->size();\n}\n\nOpResult<TResultOrT<size_t>> OpStrLen(const OpArgs& op_args, string_view key) {\n  auto& db_slice = op_args.GetDbSlice();\n  auto it_res = db_slice.FindReadOnly(op_args.db_cntx, key, OBJ_STRING);\n  if (it_res == OpStatus::KEY_NOTFOUND) {\n    return {0u};\n  }\n  RETURN_ON_BAD_STATUS(it_res);\n\n  // For external entries we have to enqueue reads because modify operations like append could be\n  // already pending.\n  // TODO(vlad): Optimize to return co.Size() if no modify operations are present\n  // TODO(vlad): Omit decoding string to just query its length\n  if (const auto& co = it_res.value()->second; co.IsExternal()) {\n    auto cb = [](string_view s) { return s.size(); };\n\n    TieredStorage::TResult<size_t> fut = ReadTiered<size_t>(\n        op_args.db_cntx.db_index, key, co, std::move(cb), op_args.shard->tiered_storage());\n    return {std::move(fut)};\n  } else {\n    return {co.Size()};\n  }\n}\n\nOpResult<TResultOrT<size_t>> OpSetRange(const OpArgs& op_args, string_view key, size_t start,\n                                        string_view range) {\n  VLOG(2) << \"SetRange(\" << key << \", \" << start << \", \" << range << \")\";\n  auto& db_slice = op_args.GetDbSlice();\n\n  if (range.empty()) {\n    return OpStrLen(op_args, key);\n  }\n\n  auto op_res = db_slice.AddOrFind(op_args.db_cntx, key, OBJ_STRING);\n  RETURN_ON_BAD_STATUS(op_res);\n  auto& res = *op_res;\n\n  if (res.it->second.IsExternal()) {\n    return {ModifyTiered<size_t>(\n        op_args.db_cntx.db_index, key, res.it->second,\n        [start = start, range = string(range)](std::string* s) {\n          return SetRangeInternal(s, start, range);\n        },\n    
    op_args.shard->tiered_storage())};\n  } else {\n    string value;\n\n    if (!res.is_new)\n      value = res.it->second.ToString();\n\n    size_t len = SetRangeInternal(&value, start, range);\n    res.it->second.SetString(value);\n    return {len};\n  }\n}\n\nOpResult<StringResult> OpGetRange(const OpArgs& op_args, string_view key, int32_t start,\n                                  int32_t end) {\n  auto read_cb = [start, end](std::string_view slice) mutable -> string {\n    int32_t strlen = slice.size();\n    if (strlen == 0)\n      return \"\";\n\n    if (start < 0) {\n      if (end < start) {\n        return \"\";\n      }\n      start = strlen + start;\n      start = max(start, 0);\n    }\n\n    if (end < 0) {\n      end = strlen + end;\n      end = max(end, 0);\n    } else {\n      end = min(end, strlen - 1);\n    }\n\n    if (start > end) {\n      return \"\";\n    }\n\n    return string{slice.substr(start, end - start + 1)};\n  };\n\n  auto& db_slice = op_args.GetDbSlice();\n  auto it_res = db_slice.FindReadOnly(op_args.db_cntx, key, OBJ_STRING);\n  if (it_res == OpStatus::KEY_NOTFOUND) {\n    return StringResult(string{});\n  }\n  RETURN_ON_BAD_STATUS(it_res);\n\n  const PrimeValue& co = it_res.value()->second;\n  if (co.IsExternal()) {\n    fb2::Future<io::Result<string>> fut = ReadTiered<string>(\n        op_args.db_cntx.db_index, key, co,\n        [read_cb](std::string_view sv) mutable { return read_cb(sv); },\n        op_args.shard->tiered_storage());\n    return {std::move(fut)};\n  }\n\n  string tmp;\n  string_view slice = co.GetSlice(&tmp);\n  return {read_cb(slice)};\n};\n\n// TODO: Don't copy whole value just to append\nsize_t ExtendExisting(const DbSlice::Iterator& it, string_view key, string_view val, bool prepend) {\n  string tmp;\n  string_view slice = it->second.GetSlice(&tmp);\n\n  string new_val = prepend ? 
absl::StrCat(val, slice) : absl::StrCat(slice, val);\n  it->second.SetString(new_val);\n  return new_val.size();\n}\n\nOpResult<bool> ExtendOrSkip(const OpArgs& op_args, string_view key, string_view val, bool prepend) {\n  auto& db_slice = op_args.GetDbSlice();\n  auto it_res = db_slice.FindMutable(op_args.db_cntx, key, OBJ_STRING);\n  if (!it_res) {\n    return false;\n  }\n\n  return ExtendExisting(it_res->it, key, val, prepend);\n}\n\nOpResult<double> OpIncrFloat(const OpArgs& op_args, string_view key, double val) {\n  auto& db_slice = op_args.GetDbSlice();\n\n  auto op_res = db_slice.AddOrFind(op_args.db_cntx, key, OBJ_STRING);\n  RETURN_ON_BAD_STATUS(op_res);\n  auto& add_res = *op_res;\n\n  char buf[128];\n\n  if (add_res.is_new) {\n    char* str = RedisReplyBuilder::FormatDouble(val, buf, sizeof(buf));\n    add_res.it->second.SetString(str);\n\n    return val;\n  }\n\n  if (add_res.it->second.Size() == 0)\n    return OpStatus::INVALID_FLOAT;\n\n  string tmp;\n  string_view slice = add_res.it->second.GetSlice(&tmp);\n\n  double base = 0;\n  if (!ParseDouble(slice, &base)) {\n    return OpStatus::INVALID_FLOAT;\n  }\n\n  base += val;\n\n  if (isnan(base) || isinf(base)) {\n    return OpStatus::NAN_OR_INF_DURING_INCR;\n  }\n\n  char* str = RedisReplyBuilder::FormatDouble(base, buf, sizeof(buf));\n\n  add_res.it->second.SetString(str);\n\n  return base;\n}\n\n// if skip_on_missing - returns KEY_NOTFOUND.\nOpResult<int64_t> OpIncrBy(const OpArgs& op_args, string_view key, int64_t incr,\n                           bool skip_on_missing) {\n  auto& db_slice = op_args.GetDbSlice();\n\n  // we avoid using AddOrFind because of skip_on_missing option for memcache.\n  auto res = db_slice.FindMutable(op_args.db_cntx, key, OBJ_STRING);\n\n  if (!res) {\n    if (res.status() == OpStatus::WRONG_TYPE)\n      return res.status();\n\n    if (skip_on_missing)\n      return OpStatus::KEY_NOTFOUND;\n\n    PrimeValue pv;\n    pv.SetInt(incr);\n\n    auto op_result = 
db_slice.AddNew(op_args.db_cntx, key, std::move(pv), 0);\n    RETURN_ON_BAD_STATUS(op_result);\n\n    return incr;\n  }\n\n  // Type is already checked by FindMutable (OBJ_STRING)\n  auto opt_prev = res->it->second.TryGetInt();\n  if (!opt_prev) {\n    return OpStatus::INVALID_VALUE;\n  }\n\n  long long prev = *opt_prev;\n  if ((incr < 0 && prev < 0 && incr < (LLONG_MIN - prev)) ||\n      (incr > 0 && prev > 0 && incr > (LLONG_MAX - prev))) {\n    return OpStatus::OUT_OF_RANGE;\n  }\n\n  int64_t new_val = prev + incr;\n  DCHECK(!res->it->second.IsExternal());\n  res->it->second.SetInt(new_val);\n\n  return new_val;\n}\n\n// Returns OpStatus::OK if all keys were set, otherwise the status of the first failed Set.\nOpStatus OpMSet(const OpArgs& op_args, const ShardArgs& args) {\n  DCHECK(!args.Empty() && args.Size() % 2 == 0);\n\n  SetCmd::SetParams params;\n  SetCmd sg(op_args, false);\n\n  OpStatus result = OpStatus::OK;\n  size_t stored = 0;\n  for (auto it = args.begin(); it != args.end();) {\n    string_view key = *(it++);\n    string_view value = *(it++);\n    if (auto status = sg.Set(params, key, value); status != OpStatus::OK) {\n      result = status;\n      break;\n    }\n\n    stored++;\n  }\n\n  // Above loop could have partial success (e.g. 
OOM), replicate only what changed\n  if (auto journal = op_args.shard->journal(); journal) {\n    if (stored * 2 == args.Size()) {\n      RecordJournal(op_args, \"MSET\", args, op_args.tx->GetUniqueShardCnt());\n      DCHECK_EQ(result, OpStatus::OK);\n    } else if (stored > 0) {\n      vector<string_view> store_args(args.begin(), args.end());\n      store_args.resize(stored * 2);\n      RecordJournal(op_args, \"MSET\", store_args, op_args.tx->GetUniqueShardCnt());\n    }\n  }\n  return result;\n}\n\nbool IsValueWithinBounds(const int64_t value, const int64_t bound) {\n  if (bound >= 0) {\n    return value >= INT64_MIN + bound;\n  }\n\n  return value <= INT64_MAX + bound;\n}\n\n// emission_interval_ns assumed to be positive // TODO: Change to unsigned??\n// limit is assumed to be positive\nOpResult<array<int64_t, 5>> OpThrottle(const OpArgs& op_args, const string_view key,\n                                       const int64_t limit, const int64_t emission_interval_ns,\n                                       const uint64_t quantity) {\n  constexpr uint64_t kSecondToMilliSecond = 1000;\n  constexpr uint64_t kMilliSecondToNanoSecond = 1000000;\n  auto& db_slice = op_args.GetDbSlice();\n\n  // Total size of the bucket\n  const int64_t delay_variation_tolerance_ns = emission_interval_ns * limit;  // should be positive\n\n  int64_t remaining = 0;\n  int64_t reset_after_ms = -kSecondToMilliSecond;\n  int64_t retry_after_ms = -kSecondToMilliSecond;\n\n  // Cost of this request\n  const int64_t increment_ns = emission_interval_ns * quantity;  // should be nonnegative\n\n  auto res = db_slice.FindMutable(op_args.db_cntx, key, OBJ_STRING);\n  const int64_t now_ns = GetCurrentTimeNs();\n\n  int64_t tat_ns = now_ns;\n  if (res) {\n    // Type is already checked by FindMutable (OBJ_STRING)\n    auto opt_prev = res->it->second.TryGetInt();\n    if (!opt_prev) {\n      return OpStatus::INVALID_VALUE;\n    }\n    tat_ns = *opt_prev;\n  } else if (res.status() == 
OpStatus::WRONG_TYPE) {\n    return res.status();\n  }\n\n  int64_t new_tat_ns = max(tat_ns, now_ns);\n  if (new_tat_ns > INT64_MAX - increment_ns) {\n    return OpStatus::INVALID_INT;\n  }\n  new_tat_ns += increment_ns;\n\n  if (new_tat_ns < INT64_MIN + delay_variation_tolerance_ns) {\n    return OpStatus::INVALID_INT;\n  }\n\n  // The cutoff point before which a request is rejected (throttled) and at or after which a request\n  // is accepted.\n  const int64_t allow_at_ns = new_tat_ns - delay_variation_tolerance_ns;\n\n  if (!IsValueWithinBounds(now_ns, allow_at_ns)) {\n    return OpStatus::INVALID_INT;\n  }\n\n  const int64_t diff_ns = now_ns - allow_at_ns;\n\n  const bool limited = diff_ns < 0;\n  int64_t ttl_ns;\n  if (limited) {\n    if (increment_ns <= delay_variation_tolerance_ns) {\n      if (diff_ns == INT64_MIN) {\n        return OpStatus::INVALID_INT;\n      }\n      retry_after_ms = (-diff_ns + kMilliSecondToNanoSecond - 1) / kMilliSecondToNanoSecond;\n    }\n\n    if (now_ns >= 0 ? tat_ns < INT64_MIN + now_ns : tat_ns > INT64_MAX + now_ns) {\n      return OpStatus::INVALID_INT;\n    }\n    ttl_ns = tat_ns - now_ns;\n  } else {\n    if (!IsValueWithinBounds(new_tat_ns, now_ns)) {\n      return OpStatus::INVALID_INT;\n    }\n    ttl_ns = new_tat_ns - now_ns;\n  }\n\n  if (ttl_ns < delay_variation_tolerance_ns - INT64_MAX) {\n    return OpStatus::INVALID_INT;\n  }\n  const int64_t next_ns = delay_variation_tolerance_ns - ttl_ns;\n  if (next_ns > -emission_interval_ns) {\n    remaining = next_ns / emission_interval_ns;\n  }\n  reset_after_ms = (ttl_ns + kMilliSecondToNanoSecond - 1) / kMilliSecondToNanoSecond;\n\n  if (!limited) {\n    // Although most computation so far is in nanoseconds, we must store expiry as milliseconds.\n    // While this causes loss of precision, the value stored against the throttle key is still in\n    // the nanosecond units. When the key is loaded, that value will be read and used as tat_ns. 
The\n    // loss of precision will cause the throttle key to be expired a bit earlier than expected, so\n    // to make up, we round up its expiry by at most 1 millisecond. Extending the key life does not\n    // break behavior because the tat_ns value will be used to check for throttling.\n    const int64_t new_tat_ms =\n        (new_tat_ns + kMilliSecondToNanoSecond - 1) / kMilliSecondToNanoSecond;\n    if (res) {\n      db_slice.AddExpire(op_args.db_cntx.db_index, res->it, new_tat_ms);\n      res->it->second.SetInt(new_tat_ns);\n    } else {\n      PrimeValue pv;\n      pv.SetInt(new_tat_ns);\n\n      auto res = db_slice.AddNew(op_args.db_cntx, key, std::move(pv), new_tat_ms);\n      if (!res) {\n        return res.status();\n      }\n    }\n  }\n\n  return array<int64_t, 5>{limited ? 1 : 0, limit, remaining, retry_after_ms, reset_after_ms};\n}\n\nstruct GetResp {\n  string_view value;\n  uint64_t mc_ver = 0;\n  uint32_t mc_flag = 0;\n  uint32_t ttl_sec = 0;\n};\n\nstruct MGetResponse {\n  explicit MGetResponse(size_t size = 0) : resp_arr(size) {\n  }\n\n  std::unique_ptr<char[]> storage;\n  absl::InlinedVector<std::optional<GetResp>, 2> resp_arr;\n};\n\ntemplate <typename Iter> using SearchKey = std::function<OpResult<Iter>(string_view)>;\n\n// A find operation which can mutate, for commands which can write, eg GAT\nusing SearchMut = SearchKey<DbSlice::Iterator>;\n\n// Const find operation, for read-only commands, eg MGet\nusing SearchConst = SearchKey<DbSlice::ConstIterator>;\n\ntemplate <typename Iter>\nMGetResponse CollectKeys(BlockingCounter wait_bc, AggregateError* err, MemcacheCmdFlags cmd_flags,\n                         const Transaction* t, EngineShard* shard, SearchKey<Iter> find_op) {\n  ShardArgs keys = t->GetShardArgs(shard->shard_id());\n  DCHECK(!keys.Empty());\n\n  if constexpr (std::is_same_v<Iter, DbSlice::Iterator>) {\n    const CommandId* cid = t->GetCId();\n    DCHECK(!cid->IsReadOnly()) << \"mutable iterator used with read-only command \" 
<< cid->name();\n  }\n\n  MGetResponse response(keys.Size());\n  struct Item {\n    Iter it;\n    int source_index = -1;  // in case of duplicate keys, points to the first occurrence.\n  };\n\n  absl::InlinedVector<Item, 32> items(keys.Size());\n\n  // First, fetch all iterators and count total size ahead\n  size_t total_size = 0;\n  unsigned index = 0;\n  static bool mget_dedup_keys = absl::GetFlag(FLAGS_mget_dedup_keys);\n\n  // We can not make it thread-local because we may preempt during the Find loop due to\n  // replication of expiry events.\n  absl::flat_hash_map<string_view, unsigned> key_index;\n  if (mget_dedup_keys) {\n    key_index.reserve(keys.Size());\n  }\n\n  for (string_view key : keys) {\n    if (mget_dedup_keys) {\n      auto [it, inserted] = key_index.try_emplace(key, index);\n      if (!inserted) {  // duplicate -> point to the first occurrence.\n        items[index++].source_index = it->second;\n        continue;\n      }\n    }\n\n    auto it_res = find_op(key);\n    auto& dest = items[index++];\n    if (it_res) {\n      dest.it = *it_res;\n      total_size += (*it_res)->second.Size();\n    }\n  }\n\n  VLOG_IF(1, total_size > 10000000) << \"OpMGet: allocating \" << total_size << \" bytes\";\n\n  // Allocate enough for all values\n  response.storage = make_unique<char[]>(total_size);\n  char* next = response.storage.get();\n  bool fetch_mcflag = cmd_flags.return_flags;\n  bool fetch_cas = cmd_flags.return_cas;\n  const DbSlice& db_slice = t->GetDbSlice(shard->shard_id());\n\n  for (size_t i = 0; i < items.size(); ++i) {\n    auto it = items[i].it;\n    if (it.is_done()) {\n      if (items[i].source_index >= 0) {\n        response.resp_arr[i] = response.resp_arr[items[i].source_index];\n      }\n      continue;\n    }\n    auto& resp = response.resp_arr[i].emplace();\n\n    // Copy to buffer or trigger tiered read that will eventually write to\n    // buffer\n    const PrimeValue& value = it->second;\n    if (value.IsExternal()) {\n      
wait_bc->Add(1);\n      auto cb = [next, err, wait_bc](const io::Result<string_view>& v) mutable {\n        if (v.has_value())\n          memcpy(next, v->data(), v->size());\n        else\n          *err = v.error();\n        wait_bc->Dec();\n      };\n      ReadTiered(t->GetDbIndex(), it.key(), value, std::move(cb), shard->tiered_storage());\n    } else {\n      value.GetString(next);\n    }\n\n    size_t size = value.Size();\n    resp.value = string_view(next, size);\n    next += size;\n\n    // Note - correct behavior is to return TTL before it was updated by GAT,\n    // but this is complex to implement so we return the updated TTL.\n    if (it->first.HasExpire() && cmd_flags.return_ttl) {\n      int64_t expire_time_ms = it->first.GetExpireTime();\n      int64_t ttl_ms = expire_time_ms - t->GetDbContext().time_now_ms;\n      resp.ttl_sec = ttl_ms > 0 ? static_cast<uint32_t>((ttl_ms + 999) / 1000) : 0;\n    }\n    if (fetch_mcflag) {\n      if (value.HasFlag()) {\n        resp.mc_flag = db_slice.GetMCFlag(t->GetDbIndex(), it->first);\n      }\n\n      if (fetch_cas) {\n        resp.mc_ver = it.GetVersion();\n      }\n    }\n  }\n  key_index.clear();\n\n  return response;\n}\n\n// Extend key with value, either prepend or append. Return size of stored string\n// after modification\nOpResult<TResultOrT<size_t>> OpExtend(const OpArgs& op_args, std::string_view key,\n                                      std::string_view value, bool prepend) {\n  auto* shard = op_args.shard;\n  auto it_res = op_args.GetDbSlice().AddOrFind(op_args.db_cntx, key, OBJ_STRING);\n  RETURN_ON_BAD_STATUS(it_res);\n\n  if (it_res->is_new) {\n    it_res->it->second.SetString(value);\n    return {it_res->it->second.Size()};\n  }\n\n  if (const PrimeValue& pv = it_res->it->second; pv.IsExternal()) {\n    auto modf = [value = string{value}, prepend](std::string* v) {\n      *v = prepend ? 
absl::StrCat(value, *v) : absl::StrCat(*v, value);\n      return v->size();\n    };\n    return {ModifyTiered<size_t>(op_args.db_cntx.db_index, key, pv, std::move(modf),\n                                 shard->tiered_storage())};\n  } else {\n    return {ExtendExisting(it_res->it, key, value, prepend)};\n  }\n}\n\n// Helper for building replies for strings\nstruct GetReplies {\n  GetReplies(SinkReplyBuilder* rb) : rb{static_cast<RedisReplyBuilder*>(rb)} {\n    DCHECK(dynamic_cast<RedisReplyBuilder*>(rb));\n  }\n\n  template <typename T> void Send(OpResult<T>&& res) const {\n    switch (res.status()) {\n      case OpStatus::OK:\n        return Send(std::move(res.value()));\n      case OpStatus::WRONG_TYPE:\n        return rb->SendError(kWrongTypeErr);\n      case OpStatus::IO_ERROR:\n        return rb->SendError(kTieredIoError);\n      default:\n        rb->SendNull();\n    }\n  }\n\n  template <typename T> void Send(optional<T>&& res) const {\n    if (res.has_value())\n      return Send(std::move(*res));\n    return rb->SendNull();\n  }\n\n  template <typename T> void Send(TResultOrT<T>&& res) const {\n    if (holds_alternative<T>(res))\n      return Send(get<T>(res));\n\n    io::Result<T> iores = get<1>(std::move(res)).Get();\n    if (iores.has_value())\n      Send(*iores);\n    else\n      Send(iores.error().message());\n  }\n\n  void Send(size_t val) const {\n    rb->SendLong(val);\n  }\n\n  void Send(string_view str) const {\n    rb->SendBulkString(str);\n  }\n\n  RedisReplyBuilder* rb;\n};\n\ncmd::CmdR ExtendGeneric(CmdArgList args, CommandContext* cmd_cntx) {\n  string_view key = ArgS(args, 0);\n  string_view value = ArgS(args, 1);\n  bool prepend = cmd_cntx->cid()->name().starts_with('P');\n\n  VLOG(2) << \"ExtendGeneric(\" << key << \", \" << value << \")\";\n\n  if (cmd_cntx->mc_command() == nullptr) {\n    auto cb = [&](Transaction* t, EngineShard* shard) {\n      return OpExtend(t->GetOpArgs(shard), key, value, prepend);\n    };\n\n    
RedisReplyBuilder* rb = static_cast<RedisReplyBuilder*>(cmd_cntx->rb());\n    GetReplies{rb}.Send(co_await cmd::SingleHopT(cb));\n  } else {\n    // Memcached skips if key is missing\n    auto cb = [&](Transaction* t, EngineShard* shard) {\n      return ExtendOrSkip(t->GetOpArgs(shard), key, value, prepend);\n    };\n\n    OpResult<bool> result = co_await cmd::SingleHopT(cb);\n    MCRender render(cmd_cntx->mc_command()->cmd_flags);\n    if (result) {\n      cmd_cntx->rb()->SendSimpleString(render.RenderStored(result.value()));\n    } else {\n      cmd_cntx->rb()->SendError(result.status());\n    }\n  }\n\n  co_return std::nullopt;\n}\n\n// Wrapper to call SetCmd::Set in ScheduleSingleHop\nOpStatus SetGeneric(const SetCmd::SetParams& sparams, string_view key, string_view value,\n                    const CommandContext& ctx) {\n  bool explicit_journal = ctx.cid()->opt_mask() & CO::NO_AUTOJOURNAL;\n  return ctx.tx()->ScheduleSingleHop([&](Transaction* t, EngineShard* shard) {\n    return SetCmd(t->GetOpArgs(shard), explicit_journal).Set(sparams, key, value);\n  });\n}\n\ncmd::CmdR IncrByGeneric(CommandContext* cmd_cntx, string_view key, int64_t val) {\n  bool skip_on_missing = (cmd_cntx->mc_command() != nullptr);\n\n  auto cb = [&](Transaction* t, EngineShard* shard) {\n    OpResult<int64_t> res = OpIncrBy(t->GetOpArgs(shard), key, val, skip_on_missing);\n    return res;\n  };\n  auto result = co_await cmd::SingleHopT(cb);\n\n  auto* rb = cmd_cntx->rb();\n  switch (result.status()) {\n    case OpStatus::OK:\n      rb->SendLong(result.value());\n      break;\n    case OpStatus::INVALID_VALUE:\n      rb->SendError(kInvalidIntErr);\n      break;\n    case OpStatus::OUT_OF_RANGE:\n      rb->SendError(kIncrOverflow);\n      break;\n    case OpStatus::KEY_NOTFOUND:  // Relevant only for MC\n      rb->SendSimpleString(MCRender{cmd_cntx->mc_command()->cmd_flags}.RenderNotFound());\n      break;\n    default:\n      rb->SendError(result.status());\n      break;\n  }\n  
co_return std::nullopt;\n}\n\nstruct GetAndTouchParams {\n  const Transaction* t;\n  EngineShard* shard;\n  const DbSlice::ExpireParams& expire_params;\n  const string_view key;\n};\n\nOpResult<DbSlice::Iterator> FindKeyAndSetExpiry(const GetAndTouchParams& params) {\n  const DbContext& ctx = params.t->GetDbContext();\n  DbSlice& db_slice = params.t->GetDbSlice(params.shard->shard_id());\n  auto find_res = db_slice.FindMutable(ctx, params.key, OBJ_STRING);\n  if (!IsValid(find_res->it)) {\n    return OpStatus::KEY_NOTFOUND;\n  }\n\n  find_res->post_updater.Run();\n\n  auto update = db_slice.UpdateExpire(ctx, find_res->it, find_res->exp_it, params.expire_params);\n  if (!update.ok()) {\n    return update.status();\n  }\n\n  const int64_t value = update.value();\n  const bool expired = value == -1;\n  if (params.shard->journal()) {\n    const OpArgs& op_args = params.t->GetOpArgs(params.shard);\n    if (expired) {\n      RecordJournal(op_args, \"DEL\"sv, ArgSlice{(params.key)});\n    } else {\n      RecordJournal(op_args, \"PEXPIREAT\"sv, ArgSlice{(params.key), (absl::StrCat(value))});\n    }\n  }\n\n  if (expired) {\n    return OpStatus::KEY_NOTFOUND;\n  }\n  return find_res->it;\n}\n\nMGetResponse OpMGet(BlockingCounter wait_bc, AggregateError* err, MemcacheCmdFlags cmd_flags,\n                    const Transaction* t, EngineShard* shard,\n                    const DbSlice::ExpireParams* gat_params = nullptr) {\n  if (gat_params) {\n    SearchMut find_op = [&](string_view key) {\n      return FindKeyAndSetExpiry(GetAndTouchParams{\n          .t = t,\n          .shard = shard,\n          .expire_params = *gat_params,\n          .key = key,\n      });\n    };\n    return CollectKeys(std::move(wait_bc), err, cmd_flags, t, shard, std::move(find_op));\n  } else {\n    SearchConst find_op = [&](string_view key) {\n      const DbSlice& db_slice = t->GetDbSlice(shard->shard_id());\n      return db_slice.FindReadOnly(t->GetDbContext(), key, OBJ_STRING);\n    };\n    return 
CollectKeys(std::move(wait_bc), err, cmd_flags, t, shard, std::move(find_op));\n  }\n}\n\nOpStatus SetCmd::Set(const SetParams& params, string_view key, string_view value) {\n  auto& db_slice = op_args_.GetDbSlice();\n\n  DCHECK(db_slice.IsDbValid(op_args_.db_cntx.db_index));\n  VLOG(2) << \"Set \" << key << \"(\" << db_slice.shard_id() << \") \";\n\n  if (params.IsConditionalSet()) {\n    auto find_res = db_slice.FindMutable(op_args_.db_cntx, key);\n    if (auto status = CachePrevIfNeeded(params, find_res.it); status != OpStatus::OK)\n      return status;\n\n    if (params.flags & SET_IF_EXISTS) {\n      if (IsValid(find_res.it)) {\n        return SetExisting(params, value, &find_res);\n      } else {\n        return OpStatus::SKIPPED;\n      }\n    } else {\n      DCHECK(params.flags & SET_IF_NOTEXIST) << params.flags;\n      if (IsValid(find_res.it)) {\n        return OpStatus::SKIPPED;\n      }  // else AddNew() is called below\n    }\n  }\n\n  // We can use std::nullopt here because SET command can change the key type to string\n  auto op_res = db_slice.AddOrFind(op_args_.db_cntx, key, std::nullopt);\n  RETURN_ON_BAD_STATUS(op_res);\n\n  if (!op_res->is_new) {\n    if (auto status = CachePrevIfNeeded(params, op_res->it); status != OpStatus::OK)\n      return status;\n\n    return SetExisting(params, value, &(*op_res));\n  } else {\n    AddNew(params, op_res->it, key, value);\n    return OpStatus::OK;\n  }\n}\n\nOpStatus SetCmd::SetExisting(const SetParams& params, string_view value,\n                             DbSlice::ItAndUpdater* it_upd) {\n  DCHECK_EQ(params.flags & SET_IF_NOTEXIST, 0);\n\n  PrimeKey& key = it_upd->it->first;\n  PrimeValue& prime_value = it_upd->it->second;\n  EngineShard* shard = op_args_.shard;\n\n  auto& db_slice = op_args_.GetDbSlice();\n  uint64_t at_ms =\n      params.expire_after_ms ? 
params.expire_after_ms + op_args_.db_cntx.time_now_ms : 0;\n\n  if (!(params.flags & SET_KEEP_EXPIRE)) {\n    if (at_ms) {\n      db_slice.AddExpire(op_args_.db_cntx.db_index, it_upd->it, at_ms);\n    } else {\n      db_slice.RemoveExpire(op_args_.db_cntx.db_index, it_upd->it);\n    }\n  }\n\n  if (params.flags & SET_STICK) {\n    key.SetSticky(true);\n  }\n\n  bool has_expire = key.HasExpire();\n\n  it_upd->post_updater.ReduceHeapUsage();\n\n  // Update flags\n  // TODO: avoid calling SetMCFlag if flags are not changed\n  prime_value.SetFlag(params.memcache_flags != 0);\n  db_slice.SetMCFlag(op_args_.db_cntx.db_index, key, params.memcache_flags);\n\n  // We need to remove the key from search indices, because we are overwriting it to OBJ_STRING\n  RemoveKeyFromIndexesIfNeeded(it_upd->it.key(), op_args_.db_cntx, prime_value, shard);\n\n  // If value is external, mark it as deleted\n  if (prime_value.IsExternal()) {\n    shard->tiered_storage()->Delete(op_args_.db_cntx.db_index, &prime_value);\n  }\n\n  // overwrite existing entry.\n  prime_value.SetString(value);\n\n  DCHECK_EQ(has_expire, key.HasExpire());\n\n  PostEdit(params, it_upd->it.key(), value, &prime_value);\n  return OpStatus::OK;\n}\n\nvoid SetCmd::AddNew(const SetParams& params, const DbSlice::Iterator& it, std::string_view key,\n                    std::string_view value) {\n  auto& db_slice = op_args_.GetDbSlice();\n  it->second = PrimeValue{value};\n\n  if (params.expire_after_ms) {\n    db_slice.AddExpire(op_args_.db_cntx.db_index, it,\n                       params.expire_after_ms + op_args_.db_cntx.time_now_ms);\n  }\n\n  if (params.memcache_flags) {\n    it->second.SetFlag(true);\n    db_slice.SetMCFlag(op_args_.db_cntx.db_index, it->first, params.memcache_flags);\n  }\n\n  if (params.flags & SET_STICK) {\n    it->first.SetSticky(true);\n  }\n\n  PostEdit(params, key, value, &it->second);\n}\n\nvoid SetCmd::PostEdit(const SetParams& params, std::string_view key, std::string_view value,\n          
            PrimeValue* pv) {\n  EngineShard* shard = op_args_.shard;\n\n  // Currently we always try to offload, but Stash may ignore it, if disk I/O is overloaded.\n  // If we are beyond the offloading threshold, StashPrimeValue may populate a backpressure future\n  // via the provided out-parameter.\n  if (auto* ts = shard->tiered_storage(); ts) {\n    StashPrimeValue(op_args_.db_cntx.db_index, key, pv, ts, params.backpressure);\n  }\n\n  if (explicit_journal_ && op_args_.shard->journal()) {\n    RecordJournal(params, key, value);\n  }\n}\n\nvoid SetCmd::RecordJournal(const SetParams& params, string_view key, string_view value) {\n  absl::InlinedVector<string_view, 5> cmds({key, value});  // 5 is theoretical maximum;\n\n  std::string exp_str;\n  if (params.flags & SET_EXPIRE_AFTER_MS) {\n    exp_str = absl::StrCat(params.expire_after_ms + op_args_.db_cntx.time_now_ms);\n    cmds.insert(cmds.end(), {\"PXAT\", exp_str});\n  } else if (params.flags & SET_KEEP_EXPIRE) {\n    cmds.push_back(\"KEEPTTL\");\n  }\n\n  if (params.flags & SET_STICK) {\n    cmds.push_back(\"STICK\");\n  }\n  if (params.memcache_flags) {\n    cmds.push_back(\"_MCFLAGS\");\n    cmds.push_back(absl::StrCat(params.memcache_flags));\n  }\n\n  // Skip NX/XX because SET operation was executed.\n  // Skip GET, because its not important on replica.\n\n  dfly::RecordJournal(op_args_, \"SET\", ArgSlice{cmds});\n}\n\nOpStatus SetCmd::CachePrevIfNeeded(const SetCmd::SetParams& params, DbSlice::Iterator it) {\n  if (!params.prev_val || !IsValid(it))\n    return OpStatus::OK;\n  if (it->second.ObjType() != OBJ_STRING)\n    return OpStatus::WRONG_TYPE;\n\n  *params.prev_val =\n      ReadString(op_args_.db_cntx.db_index, it.key(), it->second, EngineShard::tlocal());\n  return OpStatus::OK;\n}\n\nstruct NegativeExpire {};  // Returned if relative expiry was in the past\nstd::variant<SetCmd::SetParams, facade::ErrorReply, NegativeExpire> ParseSetParams(\n    CmdArgParser parser, const CommandContext* 
cmd_cntx) {\n  SetCmd::SetParams sparams;\n\n  sparams.memcache_flags = cmd_cntx->mc_command() ? cmd_cntx->mc_command()->flags : 0;\n\n  while (parser.HasNext()) {\n    if (auto exp_type = parser.TryMapNext(\"EX\", ExpT::EX, \"PX\", ExpT::PX, \"EXAT\", ExpT::EXAT,\n                                          \"PXAT\", ExpT::PXAT);\n        exp_type) {\n      auto int_arg = parser.Next<int64_t>();\n      if (parser.HasError())\n        break;\n\n      // We can set expiry only once.\n      if (sparams.flags & SetCmd::SET_EXPIRE_AFTER_MS)\n        return facade::ErrorReply{kSyntaxErr};\n\n      sparams.flags |= SetCmd::SET_EXPIRE_AFTER_MS;\n\n      // Since PXAT/EXAT can change this, we need to check this ahead\n      if (int_arg <= 0)\n        return facade::ErrorReply{InvalidExpireTime(\"set\")};\n\n      DbSlice::ExpireParams expiry{\n          .value = int_arg,\n          .unit = *exp_type == ExpT::PX || *exp_type == ExpT::PXAT ? TimeUnit::MSEC : TimeUnit::SEC,\n          .absolute = *exp_type == ExpT::EXAT || *exp_type == ExpT::PXAT,\n      };\n\n      int64_t now_ms = GetCurrentTimeMs();\n      auto [rel_ms, abs_ms] = expiry.Calculate(now_ms, false);\n      if (abs_ms < 0)\n        return facade::ErrorReply{InvalidExpireTime(\"set\")};\n\n      // Expiry is already in the past: the caller removes the existing key instead of setting it\n      if (rel_ms < 0)\n        return NegativeExpire{};\n\n      tie(sparams.expire_after_ms, ignore) = expiry.Calculate(now_ms, true);\n    } else if (parser.Check(\"_MCFLAGS\")) {\n      sparams.memcache_flags = parser.Next<uint32_t>();\n    } else {\n      uint16_t flag = parser.MapNext(  //\n          \"GET\", SetCmd::SET_GET, \"STICK\", SetCmd::SET_STICK, \"KEEPTTL\", SetCmd::SET_KEEP_EXPIRE,\n          \"XX\", SetCmd::SET_IF_EXISTS, \"NX\", SetCmd::SET_IF_NOTEXIST);\n      sparams.flags |= flag;\n    }\n  }\n\n  if (auto err = parser.TakeError(); err)\n    return err.MakeReply();\n\n  auto has_mask = [&](uint16_t m) { return (sparams.flags & m) == m; };\n  if 
(has_mask(SetCmd::SET_IF_EXISTS | SetCmd::SET_IF_NOTEXIST) ||\n      has_mask(SetCmd::SET_KEEP_EXPIRE | SetCmd::SET_EXPIRE_AFTER_MS)) {\n    return facade::ErrorReply{kSyntaxErr};\n  }\n\n  return sparams;\n}\n\ncmd::CmdR CmdSet(CmdArgList args, CommandContext* cmd_cntx) {\n  facade::CmdArgParser parser{args};\n\n  auto [key, value] = parser.Next<string_view, string_view>();\n  auto params_result = ParseSetParams(parser, cmd_cntx);\n\n  if (holds_alternative<facade::ErrorReply>(params_result))\n    co_return get<facade::ErrorReply>(params_result);\n\n  if (holds_alternative<NegativeExpire>(params_result)) {\n    auto del_cb = [](const Transaction* tx, EngineShard* es) {\n      ShardArgs args = tx->GetShardArgs(es->shard_id());\n      GenericFamily::OpDel(tx->GetOpArgs(es), args, false);\n      return OpStatus::OK;\n    };\n    co_await cmd::SingleHop(del_cb);\n\n    if (cmd_cntx->mc_command() != nullptr) {\n      cmd_cntx->rb()->SendSimpleString(\n          MCRender{cmd_cntx->mc_command()->cmd_flags}.RenderStored(true));\n    } else {\n      cmd_cntx->rb()->SendOk();\n    }\n    co_return std::nullopt;\n  }\n\n  auto& sparams = get<SetCmd::SetParams>(params_result);\n\n  optional<StringResult> prev;\n  if (sparams.flags & SetCmd::SET_GET)\n    sparams.prev_val = &prev;\n\n  optional<util::fb2::Future<bool>> backpressure;\n  sparams.backpressure = &backpressure;\n\n  auto cb = [&](Transaction* t, EngineShard* shard) {\n    return SetCmd(t->GetOpArgs(shard), true).Set(sparams, key, value);\n  };\n\n  OpStatus result = co_await cmd::SingleHop(cb);\n  auto* rb = cmd_cntx->rb();\n\n  switch (result) {\n    case OpStatus::WRONG_TYPE:\n      rb->SendError(kWrongTypeErr);  // TODO(vlad): use co_return after await?\n      co_return std::nullopt;\n    case OpStatus::OUT_OF_MEMORY:\n      rb->SendError(kOutOfMemory);\n      co_return std::nullopt;\n    default:\n      break;\n  };\n\n  // If backpressure was provided, wait with reasonable limit (to avoid client 
deadlocking).\n  if (backpressure) {\n    std::move(backpressure)->GetFor(5ms);\n  }\n\n  if (sparams.flags & SetCmd::SET_GET) {\n    GetReplies{rb}.Send(std::move(prev));\n    co_return std::nullopt;\n  }\n\n  if (cmd_cntx->mc_command() != nullptr) {\n    MCRender render(cmd_cntx->mc_command()->cmd_flags);\n    rb->SendSimpleString(render.RenderStored(result == OpStatus::OK));\n  } else if (result == OpStatus::OK) {\n    rb->SendOk();\n  } else {\n    static_cast<RedisReplyBuilder*>(rb)->SendNull();\n  }\n\n  co_return std::nullopt;\n}\n\n/// (P)SETEX key seconds (milliseconds) value\nvoid CmdSetExGeneric(CmdArgList args, CommandContext* cmd_cntx) {\n  string_view cmd_name = cmd_cntx->cid()->name();\n\n  CmdArgParser parser{args};\n  auto [key, exp_int, value] = parser.Next<string_view, int64_t, string_view>();\n\n  RETURN_ON_PARSE_ERROR(parser, cmd_cntx);\n\n  if (exp_int < 1)\n    return cmd_cntx->SendError(InvalidExpireTime(cmd_name));\n\n  DbSlice::ExpireParams expiry{\n      .value = exp_int,\n      .unit = cmd_name.front() == 'P' ? 
TimeUnit::MSEC : TimeUnit::SEC,\n      .absolute = false,\n  };\n\n  int64_t now_ms = GetCurrentTimeMs();\n  auto [_, abs_ms] = expiry.Calculate(now_ms, false);\n  if (abs_ms < 0)\n    return cmd_cntx->SendError(InvalidExpireTime(\"set\"));\n\n  SetCmd::SetParams sparams;\n  sparams.flags |= SetCmd::SET_EXPIRE_AFTER_MS;\n  sparams.expire_after_ms = expiry.Calculate(now_ms, true).first;\n  cmd_cntx->SendError(SetGeneric(sparams, key, value, *cmd_cntx));\n}\n\nvoid CmdSetNx(CmdArgList args, CommandContext* cmd_cntx) {\n  string_view key = ArgS(args, 0);\n  string_view value = ArgS(args, 1);\n\n  SetCmd::SetParams sparams;\n  sparams.flags |= SetCmd::SET_IF_NOTEXIST;\n  if (cmd_cntx->mc_command())\n    sparams.memcache_flags = cmd_cntx->mc_command()->flags;\n\n  switch (SetGeneric(sparams, key, value, *cmd_cntx)) {\n    case OpStatus::OK:\n      return cmd_cntx->SendLong(1);  // Successfully set the value\n    case OpStatus::OUT_OF_MEMORY:\n      return cmd_cntx->SendError(kOutOfMemory);\n    case OpStatus::SKIPPED:\n      return cmd_cntx->SendLong(0);  // Existed, zero updates performed\n    default:\n      LOG(FATAL) << \"Invalid result\";\n  }\n}\n\nvoid CmdGet(CmdArgList args, CommandContext* cmd_cntx) {\n  auto cb = [key = ArgS(args, 0)](Transaction* tx, EngineShard* es) -> OpResult<StringResult> {\n    auto it_res = tx->GetDbSlice(es->shard_id()).FindReadOnly(tx->GetDbContext(), key, OBJ_STRING);\n    if (!it_res.ok())\n      return it_res.status();\n\n    return ReadString(tx->GetDbIndex(), key, (*it_res)->second, es);\n  };\n\n  GetReplies{cmd_cntx->rb()}.Send(cmd_cntx->tx()->ScheduleSingleHopT(cb));\n}\n\nvoid CmdGetDel(CmdArgList args, CommandContext* cmd_cntx) {\n  auto cb = [key = ArgS(args, 0)](Transaction* tx, EngineShard* es) -> OpResult<StringResult> {\n    auto& db_slice = tx->GetDbSlice(es->shard_id());\n    auto it_res = db_slice.FindMutable(tx->GetDbContext(), key, OBJ_STRING);\n    if (!it_res.ok())\n      return it_res.status();\n\n    auto value 
= ReadString(tx->GetDbIndex(), key, it_res->it->second, es);\n    db_slice.DelMutable(tx->GetDbContext(), std::move(*it_res));\n    return value;\n  };\n\n  GetReplies{cmd_cntx->rb()}.Send(cmd_cntx->tx()->ScheduleSingleHopT(cb));\n}\n\nvoid CmdDigest(CmdArgList args, CommandContext* cmd_cntx) {\n  string_view key = ArgS(args, 0);\n  auto cb = [&key](Transaction* tx, EngineShard* es) -> OpResult<string> {\n    auto it_res = tx->GetDbSlice(es->shard_id()).FindReadOnly(tx->GetDbContext(), key, OBJ_STRING);\n    if (!it_res.ok()) {\n      return it_res.status();\n    }\n\n    // Read string value (handles tiered storage if needed)\n    StringResult str_result = ReadString(tx->GetDbIndex(), key, (*it_res)->second, es);\n\n    // Handle both immediate value and tiered storage future\n    string value;\n    if (holds_alternative<string>(str_result)) {\n      value = std::move(get<string>(str_result));\n    } else {\n      auto& future = get<TieredStorage::TResult<string>>(str_result);\n      io::Result<string> io_res = future.Get();\n      if (!io_res) {\n        return OpStatus::IO_ERROR;\n      }\n      value = std::move(*io_res);\n    }\n\n    // Compute XXH3 hash and return as 16-char hex string\n    return XXH3_Digest(value);\n  };\n\n  OpResult<string> result = cmd_cntx->tx()->ScheduleSingleHopT(cb);\n  auto* rb = static_cast<RedisReplyBuilder*>(cmd_cntx->rb());\n\n  if (result) {\n    rb->SendBulkString(*result);\n  } else if (result.status() == OpStatus::KEY_NOTFOUND) {\n    rb->SendNull();\n  } else {\n    cmd_cntx->SendError(result.status());\n  }\n}\n\nvoid CmdGetSet(CmdArgList args, CommandContext* cmd_cntx) {\n  string_view key = ArgS(args, 0);\n  string_view value = ArgS(args, 1);\n\n  optional<StringResult> prev;\n  SetCmd::SetParams sparams{.prev_val = &prev};\n\n  if (OpStatus status = SetGeneric(sparams, key, value, *cmd_cntx); status != OpStatus::OK)\n    return cmd_cntx->SendError(status);\n\n  
GetReplies{cmd_cntx->rb()}.Send(std::move(prev));\n}\n\nvoid CmdGetEx(CmdArgList args, CommandContext* cmd_cntx) {\n  CmdArgParser parser{args};\n  string_view key = parser.Next();\n\n  DbSlice::ExpireParams exp_params;\n  bool defined = false;\n  auto* builder = cmd_cntx->rb();\n  while (parser.HasNext()) {\n    if (auto exp_type = parser.TryMapNext(\"EX\", ExpT::EX, \"PX\", ExpT::PX, \"EXAT\", ExpT::EXAT,\n                                          \"PXAT\", ExpT::PXAT);\n        exp_type) {\n      auto int_arg = parser.Next<int64_t>();\n      RETURN_ON_PARSE_ERROR(parser, cmd_cntx);\n\n      if (defined) {\n        return cmd_cntx->SendError(kSyntaxErr, kSyntaxErrType);\n      }\n\n      if (int_arg <= 0) {\n        return cmd_cntx->SendError(InvalidExpireTime(\"getex\"));\n      }\n\n      exp_params.absolute = *exp_type == ExpT::EXAT || *exp_type == ExpT::PXAT;\n      exp_params.value = int_arg;\n      exp_params.unit =\n          *exp_type == ExpT::PX || *exp_type == ExpT::PXAT ? TimeUnit::MSEC : TimeUnit::SEC;\n      defined = true;\n    } else if (parser.Check(\"PERSIST\")) {\n      exp_params.persist = true;\n    } else {\n      return builder->SendError(kSyntaxErr);\n    }\n  }\n\n  auto cb = [&](Transaction* t, EngineShard* shard) -> OpResult<StringResult> {\n    auto op_args = t->GetOpArgs(shard);\n\n    auto it_res = op_args.GetDbSlice().FindMutable(op_args.db_cntx, key, OBJ_STRING);\n    if (!it_res)\n      return it_res.status();\n\n    StringResult value = ReadString(t->GetDbIndex(), key, it_res->it->second, shard);\n\n    if (exp_params.IsDefined()) {\n      it_res->post_updater.Run();  // Run manually before possible delete due to negative expire\n      RETURN_ON_BAD_STATUS(op_args.GetDbSlice().UpdateExpire(op_args.db_cntx, it_res->it,\n                                                             it_res->exp_it, exp_params));\n    }\n\n    // Replicate GETEX as PEXPIREAT or PERSIST\n    if (shard->journal()) {\n      if (exp_params.persist) {\n     
   RecordJournal(op_args, \"PERSIST\", {key});\n      } else {\n        auto [ignore, abs_time] = exp_params.Calculate(op_args.db_cntx.time_now_ms, false);\n        auto abs_time_str = absl::StrCat(abs_time);\n        RecordJournal(op_args, \"PEXPIREAT\", {key, abs_time_str});\n      }\n    }\n\n    return value;\n  };\n\n  GetReplies{cmd_cntx->rb()}.Send(cmd_cntx->tx()->ScheduleSingleHopT(cb));\n}\n\ncmd::CmdR CmdIncr(CmdArgList args, CommandContext* cmd_cntx) {\n  string_view key = ArgS(args, 0);\n  return IncrByGeneric(cmd_cntx, key, 1);\n}\n\ncmd::CmdR CmdIncrBy(CmdArgList args, CommandContext* cmd_cntx) {\n  string_view key = ArgS(args, 0);\n  string_view sval = ArgS(args, 1);\n  int64_t val;\n\n  if (!absl::SimpleAtoi(sval, &val)) {\n    cmd_cntx->SendError(kInvalidIntErr);\n    return cmd::kAborted;\n  }\n  return IncrByGeneric(cmd_cntx, key, val);\n}\n\ncmd::CmdR CmdIncrByFloat(CmdArgList args, CommandContext* cmd_cntx) {\n  string_view key = ArgS(args, 0);\n  string_view sval = ArgS(args, 1);\n  double val;\n\n  if (!absl::SimpleAtod(sval, &val)) {\n    co_return facade::ErrorReply{kInvalidFloatErr};\n  }\n\n  auto cb = [&](Transaction* t, EngineShard* shard) {\n    return OpIncrFloat(t->GetOpArgs(shard), key, val);\n  };\n\n  OpResult<double> result = co_await cmd::SingleHopT(cb);\n  auto* rb = static_cast<RedisReplyBuilder*>(cmd_cntx->rb());\n\n  if (result)\n    rb->SendDouble(result.value());\n  else\n    rb->SendError(result.status());\n  co_return std::nullopt;\n}\n\ncmd::CmdR CmdDecr(CmdArgList args, CommandContext* cmd_cntx) {\n  string_view key = ArgS(args, 0);\n  return IncrByGeneric(cmd_cntx, key, -1);\n}\n\ncmd::CmdR CmdDecrBy(CmdArgList args, CommandContext* cmd_cntx) {\n  string_view key = ArgS(args, 0);\n  string_view sval = ArgS(args, 1);\n  int64_t val;\n\n  if (!absl::SimpleAtoi(sval, &val)) {\n    cmd_cntx->SendError(kInvalidIntErr);\n    return cmd::kAborted;\n  }\n  if (val == INT64_MIN) {\n    cmd_cntx->SendError(kIncrOverflow);\n    
return cmd::kAborted;\n  }\n\n  return IncrByGeneric(cmd_cntx, key, -val);\n}\n\n// Reorder per-shard results according to argument order of primary command\nvoid ReorderShardResults(absl::Span<MGetResponse> mget_resp, const Transaction* t,\n                         absl::Span<optional<GetResp>> dest) {\n  for (ShardId sid = 0; sid < mget_resp.size(); ++sid) {\n    if (!t->IsActive(sid))\n      continue;\n\n    auto& src = mget_resp[sid];\n    ShardArgs shard_args = t->GetShardArgs(sid);\n    unsigned src_indx = 0;\n    for (auto it = shard_args.begin(); it != shard_args.end(); ++it, ++src_indx) {\n      if (!src.resp_arr[src_indx])\n        continue;\n\n      DCHECK_LT(it.index(), dest.size());\n      auto& item = dest[it.index()];\n      item = src.resp_arr[src_indx];\n    }\n  }\n}\n\ncmd::CmdR MGetGeneric(CommandContext* cmd_cntx, CmdArgList args,\n                      std::optional<DbSlice::ExpireParams> gat_params) {\n  DCHECK_GE(args.size(), 1U);\n\n  MemcacheCmdFlags cmd_flags;\n\n  if (cmd_cntx->mc_command()) {\n    cmd_flags = cmd_cntx->mc_command()->cmd_flags;\n  }\n\n  fb2::BlockingCounter tiering_bc{0};  // Count of pending tiered reads\n  AggregateError tiering_err;          // First tiering error\n\n  unique_ptr<MGetResponse[]> mget_resp(new MGetResponse[shard_set->size()]);\n\n  auto gat_ptr = gat_params ? 
&*gat_params : nullptr;\n  auto cb = [&](Transaction* t, EngineShard* shard) {\n    mget_resp[shard->shard_id()] = OpMGet(tiering_bc, &tiering_err, cmd_flags, t, shard, gat_ptr);\n    return OpStatus::OK;\n  };\n\n  // Waiter objects needs to be used to keep tx alive in its scope for ReorderShardResults\n  cmd::SingleHopWaiter waiter{cmd_cntx, cb};\n  auto result = co_await waiter;\n  CHECK_EQ(OpStatus::OK, result);\n\n  // wait for all tiered reads to finish and check for errors\n  tiering_bc->Wait();\n  if (auto err = std::move(tiering_err).Destroy(); err) {\n    cmd_cntx->rb()->SendError(err.message());\n    co_return std::nullopt;\n  }\n\n  size_t arg_len = args.size();\n\n  unique_ptr<optional<GetResp>[]> mget_results(new optional<GetResp>[arg_len]);\n  ReorderShardResults(absl::MakeSpan(mget_resp.get(), shard_set->size()), cmd_cntx->tx(),\n                      absl::MakeSpan(mget_results.get(), arg_len));\n\n  SinkReplyBuilder::ReplyScope scope{cmd_cntx->rb()};\n  if (cmd_cntx->mc_command()) {\n    auto* mc_builder = static_cast<MCReplyBuilder*>(cmd_cntx->rb());\n    facade::MCRender mc_render{cmd_cntx->mc_command()->cmd_flags};\n    for (size_t i = 0; i < arg_len; ++i) {\n      const auto& entry = mget_results[i];\n      if (entry) {\n        mc_builder->SendValue(cmd_cntx->mc_command()->cmd_flags, cmd_cntx->at(i), entry->value, 0,\n                              entry->mc_flag, entry->ttl_sec);\n      } else {\n        mc_builder->SendSimpleString(mc_render.RenderMiss());\n      }\n    }\n    mc_builder->SendSimpleString(mc_render.RenderGetEnd());\n  } else {\n    auto* redis_builder = static_cast<RedisReplyBuilder*>(cmd_cntx->rb());\n    redis_builder->StartArray(arg_len);\n    for (size_t i = 0; i < arg_len; ++i) {\n      const auto& entry = mget_results[i];\n      if (entry) {\n        redis_builder->SendBulkString(entry->value);\n      } else {\n        redis_builder->SendNull();\n      }\n    }\n  }\n  co_return std::nullopt;\n}\n\ncmd::CmdR 
CmdMGet(CmdArgList args, CommandContext* cmd_cntx) {\n  return MGetGeneric(cmd_cntx, args, std::nullopt);\n}\n\n// Implements the memcache GAT command. The expected input is\n// GAT key [keys...]\n// The expiry argument is stored in mc_command()->expire_ts\ncmd::CmdR CmdGAT(CmdArgList args, CommandContext* cmd_cntx) {\n  if (!cmd_cntx->mc_command()) {\n    cmd_cntx->SendError(\"GAT is a memcache-only command\");\n    return cmd::kAborted;\n  }\n  int64_t expire_ts = cmd_cntx->mc_command()->expire_ts;\n  DbSlice::ExpireParams expire_params{\n      .value = expire_ts, .absolute = true, .persist = expire_ts == 0};\n  return MGetGeneric(cmd_cntx, args, expire_params);\n}\n\nvoid CmdMSet(CmdArgList args, CommandContext* cmd_cntx) {\n  if (VLOG_IS_ON(2)) {\n    string str;\n    for (size_t i = 1; i < args.size(); ++i) {\n      absl::StrAppend(&str, \" \", ArgS(args, i));\n    }\n    LOG(INFO) << \"MSET/\" << cmd_cntx->tx()->GetUniqueShardCnt() << str;\n  }\n\n  AggregateStatus result;\n  auto cb = [&](Transaction* t, EngineShard* shard) {\n    ShardArgs args = t->GetShardArgs(shard->shard_id());\n    if (auto status = OpMSet(t->GetOpArgs(shard), args); status != OpStatus::OK)\n      result = status;\n    return OpStatus::OK;\n  };\n\n  if (auto status = cmd_cntx->tx()->ScheduleSingleHop(std::move(cb)); status != OpStatus::OK)\n    result = status;\n\n  if (*result == OpStatus::OK) {\n    cmd_cntx->SendOk();\n  } else {\n    cmd_cntx->SendError(*result);\n  }\n}\n\nvoid CmdMSetNx(CmdArgList args, CommandContext* cmd_cntx) {\n  atomic_bool exists{false};\n\n  auto cb = [&](Transaction* t, EngineShard* es) {\n    auto sid = es->shard_id();\n    auto args = t->GetShardArgs(sid);\n    auto op_args = t->GetOpArgs(es);\n    for (auto arg_it = args.begin(); arg_it != args.end(); ++arg_it) {\n      auto it = op_args.GetDbSlice().FindReadOnly(t->GetDbContext(), *arg_it).it;\n      ++arg_it;\n      if (IsValid(it)) {\n        exists.store(true, memory_order_relaxed);\n        
break;\n      }\n    }\n\n    return OpStatus::OK;\n  };\n\n  cmd_cntx->tx()->Execute(std::move(cb), false);\n  const bool to_skip = exists.load(memory_order_relaxed);\n\n  AggregateStatus result;\n  auto epilog_cb = [&](Transaction* t, EngineShard* shard) {\n    if (to_skip)\n      return OpStatus::OK;\n\n    auto args = t->GetShardArgs(shard->shard_id());\n    if (auto status = OpMSet(t->GetOpArgs(shard), args); status != OpStatus::OK)\n      result = status;\n    return OpStatus::OK;\n  };\n  cmd_cntx->tx()->Execute(std::move(epilog_cb), true);\n\n  cmd_cntx->SendLong(to_skip || (*result != OpStatus::OK) ? 0 : 1);\n}\n\nvoid CmdStrLen(CmdArgList args, CommandContext* cmd_cntx) {\n  auto cb = [key = ArgS(args, 0)](Transaction* t, EngineShard* shard) {\n    return OpStrLen(t->GetOpArgs(shard), key);\n  };\n  GetReplies{cmd_cntx->rb()}.Send(cmd_cntx->tx()->ScheduleSingleHopT(cb));\n}\n\nvoid CmdGetRange(CmdArgList args, CommandContext* cmd_cntx) {\n  CmdArgParser parser(args);\n  auto [key, start, end] = parser.Next<string_view, int32_t, int32_t>();\n\n  RETURN_ON_PARSE_ERROR(parser, cmd_cntx);\n\n  auto cb = [&, &key = key, &start = start, &end = end](Transaction* t, EngineShard* shard) {\n    return OpGetRange(t->GetOpArgs(shard), key, start, end);\n  };\n\n  GetReplies{cmd_cntx->rb()}.Send(cmd_cntx->tx()->ScheduleSingleHopT(cb));\n}\n\nvoid CmdSetRange(CmdArgList args, CommandContext* cmd_cntx) {\n  CmdArgParser parser(args);\n  auto [key, start, value] = parser.Next<string_view, int32_t, string_view>();\n  auto* builder = cmd_cntx->rb();\n\n  RETURN_ON_PARSE_ERROR(parser, cmd_cntx);\n\n  if (start < 0) {\n    return builder->SendError(\"offset is out of range\");\n  }\n\n  if (size_t min_size = start + value.size(); min_size > kMaxStrLen) {\n    return builder->SendError(\"string exceeds maximum allowed size\");\n  }\n\n  auto cb = [&, &key = key, &start = start, &value = value](Transaction* t, EngineShard* shard) {\n    return OpSetRange(t->GetOpArgs(shard), 
key, start, value);\n  };\n  GetReplies{builder}.Send(cmd_cntx->tx()->ScheduleSingleHopT(cb));\n}\n\n/* CL.THROTTLE <key> <max_burst> <count per period> <period> [<quantity>] */\n/* Response is array of 5 integers. The meaning of each array item is:\n *  1. Whether the action was limited:\n *   - 0 indicates the action is allowed.\n *   - 1 indicates that the action was limited/blocked.\n *  2. The total limit of the key (max_burst + 1). This is equivalent to the\n * common X-RateLimit-Limit HTTP header.\n *  3. The remaining limit of the key. Equivalent to X-RateLimit-Remaining.\n *  4. The number of seconds until the user should retry, and always -1 if the\n * action was allowed. Equivalent to Retry-After.\n *  5. The number of seconds until the limit will reset to its maximum capacity.\n * Equivalent to X-RateLimit-Reset.\n */\nvoid CmdClThrottle(CmdArgList args, CommandContext* cmd_cntx) {\n  constexpr uint64_t kSecondToNanoSecond = 1000000000;\n  const string_view key = ArgS(args, 0);\n\n  auto* rb = static_cast<RedisReplyBuilder*>(cmd_cntx->rb());\n  // Allow max burst in number of tokens\n  uint64_t max_burst;\n  const string_view max_burst_str = ArgS(args, 1);\n  if (!absl::SimpleAtoi(max_burst_str, &max_burst)) {\n    return rb->SendError(kInvalidIntErr);\n  }\n\n  // Emit count of tokens per period\n  uint64_t count;\n  const string_view count_str = ArgS(args, 2);\n  if (!absl::SimpleAtoi(count_str, &count)) {\n    return rb->SendError(kInvalidIntErr);\n  }\n\n  // Period of emitting count of tokens\n  uint64_t period;\n  const string_view period_str = ArgS(args, 3);\n  if (!absl::SimpleAtoi(period_str, &period)) {\n    return rb->SendError(kInvalidIntErr);\n  }\n\n  // Apply quantity of tokens now\n  uint64_t quantity = 1;\n  if (args.size() > 4) {\n    const string_view quantity_str = ArgS(args, 4);\n\n    if (!absl::SimpleAtoi(quantity_str, &quantity)) {\n      return rb->SendError(kInvalidIntErr);\n    }\n  }\n\n  if (max_burst > INT64_MAX - 1) {\n    
return rb->SendError(kInvalidIntErr);\n  }\n  const int64_t limit = max_burst + 1;\n\n  if (period > UINT64_MAX / kSecondToNanoSecond || count == 0 ||\n      period * kSecondToNanoSecond / count > INT64_MAX) {\n    return rb->SendError(kInvalidIntErr);\n  }\n\n  const int64_t emission_interval_ns = period * kSecondToNanoSecond / count;\n\n  if (emission_interval_ns == 0) {\n    return rb->SendError(\"zero rates are not supported\");\n  }\n\n  if (emission_interval_ns > INT64_MAX / limit) {\n    return cmd_cntx->SendError(kInvalidIntErr);\n  }\n\n  if (quantity != 0 && static_cast<uint64_t>(emission_interval_ns) > INT64_MAX / quantity) {\n    return cmd_cntx->SendError(kInvalidIntErr);\n  }\n\n  auto cb = [&](Transaction* t, EngineShard* shard) -> OpResult<array<int64_t, 5>> {\n    return OpThrottle(t->GetOpArgs(shard), key, limit, emission_interval_ns, quantity);\n  };\n\n  Transaction* trans = cmd_cntx->tx();\n  OpResult<array<int64_t, 5>> result = trans->ScheduleSingleHopT(std::move(cb));\n\n  if (result) {\n    RedisReplyBuilder* redis_builder = static_cast<RedisReplyBuilder*>(cmd_cntx->rb());\n    redis_builder->StartArray(result->size());\n    auto& array = result.value();\n\n    int64_t retry_after_s = array[3] / 1000;\n    if (array[3] > 0) {\n      retry_after_s += 1;\n    }\n    array[3] = retry_after_s;\n\n    int64_t reset_after_s = array[4] / 1000;\n    if (array[4] > 0) {\n      reset_after_s += 1;\n    }\n    array[4] = reset_after_s;\n\n    for (const auto& v : array) {\n      redis_builder->SendLong(v);\n    }\n  } else {\n    switch (result.status()) {\n      case OpStatus::WRONG_TYPE:\n        cmd_cntx->SendError(kWrongTypeErr);\n        break;\n      case OpStatus::INVALID_INT:\n      case OpStatus::INVALID_VALUE:\n        cmd_cntx->SendError(kInvalidIntErr);\n        break;\n      case OpStatus::OUT_OF_MEMORY:\n        cmd_cntx->SendError(kOutOfMemory);\n        break;\n      default:\n        cmd_cntx->SendError(result.status());\n        
break;\n    }\n  }\n}\n\n}  // namespace\n\n#define HFUNC(x) SetHandler(&Cmd##x)\n\nvoid RegisterStringFamily(CommandRegistry* registry) {\n  constexpr uint32_t kMSetMask = CO::JOURNALED | CO::DENYOOM | CO::NO_AUTOJOURNAL;\n\n  registry->StartFamily(acl::STRING);\n  *registry\n      << CI{\"SET\", CO::JOURNALED | CO::DENYOOM | CO::NO_AUTOJOURNAL, -3, 1, 1}.SetAsyncHandler(\n             CmdSet)\n      << CI{\"SETEX\", CO::JOURNALED | CO::DENYOOM | CO::NO_AUTOJOURNAL, 4, 1, 1}.HFUNC(SetExGeneric)\n      << CI{\"PSETEX\", CO::JOURNALED | CO::DENYOOM | CO::NO_AUTOJOURNAL, 4, 1, 1}.HFUNC(SetExGeneric)\n      << CI{\"SETNX\", CO::JOURNALED | CO::DENYOOM | CO::FAST, 3, 1, 1}.HFUNC(SetNx)\n      << CI{\"APPEND\", CO::JOURNALED | CO::DENYOOM | CO::FAST, 3, 1, 1}.SetAsyncHandler(\n             ExtendGeneric)\n      << CI{\"PREPEND\", CO::JOURNALED | CO::DENYOOM | CO::FAST, 3, 1, 1}.SetAsyncHandler(\n             ExtendGeneric)\n      << CI{\"INCR\", CO::JOURNALED | CO::FAST, 2, 1, 1}.SetAsyncHandler(CmdIncr)\n      << CI{\"DECR\", CO::JOURNALED | CO::FAST, 2, 1, 1}.SetAsyncHandler(CmdDecr)\n      << CI{\"INCRBY\", CO::JOURNALED | CO::FAST, 3, 1, 1}.SetAsyncHandler(CmdIncrBy)\n      << CI{\"INCRBYFLOAT\", CO::JOURNALED | CO::FAST, 3, 1, 1}.SetAsyncHandler(CmdIncrByFloat)\n      << CI{\"DECRBY\", CO::JOURNALED | CO::FAST, 3, 1, 1}.SetAsyncHandler(CmdDecrBy)\n      << CI{\"GET\", CO::READONLY | CO::FAST, 2, 1, 1}.HFUNC(Get)\n      << CI{\"GETDEL\", CO::JOURNALED | CO::FAST, 2, 1, 1}.HFUNC(GetDel)\n      << CI{\"DIGEST\", CO::READONLY | CO::FAST, 2, 1, 1}.HFUNC(Digest)\n      << CI{\"GETEX\", CO::JOURNALED | CO::DENYOOM | CO::FAST | CO::NO_AUTOJOURNAL, -2, 1, 1}.HFUNC(\n             GetEx)\n      << CI{\"GETSET\", CO::JOURNALED | CO::DENYOOM | CO::FAST, 3, 1, 1}.HFUNC(GetSet)\n      << CI{\"MGET\", CO::READONLY | CO::FAST | CO::IDEMPOTENT, -2, 1, -1}.SetAsyncHandler(CmdMGet)\n      << CI{\"MSET\", kMSetMask, -3, 1, -1}.HFUNC(MSet)\n      << CI{\"MSETNX\", kMSetMask, -3, 1, 
-1}.HFUNC(MSetNx)\n      << CI{\"STRLEN\", CO::READONLY | CO::FAST, 2, 1, 1}.HFUNC(StrLen)\n      << CI{\"GETRANGE\", CO::READONLY, 4, 1, 1}.HFUNC(GetRange)\n      << CI{\"SUBSTR\", CO::READONLY, 4, 1, 1}.HFUNC(GetRange)  // Alias for GetRange\n      << CI{\"SETRANGE\", CO::JOURNALED | CO::DENYOOM, 4, 1, 1}.HFUNC(SetRange)\n      << CI{\"CL.THROTTLE\", CO::JOURNALED | CO::DENYOOM | CO::FAST, -5, 1, 1, acl::THROTTLE}.HFUNC(\n             ClThrottle)\n      << CI{\"GAT\", CO::JOURNALED | CO::DENYOOM | CO::NO_AUTOJOURNAL | CO::HIDDEN, -2, 1, -1}\n             .SetAsyncHandler(CmdGAT);\n}\n\n}  // namespace dfly\n"
  },
  {
    "path": "src/server/string_family_test.cc",
    "content": "// Copyright 2022, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n#include \"base/gtest.h\"\n#include \"base/logging.h\"\n#include \"facade/facade_test.h\"\n#include \"server/conn_context.h\"\n#include \"server/engine_shard_set.h\"\n#include \"server/error.h\"\n#include \"server/test_utils.h\"\n#include \"server/transaction.h\"\n\nusing namespace testing;\nusing namespace std;\nusing namespace util;\nusing absl::StrCat;\n\nnamespace dfly {\n\nclass StringFamilyTest : public BaseFamilyTest {\n protected:\n};\n\nvector<int64_t> ToIntArr(const RespExpr& e) {\n  vector<int64_t> res;\n  CHECK_EQ(e.type, RespExpr::ARRAY);\n  const RespVec* vec = get<RespVec*>(e.u);\n  for (auto a : *vec) {\n    int64_t val;\n    std::string_view s = ToSV(a.GetBuf());\n    CHECK(absl::SimpleAtoi(s, &val)) << s;\n    res.push_back(val);\n  }\n\n  return res;\n}\n\nTEST_F(StringFamilyTest, SetGet) {\n  EXPECT_EQ(Run({\"set\", \"key\", \"val\"}), \"OK\");\n  EXPECT_EQ(Run({\"get\", \"key\"}), \"val\");\n  EXPECT_EQ(Run({\"set\", \"key1\", \"1\"}), \"OK\");\n  EXPECT_EQ(Run({\"get\", \"key1\"}), \"1\");\n  EXPECT_EQ(Run({\"set\", \"key\", \"2\"}), \"OK\");\n  EXPECT_EQ(Run({\"get\", \"key\"}), \"2\");\n  EXPECT_THAT(Run({\"get\", \"key3\"}), ArgType(RespExpr::NIL));\n\n  auto metrics = GetMetrics();\n  EXPECT_EQ(7, metrics.coordinator_stats.tx_normal_cnt);\n  EXPECT_EQ(3, metrics.events.hits);\n  EXPECT_EQ(1, metrics.events.misses);\n  EXPECT_EQ(3, metrics.events.mutations);\n}\n\nTEST_F(StringFamilyTest, Incr) {\n  ASSERT_EQ(Run({\"set\", \"key\", \"0\"}), \"OK\");\n  ASSERT_THAT(Run({\"incr\", \"key\"}), IntArg(1));\n\n  ASSERT_EQ(Run({\"set\", \"key1\", \"123456789\"}), \"OK\");\n  ASSERT_THAT(Run({\"incrby\", \"key1\", \"0\"}), IntArg(123456789));\n\n  ASSERT_EQ(Run({\"set\", \"key1\", \"-123456789\"}), \"OK\");\n  ASSERT_THAT(Run({\"incrby\", \"key1\", \"0\"}), IntArg(-123456789));\n\n  ASSERT_EQ(Run({\"set\", \"key1\", \"   -123  
\"}), \"OK\");\n  ASSERT_THAT(Run({\"incrby\", \"key1\", \"1\"}), ErrArg(\"ERR value is not an integer\"));\n\n  ASSERT_THAT(Run({\"incrby\", \"ne\", \"0\"}), IntArg(0));\n  ASSERT_THAT(Run({\"decrby\", \"a\", \"-9223372036854775808\"}), ErrArg(\"overflow\"));\n  auto metrics = GetMetrics();\n  EXPECT_EQ(9, metrics.events.mutations);\n  EXPECT_EQ(0, metrics.events.misses);\n  EXPECT_EQ(0, metrics.events.hits);\n}\n\nTEST_F(StringFamilyTest, Append) {\n  Run({\"setex\", \"key\", \"100\", \"val\"});\n  EXPECT_THAT(Run({\"ttl\", \"key\"}), IntArg(100));\n\n  EXPECT_THAT(Run({\"append\", \"key\", \"bar\"}), IntArg(6));\n  EXPECT_THAT(Run({\"ttl\", \"key\"}), IntArg(100));\n}\n\nTEST_F(StringFamilyTest, Expire) {\n  ASSERT_EQ(Run({\"set\", \"key\", \"val\", \"PX\", \"20\"}), \"OK\");\n\n  AdvanceTime(10);\n  EXPECT_EQ(Run({\"get\", \"key\"}), \"val\");\n\n  AdvanceTime(10);\n\n  EXPECT_THAT(Run({\"get\", \"key\"}), ArgType(RespExpr::NIL));\n\n  ASSERT_THAT(Run({\"set\", \"i\", \"1\", \"PX\", \"10\"}), \"OK\");\n  ASSERT_THAT(Run({\"incr\", \"i\"}), IntArg(2));\n\n  AdvanceTime(10);\n  ASSERT_THAT(Run({\"incr\", \"i\"}), IntArg(1));\n}\n\nTEST_F(StringFamilyTest, Keepttl) {\n  ASSERT_EQ(Run({\"set\", \"key\", \"val\", \"EX\", \"100\"}), \"OK\");\n  ASSERT_EQ(Run({\"set\", \"key\", \"val\"}), \"OK\");\n  auto resp = Run({\"ttl\", \"key\"});\n  auto actual = get<int64_t>(resp.u);\n  ASSERT_EQ(actual, -1);\n\n  resp = Run({\"set\", \"key\", \"val\", \"EX\", \"200\"});\n  ASSERT_EQ(Run({\"set\", \"key\", \"val\", \"KEEPTTL\"}), \"OK\");\n\n  resp = Run({\"ttl\", \"key\"});\n  actual = get<int64_t>(resp.u);\n\n  EXPECT_TRUE(actual > 0 && actual <= 200);\n}\n\nTEST_F(StringFamilyTest, SetOptionsSyntaxError) {\n  auto TEST_current_time_s = TEST_current_time_ms / 1000;\n\n  EXPECT_THAT(Run({\"set\", \"key\", \"val\", \"EX\", \"1030\", \"PX\", \"1030\"}), ErrArg(\"ERR syntax error\"));\n  EXPECT_THAT(\n      Run({\"set\", \"key\", \"val\", \"EX\", \"1030\", \"EXAT\", 
absl::StrCat(TEST_current_time_s + 1030)}),\n      ErrArg(\"ERR syntax error\"));\n  EXPECT_THAT(\n      Run({\"set\", \"key\", \"val\", \"EX\", \"1030\", \"PXAT\", absl::StrCat(TEST_current_time_ms + 1030)}),\n      ErrArg(\"ERR syntax error\"));\n\n  EXPECT_THAT(Run({\"set\", \"key\", \"val\", \"PX\", \"1030\", \"EX\", \"1030\"}), ErrArg(\"ERR syntax error\"));\n  EXPECT_THAT(\n      Run({\"set\", \"key\", \"val\", \"PX\", \"1030\", \"EXAT\", absl::StrCat(TEST_current_time_s + 1030)}),\n      ErrArg(\"ERR syntax error\"));\n  EXPECT_THAT(\n      Run({\"set\", \"key\", \"val\", \"PX\", \"1030\", \"PXAT\", absl::StrCat(TEST_current_time_ms + 1030)}),\n      ErrArg(\"ERR syntax error\"));\n  EXPECT_THAT(\n      Run({\"set\", \"key\", \"val\", \"EXAT\", absl::StrCat(TEST_current_time_s + 1030), \"EX\", \"1030\"}),\n      ErrArg(\"ERR syntax error\"));\n  EXPECT_THAT(\n      Run({\"set\", \"key\", \"val\", \"EXAT\", absl::StrCat(TEST_current_time_s + 1030), \"PX\", \"1030\"}),\n      ErrArg(\"ERR syntax error\"));\n  EXPECT_THAT(Run({\"set\", \"key\", \"val\", \"EXAT\", absl::StrCat(TEST_current_time_s + 1030), \"PXAT\",\n                   absl::StrCat(TEST_current_time_ms + 1030)}),\n              ErrArg(\"ERR syntax error\"));\n\n  EXPECT_THAT(\n      Run({\"set\", \"key\", \"val\", \"PXAT\", absl::StrCat(TEST_current_time_ms + 1030), \"EX\", \"1030\"}),\n      ErrArg(\"ERR syntax error\"));\n  EXPECT_THAT(\n      Run({\"set\", \"key\", \"val\", \"PXAT\", absl::StrCat(TEST_current_time_ms + 1030), \"PX\", \"1030\"}),\n      ErrArg(\"ERR syntax error\"));\n  EXPECT_THAT(Run({\"set\", \"key\", \"val\", \"PXAT\", absl::StrCat(TEST_current_time_ms + 1030), \"EXAT\",\n                   absl::StrCat(TEST_current_time_s + 1030)}),\n              ErrArg(\"ERR syntax error\"));\n\n  EXPECT_THAT(Run({\"set\", \"key\", \"val\", \"EX\", \"1030\", \"KEEPTTL\"}), ErrArg(\"ERR syntax error\"));\n  EXPECT_THAT(Run({\"set\", \"key\", \"val\", \"PX\", \"1030\", \"KEEPTTL\"}), 
ErrArg(\"ERR syntax error\"));\n  EXPECT_THAT(\n      Run({\"set\", \"key\", \"val\", \"EXAT\", absl::StrCat(TEST_current_time_s + 1030), \"KEEPTTL\"}),\n      ErrArg(\"ERR syntax error\"));\n  EXPECT_THAT(\n      Run({\"set\", \"key\", \"val\", \"PXAT\", absl::StrCat(TEST_current_time_ms + 1030), \"KEEPTTL\"}),\n      ErrArg(\"ERR syntax error\"));\n\n  EXPECT_THAT(Run({\"set\", \"key\", \"val\", \"KEEPTTL\", \"PX\", \"1030\"}), ErrArg(\"ERR syntax error\"));\n  EXPECT_THAT(\n      Run({\"set\", \"key\", \"val\", \"KEEPTTL\", \"PXAT\", absl::StrCat(TEST_current_time_ms + 1030)}),\n      ErrArg(\"ERR syntax error\"));\n  EXPECT_THAT(Run({\"set\", \"key\", \"val\", \"KEEPTTL\", \"EX\", \"1030\"}), ErrArg(\"ERR syntax error\"));\n  EXPECT_THAT(\n      Run({\"set\", \"key\", \"val\", \"KEEPTTL\", \"EXAT\", absl::StrCat(TEST_current_time_s + 1030)}),\n      ErrArg(\"ERR syntax error\"));\n\n  EXPECT_THAT(Run({\"set\", \"key\", \"val\", \"NX\", \"XX\"}), ErrArg(\"ERR syntax error\"));\n  EXPECT_THAT(Run({\"set\", \"key\", \"val\", \"XX\", \"NX\"}), ErrArg(\"ERR syntax error\"));\n\n  EXPECT_THAT(Run({\"set\", \"key\", \"val\", \"PX\", \"9223372036854775800\"}),\n              ErrArg(\"invalid expire time\"));\n  EXPECT_THAT(Run({\"SET\", \"foo\", \"bar\", \"EX\", \"18446744073709561\"}), ErrArg(\"invalid expire time\"));\n}\n\nTEST_F(StringFamilyTest, Set) {\n  auto resp = Run({\"set\", \"foo\", \"bar\", \"XX\"});\n  EXPECT_THAT(resp, ArgType(RespExpr::NIL));\n\n  resp = Run({\"set\", \"foo\", \"bar\", \"NX\"});\n  ASSERT_THAT(resp, \"OK\");\n  resp = Run({\"set\", \"foo\", \"bar\", \"NX\"});\n  EXPECT_THAT(resp, ArgType(RespExpr::NIL));\n\n  resp = Run({\"set\", \"foo\", \"bar\", \"xx\"});\n  ASSERT_THAT(resp, \"OK\");\n\n  resp = Run({\"set\", \"foo\", \"bar\", \"ex\", \"abc\"});\n  ASSERT_THAT(resp, ErrArg(kInvalidIntErr));\n\n  resp = Run({\"set\", \"foo\", \"bar\", \"ex\", \"-1\"});\n  ASSERT_THAT(resp, ErrArg(\"invalid expire time\"));\n\n  resp = Run({\"set\", 
\"foo\", \"bar\", \"ex\", \"1\"});\n  ASSERT_THAT(resp, \"OK\");\n\n  ASSERT_THAT(Run({\"sadd\", \"s1\", \"1\"}), IntArg(1));\n  ASSERT_THAT(Run({\"set\", \"s1\", \"2\"}), \"OK\");\n}\n\nTEST_F(StringFamilyTest, SetHugeKey) {\n  const string key(36000000, 'b');\n  auto resp = Run({\"set\", key, \"1\"});\n  ASSERT_THAT(resp, \"OK\");\n  Run({\"del\", key});\n}\n\nTEST_F(StringFamilyTest, MSetLong) {\n  vector<string> command({\"mset\"});\n  for (unsigned i = 0; i < 12000; ++i) {\n    command.push_back(StrCat(\"key\", i));\n    command.push_back(StrCat(\"val\", i));\n  }\n  auto resp = Run(absl::MakeSpan(command));\n  EXPECT_EQ(resp, \"OK\");\n}\n\nTEST_F(StringFamilyTest, MGetSet) {\n  Run({\"mset\", \"z\", \"0\"});         // single key\n  auto resp = Run({\"mget\", \"z\"});  // single key\n  EXPECT_THAT(resp, \"0\");\n\n  Run({\"mset\", \"x\", \"0\", \"b\", \"0\"});\n\n  ASSERT_EQ(2, GetDebugInfo(\"IO0\").shards_count);\n\n  auto mget_fb = pp_->at(0)->LaunchFiber([&] {\n    for (size_t i = 0; i < 1000; ++i) {\n      RespExpr resp = Run({\"mget\", \"b\", \"x\"});\n      ASSERT_THAT(resp, ArrLen(2));\n      auto ivec = ToIntArr(resp);\n\n      ASSERT_GE(ivec[1], ivec[0]);\n    }\n  });\n\n  auto set_fb = pp_->at(1)->LaunchFiber([&] {\n    for (size_t i = 1; i < 2000; ++i) {\n      Run({\"set\", \"x\", StrCat(i)});\n      Run({\"set\", \"b\", StrCat(i)});\n    }\n  });\n\n  mget_fb.Join();\n  set_fb.Join();\n}\n\nTEST_F(StringFamilyTest, MGetCachingModeBug2276) {\n  absl::FlagSaver fs;\n  SetTestFlag(\"cache_mode\", \"true\");\n  ResetService();\n  Run({\"debug\", \"populate\", \"18000\", \"key\", \"32\", \"RAND\"});\n\n  // Scan starts traversing the database, because we populated the database with lots of items we\n  // assume that scan will return items from the same bucket that reside next to each other.\n  auto resp = Run({\"scan\", \"0\"});\n  ASSERT_THAT(resp, ArrLen(2));\n  StringVec vec = StrArray(resp.GetVec()[1]);\n  ASSERT_GE(vec.size(), 10);\n\n  auto 
get_bump_ups = [](const string& str) -> size_t {\n    const string matcher = \"bump_ups:\";\n    const auto pos = str.find(matcher) + matcher.size();\n    const auto next_new_line =\n        str.find(\"\\r\\n\", pos);  // Find the position of the next \"\\r\\n\" after the initial position\n    const auto sub = str.substr(pos, next_new_line - pos);\n    return atoi(sub.c_str());\n  };\n\n  resp = Run({\"info\", \"stats\"});\n  EXPECT_EQ(get_bump_ups(resp.GetString()), 0);\n\n  auto mget_resp = StrArray(Run(\n      {\"mget\", vec[0], vec[1], vec[2], vec[3], vec[4], vec[5], vec[6], vec[7], vec[8], vec[9]}));\n\n  resp = Run({\"info\", \"stats\"});\n  size_t bumps1 = get_bump_ups(resp.GetString());\n\n  EXPECT_GE(bumps1, 0);\n  EXPECT_LE(bumps1, 10);\n\n  for (int i = 0; i < 10; ++i) {\n    auto get_resp = Run({\"get\", vec[i]});\n    EXPECT_EQ(get_resp, mget_resp[i]);\n  }\n\n  resp = Run({\"info\", \"stats\"});\n  size_t bumps2 = get_bump_ups(resp.GetString());\n  EXPECT_GT(bumps2, bumps1);\n}\n\nTEST_F(StringFamilyTest, MGetCachingModeBug2465) {\n  absl::FlagSaver fs;\n  SetTestFlag(\"cache_mode\", \"true\");\n  ResetService();\n  Run({\"debug\", \"populate\", \"18000\", \"key\", \"32\", \"RAND\"});\n\n  // Scan starts traversing the database, because we populated the database with lots of items we\n  // assume that scan will return items from the same bucket that reside next to each other.\n  auto resp = Run({\"scan\", \"0\"});\n  ASSERT_THAT(resp, ArrLen(2));\n  StringVec vec = StrArray(resp.GetVec()[1]);\n  ASSERT_GE(vec.size(), 10);\n\n  auto get_bump_ups = [](const string& str) -> size_t {\n    const string matcher = \"bump_ups:\";\n    const auto pos = str.find(matcher) + matcher.size();\n    const auto next_new_line =\n        str.find(\"\\r\\n\", pos);  // Find the position of the next \"\\r\\n\" after the initial position\n    const auto sub = str.substr(pos, next_new_line - pos);\n    return atoi(sub.c_str());\n  };\n\n  resp = Run({\"info\", 
\"stats\"});\n  EXPECT_EQ(get_bump_ups(resp.GetString()), 0);\n\n  Run({\"del\", vec[1]});\n  Run({\"lpush\", vec[1], \"a\"});\n\n  resp = Run({\"get\", vec[2]});\n  string val = resp.GetString();\n  auto mget_resp = StrArray(Run({\"mget\", vec[2], vec[2], vec[2]}));\n  EXPECT_THAT(mget_resp, ElementsAre(val, val, val));\n\n  resp = Run({\"info\", \"stats\"});\n  size_t bumps = get_bump_ups(resp.GetString());\n  EXPECT_EQ(bumps, 2);  // one bump for get and one for mget\n}\n\nTEST_F(StringFamilyTest, MSetGet) {\n  Run({\"mset\", \"x\", \"0\", \"y\", \"0\", \"a\", \"0\", \"b\", \"0\"});\n  ASSERT_EQ(2, GetDebugInfo().shards_count);\n\n  Run({\"mset\", \"x\", \"0\", \"y\", \"0\"});\n  ASSERT_EQ(1, GetDebugInfo().shards_count);\n\n  Run({\"mset\", \"x\", \"1\", \"b\", \"5\", \"x\", \"0\"});\n  ASSERT_EQ(2, GetDebugInfo().shards_count);\n\n  int64_t val = CheckedInt({\"get\", \"x\"});\n  EXPECT_EQ(0, val);\n\n  val = CheckedInt({\"get\", \"b\"});\n  EXPECT_EQ(5, val);\n\n  auto mset_fb = pp_->at(0)->LaunchFiber([&] {\n    for (size_t i = 0; i < 1000; ++i) {\n      RespExpr resp = Run({\"mset\", \"x\", StrCat(i), \"b\", StrCat(i)});\n      ASSERT_EQ(resp, \"OK\") << i;\n    }\n  });\n\n  // A problematic order when mset is not atomic: set x, get x, get b (old), set b\n  auto get_fb = pp_->at(2)->LaunchFiber([&] {\n    for (size_t i = 0; i < 1000; ++i) {\n      int64_t x = CheckedInt({\"get\", \"x\"});\n      int64_t z = CheckedInt({\"get\", \"b\"});\n\n      ASSERT_LE(x, z) << \"Inconsistency at \" << i;\n    }\n  });\n\n  mset_fb.Join();\n  get_fb.Join();\n}\n\nTEST_F(StringFamilyTest, MSetDel) {\n  auto mset_fb = pp_->at(0)->LaunchFiber([&] {\n    for (size_t i = 0; i < 1000; ++i) {\n      Run({\"mset\", \"x\", \"0\", \"z\", \"0\"});\n    }\n  });\n\n  auto del_fb = pp_->at(2)->LaunchFiber([&] {\n    for (size_t i = 0; i < 1000; ++i) {\n      CheckedInt({\"del\", \"x\", \"z\"});\n    }\n  });\n\n  mset_fb.Join();\n  del_fb.Join();\n}\n\nTEST_F(StringFamilyTest, 
IntKey) {\n  Run({\"mset\", \"1\", \"1\", \"-1000\", \"-1000\"});\n  auto resp = Run({\"get\", \"1\"});\n  ASSERT_THAT(resp, \"1\");\n}\n\nTEST_F(StringFamilyTest, SingleShard) {\n  Run({\"mset\", \"x\", \"1\", \"y\", \"1\"});\n  ASSERT_EQ(1, GetDebugInfo(\"IO0\").shards_count);\n\n  Run({\"mget\", \"x\", \"y\", \"b\"});\n  ASSERT_EQ(2, GetDebugInfo(\"IO0\").shards_count);\n\n  auto resp = Run({\"mget\", \"x\", \"y\"});\n  ASSERT_EQ(1, GetDebugInfo(\"IO0\").shards_count);\n  ASSERT_THAT(ToIntArr(resp), ElementsAre(1, 1));\n\n  auto mset_fb = pp_->at(0)->LaunchFiber([&] {\n    for (size_t i = 0; i < 100; ++i) {\n      Run({\"mset\", \"x\", \"0\", \"y\", \"0\"});\n    }\n  });\n\n  // Specially multiple shards to avoid fast-path.\n  auto mget_fb = pp_->at(1)->LaunchFiber([&] {\n    for (size_t i = 0; i < 100; ++i) {\n      Run({\"mget\", \"x\", \"b\", \"y\"});\n    }\n  });\n  mset_fb.Join();\n  mget_fb.Join();\n}\n\nTEST_F(StringFamilyTest, MSetIncr) {\n  /*  serializable orders\n   init: x=z=0\n\n   mset x=z=1\n   mset, incr x, incr z = 2, 2\n   incr x, mset, incr z = 1, 2\n   incr x, incr z, mset = 1, 1\n*/\n\n  /* unserializable scenario when mset is not atomic with respect to incr x\n      set x, incr x, incr z, set z = 2, 1\n    */\n\n  Run({\"mset\", \"a\", \"0\", \"b\", \"0\", \"c\", \"0\"});\n  ASSERT_EQ(2, GetDebugInfo(\"IO0\").shards_count);\n\n  auto mset_fb = pp_->at(0)->LaunchFiber([&] {\n    for (size_t i = 1; i < 1000; ++i) {\n      string base = StrCat(i * 900);\n      auto resp = Run({\"mset\", \"b\", base, \"a\", base, \"c\", base});\n      ASSERT_EQ(resp, \"OK\");\n    }\n  });\n\n  auto get_fb = pp_->at(1)->LaunchFiber([&] {\n    for (unsigned j = 0; j < 900; ++j) {\n      int64_t a = CheckedInt({\"incr\", \"a\"});\n      int64_t b = CheckedInt({\"incr\", \"b\"});\n      ASSERT_LE(a, b);\n\n      int64_t c = CheckedInt({\"incr\", \"c\"});\n      if (a > c) {\n        LOG(ERROR) << \"Consistency error \";\n      }\n      ASSERT_LE(a, c);\n    }\n  
});\n  mset_fb.Join();\n  get_fb.Join();\n}\n\nTEST_F(StringFamilyTest, SetEx) {\n  ASSERT_EQ(Run({\"setex\", \"key\", \"1\", \"val\"}), \"OK\");\n  ASSERT_EQ(Run({\"setex\", \"key\", \"10\", \"val\"}), \"OK\");\n  ASSERT_THAT(Run({\"ttl\", \"key\"}), IntArg(10));\n  ASSERT_THAT(Run({\"setex\", \"key\", \"0\", \"val\"}), ErrArg(\"invalid expire time\"));\n  ASSERT_EQ(Run({\"setex\", \"key\", StrCat(5 * 365 * 24 * 3600), \"val\"}), \"OK\");\n  ASSERT_THAT(Run({\"setex\", \"key\", StrCat(1 << 30), \"val\"}), \"OK\");\n  ASSERT_THAT(Run({\"ttl\", \"key\"}), IntArg(kMaxExpireDeadlineSec));\n  ASSERT_THAT(Run({\"SETEX\", \"foo\", \"18446744073709561\", \"bar\"}), ErrArg(\"invalid expire time\"));\n}\n\nTEST_F(StringFamilyTest, Range) {\n  Run({\"set\", \"key1\", \"Hello World\"});\n  EXPECT_EQ(Run({\"getrange\", \"key1\", \"5\", \"3\"}), \"\");\n\n  Run({\"SETRANGE\", \"key1\", \"6\", \"Earth\"});\n  EXPECT_EQ(Run({\"get\", \"key1\"}), \"Hello Earth\");\n\n  Run({\"SETRANGE\", \"key2\", \"2\", \"Earth\"});\n  EXPECT_EQ(Run({\"get\", \"key2\"}), string_view(\"\\000\\000Earth\", 7));\n\n  Run({\"SETRANGE\", \"key3\", \"0\", \"\"});\n  EXPECT_EQ(0, CheckedInt({\"exists\", \"key3\"}));\n\n  Run({\"SETRANGE\", \"key3\", \"0\", \"abc\"});\n  EXPECT_EQ(1, CheckedInt({\"exists\", \"key3\"}));\n\n  Run({\"SET\", \"key3\", \"123\"});\n  EXPECT_EQ(Run({\"getrange\", \"key3\", \"2\", \"3\"}), \"3\");\n  EXPECT_EQ(Run({\"getrange\", \"key3\", \"3\", \"3\"}), \"\");\n  EXPECT_EQ(Run({\"getrange\", \"key3\", \"4\", \"5\"}), \"\");\n\n  Run({\"SET\", \"num\", \"1234\"});\n  EXPECT_EQ(Run({\"getrange\", \"num\", \"3\", \"5000\"}), \"4\");\n  EXPECT_EQ(Run({\"getrange\", \"num\", \"-5000\", \"10000\"}), \"1234\");\n\n  Run({\"SET\", \"key4\", \"1\"});\n  EXPECT_EQ(Run({\"getrange\", \"key4\", \"-1\", \"-2\"}), \"\");\n  EXPECT_EQ(Run({\"getrange\", \"key4\", \"0\", \"-2\"}), \"1\");\n\n  EXPECT_EQ(CheckedInt({\"SETRANGE\", \"key5\", \"1\", \"\"}), 0);\n  EXPECT_EQ(Run({\"GET\", 
\"key5\"}).type, facade::RespExpr::NIL);\n\n  EXPECT_EQ(CheckedInt({\"SETRANGE\", \"num\", \"6\", \"\"}), 4);\n  EXPECT_EQ(Run({\"GET\", \"num\"}), \"1234\");\n\n  // we support only 256MB string so this test is failed now\n  // EXPECT_THAT(CheckedInt({\"SETRANGE\", \"\", \"268435456\", \"0\"}), 268435457);\n}\n\nTEST_F(StringFamilyTest, IncrByFloat) {\n  Run({\"SET\", \"nonum\", \"  11\"});\n  auto resp = Run({\"INCRBYFLOAT\", \"nonum\", \"1.0\"});\n  EXPECT_THAT(resp, ErrArg(\"not a valid float\"));\n\n  Run({\"SET\", \"inf\", \"+inf\"});\n  resp = Run({\"INCRBYFLOAT\", \"inf\", \"1.0\"});\n  EXPECT_THAT(resp, ErrArg(\"increment would produce NaN or Infinity\"));\n\n  Run({\"SET\", \"nonum\", \"11 \"});\n  resp = Run({\"INCRBYFLOAT\", \"nonum\", \"1.0\"});\n  EXPECT_THAT(resp, ErrArg(\"not a valid float\"));\n\n  Run({\"SET\", \"num\", \"2.566\"});\n  resp = Run({\"INCRBYFLOAT\", \"num\", \"1.0\"});\n  EXPECT_EQ(resp, \"3.566\");\n}\n\nTEST_F(StringFamilyTest, RestoreHighTTL) {\n  Run({\"SET\", \"X\", \"1\"});\n  auto buffer = Run({\"DUMP\", \"X\"}).GetBuf();\n  Run({\"DEL\", \"X\"});\n  EXPECT_EQ(Run({\"RESTORE\", \"X\", \"5430186761345\", ToSV(buffer)}), \"OK\");\n}\n\nTEST_F(StringFamilyTest, SetNx) {\n  // Make sure that we \"screen out\" invalid parameters for this command\n  // this is important as it uses similar path as the \"normal\" set\n  auto resp = Run({\"setnx\", \"foo\", \"bar\", \"XX\"});\n  EXPECT_THAT(resp, ErrArg(\"wrong number of arguments\"));\n\n  resp = Run({\"setnx\", \"foo\", \"bar\", \"NX\"});\n  ASSERT_THAT(resp, ErrArg(\"wrong number of arguments\"));\n\n  resp = Run({\"setnx\", \"foo\", \"bar\", \"xx\"});\n  ASSERT_THAT(resp, ErrArg(\"wrong number of arguments\"));\n\n  resp = Run({\"setnx\", \"foo\", \"bar\", \"ex\", \"abc\"});\n  ASSERT_THAT(resp, ErrArg(\"wrong number of arguments\"));\n\n  resp = Run({\"setnx\", \"foo\", \"bar\", \"ex\", \"-1\"});\n  ASSERT_THAT(resp, ErrArg(\"wrong number of arguments\"));\n\n  resp = 
Run({\"setnx\", \"foo\", \"bar\", \"ex\", \"1\"});\n  ASSERT_THAT(resp, ErrArg(\"wrong number of arguments\"));\n\n  // now let see how it goes for the valid parameters\n  EXPECT_EQ(1, CheckedInt({\"setnx\", \"foo\", \"bar\"}));\n  EXPECT_EQ(Run({\"get\", \"foo\"}), \"bar\");\n  // second call to the same key should return 0 as we have it\n  EXPECT_EQ(0, CheckedInt({\"setnx\", \"foo\", \"hello\"}));\n  EXPECT_EQ(Run({\"get\", \"foo\"}), \"bar\");  // the value was not changed\n}\n\nTEST_F(StringFamilyTest, SetPxAtExAt) {\n  // Expiration time as set at unix time\n  auto TEST_current_time_s = TEST_current_time_ms / 1000;\n\n  auto resp = Run({\"set\", \"foo\", \"bar\", \"EXAT\", \"-1\"});\n  ASSERT_THAT(resp, ErrArg(\"invalid expire time\"));\n  resp = Run({\"set\", \"foo\", \"bar\", \"EXAT\", absl::StrCat(TEST_current_time_s - 1)});\n  ASSERT_THAT(resp, \"OK\");  // it would return OK but will not set the value - expiration time is 0\n                            // (checked with Redis)\n  EXPECT_EQ(Run({\"get\", \"foo\"}).type, facade::RespExpr::NIL);\n\n  resp = Run({\"set\", \"foo\", \"bar\", \"PXAT\", \"-1\"});\n  ASSERT_THAT(resp, ErrArg(\"invalid expire time\"));\n\n  resp = Run({\"set\", \"foo\", \"bar\", \"PXAT\", absl::StrCat(TEST_current_time_ms - 23)});\n  ASSERT_THAT(resp, \"OK\");  // it would return OK but will not set the value (checked with Redis)\n  EXPECT_EQ(Run({\"get\", \"foo\"}).type, facade::RespExpr::NIL);\n\n  resp = Run({\"set\", \"foo\", \"bar\", \"EXAT\", absl::StrCat(TEST_current_time_s + 1)});\n  ASSERT_THAT(resp, \"OK\");  // valid expiration time\n  EXPECT_EQ(Run({\"get\", \"foo\"}), \"bar\");\n\n  resp = Run({\"set\", \"foo2\", \"abc\", \"PXAT\", absl::StrCat(TEST_current_time_ms + 300)});\n  ASSERT_THAT(resp, \"OK\");\n  EXPECT_EQ(Run({\"get\", \"foo2\"}), \"abc\");\n}\n\nTEST_F(StringFamilyTest, SetStick) {\n  Run({\"set\", \"foo\", \"bar\", \"STICK\"});\n  EXPECT_THAT(Run({\"STICK\", \"foo\"}), 
IntArg(0));\n}\n\nTEST_F(StringFamilyTest, GetDel) {\n  auto resp = Run({\"set\", \"foo\", \"bar\"});\n  EXPECT_THAT(resp, \"OK\");\n\n  resp = Run({\"getdel\", \"foo\"});\n  // foo's value\n  ASSERT_THAT(resp, ArgType(RespExpr::STRING));\n\n  resp = Run({\"get\", \"foo\"});\n  ASSERT_THAT(resp, ArgType(RespExpr::NIL));\n}\n\nTEST_F(StringFamilyTest, GetEx) {\n  auto resp = Run({\"set\", \"foo\", \"bar\"});\n  EXPECT_THAT(resp, \"OK\");\n\n  resp = Run({\"getex\", \"foo\", \"EX\"});\n  EXPECT_THAT(resp, ErrArg(\"syntax error\"));\n\n  resp = Run({\"getex\", \"foo\", \"EX\", \"1\", \"px\", \"1\"});\n  EXPECT_THAT(resp, ErrArg(\"syntax error\"));\n\n  resp = Run({\"getex\", \"foo\", \"bar\", \"EX\"});\n  EXPECT_THAT(resp, ErrArg(\"syntax error\"));\n\n  resp = Run({\"getex\", \"foo\", \"PERSIST\", \"1\"});\n  EXPECT_THAT(resp, ErrArg(\"syntax error\"));\n\n  resp = Run({\"getex\", \"foo\", \"PXAT\"});\n  EXPECT_THAT(resp, ErrArg(\"syntax error\"));\n\n  resp = Run({\"getex\", \"foo\", \"EX\", \"0\"});\n  EXPECT_THAT(resp, ErrArg(\"invalid expire time\"));\n\n  resp = Run({\"getex\", \"foo\", \"PXAT\", \"-1\"});\n  EXPECT_THAT(resp, ErrArg(\"invalid expire time\"));\n\n  EXPECT_EQ(Run({\"getex\", \"foo\"}), \"bar\");\n\n  resp = Run({\"getex\", \"foo\", \"PERSIST\"});\n  EXPECT_EQ(resp, \"bar\");\n  EXPECT_THAT(Run({\"TTL\", \"foo\"}), IntArg(-1));\n\n  resp = Run({\"getex\", \"foo\", \"pxat\", absl::StrCat(TEST_current_time_ms - 1)});\n  EXPECT_EQ(resp, \"bar\");\n\n  EXPECT_THAT(Run({\"getex\", \"foo\"}), ArgType(RespExpr::NIL));\n\n  Run({\"set\", \"foo\", \"bar\"});\n\n  resp = Run({\"getex\", \"foo\", \"PXAT\", absl::StrCat(TEST_current_time_ms + 10)});\n  EXPECT_EQ(resp, \"bar\");\n\n  AdvanceTime(9);\n  EXPECT_EQ(Run({\"getex\", \"foo\"}), \"bar\");\n\n  AdvanceTime(1);\n  EXPECT_THAT(Run({\"getex\", \"foo\"}), ArgType(RespExpr::NIL));\n\n  Run({\"set\", \"foo\", \"bar\"});\n\n  resp = Run({\"getex\", \"foo\", \"exat\", absl::StrCat(TEST_current_time_ms / 1000 
- 1)});\n  EXPECT_EQ(resp, \"bar\");\n  EXPECT_THAT(Run({\"getex\", \"foo\"}), ArgType(RespExpr::NIL));\n\n  Run({\"set\", \"foo\", \"bar\"});\n\n  uint64_t next_two_seconds = TEST_current_time_ms + 2000;\n  uint64_t next_two_seconds_round_down = static_cast<uint64_t>(next_two_seconds / 1000);\n  uint64_t diff = next_two_seconds_round_down * 1000 - TEST_current_time_ms;\n\n  resp = Run({\"getex\", \"foo\", \"EXAT\", absl::StrCat(next_two_seconds_round_down)});\n  EXPECT_EQ(resp, \"bar\");\n\n  AdvanceTime(diff - 1);\n  EXPECT_EQ(Run({\"getex\", \"foo\"}), \"bar\");\n\n  AdvanceTime(1);\n  EXPECT_THAT(Run({\"getex\", \"foo\"}), ArgType(RespExpr::NIL));\n\n  Run({\"set\", \"foo\", \"bar\"});\n\n  resp = Run({\"getex\", \"foo\", \"PX\", \"10\"});\n\n  AdvanceTime(9);\n  EXPECT_EQ(Run({\"getex\", \"foo\"}), \"bar\");\n\n  AdvanceTime(1);\n  EXPECT_THAT(Run({\"getex\", \"foo\"}), ArgType(RespExpr::NIL));\n\n  Run({\"set\", \"foo\", \"bar\"});\n\n  resp = Run({\"getex\", \"foo\", \"ex\", \"1\"});\n\n  AdvanceTime(999);\n  EXPECT_EQ(Run({\"getex\", \"foo\"}), \"bar\");\n\n  AdvanceTime(1);\n  EXPECT_THAT(Run({\"getex\", \"foo\"}), ArgType(RespExpr::NIL));\n}\n\nTEST_F(StringFamilyTest, ClThrottle) {\n  const int64_t limit = 5;\n  const char* const key = \"foo\";\n  const char* const max_burst = \"4\";  // limit - 1\n  const char* const count = \"1\";\n  const char* const period = \"10\";\n\n  // You can never make a request larger than the maximum.\n  auto resp = Run({\"cl.throttle\", key, max_burst, count, period, \"6\"});\n  ASSERT_EQ(RespExpr::ARRAY, resp.type);\n  ASSERT_THAT(resp.GetVec(),\n              ElementsAre(IntArg(1), IntArg(limit), IntArg(5), IntArg(-1), IntArg(0)));\n\n  // Rate limit normal requests appropriately.\n  resp = Run({\"cl.throttle\", key, max_burst, count, period});\n  ASSERT_EQ(RespExpr::ARRAY, resp.type);\n  ASSERT_THAT(resp.GetVec(),\n              ElementsAre(IntArg(0), IntArg(limit), IntArg(4), IntArg(-1), IntArg(11)));\n\n  resp = 
Run({\"cl.throttle\", key, max_burst, count, period});\n  ASSERT_EQ(RespExpr::ARRAY, resp.type);\n  ASSERT_THAT(resp.GetVec(),\n              ElementsAre(IntArg(0), IntArg(limit), IntArg(3), IntArg(-1), IntArg(21)));\n\n  resp = Run({\"cl.throttle\", key, max_burst, count, period});\n  ASSERT_EQ(RespExpr::ARRAY, resp.type);\n  ASSERT_THAT(resp.GetVec(),\n              ElementsAre(IntArg(0), IntArg(limit), IntArg(2), IntArg(-1), IntArg(31)));\n\n  resp = Run({\"cl.throttle\", key, max_burst, count, period});\n  ASSERT_EQ(RespExpr::ARRAY, resp.type);\n  ASSERT_THAT(resp.GetVec(),\n              ElementsAre(IntArg(0), IntArg(limit), IntArg(1), IntArg(-1), IntArg(41)));\n\n  resp = Run({\"cl.throttle\", key, max_burst, count, period});\n  ASSERT_EQ(RespExpr::ARRAY, resp.type);\n  ASSERT_THAT(resp.GetVec(),\n              ElementsAre(IntArg(0), IntArg(limit), IntArg(0), IntArg(-1), IntArg(51)));\n\n  resp = Run({\"cl.throttle\", key, max_burst, count, period});\n  ASSERT_EQ(RespExpr::ARRAY, resp.type);\n  ASSERT_THAT(resp.GetVec(),\n              ElementsAre(IntArg(1), IntArg(limit), IntArg(0), IntArg(11), IntArg(51)));\n\n  AdvanceTime(30000);\n  resp = Run({\"cl.throttle\", key, max_burst, count, period, \"1\"});\n  ASSERT_EQ(RespExpr::ARRAY, resp.type);\n  ASSERT_THAT(resp.GetVec(),\n              ElementsAre(IntArg(0), IntArg(limit), IntArg(2), IntArg(-1), IntArg(31)));\n\n  AdvanceTime(1000);\n  resp = Run({\"cl.throttle\", key, max_burst, count, period, \"1\"});\n  ASSERT_EQ(RespExpr::ARRAY, resp.type);\n  ASSERT_THAT(resp.GetVec(),\n              ElementsAre(IntArg(0), IntArg(limit), IntArg(1), IntArg(-1), IntArg(40)));\n\n  AdvanceTime(9000);\n  resp = Run({\"cl.throttle\", key, max_burst, count, period, \"1\"});\n  ASSERT_EQ(RespExpr::ARRAY, resp.type);\n  ASSERT_THAT(resp.GetVec(),\n              ElementsAre(IntArg(0), IntArg(limit), IntArg(1), IntArg(-1), IntArg(41)));\n\n  AdvanceTime(40000);\n  resp = Run({\"cl.throttle\", key, max_burst, count, period, 
\"1\"});\n  ASSERT_EQ(RespExpr::ARRAY, resp.type);\n  ASSERT_THAT(resp.GetVec(),\n              ElementsAre(IntArg(0), IntArg(limit), IntArg(4), IntArg(-1), IntArg(11)));\n\n  AdvanceTime(15000);\n  resp = Run({\"cl.throttle\", key, max_burst, count, period, \"1\"});\n  ASSERT_EQ(RespExpr::ARRAY, resp.type);\n  ASSERT_THAT(resp.GetVec(),\n              ElementsAre(IntArg(0), IntArg(limit), IntArg(4), IntArg(-1), IntArg(11)));\n\n  // Zero-volume request just peeks at the state.\n  resp = Run({\"cl.throttle\", key, max_burst, count, period, \"0\"});\n  ASSERT_EQ(RespExpr::ARRAY, resp.type);\n  ASSERT_THAT(resp.GetVec(),\n              ElementsAre(IntArg(0), IntArg(limit), IntArg(4), IntArg(-1), IntArg(11)));\n\n  // High-volume request uses up more of the limit.\n  resp = Run({\"cl.throttle\", key, max_burst, count, period, \"2\"});\n  ASSERT_EQ(RespExpr::ARRAY, resp.type);\n  ASSERT_THAT(resp.GetVec(),\n              ElementsAre(IntArg(0), IntArg(limit), IntArg(2), IntArg(-1), IntArg(31)));\n\n  // Large requests cannot exceed limits\n  resp = Run({\"cl.throttle\", key, max_burst, count, period, \"5\"});\n  ASSERT_EQ(RespExpr::ARRAY, resp.type);\n  ASSERT_THAT(resp.GetVec(),\n              ElementsAre(IntArg(1), IntArg(limit), IntArg(2), IntArg(31), IntArg(31)));\n\n  // Zero rates aren't supported\n  resp = Run({\"cl.throttle\", \"bar\", \"10\", \"1\", \"0\"});\n  ASSERT_EQ(RespExpr::ERROR, resp.type);\n  EXPECT_THAT(resp, ErrArg(\"zero rates are not supported\"));\n\n  // count == 0\n  resp = Run({\"cl.throttle\", \"bar\", \"10\", \"0\", \"1\"});\n  ASSERT_EQ(RespExpr::ERROR, resp.type);\n  EXPECT_THAT(resp, ErrArg(kInvalidIntErr));\n\n  // emission interval = 2000 nanoseconds, cost = 2 units\n  resp = Run({\"cl.throttle\", \"bar\", max_burst, \"500000\", \"1\", \"2\"});\n  ASSERT_EQ(RespExpr::ARRAY, resp.type);\n  ASSERT_THAT(resp.GetVec(),\n              ElementsAre(IntArg(0), IntArg(limit), IntArg(limit - 2), IntArg(-1), 
IntArg(1)));\n}\n\nTEST_F(StringFamilyTest, SetMGetWithNilResp3) {\n  Run({\"hello\", \"3\"});\n\n  EXPECT_EQ(Run({\"set\", \"key\", \"val\"}), \"OK\");\n  EXPECT_EQ(Run({\"get\", \"key\"}), \"val\");\n  RespExpr resp = Run({\"mget\", \"key\", \"nonexist\"});\n  ASSERT_EQ(RespExpr::ARRAY, resp.type);\n  EXPECT_THAT(resp.GetVec(), ElementsAre(\"val\", ArgType(RespExpr::NIL)));\n}\n\nTEST_F(StringFamilyTest, OverrideOther) {\n  Run({\"lpush\", \"a\", \"fooo\"});\n  Run({\"set\", \"a\", string(100, 'b')});\n  Metrics metrics = GetMetrics();\n\n  size_t list_usage = metrics.db_stats[0].memory_usage_by_type[OBJ_LIST];\n  size_t string_usage = metrics.db_stats[0].memory_usage_by_type[OBJ_STRING];\n  EXPECT_EQ(list_usage, 0);\n  EXPECT_GT(string_usage, 0);\n  EXPECT_LT(string_usage, 100);\n}\n\nTEST_F(StringFamilyTest, SetWithGetParam) {\n  EXPECT_THAT(Run({\"set\", \"key1\", \"val1\", \"get\"}), ArgType(RespExpr::NIL));\n  EXPECT_EQ(Run({\"set\", \"key1\", \"val2\", \"get\"}), \"val1\");\n\n  EXPECT_THAT(Run({\"set\", \"key2\", \"val2\", \"nx\", \"get\"}), ArgType(RespExpr::NIL));\n  EXPECT_THAT(Run({\"set\", \"key2\", \"not used\", \"nx\", \"get\"}), \"val2\");\n  EXPECT_EQ(Run({\"get\", \"key2\"}), \"val2\");\n\n  EXPECT_THAT(Run({\"set\", \"key3\", \"not used\", \"xx\", \"get\"}), ArgType(RespExpr::NIL));\n  EXPECT_THAT(Run({\"set\", \"key2\", \"val3\", \"xx\", \"get\"}), \"val2\");\n  EXPECT_EQ(Run({\"get\", \"key2\"}), \"val3\");\n\n  EXPECT_THAT(Run({\"sadd\", \"key4\", \"1\"}), IntArg(1));\n  EXPECT_THAT(Run({\"set\", \"key4\", \"2\", \"get\"}), ErrArg(\"wrong kind of value\"));\n  EXPECT_THAT(Run({\"set\", \"key4\", \"2\", \"xx\", \"get\"}), ErrArg(\"wrong kind of value\"));\n}\n\nTEST_F(StringFamilyTest, SetWithHashtagsNoCluster) {\n  SetTestFlag(\"cluster_mode\", \"\");\n  SetTestFlag(\"lock_on_hashtags\", \"false\");\n  ResetService();\n\n  auto fb = ExpectUsedKeys({\"{key}1\"});\n  EXPECT_EQ(Run({\"set\", \"{key}1\", \"val1\"}), \"OK\");\n  fb.Join();\n  
EXPECT_FALSE(IsLocked(0, \"{key}1\"));\n\n  fb = ExpectUsedKeys({\"{key}2\"});\n  EXPECT_EQ(Run({\"set\", \"{key}2\", \"val2\"}), \"OK\");\n  fb.Join();\n\n  fb = ExpectUsedKeys({\"{key}1\", \"{key}2\"});\n  EXPECT_THAT(Run({\"mget\", \"{key}1\", \"{key}2\"}), RespArray(ElementsAre(\"val1\", \"val2\")));\n  fb.Join();\n  EXPECT_NE(1, GetDebugInfo().shards_count);\n}\n\nTEST_F(StringFamilyTest, SetWithHashtagsWithEmulatedCluster) {\n  SetTestFlag(\"cluster_mode\", \"emulated\");\n  SetTestFlag(\"lock_on_hashtags\", \"false\");\n  ResetService();\n\n  auto fb = ExpectUsedKeys({\"{key}1\"});\n  EXPECT_EQ(Run({\"set\", \"{key}1\", \"val1\"}), \"OK\");\n  fb.Join();\n\n  fb = ExpectUsedKeys({\"{key}2\"});\n  EXPECT_EQ(Run({\"set\", \"{key}2\", \"val2\"}), \"OK\");\n  fb.Join();\n\n  fb = ExpectUsedKeys({\"{key}1\", \"{key}2\"});\n  EXPECT_THAT(Run({\"mget\", \"{key}1\", \"{key}2\"}), RespArray(ElementsAre(\"val1\", \"val2\")));\n  fb.Join();\n  EXPECT_EQ(1, GetDebugInfo().shards_count);\n}\n\nTEST_F(StringFamilyTest, SetWithHashtagsWithHashtagLock) {\n  SetTestFlag(\"cluster_mode\", \"emulated\");\n  SetTestFlag(\"lock_on_hashtags\", \"true\");\n  ResetService();\n\n  auto fb = ExpectUsedKeys({\"key\"});\n  EXPECT_EQ(Run({\"set\", \"{key}1\", \"val1\"}), \"OK\");\n  fb.Join();\n\n  fb = ExpectUsedKeys({\"key\"});\n  EXPECT_EQ(Run({\"set\", \"{key}2\", \"val2\"}), \"OK\");\n  fb.Join();\n\n  fb = ExpectUsedKeys({\"key\"});\n  EXPECT_THAT(Run({\"mget\", \"{key}1\", \"{key}2\"}), RespArray(ElementsAre(\"val1\", \"val2\")));\n  fb.Join();\n  EXPECT_EQ(1, GetDebugInfo().shards_count);\n}\n\nTEST_F(StringFamilyTest, MultiSetWithHashtagsDontLockHashtags) {\n  SetTestFlag(\"cluster_mode\", \"\");\n  SetTestFlag(\"lock_on_hashtags\", \"false\");\n  ResetService();\n\n  auto fb = ExpectUsedKeys({\"{key}1\", \"{key}2\", \"{key}3\"});\n\n  EXPECT_EQ(Run({\"multi\"}), \"OK\");\n  EXPECT_EQ(Run({\"set\", \"{key}1\", \"val1\"}), \"QUEUED\");\n  EXPECT_EQ(Run({\"set\", \"{key}2\", 
\"val2\"}), \"QUEUED\");\n  EXPECT_EQ(Run({\"eval\", \"return redis.call('set', KEYS[1], 'val3')\", \"1\", \"{key}3\"}), \"QUEUED\");\n  EXPECT_THAT(Run({\"exec\"}), RespArray(ElementsAre(\"OK\", \"OK\", \"OK\")));\n  fb.Join();\n}\n\nTEST_F(StringFamilyTest, MultiSetWithHashtagsLockHashtags) {\n  SetTestFlag(\"cluster_mode\", \"emulated\");\n  SetTestFlag(\"lock_on_hashtags\", \"true\");\n  ResetService();\n\n  auto fb = ExpectUsedKeys({\"key\"});\n\n  EXPECT_EQ(Run({\"multi\"}), \"OK\");\n  EXPECT_EQ(Run({\"set\", \"{key}1\", \"val1\"}), \"QUEUED\");\n  EXPECT_EQ(Run({\"set\", \"{key}2\", \"val2\"}), \"QUEUED\");\n  EXPECT_EQ(Run({\"eval\", \"return redis.call('set', KEYS[1], 'val3')\", \"1\", \"{key}3\"}), \"QUEUED\");\n  EXPECT_THAT(Run({\"exec\"}), RespArray(ElementsAre(\"OK\", \"OK\", \"OK\")));\n  fb.Join();\n}\n\nTEST_F(StringFamilyTest, EmptyKeys) {\n  EXPECT_EQ(0, CheckedInt({\"strlen\", \"foo\"}));\n  EXPECT_EQ(Run({\"SUBSTR\", \"foo\", \"0\", \"-1\"}), \"\");\n}\n\nTEST_F(StringFamilyTest, Digest) {\n  // Basic digest computation returns 16-char hex string\n  Run({\"set\", \"key\", \"value\"});\n  auto resp = Run({\"digest\", \"key\"});\n  ASSERT_EQ(resp.type, RespExpr::STRING);\n  string digest = resp.GetString();\n  EXPECT_EQ(\"87d57e269b9df0f0\", digest);\n\n  // Digest of non-existent key returns nil\n  EXPECT_THAT(Run({\"digest\", \"nonexistent\"}), ArgType(RespExpr::NIL));\n\n  // Digest consistency - same value always produces same digest\n  Run({\"set\", \"key1\", \"testvalue\"});\n  Run({\"set\", \"key2\", \"testvalue\"});\n  auto digest1 = Run({\"digest\", \"key1\"});\n  auto digest2 = Run({\"digest\", \"key2\"});\n  EXPECT_EQ(ToSV(digest1.GetBuf()), ToSV(digest2.GetBuf()));\n\n  // Different values produce different digests\n  Run({\"set\", \"key3\", \"different\"});\n  auto digest3 = Run({\"digest\", \"key3\"});\n  EXPECT_NE(ToSV(digest1.GetBuf()), ToSV(digest3.GetBuf()));\n\n  // Works with integer-encoded strings\n  Run({\"set\", 
\"intkey\", \"123\"});\n  auto int_digest = Run({\"digest\", \"intkey\"});\n  ASSERT_EQ(int_digest.type, RespExpr::STRING);\n  EXPECT_EQ(16, ToSV(int_digest.GetBuf()).size());\n\n  // Works with empty strings\n  Run({\"set\", \"empty\", \"\"});\n  auto empty_digest = Run({\"digest\", \"empty\"});\n  ASSERT_EQ(empty_digest.type, RespExpr::STRING);\n  EXPECT_EQ(16, ToSV(empty_digest.GetBuf()).size());\n\n  // Digest of non-string type returns WRONGTYPE error\n  Run({\"lpush\", \"list\", \"item\"});\n  EXPECT_THAT(Run({\"digest\", \"list\"}), ErrArg(\"WRONGTYPE\"));\n}\n\n// GAT is a memcache-only command. Sending it via Redis RESP protocol should return an error\n// instead of crashing (DCHECK on mc_command()).\nTEST_F(StringFamilyTest, GatViaRedisProtocol) {\n  Run({\"set\", \"key\", \"val\"});\n  auto resp = Run({\"GAT\", \"key\"});\n  EXPECT_THAT(resp, ErrArg(\"memcache-only\"));\n}\n\nTEST_F(StringFamilyTest, MSetNxOddArgs) {\n  auto resp = Run({\"msetnx\", \"key\", \"value\", \"key2\"});\n  EXPECT_THAT(resp, ErrArg(\"wrong number of arguments\"));\n\n  resp = Run({\"mset\", \"key\", \"value\", \"key2\"});\n  EXPECT_THAT(resp, ErrArg(\"wrong number of arguments\"));\n}\n\n}  // namespace dfly\n"
  },
  {
    "path": "src/server/string_stats.cc",
    "content": "// Copyright 2026, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#include \"server/string_stats.h\"\n\n#include <absl/strings/str_cat.h>\n\n#include \"base/logging.h\"\n\nnamespace {\n\nvoid MakeHLL(HllBufferPtr* p) {\n  p->size = getDenseHllSize();\n  p->hll = new uint8_t[p->size];\n  CHECK_EQ(0, createDenseHll(*p));\n}\n\n}  // namespace\n\nnamespace dfly {\n\nusing namespace container_utils;\n\nUniqueStrings::UniqueStrings() {\n  MakeHLL(&counter_);\n}\n\nUniqueStrings::UniqueStrings(UniqueStrings&& other) noexcept\n    : total_count{other.total_count}, total_bytes{other.total_bytes}, counter_{other.counter_} {\n  other.counter_ = HllBufferPtr{};\n}\n\nUniqueStrings& UniqueStrings::operator=(UniqueStrings&& other) noexcept {\n  if (this == &other) {\n    return *this;\n  }\n\n  delete[] counter_.hll;\n  counter_ = other.counter_;\n  total_count = other.total_count;\n  total_bytes = other.total_bytes;\n  other.counter_ = HllBufferPtr{};\n  return *this;\n}\n\nvoid UniqueStrings::AddHMap(const PrimeValue& pv) {\n  // Only adds the keys of a map\n  IterateMap(pv, [&](const ContainerEntry& k, const auto&) { return AddString(k); });\n}\n\nvoid UniqueStrings::AddSet(const PrimeValue& pv) {\n  IterateSet(pv, [&](const ContainerEntry& e) { return AddString(e); });\n}\n\nvoid UniqueStrings::AddList(const PrimeValue& pv) {\n  IterateList(pv, [&](const ContainerEntry& e) { return AddString(e); });\n}\n\nvoid UniqueStrings::AddZSet(const PrimeValue& pv) {\n  IterateSortedSet(pv, [&](const ContainerEntry& e, auto) { return AddString(e); });\n}\n\nvoid UniqueStrings::Add(const UniqueStrings& other) {\n  total_count += other.total_count;\n  total_bytes += other.total_bytes;\n  HllBufferPtr inputs[2] = {other.counter_, counter_};\n  CHECK_EQ(0, pfmerge(inputs, 2, counter_));\n}\n\nstd::string UniqueStrings::ToString(std::string_view label) const {\n  if (total_count == 0)\n    return {};\n  std::string result;\n  
absl::StrAppend(&result, label, \":\\n\");\n  absl::StrAppend(&result, \"  total strings: \", total_count, \"\\n\");\n  absl::StrAppend(&result, \"  unique strings: \", UniqueCount(), \"\\n\");\n  absl::StrAppend(&result, \"  total bytes: \", total_bytes, \"\\n\");\n  absl::StrAppend(&result, \"  average length: \", AverageLength(), \"\\n\");\n  absl::StrAppend(&result, \"  estimated savings: \", ByteSavingsOnDedup(), \" bytes\\n\");\n  return result;\n}\n\nbool UniqueStrings::AddString(const ContainerEntry& e) {  // NOLINT must always return true\n  // Count both strings and ints, because ints might be used as keys and will benefit from\n  // deduplication just like strings.\n  if (e.IsString()) {\n    CHECK_NE(-1, pfadd_dense(counter_, reinterpret_cast<const unsigned char*>(e.data()), e.size()));\n    ++total_count;\n    total_bytes += e.size();\n  } else {\n    char buf[absl::numbers_internal::kFastToBufferSize];\n    const char* end = absl::numbers_internal::FastIntToBuffer(e.as_long(), buf);\n    const auto size = end - buf;\n    const int result = pfadd_dense(counter_, reinterpret_cast<const unsigned char*>(buf), size);\n    CHECK_NE(-1, result);\n    ++total_count;\n    total_bytes += size;\n  }\n  return true;\n}\n\nuint64_t UniqueStrings::ByteSavingsOnDedup() const {\n  const auto uniques = UniqueCount();\n  const auto diff = total_count > uniques ? total_count - uniques : 0;\n  return diff * AverageLength();\n}\n\n}  // namespace dfly\n"
  },
  {
    "path": "src/server/string_stats.h",
    "content": "// Copyright 2026, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#pragma once\n\nextern \"C\" {\n#include \"redis/hyperloglog.h\"\n}\n\n#include \"server/container_utils.h\"\n\nnamespace dfly {\n\nstruct UniqueStrings {\n  uint64_t total_count{0};\n  uint64_t total_bytes{0};\n\n  UniqueStrings();\n  ~UniqueStrings() {\n    delete[] counter_.hll;\n  }\n\n  UniqueStrings(const UniqueStrings&) = delete;\n  UniqueStrings& operator=(const UniqueStrings&) = delete;\n\n  // To store in flat hash map\n  UniqueStrings(UniqueStrings&&) noexcept;\n  UniqueStrings& operator=(UniqueStrings&&) noexcept;\n\n  void AddHMap(const PrimeValue& pv);\n  void AddSet(const PrimeValue& pv);\n  void AddList(const PrimeValue& pv);\n  void AddZSet(const PrimeValue& pv);\n\n  void Add(const UniqueStrings& other);\n\n  std::string ToString(std::string_view label) const;\n\n private:\n  HllBufferPtr counter_;\n  bool AddString(const container_utils::ContainerEntry& e);\n\n  uint64_t ByteSavingsOnDedup() const;\n\n  uint64_t UniqueCount() const {\n    return pfcountSingle(counter_);\n  }\n\n  double AverageLength() const {\n    return total_count ? static_cast<double>(total_bytes) / total_count : 0;\n  }\n};\n\n}  // namespace dfly\n"
  },
  {
    "path": "src/server/string_stats_test.cc",
    "content": "// Copyright 2026, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#include \"server/string_stats.h\"\n\n#include <absl/strings/numbers.h>\n#include <absl/strings/str_split.h>\n\n#include \"base/gtest.h\"\n#include \"base/logging.h\"\n#include \"server/test_utils.h\"\n\nusing namespace testing;\n\nnamespace {\n\nstd::string GetValue(std::string_view row) {\n  static constexpr std::string_view bytes = \" bytes\";\n  auto value = absl::StripAsciiWhitespace(row.substr(row.find(':') + 1));\n  if (value.ends_with(bytes))\n    value.remove_suffix(bytes.length());\n  return {value.begin(), value.end()};\n}\n\n}  // namespace\n\nnamespace dfly {\n\nclass StringStatsTest : public BaseFamilyTest {\n protected:\n  struct ParsedBucket {\n    uint64_t total_strings = 0;\n    uint64_t unique_strings = 0;\n    uint64_t total_bytes = 0;\n    double average_length = 0;\n    uint64_t estimated_savings = 0;\n  };\n\n  static std::optional<ParsedBucket> ParseStats(std::string_view output) {\n    std::vector<std::string_view> rows = absl::StrSplit(output, \"\\n\", absl::SkipWhitespace());\n    for (auto& row : rows)\n      row = absl::StripAsciiWhitespace(row);\n\n    auto it = rows.begin();\n    while (it != rows.end() && !it->starts_with(\"Strings\"))\n      ++it;\n\n    if (it == rows.end())\n      return std::nullopt;\n\n    ParsedBucket bucket;\n    EXPECT_NE(it, rows.end());\n    EXPECT_TRUE(absl::SimpleAtoi(GetValue(*++it), &bucket.total_strings));\n    EXPECT_NE(it, rows.end());\n    EXPECT_TRUE(absl::SimpleAtoi(GetValue(*++it), &bucket.unique_strings));\n    EXPECT_NE(it, rows.end());\n    EXPECT_TRUE(absl::SimpleAtoi(GetValue(*++it), &bucket.total_bytes));\n    EXPECT_NE(it, rows.end());\n    EXPECT_TRUE(absl::SimpleAtod(GetValue(*++it), &bucket.average_length));\n    EXPECT_NE(it, rows.end());\n    EXPECT_TRUE(absl::SimpleAtoi(GetValue(*++it), &bucket.estimated_savings));\n    return bucket;\n  
}\n};\n\nTEST_F(StringStatsTest, HashWithDuplicateFields) {\n  for (int i = 0; i < 100; ++i) {\n    Run({\"HSET\", absl::StrCat(\"user:\", i), \"name\", absl::StrCat(\"name_\", i), \"email\",\n         absl::StrCat(\"email_\", i), \"age\", absl::StrCat(20 + i)});\n  }\n\n  const auto resp = Run({\"DEBUG\", \"UNIQ-STRS\"});\n\n  EXPECT_THAT(resp.GetString(), HasSubstr(\"hash\"));\n\n  const auto bucket = ParseStats(resp.GetString());\n  EXPECT_TRUE(bucket.has_value());\n\n  EXPECT_EQ(bucket->total_strings, 300);\n  EXPECT_LE(bucket->unique_strings, 5);\n  EXPECT_GE(bucket->unique_strings, 2);\n  EXPECT_GT(bucket->estimated_savings, 0);\n}\n\nTEST_F(StringStatsTest, SetWithUniqueMembers) {\n  for (int i = 0; i < 10; ++i) {\n    Run({\"SADD\", absl::StrCat(\"set:\", i), absl::StrCat(\"unique_member_\", i, \"_a\"),\n         absl::StrCat(\"unique_member_\", i, \"_b\"), absl::StrCat(\"unique_member_\", i, \"_c\")});\n  }\n\n  const auto resp = Run({\"DEBUG\", \"UNIQ-STRS\"});\n\n  const auto bucket = ParseStats(resp.GetString());\n  EXPECT_TRUE(bucket.has_value());\n\n  EXPECT_EQ(bucket->total_strings, 30);\n  EXPECT_NEAR(bucket->unique_strings, 30, 3);\n  EXPECT_LE(bucket->estimated_savings, bucket->total_bytes * 0.15);\n}\n\nTEST_F(StringStatsTest, SetWithDuplicateMembers) {\n  for (int i = 0; i < 50; ++i) {\n    Run({\"SADD\", absl::StrCat(\"set:\", i), \"alpha\", \"beta\", \"gamma\"});\n  }\n\n  const auto resp = Run({\"DEBUG\", \"UNIQ-STRS\"});\n\n  const auto bucket = ParseStats(resp.GetString());\n  EXPECT_TRUE(bucket.has_value());\n\n  EXPECT_EQ(bucket->total_strings, 150);\n  EXPECT_LE(bucket->unique_strings, 5);\n  EXPECT_GE(bucket->unique_strings, 2);\n  EXPECT_GT(bucket->estimated_savings, 0);\n}\n\nTEST_F(StringStatsTest, MultipleTypes) {\n  for (int i = 0; i < 10; ++i) {\n    Run({\"HSET\", absl::StrCat(\"h:\", i), \"field\", \"value\"});\n    Run({\"SADD\", absl::StrCat(\"s:\", i), \"member\"});\n  }\n\n  const auto resp = Run({\"DEBUG\", 
\"UNIQ-STRS\"});\n  const std::string output = resp.GetString();\n\n  EXPECT_THAT(output, HasSubstr(\"hash\"));\n  EXPECT_THAT(output, HasSubstr(\"set\"));\n}\n\nTEST_F(StringStatsTest, EmptyDatabase) {\n  const auto resp = Run({\"DEBUG\", \"UNIQ-STRS\"});\n  const std::string output = resp.GetString();\n\n  EXPECT_THAT(output, HasSubstr(\"___begin unique string stats___\"));\n  EXPECT_THAT(output, HasSubstr(\"___end unique string stats___\"));\n\n  auto bucket = ParseStats(output);\n  EXPECT_FALSE(bucket.has_value());\n}\n\nTEST_F(StringStatsTest, NumberKeys) {\n  for (int i = 0; i < 100; ++i) {\n    Run({\"LPUSH\", absl::StrCat(\"h:\", i), \"007\", \"value\"});\n  }\n\n  const auto resp = Run({\"DEBUG\", \"UNIQ-STRS\"});\n  const std::string output = resp.GetString();\n\n  EXPECT_THAT(output, HasSubstr(\"list\"));\n  const auto bucket = ParseStats(output);\n  EXPECT_TRUE(bucket.has_value());\n\n  EXPECT_EQ(bucket->total_strings, 200);\n  EXPECT_EQ(bucket->unique_strings, 2);\n}\n\n}  // namespace dfly\n"
  },
  {
    "path": "src/server/synchronization.cc",
    "content": "// Copyright 2024, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#include \"server/synchronization.h\"\n\n#include \"base/logging.h\"\n#include \"server/engine_shard_set.h\"\n#include \"server/server_state.h\"\n\nnamespace dfly {\n\nThreadLocalMutex::ThreadLocalMutex() {\n  shard_ = EngineShard::tlocal();\n}\n\nThreadLocalMutex::~ThreadLocalMutex() {\n  DCHECK_EQ(EngineShard::tlocal(), shard_);\n}\n\nvoid ThreadLocalMutex::lock() {\n  if (ServerState::tlocal()->serialization_max_chunk_size != 0) {\n    DCHECK_EQ(EngineShard::tlocal(), shard_);\n    util::fb2::NoOpLock noop_lk_;\n    if (locked_fiber_ != nullptr) {\n      DCHECK(util::fb2::detail::FiberActive() != locked_fiber_);\n    }\n    cond_var_.wait(noop_lk_, [this]() { return !flag_; });\n    flag_ = true;\n    DCHECK_EQ(locked_fiber_, nullptr);\n    locked_fiber_ = util::fb2::detail::FiberActive();\n  }\n}\n\nvoid ThreadLocalMutex::unlock() {\n  if (ServerState::tlocal()->serialization_max_chunk_size != 0) {\n    DCHECK_EQ(EngineShard::tlocal(), shard_);\n    flag_ = false;\n    cond_var_.notify_one();\n    locked_fiber_ = nullptr;\n  }\n}\n\nvoid LocalLatch::unlock() {\n  DCHECK_GT(mutating_, 0u);\n  --mutating_;\n  if (mutating_ == 0) {\n    cond_var_.notify_all();\n  }\n}\n\nvoid LocalLatch::Wait() {\n  util::fb2::NoOpLock noop_lk_;\n  cond_var_.wait(noop_lk_, [this]() { return mutating_ == 0; });\n}\n\n}  // namespace dfly\n"
  },
  {
    "path": "src/server/synchronization.h",
    "content": "// Copyright 2024, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#pragma once\n\n#include \"util/fibers/fibers.h\"\n#include \"util/fibers/synchronization.h\"\n\nnamespace dfly {\n\nclass EngineShard;\n\n// Helper class used to guarantee atomicity between serialization of buckets\nclass ABSL_LOCKABLE ThreadLocalMutex {\n public:\n  ThreadLocalMutex();\n  ~ThreadLocalMutex();\n\n  void lock() ABSL_EXCLUSIVE_LOCK_FUNCTION();\n  void unlock() ABSL_UNLOCK_FUNCTION();\n  bool is_locked() const {\n    return flag_;\n  }\n\n private:\n  EngineShard* shard_;\n  util::fb2::CondVarAny cond_var_;\n  bool flag_ = false;\n  util::fb2::detail::FiberInterface* locked_fiber_{nullptr};\n};\n\n// Replacement of std::SharedLock that allows -Wthread-safety\ntemplate <typename Mutex> class ABSL_SCOPED_LOCKABLE SharedLock {\n public:\n  explicit SharedLock(Mutex& m) ABSL_EXCLUSIVE_LOCK_FUNCTION(m) : m_(m) {\n    m_.lock_shared();\n    is_locked_ = true;\n  }\n\n  ~SharedLock() ABSL_UNLOCK_FUNCTION() {\n    if (is_locked_) {\n      m_.unlock_shared();\n    }\n  }\n\n  void unlock() ABSL_UNLOCK_FUNCTION() {\n    m_.unlock_shared();\n    is_locked_ = false;\n  }\n\n private:\n  Mutex& m_;\n  bool is_locked_;\n};\n\n// A single threaded latch that passes a waiter fiber if its count is 0.\n// Fibers that increase/decrease the count do not wait on the latch.\nclass LocalLatch {\n public:\n  void lock() {\n    ++mutating_;\n  }\n\n  void unlock();\n\n  void Wait();\n\n  bool IsBlocked() const {\n    return mutating_ > 0;\n  }\n\n private:\n  util::fb2::CondVarAny cond_var_;\n  size_t mutating_ = 0;\n};\n\n}  // namespace dfly\n"
  },
  {
    "path": "src/server/table.cc",
    "content": "// Copyright 2022, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#include \"server/table.h\"\n\n#include \"base/flags.h\"\n#include \"base/logging.h\"\n#include \"core/top_keys.h\"\n#include \"server/cluster_support.h\"\n#include \"server/server_state.h\"\n\nusing namespace std;\nnamespace dfly {\n#define ADD(x) (x) += o.x\n\n// It should be const, but we override this variable in our tests so that they run faster.\nunsigned kInitSegmentLog = 3;\n\nvoid DbTableStats::AddTypeMemoryUsage(unsigned type, int64_t delta) {\n  if (type >= memory_usage_by_type.size()) {\n    LOG(DFATAL) << \"Encountered unknown type when aggregating per-type memory: \" << type;\n    return;\n  }\n\n  DCHECK_GE(obj_memory_usage, memory_usage_by_type[type]);\n\n  if (delta < 0 && memory_usage_by_type[type] < size_t(-delta)) {\n    LOG_EVERY_T(ERROR, 1) << \"Encountered underflow memory usage when aggregating per-type memory: \"\n                          << obj_memory_usage << \" + \" << delta << \", type: \" << type;\n\n    // Truncate delta to avoid underflow, but keep the memory usage consistent with the sum of\n    // per-type usage.\n    delta = -static_cast<int64_t>(memory_usage_by_type[type]);\n  }\n\n  obj_memory_usage += delta;\n  memory_usage_by_type[type] += delta;\n}\n\nDbTableStats& DbTableStats::operator+=(const DbTableStats& o) {\n  constexpr size_t kDbSz = sizeof(DbTableStats) - sizeof(memory_usage_by_type);\n  static_assert(kDbSz == 72);\n\n  ADD(inline_keys);\n  ADD(expire_count);\n  ADD(obj_memory_usage);\n  ADD(tiered_entries);\n  ADD(tiered_used_bytes);\n  ADD(events.hits);\n  ADD(events.misses);\n  ADD(events.expired_keys);\n  ADD(events.evicted_keys);\n\n  for (size_t i = 0; i < o.memory_usage_by_type.size(); ++i) {\n    memory_usage_by_type[i] += o.memory_usage_by_type[i];\n  }\n\n  return *this;\n}\n\nSlotStats& SlotStats::operator+=(const SlotStats& o) {\n  static_assert(sizeof(SlotStats) == 32);\n\n  
ADD(key_count);\n  ADD(total_reads);\n  ADD(total_writes);\n  ADD(memory_bytes);\n  return *this;\n}\n\nstd::optional<const IntentLock> LockTable::Find(LockTag tag) const {\n  LockFp fp = tag.Fingerprint();\n  if (auto it = locks_.find(fp); it != locks_.end())\n    return it->second;\n  return std::nullopt;\n}\n\nstd::optional<const IntentLock> LockTable::Find(uint64_t fp) const {\n  if (auto it = locks_.find(fp); it != locks_.end())\n    return it->second;\n  return std::nullopt;\n}\n\nvoid LockTable::Release(uint64_t fp, IntentLock::Mode mode) {\n  auto it = locks_.find(fp);\n  DCHECK(it != locks_.end()) << fp;\n\n  it->second.Release(mode);\n  if (it->second.IsFree())\n    locks_.erase(it);\n}\n\n[[maybe_unused]] constexpr size_t kSzTable = sizeof(DbTable);\n\nDbTable::SampleTopKeys::~SampleTopKeys() {\n  delete top_keys;\n}\n\nDbTable::SampleUniqueKeys::~SampleUniqueKeys() {\n  delete[] dense_hll;\n}\n\nDbTable::DbTable(PMR_NS::memory_resource* mr, DbIndex db_index)\n    : prime(kInitSegmentLog, detail::PrimeTablePolicy{}, mr),\n      mcflag(0, detail::ExpireTablePolicy{}, mr),\n      index(db_index) {\n  if (IsClusterEnabled()) {\n    slots_stats.reset(new SlotStats[kMaxSlotNum + 1]);\n  }\n  thread_index = ServerState::tlocal()->thread_index();\n}\n\nDbTable::~DbTable() {\n  DCHECK_EQ(thread_index, ServerState::tlocal()->thread_index());\n  delete sample_top_keys;\n  delete sample_unique_keys;\n}\n\nvoid DbTable::Clear() {\n  prime.size();\n  prime.Clear();\n  mcflag.Clear();\n  stats = DbTableStats{};\n}\n\nPrimeIterator DbTable::Launder(PrimeIterator it, string_view key) {\n  if (!it.IsOccupied() || it->first != key) {\n    it = prime.Find(key);\n  }\n  return it;\n}\n\n}  // namespace dfly\n"
  },
  {
    "path": "src/server/table.h",
    "content": "// Copyright 2022, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#pragma once\n\n#include <absl/container/flat_hash_map.h>\n\n#include <boost/smart_ptr/intrusive_ptr.hpp>\n#include <boost/smart_ptr/intrusive_ref_counter.hpp>\n\n#include \"core/expire_period.h\"\n#include \"core/intent_lock.h\"\n#include \"server/detail/table.h\"\n#include \"server/tx_base.h\"\n\nextern \"C\" {\n#include \"redis/redis_aux.h\"\n}\nnamespace base {\nclass Histogram;\n}\n\nnamespace dfly {\n\nusing PrimeKey = detail::PrimeKey;\nusing PrimeValue = detail::PrimeValue;\n\nusing PrimeTable = DashTable<PrimeKey, PrimeValue, detail::PrimeTablePolicy>;\nusing ExpireTable = DashTable<PrimeKey, ExpirePeriod, detail::ExpireTablePolicy>;\n\n/// Iterators are invalidated when new keys are added to the table or some entries are deleted.\n/// Iterators are still valid if a different entry in the table was mutated.\nusing PrimeIterator = PrimeTable::iterator;\nusing PrimeConstIterator = PrimeTable::const_iterator;\nusing ExpireIterator = ExpireTable::iterator;\nusing ExpireConstIterator = ExpireTable::const_iterator;\n\nclass TopKeys;\n\ninline bool IsValid(PrimeIterator it) {\n  return !it.is_done();\n}\n\ninline bool IsValid(ExpireIterator it) {\n  return !it.is_done();\n}\n\ninline bool IsValid(PrimeConstIterator it) {\n  return !it.is_done();\n}\n\ninline bool IsValid(ExpireConstIterator it) {\n  return !it.is_done();\n}\n\nstruct SlotStats {\n  uint64_t key_count = 0;\n  uint64_t total_reads = 0;\n  uint64_t total_writes = 0;\n  uint64_t memory_bytes = 0;\n  SlotStats& operator+=(const SlotStats& o);\n};\n\nstruct DbTableStats {\n  // Number of inline keys.\n  uint64_t inline_keys = 0;\n\n  // number of keys with ttls set.\n  uint64_t expire_count = 0;\n\n  // Object memory usage besides hash-table capacity.\n  // Applies for any non-inline objects.\n  size_t obj_memory_usage = 0;\n\n  size_t tiered_entries = 0;\n  size_t tiered_used_bytes 
= 0;\n\n  struct {\n    // Per-database hits/misses on keys\n    size_t hits = 0;\n    size_t misses = 0;\n\n    // Per-database expired/evicted keys\n    size_t expired_keys = 0;\n    size_t evicted_keys = 0;\n  } events;\n\n  std::array<size_t, OBJ_TYPE_MAX> memory_usage_by_type = {};\n\n  // Mostly used internally, exposed for tiered storage.\n  void AddTypeMemoryUsage(unsigned type, int64_t delta);\n\n  DbTableStats& operator+=(const DbTableStats& o);\n};\n\n// Table for recording locks. Keys used with the lock table should be normalized with LockTag.\nclass LockTable {\n public:\n  size_t Size() const {\n    return locks_.size();\n  }\n  std::optional<const IntentLock> Find(LockTag tag) const;\n  std::optional<const IntentLock> Find(LockFp fp) const;\n\n  bool Acquire(LockFp fp, IntentLock::Mode mode) {\n    return locks_[fp].Acquire(mode);\n  }\n\n  void Release(LockFp fp, IntentLock::Mode mode);\n\n  auto begin() const {\n    return locks_.cbegin();\n  }\n\n  auto end() const {\n    return locks_.cend();\n  }\n\n private:\n  // We use fingerprinting before accessing locks - no need to mix more.\n  struct Hasher {\n    size_t operator()(LockFp val) const {\n      return val;\n    }\n  };\n  absl::flat_hash_map<LockFp, IntentLock, Hasher> locks_;\n};\n\n// A single Db table that represents a table that can be chosen with \"SELECT\" command.\nstruct DbTable : boost::intrusive_ref_counter<DbTable, boost::thread_unsafe_counter> {\n  PrimeTable prime;\n  // ExpireTable expire;  // TTL is now embedded in CompactKey via SDS_TTL_TAG.\n  DashTable<PrimeKey, uint32_t, detail::ExpireTablePolicy> mcflag;\n\n  // Contains transaction locks\n  LockTable trans_locks;\n\n  // Stores a list of dependent dirty flags for each watched key.\n  absl::flat_hash_map<std::string, std::vector<std::atomic_bool*>> watched_keys;\n\n  // Keyspace notifications: list of expired keys since last batch of messages was published.\n  mutable std::vector<std::string> expired_keys_events_;\n\n  
mutable DbTableStats stats;\n  std::unique_ptr<SlotStats[]> slots_stats;\n  PrimeTable::Cursor expire_cursor;\n\n  struct SampleTopKeys {\n    TopKeys* top_keys = nullptr;\n    uint64_t total_samples = 0;\n\n    SampleTopKeys() = default;\n    ~SampleTopKeys();\n    void operator=(const SampleTopKeys& other) = delete;\n    SampleTopKeys(const SampleTopKeys& other) = delete;\n  };\n  SampleTopKeys* sample_top_keys = nullptr;\n\n  struct SampleUniqueKeys {\n    uint8_t* dense_hll = nullptr;\n    uint64_t total_samples = 0;\n\n    SampleUniqueKeys() = default;\n    ~SampleUniqueKeys();\n\n    void operator=(const SampleUniqueKeys& other) = delete;\n    SampleUniqueKeys(const SampleUniqueKeys& other) = delete;\n  };\n  SampleUniqueKeys* sample_unique_keys = nullptr;\n  base::Histogram* sample_values_hist = nullptr;\n\n  DbIndex index;\n  uint32_t thread_index;\n\n  explicit DbTable(PMR_NS::memory_resource* mr, DbIndex index);\n  ~DbTable();\n\n  void Clear();\n  PrimeIterator Launder(PrimeIterator it, std::string_view key);\n\n  size_t table_memory() const {\n    return prime.mem_usage();\n  }\n};\n\n// We use reference counting semantics of DbTable when doing snapshotting.\n// There we need to preserve the copy of the table in case someone flushes it during\n// the snapshot process. 
We copy the pointers in StartSnapshotInShard function.\nusing DbTableArray = std::vector<boost::intrusive_ptr<DbTable>>;\n\n// ChangeReq - describes the change to the table.\nstruct ChangeReq {\n  // If iterator is set then it's an update to the existing bucket.\n  // Otherwise (string_view is set) then it's a new key that is going to be added to the table.\n  std::variant<PrimeTable::bucket_iterator, std::string_view> change;\n\n  explicit ChangeReq(PrimeTable::bucket_iterator it) : change(it) {\n  }\n  explicit ChangeReq(std::string_view key) : change(key) {\n  }\n\n  const PrimeTable::bucket_iterator* update() const {\n    return std::get_if<PrimeTable::bucket_iterator>(&change);\n  }\n};\n\n}  // namespace dfly\n"
  },
  {
    "path": "src/server/test_utils.cc",
    "content": "// Copyright 2022, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#include \"server/test_utils.h\"\n\n#include \"server/acl/acl_commands_def.h\"\n#include \"server/acl/acl_family.h\"\n#include \"util/fibers/fibers.h\"\n\nextern \"C\" {\n#include \"redis/zmalloc.h\"\n}\n\n#include <absl/flags/reflection.h>\n#include <absl/strings/match.h>\n#include <absl/strings/str_split.h>\n#include <mimalloc.h>\n\n#include \"base/flags.h\"\n#include \"base/logging.h\"\n#include \"base/stl_util.h\"\n#include \"facade/dragonfly_connection.h\"\n#include \"facade/reply_builder.h\"\n#include \"io/file_util.h\"\n#include \"server/acl/acl_log.h\"\n#include \"util/fibers/pool.h\"\n\nusing namespace std;\n\nABSL_DECLARE_FLAG(string, dbfilename);\nABSL_DECLARE_FLAG(double, rss_oom_deny_ratio);\nABSL_DECLARE_FLAG(uint32_t, num_shards);\nABSL_FLAG(bool, force_epoll, false, \"If true, uses epoll api instead iouring to run tests\");\nABSL_DECLARE_FLAG(uint32_t, acllog_max_len);\nABSL_DECLARE_FLAG(bool, enable_heartbeat_rss_eviction);\n\nnamespace dfly {\n\nnamespace {\n\n// Default stack size for fibers. 
We decrease it by 16 bytes because some allocators\n// need additional 8-16 bytes for their internal structures, thus over reserving additional\n// memory pages if using round sizes.\n#ifdef NDEBUG\nconstexpr size_t kFiberDefaultStackSize = 32_KB - 16;\n#elif defined SANITIZERS\n// Increase stack size for sanitizers builds.\nconstexpr size_t kFiberDefaultStackSize = 64_KB - 16;\n#else\n// Increase stack size for debug builds.\nconstexpr size_t kFiberDefaultStackSize = 50_KB - 16;\n#endif\n\n}  // namespace\n\nstd::ostream& operator<<(std::ostream& os, const DbStats& stats) {\n  os << \"keycount: \" << stats.key_count << \", tiered_size: \" << stats.tiered_used_bytes\n     << \", tiered_entries: \" << stats.tiered_entries << \"\\n\";\n\n  return os;\n}\n\nextern unsigned kInitSegmentLog;\n\nusing MP = MemcacheParser;\nusing namespace util;\nusing namespace testing;\n\nstatic vector<string> SplitLines(const std::string& src) {\n  vector<string> res = absl::StrSplit(src, \"\\r\\n\");\n  if (res.back().empty())\n    res.pop_back();\n  for (auto& v : res) {\n    absl::StripAsciiWhitespace(&v);\n  }\n  return res;\n}\n\nTestConnection::TestConnection(facade::ServiceInterface* si, Protocol protocol)\n    : facade::Connection(protocol, nullptr, nullptr, si) {\n  cc_.reset(new dfly::ConnectionContext(this, {}));\n  static_cast<dfly::ConnectionContext*>(cc_.get())->skip_acl_validation = true;\n  SetSocket(ProactorBase::me()->CreateSocket());\n  OnConnectionStart();\n}\n\nvoid TestConnection::SendPubMessageAsync(PubMessage pmsg) {\n  messages.push_back(std::move(pmsg));\n}\n\nvoid TestConnection::SendInvalidationMessageAsync(InvalidationMessage msg) {\n  invalidate_messages.push_back(std::move(msg));\n}\n\nstd::string TestConnection::RemoteEndpointStr() const {\n  return \"\";\n}\n\nvoid TransactionSuspension::Start() {\n  static CommandId cid{\"TEST\", CO::JOURNALED | CO::GLOBAL_TRANS, -1, 0, 0, acl::NONE};\n\n  transaction_ = new dfly::Transaction{&cid};\n\n  auto st = 
transaction_->InitByArgs(&namespaces->GetDefaultNamespace(), 0, {});\n  CHECK_EQ(st, OpStatus::OK);\n\n  transaction_->Execute([](Transaction* t, EngineShard* shard) { return OpStatus::OK; }, false);\n}\n\nvoid TransactionSuspension::Terminate() {\n  transaction_->Conclude();\n  transaction_ = nullptr;\n}\n\nclass BaseFamilyTest::TestConnWrapper {\n public:\n  TestConnWrapper(facade::ServiceInterface* si, Protocol proto);\n  ~TestConnWrapper();\n\n  CmdArgVec Args(ArgSlice list);\n\n  RespVec ParseResponse(bool fully_consumed);\n\n  // returns: type(pmessage), pattern, channel, message.\n  const facade::Connection::PubMessage& GetPubMessage(size_t index) const;\n\n  const facade::Connection::InvalidationMessage& GetInvalidationMessage(size_t index) const;\n\n  ConnectionContext* cmd_cntx() {\n    auto cntx = static_cast<ConnectionContext*>(dummy_conn_->cntx());\n    cntx->ns = &namespaces->GetDefaultNamespace();\n    return cntx;\n  }\n\n  StringVec SplitLines() const {\n    return dfly::SplitLines(sink_.str());\n  }\n\n  void ClearSink() {\n    sink_.Clear();\n    expr_builder_.Clear();\n  }\n\n  TestConnection* conn() {\n    return dummy_conn_.get();\n  }\n\n  SinkReplyBuilder* builder() {\n    return builder_.get();\n  }\n\n private:\n  ::io::StringSink sink_;  // holds the response blob\n\n  std::unique_ptr<TestConnection> dummy_conn_;\n\n  std::vector<std::unique_ptr<std::string>> tmp_str_vec_;\n\n  RespExprBuilder expr_builder_;\n  std::unique_ptr<SinkReplyBuilder> builder_;\n};\n\nBaseFamilyTest::TestConnWrapper::TestConnWrapper(facade::ServiceInterface* si, Protocol proto)\n    : dummy_conn_(new TestConnection(si, proto)) {\n  switch (proto) {\n    case Protocol::REDIS:\n      builder_.reset(new RedisReplyBuilder{&sink_});\n      break;\n    case Protocol::MEMCACHE:\n      builder_.reset(new MCReplyBuilder{&sink_});\n      break;\n  }\n}\n\nBaseFamilyTest::TestConnWrapper::~TestConnWrapper() {\n}\n\nBaseFamilyTest::BaseFamilyTest() 
{\n}\n\nBaseFamilyTest::~BaseFamilyTest() {\n  for (auto* v : resp_vec_)\n    delete v;\n}\n\nvoid BaseFamilyTest::SetUpTestSuite() {\n  kInitSegmentLog = 1;\n\n  absl::SetFlag(&FLAGS_rss_oom_deny_ratio, -1);\n  absl::SetFlag(&FLAGS_dbfilename, \"\");\n  // We don't want rss eviction\n  absl::SetFlag(&FLAGS_enable_heartbeat_rss_eviction, false);\n\n  static bool init = true;\n  if (exchange(init, false)) {\n    fb2::SetDefaultStackResource(&fb2::std_malloc_resource, kFiberDefaultStackSize);\n  }\n\n  init_zmalloc_threadlocal(mi_heap_get_backing());\n\n  // TODO: go over all env variables starting with FLAGS_ and make sure they are in the below list.\n  static constexpr const char* kEnvFlags[] = {\n      \"cluster_mode\",\n      \"lock_on_hashtags\",\n      \"force_epoll\",\n  };\n  for (string_view flag : kEnvFlags) {\n    const char* value = getenv(absl::StrCat(\"FLAGS_\", flag).data());\n    if (value != nullptr) {\n      SetTestFlag(flag, value);\n    }\n  }\n}\n\nvoid BaseFamilyTest::SetUp() {\n  max_memory_limit = INT_MAX;\n  ResetService();\n}\n\nvoid BaseFamilyTest::TearDown() {\n  CHECK_EQ(NumLocked(), 0U);\n\n  {\n    std::unique_lock conn_lck{mu_};\n    connections_.clear();\n  }\n\n  ShutdownService();\n\n  const TestInfo* const test_info = UnitTest::GetInstance()->current_test_info();\n  LOG(INFO) << \"Finishing \" << test_info->name();\n}\n\nvoid BaseFamilyTest::ResetService() {\n  if (service_ != nullptr) {\n    TEST_InvalidateLockTagOptions();\n\n    ShutdownService();\n  }\n\n#ifdef __linux__\n  if (absl::GetFlag(FLAGS_force_epoll)) {\n    pp_.reset(fb2::Pool::Epoll(num_threads_));\n  } else {\n    pp_.reset(fb2::Pool::IOUring(16, num_threads_));\n  }\n#else\n  pp_.reset(fb2::Pool::Epoll(num_threads_));\n#endif\n\n  // Using a different default than production could expose bugs\n  if (absl::GetFlag(FLAGS_num_shards) == 0) {\n    absl::SetFlag(&FLAGS_num_shards, num_threads_ - 1);\n  }\n  pp_->Run();\n  service_ = 
std::make_unique<Service>(pp_.get());\n\n  // Must be reset before starting the service. Engine shard heartbeat task updates this\n  // value, and if reset after some invocations of heartbeat have run, the accumulated data is\n  // lost and can cause test failure.\n  used_mem_current = 0;\n  service_->Init(nullptr, {});\n\n  TEST_current_time_ms = absl::GetCurrentTimeNanos() / 1000000;\n  auto default_ns = &namespaces->GetDefaultNamespace();\n  auto cb = [&](EngineShard* s) {\n    default_ns->GetDbSlice(s->shard_id()).UpdateExpireBase(TEST_current_time_ms - 1000, 0);\n  };\n  shard_set->RunBriefInParallel(cb);\n\n  const TestInfo* const test_info = UnitTest::GetInstance()->current_test_info();\n  LOG(INFO) << \"Starting \" << test_info->name();\n\n  watchdog_fiber_ = pp_->GetNextProactor()->LaunchFiber([this] {\n    ThisFiber::SetName(\"Watchdog\");\n\n    if (!watchdog_done_.WaitFor(20s)) {\n      LOG(ERROR) << \"Deadlock detected!!!!\";\n      absl::SetFlag(&FLAGS_alsologtostderr, true);\n      fb2::Mutex m;\n      shard_set->pool()->AwaitFiberOnAll([&m, this](unsigned index, ProactorBase* base) {\n        ThisFiber::SetName(\"Watchdog\");\n        std::unique_lock lk(m);\n        LOG(ERROR) << \"Proactor \" << index << \":\\n\";\n        fb2::detail::FiberInterface::PrintAllFiberStackTraces();\n        EngineShard* es = EngineShard::tlocal();\n\n        if (es != nullptr) {\n          TxQueue* txq = es->txq();\n          if (!txq->Empty()) {\n            LOG(ERROR) << \"TxQueue for shard \" << es->shard_id();\n\n            auto head = txq->Head();\n            auto it = head;\n            do {\n              Transaction* trans = std::get<Transaction*>(es->txq()->At(it));\n              LOG(ERROR) << \"Transaction \" << trans->DebugId(es->shard_id());\n              it = txq->Next(it);\n            } while (it != head);\n          }\n\n          LOG(ERROR) << \"TxLocks for shard \" << es->shard_id();\n          for (const auto& k_v : 
namespaces->GetDefaultNamespace()\n                                     .GetDbSlice(es->shard_id())\n                                     .GetDBTable(0)\n                                     ->trans_locks) {\n            LOG(ERROR) << \"Key \" << k_v.first << \" \" << k_v.second;\n          }\n\n          LOG(ERROR) << \"Transaction for shard \" << es->shard_id();\n          std::unique_lock conn_lck{mu_};\n          for (auto& conn : connections_) {\n            auto* context = conn.second->cmd_cntx();\n            if (context->transaction && context->transaction->IsScheduled() &&\n                context->transaction->IsActive(es->shard_id())) {\n              LOG(ERROR) << context->transaction->DebugId(es->shard_id());\n            }\n          }\n        }\n      });\n    }\n  });\n}\n\nvoid BaseFamilyTest::ShutdownService() {\n  if (service_ == nullptr) {\n    return;\n  }\n\n  // Don't save files during shutdown\n  CleanupSnapshots();\n  absl::SetFlag(&FLAGS_dbfilename, \"\");\n\n  service_->Shutdown();\n  service_.reset();\n\n  // Stop the watchdog before shutting down the service, because shutdown tears down namespaces\n  // which the watchdog's diagnostic code may access. 
Must run before we delete shard_set as\n  // the watchdog accesses it.\n  watchdog_done_.Notify();\n  watchdog_fiber_.Join();\n\n  delete shard_set;\n  shard_set = nullptr;\n\n  pp_->Stop();\n}\n\nvoid BaseFamilyTest::InitWithDbFilename() {\n  ShutdownService();\n\n  absl::SetFlag(&FLAGS_dbfilename, \"rdbtestdump\");\n  CleanupSnapshots();\n  ResetService();\n}\n\nvoid BaseFamilyTest::CleanupSnapshots() {\n  string dbfilename = absl::GetFlag(FLAGS_dbfilename);\n  if (dbfilename.empty())\n    return;\n\n  auto rdb_files = io::StatFiles(absl::StrCat(dbfilename, \"*\"));\n  CHECK(rdb_files);\n  for (const auto& fl : *rdb_files) {\n    unlink(fl.name.c_str());\n  }\n}\n\nunsigned BaseFamilyTest::NumLocked() {\n  atomic_uint count = 0;\n  auto default_ns = &namespaces->GetDefaultNamespace();\n  shard_set->RunBriefInParallel([&](EngineShard* shard) {\n    for (const auto& db : default_ns->GetDbSlice(shard->shard_id()).databases()) {\n      if (db == nullptr) {\n        continue;\n      }\n      count += db->trans_locks.Size();\n    }\n  });\n  return count;\n}\n\nvoid BaseFamilyTest::ClearMetrics() {\n  shard_set->pool()->AwaitBrief([](unsigned, auto*) {\n    ServerState::tlocal()->stats = ServerState::Stats(shard_set->size());\n  });\n}\n\nstring BaseFamilyTest::FormatMetrics(const Metrics& metrics) const {\n  return service_->server_family().FormatInfoMetrics(metrics, \"ALL\", true);\n}\n\nvoid BaseFamilyTest::WaitUntilLocked(DbIndex db_index, string_view key, double timeout) {\n  auto step = 50us;\n  auto timeout_micro = chrono::duration_cast<chrono::microseconds>(1000ms * timeout);\n  int64_t steps = timeout_micro.count() / step.count();\n  do {\n    ThisFiber::SleepFor(step);\n  } while (!IsLocked(db_index, key) && --steps > 0);\n  CHECK(IsLocked(db_index, key));\n}\n\nbool BaseFamilyTest::WaitUntilCondition(std::function<bool()> condition_cb,\n                                        std::chrono::milliseconds timeout_ms) {\n  auto step = 50us;\n  auto timeout_micro 
= chrono::duration_cast<chrono::microseconds>(timeout_ms);\n  int64_t steps = timeout_micro.count() / step.count();\n  do {\n    ThisFiber::SleepFor(step);\n  } while (!condition_cb() && --steps > 0);\n  return condition_cb();\n}\n\nRespExpr BaseFamilyTest::Run(ArgSlice list) {\n  if (!ProactorBase::IsProactorThread()) {\n    return pp_->at(0)->Await([&] {\n      ThisFiber::SetName(\"Test::Run\");\n      return this->Run(list);\n    });\n  }\n\n  return Run(GetId(), list);\n}\n\nRespExpr BaseFamilyTest::Run(std::string_view command) {\n  std::vector<std::string_view> command_list = absl::StrSplit(command, ' ');\n  return Run(command_list);\n}\n\nRespExpr BaseFamilyTest::RunPrivileged(std::initializer_list<const std::string_view> list) {\n  if (!ProactorBase::IsProactorThread()) {\n    return pp_->at(0)->Await([&] { return this->RunPrivileged(list); });\n  }\n  string id = GetId();\n  TestConnWrapper* conn_wrapper = AddFindConn(Protocol::REDIS, id);\n  // Before running the command set the connection as admin connection\n  conn_wrapper->conn()->SetPrivileged(true);\n  auto res = Run(id, ArgSlice{list.begin(), list.size()});\n  // After running the command set the connection as non admin connection\n  // because the connection is returned to the pool. 
This way the next call to Run from the same\n  // thread will not have the connection set as admin.\n  conn_wrapper->conn()->SetPrivileged(false);\n  return res;\n}\n\nRespExpr BaseFamilyTest::Run(absl::Span<const std::string> span) {\n  vector<string_view> sv_vec(span.size());\n  for (unsigned i = 0; i < span.size(); ++i) {\n    sv_vec[i] = span[i];\n  }\n  return Run(sv_vec);\n}\n\nRespExpr BaseFamilyTest::Run(std::string_view id, ArgSlice slice) {\n  if (!ProactorBase::IsProactorThread()) {\n    return pp_->at(0)->Await([&] { return this->Run(id, slice); });\n  }\n\n  TestConnWrapper* conn_wrapper = AddFindConn(Protocol::REDIS, id);\n\n  CmdArgVec args = conn_wrapper->Args(slice);\n\n  ConnectionContext* context = conn_wrapper->cmd_cntx();\n  context->ns = &namespaces->GetDefaultNamespace();\n\n  DCHECK(context->transaction == nullptr) << id;\n  CommandContext cmd_cntx;\n  cmd_cntx.Init(conn_wrapper->builder(), context);\n  cmd_cntx.Assign(args.begin(), args.end(), args.size());\n  service_->DispatchCommand(ParsedArgs{cmd_cntx}, &cmd_cntx, AsyncPreference::ONLY_SYNC);\n\n  DCHECK(context->transaction == nullptr);\n\n  auto cmd = absl::AsciiStrToUpper(slice.front());\n  if (cmd == \"EVAL\" || cmd == \"EVALSHA\" || cmd == \"EVAL_RO\" || cmd == \"EVALSHA_RO\" ||\n      cmd == \"EXEC\") {\n    shard_set->AwaitRunningOnShardQueue([](auto*) {});  // Wait for async UnlockMulti.\n  }\n\n  unique_lock lk(mu_);\n  last_cmd_dbg_info_ = context->last_command_debug;\n\n  RespVec vec = conn_wrapper->ParseResponse(single_response_);\n  if (vec.size() == 1)\n    return vec.front();\n  RespVec* new_vec = new RespVec(vec);\n  resp_vec_.push_back(new_vec);\n  RespExpr e;\n  e.type = RespExpr::ARRAY;\n  e.u = new_vec;\n\n  return e;\n}\n\nvoid BaseFamilyTest::RunMany(const std::vector<std::vector<std::string>>& cmds) {\n  if (!ProactorBase::IsProactorThread()) {\n    return pp_->at(0)->Await([&] { return this->RunMany(cmds); });\n  }\n  TestConnWrapper* conn_wrapper = 
AddFindConn(Protocol::REDIS, GetId());\n  auto* context = conn_wrapper->cmd_cntx();\n  context->ns = &namespaces->GetDefaultNamespace();\n  vector<cmn::BackedArguments> backed_args_vec(cmds.size());\n  for (size_t i = 0; i < cmds.size(); ++i) {\n    backed_args_vec[i] = cmn::BackedArguments(cmds[i].begin(), cmds[i].end(), cmds[i].size());\n  }\n  auto next_fn = [it = backed_args_vec.begin()]() mutable {\n    ParsedArgs args(*it);\n    ++it;\n    return args;\n  };\n  service_->DispatchManyCommands(next_fn, cmds.size(), conn_wrapper->builder(), context);\n  DCHECK(context->transaction == nullptr);\n}\n\nauto BaseFamilyTest::RunMC(MP::CmdType cmd_type, string_view key, MCArgs args) -> MCResponse {\n  if (!ProactorBase::IsProactorThread()) {\n    return pp_->at(0)->Await([&] { return this->RunMC(cmd_type, key, args); });\n  }\n\n  TestConnWrapper* conn = AddFindConn(Protocol::MEMCACHE, GetId());\n\n  CommandContext cmd_cntx{conn->builder(), conn->cmd_cntx()};\n  cmd_cntx.ConfigureMCExtension(true);\n  auto& cmd = *cmd_cntx.mc_command();\n  cmd.type = cmd_type;\n\n  string_view kv[2] = {key, args.value};\n  unsigned num_args = MP::IsStoreCmd(cmd_type) ? 
2 : 1;\n  cmd_cntx.Assign(kv, kv + num_args, num_args);\n  cmd.flags = args.val_flags;\n  cmd.expire_ts = args.ttl.count();\n  cmd.delta = args.delta;\n  if (cmd.type >= MP::GET && cmd.type <= MP::GATS) {\n    cmd.cmd_flags.return_value = true;\n    cmd.cmd_flags.return_flags = true;\n    cmd.cmd_flags.return_cas = (cmd.type == MP::GETS || cmd.type == MP::GATS);\n  }\n  auto* context = conn->cmd_cntx();\n\n  DCHECK(context->transaction == nullptr);\n\n  service_->DispatchMC(&cmd_cntx, AsyncPreference::ONLY_SYNC);\n\n  DCHECK(context->transaction == nullptr);\n\n  return conn->SplitLines();\n}\n\nauto BaseFamilyTest::RunMC(MP::CmdType cmd_type, std::string_view key) -> MCResponse {\n  if (!ProactorBase::IsProactorThread()) {\n    return pp_->at(0)->Await([&] { return this->RunMC(cmd_type, key, MCArgs{}); });\n  }\n\n  return RunMC(cmd_type, key, MCArgs{});\n}\n\nauto BaseFamilyTest::GetMC(MP::CmdType cmd_type, std::initializer_list<std::string_view> list)\n    -> MCResponse {\n  CHECK_GT(list.size(), 0u);\n  CHECK(base::_in(cmd_type, {MP::GET, MP::GAT, MP::GETS, MP::GATS}));\n\n  if (!ProactorBase::IsProactorThread()) {\n    return pp_->at(0)->Await([&] { return this->GetMC(cmd_type, list); });\n  }\n\n  TestConnWrapper* conn = AddFindConn(Protocol::MEMCACHE, GetId());\n\n  CommandContext cmd_cntx{conn->builder(), conn->cmd_cntx()};\n  cmd_cntx.ConfigureMCExtension(true);\n  auto& cmd = *cmd_cntx.mc_command();\n  cmd.type = cmd_type;\n  auto src = list.begin();\n  if (cmd.type == MP::GAT || cmd.type == MP::GATS) {\n    CHECK(absl::SimpleAtoi(*src++, &cmd.expire_ts));\n  }\n\n  cmd_cntx.Assign(src, list.end(), list.end() - src);\n  service_->DispatchMC(&cmd_cntx, AsyncPreference::ONLY_SYNC);\n\n  return conn->SplitLines();\n}\n\nint64_t BaseFamilyTest::CheckedInt(ArgSlice list) {\n  RespExpr resp = Run(list);\n  if (resp.type == RespExpr::INT64) {\n    return get<int64_t>(resp.u);\n  }\n  if (resp.type == RespExpr::NIL) {\n    return INT64_MIN;\n  }\n\n  
CHECK_EQ(RespExpr::STRING, int(resp.type)) << list;\n  string_view sv = ToSV(resp.GetBuf());\n  int64_t res;\n  CHECK(absl::SimpleAtoi(sv, &res)) << \"|\" << sv << \"|\";\n  return res;\n}\n\nstring BaseFamilyTest::CheckedString(ArgSlice list) {\n  RespExpr resp = Run(list);\n  CHECK_EQ(RespExpr::STRING, int(resp.type)) << list;\n  return string{ToSV(resp.GetBuf())};\n}\n\nCmdArgVec BaseFamilyTest::TestConnWrapper::Args(ArgSlice list) {\n  CHECK_NE(0u, list.size());\n\n  CmdArgVec res;\n  string* str = new string;\n\n  // I compact all the arguments together on purpose.\n  // This way I check that arguments handling works well without c-string endings.\n  for (auto v : list) {\n    str->append(v);\n  }\n  tmp_str_vec_.emplace_back(str);\n  size_t offset = 0;\n  for (auto v : list) {\n    if (v.empty()) {\n      res.push_back(MutableSlice{});\n    } else {\n      res.emplace_back(str->data() + offset, v.size());\n      offset += v.size();\n    }\n  }\n\n  return res;\n}\n\nRespVec BaseFamilyTest::TestConnWrapper::ParseResponse(bool fully_consumed) {\n  tmp_str_vec_.emplace_back(new string{sink_.str()});\n  auto& s = *tmp_str_vec_.back();\n\n  RESPParser parser;\n  auto obj = parser.Feed(s.data(), s.size());\n\n  CHECK(obj.has_value()) << \"Failed to parse response: \\\"\" << s << \"\\\" (\" << s.size() << \" chars)\";\n\n  if (fully_consumed) {\n    size_t buf_pos = parser.BufferPos();\n    // After parsing, if successful, buf_pos can be 0 when the internal buffer is cleared\n    buf_pos = obj && !buf_pos ? s.size() : buf_pos;\n    DCHECK_EQ(buf_pos, s.size()) << s;\n  }\n\n  // Build expressions from the parsed object. 
We must consume the RESPObj before\n  // freeing it, since BuildExpr copies string data into owned_strings_.\n  auto& parsed = *obj;\n\n  // The old RedisParser unwraps top-level arrays: elements go directly into res.\n  // We match that behavior here for compatibility with existing tests.\n  RespVec res;\n  auto type = parsed.GetType();\n  if (type == RESPObj::Type::ARRAY || type == RESPObj::Type::MAP || type == RESPObj::Type::SET) {\n    auto arr = parsed.As<RESPArray>();\n    if (arr.has_value() && arr->Size() != SIZE_MAX) {\n      for (size_t i = 0; i < arr->Size(); ++i) {\n        res.push_back(expr_builder_.BuildExpr((*arr)[i]));\n      }\n    } else {\n      // Null aggregate (e.g. *-1\\r\\n) — produce a NIL_ARRAY entry.\n      res.push_back(expr_builder_.BuildExpr(parsed));\n    }\n  } else {\n    res.push_back(expr_builder_.BuildExpr(parsed));\n  }\n\n  // parsed (RESPObj) goes out of scope here, freeing zmalloc-allocated hiredis\n  // reply data on this thread. All needed string data has been copied into\n  // expr_builder_.owned_strings_.\n\n  return res;\n}\n\nconst facade::Connection::PubMessage& BaseFamilyTest::TestConnWrapper::GetPubMessage(\n    size_t index) const {\n  CHECK_LT(index, dummy_conn_->messages.size());\n  return dummy_conn_->messages[index];\n}\n\nconst facade::Connection::InvalidationMessage&\nBaseFamilyTest::TestConnWrapper::GetInvalidationMessage(size_t index) const {\n  CHECK_LT(index, dummy_conn_->invalidate_messages.size());\n  return dummy_conn_->invalidate_messages[index];\n}\n\nbool BaseFamilyTest::IsLocked(DbIndex db_index, std::string_view key) const {\n  return service_->IsLocked(&namespaces->GetDefaultNamespace(), db_index, key);\n}\n\nstring BaseFamilyTest::GetId() const {\n  int32 id = ProactorBase::me()->GetPoolIndex();\n  CHECK_GE(id, 0);\n  return absl::StrCat(\"IO\", id);\n}\n\nsize_t BaseFamilyTest::SubscriberMessagesLen(string_view conn_id) const {\n  auto it = connections_.find(conn_id);\n  if (it == 
connections_.end())\n    return 0;\n\n  return it->second->conn()->messages.size();\n}\n\nsize_t BaseFamilyTest::InvalidationMessagesLen(string_view conn_id) const {\n  auto it = connections_.find(conn_id);\n  if (it == connections_.end())\n    return 0;\n\n  return it->second->conn()->invalidate_messages.size();\n}\n\nconst facade::Connection::PubMessage& BaseFamilyTest::GetPublishedMessage(string_view conn_id,\n                                                                          size_t index) const {\n  auto it = connections_.find(conn_id);\n  CHECK(it != connections_.end());\n\n  return it->second->GetPubMessage(index);\n}\n\nconst facade::Connection::InvalidationMessage& BaseFamilyTest::GetInvalidationMessage(\n    string_view conn_id, size_t index) const {\n  auto it = connections_.find(conn_id);\n  CHECK(it != connections_.end());\n  return it->second->GetInvalidationMessage(index);\n}\n\nConnectionContext::DebugInfo BaseFamilyTest::GetDebugInfo(const std::string& id) const {\n  auto it = connections_.find(id);\n  CHECK(it != connections_.end());\n\n  return it->second->cmd_cntx()->last_command_debug;\n}\n\nauto BaseFamilyTest::AddFindConn(Protocol proto, std::string_view id) -> TestConnWrapper* {\n  DCHECK(ProactorBase::IsProactorThread());\n\n  unique_lock lk(mu_);\n\n  auto [it, inserted] = connections_.emplace(id, nullptr);\n\n  if (inserted) {\n    it->second = make_unique<TestConnWrapper>(service_.get(), proto);\n  } else {\n    it->second->ClearSink();\n  }\n  return it->second.get();\n}\n\nvector<string> BaseFamilyTest::StrArray(const RespExpr& expr) {\n  CHECK(expr.type == RespExpr::ARRAY || expr.type == RespExpr::NIL_ARRAY);\n  if (expr.type == RespExpr::NIL_ARRAY)\n    return vector<string>{};\n\n  const RespVec* src = get<RespVec*>(expr.u);\n  vector<string> res(src->size());\n  for (size_t i = 0; i < src->size(); ++i) {\n    res[i] = ToSV(src->at(i).GetBuf());\n  }\n\n  return res;\n}\n\nvector<LockFp> BaseFamilyTest::GetLastFps() {\n  
fb2::Mutex mu;\n  vector<LockFp> result;\n\n  auto add_keys = [&](ProactorBase* proactor) {\n    EngineShard* shard = EngineShard::tlocal();\n    if (shard == nullptr) {\n      return;\n    }\n\n    lock_guard lk(mu);\n    for (auto fp :\n         namespaces->GetDefaultNamespace().GetDbSlice(shard->shard_id()).TEST_GetLastLockedFps()) {\n      result.push_back(fp);\n    }\n  };\n  shard_set->pool()->AwaitFiberOnAll(add_keys);\n\n  return result;\n}\n\nvoid BaseFamilyTest::ExpectConditionWithinTimeout(const std::function<bool()>& condition,\n                                                  absl::Duration timeout) {\n  absl::Time deadline = absl::Now() + timeout;\n\n  while (deadline > absl::Now()) {\n    if (condition()) {\n      break;\n    }\n    ThisFiber::SleepFor(5ms);\n  }\n\n  EXPECT_LE(absl::Now(), deadline)\n      << \"Timeout of \" << timeout << \" reached when expecting condition\";\n}\n\nfb2::Fiber BaseFamilyTest::ExpectConditionWithSuspension(const std::function<bool()>& condition) {\n  TransactionSuspension tx;\n  pp_->at(0)->Await([&] { tx.Start(); });\n\n  auto fb =\n      pp_->at(0)->LaunchFiber(fb2::Launch::dispatch, [condition, tx = std::move(tx)]() mutable {\n        ExpectConditionWithinTimeout(condition);\n        tx.Terminate();\n      });\n  return fb;\n}\n\nutil::fb2::Fiber BaseFamilyTest::ExpectUsedKeys(const std::vector<std::string_view>& keys) {\n  vector<LockFp> key_fps;\n  for (const auto& k : keys) {\n    key_fps.push_back(LockTag(k).Fingerprint());\n  }\n  sort(key_fps.begin(), key_fps.end());\n  auto cb = [=] {\n    auto last_fps = GetLastFps();\n    sort(last_fps.begin(), last_fps.end());\n    return last_fps == key_fps;\n  };\n\n  return ExpectConditionWithSuspension(std::move(cb));\n}\n\nvoid BaseFamilyTest::SetTestFlag(string_view flag_name, string_view new_value) {\n  auto* flag = absl::FindCommandLineFlag(flag_name);\n  CHECK_NE(flag, nullptr);\n  VLOG(1) << \"Changing flag \" << flag_name << \" from \" << flag->CurrentValue() 
<< \" to \"\n          << new_value;\n  string error;\n  CHECK(flag->ParseFrom(new_value, &error)) << \"Error: \" << error;\n}\n\nstd::map<int, int> BaseFamilyTest::GetShardKeyCount() {\n  map<int, int> m;\n\n  auto res = Run({\"debug\", \"shards\"});\n  for (string_view line : absl::StrSplit(res.GetString(), '\\n')) {\n    vector<string> parts = absl::StrSplit(line, \": \");\n    if (parts.size() != 2) {\n      continue;\n    }\n\n    string_view k = parts[0];\n    if (!absl::StartsWith(k, \"shard\") || !absl::EndsWith(k, \"_key_count\")) {\n      continue;\n    }\n\n    CHECK(absl::ConsumePrefix(&k, \"shard\")) << k;\n    CHECK(absl::ConsumeSuffix(&k, \"_key_count\")) << k;\n    int sid;\n    CHECK(absl::SimpleAtoi(k, &sid));\n    int count;\n    CHECK(absl::SimpleAtoi(parts[1], &count));\n    m[sid] = count;\n  }\n  return m;\n}\n\nconst acl::AclFamily* BaseFamilyTest::TestInitAclFam() {\n  absl::SetFlag(&FLAGS_acllog_max_len, 0);\n  return service_->TestInit();\n}\n\n}  // namespace dfly\n"
  },
  {
    "path": "src/server/test_utils.h",
    "content": "// Copyright 2022, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#pragma once\n\n#include <gmock/gmock.h>\n\n#include <chrono>\n\n#include \"facade/dragonfly_connection.h\"\n#include \"facade/memcache_parser.h\"\n#include \"facade/resp_expr_test_utils.h\"\n#include \"facade/resp_parser.h\"\n#include \"io/io.h\"\n#include \"server/conn_context.h\"\n#include \"server/main_service.h\"\n#include \"server/namespaces.h\"\n#include \"server/transaction.h\"\n#include \"util/proactor_pool.h\"\n\nnamespace dfly {\nusing namespace facade;\nusing util::fb2::Fiber;\nusing util::fb2::Launch;\n\n// Test hook defined in common.cc.\nvoid TEST_InvalidateLockTagOptions();\n\nclass TestConnection : public facade::Connection {\n public:\n  explicit TestConnection(facade::ServiceInterface* si, Protocol protocol);\n  std::string RemoteEndpointStr() const override;\n\n  void SendPubMessageAsync(PubMessage pmsg) final;\n\n  void SendInvalidationMessageAsync(InvalidationMessage msg) final;\n\n  bool IsPrivileged() const override {\n    return is_privileged_;\n  }\n  void SetPrivileged(bool is_privileged) {\n    is_privileged_ = is_privileged;\n  }\n\n  std::vector<PubMessage> messages;\n\n  std::vector<InvalidationMessage> invalidate_messages;\n\n private:\n  bool is_privileged_ = false;\n};\n\n// The TransactionSuspension class is designed to facilitate the temporary suspension of commands\n// executions. When the 'start' method is invoked, it enforces the suspension of other\n// transactions by acquiring a global shard lock. 
Conversely, invoking the 'terminate' method\n// releases the global shard lock, enabling all transactions in the queue to resume execution.\nclass TransactionSuspension {\n public:\n  void Start();\n  void Terminate();\n\n private:\n  boost::intrusive_ptr<dfly::Transaction> transaction_;\n};\n\nclass BaseFamilyTest : public ::testing::Test {\n protected:\n  BaseFamilyTest();\n  ~BaseFamilyTest();\n\n  static void SetUpTestSuite();\n\n  void SetUp() override;\n  void TearDown() override;\n\n  class TestConnWrapper;\n\n  RespExpr Run(std::initializer_list<const std::string_view> list) {\n    return Run(ArgSlice{list.begin(), list.size()});\n  }\n\n  // Runs the command in a mocked privileged connection\n  // Use for running commands which are allowed only when using admin connection.\n  RespExpr RunPrivileged(std::initializer_list<const std::string_view> list);\n\n  RespExpr Run(ArgSlice list);\n  RespExpr Run(absl::Span<const std::string> list);\n\n  RespExpr Run(std::string_view id, ArgSlice list);\n\n  RespExpr Run(std::string_view command);\n\n  void RunMany(const std::vector<std::vector<std::string>>& cmds);\n\n  using MCResponse = std::vector<std::string>;\n\n  struct MCArgs {\n    std::string_view value;\n    uint32_t val_flags;\n    std::chrono::seconds ttl;\n    uint64_t delta;\n\n    explicit MCArgs(std::string_view v = {}, uint32_t f = 0) : value(v), val_flags(f) {\n      ttl = std::chrono::seconds{0};\n      delta = 0;\n    }\n\n    explicit MCArgs(uint64_t d) : MCArgs() {\n      delta = d;\n    }\n  };\n\n  MCResponse RunMC(MemcacheParser::CmdType cmd_type, std::string_view key, MCArgs args);\n  MCResponse RunMC(MemcacheParser::CmdType cmd_type, std::string_view key = std::string_view{});\n  MCResponse GetMC(MemcacheParser::CmdType cmd_type, std::initializer_list<std::string_view> list);\n\n  int64_t CheckedInt(std::initializer_list<std::string_view> list) {\n    return CheckedInt(ArgSlice{list.begin(), list.size()});\n  }\n  int64_t CheckedInt(ArgSlice 
list);\n  std::string CheckedString(ArgSlice list);\n\n  void ResetService();\n\n  void ShutdownService();\n\n  void InitWithDbFilename();\n  void CleanupSnapshots();\n\n  bool IsLocked(DbIndex db_index, std::string_view key) const;\n  ConnectionContext::DebugInfo GetDebugInfo(const std::string& id) const;\n\n  ConnectionContext::DebugInfo GetDebugInfo() const {\n    return GetDebugInfo(\"IO0\");\n  }\n\n  TestConnWrapper* AddFindConn(Protocol proto, std::string_view id);\n  static std::vector<std::string> StrArray(const RespExpr& expr);\n\n  Metrics GetMetrics() const {\n    return service_->server_family().GetMetrics(&namespaces->GetDefaultNamespace());\n  }\n\n  void ClearMetrics();\n  std::string FormatMetrics(const Metrics& metrics) const;\n\n  void AdvanceTime(int64_t ms) {\n    TEST_current_time_ms += ms;\n  }\n\n  // Wait for a locked key to unlock. Aborts after timeout seconds passed.\n  void WaitUntilLocked(DbIndex db_index, std::string_view key, double timeout = 3);\n\n  // Wait until condition_cb returns true or timeout reached. 
Returns condition_cb value\n  bool WaitUntilCondition(std::function<bool()> condition_cb,\n                          std::chrono::milliseconds timeout_ms = std::chrono::milliseconds(100));\n\n  std::string GetId() const;\n  size_t SubscriberMessagesLen(std::string_view conn_id) const;\n\n  size_t InvalidationMessagesLen(std::string_view conn_id) const;\n\n  const facade::Connection::PubMessage& GetPublishedMessage(std::string_view conn_id,\n                                                            size_t index) const;\n\n  const facade::Connection::InvalidationMessage& GetInvalidationMessage(std::string_view conn_id,\n                                                                        size_t index) const;\n\n  static std::vector<LockFp> GetLastFps();\n  static void ExpectConditionWithinTimeout(const std::function<bool()>& condition,\n                                           absl::Duration timeout = absl::Seconds(10));\n  util::fb2::Fiber ExpectConditionWithSuspension(const std::function<bool()>& condition);\n  util::fb2::Fiber ExpectUsedKeys(const std::vector<std::string_view>& keys);\n\n  static unsigned NumLocked();\n\n  static void SetTestFlag(std::string_view flag_name, std::string_view new_value);\n\n  const acl::AclFamily* TestInitAclFam();\n\n  std::map<int, int> GetShardKeyCount();\n\n  std::unique_ptr<util::ProactorPool> pp_;\n  std::unique_ptr<Service> service_;\n  unsigned num_threads_ = 3;\n\n  absl::flat_hash_map<std::string, std::unique_ptr<TestConnWrapper>> connections_;\n  util::fb2::Mutex mu_;\n  ConnectionContext::DebugInfo last_cmd_dbg_info_;\n\n  std::vector<RespVec*> resp_vec_;\n  bool single_response_ = true;\n  util::fb2::Fiber watchdog_fiber_;\n  util::fb2::Done watchdog_done_;\n};\n\nstd::ostream& operator<<(std::ostream& os, const DbStats& stats);\n\n}  // namespace dfly\n"
  },
  {
    "path": "src/server/tiered_storage.cc",
    "content": "// Copyright 2025, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#include \"server/tiered_storage.h\"\n\n#include <mimalloc.h>\n\n#include <cstddef>\n#include <functional>\n#include <memory>\n#include <optional>\n#include <variant>\n\n#include \"absl/cleanup/cleanup.h\"\n#include \"absl/flags/internal/flag.h\"\n#include \"absl/functional/bind_front.h\"\n#include \"absl/functional/overload.h\"\n#include \"base/flag_utils.h\"\n#include \"base/flags.h\"\n#include \"base/logging.h\"\n#include \"core/detail/listpack_wrap.h\"\n#include \"server/db_slice.h\"\n#include \"server/engine_shard_set.h\"\n#include \"server/snapshot.h\"\n#include \"server/table.h\"\n#include \"server/tiering/common.h\"\n#include \"server/tiering/op_manager.h\"\n#include \"server/tiering/serialized_map.h\"\n#include \"server/tiering/small_bins.h\"\n\nextern \"C\" {\n#include \"redis/listpack.h\"\n}\n\nusing namespace facade;\n\nusing AtLeast64 = base::ConstrainedNumericFlagValue<size_t, 64>;  // ABSL_FLAG breaks with commas\nABSL_FLAG(AtLeast64, tiered_min_value_size, 64,\n          \"Minimum size of values eligible for offloading. 
Must be at least 64\");\n\nABSL_FLAG(bool, tiered_experimental_cooling, true,\n          \"If true, uses intermediate cooling layer \"\n          \"when offloading values to storage\");\n\nABSL_FLAG(unsigned, tiered_storage_write_depth, 200,\n          \"Maximum number of concurrent stash requests issued by background offload\");\n\nABSL_FLAG(float, tiered_offload_threshold, 0.5,\n          \"Ratio of free memory (free/max memory) below which offloading starts\");\n\nABSL_FLAG(float, tiered_upload_threshold, 0.1,\n          \"Ratio of free memory (free/max memory) below which uploading stops\");\n\nABSL_FLAG(bool, tiered_experimental_hash_support, false, \"Experimental hash datatype offloading\");\n\nnamespace dfly {\n\nusing namespace std;\nusing namespace util;\nusing tiering::FragmentRef;\nusing tiering::KeyRef;\nusing tiering::TieredCoolRecord;\n\nnamespace {\n\nbool OccupiesWholePages(size_t size) {\n  return size >= TieredStorage::kMinOccupancySize;\n}\n\n// Stashed bins no longer have bin ids, so this sentinel is used to differentiate from regular reads\nconstexpr auto kFragmentedBin = tiering::SmallBins::kInvalidBin - 1;\n\n// Called after setting new value in place of previous segment\nvoid RecordDeleted(const PrimeValue& pv, size_t tiered_len, DbTableStats* stats) {\n  stats->AddTypeMemoryUsage(pv.ObjType(), pv.MallocUsed());\n  stats->tiered_entries--;\n  stats->tiered_used_bytes -= tiered_len;\n}\n\ntiering::DiskSegment FromCoolItem(const PrimeValue::CoolItem& item) {\n  return {item.record->page_index * tiering::kPageSize + item.page_offset, item.serialized_size};\n}\n\nstring SerializeToString(const TieredStorage::StashDescriptor& blobs) {\n  size_t est_size = blobs.EstimatedSerializedSize();\n  string s(est_size, 0);\n  size_t written = blobs.Serialize({reinterpret_cast<uint8_t*>(s.data()), s.size()});\n  s.resize(written);\n  return s;\n}\n\n}  // anonymous namespace\n\nsize_t TieredStorage::StashDescriptor::EstimatedSerializedSize() const {\n  
return visit(\n      absl::Overload{[](const array<string_view, 2>& a) { return a[0].size() + a[1].size(); },\n                     [](uint8_t* ptr) {\n                       detail::ListpackWrap lw{ptr};\n                       return tiering::SerializedMap::EstimateSize(lw.UsedBytes(), lw.size());\n                     }},\n      blob);\n};\n\nsize_t TieredStorage::StashDescriptor::Serialize(io::MutableBytes buffer) const {\n  DCHECK_LE(EstimatedSerializedSize(), buffer.size());\n\n  switch (rep) {\n    case CompactObj::ExternalRep::STRING: {\n      auto strs = std::get<std::array<std::string_view, 2>>(blob);\n      memcpy(buffer.data(), strs[0].data(), strs[0].size());\n      if (!strs[1].empty())\n        memcpy(buffer.data() + strs[0].size(), strs[1].data(), strs[1].size());\n      return strs[0].size() + strs[1].size();\n    }\n    case CompactObj::ExternalRep::SERIALIZED_MAP: {\n      detail::ListpackWrap lw{static_cast<uint8_t*>(std::get<uint8_t*>(blob))};\n      return tiering::SerializedMap::Serialize(\n          lw, {reinterpret_cast<char*>(buffer.data()), buffer.length()});\n    }\n  };\n  return 0;\n}\n\nclass TieredStorage::ShardOpManager : public tiering::OpManager {\n  friend class TieredStorage;\n\n public:\n  ShardOpManager(TieredStorage* ts, DbSlice* db_slice, size_t max_size)\n      : tiering::OpManager{max_size}, ts_{ts}, db_slice_{*db_slice} {\n  }\n\n  // Clear Stash pending flag for entry\n  void ClearStashPending(OpManager::KeyRef key) {\n    UnblockBackpressure(key, false);\n    if (auto pv = Find(key.first, key.second); pv) {\n      pv->SetStashPending(false);\n      stats_.total_cancels++;\n    }\n  }\n\n  // Clear stash pending flag for all contained entries of bin\n  void ClearStashPending(tiering::SmallBins::BinId id) {\n    for (const auto& key : ts_->bins_->ReportStashAborted(id))\n      ClearStashPending(key);\n  }\n\n  DbTableStats* GetDbTableStats(DbIndex dbid) {\n    return db_slice_.MutableStats(dbid);\n  }\n\n  void 
DeleteOffloaded(DbIndex dbid, const tiering::DiskSegment& segment);\n\n private:\n  PrimeValue* Find(DbIndex dbid, string_view key) {\n    // TODO: Get DbContext for transaction for correct dbid and time\n    // Bypass all update and stat mechanisms\n    auto it = db_slice_.GetDBTable(dbid)->prime.Find(key);\n    return IsValid(it) ? &it->second : nullptr;\n  }\n\n  // Load all values from bin by their hashes\n  void Defragment(tiering::DiskSegment segment, string_view value);\n\n  void NotifyStashed(const OwnedEntryId& id,\n                     const io::Result<tiering::DiskSegment>& segment) override {\n    if (!segment) {\n      VLOG(1) << \"Stash failed \" << segment.error().message();\n      visit([this](auto id) { ClearStashPending(id); }, id);\n    } else {\n      visit([this, segment](auto id) { SetExternal(id, *segment); }, id);\n    }\n  }\n\n  bool NotifyFetched(const OwnedEntryId& id, tiering::DiskSegment segment,\n                     tiering::Decoder* decoder) override;\n\n  bool NotifyDelete(tiering::DiskSegment segment) override;\n\n  // If we are low on memory, remove entries from the ColdQueue,\n  // and promote their PrimeValues to be fully external.\n  void RetireColdEntries(size_t additional_memory);\n\n  // Set value to be an in-memory type again. 
Update memory stats.\n  void Upload(DbIndex dbid, string_view value, PrimeValue* pv) {\n    DCHECK(!value.empty());\n\n    switch (pv->GetExternalRep()) {\n      case CompactObj::ExternalRep::STRING:\n        pv->Materialize(value, true);\n        break;\n      case CompactObj::ExternalRep::SERIALIZED_MAP:\n        tiering::SerializedMapDecoder decoder{};\n        decoder.Initialize(value);\n        decoder.Upload(pv);\n        break;\n    };\n\n    RecordDeleted(*pv, value.size(), GetDbTableStats(dbid));\n  }\n\n  // Find entry by key in db_slice and store external segment in place of original value.\n  // Update memory stats\n  void SetExternal(OpManager::KeyRef key, tiering::DiskSegment segment) {\n    UnblockBackpressure(key, true);\n    if (auto* pv = Find(key.first, key.second); pv) {\n      auto* stats = GetDbTableStats(key.first);\n\n      pv->SetStashPending(false);\n      stats->tiered_entries++;\n      stats->tiered_used_bytes += segment.length;\n      stats_.total_stashes++;\n\n      StashDescriptor blobs{FragmentRef{*pv}.GetSerializationDescr()};\n      if (ts_->config_.experimental_cooling) {\n        RetireColdEntries(pv->MallocUsed());\n        ts_->CoolDown(key.first, key.second, segment, blobs.rep, pv);\n      } else {\n        stats->AddTypeMemoryUsage(pv->ObjType(), -pv->MallocUsed());\n        pv->SetExternal(segment.offset, segment.length, blobs.rep);\n      }\n    } else {\n      LOG(DFATAL) << \"Should not reach here\";\n    }\n  }\n\n  // Find bin by id and call SetExternal for all contained entries\n  void SetExternal(tiering::SmallBins::BinId id, tiering::DiskSegment segment) {\n    for (const auto& [sub_dbid, sub_key, sub_segment] : ts_->bins_->ReportStashed(id, segment))\n      SetExternal({sub_dbid, sub_key}, sub_segment);\n  }\n\n  // If any backpressure (throttling) is active, notify that the operation finished\n  void UnblockBackpressure(OpManager::KeyRef id, bool result) {\n    if (auto node = ts_->stash_backpressure_.extract(id); 
!node.empty())\n      node.mapped().Resolve(result);\n  }\n\n  struct {\n    uint64_t total_stashes = 0, total_cancels = 0, total_fetches = 0;\n    uint64_t total_defrags = 0;\n    uint64_t total_uploads = 0;\n  } stats_;\n\n  TieredStorage* ts_;\n  DbSlice& db_slice_;\n};\n\nvoid TieredStorage::ShardOpManager::Defragment(tiering::DiskSegment segment, string_view page) {\n  // Note: Bin could've already been deleted, in that case DeleteBin returns an empty list\n  for (auto [dbid, hash, item_segment] : ts_->bins_->DeleteBin(segment, page)) {\n    // Search for key with the same hash and value pointing to the same segment.\n    // If it still exists, it must correspond to the value stored in this bin\n    auto predicate = [item_segment = item_segment](const PrimeKey& key, const PrimeValue& probe) {\n      return probe.IsExternal() && tiering::DiskSegment{probe.GetExternalSlice()} == item_segment;\n    };\n    auto it = db_slice_.GetDBTable(dbid)->prime.FindFirst(hash, predicate);\n    if (!IsValid(it))\n      continue;\n\n    // TODO: Handle upload and cooling via type dependent decoders\n\n    stats_.total_defrags++;\n    PrimeValue& pv = it->second;\n    if (pv.IsCool()) {\n      PrimeValue::CoolItem item = pv.GetCool();\n      tiering::DiskSegment segment = FromCoolItem(item);\n\n      // We remove it from both cool storage and the offline storage.\n      pv = ts_->DeleteCool(item.record);\n      auto* stats = GetDbTableStats(dbid);\n      stats->tiered_entries--;\n      stats->tiered_used_bytes -= segment.length;\n    } else {\n      // Cut out relevant part of value and restore it to memory\n      string_view value = page.substr(item_segment.offset - segment.offset, item_segment.length);\n      Upload(dbid, value, &pv);\n    }\n  }\n}\n\nbool TieredStorage::ShardOpManager::NotifyFetched(const OwnedEntryId& id,\n                                                  tiering::DiskSegment segment,\n                                                  tiering::Decoder* 
decoder) {\n  ++stats_.total_fetches;\n\n  if (id == OwnedEntryId{kFragmentedBin}) {  // Generally we read whole bins only for defrag\n    auto* bdecoder = static_cast<tiering::BareDecoder*>(decoder);\n    Defragment(segment, bdecoder->slice);\n    return true;  // delete\n  }\n\n  tiering::Decoder::UploadMetrics metrics = decoder->GetMetrics();\n\n  // 1. When modified is true we MUST upload the value back to memory.\n  // 2. On the other hand, if read is caused by snapshotting we do not want to fetch it.\n  //    Currently, our heuristic is not very smart, because we stop uploading any reads during\n  //    the snapshotting.\n  // TODO: to revisit this when we rewrite it with more efficient snapshotting algorithm.\n  bool should_upload = metrics.modified;\n  should_upload |= (ts_->UploadBudget() > int64_t(metrics.estimated_mem_usage)) &&\n                   !SliceSnapshot::IsSnaphotInProgress();\n\n  if (!should_upload)\n    return false;\n\n  const auto& key = get<tiering::DbKeyId>(id);\n  auto* pv = Find(key.first, key.second);\n  if (pv && pv->IsExternal() && segment == pv->GetExternalSlice()) {\n    if (metrics.modified || pv->WasTouched()) {\n      ++stats_.total_uploads;\n      decoder->Upload(pv);\n      RecordDeleted(*pv, segment.length, GetDbTableStats(key.first));\n      return true;\n    }\n    pv->SetTouched(true);\n    return false;\n  }\n\n  LOG(DFATAL) << \"Internal error, should not reach this\";\n  return false;\n}\n\nbool TieredStorage::ShardOpManager::NotifyDelete(tiering::DiskSegment segment) {\n  DVLOG(2) << \"NotifyDelete [\" << segment.offset << \",\" << segment.length << \"]\";\n\n  if (OccupiesWholePages(segment.length))\n    return true;\n\n  auto bin = ts_->bins_->Delete(segment);\n  if (bin.empty) {\n    return true;\n  }\n\n  if (bin.fragmented) {\n    // Trigger read to signal need for defragmentation. 
NotifyFetched will handle it.\n    DVLOG(2) << \"Enqueueing bin defragmentation for: \" << bin.segment.offset;\n    Enqueue(kFragmentedBin, bin.segment, tiering::BareDecoder{}, [](auto res) {});\n  }\n\n  return false;\n}\n\nvoid TieredStorage::ShardOpManager::RetireColdEntries(size_t additional_memory) {\n  int64_t budget = ts_->UploadBudget() - additional_memory;\n  if (budget > 0)\n    return;\n\n  size_t gained = ts_->ReclaimMemory(-budget);\n  VLOG(1) << \"Upload budget: \" << budget << \", gained \" << gained;\n\n  // Update memory_budget directly since we know that gained bytes were released.\n  // We will overwrite the budget correctly in the next Hearbeat.\n  db_slice_.UpdateMemoryParams(gained + db_slice_.memory_budget(), db_slice_.bytes_per_object());\n}\n\nvoid TieredStorage::ShardOpManager::DeleteOffloaded(DbIndex dbid,\n                                                    const tiering::DiskSegment& segment) {\n  auto* stats = GetDbTableStats(dbid);\n  OpManager::DeleteOffloaded(segment);\n  stats->tiered_used_bytes -= segment.length;\n  stats->tiered_entries--;\n}\n\nTieredStorage::TieredStorage(size_t max_size, DbSlice* db_slice)\n    : op_manager_{make_unique<ShardOpManager>(this, db_slice, max_size)},\n      bins_{make_unique<tiering::SmallBins>()} {\n  UpdateFromFlags();\n}\n\nTieredStorage::~TieredStorage() {\n}\n\nerror_code TieredStorage::Open(string_view base_path) {\n  // dts - dragonfly tiered storage.\n  string path = absl::StrCat(\n      base_path, \"-\", absl::Dec(ProactorBase::me()->GetPoolIndex(), absl::kZeroPad4), \".dts\");\n  return op_manager_->Open(path);\n}\n\nvoid TieredStorage::Close() {\n  for (auto& [_, f] : stash_backpressure_)\n    f.Resolve(false);\n  op_manager_->Close();\n}\n\nvoid TieredStorage::ReadInternal(DbIndex dbid, std::string_view key,\n                                 const tiering::DiskSegment& segment,\n                                 const tiering::Decoder& decoder,\n                                 
std::function<void(io::Result<tiering::Decoder*>)> cb) {\n  // TODO: improve performance by avoiding one more function wrap\n  op_manager_->Enqueue(KeyRef(dbid, key), segment, decoder, std::move(cb));\n}\n\nvoid TieredStorage::Stash(DbIndex dbid, string_view key, const StashDescriptor& blobs,\n                          BackPressureFuture* backpressure) {\n  CHECK(!bins_->IsPending(dbid, key));  // Because has stash pending is false (ShouldStash checks)\n\n  size_t est_size = blobs.EstimatedSerializedSize();\n  DCHECK_GT(est_size, 0u);\n\n  tiering::OpManager::PendingId id;\n  error_code ec;\n\n  if (OccupiesWholePages(est_size)) {  // large enough for own page\n    id = KeyRef(dbid, key);\n    auto serialize = absl::bind_front(&StashDescriptor::Serialize, &blobs);\n    ec = op_manager_->PrepareAndStash(id, est_size, serialize);\n  } else if (auto bin = bins_->Stash(dbid, key, SerializeToString(blobs)); bin) {\n    id = bin->id;\n    auto serialize = absl::bind_front(&tiering::SmallBins::SerializeBin, bins_.get(), &*bin);\n    ec = op_manager_->PrepareAndStash(id, 4_KB, serialize);\n  } else {\n    return;  // added to bin, no operations pending\n  }\n\n  // Set stash pending to false on single value or whole bin\n  if (ec) {\n    // file_too_large if we reached the limits of the storage,\n    // operation_would_block if we need to wait for a file to grow.\n    bool to_log = ec != errc::file_too_large && ec != errc::operation_would_block &&\n                  ec != errc::operation_in_progress;\n    LOG_IF(ERROR, to_log) << \"Stash failed: \" << ec.message();\n    visit([this](auto id) { op_manager_->ClearStashPending(id); }, id);\n    return;\n  }\n\n  // If we are in the active offloading phase, throttle stashes by providing backpressure future\n  if (backpressure && ShouldOffload()) {\n    stats_.total_clients_throttled++;\n    *backpressure = stash_backpressure_[{dbid, string{key}}];\n  }\n}\n\nvoid TieredStorage::Delete(DbIndex dbid, FragmentRef fragment_ref) 
{\n  DCHECK(!fragment_ref.HasStashPending());\n  ++stats_.total_deletes;\n\n  tiering::DiskSegment segment = fragment_ref.GetExternalSlice();\n  if (auto* cool = fragment_ref.GetCoolRecord(); cool) {\n    auto hot = DeleteCool(cool);\n    DCHECK_EQ(hot.ObjType(), OBJ_STRING);\n  }\n  fragment_ref.ClearOffloaded();\n  op_manager_->DeleteOffloaded(dbid, segment);\n}\n\nvoid TieredStorage::CancelStash(DbIndex dbid, std::string_view key,\n                                tiering::FragmentRef fragment_ref) {\n  DCHECK(fragment_ref.HasStashPending());\n\n  // If any previous write was happening, it has been cancelled\n  if (auto node = stash_backpressure_.extract(make_pair(dbid, key)); !node.empty())\n    std::move(node.mapped()).Resolve(false);\n\n  // TODO: Don't recompute size estimate, try-delete bin first\n  StashDescriptor blobs{fragment_ref.GetSerializationDescr()};\n  size_t size = blobs.EstimatedSerializedSize();\n  if (OccupiesWholePages(size)) {\n    op_manager_->CancelPending(KeyRef(dbid, key));\n  } else if (auto bin = bins_->Delete(dbid, key); bin) {\n    op_manager_->CancelPending(*bin);\n  }\n  fragment_ref.ClearStashPending();\n}\n\nTieredStats TieredStorage::GetStats() const {\n  TieredStats stats{};\n\n  {  // ShardOpManager stats\n    auto shard_stats = op_manager_->stats_;\n    stats.total_fetches = shard_stats.total_fetches;\n    stats.total_stashes = shard_stats.total_stashes;\n    stats.total_cancels = shard_stats.total_cancels;\n    stats.total_defrags = shard_stats.total_defrags;\n    stats.total_uploads = shard_stats.total_uploads;\n  }\n\n  {  // OpManager stats\n    tiering::OpManager::Stats op_stats = op_manager_->GetStats();\n    stats.pending_read_cnt = op_stats.pending_read_cnt;\n    stats.pending_stash_cnt = op_stats.pending_stash_cnt;\n    stats.allocated_bytes = op_stats.disk_stats.allocated_bytes;\n    stats.capacity_bytes = op_stats.disk_stats.capacity_bytes;\n    stats.total_heap_buf_allocs = 
op_stats.disk_stats.heap_buf_alloc_count;\n    stats.total_registered_buf_allocs = op_stats.disk_stats.registered_buf_alloc_count;\n  }\n\n  {  // SmallBins stats\n    tiering::SmallBins::Stats bins_stats = bins_->GetStats();\n    stats.small_bins_cnt = bins_stats.stashed_bins_cnt;\n    stats.small_bins_entries_cnt = bins_stats.stashed_entries_cnt;\n    stats.small_bins_filling_bytes = bins_stats.current_bin_bytes;\n    stats.small_bins_filling_entries_cnt = bins_stats.current_entries_cnt;\n  }\n\n  {  // Own stats\n    stats.total_stash_overflows = stats_.stash_overflow_cnt;\n    stats.cold_storage_bytes = stats_.cool_memory_used;\n    stats.total_offloading_steps = stats_.offloading_steps;\n    stats.total_offloading_stashes = stats_.offloading_stashes;\n    stats.clients_throttled = stash_backpressure_.size();\n    stats.total_clients_throttled = stats_.total_clients_throttled;\n  }\n  return stats;\n}\n\nfloat TieredStorage::WriteDepthUsage() const {\n  return 1.0f * op_manager_->GetStats().pending_stash_cnt / config_.write_depth_limit;\n}\n\nvoid TieredStorage::UpdateFromFlags() {\n  config_ = {\n      .min_value_size = absl::GetFlag(FLAGS_tiered_min_value_size),\n      .experimental_cooling = absl::GetFlag(FLAGS_tiered_experimental_cooling),\n      .write_depth_limit = absl::GetFlag(FLAGS_tiered_storage_write_depth),\n      .offload_threshold = absl::GetFlag(FLAGS_tiered_offload_threshold),\n      .upload_threshold = absl::GetFlag(FLAGS_tiered_upload_threshold),\n      .experimental_hash_offload = absl::GetFlag(FLAGS_tiered_experimental_hash_support),\n  };\n}\n\nstd::vector<std::string> TieredStorage::GetMutableFlagNames() {\n  return base::GetFlagNames(FLAGS_tiered_min_value_size, FLAGS_tiered_experimental_cooling,\n                            FLAGS_tiered_storage_write_depth, FLAGS_tiered_offload_threshold,\n                            FLAGS_tiered_upload_threshold, FLAGS_tiered_experimental_hash_support);\n}\n\nbool TieredStorage::ShouldOffload() const 
{\n  size_t free_memory = op_manager_->db_slice_.memory_budget();\n  size_t per_shard = max_memory_limit.load(memory_order_relaxed) / shard_set->size();\n  // Cool values are already offloadeded, so don't count them as used memory\n  return (free_memory + CoolMemoryUsage()) < config_.offload_threshold * per_shard;\n}\n\nint64_t TieredStorage::UploadBudget() const {\n  size_t free_memory = op_manager_->db_slice_.memory_budget();\n  size_t per_shard = max_memory_limit.load(memory_order_relaxed) / shard_set->size();\n  return int64_t(free_memory) - int64_t(config_.upload_threshold * per_shard);\n}\n\nvoid TieredStorage::RunOffloading(DbIndex dbid) {\n  using namespace tiering::literals;\n  if (SliceSnapshot::IsSnaphotInProgress())\n    return;\n\n  const auto start_cycles = base::CycleClock::Now();\n\n  // Don't run offloading if there's only very little space left\n  auto disk_stats = op_manager_->GetStats().disk_stats;\n  if (disk_stats.allocated_bytes + 1_MB > disk_stats.max_file_size)\n    return;\n\n  string tmp;\n  auto cb = [this, dbid, &tmp](PrimeIterator it) mutable {\n    stats_.offloading_steps++;\n    auto blobs = ShouldStash(it->second);\n    if (blobs) {\n      if (it->second.WasTouched()) {\n        it->second.SetTouched(false);\n      } else {\n        stats_.offloading_stashes++;\n        it->second.SetStashPending(true);\n        Stash(dbid, it->first.GetSlice(&tmp), *blobs, nullptr);\n      }\n    }\n  };\n\n  PrimeTable& table = op_manager_->db_slice_.GetDBTable(dbid)->prime;\n\n  // Loop over entry with time and max stash budget.\n  uint64_t cycles = 0;\n  do {\n    offloading_cursor_ = table.TraverseBySegmentOrder(offloading_cursor_, cb);\n\n    if (op_manager_->GetStats().pending_stash_cnt >= config_.write_depth_limit)\n      break;\n\n    // TODO: yield as background fiber to perform more work on idle\n    cycles = base::CycleClock::Now() - start_cycles;\n    if (base::CycleClock::ToUsec(cycles) >= 100)\n      break;\n  } while 
(offloading_cursor_);\n}\n\nsize_t TieredStorage::ReclaimMemory(size_t goal) {\n  size_t gained = 0;\n  do {\n    size_t memory_before = stats_.cool_memory_used;\n    TieredCoolRecord* record = PopCool();\n    if (record == nullptr)  // nothing to pull anymore\n      break;\n\n    gained += memory_before - stats_.cool_memory_used;\n\n    // Find the entry that points to the cool item and externalize it.\n    auto predicate = [record](const PrimeKey& key, const PrimeValue& probe) {\n      return probe.IsExternal() && probe.IsCool() && probe.GetCool().record == record;\n    };\n\n    PrimeIterator it = op_manager_->db_slice_.GetDBTable(record->db_index)\n                           ->prime.FindFirst(record->key_hash, predicate);\n    CHECK(IsValid(it));\n    PrimeValue& pv = it->second;\n\n    // Now the item is only in storage.\n    tiering::DiskSegment segment = FromCoolItem(pv.GetCool());\n    pv.Freeze(segment.offset, segment.length);\n\n    auto* stats = op_manager_->GetDbTableStats(record->db_index);\n    stats->AddTypeMemoryUsage(record->value.ObjType(), -record->value.MallocUsed());\n    CompactObj::DeleteMR<TieredCoolRecord>(record);\n  } while (gained < goal);\n\n  return gained;\n}\n\nauto TieredStorage::ShouldStash(const tiering::FragmentRef& fragment_ref) const\n    -> std::optional<StashDescriptor> {\n  // Check value state\n  if (fragment_ref.IsOffloaded() || fragment_ref.HasStashPending())\n    return nullopt;\n\n  // For now, hash offloading is conditional\n  if (fragment_ref.ObjType() == OBJ_HASH && !config_.experimental_hash_offload)\n    return nullopt;\n\n  // Estimate value size\n  StashDescriptor blobs{fragment_ref.GetSerializationDescr()};\n  size_t estimated_size = blobs.EstimatedSerializedSize();\n  if (estimated_size < config_.min_value_size)\n    return nullopt;\n\n  // Limit write depth. 
TODO: Provide backpressure?\n  if (op_manager_->GetStats().pending_stash_cnt >= config_.write_depth_limit) {\n    ++stats_.stash_overflow_cnt;\n    return {};\n  }\n\n  const auto& disk_stats = op_manager_->GetStats().disk_stats;\n  if (disk_stats.allocated_bytes + tiering::kPageSize + estimated_size < disk_stats.max_file_size) {\n    return blobs;\n  }\n  return nullopt;\n}\n\nvoid TieredStorage::CoolDown(DbIndex db_ind, std::string_view str,\n                             const tiering::DiskSegment& segment, CompactObj::ExternalRep rep,\n                             PrimeValue* pv) {\n  TieredCoolRecord* record = CompactObj::AllocateMR<TieredCoolRecord>();\n  cool_queue_.push_front(*record);\n  stats_.cool_memory_used += (sizeof(TieredCoolRecord) + pv->MallocUsed());\n\n  record->key_hash = CompactObj::HashCode(str);\n  record->db_index = db_ind;\n  record->page_index = segment.offset / tiering::kPageSize;\n  record->value = std::move(*pv);\n\n  pv->SetCool(segment.offset, segment.length, rep, record);\n}\n\nPrimeValue TieredStorage::Warmup(DbIndex dbid, PrimeValue::CoolItem item) {\n  tiering::DiskSegment segment = FromCoolItem(item);\n\n  // We remove it from both cool storage and the offline storage.\n  PrimeValue hot = DeleteCool(item.record);\n  op_manager_->DeleteOffloaded(dbid, segment);\n  return hot;\n}\n\nPrimeValue TieredStorage::DeleteCool(TieredCoolRecord* record) {\n  auto it = CoolQueue::s_iterator_to(*record);\n  cool_queue_.erase(it);\n\n  PrimeValue hot{std::move(record->value)};\n  stats_.cool_memory_used -= (sizeof(TieredCoolRecord) + hot.MallocUsed());\n  CompactObj::DeleteMR<TieredCoolRecord>(record);\n  return hot;\n}\n\nTieredCoolRecord* TieredStorage::PopCool() {\n  if (cool_queue_.empty())\n    return nullptr;\n\n  TieredCoolRecord& res = cool_queue_.back();\n  cool_queue_.pop_back();\n  stats_.cool_memory_used -= (sizeof(TieredCoolRecord) + res.value.MallocUsed());\n  return &res;\n}\n\nvoid StashPrimeValue(DbIndex dbid, std::string_view 
key, PrimeValue* pv, TieredStorage* ts,\n                     BackPressureFuture* backpressure) {\n  if (auto blobs = ts->ShouldStash(*pv); blobs) {\n    pv->SetStashPending(true);\n    ts->Stash(dbid, key, *blobs, backpressure);\n  }\n}\n\nvoid ReadTiered(DbIndex dbid, std::string_view key, const PrimeValue& value,\n                function<void(io::Result<string_view>)> readf, TieredStorage* ts) {\n  auto cb = [readf = std::move(readf)](io::Result<tiering::StringDecoder*> res) mutable {\n    readf(res.transform([](tiering::StringDecoder* d) { return d->GetView(); }));\n  };\n  ts->Read(dbid, key, value.GetExternalSlice(), tiering::StringDecoder{value}, std::move(cb));\n}\n\ntemplate <typename T>\nTieredStorage::TResult<T> ModifyTiered(DbIndex dbid, std::string_view key, const PrimeValue& value,\n                                       std::function<T(std::string*)> modf, TieredStorage* ts) {\n  DCHECK(value.IsExternal());\n  DCHECK_EQ(value.ObjType(), OBJ_STRING);\n\n  util::fb2::Future<io::Result<T>> future;\n\n  auto cb = [future, modf = std::move(modf)](io::Result<tiering::StringDecoder*> res) mutable {\n    future.Resolve(res.transform([&modf](auto* d) { return modf(d->Write()); }));\n  };\n  ts->Read(dbid, key, value.GetExternalSlice(), tiering::StringDecoder{value}, std::move(cb));\n\n  return future;\n}\n\n// Instantiate for size_t only - used in string_family's OpExtend.\ntemplate TieredStorage::TResult<size_t> ModifyTiered(DbIndex dbid, std::string_view key,\n                                                     const PrimeValue& value,\n                                                     std::function<size_t(std::string*)> modf,\n                                                     TieredStorage* ts);\n\n}  // namespace dfly\n"
  },
  {
    "path": "src/server/tiered_storage.h",
    "content": "// Copyright 2023, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n#pragma once\n\n#include <absl/container/flat_hash_map.h>\n\n#include <boost/intrusive/list.hpp>\n#include <memory>\n#include <utility>\n#include <vector>\n\n#include \"core/tiering_types.h\"\n#include \"io/io.h\"  // for io::Result (TODO: replace with nonstd/expected)\n#include \"server/stats.h\"\n#include \"server/table.h\"\n#include \"server/tiering/common.h\"\n#include \"server/tiering/entry_map.h\"\n#include \"util/fibers/future.h\"\n\nnamespace dfly {\n\nclass DbSlice;\n\nnamespace tiering {\nclass SmallBins;\nstruct Decoder;\n};  // namespace tiering\n\nstruct TieredStorageBase {\n  // Min sizes of values taking up full page on their own\n  const static size_t kMinOccupancySize = tiering::kPageSize / 2;\n  struct StashDescriptor : public tiering::FragmentRef::SerializationDescr {\n    StashDescriptor() = default;\n\n    StashDescriptor(const tiering::FragmentRef::SerializationDescr& params)  // NOLINT\n        : tiering::FragmentRef::SerializationDescr(params) {\n    }\n\n    size_t EstimatedSerializedSize() const;\n    size_t Serialize(io::MutableBytes buffer) const;\n  };\n\n  template <typename T> using TResult = util::fb2::Future<io::Result<T>>;\n};\n\nstruct TieredDelayedEntry {\n  DbIndex dbid;\n  PrimeKey key;\n  util::fb2::Future<io::Result<std::string>> value;\n  time_t expire;\n  uint32_t mc_flags;\n};\n\nusing BackPressureFuture = std::optional<util::fb2::Future<bool>>;\n\n#ifdef WITH_TIERING\n\n// Manages offloaded values\nclass TieredStorage : public TieredStorageBase {\n  class ShardOpManager;\n\n public:\n  explicit TieredStorage(size_t max_file_size, DbSlice* db_slice);\n  ~TieredStorage();  // drop forward declared unique_ptrs\n\n  TieredStorage(TieredStorage&& other) = delete;\n  TieredStorage(const TieredStorage& other) = delete;\n\n  std::error_code Open(std::string_view path);\n  void Close();\n\n  // Enqueue read 
external value with generic decoder.\n  template <typename D, typename F>\n  void Read(DbIndex dbid, std::string_view key, const tiering::DiskSegment& segment,\n            const D& decoder, F&& f) {\n    // TODO(vlad): untangle endless callback wrapping!\n    // Templates don't consider implicit conversions, so explicitly convert to std::function\n    auto wrapped_cb = [f = std::forward<F>(f)](io::Result<tiering::Decoder*> res) mutable {\n      f(res.transform([](auto* d) { return static_cast<D*>(d); }));\n    };\n    ReadInternal(dbid, key, segment, decoder, wrapped_cb);\n  }\n\n  // Returns StashDescriptor if a value should be stashed.\n  std::optional<StashDescriptor> ShouldStash(const tiering::FragmentRef& fragment_ref) const;\n\n  // Stash value, returns optional future for backpressure is not null.\n  // if `provide_bp` is set and conditions are met.\n  void Stash(DbIndex dbid, std::string_view key, const StashDescriptor& blobs,\n             BackPressureFuture* backpressure);\n\n  // Delete value, must be offloaded (external type)\n  void Delete(DbIndex dbid, tiering::FragmentRef fragment_ref);\n\n  // Cancel pending stash for the fragment, must have HasStashPending() true.\n  void CancelStash(DbIndex dbid, std::string_view key, tiering::FragmentRef fragment_ref);\n\n  // Run offloading loop until i/o device is loaded or all entries were traversed\n  void RunOffloading(DbIndex dbid);\n\n  // Prune cool entries to reach the set memory goal with freed memory\n  size_t ReclaimMemory(size_t goal);\n\n  // Returns the primary value, and deletes the cool item as well as its offloaded storage.\n  PrimeValue Warmup(DbIndex dbid, PrimeValue::CoolItem item);\n\n  TieredStats GetStats() const;\n\n  void UpdateFromFlags();  // Update internal values based on current flag values\n  static std::vector<std::string> GetMutableFlagNames();  // Triggers UpdateFromFlags\n\n  bool ShouldOffload() const;     // True if below tiered_offload_threshold\n  float WriteDepthUsage() 
const;  // Ratio (0-1) of used storage_write_depth for stashes\n\n  // How much we are above tiered_upload_threshold. Can be negative!\n  int64_t UploadBudget() const;\n  size_t CoolMemoryUsage() const {\n    return stats_.cool_memory_used;\n  }\n\n private:\n  void ReadInternal(DbIndex dbid, std::string_view key, const tiering::DiskSegment& segment,\n                    const tiering::Decoder& decoder,\n                    std::function<void(io::Result<tiering::Decoder*>)> cb);\n\n  // Moves pv contents to the cool storage and updates pv to point to it.\n  void CoolDown(DbIndex db_ind, std::string_view str, const tiering::DiskSegment& segment,\n                CompactObj::ExternalRep rep, PrimeValue* pv);\n\n  PrimeValue DeleteCool(tiering::TieredCoolRecord* record);\n  tiering::TieredCoolRecord* PopCool();\n\n  PrimeTable::Cursor offloading_cursor_;  // where RunOffloading left off\n\n  // Stash operations waiting for completion to throttle\n  tiering::EntryMap<::util::fb2::Future<bool>> stash_backpressure_;\n\n  std::unique_ptr<ShardOpManager> op_manager_;\n  std::unique_ptr<tiering::SmallBins> bins_;\n\n  using CoolQueue = ::boost::intrusive::list<tiering::TieredCoolRecord>;\n  CoolQueue cool_queue_;\n\n  struct {\n    size_t min_value_size;\n    bool experimental_cooling;\n    unsigned write_depth_limit;\n    float offload_threshold;\n    float upload_threshold;\n    bool experimental_hash_offload;\n  } config_;\n\n  mutable struct {\n    uint64_t stash_overflow_cnt = 0;\n    uint64_t total_deletes = 0;\n    uint64_t offloading_steps = 0;\n    uint64_t offloading_stashes = 0;\n    uint64_t total_clients_throttled = 0;\n    size_t cool_memory_used = 0;\n  } stats_;\n};\n\n// Read offloaded value. 
It must be of external string type\nvoid ReadTiered(DbIndex dbid, std::string_view key, const PrimeValue& value,\n                std::function<void(io::Result<std::string_view>)> readf, TieredStorage* ts);\n\n// Read offloaded value and apply transformation cb on the read result. Returns future of the\n// transformed result.\ntemplate <typename T>\nTieredStorage::TResult<T> ReadTiered(DbIndex dbid, std::string_view key, const PrimeValue& value,\n                                     std::function<T(std::string_view)> cb, TieredStorage* ts) {\n  TieredStorage::TResult<T> fut;\n  auto read_cb = [fut, cb = std::move(cb)](io::Result<std::string_view> res) mutable {\n    fut.Resolve(res.transform([&](std::string_view sv) { return cb(sv); }));\n  };\n  ReadTiered(dbid, key, value, std::move(read_cb), ts);\n  return fut;\n}\n\ninline TieredStorage::TResult<std::string> ReadTieredString(DbIndex dbid, std::string_view key,\n                                                            const PrimeValue& value,\n                                                            TieredStorage* ts) {\n  return ReadTiered<std::string>(\n      dbid, key, value, [](std::string_view val) { return std::string(val); }, ts);\n}\n\n// Reads offloaded value, and applies modifications on it and return generic result from callback.\n// Unlike with immutable Reads - the modified value will be uploaded back to memory.\n// This is handled by OpManager when modf completes.\ntemplate <typename T>\nTieredStorage::TResult<T> ModifyTiered(DbIndex dbid, std::string_view key, const PrimeValue& value,\n                                       std::function<T(std::string*)> modf, TieredStorage* ts);\n\n// Stash value if it meets criteria. 
If the value was stashed and `backpressure` is not nullptr,\n// assign/set the backpressure future to `*backpressure`.\nvoid StashPrimeValue(DbIndex dbid, std::string_view key, PrimeValue* pv, TieredStorage* ts,\n                     BackPressureFuture* backpressure);\n#else\n\nclass TieredStorage : public TieredStorageBase {\n  class ShardOpManager;\n\n public:\n  explicit TieredStorage(size_t max_size, DbSlice* db_slice) {\n  }\n\n  TieredStorage(TieredStorage&& other) = delete;\n  TieredStorage(const TieredStorage& other) = delete;\n\n  std::error_code Open(std::string_view path) {\n    return {};\n  }\n\n  void Close() {\n  }\n\n  // Read offloaded value. It must be of external type\n  void Read(DbIndex dbid, std::string_view key, const PrimeValue& value,\n            std::function<void(io::Result<std::string_view>)> readf) {\n  }\n\n  template <typename D, typename F>\n  void Read(DbIndex dbid, std::string_view key, const tiering::DiskSegment& value, const D& decoder,\n            F&& f) {\n  }\n\n  template <typename T>\n  TResult<T> Modify(DbIndex dbid, std::string_view key, const PrimeValue& value,\n                    std::function<T(std::string*)> modf) {\n    return {};\n  }\n\n  std::optional<StashDescriptor> ShouldStash(const tiering::FragmentRef& fragment) const {\n    return {};\n  }\n\n  void Stash(DbIndex dbid, std::string_view key, const StashDescriptor& blobs,\n             BackPressureFuture* backpressure) {\n  }\n\n  void Delete(DbIndex dbid, PrimeValue* value) {\n  }\n\n  size_t ReclaimMemory(size_t goal) {\n    return 0;\n  }\n\n  float WriteDepthUsage() const {\n    return 0;\n  }\n\n  size_t CoolMemoryUsage() const {\n    return 0;\n  }\n\n  void CancelStash(DbIndex dbid, std::string_view key, tiering::FragmentRef fragment_ref) {\n  }\n\n  TieredStats GetStats() const {\n    return {};\n  }\n\n  void RunOffloading(DbIndex dbid) {\n  }\n\n  void UpdateFromFlags() {\n  }\n\n  static std::vector<std::string> GetMutableFlagNames() {\n    return 
{};\n  }\n\n  bool ShouldOffload() const {\n    return false;\n  }\n\n  int64_t UploadBudget() const {\n    return 0;\n  }\n\n  PrimeValue Warmup(DbIndex dbid, PrimeValue::CoolItem item) {\n    return PrimeValue{};\n  }\n};\n\ntemplate <typename T>\nTieredStorage::TResult<T> ReadTiered(DbIndex dbid, std::string_view key, const PrimeValue& value,\n                                     std::function<T(std::string_view)> cb, TieredStorage* ts) {\n  return {};\n}\n\ninline void ReadTiered(DbIndex dbid, std::string_view key, const PrimeValue& value,\n                       std::function<void(io::Result<std::string_view>)> readf, TieredStorage* ts) {\n}\n\ninline TieredStorage::TResult<std::string> ReadTieredString(DbIndex dbid, std::string_view key,\n                                                            const PrimeValue& value,\n                                                            TieredStorage* ts) {\n  return {};\n}\n\ntemplate <typename T>\nTieredStorage::TResult<T> ModifyTiered(DbIndex dbid, std::string_view key, const PrimeValue& value,\n                                       std::function<T(std::string*)> modf, TieredStorage* ts) {\n  return {};\n}\n\ninline void StashPrimeValue(DbIndex dbid, std::string_view key, PrimeValue* pv, TieredStorage* ts,\n                            BackPressureFuture* backpressure) {\n}\n\n#endif  // WITH_TIERING\n\n}  // namespace dfly\n"
  },
  {
    "path": "src/server/tiered_storage_test.cc",
    "content": "// Copyright 2022, Roman Gershman.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#include \"server/tiered_storage.h\"\n\n#include <absl/strings/str_cat.h>\n#include <gmock/gmock.h>\n#include <gtest/gtest.h>\n\n#include \"absl/flags/internal/flag.h\"\n#include \"absl/flags/reflection.h\"\n#include \"base/flags.h\"\n#include \"base/logging.h\"\n#include \"facade/facade_test.h\"\n#include \"server/engine_shard_set.h\"\n#include \"server/test_utils.h\"\n#include \"util/fibers/fibers.h\"\n\nusing namespace std;\nusing namespace testing;\nusing namespace util;\n\nABSL_DECLARE_FLAG(bool, force_epoll);\nABSL_DECLARE_FLAG(string, tiered_prefix);\nABSL_DECLARE_FLAG(float, tiered_offload_threshold);\nABSL_DECLARE_FLAG(float, tiered_upload_threshold);\nABSL_DECLARE_FLAG(unsigned, tiered_storage_write_depth);\nABSL_DECLARE_FLAG(bool, tiered_experimental_cooling);\nABSL_DECLARE_FLAG(uint64_t, registered_buffer_size);\nABSL_DECLARE_FLAG(bool, tiered_experimental_hash_support);\n\nnamespace dfly {\n\nusing absl::GetFlag;\nusing absl::SetFlag;\n\nstring BuildString(size_t len, char c = 'A') {\n  return string(len, c);\n}\n\nclass TieredStorageTest : public BaseFamilyTest {\n protected:\n  TieredStorageTest() {\n    num_threads_ = 1;\n  }\n\n  void SetUp() override {\n    if (GetFlag(FLAGS_force_epoll)) {\n      LOG(WARNING) << \"Can't run tiered tests on EPOLL\";\n      exit(0);\n    }\n\n    // Disable registered buffers in half of the runs to use only small heap allocated buffers\n    // to possibly catch out of bounds reads/writes with sanitizers\n    if (absl::InsecureBitGen{}() % 2) {\n      SetFlag(&FLAGS_registered_buffer_size, 0);\n    }\n\n    SetFlag(&FLAGS_tiered_storage_write_depth, 15000);\n    if (GetFlag(FLAGS_tiered_prefix).empty()) {\n      SetFlag(&FLAGS_tiered_prefix, \"/tmp/tiered_storage_test\");\n    }\n\n    BaseFamilyTest::SetUp();\n  }\n\n  void UpdateFromFlags() {\n    pp_->at(0)->AwaitBrief([] { 
EngineShard::tlocal()->tiered_storage()->UpdateFromFlags(); });\n  }\n};\n\n// Test that should run with both modes of \"cooling\"\nclass LatentCoolingTSTest : public TieredStorageTest, public testing::WithParamInterface<bool> {\n  void SetUp() override {\n    fs.emplace();\n    SetFlag(&FLAGS_tiered_experimental_cooling, GetParam());\n    TieredStorageTest::SetUp();\n  }\n\n  optional<absl::FlagSaver> fs;\n};\n\nINSTANTIATE_TEST_SUITE_P(TS, LatentCoolingTSTest, testing::Values(true, false));\n\n// Disabled cooling and all values are offloaded\nclass PureDiskTSTest : public TieredStorageTest {\n  void SetUp() override {\n    fs.emplace();\n    SetFlag(&FLAGS_tiered_offload_threshold, 1.0);\n    SetFlag(&FLAGS_tiered_experimental_cooling, false);\n    TieredStorageTest::SetUp();\n  }\n\n  optional<absl::FlagSaver> fs;\n};\n\n// Perform simple series of SET, GETSET and GET\nTEST_P(LatentCoolingTSTest, SimpleGetSet) {\n  absl::FlagSaver saver;\n  SetFlag(&FLAGS_tiered_offload_threshold, 0.0f);  // disable offloading\n  UpdateFromFlags();\n\n  const int kMin = 256;\n  const int kMax = tiering::kPageSize + 10;\n\n  // Perform SETs\n  for (size_t i = kMin; i < kMax; i++) {\n    Run({\"SET\", absl::StrCat(\"k\", i), BuildString(i)});\n  }\n\n  // Make sure all entries were stashed, except the one not filling a small page\n  size_t stashes = 0;\n  ExpectConditionWithinTimeout([this, &stashes] {\n    stashes = GetMetrics().tiered_stats.total_stashes;\n    return stashes >= kMax - kMin - 1;\n  });\n\n  // All entries were accounted for except that one (see comment above)\n  auto metrics = GetMetrics();\n  EXPECT_EQ(metrics.db_stats[0].tiered_entries, kMax - kMin - 1);\n  EXPECT_LE(metrics.db_stats[0].tiered_used_bytes, (kMax - 1 + kMin) * (kMax - kMin) / 2 - 2047);\n\n  // Perform GETSETs\n  for (size_t i = kMin; i < kMax; i++) {\n    auto resp = Run({\"GETSET\", absl::StrCat(\"k\", i), string(i, 'B')});\n    ASSERT_EQ(resp, BuildString(i)) << i;\n  }\n\n  // Perform GETs\n  
for (size_t i = kMin; i < kMax; i++) {\n    auto resp = Run({\"GET\", absl::StrCat(\"k\", i)});\n    ASSERT_EQ(resp, string(i, 'B')) << i;\n    Run({\"GET\", absl::StrCat(\"k\", i)});  // To enforce uploads.\n  }\n\n  metrics = GetMetrics();\n  EXPECT_EQ(metrics.db_stats[0].tiered_entries, 0);\n  EXPECT_EQ(metrics.db_stats[0].tiered_used_bytes, 0);\n}\n\nTEST_F(TieredStorageTest, IntStrings) {\n  absl::FlagSaver saver;\n  SetFlag(&FLAGS_tiered_upload_threshold,\n          0.0f);  // do not stop uploads based on free-memory threshold (this test does not itself\n                  // trigger uploads)\n  UpdateFromFlags();\n\n  // STRING object can be encoded as LONG LONG internally\n  string short_int_string = BuildString(18, '1');\n  Run({\"SET\", \"k1\", short_int_string});\n\n  // STRING object is not offloaded due to its small size\n  string long_int_string = BuildString(32, '1');\n  Run({\"SET\", \"k2\", long_int_string});\n\n  // Long STRING object that is offloaded\n  string tiered_int_string = BuildString(4096, '1');\n  Run({\"SET\", \"k3\", tiered_int_string});\n\n  ExpectConditionWithinTimeout([this] { return GetMetrics().tiered_stats.total_stashes == 1; });\n}\n\n// Use MGET to load multiple offloaded values\nTEST_P(LatentCoolingTSTest, MGET) {\n  vector<string> command = {\"MGET\"}, values = {};\n  for (char key = 'A'; key <= 'Z'; key++) {\n    command.emplace_back(1, key);\n    values.emplace_back(3000, key);\n    Run({\"SET\", command.back(), values.back()});\n  }\n\n  ExpectConditionWithinTimeout(\n      [this, &values] { return GetMetrics().tiered_stats.total_stashes >= values.size(); });\n\n  auto resp = Run(absl::MakeSpan(command));\n  auto elements = resp.GetVec();\n  for (size_t i = 0; i < elements.size(); i++)\n    EXPECT_EQ(elements[i], values[i]);\n}\n\n// Issue many APPEND commands to an offloaded value that are executed at once (with CLIENT PAUSE).\n// They should all finish within the same io completion loop.\nTEST_F(TieredStorageTest, 
AppendStorm) {\n  const size_t kAppends = 20;\n\n  absl::FlagSaver saver;\n  absl::SetFlag(&FLAGS_tiered_offload_threshold, 1.0);\n  absl::SetFlag(&FLAGS_tiered_upload_threshold, 0.0);\n  absl::SetFlag(&FLAGS_tiered_experimental_cooling, false);\n  UpdateFromFlags();\n\n  // Offload single value\n  string base_value(4096, 'a');\n  Run({\"SET\", \"key\", base_value});\n  ExpectConditionWithinTimeout([this] { return GetMetrics().tiered_stats.total_stashes == 1; });\n\n  // Accumulate APPENDs\n  Run({\"CLIENT\", \"pause\", \"1000\"});\n  vector<Fiber> fibs;\n  for (size_t i = 0; i < kAppends; i++) {\n    fibs.emplace_back(pp_->at(0)->LaunchFiber([this, i] {\n      Run(absl::StrCat(i), {\"APPEND\", \"key\", string(96, 'b')});\n    }));\n  }\n\n  // Throw in a SETRANGE\n  fibs.emplace_back(pp_->at(0)->LaunchFiber([this] {\n    Run(\"range\", {\"SETRANGE\", \"key\", \"0\", string(96, 'x')});\n  }));\n\n  // Throw in a GETRANGE to a range that keeps constant\n  string get_range;\n  fibs.emplace_back(pp_->at(0)->LaunchFiber([this, &get_range] {\n    get_range = Run(\"get\", {\"GETRANGE\", \"key\", \"96\", \"191\"}).GetString();\n  }));\n\n  // Unlock and wait\n  Run({\"CLIENT\", \"unpause\"});\n  for (auto& f : fibs)\n    f.JoinIfNeeded();\n\n  // Check partial result is right\n  EXPECT_EQ(get_range, string(96, 'a'));\n\n  // Get value and verify it\n  auto value = Run({\"GET\", \"key\"});\n  EXPECT_EQ(value, string(96, 'x') + string(4000, 'a') + string(kAppends * 96, 'b'));\n\n  // Check value was read no more than once for APPENDs and once for GET\n  auto metrics = GetMetrics();\n  EXPECT_LE(metrics.tiered_stats.total_fetches, 2u);\n  EXPECT_LE(metrics.tiered_stats.total_uploads, 2u);\n}\n\n// SETRANGE and GETRANGE\nTEST_P(LatentCoolingTSTest, Ranges) {\n  Run({\"SET\", \"key\", string(3000, 'a')});\n  ExpectConditionWithinTimeout([this] { return GetMetrics().tiered_stats.total_stashes >= 1; });\n\n  Run({\"SETRANGE\", \"key\", \"1000\", string(1000, 'b')});\n  auto resp 
= Run({\"GET\", \"key\"});\n  EXPECT_EQ(resp, string(1000, 'a') + string(1000, 'b') + string(1000, 'a'));\n\n  Run({\"DEL\", \"key\"});\n  Run({\"SET\", \"key\", string(1500, 'c') + string(1500, 'd')});\n  ExpectConditionWithinTimeout([this] { return GetMetrics().tiered_stats.total_stashes >= 2; });\n\n  resp = Run({\"GETRANGE\", \"key\", \"1000\", \"1999\"});\n  EXPECT_EQ(resp, string(500, 'c') + string(500, 'd'));\n}\n\n// Stash values from different databases and read them back\nTEST_P(LatentCoolingTSTest, MultiDb) {\n  for (size_t i = 0; i < 10; i++) {\n    Run({\"SELECT\", absl::StrCat(i)});\n    Run({\"SET\", absl::StrCat(\"k\", i), BuildString(3000, char('A' + i))});\n  }\n\n  ExpectConditionWithinTimeout([this] { return GetMetrics().tiered_stats.total_stashes >= 10; });\n\n  for (size_t i = 0; i < 10; i++) {\n    Run({\"SELECT\", absl::StrCat(i)});\n    EXPECT_EQ(GetMetrics().db_stats[i].tiered_entries, 1);\n    string key = absl::StrCat(\"k\", i);\n    EXPECT_EQ(Run({\"GET\", key}), BuildString(3000, char('A' + i)));\n    Run({\"GET\", key});\n    EXPECT_EQ(GetMetrics().db_stats[i].tiered_entries, 0);\n  }\n}\n\n// Trigger defragmentation\nTEST_F(TieredStorageTest, Defrag) {\n  for (char k = 'a'; k < 'a' + 8; k++) {\n    Run({\"SET\", string(1, k), string(600, k)});\n  }\n\n  ExpectConditionWithinTimeout([this] { return GetMetrics().tiered_stats.total_stashes >= 1; });\n\n  // 7 out 8 are in one bin, the last one made if flush and is now filling\n  auto metrics = GetMetrics();\n  ASSERT_EQ(metrics.tiered_stats.small_bins_cnt, 1u);\n  ASSERT_EQ(metrics.tiered_stats.small_bins_entries_cnt, 7u);\n\n  // Distorted due to encoded values.\n  ASSERT_EQ(metrics.tiered_stats.small_bins_filling_bytes, 537);\n\n  // Reading 3 values still leaves the bin more than half occupied\n  for (unsigned j = 0; j < 2; ++j) {\n    Run({\"GET\", string(1, 'a')});\n    Run({\"GET\", string(1, 'b')});\n    Run({\"GET\", string(1, 'c')});\n  }\n  metrics = GetMetrics();\n  
EXPECT_EQ(metrics.tiered_stats.small_bins_cnt, 1u);\n  EXPECT_EQ(metrics.tiered_stats.small_bins_entries_cnt, 4u);\n\n  // This triggers defragmentation, as only 3 < 7/2 remain\n  Run({\"GET\", string(1, 'd')});\n\n  // Wait until all reads caused by defrags have finished.\n  ExpectConditionWithinTimeout([this] { return GetMetrics().tiered_stats.pending_read_cnt == 0; });\n  metrics = GetMetrics();\n  EXPECT_EQ(metrics.tiered_stats.total_defrags, 3u);\n  EXPECT_EQ(metrics.tiered_stats.small_bins_cnt, 0u);\n  EXPECT_EQ(metrics.tiered_stats.allocated_bytes, 0u);\n}\n\nTEST_F(PureDiskTSTest, BackgroundOffloading) {\n  absl::FlagSaver saver;\n  SetFlag(&FLAGS_tiered_upload_threshold, 0.0f);  // upload all values\n  UpdateFromFlags();\n\n  const int kNum = 500;\n\n  max_memory_limit = kNum * 4096;\n\n  // Stash all values\n  string value = BuildString(3000);\n  for (size_t i = 0; i < kNum; i++) {\n    Run({\"SETEX\", absl::StrCat(\"k\", i), \"100\", value});\n  }\n\n  ExpectConditionWithinTimeout([&] { return GetMetrics().db_stats[0].tiered_entries == kNum; });\n  ASSERT_EQ(GetMetrics().tiered_stats.total_stashes, kNum);\n  ASSERT_EQ(GetMetrics().db_stats[0].tiered_entries, kNum);\n\n  // Trigger re-fetch and test TTL is preserved.\n  for (size_t i = 0; i < kNum; i++) {\n    string key = absl::StrCat(\"k\", i);\n    auto resp = Run({\"TTL\", key});\n    EXPECT_THAT(resp, IntArg(100));\n\n    resp = Run({\"GET\", key});\n    EXPECT_EQ(resp, value);\n    resp = Run({\"TTL\", key});\n    EXPECT_THAT(resp, IntArg(100));\n    Run({\"GET\", key});  // enforce uploads\n  }\n\n  // Wait for offload to do it all again\n  ExpectConditionWithinTimeout([&] { return GetMetrics().db_stats[0].tiered_entries == kNum; });\n  auto resp = Run({\"INFO\", \"ALL\"});\n  VLOG(1) << \"INFO \" << resp.GetString();\n  auto metrics = GetMetrics();\n\n  // Not all values were necessarily uploaded during GET calls, but all that were uploaded\n  // should be re-stashed again.\n  
EXPECT_EQ(metrics.tiered_stats.total_stashes, kNum + metrics.tiered_stats.total_uploads)\n      << resp.GetString();\n  EXPECT_EQ(metrics.tiered_stats.allocated_bytes, kNum * 4096);\n}\n\n// Verify correctness of our offloading strategy, offloading values only after second access.\nTEST_F(PureDiskTSTest, OffloadingStrategy) {\n  // Create value and wait to be offloaded\n  string value = BuildString(3000);\n  Run({\"set\", \"key\", value});\n  ExpectConditionWithinTimeout([&] { return GetMetrics().db_stats[0].tiered_entries == 1; });\n\n  // Check base values\n  auto metrics = GetMetrics();\n  EXPECT_EQ(metrics.tiered_stats.total_fetches, 0);\n  EXPECT_EQ(metrics.tiered_stats.total_uploads, 0);\n  EXPECT_EQ(metrics.tiered_stats.total_stashes, 1);\n\n  // Repeat a few times\n  for (size_t i = 1; i <= 3; i++) {\n    // Value is not uploaded after first read\n    Run({\"get\", \"key\"});\n    metrics = GetMetrics();\n    EXPECT_EQ(metrics.tiered_stats.total_fetches, 2 * i - 1);\n    EXPECT_EQ(metrics.tiered_stats.total_uploads, i - 1);\n\n    // But on second read upload should happen at the end of chain due to two touches\n    Run({\"get\", \"key\"});\n    ExpectConditionWithinTimeout([&] { return GetMetrics().tiered_stats.total_uploads == i; });\n    metrics = GetMetrics();\n    EXPECT_EQ(metrics.tiered_stats.total_fetches, 2 * i);\n\n    // Wait for offloading again\n    ExpectConditionWithinTimeout([&] { return GetMetrics().db_stats[0].tiered_entries == 1; });\n    metrics = GetMetrics();\n    EXPECT_EQ(metrics.tiered_stats.total_offloading_stashes, i);\n    EXPECT_EQ(metrics.tiered_stats.total_stashes, i + 1);\n  }\n}\n\n// Test FLUSHALL while reading entries\nTEST_F(PureDiskTSTest, FlushAll) {\n  const int kNum = 500;\n  for (size_t i = 0; i < kNum; i++) {\n    Run({\"SET\", absl::StrCat(\"k\", i), BuildString(3000)});\n  }\n  ExpectConditionWithinTimeout([&] { return GetMetrics().db_stats[0].tiered_entries == kNum; });\n\n  // Start reading random entries\n  
atomic_bool done = false;\n  auto reader = pp_->at(0)->LaunchFiber([&] {\n    while (!done) {\n      Run(\"reader\", {\"GET\", absl::StrCat(\"k\", rand() % kNum)});\n      util::ThisFiber::Yield();\n    }\n  });\n\n  Metrics metrics;\n  ExpectConditionWithinTimeout([&] {\n    metrics = GetMetrics();\n\n    // Note that metrics.events.hits is not consistent with total_fetches\n    // and it can happen that hits is greater than total_fetches due to in-progress reads.\n    return metrics.tiered_stats.total_fetches > 2;\n  });\n  LOG(INFO) << FormatMetrics(metrics);\n\n  Run({\"FLUSHALL\"});\n\n  done = true;\n  util::ThisFiber::SleepFor(100ms);\n  reader.Join();\n\n  metrics = GetMetrics();\n  LOG(INFO) << FormatMetrics(metrics);\n\n  EXPECT_EQ(metrics.db_stats.front().tiered_entries, 0u);\n}\n\n// Check FLUSHALL clears filling bytes of small bins\nTEST_F(TieredStorageTest, FlushPending) {\n  absl::FlagSaver saver;\n  SetFlag(&FLAGS_tiered_offload_threshold, 1.0f);  // offload all values\n\n  const int kNum = 10;\n  for (size_t i = 0; i < kNum; i++) {\n    Run({\"SET\", absl::StrCat(\"k\", i), BuildString(256)});\n  }\n  ExpectConditionWithinTimeout(\n      [&] { return GetMetrics().tiered_stats.small_bins_filling_bytes > 0; });\n  Run({\"FLUSHALL\"});\n  EXPECT_EQ(GetMetrics().tiered_stats.small_bins_filling_bytes, 0u);\n}\n\n// Test that clients are throttled if many stashes are issued.\n// Stashes are released with CLIENT UNPAUSE to occur at the same time\nTEST_F(PureDiskTSTest, ThrottleClients) {\n  absl::FlagSaver saver;\n  absl::SetFlag(&FLAGS_tiered_upload_threshold, 0.0);\n  UpdateFromFlags();\n\n  // issue client pause to accumulate SETs\n  Run({\"CLIENT\", \"PAUSE\", \"1000\"});\n\n  string value(4096, 'a');\n  vector<Fiber> fibs;\n  for (size_t i = 0; i < 100; i++) {\n    fibs.emplace_back(pp_->at(0)->LaunchFiber([this, i, &value] {\n      string key = absl::StrCat(\"k\", i);\n      Run(key, {\"SET\", key, value});\n    }));\n  }\n  ThisFiber::Yield();\n\n  
// Unpause\n  Run({\"CLIENT\", \"UNPAUSE\"});\n\n  // Check if at least some of the clients were caught throttling\n  // but we provided backpressure for all of them\n  auto metrics = GetMetrics();\n  EXPECT_GT(metrics.tiered_stats.clients_throttled, fibs.size() / 10);\n  EXPECT_EQ(metrics.tiered_stats.total_clients_throttled, fibs.size());\n\n  for (auto& fib : fibs)\n    fib.JoinIfNeeded();\n\n  // Because of the 5ms max wait time for backpressure, we can't rely on the stashes to have\n  // finished even after all the fibers joined, so expect the condition with a timeout\n  ExpectConditionWithinTimeout(\n      [&] { return GetMetrics().tiered_stats.total_stashes == fibs.size(); });\n}\n\nTEST_F(TieredStorageTest, Expiry) {\n  string val = BuildString(100);\n  Run({\"psetex\", \"key1\", \"1\", val});\n  AdvanceTime(10);\n  Run({\"psetex\", \"key1\", \"1\", val});\n  auto resp = Run({\"get\", \"key1\"});\n  EXPECT_EQ(resp, val);\n}\n\nTEST_F(PureDiskTSTest, SetExistingExpire) {\n  const int kNum = 20;\n  for (size_t i = 0; i < kNum; i++) {\n    Run({\"SETEX\", absl::StrCat(\"k\", i), \"100\", BuildString(256)});\n  }\n  ExpectConditionWithinTimeout([&] { return GetMetrics().tiered_stats.total_stashes > 1; });\n\n  for (size_t i = 0; i < kNum; i++) {\n    Run({\"SETEX\", absl::StrCat(\"k\", i), \"100\", BuildString(256)});\n  }\n\n  for (size_t i = 0; i < kNum; i++) {\n    auto resp = Run({\"TTL\", absl::StrCat(\"k\", i)});\n    EXPECT_THAT(resp, IntArg(100));\n  }\n}\n\nTEST_F(PureDiskTSTest, Dump) {\n  const int kNum = 10;\n  for (size_t i = 0; i < kNum; i++) {\n    Run({\"SET\", absl::StrCat(\"k\", i), BuildString(3000)});  // big enough to trigger offloading.\n  }\n\n  ExpectConditionWithinTimeout([&] { return GetMetrics().tiered_stats.total_stashes == kNum; });\n\n  auto resp = Run({\"DUMP\", \"k0\"});\n  EXPECT_THAT(Run({\"del\", \"k0\"}), IntArg(1));\n  resp = Run({\"restore\", \"k0\", \"0\", facade::ToSV(resp.GetBuf())});\n  EXPECT_EQ(resp, 
\"OK\");\n}\n\nTEST_P(LatentCoolingTSTest, SimpleHash) {\n  absl::FlagSaver saver;\n  absl::SetFlag(&FLAGS_tiered_experimental_hash_support, true);\n  // For now, never upload as it's not implemented yet\n  absl::SetFlag(&FLAGS_tiered_upload_threshold, 0.0);\n  UpdateFromFlags();\n\n  static constexpr size_t kNUM = 100;\n\n  auto build_command = [](string_view key) {\n    vector<string> cmd = {\"HSET\", string{key}};\n    for (char c = 'a'; c <= 'z'; c++) {\n      cmd.push_back(string{1, c});\n      cmd.push_back(string{31, 'x'} + c);\n    }\n    return cmd;\n  };\n\n  // Create some hashes\n  for (size_t i = 0; i < kNUM; i++) {\n    Run(build_command(absl::StrCat(\"k\", i)));\n  }\n\n  // Wait for all to be stashed or end up in bins\n  ExpectConditionWithinTimeout([this] {\n    auto metrics = GetMetrics();\n    return metrics.tiered_stats.total_stashes +\n               metrics.tiered_stats.small_bins_filling_entries_cnt ==\n           kNUM;\n  });\n\n  // Verify correctness\n  for (size_t i = 0; i < kNUM; i++) {\n    string key = absl::StrCat(\"k\", i);\n    EXPECT_THAT(Run({\"HLEN\", key}), IntArg(26));\n\n    auto resp = Run({\"HGET\", key, string{1, 'f'}});\n    auto v = string{31, 'x'} + 'f';\n    EXPECT_EQ(resp, v);\n  }\n}\n\n}  // namespace dfly\n"
  },
  {
    "path": "src/server/tiering/CMakeLists.txt",
    "content": "# Minimum set needed for successful compilation\nif(NOT WITH_TIERING)\n    add_library(dfly_tiering decoders.cc serialized_map.cc)\n    target_link_libraries(dfly_tiering dfly_transaction dfly_facade redis_lib base io)\n    return()\nendif()\n\nadd_library(dfly_tiering\n    decoders.cc disk_storage.cc external_alloc.cc\n    op_manager.cc serialized_map.cc small_bins.cc)\ntarget_link_libraries(dfly_tiering\n    dfly_transaction dfly_facade redis_lib base io)\n\n\nhelio_cxx_test(disk_storage_test dfly_test_lib LABELS DFLY)\nhelio_cxx_test(external_alloc_test dfly_test_lib LABELS DFLY)\nhelio_cxx_test(op_manager_test dfly_test_lib LABELS DFLY)\nhelio_cxx_test(serialized_map_test dfly_test_lib LABELS DFLY)\nhelio_cxx_test(small_bins_test dfly_test_lib LABELS DFLY)\n\nadd_dependencies(check_dfly disk_storage_test external_alloc_test op_manager_test serialized_map_test small_bins_test)\n"
  },
  {
    "path": "src/server/tiering/common.h",
    "content": "// Copyright 2024, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#pragma once\n\n#include <memory>\n#include <optional>\n#include <variant>\n\nnamespace dfly::tiering {\n\ninline namespace literals {\n\nconstexpr inline unsigned long long operator\"\"_MB(unsigned long long x) {\n  return x << 20U;\n}\n\nconstexpr inline unsigned long long operator\"\"_KB(unsigned long long x) {\n  return x << 10U;\n}\n\n}  // namespace literals\n\nconstexpr size_t kPageSize = 4_KB;\n\n// Location on the offloaded blob, measured in bytes\nstruct DiskSegment {\n  DiskSegment() = default;\n  DiskSegment(size_t offset, size_t length) : offset{offset}, length{length} {\n  }\n  DiskSegment(std::pair<size_t, size_t> p) : offset{p.first}, length(p.second) {\n  }\n\n  bool operator==(const DiskSegment& other) const {\n    return offset == other.offset && length == other.length;\n  }\n\n  DiskSegment ContainingPages() const {\n    return {offset / kPageSize * kPageSize, (length + kPageSize - 1) / kPageSize * kPageSize};\n  }\n\n  size_t offset = 0, length = 0;\n\n  friend std::ostream& operator<<(std::ostream& os, const DiskSegment& ds) {\n    return os << \"[\" << ds.offset << \", \" << ds.length << \"]\";\n  }\n};\n\nusing KeyRef = std::pair<uint16_t /* DbIndex */, std::string_view>;\n\n// Two separate keyspaces are provided - one for strings, one for numeric identifiers.\n// Ids can be used to track auxiliary values that don't map to real keys (like a page index).\n// Specifically, we track page indexes when serializing small-bin pages with multiple items.\nusing PendingId = std::variant<uintptr_t, KeyRef>;\n\n};  // namespace dfly::tiering\n"
  },
  {
    "path": "src/server/tiering/decoders.cc",
    "content": "// Copyright 2025, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#include \"server/tiering/decoders.h\"\n\n#include \"base/logging.h\"\n#include \"core/detail/listpack_wrap.h\"\n#include \"server/tiering/serialized_map.h\"\n\nextern \"C\" {\n#include \"redis/redis_aux.h\"  // for OBJ_HASH\n}\n\nnamespace dfly::tiering {\n\nstd::unique_ptr<Decoder> BareDecoder::Clone() const {\n  return std::make_unique<BareDecoder>();\n}\n\nvoid BareDecoder::Initialize(std::string_view slice) {\n  this->slice = slice;\n}\n\nvoid BareDecoder::Upload(CompactObj* obj) {\n  ABSL_UNREACHABLE();\n}\n\nDecoder::UploadMetrics BareDecoder::GetMetrics() const {\n  ABSL_UNREACHABLE();\n  return UploadMetrics{};\n}\n\nStringDecoder::StringDecoder(const CompactObj& obj) : StringDecoder{obj.GetStrEncoding()} {\n}\n\nStringDecoder::StringDecoder(CompactObj::StrEncoding encoding) : encoding_{encoding} {\n}\n\nstd::unique_ptr<Decoder> StringDecoder::Clone() const {\n  return std::unique_ptr<StringDecoder>{new StringDecoder(encoding_)};\n}\n\nvoid StringDecoder::Initialize(std::string_view slice) {\n  slice_ = slice;\n  value_ = encoding_.Decode(slice);\n}\n\nvoid StringDecoder::Upload(CompactObj* obj) {\n  if (modified_)\n    obj->Materialize(value_.view(), false);\n  else\n    obj->Materialize(slice_, true);\n}\n\nDecoder::UploadMetrics StringDecoder::GetMetrics() const {\n  return UploadMetrics{\n      .modified = modified_,\n      .estimated_mem_usage = value_.view().size(),\n  };\n}\n\nstd::string* StringDecoder::Write() {\n  modified_ = true;\n  return value_.GetMutable();\n}\n\nstd::unique_ptr<Decoder> SerializedMapDecoder::Clone() const {\n  return std::make_unique<SerializedMapDecoder>();\n}\n\nvoid SerializedMapDecoder::Initialize(std::string_view slice) {\n  map_ = std::make_unique<SerializedMap>(slice);\n}\n\nDecoder::UploadMetrics SerializedMapDecoder::GetMetrics() const {\n  return UploadMetrics{.modified = false,\n               
        .estimated_mem_usage = map_->DataBytes() + map_->size() * 2 * 8};\n}\n\nvoid SerializedMapDecoder::Upload(CompactObj* obj) {\n  auto lw = detail::ListpackWrap::WithCapacity(GetMetrics().estimated_mem_usage);\n  for (const auto& [key, value] : *map_)\n    lw.Insert(key, value, true);\n  obj->InitRobj(OBJ_HASH, kEncodingListPack, lw.GetPointer());\n}\n\nSerializedMap* SerializedMapDecoder::Get() const {\n  return map_.get();\n}\n\n}  // namespace dfly::tiering\n"
  },
  {
    "path": "src/server/tiering/decoders.h",
    "content": "// Copyright 2025, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#pragma once\n\n#include <memory>\n#include <optional>\n#include <string>\n#include <string_view>\n\n#include \"core/compact_object.h\"\n\nnamespace dfly::tiering {\n\nstruct SerializedMap;\n\n// Decodes serialized value and provides it to callbacks.\n// Acts as generic interface to callback driver (OpManager)\nstruct Decoder {\n  struct UploadMetrics {\n    bool modified;               // whether the value was modified\n    size_t estimated_mem_usage;  // Estimated memory usage if uploaded\n  };\n\n  virtual ~Decoder() = default;\n\n  // Poor man's type-erasure copy\n  virtual std::unique_ptr<Decoder> Clone() const = 0;\n\n  // Initialize decoder from slice\n  virtual void Initialize(std::string_view slice) = 0;\n\n  // Compute upload metrics to determine if an upload is worthwhile\n  virtual UploadMetrics GetMetrics() const = 0;\n\n  // Store value in compact object\n  virtual void Upload(CompactObj* obj) = 0;\n};\n\n// Basic \"bare\" decoder that just stores the provided slice\nstruct BareDecoder : public Decoder {\n  std::unique_ptr<Decoder> Clone() const override;\n  void Initialize(std::string_view slice) override;\n  UploadMetrics GetMetrics() const override;\n  void Upload(CompactObj* obj) override;\n\n  std::string_view slice;\n};\n\n// Decodes string value with the object's StrEncoding\nstruct StringDecoder : public Decoder {\n  explicit StringDecoder(const CompactObj& obj);\n\n  std::unique_ptr<Decoder> Clone() const override;\n  void Initialize(std::string_view slice) override;\n  UploadMetrics GetMetrics() const override;\n  void Upload(CompactObj* obj) override;\n\n  std::string_view GetView() const {\n    return value_.view();\n  }\n\n  std::string* Write();\n\n private:\n  explicit StringDecoder(CompactObj::StrEncoding encoding);\n\n  bool modified_ = false;\n  std::string_view slice_;\n  CompactObj::StrEncoding encoding_;\n  dfly::StringOrView 
value_;\n};\n\n// Decodes SerializedMaps\nstruct SerializedMapDecoder : public Decoder {\n  std::unique_ptr<Decoder> Clone() const override;\n  void Initialize(std::string_view slice) override;\n  UploadMetrics GetMetrics() const override;\n  void Upload(CompactObj* obj) override;\n\n  SerializedMap* Get() const;\n\n private:\n  std::unique_ptr<SerializedMap> map_;\n};\n\n}  // namespace dfly::tiering\n"
  },
  {
    "path": "src/server/tiering/disk_storage.cc",
    "content": "// Copyright 2024, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#include \"server/tiering/disk_storage.h\"\n\n#include <system_error>\n\n#include \"base/flags.h\"\n#include \"base/logging.h\"\n#include \"io/io_buf.h\"\n#include \"server/error.h\"\n#include \"server/tiering/common.h\"\n#include \"server/tiering/external_alloc.h\"\n#include \"util/fibers/uring_file.h\"\n#include \"util/fibers/uring_proactor.h\"\n\nusing namespace ::dfly::tiering::literals;\n\nABSL_FLAG(bool, backing_file_direct, true, \"If true uses O_DIRECT to open backing files\");\n\nABSL_FLAG(uint64_t, registered_buffer_size, 512_KB,\n          \"Size of registered buffer for IoUring fixed read/writes\");\n\nnamespace dfly::tiering {\n\nusing namespace std;\nusing namespace ::util::fb2;\n\nnamespace {\n\nconstexpr unsigned kHeapSliceId = UINT_MAX;\n\nRegisteredSlice AllocateTmpBuf(size_t size) {\n  size = (size + kPageSize - 1) / kPageSize * kPageSize;\n  VLOG(2) << \"Fallback to temporary allocation: \" << size;\n\n  uint8_t* buf = new (align_val_t(kPageSize)) uint8_t[size];\n  return RegisteredSlice{{buf, size}, kHeapSliceId};\n}\n\nvoid DestroyTmpBuf(RegisteredSlice buf) {\n  DCHECK_EQ(buf.buf_idx, kHeapSliceId);\n  ::operator delete[](buf.bytes.data(), align_val_t(kPageSize));\n}\n\nvoid ReturnBuf(RegisteredSlice buf) {\n  DCHECK_EQ(ProactorBase::me()->GetKind(), ProactorBase::IOURING);\n  auto* up = static_cast<UringProactor*>(ProactorBase::me());\n\n  if (buf.buf_idx != kHeapSliceId)\n    up->ReturnRegisteredSlice(buf);\n  else\n    DestroyTmpBuf(buf);\n}\n\nconstexpr off_t kInitialSize = 1UL << 28;  // 256MB\n\ntemplate <typename... Ts> error_code DoFiberCall(void (SubmitEntry::*c)(Ts...), Ts... args) {\n  auto* proactor = static_cast<UringProactor*>(ProactorBase::me());\n  FiberCall fc(proactor);\n  (fc.operator->()->*c)(std::forward<Ts>(args)...);\n  FiberCall::IoResult io_res = fc.Get();\n  return io_res < 0 ? 
error_code{-io_res, system_category()} : error_code{};\n}\n\n}  // anonymous namespace\n\nDiskStorage::DiskStorage(size_t max_size) : max_size_(max_size) {\n}\n\nDiskStorage::~DiskStorage() {\n}\n\nerror_code DiskStorage::Open(string_view path) {\n  DCHECK_EQ(ProactorBase::me()->GetKind(), ProactorBase::IOURING);\n  CHECK(!backing_file_);\n\n  int kFlags = O_CREAT | O_RDWR | O_TRUNC | O_CLOEXEC;\n  if (absl::GetFlag(FLAGS_backing_file_direct))\n    kFlags |= O_DIRECT;\n\n  backing_file_path_ = path;\n  auto res = OpenLinux(path, kFlags, 0666);\n  if (!res)\n    return res.error();\n  backing_file_ = std::move(res.value());\n\n  int fd = backing_file_->fd();\n\n  auto ec = DoFiberCall(&SubmitEntry::PrepFallocate, fd, 0, 0L, kInitialSize);\n  VLOG_IF(1, ec) << \"Fallocate not supported\";\n\n  RETURN_ON_ERR(DoFiberCall(&SubmitEntry::PrepFadvise, fd, 0L, 0L, POSIX_FADV_RANDOM));\n\n  alloc_.AddStorage(0, kInitialSize);\n\n  // TODO(vlad): Even though this is called only once for regular use,\n  // the testing code runs this initializer every time, never unregistering previous buffers\n  auto* up = static_cast<UringProactor*>(ProactorBase::me());\n  auto registered_buffer_size = absl::GetFlag(FLAGS_registered_buffer_size);\n  if (registered_buffer_size > 0) {\n    if (int io_res = up->RegisterBuffers(registered_buffer_size); io_res < 0)\n      return error_code{-io_res, system_category()};\n  }\n  return {};\n}\n\nvoid DiskStorage::Close() {\n  using namespace chrono_literals;\n\n  // TODO: to fix this polling.\n  while (pending_ops_ > 0 || grow_.pending)\n    util::ThisFiber::SleepFor(10ms);\n\n  auto ec = backing_file_->Close();\n  LOG_IF(ERROR, ec) << \"Failed to close backing file: \" << ec;\n  backing_file_.reset();\n\n  int errc = unlink(backing_file_path_.c_str());\n  LOG_IF(ERROR, errc != 0) << \"Failed to unlink backing file: \"\n                           << std::error_code{errc, std::system_category()};\n}\n\nvoid DiskStorage::Read(DiskSegment segment, 
ReadCb cb) {\n  DCHECK_GT(segment.length, 0u);\n  DCHECK_EQ(segment.offset % kPageSize, 0u);\n\n  size_t len = segment.length;\n  RegisteredSlice buf = PrepareBuf(len);\n  auto io_cb = [this, cb = std::move(cb), buf, len](int io_res) {\n    if (io_res < 0) {\n      cb(nonstd::make_unexpected(error_code{-io_res, system_category()}));\n    } else {\n      cb(string_view{reinterpret_cast<char*>(buf.bytes.data()), len});\n    }\n    ReturnBuf(buf);\n    pending_ops_--;\n  };\n\n  pending_ops_++;\n  if (buf.buf_idx != kHeapSliceId)\n    backing_file_->ReadFixedAsync(buf.bytes, segment.offset, buf.buf_idx, std::move(io_cb));\n  else\n    backing_file_->ReadAsync(buf.bytes, segment.offset, std::move(io_cb));\n}\n\nvoid DiskStorage::MarkAsFree(DiskSegment segment) {\n  DCHECK_GT(segment.length, 0u);\n  DCHECK_EQ(segment.offset % kPageSize, 0u);\n\n  alloc_.Free(segment.offset, segment.length);\n}\n\nio::Result<std::pair<size_t, RegisteredSlice>> DiskStorage::PrepareStash(size_t length) {\n  using namespace nonstd;\n\n  int64_t offset = alloc_.Malloc(length);\n  if (offset >= 0)\n    return std::make_pair(offset, PrepareBuf(length));\n\n  // If we don't have \"enough space\", request grow and return to avoid blocking.\n  // Note that `alloc_.Malloc` may fail even if we have enough space due to fragmentation,\n  // as internally it uses different 256MB segments for different block sizes.\n  if (offset < 0) {\n    auto ec = RequestGrow(-offset);\n    return make_unexpected(ec ? 
ec : make_error_code(errc::operation_would_block));\n  }\n\n  offset = alloc_.Malloc(length);\n  if (offset < 0)  // we can't fit it even after resizing\n    return make_unexpected(make_error_code(errc::file_too_large));\n\n  return std::make_pair(offset, PrepareBuf(length));\n}\n\nvoid DiskStorage::Stash(DiskSegment segment, RegisteredSlice buf, StashCb cb) {\n  auto io_cb = [this, cb = std::move(cb), buf, segment](int io_res) {\n    if (io_res < 0) {\n      MarkAsFree(segment);\n      cb(error_code{-io_res, std::system_category()});\n    } else {\n      cb({});\n    }\n    ReturnBuf(buf);\n    pending_ops_--;\n  };\n\n  pending_ops_++;\n  size_t offset = segment.offset;\n  if (buf.buf_idx != kHeapSliceId)\n    backing_file_->WriteFixedAsync(buf.bytes, offset, buf.buf_idx, std::move(io_cb));\n  else\n    backing_file_->WriteAsync(buf.bytes, offset, std::move(io_cb));\n\n  // Grow in advance if needed and possible\n  size_t capacity = alloc_.capacity();\n  size_t available = capacity - alloc_.allocated_bytes();\n  if ((available < 256_MB) && (available < capacity * 0.15) && !grow_.pending) {\n    auto ec = RequestGrow(256_MB);\n    LOG_IF(ERROR, ec && ec != errc::file_too_large) << \"Could not call grow :\" << ec.message();\n  }\n}\n\nDiskStorage::Stats DiskStorage::GetStats() const {\n  return {\n      alloc_.allocated_bytes(),       alloc_.capacity(), heap_buf_alloc_cnt_, reg_buf_alloc_cnt_,\n      static_cast<size_t>(max_size_), pending_ops_};\n}\n\nerror_code DiskStorage::RequestGrow(off_t grow_size) {\n  VLOG(1) << \"Requesting grow by \" << grow_size << \" current capacity: \" << alloc_.capacity();\n\n  DCHECK_EQ(grow_size % ExternalAllocator::kExtAlignment, 0u);\n  if (alloc_.capacity() + grow_size >= static_cast<size_t>(max_size_))\n    return make_error_code(errc::file_too_large);\n\n  // Don't try again immediately, most likely it won't succeed ever.\n  const uint64_t kCooldownTime = 100'000'000;  // 100ms\n  if (grow_.last_err && 
(ProactorBase::GetMonotonicTimeNs() - grow_.timestamp_ns) < kCooldownTime)\n    return make_error_code(errc::operation_canceled);\n\n  if (std::exchange(grow_.pending, true)) {\n    LOG_EVERY_T(WARNING, 1) << \"Blocked on concurrent grow\";\n    return make_error_code(errc::operation_in_progress);\n  }\n\n  off_t end = alloc_.capacity();\n  backing_file_->FallocateAsync(0, end, grow_size, [end, grow_size, this](int res) {\n    auto ec = (res < 0) ? std::error_code{-res, std::system_category()} : std::error_code{};\n    grow_.pending = false;\n    grow_.last_err = ec;\n    grow_.timestamp_ns = ProactorBase::GetMonotonicTimeNs();\n    if (!ec)\n      alloc_.AddStorage(end, grow_size);\n  });\n\n  return {};\n}\n\nRegisteredSlice DiskStorage::PrepareBuf(size_t size) {\n  DCHECK_EQ(ProactorBase::me()->GetKind(), ProactorBase::IOURING);\n  auto* up = static_cast<UringProactor*>(ProactorBase::me());\n\n  if (auto borrowed = up->RequestRegisteredSlice(size); borrowed) {\n    ++reg_buf_alloc_cnt_;\n    return *borrowed;\n  }\n  ++heap_buf_alloc_cnt_;\n  return AllocateTmpBuf(size);\n}\n\n}  // namespace dfly::tiering\n"
  },
  {
    "path": "src/server/tiering/disk_storage.h",
    "content": "// Copyright 2024, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#pragma once\n\n#include <system_error>\n\n#include \"io/io.h\"\n#include \"server/tiering/common.h\"\n#include \"server/tiering/external_alloc.h\"\n#include \"util/fibers/uring_types.h\"\n\nnamespace util::fb2 {\nclass LinuxFile;\n}  // namespace util::fb2\n\nnamespace dfly::tiering {\n\n// Disk storage controlled by asynchronous operations.\n// Provides Random Access Read/Stash asynchronous interface around low level linux file.\n// Handles ranges management and file growth via underlying ExternalAllocator.\nclass DiskStorage {\n public:\n  struct Stats {\n    size_t allocated_bytes = 0;\n    size_t capacity_bytes = 0;\n    uint64_t heap_buf_alloc_count = 0;\n    uint64_t registered_buf_alloc_count = 0;\n    size_t max_file_size = 0;\n    size_t pending_ops = 0;\n  };\n\n  using ReadCb = std::function<void(io::Result<std::string_view>)>;\n  using StashCb = std::function<void(std::error_code)>;\n\n  explicit DiskStorage(size_t max_size);\n  ~DiskStorage();\n\n  std::error_code Open(std::string_view path);\n  void Close();\n\n  // Request read for segment, cb will be called on completion with read value\n  void Read(DiskSegment segment, ReadCb cb);\n\n  // Mark segment as free, performed immediately\n  void MarkAsFree(DiskSegment segment);\n\n  // Allocate segment of at least given length and prepare buffer. 
Never blocks on file growth:\n  // if no space is available, a grow is requested asynchronously and an error is returned.\n  // Every successful preparation must end in a Stash(), otherwise resources are leaked.\n  io::Result<std::pair<size_t /* offset */, util::fb2::RegisteredSlice>> PrepareStash(\n      size_t length);\n\n  // Write prepared buffer to given segment and resolve completion callback when write is done.\n  void Stash(DiskSegment segment, util::fb2::RegisteredSlice buf, StashCb cb);\n\n  Stats GetStats() const;\n\n private:\n  // Try asynchronously growing backing file by requested size\n  std::error_code RequestGrow(off_t grow_size);\n\n  // Returns a buffer with size greater or equal to len.\n  util::fb2::RegisteredSlice PrepareBuf(size_t len);\n\n  off_t max_size_;\n  size_t pending_ops_ = 0;  // number of ongoing ops for safe shutdown\n\n  // how many times we allocate registered/heap buffers.\n  uint64_t heap_buf_alloc_cnt_ = 0, reg_buf_alloc_cnt_ = 0;\n\n  struct {\n    bool pending = false;  // currently in progress\n    std::error_code last_err;\n    uint64_t timestamp_ns;  // last grow finished\n  } grow_;                  // status of last RequestGrow() operation\n\n  std::string backing_file_path_;\n  std::unique_ptr<util::fb2::LinuxFile> backing_file_;\n  ExternalAllocator alloc_;\n};\n\n};  // namespace dfly::tiering\n"
  },
  {
    "path": "src/server/tiering/disk_storage_test.cc",
    "content": "// Copyright 2024, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#include \"server/tiering/disk_storage.h\"\n\n#include <memory>\n\n#include \"base/gtest.h\"\n#include \"base/logging.h\"\n#include \"server/tiering/common.h\"\n#include \"server/tiering/test_common.h\"\n#include \"util/fibers/fibers.h\"\n#include \"util/fibers/pool.h\"\n\nnamespace dfly::tiering {\n\nusing namespace std;\nusing namespace std::string_literals;\n\nstruct DiskStorageTest : public PoolTestBase {\n  ~DiskStorageTest() {\n    EXPECT_EQ(pending_ops_, 0);\n  }\n\n  error_code Open(string filename = \"disk_storage_test_backing\") {\n    filename_ = filename;\n    storage_ = make_unique<DiskStorage>(256_MB);\n    return storage_->Open(filename_);\n  }\n\n  void Close() {\n    storage_->Close();\n    storage_.reset();\n\n    // Disk storage deletes its files on exit\n    EXPECT_FALSE(std::filesystem::exists(filename_));\n  }\n\n  void Stash(size_t index, string value) {\n    pending_ops_++;\n\n    auto prepared = storage_->PrepareStash(value.length());\n    EXPECT_TRUE(prepared.has_value());\n    auto [offset, buf] = *prepared;\n    memcpy(buf.bytes.data(), value.data(), value.size());\n\n    DiskSegment segment{offset, value.size()};\n    storage_->Stash({offset, value.size()}, buf, [this, index, segment](std::error_code ec) {\n      segments_[index] = segment;\n      pending_ops_--;\n    });\n  }\n\n  void Read(size_t index) {\n    pending_ops_++;\n    storage_->Read(*segments_[index], [this, index](io::Result<string_view> value) {\n      last_reads_[index] =\n          value.has_value() ? 
io::Result<string>(*value) : nonstd::make_unexpected(value.error());\n      pending_ops_--;\n    });\n  }\n\n  void Delete(size_t index) {\n    storage_->MarkAsFree(*segments_[index]);\n    segments_.erase(index);\n    last_reads_.erase(index);\n  }\n\n  void Wait() const {\n    while (pending_ops_ > 0) {\n      ::util::ThisFiber::SleepFor(1ms);\n    }\n  }\n\n  DiskStorage::Stats GetStats() const {\n    return storage_->GetStats();\n  }\n\n protected:\n  int pending_ops_ = 0;\n\n  std::string filename_;\n  std::unordered_map<size_t, io::Result<std::string>> last_reads_;\n  std::unordered_map<size_t, io::Result<DiskSegment>> segments_;\n  std::unique_ptr<DiskStorage> storage_;\n};\n\nTEST_F(DiskStorageTest, Basic) {\n  pp_->at(0)->Await([this] {\n    // Write 100 values\n    Open();\n    for (size_t i = 0; i < 100; i++)\n      Stash(i, absl::StrCat(\"value\", i));\n    Wait();\n    EXPECT_EQ(segments_.size(), 100);\n\n    EXPECT_EQ(GetStats().allocated_bytes, 100 * kPageSize);\n\n    // Read all 100 values\n    for (size_t i = 0; i < 100; i++)\n      Read(i);\n    Wait();\n\n    // Expect them to be equal to written\n    for (size_t i = 0; i < 100; i++)\n      EXPECT_EQ(*last_reads_[i], absl::StrCat(\"value\", i));\n\n    // Delete all values\n    for (size_t i = 0; i < 100; i++)\n      Delete(i);\n    EXPECT_EQ(GetStats().allocated_bytes, 0);\n\n    Close();\n  });\n}\n\nTEST_F(DiskStorageTest, ReUse) {\n  pp_->at(0)->Await([this] {\n    Open();\n\n    Stash(0, \"value1\");\n    Wait();\n    EXPECT_EQ(segments_[0]->offset, 0u);\n\n    Delete(0);\n\n    Stash(1, \"value2\");\n    Wait();\n    EXPECT_EQ(segments_[1]->offset, 0u);\n\n    Close();\n  });\n}\n\nTEST_F(DiskStorageTest, FlakyDevice) {\n  if (!filesystem::exists(\"/mnt/tiering_flaky\"))\n    GTEST_SKIP() << \"Flaky device not created, use tools/faulty_io.sh\";\n\n  pp_->at(0)->Await([this] {\n    auto ec = Open(\"/mnt/tiering_flaky/backing\");\n    EXPECT_FALSE(ec) << ec.message();\n\n    // Create stash 
sequence lasting two seconds\n    const int kEntries = 200;\n    for (int i = 0; i < kEntries; i++) {\n      util::ThisFiber::SleepFor(10ms);\n      Stash(i, \"value\");\n    }\n    Wait();\n\n    // Make sure we saw at least some errors\n    int errors = 0;\n    for (int i = 0; i < kEntries; i++)\n      errors += (!segments_[i].has_value());\n    EXPECT_GT(errors, 0);\n    EXPECT_LT(errors, kEntries);\n\n    Close();\n  });\n}\n\n}  // namespace dfly::tiering\n"
  },
  {
    "path": "src/server/tiering/entry_map.h",
    "content": "// Copyright 2025, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#pragma once\n\n#include <absl/container/flat_hash_map.h>\n\n#include <string>\n\n#include \"server/common_types.h\"\n\nnamespace dfly::tiering {\n\nnamespace detail {\nstruct Hasher {\n  using is_transparent = void;\n  template <typename S> size_t operator()(const std::pair<DbIndex, S>& p) const {\n    return absl::HashOf(p);\n  }\n};\n\nstruct Eq {\n  using is_transparent = void;\n  template <typename S1, typename S2>\n  bool operator()(const std::pair<DbIndex, S1>& l, const std::pair<DbIndex, S2>& r) const {\n    const auto& [i1, s1] = l;\n    const auto& [i2, s2] = r;\n    return i1 == i2 && s1 == s2;\n  }\n};\n}  // namespace detail\n\nusing DbKeyId = std::pair<DbIndex, std::string>;\n\n// Map of key (db index, string key) -> T with heterogeneous lookup\ntemplate <typename T> using EntryMap = absl::flat_hash_map<DbKeyId, T, detail::Hasher, detail::Eq>;\n\n}  // namespace dfly::tiering\n"
  },
  {
    "path": "src/server/tiering/external_alloc.cc",
    "content": "// Copyright 2022, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#include \"src/server/tiering/external_alloc.h\"\n\n#include <mimalloc.h>\n\n#include <bitset>\n#include <cstring>\n\n#include \"base/logging.h\"\n\nnamespace dfly::tiering {\nusing namespace std;\nusing detail::PageClass;\n\nusing BinIdx = uint8_t;\n\nnamespace {\n\nconstexpr inline size_t divup(size_t num, size_t div) {\n  return (num + div - 1) / div;\n}\n\nconstexpr inline size_t alignup(size_t num, size_t align) {\n  size_t amask = align - 1;\n  return (num + amask) & (~amask);\n}\n\nconstexpr inline size_t wsize_from_size(size_t size) {\n  return divup(size, sizeof(uintptr_t));\n}\n\nconstexpr size_t kMinBlockSize = ExternalAllocator::kMinBlockSize;\n\nconstexpr size_t kSmallPageShift = 20;\nconstexpr size_t kMediumPageShift = 24;\nconstexpr size_t kSmallPageSize = 1UL << kSmallPageShift;    // 1MB\nconstexpr size_t kMediumPageSize = 1UL << kMediumPageShift;  // 16MB\n\n// we preserve 16:1 ratio, i.e. 
each page can host at least 16 blocks within its class.\nconstexpr size_t kSmallObjMaxSize = kSmallPageSize / 16;\nconstexpr size_t kMediumObjMaxSize = kMediumPageSize / 16;\n\nconstexpr size_t kSegmentAlignment = 256_MB;\nconstexpr size_t kSegmentSize = 256_MB;\n\nconstexpr unsigned kNumBins = detail::kNumFreePages;\nconstexpr unsigned kLargeSizeBin = kNumBins - 1;\nconstexpr unsigned kMaxPagesInSegment = kSegmentSize / kSmallPageSize;\nconstexpr unsigned kSegDescrAlignment = 16_KB;\n\nconstexpr size_t kBinWordLens[kNumBins] = {\n    512,   512 * 2, 512 * 3, 2048,  2560,  3072,  3584,   4096,   5120,      6144,\n    7168,  8192,    10240,   12288, 14336, 16384, 20480,  24576,  28672,     32768,\n    40960, 49152,   57344,   65536, 81920, 98304, 114688, 131072, UINT64_MAX};\n\nstatic_assert(kBinWordLens[kLargeSizeBin - 1] * 8 == kMediumObjMaxSize);\nstatic_assert(kBinWordLens[kLargeSizeBin] == UINT64_MAX);\n\nconstexpr inline BinIdx ToBinIdx(size_t size) {\n  // first 4 bins are multiplies of kMinBlockSize.\n  if (size < ExternalAllocator::kMinBlockSize * 4) {\n    return size <= ExternalAllocator::kMinBlockSize ? 
0\n                                                    : (size - 1) / ExternalAllocator::kMinBlockSize;\n  }\n\n  if (size > kMediumObjMaxSize) {\n    return kLargeSizeBin;\n  }\n\n  size_t wsize = wsize_from_size(size);\n\n  // to correct rounding up of size to words that the last word will be within the range.\n  --wsize;\n\n  // find the highest bit\n  uint8_t b = 63 - __builtin_clzl(wsize);\n  return (b << 2) + ((wsize >> (b - 2)) & 3) - 40;\n}\n\nstatic_assert(ToBinIdx(kMinBlockSize) == 0);\nstatic_assert(ToBinIdx(kMinBlockSize * 2) == 1);\nstatic_assert(ToBinIdx(kMinBlockSize * 3) == 2);\nstatic_assert(ToBinIdx(kMinBlockSize * 4) == 3);\nstatic_assert(ToBinIdx(kMinBlockSize * 5) == 4);\nstatic_assert(ToBinIdx(kMinBlockSize * 6) == 5);\nstatic_assert(ToBinIdx(kMinBlockSize * 6 + 1) == 6);\nstatic_assert(ToBinIdx(kMinBlockSize * 7) == 6);\n\nsize_t ToBlockSize(BinIdx idx) {\n  return kBinWordLens[idx] * 8;\n}\n\n// num pages in a segment of that class.\nunsigned NumPagesInSegment(PageClass pc) {\n  switch (pc) {\n    case PageClass::SMALL_P:\n      return kSegmentSize >> kSmallPageShift;\n    case PageClass::MEDIUM_P:\n      return kSegmentSize >> kMediumPageShift;\n      break;\n    case PageClass::LARGE_P:\n      return 1;\n      break;\n  }\n  // unreachable.\n  return 0;\n}\n\ntemplate <size_t N> size_t FindFirst(const std::bitset<N>& bs) {\n#ifdef _LIBCPP_VERSION\n  for (size_t i = 0; i < bs.size(); ++i) {\n    if (bs.test(i))\n      return i;\n  }\n#else\n  return bs._Find_first();\n#endif\n}\n\n};  // namespace\n\n/*\n   block 8Kb or more, page - 2MB (256 blocks) or bigger.\n\n\n   Block sizes grow exponentially - by factor ~1.25. See MI_PAGE_QUEUES_EMPTY definition\n   for sizes example.\n*/\nnamespace detail {\n\n// Page can be exactly in either these 3 states:\n// 1. unitialized - with no blocks being allocated - segment_inuse will be 0 in that case, 1\n// otherwise.\n// 2. Partly utilized by 1 or more blocks, with available > 0 in that case. 
It must be present in\n// free_pages_ list then.\n// 3. Fully utilized, with available==0, in that case it's not part of free_pages_ list.\nstruct Page {\n  std::bitset<256> free_blocks;  // bitmask of free blocks (32 bytes).\n  uint8_t id;                    // index inside the Segment.pages array.\n\n  // need some mapping function to map from block_size to real_block_size given Page class.\n  BinIdx bin_idx;\n  uint8_t segment_inuse : 1;  // true if segment allocated this page.\n  uint8_t reserved[3];\n\n  // can be computed via free_blocks.count().\n  uint16_t available;  // in number of blocks.\n  Page* next_free;     // next page in the free_pages_ list\n\n  // We can not use c'tor because we use the trick in segment where we allocate more pages\n  // than SegmentDescr declares.\n  void Reset(uint8_t new_id) {\n    static_assert(sizeof(Page) == 48);\n\n    memset(&id, 0, sizeof(Page) - offsetof(Page, id));\n    id = new_id;\n  }\n\n  void Init(PageClass pc, BinIdx bin_id);\n};\n\nconstexpr size_t kSegDescrDataSize = sizeof(Page) * kMaxPagesInSegment + 128;\nstatic_assert(kSegDescrDataSize < kSegDescrAlignment);\n\nvoid Page::Init(PageClass pc, BinIdx bin_id) {\n  DCHECK_EQ(available, 0);\n  DCHECK(segment_inuse);\n\n  bin_idx = bin_id;\n  if (pc == PageClass::LARGE_P) {\n    available = 1;\n  } else {\n    size_t page_size = (pc == PageClass::SMALL_P) ? 
kSmallPageSize : kMediumPageSize;\n    available = page_size / ToBlockSize(bin_id);\n  }\n\n  free_blocks.reset();\n  for (unsigned i = 0; i < available; ++i) {\n    free_blocks.set(i, true);\n  }\n}\n\nPageClass ClassFromSize(size_t size) {\n  if (size <= kSmallObjMaxSize)\n    return PageClass::SMALL_P;\n  if (size <= kMediumObjMaxSize)\n    return PageClass::MEDIUM_P;\n\n  return PageClass::LARGE_P;\n}\n\n}  // namespace detail\n\n//\n/**\n * SegmentDescr denotes a 256MB segment on external storage -\n * holds upto 256 pages (in case of small pages).\n * Each segment has pages of the same type, but each page can host blocks of\n * different sizes upto maximal block size for that page class.\n * SegmentDescr points to the range within external storage space.\n * By using the page.id together with segment->page_shift and segment->offset\n * one can know where the page is located in the storage.\n * Opposite direction: by giving an offset to the file, segment_id = offset / 256MB.\n * Moreover (offset % 256MB) >> segment.page_shift gives us the page id and subsequently\n * page_start.  segment.pages[page_id].block_size gives us the block size and that in turn gives us\n * block id within the page. 
We can also know block_size if the originally allocated\n   size is provided by using round_up function that was used to allocate the block.\n * SegmentDescr be aligned by kSegDescrAlignment boundaries - ToSegDescr relies on that.\n */\nclass ExternalAllocator::SegmentDescr {\n  SegmentDescr(const SegmentDescr&) = delete;\n  void operator=(const SegmentDescr&) = delete;\n  friend class ExternalAllocator;\n\n public:\n  explicit SegmentDescr(PageClass pc, size_t offs, uint16_t capacity);\n\n  Page* FindPageSegment() {\n    return page_info_.FindPageSegment();\n  }\n\n  Page* GetPage(unsigned i) {\n    return page_info_.pages + i;\n  }\n\n  size_t BlockOffset(const Page* page, unsigned blockpos) {\n    return offset_ + page->id * (1 << page_info_.page_shift) +\n           ToBlockSize(page->bin_idx) * blockpos;\n  }\n\n  bool HasFreePages() const {\n    return page_info_.capacity > page_info_.used;\n  }\n\n  unsigned capacity() const {\n    return page_info_.capacity;\n  }\n\n  unsigned used() const {\n    return page_info_.used;\n  }\n\n  unsigned page_shift() const {\n    return page_info_.page_shift;\n  }\n\n  PageClass page_class() const {\n    return page_class_;\n  }\n\n  SegmentDescr *next, *prev;\n\n  // Links seg before this.\n  void LinkBefore(SegmentDescr* seg) {\n    seg->next = this;\n    seg->prev = prev;\n    this->prev->next = seg;\n    this->prev = seg;\n  }\n\n  // detaches this from the circular list.\n  // returns next if the list is has more than 1 element\n  // returns null otherwise.\n  SegmentDescr* Detach() {\n    if (next == this)\n      return nullptr;\n\n    next->prev = prev;\n    prev->next = next;\n\n    SegmentDescr* res = next;\n    next = prev = this;\n    return res;\n  }\n\n private:\n  uint64_t offset_;  // size_ - relevant for large segments.\n  PageClass page_class_;\n\n  struct PageInfo {\n    uint16_t capacity, used;  // in number of pages.\n    uint8_t page_shift;\n    Page pages[0];  // must be the last field. 
Can be 1-256 pages.\n\n    PageInfo(uint16_t c) : capacity(c), used(0), page_shift(0) {\n    }\n\n    auto FindPageSegment() -> Page* {\n      for (uint32_t i = 0; i < capacity; ++i) {\n        if (!pages[i].segment_inuse) {\n          pages[i].segment_inuse = 1;\n          ++used;\n          return pages + i;\n        }\n      }\n\n      LOG(DFATAL) << \"Should not reach here\";\n\n      return nullptr;\n    }\n  };\n\n  PageInfo page_info_;\n};\n\nExternalAllocator::SegmentDescr::SegmentDescr(PageClass pc, size_t offs, uint16_t page_capacity)\n    : offset_(offs), page_class_(pc), page_info_(page_capacity) {\n  constexpr size_t kDescrSize = sizeof(SegmentDescr);\n  (void)kDescrSize;\n\n  next = prev = this;\n  DCHECK(pc != PageClass::LARGE_P);\n\n  if (pc == PageClass::MEDIUM_P)\n    page_info_.page_shift = kMediumPageShift;\n  else\n    page_info_.page_shift = kSmallPageShift;\n\n  for (unsigned i = 0; i < page_capacity; ++i) {\n    page_info_.pages[i].Reset(i);\n  }\n}\n\nstatic detail::Page empty_page;\n\nExternalAllocator::ExternalAllocator() {\n  std::fill(sq_, sq_ + ABSL_ARRAYSIZE(sq_), nullptr);\n  std::fill(free_pages_, free_pages_ + detail::kNumFreePages, &empty_page);\n}\n\nExternalAllocator::~ExternalAllocator() {\n  for (auto* seg : segments_) {\n    mi_free(seg);\n  }\n}\n\nint64_t ExternalAllocator::Malloc(size_t sz) {\n  uint8_t bin_idx = ToBinIdx(sz);\n  Page* page = free_pages_[bin_idx];\n  if (page->available == 0) {  // empty page.\n    PageClass pc = detail::ClassFromSize(sz);\n    if (pc == PageClass::LARGE_P) {\n      return LargeMalloc(sz);\n    }\n\n    page = FindPage(pc);\n    if (!page)\n      return -int64_t(kSegmentSize);\n\n    DVLOG(2) << \"Allocated page: for bin \" << bin_idx << \" class \" << static_cast<int>(pc);\n    free_pages_[bin_idx] = page;\n    page->Init(pc, bin_idx);\n  }\n\n  DCHECK(page->available);\n  size_t pos = FindFirst(page->free_blocks);\n  page->free_blocks.flip(pos);\n\n  if (--page->available == 0)  // 
Remove empty page from freelist\n    free_pages_[bin_idx] = page->next_free ? page->next_free : &empty_page;\n\n  allocated_bytes_ += ToBlockSize(page->bin_idx);\n  SegmentDescr* seg = ToSegDescr(page);\n  return seg->BlockOffset(page, pos);\n}\n\nvoid ExternalAllocator::Free(size_t offset, size_t sz) {\n  if (sz > kMediumObjMaxSize) {\n    size_t align_sz = alignup(sz, 4_KB);\n    extent_tree_.Add(offset, align_sz);\n    return;\n  }\n\n  size_t idx = offset / 256_MB;\n  size_t delta = offset % 256_MB;\n  CHECK_LT(idx, segments_.size());\n  CHECK(segments_[idx]);\n\n  SegmentDescr* seg = segments_[idx];\n  unsigned page_id = delta >> seg->page_shift();\n  CHECK_LT(page_id, seg->capacity());\n\n  Page* page = seg->GetPage(page_id);\n  unsigned page_size = (1 << seg->page_shift());\n  unsigned block_offs = delta % page_size;\n  unsigned block_size = ToBlockSize(page->bin_idx);\n  unsigned block_id = block_offs / block_size;\n  unsigned blocks_num = page_size / block_size;\n\n  CHECK_LE(sz, block_size);\n  DCHECK_LT(block_id, blocks_num);\n  DCHECK(!page->free_blocks[block_id]) << offset;\n\n  page->free_blocks.set(block_id);\n  ++page->available;\n\n  DCHECK_EQ(page->available, page->free_blocks.count());\n  // If page becomes fully free, return it to segment list, otherwise if it just became non-empty,\n  // then return it to free pages list\n  if (page->available == blocks_num) {\n    FreePage(page, seg, block_size);\n  } else if (page->available == 1) {\n    DCHECK_NE(page, free_pages_[page->bin_idx]);\n    page->next_free = free_pages_[page->bin_idx];\n    free_pages_[page->bin_idx] = page;\n  }\n  allocated_bytes_ -= block_size;\n}\n\nvoid ExternalAllocator::AddStorage(size_t start, size_t size) {\n  VLOG(1) << \"AddStorage \" << start << \"/\" << size;\n\n  extent_tree_.Add(start, size);\n  capacity_ += size;\n}\n\nsize_t ExternalAllocator::GoodSize(size_t sz) {\n  uint8_t bin_idx = ToBinIdx(sz);\n  if (bin_idx < kLargeSizeBin)\n    return 
ToBlockSize(bin_idx);\n\n  return alignup(sz, 4_KB);\n}\n\n/**\n *\n  _____      _            _          __                  _   _\n |  __ \\    (_)          | |        / _|                | | (_)\n | |__) | __ ___   ____ _| |_ ___  | |_ _   _ _ __   ___| |_ _  ___  _ __  ___\n |  ___/ '__| \\ \\ / / _` | __/ _ \\ |  _| | | | '_ \\ / __| __| |/ _ \\| '_ \\/ __|\n | |   | |  | |\\ V / (_| | ||  __/ | | | |_| | | | | (__| |_| | (_) | | | \\__ \\\n |_|   |_|  |_| \\_/ \\__,_|\\__\\___| |_|  \\__,_|_| |_|\\___|\\__|_|\\___/|_| |_|___/\n\n src: https://patorjk.com/software/taag/#f=Big\n */\n\n// private functions\nauto ExternalAllocator::FindPage(PageClass pc) -> Page* {\n  DCHECK_NE(pc, PageClass::LARGE_P);\n\n  SegmentDescr* seg = sq_[pc];\n  while (seg) {\n    if (seg->HasFreePages()) {\n      return seg->FindPageSegment();\n    }\n\n    // remove head.\n    SegmentDescr* next = seg->Detach();\n    sq_[pc] = next;\n    seg = next;\n  }\n\n  // no pages in the existing segments. Lets search in the extent tree.\n  auto op_range = extent_tree_.GetRange(kSegmentSize, kSegmentAlignment);\n  if (op_range) {\n    DCHECK_EQ(0u, op_range->first % kSegmentAlignment);\n\n    unsigned num_pages = NumPagesInSegment(pc);\n    size_t seg_idx = op_range->first / kSegmentAlignment;\n\n    if (segments_.size() > seg_idx) {\n      DCHECK(segments_[seg_idx] == nullptr);\n    } else {\n      segments_.resize(seg_idx + 1);\n    }\n\n    void* ptr =\n        mi_malloc_aligned(sizeof(SegmentDescr) + num_pages * sizeof(Page), kSegDescrAlignment);\n    SegmentDescr* seg = new (ptr) SegmentDescr(pc, op_range->first, num_pages);\n    segments_[seg_idx] = seg;\n\n    DCHECK(sq_[pc] == NULL);\n    DCHECK(seg->next == seg->prev && seg == seg->next);\n\n    sq_[pc] = seg;\n    return seg->FindPageSegment();\n  }\n\n  return nullptr;\n}\n\nint64_t ExternalAllocator::LargeMalloc(size_t size) {\n  size_t align_sz = alignup(size, 4_KB);\n  auto op_range = extent_tree_.GetRange(align_sz, 4_KB);\n  if 
(!op_range) {\n    align_sz = max(align_sz, kSegmentSize);\n    return -int64_t(align_sz);\n  }\n\n  return op_range->first;\n}\n\nvoid ExternalAllocator::FreePage(Page* page, SegmentDescr* owner, size_t block_size) {\n  // page is fully free. Return it to the segment even if it's\n  // referenced via free_pages_. This allows more elasticity by potentially reassigning\n  // it to other bin sizes.\n  BinIdx bidx = ToBinIdx(block_size);\n\n  // Remove fast allocation reference.\n  if (free_pages_[bidx] == page) {\n    free_pages_[bidx] = page->next_free ? page->next_free : &empty_page;\n  } else {\n    for (auto* cur = free_pages_[bidx]; cur != nullptr; cur = cur->next_free) {\n      if (cur->next_free == page) {\n        cur->next_free = page->next_free;\n        break;\n      }\n    }\n  }\n\n  page->segment_inuse = 0;\n  page->available = 0;\n  page->next_free = nullptr;\n\n  if (!owner->HasFreePages()) {\n    // Segment was fully booked but now it has a free page.\n    // Add it to the tail of segment queue.\n    DCHECK(owner->next == owner->prev);\n\n    auto& sq = sq_[owner->page_class()];\n    if (sq == nullptr) {\n      sq = owner;\n    } else {\n      sq->LinkBefore(owner);\n    }\n  }\n  --owner->page_info_.used;\n}\n\ninline auto ExternalAllocator::ToSegDescr(Page* page) -> SegmentDescr* {\n  uintptr_t ptr = (uintptr_t)page;\n\n  // find SegDescr boundary.\n  uintptr_t seg_ptr = ptr & ~uintptr_t(kSegDescrAlignment - 1);\n  SegmentDescr* res = reinterpret_cast<SegmentDescr*>(seg_ptr);\n\n  DCHECK(res->GetPage(page->id) == page);\n\n  return res;\n}\n\n}  // namespace dfly::tiering\n"
  },
  {
    "path": "src/server/tiering/external_alloc.h",
    "content": "// Copyright 2022, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n#pragma once\n\n#include <cstddef>\n#include <cstdint>\n#include <vector>\n\n#include \"core/extent_tree.h\"\n#include \"server/tiering/common.h\"\n\nnamespace dfly::tiering {\n\n/**\n *\n * An external allocator inspired by mimalloc. Its goal is to maintain a state machine for\n * bookkeeping the allocations of different sizes that are backed up by a separate\n * storage. It could be a disk, SSD or another memory allocator. This class serves\n * as a state machine that either returns an offset to the backing storage or the indication\n * of the resource that is missing. The advantage of such design is that we can use it in\n * asynchronous callbacks without blocking on any IO requests.\n * The allocator uses dynamic memory internally. Should be used in a single thread.\n *\n */\n\nnamespace detail {\nstruct Page;\n\nconstexpr unsigned kNumFreePages = 29;\n\n/**\n * pages classes can be SMALL, MEDIUM or LARGE. SMALL (2MB) for block sizes upto 128KB.\n * MEDIUM (16MB) for block sizes 128KB-1MB. Anything else is LARGE.\n *\n */\nenum PageClass : uint16_t {\n  SMALL_P = 0,\n  MEDIUM_P = 1,\n  LARGE_P = 2,\n};\n\nPageClass ClassFromSize(size_t size);\n\n}  // namespace detail\n\nclass ExternalAllocator {\n  ExternalAllocator(const ExternalAllocator&) = delete;\n  void operator=(const ExternalAllocator&) = delete;\n\n public:\n  static constexpr size_t kExtAlignment = 256_MB;     // 256 MB\n  static constexpr size_t kMinBlockSize = kPageSize;  // 4KB\n\n  ExternalAllocator();\n  ~ExternalAllocator();\n\n  // If a negative result - backing storage is required of size=-result. See AddStorage\n  // on how to add more storage.\n  // For results >= 0 Returns offset to the backing storage where we may write the data of\n  // size sz.\n  int64_t Malloc(size_t sz);\n\n  void Free(size_t offset, size_t sz);\n\n  /// Adds backing storage to the allocator. 
The range should not overlap with already\n  /// added storage ranges.\n  void AddStorage(size_t start, size_t size);\n\n  // Similar to mi_good_size, returns the size of the underlying block as if\n  // it were returned by Malloc. The result is guaranteed to be not less than sz.\n  // No allocation is done.\n  static size_t GoodSize(size_t sz);\n\n  size_t capacity() const {\n    return capacity_;\n  }\n\n  size_t allocated_bytes() const {\n    return allocated_bytes_;\n  }\n\n private:\n  class SegmentDescr;\n  using Page = detail::Page;\n\n  // Returns a page if there is a segment of that class.\n  // Returns NULL if no page is found.\n  Page* FindPage(detail::PageClass sc);\n\n  int64_t LargeMalloc(size_t size);\n  SegmentDescr* GetNewSegment(detail::PageClass sc);\n  void FreePage(Page* page, SegmentDescr* owner, size_t block_size);\n\n  static SegmentDescr* ToSegDescr(Page*);\n\n  SegmentDescr* sq_[2];                      // map: PageClass -> free Segment.\n  Page* free_pages_[detail::kNumFreePages];  // intrusive linked lists of pages with free blocks\n\n  // A segment for each 256MB range. To get a segment id from the offset, shift right by 28.\n  std::vector<SegmentDescr*> segments_;\n\n  ExtentTree extent_tree_;\n\n  size_t capacity_ = 0;  // in bytes.\n  size_t allocated_bytes_ = 0;\n};\n\n}  // namespace dfly::tiering\n"
  },
  {
    "path": "src/server/tiering/external_alloc_test.cc",
    "content": "// Copyright 2022, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#include \"server/tiering/external_alloc.h\"\n\n#include \"base/gtest.h\"\n#include \"base/logging.h\"\n\nnamespace dfly::tiering {\n\nusing namespace std;\n\nclass ExternalAllocatorTest : public ::testing::Test {\n protected:\n  static void SetUpTestSuite() {\n  }\n\n  static void TearDownTestSuite() {\n  }\n\n  ExternalAllocator ext_alloc_;\n};\n\nconstexpr int64_t kSegSize = 256_MB;\n\nstd::map<int64_t, size_t> AllocateFully(ExternalAllocator* alloc) {\n  std::map<int64_t, size_t> ranges;\n\n  int64_t res = 0;\n  while (res >= 0) {\n    for (unsigned j = 1; j < 5; ++j) {\n      size_t sz = 8000 * j;\n      res = alloc->Malloc(sz);\n      if (res < 0)\n        break;\n      auto [it, added] = ranges.emplace(res, sz);\n      VLOG(1) << \"res: \" << res << \" size: \" << sz << \" added: \" << added;\n      CHECK(added);\n    }\n  }\n\n  return ranges;\n}\n\nconstexpr size_t kMinBlockSize = ExternalAllocator::kMinBlockSize;\n\nTEST_F(ExternalAllocatorTest, Basic) {\n  int64_t res = ext_alloc_.Malloc(128);\n  EXPECT_EQ(-kSegSize, res);\n\n  ext_alloc_.AddStorage(0, kSegSize);\n  EXPECT_EQ(0, ext_alloc_.Malloc(kMinBlockSize - 96));         //  page0: 1\n  EXPECT_EQ(kMinBlockSize, ext_alloc_.Malloc(kMinBlockSize));  //  page0: 2\n\n  constexpr auto kAnotherLen = kMinBlockSize * 2 - 10;\n  size_t offset2 = ext_alloc_.Malloc(kAnotherLen);  // page1: 1\n  EXPECT_EQ(offset2, 1_MB);                         // another page.\n\n  ext_alloc_.Free(offset2, kAnotherLen);         // should return the page to the segment.\n  EXPECT_EQ(offset2, ext_alloc_.Malloc(16_KB));  // another page.  
page1: 1\n\n  ext_alloc_.Free(0, kMinBlockSize - 96);         // page0: 1\n  ext_alloc_.Free(kMinBlockSize, kMinBlockSize);  // page0: 0\n\n  EXPECT_EQ(0, ext_alloc_.Malloc(kMinBlockSize * 2));  // page0\n}\n\nTEST_F(ExternalAllocatorTest, Invariants) {\n  ext_alloc_.AddStorage(0, kSegSize);\n\n  auto ranges = AllocateFully(&ext_alloc_);\n  EXPECT_GT(ext_alloc_.allocated_bytes(), ext_alloc_.capacity() * 0.75);\n\n  off_t last = 0;\n  for (const auto& k_v : ranges) {\n    ASSERT_GE(k_v.first, last);\n    last = k_v.first + k_v.second;\n  }\n\n  for (const auto& k_v : ranges) {\n    ext_alloc_.Free(k_v.first, k_v.second);\n  }\n  EXPECT_EQ(0, ext_alloc_.allocated_bytes());\n\n  for (const auto& k_v : ranges) {\n    int64_t res = ext_alloc_.Malloc(k_v.second);\n    ASSERT_GE(res, 0);\n  }\n}\n\nTEST_F(ExternalAllocatorTest, Classes) {\n  using detail::ClassFromSize;\n\n  ext_alloc_.AddStorage(0, kSegSize);\n  constexpr size_t kMaxSmallPage = 64_KB;\n  ASSERT_EQ(detail::SMALL_P, ClassFromSize(kMaxSmallPage));\n  ASSERT_EQ(detail::MEDIUM_P, ClassFromSize(kMaxSmallPage + 1));\n  ASSERT_EQ(detail::LARGE_P, ClassFromSize(1_MB + 1));\n\n  off_t offs1 = ext_alloc_.Malloc(kMaxSmallPage);\n  EXPECT_EQ(offs1, 0);\n\n  off_t offs2 = ext_alloc_.Malloc(kMaxSmallPage + 1);\n  EXPECT_EQ(offs2, -kSegSize);\n\n  ext_alloc_.AddStorage(kSegSize, kSegSize);\n  offs2 = ext_alloc_.Malloc(kMaxSmallPage * 2 + 1);\n  ASSERT_GT(offs2, 0);\n  offs2 = ext_alloc_.Malloc(1_MB);\n  ASSERT_GT(offs2, 0);\n\n  off_t offs3 = ext_alloc_.Malloc(1_MB + 1);\n  ASSERT_LT(offs3, 0);\n  ext_alloc_.AddStorage(kSegSize * 2, kSegSize);\n  offs3 = ext_alloc_.Malloc(1_MB + 1);\n  ASSERT_GT(offs3, 0);\n\n  EXPECT_EQ(1_MB + 4_KB, ExternalAllocator::GoodSize(1_MB + 1));\n}\n\n// Fill up the allocator until it has to grow, remove 90% and make sure it has free space even with\n// extreme fragmentation\nTEST_F(ExternalAllocatorTest, EmptyFull) {\n  const int kAllocSize = kMinBlockSize;\n  ext_alloc_.AddStorage(0, 2 * 
kSegSize);\n\n  // Fill up the allocator\n  vector<int64_t> offsets;\n  int64_t offset;\n  do {\n    offset = ext_alloc_.Malloc(kAllocSize);\n    if (offset >= 0)\n      offsets.push_back(offset);\n  } while (offset >= 0);\n\n  // Keep only 10%, free 90%\n  for (size_t i = 0; i < offsets.size(); i++) {\n    if (i % 10 == 0)\n      continue;\n    ext_alloc_.Free(offsets[i], kAllocSize);\n  }\n\n  // Expect to succeed adding 10% without growing\n  for (size_t i = 0; i < offsets.size() / 10; i++)\n    EXPECT_GT(ext_alloc_.Malloc(kAllocSize), 0u);\n}\n\nTEST_F(ExternalAllocatorTest, AllocLarge) {\n  ext_alloc_.AddStorage(0, kSegSize);\n\n  off_t offs = ext_alloc_.Malloc(2_MB - 1);\n  EXPECT_EQ(offs, 0);\n  ext_alloc_.Free(offs, 2_MB - 1);\n}\n\n}  // namespace dfly::tiering\n"
  },
  {
    "path": "src/server/tiering/op_manager.cc",
    "content": "// Copyright 2024, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#include \"server/tiering/op_manager.h\"\n\n#include <variant>\n\n#include \"base/logging.h\"\n#include \"core/overloaded.h\"\n#include \"io/io.h\"\n#include \"server/tiering/common.h\"\n#include \"server/tiering/disk_storage.h\"\n#include \"util/fibers/fibers.h\"\nnamespace dfly::tiering {\n\nusing namespace std;\n\nOpManager::OwnedEntryId OpManager::ToOwned(PendingId id) {\n  return std::visit(Overloaded{[](uintptr_t i) -> OpManager::OwnedEntryId { return i; },\n                               [](std::pair<DbIndex, std::string_view> p) -> OwnedEntryId {\n                                 return std::make_pair(p.first, std::string{p.second});\n                               }},\n                    id);\n}\n\nstring OpManager::ToString(const OwnedEntryId& id) {\n  if (const auto* i = std::get_if<uintptr_t>(&id); i) {\n    return absl::StrCat(*i);\n  }\n  const auto& key = std::get<DbKeyId>(id);\n  return absl::StrCat(\"(\", key.first, \":\", key.second, \")\");\n}\n\nOpManager::OpManager(size_t max_size) : storage_{max_size} {\n}\n\nOpManager::~OpManager() {\n  DCHECK(pending_stash_ver_.empty());\n  DCHECK(pending_reads_.empty());\n}\n\nstd::error_code OpManager::Open(std::string_view file) {\n  return storage_.Open(file);\n}\n\nvoid OpManager::Close() {\n  storage_.Close();\n  DCHECK(pending_stash_ver_.empty());\n  DCHECK(pending_reads_.empty());\n}\n\nvoid OpManager::Enqueue(PendingId id, DiskSegment segment, const Decoder& decoder,\n                        ReadCallback cb) {\n  // Fill pages for prepared read as it has no penalty and potentially covers more small segments\n  PrepareRead(segment.ContainingPages())\n      .ForSegment(segment, id, decoder)\n      .read_cbs.emplace_back(std::move(cb));\n}\n\nvoid OpManager::CancelPending(PendingId id) {\n  // If the item isn't offloaded, it has io pending, so cancel it\n  
DCHECK(pending_stash_ver_.count(ToOwned(id)));\n  pending_stash_ver_.erase(ToOwned(id));\n}\n\nvoid OpManager::DeleteOffloaded(DiskSegment segment) {\n  EntryOps* pending_read = nullptr;\n\n  auto base_it = pending_reads_.find(segment.ContainingPages().offset);\n  if (base_it != pending_reads_.end())\n    pending_read = base_it->second.Find(segment);\n\n  if (pending_read) {\n    // Mark that the read operation must finalize with deletion.\n    pending_read->deleting = true;\n  } else if (NotifyDelete(segment) && base_it == pending_reads_.end()) {\n    storage_.MarkAsFree(segment.ContainingPages());\n  }\n}\n\nvoid OpManager::Stash(PendingId id_ref, tiering::DiskSegment segment,\n                      util::fb2::RegisteredSlice buf) {\n  auto id = ToOwned(id_ref);\n  unsigned version = ++pending_stash_counter_;\n  pending_stash_ver_[id] = version;\n\n  auto io_cb = [this, version, id = std::move(id), segment](std::error_code ec) {\n    ProcessStashed(id, version,\n                   ec ? 
nonstd::make_unexpected(ec) : io::Result<DiskSegment>(segment));\n  };\n\n  // May block due to blocking call to Grow.\n  storage_.Stash(segment, buf, std::move(io_cb));\n}\n\nstd::error_code OpManager::PrepareAndStash(PendingId id, size_t length,\n                                           const std::function<size_t(io::MutableBytes)>& writer) {\n  auto buf = PrepareStash(length);\n  if (!buf.has_value())\n    return buf.error();\n\n  size_t written = writer(buf->second.bytes);\n  Stash(id, {buf->first, written}, buf->second);\n  return {};\n}\n\nOpManager::ReadOp& OpManager::PrepareRead(DiskSegment aligned_segment) {\n  DCHECK_EQ(aligned_segment.offset % kPageSize, 0u);\n  DCHECK_EQ(aligned_segment.length % kPageSize, 0u);\n\n  auto [it, inserted] = pending_reads_.try_emplace(aligned_segment.offset, aligned_segment);\n  if (inserted) {\n    auto io_cb = [this, aligned_segment](io::Result<std::string_view> result) {\n      ProcessRead(aligned_segment.offset, result);\n    };\n    storage_.Read(aligned_segment, io_cb);\n  }\n  return it->second;\n}\n\nvoid OpManager::ProcessStashed(const OwnedEntryId& id, unsigned version,\n                               const io::Result<DiskSegment>& segment) {\n  if (auto it = pending_stash_ver_.find(id);\n      it != pending_stash_ver_.end() && it->second == version) {\n    pending_stash_ver_.erase(it);\n    NotifyStashed(id, segment);\n  } else if (segment) {\n    // Throw away the value because it's no longer up-to-date even if no error occurred\n    VLOG(1) << \"Releasing segment \" << *segment << \", id: \" << ToString(id);\n    storage_.MarkAsFree(*segment);\n  } else {\n    LOG(ERROR) << \"Stash failed with error \" << segment.error();\n  }\n}\n\nvoid OpManager::ProcessRead(size_t offset, io::Result<std::string_view> page) {\n  util::FiberAtomicGuard guard;  // atomically update items, no in-between states should be possible\n  ReadOp* info = &pending_reads_.at(offset);\n\n  // Reorder base read (offset 0) to be last, so 
reads for defragmentation are handled last.\n  // If we already have a page read for defragmentation pending and some other read for the\n  // sub-segment is enqueued, we first must handle the sub-segment read, only then the full page\n  // read\n  for (size_t i = 0; i + 1 < info->entry_ops.size(); i++) {\n    if (info->entry_ops[i].segment.offset % kPageSize == 0) {\n      std::swap(info->entry_ops[i], info->entry_ops.back());\n      break;\n    }\n  }\n\n  bool deleting_full = false;\n\n  // Notify functions in the loop may append items to info->entry_ops during the traversal\n  for (size_t i = 0; i < info->entry_ops.size(); i++) {\n    auto& ko = info->entry_ops[i];\n    if (page) {\n      size_t offset = ko.segment.offset - info->segment.offset;\n      ko.decoder->Initialize(page->substr(offset, ko.segment.length));\n      for (auto& cb : ko.read_cbs)\n        cb(&*ko.decoder);\n    } else {\n      for (auto& cb : ko.read_cbs)\n        cb(page.get_unexpected());\n    }\n\n    bool delete_from_storage = ko.deleting;\n\n    // If the item is not being deleted, report it as fetched to be cached potentially.\n    // In case it's cached, we might need to delete it.\n    if (page.has_value() && !delete_from_storage)\n      delete_from_storage |= NotifyFetched(ko.id, ko.segment, &*ko.decoder);\n\n    // If the item is being deleted, check if the full page needs to be deleted.\n    if (delete_from_storage)\n      deleting_full |= NotifyDelete(ko.segment);\n  }\n\n  if (deleting_full) {\n    storage_.MarkAsFree(info->segment);\n  }\n\n  pending_reads_.erase(offset);\n}\n\nOpManager::EntryOps::EntryOps(OwnedEntryId id, DiskSegment segment, const Decoder& decoder)\n    : id{std::move(id)}, segment{segment}, decoder{decoder.Clone()} {\n}\n\nOpManager::EntryOps& OpManager::ReadOp::ForSegment(DiskSegment key_segment, PendingId id,\n                                                   const Decoder& decoder) {\n  DCHECK_GE(key_segment.offset, segment.offset);\n  
DCHECK_LE(key_segment.length, segment.length);\n\n  for (auto& ops : entry_ops) {\n    if (ops.segment.offset == key_segment.offset) {\n      DCHECK(typeid(*ops.decoder) == typeid(decoder));\n      return ops;\n    }\n  }\n  return entry_ops.emplace_back(ToOwned(id), key_segment, decoder);\n}\n\nOpManager::EntryOps* OpManager::ReadOp::Find(DiskSegment key_segment) {\n  for (auto& ops : entry_ops) {\n    if (ops.segment.offset == key_segment.offset)\n      return &ops;\n  }\n  return nullptr;\n}\n\nOpManager::Stats OpManager::GetStats() const {\n  return {.disk_stats = storage_.GetStats(),\n          .pending_read_cnt = pending_reads_.size(),\n          .pending_stash_cnt = pending_stash_ver_.size()};\n}\n\n}  // namespace dfly::tiering\n"
  },
  {
    "path": "src/server/tiering/op_manager.h",
    "content": "// Copyright 2024, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#pragma once\n\n#include <absl/container/flat_hash_map.h>\n#include <absl/container/inlined_vector.h>\n\n#include <variant>\n\n#include \"base/function2.hpp\"\n#include \"server/tiering/common.h\"\n#include \"server/tiering/decoders.h\"\n#include \"server/tiering/disk_storage.h\"\n#include \"server/tiering/entry_map.h\"\n#include \"util/fibers/future.h\"\n\nnamespace dfly::tiering {\n\n// Manages READ/DELETE/STASH operations on top of a DiskStorage.\n// Implicitly combines reads with different offsets on the same 4kb page,\n// safely schedules deletes after reads and allows cancelling pending stashes\nclass OpManager {\n public:\n  struct Stats {\n    DiskStorage::Stats disk_stats;\n\n    size_t pending_read_cnt = 0;\n    size_t pending_stash_cnt = 0;\n  };\n\n  using KeyRef = ::dfly::tiering::KeyRef;\n\n  using PendingId = ::dfly::tiering::PendingId;\n\n  explicit OpManager(size_t max_size);\n  virtual ~OpManager();\n\n  // Open file with underlying disk storage, must be called before use\n  std::error_code Open(std::string_view file);\n\n  void Close();\n\n  using ReadCallback =\n      fu2::function_base<true /*owns*/, false /*moveable*/, fu2::capacity_fixed<40, 8>,\n                         false /* non-throwing*/, false /* strong exceptions guarantees*/,\n                         void(io::Result<Decoder*>)>;\n\n  // Enqueue callback to be executed once value is read. Trigger read if none is pending yet for\n  // this segment. 
Multiple entries can be obtained from a single segment, but every distinct id\n  // will have its own independent callback loop that can safely modify the underlying value\n  void Enqueue(PendingId id, DiskSegment segment, const Decoder& decoder, ReadCallback cb);\n\n  // Cancel entry with pending io\n  void CancelPending(PendingId id);\n\n  // Delete offloaded entry located at the segment.\n  void DeleteOffloaded(DiskSegment segment);\n\n  auto PrepareStash(size_t length) {\n    return storage_.PrepareStash(length);\n  }\n\n  // Stash value to be offloaded. It is opaque to OpManager.\n  void Stash(PendingId id, tiering::DiskSegment segment, util::fb2::RegisteredSlice buf);\n\n  // PrepareStash + Stash via function\n  std::error_code PrepareAndStash(\n      PendingId id, size_t length,\n      const std::function<size_t /*written*/ (io::MutableBytes)>& writer);\n\n  Stats GetStats() const;\n\n protected:\n  using OwnedEntryId = std::variant<uintptr_t, DbKeyId>;\n\n  // Notify that a stash succeeded and the entry was stored at the provided segment or failed with\n  // given error\n  virtual void NotifyStashed(const OwnedEntryId& id, const io::Result<DiskSegment>& segment) = 0;\n\n  // Notify that an entry was successfully fetched. Includes whether entry was modified.\n  // Returns true if value needs to be deleted from the storage.\n  virtual bool NotifyFetched(const OwnedEntryId& id, DiskSegment segment, Decoder*) = 0;\n\n  // Notify delete. Return true if the filled segment needs to be marked as free.\n  virtual bool NotifyDelete(DiskSegment segment) = 0;\n\n  // Describes pending read futures for a single entry\n  struct EntryOps {\n    EntryOps(OwnedEntryId id, DiskSegment segment, const Decoder& decoder);\n\n    // unique identifier for the entry being read. 
Used to notify higher layers.\n    OwnedEntryId id;\n\n    // For multi-bin reads is a precise segment of the entry within a page.\n    DiskSegment segment;\n\n    // We may have multiple callbacks for the same entry.\n    absl::InlinedVector<ReadCallback, 1> read_cbs;\n    std::unique_ptr<Decoder> decoder;\n    bool deleting = false;\n  };\n\n  // Describes an ongoing read operation for a fixed segment\n  struct ReadOp {\n    explicit ReadOp(DiskSegment segment) : segment(segment) {\n    }\n\n    // Get ops for id or create new\n    EntryOps& ForSegment(DiskSegment segment, PendingId id, const Decoder& decoder);\n\n    // Find if there are operations for the given segment, return nullptr otherwise\n    EntryOps* Find(DiskSegment segment);\n\n    DiskSegment segment;  // spanning segment of whole read\n\n    // enqueued operations for different keys for this segment.\n    // Has size() > 1 only for small-bin pages with multiple items, otherwise size() == 1.\n    absl::InlinedVector<EntryOps, 1> entry_ops;\n  };\n\n  // Prepare read operation for aligned segment or return pending if it exists.\n  // Reference is valid until any other read operations occur.\n  ReadOp& PrepareRead(DiskSegment aligned_segment);\n\n  // Called once read finished\n  void ProcessRead(size_t offset, io::Result<std::string_view> value);\n\n  // Called once Stash finished\n  void ProcessStashed(const OwnedEntryId& id, unsigned version,\n                      const io::Result<DiskSegment>& segment);\n\n private:\n  static OwnedEntryId ToOwned(PendingId id);\n  static std::string ToString(const OwnedEntryId& id);\n\n  DiskStorage storage_;\n\n  // Pending read operations are keyed by the offset of their aligned segment.\n  // This prevents an ABA problem in scenarios like: read (pending) → delete → stash → read.\n  // After the stash, the second read targets a different segment offset, so it won't\n  // interfere with the first read's pending operation, even for the same PendingId.\n  
absl::flat_hash_map<size_t /* offset */, ReadOp> pending_reads_;\n\n  size_t pending_stash_counter_ = 0;\n\n  // todo: allow heterogeneous lookups with non owned id\n  absl::flat_hash_map<OwnedEntryId, unsigned /* version */> pending_stash_ver_;\n};\n\n};  // namespace dfly::tiering\n"
  },
  {
    "path": "src/server/tiering/op_manager_test.cc",
    "content": "// Copyright 2024, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#include \"server/tiering/op_manager.h\"\n\n#include <gtest/gtest.h>\n\n#include <memory>\n\n#include \"absl/container/flat_hash_map.h\"\n#include \"absl/strings/str_cat.h\"\n#include \"server/tiering/common.h\"\n#include \"server/tiering/test_common.h\"\n#include \"util/fibers/fibers.h\"\n#include \"util/fibers/future.h\"\n\nnamespace dfly::tiering {\n\nusing namespace std;\nusing namespace std::string_literals;\n\nstruct TestDecoder : tiering::BareDecoder {\n  std::unique_ptr<tiering::Decoder> Clone() const override {\n    return std::make_unique<TestDecoder>();\n  }\n\n  void Initialize(std::string_view slice) override {\n    tiering::BareDecoder::Initialize(slice);\n    value = slice;\n  }\n\n  string value;\n};\n\nostream& operator<<(ostream& os, const OpManager::Stats& stats) {\n  return os << \"pending_read_cnt: \" << stats.pending_read_cnt\n            << \", pending_stash_cnt: \" << stats.pending_stash_cnt\n            << \", alloc_bytes: \" << stats.disk_stats.allocated_bytes\n            << \", capacity_bytes: \" << stats.disk_stats.capacity_bytes\n            << \", heap_buf_allocs: \" << stats.disk_stats.heap_buf_alloc_count\n            << \", registered_buf_allocs: \" << stats.disk_stats.registered_buf_alloc_count\n            << \", max_file_size: \" << stats.disk_stats.max_file_size\n            << \", pending_ops: \" << stats.disk_stats.pending_ops;\n}\n\nstruct OpManagerTest : PoolTestBase, OpManager {\n  OpManagerTest() : OpManager(256_MB) {\n  }\n\n  void Open() {\n    EXPECT_FALSE(OpManager::Open(\"op_manager_test_backing\"));\n  }\n\n  void Close() {\n    OpManager::Close();\n  }\n\n  util::fb2::Future<std::string> Read(PendingId id, DiskSegment segment) {\n    util::fb2::Future<std::string> future;\n    Enqueue(id, segment, TestDecoder{}, [future](io::Result<tiering::Decoder*> res) mutable {\n      auto* decoder = 
static_cast<TestDecoder*>(*res);\n      future.Resolve(decoder->value);\n    });\n    return future;\n  }\n\n  void NotifyStashed(const OwnedEntryId& id, const io::Result<DiskSegment>& segment) override {\n    VLOG(1) << std::get<0>(id) << \" stashed\";\n    ASSERT_TRUE(segment);\n    auto [it, inserted] = stashed_.emplace(id, *segment);\n    ASSERT_TRUE(inserted);\n  }\n\n  bool NotifyFetched(const OwnedEntryId& id, DiskSegment segment, Decoder* decoder) override {\n    auto* tdecoder = static_cast<TestDecoder*>(decoder);\n    fetched_[id] = std::move(tdecoder->value);\n    return false;\n  }\n\n  bool NotifyDelete(DiskSegment segment) override {\n    return true;\n  }\n\n  std::error_code Stash(PendingId id, std::string_view value) {\n    return PrepareAndStash(id, value.size(), [=](io::MutableBytes bytes) {\n      memcpy(bytes.data(), value.data(), value.size());\n      return value.size();\n    });\n  }\n\n  void WaitForPendingStashes() {\n    // Wait for both: pending_stash_cnt tracks entries awaiting version-matching IO completion,\n    // but cancelled stash IOs (version-mismatched, superseded by newer stashes for the same id)\n    // may still be in flight. 
Their callbacks free the allocated segments via MarkAsFree,\n    // so we must also wait for pending_ops to drain to ensure allocated_bytes is accurate.\n    while (GetStats().pending_stash_cnt > 0 || GetStats().disk_stats.pending_ops > 0)\n      util::ThisFiber::SleepFor(1ms);\n  }\n\n  absl::flat_hash_map<OwnedEntryId, std::string> fetched_;\n  absl::flat_hash_map<OwnedEntryId, DiskSegment> stashed_;\n};\n\nTEST_F(OpManagerTest, SimpleStashesWithReads) {\n  pp_->at(0)->Await([this] {\n    Open();\n\n    for (unsigned i = 0; i < 100; i++) {\n      EXPECT_FALSE(Stash(i, absl::StrCat(\"VALUE\", i, \"cancelled\")));\n      EXPECT_FALSE(Stash(i, absl::StrCat(\"VALUE\", i, \"cancelled\")));\n      EXPECT_FALSE(Stash(i, absl::StrCat(\"VALUE\", i, \"real\")));\n    }\n\n    EXPECT_EQ(GetStats().pending_stash_cnt, 100);\n    WaitForPendingStashes();\n\n    EXPECT_EQ(stashed_.size(), 100u);\n    EXPECT_EQ(GetStats().disk_stats.allocated_bytes, 100 * kPageSize) << GetStats();\n\n    for (unsigned i = 0; i < 100; i++) {\n      EXPECT_GE(stashed_[i].offset, i > 0);\n      EXPECT_EQ(stashed_[i].length, 10 + (i > 9));\n      EXPECT_EQ(Read(i, stashed_[i]).Get(), absl::StrCat(\"VALUE\", i, \"real\"));\n      EXPECT_EQ(fetched_.extract(i).mapped(), absl::StrCat(\"VALUE\", i, \"real\"));\n    }\n\n    Close();\n  });\n}\n\nTEST_F(OpManagerTest, DeleteAfterReads) {\n  pp_->at(0)->Await([this] {\n    Open();\n\n    EXPECT_FALSE(Stash(0u, absl::StrCat(\"DATA\")));\n    WaitForPendingStashes();\n\n    std::vector<util::fb2::Future<std::string>> reads;\n    for (unsigned i = 0; i < 100; i++)\n      reads.emplace_back(Read(0u, stashed_[0u]));\n    DeleteOffloaded(stashed_[0u]);\n\n    for (auto& fut : reads)\n      EXPECT_EQ(fut.Get(), \"DATA\");\n\n    Close();\n  });\n}\n\nTEST_F(OpManagerTest, ReadSamePageDifferentOffsets) {\n  pp_->at(0)->Await([this] {\n    Open();\n\n    // Build single numbers blob\n    std::string numbers = \"H\";  // single padding byte to recognize it as small 
keys\n    std::vector<DiskSegment> number_segments;\n    for (size_t i = 0; i < 100; i++) {\n      std::string number = std::to_string(i);\n      number_segments.emplace_back(numbers.size(), number.size());\n      numbers += number;\n    }\n\n    EXPECT_FALSE(Stash(0u, numbers));\n    WaitForPendingStashes();\n\n    EXPECT_EQ(stashed_[0u].offset, 0u);\n\n    // Issue lots of concurrent reads\n    std::vector<util::fb2::Future<std::string>> futures;\n    for (size_t i = 0; i < 100; i++)\n      futures.emplace_back(Read(std::make_pair(0, absl::StrCat(\"k\", i)), number_segments[i]));\n\n    for (size_t i = 0; i < 100; i++)\n      EXPECT_EQ(futures[i].Get(), std::to_string(i));\n\n    Close();\n  });\n}\n\n// Test ABA scenario: stash an entry, issue an async read, delete it and re-stash a new value\n// under the same id - all without yielding so the read I/O stays in flight. When I/O completes,\n// version tracking in pending_stash_ver_ must ensure only the new stash triggers NotifyStashed\n// while the old one is silently discarded (its segment freed).\n//\n// NOTE: We cannot guarantee that the first read completes after the second stash because we have\n// no control over io_uring completion ordering. In practice, the read submitted first likely\n// completes before or around the same time as the stash. 
To fully test the interleaving where\n// the new entry's read is issued while the original read is still in flight, we would need a\n// mock DiskStorage that allows explicit control over when I/O completions are delivered.\n// TODO: Add a DiskStorage mock to enable deterministic I/O completion ordering in tests.\nTEST_F(OpManagerTest, StashDeleteRestashWhileReading) {\n  pp_->at(0)->Await([this] {\n    Open();\n\n    // Stash initial value under id 0\n    EXPECT_FALSE(Stash(0u, \"ORIGINAL\"));\n    WaitForPendingStashes();\n\n    DiskSegment original_segment = stashed_.at(0u);\n\n    // Issue an async read - don't wait on it yet so it stays in flight.\n    auto read_fut = Read(0u, original_segment);\n\n    // Without yielding: delete the entry, clear tracking, re-stash under the same id.\n    // At this point the read for ORIGINAL is still pending in io_uring, and we're issuing\n    // a new stash for id 0 with a bumped version.\n    DeleteOffloaded(original_segment);\n    stashed_.clear();\n    EXPECT_FALSE(Stash(0u, \"REPLACEMENT\"));\n\n    // Both the read and the new stash are now in flight. 
Let them complete.\n    WaitForPendingStashes();\n    EXPECT_EQ(read_fut.Get(), \"ORIGINAL\");\n\n    // Verify only the replacement was notified (single entry in stashed_).\n    ASSERT_EQ(stashed_.size(), 1u);\n    ASSERT_EQ(1, stashed_.count(0u));\n    DiskSegment new_segment = stashed_.at(0u);\n\n    // Read the replacement and verify correctness\n    EXPECT_EQ(Read(0u, new_segment).Get(), \"REPLACEMENT\");\n\n    Close();\n  });\n}\n\nTEST_F(OpManagerTest, Modify) {\n  pp_->at(0)->Await([this] {\n    Open();\n\n    std::ignore = Stash(0u, \"D\");\n    WaitForPendingStashes();\n\n    // Atomically issue sequence of modify-read operations\n    std::vector<util::fb2::Future<std::string>> futures;\n    for (size_t i = 0; i < 10; i++) {\n      Enqueue(0u, stashed_[0u], TestDecoder{}, [i](io::Result<tiering::Decoder*> res) {\n        auto* decoder = static_cast<TestDecoder*>(*res);\n        absl::StrAppend(&decoder->value, i);\n      });\n      futures.emplace_back(Read(0u, stashed_[0u]));\n    }\n\n    // Expect futures to resolve with correct values\n    std::string expected = \"D\";\n    for (size_t i = 0; i < futures.size(); i++) {\n      absl::StrAppend(&expected, i);\n      EXPECT_EQ(futures[i].Get(), expected);\n    }\n\n    Close();\n  });\n}\n\n}  // namespace dfly::tiering\n"
  },
  {
    "path": "src/server/tiering/serialized_map.cc",
    "content": "#include \"server/tiering/serialized_map.h\"\n\n#include <absl/base/internal/endian.h>\n\n#include \"base/logging.h\"\n#include \"core/detail/listpack_wrap.h\"\n\nnamespace dfly::tiering {\n\nconstexpr size_t kLenBytes = 4;\n\nSerializedMap::Iterator& SerializedMap::Iterator::operator++() {\n  slice_.remove_prefix(2 * kLenBytes + key_.size() + value_.size());\n  Read();\n  return *this;\n}\n\nSerializedMap::Iterator::Iterator(std::string_view buffer) : slice_{buffer} {\n  Read();\n}\n\nvoid SerializedMap::Iterator::Read() {\n  if (slice_.empty())\n    return;\n\n  uint32_t key_len = absl::little_endian::Load32(slice_.data());\n  uint32_t value_len = absl::little_endian::Load32(slice_.data() + 4);\n  key_ = {slice_.data() + 8, key_len};\n  value_ = {slice_.data() + 8 + key_len, value_len};\n}\n\nSerializedMap::SerializedMap(std::string_view slice) {\n  size_ = absl::little_endian::Load32(slice.data());\n  DCHECK_GT(size_, 0u);\n  slice_ = slice;\n}\n\nSerializedMap::Iterator SerializedMap::Find(std::string_view key) const {\n  return std::find_if(begin(), end(), [key](auto p) { return p.first == key; });\n}\n\nSerializedMap::Iterator SerializedMap::begin() const {\n  return Iterator{slice_.substr(kLenBytes)};\n}\n\nSerializedMap::Iterator SerializedMap::end() const {\n  return Iterator{slice_.substr(slice_.size(), 0)};\n}\n\nsize_t SerializedMap::size() const {\n  return size_;\n}\n\nsize_t SerializedMap::DataBytes() const {\n  return slice_.size() - 4 - size() * 2 * 4;\n}\n\nsize_t SerializedMap::EstimateSize(size_t data_bytes, size_t entries) {\n  return kLenBytes /* entry number */ + data_bytes + entries * 2 * kLenBytes /* string lengths */;\n}\n\nsize_t SerializedMap::Serialize(const detail::ListpackWrap& lw, absl::Span<char> buffer) {\n  DCHECK_GE(buffer.size(), EstimateSize(lw.UsedBytes(), lw.size()));\n\n  char* ptr = buffer.data();\n  absl::little_endian::Store32(ptr, lw.size());\n  ptr += kLenBytes;\n\n  for (const auto& [key, value] : lw) 
{\n    absl::little_endian::Store32(ptr, key.length());\n    ptr += kLenBytes;\n    absl::little_endian::Store32(ptr, value.length());\n    ptr += kLenBytes;\n    memcpy(ptr, key.data(), key.length());\n    ptr += key.length();\n    memcpy(ptr, value.data(), value.length());\n    ptr += value.length();\n  }\n\n  return ptr - buffer.data();\n}\n\n}  // namespace dfly::tiering\n"
  },
  {
    "path": "src/server/tiering/serialized_map.h",
    "content": "#pragma once\n\n#include <absl/types/span.h>\n\n#include <string_view>\n\nnamespace dfly::detail {\nstruct ListpackWrap;\n}\n\nnamespace dfly::tiering {\n\n// Map built over a single contiguous byte slice to allow easy read operations.\nstruct SerializedMap {\n  struct Iterator {\n    using iterator_category = std::forward_iterator_tag;\n    using difference_type = std::ptrdiff_t;\n    using value_type = std::pair<std::string_view, std::string_view>;\n    using reference = value_type;\n    using pointer = value_type*;\n\n    Iterator& operator++();\n\n    bool operator==(const Iterator& other) const {\n      return slice_.data() == other.slice_.data() && slice_.size() == other.slice_.size();\n    }\n\n    bool operator!=(const Iterator& other) const {\n      return !operator==(other);\n    }\n\n    std::pair<std::string_view, std::string_view> operator*() const {\n      return {key_, value_};\n    }\n\n   private:\n    friend struct SerializedMap;\n\n    explicit Iterator(std::string_view buffer);\n    void Read();\n\n    std::string_view slice_;  // the part left\n    std::string_view key_, value_;\n  };\n\n  explicit SerializedMap(std::string_view slice);\n\n  Iterator Find(std::string_view key) const;  // Linear search\n  Iterator begin() const;\n  Iterator end() const;\n  size_t size() const;\n\n  // Number of bytes of pure keys or values\n  size_t DataBytes() const;\n\n  // Estimate upper bound for serialization size\n  static size_t EstimateSize(size_t data_bytes, size_t entries);\n\n  // Write a slice that can be used to build a SerializedMap on top of it.\n  // Returns number of bytes written\n  static size_t Serialize(const ::dfly::detail::ListpackWrap& lw, absl::Span<char> buffer);\n\n private:\n  size_t size_;\n  std::string_view slice_;\n};\n\n}  // namespace dfly::tiering\n"
  },
  {
    "path": "src/server/tiering/serialized_map_test.cc",
    "content": "#include \"server/tiering/serialized_map.h\"\n\n#include <mimalloc.h>\n\n#include <map>\n\n#include \"base/logging.h\"\n#include \"core/detail/listpack_wrap.h\"\n#include \"gmock/gmock.h\"\n\nextern \"C\" {\n#include \"redis/zmalloc.h\"\n}\n\nnamespace dfly::tiering {\n\nusing namespace std;\n\nstruct SerializedMapTest : public ::testing::Test {\n  static void SetUpTestSuite() {\n    init_zmalloc_threadlocal(mi_heap_get_backing());  // to use ListpackWrap\n  }\n};\n\nTEST_F(SerializedMapTest, TestBasic) {\n  const vector<std::pair<string, string>> kBase = {{\"first key\", \"first value\"},\n                                                   {\"second key\", \"second value\"},\n                                                   {\"third key\", \"third value\"},\n                                                   {\"fourth key\", \"fourth value\"},\n                                                   {\"fifth key\", \"fifth value\"}};\n  auto lw = detail::ListpackWrap::WithCapacity(100);\n  for (const auto& [k, v] : kBase)\n    lw.Insert(k, v, false);\n  lw.GetPointer();  // to mark as non dirty // TODO: remove\n\n  // Serialize kBase to buffer\n  std::string buffer;\n  buffer.resize(SerializedMap::EstimateSize(lw.UsedBytes(), lw.size()));\n  size_t written = SerializedMap::Serialize(lw, absl::MakeSpan(buffer));\n  EXPECT_GT(written, 0u);\n  buffer.resize(written);\n\n  // Build map over buffer and check size\n  SerializedMap map{buffer};\n  EXPECT_EQ(map.size(), kBase.size());\n\n  // Check entries\n  size_t idx = 0;\n  for (auto it = map.begin(); it != map.end(); ++it, ++idx) {\n    EXPECT_EQ((*it).first, kBase[idx].first);\n    EXPECT_EQ((*it).second, kBase[idx].second);\n  }\n}\n\n}  // namespace dfly::tiering\n"
  },
  {
    "path": "src/server/tiering/small_bins.cc",
    "content": "// Copyright 2024, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#include \"server/tiering/small_bins.h\"\n\n#include <algorithm>\n#include <optional>\n#include <utility>\n\n#include \"absl/base/internal/endian.h\"\n#include \"base/logging.h\"\n#include \"core/compact_object.h\"\n#include \"server/tiering/common.h\"\n#include \"server/tiering/disk_storage.h\"\n\nnamespace dfly::tiering {\nusing namespace std;\n\nnamespace {\n\n// See FlushBin() for format details\nsize_t StashedValueSize(string_view value) {\n  return 2 /* dbid */ + 8 /* hash */ + 2 /* strlen*/ + value.size();\n}\n\n}  // namespace\n\nstd::optional<SmallBins::FilledBin> SmallBins::Stash(DbIndex dbid, std::string_view key,\n                                                     std::string_view value) {\n  DCHECK_LT(value.size(), 2_KB);\n\n  size_t value_bytes = StashedValueSize(value);\n\n  std::optional<FilledBin> filled_bin;\n  if (2 /* num entries */ + current_bin_.bytes_ + value_bytes >= kPageSize) {\n    filled_bin = exchange(current_bin_, FilledBin{++last_bin_id_});\n  }\n\n  current_bin_.bytes_ += value_bytes;\n  auto [it, inserted] = current_bin_.entries_.emplace(std::make_pair(dbid, key), string(value));\n  CHECK(inserted);\n\n  return filled_bin;\n}\n\nsize_t SmallBins::SerializeBin(FilledBin* bin, io::MutableBytes dest) {\n  DCHECK_GT(bin->entries_.size(), 0u);\n  DCHECK_GE(dest.size(), 4_KB);\n\n  auto& pending_set = pending_bins_[bin->id];\n  uint8_t* data = dest.data();\n\n  // Store number of entries, 2 bytes\n  absl::little_endian::Store16(data, bin->entries_.size());\n  data += sizeof(uint16_t);\n\n  // Store all dbids and hashes, n * 10 bytes\n  for (const auto& [key, _] : bin->entries_) {\n    absl::little_endian::Store16(data, key.first);\n    data += sizeof(DbIndex);\n\n    absl::little_endian::Store64(data, CompactObj::HashCode(key.second));\n    data += sizeof(uint64_t);\n  }\n\n  // Store all values with sizes, n * (2 + 
x) bytes\n  for (const auto& [key, value] : bin->entries_) {\n    absl::little_endian::Store16(data, value.size());\n    data += sizeof(uint16_t);\n\n    pending_set[key] = {size_t(data - dest.data()), value.size()};\n    memcpy(data, value.data(), value.size());\n    data += value.size();\n  }\n\n  // Steal backing array from bin if relevant\n  if (current_bin_.entries_.empty()) {\n    // erase doesn't shrink backing, so we can reuse the allocated capacity\n    bin->entries_.erase(bin->entries_.begin(), bin->entries_.end());\n    current_bin_.entries_ = std::move(bin->entries_);\n  }\n\n  return bin->bytes_ + 2;\n}\n\nSmallBins::KeySegmentList SmallBins::ReportStashed(BinId id, DiskSegment segment) {\n  DVLOG(1) << \"ReportStashed \" << id;\n\n  DCHECK(pending_bins_.contains(id));\n  auto seg_map_node = pending_bins_.extract(id);\n  const auto& seg_map = seg_map_node.mapped();\n  DCHECK_GT(seg_map.size(), 0u) << id;\n\n  uint16_t bytes = 0;\n  SmallBins::KeySegmentList list;\n  for (auto& [key, sub_segment] : seg_map) {\n    bytes += sub_segment.length;\n\n    DiskSegment real_segment{segment.offset + sub_segment.offset, sub_segment.length};\n    list.emplace_back(key.first, key.second, real_segment);\n  }\n\n  stats_.stashed_entries_cnt += list.size();\n  stashed_bins_[segment.offset] = {uint8_t(list.size()), bytes};\n  return list;\n}\n\nstd::vector<std::pair<DbIndex, std::string>> SmallBins::ReportStashAborted(BinId id) {\n  std::vector<std::pair<DbIndex, std::string>> out;\n\n  auto node = pending_bins_.extract(id);\n  auto& entries = node.mapped();\n  while (!entries.empty())\n    out.emplace_back(std::move(entries.extract(entries.begin()).key()));\n\n  return out;\n}\n\nstd::optional<SmallBins::BinId> SmallBins::Delete(DbIndex dbid, std::string_view key) {\n  auto& entries = current_bin_.entries_;\n  if (auto it = entries.find(make_pair(dbid, key)); it != entries.end()) {\n    size_t stashed_size = StashedValueSize(it->second);\n    
DCHECK_GE(current_bin_.bytes_, stashed_size);\n\n    current_bin_.bytes_ -= stashed_size;\n    entries.erase(it);\n    return std::nullopt;\n  }\n\n  for (auto& [id, keys] : pending_bins_) {\n    if (keys.erase(make_pair(dbid, key)))\n      return keys.empty() ? std::make_optional(id) : std::nullopt;\n  }\n  return std::nullopt;\n}\n\nSmallBins::BinInfo SmallBins::Delete(DiskSegment segment) {\n  auto full_segment = segment.ContainingPages();\n  if (auto it = stashed_bins_.find(full_segment.offset); it != stashed_bins_.end()) {\n    stats_.stashed_entries_cnt--;\n    auto& bin = it->second;\n\n    DCHECK_LE(segment.length, bin.bytes);\n    bin.bytes -= segment.length;\n\n    if (--bin.entries == 0) {\n      DCHECK_EQ(bin.bytes, 0u);\n      stashed_bins_.erase(it);\n      return {full_segment, false /* fragmented */, true /* empty */};\n    }\n\n    if (bin.bytes < kPageSize / 2) {\n      return {full_segment, true /* fragmented */, false /* empty */};\n    }\n  }\n\n  return {segment};\n}\n\nSmallBins::Stats SmallBins::GetStats() const {\n  return Stats{.stashed_bins_cnt = stashed_bins_.size(),\n               .stashed_entries_cnt = stats_.stashed_entries_cnt,\n               .current_bin_bytes = current_bin_.bytes_,\n               .current_entries_cnt = current_bin_.entries_.size()};\n}\n\nSmallBins::KeyHashDbList SmallBins::DeleteBin(DiskSegment segment, std::string_view value) {\n  DCHECK_EQ(value.size(), kPageSize);\n\n  auto bin = stashed_bins_.extract(segment.offset);\n  if (bin.empty())\n    return {};\n\n  stats_.stashed_entries_cnt -= bin.mapped().entries;\n\n  const char* data = value.data();\n\n  uint16_t entries = absl::little_endian::Load16(data);\n  data += sizeof(uint16_t);\n\n  KeyHashDbList out(entries);\n\n  // Recover dbids and hashes\n  for (size_t i = 0; i < entries; i++) {\n    DbIndex dbid = absl::little_endian::Load16(data);\n    data += sizeof(DbIndex);\n\n    uint64_t hash = absl::little_endian::Load64(data);\n    data += 
sizeof(hash);\n\n    out[i] = {dbid, hash, {0, 0}};\n  }\n\n  // Recover segments\n  for (size_t i = 0; i < entries; i++) {\n    uint16_t length = absl::little_endian::Load16(data);\n    data += sizeof(uint16_t);\n\n    std::get<DiskSegment>(out[i]) = {segment.offset + (data - value.data()), length};\n    data += length;\n  }\n\n  return out;\n}\n\n}  // namespace dfly::tiering\n"
  },
  {
    "path": "src/server/tiering/small_bins.h",
    "content": "// Copyright 2024, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#pragma once\n\n#include <absl/container/flat_hash_map.h>\n\n#include <optional>\n#include <string>\n#include <vector>\n\n#include \"server/tiering/disk_storage.h\"\n#include \"server/tiering/entry_map.h\"\n\nnamespace dfly::tiering {\n\nusing DbIndex = uint16_t;\n\n// Small bins accumulate small values into larger bins that fill up 4kb pages.\n// SIMPLEST VERSION for now.\nclass SmallBins {\n public:\n  struct Stats {\n    size_t stashed_bins_cnt = 0;\n    size_t stashed_entries_cnt = 0;\n    size_t current_bin_bytes = 0;\n    size_t current_entries_cnt = 0;\n  };\n\n  using BinId = unsigned;\n  static const BinId kInvalidBin = std::numeric_limits<BinId>::max();\n\n  struct BinInfo {\n    DiskSegment segment;\n    bool fragmented = false, empty = false;\n  };\n\n  // Packaged bin ready to be serialized with SerializeBin()\n  struct FilledBin {\n    friend class SmallBins;\n    BinId id;\n\n   private:\n    explicit FilledBin(BinId id) : id{id} {\n    }\n\n    unsigned bytes_ = 0;\n    tiering::EntryMap<std::string> entries_;\n  };\n\n  // List of locations of values for corresponding keys of previously filled bin\n  using KeySegmentList = std::vector<std::tuple<DbIndex, std::string /* key*/, DiskSegment>>;\n\n  // List of item key db indices and hashes\n  using KeyHashDbList = std::vector<std::tuple<DbIndex, uint64_t /* hash */, DiskSegment>>;\n\n  // Returns true if the entry is pending inside SmallBins.\n  bool IsPending(DbIndex dbid, std::string_view key) const {\n    return current_bin_.entries_.count(std::make_pair(dbid, key)) > 0;\n  }\n\n  // Enqueue key/value pair for stash. Returns page to be stashed if it filled up.\n  std::optional<FilledBin> Stash(DbIndex dbid, std::string_view key, std::string_view value);\n\n  // Report that a stash succeeded. 
Returns list of stored keys with calculated value locations.\n  KeySegmentList ReportStashed(BinId id, DiskSegment segment);\n\n  // Report that a stash was aborted. Returns list of keys that the entry contained.\n  std::vector<std::pair<DbIndex, std::string>> ReportStashAborted(BinId id);\n\n  // Delete a key with pending io. Returns entry id if needs to be deleted.\n  std::optional<BinId> Delete(DbIndex dbid, std::string_view key);\n\n  // Delete a stored segment. Returns information about the current bin, which might indicate\n  // the need for external actions like deleting empty segments or triggering defragmentation\n  BinInfo Delete(DiskSegment segment);\n\n  // Delete stashed bin. Returns list of recovered item key hashes and db indices.\n  // Mainly used for defragmentation\n  KeyHashDbList DeleteBin(DiskSegment segment, std::string_view value);\n\n  // Serialize filled bin to destination buffer (4kb)\n  size_t SerializeBin(FilledBin* bin, io::MutableBytes dest);\n\n  Stats GetStats() const;\n\n private:\n  struct StashInfo {\n    uint8_t entries = 0;\n    uint16_t bytes = 0;\n  };\n  static_assert(sizeof(StashInfo) == sizeof(unsigned));\n\n  BinId last_bin_id_ = 0;\n  FilledBin current_bin_{last_bin_id_};\n\n  // Pending stashes, their keys and value sizes\n  absl::flat_hash_map<unsigned /* id */, tiering::EntryMap<DiskSegment>> pending_bins_;\n\n  // Map of bins that were stashed and should be deleted when number of entries reaches 0\n  absl::flat_hash_map<size_t /*offset*/, StashInfo> stashed_bins_;\n\n  struct {\n    size_t stashed_entries_cnt = 0;\n  } stats_;\n};\n\n};  // namespace dfly::tiering\n"
  },
  {
    "path": "src/server/tiering/small_bins_test.cc",
    "content": "// Copyright 2024, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#include \"server/tiering/small_bins.h\"\n\n#include <absl/strings/str_cat.h>\n\n#include <algorithm>\n\n#include \"base/gtest.h\"\n#include \"base/logging.h\"\n#include \"server/tiering/disk_storage.h\"\n\nnamespace dfly::tiering {\n\nusing namespace std;\nusing namespace std::string_literals;\n\nstring SmallString(size_t len) {\n  return string(len, 'a');\n}\n\nstruct SmallBinsTest : public ::testing::Test {\n  std::pair<tiering::SmallBins::BinId, std::string> Serialize(SmallBins::FilledBin& bin) {\n    std::string out(4_KB, 'c');\n    size_t written = bins_.SerializeBin(&bin, {reinterpret_cast<uint8_t*>(out.data()), out.size()});\n    out.resize(written);\n    return {bin.id, out};\n  }\n\n protected:\n  SmallBins bins_;\n};\n\nTEST_F(SmallBinsTest, SimpleStashRead) {\n  // Fill single bin\n  std::optional<SmallBins::FilledBin> bin;\n  for (unsigned i = 0; !bin; i++)\n    bin = bins_.Stash(0, absl::StrCat(\"k\", i), absl::StrCat(\"v\", i));\n  auto [id, data] = Serialize(*bin);\n\n  // Verify cut locations point to correct values\n  auto segments = bins_.ReportStashed(id, DiskSegment{0, 4_KB});\n  for (auto [dbid, key, location] : segments) {\n    auto value = \"v\"s + key.substr(1);\n    EXPECT_EQ(value, data.substr(location.offset, location.length));\n  }\n}\n\nTEST_F(SmallBinsTest, SimpleDeleteAbort) {\n  SmallBins bins;\n\n  // Fill single bin\n  std::optional<SmallBins::FilledBin> bin;\n  unsigned i = 0;\n  for (; !bin; i++)\n    bin = bins_.Stash(0, absl::StrCat(\"k\", i), absl::StrCat(\"v\", i));\n  auto [id, data] = Serialize(*bin);\n\n  // Delete all even values\n  for (unsigned j = 0; j <= i; j += 2)\n    bins_.Delete(0, absl::StrCat(\"k\", j));\n\n  auto remaining = bins_.ReportStashAborted(id);\n  sort(remaining.begin(), remaining.end());\n\n  // Expect all odd keys still to exist\n  EXPECT_EQ(remaining.size(), i / 2);\n  for 
(unsigned j = 1; j < i; j += 2) {\n    std::pair<DbIndex, std::string> needle{0, absl::StrCat(\"k\", j)};\n    EXPECT_TRUE(binary_search(remaining.begin(), remaining.end(), needle)) << j;\n  }\n}\n\nTEST_F(SmallBinsTest, PartialStashDelete) {\n  // Fill single bin\n  std::optional<SmallBins::FilledBin> bin;\n  unsigned i = 0;\n  for (; !bin; i++)\n    bin = bins_.Stash(0, absl::StrCat(\"k\", i), absl::StrCat(\"v\", i));\n  auto [id, data] = Serialize(*bin);\n\n  // Delete all even values\n  for (unsigned j = 0; j <= i; j += 2)\n    bins_.Delete(0, absl::StrCat(\"k\", j));\n\n  auto segments = bins_.ReportStashed(id, DiskSegment{0, 4_KB});\n\n  // Expect all odd keys still to exist\n  EXPECT_EQ(segments.size(), i / 2);\n  for (auto& [dbid, key, segment] : segments) {\n    EXPECT_EQ(key, \"k\"s + data.substr(segment.offset, segment.length).substr(1));\n  }\n\n  // Delete all stashed values\n  while (!segments.empty()) {\n    auto segment = std::get<2>(segments.back());\n    segments.pop_back();\n    auto bin = bins_.Delete(segment);\n\n    EXPECT_EQ(bin.segment.offset, 0u);\n    EXPECT_EQ(bin.segment.length, 4_KB);\n\n    if (segments.empty()) {\n      EXPECT_TRUE(bin.empty);\n    } else {\n      EXPECT_TRUE(bin.fragmented);  // half of the values were deleted\n    }\n  }\n}\n\nTEST_F(SmallBinsTest, UpdateStatsAfterDelete) {\n  // caused https://github.com/dragonflydb/dragonfly/issues/3240\n  for (unsigned i = 0; i < 10; i++) {\n    auto spilled_bin = bins_.Stash(0, absl::StrCat(\"k\", i), SmallString(128));\n    ASSERT_FALSE(spilled_bin);\n  }\n\n  EXPECT_GT(bins_.GetStats().current_bin_bytes, 128 * 10);\n  for (unsigned i = 0; i < 10; i++) {\n    auto res = bins_.Delete(0, absl::StrCat(\"k\", i));\n    ASSERT_FALSE(res);\n  }\n  EXPECT_EQ(0u, bins_.GetStats().current_bin_bytes);\n}\n\n}  // namespace dfly::tiering\n"
  },
  {
    "path": "src/server/tiering/test_common.h",
    "content": "// Copyright 2024, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#pragma once\n\n#include <memory>\n\n#include \"base/gtest.h\"\n#include \"base/logging.h\"\n#include \"util/fibers/fibers.h\"\n#include \"util/fibers/pool.h\"\n\nnamespace dfly::tiering {\n\nclass PoolTestBase : public testing::Test {\n protected:\n  void SetUp() override {\n    pp_.reset(util::fb2::Pool::IOUring(16, 2));\n    pp_->Run();\n  }\n\n  void TearDown() override {\n    pp_->Stop();\n    pp_.reset();\n  }\n\n  std::unique_ptr<util::ProactorPool> pp_;\n};\n\n}  // namespace dfly::tiering\n"
  },
  {
    "path": "src/server/transaction.cc",
    "content": "// Copyright 2022, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#include \"server/transaction.h\"\n\n#include <absl/strings/match.h>\n\n#include <new>\n\n#include \"base/flags.h\"\n#include \"base/logging.h\"\n#include \"facade/facade_stats.h\"\n#include \"facade/op_status.h\"\n#include \"redis/redis_aux.h\"\n#include \"server/blocking_controller.h\"\n#include \"server/command_registry.h\"\n#include \"server/db_slice.h\"\n#include \"server/engine_shard_set.h\"\n#include \"server/journal/journal.h\"\n#include \"server/namespaces.h\"\n#include \"server/server_state.h\"\n\nABSL_FLAG(uint32_t, tx_queue_warning_len, 96,\n          \"Length threshold for warning about long transaction queue\");\n\nnamespace dfly {\n\nusing namespace std;\nusing namespace util;\nusing absl::StrCat;\n\nthread_local Transaction::TLTmpSpace Transaction::tmp_space;\n\nnamespace {\n\n// Global txid sequence\natomic_uint64_t op_seq{1};\n\nconstexpr size_t kTransSize [[maybe_unused]] = sizeof(Transaction);\n\nvoid AnalyzeTxQueue(const EngineShard* shard, const TxQueue* txq) {\n  unsigned q_limit = absl::GetFlag(FLAGS_tx_queue_warning_len);\n  if (txq->size() > q_limit) {\n    static thread_local time_t last_log_time = 0;\n    // TODO: glog provides LOG_EVERY_T, which uses precise clock.\n    // We should introduce inside helio LOG_PERIOD_ATLEAST macro that takes seconds and\n    // uses low precision clock.\n    time_t now = time(nullptr);\n    if (now >= last_log_time + 10) {\n      last_log_time = now;\n      EngineShard::TxQueueInfo info = shard->AnalyzeTxQueue();\n      string msg = StrCat(\"TxQueue is too long. 
\", info.Format());\n      absl::StrAppend(&msg, \"poll_executions:\", shard->stats().poll_execution_total);\n\n      const Transaction* cont_tx = shard->GetContTx();\n      if (cont_tx) {\n        absl::StrAppend(&msg, \" continuation_tx: \", cont_tx->DebugId(shard->shard_id()), \" \",\n                        cont_tx->DEBUG_IsArmedInShard(shard->shard_id()) ? \" armed\" : \"\");\n      }\n\n      LOG(WARNING) << msg;\n    }\n  }\n}\n\nvoid RecordTxScheduleStats(const Transaction* tx) {\n  auto* ss = ServerState::tlocal();\n  ++(tx->IsGlobal() ? ss->stats.tx_global_cnt : ss->stats.tx_normal_cnt);\n  ++ss->stats.tx_width_freq_arr[tx->GetUniqueShardCnt() - 1];\n}\n\nstd::ostream& operator<<(std::ostream& os, Transaction::time_point tp) {\n  using namespace chrono;\n  if (tp == Transaction::time_point::max())\n    return os << \"inf\";\n  size_t ms = duration_cast<milliseconds>(tp - Transaction::time_point::clock::now()).count();\n  return os << ms << \"ms\";\n}\n\nuint16_t trans_id(const Transaction* ptr) {\n  return (intptr_t(ptr) >> 8) & 0xFFFF;\n}\n\nstruct ScheduleContext {\n  Transaction* trans;\n  bool optimistic_execution = false;\n\n  std::atomic<ScheduleContext*> next{nullptr};\n\n  std::atomic_uint32_t fail_cnt{0};\n\n  ScheduleContext(Transaction* t, bool optimistic) : trans(t), optimistic_execution(optimistic) {\n  }\n};\n\nconstexpr size_t kAvoidFalseSharingSize = 64;\nstruct ScheduleQ {\n  alignas(kAvoidFalseSharingSize) base::MPSCIntrusiveQueue<ScheduleContext> queue;\n  alignas(kAvoidFalseSharingSize) atomic_bool armed{false};\n};\n\nvoid MPSC_intrusive_store_next(ScheduleContext* dest, ScheduleContext* next_node) {\n  dest->next.store(next_node, std::memory_order_relaxed);\n}\n\nScheduleContext* MPSC_intrusive_load_next(const ScheduleContext& src) {\n  return src.next.load(std::memory_order_acquire);\n}\n\n// of shard_num arity.\nScheduleQ* schedule_queues = nullptr;\n\n}  // namespace\n\nbool Transaction::BatonBarrier::IsClaimed() const {\n  return 
claimed_.load(memory_order_relaxed);\n}\n\nbool Transaction::BatonBarrier::TryClaim() {\n  return !claimed_.exchange(true, memory_order_relaxed);  // false means first means success\n}\n\nvoid Transaction::BatonBarrier::Close() {\n  DCHECK(claimed_.load(memory_order_relaxed));\n  closed_.store(true, memory_order_relaxed);\n  ec_.notify();  // release\n}\n\ncv_status Transaction::BatonBarrier::Wait(time_point tp) {\n  auto cb = [this] { return closed_.load(memory_order_acquire); };\n\n  if (tp != time_point::max()) {\n    // Wait until timepoint and return immediately if we finished without a timeout\n    if (ec_.await_until(cb, tp) == cv_status::no_timeout)\n      return cv_status::no_timeout;\n\n    // We timed out and claimed the barrier, so no one will be able to claim it anymore\n    if (TryClaim()) {\n      closed_.store(true, memory_order_relaxed);  // Purely formal\n      return cv_status::timeout;\n    }\n\n    // fallthrough: otherwise a modification is in progress, wait for it below\n  }\n\n  ec_.await(cb);\n  return cv_status::no_timeout;\n}\n\nTransaction::Guard::Guard(Transaction* tx) : tx(tx) {\n  DCHECK(tx->cid_->opt_mask() & CO::GLOBAL_TRANS);\n  auto cb = [&](Transaction* t, EngineShard* shard) {\n    namespaces->GetDefaultNamespace().GetDbSlice(shard->shard_id()).SetExpireAllowed(false);\n    return OpStatus::OK;\n  };\n  tx->Execute(cb, false);\n}\n\nTransaction::Guard::~Guard() {\n  auto cb = [&](Transaction* t, EngineShard* shard) {\n    namespaces->GetDefaultNamespace().GetDbSlice(shard->shard_id()).SetExpireAllowed(true);\n    return OpStatus::OK;\n  };\n  tx->Execute(cb, true);\n  tx->Refurbish();\n}\n\nvoid Transaction::Init(unsigned num_shards) {\n  DCHECK(schedule_queues == nullptr);\n  schedule_queues = new ScheduleQ[num_shards];\n}\n\nvoid Transaction::Shutdown() {\n  DCHECK(schedule_queues);\n  delete[] schedule_queues;\n  schedule_queues = nullptr;\n}\n\nTransaction::Transaction(const CommandId* cid) : cid_{cid} {\n  InitTxTime();\n  
string_view cmd_name(cid_->name());\n  if (cmd_name == \"EXEC\" || cmd_name == \"EVAL\" || cmd_name == \"EVAL_RO\" || cmd_name == \"EVALSHA\" ||\n      cmd_name == \"EVALSHA_RO\") {\n    multi_.reset(new MultiData);\n    multi_->mode = NOT_DETERMINED;\n    multi_->role = DEFAULT;\n  }\n}\n\nTransaction::Transaction(const Transaction* parent, ShardId shard_id, std::optional<SlotId> slot_id)\n    : multi_{make_unique<MultiData>()},\n      txid_{parent->txid()},\n      unique_shard_cnt_{1},\n      unique_shard_id_{shard_id} {\n  if (parent->multi_) {\n    multi_->mode = parent->multi_->mode;\n  } else {\n    // Use squashing mechanism for inline execution of single-shard EVAL\n    multi_->mode = LOCK_AHEAD;\n  }\n\n  multi_->role = SQUASHED_STUB;\n\n  MultiUpdateWithParent(parent);\n  if (slot_id.has_value()) {\n    unique_slot_checker_.Add(*slot_id);\n  }\n}\n\nTransaction::~Transaction() {\n  DVLOG(3) << \"Transaction \" << StrCat(Name(), \"@\", txid_, \"/\", unique_shard_cnt_, \")\")\n           << \" destroyed\";\n}\n\nvoid Transaction::InitBase(Namespace* ns, DbIndex dbid, CmdArgList args) {\n  global_ = false;\n  db_index_ = dbid;\n  full_args_ = args;\n  local_result_ = OpStatus::OK;\n  stats_.coordinator_index = ProactorBase::me() ? 
ProactorBase::me()->GetPoolIndex() : kInvalidSid;\n\n  // Namespace is read by poll execution, so it can't be changed on the fly\n  if (IsScheduled()) {\n    DCHECK_EQ(namespace_, ns);\n  } else {\n    DCHECK(namespace_ == nullptr || namespace_ == ns);\n    namespace_ = ns;\n  }\n}\n\nvoid Transaction::InitGlobal() {\n  DCHECK(!multi_ || (multi_->mode == GLOBAL || multi_->mode == NON_ATOMIC));\n\n  global_ = true;\n  EnableAllShards();\n}\n\nvoid Transaction::BuildShardIndex(const KeyIndex& key_index, std::vector<PerShardCache>* out) {\n  // Because of the way we iterate in InitShardData\n  DCHECK(!key_index.bonus || key_index.step == 1);\n\n  auto& shard_index = *out;\n  for (unsigned i : key_index.Range()) {\n    string_view key = ArgS(full_args_, i);\n    unique_slot_checker_.Add(key);\n    ShardId sid = Shard(key, shard_data_.size());\n\n    unsigned step = key_index.bonus ? 1 : key_index.step;\n    shard_index[sid].key_step = step;\n    auto& slices = shard_index[sid].slices;\n    if (!slices.empty() && slices.back().second == i) {\n      slices.back().second = i + step;\n    } else {\n      slices.emplace_back(i, i + step);\n    }\n  }\n}\n\nvoid Transaction::InitShardData(absl::Span<const PerShardCache> shard_index, size_t num_args) {\n  args_slices_.reserve(num_args);\n  DCHECK(kv_fp_.empty());\n  kv_fp_.reserve(num_args);\n\n  // Store the concatenated per-shard arguments from the shard index inside kv_args_\n  // and make each shard data point to its own sub-span inside kv_args_.\n  for (size_t i = 0; i < shard_data_.size(); ++i) {\n    auto& sd = shard_data_[i];\n    const auto& src = shard_index[i];\n\n    sd.slice_count = src.slices.size();\n    sd.slice_start = args_slices_.size();\n    sd.fp_start = kv_fp_.size();\n    sd.fp_count = 0;\n\n    // Multi transactions can re-initialize on different shards, so clear ACTIVE flag.\n    DCHECK_EQ(sd.local_mask & ACTIVE, 0);\n\n    if (sd.slice_count == 0)\n      continue;\n\n    sd.local_mask |= ACTIVE;\n\n  
  unique_shard_cnt_++;\n    unique_shard_id_ = i;\n\n    for (const auto& [start, end] : src.slices) {\n      args_slices_.emplace_back(start, end);\n      for (string_view key : KeyIndex(start, end, src.key_step).Range(full_args_)) {\n        kv_fp_.push_back(LockTag(key).Fingerprint());\n        sd.fp_count++;\n      }\n    }\n  }\n}\n\nvoid Transaction::PrepareMultiFps(CmdArgList keys) {\n  DCHECK_EQ(multi_->mode, LOCK_AHEAD);\n  DCHECK_GT(keys.size(), 0u);\n\n  auto& tag_fps = multi_->tag_fps;\n\n  tag_fps.reserve(keys.size());\n  for (string_view str : keys) {\n    ShardId sid = Shard(str, shard_set->size());\n    tag_fps.emplace(sid, LockTag(str).Fingerprint());\n  }\n}\n\nvoid Transaction::StoreKeysInArgs(const KeyIndex& key_index) {\n  DCHECK(kv_fp_.empty());\n  DCHECK(args_slices_.empty());\n\n  // even for a single key we may have multiple arguments per key (MSET).\n  if (key_index.bonus)\n    args_slices_.emplace_back(*key_index.bonus, *key_index.bonus + 1);\n  args_slices_.emplace_back(key_index.start, key_index.end);\n\n  for (string_view key : key_index.Range(full_args_))\n    kv_fp_.push_back(LockTag(key).Fingerprint());\n}\n\nvoid Transaction::InitByKeys(const KeyIndex& key_index) {\n  // Skip initialization for key-dependent transactions without keys\n  if ((key_index.end - key_index.start) + int(bool(key_index.bonus)) == 0)\n    return;\n\n  DCHECK_LT(key_index.start, full_args_.size());\n\n  // Stub transactions always operate only on single shard.\n  bool is_stub = multi_ && multi_->role == SQUASHED_STUB;\n\n  unique_slot_checker_.Reset();\n  if ((key_index.NumArgs() == 1 && !IsAtomicMulti()) || is_stub) {\n    DCHECK(!IsActiveMulti() || multi_->mode == NON_ATOMIC);\n\n    // We don't have to split the arguments by shards, so we can copy them directly.\n    StoreKeysInArgs(key_index);\n\n    unique_shard_cnt_ = 1;\n    string_view akey = full_args_[*key_index];\n\n    if (is_stub)  // stub transactions don't migrate\n      
DCHECK_EQ(unique_shard_id_, Shard(akey, shard_set->size()));\n    else {\n      unique_slot_checker_.Add(akey);\n      unique_shard_id_ = Shard(akey, shard_set->size());\n    }\n\n    // Multi transactions that execute commands on their own (not stubs) can't shrink the backing\n    // array, as it still might be read by leftover callbacks.\n    shard_data_.resize(IsActiveMulti() ? shard_set->size() : 1);\n    shard_data_[SidToId(unique_shard_id_)].local_mask |= ACTIVE;\n\n    return;\n  }\n\n  shard_data_.resize(shard_set->size());  // shard_data isn't sparse, so we must allocate for all :(\n  DCHECK_EQ(full_args_.size() % key_index.step, 0u) << full_args_;\n\n  // Safe, because flow below is not preemptive.\n  auto& shard_index = tmp_space.GetShardIndex(shard_data_.size());\n\n  // Distribute all the arguments by shards.\n  BuildShardIndex(key_index, &shard_index);\n\n  // Initialize shard data based on distributed arguments.\n  InitShardData(shard_index, key_index.NumArgs());\n\n  DCHECK(!multi_ || multi_->mode != LOCK_AHEAD || !multi_->tag_fps.empty());\n\n  DVLOG(1) << \"InitByArgs \" << DebugId() << facade::ToSV(full_args_.front());\n\n  // Compress shard data, if we occupy only one shard.\n  if (unique_shard_cnt_ == 1) {\n    PerShardData* sd;\n    if (IsActiveMulti()) {\n      sd = &shard_data_[SidToId(unique_shard_id_)];\n      DCHECK(sd->local_mask & ACTIVE);\n    } else {\n      shard_data_.resize(1);\n      sd = &shard_data_.front();\n      sd->local_mask |= ACTIVE;\n    }\n    sd->slice_count = -1;\n    sd->slice_start = -1;\n  }\n\n  // Validation.\n  for (const auto& sd : shard_data_) {\n    // sd.local_mask may be non-zero for multi transactions with instant locking.\n    // Specifically EVALs may maintain state between calls.\n    DCHECK(!sd.is_armed.load(memory_order_relaxed));\n    if (!multi_) {\n      DCHECK_EQ(TxQueue::kEnd, sd.pq_pos);\n    }\n  }\n}\n\nOpStatus Transaction::InitByArgs(Namespace* ns, DbIndex index, CmdArgList args) {\n  
InitBase(ns, index, args);\n\n  if ((cid_->opt_mask() & CO::GLOBAL_TRANS) > 0) {\n    InitGlobal();\n    return OpStatus::OK;\n  }\n\n  if ((cid_->opt_mask() & CO::NO_KEY_TRANSACTIONAL) > 0) {\n    if (((cid_->opt_mask() & CO::NO_KEY_TX_SPAN_ALL) > 0)) {\n      EnableAllShards();\n    } else {\n      EnableShard(0);\n    }\n\n    return OpStatus::OK;\n  }\n\n  DCHECK_EQ(unique_shard_cnt_, 0u);\n  DCHECK(args_slices_.empty());\n  DCHECK(kv_fp_.empty());\n\n  OpResult<KeyIndex> key_index = DetermineKeys(cid_, args);\n  if (!key_index)\n    return key_index.status();\n\n  InitByKeys(*key_index);\n  return OpStatus::OK;\n}\n\nvoid Transaction::PrepareSquashedMultiHop(const CommandId* cid,\n                                          absl::FunctionRef<bool(ShardId)> enabled) {\n  CHECK(multi_->mode == GLOBAL || multi_->mode == LOCK_AHEAD);\n\n  MultiSwitchCmd(cid);\n\n  InitBase(namespace_, db_index_, {});\n\n  // Because squashing already determines active shards by partitioning commands,\n  // we don't have to work with keys manually and can just mark active shards.\n  // The partitioned commands know it's keys and assume they have correct access.\n  DCHECK_EQ(shard_data_.size(), shard_set->size());\n  for (unsigned i = 0; i < shard_data_.size(); i++) {\n    if (enabled(i)) {\n      shard_data_[i].local_mask |= ACTIVE;\n      unique_shard_cnt_++;\n      unique_shard_id_ = i;\n    } else {\n      shard_data_[i].local_mask &= ~ACTIVE;\n    }\n    shard_data_[i].slice_start = 0;\n    shard_data_[i].slice_count = 0;\n  }\n\n  MultiBecomeSquasher();\n}\n\nvoid Transaction::StartMultiGlobal(Namespace* ns, DbIndex dbid) {\n  CHECK(multi_);\n  CHECK(shard_data_.empty());  // Make sure default InitByArgs didn't run.\n\n  multi_->mode = GLOBAL;\n  InitBase(ns, dbid, {});\n  InitGlobal();\n  multi_->lock_mode = IntentLock::EXCLUSIVE;\n\n  ScheduleInternal();\n}\n\nvoid Transaction::StartMultiLockedAhead(Namespace* ns, DbIndex dbid, CmdArgList keys,\n                                
        bool skip_scheduling) {\n  DVLOG(1) << \"StartMultiLockedAhead on \" << keys.size() << \" keys\";\n\n  DCHECK(multi_);\n  DCHECK(shard_data_.empty());  // Make sure default InitByArgs didn't run.\n\n  multi_->mode = LOCK_AHEAD;\n  multi_->lock_mode = LockMode();\n\n  PrepareMultiFps(keys);\n\n  InitBase(ns, dbid, keys);\n  InitByKeys(KeyIndex(0, keys.size()));\n\n  if (!skip_scheduling)\n    ScheduleInternal();\n\n  full_args_ = {};  // InitBase set it to temporary keys, now we reset it.\n}\n\nvoid Transaction::StartMultiNonAtomic() {\n  DCHECK(multi_);\n  multi_->mode = NON_ATOMIC;\n}\n\nvoid Transaction::InitTxTime() {\n  time_now_ms_ = GetCurrentTimeMs();\n}\n\nvoid Transaction::MultiSwitchCmd(const CommandId* cid) {\n  DCHECK(multi_);\n  DCHECK(!cb_ptr_);\n\n  multi_->cmd_seq_num++;\n\n  if (multi_->role != SQUASHED_STUB)  // stub transactions don't migrate between threads\n    unique_shard_id_ = 0;\n  unique_shard_cnt_ = 0;\n\n  args_slices_.clear();\n  kv_fp_.clear();\n\n  cid_ = cid;\n  re_enabled_auto_journal_ = false;\n  cb_ptr_.reset();\n\n  for (auto& sd : shard_data_) {\n    sd.slice_count = sd.slice_start = 0;\n    sd.fp_start = sd.fp_count = 0;  // Reset fingerprints span as kv_fp_ was cleared above.\n\n    if (multi_->mode == NON_ATOMIC) {\n      sd.local_mask = 0;  // Non atomic transactions schedule each time, so remove all flags\n      DCHECK_EQ(sd.pq_pos, TxQueue::kEnd);\n    } else {\n      DCHECK(IsAtomicMulti());   // Every command determines it's own active shards\n      sd.local_mask &= ~ACTIVE;  // so remove ACTIVE flags, but keep KEYLOCK_ACQUIRED\n    }\n    DCHECK(!sd.is_armed.load(memory_order_relaxed));\n  }\n\n  if (multi_->mode == NON_ATOMIC) {\n    coordinator_state_ = 0;\n    txid_ = 0;\n  } else if (multi_->role == SQUASHED_STUB) {\n    DCHECK_EQ(coordinator_state_, 0u);\n  }\n\n  // Each hop needs to be prepared, reset role\n  if (multi_->role == SQUASHER)\n    multi_->role = DEFAULT;\n}\n\nvoid 
Transaction::MultiUpdateWithParent(const Transaction* parent) {\n  // Disabled because of single shard lua optimization\n  // DCHECK(multi_);\n  // DCHECK(parent->multi_);  // it might not be a squasher yet, but certainly is multi\n  DCHECK_EQ(multi_->role, SQUASHED_STUB);\n  DCHECK(parent->time_now_ms_);\n\n  txid_ = parent->txid_;\n  time_now_ms_ = parent->time_now_ms_;\n  unique_slot_checker_ = parent->unique_slot_checker_;\n  namespace_ = parent->namespace_;\n}\n\nvoid Transaction::MultiBecomeSquasher() {\n  DCHECK(multi_->mode == GLOBAL || multi_->mode == LOCK_AHEAD);\n  DCHECK_GT(GetUniqueShardCnt(), 0u);                    // initialized and determined active shards\n  DCHECK(cid_->IsMultiTransactional()) << cid_->name();  // proper base command set\n  multi_->role = SQUASHER;\n}\n\nstring Transaction::DebugId(std::optional<ShardId> sid) const {\n  DCHECK_GT(use_count_.load(memory_order_relaxed), 0u);\n  string res = StrCat(Name(), \"@\", txid_, \"/\", unique_shard_cnt_);\n  if (multi_) {\n    absl::StrAppend(&res, \":\", multi_->cmd_seq_num);\n  }\n  absl::StrAppend(&res, \" {id=\", trans_id(this));\n  absl::StrAppend(&res, \" {cb_ptr=\", bool(cb_ptr_));\n  if (sid) {\n    absl::StrAppend(&res, \",mask[\", *sid, \"]=\", int(shard_data_[SidToId(*sid)].local_mask),\n                    \",is_armed=\", DEBUG_IsArmedInShard(*sid),\n                    \",txqpos[]=\", shard_data_[SidToId(*sid)].pq_pos);\n  }\n  absl::StrAppend(&res, \"}\");\n  return res;\n}\n\nvoid Transaction::PrepareSingleSquash(Namespace* ns, ShardId sid, DbIndex db, CmdArgList keys,\n                                      MultiMode mode) {\n  if (mode == LOCK_AHEAD) {\n    StartMultiLockedAhead(ns, db, keys, true);  // delay locking until first hop\n  } else {\n    DCHECK_EQ(mode, GLOBAL);\n    StartMultiGlobal(ns, db);\n  }\n  EnableShard(sid);\n  MultiBecomeSquasher();\n\n  // As we never change commands, conclude immediately\n  coordinator_state_ |= COORD_CONCLUDING;\n}\n\n// Runs in the 
dbslice thread. Returns true if the transaction concluded.\nbool Transaction::RunInShard(EngineShard* shard, bool allow_q_removal) {\n  DCHECK_GT(txid_, 0u);\n  CHECK(cb_ptr_) << DebugId();\n\n  unsigned idx = SidToId(shard->shard_id());\n  auto& sd = shard_data_[idx];\n\n  sd.stats.total_runs++;\n\n  DCHECK_GT(run_barrier_.DEBUG_Count(), 0u);\n  VLOG(2) << \"RunInShard: \" << DebugId() << \" sid:\" << shard->shard_id() << \" \" << sd.local_mask;\n\n  // was_suspended is true meaning that this transaction was suspended and then\n  // it was woken up by another transaction in either this thread or a key in another thread.\n  // if awaked_prerun is true - it means it was woken up by a transaction in this thread,\n  bool was_suspended = sd.local_mask & WAS_SUSPENDED;\n  bool awaked_prerun = sd.local_mask & AWAKED_Q;\n  DCHECK(was_suspended || !awaked_prerun);\n\n  IntentLock::Mode mode = LockMode();\n\n  DCHECK(IsGlobal() || (sd.local_mask & KEYLOCK_ACQUIRED) || (multi_ && multi_->mode == GLOBAL));\n\n  /*************************************************************************/\n\n  RunCallback(shard);\n\n  /*************************************************************************/\n  // at least the coordinator thread owns the reference.\n  DCHECK_GE(GetUseCount(), 1u);\n\n  bool is_concluding = coordinator_state_ & COORD_CONCLUDING;\n\n  // If we're allowed, we remove ourselves upon first invocation from the queue,\n  // and successive hops are run by continuation_trans_ in engine shard.\n  // Otherwise we can remove ourselves only when we're concluding (so no more hops follow).\n  if (sd.pq_pos != TxQueue::kEnd && (is_concluding || allow_q_removal)) {\n    VLOG(2) << \"Remove from txq \" << this->DebugId();\n    shard->txq()->Remove(sd.pq_pos);\n    sd.pq_pos = TxQueue::kEnd;\n  }\n\n  // For multi we unlock transaction (i.e. 
its keys) in UnlockMulti() call.\n  // If it's a final hop we should release the locks.\n  if (is_concluding) {\n    bool became_suspended = !was_suspended && (sd.local_mask & WAS_SUSPENDED);\n    KeyLockArgs largs;\n\n    if (IsGlobal()) {\n      DCHECK(!awaked_prerun && !became_suspended);  // Global transactions can not be blocking.\n      VLOG(2) << \"Releasing shard lock\";\n      shard->shard_lock()->Release(LockMode());\n    } else {  // not global.\n      largs = GetLockArgs(idx);\n      DCHECK(sd.local_mask & KEYLOCK_ACQUIRED);\n\n      // If a transaction has been suspended, we keep the lock so that future transaction\n      // touching those keys will be ordered via TxQueue. It's necessary because we preserve\n      // the atomicity of awaked transactions by halting the TxQueue.\n      if (!became_suspended) {\n        GetDbSlice(shard->shard_id()).Release(mode, largs);\n        sd.local_mask &= ~KEYLOCK_ACQUIRED;\n      }\n      sd.local_mask &= ~OUT_OF_ORDER;\n    }\n\n    // This is the last hop, so clear cont_trans if its held by the current tx\n    // The position is important because we check below if `shard->GetContTx() == nullptr`\n    // so we must clear it before we notify awaked transactions.\n    shard->RemoveContTx(this);\n\n    // It has 2 responsibilities.\n    // 1: to go over potential wakened keys, verify them and activate watch queues.\n    // 2: if this transaction was notified and finished running - to remove it from the head\n    //    of the queue and notify the next one.\n\n    if (auto* bcontroller = namespace_->GetBlockingController(shard->shard_id()); bcontroller) {\n      if (awaked_prerun || was_suspended) {\n        bcontroller->RemovedWatched(GetShardArgs(idx), this);\n      }\n\n      // Wake only if no tx queue head is currently running\n      // Note: RemoveContTx might have no effect above if this tx had no continuations\n      if (shard->GetContTx() == nullptr) {\n        bcontroller->NotifyPending();\n      }\n    }\n 
 }\n\n  FinishHop();  // From this point on we can not access 'this'.\n  return is_concluding;\n}\n\nvoid Transaction::RunCallback(EngineShard* shard) {\n  DCHECK_EQ(shard, EngineShard::tlocal());\n\n  RunnableResult result;\n  try {\n    result = (*cb_ptr_)(this, shard);\n\n    if (unique_shard_cnt_ == 1) {\n      cb_ptr_.reset();  // We can do it because only a single thread runs the callback.\n      local_result_ = result;\n    } else {\n      if (result == OpStatus::OUT_OF_MEMORY) {\n        absl::base_internal::SpinLockHolder lk{&local_result_mu_};\n        CHECK(local_result_ == OpStatus::OK || local_result_ == OpStatus::OUT_OF_MEMORY);\n        local_result_ = result;\n      } else {\n        CHECK_EQ(OpStatus::OK, result);\n      }\n    }\n  } catch (std::bad_alloc&) {\n    LOG_FIRST_N(ERROR, 16) << \" out of memory\";  // TODO: to log at most once per sec.\n    absl::base_internal::SpinLockHolder lk{&local_result_mu_};\n    local_result_ = OpStatus::OUT_OF_MEMORY;\n  } catch (std::exception& e) {\n    LOG(FATAL) << \"Unexpected exception \" << e.what();\n  }\n\n  auto& db_slice = GetDbSlice(shard->shard_id());\n  db_slice.OnCbFinishBlocking();\n\n  // Handle result flags to alter behaviour.\n  if (result.flags & RunnableResult::AVOID_CONCLUDING) {\n    // Multi shard callbacks should either all or none choose to conclude. They can't communicate,\n    // so they must know their decision ahead, consequently there is no point in using this flag.\n    CHECK_EQ(unique_shard_cnt_, 1u);\n    DCHECK((coordinator_state_ & COORD_CONCLUDING) || multi_->concluding);\n    coordinator_state_ &= ~COORD_CONCLUDING;\n  }\n\n  // Log to journal only once the command finished running\n  if ((coordinator_state_ & COORD_CONCLUDING) || (multi_ && multi_->concluding)) {\n    LogAutoJournalOnShard(shard, result);\n    MaybeInvokeTrackingCb();\n  }\n}\n\n// TODO: For multi-transactions we should be able to deduce mode() at run-time based\n// on the context. 
For regular multi-transactions we can actually inspect all commands.\n// For eval-like transactions - we can decide based on the command flavor (EVAL/EVALRO) or\n// auto-tune based on the static analysis (by identifying commands with hardcoded command names).\nvoid Transaction::ScheduleInternal() {\n  DCHECK_EQ(txid_, 0u);\n  DCHECK_EQ(coordinator_state_ & COORD_SCHED, 0);\n  DCHECK_GT(unique_shard_cnt_, 0u);\n  DCHECK(!IsAtomicMulti() || cid_->IsMultiTransactional());\n\n  // Try running immediately (during scheduling) if we're concluding and either:\n  // - have a single shard, and thus never have to cancel scheduling due to reordering\n  // - run as an idempotent command, meaning we can safely repeat the operation if scheduling fails\n  bool optimistic_exec = !IsGlobal() && (coordinator_state_ & COORD_CONCLUDING) &&\n                         (unique_shard_cnt_ == 1 || (cid_->opt_mask() & CO::IDEMPOTENT));\n\n  DVLOG(1) << \"ScheduleInternal \" << cid_->name() << \" on \" << unique_shard_cnt_ << \" shards \"\n           << \" optimistic_execution: \" << optimistic_exec;\n\n  auto is_active = [this](uint32_t i) { return IsActive(i); };\n\n  // Loop until successfully scheduled in all shards.\n  while (true) {\n    stats_.schedule_attempts++;\n\n    // This is a contention point for all threads - avoid using it unless necessary.\n    // Single shard operations can assign txid later if the immediate run failed.\n    if (unique_shard_cnt_ > 1)\n      txid_ = op_seq.fetch_add(1, memory_order_relaxed);\n\n    run_barrier_.Start(unique_shard_cnt_);\n\n    if (CanRunInlined()) {\n      // We increase the barrier above for this branch as well, in order to calm the DCHECKs\n      // in the lower-level code. 
It's not really needed otherwise because we run inline.\n\n      // single shard schedule operation can't fail\n      CHECK(ScheduleInShard(EngineShard::tlocal(), optimistic_exec));\n      run_barrier_.Dec();\n      break;\n    }\n\n    ScheduleContext schedule_ctx{this, optimistic_exec};\n\n    if (unique_shard_cnt_ == 1) {\n      // Single shard optimization. Note: we could apply the same optimization\n      // to multi-shard transactions as well by creating a vector of ScheduleContext.\n      schedule_queues[unique_shard_id_].queue.Push(&schedule_ctx);\n      bool current_val = false;\n      if (schedule_queues[unique_shard_id_].armed.compare_exchange_strong(current_val, true,\n                                                                          memory_order_acq_rel)) {\n        shard_set->Add(unique_shard_id_, &Transaction::ScheduleBatchInShard);\n      }\n    } else {\n      auto cb = [&schedule_ctx] {\n        if (!schedule_ctx.trans->ScheduleInShard(EngineShard::tlocal(),\n                                                 schedule_ctx.optimistic_execution)) {\n          schedule_ctx.fail_cnt.fetch_add(1, memory_order_relaxed);\n        }\n        schedule_ctx.trans->FinishHop();\n      };\n\n      IterateActiveShards([cb](const auto& sd, ShardId i) { shard_set->Add(i, cb); });\n\n      // Add this debugging function to print more information when we experience deadlock\n      // during tests.\n      ThisFiber::PrintLocalsCallback locals([&] {\n        return absl::StrCat(\"unique_shard_cnt_: \", unique_shard_cnt_,\n                            \" run_barrier_cnt: \", run_barrier_.DEBUG_Count(), \"\\n\");\n      });\n    }\n    run_barrier_.Wait();\n\n    if (schedule_ctx.fail_cnt.load(memory_order_relaxed) == 0) {\n      break;\n    }\n\n    VLOG(2) << \"Cancelling \" << DebugId();\n    ServerState::tlocal()->stats.tx_schedule_cancel_cnt += 1;\n\n    atomic_bool should_poll_execution{false};\n    auto cancel = [&](EngineShard* shard) {\n      bool res = 
CancelShardCb(shard);\n      if (res) {\n        should_poll_execution.store(true, memory_order_relaxed);\n      }\n    };\n    shard_set->RunBriefInParallel(std::move(cancel), is_active);\n\n    // We must follow up with PollExecution because in rare cases with multi-trans\n    // that follows this one, we may find the next transaction in the queue that is never\n    // trigerred. Which leads to deadlock. I could solve this by adding PollExecution to\n    // CancelShardCb above but then we would need to use the shard_set queue since PollExecution\n    // is blocking. I wanted to avoid the additional latency for the general case of running\n    // CancelShardCb because of the very rate case below. Therefore, I decided to just fetch the\n    // indication that we need to follow up with PollExecution and then send it to shard_set queue.\n    // We do not need to wait for this callback to finish - just make sure it will eventually run.\n    // See https://github.com/dragonflydb/dragonfly/issues/150 for more info.\n    if (should_poll_execution.load(memory_order_relaxed)) {\n      IterateActiveShards([](const auto& sd, auto i) {\n        shard_set->Add(i, [] { EngineShard::tlocal()->PollExecution(\"cancel_cleanup\", nullptr); });\n      });\n    }\n    InitTxTime();  // update time for next scheduling attempt\n  }\n\n  coordinator_state_ |= COORD_SCHED;\n  RecordTxScheduleStats(this);\n}\n\nvoid Transaction::UnlockMulti(bool block) {\n  DCHECK(multi_);\n  DCHECK_GE(GetUseCount(), 1u);  // Greater-equal because there may be callbacks in progress.\n\n  // Return if we either didn't schedule at all (and thus run) or already did conclude\n  if ((coordinator_state_ & COORD_SCHED) == 0 || (coordinator_state_ & COORD_CONCLUDING) > 0)\n    return;\n  coordinator_state_ |= COORD_CONCLUDING;\n\n  // Distribute keys by shards\n  DCHECK_EQ(shard_data_.size(), shard_set->size());  // Atomic doesn't use single shard optimization\n  vector<vector<LockFp>> 
sharded_keys(shard_set->size());\n  for (const auto& [sid, fp] : multi_->tag_fps)\n    sharded_keys[sid].emplace_back(fp);\n\n  // Whether transaction was active on the shard and needs to unlock\n  auto is_active = [&](ShardId sid) {\n    return !sharded_keys[sid].empty() || multi_->mode == GLOBAL;\n  };\n\n  // Count number of active shards ahead and set run/use counts\n  size_t occupied_shards = 0;\n  for (size_t sid = 0; sid < shard_set->size(); sid++) {\n    if (!is_active(sid))\n      continue;\n    occupied_shards++;\n  }\n  run_barrier_.Start(occupied_shards);\n  use_count_.fetch_add(occupied_shards, std::memory_order_relaxed);\n\n  // Dispatch callbacks to unlock on shards\n  for (ShardId sid = 0; sid < shard_data_.size(); sid++) {\n    if (!is_active(sid))\n      continue;\n\n    shard_set->Add(sid, [this, fps = std::move(sharded_keys[sid])] {\n      this->UnlockMultiShardCb(fps, EngineShard::tlocal());\n      run_barrier_.Dec();\n      intrusive_ptr_release(this);\n    });\n  }\n\n  if (block) {\n    run_barrier_.Wait();\n    Refurbish();\n  }\n}\n\nOpStatus Transaction::ScheduleSingleHop(RunnableType cb) {\n  Execute(cb, true);\n  return local_result_;\n}\n\nvoid Transaction::SingleHopAsync(RunnableType cb) {\n  CHECK(!multi_);\n  CHECK_EQ(coordinator_state_, 0u);\n\n  coordinator_state_ |= COORD_CONCLUDING;\n  cb_ptr_ = cb;\n\n  if (unique_shard_cnt_ == 1) {\n    CHECK_EQ(shard_data_.size(), 1u);\n\n    // Arm immediately\n    shard_data_.front().is_armed.store(true, memory_order_relaxed);\n\n    // Keep alive till end and set barrier\n    run_barrier_.Add(1);\n    use_count_.fetch_add(1, memory_order_relaxed);\n\n    auto shard_cb = [this] {\n      bool success = ScheduleInShard(EngineShard::tlocal(), true);\n      CHECK(success);  // single shard scheduling can't fail\n\n      if (shard_data_.front().local_mask & OPTIMISTIC_EXECUTION) {  // executed during schedule\n        run_barrier_.Dec();\n        intrusive_ptr_release(this);\n      } else {\n    
    // do we really need to submit a shard callback?\n        // an armed transaction will be driven by the next previous txq entry\n\n        // possible deadlock because of api\n        // but really we just need to re-schedule the callback\n        // shard_set->Add(unique_shard_id_, [this] {\n        //  EngineShard::tlocal()->PollExecution(\"exec_cb\", this);\n        //  intrusive_ptr_release(this);\n        //});\n        EngineShard::tlocal()->PollExecution(\"exec_cb\", this);\n        intrusive_ptr_release(this);\n      }\n    };\n\n    // Dispatch to shard\n    if (CanRunInlined())\n      shard_cb();\n    else\n      shard_set->Add(unique_shard_id_, shard_cb);\n  } else {\n    ScheduleInternal();\n    DispatchHop();  // won't wait on run_barrier_\n  }\n}\n\n// Runs in coordinator thread.\nvoid Transaction::Execute(RunnableType cb, bool conclude) {\n  if (multi_ && multi_->role == SQUASHED_STUB) {\n    local_result_ = RunSquashedMultiCb(cb);\n    return;\n  }\n\n  local_result_ = OpStatus::OK;\n  cb_ptr_ = cb;\n\n  if (IsAtomicMulti()) {\n    multi_->concluding = conclude;\n  } else {\n    coordinator_state_ = conclude ? 
(coordinator_state_ | COORD_CONCLUDING)\n                                  : (coordinator_state_ & ~COORD_CONCLUDING);\n  }\n\n  if ((coordinator_state_ & COORD_SCHED) == 0) {\n    ScheduleInternal();\n  }\n\n  DispatchHop();\n  run_barrier_.Wait();\n  cb_ptr_.reset();\n\n  if (coordinator_state_ & COORD_CONCLUDING)\n    coordinator_state_ &= ~COORD_SCHED;\n}\n\n// Runs in coordinator thread.\nvoid Transaction::DispatchHop() {\n  DVLOG(1) << \"DispatchHop \" << DebugId();\n  DCHECK_GT(unique_shard_cnt_, 0u);\n  DCHECK_GT(use_count_.load(memory_order_relaxed), 0u);\n  DCHECK(!IsAtomicMulti() || multi_->lock_mode.has_value());\n  DCHECK_LE(shard_data_.size(), 1024u);\n\n  // Hops can start executing immediately after being armed, so we\n  // initialize the run barrier before arming, as well as copy indices\n  // of active shards to avoid reading concurrently accessed shard data.\n  std::bitset<1024> poll_flags(0);\n  unsigned run_cnt = 0;\n  IterateActiveShards([&poll_flags, &run_cnt](auto& sd, auto i) {\n    if ((sd.local_mask & OPTIMISTIC_EXECUTION) == 0) {\n      run_cnt++;\n      poll_flags.set(i, true);\n    }\n    sd.local_mask &= ~OPTIMISTIC_EXECUTION;  // we'll run it next time if it avoided concluding\n  });\n\n  DCHECK_EQ(run_cnt, poll_flags.count());\n  if (run_cnt == 0)  // all callbacks were run immediately\n    return;\n\n  run_barrier_.Start(run_cnt);\n\n  // Set armed flags on all active shards.\n  std::atomic_thread_fence(memory_order_release);  // once fence to avoid flushing writes in loop\n  IterateActiveShards([&poll_flags](auto& sd, auto i) {\n    if (poll_flags.test(i))\n      sd.is_armed.store(true, memory_order_relaxed);\n  });\n\n  if (CanRunInlined()) {\n    DCHECK_EQ(run_cnt, 1u);\n    DVLOG(1) << \"Short-circuit ExecuteAsync \" << DebugId();\n    EngineShard::tlocal()->PollExecution(\"exec_cb\", this);\n    return;\n  }\n\n  use_count_.fetch_add(run_cnt, memory_order_relaxed);  // for each pointer from poll_cb\n\n  auto poll_cb = [this] 
{\n    CHECK(namespace_ != nullptr);\n    EngineShard::tlocal()->PollExecution(\"exec_cb\", this);\n    DVLOG(3) << \"ptr_release \" << DebugId();\n    intrusive_ptr_release(this);  // against use_count_.fetch_add above.\n  };\n  IterateShards([&poll_cb, &poll_flags](PerShardData& sd, auto i) {\n    if (poll_flags.test(i))\n      shard_set->Add(i, poll_cb);\n  });\n}\n\nvoid Transaction::FinishHop() {\n  boost::intrusive_ptr<Transaction> guard(this);  // Keep alive until Dec() fully finishes\n  run_barrier_.Dec();\n}\n\nvoid Transaction::Conclude() {\n  if (!IsScheduled())\n    return;\n  auto cb = [](Transaction* t, EngineShard* shard) { return OpStatus::OK; };\n  Execute(std::move(cb), true);\n}\n\nvoid Transaction::Refurbish() {\n  txid_ = 0;\n  coordinator_state_ = 0;\n  cb_ptr_.reset();\n}\n\nconst absl::flat_hash_set<std::pair<ShardId, LockFp>>& Transaction::GetMultiFps() const {\n  DCHECK(multi_);\n  return multi_->tag_fps;\n}\n\n#if 0\nstring Transaction::DEBUG_PrintFailState(ShardId sid) const {\n  auto res = StrCat(\n      \"usc: \", unique_shard_cnt_, \", name:\", GetCId()->name(),\n      \", usecnt:\", use_count_.load(memory_order_relaxed), \", runcnt: \", run_barrier_.DEBUG_Count(),\n      \", coordstate: \", coordinator_state_, \", coord native thread: \", stats_.coordinator_index,\n      \", schedule attempts: \", stats_.schedule_attempts, \", report from sid: \", sid, \"\\n\");\n  std::atomic_thread_fence(memory_order_acquire);\n  for (unsigned i = 0; i < shard_data_.size(); ++i) {\n    const auto& sd = shard_data_[i];\n    absl::StrAppend(&res, \"- shard: \", i, \" local_mask:\", sd.local_mask,\n                    \" total_runs: \", sd.stats.total_runs, \"\\n\");\n  }\n  return res;\n}\n#endif\n\nvoid Transaction::EnableShard(ShardId sid) {\n  unique_shard_cnt_ = 1;\n  unique_shard_id_ = sid;\n  shard_data_.resize(IsActiveMulti() ? 
shard_set->size() : 1);\n  shard_data_.front().local_mask |= ACTIVE;\n}\n\nvoid Transaction::EnableAllShards() {\n  unique_shard_cnt_ = shard_set->size();\n  unique_shard_id_ = unique_shard_cnt_ == 1 ? 0 : kInvalidSid;\n  shard_data_.resize(shard_set->size());\n  for (auto& sd : shard_data_)\n    sd.local_mask |= ACTIVE;\n}\n\n// runs in coordinator thread.\n// Marks the transaction as expired and removes it from the waiting queue.\nvoid Transaction::ExpireBlocking(WaitKeys wkeys) {\n  DCHECK(!IsGlobal());\n  DVLOG(1) << \"ExpireBlocking \" << DebugId();\n  run_barrier_.Start(unique_shard_cnt_);\n\n  auto expire_cb = [this, &wkeys] {\n    EngineShard* es = EngineShard::tlocal();\n    if (wkeys) {\n      IndexSlice is(0, 1);\n      ShardArgs sa(absl::MakeSpan(&wkeys.value(), 1), absl::MakeSpan(&is, 1));\n      ExpireShardCb(sa, es);\n    } else {\n      ExpireShardCb(GetShardArgs(es->shard_id()), es);\n    }\n  };\n  IterateActiveShards([&expire_cb](PerShardData& sd, auto i) { shard_set->Add(i, expire_cb); });\n\n  run_barrier_.Wait();\n  DVLOG(1) << \"ExpireBlocking finished \" << DebugId();\n}\n\nstring_view Transaction::Name() const {\n  return cid_ ? 
cid_->name() : \"null-command\";\n}\n\nShardId Transaction::GetUniqueShard() const {\n  DCHECK_EQ(GetUniqueShardCnt(), 1U);\n  return unique_shard_id_;\n}\n\noptional<SlotId> Transaction::GetUniqueSlotId() const {\n  return unique_slot_checker_.GetUniqueSlotId();\n}\n\nKeyLockArgs Transaction::GetLockArgs(ShardId sid) const {\n  KeyLockArgs res;\n  res.db_index = db_index_;\n\n  if (unique_shard_cnt_ == 1) {\n    res.fps = {kv_fp_.data(), kv_fp_.size()};\n  } else {\n    const auto& sd = shard_data_[sid];\n    DCHECK_LE(sd.fp_start + sd.fp_count, kv_fp_.size());\n    res.fps = {kv_fp_.data() + sd.fp_start, sd.fp_count};\n  }\n  return res;\n}\n\nuint16_t Transaction::DisarmInShard(ShardId sid) {\n  auto& sd = shard_data_[SidToId(sid)];\n  // NOTE: Maybe compare_exchange is worth it to avoid redundant writes\n  return sd.is_armed.exchange(false, memory_order_acquire) ? sd.local_mask : 0;\n}\n\npair<uint16_t, bool> Transaction::DisarmInShardWhen(ShardId sid, uint16_t relevant_flags) {\n  auto& sd = shard_data_[SidToId(sid)];\n  if (sd.is_armed.load(memory_order_acquire)) {\n    bool relevant = sd.local_mask & relevant_flags;\n    if (relevant)\n      CHECK(sd.is_armed.exchange(false, memory_order_release));\n    return {sd.local_mask, relevant};\n  }\n  return {0, false};\n}\n\nbool Transaction::IsActive(ShardId sid) const {\n  // If we have only one shard, we often don't store information about all shards, so determine it\n  // solely by id\n  if (unique_shard_cnt_ == 1) {\n    // However the active flag is still supposed to be set for our unique shard\n    DCHECK((shard_data_[SidToId(unique_shard_id_)].local_mask & ACTIVE));\n    return sid == unique_shard_id_;\n  }\n\n  return shard_data_[SidToId(sid)].local_mask & ACTIVE;\n}\n\nIntentLock::Mode Transaction::LockMode() const {\n  return cid_->IsReadOnly() ? 
IntentLock::SHARED : IntentLock::EXCLUSIVE;\n}\n\nOpArgs Transaction::GetOpArgs(EngineShard* shard) const {\n  DCHECK(IsActive(shard->shard_id()));\n  DCHECK((multi_ && multi_->role == SQUASHED_STUB) || (run_barrier_.DEBUG_Count() > 0));\n  return OpArgs{shard, this, GetDbContext()};\n}\n\n// This function should not block since it's run via RunBriefInParallel.\nbool Transaction::ScheduleInShard(EngineShard* shard, bool execute_optimistic) {\n  ShardId sid = SidToId(shard->shard_id());\n  auto& sd = shard_data_[sid];\n\n  DCHECK(sd.local_mask & ACTIVE);\n  DCHECK_EQ(sd.local_mask & KEYLOCK_ACQUIRED, 0);\n  sd.local_mask &= ~(OUT_OF_ORDER | OPTIMISTIC_EXECUTION);\n\n  TxQueue* txq = shard->txq();\n  KeyLockArgs lock_args;\n  IntentLock::Mode mode = LockMode();\n  bool lock_granted = false;\n\n  // If a more recent transaction already committed, we abort\n  if (txid_ > 0 && shard->committed_txid() >= txid_)\n    return false;\n\n  auto release_fp_locks = [&]() {\n    GetDbSlice(shard->shard_id()).Release(mode, lock_args);\n    sd.local_mask &= ~KEYLOCK_ACQUIRED;\n  };\n\n  // Acquire intent locks. 
Intent locks are always acquired, even if already locked by others.\n  if (!IsGlobal()) {\n    lock_args = GetLockArgs(shard->shard_id());\n    const bool shard_unlocked = shard->shard_lock()->Check(mode);\n\n    // We need to acquire the fp locks because the executing callback\n    // within RunCallback below might preempt.\n    const bool keys_unlocked = GetDbSlice(shard->shard_id()).Acquire(mode, lock_args);\n    lock_granted = shard_unlocked && keys_unlocked;\n\n    sd.local_mask |= KEYLOCK_ACQUIRED;\n    if (lock_granted) {\n      sd.local_mask |= OUT_OF_ORDER;\n    }\n\n    DVLOG(3) << \"Lock granted \" << lock_granted << \" for trans \" << DebugId();\n\n    // Check if we can run immediately\n    if (lock_granted && execute_optimistic) {\n      sd.local_mask |= OPTIMISTIC_EXECUTION;\n      shard->stats().tx_optimistic_total++;\n\n      RunCallback(shard);\n\n      // Check state again, it could've been updated if the callback returned AVOID_CONCLUDING flag.\n      // Only possible for single shard.\n      if (coordinator_state_ & COORD_CONCLUDING) {\n        release_fp_locks();\n        return true;\n      }\n    }\n  }\n\n  // Single shard operations might have delayed acquiring txid unless necessary.\n  if (txid_ == 0) {\n    DCHECK_EQ(unique_shard_cnt_, 1u);\n    txid_ = op_seq.fetch_add(1, memory_order_relaxed);\n    DCHECK_GT(txid_, shard->committed_txid());\n  }\n\n  // If the new transaction requires reordering of the pending queue (i.e. it comes before tail)\n  // and some other transaction already locked its keys we can not reorder 'trans' because\n  // the transaction could have deduced that it can run OOO and eagerly execute. 
Hence, we\n  // fail this scheduling attempt for trans.\n  if (!txq->Empty() && txid_ < txq->TailScore() && !lock_granted) {\n    if (sd.local_mask & KEYLOCK_ACQUIRED) {\n      release_fp_locks();\n    }\n    return false;\n  }\n\n  if (IsGlobal()) {\n    shard->shard_lock()->Acquire(mode);\n    VLOG(1) << \"Global shard lock acquired\";\n  }\n\n  TxQueue::Iterator it = txq->Insert(this);\n  DCHECK_EQ(TxQueue::kEnd, sd.pq_pos);\n  sd.pq_pos = it;\n\n  AnalyzeTxQueue(shard, txq);\n  DVLOG(1) << \"Insert into tx-queue, sid(\" << sid << \") \" << DebugId() << \", qlen \" << txq->size();\n\n  return true;\n}\n\nvoid Transaction::ScheduleBatchInShard() {\n  EngineShard* shard = EngineShard::tlocal();\n  auto& stats = shard->stats();\n  stats.tx_batch_schedule_calls_total++;\n\n  ShardId sid = shard->shard_id();\n  auto& sq = schedule_queues[sid];\n\n  for (unsigned j = 0;; ++j) {\n    // We pull the items from the queue in a loop until we reach the stop condition.\n    // TODO: we may have fairness problem here, where transactions being added up all the time\n    // and we never break from the loop. 
It is possible to break early but it's not trivial\n    // because we must ensure that there is another ScheduleBatchInShard callback in the queue.\n    // Can be checked with testing sq.armed is true when j == 1.\n    while (true) {\n      ScheduleContext* item = sq.queue.Pop();\n      if (!item)\n        break;\n\n      if (!item->trans->ScheduleInShard(shard, item->optimistic_execution)) {\n        item->fail_cnt.fetch_add(1, memory_order_relaxed);\n      }\n      item->trans->FinishHop();\n      stats.tx_batch_scheduled_items_total++;\n    };\n\n    // j==1 means we already signalled that we're done with the current batch.\n    if (j == 1)\n      break;\n\n    // We signal that we're done with the current batch but then we check if there are more\n    // transactions to fetch in the next iteration.\n    // We do this to avoid the situation where we have a data race, where\n    // a transaction is added to the queue, we've checked that sq.armed is true and skipped\n    // adding the callback that fetches the transaction.\n    sq.armed.exchange(false, memory_order_acq_rel);\n  }\n}\n\nbool Transaction::CancelShardCb(EngineShard* shard) {\n  ShardId idx = SidToId(shard->shard_id());\n  auto& sd = shard_data_[idx];\n\n  TxQueue::Iterator q_pos = exchange(sd.pq_pos, TxQueue::kEnd);\n  if (q_pos == TxQueue::kEnd) {\n    DCHECK_EQ(sd.local_mask & KEYLOCK_ACQUIRED, 0);\n    return false;\n  }\n\n  TxQueue* txq = shard->txq();\n  bool was_head = txq->Head() == q_pos;\n\n  Transaction* trans = absl::get<Transaction*>(txq->At(q_pos));\n  DCHECK(trans == this) << txq->size() << ' ' << sd.pq_pos << ' ' << trans->DebugId();\n  txq->Remove(q_pos);\n\n  if (IsGlobal()) {\n    shard->shard_lock()->Release(LockMode());\n  } else {\n    if ((cid_->opt_mask() & CO::NO_KEY_TRANSACTIONAL) == 0) {\n      auto lock_args = GetLockArgs(shard->shard_id());\n      DCHECK(sd.local_mask & KEYLOCK_ACQUIRED);\n      DCHECK(!lock_args.fps.empty());\n      
GetDbSlice(shard->shard_id()).Release(LockMode(), lock_args);\n    }\n\n    sd.local_mask &= ~KEYLOCK_ACQUIRED;\n  }\n\n  // Check if we need to poll the next head\n  return was_head && !txq->Empty();\n}\n\n// runs in engine-shard thread.\nShardArgs Transaction::GetShardArgs(ShardId sid) const {\n  DCHECK(!multi_ || multi_->role != SQUASHER);\n\n  // We can read unique_shard_cnt_  only because ShardArgsInShard is called after IsArmedInShard\n  // barrier.\n  if (unique_shard_cnt_ == 1) {\n    return ShardArgs{full_args_, absl::MakeSpan(args_slices_)};\n  }\n\n  const auto& sd = shard_data_[sid];\n  return ShardArgs{full_args_,\n                   absl::MakeSpan(args_slices_.data() + sd.slice_start, sd.slice_count)};\n}\n\nOpStatus Transaction::WaitOnWatch(const time_point& tp, WaitKeys wkeys, KeyReadyChecker krc,\n                                  bool* block_flag, bool* pause_flag) {\n  if (blocking_barrier_.IsClaimed()) {  // Might have been cancelled ahead by a dropping connection\n    Conclude();\n    return OpStatus::CANCELLED;\n  }\n\n  DCHECK(!IsAtomicMulti());  // blocking inside MULTI is not allowed\n\n  // Register keys on active shards blocking controllers and mark shard state as suspended.\n  auto cb = [&](Transaction* t, EngineShard* shard) {\n    if (wkeys) {  // single string_view.\n      IndexSlice is(0, 1);\n      ShardArgs sa(absl::MakeSpan(&wkeys.value(), 1), absl::MakeSpan(&is, 1));\n      t->WatchInShard(&t->GetNamespace(), sa, shard, krc);\n    } else {\n      t->WatchInShard(&t->GetNamespace(), t->GetShardArgs(shard->shard_id()), shard, krc);\n    }\n    return OpStatus::OK;\n  };\n  Execute(std::move(cb), true);\n\n  // Don't reset the scheduled flag because we didn't release the locks\n  coordinator_state_ |= COORD_SCHED;\n\n  auto* stats = ServerState::tl_connection_stats();\n  ++stats->num_blocked_clients;\n  DVLOG(1) << \"WaitOnWatch wait for \" << tp << \" \" << DebugId();\n\n  // Wait for the blocking barrier to be closed.\n  // Note: 
It might return immediately if another thread already notified us.\n  *block_flag = true;\n  cv_status status = blocking_barrier_.Wait(tp);\n  *block_flag = false;\n\n  DVLOG(1) << \"WaitOnWatch done \" << int(status) << \" \" << DebugId();\n  --stats->num_blocked_clients;\n\n  *pause_flag = true;\n  ServerState::tlocal()->AwaitPauseState(true);  // blocking are always write commands\n  *pause_flag = false;\n\n  OpStatus result = OpStatus::OK;\n  if (status == cv_status::timeout) {\n    result = OpStatus::TIMED_OUT;\n  } else if (coordinator_state_ & COORD_CANCELLED) {\n    DCHECK_GT(block_cancel_result_, OpStatus::OK);\n    result = block_cancel_result_;\n  }\n\n  // If we don't follow up with an \"action\" hop, we must clean up manually on all shards.\n  if (result != OpStatus::OK)\n    ExpireBlocking(wkeys);\n\n  return result;\n}\n\nvoid Transaction::WatchInShard(Namespace* ns, ShardArgs keys, EngineShard* shard,\n                               KeyReadyChecker krc) {\n  auto& sd = shard_data_[SidToId(shard->shard_id())];\n\n  CHECK_EQ(0, sd.local_mask & WAS_SUSPENDED);\n  sd.local_mask |= WAS_SUSPENDED;\n  sd.local_mask &= ~OUT_OF_ORDER;\n\n  ns->GetOrAddBlockingController(shard)->AddWatched(keys, std::move(krc), this);\n  DVLOG(2) << \"WatchInShard \" << DebugId();\n}\n\nvoid Transaction::ExpireShardCb(ShardArgs keys, EngineShard* shard) {\n  // Blocking transactions don't release keys when suspending, release them now.\n  auto lock_args = GetLockArgs(shard->shard_id());\n  GetDbSlice(shard->shard_id()).Release(LockMode(), lock_args);\n\n  auto& sd = shard_data_[SidToId(shard->shard_id())];\n  sd.local_mask &= ~KEYLOCK_ACQUIRED;\n\n  namespace_->GetBlockingController(shard->shard_id())->RemovedWatched(keys, this);\n  DCHECK(!namespace_->GetBlockingController(shard->shard_id())\n              ->awakened_transactions()\n              .contains(this));\n\n  // Unblock the caller with FinishHop.\n  FinishHop();\n\n  // And then poll execution to continue 
processing the queued transactions.\n  shard->PollExecution(\"unwatchcb\", nullptr);\n}\n\nDbSlice& Transaction::GetDbSlice(ShardId shard_id) const {\n  CHECK(namespace_ != nullptr);\n  return namespace_->GetDbSlice(shard_id);\n}\n\nOpStatus Transaction::RunSquashedMultiCb(RunnableType cb) {\n  DCHECK(multi_ && multi_->role == SQUASHED_STUB);\n  DCHECK_EQ(unique_shard_cnt_, 1u);\n\n  auto* shard = EngineShard::tlocal();\n  auto& db_slice = GetDbSlice(shard->shard_id());\n\n  auto result = cb(this, shard);\n  db_slice.OnCbFinishBlocking();\n\n  LogAutoJournalOnShard(shard, result);\n  MaybeInvokeTrackingCb();\n\n  DCHECK_EQ(result.flags, 0);  // if it's sophisticated, we shouldn't squash it\n  return result;\n}\n\nvoid Transaction::UnlockMultiShardCb(absl::Span<const LockFp> fps, EngineShard* shard) {\n  DCHECK(multi_ && multi_->lock_mode);\n\n  if (multi_->mode == GLOBAL) {\n    shard->shard_lock()->Release(IntentLock::EXCLUSIVE);\n  } else {\n    GetDbSlice(shard->shard_id()).Release(*multi_->lock_mode, KeyLockArgs{db_index_, fps});\n  }\n\n  ShardId sid = shard->shard_id();\n  auto& sd = shard_data_[SidToId(sid)];\n\n  // It does not have to be that all shards in multi transaction execute this tx.\n  // Hence it could stay in the tx queue. We perform the necessary cleanup and remove it from\n  // there. 
The transaction is not guaranteed to be at front.\n  if (sd.pq_pos != TxQueue::kEnd) {\n    DVLOG(1) << \"unlockmulti: TxRemove \" << DebugId();\n\n    TxQueue* txq = shard->txq();\n    DCHECK(!txq->Empty());\n    DCHECK_EQ(absl::get<Transaction*>(txq->At(sd.pq_pos)), this);\n\n    txq->Remove(sd.pq_pos);\n    sd.pq_pos = TxQueue::kEnd;\n  }\n\n  shard->FinalizeMulti(this);\n}\n\nbool Transaction::IsGlobal() const {\n  // Please note that a transaction can be non-global even if multi_->mode == GLOBAL.\n  // It happens when a transaction is squashed and switches to execute differrent commands.\n  return global_;\n}\n\n// Runs only in the shard thread.\n// Returns true if the transacton has changed its state from suspended to awakened,\n// false, otherwise.\nbool Transaction::NotifySuspended(ShardId sid, string_view key) {\n  // Wake a transaction only once on the first notify.\n  // We don't care about preserving the strict order with multiple operations running on blocking\n  // keys in parallel, because the internal order is not observable from outside either way.\n  if (!blocking_barrier_.TryClaim())\n    return false;\n\n  auto& sd = shard_data_[SidToId(sid)];\n\n  DVLOG(1) << \"NotifySuspended \" << DebugId() << \", local_mask:\" << sd.local_mask;\n\n  // We're the first and only to wake this transaction, expect the shard to be suspended.\n  CHECK(sd.local_mask & WAS_SUSPENDED);\n\n  // We wake at most once.\n  CHECK_EQ(sd.local_mask & AWAKED_Q, 0);\n\n  // Find index of awakened key\n  ShardArgs args = GetShardArgs(sid);\n  auto it = find_if(args.cbegin(), args.cend(), [key](string_view arg) { return arg == key; });\n  CHECK(it != args.cend());\n\n  // Change state to awaked and store index of awakened key\n  sd.local_mask |= AWAKED_Q;\n  sd.wake_key_pos = it.index();\n\n  blocking_barrier_.Close();\n  return true;\n}\n\noptional<string_view> Transaction::GetWakeKey(ShardId sid) const {\n  auto& sd = shard_data_[SidToId(sid)];\n  if ((sd.local_mask & AWAKED_Q) 
== 0)\n    return nullopt;\n\n  CHECK_LT(sd.wake_key_pos, full_args_.size());\n  return ArgS(full_args_, sd.wake_key_pos);\n}\n\nvoid Transaction::LogAutoJournalOnShard(EngineShard* shard, RunnableResult result) {\n  // TODO: For now, we ignore non shard coordination.\n  if (shard == nullptr)\n    return;\n\n  // Ignore technical squasher hops.\n  if (multi_ && multi_->role == SQUASHER)\n    return;\n\n  // Only write commands and/or no-key-transactional commands are logged\n  if (!cid_->IsJournaled() && (cid_->opt_mask() & CO::NO_KEY_TRANSACTIONAL) == 0)\n    return;\n\n  if (!shard->journal())\n    return;\n\n  if (result.status != OpStatus::OK) {\n    return;  // Do not log to journal if command execution failed.\n  }\n\n  // If autojournaling was disabled and not re-enabled the callback is writing to journal.\n  if ((cid_->opt_mask() & CO::NO_AUTOJOURNAL) && !re_enabled_auto_journal_) {\n    return;\n  }\n\n  journal::Entry::Payload entry_payload;\n  string_view cmd{cid_->name()};\n  if (unique_shard_cnt_ == 1 || args_slices_.empty()) {\n    entry_payload = journal::Entry::Payload(cmd, full_args_);\n  } else {\n    ShardArgs shard_args = GetShardArgs(shard->shard_id());\n    entry_payload = journal::Entry::Payload(cmd, shard_args);\n  }\n  // Record to journal autojournal commands, here we allow await which anables writing to sync\n  // the journal change.\n  LogJournalOnShard(std::move(entry_payload));\n}\n\nvoid Transaction::LogJournalOnShard(journal::Entry::Payload&& payload) const {\n  journal::RecordEntry(txid_, journal::Op::COMMAND, db_index_,\n                       unique_slot_checker_.GetUniqueSlotId(), std::move(payload));\n}\n\nvoid Transaction::ReviveAutoJournal() {\n  DCHECK(cid_->opt_mask() & CO::NO_AUTOJOURNAL);\n  DCHECK_EQ(run_barrier_.DEBUG_Count(), 0u);  // Can't be changed while dispatching\n  re_enabled_auto_journal_ = true;\n}\n\nvoid Transaction::CancelBlocking(const std::function<OpStatus(ArgSlice)>& status_cb) {\n  // We're on the 
owning thread of this transaction, so we can safely access it's data below.\n  // First, check if it makes sense to proceed.\n  if (blocking_barrier_.IsClaimed() || cid_ == nullptr || (cid_->opt_mask() & CO::BLOCKING) == 0)\n    return;\n\n  OpStatus status = OpStatus::CANCELLED;\n  if (status_cb) {\n    vector<string_view> all_keys;\n    IterateActiveShards([this, &all_keys](PerShardData&, auto i) {\n      auto shard_keys = GetShardArgs(i);\n      all_keys.insert(all_keys.end(), shard_keys.begin(), shard_keys.end());\n    });\n    status = status_cb(absl::MakeSpan(all_keys));\n  }\n\n  if (status == OpStatus::OK)\n    return;\n\n  // Check if someone else is about to wake us up\n  if (!blocking_barrier_.TryClaim())\n    return;\n\n  coordinator_state_ |= COORD_CANCELLED;\n  // don't use local_result_ because it can be overwirtten if we cancel ahead\n  block_cancel_result_ = status;\n  blocking_barrier_.Close();\n}\n\nbool Transaction::CanRunInlined() const {\n  auto* ss = ServerState::tlocal();\n  auto* es = EngineShard::tlocal();\n  if (unique_shard_cnt_ == 1 && unique_shard_id_ == ss->thread_index() &&\n      ss->AllowInlineScheduling() && !GetDbSlice(es->shard_id()).HasRegisteredCallbacks()) {\n    ss->stats.tx_inline_runs++;\n    return true;\n  }\n  return false;\n}\n\nOpResult<KeyIndex> DetermineKeys(const CommandId* cid, CmdArgList args) {\n  if (cid->opt_mask() & (CO::GLOBAL_TRANS | CO::NO_KEY_TRANSACTIONAL))\n    return KeyIndex{};\n\n  int num_custom_keys = -1;\n\n  unsigned start = 0, end = 0, step = 0;\n  std::optional<unsigned> bonus = std::nullopt;\n\n  if (cid->opt_mask() & CO::VARIADIC_KEYS) {  // number of keys is not trivially deducable\n    // ZUNION/INTER <num_keys> <key1> [<key2> ...]\n    // EVAL <script> <num_keys>\n    // XREAD ... 
STREAMS ...\n    if (args.size() < 2)\n      return OpStatus::SYNTAX_ERR;\n\n    string_view name{cid->name()};\n\n    // Determine based on STREAMS argument position\n    if (name == \"XREAD\" || name == \"XREADGROUP\") {\n      for (size_t i = 0; i < args.size(); ++i) {\n        string_view arg = ArgS(args, i);\n        if (absl::EqualsIgnoreCase(arg, \"STREAMS\")) {\n          size_t left = args.size() - i - 1;\n          return KeyIndex(i + 1, i + 1 + (left / 2));\n        }\n      }\n      return OpStatus::SYNTAX_ERR;\n    }\n\n    if (absl::EndsWith(name, \"STORE\") || name == \"CMS.MERGE\")\n      bonus = 0;  // Z<xxx>STORE and CMS.MERGE <dest> commands\n\n    unsigned num_keys_index;\n    if (absl::StartsWith(name, \"EVAL\") || name == \"BLMPOP\" || name == \"BZMPOP\")\n      num_keys_index = 1;\n    else\n      num_keys_index = bonus ? *bonus + 1 : 0;\n\n    string_view num = ArgS(args, num_keys_index);\n    if (!absl::SimpleAtoi(num, &num_custom_keys) || num_custom_keys < 0)\n      return OpStatus::INVALID_INT;\n\n    if (num_custom_keys == 0 &&\n        (absl::StartsWith(name, \"ZDIFF\") || absl::StartsWith(name, \"ZUNION\") ||\n         absl::StartsWith(name, \"ZINTER\") || absl::EndsWith(name, \"MPOP\"))) {\n      return OpStatus::AT_LEAST_ONE_KEY;\n    }\n\n    if (args.size() < size_t(num_custom_keys) + num_keys_index + 1)\n      return OpStatus::SYNTAX_ERR;\n  }\n\n  if (cid->first_key_pos() > 0) {\n    start = cid->first_key_pos() - 1;\n    int8_t last = cid->last_key_pos();\n\n    if (num_custom_keys >= 0) {\n      end = start + num_custom_keys;\n    } else {\n      end = last > 0 ? 
last : (int(args.size()) + last + 1);\n    }\n    if (cid->interleaved_step()) {\n      step = cid->interleaved_step();\n    } else {\n      step = 1;\n    }\n\n    if (cid->opt_mask() & CO::STORE_LAST_KEY) {\n      string_view name{cid->name()};\n\n      if ((name == \"GEORADIUSBYMEMBER\" && args.size() >= 5) ||\n          (name == \"GEORADIUS\" && args.size() >= 6)) {\n        // key member radius .. STORE destkey\n        string_view opt = ArgS(args, args.size() - 2);\n        if (absl::EqualsIgnoreCase(opt, \"STORE\") || absl::EqualsIgnoreCase(opt, \"STOREDIST\")) {\n          bonus = args.size() - 1;\n        }\n      }\n\n      if (name == \"SORT\") {\n        if (args.size() >= 3) {\n          // SORT key ... STORE destkey\n          string_view opt = ArgS(args, args.size() - 2);\n          if (absl::EqualsIgnoreCase(opt, \"STORE\")) {\n            bonus = args.size() - 1;\n          }\n        }\n      }\n    }\n\n    return KeyIndex{start, end, step, bonus};\n  }\n\n  LOG(FATAL) << \"TBD: Not supported \" << cid->name();\n  return {};\n}\n\nstd::vector<Transaction::PerShardCache>& Transaction::TLTmpSpace::GetShardIndex(unsigned size) {\n  shard_cache.resize(size);\n  for (auto& v : shard_cache)\n    v.Clear();\n  return shard_cache;\n}\n\n}  // namespace dfly\n"
  },
  {
    "path": "src/server/transaction.h",
    "content": "// Copyright 2022, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#pragma once\n\n#include <absl/base/internal/spinlock.h>\n#include <absl/container/flat_hash_map.h>\n#include <absl/container/flat_hash_set.h>\n#include <absl/container/inlined_vector.h>\n#include <absl/functional/function_ref.h>\n\n#include <atomic>\n// #include <boost/smart_ptr/intrusive_ptr.hpp>\n#include <string_view>\n#include <variant>\n#include <vector>\n\n#include \"core/intent_lock.h\"\n#include \"core/tx_queue.h\"\n#include \"facade/op_status.h\"\n#include \"server/cluster_support.h\"\n#include \"server/common.h\"\n#include \"server/journal/types.h\"\n#include \"server/tx_base.h\"\n#include \"util/fibers/synchronization.h\"\n\nnamespace dfly {\n\nclass BlockingController;\n\nusing facade::OpResult;\nusing facade::OpStatus;\n\n// Central building block of the transactional framework.\n//\n// Use it to run callbacks on the shard threads - such dispatches are called hops.\n//\n// Callbacks are not allowed to keep any possibly dangling pointers to data within the shards - it\n// must be copied explicitly. The callbacks running on different threads should also never pass any\n// messages or wait for each other, as it would block the execution of other transactions.\n//\n// The shards to run on are determined by the keys of the underlying command.\n// Global transactions run on all shards.\n//\n// 1. Multi transactions\n//\n// Multi transactions are handled by a single transaction, which exposes the same interface for\n// commands as regular transactions, but internally avoids rescheduling. There are multiple modes in\n// which a mutli-transaction can run, those are documented in the MultiMode enum.\n//\n// The flow of EXEC and EVAL is as follows:\n//\n// ```\n// trans->StartMulti_MultiMode_()\n// for ([cmd, args]) {\n//   trans->MultiSwitchCmd(cmd)  // 1. Set new command\n//   trans->InitByArgs(args)     // 2. 
Re-initialize with arguments\n//   cmd->Invoke(trans)          // 3. Run\n// }\n// trans->UnlockMulti()\n// ```\n//\n// 2. Multi squashing\n//\n// An important optimization for multi transactions is executing multiple single shard commands in\n// parallel. Because multiple commands are \"squashed\" into a single hop, its called multi squashing.\n// To mock the interface for commands, special \"stub\" transactions are created for each shard that\n// directly execute hop callbacks without any scheduling. Transaction roles are represented by the\n// MultiRole enum. See MultiCommandSquasher for the detailed squashing approach.\n//\n// The flow is as follows:\n//\n// ```\n// for (cmd in single_shard_sequence)\n//   sharded[shard].push_back(cmd)\n//\n// tx->PrepareSquashedMultiHop()\n// tx->ScheduleSingleHop({\n//   Transaction stub_tx {tx}\n//   for (cmd)\n//     // use stub_tx as regular multi tx, see 1. above\n// })\n//\n// ```\nclass Transaction {\n  friend class BlockingController;\n\n  Transaction(const Transaction&);\n  void operator=(const Transaction&) = delete;\n\n  ~Transaction();  // Transactions are reference counted with intrusive_ptr.\n\n  friend void intrusive_ptr_add_ref(Transaction* trans) noexcept {\n    trans->use_count_.fetch_add(1, std::memory_order_relaxed);\n  }\n\n  friend void intrusive_ptr_release(Transaction* trans) noexcept {\n    if (1 == trans->use_count_.fetch_sub(1, std::memory_order_release)) {\n      std::atomic_thread_fence(std::memory_order_acquire);\n      delete trans;\n    }\n  }\n\n public:\n  // Result returned by callbacks. Most should use the implicit conversion from OpStatus.\n  struct RunnableResult {\n    enum Flag : uint16_t {\n      // Can be issued by a **single** shard callback to avoid concluding, i.e. perform one more hop\n      // even if not requested ahead. 
Used for blocking command fallback.\n      AVOID_CONCLUDING = 1,\n    };\n\n    RunnableResult(OpStatus status = OpStatus::OK, uint16_t flags = 0)\n        : status(status), flags(flags) {\n    }\n\n    operator OpStatus() const {\n      return status;\n    }\n\n    OpStatus status;\n    uint16_t flags;\n  };\n\n  static_assert(sizeof(RunnableResult) == 4);\n\n  using time_point = ::std::chrono::steady_clock::time_point;\n  // Runnable that is run on shards during hop executions (often named callback).\n  // Callacks should return `OpStatus` which is implicitly converitble to `RunnableResult`!\n  using RunnableType = absl::FunctionRef<RunnableResult(Transaction* t, EngineShard*)>;\n\n  static constexpr std::nullopt_t kShardArgs{std::nullopt};\n  // Provides an override to watch a specific key or kShardArgs to watch all keys in the shard.\n  using WaitKeys = std::optional<std::string_view>;\n\n  // Modes in which a multi transaction can run.\n  enum MultiMode : uint8_t {\n    // Invalid state.\n    NOT_DETERMINED = 0,\n    // Global transaction.\n    GLOBAL = 1,\n    // Keys are locked ahead during Schedule.\n    LOCK_AHEAD = 2,\n    // Each command is executed separately. Equivalent to a pipeline.\n    NON_ATOMIC = 3,\n  };\n\n  // Squashed parallel execution requires a separate transaction for each shard. Those \"stubs\"\n  // perform no scheduling or real hops, but instead execute the handlers directly inline.\n  enum MultiRole {\n    DEFAULT = 0,        // Regular multi transaction\n    SQUASHER = 1,       // Owner of stub transactions\n    SQUASHED_STUB = 2,  // Stub transaction\n  };\n\n  // State on specific shard.\n  enum LocalMask : uint16_t {\n    ACTIVE = 1,  // Whether its active on this shard (to schedule or execute hops)\n    OPTIMISTIC_EXECUTION = 1 << 1,  // Whether the shard executed optimistically (during schedule)\n    // Whether it can run out of order. 
Undefined if KEYLOCK_ACQUIRED isn't set\n    OUT_OF_ORDER = 1 << 2,\n    // Whether its key locks are acquired, never set for global commands.\n    KEYLOCK_ACQUIRED = 1 << 3,\n\n    // Whether it was suspended (by WatchInShard()). This flag is sticky and stays forever once set.\n    WAS_SUSPENDED = 1 << 4,\n    AWAKED_Q = 1 << 5,  // Whether it was awakened (by NotifySuspended())\n  };\n\n  struct Guard {\n    explicit Guard(Transaction* tx);\n    ~Guard();\n\n   private:\n    Transaction* tx;\n  };\n\n  static void Init(unsigned num_shards);\n  static void Shutdown();\n\n  explicit Transaction(const CommandId* cid);\n\n  // Initialize transaction for squashing placed on a specific shard with a given parent tx\n  explicit Transaction(const Transaction* parent, ShardId shard_id, std::optional<SlotId> slot_id);\n\n  // Initialize from command (args) on specific db.\n  OpStatus InitByArgs(Namespace* ns, DbIndex index, CmdArgList args);\n\n  // Get command arguments for specific shard. Called from shard thread.\n  ShardArgs GetShardArgs(ShardId sid) const;\n\n  // Execute transaction hop. If conclude is true, it is removed from the pending queue.\n  void Execute(RunnableType cb, bool conclude);\n\n  // Execute single hop and conclude.\n  // Callback should return OK for multi key invocations, otherwise return value is ill-defined.\n  OpStatus ScheduleSingleHop(RunnableType cb);\n\n  // Experimental command. Dispatch single hop and return,\n  // use Blocker() primitive to wait for it to finish\n  void SingleHopAsync(RunnableType cb);\n\n  // Execute single hop with return value and conclude.\n  // Can be used only for single key invocations, because it writes a into shared variable.\n  template <typename F> auto ScheduleSingleHopT(F&& f) -> decltype(f(this, nullptr));\n\n  // Conclude transaction. 
Ignored if not scheduled\n  void Conclude();\n\n  // Called by engine shard to execute a transaction hop.\n  // Returns true if the transaction concludes.\n  bool RunInShard(EngineShard* shard, bool allow_q_removal);\n\n  // Registers transaction into watched queue and blocks until a) either notification is received.\n  // or b) tp is reached. If tp is time_point::max() then waits indefinitely.\n  // Expects that the transaction had been scheduled before, and uses Execute(.., true) to register.\n  // Returns false if timeout occurred, true if was notified by one of the keys.\n  facade::OpStatus WaitOnWatch(const time_point& tp, WaitKeys keys, KeyReadyChecker krc,\n                               bool* block_flag, bool* pause_flag);\n\n  // Returns true if transaction is awaked, false if it's timed-out and can be removed from the\n  // blocking queue.\n  bool NotifySuspended(ShardId sid, std::string_view key);\n\n  // Cancel all blocking watches. Set COORD_CANCELLED.\n  // Must be called from coordinator thread.\n  void CancelBlocking(const std::function<OpStatus(ArgSlice)>&);\n\n  // Prepare a squashed hop on given shards.\n  // Only compatible with multi modes that acquire all locks ahead - global and lock_ahead.\n  void PrepareSquashedMultiHop(const CommandId* cid, absl::FunctionRef<bool(ShardId)> enabled);\n\n  // Prepare transaction to do a single ScheduleSingleHop() for squashing\n  void PrepareSingleSquash(Namespace* ns, ShardId sid, DbIndex db, CmdArgList keys, MultiMode mode);\n\n  // Start multi in GLOBAL mode.\n  void StartMultiGlobal(Namespace* ns, DbIndex dbid);\n\n  // Start multi in LOCK_AHEAD mode with given keys.\n  void StartMultiLockedAhead(Namespace* ns, DbIndex dbid, CmdArgList keys,\n                             bool skip_scheduling = false);\n\n  // Start multi in NON_ATOMIC mode.\n  void StartMultiNonAtomic();\n\n  // Unlock key locks of a multi transaction.\n  // If block is set, wait for unlock to finish.\n  void UnlockMulti(bool block = 
false);\n\n  // Set new command for multi transaction.\n  void MultiSwitchCmd(const CommandId* cid);\n\n  // Copy txid, time and unique slot from parent\n  void MultiUpdateWithParent(const Transaction* parent);\n\n  // Set squasher role\n  void MultiBecomeSquasher();\n\n  // Returns locking arguments needed for DbSlice to Acquire/Release transactional locks.\n  // Runs in the shard thread.\n  KeyLockArgs GetLockArgs(ShardId sid) const;\n\n  // If the transaction is armed, disarm it and return the local mask (ACTIVE is always set).\n  // Otherwise 0 is returned. Sync point (acquire).\n  uint16_t DisarmInShard(ShardId sid);\n\n  // Same as DisarmInShard, but the transaction is only disarmed if any of the req_flags is present.\n  // If the transaction is armed, returns the local mask and a flag whether it was disarmed.\n  std::pair<uint16_t, bool /* disarmed */> DisarmInShardWhen(ShardId sid, uint16_t req_flags);\n\n  // Returns if the transaction spans this shard. Safe only when the transaction is armed.\n  bool IsActive(ShardId sid) const;\n\n  // If blocking tx was woken up on this shard, get wake key.\n  std::optional<std::string_view> GetWakeKey(ShardId sid) const;\n\n  // Get OpArgs for specific shard\n  OpArgs GetOpArgs(EngineShard* shard) const;\n\n  TxId txid() const {\n    return txid_;\n  }\n\n  IntentLock::Mode LockMode() const;  // Based on command mask\n\n  std::string_view Name() const;  // Based on command name\n\n  uint32_t GetUniqueShardCnt() const {\n    return unique_shard_cnt_;\n  }\n\n  // This method is meaningless if GetUniqueShardCnt() != 1.\n  ShardId GetUniqueShard() const;\n\n  std::optional<SlotId> GetUniqueSlotId() const;\n\n  bool IsMulti() const {\n    return bool(multi_);\n  }\n\n  bool IsScheduled() const {\n    return coordinator_state_ & COORD_SCHED;\n  }\n\n  MultiMode GetMultiMode() const {\n    return multi_->mode;\n  }\n\n  util::fb2::EmbeddedBlockingCounter* Blocker() {\n    return &run_barrier_;\n  }\n\n  // Temporary\n  
OpStatus* LocalResultPtr() {\n    return &local_result_;\n  }\n\n  // Whether the transaction is multi and runs in an atomic mode.\n  // This, instead of just IsMulti(), should be used to check for the possibility of\n  // different optimizations, because they can safely be applied to non-atomic multi\n  // transactions as well.\n  bool IsAtomicMulti() const {\n    return multi_ && (multi_->mode == LOCK_AHEAD || multi_->mode == GLOBAL);\n  }\n\n  bool IsGlobal() const;\n\n  DbContext GetDbContext() const {\n    return DbContext{namespace_, db_index_, time_now_ms_};\n  }\n\n  Namespace& GetNamespace() const {\n    return *namespace_;\n  }\n\n  DbSlice& GetDbSlice(ShardId sid) const;\n\n  DbIndex GetDbIndex() const {\n    return db_index_;\n  }\n\n  const CommandId* GetCId() const {\n    return cid_;\n  }\n\n  // Return debug information about a transaction, include shard local info if passed\n  std::string DebugId(std::optional<ShardId> sid = std::nullopt) const;\n\n  // Write a journal entry to a shard journal with the given payload.\n  void LogJournalOnShard(journal::Entry::Payload&& payload) const;\n\n  // Re-enable auto journal for commands marked as NO_AUTOJOURNAL. 
Call during setup.\n  void ReviveAutoJournal();\n\n  // Clear all state to make transaction re-usable\n  void Refurbish();\n\n  // Get keys multi transaction was initialized with, normalized and unique\n  const absl::flat_hash_set<std::pair<ShardId, LockFp>>& GetMultiFps() const;\n\n  bool IsSquashedStub() const {\n    return multi_ && multi_->role == SQUASHED_STUB;\n  }\n\n  uint32_t DEBUG_GetTxqPosInShard(ShardId sid) const {\n    return shard_data_[SidToId(sid)].pq_pos;\n  }\n\n  bool DEBUG_IsArmedInShard(ShardId sid) const {\n    return shard_data_[SidToId(sid)].is_armed.load(std::memory_order_relaxed);\n  }\n\n  uint16_t DEBUG_GetLocalMask(ShardId sid) const {\n    return shard_data_[SidToId(sid)].local_mask;\n  }\n\n  void SetTrackingCallback(std::function<void(Transaction* trans)> f) {\n    tracking_cb_ = std::move(f);\n  }\n\n  void MaybeInvokeTrackingCb() {\n    if (tracking_cb_) {\n      tracking_cb_(this);\n    }\n  }\n\n  // Remove once BZPOP is stabilized\n  std::string DEBUGV18_BlockInfo() {\n    return \"claimed=\" + std::to_string(blocking_barrier_.IsClaimed()) +\n           \" coord_state=\" + std::to_string(int(coordinator_state_)) +\n           \" local_res=\" + std::to_string(int(local_result_));\n  }\n\n private:\n  struct alignas(64) PerShardData {\n    PerShardData() {\n    }\n    PerShardData(PerShardData&& other) noexcept {\n    }\n\n    // State of shard - bitmask with LocalState flags\n    uint16_t local_mask = 0;\n\n    // Set when the shard is prepared for another hop. Sync point. Cleared when execution starts.\n    std::atomic_bool is_armed = false;\n\n    uint32_t slice_start = 0;  // Subspan in kv_args_ with local arguments.\n    uint32_t slice_count = 0;\n\n    // span into kv_fp_\n    uint32_t fp_start = 0;\n    uint32_t fp_count = 0;\n\n    // Position in the tx queue. 
OOO or cancelled schedules remove themselves by this index.\n    TxQueue::Iterator pq_pos = TxQueue::kEnd;\n\n    // Index of key relative to args in shard that the shard was woken up after blocking wait.\n    uint32_t wake_key_pos = UINT32_MAX;\n\n    // Irrational stats purely for debugging purposes.\n    struct Stats {\n      unsigned total_runs = 0;  // total number of runs\n    } stats;\n\n    // Prevent \"false sharing\" between cache lines: occupy a full cache line (64 bytes)\n    char pad[64 - 7 * sizeof(uint32_t) - sizeof(Stats)];\n  };\n\n  static_assert(sizeof(PerShardData) == 64);  // cacheline\n\n  // State of a multi transaction.\n  struct MultiData {\n    MultiRole role;\n    MultiMode mode;\n    std::optional<IntentLock::Mode> lock_mode;\n\n    // Unique normalized fingerprints used for scheduling the multi transaction.\n    absl::flat_hash_set<std::pair<ShardId, LockFp>> tag_fps;\n\n    // Set if the multi command is concluding to avoid ambiguity with COORD_CONCLUDING\n    bool concluding = false;\n\n    unsigned cmd_seq_num = 0;  // used for debugging purposes.\n  };\n\n  enum CoordinatorState : uint8_t {\n    COORD_SCHED = 1,\n    COORD_CONCLUDING = 1 << 1,  // Whether its the last hop of a transaction\n    COORD_CANCELLED = 1 << 2,\n  };\n\n  // Auxiliary structure used during initialization\n  struct PerShardCache {\n    std::vector<IndexSlice> slices;\n    unsigned key_step = 1;\n\n    void Clear() {\n      slices.clear();\n    }\n  };\n\n  // \"Single claim - single modification\" barrier. 
Multiple threads might try to claim it, only one\n  // will succeed and will be allowed to modify the guarded object until it closes the barrier.\n  // A closed barrier can't be claimed again or re-used in any way.\n  class BatonBarrier {\n   public:\n    bool IsClaimed() const;  // Return if barrier is claimed, only for peeking\n    bool TryClaim();         // Return if the barrier was claimed successfully\n    void Close();            // Close barrier after it was claimed\n\n    // Wait for barrier until time_point, or indefinitely if time_point::max() was passed.\n    // After Wait returns, the barrier is guaranteed to be closed, including expiration.\n    std::cv_status Wait(time_point);\n\n   private:\n    std::atomic_bool claimed_{false};\n    std::atomic_bool closed_{false};\n    util::fb2::EventCount ec_{};\n  };\n\n  // Init basic fields and reset re-usable.\n  void InitBase(Namespace* ns, DbIndex dbid, CmdArgList args);\n\n  // Init as a global transaction.\n  void InitGlobal();\n\n  // Init with a set of keys.\n  void InitByKeys(const KeyIndex& keys);\n\n  void EnableShard(ShardId sid);\n  void EnableAllShards();\n\n  // Build shard index by distributing the arguments by shards based on the key index.\n  void BuildShardIndex(const KeyIndex& keys, std::vector<PerShardCache>* out);\n\n  // Init shard data from shard index.\n  void InitShardData(absl::Span<const PerShardCache> shard_index, size_t num_args);\n\n  // Store all key index keys in args_. Used only for single shard initialization.\n  void StoreKeysInArgs(const KeyIndex& key_index);\n\n  // Multi transactions unlock asynchronously, so they need to keep fingerprints of keys.\n  void PrepareMultiFps(CmdArgList keys);\n\n  void ScheduleInternal();\n\n  // Schedule on shards transaction queue. 
Returns true if scheduled successfully,\n  // false if inconsistent order was detected and the schedule needs to be cancelled.\n  // if execute_optimistic is true - means we can try executing during the scheduling,\n  // subject to uncontended keys.\n  bool ScheduleInShard(EngineShard* shard, bool execute_optimistic);\n\n  // Optimized extension of ScheduleInShard. Pulls several transactions queued for scheduling.\n  static void ScheduleBatchInShard();\n\n  // Set ARMED flags, start run barrier and submit poll tasks. Doesn't wait for the run barrier\n  void DispatchHop();\n\n  // Finish hop, decrement run barrier\n  void FinishHop();\n\n  // Run actual callback on shard, store result if single shard or OOM was catched\n  void RunCallback(EngineShard* shard);\n\n  // Adds itself to watched queue in the shard. Must run in that shard thread.\n  void WatchInShard(Namespace* ns, ShardArgs keys, EngineShard* shard, KeyReadyChecker krc);\n\n  // Expire blocking transaction, unlock keys and unregister it from the blocking controller\n  void ExpireBlocking(WaitKeys keys);\n\n  void ExpireShardCb(ShardArgs keys, EngineShard* shard);\n\n  // Returns true if we need to follow up with PollExecution on this shard.\n  bool CancelShardCb(EngineShard* shard);\n\n  // Run callback inline as part of multi stub.\n  OpStatus RunSquashedMultiCb(RunnableType cb);\n\n  // Set time_now_ms_\n  void InitTxTime();\n\n  void UnlockMultiShardCb(absl::Span<const LockFp> fps, EngineShard* shard);\n\n  // Log command in shard's journal, if this is a write command with auto-journaling enabled.\n  // Should be called immediately after the last hop.\n  void LogAutoJournalOnShard(EngineShard* shard, RunnableResult shard_result);\n\n  // Whether the callback can be run directly on this thread without dispatching on the shard queue\n  bool CanRunInlined() const;\n\n  uint32_t GetUseCount() const {\n    return use_count_.load(std::memory_order_relaxed);\n  }\n\n  bool IsActiveMulti() const {\n    return 
multi_ && multi_->role != SQUASHED_STUB;\n  }\n\n  unsigned SidToId(ShardId sid) const {\n    return sid < shard_data_.size() ? sid : 0;\n  }\n\n  // Iterate over all available shards, run functor accepting (PerShardData&, ShardId)\n  template <typename F> void IterateShards(F&& f) {\n    if (unique_shard_cnt_ == 1) {\n      f(shard_data_[SidToId(unique_shard_id_)], unique_shard_id_);\n    } else {\n      for (ShardId i = 0; i < shard_data_.size(); ++i) {\n        f(shard_data_[i], i);\n      }\n    }\n  }\n\n  // Iterate over ACTIVE shards, run functor accepting (PerShardData&, ShardId)\n  template <typename F> void IterateActiveShards(F&& f) {\n    IterateShards([&f](auto& sd, auto i) {\n      if (sd.local_mask & ACTIVE)\n        f(sd, i);\n    });\n  }\n\n  // Used for waiting for all hop callbacks to run.\n  util::fb2::EmbeddedBlockingCounter run_barrier_{0};\n\n  // Stores per-shard data: state flags and keys. Index only with SidToId(shard index)!\n  // Theoretically, same size as number of shards, but contains only a single element for\n  // single shard non-multi transactions (optimization).\n  // TODO: explore dense packing\n  absl::InlinedVector<PerShardData, 4> shard_data_;\n\n  // Stores slices of key/values partitioned by shards.\n  // Slices reference full_args_.\n  // We need values as well since we reorder keys, and we need to know what value corresponds\n  // to what key.\n  absl::InlinedVector<IndexSlice, 4> args_slices_;\n\n  // Fingerprints of keys, precomputed once during the transaction initialization.\n  absl::InlinedVector<LockFp, 4> kv_fp_;\n\n  // Stores the full undivided command.\n  CmdArgList full_args_;\n\n  // Set if a NO_AUTOJOURNAL command asked to enable auto journal again\n  bool re_enabled_auto_journal_ = false;\n\n  std::optional<RunnableType> cb_ptr_;  // Run on shard threads\n  const CommandId* cid_ = nullptr;      // Underlying command\n  std::unique_ptr<MultiData> multi_;    // Initialized when the transaction is 
multi/exec.\n\n  TxId txid_{0};\n  bool global_{false};\n  Namespace* namespace_{nullptr};\n  DbIndex db_index_{0};\n  uint64_t time_now_ms_{0};\n\n  std::atomic_uint32_t use_count_{0};  // transaction exists only as an intrusive_ptr\n\n  uint32_t unique_shard_cnt_{0};          // Number of unique shards active\n  ShardId unique_shard_id_{kInvalidSid};  // Set if unique_shard_cnt_ = 1\n  UniqueSlotChecker unique_slot_checker_;\n\n  // Barrier for waking blocking transactions that ensures exclusivity of waking operation.\n  BatonBarrier blocking_barrier_{};\n\n  // Stores status if COORD_CANCELLED was set. Apart from cancelled, it can be moved for cluster\n  // changes\n  OpStatus block_cancel_result_ = OpStatus::OK;\n\n  // Transaction coordinator state, written and read by coordinator thread.\n  uint8_t coordinator_state_ = 0;\n\n  // Result of callbacks. Usually written by single shard only, lock below for multishard oom error\n  OpStatus local_result_ = OpStatus::OK;\n  absl::base_internal::SpinLock local_result_mu_;\n\n  // Stats purely for debugging purposes\n  struct Stats {\n    size_t schedule_attempts = 0;\n    ShardId coordinator_index = 0;\n  } stats_;\n\n  std::function<void(Transaction* trans)> tracking_cb_;\n\n private:\n  struct TLTmpSpace {\n    std::vector<PerShardCache>& GetShardIndex(unsigned size);\n\n   private:\n    std::vector<PerShardCache> shard_cache;\n  };\n\n  static thread_local TLTmpSpace tmp_space;\n};\n\ntemplate <typename F> auto Transaction::ScheduleSingleHopT(F&& f) -> decltype(f(this, nullptr)) {\n  decltype(f(this, nullptr)) res;\n\n  ScheduleSingleHop([&res, f = std::forward<F>(f)](Transaction* t, EngineShard* shard) {\n    res = f(t, shard);\n    return res.status();\n  });\n  return res;\n}\n\nOpResult<KeyIndex> DetermineKeys(const CommandId* cid, CmdArgList args);\n\n}  // namespace dfly\n"
  },
  {
    "path": "src/server/tx_base.cc",
    "content": "// Copyright 2024, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#include \"server/tx_base.h\"\n\n#include <xxhash.h>\n\n#include \"base/logging.h\"\n#include \"facade/facade_types.h\"\n#include \"server/cluster/cluster_defs.h\"\n#include \"server/engine_shard_set.h\"\n#include \"server/journal/journal.h\"\n#include \"server/namespaces.h\"\n#include \"server/transaction.h\"\n\nnamespace dfly {\n\nusing namespace std;\nusing Payload = journal::Entry::Payload;\n\nunsigned KeyIndex::operator*() const {\n  if (bonus)\n    return *bonus;\n  return start;\n}\n\nKeyIndex& KeyIndex::operator++() {\n  if (bonus)\n    bonus.reset();\n  else\n    start = std::min(end, start + step);\n  return *this;\n}\n\nbool KeyIndex::operator!=(const KeyIndex& ki) const {\n  return std::tie(start, end, step, bonus) != std::tie(ki.start, ki.end, ki.step, ki.bonus);\n}\n\nDbSlice& DbContext::GetDbSlice(ShardId shard_id) const {\n  return ns->GetDbSlice(shard_id);\n}\n\nDbSlice& OpArgs::GetDbSlice() const {\n  return db_cntx.GetDbSlice(shard->shard_id());\n}\n\nsize_t ShardArgs::Size() const {\n  size_t sz = 0;\n  for (const auto& s : slice_.second)\n    sz += (s.second - s.first);\n  return sz;\n}\n\nvoid RecordJournal(const OpArgs& op_args, string_view cmd, const ShardArgs& args, uint32_t unused) {\n  DCHECK(op_args.tx);\n  VLOG(2) << \"Logging command \" << cmd << \" from txn \" << op_args.tx->txid();\n  op_args.tx->LogJournalOnShard(Payload(cmd, args));\n}\n\nvoid RecordJournal(const OpArgs& op_args, std::string_view cmd, facade::ArgSlice args,\n                   uint32_t unused) {\n  DCHECK(op_args.tx);\n  VLOG(2) << \"Logging command \" << cmd << \" from txn \" << op_args.tx->txid();\n  op_args.tx->LogJournalOnShard(Payload(cmd, args));\n}\n\nvoid RecordDelete(DbIndex dbid, string_view key) {\n  journal::RecordEntry(0, journal::Op::COMMAND, dbid, KeySlot(key), Payload(\"DEL\", 
ArgSlice{key}));\n}\n\nLockTag::LockTag(std::string_view key) {\n  if (LockTagOptions::instance().enabled)\n    str_ = LockTagOptions::instance().Tag(key);\n  else\n    str_ = key;\n}\n\nLockFp LockTag::Fingerprint() const {\n  return XXH64(str_.data(), str_.size(), 0x1C69B3F74AC4AE35UL);\n}\n\n}  // namespace dfly\n"
  },
  {
    "path": "src/server/tx_base.h",
    "content": "// Copyright 2024, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#pragma once\n\n#include <absl/types/span.h>\n\n#include <cstdint>\n#include <optional>\n\n#include \"base/iterator.h\"\n#include \"common/arg_range.h\"\n#include \"server/common_types.h\"\n\nnamespace dfly {\n\nusing cmn::ArgSlice;\n\nstruct KeyLockArgs {\n  DbIndex db_index = 0;\n  absl::Span<const LockFp> fps;\n};\n\n// Describes key indices.\nstruct KeyIndex {\n  KeyIndex(unsigned start = 0, unsigned end = 0, unsigned step = 1,\n           std::optional<unsigned> bonus = std::nullopt)\n      : start(start), end(end), step(step), bonus(bonus) {\n  }\n\n  using iterator_category = std::forward_iterator_tag;\n  using value_type = unsigned;\n  using difference_type = std::ptrdiff_t;\n  using pointer = value_type;\n  using reference = value_type;\n\n  unsigned operator*() const;\n  KeyIndex& operator++();\n  bool operator!=(const KeyIndex& ki) const;\n\n  unsigned NumArgs() const {\n    return (end - start) + unsigned(bonus.has_value());\n  }\n\n  auto Range() const {\n    return base::it::Range(*this, KeyIndex{end, end, step, std::nullopt});\n  }\n\n  auto Range(const cmn::ArgSlice& args) const {\n    return base::it::Transform([args](unsigned idx) { return args[idx]; }, Range());\n  }\n\n public:\n  unsigned start, end, step;      // [start, end) with step\n  std::optional<unsigned> bonus;  // destination key, for example for commands that end with STORE\n};\n\nstruct DbContext {\n  Namespace* ns = nullptr;\n  DbIndex db_index = 0;\n  uint64_t time_now_ms = 0;\n\n  // Convenience method.\n  DbSlice& GetDbSlice(ShardId shard_id) const;\n};\n\nstruct OpArgs {\n  EngineShard* shard = nullptr;\n  const Transaction* tx = nullptr;\n  DbContext db_cntx;\n\n  OpArgs() = default;\n\n  OpArgs(EngineShard* s, const Transaction* tx, const DbContext& cntx)\n      : shard(s), tx(tx), db_cntx(cntx) {\n  }\n\n  // Convenience method.\n  DbSlice& GetDbSlice() 
const;\n};\n\n// A strong type for a lock tag. Helps to disambiguate between keys and the parts of the\n// keys that are used for locking.\nclass LockTag {\n  std::string_view str_;\n\n public:\n  using is_stackonly = void;  // marks that this object does not use heap.\n\n  LockTag() = default;\n  explicit LockTag(std::string_view key);\n\n  explicit operator std::string_view() const {\n    return str_;\n  }\n\n  LockFp Fingerprint() const;\n\n  // To make it hashable.\n  template <typename H> friend H AbslHashValue(H h, const LockTag& tag) {\n    return H::combine(std::move(h), tag.str_);\n  }\n\n  bool operator==(const LockTag& o) const {\n    return str_ == o.str_;\n  }\n};\n\n// Checks whether the touched key is valid for a blocking transaction watching it.\nusing KeyReadyChecker =\n    std::function<bool(EngineShard*, const DbContext& context, Transaction* tx, std::string_view)>;\n\n// References arguments in another array.\nusing IndexSlice = std::pair<uint32_t, uint32_t>;  // [begin, end)\n\n// ShardArgs - hold a span to full arguments and a span of sub-ranges\n// referencing those arguments.\nclass ShardArgs {\n  using ArgsIndexPair = std::pair<cmn::ArgSlice, absl::Span<const IndexSlice>>;\n  ArgsIndexPair slice_;\n\n public:\n  class Iterator {\n    cmn::ArgSlice arglist_;\n    absl::Span<const IndexSlice>::const_iterator index_it_;\n    uint32_t delta_ = 0;\n\n   public:\n    using iterator_category = std::input_iterator_tag;\n    using value_type = std::string_view;\n    using difference_type = ptrdiff_t;\n    using pointer = value_type*;\n    using reference = value_type&;\n\n    // First version, corresponds to spans over arguments.\n    Iterator(cmn::ArgSlice list, absl::Span<const IndexSlice>::const_iterator it)\n        : arglist_(list), index_it_(it) {\n    }\n\n    bool operator==(const Iterator& o) const {\n      return index_it_ == o.index_it_ && delta_ == o.delta_ && arglist_.data() == o.arglist_.data();\n    }\n\n    bool operator!=(const 
Iterator& o) const {\n      return !(*this == o);\n    }\n\n    std::string_view operator*() const {\n      return arglist_[index()];\n    }\n\n    Iterator& operator++() {\n      ++delta_;\n      if (index() >= index_it_->second) {\n        ++index_it_;\n        ++delta_ = 0;\n      }\n      return *this;\n    }\n\n    Iterator operator++(int) {\n      Iterator copy = *this;\n      operator++();\n      return copy;\n    }\n\n    size_t index() const {\n      return index_it_->first + delta_;\n    }\n  };\n\n  using const_iterator = Iterator;\n\n  ShardArgs(cmn::ArgSlice fa, absl::Span<const IndexSlice> s) : slice_(ArgsIndexPair(fa, s)) {\n  }\n\n  ShardArgs() : slice_(ArgsIndexPair{}) {\n  }\n\n  size_t Size() const;\n\n  Iterator cbegin() const {\n    return Iterator{slice_.first, slice_.second.begin()};\n  }\n\n  Iterator cend() const {\n    return Iterator{slice_.first, slice_.second.end()};\n  }\n\n  Iterator begin() const {\n    return cbegin();\n  }\n\n  Iterator end() const {\n    return cend();\n  }\n\n  bool Empty() const {\n    return slice_.second.empty();\n  }\n\n  std::string_view Front() const {\n    return *cbegin();\n  }\n};\n\n// Record non auto journal command with own txid and dbid.\nvoid RecordJournal(const OpArgs& op_args, std::string_view cmd, const ShardArgs& args,\n                   uint32_t unused = 1);\nvoid RecordJournal(const OpArgs& op_args, std::string_view cmd, ArgSlice args, uint32_t unused = 1);\n\nvoid RecordDelete(DbIndex dbid, std::string_view key);\n\n// Record expiry in journal with independent transaction.\n// Must be called from shard thread owning key.\n// Might block the calling fiber unless journal::SetFlushMode(false) is called.\ninline void RecordExpiryBlocking(DbIndex dbid, std::string_view key) {\n  RecordDelete(dbid, key);\n}\n\n}  // namespace dfly\n"
  },
  {
    "path": "src/server/version.cc.in",
    "content": "// Copyright 2022, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#include \"version.h\"\n\nnamespace dfly {\n\n// Do not edit - autogenerated file. Please see version.cc.in for details.\n\nconst char kGitTag[] = \"@GIT_VER@\";\nconst char kGitSha[] = \"@GIT_SHA1@\";\nconst char kGitClean[] = \"@GIT_CLEAN_DIRTY@\";\nconst char kBuildTime[] = \"@PRJ_BUILD_TIME@\";\n\nconst char* GetVersion() { return \"df-@GIT_VER@\"; }\n\n}  // namespace dfly\n"
  },
  {
    "path": "src/server/version.h",
    "content": "// Copyright 2022, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#pragma once\n\nnamespace dfly {\n\nextern const char kGitTag[];\nextern const char kGitSha[];\nextern const char kGitClean[];\nextern const char kBuildTime[];\n\nconst char* GetVersion();\n\n// An enum for internal versioning of dragonfly specific behavior.\n// Please document for each new entry what the behavior changes are\n// and to which released versions this corresponds.\nenum class DflyVersion {\n  // 1.4  <= ver <= 1.10\n  // - Supports receiving ACKs from replicas\n  // - Sends version back on REPLCONF capa dragonfly\n  VER1,\n\n  // 1.11 <= ver\n  // Supports limited partial sync\n  VER2,\n\n  // 1.15 < ver\n  // ACL with user replication\n  VER3,\n\n  // - Periodic lag checks from master to replica\n  VER4,\n\n  // - Support partial sync from different master\n  VER5,\n\n  // 1.37 <= ver\n  // - Per-shard search index definitions (search-index AUX on every flow)\n  // - HNSW index serialization opcodes (RDB_OPCODE_VECTOR_INDEX, RDB_OPCODE_SHARD_DOC_INDEX)\n  // - hnsw-index-metadata AUX field\n  VER6,\n\n  // Always points to the latest version\n  CURRENT_VER = VER6,\n};\n\n}  // namespace dfly\n"
  },
  {
    "path": "src/server/version_monitor.cc",
    "content": "// Copyright 2023, Roman Gershman.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#include \"server/version_monitor.h\"\n\n#include <absl/strings/numbers.h>\n#include <absl/strings/str_cat.h>\n#include <absl/strings/str_split.h>\n#include <openssl/err.h>\n\n#include <boost/beast/http/string_body.hpp>\n#include <regex>\n\n#include \"base/logging.h\"\n#include \"server/version.h\"\n\nnamespace dfly {\n\nusing namespace std;\nusing namespace util;\nusing http::TlsClient;\n\nnamespace {\n\nstd::optional<std::string> GetVersionString(const std::string& version_str) {\n  // The server sends a message such as {\"latest\": \"0.12.0\"}\n  const auto reg_match_expr = R\"(\\{\\\"latest\"\\:[ \\t]*\\\"([0-9]+\\.[0-9]+\\.[0-9]+)\\\"\\})\";\n  VLOG(1) << \"checking version '\" << version_str << \"'\";\n  auto const regex = std::regex(reg_match_expr);\n  std::smatch match;\n  if (std::regex_match(version_str, match, regex) && match.size() > 1) {\n    // the second entry is the match to the group that holds the version string\n    return match[1].str();\n  } else {\n    LOG_FIRST_N(WARNING, 1) << \"Remote version - invalid version number: '\" << version_str << \"'\";\n    return std::nullopt;\n  }\n}\n\nstd::optional<std::string> GetRemoteVersion(ProactorBase* proactor, SSL_CTX* ssl_context,\n                                            const std::string host, std::string_view service,\n                                            const std::string& resource,\n                                            const std::string& ver_header) {\n  namespace bh = boost::beast::http;\n  using ResponseType = bh::response<bh::string_body>;\n\n  bh::request<bh::string_body> req{bh::verb::get, resource, 11 /*http 1.1*/};\n  req.set(bh::field::host, host);\n  req.set(bh::field::user_agent, ver_header);\n  ResponseType res;\n  TlsClient http_client{proactor};\n  http_client.set_connect_timeout_ms(2000);\n\n  auto ec = http_client.Connect(host, service, 
ssl_context);\n\n  if (ec) {\n    LOG_FIRST_N(WARNING, 1) << \"Remote version - connection error [\" << host << \":\" << service\n                            << \"] : \" << ec.message();\n    return nullopt;\n  }\n\n  ec = http_client.Send(req, &res);\n  if (!ec) {\n    VLOG(1) << \"successfully got response from HTTP GET for host \" << host << \":\" << service << \"/\"\n            << resource << \" response code is \" << res.result();\n\n    if (res.result() == bh::status::ok) {\n      return GetVersionString(res.body());\n    }\n  } else {\n    static bool is_logged{false};\n    if (!is_logged) {\n      is_logged = true;\n\n#if (OPENSSL_VERSION_NUMBER >= 0x30000000L)\n      const char* func_err = \"ssl_internal_error\";\n#else\n      const char* func_err = ERR_func_error_string(ec.value());\n#endif\n\n      // Unfortunately AsioStreamAdapter looses the original error category\n      // because std::error_code can not be converted into boost::system::error_code.\n      // It's fixed in later versions of Boost, but for now we assume it's from TLS.\n      LOG(WARNING) << \"Remote version - HTTP GET error [\" << host << \":\" << service << resource\n                   << \"], error: \" << ec.value();\n      LOG(WARNING) << \"ssl error: \" << func_err << \"/\" << ERR_reason_error_string(ec.value());\n    }\n  }\n\n  return nullopt;\n}\n\n}  // namespace\n\nbool VersionMonitor::IsVersionOutdated(const std::string_view remote,\n                                       const std::string_view current) const {\n  const absl::InlinedVector<absl::string_view, 3> remote_xyz = absl::StrSplit(remote, \".\");\n  const absl::InlinedVector<absl::string_view, 3> current_xyz = absl::StrSplit(current, \".\");\n  if (remote_xyz.size() != current_xyz.size()) {\n    LOG(WARNING) << \"Can't compare Dragonfly version \" << current << \" to latest version \"\n                 << remote;\n    return false;\n  }\n  const auto print_to_log = [](const std::string_view version, const 
absl::string_view part) {\n    LOG(WARNING) << \"Can't parse \" << version << \" part of version \" << part << \" as a number\";\n  };\n  for (size_t i = 0; i < remote_xyz.size(); ++i) {\n    size_t remote_x = 0;\n    if (!absl::SimpleAtoi(remote_xyz[i], &remote_x)) {\n      print_to_log(remote, remote_xyz[i]);\n      return false;\n    }\n    size_t current_x = 0;\n    if (!absl::SimpleAtoi(current_xyz[i], &current_x)) {\n      print_to_log(current, current_xyz[i]);\n      return false;\n    }\n    if (remote_x > current_x) {\n      return true;\n    }\n\n    if (remote_x < current_x) {\n      return false;\n    }\n  }\n\n  return false;\n}\n\nvoid VersionMonitor::Run(ProactorPool* proactor_pool) {\n  // Avoid running dev environments.\n  if (getenv(\"DFLY_DEV_ENV\")) {\n    LOG(WARNING) << \"Running in dev environment (DFLY_DEV_ENV is set) - version monitoring is \"\n                    \"disabled\";\n    return;\n  }\n\n  SslPtr ssl_ctx(TlsClient::CreateSslContext());\n  if (!ssl_ctx) {\n    VLOG(1) << \"Remote version - failed to create SSL context - cannot run version monitoring\";\n    return;\n  }\n\n  version_fiber_ = proactor_pool->GetNextProactor()->LaunchFiber(\n      [ssl_ctx = std::move(ssl_ctx), this]() mutable { RunTask(std::move(ssl_ctx)); });\n}\n\nvoid VersionMonitor::Shutdown() {\n  monitor_ver_done_.Notify();\n  if (version_fiber_.IsJoinable()) {\n    version_fiber_.Join();\n  }\n}\n\nvoid VersionMonitor::RunTask(SslPtr ssl_ctx) {\n  const auto loop_sleep_time = std::chrono::hours(24);  // every 24 hours\n\n  const std::string host_name = \"version.dragonflydb.io\";\n  const std::string_view port = \"443\";\n  const std::string resource = \"/v1\";\n  string_view current_version(kGitTag);\n\n  current_version.remove_prefix(1);\n  const std::string version_header = absl::StrCat(\"DragonflyDB/\", current_version);\n\n  ProactorBase* my_pb = ProactorBase::me();\n  while (true) {\n    const std::optional<std::string> remote_version =\n        
GetRemoteVersion(my_pb, ssl_ctx.get(), host_name, port, resource, version_header);\n    if (remote_version) {\n      const std::string_view rv = remote_version.value();\n      if (IsVersionOutdated(rv, current_version)) {\n        LOG_FIRST_N(INFO, 1) << \"Your current version '\" << current_version\n                             << \"' is not the latest version. A newer version '\" << rv\n                             << \"' is now available. Please consider an update.\";\n      }\n    }\n    if (monitor_ver_done_.WaitFor(loop_sleep_time)) {\n      VLOG(1) << \"finish running version monitor task\";\n      return;\n    }\n  }\n}\n\n}  // namespace dfly\n"
  },
  {
    "path": "src/server/version_monitor.h",
    "content": "// Copyright 2023, Roman Gershman.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n#pragma once\n\n#include \"util/fibers/fibers.h\"\n#include \"util/fibers/pool.h\"\n#include \"util/http/http_client.h\"\n\nnamespace dfly {\n\nclass VersionMonitor {\n public:\n  void Run(util::ProactorPool* proactor_pool);\n\n  void Shutdown();\n\n private:\n  struct SslDeleter {\n    void operator()(SSL_CTX* ssl) {\n      if (ssl) {\n        util::http::TlsClient::FreeContext(ssl);\n      }\n    }\n  };\n\n  using SslPtr = std::unique_ptr<SSL_CTX, SslDeleter>;\n  void RunTask(SslPtr);\n\n  bool IsVersionOutdated(std::string_view remote, std::string_view current) const;\n\n  util::fb2::Fiber version_fiber_;\n  util::fb2::Done monitor_ver_done_;\n};\n\n}  // namespace dfly\n"
  },
  {
    "path": "src/server/zset_family.cc",
    "content": "// Copyright 2022, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#include \"server/zset_family.h\"\n\n#include <absl/strings/ascii.h>\n\nextern \"C\" {\n#include \"redis/listpack.h\"\n#include \"redis/redis_aux.h\"\n#include \"redis/util.h\"\n#include \"redis/zmalloc.h\"\n}\n\n#include \"base/logging.h\"\n#include \"base/stl_util.h\"\n#include \"core/sorted_map.h\"\n#include \"facade/cmd_arg_parser.h\"\n#include \"facade/error.h\"\n#include \"server/acl/acl_commands_def.h\"\n#include \"server/blocking_controller.h\"\n#include \"server/cluster/cluster_defs.h\"\n#include \"server/command_registry.h\"\n#include \"server/conn_context.h\"\n#include \"server/container_utils.h\"\n#include \"server/db_slice.h\"\n#include \"server/engine_shard_set.h\"\n#include \"server/error.h\"\n#include \"server/family_utils.h\"\n#include \"server/namespaces.h\"\n#include \"server/transaction.h\"\n\nnamespace dfly {\n\nusing namespace std;\nusing namespace facade;\nusing absl::SimpleAtoi;\nnamespace {\n\nusing CI = CommandId;\n\nconst char kNxXxErr[] = \"XX and NX options at the same time are not compatible\";\nconst char kLexRangeErr[] = \"min or max not valid string range item\";\nconst char kFloatRangeErr[] = \"min or max is not a float\";\nconst char kScoreNaN[] = \"resulting score is not a number (NaN)\";\n\nusing MScoreResponse = std::vector<std::optional<double>>;\nusing ScoredMember = ZSetFamily::ScoredMember;\nusing ScoredArray = ZSetFamily::ScoredArray;\nusing ScoredMemberView = ZSetFamily::ScoredMemberView;\nusing ScoredMemberSpan = ZSetFamily::ScoredMemberSpan;\n\nstruct ValidateZMPopResult {\n  uint32_t num_keys;\n  bool is_max;\n  int pop_count;\n  float timeout;\n};\n\ninline zrangespec GetZrangeSpec(bool reverse, const ZSetFamily::ScoreInterval& si) {\n  auto interval = si;\n  if (reverse)\n    swap(interval.first, interval.second);\n\n  zrangespec range;\n  range.min = interval.first.val;\n  range.max = 
interval.second.val;\n  range.minex = interval.first.is_open;\n  range.maxex = interval.second.is_open;\n\n  return range;\n}\n\nsds GetLexStr(const ZSetFamily::LexBound& bound) {\n  if (bound.type == ZSetFamily::LexBound::MINUS_INF)\n    return cminstring;\n\n  if (bound.type == ZSetFamily::LexBound::PLUS_INF)\n    return cmaxstring;\n\n  return sdsnewlen(bound.val.data(), bound.val.size());\n};\n\nzlexrangespec GetLexRange(bool reverse, const ZSetFamily::LexInterval& li) {\n  auto interval = li;\n  if (reverse)\n    swap(interval.first, interval.second);\n\n  zlexrangespec range;\n  range.minex = 0;\n  range.maxex = 0;\n\n  range.min = GetLexStr(interval.first);\n  range.max = GetLexStr(interval.second);\n  range.minex = (interval.first.type == ZSetFamily::LexBound::OPEN);\n  range.maxex = (interval.second.type == ZSetFamily::LexBound::OPEN);\n\n  return range;\n}\n\nbool IsListPack(const PrimeValue& pv) {\n  return pv.Encoding() == OBJ_ENCODING_LISTPACK;\n}\n\n/* Delete the element 'ele' from the sorted set, returning 1 if the element\n * existed and was deleted, 0 otherwise (the element was not there).\n * taken from t_zset.c\n */\n\nint ZsetDel(PrimeValue* pv, std::string_view ele) {\n  if (IsListPack(*pv)) {\n    uint8_t* lp = (uint8_t*)pv->RObjPtr();\n    unsigned char* eptr = detail::ZzlFind(lp, ele, nullptr);\n    if (eptr) {\n      lp = lpDeleteRangeWithEntry(lp, &eptr, 2);\n      pv->SetRObjPtr(lp);\n      return 1;\n    }\n  } else if (pv->Encoding() == OBJ_ENCODING_SKIPLIST) {\n    detail::SortedMap* zs = (detail::SortedMap*)pv->RObjPtr();\n    if (zs->Delete(ele))\n      return 1;\n  }\n  return 0; /* No such element found. 
*/\n}\n\n// taken from t_zset.c\nstd::optional<double> GetZsetScore(const PrimeValue& pv, std::string_view member) {\n  if (IsListPack(pv)) {\n    double score;\n    if (detail::ZzlFind((uint8_t*)pv.RObjPtr(), member, &score) == NULL)\n      return std::nullopt;\n    return score;\n  }\n\n  if (pv.Encoding() == OBJ_ENCODING_SKIPLIST) {\n    detail::SortedMap* zs = (detail::SortedMap*)pv.RObjPtr();\n    return zs->GetScore(member);\n  }\n\n  LOG(FATAL) << \"Unknown sorted set encoding\";\n  return 0;\n}\n\nint ZsetAdd(PrimeValue* pv, double score, std::string_view ele, int in_flags, int* out_flags,\n            double* newscore) {\n  *out_flags = 0; /* We'll return our response flags. */\n  double curscore;\n\n  /* NaN as input is an error regardless of all the other parameters. */\n  if (isnan(score)) {\n    *out_flags = ZADD_OUT_NAN;\n    return 0;\n  }\n\n  /* Update the sorted set according to its encoding. */\n  if (pv->Encoding() == OBJ_ENCODING_LISTPACK) {\n    /* Turn options into simple to check vars. */\n    bool incr = (in_flags & ZADD_IN_INCR) != 0;\n    bool nx = (in_flags & ZADD_IN_NX) != 0;\n    bool xx = (in_flags & ZADD_IN_XX) != 0;\n    bool gt = (in_flags & ZADD_IN_GT) != 0;\n    bool lt = (in_flags & ZADD_IN_LT) != 0;\n\n    uint8_t* lp = (uint8_t*)pv->RObjPtr();\n    uint8_t* eptr = detail::ZzlFind(lp, ele, &curscore);\n    if (eptr != NULL) {\n      /* NX? Return, same element already exists. */\n      if (nx) {\n        *out_flags |= ZADD_OUT_NOP;\n        return 1;\n      }\n\n      /* Prepare the score for the increment if needed. */\n      if (incr) {\n        score += curscore;\n        if (isnan(score)) {\n          *out_flags |= ZADD_OUT_NAN;\n          return 0;\n        }\n      }\n\n      /* GT/LT? Only update if score is greater/less than current. 
*/\n      if ((lt && score >= curscore) || (gt && score <= curscore)) {\n        *out_flags |= ZADD_OUT_NOP;\n        return 1;\n      }\n\n      if (newscore)\n        *newscore = score;\n\n      /* Remove and re-insert when score changed. */\n      if (score != curscore) {\n        lp = lpDeleteRangeWithEntry(lp, &eptr, 2);\n        lp = detail::ZzlInsert(lp, ele, score);\n        pv->SetRObjPtr(lp);\n        *out_flags |= ZADD_OUT_UPDATED;\n      }\n\n      return 1;\n    } else if (!xx) {\n      unsigned zl_len = lpLength(lp) / 2;\n\n      /* check if the element is too large or the list\n       * becomes too long *before* executing zzlInsert. */\n      if (zl_len >= ZSET_MAX_LISTPACK_ENTRIES || ele.size() > ZSET_MAX_LISTPACK_VALUE) {\n        auto* ptr = detail::SortedMap::FromListPack(pv->memory_resource(), lp);\n        pv->InitRobj(OBJ_ZSET, OBJ_ENCODING_SKIPLIST, ptr);\n      } else {\n        lp = detail::ZzlInsert(lp, ele, score);\n        pv->SetRObjPtr(lp);\n        if (newscore)\n          *newscore = score;\n        *out_flags |= ZADD_OUT_ADDED;\n        return 1;\n      }\n    } else {\n      *out_flags |= ZADD_OUT_NOP;\n      return 1;\n    }\n  }\n\n  CHECK_EQ(pv->Encoding(), OBJ_ENCODING_SKIPLIST);\n  detail::SortedMap* ss = (detail::SortedMap*)pv->RObjPtr();\n  return ss->AddElem(score, ele, in_flags, out_flags, newscore);\n}\n\nvoid OutputScoredArrayResult(const OpResult<ScoredArray>& result, SinkReplyBuilder* builder) {\n  if (result.status() == OpStatus::WRONG_TYPE) {\n    return builder->SendError(kWrongTypeErr);\n  }\n\n  LOG_IF(WARNING, !result && result.status() != OpStatus::KEY_NOTFOUND)\n      << \"Unexpected status \" << result.status();\n  auto* rb = static_cast<RedisReplyBuilder*>(builder);\n  rb->SendScoredArray(result.value(), true /* with scores */);\n}\n\nOpResult<DbSlice::ItAndUpdater> PrepareZEntry(const ZSetFamily::ZParams& zparams,\n                                              const OpArgs& op_args, string_view key,\n        
                                      size_t member_len) {\n  auto& db_slice = op_args.GetDbSlice();\n  if (zparams.flags & ZADD_IN_XX) {\n    return db_slice.FindMutable(op_args.db_cntx, key, OBJ_ZSET);\n  }\n\n  // Here we use nullopt for type because we can override the type if it exists.\n  // If override is not set, we will return an error if the type is not OBJ_ZSET.\n  auto op_res = db_slice.AddOrFind(op_args.db_cntx, key, std::nullopt);\n  RETURN_ON_BAD_STATUS(op_res);\n  auto& add_res = *op_res;\n\n  auto& it = add_res.it;\n  PrimeValue& pv = it->second;\n  if (add_res.is_new || zparams.override) {\n    // If we're overwriting an existing key (not a new one), we need to remove it from\n    // search indexes first. This prevents crashes when the key is indexed (e.g., HASH or JSON).\n    if (!add_res.is_new && zparams.override) {\n      RemoveKeyFromIndexesIfNeeded(key, op_args.db_cntx, pv, op_args.shard);\n    }\n\n    if (member_len > server.max_map_field_len) {\n      pv.InitRobj(OBJ_ZSET, OBJ_ENCODING_SKIPLIST, CompactObj::AllocateMR<detail::SortedMap>());\n    } else {\n      unsigned char* lp = lpNew(0);\n      pv.InitRobj(OBJ_ZSET, OBJ_ENCODING_LISTPACK, lp);\n    }\n  } else {\n    if (it->second.ObjType() != OBJ_ZSET)\n      return OpStatus::WRONG_TYPE;\n  }\n\n  if (!add_res.is_new && zparams.override)\n    db_slice.RemoveExpire(op_args.db_cntx.db_index, it);\n\n  auto* blocking_controller = op_args.db_cntx.ns->GetBlockingController(op_args.shard->shard_id());\n  if (add_res.is_new && blocking_controller) {\n    blocking_controller->Awaken(op_args.db_cntx.db_index, key);\n  }\n\n  return DbSlice::ItAndUpdater{add_res.it, add_res.exp_it, std::move(add_res.post_updater)};\n}\n\nenum class Action : uint8_t { RANGE = 0, REMOVE = 1, POP = 2 };\n\nclass IntervalVisitor {\n public:\n  IntervalVisitor(Action action, const ZSetFamily::RangeParams& params, PrimeValue* pv)\n      : action_(action), params_(params), pv_(pv) {\n  }\n\n  void operator()(const 
ZSetFamily::IndexInterval& ii);\n\n  void operator()(const ZSetFamily::ScoreInterval& si);\n\n  void operator()(const ZSetFamily::LexInterval& li);\n\n  void operator()(ZSetFamily::TopNScored sc);\n\n  ScoredArray PopResult() {\n    return std::move(result_);\n  }\n\n  unsigned removed() const {\n    return removed_;\n  }\n\n private:\n  void ExtractListPack(const zrangespec& range);\n  void ExtractSkipList(const zrangespec& range);\n\n  void ExtractListPack(const zlexrangespec& range);\n  void ExtractSkipList(const zlexrangespec& range);\n\n  void PopListPack(ZSetFamily::TopNScored sc);\n  void PopSkipList(ZSetFamily::TopNScored sc);\n\n  void ActionRange(unsigned start, unsigned end);  // rank\n  void ActionRange(const zrangespec& range);       // score\n  void ActionRange(const zlexrangespec& range);    // lex\n\n  void ActionRem(unsigned start, unsigned end);  // rank\n  void ActionRem(const zrangespec& range);       // score\n  void ActionRem(const zlexrangespec& range);    // lex\n\n  void ActionPop(ZSetFamily::TopNScored sc);\n\n  void Next(uint8_t* zl, uint8_t** eptr, uint8_t** sptr) const {\n    if (params_.reverse) {\n      detail::ZzlPrev(zl, eptr, sptr);\n    } else {\n      detail::ZzlNext(zl, eptr, sptr);\n    }\n  }\n\n  bool IsUnder(double score, const zrangespec& spec) const {\n    return params_.reverse ? 
detail::ZslValueGteMin(score, &spec)\n                           : detail::ZslValueLteMax(score, &spec);\n  }\n\n  void AddResult(const uint8_t* vstr, unsigned vlen, long long vlon, double score);\n\n  Action action_;\n  ZSetFamily::RangeParams params_;\n  PrimeValue* pv_;\n\n  ScoredArray result_;\n  unsigned removed_ = 0;\n};\n\nvoid IntervalVisitor::operator()(const ZSetFamily::IndexInterval& ii) {\n  unsigned long llen = pv_->Size();\n  int64_t start = ii.first;\n  int64_t end = ii.second;\n\n  if (start < 0)\n    start = llen + start;\n  if (end < 0)\n    end = llen + end;\n  if (start < 0)\n    start = 0;\n\n  if (start > end || unsigned(start) >= llen) {\n    return;\n  }\n\n  if (unsigned(end) >= llen)\n    end = llen - 1;\n\n  switch (action_) {\n    case Action::RANGE:\n      ActionRange(start, end);\n      break;\n    case Action::REMOVE:\n      ActionRem(start, end);\n      break;\n    default:\n      break;\n  }\n}\n\nvoid IntervalVisitor::operator()(const ZSetFamily::ScoreInterval& si) {\n  zrangespec range = GetZrangeSpec(params_.reverse, si);\n\n  switch (action_) {\n    case Action::RANGE:\n      ActionRange(range);\n      break;\n    case Action::REMOVE:\n      ActionRem(range);\n      break;\n    default:\n      break;\n  }\n}\n\nvoid IntervalVisitor::operator()(const ZSetFamily::LexInterval& li) {\n  zlexrangespec range = GetLexRange(params_.reverse, li);\n\n  switch (action_) {\n    case Action::RANGE:\n      ActionRange(range);\n      break;\n    case Action::REMOVE:\n      ActionRem(range);\n      break;\n    default:\n      break;\n  }\n  detail::ZslFreeLexRange(&range);\n}\n\nvoid IntervalVisitor::operator()(ZSetFamily::TopNScored sc) {\n  switch (action_) {\n    case Action::POP:\n      ActionPop(sc);\n      break;\n    default:\n      break;\n  }\n}\n\nvoid IntervalVisitor::ActionRange(unsigned start, unsigned end) {\n  if (params_.limit == 0)\n    return;\n\n  // Calculate new start and end given offset and limit.\n  start += 
params_.offset;\n  end = min<size_t>(size_t(start) + params_.limit - 1, end);\n  if (start > end) {\n    return;\n  }\n\n  container_utils::IterateSortedSet(\n      *pv_,\n      [this](container_utils::ContainerEntry ce, double score) {\n        result_.emplace_back(ce.ToString(), score);\n        return true;\n      },\n      start, end, params_.reverse, params_.with_scores);\n}\n\nvoid IntervalVisitor::ActionRange(const zrangespec& range) {\n  if (IsListPack(*pv_)) {\n    ExtractListPack(range);\n  } else {\n    CHECK_EQ(pv_->Encoding(), OBJ_ENCODING_SKIPLIST);\n    ExtractSkipList(range);\n  }\n}\n\nvoid IntervalVisitor::ActionRange(const zlexrangespec& range) {\n  if (IsListPack(*pv_)) {\n    ExtractListPack(range);\n  } else {\n    CHECK_EQ(pv_->Encoding(), OBJ_ENCODING_SKIPLIST);\n    ExtractSkipList(range);\n  }\n}\n\nvoid IntervalVisitor::ActionRem(unsigned start, unsigned end) {\n  if (IsListPack(*pv_)) {\n    uint8_t* zl = (uint8_t*)pv_->RObjPtr();\n\n    removed_ = (end - start) + 1;\n    zl = lpDeleteRange(zl, 2 * start, 2 * removed_);\n    pv_->SetRObjPtr(zl);\n  } else {\n    CHECK_EQ(OBJ_ENCODING_SKIPLIST, pv_->Encoding());\n    detail::SortedMap* zs = (detail::SortedMap*)pv_->RObjPtr();\n    removed_ = zs->DeleteRangeByRank(start, end);\n  }\n}\n\nvoid IntervalVisitor::ActionRem(const zrangespec& range) {\n  if (IsListPack(*pv_)) {\n    uint8_t* zl = (uint8_t*)pv_->RObjPtr();\n    unsigned long deleted = 0;\n    zl = detail::ZzlDeleteRangeByScore(zl, &range, &deleted);\n    pv_->SetRObjPtr(zl);\n    removed_ = deleted;\n  } else {\n    CHECK_EQ(OBJ_ENCODING_SKIPLIST, pv_->Encoding());\n    detail::SortedMap* zs = (detail::SortedMap*)pv_->RObjPtr();\n    removed_ = zs->DeleteRangeByScore(range);\n  }\n}\n\nvoid IntervalVisitor::ActionRem(const zlexrangespec& range) {\n  if (IsListPack(*pv_)) {\n    uint8_t* zl = (uint8_t*)pv_->RObjPtr();\n    unsigned long deleted = 0;\n    zl = detail::ZzlDeleteRangeByLex(zl, &range, &deleted);\n    
pv_->SetRObjPtr(zl);\n    removed_ = deleted;\n  } else {\n    CHECK_EQ(OBJ_ENCODING_SKIPLIST, pv_->Encoding());\n    detail::SortedMap* zs = (detail::SortedMap*)pv_->RObjPtr();\n    removed_ = zs->DeleteRangeByLex(range);\n  }\n}\n\nvoid IntervalVisitor::ActionPop(ZSetFamily::TopNScored sc) {\n  if (sc > 0) {\n    if (IsListPack(*pv_)) {\n      PopListPack(sc);\n    } else {\n      CHECK_EQ(pv_->Encoding(), OBJ_ENCODING_SKIPLIST);\n      PopSkipList(sc);\n    }\n  }\n}\n\nvoid IntervalVisitor::ExtractListPack(const zrangespec& range) {\n  uint8_t* zl = (uint8_t*)pv_->RObjPtr();\n  uint8_t *eptr, *sptr;\n  uint8_t* vstr;\n  unsigned int vlen = 0;\n  long long vlong = 0;\n  unsigned offset = params_.offset;\n  unsigned limit = params_.limit;\n\n  /* If reversed, get the last node in range as starting point. */\n  if (params_.reverse) {\n    eptr = detail::ZzlLastInRange(zl, &range);\n  } else {\n    eptr = detail::ZzlFirstInRange(zl, &range);\n  }\n\n  /* Get score pointer for the first element. */\n  if (eptr)\n    sptr = lpNext(zl, eptr);\n\n  /* If there is an offset, just traverse the number of elements without\n   * checking the score because that is done in the next loop. */\n  while (eptr && offset--) {\n    Next(zl, &eptr, &sptr);\n  }\n\n  while (eptr && limit--) {\n    double score = detail::ZzlGetScore(sptr);\n\n    /* Abort when the node is no longer in range. 
*/\n    if (!IsUnder(score, range))\n      break;\n\n    /* We know the element exists, so lpGetValue should always\n     * succeed */\n    vstr = lpGetValue(eptr, &vlen, &vlong);\n\n    AddResult(vstr, vlen, vlong, score);\n\n    /* Move to next node */\n    Next(zl, &eptr, &sptr);\n  }\n}\n\nvoid IntervalVisitor::ExtractSkipList(const zrangespec& range) {\n  detail::SortedMap* zs = (detail::SortedMap*)pv_->RObjPtr();\n\n  unsigned offset = params_.offset;\n  unsigned limit = params_.limit;\n\n  result_ = zs->GetRange(range, offset, limit, params_.reverse);\n}\n\nvoid IntervalVisitor::ExtractListPack(const zlexrangespec& range) {\n  uint8_t* zl = (uint8_t*)pv_->RObjPtr();\n  uint8_t *eptr, *sptr = nullptr;\n  uint8_t* vstr = nullptr;\n  unsigned int vlen = 0;\n  long long vlong = 0;\n  unsigned offset = params_.offset;\n  unsigned limit = params_.limit;\n\n  /* If reversed, get the last node in range as starting point. */\n  if (params_.reverse) {\n    eptr = detail::ZzlLastInLexRange(zl, &range);\n  } else {\n    eptr = detail::ZzlFirstInLexRange(zl, &range);\n  }\n\n  /* Get score pointer for the first element. */\n  if (eptr)\n    sptr = lpNext(zl, eptr);\n\n  /* If there is an offset, just traverse the number of elements without\n   * checking the score because that is done in the next loop. */\n  while (eptr && offset--) {\n    Next(zl, &eptr, &sptr);\n  }\n\n  while (eptr && limit--) {\n    double score = 0;\n    if (params_.with_scores) /* don't bother to extract the score if it's gonna be ignored. */\n      score = detail::ZzlGetScore(sptr);\n\n    /* Abort when the node is no longer in range. 
*/\n    if (params_.reverse) {\n      if (!detail::ZzlLexValueGteMin(eptr, &range))\n        break;\n    } else {\n      if (!detail::ZzlLexValueLteMax(eptr, &range))\n        break;\n    }\n\n    vstr = lpGetValue(eptr, &vlen, &vlong);\n    AddResult(vstr, vlen, vlong, score);\n\n    /* Move to next node */\n    Next(zl, &eptr, &sptr);\n  }\n}\n\nvoid IntervalVisitor::ExtractSkipList(const zlexrangespec& range) {\n  detail::SortedMap* zs = (detail::SortedMap*)pv_->RObjPtr();\n  unsigned offset = params_.offset;\n  unsigned limit = params_.limit;\n  result_ = zs->GetLexRange(range, offset, limit, params_.reverse);\n}\n\nvoid IntervalVisitor::PopListPack(ZSetFamily::TopNScored sc) {\n  uint8_t* zl = (uint8_t*)pv_->RObjPtr();\n  uint8_t *eptr, *sptr;\n  uint8_t* vstr;\n  unsigned int vlen = 0;\n  long long vlong = 0;\n\n  if (params_.reverse) {\n    eptr = lpSeek(zl, -2);\n  } else {\n    eptr = lpSeek(zl, 0);\n  }\n\n  /* Get score pointer for the first element. */\n  if (eptr)\n    sptr = lpNext(zl, eptr);\n\n  /* First we get the entries */\n  unsigned int num = sc;\n  while (eptr && num--) {\n    double score = detail::ZzlGetScore(sptr);\n    vstr = lpGetValue(eptr, &vlen, &vlong);\n    AddResult(vstr, vlen, vlong, score);\n\n    /* Move to next node */\n    Next(zl, &eptr, &sptr);\n  }\n\n  int start = 0;\n  if (params_.reverse) {\n    /* If the number of elements to delete is greater than the listpack length,\n     * we set the start to 0 because lpseek fails to search beyond length in reverse */\n    start = (2 * sc > lpLength(zl)) ? 0 : -2 * sc;\n  }\n\n  /* We can finally delete the elements */\n  pv_->SetRObjPtr(lpDeleteRange(zl, start, 2 * sc));\n}\n\nvoid IntervalVisitor::PopSkipList(ZSetFamily::TopNScored sc) {\n  detail::SortedMap* zs = (detail::SortedMap*)pv_->RObjPtr();\n\n  /* We start from the header, or the tail if reversed. 
*/\n  result_ = zs->PopTopScores(sc, params_.reverse);\n}\n\nvoid IntervalVisitor::AddResult(const uint8_t* vstr, unsigned vlen, long long vlong, double score) {\n  if (vstr == NULL) {\n    result_.emplace_back(absl::StrCat(vlong), score);\n  } else {\n    result_.emplace_back(string{reinterpret_cast<const char*>(vstr), vlen}, score);\n  }\n}\n\nbool ParseBound(string_view src, ZSetFamily::Bound* bound) {\n  if (src.empty())\n    return false;\n\n  if (src[0] == '(') {\n    bound->is_open = true;\n    src.remove_prefix(1);\n  }\n\n  return ParseDouble(src, &bound->val);\n}\n\nbool ParseLexBound(string_view src, ZSetFamily::LexBound* bound) {\n  if (src.empty())\n    return false;\n\n  if (src == \"+\") {\n    bound->type = ZSetFamily::LexBound::PLUS_INF;\n  } else if (src == \"-\") {\n    bound->type = ZSetFamily::LexBound::MINUS_INF;\n  } else if (src[0] == '(') {\n    bound->type = ZSetFamily::LexBound::OPEN;\n    src.remove_prefix(1);\n    bound->val = src;\n  } else if (src[0] == '[') {\n    bound->type = ZSetFamily::LexBound::CLOSED;\n    src.remove_prefix(1);\n    bound->val = src;\n  } else {\n    return false;\n  }\n\n  return true;\n}\n\nenum class AggType : uint8_t { SUM, MIN, MAX, NOOP };\nusing ScoredMap = absl::flat_hash_map<std::string, double>;\n\nScoredMap FromObject(const PrimeValue& co, double weight) {\n  ZSetFamily::RangeParams params;\n  params.with_scores = true;\n  // RANGE is a read-only operation, but requires const_cast\n  IntervalVisitor vis(Action::RANGE, params, &const_cast<PrimeValue&>(co));\n  vis(ZSetFamily::IndexInterval(0, -1));\n\n  ScoredArray arr = vis.PopResult();\n  ScoredMap res;\n  res.reserve(arr.size());\n\n  for (auto& elem : arr) {\n    elem.second *= weight;\n    if (isnan(elem.second))\n      elem.second = 0;\n    res.emplace(std::move(elem));\n  }\n\n  return res;\n}\n\nScoredMap ScoreMapFromSet(const PrimeValue& pv, double weight) {\n  ScoredMap result;\n  container_utils::IterateSet(pv, [&result, 
weight](container_utils::ContainerEntry ce) {\n    result.emplace(ce.ToString(), weight);\n    return true;\n  });\n  return result;\n}\n\ndouble Aggregate(double v1, double v2, AggType atype) {\n  switch (atype) {\n    case AggType::SUM:\n      v1 += v2;\n      return isnan(v1) ? 0 : v1;\n    case AggType::MAX:\n      return max(v1, v2);\n    case AggType::MIN:\n      return min(v1, v2);\n    case AggType::NOOP:\n      return 0;\n  }\n  return 0;\n}\n\n// the result is in the destination.\nvoid UnionScoredMap(ScoredMap* dest, ScoredMap* src, AggType agg_type) {\n  ScoredMap* target = dest;\n  ScoredMap* iter = src;\n\n  if (iter->size() > target->size())\n    swap(target, iter);\n\n  for (const auto& elem : *iter) {\n    auto [it, inserted] = target->emplace(elem);\n    if (!inserted) {\n      it->second = Aggregate(it->second, elem.second, agg_type);\n    }\n  }\n\n  if (target != dest)\n    dest->swap(*src);\n}\n\nvoid InterScoredMap(ScoredMap* dest, ScoredMap* src, AggType agg_type) {\n  ScoredMap* target = dest;\n  ScoredMap* iter = src;\n\n  if (iter->size() > target->size())\n    swap(target, iter);\n\n  auto it = iter->begin();\n  while (it != iter->end()) {\n    auto inter_it = target->find(it->first);\n    if (inter_it == target->end()) {\n      auto copy_it = it++;\n      iter->erase(copy_it);\n    } else {\n      it->second = Aggregate(it->second, inter_it->second, agg_type);\n      ++it;\n    }\n  }\n\n  if (iter != dest)\n    dest->swap(*src);\n}\n\nusing KeyIterWeightVec = vector<pair<DbSlice::ConstIterator, double>>;\n\nScoredMap UnionShardKeysWithScore(const KeyIterWeightVec& key_iter_weight_vec, AggType agg_type) {\n  ScoredMap result;\n  for (const auto& [it, weight] : key_iter_weight_vec) {\n    if (it.is_done()) {\n      continue;\n    }\n\n    ScoredMap sm;\n    if (it->second.ObjType() == OBJ_ZSET)\n      sm = FromObject(it->second, weight);\n    else {\n      DCHECK_EQ(it->second.ObjType(), OBJ_SET);\n      sm = ScoreMapFromSet(it->second, 
weight);\n    }\n    if (result.empty()) {\n      result.swap(sm);\n    } else {\n      UnionScoredMap(&result, &sm, agg_type);\n    }\n  }\n  return result;\n}\n\ndouble GetKeyWeight(const vector<double>& weights, unsigned windex) {\n  if (weights.empty()) {\n    return 1;\n  }\n\n  DCHECK_LT(windex, weights.size());\n  return weights[windex];\n}\n\nOpResult<KeyIterWeightVec> PrepareWeightedSets(const Transaction& trans, bool store,\n                                               string_view dest, const vector<double>& weights,\n                                               EngineShard* shard) {\n  ShardArgs keys = trans.GetShardArgs(shard->shard_id());\n  DCHECK(!keys.Empty());\n\n  unsigned cmdargs_keys_offset = 1;  // after {numkeys} for ZUNION/ZINTER\n  unsigned removed_keys = 0;\n\n  ShardArgs::Iterator start = keys.begin(), end = keys.end();\n\n  if (store) {\n    // first global index is 2 after {destkey, numkeys}.\n    ++cmdargs_keys_offset;\n    if (*start == dest) {\n      ++start;\n      ++removed_keys;\n    }\n\n    // In case ONLY the destination key is hosted in this shard no work on this shard should be\n    // done in this step\n    if (start == end) {\n      return OpStatus::OK;\n    }\n  }\n\n  auto& db_slice = trans.GetDbSlice(shard->shard_id());\n  KeyIterWeightVec key_weight_vec(keys.Size() - removed_keys);\n  unsigned index = 0;\n  DCHECK_GE(start.index(), cmdargs_keys_offset);\n\n  for (; start != end; ++start) {\n    auto it_res = db_slice.FindReadOnly(trans.GetDbContext(), *start);\n\n    if (!IsValid(it_res.it)) {\n      ++index;\n      continue;\n    }\n\n    auto obj_type = it_res.it->second.ObjType();\n    if (obj_type != OBJ_ZSET && obj_type != OBJ_SET)\n      return OpStatus::WRONG_TYPE;\n\n    key_weight_vec[index] = {it_res.it, GetKeyWeight(weights, start.index() - cmdargs_keys_offset)};\n    ++index;\n  }\n\n  return key_weight_vec;\n}\n\nOpResult<ScoredMap> OpUnion(EngineShard* shard, Transaction* t, string_view dest, AggType 
agg_type,\n                            const vector<double>& weights, bool store) {\n  OpResult<KeyIterWeightVec> key_vec_res = PrepareWeightedSets(*t, store, dest, weights, shard);\n  if (!key_vec_res)\n    return key_vec_res.status();\n\n  // Only dest is hosted on this shard.\n  if (key_vec_res->empty())\n    return OpStatus::OK;\n\n  return UnionShardKeysWithScore(*key_vec_res, agg_type);\n}\n\nOpResult<ScoredMap> OpInter(EngineShard* shard, Transaction* t, string_view dest, AggType agg_type,\n                            const vector<double>& weights, bool store) {\n  OpResult<KeyIterWeightVec> key_vec_res = PrepareWeightedSets(*t, store, dest, weights, shard);\n  if (!key_vec_res)\n    return key_vec_res.status();\n\n  // Only dest is hosted on this shard.\n  if (key_vec_res->empty())\n    return OpStatus::SKIPPED;\n\n  ScoredMap result;\n  for (const auto& [it, weight] : *key_vec_res) {\n    if (it.is_done()) {\n      return ScoredMap{};\n    }\n\n    ScoredMap sm;\n    if (it->second.ObjType() == OBJ_ZSET)\n      sm = FromObject(it->second, weight);\n    else {\n      DCHECK_EQ(it->second.ObjType(), OBJ_SET);\n      sm = ScoreMapFromSet(it->second, weight);\n    }\n    if (result.empty())\n      result.swap(sm);\n    else\n      InterScoredMap(&result, &sm, agg_type);\n\n    if (result.empty())\n      return result;\n  }\n\n  return result;\n}\n\nsize_t EstimateListpackMinBytes(ScoredMemberSpan members) {\n  size_t bytes = members.size() * 2;  // at least 2 bytes per score;\n  for (const auto& member : members) {\n    bytes += (member.second.size() + 1);  // string + at least 1 byte for string header.\n  }\n  return bytes;\n}\n\nstruct SetOpArgs {\n  AggType agg_type = AggType::SUM;\n  unsigned num_keys;\n  vector<double> weights;\n  bool with_scores = false;\n};\n\nOpResult<ScoredMap> IntersectResults(vector<OpResult<ScoredMap>>& results, AggType agg_type) {\n  ScoredMap result;\n  for (auto& op_res : results) {\n    if (op_res.status() == 
OpStatus::SKIPPED)\n      continue;\n\n    if (!op_res) {\n      return op_res.status();\n    }\n\n    if (op_res->empty()) {\n      return ScoredMap{};\n    }\n\n    if (result.empty()) {\n      result.swap(op_res.value());\n    } else {\n      InterScoredMap(&result, &op_res.value(), agg_type);\n    }\n\n    if (result.empty())\n      break;\n  }\n  return result;\n}\n\nOpResult<void> FillAggType(string_view agg, SetOpArgs* op_args) {\n  if (agg == \"SUM\") {\n    op_args->agg_type = AggType::SUM;\n  } else if (agg == \"MIN\") {\n    op_args->agg_type = AggType::MIN;\n  } else if (agg == \"MAX\") {\n    op_args->agg_type = AggType::MAX;\n  } else {\n    return OpStatus::SYNTAX_ERR;\n  }\n  return OpStatus::OK;\n}\n\n// Parse functions return the number of arguments read from CmdArgList\nOpResult<unsigned> ParseAggregate(CmdArgList args, bool store, SetOpArgs* op_args) {\n  if (args.size() <= 1) {\n    return OpStatus::SYNTAX_ERR;\n  }\n\n  string agg_type = absl::AsciiStrToUpper(ArgS(args, 1));\n  auto filled = FillAggType(agg_type, op_args);\n  if (!filled) {\n    return filled.status();\n  }\n  return 1;\n}\n\nOpResult<unsigned> ParseWeights(CmdArgList args, SetOpArgs* op_args) {\n  if (args.size() <= op_args->num_keys) {\n    return OpStatus::SYNTAX_ERR;\n  }\n\n  op_args->weights.resize(op_args->num_keys, 1);\n  for (unsigned i = 0; i < op_args->num_keys; ++i) {\n    string_view weight = ArgS(args, i + 1);\n    if (!absl::SimpleAtod(weight, &op_args->weights[i])) {\n      return OpStatus::INVALID_FLOAT;\n    }\n  }\n\n  return op_args->num_keys;\n}\n\nOpResult<void> ParseKeyCount(string_view arg_num_keys, SetOpArgs* op_args) {\n  // we parsed the structure before, when transaction has been initialized.\n  if (!absl::SimpleAtoi(arg_num_keys, &op_args->num_keys)) {\n    return OpStatus::SYNTAX_ERR;\n  }\n  return OpStatus::OK;\n}\n\nOpResult<unsigned> ParseWithScores(CmdArgList args, SetOpArgs* op_args) {\n  op_args->with_scores = true;\n  return 
0;\n}\n\nOpResult<SetOpArgs> ParseSetOpArgs(CmdArgList args, bool store) {\n  string_view num_keys_str = store ? ArgS(args, 1) : ArgS(args, 0);\n  SetOpArgs op_args;\n\n  auto parsed = ParseKeyCount(num_keys_str, &op_args);\n  if (!parsed) {\n    return parsed.status();\n  }\n\n  unsigned opt_args_start = op_args.num_keys + (store ? 2 : 1);\n  DCHECK_LE(opt_args_start, args.size());  // Checked inside DetermineKeys\n\n  for (size_t i = opt_args_start; i < args.size(); ++i) {\n    string arg = absl::AsciiStrToUpper(ArgS(args, i));\n    if (arg == \"WEIGHTS\") {\n      auto parsed_cnt = ParseWeights(args.subspan(i), &op_args);\n      if (!parsed_cnt) {\n        return parsed_cnt.status();\n      }\n      i += *parsed_cnt;\n    } else if (arg == \"AGGREGATE\") {\n      auto parsed_cnt = ParseAggregate(args.subspan(i), store, &op_args);\n      if (!parsed_cnt) {\n        return parsed_cnt.status();\n      }\n      i += *parsed_cnt;\n    } else if (arg == \"WITHSCORES\") {\n      // Commands with store capability does not offer WITHSCORES option\n      if (store) {\n        return OpStatus::SYNTAX_ERR;\n      }\n      auto parsed_cnt = ParseWithScores(args.subspan(i), &op_args);\n      if (!parsed_cnt) {\n        return parsed_cnt.status();\n      }\n      i += *parsed_cnt;\n    } else {\n      return OpStatus::SYNTAX_ERR;\n    }\n  }\n  return op_args;\n}\n\nScoredArray OpBZPop(Transaction* t, EngineShard* shard, std::string_view key, bool is_max) {\n  auto& db_slice = t->GetDbSlice(shard->shard_id());\n  auto it_res = db_slice.FindMutable(t->GetDbContext(), key, OBJ_ZSET);\n  CHECK(it_res) << t->DebugId() << \" \" << key;  // must exist and must be ok.\n  auto it = it_res->it;\n\n  ZSetFamily::RangeParams range_params;\n  range_params.reverse = is_max;\n  range_params.with_scores = true;\n  ZSetFamily::ZRangeSpec range_spec;\n  range_spec.params = range_params;\n  range_spec.interval = ZSetFamily::TopNScored(1);\n\n  DVLOG(2) << \"popping from \" << key << \" \" << 
t->DebugId();\n\n  PrimeValue& pv = it->second;\n  CHECK_GT(pv.Size(), 0u) << key << \" \" << pv.Encoding();\n\n  IntervalVisitor iv{Action::POP, range_spec.params, &pv};\n  std::visit(iv, range_spec.interval);\n\n  it_res->post_updater.Run();\n\n  auto res = iv.PopResult();\n\n  // We don't store empty keys\n  CHECK(!res.empty()) << key << \" failed to pop from type \" << pv.Encoding() << \" now size is \"\n                      << pv.Size();\n\n  auto zlen = pv.Size();\n  if (zlen == 0) {\n    DVLOG(1) << \"deleting key \" << key << \" \" << t->DebugId();\n    db_slice.Del(t->GetDbContext(), it_res->it);\n  }\n\n  OpArgs op_args = t->GetOpArgs(shard);\n  if (op_args.shard->journal()) {\n    string command = is_max ? \"ZPOPMAX\" : \"ZPOPMIN\";\n    RecordJournal(op_args, command, ArgSlice{key}, 1);\n  }\n\n  return res;\n}\n\nvoid BZPopMinMax(CmdArgList args, bool is_max, CommandContext* cmd_cntx) {\n  DCHECK_GE(args.size(), 2u);\n\n  float timeout;\n  auto timeout_str = ArgS(args, args.size() - 1);\n  if (!absl::SimpleAtof(timeout_str, &timeout)) {\n    return cmd_cntx->SendError(\"timeout is not a float or out of range\");\n  }\n  if (timeout < 0) {\n    return cmd_cntx->SendError(\"timeout is negative\");\n  }\n  VLOG(1) << \"BZPop timeout(\" << timeout << \")\";\n\n  optional<std::string> callback_ran_key;\n  OpResult<ScoredArray> popped_array;\n  auto cb = [is_max, &popped_array, &callback_ran_key](Transaction* t, EngineShard* shard,\n                                                       std::string_view key) {\n    callback_ran_key = key;\n    popped_array = OpBZPop(t, shard, key, is_max);\n  };\n\n  auto* cntx = cmd_cntx->server_conn_cntx();\n  OpResult<string> popped_key = container_utils::RunCbOnFirstNonEmptyBlocking(\n      cmd_cntx->tx(), OBJ_ZSET, std::move(cb), unsigned(timeout * 1000), &cntx->blocked,\n      &cntx->paused);\n\n  auto* rb = static_cast<RedisReplyBuilder*>(cmd_cntx->rb());\n  if (popped_key) {\n    if (!callback_ran_key) {\n      
return rb->SendNullArray();\n    }\n\n    CHECK_EQ(popped_array->size(), 1u) << popped_key << \" ran \" << *callback_ran_key;\n    rb->StartArray(3);\n    rb->SendBulkString(*popped_key);\n    rb->SendBulkString(popped_array->front().first);\n    return rb->SendDouble(popped_array->front().second);\n  }\n\n  DVLOG(1) << \"result for \" << cmd_cntx->tx()->DebugId() << \" is \" << popped_key.status();\n  switch (popped_key.status()) {\n    case OpStatus::WRONG_TYPE:\n      return cmd_cntx->SendError(kWrongTypeErr);\n    case OpStatus::CANCELLED:\n    case OpStatus::TIMED_OUT:\n      return rb->SendNullArray();\n    case OpStatus::KEY_MOVED: {\n      auto error = cluster::SlotOwnershipError(*cmd_cntx->tx()->GetUniqueSlotId());\n      CHECK(!error.status.has_value() || error.status.value() != facade::OpStatus::OK);\n      return cmd_cntx->SendError(error);\n    }\n    default:\n      LOG(ERROR) << \"Unexpected error \" << popped_key.status();\n  }\n  return rb->SendNullArray();\n}\n\nOpResult<vector<ScoredMap>> OpFetch(EngineShard* shard, Transaction* t, bool skip_dest_key) {\n  ShardArgs keys = t->GetShardArgs(shard->shard_id());\n  DCHECK(!keys.Empty());\n\n  ShardArgs::Iterator start = keys.begin(), end = keys.end();\n\n  if (skip_dest_key) {\n    // If destkey is only found on this shard we can return\n    if (++start == end)\n      return OpStatus::OK;\n  }\n\n  vector<ScoredMap> results;\n  results.reserve(keys.Size() - (skip_dest_key ? 
1 : 0));\n\n  auto& db_slice = t->GetDbSlice(shard->shard_id());\n  for (; start != end; ++start) {\n    auto it = db_slice.FindReadOnly(t->GetDbContext(), *start, OBJ_ZSET);\n\n    if (!it) {\n      // Key has wrong type so return so we can report error back\n      if (it.status() == OpStatus::WRONG_TYPE) {\n        return OpStatus::WRONG_TYPE;\n      }\n      // Key is not found so treat it as empty set\n      results.push_back({});\n      continue;\n    }\n\n    ScoredMap sm = FromObject((*it)->second, 1);\n    results.push_back(std::move(sm));\n  }\n\n  return results;\n}\n\nauto OpPopCount(const ZSetFamily::ZRangeSpec& range_spec, const OpArgs& op_args, string_view key)\n    -> OpResult<ScoredArray> {\n  auto& db_slice = op_args.GetDbSlice();\n  auto res_it = db_slice.FindMutable(op_args.db_cntx, key, OBJ_ZSET);\n  if (!res_it)\n    return res_it.status();\n\n  PrimeValue& pv = res_it->it->second;\n\n  IntervalVisitor iv{Action::POP, range_spec.params, &pv};\n  std::visit(iv, range_spec.interval);\n\n  res_it->post_updater.Run();\n\n  auto zlen = pv.Size();\n  if (zlen == 0) {\n    op_args.GetDbSlice().Del(op_args.db_cntx, res_it->it);\n  }\n\n  // Checking if command conatins flag with no autojournal\n  // and we are assuming auto journaling is not re-enabled.\n  if ((op_args.tx->GetCId()->opt_mask() & CO::NO_AUTOJOURNAL) && op_args.shard->journal()) {\n    auto reverse = range_spec.params.reverse;\n    // Checking if interval is actually TopNScored or something else before proceeding.\n    DCHECK(std::holds_alternative<ZSetFamily::TopNScored>(range_spec.interval));\n    auto count = std::get<ZSetFamily::TopNScored>(range_spec.interval);\n    string command = (reverse ? 
\"ZPOPMAX\" : \"ZPOPMIN\");\n    RecordJournal(op_args, command, ArgSlice{key, absl::StrCat(count)}, 1);\n  }\n\n  return iv.PopResult();\n}\n\nauto OpRange(const ZSetFamily::ZRangeSpec& range_spec, const OpArgs& op_args, string_view key)\n    -> OpResult<ScoredArray> {\n  auto res_it = op_args.GetDbSlice().FindReadOnly(op_args.db_cntx, key, OBJ_ZSET);\n  if (!res_it)\n    return res_it.status();\n\n  // Action::RANGE is read-only, but requires mutable pointer, thus const_cast\n  PrimeValue& pv = const_cast<PrimeValue&>(res_it.value()->second);\n  IntervalVisitor iv{Action::RANGE, range_spec.params, &pv};\n\n  std::visit(iv, range_spec.interval);\n\n  return iv.PopResult();\n}\n\nOpResult<unsigned> OpRemRange(const OpArgs& op_args, string_view key,\n                              const ZSetFamily::ZRangeSpec& range_spec) {\n  auto& db_slice = op_args.GetDbSlice();\n  auto res_it = db_slice.FindMutable(op_args.db_cntx, key, OBJ_ZSET);\n  if (!res_it)\n    return res_it.status();\n\n  PrimeValue& pv = res_it->it->second;\n  IntervalVisitor iv{Action::REMOVE, range_spec.params, &pv};\n  std::visit(iv, range_spec.interval);\n\n  res_it->post_updater.Run();\n\n  auto zlen = pv.Size();\n  if (zlen == 0) {\n    op_args.GetDbSlice().Del(op_args.db_cntx, res_it->it);\n  }\n\n  return iv.removed();\n}\n\nstruct RankResult {\n  unsigned rank;\n  double score = 0;\n};\n\nOpResult<RankResult> OpRank(const OpArgs& op_args, string_view key, string_view member,\n                            bool reverse, bool with_score) {\n  auto res_it = op_args.GetDbSlice().FindReadOnly(op_args.db_cntx, key, OBJ_ZSET);\n  if (!res_it)\n    return res_it.status();\n\n  auto& pv = res_it.value()->second;\n  if (IsListPack(pv)) {\n    unsigned char* zl = (uint8_t*)pv.RObjPtr();\n    unsigned char *eptr, *sptr;\n\n    eptr = lpSeek(zl, 0);\n    DCHECK(eptr != NULL);\n    sptr = lpNext(zl, eptr);\n    DCHECK(sptr != NULL);\n\n    unsigned rank = 1;\n    if (member.empty())\n      member = \"\"sv;\n\n  
  while (eptr != NULL) {\n      if (lpCompare(eptr, (const uint8_t*)member.data(), member.size()))\n        break;\n      rank++;\n      detail::ZzlNext(zl, &eptr, &sptr);\n    }\n\n    if (eptr == NULL)\n      return OpStatus::KEY_NOTFOUND;\n\n    RankResult res{};\n    res.rank = reverse ? lpLength(zl) / 2 - rank : rank - 1;\n    if (with_score) {\n      res.score = detail::ZzlGetScore(sptr);\n    }\n    return res;\n  }\n  DCHECK_EQ(pv.Encoding(), OBJ_ENCODING_SKIPLIST);\n  detail::SortedMap* ss = (detail::SortedMap*)pv.RObjPtr();\n\n  RankResult res{};\n\n  if (with_score) {\n    auto rankAndScore = ss->GetRankAndScore(member, reverse);\n    if (!rankAndScore) {\n      return OpStatus::KEY_NOTFOUND;\n    }\n    res.rank = rankAndScore->first;\n    res.score = rankAndScore->second;\n  } else {\n    std::optional<unsigned> rank = ss->GetRank(member, reverse);\n    if (!rank) {\n      return OpStatus::KEY_NOTFOUND;\n    }\n    res.rank = *rank;\n  }\n\n  return res;\n}\n\nOpResult<unsigned> OpCount(const OpArgs& op_args, std::string_view key,\n                           const ZSetFamily::ScoreInterval& interval) {\n  auto res_it = op_args.GetDbSlice().FindReadOnly(op_args.db_cntx, key, OBJ_ZSET);\n  if (!res_it)\n    return res_it.status();\n\n  auto& pv = res_it.value()->second;\n  zrangespec range = GetZrangeSpec(false, interval);\n  unsigned count = 0;\n\n  if (range.min > range.max) {\n    return 0;\n  }\n\n  if (IsListPack(pv)) {\n    uint8_t* zl = (uint8_t*)pv.RObjPtr();\n    uint8_t *eptr, *sptr;\n    double score;\n\n    /* Use the first element in range as the starting point */\n    eptr = detail::ZzlFirstInRange(zl, &range);\n\n    /* No \"first\" element */\n    if (eptr == NULL) {\n      return 0;\n    }\n\n    /* First element is in range */\n    sptr = lpNext(zl, eptr);\n    score = detail::ZzlGetScore(sptr);\n\n    DCHECK(detail::ZslValueLteMax(score, &range));\n\n    /* Iterate over elements in range */\n    while (eptr) {\n      score = 
detail::ZzlGetScore(sptr);\n\n      /* Abort when the node is no longer in range. */\n      if (!detail::ZslValueLteMax(score, &range)) {\n        break;\n      } else {\n        count++;\n        detail::ZzlNext(zl, &eptr, &sptr);\n      }\n    }\n  } else {\n    CHECK_EQ(unsigned(OBJ_ENCODING_SKIPLIST), pv.Encoding());\n    detail::SortedMap* zs = (detail::SortedMap*)pv.RObjPtr();\n    count = zs->Count(range);\n  }\n\n  return count;\n}\n\nOpResult<unsigned> OpLexCount(const OpArgs& op_args, string_view key,\n                              const ZSetFamily::LexInterval& interval) {\n  auto res_it = op_args.GetDbSlice().FindReadOnly(op_args.db_cntx, key, OBJ_ZSET);\n  if (!res_it)\n    return res_it.status();\n\n  zlexrangespec range = GetLexRange(false, interval);\n  unsigned count = 0;\n\n  auto& pv = res_it.value()->second;\n  if (IsListPack(pv)) {\n    uint8_t* zl = (uint8_t*)pv.RObjPtr();\n    uint8_t *eptr, *sptr;\n\n    /* Use the first element in range as the starting point */\n    eptr = detail::ZzlFirstInLexRange(zl, &range);\n\n    if (eptr) {\n      /* First element is in range */\n      sptr = lpNext(zl, eptr);\n      DCHECK(detail::ZzlLexValueLteMax(eptr, &range));\n\n      /* Iterate over elements in range */\n      while (eptr) {\n        /* Abort when the node is no longer in range. 
*/\n        if (!detail::ZzlLexValueLteMax(eptr, &range)) {\n          break;\n        } else {\n          count++;\n          detail::ZzlNext(zl, &eptr, &sptr);\n        }\n      }\n    }\n  } else {\n    DCHECK_EQ(OBJ_ENCODING_SKIPLIST, pv.Encoding());\n    detail::SortedMap* zs = (detail::SortedMap*)pv.RObjPtr();\n    count = zs->LexCount(range);\n  }\n\n  detail::ZslFreeLexRange(&range);\n  return count;\n}\n\nOpResult<unsigned> OpRem(const OpArgs& op_args, string_view key, const facade::ArgRange& members) {\n  auto& db_slice = op_args.GetDbSlice();\n  auto res_it = db_slice.FindMutable(op_args.db_cntx, key, OBJ_ZSET);\n  if (!res_it)\n    return res_it.status();\n\n  auto& pv = res_it->it->second;\n  unsigned deleted = 0;\n  for (string_view member : members)\n    deleted += ZsetDel(&pv, member);\n\n  auto zlen = pv.Size();\n  res_it->post_updater.Run();\n\n  if (zlen == 0) {\n    op_args.GetDbSlice().Del(op_args.db_cntx, res_it->it);\n  }\n\n  return deleted;\n}\n\nOpResult<MScoreResponse> OpMScore(const OpArgs& op_args, string_view key,\n                                  const facade::ArgRange& members) {\n  auto res_it = op_args.GetDbSlice().FindReadOnly(op_args.db_cntx, key, OBJ_ZSET);\n\n  if (res_it.status() == OpStatus::KEY_NOTFOUND) {\n    // If the key doesn't exist return an array of NIL values\n    MScoreResponse result(members.Size(), std::nullopt);\n    return result;\n  }\n\n  if (!res_it)\n    return res_it.status();\n\n  MScoreResponse scores(members.Size());\n\n  auto& pv = res_it.value()->second;\n  size_t i = 0;\n  for (string_view member : members.Range())\n    scores[i++] = GetZsetScore(pv, member);\n\n  return scores;\n}\n\nOpResult<StringVec> OpScan(const OpArgs& op_args, std::string_view key, uint64_t* cursor,\n                           const ScanOpts& scan_op) {\n  auto find_res = op_args.GetDbSlice().FindReadOnly(op_args.db_cntx, key, OBJ_ZSET);\n\n  if (!find_res) {\n    *cursor = 0;\n    return find_res.status();\n  }\n\n  const 
PrimeValue& pv = (*find_res)->second;\n  StringVec res;\n  char buf[128];\n\n  if (IsListPack(pv)) {\n    ZSetFamily::RangeParams params;\n    params.with_scores = true;\n    IntervalVisitor iv{Action::RANGE, params, const_cast<PrimeValue*>(&pv)};\n\n    iv(ZSetFamily::IndexInterval{0, kuint32max});\n    ScoredArray arr = iv.PopResult();\n\n    for (size_t i = 0; i < arr.size(); ++i) {\n      if (!scan_op.Matches(arr[i].first)) {\n        continue;\n      }\n      res.emplace_back(std::move(arr[i].first));\n      char* str = RedisReplyBuilder::FormatDouble(arr[i].second, buf, sizeof(buf));\n      res.emplace_back(str);\n    }\n    *cursor = 0;\n  } else {\n    CHECK_EQ(unsigned(OBJ_ENCODING_SKIPLIST), pv.Encoding());\n    uint32_t count = scan_op.limit;\n    detail::SortedMap* sm = (detail::SortedMap*)pv.RObjPtr();\n    long maxiterations = count * 10;\n    uint64_t cur = *cursor;\n\n    auto cb = [&](string_view str, double score) {\n      if (scan_op.Matches(str)) {\n        res.emplace_back(str);\n        char* str = RedisReplyBuilder::FormatDouble(score, buf, sizeof(buf));\n        res.emplace_back(str);\n      }\n    };\n    do {\n      cur = sm->Scan(cur, cb);\n    } while (cur && maxiterations-- && res.size() < count);\n    *cursor = cur;\n  }\n\n  return res;\n}\n\nOpResult<ScoredArray> OpRandMember(int count, const ZSetFamily::RangeParams& params,\n                                   const OpArgs& op_args, string_view key) {\n  auto it = op_args.GetDbSlice().FindReadOnly(op_args.db_cntx, key, OBJ_ZSET);\n  if (!it)\n    return it.status();\n\n  // Action::RANGE is a read-only operation, but requires const_cast\n  PrimeValue& pv = const_cast<PrimeValue&>(it.value()->second);\n\n  const std::size_t size = pv.Size();\n  const std::size_t picks_count =\n      count >= 0 ? std::min(static_cast<std::size_t>(count), size) : std::abs(count);\n\n  ScoredArray result{picks_count};\n  std::unique_ptr<PicksGenerator> generator =\n      count >= 0 ? 
static_cast<std::unique_ptr<PicksGenerator>>(\n                       std::make_unique<UniquePicksGenerator>(picks_count, size))\n                 : std::make_unique<NonUniquePicksGenerator>(size);\n\n  if (picks_count * static_cast<std::uint64_t>(std::log2(size)) < size) {\n    for (std::size_t i = 0; i < picks_count; i++) {\n      const std::size_t picked_index = generator->Generate();\n\n      IntervalVisitor iv{Action::RANGE, params, &pv};\n      iv(ZSetFamily::IndexInterval{picked_index, picked_index});\n\n      result[i] = iv.PopResult().front();\n    }\n  } else {\n    IntervalVisitor iv{Action::RANGE, params, &pv};\n    iv(ZSetFamily::IndexInterval{0, -1});\n\n    ScoredArray all_elements = iv.PopResult();\n\n    for (std::size_t i = 0; i < picks_count; i++) {\n      result[i] = all_elements[generator->Generate()];\n    }\n  }\n\n  return result;\n}\n\n// Boolean operation: union or intersection, optionally storing output to destination key\nvoid ZBooleanOperation(CmdArgList args, string_view cmd, bool is_union, bool store,\n                       CommandContext* cmd_cntx) {\n  auto shard_func = is_union ? OpUnion : OpInter;\n  auto merge_func = is_union ? 
UnionScoredMap : InterScoredMap;\n\n  string_view dest_key = ArgS(args, 0);\n  OpResult<SetOpArgs> op_args = ParseSetOpArgs(args, store);\n  if (!op_args) {\n    switch (op_args.status()) {\n      case OpStatus::INVALID_FLOAT:\n        return cmd_cntx->SendError(\"weight value is not a float\", kSyntaxErrType);\n      default:\n        return cmd_cntx->SendError(op_args.status());\n    }\n  }\n  if (op_args->num_keys == 0) {\n    return cmd_cntx->SendError(absl::StrCat(\"at least 1 input key is needed for \", cmd));\n  }\n  Transaction* tx = cmd_cntx->tx();\n  vector<OpResult<ScoredMap>> maps(shard_set->size(), OpStatus::SKIPPED);\n  auto cb = [&](Transaction* t, EngineShard* shard) {\n    maps[shard->shard_id()] =\n        shard_func(shard, t, dest_key, op_args->agg_type, op_args->weights, store);\n    return OpStatus::OK;\n  };\n  tx->Execute(cb, !store /* if we don't store, conclude */);\n\n  // Merge results from all shards\n  ScoredMap result;\n  for (auto& op_res : maps) {\n    if (op_res.status() == OpStatus::SKIPPED)\n      continue;\n    if (!op_res) {\n      if (store) {\n        tx->Conclude();\n      }\n      return cmd_cntx->SendError(op_res.status());\n    }\n\n    if (result.empty())\n      result = std::move(op_res.value());\n    else\n      merge_func(&result, &op_res.value(), op_args->agg_type);\n\n    if (result.empty() && !is_union)  // intersection only shrinks\n      break;\n  }\n\n  // Copy to vector for sorting\n  vector<ScoredMemberView> smvec(result.size());\n  size_t i = 0;\n  for (const auto& [str, score] : result)\n    smvec[i++] = {score, str};\n\n  SinkReplyBuilder* builder = cmd_cntx->rb();\n  if (store) {\n    // TODO: Use variant collection to avoid smvec copy for store operation\n    auto store_cb = [&, dest_shard = Shard(dest_key, maps.size())](Transaction* t,\n                                                                   EngineShard* shard) {\n      if (shard->shard_id() == dest_shard)\n        
ZSetFamily::OpAdd(t->GetOpArgs(shard),\n                          ZSetFamily::ZParams{.override = true, .journal_update = true}, dest_key,\n                          smvec);\n      return OpStatus::OK;\n    };\n    tx->Execute(store_cb, true);\n    builder->SendLong(smvec.size());\n  } else {\n    std::sort(std::begin(smvec), std::end(smvec));\n\n    // We can't use SendScoredArray because it expects strings, not string_views\n    // TOOD: Not longer relevant with new io, use scoping\n    auto* rb = static_cast<RedisReplyBuilder*>(builder);\n    rb->StartArray(smvec.size() * (op_args->with_scores ? 2 : 1));\n    for (const auto& elem : smvec) {\n      rb->SendBulkString(elem.second);\n      if (op_args->with_scores) {\n        rb->SendDouble(elem.first);\n      }\n    }\n  }\n}\n\nenum class FilterShards : uint8_t { NO = 0, YES = 1 };\n\nOpResult<ScoredArray> ZPopMinMaxInternal(std::string_view key, FilterShards should_filter_shards,\n                                         uint32 count, bool reverse, Transaction* tx) {\n  ZSetFamily::RangeParams range_params;\n  range_params.reverse = reverse;\n  range_params.with_scores = true;\n  ZSetFamily::ZRangeSpec range_spec;\n  range_spec.params = range_params;\n\n  range_spec.interval = count;\n\n  OpResult<ScoredArray> result;\n\n  std::optional<ShardId> key_shard;\n  if (should_filter_shards == FilterShards::YES) {\n    key_shard = Shard(key, shard_set->size());\n  }\n  auto cb = [&](Transaction* t, EngineShard* shard) {\n    if (!key_shard.has_value() || *key_shard == shard->shard_id()) {\n      result = OpPopCount(range_spec, t->GetOpArgs(shard), key);\n    }\n    return OpStatus::OK;\n  };\n\n  tx->Execute(std::move(cb), true);\n\n  return result;\n}\n\nvoid ZPopMinMaxFromArgs(CmdArgList args, bool reverse, CommandContext* cmd_cntx) {\n  string_view key = ArgS(args, 0);\n  uint32 count = 1;\n  if (args.size() > 1) {\n    string_view count_str = ArgS(args, 1);\n    if (!SimpleAtoi(count_str, &count)) {\n      return 
cmd_cntx->SendError(kUintErr);\n    }\n  }\n\n  OutputScoredArrayResult(ZPopMinMaxInternal(key, FilterShards::NO, count, reverse, cmd_cntx->tx()),\n                          cmd_cntx->rb());\n}\n\nvoid ZRangeInternal(CmdArgList args, ZSetFamily::RangeParams range_params,\n                    CommandContext* cmd_cntx) {\n  string_view key = ArgS(args, 0);\n  string_view min_s = ArgS(args, 1);\n  string_view max_s = ArgS(args, 2);\n\n  ZSetFamily::ZRangeSpec range_spec;\n  range_spec.params = range_params;\n  using RP = ZSetFamily::RangeParams;\n\n  switch (range_params.interval_type) {\n    case RP::IntervalType::SCORE: {\n      ZSetFamily::ScoreInterval si;\n      if (!ParseBound(min_s, &si.first) || !ParseBound(max_s, &si.second)) {\n        return cmd_cntx->SendError(kFloatRangeErr);\n      }\n      range_spec.interval = si;\n      break;\n    }\n    case RP::IntervalType::LEX: {\n      ZSetFamily::LexInterval li;\n      if (!ParseLexBound(min_s, &li.first) || !ParseLexBound(max_s, &li.second)) {\n        return cmd_cntx->SendError(kLexRangeErr);\n      }\n      range_spec.interval = li;\n      break;\n    }\n    case RP::IntervalType::RANK: {\n      ZSetFamily::IndexInterval ii;\n      if (!SimpleAtoi(min_s, &ii.first) || !SimpleAtoi(max_s, &ii.second)) {\n        cmd_cntx->SendError(kInvalidIntErr);\n        return;\n      }\n      range_spec.interval = ii;\n      break;\n    }\n  }\n\n  OpResult<ScoredArray> range_result;\n  ShardId src_shard = Shard(key, shard_set->size());\n  auto range_cb = [&](Transaction* t, EngineShard* shard) {\n    if (shard->shard_id() != src_shard) {\n      // Only run ZRANGE on the source shard.\n      return OpStatus::OK;\n    }\n    range_result = OpRange(range_spec, t->GetOpArgs(shard), key);\n    return OpStatus::OK;\n  };\n\n  auto* tx = cmd_cntx->tx();\n  // Don't conclude the transaction if we're storing the result.\n  tx->Execute(std::move(range_cb), !range_params.store_key);\n\n  if (range_result.status() == 
OpStatus::WRONG_TYPE) {\n    if (range_params.store_key) {\n      tx->Conclude();\n    }\n    return cmd_cntx->SendError(kWrongTypeErr);\n  }\n  LOG_IF(WARNING, !range_result && range_result.status() != OpStatus::KEY_NOTFOUND)\n      << \"Unexpected status \" << range_result.status();\n\n  if (!range_params.store_key) {\n    auto* rb = static_cast<RedisReplyBuilder*>(cmd_cntx->rb());\n    rb->SendScoredArray(range_result.value(), range_params.with_scores);\n    return;\n  }\n\n  OpResult<ZSetFamily::AddResult> add_result;\n  ShardId dest_shard = Shard(*range_params.store_key, shard_set->size());\n  auto add_cb = [&](Transaction* t, EngineShard* shard) {\n    if (shard->shard_id() != dest_shard) {\n      // Only write the result on the target shard.\n      return OpStatus::OK;\n    }\n\n    std::vector<ScoredMemberView> mvec(range_result->size());\n    size_t i = 0;\n    for (const auto& [str, score] : *range_result) {\n      mvec[i++] = {score, str};\n    }\n\n    add_result = ZSetFamily::OpAdd(t->GetOpArgs(shard),\n                                   ZSetFamily::ZParams{.override = true, .journal_update = true},\n                                   *range_params.store_key, mvec);\n\n    return OpStatus::OK;\n  };\n  tx->Execute(std::move(add_cb), true);\n\n  if (add_result.status() == OpStatus::OUT_OF_MEMORY) {\n    return cmd_cntx->SendError(add_result.status());\n  }\n  LOG_IF(WARNING, !add_result) << \"Unexpected status \" << add_result.status();\n\n  return cmd_cntx->SendLong(range_result->size());\n}\n\nvoid ZRangeGeneric(CmdArgList args, ZSetFamily::RangeParams range_params,\n                   CommandContext* cmd_cntx) {\n  facade::CmdArgParser parser{args.subspan(3)};\n  using RP = ZSetFamily::RangeParams;\n\n  while (true) {\n    RETURN_ON_PARSE_ERROR(parser, cmd_cntx);\n\n    if (!parser.HasNext())\n      break;\n\n    if (parser.Check(\"BYSCORE\")) {\n      if (exchange(range_params.interval_type, RP::SCORE) == RP::LEX)\n        return 
cmd_cntx->SendError(\"BYSCORE and BYLEX options are not compatible\");\n      continue;\n    }\n\n    if (parser.Check(\"BYLEX\")) {\n      if (exchange(range_params.interval_type, RP::LEX) == RP::SCORE)\n        return cmd_cntx->SendError(\"BYSCORE and BYLEX options are not compatible\");\n      continue;\n    }\n    if (parser.Check(\"REV\")) {\n      range_params.reverse = true;\n      continue;\n    }\n    if (parser.Check(\"WITHSCORES\")) {\n      range_params.with_scores = true;\n      continue;\n    }\n\n    if (parser.Check(\"LIMIT\")) {\n      auto [offset, limit] = parser.Next<int32_t, int32_t>();\n\n      range_params.limit = limit < 0 ? UINT32_MAX : static_cast<uint32_t>(limit);\n      range_params.offset = offset < 0 ? UINT32_MAX : static_cast<uint32_t>(offset);\n      continue;\n    }\n\n    return cmd_cntx->SendError(absl::StrCat(\"unsupported option \", parser.Peek()));\n  }\n\n  if (range_params.offset == UINT32_MAX) {\n    auto* rb = static_cast<RedisReplyBuilder*>(cmd_cntx->rb());\n    return rb->SendEmptyArray();\n  }\n\n  ZRangeInternal(args.subspan(0, 3), range_params, cmd_cntx);\n}\n\nvoid ZRankGeneric(CmdArgList args, bool reverse, CommandContext* cmd_cntx) {\n  // send this error exact as redis does, it checks number of arguments first\n  if (args.size() > 3) {\n    return cmd_cntx->SendError(WrongNumArgsError(reverse ? 
\"ZREVRANK\" : \"ZRANK\"));\n  }\n\n  facade::CmdArgParser parser(args);\n\n  string_view key = parser.Next();\n  string_view member = parser.Next();\n  bool with_score = false;\n\n  if (parser.HasNext()) {\n    parser.ExpectTag(\"WITHSCORE\");\n    with_score = true;\n  }\n\n  if (!parser.Finalize()) {\n    return cmd_cntx->SendError(parser.TakeError().MakeReply());\n  }\n\n  auto cb = [&](Transaction* t, EngineShard* shard) {\n    return OpRank(t->GetOpArgs(shard), key, member, reverse, with_score);\n  };\n\n  OpResult<RankResult> result = cmd_cntx->tx()->ScheduleSingleHopT(std::move(cb));\n  auto* rb = static_cast<RedisReplyBuilder*>(cmd_cntx->rb());\n  if (result) {\n    if (with_score) {\n      rb->StartArray(2);\n      rb->SendLong(result->rank);\n      rb->SendDouble(result->score);\n    } else {\n      rb->SendLong(result->rank);\n    }\n  } else if (result.status() == OpStatus::KEY_NOTFOUND) {\n    rb->SendNull();\n  } else {\n    cmd_cntx->SendError(result.status());\n  }\n}\n\nvoid ZRemRangeGeneric(string_view key, const ZSetFamily::ZRangeSpec& range_spec,\n                      CommandContext* cmd_cntx) {\n  auto cb = [&](Transaction* t, EngineShard* shard) {\n    return OpRemRange(t->GetOpArgs(shard), key, range_spec);\n  };\n\n  OpResult<unsigned> result = cmd_cntx->tx()->ScheduleSingleHopT(std::move(cb));\n  if (result.status() == OpStatus::WRONG_TYPE) {\n    cmd_cntx->SendError(kWrongTypeErr);\n  } else {\n    cmd_cntx->SendLong(*result);\n  }\n}\n\n// Returns the key of the first non empty set found in the list of shard arguments.\n// Returns nullopt if none.\nstd::optional<std::string_view> GetFirstNonEmptyKeyFound(EngineShard* shard, Transaction* t) {\n  ShardArgs keys = t->GetShardArgs(shard->shard_id());\n  DCHECK(!keys.Empty());\n\n  auto& db_slice = t->GetDbSlice(shard->shard_id());\n\n  for (string_view key : keys) {\n    auto it = db_slice.FindReadOnly(t->GetDbContext(), key, OBJ_ZSET);\n    if (!it) {\n      continue;\n    }\n    return 
std::optional<std::string_view>(key);\n  }\n\n  return std::nullopt;\n}\n\n// Validates the ZMPop and BZMPop command arguments and extracts the values to the output params.\n// If the arguments are invalid sends the appropiate error to builder and returns false.\nbool ValidateZMPopCommand(CmdArgList args, bool is_blocking, CommandContext* cmd_cntx,\n                          ValidateZMPopResult* result) {\n  CmdArgParser parser{args};\n\n  if (is_blocking) {\n    if (!absl::SimpleAtof(parser.Next(), &result->timeout)) {\n      cmd_cntx->SendError(\"timeout is not a float or out of range\");\n      return false;\n    }\n    if (result->timeout < 0) {\n      cmd_cntx->SendError(\"timeout is negative\");\n      return false;\n    }\n  }\n\n  if (!SimpleAtoi(parser.Next(), &(result->num_keys))) {\n    cmd_cntx->SendError(kUintErr);\n    return false;\n  }\n\n  if (result->num_keys <= 0 || !parser.HasAtLeast(result->num_keys + 1)) {\n    // We should have at least num_keys keys + a MIN/MAX arg.\n    cmd_cntx->SendError(kSyntaxErr);\n    return false;\n  }\n  // Skip over the keys themselves.\n  parser.Skip(result->num_keys);\n\n  // We know we have at least one more arg (we checked above).\n  if (parser.Check(\"MAX\")) {\n    result->is_max = true;\n  } else if (parser.Check(\"MIN\")) {\n    result->is_max = false;\n  } else {\n    cmd_cntx->SendError(kSyntaxErr);\n    return false;\n  }\n\n  result->pop_count = 1;\n  // Check if we have additional COUNT argument.\n  if (parser.HasNext()) {\n    if (!parser.Check(\"COUNT\", &result->pop_count)) {\n      cmd_cntx->SendError(kSyntaxErr);\n      return false;\n    }\n  }\n\n  if (!parser.Finalize()) {\n    cmd_cntx->SendError(parser.TakeError().MakeReply());\n    return false;\n  }\n\n  return true;\n}\n\n}  // namespace\n\nvoid ZSetFamily::ZAddGeneric(string_view key, const ZParams& zparams, ScoredMemberSpan memb_sp,\n                             CommandContext* cmd_cntx) {\n  auto cb = [&](Transaction* t, EngineShard* 
shard) {\n    return ZSetFamily::OpAdd(t->GetOpArgs(shard), zparams, key, memb_sp);\n  };\n\n  OpResult<AddResult> add_result = cmd_cntx->tx()->ScheduleSingleHopT(std::move(cb));\n  if (base::_in(add_result.status(), {OpStatus::WRONG_TYPE, OpStatus::OUT_OF_MEMORY})) {\n    return cmd_cntx->SendError(add_result.status());\n  }\n\n  auto* rb = static_cast<RedisReplyBuilder*>(cmd_cntx->rb());\n  // KEY_NOTFOUND may happen in case of XX flag.\n  if (add_result.status() == OpStatus::KEY_NOTFOUND) {\n    if (zparams.flags & ZADD_IN_INCR)\n      rb->SendNull();\n    else\n      rb->SendLong(0);\n  } else if (add_result.status() == OpStatus::SKIPPED) {\n    rb->SendNull();\n  } else if (add_result->is_nan) {\n    cmd_cntx->SendError(kScoreNaN);\n  } else {\n    if (zparams.flags & ZADD_IN_INCR) {\n      rb->SendDouble(add_result->new_score);\n    } else {\n      rb->SendLong(add_result->num_updated);\n    }\n  }\n}\n\nOpResult<MScoreResponse> ZSetFamily::ZGetMembers(CmdArgList args, Transaction* tx,\n                                                 SinkReplyBuilder* builder) {\n  string_view key = ArgS(args, 0);\n  auto members = args.subspan(1);\n  auto cb = [key, members](Transaction* t, EngineShard* shard) {\n    return OpMScore(t->GetOpArgs(shard), key, members);\n  };\n\n  return tx->ScheduleSingleHopT(std::move(cb));\n}\n\nauto ZSetFamily::OpRanges(const std::vector<ZSetFamily::ZRangeSpec>& range_specs,\n                          const OpArgs& op_args, string_view key) -> OpResult<vector<ScoredArray>> {\n  auto res_it = op_args.GetDbSlice().FindReadOnly(op_args.db_cntx, key, OBJ_ZSET);\n  if (!res_it)\n    return res_it.status();\n\n  // Action::RANGE is read-only, but requires mutable pointer, thus const_cast\n  PrimeValue& pv = const_cast<PrimeValue&>(res_it.value()->second);\n  vector<ScoredArray> result_arrays;\n  for (auto& range_spec : range_specs) {\n    IntervalVisitor iv{Action::RANGE, range_spec.params, &pv};\n    std::visit(iv, range_spec.interval);\n    
result_arrays.push_back(iv.PopResult());\n  }\n\n  return result_arrays;\n}\n\nOpResult<ZSetFamily::AddResult> ZSetFamily::OpAdd(const OpArgs& op_args,\n                                                  const ZSetFamily::ZParams& zparams,\n                                                  string_view key, ScoredMemberSpan members) {\n  DCHECK(!members.empty() || zparams.override);\n  auto& db_slice = op_args.GetDbSlice();\n\n  if (zparams.override && members.empty()) {\n    auto res_it = db_slice.FindMutable(op_args.db_cntx, key, OBJ_ZSET);\n    if (res_it && IsValid(res_it->it)) {\n      db_slice.DelMutable(op_args.db_cntx, std::move(*res_it));\n      if (zparams.journal_update && op_args.shard->journal()) {\n        RecordJournal(op_args, \"DEL\"sv, ArgSlice{key});\n      }\n    }\n    return OpStatus::OK;\n  }\n\n  // When we have too many members to add, make sure field_len is large enough to use\n  // skiplist encoding.\n  size_t field_len =\n      members.size() > ZSET_MAX_LISTPACK_ENTRIES ? 
UINT32_MAX : members.front().second.size();\n  auto res_it = PrepareZEntry(zparams, op_args, key, field_len);\n\n  if (!res_it)\n    return res_it.status();\n\n  unsigned added = 0;\n  unsigned updated = 0;\n\n  double new_score = 0;\n  int retflags = 0;\n\n  OpStatus op_status = OpStatus::OK;\n  AddResult aresult;\n  auto& pv = res_it->it->second;\n  bool is_list_pack = IsListPack(pv);\n\n  // opportunistically reserve space if multiple entries are about to be added.\n  if ((zparams.flags & ZADD_IN_XX) == 0 && members.size() > 2) {\n    if (is_list_pack) {\n      uint8_t* zl = (uint8_t*)pv.RObjPtr();\n      size_t malloc_reserved = zmalloc_size(zl);\n      size_t min_sz = EstimateListpackMinBytes(members);\n      if (min_sz > malloc_reserved) {\n        zl = (uint8_t*)zrealloc(zl, min_sz);\n        pv.SetRObjPtr(zl);\n      }\n    } else {\n      detail::SortedMap* sm = (detail::SortedMap*)pv.RObjPtr();\n      sm->Reserve(members.size());\n    }\n  }\n\n  for (size_t j = 0; j < members.size(); j++) {\n    const auto& m = members[j];\n    int retval = ZsetAdd(&pv, m.first, m.second, zparams.flags, &retflags, &new_score);\n\n    if (zparams.flags & ZADD_IN_INCR) {\n      if (retval == 0) {\n        CHECK_EQ(1u, members.size());\n\n        aresult.is_nan = true;\n        break;\n      }\n\n      if (retflags & ZADD_OUT_NOP) {\n        op_status = OpStatus::SKIPPED;\n      }\n    }\n\n    if (retflags & ZADD_OUT_ADDED)\n      added++;\n    if (retflags & ZADD_OUT_UPDATED)\n      updated++;\n  }\n\n  if (zparams.flags & ZADD_IN_INCR) {\n    aresult.new_score = new_score;\n  } else {\n    aresult.num_updated = zparams.ch ? 
added + updated : added;\n  }\n\n  if (op_status != OpStatus::OK)\n    return op_status;\n\n  // TODO: consider optimization to record real command if the replica is in stable_sync state\n  // and there is no slot migration process going on.\n  if (zparams.journal_update && op_args.shard->journal()) {\n    if (zparams.override) {\n      RecordJournal(op_args, \"DEL\"sv, ArgSlice{key});\n    }\n\n    vector<string> scores;\n    vector<string_view> mapped;\n    scores.reserve(members.size());\n    mapped.reserve(members.size() * 2 + 1);\n    mapped.push_back(key);\n    for (const auto& [score, member] : members) {\n      scores.push_back(absl::StrCat(score));\n      mapped.push_back(scores.back());\n      mapped.push_back(member);\n    }\n    RecordJournal(op_args, \"ZADD\"sv, mapped);\n  }\n  return aresult;\n}\n\nOpResult<void> ZSetFamily::OpKeyExisted(const OpArgs& op_args, string_view key) {\n  auto res_it = op_args.GetDbSlice().FindReadOnly(op_args.db_cntx, key, OBJ_ZSET);\n  return res_it.status();\n}\n\nOpResult<double> ZSetFamily::OpScore(const OpArgs& op_args, string_view key, string_view member) {\n  auto res_it = op_args.GetDbSlice().FindReadOnly(op_args.db_cntx, key, OBJ_ZSET);\n  if (!res_it)\n    return res_it.status();\n\n  const PrimeValue& pv = res_it.value()->second;\n  auto res = GetZsetScore(pv, member);\n  if (!res) {\n    return OpStatus::MEMBER_NOTFOUND;\n  }\n  return *res;\n}\n\nnamespace {\n\nvoid CmdBZPopMin(CmdArgList args, CommandContext* cmd_cntx) {\n  BZPopMinMax(args, false, cmd_cntx);\n}\n\nvoid CmdBZPopMax(CmdArgList args, CommandContext* cmd_cntx) {\n  BZPopMinMax(args, true, cmd_cntx);\n}\n\nvoid CmdZAdd(CmdArgList args, CommandContext* cmd_cntx) {\n  string_view key = ArgS(args, 0);\n\n  ZSetFamily::ZParams zparams;\n  size_t i = 1;\n  for (; i < args.size() - 1; ++i) {\n    string cur_arg = absl::AsciiStrToUpper(ArgS(args, i));\n\n    if (cur_arg == \"XX\") {\n      zparams.flags |= ZADD_IN_XX;  // update only\n    } else if 
(cur_arg == \"NX\") {\n      zparams.flags |= ZADD_IN_NX;  // add new only.\n    } else if (cur_arg == \"GT\") {\n      zparams.flags |= ZADD_IN_GT;\n    } else if (cur_arg == \"LT\") {\n      zparams.flags |= ZADD_IN_LT;\n    } else if (cur_arg == \"CH\") {\n      zparams.ch = true;\n    } else if (cur_arg == \"INCR\") {\n      zparams.flags |= ZADD_IN_INCR;\n    } else {\n      break;\n    }\n  }\n\n  auto* builder = cmd_cntx->rb();\n  if ((args.size() - i) % 2 != 0) {\n    builder->SendError(kSyntaxErr);\n    return;\n  }\n\n  if ((zparams.flags & ZADD_IN_INCR) && (i + 2 < args.size())) {\n    builder->SendError(\"INCR option supports a single increment-element pair\");\n    return;\n  }\n\n  unsigned insert_mask = zparams.flags & (ZADD_IN_NX | ZADD_IN_XX);\n  if (insert_mask == (ZADD_IN_NX | ZADD_IN_XX)) {\n    builder->SendError(kNxXxErr);\n    return;\n  }\n\n  constexpr auto kRangeOpt = ZADD_IN_GT | ZADD_IN_LT;\n  if (((zparams.flags & ZADD_IN_NX) && (zparams.flags & kRangeOpt)) ||\n      ((zparams.flags & kRangeOpt) == kRangeOpt)) {\n    builder->SendError(\"GT, LT, and/or NX options at the same time are not compatible\");\n    return;\n  }\n\n  absl::flat_hash_set<string_view> members_set;\n  absl::InlinedVector<ScoredMemberView, 4> members;\n\n  unsigned num_members = (args.size() - i) / 2;\n\n  // We sort the fields if the expected encoding could be listpack.\n  bool to_sort_fields = false;\n\n  if (num_members > 2) {\n    members.reserve(num_members);\n\n    members_set.reserve(num_members);\n    to_sort_fields = true;\n  }\n\n  for (; i < args.size(); i += 2) {\n    string_view cur_arg = ArgS(args, i);\n    double val = 0;\n\n    // Parse the score. 
Treats Nan as invalid double.\n    if (!ParseDouble(cur_arg, &val)) {\n      VLOG(1) << \"Bad score:\" << cur_arg << \"|\";\n      return builder->SendError(kInvalidFloatErr);\n    }\n\n    string_view member = ArgS(args, i + 1);\n    if (to_sort_fields) {\n      auto [_, inserted] = members_set.insert(member);\n      to_sort_fields &= inserted;\n    }\n    members.emplace_back(val, member);\n  }\n  DCHECK(cmd_cntx->tx());\n\n  if (to_sort_fields) {\n    if (num_members == 2) {  // fix unique_members for this special case.\n      if (members[0].second == members[1].second) {\n        to_sort_fields = false;\n      }\n    }\n    if (to_sort_fields) {\n      std::sort(members.begin(), members.end());\n    }\n  }\n\n  absl::Span memb_sp{members.data(), members.size()};\n  ZSetFamily::ZAddGeneric(key, zparams, memb_sp, cmd_cntx);\n}\n\nvoid CmdZCard(CmdArgList args, CommandContext* cmd_cntx) {\n  string_view key = ArgS(args, 0);\n\n  auto cb = [&](Transaction* t, EngineShard* shard) -> OpResult<uint32_t> {\n    auto find_res = t->GetDbSlice(shard->shard_id()).FindReadOnly(t->GetDbContext(), key, OBJ_ZSET);\n    if (!find_res) {\n      return find_res.status();\n    }\n\n    return find_res.value()->second.Size();\n  };\n\n  OpResult<uint32_t> result = cmd_cntx->tx()->ScheduleSingleHopT(std::move(cb));\n  if (result.status() == OpStatus::WRONG_TYPE) {\n    cmd_cntx->SendError(kWrongTypeErr);\n    return;\n  }\n\n  cmd_cntx->SendLong(result.value());\n}\n\nvoid CmdZCount(CmdArgList args, CommandContext* cmd_cntx) {\n  string_view key = ArgS(args, 0);\n\n  string_view min_s = ArgS(args, 1);\n  string_view max_s = ArgS(args, 2);\n\n  ZSetFamily::ScoreInterval si;\n  if (!ParseBound(min_s, &si.first) || !ParseBound(max_s, &si.second)) {\n    return cmd_cntx->SendError(kFloatRangeErr);\n  }\n\n  auto cb = [&](Transaction* t, EngineShard* shard) {\n    return OpCount(t->GetOpArgs(shard), key, si);\n  };\n\n  OpResult<unsigned> result = 
cmd_cntx->tx()->ScheduleSingleHopT(std::move(cb));\n  if (result.status() == OpStatus::WRONG_TYPE) {\n    cmd_cntx->SendError(kWrongTypeErr);\n  } else {\n    cmd_cntx->SendLong(*result);\n  }\n}\n\n/* Calculate difference between key set and all other sets. */\nvector<ScoredMemberView> ZDiffOp(ShardId key_sid, vector<OpResult<vector<ScoredMap>>> maps,\n                                 ScoredMap* result) {\n  auto& key_shard_map = maps[key_sid].value();\n\n  // Key set will be first element of shard ScoredMap vector. Scored map for shard containing key\n  // should have at least one - key set. If it is empty we don't need anything and return\n  // immediately.\n  if (key_shard_map[0].empty()) {\n    return {};\n  }\n\n  // Store key set values in result and remove it from vector for further calculations.\n  *result = std::move(key_shard_map[0]);\n  key_shard_map.erase(key_shard_map.begin());\n\n  auto filter = [&result](const auto& key) mutable {\n    auto it = result->find(key);\n    if (it != result->end()) {\n      result->erase(it);\n    }\n  };\n\n  // Total O(L)\n  // Iterate over the results of each shard\n  for (auto& vsm : maps) {\n    // Iterate over each fetched set\n    for (auto& sm : vsm.value()) {\n      // Iterate over each key in the fetched set and filter\n      for (auto& [key, value] : sm) {\n        filter(key);\n      }\n    }\n  }\n\n  vector<ScoredMemberView> smvec;\n  for (const auto& elem : *result) {\n    smvec.emplace_back(elem.second, elem.first);\n  }\n\n  // Total O(KlogK)\n  std::sort(std::begin(smvec), std::end(smvec));\n\n  return smvec;\n}\n\nvoid CmdZDiff(CmdArgList args, CommandContext* cmd_cntx) {\n  vector<OpResult<vector<ScoredMap>>> maps(shard_set->size(), OpStatus::OK);\n\n  auto cb = [&](Transaction* t, EngineShard* shard) {\n    maps[shard->shard_id()] = OpFetch(shard, t, false /* no destination key */);\n    return OpStatus::OK;\n  };\n\n  cmd_cntx->tx()->ScheduleSingleHop(std::move(cb));\n\n  auto* rb = 
static_cast<RedisReplyBuilder*>(cmd_cntx->rb());\n\n  // Check shard results for WRONG_TYPE returned\n  for (auto& sm_map : maps) {\n    if (sm_map.status() == OpStatus::WRONG_TYPE) {\n      cmd_cntx->SendError(sm_map.status());\n      return;\n    }\n  }\n\n  const string_view key = ArgS(args, 1);\n  const ShardId sid = Shard(key, shard_set->size());\n\n  // We need to have result stored and not be destructed before function ends because\n  // we are passing string_view of result members to other functions\n  ScoredMap result;\n  // Calculate diff between sets.\n  vector<ScoredMemberView> smvec = ZDiffOp(sid, std::move(maps), &result);\n\n  // Empty result set so return\n  if (smvec.empty()) {\n    rb->SendEmptyArray();\n    return;\n  }\n\n  const bool with_scores = absl::EqualsIgnoreCase(ArgS(args, args.size() - 1), \"WITHSCORES\");\n  bool is_resp3 = rb->IsResp3();\n  rb->StartArray(smvec.size() * ((with_scores && !is_resp3) ? 2 : 1));\n  for (const auto& [score, key] : smvec) {\n    if (is_resp3)\n      rb->StartArray(with_scores ? 
2 : 1);\n    rb->SendBulkString(key);\n    if (with_scores) {\n      rb->SendDouble(score);\n    }\n  }\n}\n\nvoid CmdZDiffStore(CmdArgList args, CommandContext* cmd_cntx) {\n  vector<OpResult<vector<ScoredMap>>> maps(shard_set->size(), OpStatus::OK);\n  const string_view dest_key = ArgS(args, 0);\n  const ShardId dest_shard = Shard(dest_key, shard_set->size());\n\n  auto cb = [&](Transaction* t, EngineShard* shard) {\n    // We skip destkey if shard id matches\n    const bool skip_dest_key = shard->shard_id() == dest_shard;\n    maps[shard->shard_id()] = OpFetch(shard, t, skip_dest_key);\n    return OpStatus::OK;\n  };\n\n  cmd_cntx->tx()->Execute(std::move(cb), false);\n\n  auto* rb = static_cast<RedisReplyBuilder*>(cmd_cntx->rb());\n\n  // Check shard results for WRONG_TYPE returned\n  for (auto& sm_map : maps) {\n    if (sm_map.status() == OpStatus::WRONG_TYPE) {\n      cmd_cntx->tx()->Conclude();\n      return cmd_cntx->SendError(sm_map.status());\n    }\n  }\n\n  const string_view key = ArgS(args, 2);\n  const ShardId sid = Shard(key, shard_set->size());\n\n  // We need to have result stored and not be destructed before function ends because\n  // we are passing string_view of result members to other functions\n  ScoredMap result;\n  // Calculate diff between sets. 
We still need to write the destination key even if it is empty\n  vector<ScoredMemberView> smvec = ZDiffOp(sid, std::move(maps), &result);\n\n  auto store_cb = [&](Transaction* t, EngineShard* shard) {\n    if (shard->shard_id() == dest_shard)\n      ZSetFamily::OpAdd(t->GetOpArgs(shard),\n                        ZSetFamily::ZParams{.override = true, .journal_update = true}, dest_key,\n                        smvec);\n    return OpStatus::OK;\n  };\n\n  cmd_cntx->tx()->Execute(store_cb, true);\n  rb->SendLong(smvec.size());\n}\n\nvoid CmdZIncrBy(CmdArgList args, CommandContext* cmd_cntx) {\n  string_view key = ArgS(args, 0);\n  string_view score_arg = ArgS(args, 1);\n\n  ScoredMemberView scored_member;\n  scored_member.second = ArgS(args, 2);\n\n  auto* rb = static_cast<RedisReplyBuilder*>(cmd_cntx->rb());\n\n  if (!absl::SimpleAtod(score_arg, &scored_member.first)) {\n    VLOG(1) << \"Bad score:\" << score_arg << \"|\";\n    return rb->SendError(kInvalidFloatErr);\n  }\n\n  if (isnan(scored_member.first)) {\n    return rb->SendError(kScoreNaN);\n  }\n\n  ZSetFamily::ZParams zparams;\n  zparams.flags = ZADD_IN_INCR;\n\n  auto cb = [&](Transaction* t, EngineShard* shard) {\n    return ZSetFamily::OpAdd(t->GetOpArgs(shard), zparams, key,\n                             ScoredMemberSpan{&scored_member, 1});\n  };\n\n  OpResult add_result = cmd_cntx->tx()->ScheduleSingleHopT(std::move(cb));\n  if (add_result.status() == OpStatus::WRONG_TYPE) {\n    return rb->SendError(kWrongTypeErr);\n  }\n\n  if (add_result.status() == OpStatus::SKIPPED) {\n    return rb->SendNull();\n  }\n\n  if (add_result->is_nan) {\n    return rb->SendError(kScoreNaN);\n  }\n\n  rb->SendDouble(add_result->new_score);\n}\n\nvoid CmdZInter(CmdArgList args, CommandContext* cmd_cntx) {\n  ZBooleanOperation(args, \"zinter\", false, false, cmd_cntx);\n}\n\nvoid CmdZInterStore(CmdArgList args, CommandContext* cmd_cntx) {\n  ZBooleanOperation(args, \"zinterstore\", false, true, cmd_cntx);\n}\n\nvoid 
CmdZInterCard(CmdArgList args, CommandContext* cmd_cntx) {\n  unsigned num_keys;\n  auto* builder = cmd_cntx->rb();\n\n  if (!absl::SimpleAtoi(ArgS(args, 0), &num_keys)) {\n    return cmd_cntx->SendError(OpStatus::SYNTAX_ERR);\n  }\n\n  uint64_t limit = 0;\n  if (args.size() == (1 + num_keys + 2) && ArgS(args, 1 + num_keys) == \"LIMIT\") {\n    if (!absl::SimpleAtoi(ArgS(args, 1 + num_keys + 1), &limit)) {\n      return builder->SendError(\"limit value is not a positive integer\", kSyntaxErrType);\n    }\n  } else if (args.size() != 1 + num_keys) {\n    return builder->SendError(kSyntaxErr);\n  }\n\n  vector<OpResult<ScoredMap>> maps(shard_set->size(), OpStatus::SKIPPED);\n\n  auto cb = [&](Transaction* t, EngineShard* shard) {\n    maps[shard->shard_id()] = OpInter(shard, t, \"\", AggType::NOOP, {}, false);\n    return OpStatus::OK;\n  };\n\n  cmd_cntx->tx()->ScheduleSingleHop(std::move(cb));\n\n  OpResult<ScoredMap> result = IntersectResults(maps, AggType::NOOP);\n  if (!result)\n    return cmd_cntx->SendError(result.status());\n\n  if (0 < limit && limit < result.value().size()) {\n    return builder->SendLong(limit);\n  }\n  builder->SendLong(result.value().size());\n}\n\n// Generic function for ZMPop and BZMPop commands\nvoid ZMPopGeneric(CmdArgList args, CommandContext* cmd_cntx, bool is_blocking) {\n  ValidateZMPopResult zmpop_args;\n  if (!ValidateZMPopCommand(args, is_blocking, cmd_cntx, &zmpop_args)) {\n    return;\n  }\n  auto* response_builder = static_cast<RedisReplyBuilder*>(cmd_cntx->rb());\n\n  // From the list of input keys, keep the first (in the order of keys in the command) key found\n  // in the current shard.\n  std::vector<std::optional<std::string_view>> first_found_key_per_shard_vec(shard_set->size(),\n                                                                             std::nullopt);\n\n  auto cb = [&](Transaction* t, EngineShard* shard) {\n    std::optional<std::string_view> result = GetFirstNonEmptyKeyFound(shard, t);\n    if 
(result.has_value()) {\n      first_found_key_per_shard_vec[shard->shard_id()] = result;\n    }\n    return OpStatus::OK;\n  };\n\n  cmd_cntx->tx()->Execute(std::move(cb), false /* possibly another hop */);\n\n  // Keep all the keys found (first only for each shard) in a set for fast lookups.\n  absl::flat_hash_set<std::string_view> first_found_keys_for_shard;\n  // We can have at most one result from each shard.\n  first_found_keys_for_shard.reserve(std::min(shard_set->size(), zmpop_args.num_keys));\n  for (const auto& key : first_found_key_per_shard_vec) {\n    if (!key.has_value()) {\n      continue;\n    }\n    first_found_keys_for_shard.insert(*key);\n  }\n\n  // Now that we have the first non empty key from each shard, find the overall first key\n  // and pop elements from it.\n  std::optional<std::string_view> key_to_pop = std::nullopt;\n  // BZMPOP has 1 extra argument as compared to ZMPOP, hence adding 1 if is_blocking is true\n  ArgRange arg_keys(args.subspan(1 + is_blocking, zmpop_args.num_keys));\n  // Find the first arg_key which exists in any shard and is not empty.\n  for (std::string_view key : arg_keys) {\n    if (first_found_keys_for_shard.contains(key)) {\n      key_to_pop = key;\n      break;\n    }\n  }\n\n  if (!key_to_pop.has_value() && (!is_blocking || cmd_cntx->tx()->IsMulti())) {\n    cmd_cntx->tx()->Conclude();\n    response_builder->SendNull();\n    return;\n  }\n  // if we don't have any key to pop and it's blocking then we will block it using `WaitOnWatch`\n  if (is_blocking && !key_to_pop.has_value()) {\n    auto trans = cmd_cntx->tx();\n    auto* cntx = cmd_cntx->server_conn_cntx();\n    auto* ns = &trans->GetNamespace();\n\n    auto limit_tp = Transaction::time_point::max();\n    auto limit_ms = (unsigned)(zmpop_args.timeout * 1000);\n    if (limit_ms > 0) {\n      using namespace std::chrono;\n      limit_tp = steady_clock::now() + milliseconds(limit_ms);\n    }\n    const auto key_checker = [ns](EngineShard* owner, const 
DbContext& context, Transaction*,\n                                  std::string_view key) -> bool {\n      return ns->GetDbSlice(owner->shard_id()).FindReadOnly(context, key, OBJ_ZSET).ok();\n    };\n\n    DCHECK(trans->IsScheduled());  // Checking if the transaction is scheduled before calling\n                                   // `WaitOnWatch`\n    auto status = trans->WaitOnWatch(limit_tp, Transaction::kShardArgs, key_checker, &cntx->blocked,\n                                     &cntx->paused);\n\n    if (status != OpStatus::OK) {\n      response_builder->SendNull();\n      return;\n    }\n\n    auto cb = [&key_to_pop](Transaction* t, EngineShard* shard) {\n      if (auto wake_key = t->GetWakeKey(shard->shard_id()); wake_key) {\n        key_to_pop = *wake_key;\n      }\n      return OpStatus::OK;\n    };\n    trans->Execute(std::move(cb), false);\n  }\n\n  DCHECK(key_to_pop.has_value());\n\n  // Pop elements from relevant set.\n  OpResult<ScoredArray> pop_result = ZPopMinMaxInternal(\n      *key_to_pop, FilterShards::YES, zmpop_args.pop_count, zmpop_args.is_max, cmd_cntx->tx());\n\n  if (pop_result.status() == OpStatus::WRONG_TYPE) {\n    return response_builder->SendError(kWrongTypeErr);\n  }\n\n  LOG_IF(WARNING, !pop_result) << \"Unexpected status \" << pop_result.status();\n  response_builder->SendLabeledScoredArray(*key_to_pop, pop_result.value());\n}\n\nvoid CmdZMPop(CmdArgList args, CommandContext* cmd_cntx) {\n  ZMPopGeneric(args, cmd_cntx, false);\n}\n\nvoid CmdBZMPop(CmdArgList args, CommandContext* cmd_cntx) {\n  ZMPopGeneric(args, cmd_cntx, true);\n}\n\nvoid CmdZPopMax(CmdArgList args, CommandContext* cmd_cntx) {\n  ZPopMinMaxFromArgs(args, true, cmd_cntx);\n}\n\nvoid CmdZPopMin(CmdArgList args, CommandContext* cmd_cntx) {\n  ZPopMinMaxFromArgs(args, false, cmd_cntx);\n}\n\nvoid CmdZLexCount(CmdArgList args, CommandContext* cmd_cntx) {\n  string_view key = ArgS(args, 0);\n\n  string_view min_s = ArgS(args, 1);\n  string_view max_s = ArgS(args, 
2);\n\n  ZSetFamily::LexInterval li;\n  if (!ParseLexBound(min_s, &li.first) || !ParseLexBound(max_s, &li.second)) {\n    return cmd_cntx->SendError(kLexRangeErr);\n  }\n\n  auto cb = [&](Transaction* t, EngineShard* shard) {\n    return OpLexCount(t->GetOpArgs(shard), key, li);\n  };\n\n  OpResult<unsigned> result = cmd_cntx->tx()->ScheduleSingleHopT(std::move(cb));\n  if (result.status() == OpStatus::WRONG_TYPE) {\n    cmd_cntx->SendError(kWrongTypeErr);\n  } else {\n    cmd_cntx->SendLong(*result);\n  }\n}\n\nusing RangeParams = ZSetFamily::RangeParams;\n\nvoid CmdZRange(CmdArgList args, CommandContext* cmd_cntx) {\n  ZRangeGeneric(args, RangeParams{}, cmd_cntx);\n}\n\nvoid CmdZRank(CmdArgList args, CommandContext* cmd_cntx) {\n  ZRankGeneric(args, false, cmd_cntx);\n}\n\nvoid CmdZRevRange(CmdArgList args, CommandContext* cmd_cntx) {\n  ZRangeGeneric(args, RangeParams{.reverse = true}, cmd_cntx);\n}\n\nvoid CmdZRangeByScore(CmdArgList args, CommandContext* cmd_cntx) {\n  ZRangeGeneric(args, RangeParams{.interval_type = RangeParams::SCORE}, cmd_cntx);\n}\n\nvoid CmdZRangeStore(CmdArgList args, CommandContext* cmd_cntx) {\n  ZRangeGeneric(args.subspan(1), RangeParams{.with_scores = true, .store_key = ArgS(args, 0)},\n                cmd_cntx);\n}\n\nvoid CmdZRevRangeByScore(CmdArgList args, CommandContext* cmd_cntx) {\n  ZRangeGeneric(args, RangeParams{.reverse = true, .interval_type = RangeParams::SCORE}, cmd_cntx);\n}\n\nvoid CmdZRevRank(CmdArgList args, CommandContext* cmd_cntx) {\n  ZRankGeneric(args, true, cmd_cntx);\n}\n\nvoid CmdZRangeByLex(CmdArgList args, CommandContext* cmd_cntx) {\n  ZRangeGeneric(args, RangeParams{.interval_type = RangeParams::LEX}, cmd_cntx);\n}\n\nvoid CmdZRevRangeByLex(CmdArgList args, CommandContext* cmd_cntx) {\n  ZRangeGeneric(args, RangeParams{.reverse = true, .interval_type = RangeParams::LEX}, cmd_cntx);\n}\n\nvoid CmdZRemRangeByRank(CmdArgList args, CommandContext* cmd_cntx) {\n  string_view key = ArgS(args, 0);\n  
string_view min_s = ArgS(args, 1);\n  string_view max_s = ArgS(args, 2);\n\n  ZSetFamily::IndexInterval ii;\n  if (!SimpleAtoi(min_s, &ii.first) || !SimpleAtoi(max_s, &ii.second)) {\n    return cmd_cntx->SendError(kInvalidIntErr);\n  }\n\n  ZSetFamily::ZRangeSpec range_spec;\n  range_spec.interval = ii;\n  ZRemRangeGeneric(key, range_spec, cmd_cntx);\n}\n\nvoid CmdZRemRangeByScore(CmdArgList args, CommandContext* cmd_cntx) {\n  string_view key = ArgS(args, 0);\n  string_view min_s = ArgS(args, 1);\n  string_view max_s = ArgS(args, 2);\n\n  ZSetFamily::ScoreInterval si;\n  if (!ParseBound(min_s, &si.first) || !ParseBound(max_s, &si.second)) {\n    return cmd_cntx->SendError(kFloatRangeErr);\n  }\n\n  ZSetFamily::ZRangeSpec range_spec;\n\n  range_spec.interval = si;\n\n  ZRemRangeGeneric(key, range_spec, cmd_cntx);\n}\n\nvoid CmdZRemRangeByLex(CmdArgList args, CommandContext* cmd_cntx) {\n  string_view key = ArgS(args, 0);\n  string_view min_s = ArgS(args, 1);\n  string_view max_s = ArgS(args, 2);\n\n  ZSetFamily::LexInterval li;\n  if (!ParseLexBound(min_s, &li.first) || !ParseLexBound(max_s, &li.second)) {\n    return cmd_cntx->SendError(kLexRangeErr);\n  }\n\n  ZSetFamily::ZRangeSpec range_spec;\n\n  range_spec.interval = li;\n\n  ZRemRangeGeneric(key, range_spec, cmd_cntx);\n}\n\nvoid CmdZRem(CmdArgList args, CommandContext* cmd_cntx) {\n  string_view key = ArgS(args, 0);\n  auto members = args.subspan(1);\n  auto cb = [&](Transaction* t, EngineShard* shard) {\n    return OpRem(t->GetOpArgs(shard), key, members);\n  };\n\n  OpResult<unsigned> result = cmd_cntx->tx()->ScheduleSingleHopT(std::move(cb));\n  if (result.status() == OpStatus::WRONG_TYPE) {\n    cmd_cntx->SendError(kWrongTypeErr);\n  } else {\n    cmd_cntx->SendLong(*result);\n  }\n}\n\nvoid CmdZRandMember(CmdArgList args, CommandContext* cmd_cntx) {\n  auto* rb = static_cast<RedisReplyBuilder*>(cmd_cntx->rb());\n\n  if (args.size() > 3)\n    return rb->SendError(WrongNumArgsError(\"ZRANDMEMBER\"));\n\n 
 CmdArgParser parser{args};\n  string_view key = parser.Next();\n\n  bool is_count = parser.HasNext();\n  int count = is_count ? parser.Next<int>() : 1;\n\n  ZSetFamily::RangeParams params;\n  params.with_scores = static_cast<bool>(parser.Check(\"WITHSCORES\"));\n\n  if (parser.HasNext())\n    return rb->SendError(absl::StrCat(\"Unsupported option:\", string_view(parser.Next())));\n\n  RETURN_ON_PARSE_ERROR(parser, cmd_cntx);\n\n  const auto cb = [&](Transaction* t, EngineShard* shard) {\n    return OpRandMember(count, params, t->GetOpArgs(shard), key);\n  };\n\n  OpResult<ScoredArray> result = cmd_cntx->tx()->ScheduleSingleHopT(cb);\n  if (result) {\n    rb->SendScoredArray(result.value(), params.with_scores);\n  } else if (result.status() == OpStatus::KEY_NOTFOUND) {\n    if (is_count) {\n      rb->SendScoredArray(ScoredArray(), params.with_scores);\n    } else {\n      rb->SendNull();\n    }\n  } else {\n    cmd_cntx->SendError(result.status());\n  }\n}\n\nvoid CmdZScore(CmdArgList args, CommandContext* cmd_cntx) {\n  string_view key = ArgS(args, 0);\n  string_view member = ArgS(args, 1);\n\n  auto cb = [&](Transaction* t, EngineShard* shard) {\n    return ZSetFamily::OpScore(t->GetOpArgs(shard), key, member);\n  };\n\n  auto* rb = static_cast<RedisReplyBuilder*>(cmd_cntx->rb());\n  OpResult<double> result = cmd_cntx->tx()->ScheduleSingleHopT(std::move(cb));\n  if (result.status() == OpStatus::WRONG_TYPE) {\n    rb->SendError(kWrongTypeErr);\n  } else if (!result) {\n    rb->SendNull();\n  } else {\n    rb->SendDouble(*result);\n  }\n}\n\nvoid CmdZMScore(CmdArgList args, CommandContext* cmd_cntx) {\n  auto* rb = static_cast<RedisReplyBuilder*>(cmd_cntx->rb());\n\n  OpResult<MScoreResponse> result = ZSetFamily::ZGetMembers(args, cmd_cntx->tx(), rb);\n\n  if (result.status() == OpStatus::WRONG_TYPE) {\n    return rb->SendError(kWrongTypeErr);\n  }\n  rb->StartArray(result->size());  // Array return type.\n  const MScoreResponse& array = result.value();\n  for 
(const auto& p : array) {\n    if (p) {\n      rb->SendDouble(*p);\n    } else {\n      rb->SendNull();\n    }\n  }\n}\n\nvoid CmdZScan(CmdArgList args, CommandContext* cmd_cntx) {\n  string_view key = ArgS(args, 0);\n  string_view token = ArgS(args, 1);\n\n  uint64_t cursor = 0;\n\n  auto* rb = static_cast<RedisReplyBuilder*>(cmd_cntx->rb());\n\n  if (!absl::SimpleAtoi(token, &cursor)) {\n    return cmd_cntx->SendError(\"invalid cursor\");\n  }\n\n  OpResult<ScanOpts> ops = ScanOpts::TryFrom(args.subspan(2));\n  if (!ops) {\n    DVLOG(1) << \"Scan invalid args - return \" << ops << \" to the user\";\n    return cmd_cntx->SendError(ops.status());\n  }\n  const ScanOpts& scan_op = ops.value();\n\n  auto cb = [&](Transaction* t, EngineShard* shard) {\n    return OpScan(t->GetOpArgs(shard), key, &cursor, scan_op);\n  };\n\n  OpResult<StringVec> result = cmd_cntx->tx()->ScheduleSingleHopT(std::move(cb));\n  if (result.status() != OpStatus::WRONG_TYPE) {\n    rb->StartArray(2);\n    rb->SendBulkString(absl::StrCat(cursor));\n    rb->StartArray(result->size());  // Within scan the returned page is of type array.\n    for (const auto& k : *result) {\n      rb->SendBulkString(k);\n    }\n  } else {\n    cmd_cntx->SendError(result.status());\n  }\n}\n\nvoid CmdZUnion(CmdArgList args, CommandContext* cmd_cntx) {\n  ZBooleanOperation(args, \"zunion\", true, false, cmd_cntx);\n}\n\nvoid CmdZUnionStore(CmdArgList args, CommandContext* cmd_cntx) {\n  ZBooleanOperation(args, \"zunionstore\", true, true, cmd_cntx);\n}\n\n}  // namespace\n\n#define HFUNC(x) SetHandler(&Cmd##x)\n\nLoadBlobResult ZSetFamily::LoadZiplistBlob(std::string_view blob, PrimeValue* pv) {\n  unsigned char* lp = lpNew(blob.size());\n  if (!ZiplistPairsConvertAndValidateIntegrity((const uint8_t*)blob.data(), blob.size(), &lp)) {\n    LOG(ERROR) << \"Zset ziplist integrity check failed.\";\n    zfree(lp);\n    return LoadBlobResult::kCorrupted;\n  }\n\n  if (lpLength(lp) == 0) {\n    lpFree(lp);\n    return 
LoadBlobResult::kEmpty;\n  }\n\n  unsigned encoding = OBJ_ENCODING_LISTPACK;\n  void* inner;\n  if (lpBytes(lp) >= server.max_listpack_map_bytes) {\n    inner = detail::SortedMap::FromListPack(CompactObj::memory_resource(), lp);\n    lpFree(lp);\n    encoding = OBJ_ENCODING_SKIPLIST;\n  } else {\n    lp = lpShrinkToFit(lp);\n    inner = lp;\n  }\n\n  pv->InitRobj(OBJ_ZSET, encoding, inner);\n  return LoadBlobResult::kSuccess;\n}\n\nLoadBlobResult ZSetFamily::LoadListpackBlob(std::string_view blob, PrimeValue* pv) {\n  if (!lpValidateIntegrity((uint8_t*)blob.data(), blob.size(), 0, nullptr, nullptr)) {\n    LOG(ERROR) << \"Zset listpack integrity check failed.\";\n    return LoadBlobResult::kCorrupted;\n  }\n\n  unsigned char* src_lp = (unsigned char*)blob.data();\n  unsigned long long bytes = lpBytes(src_lp);\n  unsigned char* lp = (uint8_t*)zmalloc(bytes);\n  std::memcpy(lp, src_lp, bytes);\n  pv->InitRobj(OBJ_ZSET, OBJ_ENCODING_LISTPACK, lp);\n  return LoadBlobResult::kSuccess;\n}\n\nvoid ZSetFamily::Register(CommandRegistry* registry) {\n  constexpr uint32_t kStoreMask =\n      CO::JOURNALED | CO::VARIADIC_KEYS | CO::DENYOOM | CO::NO_AUTOJOURNAL;\n  registry->StartFamily(acl::SORTEDSET);\n  // TODO: to add support for SCRIPT for BZPOPMIN, BZPOPMAX similarly to BLPOP.\n  // We break up chain into multiple calls to reduce stack usage in this function.\n  *registry << CI{\"ZADD\", CO::FAST | CO::JOURNALED | CO::DENYOOM, -4, 1, 1}.HFUNC(ZAdd)\n            << CI{\"BZPOPMIN\", CO::JOURNALED | CO::NOSCRIPT | CO::BLOCKING | CO::NO_AUTOJOURNAL, -3,\n                  1, -2}\n                   .HFUNC(BZPopMin)\n            << CI{\"BZPOPMAX\", CO::JOURNALED | CO::NOSCRIPT | CO::BLOCKING | CO::NO_AUTOJOURNAL, -3,\n                  1, -2}\n                   .HFUNC(BZPopMax)\n            << CI{\"ZCARD\", CO::FAST | CO::READONLY, 2, 1, 1}.HFUNC(ZCard)\n            << CI{\"ZCOUNT\", CO::FAST | CO::READONLY, 4, 1, 1}.HFUNC(ZCount)\n            << CI{\"ZDIFF\", CO::READONLY | 
CO::VARIADIC_KEYS, -3, 2, 2}.HFUNC(ZDiff);\n\n  *registry << CI{\"ZDIFFSTORE\", kStoreMask, -4, 3, 3}.HFUNC(ZDiffStore)\n            << CI{\"ZINCRBY\", CO::FAST | CO::JOURNALED, 4, 1, 1}.HFUNC(ZIncrBy)\n            << CI{\"ZINTERSTORE\", kStoreMask, -4, 3, 3}.HFUNC(ZInterStore)\n            << CI{\"ZINTER\", CO::READONLY | CO::VARIADIC_KEYS, -3, 2, 2}.HFUNC(ZInter)\n            << CI{\"ZINTERCARD\", CO::READONLY | CO::VARIADIC_KEYS, -3, 2, 2}.HFUNC(ZInterCard)\n            << CI{\"ZLEXCOUNT\", CO::READONLY, 4, 1, 1}.HFUNC(ZLexCount)\n            << CI{\"ZMPOP\", CO::JOURNALED | CO::VARIADIC_KEYS | CO::NO_AUTOJOURNAL, -4, 2, 2}.HFUNC(\n                   ZMPop)\n            << CI{\"BZMPOP\", CO::JOURNALED | CO::VARIADIC_KEYS | CO::BLOCKING | CO::NO_AUTOJOURNAL,\n                  -5, 3, 3}\n                   .HFUNC(BZMPop);\n\n  *registry << CI{\"ZPOPMAX\", CO::FAST | CO::JOURNALED, -2, 1, 1}.HFUNC(ZPopMax)\n            << CI{\"ZPOPMIN\", CO::FAST | CO::JOURNALED, -2, 1, 1}.HFUNC(ZPopMin)\n            << CI{\"ZREM\", CO::FAST | CO::JOURNALED, -3, 1, 1}.HFUNC(ZRem)\n            << CI{\"ZRANGE\", CO::READONLY, -4, 1, 1}.HFUNC(ZRange)\n            << CI{\"ZRANDMEMBER\", CO::READONLY, -2, 1, 1}.HFUNC(ZRandMember)\n            << CI{\"ZRANK\", CO::READONLY | CO::FAST, -3, 1, 1}.HFUNC(ZRank)\n            << CI{\"ZRANGEBYLEX\", CO::READONLY, -4, 1, 1}.HFUNC(ZRangeByLex)\n            << CI{\"ZRANGEBYSCORE\", CO::READONLY, -4, 1, 1}.HFUNC(ZRangeByScore)\n            << CI{\"ZRANGESTORE\", CO::JOURNALED | CO::DENYOOM | CO::NO_AUTOJOURNAL, -5, 1, 2}.HFUNC(\n                   ZRangeStore);\n\n  *registry << CI{\"ZSCORE\", CO::READONLY | CO::FAST, 3, 1, 1}.HFUNC(ZScore)\n            << CI{\"ZMSCORE\", CO::READONLY | CO::FAST, -3, 1, 1}.HFUNC(ZMScore)\n            << CI{\"ZREMRANGEBYRANK\", CO::JOURNALED, 4, 1, 1}.HFUNC(ZRemRangeByRank)\n            << CI{\"ZREMRANGEBYSCORE\", CO::JOURNALED, 4, 1, 1}.HFUNC(ZRemRangeByScore)\n            << CI{\"ZREMRANGEBYLEX\", CO::JOURNALED, 
4, 1, 1}.HFUNC(ZRemRangeByLex)\n            << CI{\"ZREVRANGE\", CO::READONLY, -4, 1, 1}.HFUNC(ZRevRange)\n            << CI{\"ZREVRANGEBYLEX\", CO::READONLY, -4, 1, 1}.HFUNC(ZRevRangeByLex)\n            << CI{\"ZREVRANGEBYSCORE\", CO::READONLY, -4, 1, 1}.HFUNC(ZRevRangeByScore)\n            << CI{\"ZREVRANK\", CO::READONLY | CO::FAST, -3, 1, 1}.HFUNC(ZRevRank)\n            << CI{\"ZSCAN\", CO::READONLY, -3, 1, 1}.HFUNC(ZScan)\n            << CI{\"ZUNION\", CO::READONLY | CO::VARIADIC_KEYS, -3, 2, 2}.HFUNC(ZUnion)\n            << CI{\"ZUNIONSTORE\", kStoreMask, -4, 3, 3}.HFUNC(ZUnionStore);\n}\n\n}  // namespace dfly\n"
  },
  {
    "path": "src/server/zset_family.h",
    "content": "// Copyright 2025, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#pragma once\n\n#include <string_view>\n#include <variant>\n\n#include \"facade/op_status.h\"\n#include \"server/common.h\"\n#include \"server/table.h\"\n\nnamespace facade {\nclass SinkReplyBuilder;\n}  // namespace facade\n\nnamespace dfly {\n\nstruct OpArgs;\n\nclass ZSetFamily {\n public:\n  static void Register(CommandRegistry* registry);\n\n  static LoadBlobResult LoadZiplistBlob(std::string_view blob, PrimeValue* pv);\n  static LoadBlobResult LoadListpackBlob(std::string_view blob, PrimeValue* pv);\n\n  using IndexInterval = std::pair<int64_t, int64_t>;\n  using MScoreResponse = std::vector<std::optional<double>>;\n\n  struct Bound {\n    double val;\n    bool is_open = false;\n    Bound() = default;\n    Bound(double v, bool open) : val(v), is_open(open) {\n    }\n  };\n\n  using ScoreInterval = std::pair<Bound, Bound>;\n\n  struct LexBound {\n    std::string_view val;\n    enum Type : uint8_t { PLUS_INF, MINUS_INF, OPEN, CLOSED } type = CLOSED;\n    LexBound() = default;\n    LexBound(std::string_view v, Type t) : val(v), type(t) {\n    }\n  };\n\n  using LexInterval = std::pair<LexBound, LexBound>;\n\n  using TopNScored = uint32_t;\n\n  struct RangeParams {\n    uint32_t offset = 0;\n    uint32_t limit = UINT32_MAX;\n    bool with_scores = false;\n    bool reverse = false;\n    enum IntervalType : uint8_t { LEX, RANK, SCORE } interval_type = RANK;\n    std::optional<std::string_view> store_key = std::nullopt;\n  };\n\n  struct ZRangeSpec {\n    std::variant<IndexInterval, ScoreInterval, LexInterval, TopNScored> interval;\n    RangeParams params;\n    ZRangeSpec() = default;\n    ZRangeSpec(const ScoreInterval& si, const RangeParams& rp) : interval(si), params(rp){};\n  };\n\n  struct ZParams {\n    unsigned flags = 0;  // mask of ZADD_IN_ macros.\n    bool ch = false;     // Corresponds to CH option.\n    bool override = false;\n    
bool journal_update = false;\n  };\n\n  using ScoredMember = std::pair<std::string, double>;\n  using ScoredArray = std::vector<ScoredMember>;\n  using ScoredMemberView = std::pair<double, std::string_view>;\n  using ScoredMemberSpan = absl::Span<const ScoredMemberView>;\n\n  using SinkReplyBuilder = facade::SinkReplyBuilder;\n  template <typename T> using OpResult = facade::OpResult<T>;\n\n  // Used by GeoFamily also\n  static void ZAddGeneric(std::string_view key, const ZParams& zparams, ScoredMemberSpan memb_sp,\n                          CommandContext* cmd_cntx);\n\n  static OpResult<MScoreResponse> ZGetMembers(CmdArgList args, Transaction* tx,\n                                              SinkReplyBuilder* builder);\n\n  static OpResult<std::vector<ScoredArray>> OpRanges(const std::vector<ZRangeSpec>& range_specs,\n                                                     const OpArgs& op_args, std::string_view key);\n\n  struct AddResult {\n    double new_score = 0;\n    unsigned num_updated = 0;\n\n    bool is_nan = false;\n  };\n\n  static OpResult<AddResult> OpAdd(const OpArgs& op_args, const ZParams& zparams,\n                                   std::string_view key, ScoredMemberSpan members);\n\n  static OpResult<void> OpKeyExisted(const OpArgs& op_args, std::string_view key);\n\n  static OpResult<double> OpScore(const OpArgs& op_args, std::string_view key,\n                                  std::string_view member);\n};\n\n}  // namespace dfly\n"
  },
  {
    "path": "src/server/zset_family_test.cc",
    "content": "// Copyright 2022, DragonflyDB authors.  All rights reserved.\n// See LICENSE for licensing terms.\n//\n\n#include \"server/zset_family.h\"\n\n#include <vector>\n\n#include \"base/gtest.h\"\n#include \"base/logging.h\"\n#include \"facade/facade_test.h\"\n#include \"server/test_utils.h\"\n\nusing namespace testing;\nusing namespace std;\nusing namespace util;\n\nnamespace dfly {\n\nclass ZSetFamilyTest : public BaseFamilyTest {\n protected:\n};\n\nusing ScoredElement = std::pair<std::string, std::string>;\n\ntemplate <typename Array> auto ParseToScoredArray(Array arr) {\n  std::vector<ScoredElement> scored_elements;\n  for (std::size_t i = 1; i < arr.size(); i += 2) {\n    scored_elements.emplace_back(arr[i - 1].GetString(), arr[i].GetString());\n  }\n  return scored_elements;\n}\n\nMATCHER_P(ConsistsOfMatcher, elements, \"\") {\n  auto vec = arg.GetVec();\n  for (const auto& x : vec) {\n    if (elements.find(x.GetString()) == elements.end()) {\n      return false;\n    }\n  }\n  return true;\n}\n\nMATCHER_P(ConsistsOfScoredElementsMatcher, elements, \"\") {\n  auto vec = arg.GetVec();\n  if (vec.size() % 2) {\n    return false;\n  }\n\n  auto scored_vec = ParseToScoredArray(vec);\n  for (const auto& scored_element : scored_vec) {\n    if (elements.find(scored_element) == elements.end()) {\n      return false;\n    }\n  }\n  return true;\n}\n\nMATCHER_P(IsScoredSubsetOfMatcher, elements_list, \"\") {\n  auto vec = arg.GetVec();\n  if (vec.size() % 2) {\n    return false;\n  }\n\n  auto scored_vec = ParseToScoredArray(vec);\n  std::vector<ScoredElement> elements{elements_list};\n\n  std::sort(scored_vec.begin(), scored_vec.end());\n  std::sort(elements.begin(), elements.end());\n\n  return std::includes(elements.begin(), elements.end(), scored_vec.begin(), scored_vec.end());\n}\n\nMATCHER_P(UnorderedScoredElementsAreMatcher, elements_list, \"\") {\n  auto vec = arg.GetVec();\n  if (vec.size() % 2) {\n    return false;\n  }\n\n  auto scored_vec = 
ParseToScoredArray(vec);\n  return std::is_permutation(scored_vec.begin(), scored_vec.end(), elements_list.begin(),\n                             elements_list.end());\n}\n\nMATCHER_P2(ContainsLabeledScoredArrayMatcher, label, elements, \"\") {\n  auto label_vec = arg.GetVec();\n  if (label_vec.size() != 2) {\n    *result_listener << \"Labeled Scored Array does no contain two elements.\";\n    return false;\n  }\n\n  if (!ExplainMatchResult(Eq(label), label_vec[0].GetString(), result_listener)) {\n    return false;\n  }\n\n  auto value_pairs_vec = label_vec[1].GetVec();\n  std::set<std::pair<std::string, std::string>> actual_elements;\n  for (const auto& scored_element : value_pairs_vec) {\n    actual_elements.insert(std::make_pair(scored_element.GetVec()[0].GetString(),\n                                          scored_element.GetVec()[1].GetString()));\n  }\n  if (actual_elements != elements) {\n    *result_listener << \"Scored elements do not match: \";\n    ExplainMatchResult(ElementsAreArray(elements), actual_elements, result_listener);\n    return false;\n  }\n\n  return true;\n}\n\nauto ConsistsOf(std::initializer_list<std::string> elements) {\n  return ConsistsOfMatcher(std::unordered_set<std::string>{elements});\n}\n\nauto ConsistsOfScoredElements(std::initializer_list<std::pair<std::string, std::string>> elements) {\n  return ConsistsOfScoredElementsMatcher(std::set<std::pair<std::string, std::string>>{elements});\n}\n\nauto IsScoredSubsetOf(std::initializer_list<std::pair<std::string, std::string>> elements) {\n  return IsScoredSubsetOfMatcher(elements);\n}\n\nauto UnorderedScoredElementsAre(\n    std::initializer_list<std::pair<std::string, std::string>> elements) {\n  return UnorderedScoredElementsAreMatcher(elements);\n}\n\nauto ContainsLabeledScoredArray(\n    std::string_view label, std::initializer_list<std::pair<std::string, std::string>> elements) {\n  return ContainsLabeledScoredArrayMatcher(label,\n                                           
std::set<std::pair<std::string, std::string>>{elements});\n}\n\nTEST_F(ZSetFamilyTest, Add) {\n  auto resp = Run({\"zadd\", \"x\", \"1.1\", \"a\"});\n  EXPECT_THAT(resp, IntArg(1));\n\n  resp = Run({\"zscore\", \"x\", \"a\"});\n  EXPECT_THAT(resp, \"1.1\");\n\n  resp = Run({\"zadd\", \"x\", \"2\", \"a\"});\n  EXPECT_THAT(resp, IntArg(0));\n  resp = Run({\"zscore\", \"x\", \"a\"});\n  EXPECT_THAT(resp, \"2\");\n\n  resp = Run({\"zadd\", \"x\", \"ch\", \"3\", \"a\"});\n  EXPECT_THAT(resp, IntArg(1));\n  resp = Run({\"zscore\", \"x\", \"a\"});\n  EXPECT_EQ(resp, \"3\");\n\n  resp = Run({\"zcard\", \"x\"});\n  EXPECT_THAT(resp, IntArg(1));\n\n  EXPECT_THAT(Run({\"zadd\", \"x\", \"\", \"a\"}), ErrArg(\"not a valid float\"));\n\n  EXPECT_THAT(Run({\"zadd\", \"ztmp\", \"xx\", \"10\", \"member\"}), IntArg(0));\n\n  const char kHighPrecision[] = \"0.79028573343077946\";\n\n  Run({\"zadd\", \"zs\", kHighPrecision, \"a\"});\n  EXPECT_EQ(Run({\"zscore\", \"zs\", \"a\"}), \"0.7902857334307795\");\n  EXPECT_EQ(0.79028573343077946, 0.7902857334307795);\n\n  resp = Run({\"zadd\", \"x\", \"1.1\", \"\"});\n  EXPECT_THAT(resp, IntArg(1));\n\n  resp = Run({\"zscore\", \"x\", \"\"});\n  EXPECT_EQ(resp, \"1.1\");\n}\n\nTEST_F(ZSetFamilyTest, AddNonUniqeMembers) {\n  auto resp = Run({\"zadd\", \"x\", \"2\", \"a\", \"1\", \"a\"});\n  EXPECT_THAT(resp, IntArg(1));\n\n  resp = Run({\"zscore\", \"x\", \"a\"});\n  EXPECT_EQ(resp, \"1\");\n\n  resp = Run({\"zadd\", \"y\", \"3\", \"a\", \"1\", \"a\", \"2\", \"b\"});\n  EXPECT_THAT(resp, IntArg(2));\n  EXPECT_EQ(\"1\", Run({\"zscore\", \"y\", \"a\"}));\n}\n\nTEST_F(ZSetFamilyTest, ZRem) {\n  auto resp = Run({\"zadd\", \"x\", \"1.1\", \"b\", \"2.1\", \"a\"});\n  EXPECT_THAT(resp, IntArg(2));\n\n  resp = Run({\"zrem\", \"x\", \"b\", \"c\"});\n  EXPECT_THAT(resp, IntArg(1));\n\n  resp = Run({\"zcard\", \"x\"});\n  EXPECT_THAT(resp, IntArg(1));\n  EXPECT_THAT(Run({\"zrange\", \"x\", \"0\", \"3\", \"byscore\"}), \"a\");\n  
EXPECT_THAT(Run({\"zrange\", \"x\", \"(-inf\", \"(+inf\", \"byscore\"}), \"a\");\n}\n\nTEST_F(ZSetFamilyTest, ZRandMember) {\n  auto resp = Run({\"ZAdd\", \"x\", \"1\", \"a\", \"2\", \"b\", \"3\", \"c\"});\n  EXPECT_THAT(resp, IntArg(3));\n\n  // Test if count > 0\n  resp = Run({\"ZRandMember\", \"x\"});\n  ASSERT_THAT(resp, ArgType(RespExpr::STRING));\n  EXPECT_THAT(resp, AnyOf(\"a\", \"b\", \"c\"));\n\n  resp = Run({\"ZRandMember\", \"x\", \"1\"});\n  ASSERT_THAT(resp, ArgType(RespExpr::STRING));\n  EXPECT_THAT(resp, AnyOf(\"a\", \"b\", \"c\"));\n\n  resp = Run({\"ZRandMember\", \"x\", \"2\"});\n  ASSERT_THAT(resp, ArrLen(2));\n  EXPECT_THAT(resp.GetVec(), IsSubsetOf({\"a\", \"b\", \"c\"}));\n\n  resp = Run({\"ZRandMember\", \"x\", \"3\"});\n  ASSERT_THAT(resp, ArrLen(3));\n  EXPECT_THAT(resp.GetVec(), UnorderedElementsAre(\"a\", \"b\", \"c\"));\n\n  // Test if count < 0\n  resp = Run({\"ZRandMember\", \"x\", \"-1\"});\n  ASSERT_THAT(resp, ArgType(RespExpr::STRING));\n  EXPECT_THAT(resp, AnyOf(\"a\", \"b\", \"c\"));\n\n  resp = Run({\"ZRandMember\", \"x\", \"-2\"});\n  ASSERT_THAT(resp, ArrLen(2));\n  EXPECT_THAT(resp, ConsistsOf({\"a\", \"b\", \"c\"}));\n\n  resp = Run({\"ZRandMember\", \"x\", \"-3\"});\n  ASSERT_THAT(resp, ArrLen(3));\n  EXPECT_THAT(resp, ConsistsOf({\"a\", \"b\", \"c\"}));\n\n  // Test if count < 0, but the absolute value is larger than the size of the sorted set\n  resp = Run({\"ZRandMember\", \"x\", \"-15\"});\n  ASSERT_THAT(resp, ArrLen(15));\n  EXPECT_THAT(resp, ConsistsOf({\"a\", \"b\", \"c\"}));\n\n  // Test if count is 0\n  ASSERT_THAT(Run({\"ZRandMember\", \"x\", \"0\"}), ArrLen(0));\n\n  // Test if count is larger than the size of the sorted set\n  resp = Run({\"ZRandMember\", \"x\", \"15\"});\n  ASSERT_THAT(resp, ArrLen(3));\n  EXPECT_THAT(resp.GetVec(), UnorderedElementsAre(\"a\", \"b\", \"c\"));\n\n  // Test if sorted set is empty\n  EXPECT_THAT(Run({\"ZAdd\", \"empty::zset\", \"1\", \"one\"}), IntArg(1));\n  
EXPECT_THAT(Run({\"ZRem\", \"empty::zset\", \"one\"}), IntArg(1));\n  ASSERT_THAT(Run({\"ZRandMember\", \"empty::zset\", \"0\"}), ArrLen(0));\n  ASSERT_THAT(Run({\"ZRandMember\", \"empty::zset\", \"3\"}), ArrLen(0));\n  ASSERT_THAT(Run({\"ZRandMember\", \"empty::zset\", \"-4\"}), ArrLen(0));\n\n  // Test if key does not exist\n  ASSERT_THAT(Run({\"ZRandMember\", \"y\"}), ArgType(RespExpr::NIL));\n  ASSERT_THAT(Run({\"ZRandMember\", \"y\", \"0\"}), ArrLen(0));\n\n  // Test WITHSCORES\n  resp = Run({\"ZRandMember\", \"x\", \"1\", \"WITHSCORES\"});\n  ASSERT_THAT(resp, ArrLen(2));\n  EXPECT_THAT(resp, IsScoredSubsetOf({{\"a\", \"1\"}, {\"b\", \"2\"}, {\"c\", \"3\"}}));\n\n  resp = Run({\"ZRandMember\", \"x\", \"2\", \"WITHSCORES\"});\n  ASSERT_THAT(resp, ArrLen(4));\n  EXPECT_THAT(resp, IsScoredSubsetOf({{\"a\", \"1\"}, {\"b\", \"2\"}, {\"c\", \"3\"}}));\n\n  resp = Run({\"ZRandMember\", \"x\", \"3\", \"WITHSCORES\"});\n  ASSERT_THAT(resp, ArrLen(6));\n  EXPECT_THAT(resp, UnorderedScoredElementsAre({{\"a\", \"1\"}, {\"b\", \"2\"}, {\"c\", \"3\"}}));\n\n  resp = Run({\"ZRandMember\", \"x\", \"15\", \"WITHSCORES\"});\n  ASSERT_THAT(resp, ArrLen(6));\n  EXPECT_THAT(resp, UnorderedScoredElementsAre({{\"a\", \"1\"}, {\"b\", \"2\"}, {\"c\", \"3\"}}));\n\n  resp = Run({\"ZRandMember\", \"x\", \"-1\", \"WITHSCORES\"});\n  ASSERT_THAT(resp, ArrLen(2));\n  EXPECT_THAT(resp, ConsistsOfScoredElements({{\"a\", \"1\"}, {\"b\", \"2\"}, {\"c\", \"3\"}}));\n\n  resp = Run({\"ZRandMember\", \"x\", \"-2\", \"WITHSCORES\"});\n  ASSERT_THAT(resp, ArrLen(4));\n  EXPECT_THAT(resp, ConsistsOfScoredElements({{\"a\", \"1\"}, {\"b\", \"2\"}, {\"c\", \"3\"}}));\n\n  resp = Run({\"ZRandMember\", \"x\", \"-3\", \"WITHSCORES\"});\n  ASSERT_THAT(resp, ArrLen(6));\n  EXPECT_THAT(resp, ConsistsOfScoredElements({{\"a\", \"1\"}, {\"b\", \"2\"}, {\"c\", \"3\"}}));\n\n  resp = Run({\"ZRandMember\", \"x\", \"-15\", \"WITHSCORES\"});\n  ASSERT_THAT(resp, ArrLen(30));\n  EXPECT_THAT(resp, 
ConsistsOfScoredElements({{\"a\", \"1\"}, {\"b\", \"2\"}, {\"c\", \"3\"}}));\n}\n\nTEST_F(ZSetFamilyTest, ZMScore) {\n  Run({\"zadd\", \"zms\", \"3.14\", \"a\"});\n  Run({\"zadd\", \"zms\", \"42\", \"another\"});\n\n  auto resp = Run({\"zmscore\", \"zms\", \"another\", \"a\", \"nofield\"});\n  ASSERT_EQ(RespExpr::ARRAY, resp.type);\n  EXPECT_THAT(resp.GetVec(), ElementsAre(\"42\", \"3.14\", ArgType(RespExpr::NIL)));\n}\n\n// Test for ZMSCORE with member on a non-existent keys\nTEST_F(ZSetFamilyTest, ZMScoreNonExistentKeys) {\n  // Case 1: Single member with non-existent key (ZMSCORE abc x)\n  auto resp = Run({\"zmscore\", \"abc\", \"x\"});\n  EXPECT_THAT(resp, ArgType(RespExpr::NIL));\n\n  // Case 2: Multiple members with non-existent key (ZMSCORE abc x y z)\n  resp = Run({\"zmscore\", \"abc\", \"x\", \"y\", \"z\"});\n  EXPECT_THAT(resp.GetVec(),\n              ElementsAre(ArgType(RespExpr::NIL), ArgType(RespExpr::NIL), ArgType(RespExpr::NIL)));\n}\n\nTEST_F(ZSetFamilyTest, ByScore) {\n  Run({\"zadd\", \"x\", \"1.1\", \"a\", \"2.1\", \"b\"});\n  EXPECT_THAT(Run({\"zrangebyscore\", \"x\", \"0\", \"(1.1\"}), ArrLen(0));\n  EXPECT_THAT(Run({\"zrangebyscore\", \"x\", \"-inf\", \"1.1\", \"limit\", \"0\", \"10\"}), \"a\");\n\n  auto resp = Run({\"zrangebyscore\", \"x\", \"-inf\", \"1.1\", \"limit\", \"0\", \"10\", \"WITHSCORES\"});\n  ASSERT_THAT(resp, ArrLen(2));\n  ASSERT_THAT(resp.GetVec(), ElementsAre(\"a\", \"1.1\"));\n\n  resp = Run({\"zrangebyscore\", \"x\", \"-inf\", \"1.1\", \"WITHSCORES\", \"limit\", \"0\", \"10\"});\n  ASSERT_THAT(resp, ArrLen(2));\n  ASSERT_THAT(resp.GetVec(), ElementsAre(\"a\", \"1.1\"));\n\n  resp = Run({\"zrangebyscore\", \"x\", \"-inf\", \"+inf\", \"LIMIT\", \"0\", \"-1\"});\n  ASSERT_THAT(resp, ArrLen(2));\n  ASSERT_THAT(resp.GetVec(), ElementsAre(\"a\", \"b\"));\n\n  resp = Run({\"zrevrangebyscore\", \"x\", \"+inf\", \"-inf\", \"limit\", \"0\", \"5\"});\n  ASSERT_THAT(resp, ArgType(RespExpr::ARRAY));\n  ASSERT_THAT(resp.GetVec(), 
ElementsAre(\"b\", \"a\"));\n\n  EXPECT_EQ(2, CheckedInt({\"zcount\", \"x\", \"1.1\", \"2.1\"}));\n  EXPECT_EQ(1, CheckedInt({\"zcount\", \"x\", \"(1.1\", \"2.1\"}));\n  EXPECT_EQ(0, CheckedInt({\"zcount\", \"y\", \"(1.1\", \"2.1\"}));\n}\n\nTEST_F(ZSetFamilyTest, ZRank) {\n  Run({\"zadd\", \"x\", \"1.1\", \"a\", \"2.1\", \"b\"});\n  EXPECT_EQ(0, CheckedInt({\"zrank\", \"x\", \"a\"}));\n  EXPECT_EQ(1, CheckedInt({\"zrank\", \"x\", \"b\"}));\n  EXPECT_EQ(1, CheckedInt({\"zrevrank\", \"x\", \"a\"}));\n  EXPECT_EQ(0, CheckedInt({\"zrevrank\", \"x\", \"b\"}));\n  EXPECT_THAT(Run({\"zrevrank\", \"x\", \"c\"}), ArgType(RespExpr::NIL));\n  EXPECT_THAT(Run({\"zrank\", \"y\", \"c\"}), ArgType(RespExpr::NIL));\n  EXPECT_THAT(Run({\"zrevrank\", \"x\", \"c\", \"WITHSCORE\"}), ArgType(RespExpr::NIL));\n  EXPECT_THAT(Run({\"zrank\", \"y\", \"c\", \"WITHSCORE\"}), ArgType(RespExpr::NIL));\n\n  auto resp = Run({\"zrank\", \"x\", \"a\", \"WITHSCORE\"});\n  ASSERT_THAT(resp, ArgType(RespExpr::ARRAY));\n  ASSERT_THAT(resp.GetVec(), ElementsAre(IntArg(0), \"1.1\"));\n\n  resp = Run({\"zrank\", \"x\", \"b\", \"WITHSCORE\"});\n  ASSERT_THAT(resp, ArgType(RespExpr::ARRAY));\n  ASSERT_THAT(resp.GetVec(), ElementsAre(IntArg(1), \"2.1\"));\n\n  resp = Run({\"zrevrank\", \"x\", \"a\", \"WITHSCORE\"});\n  ASSERT_THAT(resp, ArgType(RespExpr::ARRAY));\n  ASSERT_THAT(resp.GetVec(), ElementsAre(IntArg(1), \"1.1\"));\n\n  resp = Run({\"zrevrank\", \"x\", \"b\", \"WITHSCORE\"});\n  ASSERT_THAT(resp, ArgType(RespExpr::ARRAY));\n  ASSERT_THAT(resp.GetVec(), ElementsAre(IntArg(0), \"2.1\"));\n\n  resp = Run({\"zrank\", \"x\", \"a\", \"WITHSCORES\"});\n  ASSERT_THAT(resp, ErrArg(\"syntax error\"));\n\n  resp = Run({\"zrank\", \"x\", \"a\", \"WITHSCORES\", \"42\"});\n  ASSERT_THAT(resp, ErrArg(\"wrong number of arguments for 'zrank' command\"));\n\n  resp = Run({\"zrevrank\", \"x\", \"a\", \"WITHSCORES\", \"42\"});\n  ASSERT_THAT(resp, ErrArg(\"wrong number of arguments for 'zrevrank' 
command\"));\n}\n\nTEST_F(ZSetFamilyTest, LargeSet) {\n  for (int i = 0; i < 129; ++i) {\n    auto resp = Run({\"zadd\", \"key\", absl::StrCat(i), absl::StrCat(\"element:\", i)});\n    EXPECT_THAT(resp, IntArg(1)) << i;\n  }\n  Run({\"zadd\", \"key\", \"129\", \"\"});\n\n  EXPECT_THAT(Run({\"zrangebyscore\", \"key\", \"(-inf\", \"(0.0\"}), ArrLen(0));\n  EXPECT_THAT(Run({\"zrangebyscore\", \"key\", \"(5\", \"0.0\"}), ArrLen(0));\n  EXPECT_THAT(Run({\"zrangebylex\", \"key\", \"-\", \"(element:0\"}), ArrLen(0));\n  EXPECT_EQ(2, CheckedInt({\"zremrangebyscore\", \"key\", \"127\", \"(129\"}));\n}\n\nTEST_F(ZSetFamilyTest, ZRemRangeRank) {\n  Run({\"zadd\", \"x\", \"1.1\", \"a\", \"2.1\", \"b\"});\n  EXPECT_THAT(Run({\"ZREMRANGEBYRANK\", \"y\", \"0\", \"1\"}), IntArg(0));\n  EXPECT_THAT(Run({\"ZREMRANGEBYRANK\", \"x\", \"0\", \"0\"}), IntArg(1));\n  EXPECT_EQ(Run({\"zrange\", \"x\", \"0\", \"5\"}), \"b\");\n  EXPECT_THAT(Run({\"ZREMRANGEBYRANK\", \"x\", \"0\", \"1\"}), IntArg(1));\n  EXPECT_EQ(Run({\"type\", \"x\"}), \"none\");\n}\n\nTEST_F(ZSetFamilyTest, ZRemRangeScore) {\n  Run({\"zadd\", \"x\", \"1.1\", \"a\", \"2.1\", \"b\"});\n  EXPECT_THAT(Run({\"ZREMRANGEBYSCORE\", \"y\", \"0\", \"1\"}), IntArg(0));\n  EXPECT_THAT(Run({\"ZREMRANGEBYSCORE\", \"x\", \"-inf\", \"1.1\"}), IntArg(1));\n  EXPECT_EQ(Run({\"zrange\", \"x\", \"0\", \"5\"}), \"b\");\n  EXPECT_THAT(Run({\"ZREMRANGEBYSCORE\", \"x\", \"(2.0\", \"+inf\"}), IntArg(1));\n  EXPECT_EQ(Run({\"type\", \"x\"}), \"none\");\n  EXPECT_THAT(Run({\"zremrangebyscore\", \"x\", \"1\", \"NaN\"}), ErrArg(\"min or max is not a float\"));\n}\n\nTEST_F(ZSetFamilyTest, IncrBy) {\n  auto resp = Run({\"zadd\", \"key\", \"xx\", \"incr\", \"2.1\", \"member\"});\n  EXPECT_THAT(resp, ArgType(RespExpr::NIL));\n\n  resp = Run({\"zadd\", \"key\", \"nx\", \"incr\", \"2.1\", \"member\"});\n  EXPECT_THAT(resp, \"2.1\");\n\n  resp = Run({\"zadd\", \"key\", \"nx\", \"incr\", \"4.9\", \"member\"});\n  EXPECT_THAT(resp, 
ArgType(RespExpr::NIL));\n}\n\nTEST_F(ZSetFamilyTest, ByLex) {\n  Run({\n      \"zadd\", \"key\",      \"0\", \"alpha\", \"0\", \"bar\",   \"0\", \"cool\", \"0\", \"down\",\n      \"0\",    \"elephant\", \"0\", \"foo\",   \"0\", \"great\", \"0\", \"hill\", \"0\", \"omega\",\n  });\n\n  auto resp = Run({\"zrangebylex\", \"key\", \"-\", \"[cool\"});\n  ASSERT_THAT(resp, ArgType(RespExpr::ARRAY));\n  EXPECT_THAT(resp.GetVec(), ElementsAre(\"alpha\", \"bar\", \"cool\"));\n\n  EXPECT_EQ(3, CheckedInt({\"ZLEXCOUNT\", \"key\", \"(foo\", \"+\"}));\n  EXPECT_EQ(0, CheckedInt({\"ZLEXCOUNT\", \"key\", \"(foo\", \"[fop\"}));\n  EXPECT_EQ(3, CheckedInt({\"ZREMRANGEBYLEX\", \"key\", \"(foo\", \"+\"}));\n\n  resp = Run({\"zrangebylex\", \"key\", \"[a\", \"+\"});\n  ASSERT_THAT(resp, ArgType(RespExpr::ARRAY));\n  ASSERT_THAT(resp.GetVec(), ElementsAre(\"alpha\", \"bar\", \"cool\", \"down\", \"elephant\", \"foo\"));\n\n  resp = Run({\"zrangebylex\", \"key\", \"-\", \"+\", \"LIMIT\", \"2\", \"3\"});\n  ASSERT_THAT(resp.GetVec(), ElementsAre(\"cool\", \"down\", \"elephant\"));\n\n  resp = Run({\"zrangebylex\", \"key\", \"-\", \"+\", \"LIMIT\", \"5\", \"1\"});\n  ASSERT_THAT(resp, \"foo\");\n}\n\nTEST_F(ZSetFamilyTest, ZRevRangeByLex) {\n  Run({\n      \"zadd\", \"key\",      \"0\", \"alpha\", \"0\", \"bar\",   \"0\", \"cool\", \"0\", \"down\",\n      \"0\",    \"elephant\", \"0\", \"foo\",   \"0\", \"great\", \"0\", \"hill\", \"0\", \"omega\",\n  });\n\n  auto resp = Run({\"zrevrangebylex\", \"key\", \"[cool\", \"-\"});\n  ASSERT_THAT(resp, ArgType(RespExpr::ARRAY));\n  EXPECT_THAT(resp.GetVec(), ElementsAre(\"cool\", \"bar\", \"alpha\"));\n\n  EXPECT_EQ(3, CheckedInt({\"ZLEXCOUNT\", \"key\", \"(foo\", \"+\"}));\n  EXPECT_EQ(3, CheckedInt({\"ZREMRANGEBYLEX\", \"key\", \"(foo\", \"+\"}));\n\n  resp = Run({\"zrevrangebylex\", \"key\", \"+\", \"[a\"});\n  ASSERT_THAT(resp, ArgType(RespExpr::ARRAY));\n  ASSERT_THAT(resp.GetVec(), ElementsAre(\"foo\", \"elephant\", \"down\", \"cool\", 
\"bar\", \"alpha\"));\n\n  Run({\"zadd\", \"myzset\", \"0\", \"a\", \"0\", \"b\", \"0\", \"c\", \"0\", \"d\", \"0\", \"e\", \"0\", \"f\", \"0\", \"g\"});\n  resp = Run({\"zrevrangebylex\", \"myzset\", \"(c\", \"-\"});\n  ASSERT_THAT(resp, ArgType(RespExpr::ARRAY));\n  EXPECT_THAT(resp.GetVec(), ElementsAre(\"b\", \"a\"));\n}\n\nTEST_F(ZSetFamilyTest, ZRange) {\n  Run({\"zadd\", \"key\", \"0\", \"a\", \"1\", \"d\", \"1\", \"b\", \"2\", \"c\", \"4\", \"e\"});\n\n  auto resp = Run({\"zrange\", \"key\", \"0\", \"2\"});\n  ASSERT_THAT(resp, ArrLen(3));\n  ASSERT_THAT(resp.GetVec(), ElementsAre(\"a\", \"b\", \"d\"));\n\n  resp = Run({\"zrange\", \"key\", \"1\", \"3\", \"WITHSCORES\"});\n  ASSERT_THAT(resp, ArrLen(6));\n  ASSERT_THAT(resp.GetVec(), ElementsAre(\"b\", \"1\", \"d\", \"1\", \"c\", \"2\"));\n\n  resp = Run({\"zrange\", \"key\", \"1\", \"3\", \"WITHSCORES\", \"REV\"});\n  ASSERT_THAT(resp, ArrLen(6));\n  ASSERT_THAT(resp.GetVec(), ElementsAre(\"c\", \"2\", \"d\", \"1\", \"b\", \"1\"));\n\n  resp = Run({\"zrange\", \"key\", \"(1\", \"4\", \"BYSCORE\", \"WITHSCORES\"});\n  ASSERT_THAT(resp, ArrLen(4));\n  ASSERT_THAT(resp.GetVec(), ElementsAre(\"c\", \"2\", \"e\", \"4\"));\n\n  resp = Run({\"zrange\", \"key\", \"-\", \"d\", \"BYLEX\", \"BYSCORE\"});\n  EXPECT_THAT(resp, ErrArg(\"BYSCORE and BYLEX options are not compatible\"));\n\n  resp = Run({\"zrange\", \"key\", \"0\", \"-1\", \"LIMIT\", \"3\", \"-1\"});\n  ASSERT_THAT(resp, ArrLen(2));\n  ASSERT_THAT(resp.GetVec(), ElementsAre(\"c\", \"e\"));\n\n  Run({\"zremrangebyscore\", \"key\", \"0\", \"4\"});\n\n  Run({\n      \"zadd\", \"key\",      \"0\", \"alpha\", \"0\", \"bar\",   \"0\", \"cool\", \"0\", \"down\",\n      \"0\",    \"elephant\", \"0\", \"foo\",   \"0\", \"great\", \"0\", \"hill\", \"0\", \"omega\",\n  });\n  resp = Run({\"zrange\", \"key\", \"-\", \"[cool\", \"BYLEX\"});\n  ASSERT_THAT(resp, ArgType(RespExpr::ARRAY));\n  EXPECT_THAT(resp.GetVec(), ElementsAre(\"alpha\", \"bar\", \"cool\"));\n\n  
resp = Run({\"zrange\", \"key\", \"[cool\", \"-\", \"REV\", \"BYLEX\"});\n  ASSERT_THAT(resp, ArgType(RespExpr::ARRAY));\n  EXPECT_THAT(resp.GetVec(), ElementsAre(\"cool\", \"bar\", \"alpha\"));\n\n  resp = Run({\"zrange\", \"key\", \"+\", \"[cool\", \"REV\", \"BYLEX\", \"LIMIT\", \"2\", \"2\"});\n  ASSERT_THAT(resp, ArgType(RespExpr::ARRAY));\n  EXPECT_THAT(resp.GetVec(), ElementsAre(\"great\", \"foo\"));\n\n  resp = Run({\"zrange\", \"key\", \"+\", \"[cool\", \"BYLEX\", \"LIMIT\", \"2\", \"2\", \"REV\"});\n  ASSERT_THAT(resp, ArgType(RespExpr::ARRAY));\n  EXPECT_THAT(resp.GetVec(), ElementsAre(\"great\", \"foo\"));\n\n  resp = Run({\"zrange\", \"key\", \"5\", \"2147483648\"});\n  ASSERT_THAT(resp, RespElementsAre(\"foo\", \"great\", \"hill\", \"omega\"));\n}\n\nTEST_F(ZSetFamilyTest, ZRevRange) {\n  Run({\"zadd\", \"key\", \"-inf\", \"a\", \"1\", \"b\", \"2\", \"c\"});\n  auto resp = Run({\"zrevrangebyscore\", \"key\", \"2\", \"-inf\"});\n  ASSERT_THAT(resp, ArrLen(3));\n  EXPECT_THAT(resp.GetVec(), ElementsAre(\"c\", \"b\", \"a\"));\n\n  resp = Run({\"zrevrangebyscore\", \"key\", \"2\", \"-inf\", \"withscores\"});\n  ASSERT_THAT(resp, ArrLen(6));\n  EXPECT_THAT(resp.GetVec(), ElementsAre(\"c\", \"2\", \"b\", \"1\", \"a\", \"-inf\"));\n\n  resp = Run({\"zrevrange\", \"key\", \"0\", \"2\"});\n  ASSERT_THAT(resp, ArrLen(3));\n  EXPECT_THAT(resp.GetVec(), ElementsAre(\"c\", \"b\", \"a\"));\n\n  resp = Run({\"zrevrange\", \"key\", \"1\", \"2\", \"withscores\"});\n  ASSERT_THAT(resp, ArrLen(4));\n  EXPECT_THAT(resp.GetVec(), ElementsAre(\"b\", \"1\", \"a\", \"-inf\"));\n\n  // Make sure that when using with upper case it works as well (see\n  // https://github.com/dragonflydb/dragonfly/issues/326)\n  resp = Run({\"zrevrangebyscore\", \"key\", \"2\", \"-INF\"});\n  ASSERT_THAT(resp, ArrLen(3));\n  EXPECT_THAT(resp.GetVec(), ElementsAre(\"c\", \"b\", \"a\"));\n\n  resp = Run({\"zrevrangebyscore\", \"key\", \"2\", \"-INF\", \"withscores\"});\n  ASSERT_THAT(resp, 
ArrLen(6));\n  EXPECT_THAT(resp.GetVec(), ElementsAre(\"c\", \"2\", \"b\", \"1\", \"a\", \"-inf\"));\n}\n\nTEST_F(ZSetFamilyTest, ZScan) {\n  auto resp = Run(\"zscan non-existing-key 100 count 5\");\n  ASSERT_THAT(resp, ArgType(RespExpr::ARRAY));\n  ASSERT_THAT(resp.GetVec(), ElementsAre(ArgType(RespExpr::STRING), ArgType(RespExpr::ARRAY)));\n  EXPECT_EQ(ToSV(resp.GetVec()[0].GetBuf()), \"0\");\n  EXPECT_EQ(StrArray(resp.GetVec()[1]).size(), 0);\n\n  string prefix(128, 'a');\n  for (unsigned i = 0; i < 100; ++i) {\n    Run({\"zadd\", \"key\", \"1\", absl::StrCat(prefix, i)});\n  }\n\n  EXPECT_EQ(100, CheckedInt({\"zcard\", \"key\"}));\n  int64_t cursor = 0;\n  size_t scan_len = 0;\n  do {\n    auto resp = Run({\"zscan\", \"key\", absl::StrCat(cursor)});\n    ASSERT_THAT(resp, ArgType(RespExpr::ARRAY));\n    ASSERT_THAT(resp.GetVec(), ElementsAre(ArgType(RespExpr::STRING), ArgType(RespExpr::ARRAY)));\n    string_view token = ToSV(resp.GetVec()[0].GetBuf());\n    ASSERT_TRUE(absl::SimpleAtoi(token, &cursor));\n    auto sub_arr = resp.GetVec()[1].GetVec();\n    scan_len += sub_arr.size();\n  } while (cursor != 0);\n\n  EXPECT_EQ(100 * 2, scan_len);\n\n  // Check scan with count and match params\n  scan_len = 0;\n  do {\n    auto resp = Run({\"zscan\", \"key\", absl::StrCat(cursor), \"count\", \"5\", \"match\", \"*0\"});\n    ASSERT_THAT(resp, ArgType(RespExpr::ARRAY));\n    ASSERT_THAT(resp.GetVec(), ElementsAre(ArgType(RespExpr::STRING), ArgType(RespExpr::ARRAY)));\n    string_view token = ToSV(resp.GetVec()[0].GetBuf());\n    ASSERT_TRUE(absl::SimpleAtoi(token, &cursor));\n    auto sub_arr = resp.GetVec()[1].GetVec();\n    scan_len += sub_arr.size();\n  } while (cursor != 0);\n  EXPECT_EQ(10 * 2, scan_len);  // expected members a0,a10,a20..,a90\n}\n\nTEST_F(ZSetFamilyTest, ZUnionError) {\n  RespExpr resp;\n\n  resp = Run({\"zunion\", \"0\"});\n  EXPECT_THAT(resp, ErrArg(\"wrong number of arguments\"));\n\n  resp = Run({\"zunion\", \"0\", \"myset\"});\n  
EXPECT_THAT(resp, ErrArg(\"at least 1 input key is needed\"));\n\n  resp = Run({\"zunion\", \"3\", \"z1\", \"z2\", \"z3\", \"weights\", \"1\", \"1\", \"k\"});\n  EXPECT_THAT(resp, ErrArg(\"weight value is not a float\"));\n\n  resp = Run({\"zunion\", \"3\", \"z1\", \"z2\", \"z3\", \"weights\", \"1\", \"1\", \"2\", \"aggregate\", \"something\"});\n  EXPECT_THAT(resp, ErrArg(\"syntax error\"));\n\n  resp = Run({\"zunion\", \"3\", \"z1\", \"z2\", \"z3\", \"weights\", \"1\", \"2\", \"aggregate\", \"something\"});\n  EXPECT_THAT(resp, ErrArg(\"weight value is not a float\"));\n\n  resp = Run({\"zunion\", \"3\", \"z1\", \"z2\", \"z3\", \"aggregate\", \"sum\", \"somescore\"});\n  EXPECT_THAT(resp, ErrArg(\"syntax error\"));\n\n  resp = Run({\"zunion\", \"3\", \"z1\", \"z2\", \"z3\", \"withscores\", \"someargs\"});\n  EXPECT_THAT(resp, ErrArg(\"syntax error\"));\n\n  resp = Run({\"zunion\", \"1\"});\n  EXPECT_THAT(resp, ErrArg(\"wrong number of arguments\"));\n\n  resp = Run({\"zunion\", \"2\", \"z1\"});\n  EXPECT_THAT(resp, ErrArg(\"syntax error\"));\n\n  resp = Run({\"zunion\", \"2\", \"z1\", \"z2\", \"z3\"});\n  EXPECT_THAT(resp, ErrArg(\"syntax error\"));\n\n  resp = Run({\"zunion\", \"2\", \"z1\", \"z2\", \"weights\", \"1\", \"2\", \"3\"});\n  EXPECT_THAT(resp, ErrArg(\"syntax error\"));\n}\n\nTEST_F(ZSetFamilyTest, ZUnion) {\n  RespExpr resp;\n\n  EXPECT_EQ(2, CheckedInt({\"zadd\", \"z1\", \"1\", \"a\", \"3\", \"b\"}));\n  EXPECT_EQ(2, CheckedInt({\"zadd\", \"z2\", \"3\", \"c\", \"2\", \"b\"}));\n  EXPECT_EQ(2, CheckedInt({\"zadd\", \"z3\", \"1\", \"c\", \"1\", \"d\"}));\n\n  resp = Run({\"zunion\", \"3\", \"z1\", \"z2\", \"z3\"});\n  EXPECT_THAT(resp.GetVec(), ElementsAre(\"a\", \"d\", \"c\", \"b\"));\n\n  resp = Run({\"zunion\", \"3\", \"z1\", \"z2\", \"z3\", \"weights\", \"1\", \"1\", \"2\"});\n  EXPECT_THAT(resp.GetVec(), ElementsAre(\"a\", \"d\", \"b\", \"c\"));\n\n  // Cover union of sets and zsets\n  EXPECT_EQ(2, CheckedInt({\"sadd\", \"s2\", \"b\", 
\"c\"}));\n  resp = Run({\"zunion\", \"2\", \"z1\", \"s2\", \"weights\", \"1\", \"2\", \"withscores\"});\n  EXPECT_THAT(resp.GetVec(), ElementsAre(\"a\", \"1\", \"c\", \"2\", \"b\", \"5\"));\n\n  resp = Run({\"zunion\", \"3\", \"z1\", \"z2\", \"z3\", \"weights\", \"1\", \"1\", \"2\", \"withscores\"});\n  EXPECT_THAT(resp.GetVec(), ElementsAre(\"a\", \"1\", \"d\", \"2\", \"b\", \"5\", \"c\", \"5\"));\n\n  resp = Run({\"zunion\", \"3\", \"z1\", \"z2\", \"z3\", \"weights\", \"1\", \"1\", \"2\", \"aggregate\", \"min\",\n              \"withscores\"});\n  EXPECT_THAT(resp.GetVec(), ElementsAre(\"a\", \"1\", \"b\", \"2\", \"c\", \"2\", \"d\", \"2\"));\n\n  resp = Run({\"zunion\", \"3\", \"z1\", \"z2\", \"z3\", \"withscores\", \"weights\", \"1\", \"1\", \"2\", \"aggregate\",\n              \"min\"});\n  EXPECT_THAT(resp.GetVec(), ElementsAre(\"a\", \"1\", \"b\", \"2\", \"c\", \"2\", \"d\", \"2\"));\n\n  resp = Run({\"zunion\", \"3\", \"none1\", \"none2\", \"z3\", \"withscores\", \"weights\", \"1\", \"1\", \"2\"});\n  EXPECT_THAT(resp.GetVec(), ElementsAre(\"c\", \"2\", \"d\", \"2\"));\n\n  resp = Run({\"zunion\", \"3\", \"z1\", \"z2\", \"z3\", \"weights\", \"1\", \"1\", \"2\", \"aggregate\", \"max\",\n              \"withscores\"});\n  EXPECT_THAT(resp.GetVec(), ElementsAre(\"a\", \"1\", \"d\", \"2\", \"b\", \"3\", \"c\", \"3\"));\n\n  resp = Run({\"zunion\", \"1\", \"z1\", \"weights\", \"2\", \"aggregate\", \"max\", \"withscores\"});\n  EXPECT_THAT(resp.GetVec(), ElementsAre(\"a\", \"2\", \"b\", \"6\"));\n\n  for (unsigned i = 0; i < 256; ++i) {\n    Run({\"zadd\", \"large1\", \"1000\", absl::StrCat(\"aaaaaaaaaa\", i)});\n    Run({\"zadd\", \"large2\", \"1000\", absl::StrCat(\"bbbbbbbbbb\", i)});\n    Run({\"zadd\", \"large2\", \"1000\", absl::StrCat(\"aaaaaaaaaa\", i)});\n  }\n  resp = Run({\"zunion\", \"2\", \"large2\", \"large1\"});\n  EXPECT_THAT(resp, ArrLen(512));\n}\n\nTEST_F(ZSetFamilyTest, ZUnionStore) {\n  RespExpr resp;\n\n  resp = Run({\"zunionstore\", 
\"key\", \"0\"});\n  EXPECT_THAT(resp, ErrArg(\"wrong number of arguments\"));\n\n  resp = Run({\"zunionstore\", \"key\", \"0\", \"aggregate\"});\n  EXPECT_THAT(resp, ErrArg(\"at least 1 input key is needed\"));\n\n  resp = Run({\"zunionstore\", \"key\", \"0\", \"aggregate\", \"sum\"});\n  EXPECT_THAT(resp, ErrArg(\"at least 1 input key is needed\"));\n  resp = Run({\"zunionstore\", \"key\", \"-1\", \"aggregate\", \"sum\"});\n  EXPECT_THAT(resp, ErrArg(\"out of range\"));\n  resp = Run({\"zunionstore\", \"key\", \"2\", \"foo\", \"bar\", \"weights\", \"1\"});\n  EXPECT_THAT(resp, ErrArg(\"syntax error\"));\n\n  EXPECT_EQ(2, CheckedInt({\"zadd\", \"z1\", \"1\", \"a\", \"2\", \"b\"}));\n  EXPECT_EQ(2, CheckedInt({\"zadd\", \"z2\", \"3\", \"c\", \"2\", \"b\"}));\n\n  resp = Run({\"zunionstore\", \"key\", \"2\", \"z1\", \"z2\"});\n  EXPECT_THAT(resp, IntArg(3));\n  resp = Run({\"zrange\", \"key\", \"0\", \"-1\", \"withscores\"});\n  EXPECT_THAT(resp.GetVec(), ElementsAre(\"a\", \"1\", \"c\", \"3\", \"b\", \"4\"));\n\n  resp = Run({\"zunionstore\", \"z1\", \"1\", \"z1\"});\n  EXPECT_THAT(resp, IntArg(2));\n\n  resp = Run({\"zunionstore\", \"z1\", \"2\", \"z1\", \"z2\"});\n  EXPECT_THAT(resp, IntArg(3));\n  resp = Run({\"zrange\", \"z1\", \"0\", \"-1\", \"withscores\"});\n  EXPECT_THAT(resp.GetVec(), ElementsAre(\"a\", \"1\", \"c\", \"3\", \"b\", \"4\"));\n\n  Run({\"set\", \"foo\", \"bar\"});\n  resp = Run({\"zunionstore\", \"foo\", \"1\", \"z2\"});\n  EXPECT_THAT(resp, IntArg(2));\n  resp = Run({\"zrange\", \"foo\", \"0\", \"-1\", \"withscores\"});\n  EXPECT_THAT(resp.GetVec(), ElementsAre(\"b\", \"2\", \"c\", \"3\"));\n}\n\n// Check that ZUNIONSTORE overwrites a value including resetting its expiration\nTEST_F(ZSetFamilyTest, ZUnionStoreExpiration) {\n  EXPECT_THAT(Run({\"zadd\", \"z1\", \"1\", \"a\", \"2\", \"b\"}), IntArg(2));\n  EXPECT_THAT(Run({\"zadd\", \"z2\", \"3\", \"c\", \"2\", \"b\"}), IntArg(2));\n\n  Run({\"set\", \"target\", \"some-value\"});\n  
EXPECT_THAT(Run({\"expire\", \"target\", \"1010\"}), IntArg(1));\n  EXPECT_THAT(Run({\"ttl\", \"target\"}), IntArg(1010));\n\n  EXPECT_THAT(Run({\"zunionstore\", \"target\", \"2\", \"z1\", \"z2\"}), IntArg(3));\n  EXPECT_THAT(Run({\"ttl\", \"target\"}), IntArg(-1));\n}\n\nTEST_F(ZSetFamilyTest, ZUnionStoreOpts) {\n  EXPECT_EQ(2, CheckedInt({\"zadd\", \"z1\", \"1\", \"a\", \"2\", \"b\"}));\n  EXPECT_EQ(2, CheckedInt({\"zadd\", \"z2\", \"3\", \"c\", \"2\", \"b\"}));\n  RespExpr resp;\n\n  EXPECT_EQ(3, CheckedInt({\"zunionstore\", \"a\", \"2\", \"z1\", \"z2\", \"weights\", \"1\", \"3\"}));\n  resp = Run({\"zrange\", \"a\", \"0\", \"-1\", \"withscores\"});\n  EXPECT_THAT(resp.GetVec(), ElementsAre(\"a\", \"1\", \"b\", \"8\", \"c\", \"9\"));\n\n  resp = Run({\"zunionstore\", \"a\", \"2\", \"z1\", \"z2\", \"weights\", \"1\"});\n  EXPECT_THAT(resp, ErrArg(\"syntax error\"));\n\n  resp = Run({\"zunionstore\", \"z1\", \"1\", \"z1\", \"weights\", \"2\"});\n  EXPECT_THAT(resp, IntArg(2));\n  resp = Run({\"zrange\", \"z1\", \"0\", \"-1\", \"withscores\"});\n  EXPECT_THAT(resp.GetVec(), ElementsAre(\"a\", \"2\", \"b\", \"4\"));\n\n  resp = Run({\"zunionstore\", \"max\", \"2\", \"z1\", \"z2\", \"weights\", \"1\", \"0\", \"aggregate\", \"max\"});\n  ASSERT_THAT(resp, IntArg(3));\n  resp = Run({\"zrange\", \"max\", \"0\", \"-1\", \"withscores\"});\n  EXPECT_THAT(resp.GetVec(), ElementsAre(\"c\", \"0\", \"a\", \"2\", \"b\", \"4\"));\n\n  // Check that infinity is handled correctly.\n  Run({\"ZADD\", \"src1\", \"inf\", \"x\"});\n  Run({\"ZADD\", \"src2\", \"inf\", \"x\"});\n  Run({\"ZUNIONSTORE\", \"dest\", \"2\", \"src1\", \"src2\", \"WEIGHTS\", \"1.0\", \"0.0\"});\n  resp = Run({\"ZSCORE\", \"dest\", \"x\"});\n  EXPECT_THAT(resp, DoubleArg(numeric_limits<double>::infinity()));\n\n  Run({\"ZADD\", \"foo\", \"inf\", \"e1\"});\n  Run({\"ZADD\", \"bar\", \"-inf\", \"e1\", \"0.0\", \"e2\"});\n  Run({\"ZUNIONSTORE\", \"dest\", \"3\", \"foo\", \"bar\", \"foo\"});\n  resp = 
Run({\"ZSCORE\", \"dest\", \"e1\"});\n  EXPECT_THAT(resp, DoubleArg(0));\n}\n\nTEST_F(ZSetFamilyTest, ZInterStore) {\n  EXPECT_EQ(2, CheckedInt({\"zadd\", \"z1\", \"1\", \"a\", \"2\", \"b\"}));\n  EXPECT_EQ(2, CheckedInt({\"zadd\", \"z2\", \"3\", \"c\", \"2\", \"b\"}));\n  RespExpr resp;\n\n  EXPECT_EQ(1, CheckedInt({\"zinterstore\", \"a\", \"2\", \"z1\", \"z2\"}));\n  resp = Run({\"zrange\", \"a\", \"0\", \"-1\", \"withscores\"});\n  EXPECT_THAT(resp.GetVec(), ElementsAre(\"b\", \"4\"));\n\n  // support for sets\n  EXPECT_EQ(2, CheckedInt({\"sadd\", \"s2\", \"b\", \"c\"}));\n  EXPECT_EQ(1, CheckedInt({\"zinterstore\", \"b\", \"2\", \"z1\", \"s2\"}));\n  resp = Run({\"zrange\", \"b\", \"0\", \"-1\", \"withscores\"});\n  EXPECT_THAT(resp.GetVec(), ElementsAre(\"b\", \"3\"));\n\n  Run({\"ZADD\", \"foo\", \"10\", \"a\"});\n  EXPECT_EQ(1, CheckedInt({\"ZINTERSTORE\", \"bar\", \"1\", \"foo\", \"weights\", \"2\"}));\n  resp = Run({\"zrange\", \"bar\", \"0\", \"-1\", \"withscores\"});\n  EXPECT_THAT(resp.GetVec(), ElementsAre(\"a\", \"20\"));\n}\n\nTEST_F(ZSetFamilyTest, ZInter) {\n  EXPECT_EQ(2, CheckedInt({\"zadd\", \"z1\", \"1\", \"one\", \"2\", \"two\"}));\n  EXPECT_EQ(3, CheckedInt({\"zadd\", \"z2\", \"1\", \"one\", \"2\", \"two\", \"3\", \"three\"}));\n  RespExpr resp;\n\n  resp = Run({\"zinter\", \"2\", \"z1\", \"z2\"});\n  EXPECT_THAT(resp, ArrLen(2));\n  EXPECT_THAT(resp.GetVec(), ElementsAre(\"one\", \"two\"));\n\n  EXPECT_EQ(3, CheckedInt({\"zadd\", \"z3\", \"1\", \"one\", \"2\", \"two\", \"3\", \"three\"}));\n  EXPECT_EQ(3, CheckedInt({\"zadd\", \"z4\", \"4\", \"four\", \"5\", \"five\", \"6\", \"six\"}));\n  EXPECT_EQ(1, CheckedInt({\"zadd\", \"z5\", \"6\", \"six\"}));\n\n  resp = Run({\"zinter\", \"3\", \"z3\", \"z4\", \"z5\"});\n  EXPECT_THAT(resp, ArrLen(0));\n\n  // zinter output sorts keys with equal scores lexicographically\n  Run({\"del\", \"z1\", \"z2\", \"z3\", \"z4\", \"z5\"});\n  Run({\"zadd\", \"z1\", \"1\", \"e\", \"1\", \"a\", \"1\", \"b\", 
\"1\", \"x\"});\n  Run({\"zadd\", \"z2\", \"1\", \"e\", \"1\", \"a\", \"1\", \"b\", \"1\", \"y\"});\n  Run({\"zadd\", \"z3\", \"1\", \"e\", \"1\", \"a\", \"1\", \"b\", \"1\", \"z\"});\n  Run({\"zadd\", \"z4\", \"1\", \"e\", \"1\", \"a\", \"1\", \"b\", \"1\", \"o\"});\n  EXPECT_THAT(Run({\"zinter\", \"4\", \"z1\", \"z2\", \"z3\", \"z4\"}).GetVec(), ElementsAre(\"a\", \"b\", \"e\"));\n}\n\nTEST_F(ZSetFamilyTest, ZInterCard) {\n  EXPECT_EQ(3, CheckedInt({\"zadd\", \"z1\", \"1\", \"a\", \"2\", \"b\", \"3\", \"c\"}));\n  EXPECT_EQ(3, CheckedInt({\"zadd\", \"z2\", \"2\", \"b\", \"3\", \"c\", \"4\", \"d\"}));\n\n  EXPECT_EQ(2, CheckedInt({\"zintercard\", \"2\", \"z1\", \"z2\"}));\n  EXPECT_EQ(1, CheckedInt({\"zintercard\", \"2\", \"z1\", \"z2\", \"LIMIT\", \"1\"}));\n\n  RespExpr resp;\n\n  resp = Run({\"zintercard\", \"2\", \"z1\", \"z2\", \"LIM\"});\n  EXPECT_THAT(resp, ErrArg(\"syntax error\"));\n  resp = Run({\"zintercard\", \"2\", \"z1\", \"z2\", \"LIMIT\"});\n  EXPECT_THAT(resp, ErrArg(\"syntax error\"));\n  resp = Run({\"zintercard\", \"2\", \"z1\", \"z2\", \"LIMIT\", \"a\"});\n  EXPECT_THAT(resp, ErrArg(\"limit value is not a positive integer\"));\n\n  resp = Run({\"zintercard\", \"0\", \"z1\"});\n  EXPECT_THAT(resp, ErrArg(\"at least 1 input\"));\n\n  // support for sets\n  EXPECT_EQ(3, CheckedInt({\"sadd\", \"s2\", \"b\", \"c\", \"d\"}));\n  EXPECT_EQ(2, CheckedInt({\"zintercard\", \"2\", \"z1\", \"s2\"}));\n}\n\nTEST_F(ZSetFamilyTest, ZAddBug148) {\n  auto resp = Run({\"zadd\", \"key\", \"1\", \"9fe9f1eb\"});\n  EXPECT_THAT(resp, IntArg(1));\n}\n\nTEST_F(ZSetFamilyTest, ZMPopInvalidSyntax) {\n  // Not enough arguments.\n  auto resp = Run({\"zmpop\", \"1\", \"a\"});\n  EXPECT_THAT(resp, ErrArg(\"wrong number of arguments\"));\n\n  // Zero keys.\n  resp = Run({\"zmpop\", \"0\", \"MIN\", \"COUNT\", \"1\"});\n  EXPECT_THAT(resp, ErrArg(\"at least 1 input key is needed\"));\n\n  // Number of keys not uint.\n  resp = Run({\"zmpop\", \"aa\", \"a\", \"MIN\"});\n  
EXPECT_THAT(resp, ErrArg(\"value is not an integer or out of range\"));\n\n  // Missing MIN/MAX.\n  resp = Run({\"zmpop\", \"1\", \"a\", \"COUNT\", \"1\"});\n  EXPECT_THAT(resp, ErrArg(\"syntax error\"));\n\n  // Wrong number of keys.\n  resp = Run({\"zmpop\", \"1\", \"a\", \"b\", \"MAX\"});\n  EXPECT_THAT(resp, ErrArg(\"syntax error\"));\n\n  // Count with no number.\n  resp = Run({\"zmpop\", \"1\", \"a\", \"MAX\", \"COUNT\"});\n  EXPECT_THAT(resp, ErrArg(\"syntax error\"));\n\n  // Count number is not uint.\n  resp = Run({\"zmpop\", \"1\", \"a\", \"MIN\", \"COUNT\", \"boo\"});\n  EXPECT_THAT(resp, ErrArg(\"value is not an integer or out of range\"));\n\n  // Too many arguments.\n  resp = Run({\"zmpop\", \"1\", \"c\", \"MAX\", \"COUNT\", \"2\", \"foo\"});\n  EXPECT_THAT(resp, ErrArg(\"syntax error\"));\n}\n\nTEST_F(ZSetFamilyTest, ZMPop) {\n  // All sets are empty.\n  auto resp = Run({\"zmpop\", \"1\", \"e\", \"MIN\"});\n  EXPECT_THAT(resp, ArgType(RespExpr::NIL));\n\n  // Min operation.\n  resp = Run({\"zadd\", \"a\", \"1\", \"a1\", \"2\", \"a2\"});\n  EXPECT_THAT(resp, IntArg(2));\n\n  resp = Run({\"zmpop\", \"1\", \"a\", \"MIN\"});\n  EXPECT_THAT(resp, ContainsLabeledScoredArray(\"a\", {{\"a1\", \"1\"}}));\n\n  resp = Run({\"ZRANGE\", \"a\", \"0\", \"-1\", \"WITHSCORES\"});\n  EXPECT_THAT(resp, RespArray(ElementsAre(\"a2\", \"2\")));\n\n  // Max operation.\n  resp = Run({\"zadd\", \"b\", \"1\", \"b1\", \"2\", \"b2\"});\n  EXPECT_THAT(resp, IntArg(2));\n\n  resp = Run({\"zmpop\", \"1\", \"b\", \"MAX\"});\n  EXPECT_THAT(resp, ContainsLabeledScoredArray(\"b\", {{\"b2\", \"2\"}}));\n\n  resp = Run({\"ZRANGE\", \"b\", \"0\", \"-1\", \"WITHSCORES\"});\n  EXPECT_THAT(resp, RespArray(ElementsAre(\"b1\", \"1\")));\n\n  // Count > 1.\n  resp = Run({\"zadd\", \"c\", \"1\", \"c1\", \"2\", \"c2\"});\n  EXPECT_THAT(resp, IntArg(2));\n\n  resp = Run({\"zmpop\", \"1\", \"c\", \"MAX\", \"COUNT\", \"2\"});\n  EXPECT_THAT(resp, ContainsLabeledScoredArray(\"c\", {{\"c1\", \"1\"}, 
{\"c2\", \"2\"}}));\n\n  resp = Run({\"zcard\", \"c\"});\n  EXPECT_THAT(resp, IntArg(0));\n\n  // Count > #elements in set.\n  resp = Run({\"zadd\", \"d\", \"1\", \"d1\", \"2\", \"d2\"});\n  EXPECT_THAT(resp, IntArg(2));\n\n  resp = Run({\"zmpop\", \"1\", \"d\", \"MAX\", \"COUNT\", \"3\"});\n  EXPECT_THAT(resp, ContainsLabeledScoredArray(\"d\", {{\"d1\", \"1\"}, {\"d2\", \"2\"}}));\n\n  resp = Run({\"zcard\", \"d\"});\n  EXPECT_THAT(resp, IntArg(0));\n\n  // First non empty set is not the first set.\n  resp = Run({\"zadd\", \"x\", \"1\", \"x1\"});\n  EXPECT_THAT(resp, IntArg(1));\n\n  resp = Run({\"zadd\", \"y\", \"1\", \"y1\"});\n  EXPECT_THAT(resp, IntArg(1));\n\n  resp = Run({\"zmpop\", \"3\", \"empty\", \"x\", \"y\", \"MAX\"});\n  EXPECT_THAT(resp, ContainsLabeledScoredArray(\"x\", {{\"x1\", \"1\"}}));\n\n  resp = Run({\"zcard\", \"x\"});\n  EXPECT_THAT(resp, IntArg(0));\n\n  resp = Run({\"ZRANGE\", \"y\", \"0\", \"-1\", \"WITHSCORES\"});\n  EXPECT_THAT(resp, RespArray(ElementsAre(\"y1\", \"1\")));\n}\n\nTEST_F(ZSetFamilyTest, BZMPopInvalidSyntax) {\n  // Not enough arguments.\n  auto resp = Run({\"bzmpop\", \"1\", \"1\", \"a\"});\n  EXPECT_THAT(resp, ErrArg(\"wrong number of arguments\"));\n\n  // Zero keys.\n  resp = Run({\"bzmpop\", \"1\", \"0\", \"MIN\", \"COUNT\", \"1\"});\n  EXPECT_THAT(resp, ErrArg(\"at least 1 input key is needed\"));\n\n  // Number of keys not uint.\n  resp = Run({\"bzmpop\", \"1\", \"aa\", \"a\", \"MIN\"});\n  EXPECT_THAT(resp, ErrArg(\"value is not an integer or out of range\"));\n\n  // Missing MIN/MAX.\n  resp = Run({\"bzmpop\", \"1\", \"1\", \"a\", \"COUNT\", \"1\"});\n  EXPECT_THAT(resp, ErrArg(\"syntax error\"));\n\n  // Wrong number of keys.\n  resp = Run({\"bzmpop\", \"1\", \"1\", \"a\", \"b\", \"MAX\"});\n  EXPECT_THAT(resp, ErrArg(\"syntax error\"));\n\n  // Count with no number.\n  resp = Run({\"bzmpop\", \"1\", \"1\", \"a\", \"MAX\", \"COUNT\"});\n  EXPECT_THAT(resp, ErrArg(\"syntax error\"));\n\n  // Count number is not 
uint.\n  resp = Run({\"bzmpop\", \"1\", \"1\", \"a\", \"MIN\", \"COUNT\", \"boo\"});\n  EXPECT_THAT(resp, ErrArg(\"value is not an integer or out of range\"));\n\n  // Too many arguments.\n  resp = Run({\"bzmpop\", \"1\", \"1\", \"c\", \"MAX\", \"COUNT\", \"2\", \"foo\"});\n  EXPECT_THAT(resp, ErrArg(\"syntax error\"));\n\n  // Negative time argument.\n  resp = Run({\"bzmpop\", \"-1\", \"1\", \"a\", \"MIN\"});\n  EXPECT_THAT(resp, ErrArg(\"timeout is negative\"));\n}\n\nTEST_F(ZSetFamilyTest, BZMPop) {\n  // Min operation.\n  auto resp = Run({\"zadd\", \"a\", \"1\", \"a1\", \"2\", \"a2\"});\n  EXPECT_THAT(resp, IntArg(2));\n\n  resp = Run({\"bzmpop\", \"1\", \"1\", \"a\", \"MIN\"});\n  EXPECT_THAT(resp, ContainsLabeledScoredArray(\"a\", {{\"a1\", \"1\"}}));\n\n  resp = Run({\"ZRANGE\", \"a\", \"0\", \"-1\", \"WITHSCORES\"});\n  EXPECT_THAT(resp, RespArray(ElementsAre(\"a2\", \"2\")));\n\n  // Max operation.\n  resp = Run({\"zadd\", \"b\", \"1\", \"b1\", \"2\", \"b2\"});\n  EXPECT_THAT(resp, IntArg(2));\n\n  resp = Run({\"bzmpop\", \"1\", \"1\", \"b\", \"MAX\"});\n  EXPECT_THAT(resp, ContainsLabeledScoredArray(\"b\", {{\"b2\", \"2\"}}));\n\n  resp = Run({\"ZRANGE\", \"b\", \"0\", \"-1\", \"WITHSCORES\"});\n  EXPECT_THAT(resp, RespArray(ElementsAre(\"b1\", \"1\")));\n\n  // Count > 1.\n  resp = Run({\"zadd\", \"c\", \"1\", \"c1\", \"2\", \"c2\"});\n  EXPECT_THAT(resp, IntArg(2));\n\n  resp = Run({\"bzmpop\", \"1\", \"1\", \"c\", \"MAX\", \"COUNT\", \"2\"});\n  EXPECT_THAT(resp, ContainsLabeledScoredArray(\"c\", {{\"c1\", \"1\"}, {\"c2\", \"2\"}}));\n\n  resp = Run({\"zcard\", \"c\"});\n  EXPECT_THAT(resp, IntArg(0));\n\n  // Count > #elements in set.\n  resp = Run({\"zadd\", \"d\", \"1\", \"d1\", \"2\", \"d2\"});\n  EXPECT_THAT(resp, IntArg(2));\n\n  resp = Run({\"bzmpop\", \"1\", \"1\", \"d\", \"MAX\", \"COUNT\", \"3\"});\n  EXPECT_THAT(resp, ContainsLabeledScoredArray(\"d\", {{\"d1\", \"1\"}, {\"d2\", \"2\"}}));\n\n  resp = Run({\"zcard\", \"d\"});\n  
EXPECT_THAT(resp, IntArg(0));\n\n  // First non empty set is not the first set.\n  resp = Run({\"zadd\", \"x\", \"1\", \"x1\"});\n  EXPECT_THAT(resp, IntArg(1));\n\n  resp = Run({\"zadd\", \"y\", \"1\", \"y1\"});\n  EXPECT_THAT(resp, IntArg(1));\n\n  resp = Run({\"bzmpop\", \"1\", \"3\", \"empty\", \"x\", \"y\", \"MAX\"});\n  EXPECT_THAT(resp, ContainsLabeledScoredArray(\"x\", {{\"x1\", \"1\"}}));\n\n  resp = Run({\"zcard\", \"x\"});\n  EXPECT_THAT(resp, IntArg(0));\n\n  resp = Run({\"ZRANGE\", \"y\", \"0\", \"-1\", \"WITHSCORES\"});\n  EXPECT_THAT(resp, RespArray(ElementsAre(\"y1\", \"1\")));\n}\n\nTEST_F(ZSetFamilyTest, BMPOPBlockingTimeout) {\n  RespExpr resp0;\n\n  auto start = absl::Now();\n  auto fb0 = pp_->at(0)->LaunchFiber(Launch::dispatch, [&] {\n    resp0 = Run({\"BZMPOP\", \"1\", \"1\", \"zset1\", \"MIN\"});\n    LOG(INFO) << \"BZMPOP\";\n  });\n  fb0.Join();\n  auto dur = absl::Now() - start;\n\n  // Check that the timeout duration is not too crazy.\n  EXPECT_LT(AbsDuration(dur - absl::Milliseconds(1000)), absl::Milliseconds(300));\n  EXPECT_THAT(resp0, ArgType(RespExpr::NIL));\n}\n\nTEST_F(ZSetFamilyTest, ZPopMin) {\n  auto resp = Run({\"zadd\", \"key\", \"1\", \"a\", \"2\", \"b\", \"3\", \"c\", \"4\", \"d\", \"5\", \"e\", \"6\", \"f\"});\n  EXPECT_THAT(resp, IntArg(6));\n\n  resp = Run({\"zpopmin\", \"key\"});\n  ASSERT_THAT(resp, ArrLen(2));\n  EXPECT_THAT(resp.GetVec(), ElementsAre(\"a\", \"1\"));\n\n  resp = Run({\"zpopmin\", \"key\", \"0\"});\n  ASSERT_THAT(resp, ArrLen(0));\n\n  resp = Run({\"zpopmin\", \"key\", \"2\"});\n  ASSERT_THAT(resp, ArrLen(4));\n  EXPECT_THAT(resp.GetVec(), ElementsAre(\"b\", \"2\", \"c\", \"3\"));\n\n  resp = Run({\"zpopmin\", \"key\", \"-1\"});\n  ASSERT_THAT(resp, ErrArg(\"value is out of range, must be positive\"));\n\n  resp = Run({\"zpopmin\", \"key\", \"1\"});\n  ASSERT_THAT(resp, ArrLen(2));\n  EXPECT_THAT(resp.GetVec(), ElementsAre(\"d\", \"4\"));\n\n  resp = Run({\"zpopmin\", \"key\", \"3\"});\n  
ASSERT_THAT(resp, ArrLen(4));\n  EXPECT_THAT(resp.GetVec(), ElementsAre(\"e\", \"5\", \"f\", \"6\"));\n\n  resp = Run({\"zpopmin\", \"key\", \"1\"});\n  ASSERT_THAT(resp, ArrLen(0));\n}\n\nTEST_F(ZSetFamilyTest, ZPopMax) {\n  auto resp = Run({\"zadd\", \"key\", \"1\", \"a\", \"2\", \"b\", \"3\", \"c\", \"4\", \"d\", \"5\", \"e\", \"6\", \"f\"});\n  EXPECT_THAT(resp, IntArg(6));\n\n  resp = Run({\"zpopmax\", \"key\"});\n  ASSERT_THAT(resp, ArrLen(2));\n  EXPECT_THAT(resp.GetVec(), ElementsAre(\"f\", \"6\"));\n\n  resp = Run({\"zpopmax\", \"key\", \"2\"});\n  ASSERT_THAT(resp, ArrLen(4));\n  EXPECT_THAT(resp.GetVec(), ElementsAre(\"e\", \"5\", \"d\", \"4\"));\n\n  resp = Run({\"zpopmax\", \"key\", \"-1\"});\n  ASSERT_THAT(resp, ErrArg(\"value is out of range, must be positive\"));\n\n  resp = Run({\"zpopmax\", \"key\", \"1\"});\n  ASSERT_THAT(resp, ArrLen(2));\n  EXPECT_THAT(resp.GetVec(), ElementsAre(\"c\", \"3\"));\n\n  resp = Run({\"zpopmax\", \"key\", \"3\"});\n  ASSERT_THAT(resp, ArrLen(4));\n  EXPECT_THAT(resp.GetVec(), ElementsAre(\"b\", \"2\", \"a\", \"1\"));\n\n  resp = Run({\"zpopmax\", \"key\", \"1\"});\n  ASSERT_THAT(resp, ArrLen(0));\n}\n\nTEST_F(ZSetFamilyTest, ZAddPopCrash) {\n  for (int i = 0; i < 129; ++i) {\n    auto resp = Run({\"zadd\", \"key\", absl::StrCat(i), absl::StrCat(\"element:\", i)});\n    EXPECT_THAT(resp, IntArg(1)) << i;\n  }\n\n  auto resp = Run({\"zpopmin\", \"key\"});\n  EXPECT_THAT(resp.GetVec(), ElementsAre(\"element:0\", \"0\"));\n}\n\nTEST_F(ZSetFamilyTest, Resp3) {\n  Run({\"hello\", \"3\"});\n  Run({\"zadd\", \"x\", \"1\", \"a\", \"2\", \"b\"});\n  auto resp = Run({\"zrange\", \"x\", \"0\", \"-1\", \"WITHSCORES\"});\n  ASSERT_THAT(resp, ArrLen(2));\n  ASSERT_THAT(resp.GetVec()[0].GetVec(), ElementsAre(\"a\", DoubleArg(1)));\n  ASSERT_THAT(resp.GetVec()[1].GetVec(), ElementsAre(\"b\", DoubleArg(2)));\n}\n\nTEST_F(ZSetFamilyTest, BlockingIsReleased) {\n  // Inputs for ZSET store commands.\n  Run({\"ZADD\", \"A\", \"1\", \"x\", 
\"2\", \"b\"});\n  Run({\"ZADD\", \"B\", \"1\", \"x\", \"3\", \"b\"});\n  Run({\"ZADD\", \"C\", \"1\", \"x\", \"10\", \"a\"});\n  Run({\"ZADD\", \"D\", \"1\", \"x\", \"5\", \"c\"});\n  Run({\"ZADD\", \"E\", \"2\", \"x\", \"1\", \"c\"});\n  Run({\"ZADD\", \"F\", \"1\", \"c\"});\n\n  vector<string> blocking_keys{\"zset1\", \"zset2\", \"zset3\"};\n  for (const auto& key : blocking_keys) {\n    vector<vector<string>> unblocking_commands;\n    // All commands output the same set {2,x}.\n    unblocking_commands.push_back({\"ZADD\", key, \"2\", \"x\", \"10\", \"y\"});\n    unblocking_commands.push_back({\"ZINCRBY\", key, \"2\", \"x\"});\n    unblocking_commands.push_back({\"ZINTERSTORE\", key, \"2\", \"A\", \"B\"});\n    unblocking_commands.push_back({\"ZUNIONSTORE\", key, \"2\", \"C\", \"D\"});\n    unblocking_commands.push_back({\"ZDIFFSTORE\", key, \"2\", \"E\", \"F\"});\n\n    for (auto& cmd : unblocking_commands) {\n      RespExpr resp0;\n      auto fb0 = pp_->at(0)->LaunchFiber(Launch::dispatch, [&] {\n        resp0 = Run({\"BZPOPMIN\", \"zset1\", \"zset2\", \"zset3\", \"0\"});\n        LOG(INFO) << \"BZPOPMIN\";\n      });\n\n      pp_->at(1)->Await([&] { return Run({cmd.data(), cmd.size()}); });\n      fb0.Join();\n\n      ASSERT_THAT(resp0, ArrLen(3)) << cmd[0];\n      EXPECT_THAT(resp0.GetVec(), ElementsAre(key, \"x\", \"2\")) << cmd[0];\n\n      Run({\"DEL\", key});\n    }\n\n    // Tests for BZMPOP command\n    for (auto& cmd : unblocking_commands) {\n      RespExpr resp0;\n      auto fb0 = pp_->at(0)->LaunchFiber(Launch::dispatch, [&] {\n        resp0 = Run({\"BZMPOP\", \"0\", \"3\", \"zset1\", \"zset2\", \"zset3\", \"MIN\"});\n        LOG(INFO) << \"BZMPOP\";\n      });\n\n      pp_->at(1)->Await([&] { return Run({cmd.data(), cmd.size()}); });\n      fb0.Join();\n\n      ASSERT_THAT(resp0, ArrLen(2)) << cmd[0];\n      EXPECT_THAT(resp0, ContainsLabeledScoredArray(key, {{\"x\", \"2\"}})) << cmd[0];\n\n      Run({\"DEL\", key});\n    }\n  
}\n}\n\nTEST_F(ZSetFamilyTest, BlockingWithIncorrectType) {\n  RespExpr resp0;\n  RespExpr resp1;\n  auto fb0 = pp_->at(0)->LaunchFiber(Launch::dispatch, [&] {\n    resp0 = Run({\"BLPOP\", \"list1\", \"0\"});\n  });\n  auto fb1 = pp_->at(1)->LaunchFiber(Launch::dispatch, [&] {\n    resp1 = Run({\"BZPOPMIN\", \"list1\", \"0\"});\n  });\n\n  ThisFiber::SleepFor(50us);\n  pp_->at(2)->Await([&] { return Run({\"ZADD\", \"list1\", \"1\", \"a\"}); });\n  pp_->at(2)->Await([&] { return Run({\"LPUSH\", \"list1\", \"0\"}); });\n  fb0.Join();\n  fb1.Join();\n\n  EXPECT_THAT(resp1.GetVec(), ElementsAre(\"list1\", \"a\", \"1\"));\n  EXPECT_THAT(resp0.GetVec(), ElementsAre(\"list1\", \"0\"));\n}\n\nTEST_F(ZSetFamilyTest, BlockingTimeout) {\n  RespExpr resp0;\n\n  auto start = absl::Now();\n  auto fb0 = pp_->at(0)->LaunchFiber(Launch::dispatch, [&] {\n    resp0 = Run({\"BZPOPMIN\", \"zset1\", \"1\"});\n    LOG(INFO) << \"BZPOPMIN\";\n  });\n  fb0.Join();\n  auto dur = absl::Now() - start;\n\n  // Check that the timeout duration is not too crazy.\n  EXPECT_LT(AbsDuration(dur - absl::Milliseconds(1000)), absl::Milliseconds(300));\n  EXPECT_THAT(resp0, ArgType(RespExpr::NIL_ARRAY));\n}\n\nTEST_F(ZSetFamilyTest, ZDiffError) {\n  RespExpr resp;\n\n  resp = Run({\"zdiff\", \"-1\", \"z1\"});\n  EXPECT_THAT(resp, ErrArg(\"value is not an integer or out of range\"));\n\n  resp = Run({\"zdiff\", \"0\"});\n  EXPECT_THAT(resp, ErrArg(\"wrong number of arguments\"));\n\n  resp = Run({\"zdiff\", \"0\", \"z1\"});\n  EXPECT_THAT(resp, ErrArg(\"at least 1 input key is needed\"));\n\n  resp = Run({\"zdiff\", \"0\", \"z1\", \"z2\"});\n  EXPECT_THAT(resp, ErrArg(\"at least 1 input key is needed\"));\n\n  EXPECT_EQ(1, CheckedInt({\"sadd\", \"s1\", \"one\"}));\n\n  resp = Run({\"zdiff\", \"2\", \"z1\", \"s1\"});\n  EXPECT_THAT(resp, ErrArg(\"WRONGTYPE Operation against a key holding the wrong kind of value\"));\n\n  resp = Run({\"zdiff\", \"2\", \"s1\", \"z2\"});\n  EXPECT_THAT(resp, 
ErrArg(\"WRONGTYPE Operation against a key holding the wrong kind of value\"));\n}\n\nTEST_F(ZSetFamilyTest, ZDiff) {\n  RespExpr resp;\n\n  EXPECT_EQ(4, CheckedInt({\"zadd\", \"z1\", \"1\", \"one\", \"2\", \"two\", \"3\", \"three\", \"4\", \"four\"}));\n  EXPECT_EQ(2, CheckedInt({\"zadd\", \"z2\", \"1\", \"one\", \"5\", \"five\"}));\n  EXPECT_EQ(2, CheckedInt({\"zadd\", \"z3\", \"2\", \"two\", \"3\", \"three\"}));\n  EXPECT_EQ(1, CheckedInt({\"zadd\", \"z4\", \"4\", \"four\"}));\n\n  resp = Run({\"zdiff\", \"1\", \"z1\"});\n  EXPECT_THAT(resp.GetVec(), ElementsAre(\"one\", \"two\", \"three\", \"four\"));\n\n  resp = Run({\"zdiff\", \"2\", \"z1\", \"z1\"});\n  EXPECT_THAT(resp.GetVec().empty(), true);\n\n  resp = Run({\"zdiff\", \"2\", \"z1\", \"doesnt_exist\"});\n  EXPECT_THAT(resp.GetVec(), ElementsAre(\"one\", \"two\", \"three\", \"four\"));\n\n  resp = Run({\"zdiff\", \"2\", \"z1\", \"z2\"});\n  EXPECT_THAT(resp.GetVec(), ElementsAre(\"two\", \"three\", \"four\"));\n\n  resp = Run({\"zdiff\", \"2\", \"z1\", \"z3\"});\n  EXPECT_THAT(resp.GetVec(), ElementsAre(\"one\", \"four\"));\n\n  resp = Run({\"zdiff\", \"4\", \"z1\", \"z2\", \"z3\", \"z4\"});\n  EXPECT_THAT(resp.GetVec().empty(), true);\n\n  resp = Run({\"zdiff\", \"2\", \"doesnt_exist\", \"key1\"});\n  EXPECT_THAT(resp.GetVec().empty(), true);\n\n  // WITHSCORES\n  resp = Run({\"zdiff\", \"1\", \"z1\", \"WITHSCORES\"});\n  EXPECT_THAT(resp.GetVec(), ElementsAre(\"one\", \"1\", \"two\", \"2\", \"three\", \"3\", \"four\", \"4\"));\n\n  resp = Run({\"zdiff\", \"2\", \"z1\", \"z2\", \"withscores\"});\n  EXPECT_THAT(resp.GetVec(), ElementsAre(\"two\", \"2\", \"three\", \"3\", \"four\", \"4\"));\n}\n\nTEST_F(ZSetFamilyTest, ZDiff_Resp3) {\n  Run({\"hello\", \"3\"});\n  EXPECT_EQ(4, CheckedInt({\"zadd\", \"z1\", \"1\", \"one\", \"2\", \"two\", \"3\", \"three\", \"4\", \"four\"}));\n\n  auto resp = Run({\"zdiff\", \"1\", \"z1\", \"withscores\"});\n  ASSERT_THAT(resp, ArrLen(4));\n  
ASSERT_THAT(resp.GetVec()[0].GetVec(), ElementsAre(\"one\", DoubleArg(1)));\n  ASSERT_THAT(resp.GetVec()[1].GetVec(), ElementsAre(\"two\", DoubleArg(2)));\n  ASSERT_THAT(resp.GetVec()[2].GetVec(), ElementsAre(\"three\", DoubleArg(3)));\n  ASSERT_THAT(resp.GetVec()[3].GetVec(), ElementsAre(\"four\", DoubleArg(4)));\n}\n\nTEST_F(ZSetFamilyTest, ZDiffStoreError) {\n  RespExpr resp;\n\n  resp = Run({\"zdiffstore\", \"key\"});\n  EXPECT_THAT(resp, ErrArg(\"wrong number of arguments\"));\n\n  resp = Run({\"zdiffstore\", \"key\", \"0\"});\n  EXPECT_THAT(resp, ErrArg(\"wrong number of arguments\"));\n\n  resp = Run({\"zdiffstore\", \"key\", \"-1\", \"z1\"});\n  EXPECT_THAT(resp, ErrArg(\"value is not an integer or out of range\"));\n\n  resp = Run({\"zdiffstore\", \"key\", \"0\", \"z1\"});\n  EXPECT_THAT(resp, ErrArg(\"at least 1 input key is needed\"));\n\n  resp = Run({\"zdiffstore\", \"key\", \"0\", \"z1\", \"z2\"});\n  EXPECT_THAT(resp, ErrArg(\"at least 1 input key is needed\"));\n\n  EXPECT_EQ(1, CheckedInt({\"sadd\", \"s1\", \"one\"}));\n\n  resp = Run({\"zdiffstore\", \"key\", \"2\", \"z1\", \"s1\"});\n  EXPECT_THAT(resp, ErrArg(\"WRONGTYPE Operation against a key holding the wrong kind of value\"));\n\n  resp = Run({\"zdiffstore\", \"key\", \"2\", \"s1\", \"z2\"});\n  EXPECT_THAT(resp, ErrArg(\"WRONGTYPE Operation against a key holding the wrong kind of value\"));\n}\n\nTEST_F(ZSetFamilyTest, ZDiffStore) {\n  RespExpr resp;\n\n  EXPECT_EQ(4, CheckedInt({\"zadd\", \"z1\", \"1\", \"one\", \"2\", \"two\", \"3\", \"three\", \"4\", \"four\"}));\n  EXPECT_EQ(2, CheckedInt({\"zadd\", \"z2\", \"1\", \"one\", \"5\", \"five\"}));\n  EXPECT_EQ(2, CheckedInt({\"zadd\", \"z3\", \"2\", \"two\", \"3\", \"three\"}));\n  EXPECT_EQ(1, CheckedInt({\"zadd\", \"z4\", \"4\", \"four\"}));\n\n  resp = Run({\"zdiffstore\", \"key\", \"1\", \"z1\"});\n  EXPECT_THAT(resp, IntArg(4));\n  resp = Run({\"zrange\", \"key\", \"0\", \"-1\", \"withscores\"});\n  EXPECT_THAT(resp.GetVec(), 
ElementsAre(\"one\", \"1\", \"two\", \"2\", \"three\", \"3\", \"four\", \"4\"));\n\n  resp = Run({\"zdiffstore\", \"key\", \"2\", \"z1\", \"z1\"});\n  EXPECT_THAT(resp, IntArg(0));\n  resp = Run({\"zrange\", \"key\", \"0\", \"-1\", \"withscores\"});\n  EXPECT_THAT(resp.GetVec().empty(), true);\n\n  resp = Run({\"zdiffstore\", \"key\", \"4\", \"z1\", \"z2\", \"z3\", \"z4\"});\n  EXPECT_THAT(resp, IntArg(0));\n  resp = Run({\"zrange\", \"key\", \"0\", \"-1\"});\n  EXPECT_THAT(resp.GetVec().empty(), true);\n\n  resp = Run({\"zdiffstore\", \"key\", \"2\", \"z1\", \"doesnt_exist\"});\n  EXPECT_THAT(resp, IntArg(4));\n  resp = Run({\"zrange\", \"key\", \"0\", \"-1\"});\n  EXPECT_THAT(resp.GetVec(), ElementsAre(\"one\", \"two\", \"three\", \"four\"));\n\n  resp = Run({\"zdiffstore\", \"key\", \"2\", \"doesnt_exits\", \"z1\"});\n  EXPECT_THAT(resp, IntArg(0));\n  resp = Run({\"zrange\", \"key\", \"0\", \"-1\"});\n  EXPECT_THAT(resp.GetVec().empty(), true);\n}\n\nTEST_F(ZSetFamilyTest, Count) {\n  for (int i = 0; i < 129; ++i) {\n    auto resp = Run({\"zadd\", \"key\", absl::StrCat(i), absl::StrCat(\"element:\", i)});\n    EXPECT_THAT(resp, IntArg(1)) << i;\n  }\n\n  EXPECT_THAT(CheckedInt({\"zcount\", \"key\", \"-inf\", \"+inf\"}), 129);\n  EXPECT_THAT(CheckedInt({\"zlexcount\", \"key\", \"-\", \"+\"}), 129);\n\n  // Listpack object\n  Run({\"ZADD\", \"short\", \"0\", \"A\"});\n  EXPECT_THAT(CheckedInt({\"ZLEXCOUNT\", \"short\", \"-\", \"-\"}), 0);\n  EXPECT_THAT(CheckedInt({\"ZLEXCOUNT\", \"short\", \"+\", \"+\"}), 0);\n  EXPECT_THAT(CheckedInt({\"ZLEXCOUNT\", \"short\", \"+\", \"-\"}), 0);\n\n  // Sortedset object\n  Run({\"ZADD\", \"long\", \"0\", \"AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\"});\n  EXPECT_THAT(CheckedInt({\"ZLEXCOUNT\", \"long\", \"-\", \"-\"}), 0);\n  EXPECT_THAT(CheckedInt({\"ZLEXCOUNT\", \"long\", \"+\", \"+\"}), 0);\n  EXPECT_THAT(CheckedInt({\"ZLEXCOUNT\", \"long\", \"+\", \"-\"}), 0);\n}\n\nTEST_F(ZSetFamilyTest, RangeLimit) {\n  auto resp = 
Run({\"ZRANGEBYSCORE\", \"\", \"0.0\", \"0.0\", \"limit\", \"0\"});\n  EXPECT_THAT(resp, ErrArg(\"syntax error\"));\n  resp = Run({\"ZRANGEBYSCORE\", \"\", \"0.0\", \"0.0\", \"limit\", \"0\", \"0\"});\n  EXPECT_THAT(resp, ArrLen(0));\n\n  resp = Run({\"ZRANGEBYSCORE\", \"\", \"0.0\", \"0.0\", \"foo\"});\n  EXPECT_THAT(resp, ErrArg(\"unsupported option\"));\n\n  resp = Run({\"ZRANGEBYLEX\", \"foo\", \"-\", \"+\", \"LIMIT\", \"-1\", \"3\"});\n  EXPECT_THAT(resp, ArrLen(0));\n}\n\nTEST_F(ZSetFamilyTest, RangeStore) {\n  EXPECT_EQ(3, CheckedInt({\"ZADD\", \"src\", \"1\", \"a\", \"2\", \"b\", \"3\", \"c\"}));\n  EXPECT_EQ(3, CheckedInt({\"ZRANGESTORE\", \"dest\", \"src\", \"0\", \"-1\"}));\n\n  RespExpr resp = Run({\"ZRANGE\", \"dest\", \"0\", \"-1\", \"withscores\"});\n  EXPECT_THAT(resp.GetVec(), ElementsAre(\"a\", \"1\", \"b\", \"2\", \"c\", \"3\"));\n\n  // Override dest.\n\n  EXPECT_EQ(0, CheckedInt({\"ZRANGESTORE\", \"dest\", \"not-found\", \"0\", \"-1\"}));\n\n  resp = Run({\"ZRANGE\", \"dest\", \"0\", \"-1\"});\n  EXPECT_THAT(resp, ArrLen(0));\n}\n\nTEST_F(ZSetFamilyTest, ZRangeZeroElements) {\n  Run({\"zadd\", \"myzset\", \"1\", \"one\"});\n  auto resp = Run({\"ZRANGE\", \"myzset\", \"0\", \"-1\", \"LIMIT\", \"2\", \"10\"});\n  ASSERT_THAT(resp, ArrLen(0));\n}\n\nTEST_F(ZSetFamilyTest, ZCountMinGreaterThanMaxCrash) {\n  // Add 1000 members to the sorted set\n  for (int i = 1; i <= 1000; ++i) {\n    Run({\"zadd\", \"huge_key\", absl::StrCat(i), absl::StrCat(\"member\", i)});\n  }\n\n  // Expect ZCOUNT to return 0 when min > max\n  auto resp = Run({\"zcount\", \"huge_key\", \"945\", \"261\"});\n  EXPECT_THAT(resp, IntArg(0));\n}\n\n}  // namespace dfly\n"
  },
  {
    "path": "tests/README.md",
    "content": "# System tests\n\n\n## Pytest\n\nThe tests assume you have the \"dragonfly\" binary in `<root>/build-dbg` directory.\nYou can override the location of the binary using `DRAGONFLY_PATH` environment var.\n\n### Important fixtures\n\n- `df_server` is the default instance that is available for testing. Use the `dfly_args` decorator to change its default arguments.\n- `client` and `async_client` are clients to the default instance. The default instance is re-used accross tests with the same arguments, but each new client flushes the instance.\n- `pool` and `async_pool` are client pools that are connected to the default instance\n\n### Custom arguments\n\n- use `--gdb` to start all instances inside gdb.\n- use `--df arg=val` to pass custom arguments to all dragonfly instances. Can be used multiple times.\n- use `--log-seeder file` to store all single-db commands from the lastest tests seeder inside file.\n- use `--existing-port` to use an existing instance for tests instead of starting one\n- use `--rand-seed` to set the global random seed. 
Makes the seeder predictable.\n- use `--repeat <N>` to run a test multiple times.\n\nfor example,\n\n```sh\npytest dragonfly/connection_test.py -s  --df logtostdout --df vmodule=dragonfly_connection=2 -k test_subscribe\n```\n### Before you start\nPlease make sure that you have python 3 installed on your local host.\nIf you have both python 2 and python 3 installed on your host, you can run the tests with the following command:\n```\npython3 -m pytest -xv dragonfly\n```\nIt is advisable to use a python virtual environment: [python virtual environment](https://docs.python.org/3/library/venv.html).\nTo activate it, run:\n```\nsource <virtual env name>/bin/activate\n```\nThen install all the required dependencies for the tests:\n```\npip3 install -r dragonfly/requirements.txt\n```\n\n### Running the tests\nto run pytest, run:\n`pytest -xv dragonfly`\n\nto run selectively, use:\n`pytest -xv dragonfly -k <substring>`\nFor more pytest flags [check here](https://fig.io/manual/pytest).\n\n## Writing tests\nThe [Getting Started](https://docs.pytest.org/en/7.1.x/getting-started.html) guide is a great resource to become familiar with writing pytest test cases.\n\nPytest will recursively search the `tests/dragonfly` directory for files matching the patterns `test_*.py` or `*_test.py` for functions matching these [rules](https://docs.pytest.org/en/7.1.x/explanation/goodpractices.html#conventions-for-python-test-discovery):\n- Functions or methods outside of a class prefixed by `test`\n- Functions or methods prefixed by `test` inside a class prefixed by `Test` (without an `__init__` method)\n\n**Note**: When making a new directory in `tests/dragonfly` be sure to create an `__init__.py` file to avoid [name conflicts](https://docs.pytest.org/en/7.1.x/explanation/goodpractices.html#tests-outside-application-code)\n\n### Passing CLI commands to Dragonfly\nTo pass custom flags to the Dragonfly executable two class decorators have been created. 
`@dfly_args` allows you to pass a list of parameters to the Dragonfly executable, similarly `@dfly_multi_test_args` allows you to specify multiple parameter configurations to test with a given test class.\n\nIn the case of `@dfly_multi_test_args` each parameter configuration will create one Dragonfly instance which each test will receive a client to as described in the [above section](#interacting-with-dragonfly)\n\nParameters can use environment variables with a formatted string where `\"{<VAR>}\"` will be replaced with the value of the `<VAR>` environment variable. Due to [current pytest limitations](https://github.com/pytest-dev/pytest/issues/349) fixtures cannot be passed to either of these decorators, this is currently the provided way to pass the temporary directory path in a CLI parameter.\n\n### Test Examples\n- **[snapshot_test](./dragonfly/snapshot_test.py)**: Example test using `@dfly_args`, environment variables and pre-test setup\n- **[generic_test](./dragonfly/generic_test.py)**: Example test using `@dfly_multi_test_args`\n- **[connection_test](./dragonfly/connection_test.py)**: Example for testing running with multiple asynchronous connections.\n\n### Writing your own fixtures\nThe fixture functions are located in [conftest.py](./dragonfly/conftest.py).\nYou can write your own fixture inside this file, as you see fit. 
Just make sure, before adding a new fixture, that there isn't one already written.\nTry to make the fixture run at the smallest scope possible to ensure that the tests can be independent of each other (this ensures no side effects, matching our policy of \"share nothing\").\n\n### Managing test environment\nDo not forget to add any new dependency that you may have introduced to the [dragonfly/requirements.txt](./dragonfly/requirements.txt) file.\nYou can do so by running\n```\npip3 freeze > requirements.txt\n```\nfrom [dragonfly](./dragonfly/) directory.\n\n# Integration tests\nIntegration tests are located in the `integration` folder.\n\nTo simplify running integration tests, each package should have its own Dockerfile. The Dockerfile should contain everything needed in order to test the package against Dragonfly. Docker can assume Dragonfly is running on localhost:6379.\nTo run the test:\n```\ndocker build -t [test-name] -f [test-dockerfile-name] .\ndocker run --network=host [test-name]\n```\n\n## Node-Redis\nIntegration test for node-redis client.\nBuild:\n```\ndocker build -t node-redis-test -f ./node-redis.Dockerfile .\n```\nRun:\n```\ndocker run --network=host node-redis-test\n```\n\nto run only selected tests use:\n\n```\ndocker run --network=host node-redis-test npm run test -w ./packages/client -- --redis-version=2.8 -g <regex>\n```\n\nIn general, you can add this way any option from [mocha framework](https://mochajs.org/#command-line-usage).\n\n## ioredis\nNOTE: we are depending on some changes to ioredis test, in order to pass more tests, as we are currently failing\nbecause in monitor command we always return the command name in upper case, and the tests expect it to\nbe in lower case.\n\nIntegration tests for ioredis client.\n[ioredis](https://github.com/luin/ioredis) is a robust, performance-focused and full-featured Redis client for Node.js.\nIt contains a very extensive test coverage for Redis. 
Currently not all features are supported by Dragonfly.\nAs such please use the scripts for running the test successfully -\n **[run_ioredis_on_docker.sh](./integration/run_ioredis_on_docker.sh)**: to run the supported tests on a docker image\n Please note that you can run this script in two forms:\n\n If the image is already built:\n ```\n ./integration/run_ioredis_on_docker.sh\n ```\n\nA safer way is to build the image (or ensure that it is up to date), and then execute the tests:\n```\n ./integration/run_ioredis_on_docker.sh --build\n ```\n The \"--build\" flag first builds the image and then executes the tests.\n Please do not try to run outside of the docker image, as the image brings the correct version and patches some tests.\nPlease note that the script only runs tests that are currently supported\nYou can just build the image with\n\nBuild:\n```\ndocker build -t ioredis-test -f ./ioredis.Dockerfile .\n```\n\nFor more details on the entrypoint setup, compare the `ioredis.Dockerfile`\nwith the npm test script located on the `package.json` of the ioredis project.\n\n## Jedis\nIntegration test for the Jedis client.\nBuild:\n```\ndocker build -t jedis-test -f ./jedis.Dockerfile .\n```\nRun:\n```\ndocker run --network=host jedis-test\n```\n"
  },
  {
    "path": "tests/dragonfly/__init__.py",
    "content": "import pytest\n\n\ndef dfly_args(*args):\n    \"\"\"Used to define a singular set of arguments for dragonfly test\"\"\"\n    return pytest.mark.parametrize(\"df_factory\", args, indirect=True)\n\n\ndef dfly_multi_test_args(*args):\n    \"\"\"Used to define multiple sets of arguments to test multiple dragonfly configurations\"\"\"\n    return pytest.mark.parametrize(\"df_factory\", args, indirect=True)\n\n\nclass PortPicker:\n    \"\"\"A simple port manager to allocate available ports for tests\"\"\"\n\n    def __init__(self):\n        self.next_port = 5555\n\n    def get_available_port(self):\n        while not self.is_port_available(self.next_port):\n            self.next_port += 1\n        self.next_port += 1\n        return self.next_port - 1\n\n    def is_port_available(self, port):\n        import socket\n\n        with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:\n            return s.connect_ex((\"localhost\", port)) != 0\n"
  },
  {
    "path": "tests/dragonfly/acl_family_test.py",
    "content": "import tempfile\n\nimport async_timeout\n\nfrom . import dfly_args\nfrom .utility import *\n\n\n@pytest.mark.asyncio\nasync def test_acl_setuser(async_client):\n    await async_client.execute_command(\"ACL SETUSER kostas\")\n    result = await async_client.execute_command(\"ACL LIST\")\n    assert 2 == len(result)\n    assert \"user kostas off resetchannels -@all $all\" in result\n\n    await async_client.execute_command(\"ACL SETUSER kostas ON\")\n    result = await async_client.execute_command(\"ACL LIST\")\n    assert \"user kostas on resetchannels -@all $all\" in result\n\n    await async_client.execute_command(\"ACL SETUSER kostas +@list +@string +@admin\")\n    result = await async_client.execute_command(\"ACL LIST\")\n    # TODO consider printing to lowercase\n    assert \"user kostas on resetchannels -@all +@list +@string +@admin $all\" in result\n\n    await async_client.execute_command(\"ACL SETUSER kostas -@list -@admin\")\n    result = await async_client.execute_command(\"ACL LIST\")\n    assert \"user kostas on resetchannels -@all +@string -@list -@admin $all\" in result\n\n    # mix and match\n    await async_client.execute_command(\"ACL SETUSER kostas +@list -@string\")\n    result = await async_client.execute_command(\"ACL LIST\")\n    assert \"user kostas on resetchannels -@all -@admin +@list -@string $all\" in result\n\n    # mix and match interleaved\n    await async_client.execute_command(\"ACL SETUSER kostas +@set -@set +@set\")\n    result = await async_client.execute_command(\"ACL LIST\")\n    assert \"user kostas on resetchannels -@all -@admin +@list -@string +@set $all\" in result\n\n    await async_client.execute_command(\"ACL SETUSER kostas +@all\")\n    result = await async_client.execute_command(\"ACL LIST\")\n    assert \"user kostas on resetchannels -@admin +@list -@string +@set +@all $all\" in result\n\n    # commands\n    await async_client.execute_command(\"ACL SETUSER kostas +set +get +hset\")\n    result = await 
async_client.execute_command(\"ACL LIST\")\n    assert (\n        \"user kostas on resetchannels -@admin +@list -@string +@set +@all +set +get +hset $all\"\n        in result\n    )\n\n    await async_client.execute_command(\"ACL SETUSER kostas -set -get +hset\")\n    result = await async_client.execute_command(\"ACL LIST\")\n    assert (\n        \"user kostas on resetchannels -@admin +@list -@string +@set +@all -set -get +hset $all\"\n        in result\n    )\n\n    # interleaved\n    await async_client.execute_command(\"ACL SETUSER kostas -hset +get -get -@all\")\n    result = await async_client.execute_command(\"ACL LIST\")\n    assert (\n        \"user kostas on resetchannels -@admin +@list -@string +@set -set -hset -get -@all $all\"\n        in result\n    )\n\n    # interleaved with categories\n    await async_client.execute_command(\"ACL SETUSER kostas +@string +get -get +set\")\n    result = await async_client.execute_command(\"ACL LIST\")\n    assert (\n        \"user kostas on resetchannels -@admin +@list +@set -hset -@all +@string -get +set $all\"\n        in result\n    )\n\n\n@pytest.mark.asyncio\nasync def test_acl_categories(async_client):\n    await async_client.execute_command(\n        \"ACL SETUSER vlad ON >mypass -@all +@string +@list +@connection ~*\"\n    )\n\n    result = await async_client.execute_command(\"AUTH vlad mypass\")\n    assert result == \"OK\"\n\n    result = await async_client.execute_command(\"SET foo bar\")\n    assert result == \"OK\"\n\n    result = await async_client.execute_command(\"LPUSH mykey space_monkey\")\n    assert result == 1\n\n    # This should fail, vlad does not have @admin\n    with pytest.raises(redis.exceptions.ResponseError):\n        result = await async_client.execute_command(\"ACL SETUSER vlad ON >mypass\")\n\n    # This should fail, vlad does not have @sortedset\n    with pytest.raises(redis.exceptions.ResponseError):\n        await async_client.execute_command(\"ZADD myset 1 two\")\n\n    result = 
await async_client.execute_command(\"AUTH default nopass\")\n    assert result == \"OK\"\n\n    # Make vlad an admin\n    await async_client.execute_command(\"ACL SETUSER vlad -@string\")\n    assert result == \"OK\"\n\n    result = await async_client.execute_command(\"AUTH vlad mypass\")\n    assert result == \"OK\"\n\n    with pytest.raises(redis.exceptions.ResponseError):\n        await async_client.execute_command(\"GET foo\")\n\n    result = await async_client.execute_command(\"AUTH default nopass\")\n    assert result == \"OK\"\n\n    # Vlad goes rogue starts giving admin stats to random users\n    await async_client.execute_command(\"ACL SETUSER adi >adi +@admin\")\n    assert result == \"OK\"\n\n    # Vlad can now execute everything\n    await async_client.execute_command(\"ACL SETUSER vlad +@all\")\n    assert result == \"OK\"\n\n    await async_client.execute_command(\"ZADD myset 1 two\")\n    assert result == \"OK\"\n\n\n@pytest.mark.asyncio\nasync def test_acl_commands(async_client):\n    await async_client.execute_command(\"ACL SETUSER random ON >mypass -@all +set +get ~*\")\n\n    result = await async_client.execute_command(\"AUTH random mypass\")\n    assert result == \"OK\"\n\n    result = await async_client.execute_command(\"SET foo bar\")\n    assert result == \"OK\"\n\n    with pytest.raises(redis.exceptions.ResponseError):\n        await async_client.execute_command(\"ZADD myset 1 two\")\n\n\n@pytest.mark.asyncio\nasync def test_acl_cat_commands_multi_exec_squash(df_factory):\n    df = df_factory.create(multi_exec_squash=True, port=1111)\n\n    df.start()\n\n    # Testing acl categories\n    client = aioredis.Redis(port=df.port, decode_responses=True)\n    res = await client.execute_command(\"ACL SETUSER kk ON >kk +@transaction +@string ~*\")\n    assert res == \"OK\"\n\n    res = await client.execute_command(\"AUTH kk kk\")\n    assert res == \"OK\"\n\n    await client.execute_command(\"MULTI\")\n    assert res == \"OK\"\n    for x in 
range(33):\n        await client.execute_command(f\"SET x{x} {x}\")\n    await client.execute_command(\"EXEC\")\n\n    await client.aclose()\n    client = aioredis.Redis(port=df.port, decode_responses=True)\n\n    # NOPERM while executing multi\n    await client.execute_command(\"ACL SETUSER kk -@string\")\n    assert res == \"OK\"\n    await client.execute_command(\"AUTH kk kk\")\n    assert res == \"OK\"\n    await client.execute_command(\"MULTI\")\n    assert res == \"OK\"\n\n    with pytest.raises(redis.exceptions.NoPermissionError):\n        await client.execute_command(f\"SET x bar\")\n    await client.aclose()\n\n    # NOPERM between multi and exec\n    admin_client = aioredis.Redis(port=df.port, decode_responses=True)\n    res = await client.execute_command(\"ACL SETUSER kk +@string\")\n    assert res == \"OK\"\n\n    client = aioredis.Redis(port=df.port, decode_responses=True)\n    res = await client.execute_command(\"AUTH kk kk\")\n    assert res == \"OK\"\n    # CLIENT has permissions, starts MULTI and issues a bunch of SET commands\n    await client.execute_command(\"MULTI\")\n    assert res == \"OK\"\n    for x in range(33):\n        await client.execute_command(f\"SET x{x} {x}\")\n\n    # revokes permissions after MULTI; ACL checks were done when the commands were queued,\n    # so already-queued SET commands still execute successfully on EXEC\n    res = await admin_client.execute_command(\"ACL SETUSER kk -@string\")\n    assert res == \"OK\"\n\n    res = await client.execute_command(\"EXEC\")\n    for res in res:\n        assert res == \"OK\"\n\n    await admin_client.aclose()\n    await client.aclose()\n\n    # Testing acl commands\n    client = aioredis.Redis(port=df.port, decode_responses=True)\n    res = await client.execute_command(\"ACL SETUSER myuser ON >kk +@transaction +set ~*\")\n    assert res == \"OK\"\n\n    await client.execute_command(\"AUTH myuser kk\")\n    assert \"OK\" == await client.execute_command(\"MULTI\")\n    await 
client.execute_command(f\"SET x bar\")\n    await client.execute_command(\"EXEC\")\n\n    # NOPERM between multi and exec\n    admin_client = aioredis.Redis(port=df.port, decode_responses=True)\n    res = await admin_client.execute_command(\"ACL SETUSER myuser -set\")\n    assert res == \"OK\"\n\n    # NOPERM while executing multi\n    await client.execute_command(\"MULTI\")\n\n    # retry for a few seconds while the ACL SETUSER propagates, some SET commands might get through\n    start = time.time()\n    denied = False\n    while not denied and time.time() - start < 10:\n        try:\n            await client.execute_command(f\"SET x bar\")\n            await asyncio.sleep(0.1)\n        except redis.exceptions.NoPermissionError:\n            denied = True\n        except Exception as e:\n            assert False, f\"failed with unexpected error: {e}\"\n    assert denied, \"all SET commands succeeded unexpectedly defying ACL\"\n\n\n@pytest.mark.asyncio\nasync def test_acl_deluser(df_server):\n    client = df_server.client()\n\n    assert await client.execute_command(\"ACL SETUSER george ON >pass +@transaction +set ~*\") == \"OK\"\n    assert await client.execute_command(\"AUTH george pass\") == \"OK\"\n\n    assert await client.execute_command(\"MULTI\") == \"OK\"\n    assert await client.execute_command(\"SET the_answer 42\") == \"QUEUED\"\n\n    admin_client = df_server.client()\n    assert await admin_client.execute_command(\"ACL DELUSER george\") == 1\n\n    # the connection was destroyed so EXEC will be executed in the new connection without MULTI\n    with pytest.raises(redis.exceptions.ResponseError):\n        await client.execute_command(\"EXEC\")\n\n    assert await client.execute_command(\"ACL WHOAMI\") == \"User is default\"\n\n\nscript = \"\"\"\nfor i = 1, 10000 do\n  redis.call('SET', 'key', i)\n  redis.call('SET', 'key1', i)\n  redis.call('SET', 'key2', i)\n  redis.call('SET', 'key3', 
i)\nend\n\"\"\"\n\n\n@pytest.mark.asyncio\n@pytest.mark.skip(\"Flaky on CI, needs investigation\")\nasync def test_acl_del_user_while_running_lua_script(df_server):\n    client = aioredis.Redis(port=df_server.port)\n    await client.execute_command(\"ACL SETUSER kostas ON >kk +@string +@scripting ~*\")\n    await client.execute_command(\"AUTH kostas kk\")\n    admin_client = aioredis.Redis(port=df_server.port, decode_responses=True)\n\n    eval_task = asyncio.create_task(client.eval(script, 4, \"key\", \"key1\", \"key2\", \"key3\"))\n\n    # Let the script start\n    await asyncio.sleep(0.1)\n\n    # Delete the user while the script is running\n    await admin_client.execute_command(\"ACL DELUSER kostas\")\n\n    # We expect the connection to be closed, so eval task should raise ConnectionError\n    with pytest.raises(redis.exceptions.ConnectionError):\n        await eval_task\n\n    # The script should have run to completion on the server side.\n    for i in range(1, 4):\n        res = await admin_client.get(f\"key{i}\")\n        assert res == \"10000\"\n\n\n@pytest.mark.asyncio\n@pytest.mark.skip(\"Check TODO in the body below\")\nasync def test_acl_with_long_running_script(df_server):\n    client = aioredis.Redis(port=df_server.port)\n    await client.execute_command(\"ACL SETUSER roman ON >yoman +@string +@scripting ~*\")\n    await client.execute_command(\"AUTH roman yoman\")\n    admin_client = aioredis.Redis(port=df_server.port, decode_responses=True)\n\n    eval_task = asyncio.create_task(client.eval(script, 4, \"key\", \"key1\", \"key2\", \"key3\"))\n\n    # Let the script start\n    await asyncio.sleep(0.1)\n\n    # Change permissions while the script is running\n    await admin_client.execute_command(\"ACL SETUSER roman -@string -@scripting\")\n\n    # The script should continue and finish successfully\n    # TODO(fix): acl context should be immutable while the script is running. 
This requires\n    # a \"dummy\" context so we can allow acl commands to run in parallel but we don't use stubs\n    # anymore. Figure out a good solution for this.\n    await eval_task\n\n    for i in range(1, 4):\n        res = await admin_client.get(f\"key{i}\")\n        assert res == \"10000\"\n\n\ndef create_temp_file(content, tmp_dir):\n    file = tempfile.NamedTemporaryFile(mode=\"w\", dir=tmp_dir, delete=False)\n    acl = os.path.join(tmp_dir, file.name)\n    file.write(content)\n    file.flush()\n    return acl\n\n\n@pytest.mark.asyncio\n@dfly_args({\"port\": 1111})\nasync def test_bad_acl_file(df_factory, tmp_dir):\n    acl = create_temp_file(\"ACL SETUSER kostas ON >mypass +@WRONG\", tmp_dir)\n\n    df = df_factory.create(aclfile=acl)\n\n    df.start()\n\n    client = aioredis.Redis(port=df.port)\n\n    with pytest.raises(redis.exceptions.ResponseError):\n        await client.execute_command(\"ACL LOAD\")\n\n\n@pytest.mark.asyncio\n@dfly_args({\"port\": 1111})\nasync def test_good_acl_file(df_factory, tmp_dir):\n    # The hash below is password temp\n    acl = create_temp_file(\n        \"USER MrFoo ON #a6864eb339b0e1f6e00d75293a8840abf069a2c0fe82e6e53af6ac099793c1d5 >mypass &bar &r*nd\",\n        tmp_dir,\n    )\n    df = df_factory.create(aclfile=acl)\n\n    df.start()\n    client = df.client()\n\n    await client.execute_command(\"ACL LOAD\")\n    result = await client.execute_command(\"ACL LIST\")\n    assert 2 == len(result)\n    assert (\n        \"user MrFoo on #ea71c25a7a60224 #a6864eb339b0e1f resetchannels &bar &r*nd -@all $all\"\n        in result\n        or \"user MrFoo on #a6864eb339b0e1f #ea71c25a7a60224 resetchannels &bar &r*nd -@all $all\"\n        in result\n    )\n    assert \"user default on nopass ~* &* +@all $all\" in result\n    await client.execute_command(\"ACL SETUSER MrFoo +@all $0\")\n    # Check multiple passwords work\n    assert \"OK\" == await client.execute_command(\"AUTH mypass\")\n    assert \"OK\" == await 
client.execute_command(\"AUTH temp\")\n    assert \"OK\" == await client.execute_command(\"AUTH default\")\n    await client.execute_command(\"ACL DELUSER MrFoo\")\n\n    await client.execute_command(\"ACL SETUSER roy ON >mypass +@string +hset $1\")\n    await client.execute_command(\"ACL SETUSER shahar >mypass +@set $2\")\n    await client.execute_command(\"ACL SETUSER vlad ~foo ~bar* +@string $3\")\n\n    result = await client.execute_command(\"ACL LIST\")\n    assert 4 == len(result)\n    assert \"user roy on #ea71c25a7a60224 resetchannels -@all +@string +hset $1\" in result\n    assert \"user shahar off #ea71c25a7a60224 resetchannels -@all +@set $2\" in result\n    assert \"user vlad off ~foo ~bar* resetchannels -@all +@string $3\" in result\n    assert \"user default on nopass ~* &* +@all $all\" in result\n\n    result = await client.execute_command(\"ACL DELUSER shahar\")\n    assert result == 1\n\n    result = await client.execute_command(\"ACL SAVE\")\n\n    result = await client.execute_command(\"ACL LOAD\")\n\n    result = await client.execute_command(\"ACL LIST\")\n    assert 3 == len(result)\n    assert \"user roy on #ea71c25a7a60224 resetchannels -@all +@string +hset $1\" in result\n    assert \"user vlad off ~foo ~bar* resetchannels -@all +@string $3\" in result\n    assert \"user default on nopass ~* &* +@all $all\" in result\n\n\n@pytest.mark.asyncio\nasync def test_acl_log(async_client):\n    res = await async_client.execute_command(\"ACL LOG\")\n    assert [] == res\n\n    await async_client.execute_command(\"ACL SETUSER elon >mars ON +@string +@dangerous ~*\")\n\n    with pytest.raises(redis.exceptions.AuthenticationError):\n        await async_client.execute_command(\"AUTH elon wrong\")\n\n    res = await async_client.execute_command(\"ACL LOG\")\n    assert 1 == len(res)\n    assert res[0][\"reason\"] == \"AUTH\"\n    assert res[0][\"object\"] == \"AUTH\"\n    assert res[0][\"username\"] == \"elon\"\n\n    await 
async_client.execute_command(\"ACL LOG RESET\")\n    res = await async_client.execute_command(\"ACL LOG\")\n    assert 0 == len(res)\n\n    res = await async_client.execute_command(\"AUTH elon mars\")\n    res = await async_client.execute_command(\"SET mykey 22\")\n\n    with pytest.raises(redis.exceptions.ResponseError):\n        await async_client.execute_command(\"hset mk kk 22\")\n\n    res = await async_client.execute_command(\"ACL LOG\")\n    assert 1 == len(res)\n    assert res[0][\"reason\"] == \"COMMAND\"\n    assert res[0][\"object\"] == \"HSET\"\n    assert res[0][\"username\"] == \"elon\"\n\n    with pytest.raises(redis.exceptions.ResponseError):\n        await async_client.execute_command(\"LPUSH mylist 2\")\n\n    res = await async_client.execute_command(\"ACL LOG\")\n    assert 2 == len(res)\n\n    res = await async_client.execute_command(\"ACL LOG RESET\")\n    await async_client.execute_command(\"ACL SETUSER elon resetkeys ~foo\")\n\n    with pytest.raises(redis.exceptions.ResponseError):\n        await async_client.execute_command(\"SET bar val\")\n\n    res = await async_client.execute_command(\"ACL LOG\")\n    assert 1 == len(res)\n    assert res[0][\"reason\"] == \"KEY\"\n    assert res[0][\"object\"] == \"SET\"\n    assert res[0][\"username\"] == \"elon\"\n\n\n@pytest.mark.asyncio\n@dfly_args({\"port\": 1111, \"admin_port\": 1112, \"requirepass\": \"mypass\"})\nasync def test_require_pass(df_factory):\n    df = df_factory.create()\n    df.start()\n\n    client = aioredis.Redis(port=df.port)\n\n    with pytest.raises(redis.exceptions.AuthenticationError):\n        await client.execute_command(\"AUTH default wrongpass\")\n\n    client = aioredis.Redis(password=\"mypass\", port=df.port, decode_responses=True)\n\n    res = await client.execute_command(\"AUTH default mypass\")\n    assert res == \"OK\"\n\n    res = await client.execute_command(\"CONFIG SET requirepass newpass\")\n    assert res == \"OK\"\n\n    res = await 
client.execute_command(\"AUTH default newpass\")\n    assert res == \"OK\"\n\n    client = aioredis.Redis(password=\"newpass\", port=df.admin_port, decode_responses=True)\n\n    await client.execute_command(\"SET foo 44\")\n    res = await client.execute_command(\"GET foo\")\n    assert res == \"44\"\n\n\n@pytest.mark.asyncio\n@dfly_args({\"port\": 1111, \"requirepass\": \"temp\"})\nasync def test_require_pass_with_acl_file_order(df_factory, tmp_dir):\n    acl = create_temp_file(\n        \"USER default ON >jordan ~* +@all\",\n        tmp_dir,\n    )\n\n    df = df_factory.create(aclfile=acl)\n    df.start()\n\n    client = aioredis.Redis(username=\"default\", password=\"jordan\", port=df.port)\n\n    assert await client.set(\"foo\", \"bar\")\n\n\n@pytest.mark.asyncio\nasync def test_set_acl_file(async_client: aioredis.Redis, tmp_dir):\n    # Note the extra space below, it's intented to also check that we properly parse extra spaces\n    acl_file_content = \"USER    roy ON #ea71c25a7a602246b4c39824b855678894a96f43bb9b71319c39700a1e045222 +@string +@fast +hset\\nUSER john on nopass +@string\"\n\n    acl = create_temp_file(acl_file_content, tmp_dir)\n\n    await async_client.execute_command(f\"CONFIG SET aclfile {acl}\")\n\n    await async_client.execute_command(\"ACL LOAD\")\n\n    result = await async_client.execute_command(\"ACL LIST\")\n    assert 3 == len(result)\n\n    result = await async_client.execute_command(\"AUTH roy mypass\")\n    assert result == \"OK\"\n\n    result = await async_client.execute_command(\"AUTH john nopass\")\n    assert result == \"OK\"\n\n\n@pytest.mark.asyncio\n@dfly_args({\"proactor_threads\": 1})\nasync def test_set_len_acl_log(async_client):\n    res = await async_client.execute_command(\"ACL LOG\")\n    assert [] == res\n\n    await async_client.execute_command(\"ACL SETUSER elon >mars ON +@string +@dangerous\")\n\n    for x in range(7):\n        with pytest.raises(redis.exceptions.AuthenticationError):\n            await 
async_client.execute_command(\"AUTH elon wrong\")\n\n    res = await async_client.execute_command(\"ACL LOG\")\n    assert 7 == len(res)\n\n    await async_client.execute_command(f\"CONFIG SET acllog_max_len 3\")\n\n    res = await async_client.execute_command(\"ACL LOG\")\n    assert 3 == len(res)\n\n    await async_client.execute_command(f\"CONFIG SET acllog_max_len 10\")\n\n    for x in range(7):\n        with pytest.raises(redis.exceptions.AuthenticationError):\n            await async_client.execute_command(\"AUTH elon wrong\")\n\n    res = await async_client.execute_command(\"ACL LOG\")\n    assert 10 == len(res)\n\n\n@pytest.mark.asyncio\nasync def test_acl_keys(async_client):\n    await async_client.execute_command(\"ACL SETUSER mrkeys ON >mrkeys allkeys +@admin\")\n    await async_client.execute_command(\"AUTH mrkeys mrkeys\")\n\n    with pytest.raises(redis.exceptions.ResponseError):\n        await async_client.execute_command(\"SET foo bar\")\n\n    await async_client.execute_command(\n        \"ACL SETUSER mrkeys ON >mrkeys resetkeys +@string ~foo ~bar* ~dr*gon\"\n    )\n\n    with pytest.raises(redis.exceptions.ResponseError):\n        await async_client.execute_command(\"SET random rand\")\n\n    assert \"OK\" == await async_client.execute_command(\"SET foo val\")\n    assert \"OK\" == await async_client.execute_command(\"SET bar val\")\n    assert \"OK\" == await async_client.execute_command(\"SET barsomething val\")\n    assert \"OK\" == await async_client.execute_command(\"SET dragon val\")\n\n    await async_client.execute_command(\"ACL SETUSER mrkeys ON >mrkeys allkeys +@sortedset\")\n    assert \"OK\" == await async_client.execute_command(\"SET random rand\")\n\n    await async_client.execute_command(\n        \"ACL SETUSER mrkeys ON >mrkeys resetkeys resetkeys %R~foo %W~bar\"\n    )\n\n    with pytest.raises(redis.exceptions.ResponseError):\n        await async_client.execute_command(\"SET foo val\")\n    assert \"val\" == await 
async_client.execute_command(\"GET foo\")\n\n    with pytest.raises(redis.exceptions.ResponseError):\n        await async_client.execute_command(\"GET bar\")\n    assert \"OK\" == await async_client.execute_command(\"SET bar val\")\n\n    await async_client.execute_command(\"ACL SETUSER mrkeys resetkeys ~bar* +@sortedset\")\n    assert 1 == await async_client.execute_command(\"ZADD barz1 1 val1\")\n    assert 1 == await async_client.execute_command(\"ZADD barz2 1 val2\")\n    # reject because bonus key does not match\n    with pytest.raises(redis.exceptions.ResponseError):\n        await async_client.execute_command(\"ZUNIONSTORE destkey 2 barz1 barz2\")\n\n\n@pytest.mark.asyncio\nasync def test_namespaces(df_server):\n    admin = df_server.client()\n    assert await admin.execute_command(\"SET foo admin\") == \"OK\"\n    assert await admin.execute_command(\"GET foo\") == \"admin\"\n\n    # Create ns space named 'ns1'\n    await admin.execute_command(\"ACL SETUSER adi NAMESPACE:ns1 ON >adi_pass +@all ~*\")\n\n    adi = df_server.client()\n    assert await adi.execute_command(\"AUTH adi adi_pass\") == \"OK\"\n    assert await adi.execute_command(\"SET foo bar\") == \"OK\"\n    assert await adi.execute_command(\"GET foo\") == \"bar\"\n    assert await admin.execute_command(\"GET foo\") == \"admin\"\n\n    # Adi and Shahar are on the same team\n    await admin.execute_command(\"ACL SETUSER shahar NAMESPACE:ns1 ON >shahar_pass +@all ~*\")\n\n    shahar = df_server.client()\n    assert await shahar.execute_command(\"AUTH shahar shahar_pass\") == \"OK\"\n    assert await shahar.execute_command(\"GET foo\") == \"bar\"\n    assert await shahar.execute_command(\"SET foo bar2\") == \"OK\"\n    assert await adi.execute_command(\"GET foo\") == \"bar2\"\n\n    # Roman is a CTO, he has his own private space\n    await admin.execute_command(\"ACL SETUSER roman NAMESPACE:ns2 ON >roman_pass +@all ~*\")\n\n    roman = df_server.client()\n    assert await roman.execute_command(\"AUTH 
roman roman_pass\") == \"OK\"\n    assert await roman.execute_command(\"GET foo\") == None\n\n\n@pytest.mark.asyncio\nasync def test_default_user_bug(df_server):\n    client = df_server.client()\n\n    await client.execute_command(\"ACL SETUSER default -@all\")\n    await client.aclose()\n\n    client = df_server.client()\n\n    with pytest.raises(redis.exceptions.ResponseError):\n        await client.execute_command(\"SET foo bar\")\n\n\n@pytest.mark.asyncio\nasync def test_auth_resp3_bug(df_factory):\n    df = df_factory.create()\n    df.start()\n\n    client = aioredis.Redis(port=df.port, protocol=3, decode_responses=True)\n\n    await client.execute_command(\"ACL SETUSER kostas +@all ON >tmp\")\n    res = await client.execute_command(\"HELLO 3 AUTH kostas tmp\")\n    assert res[\"server\"] == \"redis\"\n    assert res[\"version\"] == \"7.4.0\"\n    assert res[\"proto\"] == 3\n    assert res[\"mode\"] == \"standalone\"\n    assert res[\"role\"] == \"master\"\n    assert res[\"id\"] == 1\n\n\n@pytest.mark.asyncio\nasync def test_acl_pub_sub_auth(df_factory):\n    df = df_factory.create()\n    df.start()\n    client = df.client()\n    await client.execute_command(\"ACL SETUSER kostas on >tmp +subscribe +psubscribe &f*o &bar\")\n    assert await client.execute_command(\"AUTH kostas tmp\") == \"OK\"\n\n    res = await client.execute_command(\"SUBSCRIBE bar\")\n    assert res == [\"subscribe\", \"bar\", 1]\n\n    res = await client.execute_command(\"SUBSCRIBE foo\")\n    assert res == [\"subscribe\", \"foo\", 2]\n\n    with pytest.raises(redis.exceptions.NoPermissionError):\n        res = await client.execute_command(\"SUBSCRIBE my_channel\")\n\n    # PSUBSCRIBE only matches pure literals, no asterisks\n    with pytest.raises(redis.exceptions.NoPermissionError):\n        res = await client.execute_command(\"PSUBSCRIBE foo\")\n\n    # my_channel is not in our list so the command should fail\n    with pytest.raises(redis.exceptions.NoPermissionError):\n        res = 
await client.execute_command(\"PSUBSCRIBE bar my_channel\")\n\n    res = await client.execute_command(\"PSUBSCRIBE bar\")\n    assert res == [\"psubscribe\", \"bar\", 3]\n\n\n@pytest.mark.asyncio\nasync def test_acl_revoke_pub_sub_while_subscribed(df_factory):\n    df = df_factory.create()\n    df.start()\n    publisher = df.client()\n\n    async def publish_worker(client):\n        logging.debug(\"Starting publish_worker\")\n        for i in range(0, 10):\n            logging.debug(f\"publisher iteration: {i}\")\n            await client.publish(\"channel\", f\"message{i}\")\n\n    async def subscribe_worker(channel: aioredis.client.PubSub):\n        logging.debug(\"Starting subscribe_worker\")\n        total_msgs = 0\n        async with async_timeout.timeout(10):\n            while total_msgs != 10:\n                try:\n                    res = await channel.get_message(ignore_subscribe_messages=True, timeout=5)\n                    if res is None:\n                        await asyncio.sleep(0.01)\n                        continue\n                    assert res[\"data\"] == f\"message{total_msgs}\"\n                    logging.debug(f\"subscriber iteration: {total_msgs}\")\n                    total_msgs = total_msgs + 1\n                except asyncio.TimeoutError:\n                    pass\n\n    await publisher.execute_command(\"ACL SETUSER kostas >tmp ON +@slow +SUBSCRIBE allchannels\")\n\n    subscriber = aioredis.Redis(\n        username=\"kostas\", password=\"tmp\", port=df.port, decode_responses=True\n    )\n    subscriber_obj = subscriber.pubsub()\n    await subscriber_obj.subscribe(\"channel\")\n\n    # There's a rare timing issue if we don't wait here, but given the weak guarantees of Pub/Sub,\n    # that's probably OK.\n    await asyncio.sleep(1)\n\n    subscribe_task = asyncio.create_task(subscribe_worker(subscriber_obj))\n    await publish_worker(publisher)\n    await subscribe_task\n\n    subscribe_task = 
asyncio.create_task(subscribe_worker(subscriber_obj))\n    # Already subscribed, we should still be able to receive messages on channel\n    # We should not be able to unsubscribe\n    await publisher.execute_command(\"ACL SETUSER kostas -SUBSCRIBE -UNSUBSCRIBE\")\n    await publish_worker(publisher)\n    await subscribe_task\n    # unsubscribe is not marked async and it's such a mess that it throws the error\n    # once we try to resubscribe. Instead I use the raw execute command to check that\n    # permission changes work\n    with pytest.raises(redis.exceptions.NoPermissionError):\n        await subscriber.execute_command(\"UNSUBSCRIBE channel\")\n\n    await publisher.execute_command(\"ACL SETUSER kostas +SUBSCRIBE +UNSUBSCRIBE\")\n\n    subscribe_task = asyncio.create_task(subscribe_worker(subscriber_obj))\n    await publisher.execute_command(\"ACL SETUSER kostas resetchannels\")\n    await publish_worker(publisher)\n    with pytest.raises((redis.exceptions.ConnectionError, redis.exceptions.NoPermissionError)):\n        await subscribe_task\n\n\n@pytest.mark.asyncio\nasync def test_acl_select(async_client):\n    await async_client.execute_command(\"ACL SETUSER kostas on >tmp +@all $1 ~*\")\n    assert await async_client.execute_command(\"AUTH kostas tmp\") == \"OK\"\n\n    res = await async_client.execute_command(\"SET foo bar\")\n    assert res == \"OK\"\n\n    with pytest.raises(redis.exceptions.NoPermissionError):\n        await async_client.execute_command(\"SELECT 0\")\n\n    with pytest.raises(redis.exceptions.NoPermissionError):\n        await async_client.execute_command(\"MOVE foo 2\")\n\n    res = await async_client.client_list()\n    assert res[0][\"db\"] == \"1\"\n"
  },
  {
    "path": "tests/dragonfly/bull_sidekiq_test.py",
    "content": "import json\nimport logging\nimport time\nimport uuid\n\nfrom redis import asyncio as aioredis\n\n# from bullmq import Queue\n# from . import dfly_args\n\n\n# BULLMQ_QUEUE_NAME = \"{test_queue}\"\n\n# @pytest.fixture\n# async def bullmq_queue(df_server):\n#     queue = Queue(BULLMQ_QUEUE_NAME, {\"connection\": {\"host\": \"localhost\", \"port\": df_server.port}})\n#     yield queue\n#     await queue.close()\n\n\n# @dfly_args({\"lock_on_hashtags\": True})\n# async def test_bullmq_push_jobs(async_client: aioredis.Redis, bullmq_queue: Queue):\n#     \"\"\"Push 200 jobs and verify they are stored in Dragonfly.\"\"\"\n#     for i in range(200):\n#         await bullmq_queue.add(\n#             \"process_job\",\n#             {\"job_id\": f\"job{i}\", \"payload\": f\"data for job {i}\"},\n#         )\n\n#     # BullMQ stores waiting jobs in a list key: bull:<queue_name>:wait\n#     wait_key = f\"bull:{BULLMQ_QUEUE_NAME}:wait\"\n#     queue_len = await async_client.llen(wait_key)\n#     assert queue_len == 200\n\n#     # Verify a job can be read back\n#     raw = await async_client.lindex(wait_key, 0)\n#     assert raw is not None\n#     mem_usage = await async_client.memory_usage(wait_key)\n#     logging.info(f\"Queue '{wait_key}' MEMORY USAGE: {mem_usage:,} bytes ({queue_len} jobs)\")\n\n\ndef _make_sidekiq_job(i: int) -> str:\n    \"\"\"Generate a job payload matching the Sidekiq wire format.\n\n    Verified against sidekiq/lib/sidekiq/client.rb (atomic_push) and\n    sidekiq/lib/sidekiq/job_util.rb (normalize_item).\n    \"\"\"\n    jid = uuid.uuid4().hex[:24]  # SecureRandom.hex(12)\n    now = time.time()  # Time.now.to_f\n    return json.dumps(\n        {\n            \"class\": \"ProcessJobWorker\",\n            \"args\": [\n                f\"job{i}\",\n                {\"user_id\": 100000 + i, \"action\": \"process\", \"priority\": \"normal\"},\n            ],\n            \"retry\": True,\n            \"queue\": \"default\",\n            
\"jid\": jid,\n            \"created_at\": now,\n            \"enqueued_at\": now,\n        }\n    )\n\n\nasync def test_sidekiq_push_jobs(async_client: aioredis.Redis):\n    \"\"\"Push 2000 Sidekiq jobs and verify they are stored correctly.\"\"\"\n    queue_key = \"queue:default\"\n    num_jobs = 2000\n\n    pipe = async_client.pipeline()\n    for i in range(num_jobs):\n        pipe.lpush(queue_key, _make_sidekiq_job(i))\n    await pipe.execute()\n\n    queue_len = await async_client.llen(queue_key)\n    assert queue_len == num_jobs\n\n    # Verify readability\n    first = await async_client.lindex(queue_key, 0)\n    last = await async_client.lindex(queue_key, -1)\n    assert first is not None and last is not None\n    parsed = json.loads(first)\n    assert parsed[\"class\"] == \"ProcessJobWorker\"\n\n    mem_usage = await async_client.memory_usage(queue_key)\n    logging.info(\n        f\"Queue '{queue_key}' MEMORY USAGE: {mem_usage:,} bytes ({queue_len} Sidekiq jobs)\"\n    )\n"
  },
  {
    "path": "tests/dragonfly/celery_test.py",
    "content": "import logging\nimport threading\nfrom redis import asyncio as aioredis\n\nimport pytest\nfrom celery import Celery\nfrom celery.contrib.testing.worker import (\n    setup_app_for_worker,\n    TestWorkController,\n    _set_task_join_will_block,\n)\n\n\ndef _process_job(job_id):\n    return f\"Worker successfully processed job {job_id}\"\n\n\n@pytest.fixture\ndef celery_app(df_server):\n    broker_url = f\"redis://localhost:{df_server.port}/0\"\n    app = Celery(\"dragonfly_test\", broker=broker_url, backend=broker_url)\n    app.conf.task_default_queue = \"my_queue\"\n\n    app.task(name=\"process_job\")(_process_job)\n    yield app\n\n    # Prevent AsyncResult.__del__ on leftover task objects from pinging\n    # the Redis backend after the server has already been shut down.\n    if hasattr(app, \"backend\"):\n        app.backend.remove_pending_result = lambda *args, **kwargs: None\n\n    app.close()\n\n\n@pytest.fixture\ndef celery_worker(celery_app):\n    \"\"\"Teardown order: celery_worker -> celery_app -> df_server,\n    so the worker stops while Dragonfly is still running.\"\"\"\n    setup_app_for_worker(celery_app, loglevel=\"INFO\", logfile=None)\n    worker = TestWorkController(\n        app=celery_app,\n        concurrency=1,\n        pool=\"solo\",\n        loglevel=\"INFO\",\n        without_heartbeat=True,\n        without_mingle=True,\n        without_gossip=True,\n    )\n    t = threading.Thread(target=worker.start, daemon=True)\n    t.start()\n    worker.ensure_started()\n    # Explicitly allow tests to call .get() on tasks. 
By default, Celery's eager\n    # test worker will block and raise an error if you try to get results from\n    # within what it perceives to be a worker context to prevent deadlocks.\n    _set_task_join_will_block(False)\n    yield worker\n\n    # Must explicitly stop the daemon to prevent it from entering a tight\n    # reconnection spin loop when the test abruptly destroys the Redis socket.\n    worker.stop()\n    # Use a timeout because the worker thread may be blocked on socket.recv()\n    # in the kombu event loop and never notice the stop flag.\n    # The thread is a daemon, so it will be cleaned up on process exit.\n    t.join(timeout=10)\n\n\nasync def test_celery_push_jobs(async_client: aioredis.Redis, celery_app):\n    process_job = celery_app.tasks[\"process_job\"]\n\n    results = []\n    for i in range(0, 200):\n        results.append(process_job.delay(f\"job{i}\"))\n\n    queue_len = await async_client.llen(\"my_queue\")\n    assert queue_len == 200\n    mem_usage = await async_client.memory_usage(\"my_queue\")\n    logging.info(f\"Queue 'my_queue' MEMORY USAGE: {mem_usage:,} bytes ({queue_len} jobs)\")\n\n\ndef test_celery_inspect(celery_app, celery_worker):\n    process_job = celery_app.tasks[\"process_job\"]\n    inspector = celery_app.control.inspect()\n\n    # Worker should be alive\n    ping = inspector.ping()\n    logging.info(f\"Ping response: {ping}\")\n    assert len(ping) == 1\n\n    # Our task should be registered\n    registered = inspector.registered()\n    worker_name = list(registered.keys())[0]\n    task_names = registered[worker_name]\n    assert \"process_job\" in task_names\n\n    # Check active queues\n    queues = inspector.active_queues()\n    assert queues is not None\n    queue_names = [q[\"name\"] for q in queues[worker_name]]\n    assert \"my_queue\" in queue_names\n\n    # Check stats\n    stats = inspector.stats()\n    logging.info(f\"Stats response: {stats}\")\n    assert worker_name in stats\n"
  },
  {
    "path": "tests/dragonfly/cluster_mgr_test.py",
    "content": "import subprocess\nimport pytest\nimport redis\nfrom redis import asyncio as aioredis\nfrom .utility import *\nfrom . import dfly_args\n\nBASE_PORT = 30001\n\n\nasync def insert_cluster_data(cluster_client: redis.RedisCluster):\n    for i in range(1_000):\n        await cluster_client.set(i, i)\n\n\nasync def check_cluster_data(cluster_client: redis.RedisCluster):\n    for i in range(1_000):\n        assert await cluster_client.get(i) == str(i)\n\n\ndef run_cluster_mgr(args):\n    print(f\"Running cluster_mgr.py {args}\")\n    result = subprocess.run([\"../tools/cluster_mgr.py\", *args])\n    logging.debug(result)\n    return result.returncode == 0\n\n\n@pytest.mark.exclude_epoll\n@dfly_args({\"proactor_threads\": 2, \"cluster_mode\": \"yes\"})\nasync def test_cluster_mgr(df_factory):\n    NODES = 3\n    masters = [df_factory.create(port=BASE_PORT + i) for i in range(NODES)]\n    replicas = [df_factory.create(port=BASE_PORT + 100 + i) for i in range(NODES)]\n    df_factory.start_all([*masters, *replicas])\n\n    # Initialize a cluster (all slots belong to node 0)\n    assert run_cluster_mgr([\"--action=config_single_remote\", f\"--target_port={BASE_PORT}\"])\n    for i in range(1, NODES):\n        assert run_cluster_mgr(\n            [\"--action=attach\", f\"--target_port={BASE_PORT}\", f\"--attach_port={BASE_PORT+i}\"]\n        )\n\n    # Feed the cluster with data and test that it works correctly\n    client = aioredis.RedisCluster(decode_responses=True, host=\"127.0.0.1\", port=masters[0].port)\n    await insert_cluster_data(client)\n    await check_cluster_data(client)\n\n    # Migrate ~half of the slots to node 1\n    assert run_cluster_mgr(\n        [\n            f\"--action=migrate\",\n            f\"--target_port={BASE_PORT + 1}\",\n            f\"--slot_start=8000\",\n            f\"--slot_end=16383\",\n        ]\n    )\n    await check_cluster_data(client)\n\n    # Can only detach node 2 (with no assigned slots)\n    assert not 
run_cluster_mgr([\"--action=detach\", f\"--target_port={BASE_PORT}\"])\n    assert not run_cluster_mgr([\"--action=detach\", f\"--target_port={BASE_PORT + 1}\"])\n    assert run_cluster_mgr([\"--action=detach\", f\"--target_port={BASE_PORT + 2}\"])\n    await check_cluster_data(client)\n\n    # Can't attach non-replica as replica\n    assert not run_cluster_mgr(\n        [\n            f\"--action=attach\",\n            f\"--target_port={BASE_PORT}\",\n            f\"--attach_port={BASE_PORT+2}\",\n            f\"--attach_as_replica=True\",\n        ]\n    )\n\n    # Reattach node 2 and migrate some slots to it\n    assert run_cluster_mgr(\n        [\"--action=attach\", f\"--target_port={BASE_PORT}\", f\"--attach_port={BASE_PORT+2}\"]\n    )\n    await check_cluster_data(client)\n    # Slots 7000-8000 belong to node0, while 8001-9000 belong to node1. cluster_mgr doesn't support\n    # such a migration in a single command.\n    assert not run_cluster_mgr(\n        [\n            f\"--action=migrate\",\n            f\"--target_port={BASE_PORT + 1}\",\n            f\"--slot_start=7000\",\n            f\"--slot_end=9000\",\n        ]\n    )\n    assert run_cluster_mgr(\n        [\"--action=migrate\", f\"--target_port={BASE_PORT + 2}\", \"--slot_start=0\", \"--slot_end=2000\"]\n    )\n    await check_cluster_data(client)\n    assert run_cluster_mgr(\n        [\n            f\"--action=migrate\",\n            f\"--target_port={BASE_PORT + 2}\",\n            f\"--slot_start=8000\",\n            f\"--slot_end=10000\",\n        ]\n    )\n    await check_cluster_data(client)\n\n    # Can't attach replica before running REPLICAOF\n    assert not run_cluster_mgr(\n        [\n            f\"--action=attach\",\n            f\"--attach_as_replica=True\",\n            f\"--target_port={BASE_PORT}\",\n            f\"--attach_port={replicas[0].port}\",\n        ]\n    )\n\n    # Add replicas\n    replica_clients = [replica.client() for replica in replicas]\n    for i in 
range(NODES):\n        await replica_clients[i].execute_command(f\"replicaof 127.0.0.1 {masters[i].port}\")\n        assert run_cluster_mgr(\n            [\n                f\"--action=attach\",\n                f\"--attach_as_replica=True\",\n                f\"--target_port={masters[i].port}\",\n                f\"--attach_port={replicas[i].port}\",\n            ]\n        )\n\n    # Can't take over when target is a master\n    assert not run_cluster_mgr([\"--action=takeover\", f\"--target_port={masters[i].port}\"])\n\n    # Take over replica 0\n    assert run_cluster_mgr([\"--action=takeover\", f\"--target_port={replicas[0].port}\"])\n    await replica_clients[0].execute_command(\"replicaof no one\")\n    await check_cluster_data(client)\n\n    # Revert take over\n    c_master0 = masters[0].client()\n    await c_master0.execute_command(f\"replicaof 127.0.0.1 {replicas[0].port}\")\n    assert run_cluster_mgr(\n        [\n            f\"--action=attach\",\n            f\"--attach_as_replica=True\",\n            f\"--target_port={replicas[0].port}\",\n            f\"--attach_port={masters[0].port}\",\n        ]\n    )\n    assert run_cluster_mgr([\"--action=takeover\", f\"--target_port={masters[0].port}\"])\n    await c_master0.execute_command(f\"replicaof no one\")\n    await replica_clients[0].execute_command(f\"replicaof 127.0.0.1 {masters[0].port}\")\n    assert run_cluster_mgr(\n        [\n            f\"--action=attach\",\n            f\"--attach_as_replica=True\",\n            f\"--target_port={masters[0].port}\",\n            f\"--attach_port={replicas[0].port}\",\n        ]\n    )\n    await check_cluster_data(client)\n\n    # Print the config - we don't really verify the output, but at least make sure there's no error\n    assert run_cluster_mgr([\"--action=print_config\", f\"--target_port={replicas[0].port}\"])\n\n    # Test detach replicas work\n    for i in range(NODES):\n        assert run_cluster_mgr([\"--action=detach\", 
f\"--target_port={replicas[i].port}\"])\n    await check_cluster_data(client)\n    await client.aclose()\n"
  },
  {
    "path": "tests/dragonfly/cluster_test.py",
    "content": "import pytest\nimport copy\nimport re\nimport json\nimport redis\nfrom binascii import crc_hqx\nfrom redis import asyncio as aioredis\nimport asyncio\nfrom dataclasses import dataclass\n\nfrom .instance import DflyInstanceFactory, DflyInstance\nfrom .utility import *\nfrom .replication_test import check_all_replicas_finished\nfrom redis.cluster import RedisCluster\nfrom redis.cluster import ClusterNode\nfrom redis.exceptions import MovedError\nfrom .proxy import Proxy\nfrom .seeder import Seeder, SeederBase, DebugPopulateSeeder\n\nfrom . import dfly_args\n\nBASE_PORT = 30001\n\n\ndef monotonically_increasing_port_number():\n    port = BASE_PORT\n    while True:\n        yield port\n        port = port + 1\n\n\n# Create a generator object\nnext_port = monotonically_increasing_port_number()\n\n\nasync def get_memory(client, field):\n    info = await client.info(\"memory\")\n    return info[field]\n\n\nclass RedisClusterNode:\n    def __init__(self, port):\n        self.port = port\n        self.proc = None\n\n    def start(self):\n        self.proc = subprocess.Popen(\n            [\n                \"redis-server-6.2.11\",\n                f\"--port {self.port}\",\n                \"--save ''\",\n                \"--cluster-enabled yes\",\n                f\"--cluster-config-file nodes_{self.port}.conf\",\n                \"--cluster-node-timeout 5000\",\n                \"--appendonly no\",\n                \"--protected-mode no\",\n                \"--repl-diskless-sync yes\",\n                \"--repl-diskless-sync-delay 0\",\n            ]\n        )\n        logging.debug(self.proc.args)\n\n    def stop(self):\n        self.proc.terminate()\n        try:\n            self.proc.wait(timeout=10)\n        except Exception as e:\n            pass\n\n\n@pytest.fixture(scope=\"function\")\ndef redis_cluster(port_picker):\n    # create redis client with 3 node with default slot configuration\n    # node1 slots 0-5460\n    # node2 slots 5461-10922\n    
# node3 slots 10923-16383\n    ports = [port_picker.get_available_port() for i in range(3)]\n    nodes = [RedisClusterNode(port) for port in ports]\n    try:\n        for node in nodes:\n            node.start()\n            time.sleep(1)\n    except FileNotFoundError as e:\n        skip_if_not_in_github()\n        raise\n\n    create_command = f'echo \"yes\" |redis-cli --cluster create {\" \".join([f\"127.0.0.1:{port}\" for port in ports])}'\n    subprocess.run(create_command, shell=True)\n    time.sleep(4)\n    yield nodes\n    for node in nodes:\n        node.stop()\n\n\n@dataclass\nclass MigrationInfo:\n    ip: str\n    port: int\n    slots: list\n    node_id: str\n\n\n@dataclass\nclass NodeInfo:\n    id: str\n    instance: DflyInstance\n    client: aioredis.Redis\n    admin_client: aioredis.Redis\n    slots: list\n    migrations: list\n    replicas: list\n    health: str\n\n\nasync def create_node_info(instance) -> NodeInfo:\n    client = instance.client()\n    node_id = await get_node_id(client)\n    ninfo = NodeInfo(\n        id=node_id,\n        instance=instance,\n        client=client,\n        admin_client=instance.admin_client(),\n        slots=[],\n        migrations=[],\n        replicas=[],\n        health=\"online\",\n    )\n    return ninfo\n\n\ndef generate_config(nodes):\n    return [\n        {\n            \"slot_ranges\": [{\"start\": s, \"end\": e} for (s, e) in node.slots],\n            \"master\": {\n                \"id\": node.id,\n                \"ip\": \"127.0.0.1\",\n                \"port\": node.instance.port,\n                \"health\": node.health,\n            },\n            \"replicas\": [\n                {\n                    \"id\": replica.id,\n                    \"ip\": \"127.0.0.1\",\n                    \"port\": replica.instance.port,\n                    \"health\": node.health,\n                }\n                for replica in node.replicas\n            ],\n            \"migrations\": [\n                {\n        
            \"slot_ranges\": [{\"start\": s, \"end\": e} for (s, e) in m.slots],\n                    \"node_id\": m.node_id,\n                    \"ip\": m.ip,\n                    \"port\": m.port,\n                }\n                for m in node.migrations\n            ],\n        }\n        for node in nodes\n    ]\n\n\nasync def push_config(config, admin_connections):\n    logging.debug(\"Pushing config %s\", config)\n    res = await asyncio.gather(\n        *(c_admin.execute_command(\"DFLYCLUSTER\", \"CONFIG\", config) for c_admin in admin_connections)\n    )\n    assert all([r == \"OK\" for r in res])\n\n\nasync def wait_for_status(admin_client, node_id, status, timeout=10):\n    get_status = lambda: admin_client.execute_command(\n        \"DFLYCLUSTER\", \"SLOT-MIGRATION-STATUS\", node_id\n    )\n\n    if not isinstance(status, list):\n        status = [status]\n\n    async for states, breaker in tick_timer(get_status, timeout=timeout):\n        with breaker:\n            assert len(states) != 0 and all(state[2] in status for state in states), states\n\n\nasync def wait_for_ft_index_creation(client, idx_name, timeout=5):\n    get_status = lambda: client.execute_command(\"FT.INFO\", idx_name)\n\n    async for states, breaker in tick_timer(get_status, timeout=timeout):\n        with breaker:\n            assert len(states) != 0, states\n\n\nasync def wait_for_error(admin_client, node_id, error, timeout=10):\n    get_status = lambda: admin_client.execute_command(\n        \"DFLYCLUSTER\", \"SLOT-MIGRATION-STATUS\", node_id\n    )\n\n    async for states, breaker in tick_timer(get_status, timeout=timeout):\n        with breaker:\n            assert len(states) != 0 and all(error == state[4] for state in states), states\n\n\nasync def wait_for_migration_start(admin_client, node_id):\n    while (\n        len(await admin_client.execute_command(\"DFLYCLUSTER\", \"SLOT-MIGRATION-STATUS\", node_id))\n        == 0\n    ):\n        await asyncio.sleep(0.1)\n\n\nasync 
def check_for_no_state_status(admin_clients):\n    for client in admin_clients:\n        state = await client.execute_command(\"DFLYCLUSTER\", \"SLOT-MIGRATION-STATUS\")\n        if len(state) != 0:\n            logging.debug(f\"SLOT-MIGRATION-STATUS is {state}, instead of NO_STATE\")\n            assert False\n\n\ndef key_slot(key_str) -> int:\n    key = str.encode(key_str)\n    return crc_hqx(key, 0) % 16384\n\n\nasync def get_node_id(connection):\n    id = await connection.execute_command(\"CLUSTER MYID\")\n    assert isinstance(id, str)\n    return id\n\n\ndef stop_and_get_restore_log(instance):\n    instance.stop()\n    lines = instance.find_in_logs(\"RestoreStreamer LSN\")\n    assert len(lines) == 1\n    line = lines[0]\n    logging.debug(f\"Streamer log line: {line}\")\n    return line\n\n\n@dfly_args({})\nclass TestNotEmulated:\n    async def test_cluster_commands_fails_when_not_emulate(self, async_client: aioredis.Redis):\n        with pytest.raises(aioredis.ResponseError) as respErr:\n            await async_client.execute_command(\"CLUSTER HELP\")\n        assert \"cluster_mode\" in str(respErr.value)\n\n        with pytest.raises(aioredis.ResponseError) as respErr:\n            await async_client.execute_command(\"CLUSTER SLOTS\")\n        assert \"emulated\" in str(respErr.value)\n\n\n@dfly_args({\"cluster_mode\": \"emulated\"})\nclass TestEmulated:\n    def test_cluster_slots_command(self, df_server, cluster_client: redis.RedisCluster):\n        expected = {(0, 16383): {\"primary\": (\"127.0.0.1\", df_server.port), \"replicas\": []}}\n        res = cluster_client.execute_command(\"CLUSTER SLOTS\")\n        assert expected == res\n\n    def test_cluster_help_command(self, cluster_client: redis.RedisCluster):\n        # `target_nodes` is necessary because CLUSTER HELP is not mapped on redis-py\n        res = cluster_client.execute_command(\n            \"CLUSTER\", \"HELP\", target_nodes=redis.RedisCluster.RANDOM\n        )\n        assert \"HELP\" in 
res\n        assert \"SLOTS\" in res\n\n    def test_cluster_pipeline(self, cluster_client: redis.RedisCluster):\n        pipeline = cluster_client.pipeline()\n        pipeline.set(\"foo\", \"bar\")\n        pipeline.get(\"foo\")\n        val = pipeline.execute()\n        assert val == [True, \"bar\"]\n\n\n# Unfortunately we can't test --announce_port here because that causes the Python Cluster client to\n# throw if it can't access the port in `CLUSTER SLOTS` :|\n@dfly_args({\"cluster_mode\": \"emulated\", \"cluster_announce_ip\": \"127.0.0.2\"})\nclass TestEmulatedWithAnnounceIp:\n    def test_cluster_slots_command(self, df_server, cluster_client: redis.RedisCluster):\n        expected = {(0, 16383): {\"primary\": (\"127.0.0.2\", df_server.port), \"replicas\": []}}\n        res = cluster_client.execute_command(\"CLUSTER SLOTS\")\n        assert expected == res\n\n\n@dataclass\nclass ReplicaInfo:\n    id: string\n    port: int\n\n\ndef verify_slots_result(port: int, answer: list, replicas) -> bool:\n    def is_local_host(ip: str) -> bool:\n        return ip == \"127.0.0.1\" or ip == \"localhost\"\n\n    assert answer[0] == 0  # start shard\n    assert answer[1] == 16383  # last shard\n\n    info = answer[2]\n    assert len(info) == 3\n    ip_addr = info[0]\n    assert is_local_host(ip_addr)\n    assert info[1] == port\n\n    # Replicas\n    assert len(answer) == 3 + len(replicas)\n    for i in range(3, len(replicas)):\n        replica = replicas[i - 3]\n        rep_info = answer[i]\n        assert len(rep_info) == 3\n        ip_addr = rep_info[0]\n        assert is_local_host(ip_addr)\n        assert rep_info[1] == replica.port\n        assert rep_info[2] == replica.id\n\n    return True\n\n\n# --managed_service_info means that Dragonfly is running in a managed service, so some details\n# are hidden from users, see https://github.com/dragonflydb/dragonfly/issues/4173\n@dfly_args({\"proactor_threads\": 4, \"cluster_mode\": \"emulated\", \"managed_service_info\": 
\"true\"})\nasync def test_emulated_cluster_with_replicas(df_factory):\n    master = df_factory.create(port=next(next_port), admin_port=next(next_port))\n    replicas = [df_factory.create(port=next(next_port), logtostdout=True) for i in range(1, 3)]\n\n    df_factory.start_all([master, *replicas])\n\n    c_master = master.client()\n    c_master_admin = master.admin_client()\n    master_id = await c_master.execute_command(\"CLUSTER MYID\")\n\n    c_replicas = [replica.client() for replica in replicas]\n    replica_ids = [(await c_replica.execute_command(\"CLUSTER MYID\")) for c_replica in c_replicas]\n\n    for replica, c_replica in zip(replicas, c_replicas):\n        res = await c_replica.execute_command(\"CLUSTER SLOTS\")\n        assert len(res) == 1\n        assert verify_slots_result(port=replica.port, answer=res[0], replicas=[])\n\n    res = await c_master.execute_command(\"CLUSTER SLOTS\")\n    assert verify_slots_result(port=master.port, answer=res[0], replicas=[])\n\n    # Connect replicas to master\n    for replica, c_replica in zip(replicas, c_replicas):\n        rc = await c_replica.execute_command(f\"REPLICAOF localhost {master.port}\")\n        assert rc == \"OK\"\n\n    await asyncio.sleep(0.5)\n\n    for replica, c_replica in zip(replicas, c_replicas):\n        res = await c_replica.execute_command(\"CLUSTER SLOTS\")\n        assert verify_slots_result(\n            port=master.port, answer=res[0], replicas=[ReplicaInfo(replica.port, id)]\n        )\n\n    res = await c_master.execute_command(\"CLUSTER SLOTS\")\n    assert verify_slots_result(\n        port=master.port,\n        answer=res[0],\n        replicas=[],\n    )\n\n    res = await c_master_admin.execute_command(\"CLUSTER SLOTS\")\n    assert verify_slots_result(\n        port=master.port,\n        answer=res[0],\n        replicas=[ReplicaInfo(id, replica.port) for id, replica in zip(replica_ids, replicas)],\n    )\n\n    assert await c_master.execute_command(\"CLUSTER NODES\") == {\n        
f\"127.0.0.1:{master.port}\": {\n            \"connected\": True,\n            \"epoch\": \"0\",\n            \"flags\": \"myself,master\",\n            \"hostname\": \"\",\n            \"last_ping_sent\": \"0\",\n            \"last_pong_rcvd\": \"0\",\n            \"master_id\": \"-\",\n            \"migrations\": [],\n            \"node_id\": master_id,\n            \"slots\": [[\"0\", \"16383\"]],\n        },\n    }\n\n    assert await c_master_admin.execute_command(\"CLUSTER NODES\") == {\n        f\"127.0.0.1:{master.port}\": {\n            \"connected\": True,\n            \"epoch\": \"0\",\n            \"flags\": \"myself,master\",\n            \"hostname\": \"\",\n            \"last_ping_sent\": \"0\",\n            \"last_pong_rcvd\": \"0\",\n            \"master_id\": \"-\",\n            \"migrations\": [],\n            \"node_id\": master_id,\n            \"slots\": [[\"0\", \"16383\"]],\n        },\n        f\"127.0.0.1:{replicas[0].port}\": {\n            \"connected\": True,\n            \"epoch\": \"0\",\n            \"flags\": \"slave\",\n            \"hostname\": \"\",\n            \"last_ping_sent\": \"0\",\n            \"last_pong_rcvd\": \"0\",\n            \"master_id\": master_id,\n            \"migrations\": [],\n            \"node_id\": replica_ids[0],\n            \"slots\": [],\n        },\n        f\"127.0.0.1:{replicas[1].port}\": {\n            \"connected\": True,\n            \"epoch\": \"0\",\n            \"flags\": \"slave\",\n            \"hostname\": \"\",\n            \"last_ping_sent\": \"0\",\n            \"last_pong_rcvd\": \"0\",\n            \"master_id\": master_id,\n            \"migrations\": [],\n            \"node_id\": replica_ids[1],\n            \"slots\": [],\n        },\n    }\n\n\n@dfly_args({\"proactor_threads\": 4, \"cluster_mode\": \"yes\"})\nasync def test_cluster_managed_service_info(df_factory):\n    master = df_factory.create(port=next(next_port), admin_port=next(next_port))\n    replica = 
df_factory.create(port=next(next_port), admin_port=next(next_port))\n\n    df_factory.start_all([master, replica])\n\n    c_master = master.client()\n    c_master_admin = master.admin_client()\n    master_id = await c_master.execute_command(\"CLUSTER MYID\")\n\n    c_replica = replica.client()\n    c_replica_admin = replica.admin_client()\n    replica_id = await c_replica.execute_command(\"CLUSTER MYID\")\n\n    # Connect replicas to master\n    rc = await c_replica_admin.execute_command(f\"REPLICAOF localhost {master.port}\")\n    assert rc == \"OK\"\n    await wait_available_async(c_replica)\n\n    nodes = [await create_node_info(master)]\n    nodes[0].slots = [(0, 16383)]\n    nodes[0].replicas = [await create_node_info(replica)]\n    await push_config(json.dumps(generate_config(nodes)), [master.client(), replica.client()])\n\n    expected_hidden_cluster_slots = [\n        [\n            0,\n            16383,\n            [\n                \"127.0.0.1\",\n                master.port,\n                master_id,\n            ],\n        ],\n    ]\n    expected_full_cluster_slots = copy.deepcopy(expected_hidden_cluster_slots)\n    expected_full_cluster_slots[0].append(\n        [\n            \"127.0.0.1\",\n            replica.port,\n            replica_id,\n        ]\n    )\n    assert await c_master.execute_command(\"CLUSTER SLOTS\") == expected_full_cluster_slots\n    assert await c_master_admin.execute_command(\"CLUSTER SLOTS\") == expected_full_cluster_slots\n\n    expected_hidden_cluster_nodes = {\n        f\"127.0.0.1:{master.port}\": {\n            \"connected\": True,\n            \"epoch\": \"0\",\n            \"flags\": \"myself,master\",\n            \"hostname\": \"\",\n            \"last_ping_sent\": \"0\",\n            \"last_pong_rcvd\": \"0\",\n            \"master_id\": \"-\",\n            \"migrations\": [],\n            \"node_id\": master_id,\n            \"slots\": [[\"0\", \"16383\"]],\n        },\n    }\n    expected_full_cluster_nodes = 
copy.deepcopy(expected_hidden_cluster_nodes)\n    expected_full_cluster_nodes[f\"127.0.0.1:{replica.port}\"] = {\n        \"connected\": True,\n        \"epoch\": \"0\",\n        \"flags\": \"slave\",\n        \"hostname\": \"\",\n        \"last_ping_sent\": \"0\",\n        \"last_pong_rcvd\": \"0\",\n        \"master_id\": master_id,\n        \"migrations\": [],\n        \"node_id\": replica_id,\n        \"slots\": [],\n    }\n    assert await c_master.execute_command(\"CLUSTER NODES\") == expected_full_cluster_nodes\n    assert await c_master_admin.execute_command(\"CLUSTER NODES\") == expected_full_cluster_nodes\n\n    expected_hidden_cluster_shards = [\n        [\n            \"slots\",\n            [0, 16383],\n            \"nodes\",\n            [\n                [\n                    \"id\",\n                    master_id,\n                    \"endpoint\",\n                    \"127.0.0.1\",\n                    \"ip\",\n                    \"127.0.0.1\",\n                    \"port\",\n                    master.port,\n                    \"role\",\n                    \"master\",\n                    \"replication-offset\",\n                    0,\n                    \"health\",\n                    \"online\",\n                ],\n            ],\n        ],\n    ]\n    expected_full_cluster_shards = copy.deepcopy(expected_hidden_cluster_shards)\n    expected_full_cluster_shards[0][3].append(\n        [\n            \"id\",\n            replica_id,\n            \"endpoint\",\n            \"127.0.0.1\",\n            \"ip\",\n            \"127.0.0.1\",\n            \"port\",\n            replica.port,\n            \"role\",\n            \"replica\",\n            \"replication-offset\",\n            0,\n            \"health\",\n            \"online\",\n        ]\n    )\n    assert await c_master.execute_command(\"CLUSTER SHARDS\") == expected_full_cluster_shards\n    assert await c_master_admin.execute_command(\"CLUSTER SHARDS\") == 
expected_full_cluster_shards\n\n    # this flag doesn't affect cluster anymore so the results will be the same\n    await c_master.execute_command(\"config set managed_service_info true\")\n\n    assert await c_master.execute_command(\"CLUSTER SLOTS\") == expected_full_cluster_slots\n    assert await c_master_admin.execute_command(\"CLUSTER SLOTS\") == expected_full_cluster_slots\n\n    assert await c_master.execute_command(\"CLUSTER NODES\") == expected_full_cluster_nodes\n    assert await c_master_admin.execute_command(\"CLUSTER NODES\") == expected_full_cluster_nodes\n\n    assert await c_master.execute_command(\"CLUSTER SHARDS\") == expected_full_cluster_shards\n    assert await c_master_admin.execute_command(\"CLUSTER SHARDS\") == expected_full_cluster_shards\n\n\n@dfly_args({\"cluster_mode\": \"emulated\"})\nasync def test_cluster_info(async_client):\n    res = await async_client.execute_command(\"CLUSTER INFO\")\n    assert len(res) == 16\n    assert res == {\n        \"cluster_current_epoch\": \"1\",\n        \"cluster_known_nodes\": \"1\",\n        \"cluster_my_epoch\": \"1\",\n        \"cluster_size\": \"1\",\n        \"cluster_slots_assigned\": \"16384\",\n        \"cluster_slots_fail\": \"0\",\n        \"cluster_slots_ok\": \"16384\",\n        \"cluster_slots_pfail\": \"0\",\n        \"cluster_state\": \"ok\",\n        \"cluster_stats_messages_meet_received\": \"0\",\n        \"cluster_stats_messages_ping_received\": \"1\",\n        \"cluster_stats_messages_ping_sent\": \"1\",\n        \"cluster_stats_messages_pong_received\": \"1\",\n        \"cluster_stats_messages_pong_sent\": \"1\",\n        \"cluster_stats_messages_received\": \"1\",\n        \"cluster_stats_messages_sent\": \"1\",\n    }\n\n\n@dfly_args({\"cluster_mode\": \"emulated\", \"cluster_announce_ip\": \"127.0.0.2\"})\n@pytest.mark.asyncio\nasync def test_cluster_nodes(df_server, async_client):\n    res = await async_client.execute_command(\"CLUSTER NODES\")\n    assert len(res) == 1\n    
info = res[f\"127.0.0.2:{df_server.port}\"]\n    assert res is not None\n    assert info[\"connected\"] == True\n    assert info[\"epoch\"] == \"0\"\n    assert info[\"flags\"] == \"myself,master\"\n    assert info[\"last_ping_sent\"] == \"0\"\n    assert info[\"slots\"] == [[\"0\", \"16383\"]]\n    assert info[\"master_id\"] == \"-\"\n\n\n\"\"\"\nTest that slot ownership changes correctly with config changes.\n\nAdd a key to node0, then move the slot ownership to node1 and see that they both behave as\nintended.\nAlso add keys to each of them that are *not* moved, and see that they are unaffected by the move.\n\"\"\"\n\n\n@dfly_args({\"proactor_threads\": 4, \"cluster_mode\": \"yes\", \"cluster_node_id\": \"inigo montoya\"})\nasync def test_cluster_node_id(df_factory: DflyInstanceFactory):\n    node = df_factory.create(port=next(next_port))\n    df_factory.start_all([node])\n\n    conn = node.client()\n    assert \"inigo montoya\" == await get_node_id(conn)\n\n\n@dfly_args({\"proactor_threads\": 4, \"cluster_mode\": \"yes\"})\nasync def test_cluster_slot_ownership_changes(df_factory: DflyInstanceFactory):\n    # Start and configure cluster with 2 nodes\n    nodes = [df_factory.create(port=next(next_port), admin_port=next(next_port)) for i in range(2)]\n\n    df_factory.start_all(nodes)\n\n    c_nodes = [node.client() for node in nodes]\n    c_nodes_admin = [node.admin_client() for node in nodes]\n\n    node_ids = await asyncio.gather(*(get_node_id(c) for c in c_nodes))\n\n    config = f\"\"\"\n      [\n        {{\n          \"slot_ranges\": [\n            {{\n              \"start\": 0,\n              \"end\": LAST_SLOT_CUTOFF\n            }}\n          ],\n          \"master\": {{\n            \"id\": \"{node_ids[0]}\",\n            \"ip\": \"localhost\",\n            \"port\": {nodes[0].port}\n          }},\n          \"replicas\": []\n        }},\n        {{\n          \"slot_ranges\": [\n            {{\n              \"start\": NEXT_SLOT_CUTOFF,\n              
\"end\": 16383\n            }}\n          ],\n          \"master\": {{\n            \"id\": \"{node_ids[1]}\",\n            \"ip\": \"localhost\",\n            \"port\": {nodes[1].port}\n          }},\n          \"replicas\": []\n        }}\n      ]\n    \"\"\"\n\n    await push_config(\n        config.replace(\"LAST_SLOT_CUTOFF\", \"5259\").replace(\"NEXT_SLOT_CUTOFF\", \"5260\"),\n        c_nodes_admin,\n    )\n\n    # Slot for \"KEY1\" is 5259\n\n    # Insert a key that should stay in node0\n    assert await c_nodes[0].set(\"KEY0\", \"value\")\n\n    # And to node1 (so it happens that 'KEY0' belongs to 0 and 'KEY2' to 1)\n    assert await c_nodes[1].set(\"KEY2\", \"value\")\n\n    # Insert a key that we will move ownership of to node1 (but without migration yet)\n    assert await c_nodes[0].set(\"KEY1\", \"value\")\n    assert await c_nodes[0].execute_command(\"DBSIZE\") == 2\n\n    # Make sure that node0 owns \"KEY0\"\n    assert (await c_nodes[0].get(\"KEY0\")) == \"value\"\n\n    # Make sure that \"KEY1\" is not owned by node1\n    with pytest.raises((MovedError, aioredis.ResponseError)) as e:\n        await c_nodes[1].set(\"KEY1\", \"value\")\n\n    assert e.value.args[0].endswith(f\"5259 localhost:{nodes[0].port}\")\n\n    # And that node1 only has 1 key (\"KEY2\")\n    assert await c_nodes[1].execute_command(\"DBSIZE\") == 1\n\n    print(\"Moving ownership over 5259 ('KEY1') to other node\")\n\n    await push_config(\n        config.replace(\"LAST_SLOT_CUTOFF\", \"5258\").replace(\"NEXT_SLOT_CUTOFF\", \"5259\"),\n        c_nodes_admin,\n    )\n\n    # node0 should have removed \"KEY1\" as it no longer owns it\n    # deleting non owned keys is background operation therefore we add timeout to this check\n    @assert_eventually(times=2)\n    async def check_dbsize(node_index, expected_size):\n        assert await c_nodes[node_index].execute_command(\"DBSIZE\") == expected_size\n\n    await check_dbsize(node_index=0, expected_size=1)\n    # node0 should still 
own \"KEY0\" though\n    assert (await c_nodes[0].get(\"KEY0\")) == \"value\"\n    # node1 should still have \"KEY2\"\n    assert await c_nodes[1].execute_command(\"DBSIZE\") == 1\n\n    # Now node0 should reply with MOVED for \"KEY1\"\n    with pytest.raises((MovedError, aioredis.ResponseError)) as e:\n        await c_nodes[0].set(\"KEY1\", \"value\")\n\n    assert e.value.args[0].endswith(f\"5259 localhost:{nodes[1].port}\")\n\n    # And node1 should own it and allow using it\n    assert await c_nodes[1].set(\"KEY1\", \"value\")\n    assert await c_nodes[1].execute_command(\"DBSIZE\") == 2\n\n    config = f\"\"\"\n      [\n        {{\n          \"slot_ranges\": [\n            {{\n              \"start\": 0,\n              \"end\": 16383\n            }}\n          ],\n          \"master\": {{\n            \"id\": \"{node_ids[0]}\",\n            \"ip\": \"localhost\",\n            \"port\": {nodes[0].port}\n          }},\n          \"replicas\": []\n        }}\n      ]\n    \"\"\"\n    await push_config(config, c_nodes_admin)\n\n    assert await c_nodes[0].execute_command(\"DBSIZE\") == 1\n    assert (await c_nodes[0].get(\"KEY0\")) == \"value\"\n    await check_dbsize(node_index=1, expected_size=0)\n\n\n# Tests that master commands to the replica are applied regardless of slot ownership\n@dfly_args({\"proactor_threads\": 4, \"cluster_mode\": \"yes\"})\nasync def test_cluster_replica_sets_non_owned_keys(df_factory: DflyInstanceFactory):\n    # Start and configure cluster with 1 master and 1 replica, both own all slots\n    master = df_factory.create(admin_port=next(next_port))\n    replica = df_factory.create(admin_port=next(next_port))\n    df_factory.start_all([master, replica])\n\n    async with master.client() as c_master, master.admin_client() as c_master_admin, replica.client() as c_replica, replica.admin_client() as c_replica_admin:\n        master_id = await get_node_id(c_master)\n        replica_id = await get_node_id(c_replica)\n\n        config = 
f\"\"\"\n        [\n          {{\n            \"slot_ranges\": [\n              {{\n                \"start\": 0,\n                \"end\": 16383\n              }}\n            ],\n            \"master\": {{\n              \"id\": \"{master_id}\",\n              \"ip\": \"localhost\",\n              \"port\": {master.port}\n            }},\n            \"replicas\": [\n              {{\n                \"id\": \"{replica_id}\",\n                \"ip\": \"localhost\",\n                \"port\": {replica.port}\n              }}\n            ]\n          }}\n        ]\n      \"\"\"\n        await push_config(config, [c_master_admin, c_replica_admin])\n\n        # Setup replication and make sure that it works properly.\n        await c_master.set(\"key\", \"value\")\n        await c_replica.execute_command(\"REPLICAOF\", \"localhost\", master.port)\n        await check_all_replicas_finished([c_replica], c_master)\n        assert (await c_replica.get(\"key\")) == \"value\"\n        assert await c_replica.execute_command(\"dbsize\") == 1\n\n        # Tell the replica that it and the master no longer own any data, but don't tell that to the\n        # master. 
This will allow us to set keys on the master and make sure that they are set in the\n        # replica.\n\n        replica_config = f\"\"\"\n        [\n          {{\n            \"slot_ranges\": [],\n            \"master\": {{\n              \"id\": \"{master_id}\",\n              \"ip\": \"localhost\",\n              \"port\": {master.port}\n            }},\n            \"replicas\": [\n              {{\n                \"id\": \"{replica_id}\",\n                \"ip\": \"localhost\",\n                \"port\": {replica.port}\n              }}\n            ]\n          }},\n          {{\n            \"slot_ranges\": [\n              {{\n                \"start\": 0,\n                \"end\": 16383\n              }}\n            ],\n            \"master\": {{\n              \"id\": \"non-existing-master\",\n              \"ip\": \"localhost\",\n              \"port\": 1111\n            }},\n            \"replicas\": []\n          }}\n        ]\n      \"\"\"\n\n        await push_config(replica_config, [c_replica_admin])\n\n        # The replica should *not* have deleted the key.\n        assert await c_replica.execute_command(\"dbsize\") == 1\n\n        # Set another key on the master, which it owns but the replica does not own.\n        await c_master.set(\"key2\", \"value\")\n        await check_all_replicas_finished([c_replica], c_master)\n\n        # See that the key exists in both replica and master\n        assert await c_master.execute_command(\"dbsize\") == 2\n        assert await c_replica.execute_command(\"dbsize\") == 2\n\n        # The replica should still reply with MOVED, despite having that key.\n        with pytest.raises((MovedError, aioredis.ResponseError)) as e:\n            await c_replica.get(\"key2\")\n            assert False, \"Should not be able to get key on non-owner cluster node\"\n\n        assert re.search(r\"\\d+ localhost:1111\", e.value.args[0])\n\n        await push_config(replica_config, [c_master_admin])\n        await 
check_all_replicas_finished([c_replica], c_master)\n        assert await c_master.execute_command(\"dbsize\") == 0\n        assert await c_replica.execute_command(\"dbsize\") == 0\n\n\n@dfly_args({\"proactor_threads\": 4, \"cluster_mode\": \"yes\"})\nasync def test_cluster_flush_slots_after_config_change(df_factory: DflyInstanceFactory):\n    # Start and configure cluster with 1 master and 1 replica, both own all slots\n    master = df_factory.create(port=next(next_port), admin_port=next(next_port))\n    replica = df_factory.create(port=next(next_port), admin_port=next(next_port))\n    df_factory.start_all([master, replica])\n\n    c_master = master.client()\n    c_master_admin = master.admin_client()\n    master_id = await get_node_id(c_master)\n\n    c_replica = replica.client()\n    c_replica_admin = replica.admin_client()\n    replica_id = await get_node_id(c_replica)\n\n    config = f\"\"\"\n      [\n        {{\n          \"slot_ranges\": [\n            {{\n              \"start\": 0,\n              \"end\": 16383\n            }}\n          ],\n          \"master\": {{\n            \"id\": \"{master_id}\",\n            \"ip\": \"localhost\",\n            \"port\": {master.port}\n          }},\n          \"replicas\": [\n            {{\n              \"id\": \"{replica_id}\",\n              \"ip\": \"localhost\",\n              \"port\": {replica.port}\n            }}\n          ]\n        }}\n      ]\n    \"\"\"\n    await push_config(config, [c_master_admin, c_replica_admin])\n\n    await c_master.execute_command(\"debug\", \"populate\", \"100000\")\n    assert await c_master.execute_command(\"dbsize\") == 100_000\n\n    # Setup replication and make sure that it works properly.\n    await c_replica.execute_command(\"REPLICAOF\", \"localhost\", master.port)\n    await check_all_replicas_finished([c_replica], c_master)\n    assert await c_replica.execute_command(\"dbsize\") == 100_000\n\n    resp = await c_master_admin.execute_command(\"dflycluster\", 
\"getslotinfo\", \"slots\", \"0\")\n    assert resp[0][0] == 0\n    slot_0_size = resp[0][2]\n    print(f\"Slot 0 size = {slot_0_size}\")\n    assert slot_0_size > 0\n\n    config = f\"\"\"\n      [\n        {{\n          \"slot_ranges\": [\n            {{\n              \"start\": 1,\n              \"end\": 16383\n            }}\n          ],\n          \"master\": {{\n            \"id\": \"{master_id}\",\n            \"ip\": \"localhost\",\n            \"port\": {master.port}\n          }},\n          \"replicas\": [\n            {{\n              \"id\": \"{replica_id}\",\n              \"ip\": \"localhost\",\n              \"port\": {replica.port}\n            }}\n          ]\n        }},\n        {{\n          \"slot_ranges\": [\n            {{\n              \"start\": 0,\n              \"end\": 0\n            }}\n          ],\n          \"master\": {{\n            \"id\": \"other-master\",\n            \"ip\": \"localhost\",\n            \"port\": 9000\n          }},\n          \"replicas\": [\n            {{\n              \"id\": \"other-replica\",\n              \"ip\": \"localhost\",\n              \"port\": 9001\n            }}\n          ]\n        }}\n      ]\n    \"\"\"\n    await push_config(config, [c_master_admin, c_replica_admin])\n\n    await check_all_replicas_finished([c_replica], c_master)\n\n    assert await c_master.execute_command(\"dbsize\") == (100_000 - slot_0_size)\n    assert await c_replica.execute_command(\"dbsize\") == (100_000 - slot_0_size)\n\n\n@dfly_args({\"proactor_threads\": 4, \"cluster_mode\": \"yes\", \"admin_port\": next(next_port)})\nasync def test_cluster_blocking_command(df_server):\n    c_master = df_server.client()\n    c_master_admin = df_server.admin_client()\n\n    config = [\n        {\n            \"slot_ranges\": [{\"start\": 0, \"end\": 8000}],\n            \"master\": {\"id\": await get_node_id(c_master), \"ip\": \"10.0.0.1\", \"port\": 7000},\n            \"replicas\": [],\n        },\n        {\n            
\"slot_ranges\": [{\"start\": 8001, \"end\": 16383}],\n            \"master\": {\"id\": \"other\", \"ip\": \"10.0.0.2\", \"port\": 7000},\n            \"replicas\": [],\n        },\n    ]\n\n    assert (\n        await c_master_admin.execute_command(\"DFLYCLUSTER\", \"CONFIG\", json.dumps(config))\n    ) == \"OK\"\n\n    assert (await c_master.execute_command(\"CLUSTER\", \"KEYSLOT\", \"keep-local\")) == 3479\n    assert (await c_master.execute_command(\"CLUSTER\", \"KEYSLOT\", \"remove-key-4\")) == 6103\n\n    v1 = asyncio.create_task(c_master.blpop(\"keep-local\", 2))\n    v2 = asyncio.create_task(c_master.blpop(\"remove-key-4\", 2))\n\n    await asyncio.sleep(0.1)\n\n    config[0][\"slot_ranges\"][0][\"end\"] = 5000\n    config[1][\"slot_ranges\"][0][\"start\"] = 5001\n    assert (\n        await c_master_admin.execute_command(\"DFLYCLUSTER\", \"CONFIG\", json.dumps(config))\n    ) == \"OK\"\n\n    await c_master.lpush(\"keep-local\", \"WORKS\")\n\n    assert (await v1) == (\"keep-local\", \"WORKS\")\n    with pytest.raises(MovedError) as e_info:\n        await v2\n\n\n@dfly_args({\"proactor_threads\": 4, \"cluster_mode\": \"yes\"})\nasync def test_blocking_commands_cancel(df_factory, df_seeder_factory):\n    instances = [\n        df_factory.create(port=next(next_port), admin_port=next(next_port)) for i in range(2)\n    ]\n\n    df_factory.start_all(instances)\n\n    nodes = [(await create_node_info(instance)) for instance in instances]\n    nodes[0].slots = [(0, 16383)]\n    nodes[1].slots = []\n\n    await push_config(json.dumps(generate_config(nodes)), [node.admin_client for node in nodes])\n\n    set_task = asyncio.create_task(nodes[0].client.execute_command(\"BZPOPMIN set1 0\"))\n    list_task = asyncio.create_task(nodes[0].client.execute_command(\"BLPOP list1 0\"))\n\n    nodes[0].migrations.append(\n        MigrationInfo(\"127.0.0.1\", nodes[1].instance.port, [(0, 16383)], nodes[1].id)\n    )\n    await push_config(json.dumps(generate_config(nodes)), 
[node.admin_client for node in nodes])\n\n    await wait_for_status(nodes[0].admin_client, nodes[1].id, \"FINISHED\")\n\n    nodes[0].migrations = []\n    nodes[0].slots = []\n    nodes[1].slots = [(0, 16383)]\n    logging.debug(\"remove finished migrations\")\n    await push_config(json.dumps(generate_config(nodes)), [node.admin_client for node in nodes])\n\n    with pytest.raises(MovedError) as set_e_info:\n        await set_task\n    assert f\"3037 127.0.0.1:{instances[1].port}\" == str(set_e_info.value)\n\n    with pytest.raises(MovedError) as list_e_info:\n        await list_task\n    assert f\"7141 127.0.0.1:{instances[1].port}\" == str(list_e_info.value)\n\n\n@pytest.mark.parametrize(\"set_cluster_node_id\", [True, False])\n@dfly_args({\"proactor_threads\": 4, \"cluster_mode\": \"yes\"})\nasync def test_cluster_native_client(\n    df_factory: DflyInstanceFactory,\n    df_seeder_factory: DflySeederFactory,\n    set_cluster_node_id: bool,\n):\n    # Start and configure cluster with 3 masters and 3 replicas\n    masters = [\n        df_factory.create(\n            port=next(next_port),\n            admin_port=next(next_port),\n            cluster_node_id=f\"master{i}\" if set_cluster_node_id else \"\",\n        )\n        for i in range(3)\n    ]\n    df_factory.start_all(masters)\n    c_masters_admin = [master.admin_client() for master in masters]\n    master_ids = await asyncio.gather(*(get_node_id(c) for c in c_masters_admin))\n\n    replicas = [\n        df_factory.create(\n            port=next(next_port),\n            admin_port=next(next_port),\n            cluster_node_id=f\"replica{i}\" if set_cluster_node_id else \"\",\n            replicaof=f\"localhost:{masters[i].port}\",\n        )\n        for i in range(3)\n    ]\n    df_factory.start_all(replicas)\n    c_replicas = [replica.client() for replica in replicas]\n    await asyncio.gather(*(wait_available_async(c) for c in c_replicas))\n    c_replicas_admin = [replica.admin_client() for replica in 
replicas]\n    replica_ids = await asyncio.gather(*(get_node_id(c) for c in c_replicas))\n\n    config = f\"\"\"\n      [\n        {{\n          \"slot_ranges\": [\n            {{\n              \"start\": 0,\n              \"end\": 5000\n            }}\n          ],\n          \"master\": {{\n            \"id\": \"{master_ids[0]}\",\n            \"ip\": \"localhost\",\n            \"port\": {masters[0].port}\n          }},\n          \"replicas\": [\n              {{\n                \"id\": \"{replica_ids[0]}\",\n                \"ip\": \"localhost\",\n                \"port\": {replicas[0].port}\n              }}\n          ]\n        }},\n        {{\n          \"slot_ranges\": [\n            {{\n              \"start\": 5001,\n              \"end\": 10000\n            }}\n          ],\n          \"master\": {{\n            \"id\": \"{master_ids[1]}\",\n            \"ip\": \"localhost\",\n            \"port\": {masters[1].port}\n          }},\n          \"replicas\": [\n              {{\n                \"id\": \"{replica_ids[1]}\",\n                \"ip\": \"localhost\",\n                \"port\": {replicas[1].port}\n              }}\n          ]\n        }},\n        {{\n          \"slot_ranges\": [\n            {{\n              \"start\": 10001,\n              \"end\": 16383\n            }}\n          ],\n          \"master\": {{\n            \"id\": \"{master_ids[2]}\",\n            \"ip\": \"localhost\",\n            \"port\": {masters[2].port}\n          }},\n          \"replicas\": [\n              {{\n                \"id\": \"{replica_ids[2]}\",\n                \"ip\": \"localhost\",\n                \"port\": {replicas[2].port}\n              }}\n          ]\n        }}\n      ]\n    \"\"\"\n    await push_config(config, c_masters_admin + c_replicas_admin)\n\n    seeder = df_seeder_factory.create(port=masters[0].port, cluster_mode=True)\n    await seeder.run(target_deviation=0.1)\n\n    client = masters[0].cluster_client()\n\n    assert await 
client.set(\"key0\", \"value\") == True\n    assert await client.get(\"key0\") == \"value\"\n\n    async def test_random_keys():\n        for i in range(100):\n            key = \"key\" + str(random.randint(0, 100_000))\n            assert await client.set(key, \"value\") == True\n            assert await client.get(key) == \"value\"\n\n    await test_random_keys()\n\n    for i in range(3):\n        await check_all_replicas_finished([c_replicas[i]], c_masters_admin[i])\n\n    await asyncio.gather(*(wait_available_async(c) for c in c_replicas))\n\n    # Make sure that getting a value from a replica works as well.\n    # We use connections directly to NOT follow 'MOVED' error, as that will redirect to the master.\n    for c in c_replicas:\n        try:\n            assert await c.get(\"key0\")\n        except MovedError as e:\n            pass\n\n    # Push new config\n    config = f\"\"\"\n      [\n        {{\n          \"slot_ranges\": [\n            {{\n              \"start\": 0,\n              \"end\": 4000\n            }}\n          ],\n          \"master\": {{\n            \"id\": \"{master_ids[0]}\",\n            \"ip\": \"localhost\",\n            \"port\": {masters[0].port}\n          }},\n          \"replicas\": [\n              {{\n                \"id\": \"{replica_ids[0]}\",\n                \"ip\": \"localhost\",\n                \"port\": {replicas[0].port}\n              }}\n          ]\n        }},\n        {{\n          \"slot_ranges\": [\n            {{\n              \"start\": 4001,\n              \"end\": 14000\n            }}\n          ],\n          \"master\": {{\n            \"id\": \"{master_ids[1]}\",\n            \"ip\": \"localhost\",\n            \"port\": {masters[1].port}\n          }},\n          \"replicas\": [\n              {{\n                \"id\": \"{replica_ids[1]}\",\n                \"ip\": \"localhost\",\n                \"port\": {replicas[1].port}\n              }}\n          ]\n        }},\n        {{\n          
\"slot_ranges\": [\n            {{\n              \"start\": 14001,\n              \"end\": 16383\n            }}\n          ],\n          \"master\": {{\n            \"id\": \"{master_ids[2]}\",\n            \"ip\": \"localhost\",\n            \"port\": {masters[2].port}\n          }},\n          \"replicas\": [\n              {{\n                \"id\": \"{replica_ids[2]}\",\n                \"ip\": \"localhost\",\n                \"port\": {replicas[2].port}\n              }}\n          ]\n        }}\n      ]\n    \"\"\"\n    await push_config(config, c_masters_admin + c_replicas_admin)\n\n    await test_random_keys()\n\n\n@dfly_args({\"proactor_threads\": 4, \"cluster_mode\": \"yes\"})\nasync def test_config_consistency(df_factory: DflyInstanceFactory):\n    # Check slot migration from one node to another\n    instances = [\n        df_factory.create(port=next(next_port), admin_port=next(next_port)) for i in range(2)\n    ]\n\n    df_factory.start_all(instances)\n\n    nodes = [(await create_node_info(instance)) for instance in instances]\n    nodes[0].slots = [(0, 5259)]\n    nodes[1].slots = [(5260, 16383)]\n\n    await push_config(json.dumps(generate_config(nodes)), [node.admin_client for node in nodes])\n\n    await check_for_no_state_status([node.admin_client for node in nodes])\n\n    nodes[0].migrations.append(\n        MigrationInfo(\"127.0.0.1\", nodes[1].instance.admin_port, [(5200, 5259)], nodes[1].id)\n    )\n\n    # Push config to source node. 
Migration will not start until target node gets the config as well.\n    logging.debug(\"Push migration config to source node\")\n    await push_config(json.dumps(generate_config(nodes)), [nodes[0].admin_client])\n\n    # some delay to check that migration isn't started until we send config to target node\n    await asyncio.sleep(0.2)\n\n    await wait_for_status(nodes[0].admin_client, nodes[1].id, \"CONNECTING\")\n    await check_for_no_state_status([nodes[1].admin_client])\n\n    logging.debug(\"Push migration config to target node\")\n    await push_config(json.dumps(generate_config(nodes)), [nodes[1].admin_client])\n\n    await wait_for_status(nodes[1].admin_client, nodes[0].id, \"FINISHED\")\n    await wait_for_status(nodes[0].admin_client, nodes[1].id, \"FINISHED\")\n\n    nodes[0].migrations = []\n    nodes[0].slots = [(0, 5199)]\n    nodes[1].slots = [(5200, 16383)]\n\n    logging.debug(\"remove finished migrations\")\n    await push_config(json.dumps(generate_config(nodes)), [node.admin_client for node in nodes])\n\n    await check_for_no_state_status([node.admin_client for node in nodes])\n\n\n@dfly_args({\"proactor_threads\": 4, \"cluster_mode\": \"yes\"})\nasync def test_cluster_flushall_during_migration(\n    df_factory: DflyInstanceFactory, df_seeder_factory\n):\n    # Check data migration from one node to another\n    instances = [\n        df_factory.create(\n            port=next(next_port),\n            admin_port=next(next_port),\n            vmodule=\"cluster_family=2,outgoing_slot_migration=2,incoming_slot_migration=2,streamer=2,server_family=1\",\n        )\n        for i in range(2)\n    ]\n\n    df_factory.start_all(instances)\n\n    nodes = [(await create_node_info(instance)) for instance in instances]\n    nodes[0].slots = [(0, 16383)]\n    nodes[1].slots = []\n\n    await push_config(json.dumps(generate_config(nodes)), [node.admin_client for node in nodes])\n\n    seeder = df_seeder_factory.create(keys=10_000, port=nodes[0].instance.port, 
cluster_mode=True)\n    await seeder.run(target_deviation=0.1)\n\n    nodes[0].migrations.append(\n        MigrationInfo(\"127.0.0.1\", nodes[1].instance.admin_port, [(0, 16383)], nodes[1].id)\n    )\n\n    logging.debug(\"Start migration\")\n    await push_config(json.dumps(generate_config(nodes)), [node.admin_client for node in nodes])\n\n    await nodes[0].client.execute_command(\"flushall\")\n\n    status1 = await nodes[1].admin_client.execute_command(\n        \"DFLYCLUSTER\", \"SLOT-MIGRATION-STATUS\", nodes[0].id\n    )\n    assert (\n        len(status1) == 0 or \"FINISHED\" not in status1[0]\n    ), \"Weak test case - finished migration too early\"\n\n    await wait_for_status(nodes[0].admin_client, nodes[1].id, \"FINISHED\")\n\n    logging.debug(\"Finalizing migration\")\n    nodes[0].migrations = []\n    nodes[0].slots = []\n    nodes[1].slots = [(0, 16383)]\n    await push_config(json.dumps(generate_config(nodes)), [node.admin_client for node in nodes])\n    logging.debug(\"Migration finalized\")\n\n    assert await nodes[0].client.dbsize() == 0\n\n    # Push config that causes mass async slot deletion on nodes[1]\n    nodes[0].slots = [(0, 16383)]\n    nodes[1].slots = []\n    await push_config(json.dumps(generate_config(nodes)), [node.admin_client for node in nodes])\n\n    # Issue flushall right after pushing new config so it runs at the same time as disowned slots are flushed\n    await nodes[1].client.execute_command(\"flushall\")\n\n\n@pytest.mark.parametrize(\"interrupt\", [False, True])\n@dfly_args({\"proactor_threads\": 4, \"cluster_mode\": \"yes\"})\nasync def test_cluster_data_migration(df_factory: DflyInstanceFactory, interrupt: bool):\n    # Check data migration from one node to another\n    instances = [\n        df_factory.create(\n            port=next(next_port),\n            admin_port=next(next_port),\n            vmodule=\"outgoing_slot_migration=2,cluster_family=2,incoming_slot_migration=2,streamer=2\",\n        )\n        for i in 
range(2)\n    ]\n\n    df_factory.start_all(instances)\n\n    nodes = [(await create_node_info(instance)) for instance in instances]\n    nodes[0].slots = [(0, 9000)]\n    nodes[1].slots = [(9001, 16383)]\n\n    await push_config(json.dumps(generate_config(nodes)), [node.admin_client for node in nodes])\n\n    for i in range(20):\n        key = \"KEY\" + str(i)\n        assert await nodes[key_slot(key) // 9001].client.set(key, \"value\")\n\n    assert await nodes[0].client.execute_command(\"DBSIZE\") == 10\n\n    nodes[0].migrations.append(\n        MigrationInfo(\"127.0.0.1\", nodes[1].instance.admin_port, [(3000, 9000)], nodes[1].id)\n    )\n\n    logging.debug(\"Start migration\")\n    await push_config(json.dumps(generate_config(nodes)), [node.admin_client for node in nodes])\n\n    if interrupt:  # Test nodes properly shut down with pending migration\n        await asyncio.sleep(random.random())\n\n        # random instance\n        stop = random.getrandbits(1)\n        keep = 1 - stop\n\n        nodes[stop].instance.stop()\n\n        slots = await nodes[keep].admin_client.execute_command(\"CLUSTER SLOTS\")\n        slots.sort(key=lambda cfg: cfg[0])\n        assert 0 in slots[0] and 9000 in slots[0]\n        assert 9001 in slots[1] and 16383 in slots[1]\n\n        return\n\n    await wait_for_status(nodes[1].admin_client, nodes[0].id, \"FINISHED\")\n\n    for i in range(20, 22):\n        key = \"KEY\" + str(i)\n        assert await nodes[0 if (key_slot(key) // 3000) == 0 else 1].client.set(key, \"value\")\n\n    status = await nodes[0].admin_client.execute_command(\n        \"DFLYCLUSTER\", \"SLOT-MIGRATION-STATUS\", nodes[1].id\n    )\n    status[0].pop()\n    assert status[0] == [\"out\", nodes[1].id, \"FINISHED\", 7]\n\n    status = await nodes[1].admin_client.execute_command(\n        \"DFLYCLUSTER\", \"SLOT-MIGRATION-STATUS\", nodes[0].id\n    )\n    status[0].pop()\n    assert status[0] == [\"in\", nodes[0].id, \"FINISHED\", 7]\n\n    
nodes[0].migrations = []\n    nodes[0].slots = [(0, 2999)]\n    nodes[1].slots = [(3000, 16383)]\n    logging.debug(\"remove finished migrations\")\n    await push_config(json.dumps(generate_config(nodes)), [node.admin_client for node in nodes])\n\n    for i in range(22):\n        key = \"KEY\" + str(i)\n        assert await nodes[0 if (key_slot(key) // 3000) == 0 else 1].client.set(key, \"value\")\n\n    assert await nodes[1].client.execute_command(\"DBSIZE\") == 19\n\n    await check_for_no_state_status([node.admin_client for node in nodes])\n\n\n@dfly_args({\"proactor_threads\": 2, \"cluster_mode\": \"yes\", \"cache_mode\": \"true\"})\nasync def test_migration_with_key_ttl(df_factory):\n    instances = [\n        df_factory.create(port=next(next_port), admin_port=next(next_port)) for i in range(2)\n    ]\n\n    df_factory.start_all(instances)\n\n    nodes = [(await create_node_info(instance)) for instance in instances]\n    nodes[0].slots = [(0, 16383)]\n    nodes[1].slots = []\n\n    await push_config(json.dumps(generate_config(nodes)), [node.admin_client for node in nodes])\n\n    await nodes[0].client.execute_command(\"set k_with_ttl v1 EX 2\")\n    await nodes[0].client.execute_command(\"set k_without_ttl v2\")\n    await nodes[0].client.execute_command(\"set k_sticky v3\")\n    assert await nodes[0].client.execute_command(\"stick k_sticky\") == 1\n\n    nodes[0].migrations.append(\n        MigrationInfo(\"127.0.0.1\", instances[1].port, [(0, 16383)], nodes[1].id)\n    )\n    logging.debug(\"Start migration\")\n    await push_config(json.dumps(generate_config(nodes)), [node.admin_client for node in nodes])\n\n    await wait_for_status(nodes[0].admin_client, nodes[1].id, \"FINISHED\")\n\n    nodes[0].migrations = []\n    nodes[0].slots = []\n    nodes[1].slots = [(0, 16383)]\n    logging.debug(\"finalize migration\")\n    await push_config(json.dumps(generate_config(nodes)), [node.admin_client for node in nodes])\n\n    assert await 
nodes[1].client.execute_command(\"get k_with_ttl\") == \"v1\"\n    assert await nodes[1].client.execute_command(\"get k_without_ttl\") == \"v2\"\n    assert await nodes[1].client.execute_command(\"get k_sticky\") == \"v3\"\n    assert await nodes[1].client.execute_command(\"ttl k_with_ttl\") > 0\n    assert await nodes[1].client.execute_command(\"ttl k_without_ttl\") == -1\n    assert await nodes[1].client.execute_command(\"stick k_sticky\") == 0  # Sticky bit already set\n\n    await asyncio.sleep(2)  # Force expiration\n\n    assert await nodes[1].client.execute_command(\"get k_with_ttl\") == None\n    assert await nodes[1].client.execute_command(\"get k_without_ttl\") == \"v2\"\n    assert await nodes[1].client.execute_command(\"ttl k_with_ttl\") == -2\n    assert await nodes[1].client.execute_command(\"ttl k_without_ttl\") == -1\n    assert await nodes[1].client.execute_command(\"stick k_sticky\") == 0\n\n\n@pytest.mark.exclude_epoll\n@dfly_args({\"proactor_threads\": 4, \"cluster_mode\": \"yes\", \"migration_finalization_timeout_ms\": 5})\nasync def test_network_disconnect_during_migration(df_factory):\n    instances = [\n        df_factory.create(\n            port=next(next_port),\n            admin_port=next(next_port),\n            vmodule=\"cluster_family=9,outgoing_slot_migration=9,incoming_slot_migration=9\",\n        )\n        for i in range(2)\n    ]\n\n    df_factory.start_all(instances)\n\n    nodes = [(await create_node_info(instance)) for instance in instances]\n    nodes[0].slots = [(0, 16383)]\n    nodes[1].slots = []\n\n    await push_config(json.dumps(generate_config(nodes)), [node.admin_client for node in nodes])\n\n    await DebugPopulateSeeder(key_target=100000).run(nodes[0].client)\n    start_capture = await DebugPopulateSeeder.capture(nodes[0].client)\n\n    proxy = Proxy(\"127.0.0.1\", next(next_port), \"127.0.0.1\", nodes[1].instance.admin_port)\n    await proxy.start()\n    task = asyncio.create_task(proxy.serve())\n\n    
nodes[0].migrations.append(MigrationInfo(\"127.0.0.1\", proxy.port, [(0, 16383)], nodes[1].id))\n    try:\n        logging.debug(\"Start migration\")\n        await push_config(json.dumps(generate_config(nodes)), [node.admin_client for node in nodes])\n\n        for _ in range(10):\n            await asyncio.sleep(random.randint(0, 50) / 100)\n            info = await nodes[0].admin_client.info(\"CLUSTER\")\n            logging.debug(\"drop connection: %s\", info)\n            proxy.drop_connection()\n            logging.debug(\n                await nodes[0].admin_client.execute_command(\"DFLYCLUSTER\", \"SLOT-MIGRATION-STATUS\")\n            )\n\n        await wait_for_status(nodes[0].admin_client, nodes[1].id, \"SYNC\", 20)\n    finally:\n        await proxy.close(task)\n\n    await proxy.start()\n    task = asyncio.create_task(proxy.serve())\n    try:\n        await wait_for_status(nodes[0].admin_client, nodes[1].id, \"FINISHED\", 300)\n        nodes[0].migrations = []\n        nodes[0].slots = []\n        nodes[1].slots = [(0, 16383)]\n        logging.debug(\"remove finished migrations\")\n        await push_config(json.dumps(generate_config(nodes)), [node.admin_client for node in nodes])\n\n        assert (await DebugPopulateSeeder.capture(nodes[1].client)) == start_capture\n    finally:\n        await proxy.close(task)\n\n\n@pytest.mark.parametrize(\n    \"node_count, segments, keys, huge_values, cache_mode\",\n    [\n        pytest.param(3, 16, 20_000, 10, \"false\"),\n        pytest.param(3, 16, 20_000, 10, \"true\"),\n        # 1mb effectively disables breakdown of huge values.\n        # TODO: add a test that mixes huge and small values, see\n        # https://github.com/dragonflydb/dragonfly/pull/4144/files/11e5e387d31bcf1bc53dfbb28cf3bcaf094d77fa#r1850130930\n        pytest.param(3, 16, 20_000, 1_000_000, \"true\"),\n        pytest.param(3, 16, 20_000, 1_000_000, \"false\"),\n        pytest.param(\n            5, 20, 30_000, 1_000_000, \"false\", 
marks=[pytest.mark.large, pytest.mark.opt_only]\n        ),\n    ],\n)\n@dfly_args({\"proactor_threads\": 4, \"cluster_mode\": \"yes\"})\nasync def test_cluster_fuzzymigration(\n    df_factory: DflyInstanceFactory,\n    df_seeder_factory,\n    node_count: int,\n    segments: int,\n    keys: int,\n    huge_values: int,\n    cache_mode: string,\n):\n    instances = [\n        df_factory.create(\n            port=next(next_port),\n            admin_port=next(next_port),\n            vmodule=\"outgoing_slot_migration=2,cluster_family=2,incoming_slot_migration=2,streamer=2\",\n            serialization_max_chunk_size=huge_values,\n            replication_stream_output_limit=10,\n            cache_mode=cache_mode,\n        )\n        for i in range(node_count)\n    ]\n    df_factory.start_all(instances)\n\n    nodes = [(await create_node_info(instance)) for instance in instances]\n\n    # Generate equally sized ranges and distribute by nodes\n    step = 16400 // segments\n    for slot_range in [(s, min(s + step - 1, 16383)) for s in range(0, 16383, step)]:\n        nodes[random.randint(0, node_count - 1)].slots.append(slot_range)\n\n    # Push config to all nodes\n    await push_config(json.dumps(generate_config(nodes)), [node.admin_client for node in nodes])\n\n    # Fill instances with some data\n    seeder = df_seeder_factory.create(\n        keys=keys, port=nodes[0].instance.port, cluster_mode=True, mirror_to_fake_redis=True\n    )\n    seed_task = asyncio.create_task(seeder.run())\n\n    # Counter that pushes values to a list\n    async def list_counter(key, client: aioredis.RedisCluster):\n        try:\n            for i in itertools.count(start=1):\n                await client.lpush(key, i)\n        except asyncio.exceptions.CancelledError:\n            return\n\n    # Start ten counters\n    counter_keys = [f\"_counter{i}\" for i in range(10)]\n    counter_connections = [nodes[0].instance.cluster_client() for _ in range(10)]\n    counters = [\n        
asyncio.create_task(list_counter(key, conn))\n        for key, conn in zip(counter_keys, counter_connections)\n    ]\n\n    # Generate migration plan\n    for node_idx, node in enumerate(nodes):\n        random.shuffle(node.slots)\n\n        # Decide on number of outgoing slot ranges\n        outgoing = [[] for _ in range(node_count)]\n        num_outgoing = random.randint(0, len(node.slots))\n\n        # Distribute first 0..num_outgoing\n        for slot_range in node.slots[:num_outgoing]:\n            dest_idx = random.randint(0, node_count - 1)\n            while dest_idx == node_idx:\n                dest_idx = random.randint(0, node_count - 1)\n            outgoing[dest_idx].append(slot_range)\n\n        for dest_idx, dest_slots in enumerate(outgoing):\n            if len(dest_slots) == 0:\n                continue\n\n            print(node_idx, \"migrates to\", dest_idx, \"slots\", dest_slots)\n            node.migrations.append(\n                MigrationInfo(\n                    ip=\"127.0.0.1\",\n                    port=nodes[dest_idx].instance.admin_port,\n                    slots=dest_slots,\n                    node_id=nodes[dest_idx].id,\n                )\n            )\n\n    logging.debug(\"start migrations\")\n    await push_config(json.dumps(generate_config(nodes)), [node.admin_client for node in nodes])\n\n    logging.debug(\"finish migrations\")\n\n    async def all_finished():\n        res = True\n        for node in nodes:\n            states = await node.admin_client.execute_command(\"DFLYCLUSTER\", \"SLOT-MIGRATION-STATUS\")\n            logging.debug(states)\n            for state in states:\n                direction, node_id, st, _, _ = state\n                if direction == \"out\":\n                    if st == \"FINISHED\":\n                        m_id = [id for id, x in enumerate(node.migrations) if x.node_id == node_id][\n                            0\n                        ]\n                        node.slots = [s for s in 
node.slots if s not in node.migrations[m_id].slots]\n                        target_node = [n for n in nodes if n.id == node_id][0]\n                        target_node.slots.extend(node.migrations[m_id].slots)\n                        print(\n                            \"FINISH migration\",\n                            node.id,\n                            \":\",\n                            node.migrations[m_id].node_id,\n                            \" slots:\",\n                            node.migrations[m_id].slots,\n                        )\n                        node.migrations.pop(m_id)\n                        await push_config(\n                            json.dumps(generate_config(nodes)),\n                            [node.admin_client for node in nodes],\n                        )\n                    else:\n                        res = False\n        return res\n\n    @assert_eventually(times=600)\n    async def test_all_finished():\n        assert await all_finished()\n\n    await test_all_finished()\n\n    for counter in counters:\n        counter.cancel()\n        await counter\n\n    # Check counter consistency\n    cluster_client = nodes[0].instance.cluster_client()\n    for key in counter_keys:\n        counter_list = await cluster_client.lrange(key, 0, -1)\n        for i, j in zip(counter_list, counter_list[1:]):\n            assert int(i) == int(j) + 1, f\"Found inconsistent list in {key}: {counter_list}\"\n\n    # Compare to fake redis, capture ignores counter keys\n    seeder.stop()\n    await seed_task\n    fake_capture = await seeder.capture_fake_redis()\n\n    assert await seeder.compare(fake_capture, nodes[0].instance.port)\n\n    await asyncio.gather(*[c.aclose() for c in counter_connections])\n\n\n@dfly_args({\"proactor_threads\": 4, \"cluster_mode\": \"yes\"})\nasync def test_cluster_config_reapply(df_factory: DflyInstanceFactory):\n    \"\"\"Check data migration from one node to another.\"\"\"\n    instances = [\n        
df_factory.create(port=next(next_port), admin_port=next(next_port)) for i in range(2)\n    ]\n    df_factory.start_all(instances)\n\n    nodes = [await create_node_info(instance) for instance in instances]\n    nodes[0].slots = [(0, 8000)]\n    nodes[1].slots = [(8001, 16383)]\n\n    logging.debug(\"Pushing data to slot 6XXX\")\n    SIZE = 10_000\n    await push_config(json.dumps(generate_config(nodes)), [node.admin_client for node in nodes])\n    for i in range(SIZE):\n        assert await nodes[0].admin_client.set(f\"{{key50}}:{i}\", i)  # key50 belongs to slot 6686\n    assert [SIZE, 0] == [await node.admin_client.dbsize() for node in nodes]\n\n    nodes[0].migrations = [\n        MigrationInfo(\"127.0.0.1\", instances[1].admin_port, [(6000, 8000)], nodes[1].id)\n    ]\n    logging.debug(\"Migrating slots 6000-8000\")\n    await push_config(json.dumps(generate_config(nodes)), [node.admin_client for node in nodes])\n\n    await wait_for_status(nodes[0].admin_client, nodes[1].id, \"FINISHED\")\n\n    assert [SIZE, SIZE] == [await node.client.dbsize() for node in nodes]\n\n    logging.debug(\"Reapply config with migration\")\n    await push_config(json.dumps(generate_config(nodes)), [node.admin_client for node in nodes])\n\n    await asyncio.sleep(0.1)\n    assert [SIZE, SIZE] == [await node.client.dbsize() for node in nodes]\n\n    logging.debug(\"Finalizing migration\")\n    nodes[0].migrations = []\n    nodes[0].slots = [(0, 6000)]\n    nodes[1].slots = [(6001, 16383)]\n    await push_config(json.dumps(generate_config(nodes)), [node.admin_client for node in nodes])\n    logging.debug(\"Migration finalized\")\n\n    await asyncio.sleep(1)\n    assert [0, SIZE] == [await node.client.dbsize() for node in nodes]\n\n    for i in range(SIZE):\n        assert str(i) == await nodes[1].client.get(f\"{{key50}}:{i}\")\n\n\n@dfly_args({\"proactor_threads\": 4, \"cluster_mode\": \"yes\"})\nasync def test_cluster_replication_migration(\n    df_factory: DflyInstanceFactory, 
df_seeder_factory: DflySeederFactory\n):\n    \"\"\"\n    Test replication with migration. Create the following setup:\n\n    master_1 -> replica_1, master_2 -> replica_2\n\n    with each master owning half the slots. Let them then fully exchange their slots\n    and make sure the captures on the replicas are equal.\n    \"\"\"\n    instances = [\n        df_factory.create(port=next(next_port), admin_port=next(next_port)) for i in range(4)\n    ]\n    df_factory.start_all(instances)\n\n    nodes = [await create_node_info(n) for n in instances]\n    m1_node, r1_node, m2_node, r2_node = nodes\n    master_nodes = [m1_node, m2_node]\n\n    # divide node slots by half\n    m1_node.slots = [(0, 8000)]\n    m1_node.replicas = [r1_node]\n    m2_node.slots = [(8001, 16383)]\n    m2_node.replicas = [r2_node]\n\n    logging.debug(\"Push initial config\")\n    await push_config(\n        json.dumps(generate_config(master_nodes)), [node.admin_client for node in nodes]\n    )\n\n    logging.debug(\"create data\")\n    seeder = df_seeder_factory.create(\n        keys=2000, port=m1_node.instance.port, cluster_mode=True, mirror_to_fake_redis=True\n    )\n    seed = asyncio.create_task(seeder.run())\n\n    logging.debug(\"start replication\")\n    await r1_node.admin_client.execute_command(f\"replicaof localhost {m1_node.instance.port}\")\n    await r2_node.admin_client.execute_command(f\"replicaof localhost {m2_node.instance.port}\")\n\n    await wait_available_async(r1_node.admin_client)\n    await wait_available_async(r2_node.admin_client)\n\n    logging.debug(\"start migration\")\n    m1_node.migrations = [\n        MigrationInfo(\"127.0.0.1\", m2_node.instance.admin_port, [(0, 8000)], m2_node.id)\n    ]\n    m2_node.migrations = [\n        MigrationInfo(\"127.0.0.1\", m1_node.instance.admin_port, [(8001, 16383)], m1_node.id)\n    ]\n    await push_config(\n        json.dumps(generate_config(master_nodes)), [node.admin_client for node in nodes]\n    )\n\n    await 
wait_for_status(m1_node.admin_client, m2_node.id, \"FINISHED\")\n    await wait_for_status(m2_node.admin_client, m1_node.id, \"FINISHED\")\n\n    logging.debug(\"finish migration\")\n    m1_node.migrations = []\n    m1_node.slots = [(8001, 16383)]\n    m2_node.migrations = []\n    m2_node.slots = [(0, 8000)]\n\n    await push_config(\n        json.dumps(generate_config(master_nodes)), [node.admin_client for node in nodes]\n    )\n\n    # wait for replicas to catch up\n    await asyncio.sleep(2)\n\n    # ensure captures got exchanged\n    seeder.stop()\n    await seed\n    fake_capture = await seeder.capture_fake_redis()\n    assert await seeder.compare(fake_capture, r1_node.instance.port)\n\n\n@dfly_args({\"proactor_threads\": 4, \"cluster_mode\": \"yes\", \"pause_wait_timeout\": 10})\nasync def test_start_replication_during_migration(\n    df_factory: DflyInstanceFactory, df_seeder_factory: DflySeederFactory\n):\n    \"\"\"\n    Test replication with migration. Create the following setup:\n\n    master_1 do migration to master_2 and we start replication for master_1 during this migration\n\n    in the end master_1 and replica_1 should have the same data\n    \"\"\"\n    instances = [\n        df_factory.create(port=next(next_port), admin_port=next(next_port)) for i in range(3)\n    ]\n    df_factory.start_all(instances)\n\n    nodes = [await create_node_info(n) for n in instances]\n    m1_node, r1_node, m2_node = nodes\n    master_nodes = [m1_node, m2_node]\n\n    m1_node.slots = [(0, 16383)]\n    m1_node.replicas = [r1_node]\n    m2_node.slots = []\n\n    logging.debug(\"Push initial config\")\n    await push_config(\n        json.dumps(generate_config(master_nodes)), [node.admin_client for node in nodes]\n    )\n\n    logging.debug(\"create data\")\n    seeder = df_seeder_factory.create(\n        keys=10000, port=nodes[0].instance.port, cluster_mode=True, mirror_to_fake_redis=True\n    )\n    seed = asyncio.create_task(seeder.run())\n\n    logging.debug(\"start 
migration\")\n    m1_node.migrations = [\n        MigrationInfo(\"127.0.0.1\", m2_node.instance.admin_port, [(2001, 16383)], m2_node.id)\n    ]\n    await push_config(\n        json.dumps(generate_config(master_nodes)), [node.admin_client for node in nodes]\n    )\n\n    logging.debug(\"start replication\")\n    await r1_node.admin_client.execute_command(f\"replicaof localhost {m1_node.instance.port}\")\n\n    await wait_available_async(r1_node.admin_client)\n\n    await wait_for_status(m1_node.admin_client, m2_node.id, \"FINISHED\")\n\n    logging.debug(\"finish migration\")\n    m1_node.migrations = []\n    m1_node.slots = [(0, 2000)]\n    m2_node.migrations = []\n    m2_node.slots = [(2001, 16383)]\n\n    await push_config(\n        json.dumps(generate_config(master_nodes)), [node.admin_client for node in nodes]\n    )\n\n    await check_all_replicas_finished([r1_node.client], m1_node.client)\n\n    seeder.stop()\n    await seed\n    fake_capture = await seeder.capture_fake_redis()\n    assert await seeder.compare(fake_capture, r1_node.instance.port)\n\n\n@dfly_args({\"proactor_threads\": 4, \"cluster_mode\": \"yes\"})\nasync def test_keys_expiration_during_migration(df_factory: DflyInstanceFactory):\n    # Check data migration from one node to another with expiration\n    instances = [\n        df_factory.create(port=next(next_port), admin_port=next(next_port)) for i in range(2)\n    ]\n\n    df_factory.start_all(instances)\n\n    nodes = [(await create_node_info(instance)) for instance in instances]\n    nodes[0].slots = [(0, 16383)]\n    nodes[1].slots = []\n\n    await push_config(json.dumps(generate_config(nodes)), [node.admin_client for node in nodes])\n\n    logging.debug(\"Start seeder\")\n    await nodes[0].client.execute_command(\"debug\", \"populate\", \"100\", \"foo\", \"100\", \"RAND\")\n\n    capture_before = await DebugPopulateSeeder.capture(nodes[0].client)\n\n    seeder = ExpirySeeder(timeout=4)\n    seeder_task = 
asyncio.create_task(seeder.run(nodes[0].client))\n    await seeder.wait_until_n_inserts(500)\n\n    logging.debug(\"Start migration\")\n    nodes[0].migrations.append(\n        MigrationInfo(\"127.0.0.1\", nodes[1].instance.admin_port, [(0, 16383)], nodes[1].id)\n    )\n    await push_config(json.dumps(generate_config(nodes)), [node.admin_client for node in nodes])\n\n    await wait_for_status(nodes[1].admin_client, nodes[0].id, \"FINISHED\")\n\n    logging.debug(\"Stop seeders\")\n    seeder.stop()\n    await seeder_task\n\n    logging.debug(\"finish migration\")\n    nodes[0].slots = []\n    nodes[1].slots = [(0, 16383)]\n    await push_config(json.dumps(generate_config(nodes)), [node.admin_client for node in nodes])\n\n    # wait to expire all keys\n    await asyncio.sleep(5)\n\n    assert await DebugPopulateSeeder.capture(nodes[1].client) == capture_before\n\n    stats = await nodes[1].client.info(\"STATS\")\n    assert stats[\"expired_keys\"] > 0\n\n\n@pytest.mark.parametrize(\"migration_first\", [False, True])\n@dfly_args({\"proactor_threads\": 4, \"cluster_mode\": \"yes\"})\nasync def test_snapshoting_during_migration(\n    df_factory: DflyInstanceFactory, df_seeder_factory: DflySeederFactory, migration_first: bool\n):\n    \"\"\"\n    Test saving snapshot during migration. 
Create the following setups:\n\n    1) Start saving and then run migration simultaneously\n    2) Run migration and start saving simultaneously\n\n    The result should be the same: snapshot contains all the data that existed before migration\n    \"\"\"\n    dbfilename = f\"snap_{tmp_file_name()}\"\n    instances = [\n        df_factory.create(\n            dbfilename=dbfilename if i == 0 else \"\",\n            port=next(next_port),\n            admin_port=next(next_port),\n        )\n        for i in range(2)\n    ]\n    df_factory.start_all(instances)\n\n    nodes = [await create_node_info(n) for n in instances]\n\n    nodes[0].slots = [(0, 16383)]\n    nodes[1].slots = []\n\n    logging.debug(\"Push initial config\")\n    await push_config(json.dumps(generate_config(nodes)), [node.admin_client for node in nodes])\n\n    logging.debug(\"create data\")\n    seeder = df_seeder_factory.create(\n        keys=10000, port=nodes[0].instance.port, cluster_mode=True, mirror_to_fake_redis=True\n    )\n    seed = asyncio.create_task(seeder.run())\n\n    nodes[0].migrations = [\n        MigrationInfo(\"127.0.0.1\", nodes[1].instance.admin_port, [(0, 16383)], nodes[1].id)\n    ]\n\n    async def start_migration():\n        logging.debug(\"start migration\")\n        await push_config(json.dumps(generate_config(nodes)), [node.admin_client for node in nodes])\n\n    async def start_save():\n        logging.debug(\"BGSAVE\")\n        await nodes[0].client.execute_command(f\"BGSAVE\")\n\n    if migration_first:\n        await start_migration()\n        await asyncio.sleep(random.randint(0, 10) / 100)\n        await start_save()\n    else:\n        await start_save()\n        await asyncio.sleep(random.randint(0, 10) / 100)\n        await start_migration()\n\n    logging.debug(\"wait for snapshot\")\n    while await is_saving(nodes[0].client):\n        await asyncio.sleep(0.1)\n\n    logging.debug(\"wait migration finish\")\n    await wait_for_status(nodes[0].admin_client, 
nodes[1].id, \"FINISHED\")\n\n    logging.debug(\"finish migration\")\n    nodes[0].migrations = []\n    nodes[0].slots = []\n    nodes[1].migrations = []\n    nodes[1].slots = [(0, 16383)]\n\n    await push_config(json.dumps(generate_config(nodes)), [node.admin_client for node in nodes])\n\n    seeder.stop()\n    await seed\n    fake_capture = await seeder.capture_fake_redis()\n    assert await seeder.compare(fake_capture, nodes[1].instance.port)\n\n    await nodes[1].client.execute_command(\n        \"DFLY\",\n        \"LOAD\",\n        f\"{dbfilename}-summary.dfs\",\n    )\n\n    # TODO: We can't compare the post-loaded data as is, because it might have changed by now.\n    # We can try to use FakeRedis with the DebugPopulateSeeder comparison here.\n\n\n@pytest.mark.exclude_epoll\n@dfly_args({\"proactor_threads\": 4, \"cluster_mode\": \"yes\"})\n@pytest.mark.asyncio\nasync def test_cluster_migration_cancel(df_factory: DflyInstanceFactory):\n    \"\"\"Check data migration from one node to another.\"\"\"\n    instances = [\n        df_factory.create(port=next(next_port), admin_port=next(next_port)) for i in range(2)\n    ]\n    df_factory.start_all(instances)\n\n    nodes = [await create_node_info(instance) for instance in instances]\n    nodes[0].slots = [(0, 8000)]\n    nodes[1].slots = [(8001, 16383)]\n\n    logging.debug(\"Pushing data to slot 6XXX\")\n    SIZE = 10_000\n    await push_config(json.dumps(generate_config(nodes)), [node.admin_client for node in nodes])\n    for i in range(SIZE):\n        assert await nodes[0].client.set(f\"{{key50}}:{i}\", i)  # key50 belongs to slot 6686\n    assert [SIZE, 0] == [await node.client.dbsize() for node in nodes]\n\n    nodes[0].migrations = [\n        MigrationInfo(\"127.0.0.1\", instances[1].admin_port, [(6000, 8000)], nodes[1].id)\n    ]\n    logging.debug(\"Migrating slots 6000-8000\")\n    await push_config(json.dumps(generate_config(nodes)), [node.admin_client for node in nodes])\n\n    
logging.debug(\"Cancelling migration\")\n    nodes[0].migrations = []\n    await push_config(json.dumps(generate_config(nodes)), [node.admin_client for node in nodes])\n    assert SIZE == await nodes[0].client.dbsize()\n\n    @assert_eventually\n    async def node1size0():\n        if await nodes[1].client.dbsize() != 0:\n            logging.debug(await nodes[1].client.execute_command(\"keys *\"))\n            assert False\n\n    await node1size0()\n\n    logging.debug(\"Reissuing migration\")\n    nodes[0].migrations.append(\n        MigrationInfo(\"127.0.0.1\", instances[1].admin_port, [(6001, 8000)], nodes[1].id)\n    )\n    await push_config(json.dumps(generate_config(nodes)), [node.admin_client for node in nodes])\n    await wait_for_status(nodes[0].admin_client, nodes[1].id, \"FINISHED\")\n    assert [SIZE, SIZE] == [await node.client.dbsize() for node in nodes]\n\n    logging.debug(\"Finalizing migration\")\n    nodes[0].migrations = []\n    nodes[0].slots = [(0, 6000)]\n    nodes[1].slots = [(6001, 16383)]\n    await push_config(json.dumps(generate_config(nodes)), [node.admin_client for node in nodes])\n    logging.debug(\"Migration finalized\")\n\n    while 0 != await nodes[0].client.dbsize():\n        logging.debug(f\"wait until source dbsize is empty\")\n        await asyncio.sleep(0.1)\n\n    for i in range(SIZE):\n        assert str(i) == await nodes[1].client.get(f\"{{key50}}:{i}\")\n\n\n@dfly_args({\"proactor_threads\": 2, \"cluster_mode\": \"yes\"})\n@pytest.mark.asyncio\n@pytest.mark.opt_only\n@pytest.mark.exclude_epoll\nasync def test_cluster_migration_huge_container(df_factory: DflyInstanceFactory):\n    instances = [\n        df_factory.create(port=next(next_port), admin_port=next(next_port)) for i in range(2)\n    ]\n    df_factory.start_all(instances)\n\n    nodes = [await create_node_info(instance) for instance in instances]\n    nodes[0].slots = [(0, 16383)]\n    nodes[1].slots = []\n\n    await 
push_config(json.dumps(generate_config(nodes)), [node.admin_client for node in nodes])\n\n    logging.debug(\"Generating huge containers\")\n    seeder = DebugPopulateSeeder(\n        key_target=100,\n        data_size=10_000_000,\n        collection_size=10_000,\n        variance=1,\n        samples=1,\n        types=[\"LIST\", \"HASH\", \"SET\", \"ZSET\", \"STREAM\", \"STRING\"],\n    )\n    await seeder.run(nodes[0].client)\n    source_data = await DebugPopulateSeeder.capture(nodes[0].client)\n\n    mem_before = await get_memory(nodes[0].client, \"used_memory_rss\")\n\n    nodes[0].migrations = [\n        MigrationInfo(\"127.0.0.1\", instances[1].admin_port, [(0, 16383)], nodes[1].id)\n    ]\n    logging.debug(\"Migrating slots\")\n    await push_config(json.dumps(generate_config(nodes)), [node.admin_client for node in nodes])\n\n    logging.debug(\"Waiting for migration to finish\")\n    await wait_for_status(nodes[0].admin_client, nodes[1].id, \"FINISHED\", timeout=300)\n\n    target_data = await DebugPopulateSeeder.capture(nodes[1].client)\n    assert source_data == target_data\n\n    # Get peak memory, because migration removes the data\n    mem_after = await get_memory(nodes[0].client, \"used_memory_peak_rss\")\n    logging.debug(f\"Memory before {mem_before} after {mem_after}\")\n    assert mem_after < mem_before * 1.1\n\n    line = stop_and_get_restore_log(nodes[0].instance)\n\n    # 'with X commands' - how many breakdowns we used for the keys\n    assert extract_int_after_prefix(\"with \", line) > 500_000\n\n    assert extract_int_after_prefix(\"Keys skipped \", line) == 0\n    assert extract_int_after_prefix(\"buckets skipped \", line) == 0\n    assert extract_int_after_prefix(\"keys written \", line) > 90\n\n    # We don't send updates during the migration\n    assert extract_int_after_prefix(\"buckets on_db_update \", line) == 0\n\n\n@dfly_args(\n    {\"proactor_threads\": 2, \"cluster_mode\": \"yes\", \"migration_buckets_serialization_threshold\": 
1}\n)\n@pytest.mark.large\n@pytest.mark.parametrize(\"chunk_size\", [1_000_000, 30])\n@pytest.mark.asyncio\n@pytest.mark.exclude_epoll\nasync def test_cluster_migration_while_seeding(\n    df_factory: DflyInstanceFactory, df_seeder_factory: DflySeederFactory, chunk_size\n):\n    instances = [\n        df_factory.create(\n            port=next(next_port),\n            admin_port=next(next_port),\n            serialization_max_chunk_size=chunk_size,\n        )\n        for _ in range(2)\n    ]\n    df_factory.start_all(instances)\n\n    nodes = [await create_node_info(instance) for instance in instances]\n    nodes[0].slots = [(0, 16383)]\n    nodes[1].slots = []\n    client0 = nodes[0].client\n\n    await push_config(json.dumps(generate_config(nodes)), [node.admin_client for node in nodes])\n\n    logging.debug(\"Seeding cluster\")\n    seeder = df_seeder_factory.create(\n        keys=20_000, port=instances[0].port, cluster_mode=True, mirror_to_fake_redis=True\n    )\n    await seeder.run(target_deviation=0.1)\n\n    seed = asyncio.create_task(seeder.run())\n    await asyncio.sleep(1)\n\n    nodes[0].migrations = [\n        MigrationInfo(\"127.0.0.1\", instances[1].admin_port, [(0, 16383)], nodes[1].id)\n    ]\n    logging.debug(\"Migrating slots\")\n    await push_config(json.dumps(generate_config(nodes)), [node.admin_client for node in nodes])\n\n    logging.debug(\"Waiting for migration to finish\")\n    await wait_for_status(nodes[0].admin_client, nodes[1].id, \"FINISHED\", timeout=300)\n    logging.debug(\"Migration finished\")\n\n    logging.debug(\"Finalizing migration\")\n    nodes[0].slots = []\n    nodes[1].slots = [(0, 16383)]\n    await push_config(json.dumps(generate_config(nodes)), [node.admin_client for node in nodes])\n\n    await asyncio.sleep(1)  # Let seeder feed dest before migration finishes\n\n    seeder.stop()\n    await seed\n    logging.debug(\"Seeding finished\")\n\n    assert (\n        await get_memory(client0, \"used_memory_peak_rss\")\n 
       < await get_memory(client0, \"used_memory_rss\") * 1.2\n    )\n\n    capture = await seeder.capture_fake_redis()\n    assert await seeder.compare(capture, instances[1].port)\n\n    line = stop_and_get_restore_log(nodes[0].instance)\n    assert extract_int_after_prefix(\"Keys skipped \", line) == 0\n    assert extract_int_after_prefix(\"buckets skipped \", line) > 0\n    assert extract_int_after_prefix(\"keys written \", line) >= 15_000\n    # buckets on_db_update can be 0 once in a while because we can not predict keys distribution during migration\n    assert extract_int_after_prefix(\"buckets on_db_update \", line) > 0\n\n\n@dfly_args({\"proactor_threads\": 2, \"cluster_mode\": \"yes\"})\n@pytest.mark.asyncio\nasync def test_cluster_migrations_sequence(\n    df_factory: DflyInstanceFactory, df_seeder_factory: DflySeederFactory\n):\n    instances = [\n        df_factory.create(port=next(next_port), admin_port=next(next_port)) for _ in range(2)\n    ]\n    df_factory.start_all(instances)\n\n    nodes = [await create_node_info(instance) for instance in instances]\n    nodes[0].slots = [(0, 16383)]\n    nodes[1].slots = []\n\n    await push_config(json.dumps(generate_config(nodes)), [node.admin_client for node in nodes])\n\n    logging.debug(\"Seeding cluster\")\n    seeder = df_seeder_factory.create(\n        keys=10_000, port=instances[0].port, cluster_mode=True, mirror_to_fake_redis=True\n    )\n    await seeder.run(target_deviation=0.1)\n\n    seed = asyncio.create_task(seeder.run())\n    await asyncio.sleep(1)\n\n    slot_step = 500\n    nodes[0].migrations = [\n        MigrationInfo(\"127.0.0.1\", instances[1].admin_port, [(0, slot_step - 1)], nodes[1].id)\n    ]\n    logging.debug(\"Migrating slots\")\n    await push_config(json.dumps(generate_config(nodes)), [node.admin_client for node in nodes])\n\n    for i in range(slot_step, 16301, slot_step):\n        logging.debug(\"Waiting for migration to finish\")\n        await 
wait_for_status(nodes[0].admin_client, nodes[1].id, \"FINISHED\", timeout=10)\n\n        nodes[0].slots = [(i, 16383)]\n        nodes[1].slots = [(0, i - 1)]\n        end_slot = min(i + slot_step - 1, 16383)\n        nodes[0].migrations = [\n            MigrationInfo(\"127.0.0.1\", instances[1].admin_port, [(i, end_slot)], nodes[1].id)\n        ]\n\n        await push_config(json.dumps(generate_config(nodes)), [node.admin_client for node in nodes])\n\n    logging.debug(\"Waiting for migration to finish\")\n    await wait_for_status(nodes[0].admin_client, nodes[1].id, \"FINISHED\", timeout=10)\n    await push_config(json.dumps(generate_config(nodes)), [node.admin_client for node in nodes])\n\n    logging.debug(\"Finalizing migration\")\n    nodes[0].slots = []\n    nodes[1].slots = [(0, 16383)]\n    nodes[0].migrations = []\n    await push_config(json.dumps(generate_config(nodes)), [node.admin_client for node in nodes])\n\n    logging.debug(\"stop seeding\")\n    seeder.stop()\n    await seed\n\n    capture = await seeder.capture_fake_redis()\n    assert await seeder.compare(capture, instances[1].port)\n\n\ndef parse_lag(replication_info: str):\n    lags = re.findall(\"lag=([0-9]+)\\r\\n\", replication_info)\n    assert len(lags) == 1\n    return int(lags[0])\n\n\nasync def await_no_lag(client: aioredis.Redis, timeout=10):\n    start = time.time()\n    while (time.time() - start) < timeout:\n        lag = parse_lag(await client.execute_command(\"info replication\"))\n        print(\"current lag =\", lag)\n        if lag == 0:\n            return\n        await asyncio.sleep(0.05)\n\n    raise RuntimeError(\"Lag did not reduced to 0!\")\n\n\n@pytest.mark.exclude_epoll\n@dfly_args({\"proactor_threads\": 4})\nasync def test_replicate_cluster(df_factory: DflyInstanceFactory, df_seeder_factory):\n    \"\"\"\n    Create dragonfly cluster of 2 nodes.\n    Create additional dragonfly server in emulated mode.\n    Replicate the dragonfly cluster into a single dragonfly 
node.\n    Send traffic before replication start and while replicating.\n    Promote the replica to master and check data consistency between cluster and single node.\n    \"\"\"\n    replica = df_factory.create(admin_port=next(next_port), cluster_mode=\"emulated\")\n    cluster_nodes = [\n        df_factory.create(admin_port=next(next_port), cluster_mode=\"yes\") for i in range(2)\n    ]\n\n    # Start instances and connect clients\n    df_factory.start_all(cluster_nodes + [replica])\n    c_nodes = [node.client() for node in cluster_nodes]\n\n    c_replica = replica.client()\n\n    node_ids = await asyncio.gather(*(get_node_id(c) for c in c_nodes))\n    config = f\"\"\"\n      [\n        {{\n          \"slot_ranges\": [ {{ \"start\": 0, \"end\": LAST_SLOT_CUTOFF }} ],\n          \"master\": {{ \"id\": \"{node_ids[0]}\", \"ip\": \"localhost\", \"port\": {cluster_nodes[0].port} }},\n          \"replicas\": []\n        }},\n        {{\n          \"slot_ranges\": [ {{ \"start\": NEXT_SLOT_CUTOFF, \"end\": 16383 }} ],\n          \"master\": {{ \"id\": \"{node_ids[1]}\", \"ip\": \"localhost\", \"port\": {cluster_nodes[1].port} }},\n          \"replicas\": []\n        }}\n      ]\n    \"\"\"\n\n    await push_config(\n        config.replace(\"LAST_SLOT_CUTOFF\", \"5259\").replace(\"NEXT_SLOT_CUTOFF\", \"5260\"),\n        c_nodes,\n    )\n\n    # Fill instances with some data\n    seeder = df_seeder_factory.create(\n        keys=2000, port=cluster_nodes[0].port, cluster_mode=True, mirror_to_fake_redis=True\n    )\n    await seeder.run(target_deviation=0.1)\n\n    fill_task = asyncio.create_task(seeder.run())\n\n    # Start replication\n    await c_replica.execute_command(\"REPLICAOF localhost \" + str(cluster_nodes[0].port) + \" 0 5259\")\n    await c_replica.execute_command(\n        \"ADDREPLICAOF localhost \" + str(cluster_nodes[1].port) + \" 5260 16383\"\n    )\n\n    # give seeder time to run.\n    await asyncio.sleep(1.0)\n    # Stop seeder\n    seeder.stop()\n    
await fill_task\n\n    # wait for replication to finish\n    await asyncio.gather(*(asyncio.create_task(await_no_lag(c)) for c in c_nodes))\n\n    # promote replica to master and compare data\n    await c_replica.execute_command(\"REPLICAOF NO ONE\")\n    capture = await seeder.capture()\n    assert await seeder.compare(capture, replica.port)\n    fake_capture = await seeder.capture_fake_redis()\n    assert await seeder.compare(fake_capture, replica.port)\n\n\nasync def await_stable_sync(m_client: aioredis.Redis, replica_port, timeout=10):\n    start = time.time()\n\n    async def is_stable():\n        role = await m_client.execute_command(\"role\")\n        return role == [\n            \"master\",\n            [[\"127.0.0.1\", str(replica_port), \"online\"]],\n        ]\n\n    while (time.time() - start) < timeout:\n        if await is_stable():\n            return\n        await asyncio.sleep(0.05)\n\n    raise RuntimeError(\"Failed to reach stable sync\")\n\n\n@dfly_args({\"proactor_threads\": 4})\nasync def test_replicate_disconnect_cluster(df_factory: DflyInstanceFactory, df_seeder_factory):\n    \"\"\"\n    Create dragonfly cluster of 2 nodes and additional dragonfly server in emulated mode.\n    Populate the cluster with data\n    Replicate the dragonfly cluster into a single dragonfly node and wait for stable sync\n    Break connection between cluster node 0 and replica and reconnect\n    Promote replica to master\n    Compare cluster data and replica data\n    \"\"\"\n    replica = df_factory.create(admin_port=next(next_port), cluster_mode=\"emulated\")\n    cluster_nodes = [\n        df_factory.create(admin_port=next(next_port), cluster_mode=\"yes\") for i in range(2)\n    ]\n\n    # Start instances and connect clients\n    df_factory.start_all(cluster_nodes + [replica])\n    c_nodes = [node.client() for node in cluster_nodes]\n\n    c_replica = replica.client()\n\n    node_ids = await asyncio.gather(*(get_node_id(c) for c in c_nodes))\n    config = 
f\"\"\"\n      [\n        {{\n          \"slot_ranges\": [ {{ \"start\": 0, \"end\": LAST_SLOT_CUTOFF }} ],\n          \"master\": {{ \"id\": \"{node_ids[0]}\", \"ip\": \"localhost\", \"port\": {cluster_nodes[0].port} }},\n          \"replicas\": []\n        }},\n        {{\n          \"slot_ranges\": [ {{ \"start\": NEXT_SLOT_CUTOFF, \"end\": 16383 }} ],\n          \"master\": {{ \"id\": \"{node_ids[1]}\", \"ip\": \"localhost\", \"port\": {cluster_nodes[1].port} }},\n          \"replicas\": []\n        }}\n      ]\n    \"\"\"\n\n    await push_config(\n        config.replace(\"LAST_SLOT_CUTOFF\", \"5259\").replace(\"NEXT_SLOT_CUTOFF\", \"5260\"),\n        c_nodes,\n    )\n\n    # Fill instances with some data\n    seeder = df_seeder_factory.create(\n        keys=2000, port=cluster_nodes[0].port, cluster_mode=True, mirror_to_fake_redis=True\n    )\n    await seeder.run(target_deviation=0.1)\n\n    fill_task = asyncio.create_task(seeder.run())\n\n    proxy = Proxy(\"127.0.0.1\", next(next_port), \"127.0.0.1\", cluster_nodes[0].port)\n    await proxy.start()\n    proxy_task = asyncio.create_task(proxy.serve())\n\n    # Start replication\n    await c_replica.execute_command(\"REPLICAOF localhost \" + str(proxy.port) + \" 0 5259\")\n    await c_replica.execute_command(\n        \"ADDREPLICAOF localhost \" + str(cluster_nodes[1].port) + \" 5260 16383\"\n    )\n\n    # wait for replication to reach stable state on all nodes\n    await asyncio.gather(\n        *(asyncio.create_task(await_stable_sync(c, replica.port)) for c in c_nodes)\n    )\n\n    # break connection between first node and replica\n    await proxy.close(proxy_task)\n    await asyncio.sleep(3)\n\n    async def is_first_master_conn_down(conn):\n        info = await conn.execute_command(\"INFO REPLICATION\")\n        print(info)\n        statuses = re.findall(\"master_link_status:(down|up)\\r\\n\", info)\n        assert len(statuses) == 2\n        assert statuses[0] == \"down\"\n        assert statuses[1] == 
\"up\"\n\n    await is_first_master_conn_down(c_replica)\n\n    # start connection again\n    await proxy.start()\n    proxy_task = asyncio.create_task(proxy.serve())\n\n    seeder.stop()\n    await fill_task\n\n    # wait for stable sync on first master\n    await await_stable_sync(c_nodes[0], replica.port)\n    # wait for no lag on all cluster nodes\n    await asyncio.gather(*(asyncio.create_task(await_no_lag(c)) for c in c_nodes))\n\n    # promote replica to master and compare data\n    await c_replica.execute_command(\"REPLICAOF NO ONE\")\n    capture = await seeder.capture()\n    assert await seeder.compare(capture, replica.port)\n    fake_capture = await seeder.capture_fake_redis()\n    assert await seeder.compare(fake_capture, replica.port)\n\n    await proxy.close(proxy_task)\n\n\ndef is_offset_eq_master_repl_offset(replication_info: str):\n    offset = re.findall(\"offset=([0-9]+),\", replication_info)\n    assert len(offset) == 1\n    master_repl_offset = re.findall(\"master_repl_offset:([0-9]+)\\r\\n\", replication_info)\n    assert len(master_repl_offset) == 1\n    return int(offset[0]) == int(master_repl_offset[0])\n\n\nasync def await_eq_offset(client: aioredis.Redis, timeout=20):\n    start = time.time()\n    while (time.time() - start) < timeout:\n        if is_offset_eq_master_repl_offset(await client.execute_command(\"info replication\")):\n            return\n        await asyncio.sleep(0.05)\n\n    raise RuntimeError(\"offset not equal!\")\n\n\n@pytest.mark.exclude_epoll\n@dfly_args({\"proactor_threads\": 4})\nasync def test_replicate_redis_cluster(redis_cluster, df_factory, df_seeder_factory):\n    \"\"\"\n    Create redis cluster of 3 nodes.\n    Create dragonfly server in emulated mode.\n    Replicate the redis cluster into a single dragonfly node.\n    Send traffic before replication start and while replicating.\n    Promote the replica to master and check data consistency between cluster and single dragonfly node.\n    \"\"\"\n    replica = 
df_factory.create(admin_port=next(next_port), cluster_mode=\"emulated\")\n\n    # Start instances and connect clients\n    df_factory.start_all([replica])\n\n    redis_cluster_nodes = redis_cluster\n    node_clients = [\n        aioredis.Redis(decode_responses=True, host=\"localhost\", port=node.port)\n        for node in redis_cluster_nodes\n    ]\n\n    c_replica = replica.client()\n\n    seeder = df_seeder_factory.create(\n        keys=2000, port=redis_cluster_nodes[0].port, cluster_mode=True\n    )\n    await seeder.run(target_deviation=0.1)\n\n    fill_task = asyncio.create_task(seeder.run())\n\n    # Start replication\n    await c_replica.execute_command(\n        \"REPLICAOF localhost \" + str(redis_cluster_nodes[0].port) + \" 0 5460\"\n    )\n    await asyncio.sleep(0.5)\n    await c_replica.execute_command(\n        \"ADDREPLICAOF localhost \" + str(redis_cluster_nodes[1].port) + \" 5461 10922\"\n    )\n    await asyncio.sleep(0.5)\n    await c_replica.execute_command(\n        \"ADDREPLICAOF localhost \" + str(redis_cluster_nodes[2].port) + \" 10923 16383\"\n    )\n\n    # give seeder time to run.\n    await asyncio.sleep(0.5)\n    # Stop seeder\n    seeder.stop()\n    await fill_task\n\n    # wait for replication to finish\n    await asyncio.gather(*(asyncio.create_task(await_eq_offset(client)) for client in node_clients))\n\n    await c_replica.execute_command(\"REPLICAOF NO ONE\")\n    capture = await seeder.capture()\n    assert await seeder.compare(capture, replica.port)\n\n\n@dfly_args({\"proactor_threads\": 4, \"pause_wait_timeout\": 10})\nasync def test_replicate_disconnect_redis_cluster(redis_cluster, df_factory, df_seeder_factory):\n    \"\"\"\n    Create redis cluster of 3 nodes.\n    Create dragonfly server in emulated mode.\n    Replicate the redis cluster into a single dragonfly node.\n    Send traffic before replication start and while replicating.\n    Close connection between dfly replica and one of master nodes and reconnect\n    Send 
more traffic\n    Promote the replica to master and check data consistency between cluster and single dragonfly node.\n    \"\"\"\n    replica = df_factory.create(admin_port=next(next_port), cluster_mode=\"emulated\")\n\n    # Start instances and connect clients\n    df_factory.start_all([replica])\n\n    redis_cluster_nodes = redis_cluster\n    node_clients = [\n        aioredis.Redis(decode_responses=True, host=\"localhost\", port=node.port)\n        for node in redis_cluster_nodes\n    ]\n\n    c_replica = replica.client()\n\n    seeder = df_seeder_factory.create(\n        keys=1000, port=redis_cluster_nodes[0].port, cluster_mode=True\n    )\n    await seeder.run(target_deviation=0.1)\n\n    fill_task = asyncio.create_task(seeder.run())\n\n    proxy = Proxy(\"127.0.0.1\", next(next_port), \"127.0.0.1\", redis_cluster_nodes[1].port)\n    await proxy.start()\n    proxy_task = asyncio.create_task(proxy.serve())\n\n    # Start replication\n    await c_replica.execute_command(\n        \"REPLICAOF localhost \" + str(redis_cluster_nodes[0].port) + \" 0 5460\"\n    )\n    await c_replica.execute_command(\"ADDREPLICAOF localhost \" + str(proxy.port) + \" 5461 10922\")\n    await c_replica.execute_command(\n        \"ADDREPLICAOF localhost \" + str(redis_cluster_nodes[2].port) + \" 10923 16383\"\n    )\n\n    # give seeder time to run.\n    await asyncio.sleep(1)\n\n    # break connection between second node and replica\n    await proxy.close(proxy_task)\n    await asyncio.sleep(3)\n\n    # check second node connection is down\n    info = await c_replica.execute_command(\"INFO REPLICATION\")\n    statuses = re.findall(\"master_link_status:(down|up)\\r\\n\", info)\n    assert len(statuses) == 3\n    assert statuses[0] == \"up\"\n    assert statuses[1] == \"down\"\n    assert statuses[2] == \"up\"\n\n    # start connection again\n    await proxy.start()\n    proxy_task = asyncio.create_task(proxy.serve())\n\n    # give seeder more time to run\n    await 
asyncio.sleep(1)\n\n    # check second node connection is up\n    info = await c_replica.execute_command(\"INFO REPLICATION\")\n    statuses = re.findall(\"master_link_status:(down|up)\\r\\n\", info)\n    assert len(statuses) == 3\n    assert statuses[0] == \"up\"\n    assert statuses[1] == \"up\"\n    assert statuses[2] == \"up\"\n\n    # give seeder time to run.\n    await asyncio.sleep(1)\n\n    # Stop seeder\n    seeder.stop()\n    await fill_task\n\n    # wait for replication to finish\n    await asyncio.gather(*(asyncio.create_task(await_eq_offset(client)) for client in node_clients))\n\n    await c_replica.execute_command(\"REPLICAOF NO ONE\")\n    capture = await seeder.capture()\n    assert await seeder.compare(capture, replica.port)\n    await proxy.close(proxy_task)\n\n\n@pytest.mark.large\n@dfly_args({\"cluster_mode\": \"yes\"})\nasync def test_cluster_memory_consumption_migration(df_factory: DflyInstanceFactory):\n    # Check data migration from one node to another\n    instances = [\n        df_factory.create(\n            maxmemory=\"15G\",\n            port=next(next_port),\n            admin_port=next(next_port),\n            vmodule=\"streamer=2\",\n        )\n        for i in range(3)\n    ]\n\n    df_factory.start_all(instances)\n\n    nodes = [(await create_node_info(instance)) for instance in instances]\n    nodes[0].slots = [(0, 16383)]\n    for i in range(1, len(instances)):\n        nodes[i].slots = []\n\n    await push_config(json.dumps(generate_config(nodes)), [node.admin_client for node in nodes])\n\n    await nodes[0].client.execute_command(\"DEBUG POPULATE 5000000 test 1000 RAND SLOTS 0 16383\")\n\n    await asyncio.sleep(2)\n\n    migration_nodes = len(instances) - 1\n    slot_step = 16384 // migration_nodes\n    ranges = []\n    for i in range(0, migration_nodes):\n        ranges.append(i * slot_step)\n    ranges.append(16384)\n\n    for i in range(1, len(instances)):\n        nodes[0].migrations.append(\n            MigrationInfo(\n 
               \"127.0.0.1\",\n                nodes[i].instance.admin_port,\n                [(ranges[i - 1], ranges[i] - 1)],\n                nodes[i].id,\n            )\n        )\n\n    logging.debug(\"Start migration\")\n    await push_config(json.dumps(generate_config(nodes)), [node.admin_client for node in nodes])\n\n    await wait_for_status(nodes[1].admin_client, nodes[0].id, \"FINISHED\", 1000)\n\n    nodes[0].migrations = []\n    nodes[0].slots = []\n    for i in range(1, len(instances)):\n        nodes[i].slots = [(ranges[i - 1], ranges[i] - 1)]\n    logging.debug(\"remove finished migrations\")\n    await push_config(json.dumps(generate_config(nodes)), [node.admin_client for node in nodes])\n\n    await check_for_no_state_status([node.admin_client for node in nodes])\n\n\n@pytest.mark.large\n@pytest.mark.exclude_epoll\n@pytest.mark.asyncio\n@dfly_args({\"proactor_threads\": 4, \"cluster_mode\": \"yes\", \"migration_buckets_cpu_budget\": 1})\nasync def test_migration_timeout_on_sync(df_factory: DflyInstanceFactory, df_seeder_factory):\n    # Timeout set to 3 seconds because we must first saturate the socket before we get the timeout\n    instances = [\n        df_factory.create(\n            port=next(next_port),\n            admin_port=next(next_port),\n            replication_timeout=3000,\n            vmodule=\"outgoing_slot_migration=2,cluster_family=2,incoming_slot_migration=2\",\n        )\n        for i in range(2)\n    ]\n\n    df_factory.start_all(instances)\n\n    nodes = [(await create_node_info(instance)) for instance in instances]\n    nodes[0].slots = [(0, 16383)]\n    nodes[1].slots = []\n\n    await push_config(json.dumps(generate_config(nodes)), [node.admin_client for node in nodes])\n\n    logging.debug(\"source node DEBUG POPULATE\")\n\n    await DebugPopulateSeeder(key_target=300000, data_size=1000).run(nodes[0].client)\n\n    # we use this seeder to saturate the pending_buf_ in streamer\n    seeder = 
df_seeder_factory.create(port=nodes[0].instance.port, cluster_mode=True)\n    fill_task = asyncio.create_task(seeder.run())\n\n    logging.debug(\"Start migration\")\n    nodes[0].migrations.append(\n        MigrationInfo(\"127.0.0.1\", nodes[1].instance.admin_port, [(0, 16383)], nodes[1].id)\n    )\n    await push_config(json.dumps(generate_config(nodes)), [node.admin_client for node in nodes])\n\n    await asyncio.sleep(random.randint(0, 50) / 100)\n    # to pause migration we need to be in sync state\n    await wait_for_status(nodes[1].admin_client, nodes[0].id, \"SYNC\", 1000)\n\n    logging.debug(\"debug migration pause\")\n    await nodes[1].client.execute_command(\"debug migration pause\")\n\n    await wait_for_error(\n        nodes[0].admin_client, nodes[1].id, \"JournalStreamer write operation timeout\", 30\n    )\n\n    logging.debug(\"debug migration resume\")\n    await nodes[1].client.execute_command(\"debug migration resume\")\n\n    # Stop seeder\n    seeder.stop()\n    await fill_task\n\n    await wait_for_status(nodes[0].admin_client, nodes[1].id, \"FINISHED\", 300)\n    await wait_for_status(nodes[1].admin_client, nodes[0].id, \"FINISHED\")\n\n    with pytest.raises(MovedError) as e_info:\n        await nodes[0].client.get(\"x\")\n\n    assert f\"16287 127.0.0.1:{instances[1].port}\" == str(e_info.value)\n\n    nodes[0].migrations = []\n    # cancel migration for the source node to get the original data from it\n    await push_config(json.dumps(generate_config(nodes)), [nodes[0].admin_client])\n\n    nodes[0].slots = []\n    nodes[1].slots = [(0, 16383)]\n    # finish migration for the target node to get the migrated data from it\n    await push_config(json.dumps(generate_config(nodes)), [nodes[1].admin_client])\n\n    source_capture = await DebugPopulateSeeder.capture(nodes[0].client)\n    assert (await DebugPopulateSeeder.capture(nodes[1].client)) == source_capture\n\n\n\"\"\"\nTest cluster node distributing its slots into 2 other nodes.\nIn 
this test we start migrating to the second node only after the first one finished to\nreproduce the bug found in issue #4455\n\"\"\"\n\n\n@pytest.mark.asyncio\n@dfly_args({\"proactor_threads\": 4, \"cluster_mode\": \"yes\"})\nasync def test_migration_one_after_another(df_factory: DflyInstanceFactory, df_seeder_factory):\n    # 1. Create cluster of 3 nodes with all slots allocated to first node.\n    instances = [\n        df_factory.create(\n            port=next(next_port),\n            admin_port=next(next_port),\n            vmodule=\"outgoing_slot_migration=2,cluster_family=2,incoming_slot_migration=2,streamer=2\",\n        )\n        for i in range(3)\n    ]\n    df_factory.start_all(instances)\n\n    nodes = [(await create_node_info(instance)) for instance in instances]\n    nodes[0].slots = [(0, 16383)]\n    nodes[1].slots = []\n    nodes[2].slots = []\n    await push_config(json.dumps(generate_config(nodes)), [node.admin_client for node in nodes])\n\n    logging.debug(\"DEBUG POPULATE first node\")\n    key_num = 100000\n    await DebugPopulateSeeder(key_target=key_num, data_size=100).run(nodes[0].client)\n    dbsize_node0 = await nodes[0].client.dbsize()\n    assert dbsize_node0 > (key_num * 0.95)\n\n    # 2. Start migrating part of the slots from first node to second\n    logging.debug(\"Start first migration\")\n    nodes[0].migrations.append(\n        MigrationInfo(\"127.0.0.1\", nodes[1].instance.admin_port, [(0, 16300)], nodes[1].id)\n    )\n    await push_config(json.dumps(generate_config(nodes)), [node.admin_client for node in nodes])\n\n    # 3. 
Wait for migration to finish\n    await wait_for_status(nodes[0].admin_client, nodes[1].id, \"FINISHED\", timeout=50)\n    await wait_for_status(nodes[1].admin_client, nodes[0].id, \"FINISHED\", timeout=50)\n\n    nodes[0].migrations = []\n    nodes[0].slots = [(16301, 16383)]\n    nodes[1].slots = [(0, 16300)]\n    nodes[2].slots = []\n    await push_config(json.dumps(generate_config(nodes)), [node.admin_client for node in nodes])\n\n    # 4. Start migrating remaining slots from first node to third node\n    logging.debug(\"Start second migration\")\n    nodes[0].migrations.append(\n        MigrationInfo(\"127.0.0.1\", nodes[2].instance.admin_port, [(16301, 16383)], nodes[2].id)\n    )\n    await push_config(json.dumps(generate_config(nodes)), [node.admin_client for node in nodes])\n\n    # 5. Wait for migration to finish\n    await wait_for_status(nodes[0].admin_client, nodes[2].id, \"FINISHED\", timeout=10)\n    await wait_for_status(nodes[2].admin_client, nodes[0].id, \"FINISHED\", timeout=10)\n\n    nodes[0].migrations = []\n    nodes[0].slots = []\n    nodes[1].slots = [(0, 16300)]\n    nodes[2].slots = [(16301, 16383)]\n    await push_config(json.dumps(generate_config(nodes)), [node.admin_client for node in nodes])\n\n    # 6. 
Check all data was migrated\n    # Using dbsize to check all the data was migrated to the other nodes.\n    # Note: we can not use the seeder capture as we migrate the data to 2 different nodes.\n    # TODO: improve the migration correctness check by running the seeder capture on slot range (requires changes in capture script).\n    dbsize_node1 = await nodes[1].client.dbsize()\n    dbsize_node2 = await nodes[2].client.dbsize()\n    assert dbsize_node1 + dbsize_node2 == dbsize_node0\n    assert dbsize_node2 > 0 and dbsize_node1 > 0\n\n\n\"\"\"\nTest cluster node distributing its slots into 3 other nodes.\nIn this test we randomize the slot ranges that are migrated to each node\nFor each migration we start migration, wait for it to finish and once it is finished we send migration finalization config\n\"\"\"\n\n\n@pytest.mark.large\n@pytest.mark.exclude_epoll\n@pytest.mark.asyncio\n@dfly_args({\"proactor_threads\": 4, \"cluster_mode\": \"yes\", \"pause_wait_timeout\": 10})\nasync def test_migration_rebalance_node(df_factory: DflyInstanceFactory, df_seeder_factory):\n    # 1. 
Create cluster of 3 nodes with all slots allocated to first node.\n    instances = [\n        df_factory.create(\n            port=next(next_port),\n            admin_port=next(next_port),\n            vmodule=\"outgoing_slot_migration=2,cluster_family=2,incoming_slot_migration=2,streamer=2\",\n        )\n        for i in range(4)\n    ]\n    df_factory.start_all(instances)\n\n    def create_random_ranges():\n        # Generate 2 random breakpoints within the range\n        breakpoints = sorted(random.sample(range(1, 16382), 2))\n        ranges = [\n            (0, breakpoints[0] - 1),\n            (breakpoints[0], breakpoints[1] - 1),\n            (breakpoints[1], 16383),\n        ]\n        return ranges\n\n    # Create 3 random ranges from 0 to 16383\n    random_ranges = create_random_ranges()\n\n    nodes = [(await create_node_info(instance)) for instance in instances]\n    nodes[0].slots = random_ranges\n    nodes[1].slots = []\n    nodes[2].slots = []\n    nodes[3].slots = []\n    await push_config(json.dumps(generate_config(nodes)), [node.admin_client for node in nodes])\n\n    key_num = 100000\n    logging.debug(f\"DEBUG POPULATE first node with number of keys: {key_num}\")\n    await DebugPopulateSeeder(key_target=key_num, data_size=100).run(nodes[0].client)\n    dbsize_node0 = await nodes[0].client.dbsize()\n    assert dbsize_node0 > (key_num * 0.95)\n\n    logging.debug(\"start seeding\")\n    # Running seeder with pipeline mode when finalizing migrations leads to errors\n    # TODO: I believe that changing the seeder to generate pipeline command only on specific slot will fix the problem\n    seeder = df_seeder_factory.create(\n        keys=50_000,\n        port=instances[0].port,\n        cluster_mode=True,\n        pipeline=False,\n        mirror_to_fake_redis=True,\n    )\n    await seeder.run(target_deviation=0.1)\n    seed = asyncio.create_task(seeder.run())\n\n    migration_info = [\n        MigrationInfo(\"127.0.0.1\", 
nodes[1].instance.admin_port, [random_ranges[0]], nodes[1].id),\n        MigrationInfo(\"127.0.0.1\", nodes[2].instance.admin_port, [random_ranges[1]], nodes[2].id),\n        MigrationInfo(\"127.0.0.1\", nodes[3].instance.admin_port, [random_ranges[2]], nodes[3].id),\n    ]\n\n    nodes_lock = asyncio.Lock()\n\n    async def do_migration(index):\n        await asyncio.sleep(random.randint(1, 10) / 5)\n        async with nodes_lock:\n            logging.debug(f\"Start migration from node {index}\")\n            nodes[0].migrations.append(migration_info[index - 1])\n            await push_config(\n                json.dumps(generate_config(nodes)), [node.admin_client for node in nodes]\n            )\n\n        logging.debug(f\"wait migration from node {index}\")\n        await wait_for_status(nodes[0].admin_client, nodes[index].id, \"FINISHED\", timeout=50)\n        await wait_for_status(nodes[index].admin_client, nodes[0].id, \"FINISHED\", timeout=50)\n        logging.debug(f\"finished migration from node {index}\")\n        await asyncio.sleep(random.randint(1, 5) / 5)\n        async with nodes_lock:\n            logging.debug(f\"Finalize migration from node {index}\")\n            nodes[index].slots = migration_info[index - 1].slots\n            nodes[0].slots.remove(migration_info[index - 1].slots[0])\n            nodes[0].migrations.remove(migration_info[index - 1])\n            await push_config(\n                json.dumps(generate_config(nodes)), [node.admin_client for node in nodes]\n            )\n\n    all_migrations = [asyncio.create_task(do_migration(i)) for i in range(1, 4)]\n    for migration in all_migrations:\n        await migration\n\n    logging.debug(\"stop seeding\")\n    seeder.stop()\n    await seed\n    await asyncio.sleep(0.5)  # wait until all keys with ttl are expired\n    capture = await seeder.capture_fake_redis()\n    assert await seeder.compare(capture, nodes[1].instance.port)\n\n\n@dfly_args({\"proactor_threads\": 4, 
\"cluster_mode\": \"yes\"})\nasync def test_migration_restart(df_factory: DflyInstanceFactory, df_seeder_factory):\n    # 1. Start migration, and than restart it with another slots set\n    instances = [\n        df_factory.create(\n            port=next(next_port),\n            admin_port=next(next_port),\n            vmodule=\"outgoing_slot_migration=2,cluster_family=2,incoming_slot_migration=2\",\n        )\n        for i in range(2)\n    ]\n    df_factory.start_all(instances)\n\n    nodes = [(await create_node_info(instance)) for instance in instances]\n    nodes[0].slots = [(0, 16383)]\n    nodes[1].slots = []\n\n    await push_config(json.dumps(generate_config(nodes)), [node.admin_client for node in nodes])\n\n    logging.debug(\"Start seeder\")\n    seeder = df_seeder_factory.create(\n        keys=50_000,\n        port=instances[0].port,\n        cluster_mode=True,\n    )\n    await seeder.run(target_deviation=0.1)\n    capture = await seeder.capture()\n\n    logging.debug(f\"Start migration\")\n    nodes[0].migrations.append(\n        MigrationInfo(\n            \"127.0.0.1\",\n            nodes[1].instance.admin_port,\n            [(random.randint(1, 8000), random.randint(8001, 16383))],\n            nodes[1].id,\n        )\n    )\n    await push_config(json.dumps(generate_config(nodes)), [node.admin_client for node in nodes])\n\n    await asyncio.sleep(random.randint(1, 10) / 5)\n    logging.debug(f\"Restart migration\")\n    final_migration_range = (random.randint(1, 8000), random.randint(8001, 16382))\n    nodes[0].migrations[0] = MigrationInfo(\n        \"127.0.0.1\", nodes[1].instance.admin_port, [final_migration_range], nodes[1].id\n    )\n    await push_config(json.dumps(generate_config(nodes)), [node.admin_client for node in nodes])\n\n    logging.debug(f\"wait migration to finish\")\n    await wait_for_status(nodes[0].admin_client, nodes[1].id, \"FINISHED\", timeout=50)\n    await wait_for_status(nodes[1].admin_client, nodes[0].id, \"FINISHED\", 
timeout=50)\n\n    nodes[0].migrations = []\n    nodes[0].slots = [(0, final_migration_range[0] - 1), (final_migration_range[1] + 1, 16383)]\n    nodes[1].slots = [final_migration_range]\n    await push_config(json.dumps(generate_config(nodes)), [node.admin_client for node in nodes])\n\n    assert await seeder.compare(capture, nodes[0].instance.port)\n\n\n@dfly_args({\"proactor_threads\": 2, \"cluster_mode\": \"yes\"})\nasync def test_cluster_sharded_pub_sub(df_factory: DflyInstanceFactory):\n    nodes = [df_factory.create(port=next(next_port)) for i in range(2)]\n    df_factory.start_all(nodes)\n\n    c_nodes = [node.client() for node in nodes]\n\n    nodes_info = [(await create_node_info(instance)) for instance in nodes]\n    nodes_info[0].slots = [(0, 16383)]\n    nodes_info[1].slots = []\n\n    await push_config(json.dumps(generate_config(nodes_info)), [node.client for node in nodes_info])\n    # channel name kostas crc is at slot 2833 which is part of the first node.\n    with pytest.raises((MovedError, aioredis.ResponseError)) as moved_error:\n        await c_nodes[1].execute_command(\"SSUBSCRIBE kostas\")\n\n    assert str(moved_error.value).endswith(f\"2833 127.0.0.1:{nodes[0].port}\")\n\n    node_a = ClusterNode(\"localhost\", nodes[0].port)\n    node_b = ClusterNode(\"localhost\", nodes[1].port)\n\n    consumer_client = RedisCluster(startup_nodes=[node_a, node_b])\n    consumer = consumer_client.pubsub()\n    consumer.ssubscribe(\"kostas\")\n\n    await c_nodes[0].execute_command(\"SPUBLISH kostas hello\")\n    # We need to sleep cause we use DispatchBrief internally. 
Otherwise we can't really guarantee\n    # that the client received the message\n    await asyncio.sleep(2)\n\n    # Consume subscription message result from above\n    message = consumer.get_sharded_message(target_node=node_a)\n    assert message == {\"type\": \"ssubscribe\", \"pattern\": None, \"channel\": b\"kostas\", \"data\": 1}\n\n    message = consumer.get_sharded_message(target_node=node_a)\n    assert message == {\"type\": \"smessage\", \"pattern\": None, \"channel\": b\"kostas\", \"data\": b\"hello\"}\n\n    consumer.sunsubscribe(\"kostas\")\n    await asyncio.sleep(2)\n    await c_nodes[0].execute_command(\"SPUBLISH kostas new_message\")\n    message = consumer.get_sharded_message(target_node=node_a)\n    assert message == {\"type\": \"sunsubscribe\", \"pattern\": None, \"channel\": b\"kostas\", \"data\": 0}\n\n\n@dfly_args({\"proactor_threads\": 2, \"cluster_mode\": \"yes\"})\nasync def test_cluster_sharded_pubsub_shard_commands(df_factory: DflyInstanceFactory):\n    nodes = [df_factory.create(port=next(next_port)) for i in range(2)]\n    df_factory.start_all(nodes)\n\n    c_nodes = [node.client() for node in nodes]\n\n    nodes_info = [(await create_node_info(instance)) for instance in nodes]\n    nodes_info[0].slots = [(0, 16383)]\n    nodes_info[1].slots = []\n\n    await push_config(json.dumps(generate_config(nodes_info)), [node.client for node in nodes_info])\n\n    # We are executing SSUBSCRIBE commands and wait for them to be sure that\n    # channels are created\n    message = await c_nodes[0].execute_command(\"SSUBSCRIBE pubsub-shard-channel\")\n    message = await c_nodes[0].execute_command(\"SSUBSCRIBE shard-channel\")\n\n    message = await c_nodes[0].execute_command(\"PUBSUB SHARDCHANNELS\")\n    message.sort()\n    assert message == [\"pubsub-shard-channel\", \"shard-channel\"]\n\n    message = await c_nodes[0].execute_command(\"PUBSUB SHARDCHANNELS pubsub*\")\n    assert message == [\"pubsub-shard-channel\"]\n\n    message = await 
c_nodes[0].execute_command(\"PUBSUB SHARDCHANNELS *channel\")\n    message.sort()\n    assert message == [\"pubsub-shard-channel\", \"shard-channel\"]\n\n    message = await c_nodes[0].execute_command(\"PUBSUB SHARDNUMSUB pubsub-shard-channel\")\n    assert message == [\"pubsub-shard-channel\", 1]\n\n    message = await c_nodes[0].execute_command(\n        \"PUBSUB SHARDNUMSUB pubsub-shard-channel shard-channel\"\n    )\n    assert message == [\"pubsub-shard-channel\", 1, \"shard-channel\", 1]\n\n    message = await c_nodes[0].execute_command(\"PUBSUB SHARDNUMSUB\")\n    assert message == []\n\n\n@pytest.mark.large\n@dfly_args({\"proactor_threads\": 2, \"cluster_mode\": \"yes\"})\nasync def test_cluster_migration_errors_num(df_factory: DflyInstanceFactory):\n    # create cluster with several nodes and create migrations from one node to others\n    # but config propagated only to source node to get errors for migrations\n    # number of errors should be the same as number of target nodes\n    nodes = [\n        df_factory.create(\n            port=next(next_port),\n            admin_port=next(next_port),\n            vmodule=\"cluster_family=2,outgoing_slot_migration=2,incoming_slot_migration=2\",\n        )\n        for i in range(3)\n    ]\n    df_factory.start_all(nodes)\n\n    c_nodes = [node.client() for node in nodes]\n\n    nodes_info = [(await create_node_info(instance)) for instance in nodes]\n    nodes_info[0].slots = [(0, 16383)]\n    nodes_info[1].slots = []\n    nodes_info[2].slots = []\n\n    await push_config(json.dumps(generate_config(nodes_info)), c_nodes)\n\n    async def wait_for_errors_num(client, err_num, timeout=10):\n        cluster_info = lambda: client.info(\"CLUSTER\")\n\n        async for info, breaker in tick_timer(cluster_info, timeout=timeout):\n            with breaker:\n                assert info[\"migration_errors_total\"] == err_num\n\n    await wait_for_errors_num(c_nodes[0], 0)\n\n    nodes_info[0].migrations.append(\n        
MigrationInfo(\"127.0.0.1\", nodes_info[1].instance.admin_port, [(0, 100)], nodes_info[1].id)\n    )\n\n    await push_config(json.dumps(generate_config(nodes_info)), [c_nodes[0]])\n\n    # the error will be reported after 30 seconds, because config is missing for target node\n    await wait_for_errors_num(c_nodes[0], 1, timeout=40)\n    # the migration process attempt to start migration in a second so we get more errors\n    await wait_for_errors_num(c_nodes[0], 2)\n\n\n@dfly_args({\"proactor_threads\": 2, \"cluster_mode\": \"yes\"})\nasync def test_cluster_sharded_pub_sub_migration(df_factory: DflyInstanceFactory):\n    instances = [df_factory.create(port=next(next_port)) for i in range(2)]\n    df_factory.start_all(instances)\n\n    c_nodes = [instance.client() for instance in instances]\n\n    nodes = [(await create_node_info(instance)) for instance in instances]\n    nodes[0].slots = [(0, 16383)]\n    nodes[1].slots = []\n\n    await push_config(json.dumps(generate_config(nodes)), [node.client for node in nodes])\n\n    # Setup producer and consumer\n    node_a = ClusterNode(\"localhost\", instances[0].port)\n    node_b = ClusterNode(\"localhost\", instances[1].port)\n\n    consumer_client = RedisCluster(startup_nodes=[node_a, node_b])\n    consumer = consumer_client.pubsub()\n    consumer.ssubscribe(\"kostas\")\n\n    # Push new config\n    nodes[0].migrations.append(\n        MigrationInfo(\"127.0.0.1\", nodes[1].instance.port, [(0, 16383)], nodes[1].id)\n    )\n    await push_config(json.dumps(generate_config(nodes)), [node.client for node in nodes])\n\n    await wait_for_status(nodes[0].client, nodes[1].id, \"FINISHED\")\n\n    nodes[0].migrations = []\n    nodes[0].slots = []\n    nodes[1].slots = [(0, 16383)]\n    logging.debug(\"remove finished migrations\")\n    await push_config(json.dumps(generate_config(nodes)), [node.client for node in nodes])\n\n    # channel name kostas crc is at slot 2833 which is part of the second node now.\n    with 
pytest.raises((MovedError, aioredis.ResponseError)) as moved_error:\n        await c_nodes[0].execute_command(\"SSUBSCRIBE kostas\")\n\n    assert str(moved_error.value).endswith(f\"2833 127.0.0.1:{instances[1].port}\")\n\n    # Consume subscription message result from above\n    message = consumer.get_sharded_message(target_node=node_a)\n    assert message == {\"type\": \"ssubscribe\", \"pattern\": None, \"channel\": b\"kostas\", \"data\": 1}\n    message = consumer.get_sharded_message(target_node=node_a)\n    assert message == {\"type\": \"sunsubscribe\", \"pattern\": None, \"channel\": b\"kostas\", \"data\": 0}\n\n\n@dfly_args({\"proactor_threads\": 4, \"cluster_mode\": \"yes\"})\nasync def test_readonly_replication(\n    df_factory: DflyInstanceFactory, df_seeder_factory: DflySeederFactory\n):\n    # create cluster master and replica\n    # For now replica always should work in read-only mode\n    # READONLY command returns always OK without any impact\n    # In the future we may decide to implement the same behavior as REDIS\n    instances = [\n        df_factory.create(port=next(next_port), admin_port=next(next_port)) for i in range(2)\n    ]\n    df_factory.start_all(instances)\n\n    nodes = [await create_node_info(n) for n in instances]\n    m1_node, r1_node = nodes\n    master_nodes = [m1_node]\n\n    m1_node.slots = [(0, 16383)]\n    m1_node.replicas = [r1_node]\n\n    logging.debug(\"Push initial config\")\n    await push_config(\n        json.dumps(generate_config(master_nodes)), [node.admin_client for node in nodes]\n    )\n\n    logging.debug(\"create data\")\n    await m1_node.client.execute_command(\"SET X 1\")\n\n    logging.debug(\"start replication\")\n    await r1_node.admin_client.execute_command(f\"replicaof localhost {m1_node.instance.admin_port}\")\n\n    await wait_available_async(r1_node.admin_client)\n\n    assert await r1_node.client.execute_command(\"GET X\") == \"1\"\n    assert await r1_node.client.execute_command(\"READONLY\")\n    
assert await r1_node.client.execute_command(\"GET X\") == \"1\"\n\n    # This behavior can be changed in the future\n    assert await r1_node.client.execute_command(\"GET Y\") == None\n\n    m1_node.replicas = []\n\n    logging.debug(\"Push config without replica\")\n    await push_config(\n        json.dumps(generate_config(master_nodes)), [node.admin_client for node in nodes]\n    )\n\n    with pytest.raises((MovedError, aioredis.ResponseError)) as moved_error:\n        await r1_node.client.execute_command(\"GET X\")\n\n    assert str(moved_error.value).endswith(f\"7165 127.0.0.1:{instances[0].port}\")\n\n    with pytest.raises((MovedError, aioredis.ResponseError)) as moved_error:\n        await r1_node.client.execute_command(\"GET Y\")\n\n    assert str(moved_error.value).endswith(f\"3036 127.0.0.1:{instances[0].port}\")\n\n\n@dfly_args({\"proactor_threads\": 2, \"cluster_mode\": \"yes\"})\nasync def test_cancel_blocking_cmd_during_mygration_finalization(df_factory: DflyInstanceFactory):\n    # blocking commands should be canceled during migration finalization\n    instances = [df_factory.create(port=next(next_port)) for i in range(2)]\n    df_factory.start_all(instances)\n\n    c_nodes = [instance.client() for instance in instances]\n\n    nodes = [(await create_node_info(instance)) for instance in instances]\n    nodes[0].slots = [(0, 16383)]\n    nodes[1].slots = []\n\n    await push_config(json.dumps(generate_config(nodes)), [node.client for node in nodes])\n\n    logging.debug(\"Start blpop task\")\n    blpop_task = asyncio.create_task(c_nodes[0].blpop(\"list\", 0))\n\n    await asyncio.sleep(0.5)\n\n    assert not blpop_task.done()\n\n    nodes[0].migrations.append(\n        MigrationInfo(\"127.0.0.1\", nodes[1].instance.port, [(0, 16383)], nodes[1].id)\n    )\n    await push_config(json.dumps(generate_config(nodes)), [node.client for node in nodes])\n\n    await wait_for_status(nodes[0].client, nodes[1].id, \"FINISHED\")\n\n    with 
pytest.raises(aioredis.ResponseError) as e_info:\n        await blpop_task\n\n    assert await c_nodes[1].type(\"list\") == \"none\"\n\n    nodes[0].migrations = []\n    nodes[0].slots = []\n    nodes[1].slots = [(0, 16383)]\n\n    logging.debug(\"remove finished migrations\")\n    await push_config(json.dumps(generate_config(nodes)), [node.client for node in nodes])\n\n    assert await c_nodes[1].type(\"list\") == \"none\"\n\n\n@dfly_args({\"cluster_mode\": \"yes\"})\nasync def test_slot_migration_oom(df_factory):\n    instances = [\n        df_factory.create(\n            port=next(next_port),\n            admin_port=next(next_port),\n            proactor_threads=4,\n            maxmemory=\"1024MB\",\n        ),\n        df_factory.create(\n            port=next(next_port),\n            admin_port=next(next_port),\n            proactor_threads=2,\n            maxmemory=\"512MB\",\n        ),\n    ]\n\n    df_factory.start_all(instances)\n\n    nodes = [(await create_node_info(instance)) for instance in instances]\n    nodes[0].slots = [(0, 16383)]\n    nodes[1].slots = []\n\n    await push_config(json.dumps(generate_config(nodes)), [node.admin_client for node in nodes])\n\n    await nodes[0].client.execute_command(\"DEBUG POPULATE 100 test 10000000\")\n\n    nodes[0].migrations.append(\n        MigrationInfo(\"127.0.0.1\", nodes[1].instance.admin_port, [(0, 16383)], nodes[1].id)\n    )\n\n    logging.info(\"Start migration\")\n    await push_config(json.dumps(generate_config(nodes)), [node.admin_client for node in nodes])\n\n    # Wait for FATAL status\n    await wait_for_status(nodes[0].admin_client, nodes[1].id, \"FATAL\", 300)\n    await wait_for_status(nodes[1].admin_client, nodes[0].id, \"FATAL\")\n\n    # There's a rare timing issue if we don't wait here. 
Status can be set to FATAL\n    # but error message is not still set for slot migration.\n    await asyncio.sleep(1)\n\n    # Node_0 slot-migration-status\n    status = await nodes[0].admin_client.execute_command(\n        \"DFLYCLUSTER\", \"SLOT-MIGRATION-STATUS\", nodes[1].id\n    )\n    # Direction\n    assert status[0][0] == \"out\"\n    # Error message\n    assert status[0][4] == \"Cannot allocate memory: INCOMING_MIGRATION_OOM\"\n\n    # Node_1 slot-migration-status\n    status = await nodes[1].admin_client.execute_command(\n        \"DFLYCLUSTER\", \"SLOT-MIGRATION-STATUS\", nodes[0].id\n    )\n    # Direction\n    assert status[0][0] == \"in\"\n    # Error message\n    assert status[0][4] == \"INCOMING_MIGRATION_OOM\"\n\n\n@dfly_args({\"proactor_threads\": 4, \"cluster_mode\": \"yes\"})\nasync def test_replica_takeover_moved(\n    df_factory: DflyInstanceFactory, df_seeder_factory: DflySeederFactory\n):\n    instances = [df_factory.create(port=next(next_port)) for i in range(4)]\n    df_factory.start_all(instances)\n\n    nodes = [await create_node_info(n) for n in instances]\n    m1, r1, m2, r2 = nodes\n    master_nodes = [m1, m2]\n\n    m1.slots = [(0, 9000)]\n    m2.slots = [(9001, 16383)]\n\n    m1.replicas = [r1]\n    m2.replicas = [r2]\n\n    await push_config(json.dumps(generate_config(master_nodes)), [node.client for node in nodes])\n\n    logging.debug(\"create data\")\n    await m1.client.execute_command(\"SET X 1\")\n    # Slot number 16022\n    await m2.client.execute_command(\"SET FOOX 1\")\n\n    logging.debug(\"start replication\")\n    await r1.client.execute_command(f\"replicaof localhost {m1.instance.port}\")\n    await r2.client.execute_command(f\"replicaof localhost {m2.instance.port}\")\n\n    await wait_available_async(r1.client)\n\n    assert await r1.client.execute_command(\"GET X\") == \"1\"\n    assert await r1.client.execute_command(\"REPLTAKEOVER 20\") == \"OK\"\n\n    with pytest.raises((MovedError, aioredis.ResponseError)) as 
moved_error:\n        await m1.client.execute_command(\"GET X\")\n\n    assert str(moved_error.value).endswith(f\"7165 127.0.0.1:{r1.instance.port}\")\n\n    with pytest.raises((MovedError, aioredis.ResponseError)) as moved_error:\n        await m1.client.execute_command(\"GET FOOX\")\n\n    assert str(moved_error.value).endswith(f\"16022 127.0.0.1:{m2.instance.port}\")\n\n    # Try write command on the new master. It should succeed because during takeover,\n    # we updated the config as well\n    assert await r1.client.execute_command(\"SET X 2\") == \"OK\"\n\n    master_nodes = [r1, m2]\n    r1.slots = [(0, 9000)]\n    nodes.pop(0)\n    await push_config(json.dumps(generate_config(master_nodes)), [node.client for node in nodes])\n\n    assert await r1.client.execute_command(\"GET X\") == \"2\"\n    assert await m2.client.execute_command(\"GET FOOX\") == \"1\"\n\n    await r1.client.execute_command(\"flushall\")\n    assert await r1.client.dbsize() == 0\n    await r1.client.execute_command(\"SET newk foo\")\n    # Now bring back m1 as a replica of r1\n    nodes.append(m1)\n    r1.replicas = [m1]\n    await push_config(json.dumps(generate_config(master_nodes)), [node.client for node in nodes])\n    await m1.client.execute_command(f\"replicaof localhost {r1.instance.port}\")\n    await check_all_replicas_finished([m1.client], r1.client)\n    assert await m1.client.execute_command(\"GET newk\") == \"foo\"\n\n\n@dfly_args({\"proactor_threads\": 4, \"cluster_mode\": \"yes\", \"cluster_search\": \"yes\"})\nasync def test_SearchRequestDistribution(df_factory: DflyInstanceFactory):\n    \"\"\"\n    Create cluster of 3 nodes.\n    Send FT.CREATE to first node and check that index was created on all nodes.\n    Search for all documents from cluster.\n    \"\"\"\n\n    instances = [\n        df_factory.create(\n            port=next(next_port),\n            admin_port=next(next_port),\n            vmodule=\"coordinator=2,search_family=3,protocol_client=3\",\n        )\n     
   for i in range(3)\n    ]\n\n    df_factory.start_all(instances)\n\n    nodes = [(await create_node_info(instance)) for instance in instances]\n    nodes[0].slots = [(0, 5259)]\n    nodes[1].slots = [(5260, 10519)]\n    nodes[2].slots = [(10520, 16383)]\n\n    await push_config(json.dumps(generate_config(nodes)), [node.admin_client for node in nodes])\n\n    assert (\n        await nodes[0].client.execute_command(\n            \"FT.CREATE\", \"idx\", \"ON\", \"HASH\", \"SCHEMA\", \"title\", \"TEXT\"\n        )\n        == \"OK\"\n    )\n\n    for node in nodes:\n        await wait_for_ft_index_creation(node.client, \"idx\")\n\n    cclient = instances[0].cluster_client()\n\n    docs_num = 100\n    for i in range(0, docs_num):\n        assert await cclient.execute_command(\"HSET\", f\"s{i}\", \"title\", f\"test {i}\") == 1\n\n    async def search_test():\n        res = await nodes[0].client.execute_command(\n            \"FT.SEARCH\", \"idx\", \"@title:test\", \"text\", \"LIMIT\", \"0\", \"1000\"\n        )\n        assert res[0] == docs_num\n        for i in range(0, docs_num):\n            assert f\"s{i}\" in res\n\n    await asyncio.gather(*(search_test() for _ in range(docs_num)))\n\n\n@dfly_args({\"proactor_threads\": 4, \"cluster_mode\": \"yes\", \"cluster_search\": \"yes\"})\nasync def test_SortedSearchRequest(df_factory: DflyInstanceFactory):\n    \"\"\"\n    Create cluster of 3 nodes.\n    Execute Search request with sorting on indexed field.\n    \"\"\"\n\n    instances = [\n        df_factory.create(\n            port=next(next_port),\n            admin_port=next(next_port),\n            vmodule=\"coordinator=2,search_family=3,protocol_client=3\",\n        )\n        for i in range(3)\n    ]\n\n    df_factory.start_all(instances)\n\n    nodes = [(await create_node_info(instance)) for instance in instances]\n    nodes[0].slots = [(0, 5259)]\n    nodes[1].slots = [(5260, 10519)]\n    nodes[2].slots = [(10520, 16383)]\n\n    await 
push_config(json.dumps(generate_config(nodes)), [node.admin_client for node in nodes])\n\n    assert (\n        await nodes[0].client.execute_command(\n            \"FT.CREATE\", \"idx\", \"ON\", \"HASH\", \"SCHEMA\", \"title\", \"TEXT\", \"size\", \"NUMERIC\"\n        )\n        == \"OK\"\n    )\n\n    for node in nodes:\n        await wait_for_ft_index_creation(node.client, \"idx\")\n\n    cclient = instances[0].cluster_client()\n\n    docs_num = 100\n    for i in range(0, docs_num):\n        assert (\n            await cclient.execute_command(\"HSET\", f\"s{i}\", \"title\", f\"test {i}\", \"size\", f\"{i}\")\n            == 2\n        )\n\n    async def search_test():\n        limit_size = random.randint(1, docs_num // 2)\n        offset = random.randint(0, docs_num // 2)\n        res = await nodes[0].client.execute_command(\n            \"FT.SEARCH\",\n            \"idx\",\n            \"@title:test\",\n            \"text\",\n            \"SORTBY\",\n            \"size\",\n            \"ASC\",\n            \"LIMIT\",\n            f\"{offset}\",\n            f\"{limit_size}\",\n        )\n        assert res[0] == docs_num\n        for i in range(offset, offset + limit_size):\n            assert f\"s{i}\" in res, f\"offset: {offset}, limit_size: {limit_size}, res: {res}\"\n\n        for i in range(0, offset):\n            assert f\"s{i}\" not in res\n\n        for i in range(offset + limit_size, docs_num):\n            assert f\"s{i}\" not in res\n\n    await asyncio.gather(*(search_test() for _ in range(2)))\n\n\nasync def verify_keys_match_number_of_index_docs(client, expected_num_keys):\n    # Get number of docs in index\n    index_info = await client.execute_command(f\"FT.INFO idx\")\n    index_info_num_docs = index_info[9]\n\n    # Get number of keys in database\n    keyspace_info = await client.info(\"keyspace\")\n    keyspace_keys = keyspace_info[\"db0\"][\"keys\"]\n\n    assert index_info_num_docs == keyspace_keys\n    assert index_info_num_docs == 
expected_num_keys\n    assert keyspace_keys == expected_num_keys\n\n\n@dfly_args({\"proactor_threads\": 2, \"cluster_mode\": \"yes\", \"cluster_search\": \"yes\"})\nasync def test_remove_docs_on_cluster_migration(df_factory):\n    instances = [\n        df_factory.create(port=next(next_port), admin_port=next(next_port)) for i in range(2)\n    ]\n\n    df_factory.start_all(instances)\n\n    nodes = [(await create_node_info(instance)) for instance in instances]\n    nodes[0].slots = [(0, 16383)]\n    nodes[1].slots = []\n\n    await push_config(json.dumps(generate_config(nodes)), [node.admin_client for node in nodes])\n\n    # Create index on both nodes\n    await nodes[0].client.execute_command(\n        \"FT.CREATE\", \"idx\", \"ON\", \"HASH\", \"PREFIX\", \"1\", \"doc:\", \"SCHEMA\", \"v\", \"TEXT\"\n    )\n\n    # Populate node 0\n    keys = 100\n    for i in range(0, keys):\n        random_string = \"\".join(random.choices(string.ascii_letters + string.digits, k=1_000))\n        await nodes[0].client.execute_command(\"HSET\", f\"doc:{i}\", \"v\", random_string)\n\n    # Verify on node 0 that keys are added and index is populated\n    await verify_keys_match_number_of_index_docs(nodes[0].client, keys)\n\n    nodes[0].migrations.append(\n        MigrationInfo(\"127.0.0.1\", instances[1].port, [(0, 16383)], nodes[1].id)\n    )\n    logging.debug(\"Start migration\")\n    await push_config(json.dumps(generate_config(nodes)), [node.admin_client for node in nodes])\n\n    await wait_for_status(nodes[0].admin_client, nodes[1].id, \"FINISHED\")\n\n    nodes[0].migrations = []\n    nodes[0].slots = []\n    nodes[1].slots = [(0, 16383)]\n    logging.debug(\"finalize migration\")\n    await push_config(json.dumps(generate_config(nodes)), [node.admin_client for node in nodes])\n\n    await asyncio.sleep(1)\n\n    # Verify on node 1 that keys are moved and index is populated\n    await verify_keys_match_number_of_index_docs(nodes[1].client, keys)\n\n    # Verify that node 0 
doesn't have any keys and no index docs\n    await verify_keys_match_number_of_index_docs(nodes[0].client, 0)\n\n\n@pytest.mark.large\n@pytest.mark.exclude_epoll\n@pytest.mark.opt_only\n@dfly_args({\"cluster_mode\": \"yes\"})\nasync def test_cluster_migration_with_tiering(df_factory):\n    instances = [\n        df_factory.create(\n            port=next(next_port),\n            admin_port=next(next_port),\n            proactor_threads=2,\n            tiered_prefix=\"/tmp/tiered/cluster_node\",\n            tiered_offload_threshold=\"0.2\",\n            maxmemory=\"512MB\",\n        ),\n        df_factory.create(\n            port=next(next_port), admin_port=next(next_port), proactor_threads=2, maxmemory=\"1024MB\"\n        ),\n    ]\n    df_factory.start_all(instances)\n\n    nodes = [(await create_node_info(instance)) for instance in instances]\n    nodes[0].slots = [(0, 16383)]\n    nodes[1].slots = []\n\n    await push_config(json.dumps(generate_config(nodes)), [node.admin_client for node in nodes])\n\n    keys = 1000000\n    await nodes[0].client.execute_command(f\"DEBUG POPULATE {keys} size 440\")\n\n    await asyncio.sleep(5)  # wait for tiering to offload data\n\n    # We need to wait for some tiered entries to verify migration works with tiering.\n    async for info, breaker in info_tick_timer(nodes[0].client, section=\"TIERED\"):\n        with breaker:\n            logging.info(f\"Tiered entries: {info['tiered_entries']}\")\n            assert info[\"tiered_entries\"] >= 10_000\n\n    nodes[0].migrations.append(\n        MigrationInfo(\"127.0.0.1\", instances[1].port, [(0, 16383)], nodes[1].id)\n    )\n\n    await push_config(json.dumps(generate_config(nodes)), [node.admin_client for node in nodes])\n\n    await wait_for_status(nodes[0].admin_client, nodes[1].id, \"FINISHED\", 300)\n\n    nodes[0].migrations = []\n    nodes[0].slots = []\n    nodes[1].slots = [(0, 16383)]\n    logging.debug(\"finalize migration\")\n    await 
push_config(json.dumps(generate_config(nodes)), [node.admin_client for node in nodes])\n\n    info = await nodes[1].client.info(\"keyspace\")\n    assert info[\"db0\"][\"keys\"] == keys\n\n    async for info, breaker in info_tick_timer(nodes[0].client, section=\"TIERED\"):\n        with breaker:\n            assert info[\"tiered_entries\"] == 0\n\n    await asyncio.sleep(5)  # wait for tiered deletions to finish\n\n    info = await nodes[0].client.info(\"keyspace\")\n    assert info[\"db0\"][\"keys\"] == 0\n\n\n@pytest.mark.large\n@pytest.mark.exclude_epoll\n@pytest.mark.opt_only\n@dfly_args({\"cluster_mode\": \"yes\"})\nasync def test_cluster_migration_with_tiering_and_deletes(df_factory: DflyInstanceFactory):\n    instances = [\n        df_factory.create(\n            port=next(next_port),\n            admin_port=next(next_port),\n            proactor_threads=2,\n            tiered_prefix=\"/tmp/tiered/cluster_node\",\n            tiered_offload_threshold=\"0.2\",\n            maxmemory=\"512MB\",\n        ),\n        df_factory.create(\n            port=next(next_port), admin_port=next(next_port), proactor_threads=2, maxmemory=\"1024MB\"\n        ),\n    ]\n    df_factory.start_all(instances)\n\n    nodes = [(await create_node_info(instance)) for instance in instances]\n    nodes[0].slots = [(0, 16383)]\n    nodes[1].slots = []\n\n    await push_config(json.dumps(generate_config(nodes)), [node.admin_client for node in nodes])\n\n    keys = 1000000\n    await nodes[0].client.execute_command(f\"DEBUG POPULATE {keys} key 440\")\n\n    # Expect that number of added keys is 1000000\n    info = await nodes[0].client.info(\"keyspace\")\n    assert info[\"db0\"][\"keys\"] == keys\n\n    # Wait for some data to be offloaded to tiered storage\n    await asyncio.sleep(10)\n\n    # Wait for sufficient tiered entries\n    async for info, breaker in info_tick_timer(nodes[0].client, section=\"TIERED\"):\n        with breaker:\n            tiered_entries = 
info[\"tiered_entries\"]\n            assert tiered_entries >= 50_000\n\n    nodes[0].migrations.append(\n        MigrationInfo(\"127.0.0.1\", instances[1].port, [(0, 16383)], nodes[1].id)\n    )\n\n    await push_config(json.dumps(generate_config(nodes)), [node.admin_client for node in nodes])\n\n    # Delete 50k keys during migration to create mutations and verify that they are applied correctly\n    delete_expected_num = 50_000\n    delete_succeded = 0\n\n    # Indicator that migration is done and we can stop deleting keys\n    migration_done = False\n\n    async def delete_job():\n        nonlocal delete_succeded\n        for i in range(delete_expected_num):\n            if migration_done:\n                break\n            try:\n                await nodes[0].client.delete(f\"key:{i}\")\n                delete_succeded += 1\n            except Exception as e:\n                pass\n\n    delete_task = asyncio.create_task(delete_job())\n\n    await wait_for_status(nodes[0].admin_client, nodes[1].id, \"FINISHED\", 300)\n    migration_done = True\n\n    await delete_task\n\n    nodes[0].migrations = []\n    nodes[0].slots = []\n    nodes[1].slots = [(0, 16383)]\n    logging.debug(\"finalize migration\")\n    await push_config(json.dumps(generate_config(nodes)), [node.admin_client for node in nodes])\n\n    async for info, breaker in info_tick_timer(nodes[0].client, section=\"TIERED\"):\n        with breaker:\n            assert info[\"tiered_entries\"] == 0\n\n    await asyncio.sleep(5)  # wait for tiered deletions to finish\n\n    info = await nodes[0].client.info(\"keyspace\")\n    assert info[\"db0\"][\"keys\"] == 0\n\n    # Verify that mutations are applied on the target node after migration\n    info = await nodes[1].client.info(\"keyspace\")\n    assert info[\"db0\"][\"keys\"] == keys - delete_succeded\n\n\n@dfly_args(\n    {\n        \"proactor_threads\": 1,\n        \"cluster_mode\": \"yes\",\n        \"cluster_node_id\": \"0\" * 40,\n    }\n)\nasync def 
test_cluster_config_slot_overflow_doesnt_crash(df_factory: DflyInstanceFactory):\n    instance = df_factory.create(port=next(next_port))\n    df_factory.start_all([instance])\n    client = instance.client()\n    node_id = \"0\" * 40\n\n    # Build invalid config JSON manually - 1E383 is a valid JSON number but overflows uint16_t.\n    # We must NOT use json.dumps here because Python would reject 1e383 (infinity).\n    invalid_config = (\n        '[{\"slot_ranges\":[{\"start\":0,\"end\":8191}],'\n        '\"master\":{\"id\":\"' + node_id + '\",\"ip\":\"127.0.0.1\",\"port\":' + str(instance.port) + \"},\"\n        '\"replicas\":[]},'\n        '{\"slot_ranges\":[{\"start\":8192,\"end\":1E383}],'\n        '\"master\":{\"id\":\"' + \"1\" * 40 + '\",\"ip\":\"127.0.0.1\",\"port\":9999},'\n        '\"replicas\":[]}]'\n    )\n\n    pipe = client.pipeline(transaction=False)\n    pipe.execute_command(\"DFLYCLUSTER\", \"CONFIG\", invalid_config)\n    pipe.execute_command(\"CLUSTER\", \"MYID\")\n    results = await pipe.execute(raise_on_error=False)\n\n    # CONFIG must return an error (not crash), MYID must still work\n    assert isinstance(results[0], Exception)\n    assert results[1] == node_id\n"
  },
  {
    "path": "tests/dragonfly/config_test.py",
    "content": "import pytest\nimport redis\nfrom redis.asyncio import Redis as RedisClient\nfrom .utility import *\nfrom .instance import DflyStartException\n\n\nasync def test_maxclients(df_factory):\n    # Needs some authentication\n    with df_factory.create(port=1111, maxclients=1, admin_port=1112) as server:\n        async with server.client() as client1:\n            assert [\"maxclients\", \"1\"] == await client1.execute_command(\"CONFIG GET maxclients\")\n\n            with pytest.raises(redis.exceptions.ConnectionError):\n                async with server.client() as client2:\n                    await client2.get(\"test\")\n\n            # Check that admin connections are not limited.\n            async with RedisClient(port=server.admin_port) as admin_client:\n                await admin_client.get(\"test\")\n\n            await client1.execute_command(\"CONFIG SET maxclients 3\")\n            assert [\"maxclients\", \"3\"] == await client1.execute_command(\"CONFIG GET maxclients\")\n            async with server.client() as client2:\n                await client2.get(\"test\")\n"
  },
  {
    "path": "tests/dragonfly/conftest.py",
    "content": "\"\"\"\nPytest fixtures to be provided for all tests without import\n\"\"\"\n\nimport asyncio\nimport logging\nimport os\nimport random\nimport shutil\nimport subprocess\nimport sys\nimport time\nimport typing\nfrom copy import deepcopy\nfrom pathlib import Path\nfrom tempfile import gettempdir, mkdtemp\nfrom time import sleep\nfrom typing import Dict, List, Union\n\nimport pymemcache\nimport pytest\nimport pytest_asyncio\nimport redis\nfrom redis import asyncio as aioredis\n\nfrom . import PortPicker\nfrom .instance import DflyInstance, DflyParams, DflyInstanceFactory, RedisServer\nfrom .utility import DflySeederFactory, gen_ca_cert, gen_certificate, skip_if_not_in_github\n\nlogging.getLogger(\"asyncio\").setLevel(logging.WARNING)\n# Suppress \"Unclosed ClusterNode\" warnings from redis-py topology refreshes (not actionable)\nlogging.getLogger(\"asyncio\").addFilter(lambda r: \"Unclosed ClusterNode\" not in r.getMessage())\n\nDATABASE_INDEX = 0\nBASE_LOG_DIR = \"/tmp/dragonfly_logs/\"\nFAILED_PATH = \"/tmp/failed/\"\nLAST_LOGS = \"/tmp/last_test_log_dir.txt\"\n\n\ndef _download_minio_binary(dest: Path):\n    \"\"\"Download MinIO binary to dest if not already cached.\n\n    Downloads to a temporary file first, then renames atomically to avoid\n    leaving a corrupt binary on interrupted downloads.\n    \"\"\"\n    import platform\n    import urllib.request\n\n    system = platform.system().lower()\n    arch = platform.machine()\n    arch_map = {\"x86_64\": \"amd64\", \"aarch64\": \"arm64\", \"arm64\": \"arm64\"}\n    arch = arch_map.get(arch, arch)\n    url = f\"https://dl.min.io/server/minio/release/{system}-{arch}/minio\"\n    logging.info(f\"Downloading MinIO binary from {url}\")\n    tmp_dest = dest.with_suffix(\".tmp\")\n    try:\n        urllib.request.urlretrieve(url, tmp_dest)\n        tmp_dest.chmod(0o755)\n        tmp_dest.rename(dest)\n    except Exception:\n        tmp_dest.unlink(missing_ok=True)\n        raise\n\n\ndef 
_start_minio_server(endpoint):\n    \"\"\"Start MinIO subprocess and configure env vars for S3 tests.\"\"\"\n    import boto3\n    from urllib.parse import urlparse\n\n    cache_dir = Path.home() / \".cache\" / \"dragonfly-tests\"\n    cache_dir.mkdir(parents=True, exist_ok=True)\n    minio_bin = cache_dir / \"minio\"\n\n    if not minio_bin.exists():\n        _download_minio_binary(minio_bin)\n\n    # Normalize scheme-less values (e.g. \"localhost:9000\") so urlparse\n    # correctly populates hostname/port instead of treating it as a path.\n    to_parse = endpoint if \"://\" in endpoint else \"http://\" + endpoint\n    parsed = urlparse(to_parse)\n    address = f\":{parsed.port or 9000}\"\n    endpoint = f\"{parsed.scheme}://{parsed.hostname}:{parsed.port or 9000}\"\n\n    data_dir = Path(mkdtemp(prefix=\"minio_data_\"))\n    minio_log = data_dir / \"minio.log\"\n    log_file = open(minio_log, \"w\")\n    proc = subprocess.Popen(\n        [str(minio_bin), \"server\", str(data_dir), \"--address\", address],\n        env={**os.environ, \"MINIO_ROOT_USER\": \"minioadmin\", \"MINIO_ROOT_PASSWORD\": \"minioadmin\"},\n        stdout=log_file,\n        stderr=subprocess.STDOUT,\n    )\n\n    bucket = \"dragonfly-test\"\n    try:\n        s3 = boto3.client(\n            \"s3\",\n            endpoint_url=endpoint,\n            aws_access_key_id=\"minioadmin\",\n            aws_secret_access_key=\"minioadmin\",\n            region_name=\"us-east-1\",\n        )\n\n        for attempt in range(30):\n            try:\n                s3.create_bucket(Bucket=bucket)\n                break\n            except Exception:\n                if proc.poll() is not None:\n                    logs = minio_log.read_text()\n                    raise RuntimeError(\n                        f\"MinIO process exited with code {proc.returncode}.\\nLogs:\\n{logs}\"\n                    )\n                time.sleep(1)\n        else:\n            logs = minio_log.read_text()\n            raise 
RuntimeError(f\"MinIO did not become ready in time.\\nLogs:\\n{logs}\")\n    except Exception:\n        proc.terminate()\n        log_file.close()\n        shutil.rmtree(data_dir, ignore_errors=True)\n        raise\n\n    log_file.close()\n    os.environ[\"DRAGONFLY_S3_BUCKET\"] = bucket\n    os.environ[\"AWS_ACCESS_KEY_ID\"] = \"minioadmin\"\n    os.environ[\"AWS_SECRET_ACCESS_KEY\"] = \"minioadmin\"\n    os.environ[\"AWS_ENDPOINT_URL\"] = endpoint\n    # Remove any existing session token (e.g. from OIDC auth) as MinIO doesn't support it\n    os.environ.pop(\"AWS_SESSION_TOKEN\", None)\n\n    return proc, data_dir\n\n\n_minio_proc = None\n_minio_data_dir = None\n\n\n# runs on pytest start\ndef pytest_configure(config):\n    global _minio_proc, _minio_data_dir\n\n    # clean everything\n    if os.path.exists(FAILED_PATH):\n        shutil.rmtree(FAILED_PATH)\n    if os.path.exists(BASE_LOG_DIR):\n        shutil.rmtree(BASE_LOG_DIR)\n\n    # Start MinIO if MINIO_S3_ENDPOINT is set (must happen before test collection\n    # so that @pytest.mark.skipif checking DRAGONFLY_S3_BUCKET sees it)\n    endpoint = os.environ.get(\"MINIO_S3_ENDPOINT\")\n    if endpoint:\n        _minio_proc, _minio_data_dir = _start_minio_server(endpoint)\n\n\ndef pytest_unconfigure(config):\n    global _minio_proc, _minio_data_dir\n\n    if _minio_proc is not None:\n        _minio_proc.terminate()\n        try:\n            _minio_proc.wait(timeout=10)\n        except subprocess.TimeoutExpired:\n            _minio_proc.kill()\n            _minio_proc.wait()\n        _minio_proc = None\n\n    if _minio_data_dir is not None:\n        shutil.rmtree(_minio_data_dir, ignore_errors=True)\n        _minio_data_dir = None\n\n\n@pytest.fixture(scope=\"class\")\ndef df_log_dir(request):\n    \"\"\"\n    Fixture to provide a log directory for the test class.\n    This directory will be created before each test class and cleaned up after.\n    \"\"\"\n    # Generate a unique directory name for the test 
class based on its node name\n    translator = str.maketrans(\":[]{}/ \", \"_______\", \"\\\"*'\")\n    unique_dir = request.node.name.translate(translator)\n    log_dir = os.path.join(BASE_LOG_DIR, unique_dir)\n\n    if os.path.exists(log_dir):\n        shutil.rmtree(log_dir)\n    os.makedirs(log_dir)\n\n    # needed by action.yml to fetch the logs if the test times out\n    last_logs = open(LAST_LOGS, \"w\")\n    last_logs.write(log_dir)\n    last_logs.close()\n\n    return log_dir\n\n\ndef determine_scope(fixture_name, config):\n    drop_data_after_each_test = config.getoption(\"--drop-data-after-each-test\", False)\n    if drop_data_after_each_test:\n        return \"class\"\n    return \"session\"\n\n\n@pytest.fixture(scope=determine_scope)\ndef tmp_dir():\n    \"\"\"\n    Pytest fixture to provide the test temporary directory for the session\n    where the Dragonfly executable will be run and where all test data\n    should be stored. The directory will be cleaned up at the end of a session\n    \"\"\"\n    tmp_name = mkdtemp()\n    yield Path(tmp_name)\n    if os.environ.get(\"DRAGONFLY_KEEP_TMP\"):\n        logging.info(f\"Keeping tmp dir {tmp_name}\")\n        return\n    shutil.rmtree(tmp_name, ignore_errors=True)\n\n\n@pytest.fixture(scope=determine_scope)\ndef test_env(tmp_dir: Path):\n    \"\"\"\n    Provide the environment the Dragonfly executable is running in as a\n    python dictionary\n    \"\"\"\n    env = os.environ.copy()\n    env[\"DRAGONFLY_TMP\"] = str(tmp_dir)\n    return env\n\n\n@pytest.fixture(scope=\"class\", params=[{}])\ndef df_seeder_factory(request) -> DflySeederFactory:\n    seed = request.config.getoption(\"--rand-seed\")\n    if seed is None:\n        seed = random.randrange(sys.maxsize)\n\n    random.seed(int(seed))\n    logging.debug(f\"Random seed: {seed}, check: {random.randrange(100)}\")\n\n    return DflySeederFactory(request.config.getoption(\"--log-seeder\"))\n\n\ndef parse_args(args: List[str]) -> Dict[str, Union[str, 
None]]:\n    args_dict = {}\n    for arg in args:\n        if \"=\" in arg:\n            pos = arg.find(\"=\")\n            name, value = arg[:pos], arg[pos + 1 :]\n            args_dict[name] = value\n        else:\n            args_dict[arg] = None\n    return args_dict\n\n\n@pytest_asyncio.fixture(scope=\"class\")\ndef event_loop():\n    loop = asyncio.new_event_loop()\n    yield loop\n    loop.close()\n\n\n@pytest_asyncio.fixture(scope=\"class\", params=[{}])\nasync def df_factory(\n    request,\n    tmp_dir,\n    test_env,\n    df_log_dir,\n) -> typing.AsyncGenerator[DflyInstanceFactory, None]:\n    \"\"\"\n    Create an instance factory with supplied params.\n    \"\"\"\n    os.makedirs(os.path.join(gettempdir(), \"tiered\"), exist_ok=True)\n    scripts_dir = os.path.dirname(os.path.abspath(__file__))\n    path = os.environ.get(\"DRAGONFLY_PATH\", os.path.join(scripts_dir, \"../../build-dbg/dragonfly\"))\n\n    args = request.param if request.param else {}\n    existing = request.config.getoption(\"--existing-port\")\n    existing_admin = request.config.getoption(\"--existing-admin-port\")\n    existing_mc = request.config.getoption(\"--existing-mc-port\")\n    params = DflyParams(\n        path=path,\n        cwd=tmp_dir,\n        gdb=request.config.getoption(\"--gdb\"),\n        direct_output=request.config.getoption(\"--direct-out\"),\n        buffered_out=request.config.getoption(\"--buffered-output\"),\n        args=parse_args(request.config.getoption(\"--df\")),\n        existing_port=int(existing) if existing else None,\n        existing_admin_port=int(existing_admin) if existing_admin else None,\n        existing_mc_port=int(existing_mc) if existing_mc else None,\n        env=test_env,\n        log_dir=df_log_dir,\n    )\n\n    factory = DflyInstanceFactory(params, args)\n    yield factory\n    await factory.stop_all()\n\n\n@pytest.fixture(scope=\"class\")\ndef df_server(df_factory: DflyInstanceFactory) -> typing.Generator[DflyInstance, None, None]:\n 
   \"\"\"\n    Start the default Dragonfly server that will be used for the default pools\n    and clients.\n    \"\"\"\n    instance = df_factory.create()\n    instance.start()\n\n    yield instance\n    clients_left = None\n    try:\n        client = redis.Redis(port=instance.port)\n        client.client_setname(\"mgr\")\n        sleep(0.1)\n        clients_left = [x for x in client.client_list() if x[\"name\"] != \"mgr\"]\n\n        # Graceful shutdown, and avoid saving on shutdown if possible\n        try:\n            if instance.proc:\n                client.shutdown(nosave=True)\n        except Exception:\n            pass\n    except Exception as e:\n        print(e, file=sys.stderr)\n\n    instance.stop()\n\n    # TODO: Investigate spurious open connection with cluster client\n    # if not instance['cluster_mode']:\n    # TODO: Investigate adding fine grain control over the pool by\n    # by adding a cache ontop of the clients connection pool and then evict\n    # properly with client.connection_pool.disconnect() avoiding non synced\n    # side effects\n    # assert clients_left == []\n    # else:\n    #    print(\"Cluster clients left: \", len(clients_left))\n\n    if instance[\"cluster_mode\"]:\n        print(\"Cluster clients left: \", len(clients_left))\n\n\n@pytest.fixture(scope=\"function\")\ndef cluster_client(df_server):\n    \"\"\"\n    Return a cluster client to the default instance with all entries flushed.\n    \"\"\"\n    client = redis.RedisCluster(decode_responses=True, host=\"localhost\", port=df_server.port)\n    client.client_setname(\"default-cluster-fixture\")\n    client.flushall()\n\n    yield client\n    client.disconnect_connection_pools()\n\n\n@pytest_asyncio.fixture(scope=\"function\")\nasync def async_pool(df_server: DflyInstance):\n    pool = aioredis.ConnectionPool(\n        host=\"localhost\",\n        port=df_server.port,\n        db=DATABASE_INDEX,\n        decode_responses=True,\n        max_connections=32,\n    )\n    
yield pool\n    await pool.disconnect(inuse_connections=True)\n\n\n@pytest_asyncio.fixture(scope=\"function\")\nasync def async_client(async_pool):\n    \"\"\"\n    Return an async client to the default instance with all entries flushed.\n    \"\"\"\n    client = aioredis.Redis(connection_pool=async_pool)\n    await client.client_setname(\"default-async-fixture\")\n    await client.flushall()\n    await client.select(DATABASE_INDEX)\n    yield client\n\n\ndef pytest_addoption(parser):\n    parser.addoption(\"--gdb\", action=\"store_true\", default=False, help=\"Run instances in gdb\")\n    parser.addoption(\"--df\", action=\"append\", default=[], help=\"Add arguments to dragonfly\")\n    parser.addoption(\n        \"--buffered-output\",\n        action=\"store_true\",\n        default=False,\n        help=\"Makes instance output buffered, grouping it together\",\n    )\n    parser.addoption(\n        \"--log-seeder\", action=\"store\", default=None, help=\"Store last generator commands in file\"\n    )\n    parser.addoption(\n        \"--rand-seed\",\n        action=\"store\",\n        default=None,\n        help=\"Set seed for global random. 
Makes seeder predictable\",\n    )\n    parser.addoption(\n        \"--existing-port\",\n        action=\"store\",\n        default=None,\n        help=\"Provide a port to the existing process for the test\",\n    )\n    parser.addoption(\n        \"--existing-admin-port\",\n        action=\"store\",\n        default=None,\n        help=\"Provide an admin port to the existing process for the test\",\n    )\n    parser.addoption(\n        \"--existing-mc-port\",\n        action=\"store\",\n        default=None,\n        help=\"Provide a port to the existing memcached process for the test\",\n    )\n    parser.addoption(\n        \"--direct-out\",\n        action=\"store_true\",\n        default=False,\n        help=\"If true, does not post process dragonfly output\",\n    )\n\n    parser.addoption(\"--repeat\", action=\"store\", help=\"Number of times to repeat each test\")\n    parser.addoption(\n        \"--drop-data-after-each-test\",\n        action=\"store_true\",\n        default=False,\n        help=\"Remove test data after each test, instead of after each session, \"\n        \"useful when running tests on repeat to avoid filling up disk\",\n    )\n\n\ndef pytest_generate_tests(metafunc):\n    if metafunc.config.option.repeat is not None:\n        count = int(metafunc.config.option.repeat)\n\n        # We're going to duplicate these tests by parametrizing them,\n        # which requires that each test has a fixture to accept the parameter.\n        # We can add a new fixture like so:\n        metafunc.fixturenames.append(\"tmp_ct\")\n\n        # Now we parametrize. 
This is what happens when we do e.g.,\n        # @pytest.mark.parametrize('tmp_ct', range(count))\n        # def test_foo(): pass\n        metafunc.parametrize(\"tmp_ct\", range(count))\n\n\n@pytest.fixture(scope=\"session\")\ndef port_picker():\n    yield PortPicker()\n\n\n@pytest.fixture(scope=\"function\")\ndef memcached_client(df_server: DflyInstance):\n    client = pymemcache.Client(f\"127.0.0.1:{df_server.mc_port}\", default_noreply=False)\n\n    yield client\n\n    client.flush_all()  # clean up after test\n    client.quit()\n\n\n@pytest.fixture(scope=\"session\")\ndef with_tls_ca_cert_args(tmp_dir):\n    ca_key = os.path.join(tmp_dir, \"ca-key.pem\")\n    ca_cert = os.path.join(tmp_dir, \"ca-cert.pem\")\n    gen_ca_cert(ca_key, ca_cert)\n    return {\"ca_key\": ca_key, \"ca_cert\": ca_cert}\n\n\n@pytest.fixture(scope=\"session\")\ndef with_tls_server_args(tmp_dir, with_tls_ca_cert_args):\n    tls_server_key = os.path.join(tmp_dir, \"df-key.pem\")\n    tls_server_req = os.path.join(tmp_dir, \"df-req.pem\")\n    tls_server_cert = os.path.join(tmp_dir, \"df-cert.pem\")\n\n    gen_certificate(\n        with_tls_ca_cert_args[\"ca_key\"],\n        with_tls_ca_cert_args[\"ca_cert\"],\n        tls_server_req,\n        tls_server_key,\n        tls_server_cert,\n    )\n\n    args = {\"tls\": None, \"tls_key_file\": tls_server_key, \"tls_cert_file\": tls_server_cert}\n    return args\n\n\n@pytest.fixture(scope=\"session\")\ndef with_ca_tls_server_args(with_tls_server_args, with_tls_ca_cert_args):\n    args = deepcopy(with_tls_server_args)\n    args[\"tls_ca_cert_file\"] = with_tls_ca_cert_args[\"ca_cert\"]\n    return args\n\n\n@pytest.fixture(scope=\"session\")\ndef with_ca_dir_tls_server_args(with_tls_server_args, with_tls_ca_cert_args):\n    args = deepcopy(with_tls_server_args)\n    ca_cert = with_tls_ca_cert_args[\"ca_cert\"]\n    ca_dir = os.path.dirname(ca_cert)\n    # We need this because any program that uses OpenSSL requires directories to be set up like 
this\n    # in order to find the certificates. This command, creates the necessary symlinks to the files\n    # such that they can be consumed by OpenSSL when loaded from the directory.\n    # For more info see: https://www.openssl.org/docs/man3.0/man1/c_rehash.html\n    command = f\"c_rehash {ca_dir}\"\n    subprocess.run(command, shell=True)\n    args[\"tls_ca_cert_dir\"] = ca_dir\n    return args, ca_cert\n\n\n@pytest.fixture(scope=\"session\")\ndef with_tls_client_args(tmp_dir, with_tls_ca_cert_args):\n    tls_client_key = os.path.join(tmp_dir, \"client-key.pem\")\n    tls_client_req = os.path.join(tmp_dir, \"client-req.pem\")\n    tls_client_cert = os.path.join(tmp_dir, \"client-cert.pem\")\n\n    gen_certificate(\n        with_tls_ca_cert_args[\"ca_key\"],\n        with_tls_ca_cert_args[\"ca_cert\"],\n        tls_client_req,\n        tls_client_key,\n        tls_client_cert,\n    )\n\n    args = {\"ssl\": True, \"ssl_keyfile\": tls_client_key, \"ssl_certfile\": tls_client_cert}\n    return args\n\n\n@pytest.fixture(scope=\"session\")\ndef with_ca_tls_client_args(with_tls_client_args, with_tls_ca_cert_args):\n    args = deepcopy(with_tls_client_args)\n    args[\"ssl_ca_certs\"] = with_tls_ca_cert_args[\"ca_cert\"]\n    return args\n\n\ndef copy_failed_logs(log_dir, report):\n    assert log_dir\n    test_failed_path = os.path.join(FAILED_PATH, os.path.basename(log_dir))\n    if not os.path.exists(test_failed_path):\n        os.makedirs(test_failed_path)\n\n    logging.error(f\"Test failed {report.nodeid} with logs: \")\n\n    for f in os.listdir(log_dir):\n        file = os.path.join(log_dir, f)\n        if os.path.isfile(file):\n            file = file.rstrip(\"\\n\")\n            logging.error(f\"🪵🪵🪵🪵🪵🪵 {file} 🪵🪵🪵🪵🪵🪵\")\n            shutil.copy(file, test_failed_path)\n\n    # Clean up\n    try:\n        os.remove(LAST_LOGS)\n    except OSError:\n        pass\n\n\n# tests results we get on the \"call\" state\n# but we can not copy logs until \"teardown\" 
state because the server isn't stopped\n# so we save result of the \"call\" state and process it on the \"teardown\" when the server is stopped\n@pytest.hookimpl(hookwrapper=True)\ndef pytest_runtest_makereport(item, call):\n    outcome = yield\n    report = outcome.get_result()\n\n    if report.when == \"call\":\n        # Store the result of the call phase in the item\n        item.call_outcome = report\n\n    if report.when == \"teardown\":\n        call_outcome = getattr(item, \"call_outcome\", None)\n        log_dir = item.funcargs.get(\"df_log_dir\")\n        if log_dir:\n            if report.failed:\n                copy_failed_logs(log_dir, report)\n            if call_outcome and call_outcome.failed:\n                copy_failed_logs(log_dir, call_outcome)\n\n\n@pytest.fixture(scope=\"function\")\ndef redis_server(port_picker) -> RedisServer:\n    s = RedisServer(port_picker.get_available_port())\n    try:\n        s.start()\n    except FileNotFoundError as e:\n        skip_if_not_in_github()\n        raise\n    time.sleep(1)\n    yield s\n    s.stop()\n\n\n@pytest.fixture(scope=\"function\")\ndef redis_local_server(port_picker) -> RedisServer:\n    s = RedisServer(port_picker.get_available_port())\n    time.sleep(1)\n    yield s\n    s.stop()\n"
  },
  {
    "path": "tests/dragonfly/connection_test.py",
    "content": "import asyncio\nimport logging\nimport random\nimport socket\nimport ssl\nimport string\nimport time\nfrom dataclasses import dataclass\nfrom threading import Thread\n\nimport async_timeout\nimport pytest\nimport redis as base_redis\nfrom redis import asyncio as aioredis\nfrom redis.cache import CacheConfig\nfrom redis.backoff import NoBackoff\nfrom redis.retry import Retry\nfrom redis.exceptions import ConnectionError, ResponseError\n\nfrom . import dfly_args\nfrom .instance import DflyInstance, DflyInstanceFactory\nfrom .utility import tick_timer, assert_eventually\n\nBASE_PORT = 1111\n\n\n@dataclass(frozen=True)\nclass CollectedRedisMsg:\n    cmd: str\n    src: str = \"tcp\"\n\n    @staticmethod\n    def all_from_src(*args, src=\"tcp\"):\n        return [CollectedRedisMsg(arg, src) for arg in args]\n\n\nclass CollectingMonitor:\n    \"\"\"Tracks all monitor messages between start() and stop()\"\"\"\n\n    def __init__(self, client):\n        self.client = client\n        self.messages = []\n        self._monitor_task = None\n\n    async def _monitor(self):\n        async with self.client.monitor() as monitor:\n            async for message in monitor.listen():\n                self.messages.append(CollectedRedisMsg(message[\"command\"], message[\"client_type\"]))\n\n    async def start(self):\n        if self._monitor_task is None:\n            self._monitor_task = asyncio.create_task(self._monitor())\n        await asyncio.sleep(0.1)\n\n    async def stop(self, timeout=0.1):\n        if self._monitor_task:\n            # Wait for Dragonfly to send all async monitor messages\n            await asyncio.sleep(timeout)\n            self._monitor_task.cancel()\n            try:\n                await self._monitor_task\n            except asyncio.CancelledError:\n                pass\n            self._monitor_task = None\n\n        def should_exclude(cmd: str):\n            cmd = cmd.upper()\n            return \"SELECT\" in cmd or \"CLIENT 
SETINFO\" in cmd\n\n        while len(self.messages) > 0 and should_exclude(self.messages[0].cmd):\n            self.messages = self.messages[1:]\n        return self.messages\n\n\n\"\"\"\nTest MONITOR command with basic use case\n\"\"\"\n\n\n@dfly_args({\"proactor_threads\": 4})\nasync def test_monitor_command(async_pool):\n    monitor = CollectingMonitor(aioredis.Redis(connection_pool=async_pool))\n    await monitor.start()\n\n    c = aioredis.Redis(connection_pool=async_pool)\n    await c.set(\"a\", 1)\n    await c.get(\"a\")\n    await c.lpush(\"l\", \"V\")\n    await c.lpop(\"l\")\n\n    collected = await monitor.stop()\n    expected = CollectedRedisMsg.all_from_src(\"SET a 1\", \"GET a\", \"LPUSH l V\", \"LPOP l\")\n\n    assert expected == collected\n\n\n\"\"\"\nTest MONITOR command with MULTI/EXEC transaction with squashing\n\"\"\"\n\n\n@dfly_args({\"proactor_threads\": 4, \"multi_exec_squash\": \"true\"})\nasync def test_monitor_command_multi(async_pool):\n    monitor = CollectingMonitor(aioredis.Redis(connection_pool=async_pool))\n    await monitor.start()\n\n    c = aioredis.Redis(connection_pool=async_pool)\n    p = c.pipeline(transaction=True)\n\n    expected = []\n    for i in range(100):\n        p.lpush(str(i), \"V\")\n        expected.append(f\"LPUSH {i} V\")\n\n    await p.execute()\n\n    collected = await monitor.stop(0.3)\n    expected = CollectedRedisMsg.all_from_src(*expected)\n\n    # The order is random due to squashing\n    assert set(expected) == set(collected[1:-1])\n\n\n\"\"\"\nTest MONITOR command preserves correct order for MULTI/EXEC sequences\nRegression test for https://github.com/dragonflydb/dragonfly/issues/5953\n\"\"\"\n\n\n@dfly_args({\"proactor_threads\": 4})\nasync def test_monitor_command_multi_exec_order(async_pool):\n    monitor = CollectingMonitor(aioredis.Redis(connection_pool=async_pool))\n    await monitor.start()\n\n    c = aioredis.Redis(connection_pool=async_pool)\n    p = c.pipeline(transaction=True)\n    
p.ping()\n    p.set(\"key1\", \"value1\")\n    p.get(\"key1\")\n    await p.execute()\n\n    collected = await monitor.stop()\n\n    # Verify the commands appear in the correct order: MULTI, PING, SET, GET, EXEC\n    assert len(collected) == 5\n    assert \"MULTI\" in collected[0].cmd.upper()\n    assert \"PING\" in collected[1].cmd.upper()\n    assert \"SET\" in collected[2].cmd.upper()\n    assert \"GET\" in collected[3].cmd.upper()\n    assert \"EXEC\" in collected[4].cmd.upper()\n\n\n\"\"\"\nTest MONITOR command with lua script\nhttps://github.com/dragonflydb/dragonfly/issues/756\n\"\"\"\n\nTEST_MONITOR_SCRIPT = \"\"\"\n    redis.call('SET', 'A', 1)\n    redis.call('GET', 'A')\n    redis.call('SADD', 'S', 1, 2, 3)\n    redis.call('LPUSH', 'L', 1)\n    redis.call('LPOP', 'L')\n\"\"\"\n\n\n@dfly_args({\"proactor_threads\": 4, \"lua_auto_async\": \"false\"})\nasync def test_monitor_command_lua(async_pool):\n    monitor = CollectingMonitor(aioredis.Redis(connection_pool=async_pool))\n    await monitor.start()\n\n    c = aioredis.Redis(connection_pool=async_pool)\n    await c.eval(TEST_MONITOR_SCRIPT, 3, \"A\", \"S\", \"L\")\n\n    collected = await monitor.stop()\n    expected = CollectedRedisMsg.all_from_src(\n        \"SET A 1\", \"GET A\", \"SADD S 1 2 3\", \"LPUSH L 1\", \"LPOP L\", src=\"lua\"\n    )\n\n    assert expected == collected[1:]\n\n\n@dfly_args({\"proactor_threads\": 1})\nasync def test_monitor_multi_exec_close(df_server: DflyInstance):\n    async def monitor_multi_exec_close():\n        client = aioredis.Redis(port=df_server.port, single_connection_client=True)\n        try:\n            await client.execute_command(\"MULTI\")\n            await client.execute_command(\"MONITOR\")\n            await client.execute_command(\"MONITOR\")\n            await client.execute_command(\"SET\", \"a\", \"1\")\n            await client.execute_command(\"EXEC\")\n        except Exception:\n            pass\n        finally:\n            await client.close()\n\n 
   for _ in range(200):\n        await asyncio.gather(*[monitor_multi_exec_close() for _ in range(10)])\n\n    # If we get here, the server did not crash.\n    client = df_server.client()\n    assert await client.ping()\n\n\n\"\"\"\nRun test in pipeline mode.\nThis is mostly how this is done with python - its more like a transaction that\nthe connections is running all commands in its context\n\"\"\"\n\n\nasync def test_pipeline_support(async_client):\n    def generate(max):\n        for i in range(max):\n            yield f\"key{i}\", f\"value={i}\"\n\n    messages = {a: b for a, b in generate(5)}\n    assert await run_pipeline_mode(async_client, messages)\n\n\nasync def reader(channel: aioredis.client.PubSub, messages, max: int):\n    message_count = len(messages)\n    while message_count > 0:\n        try:\n            async with async_timeout.timeout(1):\n                message = await channel.get_message(ignore_subscribe_messages=True)\n                if message is not None:\n                    message_count = message_count - 1\n                    if message[\"data\"] not in messages:\n                        return False, f\"got unexpected message from pubsub - {message['data']}\"\n                await asyncio.sleep(0.01)\n        except asyncio.TimeoutError:\n            pass\n    return True, \"success\"\n\n\nasync def run_pipeline_mode(async_client: aioredis.Redis, messages):\n    pipe = async_client.pipeline(transaction=False)\n    for key, val in messages.items():\n        pipe.set(key, val)\n    result = await pipe.execute()\n\n    print(f\"got result from the pipeline of {result} with len = {len(result)}\")\n    if len(result) != len(messages):\n        return False, f\"number of results from pipe {len(result)} != expected {len(messages)}\"\n    elif False in result:\n        return False, \"expecting to successfully get all result good, but some failed\"\n    else:\n        return True, \"all command processed successfully\"\n\n\n\"\"\"\nTest the 
pipeline command\nOpen connection to the subscriber and publish on the other end messages\nMake sure that we are able to send all of them and that we are getting the\nexpected results on the subscriber side\n\"\"\"\n\n\nasync def test_pubsub_command(async_client):\n    def generate(max):\n        for i in range(max):\n            yield f\"message number {i}\"\n\n    messages = [a for a in generate(5)]\n    assert await run_pubsub(async_client, messages, \"channel-1\")\n\n\nasync def run_pubsub(async_client, messages, channel_name):\n    pubsub = async_client.pubsub()\n    await pubsub.subscribe(channel_name)\n\n    future = asyncio.create_task(reader(pubsub, messages, len(messages)))\n    success = True\n\n    for message in messages:\n        res = await async_client.publish(channel_name, message)\n        if not res:\n            success = False\n            break\n\n    await future\n    status, message = future.result()\n\n    await pubsub.close()\n    if status and success:\n        return True, \"successfully completed all\"\n    else:\n        return (\n            False,\n            f\"subscriber result: {status}: {message},  publisher publish: success {success}\",\n        )\n\n\nasync def run_multi_pubsub(async_client, messages, channel_name):\n    subs = [async_client.pubsub() for i in range(5)]\n    for s in subs:\n        await s.subscribe(channel_name)\n\n    tasks = [\n        asyncio.create_task(reader(s, messages, random.randint(0, len(messages)))) for s in subs\n    ]\n\n    success = True\n\n    for message in messages:\n        res = await async_client.publish(channel_name, message)\n        if not res:\n            success = False\n            break\n\n    for f in tasks:\n        await f\n    results = [f.result() for f in tasks]\n\n    for s in subs:\n        await s.close()\n    if success:\n        for status, message in results:\n            if not status:\n                return False, f\"failed to process {message}\"\n        return 
True, \"success\"\n    else:\n        return False, \"failed to publish\"\n\n\n\"\"\"\nTest with multiple subscribers for a channel\nWe want to stress this to see if we have any issue\nwith the pub sub code since we are \"sharing\" the message\nacross multiple connections internally\n\"\"\"\n\n\nasync def test_multi_pubsub(async_client):\n    def generate(max):\n        for i in range(max):\n            yield f\"this is message number {i} from the publisher on the channel\"\n\n    messages = [a for a in generate(500)]\n    state, message = await run_multi_pubsub(async_client, messages, \"my-channel\")\n\n    assert state, message\n\n\n\"\"\"\nTest PUBSUB NUMSUB command.\n\"\"\"\n\n\nasync def test_pubsub_subcommand_for_numsub(async_client: aioredis.Redis):\n    async def resub(s: \"aioredis.PubSub\", sub: bool, chan: str):\n        if sub:\n            await s.subscribe(chan)\n        else:\n            await s.unsubscribe(chan)\n        # Wait for PUSH message to be parsed to make sure upadte was performed\n        await s.get_message(timeout=0.1)\n\n    # Subscribe 5 times to chan1\n    subs1 = [async_client.pubsub() for i in range(5)]\n    await asyncio.gather(*(resub(s, True, \"chan1\") for s in subs1))\n    assert await async_client.pubsub_numsub(\"chan1\") == [(\"chan1\", 5)]\n\n    # Unsubscribe all from chan1\n    await asyncio.gather(*(resub(s, False, \"chan1\") for s in subs1))\n\n    # Make sure numsub drops to 0\n    async for numsub, breaker in tick_timer(lambda: async_client.pubsub_numsub(\"chan1\")):\n        with breaker:\n            assert numsub[0][1] == 0\n\n    # Check empty numsub\n    assert await async_client.pubsub_numsub() == []\n\n    subs2 = [async_client.pubsub() for i in range(5)]\n    await asyncio.gather(*(resub(s, True, \"chan2\") for s in subs2))\n\n    subs3 = [async_client.pubsub() for i in range(10)]\n    await asyncio.gather(*(resub(s, True, \"chan3\") for s in subs3))\n\n    assert await async_client.pubsub_numsub(\"chan2\", 
\"chan3\") == [(\"chan2\", 5), (\"chan3\", 10)]\n\n    await asyncio.gather(*(s.unsubscribe() for s in subs2 + subs3))\n\n\n\"\"\"\nTest that pubsub clients who are stuck on backpressure from a slow client (the one in the test doesn't read messages at all)\nwill eventually unblock when it disconnects.\n\"\"\"\n\n\n@pytest.mark.large\n@dfly_args({\"proactor_threads\": \"1\", \"publish_buffer_limit\": \"100\"})\nasync def test_publish_stuck(df_server: DflyInstance, async_client: aioredis.Redis):\n    reader, writer = await asyncio.open_connection(\"127.0.0.1\", df_server.port, limit=10)\n    writer.write(b\"SUBSCRIBE channel\\r\\n\")\n    await writer.drain()\n\n    async def pub_task():\n        payload = \"msg\" * 1000\n        p = async_client.pipeline()\n        for _ in range(1000):\n            p.publish(\"channel\", payload)\n        await p.execute()\n\n    publishers = [asyncio.create_task(pub_task()) for _ in range(20)]\n\n    await asyncio.sleep(5)\n\n    # Check we reached the limit\n    pub_bytes = int((await async_client.info())[\"dispatch_queue_subscriber_bytes\"])\n    assert pub_bytes >= 100\n\n    await asyncio.sleep(0.1)\n\n    # Make sure processing is stalled\n    new_pub_bytes = int((await async_client.info())[\"dispatch_queue_subscriber_bytes\"])\n    assert new_pub_bytes == pub_bytes\n\n    writer.write(b\"QUIT\\r\\n\")\n    await writer.drain()\n    writer.close()\n\n    # Make sure all publishers unblock eventually\n    for pub in asyncio.as_completed(publishers):\n        await pub\n\n\n@pytest.mark.large\n@dfly_args({\"proactor_threads\": \"4\"})\nasync def test_pubsub_busy_connections(df_server: DflyInstance):\n    sleep = 60\n\n    async def sub_thread():\n        i = 0\n\n        async def sub_task():\n            nonlocal i\n            sleep_task = asyncio.create_task(asyncio.sleep(sleep))\n            while not sleep_task.done():\n                client = df_server.client()\n                pubsub = client.pubsub()\n                
await pubsub.subscribe(\"channel\")\n                # await pubsub.unsubscribe(\"channel\")\n                i = i + 1\n                await client.close()\n\n        subs = [asyncio.create_task(sub_task()) for _ in range(10)]\n        for s in subs:\n            await s\n        logging.debug(f\"Exiting thread after {i} subscriptions\")\n\n    async def pub_task():\n        pub = df_server.client()\n        i = 0\n        sleep_task = asyncio.create_task(asyncio.sleep(sleep))\n        while not sleep_task.done():\n            await pub.publish(\"channel\", f\"message-{i}\")\n            i = i + 1\n\n    def run_in_thread():\n        loop = asyncio.new_event_loop()\n        asyncio.set_event_loop(loop)\n        loop.run_until_complete(sub_thread())\n\n    threads = []\n    for _ in range(10):\n        thread = Thread(target=run_in_thread)\n        thread.start()\n        threads.append(thread)\n\n    await pub_task()\n\n    for thread in threads:\n        thread.join()\n\n\nasync def test_subscribers_with_active_publisher(df_server: DflyInstance, max_connections=100):\n    # TODO: I am not how to customize the max connections for the pool.\n    async_pool = aioredis.ConnectionPool(\n        host=\"localhost\",\n        port=df_server.port,\n        db=0,\n        decode_responses=True,\n        max_connections=max_connections,\n    )\n\n    async def publish_worker():\n        client = aioredis.Redis(connection_pool=async_pool)\n        for i in range(0, 2000):\n            await client.publish(\"channel\", f\"message-{i}\")\n        await client.aclose()\n\n    async def channel_reader(channel: aioredis.client.PubSub):\n        for i in range(0, 150):\n            try:\n                async with async_timeout.timeout(1):\n                    message = await channel.get_message(ignore_subscribe_messages=True)\n            except asyncio.TimeoutError:\n                break\n\n    async def subscribe_worker():\n        client = 
aioredis.Redis(connection_pool=async_pool)\n        pubsub = client.pubsub()\n        async with pubsub as p:\n            await pubsub.subscribe(\"channel\")\n            await channel_reader(pubsub)\n            await pubsub.unsubscribe(\"channel\")\n\n    # Create a publisher that sends constantly messages to the channel\n    # Then create subscribers that will subscribe to already active channel\n    pub_task = asyncio.create_task(publish_worker())\n    await asyncio.gather(*(subscribe_worker() for _ in range(max_connections - 10)))\n    await pub_task\n    await async_pool.disconnect()\n\n\n# This test ensures that no messages are sent after a successful\n# acknowledgement of a unsubscribe.\n# Low publish_buffer_limit makes publishers block on memory backpressure\n\n\n@dfly_args({\"publish_buffer_limit\": 100, \"proactor_threads\": 2})\nasync def test_pubsub_unsubscribe(df_server: DflyInstance):\n    long_message = \"a\" * 100_000\n    pub_sent = 0\n    pub_ready_ev = asyncio.Event()\n\n    async def publisher():\n        nonlocal pub_sent\n        async with df_server.client(single_connection_client=True) as c:\n            for _ in range(32):\n                await c.execute_command(\"PUBLISH\", \"chan\", long_message)\n                # Unblock subscriber after a sufficient amount of publish requests accumulated\n                pub_sent += 1\n                if pub_sent >= 16:\n                    pub_ready_ev.set()\n\n    # Get raw connection from the client and subscribe to chan\n    cl = df_server.client(single_connection_client=True)\n    await cl.ping()\n    conn = cl.connection\n    await conn.send_command(\"SUBSCRIBE chan\")\n\n    # Flood our only subscriber with large messages to make publishers stop\n    tasks = [asyncio.create_task(publisher()) for _ in range(16)]\n\n    # Unsubscribe in the process\n    await pub_ready_ev.wait()\n    await conn.send_command(\"UNSUBSCRIBE\")\n\n    # No messages should be received after we've read unsubscribe 
reply\n    had_unsub = False\n    while True:\n        reply = await conn.read_response(timeout=0.2)\n        if reply is None:\n            break\n\n        if reply[0] == \"unsubscribe\":\n            assert reply[2] == 0  # zero subscriptions left\n            had_unsub = True\n        else:\n            assert not had_unsub, \"found message even after all subscriptions were removed\"\n\n    assert had_unsub\n    await asyncio.gather(*tasks)\n    await cl.aclose()\n\n\nasync def produce_expiring_keys(async_client: aioredis.Redis):\n    keys = []\n    for i in range(10, 50):\n        keys.append(f\"k{i}\")\n        await async_client.set(keys[-1], \"X\", px=200 + i * 10)\n    return keys\n\n\nasync def collect_expiring_events(pclient, keys):\n    events = []\n    async for message in pclient.listen():\n        if message[\"type\"] == \"subscribe\":\n            continue\n\n        events.append(message)\n        if len(events) >= len(keys):\n            break\n    return events\n\n\n@dfly_args({\"notify_keyspace_events\": \"Ex\"})\nasync def test_keyspace_events(async_client: aioredis.Redis):\n    pclient = async_client.pubsub()\n    await pclient.subscribe(\"__keyevent@0__:expired\")\n\n    keys = await produce_expiring_keys(async_client)\n\n    # We don't support immediate expiration:\n    # keys += ['immediate']\n    # await async_client.set(keys[-1], 'Y', exat=123) # expired 50 years ago\n\n    events = await collect_expiring_events(pclient, keys)\n\n    assert set(ev[\"data\"] for ev in events) == set(keys)\n\n\nasync def test_keyspace_events_config_set(async_client: aioredis.Redis):\n    # nonsense does not make sense as argument, we only accept ex or empty string\n    with pytest.raises(ResponseError):\n        await async_client.config_set(\"notify_keyspace_events\", \"nonsense\")\n\n    await async_client.config_set(\"notify_keyspace_events\", \"ex\")\n    pclient = async_client.pubsub()\n    await pclient.subscribe(\"__keyevent@0__:expired\")\n\n    
keys = await produce_expiring_keys(async_client)\n\n    events = await collect_expiring_events(pclient, keys)\n\n    assert set(ev[\"data\"] for ev in events) == set(keys)\n\n    keys = await produce_expiring_keys(async_client)\n    await async_client.config_set(\"notify_keyspace_events\", \"\")\n    with pytest.raises(asyncio.TimeoutError):\n        async with async_timeout.timeout(1):\n            await collect_expiring_events(pclient, keys)\n\n\n@dfly_args({\"max_busy_read_usec\": 50000})\nasync def test_reply_count(df_server: DflyInstance):\n    \"\"\"Make sure reply aggregations reduce reply counts for common cases\"\"\"\n\n    async def get_reply_count():\n        metrics = await df_server.metrics()\n        return int(metrics[\"dragonfly_reply\"].samples[0].value)\n\n    async def measure(aw):\n        before = await get_reply_count()\n        await aw\n        return await get_reply_count() - before\n\n    async_client = df_server.client()\n    await async_client.config_resetstat()\n    base = await get_reply_count()\n    info_diff = await get_reply_count() - base\n    assert info_diff == 0  # no commands yet\n\n    # Warm client buffer up\n    await async_client.lpush(\"warmup\", *(i for i in range(500)))\n    await async_client.lrange(\"warmup\", 0, -1)\n\n    # Integer list\n    await async_client.lpush(\"list-1\", *(i for i in range(100)))\n    assert await measure(async_client.lrange(\"list-1\", 0, -1)) == 1\n\n    # Integer set\n    await async_client.sadd(\"set-1\", *(i for i in range(100)))\n    assert await measure(async_client.smembers(\"set-1\")) <= 2\n\n    # Sorted sets\n    await async_client.zadd(\"zset-1\", mapping={str(i): i for i in range(50)})\n    assert await measure(async_client.zrange(\"zset-1\", 0, -1, withscores=True)) <= 2\n\n    # Exec call\n    e = async_client.pipeline(transaction=True)\n    for _ in range(100):\n        e.incr(\"num-1\")\n\n    # one - for MULTI-OK, one for the rest. 
Depends on the squashing efficiency,\n    # can be either 1 or 2 replies.\n    assert await measure(e.execute()) <= 2\n\n    # Just pipeline\n    p = async_client.pipeline(transaction=False)\n    for _ in range(100):\n        p.incr(\"num-1\")\n    assert await measure(p.execute()) <= 2\n\n    # Script result\n    assert await measure(async_client.eval('return {1,2,{3,4},5,6,7,8,\"nine\"}', 0)) == 1\n\n    # Search results\n    await async_client.execute_command(\"FT.CREATE i1 SCHEMA name text\")\n    for i in range(50):\n        await async_client.hset(f\"key-{i}\", \"name\", f\"name number {i}\")\n    assert await measure(async_client.ft(\"i1\").search(\"*\")) <= 2\n\n\nasync def test_big_command(df_server, size=8 * 1024):\n    reader, writer = await asyncio.open_connection(\"127.0.0.1\", df_server.port)\n\n    writer.write(f\"SET a {'v'*size}\\n\".encode())\n    await writer.drain()\n\n    assert \"OK\" in (await reader.readline()).decode()\n\n    writer.close()\n    await writer.wait_closed()\n\n\nasync def test_subscribe_pipelined(async_client: aioredis.Redis):\n    pipe = async_client.pipeline(transaction=False)\n    pipe.execute_command(\"subscribe channel\").execute_command(\"subscribe channel\")\n    await pipe.echo(\"bye bye\").execute()\n\n\nasync def test_subscribe_in_pipeline(async_client: aioredis.Redis):\n    pipe = async_client.pipeline(transaction=False)\n    pipe.echo(\"one\")\n    pipe.execute_command(\"SUBSCRIBE ch1\")\n    pipe.echo(\"two\")\n    pipe.execute_command(\"SUBSCRIBE ch2\")\n    pipe.echo(\"three\")\n    res = await pipe.execute()\n\n    assert res == [\"one\", [\"subscribe\", \"ch1\", 1], \"two\", [\"subscribe\", \"ch2\", 2], \"three\"]\n\n\nasync def test_send_delay_metric(df_server: DflyInstance):\n    client = df_server.client()\n    await client.client_setname(\"client1\")\n    blob = \"A\" * 1000\n    for j in range(10):\n        await client.set(f\"key-{j}\", blob)\n\n    await client.config_set(\"pipeline_queue_limit\", 
100)\n    reader, writer = await asyncio.open_connection(\"localhost\", df_server.port)\n\n    async def send_data_noread():\n        for j in range(500000):\n            writer.write(f\"GET key-{j % 10}\\n\".encode())\n            await writer.drain()\n\n    t1 = asyncio.create_task(send_data_noread())\n\n    @assert_eventually\n    async def wait_for_large_delay():\n        info = await client.info(\"clients\")\n        assert int(info[\"send_delay_ms\"]) > 100\n\n    # Check that the delay metric indeed increases as we have a connection\n    # that is not reading the data.\n    await wait_for_large_delay()\n    t1.cancel()\n    writer.close()\n\n\nasync def test_match_http(df_server: DflyInstance):\n    client = df_server.client()\n    reader, writer = await asyncio.open_connection(\"localhost\", df_server.port)\n    for i in range(2000):\n        writer.write(f\"foo bar \".encode())\n        await writer.drain()\n\n\n\"\"\"\nThis test makes sure that Dragonfly can receive blocks of pipelined commands even\nwhile a script is still executing. This is a dangerous scenario because both the dispatch fiber\nand the connection fiber are actively using the context. 
What is more, the script execution injects\nits own custom reply builder, which can't be used anywhere else, besides the lua script itself.\n\"\"\"\n\nBUSY_SCRIPT = \"\"\"\nfor i=1,300 do\n    redis.call('MGET', 'k1', 'k2', 'k3')\nend\n\"\"\"\n\nPACKET1 = \"\"\"\nMGET s1 s2 s3\nEVALSHA {sha} 3 k1 k2 k3\n\"\"\"\n\nPACKET2 = \"\"\"\nMGET m1 m2 m3\nMGET m4 m5 m6\nMGET m7 m8 m9\\n\n\"\"\"\n\nPACKET3 = (\n    \"\"\"\nPING\n\"\"\"\n    * 500\n    + \"ECHO DONE\\n\"\n)\n\n\nasync def test_parser_while_script_running(async_client: aioredis.Redis, df_server: DflyInstance):\n    sha = await async_client.script_load(BUSY_SCRIPT)\n\n    # Use a raw tcp connection for strict control of sent commands\n    # Below we send commands while the previous ones didn't finish\n    reader, writer = await asyncio.open_connection(\"localhost\", df_server.port)\n\n    # Send first pipeline packet, last commands is a long executing script\n    writer.write(PACKET1.format(sha=sha).encode())\n    await writer.drain()\n\n    # Give the script some time to start running\n    await asyncio.sleep(0.01)\n\n    # Send another packet that will be received while the script is running\n    writer.write(PACKET2.encode())\n    # The last batch has to be big enough, so the script will finish before it is fully consumed\n    writer.write(PACKET3.encode())\n    await writer.drain()\n\n    await reader.readuntil(b\"DONE\")\n    writer.close()\n    await writer.wait_closed()\n\n\n\"\"\"\n    This test makes sure that we can migrate while handling pipelined commands and don't keep replies\n    batched even if the stream suddenly stops.\n\"\"\"\n\n\n@dfly_args({\"proactor_threads\": \"4\", \"pipeline_squash\": 0})\nasync def test_pipeline_batching_while_migrating(\n    async_client: aioredis.Redis, df_server: DflyInstance\n):\n    sha = await async_client.script_load(\"return redis.call('GET', KEYS[1])\")\n\n    reader, writer = await asyncio.open_connection(\"localhost\", df_server.port)\n\n    # First, write a 
EVALSHA that will ask for migration (75% it's on the wrong shard)\n    # and some more pipelined commands that will keep Dragonfly busy\n    incrs = \"\".join(\"INCR a\\r\\n\" for _ in range(50))\n    writer.write((f\"EVALSHA {sha} 1 a\\r\\n\" + incrs).encode())\n    await writer.drain()\n    # We migrate only when the socket wakes up, so send another batch to trigger migration\n    writer.write(\"INCR a\\r\\n\".encode())\n    await writer.drain()\n\n    # The data doesn't necessarily arrive in a single batch\n    async def read():\n        reply = \"\"\n        while not reply.strip().endswith(\"51\"):\n            reply = (await reader.read(520)).decode()\n\n    # Make sure we recived all replies\n    await asyncio.wait_for(read(), timeout=2.0)\n\n    writer.close()\n    await writer.wait_closed()\n\n\n@dfly_args({\"proactor_threads\": 1})\nasync def test_large_cmd(async_client: aioredis.Redis):\n    MAX_ARR_SIZE = 65535\n    res = await async_client.hset(\n        \"foo\", mapping={f\"key{i}\": f\"val{i}\" for i in range(MAX_ARR_SIZE // 2)}\n    )\n    assert res == MAX_ARR_SIZE // 2\n\n    res = await async_client.mset({f\"key{i}\": f\"val{i}\" for i in range(MAX_ARR_SIZE // 2)})\n    assert res\n\n    res = await async_client.mget([f\"key{i}\" for i in range(MAX_ARR_SIZE)])\n    assert len(res) == MAX_ARR_SIZE\n\n\n@dfly_args({\"proactor_threads\": 1})\nasync def test_parser_memory_stats(df_server, async_client: aioredis.Redis):\n    reader, writer = await asyncio.open_connection(\"127.0.0.1\", df_server.port, limit=10)\n    writer.write(b\"*1000\\r\\n\")\n    writer.write(b\"$4\\r\\nmget\\r\\n\")\n    val = (b\"a\" * 100) + b\"\\r\\n\"\n    for i in range(0, 900):\n        writer.write(b\"$100\\r\\n\" + val)\n    await writer.drain()  # writer is pending because the request is not finished.\n\n    @assert_eventually\n    async def check_stats():\n        stats = await async_client.execute_command(\"memory stats\")\n        assert 
stats[\"connections.direct_bytes\"] > 130000\n\n    await check_stats()\n\n\nasync def test_reject_non_tls_connections_on_tls(with_tls_server_args, df_factory):\n    server: DflyInstance = df_factory.create(\n        no_tls_on_admin_port=\"true\",\n        admin_port=1111,\n        port=1211,\n        requirepass=\"XXX\",\n        **with_tls_server_args,\n    )\n    server.start()\n\n    client = server.client(password=\"XXX\")\n    with pytest.raises(ResponseError):\n        await client.dbsize()\n    await client.aclose()\n\n    client = server.admin_client(password=\"XXX\")\n    assert await client.dbsize() == 0\n\n\nasync def test_tls_insecure(with_ca_tls_server_args, with_tls_client_args, df_factory):\n    server = df_factory.create(port=BASE_PORT, **with_ca_tls_server_args)\n    server.start()\n\n    client = aioredis.Redis(port=server.port, **with_tls_client_args, ssl_cert_reqs=None)\n    assert await client.dbsize() == 0\n\n\nasync def test_tls_full_auth(with_ca_tls_server_args, with_ca_tls_client_args, df_factory):\n    server = df_factory.create(port=BASE_PORT, **with_ca_tls_server_args)\n    server.start()\n\n    client = aioredis.Redis(port=server.port, **with_ca_tls_client_args)\n    assert await client.dbsize() == 0\n\n\nasync def test_tls_reject(\n    with_ca_tls_server_args, with_tls_client_args, df_factory: DflyInstanceFactory\n):\n    server: DflyInstance = df_factory.create(port=BASE_PORT, **with_ca_tls_server_args)\n    server.start()\n\n    client = server.client(**with_tls_client_args, ssl_cert_reqs=None)\n    await client.ping()\n    await client.aclose()\n\n    client = server.client(**with_tls_client_args)\n    with pytest.raises(ConnectionError):\n        await client.ping()\n\n\n@dfly_args({\"proactor_threads\": \"4\", \"pipeline_squash\": 1})\nasync def test_squashed_pipeline_eval(async_client: aioredis.Redis):\n    p = async_client.pipeline(transaction=False)\n    for _ in range(5):\n        # Deliberately lowcase EVAL to test that it 
is not squashed\n        p.execute_command(\"eval\", \"return redis.call('set', KEYS[1], 'value')\", 1, \"key\")\n    res = await p.execute()\n    assert res == [\"OK\"] * 5\n\n\n@dfly_args({\"proactor_threads\": \"4\", \"pipeline_squash\": 10})\nasync def test_squashed_pipeline(async_client: aioredis.Redis):\n    p = async_client.pipeline(transaction=False)\n\n    for j in range(50):\n        for i in range(10):\n            p.incr(f\"k{i}\")\n        p.execute_command(\"NOTFOUND\")\n\n    res = await p.execute(raise_on_error=False)\n\n    for j in range(50):\n        assert res[0:10] == [j + 1] * 10\n        assert isinstance(res[10], aioredis.ResponseError)\n        res = res[11:]\n\n\n@dfly_args({\"proactor_threads\": \"4\", \"pipeline_squash\": 10})\nasync def test_squashed_pipeline_seeder(df_server, df_seeder_factory):\n    seeder = df_seeder_factory.create(port=df_server.port, keys=10_000)\n    await seeder.run(target_deviation=0.1)\n\n\n\"\"\"\nThis test makes sure that multi transactions can be integrated into pipeline squashing\n\"\"\"\n\n\n@dfly_args({\"proactor_threads\": \"4\", \"pipeline_squash\": 1})\nasync def test_squashed_pipeline_multi(async_client: aioredis.Redis):\n    p = async_client.pipeline(transaction=False)\n    for _ in range(5):\n        # Series of squashable commands\n        for _ in range(5):\n            p.set(\"first\", \"true\")\n        # Non-squashable\n        p.info()\n        # Eval without at tx\n        p.execute_command(\"MULTI\")\n        p.set(\"second\", \"true\")\n        p.execute_command(\"EXEC\")\n        # Finishing sequence\n        for _ in range(5):\n            p.set(\"third\", \"true\")\n    await p.execute()\n\n\nasync def test_unix_domain_socket(df_factory, tmp_dir):\n    server = df_factory.create(proactor_threads=1, port=BASE_PORT, unixsocket=\"./df.sock\")\n    server.start()\n\n    await asyncio.sleep(0.5)\n\n    r = aioredis.Redis(unix_socket_path=tmp_dir / \"df.sock\")\n    assert await 
r.ping()\n\n\nasync def test_unix_socket_only(df_factory, tmp_dir):\n    server = df_factory.create(proactor_threads=1, port=0, unixsocket=\"./df.sock\")\n    # we call _start because we start() wait for the port to become available and\n    # we run here a process without a port.\n    server._start()\n\n    await asyncio.sleep(1)\n\n    r = aioredis.Redis(unix_socket_path=tmp_dir / \"df.sock\")\n    assert await r.ping()\n\n\n\"\"\"\nTest nested pauses. Executing CLIENT PAUSE should be possible even if another write-pause is active.\nIt should prolong the pause for all current commands.\n\"\"\"\n\n\n@pytest.mark.large\nasync def test_nested_client_pause(async_client: aioredis.Redis):\n    async def do_pause():\n        await async_client.execute_command(\"CLIENT\", \"PAUSE\", \"1000\", \"WRITE\")\n\n    async def do_write():\n        await async_client.execute_command(\"LPUSH\", \"l\", \"1\")\n\n    p1 = asyncio.create_task(do_pause())\n    await asyncio.sleep(0.1)\n\n    p2 = asyncio.create_task(do_write())\n    assert not p2.done()\n\n    await asyncio.sleep(0.5)\n    p3 = asyncio.create_task(do_pause())\n\n    await p1\n    await asyncio.sleep(0.1)\n    assert not p2.done()  # blocked by p3 now\n\n    await p2\n    await asyncio.sleep(0.0)\n    assert p3.done()\n    await p3\n\n\n@dfly_args({\"proactor_threads\": \"4\"})\nasync def test_blocking_command_client_pause(async_client: aioredis.Redis):\n    \"\"\"\n    1. Check client pause success when blocking transaction is running\n    2. lpush is paused after running client puase\n    3. 
once puased is finished lpush will run and blpop will pop the pushed value\n    \"\"\"\n\n    async def blpop_command():\n        res = await async_client.execute_command(\"blpop dest7 10\")\n        assert res == [\"dest7\", \"value\"]\n\n    async def brpoplpush_command():\n        res = await async_client.execute_command(\"brpoplpush src dest7 2\")\n        assert res == \"value\"\n\n    async def lpush_command():\n        await async_client.execute_command(\"lpush src value\")\n\n    blpop = asyncio.create_task(blpop_command())\n    brpoplpush = asyncio.create_task(brpoplpush_command())\n    await asyncio.sleep(0.1)\n\n    res = await async_client.execute_command(\"client pause 1000\")\n    assert res == \"OK\"\n\n    lpush = asyncio.create_task(lpush_command())\n    assert not lpush.done()\n\n    await lpush\n    await brpoplpush\n    await blpop\n\n\nasync def test_multiple_blocking_commands_client_pause(async_client: aioredis.Redis):\n    \"\"\"\n    Check running client pause command simultaneously with running multiple blocking command\n    from multiple connections\n    \"\"\"\n\n    async def just_blpop():\n        key = \"\".join(random.choices(string.ascii_letters, k=3))\n        await async_client.execute_command(f\"blpop {key} 2\")\n\n    async def client_pause():\n        res = await async_client.execute_command(\"client pause 1000\")\n        assert res == \"OK\"\n\n    tasks = [just_blpop() for _ in range(20)]\n    tasks.append(client_pause())\n\n    all = asyncio.gather(*tasks)\n\n    assert not all.done()\n    await all\n\n\nasync def test_tls_when_read_write_is_interleaved(\n    with_ca_tls_server_args, with_ca_tls_client_args, df_factory\n):\n    \"\"\"\n    This test covers a deadlock bug in helio and TlsSocket when a client connection renegotiated a\n    handshake without reading its pending data from the socket.\n    This is a weak test case and from our local experiments it deadlocked 30% of the test runs\n    \"\"\"\n    server: 
DflyInstance = df_factory.create(\n        port=1211, **with_ca_tls_server_args, proactor_threads=1\n    )\n\n    server.start()\n\n    s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)\n\n    ssl_key = with_ca_tls_client_args[\"ssl_keyfile\"]\n    ssl_cert = with_ca_tls_client_args[\"ssl_certfile\"]\n    ssl_ca_cert = with_ca_tls_client_args[\"ssl_ca_certs\"]\n\n    context = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT)\n    context.load_verify_locations(ssl_ca_cert)\n    context.load_cert_chain(certfile=ssl_cert, keyfile=ssl_key)\n    context.verify_mode = ssl.CERT_REQUIRED\n    context.maximum_version = ssl.TLSVersion.TLSv1_2\n\n    ssl_sock = context.wrap_socket(s, server_hostname=\"localhost\")\n    ssl_sock.connect((\"127.0.0.1\", server.port))\n    ssl_sock.settimeout(0.1)\n\n    tmp = \"f\" * 1000\n    message = f\"SET foo {tmp}\\r\\n\".encode()\n    ssl_sock.send(message)\n\n    try:\n        for i in range(0, 100_000):\n            res = random.randint(1, 4)\n            message = b\"\"\n            for j in range(0, res):\n                message = message + b\"GET foo\\r\\n\"\n            ssl_sock.send(message)\n            ssl_sock.do_handshake()\n    except:\n        # We might have filled the socket buffer, causing further sending will fail\n        pass\n\n    # This deadlocks\n    client = aioredis.Redis(port=server.port, **with_ca_tls_client_args)\n    await client.execute_command(\"GET foo\")\n\n\nasync def test_lib_name_ver(async_client: aioredis.Redis):\n    await async_client.execute_command(\"client setinfo lib-name dragonfly\")\n    await async_client.execute_command(\"client setinfo lib-ver 1.2.3.4\")\n\n    list = await async_client.execute_command(\"client list\")\n    assert len(list) == 1\n    assert list[0][\"lib-name\"] == \"dragonfly\"\n    assert list[0][\"lib-ver\"] == \"1.2.3.4\"\n\n\nasync def test_client_info(async_client: aioredis.Redis):\n    \"\"\"Test CLIENT INFO returns info about the current connection only.\"\"\"\n    
await async_client.client_setname(\"test_client_info\")\n\n    info = await async_client.execute_command(\"CLIENT INFO\")\n    assert isinstance(info, dict)\n    assert info[\"name\"] == \"test_client_info\"\n\n    # Verify CLIENT INFO returns same format as CLIENT LIST but for single connection\n    client_list = await async_client.client_list()\n    assert len(client_list) == 1\n    # CLIENT INFO should contain the same client id as CLIENT LIST\n    assert str(info[\"id\"]) == str(client_list[0][\"id\"])\n\n\nasync def test_hiredis(df_factory):\n    server = df_factory.create(proactor_threads=1)\n    server.start()\n    client = base_redis.Redis(port=server.port, protocol=3, cache_config=CacheConfig())\n    client.ping()\n\n\n@assert_eventually(times=500)\nasync def wait_for_conn_drop(async_client):\n    clients = await async_client.client_list()\n    logging.info(\"wait_for_conn_drop clients: %s\", clients)\n    assert len(clients) <= 1\n\n\n@dfly_args({\"timeout\": 1})\nasync def test_timeout(df_server: DflyInstance, async_client: aioredis.Redis):\n    # TODO investigate why it fails -- client is not stuck.\n    if df_server.has_arg(\"experimental_io_loop_v2\"):\n        pytest.skip(f\"Fails in the assertion below\")\n\n    another_client = df_server.client()\n    await another_client.ping()\n    clients = await async_client.client_list()\n    assert len(clients) == 2\n\n    await asyncio.sleep(2)\n\n    await wait_for_conn_drop(async_client)\n    info = await async_client.info(\"clients\")\n    assert int(info[\"timeout_disconnects\"]) >= 1\n\n\n@dfly_args({\"send_timeout\": 3})\nasync def test_send_timeout(df_server, async_client: aioredis.Redis):\n    reader, writer = await asyncio.open_connection(\"127.0.0.1\", df_server.port)\n    writer.write(f\"client setname writer_test\\n\".encode())\n    await writer.drain()\n    assert \"OK\" in (await reader.readline()).decode()\n    clients = await async_client.client_list()\n    assert len(clients) == 2\n    size 
= 1024 * 1024\n    writer.write(f\"SET a {'v'*size}\\n\".encode())\n    await writer.drain()\n\n    async def get_task():\n        while True:\n            writer.write(f\"GET a\\n\".encode())\n            await writer.drain()\n            await asyncio.sleep(0.1)\n\n    get = asyncio.create_task(get_task())\n\n    @assert_eventually(times=600)\n    async def wait_for_stuck_on_send():\n        clients = await async_client.client_list()\n        logging.info(\"wait_for_stuck_on_send clients: %s\", clients)\n        phase = next(\n            (client[\"phase\"] for client in clients if client[\"name\"] == \"writer_test\"), None\n        )\n        assert phase == \"send\"\n\n    await wait_for_stuck_on_send()\n    await wait_for_conn_drop(async_client)\n    info = await async_client.info(\"clients\")\n    assert int(info[\"timeout_disconnects\"]) >= 1\n    logging.info(\"finished disconnect\")\n    get.cancel()\n\n\n# Test that the cache pipeline does not grow or shrink under constant pipeline load.\n@dfly_args({\"proactor_threads\": 1, \"pipeline_squash\": 9, \"max_busy_read_usec\": 50000})\nasync def test_pipeline_cache_only_async_squashed_dispatches(df_factory):\n    server = df_factory.create()\n    server.start()\n\n    client = server.client()\n    await client.ping()  # Make sure the connection and the protocol were established\n\n    async def push_pipeline(size):\n        p = client.pipeline(transaction=True)\n        for i in range(size):\n            p.info()\n        res = await p.execute()\n        return res\n\n    # Dispatch only async command/pipelines and force squashing. 
pipeline_cache_bytes,\n    # should be zero because:\n    # We always dispatch the items that will be squashed, so when `INFO` gets called\n    # the cache is empty because the pipeline consumed it throughout its execution\n    # high max_busy_read_usec ensures that the connection fiber has enough time to push\n    # all the commands to reach the squashing limit.\n    for i in range(0, 10):\n        # it's actually 11 commands. 8 INFO + 2 from the MULTI/EXEC block that is injected\n        # by the client. The minimum to squash is 9 so it will squash the pipeline\n        # and INFO ALL should return zero for all the squashed commands in the pipeline\n        res = await push_pipeline(8)\n        for r in res:\n            assert r[\"pipeline_cache_bytes\"] == 0\n\n    # Non zero because we reclaimed/recycled the messages back to the cache\n    info = await client.info()\n    assert info[\"pipeline_cache_bytes\"] > 0\n\n\n# Test that the pipeline cache size shrinks on workloads that storm the datastore with\n# pipeline commands and then \"back off\" by gradually reducing the pipeline load such that\n# the cache becomes progressively underutilized. At that stage, the pipeline should slowly\n# shrink (because it's underutilized).\n@pytest.mark.skip(\"Flaky\")\n@dfly_args({\"proactor_threads\": 1})\nasync def test_pipeline_cache_size(df_server: DflyInstance):\n    # Start 1 client.\n    good_client = df_server.client()\n    bad_actor_client = df_server.client()\n\n    async def push_pipeline(bad_actor_client, size=1):\n        # Fill cache.\n        p = bad_actor_client.pipeline(transaction=True)\n        for i in range(size):\n            p.lpush(str(i), \"V\")\n        await p.execute()\n\n    # Establish a baseline for the cache size. 
We dispatch async here.\n    await push_pipeline(bad_actor_client, 32)\n    info = await good_client.info()\n\n    old_pipeline_cache_bytes = info[\"pipeline_cache_bytes\"]\n    assert old_pipeline_cache_bytes > 0\n    assert info[\"dispatch_queue_bytes\"] == 0\n\n    for i in range(30):\n        await push_pipeline(bad_actor_client)\n        await good_client.execute_command(f\"set foo{i} bar\")\n\n    info = await good_client.info()\n\n    # Gradually release pipeline.\n    assert old_pipeline_cache_bytes > info[\"pipeline_cache_bytes\"]\n    assert info[\"dispatch_queue_bytes\"] == 0\n\n    # Now drain the full cache.\n    async with async_timeout.timeout(5):\n        while info[\"pipeline_cache_bytes\"] != 0:\n            await good_client.execute_command(f\"set foo{i} bar\")\n            info = await good_client.info()\n\n    assert info[\"dispatch_queue_bytes\"] == 0\n\n\n@dfly_args({\"proactor_threads\": 4, \"pipeline_queue_limit\": 10})\nasync def test_pipeline_overlimit(df_server: DflyInstance):\n    client = df_server.client()\n\n    await client.set(\"x\", \"a\" * 1024 * 5)\n\n    async def pipe_overlimit():\n        c = df_server.client()\n        pipe = c.pipeline()\n        for i in range(1000):\n            pipe.get(\"x\")\n        logging.debug(\"Executing...\")\n        res = await pipe.execute()\n        logging.debug(f\"Executed.\")\n\n    pipeline_tasks = [asyncio.create_task(pipe_overlimit()) for _ in range(20)]\n\n    await asyncio.sleep(2)\n    await client.config_set(\"pipeline_queue_limit\", 10000)\n    for task in pipeline_tasks:\n        await task\n\n\nasync def test_client_unpause(df_server: DflyInstance):\n    async_client = df_server.client()\n    await async_client.client_pause(3000, all=False)\n\n    async def set_foo():\n        client = df_server.client()\n        async with async_timeout.timeout(2):\n            await client.execute_command(\"SET\", \"foo\", \"bar\")\n\n    p1 = asyncio.create_task(set_foo())\n\n    await 
asyncio.sleep(0.5)\n    assert not p1.done()\n\n    async with async_timeout.timeout(0.5):\n        await async_client.client_unpause()\n\n    async with async_timeout.timeout(0.5):\n        await p1\n        assert p1.done()\n\n    await async_client.client_pause(1, all=False)\n    await asyncio.sleep(2)\n\n\nasync def test_client_pause_b2b(async_client):\n    async with async_timeout.timeout(1):\n        await async_client.client_pause(2000, all=False)\n        await async_client.client_pause(2000, all=False)\n\n\nasync def test_client_unpause_after_pause_all(async_client):\n    await async_client.client_pause(2000, all=True)\n    # Blocks and waits\n    res = await async_client.client_unpause()\n    assert res == \"OK\"\n    await async_client.client_pause(2000, all=False)\n    res = await async_client.client_unpause()\n\n\nasync def test_client_detached_crash(df_factory):\n    server = df_factory.create(proactor_threads=1)\n    server.start()\n    async_client = server.client()\n    await async_client.client_pause(2, all=False)\n    server.stop()\n\n\nasync def test_tls_client_kill_preemption(\n    with_ca_tls_server_args, with_ca_tls_client_args, df_factory\n):\n    server = df_factory.create(proactor_threads=4, port=BASE_PORT, **with_ca_tls_server_args)\n    server.start()\n\n    client = server.client(\n        single_connection_client=True, retry=Retry(NoBackoff(), 0), **with_ca_tls_client_args\n    )\n    assert await client.dbsize() == 0\n\n    # Get the list of clients\n    clients_info = await client.client_list()\n    assert len(clients_info) == 1\n\n    kill_id = clients_info[0][\"id\"]\n\n    async def seed():\n        try:\n            while True:\n                p = client.pipeline(transaction=True)\n                for i in range(100):\n                    p.lpush(str(i), \"V\")\n                await p.execute()\n        except (aioredis.ConnectionError, asyncio.CancelledError):\n            pass\n\n    task = asyncio.create_task(seed())\n\n    
await asyncio.sleep(0.1)\n\n    cl = aioredis.Redis(port=server.port, **with_ca_tls_client_args)\n    await cl.execute_command(f\"CLIENT KILL ID {kill_id}\")\n\n    # Ensure that the killed client actually disconnects before we cancel the worker task.\n    for _ in range(100):\n        try:\n            await client.ping()\n        except aioredis.ConnectionError:\n            break\n        await asyncio.sleep(0.05)\n    else:\n        pytest.fail(\"Killed client did not disconnect\")\n\n    # Give the server time to process the kill and write logs\n    await asyncio.sleep(0.5)\n    task.cancel()\n    await task\n\n    server.stop()\n    lines = server.find_in_logs(\"Preempting inside of atomic section, fiber\")\n    assert len(lines) == 0\n\n\n@dfly_args({\"proactor_threads\": 4})\nasync def test_client_migrate(df_server: DflyInstance):\n    \"\"\"\n    Test that we can migrate a client with \"CLIENT MIGRATE\" command.\n    \"\"\"\n    client1 = df_server.client()\n    await client1.client_setname(\"test_migrate\")\n    resp = await client1.execute_command(\"DFLY THREAD\")\n    client_id = await client1.client_id()\n    assert resp[1] == 4\n    current_tid = resp[0]\n    client2 = df_server.client()\n    resp = await client2.execute_command(\"CLIENT\", \"MIGRATE\", client_id, current_tid)\n    assert resp == 0  # not migrated as it's the same thread\n    dest_tid = (current_tid + 1) % 4\n    resp = await client2.execute_command(\"CLIENT\", \"MIGRATE\", client_id + 999, dest_tid)\n    assert resp == 0  # Not migrated as the client does not exist\n    resp = await client2.execute_command(\"CLIENT\", \"MIGRATE\", client_id, dest_tid)\n    assert resp == 1  # migrated successfully\n\n\nasync def test_client_migrate_no_conn_leak(df_server: DflyInstance):\n    admin = df_server.client()\n    resp = await admin.execute_command(\"DFLY THREAD\")\n    num_threads = resp[1]\n\n    # Create multiple clients and migrate them all to the same thread.\n    # If DecreaseConnStats 
is called twice per migration (double-decrement bug),\n    # the source threads' uint32 counters are invalid.\n    num_clients = 20\n    clients = []\n    client_ids = []\n    dest_tid = 0\n    for _ in range(num_clients):\n        c = df_server.client()\n        clients.append(c)\n        client_ids.append(await c.client_id())\n\n    info = await admin.info(\"clients\")\n    baseline = info[\"connected_clients\"]\n\n    for c, cid in zip(clients, client_ids):\n        r = await c.execute_command(\"DFLY THREAD\")\n        if r[0] != dest_tid:\n            await admin.execute_command(\"CLIENT\", \"MIGRATE\", cid, dest_tid)\n\n    # Wait for all migrations to complete by polling each client's thread\n    for c in clients:\n        async for r, breaker in tick_timer(lambda c=c: c.execute_command(\"DFLY THREAD\")):\n            with breaker:\n                assert r[0] == dest_tid\n\n    # After all migrations complete, connected_clients must stay the same\n    info = await admin.info(\"clients\")\n    assert (\n        info[\"connected_clients\"] == baseline\n    ), f\"connected_clients changed from {baseline} to {info['connected_clients']} after migrations\"\n\n    for c in clients:\n        await c.aclose()\n    await admin.aclose()\n\n\nasync def test_issue_5931_malformed_protocol_crash(df_server: DflyInstance):\n    \"\"\"\n    Regression test for #5931\n\n    The crash.txt file contains malformed RESP protocol that caused the server to crash\n    with: \"Check failed: RespExpr::STRING == arg.type\" in FromArgs()\n\n    This test sends the exact bytes from crash.txt to verify the server handles it\n    gracefully without crashing.\n    \"\"\"\n    # Open raw TCP connection to send malformed protocol\n    reader, writer = await asyncio.open_connection(\"127.0.0.1\", df_server.port)\n\n    try:\n        # Send the exact bytes from crash.txt:\n        # *0\\r\\n$5\\r\\nMULTI\\r\\n*3\\r\\n$3\\r\\nSET\\r\\n$1\\r\\na\\r\\n$1\\r\\n1\\r<0xf4>)1\\r\\n$4\\r\\nEXEC\\r\\n\n  
      crash_data = b\"*0\\r\\n$5\\r\\nMULTI\\r\\n*3\\r\\n$3\\r\\nSET\\r\\n$1\\r\\na\\r\\n$1\\r\\n1\\r\"\n        crash_data += bytes([0xF4])  # Binary byte instead of \\n\n        crash_data += b\")1\\r\\n$4\\r\\nEXEC\\r\\n\"\n\n        writer.write(crash_data)\n        await writer.drain()\n\n        try:\n            response = await asyncio.wait_for(reader.read(1024), timeout=2.0)\n            # If we get a response, it should be an error, not a crash\n            # The server is still running if we got here\n        except asyncio.TimeoutError:\n            # Timeout is acceptable - connection might be closed\n            pass\n        except ConnectionError:\n            # Connection closed is acceptable - server detected bad protocol\n            pass\n\n    finally:\n        writer.close()\n        await writer.wait_closed()\n\n    # Verify server is still running by making a normal request\n    client = df_server.client()\n    await client.ping()\n    assert await client.ping() == True\n\n\nasync def test_issue_5949_nil_bulk_string_crash(df_server: DflyInstance):\n    \"\"\"\n    Regression test for #5949\n\n    The crash1.txt and crash2.txt files contain malformed RESP protocol with NIL bulk\n    strings ($-1) as command arguments, which caused the server to crash with:\n    \"Check failed: RespExpr::STRING == arg.type\" in FromArgs()\n\n    According to RESP protocol spec, NIL bulk strings are valid for server responses\n    but NOT for command arguments sent by clients. 
Commands must be arrays of bulk strings.\n    \"\"\"\n    # Open raw TCP connection to send malformed protocol\n    reader, writer = await asyncio.open_connection(\"127.0.0.1\", df_server.port)\n\n    try:\n        # Test crash1.txt: MULTI followed by SET with NIL bulk string argument\n        # *1\\r\\n$5\\r\\nMULTI\\r\\n*3\\r\\n$3\\r\\nSET\\r\\n$1\\r\\na\\r\\n$-1\\r\\n1\\r\\n*1\\r\\n$4\\r\\nEXEC\\r\\n\n        crash_data = (\n            b\"*1\\r\\n$5\\r\\nMULTI\\r\\n*3\\r\\n$3\\r\\nSET\\r\\n$1\\r\\na\\r\\n$-1\\r\\n1\\r\\n*1\\r\\n$4\\r\\nEXEC\\r\\n\"\n        )\n\n        writer.write(crash_data)\n        await writer.drain()\n\n        try:\n            response = await asyncio.wait_for(reader.read(1024), timeout=2.0)\n            # If we get a response, it should be an error, not a crash\n        except asyncio.TimeoutError:\n            # Timeout is acceptable - connection might be closed\n            pass\n        except ConnectionError:\n            # Connection closed is acceptable - server detected bad protocol\n            pass\n\n    finally:\n        writer.close()\n        await writer.wait_closed()\n\n    # Verify server is still running by making a normal request\n    client = df_server.client()\n    await client.ping()\n    assert await client.ping() == True\n\n\nasync def test_issue_6165_squash_invalid_syntax(async_client):\n    pipe = async_client.pipeline(transaction=False)\n    pipe.set(\"k\", \"v\")\n    pipe.execute_command(\"RENAME bar\")\n    res = await pipe.execute(raise_on_error=False)\n\n    assert res[0] == True  # SET key1\n    assert isinstance(res[1], aioredis.ResponseError)  # INVALID SYNTAX COMMAND\n\n    pip = async_client.pipeline(transaction=False)\n    pip.set(\"k\", \"v\")\n    pip.execute_command(\"ZUNION 2 set1\")\n    res = await pip.execute(raise_on_error=False)\n    assert res[0] == True  # SET key1\n    assert isinstance(res[1], aioredis.ResponseError)  # INVALID SYNTAX\n\n\n@dfly_args({\"proactor_threads\": \"2\", 
\"pipeline_squash\": 1})\nasync def test_quit_in_pipeline(df_server: DflyInstance):\n    \"\"\"\n    Regression test: when QUIT is pipelined together with other commands\n    (e.g. DEL DEL ... DEL QUIT), the server must flush all preceding replies\n    before closing the connection.\n\n    Reproduces the BullMQ removeAllQueueData() pattern.\n    \"\"\"\n    NUM_KEYS = 9\n    client = df_server.client()\n\n    # Setup: create NUM_KEYS keys\n    for i in range(NUM_KEYS):\n        await client.set(f\"{{b}}:pqt:k{i}\", \"v\")\n\n    # Send DEL for all keys + QUIT in one pipeline\n    pipe = client.pipeline(transaction=False)\n    for i in range(NUM_KEYS):\n        pipe.delete(f\"{{b}}:pqt:k{i}\")\n    pipe.execute_command(\"QUIT\")\n    res = await pipe.execute()\n\n    assert res[:NUM_KEYS] == [1] * NUM_KEYS, f\"Expected {NUM_KEYS} DEL replies, got: {res}\"\n    assert res[NUM_KEYS] in (b\"OK\", True), f\"Expected QUIT OK reply, got: {res[NUM_KEYS]}\"\n\n\nasync def test_tls_partial_header_read(\n    with_ca_tls_server_args, with_ca_tls_client_args, df_factory\n):\n    server = df_factory.create(port=BASE_PORT, **with_ca_tls_server_args)\n    server.start()\n\n    # Connect with raw socket and send only 1 byte (less than the 2-byte TLS header check)\n    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock:\n        sock.connect((\"localhost\", server.port))\n        # Send 1 byte (less than the 2-byte TLS header that dragonfly expects)\n        sock.send(b\"\\x16\")\n\n    # If the server crashes due to UB, it will fail. 
Otherwise this test passes.\n    # The server should handle this gracefully without crashing.\n    await asyncio.sleep(0.5)  # Give server time to handle the connection\n\n    # Verify server is still alive by making a valid connection\n    client = aioredis.Redis(port=server.port, **with_ca_tls_client_args)\n    assert await client.ping()\n\n\nasync def test_blocking_command_pipeline_flush(df_server: DflyInstance):\n    blpop_timeout = 5\n    num_blpops = 3\n    push_after = 1.0\n    max_allowed_delay = 2.0\n    src_key = \"__blpop_pipeline_flush_test__\"\n\n    pusher = aioredis.Redis(port=df_server.port)\n    await pusher.delete(src_key)\n\n    def encode_resp_command(*args):\n        encoded_args = [str(a).encode() for a in args]\n        header = f\"*{len(encoded_args)}\\r\\n\".encode()\n        body = b\"\".join(f\"${len(a)}\\r\\n\".encode() + a + b\"\\r\\n\" for a in encoded_args)\n        return header + body\n\n    pipeline_data = encode_resp_command(\"SET\", src_key + \":dummy\", \"val\")\n    pipeline_data += encode_resp_command(\"PING\")\n    pipeline_data += b\"\".join(\n        encode_resp_command(\"BLPOP\", src_key, blpop_timeout) for _ in range(num_blpops)\n    )\n\n    conn_reader, writer = await asyncio.open_connection(\"localhost\", df_server.port)\n    writer.write(pipeline_data)\n    await writer.drain()\n\n    async def expect_reply(expected: str, timeout=max_allowed_delay):\n        reply = await asyncio.wait_for(conn_reader.readline(), timeout)\n        assert reply == f\"{expected}\\r\\n\".encode(), f\"expected {expected}, got {reply!r}\"\n\n    t0 = time.monotonic()\n    await expect_reply(\"+OK\")\n    await expect_reply(\"+PONG\")\n    total_nonblocking_time = time.monotonic() - t0\n\n    assert (\n        total_nonblocking_time < max_allowed_delay\n    ), f\"Non-blocking replies took {total_nonblocking_time:.2f}s, expected < {max_allowed_delay}s.\"\n\n    async def delayed_push():\n        await asyncio.sleep(push_after)\n        await 
pusher.lpush(src_key, \"hello\")\n\n    push_task = asyncio.create_task(delayed_push())\n\n    t0 = time.monotonic()\n    total_timeout = blpop_timeout * num_blpops + 5\n    try:\n        await expect_reply(\"*2\", total_timeout)\n        first_blpop_time = time.monotonic() - t0\n        assert (\n            first_blpop_time < max_allowed_delay\n        ), f\"First blocking response took {first_blpop_time:.2f}s, expected < {max_allowed_delay}s\"\n    finally:\n        writer.close()\n        await writer.wait_closed()\n\n        await push_task\n        await pusher.delete(src_key, src_key + \":dummy\")\n        await pusher.aclose()\n\n\n@dfly_args({\"proactor_threads\": 2, \"async_dispatch_quota\": 50})\nasync def test_pubsub_pipeline_starvation(df_server: DflyInstance):\n    reader, writer = await asyncio.open_connection(\"127.0.0.1\", df_server.port)\n    # Send subscribe and consume the standard 6-line RESP array reply\n    # to completely clean the socket buffer before the flood begins.\n    writer.write(b\"SUBSCRIBE starvation_chan\\r\\n\")\n    await writer.drain()\n    for _ in range(6):\n        await reader.readline()\n\n    # Continuous Flood Task with batches of 500 commands (publisher)\n    keep_flooding = True\n\n    async def flood():\n        pub = aioredis.Redis(port=df_server.port)\n        while keep_flooding:\n            pipe = pub.pipeline(transaction=False)\n            for _ in range(500):\n                pipe.publish(\"starvation_chan\", \"hello\")\n            await pipe.execute()\n            # short sleep to yield the event loop but maintain constant pressure\n            await asyncio.sleep(0.001)\n        await pub.aclose()\n\n    flood_task = asyncio.create_task(flood())\n\n    try:\n        # Wait just 10ms for the first wave to hit the server's queue\n        await asyncio.sleep(0.01)\n\n        # Inject UNSUBSCRIBE + PING into the active flood.\n        # This triggers our quota logic, forcing the server to yield and read the 
commands from the TCP buffer, preventing input starvation.\n        writer.write(b\"UNSUBSCRIBE starvation_chan\\r\\nPING starvation_survived\\r\\n\")\n        await writer.drain()\n\n        # Count the PubSub messages that arrive before the PING\n        pubsub_messages_before_ping = 0\n        ping_found = False\n        async with async_timeout.timeout(2.0):\n            while True:\n                line = await reader.readline()\n                if not line:\n                    break\n\n                if b\"starvation_survived\" in line:\n                    ping_found = True\n                    break\n\n                if b\"message\" in line:\n                    pubsub_messages_before_ping += 1\n\n        # Assert 1: The PING must arrive before the flood is fully drained.\n        assert ping_found, \"PING was starved and timed out!\"\n\n        # Assert 2: the quota logic prioritized the pipeline.\n        # If it was truly starving, this would timeout or hit tens of thousands.\n        assert (\n            pubsub_messages_before_ping <= 1000\n        ), f\"Starvation detected! Pipeline queued behind {pubsub_messages_before_ping} messages.\"\n    finally:\n        keep_flooding = False\n        await flood_task\n        writer.close()\n        await writer.wait_closed()\n"
  },
  {
    "path": "tests/dragonfly/eval_test.py",
    "content": "import asyncio\nimport async_timeout\nfrom redis import asyncio as aioredis\nimport time\nimport json\nimport logging\nimport pytest\nimport random\nimport itertools\nimport random\nimport string\n\nfrom .instance import DflyInstance\n\nfrom . import dfly_args, dfly_multi_test_args\n\nDJANGO_CACHEOPS_SCRIPT = \"\"\"\nlocal prefix = KEYS[1]\nlocal key = KEYS[2]\nlocal precall_key = KEYS[3]\nlocal data = ARGV[1]\nlocal dnfs = cjson.decode(ARGV[2])\nlocal timeout = tonumber(ARGV[3])\n\nif precall_key ~= prefix and redis.call('exists', precall_key) == 0 then\n  -- Cached data was invalidated during the function call. The data is\n  -- stale and should not be cached.\n  return\nend\n\n-- Write data to cache\nredis.call('setex', key, timeout, data)\n\n\n-- A pair of funcs\n-- NOTE: we depend here on keys order being stable\nlocal conj_schema = function (conj)\n    local parts = {}\n    for field, _ in pairs(conj) do\n        table.insert(parts, field)\n    end\n\n    return table.concat(parts, ',')\nend\n\nlocal conj_cache_key = function (db_table, conj)\n    local parts = {}\n    for field, val in pairs(conj) do\n        table.insert(parts, field .. '=' .. tostring(val))\n    end\n\n    return prefix .. 'conj:' .. db_table .. ':' .. table.concat(parts, '&')\nend\n\n\n-- Update schemes and invalidators\nfor db_table, disj in pairs(dnfs) do\n    for _, conj in ipairs(disj) do\n        -- Ensure scheme is known\n        redis.acall('sadd', prefix .. 'schemes:' .. 
db_table, conj_schema(conj))\n\n        -- Add new cache_key to list of dependencies\n        local conj_key = conj_cache_key(db_table, conj)\n\n        redis.acall('sadd', conj_key, key)\n        -- NOTE: an invalidator should live longer than any key it references.\n        --       So we update its ttl on every key if needed.\n        -- NOTE: if CACHEOPS_LRU is True when invalidators should be left persistent,\n        --       so we strip next section from this script.\n        -- TOSTRIP\n        local conj_ttl = redis.call('ttl', conj_key)\n        if conj_ttl < timeout then\n            -- We set conj_key life with a margin over key life to call expire rarer\n            -- And add few extra seconds to be extra safe\n            redis.call('expire', conj_key, timeout * 2 + 10)\n        end\n        -- /TOSTRIP\n    end\nend\n\nreturn 'OK'\n\"\"\"\n\n\ndef DJANGO_CACHEOPS_SCHEMA(vs):\n    return {\n        \"table_1\": [{\"f-1\": f\"v-{vs[0]}\"}, {\"f-2\": f\"v-{vs[1]}\"}],\n        \"table_2\": [{\"f-1\": f\"v-{vs[2]}\"}, {\"f-2\": f\"v-{vs[3]}\"}],\n    }\n\n\n\"\"\"\nTest the main caching script of https://github.com/Suor/django-cacheops.\nThe script accesses undeclared keys (that are built based on argument data),\nso Dragonfly must run in global (1) or non-atomic (4) multi eval mode.\n\"\"\"\n\n\n@dfly_multi_test_args(\n    {\"default_lua_flags\": \"allow-undeclared-keys\", \"proactor_threads\": 4},\n    {\"default_lua_flags\": \"allow-undeclared-keys disable-atomicity\", \"proactor_threads\": 4},\n)\nasync def test_django_cacheops_script(async_client, num_keys=500):\n    script = async_client.register_script(DJANGO_CACHEOPS_SCRIPT)\n\n    data = [(f\"k-{k}\", [random.randint(0, 10) for _ in range(4)]) for k in range(num_keys)]\n    for k, vs in data:\n        schema = DJANGO_CACHEOPS_SCHEMA(vs)\n        assert (\n            await script(keys=[\"\", k, \"\"], args=[\"a\" * 10, json.dumps(schema, sort_keys=True), 100])\n            == \"OK\"\n        
)\n\n    # Check schema was built correctly\n    base_schema = DJANGO_CACHEOPS_SCHEMA([0] * 4)\n    for table, fields in base_schema.items():\n        schema = await async_client.smembers(f\"schemes:{table}\")\n        fields = set.union(*(set(part.keys()) for part in fields))\n        assert schema == fields\n\n    # Check reverse mapping is correct\n    for k, vs in data:\n        assert await async_client.exists(k)\n        for table, fields in DJANGO_CACHEOPS_SCHEMA(vs).items():\n            for sub_schema in fields:\n                conj_key = f\"conj:{table}:\" + \"&\".join(\n                    \"{}={}\".format(f, v) for f, v in sub_schema.items()\n                )\n                assert await async_client.sismember(conj_key, k)\n\n\nASYNQ_ENQUEUE_SCRIPT = \"\"\"\nif redis.call(\"EXISTS\", KEYS[1]) == 1 then\n\treturn 0\nend\nredis.call(\"HSET\", KEYS[1],\n           \"msg\", ARGV[1],\n           \"state\", \"pending\",\n           \"pending_since\", ARGV[3])\nredis.call(\"LPUSH\", KEYS[2], ARGV[2])\nreturn 1\n\"\"\"\n\nASYNQ_DEQUE_SCRIPT = \"\"\"\nif redis.call(\"EXISTS\", KEYS[2]) == 0 then\n\tlocal id = redis.call(\"RPOPLPUSH\", KEYS[1], KEYS[3])\n\tif id then\n\t\tlocal key = ARGV[2] .. id\n\t\tredis.call(\"HSET\", key, \"state\", \"active\")\n\t\tredis.call(\"HDEL\", key, \"pending_since\")\n\t\tredis.call(\"ZADD\", KEYS[4], ARGV[1], id)\n\t\treturn redis.call(\"HGET\", key, \"msg\")\n\tend\nend\nreturn nil\n\"\"\"\n\n\"\"\"\nTest the main queueing scripts of https://github.com/hibiken/asynq.\nThe deque script accesses undeclared keys (that are popped from a list),\nso Dragonfly must run in global (1) or non-atomic (4) multi eval mode.\n\nRunning the deque script in non-atomic mode can introduce inconsistency to an outside observer.\nFor example, an item can be already placed into the active queue (RPUSH KEYS[3]), but its state in the hash\nwasn't yet updated to active. 
Because we only access keys that we popped from the list (RPOPLPUSH is still atomic by itself),\nthe task system should work reliably.\n\"\"\"\n\n\n@dfly_multi_test_args(\n    {\"default_lua_flags\": \"allow-undeclared-keys\", \"proactor_threads\": 4},\n    {\"default_lua_flags\": \"allow-undeclared-keys disable-atomicity\", \"proactor_threads\": 4},\n)\nasync def test_golang_asynq_script(async_pool, num_queues=10, num_tasks=100):\n    async def enqueue_worker(queue):\n        client = aioredis.Redis(connection_pool=async_pool)\n        enqueue = client.register_script(ASYNQ_ENQUEUE_SCRIPT)\n\n        task_ids = 2 * list(range(num_tasks))\n        random.shuffle(task_ids)\n        res = [\n            await enqueue(\n                keys=[f\"asynq:{{{queue}}}:t:{task_id}\", f\"asynq:{{{queue}}}:pending\"],\n                args=[f\"{task_id}\", task_id, int(time.time())],\n            )\n            for task_id in task_ids\n        ]\n\n        assert sum(res) == num_tasks\n\n    # Start filling the queues\n    jobs = [asyncio.create_task(enqueue_worker(f\"q-{queue}\")) for queue in range(num_queues)]\n\n    collected = 0\n\n    async def dequeue_worker():\n        nonlocal collected\n        client = aioredis.Redis(connection_pool=async_pool)\n        dequeue = client.register_script(ASYNQ_DEQUE_SCRIPT)\n\n        while collected < num_tasks * num_queues:\n            # pct = round(collected/(num_tasks*num_queues), 2)\n            # print(f'\\r    \\r{pct}', end='', flush=True)\n            for queue in (f\"q-{queue}\" for queue in range(num_queues)):\n                prefix = f\"asynq:{{{queue}}}:t:\"\n                msg = await dequeue(\n                    keys=[\n                        f\"asynq:{{{queue}}}:\" + t for t in [\"pending\", \"paused\", \"active\", \"lease\"]\n                    ],\n                    args=[int(time.time()), prefix],\n                )\n                if msg is not None:\n                    collected += 1\n                    
assert await client.hget(prefix + msg, \"state\") == \"active\"\n\n    # Run many contending workers\n    await asyncio.gather(*(dequeue_worker() for _ in range(num_queues * 2)))\n\n    for job in jobs:\n        await job\n\n\nERROR_CALL_SCRIPT_TEMPLATE = [\n    \"redis.{}('LTRIM', 'l', 'a', 'b')\",  # error only on evaluation\n    \"redis.{}('obviously wrong')\",  # error immediately on preprocessing\n]\n\n\n@dfly_args({\"proactor_threads\": 1})\n@pytest.mark.asyncio\nasync def test_eval_error_propagation(async_client):\n    CMDS = [\"call\", \"pcall\", \"acall\", \"apcall\"]\n\n    for cmd, template in itertools.product(CMDS, ERROR_CALL_SCRIPT_TEMPLATE):\n        does_abort = \"p\" not in cmd\n        try:\n            await async_client.eval(template.format(cmd), 1, \"l\")\n            if does_abort:\n                assert False, \"Eval must have thrown an error: \" + cmd\n        except aioredis.RedisError as e:\n            if not does_abort:\n                assert False, \"Error should have been ignored: \" + cmd\n\n\n@dfly_args({\"proactor_threads\": 1, \"default_lua_flags\": \"allow-undeclared-keys\"})\nasync def test_global_eval_in_multi(async_client: aioredis.Redis):\n    GLOBAL_SCRIPT = \"\"\"\n        return redis.call('GET', 'any-key');\n    \"\"\"\n\n    await async_client.set(\"any-key\", \"works\")\n\n    pipe = async_client.pipeline(transaction=True)\n    pipe.set(\"another-key\", \"ok\")\n    pipe.eval(GLOBAL_SCRIPT, 0)\n    res = await pipe.execute()\n\n    print(res)\n    assert res[1] == \"works\"\n\n\n@dfly_args({\"proactor_threads\": 4, \"lua_auto_async\": None})\nasync def test_lua_auto_async(async_client: aioredis.Redis):\n    TEST_SCRIPT = \"\"\"\n        for i = 1, 100 do\n            redis.call('LPUSH', KEYS[(i % 4) + 1], 'W')\n        end\n    \"\"\"\n\n    await async_client.eval(TEST_SCRIPT, 4, \"a\", \"b\", \"c\", \"d\")\n\n    flushes = (await async_client.info(\"transaction\"))[\"eval_squashed_flushes\"]\n    assert 3 <= flushes 
<= 5  # all 100 commands are executed in a few batches\n\n\n\"\"\"\nEnsure liveness even with only a single interpreter in scenarios where EVAL and EVAL inside multi run concurrently while also contending for keys\n\"\"\"\n\n\n@dfly_args({\"proactor_threads\": 2, \"interpreter_per_thread\": 1})\nasync def test_one_interpreter(async_client: aioredis.Redis):\n    sha = await async_client.script_load(\"redis.call('GET', KEYS[1])\")\n    all_keys = [string.ascii_lowercase[i] for i in range(5)]\n    total_runs = 100\n\n    async def run(transaction):\n        for _ in range(total_runs):\n            p = async_client.pipeline(transaction=transaction)\n            pkeys = random.choices(all_keys, k=3)\n            for key in pkeys:\n                p.evalsha(sha, 1, key)\n            await p.execute()\n\n    max_blocked = 0\n\n    async def measure_blocked():\n        nonlocal max_blocked\n        while True:\n            max_blocked = max(\n                max_blocked, (await async_client.info(\"STATS\"))[\"blocked_on_interpreter\"]\n            )\n            await asyncio.sleep(0.01)\n\n    tm = [asyncio.create_task(run(True)) for _ in range(10)]\n    ts = [asyncio.create_task(run(False)) for _ in range(10)]\n    # block_measure = asyncio.create_task(measure_blocked())\n\n    async with async_timeout.timeout(5):\n        await asyncio.gather(*(tm + ts))\n\n    # block_measure.cancel()\n\n    # At least some connection was seen blocked\n    # Flaky: release build is too fast and never blocks\n    # assert max_blocked > 0\n\n\n\"\"\"\nTests migrate/close interaction for the connection\nReproduces #2569\n\"\"\"\n\n\n@dfly_args({\"proactor_threads\": \"4\", \"pipeline_squash\": 0})\nasync def test_migrate_close_connection(async_client: aioredis.Redis, df_server: DflyInstance):\n    sha = await async_client.script_load(\"return redis.call('GET', KEYS[1])\")\n\n    async def run():\n        reader, writer = await asyncio.open_connection(\"localhost\", df_server.port)\n\n     
   # write a EVALSHA that will ask for migration (75% it's on the wrong shard)\n        writer.write((f\"EVALSHA {sha} 1 a\\r\\n\").encode())\n        await writer.drain()\n\n        # disconnect the client connection\n        writer.close()\n        await writer.wait_closed()\n\n    tasks = [asyncio.create_task(run()) for _ in range(50)]\n    await asyncio.gather(*tasks)\n\n\n@pytest.mark.opt_only\n@dfly_args({\"proactor_threads\": 4, \"interpreter_per_thread\": 4, \"lua_mem_gc_threshold\": 60000000})\nasync def test_fill_memory_gc(async_client: aioredis.Redis):\n    SCRIPT = \"\"\"\n        local res = {{}}\n        for j = 1, 100 do\n          for i = 1, 10000 do\n            table.insert(res, tostring(i) .. 'data')\n          end\n        end\n    \"\"\"\n\n    await asyncio.gather(*(async_client.eval(SCRIPT, 0) for _ in range(5)))\n\n    info = await async_client.info(\"memory\")\n    # if this assert fails, we likely run gc after script invocations, remove this test\n    assert info[\"used_memory_lua\"] > 50 * 1e6\n\n    await async_client.execute_command(\"SCRIPT GC\")\n    info = await async_client.info(\"memory\")\n    assert info[\"used_memory_lua\"] < 10 * 1e6\n\n\n@dfly_args({\"proactor_threads\": 4, \"interpreter_per_thread\": 4, \"lua_mem_gc_threshold\": 100000000})\nasync def test_gc_force_flag(async_client: aioredis.Redis):\n    SCRIPT = \"\"\"\n        local res = {{}}\n        for j = 1, 10 do\n          for i = 1, 1000 do\n            table.insert(res, tostring(i) .. 
'data')\n          end\n        end\n    \"\"\"\n    for i in range(0, 1000):\n        await asyncio.gather(*(async_client.eval(SCRIPT, 0) for _ in range(5)))\n\n    info = await async_client.info(\"memory\")\n    assert info[\"used_memory_lua\"] > 1e6\n\n    stats = await async_client.info(\"stats\")\n    assert stats[\"lua_interpreter_return\"] == 5000\n    assert stats[\"lua_force_gc_calls\"] == 0\n    assert stats[\"lua_gc_duration_total_sec\"] == 0\n    assert stats[\"lua_gc_freed_memory_total\"] == 0\n\n    await async_client.execute_command(\"SCRIPT\", \"GC\")\n\n    info = await async_client.info(\"memory\")\n    assert info[\"used_memory_lua\"] < 4 * 1e6\n\n    await async_client.execute_command(\"CONFIG\", \"SET\", \"lua_mem_gc_threshold\", \"1000\")\n\n    for i in range(0, 1000):\n        await asyncio.gather(*(async_client.eval(SCRIPT, 0) for _ in range(5)))\n\n    info = await async_client.info(\"memory\")\n    assert info[\"used_memory_lua\"] < 4 * 1e6\n\n    stats = await async_client.info(\"stats\")\n    assert stats[\"lua_interpreter_return\"] >= 10000\n    assert stats[\"lua_force_gc_calls\"] > 0\n    assert stats[\"lua_gc_duration_total_sec\"] > 0\n    assert stats[\"lua_gc_freed_memory_total\"] > 0\n\n\n@dfly_args({\"proactor_threads\": 1})\n@pytest.mark.asyncio\nasync def test_StackOverflowByHincrbyfloat(df_server: DflyInstance):\n    client = df_server.client()\n\n    await client.execute_command(\"HSET myhash field 1.0\")\n    await client.eval(\"return redis.pcall('HINCRBYFLOAT', KEYS[1], 'field', '1.5')\", 1, \"myhash\")\n    assert \"2.5\" == await client.execute_command(\"HGET myhash field\")\n"
  },
  {
    "path": "tests/dragonfly/generic_test.py",
    "content": "import logging\nimport pytest\nimport redis\nimport asyncio\nfrom redis import asyncio as aioredis\n\nfrom . import dfly_multi_test_args, dfly_args\nfrom .instance import DflyInstance, DflyStartException\nfrom .utility import batch_fill_data, gen_test_data, EnvironCntx\nfrom .seeder import DebugPopulateSeeder\n\n\n@dfly_multi_test_args({\"keys_output_limit\": 512}, {\"keys_output_limit\": 1024})\nclass TestKeys:\n    async def test_max_keys(self, async_client: aioredis.Redis, df_server):\n        max_keys = df_server[\"keys_output_limit\"]\n        pipe = async_client.pipeline()\n        batch_fill_data(pipe, gen_test_data(max_keys * 3))\n        await pipe.execute()\n        keys = await async_client.keys()\n        assert len(keys) in range(max_keys, max_keys + 512)\n\n\n@pytest.fixture(scope=\"function\")\ndef export_dfly_password() -> str:\n    pwd = \"flypwd\"\n    with EnvironCntx(DFLY_requirepass=pwd):\n        yield pwd\n\n\nasync def test_password(df_factory, export_dfly_password):\n    with df_factory.create() as dfly:\n        # Expect password form environment variable\n        with pytest.raises(redis.exceptions.AuthenticationError):\n            async with aioredis.Redis(port=dfly.port) as client:\n                await client.ping()\n        async with aioredis.Redis(password=export_dfly_password, port=dfly.port) as client:\n            await client.ping()\n\n    # --requirepass should take precedence over environment variable\n    requirepass = \"requirepass\"\n    with df_factory.create(requirepass=requirepass) as dfly:\n        # Expect password form flag\n        with pytest.raises(redis.exceptions.AuthenticationError):\n            async with aioredis.Redis(port=dfly.port, password=export_dfly_password) as client:\n                await client.ping()\n        async with aioredis.Redis(password=requirepass, port=dfly.port) as client:\n            await client.ping()\n\n\n\"\"\"\nMake sure that multi-hop transactions can't run 
OOO.\n\"\"\"\n\nMULTI_HOPS = \"\"\"\nfor i = 0, ARGV[1] do\n  redis.call('INCR', KEYS[1])\nend\n\"\"\"\n\n\n@dfly_args({\"proactor_threads\": 1})\nasync def test_txq_ooo(async_client: aioredis.Redis, df_server):\n    async def task1(k, h):\n        c = aioredis.Redis(port=df_server.port)\n        for _ in range(100):\n            await c.eval(MULTI_HOPS, 1, k, h)\n\n    async def task2(k, n):\n        c = aioredis.Redis(port=df_server.port)\n        for _ in range(100):\n            pipe = c.pipeline(transaction=False)\n            pipe.lpush(k, 1)\n            for _ in range(n):\n                pipe.blpop(k, 0.001)\n            await pipe.execute()\n\n    await asyncio.gather(\n        task1(\"i1\", 2), task1(\"i2\", 3), task2(\"l1\", 2), task2(\"l1\", 2), task2(\"l1\", 5)\n    )\n\n\n@dfly_args({\"proactor_threads\": 2, \"num_shards\": 2})\nasync def test_blocking_multiple_dbs(async_client: aioredis.Redis, df_server: DflyInstance):\n    active = True\n\n    # A task to trigger the flow that eventually looses a transaction\n    # blmove is used to trigger a global deadlock, but we could use any\n    # command - the effect would be - a deadlocking locally that connection\n    async def blmove_task_loose(num):\n        async def run(id):\n            c = df_server.client()\n            await c.lpush(f\"key{id}\", \"val\")\n            while active:\n                await c.blmove(f\"key{id}\", f\"key{id}\", 0, \"LEFT\", \"LEFT\")\n                await asyncio.sleep(0.01)\n\n        tasks = []\n        for i in range(num):\n            tasks.append(run(i))\n\n        await asyncio.gather(*tasks)\n\n    # A task that creates continuation_trans_ by constantly timing out on\n    # an empty set. 
We could probably use any 2-hop operation like rename.\n    async def task_blocking(num):\n        async def block(id):\n            c = df_server.client()\n            while active:\n                await c.blmove(f\"{{{id}}}from\", f\"{{{id}}}to\", 0.1, \"LEFT\", \"LEFT\")\n\n        tasks = []\n        for i in range(num):\n            tasks.append(block(i))\n        await asyncio.gather(*tasks)\n\n    # produce is constantly waking up consumers. It is used to trigger the\n    # flow that creates wake ups on a differrent database in the\n    # middle of continuation transaction.\n    async def tasks_produce(num, iters):\n        LPUSH_SCRIPT = \"\"\"\n            redis.call('LPUSH', KEYS[1], \"val\")\n        \"\"\"\n\n        async def produce(id):\n            c = df_server.client(db=1)  # important to be on a different db\n            for i in range(iters):\n                # Must be a lua script and not multi-exec for some reason.\n                await c.eval(LPUSH_SCRIPT, 1, f\"list{{{id}}}\")\n\n        tasks = []\n        for i in range(num):\n            task = asyncio.create_task(produce(i))\n            tasks.append(task)\n\n        await asyncio.gather(*tasks)\n        logging.info(\"Finished producing\")\n\n    # works with producer to constantly block and wake up\n    async def tasks_consume(num, iters):\n        async def drain(id, iters):\n            client = df_server.client(db=1)\n            for _ in range(iters):\n                await client.blmove(f\"list{{{id}}}\", f\"sink{{{id}}}\", 0, \"LEFT\", \"LEFT\")\n\n        tasks = []\n        for i in range(num):\n            task = asyncio.create_task(drain(i, iters))\n            tasks.append(task)\n\n        await asyncio.gather(*tasks)\n        logging.info(\"Finished consuming\")\n\n    num_keys = 32\n    num_iters = 200\n    async_task1 = asyncio.create_task(blmove_task_loose(num_keys))\n    async_task2 = asyncio.create_task(task_blocking(num_keys))\n    logging.info(\"Starting tasks\")\n 
   await asyncio.gather(\n        tasks_consume(num_keys, num_iters),\n        tasks_produce(num_keys, num_iters),\n    )\n    logging.info(\"Finishing tasks\")\n    active = False\n    await asyncio.gather(async_task1, async_task2)\n\n\nasync def test_arg_from_environ_overwritten_by_cli(df_factory):\n    with EnvironCntx(DFLY_port=\"6378\"):\n        with df_factory.create(port=6377):\n            client = aioredis.Redis(port=6377)\n            await client.ping()\n\n\nasync def test_arg_from_environ(df_factory):\n    with EnvironCntx(DFLY_requirepass=\"pass\"):\n        with df_factory.create() as dfly:\n            # Expect password from environment variable\n            with pytest.raises(redis.exceptions.AuthenticationError):\n                client = aioredis.Redis(port=dfly.port)\n                await client.ping()\n\n            client = aioredis.Redis(password=\"pass\", port=dfly.port)\n            await client.ping()\n\n\nasync def test_unknown_dfly_env(df_factory, export_dfly_password):\n    with EnvironCntx(DFLY_abcdef=\"xyz\"):\n        dfly = df_factory.create()\n        with pytest.raises(DflyStartException):\n            dfly.start()\n        dfly.set_proc_to_none()\n\n\nasync def test_restricted_commands(df_factory):\n    # Restrict GET and SET, then verify non-admin clients are blocked from\n    # using these commands, though admin clients can use them.\n    with df_factory.create(restricted_commands=\"get,set\", admin_port=1112) as server:\n        async with aioredis.Redis(port=server.port) as client:\n            with pytest.raises(redis.exceptions.ResponseError):\n                await client.get(\"foo\")\n\n            with pytest.raises(redis.exceptions.ResponseError):\n                await client.set(\"foo\", \"bar\")\n\n        async with aioredis.Redis(port=server.admin_port) as admin_client:\n            await admin_client.get(\"foo\")\n            await admin_client.set(\"foo\", \"bar\")\n\n\n@pytest.mark.asyncio\nasync def 
test_reply_guard_oom(df_factory, df_seeder_factory):\n    master = df_factory.create(\n        proactor_threads=1,\n        cache_mode=\"true\",\n        maxmemory=\"256mb\",\n        enable_heartbeat_eviction=\"false\",\n        rss_oom_deny_ratio=2,\n    )\n    df_factory.start_all([master])\n    c_master = master.client()\n    await c_master.execute_command(\"DEBUG POPULATE 6000 size 40000\")\n\n    seeder = df_seeder_factory.create(\n        port=master.port, keys=5000, val_size=1000, stop_on_failure=False\n    )\n    await seeder.run(target_deviation=0.1)\n\n    info = await c_master.info(\"stats\")\n    assert info[\"evicted_keys\"] > 0, \"Weak testcase: policy based eviction was not triggered.\"\n\n\n@pytest.mark.asyncio\nasync def test_denyoom_commands(df_factory):\n    df_server = df_factory.create(proactor_threads=1, maxmemory=\"256mb\", oom_deny_commands=\"get\")\n    df_server.start()\n    client = df_server.client()\n    await client.execute_command(\"DEBUG POPULATE 7000 size 44000\")\n\n    min_deny = 256 * 1024 * 1024  # 256mb\n    info = await client.info(\"memory\")\n    print(f'Used memory {info[\"used_memory\"]}, rss {info[\"used_memory_rss\"]}')\n    assert info[\"used_memory\"] > min_deny, \"Weak testcase: too little used memory\"\n\n    # reject set due to oom\n    with pytest.raises(redis.exceptions.ResponseError):\n        await client.execute_command(\"set x y\")\n\n    # reject get because it is set in oom_deny_commands\n    with pytest.raises(redis.exceptions.ResponseError):\n        await client.execute_command(\"get x\")\n\n    # mget should not be rejected\n    await client.execute_command(\"mget x\")\n\n\n@pytest.mark.parametrize(\"type\", [\"LIST\", \"HASH\", \"SET\", \"ZSET\", \"STRING\", \"STREAM\"])\n@dfly_args({\"proactor_threads\": 4})\n@pytest.mark.asyncio\nasync def test_rename_huge_values(df_factory, type):\n    df_server = df_factory.create()\n    df_server.start()\n    client = df_server.client()\n\n    
logging.debug(f\"Generating huge {type}\")\n    seeder = DebugPopulateSeeder(\n        key_target=1,\n        data_size=10_000_000,\n        collection_size=10_000,\n        variance=1,\n        samples=1,\n        types=[type],\n    )\n    await seeder.run(client)\n    source_data = await DebugPopulateSeeder.capture(client)\n    logging.debug(f\"src {source_data}\")\n\n    # Rename multiple times to make sure the key moves between shards\n    orig_name = (await client.execute_command(\"keys *\"))[0]\n    old_name = orig_name\n    new_name = \"\"\n    for i in range(10):\n        new_name = f\"new:{i}\"\n        await client.execute_command(f\"rename {old_name} {new_name}\")\n        old_name = new_name\n    await client.execute_command(f\"rename {new_name} {orig_name}\")\n    target_data = await DebugPopulateSeeder.capture(client)\n\n    assert source_data == target_data\n\n\n@pytest.mark.asyncio\nasync def test_key_bump_ups(df_factory):\n    master = df_factory.create(\n        proactor_threads=2,\n        cache_mode=\"true\",\n    )\n    df_factory.start_all([master])\n    c_master = master.client()\n\n    await c_master.execute_command(\"DEBUG POPULATE 18000 KEY 32 RAND\")\n\n    info = await c_master.info(\"stats\")\n    assert info[\"bump_ups\"] == 0\n\n    keys = await c_master.execute_command(\"SCAN 0\")\n    keys = keys[1][0:10]\n\n    # Bump keys\n    for key in keys:\n        await c_master.execute_command(\"GET \" + key)\n    info = await c_master.info(\"stats\")\n    assert info[\"bump_ups\"] <= 10\n\n    # Multi get bump\n    await c_master.execute_command(\"MGET \" + \" \".join(keys))\n    info = await c_master.info(\"stats\")\n    assert info[\"bump_ups\"] >= 10 and info[\"bump_ups\"] <= 20\n    last_bump_ups = info[\"bump_ups\"]\n\n    for key in keys:\n        await c_master.execute_command(\"DEL \" + key)\n\n    # DEL should not bump up any key\n    info = await c_master.info(\"stats\")\n    assert last_bump_ups == info[\"bump_ups\"]\n\n    #  
Find key that has slot > 0 and bump it\n    while True:\n        keys = await c_master.execute_command(\"SCAN 0\")\n        key = keys[1][0]\n\n        debug_key_info = await c_master.execute_command(\"DEBUG OBJECT \" + key)\n        slot_id = int(dict(map(lambda s: s.split(\":\"), debug_key_info.split()))[\"slot\"])\n        if slot_id == 0:\n            # delete the key and continue\n            await c_master.execute_command(\"DEL \" + key)\n            continue\n\n        await c_master.execute_command(\"GET \" + key)\n        debug_key_info = await c_master.execute_command(\"DEBUG OBJECT \" + key)\n        new_slot_id = int(dict(map(lambda s: s.split(\":\"), debug_key_info.split()))[\"slot\"])\n        assert new_slot_id + 1 == slot_id\n        break\n\n\n@pytest.mark.debug_only\n@pytest.mark.asyncio\nasync def test_command_empty_key(df_factory):\n    df_server = df_factory.create()\n    df_server.start()\n    client = df_server.client()\n    res = await client.lpush(\"\", \"a\")\n    assert res == 1\n    res = await client.execute_command(\"KEYS *\")\n    assert len(res) == 1\n"
  },
  {
    "path": "tests/dragonfly/http_conf_test.py",
    "content": "import aiohttp\nimport json\nfrom . import dfly_args\nfrom .instance import DflyInstance\n\n\ndef get_http_session(*args):\n    if args:\n        return aiohttp.ClientSession(auth=aiohttp.BasicAuth(*args))\n    return aiohttp.ClientSession()\n\n\n@dfly_args({\"proactor_threads\": \"1\", \"requirepass\": \"XXX\"})\nasync def test_password(df_server: DflyInstance):\n    async with get_http_session() as session:\n        resp = await session.get(f\"http://localhost:{df_server.port}/\")\n        assert resp.status == 401\n    async with get_http_session(\"default\", \"wrongpassword\") as session:\n        resp = await session.get(f\"http://localhost:{df_server.port}/\")\n        assert resp.status == 401\n    async with get_http_session(\"default\", \"XXX\") as session:\n        resp = await session.get(f\"http://localhost:{df_server.port}/\")\n        assert resp.status == 200\n\n\n@dfly_args({\"proactor_threads\": \"1\", \"requirepass\": \"XXX\", \"admin_port\": 1113})\nasync def test_skip_metrics(df_server: DflyInstance):\n    async with get_http_session(\"whoops\", \"whoops\") as session:\n        resp = await session.get(f\"http://localhost:{df_server.port}/metrics\")\n        assert resp.status == 200\n    async with get_http_session(\"whoops\", \"whoops\") as session:\n        resp = await session.get(f\"http://localhost:{df_server.admin_port}/metrics\")\n        assert resp.status == 200\n\n\nasync def test_no_password_main_port(df_server: DflyInstance):\n    async with get_http_session(\"default\", \"XXX\") as session:\n        resp = await session.get(f\"http://localhost:{df_server.port}/\")\n        assert resp.status == 200\n    async with get_http_session(\"random\") as session:\n        resp = await session.get(f\"http://localhost:{df_server.port}/\")\n        assert resp.status == 200\n    async with get_http_session() as session:\n        resp = await session.get(f\"http://localhost:{df_server.port}/\")\n        assert resp.status == 
200\n\n\n@dfly_args(\n    {\n        \"proactor_threads\": \"1\",\n        \"requirepass\": \"XXX\",\n        \"admin_port\": 1113,\n        \"primary_port_http_enabled\": True,\n        \"admin_nopass\": True,\n    }\n)\nasync def test_no_password_on_admin(df_server: DflyInstance):\n    async with get_http_session(\"default\", \"XXX\") as session:\n        resp = await session.get(f\"http://localhost:{df_server.admin_port}/\")\n        assert resp.status == 200\n    async with get_http_session(\"random\") as session:\n        resp = await session.get(f\"http://localhost:{df_server.admin_port}/\")\n        assert resp.status == 200\n    async with get_http_session() as session:\n        resp = await session.get(f\"http://localhost:{df_server.admin_port}/\")\n        assert resp.status == 200\n\n\n@dfly_args({\"proactor_threads\": \"1\", \"requirepass\": \"XXX\", \"admin_port\": 1113})\nasync def test_password_on_admin(df_server: DflyInstance):\n    async with get_http_session(\"default\", \"badpass\") as session:\n        resp = await session.get(f\"http://localhost:{df_server.admin_port}/\")\n        assert resp.status == 401\n    async with get_http_session() as session:\n        resp = await session.get(f\"http://localhost:{df_server.admin_port}/\")\n        assert resp.status == 401\n    async with get_http_session(\"default\", \"XXX\") as session:\n        resp = await session.get(f\"http://localhost:{df_server.admin_port}/\")\n        assert resp.status == 200\n\n\n@dfly_args({\"proactor_threads\": \"1\", \"expose_http_api\": \"true\"})\nasync def test_no_password_on_http_api(df_server: DflyInstance):\n    async with get_http_session(\"default\", \"XXX\") as session:\n        resp = await session.post(f\"http://localhost:{df_server.port}/api\", json=[\"ping\"])\n        assert resp.status == 200\n    async with get_http_session(\"random\") as session:\n        resp = await session.post(f\"http://localhost:{df_server.port}/api\", json=[\"ping\"])\n        
assert resp.status == 200\n    async with get_http_session() as session:\n        resp = await session.post(f\"http://localhost:{df_server.port}/api\", json=[\"ping\"])\n        assert resp.status == 200\n\n\n@dfly_args({\"proactor_threads\": \"1\", \"expose_http_api\": \"true\"})\nasync def test_http_api(df_server: DflyInstance):\n    client = df_server.client()\n    async with get_http_session() as session:\n        body = '[\"set\", \"foo\", \"МайяХилли\", \"ex\", \"100\"]'\n        async with session.post(f\"http://localhost:{df_server.port}/api\", data=body) as resp:\n            assert resp.status == 200\n            text = await resp.text()\n            assert text.strip() == '{\"result\":\"OK\"}'\n\n        body = '[\"get\", \"foo\"]'\n        async with session.post(f\"http://localhost:{df_server.port}/api\", data=body) as resp:\n            assert resp.status == 200\n            text = await resp.text()\n            assert text.strip() == '{\"result\":\"МайяХилли\"}'\n\n        body = '[\"foo\", \"bar\"]'\n        async with session.post(f\"http://localhost:{df_server.port}/api\", data=body) as resp:\n            assert resp.status == 200\n            text = await resp.text()\n            assert text.strip() == '{\"error\": \"unknown command `FOO`\"}'\n\n    assert await client.ttl(\"foo\") > 0\n\n\n@dfly_args({\"proactor_threads\": \"1\", \"expose_http_api\": \"true\", \"requirepass\": \"XXX\"})\nasync def test_password_on_http_api(df_server: DflyInstance):\n    async with get_http_session(\"default\", \"badpass\") as session:\n        resp = await session.post(f\"http://localhost:{df_server.port}/api\", json=[\"ping\"])\n        assert resp.status == 401\n    async with get_http_session() as session:\n        resp = await session.post(f\"http://localhost:{df_server.port}/api\", json=[\"ping\"])\n        assert resp.status == 401\n    async with get_http_session(\"default\", \"XXX\") as session:\n        resp = await 
session.post(f\"http://localhost:{df_server.port}/api\", json=[\"ping\"])\n        assert resp.status == 200\n\n\ndef get_json_object(json_str):\n    try:\n        json_obj = json.loads(json_str)\n        return json_obj\n    except ValueError:\n        return None\n\n\n@dfly_args({\"proactor_threads\": \"1\", \"expose_http_api\": \"true\", \"slowlog_log_slower_than\": 0})\nasync def test_http_api_json_response(df_server: DflyInstance):\n    client = df_server.client()\n    async with get_http_session() as session:\n        body = '[\"set\", \"foo\",\"bar\"]'\n        async with session.post(f\"http://localhost:{df_server.port}/api\", data=body) as resp:\n            assert resp.status == 200\n            text = await resp.text()\n            json_object = get_json_object(text)\n            assert json_object != None\n            assert json_object == {\"result\": \"OK\"}\n\n        body = '[\"get\", \"foo\"]'\n        async with session.post(f\"http://localhost:{df_server.port}/api\", data=body) as resp:\n            assert resp.status == 200\n            text = await resp.text()\n            json_object = get_json_object(text)\n            assert json_object != None\n            assert json_object == {\"result\": \"bar\"}\n\n        body = '[\"slowlog\", \"get\"]'\n        async with session.post(f\"http://localhost:{df_server.port}/api\", data=body) as resp:\n            assert resp.status == 200\n            text = await resp.text()\n            json_object = get_json_object(text)\n            assert json_object != None\n            # Compare commands\n            assert json_object[\"result\"][0][3] == [\"GET\", \"foo\"]\n            assert json_object[\"result\"][1][3] == [\"SET\", \"foo\", \"bar\"]\n\n        body = '[\"hset\", \"myhash\", \"k1\", \"1\", \"k2\", \"2\"]'\n        async with session.post(f\"http://localhost:{df_server.port}/api\", data=body) as resp:\n            assert resp.status == 200\n            text = await resp.text()\n            
json_object = get_json_object(text)\n            assert json_object != None\n            assert json_object == {\"result\": 2}\n\n        body = '[\"hkeys\", \"myhash\"]'\n        async with session.post(f\"http://localhost:{df_server.port}/api\", data=body) as resp:\n            assert resp.status == 200\n            text = await resp.text()\n            json_object = get_json_object(text)\n            assert json_object != None\n            assert json_object[\"result\"] == [\"k1\", \"k2\"]\n"
  },
  {
    "path": "tests/dragonfly/instance.py",
    "content": "import dataclasses\nimport os\nimport threading\nimport time\nimport subprocess\nimport random\nimport aiohttp\nimport logging\nfrom dataclasses import dataclass\nfrom typing import Dict, Optional, List, Union\nimport re\nimport psutil\nimport itertools\nfrom prometheus_client.parser import text_string_to_metric_families\nfrom redis.asyncio import Redis as RedisClient\nfrom redis.asyncio import RedisCluster as RedisCluster\nimport signal\n\n\nSTART_DELAY = 0.8\nSTART_GDB_DELAY = 5.0\n\n\n@dataclass\nclass DflyParams:\n    path: str\n    cwd: str\n    gdb: bool\n    direct_output: bool\n    buffered_out: bool\n    args: Dict[str, Union[str, None]]\n    existing_port: int\n    existing_admin_port: int\n    existing_mc_port: int\n    env: any\n    log_dir: str\n\n\nclass Colors:\n    CLEAR = \"\\\\o33[0m\"\n    COLORS = [f\"\\\\o33[0;{i}m\" for i in range(31, 37)]\n    last_color = -1\n\n    @classmethod\n    def next(clz):\n        clz.last_color = (clz.last_color + 1) % len(clz.COLORS)\n        return clz.COLORS[clz.last_color]\n\n\nclass DflyStartException(Exception):\n    pass\n\n\ndef symbolize_stack_trace(binary_path, lines):\n    addr2line_proc = subprocess.Popen(\n        [\"/usr/bin/addr2line\", \"-fCa\", \"-e\", binary_path], stdin=subprocess.PIPE\n    )\n    for line in lines:\n        addr2line_proc.stdin.write(line.encode())\n\n    addr2line_proc.stdin.close()\n    addr2line_proc.wait()\n\n\ndef read_sedout(pipe, stacktrace):\n    try:\n        seen = set()\n        pattern = r\"@\\s*(0x[0-9a-fA-F]+)\"\n        matcher = re.compile(pattern)\n\n        for line in iter(pipe.readline, b\"\"):\n            # Deduplicate output - we somewhere duplicate the output, probably due\n            # to tty redirections.\n            if line not in seen:\n                seen.add(line)\n                print(line)\n                res = matcher.search(line)\n                if res:\n                    stacktrace.append(res.group(1) + \"\\n\")\n    
except ValueError:\n        pass\n    finally:\n        pipe.close()\n\n\nclass DflyInstance:\n    \"\"\"\n    Represents a runnable and stoppable Dragonfly instance\n    with fixed arguments.\n    \"\"\"\n\n    def __init__(self, params: DflyParams, args):\n        self.args = args\n        self.args.update(params.args)\n        self.params = params\n        self.proc: Optional[subprocess.Popen] = None\n        self._client: Optional[RedisClient] = None\n        self.log_files: List[str] = []\n        self.dynamic_port = False\n        self.sed_proc = None\n        self.clients = []\n\n        if self.params.existing_port:\n            self._port = self.params.existing_port\n        elif \"port\" in self.args:\n            self._port = int(self.args[\"port\"])\n        else:\n            # Tell DF to choose a random open port.\n            # We'll find out what port it is using lsof.\n            self.args[\"port\"] = -1\n            self._port = None\n            self.dynamic_port = True\n\n        # Some tests check the log files, so make sure the log files\n        # exist even when people try to debug their test.\n        if \"logtostderr\" in self.args:\n            del self.args[\"logtostderr\"]\n            self.args[\"alsologtostderr\"] = None\n\n        # Run with num_shards = (proactor_threads - 1) if possible, so help expose bugs\n        if \"num_shards\" not in self.args:\n            threads = psutil.cpu_count()\n            if \"proactor_threads\" in self.args:\n                threads = int(self.args[\"proactor_threads\"])\n            if threads > 1:\n                self.args[\"num_shards\"] = threads - 1\n\n    def __del__(self):\n        if self.proc:\n            self.stop()\n        assert self.proc == None\n\n    def client(self, *args, **kwargs) -> RedisClient:\n        host = \"localhost\" if self[\"bind\"] is None else self[\"bind\"]\n        client = RedisClient(host=host, port=self.port, decode_responses=True, *args, **kwargs)\n        
self.clients.append(client)\n        return client\n\n    def admin_client(self, *args, **kwargs) -> RedisClient:\n        client = RedisClient(\n            port=self.admin_port,\n            single_connection_client=True,\n            decode_responses=True,\n            *args,\n            **kwargs,\n        )\n        self.clients.append(client)\n        return client\n\n    def cluster_client(self, *args, **kwargs) -> RedisCluster:\n        client = RedisCluster(\n            host=\"localhost\", port=self.port, decode_responses=True, *args, **kwargs\n        )\n        self.clients.append(client)\n        return client\n\n    async def close_clients(self):\n        for client in self.clients:\n            await client.aclose() if hasattr(client, \"aclose\") else await client.close()\n\n    def __enter__(self):\n        self.start()\n        return self\n\n    def __repr__(self):\n        return f\":{self.port}\"\n\n    def __exit__(self, exc_type, exc_value, exc_traceback):\n        self.stop()\n\n    def start(self):\n        if self.params.existing_port:\n            return\n\n        self._start()\n        self._wait_for_server()\n\n    def _wait_for_server(self):\n        if self.params.existing_port:\n            return\n        # Give Dragonfly time to start and detect possible failure causes\n        # Gdb starts slowly\n        delay = START_DELAY if not self.params.gdb else START_GDB_DELAY\n\n        # Wait until the process is listening on the port.\n        s = time.time()\n        while time.time() - s < delay:\n            self._check_status()\n            try:\n                self.get_port_from_psutil()\n                logging.info(\n                    f\"Process {self.proc.pid} started after {time.time() - s:.2f} seconds. 
port={self.port}\"\n                )\n                break\n            except RuntimeError:\n                time.sleep(0.05)\n        else:\n            raise DflyStartException(\"Process didn't start listening on port in time\")\n\n        self.log_files = self.get_logs_from_psutil()\n\n        # Remove first 6 lines - our default header with log locations (as it carries no useful information)\n        # Next, replace log-level + date with port and colored arrow\n        sed_format = f\"1,6d;s/[^ ]*/{self.port}{Colors.next()}➜{Colors.CLEAR}/\"\n        sed_cmd = [\"sed\", \"-u\", \"-e\", sed_format]\n        if self.params.buffered_out:\n            sed_cmd.remove(\"-u\")\n        if not self.params.direct_output:\n            self.sed_proc = subprocess.Popen(\n                sed_cmd,\n                stdin=self.proc.stdout,\n                stdout=subprocess.PIPE,\n                bufsize=1,\n                universal_newlines=True,\n            )\n            self.stacktrace = []\n            self.sed_thread = threading.Thread(\n                target=read_sedout, args=(self.sed_proc.stdout, self.stacktrace), daemon=True\n            )\n            self.sed_thread.start()\n\n    def set_proc_to_none(self):\n        self.proc = None\n\n    def stop(self, kill=False):\n        proc, self.proc = self.proc, None\n        if proc is None:\n            return\n\n        logging.debug(f\"Stopping instance on {self._port}\")\n        try:\n            if kill:\n                proc.kill()\n            else:\n                proc.terminate()\n                proc.communicate(timeout=120)\n                # if the return code is 0 it means normal termination\n                # if the return code is negative it means termination by signal\n                # if the return code is positive it means abnormal exit\n                if proc.returncode != 0:\n                    raise Exception(\n                        f\"Dragonfly did not terminate gracefully, exit code 
{proc.returncode}, \"\n                        f\"pid: {proc.pid}\"\n                    )\n\n        except subprocess.TimeoutExpired:\n            # We need to send SIGUSR1 to DF such that it prints the stacktrace\n            proc.send_signal(signal.SIGUSR1)\n            # Then we sleep for 5 seconds such that DF has enough time to print the stacktraces\n            # We can't really synchronize here because SIGTERM and SIGKILL do not block even if\n            # sigaction explicitly blocks other incoming signals until it handles SIGUSR1.\n            # Even worse, on SIGTERM and SIGKILL none of the handlers registered via sigaction\n            # are guaranteed to run\n            time.sleep(5)\n            logging.debug(f\"Unable to kill the process on port {self._port}\")\n            logging.debug(f\"INFO LOGS of DF are:\")\n            self.print_info_logs_to_debug_log()\n            proc.kill()\n            proc.communicate()\n            raise Exception(\"Unable to terminate DragonflyDB gracefully, it was killed\")\n        finally:\n            if self.sed_proc:\n                self.sed_proc.communicate()\n                self.sed_thread.join()\n                symbolize_stack_trace(proc.args[0], self.stacktrace)\n\n    def _start(self):\n        if self.params.existing_port:\n            return\n\n        if self.dynamic_port:\n            self._port = None\n\n        all_args = self.format_args(self.args)\n        real_path = os.path.realpath(self.params.path)\n\n        run_cmd = [self.params.path, *all_args]\n        if self.params.gdb:\n            run_cmd = [\"gdb\", \"--ex\", \"r\", \"--args\"] + run_cmd\n\n        self.proc = subprocess.Popen(\n            run_cmd,\n            cwd=self.params.cwd,\n            stdout=None if self.params.direct_output else subprocess.PIPE,\n            stderr=subprocess.STDOUT,\n        )\n        logging.info(f\"Starting {real_path} {' '.join(all_args)}, pid {self.proc.pid}\")\n\n    def _check_status(self):\n   
     if not self.params.existing_port:\n            return_code = self.proc.poll()\n            if return_code is not None:\n                # log stdout of the failed process\n                logging.error(\"Dragonfly process error:\\n%s\", self.proc.stdout.read().decode())\n                self.proc = None\n                raise DflyStartException(f\"Failed to start instance, return code {return_code}\")\n\n    def __getitem__(self, k):\n        return self.args.get(k)\n\n    @property\n    def port(self) -> int:\n        if self._port is None:\n            self._port = self.get_port_from_psutil()\n        return self._port\n\n    @property\n    def admin_port(self) -> Optional[int]:\n        if self.params.existing_admin_port:\n            return self.params.existing_admin_port\n        if \"admin_port\" in self.args:\n            return int(self.args[\"admin_port\"])\n        return None\n\n    @property\n    def mc_port(self) -> Optional[int]:\n        if self.params.existing_mc_port:\n            return self.params.existing_mc_port\n        if \"memcached_port\" in self.args:\n            return int(self.args[\"memcached_port\"])\n        return None\n\n    def get_port_from_psutil(self) -> int:\n        if self.proc is None:\n            raise RuntimeError(\"port is not available yet\")\n        p = psutil.Process(self.proc.pid)\n\n        # If running with gdb, look for port on child\n        children = p.children()\n        if len(children) == 1 and children[0].name() == \"dragonfly\":\n            p = children[0]\n\n        ports = set()\n        try:\n            for connection in p.connections():\n                if connection.status == \"LISTEN\":\n                    ports.add(connection.laddr.port)\n        except psutil.AccessDenied:\n            raise RuntimeError(\"Access denied\")\n\n        ports.difference_update({self.admin_port, self.mc_port})\n        assert len(ports) < 2, \"Open ports detection found too many ports\"\n        if ports:\n   
         return ports.pop()\n        raise RuntimeError(\"Couldn't parse port\")\n\n    def get_logs_from_psutil(self) -> List[str]:\n        p = psutil.Process(self.proc.pid)\n        rv = []\n        for file in p.open_files():\n            if \".log.\" in file.path and \"dragonfly\" in file.path:\n                rv.append(file.path)\n        return rv\n\n    def print_info_logs_to_debug_log(self):\n        logs = self.log_files\n        sed_format = f\"s/[^ ]*/{self.port}{Colors.next()}➜{Colors.CLEAR}/\"\n        sed_cmd = [\"sed\", \"-e\", sed_format]\n        for log in logs:\n            if \"INFO\" in log:\n                with open(log) as file:\n                    print(f\"🪵🪵🪵🪵🪵🪵 LOG name {log} 🪵🪵🪵🪵🪵🪵\")\n                    subprocess.call(sed_cmd, stdin=file)\n\n    @staticmethod\n    def format_args(args):\n        out = []\n        for k, v in args.items():\n            if v is not None:\n                out.append(f\"--{k}={v}\")\n            else:\n                out.append(f\"--{k}\")\n        return out\n\n    async def metrics(self):\n        session = aiohttp.ClientSession()\n        resp = await session.get(f\"http://localhost:{self.port}/metrics\")\n        data = await resp.text(encoding=\"utf-8\")\n        await session.close()\n        return {\n            metric_family.name: metric_family\n            for metric_family in text_string_to_metric_families(data)\n        }\n\n    def find_in_logs(self, pattern):\n        if self.proc is not None:\n            raise RuntimeError(\"Must close server first\")\n\n        results = []\n        matcher = re.compile(pattern)\n        for path in self.log_files:\n            for line in open(path):\n                if matcher.search(line):\n                    results.append(line)\n        return results\n\n    @property\n    def rss(self):\n        if self.proc is None:\n            return 0\n        process = psutil.Process(self.proc.pid)\n        mem_info = process.memory_info()\n        return 
mem_info.rss\n\n    def has_arg(self, arg):\n        return arg in self.args\n\n\nclass DflyInstanceFactory:\n    \"\"\"\n    A factory for creating dragonfly instances with pre-supplied arguments.\n    \"\"\"\n\n    def __init__(self, params: DflyParams, args):\n        self.args = args\n        self.params = params\n        self.instances = []\n\n    def create(self, existing_port=None, path=None, version=100, **kwargs) -> DflyInstance:\n        args = {**self.args, **kwargs}\n        args.setdefault(\"dbfilename\", \"\")\n        args.setdefault(\"noversion_check\", None)\n        # MacOs does not set it automatically, so we need to set it manually\n        args.setdefault(\"maxmemory\", \"8G\")\n        vmod = \"dragonfly_connection=1,db_slice=1,listener_interface=1,main_service=1,rdb_save=1,replica=1,cluster_family=1,engine_shard=1,dflycmd=1,snapshot=1,streamer=1\"\n        args.setdefault(\"vmodule\", vmod)\n        args.setdefault(\"jsonpathv2\")\n        if version > 1.27:\n            args.setdefault(\"omit_basic_usage\")\n\n        if version > 1.31:\n            args.setdefault(\"latency_tracking\")\n\n        args.setdefault(\"log_dir\", self.params.log_dir)\n\n        if version >= 1.21 and \"serialization_max_chunk_size\" not in args:\n            args.setdefault(\"serialization_max_chunk_size\", 300000)\n\n        if version > 1.36:\n            args.setdefault(\"serialize_hnsw_index\", \"true\")\n            args.setdefault(\"deserialize_hnsw_index\", \"true\")\n\n        if version >= 1.26:\n            args.setdefault(\"fiber_safety_margin=4096\")\n\n        # When a custom S3 endpoint is configured (e.g. MinIO), pass it to Dragonfly\n        s3_endpoint = os.environ.get(\"MINIO_S3_ENDPOINT\")\n        if s3_endpoint:\n            from urllib.parse import urlparse\n\n            # Normalize scheme-less values (e.g. 
\"localhost:9000\") so urlparse\n            # correctly populates hostname/port instead of treating it as a path.\n            to_parse = s3_endpoint if \"://\" in s3_endpoint else \"http://\" + s3_endpoint\n            parsed = urlparse(to_parse)\n            endpoint_host = parsed.hostname or \"\"\n            if parsed.port:\n                endpoint_host = f\"{endpoint_host}:{parsed.port}\"\n            if endpoint_host:\n                args.setdefault(\"s3_endpoint\", endpoint_host)\n                args.setdefault(\"s3_use_https\", \"false\" if parsed.scheme == \"http\" else \"true\")\n\n        for k, v in args.items():\n            args[k] = v.format(**self.params.env) if isinstance(v, str) else v\n\n        if existing_port is not None:\n            params = dataclasses.replace(self.params, existing_port=existing_port)\n        else:\n            params = self.params\n\n        if path is not None:\n            params = dataclasses.replace(self.params, path=path)\n\n        if version < 1.35:\n            params.args.pop(\"experimental_io_loop_v2\", None)\n\n        instance = DflyInstance(params, args)\n        self.instances.append(instance)\n        return instance\n\n    def start_all(self, instances: List[DflyInstance]):\n        \"\"\"Start multiple instances in parallel\"\"\"\n        for instance in instances:\n            instance._start()\n\n        for instance in instances:\n            instance._wait_for_server()\n\n    async def stop_all(self):\n        \"\"\"Stop all launched instances.\"\"\"\n        exceptions = []  # To collect exceptions\n        for instance in self.instances:\n            try:  # ioloop might be no longer running\n                await instance.close_clients()\n            except Exception as e:\n                pass\n\n            try:\n                instance.stop()\n            except Exception as e:\n                exceptions.append(e)  # Collect the exception\n        if exceptions:\n            
first_exception = exceptions[0]\n            raise Exception(\n                f\"One or more errors occurred while stopping instances. \"\n                f\"First exception: {first_exception}\"\n            ) from first_exception\n\n    def __repr__(self) -> str:\n        return f\"Factory({self.args})\"\n\n\nclass RedisServer:\n    def __init__(self, port):\n        self.port = port\n        self.proc = None\n\n    def start(self, redis7=None, **kwargs):\n        servers = [\"redis-server-7.2.2\"]\n        if not redis7:\n            servers += [\"redis-server-6.2.11\", \"valkey-server-8.0.1\"]\n        command = [\n            random.choice(servers),\n            f\"--port {self.port}\",\n            \"--save ''\",\n            \"--appendonly no\",\n            \"--protected-mode no\",\n            \"--repl-diskless-sync yes\",\n            \"--repl-diskless-sync-delay 0\",\n        ]\n        # Convert kwargs to command-line arguments\n        for key, value in kwargs.items():\n            if value is None:\n                command.append(f\"--{key}\")\n            else:\n                command.append(f\"--{key} {value}\")\n\n        self.proc = subprocess.Popen(command)\n        logging.debug(self.proc.args)\n\n    def stop(self):\n        self.proc.terminate()\n        try:\n            self.proc.wait(timeout=10)\n        except Exception as e:\n            pass\n"
  },
  {
    "path": "tests/dragonfly/json_test.py",
    "content": "import pytest\nimport redis\nfrom redis import asyncio as aioredis\nfrom .utility import *\nfrom json import JSONDecoder, JSONEncoder, dumps\n\njane = {\"name\": \"Jane\", \"Age\": 33, \"Location\": \"Chawton\"}\n\njson_num = {\"a\": {\"a\": 1, \"b\": 2, \"c\": 3}}\n\n\nasync def get_set_json(connection: aioredis.Redis, key, value, path=\"$\"):\n    encoder = JSONEncoder()\n    await connection.execute_command(\"json.set\", key, path, encoder.encode(value))\n    result = await connection.execute_command(\"json.get\", key, path)\n    decoder = JSONDecoder()\n    return decoder.decode(result)\n\n\nasync def test_basic_json_get_set(async_client: aioredis.Redis):\n    key_name = \"test-json-key\"\n    result = await get_set_json(connection=async_client, key=key_name, value=jane)\n    assert result, \"failed to set JSON value\"\n    the_type = await async_client.type(key_name)\n    assert the_type == \"ReJSON-RL\"\n    assert len(result) == 1\n    assert result[0][\"name\"] == \"Jane\"\n    assert result[0][\"Age\"] == 33\n\n\nasync def test_access_json_value_as_string(async_client: aioredis.Redis):\n    key_name = \"test-json-key\"\n    result = await get_set_json(async_client, key_name, value=jane)\n    assert result is not None, \"failed to set JSON value\"\n    # make sure that we have valid JSON here\n    the_type = await async_client.type(key_name)\n    assert the_type == \"ReJSON-RL\"\n    # you cannot access this key as string\n    with pytest.raises(redis.exceptions.ResponseError) as e:\n        result = await async_client.get(key_name)\n\n    assert e.value.args[0] == \"WRONGTYPE Operation against a key holding the wrong kind of value\"\n\n\nasync def test_reset_key_to_string(async_client: aioredis.Redis):\n    key_name = \"test-json-key\"\n    result = await get_set_json(async_client, key=key_name, value=jane)\n    assert result is not None, \"failed to set JSON value\"\n    # make sure that we have valid JSON here\n    the_type = await 
async_client.type(key_name)\n    assert the_type == \"ReJSON-RL\"\n\n    # set the key to be string - this is legal\n    await async_client.set(key_name, \"some random value\")\n    result = await async_client.get(key_name)\n    assert result == \"some random value\"\n\n    # For JSON set the update the root path, we are allowing\n    # to change the type to JSON and override it\n    result = await get_set_json(async_client, key=key_name, value=jane)\n    the_type = await async_client.type(key_name)\n    assert the_type == \"ReJSON-RL\"\n\n\nasync def test_update_value(async_client: aioredis.Redis):\n    key_name = \"test-json-key\"\n    result = await get_set_json(async_client, key=key_name, value=json_num)\n    assert result is not None, \"failed to set JSON value\"\n    # make sure that we have valid JSON here\n    the_type = await async_client.type(key_name)\n    assert the_type == \"ReJSON-RL\"\n    result = await get_set_json(async_client, value=\"0\", key=key_name, path=\"$.a.*\")\n    assert len(result) == 3\n    # make sure that all the values under 'a' where set to 0\n    assert result == [\"0\", \"0\", \"0\"]\n\n    # Ensure that after we're changing this into STRING type, it will no longer work\n    await async_client.set(key_name, \"some random value\")\n    assert await async_client.type(key_name) == \"string\"\n    with pytest.raises(redis.exceptions.ResponseError) as e:\n        await get_set_json(async_client, value=\"0\", key=key_name, path=\"$.a.*\")\n\n    assert e.value.args[0] == \"WRONGTYPE Operation against a key holding the wrong kind of value\"\n    assert await async_client.type(key_name) == \"string\"\n\n\n@pytest.mark.parametrize(\n    \"description,expected_value,expected_type\",\n    (\n        (\"array\", \"[]\", \"array\"),\n        (\"string\", dumps(\"dragonfly\"), \"string\"),\n        (\"number\", dumps(3.50), \"number\"),\n        (\"object\", dumps({\"dragon\": \"fly\"}, separators=(\",\", \":\")), \"object\"),\n        
(\"boolean true\", \"true\", \"boolean\"),\n        (\"boolean false\", \"false\", \"boolean\"),\n    ),\n)\n@pytest.mark.asyncio\nasync def test_arrappend(async_client: aioredis.Redis, description, expected_value, expected_type):\n    key_name = \"test-json-key\"\n\n    await async_client.execute_command(\"json.set\", key_name, \"$\", \"[]\")\n    await async_client.execute_command(\"json.arrappend\", key_name, \"$\", expected_value)\n\n    # make sure the value is as expected\n    first_element = await async_client.execute_command(\"json.get\", key_name, \"$[0]\")\n    assert first_element == \"[{}]\".format(expected_value)\n\n    # make sure the type is as expected\n    actual_type = await async_client.execute_command(\"json.type\", key_name, \"$[0]\")\n    assert actual_type[0] == expected_type\n"
  },
  {
    "path": "tests/dragonfly/list_family_test.py",
    "content": "import asyncio\nfrom redis import asyncio as aioredis\n\nimport pytest\n\n\n@pytest.mark.parametrize(\"index\", range(50))\nclass TestBlPop:\n    async def async_blpop(client: aioredis.Redis):\n        return await client.blpop([\"list1{t}\", \"list2{t}\", \"list2{t}\", \"list1{t}\"], 0.5)\n\n    async def blpop_mult_keys(async_client: aioredis.Redis, key: str, val: str):\n        task = asyncio.create_task(TestBlPop.async_blpop(async_client))\n        await async_client.lpush(key, val)\n        result = await asyncio.wait_for(task, 3)\n        assert result[1] == val\n        watched = await async_client.execute_command(\"DEBUG WATCHED\")\n        assert watched == [\"awaked\", [], \"watched\", []]\n\n    async def test_blpop_multiple_keys(self, async_client: aioredis.Redis, index):\n        await TestBlPop.blpop_mult_keys(async_client, \"list1{t}\", \"a\")\n        await TestBlPop.blpop_mult_keys(async_client, \"list2{t}\", \"b\")\n"
  },
  {
    "path": "tests/dragonfly/management_test.py",
    "content": "import pytest\nimport asyncio\nfrom redis import asyncio as aioredis\nfrom redis.exceptions import ResponseError\n\n\n@pytest.mark.asyncio\nasync def test_config_cmd(async_client: aioredis.Redis):\n    with pytest.raises(ResponseError):\n        await async_client.config_set(\"foo\", \"bar\")\n    await async_client.config_set(\"requirepass\", \"foobar\") == \"OK\"\n    res = await async_client.config_get(\"*\")\n    assert len(res) > 0\n    assert res[\"requirepass\"] == \"foobar\"\n"
  },
  {
    "path": "tests/dragonfly/memcache_meta.py",
    "content": "import pytest\nfrom .instance import DflyInstance\nfrom . import dfly_args\nfrom meta_memcache import (\n    Key,\n    ServerAddress,\n    CacheClient,\n    connection_pool_factory_builder,\n)\nfrom meta_memcache.protocol import RequestFlags, Success\n\nDEFAULT_ARGS = {\"memcached_port\": 11211, \"proactor_threads\": 4}\n\n\n@pytest.fixture(scope=\"function\")\ndef meta_client(df_server: DflyInstance):\n    result = CacheClient.cache_client_from_servers(\n        servers=[\n            ServerAddress(host=\"localhost\", port=DEFAULT_ARGS.get(\"memcached_port\")),\n        ],\n        connection_pool_factory_fn=connection_pool_factory_builder(recv_timeout=5),\n    )\n    yield result\n\n\n@dfly_args(DEFAULT_ARGS)\nclass TestMetaMode:\n    def test_basic(self, meta_client: CacheClient):\n        pool = meta_client\n\n        assert pool.set(\"key1\", \"value1\", 100)\n        assert pool.set(\"key1\", \"value2\", 0)\n        assert pool.get(\"key1\") == \"value2\"\n\n        request_flags = RequestFlags(return_value=False)\n        response = pool.meta_get(Key(\"key1\"), flags=request_flags)\n        assert isinstance(response, Success)\n        assert pool.get(\"key2\") is None\n        assert pool.delete(\"key1\")\n        assert pool.delete(\"key1\") is False\n\n        assert pool.set(\"cask\", \"v\", 100)\n        value, cas_token = pool.get_cas(\"cask\")\n        assert value == \"v\" and cas_token == 0\n\n        k = Key(\"cask\")\n        response = pool.meta_multiget([k], RequestFlags(return_cas_token=True, return_value=True))\n        assert k in response\n        assert response[k].flags.cas_token == 0 and response[k].value == \"v\"\n\n    def test_gat(self, meta_client: CacheClient):\n        resp = meta_client.meta_set(\n            Key(\"k1\"), \"value1\", None, RequestFlags(return_ttl=True, cache_ttl=5)\n        )\n        assert isinstance(resp, Success)\n        val = meta_client.meta_get(Key(\"k1\"), RequestFlags(cache_ttl=15, 
return_ttl=True))\n\n        # Note the correct behavior is to return previous TTL before it was updated by GAT,\n        # but Dragonfly currently returns the updated TTL.\n        assert val.flags.ttl == 15  # returns updated ttl\n"
  },
  {
    "path": "tests/dragonfly/memory_test.py",
    "content": "import asyncio\nimport logging\nimport random\nimport string\nimport time\n\nimport pytest\nimport redis\n\nfrom . import dfly_args\nfrom .instance import DflyInstanceFactory\nfrom .utility import tmp_file_name\n\n\n@pytest.mark.large\n@pytest.mark.opt_only\n@pytest.mark.parametrize(\n    \"type, keys, val_size, elements\",\n    [\n        (\"JSON\", 200_000, 100, 100),\n        (\"SET\", 280_000, 100, 100),\n        (\"HASH\", 250_000, 100, 100),\n        (\"ZSET\", 250_000, 100, 100),\n        (\"LIST\", 300_000, 100, 100),\n        (\"STRING\", 3_500_000, 1000, 1),\n        (\"STREAM\", 280_000, 100, 100),\n    ],\n)\n# We limit to 5gb just in case to sanity check the gh runner. Otherwise, if we ask for too much\n# memory it might force the gh runner to run out of memory (since OOM killer might not even\n# get a chance to run).\nasync def test_rss_used_mem_gap(df_factory: DflyInstanceFactory, type, keys, val_size, elements):\n    dbfilename = f\"dump_{tmp_file_name()}\"\n    instance = df_factory.create(\n        proactor_threads=2,\n        maxmemory=\"5gb\",\n        dbfilename=dbfilename,\n        compression_mode=0,\n        serialization_max_chunk_size=8192,\n        num_shards=2,\n    )\n    instance.start()\n    # Create a Dragonfly and fill it up with `type` until it reaches `min_rss`, then make sure that\n    # the gap between used_memory and rss is no more than `max_unaccounted_ratio`.\n    min_rss = 3 * 1024 * 1024 * 1024  # 3gb\n    max_unaccounted = 200 * 1024 * 1024  # 200mb\n    if type == \"JSON\":\n        # For json data type, the interned string pool stores data on the default heap, not mimalloc.\n        max_unaccounted *= 2\n\n    # There is a big rss spike when this test is ran in one the gh runners (not the self hosted)\n    # and it fails. 
This rss spike is not observed locally or on our self host runner so\n    # this adjustment is mostly for CI\n    if type == \"STREAM\":\n        max_unaccounted = max_unaccounted * 3\n\n    client = instance.client()\n    await asyncio.sleep(1)  # Wait for another RSS heartbeat update in Dragonfly\n\n    cmd = f\"DEBUG POPULATE {keys} k {val_size} RAND TYPE {type} ELEMENTS {elements}\"\n    logging.info(f\"Running {cmd}\")\n    await client.execute_command(cmd)\n\n    await asyncio.sleep(2)  # Wait for another RSS heartbeat update in Dragonfly\n\n    async def check_memory():\n        info = await client.info(\"memory\")\n        logging.info(f'Used memory {info[\"used_memory\"]}, rss {info[\"used_memory_rss\"]}')\n        assert info[\"used_memory\"] > min_rss, \"Weak testcase: too little used memory\"\n        delta = info[\"used_memory_rss\"] - info[\"used_memory\"]\n        # It could be the case that the machine is configured to use swap if this assertion fails\n        assert delta > 0, info\n        assert delta < max_unaccounted, info\n\n        if type != \"STRING\" and type != \"JSON\":\n            # STRINGs keep some of the data inline, so not all of it is accounted in object_used_memory\n            # We have a very small over-accounting bug in JSON\n            assert info[\"object_used_memory\"] > keys * elements * val_size\n            assert info[\"used_memory\"] > info[\"object_used_memory\"]\n\n    await check_memory()\n\n    assert await client.execute_command(\"SAVE\", \"DF\") == True\n    assert await client.execute_command(\"DFLY\", \"LOAD\", f\"{dbfilename}-summary.dfs\") == \"OK\"\n\n    await check_memory()\n\n    # FLUSHALL sync waits for flush to finish and decommit memory, so send INFO immediately after\n    p = client.pipeline(transaction=False)\n    p.execute_command(\"FLUSHALL\", \"SYNC\")  # flushall(asynchronous=False) will just issue FLUSHALL$\n    p.info(\"memory\")\n\n    info = (await p.execute())[-1]\n    assert 
info[\"used_memory\"] < 4 * 1_000_000  # Table memory\n    assert info[\"used_memory_rss\"] < min_rss / 10  # RSS must have been freed\n\n\n@pytest.mark.asyncio\n@dfly_args(\n    {\n        \"maxmemory\": \"512mb\",\n        \"proactor_threads\": 2,\n        \"rss_oom_deny_ratio\": 0.5,\n    }\n)\n@pytest.mark.parametrize(\"admin_port\", [0, 1112])\nasync def test_rss_oom_ratio(df_factory: DflyInstanceFactory, admin_port):\n    \"\"\"\n    Test dragonfly rejects denyoom commands and new connections when rss memory is above maxmemory*rss_oom_deny_ratio\n    Test dragonfly does not rejects when rss memory goes below threshold\n    \"\"\"\n    df_server = df_factory.create(admin_port=admin_port)\n    df_server.start()\n\n    client = df_server.client()\n    await client.execute_command(\"DEBUG POPULATE 10000 key 40000 RAND\")\n\n    await asyncio.sleep(1)  # Wait for another RSS heartbeat update in Dragonfly\n\n    new_client = df_server.admin_client() if admin_port else df_server.client()\n    await new_client.ping()\n\n    info = await new_client.info(\"memory\")\n    logging.debug(f'Used memory {info[\"used_memory\"]}, rss {info[\"used_memory_rss\"]}')\n\n    reject_limit = 256 * 1024 * 1024  # 256mb\n    assert info[\"used_memory_rss\"] > reject_limit\n\n    # get command from existing connection should not be rejected\n    await client.execute_command(\"get x\")\n\n    # reject set due to oom\n    with pytest.raises(redis.exceptions.ResponseError):\n        await client.execute_command(\"set x y\")\n\n    if admin_port:\n        # new client create should also fail if admin port was set\n        client = df_server.client()\n        with pytest.raises(redis.exceptions.ConnectionError):\n            await client.ping()\n\n    # flush to free memory\n    await new_client.flushall()\n\n    await asyncio.sleep(2)  # Wait for another RSS heartbeat update in Dragonfly\n\n    info = await new_client.info(\"memory\")\n    logging.debug(f'Used memory 
{info[\"used_memory\"]}, rss {info[\"used_memory_rss\"]}')\n    assert info[\"used_memory_rss\"] < reject_limit\n\n    # new client create should not fail after memory usage decrease\n    client = df_server.client()\n    await client.execute_command(\"set x y\")\n\n\n@pytest.mark.large\n@pytest.mark.asyncio\n@dfly_args(\n    {\n        \"maxmemory\": \"512mb\",\n        \"proactor_threads\": 1,\n    }\n)\nasync def test_eval_with_oom(df_factory: DflyInstanceFactory):\n    \"\"\"\n    Test running eval commands when dragonfly returns OOM on write commands and check rss memory\n    This test was written after detecting memory leak in script runs on OOM state\n    \"\"\"\n    df_server = df_factory.create()\n    df_server.start()\n\n    client = df_server.client()\n    await client.execute_command(\"DEBUG POPULATE 20000 key 40000 RAND\")\n\n    await asyncio.sleep(1)  # Wait for another RSS heartbeat update in Dragonfly\n\n    info = await client.info(\"memory\")\n    logging.debug(f'Used memory {info[\"used_memory\"]}, rss {info[\"used_memory_rss\"]}')\n\n    reject_limit = 512 * 1024 * 1024  # 512mb\n    assert info[\"used_memory\"] > reject_limit\n    rss_before_eval = info[\"used_memory_rss\"]\n\n    pipe = client.pipeline(transaction=False)\n    MSET_SCRIPT = \"\"\"\n        redis.call('MSET', KEYS[1], ARGV[1], KEYS[2], ARGV[2])\n    \"\"\"\n\n    for _ in range(20):\n        for _ in range(8000):\n            pipe.eval(MSET_SCRIPT, 2, \"x1\", \"y1\", \"x2\", \"y2\")\n        # reject mset due to oom\n        with pytest.raises(redis.exceptions.ResponseError):\n            await pipe.execute()\n\n    await asyncio.sleep(1)  # Wait for another RSS heartbeat update in Dragonfly\n\n    info = await client.info(\"memory\")\n    logging.debug(f'Used memory {info[\"used_memory\"]}, rss {info[\"used_memory_rss\"]}')\n    assert rss_before_eval * 1.01 > info[\"used_memory_rss\"]\n\n\n@pytest.mark.parametrize(\"heartbeat_rss_eviction\", [True, False])\nasync def 
test_eviction_on_rss_treshold(df_factory: DflyInstanceFactory, heartbeat_rss_eviction):\n    max_memory = 1024 * 1024**2  # 1024mb\n\n    df_server = df_factory.create(\n        proactor_threads=3,\n        cache_mode=\"yes\",\n        maxmemory=max_memory,\n        enable_heartbeat_eviction=\"false\",\n        enable_heartbeat_rss_eviction=heartbeat_rss_eviction,\n    )\n    df_server.start()\n    client = df_server.client()\n\n    data_fill_size = int(0.70 * max_memory)  # 70% of max_memory\n\n    val_size = 1024 * 5  # 5 kb\n    num_keys = data_fill_size // val_size\n\n    await client.execute_command(\"DEBUG\", \"POPULATE\", num_keys, \"key\", val_size)\n\n    # Create huge list which can be used with LRANGE to increase RSS memory only\n    for name in [\"list_1\", \"list_2\"]:\n        for i in range(1, 1000):\n            rand_str = \"\".join(random.choices(string.ascii_letters, k=val_size))\n            await client.execute_command(f\"LPUSH {name} {rand_str}\")\n\n    # Make them STICK so we don't evict them\n    await client.execute_command(f\"STICK list_1\")\n    await client.execute_command(f\"STICK list_2\")\n\n    await client.execute_command(\"CONFIG SET enable_heartbeat_eviction true\")\n\n    memory_info_before = await client.info(\"memory\")\n\n    # This will increase only RSS memory above threshold\n    p = client.pipeline()\n    for _ in range(50):\n        p.execute_command(\"LRANGE list_1 0 -1\")\n        p.execute_command(\"LRANGE list_2 0 -1\")\n    await p.execute()\n\n    # Wait for some time\n    await asyncio.sleep(3)\n    memory_info_after = await client.info(\"memory\")\n    stats_info_after = await client.info(\"stats\")\n\n    if heartbeat_rss_eviction:\n        # We should see used memory decrease and a nonzero number of evicted keys\n        assert memory_info_after[\"used_memory\"] < memory_info_before[\"used_memory\"]\n        assert stats_info_after[\"evicted_keys\"]\n    else:\n        # If heartbeat rss eviction is 
disabled there should be no change\n        assert memory_info_after[\"used_memory\"] == memory_info_before[\"used_memory\"]\n        assert stats_info_after[\"evicted_keys\"] == 0\n\n\n# Github issue #5891\nasync def test_no_rss_eviction_overflow_on_expired_keys(df_factory: DflyInstanceFactory):\n    max_memory = 256 * 1024**2  # 256MB\n    df_server = df_factory.create(\n        proactor_threads=1, cache_mode=\"yes\", maxmemory=max_memory, vmodule=\"engine_shard=2\"\n    )\n    df_server.start()\n    client = df_server.client()\n\n    data_fill_size = int(0.20 * max_memory)  # 20% of max_memory\n\n    val_size = 1024 * 50  # 50 kb for key\n    num_keys = data_fill_size // val_size\n\n    for i in range(0, 5):\n        pipe = client.pipeline(transaction=False)\n        step_keys = num_keys + i * 10\n        await pipe.execute_command(\"DEBUG\", \"POPULATE\", step_keys, \"key_1\", val_size)\n        await pipe.execute_command(\"DEBUG\", \"POPULATE\", step_keys + i * 10, \"key_2\", val_size)\n        for i in range(step_keys):\n            if i % 2 == 0:\n                await pipe.execute_command(f\"EXPIRE key_1:{i} 1\")\n            else:\n                await pipe.execute_command(f\"EXPIRE key_2:{i} 1\")\n        await pipe.execute()\n        await asyncio.sleep(2)\n\n    await client.execute_command(\"FLUSHALL\")\n\n    # New keys should be added\n    await client.execute_command(\"DEBUG\", \"POPULATE\", num_keys, \"key\", val_size)\n    # Wait for heartbeat eviction to run\n    await asyncio.sleep(5)\n\n    keyspace_info = await client.info(\"keyspace\")\n    assert keyspace_info[\"db0\"][\"keys\"] == num_keys\n\n\n@pytest.mark.asyncio\nasync def test_throttle_on_commands_squashing_replies_bytes(df_factory: DflyInstanceFactory):\n    df = df_factory.create(\n        proactor_threads=2,\n        squashed_reply_size_limit=100_000_000,\n        vmodule=\"dragonfly_connection=5\",\n    )\n    df.start()\n\n    client = df.client()\n    # 100mb\n    await 
client.execute_command(\"debug populate 64 test 3125 rand type hash elements 500\")\n\n    async def poll():\n        # At any point we should not cross this limit\n        # 2x the reply_size_limit, 200mb\n        assert df.rss < 200_000_000\n        cl = df.client()\n        pipe = cl.pipeline(transaction=False)\n        for i in range(64):\n            pipe.execute_command(f\"hgetall test:{i}\")\n\n        await pipe.execute()\n\n    tasks = []\n    for i in range(20):\n        tasks.append(asyncio.create_task(poll()))\n\n    for task in tasks:\n        await task\n\n    df.stop()\n    found = df.find_in_logs(\"Commands squashing current reply size is overlimit\")\n    assert len(found) > 0\n\n\n@pytest.mark.asyncio\nasync def test_remove_docs_on_eviction(df_factory):\n    max_memory = 256 * 1024**2  # 256MB\n    df_server = df_factory.create(\n        proactor_threads=1,\n        cache_mode=\"yes\",\n        maxmemory=max_memory,\n        vmodule=\"engine_shard=2\",\n        eviction_memory_budget_threshold=0.99,\n        enable_heartbeat_rss_eviction=\"no\",\n    )\n    df_server.start()\n    client = df_server.client()\n\n    await client.execute_command(\n        \"FT.CREATE\", \"idx\", \"ON\", \"HASH\", \"PREFIX\", \"1\", \"doc:\", \"SCHEMA\", \"v\", \"TEXT\"\n    )\n\n    i = 0\n    while True:\n        random_string = \"\".join(random.choices(string.ascii_letters + string.digits, k=1_000))\n        await client.execute_command(\"HSET\", f\"doc:{i}\", \"v\", random_string)\n        stats_info = await client.info(\"stats\")\n        # Done when see at least 50 evictions\n        if stats_info[\"evicted_keys\"] > 50:\n            break\n        i = i + 1\n\n    # Give some time to eviction stabilize\n    await asyncio.sleep(1)\n\n    # Get number of docs in index\n    index_info = await client.execute_command(f\"FT.INFO idx\")\n    index_info_num_docs = index_info[9]\n\n    # Get number of keys in database\n    keyspace_info = await 
client.info(\"keyspace\")\n    keyspace_keys = keyspace_info[\"db0\"][\"keys\"]\n\n    assert index_info_num_docs == keyspace_keys\n\n\n@pytest.mark.asyncio\nasync def test_memory_shrink_basic(df_factory: DflyInstanceFactory):\n    df_server = df_factory.create(proactor_threads=2)\n    df_server.start()\n    client = df_server.client()\n\n    # Create sparse set - add many elements then delete most\n    for i in range(10000):\n        await client.sadd(\"myset\", f\"elem_{i}\")\n\n    # Delete 99% to make it sparse (10000 -> 100)\n    for i in range(9900):\n        await client.srem(\"myset\", f\"elem_{i}\")\n\n    # Shrink the set and verify bytes saved\n    bytes_saved = await client.execute_command(\"SHRINK\", \"myset\")\n    assert bytes_saved > 0, f\"Expected bytes_saved > 0, got {bytes_saved}\"\n\n    # Shrinking again should return 0 (already optimal)\n    bytes_saved_again = await client.execute_command(\"SHRINK\", \"myset\")\n    assert bytes_saved_again == 0, f\"Expected 0, got {bytes_saved_again}\"\n\n    # Non-existent key returns null\n    result = await client.execute_command(\"SHRINK\", \"nonexistent\")\n    assert result is None\n\n\n@pytest.mark.asyncio\nasync def test_memory_shrink_with_scan(df_factory: DflyInstanceFactory):\n    df_server = df_factory.create(proactor_threads=1)\n    df_server.start()\n    client = df_server.client()\n\n    # Create set with many elements\n    for i in range(100):\n        await client.sadd(\"set:0\", *[f\"elem_{j}\" for j in range(i * 10, (i + 1) * 10)])\n\n    # Start SCAN\n    cursor, keys = await client.sscan(\"set:0\", 0, count=50)\n\n    # Shrink during scan\n    await client.execute_command(\"SHRINK\", \"set:0\")\n\n    # Continue and complete scan\n    all_keys = set(keys)\n    while cursor != 0:\n        cursor, keys = await client.sscan(\"set:0\", cursor, count=50)\n        all_keys.update(keys)\n\n    assert len(all_keys) == 1000\n\n\n@pytest.mark.asyncio\nasync def 
test_expiry_heartbeat_responsiveness(df_factory: DflyInstanceFactory):\n    df_server = df_factory.create(proactor_threads=1)\n    df_server.start()\n    client = df_server.client()\n\n    await client.execute_command(\"DEBUG\", \"POPULATE\", 50000, \"key\", 1, \"EXPIRE\", 3, 4)\n    await asyncio.sleep(2.5)\n    worst_ping = 0\n    deadline = time.monotonic() + 60\n    while await client.dbsize() > 0:\n        t0 = time.monotonic()\n        assert t0 < deadline, \"All keys did not expire in 60 seconds\"\n        await client.ping()\n        worst_ping = max(time.monotonic() - t0, worst_ping)\n        await asyncio.sleep(0.05)\n    assert (\n        worst_ping < 0.5\n    ), f\"Worst PING latency {worst_ping:.3f}s exceeded 500ms during mass expiry\"\n"
  },
  {
    "path": "tests/dragonfly/proxy.py",
    "content": "import asyncio\nimport random\n\n\nclass Proxy:\n    def __init__(self, host, port, remote_host, remote_port):\n        self.host = host\n        self.port = port\n        self.remote_host = remote_host\n        self.remote_port = remote_port\n        self.stop_connections = []\n        self.server = None\n\n    async def handle(self, reader, writer):\n        try:\n            remote_reader, remote_writer = await asyncio.open_connection(\n                self.remote_host, self.remote_port\n            )\n        except OSError:\n            writer.close()\n            await writer.wait_closed()\n            return\n\n        async def forward(reader, writer):\n            while True:\n                data = await reader.read(1024)\n                if not data:\n                    break\n                writer.write(data)\n                await writer.drain()\n            writer.close()\n\n        task1 = asyncio.ensure_future(forward(reader, remote_writer))\n        task2 = asyncio.ensure_future(forward(remote_reader, writer))\n\n        def cleanup():\n            task1.cancel()\n            task2.cancel()\n            writer.close()\n            remote_writer.close()\n\n        self.stop_connections.append(cleanup)\n\n        try:\n            await asyncio.gather(task1, task2)\n        except (asyncio.CancelledError, ConnectionResetError):\n            pass\n        finally:\n            cleanup()\n            if cleanup in self.stop_connections:\n                self.stop_connections.remove(cleanup)\n\n    async def start(self):\n        self.server = await asyncio.start_server(self.handle, self.host, self.port)\n\n        if self.port == 0:\n            _, port = self.server.sockets[0].getsockname()[:2]\n            self.port = port\n\n    async def serve(self):\n        async with self.server:\n            await self.server.serve_forever()\n\n    def drop_connection(self):\n        \"\"\"\n        Randomly drop one connection\n        
\"\"\"\n        if self.stop_connections:\n            cb = random.choice(self.stop_connections)\n            self.stop_connections.remove(cb)\n            cb()\n\n    async def close(self, task=None):\n        if self.server is not None:\n            self.server.close()\n            self.server = None\n\n        for cb in self.stop_connections:\n            cb()\n        self.stop_connections = []\n\n        if not task == None:\n            try:\n                await task\n            except asyncio.exceptions.CancelledError:\n                pass\n"
  },
  {
    "path": "tests/dragonfly/pymemcached_test.py",
    "content": "import logging\nimport random\nimport socket\nimport ssl\nimport time\n\nfrom pymemcache.client.base import Client as MCClient\n\nfrom . import dfly_args\nfrom .instance import DflyInstance\n\nDEFAULT_ARGS = {\"memcached_port\": 11212, \"proactor_threads\": 4}\n\n\ndef read_response(client, expected_len):\n    response = b\"\"\n    while len(response) < expected_len:\n        data = client.recv(1024)\n        if not data:\n            break\n        response += data\n    return response\n\n\n# Generic basic tests\n@dfly_args(DEFAULT_ARGS)\nclass TestMemcached:\n    def test_basic(self, memcached_client: MCClient):\n        assert not memcached_client.default_noreply\n\n        # set -> replace -> add -> get\n        assert memcached_client.set(\"key1\", \"value1\")\n        assert memcached_client.replace(\"key1\", \"value2\")\n        assert not memcached_client.add(\"key1\", \"value3\")\n        assert memcached_client.get(\"key1\") == b\"value2\"\n\n        # add -> get\n        assert memcached_client.add(\"key2\", \"value1\")\n        assert memcached_client.get(\"key2\") == b\"value1\"\n\n        # delete\n        assert memcached_client.delete(\"key1\")\n        assert not memcached_client.delete(\"key3\")\n        assert memcached_client.get(\"key1\") is None\n\n        # prepend append\n        assert memcached_client.set(\"key4\", \"B\")\n        assert memcached_client.prepend(\"key4\", \"A\")\n        assert memcached_client.append(\"key4\", \"C\")\n        assert memcached_client.get(\"key4\") == b\"ABC\"\n\n        # incr\n        memcached_client.set(\"key5\", 0)\n        assert memcached_client.incr(\"key5\", 1) == 1\n        assert memcached_client.incr(\"key5\", 1) == 2\n        assert memcached_client.decr(\"key5\", 1) == 1\n\n        assert memcached_client.gets(\"key5\") == (b\"1\", b\"0\")\n\n    # Noreply (and pipeline) tests\n    async def test_noreply_pipeline(self, df_server: DflyInstance, memcached_client: MCClient):\n     
   \"\"\"\n        With the noreply option the python client doesn't wait for replies,\n        so all the commands are pipelined. Assert pipelines work correctly and the\n        succeeding regular command receives a reply (it should join the pipeline as last).\n        \"\"\"\n\n        client = df_server.client()\n        for attempts in range(2):\n            keys = [f\"k{i}\" for i in range(1000)]\n            values = [f\"d{i}\" for i in range(len(keys))]\n\n            for k, v in zip(keys, values):\n                memcached_client.set(k, v, noreply=True)\n\n            # quick follow up before the pipeline finishes\n            assert memcached_client.get(\"k10\") == b\"d10\"\n            # check all commands were executed\n            assert memcached_client.get_many(keys) == {k: v.encode() for k, v in zip(keys, values)}\n\n            info = await client.info()\n            if info[\"total_pipelined_commands\"] > 100:\n                return\n            logging.warning(\n                f\"Have not identified pipelining at attempt {attempts} Info: \\n\" + str(info)\n            )\n            await client.flushall()\n\n        assert False, \"Pipelining not detected\"\n\n    def test_noreply_alternating(self, memcached_client: MCClient):\n        \"\"\"\n        Assert alternating noreply works correctly, will cause many dispatch queue emptyings.\n        \"\"\"\n        for i in range(200):\n            if i % 2 == 0:\n                memcached_client.set(f\"k{i}\", \"D1\", noreply=True)\n                memcached_client.set(f\"k{i}\", \"D2\", noreply=True)\n                memcached_client.set(f\"k{i}\", \"D3\", noreply=True)\n            assert memcached_client.add(f\"k{i}\", \"DX\", noreply=False) == (i % 2 != 0)\n\n    def test_length_in_set_command(self, df_server: DflyInstance, memcached_client: MCClient):\n        \"\"\"\n        Test parser correctly reads value based on length and complains about bad chunks\n        \"\"\"\n        cases = 
[b\"NOTFOUR\", b\"FOUR\", b\"F4\\r\\n\", b\"\\r\\n\\r\\n\"]\n\n        for case in cases:\n            client = socket.socket(socket.AF_INET, socket.SOCK_STREAM)\n            client.connect((\"127.0.0.1\", int(df_server[\"memcached_port\"])))\n\n            logging.info(f\"Case {case}\")\n            client.sendall(b\"set foo 0 0 4\\r\\n\" + case + b\"\\r\\n\")\n            response = client.recv(256).decode()\n            if len(case) == 4:\n                assert response == \"STORED\\r\\n\"\n            else:\n                # response should follow up with ERROR due to OUR\\r\\n being\n                # parsed as unknown command but we can not guarantee that\n                # it will be read in the same recv call, so just check the prefix.\n                assert response.startswith(\"CLIENT_ERROR bad data chunk\\r\\n\")\n\n            client.close()\n\n    def test_pipeline_get_then_stats_version(self, df_server: DflyInstance):\n        \"\"\"\n        Verify GET pipelined before STATS or VERSION doesn't crash the server.\n        \"\"\"\n        port = int(df_server[\"memcached_port\"])\n\n        client = socket.socket(socket.AF_INET, socket.SOCK_STREAM)\n        client.settimeout(5)\n        client.connect((\"127.0.0.1\", port))\n        client.sendall(b\"get nokey\\r\\nversion\\r\\n\")\n        response = read_response(client, len(b\"END\\r\\nVERSION 1.6.0 DF\\r\\n\"))\n        client.close()\n        assert response == b\"END\\r\\nVERSION 1.6.0 DF\\r\\n\"\n\n        client = socket.socket(socket.AF_INET, socket.SOCK_STREAM)\n        client.settimeout(5)\n        client.connect((\"127.0.0.1\", port))\n        client.sendall(b\"get nokey\\r\\nstats\\r\\n\")\n        # Read until both GET's END and STATS' END are received before closing.\n        response = b\"\"\n        while response.count(b\"END\\r\\n\") < 2:\n            response += client.recv(4096)\n        client.close()\n        assert response.startswith(b\"END\\r\\nSTAT \")\n\n    def 
test_error_in_pipeline(self, df_server: DflyInstance, memcached_client: MCClient):\n        \"\"\"\n        Verify correct responses to  \"get x\\r\\ngetaa\\r\\nget y z\\r\\n\"\n        \"\"\"\n        client = socket.socket(socket.AF_INET, socket.SOCK_STREAM)\n        client.settimeout(5)\n        client.connect((\"127.0.0.1\", int(df_server[\"memcached_port\"])))\n\n        client.sendall(b\"get x\\r\\ngetaa\\r\\nget y z\\r\\n\")\n\n        expected = b\"END\\r\\nERROR\\r\\nEND\\r\\n\"\n        response = read_response(client, len(expected))\n        client.close()\n\n        assert response == expected\n\n    def test_large_request(self, memcached_client):\n        assert memcached_client.set(b\"key1\", b\"d\" * 4096, noreply=False)\n        assert memcached_client.set(b\"key2\", b\"d\" * 4096 * 2, noreply=False)\n\n    def test_version(self, memcached_client: MCClient):\n        \"\"\"\n        php-memcached client expects version to be in the format of \"n.n.n\", so we return 1.6.0 emulating an old memcached server.\n        Our real version is being returned in the stats command.\n        Also verified manually that php client parses correctly the version string that ends with \"DF\".\n        \"\"\"\n        assert b\"1.6.0 DF\" == memcached_client.version()\n        stats = memcached_client.stats()\n        version = stats[b\"version\"].decode(\"utf-8\")\n        assert version.startswith(\"v\") or version == \"dev\"\n\n    def test_flags(self, memcached_client: MCClient):\n        for i in range(1, 20):\n            flags = random.randrange(50, 1000)\n            memcached_client.set(\"a\", \"real-value\", flags=flags, noreply=True)\n\n            res = memcached_client.raw_command(\"get a\", \"END\\r\\n\").split()\n            # workaround sometimes memcached_client.raw_command returns empty str\n            if len(res) > 0:\n                assert res[2].decode() == str(flags)\n\n    def test_expiration(self, memcached_client: MCClient):\n        assert 
not memcached_client.default_noreply\n\n        assert memcached_client.set(\"key1\", \"value1\", 2)\n        assert memcached_client.set(\"key2\", \"value2\", int(time.time()) + 2)\n        assert memcached_client.set(\"key3\", \"value3\", int(time.time()) + 200)\n        assert memcached_client.get(\"key1\") == b\"value1\"\n        assert memcached_client.get(\"key2\") == b\"value2\"\n        assert memcached_client.get(\"key3\") == b\"value3\"\n        assert memcached_client.set(\"key3\", \"value3\", int(time.time()) - 200)\n        assert memcached_client.get(\"key3\") is None\n        time.sleep(2)\n        assert memcached_client.get(\"key1\") is None\n        assert memcached_client.get(\"key2\") is None\n        assert memcached_client.get(\"key3\") is None\n\n    def test_pipeline_cas_crash(self, df_server: DflyInstance, memcached_client: MCClient):\n        \"\"\"\n        Tests that an unsupported/invalid command (CAS) sent in a pipeline\n        after an async command (GETS) does not crash the server\n        and correctly buffers the error reply in order.\n        \"\"\"\n        client_sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)\n        client_sock.settimeout(5)\n        client_sock.connect((\"127.0.0.1\", int(df_server[\"memcached_port\"])))\n\n        # Command sequence:\n        # 1. SET (sync)\n        # 2. GETS (async - forces the next command to not be the head)\n        # 3. 
CAS (hits the default block, triggering the early error)\n        payload = (\n            b\"set mykey 0 0 5\\r\\nvalue\\r\\n\" b\"gets mykey\\r\\n\" b\"cas mykey 0 0 5 12345\\r\\nvalue\\r\\n\"\n        )\n        client_sock.sendall(payload)\n\n        response = b\"\"\n        while b\"CLIENT_ERROR bad command line format\\r\\n\" not in response:\n            data = client_sock.recv(4096)\n            if not data:\n                break\n            response += data\n        client_sock.close()\n\n        # Ensure strict ordering: STORED -> GETS (VALUE + END) -> CLIENT_ERROR\n        idx_stored = response.find(b\"STORED\\r\\n\")\n        idx_value = response.find(b\"VALUE mykey\")\n        idx_error = response.find(b\"CLIENT_ERROR bad command line format\")\n        # Look for the GETS terminator specifically AFTER the value\n        idx_end = response.find(b\"END\\r\\n\", idx_value)\n\n        assert idx_stored != -1 and idx_value != -1 and idx_error != -1 and idx_end != -1\n        assert (\n            idx_stored < idx_value < idx_end < idx_error\n        ), f\"Responses out of order/interleaved: {response}\"\n\n        # Final sanity check to ensure the connection/server is still healthy\n        assert memcached_client.set(\"sanity_check\", \"alive\")\n        assert memcached_client.get(\"sanity_check\") == b\"alive\"\n\n\n@dfly_args(DEFAULT_ARGS)\ndef test_memcached_tls_no_requirepass(df_factory, with_tls_server_args, with_tls_ca_cert_args):\n    \"\"\"\n    Test for issue #5084: ability to use TLS for Memcached without requirepass.\n\n    Dragonfly required a password to be set when using TLS, but the Memcached protocol\n    does not support password authentication. 
This test verifies that we can start\n    the server with TLS enabled but without specifying requirepass and with the Memcached port.\n    \"\"\"\n    # Create arguments for TLS without specifying requirepass\n    server_args = {**DEFAULT_ARGS, **with_tls_server_args, \"requirepass\": \"test_password\"}\n\n    # Create and start the server - it should not crash\n    server = df_factory.create(**server_args)\n    server.start()\n\n    # Give the server time to start\n    time.sleep(1)\n\n    # Create SSL context for client\n    ssl_context = ssl.create_default_context()\n    ssl_context.load_verify_locations(with_tls_ca_cert_args[\"ca_cert\"])\n    ssl_context.check_hostname = False\n\n    # Disable certificate verification (since we don't provide a client certificate)\n    ssl_context.verify_mode = ssl.CERT_NONE\n\n    # Output port information for diagnostics\n    logging.info(f\"Connecting to memcached port: {server.mc_port} on host: 127.0.0.1\")\n\n    # Connect to Memcached over TLS\n    client = MCClient((\"127.0.0.1\", server.mc_port), tls_context=ssl_context)\n\n    # Test basic operations\n    assert client.set(\"foo\", \"bar\")\n    assert client.get(\"foo\") == b\"bar\"\n"
  },
  {
    "path": "tests/dragonfly/redis_replication_test.py",
    "content": "import time\nimport pytest\nimport asyncio\nfrom redis import asyncio as aioredis\nimport subprocess\nfrom .utility import *\nfrom .instance import DflyInstanceFactory\nfrom .proxy import Proxy\n\n\n# Checks that master redis and dragonfly replica are synced by writing a random key to master\n# and waiting for it to exist in replica. Foreach db in 0..dbcount-1.\nasync def await_synced(c_master: aioredis.Redis, c_replica: aioredis.Redis, dbcount=1):\n    rnd_str = \"\".join(random.choices(string.ascii_letters, k=10))\n    key = \"sync_key/\" + rnd_str\n    for db in range(dbcount):\n        await c_master.set(key, \"dummy\")\n        logging.debug(f\"set {key} MASTER db = {db}\")\n        timeout = 30\n        while timeout > 0:\n            v = await c_replica.get(key)\n            logging.debug(f\"get {key} from REPLICA db = {db} got {v}\")\n            if v is not None:\n                break\n            repl_state = await c_master.info(\"replication\")\n            logging.debug(f\"replication info: {repl_state}\")\n            await asyncio.sleep(1)\n\n            timeout -= 1\n        await c_master.close()\n        await c_replica.close()\n        assert timeout > 0, \"Timeout while waiting for replica to sync\"\n\n\nasync def await_synced_all(c_master, c_replicas):\n    for c_replica in c_replicas:\n        await await_synced(c_master, c_replica)\n\n\nasync def check_data(seeder, replicas, c_replicas):\n    capture = await seeder.capture()\n    for replica, c_replica in zip(replicas, c_replicas):\n        await wait_available_async(c_replica)\n        assert await seeder.compare(capture, port=replica.port)\n\n\n# Start replication\nasync def run_replication(client: aioredis.Redis, port):\n    res = await client.execute_command(\"REPLICAOF localhost \" + str(port))\n    assert res == \"OK\"\n    await wait_available_async(client)\n\n\nasync def replicate_all(replicas, port):\n    await asyncio.gather(*(asyncio.create_task(run_replication(c, 
port)) for c in replicas))\n\n\nfull_sync_replication_specs = [\n    ([1], dict(keys=100, dbcount=1, unsupported_types=[ValueType.JSON])),\n    ([1], dict(keys=5000, dbcount=2, unsupported_types=[ValueType.JSON])),\n    ([2], dict(keys=5000, dbcount=4, unsupported_types=[ValueType.JSON])),\n]\n\n\n@pytest.mark.parametrize(\"t_replicas, seeder_config\", full_sync_replication_specs)\nasync def test_replication_full_sync(\n    df_factory, df_seeder_factory, redis_server, t_replicas, seeder_config, port_picker\n):\n    master = redis_server\n    c_master = aioredis.Redis(port=master.port)\n    assert await c_master.ping()\n\n    seeder = df_seeder_factory.create(port=master.port, **seeder_config)\n    await seeder.run(target_deviation=0.1)\n\n    replica = df_factory.create(\n        port=port_picker.get_available_port(), proactor_threads=t_replicas[0]\n    )\n    replica.start()\n    c_replica = replica.client()\n    assert await c_replica.ping()\n\n    await run_replication(c_replica, master.port)\n    await await_synced(c_master, c_replica, seeder_config[\"dbcount\"])\n\n    capture = await seeder.capture()\n    assert await seeder.compare(capture, port=replica.port)\n\n\nstable_sync_replication_specs = [\n    ([1], dict(keys=100, dbcount=1, unsupported_types=[ValueType.JSON])),\n    ([1], dict(keys=10_000, dbcount=2, unsupported_types=[ValueType.JSON])),\n    ([2], dict(keys=10_000, dbcount=1, unsupported_types=[ValueType.JSON])),\n    ([2], dict(keys=10_000, dbcount=2, unsupported_types=[ValueType.JSON])),\n    ([8], dict(keys=10_000, dbcount=4, unsupported_types=[ValueType.JSON])),\n]\n\n\n@pytest.mark.parametrize(\"t_replicas, seeder_config\", stable_sync_replication_specs)\nasync def test_replication_stable_sync(\n    df_factory, df_seeder_factory, redis_server, t_replicas, seeder_config, port_picker\n):\n    master = redis_server\n    c_master = aioredis.Redis(port=master.port)\n    assert await c_master.ping()\n\n    replica = df_factory.create(\n        
port=port_picker.get_available_port(), proactor_threads=t_replicas[0]\n    )\n    replica.start()\n    c_replica = replica.client()\n    assert await c_replica.ping()\n\n    await c_replica.execute_command(\"REPLICAOF\", \"localhost\", master.port)\n    await wait_available_async(c_replica)\n\n    seeder = df_seeder_factory.create(port=master.port, **seeder_config)\n    await seeder.run(target_ops=1000)\n\n    await await_synced(c_master, c_replica, seeder_config[\"dbcount\"])\n\n    capture = await seeder.capture()\n    assert await seeder.compare(capture, port=replica.port)\n\n\n# Threads for each dragonfly replica, Seeder Config.\nreplication_specs = [\n    ([1], dict(keys=1000, dbcount=1, unsupported_types=[ValueType.JSON])),\n    ([6, 6, 6], dict(keys=4_000, dbcount=2, unsupported_types=[ValueType.JSON])),\n    ([2, 2], dict(keys=4_000, dbcount=2, unsupported_types=[ValueType.JSON])),\n    ([8, 8], dict(keys=4_000, dbcount=2, unsupported_types=[ValueType.JSON])),\n    ([1] * 8, dict(keys=500, dbcount=1, unsupported_types=[ValueType.JSON])),\n    ([1], dict(keys=100, dbcount=4, unsupported_types=[ValueType.JSON])),\n]\n\n\n@pytest.mark.parametrize(\"t_replicas, seeder_config\", replication_specs)\nasync def test_redis_replication_all(\n    df_factory: DflyInstanceFactory,\n    df_seeder_factory,\n    redis_server,\n    t_replicas,\n    seeder_config,\n    port_picker,\n):\n    master = redis_server\n    c_master = aioredis.Redis(port=master.port)\n    assert await c_master.ping()\n\n    replicas = [\n        df_factory.create(port=port_picker.get_available_port(), proactor_threads=t)\n        for i, t in enumerate(t_replicas)\n    ]\n\n    # Fill master with test data\n    seeder = df_seeder_factory.create(port=master.port, **seeder_config)\n    await seeder.run(target_deviation=0.1)\n\n    # Start replicas\n    df_factory.start_all(replicas)\n\n    c_replicas = [replica.client() for replica in replicas]\n\n    # Start data stream\n    stream_task = 
asyncio.create_task(seeder.run())\n    await asyncio.sleep(0.0)\n\n    await replicate_all(c_replicas, master.port)\n\n    # Wait for streaming to finish\n    assert (\n        not stream_task.done()\n    ), \"Weak testcase. Increase number of streamed iterations to surpass full sync\"\n    seeder.stop()\n    await stream_task\n\n    # Check data after full sync\n    await await_synced_all(c_master, c_replicas)\n    await check_data(seeder, replicas, c_replicas)\n\n    # Stream more data in stable state\n    await seeder.run(target_ops=2000)\n\n    # Check data after stable state stream\n    await await_synced_all(c_master, c_replicas)\n    await check_data(seeder, replicas, c_replicas)\n\n\nmaster_disconnect_cases = [\n    ([6], 1, dict(keys=4_000, dbcount=1, unsupported_types=[ValueType.JSON])),\n    ([1, 4, 6], 3, dict(keys=1_000, dbcount=2, unsupported_types=[ValueType.JSON])),\n]\n\n\n@pytest.mark.parametrize(\"t_replicas, t_disconnect, seeder_config\", master_disconnect_cases)\nasync def test_redis_master_restart(\n    df_factory,\n    df_seeder_factory,\n    redis_server,\n    t_replicas,\n    t_disconnect,\n    seeder_config,\n    port_picker,\n):\n    master = redis_server\n    c_master = aioredis.Redis(port=master.port)\n    assert await c_master.ping()\n\n    replicas = [\n        df_factory.create(port=port_picker.get_available_port(), proactor_threads=t)\n        for i, t in enumerate(t_replicas)\n    ]\n\n    # Fill master with test data\n    seeder = df_seeder_factory.create(port=master.port, **seeder_config)\n    await seeder.run(target_deviation=0.1)\n\n    # Start replicas\n    df_factory.start_all(replicas)\n\n    c_replicas = [replica.client() for replica in replicas]\n\n    # Start data stream\n    stream_task = asyncio.create_task(seeder.run())\n    await asyncio.sleep(0.0)\n\n    await replicate_all(c_replicas, master.port)\n\n    # Wait for streaming to finish\n    assert (\n        not stream_task.done()\n    ), \"Weak testcase. 
Increase number of streamed iterations to surpass full sync\"\n    seeder.stop()\n    await stream_task\n\n    for _ in range(t_disconnect):\n        master.stop()\n        await asyncio.sleep(1)\n        master.start()\n        await asyncio.sleep(1)\n        # fill master with data\n        await seeder.run(target_deviation=0.1)\n\n    # Check data after stable state stream\n    await wait_available_async(c_replicas)\n    await await_synced_all(c_master, c_replicas)\n    await check_data(seeder, replicas, c_replicas)\n\n\nmaster_disconnect_cases = [\n    ([6], dict(keys=4_000, dbcount=1, unsupported_types=[ValueType.JSON])),\n    pytest.param(\n        [1, 4, 6],\n        dict(keys=1_000, dbcount=2, unsupported_types=[ValueType.JSON]),\n        marks=pytest.mark.large,\n    ),\n]\n\n\n@pytest.mark.parametrize(\"t_replicas, seeder_config\", master_disconnect_cases)\nasync def test_disconnect_master(\n    df_factory,\n    df_seeder_factory,\n    redis_server,\n    t_replicas,\n    seeder_config,\n    port_picker,\n):\n    master = redis_server\n    c_master = aioredis.Redis(port=master.port)\n    assert await c_master.ping()\n\n    proxy = Proxy(\"127.0.0.1\", 1114, \"127.0.0.1\", master.port)\n    await proxy.start()\n    proxy_task = asyncio.create_task(proxy.serve())\n\n    replicas = [\n        df_factory.create(port=port_picker.get_available_port(), proactor_threads=t)\n        for i, t in enumerate(t_replicas)\n    ]\n\n    # Fill master with test data\n    seeder = df_seeder_factory.create(port=master.port, **seeder_config)\n    await seeder.run(target_deviation=0.1)\n\n    # Start replicas\n    df_factory.start_all(replicas)\n\n    c_replicas = [replica.client() for replica in replicas]\n\n    # Start data stream\n    stream_task = asyncio.create_task(seeder.run())\n    await asyncio.sleep(0.5)\n\n    await replicate_all(c_replicas, proxy.port)\n\n    # Break the connection between master and replica\n    await proxy.close(proxy_task)\n    await 
asyncio.sleep(2)\n    await proxy.start()\n    proxy_task = asyncio.create_task(proxy.serve())\n\n    # finish streaming data\n    await asyncio.sleep(1)\n    seeder.stop()\n    await stream_task\n\n    # Check data after stable state stream\n    await wait_available_async(c_replicas)\n    await await_synced_all(c_master, c_replicas)\n    await check_data(seeder, replicas, c_replicas)\n\n    await proxy.close(proxy_task)\n"
  },
  {
    "path": "tests/dragonfly/replication_test.py",
    "content": "import os\nimport platform\nimport shutil\nimport signal\nimport struct\nimport tarfile\nimport time\nimport urllib.request\nfrom itertools import chain, repeat\n\nimport async_timeout\nimport pymemcache\n\nfrom . import dfly_args\nfrom .instance import DflyInstanceFactory, DflyInstance\nfrom .proxy import Proxy\nfrom .seeder import DebugPopulateSeeder, HnswSearchSeeder\nfrom .seeder import Seeder as SeederV2\nfrom .utility import *\n\nADMIN_PORT = 1211\n\nDISCONNECT_CRASH_FULL_SYNC = 0\nDISCONNECT_CRASH_STABLE_SYNC = 1\nDISCONNECT_NORMAL_STABLE_SYNC = 2\n\nM_OPT = [pytest.mark.opt_only]\nM_SLOW = [pytest.mark.large]\nM_STRESS = [pytest.mark.large, pytest.mark.opt_only]\nM_NOT_EPOLL = [pytest.mark.exclude_epoll]\n\n\n\"\"\"\nTest full replication pipeline. Test full sync with streaming changes and stable state streaming.\n\"\"\"\n\n\n@pytest.mark.parametrize(\n    \"t_master, t_replicas, seeder_config, stream_target\",\n    [\n        # Quick general test that replication is working\n        (1, 3 * [1], dict(key_target=1_000), 500),\n        # A lot of huge values\n        (2, 2 * [1], dict(key_target=5_000, huge_value_target=30), 500),\n        (4, [4, 4], dict(key_target=10_000), 1_000),\n        pytest.param(6, [6, 6, 6], dict(key_target=100_000), 20_000, marks=M_OPT),\n        # Skewed tests with different thread ratio\n        pytest.param(8, 6 * [1], dict(key_target=5_000), 2_000, marks=M_SLOW),\n        pytest.param(2, [8, 8], dict(key_target=10_000), 2_000, marks=M_SLOW),\n        # Everything is big because data size is 10k\n        pytest.param(\n            2, [2], dict(key_target=1_000, data_size=10_000, huge_value_target=0), 100, marks=M_SLOW\n        ),\n        # Stress test\n        pytest.param(8, [8, 8], dict(key_target=1_000_000, units=16), 50_000, marks=M_STRESS),\n    ],\n)\n@pytest.mark.parametrize(\"mode\", [({}), ({\"cache_mode\": \"true\"})])\n@pytest.mark.parametrize(\"background_snapshotting\", [False, True])\n# Disabled 
cache_mode until #5371 is fixed\n# @pytest.mark.parametrize(\"point_in_time_replication\", [True, False])\nasync def test_replication_all(\n    df_factory: DflyInstanceFactory,\n    t_master,\n    t_replicas,\n    seeder_config,\n    stream_target,\n    mode,\n    background_snapshotting,\n    # point_in_time_replication,\n):\n    args = {}\n    if mode:\n        args[\"cache_mode\"] = \"true\"\n        args[\"maxmemory\"] = str(t_master * 256) + \"mb\"\n\n    if background_snapshotting:\n        args[\"background_heartbeat\"] = None\n        args[\"background_snapshotting\"] = None\n\n    master = df_factory.create(\n        admin_port=ADMIN_PORT,\n        proactor_threads=t_master,\n        # point_in_time_snapshot=point_in_time_replication,\n        **args,\n    )\n    replicas = [\n        df_factory.create(admin_port=ADMIN_PORT + i + 1, proactor_threads=t)\n        for i, t in enumerate(t_replicas)\n    ]\n\n    from_admin_port = random.choice([True, False])\n\n    # Start instances and connect clients\n    df_factory.start_all([master] + replicas)\n    c_master = master.client()\n    c_replicas = [replica.client() for replica in replicas]\n\n    # Fill master with test data\n    seeder = SeederV2(**seeder_config, huge_value_add_only=True)\n    await seeder.run(c_master, target_deviation=0.01)\n\n    # Start data stream\n    stream_task = asyncio.create_task(seeder.run(c_master))\n    await asyncio.sleep(0.0)\n\n    # Start replication\n    master_port = master.port if not from_admin_port else master.admin_port\n    await asyncio.gather(\n        *(\n            asyncio.create_task(c.execute_command(\"REPLICAOF localhost \" + str(master_port)))\n            for c in c_replicas\n        )\n    )\n\n    # Wait for all replicas to transition into stable sync\n    async with async_timeout.timeout(240):\n        await wait_for_replicas_state(*c_replicas)\n\n    # Stop streaming data once every replica is in stable sync\n    await seeder.stop(c_master)\n    await 
stream_task\n\n    # Check data after full sync\n    async def check():\n        await check_all_replicas_finished(c_replicas, c_master)\n        hashes = await asyncio.gather(*(SeederV2.capture(c) for c in [c_master] + c_replicas))\n        assert len(set(hashes)) == 1\n\n    await check()\n    # Stream more data in stable state\n    await seeder.run(c_master, target_ops=stream_target)\n\n    # Check data after stable state stream\n    await check()\n\n    info = await c_master.info()\n    preemptions = info[\"big_value_preemptions\"]\n    key_capacity = info[\"prime_capacity\"]\n    compressed_blobs = info[\"compressed_blobs\"]\n    logging.debug(\n        f\"Compressed blobs {compressed_blobs} .Capacity {key_capacity}. Preemptions {preemptions}\"\n    )\n\n    assert preemptions >= seeder.huge_value_target * 0.5\n    assert compressed_blobs > 0\n    # Because data size could be 10k and for that case there will be almost a preemption\n    # per bucket.\n    if seeder.data_size < 1000:\n        # We care that we preempt fewer times than the total buckets such that we can be\n        # sure that we test both flows (with and without preemptions). Preemptions on 3%\n        # of buckets seems like a big number but that depends on a few parameters like\n        # the size of the huge value and the serialization max chunk size. For the test cases here,\n        # it's usually close to 1% but there are some that are close to 3%.\n        assert preemptions <= (key_capacity * 0.03)\n\n\n\"\"\"\nTest disconnecting replicas during different phases while constantly streaming changes to master.\n\nThis test is targeted at the master cancellation mechanism that should quickly stop operations for a\ndisconnected replica.\n\nThree types are tested:\n1. Replicas crashing during full sync state\n2. Replicas crashing during stable sync state\n3. Replicas disconnecting normally with REPLICAOF NO ONE during stable state\n\"\"\"\n\n# 1. Number of master threads\n# 2. 
Number of threads for each replica that crashes during full sync\n# 3. Number of threads for each replica that crashes during stable sync\n# 4. Number of threads for each replica that disconnects normally\n# 5. Number of distinct keys that are constantly streamed\ndisconnect_cases = [\n    # balanced\n    (8, [4, 4], [4, 4], [4], 4_000),\n    (4, [2] * 4, [2] * 4, [2, 2], 2_000),\n    # full sync heavy\n    (8, [4] * 4, [], [], 4_000),\n    # stable state heavy\n    (8, [], [4] * 4, [], 4_000),\n    # disconnect only\n    (8, [], [], [4] * 4, 4_000),\n]\n\n\n@pytest.mark.parametrize(\"t_master, t_crash_fs, t_crash_ss, t_disonnect, n_keys\", disconnect_cases)\nasync def test_disconnect_replica(\n    df_factory: DflyInstanceFactory,\n    df_seeder_factory,\n    t_master,\n    t_crash_fs,\n    t_crash_ss,\n    t_disonnect,\n    n_keys,\n):\n    master = df_factory.create(\n        proactor_threads=t_master, vmodule=\"replica=2,dflycmd=2,server_family=2\"\n    )\n    replicas = [\n        (\n            df_factory.create(proactor_threads=t, vmodule=\"replica=2,dflycmd=2,server_family=2\"),\n            crash_fs,\n        )\n        for i, (t, crash_fs) in enumerate(\n            chain(\n                zip(t_crash_fs, repeat(DISCONNECT_CRASH_FULL_SYNC)),\n                zip(t_crash_ss, repeat(DISCONNECT_CRASH_STABLE_SYNC)),\n                zip(t_disonnect, repeat(DISCONNECT_NORMAL_STABLE_SYNC)),\n            )\n        )\n    ]\n\n    logging.debug(\"Start master\")\n    master.start()\n    c_master = master.client(single_connection_client=True)\n\n    logging.debug(\"Start replicas and create clients\")\n    df_factory.start_all([replica for replica, _ in replicas])\n\n    c_replicas = [(replica, replica.client(), crash_type) for replica, crash_type in replicas]\n\n    def replicas_of_type(tfunc):\n        return [args for args in c_replicas if tfunc(args[2])]\n\n    logging.debug(\"Start data fill loop\")\n    seeder = df_seeder_factory.create(port=master.port, 
keys=n_keys, dbcount=2)\n    fill_task = asyncio.create_task(seeder.run())\n\n    logging.debug(\"Run full sync\")\n\n    async def full_sync(replica: DflyInstance, c_replica, crash_type):\n        await c_replica.execute_command(\"REPLICAOF localhost \" + str(master.port))\n        if crash_type == 0:\n            await asyncio.sleep(random.random() / 100 + 0.01)\n            await c_replica.aclose()\n            replica.stop(kill=True)\n        else:\n            await wait_available_async(c_replica)\n\n    await asyncio.gather(*(full_sync(*args) for args in c_replicas))\n\n    # Wait for master to stream a bit more\n    await asyncio.sleep(0.1)\n\n    # Check master survived full sync crashes\n    assert await c_master.ping()\n\n    # Check phase-2 replicas survived\n    for _, c_replica, _ in replicas_of_type(lambda t: t > 0):\n        assert await c_replica.ping()\n\n    logging.debug(\"Run stable state crashes\")\n\n    async def stable_sync(replica, c_replica, crash_type):\n        await asyncio.sleep(random.random() / 100)\n        await c_replica.aclose()\n        replica.stop(kill=True)\n\n    await asyncio.gather(*(stable_sync(*args) for args in replicas_of_type(lambda t: t == 1)))\n\n    # Check master survived all crashes\n    assert await c_master.ping()\n\n    # Check phase 3 replica survived\n    for _, c_replica, _ in replicas_of_type(lambda t: t > 1):\n        assert await c_replica.ping()\n\n    logging.debug(\"Check master survived all crashes\")\n    assert await c_master.ping()\n\n    # Check disconnects\n    async def disconnect(replica, c_replica, crash_type):\n        await asyncio.sleep(random.random() / 100)\n        await c_replica.execute_command(\"REPLICAOF NO ONE\")\n\n    logging.debug(\"disconnect replicas\")\n    await asyncio.gather(*(disconnect(*args) for args in replicas_of_type(lambda t: t == 2)))\n\n    await asyncio.sleep(0.5)\n\n    logging.debug(\"Check phase 3 replica survived\")\n    for replica, c_replica, _ in 
replicas_of_type(lambda t: t == 2):\n        assert await c_replica.ping()\n        await c_replica.aclose()\n\n    logging.debug(\"Stop streaming\")\n    seeder.stop()\n    await fill_task\n\n    logging.debug(\"Check master survived all disconnects\")\n    assert await c_master.ping()\n\n\n\"\"\"\nTest stopping master during different phases.\n\nThis test is targeted at the replica cancellation mechanism that should quickly abort a failed operation\nand revert to connection retry state.\n\nThree types are tested:\n1. Master crashing during full sync state\n2. Master crashing in a random state.\n3. Master crashing during stable sync state\n\n\"\"\"\n\n# 1. Number of master threads\n# 2. Number of threads for each replica\n# 3. Number of times a random crash happens\n# 4. Number of keys transferred (the more, the higher the probability to not miss full sync)\nmaster_crash_cases = [\n    (6, [6], 3, 2_000),\n    (4, [4, 4, 4], 3, 2_000),\n]\n\n\n@pytest.mark.large\n@pytest.mark.parametrize(\"t_master, t_replicas, n_random_crashes, n_keys\", master_crash_cases)\nasync def test_disconnect_master(\n    df_factory, df_seeder_factory, t_master, t_replicas, n_random_crashes, n_keys\n):\n    master = df_factory.create(port=1111, proactor_threads=t_master)\n    replicas = [df_factory.create(proactor_threads=t) for i, t in enumerate(t_replicas)]\n\n    df_factory.start_all(replicas)\n    c_replicas = [replica.client() for replica in replicas]\n\n    seeder = df_seeder_factory.create(port=master.port, keys=n_keys, dbcount=2)\n\n    async def crash_master_fs():\n        await asyncio.sleep(random.random() / 10)\n        master.stop(kill=True)\n\n    async def start_master():\n        await asyncio.sleep(0.2)\n        master.start()\n        async with master.client() as c_master:\n            assert await c_master.ping()\n            seeder.reset()\n            await seeder.run(target_deviation=0.1)\n\n    await start_master()\n\n    # Crash master during full sync, but with 
all passing initial connection phase\n    await asyncio.gather(\n        *(\n            c_replica.execute_command(\"REPLICAOF localhost \" + str(master.port))\n            for c_replica in c_replicas\n        )\n    )\n    await crash_master_fs()\n\n    await asyncio.sleep(1 + len(replicas) * 0.5)\n\n    for _ in range(n_random_crashes):\n        await start_master()\n        await asyncio.sleep(random.random() + len(replicas) * random.random() / 10)\n        # Crash master in some random state for each replica\n        master.stop(kill=True)\n\n    await start_master()\n    await asyncio.sleep(1 + len(replicas) * 0.5)  # Replicas check every 500ms.\n    capture = await seeder.capture()\n    for replica, c_replica in zip(replicas, c_replicas):\n        await wait_available_async(c_replica)\n        assert await seeder.compare(capture, port=replica.port)\n\n    # Crash master during stable state\n    master.stop(kill=True)\n\n    await start_master()\n    await asyncio.sleep(1 + len(replicas) * 0.5)\n    capture = await seeder.capture()\n    for c_replica in c_replicas:\n        await wait_available_async(c_replica)\n        assert await seeder.compare(capture, port=replica.port)\n\n\n\"\"\"\nTest re-connecting replica to different masters.\n\"\"\"\n\nrotating_master_cases = [(4, [4, 4, 4, 4], dict(keys=2_000, dbcount=4))]\n\n\n@pytest.mark.large\n@pytest.mark.parametrize(\"t_replica, t_masters, seeder_config\", rotating_master_cases)\nasync def test_rotating_masters(df_factory, df_seeder_factory, t_replica, t_masters, seeder_config):\n    replica = df_factory.create(proactor_threads=t_replica)\n    masters = [df_factory.create(proactor_threads=t) for i, t in enumerate(t_masters)]\n    df_factory.start_all([replica] + masters)\n\n    seeders = [df_seeder_factory.create(port=m.port, **seeder_config) for m in masters]\n\n    c_replica = replica.client()\n\n    await asyncio.gather(*(seeder.run(target_deviation=0.1) for seeder in seeders))\n\n    fill_seeder = None\n  
  fill_task = None\n\n    for master, seeder in zip(masters, seeders):\n        if fill_task is not None:\n            fill_seeder.stop()\n            fill_task.cancel()\n\n        await c_replica.execute_command(f\"REPLICAOF localhost {master.port}\")\n        await wait_available_async(c_replica)\n\n        capture = await seeder.capture()\n        assert await seeder.compare(capture, port=replica.port)\n\n        fill_task = asyncio.create_task(seeder.run())\n        fill_seeder = seeder\n\n    if fill_task is not None:\n        fill_seeder.stop()\n        fill_task.cancel()\n\n\n@pytest.mark.large\nasync def test_cancel_replication_immediately(df_factory, df_seeder_factory: DflySeederFactory):\n    \"\"\"\n    Issue 100 replication commands. This checks that the replication state\n    machine can handle cancellation well.\n    \"\"\"\n    COMMANDS_TO_ISSUE = 100\n\n    replica = df_factory.create()\n    master = df_factory.create()\n    df_factory.start_all([replica, master])\n\n    seeder = df_seeder_factory.create(port=master.port)\n    c_replica = replica.client(socket_timeout=80)\n\n    await seeder.run(target_deviation=0.1)\n\n    async def ping_status():\n        while True:\n            await c_replica.info()\n            await asyncio.sleep(0.05)\n\n    async def replicate():\n        await c_replica.execute_command(f\"REPLICAOF localhost {master.port}\")\n        return True\n\n    ping_job = asyncio.create_task(ping_status())\n    replication_commands = [asyncio.create_task(replicate()) for _ in range(COMMANDS_TO_ISSUE)]\n\n    num_successes = 0\n    for result in asyncio.as_completed(replication_commands, timeout=80):\n        num_successes += await result\n\n    logging.info(f\"succeses: {num_successes}\")\n    assert COMMANDS_TO_ISSUE == num_successes\n\n    await wait_available_async(c_replica)\n    capture = await seeder.capture()\n    logging.info(f\"number of items captured {len(capture)}\")\n    assert await seeder.compare(capture, 
replica.port)\n\n    ping_job.cancel()\n\n    replica.stop()\n    lines = replica.find_in_logs(\"Stopping replication\")\n    # Cancelled 99 times by REPLICAOF command and once by Shutdown() because\n    # we stopped the instance\n    assert len(lines) == COMMANDS_TO_ISSUE\n\n\n\"\"\"\nTest flushall command. Set data to master, send flushall and set more data.\nCheck replica keys at the end.\n\"\"\"\n\n\nasync def test_flushall(df_factory):\n    master = df_factory.create(proactor_threads=4)\n    replica = df_factory.create(proactor_threads=2)\n\n    master.start()\n    replica.start()\n\n    # Connect replica to master\n    c_replica = replica.client()\n    await c_replica.execute_command(f\"REPLICAOF localhost {master.port}\")\n\n    n_keys = 1000\n\n    def gen_test_data(start, end):\n        for i in range(start, end):\n            yield f\"key-{i}\", f\"value-{i}\"\n\n    c_master = master.client()\n    pipe = c_master.pipeline(transaction=False)\n    # Set simple keys 0..n_keys on master\n    batch_fill_data(client=pipe, gen=gen_test_data(0, n_keys), batch_size=3)\n    # flushall\n    pipe.flushall()\n    # Set simple keys n_keys..n_keys*2 on master\n    batch_fill_data(client=pipe, gen=gen_test_data(n_keys, n_keys * 2), batch_size=3)\n\n    await pipe.execute()\n    # Check replica finished executing the replicated commands\n    await check_all_replicas_finished([c_replica], c_master)\n\n    # Check replica keys 0..n_keys-1 don't exist\n    pipe = c_replica.pipeline(transaction=False)\n    for i in range(n_keys):\n        pipe.get(f\"key-{i}\")\n    vals = await pipe.execute()\n    assert all(v is None for v in vals)\n\n    # Check replica keys n_keys..n_keys*2-1 exist\n    for i in range(n_keys, n_keys * 2):\n        pipe.get(f\"key-{i}\")\n    vals = await pipe.execute()\n    assert all(v is not None for v in vals)\n\n\n\"\"\"\nTest journal rewrites.\n\"\"\"\n\n\n@dfly_args({\"proactor_threads\": 4})\nasync def test_rewrites(df_factory):\n    CLOSE_TIMESTAMP 
= int(time.time()) + 100\n    CLOSE_TIMESTAMP_MS = CLOSE_TIMESTAMP * 1000\n\n    master = df_factory.create()\n    replica = df_factory.create()\n\n    master.start()\n    replica.start()\n\n    # Connect clients, connect replica to master\n    c_master = master.client()\n    c_replica = replica.client()\n    await c_replica.execute_command(f\"REPLICAOF localhost {master.port}\")\n    await wait_available_async(c_replica)\n\n    # Create monitor and bind utility functions\n    m_replica = c_replica.monitor()\n\n    async def get_next_command():\n        mcmd = (await m_replica.next_command())[\"command\"]\n        # skip select command\n        while mcmd == \"SELECT 0\" or mcmd.startswith(\"CLIENT SETINFO\"):\n            mcmd = (await m_replica.next_command())[\"command\"]\n        print(\"Got:\", mcmd)\n        return mcmd\n\n    async def is_match_rsp(rx):\n        mcmd = await get_next_command()\n        print(mcmd, rx)\n        return re.match(rx, mcmd)\n\n    async def skip_cmd():\n        await is_match_rsp(r\".*\")\n\n    async def skip_cmds(n):\n        for _ in range(n):\n            await skip_cmd()\n\n    async def check(cmd, rx):\n        await c_master.execute_command(cmd)\n        match = await is_match_rsp(rx)\n        assert match\n\n    async def check_list(cmd, rx_list):\n        print(\"master cmd:\", cmd)\n        await c_master.execute_command(cmd)\n        for rx in rx_list:\n            match = await is_match_rsp(rx)\n            assert match\n\n    async def check_list_ooo(cmd, rx_list):\n        print(\"master cmd:\", cmd)\n        await c_master.execute_command(cmd)\n        expected_cmds = len(rx_list)\n        for i in range(expected_cmds):\n            mcmd = await get_next_command()\n            # check command matches one regex from list\n            match_rx = list(filter(lambda rx: re.match(rx, mcmd), rx_list))\n            assert len(match_rx) == 1\n            rx_list.remove(match_rx[0])\n\n    async def check_expire(key):\n     
   ttl1 = await c_master.ttl(key)\n        ttl2 = await c_replica.ttl(key)\n        await skip_cmd()\n        assert abs(ttl1 - ttl2) <= 1\n\n    async with m_replica:\n        # CHECK EXPIRE, PEXPIRE, PEXPIRE turn into EXPIREAT\n        await c_master.set(\"k-exp\", \"v\")\n        await skip_cmd()\n        await check(\"EXPIRE k-exp 100\", r\"PEXPIREAT k-exp (.*?)\")\n        await check_expire(\"k-exp\")\n        await check(\"PEXPIRE k-exp 50000\", r\"PEXPIREAT k-exp (.*?)\")\n        await check_expire(\"k-exp\")\n        await check(f\"EXPIREAT k-exp {CLOSE_TIMESTAMP}\", rf\"PEXPIREAT k-exp {CLOSE_TIMESTAMP_MS}\")\n\n        # Check SPOP turns into SREM or SDEL\n        await c_master.sadd(\"k-set\", \"v1\", \"v2\", \"v3\")\n        await skip_cmd()\n        await check(\"SPOP k-set 1\", r\"SREM k-set (v1|v2|v3)\")\n        await check(\"SPOP k-set 2\", r\"DEL k-set\")\n\n        # Check SET + {EX/PX/EXAT} + {XX/NX/GET} arguments turns into SET PXAT\n        await check(f\"SET k v EX 100 NX GET\", r\"SET k v PXAT (.*?)\")\n        await check_expire(\"k\")\n        await check(f\"SET k v PX 50000\", r\"SET k v PXAT (.*?)\")\n        await check_expire(\"k\")\n        # Exact expiry is skewed\n        await check(f\"SET k v XX EXAT {CLOSE_TIMESTAMP}\", rf\"SET k v PXAT (.*?)\")\n        await check_expire(\"k\")\n\n        # Check SET + KEEPTTL doesn't loose KEEPTTL\n        await check(f\"SET k v KEEPTTL\", r\"SET k v KEEPTTL\")\n\n        # Check SETEX/PSETEX turn into SET PXAT\n        await check(\"SETEX k 100 v\", r\"SET k v PXAT (.*?)\")\n        await check_expire(\"k\")\n        await check(\"PSETEX k 500000 v\", r\"SET k v PXAT (.*?)\")\n        await check_expire(\"k\")\n\n        # Check GETEX turns into PEXPIREAT or PERSIST\n        await check(\"GETEX k PERSIST\", r\"PERSIST k\")\n        await check_expire(\"k\")\n        await check(\"GETEX k EX 100\", r\"PEXPIREAT k (.*?)\")\n        await check_expire(\"k\")\n\n        # Check SDIFFSTORE turns 
into DEL and SADD\n        await c_master.sadd(\"set1\", \"v1\", \"v2\", \"v3\")\n        await c_master.sadd(\"set2\", \"v1\", \"v2\")\n        await skip_cmd()\n        await skip_cmd()\n        await check_list(\"SDIFFSTORE k set1 set2\", [r\"DEL k\", r\"SADD k v3\"])\n\n        # Check SINTERSTORE turns into DEL and SADD\n        await check_list(\"SINTERSTORE k set1 set2\", [r\"DEL k\", r\"SADD k (.*?)\"])\n\n        # Check SMOVE turns into SREM and SADD\n        await check_list_ooo(\"SMOVE set1 set2 v3\", [r\"SREM set1 v3\", r\"SADD set2 v3\"])\n\n        # Check SUNIONSTORE turns into DEL and SADD\n        await check_list_ooo(\"SUNIONSTORE k set1 set2\", [r\"DEL k\", r\"SADD k (.*?)\"])\n\n        # Check ZDIFFSTORE turns into DEL and ZADD\n        await c_master.execute_command(\"zadd zet1 1 v1 2 v2 3 v3\")\n        await c_master.execute_command(\"zadd zet2 1 v1 2 v2\")\n        await skip_cmd()\n        await skip_cmd()\n        await check_list(\"ZDIFFSTORE k 2 zet1 zet2\", [r\"DEL k\", r\"ZADD k 3 v3\"])\n\n        # Check ZINTERSTORE turns into DEL and ZADD\n        await check_list(\"ZINTERSTORE k 2 zet1 zet2\", [r\"DEL k\", r\"ZADD k (.*?)\"])\n\n        # Check ZRANGESTORE turns into SREM and ZADD\n        await check_list_ooo(\"ZRANGESTORE k zet1 2 -1\", [r\"DEL k\", r\"ZADD k 3 v3\"])\n\n        # Check ZUNIONSTORE turns into DEL and ZADD\n        await check_list_ooo(\"ZUNIONSTORE k 2 zet1 zet2\", [r\"DEL k\", r\"ZADD k (.*?)\"])\n\n        await c_master.set(\"k1\", \"1000\")\n        await c_master.set(\"k2\", \"1100\")\n        await skip_cmd()\n        await skip_cmd()\n        # Check BITOP turns into SET\n        await check(\"BITOP OR kdest k1 k2\", r\"SET kdest 1100\")\n        # See gh issue #3528\n        await c_master.execute_command(f\"HSET foo bar val\")\n        await skip_cmd()\n        await check(\"BITOP NOT foo tmp\", r\"DEL foo\")\n        await c_master.execute_command(f\"HSET foo bar val\")\n        await skip_cmd()\n     
   await c_master.set(\"k3\", \"-\")\n        await skip_cmd()\n        await check(\"BITOP NOT foo k3\", r\"SET foo \\\\xd2\")\n\n        # Check there is no rewrite for LMOVE on single shard\n        await c_master.lpush(\"list\", \"v1\", \"v2\", \"v3\", \"v4\")\n        await skip_cmd()\n        # Check LMOVE/BLMOVE turns into POP PUSH\n        await check_list_ooo(\"LMOVE list list LEFT RIGHT\", [r\"LPOP list\", r\"RPUSH list v4\"])\n        await check_list_ooo(\"BLMOVE list list RIGHT LEFT 0\", [r\"RPOP list\", r\"LPUSH list v4\"])\n\n        # Check RPOPLPUSH turns into RPOP LPUSH\n        await check_list_ooo(\"RPOPLPUSH list list\", [r\"RPOP list\", r\"LPUSH list v1\"])\n        # Check BRPOPLPUSH turns into RPOP LPUSH\n        await check_list_ooo(\"BRPOPLPUSH list list 0\", [r\"RPOP list\", r\"LPUSH list v2\"])\n        # Check BLPOP turns into LPOP\n        await check(\"BLPOP list list1 0\", r\"LPOP list\")\n        # Check BRPOP turns into RPOP\n        await check(\"BRPOP list 0\", r\"RPOP list\")\n\n        await c_master.lpush(\"list1s\", \"v1\", \"v2\", \"v3\", \"v4\")\n        await skip_cmd()\n        # Check LMOVE turns into LPUSH LPOP on multi shard\n        await check_list_ooo(\"LMOVE list1s list2s LEFT LEFT\", [r\"LPUSH list2s v4\", r\"LPOP list1s\"])\n        # Check RPOPLPUSH turns into LPUSH RPOP on multi shard\n        await check_list_ooo(\"RPOPLPUSH list1s list2s\", [r\"LPUSH list2s v1\", r\"RPOP list1s\"])\n        # Check BRPOPLPUSH turns into LPUSH RPOP on multi shard\n        await check_list_ooo(\"BRPOPLPUSH list1s list2s 0\", [r\"LPUSH list2s v2\", r\"RPOP list1s\"])\n\n        await check(\"LMPOP 2 list list1s LEFT\", r\"LPOP list\")\n        await check(\"BLMPOP 0 2 list1s list RIGHT\", r\"RPOP list1s\")\n\n        # MOVE runs as global command, check only one journal entry is sent\n        await check(\"MOVE list2s 2\", r\"MOVE list2s 2\")\n\n        await c_master.set(\"renamekey\", \"1000\", px=50000)\n        await 
skip_cmd()\n        # Check RENAME turns into DEL and RESTORE\n        await check_list_ooo(\n            \"RENAME renamekey renamed\",\n            [r\"DEL renamekey\", r\"RESTORE renamed (.*?) (.*?) REPLACE ABSTTL\"],\n        )\n        await check_expire(\"renamed\")\n        # Check RENAMENX turns into DEL and RESTORE\n        await check_list_ooo(\n            \"RENAMENX renamed renamekey\",\n            [r\"DEL renamed\", r\"RESTORE renamekey (.*?) (.*?) REPLACE ABSTTL\"],\n        )\n        await check_expire(\"renamekey\")\n\n        # Test autojournaling in the multi-mode\n        await c_master.execute_command(\"XADD k-stream * field value\")\n        await c_master.execute_command(\"SADD k-one-element-set value1 value2\")\n        sha = await c_master.script_load(\n            \"redis.call('XTRIM', KEYS[1], 'MINID', '0'); return redis.call('SPOP', KEYS[2]);\"\n        )\n        await skip_cmds(3)\n        # The first call to XTRIM triggers autojournaling.\n        # The SPOP command is executed with CO::NO_AUTOJOURNALING.\n        # This test ensures that the SPOP command is still properly replicated\n        await check_list_ooo(\n            f\"EVALSHA {sha} 2 k-stream k-one-element-set\",\n            [r\"XTRIM k-stream MINID 0\", r\"SREM k-one-element-set value[12]\"],\n        )\n\n        # TODO next Z-tests won't work with no-point-in-time replication\n        # check BZMPOP turns into ZPOPMAX and ZPOPMIN command\n        await c_master.zadd(\"key\", {\"a\": 1, \"b\": 2, \"c\": 3})\n        await skip_cmd()\n        await check(\"BZMPOP 0 3 key3 key2 key MAX COUNT 3\", r\"ZPOPMAX key 3\")\n\n        await c_master.zadd(\"key\", {\"a\": 1, \"b\": 2, \"c\": 3})\n        await skip_cmd()\n        await check(\"BZMPOP 0 3 key3 key2 key MIN\", r\"ZPOPMIN key 1\")\n\n        # Check ZMPOP turns into ZPOPMAX and ZPOPMIN commands\n        await c_master.zadd(\"key\", {\"a\": 1, \"b\": 2, \"c\": 3})\n        await skip_cmd()\n        await check(\"ZMPOP 
3 key3 key2 key MIN COUNT 3\", r\"ZPOPMIN key 3\")\n\n        await c_master.zadd(\"key\", {\"a\": 1, \"b\": 2, \"c\": 3})\n        await skip_cmd()\n        await check(\"ZMPOP 3 key3 key2 key MAX\", r\"ZPOPMAX key 1\")\n\n        # Check XREADGROUP turns into XGROUP SETID + XCLAIM (for non-NOACK)\n        await c_master.execute_command(\"XGROUP CREATE mystream mygroup $ MKSTREAM\")\n        await skip_cmd()\n        await c_master.execute_command(\"XADD mystream * field1 value1\")\n        await skip_cmd()\n        # XREADGROUP without NOACK should journal XCLAIM + XGROUP SETID\n        await c_master.execute_command(\"XREADGROUP GROUP mygroup consumer1 STREAMS mystream >\")\n        # Consumer creation\n        assert await is_match_rsp(\"XGROUP CREATECONSUMER mystream mygroup consumer1\")\n        # Expect XCLAIM for the message + XGROUP SETID with ENTRIESREAD\n        assert await is_match_rsp(\n            r\"XCLAIM mystream mygroup consumer1 0 (.*?) TIME \\d+ RETRYCOUNT 1 FORCE JUSTID LASTID (.*?)\"\n        )\n        assert await is_match_rsp(r\"XGROUP SETID mystream mygroup (.*?) ENTRIESREAD 1\")\n\n        # Check XREADGROUP with NOACK only journals XGROUP SETID\n        await c_master.execute_command(\"XADD mystream * field2 value2\")\n        await skip_cmd()\n        await c_master.execute_command(\n            \"XREADGROUP GROUP mygroup consumer1 NOACK STREAMS mystream >\"\n        )\n        # With NOACK, only XGROUP SETID should be journaled (no XCLAIM)\n        assert await is_match_rsp(r\"XGROUP SETID mystream mygroup (.*?) 
ENTRIESREAD 2\")\n\n\n\"\"\"\nTest automatic replication of expiry.\n\"\"\"\n\n\n@dfly_args({\"proactor_threads\": 4})\nasync def test_expiry(df_factory: DflyInstanceFactory, n_keys=1000):\n    master = df_factory.create()\n    replica = df_factory.create()\n\n    df_factory.start_all([master, replica])\n\n    c_master = master.client()\n    c_replica = replica.client()\n\n    # Connect replica to master\n    await c_replica.execute_command(f\"REPLICAOF localhost {master.port}\")\n\n    # Set keys\n    pipe = c_master.pipeline(transaction=False)\n    batch_fill_data(pipe, gen_test_data(n_keys))\n    await pipe.execute()\n\n    # Check replica finished executing the replicated commands\n    await check_all_replicas_finished([c_replica], c_master)\n    # Check keys are on replica\n    res = await c_replica.mget(k for k, _ in gen_test_data(n_keys))\n    assert all(v is not None for v in res)\n\n    # Set key different expries times in ms\n    pipe = c_master.pipeline(transaction=True)\n    for k, _ in gen_test_data(n_keys):\n        ms = random.randint(20, 500)\n        pipe.pexpire(k, ms)\n    await pipe.execute()\n\n    # send more traffic for differnt dbs while keys are expired\n    for i in range(8):\n        is_multi = i % 2\n        async with aioredis.Redis(port=master.port, db=i) as c_master_db:\n            pipe = c_master_db.pipeline(transaction=is_multi)\n            # Set simple keys n_keys..n_keys*2 on master\n            start_key = n_keys * (i + 1)\n            end_key = start_key + n_keys\n            batch_fill_data(client=pipe, gen=gen_test_data(end_key, start_key), batch_size=20)\n\n            await pipe.execute()\n\n    # Wait for master to expire keys\n    await asyncio.sleep(3.0)\n\n    # Check all keys with expiry have been deleted\n    res = await c_master.mget(k for k, _ in gen_test_data(n_keys))\n    assert all(v is None for v in res)\n\n    # Check replica finished executing the replicated commands\n    await 
check_all_replicas_finished([c_replica], c_master)\n    res = await c_replica.mget(k for k, _ in gen_test_data(n_keys))\n    assert all(v is None for v in res)\n\n    # Set expired keys again\n    pipe = c_master.pipeline(transaction=False)\n    batch_fill_data(pipe, gen_test_data(n_keys))\n    for k, _ in gen_test_data(n_keys):\n        pipe.pexpire(k, 500)\n    await pipe.execute()\n    await asyncio.sleep(1.0)\n    # Disconnect from master\n    await c_replica.execute_command(\"REPLICAOF NO ONE\")\n    # Check replica expires keys on its own\n    await asyncio.sleep(1.0)\n    res = await c_replica.mget(k for k, _ in gen_test_data(n_keys))\n    assert all(v is None for v in res)\n\n\n@dfly_args({\"proactor_threads\": 4})\nasync def test_simple_scripts(df_factory: DflyInstanceFactory):\n    master = df_factory.create()\n    replicas = [df_factory.create() for _ in range(2)]\n    df_factory.start_all([master] + replicas)\n\n    c_replicas = [replica.client() for replica in replicas]\n    c_master = master.client()\n\n    # Connect replicas and wait for sync to finish\n    for c_replica in c_replicas:\n        await c_replica.execute_command(f\"REPLICAOF localhost {master.port}\")\n    await check_all_replicas_finished([c_replica], c_master)\n\n    # Generate some scripts and run them\n    keys = [\"a\", \"b\", \"c\", \"d\", \"e\"]\n    for i in range(len(keys) + 1):\n        script = \"\"\n        subkeys = keys[:i]\n        for key in subkeys:\n            script += f\"redis.call('INCR', '{key}')\"\n            script += f\"redis.call('INCR', '{key}')\"\n\n        await c_master.eval(script, len(subkeys), *subkeys)\n\n    # Wait for replicas\n    await check_all_replicas_finished([c_replica], c_master)\n\n    for c_replica in c_replicas:\n        assert (await c_replica.mget(keys)) == [\"10\", \"8\", \"6\", \"4\", \"2\"]\n\n\n\"\"\"\nTest script replication.\n\nFill multiple lists with values and rotate them one by one with LMOVE until they're at the same place 
again.\n\"\"\"\n\n# t_master, t_replicas, num_ops, num_keys, num_parallel, flags\nscript_cases = [\n    (4, [4, 4, 4], 50, 5, 5, \"\"),\n    (4, [4, 4, 4], 50, 5, 5, \"disable-atomicity\"),\n]\n\nscript_test_s1 = \"\"\"\n{flags}\nlocal N = ARGV[1]\n\n-- fill each list with its k value\nfor i, k in pairs(KEYS) do\n  for j = 1, N do\n    redis.call('LPUSH', k, i-1)\n  end\nend\n\n-- rotate #KEYS times\nfor l = 1, #KEYS do\n  for j = 1, N do\n    for i, k in pairs(KEYS) do\n      redis.call('LMOVE', k, KEYS[i%#KEYS+1], 'LEFT', 'RIGHT')\n    end\n  end\nend\n\n\nreturn 'OK'\n\"\"\"\n\n\n@pytest.mark.parametrize(\"t_master, t_replicas, num_ops, num_keys, num_par, flags\", script_cases)\nasync def test_scripts(df_factory, t_master, t_replicas, num_ops, num_keys, num_par, flags):\n    master = df_factory.create(proactor_threads=t_master)\n    replicas = [df_factory.create(proactor_threads=t) for i, t in enumerate(t_replicas)]\n\n    df_factory.start_all([master] + replicas)\n\n    c_master = master.client()\n    c_replicas = [replica.client() for replica in replicas]\n    for c_replica in c_replicas:\n        await c_replica.execute_command(f\"REPLICAOF localhost {master.port}\")\n        await wait_available_async(c_replica)\n\n    script = script_test_s1.format(flags=f\"--!df flags={flags}\" if flags else \"\")\n    sha = await c_master.script_load(script)\n\n    key_sets = [[f\"{i}-{j}\" for j in range(num_keys)] for i in range(num_par)]\n\n    rsps = await asyncio.gather(\n        *(c_master.evalsha(sha, len(keys), *keys, num_ops) for keys in key_sets)\n    )\n    assert rsps == [\"OK\"] * num_par\n\n    await check_all_replicas_finished(c_replicas, c_master)\n\n    for c_replica in c_replicas:\n        for key_set in key_sets:\n            for j, k in enumerate(key_set):\n                l = await c_replica.lrange(k, 0, -1)\n                assert l == [f\"{j}\"] * num_ops\n\n\n@dfly_args({\"proactor_threads\": 4})\nasync def test_auth_master(df_factory, 
n_keys=20):\n    masterpass = \"requirepass\"\n    replicapass = \"replicapass\"\n    master = df_factory.create(requirepass=masterpass)\n    replica = df_factory.create(logtostdout=True, masterauth=masterpass, requirepass=replicapass)\n\n    df_factory.start_all([master, replica])\n\n    c_master = master.client(password=masterpass)\n    c_replica = replica.client(password=replicapass)\n\n    # Connect replica to master\n    await c_replica.execute_command(f\"REPLICAOF localhost {master.port}\")\n\n    # Set keys\n    pipe = c_master.pipeline(transaction=False)\n    batch_fill_data(pipe, gen_test_data(n_keys))\n    await pipe.execute()\n\n    # Check replica finished executing the replicated commands\n    await check_all_replicas_finished([c_replica], c_master)\n    # Check keys are on replica\n    res = await c_replica.mget(k for k, _ in gen_test_data(n_keys))\n    assert all(v is not None for v in res)\n    await c_master.connection_pool.disconnect()\n    await c_replica.connection_pool.disconnect()\n\n\nSCRIPT_TEMPLATE = \"return {}\"\n\n\n@dfly_args({\"proactor_threads\": 2})\nasync def test_script_transfer(df_factory):\n    master = df_factory.create()\n    replica = df_factory.create()\n\n    df_factory.start_all([master, replica])\n\n    c_master = master.client()\n    c_replica = replica.client()\n\n    # Load some scripts into master ahead\n    scripts = []\n    for i in range(0, 10):\n        sha = await c_master.script_load(SCRIPT_TEMPLATE.format(i))\n        scripts.append(sha)\n\n    await c_replica.execute_command(f\"REPLICAOF localhost {master.port}\")\n    await wait_available_async(c_replica)\n\n    # transfer in stable state\n    for i in range(10, 20):\n        sha = await c_master.script_load(SCRIPT_TEMPLATE.format(i))\n        scripts.append(sha)\n\n    await check_all_replicas_finished([c_replica], c_master)\n    await c_replica.execute_command(\"REPLICAOF NO ONE\")\n\n    for i, sha in enumerate(scripts):\n        assert await 
c_replica.evalsha(sha, 0) == i\n    await c_master.connection_pool.disconnect()\n    await c_replica.connection_pool.disconnect()\n\n\n@dfly_args({\"proactor_threads\": 4})\nasync def test_role_command(df_factory, n_keys=20):\n    master = df_factory.create()\n    replica = df_factory.create()\n\n    df_factory.start_all([master, replica])\n\n    c_master = master.client()\n    c_replica = replica.client()\n\n    assert await c_master.execute_command(\"role\") == [\"master\", []]\n    await c_replica.execute_command(f\"REPLICAOF localhost {master.port}\")\n    await wait_available_async(c_replica)\n\n    # It may take a bit more time to actually propagate the role change\n    # See https://github.com/dragonflydb/dragonfly/pull/2111\n    await asyncio.sleep(1)\n\n    assert await c_master.execute_command(\"role\") == [\n        \"master\",\n        [[\"127.0.0.1\", str(replica.port), \"online\"]],\n    ]\n    assert await c_replica.execute_command(\"role\") == [\n        \"slave\",\n        \"localhost\",\n        str(master.port),\n        \"online\",\n    ]\n\n    # This tests that we react fast to socket shutdowns and don't hang on\n    # things like the ACK or execution fibers.\n    master.stop()\n    await asyncio.sleep(0.1)\n    assert await c_replica.execute_command(\"role\") == [\n        \"slave\",\n        \"localhost\",\n        str(master.port),\n        \"connecting\",\n    ]\n\n    await c_master.connection_pool.disconnect()\n    await c_replica.connection_pool.disconnect()\n\n\ndef parse_lag(replication_info: str):\n    lags = re.findall(\"lag=([0-9]+)\\r\\n\", replication_info)\n    assert len(lags) == 1\n    return int(lags[0])\n\n\nasync def get_metric_value(inst, metric_name, sample_index=0):\n    return (await inst.metrics())[metric_name].samples[sample_index].value\n\n\nasync def assert_lag_condition(inst, client, condition):\n    \"\"\"\n    Since lag is a bit random, and we want stable tests, we check\n    10 times in quick succession and 
validate that the condition\n    is satisfied at least once.\n    We check both `INFO REPLICATION` redis protocol and the `/metrics`\n    prometheus endpoint.\n    \"\"\"\n    for _ in range(10):\n        lag = await get_metric_value(inst, \"dragonfly_connected_replica_lag_records\")\n        if condition(lag):\n            break\n        print(\"current prometheus lag =\", lag)\n        await asyncio.sleep(0.05)\n    else:\n        assert False, \"Lag from prometheus metrics has never satisfied condition!\"\n    for _ in range(10):\n        lag = parse_lag(await client.execute_command(\"info replication\"))\n        if condition(lag):\n            break\n        print(\"current lag =\", lag)\n        await asyncio.sleep(0.05)\n    else:\n        assert False, \"Lag has never satisfied condition!\"\n\n\nasync def get_replica_reconnects_count(replica_inst):\n    return await get_metric_value(replica_inst, \"dragonfly_replica_reconnect_count\")\n\n\nasync def assert_replica_reconnections(replica_inst, initial_reconnects_count):\n    \"\"\"\n    Asserts that the replica has attempted to reconnect at least once.\n    \"\"\"\n    reconnects_count = await get_replica_reconnects_count(replica_inst)\n    if reconnects_count > initial_reconnects_count:\n        return\n\n    assert (\n        False\n    ), f\"Expected reconnect count to increase by at least 1, but it did not. 
Initial dragonfly_replica_reconnect_count: {initial_reconnects_count}, current count: {reconnects_count}\"\n\n\n@dfly_args({\"proactor_threads\": 2})\nasync def test_replication_info(df_factory: DflyInstanceFactory, df_seeder_factory, n_keys=2000):\n    master = df_factory.create()\n    replica = df_factory.create(replication_acks_interval=100)\n    df_factory.start_all([master, replica])\n    c_master = master.client()\n    c_replica = replica.client()\n\n    await c_replica.execute_command(f\"REPLICAOF localhost {master.port}\")\n    await wait_available_async(c_replica)\n    await assert_lag_condition(master, c_master, lambda lag: lag == 0)\n\n    seeder = df_seeder_factory.create(port=master.port, keys=n_keys, dbcount=2)\n    fill_task = asyncio.create_task(seeder.run(target_ops=3000))\n    await assert_lag_condition(master, c_master, lambda lag: lag > 30)\n    seeder.stop()\n\n    await fill_task\n    await wait_available_async(c_replica)\n    await assert_lag_condition(master, c_master, lambda lag: lag == 0)\n\n    await c_master.connection_pool.disconnect()\n    await c_replica.connection_pool.disconnect()\n\n\n\"\"\"\nTest flushall command that's invoked while in full sync mode.\nThis can cause an issue because it will be executed on each shard independently.\nMore details in https://github.com/dragonflydb/dragonfly/issues/1231\n\"\"\"\n\n\n@pytest.mark.large\n@pytest.mark.exclude_epoll\nasync def test_flushall_in_full_sync(df_factory):\n    master = df_factory.create(proactor_threads=4)\n    replica = df_factory.create(proactor_threads=2)\n\n    df_factory.start_all([master, replica])\n    c_master = master.client()\n    c_replica = replica.client()\n\n    # Fill master with test data\n    seeder = DebugPopulateSeeder(key_target=100_000)\n    await seeder.run(c_master)\n\n    # Start replication and wait for full sync\n    await c_replica.execute_command(f\"REPLICAOF localhost {master.port}\")\n    async with async_timeout.timeout(3):\n        await 
wait_for_replicas_state(c_replica, state=\"full_sync\", timeout=0.05)\n\n    syncid, _ = await c_replica.execute_command(\"DEBUG REPLICA OFFSET\")\n\n    # Issue FLUSHALL and record replica role at the same instant\n    _, role = await asyncio.gather(c_master.execute_command(\"FLUSHALL\"), c_replica.role())\n\n    # Print warning if replication was too quick\n    if role[3] != \"full_sync\":\n        logging.error(\"!!! Full sync finished too fast. Adjust test parameters !!!\")\n        return\n\n    # Run a few more commands on top\n    post_seeder = SeederV2(key_target=100)\n    await post_seeder.run(c_master, target_deviation=0.1)\n\n    await check_all_replicas_finished([c_replica], c_master)\n\n    # Check replica data consisten\n    hash1, hash2 = await asyncio.gather(*(SeederV2.capture(c) for c in (c_master, c_replica)))\n    assert hash1 == hash2\n\n    # Make sure that a new sync ID is present, meaning replication restarted following FLUSHALL.\n    new_syncid, _ = await c_replica.execute_command(\"DEBUG REPLICA OFFSET\")\n    assert new_syncid != syncid\n\n\n\"\"\"\nTest read-only scripts work with replication. 
EVAL_RO and the 'no-writes' flags are currently not supported.\n\"\"\"\n\nREADONLY_SCRIPT = \"\"\"\nredis.call('GET', 'A')\nredis.call('EXISTS', 'B')\nreturn redis.call('GET', 'WORKS')\n\"\"\"\n\nWRITE_SCRIPT = \"\"\"\nredis.call('SET', 'A', 'ErrroR')\n\"\"\"\n\n\nasync def test_readonly_script(df_factory):\n    master = df_factory.create(proactor_threads=2)\n    replica = df_factory.create(proactor_threads=2)\n\n    df_factory.start_all([master, replica])\n\n    c_master = master.client()\n    c_replica = replica.client()\n\n    await c_master.set(\"WORKS\", \"YES\")\n\n    await c_replica.execute_command(f\"REPLICAOF localhost {master.port}\")\n    await wait_available_async(c_replica)\n\n    await c_replica.eval(READONLY_SCRIPT, 3, \"A\", \"B\", \"WORKS\") == \"YES\"\n\n    with pytest.raises(aioredis.ResponseError) as roe:\n        await c_replica.eval(WRITE_SCRIPT, 1, \"A\")\n\n\ntake_over_cases = [\n    [2, 2],\n    [2, 4],\n    [4, 2],\n    [8, 8],\n]\n\n\n@pytest.mark.exclude_epoll\n@pytest.mark.parametrize(\"master_threads, replica_threads\", take_over_cases)\nasync def test_take_over_counters(df_factory, master_threads, replica_threads):\n    master = df_factory.create(proactor_threads=master_threads)\n    replica1 = df_factory.create(proactor_threads=replica_threads)\n    replica2 = df_factory.create(proactor_threads=replica_threads)\n    replica3 = df_factory.create(proactor_threads=replica_threads)\n    df_factory.start_all([master, replica1, replica2, replica3])\n    c_master = master.client()\n    c1 = replica1.client()\n    c_blocking = master.client()\n    c2 = replica2.client()\n    c3 = replica3.client()\n\n    await c1.execute_command(f\"REPLICAOF localhost {master.port}\")\n    await c2.execute_command(f\"REPLICAOF localhost {master.port}\")\n    await c3.execute_command(f\"REPLICAOF localhost {master.port}\")\n\n    await wait_available_async(c1)\n\n    async def counter(key):\n        value = 0\n        await c_master.execute_command(f\"SET 
{key} 0\")\n        start = time.time()\n        while time.time() - start < 20:\n            try:\n                value = await c_master.execute_command(f\"INCR {key}\")\n            except (redis.exceptions.ConnectionError, redis.exceptions.ResponseError) as e:\n                break\n        else:\n            assert False, \"The incrementing loop should be exited with a connection error\"\n        return key, value\n\n    async def block_during_takeover():\n        \"Add a blocking command during takeover to make sure it doesn't block it.\"\n        start = time.time()\n        # The command should just be canceled\n        assert await c_blocking.execute_command(\"BLPOP BLOCKING_KEY1 BLOCKING_KEY2 100\") is None\n        # And it should happen in reasonable amount of time.\n        assert time.time() - start < 10\n\n    async def delayed_takeover():\n        await asyncio.sleep(1)\n        await c1.execute_command(f\"REPLTAKEOVER 5\")\n\n    _, _, *results = await asyncio.gather(\n        delayed_takeover(), block_during_takeover(), *[counter(f\"key{i}\") for i in range(16)]\n    )\n    assert await c1.execute_command(\"role\") == [\"master\", []]\n\n    for key, client_value in results:\n        replicated_value = await c1.get(key)\n        assert client_value == int(replicated_value)\n\n\n@pytest.mark.exclude_epoll\n@pytest.mark.parametrize(\"master_threads, replica_threads\", take_over_cases)\nasync def test_take_over_seeder(\n    request, df_factory, df_seeder_factory, master_threads, replica_threads\n):\n    master = df_factory.create(\n        proactor_threads=master_threads, dbfilename=f\"dump_{tmp_file_name()}\", admin_port=ADMIN_PORT\n    )\n    replica = df_factory.create(proactor_threads=replica_threads)\n    df_factory.start_all([master, replica])\n\n    seeder = df_seeder_factory.create(port=master.port, keys=1000, dbcount=5, stop_on_failure=False)\n\n    c_replica = replica.client()\n\n    await c_replica.execute_command(f\"REPLICAOF localhost 
{master.admin_port}\")\n    await wait_available_async(c_replica)\n\n    fill_task = asyncio.create_task(seeder.run())\n\n    stop_info = False\n\n    async def info_replication():\n        my_client = replica.client()\n        while not stop_info:\n            await my_client.info(\"replication\")\n            await asyncio.sleep(0.5)\n\n    info_task = asyncio.create_task(info_replication())\n\n    # Give the seeder a bit of time.\n    await asyncio.sleep(3)\n    logging.debug(\"running repltakover\")\n    await c_replica.execute_command(f\"REPLTAKEOVER 30 SAVE\")\n    logging.debug(\"after running repltakover\")\n    seeder.stop()\n    await fill_task\n\n    assert await c_replica.execute_command(\"role\") == [\"master\", []]\n    stop_info = True\n    await info_task\n\n    @assert_eventually\n    async def assert_master_exists():\n        assert master.proc.poll() == 0, \"Master process did not exit correctly.\"\n\n    await assert_master_exists()\n\n    master.start()\n    c_master = master.client()\n    await wait_available_async(c_master)\n\n    capture = await seeder.capture(port=master.port)\n    assert await seeder.compare(capture, port=replica.port)\n\n\n@pytest.mark.parametrize(\"master_threads, replica_threads\", [[4, 4]])\nasync def test_take_over_read_commands(df_factory, master_threads, replica_threads):\n    master = df_factory.create(proactor_threads=master_threads)\n    replica = df_factory.create(proactor_threads=replica_threads)\n    df_factory.start_all([master, replica])\n\n    c_master = master.client(socket_timeout=1, socket_connect_timeout=1)\n    await c_master.execute_command(\"SET foo bar\")\n\n    c_replica = replica.client()\n    await c_replica.execute_command(f\"REPLICAOF localhost {master.port}\")\n    await wait_available_async(c_replica)\n\n    async def prompt():\n        client = replica.client()\n        master_alive = True\n        for i in range(10):\n            # TODO remove try block when we no longer shut down master 
after take over\n            if master_alive:\n                try:\n                    res = await c_master.execute_command(\"GET foo\")\n                    assert res == \"bar\"\n                    res = await c_master.execute_command(\"CONFIG SET aclfile myfile\")\n                    assert res == \"OK\"\n                except:\n                    master_alive = False\n            res = await client.execute_command(\"GET foo\")\n            assert res == \"bar\"\n\n    promt_task = asyncio.create_task(prompt())\n    await c_replica.execute_command(f\"REPLTAKEOVER 5\")\n\n    assert await c_replica.execute_command(\"role\") == [\"master\", []]\n    await promt_task\n\n\nasync def test_take_over_timeout(df_factory, df_seeder_factory):\n    master = df_factory.create(proactor_threads=2)\n    replica = df_factory.create(proactor_threads=2)\n    df_factory.start_all([master, replica])\n\n    seeder = df_seeder_factory.create(port=master.port, keys=1000, dbcount=5, stop_on_failure=False)\n\n    c_master = master.client()\n    c_replica = replica.client()\n\n    logging.debug(f\"PORTS ARE:  {master.port} {replica.port}\")\n\n    await c_replica.execute_command(f\"REPLICAOF localhost {master.port}\")\n    await wait_available_async(c_replica)\n\n    fill_task = asyncio.create_task(seeder.run(target_ops=3000))\n\n    # Give the seeder a bit of time.\n    await asyncio.sleep(1)\n    try:\n        await c_replica.execute_command(f\"REPLTAKEOVER 0\")\n    except redis.exceptions.ResponseError as e:\n        # Should fail with detailed error message\n        assert str(e).startswith(\"Couldn't execute takeover\")\n        # Verify it includes diagnostic information\n        assert \":\" in str(e), \"Error message should include diagnostic details\"\n    else:\n        assert False, \"Takeover should not succeed.\"\n    seeder.stop()\n    await fill_task\n\n    assert await c_master.execute_command(\"role\") == [\n        \"master\",\n        [[\"127.0.0.1\", 
str(replica.port), \"online\"]],\n    ]\n    assert await c_replica.execute_command(\"role\") == [\n        \"slave\",\n        \"localhost\",\n        str(master.port),\n        \"online\",\n    ]\n\n\n# 1. Number of master threads\n# 2. Number of threads for each replica\nreplication_cases = [(8, 8)]\n\n\n@pytest.mark.parametrize(\"t_master, t_replica\", replication_cases)\nasync def test_no_tls_on_admin_port(\n    df_factory: DflyInstanceFactory,\n    df_seeder_factory,\n    t_master,\n    t_replica,\n    with_tls_server_args,\n):\n    # 1. Spin up dragonfly without tls, debug populate\n    master = df_factory.create(\n        no_tls_on_admin_port=\"true\",\n        admin_port=ADMIN_PORT,\n        **with_tls_server_args,\n        requirepass=\"XXX\",\n        proactor_threads=t_master,\n    )\n    master.start()\n    c_master = master.admin_client(password=\"XXX\")\n    await c_master.execute_command(\"DEBUG POPULATE 100\")\n    db_size = await c_master.execute_command(\"DBSIZE\")\n    assert 100 == db_size\n\n    # 2. Spin up a replica and initiate a REPLICAOF\n    replica = df_factory.create(\n        no_tls_on_admin_port=\"true\",\n        admin_port=ADMIN_PORT + 1,\n        **with_tls_server_args,\n        proactor_threads=t_replica,\n        requirepass=\"XXX\",\n        masterauth=\"XXX\",\n    )\n    replica.start()\n    c_replica = replica.admin_client(password=\"XXX\")\n    res = await c_replica.execute_command(\"REPLICAOF localhost \" + str(master.admin_port))\n    assert \"OK\" == res\n    await check_all_replicas_finished([c_replica], c_master)\n\n    # 3. Verify that replica dbsize == debug populate key size -- replication works\n    db_size = await c_replica.execute_command(\"DBSIZE\")\n    assert 100 == db_size\n\n\n# 1. Number of master threads\n# 2. Number of threads for each replica\n# 3. 
Admin port\nreplication_cases = [(8, 8, False), (8, 8, True)]\n\n\n@pytest.mark.parametrize(\"t_master, t_replica, test_admin_port\", replication_cases)\nasync def test_tls_replication(\n    df_factory,\n    df_seeder_factory,\n    t_master,\n    t_replica,\n    test_admin_port,\n    with_ca_tls_server_args,\n    with_ca_tls_client_args,\n):\n    # 1. Spin up dragonfly tls enabled, debug populate\n    master = df_factory.create(\n        tls_replication=\"true\",\n        **with_ca_tls_server_args,\n        port=1111,\n        admin_port=ADMIN_PORT,\n        proactor_threads=t_master,\n    )\n    master.start()\n    c_master = master.client(**with_ca_tls_client_args)\n    await c_master.execute_command(\"DEBUG POPULATE 100\")\n    db_size = await c_master.execute_command(\"DBSIZE\")\n    assert 100 == db_size\n\n    proxy = Proxy(\n        \"127.0.0.1\", 1114, \"127.0.0.1\", master.port if not test_admin_port else master.admin_port\n    )\n    await proxy.start()\n    proxy_task = asyncio.create_task(proxy.serve())\n\n    # 2. Spin up a replica and initiate a REPLICAOF\n    replica = df_factory.create(\n        tls_replication=\"true\",\n        **with_ca_tls_server_args,\n        proactor_threads=t_replica,\n    )\n    replica.start()\n    c_replica = replica.client(**with_ca_tls_client_args)\n    res = await c_replica.execute_command(\"REPLICAOF localhost \" + str(proxy.port))\n    assert \"OK\" == res\n    await check_all_replicas_finished([c_replica], c_master)\n\n    # 3. Verify that replica dbsize == debug populate key size -- replication works\n    db_size = await c_replica.execute_command(\"DBSIZE\")\n    assert 100 == db_size\n\n    # 4. 
Break the connection between master and replica\n    await proxy.close(proxy_task)\n    await asyncio.sleep(3)\n    await proxy.start()\n    proxy_task = asyncio.create_task(proxy.serve())\n\n    # Check replica gets new keys\n    await c_master.execute_command(\"SET MY_KEY 1\")\n    db_size = await c_master.execute_command(\"DBSIZE\")\n    assert 101 == db_size\n\n    await check_all_replicas_finished([c_replica], c_master)\n    db_size = await c_replica.execute_command(\"DBSIZE\")\n    assert 101 == db_size\n\n    await proxy.close(proxy_task)\n\n\n@dfly_args({\"proactor_threads\": 2})\nasync def test_tls_replication_without_ca(\n    df_factory,\n    df_seeder_factory,\n    with_tls_server_args,\n    with_ca_tls_client_args,\n):\n    # 1. Spin up dragonfly tls enabled, debug populate\n    master = df_factory.create(tls_replication=\"true\", **with_tls_server_args, requirepass=\"hi\")\n    master.start()\n    # Somehow redis-py forces to verify the certificate and it fails\n    # TODO investigate why and remove with_ca_tls_clients_args\n    c_master = master.client(password=\"hi\", **with_ca_tls_client_args)\n    await c_master.execute_command(\"DEBUG POPULATE 100\")\n\n    # 2. 
Spin up a replica and initiate a REPLICAOF\n    replica = df_factory.create(\n        tls_replication=\"true\", **with_tls_server_args, masterauth=\"hi\", requirepass=\"hi\"\n    )\n    replica.start()\n\n    c_replica = replica.client(password=\"hi\", **with_ca_tls_client_args)\n\n    res = await c_replica.execute_command(\"REPLICAOF localhost \" + str(master.port))\n    assert \"OK\" == res\n    await check_all_replicas_finished([c_replica], c_master)\n    assert 100 == await c_replica.execute_command(\"dbsize\")\n\n\n@pytest.mark.exclude_epoll\nasync def test_ipv6_replication(df_factory: DflyInstanceFactory):\n    \"\"\"Test that IPV6 addresses work for replication, ::1 is 127.0.0.1 localhost\"\"\"\n    master = df_factory.create(proactor_threads=1, bind=\"::1\", port=1111)\n    replica = df_factory.create(proactor_threads=1, bind=\"::1\", port=1112)\n\n    df_factory.start_all([master, replica])\n    c_master = master.client()\n    c_replica = replica.client()\n\n    assert await c_master.ping()\n    assert await c_replica.ping()\n    assert await c_replica.execute_command(\"REPLICAOF\", master[\"bind\"], master[\"port\"]) == \"OK\"\n\n\n# busy wait for 'replica' instance to have replication status 'status'\nasync def wait_for_replica_status(\n    replica: aioredis.Redis, status: str, wait_for_seconds=0.01, timeout=20\n):\n    start = time.time()\n    while (time.time() - start) < timeout:\n        await asyncio.sleep(wait_for_seconds)\n\n        info = await replica.info(\"replication\")\n        if info[\"master_link_status\"] == status:\n            return\n    raise RuntimeError(\"Client did not become available in time!\")\n\n\nasync def test_replicaof_flag(df_factory):\n    # tests --replicaof works under normal conditions\n    master = df_factory.create(\n        proactor_threads=2,\n    )\n\n    # set up master\n    master.start()\n    c_master = master.client()\n    await c_master.set(\"KEY\", \"VALUE\")\n    db_size = await c_master.dbsize()\n    
assert 1 == db_size\n\n    replica = df_factory.create(\n        proactor_threads=2,\n        replicaof=f\"localhost:{master.port}\",  # start to replicate master\n    )\n\n    # set up replica. check that it is replicating\n    replica.start()\n    c_replica = replica.client()\n\n    await wait_available_async(c_replica)  # give it time to startup\n    # wait until we have a connection\n    await check_all_replicas_finished([c_replica], c_master)\n\n    dbsize = await c_replica.dbsize()\n    assert 1 == dbsize\n\n    val = await c_replica.get(\"KEY\")\n    assert \"VALUE\" == val\n\n\nasync def test_replicaof_flag_replication_waits(df_factory):\n    # tests --replicaof works when we launch replication before the master\n    BASE_PORT = 1111\n    replica = df_factory.create(\n        proactor_threads=2,\n        replicaof=f\"localhost:{BASE_PORT}\",  # start to replicate master\n    )\n\n    # set up replica first\n    replica.start()\n    c_replica = replica.client()\n    await wait_for_replica_status(c_replica, status=\"down\")\n\n    # check that it is in replica mode, yet status is down\n    info = await c_replica.info(\"replication\")\n    assert info[\"role\"] == \"slave\"\n    assert info[\"master_host\"] == \"localhost\"\n    assert info[\"master_port\"] == BASE_PORT\n    assert info[\"master_link_status\"] == \"down\"\n\n    # set up master\n    master = df_factory.create(\n        port=BASE_PORT,\n        proactor_threads=2,\n    )\n\n    master.start()\n    c_master = master.client()\n    await c_master.set(\"KEY\", \"VALUE\")\n    db_size = await c_master.dbsize()\n    assert 1 == db_size\n\n    # check that replication works now\n    await wait_for_replica_status(c_replica, status=\"up\")\n    await check_all_replicas_finished([c_replica], c_master)\n\n    dbsize = await c_replica.dbsize()\n    assert 1 == dbsize\n\n    val = await c_replica.get(\"KEY\")\n    assert \"VALUE\" == val\n\n\nasync def test_replicaof_flag_disconnect(df_factory):\n    # test 
stopping replication when started using --replicaof\n    master = df_factory.create(\n        proactor_threads=2,\n    )\n\n    # set up master\n    master.start()\n    c_master = master.client()\n    await wait_available_async(c_master)\n\n    await c_master.set(\"KEY\", \"VALUE\")\n    db_size = await c_master.dbsize()\n    assert 1 == db_size\n\n    replica = df_factory.create(\n        proactor_threads=2,\n        replicaof=f\"localhost:{master.port}\",  # start to replicate master\n    )\n\n    # set up replica. check that it is replicating\n    replica.start()\n\n    c_replica = replica.client()\n    await wait_available_async(c_replica)\n    await check_all_replicas_finished([c_replica], c_master)\n\n    dbsize = await c_replica.dbsize()\n    assert 1 == dbsize\n\n    val = await c_replica.get(\"KEY\")\n    assert \"VALUE\" == val\n\n    await c_replica.replicaof(\"no\", \"one\")  # disconnect\n\n    role = await c_replica.role()\n    assert role[0] == \"master\"\n\n\nasync def test_df_crash_on_memcached_error(df_factory):\n    master = df_factory.create(\n        memcached_port=11211,\n        proactor_threads=2,\n    )\n\n    replica = df_factory.create(\n        memcached_port=master.mc_port + 1,\n        proactor_threads=2,\n    )\n\n    master.start()\n    replica.start()\n\n    c_master = master.client()\n    await wait_available_async(c_master)\n\n    c_replica = replica.client()\n    await c_replica.execute_command(f\"REPLICAOF localhost {master.port}\")\n    await wait_available_async(c_replica)\n\n    memcached_client = pymemcache.Client(f\"127.0.0.1:{replica.mc_port}\")\n\n    with pytest.raises(pymemcache.exceptions.MemcacheServerError):\n        memcached_client.set(\"key\", \"data\", noreply=False)\n\n\nasync def test_df_crash_on_replicaof_flag(df_factory):\n    master = df_factory.create(\n        proactor_threads=2,\n    )\n    master.start()\n\n    replica = df_factory.create(proactor_threads=2, replicaof=f\"127.0.0.1:{master.port}\")\n    
replica.start()\n\n    c_master = master.client()\n    c_replica = replica.client()\n\n    await wait_available_async(c_master)\n    await wait_available_async(c_replica)\n\n    res = await c_replica.execute_command(\"SAVE DF myfile\")\n    assert \"OK\" == res\n\n    res = await c_replica.execute_command(\"DBSIZE\")\n    assert res == 0\n\n\nasync def test_network_disconnect(df_factory, df_seeder_factory):\n    master = df_factory.create(proactor_threads=6)\n    replica = df_factory.create(proactor_threads=4)\n\n    df_factory.start_all([replica, master])\n    seeder = df_seeder_factory.create(port=master.port)\n\n    async with replica.client() as c_replica:\n        await seeder.run(target_deviation=0.1)\n\n        proxy = Proxy(\"127.0.0.1\", 1111, \"127.0.0.1\", master.port)\n        await proxy.start()\n        task = asyncio.create_task(proxy.serve())\n        try:\n            await c_replica.execute_command(f\"REPLICAOF localhost {proxy.port}\")\n\n            for _ in range(10):\n                await asyncio.sleep(random.randint(0, 10) / 10)\n                proxy.drop_connection()\n\n            # Give time to detect dropped connection and reconnect\n            await asyncio.sleep(1.0)\n            await wait_available_async(c_replica)\n\n            capture = await seeder.capture()\n            assert await seeder.compare(capture, replica.port)\n        finally:\n            await proxy.close(task)\n\n\nasync def test_network_disconnect_active_stream(df_factory, df_seeder_factory):\n    master = df_factory.create(proactor_threads=4, shard_repl_backlog_len=4000)\n    replica = df_factory.create(proactor_threads=4)\n\n    df_factory.start_all([replica, master])\n    seeder = df_seeder_factory.create(port=master.port)\n\n    async with replica.client() as c_replica, master.client() as c_master:\n        await seeder.run(target_deviation=0.1)\n\n        proxy = Proxy(\"127.0.0.1\", 1112, \"127.0.0.1\", master.port)\n        await proxy.start()\n        
task = asyncio.create_task(proxy.serve())\n        try:\n            await c_replica.execute_command(f\"REPLICAOF localhost {proxy.port}\")\n\n            fill_task = asyncio.create_task(seeder.run(target_ops=4000))\n\n            for _ in range(3):\n                await asyncio.sleep(random.randint(10, 20) / 10)\n                proxy.drop_connection()\n\n            seeder.stop()\n            await fill_task\n\n            # Give time to detect dropped connection and reconnect\n            await asyncio.sleep(1.0)\n            await wait_available_async(c_replica)\n\n            logging.debug(await c_replica.execute_command(\"INFO REPLICATION\"))\n            logging.debug(await c_master.execute_command(\"INFO REPLICATION\"))\n\n            capture = await seeder.capture()\n            assert await seeder.compare(capture, replica.port)\n        finally:\n            await proxy.close(task)\n\n\nasync def test_network_disconnect_small_buffer(df_factory, df_seeder_factory):\n    master = df_factory.create(proactor_threads=4, shard_repl_backlog_len=1)\n    replica = df_factory.create(proactor_threads=4)\n\n    df_factory.start_all([replica, master])\n    seeder = df_seeder_factory.create(port=master.port)\n\n    async with replica.client() as c_replica, master.client() as c_master:\n        await seeder.run(target_deviation=0.1)\n\n        proxy = Proxy(\"127.0.0.1\", 1113, \"127.0.0.1\", master.port)\n        await proxy.start()\n        task = asyncio.create_task(proxy.serve())\n\n        try:\n            await c_replica.execute_command(f\"REPLICAOF localhost {proxy.port}\")\n\n            # Wait for the two nodes to be in sync (stable state replication)\n            await wait_available_async(c_replica)\n\n            # Now start seeding and dropping\n            fill_task = asyncio.create_task(seeder.run())\n\n            for _ in range(3):\n                await asyncio.sleep(random.randint(5, 10) / 10)\n                proxy.drop_connection()\n\n            
seeder.stop()\n            await fill_task\n\n            # Give time to detect dropped connection and reconnect\n            await asyncio.sleep(1.0)\n            await wait_available_async(c_replica)\n\n            # logging.debug(await c_replica.execute_command(\"INFO REPLICATION\"))\n            # logging.debug(await c_master.execute_command(\"INFO REPLICATION\"))\n            capture = await seeder.capture()\n            assert await seeder.compare(capture, replica.port)\n        finally:\n            await proxy.close(task)\n\n    info = await c_replica.info(\"replication\")\n    master.stop()\n    lines = master.find_in_logs(\"Partial sync requested from stale LSN\")\n    assert len(lines) > 0\n\n\nasync def test_replica_reconnections_after_network_disconnect(df_factory, df_seeder_factory):\n    master = df_factory.create(proactor_threads=6)\n    replica = df_factory.create(proactor_threads=4)\n\n    df_factory.start_all([replica, master])\n    seeder = df_seeder_factory.create(port=master.port)\n\n    async with replica.client() as c_replica:\n        await seeder.run(target_deviation=0.1)\n\n        proxy = Proxy(\"127.0.0.1\", 1115, \"127.0.0.1\", master.port)\n        await proxy.start()\n        task = asyncio.create_task(proxy.serve())\n        try:\n            await c_replica.execute_command(f\"REPLICAOF localhost {proxy.port}\")\n\n            # Wait replica to be up and synchronized with master\n            await wait_available_async(c_replica)\n\n            initial_reconnects_count = await get_replica_reconnects_count(replica)\n\n            # Fully drop the server\n            await proxy.close(task)\n\n            # After dropping the connection replica should try to reconnect\n            await wait_for_replica_status(c_replica, status=\"down\")\n            await asyncio.sleep(2)\n\n            # Restart the proxy\n            await proxy.start()\n            task = asyncio.create_task(proxy.serve())\n\n            # Wait replica to be 
reconnected and synchronized with master\n            await wait_available_async(c_replica)\n\n            capture = await seeder.capture()\n            assert await seeder.compare(capture, replica.port)\n\n            # Assert replica reconnects metrics increased\n            await assert_replica_reconnections(replica, initial_reconnects_count)\n\n        finally:\n            await proxy.close(task)\n\n\nasync def test_search(df_factory):\n    master = df_factory.create(proactor_threads=4)\n    replica = df_factory.create(proactor_threads=4)\n\n    df_factory.start_all([master, replica])\n\n    c_master = master.client()\n    c_replica = replica.client()\n\n    # First, create an index on replica\n    await c_replica.execute_command(\"FT.CREATE\", \"idx-r\", \"SCHEMA\", \"f1\", \"numeric\")\n    for i in range(0, 10):\n        await c_replica.hset(f\"k{i}\", mapping={\"f1\": i})\n    assert (await c_replica.ft(\"idx-r\").search(\"@f1:[5 9]\")).total == 5\n\n    # Second, create an index on master\n    await c_master.execute_command(\"FT.CREATE\", \"idx-m\", \"SCHEMA\", \"f2\", \"numeric\")\n    for i in range(0, 10):\n        await c_master.hset(f\"k{i}\", mapping={\"f2\": i * 2})\n    assert (await c_master.ft(\"idx-m\").search(\"@f2:[6 10]\")).total == 3\n\n    # Replicate\n    await c_replica.execute_command(\"REPLICAOF\", \"localhost\", master.port)\n    await wait_available_async(c_replica)\n\n    # Check master index was picked up and original index was deleted\n    assert (await c_replica.execute_command(\"FT._LIST\")) == [\"idx-m\"]\n\n    # Check query from master runs on replica\n    assert (await c_replica.ft(\"idx-m\").search(\"@f2:[6 10]\")).total == 3\n\n    # Set a new key\n    await c_master.hset(\"kNEW\", mapping={\"f2\": 100})\n    await asyncio.sleep(0.1)\n\n    assert (await c_replica.ft(\"idx-m\").search(\"@f2:[100 100]\")).docs[0].id == \"kNEW\"\n\n    # Create a new aux index on master\n    await c_master.execute_command(\"FT.CREATE\", 
\"idx-m2\", \"SCHEMA\", \"f2\", \"numeric\", \"sortable\")\n    await asyncio.sleep(0.1)\n\n    from redis.commands.search.query import Query\n\n    assert (await c_replica.ft(\"idx-m2\").search(Query(\"*\").sort_by(\"f2\").paging(0, 1))).docs[\n        0\n    ].id == \"k0\"\n\n\n@dfly_args({\"proactor_threads\": 4})\nasync def test_search_with_stream(df_factory: DflyInstanceFactory):\n    master = df_factory.create()\n    replica = df_factory.create()\n\n    df_factory.start_all([master, replica])\n\n    c_master = master.client()\n    c_replica = replica.client()\n\n    # fill master with hsets and create index\n    p = c_master.pipeline(transaction=False)\n    for i in range(10_000):\n        p.hset(f\"k{i}\", mapping={\"name\": f\"name of {i}\"})\n    await p.execute()\n\n    await c_master.execute_command(\"FT.CREATE i1 SCHEMA name text\")\n\n    # start replication and issue one add command and delete commands on master in parallel\n    await c_replica.execute_command(f\"REPLICAOF localhost {master.port}\")\n    await c_master.hset(\"secret-key\", mapping={\"name\": \"new-secret\"})\n    for i in range(1_000):\n        await c_master.delete(f\"k{i}\")\n\n    # expect replica to see only 10k - 1k + 1 = 9001 keys in it's index\n    await wait_available_async(c_replica)\n    await check_all_replicas_finished([c_replica], c_master)\n    assert await c_replica.execute_command(\"FT.SEARCH i1 * LIMIT 0 0\") == [9_001]\n    assert await c_replica.execute_command('FT.SEARCH i1 \"secret\"') == [\n        1,\n        \"secret-key\",\n        [\"name\", \"new-secret\"],\n    ]\n\n\n# @pytest.mark.large\nasync def test_client_pause_with_replica(df_factory, df_seeder_factory):\n    master = df_factory.create(proactor_threads=4)\n    replica = df_factory.create(proactor_threads=4)\n    df_factory.start_all([master, replica])\n\n    seeder = df_seeder_factory.create(port=master.port)\n\n    c_master = master.client()\n    c_replica = replica.client()\n\n    await 
c_replica.execute_command(f\"REPLICAOF localhost {master.port}\")\n    await wait_available_async(c_replica)\n\n    fill_task = asyncio.create_task(seeder.run())\n\n    # Give the seeder a bit of time.\n    await asyncio.sleep(1)\n    # block the seeder for 4 seconds\n    await c_master.execute_command(\"client pause 4000 write\")\n    stats = await c_master.info(\"CommandStats\")\n    await asyncio.sleep(0.5)\n    stats_after_sleep = await c_master.info(\"CommandStats\")\n    # Check no commands are executed except info and replconf called from replica\n    for cmd, cmd_stats in stats_after_sleep.items():\n        if cmd in [\"cmdstat_info\", \"cmdstat_replconf\", \"cmdstat_multi\"]:\n            continue\n        assert stats[cmd] == cmd_stats, cmd\n\n    await asyncio.sleep(6)\n    seeder.stop()\n    await fill_task\n    stats_after_pause_finish = await c_master.info(\"CommandStats\")\n    more_exeuted = False\n    for cmd, cmd_stats in stats_after_pause_finish.items():\n        if \"cmdstat_info\" != cmd and \"cmdstat_replconf\" != cmd_stats and stats[cmd] != cmd_stats:\n            more_exeuted = True\n    assert more_exeuted\n\n    capture = await seeder.capture(port=master.port)\n    assert await seeder.compare(capture, port=replica.port)\n\n\n@pytest.mark.debug_only\n@dfly_args({\"proactor_threads\": 2})\nasync def test_replicaof_reject_on_load(df_factory, df_seeder_factory):\n    master = df_factory.create()\n    replica = df_factory.create(dbfilename=f\"dump_{tmp_file_name()}\")\n    df_factory.start_all([master, replica])\n\n    c_replica = replica.client()\n\n    await c_replica.execute_command(f\"DEBUG POPULATE 1000 key 500 RAND type set elements 500\")\n\n    replica.stop()\n    replica.start()\n    # Disable retries so that BusyLoadingError is raised immediately.\n    # redis-py >= 7 retries on ConnectionError by default, and BusyLoadingError\n    # inherits from ConnectionError, causing the REPLICAOF to be silently\n    # retried until loading 
finishes.\n    from redis.retry import Retry\n    from redis.backoff import NoBackoff\n\n    c_replica = replica.client(retry=Retry(NoBackoff(), 0))\n\n    @assert_eventually\n    async def check_replica_isloading():\n        persistence = await c_replica.info(\"PERSISTENCE\")\n        assert persistence[\"loading\"] == 1\n\n    # If this fails adjust load of DEBUG POPULATE above.\n    await check_replica_isloading()\n\n    # Check replica of not alowed while loading snapshot\n    # Keep in mind that if the exception has not been raised, it doesn't mean\n    # that there is a bug because it could be the case that while executing\n    # INFO PERSISTENCE df is in loading state but when we call REPLICAOF df\n    # is no longer in loading state and the assertion false is triggered.\n    with pytest.raises(aioredis.BusyLoadingError):\n        await c_replica.execute_command(f\"REPLICAOF localhost {master.port}\")\n\n    # Check one we finish loading snapshot replicaof success\n    await wait_available_async(c_replica, timeout=180)\n    await c_replica.execute_command(f\"REPLICAOF localhost {master.port}\")\n\n\nasync def test_heartbeat_eviction_propagation(df_factory):\n    master = df_factory.create(\n        proactor_threads=1, cache_mode=\"true\", maxmemory=\"256mb\", enable_heartbeat_eviction=\"false\"\n    )\n    replica = df_factory.create(proactor_threads=1)\n    df_factory.start_all([master, replica])\n\n    c_master = master.client()\n    c_replica = replica.client()\n\n    # fill the master to use about 233mb > 256mb * 0.9, which will trigger heartbeat eviction.\n    await c_master.execute_command(\"DEBUG POPULATE 233 size 1048576\")\n    await c_replica.execute_command(f\"REPLICAOF localhost {master.port}\")\n    await wait_available_async(c_replica)\n\n    # now enable heart beat eviction\n    await c_master.execute_command(\"CONFIG SET enable_heartbeat_eviction true\")\n\n    while True:\n        info = await c_master.info(\"stats\")\n        evicted_1 = 
info[\"evicted_keys\"]\n        time.sleep(2)\n        info = await c_master.info(\"stats\")\n        evicted_2 = info[\"evicted_keys\"]\n        if evicted_2 == evicted_1:\n            break\n        else:\n            print(\"waiting for eviction to finish...\", end=\"\\r\", flush=True)\n\n    await check_all_replicas_finished([c_replica], c_master)\n    keys_master = await c_master.execute_command(\"keys *\")\n    keys_replica = await c_replica.execute_command(\"keys *\")\n    assert set(keys_master) == set(keys_replica)\n\n\nasync def test_policy_based_eviction_propagation(df_factory, df_seeder_factory):\n    master = df_factory.create(\n        proactor_threads=2,\n        cache_mode=\"true\",\n        maxmemory=\"512mb\",\n        enable_heartbeat_eviction=\"false\",\n        rss_oom_deny_ratio=1.3,\n    )\n    replica = df_factory.create(proactor_threads=2)\n    df_factory.start_all([master, replica])\n\n    c_master = master.client()\n    c_replica = replica.client()\n\n    await c_master.execute_command(\"DEBUG POPULATE 6000 size 88000\")\n\n    await c_replica.execute_command(f\"REPLICAOF localhost {master.port}\")\n    await wait_available_async(c_replica)\n\n    seeder = df_seeder_factory.create(\n        port=master.port, keys=600, val_size=1000, stop_on_failure=False\n    )\n    await seeder.run(target_deviation=0.1)\n\n    info = await c_master.info(\"stats\")\n    assert (\n        info[\"evicted_keys\"] > 0\n    ), f\"Weak testcase: policy based eviction was not triggered. 
{await c_master.info()}\"\n\n    await check_all_replicas_finished([c_replica], c_master)\n\n    # KEYS may trigger lazy expiry on master, generating DELs not yet received by replica.\n    # Fetch master keys first, then re-sync to ensure replica applies any resulting DELs.\n    keys_master = await c_master.execute_command(\"keys k*\")\n    await check_all_replicas_finished([c_replica], c_master)\n    keys_replica = await c_replica.execute_command(\"keys k*\")\n\n    assert set(keys_replica).difference(keys_master) == set()\n    assert set(keys_master).difference(keys_replica) == set()\n\n\nasync def test_journal_doesnt_yield_issue_2500(df_factory, df_seeder_factory):\n    \"\"\"\n    Issues many SETEX commands through a Lua script so that no yields are done between them.\n    In parallel, connect a replica, so that these SETEX commands write their custom journal log.\n    This makes sure that no Fiber context switch while inside a shard callback.\n    \"\"\"\n    master = df_factory.create()\n    replica = df_factory.create()\n    df_factory.start_all([master, replica])\n\n    c_master = master.client()\n    c_replica = replica.client()\n\n    async def send_setex():\n        script = \"\"\"\n        local charset = \"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ1234567890\"\n\n        local random_string = function(length)\n            local str = ''\n            for i=1,length do\n                str = str .. 
charset:sub(math.random(1, #charset))\n            end\n            return str\n        end\n\n        for i = 1, 200 do\n            -- 200 iterations to make sure SliceSnapshot dest queue is full\n            -- 100 bytes string to make sure serializer is big enough\n            redis.call('SETEX', KEYS[1], 1000, random_string(100))\n        end\n        \"\"\"\n\n        for i in range(10):\n            await asyncio.gather(\n                *[c_master.eval(script, 1, random.randint(0, 1_000)) for j in range(3)]\n            )\n\n    stream_task = asyncio.create_task(send_setex())\n    await asyncio.sleep(0.1)\n\n    await c_replica.execute_command(f\"REPLICAOF localhost {master.port}\")\n    assert not stream_task.done(), \"Weak testcase. finished sending commands before replication.\"\n\n    await wait_available_async(c_replica)\n    await stream_task\n\n    await check_all_replicas_finished([c_replica], c_master)\n    keys_master = await c_master.execute_command(\"keys *\")\n    keys_replica = await c_replica.execute_command(\"keys *\")\n    assert set(keys_master) == set(keys_replica)\n\n\n@pytest.mark.large\nasync def test_saving_replica(df_factory):\n    master = df_factory.create(proactor_threads=1)\n    replica = df_factory.create(proactor_threads=1, dbfilename=f\"dump_{tmp_file_name()}\")\n    df_factory.start_all([master, replica])\n\n    c_master = master.client()\n    c_replica = replica.client()\n\n    await c_master.execute_command(\"DEBUG POPULATE 100000 key 4048 RAND\")\n    await c_replica.execute_command(f\"REPLICAOF localhost {master.port}\")\n    await wait_available_async(c_replica)\n\n    async def save_replica():\n        await c_replica.execute_command(\"save\")\n\n    save_task = asyncio.create_task(save_replica())\n    while not await is_saving(c_replica):  # wait for replica start saving\n        assert \"rdb_changes_since_last_success_save:0\" not in await c_replica.execute_command(\n            \"info persistence\"\n        ), \"Weak 
test case, finished saving too quickly\"\n        await asyncio.sleep(0.1)\n    await c_replica.execute_command(\"replicaof no one\")\n    assert await is_saving(c_replica)\n    await save_task\n    assert not await is_saving(c_replica)\n\n\nasync def test_start_replicating_while_save(df_factory):\n    master = df_factory.create(proactor_threads=4)\n    replica = df_factory.create(proactor_threads=4, dbfilename=f\"dump_{tmp_file_name()}\")\n    df_factory.start_all([master, replica])\n\n    c_master = master.client()\n    c_replica = replica.client()\n\n    await c_replica.execute_command(\"DEBUG POPULATE 100000 key 4096 RAND\")\n\n    async def save_replica():\n        await c_replica.execute_command(\"save\")\n\n    save_task = asyncio.create_task(save_replica())\n    while not await is_saving(c_replica):  # wait for server start saving\n        assert \"rdb_changes_since_last_success_save:0\" not in await c_replica.execute_command(\n            \"info persistence\"\n        ), \"Weak test case, finished saving too quickly\"\n        await asyncio.sleep(0.1)\n    await c_replica.execute_command(f\"REPLICAOF localhost {master.port}\")\n    assert await is_saving(c_replica)\n    await save_task\n    assert not await is_saving(c_replica)\n\n\nasync def test_user_acl_replication(df_factory):\n    master = df_factory.create(proactor_threads=4)\n    replica = df_factory.create(proactor_threads=4)\n    df_factory.start_all([master, replica])\n\n    c_master = master.client()\n    await c_master.execute_command(\"ACL SETUSER tmp >tmp ON +ping +dfly +replconf\")\n    await c_master.execute_command(\"SET foo bar\")\n    assert 1 == await c_master.execute_command(\"DBSIZE\")\n\n    c_replica = replica.client()\n    await c_replica.execute_command(\"CONFIG SET masteruser tmp\")\n    await c_replica.execute_command(\"CONFIG SET masterauth tmp\")\n    await c_replica.execute_command(f\"REPLICAOF localhost {master.port}\")\n\n    await wait_available_async(c_replica)\n    
assert 1 == await c_replica.execute_command(\"DBSIZE\")\n\n    # revoke acl's from tmp\n    await c_master.execute_command(\"ACL SETUSER tmp -replconf\")\n    async for info, breaker in info_tick_timer(c_replica, section=\"REPLICATION\"):\n        with breaker:\n            assert info[\"master_link_status\"] == \"down\"\n\n    await c_master.execute_command(\"SET bar foo\")\n\n    # reinstate and let replication continue\n    await c_master.execute_command(\"ACL SETUSER tmp +replconf\")\n    await check_all_replicas_finished([c_replica], c_master, 5)\n    assert 2 == await c_replica.execute_command(\"DBSIZE\")\n\n\n@pytest.mark.parametrize(\"break_conn\", [False, True])\nasync def test_replica_reconnect(df_factory, break_conn):\n    \"\"\"\n    Test replica does not connect to master if master restarted\n    step1: create master and replica\n    step2: stop master and start again with the same port\n    step3: check replica is not replicating the restarted master\n    step4: issue new replicaof command\n    step5: check replica replicates master\n    \"\"\"\n    # Connect replica to master\n    master = df_factory.create(proactor_threads=1)\n    replica = df_factory.create(proactor_threads=1, break_replication_on_master_restart=break_conn)\n    df_factory.start_all([master, replica])\n\n    c_master = master.client()\n    c_replica = replica.client()\n\n    await c_master.set(\"k\", \"12345\")\n    await c_replica.execute_command(f\"REPLICAOF localhost {master.port}\")\n    await wait_available_async(c_replica)\n    assert (await c_replica.info(\"REPLICATION\"))[\"master_link_status\"] == \"up\"\n\n    # kill existing master, create master with different repl_id but same port\n    master_port = master.port\n    master.stop()\n\n    await asyncio.sleep(1)\n\n    repl_info = await c_replica.info(\"REPLICATION\")\n    assert repl_info[\"master_link_status\"] == \"down\", str(repl_info)\n\n    master = df_factory.create(proactor_threads=1, port=master_port)\n    
df_factory.start_all([master])\n    await asyncio.sleep(1)  # We sleep for 0.5s in replica.cc before reconnecting\n\n    # Assert that replica did not reconnected to master with different repl_id\n    if break_conn:\n        assert await c_master.execute_command(\"get k\") == None\n        assert await c_replica.execute_command(\"get k\") == \"12345\"\n        assert await c_master.execute_command(\"set k 6789\")\n        assert await c_replica.execute_command(\"get k\") == \"12345\"\n        assert (await c_replica.info(\"REPLICATION\"))[\"master_link_status\"] == \"down\"\n    else:\n        assert await c_master.execute_command(\"get k\") == None\n        assert await c_replica.execute_command(\"get k\") == None\n        assert await c_master.execute_command(\"set k 6789\")\n        await check_all_replicas_finished([c_replica], c_master)\n        assert await c_replica.execute_command(\"get k\") == \"6789\"\n        assert (await c_replica.info(\"REPLICATION\"))[\"master_link_status\"] == \"up\"\n\n    # Force re-replication, assert that it worked\n    await c_replica.execute_command(f\"REPLICAOF localhost {master.port}\")\n    await wait_available_async(c_replica)\n    assert await c_replica.execute_command(\"get k\") == \"6789\"\n\n\nasync def test_announce_ip_port(df_factory):\n    master = df_factory.create()\n    replica = df_factory.create(replica_announce_ip=\"overrode-host\", announce_port=\"1337\")\n\n    master.start()\n    replica.start()\n\n    # Connect clients, connect replica to master\n    c_master = master.client()\n    c_replica = replica.client()\n    await c_replica.execute_command(f\"REPLICAOF localhost {master.port}\")\n    await wait_available_async(c_replica)\n\n    role, node = await c_master.execute_command(\"role\")\n    assert role == \"master\"\n    host, port, _ = node[0]\n    assert host == \"overrode-host\"\n    assert port == \"1337\"\n\n\nasync def test_replication_timeout_on_full_sync(df_factory: DflyInstanceFactory, 
df_seeder_factory):\n    # setting replication_timeout to a very small value to force the replica to timeout\n    master = df_factory.create(\n        replication_timeout=100, vmodule=\"replica=2,dflycmd=2,snapshot=1,rdb_save=1,rdb_load=1\"\n    )\n    replica = df_factory.create()\n\n    df_factory.start_all([master, replica])\n\n    c_master = master.client()\n    c_replica = replica.client()\n\n    await c_master.execute_command(\"debug\", \"populate\", \"200000\", \"foo\", \"5000\", \"RAND\")\n    seeder = df_seeder_factory.create(port=master.port)\n    seeder_task = asyncio.create_task(seeder.run())\n\n    await asyncio.sleep(0.5)  # wait for seeder running\n\n    await c_replica.execute_command(f\"REPLICAOF localhost {master.port}\")\n\n    # wait for full sync\n    async with async_timeout.timeout(3):\n        await wait_for_replicas_state(c_replica, state=\"full_sync\", timeout=0.05)\n\n    await c_replica.execute_command(\n        \"debug replica pause\"\n    )  # pause replica to trigger reconnect on master\n\n    await asyncio.sleep(1)\n\n    await c_replica.execute_command(\"debug replica resume\")  # resume replication\n\n    await asyncio.sleep(1)  # replica will start resync\n    seeder.stop()\n    await seeder_task\n\n    await check_all_replicas_finished([c_replica], c_master, timeout=60)\n    await assert_replica_reconnections(replica, 0)\n\n\n@pytest.mark.exclude_epoll\n@dfly_args({\"proactor_threads\": 1})\nasync def test_master_stalled_disconnect(df_factory: DflyInstanceFactory):\n    # disconnect after 1 second of being blocked\n    master = df_factory.create(replication_timeout=1000)\n    replica = df_factory.create()\n\n    df_factory.start_all([master, replica])\n\n    c_master = master.client()\n    c_replica = replica.client()\n\n    await c_master.execute_command(\"debug\", \"populate\", \"200000\", \"foo\", \"500\", \"RAND\")\n    await c_replica.execute_command(f\"REPLICAOF localhost {master.port}\")\n\n    @assert_eventually\n    
async def check_replica_connected():\n        repl_info = await c_master.info(\"replication\")\n        assert \"slave0\" in repl_info\n\n    @assert_eventually\n    async def check_replica_disconnected():\n        repl_info = await c_master.info(\"replication\")\n        assert \"slave0\" not in repl_info\n\n    await check_replica_connected()\n    await c_replica.execute_command(\"DEBUG REPLICA PAUSE\")\n    await check_replica_connected()  # still connected\n    await asyncio.sleep(1)  # wait for the master to recognize it's being blocked\n    await check_replica_disconnected()\n\n\ndef download_dragonfly_release(version):\n    path = f\"/tmp/old_df/{version}\"\n    binary = f\"{path}/dragonfly-x86_64\"\n    if os.path.isfile(binary):\n        return binary\n\n    # Cleanup in case there's partial files\n    if os.path.exists(path):\n        shutil.rmtree(path)\n\n    os.makedirs(path)\n    gzfile = f\"{path}/dragonfly.tar.gz\"\n    logging.debug(f\"Downloading Dragonfly release into {gzfile}...\")\n\n    # Download\n    urllib.request.urlretrieve(\n        f\"https://github.com/dragonflydb/dragonfly/releases/download/{version}/dragonfly-x86_64.tar.gz\",\n        gzfile,\n    )\n\n    # Extract\n    file = tarfile.open(gzfile)\n    file.extractall(path)\n    file.close()\n\n    # Return path\n    return binary\n\n\n@pytest.mark.parametrize(\n    \"cluster_mode, announce_ip, announce_port\",\n    [\n        (\"\", \"localhost\", 7000),\n        (\"emulated\", \"\", 0),\n        (\"emulated\", \"localhost\", 7000),\n    ],\n)\nasync def test_replicate_old_master(\n    df_factory: DflyInstanceFactory, cluster_mode, announce_ip, announce_port\n):\n    cpu = platform.processor()\n    if cpu != \"x86_64\":\n        pytest.skip(f\"Supported only on x64, running on {cpu}\")\n\n    dfly_version = \"v1.19.2\"\n    released_dfly_path = download_dragonfly_release(dfly_version)\n    master = df_factory.create(\n        version=1.19,\n        path=released_dfly_path,\n        
cluster_mode=cluster_mode,\n    )\n    replica = df_factory.create(\n        cluster_mode=cluster_mode,\n        cluster_announce_ip=announce_ip,\n        announce_port=announce_port,\n    )\n\n    df_factory.start_all([master, replica])\n\n    c_master = master.client()\n    c_replica = replica.client()\n\n    assert (\n        f\"df-{dfly_version}\"\n        == (await c_master.execute_command(\"info\", \"server\"))[\"dragonfly_version\"]\n    )\n    assert dfly_version != (await c_replica.execute_command(\"info\", \"server\"))[\"dragonfly_version\"]\n\n    await c_master.execute_command(\"set\", \"k1\", \"v1\")\n\n    assert await c_replica.execute_command(f\"REPLICAOF localhost {master.port}\") == \"OK\"\n    await wait_available_async(c_replica)\n\n    assert await c_replica.execute_command(\"get\", \"k1\") == \"v1\"\n\n\n# This Test was intorduced in response to a bug when replicating empty hashmaps (encoded as\n# ziplists) created with HSET, HSETEX, HDEL and then replicated 2 times.\n# For more information plz refer to the issue on gh:\n# https://github.com/dragonflydb/dragonfly/issues/3504\n@dfly_args({\"proactor_threads\": 1})\nasync def test_empty_hash_map_replicate_old_master(df_factory):\n    cpu = platform.processor()\n    if cpu != \"x86_64\":\n        pytest.skip(f\"Supported only on x64, running on {cpu}\")\n\n    dfly_version = \"v1.21.2\"\n    released_dfly_path = download_dragonfly_release(dfly_version)\n    # old versions\n    instances = [df_factory.create(path=released_dfly_path, version=1.21) for i in range(3)]\n    # new version\n    instances.append(df_factory.create())\n\n    df_factory.start_all(instances)\n\n    old_c_master = instances[0].client()\n    # Create an empty hashmap\n    await old_c_master.execute_command(\"HSET foo a_field a_value\")\n    await old_c_master.execute_command(\"HSETEX foo 2 b_field b_value\")\n    await old_c_master.execute_command(\"HDEL foo a_field\")\n\n    @assert_eventually\n    async def 
check_if_empty():\n        assert await old_c_master.execute_command(\"HGETALL foo\") == []\n\n    await check_if_empty()\n    assert await old_c_master.execute_command(f\"EXISTS foo\") == 1\n    await old_c_master.aclose()\n\n    async def assert_body(client, result=1, state=\"online\", node_role=\"slave\"):\n        async with async_timeout.timeout(10):\n            await wait_for_replicas_state(client, state=state, node_role=node_role)\n\n        assert await client.execute_command(f\"EXISTS foo\") == result\n        assert await client.execute_command(\"REPLTAKEOVER 1\") == \"OK\"\n\n    index = 0\n    last_old_replica = 2\n\n    # Adjacent pairs\n    for a, b in zip(instances, instances[1:]):\n        logging.debug(index)\n        client_b = b.client()\n        assert await client_b.execute_command(f\"REPLICAOF localhost {a.port}\") == \"OK\"\n\n        if index != last_old_replica:\n            await assert_body(client_b, state=\"stable_sync\", node_role=\"replica\")\n        else:\n            await assert_body(client_b, result=0)\n\n        index = index + 1\n        await client_b.aclose()\n\n\n# This Test was intorduced in response to a bug when replicating empty hash maps with\n# HSET, HSETEX, HDEL and then loaded via replication.\n# For more information plz refer to the issue on gh:\n# https://github.com/dragonflydb/dragonfly/issues/3504\n@dfly_args({\"proactor_threads\": 1})\nasync def test_empty_hashmap_loading_bug(df_factory: DflyInstanceFactory):\n    cpu = platform.processor()\n    if cpu != \"x86_64\":\n        pytest.skip(f\"Supported only on x64, running on {cpu}\")\n\n    dfly_version = \"v1.21.2\"\n    released_dfly_path = download_dragonfly_release(dfly_version)\n\n    master = df_factory.create(path=released_dfly_path, version=1.21)\n    master.start()\n\n    c_master = master.client()\n    # Create an empty hashmap\n    await c_master.execute_command(\"HSET foo a_field a_value\")\n    await c_master.execute_command(\"HSETEX foo 2 b_field 
b_value\")\n    await c_master.execute_command(\"HDEL foo a_field\")\n\n    @assert_eventually\n    async def check_if_empty():\n        assert await c_master.execute_command(\"HGETALL foo\") == []\n\n    await check_if_empty()\n    assert await c_master.execute_command(f\"EXISTS foo\") == 1\n\n    replica = df_factory.create()\n    replica.start()\n    c_replica = replica.client()\n\n    await c_replica.execute_command(f\"REPLICAOF localhost {master.port}\")\n    await wait_for_replicas_state(c_replica)\n    assert await c_replica.execute_command(f\"dbsize\") == 0\n\n\nasync def test_replicate_search_index_to_old_replica(df_factory: DflyInstanceFactory):\n    \"\"\"\n    Test that a new master with search indices (including HNSW vector index) can\n    replicate to a v1.35 replica. This verifies backward compatibility of replication\n    when search indices are defined, ensuring the replica receives the data without\n    errors from new RDB AUX fields (search-index, hnsw-index-metadata, HNSW opcodes).\n    \"\"\"\n    cpu = platform.processor()\n    if cpu != \"x86_64\":\n        pytest.skip(f\"Supported only on x64, running on {cpu}\")\n\n    dfly_version = \"v1.35.1\"\n    released_dfly_path = download_dragonfly_release(dfly_version)\n\n    # New master (current version) with search index\n    master = df_factory.create(proactor_threads=2)\n    # Old replica (v1.35)\n    replica = df_factory.create(\n        version=1.35,\n        path=released_dfly_path,\n        proactor_threads=2,\n    )\n\n    df_factory.start_all([master, replica])\n\n    c_master = master.client()\n    c_replica = replica.client()\n\n    # Create a search index with HNSW vector field on the new master\n    await c_master.execute_command(\n        \"FT.CREATE\",\n        \"test_idx\",\n        \"ON\",\n        \"HASH\",\n        \"PREFIX\",\n        \"1\",\n        \"item:\",\n        \"SCHEMA\",\n        \"name\",\n        \"TEXT\",\n        \"price\",\n        \"NUMERIC\",\n        
\"SORTABLE\",\n        \"category\",\n        \"TAG\",\n        \"embedding\",\n        \"VECTOR\",\n        \"HNSW\",\n        \"6\",\n        \"TYPE\",\n        \"FLOAT32\",\n        \"DIM\",\n        \"2\",\n        \"DISTANCE_METRIC\",\n        \"L2\",\n    )\n\n    # Insert test data with vector embeddings\n    for i in range(100):\n        category = \"electronics\" if i % 2 == 0 else \"clothing\"\n        embedding = struct.pack(\"<2f\", float(i), float(i * 2))\n        await c_master.hset(\n            f\"item:{i}\",\n            mapping={\n                \"name\": f\"Product {i}\",\n                \"price\": str(i * 10),\n                \"category\": category,\n                \"embedding\": embedding,\n            },\n        )\n\n    # Verify data and index on master\n    assert await c_master.dbsize() == 100\n    master_idx = c_master.ft(\"test_idx\")\n    text_result = await master_idx.search(\"Product 50\")\n    assert text_result.total >= 1\n\n    # Verify KNN search on master\n    query_vec = struct.pack(\"<2f\", 50.0, 100.0)\n    knn_result = await c_master.execute_command(\n        \"FT.SEARCH\", \"test_idx\", \"*=>[KNN 2 @embedding $vec]\", \"PARAMS\", \"2\", \"vec\", query_vec\n    )\n    assert knn_result[0] >= 1\n    assert \"item:50\" in knn_result\n\n    # Start replication from new master to old replica\n    await c_replica.execute_command(f\"REPLICAOF localhost {master.port}\")\n    await wait_available_async(c_replica)\n\n    # Verify data replicated successfully\n    assert await c_replica.dbsize() == 100\n    assert await c_replica.hget(\"item:0\", \"name\") == \"Product 0\"\n    assert await c_replica.hget(\"item:99\", \"name\") == \"Product 99\"\n\n    # Verify KNN search works on old replica (index rebuilt from replicated data)\n    knn_result = await c_replica.execute_command(\n        \"FT.SEARCH\", \"test_idx\", \"*=>[KNN 2 @embedding $vec]\", \"PARAMS\", \"2\", \"vec\", query_vec\n    )\n    assert knn_result[0] >= 1\n    
assert \"item:50\" in knn_result\n\n\nasync def test_replicating_mc_flags(df_factory):\n    master = df_factory.create(memcached_port=11211, proactor_threads=1)\n    replica = df_factory.create(\n        memcached_port=11212, proactor_threads=1, dbfilename=f\"dump_{tmp_file_name()}\"\n    )\n    df_factory.start_all([master, replica])\n\n    c_mc_master = pymemcache.Client(f\"127.0.0.1:{master.mc_port}\", default_noreply=False)\n\n    c_replica = replica.client()\n\n    assert c_mc_master.set(\"key1\", \"value0\", noreply=True)\n    assert c_mc_master.set(\"key2\", \"value2\", noreply=True, expire=3600, flags=123456)\n    assert c_mc_master.replace(\"key1\", \"value1\", expire=4000, flags=2, noreply=True)\n\n    c_master = master.client()\n    for i in range(3, 100):\n        await c_master.set(f\"key{i}\", f\"value{i}\")\n\n    await c_replica.execute_command(f\"REPLICAOF localhost {master.port}\")\n    await wait_available_async(c_replica)\n\n    c_mc_replica = pymemcache.Client(f\"127.0.0.1:{replica.mc_port}\", default_noreply=False)\n\n    async def check_flag(key, flag):\n        res = c_mc_replica.raw_command(\"get \" + key, \"END\\r\\n\").split()\n        # workaround sometimes memcached_client.raw_command returns empty str\n        if len(res) > 2:\n            assert res[2].decode() == str(flag)\n\n    await check_flag(\"key1\", 2)\n    await check_flag(\"key2\", 123456)\n\n    for i in range(1, 100):\n        assert c_mc_replica.get(f\"key{i}\") == str.encode(f\"value{i}\")\n\n\nasync def test_double_take_over(df_factory, df_seeder_factory):\n    master = df_factory.create(proactor_threads=4, dbfilename=\"\", admin_port=ADMIN_PORT)\n    replica = df_factory.create(proactor_threads=4, dbfilename=\"\", admin_port=ADMIN_PORT + 1)\n    df_factory.start_all([master, replica])\n\n    seeder = df_seeder_factory.create(port=master.port, keys=1000, dbcount=5, stop_on_failure=False)\n    await seeder.run(target_deviation=0.1)\n\n    capture = await 
seeder.capture(port=master.port)\n\n    c_replica = replica.client()\n\n    logging.debug(\"start replication\")\n    await c_replica.execute_command(f\"REPLICAOF localhost {master.admin_port}\")\n    await wait_available_async(c_replica)\n\n    logging.debug(\"running repltakover\")\n    await c_replica.execute_command(f\"REPLTAKEOVER 10\")\n    assert await c_replica.execute_command(\"role\") == [\"master\", []]\n\n    @assert_eventually\n    async def check_master_status():\n        assert master.proc.poll() == 0, \"Master process did not exit correctly.\"\n\n    await check_master_status()\n\n    logging.debug(\"restart previous master\")\n    master.start()\n    c_master = master.client()\n\n    logging.debug(\"start second replication\")\n    await c_master.execute_command(f\"REPLICAOF localhost {replica.admin_port}\")\n    await wait_available_async(c_master)\n\n    logging.debug(\"running second repltakover\")\n    await c_master.execute_command(f\"REPLTAKEOVER 10\")\n    assert await c_master.execute_command(\"role\") == [\"master\", []]\n\n    assert await seeder.compare(capture, port=master.port)\n\n\nasync def test_replica_of_replica(df_factory):\n    # Can't connect a replica to a replica, but OK to connect 2 replicas to the same master\n    master = df_factory.create(proactor_threads=2)\n    replica = df_factory.create(proactor_threads=2)\n    replica2 = df_factory.create(proactor_threads=2)\n\n    df_factory.start_all([master, replica, replica2])\n\n    c_replica = replica.client()\n    c_replica2 = replica2.client()\n\n    assert await c_replica.execute_command(f\"REPLICAOF localhost {master.port}\") == \"OK\"\n\n    with pytest.raises(redis.exceptions.ResponseError):\n        await c_replica2.execute_command(f\"REPLICAOF localhost {replica.port}\")\n\n    assert await c_replica2.execute_command(f\"REPLICAOF localhost {master.port}\") == \"OK\"\n\n\n@pytest.mark.large\nasync def test_replication_timeout_on_full_sync_heartbeat_expiry(\n    
df_factory: DflyInstanceFactory, df_seeder_factory\n):\n    # Timeout set to 3 seconds because we must first saturate the socket such that subsequent\n    # writes block. Otherwise, we will break the flows before Heartbeat actually deadlocks.\n    master = df_factory.create(\n        proactor_threads=2, replication_timeout=3000, vmodule=\"replica=2,dflycmd=2\"\n    )\n    replica = df_factory.create()\n\n    df_factory.start_all([master, replica])\n\n    c_master = master.client()\n    c_replica = replica.client()\n\n    await c_master.execute_command(\"debug\", \"populate\", \"100000\", \"foo\", \"5000\", \"RAND\")\n\n    c_master = master.client()\n    c_replica = replica.client()\n\n    seeder = ExpirySeeder()\n    seeder_task = asyncio.create_task(seeder.run(c_master))\n    await seeder.wait_until_n_inserts(50000)\n    seeder.stop()\n    await seeder_task\n\n    await c_replica.execute_command(f\"REPLICAOF localhost {master.port}\")\n\n    # wait for full sync\n    async with async_timeout.timeout(3):\n        await wait_for_replicas_state(c_replica, state=\"full_sync\", timeout=0.05)\n\n    await c_replica.execute_command(\"debug replica pause\")\n\n    # Dragonfly would get stuck here without the bug fix. When replica does not read from the\n    # socket, Heartbeat() will block on the journal write for the expired items and shard_handler\n    # would never be called and break replication. More details on #3936.\n\n    await asyncio.sleep(6)\n\n    await c_replica.execute_command(\"debug replica resume\")  # resume replication\n\n    await asyncio.sleep(1)  # replica will start resync\n\n    await check_all_replicas_finished([c_replica], c_master, 60)\n    await assert_replica_reconnections(replica, 0)\n\n\n@pytest.mark.exclude_epoll\n@dfly_args({\"proactor_threads\": 1})\nasync def test_memory_on_big_string_loading(df_factory):\n    \"\"\"\n    In this test we want to make sure there is no spike in rss while loading big string value\n    1. 
insert 1 big value to master\n    2. replicate master\n    3. check rss peak memory on replica node\n    \"\"\"\n    master = df_factory.create()\n    replica = df_factory.create()\n\n    df_factory.start_all([master, replica])\n    c_master = master.client()\n    c_replica = replica.client()\n\n    logging.debug(\"Populate with one big string\")\n    await c_master.execute_command(\"DEBUG POPULATE 1 key 200000000 RAND\")\n\n    async def get_memory(client, field):\n        info = await client.info(\"memory\")\n        return info[field]\n\n    logging.debug(\"Start replication and wait for full sync\")\n    await c_replica.execute_command(f\"REPLICAOF localhost {master.port}\")\n    await wait_for_replicas_state(c_replica)\n\n    await c_replica.execute_command(\"memory decommit\")\n    await asyncio.sleep(1)\n    replica_peak_memory = await get_memory(c_replica, \"used_memory_peak_rss\")\n    replica_used_memory = await get_memory(c_replica, \"used_memory_rss\")\n\n    logging.info(f\"Replica Used memory {replica_used_memory}, peak memory {replica_peak_memory}\")\n    assert replica_peak_memory < 1.1 * replica_used_memory\n\n    # Check replica data consistent\n    replica_data = await DebugPopulateSeeder.capture(c_replica)\n    master_data = await DebugPopulateSeeder.capture(c_master)\n    assert master_data == replica_data\n\n\n@pytest.mark.exclude_epoll\n@pytest.mark.parametrize(\n    \"element_size, elements_number\",\n    [(16, 30000), (30000, 16)],\n)\n@dfly_args({\"proactor_threads\": 1})\nasync def test_big_containers(df_factory, element_size, elements_number):\n    master = df_factory.create()\n    replica = df_factory.create()\n\n    df_factory.start_all([master, replica])\n    c_master = master.client()\n    c_replica = replica.client()\n\n    logging.debug(\"Fill master with test data\")\n    seeder = DebugPopulateSeeder(\n        key_target=50,\n        data_size=element_size * elements_number,\n        collection_size=elements_number,\n        
variance=1,\n        samples=1,\n        types=[\"LIST\", \"SET\", \"ZSET\", \"HASH\", \"STREAM\"],\n    )\n    await seeder.run(c_master)\n\n    async def get_memory(client, field):\n        info = await client.info(\"memory\")\n        return info[field]\n\n    await asyncio.sleep(1)  # wait for heartbeat to update rss memory\n    used_memory = await get_memory(c_master, \"used_memory_rss\")\n\n    logging.debug(\"Start replication and wait for full sync\")\n    await c_replica.execute_command(f\"REPLICAOF localhost {master.port}\")\n    await wait_for_replicas_state(c_replica)\n\n    peak_memory = await get_memory(c_master, \"used_memory_peak_rss\")\n\n    logging.info(f\"Used memory {used_memory}, peak memory {peak_memory}\")\n    assert peak_memory < 1.1 * used_memory\n\n    await c_replica.execute_command(\"memory decommit\")\n    await asyncio.sleep(1)\n    replica_peak_memory = await get_memory(c_replica, \"used_memory_peak_rss\")\n    replica_used_memory = await get_memory(c_replica, \"used_memory_rss\")\n\n    logging.info(f\"Replica Used memory {replica_used_memory}, peak memory {replica_peak_memory}\")\n    assert replica_peak_memory < 1.1 * replica_used_memory\n\n    # Check replica data consistent\n    replica_data = await DebugPopulateSeeder.capture(c_replica)\n    master_data = await DebugPopulateSeeder.capture(c_master)\n    assert master_data == replica_data\n\n\nasync def test_master_too_big(df_factory):\n    master = df_factory.create(proactor_threads=4)\n    replica = df_factory.create(proactor_threads=2, maxmemory=\"600mb\")\n\n    df_factory.start_all([master, replica])\n    c_master = master.client()\n    c_replica = replica.client()\n    await c_master.execute_command(\"DEBUG POPULATE 1000000 key 1000 RAND\")\n    await c_replica.execute_command(f\"REPLICAOF localhost {master.port}\")\n\n    # We should never sync due to used memory too high during full sync\n    with pytest.raises(TimeoutError):\n        await 
wait_available_async(c_replica, timeout=10)\n\n\n@dfly_args({\"proactor_threads\": 4})\nasync def test_stream_approximate_trimming(df_factory):\n    master = df_factory.create()\n    replica = df_factory.create()\n\n    df_factory.start_all([master, replica])\n    c_master = master.client()\n    c_replica = replica.client()\n\n    await c_replica.execute_command(f\"REPLICAOF localhost {master.port}\")\n    await wait_for_replicas_state(c_replica)\n\n    # Step 1: Populate master with 100 streams, each containing 200 entries\n    num_streams = 100\n    entries_per_stream = 200\n\n    for i in range(num_streams):\n        stream_name = f\"stream{i}\"\n        for j in range(entries_per_stream):\n            await c_master.execute_command(\"XADD\", stream_name, \"*\", f\"field{j}\", f\"value{j}\")\n\n    # Step 2: Trim each stream to a random size between 70 and 200\n    for i in range(num_streams):\n        stream_name = f\"stream{i}\"\n        trim_size = random.randint(70, entries_per_stream)\n        await c_master.execute_command(\"XTRIM\", stream_name, \"MAXLEN\", \"~\", trim_size)\n\n    # Wait for replica sync\n    await check_all_replicas_finished([c_replica], c_master)\n\n    # Check replica data consistent\n    master_data = await DebugPopulateSeeder.capture(c_master)\n    replica_data = await DebugPopulateSeeder.capture(c_replica)\n    assert master_data == replica_data\n\n    # Step 3: Trim all streams to 0\n    for i in range(num_streams):\n        stream_name = f\"stream{i}\"\n        await c_master.execute_command(\"XTRIM\", stream_name, \"MAXLEN\", \"0\")\n\n    # Wait for replica sync\n    await check_all_replicas_finished([c_replica], c_master)\n\n    # Check replica data consistent\n    master_data = await DebugPopulateSeeder.capture(c_master)\n    replica_data = await DebugPopulateSeeder.capture(c_replica)\n    assert master_data == replica_data\n\n\n@dfly_args({\"proactor_threads\": 2})\nasync def 
test_replicaof_does_not_flush_if_it_fails_to_connect(df_factory):\n    master = df_factory.create(proactor_threads=2)\n    replica = df_factory.create(proactor_threads=2)\n\n    df_factory.start_all([master, replica])\n    c_master = master.client()\n    c_replica = replica.client()\n\n    await c_master.execute_command(\"SET foo bar\")\n    await c_replica.execute_command(f\"REPLICAOF localhost {master.port}\")\n    await check_all_replicas_finished([c_replica], c_master)\n\n    res = await c_replica.execute_command(\"dbsize\")\n    assert res == 1\n    with pytest.raises(redis.exceptions.ResponseError):\n        await c_replica.execute_command(f\"REPLICAOF localhost {replica.port}\")\n    res = await c_replica.execute_command(\"dbsize\")\n    assert res == 1\n\n\n@dfly_args({\"proactor_threads\": 2})\nasync def test_replicaof_inside_multi(df_factory):\n    master = df_factory.create()\n    replica = df_factory.create()\n    df_factory.start_all([master, replica])\n\n    async def replicate_inside_multi():\n        try:\n            c_master = master.client()\n            p = c_master.pipeline(transaction=True)\n            for i in range(5):\n                p.execute_command(\"dbsize\")\n            p.execute_command(f\"replicaof localhost {replica.port}\")\n            await p.execute()\n            return True\n        except redis.exceptions.ResponseError:\n            return False\n\n    MULTI_COMMANDS_TO_ISSUE = 30\n    replication_commands = [\n        asyncio.create_task(replicate_inside_multi()) for _ in range(MULTI_COMMANDS_TO_ISSUE)\n    ]\n\n    num_successes = 0\n    for result in asyncio.as_completed(replication_commands, timeout=80):\n        num_successes += await result\n\n    logging.info(f\"succeses: {num_successes}\")\n    assert MULTI_COMMANDS_TO_ISSUE == num_successes\n\n\n@pytest.mark.large\nasync def test_preempt_in_atomic_section_of_heartbeat(df_factory: DflyInstanceFactory):\n    master = df_factory.create(proactor_threads=1, 
serialization_max_chunk_size=100000000000)\n    replicas = [df_factory.create(proactor_threads=1) for i in range(2)]\n\n    # Start instances and connect clients\n    df_factory.start_all([master] + replicas)\n    c_master = master.client()\n    c_replicas = [replica.client() for replica in replicas]\n\n    total = 100000\n    await c_master.execute_command(f\"DEBUG POPULATE {total} tmp 100 TYPE SET ELEMENTS 100\")\n\n    thresehold = 50000\n    for i in range(thresehold):\n        rand = random.randint(1, 10)\n        await c_master.execute_command(f\"EXPIRE tmp:{i} {rand} NX\")\n\n    seeder = SeederV2(key_target=10_000)\n    fill_task = asyncio.create_task(seeder.run(master.client()))\n\n    for replica in c_replicas:\n        await replica.execute_command(f\"REPLICAOF LOCALHOST {master.port}\")\n\n    async with async_timeout.timeout(240):\n        await wait_for_replicas_state(*c_replicas)\n\n    await fill_task\n\n\n@pytest.mark.large\nasync def test_bug_in_json_memory_tracking(df_factory: DflyInstanceFactory):\n    \"\"\"\n    This test reproduces a bug in the JSON memory tracking.\n    \"\"\"\n    random.seed(42)\n\n    master = df_factory.create(\n        proactor_threads=2,\n        serialization_max_chunk_size=1,\n        vmodule=\"replica=2,dflycmd=2,snapshot=1,rdb_save=1,rdb_load=1,journal_slice=2\",\n    )\n    replicas = [df_factory.create(proactor_threads=2) for i in range(2)]\n\n    # Start instances and connect clients\n    df_factory.start_all([master] + replicas)\n    c_master = master.client()\n    c_replicas = [replica.client() for replica in replicas]\n\n    total = 100000\n    await c_master.execute_command(f\"DEBUG POPULATE {total} tmp 1000 TYPE SET ELEMENTS 100\")\n\n    threshold = 25000\n    for i in range(threshold):\n        rand = random.randint(1, 4)\n        await c_master.execute_command(f\"EXPIRE tmp:{i} {rand} NX\")\n\n    seeder = SeederV2(key_target=50_000)\n    fill_task = asyncio.create_task(seeder.run(master.client()))\n    
await asyncio.sleep(0.2)\n\n    for replica in c_replicas:\n        await replica.execute_command(f\"REPLICAOF LOCALHOST {master.port}\")\n\n    async with async_timeout.timeout(240):\n        await wait_for_replicas_state(*c_replicas)\n\n    await seeder.stop(c_master)\n    await fill_task\n\n\n@pytest.mark.large\n@pytest.mark.opt_only\n@dfly_args({\"proactor_threads\": 2, \"serialization_max_chunk_size\": 5000, \"compression_mode\": \"0\"})\nasync def test_big_huge_streaming_restart(df_factory: DflyInstanceFactory):\n    \"\"\"\n    Restart replicating instance with huge values. Tests that interrupting the streaming process doesn't hinder retrying replication\n    \"\"\"\n\n    master, replica = df_factory.create(), df_factory.create(proactor_threads=1)\n    df_factory.start_all([master, replica])\n    c_master, c_replica = master.client(), replica.client()\n\n    # Create huge values\n    await c_master.execute_command(\n        \"debug\", \"populate\", \"2\", \"test\", \"1000\", \"rand\", \"type\", \"zset\", \"elements\", \"1000000\"\n    )\n\n    # Restart replication a few times\n    for _ in range(3):\n        assert await c_replica.execute_command(f\"REPLICAOF localhost {master.port}\")\n        await asyncio.sleep(random.random() + 0.5)\n\n    # Wait for it to finish finally\n    async with async_timeout.timeout(60):\n        await wait_for_replicas_state(c_replica)\n\n    # Check that everything is in sync\n    hashes = await asyncio.gather(*(SeederV2.capture(c) for c in [c_master, c_replica]))\n    assert len(set(hashes)) == 1\n\n    # No in-between errors occured\n    replica.stop()\n    lines = replica.find_in_logs(\"Duplicate zset fields detected\")\n    assert len(lines) == 0\n\n\n@pytest.mark.large\nasync def test_replica_snapshot_with_big_values_while_seeding(df_factory: DflyInstanceFactory):\n    proactors = 4\n    master = df_factory.create(proactor_threads=proactors, dbfilename=\"\")\n    replica = df_factory.create(proactor_threads=proactors, 
dbfilename=\"\")\n    df_factory.start_all([master, replica])\n    c_master = master.client()\n    c_replica = replica.client()\n\n    # 50% big values\n    seeder_config = dict(key_target=8_000, huge_value_target=4_000)\n    # Fill instance with test data\n    seeder = SeederV2(**seeder_config)\n    await seeder.run(c_master, target_deviation=0.01)\n\n    assert await c_replica.execute_command(f\"REPLICAOF localhost {master.port}\")\n    async with async_timeout.timeout(60):\n        await wait_for_replicas_state(c_replica)\n\n    # Start data stream\n    stream_task = asyncio.create_task(seeder.run(c_master))\n    await asyncio.sleep(1)\n\n    file_name = tmp_file_name()\n    assert await c_replica.execute_command(f\"SAVE DF {file_name}\") == \"OK\"\n    await seeder.stop(c_master)\n    await stream_task\n\n    await check_all_replicas_finished([c_replica], c_master)\n\n    # Check that everything is in sync\n    hashes = await asyncio.gather(*(SeederV2.capture(c) for c in [c_master, c_replica]))\n    assert len(set(hashes)) == 1\n\n    replica.stop()\n    lines = replica.find_in_logs(\"Exit SnapshotSerializer\")\n    assert len(lines) == (proactors - 1)\n    for line in lines:\n        # We test the serializtion path of command execution\n        side_saved = extract_int_after_prefix(\"side_saved \", line)\n        assert side_saved > 0\n\n    # Check that the produced rdb is loaded correctly\n    node = df_factory.create(dbfilename=file_name)\n    node.start()\n    c_node = node.client()\n    await wait_available_async(c_node)\n    assert await c_node.execute_command(\"dbsize\") > 0\n    await c_node.execute_command(\"FLUSHALL\")\n\n\n@pytest.mark.parametrize(\n    \"use_takeover, backlog_len\",\n    [(False, 2), (False, 1), (True, 1), (True, 10)],\n)\nasync def test_partial_replication_on_same_source_master(df_factory, use_takeover, backlog_len):\n    master = df_factory.create()\n    replica1 = df_factory.create(shard_repl_backlog_len=backlog_len)\n    
replica2 = df_factory.create()\n\n    df_factory.start_all([master, replica1, replica2])\n    c_master = master.client()\n    c_replica1 = replica1.client()\n    c_replica2 = replica2.client()\n\n    logging.debug(\"Fill master with test data\")\n    seeder = DebugPopulateSeeder(key_target=50)\n    await seeder.run(c_master)\n\n    logging.debug(\"Start replication and wait for full sync\")\n    await c_replica1.execute_command(f\"REPLICAOF localhost {master.port}\")\n    await wait_for_replicas_state(c_replica1)\n    await c_replica2.execute_command(f\"REPLICAOF localhost {master.port}\")\n    await wait_for_replicas_state(c_replica2)\n\n    # Send some traffic\n    seeder = SeederV2(key_target=8_000)\n    await seeder.run(c_master, target_deviation=0.01)\n\n    # Wait for all journal changes propagate to replicas\n    await check_all_replicas_finished([c_replica1, c_replica2], c_master)\n\n    if use_takeover:\n        # Promote first replica to master\n        await c_replica1.execute_command(f\"REPLTAKEOVER 5\")\n        if backlog_len > 1:\n            await c_replica1.execute_command(\"SET bar foo\")\n            await c_replica1.execute_command(\"SET foo bar\")\n\n    else:\n        # Promote first replica to master\n        await c_replica1.execute_command(f\"REPLICAOF NO ONE\")\n        await c_master.set(\"x\", \"y\")\n        await c_master.set(\"x\", \"y\")\n        await check_all_replicas_finished([c_replica2], c_master)\n\n    # Start replication with new master\n    await c_replica2.execute_command(f\"REPLICAOF localhost {replica1.port}\")\n\n    await check_all_replicas_finished([c_replica2], c_replica1)\n    # Validate data\n    if use_takeover:\n        hash1, hash2 = await asyncio.gather(\n            *(SeederV2.capture(c) for c in (c_replica1, c_replica2))\n        )\n        assert hash1 == hash2\n        s1 = await c_replica1.execute_command(\"dbsize\")\n        s2 = await c_replica1.execute_command(\"dbsize\")\n        assert s1 == s2\n\n    
# Check we can takeover to the second replica\n    await c_replica2.execute_command(f\"REPLTAKEOVER 5\")\n\n    replica1.stop()\n    replica2.stop()\n    if use_takeover:\n        # Check logs for partial replication\n        lines = replica2.find_in_logs(f\"Started partial sync with localhost:{replica1.port}\")\n        assert len(lines) == 1\n        # Check no full sync logs\n        lines = replica2.find_in_logs(f\"Started full sync with localhost:{replica1.port}\")\n        assert len(lines) == 0\n    else:\n        lines = replica2.find_in_logs(f\"Started full sync with localhost:{replica1.port}\")\n        assert len(lines) == 1\n        # No partial sync after NO ONE\n        lines = replica2.find_in_logs(f\"Started partial sync with localhost:{replica1.port}\")\n        assert len(lines) == 0\n\n\nasync def test_partial_replication_on_same_source_master_with_replica_lsn_inc(df_factory):\n    server1 = df_factory.create()\n    server2 = df_factory.create()\n    server3 = df_factory.create()\n    server4 = df_factory.create()\n\n    df_factory.start_all([server1, server2, server3, server4])\n    c_s2 = server2.client()\n    c_s3 = server3.client()\n    c_s4 = server4.client()\n\n    logging.debug(\"Start replication and wait for full sync\")\n    await c_s2.execute_command(f\"REPLICAOF localhost {server1.port}\")\n    await wait_for_replicas_state(c_s2)\n    await c_s3.execute_command(f\"REPLICAOF localhost {server1.port}\")\n    await wait_for_replicas_state(c_s3)\n\n    # Promote server 2 to master\n    await c_s2.execute_command(f\"REPLTAKEOVER 20\")\n    # Make server 4 replica of server 2\n    await c_s4.execute_command(f\"REPLICAOF localhost {server2.port}\")\n    # Send some write command for lsn inc\n    for i in range(100):\n        await c_s2.set(i, \"val\")\n    # Make server 3 replica of server 2\n    await c_s3.execute_command(f\"REPLICAOF localhost {server2.port}\")\n\n    await check_all_replicas_finished([c_s3], c_s2)\n    await 
check_all_replicas_finished([c_s4], c_s2)\n\n    s2_sz = await c_s2.dbsize()\n    s3_sz = await c_s3.dbsize()\n    assert s2_sz == 100\n    assert s2_sz == s3_sz\n\n    s4_sz = await c_s4.dbsize()\n    assert s3_sz == s4_sz\n\n    server3.stop()\n    # Check logs for partial replication\n    lines = server3.find_in_logs(f\"Started partial sync with localhost:{server2.port}\")\n    assert len(lines) == 1\n\n\nasync def test_replicate_hset_with_expiry(df_factory: DflyInstanceFactory):\n    master = df_factory.create(proactor_threads=2)\n    replica = df_factory.create(proactor_threads=2)\n\n    master.start()\n    replica.start()\n\n    cm = master.client()\n    await cm.execute_command(\"HSETEX key 86400 name 1234\")\n\n    cr = replica.client()\n    await cr.execute_command(f\"REPLICAOF localhost {master.port}\")\n    await wait_available_async(cr)\n\n    result = await cr.hgetall(\"key\")\n\n    assert \"name\" in result\n    assert result[\"name\"] == \"1234\"\n\n\nasync def test_bug_5221(df_factory):\n    master = df_factory.create(\n        proactor_threads=1,\n        cache_mode=\"true\",\n        maxmemory=\"256mb\",\n        enable_heartbeat_eviction=\"true\",\n        eviction_memory_budget_threshold=0.9,\n    )\n    replica = df_factory.create(proactor_threads=4)\n    df_factory.start_all([master, replica])\n\n    c_master = master.client()\n    c_replica = replica.client()\n    await c_replica.execute_command(f\"replicaof localhost {master.port}\")\n\n    # Fill master with test data\n    seeder = SeederV2(key_target=22000, data_size=1000)\n    await seeder.run(c_master, target_deviation=0.01)\n    await asyncio.sleep(1)\n    await seeder.run(c_master, target_deviation=0.01)\n    res = await c_master.execute_command(\"dbsize\")\n    assert res > 0\n\n\n@pytest.mark.parametrize(\"proactors\", [1, 4, 6])\n@pytest.mark.parametrize(\"backlog_len\", [1, 256, 1024, 1300])\nasync def test_partial_sync(df_factory, proactors, backlog_len):\n    keys = 5_000\n    
if proactors > 1:\n        keys = 10_000\n\n    # We use lock_on_hashtag because we want to seed enough elements to one flow/journal such that\n    # the partial sync stales.\n    master = df_factory.create(\n        proactor_threads=proactors, shard_repl_backlog_len=backlog_len, lock_on_hashtags=True\n    )\n    replica = df_factory.create(proactor_threads=proactors)\n\n    df_factory.start_all([replica, master])\n\n    async def stream(client, total):\n        for i in range(0, total):\n            prefix = \"{prefix}\"\n            # Seed to one shard only. This will eventually cause one of the flows to become stale.\n            await client.execute_command(f\"SET {prefix}foo{i} bar{i}\")\n\n    async with replica.client() as c_replica, master.client() as c_master:\n        seeder = SeederV2(key_target=keys)\n        await seeder.run(c_master, target_deviation=0.01)\n\n        proxy = Proxy(\"127.0.0.1\", 1113, \"127.0.0.1\", master.port)\n        await proxy.start()\n        task = asyncio.create_task(proxy.serve())\n\n        try:\n            await c_replica.execute_command(f\"REPLICAOF localhost {proxy.port}\")\n            # Reach stable sync\n            await wait_for_replicas_state(c_replica)\n            # Stream some elements\n            await stream(c_master, backlog_len)\n\n            proxy.drop_connection()\n            # Give time to detect dropped connection and reconnect\n            await asyncio.sleep(1.0)\n            # Partial synced here\n            await check_all_replicas_finished([c_replica], c_master)\n            hash1, hash2 = await asyncio.gather(\n                *(SeederV2.capture(c) for c in (c_master, c_replica))\n            )\n            assert hash1 == hash2\n\n            await proxy.close()\n            # Whoops we moved too much, no partial sync here\n            await stream(c_master, backlog_len + 10)\n            await proxy.start()\n            await asyncio.sleep(1.0)\n\n            await 
check_all_replicas_finished([c_replica], c_master)\n\n            hash1, hash2 = await asyncio.gather(\n                *(SeederV2.capture(c) for c in (c_master, c_replica))\n            )\n            assert hash1 == hash2\n        finally:\n            await proxy.close(task)\n\n    master.stop()\n    replica.stop()\n    # Partial sync worked\n    lines = master.find_in_logs(\"Partial sync requested from LSN\")\n    # Because we run with num_shards = proactors - 1\n    total_attempts = 1\n    if proactors > 1:\n        total_attempts = proactors - 1 + proactors - 2\n    assert len(lines) == total_attempts\n    # Second partial sync failed because of stale LSN\n    lines = master.find_in_logs(\"Partial sync requested from stale LSN\")\n    assert len(lines) == 1\n\n\nasync def test_mc_gat_replication(df_factory):\n    master = df_factory.create(memcached_port=11211, proactor_threads=1)\n    replica = df_factory.create(memcached_port=11212, proactor_threads=1)\n    df_factory.start_all([master, replica])\n\n    cm = pymemcache.Client(f\"127.0.0.1:{master.mc_port}\", default_noreply=False)\n\n    key = \"foo\"\n    value = b\"bar\"\n    not_found = b\"NOTFOUND\"\n    assert cm.set(key, value, noreply=True)\n\n    async with replica.client() as cl:\n        await cl.execute_command(f\"REPLICAOF localhost {master.port}\")\n        await wait_available_async(cl)\n\n    async def state_transitioned_stable(\n        init: bytes,\n        expected: bytes,\n        duration_sec=5,\n        sleep_sec=1,\n    ):\n        \"\"\"\n        Asserts that the state goes from initial to expected and then stays at expected, observing state for duration_sec\n        \"\"\"\n        _start = time.time()\n        transitioned = False\n        state = None\n        while time.time() - _start < duration_sec:\n            state = cr.get(key, not_found)\n            if not transitioned and state == expected:\n                transitioned = True\n            if transitioned:\n               
 assert (\n                    state == expected\n                ), f\"state moved back to initial after transition {state=} {init=} {expected=}\"\n            else:\n                assert state == init, f\"unexpected state: {state=} {init=}\"\n            await asyncio.sleep(sleep_sec)\n        return state == expected\n\n    cr = pymemcache.Client(f\"127.0.0.1:{replica.mc_port}\", default_noreply=False)\n\n    assert await state_transitioned_stable(not_found, value)\n\n    # Force the key to be removed by setting expiry in the past. Memcache treats expiry > 1 month as absolute from\n    # epoch, so 1 month + 1 second expires the key\n    month_plus_one = 60 * 60 * 24 * 30 + 1\n\n    # GAT|GATS are not directly exposed in the python client API\n    assert cm._fetch_cmd(b\"gat\", [str(month_plus_one), key], expect_cas=False) == {}\n\n    # The replica should eventually sync the delete operation\n    assert await state_transitioned_stable(value, not_found)\n\n    assert cm.set(key, value, noreply=True)\n    # expiry is set as now + 1000 seconds, which ensures the key will remain for the duration of the test\n    assert cm._fetch_cmd(b\"gat\", [str(1000), key], expect_cas=False) == {key: value}\n\n    # once the value is synced to the replica, assert that it remains stable and is not removed by setting expiry\n    assert await state_transitioned_stable(not_found, value)\n\n    result = cm._fetch_cmd(b\"gats\", [str(1000), key], expect_cas=True)\n    assert len(result) == 1 and key in result, f\"missing expected key: {result=}\"\n    expected_cas_ver = b\"0\"\n    assert result[key] == (value, expected_cas_ver), f\"unexpected result for key: {result=}\"\n\n\n@pytest.mark.skip(\"Fails constantly on CI\")\n@pytest.mark.large\n@pytest.mark.parametrize(\"serialization_max_size\", [1, 64000])\nasync def test_replication_onmove_flow(df_factory, serialization_max_size):\n    master = df_factory.create(\n        proactor_threads=2,\n        cache_mode=True,\n        
point_in_time_snapshot=False,\n        serialization_max_chunk_size=serialization_max_size,\n    )\n    replica = df_factory.create(proactor_threads=2)\n\n    df_factory.start_all([master, replica])\n    c_master = master.client()\n    c_replica = replica.client()\n\n    key_target = 100000\n    # Fill master with test data\n    await c_master.execute_command(f\"DEBUG POPULATE {key_target} key 32 RAND TYPE hash ELEMENTS 10\")\n    logging.debug(\"finished populate\")\n\n    stop_event = asyncio.Event()\n\n    async def get_keys():\n        while not stop_event.is_set():\n            pipe = c_master.pipeline(transaction=False)\n            for _ in range(50):\n                id = random.randint(0, key_target)\n                pipe.hlen(f\"key:{id}\")\n            await pipe.execute()\n\n    get_task = asyncio.create_task(get_keys())\n    await asyncio.sleep(0.1)\n\n    # Start replication and wait for full sync\n    await c_replica.execute_command(f\"REPLICAOF localhost {master.port}\")\n    await wait_for_replicas_state(c_replica)\n\n    info = await c_master.info(\"stats\")\n    assert info[\"bump_ups\"] >= 100\n\n    await check_all_replicas_finished([c_replica], c_master)\n    stop_event.set()\n    await get_task\n\n    # Check replica data consisten\n    hash1, hash2 = await asyncio.gather(*(SeederV2.capture(c) for c in (c_master, c_replica)))\n    assert hash1 == hash2\n\n    master.stop()\n    lines = master.find_in_logs(\"Exit SnapshotSerializer\")\n    assert len(lines) > 0\n    for line in lines:\n        # We test the full sync on moved path execution\n        moved_saved = extract_int_after_prefix(\"moved_saved \", line)\n        logging.debug(f\"Moved saves {moved_saved}\")\n        assert moved_saved > 0\n\n\n@pytest.mark.large\n@dfly_args({\"proactor_threads\": 1})\nasync def test_big_strings(df_factory):\n    master = df_factory.create(\n        proactor_threads=1, serialization_max_chunk_size=1, vmodule=\"snapshot=1\"\n    )\n    replica = 
df_factory.create(proactor_threads=1)\n\n    df_factory.start_all([master, replica])\n    c_master = master.client()\n    c_replica = replica.client()\n\n    # 200kb\n    value_size = 200_000\n\n    async def get_memory(client, field):\n        info = await client.info(\"memory\")\n        return info[field]\n\n    capacity = await get_memory(c_master, \"prime_capacity\")\n\n    seeder = DebugPopulateSeeder(\n        key_target=int(capacity * 0.7),\n        data_size=value_size,\n        collection_size=1,\n        variance=1,\n        samples=1,\n        types=[\"STRING\"],\n    )\n    await seeder.run(c_master)\n\n    # sanity\n    capacity = await get_memory(c_master, \"prime_capacity\")\n    assert capacity < 8000\n\n    await c_replica.execute_command(f\"REPLICAOF localhost {master.port}\")\n    await wait_for_replicas_state(c_replica)\n\n    # Check if replica data is consistent\n    replica_data = await DebugPopulateSeeder.capture(c_replica)\n    master_data = await DebugPopulateSeeder.capture(c_master)\n    assert master_data == replica_data\n\n    replica.stop()\n    master.stop()\n\n    lines = master.find_in_logs(\"Serialization peak bytes: \")\n    assert len(lines) == 1\n    # We test the serializtion path of command execution\n    line = lines[0]\n    peak_bytes = extract_int_after_prefix(\"Serialization peak bytes: \", line)\n    assert peak_bytes < value_size\n\n\n@pytest.mark.large\nasync def test_takeover_bug_wrong_replica_checked_in_logs(df_factory):\n    master = df_factory.create(proactor_threads=4, vmodule=\"dflycmd=1\")\n    replicas = [df_factory.create(proactor_threads=2) for _ in range(3)]\n    df_factory.start_all([master] + replicas)\n\n    c_master = master.client()\n    clients = [r.client() for r in replicas]\n\n    # Connect all replicas\n    for c in clients:\n        await c.execute_command(f\"REPLICAOF localhost {master.port}\")\n    await asyncio.gather(*[wait_available_async(c) for c in clients])\n\n    # Disconnect replica[1] 
to create lag\n    await clients[1].execute_command(\"REPLICAOF NO ONE\")\n\n    # Write data that replica[1] will miss\n    pipe = c_master.pipeline()\n    for i in range(10000):\n        pipe.set(f\"k{i}\", \"x\" * 100)\n    await pipe.execute()\n\n    # Reconnect replica[1] and immediately takeover from replica[0]\n    await clients[1].execute_command(f\"REPLICAOF localhost {master.port}\")\n\n    await check_all_replicas_finished(clients, c_master)\n\n    await clients[0].execute_command(\"REPLTAKEOVER 10\")\n\n    # Check master logs\n    master.stop(kill=False)\n\n    timeout_logs = master.find_in_logs(\n        f\"Couldn't synchronize with replica for takeover in time: 127.0.0.1:{replicas[0].port}\"\n    )\n    assert not timeout_logs\n\n\n@pytest.mark.large\nasync def test_takeover_timeout_on_unresponsive_master(df_factory):\n    master = df_factory.create(proactor_threads=4)\n    replica = df_factory.create(proactor_threads=2)\n    df_factory.start_all([master, replica])\n\n    c_master = master.client()\n    c_replica = replica.client()\n\n    # Setup replication\n    await c_replica.execute_command(f\"REPLICAOF localhost {master.port}\")\n    await wait_available_async(c_replica)\n\n    # Write some data\n    for i in range(10):\n        await c_master.set(f\"key{i}\", f\"val{i}\")\n    await asyncio.sleep(0.2)\n\n    # PAUSE master process (SIGSTOP) - socket stays open but doesn't respond
\n    os.kill(master.proc.pid, signal.SIGSTOP)\n    logging.info(f\"Paused master process {master.proc.pid}\")\n\n    # Try takeover with 5 second timeout\n    # BUG: This will hang forever because SendNextPhaseRequest has no timeout\n    # FIXED: Should return error within ~15 seconds (5 + buffer)\n    start_time = time.time()\n    try:\n        await asyncio.wait_for(\n            c_replica.execute_command(\"REPLTAKEOVER 5\"),\n            timeout=20,  # Should complete within 20 seconds\n        )\n        elapsed = time.time() - start_time\n        logging.info(f\"Takeover completed in {elapsed:.1f}s\")\n    except asyncio.TimeoutError:\n        elapsed = time.time() - start_time\n        pytest.fail(\n            f\"BUG: REPLTAKEOVER hung for {elapsed:.1f}s without timeout. \"\n            f\"SendNextPhaseRequest in replica.cc has no socket timeout.\"\n        )\n    except Exception as e:\n        # Expected: connection error or timeout error\n        elapsed = time.time() - start_time\n        logging.info(f\"Takeover failed after {elapsed:.1f}s: {e}\")\n        # Should fail quickly, not hang\n        assert elapsed < 20, f\"Took too long: {elapsed:.1f}s\"\n    finally:\n        # Resume master so it can be stopped properly\n        try:\n            os.kill(master.proc.pid, signal.SIGCONT)\n        except Exception:\n            pass\n\n\nasync def test_replica_of_self(async_client):\n    port = async_client.connection_pool.connection_kwargs[\"port\"]\n    with pytest.raises(redis.exceptions.ResponseError):\n        await async_client.execute_command(f\"replicaof localhost {port}\")\n\n    with pytest.raises(redis.exceptions.ResponseError):\n        await async_client.execute_command(f\"replicaof 127.0.0.1 {port}\")\n\n\n@dfly_args({\"replicaof_no_one_start_journal\": True, \"proactor_threads\": 2})\nasync def test_repl_offset(df_factory):\n    master = df_factory.create()\n    replica1 = df_factory.create()\n    replica2 = 
df_factory.create()\n    replica3 = df_factory.create()\n\n    df_factory.start_all([master, replica1, replica2, replica3])\n    c_master = master.client()\n    c_replica1 = replica1.client()\n    c_replica2 = replica2.client()\n    c_replica3 = replica3.client()\n\n    seeder = DebugPopulateSeeder(key_target=50)\n    await seeder.run(c_master)\n\n    await c_replica1.execute_command(f\"REPLICAOF localhost {master.port}\")\n    await wait_for_replicas_state(c_replica1)\n    await c_replica2.execute_command(f\"REPLICAOF localhost {master.port}\")\n    await wait_for_replicas_state(c_replica2)\n    await c_replica3.execute_command(f\"REPLICAOF localhost {master.port}\")\n    await wait_for_replicas_state(c_replica3)\n\n    seeder = SeederV2(key_target=50)\n    await seeder.run(c_master, target_deviation=0.01)\n\n    # Wait for all journal changes propagate to replicas\n    await check_all_replicas_finished([c_replica1, c_replica2, c_replica3], c_master)\n\n    # Promote first replica to master\n    await c_replica1.execute_command(f\"REPLTAKEOVER 5\")\n\n    # issue 4183\n    async def with_timeout_link_down(client):\n        async with async_timeout.timeout(2):\n            while True:\n                info = await client.info(\"replication\")\n                if info[\"master_link_status\"] == \"down\":\n                    assert info[\"slave_repl_offset\"] > 0\n                    break\n                await asyncio.sleep(0.1)\n\n    await with_timeout_link_down(c_replica2)\n    assert \"OK\" == await c_replica2.execute_command(\"replicaof no one\")\n\n    # Partial sync here\n    await c_replica3.execute_command(f\"REPLICAOF localhost {replica2.port}\")\n    # Full sync here\n    await c_replica1.execute_command(f\"REPLICAOF localhost {replica2.port}\")\n\n    await check_all_replicas_finished([c_replica1, c_replica3], c_replica2)\n\n    info = await c_replica3.info(\"replication\")\n    # 1 repl flow per proactor.\n    proactors = 2\n    # if `replicaof no 
one` on `c_replica2` does not preserve the journal offsets,\n    # then the assertion below shall fail. In that case, replicas perform a full sync first\n    # and as there are no journal changes the slave offsets are 2 (1 per shard).\n    assert info[\"slave_repl_offset\"] > proactors\n    assert info[\"psync_successes\"] == 1\n\n    await c_replica1.execute_command(f\"REPLTAKEOVER 5\")\n    await with_timeout_link_down(c_replica3)\n\n\nasync def test_partial_sync_with_different_shard_sizes(df_factory):\n    master = df_factory.create(proactor_threads=3)\n    replica1 = df_factory.create(proactor_threads=4)\n    replica2 = df_factory.create(proactor_threads=5)\n    replica3 = df_factory.create(proactor_threads=6)\n\n    df_factory.start_all([replica1, replica2, replica3, master])\n\n    c_replica1 = replica1.client()\n    c_replica2 = replica2.client()\n    c_replica3 = replica3.client()\n\n    c_master = master.client()\n\n    await c_master.execute_command(\"debug populate 5000\")\n\n    await c_replica1.execute_command(f\"replicaof localhost {master.port}\")\n    await c_replica2.execute_command(f\"replicaof localhost {master.port}\")\n    await c_replica3.execute_command(f\"replicaof localhost {master.port}\")\n\n    seeder = SeederV2(key_target=100)\n    await seeder.run(c_master, target_deviation=0.01)\n\n    await check_all_replicas_finished([c_replica1, c_replica2, c_replica3], c_master)\n\n    await c_replica1.execute_command(\"repltakeover 5\")\n    await c_replica2.execute_command(f\"replicaof localhost {replica1.port}\")\n    await c_replica3.execute_command(f\"replicaof localhost {replica1.port}\")\n\n    await check_all_replicas_finished([c_replica2, c_replica3], c_replica1)\n\n    for replica in (replica1, replica2, replica3):\n        replica.stop()\n\n    lines = replica2.find_in_logs(f\"Started partial sync with localhost:{replica1.port}\")\n    assert len(lines) == 0\n    lines = replica3.find_in_logs(f\"Started partial sync with 
localhost:{replica1.port}\")\n    assert len(lines) == 0\n\n\n@pytest.mark.large\nasync def test_replica_reconnection_leaks_connections(df_factory: DflyInstanceFactory):\n    master = df_factory.create(proactor_threads=4)\n    replica = df_factory.create(proactor_threads=4)\n    df_factory.start_all([master, replica])\n\n    c_master = master.client()\n    c_replica = replica.client()\n\n    info = await c_master.info(\"clients\")\n    baseline = info[\"connected_clients\"]\n\n    num_cycles = 20\n    for _ in range(num_cycles):\n        await c_replica.execute_command(f\"REPLICAOF localhost {master.port}\")\n        await wait_for_replicas_state(c_replica)\n        await c_replica.execute_command(\"REPLICAOF NO ONE\")\n\n    # Wait for connected_clients to stabilize (stop changing)\n    prev = None\n    async for info, breaker in info_tick_timer(c_master, \"clients\", timeout=10):\n        with breaker:\n            curr = info[\"connected_clients\"]\n            assert curr == prev\n        prev = curr\n\n    leaked = prev - baseline\n    assert leaked == 0, f\"connected_clients leaked {leaked} after {num_cycles} reconnect cycles\"\n\n    await c_master.aclose()\n    await c_replica.aclose()\n\n\n@dfly_args({\"proactor_threads\": 2})\nasync def test_xreadgroup_replication(df_factory):\n    master = df_factory.create()\n    replica = df_factory.create()\n\n    master.start()\n    replica.start()\n\n    c_master = master.client()\n    c_replica = replica.client()\n\n    await c_replica.execute_command(f\"REPLICAOF localhost {master.port}\")\n\n    async def compare_group_info(stream_key, expected_pending, expected_entries_read):\n        master_info = await c_master.execute_command(f\"XINFO GROUPS {stream_key}\")\n        replica_info = await c_replica.execute_command(f\"XINFO GROUPS {stream_key}\")\n\n        # Parse group info (format: [name, consumers, pending, last-delivered-id, entries-read, lag])\n        assert len(master_info) == len(replica_info)\n\n       
 for m_group, r_group in zip(master_info, replica_info):\n            m_dict = dict(zip(m_group[::2], m_group[1::2]))\n            r_dict = dict(zip(r_group[::2], r_group[1::2]))\n\n            assert m_dict[\"last-delivered-id\"] == r_dict[\"last-delivered-id\"]\n            assert m_dict[\"entries-read\"] == r_dict[\"entries-read\"]\n            assert m_dict[\"entries-read\"] == expected_entries_read\n            assert m_dict[\"pending\"] == r_dict[\"pending\"]\n            assert m_dict[\"pending\"] == expected_pending\n            assert m_dict[\"consumers\"] == r_dict[\"consumers\"]\n\n    # Case 1: Non-blocking path, NOACK\n    await c_master.execute_command(\"XGROUP CREATE mystream mygroup $ MKSTREAM\")\n    await c_master.execute_command(\"XADD mystream * tmp tmp\")\n    await c_master.execute_command(\"XREADGROUP GROUP mygroup worker1 NOACK STREAMS mystream >\")\n\n    await check_all_replicas_finished([c_replica], c_master)\n    await compare_group_info(\"mystream\", 0, 1)\n\n    # Case 2: Non-blocking path, with PEL\n    await c_master.execute_command(\"XADD mystream * tmp tmp\")\n    await c_master.execute_command(\"XADD mystream * tmp tmp\")\n    await c_master.execute_command(\"XREADGROUP GROUP mygroup worker1 STREAMS mystream >\")\n\n    await check_all_replicas_finished([c_replica], c_master)\n    await compare_group_info(\"mystream\", 2, 3)\n\n    # Case 3: Blocking path, NOACK\n\n    # Start blocking XREADGROUP in background\n    read_task = asyncio.create_task(\n        c_master.execute_command(\n            \"XREADGROUP GROUP mygroup worker1 NOACK BLOCK 0 STREAMS mystream >\"\n        )\n    )\n    # Let the blocking command start\n    await asyncio.sleep(0.1)\n    await c_master.execute_command(\"XADD mystream * tmp tmp\")\n\n    await read_task\n\n    await check_all_replicas_finished([c_replica], c_master)\n    await compare_group_info(\"mystream\", 2, 4)\n\n    # Case 4: Blocking path, with PEL\n\n    # Start blocking XREADGROUP in 
background\n    read_task = asyncio.create_task(\n        c_master.execute_command(\"XREADGROUP GROUP mygroup worker1 BLOCK 0 STREAMS mystream >\")\n    )\n\n    await asyncio.sleep(0.1)\n    await c_master.execute_command(\"XADD mystream * tmp tmp\")\n    await read_task\n\n    await check_all_replicas_finished([c_replica], c_master)\n    await compare_group_info(\"mystream\", 3, 5)\n\n    await c_master.execute_command(\"flushall\")\n    # Create consumer\n    await c_master.execute_command(\"XGROUP CREATE mystream mygroup $ MKSTREAM\")\n    await c_master.execute_command(\"XADD mystream 2000-0 tmp tmp\")\n    # Add to PEL but don't ack\n    await c_master.execute_command(\"XREADGROUP GROUP mygroup worker1 STREAMS mystream >\")\n    await c_master.execute_command(\"XREADGROUP GROUP mygroup worker2 STREAMS mystream 2000-0\")\n\n    await check_all_replicas_finished([c_replica], c_master)\n    await compare_group_info(\"mystream\", 1, 1)\n\n\n\"\"\"\nTest replication with mismatched dbnum between master and replica.\n\"\"\"\n\n\n@dfly_args({\"proactor_threads\": 2})\nasync def test_replication_replica_smaller_dbnum_shared_dbs_only(\n    df_factory: DflyInstanceFactory,\n):\n    \"\"\"\n    Replica dbnum < Master dbnum, but master only uses DBs within\n    the replica's range. 
Replication should succeed.\n    \"\"\"\n    master = df_factory.create(dbnum=8)\n    replica = df_factory.create(dbnum=4)\n\n    df_factory.start_all([master, replica])\n\n    c_master = master.client()\n\n    # Populate data only in DBs 0-3 (within replica's dbnum range)\n    for db in range(4):\n        c = master.client(db=db)\n        for i in range(50):\n            await c.set(f\"key:{db}:{i}\", f\"val:{db}:{i}\")\n        await c.close()\n\n    # Start replication\n    c_replica = replica.client()\n    await c_replica.execute_command(f\"REPLICAOF localhost {master.port}\")\n\n    async with async_timeout.timeout(10):\n        await wait_for_replicas_state(c_replica)\n\n    await check_all_replicas_finished([c_replica], c_master)\n\n    # Verify all data is present in the replica across shared DBs\n    for db in range(4):\n        c_m = master.client(db=db)\n        c_r = replica.client(db=db)\n        for i in range(50):\n            assert await c_r.get(f\"key:{db}:{i}\") == await c_m.get(f\"key:{db}:{i}\")\n        await c_m.close()\n        await c_r.close()\n\n\n@dfly_args({\"proactor_threads\": 2})\nasync def test_replication_replica_larger_dbnum(\n    df_factory: DflyInstanceFactory,\n):\n    \"\"\"\n    Replica dbnum > Master dbnum. 
Replication should succeed;\n    the replica's extra DBs remain empty.\n    \"\"\"\n    master = df_factory.create(dbnum=4)\n    replica = df_factory.create(dbnum=8)\n\n    df_factory.start_all([master, replica])\n\n    c_master = master.client()\n\n    # Populate all DBs on the master (0-3)\n    for db in range(4):\n        c = master.client(db=db)\n        for i in range(50):\n            await c.set(f\"key:{db}:{i}\", f\"val:{db}:{i}\")\n        await c.close()\n\n    # Start replication\n    c_replica = replica.client()\n    await c_replica.execute_command(f\"REPLICAOF localhost {master.port}\")\n\n    async with async_timeout.timeout(10):\n        await wait_for_replicas_state(c_replica)\n\n    await check_all_replicas_finished([c_replica], c_master)\n\n    # Verify master's data is present in the replica\n    for db in range(4):\n        c_m = master.client(db=db)\n        c_r = replica.client(db=db)\n        for i in range(50):\n            assert await c_r.get(f\"key:{db}:{i}\") == await c_m.get(f\"key:{db}:{i}\")\n        await c_m.close()\n        await c_r.close()\n\n    # Verify the replica's extra DBs (4-7) are empty\n    for db in range(4, 8):\n        c_r = replica.client(db=db)\n        assert await c_r.dbsize() == 0\n        await c_r.close()\n\n\n# BF.RESERVE with error_rate=0.00001 and capacity=1e9 creates a single bloom filter\n# of exactly 2^32 bytes (4 GiB). 
The chunked RDB loader used `unsigned` for the total\n# filter size, which silently overflowed to 0 and broke the RDB stream.\n@pytest.mark.large\nasync def test_sbf_chunked_replication_over_4gb(df_factory: DflyInstanceFactory):\n    master = df_factory.create(\n        proactor_threads=1,\n        maxmemory=\"6G\",\n        rdb_sbf_chunked=\"true\",\n    )\n    replica = df_factory.create(\n        proactor_threads=1,\n        maxmemory=\"6G\",\n    )\n\n    df_factory.start_all([master, replica])\n\n    c_master = master.client()\n    c_replica = replica.client()\n\n    await c_master.execute_command(\"BF.RESERVE\", \"bf\", \"0.00001\", \"1000000000\")\n    await c_master.execute_command(\"BF.ADD\", \"bf\", \"hello\")\n\n    await c_replica.execute_command(f\"REPLICAOF localhost {master.port}\")\n\n    async with async_timeout.timeout(240):\n        await wait_for_replicas_state(c_replica)\n\n    await check_all_replicas_finished([c_replica], c_master)\n\n    assert await c_replica.execute_command(\"BF.EXISTS\", \"bf\", \"hello\") == 1\n\n\n@pytest.mark.parametrize(\n    \"master_threads, replica_threads\",\n    [[3, 4], [4, 4], [4, 3]],\n)\nasync def test_hnsw_search_replication_with_network_disruptions(\n    df_factory: DflyInstanceFactory,\n    master_threads: int,\n    replica_threads: int,\n):\n    \"\"\"\n    Test HNSW search index replication under continuous traffic and a network disruption.\n\n    Creates a master with an HNSW vector index, starts concurrent write traffic and\n    search queries, replicates through a proxy, and drops the connection at a random\n    moment within the first 10 seconds (may hit full sync or stable sync).\n    \"\"\"\n    master = df_factory.create(proactor_threads=master_threads)\n    replica = df_factory.create(proactor_threads=replica_threads)\n    df_factory.start_all([master, replica])\n\n    c_master = master.client()\n    c_replica = replica.client()\n\n    seeder = HnswSearchSeeder(num_initial_docs=500)\n    await 
seeder.create_index(c_master)\n    await seeder.seed_initial_docs(c_master)\n\n    proxy = Proxy(\"127.0.0.1\", 0, \"127.0.0.1\", master.port)\n    await proxy.start()\n    proxy_task = asyncio.create_task(proxy.serve())\n\n    traffic_task = asyncio.create_task(seeder.run_traffic(c_master))\n    search_task = asyncio.create_task(seeder.run_search_queries(c_master))\n    replica_search_task = asyncio.create_task(seeder.run_search_queries(c_replica))\n    await c_replica.execute_command(f\"REPLICAOF localhost {proxy.port}\")\n\n    try:\n        await asyncio.sleep(random.uniform(0, 10))\n        proxy.drop_connection()\n\n        # Give time to detect dropped connection and reconnect\n        await asyncio.sleep(1.0)\n\n        await wait_available_async(c_replica)\n        seeder.stop()\n        await traffic_task\n        await search_task\n        await replica_search_task\n\n        # Log replica FT.INFO for debugging if assertion fails later\n        info = await c_replica.execute_command(\"FT.INFO\", seeder.index_name)\n        logging.info(f\"Replica FT.INFO: {info}\")\n\n        await check_all_replicas_finished([c_replica], c_master)\n        await seeder.verify(c_master, c_replica)\n\n    finally:\n        seeder.stop()\n        traffic_task.cancel()\n        search_task.cancel()\n        replica_search_task.cancel()\n        await proxy.close(proxy_task)\n\n\nasync def test_rm_replication(df_factory: DflyInstanceFactory):\n    \"\"\"Test that RM command propagates deletions to replica and is rejected on replica.\"\"\"\n    master = df_factory.create(proactor_threads=2)\n    replica = df_factory.create(proactor_threads=2)\n\n    master.start()\n    replica.start()\n\n    c_master = master.client()\n    c_replica = replica.client()\n\n    # Populate master with keys before replication starts\n    for i in range(20):\n        await c_master.set(f\"key:{i}\", f\"val{i}\")\n    for i in range(5):\n        await c_master.set(f\"other:{i}\", f\"val{i}\")\n\n    
# Set up replication\n    await c_replica.execute_command(f\"REPLICAOF localhost {master.port}\")\n    await wait_available_async(c_replica)\n\n    # Verify replica has all keys\n    assert await c_replica.dbsize() == 25\n    logging.info(\"Replica has all keys\")\n\n    # Run RM on master with a MATCH filter to delete only \"key:*\" keys\n    cursor = 0\n    while True:\n        result = await c_master.execute_command(\"RM\", cursor, \"MATCH\", \"key:*\")\n        cursor = int(result[0])\n        if cursor == 0:\n            break\n\n    # Master should have only \"other:*\" keys left\n    assert await c_master.dbsize() == 5\n\n    # Wait for replication to propagate\n    await check_all_replicas_finished([c_replica], c_master)\n\n    # Replica should reflect deletions\n    assert await c_replica.dbsize() == 5\n    for i in range(5):\n        assert await c_replica.exists(f\"other:{i}\") == 1\n    for i in range(20):\n        assert await c_replica.exists(f\"key:{i}\") == 0\n\n    # RM must be rejected on replica (it's a write command)\n    with pytest.raises((aioredis.ResponseError, aioredis.ReadOnlyError)):\n        await c_replica.execute_command(\"RM\", 0)\n"
  },
  {
    "path": "tests/dragonfly/requirements.txt",
    "content": "async-timeout>=4.0.3\nattrs>=22.1.0\nDeprecated>=1.2.13\niniconfig>=1.1.1\npackaging>=23.1\npluggy>=1.0.0\npy>=1.11.0\npyparsing>=3.0.9\npytest>=7.1.2\nredis>=5.2.1\ntomli>=2.0.1\nwrapt>=1.14.1\npytest-asyncio==0.20.1\npytest-repeat>=0.9.3\npymemcache>=4.0.0\nmeta_memcache>=2\nprometheus_client>=0.17.0\naiohttp>=3.10.2\nnumpy\npytest-json-report>=1.5.0\npsutil>=5.9.5\nboto3>=1.28.55\nredis-om>=0.3.3\npytest-emoji>=0.2.0\npytest-icdiff>=0.8\npytest-timeout>=2.2.0\nasyncio>=3.4.3\nfakeredis[json]>=2.26.2\nhiredis==2.4.0\nPyYAML>=6.0\nvalkey>=6.0.2\ncelery>=5.3.0\n# bullmq>=2.0.0\n"
  },
  {
    "path": "tests/dragonfly/search_benchmark_test.py",
    "content": "import logging\nimport time\nimport pytest\n\nfrom . import dfly_args\nfrom .instance import DflyInstance\nfrom .search_benchmark_utils import (\n    generate_document_columns,\n    create_search_index,\n    generate_document_data,\n    run_query_load_test,\n    set_random_seed,\n    INDEX_KEY,\n    DOCUMENT_KEY,\n)\n\n\n@dfly_args({\"proactor_threads\": 4})\n@pytest.mark.opt_only\n@pytest.mark.large\nclass TestSearchBenchmark:\n    random_seed = 42\n    num_documents = 3000\n    chunk_size = 1000\n\n    @pytest.fixture(scope=\"class\")\n    async def prepared_benchmark_data(self, df_server: DflyInstance):\n        set_random_seed(self.random_seed)\n\n        logging.info(f\"Preparing benchmark data on port {df_server.port}\")\n        client = df_server.client()\n\n        # Basic connectivity check\n        assert await client.ping() == True\n\n        # Schema Generation\n        logging.info(\"Schema Generation - generating columns and creating search index\")\n        document_columns = generate_document_columns()\n        await create_search_index(client, document_columns)\n\n        # Verify the index was created\n        index_info = await client.execute_command(f\"FT.INFO {INDEX_KEY}\")\n        assert index_info is not None\n        logging.info(f\"Search index '{INDEX_KEY}' created with {len(document_columns)} columns\")\n\n        # Data Generation\n        logging.info(\n            f\"Data Generation - generating {self.num_documents:,} documents with full column data\"\n        )\n        stage_start = time.time()\n        document_ids = await generate_document_data(\n            client=client,\n            columns=document_columns,\n            num_documents=self.num_documents,\n            chunk_size=self.chunk_size,  # Chunk size for batch processing\n        )\n\n        # Verify data was generated\n        assert len(document_ids) == self.num_documents\n\n        # Verify some documents were stored\n        sample_document_id = 
document_ids[0]\n        document_key = DOCUMENT_KEY.format(documentId=sample_document_id)\n        stored_document = await client.hgetall(document_key)\n        assert stored_document is not None\n        assert stored_document[\"DocumentId\"] == sample_document_id\n        stage_duration = time.time() - stage_start\n        logging.info(\n            f\"Preparation stage completed in {stage_duration:.2f}s: {len(document_ids)} documents generated and stored\"\n        )\n\n        await client.aclose()\n\n        return {\n            \"document_columns\": document_columns,\n            \"document_ids\": document_ids,\n            \"num_documents\": self.num_documents,\n            \"setup_duration\": stage_duration,\n        }\n\n    async def _run_benchmark(\n        self,\n        df_server: DflyInstance,\n        prepared_benchmark_data,\n        num_queries: int,\n        num_concurrent_clients: int,\n        test_name: str,\n    ):\n        logging.info(f\"Starting {test_name} test on port {df_server.port}\")\n        logging.info(\n            f\"Parameters: {prepared_benchmark_data['num_documents']} documents, {num_queries} queries, {num_concurrent_clients} concurrent clients\"\n        )\n\n        client = df_server.client()\n\n        # Basic connectivity check\n        assert await client.ping() == True\n\n        # Query Load Testing\n        logging.info(\n            f\"Query Load Testing - running {num_queries:,} queries with {num_concurrent_clients} concurrent clients\"\n        )\n        stage_start = time.time()\n        total_completed = await run_query_load_test(\n            df_server=df_server,\n            columns=prepared_benchmark_data[\"document_columns\"],\n            document_ids=prepared_benchmark_data[\"document_ids\"],\n            total_queries=num_queries,\n            num_concurrent_clients=num_concurrent_clients,\n        )\n\n        # Verify queries completed\n        assert total_completed == num_queries\n        
stage_duration = time.time() - stage_start\n        logging.info(\n            f\"Query Load Testing completed in {stage_duration:.2f}s: {total_completed} queries executed successfully\"\n        )\n\n        # Final summary\n        logging.info(\n            f\"Benchmark Timings Summary -> Data Generation: {prepared_benchmark_data['setup_duration']:.2f}s | Query Load: {stage_duration:.2f}s\"\n        )\n\n        # Command statistics\n        cmd_stats = await client.info(\"commandstats\")\n        logging.info(\"Command Statistics:\")\n        for key, value in cmd_stats.items():\n            if key.startswith(\"cmdstat_\") and \"ft.\" in key.lower():\n                command = key[8:]  # Remove \"cmdstat_\" prefix\n                logging.info(f\"  {command}: {value}\")\n\n        # Latency statistics\n        latency_stats = await client.info(\"latencystats\")\n        logging.info(\"Latency Statistics:\")\n        for key, value in latency_stats.items():\n            if \"ft.\" in key.lower():\n                logging.info(f\"  {key}: {value}\")\n\n        # Memory statistics\n        memory_stats = await client.info(\"memory\")\n        logging.info(\"Memory Statistics:\")\n        important_memory_keys = [\n            \"used_memory\",\n            \"used_memory_human\",\n            \"used_memory_rss\",\n            \"used_memory_rss_human\",\n            \"used_memory_peak\",\n            \"used_memory_peak_human\",\n        ]\n        for key in important_memory_keys:\n            if key in memory_stats:\n                logging.info(f\"  {key}: {memory_stats[key]}\")\n\n        logging.info(f\"{test_name} completed successfully\")\n\n        # Close client\n        await client.aclose()\n\n    async def test_standard_benchmark(self, df_server: DflyInstance, prepared_benchmark_data):\n        \"\"\"Standard benchmark test - 100 queries with 10 concurrent clients.\"\"\"\n        await self._run_benchmark(df_server, prepared_benchmark_data, 100, 10, 
\"Standard Benchmark\")\n\n    async def test_small_benchmark(self, df_server: DflyInstance, prepared_benchmark_data):\n        \"\"\"Small benchmark test - 50 queries with 5 concurrent clients.\"\"\"\n        await self._run_benchmark(df_server, prepared_benchmark_data, 50, 5, \"Small Benchmark\")\n"
  },
  {
    "path": "tests/dragonfly/search_benchmark_utils.py",
    "content": "import asyncio\nimport logging\nimport random\nimport string\nimport uuid\nimport math\nfrom typing import List, Tuple\nfrom redis import asyncio as aioredis\nfrom redis.commands.search.query import Query\n\n\ndef set_random_seed(seed: int):\n    random.seed(seed)\n\n\nINDEX_KEY = \"idx:DocumentBase\"\nDOCUMENT_KEY = \"DocumentBase:{documentId}\"\n\n\n# Simple data types for generation\nCOLUMN_TYPES = {\n    \"TEXT\": {\n        \"dragonfly_type\": \"TEXT\",\n        \"generator\": lambda: random.choice(PRE_GENERATED_STRINGS),\n    },\n    \"NUMERIC\": {\n        \"dragonfly_type\": \"NUMERIC\",\n        \"generator\": lambda: random.randint(1, 100),\n    },\n    \"TAG\": {\n        \"dragonfly_type\": \"TAG\",\n        \"generator\": lambda: random.choice(PRE_GENERATED_UIDS),\n    },\n    \"BIT\": {\n        \"dragonfly_type\": \"NUMERIC\",\n        \"generator\": lambda: random.choice([0, 1]),\n    },\n}\n\n\nPRE_GENERATED_STRINGS = []\nPRE_GENERATED_UIDS = []\n\n\ndef _initialize_pre_generated_data(size: int):\n    global PRE_GENERATED_STRINGS, PRE_GENERATED_UIDS\n\n    # Clear previous data and generate new\n    PRE_GENERATED_STRINGS.clear()\n    PRE_GENERATED_UIDS.clear()\n\n    PRE_GENERATED_STRINGS.extend(\n        [\n            \"\".join(random.choices(string.ascii_letters, k=k))\n            for _ in range(size)\n            for k in range(5, 11)  # lengths 5–10\n        ]\n    )\n\n    PRE_GENERATED_UIDS.extend([str(uuid.uuid4()) for _ in range(size)])\n\n\nasync def generate_document_data(\n    client: aioredis.Redis,\n    columns: List[Tuple[str, str]],\n    num_documents: int,\n    chunk_size: int = 1000,\n) -> List[str]:\n    # Initialize pre-generated data\n    _initialize_pre_generated_data(num_documents)\n\n    # Generate document IDs\n    document_ids = [str(uuid.uuid4()) for _ in range(num_documents)]\n\n    # Process in chunks for better performance\n    chunks_count = math.ceil(num_documents / chunk_size)\n\n    tasks = []\n    
for chunk_number in range(chunks_count):\n        start_idx = chunk_number * chunk_size\n        end_idx = min((chunk_number + 1) * chunk_size, num_documents)\n        chunk_document_ids = document_ids[start_idx:end_idx]\n\n        task = asyncio.create_task(_generate_documents_chunk(client, chunk_document_ids, columns))\n        tasks.append(task)\n\n    await asyncio.gather(*tasks)\n    return document_ids\n\n\nasync def _generate_documents_chunk(\n    client: aioredis.Redis, document_ids: List[str], columns: List[Tuple[str, str]]\n):\n    pipeline = client.pipeline()\n\n    for document_id in document_ids:\n        document = {\"DocumentId\": document_id}\n\n        # Generate values for all columns except DocumentId\n        for column_name, column_type in columns:\n            if column_name == \"DocumentId\":\n                continue\n\n            value = COLUMN_TYPES[column_type][\"generator\"]()\n            if value is not None:\n                document[column_name] = value\n\n        doc_key = DOCUMENT_KEY.format(documentId=document_id)\n        pipeline.hset(doc_key, mapping=document)\n\n    await pipeline.execute()\n\n\ndef generate_search_query(columns: List[Tuple[str, str]], document_ids: List[str]) -> Query:\n    column_names = [name for name, _ in columns]\n    num_columns = random.randint(int(len(column_names) / 3.5), int(len(column_names) / 2))\n    selected_columns = random.sample(column_names, num_columns)\n\n    if random.random() < 0.5:\n        query = Query(\"*\").return_fields(*selected_columns)\n        query = query.paging(0, 50)\n        return query\n\n    reliable_filter_columns = [name for name, col_type in columns if col_type in [\"NUMERIC\", \"BIT\"]]\n\n    if reliable_filter_columns and random.random() < 0.5:\n        filter_column = random.choice(reliable_filter_columns)\n        filter_column_type = next(col_type for name, col_type in columns if name == filter_column)\n        filter_str = 
create_simple_numeric_filter(filter_column, filter_column_type)\n        filter_string = filter_str if filter_str else \"*\"\n    else:\n        filter_string = \"*\"\n\n    query = Query(filter_string).return_fields(*selected_columns)\n    query = query.paging(0, 50)\n    return query\n\n\ndef create_simple_numeric_filter(property_name: str, property_type: str) -> str:\n    if property_type == \"NUMERIC\":\n        return f\"@{property_name}: [1 100]\"\n    elif property_type == \"BIT\":\n        bit_value = random.choice([0, 1])\n        return f\"@{property_name}: [{bit_value} {bit_value}]\"\n    else:\n        return \"*\"\n\n\nasync def run_query_client(\n    client_id: int,\n    df_server,\n    columns: List[Tuple[str, str]],\n    document_ids: List[str],\n    num_queries: int,\n) -> int:\n    client = df_server.client()\n\n    query_count = 0\n    success_count = 0\n\n    try:\n        for i in range(num_queries):\n            try:\n                query = generate_search_query(columns, document_ids)\n                results = await client.ft(INDEX_KEY).search(query)\n                success_count += 1\n\n            except Exception as e:\n                logging.error(f\"Client {client_id}: ERROR in query {i}: {e}\")\n\n            query_count += 1\n\n    finally:\n        if query_count > 0:\n            final_success_rate = (success_count / query_count) * 100\n            logging.info(\n                f\"Client {client_id} completed: {success_count}/{query_count} successful queries ({final_success_rate:.1f}%)\"\n            )\n        await client.aclose()\n\n    return success_count\n\n\nasync def run_query_load_test(\n    df_server,\n    columns: List[Tuple[str, str]],\n    document_ids: List[str],\n    total_queries: int,\n    num_concurrent_clients: int,\n) -> int:\n    queries_per_client = total_queries // num_concurrent_clients\n\n    tasks = []\n    for client_id in range(num_concurrent_clients):\n        task = asyncio.create_task(\n            
run_query_client(client_id, df_server, columns, document_ids, queries_per_client)\n        )\n        tasks.append(task)\n\n    results = await asyncio.gather(*tasks)\n    total_completed = sum(results)\n    return total_completed\n\n\ndef generate_document_columns(num_columns: int = 700) -> List[Tuple[str, str]]:\n    max_text_fields = 128\n\n    # Available types for generation\n    available_types = [\"TEXT\", \"NUMERIC\", \"BIT\", \"TAG\"]\n\n    columns = []\n    existing_names = set()\n    text_field_count = 0\n\n    # Standard columns\n    standard_columns = [\n        (\"DocumentId\", \"TAG\"),\n        (\"Name\", \"TEXT\"),\n        (\"DocumentNumber\", \"TEXT\"),\n        (\"Revenue\", \"NUMERIC\"),\n        (\"NumberOfEmployees\", \"NUMERIC\"),\n        (\"CreatedOn\", \"NUMERIC\"),\n        (\"ModifiedOn\", \"NUMERIC\"),\n        (\"IsPrivate\", \"BIT\"),\n        (\"StateCode\", \"NUMERIC\"),\n        (\"StatusCode\", \"NUMERIC\"),\n    ]\n\n    columns.extend(standard_columns)\n    existing_names.update(name for name, _ in standard_columns)\n    text_field_count = sum(1 for _, col_type in standard_columns if col_type == \"TEXT\")\n\n    while len(columns) < num_columns:\n        # Generate unique name\n        candidate_name = (\n            f\"lv_{''.join(random.choices(string.ascii_lowercase, k=random.randint(5, 15)))}\"\n        )\n\n        if candidate_name in existing_names:\n            continue\n\n        # Choose type\n        if text_field_count >= max_text_fields:\n            column_type = random.choice([t for t in available_types if t != \"TEXT\"])\n        else:\n            column_type = random.choice(available_types)\n            if column_type == \"TEXT\":\n                text_field_count += 1\n\n        columns.append((candidate_name, column_type))\n        existing_names.add(candidate_name)\n\n    logging.info(f\"Created {len(columns)} columns, with {text_field_count} TEXT fields\")\n    return columns\n\n\nasync def 
create_search_index(client: aioredis.Redis, columns: List[Tuple[str, str]]) -> None:\n    text_field_count = sum(1 for _, col_type in columns if col_type == \"TEXT\")\n\n    if text_field_count > 128:\n        raise ValueError(\n            f\"Too many TEXT fields: {text_field_count}. RediSearch supports a maximum of 128 TEXT fields.\"\n        )\n\n    logging.info(\n        f\"Creating index with {len(columns)} columns, including {text_field_count} TEXT fields\"\n    )\n\n    # Create schema directly\n    schema_parts = []\n    for name, col_type in columns:\n        dragonfly_type = COLUMN_TYPES[col_type][\"dragonfly_type\"]\n        schema_parts.append(f\"{name} {dragonfly_type}\")\n\n    schema_create_command = (\n        f\"FT.CREATE {INDEX_KEY} ON HASH PREFIX 1 DocumentBase: SCHEMA {' '.join(schema_parts)}\"\n    )\n    await client.execute_command(schema_create_command)\n"
  },
  {
    "path": "tests/dragonfly/search_test.py",
    "content": "\"\"\"\nTest compatibility with the redis-py client search module.\nSearch correctness should be ensured with unit tests.\n\"\"\"\n\nimport copy\n\nimport numpy as np\nfrom redis.commands.search.field import TextField, NumericField, TagField, VectorField, GeoField\n\ntry:\n    from redis.commands.search.indexDefinition import IndexDefinition, IndexType\nexcept ModuleNotFoundError:\n    from redis.commands.search.index_definition import IndexDefinition, IndexType\nfrom redis.commands.search.query import Query\n\nfrom . import dfly_args\nfrom .utility import *\n\nTEST_DATA = [\n    {\n        \"title\": \"First article\",\n        \"content\": \"Long description\",\n        \"views\": 100,\n        \"topic\": \"world, science\",\n    },\n    {\n        \"title\": \"Second article\",\n        \"content\": \"Small text\",\n        \"views\": 200,\n        \"topic\": \"national, policits\",\n    },\n    {\n        \"title\": \"Third piece\",\n        \"content\": \"Brief description\",\n        \"views\": 300,\n        \"topic\": \"health, lifestyle\",\n    },\n    {\n        \"title\": \"Last piece\",\n        \"content\": \"Interesting text\",\n        \"views\": 400,\n        \"topic\": \"world, business\",\n    },\n]\n\nBASIC_TEST_SCHEMA = [\n    TextField(\"title\"),\n    TextField(\"content\"),\n    NumericField(\"views\"),\n    TagField(\"topic\"),\n]\n\n\ndef fix_schema_naming(itype: IndexType, idx_list: list):\n    \"\"\"Copy all schema fields and for json types, change name to json $.path and add alias\"\"\"\n    if itype == IndexType.HASH:\n        return idx_list\n    copies = [copy.copy(idx) for idx in idx_list]\n    for idx in copies:\n        idx.as_name = idx.name\n        idx.name = \"$.\" + idx.name\n    return copies\n\n\nasync def index_test_data(async_client: aioredis.Redis, itype: IndexType, prefix=\"\"):\n    for i, e in enumerate(TEST_DATA):\n        if itype == IndexType.HASH:\n            await async_client.hset(prefix + str(i), 
mapping=e)\n        else:\n            await async_client.json().set(prefix + str(i), \"$\", e)\n\n\ndef doc_to_str(index_type, doc):\n    if not type(doc) is dict:\n        doc = doc.__dict__\n\n    if \"json\" in doc:\n        return json.dumps(json.loads(doc[\"json\"]), sort_keys=True)\n\n    if index_type == IndexType.JSON:\n        return json.dumps(doc, sort_keys=True)\n\n    doc = dict(doc)  # copy to remove fields\n    doc.pop(\"id\", None)\n    doc.pop(\"payload\", None)\n\n    return \"//\".join(sorted(doc))\n\n\ndef contains_test_data(itype, res, td_indices):\n    if res.total != len(td_indices):\n        return False\n\n    docset = {doc_to_str(itype, doc) for doc in res.docs}\n\n    for td_entry in (TEST_DATA[tdi] for tdi in td_indices):\n        if not doc_to_str(itype, td_entry) in docset:\n            return False\n\n    return True\n\n\n@dfly_args({\"proactor_threads\": 4})\nasync def test_management(async_client: aioredis.Redis):\n    SCHEMA_1 = [TextField(\"f1\"), NumericField(\"f2\", sortable=True)]\n    SCHEMA_2 = [\n        NumericField(\"f3\", no_index=True, sortable=True),\n        TagField(\"f4\"),\n        VectorField(\n            \"f5\",\n            algorithm=\"HNSW\",\n            attributes={\"TYPE\": \"FLOAT32\", \"DIM\": 1, \"DISTANCE_METRIC\": \"L2\", \"INITIAL_CAP\": 100},\n        ),\n    ]\n\n    i1 = async_client.ft(\"i1\")\n    i2 = async_client.ft(\"i2\")\n\n    await i1.create_index(SCHEMA_1, definition=IndexDefinition(prefix=[\"p1\"]))\n    await i2.create_index(SCHEMA_2, definition=IndexDefinition(prefix=[\"p2\"]))\n\n    # Fill indices with 10 and 15 docs respectively\n    for i in range(10):\n        await async_client.hset(f\"p1-{i}\", mapping={\"f1\": \"ok\", \"f2\": 11})\n    for i in range(15):\n        await async_client.hset(\n            f\"p2-{i}\",\n            mapping={\"f3\": 12, \"f4\": \"hmm\", \"f5\": np.array(0).astype(np.float32).tobytes()},\n        )\n\n    assert sorted(await 
async_client.execute_command(\"FT._LIST\")) == [\"i1\", \"i2\"]\n\n    i1info = await i1.info()\n    assert i1info[\"index_definition\"] == [\n        \"key_type\",\n        \"HASH\",\n        \"prefixes\",\n        [\"p1\"],\n        \"default_score\",\n        1,\n    ]\n    assert i1info[\"num_docs\"] == 10\n    assert sorted(i1info[\"attributes\"]) == [\n        [\"identifier\", \"f1\", \"attribute\", \"f1\", \"type\", \"TEXT\"],\n        [\n            \"identifier\",\n            \"f2\",\n            \"attribute\",\n            \"f2\",\n            \"type\",\n            \"NUMERIC\",\n            \"SORTABLE\",\n            \"blocksize\",\n            \"10000\",\n        ],\n    ]\n\n    i2info = await i2.info()\n    assert i2info[\"index_definition\"] == [\n        \"key_type\",\n        \"HASH\",\n        \"prefixes\",\n        [\"p2\"],\n        \"default_score\",\n        1,\n    ]\n    assert i2info[\"num_docs\"] == 15\n    assert sorted(i2info[\"attributes\"]) == [\n        [\n            \"identifier\",\n            \"f3\",\n            \"attribute\",\n            \"f3\",\n            \"type\",\n            \"NUMERIC\",\n            \"NOINDEX\",\n            \"SORTABLE\",\n            \"blocksize\",\n            \"10000\",\n        ],\n        [\"identifier\", \"f4\", \"attribute\", \"f4\", \"type\", \"TAG\"],\n        [\"identifier\", \"f5\", \"attribute\", \"f5\", \"type\", \"VECTOR\"],\n    ]\n\n    await i1.dropindex()\n    await i2.dropindex()\n\n    assert await async_client.execute_command(\"FT._LIST\") == []\n\n\n@dfly_args({\"proactor_threads\": 4})\n@pytest.mark.parametrize(\"index_type\", [IndexType.HASH, IndexType.JSON])\nasync def test_basic(async_client: aioredis.Redis, index_type):\n    i1 = async_client.ft(\"i1-\" + str(index_type))\n\n    await i1.create_index(\n        fix_schema_naming(index_type, BASIC_TEST_SCHEMA),\n        definition=IndexDefinition(index_type=index_type),\n    )\n    await index_test_data(async_client, 
index_type)\n\n    res = await i1.search(\"article\")\n    assert contains_test_data(index_type, res, [0, 1])\n\n    res = await i1.search(\"text\")\n    assert contains_test_data(index_type, res, [1, 3])\n\n    res = await i1.search(\"brief piece\")\n    assert contains_test_data(index_type, res, [2])\n\n    res = await i1.search(\"@title:(article|last) @content:text\")\n    assert contains_test_data(index_type, res, [1, 3])\n\n    res = await i1.search(\"@views:[200 300]\")\n    assert contains_test_data(index_type, res, [1, 2])\n\n    res = await i1.search(\"@views:[0 150] | @views:[350 500]\")\n    assert contains_test_data(index_type, res, [0, 3])\n\n    res = await i1.search(\"@topic:{world}\")\n    assert contains_test_data(index_type, res, [0, 3])\n\n    res = await i1.search(\"@topic:{business}\")\n    assert contains_test_data(index_type, res, [3])\n\n    res = await i1.search(\"@topic:{world | national}\")\n    assert contains_test_data(index_type, res, [0, 1, 3])\n\n    res = await i1.search(\"@topic:{science | health}\")\n    assert contains_test_data(index_type, res, [0, 2])\n\n    await i1.dropindex()\n\n\n@dfly_args({\"proactor_threads\": 4})\nasync def test_big_json(async_client: aioredis.Redis):\n    i1 = async_client.ft(\"i1\")\n    gen_arr = lambda base: {\"blob\": [base + str(i) for i in range(100)]}\n\n    await i1.create_index(\n        [TextField(name=\"$.blob\", as_name=\"items\")],\n        definition=IndexDefinition(index_type=IndexType.JSON),\n    )\n\n    await async_client.json().set(\"k1\", \"$\", gen_arr(\"alex\"))\n    await async_client.json().set(\"k2\", \"$\", gen_arr(\"bob\"))\n\n    res = await i1.search(\"alex55\")\n    assert res.docs[0].id == \"k1\"\n\n    res = await i1.search(\"bob77\")\n    assert res.docs[0].id == \"k2\"\n\n    res = await i1.search(\"alex11 | bob22\")\n    assert res.total == 2\n\n    await i1.dropindex()\n\n\nasync def knn_query(idx, query, vector):\n    params = {\"vec\": np.array(vector, 
dtype=np.float32).tobytes()}\n    result = await idx.search(query, params)\n    return {doc[\"id\"] for doc in result.docs}\n\n\nasync def knn_query_with_limit(idx, query, vector, limit):\n    params = {\"vec\": np.array(vector, dtype=np.float32).tobytes()}\n    result = await idx.search(Query(query).paging(0, limit), params)\n    return {doc[\"id\"] for doc in result.docs}\n\n\n@dfly_args({\"proactor_threads\": 4})\n@pytest.mark.parametrize(\"index_type\", [IndexType.HASH, IndexType.JSON])\n@pytest.mark.parametrize(\"algo_type\", [\"FLAT\", \"HNSW\"])\nasync def test_knn(async_client: aioredis.Redis, index_type, algo_type):\n    i2 = async_client.ft(\"i2-\" + str(index_type))\n\n    vector_field = VectorField(\n        \"pos\",\n        algorithm=algo_type,\n        attributes={\n            \"TYPE\": \"FLOAT32\",\n            \"DIM\": 1,\n            \"DISTANCE_METRIC\": \"L2\",\n            \"INITIAL_CAP\": 100,\n        },\n    )\n\n    await i2.create_index(\n        fix_schema_naming(index_type, [TagField(\"even\"), vector_field]),\n        definition=IndexDefinition(index_type=index_type),\n    )\n\n    pipe = async_client.pipeline()\n    for i in range(100):\n        even = \"yes\" if i % 2 == 0 else \"no\"\n        if index_type == IndexType.HASH:\n            pos = np.array(i, dtype=np.float32).tobytes()\n            pipe.hset(f\"k{i}\", mapping={\"even\": even, \"pos\": pos})\n        else:\n            pipe.json().set(f\"k{i}\", \"$\", {\"even\": even, \"pos\": [float(i)]})\n    await pipe.execute()\n\n    assert await knn_query(i2, \"* => [KNN 3 @pos $vec]\", [50.0]) == {\"k49\", \"k50\", \"k51\"}\n\n    assert await knn_query(i2, \"@even:{yes} => [KNN 3 @pos $vec]\", [20.0]) == {\"k18\", \"k20\", \"k22\"}\n\n    assert await knn_query(i2, \"@even:{no} => [KNN 4 @pos $vec]\", [30.0]) == {\n        \"k27\",\n        \"k29\",\n        \"k31\",\n        \"k33\",\n    }\n\n    assert await knn_query(i2, \"@even:{yes} => [KNN 3 @pos $vec]\", [10.0]) == 
{\"k8\", \"k10\", \"k12\"}\n    await i2.dropindex()\n\n\nNUM_DIMS = 10\nNUM_POINTS = 100\n\n\n@dfly_args({\"proactor_threads\": 4})\n@pytest.mark.parametrize(\"index_type\", [IndexType.HASH, IndexType.JSON])\n@pytest.mark.parametrize(\"algo_type\", [\"HNSW\", \"FLAT\"])\nasync def test_multidim_knn(async_client: aioredis.Redis, index_type, algo_type):\n    vector_field = VectorField(\n        \"pos\",\n        algorithm=algo_type,\n        attributes={\n            \"TYPE\": \"FLOAT32\",\n            \"DIM\": NUM_DIMS,\n            \"DISTANCE_METRIC\": \"L2\",\n        },\n    )\n\n    i3 = async_client.ft(\"i3-\" + str(index_type))\n    await i3.create_index(\n        fix_schema_naming(index_type, [vector_field]),\n        definition=IndexDefinition(index_type=index_type),\n    )\n\n    # Use fixed seed for deterministic results\n    np.random.seed(42)\n\n    def rand_point():\n        return np.random.uniform(0, 10, NUM_DIMS).astype(np.float32)\n\n    # Generate points and send to DF\n    points = [rand_point() for _ in range(NUM_POINTS)]\n    points = list(enumerate(points))\n\n    pipe = async_client.pipeline(transaction=False)\n    for i, point in points:\n        if index_type == IndexType.HASH:\n            pipe.hset(f\"k{i}\", mapping={\"pos\": point.tobytes()})\n        else:\n            pipe.json().set(f\"k{i}\", \"$\", {\"pos\": point.tolist()})\n    await pipe.execute()\n\n    # Run 10 random queries\n    for _ in range(10):\n        center = rand_point()\n        limit = np.random.randint(\n            1, NUM_POINTS // 10 + 1\n        )  # +1 because numpy's randint is exclusive\n\n        expected_ids = [\n            f\"k{i}\"\n            for i, point in sorted(points, key=lambda p: np.linalg.norm(center - p[1]))[:limit]\n        ]\n\n        if algo_type == \"HNSW\":\n            # We need to search all points because results can be different between expected_ids that\n            # distance is  calculated on all points and hnsw which is 
approximate greedy search\n            knn_limit = NUM_POINTS\n            got_ids = await knn_query_with_limit(\n                i3, f\"* => [KNN {knn_limit} @pos $vec]\", center, limit\n            )\n        else:\n            got_ids = await knn_query(i3, f\"* => [KNN {limit} @pos $vec]\", center)\n\n        assert set(expected_ids) == set(got_ids)\n\n    await i3.dropindex()\n\n\n@dfly_args({\"proactor_threads\": 4})\nasync def test_knn_score_return(async_client: aioredis.Redis):\n    i1 = async_client.ft(\"i1\")\n    vector_field = VectorField(\n        \"pos\",\n        algorithm=\"FLAT\",\n        attributes={\n            \"DIM\": 1,\n            \"DISTANCE_METRIC\": \"L2\",\n            \"INITIAL_CAP\": 100,\n        },\n    )\n\n    await i1.create_index(\n        [vector_field],\n        definition=IndexDefinition(index_type=IndexType.HASH),\n    )\n\n    pipe = async_client.pipeline()\n    for i in range(100):\n        pipe.hset(f\"k{i}\", mapping={\"pos\": np.array(i, dtype=np.float32).tobytes()})\n    await pipe.execute()\n\n    params = {\"vec\": np.array([1.0], dtype=np.float32).tobytes()}\n    result = await i1.search(\"* => [KNN 3 @pos $vec AS distance]\", params)\n\n    assert result.total == 3\n    assert [d[\"distance\"] for d in result.docs] == [\"0\", \"1\", \"1\"]\n\n    result = await i1.search(\n        Query(\"* => [KNN 3 @pos $vec AS distance]\").return_fields(\"pos\"), params\n    )\n    assert not any(hasattr(d, \"distance\") for d in result.docs)\n\n    await i1.dropindex()\n\n\n@dfly_args({\"proactor_threads\": 4, \"dbfilename\": \"search-data\"})\nasync def test_index_persistence(df_server):\n    client = aioredis.Redis(port=df_server.port)\n\n    # Build two indices and fill them with data\n\n    SCHEMA_1 = [TextField(\"title\"), NumericField(\"views\", sortable=True), TagField(\"topic\")]\n    SCHEMA_2 = [\n        TextField(\"name\"),\n        NumericField(\"age\", sortable=True),\n        TagField(\"job\", separator=\":\", 
case_sensitive=True),\n        VectorField(\n            \"pos\",\n            algorithm=\"HNSW\",\n            attributes={\"TYPE\": \"FLOAT32\", \"DIM\": 1, \"DISTANCE_METRIC\": \"L2\", \"INITIAL_CAP\": 100},\n        ),\n    ]\n\n    i1 = client.ft(\"i1\")\n    await i1.create_index(\n        fix_schema_naming(IndexType.JSON, SCHEMA_1),\n        stopwords=[\"interesting\", \"stopwords\"],\n        definition=IndexDefinition(index_type=IndexType.JSON, prefix=[\"blog-\"]),\n    )\n\n    i2 = client.ft(\"i2\")\n    await i2.create_index(\n        fix_schema_naming(IndexType.HASH, SCHEMA_2),\n        definition=IndexDefinition(index_type=IndexType.HASH, prefix=[\"people-\"]),\n    )\n\n    for i in range(150):\n        await client.json().set(\n            f\"blog-{i}\",\n            \".\",\n            {\"title\": f\"Post {i}\", \"views\": i * 10, \"topic\": \"even\" if i % 2 == 0 else \"odd\"},\n        )\n\n    for i in range(200):\n        await client.hset(\n            f\"people-{i}\",\n            mapping={\n                \"name\": f\"Name {i}\",\n                \"age\": i,\n                \"job\": \"newsagent\" if i % 2 == 0 else \"writer\",\n                \"pos\": np.array(i / 200.0).astype(np.float32).tobytes(),\n            },\n        )\n\n    info_1 = await i1.info()\n    info_2 = await i2.info()\n    assert info_1[\"num_docs\"] == 150\n    assert info_2[\"num_docs\"] == 200\n\n    # stop & start server\n\n    df_server.stop()\n    df_server.start()\n\n    client = aioredis.Redis(port=df_server.port)\n    await wait_available_async(client)\n\n    # Check indices were loaded\n\n    assert {i.decode() for i in await client.execute_command(\"FT._LIST\")} == {\"i1\", \"i2\"}\n\n    i1 = client.ft(\"i1\")\n    i2 = client.ft(\"i2\")\n\n    info_1_new = await i1.info()\n    info_2_new = await i2.info()\n\n    def build_fields_set(info):\n        fields = set()\n        for field in info[\"attributes\"]:\n            fields.add(tuple(field))\n        
return fields\n\n    assert build_fields_set(info_1) == build_fields_set(info_1_new)\n    assert build_fields_set(info_2) == build_fields_set(info_2_new)\n\n    assert info_1[\"index_definition\"] == info_1_new[\"index_definition\"]\n    assert info_2[\"index_definition\"] == info_2_new[\"index_definition\"]\n\n    assert info_1[\"num_docs\"] == info_1_new[\"num_docs\"]\n    assert info_2[\"num_docs\"] == info_2_new[\"num_docs\"]\n\n    # Check basic queries run correctly\n\n    assert (await i1.search(\"@views:[0 90]\")).total == 10\n    assert (await i1.search(\"@views:[100 190] @topic:{even}\")).total == 5\n\n    assert (await i2.search(\"@job:{writer}\")).total == 100\n    assert (await i2.search(\"@job:{writer} @age:[100 200]\")).total == 50\n    assert (await i2.search(\"@job:{wRiTeR}\")).total == 0\n\n    # Check fields are sortable\n    assert (await i1.search(Query(\"*\").sort_by(\"views\", asc=True).paging(0, 1))).docs[0][\n        \"id\"\n    ] == \"blog-0\"\n    assert (await i2.search(Query(\"*\").sort_by(\"age\", asc=False).paging(0, 1))).docs[0][\n        \"age\"\n    ] == \"199\"\n\n    # Check stopwords were loaded\n    await client.json().set(\"blog-sw1\", \".\", {\"title\": \"some stopwords\"})\n    assert (await i1.search(\"some\")).total == 1\n    assert (await i1.search(\"stopwords\")).total == 0\n\n    await i1.dropindex()\n    await i2.dropindex()\n\n\n@dfly_args({\"proactor_threads\": 4})\ndef test_redis_om(df_server):\n    try:\n        import redis_om\n    except ModuleNotFoundError:\n        skip_if_not_in_github(\"redis-om python library not installed\")\n        raise\n\n    client = redis.Redis(port=df_server.port, decode_responses=True)\n\n    class TestCar(redis_om.HashModel, index=True):\n        producer: str = redis_om.Field(index=True)\n        description: str = redis_om.Field(index=True, full_text_search=True)\n        speed: int = redis_om.Field(index=True, sortable=True)\n\n        class Meta:\n            database = 
client\n\n    def extract_producers(testset):\n        return sorted([car.producer for car in testset])\n\n    def make_car(producer, description, speed):\n        return TestCar(producer=producer, description=description, speed=speed)\n\n    CARS = [\n        make_car(\"BMW\", \"Very fast and elegant\", 200),\n        make_car(\"Audi\", \"Fast & stylish\", 170),\n        make_car(\"Mercedes\", \"High class but expensive!\", 150),\n        make_car(\"Honda\", \"Good allrounder with flashy looks\", 120),\n        make_car(\"Peugeot\", \"Good allrounder for the whole family\", 100),\n        make_car(\"Mini\", \"Fashinable cooper for the big city\", 80),\n        make_car(\"John Deere\", \"It's not a car, it's a tractor in fact!\", 50),\n    ]\n\n    for car in CARS:\n        car.save()\n\n    redis_om.Migrator().run()\n\n    # Wait for async indexing of existing documents to complete\n    for index_name in client.execute_command(\"FT._LIST\"):\n        timeout = time.time() + 10\n        while int(client.ft(index_name).info()[\"indexing\"]) == 1:\n            if time.time() > timeout:\n                raise TimeoutError(f\"Indexing {index_name} did not complete within 10 seconds\")\n            time.sleep(0.05)\n\n    # Get all cars\n    assert extract_producers(TestCar.find().all()) == extract_producers(CARS)\n\n    # Get all cars of a specific producer\n    assert extract_producers(\n        TestCar.find((TestCar.producer == \"Peugeot\") | (TestCar.producer == \"Mini\"))\n    ) == [\"Mini\", \"Peugeot\"]\n\n    # Get only fast cars\n    assert extract_producers(TestCar.find(TestCar.speed >= 150).all()) == extract_producers(\n        [c for c in CARS if c.speed >= 150]\n    )\n\n    # Get only slow cars\n    assert extract_producers(TestCar.find(TestCar.speed < 100).all()) == extract_producers(\n        [c for c in CARS if c.speed < 100]\n    )\n\n    # Get all cars which are fast based on description\n    assert extract_producers(TestCar.find(TestCar.description % 
\"fast\")) == [\"Audi\", \"BMW\"]\n\n    # Get all cars which are not marked as expensive by descriptions\n    assert extract_producers(\n        TestCar.find(~(TestCar.description % \"expensive\")).all()\n    ) == extract_producers([c for c in CARS if c.producer != \"Mercedes\"])\n\n    # Get a fast allrounder\n    assert extract_producers(\n        TestCar.find((TestCar.speed >= 110) & (TestCar.description % \"allrounder\"))\n    ) == [\"Honda\"]\n\n    # What's the slowest car\n    assert extract_producers([TestCar.find().sort_by(\"speed\").first()]) == [\"John Deere\"]\n\n    # What's the fastest car\n    assert extract_producers([TestCar.find().sort_by(\"-speed\").first()]) == [\"BMW\"]\n\n    for index_name in client.execute_command(\"FT._LIST\"):\n        client.ft(index_name).dropindex()\n\n\n@dfly_args({\"proactor_threads\": 4, \"dbfilename\": \"synonym-persistence\"})\nasync def test_synonym_persistence(df_server):\n    \"\"\"Test that synonyms are persisted across server restarts\"\"\"\n    client = aioredis.Redis(port=df_server.port)\n\n    # Create index and add documents\n    idx = client.ft(\"idx\")\n    await idx.create_index([TextField(\"txt\")], definition=IndexDefinition(prefix=[\"d:\"]))\n    await client.hset(\"d:1\", mapping={\"txt\": \"car\"})\n    await client.hset(\"d:2\", mapping={\"txt\": \"automobile\"})\n\n    # Add synonyms and verify they work\n    await client.execute_command(\"FT.SYNUPDATE\", \"idx\", \"grp\", \"car\", \"automobile\")\n    assert (await idx.search(Query(\"car\"))).total == 2\n\n    # Restart server\n    df_server.stop()\n    df_server.start()\n    client = aioredis.Redis(port=df_server.port)\n    await wait_available_async(client)\n\n    idx = client.ft(\"idx\")\n\n    # Verify synonyms still work after restart\n    assert (await idx.search(Query(\"car\"))).total == 2\n\n\n@dfly_args({\"proactor_threads\": 4})\nasync def test_ft_info_concurrent_create_drop(df_server):\n    \"\"\"\n    Test that FT.INFO doesn't crash 
when called concurrently with FT.CREATE/FT.DROPINDEX.\n    The bug was a DCHECK failure when some shards have the index while others don't.\n    \"\"\"\n    ITERATIONS = 500\n\n    async def create_drop_worker(port):\n        client = aioredis.Redis(port=port)\n        for _ in range(ITERATIONS):\n            try:\n                await client.execute_command(\n                    \"FT.CREATE\", \"idx\", \"ON\", \"HASH\", \"SCHEMA\", \"f\", \"TEXT\"\n                )\n            except Exception:\n                pass  # Index might already exist\n            try:\n                await client.execute_command(\"FT.DROPINDEX\", \"idx\")\n            except Exception:\n                pass  # Index might not exist\n        await client.close()\n\n    async def info_worker(port):\n        client = aioredis.Redis(port=port)\n        for _ in range(ITERATIONS):\n            try:\n                await client.execute_command(\"FT.INFO\", \"idx\")\n            except Exception:\n                pass  # Index might not exist - that's OK\n        await client.close()\n\n    # Run multiple workers concurrently with separate connections\n    port = df_server.port\n    tasks = [\n        create_drop_worker(port),\n        create_drop_worker(port),\n        create_drop_worker(port),\n        create_drop_worker(port),\n        create_drop_worker(port),\n        info_worker(port),\n        info_worker(port),\n        info_worker(port),\n        info_worker(port),\n        info_worker(port),\n    ]\n\n    # If there's a crash, this will fail\n    await asyncio.gather(*tasks)\n\n    # Verify server is still alive\n    client = aioredis.Redis(port=port)\n    assert await client.ping()\n    await client.close()\n\n\n@pytest.mark.parametrize(\n    \"master_threads,replica_threads\",\n    [\n        (4, 4),  # Same thread count\n        (4, 3),  # Master has more threads\n        (3, 4),  # Replica has more threads\n    ],\n)\nasync def test_replicate_all_index_types(df_factory, 
master_threads, replica_threads):\n    \"\"\"\n    Test that all index types (text, numeric, tag, geo, and vector) can be replicated\n    via full sync rebuild on the replica side. Uses 10000 elements for stress testing.\n    Tests with different thread counts between master and replica to ensure proper\n    shard handling during replication.\n    \"\"\"\n    from .instance import DflyInstanceFactory\n\n    master = df_factory.create(proactor_threads=master_threads)\n    # logbuflevel=-1 forces glog to flush every log line immediately, so INFO messages\n    # are visible in the log file when we read it (before the process exits).\n    replica = df_factory.create(proactor_threads=replica_threads, logbuflevel=-1)\n\n    df_factory.start_all([master, replica])\n\n    c_master = master.client()\n    c_replica = replica.client()\n\n    # Create an index with all field types on master\n    await c_master.execute_command(\n        \"FT.CREATE\",\n        \"all_types_idx\",\n        \"ON\",\n        \"HASH\",\n        \"PREFIX\",\n        \"1\",\n        \"item:\",\n        \"SCHEMA\",\n        \"name\",\n        \"TEXT\",\n        \"price\",\n        \"NUMERIC\",\n        \"SORTABLE\",\n        \"category\",\n        \"TAG\",\n        \"location\",\n        \"GEO\",\n        \"embedding\",\n        \"VECTOR\",\n        \"HNSW\",\n        \"6\",\n        \"TYPE\",\n        \"FLOAT32\",\n        \"DIM\",\n        \"2\",\n        \"DISTANCE_METRIC\",\n        \"L2\",\n    )\n\n    # Insert 10000 test documents\n    NUM_DOCS = 10000\n    pipe = c_master.pipeline(transaction=False)\n    for i in range(NUM_DOCS):\n        lat = 37.0 + (i % 100) * 0.01  # Varying latitudes\n        lon = -122.0 + (i // 100) * 0.01  # Varying longitudes\n        category = \"electronics\" if i % 3 == 0 else (\"clothing\" if i % 3 == 1 else \"food\")\n        embedding = np.array([float(i % 100), float(i // 100)], dtype=np.float32).tobytes()\n        pipe.hset(\n            f\"item:{i}\",\n        
    mapping={\n                \"name\": f\"Product {i}\",\n                \"price\": i,\n                \"category\": category,\n                \"location\": f\"{lon},{lat}\",\n                \"embedding\": embedding,\n            },\n        )\n        # Execute in batches to avoid memory issues\n        if i % 1000 == 999:\n            await pipe.execute()\n            pipe = c_master.pipeline(transaction=False)\n    await pipe.execute()\n\n    # Verify searches work on master\n    master_idx = c_master.ft(\"all_types_idx\")\n\n    # Text search\n    text_result = await master_idx.search(\"Product 100\")\n    assert text_result.total >= 1\n\n    # Numeric search\n    numeric_result = await master_idx.search(\"@price:[1000 2000]\")\n    assert numeric_result.total == 1001  # prices 1000-2000\n\n    # Tag search - every 3rd item is electronics (0, 3, 6, ...)\n    tag_result = await master_idx.search(Query(\"@category:{electronics}\").paging(0, 0))\n    expected_electronics = (NUM_DOCS + 2) // 3  # ceil(10000/3)\n    assert tag_result.total == expected_electronics\n\n    # Geo search - search around (-122.0, 37.0) with 10km radius\n    geo_result = await master_idx.search(\"@location:[-122.0 37.0 10 km]\")\n    assert geo_result.total > 0\n\n    # Vector search (KNN)\n    query_vec = np.array([50.0, 50.0], dtype=np.float32).tobytes()\n    knn_result = await c_master.execute_command(\n        \"FT.SEARCH\",\n        \"all_types_idx\",\n        \"*=>[KNN 10 @embedding $vec]\",\n        \"PARAMS\",\n        \"2\",\n        \"vec\",\n        query_vec,\n    )\n    assert knn_result[0] == 10  # Exactly 10 results for KNN 10\n\n    # Start replication\n    await c_replica.execute_command(\"REPLICAOF\", \"localhost\", master.port)\n    await wait_available_async(c_replica)\n\n    # Verify index exists on replica\n    indices = await c_replica.execute_command(\"FT._LIST\")\n    assert b\"all_types_idx\" in indices or \"all_types_idx\" in indices\n\n    replica_idx = 
c_replica.ft(\"all_types_idx\")\n\n    # Verify all search types work on replica\n\n    # Text search\n    replica_text = await replica_idx.search(\"Product 100\")\n    assert replica_text.total >= 1\n\n    # Numeric search\n    replica_numeric = await replica_idx.search(\"@price:[1000 2000]\")\n    assert replica_numeric.total == 1001\n\n    # Tag search\n    replica_tag = await replica_idx.search(Query(\"@category:{electronics}\").paging(0, 0))\n    assert replica_tag.total == expected_electronics\n\n    # Geo search\n    replica_geo = await replica_idx.search(\"@location:[-122.0 37.0 10 km]\")\n    assert replica_geo.total == geo_result.total\n\n    # Vector search (KNN) - verify same results as master\n    replica_knn = await c_replica.execute_command(\n        \"FT.SEARCH\",\n        \"all_types_idx\",\n        \"*=>[KNN 10 @embedding $vec]\",\n        \"PARAMS\",\n        \"2\",\n        \"vec\",\n        query_vec,\n    )\n    assert replica_knn[0] == 10\n\n    # Extract and compare document keys from KNN results (sorted because order may vary\n    # slightly due to floating-point distance ties).\n    # Format: [count, key1, fields1, key2, fields2, ...]\n    master_knn_keys = sorted([knn_result[i] for i in range(1, len(knn_result), 2)])\n    replica_knn_keys = sorted([replica_knn[i] for i in range(1, len(replica_knn), 2)])\n    assert master_knn_keys == replica_knn_keys, (\n        f\"KNN results differ between master and replica: \"\n        f\"master={master_knn_keys}, replica={replica_knn_keys}\"\n    )\n\n    # Verify the HNSW index was actually restored from the serialized graph (not rebuilt\n    # from scratch). 
Check replica's INFO log for the restoration message.\n    info_logs = [f for f in replica.log_files if \"INFO\" in f]\n    assert info_logs, \"Could not find replica INFO log file\"\n    with open(info_logs[0], \"r\") as f:\n        log_content = f.read()\n    if master_threads == replica_threads:\n        assert (\n            \"Restored HNSW index\" in log_content\n        ), \"Expected HNSW index to be restored from serialized graph (same shard count)\"\n    else:\n        assert (\n            \"global_ids remapped\" in log_content\n        ), \"Expected HNSW index to be restored with global_id remapping (different shard count)\"\n    rebuild_lines = [\n        l.strip()\n        for l in log_content.splitlines()\n        if \"Will rebuild from scratch\" in l and \"HNSW\" in l\n    ]\n    assert (\n        not rebuild_lines\n    ), \"HNSW index fell back to rebuild from scratch unexpectedly:\\n\" + \"\\n\".join(rebuild_lines)\n\n\n@dfly_args({\"proactor_threads\": 4})\nasync def test_vector_search_with_geo_and_tags(async_client: aioredis.Redis):\n    \"\"\"\n    Test combining vector search (KNN) with geo radius filter and category tags.\n    This tests complex queries that use multiple index types together with 10000 elements.\n    \"\"\"\n    idx = async_client.ft(\"combined_idx\")\n\n    # Create index with vector, geo, and tag fields\n    await idx.create_index(\n        [\n            TextField(\"name\"),\n            TagField(\"category\"),\n            GeoField(\"location\"),\n            VectorField(\n                \"embedding\",\n                algorithm=\"HNSW\",\n                attributes={\n                    \"TYPE\": \"FLOAT32\",\n                    \"DIM\": 3,\n                    \"DISTANCE_METRIC\": \"L2\",\n                    \"INITIAL_CAP\": 10000,\n                },\n            ),\n        ],\n        definition=IndexDefinition(index_type=IndexType.HASH, prefix=[\"place:\"]),\n    )\n\n    # Insert 10000 places with varying 
locations and categories\n    NUM_PLACES = 10000\n    categories = [\"restaurant\", \"cafe\", \"bar\", \"shop\", \"hotel\"]\n\n    pipe = async_client.pipeline(transaction=False)\n    for i in range(NUM_PLACES):\n        # Distribute locations across a grid\n        lat = 37.0 + (i % 100) * 0.01  # 100 different latitudes\n        lon = -122.5 + (i // 100) * 0.01  # 100 different longitudes\n        category = categories[i % len(categories)]\n        # Create embeddings that form clusters based on category\n        cat_offset = (i % len(categories)) * 10\n        embedding = np.array(\n            [float(i % 100) + cat_offset, float(i // 100), float(i % 10)], dtype=np.float32\n        )\n        pipe.hset(\n            f\"place:{i}\",\n            mapping={\n                \"name\": f\"Place {i}\",\n                \"category\": category,\n                \"location\": f\"{lon},{lat}\",\n                \"embedding\": embedding.tobytes(),\n            },\n        )\n        # Execute in batches\n        if i % 1000 == 999:\n            await pipe.execute()\n            pipe = async_client.pipeline(transaction=False)\n    await pipe.execute()\n\n    # Test 1: Vector search only - find places with embeddings closest to a point\n    query_vec = np.array([50.0, 50.0, 5.0], dtype=np.float32).tobytes()\n    result = await async_client.execute_command(\n        \"FT.SEARCH\",\n        \"combined_idx\",\n        \"*=>[KNN 10 @embedding $vec]\",\n        \"PARAMS\",\n        \"2\",\n        \"vec\",\n        query_vec,\n        \"RETURN\",\n        \"1\",\n        \"name\",\n    )\n    assert result[0] == 10\n\n    # Test 2: Vector search filtered by tag - only restaurants (every 5th item starting from 0)\n    result = await async_client.execute_command(\n        \"FT.SEARCH\",\n        \"combined_idx\",\n        \"@category:{restaurant}=>[KNN 10 @embedding $vec]\",\n        \"PARAMS\",\n        \"2\",\n        \"vec\",\n        query_vec,\n        \"RETURN\",\n        
\"2\",\n        \"name\",\n        \"category\",\n    )\n    assert result[0] == 10\n    # Verify all results are restaurants\n    result_str = str(result)\n    for cat in [\"cafe\", \"bar\", \"shop\", \"hotel\"]:\n        # The category field should not contain other categories\n        assert (\n            f\"'category', '{cat}'\" not in result_str and f\"b'category', b'{cat}'\" not in result_str\n        )\n\n    # COMMENTED OUT: Test 3 - Triggers DCHECK failure due to unsorted geo results\n    # See: src/core/search/indices.cc:622 - GeoIndex::RadiusSearch doesn't sort results\n    # This causes DCHECK failure at src/core/search/search.cc:402 when combining filters\n    # TODO: Uncomment after fixing GeoIndex::RadiusSearch to sort results\n    #\n    # # Test 3: Vector search filtered by geo - only places near center (within 5km)\n    # result = await async_client.execute_command(\n    #     \"FT.SEARCH\",\n    #     \"combined_idx\",\n    #     \"@location:[-122.0 37.5 5 km]=>[KNN 20 @embedding $vec]\",\n    #     \"PARAMS\",\n    #     \"2\",\n    #     \"vec\",\n    #     query_vec,\n    #     \"RETURN\",\n    #     \"2\",\n    #     \"name\",\n    #     \"location\",\n    # )\n    # # Should find places within the geo radius\n    # assert result[0] >= 1\n    # assert result[0] <= 20\n\n    # COMMENTED OUT: Test 4 - Triggers DCHECK failure due to unsorted geo results\n    # See: src/core/search/indices.cc:622 - GeoIndex::RadiusSearch doesn't sort results\n    # This causes DCHECK failure at src/core/search/search.cc:402 when combining geo + tag filters\n    # TODO: Uncomment after fixing GeoIndex::RadiusSearch to sort results\n    #\n    # # Test 4: Combined - vector search with both geo AND tag filters\n    # # Find cafes (category index 1) near a specific location\n    # query_vec_cafe = np.array([60.0, 50.0, 5.0], dtype=np.float32).tobytes()  # Near cafe cluster\n    # result = await async_client.execute_command(\n    #     \"FT.SEARCH\",\n    #     
\"combined_idx\",\n    #     \"@category:{cafe} @location:[-122.0 37.5 20 km]=>[KNN 10 @embedding $vec]\",\n    #     \"PARAMS\",\n    #     \"2\",\n    #     \"vec\",\n    #     query_vec_cafe,\n    #     \"RETURN\",\n    #     \"2\",\n    #     \"name\",\n    #     \"category\",\n    # )\n    # # Should find cafes within the geo and vector constraints\n    # assert result[0] >= 1\n    # result_str = str(result)\n    # # Should not contain other categories\n    # assert \"restaurant\" not in result_str.lower() or \"category\" not in result_str\n\n    # COMMENTED OUT: Test 5 - Triggers DCHECK failure due to unsorted geo results\n    # See: src/core/search/indices.cc:622 - GeoIndex::RadiusSearch doesn't sort results\n    # This causes DCHECK failure at src/core/search/search.cc:402 when combining geo + tag filters\n    # TODO: Uncomment after fixing GeoIndex::RadiusSearch to sort results\n    #\n    # # Test 5: Tag search with geo filter (no vector)\n    # result = await idx.search(\n    #     Query(\"@category:{restaurant} @location:[-122.0 37.5 50 km]\").paging(0, 0)\n    # )\n    # # Should find restaurants within 50km radius\n    # assert result.total >= 1\n\n    # Test 6: Count documents per category\n    for cat in categories:\n        result = await idx.search(Query(f\"@category:{{{cat}}}\").paging(0, 0))\n        expected_count = NUM_PLACES // len(categories)\n        assert (\n            result.total == expected_count\n        ), f\"Expected {expected_count} {cat}s, got {result.total}\"\n\n    await idx.dropindex()\n"
  },
  {
    "path": "tests/dragonfly/seeder/README.md",
    "content": "## Seeder library\n\nPlease use the testing frameworks factories to obtain proper seeder instances!\n\n### 1. Static seeder\n\nThe DebugPopulateSeeder is a thin wrapper around `DEBUG POPULATE` with a little bit of fuzziness for collection sizes. It should be preffered for generating \"static\" data for snapshotting, memory consumption tests, etc.\n\n```python\ns = DebugPopulateSeeder(key_target=10_000)\nawait s.run(client) # Creates around 10k keys\n```\n\n### 2. Checking consistency\n\nUse `SeederBase.capture()` (accessed via `DebugPopulateSeeder` or `Seeder`) to calculate a \"state hashes\" based on all the data inside an instance. Equal data produces equal hashes (equal hashes don't guarantee equal data but what are the odds...).\n\n```python\n# Fill master with ~10k keys\ns = DebugPopulateSeeder(key_target=10_000)\nawait seeder.run(master)\n\n# \"Replicate\" or other operations\nreplicate(master, replica)\n\n# Ensure master and replica have same state hashes\nmaster_hashes, replica_hashes = await asyncio.gather(\n    DebugPopulateSeeder.capture(master), # note it's a static method\n    DebugPopulateSeeder.capture(replica)\n)\nassert master_hashes == replica_hashes\n```\n\n### 3. Dynamic seeder\n\nContrary to the static seeder, the normal seeder issues a more complicated mix of commands, supports deleting keys and sending modification traffic.\nThe seeder tries to maintain a specific number of keys, quickly filling or emptying the instance to reach the target. 
Once reached, it will issue a balanced load of all kinds of operations.\n\n```python\n# Configure how many keys we want\ns = Seeder(key_target=10_000)\n\n# Fill instance with keys until it's 10k +- 1%\n# Will create many new keys with data and reach equilibrium\nawait s.run(client, target_deviation=0.01)\nassert abs(await client.dbsize() - 10_000) <= 100\n\n# Run 5k operations, balanced mix of create/delete/modify\nawait s.run(client, target_ops=5000)\n\n# Now we want only 500 keys, issue many deletes\ns.change_key_target(500)\nawait s.run(client, target_deviation=0.01)\n```\n\n### 4. Working with load\n\nA seeder's `run(client)` can be called without any target. In that case it keeps running until it is explicitly stopped with `stop(client)`:\n\n```python\n# Fill instance with keys\ns = Seeder()\nawait s.run(client, target_deviation=0.01)\n\n# Start seeder without target\n# Because the instance reached its key target, the seeder\n# will issue a balanced mix of modifications/additions/deletions\nseeding_task = asyncio.create_task(s.run(client))\n\n# Do operations under fuzzy load\nsave(client)\n\nawait s.stop(client) # request stop, no immediate effect\nawait seeding_task # wait for actual stop and cleanup\n```\n"
  },
  {
    "path": "tests/dragonfly/seeder/__init__.py",
    "content": "import asyncio\nimport random\nimport logging\nimport re\nimport typing\nimport math\nimport redis\nimport redis.asyncio as aioredis\nfrom dataclasses import dataclass\nimport time\nimport sys\n\nimport numpy as np\n\ntry:\n    from importlib import resources as impresources\nexcept ImportError:\n    # CI runs on python < 3.8\n    import importlib_resources as impresources\n\n\nclass SeederBase:\n    UID_COUNTER = 1  # multiple generators should not conflict on keys\n    CACHED_SCRIPTS = {}\n    DEFAULT_TYPES = [\"STRING\", \"LIST\", \"SET\", \"HASH\", \"ZSET\", \"JSON\", \"STREAM\"]\n\n    def __init__(self, types: typing.Optional[typing.List[str]] = None, seed=None):\n        self.uid = SeederBase.UID_COUNTER\n        SeederBase.UID_COUNTER += 1\n        self.types = types if types is not None else SeederBase.DEFAULT_TYPES\n\n        self.seed = random.randrange(sys.maxsize)\n        if seed is not None:\n            self.seed = seed\n\n        random.seed(int(self.seed))\n        logging.debug(f\"Random seed: {self.seed}, check: {random.randrange(100)}\")\n\n    @classmethod\n    async def capture(\n        clz, client: aioredis.Redis, types: typing.Optional[typing.List[str]] = None\n    ) -> typing.Tuple[int]:\n        \"\"\"Generate hash capture for all data stored in instance pointed by client\"\"\"\n\n        sha = await client.script_load(clz._load_script(\"hash\"))\n        types_to_capture = types if types is not None else clz.DEFAULT_TYPES\n        return tuple(\n            await asyncio.gather(\n                *(clz._run_capture(client, sha, data_type) for data_type in types_to_capture)\n            )\n        )\n\n    @staticmethod\n    async def _run_capture(client, sha, data_type):\n        s = time.time()\n        res = await client.evalsha(sha, 0, data_type)\n        logging.debug(f\"hash capture of {data_type} took {time.time() - s}\")\n        return res\n\n    @staticmethod\n    def _read_file(fname):\n        try:\n            
script_file = impresources.files(__package__) / fname\n            with script_file.open(\"rt\") as f:\n                return f.read()\n        except AttributeError:\n            return impresources.read_text(__package__, fname)\n\n    @classmethod\n    def _load_script(clz, fname):\n        if fname in clz.CACHED_SCRIPTS:\n            return clz.CACHED_SCRIPTS[fname]\n\n        script = clz._read_file(f\"script-{fname}.lua\")\n        requested = re.findall(r\"-- import:(.*?) --\", script)\n        for request in requested:\n            lib = clz._read_file(f\"script-{request}.lua\")\n            script = script.replace(f\"-- import:{request} --\", lib)\n\n        clz.CACHED_SCRIPTS[fname] = script\n        return script\n\n\nclass DebugPopulateSeeder(SeederBase):\n    \"\"\"Wrapper around DEBUG POPULATE with fuzzy key sizes and a balanced type mix\"\"\"\n\n    def __init__(\n        self,\n        key_target=10_000,\n        data_size=100,\n        variance=5,\n        samples=10,\n        collection_size=None,\n        types: typing.Optional[typing.List[str]] = None,\n        seed=None,\n    ):\n        SeederBase.__init__(self, types, seed)\n        self.key_target = key_target\n        self.data_size = data_size\n        self.variance = variance\n        self.samples = samples\n\n        if collection_size is None:\n            self.collection_size = data_size ** (1 / 3)\n        else:\n            self.collection_size = collection_size\n\n    async def run(self, client: aioredis.Redis):\n        \"\"\"Run with specified options until key_target is met\"\"\"\n        samples = [\n            (dtype, f\"k-s{self.uid}u{i}-\") for i, dtype in enumerate(self.types * self.samples)\n        ]\n\n        # Handle samples in chuncks of 24 to not overload client pool and instance\n        chunk_size = 24\n        for i in range(0, len(samples), chunk_size):\n            await asyncio.gather(\n                *(\n                    self._run_unit(client, dtype, 
prefix)\n                    for dtype, prefix in samples[i : i + chunk_size]\n                )\n            )\n\n    async def _run_unit(self, client: aioredis.Redis, dtype: str, prefix: str):\n        key_target = self.key_target // (self.samples * len(self.types))\n        if dtype == \"STRING\":\n            dsize = random.uniform(self.data_size / self.variance, self.data_size * self.variance)\n            csize = 1\n        else:\n            csize = self.collection_size\n            csize = math.ceil(random.uniform(csize / self.variance, csize * self.variance))\n            dsize = self.data_size // csize\n\n        args = [\"DEBUG\", \"POPULATE\", key_target, prefix, math.ceil(dsize)]\n        args += [\"RAND\", \"TYPE\", dtype, \"ELEMENTS\", csize]\n        return await client.execute_command(*args)\n\n\nclass Seeder(SeederBase):\n    @dataclass\n    class Unit:\n        prefix: str\n        type: str\n        counter: int\n        stop_key: str\n\n    units: typing.List[Unit]\n\n    def __init__(\n        self,\n        units=10,\n        key_target=10_000,\n        data_size=100,\n        collection_size=None,\n        types: typing.Optional[typing.List[str]] = None,\n        huge_value_target=5,\n        huge_value_size=100000,\n        seed=None,\n        huge_value_add_only=False,\n    ):\n        SeederBase.__init__(self, types, seed)\n        self.key_target = key_target\n        self.data_size = data_size\n        if collection_size is None:\n            self.collection_size = math.ceil(data_size ** (1 / 3))\n        else:\n            self.collection_size = collection_size\n\n        self.huge_value_add_only = huge_value_add_only\n        self.huge_value_target = huge_value_target\n        self.huge_value_size = huge_value_size\n\n        self.units = [\n            Seeder.Unit(\n                prefix=f\"k-s{self.uid}u{i}-\",\n                type=self.types[i % len(self.types)],\n                counter=0,\n                
stop_key=f\"_s{self.uid}u{i}-stop\",\n            )\n            for i in range(units)\n        ]\n\n    async def run(self, client: aioredis.Redis, target_ops=None, target_deviation=None):\n        \"\"\"Run seeder until one of the targets or until stopped if none are set\"\"\"\n\n        using_stopkey = target_ops is None and target_deviation is None\n        args = [\n            self.key_target / len(self.units),\n            target_ops if target_ops is not None else 0,\n            target_deviation if target_deviation is not None else -1,\n            self.data_size,\n            self.collection_size,\n            int(self.huge_value_add_only),\n            self.huge_value_target / len(self.units),\n            self.huge_value_size,\n            self.seed,\n        ]\n\n        sha = await client.script_load(Seeder._load_script(\"generate\"))\n        for unit in self.units:\n            # Must be serial, otherwise cluster clients throws an exception\n            await self._run_unit(client, sha, unit, using_stopkey, args)\n\n    async def stop(self, client: aioredis.Redis):\n        \"\"\"Request seeder seeder if it's running without a target, future returned from start() must still be awaited\"\"\"\n\n        for unit in self.units:\n            # Must be serial, otherwise cluster clients throws an exception\n            await client.set(unit.stop_key, \"X\")\n\n    def change_key_target(self, target: int):\n        \"\"\"Change key target, applied only on succeeding runs\"\"\"\n\n        self.key_target = max(target, 100)  # math breaks with low values\n\n    @staticmethod\n    async def _run_unit(client: aioredis.Redis, sha: str, unit: Unit, using_stopkey, args):\n        await client.delete(unit.stop_key)\n\n        s = time.time()\n\n        args = [\n            unit.prefix,\n            unit.type,\n            unit.counter,\n            unit.stop_key if using_stopkey else \"\",\n        ] + args\n\n        result = await client.evalsha(sha, 0, *args)\n 
       result = result.split()\n        unit.counter = int(result[0])\n        huge_entries = int(result[1])\n\n        msg = f\"running unit {unit.prefix}/{unit.type} took {time.time() - s}, target {args[4+0]}\"\n        if huge_entries > 0:\n            msg = f\"{msg}. Total huge entries {huge_entries} added.\"\n\n        logging.debug(msg)\n\n\nclass HnswSearchSeeder:\n\n    def __init__(\n        self,\n        index_name=\"hnsw_idx\",\n        prefix=\"doc:\",\n        num_dims=4,\n        num_initial_docs=200,\n        seed=42,\n    ):\n        self.index_name = index_name\n        self.prefix = prefix\n        self.num_dims = num_dims\n        self.num_initial_docs = num_initial_docs\n        self.seed = seed\n\n        self._doc_counter = 0\n        self._stop_event = asyncio.Event()\n\n    def _make_embedding(self):\n        return np.random.uniform(-10, 10, self.num_dims).astype(np.float32)\n\n    async def create_index(self, client: aioredis.Redis):\n        await client.execute_command(\n            \"FT.CREATE\",\n            self.index_name,\n            \"ON\",\n            \"HASH\",\n            \"PREFIX\",\n            \"1\",\n            self.prefix,\n            \"SCHEMA\",\n            \"title\",\n            \"TEXT\",\n            \"doc_id\",\n            \"TAG\",\n            \"embedding\",\n            \"VECTOR\",\n            \"HNSW\",\n            \"6\",\n            \"TYPE\",\n            \"FLOAT32\",\n            \"DIM\",\n            str(self.num_dims),\n            \"DISTANCE_METRIC\",\n            \"L2\",\n        )\n\n    async def seed_initial_docs(self, client: aioredis.Redis):\n        pipe = client.pipeline(transaction=False)\n        for i in range(self.num_initial_docs):\n            emb = self._make_embedding()\n            pipe.hset(\n                f\"{self.prefix}{i}\",\n                mapping={\n                    \"title\": f\"Product {i}\",\n                    \"doc_id\": str(i),\n                    \"embedding\": 
emb.tobytes(),\n                },\n            )\n        await pipe.execute()\n        self._doc_counter = self.num_initial_docs\n\n    def stop(self):\n        self._stop_event.set()\n\n    async def _search_knn(self, client, query_vec, k=5):\n        \"\"\"Run a KNN search and return (total_count, set_of_doc_ids).\"\"\"\n        r = await client.execute_command(\n            \"FT.SEARCH\",\n            self.index_name,\n            \"*=>[KNN {k} @embedding $vec]\".format(k=k),\n            \"PARAMS\",\n            \"2\",\n            \"vec\",\n            query_vec,\n            \"LIMIT\",\n            \"0\",\n            str(k),\n        )\n        doc_ids = set(r[i] for i in range(1, len(r), 2))\n        return r[0], doc_ids\n\n    async def _search_knn_filtered(self, client, query_vec, doc_id, k=5):\n        \"\"\"Run a filtered KNN search for a specific document by its doc_id TAG.\n\n        With a TAG filter, Dragonfly bypasses KNN approximate search and just\n        checks presence in the index, making this a reliable existence check.\n        \"\"\"\n        doc_key = doc_id if isinstance(doc_id, str) else doc_id.decode()\n        doc_num = doc_key[len(self.prefix) :] if doc_key.startswith(self.prefix) else doc_key\n        r = await client.execute_command(\n            \"FT.SEARCH\",\n            self.index_name,\n            \"@doc_id:{{{id}}}=>[KNN {k} @embedding $vec]\".format(id=doc_num, k=k),\n            \"PARAMS\",\n            \"2\",\n            \"vec\",\n            query_vec,\n            \"LIMIT\",\n            \"0\",\n            str(k),\n        )\n        return r[0] > 0\n\n    async def verify(self, *clients: aioredis.Redis, num_queries=10):\n        if len(clients) < 2:\n            raise ValueError(\"Need at least two clients to compare\")\n\n        sizes = [await c.dbsize() for c in clients]\n        for i in range(1, len(sizes)):\n            assert (\n                sizes[0] == sizes[i]\n            ), f\"dbsize mismatch: 
client[0]={sizes[0]} vs client[{i}]={sizes[i]}\"\n\n        # HNSW is approximate, so KNN results between master and replica may differ.\n        # For any document that appears on one side but not the other, we run a\n        # filtered KNN search using the doc_id TAG. With a filter, Dragonfly skips\n        # approximate KNN and just checks index presence, so this reliably verifies\n        # that the replica has indexed all documents.\n        k = 5\n\n        for q in range(num_queries):\n            query_vec = self._make_embedding().tobytes()\n            results = []\n            for c in clients:\n                total, doc_ids = await self._search_knn(c, query_vec, k)\n                results.append((total, doc_ids))\n\n            assert results[0][0] > 0, \"KNN search returned no results on master\"\n\n            for i in range(1, len(results)):\n                master_ids = results[0][1]\n                replica_ids = results[i][1]\n\n                # Check documents found on master but not on replica\n                missing_on_replica = master_ids - replica_ids\n                truly_missing = []\n                for doc_id in missing_on_replica:\n                    if not await self._search_knn_filtered(clients[i], query_vec, doc_id, 1):\n                        truly_missing.append(doc_id)\n\n                assert not truly_missing, (\n                    f\"Query {q}: documents {truly_missing} found on master but \"\n                    f\"not indexed on replica (client[{i}]). 
\"\n                    f\"Master results: {sorted(master_ids)}, \"\n                    f\"Replica results: {sorted(replica_ids)}\"\n                )\n\n                # Check documents found on replica but not on master\n                missing_on_master = replica_ids - master_ids\n                truly_missing = []\n                for doc_id in missing_on_master:\n                    if not await self._search_knn_filtered(clients[0], query_vec, doc_id, k):\n                        truly_missing.append(doc_id)\n\n                assert not truly_missing, (\n                    f\"Query {q}: documents {truly_missing} found on replica \"\n                    f\"(client[{i}]) but not indexed on master. \"\n                    f\"Master results: {sorted(master_ids)}, \"\n                    f\"Replica results: {sorted(replica_ids)}\"\n                )\n\n    async def run_traffic(self, client: aioredis.Redis, sleep_interval=0.01):\n        self._stop_event.clear()\n        while not self._stop_event.is_set():\n            op = random.choice([\"insert\", \"update\", \"delete\"])\n            try:\n                if op == \"insert\":\n                    emb = self._make_embedding()\n                    await client.hset(\n                        f\"{self.prefix}{self._doc_counter}\",\n                        mapping={\n                            \"title\": f\"Product {self._doc_counter}\",\n                            \"doc_id\": str(self._doc_counter),\n                            \"embedding\": emb.tobytes(),\n                        },\n                    )\n                    self._doc_counter += 1\n                elif op == \"update\":\n                    key_id = random.randint(0, max(self._doc_counter - 1, 0))\n                    key = f\"{self.prefix}{key_id}\"\n                    if not await client.exists(key):\n                        continue\n                    emb = self._make_embedding()\n                    await client.hset(key, 
mapping={\"embedding\": emb.tobytes()})\n                elif op == \"delete\":\n                    key_id = random.randint(0, max(self._doc_counter - 1, 0))\n                    await client.delete(f\"{self.prefix}{key_id}\")\n            except (redis.exceptions.ConnectionError, redis.exceptions.ResponseError):\n                await asyncio.sleep(sleep_interval)\n            await asyncio.sleep(sleep_interval)\n\n    async def run_search_queries(self, client: aioredis.Redis, sleep_interval=0.05):\n        while not self._stop_event.is_set():\n            try:\n                query_vec = self._make_embedding().tobytes()\n                await client.execute_command(\n                    \"FT.SEARCH\",\n                    self.index_name,\n                    \"*=>[KNN 5 @embedding $vec]\",\n                    \"PARAMS\",\n                    \"2\",\n                    \"vec\",\n                    query_vec,\n                    \"LIMIT\",\n                    \"0\",\n                    \"5\",\n                )\n            except (redis.exceptions.ConnectionError, redis.exceptions.ResponseError):\n                pass\n            await asyncio.sleep(sleep_interval)\n"
  },
  {
    "path": "tests/dragonfly/seeder/script-generate.lua",
    "content": "--!df flags=disable-atomicity\n\n--[[\nScript for quickly generating various data\n]] --\n-- import:genlib --\n-- import:utillib --\n\n-- inputs: unit identifiers\nlocal prefix = ARGV[1]\nlocal type = ARGV[2]\nlocal key_counter = tonumber(ARGV[3])\nlocal stop_key = ARGV[4]\n-- inputs: task specific\nlocal key_target = tonumber(ARGV[5])\nlocal total_ops = tonumber(ARGV[6])\nlocal min_dev = tonumber(ARGV[7])\nlocal data_size = tonumber(ARGV[8])\nlocal collection_size = tonumber(ARGV[9])\nlocal huge_value_keys_add_only = tonumber(ARGV[10])\n-- Probability of each key in key_target to be a big value\nlocal huge_value_target = tonumber(ARGV[11])\nlocal huge_value_size = tonumber(ARGV[12])\n-- Seed\nlocal seed = tonumber(ARGV[13])\nmath.randomseed(seed)\n\n-- collect all keys belonging to this script\n-- assumes exclusive ownership\nlocal keys = LU_collect_keys(prefix, type)\n\nLG_funcs.init(data_size, collection_size, huge_value_target, huge_value_size)\nlocal addfunc = LG_funcs['add_' .. string.lower(type)]\nlocal modfunc = LG_funcs['mod_' .. string.lower(type)]\nlocal huge_entries = LG_funcs[\"get_huge_entries\"]\nlocal is_huge_entry = LG_funcs[\"is_huge_entry\"]\n-- Keep track of total number of keys including huge value keys. Intialize\n-- to number of keys that currently exists.\nlocal total_keys = #keys\n\nlocal function action_add()\n    local key = prefix .. 
tostring(key_counter)\n    local op_type = string.lower(type)\n    local is_next_huge_entry = false\n    key_counter = key_counter + 1\n    total_keys = total_keys + 1\n\n    if huge_value_keys_add_only == 1 then\n        is_next_huge_entry = is_huge_entry(op_type)\n    end\n\n    table.insert(keys, key)\n    addfunc(key, keys)\n\n\n   -- If we allow adding only huge value keys we will now remove it from\n    -- table so it wouldn't be selected for any action_del / action_mod\n    if is_next_huge_entry then\n        table.remove(keys)\n    end\nend\n\nlocal function action_mod()\n    local key = keys[math.random(#keys)]\n    modfunc(key, keys)\nend\n\nlocal function action_del()\n    total_keys = total_keys - 1\n    local key_idx = math.random(#keys)\n    keys[key_idx], keys[#keys] = keys[#keys], keys[key_idx]\n    local key = table.remove(keys)\n    redis.acall('DEL', key)\nend\n\n-- set equilibrium point as key target, see intensity calculations below\nlocal real_target = key_target\nkey_target = key_target / 0.956\n\n-- accumulative probabilities: [add, add + delete, modify = 1-( add + delete) ]\nlocal p_add = 0\nlocal p_del = 0\n\nlocal counter = 0\nwhile true do\n    counter = counter + 1\n\n    -- break if we reached target ops\n    if total_ops > 0 and counter > total_ops then\n        break\n    end\n\n    -- break if we reached our target deviation\n    if min_dev > 0 and math.abs(total_keys - real_target) / real_target < min_dev then\n        break\n    end\n\n    -- break if stop key was set (every 100 ops to not slow down)\n    if stop_key ~= '' and counter % 100 == 0 and redis.call('EXISTS', stop_key) then\n        break\n    end\n\n    -- fast path, if we have less than half of the target, always grow\n    if total_keys * 2 < key_target then\n        action_add()\n        goto continue\n    end\n\n    -- update probability only every 10 iterations\n    if counter % 10 == 0 then\n        -- calculate intensity (not normalized probabilities)\n        -- 
please see attached plots in PR to understand convergence\n        -- https://github.com/dragonflydb/dragonfly/pull/2556\n\n        -- the add intensity is monotonically decreasing with keycount growing,\n        -- the delete intensity is monotonically increasing with keycount growing,\n        -- the point where the intensities are equal is the equilibrium point,\n        -- based on the formulas it's ~0.956 * key_target\n        local i_add = math.max(0, 1 - (total_keys / key_target) ^ 16)\n        local i_del = (total_keys / key_target) ^ 16\n\n        -- we are only interested in large amounts of modification commands when we are in an\n        -- equilibrium, where there are no low intensities\n        local i_mod = math.max(0, 7 * math.min(i_add, i_del) ^ 3)\n\n        -- transform intensities to [0, 1] probability ranges\n        local sum = i_add + i_del + i_mod\n        p_add = i_add / sum\n        p_del = p_add + i_del / sum\n    end\n\n    -- generate random action\n    local p = math.random()\n    if p < p_add then\n        action_add()\n    elseif p < p_del then\n        action_del()\n    else\n        action_mod()\n    end\n\n    ::continue::\nend\n\n-- clear stop key\nif stop_key ~= '' then\n    redis.call('DEL', stop_key)\nend\n\nreturn tostring(key_counter) .. \" \" .. tostring(huge_entries())\n"
  },
  {
    "path": "tests/dragonfly/seeder/script-genlib.lua",
    "content": "local LG_funcs = {}\n\nfunction LG_funcs.init(dsize, csize, large_val_count, large_val_sz)\n    LG_funcs.dsize = dsize\n    LG_funcs.csize = csize\n    LG_funcs.esize = math.ceil(dsize / csize)\n    LG_funcs.huge_value_target = large_val_count\n    LG_funcs.huge_value_size = large_val_sz\nend\n\nlocal huge_entries = 0\n\n\nlocal function is_huge_entry()\n    if huge_entries >= LG_funcs.huge_value_target then\n        return false\n    else\n        huge_entries = huge_entries + 1\n        return true\n    end\nend\n\n\nlocal function randstr()\n    local str\n    local is_huge = is_huge_entry()\n    if is_huge then\n        str = dragonfly.randstr(LG_funcs.huge_value_size)\n    else\n        str = dragonfly.randstr(LG_funcs.esize)\n    end\n    return str\nend\n\nlocal function randstr_sequence()\n    local strs\n    local is_huge = is_huge_entry()\n    if is_huge then\n        strs = dragonfly.randstr(LG_funcs.huge_value_size, LG_funcs.csize)\n    else\n        strs = dragonfly.randstr(LG_funcs.esize, LG_funcs.csize)\n    end\n    return strs\nend\n\n-- strings\n-- store blobs of random chars\n\nfunction LG_funcs.add_string(key)\n    redis.apcall('SET', key, dragonfly.randstr(LG_funcs.dsize))\nend\n\nfunction LG_funcs.mod_string(key)\n    -- APPEND and SETRANGE are the only modifying operations for strings,\n    -- issue APPEND rarely to not grow data too much\n    if math.random() < 0.05 then\n        redis.apcall('APPEND', key, '+')\n    else\n        local replacement = dragonfly.randstr(LG_funcs.dsize // 2)\n        redis.apcall('SETRANGE', key, math.random(0, LG_funcs.dsize // 2), replacement)\n    end\nend\n\n-- lists\n-- store list of random blobs of default container/element sizes\n\nfunction LG_funcs.add_list(key, keys)\n    redis.apcall('LPUSH', key, unpack(randstr_sequence()))\nend\n\nfunction LG_funcs.mod_list(key, keys)\n    -- equally likely pops and pushes, we rely on the list size being large enough\n    -- to \"highly likely\" not 
get emptied out by consequitve pops\n    local action = math.random(1, 4)\n    if action == 1 then\n        redis.apcall('RPOP', key)\n    elseif action == 2 then\n        redis.apcall('LPOP', key)\n    elseif action == 3 then\n      redis.apcall('LPUSH', key, randstr())\n    else\n      redis.apcall('RPUSH', key, randstr())\n    end\nend\n\n-- sets\n-- store sets of blobs of default container/element sizes\n\nfunction LG_funcs.add_set(key, keys)\n    if #keys > 100 and math.random() < 0.05 then\n        -- we assume that elements overlap with a very low proabiblity, so\n        -- SDIFF is expected to be equal to the origin set.\n        -- Repeating this operation too often can lead to two equal sets being chosen\n        local i1 = math.random(#keys)\n        local i2 = math.random(#keys)\n        while i1 == i2 do\n            i2 = math.random(#keys)\n        end\n        redis.apcall('SDIFFSTORE', key, keys[i1], keys[i2])\n    else\n        redis.apcall('SADD', key, unpack(randstr_sequence()))\n    end\nend\n\nfunction LG_funcs.mod_set(key, keys)\n     -- equally likely pops and additions\n    if math.random() < 0.5 then\n        redis.apcall('SPOP', key)\n    else\n        redis.apcall('SADD', key, randstr())\n    end\nend\n\n\n-- hashes\n-- store  {to_string(i): value for i in [1, csize]},\n-- where `value` is a random string for even indices and a number for odd indices\n\nfunction LG_funcs.add_hash(key, keys)\n    local blobs = randstr_sequence()\n    local limit = LG_funcs.csize\n\n    local htable = {}\n    for i = 1, limit do\n        htable[i * 2 - 1] = tostring(i)\n        htable[i * 2] = blobs[i]\n    end\n\n    redis.apcall('HSET', key, unpack(htable))\nend\n\nfunction LG_funcs.mod_hash(key, keys)\n    local idx = math.random(LG_funcs.csize)\n    redis.apcall('HSET', key, tostring(idx), randstr())\nend\n\n-- sorted sets\n\nfunction LG_funcs.add_zset(key, keys)\n    -- TODO: We don't support ZDIFFSTORE\n    local blobs = randstr_sequence()\n    local 
ztable = {}\n\n    local limit = LG_funcs.csize\n\n    for i = 1, limit do\n        ztable[i * 2 - 1] = tostring(i)\n        ztable[i * 2] = blobs[i]\n    end\n    redis.apcall('ZADD', key, unpack(ztable))\nend\n\nfunction LG_funcs.mod_zset(key, keys)\n    local action = math.random(1, 4)\n    if action <= 2 then\n        local size = LG_funcs.csize * 2\n        redis.apcall('ZADD', key, math.random(0, size), randstr())\n    elseif action == 3 then\n        redis.apcall('ZPOPMAX', key)\n    else\n        redis.apcall('ZPOPMIN', key)\n    end\nend\n\n-- json\n-- store single list of integers inside object\n\nfunction LG_funcs.add_json(key)\n    -- generate single list of counters\n    local seed = math.random(100)\n    local counters = {}\n    for i = 1, LG_funcs.csize do\n        counters[i] = ((i + seed) * 123) % 701\n    end\n    redis.apcall('JSON.SET', key, '$', cjson.encode({counters = counters}))\nend\n\nfunction LG_funcs.mod_json(key, dbsize)\n    local action = math.random(1, 4)\n    if action == 1 then\n        redis.apcall('JSON.ARRAPPEND', key, '$.counters', math.random(701))\n    elseif action == 2 then\n        redis.apcall('JSON.ARRPOP', key, '$.counters')\n    elseif action == 3 then\n        redis.apcall('JSON.NUMMULTBY', key, '$.counters[' .. math.random(LG_funcs.csize ) .. ']', 2)\n    else\n        redis.apcall('JSON.NUMINCRBY', key, '$.counters[' .. math.random(LG_funcs.csize ) .. 
']', 1)\n    end\nend\n\n-- streams\n-- store sequences of timestamped events\n\nfunction LG_funcs.add_stream(key)\n    local entries = {}\n\n    local limit = LG_funcs.csize\n    local blobs = randstr_sequence()\n\n    for i = 1, limit do\n        table.insert(entries, tostring(i))\n        table.insert(entries, blobs[i])\n    end\n\n    redis.apcall('XADD', key, '*', unpack(entries))\nend\n\nfunction LG_funcs.mod_stream(key)\n    local action = math.random(1, 3)\n    if action <= 2 then\n        local size = LG_funcs.csize * 2\n        redis.apcall('XADD', key, '*', math.random(0, size), randstr())\n    else\n        local maxlen = math.random(0, 100)\n        redis.apcall('XTRIM', key, 'MAXLEN', '~', maxlen)\n    end\nend\n\nfunction LG_funcs.get_huge_entries()\n  return huge_entries\nend\n\n-- Check whether the next entry of the given type would be generated with a huge value\nfunction LG_funcs.is_huge_entry(type)\n    -- These types don't generate huge values\n    if type == \"string\" or type == \"json\" then\n        return false\n    else\n        return huge_entries < LG_funcs.huge_value_target\n    end\nend\n"
  },
  {
    "path": "tests/dragonfly/seeder/script-hash.lua",
    "content": "--!df flags=disable-atomicity\n--[[\nScript for quickly computing single 64bit hash for keys of types specified in ARGV[].\nKeys of every type are sorted lexicographically to ensure consistent order.\n]]--\n\n-- import:hashlib --\n-- import:utillib --\n\n-- inputs\nlocal type = ARGV[1]\n\nlocal OUT_HASH = 0\n\nlocal function process(type)\n    local keys = LU_collect_keys('', type)\n    local hfunc = LH_funcs[type]\n\n    -- sort to provide consistent order\n    table.sort(keys)\n\n    if type == 'string' then\n        -- batch with MGET to reduce per-key round trips (important for tiering)\n        local batch_size = 16\n        for i = 1, #keys, batch_size do\n            local batch = {}\n            for j = i, math.min(i + batch_size - 1, #keys) do\n                table.insert(batch, keys[j])\n            end\n            OUT_HASH = dragonfly.ihash(OUT_HASH, false, 'MGET', table.unpack(batch))\n        end\n    else\n        for _, key in ipairs(keys) do\n            -- hand hash over to callback\n            OUT_HASH = hfunc(key, OUT_HASH)\n        end\n    end\nend\n\nprocess(string.lower(type))\n\nreturn OUT_HASH\n"
  },
  {
    "path": "tests/dragonfly/seeder/script-hashlib.lua",
    "content": "local LH_funcs = {}\n\nfunction LH_funcs.string(key, hash)\n    -- add value to hash\n    return dragonfly.ihash(hash, false, 'GET', key)\nend\n\nfunction LH_funcs.list(key, hash)\n    -- add values to hash\n    return dragonfly.ihash(hash, false, 'LRANGE', key, 0, -1)\nend\n\nfunction LH_funcs.set(key, hash)\n    -- add values to hash, sort before to avoid ambiguity\n    return dragonfly.ihash(hash, true, 'SMEMBERS', key)\nend\n\nfunction LH_funcs.zset(key, hash)\n    -- add values to hash, ZRANGE returns always sorted values\n    return dragonfly.ihash(hash, false, 'ZRANGE', key, 0, -1, 'WITHSCORES')\nend\n\nfunction LH_funcs.hash(key, hash)\n    -- add values to hash, first convert to key-value pairs and sort\n    return dragonfly.ihash(hash, true, 'HGETALL', key)\nend\n\nfunction LH_funcs.json(key, hash)\n    -- add values to hash, note JSON.GET returns just a string\n    return dragonfly.ihash(hash, false, 'JSON.GET', key)\nend\n\nfunction LH_funcs.stream(key, hash)\n    return dragonfly.ihash(hash, false, 'XRANGE', key, '-', '+')\nend\n"
  },
  {
    "path": "tests/dragonfly/seeder/script-utillib.lua",
    "content": "-- Collect all keys of the given type that match the given prefix into a table. Uses SCAN.\nlocal function LU_collect_keys(prefix, type)\n    -- SCAN wants this weird type name for json\n    if string.lower(type) == 'json' then\n        type = 'ReJSON-RL'\n    end\n\n    local pattern = prefix .. \"*\"\n    local cursor = \"0\"\n    local keys = {}\n    repeat\n        local result = redis.call(\"SCAN\", cursor, \"COUNT\", 500, \"TYPE\", type, \"MATCH\", pattern)\n        cursor = result[1]\n        local scan_keys = result[2]\n        for i, key in ipairs(scan_keys) do\n            table.insert(keys, key)\n        end\n    until cursor == \"0\"\n    return keys\nend\n"
  },
  {
    "path": "tests/dragonfly/seeder_test.py",
    "content": "import asyncio\nimport async_timeout\nimport string\nfrom redis import asyncio as aioredis\nfrom . import dfly_args\nfrom .seeder import Seeder, DebugPopulateSeeder\nfrom .instance import DflyInstanceFactory, DflyInstance\nfrom .utility import *\n\n\n@dfly_args({\"proactor_threads\": 4})\nasync def test_static_seeder(async_client: aioredis.Redis):\n    s = DebugPopulateSeeder(key_target=10_000, data_size=100)\n    await s.run(async_client)\n\n    assert abs(await async_client.dbsize() - 10_000) <= 70\n\n\n@dfly_args({\"proactor_threads\": 4})\nasync def test_static_collection_size(async_client: aioredis.Redis):\n    async def check_list():\n        keys = await async_client.keys()\n        for key in keys:\n            assert await async_client.llen(key) == 1\n            assert len(await async_client.lpop(key)) == 10_000\n\n    s = DebugPopulateSeeder(\n        key_target=10, data_size=10_000, variance=1, samples=1, collection_size=1, types=[\"LIST\"]\n    )\n    await s.run(async_client)\n    await check_list()\n\n    await async_client.flushall()\n\n    s = Seeder(\n        units=1,\n        key_target=10,\n        data_size=10_000,\n        collection_size=1,\n        types=[\"LIST\"],\n        huge_value_target=0,\n        huge_value_size=0,\n    )\n    await s.run(async_client)\n\n\n@dfly_args({\"proactor_threads\": 4})\nasync def test_seeder_key_target(async_client: aioredis.Redis):\n    \"\"\"Ensure seeder reaches its key targets\"\"\"\n    s = Seeder(units=len(Seeder.DEFAULT_TYPES) * 2, key_target=5000)\n\n    # Ensure tests are not reasonably slow\n    async with async_timeout.timeout(20):\n        # Fill with 5k keys, 1% derivation = 50\n        await s.run(async_client, target_deviation=0.01)\n        assert abs(await async_client.dbsize() - 5000) <= 50\n\n        # Run 1k ops, ensure key balance stays the \"more or less\" the same\n        await s.run(async_client, target_ops=1000)\n        assert abs(await async_client.dbsize() - 5000) 
<= 100\n\n        # Run one second until stopped\n        task = asyncio.create_task(s.run(async_client))\n        await asyncio.sleep(1.0)\n        await s.stop(async_client)\n        await task\n\n        # Change key target, 100 is actual minimum because \"math breaks\"\n        s.change_key_target(0)\n        await s.run(async_client, target_deviation=0.5)  # don't set low precision with low values\n        assert await async_client.dbsize() < 200\n\n        # Get cmdstat calls\n        info = await async_client.info(\"ALL\")\n        calls = {\n            k.split(\"_\")[1]: v[\"calls\"]\n            for k, v in info.items()\n            if k.startswith(\"cmdstat_\") and v[\"calls\"] > 50\n        }\n        assert len(calls) > 15  # we use at least 15 different commands\n\n\n@dfly_args({\"proactor_threads\": 4})\nasync def test_seeder_capture(async_client: aioredis.Redis):\n    \"\"\"Ensure same data produces same state hashes\"\"\"\n\n    async def set_data():\n        p = async_client.pipeline()\n        p.mset(mapping={f\"string{i}\": f\"{i}\" for i in range(100)})\n        p.lpush(\"list1\", *list(string.ascii_letters))\n        p.sadd(\"set1\", *list(string.ascii_letters))\n        p.hset(\"hash1\", mapping={f\"{i}\": l for i, l in enumerate(string.ascii_letters)})\n        p.zadd(\"zset1\", mapping={l: i for i, l in enumerate(string.ascii_letters)})\n        await p.execute()\n\n    # Capture with filled data\n    await set_data()\n    capture = await Seeder.capture(async_client)\n\n    # Check hashes are 0 without data\n    await async_client.flushall()\n    assert all(h == 0 for h in (await Seeder.capture(async_client)))\n\n    # Check setting the same data results in same hashes\n    await set_data()\n    assert capture == await Seeder.capture(async_client)\n\n    # Check changing the data gives different hahses\n    await async_client.lpush(\"list1\", \"NEW\")\n    assert capture != await Seeder.capture(async_client)\n\n    # Undo our change\n    
await async_client.lpop(\"list1\")\n    assert capture == await Seeder.capture(async_client)\n\n    # Do another change\n    await async_client.spop(\"set1\")\n    assert capture != await Seeder.capture(async_client)\n\n\n@pytest.mark.asyncio\n@dfly_args({\"proactor_threads\": 2})\nasync def test_seeder_fake_redis(\n    df_factory: DflyInstanceFactory, df_seeder_factory: DflySeederFactory\n):\n    instance = df_factory.create()\n    df_factory.start_all([instance])\n\n    seeder = df_seeder_factory.create(\n        keys=100, port=instance.port, unsupported_types=[ValueType.JSON], mirror_to_fake_redis=True\n    )\n\n    await seeder.run(target_ops=5_000)\n\n    capture = await seeder.capture_fake_redis()\n\n    assert await seeder.compare(capture, instance.port)\n\n\n@pytest.mark.asyncio\n@dfly_args({\"proactor_threads\": 2})\nasync def test_seeder_huge_value(\n    df_factory: DflyInstanceFactory, df_seeder_factory: DflySeederFactory\n):\n    instance = df_factory.create()\n    df_factory.start_all([instance])\n\n    expected_huge_value_count = 10\n    seeder = df_seeder_factory.create(\n        keys=100,\n        port=instance.port,\n        huge_value_count=expected_huge_value_count,\n        huge_value_size=240_000,\n    )\n\n    def custom_command_generation_probability():\n        return [\n            0.0,\n            0.0,\n            100.0,\n        ]  # We will only execute GROW commands\n\n    # Provide custom function for command generation probability\n    seeder.gen.size_change_probs = custom_command_generation_probability\n\n    await seeder.run(target_ops=100)\n\n    client = instance.client()\n\n    keys = await client.execute_command(\"KEYS *\")\n    huge_val_keys_count = 0\n\n    for key in keys:\n        key_size = await client.execute_command(f\"MEMORY USAGE {key}\")\n        # Count all keys that have memory - i.e. 
contain huge strings\n        if key_size != None and key_size > 100_000:\n            huge_val_keys_count += 1\n\n    assert huge_val_keys_count == expected_huge_value_count\n"
  },
  {
    "path": "tests/dragonfly/sentinel_test.py",
    "content": "import pathlib\nimport subprocess\nfrom typing import Awaitable\nfrom redis import asyncio as aioredis\nimport pytest\nimport time\nimport asyncio\nfrom datetime import datetime\nfrom sys import stderr\nimport logging\n\nfrom .utility import assert_eventually, wait_available_async\n\nfrom .instance import DflyInstanceFactory\nfrom . import dfly_args\n\n\n# Helper function to parse some sentinel cli commands output as key value dictionaries.\n# Output is expected be of even number of lines where each pair of consecutive lines results in a single key value pair.\n# If new_dict_key is not empty, encountering it in the output will start a new dictionary, this let us return multiple\n# dictionaries, for example in the 'slaves' command, one dictionary for each slave.\ndef stdout_as_list_of_dicts(cp: subprocess.CompletedProcess, new_dict_key=\"\"):\n    lines = cp.stdout.splitlines()\n    res = []\n    d = None\n    if new_dict_key == \"\":\n        d = dict()\n        res.append(d)\n    for i in range(0, len(lines), 2):\n        if (lines[i]) == new_dict_key:  # assumes output never has '' as a key\n            d = dict()\n            res.append(d)\n        d[lines[i]] = lines[i + 1]\n    return res\n\n\ndef wait_for(func, pred, timeout_sec, timeout_msg=\"\"):\n    while not pred(func()):\n        assert timeout_sec > 0, timeout_msg\n        timeout_sec = timeout_sec - 1\n        time.sleep(1)\n\n\nasync def await_for(func, pred, timeout_sec, timeout_msg=\"\"):\n    done = False\n    while not done:\n        val = func()\n        if isinstance(val, Awaitable):\n            val = await val\n        done = pred(val)\n        assert timeout_sec > 0, timeout_msg\n        timeout_sec = timeout_sec - 1\n        await asyncio.sleep(1)\n\n\n@assert_eventually\nasync def assert_master_became_replica(client):\n    repl_info = await client.info(\"replication\")\n    assert repl_info[\"role\"] == \"slave\"\n\n\nclass Sentinel:\n    def __init__(self, port, 
master_port, config_dir) -> None:\n        self.config_file = pathlib.Path(config_dir).joinpath(\"sentinel.conf\")\n        self.port = port\n        self.image = \"bitnami/redis-sentinel:latest\"\n        self.container_name = \"sentinel_test_py_sentinel\"\n        self.default_deployment = \"my_deployment\"\n        self.initial_master_port = master_port\n        self.proc = None\n\n    def start(self):\n        config = [\n            f\"port {self.port}\",\n            f\"sentinel monitor {self.default_deployment} 127.0.0.1 {self.initial_master_port} 1\",\n            f\"sentinel down-after-milliseconds {self.default_deployment} 3000\",\n            f\"slave-priority 100\",\n        ]\n        self.config_file.write_text(\"\\n\".join(config))\n\n        logging.info(self.config_file.read_text())\n\n        self.proc = subprocess.Popen(\n            [\"redis-server-6.2.11\", f\"{self.config_file.absolute()}\", \"--sentinel\"]\n        )\n\n    def stop(self):\n        self.proc.terminate()\n        self.proc.wait(timeout=10)\n\n    def run_cmd(\n        self, args, sentinel_cmd=True, capture_output=False, assert_ok=True\n    ) -> subprocess.CompletedProcess:\n        run_args = [\"redis-cli\", \"-p\", f\"{self.port}\"]\n        if sentinel_cmd:\n            run_args = run_args + [\"sentinel\"]\n        run_args = run_args + args\n        cp = subprocess.run(run_args, capture_output=capture_output, text=True)\n        if assert_ok:\n            assert cp.returncode == 0, f\"Command failed: {run_args}\"\n        return cp\n\n    def wait_ready(self):\n        wait_for(\n            lambda: self.run_cmd([\"ping\"], sentinel_cmd=False, assert_ok=False),\n            lambda cp: cp.returncode == 0,\n            timeout_sec=10,\n            timeout_msg=\"Timeout waiting for sentinel to become ready.\",\n        )\n\n    def master(self, deployment=\"\") -> dict:\n        if deployment == \"\":\n            deployment = self.default_deployment\n        cp = 
self.run_cmd([\"master\", deployment], capture_output=True)\n        return stdout_as_list_of_dicts(cp)[0]\n\n    def slaves(self, deployment=\"\") -> dict:\n        if deployment == \"\":\n            deployment = self.default_deployment\n        cp = self.run_cmd([\"slaves\", deployment], capture_output=True)\n        return stdout_as_list_of_dicts(cp)\n\n    def live_master_port(self, deployment=\"\"):\n        if deployment == \"\":\n            deployment = self.default_deployment\n        cp = self.run_cmd([\"get-master-addr-by-name\", deployment], capture_output=True)\n        return int(cp.stdout.splitlines()[1])\n\n    def failover(self, deployment=\"\"):\n        if deployment == \"\":\n            deployment = self.default_deployment\n        self.run_cmd(\n            [\n                \"failover\",\n                deployment,\n            ]\n        )\n\n\n@pytest.fixture(\n    scope=\"function\"\n)  # Sentinel has state which we don't want carried over form test to test.\ndef sentinel(tmp_dir, port_picker) -> Sentinel:\n    s = Sentinel(port_picker.get_available_port(), port_picker.get_available_port(), tmp_dir)\n    s.start()\n    s.wait_ready()\n    yield s\n    s.stop()\n\n\n@pytest.mark.asyncio\n@pytest.mark.large\nasync def test_failover(df_factory: DflyInstanceFactory, sentinel, port_picker):\n    master = df_factory.create(port=sentinel.initial_master_port)\n    replica = df_factory.create(port=port_picker.get_available_port())\n\n    master.start()\n    replica.start()\n\n    master_client = aioredis.Redis(port=master.port)\n    replica_client = aioredis.Redis(port=replica.port)\n    logging.info(\"master: \" + str(master.port) + \" replica: \" + str(replica.port))\n\n    await replica_client.execute_command(\"REPLICAOF localhost \" + str(master.port))\n\n    assert sentinel.live_master_port() == master.port\n\n    # Verify sentinel picked up replica.\n    await await_for(\n        lambda: sentinel.master(),\n        lambda m: 
m[\"num-slaves\"] == \"1\",\n        timeout_sec=15,\n        timeout_msg=\"Timeout waiting for sentinel to pick up replica.\",\n    )\n    sentinel.failover()\n\n    # Verify sentinel switched.\n    await await_for(\n        lambda: sentinel.live_master_port(),\n        lambda p: p == replica.port,\n        timeout_sec=10,\n        timeout_msg=\"Timeout waiting for sentinel to report replica as master.\",\n    )\n    assert sentinel.slaves()[0][\"port\"] == str(master.port)\n\n    # Verify we can now write to replica and read replicated value from master.\n    assert await replica_client.set(\"key\", \"value\"), \"Failed to set key on promoted replica.\"\n\n    logging.info(\"key was set on promoted replica, awaiting get on promoted replica. \")\n\n    await assert_master_became_replica(master_client)\n    await wait_available_async(master_client)\n\n    try:\n        await await_for(\n            lambda: master_client.get(\"key\"),\n            lambda val: val == b\"value\",\n            10,\n            \"Timeout waiting for key to exist in replica.\",\n        )\n    except AssertionError:\n        syncid, r_offset = await master_client.execute_command(\"DEBUG REPLICA OFFSET\")\n        replicaoffset_cmd = \"DFLY REPLICAOFFSET \" + syncid.decode()\n        m_offset = await replica_client.execute_command(replicaoffset_cmd)\n        logging.info(f\"{syncid.decode()} {r_offset} {m_offset}\")\n        logging.info(\"replica client role:\")\n        logging.info(await replica_client.execute_command(\"role\"))\n        logging.info(\"master client role:\")\n        logging.info(await master_client.execute_command(\"role\"))\n        logging.info(\"replica client info:\")\n        logging.info(await replica_client.info())\n        logging.info(\"master client info:\")\n        logging.info(await master_client.info())\n        replica_val = await replica_client.get(\"key\")\n        master_val = await master_client.get(\"key\")\n        logging.info(f\"replica val: 
{replica_val}\")\n        logging.info(f\"master val: {master_val}\")\n        raise\n\n\n@pytest.mark.asyncio\n@pytest.mark.large\nasync def test_master_failure(df_factory, sentinel, port_picker):\n    master = df_factory.create(port=sentinel.initial_master_port)\n    replica = df_factory.create(port=port_picker.get_available_port())\n\n    master.start()\n    replica.start()\n\n    replica_client = aioredis.Redis(port=replica.port)\n\n    await replica_client.execute_command(\"REPLICAOF localhost \" + str(master.port))\n\n    assert sentinel.live_master_port() == master.port\n\n    # Verify sentinel picked up replica.\n    await await_for(\n        lambda: sentinel.master(),\n        lambda m: m[\"num-slaves\"] == \"1\",\n        timeout_sec=15,\n        timeout_msg=\"Timeout waiting for sentinel to pick up replica.\",\n    )\n\n    # Simulate master failure.\n    master.stop()\n\n    # Verify replica promoted.\n    await await_for(\n        lambda: sentinel.live_master_port(),\n        lambda p: p == replica.port,\n        timeout_sec=300,\n        timeout_msg=\"Timeout waiting for sentinel to report replica as master.\",\n    )\n\n    # Verify we can now write to replica.\n    await replica_client.set(\"key\", \"value\")\n    assert await replica_client.get(\"key\") == b\"value\"\n\n\n@dfly_args({\"info_replication_valkey_compatible\": True})\n@pytest.mark.asyncio\nasync def test_priority_on_failover(df_factory, sentinel, port_picker):\n    master = df_factory.create(port=sentinel.initial_master_port)\n    # lower priority is the best candidate for sentinel\n    low_priority_repl = df_factory.create(\n        port=port_picker.get_available_port(), replica_priority=20\n    )\n    mid_priority_repl = df_factory.create(\n        port=port_picker.get_available_port(), replica_priority=60\n    )\n    high_priority_repl = df_factory.create(\n        port=port_picker.get_available_port(), replica_priority=80\n    )\n\n    master.start()\n    
low_priority_repl.start()\n    mid_priority_repl.start()\n    high_priority_repl.start()\n\n    high_client = aioredis.Redis(port=high_priority_repl.port)\n    await high_client.execute_command(\"REPLICAOF localhost \" + str(master.port))\n\n    mid_client = aioredis.Redis(port=mid_priority_repl.port)\n    await mid_client.execute_command(\"REPLICAOF localhost \" + str(master.port))\n\n    low_client = aioredis.Redis(port=low_priority_repl.port)\n    await low_client.execute_command(\"REPLICAOF localhost \" + str(master.port))\n\n    assert sentinel.live_master_port() == master.port\n\n    # Verify sentinel picked up replica.\n    await await_for(\n        lambda: sentinel.master(),\n        lambda m: m[\"num-slaves\"] == \"3\",\n        timeout_sec=15,\n        timeout_msg=\"Timeout waiting for sentinel to pick up replica.\",\n    )\n\n    # Simulate master failure.\n    master.stop()\n\n    # Verify replica promoted.\n    await await_for(\n        lambda: sentinel.live_master_port(),\n        lambda p: p == low_priority_repl.port,\n        timeout_sec=30,\n        timeout_msg=\"Timeout waiting for sentinel to report replica as master.\",\n    )\n"
  },
  {
    "path": "tests/dragonfly/server_family_test.py",
    "content": "import platform\n\nimport aiohttp\nfrom prometheus_client.samples import Sample\nfrom pymemcache import Client\n\nfrom redis.exceptions import ResponseError\n\nfrom . import dfly_args\nfrom .instance import DflyInstance\nfrom .utility import *\n\n\n@pytest.fixture(scope=\"class\")\ndef connection(df_server: DflyInstance):\n    return redis.Connection(port=df_server.port)\n\n\nclass TestServer:\n    def test_quit(self, connection: redis.Connection):\n        connection.send_command(\"QUIT\")\n        assert connection.read_response() == b\"OK\"\n\n        with pytest.raises(redis.exceptions.ConnectionError) as e:\n            connection.read_response()\n\n    def test_quit_after_sub(self, connection):\n        connection.send_command(\"SUBSCRIBE\", \"foo\")\n        connection.read_response()\n\n        connection.send_command(\"QUIT\")\n        assert connection.read_response() == b\"OK\"\n\n        with pytest.raises(redis.exceptions.ConnectionError) as e:\n            connection.read_response()\n\n    async def test_multi_exec(self, async_client: aioredis.Redis):\n        pipeline = async_client.pipeline()\n        pipeline.set(\"foo\", \"bar\")\n        pipeline.get(\"foo\")\n        val = await pipeline.execute()\n        assert val == [True, \"bar\"]\n\n\n\"\"\"\nsee https://github.com/dragonflydb/dragonfly/issues/457\nFor now we would not allow for eval command inside multi\nAs this would create to level transactions (in effect recursive call\nto Schedule function).\nWhen this issue is fully fixed, this test would failed, and then it should\nchange to match the fact that we supporting this operation.\nFor now we are expecting to get an error\n\"\"\"\n\n\nasync def test_multi_eval(async_client: aioredis.Redis):\n    pipeline = async_client.pipeline()\n    pipeline.set(\"foo\", \"bar\")\n    pipeline.get(\"foo\")\n    pipeline.eval(\"return 43\", 0)\n    val = await pipeline.execute()\n    assert val == [True, \"bar\", 43]\n\n\nasync def 
test_connection_name(async_client: aioredis.Redis):\n    name = await async_client.execute_command(\"CLIENT GETNAME\")\n    assert name == \"default-async-fixture\"\n    await async_client.execute_command(\"CLIENT SETNAME test_conn_name\")\n    name = await async_client.execute_command(\"CLIENT GETNAME\")\n    assert name == \"test_conn_name\"\n\n\nasync def test_get_databases(async_client: aioredis.Redis):\n    \"\"\"\n    make sure that the config get databases command is working\n    to ensure compatibility with UI frameworks like AnotherRedisDesktopManager\n    \"\"\"\n    dbnum = await async_client.config_get(\"databases\")\n    assert dbnum == {\"databases\": \"16\"}\n\n\nasync def test_client_kill(df_factory):\n    with df_factory.create(port=1111, admin_port=1112) as instance:\n        instance: DflyInstance\n        from redis.backoff import NoBackoff\n        from redis.asyncio.retry import Retry\n\n        client = instance.client(retry=Retry(NoBackoff(), 0))\n        admin_client = instance.admin_client()\n        await admin_client.ping()\n\n        # This creates `client_conn` as a non-auto-reconnect client\n        async with client.client() as client_conn:\n            assert len(await client_conn.execute_command(\"CLIENT LIST\")) == 2\n            assert len(await admin_client.execute_command(\"CLIENT LIST\")) == 2\n\n            # Can't kill admin from regular connection\n            with pytest.raises(ResponseError) as e_info:\n                await client_conn.execute_command(\"CLIENT KILL LADDR 127.0.0.1:1112\")\n\n            assert len(await admin_client.execute_command(\"CLIENT LIST\")) == 2\n            await admin_client.execute_command(\"CLIENT KILL LADDR 127.0.0.1:1111\")\n            assert len(await admin_client.execute_command(\"CLIENT LIST\")) == 1\n            with pytest.raises(redis.exceptions.ConnectionError) as e_info:\n                await client_conn.ping()\n\n\nasync def test_scan(async_client: aioredis.Redis):\n    \"\"\"\n 
   make sure that the scan command is working with python\n    \"\"\"\n\n    def gen_test_data():\n        for i in range(10):\n            yield f\"key-{i}\", f\"value-{i}\"\n\n    for key, val in gen_test_data():\n        res = await async_client.set(key, val)\n        assert res is not None\n        cur, keys = await async_client.scan(cursor=0, match=key, count=2)\n        assert cur == 0\n        assert len(keys) == 1\n        assert keys[0] == key\n\n\ndef configure_slowlog_parsing(async_client: aioredis.Redis):\n    def parse_slowlog_get(response, **options):\n        logging.info(f\"slowlog response: {response}\")\n\n        def stringify(item):\n            if isinstance(item, bytes):\n                return item.decode()\n            if isinstance(item, list):\n                return [stringify(i) for i in item]\n            return item\n\n        def parse_item(item):\n            item = stringify(item)\n            result = {\"id\": item[0], \"start_time\": int(item[1]), \"duration\": int(item[2])}\n            result[\"command\"] = \" \".join(item[3])\n            result[\"client_address\"] = item[4]\n            result[\"client_name\"] = item[5]\n            return result\n\n        return [parse_item(item) for item in response]\n\n    async_client.set_response_callback(\"SLOWLOG GET\", parse_slowlog_get)\n    return async_client\n\n\n@pytest.mark.asyncio\n@dfly_args({\"slowlog_log_slower_than\": 0, \"slowlog_max_len\": 3})\nasync def test_slowlog_client_name_and_ip(df_factory, async_client: aioredis.Redis):\n    df = df_factory.create()\n    df.start()\n    expected_clientname = \"dragonfly\"\n\n    await async_client.client_setname(expected_clientname)\n    async_client = configure_slowlog_parsing(async_client)\n\n    client_list = await async_client.client_list()\n    addr = client_list[0][\"addr\"]\n\n    slowlog = await async_client.slowlog_get(1)\n    assert slowlog[0][\"client_name\"] == expected_clientname\n    assert 
slowlog[0][\"client_address\"] == addr\n\n\n@pytest.mark.asyncio\n@dfly_args({\"slowlog_log_slower_than\": 0, \"slowlog_max_len\": 3})\nasync def test_blocking_commands_should_not_show_up_in_slow_log(\n    df_factory, async_client: aioredis.Redis\n):\n    await async_client.slowlog_reset()\n    df = df_factory.create()\n    df.start()\n    async_client = configure_slowlog_parsing(async_client)\n\n    await async_client.blpop(\"mykey\", 0.5)\n    reply = await async_client.slowlog_get()\n\n    # blpop does not show up, only the previous reset\n    assert reply[0][\"command\"] == \"SLOWLOG RESET\"\n\n\n@dfly_args({\"memcached_port\": 11211, \"admin_port\": 1112})\nasync def test_metric_labels(\n    df_server: DflyInstance, async_client: aioredis.Redis, memcached_client: Client\n):\n    result = await async_client.set(\"foo\", \"bar\")\n    assert result, \"Failed to set key\"\n\n    result = await async_client.get(\"foo\")\n    assert result == \"bar\", \"Failed to read value\"\n\n    def match_label_value(s: Sample, name, func):\n        assert \"listener\" in s.labels\n        if s.labels[\"listener\"] == name:\n            assert func(s.value)\n\n    metrics = await df_server.metrics()\n    for sample in metrics[\"dragonfly_commands_processed\"].samples:\n        match_label_value(sample, \"main\", lambda v: v > 0)\n        match_label_value(sample, \"other\", lambda v: v == 0)\n    for sample in metrics[\"dragonfly_connected_clients\"].samples:\n        match_label_value(sample, \"main\", lambda v: v == 1)\n        match_label_value(sample, \"other\", lambda v: v == 0)\n\n    # Memcached client also counts as main\n    memcached_client.set(\"foo\", \"bar\")\n\n    metrics = await df_server.metrics()\n    for sample in metrics[\"dragonfly_commands_processed\"].samples:\n        match_label_value(sample, \"main\", lambda v: v > 0)\n        match_label_value(sample, \"other\", lambda v: v == 0)\n    for sample in metrics[\"dragonfly_connected_clients\"].samples:\n   
     match_label_value(sample, \"main\", lambda v: v == 2)\n        match_label_value(sample, \"other\", lambda v: v == 0)\n\n    # admin client counts as other\n    async with aioredis.Redis(port=1112) as admin:\n        await admin.ping()\n\n        metrics = await df_server.metrics()\n        for sample in metrics[\"dragonfly_commands_processed\"].samples:\n            match_label_value(sample, \"main\", lambda v: v > 0)\n            # memcached listener processes command as other\n            match_label_value(sample, \"other\", lambda v: v > 0)\n        for sample in metrics[\"dragonfly_connected_clients\"].samples:\n            match_label_value(sample, \"main\", lambda v: v == 2)\n            match_label_value(sample, \"other\", lambda v: v == 1)\n\n\nasync def test_latency_stats(async_client: aioredis.Redis):\n    for _ in range(100):\n        await async_client.set(\"foo\", \"bar\")\n        await async_client.get(\"foo\")\n        await async_client.get(\"bar\")\n        await async_client.hgetall(\"missing\")\n\n    latency_stats = await async_client.info(\"LATENCYSTATS\")\n    for expected in {\"hgetall\", \"set\", \"get\"}:\n        key = f\"latency_percentiles_usec_{expected}\"\n        assert key in latency_stats\n        assert latency_stats[key].keys() == {\"p50\", \"p99\", \"p99.9\"}\n\n    await async_client.config_resetstat()\n    latency_stats = await async_client.info(\"LATENCYSTATS\")\n    # Only stats for the `config resetstat` command should remain in stats\n    assert (\n        len(latency_stats) == 1 and \"latency_percentiles_usec_config\" in latency_stats\n    ), f\"unexpected latency stats after reset: {latency_stats}\"\n\n\n@dfly_args({\"latency_tracking\": False})\nasync def test_latency_stats_disabled(async_client: aioredis.Redis):\n    for _ in range(100):\n        await async_client.set(\"foo\", \"bar\")\n    assert await async_client.info(\"LATENCYSTATS\") == {}\n\n\n@pytest.mark.skipif(\n    platform.machine() != \"x86_64\" or 
platform.system() != \"Linux\",\n    reason=\"Validate metrics only on one platform to simplify download\",\n)\nasync def test_metrics_sanity_check(df_server: DflyInstance):\n    lint_errors = frozenset(\n        (\n            \"no help text\",\n            \"\"\"should have \"_total\" suffix\"\"\",\n            \"\"\"should not have \"_count\" suffix\"\"\",\n            \"metric names should not contain abbreviated units\",\n        )\n    )\n\n    async with aiohttp.ClientSession() as s:\n        metrics_url = f\"http://localhost:{df_server.port}/metrics\"\n        async with s.get(metrics_url, raise_for_status=True) as response:\n            metrics = await response.text()\n    result = subprocess.run(\n        [\"promtool\", \"check\", \"metrics\"],\n        input=metrics,\n        capture_output=True,\n        text=True,\n    )\n\n    actual_errors = []\n    if result.returncode != 0:\n        for e in result.stderr.splitlines():\n            if any(e.endswith(error) for error in lint_errors):\n                logging.debug(f\"ignored linting error: {e}\")\n            else:\n                actual_errors.append(e)\n\n    for error in actual_errors:\n        logging.error(f\"found error: {error}\")\n\n    assert actual_errors == []\n\n\n@pytest.mark.opt_only\n@dfly_args({\"proactor_threads\": \"2\"})\nasync def test_huffman_tables_built(df_server: DflyInstance):\n    async_client = df_server.client()\n    # Insert enough data to trigger background huffman table building\n    key_name = \"keyfooobarrsoooooooooooooooooooooooooooooooooooooooooooooooo\"\n    await async_client.execute_command(\"DEBUG\", \"POPULATE\", \"1000000\", key_name, \"14\")\n\n    @assert_eventually(times=500)\n    async def check_metrics():\n        metrics = await df_server.metrics()\n        m = metrics[\"dragonfly_huffman_tables_built\"]\n        assert m.samples[0].value > 0\n\n    await check_metrics()\n"
  },
  {
    "path": "tests/dragonfly/set_test.py",
    "content": "import pytest\nfrom redis import asyncio as aioredis\nfrom .instance import DflyInstance, DflyInstanceFactory\nimport logging\nimport asyncio\n\n\n@pytest.mark.asyncio\nasync def test_sscan_regression(df_factory: DflyInstanceFactory):\n    df = df_factory.create(\n        proactor_threads=2,\n    )\n    df.start()\n\n    client = df.client()\n\n    await client.execute_command(f\"SADD key el1 el2\")\n\n    element = \"a*\" * 3\n\n    cursor = await client.execute_command(f\"SSCAN key 0 match {element}.pt\")\n    length = len(cursor[1])\n    # Takes 3 seconds\n    res = await client.execute_command(\"SLOWLOG GET 100\")\n    assert res == []\n\n\n@pytest.mark.asyncio\nasync def test_spop_with_null_byte_members(df_factory: DflyInstanceFactory):\n    df = df_factory.create(proactor_threads=1)\n\n    df.start()\n\n    client = df.client()\n\n    num_members = 10\n\n    for i in range(num_members):\n        await client.sadd(\"set\", \"b'MEMBER\\x01\\x02\\x00_KEY{i}'\".format(i=i))\n\n    assert await client.scard(\"set\") == num_members\n\n    await client.spop(\"set\")\n\n    assert await client.scard(\"set\") == num_members - 1\n"
  },
  {
    "path": "tests/dragonfly/shutdown_test.py",
    "content": "import pytest\nimport asyncio\nimport redis\nfrom redis import asyncio as aioredis\nfrom pathlib import Path\n\nfrom . import dfly_args\nfrom .utility import wait_available_async\n\nBASIC_ARGS = {\"dir\": \"{DRAGONFLY_TMP}/\"}\n\n\n@pytest.mark.skip(\n    reason=\"Currently we can not guarantee that on shutdown if command is executed and value is written we response before breaking the connection\"\n)\n@dfly_args({\"proactor_threads\": \"4\"})\nclass TestDflyAutoLoadSnapshot:\n    \"\"\"\n    Test automatic loading of dump files on startup with timestamp.\n    When command is executed if a value is written we should send the response before shutdown\n    \"\"\"\n\n    @pytest.mark.asyncio\n    async def test_gracefull_shutdown(self, df_factory):\n        df_args = {\"dbfilename\": \"dump\", **BASIC_ARGS, \"port\": 1111}\n\n        df_server = df_factory.create(**df_args)\n        df_server.start()\n        client = aioredis.Redis(port=df_server.port)\n\n        async def counter(key):\n            value = 0\n            await client.execute_command(f\"SET {key} 0\")\n            while True:\n                try:\n                    value = await client.execute_command(f\"INCR {key}\")\n                except (redis.exceptions.ConnectionError, redis.exceptions.ResponseError) as e:\n                    break\n            return key, value\n\n        async def delayed_takeover():\n            await asyncio.sleep(1)\n            await client.execute_command(\"SHUTDOWN\")\n            await client.connection_pool.disconnect()\n\n        _, *results = await asyncio.gather(\n            delayed_takeover(), *[counter(f\"key{i}\") for i in range(16)]\n        )\n\n        df_server.start()\n        client = aioredis.Redis(port=df_server.port)\n\n        for key, acknowleged_value in results:\n            value_from_snapshot = await client.get(key)\n            assert acknowleged_value == int(value_from_snapshot)\n\n        await 
client.connection_pool.disconnect()\n\n\n@dfly_args({\"proactor_threads\": \"2\"})\nclass TestShutdownOptions:\n    @pytest.mark.asyncio\n    async def test_shutdown_abort_and_invalid_option(self, df_factory):\n        df_args = {\"dbfilename\": \"dump\", **BASIC_ARGS, \"port\": 1121}\n        df_server = df_factory.create(**df_args)\n        df_server.start()\n\n        client = aioredis.Redis(port=df_server.port)\n\n        # ABORT should be rejected and server should remain responsive\n        with pytest.raises(redis.exceptions.ResponseError):\n            await client.execute_command(\"SHUTDOWN ABORT\")\n\n        pong = await client.ping()\n        assert pong is True\n\n        # Invalid option -> syntax error\n        with pytest.raises(redis.exceptions.ResponseError):\n            await client.execute_command(\"SHUTDOWN FOO\")\n\n        await client.connection_pool.disconnect()\n        df_server.stop()\n\n    @pytest.mark.asyncio\n    async def test_shutdown_safe_persists_snapshot(self, df_factory, tmp_path):\n        # Ensure snapshot dir exists and is used\n        snap_dir = tmp_path\n        df_args = {\"dbfilename\": \"dump\", \"dir\": str(snap_dir) + \"/\", \"port\": 1122}\n\n        df_server = df_factory.create(**df_args)\n        df_server.start()\n\n        client = aioredis.Redis(port=df_server.port)\n        await client.set(\"safe_key\", \"safe_value\")\n\n        # SHUTDOWN SAFE should save synchronously and then stop\n        try:\n            await client.execute_command(\"SHUTDOWN SAFE\")\n        except Exception:\n            # Connection may be dropped as part of shutdown; this is acceptable\n            pass\n\n        await client.connection_pool.disconnect()\n\n        # Restart and verify data persisted\n        df_server.start()\n        client = aioredis.Redis(port=df_server.port)\n        await wait_available_async(client)\n        val = await client.get(\"safe_key\")\n        assert val == b\"safe_value\"\n        await 
client.connection_pool.disconnect()\n        df_server.stop()\n\n    @pytest.mark.asyncio\n    async def test_shutdown_save_persists_snapshot(self, df_factory, tmp_path):\n        # SAVE should follow the same synchronous path as SAFE\n        snap_dir = tmp_path\n        df_args = {\"dbfilename\": \"dump\", \"dir\": str(snap_dir) + \"/\", \"port\": 1123}\n\n        df_server = df_factory.create(**df_args)\n        df_server.start()\n\n        client = aioredis.Redis(port=df_server.port)\n        await client.set(\"save_key\", \"save_value\")\n\n        try:\n            await client.execute_command(\"SHUTDOWN SAVE\")\n        except Exception:\n            pass\n\n        await client.connection_pool.disconnect()\n\n        df_server.start()\n        client = aioredis.Redis(port=df_server.port)\n        await wait_available_async(client)\n        val = await client.get(\"save_key\")\n        assert val == b\"save_value\"\n        await client.connection_pool.disconnect()\n        df_server.stop()\n"
  },
  {
    "path": "tests/dragonfly/snapshot_test.py",
    "content": "import pytest\nimport logging\nimport os\nimport glob\nimport asyncio\nfrom async_timeout import timeout\nimport redis\nfrom redis import asyncio as aioredis\nfrom pathlib import Path\nimport boto3\nfrom .instance import DflyInstanceFactory, RedisServer\nfrom random import randint as rand\nimport string\nimport random\nfrom pymemcache.client.base import Client as MCClient\n\nfrom . import dfly_args\nfrom .utility import assert_eventually, wait_available_async, is_saving, tmp_file_name\n\nfrom .seeder import DebugPopulateSeeder\n\nBASIC_ARGS = {\"dir\": \"{DRAGONFLY_TMP}/\", \"proactor_threads\": 4}\nFILE_FORMATS = [\"RDB\", \"DF\"]\n\n# Should be used where text auxiliary mechanisms like filenames\nLIGHTWEIGHT_SEEDER_ARGS = dict(key_target=100, data_size=100, variance=1, samples=1)\n\n\ndef find_main_file(path: Path, pattern):\n    return next(iter(glob.glob(str(path) + \"/\" + pattern)), None)\n\n\nasync def get_metric_value(inst, metric_name, sample_index=0):\n    return (await inst.metrics())[metric_name].samples[sample_index].value\n\n\nasync def assert_metric_value(inst, metric_name, expected_value):\n    actual_value = await get_metric_value(inst, metric_name)\n    assert (\n        actual_value == expected_value\n    ), f\"Expected {metric_name} to be {expected_value}, got ${actual_value}\"\n\n\n@pytest.mark.opt_only\n@pytest.mark.parametrize(\"format\", FILE_FORMATS)\n@pytest.mark.parametrize(\n    \"seeder_opts\",\n    [\n        # Many small keys, high variance\n        dict(key_target=50_000, data_size=100, variance=10, samples=50),\n        # A few large keys, high variance\n        dict(key_target=1000, data_size=5_000, variance=10, samples=10),\n    ],\n)\n@dfly_args({**BASIC_ARGS})\nasync def test_consistency(df_factory, format: str, seeder_opts: dict):\n    \"\"\"\n    Test consistency over a large variety of data with different sizes\n    \"\"\"\n    dbfilename = f\"dump_{tmp_file_name()}\"\n    instance = 
df_factory.create(dbfilename=dbfilename)\n    instance.start()\n    async_client = instance.client()\n    await DebugPopulateSeeder(**seeder_opts).run(async_client)\n\n    start_capture = await DebugPopulateSeeder.capture(async_client)\n\n    # save + flush + load\n    await async_client.execute_command(\"SAVE\", format)\n    assert await async_client.flushall()\n    await async_client.execute_command(\n        \"DFLY\",\n        \"LOAD\",\n        f\"{dbfilename}.rdb\" if format == \"RDB\" else f\"{dbfilename}-summary.dfs\",\n    )\n\n    assert (await DebugPopulateSeeder.capture(async_client)) == start_capture\n\n\n@pytest.mark.parametrize(\"format\", FILE_FORMATS)\n@dfly_args({**BASIC_ARGS})\nasync def test_multidb(df_factory, format: str):\n    \"\"\"\n    Test serialization of multiple logical databases\n    \"\"\"\n    dbfilename = f\"dump_{tmp_file_name()}\"\n    instance = df_factory.create(dbfilename=dbfilename)\n    instance.start()\n    async_client = instance.client()\n    start_captures = []\n    for dbid in range(10):\n        db_client = instance.client(db=dbid)\n        await DebugPopulateSeeder(key_target=1000).run(db_client)\n        start_captures.append(await DebugPopulateSeeder.capture(db_client))\n\n    # save + flush + load\n    await async_client.execute_command(\"SAVE\", format)\n    assert await async_client.flushall()\n    await async_client.execute_command(\n        \"DFLY\",\n        \"LOAD\",\n        f\"{dbfilename}.rdb\" if format == \"RDB\" else f\"{dbfilename}-summary.dfs\",\n    )\n\n    for dbid in range(10):\n        db_client = instance.client(db=dbid)\n        assert (await DebugPopulateSeeder.capture(db_client)) == start_captures[dbid]\n\n\n@pytest.mark.parametrize(\n    \"save_type, dbfilename, pattern\",\n    [\n        (\"rdb\", \"test-autoload1-{{timestamp}}\", \"test-autoload1-*.rdb\"),\n        (\"df\", \"test-autoload2-{{timestamp}}\", \"test-autoload2-*-summary.dfs\"),\n        (\"rdb\", 
\"test-autoload3-{{timestamp}}.rdb\", \"test-autoload3-*.rdb\"),\n        (\"rdb\", \"test-autoload4\", \"test-autoload4.rdb\"),\n        (\"df\", \"test-autoload5\", \"test-autoload5-summary.dfs\"),\n        (\"rdb\", \"test-autoload6.rdb\", \"test-autoload6.rdb\"),\n    ],\n)\nasync def test_dbfilenames(\n    df_factory, tmp_dir: Path, save_type: str, dbfilename: str, pattern: str\n):\n    df_args = {**BASIC_ARGS, \"dbfilename\": dbfilename, \"port\": 1111}\n\n    if save_type == \"rdb\":\n        df_args[\"nodf_snapshot_format\"] = None\n\n    start_capture = None\n\n    with df_factory.create(**df_args) as df_server:\n        async with df_server.client() as client:\n            await wait_available_async(client)\n\n            # We use the seeder just to check we don't loose any files (and thus keys)\n            await DebugPopulateSeeder(**LIGHTWEIGHT_SEEDER_ARGS).run(client)\n            start_capture = await DebugPopulateSeeder.capture(client)\n\n            await client.execute_command(\"SAVE \" + save_type)\n\n    file = find_main_file(tmp_dir, pattern)\n    assert file is not None\n    assert os.path.basename(file).startswith(dbfilename.split(\"{{\")[0])\n\n    with df_factory.create(**df_args) as df_server:\n        async with df_server.client() as client:\n            await wait_available_async(client)\n            assert await DebugPopulateSeeder.capture(client) == start_capture\n\n\n@dfly_args(\n    {\n        **BASIC_ARGS,\n        \"dbfilename\": \"test-redis-load-rdb\",\n    }\n)\nasync def test_redis_load_snapshot(\n    async_client: aioredis.Redis, df_server, redis_local_server: RedisServer, tmp_dir: Path\n):\n    \"\"\"\n    Test redis server loading dragonfly snapshot rdb format\n    \"\"\"\n    await DebugPopulateSeeder(\n        **LIGHTWEIGHT_SEEDER_ARGS, types=[\"STRING\", \"LIST\", \"SET\", \"HASH\", \"ZSET\", \"STREAM\"]\n    ).run(async_client)\n\n    await async_client.lpush(\"list\", \"A\" * 10_000)\n\n    await 
async_client.execute_command(\"SAVE\", \"rdb\")\n    dbsize = await async_client.dbsize()\n\n    await async_client.connection_pool.disconnect()\n    df_server.stop()\n\n    redis_local_server.start(dir=tmp_dir, redis7=True, dbfilename=\"test-redis-load-rdb.rdb\")\n    await asyncio.sleep(1)\n    c_master = aioredis.Redis(port=redis_local_server.port)\n    await c_master.ping()\n\n    assert await c_master.dbsize() == dbsize\n\n\n@pytest.mark.large\n@dfly_args({**BASIC_ARGS, \"dbfilename\": \"test-cron\", \"snapshot_cron\": \"* * * * *\"})\nasync def test_cron_snapshot(tmp_dir: Path, async_client: aioredis.Redis):\n    await DebugPopulateSeeder(**LIGHTWEIGHT_SEEDER_ARGS).run(async_client)\n\n    file = None\n    async with timeout(65):\n        while file is None:\n            await asyncio.sleep(1)\n            file = find_main_file(tmp_dir, \"test-cron-summary.dfs\")\n\n    assert file is not None, os.listdir(tmp_dir)\n\n\n@pytest.mark.skip(\"Fails and also causes all TLS tests to fail\")\n@pytest.mark.large\n@dfly_args({**BASIC_ARGS, \"dbfilename\": \"test-failed-saving\", \"snapshot_cron\": \"* * * * *\"})\nasync def test_cron_snapshot_failed_saving(df_server, tmp_dir: Path, async_client: aioredis.Redis):\n    await DebugPopulateSeeder(**LIGHTWEIGHT_SEEDER_ARGS).run(async_client)\n\n    backups_total = await get_metric_value(df_server, \"dragonfly_backups\")\n    failed_backups_total = await get_metric_value(df_server, \"dragonfly_failed_backups\")\n\n    file = None\n    async with timeout(65):\n        while file is None:\n            await asyncio.sleep(1)\n            file = find_main_file(tmp_dir, \"test-failed-saving-summary.dfs\")\n\n    assert file is not None, os.listdir(tmp_dir)\n\n    await assert_metric_value(df_server, \"dragonfly_backups\", backups_total + 1)\n    await assert_metric_value(df_server, \"dragonfly_failed_backups\", failed_backups_total)\n\n    # Remove all files from directory\n    for dir_file in tmp_dir.iterdir():\n        
os.unlink(dir_file)\n\n    # Make directory read-only\n    os.chmod(tmp_dir, 0o555)\n\n    # Wait for the next SAVE command\n    await asyncio.sleep(65)\n    file = find_main_file(tmp_dir, \"test-failed-saving-summary.dfs\")\n\n    # Make directory writable again\n    os.chmod(tmp_dir, 0o777)\n\n    assert file is None, os.listdir(tmp_dir)\n\n    await assert_metric_value(df_server, \"dragonfly_backups\", backups_total + 2)\n    await assert_metric_value(df_server, \"dragonfly_failed_backups\", failed_backups_total + 1)\n\n\n@pytest.mark.large\n@dfly_args({**BASIC_ARGS, \"dbfilename\": \"test-cron-set\"})\nasync def test_set_cron_snapshot(tmp_dir: Path, async_client: aioredis.Redis):\n    await DebugPopulateSeeder(**LIGHTWEIGHT_SEEDER_ARGS).run(async_client)\n\n    await async_client.config_set(\"snapshot_cron\", \"* * * * *\")\n\n    file = None\n    async with timeout(65):\n        while file is None:\n            await asyncio.sleep(1)\n            file = find_main_file(tmp_dir, \"test-cron-set-summary.dfs\")\n\n    assert file is not None\n\n\n@dfly_args(\n    {**BASIC_ARGS, \"dbfilename\": \"test-save-rename-command\", \"rename_command\": \"save=save-foo\"}\n)\nasync def test_shutdown_save_with_rename(df_server):\n    \"\"\"Checks that on shutdown we save snapshot\"\"\"\n    client = df_server.client()\n\n    await DebugPopulateSeeder(**LIGHTWEIGHT_SEEDER_ARGS).run(client)\n    start_capture = await DebugPopulateSeeder.capture(client)\n\n    await client.connection_pool.disconnect()\n    df_server.stop()\n    df_server.start()\n    client = df_server.client()\n\n    await wait_available_async(client)\n    assert await DebugPopulateSeeder.capture(client) == start_capture\n\n    await client.connection_pool.disconnect()\n\n\n@pytest.mark.opt_only\nasync def test_parallel_snapshot(async_client):\n    \"\"\"Dragonfly does not allow simultaneous save operations, send 2 save operations and make sure one is rejected\"\"\"\n\n    await 
async_client.execute_command(\"debug\", \"populate\", \"1000000\", \"askldjh\", \"1000\", \"RAND\")\n\n    async def save():\n        try:\n            await async_client.execute_command(\"save\", \"rdb\", \"dump\")\n            return True\n        except Exception as e:\n            return False\n\n    save_successes = sum(await asyncio.gather(*(save() for _ in range(2))), 0)\n    assert save_successes == 1, \"Only one SAVE must be successful\"\n\n\n@pytest.mark.opt_only\nasync def test_parallel_snapshot_race_condition(async_client):\n    await async_client.execute_command(\"debug\", \"populate\", \"300000\", \"racekey\", \"2000\", \"RAND\")\n\n    async def save_operation(operation_id):\n        try:\n            await async_client.execute_command(\"save\", \"rdb\", \"dump\")\n            return f\"success_{operation_id}\"\n        except Exception as e:\n            return f\"failed_{operation_id}_{type(e).__name__}\"\n\n    # Fire many concurrent operations to maximize collision probability\n    # The more concurrent operations, the higher chance of hitting the race window\n    num_concurrent = 3\n\n    # Multiple rounds to increase overall probability\n    for round_num in range(2):\n        tasks = [save_operation(f\"r{round_num}_op{i}\") for i in range(num_concurrent)]\n\n        # Execute all operations simultaneously to hit race condition\n        results = await asyncio.gather(*tasks, return_exceptions=True)\n\n        successes = [r for r in results if isinstance(r, str) and r.startswith(\"success_\")]\n        failures = [r for r in results if isinstance(r, str) and r.startswith(\"failed_\")]\n        exceptions = [r for r in results if not isinstance(r, str)]\n\n        # Exactly one should succeed, rest should fail gracefully\n        assert (\n            len(successes) == 1\n        ), f\"Round {round_num}: Expected exactly 1 success, got {len(successes)} successes, {len(failures)} failures, {len(exceptions)} exceptions. 
Results: {results}\"\n\n        # Short delay between rounds\n        await asyncio.sleep(0.05)\n\n\nasync def test_path_escapes(df_factory):\n    \"\"\"Test that we don't allow path escapes. We just check that df_server.start()\n    fails because we don't have a much better way to test that.\"\"\"\n\n    df_server = df_factory.create(dbfilename=\"../../../../etc/passwd\")\n    with pytest.raises(Exception):\n        df_server.start()\n\n\n@dfly_args({**BASIC_ARGS, \"dbfilename\": \"test-info-persistence\"})\nasync def test_info_persistence_field(async_client):\n    \"\"\"Test is_loading field on INFO PERSISTENCE during snapshot loading\"\"\"\n\n    await DebugPopulateSeeder(**LIGHTWEIGHT_SEEDER_ARGS).run(async_client)\n\n    # Wait for snapshot to finish loading and try INFO PERSISTENCE\n    await wait_available_async(async_client)\n    assert \"loading:0\" in (await async_client.execute_command(\"INFO PERSISTENCE\"))\n\n\ndef delete_s3_objects(bucket, prefix):\n    client = boto3.client(\"s3\")\n    resp = client.list_objects_v2(\n        Bucket=bucket,\n        Prefix=prefix,\n    )\n    keys = []\n    for obj in resp[\"Contents\"]:\n        keys.append({\"Key\": obj[\"Key\"]})\n    client.delete_objects(\n        Bucket=bucket,\n        Delete={\"Objects\": keys},\n    )\n\n\n# If DRAGONFLY_S3_BUCKET is configured, AWS credentials must also be\n# configured.\n@pytest.mark.skipif(\n    \"DRAGONFLY_S3_BUCKET\" not in os.environ or os.environ[\"DRAGONFLY_S3_BUCKET\"] == \"\",\n    reason=\"AWS S3 snapshots bucket is not configured\",\n)\nasync def test_exit_on_s3_snapshot_load_err(df_factory):\n    invalid_s3_dir = \"s3://{DRAGONFLY_S3_BUCKET}\" + \"_invalid_bucket_\"\n    df_server = df_factory.create(dir=invalid_s3_dir, dbfilename=\"db\")\n    with pytest.raises(Exception):\n        df_server.start()\n        df_server.stop()\n\n\n# If DRAGONFLY_S3_BUCKET is configured, AWS credentials must also be\n# configured.\n@pytest.mark.skipif(\n    
\"DRAGONFLY_S3_BUCKET\" not in os.environ or os.environ[\"DRAGONFLY_S3_BUCKET\"] == \"\",\n    reason=\"AWS S3 snapshots bucket is not configured\",\n)\n@dfly_args({**BASIC_ARGS, \"dir\": \"s3://{DRAGONFLY_S3_BUCKET}{DRAGONFLY_TMP}\", \"dbfilename\": \"\"})\nasync def test_s3_snapshot(async_client, tmp_dir):\n    seeder = DebugPopulateSeeder(key_target=10_000)\n    await seeder.run(async_client)\n\n    start_capture = await DebugPopulateSeeder.capture(async_client)\n\n    try:\n        # save + flush + load\n        await async_client.execute_command(\"SAVE DF snapshot\")\n        assert await async_client.flushall()\n        await async_client.execute_command(\n            \"DFLY LOAD \"\n            + os.environ[\"DRAGONFLY_S3_BUCKET\"]\n            + str(tmp_dir)\n            + \"/snapshot-summary.dfs\"\n        )\n\n        assert await DebugPopulateSeeder.capture(async_client) == start_capture\n\n    finally:\n        delete_s3_objects(\n            os.environ[\"DRAGONFLY_S3_BUCKET\"],\n            str(tmp_dir)[1:],\n        )\n\n\n# If DRAGONFLY_S3_BUCKET is configured, AWS credentials must also be\n# configured.\n@pytest.mark.skipif(\n    \"DRAGONFLY_S3_BUCKET\" not in os.environ or os.environ[\"DRAGONFLY_S3_BUCKET\"] == \"\",\n    reason=\"AWS S3 snapshots bucket is not configured\",\n)\n@dfly_args(\n    {\n        **BASIC_ARGS,\n        \"dir\": \"s3://{DRAGONFLY_S3_BUCKET}{DRAGONFLY_TMP}\",\n        \"dbfilename\": \"snapshot-{{Y}}{{m}}{{d}}-{{timestamp}}\",\n    }\n)\nasync def test_s3_reload_snapshot_after_restart(df_factory, tmp_dir):\n    # this test checks that after saving to s3, stopping the server and starting a new one\n    # we can load the snapshot from s3 correctly.\n    try:\n        instance = df_factory.create()\n        instance.start()\n        async_client = instance.client()\n        seeder = DebugPopulateSeeder(key_target=10_000)\n        await seeder.run(async_client)\n        start_capture = await 
DebugPopulateSeeder.capture(async_client)\n        # instance stop generates snapshot on exit\n        instance.stop()\n\n        new_instance = df_factory.create()\n        new_instance.start()\n        new_async_client = new_instance.client()\n\n        await wait_available_async(new_async_client)\n\n        assert await DebugPopulateSeeder.capture(new_async_client) == start_capture\n\n    finally:\n        delete_s3_objects(\n            os.environ[\"DRAGONFLY_S3_BUCKET\"],\n            str(tmp_dir)[1:],\n        )\n\n\n# If DRAGONFLY_S3_BUCKET is configured, AWS credentials must also be\n# configured.\n@pytest.mark.skipif(\n    \"DRAGONFLY_S3_BUCKET\" not in os.environ or os.environ[\"DRAGONFLY_S3_BUCKET\"] == \"\",\n    reason=\"AWS S3 snapshots bucket is not configured\",\n)\n@dfly_args({**BASIC_ARGS})\nasync def test_s3_save_local_dir(async_client, tmp_dir):\n    seeder = DebugPopulateSeeder(key_target=10_000)\n    await seeder.run(async_client)\n\n    try:\n        # SAVE to S3 bucket with `s3_dump` as filename prefix\n        await async_client.execute_command(\n            \"SAVE\", \"DF\", \"s3://\" + os.environ[\"DRAGONFLY_S3_BUCKET\"] + str(tmp_dir), \"s3_dump\"\n        )\n\n    finally:\n        delete_s3_objects(\n            os.environ[\"DRAGONFLY_S3_BUCKET\"],\n            str(tmp_dir)[1:] + \"/s3_dump\",\n        )\n\n\n@dfly_args({**BASIC_ARGS, \"dbfilename\": \"test-shutdown\"})\nclass TestDflySnapshotOnShutdown:\n    SEEDER_ARGS = dict(key_target=10_000)\n\n    \"\"\"Test multi file snapshot\"\"\"\n\n    async def _get_info_memory_fields(self, client):\n        res = await client.execute_command(\"INFO MEMORY\")\n        fields = {}\n        for line in res.splitlines():\n            if line.startswith(\"#\"):\n                continue\n            k, v = line.split(\":\")\n            if k == \"object_used_memory\" or k.startswith(\"type_used_memory_\"):\n                fields.update({k: int(v)})\n        return fields\n\n    async def 
_delete_all_keys(self, client: aioredis.Redis):\n        while True:\n            keys = await client.keys()\n            if len(keys) == 0:\n                break\n            await client.delete(*keys)\n\n    async def test_memory_counters(self, async_client: aioredis.Redis):\n        memory_counters = await self._get_info_memory_fields(async_client)\n        assert memory_counters == {\"object_used_memory\": 0}\n\n        seeder = DebugPopulateSeeder(**self.SEEDER_ARGS)\n        await seeder.run(async_client)\n\n        memory_counters = await self._get_info_memory_fields(async_client)\n        assert all(value > 0 for value in memory_counters.values())\n\n        await self._delete_all_keys(async_client)\n        memory_counters = await self._get_info_memory_fields(async_client)\n        assert memory_counters == {\"object_used_memory\": 0}\n\n    async def test_snapshot(self, df_server, async_client):\n        \"\"\"Checks that:\n        1. After reloading the snapshot file the data is the same\n        2. Memory counters after loading should be non zero\n        3. 
Memory counters after deleting all keys loaded by snapshot - this validates the memory\n           counting when loading from snapshot.\"\"\"\n\n        seeder = DebugPopulateSeeder(**self.SEEDER_ARGS)\n        await seeder.run(async_client)\n        start_capture = await DebugPopulateSeeder.capture(async_client)\n\n        memory_before = await self._get_info_memory_fields(async_client)\n\n        await async_client.connection_pool.disconnect()\n        df_server.stop()\n        df_server.start()\n\n        async_client = df_server.client()\n        await wait_available_async(async_client)\n\n        assert await DebugPopulateSeeder.capture(async_client) == start_capture\n\n        memory_after = await self._get_info_memory_fields(async_client)\n        for counter, value in memory_before.items():\n            # Counters should be non zero.\n            assert memory_after[counter] > 0\n\n        await self._delete_all_keys(async_client)\n        memory_empty = await self._get_info_memory_fields(async_client)\n        assert memory_empty == {\"object_used_memory\": 0}\n\n\n@pytest.mark.parametrize(\"format\", FILE_FORMATS)\n@dfly_args({**BASIC_ARGS, \"dbfilename\": \"info-while-snapshot\"})\nasync def test_infomemory_while_snapshotting(df_factory, format: str):\n    instance = df_factory.create(dbfilename=f\"dump_{tmp_file_name()}\")\n    instance.start()\n    async_client = instance.client()\n    await async_client.execute_command(\"DEBUG POPULATE 10000 key 4048 RAND\")\n\n    async def save():\n        await async_client.execute_command(\"SAVE\", format)\n\n    save_finished = False\n\n    async def info_in_loop():\n        while not save_finished:\n            await async_client.execute_command(\"INFO MEMORY\")\n            await asyncio.sleep(0.1)\n\n    save_task = asyncio.create_task(save())\n    info_task = asyncio.create_task(info_in_loop())\n\n    await save_task\n    save_finished = True\n    await info_task\n\n\n@dfly_args({**BASIC_ARGS, \"dbfilename\": 
\"test-bgsave\"})\nasync def test_bgsave_and_save(async_client: aioredis.Redis):\n    await async_client.execute_command(\"DEBUG POPULATE 200000\")\n\n    await async_client.execute_command(\"BGSAVE\")\n    with pytest.raises(redis.exceptions.ResponseError):\n        await async_client.execute_command(\"BGSAVE\")\n\n    while await is_saving(async_client):\n        await asyncio.sleep(0.1)\n    await async_client.execute_command(\"BGSAVE\")\n    with pytest.raises(redis.exceptions.ResponseError):\n        await async_client.execute_command(\"SAVE\")\n\n    while await is_saving(async_client):\n        await asyncio.sleep(0.1)\n    await async_client.execute_command(\"SAVE\")\n\n\n@pytest.mark.exclude_epoll\n@dfly_args(\n    {\n        **BASIC_ARGS,\n        \"dbfilename\": \"tiered-entries\",\n        \"tiered_prefix\": \"/tmp/tiered/backing\",\n        \"tiered_offload_threshold\": \"1.0\",  # ask offloading loop to offload as much as possible\n    }\n)\nasync def test_tiered_entries(async_client: aioredis.Redis):\n    \"\"\"This test makes sure tieried entries are correctly persisted\"\"\"\n\n    # With variance 4: 512 - 8192 we include small and large values\n    await DebugPopulateSeeder(key_target=5000, data_size=1024, variance=4, types=[\"STRING\"]).run(\n        async_client\n    )\n\n    # Compute the capture, this brings all items back to memory... 
so we'll wait for offloading\n    start_capture = await DebugPopulateSeeder.capture(async_client)\n\n    # Wait until the total_stashes counter stops increasing, meaning offloading finished\n    last_writes, current_writes = 0, -1\n    while last_writes != current_writes:\n        await asyncio.sleep(0.1)\n        last_writes = current_writes\n        current_writes = (await async_client.info(\"TIERED\"))[\"tiered_total_stashes\"]\n\n    # Save + flush + load\n    await async_client.execute_command(\"SAVE\", \"DF\")\n    assert await async_client.flushall()\n    await async_client.execute_command(\n        \"DFLY\",\n        \"LOAD\",\n        \"tiered-entries-summary.dfs\",\n    )\n\n    # Compare captures\n    assert await DebugPopulateSeeder.capture(async_client) == start_capture\n\n\n@pytest.mark.skip\n@pytest.mark.large\n@pytest.mark.opt_only\n@dfly_args(\n    {\n        **BASIC_ARGS,\n        \"maxmemory\": \"2G\",\n        \"dbfilename\": \"tiered-entries\",\n        \"tiered_prefix\": \"/tmp/tiered/backing\",\n        \"tiered_offload_threshold\": \"0.5\",  # ask to keep below 0.5 * 2G\n        \"tiered_storage_write_depth\": 1000,\n        \"tiered_experimental_cooling\": \"false\",\n    }\n)\nasync def test_tiered_entries_throttle(async_client: aioredis.Redis):\n    \"\"\"\n    This test ensures that tiered entries are correctly persisted and loaded back\n    when memory is limited and tiered storage throttling is enabled.\n    \"\"\"\n\n    # Populate the database with a large number of string keys to exceed the in-memory threshold\n    # and trigger tiered storage offloading/throttling. 
Each key is 4KB, total ~3GB.\n    await DebugPopulateSeeder(\n        key_target=750_000, data_size=4096, samples=20, variance=1, types=[\"STRING\"]\n    ).run(async_client)\n\n    # Capture the initial state of the database for later comparison\n    logging.info(\"Seeder completed, starting capture\")\n    start_capture = await DebugPopulateSeeder.capture(async_client)\n\n    # Check memory usage after population. The peak memory should remain below the set limit (2.3GB).\n    # This validates that tiered storage throttling is working as expected.\n    # TODO: investigate why it sometimes exceeds the expected limit.\n    info = await async_client.info(\"ALL\")\n    assert info[\"used_memory_peak\"] < 2300e6\n\n    logging.info(\"Memory usage check completed, starting save and load\")\n    await async_client.execute_command(\"SAVE\", \"DF\")\n    assert await async_client.flushall()\n    await async_client.execute_command(\n        \"DFLY\",\n        \"LOAD\",\n        \"tiered-entries-summary.dfs\",\n    )\n\n    logging.info(\"Save and load completed, starting consistency checks after reload\")\n    # After reload, check that memory usage is still within the expected bounds.\n    # This ensures that loading from tiered storage does not violate memory constraints.\n    # TODO: investigate high error margin.\n    info = await async_client.info(\"ALL\")\n    assert info[\"used_memory_peak\"] < 2300e6\n\n    assert await DebugPopulateSeeder.capture(async_client) == start_capture\n\n\n@pytest.mark.large\nasync def test_rdb_load_with_tiering_6823(df_factory: DflyInstanceFactory):\n    \"\"\"\n    Regression test for RDB load with tiering. Verifies that loading a snapshot\n    into a tiered instance produces correct memory accounting (no underflow)\n    and preserves data integrity. Covers #6823.\n    \"\"\"\n    dbfilename = f\"dump_{tmp_file_name()}\"\n\n    # 1. 
Create a non-tiered instance, populate with DEBUG POPULATE and save a DF snapshot.\n    plain = df_factory.create(\n        proactor_threads=4,\n        dbfilename=dbfilename,\n    )\n    plain.start()\n    plain_client = plain.client()\n\n    await plain_client.execute_command(\"DEBUG POPULATE 50000 key 8192 RAND\")\n    num_keys = await plain_client.dbsize()\n\n    await plain_client.execute_command(\"SAVE\", \"DF\")\n    plain.stop()\n\n    # 2. Start a tiered instance and load the snapshot. Before the fix this would crash\n    #    with \"Check failed: obj_memory_usage + size >= 0\" in AccountObjectMemory.\n    tiered = df_factory.create(\n        proactor_threads=1,\n        dbfilename=\"\",\n        maxmemory=\"256MB\",\n        tiered_prefix=\"/tmp/tiered/rdb_load_test\",\n        tiered_offload_threshold=\"0.9\",\n        tiered_experimental_cooling=\"false\",\n        tiered_storage_write_depth=10,\n    )\n    tiered.start()\n    tiered_client = tiered.client()\n\n    assert await tiered_client.execute_command(\"DFLY\", \"LOAD\", f\"{dbfilename}-summary.dfs\") == \"OK\"\n\n    # Wait for tiering to stash entries\n    @assert_eventually(timeout=30)\n    async def assert_tiered_reached():\n        info = await tiered_client.info(\"TIERED\")\n        assert info[\"tiered_entries\"] > 40_000\n\n    await assert_tiered_reached()\n\n    info = await tiered_client.info(\"memory\")\n    used_mem = info[\"used_memory\"]\n    obj_mem = info[\"object_used_memory\"]\n    assert used_mem > 20_000_000 and used_mem < 300_000_000\n    assert obj_mem > 20_000_000 and obj_mem < 300_000_000\n\n    assert info[\"num_entries\"] == num_keys\n\n\n@dfly_args({\"serialization_max_chunk_size\": 4096, \"proactor_threads\": 1})\n@pytest.mark.parametrize(\n    \"cont_type\",\n    [(\"HASH\"), (\"SET\"), (\"ZSET\"), (\"LIST\"), (\"STREAM\")],\n)\n@pytest.mark.large\nasync def test_big_value_serialization_memory_limit(df_factory, cont_type):\n    dbfilename = 
f\"dump_{tmp_file_name()}\"\n    instance = df_factory.create(dbfilename=dbfilename)\n    instance.start()\n    client = instance.client()\n\n    one_gb = 1_000_000_000\n    elements = 1000\n    element_size = 1_000_000  # 1mb\n\n    await client.execute_command(\n        f\"debug populate 1 prefix {element_size} TYPE {cont_type} RAND ELEMENTS {elements}\"\n    )\n    await asyncio.sleep(1)\n\n    info = await client.info(\"ALL\")\n    assert info[\"used_memory_peak_rss\"] < (one_gb * 1.2)\n    # if we execute SAVE below without big value serialization we trigger the assertion below.\n    # note the peak would reach (one_gb * 3) without it.\n    await client.execute_command(\"SAVE\")\n    info = await client.info(\"ALL\")\n\n    assert info[\"used_memory_peak_rss\"] < (one_gb * 1.3)\n\n    await client.execute_command(\"FLUSHALL\")\n    await client.aclose()\n\n\n@dfly_args(\n    {\n        \"dir\": \"{DRAGONFLY_TMP}/\",\n        \"memcached_port\": 11211,\n        \"proactor_threads\": 4,\n        \"dbfilename\": \"test-MC-flags\",\n    }\n)\nasync def test_mc_flags_saving(memcached_client: MCClient, async_client: aioredis.Redis):\n    async def check_flag(key, flag):\n        res = memcached_client.raw_command(\"get \" + key, \"END\\r\\n\").split()\n        # workaround sometimes memcached_client.raw_command returns empty str\n        if len(res) > 2:\n            assert res[2].decode() == str(flag)\n\n    assert memcached_client.set(\"key1\", \"value1\", noreply=True)\n    assert memcached_client.set(\"key2\", \"value1\", noreply=True, expire=3600, flags=123456)\n    assert memcached_client.replace(\"key1\", \"value2\", expire=4000, flags=2, noreply=True)\n\n    await check_flag(\"key1\", 2)\n    await check_flag(\"key2\", 123456)\n\n    await async_client.execute_command(\"SAVE\", \"DF\")\n    assert await async_client.flushall()\n\n    await async_client.execute_command(\n        \"DFLY\",\n        \"LOAD\",\n        \"test-MC-flags-summary.dfs\",\n    )\n\n   
 await check_flag(\"key1\", 2)\n    await check_flag(\"key2\", 123456)\n"
  },
  {
    "path": "tests/dragonfly/test_dash_gc.py",
    "content": "import asyncio\nfrom redis import asyncio as aioredis\nfrom . import dfly_args\nfrom .seeder import Seeder\nimport logging\n\n\n@dfly_args({\"proactor_threads\": 2, \"maxmemory\": \"1G\"})\nasync def test_gc_merges_segments_and_shrinks_capacity(async_client: aioredis.Redis):\n    value_size = 50\n    target_keys = 10_000\n    value = \"x\" * value_size\n\n    batch_size = 100\n    for batch_start in range(0, target_keys, batch_size):\n        batch_end = min(batch_start + batch_size, target_keys)\n        pipeline = async_client.pipeline()\n        for i in range(batch_start, batch_end):\n            pipeline.set(f\"key{i}\", value)\n        await pipeline.execute()\n\n    await asyncio.sleep(0.5)\n\n    stats_before = await async_client.info(\"MEMORY\")\n\n    # Delete 90% of keys to create very sparse segments\n    keys_to_delete = [f\"key{i}\" for i in range(target_keys) if i % 10 != 0]\n    keys_left = [f\"key{i}\" for i in range(target_keys) if i % 10 == 0]\n\n    for batch_start in range(0, len(keys_to_delete), 1000):\n        await async_client.delete(*keys_to_delete[batch_start : batch_start + 1000])\n\n    # Run GC with aggressive threshold to trigger merges\n    segments_merged = await async_client.execute_command(\"DEBUG\", \"COMPACT-TABLE\", \"0.5\")\n\n    stats_after = await async_client.info(\"MEMORY\")\n    assert segments_merged > 0\n    # Fewer segments means fewer buckets, so the table's total capacity must shrink\n    assert stats_after[\"prime_capacity\"] < stats_before[\"prime_capacity\"], (\n        f\"Table capacity should shrink after GC: before={stats_before['prime_capacity']}, \"\n        f\"after={stats_after['prime_capacity']}\"\n    )\n\n    logging.info(\n        f\"COMPACT-TABLE merged {segments_merged} segments, \"\n        f\"capacity {stats_before['prime_capacity']} -> {stats_after['prime_capacity']}\"\n    )\n\n    for key in keys_left:\n        res = await async_client.get(key)\n        assert res == 
value\n\n\n@dfly_args({\"proactor_threads\": 1, \"maxmemory\": \"2G\"})\nasync def test_gc_concurrent_with_seeding(async_client: aioredis.Redis):\n    \"\"\"\n    Verify COMPACT-TABLE running concurrently with data insertion doesn't corrupt seeded data.\n\n    a) Grow the dash table via DEBUG POPULATE with a prefix\n    b) Delete all populated keys to create sparse segments\n    c) Run DEBUG COMPACT-TABLE concurrently with Seeder\n    d) Assert all data seeded by Seeder exists in the dash table\n    \"\"\"\n    # a) Grow the dash table by seeding a large number of keys with a prefix\n    populate_prefix = \"gc-init-\"\n    await async_client.execute_command(\"DEBUG\", \"POPULATE\", 100_000, populate_prefix, 50)\n\n    # b) Delete all keys with the populate prefix to leave the segments sparse\n    cursor = 0\n    while True:\n        cursor, keys = await async_client.scan(cursor, match=f\"{populate_prefix}*\", count=1000)\n        if keys:\n            await async_client.delete(*keys)\n        if cursor == 0:\n            break\n\n    assert await async_client.dbsize() == 0\n\n    # c) Run COMPACT-TABLE concurrently with Seeder so GC reclaims sparse segments\n    #    while new data is being written\n    key_target = 5_000\n    seeder = Seeder(key_target=key_target, data_size=100)\n\n    async def run_gc():\n        for _ in range(10):\n            await async_client.execute_command(\"DEBUG\", \"COMPACT-TABLE\", \"0.5\")\n            await asyncio.sleep(0.05)\n\n    await asyncio.gather(\n        seeder.run(async_client, target_deviation=0.05),\n        run_gc(),\n    )\n\n    # d) Capture a reference snapshot of the data seeder wrote, then run GC again\n    #    and verify the full dataset is unchanged (no corruption or partial loss).\n    capture_before = await Seeder.capture(async_client)\n    assert all(h != 0 for h in capture_before), \"Seeder should have written data for all types\"\n\n    for _ in range(5):\n        await 
async_client.execute_command(\"DEBUG\", \"COMPACT-TABLE\", \"0.5\")\n        await asyncio.sleep(0.05)\n\n    capture_after = await Seeder.capture(async_client)\n    assert (\n        capture_before == capture_after\n    ), \"Data should be identical after GC: seeder dataset must survive concurrent GC runs\"\n"
  },
  {
    "path": "tests/dragonfly/tiering_test.py",
    "content": "import async_timeout\nimport asyncio\nimport itertools\nimport logging\nimport pytest\nimport random\nimport redis.asyncio as aioredis\n\nfrom . import dfly_args\nfrom .seeder import DebugPopulateSeeder, Seeder as SeederV2\nfrom .utility import (\n    info_tick_timer,\n    wait_for_replicas_state,\n    check_all_replicas_finished,\n    LogMonitor,\n)\nfrom .instance import DflyInstance, DflyInstanceFactory\n\nBASIC_ARGS = {\n    \"proactor_threads\": 4,\n    \"tiered_prefix\": \"/tmp/tiered/backing\",\n    \"tiered_offload_threshold\": \"1.0\",  # offload immediately\n    \"tiered_storage_write_depth\": 1000,\n    \"maxmemory\": \"1G\",\n}\n\n\n@pytest.mark.large\n@pytest.mark.opt_only\n@dfly_args({**BASIC_ARGS, \"tiered_experimental_cooling\": \"false\"})\nasync def test_basic_memory_usage(async_client: aioredis.Redis):\n    \"\"\"\n    Loading 1GB of mixed size strings (256b-16kb) will keep most of them on disk and thus RAM remains almost unused\n    \"\"\"\n\n    seeder = DebugPopulateSeeder(\n        key_target=200_000, data_size=2048, variance=8, samples=100, types=[\"STRING\"]\n    )\n    await seeder.run(async_client)\n\n    # Wait for tiering stashes\n    async for info, breaker in info_tick_timer(async_client, section=\"TIERED\", timeout=60):\n        with breaker:\n            assert info[\"tiered_entries\"] > 195_000\n\n    info = await async_client.info(\"ALL\")\n    assert info[\"num_entries\"] == 200_000\n\n    assert (\n        info[\"tiered_allocated_bytes\"] > 195_000 * 2048 * 0.8\n    )  # 0.8 just to be sure because it fluctuates due to variance\n\n    assert info[\"used_memory\"] < 50 * 1024 * 1024\n    assert (\n        info[\"used_memory_rss\"] < 500 * 1024 * 1024\n    )  # the grown table itself takes up lots of space\n\n\n@pytest.mark.large\n@pytest.mark.exclude_epoll\n@pytest.mark.opt_only\n@dfly_args(\n    {\n        **BASIC_ARGS,\n    }\n)\nasync def test_mixed_append(async_client: aioredis.Redis):\n    \"\"\"\n    Issue 
conflicting mixed APPEND calls for a limited subset of keys with aggressive offloading in the background.\n    Make sure no appends were lost\n    \"\"\"\n\n    # Generate operations and shuffle them, key number `k` will have `k` append operations\n    key_range = list(range(100, 300))\n    ops = list(itertools.chain(*map(lambda k: itertools.repeat(k, k), key_range)))\n    random.shuffle(ops)\n\n    # Split list into n workers and run it\n    async def run(sub_ops):\n        p = async_client.pipeline(transaction=False)\n        for k in sub_ops:\n            p.append(f\"k{k}\", 10 * \"x\")\n        await p.execute()\n\n    n = 20\n    await asyncio.gather(*(run(ops[i::n]) for i in range(n)))\n\n    async for info, breaker in info_tick_timer(async_client, section=\"TIERED\"):\n        with breaker:\n            assert info[\"tiered_entries\"] > len(key_range) / 5\n\n    # Verify lengths\n    p = async_client.pipeline(transaction=False)\n    for k in key_range:\n        p.strlen(f\"k{k}\")\n    res = await p.execute()\n\n    assert res == [10 * k for k in key_range]\n\n\n@pytest.mark.large\n@pytest.mark.exclude_epoll\n@pytest.mark.opt_only\n@dfly_args(\n    {\n        \"proactor_threads\": 2,\n        \"tiered_prefix\": \"/tmp/tiered/backing_master\",\n        \"maxmemory\": \"512MB\",\n        \"cache_mode\": True,\n        \"tiered_offload_threshold\": \"0.6\",\n        \"tiered_upload_threshold\": \"0.2\",\n        \"tiered_storage_write_depth\": 1500,\n    }\n)\nasync def test_replication(\n    async_client: aioredis.Redis, df_server: DflyInstance, df_factory: DflyInstanceFactory\n):\n    \"\"\"\n    Test replication with tiered storage for strings\n    \"\"\"\n\n    # Fill master with values\n    seeder = DebugPopulateSeeder(key_target=400000, data_size=2000, samples=100, types=[\"STRING\"])\n    await seeder.run(async_client)\n\n    # Start replica\n    replica = df_factory.create(\n        proactor_threads=2,\n        cache_mode=True,\n        
maxmemory=\"512MB\",\n        tiered_prefix=\"/tmp/tiered/backing_replica\",\n        tiered_offload_threshold=\"0.5\",\n        tiered_storage_write_depth=1500,\n    )\n    replica.start()\n    replica_client = replica.client()\n\n    # Get some keys and start tasks that append to values\n    keys = await async_client.keys()\n\n    async def fill_job():\n        for i, key in enumerate(keys):\n            await async_client.append(key, f\":{i}:\")\n            await asyncio.sleep(0.005)  # limit qps\n\n    fill_tasks = [asyncio.create_task(fill_job()) for _ in range(3)]\n\n    # Start replication\n    await replica_client.replicaof(\"localhost\", df_server.port)\n    logging.info(\"Waiting for replica to sync\")\n\n    # Wait for replication to finish\n    try:\n        async with async_timeout.timeout(500):\n            await wait_for_replicas_state(replica_client)\n    except asyncio.TimeoutError:\n        master_info = await async_client.info(\"ALL\")\n        replica_info = await replica_client.info(\"ALL\")\n        pytest.fail(\n            f\"Replica did not sync in time. 
\\nmaster: {master_info} \\n\\nreplica: {replica_info}\"\n        )\n\n    # cancel filler and wait for replica to catch up\n    for task in fill_tasks:\n        task.cancel()\n    await asyncio.gather(*fill_tasks, return_exceptions=True)\n    await check_all_replicas_finished([replica_client], async_client, timeout=500)\n\n    #\n    # Check that everything is in sync\n    hashes = await asyncio.gather(\n        *(SeederV2.capture(c, types=[\"STRING\"]) for c in [async_client, replica_client])\n    )\n\n    if len(set(hashes)) != 1:\n        for key in keys:\n            key_master = await async_client.get(key)\n            key_replica = await replica_client.get(key)\n            assert key_master == key_replica\n        assert False, \"Inconsistency detected, but key not determined\"\n\n\n@pytest.mark.large\n@pytest.mark.exclude_epoll\n@pytest.mark.opt_only\n@dfly_args(\n    {\n        **BASIC_ARGS,\n        \"proactor_threads\": 2,\n        \"maxmemory\": \"512MB\",\n        \"serialization_max_chunk_size\": 64000,\n        \"tiered_experimental_cooling\": False,\n    }\n)\nasync def test_tiered_replication_with_hashes(\n    async_client: aioredis.Redis, df_server: DflyInstance, df_factory: DflyInstanceFactory\n):\n    \"\"\"\n    Test replication from a tiered master with large string and hash data.\n    Verifies that the replica does not encounter internal RDB loading errors.\n    \"\"\"\n\n    # Fill master with data\n    await async_client.execute_command(\"DEBUG POPULATE 200000 key 3000\")\n    await async_client.execute_command(\"DEBUG POPULATE 200 hash 70 RAND TYPE HASH ELEMENTS 900\")\n\n    # Start replica\n    replica = df_factory.create(\n        proactor_threads=1,\n        dbfilename=\"\",\n    )\n    replica.start()\n    replica_client = replica.client()\n\n    # Monitor replica logs for RDB loading errors in the background\n    monitor = LogMonitor(replica, \"Internal error when loading RDB\")\n    monitor.start()\n\n    # Start replication\n    
await replica_client.replicaof(\"localhost\", df_server.port)\n    logging.info(\"Waiting for replica to sync\")\n\n    # Wait for replication to finish or RDB error\n    try:\n        async with async_timeout.timeout(500):\n            wait_task = asyncio.create_task(wait_for_replicas_state(replica_client))\n            done, _ = await asyncio.wait(\n                [wait_task, monitor.task], return_when=asyncio.FIRST_COMPLETED\n            )\n            if monitor.task in done:\n                wait_task.cancel()\n                await asyncio.gather(wait_task, return_exceptions=True)\n                monitor.assert_no_match()\n            if wait_task in done:\n                wait_task.result()  # propagate exceptions\n    except asyncio.TimeoutError:\n        master_info = await async_client.info(\"ALL\")\n        replica_info = await replica_client.info(\"ALL\")\n        pytest.fail(\n            f\"Replica did not sync in time. \\nmaster: {master_info} \\n\\nreplica: {replica_info}\"\n        )\n    finally:\n        await monitor.stop()\n\n    await check_all_replicas_finished([replica_client], async_client, timeout=500)\n    monitor.assert_no_match()\n"
  },
  {
    "path": "tests/dragonfly/tls_conf_test.py",
    "content": "import pytest\nimport redis\nfrom .utility import *\nfrom .instance import DflyStartException\n\n\nasync def test_tls_no_auth(df_factory, with_tls_server_args):\n    # Needs some authentication\n    server = df_factory.create(**with_tls_server_args)\n    with pytest.raises(DflyStartException):\n        server.start()\n\n\nasync def test_tls_no_key(df_factory):\n    # Needs a private key and certificate.\n    server = df_factory.create(tls=None, requirepass=\"XXX\")\n    with pytest.raises(DflyStartException):\n        server.start()\n\n\nasync def test_tls_password(df_factory, with_tls_server_args, with_tls_ca_cert_args):\n    with df_factory.create(requirepass=\"XXX\", **with_tls_server_args) as server:\n        async with server.client(\n            ssl=True, password=\"XXX\", ssl_ca_certs=with_tls_ca_cert_args[\"ca_cert\"]\n        ) as client:\n            await client.ping()\n\n\nasync def test_tls_client_certs(\n    df_factory, with_ca_tls_server_args, with_tls_client_args, with_tls_ca_cert_args\n):\n    with df_factory.create(**with_ca_tls_server_args) as server:\n        async with server.client(\n            **with_tls_client_args, ssl_ca_certs=with_tls_ca_cert_args[\"ca_cert\"]\n        ) as client:\n            await client.ping()\n\n\nasync def test_client_tls_no_auth(df_factory):\n    server = df_factory.create(tls_replication=None)\n    with pytest.raises(DflyStartException):\n        server.start()\n\n\nasync def test_client_tls_password(df_factory):\n    with df_factory.create(tls_replication=None, masterauth=\"XXX\"):\n        pass\n\n\nasync def test_client_tls_cert(df_factory, with_tls_server_args):\n    key_args = with_tls_server_args.copy()\n    key_args.pop(\"tls\")\n    with df_factory.create(tls_replication=None, **key_args):\n        pass\n\n\nasync def test_config_enable_tls_with_ca_dir(\n    df_factory, with_ca_dir_tls_server_args, with_tls_client_args\n):\n    server_args, ca_cert = with_ca_dir_tls_server_args\n    
server_args[\"tls\"] = \"true\"\n\n    with df_factory.create(**server_args) as server:\n        async with server.client(**with_tls_client_args, ssl_ca_certs=ca_cert) as client:\n            await client.execute_command(\"SET foo 44\")\n            res = await client.execute_command(\"GET foo\")\n            assert res == \"44\"\n\n\nasync def test_config_update_tls_certs(\n    df_factory, with_tls_server_args, with_tls_ca_cert_args, tmp_dir\n):\n    # Generate new certificates.\n    ca_key = os.path.join(tmp_dir, \"ca-key-new.pem\")\n    ca_cert = os.path.join(tmp_dir, \"ca-cert-new.pem\")\n    gen_ca_cert(ca_key, ca_cert)\n    tls_server_key = os.path.join(tmp_dir, \"df-key-new.pem\")\n    tls_server_req = os.path.join(tmp_dir, \"df-req-new.pem\")\n    tls_server_cert = os.path.join(tmp_dir, \"df-cert-new.pem\")\n    gen_certificate(\n        ca_key,\n        ca_cert,\n        tls_server_req,\n        tls_server_key,\n        tls_server_cert,\n    )\n\n    with df_factory.create(requirepass=\"XXX\", **with_tls_server_args) as server:\n        async with server.client(\n            ssl=True, password=\"XXX\", ssl_ca_certs=with_tls_ca_cert_args[\"ca_cert\"]\n        ) as client:\n            await client.config_set(\n                \"tls_key_file\",\n                tls_server_key,\n            )\n            await client.config_set(\"tls_cert_file\", tls_server_cert)\n            # Note must still set `tls true` to reload the TLS context.\n            await client.config_set(\"tls\", \"true\")\n\n            # The existing connection should still work.\n            await client.ping()\n\n        # Connecting with the old CA should fail.\n        with pytest.raises(redis.exceptions.ConnectionError):\n            async with server.client(\n                ssl=True, password=\"XXX\", ssl_ca_certs=with_tls_ca_cert_args[\"ca_cert\"]\n            ) as client:\n                await client.ping()\n\n        # Connecting with the new CA should succeed.\n        async 
with server.client(ssl=True, password=\"XXX\", ssl_ca_certs=ca_cert) as client:\n            await client.ping()\n\n\nasync def test_config_enable_tls(\n    df_factory, with_ca_tls_server_args, with_tls_client_args, with_tls_ca_cert_args\n):\n    with df_factory.create() as server:\n        async with server.client() as client:\n            await client.ping()\n\n            # Note the order here matters as flags are applied in order.\n            await client.config_set(\n                \"tls_key_file\",\n                with_ca_tls_server_args[\"tls_key_file\"],\n            )\n            await client.config_set(\n                \"tls_cert_file\",\n                with_ca_tls_server_args[\"tls_cert_file\"],\n            )\n            await client.config_set(\n                \"tls_ca_cert_file\",\n                with_ca_tls_server_args[\"tls_ca_cert_file\"],\n            )\n            await client.config_set(\n                \"tls\",\n                \"true\",\n            )\n\n            # The existing client should still be connected.\n            await client.ping()\n\n        # Connecting without TLS should fail.\n        with pytest.raises(redis.exceptions.ConnectionError):\n            async with server.client() as client_unauth:\n                await client_unauth.ping()\n\n        # Connecting with TLS should succeed.\n        async with server.client(\n            **with_tls_client_args, ssl_ca_certs=with_tls_ca_cert_args[\"ca_cert\"]\n        ) as client_tls:\n            await client_tls.ping()\n\n\nasync def test_config_disable_tls(\n    df_factory, with_ca_tls_server_args, with_tls_client_args, with_tls_ca_cert_args\n):\n    with df_factory.create(**with_ca_tls_server_args) as server:\n        async with server.client(\n            **with_tls_client_args, ssl_ca_certs=with_tls_ca_cert_args[\"ca_cert\"]\n        ) as client_tls:\n            await client_tls.config_set(\"tls\", \"false\")\n\n        # Connecting without TLS should succeed.\n  
      async with server.client() as client_unauth:\n            await client_unauth.ping()\n"
  },
  {
    "path": "tests/dragonfly/utility.py",
    "content": "import asyncio\nimport functools\nimport itertools\nimport logging\nimport sys\nimport wrapt\nfrom redis import asyncio as aioredis\nimport redis\nimport random\nimport string\nimport time\nimport difflib\nimport json\nimport subprocess\nimport pytest\nimport os\nimport fakeredis\nfrom typing import Iterable, Union\nfrom enum import Enum\nimport re\n\n\ndef tmp_file_name():\n    return \"\".join(random.choices(string.ascii_letters, k=10))\n\n\ndef chunked(n, iterable):\n    \"\"\"Transform iterable into iterator of chunks of size n\"\"\"\n    it = iter(iterable)\n    while True:\n        chunk = tuple(itertools.islice(it, n))\n        if not chunk:\n            return\n        yield chunk\n\n\ndef eprint(*args, **kwargs):\n    \"\"\"Print to stderr\"\"\"\n    print(*args, file=sys.stderr, **kwargs)\n\n\ndef gen_test_data(n, start=0, seed=None):\n    for i in range(start, n):\n        yield \"k-\" + str(i), \"v-\" + str(i) + (\"-\" + str(seed) if seed else \"\")\n\n\ndef batch_fill_data(client, gen, batch_size=100):\n    for group in chunked(batch_size, gen):\n        client.mset({k: v for k, v, in group})\n\n\nasync def tick_timer(func, timeout=5, step=0.1):\n    \"\"\"\n    Async generator with automatic break when all asserts pass\n\n    for object, breaker in tick_timer():\n        with breaker:\n            assert conditions on object\n\n    If the generator times out, the last failed assert is raised\n    \"\"\"\n\n    class ticker_breaker:\n        def __init__(self):\n            self.exc = None\n            self.entered = False\n\n        def __enter__(self):\n            self.entered = True\n\n        def __exit__(self, exc_type, exc_value, trace):\n            if exc_value:\n                self.exc = exc_value\n                return True\n\n    last_error = None\n    start = time.time()\n    while time.time() - start < timeout:\n        breaker = ticker_breaker()\n        yield (await func(), breaker)\n        if breaker.entered and not 
breaker.exc:\n            return\n\n        last_error = breaker.exc\n        await asyncio.sleep(step)\n\n    if last_error:\n        raise TimeoutError(\"Timed out!\") from last_error\n    raise TimeoutError(\"Timed out!\")\n\n\nasync def info_tick_timer(client: aioredis.Redis, section=None, **kwargs):\n    async for x in tick_timer(lambda: client.info(section), **kwargs):\n        yield x\n\n\n# wait until a process becomes \"responsive\":\n# for a master - waits until it finishes loading a snapshot if it's busy doing so,\n# and for replica it waits until it finishes its full sync stage and reaches the stable sync state.\nasync def wait_available_async(\n    clients: Union[aioredis.Redis, Iterable[aioredis.Redis]], timeout=120\n):\n    if not isinstance(clients, aioredis.Redis):\n        # Syntactic sugar to seamlessly handle an array of clients.\n        return await asyncio.gather(*(wait_available_async(c) for c in clients))\n\n    \"\"\"Block until instance exits loading phase\"\"\"\n    # First we make sure that ping passes\n    start = time.time()\n    while (time.time() - start) < timeout:\n        try:\n            await clients.ping()\n            break\n        except aioredis.BusyLoadingError as e:\n            assert \"Dragonfly is loading the dataset in memory\" in str(e)\n    timeout -= time.time() - start\n    if timeout <= 0:\n        raise TimeoutError(\"Timed out!\")\n\n    # Secondly for replicas, we make sure they reached stable state replication\n    async for info, breaker in info_tick_timer(clients, \"REPLICATION\", timeout=timeout):\n        with breaker:\n            assert info[\"role\"] == \"master\" or \"slave_repl_offset\" in info, info\n\n\nclass SizeChange(Enum):\n    SHRINK = 0\n    NO_CHANGE = 1\n    GROW = 2\n\n\nclass ValueType(Enum):\n    STRING = 0\n    LIST = 1\n    SET = 2\n    HSET = 3\n    ZSET = 4\n    JSON = 5\n\n\nclass CommandGenerator:\n    \"\"\"Class for generating complex command sequences\"\"\"\n\n    def __init__(\n  
      self,\n        target_keys,\n        val_size,\n        huge_val_count,\n        huge_val_size,\n        batch_size,\n        max_multikey,\n        unsupported_types=[],\n    ):\n        self.key_cnt_target = target_keys\n        self.val_size = val_size\n        self.batch_size = min(batch_size, target_keys)\n        self.max_multikey = max_multikey\n        self.unsupported_types = unsupported_types\n\n        # Generate sorted list of random samples in target_keys range\n        self.huge_val_sample = sorted(random.sample(range(target_keys), huge_val_count))\n        self.huge_val_size = huge_val_size\n\n        # Key management\n        self.key_sets = [set() for _ in ValueType]\n        self.key_cursor = 0\n        self.key_cnt = 0\n\n        # Grow factors\n        self.diff_speed = 5\n        self.base_diff_prob = 0.2\n        self.min_diff_prob = 0.1\n\n    def keys(self):\n        return itertools.chain(*self.key_sets)\n\n    def keys_and_types(self):\n        return ((k, t) for t in list(ValueType) for k in self.set_for_type(t))\n\n    def set_for_type(self, t: ValueType):\n        return self.key_sets[t.value]\n\n    def add_key(self, t: ValueType):\n        \"\"\"Add new key of type t\"\"\"\n        k, self.key_cursor = self.key_cursor, self.key_cursor + 1\n        self.set_for_type(t).add(k)\n        return k\n\n    def random_type(self):\n        return random.choice([t for t in ValueType if (t not in self.unsupported_types)])\n\n    def randomize_nonempty_set(self):\n        \"\"\"Return random non-empty set and its type\"\"\"\n        if not any(self.key_sets):\n            return None, None\n\n        t = self.random_type()\n        s = self.set_for_type(t)\n\n        if len(s) == 0:\n            return self.randomize_nonempty_set()\n        else:\n            return s, t\n\n    def randomize_key(self, t=None, pop=False):\n        \"\"\"Return random key and its type\"\"\"\n        if t is None:\n            s, t = 
self.randomize_nonempty_set()\n        else:\n            s = self.set_for_type(t)\n\n        if s is None or len(s) == 0:\n            return None, None\n\n        k = s.pop()\n        if not pop:\n            s.add(k)\n\n        return k, t\n\n    def generate_val(self, t: ValueType, idx):\n        \"\"\"Generate filler value of configured size for type t\"\"\"\n\n        # If current key count matches huge val sample than we will create one element with huge val size.\n        generate_huge_val = False\n        if len(self.huge_val_sample) and self.huge_val_sample[0] == (self.key_cnt + idx):\n            generate_huge_val = True\n            # Remove this sample from list\n            self.huge_val_sample.pop(0)\n\n        def rand_str(k=3, s=\"\"):\n            # Use small k value to reduce mem usage and increase number of ops\n            return s.join(random.choices(string.ascii_letters, k=k))\n\n        if t == ValueType.STRING:\n            # Random string for MSET\n            return (rand_str(self.huge_val_size if generate_huge_val else self.val_size),)\n        elif t == ValueType.LIST:\n            # Random sequence k-letter elements for LPUSH\n            list_size = self.val_size // 4\n            element_size = (\n                self.huge_val_size // list_size if generate_huge_val else self.val_size // list_size\n            )\n            return tuple(rand_str(element_size) for i in range(list_size))\n        elif t == ValueType.SET:\n            # Random sequence of k-letter elements for SADD\n            set_size = self.val_size // 4\n            element_size = (\n                self.huge_val_size // set_size if generate_huge_val else self.val_size // set_size\n            )\n            return tuple(rand_str(element_size) for i in range(set_size))\n        elif t == ValueType.HSET:\n            # Random sequence of k-letter keys + int and two start values for HSET\n            hset_size = self.val_size // 5\n            element_size = (\n       
         self.huge_val_size // hset_size if generate_huge_val else self.val_size // hset_size\n            )\n            elements = (\n                (\n                    rand_str(element_size),\n                    random.randint(0, self.val_size),\n                )\n                for i in range(hset_size)\n            )\n            return (\"v0\", 0, \"v1\", 0) + tuple(itertools.chain(*elements))\n        elif t == ValueType.ZSET:\n            # Random sequnce of k-letter members and int score for ZADD\n            # The length of the sequence will vary between val_size/4 and 130.\n            # This ensures that we test both the ZSET implementation with listpack and the our custom BPtree.\n            value_sizes = [self.val_size // 4, 130]\n            probabilities = [8, 1]\n            zset_size = random.choices(value_sizes, probabilities)[0]\n            element_size = (\n                self.huge_val_size // zset_size if generate_huge_val else self.val_size // zset_size\n            )\n            elements = (\n                (\n                    random.randint(0, self.val_size),\n                    rand_str(element_size),\n                )\n                for i in range(zset_size)\n            )\n            return tuple(itertools.chain(*elements))\n        elif t == ValueType.JSON:\n            # Json object with keys:\n            # - arr (array of random strings)\n            # - ints (array of objects {i:random integer})\n            # - i (random integer)\n            json_size = self.val_size // 6\n            element_size = (\n                self.huge_val_size // json_size if generate_huge_val else self.val_size // json_size\n            )\n            ints = [{\"i\": random.randint(0, 100)} for i in range(json_size)]\n            strs = [rand_str(element_size) for i in range(json_size)]\n            return \"$\", json.dumps({\"arr\": strs, \"ints\": ints, \"i\": random.randint(0, 100)})\n        else:\n            assert False, 
\"Invalid ValueType\"\n\n    def gen_shrink_cmd(self):\n        \"\"\"\n        Generate command that shrinks data: DEL of random keys or almost immediate <=50ms PEXPIRE.\n        \"\"\"\n        if random.random() < 0.3:\n            key, _ = self.randomize_key(pop=True)\n            if key == None:\n                return None, 0\n            return (\"PEXPIRE\", f\"k{key}\", f\"{random.randint(0, 50)}\"), -1\n        else:\n            keys_gen = (\n                self.randomize_key(pop=True) for _ in range(random.randint(1, self.max_multikey))\n            )\n            keys = [f\"k{k}\" for k, _ in keys_gen if k is not None]\n\n            if len(keys) == 0:\n                return None, 0\n            return (\"DEL\", *keys), -len(keys)\n\n    UPDATE_ACTIONS = [\n        (\"APPEND {k} {val}\", ValueType.STRING),\n        (\"SETRANGE {k} 10 {val}\", ValueType.STRING),\n        (\"LPUSH {k} {val}\", ValueType.LIST),\n        (\"LPOP {k}\", ValueType.LIST),\n        (\"SADD {k} {val}\", ValueType.SET),\n        # (\"SPOP {k}\", ValueType.SET),  # Disabled because it is inconsistent\n        (\"HSETNX {k} v0 {val}\", ValueType.HSET),\n        (\"HINCRBY {k} v1 1\", ValueType.HSET),\n        (\"ZPOPMIN {k} 1\", ValueType.ZSET),\n        (\"ZADD {k} 0 {val}\", ValueType.ZSET),\n        (\"JSON.NUMINCRBY {k} $..i 1\", ValueType.JSON),\n        (\"JSON.ARRPOP {k} $.arr\", ValueType.JSON),\n        ('JSON.ARRAPPEND {k} $.arr \"{val}\"', ValueType.JSON),\n    ]\n\n    def gen_update_cmd(self):\n        \"\"\"\n        Generate command that makes no change to keyset: random of UPDATE_ACTIONS.\n        \"\"\"\n        cmd, t = random.choice(self.UPDATE_ACTIONS)\n        k, _ = self.randomize_key(t)\n        val = \"\".join(random.choices(string.ascii_letters, k=3))\n        return cmd.format(k=f\"k{k}\", val=val).split() if k is not None else None, 0\n\n    GROW_ACTINONS = {\n        ValueType.STRING: \"MSET\",\n        ValueType.LIST: \"LPUSH\",\n        
ValueType.SET: \"SADD\",\n        ValueType.HSET: \"HMSET\",\n        ValueType.ZSET: \"ZADD\",\n        ValueType.JSON: \"JSON.MSET\",\n    }\n\n    def gen_grow_cmd(self):\n        \"\"\"\n        Generate command that grows keyset: Initialize key of random type with filler value.\n        \"\"\"\n        # TODO: Implement COPY in Dragonfly.\n        t = self.random_type()\n        if t in [ValueType.STRING, ValueType.JSON]:\n            count = random.randint(1, self.max_multikey)\n        else:\n            count = 1\n\n        keys = (self.add_key(t) for _ in range(count))\n        payload = itertools.chain(\n            *((f\"k{k}\",) + self.generate_val(t, idx) for idx, k in enumerate(keys))\n        )\n        filtered_payload = filter(lambda p: p is not None, payload)\n\n        return (self.GROW_ACTINONS[t],) + tuple(filtered_payload), count\n\n    def make(self, action):\n        \"\"\"Create command for action and return it together with number of keys added (removed)\"\"\"\n        if action == SizeChange.SHRINK:\n            return self.gen_shrink_cmd()\n        elif action == SizeChange.NO_CHANGE:\n            return self.gen_update_cmd()\n        else:\n            return self.gen_grow_cmd()\n\n    def reset(self):\n        self.key_sets = [set() for _ in ValueType]\n        self.key_cursor = 0\n        self.key_cnt = 0\n\n    def size_change_probs(self):\n        \"\"\"Calculate probabilities of size change actions\"\"\"\n        # Relative distance to key target\n        dist = (self.key_cnt_target - self.key_cnt) / self.key_cnt_target\n        # Shrink has a roughly twice as large expected number of changed keys than grow\n        return [\n            max(self.base_diff_prob - self.diff_speed * dist, self.min_diff_prob),\n            15.0,\n            max(self.base_diff_prob + 2 * self.diff_speed * dist, self.min_diff_prob),\n        ]\n\n    def generate(self):\n        \"\"\"Generate next batch of commands, return it and ratio of current keys 
to target\"\"\"\n        changes = []\n        cmds = []\n        while len(cmds) < self.batch_size:\n            # Re-calculating changes in small groups\n            if len(changes) == 0:\n                changes = random.choices(list(SizeChange), weights=self.size_change_probs(), k=20)\n\n            cmd, delta = self.make(changes.pop())\n            if cmd is not None:\n                cmds.append(cmd)\n                self.key_cnt += delta\n        return cmds, self.key_cnt / self.key_cnt_target\n\n\nclass DataCapture:\n    \"\"\"\n    Captured state of single database.\n    \"\"\"\n\n    def __init__(self, entries):\n        self.entries = entries\n\n    def compare(self, other):\n        if self.entries == other.entries:\n            return True\n\n        self._print_diff(other)\n        return False\n\n    def _print_diff(self, other):\n        eprint(\"=== DIFF ===\")\n        printed = 0\n        diff = difflib.ndiff(self.entries, other.entries)\n        for line in diff:\n            if line.startswith(\" \"):\n                continue\n            eprint(line)\n            if printed >= 20:\n                eprint(\"... omitted ...\")\n                break\n            printed += 1\n        eprint(\"=== END DIFF ===\")\n\n\nclass DflySeeder:\n    \"\"\"\n    Data seeder with support for multiple types and commands.\n\n    Usage:\n\n    Create a seeder with target number of keys (100k) of specified size (200) and work on 5 dbs,\n\n        seeder = new DflySeeder(keys=100_000, value_size=200, dbcount=5)\n\n    Stop when we are in 5% of target number of keys (i.e. 
above 95_000)\n    Because its probabilistic we might never reach exactly 100_000.\n\n        await seeder.run(target_deviation=0.05)\n\n    Run 3000 commands in stable state, crate a capture and compare it to\n    replica on port 1112\n\n        await seeder.run(target_op=3000)\n        capture = await seeder.capture()\n        assert await seeder.compare(capture, port=1112)\n    \"\"\"\n\n    def __init__(\n        self,\n        port=6379,\n        keys=1000,\n        val_size=50,\n        huge_value_count=5,\n        huge_value_size=100000,\n        batch_size=100,\n        max_multikey=5,\n        dbcount=1,\n        multi_transaction_probability=0.3,\n        log_file=None,\n        unsupported_types=[],\n        stop_on_failure=True,\n        cluster_mode=False,\n        mirror_to_fake_redis=False,\n        pipeline=True,\n    ):\n        if cluster_mode:\n            max_multikey = 1\n            multi_transaction_probability = 0\n            unsupported_types.append(ValueType.JSON)  # Cluster aio client doesn't support JSON\n\n        self.cluster_mode = cluster_mode\n        self.gen = CommandGenerator(\n            keys,\n            val_size,\n            huge_value_count,\n            huge_value_size,\n            batch_size,\n            max_multikey,\n            unsupported_types,\n        )\n        self.port = port\n        self.dbcount = dbcount\n        self.multi_transaction_probability = multi_transaction_probability\n        self.stop_flag = False\n        self.stop_on_failure = stop_on_failure\n        self.fake_redis = None\n        self.use_pipeline = pipeline\n\n        self.log_file = log_file\n        if self.log_file is not None:\n            open(self.log_file, \"w\").close()\n\n        if mirror_to_fake_redis:\n            logging.debug(\"Creating FakeRedis instance\")\n            self.fake_redis = fakeredis.FakeAsyncRedis()\n            self.use_pipeline = False\n\n    async def run(self, target_ops=None, target_deviation=None):\n  
      \"\"\"\n        Run a seeding cycle on all dbs either until stop(), a fixed number of commands (target_ops)\n        or until reaching an allowed deviation from the target number of keys (target_deviation)\n        \"\"\"\n        logging.debug(f\"Running ops:{target_ops} deviation:{target_deviation}\")\n        self.stop_flag = False\n        queues = [asyncio.Queue(maxsize=3) for _ in range(self.dbcount)]\n        producer = asyncio.create_task(\n            self._generator_task(queues, target_ops=target_ops, target_deviation=target_deviation)\n        )\n        consumers = [\n            asyncio.create_task(self._executor_task(i, queue)) for i, queue in enumerate(queues)\n        ]\n\n        time_start = time.time()\n\n        cmdcount = await producer\n        for consumer in consumers:\n            await consumer\n\n        took = time.time() - time_start\n        qps = round(cmdcount * self.dbcount / took, 2)\n        logging.debug(f\"Filling took: {took}, QPS: {qps}\")\n\n    def stop(self):\n        \"\"\"Stop all invocations to run\"\"\"\n        self.stop_flag = True\n\n    def reset(self):\n        \"\"\"Reset internal state. 
Needs to be called after flush or restart\"\"\"\n        self.gen.reset()\n\n    async def capture_fake_redis(self):\n        keys = sorted(list(self.gen.keys_and_types()))\n        # TODO: support multiple databases\n        assert self.dbcount == 1\n        assert self.fake_redis != None\n        capture = DataCapture(await self._capture_entries(self.fake_redis, keys))\n        return [capture]\n\n    async def capture(self, port=None):\n        \"\"\"Create DataCapture for all dbs\"\"\"\n\n        if port is None:\n            port = self.port\n        logging.debug(f\"Starting capture from {port=}\")\n        keys = sorted(list(self.gen.keys_and_types()))\n\n        captures = await asyncio.gather(\n            *(self._capture_db(port=port, target_db=db, keys=keys) for db in range(self.dbcount))\n        )\n        return captures\n\n    async def compare(self, initial_captures, port=6379):\n        \"\"\"Compare data capture with all dbs of instance and return True if all dbs are correct\"\"\"\n        print(f\"comparing capture to {port}\")\n        target_captures = await self.capture(port=port)\n\n        for db, target_capture, initial_capture in zip(\n            range(self.dbcount), target_captures, initial_captures\n        ):\n            print(f\"comparing capture to {port}, db: {db}\")\n            if not initial_capture.compare(target_capture):\n                eprint(f\">>> Inconsistent data on port {port}, db {db}\")\n                return False\n        return True\n\n    def target(self, key_cnt):\n        self.gen.key_cnt_target = key_cnt\n\n    def _make_client(self, **kwargs):\n        if self.cluster_mode:\n            return aioredis.RedisCluster(host=\"127.0.0.1\", **kwargs)\n        else:\n            return aioredis.Redis(**kwargs)\n\n    async def _close_client(self, client):\n        if not self.cluster_mode:\n            await client.connection_pool.disconnect()\n        await client.aclose()\n\n    async def _capture_db(self, port, 
target_db, keys):\n        client = self._make_client(port=port, db=target_db)\n        capture = DataCapture(await self._capture_entries(client, keys))\n\n        await self._close_client(client)\n\n        return capture\n\n    async def _generator_task(self, queues, target_ops=None, target_deviation=None):\n        cpu_time = 0\n        submitted = 0\n        batches = 0\n        deviation = 0.0\n\n        file = None\n        if self.log_file:\n            file = open(self.log_file, \"a\")\n\n        def should_run():\n            if self.stop_flag:\n                return False\n            if target_ops is not None and submitted >= target_ops:\n                return False\n            if target_deviation is not None and (\n                deviation > 1 or abs(1 - deviation) < target_deviation\n            ):\n                return False\n            return True\n\n        def stringify_cmd(cmd):\n            if isinstance(cmd, tuple):\n                return \" \".join(str(c) for c in cmd)\n            else:\n                return str(cmd)\n\n        while should_run():\n            start_time = time.time()\n            blob, deviation = self.gen.generate()\n            is_multi_transaction = random.random() < self.multi_transaction_probability\n            tx_data = (blob, is_multi_transaction)\n            cpu_time += time.time() - start_time\n\n            await asyncio.gather(*(q.put(tx_data) for q in queues))\n            submitted += len(blob)\n            batches += 1\n\n            if file is not None:\n                pattern = \"MULTI\\n{}\\nEXEC\\n\" if is_multi_transaction else \"{}\\n\"\n                file.write(pattern.format(\"\\n\".join(stringify_cmd(cmd) for cmd in blob)))\n\n            print(\".\", end=\"\", flush=True)\n            await asyncio.sleep(0.0)\n\n        print(\"\\ncpu time\", cpu_time, \"batches\", batches, \"commands\", submitted)\n\n        await asyncio.gather(*(q.put(None) for q in queues))\n        for q in 
queues:\n            await q.join()\n\n        if file is not None:\n            file.flush()\n\n        return submitted\n\n    async def _executor_task(self, db, queue):\n        client = self._make_client(port=self.port, db=db)\n\n        while True:\n            tx_data = await queue.get()\n            if tx_data is None:\n                queue.task_done()\n                break\n\n            try:\n                if self.use_pipeline:\n                    pipe = client.pipeline(transaction=tx_data[1])\n                    for cmd in tx_data[0]:\n                        pipe.execute_command(*cmd)\n                    await pipe.execute()\n                else:\n                    for cmd in tx_data[0]:\n                        dfly_resp = await client.execute_command(*cmd)\n                        # To mirror consistently to Fake Redis we must only send to it successful\n                        # commands. We can't use pipes because they might succeed partially.\n                        if self.fake_redis is not None:\n                            fake_resp = await self.fake_redis.execute_command(*cmd)\n                            assert dfly_resp == fake_resp\n            except (redis.exceptions.ConnectionError, redis.exceptions.ResponseError) as e:\n                if self.stop_on_failure:\n                    await self._close_client(client)\n                    raise SystemExit(e)\n            except Exception as e:\n                await self._close_client(client)\n                raise SystemExit(e)\n            queue.task_done()\n\n        await self._close_client(client)\n\n    CAPTURE_COMMANDS = {\n        ValueType.STRING: lambda pipe, k: pipe.get(k),\n        ValueType.LIST: lambda pipe, k: pipe.lrange(k, 0, -1),\n        ValueType.SET: lambda pipe, k: pipe.smembers(k),\n        ValueType.HSET: lambda pipe, k: pipe.hgetall(k),\n        ValueType.ZSET: lambda pipe, k: pipe.zrange(k, start=0, end=-1, withscores=True),\n        ValueType.JSON: lambda 
pipe, k: pipe.execute_command(\"JSON.GET\", k, \"$\"),\n    }\n\n    CAPTURE_EXTRACTORS = {\n        ValueType.STRING: lambda res, tostr: (tostr(res),),\n        ValueType.LIST: lambda res, tostr: (tostr(s) for s in res),\n        ValueType.SET: lambda res, tostr: sorted(tostr(s) for s in res),\n        ValueType.HSET: lambda res, tostr: sorted(\n            tostr(k) + \"=\" + tostr(v) for k, v in res.items()\n        ),\n        ValueType.ZSET: lambda res, tostr: (tostr(s) + \"-\" + str(f) for (s, f) in res),\n        ValueType.JSON: lambda res, tostr: (tostr(res),),\n    }\n\n    async def _capture_entries(self, client, keys):\n        def tostr(b):\n            return b.decode(\"utf-8\") if isinstance(b, bytes) else str(b)\n\n        entries = []\n        for group in chunked(self.gen.batch_size * 2, keys):\n            pipe = client.pipeline(transaction=False)\n            for k, t in group:\n                self.CAPTURE_COMMANDS[t](pipe, f\"k{k}\")\n\n            results = await pipe.execute()\n            for (k, t), res in zip(group, results):\n                out = f\"{t.name} k{k}: \" + \" \".join(self.CAPTURE_EXTRACTORS[t](res, tostr))\n                entries.append(out)\n\n        return entries\n\n\nclass DflySeederFactory:\n    \"\"\"\n    Used to pass params to a DflySeeder.\n    \"\"\"\n\n    def __init__(self, log_file=None):\n        self.log_file = log_file\n\n    def __repr__(self) -> str:\n        return f\"DflySeederFactory(log_file={self.log_file})\"\n\n    def create(self, **kwargs):\n        return DflySeeder(log_file=self.log_file, **kwargs)\n\n\ndef gen_ca_cert(ca_key_path, ca_cert_path):\n    # We first need to generate the tls certificates to be used by the server\n\n    # Generate CA (certificate authority) key and self-signed certificate\n    # In production, CA should be generated by a third party authority\n    # Expires in one day and is not encrtypted (-nodes)\n    # X.509 format for the key\n    step = rf\"openssl req -x509 
-newkey rsa:4096 -days 1 -nodes -keyout {ca_key_path} -out {ca_cert_path} \"\n    step += '-subj \"/C=GR/ST=SKG/L=Thessaloniki/O=KK/OU=AcmeStudios/CN=localhost/emailAddress=acme@gmail.com\"'\n    subprocess.run(step, shell=True)\n\n\ndef gen_certificate(\n    ca_key_path, ca_certificate_path, certificate_request_path, private_key_path, certificate_path\n):\n    # Generate Dragonfly's private key and certificate signing request (CSR)\n    step1 = rf\"openssl req -newkey rsa:4096 -nodes -keyout {private_key_path} -out {certificate_request_path} \"\n    step1 += '-subj \"/C=GR/ST=SKG/L=Thessaloniki/O=KK/OU=Comp/CN=localhost/emailAddress=does_not_exist@gmail.com\"'\n    subprocess.run(step1, shell=True)\n\n    # Use CA's private key to sign dragonfly's CSR and get back the signed certificate\n    step2 = rf\"openssl x509 -req -in {certificate_request_path} -days 1 -CA {ca_certificate_path} -CAkey {ca_key_path} -CAcreateserial -out {certificate_path}\"\n    subprocess.run(step2, shell=True)\n\n\nclass EnvironCntx:\n    def __init__(self, **kwargs):\n        self.updates = kwargs\n        self.undo = {}\n\n    def __enter__(self):\n        for k, v in self.updates.items():\n            if k in os.environ:\n                self.undo[k] = os.environ[k]\n            os.environ[k] = v\n\n    def __exit__(self, exc_type, exc_value, exc_traceback):\n        for k, v in self.updates.items():\n            if k in self.undo:\n                os.environ[k] = self.undo[k]\n            else:\n                del os.environ[k]\n\n\nasync def is_saving(c_client: aioredis.Redis):\n    return \"saving:1\" in (await c_client.execute_command(\"INFO PERSISTENCE\"))\n\n\ndef assert_eventually(wrapped=None, *, times=100, timeout=None):\n    if wrapped is None:\n        return functools.partial(assert_eventually, times=times, timeout=timeout)\n\n    @wrapt.decorator\n    async def wrapper(wrapped, instance, args, kwargs):\n        max_attempts = times\n        if timeout is not None:  # If 
timeout is set, we will ignore times and use timeout.\n            start = time.time()\n            max_attempts = 1 << 32  # Effectively infinite\n\n        for attempt in range(max_attempts):\n            try:\n                result = await wrapped(*args, **kwargs)\n                return result\n            except AssertionError:\n                if timeout is not None and (time.time() - start) > timeout:\n                    raise\n                if attempt == max_attempts - 1:\n                    raise\n                await asyncio.sleep(0.1)\n\n    return wrapper(wrapped)\n\n\ndef skip_if_not_in_github(reason: str = \"Redis server not found\"):\n    if os.getenv(\"GITHUB_ACTIONS\") == None:\n        pytest.skip(reason)\n\n\nclass ExpirySeeder:\n    def __init__(self, stop_on_failure=True, timeout=3):\n        self.stop_flag = False\n        self.i = 0\n        self.batch_size = 200\n        self.stop_on_failure = stop_on_failure\n        self.timeout = timeout\n\n    async def run(self, client):\n        while not self.stop_flag:\n            try:\n                pipeline = client.pipeline(transaction=False)\n                for i in range(0, self.batch_size):\n                    pipeline.execute_command(f\"SET tmp{self.i} bar{self.i} EX {self.timeout}\")\n                    self.i = self.i + 1\n                await pipeline.execute()\n            except (redis.exceptions.ConnectionError, redis.exceptions.ResponseError) as e:\n                if self.stop_on_failure:\n                    return\n                else:\n                    raise SystemExit(e)\n\n    async def wait_until_n_inserts(self, count):\n        while not self.i > count:\n            await asyncio.sleep(0.5)\n\n    def stop(self):\n        self.stop_flag = True\n\n\ndef extract_int_after_prefix(prefix, line):\n    match = re.search(prefix + \"(\\\\d+)\", line)\n    assert match\n    return int(match.group(1))\n\n\nasync def wait_for_replicas_state(*clients, state=\"online\", 
node_role=\"slave\", timeout=0.05):\n    \"\"\"Wait until all clients (replicas) reach passed state\"\"\"\n    while len(clients) > 0:\n        await asyncio.sleep(timeout)\n        roles = await asyncio.gather(*(c.role() for c in clients))\n        clients = [c for c, role in zip(clients, roles) if role[0] != node_role or role[3] != state]\n\n\nasync def check_replica_finished_exec(c_replica: aioredis.Redis, m_offset):\n    role = await c_replica.role()\n    if role[0] != \"slave\" or role[3] != \"online\":\n        return False\n    syncid, r_offset = await c_replica.execute_command(\"DEBUG REPLICA OFFSET\")\n\n    logging.debug(f\"  offset {syncid} {r_offset} {m_offset}\")\n    return r_offset == m_offset\n\n\nasync def check_all_replicas_finished(c_replicas, c_master, timeout=20):\n    logging.debug(\"Waiting for replicas to finish\")\n\n    waiting_for = list(c_replicas)\n    start = time.time()\n    while (time.time() - start) < timeout:\n        if not waiting_for:\n            logging.debug(\"All replicas finished after %s seconds\", time.time() - start)\n            return\n        await asyncio.sleep(0.2)\n        m_offset = await c_master.execute_command(\"DFLY REPLICAOFFSET\")\n        finished_list = await asyncio.gather(\n            *(check_replica_finished_exec(c, m_offset) for c in waiting_for)\n        )\n\n        # Remove clients that finished from waiting list\n        waiting_for = [c for (c, finished) in zip(waiting_for, finished_list) if not finished]\n\n    first_r: aioredis.Redis = waiting_for[0]\n    logging.error(\"Replica not finished, role %s\", await first_r.role())\n    raise RuntimeError(\"Not all replicas finished in time!\")\n\n\nclass LogMonitor:\n    \"\"\"\n    Monitors an instance's INFO log files for a specific pattern in the background.\n\n    Usage:\n        monitor = LogMonitor(instance, \"Internal error when loading RDB\")\n        monitor.start()\n        # ... 
do work ...\n        await monitor.stop()       # stops polling\n        monitor.assert_no_match()  # raises AssertionError if pattern was found\n\n    Can also be used with asyncio.wait to fail fast:\n        done, _ = await asyncio.wait(\n            [work_task, monitor.task], return_when=asyncio.FIRST_COMPLETED\n        )\n        if monitor.task in done:\n            monitor.assert_no_match()\n    \"\"\"\n\n    def __init__(self, instance, pattern: str, poll_interval: float = 0.5):\n        self.instance = instance\n        self.pattern = pattern\n        self.poll_interval = poll_interval\n        self.matched_lines = []\n        self._stop_event = asyncio.Event()\n        self.task = None\n\n    def start(self):\n        self.task = asyncio.create_task(self._poll())\n\n    async def _poll(self):\n        file_positions = {}\n        while not self._stop_event.is_set():\n            for log_path in self.instance.log_files:\n                if \"INFO\" not in log_path:\n                    continue\n                pos = file_positions.get(log_path, 0)\n                try:\n                    with open(log_path, \"r\") as f:\n                        f.seek(pos)\n                        new_content = f.read()\n                        file_positions[log_path] = f.tell()\n                except FileNotFoundError:\n                    continue\n                for line in new_content.splitlines():\n                    if self.pattern in line:\n                        self.matched_lines.append(line.strip())\n                        self._stop_event.set()\n                        return\n            await asyncio.sleep(self.poll_interval)\n\n    async def stop(self):\n        self._stop_event.set()\n        if self.task:\n            self.task.cancel()\n            try:\n                await self.task\n            except asyncio.CancelledError:\n                pass\n\n    def assert_no_match(self):\n        assert not self.matched_lines, f\"Log pattern 
'{self.pattern}' found:\\n\" + \"\\n\".join(\n            self.matched_lines\n        )\n"
  },
  {
    "path": "tests/dragonfly/valkey_search/README.md",
    "content": "# Valkey-Search Integration Tests for Dragonfly\r\n\r\nIntegration tests from [valkey-search](https://github.com/valkey-io/valkey-search) project, adapted to run on Dragonfly without modifying the original test code.\r\n\r\n## Prerequisites\r\n\r\n1. Build Dragonfly\r\n\r\n2. Install Python dependencies:\r\n   ```bash\r\n   pip install -r tests/dragonfly/requirements.txt\r\n   ```\r\n\r\n## Setup\r\n\r\n1. Sync tests from valkey-search:\r\n   ```bash\r\n   cd tests/dragonfly/valkey_search\r\n   ./sync-valkey-search-tests.sh\r\n   ```\r\n\r\n2. Set environment variables:\r\n   ```bash\r\n   export DRAGONFLY_PATH=\"/path/to/dragonfly/build-dbg/dragonfly\"\r\n   export ROOT_DIR=\"/path/to/dragonfly/tests/dragonfly/valkey_search\"\r\n   ```\r\n\r\n## Running Tests\r\n\r\n```bash\r\n# All tests\r\npytest tests/dragonfly/valkey_search/integration/ -v\r\n\r\n# Specific test file\r\npytest tests/dragonfly/valkey_search/integration/test_ft_create.py -v\r\n\r\n# Specific test\r\npytest tests/dragonfly/valkey_search/integration/test_ft_create.py::TestSearchFTCreateCMD::test_ft_create_fails_on_replica_cmd -v\r\n```\r\n\r\n## Structure\r\n\r\n```\r\ntests/dragonfly/valkey_search/\r\n __init__.py                          # Mock framework for valkey-search imports\r\n conftest.py                          # Pytest configuration\r\n util.py                              # Utility functions (waiters)\r\n valkey_search_test_case_dragonfly.py # Dragonfly adapter (real replicas, clusters)\r\n sync-valkey-search-tests.sh          # Script to sync tests\r\n integration/                         # Synced from valkey-search (not in git)\r\n```\r\n\r\n## How It Works\r\n\r\n1. **Infrastructure files** (committed to git) provide compatibility layer\r\n2. **Test files** (in `integration/`, not in git) are synced from valkey-search\r\n3. **Mock framework** (`__init__.py`) replaces valkey-search imports with Dragonfly equivalents\r\n4. 
**Adapter** (`valkey_search_test_case_dragonfly.py`) creates real Dragonfly instances with replicas\r\n5. **Original tests run unchanged** - all adaptation happens in the infrastructure layer\r\n6. **Python 3.8 compatibility** - sync script patches all `.py` files to add `from __future__ import annotations`\r\n"
  },
  {
    "path": "tests/dragonfly/valkey_search/__init__.py",
    "content": "\"\"\"\r\nValkey-search integration tests for Dragonfly\r\n\r\nThis module automatically adapts original valkey-search tests to run on Dragonfly\r\nby replacing valkeytestframework imports with Dragonfly equivalents.\r\n\"\"\"\r\n\r\nimport sys\r\nimport types\r\nimport os\r\nfrom . import util\r\n\r\n# Check if integration directory exists before attempting import\r\n_integration_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), \"integration\")\r\nINTEGRATION_AVAILABLE = os.path.isdir(_integration_dir)\r\n\r\nif not INTEGRATION_AVAILABLE:\r\n    pass  # pytest_ignore_collect in conftest.py will skip these tests\r\nelse:\r\n    from .integration import compatibility\r\n\r\n    # Add current directory to path for imports\r\n    current_dir = os.path.dirname(os.path.abspath(__file__))\r\n    if current_dir not in sys.path:\r\n        sys.path.insert(0, current_dir)\r\n\r\n    # Import the Dragonfly-specific test case classes\r\n    with open(os.path.join(current_dir, \"valkey_search_test_case_dragonfly.py\")) as f:\r\n        exec(f.read())\r\n\r\n    # Create a mock module for valkey_search_test_case\r\n    mock_module = types.ModuleType(\"valkey_search_test_case\")\r\n    mock_module.ValkeySearchTestCaseBase = ValkeySearchTestCaseBase\r\n    mock_module.ValkeySearchTestCaseDebugMode = ValkeySearchTestCaseDebugMode\r\n    mock_module.ValkeySearchClusterTestCase = ValkeySearchClusterTestCase\r\n    mock_module.ValkeySearchClusterTestCaseDebugMode = ValkeySearchClusterTestCaseDebugMode\r\n    mock_module.Node = Node\r\n    mock_module.ReplicationGroup = ReplicationGroup\r\n\r\n    # Replace the module in sys.modules\r\n    sys.modules[\"valkey_search_test_case\"] = mock_module\r\n\r\n    # Also need to provide valkeytestframework modules\r\n    valkey_test_framework = types.ModuleType(\"valkeytestframework\")\r\n\r\n    valkey_test_case = types.ModuleType(\"valkeytestframework.valkey_test_case\")\r\n    valkey_test_case.ValkeyTestCase = 
ValkeyTestCase\r\n    valkey_test_case.ReplicationTestCase = ReplicationTestCase\r\n    valkey_test_case.ValkeyServerHandle = ValkeyServerHandle\r\n\r\n    util_module = types.ModuleType(\"valkeytestframework.util\")\r\n    waiters_module = types.ModuleType(\"valkeytestframework.util.waiters\")\r\n\r\n    waiters_module.wait_for_true = util.waiters.wait_for_true\r\n    waiters_module.wait_for_equal = util.waiters.wait_for_equal\r\n    waiters_module.wait_for_not_equal = util.waiters.wait_for_not_equal\r\n    waiters_module.wait_for_condition = util.waiters.wait_for_condition\r\n    util_module.waiters = waiters_module\r\n\r\n    # Also add direct util module access\r\n    sys.modules[\"util\"] = util_module\r\n    sys.modules[\"util.waiters\"] = waiters_module\r\n\r\n    conftest_module = types.ModuleType(\"valkeytestframework.conftest\")\r\n    conftest_module.resource_port_tracker = types.ModuleType(\"resource_port_tracker\")\r\n\r\n    # Setup compatibility as a module in sys.modules\r\n    sys.modules[\"compatibility\"] = compatibility\r\n\r\n    # Also set up the submodules\r\n    if hasattr(compatibility, \"data_sets\"):\r\n        sys.modules[\"compatibility.data_sets\"] = compatibility.data_sets\r\n\r\n    # Add all modules to sys.modules\r\n    sys.modules[\"valkeytestframework\"] = valkey_test_framework\r\n    sys.modules[\"valkeytestframework.valkey_test_case\"] = valkey_test_case\r\n    sys.modules[\"valkeytestframework.util\"] = util_module\r\n    sys.modules[\"valkeytestframework.util.waiters\"] = waiters_module\r\n    sys.modules[\"valkeytestframework.conftest\"] = conftest_module\r\n"
  },
  {
    "path": "tests/dragonfly/valkey_search/conftest.py",
    "content": "\"\"\"\r\nPytest configuration for valkey-search tests on Dragonfly\r\n\"\"\"\r\n\r\nimport os\r\nimport pytest\r\nfrom .. import dfly_args\r\n\r\n# Check if integration directory exists\r\n_integration_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), \"integration\")\r\nINTEGRATION_AVAILABLE = os.path.isdir(_integration_dir)\r\n\r\n\r\ndef pytest_ignore_collect(collection_path, config):\r\n    \"\"\"Skip collection of integration tests if directory is not synced.\"\"\"\r\n    if not INTEGRATION_AVAILABLE and \"integration\" in str(collection_path):\r\n        return True\r\n    return None\r\n\r\n\r\n# List of tests to skip - add test node IDs here\r\n# Example format: \"integration/test_file.py::TestClass::test_method\"\r\nSKIP_TESTS = [\r\n    \"integration/compatibility_test.py::TestAnswersCMD::test_answers\",\r\n    \"integration/test_cancel.py::TestCancelCMD::test_timeoutCMD\",\r\n    \"integration/test_cancel.py::TestCancelCME::test_timeoutCME\",\r\n    \"integration/test_eviction.py::TestEviction::test_eviction_with_search_index\",\r\n    \"integration/test_fanout_base.py::TestFanoutBase::test_fanout_retry\",\r\n    \"integration/test_fanout_base.py::TestFanoutBase::test_fanout_shutdown\",\r\n    \"integration/test_fanout_base.py::TestFanoutBase::test_fanout_timeout\",\r\n    \"integration/test_flushall.py::TestFlushAllCME::test_flushallCME\",\r\n    \"integration/test_ft_create_consistency.py::TestFTCreateConsistency::test_create_force_index_name_error_retry\",\r\n    \"integration/test_ft_create_consistency.py::TestFTCreateConsistency::test_duplicate_creation\",\r\n    \"integration/test_ft_create_consistency.py::TestFTCreateConsistency::test_concurrent_creation\",\r\n    \"integration/test_ft_create_consistency.py::TestFTCreateConsistency::test_create_timeout\",\r\n    \"integration/test_ft_dropindex_consistency.py::TestFTDropindexConsistency::test_dropindex_synchronize_handle_message_first\",\r\n    
\"integration/test_ft_dropindex_consistency.py::TestFTDropindexConsistency::test_dropindex_synchronize_consistency_check_first\",\r\n    \"integration/test_info.py::TestVSSBasic::test_info_fields_present\",\r\n    \"integration/test_info_cluster.py::TestFTInfoCluster::test_ft_info_cluster_success\",\r\n    \"integration/test_info_cluster.py::TestFTInfoCluster::test_ft_info_cluster_force_index_name_error_retry\",\r\n    \"integration/test_info_cluster.py::TestFTInfoCluster::test_ft_info_cluster_retry\",\r\n    \"integration/test_info_primary.py::TestFTInfoPrimary::test_ft_info_primary_success\",\r\n    \"integration/test_info_primary.py::TestFTInfoPrimary::test_ft_info_primary_force_index_name_error_retry\",\r\n    \"integration/test_info_primary.py::TestFTInfoPrimary::test_ft_info_primary_retry\",\r\n    \"integration/test_oom_handling.py::TestSearchOOMHandlingCME::test_search_oom_cme\",\r\n    \"integration/test_oom_handling.py::TestSearchOOMHandlingCMD::test_search_oom_cmd\",\r\n    \"integration/test_query_parser.py::TestQueryParser::test_query_string_depth_limit\",\r\n    \"integration/test_query_parser.py::TestQueryParser::test_query_string_terms_count_limit\",\r\n    \"integration/test_reclaimable_memory.py::TestReclaimableMemory::test_reclaimable_memory_with_vector_operations\",\r\n    \"integration/test_reclaimable_memory.py::TestReclaimableMemory::test_reclaimable_memory_multiple_indexes\",\r\n    \"integration/test_skip_index_load.py::TestRDBCorruptedIndex::test_corrupted_rdb_skip_index_load_succeeds\",\r\n    \"integration/test_valkey_search_acl.py::TestCommandsACLs::test_acl_specific_search_commands_permissions\",\r\n    \"integration/test_valkey_search_acl.py::TestCommandsACLs::test_index_with_several_prefixes_permissions\",\r\n    \"integration/test_valkey_search_acl.py::TestCommandsACLs::test_valkey_search_cmds_categories\",\r\n]\r\n\r\n\r\n# Apply dfly_args to all test classes in this directory\r\ndef pytest_collection_modifyitems(items):\r\n    
\"\"\"Apply dfly_args decorator to all test classes and skip marked tests\"\"\"\r\n    for item in items:\r\n        if item.cls and not hasattr(item.cls, \"_dfly_args_applied\"):\r\n            # Apply the decorator to the class\r\n            decorated_class = dfly_args({\"proactor_threads\": 4})(item.cls)\r\n            item.cls._dfly_args_applied = True\r\n\r\n        # Skip tests that are in the skip list\r\n        # Get the relative path from valkey_search directory\r\n        item_path = str(item.nodeid)\r\n        for skip_pattern in SKIP_TESTS:\r\n            if skip_pattern in item_path:\r\n                item.add_marker(pytest.mark.skip(reason=f\"Test skipped: {skip_pattern}\"))\r\n"
  },
  {
    "path": "tests/dragonfly/valkey_search/sync-valkey-search-tests.sh",
    "content": "#!/bin/bash\nset -e\n\nSCRIPT_DIR=\"$(cd \"$(dirname \"${BASH_SOURCE[0]}\")\" && pwd)\"\nINTEGRATION_DIR=\"$SCRIPT_DIR/integration\"\nVALKEY_SEARCH_REPO=\"https://github.com/valkey-io/valkey-search.git\"\nTEMP_DIR=$(mktemp -d)\n\n# Accept optional tag/revision parameter\nTAG_OR_REV=\"${1:-}\"\n\nif [ -n \"$TAG_OR_REV\" ]; then\n  echo \"Syncing valkey-search tests from tag/revision: $TAG_OR_REV\"\nelse\n  echo \"Syncing valkey-search tests from latest commit...\"\nfi\n\n# Remove old integration directory\nrm -rf \"$INTEGRATION_DIR\"\n\n# Clone to temp directory\nif [ -n \"$TAG_OR_REV\" ]; then\n  # Clone with full history and checkout specific tag/revision\n  git clone \"$VALKEY_SEARCH_REPO\" \"$TEMP_DIR\" >/dev/null 2>&1\n  pushd \"$TEMP_DIR\" >/dev/null\n  git checkout \"$TAG_OR_REV\" >/dev/null 2>&1\n  popd >/dev/null\nelse\n  # Clone only the latest commit (shallow clone)\n  git clone --depth=1 \"$VALKEY_SEARCH_REPO\" \"$TEMP_DIR\" >/dev/null 2>&1\nfi\n\n# Copy integration directory\ncp -r \"$TEMP_DIR/integration\" \"$INTEGRATION_DIR\"\n\n# Patch all Python files for Python 3.8 compatibility\n# Add 'from __future__ import annotations' to support modern type hints\necho \"Patching Python files for Python 3.8 compatibility...\"\nfind \"$INTEGRATION_DIR\" -name \"*.py\" -type f | while read -r file; do\n  # Check if the file doesn't already have 'from __future__ import annotations'\n  if ! grep -q \"from __future__ import annotations\" \"$file\"; then\n    sed -i '1i from __future__ import annotations' \"$file\"\n  fi\ndone\n\n# Cleanup\nrm -rf \"$TEMP_DIR\"\n\necho \"Done. Synced $(find \"$INTEGRATION_DIR\" -name '*test*.py' | wc -l) test files.\"\n"
  },
  {
    "path": "tests/dragonfly/valkey_search/util.py",
    "content": "\"\"\"\r\nUtility module for valkey-search tests running on Dragonfly\r\nProvides waiters functionality compatible with valkeytestframework.util.waiters\r\n\"\"\"\r\n\r\nimport time\r\n\r\n\r\nclass waiters:\r\n    \"\"\"Waiters utility class for test synchronization\"\"\"\r\n\r\n    @staticmethod\r\n    def wait_for_true(func, timeout=30, interval=0.1):\r\n        \"\"\"\r\n        Wait for a function to return True\r\n\r\n        Args:\r\n            func: Function to call repeatedly until it returns True\r\n            timeout: Maximum time to wait in seconds (default: 30)\r\n            interval: Time between checks in seconds (default: 0.1)\r\n\r\n        Returns:\r\n            True if function returned True within timeout, False otherwise\r\n        \"\"\"\r\n        start_time = time.time()\r\n        while time.time() - start_time < timeout:\r\n            try:\r\n                if func():\r\n                    return True\r\n            except Exception:\r\n                # Ignore exceptions during polling\r\n                pass\r\n            time.sleep(interval)\r\n        return False\r\n\r\n    @staticmethod\r\n    def wait_for_equal(func, value, timeout=30, interval=0.1):\r\n        \"\"\"\r\n        Wait for a function to return a specific value\r\n\r\n        Args:\r\n            func: Function to call repeatedly\r\n            value: Expected return value\r\n            timeout: Maximum time to wait in seconds (default: 30)\r\n            interval: Time between checks in seconds (default: 0.1)\r\n\r\n        Returns:\r\n            True if function returned expected value within timeout, False otherwise\r\n        \"\"\"\r\n        start_time = time.time()\r\n        while time.time() - start_time < timeout:\r\n            try:\r\n                if func() == value:\r\n                    return True\r\n            except Exception:\r\n                # Ignore exceptions during polling\r\n                pass\r\n            
time.sleep(interval)\r\n        return False\r\n\r\n    @staticmethod\r\n    def wait_for_not_equal(func, value, timeout=30, interval=0.1):\r\n        \"\"\"\r\n        Wait for a function to return a value different from the specified one\r\n\r\n        Args:\r\n            func: Function to call repeatedly\r\n            value: Value that should NOT be returned\r\n            timeout: Maximum time to wait in seconds (default: 30)\r\n            interval: Time between checks in seconds (default: 0.1)\r\n\r\n        Returns:\r\n            True if function returned different value within timeout, False otherwise\r\n        \"\"\"\r\n        start_time = time.time()\r\n        while time.time() - start_time < timeout:\r\n            try:\r\n                if func() != value:\r\n                    return True\r\n            except Exception:\r\n                # Ignore exceptions during polling\r\n                pass\r\n            time.sleep(interval)\r\n        return False\r\n\r\n    @staticmethod\r\n    def wait_for_condition(condition_func, timeout=30, interval=0.1):\r\n        \"\"\"\r\n        Wait for a condition function to return True\r\n        Alias for wait_for_true for compatibility\r\n        \"\"\"\r\n        return waiters.wait_for_true(condition_func, timeout, interval)\r\n\r\n\r\n# For backward compatibility with direct import style\r\nwait_for_true = waiters.wait_for_true\r\nwait_for_equal = waiters.wait_for_equal\r\nwait_for_not_equal = waiters.wait_for_not_equal\r\nwait_for_condition = waiters.wait_for_condition\n"
  },
  {
    "path": "tests/dragonfly/valkey_search/valkey_search_test_case_dragonfly.py",
    "content": "\"\"\"\r\nDragonfly adapter for valkey_search_test_case.py\r\nCreates real Dragonfly instances with replicas and clusters\r\n\"\"\"\r\n\r\nimport os\r\nimport time\r\nimport pytest\r\nimport valkey\r\nfrom valkey import ResponseError\r\nfrom valkey.client import Valkey\r\nfrom typing import List\r\nimport random\r\nimport string\r\nimport logging\r\n\r\n# Import Dragonfly test infrastructure\r\nfrom ..instance import DflyInstance, DflyInstanceFactory\r\n\r\nLOGS_DIR = \"/tmp/dragonfly-valkey-test-logs\"\r\n\r\nif \"LOGS_DIR\" in os.environ:\r\n    LOGS_DIR = os.environ[\"LOGS_DIR\"]\r\n\r\n\r\nclass Node:\r\n    \"\"\"This class represents a Dragonfly instance as a valkey server node\"\"\"\r\n\r\n    def __init__(\r\n        self,\r\n        client=None,\r\n        server=None,\r\n        logfile=None,\r\n        df_instance=None,\r\n    ):\r\n        self.client: Valkey = client\r\n        self.server = server\r\n        self.logfile: str = logfile\r\n        self.df_instance: DflyInstance = df_instance\r\n\r\n    def does_logfile_contains(self, pattern: str) -> bool:\r\n        # For Dragonfly, simplified log checking\r\n        return True\r\n\r\n\r\nclass ValkeyServerHandle:\r\n    \"\"\"Adapter for Dragonfly instance to look like ValkeyServerHandle\"\"\"\r\n\r\n    def __init__(self, df_instance: DflyInstance):\r\n        self.df_instance = df_instance\r\n        self.bind_ip = \"127.0.0.1\"\r\n        self.port = df_instance.port if df_instance else 6379\r\n\r\n    def pid(self):\r\n        return self.df_instance.proc.pid if self.df_instance and self.df_instance.proc else None\r\n\r\n    def get_new_client(self):\r\n        return valkey.Valkey(host=self.bind_ip, port=self.port, decode_responses=False)\r\n\r\n\r\nclass ReplicationGroup:\r\n    \"\"\"Replication group for Dragonfly\"\"\"\r\n\r\n    def __init__(\r\n        self,\r\n        primary,\r\n        replicas=None,\r\n    ):\r\n        self.primary: Node = primary\r\n        
self.replicas: List[Node] = replicas or []\r\n        self._setup_done = False\r\n\r\n    def setup_replications_cluster(self):\r\n        # For cluster mode - not needed for single master/replica\r\n        pass\r\n\r\n    def setup_replications_cmd(self):\r\n        \"\"\"Setup replication using REPLICAOF command\"\"\"\r\n        if self._setup_done or not self.replicas:\r\n            return\r\n\r\n        primary_ip = \"localhost\"\r\n        primary_port = self.primary.df_instance.port\r\n\r\n        # Configure each replica\r\n        for replica in self.replicas:\r\n            try:\r\n                # Use REPLICAOF to setup replication\r\n                result = replica.client.execute_command(f\"REPLICAOF {primary_ip} {primary_port}\")\r\n                logging.debug(f\"Setup replica on port {replica.df_instance.port}: {result}\")\r\n            except Exception as e:\r\n                logging.error(f\"Failed to setup replica: {e}\")\r\n\r\n        self._setup_done = True\r\n        self._wait_for_replication()\r\n\r\n    def _wait_for_replication(self):\r\n        \"\"\"Wait for replicas to sync\"\"\"\r\n        # Give replicas time to connect\r\n        time.sleep(0.5)\r\n\r\n        # Check if replicas are connected\r\n        try:\r\n            info = self.primary.client.info(\"replication\")\r\n            connected_slaves = info.get(\"connected_slaves\", 0)\r\n            logging.debug(f\"Connected slaves: {connected_slaves}, expected: {len(self.replicas)}\")\r\n        except Exception as e:\r\n            logging.debug(f\"Could not check replication status: {e}\")\r\n\r\n    def _check_all_replicas_are_connected(self):\r\n        try:\r\n            return self.primary.client.info(\"replication\")[\"connected_slaves\"] == len(self.replicas)\r\n        except:\r\n            return False\r\n\r\n    def _check_is_replica_online(self, name) -> bool:\r\n        try:\r\n            replica_status = self.primary.client.info(\"replication\")[name]\r\n 
           return replica_status[\"state\"] == \"online\"\r\n        except:\r\n            return False  # Assume offline if we can't check\r\n\r\n    def get_replica_connection(self, index) -> Valkey:\r\n        if index < len(self.replicas):\r\n            return self.replicas[index].client\r\n        raise IndexError(f\"No replica at index {index}\")\r\n\r\n    def get_primary_connection(self) -> Valkey:\r\n        return self.primary.client\r\n\r\n    @staticmethod\r\n    def cleanup(rg):\r\n        \"\"\"Cleanup Dragonfly instances\"\"\"\r\n        # Cleanup is handled by Dragonfly fixtures\r\n        pass\r\n\r\n\r\nclass ValkeySearchTestCaseCommon:\r\n    \"\"\"Common base class for tests\"\"\"\r\n\r\n    pass\r\n\r\n\r\nclass ValkeyTestCase(ValkeySearchTestCaseCommon):\r\n    \"\"\"Base test case class\"\"\"\r\n\r\n    pass\r\n\r\n\r\nclass ReplicationTestCase(ValkeyTestCase):\r\n    \"\"\"Replication test case\"\"\"\r\n\r\n    pass\r\n\r\n\r\nclass ValkeySearchTestCaseBase(ValkeySearchTestCaseCommon):\r\n    \"\"\"Base test case for valkey-search tests running on Dragonfly\"\"\"\r\n\r\n    @pytest.fixture(autouse=True)\r\n    def setup_test(self, request, df_factory: DflyInstanceFactory):\r\n        \"\"\"Setup test with Dragonfly instances\"\"\"\r\n        # Get replica count from parametrize if provided\r\n        replica_count = 0\r\n        if hasattr(request, \"param\") and \"replica_count\" in request.param:\r\n            replica_count = request.param[\"replica_count\"]\r\n\r\n        # Create primary instance\r\n        primary_df = df_factory.create(proactor_threads=4)\r\n        primary_df.start()\r\n\r\n        primary_client = valkey.Valkey(\r\n            host=\"127.0.0.1\", port=primary_df.port, decode_responses=False\r\n        )\r\n\r\n        primary_server = ValkeyServerHandle(primary_df)\r\n        primary_node = Node(\r\n            client=primary_client, server=primary_server, logfile=None, df_instance=primary_df\r\n        )\r\n\r\n  
      # Create replica instances\r\n        replicas: List[Node] = []\r\n        for i in range(replica_count):\r\n            replica_df = df_factory.create(proactor_threads=4)\r\n            replica_df.start()\r\n\r\n            replica_client = valkey.Valkey(\r\n                host=\"127.0.0.1\", port=replica_df.port, decode_responses=False\r\n            )\r\n\r\n            replica_server = ValkeyServerHandle(replica_df)\r\n            replica_node = Node(\r\n                client=replica_client, server=replica_server, logfile=None, df_instance=replica_df\r\n            )\r\n            replicas.append(replica_node)\r\n\r\n        # Setup replication group\r\n        self.rg = ReplicationGroup(primary=primary_node, replicas=replicas)\r\n\r\n        # Configure replication\r\n        if replica_count > 0:\r\n            self.rg.setup_replications_cmd()\r\n\r\n        self.server = self.rg.primary.server\r\n        self.client = self.rg.primary.client\r\n        self.nodes: List[Node] = [self.rg.primary] + self.rg.replicas\r\n\r\n        yield\r\n\r\n        # Cleanup is handled by df_factory\r\n\r\n    def verify_error_response(self, client, cmd, expected_err_reply):\r\n        try:\r\n            if isinstance(cmd, str):\r\n                cmd_args = cmd.split()\r\n            else:\r\n                cmd_args = cmd\r\n            client.execute_command(*cmd_args)\r\n            assert False, f\"Expected error '{expected_err_reply}' but command succeeded\"\r\n        except ResponseError as e:\r\n            error_str = str(e)\r\n            assert (\r\n                expected_err_reply in error_str\r\n            ), f\"Actual error message: '{error_str}' doesn't contain expected: '{expected_err_reply}'\"\r\n            return error_str\r\n\r\n    def verify_server_key_count(self, client, expected_num_keys):\r\n        actual_num_keys = client.dbsize()\r\n        assert (\r\n            actual_num_keys == expected_num_keys\r\n        ), f\"Actual key number 
{actual_num_keys} is different from expected key number {expected_num_keys}\"\r\n\r\n    def generate_random_string(self, length=7):\r\n        \"\"\"Creates a random string with specified length.\"\"\"\r\n        characters = string.ascii_letters + string.digits\r\n        random_string = \"\".join(random.choice(characters) for _ in range(length))\r\n        return random_string\r\n\r\n    def parse_valkey_info(self, section):\r\n        mem_info = self.client.execute_command(\"INFO \" + section)\r\n        if isinstance(mem_info, bytes):\r\n            mem_info = mem_info.decode(\"utf-8\")\r\n        lines = mem_info.split(\"\\\\r\\\\n\")\r\n        stats_dict = {}\r\n        for line in lines:\r\n            if \":\" in line:\r\n                key, value = line.split(\":\", 1)\r\n                stats_dict[key.strip()] = value.strip()\r\n        return stats_dict\r\n\r\n    def start_new_server(self, is_primary=True) -> Node:\r\n        \"\"\"Return existing or create new server\"\"\"\r\n        if is_primary:\r\n            return self.rg.primary\r\n        elif self.rg.replicas:\r\n            return self.rg.replicas[0]\r\n        else:\r\n            # No replicas configured\r\n            return self.rg.primary\r\n\r\n    def get_replica_connection(self, index) -> Valkey:\r\n        return self.rg.get_replica_connection(index)\r\n\r\n    def get_primary_connection(self) -> Valkey:\r\n        return self.rg.get_primary_connection()\r\n\r\n\r\nclass ValkeySearchTestCaseDebugMode(ValkeySearchTestCaseBase):\r\n    \"\"\"Debug mode variant\"\"\"\r\n\r\n    pass\r\n\r\n\r\nclass ValkeySearchClusterTestCase(ValkeySearchTestCaseCommon):\r\n    \"\"\"Cluster test case - simplified for single Dragonfly instance\"\"\"\r\n\r\n    CLUSTER_SIZE = 1  # Simplified to single node\r\n    REPLICAS_COUNT = 0\r\n\r\n    @pytest.fixture(autouse=True)\r\n    def setup_test(self, request, df_factory: DflyInstanceFactory):\r\n        \"\"\"Setup cluster test with Dragonfly 
instances\"\"\"\r\n\r\n        # Get replica count from parametrize if provided\r\n        replica_count = 0\r\n        if hasattr(request, \"param\") and \"replica_count\" in request.param:\r\n            replica_count = request.param[\"replica_count\"]\r\n\r\n        # Create primary instance\r\n        primary_df = df_factory.create(proactor_threads=4)\r\n        primary_df.start()\r\n\r\n        primary_client = valkey.Valkey(\r\n            host=\"127.0.0.1\", port=primary_df.port, decode_responses=False\r\n        )\r\n\r\n        primary_server = ValkeyServerHandle(primary_df)\r\n        primary_node = Node(\r\n            client=primary_client, server=primary_server, logfile=None, df_instance=primary_df\r\n        )\r\n\r\n        # Create replica instances\r\n        replicas: List[Node] = []\r\n        for i in range(replica_count):\r\n            replica_df = df_factory.create(proactor_threads=4)\r\n            replica_df.start()\r\n\r\n            replica_client = valkey.Valkey(\r\n                host=\"127.0.0.1\", port=replica_df.port, decode_responses=False\r\n            )\r\n\r\n            replica_server = ValkeyServerHandle(replica_df)\r\n            replica_node = Node(\r\n                client=replica_client, server=replica_server, logfile=None, df_instance=replica_df\r\n            )\r\n            replicas.append(replica_node)\r\n\r\n        rg = ReplicationGroup(primary=primary_node, replicas=replicas)\r\n\r\n        # Configure replication\r\n        if replica_count > 0:\r\n            rg.setup_replications_cmd()\r\n\r\n        self.replication_groups = [rg]\r\n        self.nodes: List[Node] = [rg.primary] + rg.replicas\r\n\r\n        yield\r\n\r\n        # Cleanup handled by df_factory\r\n\r\n    def get_primary(self, index):\r\n        return self.replication_groups[index].primary.server\r\n\r\n    def get_primary_port(self, index):\r\n        return self.replication_groups[index].primary.server.port\r\n\r\n    def 
new_client_for_primary(self, index):\r\n        return self.replication_groups[index].primary.server.get_new_client()\r\n\r\n    def client_for_primary(self, index):\r\n        return self.replication_groups[index].primary.client\r\n\r\n    def get_all_primary_clients(self) -> List[Valkey]:\r\n        return [rg.primary.client for rg in self.replication_groups]\r\n\r\n    def get_replication_group(self, index):\r\n        return self.replication_groups[index]\r\n\r\n    def new_cluster_client(self):\r\n        \"\"\"Return regular client for single-node\"\"\"\r\n        return self.replication_groups[0].primary.client\r\n\r\n\r\nclass ValkeySearchClusterTestCaseDebugMode(ValkeySearchClusterTestCase):\r\n    \"\"\"Debug mode cluster variant\"\"\"\r\n\r\n    pass\r\n"
  },
  {
    "path": "tests/fakeredis/README.md",
    "content": "Running FakeRedis tests on Dragonfly\n====================================\n\nFakeRedis is a Python library that provides a full implementation of the Redis protocol. It is useful for testing Redis\nclients and for running Redis commands in Python code without having a running Redis server.\n\nThe tests in this directory are running against FakeRedis and against a dragonfly instance.\nThe results are then compared to ensure that the two implementations are consistent.\n\n## Prerequisites\n\n- Python 3.10 or above is required to run the tests.\n- Poetry is required to install the dependencies.\n- A dragonfly instance running on port 6380.\n\n## Setup environment\n\n1. Install Poetry by following the instructions at https://python-poetry.org/docs/#installation.\n2. From the root directory of the tests (`dragonfly/tests/fakeredis`) run `poetry env use python3.10` (or higher) to\n   create a virtual environment for Python 3.10.\n3. Run `poetry install` to install the dependencies.\n4. Run `poetry run pytest -v` to run all the tests.\n5. Or alternatively, run `poetry run pytest -v test/{test-name}` to run a specific set of tests.\n\n## Tests\n\n- `test_connection.py`: Tests for the connection parameters to the Dragonfly server.\n- `test_zadd.py`: Considering the various options for the ZADD command, it has its own set of tests.\n- `test_json/*.py`: Tests for the JSON commands.\n- `test_stack/*.py`: Tests for the stack commands, bloom filter, cuckoo filter, CMS, TDigest, time-series, top-k.\n- `test_mixins/*.py`: Tests for various generic commands: bitmap, geospacial, hash, list, pubsub, scripting, streams,\n  string, etc.\n- `test_hypothesis.py`: Hypothesis tests for the mixins commands. These tests are using [hypothesis][1] and generate\n  random tests with edge cases. 
Note these tests take significantly more time to run.\n\n## General info\n\n- `@pytest.mark.unsupported_server_types(\"dragonfly\")` decorator indicates to pytest that the test should not run on\n  dragonfly.\n  - Some tests are skipped because the commands are CURRENTLY not supported (e.g., `GEORADIUS`).\n  - Others are skipped because they cause unexpected behavior, and are usually marked with a TODO comment as well.\n\n[1]: https://hypothesis.readthedocs.io/en/latest/\n"
  },
  {
    "path": "tests/fakeredis/pyproject.toml",
    "content": "[build-system]\nrequires = [\"poetry_core\"]\nbuild-backend = \"poetry.core.masonry.api\"\n\n[tool.poetry]\nname = \"dragonfly-fakeredis-tests\"\npackages = [\n    { include = \"test\" },\n]\nversion = \"0.1.0\"\ndescription = \"Tests running against dragonfly and fakeredis to check compatibility\"\nauthors = [\n    \"Daniel Moran <daniel@moransoftware.ca>\",\n]\nmaintainers = [\n    \"Daniel Moran <daniel@moransoftware.ca>\",\n]\n\n[tool.poetry.dependencies]\npython = \"^3.10\"\nredis = \">=5\"\nfakeredis = { version = \"^2.26.1\", extras = [\"json\", \"bf\", \"cf\", \"lua\"] }\nhypothesis = \"^6.111\"\npytest = \"^8.3\"\npytest-timeout = \"^2.3.1\"\npytest-asyncio = \"^0.24\"\npytest-cov = \"^5.0\"\npytest-mock = \"^3.14\"\npytest-html = \"^4.1\"\n\n[tool.pytest.ini_options]\nasyncio_default_fixture_loop_scope = \"function\"\nmarkers = [\n    \"slow: marks tests as slow (deselect with '-m \\\"not slow\\\"')\",\n    \"fake: run tests only with fake redis\",\n    \"real: run tests with a locally running real Redis server\",\n    \"disconnected\",\n    \"min_server\",\n    \"max_server\",\n    \"decode_responses\",\n    \"unsupported_server_types\",\n]\nasyncio_mode = \"strict\"\ngenerate_report_on_test = true\nrender_collapsed = \"failed,error\"\naddopts = [\n    \"--self-contained-html\",\n    \"--import-mode=importlib\",\n]\n"
  },
  {
    "path": "tests/fakeredis/test/__init__.py",
    "content": ""
  },
  {
    "path": "tests/fakeredis/test/conftest.py",
    "content": "from typing import Callable, Tuple, Union, Optional\n\nimport fakeredis\nimport pytest\nimport pytest_asyncio\nimport redis\nfrom fakeredis._server import _create_version\n\n\ndef _check_lua_module_supported() -> bool:\n    redis = fakeredis.FakeRedis(lua_modules={\"cjson\"})\n    try:\n        redis.eval(\"return cjson.encode({})\", 0)\n        return True\n    except Exception:\n        return False\n\n\n@pytest_asyncio.fixture(scope=\"session\")\ndef real_redis_version() -> Tuple[str, Union[None, Tuple[int, ...]]]:\n    \"\"\"Returns the server type and version; exits the test session if the server is not running\"\"\"\n    client = None\n    try:\n        client = redis.StrictRedis(\"localhost\", port=6380, db=2)\n        client_info = client.info()\n        server_type = \"dragonfly\" if \"dragonfly_version\" in client_info else \"redis\"\n        server_version = (\n            client_info[\"redis_version\"] if server_type != \"dragonfly\" else (7, 0)\n        )\n        server_version = _create_version(server_version) or (7,)\n        return server_type, server_version\n    except redis.ConnectionError:\n        pytest.exit(\"Redis is not running\")\n        return \"redis\", (6,)\n    finally:\n        if hasattr(client, \"close\"):\n            client.close()  # Absent in older versions of redis-py\n\n\n@pytest_asyncio.fixture(name=\"fake_server\")\ndef _fake_server(request) -> fakeredis.FakeServer:\n    min_server_marker = request.node.get_closest_marker(\"min_server\")\n    server_version = min_server_marker.args[0] if min_server_marker else \"7\"\n    server = fakeredis.FakeServer(version=server_version)\n    server.connected = request.node.get_closest_marker(\"disconnected\") is None\n    return server\n\n\n@pytest_asyncio.fixture\ndef r(request, create_redis) -> redis.Redis:\n    rconn = create_redis(db=2)\n    connected = request.node.get_closest_marker(\"disconnected\") is None\n    if connected:\n        rconn.flushall()\n    yield rconn\n    if connected:\n        rconn.flushall()\n    # Check the connection object; `r` is this fixture function and never has close()\n    if hasattr(rconn, \"close\"):\n        rconn.close()  # Older versions of redis-py don't have this method\n\n\ndef _marker_version_value(request, marker_name: str):\n    marker_value = request.node.get_closest_marker(marker_name)\n    if marker_value is None:\n        return (0,) if marker_name == \"min_server\" else (100,)\n    return _create_version(marker_value.args[0])\n\n\n@pytest_asyncio.fixture(\n    name=\"create_redis\",\n    params=[\n        pytest.param(\"StrictRedis\", marks=pytest.mark.real),\n        pytest.param(\"FakeStrictRedis\", marks=pytest.mark.fake),\n    ],\n)\ndef _create_redis(request) -> Callable[[int], redis.Redis]:\n    cls_name = request.param\n    server_type, server_version = request.getfixturevalue(\"real_redis_version\")\n    if not cls_name.startswith(\"Fake\") and not server_version:\n        pytest.skip(\"Redis is not running\")\n    unsupported_server_types = request.node.get_closest_marker(\n        \"unsupported_server_types\"\n    )\n    if unsupported_server_types and server_type in unsupported_server_types.args:\n        pytest.skip(f\"Server type {server_type} is not supported\")\n    min_server = _marker_version_value(request, \"min_server\")\n    max_server = _marker_version_value(request, \"max_server\")\n    if server_version < min_server:\n        pytest.skip(\n            f\"Redis server {min_server} or more required but {server_version} found\"\n        )\n    if server_version > max_server:\n        pytest.skip(\n            f\"Redis server {max_server} or less required but {server_version} found\"\n        )\n    decode_responses = request.node.get_closest_marker(\"decode_responses\") is not None\n    lua_modules_marker = request.node.get_closest_marker(\"load_lua_modules\")\n    lua_modules = set(lua_modules_marker.args) if lua_modules_marker else None\n    if lua_modules and not _check_lua_module_supported():\n        pytest.skip(\"LUA modules not supported by fakeredis\")\n\n    def factory(db=2):\n        if cls_name.startswith(\"Fake\"):\n            fake_server = request.getfixturevalue(\"fake_server\")\n            cls = getattr(fakeredis, cls_name)\n            return cls(\n                db=db,\n                decode_responses=decode_responses,\n                server=fake_server,\n                lua_modules=lua_modules,\n            )\n        # Real\n        cls = getattr(redis, cls_name)\n        return cls(\"localhost\", port=6380, db=db, decode_responses=decode_responses)\n\n    return factory\n\n\n@pytest_asyncio.fixture(\n    name=\"async_redis\",\n    params=[\n        pytest.param(\"fake\", marks=pytest.mark.fake),\n        pytest.param(\"real\", marks=pytest.mark.real),\n    ],\n)\nasync def _req_aioredis2(request) -> redis.asyncio.Redis:\n    server_type, server_version = request.getfixturevalue(\"real_redis_version\")\n    if request.param != \"fake\" and not server_version:\n        pytest.skip(\"Redis is not running\")\n    unsupported_server_types = request.node.get_closest_marker(\n        \"unsupported_server_types\"\n    )\n    if unsupported_server_types and server_type in unsupported_server_types.args:\n        pytest.skip(f\"Server type {server_type} is not supported\")\n    min_server_marker = _marker_version_value(request, \"min_server\")\n    max_server_marker = _marker_version_value(request, \"max_server\")\n    if server_version < min_server_marker:\n        pytest.skip(\n            f\"Redis server {min_server_marker} or more required but {server_version} found\"\n        )\n    if server_version > max_server_marker:\n        pytest.skip(\n            f\"Redis server {max_server_marker} or less required but {server_version} found\"\n        )\n    lua_modules_marker = request.node.get_closest_marker(\"load_lua_modules\")\n    lua_modules = set(lua_modules_marker.args) if lua_modules_marker else None\n    if lua_modules and not _check_lua_module_supported():\n        pytest.skip(\"LUA modules not supported by fakeredis\")\n    fake_server: Optional[fakeredis.FakeServer]\n    if request.param == \"fake\":\n        fake_server = request.getfixturevalue(\"fake_server\")\n        ret = fakeredis.FakeAsyncRedis(server=fake_server, lua_modules=lua_modules)\n    else:\n        ret = redis.asyncio.Redis(host=\"localhost\", port=6380, db=2)\n        fake_server = None\n    if not fake_server or fake_server.connected:\n        await ret.flushall()\n\n    yield ret\n\n    if not fake_server or fake_server.connected:\n        await ret.flushall()\n    await ret.connection_pool.disconnect()\n"
  },
  {
    "path": "tests/fakeredis/test/test_asyncredis.py",
    "content": "import asyncio\nimport sys\n\n\nif sys.version_info >= (3, 11):\n    from asyncio import timeout as async_timeout\nelse:\n    from async_timeout import timeout as async_timeout\nimport pytest\nimport pytest_asyncio\nimport redis\nimport redis.asyncio\n\nfrom fakeredis import FakeServer, aioredis\nfrom test import testtools\n\npytestmark = []\npytestmark.extend(\n    [\n        pytest.mark.asyncio,\n    ]\n)\n\n\n@pytest_asyncio.fixture\nasync def conn(async_redis: redis.asyncio.Redis):\n    \"\"\"A single connection, rather than a pool.\"\"\"\n    async with async_redis.client() as conn:\n        yield conn\n\n\nasync def test_ping(async_redis: redis.asyncio.Redis):\n    pong = await async_redis.ping()\n    assert pong is True\n\n\nasync def test_types(async_redis: redis.asyncio.Redis):\n    await async_redis.hset(\n        \"hash\", mapping={\"key1\": \"value1\", \"key2\": \"value2\", \"key3\": 123}\n    )\n    result = await async_redis.hgetall(\"hash\")\n    assert result == {b\"key1\": b\"value1\", b\"key2\": b\"value2\", b\"key3\": b\"123\"}\n\n\nasync def test_transaction(async_redis: redis.asyncio.Redis):\n    async with async_redis.pipeline(transaction=True) as tr:\n        tr.set(\"key1\", \"value1\")\n        tr.set(\"key2\", \"value2\")\n        ok1, ok2 = await tr.execute()\n    assert ok1\n    assert ok2\n    result = await async_redis.get(\"key1\")\n    assert result == b\"value1\"\n\n\nasync def test_transaction_fail(async_redis: redis.asyncio.Redis):\n    await async_redis.set(\"foo\", \"1\")\n    async with async_redis.pipeline(transaction=True) as tr:\n        await tr.watch(\"foo\")\n        await async_redis.set(\"foo\", \"2\")  # Different connection\n        tr.multi()\n        tr.get(\"foo\")\n        with pytest.raises(redis.asyncio.WatchError):\n            await tr.execute()\n\n\nasync def test_pubsub(async_redis, event_loop):\n    queue = asyncio.Queue()\n\n    async def reader(ps):\n        while True:\n            
message = await ps.get_message(ignore_subscribe_messages=True, timeout=5)\n            if message is not None:\n                if message.get(\"data\") == b\"stop\":\n                    break\n                queue.put_nowait(message)\n\n    async with async_timeout(5), async_redis.pubsub() as ps:\n        await ps.subscribe(\"channel\")\n        task = event_loop.create_task(reader(ps))\n        await async_redis.publish(\"channel\", \"message1\")\n        await async_redis.publish(\"channel\", \"message2\")\n        result1 = await queue.get()\n        result2 = await queue.get()\n        assert result1 == {\n            \"channel\": b\"channel\",\n            \"pattern\": None,\n            \"type\": \"message\",\n            \"data\": b\"message1\",\n        }\n        assert result2 == {\n            \"channel\": b\"channel\",\n            \"pattern\": None,\n            \"type\": \"message\",\n            \"data\": b\"message2\",\n        }\n        await async_redis.publish(\"channel\", \"stop\")\n        await task\n\n\n@pytest.mark.slow\nasync def test_pubsub_timeout(async_redis: redis.asyncio.Redis):\n    async with async_redis.pubsub() as ps:\n        await ps.subscribe(\"channel\")\n        await ps.get_message(timeout=0.5)  # Subscription message\n        message = await ps.get_message(timeout=0.5)\n        assert message is None\n\n\n@pytest.mark.slow\nasync def test_pubsub_disconnect(async_redis: redis.asyncio.Redis):\n    async with async_redis.pubsub() as ps:\n        await ps.subscribe(\"channel\")\n        await ps.connection.disconnect()\n        message = await ps.get_message(timeout=0.5)  # Subscription message\n        assert message is not None\n        message = await ps.get_message(timeout=0.5)\n        assert message is None\n\n\nasync def test_blocking_ready(async_redis, conn):\n    \"\"\"Blocking command which does not need to block.\"\"\"\n    await async_redis.rpush(\"list\", \"x\")\n    result = await conn.blpop(\"list\", 
timeout=1)\n    assert result == (b\"list\", b\"x\")\n\n\n@pytest.mark.slow\nasync def test_blocking_timeout(conn):\n    \"\"\"Blocking command that times out without completing.\"\"\"\n    result = await conn.blpop(\"missing\", timeout=1)\n    assert result is None\n\n\n@pytest.mark.slow\nasync def test_blocking_unblock(async_redis, conn, event_loop):\n    \"\"\"Blocking command that gets unblocked after some time.\"\"\"\n\n    async def unblock():\n        await asyncio.sleep(0.1)\n        await async_redis.rpush(\"list\", \"y\")\n\n    task = event_loop.create_task(unblock())\n    result = await conn.blpop(\"list\", timeout=1)\n    assert result == (b\"list\", b\"y\")\n    await task\n\n\nasync def test_wrongtype_error(async_redis: redis.asyncio.Redis):\n    await async_redis.set(\"foo\", \"bar\")\n    with pytest.raises(redis.asyncio.ResponseError, match=\"^WRONGTYPE\"):\n        await async_redis.rpush(\"foo\", \"baz\")\n\n\nasync def test_syntax_error(async_redis: redis.asyncio.Redis):\n    with pytest.raises(\n        redis.asyncio.ResponseError,\n        match=\"^wrong number of arguments for 'get' command$\",\n    ):\n        await async_redis.execute_command(\"get\")\n\n\n@testtools.run_test_if_lupa\nclass TestScripts:\n    async def test_no_script_error(self, async_redis: redis.asyncio.Redis):\n        with pytest.raises(redis.exceptions.NoScriptError):\n            await async_redis.evalsha(\"0123456789abcdef0123456789abcdef\", 0)\n\n    @pytest.mark.min_server(\"7\")\n    async def test_failed_script_error7(self, async_redis):\n        await async_redis.set(\"foo\", \"bar\")\n        with pytest.raises(redis.asyncio.ResponseError):\n            await async_redis.eval('return redis.call(\"ZCOUNT\", KEYS[1])', 1, \"foo\")\n\n\nasync def test_type(async_redis: redis.asyncio.Redis):\n    await async_redis.set(\"string_key\", \"value\")\n    await async_redis.lpush(\"list_key\", \"value\")\n    await async_redis.sadd(\"set_key\", \"value\")\n    await 
async_redis.zadd(\"zset_key\", {\"value\": 1})\n    await async_redis.hset(\"hset_key\", \"key\", \"value\")\n\n    assert b\"string\" == await async_redis.type(\"string_key\")  # noqa: E721\n    assert b\"list\" == await async_redis.type(\"list_key\")  # noqa: E721\n    assert b\"set\" == await async_redis.type(\"set_key\")  # noqa: E721\n    assert b\"zset\" == await async_redis.type(\"zset_key\")  # noqa: E721\n    assert b\"hash\" == await async_redis.type(\"hset_key\")  # noqa: E721\n    assert b\"none\" == await async_redis.type(\"none_key\")  # noqa: E721\n\n\nasync def test_xdel(async_redis: redis.asyncio.Redis):\n    stream = \"stream\"\n\n    # deleting from an empty stream doesn't do anything\n    assert await async_redis.xdel(stream, 1) == 0\n\n    m1 = await async_redis.xadd(stream, {\"foo\": \"bar\"})\n    m2 = await async_redis.xadd(stream, {\"foo\": \"bar\"})\n    m3 = await async_redis.xadd(stream, {\"foo\": \"bar\"})\n\n    # xdel returns the number of deleted elements\n    assert await async_redis.xdel(stream, m1) == 1\n    assert await async_redis.xdel(stream, m2, m3) == 2\n\n\nasync def test_connection_with_username_and_password():\n    server = FakeServer()\n    r = aioredis.FakeRedis(server=server, username=\"username\", password=\"password\")\n\n    test_value = \"this_is_a_test\"\n    await r.hset(\"test:key\", \"test_hash\", test_value)\n    result = await r.hget(\"test:key\", \"test_hash\")\n    assert result.decode() == test_value\n\n\n@pytest.mark.asyncio\nasync def test_cause_fakeredis_bug(async_redis):\n    if sys.version_info < (3, 11):\n        return\n\n    async def worker_task():\n        assert await async_redis.rpush(\"list1\", \"list1_val\") == 1  # 1\n        assert await async_redis.blpop(\"list2\") == (b\"list2\", b\"list2_val\")  # 4\n        assert await async_redis.set(\"foo\", \"bar\") is True  # 5\n\n    async with asyncio.TaskGroup() as tg:\n        tg.create_task(worker_task())\n        assert await 
async_redis.blpop(\"list1\") == (b\"list1\", b\"list1_val\")  # 2\n        assert await async_redis.rpush(\"list2\", \"list2_val\") == 1  # 3\n\n    # await async_redis.get(\"foo\")  # uncomment to make test pass\n    assert await async_redis.get(\"foo\") == b\"bar\"\n"
  },
  {
    "path": "tests/fakeredis/test/test_hypotesis_joint/__init__.py",
    "content": ""
  },
  {
    "path": "tests/fakeredis/test/test_hypotesis_joint/test_joint.py",
    "content": "import hypothesis.strategies as st\n\nfrom .. import test_hypothesis as tests\nfrom ..test_hypothesis.base import BaseTest, common_commands, commands\nfrom ..test_hypothesis.test_string import string_commands\n\nbad_commands = (\n    # redis-py splits the command on spaces, and hangs if that ends up being an empty list\n    commands(\n        st.text().filter(lambda x: bool(x.split())), st.lists(st.binary() | st.text())\n    )\n)\n\n\nclass TestJoint(BaseTest):\n    create_command_strategy = (\n        tests.TestString.create_command_strategy\n        | tests.TestHash.create_command_strategy\n        | tests.TestList.create_command_strategy\n        | tests.TestSet.create_command_strategy\n        | tests.TestZSet.create_command_strategy\n    )\n    command_strategy = (\n        tests.TestServer.server_commands\n        | tests.TestConnection.connection_commands\n        | string_commands\n        | tests.TestHash.hash_commands\n        | tests.TestList.list_commands\n        | tests.TestSet.set_commands\n        | tests.TestZSet.zset_commands\n        | common_commands\n        | bad_commands\n    )\n"
  },
  {
    "path": "tests/fakeredis/test/test_hypothesis/__init__.py",
    "content": "__all__ = [\n    \"TestConnection\",\n    \"TestHash\",\n    \"TestList\",\n    \"TestServer\",\n    \"TestSet\",\n    \"TestString\",\n    \"TestTransaction\",\n    \"TestZSet\",\n]\n\nfrom .test_connection import TestConnection\nfrom .test_hash import TestHash\nfrom .test_list import TestList\nfrom .test_server import TestServer\nfrom .test_set import TestSet\nfrom .test_string import TestString\nfrom .test_transaction import TestTransaction\nfrom .test_zset import TestZSet\n"
  },
  {
    "path": "tests/fakeredis/test/test_hypothesis/_server_info.py",
    "content": "from typing import Tuple, Union\n\nimport pytest\nimport redis\n\n\ndef server_info() -> Tuple[str, Union[None, Tuple[int, ...]]]:\n    \"\"\"Returns the server type and a fixed (7, 0) version; exits the test session if the server is not running\"\"\"\n    client = None\n    try:\n        client = redis.Redis(\"localhost\", port=6380, db=2)\n        client_info = client.info()\n        server_type = \"dragonfly\" if \"dragonfly_version\" in client_info else \"redis\"\n        server_version = (7, 0)\n        return server_type, server_version\n    except redis.ConnectionError as e:\n        print(e)\n        pytest.exit(\"Redis is not running\")\n        return \"redis\", (6,)\n    finally:\n        if hasattr(client, \"close\"):\n            client.close()  # Absent in older versions of redis-py\n\n\nserver_type, redis_ver = server_info()\n"
  },
  {
    "path": "tests/fakeredis/test/test_hypothesis/base.py",
    "content": "import functools\nimport math\nimport string\nimport sys\nfrom typing import Any, List, Tuple, Type, Optional\n\nimport fakeredis\nimport hypothesis\nimport hypothesis.stateful\nimport hypothesis.strategies as st\nimport pytest\nimport redis\nfrom hypothesis.stateful import rule, initialize, precondition\nfrom hypothesis.strategies import SearchStrategy\n\nfrom ._server_info import redis_ver\n\nself_strategy = st.runner()\n\nMAX_INT = 2_147_483_647\nMIN_INT = -2_147_483_648\n\n\n@st.composite\ndef sample_attr(draw, name):\n    \"\"\"Strategy for sampling a specific attribute from a state machine\"\"\"\n    machine = draw(self_strategy)\n    values = getattr(machine, name)\n    position = draw(st.integers(min_value=0, max_value=len(values) - 1))\n    return values[position]\n\n\nkeys = sample_attr(\"keys\")\nfields = sample_attr(\"fields\")\nvalues = sample_attr(\"values\")\nscores = sample_attr(\"scores\")\n\neng_text = st.builds(\n    lambda x: x.encode(), st.text(alphabet=string.ascii_letters, min_size=1)\n)\nints = st.integers(min_value=MIN_INT, max_value=MAX_INT)\nint_as_bytes = st.builds(lambda x: str(_default_normalize(x)).encode(), ints)\nfloats = st.floats(\n    width=32, allow_nan=False, allow_subnormal=False, allow_infinity=False\n)\nfloat_as_bytes = st.builds(lambda x: repr(_default_normalize(x)).encode(), floats)\ncounts = st.integers(min_value=-3, max_value=3) | ints\n# Redis has an integer overflow bug in swapdb, so we confine the numbers to\n# a limited range (https://github.com/antirez/redis/issues/5737).\ndbnums = st.integers(min_value=0, max_value=3) | st.integers(\n    min_value=-1000, max_value=1000\n)\n# The filter is to work around https://github.com/antirez/redis/issues/5632\npatterns = st.text(\n    alphabet=st.sampled_from(\"[]^$*.?-azAZ\\\\\\r\\n\\t\")\n) | st.binary().filter(lambda x: b\"\\0\" not in x)\n\n# Redis has integer overflow bugs in time computations, which is why we set a maximum.\nexpires_seconds = st.integers(min_value=5, max_value=1_000)\nexpires_ms = st.integers(min_value=5_000, max_value=50_000)\n\n\nclass WrappedException:\n    \"\"\"Wraps an exception for the purposes of comparison.\"\"\"\n\n    def __init__(self, exc):\n        self.wrapped = exc\n\n    def __str__(self):\n        return str(self.wrapped)\n\n    def __repr__(self):\n        return \"WrappedException({!r})\".format(self.wrapped)\n\n    def __eq__(self, other):\n        if not isinstance(other, WrappedException):\n            return NotImplemented\n        if type(self.wrapped) != type(other.wrapped):  # noqa: E721\n            return False\n        return True\n        # return self.wrapped.args == other.wrapped.args\n\n    def __ne__(self, other):\n        if not isinstance(other, WrappedException):\n            return NotImplemented\n        return not self == other\n\n\ndef _wrap_exceptions(obj):\n    if isinstance(obj, list):\n        return [_wrap_exceptions(item) for item in obj]\n    elif isinstance(obj, Exception):\n        return WrappedException(obj)\n    else:\n        return obj\n\n\ndef _sort_list(lst):\n    if isinstance(lst, list):\n        return sorted(lst)\n    else:\n        return lst\n\n\ndef _normalize_if_number(x):\n    if isinstance(x, list):\n        return [_normalize_if_number(i) for i in x]\n    try:\n        res = float(x)\n        return x if math.isnan(res) else res\n    except (TypeError, ValueError):\n        # None (nil replies) and other non-numeric objects raise TypeError, not ValueError\n        return x\n\n\ndef _flatten(args):\n    if isinstance(args, (list, tuple)):\n        for arg in args:\n            yield from _flatten(arg)\n    elif args is not None:\n        yield args\n\n\ndef _default_normalize(x: Any) -> Any:\n    if redis_ver >= (7,) and (isinstance(x, float) or isinstance(x, int)):\n        return 0 + x\n\n    return x\n\n\nclass Command:\n    def __init__(self, *args):\n        args = list(_flatten(args))\n        args = [_default_normalize(x) for x in args]\n        self.args = tuple(args)\n\n    def __repr__(self):\n        parts = [repr(arg) for arg in self.args]\n        return \"Command({})\".format(\", \".join(parts))\n\n    @staticmethod\n    def encode(arg):\n        encoder = redis.connection.Encoder(\"utf-8\", \"replace\", False)\n        return encoder.encode(arg)\n\n    @property\n    def normalize(self):\n        command = self.encode(self.args[0]).lower() if self.args else None\n        # Functions that return a list in arbitrary order\n        unordered = {\n            b\"keys\",\n            b\"sort\",\n            b\"hgetall\",\n            b\"hkeys\",\n            b\"hvals\",\n            b\"sdiff\",\n            b\"sinter\",\n            b\"sunion\",\n            b\"smembers\",\n            b\"hexpire\",\n        }\n        if command in unordered:\n            return _sort_list\n        else:\n            return _normalize_if_number\n\n    @property\n    def testable(self) -> bool:\n        \"\"\"Whether this command is suitable for a test.\n\n        The fuzzer can create commands with behavior that is non-deterministic, not supported, or which hits redis bugs.\n        \"\"\"\n        N = len(self.args)\n        if N == 0:\n            return False\n        command = self.encode(self.args[0]).lower()\n        if not command.split():\n            return False\n        if command == b\"keys\" and N == 2 and self.args[1] != b\"*\":\n            return False\n        # Redis will ignore a NULL character in some commands but not others,\n        # e.g., it recognises EXEC\\0 but not MULTI\\00.\n        # Rather than try to reproduce this quirky behavior, just skip these tests.\n        if b\"\\0\" in command:\n            return False\n        return True\n\n\ndef zero_or_more(*args) -> List[SearchStrategy]:\n    return [st.none() | st.just(arg) for arg in args]\n\n\ndef commands(*args, **kwargs):\n    return st.builds(functools.partial(Command, **kwargs), *args)\n\n\n# # TODO: all expiry-related commands\ncommon_commands = (\n    commands(st.sampled_from([\"del\", \"persist\", \"type\", \"unlink\"]), keys)\n    | commands(st.just(\"exists\"), st.lists(keys))\n    | commands(st.just(\"keys\"), st.just(\"*\"))\n    # Disabled for now due to redis giving wrong answers\n    # (https://github.com/antirez/redis/issues/5632)\n    # | commands(st.just('keys'), patterns)\n    | commands(st.just(\"move\"), keys, dbnums)\n    | commands(st.sampled_from([\"rename\", \"renamenx\"]), keys, keys)\n    # TODO: find a better solution to sort instability than throwing\n    #  away the sort entirely with normalize. This also prevents us\n    #  using LIMIT.\n    | commands(st.just(\"sort\"), keys, *zero_or_more(\"asc\", \"desc\", \"alpha\"))\n)\n\n\n@hypothesis.settings(max_examples=1000)\nclass CommonMachine(hypothesis.stateful.RuleBasedStateMachine):\n    create_command_strategy = st.nothing()\n\n    def __init__(self):\n        super().__init__()\n        try:\n            self.real = redis.StrictRedis(\"localhost\", port=6380, db=2)\n            self.real.ping()\n        except redis.ConnectionError:\n            pytest.skip(\"redis is not running\")\n        if self.real.info(\"server\").get(\"arch_bits\") != 64:\n            self.real.connection_pool.disconnect()\n            pytest.skip(\"redis server is not 64-bit\")\n        self.fake = fakeredis.FakeStrictRedis(\n            server=fakeredis.FakeServer(version=redis_ver), port=6380, db=2\n        )\n        # Disable the response parsing so that we can check the raw values returned\n        self.fake.response_callbacks.clear()\n        self.real.response_callbacks.clear()\n        self.transaction_normalize = []\n        self.keys = []\n        self.fields = []\n        self.values = []\n        self.scores = []\n        self.initialized_data = False\n        try:\n            self.real.execute_command(\"discard\")\n        except redis.ResponseError:\n            pass\n        self.real.flushall()\n\n    def teardown(self) -> None:\n        self.real.connection_pool.disconnect()\n        self.fake.connection_pool.disconnect()\n        super().teardown()\n\n    @staticmethod\n    def _evaluate(\n        client: redis.Redis, command\n    ) -> Tuple[Any, Optional[Type[Exception]]]:\n        try:\n            result = client.execute_command(*command.args)\n            if result != \"QUEUED\":\n                result = command.normalize(result)\n            exc = None\n        except Exception as e:\n            result = exc = e\n        return _wrap_exceptions(result), exc\n\n    def _compare(self, command: Command) -> None:\n        fake_result, fake_exc = self._evaluate(self.fake, command)\n        real_result, real_exc = self._evaluate(self.real, command)\n\n        if fake_exc is not None and real_exc is None:\n            print(\n                f\"{fake_exc} raised only on fake when running {command}\",\n                file=sys.stderr,\n            )\n            raise fake_exc\n        elif real_exc is not None and fake_exc is None:\n            assert real_exc == fake_exc, f\"Expected exception {real_exc} not raised\"\n        elif (\n            real_exc is None\n            and isinstance(real_result, list)\n            and command.args\n            and command.args[0].lower() == \"exec\"\n        ):\n            assert fake_result is not None\n            # Transactions need to use the normalize functions of the component commands.\n            assert len(self.transaction_normalize) == len(real_result)\n            assert len(self.transaction_normalize) == len(fake_result)\n            for n, r, f in zip(self.transaction_normalize, real_result, fake_result):\n                assert n(f) == n(r)\n            self.transaction_normalize = []\n        elif isinstance(fake_result, list):\n            assert len(fake_result) == len(real_result), (\n                f\"Discrepancy when running command {command}, fake({fake_result}) != real({real_result})\"\n            )\n            for i in range(len(fake_result)):\n                assert fake_result[i] == real_result[i] or (\n                    type(fake_result[i]) is float\n                    and fake_result[i] == pytest.approx(real_result[i])\n                ), f\"Discrepancy when running command {command}, fake({fake_result}) != real({real_result})\"\n\n        else:\n            assert fake_result == real_result or (\n                type(fake_result) is float and fake_result == pytest.approx(real_result)\n            ), f\"Discrepancy when running command {command}, fake({fake_result}) != real({real_result})\"\n            if real_result == b\"QUEUED\":\n                # Since redis removes the distinction between simple strings and\n                # bulk strings, this might not actually indicate that we're in a\n                # transaction. But it is extremely unlikely that hypothesis will\n                # find such examples.\n                self.transaction_normalize.append(command.normalize)\n        if len(command.args) == 1 and Command.encode(command.args[0]).lower() in (\n            b\"discard\",\n            b\"exec\",\n        ):\n            self.transaction_normalize = []\n\n    @initialize(\n        attrs=st.fixed_dictionaries(\n            dict(\n                keys=st.lists(eng_text, min_size=2, max_size=5, unique=True),\n                fields=st.lists(eng_text, min_size=2, max_size=5, unique=True),\n                values=st.lists(\n                    eng_text | int_as_bytes | float_as_bytes,\n                    min_size=2,\n                    max_size=5,\n                    unique=True,\n                ),\n                scores=st.lists(\n                    floats,\n                    min_size=2,\n                    max_size=5,\n                    unique=True,\n                ),\n            )\n        )\n    )\n    def init_attrs(self, attrs):\n        for key, value in attrs.items():\n            setattr(self, key, value)\n\n    # hypothesis doesn't allow ordering of @initialize, so we have to put\n    # preconditions on rules to ensure we call init_data exactly once and\n    # after init_attrs.\n    @precondition(lambda self: not self.initialized_data)\n    @rule(\n        commands=self_strategy.flatmap(\n            lambda self: st.lists(self.create_command_strategy)\n        )\n    )\n    def init_data(self, commands) -> None:\n        for command in commands:\n            self._compare(command)\n        self.initialized_data = True\n\n    @precondition(lambda self: self.initialized_data)\n    @rule(command=self_strategy.flatmap(lambda self: self.command_strategy))\n    def one_command(self, command: Command) -> None:\n        self._compare(command)\n\n\nclass BaseTest:\n    \"\"\"Base class for test classes.\"\"\"\n\n    command_strategy: SearchStrategy\n    create_command_strategy = st.nothing()\n\n    @pytest.mark.slow\n    def test(self):\n        class Machine(CommonMachine):\n            create_command_strategy = self.create_command_strategy\n            command_strategy = self.command_strategy\n\n        # hypothesis.settings.register_profile(\n        #     \"debug\", max_examples=10, verbosity=hypothesis.Verbosity.debug\n        # )\n        hypothesis.settings.register_profile(\n            \"debug\", verbosity=hypothesis.Verbosity.debug\n        )\n        hypothesis.settings.load_profile(\"debug\")\n        hypothesis.stateful.run_state_machine_as_test(Machine)\n"
  },
  {
    "path": "tests/fakeredis/test/test_hypothesis/test_connection.py",
    "content": "import hypothesis.strategies as st\n\nfrom .base import BaseTest, commands, values, common_commands\n\n\nclass TestConnection(BaseTest):\n    # TODO: tests for select\n    connection_commands = (\n        commands(st.just(\"echo\"), values)\n        | commands(st.just(\"ping\"), st.lists(values, max_size=2))\n        # | commands(st.just(\"swapdb\"), dbnums, dbnums)\n    )\n    command_strategy = connection_commands | common_commands\n"
  },
  {
    "path": "tests/fakeredis/test/test_hypothesis/test_hash.py",
    "content": "import hypothesis.strategies as st\n\nfrom .base import (\n    BaseTest,\n    commands,\n    values,\n    keys,\n    common_commands,\n    fields,\n    ints,\n    expires_seconds,\n)\n\n\nclass TestHash(BaseTest):\n    hash_commands = (\n        commands(st.just(\"hset\"), keys, st.lists(st.tuples(fields, values)))\n        | commands(st.just(\"hdel\"), keys, st.lists(fields))\n        | commands(st.just(\"hexists\"), keys, fields)\n        | commands(st.just(\"hget\"), keys, fields)\n        | commands(st.sampled_from([\"hgetall\", \"hkeys\", \"hvals\"]), keys)\n        | commands(st.just(\"hincrby\"), keys, fields, ints)\n        | commands(st.just(\"hlen\"), keys)\n        | commands(st.just(\"hmget\"), keys, st.lists(fields))\n        | commands(st.just(\"hset\"), keys, st.lists(st.tuples(fields, values)))\n        | commands(st.just(\"hsetnx\"), keys, fields, values)\n        | commands(st.just(\"hstrlen\"), keys, fields)\n        | commands(\n            st.just(\"hpersist\"),\n            st.just(\"fields\"),\n            st.just(2),\n            st.lists(fields, min_size=2, max_size=2),\n        )\n        | commands(\n            st.just(\"hexpire\"),\n            keys,\n            expires_seconds,\n            st.just(\"fields\"),\n            st.just(2),\n            st.lists(fields, min_size=2, max_size=2, unique=True),\n        )\n    )\n    create_command_strategy = commands(\n        st.just(\"hset\"), keys, st.lists(st.tuples(fields, values), min_size=1)\n    )\n    command_strategy = hash_commands | common_commands\n"
  },
  {
    "path": "tests/fakeredis/test/test_hypothesis/test_list.py",
    "content": "import hypothesis.strategies as st\n\nfrom .base import (\n    BaseTest,\n    commands,\n    values,\n    keys,\n    common_commands,\n    counts,\n    ints,\n)\n\n\nclass TestList(BaseTest):\n    # TODO: blocking commands\n    list_commands = (\n        commands(st.just(\"lindex\"), keys, counts)\n        | commands(\n            st.just(\"linsert\"),\n            keys,\n            st.sampled_from([\"before\", \"after\", \"BEFORE\", \"AFTER\"]) | st.binary(),\n            values,\n            values,\n        )\n        | commands(st.just(\"llen\"), keys)\n        | commands(\n            st.sampled_from([\"lpop\", \"rpop\"]),\n            keys,\n            st.just(None) | st.just([]) | ints,\n        )\n        | commands(\n            st.sampled_from([\"lpush\", \"lpushx\", \"rpush\", \"rpushx\"]),\n            keys,\n            st.lists(values),\n        )\n        | commands(st.just(\"lrange\"), keys, counts, counts)\n        | commands(st.just(\"lrem\"), keys, counts, values)\n        | commands(st.just(\"lset\"), keys, counts, values)\n        | commands(st.just(\"ltrim\"), keys, counts, counts)\n        | commands(st.just(\"rpoplpush\"), keys, keys)\n    )\n    create_command_strategy = commands(\n        st.just(\"rpush\"), keys, st.lists(values, min_size=1)\n    )\n    command_strategy = list_commands | common_commands\n"
  },
  {
    "path": "tests/fakeredis/test/test_hypothesis/test_server.py",
    "content": "import hypothesis.strategies as st\n\nfrom .base import (\n    BaseTest,\n    commands,\n    common_commands,\n    keys,\n    values,\n)\nfrom .test_string import string_commands\n\n\nclass TestServer(BaseTest):\n    # TODO: real redis raises an error if there is a save already in progress.\n    #  Find a better way to test this. commands(st.just('bgsave'))\n    server_commands = (\n        commands(st.just(\"dbsize\"))\n        | commands(st.sampled_from([\"flushdb\", \"flushall\"]))\n        # TODO: result is non-deterministic\n        # | commands(st.just('lastsave'))\n        | commands(st.just(\"save\"))\n    )\n    create_command_strategy = commands(st.just(\"set\"), keys, values)\n    command_strategy = server_commands | string_commands | common_commands\n"
  },
  {
    "path": "tests/fakeredis/test/test_hypothesis/test_set.py",
    "content": "import hypothesis.strategies as st\n\nfrom .base import (\n    BaseTest,\n    commands,\n    keys,\n    common_commands,\n    fields,\n)\n\n\nclass TestSet(BaseTest):\n    set_commands = (\n        commands(st.just(\"sadd\"), keys, st.lists(fields))\n        | commands(st.just(\"scard\"), keys)\n        | commands(st.sampled_from([\"sdiff\", \"sinter\", \"sunion\"]), st.lists(keys))\n        | commands(\n            st.sampled_from([\"sdiffstore\", \"sinterstore\", \"sunionstore\"]),\n            keys,\n            st.lists(keys),\n        )\n        | commands(st.just(\"sismember\"), keys, fields)\n        | commands(st.just(\"smembers\"), keys)\n        | commands(st.just(\"smove\"), keys, keys, fields)\n        | commands(st.just(\"srem\"), keys, st.lists(fields))\n    )\n    create_command_strategy = commands(\n        st.just(\"sadd\"), keys, st.lists(fields, min_size=1)\n    )\n    command_strategy = set_commands | common_commands\n"
  },
  {
    "path": "tests/fakeredis/test/test_hypothesis/test_string.py",
    "content": "import hypothesis.strategies as st\n\nfrom .base import (\n    BaseTest,\n    commands,\n    values,\n    keys,\n    common_commands,\n    counts,\n    int_as_bytes,\n    zero_or_more,\n    ints,\n    expires_seconds,\n    expires_ms,\n)\n\noptional_bitcount_range = st.just(()) | st.tuples(int_as_bytes, int_as_bytes)\n# todo: Should be addressed\n# str_len = st.integers(min_value=-3, max_value=3) | st.integers(\n#     min_value=-2147483647, max_value=2147483648\n# )\nstr_len = st.integers(min_value=-3, max_value=3) | st.integers(\n    min_value=-3000, max_value=3000\n)\n\nstring_commands = (\n    commands(st.just(\"append\"), keys, values)\n    | commands(st.just(\"bitcount\"), keys, optional_bitcount_range)\n    | commands(st.sampled_from([\"incr\", \"decr\"]), keys)\n    | commands(st.sampled_from([\"incrby\", \"decrby\"]), keys, values)\n    | commands(st.just(\"get\"), keys)\n    | commands(st.just(\"getbit\"), keys, counts)\n    | commands(\n        st.just(\"setbit\"),\n        keys,\n        counts,\n        st.integers(min_value=0, max_value=1) | ints,\n    )\n    | commands(st.sampled_from([\"substr\", \"getrange\"]), keys, str_len, counts)\n    | commands(st.just(\"getset\"), keys, values)\n    | commands(st.just(\"mget\"), st.lists(keys))\n    | commands(st.sampled_from([\"mset\", \"msetnx\"]), st.lists(st.tuples(keys, values)))\n    | commands(\n        st.just(\"set\"),\n        keys,\n        values,\n        *zero_or_more(\"nx\", \"xx\", \"keepttl\"),\n    )\n    | commands(st.just(\"setex\"), keys, expires_seconds, values)\n    | commands(st.just(\"psetex\"), keys, expires_ms, values)\n    | commands(st.just(\"setnx\"), keys, values)\n    | commands(st.just(\"setrange\"), keys, str_len, values)\n    | commands(st.just(\"strlen\"), keys)\n)\n\n\nclass TestString(BaseTest):\n    create_command_strategy = commands(st.just(\"set\"), keys, values)\n    command_strategy = string_commands | common_commands\n"
  },
  {
    "path": "tests/fakeredis/test/test_hypothesis/test_transaction.py",
    "content": "import hypothesis.strategies as st\n\nfrom .base import (\n    BaseTest,\n    commands,\n    values,\n    keys,\n    common_commands,\n    counts,\n    zero_or_more,\n    ints,\n    expires_seconds,\n    expires_ms,\n)\nfrom .test_string import TestString\n\n\nclass TestTransaction(BaseTest):\n    transaction_commands = (\n        commands(st.sampled_from([\"multi\", \"discard\", \"exec\", \"unwatch\"]))\n        | commands(st.just(\"watch\"), keys)\n        | commands(st.just(\"append\"), keys, values)\n        | commands(st.just(\"bitcount\"), keys)\n        | commands(st.just(\"bitcount\"), keys, values, values)\n        | commands(st.sampled_from([\"incr\", \"decr\"]), keys)\n        | commands(st.sampled_from([\"incrby\", \"decrby\"]), keys, values)\n        | commands(st.just(\"get\"), keys)\n        | commands(st.just(\"getbit\"), keys, counts)\n        | commands(\n            st.just(\"setbit\"),\n            keys,\n            counts,\n            st.integers(min_value=0, max_value=1) | ints,\n        )\n        | commands(st.sampled_from([\"substr\", \"getrange\"]), keys, counts, counts)\n        | commands(st.just(\"getset\"), keys, values)\n        | commands(st.just(\"mget\"), st.lists(keys))\n        | commands(\n            st.sampled_from([\"mset\", \"msetnx\"]), st.lists(st.tuples(keys, values))\n        )\n        | commands(\n            st.just(\"set\"),\n            keys,\n            values,\n            *zero_or_more(\"nx\", \"xx\", \"keepttl\"),\n        )\n        | commands(st.just(\"setex\"), keys, expires_seconds, values)\n        | commands(st.just(\"psetex\"), keys, expires_ms, values)\n        | commands(st.just(\"setnx\"), keys, values)\n        | commands(st.just(\"setrange\"), keys, counts, values)\n        | commands(st.just(\"strlen\"), keys)\n    )\n    create_command_strategy = TestString.create_command_strategy\n    command_strategy = transaction_commands | common_commands\n"
  },
  {
    "path": "tests/fakeredis/test/test_hypothesis/test_zset.py",
    "content": "import operator\n\nimport hypothesis.strategies as st\n\nfrom .base import (\n    BaseTest,\n    commands,\n    keys,\n    common_commands,\n    counts,\n    fields,\n    zero_or_more,\n    scores,\n    Command,\n    float_as_bytes,\n)\n\nscore_tests = scores | st.builds(lambda x: b\"(\" + repr(x).encode(), scores)\nlimits = st.just(()) | st.tuples(st.just(\"limit\"), counts, counts)\nstring_tests = st.sampled_from([b\"+\", b\"-\"]) | st.builds(\n    operator.add, st.sampled_from([b\"(\", b\"[\"]), fields\n)\nzset_no_score_create_commands = commands(\n    st.just(\"zadd\"), keys, st.lists(st.tuples(st.just(0), fields), min_size=1)\n)\nzset_no_score_commands = (  # TODO: test incr\n    commands(\n        st.just(\"zadd\"),\n        keys,\n        *zero_or_more(\"nx\", \"xx\", \"ch\", \"incr\"),\n        st.lists(st.tuples(st.just(0), fields)),\n    )\n    | commands(st.just(\"zlexcount\"), keys, string_tests, string_tests)\n    | commands(\n        st.sampled_from([\"zrangebylex\", \"zrevrangebylex\"]),\n        keys,\n        string_tests,\n        string_tests,\n        limits,\n    )\n    | commands(st.just(\"zremrangebylex\"), keys, string_tests, string_tests)\n)\n\n\ndef optional(arg):\n    return st.none() | st.just(arg)\n\n\ndef build_zstore(command, dest, sources, weights, aggregate) -> Command:\n    args = [command, dest, len(sources)]\n    args += [source[0] for source in sources]\n    if weights:\n        args.append(\"weights\")\n        args += [source[1] for source in sources]\n    if aggregate:\n        args += [\"aggregate\", aggregate]\n    return Command(args)\n\n\nclass TestZSet(BaseTest):\n    zset_commands = (\n        commands(\n            st.just(\"zadd\"),\n            keys,\n            *zero_or_more(\"nx\", \"xx\", \"ch\", \"incr\"),\n            st.lists(st.tuples(scores, fields)),\n        )\n        | commands(st.just(\"zcard\"), keys)\n        | commands(st.just(\"zcount\"), keys, score_tests, score_tests)\n        | 
commands(st.just(\"zincrby\"), keys, scores, fields)\n        | commands(\n            st.sampled_from([\"zrange\", \"zrevrange\"]),\n            keys,\n            counts,\n            counts,\n            optional(\"withscores\"),\n        )\n        | commands(\n            st.sampled_from([\"zrangebyscore\", \"zrevrangebyscore\"]),\n            keys,\n            score_tests,\n            score_tests,\n            limits,\n            optional(\"withscores\"),\n        )\n        | commands(st.sampled_from([\"zrank\", \"zrevrank\"]), keys, fields)\n        | commands(st.just(\"zrem\"), keys, st.lists(fields))\n        | commands(st.just(\"zremrangebyrank\"), keys, counts, counts)\n        | commands(st.just(\"zremrangebyscore\"), keys, score_tests, score_tests)\n        | commands(st.just(\"zscore\"), keys, fields)\n        | st.builds(\n            build_zstore,\n            command=st.sampled_from([\"zunionstore\", \"zinterstore\"]),\n            dest=keys,\n            sources=st.lists(st.tuples(keys, float_as_bytes)),\n            weights=st.booleans(),\n            aggregate=st.sampled_from([None, \"sum\", \"min\", \"max\"]),\n        )\n    )\n    # TODO: zscan, zpopmin/zpopmax, bzpopmin/bzpopmax, probably more\n    create_command_strategy = commands(\n        st.just(\"zadd\"), keys, st.lists(st.tuples(scores, fields), min_size=1)\n    )\n    command_strategy = zset_commands | common_commands\n\n\nclass TestZSetNoScores(BaseTest):\n    create_command_strategy = zset_no_score_create_commands\n    command_strategy = zset_no_score_commands | common_commands\n"
  },
  {
    "path": "tests/fakeredis/test/test_issues.py",
    "content": "import pytest\nimport redis.client\n\n\ndef test_causes_crash(r: redis.Redis):\n    key = b\"}W\\xfa\\x87\\xf4\"\n    key2 = b\"\\xf3\\xba\\x00\\xa1\\x1c\\xac\\x01A\\x8b\\xc4\\xe9\\xe2\\xa8\"\n    r.rpush(key, b\"!\\xef\\x9e\\xd2\", b\"1175417134\")\n    r.rpoplpush(key, key)\n    r.lrange(key, -1, 14795)\n    with pytest.raises(redis.ResponseError):\n        r.rename(key2, key2)\n    r.lrange(key, 2, 0)\n    r.sort(key, alpha=True)\n    r.llen(key)\n    r.keys(\"*\")\n    r.keys(\"*\")\n    r.lindex(key, 1)\n    r.exists(key, key2, key, key2, key2, key2, key2, key)\n    r.linsert(key, \"AFTER\", b\"inf\", b\"!\\xef\\x9e\\xd2\")\n    with pytest.raises(redis.ResponseError):\n        r.linsert(\n            key,\n            b\"W8\\xe9&\",\n            b\"-43950\",\n            b\"-43950\",\n        )\n    r.rpoplpush(key, key)\n    with pytest.raises(redis.ResponseError):\n        r.exists()\n    r.lrem(key2, -56700, b\"-6.816602725023744e+16\")\n    r.lrem(key, -3, b\"1175417134\")\n    r.llen(key2)\n    r.lrem(key, -3, b\"!\\xef\\x9e\\xd2\")\n\n\ndef test_another_test_causes_crash(r: redis.Redis):\n    key1 = b\"\\xc2\\xdb\"\n    key2 = b\"z`\\xf8,\\xe2\\x02\\xb3\\x85\\xc5\"\n    key3 = b\"\\xf4<\\xe1\\xb6\\xcb\\xde\\xaf\"\n    key4 = b\"\\xad\"\n    r.rpush(key1, b\"i\\x05\\x0b\\xb1\")\n    r.rpush(\n        key2,\n        b\"i\\x05\\x0b\\xb1\",\n        b\"i\\x05\\x0b\\xb1\",\n        b\"\\\\h\\xf2\",\n        b\"\\\\h\\xf2\",\n        b\"i\\x05\\x0b\\xb1\",\n        b\"\\\\h\\xf2\",\n    )\n    r.rpush(\n        key3,\n        b\"\\\\h\\xf2\",\n        b\"i\\x05\\x0b\\xb1\",\n        b\"i\\x05\\x0b\\xb1\",\n        b\"\\\\h\\xf2\",\n        b\"\\\\h\\xf2\",\n        b\"\\\\h\\xf2\",\n        b\"\\\\h\\xf2\",\n        b\"i\\x05\\x0b\\xb1\",\n    )\n    r.rpush(\n        key1,\n        b\"i\\x05\\x0b\\xb1\",\n        b\"\\\\h\\xf2\",\n        b\"i\\x05\\x0b\\xb1\",\n        b\"\\\\h\\xf2\",\n        b\"i\\x05\\x0b\\xb1\",\n        
b\"\\\\h\\xf2\",\n        b\"\\\\h\\xf2\",\n        b\"i\\x05\\x0b\\xb1\",\n    )\n    r.rpush(key2, b\"i\\x05\\x0b\\xb1\")\n\n    r.lpop(b\"\")\n    with pytest.raises(redis.ResponseError):\n        r.rpushx(key4)\n    r.move(b\"\", 1)\n    with pytest.raises(redis.ResponseError):\n        r.move(key3, -730)\n    r.ltrim(key3, -51547, -2)\n    r.rpoplpush(key4, b\"\")\n\n    r.rpush(key4, b\"i\\x05\\x0b\\xb1\", b\"\\\\h\\xf2\", b\"\\\\h\\xf2\")\n    r.persist(key2)\n    r.exists(key1, key1, key1)\n\n    r.ltrim(b\"\", -12584, -3)\n    r.lrem(key4, -1, b\"i\\x05\\x0b\\xb1\")\n    with pytest.raises(redis.ResponseError):\n        r.linsert(key2, b\"\\xa5\", b\"\\\\h\\xf2\", b\"i\\x05\\x0b\\xb1\")\n    r.linsert(key2, \"BEFORE\", b\"\\\\h\\xf2\", b\"\\\\h\\xf2\")\n    r.ltrim(key2, 1, -2_147_483_648)\n    r.ltrim(key1, -4200252, 1)\n    r.rpush(\n        b\"\",\n        b\"i\\x05\\x0b\\xb1\",\n        b\"i\\x05\\x0b\\xb1\",\n        b\"i\\x05\\x0b\\xb1\",\n        b\"\\\\h\\xf2\",\n        b\"\\\\h\\xf2\",\n        b\"i\\x05\\x0b\\xb1\",\n    )\n    with pytest.raises(redis.ResponseError):\n        r.rpop(key1, -2_147_483_648)\n    r.lrem(key1, 77, b\"i\\x05\\x0b\\xb1\")\n    r.rpoplpush(b\"\", key2)\n    r.ltrim(b\"\", 0, 1)\n    r.unlink(b\"\")\n    r.ltrim(key1, 0, 0)\n    r.lrem(key3, 31029, b\"\\\\h\\xf2\")\n    r.lrange(key1, -2, -91)\n    r.rpoplpush(key1, key2)\n    r.rpush(\n        key1,\n        b\"\\\\h\\xf2\",\n        b\"\\\\h\\xf2\",\n        b\"\\\\h\\xf2\",\n        b\"\\\\h\\xf2\",\n        b\"\\\\h\\xf2\",\n        b\"\\\\h\\xf2\",\n        b\"\\\\h\\xf2\",\n        b\"\\\\h\\xf2\",\n        b\"\\\\h\\xf2\",\n        b\"\\\\h\\xf2\",\n        b\"\\\\h\\xf2\",\n        b\"i\\x05\\x0b\\xb1\",\n        b\"i\\x05\\x0b\\xb1\",\n        b\"\\\\h\\xf2\",\n    )\n    r.ltrim(key1, 0, 18)\n    r.keys(\"*\")\n    with pytest.raises(redis.ResponseError):\n        r.move(key4, 993)\n    r.lrange(b\"\", 0, 38001)\n    with pytest.raises(redis.ResponseError):\n  
      r.sort(key4)\n    r.lindex(key1, -2)\n    r.rpoplpush(key4, key4)\n    r.lrem(key4, -18528, b\"\\\\h\\xf2\")\n"
  },
  {
    "path": "tests/fakeredis/test/test_json/__init__.py",
    "content": ""
  },
  {
    "path": "tests/fakeredis/test/test_json/test_json.py",
    "content": "\"\"\"\nTests for `fakeredis-py`'s emulation of Redis's JSON.GET command subset.\n\"\"\"\n\nfrom __future__ import annotations\n\nimport json\nfrom test import testtools\n\nimport pytest\nimport redis\nfrom redis.commands.json.path import Path\n\njson_tests = pytest.importorskip(\"jsonpath_ng\")\n\n\ndef test_jsonget(r: redis.Redis):\n    data = {\"x\": \"bar\", \"y\": {\"x\": 33}}\n    r.json().set(\"foo\", Path.root_path(), data)\n    assert r.json().get(\"foo\") == data\n    assert r.json().get(\"foo\", Path(\"$..x\")) == [\"bar\", 33]\n\n    data2 = {\"x\": \"bar\"}\n    r.json().set(\n        \"foo2\",\n        Path.root_path(),\n        data2,\n    )\n    assert r.json().get(\"foo2\") == data2\n    assert r.json().get(\"foo2\", \"$\") == [\n        data2,\n    ]\n    assert r.json().get(\"foo2\", Path(\"$.a\"), Path(\"$.x\")) == {\"$.a\": [], \"$.x\": [\"bar\"]}\n\n    assert r.json().get(\"non-existing-key\") is None\n\n    r.json().set(\n        \"foo2\",\n        Path.root_path(),\n        {\"x\": \"bar\", \"y\": {\"x\": 33}},\n    )\n    assert r.json().get(\"foo2\") == {\"x\": \"bar\", \"y\": {\"x\": 33}}\n    assert r.json().get(\"foo2\", Path(\"$..x\")) == [\"bar\", 33]\n\n    r.json().set(\n        \"foo\",\n        Path.root_path(),\n        {\"x\": \"bar\"},\n    )\n    assert r.json().get(\"foo\") == {\"x\": \"bar\"}\n    assert r.json().get(\"foo\", Path(\"$.a\"), Path(\"$.x\")) == {\"$.a\": [], \"$.x\": [\"bar\"]}\n    assert r.json().get(\"unknown\", \"$\") is None\n\n\ndef test_json_setgetdeleteforget(r: redis.Redis):\n    data = {\"x\": \"bar\"}\n    assert r.json().set(\"foo\", Path.root_path(), data) == 1\n    assert r.json().get(\"foo\") == data\n    assert r.json().get(\"baz\") is None\n    assert r.json().delete(\"foo\") == 1\n    assert r.json().forget(\"foo\") == 0  # second delete\n    assert r.exists(\"foo\") == 0\n\n\ndef test_json_delete_with_dollar(r: redis.Redis):\n    doc1 = {\"a\": 1, \"nested\": {\"a\": 2, 
\"b\": 3}}\n    assert r.json().set(\"doc1\", Path.root_path(), doc1)\n    assert r.json().delete(\"doc1\", \"$..a\") == 2\n    assert r.json().get(\"doc1\", Path.root_path()) == {\"nested\": {\"b\": 3}}\n\n    doc2 = {\"a\": {\"a\": 2, \"b\": 3}, \"b\": [\"a\", \"b\"], \"nested\": {\"b\": [True, \"a\", \"b\"]}}\n    r.json().set(\"doc2\", \"$\", doc2)\n    assert r.json().delete(\"doc2\", \"$..a\") == 1\n    assert r.json().get(\"doc2\", Path.root_path()) == {\n        \"nested\": {\"b\": [True, \"a\", \"b\"]},\n        \"b\": [\"a\", \"b\"],\n    }\n\n    doc3 = [\n        {\n            \"ciao\": [\"non ancora\"],\n            \"nested\": [\n                {\"ciao\": [1, \"a\"]},\n                {\"ciao\": [2, \"a\"]},\n                {\"ciaoc\": [3, \"non\", \"ciao\"]},\n                {\"ciao\": [4, \"a\"]},\n                {\"e\": [5, \"non\", \"ciao\"]},\n            ],\n        }\n    ]\n    assert r.json().set(\"doc3\", Path.root_path(), doc3)\n    assert r.json().delete(\"doc3\", '$.[0][\"nested\"]..ciao') == 3\n\n    doc3val = [\n        [\n            {\n                \"ciao\": [\"non ancora\"],\n                \"nested\": [\n                    {},\n                    {},\n                    {\"ciaoc\": [3, \"non\", \"ciao\"]},\n                    {},\n                    {\"e\": [5, \"non\", \"ciao\"]},\n                ],\n            }\n        ]\n    ]\n    assert r.json().get(\"doc3\", Path.root_path()) == doc3val[0]\n\n    # Test default path\n    assert r.json().delete(\"doc3\") == 1\n    assert r.json().get(\"doc3\", Path.root_path()) is None\n\n    r.json().delete(\"not_a_document\", \"..a\")\n\n\ndef test_json_et_non_dict_value(r: redis.Redis):\n    r.json().set(\n        \"str\",\n        Path.root_path(),\n        \"str_val\",\n    )\n    assert r.json().get(\"str\") == \"str_val\"\n\n    r.json().set(\"bool\", Path.root_path(), True)\n    assert r.json().get(\"bool\") is True\n\n    r.json().set(\"bool\", Path.root_path(), 
False)\n    assert r.json().get(\"bool\") is False\n\n\ndef test_jsonset_existential_modifiers_should_succeed(r: redis.Redis):\n    obj = {\"foo\": \"bar\"}\n    assert r.json().set(\"obj\", Path.root_path(), obj)\n\n    # Test that flags prevent updates when conditions are unmet\n    assert (\n        r.json().set(\n            \"obj\",\n            Path(\"foo\"),\n            \"baz\",\n            nx=True,\n        )\n        is None\n    )\n    assert r.json().get(\"obj\") == obj\n\n    assert (\n        r.json().set(\n            \"obj\",\n            Path(\"qaz\"),\n            \"baz\",\n            xx=True,\n        )\n        is None\n    )\n    assert r.json().get(\"obj\") == obj\n\n    # Test that flags allow updates when conditions are met\n    assert r.json().set(\"obj\", Path(\"foo\"), \"baz\", xx=True) == 1\n    assert r.json().set(\"obj\", Path(\"foo2\"), \"qaz\", nx=True) == 1\n    assert r.json().get(\"obj\") == {\"foo\": \"baz\", \"foo2\": \"qaz\"}\n\n    # Test with raw\n    obj = {\"foo\": \"bar\"}\n    testtools.raw_command(r, \"json.set\", \"obj\", \"$\", json.dumps(obj))\n    assert r.json().get(\"obj\") == obj\n\n\ndef test_jsonset_flags_should_be_mutually_exclusive(r: redis.Redis):\n    with pytest.raises(Exception):\n        r.json().set(\"obj\", Path(\"foo\"), \"baz\", nx=True, xx=True)\n    with pytest.raises(redis.ResponseError):\n        testtools.raw_command(\n            r, \"json.set\", \"obj\", \"$\", json.dumps({\"foo\": \"bar\"}), \"NX\", \"XX\"\n        )\n\n\ndef test_json_unknown_param(r: redis.Redis):\n    with pytest.raises(redis.ResponseError):\n        testtools.raw_command(\n            r, \"json.set\", \"obj\", \"$\", json.dumps({\"foo\": \"bar\"}), \"unknown\"\n        )\n\n\ndef test_jsonmget(r: redis.Redis):\n    # Test mget with multi paths\n    r.json().set(\n        \"doc1\",\n        \"$\",\n        {\"a\": 1, \"b\": 2, \"nested\": {\"a\": 3}, \"c\": None, \"nested2\": {\"a\": None}},\n    )\n    r.json().set(\n    
    \"doc2\",\n        \"$\",\n        {\"a\": 4, \"b\": 5, \"nested\": {\"a\": 6}, \"c\": None, \"nested2\": {\"a\": [None]}},\n    )\n    r.json().set(\n        \"doc3\",\n        \"$\",\n        {\n            \"a\": 5,\n            \"b\": 5,\n            \"nested\": {\"a\": 8},\n            \"c\": None,\n            \"nested2\": {\"a\": {\"b\": \"nested3\"}},\n        },\n    )\n    # Compare also to single JSON.GET\n    assert r.json().get(\"doc1\", Path(\"$..a\")) == [1, 3, None]\n    assert r.json().get(\"doc2\", \"$..a\") == [4, 6, [None]]\n    assert r.json().get(\"doc3\", \"$..a\") == [5, 8, {\"b\": \"nested3\"}]\n\n    # Test mget with single path\n    assert r.json().mget([\"doc1\"], \"$..a\") == [[1, 3, None]]\n\n    # Test mget with multi path\n    assert r.json().mget([\"doc1\", \"doc2\", \"doc3\"], \"$..a\") == [\n        [1, 3, None],\n        [4, 6, [None]],\n        [5, 8, {\"b\": \"nested3\"}],\n    ]\n\n    # Test missing key\n    assert r.json().mget([\"doc1\", \"missing_doc\"], \"$..a\") == [[1, 3, None], None]\n\n    assert r.json().mget([\"missing_doc1\", \"missing_doc2\"], \"$..a\") == [None, None]\n\n\ndef test_jsonmget_should_succeed(r: redis.Redis):\n    r.json().set(\"1\", Path.root_path(), 1)\n    r.json().set(\"2\", Path.root_path(), 2)\n\n    assert r.json().mget([\"1\"], Path.root_path()) == [1]\n\n    assert r.json().mget([1, 2], Path.root_path()) == [1, 2]\n\n\ndef test_jsonclear(r: redis.Redis):\n    r.json().set(\n        \"arr\",\n        Path.root_path(),\n        [0, 1, 2, 3, 4],\n    )\n\n    assert 1 == r.json().clear(\n        \"arr\",\n        Path.root_path(),\n    )\n    assert [] == r.json().get(\"arr\")\n\n\ndef test_jsonclear_dollar(r: redis.Redis):\n    data = {\n        \"nested1\": {\"a\": {\"foo\": 10, \"bar\": 20}},\n        \"a\": [\"foo\"],\n        \"nested2\": {\"a\": \"claro\"},\n        \"nested3\": {\"a\": {\"baz\": 50}},\n    }\n    r.json().set(\"doc1\", \"$\", data)\n    # Test multi\n    assert 
r.json().clear(\"doc1\", \"$..a\") == 3\n\n    assert r.json().get(\"doc1\", \"$\") == [\n        {\"nested1\": {\"a\": {}}, \"a\": [], \"nested2\": {\"a\": \"claro\"}, \"nested3\": {\"a\": {}}}\n    ]\n\n    # Test single\n    r.json().set(\"doc1\", \"$\", data)\n    assert r.json().clear(\"doc1\", \"$.nested1.a\") == 1\n    assert r.json().get(\"doc1\", \"$\") == [\n        {\n            \"nested1\": {\"a\": {}},\n            \"a\": [\"foo\"],\n            \"nested2\": {\"a\": \"claro\"},\n            \"nested3\": {\"a\": {\"baz\": 50}},\n        }\n    ]\n\n    # Test missing path (defaults to root)\n    assert r.json().clear(\"doc1\") == 1\n    assert r.json().get(\"doc1\", \"$\") == [{}]\n\n\ndef test_jsonclear_no_doc(r: redis.Redis):\n    # Test missing key\n    with pytest.raises(redis.ResponseError):\n        r.json().clear(\"non_existing_doc\", \"$..a\")\n\n\ndef test_jsonstrlen(r: redis.Redis):\n    data = {\"x\": \"bar\", \"y\": {\"x\": 33}}\n    r.json().set(\"foo\", Path.root_path(), data)\n    assert r.json().strlen(\"foo\", Path(\"$..x\")) == [3, None]\n\n    r.json().set(\"foo2\", Path.root_path(), \"data2\")\n    assert r.json().strlen(\"foo2\") == 5\n    assert r.json().strlen(\"foo2\", Path.root_path()) == 5\n\n    r.json().set(\"foo3\", Path.root_path(), {\"x\": \"string\"})\n    assert r.json().strlen(\"foo3\", Path(\"$.x\")) == [\n        6,\n    ]\n\n    assert r.json().strlen(\"non-existing\") is None\n\n    r.json().set(\"str\", Path.root_path(), \"foo\")\n    assert r.json().strlen(\"str\", Path.root_path()) == 3\n    # Test multi\n    r.json().set(\n        \"doc1\", \"$\", {\"a\": \"foo\", \"nested1\": {\"a\": \"hello\"}, \"nested2\": {\"a\": 31}}\n    )\n    assert r.json().strlen(\"doc1\", \"$..a\") == [3, 5, None]\n\n    res2 = r.json().strappend(\"doc1\", \"bar\", \"$..a\")\n    res1 = r.json().strlen(\"doc1\", \"$..a\")\n    assert res1 == res2\n\n    # Test single\n    assert r.json().strlen(\"doc1\", \"$.nested1.a\") == [8]\n    
assert r.json().strlen(\"doc1\", \"$.nested2.a\") == [None]\n\n    # Test missing key\n    # Note: Dragonfly returns NIL in the accordance to the official docs\n    # with pytest.raises(redis.ResponseError):\n    #    r.json().strlen(\"non_existing_doc\", \"$..a\")\n\n\ndef test_toggle(r: redis.Redis):\n    r.json().set(\"bool\", Path.root_path(), False)\n    assert r.json().toggle(\"bool\", Path.root_path())\n    assert r.json().toggle(\"bool\", Path.root_path()) is False\n\n    r.json().set(\"num\", Path.root_path(), 1)\n\n    with pytest.raises(redis.exceptions.ResponseError):\n        r.json().toggle(\"num\", Path.root_path())\n\n\ndef test_toggle_dollar(r: redis.Redis):\n    data = {\n        \"a\": [\"foo\"],\n        \"nested1\": {\"a\": False},\n        \"nested2\": {\"a\": 31},\n        \"nested3\": {\"a\": True},\n    }\n    r.json().set(\"doc1\", \"$\", data)\n    # Test multi\n    assert r.json().toggle(\"doc1\", \"$..a\") == [None, 1, None, 0]\n    data[\"nested1\"][\"a\"] = True\n    data[\"nested3\"][\"a\"] = False\n    assert r.json().get(\"doc1\", \"$\") == [data]\n\n    # Test missing key\n    with pytest.raises(redis.exceptions.ResponseError):\n        r.json().toggle(\"non_existing_doc\", \"$..a\")\n\n\ndef test_json_commands_in_pipeline(r: redis.Redis):\n    p = r.json().pipeline()\n    p.set(\"foo\", Path.root_path(), \"bar\")\n    p.get(\"foo\")\n    p.delete(\"foo\")\n    assert [True, \"bar\", 1] == p.execute()\n    assert r.keys() == []\n    assert r.get(\"foo\") is None\n\n    # now with a true, json object\n    r.flushdb()\n    p = r.json().pipeline()\n    d = {\"hello\": \"world\", \"oh\": \"snap\"}\n\n    with pytest.deprecated_call():\n        p.jsonset(\"foo\", Path.root_path(), d)\n        p.jsonget(\"foo\")\n\n    p.exists(\"not-a-real-key\")\n    p.delete(\"foo\")\n\n    assert [True, d, 0, 1] == p.execute()\n    assert r.keys() == []\n    assert r.get(\"foo\") is None\n\n\ndef test_strappend(r: redis.Redis):\n    # Test single\n  
  r.json().set(\"json-key\", Path.root_path(), \"foo\")\n    assert r.json().strappend(\"json-key\", \"bar\") == 6\n    assert \"foobar\" == r.json().get(\"json-key\", Path.root_path())\n\n    # Test multi\n    r.json().set(\n        \"doc1\",\n        Path.root_path(),\n        {\n            \"a\": \"foo\",\n            \"nested1\": {\"a\": \"hello\"},\n            \"nested2\": {\"a\": 31},\n        },\n    )\n    assert r.json().strappend(\"doc1\", \"bar\", \"$..a\") == [6, 8, None]\n    assert r.json().get(\"doc1\") == {\n        \"a\": \"foobar\",\n        \"nested1\": {\"a\": \"hellobar\"},\n        \"nested2\": {\"a\": 31},\n    }\n\n    # Test single\n    assert r.json().strappend(\n        \"doc1\",\n        \"baz\",\n        \"$.nested1.a\",\n    ) == [11]\n    assert r.json().get(\"doc1\") == {\n        \"a\": \"foobar\",\n        \"nested1\": {\"a\": \"hellobarbaz\"},\n        \"nested2\": {\"a\": 31},\n    }\n\n    # Test missing key\n    with pytest.raises(redis.exceptions.ResponseError):\n        r.json().strappend(\"non_existing_doc\", \"$..a\", \"err\")\n\n    # Test multi\n    r.json().set(\n        \"doc2\",\n        Path.root_path(),\n        {\n            \"a\": \"foo\",\n            \"nested1\": {\"a\": \"hello\"},\n            \"nested2\": {\"a\": \"hi\"},\n        },\n    )\n    assert r.json().strappend(\"doc2\", \"bar\", \"$.*.a\") == [8, 5]\n    assert r.json().get(\"doc2\") == {\n        \"a\": \"foo\",\n        \"nested1\": {\"a\": \"hellobar\"},\n        \"nested2\": {\"a\": \"hibar\"},\n    }\n\n    # Test missing path\n    r.json().set(\n        \"doc1\",\n        Path.root_path(),\n        {\n            \"a\": \"foo\",\n            \"nested1\": {\"a\": \"hello\"},\n            \"nested2\": {\"a\": 31},\n        },\n    )\n    with pytest.raises(redis.exceptions.ResponseError):\n        r.json().strappend(\"doc1\", \"add\", \"piu\")\n\n    # Test raw command with no arguments\n    with pytest.raises(redis.ResponseError):\n        
testtools.raw_command(r, \"json.strappend\", \"\")\n\n\n@pytest.mark.decode_responses(True)\ndef test_decode_null(r: redis.Redis):\n    assert r.json().get(\"abc\") is None\n\n\ndef test_decode_response_disabaled_null(r: redis.Redis):\n    assert r.json().get(\"abc\") is None\n\n\ndef test_json_get_jset(r: redis.Redis):\n    assert r.json().set(\"foo\", Path.root_path(), \"bar\") == 1\n    assert \"bar\" == r.json().get(\"foo\")\n    assert r.json().get(\"baz\") is None\n    assert 1 == r.json().delete(\"foo\")\n    assert r.exists(\"foo\") == 0\n\n\ndef test_nonascii_setgetdelete(r: redis.Redis):\n    assert r.json().set(\n        \"not-ascii\",\n        Path.root_path(),\n        \"hyvää-élève\",\n    )\n    assert \"hyvää-élève\" == r.json().get(\n        \"not-ascii\",\n        no_escape=True,\n    )\n    assert 1 == r.json().delete(\"not-ascii\")\n    assert r.exists(\"not-ascii\") == 0\n\n\ndef test_json_setbinarykey(r: redis.Redis):\n    data = {\"hello\": \"world\", b\"some\": \"value\"}\n\n    with pytest.raises(TypeError):\n        r.json().set(\"some-key\", Path.root_path(), data)\n\n    assert r.json().set(\"some-key\", Path.root_path(), data, decode_keys=True)\n\n\ndef test_set_file(r: redis.Redis):\n    # Standard Library Imports\n    import json\n    import tempfile\n\n    obj = {\"hello\": \"world\"}\n    jsonfile = tempfile.NamedTemporaryFile(suffix=\".json\")\n    with open(jsonfile.name, \"w+\") as fp:\n        fp.write(json.dumps(obj))\n\n    no_json_file = tempfile.NamedTemporaryFile()\n    no_json_file.write(b\"Hello World\")\n\n    assert r.json().set_file(\"test\", Path.root_path(), jsonfile.name)\n    assert r.json().get(\"test\") == obj\n    with pytest.raises(json.JSONDecodeError):\n        r.json().set_file(\"test2\", Path.root_path(), no_json_file.name)\n\n\ndef test_set_path(r: redis.Redis):\n    # Standard Library Imports\n    import json\n    import tempfile\n\n    root = tempfile.mkdtemp()\n    jsonfile = 
tempfile.NamedTemporaryFile(mode=\"w+\", dir=root, delete=False)\n    no_json_file = tempfile.NamedTemporaryFile(mode=\"a+\", dir=root, delete=False)\n    jsonfile.write(json.dumps({\"hello\": \"world\"}))\n    jsonfile.close()\n    no_json_file.write(\"hello\")\n\n    result = {jsonfile.name: True, no_json_file.name: False}\n    assert r.json().set_path(Path.root_path(), root) == result\n    assert r.json().get(jsonfile.name.rsplit(\".\")[0]) == {\"hello\": \"world\"}\n\n\ndef test_type(r: redis.Redis):\n    r.json().set(\"1\", Path.root_path(), 1)\n\n    assert r.json().type(\"1\", Path.root_path()) == b\"integer\"\n    assert r.json().type(\"1\") == b\"integer\"  # noqa: E721\n\n    meta_data = {\n        \"object\": {},\n        \"array\": [],\n        \"string\": \"str\",\n        \"integer\": 42,\n        \"number\": 1.2,\n        \"boolean\": False,\n        \"null\": None,\n    }\n    data = {k: {\"a\": meta_data[k]} for k in meta_data}\n    r.json().set(\"doc1\", \"$\", data)\n\n    # Dragonfly does not guarantee the traversal order for multi field traversal\n    # json.type api assumes a predefined order and is not designed very well.\n    # Test multi by comparing unordered sets\n    assert set(r.json().type(\"doc1\", \"$..a\")) == set(\n        [k.encode() for k in meta_data.keys()]\n    )  # noqa: E721\n\n    # Test single\n    assert r.json().type(\"doc1\", \"$.integer.a\") == [b\"integer\"]  # noqa: E721\n    assert r.json().type(\"doc1\") == b\"object\"  # noqa: E721\n\n    # Test missing key\n    assert r.json().type(\"non_existing_doc\", \"..a\") is None\n\n\ndef test_objlen(r: redis.Redis):\n    # Test missing key, and path\n    with pytest.raises(redis.ResponseError):\n        r.json().objlen(\"non_existing_doc\", \"$..a\")\n\n    obj = {\"foo\": \"bar\", \"baz\": \"qaz\"}\n\n    r.json().set(\"obj\", Path.root_path(), obj)\n    assert len(obj) == r.json().objlen(\"obj\", Path.root_path())\n\n    r.json().set(\"obj\", Path.root_path(), obj)\n    
assert len(obj) == r.json().objlen(\"obj\")\n    r.json().set(\n        \"doc1\",\n        \"$\",\n        {\n            \"a\": [\"foo\"],\n            \"nested1\": {\"a\": {\"foo\": 10, \"bar\": 20}},\n            \"nested2\": {\"a\": {\"baz\": 50}},\n        },\n    )\n    # Test multi\n    assert r.json().objlen(\"doc1\", \"$..a\") == [None, 2, 1]\n    # Test single\n    assert r.json().objlen(\"doc1\", \"$.nested1.a\") == [2]\n\n    assert r.json().objlen(\"doc1\", \"$.nowhere\") == []\n\n    # Test legacy\n    assert r.json().objlen(\"doc1\", \".*.a\") == 2\n\n    # Test single\n    assert r.json().objlen(\"doc1\", \".nested2.a\") == 1\n\n    # Test missing key\n    assert r.json().objlen(\"non_existing_doc\", \"..a\") is None\n\n    # Test missing path\n    # with pytest.raises(exceptions.ResponseError):\n    r.json().objlen(\"doc1\", \".nowhere\")\n\n\ndef test_objkeys(r: redis.Redis):\n    obj = {\"foo\": \"bar\", \"baz\": \"qaz\"}\n    r.json().set(\"obj\", Path.root_path(), obj)\n    keys = r.json().objkeys(\"obj\", Path.root_path())\n    keys.sort()\n    exp = list(obj.keys())\n    exp.sort()\n    assert exp == keys\n\n    r.json().set(\"obj\", Path.root_path(), obj)\n\n    # Dragonfly does not guarantee the order (implementation detail)\n    assert set(r.json().objkeys(\"obj\")) == obj.keys()\n\n    assert r.json().objkeys(\"fakekey\") is None\n\n    r.json().set(\n        \"doc1\",\n        \"$\",\n        {\n            \"nested1\": {\"a\": {\"foo\": 10, \"bar\": 20}},\n            \"a\": [\"foo\"],\n            \"nested2\": {\"a\": {\"baz\": 50}},\n        },\n    )\n\n    # Test single\n    assert set(r.json().objkeys(\"doc1\", \"$.nested1.a\")[0]) == {b\"foo\", b\"bar\"}\n\n    # Test legacy\n    assert set(r.json().objkeys(\"doc1\", \".*.a\")) == {\"foo\", \"bar\"}\n    # Test single\n    assert r.json().objkeys(\"doc1\", \".nested2.a\") == [\"baz\"]\n\n    # Test missing key\n    assert r.json().objkeys(\"non_existing_doc\", \"..a\") is None\n\n 
   # Test non existing doc\n    with pytest.raises(redis.ResponseError):\n        assert r.json().objkeys(\"non_existing_doc\", \"$..a\") == []\n\n    assert r.json().objkeys(\"doc1\", \"$..nowhere\") == []\n\n\ndef test_numincrby(r: redis.Redis):\n    r.json().set(\"num\", Path.root_path(), 1)\n\n    assert 2 == r.json().numincrby(\"num\", Path.root_path(), 1)\n    assert 2.5 == r.json().numincrby(\"num\", Path.root_path(), 0.5)\n    assert 1.25 == r.json().numincrby(\"num\", Path.root_path(), -1.25)\n    # Test NUMINCRBY\n    r.json().set(\"doc1\", \"$\", {\"a\": \"b\", \"b\": [{\"a\": 2}, {\"a\": 5.0}, {\"a\": \"c\"}]})\n    # Test multi\n    assert r.json().numincrby(\"doc1\", \"$..a\", 2) == [None, 4, 7.0, None]\n\n    assert r.json().numincrby(\"doc1\", \"$..a\", 2.5) == [None, 6.5, 9.5, None]\n    # Test single\n    assert r.json().numincrby(\"doc1\", \"$.b[1].a\", 2) == [11.5]\n\n    assert r.json().numincrby(\"doc1\", \"$.b[2].a\", 2) == [None]\n    assert r.json().numincrby(\"doc1\", \"$.b[1].a\", 3.5) == [15.0]\n\n\ndef test_nummultby(r: redis.Redis):\n    r.json().set(\"num\", Path.root_path(), 1)\n\n    with pytest.deprecated_call():\n        assert r.json().nummultby(\"num\", Path.root_path(), 2) == 2\n        assert r.json().nummultby(\"num\", Path.root_path(), 2.5) == 5\n        assert r.json().nummultby(\"num\", Path.root_path(), 0.5) == 2.5\n\n    r.json().set(\"doc1\", \"$\", {\"a\": \"b\", \"b\": [{\"a\": 2}, {\"a\": 5.0}, {\"a\": \"c\"}]})\n\n    # test list\n    with pytest.deprecated_call():\n        assert r.json().nummultby(\"doc1\", \"$..a\", 2) == [None, 4, 10, None]\n        assert r.json().nummultby(\"doc1\", \"$..a\", 2.5) == [None, 10.0, 25.0, None]\n\n    # Test single\n    with pytest.deprecated_call():\n        assert r.json().nummultby(\"doc1\", \"$.b[1].a\", 2) == [50.0]\n        assert r.json().nummultby(\"doc1\", \"$.b[2].a\", 2) == [None]\n        assert r.json().nummultby(\"doc1\", \"$.b[1].a\", 3) == [150.0]\n\n    # test 
missing keys\n    with pytest.raises(redis.ResponseError):\n        r.json().numincrby(\"non_existing_doc\", \"$..a\", 2)\n        r.json().nummultby(\"non_existing_doc\", \"$..a\", 2)\n\n    # Test legacy NUMINCRBY\n    r.json().set(\"doc1\", \"$\", {\"a\": \"b\", \"b\": [{\"a\": 2}, {\"a\": 5.0}, {\"a\": \"c\"}]})\n    assert r.json().numincrby(\"doc1\", \".b[0].a\", 3) == 5\n\n    # Test legacy NUMMULTBY\n    r.json().set(\"doc1\", \"$\", {\"a\": \"b\", \"b\": [{\"a\": 2}, {\"a\": 5.0}, {\"a\": \"c\"}]})\n\n    with pytest.deprecated_call():\n        assert r.json().nummultby(\"doc1\", \".b[0].a\", 3) == 6\n\n\n@testtools.run_test_if_redispy_ver(\"gte\", \"4.6\")\n@pytest.mark.min_server(\"7.1\")\ndef test_json_merge(r: redis.Redis):\n    # Test with root path $\n    assert r.json().set(\n        \"person_data\",\n        \"$\",\n        {\"person1\": {\"personal_data\": {\"name\": \"John\"}}},\n    )\n    assert r.json().merge(\n        \"person_data\", \"$\", {\"person1\": {\"personal_data\": {\"hobbies\": \"reading\"}}}\n    )\n    assert r.json().get(\"person_data\") == {\n        \"person1\": {\"personal_data\": {\"name\": \"John\", \"hobbies\": \"reading\"}}\n    }\n\n    # Test with nested path $.person1.personal_data\n    assert r.json().merge(\n        \"person_data\", \"$.person1.personal_data\", {\"country\": \"Israel\"}\n    )\n    assert r.json().get(\"person_data\") == {\n        \"person1\": {\n            \"personal_data\": {\"name\": \"John\", \"hobbies\": \"reading\", \"country\": \"Israel\"}\n        }\n    }\n\n    # Test with null value to delete a value\n    assert r.json().merge(\"person_data\", \"$.person1.personal_data\", {\"name\": None})\n    assert r.json().get(\"person_data\") == {\n        \"person1\": {\"personal_data\": {\"country\": \"Israel\", \"hobbies\": \"reading\"}}\n    }\n\n\n@testtools.run_test_if_redispy_ver(\"gte\", \"4.6\")\n@pytest.mark.min_server(\"7.1\")\ndef test_mset(r: redis.Redis):\n    r.json().mset([(\"1\", 
Path.root_path(), 1), (\"2\", Path.root_path(), 2)])\n\n    assert r.json().mget([\"1\"], Path.root_path()) == [1]\n    assert r.json().mget([\"1\", \"2\"], Path.root_path()) == [1, 2]\n"
  },
  {
    "path": "tests/fakeredis/test/test_json/test_json_arr_commands.py",
    "content": "import pytest\nimport redis\nfrom redis.commands.json.path import Path\n\nfrom test.testtools import raw_command\n\njson_tests = pytest.importorskip(\"jsonpath_ng\")\n\n\ndef test_arrlen(r: redis.Redis):\n    r.json().set(\n        \"arr\",\n        Path.root_path(),\n        [0, 1, 2, 3, 4],\n    )\n    assert (\n        r.json().arrlen(\n            \"arr\",\n            Path.root_path(),\n        )\n        == 5\n    )\n    assert r.json().arrlen(\"arr\") == 5\n    assert r.json().arrlen(\"fake-key\") is None\n\n    r.json().set(\n        \"doc1\",\n        Path.root_path(),\n        {\n            \"a\": [\"foo\"],\n            \"nested1\": {\"a\": [\"hello\", None, \"world\"]},\n            \"nested2\": {\"a\": 31},\n        },\n    )\n\n    assert r.json().arrlen(\"doc1\", \"$..a\") == [1, 3, None]\n    assert r.json().arrlen(\"doc1\", \"$.nested1.a\") == [3]\n\n    r.json().set(\n        \"doc2\",\n        \"$\",\n        {\n            \"a\": [\"foo\"],\n            \"nested1\": {\"a\": [\"hello\", 1, 1, None, \"world\"]},\n            \"nested2\": {\"a\": 31},\n        },\n    )\n    assert r.json().arrlen(\"doc2\", \"$..a\") == [1, 5, None]\n    assert r.json().arrlen(\"doc2\", \".nested1.a\") == 5\n    r.json().set(\n        \"doc1\",\n        \"$\",\n        {\n            \"a\": [\"foo\"],\n            \"nested1\": {\"a\": [\"hello\", None, \"world\"]},\n            \"nested2\": {\"a\": 31},\n        },\n    )\n\n    # Test multi\n    assert r.json().arrlen(\"doc1\", \"$..a\") == [1, 3, None]\n    assert r.json().arrappend(\"doc1\", \"$..a\", \"non\", \"abba\", \"stanza\") == [\n        4,\n        6,\n        None,\n    ]\n\n    r.json().clear(\"doc1\", \"$.a\")\n    assert r.json().arrlen(\"doc1\", \"$..a\") == [0, 6, None]\n    # Test single\n    assert r.json().arrlen(\"doc1\", \"$.nested1.a\") == [6]\n\n    # Test missing key\n    with pytest.raises(redis.ResponseError):\n        r.json().arrappend(\"non_existing_doc\", 
\"$..a\")\n\n    r.json().set(\n        \"doc1\",\n        \"$\",\n        {\n            \"a\": [\"foo\"],\n            \"nested1\": {\"a\": [\"hello\", None, \"world\"]},\n            \"nested2\": {\"a\": 31},\n        },\n    )\n    # Test multi (return result of last path)\n    assert r.json().arrlen(\"doc1\", \"$..a\") == [1, 3, None]\n    assert r.json().arrappend(\"doc1\", \"..a\", \"non\", \"abba\", \"stanza\") == 6\n\n    # Test single\n    assert r.json().arrlen(\"doc1\", \".nested1.a\") == 6\n\n    # Test missing key\n    assert r.json().arrlen(\"non_existing_doc\", \"..a\") is None\n\n\ndef test_arrappend(r: redis.Redis):\n    with pytest.raises(redis.ResponseError):\n        r.json().arrappend(\"non-existing-key\", Path.root_path(), 2)\n\n    r.json().set(\"arr\", Path.root_path(), [1])\n    assert r.json().arrappend(\"arr\", Path.root_path(), 2) == 2\n    assert r.json().arrappend(\"arr\", Path.root_path(), 3, 4) == 4\n    assert r.json().arrappend(\"arr\", Path.root_path(), *[5, 6, 7]) == 7\n    assert r.json().get(\"arr\") == [1, 2, 3, 4, 5, 6, 7]\n    r.json().set(\n        \"doc1\",\n        \"$\",\n        {\n            \"a\": [\"foo\"],\n            \"nested1\": {\"a\": [\"hello\", None, \"world\"]},\n            \"nested2\": {\"a\": 31},\n        },\n    )\n    # Test multi\n    assert r.json().arrappend(\"doc1\", \"$..a\", \"bar\", \"racuda\") == [3, 5, None]\n    assert r.json().get(\"doc1\", \"$\") == [\n        {\n            \"a\": [\"foo\", \"bar\", \"racuda\"],\n            \"nested1\": {\"a\": [\"hello\", None, \"world\", \"bar\", \"racuda\"]},\n            \"nested2\": {\"a\": 31},\n        }\n    ]\n    assert r.json().arrappend(\"doc1\", \"$.nested1.a\", \"baz\") == [6]\n\n    # Test legacy\n    r.json().set(\n        \"doc1\",\n        \"$\",\n        {\n            \"a\": [\"foo\"],\n            \"nested1\": {\"a\": [\"hello\", None, \"world\"]},\n            \"nested2\": {\"a\": 31},\n        },\n    )\n    # Test multi (all 
paths are updated, but return result of last path)\n    assert r.json().arrappend(\"doc1\", \"..a\", \"bar\", \"racuda\") == 5\n\n    assert r.json().get(\"doc1\", \"$\") == [\n        {\n            \"a\": [\"foo\", \"bar\", \"racuda\"],\n            \"nested1\": {\"a\": [\"hello\", None, \"world\", \"bar\", \"racuda\"]},\n            \"nested2\": {\"a\": 31},\n        }\n    ]\n    # Test single\n    assert r.json().arrappend(\"doc1\", \".nested1.a\", \"baz\") == 6\n    assert r.json().get(\"doc1\", \"$\") == [\n        {\n            \"a\": [\"foo\", \"bar\", \"racuda\"],\n            \"nested1\": {\"a\": [\"hello\", None, \"world\", \"bar\", \"racuda\", \"baz\"]},\n            \"nested2\": {\"a\": 31},\n        }\n    ]\n\n    # Test missing key\n    with pytest.raises(redis.ResponseError):\n        r.json().arrappend(\"non_existing_doc\", \"$..a\")\n\n\ndef test_arrindex(r: redis.Redis):\n    r.json().set(\n        \"foo\",\n        Path.root_path(),\n        [0, 1, 2, 3, 4],\n    )\n\n    assert r.json().arrindex(\"foo\", Path.root_path(), 1) == 1\n    assert r.json().arrindex(\"foo\", Path.root_path(), 1, 2) == -1\n\n    r.json().set(\n        \"store\",\n        \"$\",\n        {\n            \"store\": {\n                \"book\": [\n                    {\n                        \"category\": \"reference\",\n                        \"author\": \"Nigel Rees\",\n                        \"title\": \"Sayings of the Century\",\n                        \"price\": 8.95,\n                        \"size\": [10, 20, 30, 40],\n                    },\n                    {\n                        \"category\": \"fiction\",\n                        \"author\": \"Evelyn Waugh\",\n                        \"title\": \"Sword of Honour\",\n                        \"price\": 12.99,\n                        \"size\": [50, 60, 70, 80],\n                    },\n                    {\n                        \"category\": \"fiction\",\n                        \"author\": 
\"Herman Melville\",\n                        \"title\": \"Moby Dick\",\n                        \"isbn\": \"0-553-21311-3\",\n                        \"price\": 8.99,\n                        \"size\": [5, 10, 20, 30],\n                    },\n                    {\n                        \"category\": \"fiction\",\n                        \"author\": \"J. R. R. Tolkien\",\n                        \"title\": \"The Lord of the Rings\",\n                        \"isbn\": \"0-395-19395-8\",\n                        \"price\": 22.99,\n                        \"size\": [5, 6, 7, 8],\n                    },\n                ],\n                \"bicycle\": {\"color\": \"red\", \"price\": 19.95},\n            }\n        },\n    )\n\n    # Temporary disable filter expressions tests\n    #\n    # assert r.json().get(\"store\", \"$.store.book[?(@.price<10)].size\") == [\n    #     [10, 20, 30, 40],\n    #     [5, 10, 20, 30],\n    # ]\n    # assert r.json().arrindex(\"store\", \"$.store.book[?(@.price<10)].size\", \"20\") == [\n    #     -1,\n    #     -1,\n    # ]\n\n    # Test index of int scalar in multi values\n    r.json().set(\n        \"test_num\",\n        \".\",\n        [\n            {\"arr\": [0, 1, 3.0, 3, 2, 1, 0, 3]},\n            {\"nested1_found\": {\"arr\": [5, 4, 3, 2, 1, 0, 1, 2, 3.0, 2, 4, 5]}},\n            {\"nested2_not_found\": {\"arr\": [2, 4, 6]}},\n            {\"nested3_scalar\": {\"arr\": \"3\"}},\n            [\n                {\"nested41_not_arr\": {\"arr_renamed\": [1, 2, 3]}},\n                {\"nested42_empty_arr\": {\"arr\": []}},\n            ],\n        ],\n    )\n\n    assert r.json().get(\"test_num\", \"$..arr\") == [\n        [0, 1, 3.0, 3, 2, 1, 0, 3],\n        [5, 4, 3, 2, 1, 0, 1, 2, 3.0, 2, 4, 5],\n        [2, 4, 6],\n        \"3\",\n        [],\n    ]\n\n    assert r.json().arrindex(\"test_num\", \"$..nonexistingpath\", 3) == []\n    assert r.json().arrindex(\"test_num\", \"$..arr\", 3) == [3, 2, -1, None, -1]\n\n    # Test 
index of double scalar in multi values\n    assert r.json().arrindex(\"test_num\", \"$..arr\", 3.0) == [2, 8, -1, None, -1]\n\n    # Test index of string scalar in multi values\n    r.json().set(\n        \"test_string\",\n        \".\",\n        [\n            {\"arr\": [\"bazzz\", \"bar\", 2, \"baz\", 2, \"ba\", \"baz\", 3]},\n            {\n                \"nested1_found\": {\n                    \"arr\": [None, \"baz2\", \"buzz\", 2, 1, 0, 1, \"2\", \"baz\", 2, 4, 5]\n                }\n            },\n            {\"nested2_not_found\": {\"arr\": [\"baz2\", 4, 6]}},\n            {\"nested3_scalar\": {\"arr\": \"3\"}},\n            [\n                {\"nested41_arr\": {\"arr_renamed\": [1, \"baz\", 3]}},\n                {\"nested42_empty_arr\": {\"arr\": []}},\n            ],\n        ],\n    )\n    assert r.json().get(\"test_string\", \"$..arr\") == [\n        [\"bazzz\", \"bar\", 2, \"baz\", 2, \"ba\", \"baz\", 3],\n        [None, \"baz2\", \"buzz\", 2, 1, 0, 1, \"2\", \"baz\", 2, 4, 5],\n        [\"baz2\", 4, 6],\n        \"3\",\n        [],\n    ]\n\n    assert r.json().arrindex(\"test_string\", \"$..arr\", \"baz\") == [\n        3,\n        8,\n        -1,\n        None,\n        -1,\n    ]\n\n    assert r.json().arrindex(\"test_string\", \"$..arr\", \"baz\", 2) == [\n        3,\n        8,\n        -1,\n        None,\n        -1,\n    ]\n    assert r.json().arrindex(\"test_string\", \"$..arr\", \"baz\", 4) == [\n        6,\n        8,\n        -1,\n        None,\n        -1,\n    ]\n    assert r.json().arrindex(\"test_string\", \"$..arr\", \"baz\", -5) == [\n        3,\n        8,\n        -1,\n        None,\n        -1,\n    ]\n    assert r.json().arrindex(\"test_string\", \"$..arr\", \"baz\", 4, 7) == [\n        6,\n        -1,\n        -1,\n        None,\n        -1,\n    ]\n    assert r.json().arrindex(\"test_string\", \"$..arr\", \"baz\", 4, -1) == [\n        6,\n        8,\n        -1,\n        None,\n        -1,\n    ]\n    assert 
r.json().arrindex(\"test_string\", \"$..arr\", \"baz\", 4, 0) == [\n        6,\n        8,\n        -1,\n        None,\n        -1,\n    ]\n    assert r.json().arrindex(\"test_string\", \"$..arr\", \"5\", 7, -1) == [\n        -1,\n        -1,\n        -1,\n        None,\n        -1,\n    ]\n    assert r.json().arrindex(\"test_string\", \"$..arr\", \"5\", 7, 0) == [\n        -1,\n        -1,\n        -1,\n        None,\n        -1,\n    ]\n\n    # Test index of None scalar in multi values\n    r.json().set(\n        \"test_None\",\n        \".\",\n        [\n            {\"arr\": [\"bazzz\", \"None\", 2, None, 2, \"ba\", \"baz\", 3]},\n            {\n                \"nested1_found\": {\n                    \"arr\": [\"zaz\", \"baz2\", \"buzz\", 2, 1, 0, 1, \"2\", None, 2, 4, 5]\n                }\n            },\n            {\"nested2_not_found\": {\"arr\": [\"None\", 4, 6]}},\n            {\"nested3_scalar\": {\"arr\": None}},\n            [\n                {\"nested41_arr\": {\"arr_renamed\": [1, None, 3]}},\n                {\"nested42_empty_arr\": {\"arr\": []}},\n            ],\n        ],\n    )\n    assert r.json().get(\"test_None\", \"$..arr\") == [\n        [\"bazzz\", \"None\", 2, None, 2, \"ba\", \"baz\", 3],\n        [\"zaz\", \"baz2\", \"buzz\", 2, 1, 0, 1, \"2\", None, 2, 4, 5],\n        [\"None\", 4, 6],\n        None,\n        [],\n    ]\n\n    # Test with none-scalar value\n    # assert r.json().arrindex(\"test_None\", \"$..nested42_empty_arr.arr\", {\"arr\": []}) == [-1]\n\n    # Test legacy (path begins with dot)\n    # Test index of int scalar in single value\n    assert r.json().arrindex(\"test_num\", \".[0].arr\", 3) == 3\n    assert r.json().arrindex(\"test_num\", \".[0].arr\", 9) == -1\n\n    with pytest.raises(redis.ResponseError):\n        r.json().arrindex(\"test_num\", \".[0].arr_not\", 3)\n    # Test index of string scalar in single value\n    assert r.json().arrindex(\"test_string\", \".[0].arr\", \"baz\") == 3\n    assert 
r.json().arrindex(\"test_string\", \".[0].arr\", \"faz\") == -1\n    # Test index of None scalar in single value\n    assert r.json().arrindex(\"test_None\", \".[0].arr\", \"None\") == 1\n    assert r.json().arrindex(\"test_None\", \"..nested2_not_found.arr\", \"None\") == 0\n\n\ndef test_arrinsert(r: redis.Redis):\n    r.json().set(\n        \"arr\",\n        Path.root_path(),\n        [0, 4],\n    )\n\n    assert r.json().arrinsert(\"arr\", Path.root_path(), 1, *[1, 2, 3]) == 5\n    assert r.json().get(\"arr\") == [0, 1, 2, 3, 4]\n\n    # test prepends\n    r.json().set(\"val2\", Path.root_path(), [5, 6, 7, 8, 9])\n    assert r.json().arrinsert(\"val2\", Path.root_path(), 0, [\"some\", \"thing\"]) == 6\n    assert r.json().get(\"val2\") == [[\"some\", \"thing\"], 5, 6, 7, 8, 9]\n    r.json().set(\n        \"doc1\",\n        \"$\",\n        {\n            \"a\": [\"foo\"],\n            \"nested1\": {\"a\": [\"hello\", None, \"world\"]},\n            \"nested2\": {\"a\": 31},\n        },\n    )\n    # Test multi\n    assert r.json().arrinsert(\"doc1\", \"$..a\", \"1\", \"bar\", \"racuda\") == [3, 5, None]\n\n    assert r.json().get(\"doc1\", \"$\") == [\n        {\n            \"a\": [\"foo\", \"bar\", \"racuda\"],\n            \"nested1\": {\"a\": [\"hello\", \"bar\", \"racuda\", None, \"world\"]},\n            \"nested2\": {\"a\": 31},\n        }\n    ]\n    # Test single\n    assert r.json().arrinsert(\"doc1\", \"$.nested1.a\", -2, \"baz\") == [6]\n    assert r.json().get(\"doc1\", \"$\") == [\n        {\n            \"a\": [\"foo\", \"bar\", \"racuda\"],\n            \"nested1\": {\"a\": [\"hello\", \"bar\", \"racuda\", \"baz\", None, \"world\"]},\n            \"nested2\": {\"a\": 31},\n        }\n    ]\n\n    # Test missing key\n    with pytest.raises(redis.ResponseError):\n        r.json().arrappend(\"non_existing_doc\", \"$..a\")\n\n\ndef test_arrpop(r: redis.Redis):\n    r.json().set(\n        \"arr\",\n        Path.root_path(),\n        [0, 1, 2, 3, 4],\n  
  )\n    assert raw_command(r, \"json.arrpop\", \"arr\") == b\"4\"\n\n    r.json().set(\n        \"arr\",\n        Path.root_path(),\n        [0, 1, 2, 3, 4],\n    )\n    assert r.json().arrpop(\"arr\", Path.root_path(), 4) == 4\n    assert r.json().arrpop(\"arr\", Path.root_path(), -1) == 3\n    assert r.json().arrpop(\"arr\", Path.root_path()) == 2\n    assert r.json().arrpop(\"arr\", Path.root_path(), 0) == 0\n    assert r.json().get(\"arr\") == [1]\n\n    # test out of bounds\n    r.json().set(\n        \"arr\",\n        Path.root_path(),\n        [0, 1, 2, 3, 4],\n    )\n    assert r.json().arrpop(\"arr\", Path.root_path(), 99) == 4\n\n    # none test\n    r.json().set(\n        \"arr\",\n        Path.root_path(),\n        [],\n    )\n    assert r.json().arrpop(\"arr\") is None\n\n    r.json().set(\n        \"doc1\",\n        \"$\",\n        {\n            \"a\": [\"foo\"],\n            \"nested1\": {\"a\": [\"hello\", None, \"world\"]},\n            \"nested2\": {\"a\": 31},\n        },\n    )\n\n    # # Test legacy\n    r.json().set(\n        \"doc1\",\n        \"$\",\n        {\n            \"a\": [\"foo\"],\n            \"nested1\": {\"a\": [\"hello\", None, \"world\"]},\n            \"nested2\": {\"a\": 31},\n        },\n    )\n    # Test multi (all paths are updated, but return result of last path)\n    assert r.json().arrpop(\"doc1\", \"..a\", \"1\") is None\n    assert r.json().get(\"doc1\", \"$\") == [\n        {\"a\": [], \"nested1\": {\"a\": [\"hello\", \"world\"]}, \"nested2\": {\"a\": 31}}\n    ]\n\n    # # Test missing key\n    with pytest.raises(redis.ResponseError):\n        r.json().arrpop(\"non_existing_doc\", \"..a\")\n\n\ndef test_arrtrim(r: redis.Redis):\n    r.json().set(\n        \"arr\",\n        Path.root_path(),\n        [0, 1, 2, 3, 4],\n    )\n\n    assert r.json().arrtrim(\"arr\", Path.root_path(), 1, 3) == 3\n    assert r.json().get(\"arr\") == [1, 2, 3]\n\n    # <0 test, should be 0 equivalent\n    r.json().set(\n        
\"arr\",\n        Path.root_path(),\n        [0, 1, 2, 3, 4],\n    )\n    assert r.json().arrtrim(\"arr\", Path.root_path(), -1, 3) == 0\n\n    # testing stop > end\n    r.json().set(\n        \"arr\",\n        Path.root_path(),\n        [0, 1, 2, 3, 4],\n    )\n    assert r.json().arrtrim(\"arr\", Path.root_path(), 3, 99) == 2\n\n    # start > array size and stop\n    r.json().set(\n        \"arr\",\n        Path.root_path(),\n        [0, 1, 2, 3, 4],\n    )\n    assert r.json().arrtrim(\"arr\", Path.root_path(), 9, 1) == 0\n\n    # all larger\n    r.json().set(\n        \"arr\",\n        Path.root_path(),\n        [0, 1, 2, 3, 4],\n    )\n    assert r.json().arrtrim(\"arr\", Path.root_path(), 9, 11) == 0\n\n    r.json().set(\n        \"doc1\",\n        \"$\",\n        {\n            \"a\": [\"foo\"],\n            \"nested1\": {\"a\": [\"hello\", None, \"world\"]},\n            \"nested2\": {\"a\": 31},\n        },\n    )\n    # Test multi\n    assert r.json().arrtrim(\"doc1\", \"$..a\", \"1\", -1) == [0, 2, None]\n    assert r.json().get(\"doc1\", \"$\") == [\n        {\"a\": [], \"nested1\": {\"a\": [None, \"world\"]}, \"nested2\": {\"a\": 31}}\n    ]\n\n    r.json().set(\n        \"doc1\", \"$\", {\"a\": [], \"nested1\": {\"a\": [None, \"world\"]}, \"nested2\": {\"a\": 31}}\n    )\n    assert r.json().arrtrim(\"doc1\", \"$..a\", \"1\", \"1\") == [0, 1, None]\n    assert r.json().get(\"doc1\", \"$\") == [\n        {\"a\": [], \"nested1\": {\"a\": [\"world\"]}, \"nested2\": {\"a\": 31}}\n    ]\n    # Test single\n    assert r.json().arrtrim(\"doc1\", \"$.nested1.a\", 1, 0) == [0]\n    assert r.json().get(\"doc1\", \"$\") == [\n        {\"a\": [], \"nested1\": {\"a\": []}, \"nested2\": {\"a\": 31}}\n    ]\n\n    # Test missing key\n    with pytest.raises(redis.ResponseError):\n        r.json().arrtrim(\"non_existing_doc\", \"..a\", \"0\", 1)\n\n    # Test legacy\n    r.json().set(\n        \"doc1\",\n        \"$\",\n        {\n            \"a\": [\"foo\"],\n       
     \"nested1\": {\"a\": [\"hello\", None, \"world\"]},\n            \"nested2\": {\"a\": 31},\n        },\n    )\n\n    # Test multi (all paths are updated, but return result of last path)\n    assert r.json().arrtrim(\"doc1\", \"..a\", \"1\", \"-1\") == 2\n\n    # Test single\n    assert r.json().arrtrim(\"doc1\", \".nested1.a\", \"1\", \"1\") == 1\n    assert r.json().get(\"doc1\", \"$\") == [\n        {\"a\": [], \"nested1\": {\"a\": [\"world\"]}, \"nested2\": {\"a\": 31}}\n    ]\n\n    # Test missing key\n    with pytest.raises(redis.ResponseError):\n        r.json().arrtrim(\"non_existing_doc\", \"..a\", 1, 1)\n"
  },
  {
    "path": "tests/fakeredis/test/test_json/test_json_commands.py",
    "content": "\"\"\"Tests for `fakeredis-py`'s emulation of Redis's JSON command subset.\"\"\"\n\nfrom __future__ import annotations\n\nfrom typing import (\n    Any,\n    Dict,\n    List,\n    Tuple,\n)\n\nimport pytest\n\njson_tests = pytest.importorskip(\"jsonpath_ng\")\n\n\nSAMPLE_DATA = {\n    \"a\": [\"foo\"],\n    \"nested1\": {\"a\": [\"hello\", None, \"world\"]},\n    \"nested2\": {\"a\": 31},\n}\n\n\n@pytest.fixture(scope=\"function\")\ndef json_data() -> Dict[str, Any]:\n    \"\"\"A function-scoped \"blob\" of JSON-encodable data.\"\"\"\n    return {\n        \"L1\": {\n            \"a\": {\n                \"A1_B1\": 10,\n                \"A1_B2\": False,\n                \"A1_B3\": {\n                    \"A1_B3_C1\": None,\n                    \"A1_B3_C2\": [\n                        \"A1_B3_C2_D1_1\",\n                        \"A1_B3_C2_D1_2\",\n                        -19.5,\n                        \"A1_B3_C2_D1_4\",\n                        \"A1_B3_C2_D1_5\",\n                        {\"A1_B3_C2_D1_6_E1\": True},\n                    ],\n                    \"A1_B3_C3\": [1],\n                },\n                \"A1_B4\": {\"A1_B4_C1\": \"foo\"},\n            }\n        },\n        \"L2\": {\n            \"a\": {\n                \"A2_B1\": 20,\n                \"A2_B2\": False,\n                \"A2_B3\": {\n                    \"A2_B3_C1\": None,\n                    \"A2_B3_C2\": [\n                        \"A2_B3_C2_D1_1\",\n                        \"A2_B3_C2_D1_2\",\n                        -37.5,\n                        \"A2_B3_C2_D1_4\",\n                        \"A2_B3_C2_D1_5\",\n                        {\"A2_B3_C2_D1_6_E1\": False},\n                    ],\n                    \"A2_B3_C3\": [2],\n                },\n                \"A2_B4\": {\"A2_B4_C1\": \"bar\"},\n            }\n        },\n    }\n\n\ndef load_types_data(nested_key_name: str) -> Tuple[Dict[str, Any], List[bytes]]:\n    \"\"\"Generate a structure with sample of all 
types\"\"\"\n    type_samples = {\n        \"object\": {},\n        \"array\": [],\n        \"string\": \"str\",\n        \"integer\": 42,\n        \"number\": 1.2,\n        \"boolean\": False,\n        \"null\": None,\n    }\n    jdata = {}\n\n    for k, v in type_samples.items():\n        jdata[f\"nested_{k}\"] = {nested_key_name: v}\n\n    return jdata, [k.encode() for k in type_samples.keys()]\n"
  },
  {
    "path": "tests/fakeredis/test/test_mixins/__init__.py",
    "content": ""
  },
  {
    "path": "tests/fakeredis/test/test_mixins/test_bitmap_commands.py",
    "content": "import pytest\nimport redis\nimport redis.client\n\nfrom test.testtools import raw_command\n\n\ndef test_getbit(r: redis.Redis):\n    r.setbit(\"foo\", 3, 1)\n    assert r.getbit(\"foo\", 0) == 0\n    assert r.getbit(\"foo\", 1) == 0\n    assert r.getbit(\"foo\", 2) == 0\n    assert r.getbit(\"foo\", 3) == 1\n    assert r.getbit(\"foo\", 4) == 0\n    assert r.getbit(\"foo\", 100) == 0\n\n\ndef test_getbit_wrong_type(r: redis.Redis):\n    r.rpush(\"foo\", b\"x\")\n    with pytest.raises(redis.ResponseError):\n        r.getbit(\"foo\", 1)\n\n\n@pytest.mark.min_server(\"7\")\n@pytest.mark.skip(\"Fails on FakeRedis\")\ndef test_bitcount_error(r: redis.Redis):\n    with pytest.raises(redis.ResponseError) as e:\n        raw_command(r, b\"BITCOUNT\", b\"\", b\"\", b\"\")\n    assert str(e.value) == \"value is not an integer or out of range\"\n\n\n@pytest.mark.min_server(\"7\")\ndef test_bitcount_does_not_exist(r: redis.Redis):\n    res = raw_command(r, b\"BITCOUNT\", b\"\", 0, 0)\n    assert res == 0\n\n\ndef test_multiple_bits_set(r: redis.Redis):\n    r.setbit(\"foo\", 1, 1)\n    r.setbit(\"foo\", 3, 1)\n    r.setbit(\"foo\", 5, 1)\n\n    assert r.getbit(\"foo\", 0) == 0\n    assert r.getbit(\"foo\", 1) == 1\n    assert r.getbit(\"foo\", 2) == 0\n    assert r.getbit(\"foo\", 3) == 1\n    assert r.getbit(\"foo\", 4) == 0\n    assert r.getbit(\"foo\", 5) == 1\n    assert r.getbit(\"foo\", 6) == 0\n\n\ndef test_unset_bits(r: redis.Redis):\n    r.setbit(\"foo\", 1, 1)\n    r.setbit(\"foo\", 2, 0)\n    r.setbit(\"foo\", 3, 1)\n    assert r.getbit(\"foo\", 1) == 1\n    r.setbit(\"foo\", 1, 0)\n    assert r.getbit(\"foo\", 1) == 0\n    r.setbit(\"foo\", 3, 0)\n    assert r.getbit(\"foo\", 3) == 0\n\n\ndef test_get_set_bits(r: redis.Redis):\n    # set bit 5\n    assert not r.setbit(\"a\", 5, True)\n    assert r.getbit(\"a\", 5)\n    # unset bit 4\n    assert not r.setbit(\"a\", 4, False)\n    assert not r.getbit(\"a\", 4)\n    # set bit 4\n    assert not 
r.setbit(\"a\", 4, True)\n    assert r.getbit(\"a\", 4)\n    # set bit 5 again\n    assert r.setbit(\"a\", 5, True)\n    assert r.getbit(\"a\", 5)\n\n\ndef test_setbits_and_getkeys(r: redis.Redis):\n    # The bit operations and the get commands\n    # should play nicely with each other.\n    r.setbit(\"foo\", 1, 1)\n    assert r.get(\"foo\") == b\"@\"\n    r.setbit(\"foo\", 2, 1)\n    assert r.get(\"foo\") == b\"`\"\n    r.setbit(\"foo\", 3, 1)\n    assert r.get(\"foo\") == b\"p\"\n    r.setbit(\"foo\", 9, 1)\n    assert r.get(\"foo\") == b\"p@\"\n    r.setbit(\"foo\", 54, 1)\n    assert r.get(\"foo\") == b\"p@\\x00\\x00\\x00\\x00\\x02\"\n\n\ndef test_setbit_wrong_type(r: redis.Redis):\n    r.rpush(\"foo\", b\"x\")\n    with pytest.raises(redis.ResponseError):\n        r.setbit(\"foo\", 0, 1)\n\n\ndef test_setbit_expiry(r: redis.Redis):\n    r.set(\"foo\", b\"0x00\", ex=10)\n    r.setbit(\"foo\", 1, 1)\n    assert r.ttl(\"foo\") > 0\n\n\ndef test_bitcount(r: redis.Redis):\n    r.delete(\"foo\")\n    assert r.bitcount(\"foo\") == 0\n    r.setbit(\"foo\", 1, 1)\n    assert r.bitcount(\"foo\") == 1\n    r.setbit(\"foo\", 8, 1)\n    assert r.bitcount(\"foo\") == 2\n    assert r.bitcount(\"foo\", 1, 1) == 1\n    r.setbit(\"foo\", 57, 1)\n    assert r.bitcount(\"foo\") == 3\n    r.set(\"foo\", \" \")\n    assert r.bitcount(\"foo\") == 1\n    r.set(\"key\", \"foobar\")\n    with pytest.raises(redis.ResponseError):\n        raw_command(r, \"bitcount\", \"key\", \"1\", \"2\", \"dsd\")\n    assert r.bitcount(\"key\") == 26\n    assert r.bitcount(\"key\", start=0, end=0) == 4\n    assert r.bitcount(\"key\", start=1, end=1) == 6\n\n\n@pytest.mark.min_server(\"7\")\ndef test_bitcount_mode_redis7(r: redis.Redis):\n    r.set(\"key\", \"foobar\")\n    assert r.bitcount(\"key\", start=1, end=1, mode=\"byte\") == 6\n    assert r.bitcount(\"key\", start=5, end=30, mode=\"bit\") == 17\n    with pytest.raises(redis.ResponseError):\n        r.bitcount(\"key\", start=5, end=30, 
mode=\"dscd\")\n    with pytest.raises(redis.ResponseError):\n        raw_command(r, \"bitcount\", \"key\", \"1\", \"2\", \"dsd\", \"cd\")\n\n\ndef test_bitcount_wrong_type(r: redis.Redis):\n    r.rpush(\"foo\", b\"x\")\n    with pytest.raises(redis.ResponseError):\n        r.bitcount(\"foo\")\n\n\ndef test_bitop(r: redis.Redis):\n    r.set(\"key1\", \"foobar\")\n    r.set(\"key2\", \"abcdef\")\n\n    assert r.bitop(\"and\", \"dest\", \"key1\", \"key2\") == 6\n    assert r.get(\"dest\") == b\"`bc`ab\"\n\n    assert r.bitop(\"not\", \"dest1\", \"key1\") == 6\n    assert r.get(\"dest1\") == b\"\\x99\\x90\\x90\\x9d\\x9e\\x8d\"\n\n    assert r.bitop(\"or\", \"dest-or\", \"key1\", \"key2\") == 6\n    assert r.get(\"dest-or\") == b\"goofev\"\n\n    assert r.bitop(\"xor\", \"dest-xor\", \"key1\", \"key2\") == 6\n    assert r.get(\"dest-xor\") == b\"\\x07\\r\\x0c\\x06\\x04\\x14\"\n\n\ndef test_bitop_errors(r: redis.Redis):\n    r.set(\"key1\", \"foobar\")\n    r.set(\"key2\", \"abcdef\")\n    r.sadd(\"key-set\", \"member1\")\n    with pytest.raises(redis.ResponseError):\n        r.bitop(\"not\", \"dest\", \"key1\", \"key2\")\n    with pytest.raises(redis.ResponseError):\n        r.bitop(\"badop\", \"dest\", \"key1\", \"key2\")\n    with pytest.raises(redis.ResponseError):\n        r.bitop(\"and\", \"dest\", \"key1\", \"key-set\")\n    with pytest.raises(redis.ResponseError):\n        r.bitop(\"and\", \"dest\")\n\n\ndef test_bitpos(r: redis.Redis):\n    key = \"key:bitpos\"\n    r.set(key, b\"\\xff\\xf0\\x00\")\n    assert r.bitpos(key, 0) == 12\n    assert r.bitpos(key, 0, 2, -1) == 16\n    assert r.bitpos(key, 0, -2, -1) == 12\n    r.set(key, b\"\\x00\\xff\\xf0\")\n    assert r.bitpos(key, 1, 0) == 8\n    assert r.bitpos(key, 1, 1) == 8\n    r.set(key, b\"\\x00\\x00\\x00\")\n    assert r.bitpos(key, 1) == -1\n    r.set(key, b\"\\xff\\xf0\\x00\")\n\n\n@pytest.mark.min_server(\"7\")\ndef test_bitops_mode_redis7(r: redis.Redis):\n    key = \"key:bitpos\"\n    r.set(key, 
b\"\\xff\\xf0\\x00\")\n    assert r.bitpos(key, 0, 8, -1, \"bit\") == 12\n    assert r.bitpos(key, 1, 8, -1, \"bit\") == 8\n    with pytest.raises(redis.ResponseError):\n        assert r.bitpos(key, 0, 8, -1, \"bad_mode\") == 12\n\n\ndef test_bitpos_wrong_arguments(r: redis.Redis):\n    key = \"key:bitpos:wrong:args\"\n    r.set(key, b\"\\xff\\xf0\\x00\")\n    with pytest.raises(redis.ResponseError):\n        raw_command(r, \"bitpos\", key, \"7\")\n    with pytest.raises(redis.ResponseError):\n        raw_command(r, \"bitpos\", key, 1, \"6\", \"5\", \"BYTE\", \"6\")\n    with pytest.raises(redis.ResponseError):\n        raw_command(r, \"bitpos\", key)\n\n\ndef test_bitfield_wrong_arguments(r: redis.Redis):\n    key = \"key:bitfield:wrong:args\"\n    with pytest.raises(redis.ResponseError):\n        raw_command(r, \"bitfield\")\n    with pytest.raises(redis.ResponseError):\n        raw_command(r, \"bitfield\", key, \"foo\")\n    with pytest.raises(redis.ResponseError):\n        raw_command(r, \"bitfield\", key, \"overflow\")\n    with pytest.raises(redis.ResponseError):\n        raw_command(r, \"bitfield\", key, \"overflow\", \"foo\")\n\n\ndef test_bitfield_get(r: redis.Redis):\n    key = \"key:bitfield_get\"\n    r.set(key, b\"\\xff\\xf0\\x00\")\n    for i in range(0, 12):\n        assert r.bitfield(key).get(\"u1\", i).get(\"i1\", i).execute() == [1, -1]\n    for i in range(12, 25):\n        for j in range(1, 63):\n            assert r.bitfield(key).get(f\"u{j}\", i).get(f\"i{j}\", i).execute() == [0, 0]\n\n    for i in range(0, 11):\n        assert r.bitfield(key).get(\"u2\", i).get(\"i2\", i).execute() == [3, -1]\n    assert r.bitfield(key).get(\"u2\", 11).get(\"i2\", 11).execute() == [2, -2]\n    assert r.bitfield(key).get(\"u8\", 0).get(\"u8\", 8).get(\"u8\", 16).execute() == [\n        0xFF,\n        0xF0,\n        0,\n    ]\n    assert r.bitfield(key).get(\"i8\", 0).get(\"i8\", 8).get(\"i8\", 16).execute() == [\n        ~0,\n        ~0x0F,\n        0,\n    
]\n\n    assert r.bitfield(key).get(\"u32\", 8).get(\"u8\", 100).execute() == [0xF000_0000, 0]\n\n    r.set(key, b\"\\x01\\x23\\x45\\x67\\x89\\xab\\xcd\\xef\")\n    for enc in (\"i16\", \"u16\"):\n        assert r.bitfield(key).get(enc, 0).execute() == [0x0123]\n        assert r.bitfield(key).get(enc, 4).execute() == [0x1234]\n        assert r.bitfield(key).get(enc, 8).execute() == [0x2345]\n\n        assert r.bitfield(key).get(enc, 1).execute() == [0x0246]\n        assert r.bitfield(key).get(enc, 5).execute() == [0x2468]\n        assert r.bitfield(key).get(enc, 9).execute() == [0x468A]\n\n        assert r.bitfield(key).get(enc, 2).execute() == [0x048D]\n        assert r.bitfield(key).get(enc, 6).execute() == [0x48D1]\n\n    assert r.bitfield(key).get(\"u16\", 10).get(\"i16\", 10).execute() == [\n        0x8D15,\n        0xD15 - 0x8000,\n    ]\n    assert r.bitfield(key).get(\"u32\", 16).get(\"u48\", 8).execute() == [\n        0x456789AB,\n        0x2345_6789_ABCD,\n    ]\n    assert r.bitfield(key).get(\"i32\", 16).get(\"i48\", 8).execute() == [\n        0x456789AB,\n        0x2345_6789_ABCD,\n    ]\n    assert r.bitfield(key).get(\"u63\", 1).execute() == [0x123456789_ABCDEF]\n    assert r.bitfield(key).get(\"i63\", 1).execute() == [0x123456789_ABCDEF]\n    assert r.bitfield(key).get(\"i64\", 0).execute() == [0x123456789_ABCDEF]\n    assert raw_command(r, \"bitfield\", key, \"get\", \"i16\", 0) == [0x0123]\n\n\ndef test_bitfield_set(r: redis.Redis):\n    key = \"key:bitfield_set\"\n    r.set(key, b\"\\xff\\xf0\\x00\")\n    assert r.bitfield(key).set(\"u8\", 0, 0x55).set(\"u8\", 16, 0xAA).execute() == [0xFF, 0]\n    assert r.get(key) == b\"\\x55\\xf0\\xaa\"\n    assert r.bitfield(key).set(\"u1\", 0, 1).set(\"u1\", 16, 2).execute() == [0, 1]\n    assert r.get(key) == b\"\\xd5\\xf0\\x2a\"\n    assert r.bitfield(key).set(\"i1\", 31, 1).set(\"i1\", 30, 1).execute() == [0, 0]\n    assert r.get(key) == b\"\\xd5\\xf0\\x2a\\x03\"\n    assert r.bitfield(key).set(\"u36\", 4, 
0xBADC0FFE).execute() == [0x5_F02A_0300]\n    assert r.get(key) == b\"\\xd0\\xba\\xdc\\x0f\\xfe\"\n    assert r.bitfield(key, \"WRAP\").set(\"u12\", 8, 0xFFF).execute() == [0xBAD]\n    assert r.get(key) == b\"\\xd0\\xff\\xfc\\x0f\\xfe\"\n\n\ndef test_bitfield_set_sat(r: redis.Redis):\n    key = \"key:bitfield_set\"\n    r.set(key, b\"\\xff\\xf0\\x00\")\n    assert r.bitfield(key, \"SAT\").set(\"u8\", 4, 0x123).set(\"u8\", 8, 0x55).execute() == [\n        0xFF,\n        0xF0,\n    ]\n    assert r.get(key) == b\"\\xff\\x55\\x00\"\n    assert r.bitfield(key, \"SAT\").set(\"u12\", 0, -1).set(\"u1\", 1, 2).execute() == [\n        0xFF5,\n        1,\n    ]\n    assert r.get(key) == b\"\\xff\\xf5\\x00\"\n    assert r.bitfield(key, \"SAT\").set(\"i4\", 0, 8).set(\"i4\", 4, 7).execute() == [-1, -1]\n    assert r.get(key) == b\"\\x77\\xf5\\x00\"\n    assert r.bitfield(key, \"SAT\").set(\"i4\", 4, -8).set(\"i4\", 0, -9).execute() == [7, 7]\n    assert r.get(key) == b\"\\x88\\xf5\\x00\"\n    assert r.bitfield(key, \"SAT\").set(\"i60\", 0, -(1 << 62) + 1).execute() == [\n        0x88F5000_00000000 - (1 << 60)\n    ]\n    assert r.get(key) == b\"\\x80\" + b\"\\0\" * 7\n    assert r.bitfield(key, \"SAT\").set(\"u60\", 0, -(1 << 63) + 1).execute() == [1 << 59]\n    assert r.get(key) == b\"\\xff\" * 7 + b\"\\xf0\"\n\n\ndef test_bitfield_set_fail(r: redis.Redis):\n    key = \"key:bitfield_set\"\n    r.set(key, b\"\\xff\\xf0\\x00\")\n    assert r.bitfield(key, \"FAIL\").set(\"u8\", 4, 0x123).set(\"u8\", 8, 0x55).execute() == [\n        None,\n        0xF0,\n    ]\n    assert r.get(key) == b\"\\xff\\x55\\x00\"\n    assert r.bitfield(key, \"FAIL\").set(\"u12\", 0, -1).set(\"u1\", 1, 2).execute() == [\n        None,\n        None,\n    ]\n    assert r.get(key) == b\"\\xff\\x55\\x00\"\n    assert r.bitfield(key, \"FAIL\").set(\"i4\", 0, 8).set(\"i4\", 4, 7).execute() == [\n        None,\n        -1,\n    ]\n    assert r.get(key) == b\"\\xf7\\x55\\x00\"\n    assert r.bitfield(key, 
\"FAIL\").set(\"i4\", 4, -8).set(\"i4\", 0, -9).execute() == [\n        7,\n        None,\n    ]\n    assert r.get(key) == b\"\\xf8\\x55\\x00\"\n\n\ndef test_bitfield_incr(r: redis.Redis):\n    key = \"key:bitfield_incr\"\n    r.set(key, b\"\\xff\\xf0\\x00\")\n    assert r.bitfield(key).incrby(\"u8\", 0, 0x55).incrby(\"u8\", 16, 0xAA).execute() == [\n        0x54,\n        0xAA,\n    ]\n    assert r.get(key) == b\"\\x54\\xf0\\xaa\"\n    assert r.bitfield(key).incrby(\"u1\", 0, 1).incrby(\"u1\", 16, 2).execute() == [1, 1]\n    assert r.get(key) == b\"\\xd4\\xf0\\xaa\"\n    assert r.bitfield(key).incrby(\"i1\", 31, 1).incrby(\"i1\", 30, 1).execute() == [-1, -1]\n    assert r.get(key) == b\"\\xd4\\xf0\\xaa\\x03\"\n    assert r.bitfield(key).incrby(\"u36\", 4, 0xBADC0FFE).execute() == [0x5_AB86_12FE]\n    assert r.get(key) == b\"\\xd5\\xab\\x86\\x12\\xfe\"\n    assert r.bitfield(key, \"WRAP\").incrby(\"u12\", 8, 0xFFF).execute() == [0xAB7]\n    assert r.get(key) == b\"\\xd5\\xab\\x76\\x12\\xfe\"\n\n\ndef test_bitfield_incr_sat(r: redis.Redis):\n    key = \"key:bitfield_incr_sat\"\n    r.set(key, b\"\\xff\\xf0\\x00\")\n    assert r.bitfield(key, \"SAT\").incrby(\"u8\", 4, 0x123).incrby(\"u8\", 8, 0x55).execute() == [\n        0xFF,\n        0xFF,\n    ]\n    assert r.get(key) == b\"\\xff\\xff\\x00\"\n    assert r.bitfield(key, \"SAT\").incrby(\"u12\", 0, -1).incrby(\"u1\", 1, 2).execute() == [\n        0xFFE,\n        1,\n    ]\n    assert r.get(key) == b\"\\xff\\xef\\x00\"\n    assert r.bitfield(key, \"SAT\").incrby(\"i4\", 0, 8).incrby(\"i4\", 4, 7).execute() == [\n        7,\n        6,\n    ]\n    assert r.get(key) == b\"\\x76\\xef\\x00\"\n    assert r.bitfield(key, \"SAT\").incrby(\"i4\", 4, -8).incrby(\"i4\", 0, -9).execute() == [\n        -2,\n        -2,\n    ]\n    assert r.get(key) == b\"\\xee\\xef\\x00\"\n    assert r.bitfield(key, \"SAT\").incrby(\"i60\", 0, -(1 << 62) + 1).execute() == [-(1 << 59)]\n    assert r.get(key) == b\"\\x80\" + b\"\\0\" * 7\n    
assert r.bitfield(key, \"SAT\").set(\"u60\", 0, -(1 << 63) + 1).execute() == [1 << 59]\n    assert r.get(key) == b\"\\xff\" * 7 + b\"\\xf0\"\n\n\ndef test_bitfield_incr_fail(r: redis.Redis):\n    key = \"key:bitfield_incr_fail\"\n    r.set(key, b\"\\xff\\xf0\\x00\")\n    assert r.bitfield(key, \"FAIL\").incrby(\"u8\", 4, 0x123).incrby(\"u8\", 8, 0x55).execute() == [\n        None,\n        None,\n    ]\n    assert r.get(key) == b\"\\xff\\xf0\\x00\"\n    assert r.bitfield(key, \"FAIL\").incrby(\"u12\", 0, -1).incrby(\"u1\", 1, 2).execute() == [\n        0xFFE,\n        None,\n    ]\n    assert r.get(key) == b\"\\xff\\xe0\\x00\"\n    assert r.bitfield(key, \"FAIL\").incrby(\"i4\", 0, 8).incrby(\"i4\", 4, 7).execute() == [\n        7,\n        6,\n    ]\n    assert r.get(key) == b\"\\x76\\xe0\\x00\"\n    assert r.bitfield(key, \"FAIL\").incrby(\"i4\", 4, -8).incrby(\"i4\", 0, -9).execute() == [-2, -2]\n    assert r.get(key) == b\"\\xee\\xe0\\x00\"\n\n\ndef test_bitfield_get_wrong_arguments(r: redis.Redis):\n    key = \"key:bitfield_get:wrong:args\"\n    r.set(key, b\"\\xff\\xf0\\x00\")\n    with pytest.raises(redis.ResponseError):\n        raw_command(r, \"bitfield\", key, \"get\")\n    with pytest.raises(redis.ResponseError):\n        raw_command(r, \"bitfield\", key, \"get\", \"i16\")\n    with pytest.raises(redis.ResponseError):\n        raw_command(r, \"bitfield\", key, \"get\", \"i16\", -1)\n    for encoding in (\"I8\", \"i-42\", \"i5?\", \"u0\", \"i0\", \"i65\", \"u64\", \"i 60\"):\n        with pytest.raises(redis.ResponseError):\n            raw_command(r, \"bitfield\", key, \"get\", encoding, 0)\n\n\ndef test_bitfield_set_wrong_arguments(r: redis.Redis):\n    key = \"key:bitfield_set:wrong:args\"\n    r.set(key, b\"\\xff\\xf0\\x00\")\n    with pytest.raises(redis.ResponseError):\n        raw_command(r, \"bitfield\", key, \"set\")\n    with pytest.raises(redis.ResponseError):\n        raw_command(r, \"bitfield\", key, \"set\", \"i16\")\n    with 
pytest.raises(redis.ResponseError):\n        raw_command(r, \"bitfield\", key, \"set\", \"i16\", -1)\n    with pytest.raises(redis.ResponseError):\n        raw_command(r, \"bitfield\", key, \"set\", \"i16\", 0, \"foo\")\n    for encoding in (\"I8\", \"i-42\", \"i5?\", \"u0\", \"i0\", \"i65\", \"u64\", \"i 60\"):\n        with pytest.raises(redis.ResponseError):\n            raw_command(r, \"bitfield\", key, \"set\", encoding, 0, 0)\n"
  },
  {
    "path": "tests/fakeredis/test/test_mixins/test_connection.py",
    "content": "import pytest\nimport redis\nimport redis.client\nfrom fakeredis import _msgs as msgs\nfrom redis.exceptions import ResponseError\n\nfrom test import testtools\nfrom test.testtools import raw_command\n\n\ndef test_ping(r: redis.Redis):\n    assert r.ping()\n    assert testtools.raw_command(r, \"ping\", \"test\") == b\"test\"\n    with pytest.raises(\n        redis.ResponseError, match=msgs.WRONG_ARGS_MSG6.format(\"ping\")[4:]\n    ):\n        raw_command(r, \"ping\", \"arg1\", \"arg2\")\n\n\ndef test_echo(r: redis.Redis):\n    assert r.echo(b\"hello\") == b\"hello\"\n    assert r.echo(\"hello\") == b\"hello\"\n\n\ndef test_unknown_command(r: redis.Redis):\n    with pytest.raises(redis.ResponseError):\n        raw_command(r, \"0 3 3\")\n\n\n@pytest.mark.decode_responses\nclass TestDecodeResponses:\n    def test_decode_str(self, r):\n        r.set(\"foo\", \"bar\")\n        assert r.get(\"foo\") == \"bar\"\n\n    def test_decode_set(self, r):\n        r.sadd(\"foo\", \"member1\")\n        assert set(r.smembers(\"foo\")) == {\"member1\"}\n\n    def test_decode_list(self, r):\n        r.rpush(\"foo\", \"a\", \"b\")\n        assert r.lrange(\"foo\", 0, -1) == [\"a\", \"b\"]\n\n    def test_decode_dict(self, r):\n        r.hset(\"foo\", \"key\", \"value\")\n        assert r.hgetall(\"foo\") == {\"key\": \"value\"}\n\n    def test_decode_error(self, r):\n        r.set(\"foo\", \"bar\")\n        with pytest.raises(ResponseError) as exc_info:\n            r.hset(\"foo\", \"bar\", \"baz\")\n        assert isinstance(exc_info.value.args[0], str)\n"
  },
  {
    "path": "tests/fakeredis/test/test_mixins/test_generic_commands.py",
    "content": "from datetime import datetime, timedelta\nfrom time import sleep, time\n\nimport pytest\nimport redis\nfrom fakeredis import _msgs as msgs\nfrom redis.exceptions import ResponseError\n\nfrom test.testtools import raw_command\n\n\n@pytest.mark.slow\ndef test_expireat_should_expire_key_by_datetime(r: redis.Redis):\n    r.set(\"foo\", \"bar\")\n    assert r.get(\"foo\") == b\"bar\"\n    r.expireat(\"foo\", datetime.now() + timedelta(seconds=1))\n    sleep(1.5)\n    assert r.get(\"foo\") is None\n    assert r.expireat(\"bar\", datetime.now()) is False\n\n\n@pytest.mark.slow\ndef test_expireat_should_expire_key_by_timestamp(r: redis.Redis):\n    r.set(\"foo\", \"bar\")\n    assert r.get(\"foo\") == b\"bar\"\n    r.expireat(\"foo\", int(time() + 1))\n    sleep(1.5)\n    assert r.get(\"foo\") is None\n    assert r.expire(\"bar\", 1) is False\n\n\ndef test_expireat_should_return_true_for_existing_key(r: redis.Redis):\n    r.set(\"foo\", \"bar\")\n    assert r.expireat(\"foo\", int(time() + 1)) is True\n\n\ndef test_expireat_should_return_false_for_missing_key(r: redis.Redis):\n    assert r.expireat(\"missing\", int(time() + 1)) is False\n\n\ndef test_del_operator(r: redis.Redis):\n    r[\"foo\"] = \"bar\"\n    del r[\"foo\"]\n    assert r.get(\"foo\") is None\n\n\ndef test_expire_should_not_handle_floating_point_values(r: redis.Redis):\n    r.set(\"foo\", \"bar\")\n    with pytest.raises(redis.ResponseError, match=\"value is not an integer or out of range\"):\n        r.expire(\"something_new\", 1.2)\n        r.pexpire(\"something_new\", 1000.2)\n        r.expire(\"some_unused_key\", 1.2)\n        r.pexpire(\"some_unused_key\", 1000.2)\n\n\ndef test_ttl_should_return_minus_one_for_non_expiring_key(r: redis.Redis):\n    r.set(\"foo\", \"bar\")\n    assert r.get(\"foo\") == b\"bar\"\n    assert r.ttl(\"foo\") == -1\n\n\ndef test_sort_range_offset_range(r: redis.Redis):\n    r.rpush(\"foo\", \"2\")\n    r.rpush(\"foo\", \"1\")\n    r.rpush(\"foo\", \"4\")\n    
r.rpush(\"foo\", \"3\")\n\n    assert r.sort(\"foo\", start=0, num=2) == [b\"1\", b\"2\"]\n\n\ndef test_sort_range_offset_range_and_desc(r: redis.Redis):\n    r.rpush(\"foo\", \"2\")\n    r.rpush(\"foo\", \"1\")\n    r.rpush(\"foo\", \"4\")\n    r.rpush(\"foo\", \"3\")\n\n    assert r.sort(\"foo\", start=0, num=1, desc=True) == [b\"4\"]\n\n\ndef test_sort_range_offset_norange(r: redis.Redis):\n    with pytest.raises(redis.RedisError):\n        r.sort(\"foo\", start=1)\n\n\ndef test_sort_range_with_large_range(r: redis.Redis):\n    r.rpush(\"foo\", \"2\")\n    r.rpush(\"foo\", \"1\")\n    r.rpush(\"foo\", \"4\")\n    r.rpush(\"foo\", \"3\")\n    # num=20 even though len(foo) is 4.\n    assert r.sort(\"foo\", start=1, num=20) == [b\"2\", b\"3\", b\"4\"]\n\n\ndef test_sort_descending(r: redis.Redis):\n    r.rpush(\"foo\", \"1\")\n    r.rpush(\"foo\", \"2\")\n    r.rpush(\"foo\", \"3\")\n    assert r.sort(\"foo\", desc=True) == [b\"3\", b\"2\", b\"1\"]\n\n\ndef test_sort_alpha(r: redis.Redis):\n    r.rpush(\"foo\", \"2a\")\n    r.rpush(\"foo\", \"1b\")\n    r.rpush(\"foo\", \"2b\")\n    r.rpush(\"foo\", \"1a\")\n\n    assert r.sort(\"foo\", alpha=True) == [b\"1a\", b\"1b\", b\"2a\", b\"2b\"]\n\n\ndef test_sort_foo(r: redis.Redis):\n    r.rpush(\"foo\", \"2a\")\n    r.rpush(\"foo\", \"1b\")\n    r.rpush(\"foo\", \"2b\")\n    r.rpush(\"foo\", \"1a\")\n    with pytest.raises(redis.ResponseError):\n        r.sort(\"foo\", alpha=False)\n\n\ndef test_sort_empty(r: redis.Redis):\n    assert r.sort(\"foo\") == []\n\n\ndef test_sort_wrong_type(r: redis.Redis):\n    r.set(\"string\", \"3\")\n    with pytest.raises(redis.ResponseError):\n        r.sort(\"string\")\n\n\n@pytest.mark.unsupported_server_types(\"dragonfly\")\ndef test_sort_with_store_option(r: redis.Redis):\n    r.rpush(\"foo\", \"2\")\n    r.rpush(\"foo\", \"1\")\n    r.rpush(\"foo\", \"4\")\n    r.rpush(\"foo\", \"3\")\n\n    assert r.sort(\"foo\", store=\"bar\") == 4\n    assert r.lrange(\"bar\", 0, -1) == 
[b\"1\", b\"2\", b\"3\", b\"4\"]\n\n\n@pytest.mark.unsupported_server_types(\"dragonfly\")\ndef test_sort_with_by_and_get_option(r: redis.Redis):\n    r.rpush(\"foo\", \"2\")\n    r.rpush(\"foo\", \"1\")\n    r.rpush(\"foo\", \"4\")\n    r.rpush(\"foo\", \"3\")\n\n    r[\"weight_1\"] = \"4\"\n    r[\"weight_2\"] = \"3\"\n    r[\"weight_3\"] = \"2\"\n    r[\"weight_4\"] = \"1\"\n\n    r[\"data_1\"] = \"one\"\n    r[\"data_2\"] = \"two\"\n    r[\"data_3\"] = \"three\"\n    r[\"data_4\"] = \"four\"\n\n    assert r.sort(\"foo\", by=\"weight_*\", get=\"data_*\") == [\n        b\"four\",\n        b\"three\",\n        b\"two\",\n        b\"one\",\n    ]\n    assert r.sort(\"foo\", by=\"weight_*\", get=\"#\") == [b\"4\", b\"3\", b\"2\", b\"1\"]\n    assert r.sort(\"foo\", by=\"weight_*\", get=(\"data_*\", \"#\")) == [\n        b\"four\",\n        b\"4\",\n        b\"three\",\n        b\"3\",\n        b\"two\",\n        b\"2\",\n        b\"one\",\n        b\"1\",\n    ]\n    assert r.sort(\"foo\", by=\"weight_*\", get=\"data_1\") == [None, None, None, None]\n    # Test sort with different parameters order\n    assert raw_command(r, \"sort\", \"foo\", \"get\", \"data_*\", \"by\", \"weight_*\", \"get\", \"#\") == [\n        b\"four\",\n        b\"4\",\n        b\"three\",\n        b\"3\",\n        b\"two\",\n        b\"2\",\n        b\"one\",\n        b\"1\",\n    ]\n\n\n@pytest.mark.unsupported_server_types(\"dragonfly\")\ndef test_sort_with_hash(r: redis.Redis):\n    r.rpush(\"foo\", \"middle\")\n    r.rpush(\"foo\", \"eldest\")\n    r.rpush(\"foo\", \"youngest\")\n    r.hset(\"record_youngest\", \"age\", 1)\n    r.hset(\"record_youngest\", \"name\", \"baby\")\n\n    r.hset(\"record_middle\", \"age\", 10)\n    r.hset(\"record_middle\", \"name\", \"teen\")\n\n    r.hset(\"record_eldest\", \"age\", 20)\n    r.hset(\"record_eldest\", \"name\", \"adult\")\n\n    assert r.sort(\"foo\", by=\"record_*->age\") == [b\"youngest\", b\"middle\", b\"eldest\"]\n    assert r.sort(\"foo\", 
by=\"record_*->age\", get=\"record_*->name\") == [\n        b\"baby\",\n        b\"teen\",\n        b\"adult\",\n    ]\n\n\ndef test_sort_with_set(r: redis.Redis):\n    r.sadd(\"foo\", \"3\")\n    r.sadd(\"foo\", \"1\")\n    r.sadd(\"foo\", \"2\")\n    assert r.sort(\"foo\") == [b\"1\", b\"2\", b\"3\"]\n\n\ndef test_ttl_should_return_minus_two_for_non_existent_key(r: redis.Redis):\n    assert r.get(\"foo\") is None\n    assert r.ttl(\"foo\") == -2\n\n\ndef test_type(r: redis.Redis):\n    r.set(\"string_key\", \"value\")\n    r.lpush(\"list_key\", \"value\")\n    r.sadd(\"set_key\", \"value\")\n    r.zadd(\"zset_key\", {\"value\": 1})\n    r.hset(\"hset_key\", \"key\", \"value\")\n\n    assert r.type(\"string_key\") == b\"string\"  # noqa: E721\n    assert r.type(\"list_key\") == b\"list\"  # noqa: E721\n    assert r.type(\"set_key\") == b\"set\"  # noqa: E721\n    assert r.type(\"zset_key\") == b\"zset\"  # noqa: E721\n    assert r.type(\"hset_key\") == b\"hash\"  # noqa: E721\n    assert r.type(\"none_key\") == b\"none\"  # noqa: E721\n\n\ndef test_unlink(r: redis.Redis):\n    r.set(\"foo\", \"bar\")\n    r.unlink(\"foo\")\n    assert r.get(\"foo\") is None\n\n\ndef test_dump_missing(r: redis.Redis):\n    assert r.dump(\"foo\") is None\n\n\ndef test_dump_restore(r: redis.Redis):\n    r.set(\"foo\", \"bar\")\n    dump = r.dump(\"foo\")\n    r.restore(\"baz\", 0, dump)\n    assert r.get(\"baz\") == b\"bar\"\n    assert r.ttl(\"baz\") == -1\n\n\ndef test_dump_restore_ttl(r: redis.Redis):\n    r.set(\"foo\", \"bar\")\n    dump = r.dump(\"foo\")\n    r.restore(\"baz\", 2000, dump)\n    assert r.get(\"baz\") == b\"bar\"\n    assert 1000 <= r.pttl(\"baz\") <= 2000\n\n\ndef test_dump_restore_replace(r: redis.Redis):\n    r.set(\"foo\", \"bar\")\n    dump = r.dump(\"foo\")\n    r.set(\"foo\", \"baz\")\n    r.restore(\"foo\", 0, dump, replace=True)\n    assert r.get(\"foo\") == b\"bar\"\n\n\ndef test_restore_exists(r: redis.Redis):\n    r.set(\"foo\", \"bar\")\n    dump = 
r.dump(\"foo\")\n    with pytest.raises(redis.exceptions.ResponseError):\n        r.restore(\"foo\", 0, dump)\n\n\ndef test_restore_invalid_dump(r: redis.Redis):\n    r.set(\"foo\", \"bar\")\n    dump = r.dump(\"foo\")\n    with pytest.raises(redis.exceptions.ResponseError):\n        r.restore(\"baz\", 0, dump[:-1])\n\n\ndef test_restore_invalid_ttl(r: redis.Redis):\n    r.set(\"foo\", \"bar\")\n    dump = r.dump(\"foo\")\n    with pytest.raises(redis.exceptions.ResponseError):\n        r.restore(\"baz\", -1, dump)\n\n\ndef test_set_then_get(r: redis.Redis):\n    assert r.set(\"foo\", \"bar\") is True\n    assert r.get(\"foo\") == b\"bar\"\n\n\ndef test_exists(r: redis.Redis):\n    assert \"foo\" not in r\n    r.set(\"foo\", \"bar\")\n    assert \"foo\" in r\n    with pytest.raises(redis.ResponseError, match=msgs.WRONG_ARGS_MSG6.format(\"exists\")[4:]):\n        raw_command(r, \"exists\")\n\n\n@pytest.mark.slow\ndef test_expire_should_expire_key(r: redis.Redis):\n    r.set(\"foo\", \"bar\")\n    assert r.get(\"foo\") == b\"bar\"\n    r.expire(\"foo\", 1)\n    sleep(1.5)\n    assert r.get(\"foo\") is None\n    assert r.expire(\"bar\", 1) is False\n\n\ndef test_expire_should_throw_error(r: redis.Redis):\n    r.set(\"foo\", \"bar\")\n    assert r.get(\"foo\") == b\"bar\"\n    with pytest.raises(ResponseError):\n        r.expire(\"foo\", 1, nx=True, xx=True)\n    with pytest.raises(ResponseError):\n        r.expire(\"foo\", 1, gt=True, lt=True)\n\n\n@pytest.mark.max_server(\"7\")\ndef test_expire_extra_params_return_error(r: redis.Redis):\n    with pytest.raises(redis.exceptions.ResponseError):\n        r.expire(\"foo\", 1, nx=True)\n\n\ndef test_expire_should_return_true_for_existing_key(r: redis.Redis):\n    r.set(\"foo\", \"bar\")\n    assert r.expire(\"foo\", 1) is True\n\n\ndef test_expire_should_return_false_for_missing_key(r: redis.Redis):\n    assert r.expire(\"missing\", 1) is False\n\n\n@pytest.mark.slow\ndef test_expire_should_expire_key_using_timedelta(r: 
redis.Redis):\n    r.set(\"foo\", \"bar\")\n    assert r.get(\"foo\") == b\"bar\"\n    r.expire(\"foo\", timedelta(seconds=1))\n    sleep(1.5)\n    assert r.get(\"foo\") is None\n    assert r.expire(\"bar\", 1) is False\n\n\n@pytest.mark.slow\ndef test_expire_should_expire_immediately_with_millisecond_timedelta(r: redis.Redis):\n    r.set(\"foo\", \"bar\")\n    assert r.get(\"foo\") == b\"bar\"\n    r.expire(\"foo\", timedelta(milliseconds=750))\n    assert r.get(\"foo\") is None\n    assert r.expire(\"bar\", 1) is False\n\n\ndef test_watch_expire(r: redis.Redis):\n    \"\"\"EXPIRE should mark a key as changed for WATCH.\"\"\"\n    r.set(\"foo\", \"bar\")\n    with r.pipeline() as p:\n        p.watch(\"foo\")\n        r.expire(\"foo\", 10000)\n        p.multi()\n        p.get(\"foo\")\n        with pytest.raises(redis.exceptions.WatchError):\n            p.execute()\n\n\n@pytest.mark.slow\ndef test_pexpire_should_expire_key(r: redis.Redis):\n    r.set(\"foo\", \"bar\")\n    assert r.get(\"foo\") == b\"bar\"\n    r.pexpire(\"foo\", 150)\n    sleep(0.2)\n    assert r.get(\"foo\") is None\n    assert r.pexpire(\"bar\", 1) == 0\n\n\ndef test_pexpire_should_return_truthy_for_existing_key(r: redis.Redis):\n    r.set(\"foo\", \"bar\")\n    assert r.pexpire(\"foo\", 1)\n\n\ndef test_pexpire_should_return_falsey_for_missing_key(r: redis.Redis):\n    assert not r.pexpire(\"missing\", 1)\n\n\n@pytest.mark.slow\ndef test_pexpire_should_expire_key_using_timedelta(r: redis.Redis):\n    r.set(\"foo\", \"bar\")\n    assert r.get(\"foo\") == b\"bar\"\n    r.pexpire(\"foo\", timedelta(milliseconds=750))\n    sleep(0.5)\n    assert r.get(\"foo\") == b\"bar\"\n    sleep(0.5)\n    assert r.get(\"foo\") is None\n    assert r.pexpire(\"bar\", 1) == 0\n\n\n@pytest.mark.slow\ndef test_pexpireat_should_expire_key_by_datetime(r: redis.Redis):\n    r.set(\"foo\", \"bar\")\n    assert r.get(\"foo\") == b\"bar\"\n    r.pexpireat(\"foo\", datetime.now() + timedelta(milliseconds=150))\n    
sleep(0.2)\n    assert r.get(\"foo\") is None\n    assert r.pexpireat(\"bar\", datetime.now()) == 0\n\n\n@pytest.mark.slow\ndef test_pexpireat_should_expire_key_by_timestamp(r: redis.Redis):\n    r.set(\"foo\", \"bar\")\n    assert r.get(\"foo\") == b\"bar\"\n    r.pexpireat(\"foo\", int(time() * 1000 + 150))\n    sleep(0.2)\n    assert r.get(\"foo\") is None\n    assert r.expire(\"bar\", 1) is False\n\n\ndef test_pexpireat_should_return_true_for_existing_key(r: redis.Redis):\n    r.set(\"foo\", \"bar\")\n    assert r.pexpireat(\"foo\", int(time() * 1000 + 150))\n\n\ndef test_pexpireat_should_return_false_for_missing_key(r: redis.Redis):\n    assert not r.pexpireat(\"missing\", int(time() * 1000 + 150))\n\n\ndef test_pttl_should_return_minus_one_for_non_expiring_key(r: redis.Redis):\n    r.set(\"foo\", \"bar\")\n    assert r.get(\"foo\") == b\"bar\"\n    assert r.pttl(\"foo\") == -1\n\n\ndef test_pttl_should_return_minus_two_for_non_existent_key(r: redis.Redis):\n    assert r.get(\"foo\") is None\n    assert r.pttl(\"foo\") == -2\n\n\ndef test_randomkey_returns_none_on_empty_db(r: redis.Redis):\n    assert r.randomkey() is None\n\n\ndef test_randomkey_returns_existing_key(r: redis.Redis):\n    r.set(\"foo\", 1)\n    r.set(\"bar\", 2)\n    r.set(\"baz\", 3)\n    assert r.randomkey().decode() in (\"foo\", \"bar\", \"baz\")\n\n\ndef test_persist(r: redis.Redis):\n    r.set(\"foo\", \"bar\", ex=20)\n    assert r.persist(\"foo\") == 1\n    assert r.ttl(\"foo\") == -1\n    assert r.persist(\"foo\") == 0\n\n\ndef test_watch_persist(r: redis.Redis):\n    \"\"\"PERSIST should mark a variable as changed.\"\"\"\n    r.set(\"foo\", \"bar\", ex=10000)\n    with r.pipeline() as p:\n        p.watch(\"foo\")\n        r.persist(\"foo\")\n        p.multi()\n        p.get(\"foo\")\n        with pytest.raises(redis.exceptions.WatchError):\n            p.execute()\n\n\ndef test_set_existing_key_persists(r: redis.Redis):\n    r.set(\"foo\", \"bar\", ex=20)\n    r.set(\"foo\", \"foo\")\n 
   assert r.ttl(\"foo\") == -1\n\n\ndef test_set_non_str_keys(r: redis.Redis):\n    assert r.set(2, \"bar\") is True\n    assert r.get(2) == b\"bar\"\n    assert r.get(\"2\") == b\"bar\"\n\n\ndef test_getset_not_exist(r: redis.Redis):\n    val = r.getset(\"foo\", \"bar\")\n    assert val is None\n    assert r.get(\"foo\") == b\"bar\"\n\n\ndef test_get_float_type(r: redis.Redis):  # Test for issue #58\n    r.set(\"key\", 123)\n    assert r.get(\"key\") == b\"123\"\n    r.incr(\"key\")\n    assert r.get(\"key\") == b\"124\"\n\n\ndef test_set_float_value(r: redis.Redis):\n    x = 1.23456789123456789\n    r.set(\"foo\", x)\n    assert float(r.get(\"foo\")) == x\n\n\n@pytest.mark.min_server(\"7\")\ndef test_expire_should_not_expire__when_no_expire_is_set(r: redis.Redis):\n    r.set(\"foo\", \"bar\")\n    assert r.get(\"foo\") == b\"bar\"\n    assert r.expire(\"foo\", 1, xx=True) == 0\n\n\n@pytest.mark.min_server(\"7\")\ndef test_expire_should_not_expire__when_expire_is_set(r: redis.Redis):\n    r.set(\"foo\", \"bar\")\n    assert r.get(\"foo\") == b\"bar\"\n    assert r.expire(\"foo\", 1, nx=True) == 1\n    assert r.expire(\"foo\", 2, nx=True) == 0\n\n\n@pytest.mark.min_server(\"7\")\ndef test_expire_should_expire__when_expire_is_greater(r: redis.Redis):\n    r.set(\"foo\", \"bar\")\n    assert r.get(\"foo\") == b\"bar\"\n    assert r.expire(\"foo\", 100) == 1\n    assert r.get(\"foo\") == b\"bar\"\n    assert r.expire(\"foo\", 200, gt=True) == 1\n\n\n@pytest.mark.min_server(\"7\")\ndef test_expire_should_expire__when_expire_is_lessthan(r: redis.Redis):\n    r.set(\"foo\", \"bar\")\n    assert r.get(\"foo\") == b\"bar\"\n    assert r.expire(\"foo\", 20) == 1\n    assert r.expire(\"foo\", 10, lt=True) == 1\n\n\ndef test_rename(r: redis.Redis):\n    r.set(\"foo\", \"unique value\")\n    assert r.rename(\"foo\", \"bar\")\n    assert r.get(\"foo\") is None\n    assert r.get(\"bar\") == b\"unique value\"\n\n\ndef test_rename_nonexistent_key(r: redis.Redis):\n    with 
pytest.raises(redis.ResponseError):\n        r.rename(\"foo\", \"bar\")\n\n\ndef test_renamenx_doesnt_exist(r: redis.Redis):\n    r.set(\"foo\", \"unique value\")\n    assert r.renamenx(\"foo\", \"bar\")\n    assert r.get(\"foo\") is None\n    assert r.get(\"bar\") == b\"unique value\"\n\n\ndef test_rename_does_exist(r: redis.Redis):\n    r.set(\"foo\", \"unique value\")\n    r.set(\"bar\", \"unique value2\")\n    assert not r.renamenx(\"foo\", \"bar\")\n    assert r.get(\"foo\") == b\"unique value\"\n    assert r.get(\"bar\") == b\"unique value2\"\n\n\ndef test_rename_expiry(r: redis.Redis):\n    r.set(\"foo\", \"value1\", ex=10)\n    r.set(\"bar\", \"value2\")\n    r.rename(\"foo\", \"bar\")\n    assert r.ttl(\"bar\") > 0\n\n\ndef test_keys(r: redis.Redis):\n    r.set(\"\", \"empty\")\n    r.set(\"abc\\n\", \"\")\n    r.set(\"abc\\\\\", \"\")\n    r.set(\"abcde\", \"\")\n    r.set(b\"\\xfe\\xcd\", \"\")\n    assert sorted(r.keys()) == [b\"\", b\"abc\\n\", b\"abc\\\\\", b\"abcde\", b\"\\xfe\\xcd\"]\n    assert r.keys(\"??\") == [b\"\\xfe\\xcd\"]\n    # empty pattern not the same as no pattern\n    assert r.keys(\"\") == [b\"\"]\n    # ? 
must match \\n\n    assert sorted(r.keys(\"abc?\")) == [b\"abc\\n\", b\"abc\\\\\"]\n    # must be anchored at both ends\n    assert r.keys(\"abc\") == []\n    assert r.keys(\"bcd\") == []\n    # wildcard test\n    assert r.keys(\"a*de\") == [b\"abcde\"]\n    # positive groups\n    assert sorted(r.keys(\"abc[d\\n]*\")) == [b\"abc\\n\", b\"abcde\"]\n    assert r.keys(\"abc[c-e]?\") == [b\"abcde\"]\n\n    # Not working in Dragonfly with reverse range\n    # assert r.keys(\"abc[e-c]?\") == [b\"abcde\"]\n    assert r.keys(\"abc[e-e]?\") == []\n    assert r.keys(\"abcd[ef\") == [b\"abcde\"]\n    assert r.keys(\"abcd[]\") == []\n    # negative groups\n    assert r.keys(\"abc[^d\\\\\\\\]*\") == [b\"abc\\n\"]\n    assert r.keys(\"abc[^]e\") == [b\"abcde\"]\n    # escaping\n    assert r.keys(r\"abc\\?e\") == []\n    assert r.keys(r\"abc\\de\") == [b\"abcde\"]\n    assert r.keys(r\"abc[\\d]e\") == [b\"abcde\"]\n    # some escaping cases that redis handles strangely\n    assert r.keys(\"abc\\\\\") == [b\"abc\\\\\"]\n    # assert r.keys(r\"abc[\\c-e]e\") == [] dragonfly matches abcde\n    # assert r.keys(r\"abc[c-\\e]e\") == [] dragonfly matches abcde\n\n\ndef test_contains(r: redis.Redis):\n    assert not r.exists(\"foo\")\n    r.set(\"foo\", \"bar\")\n    assert r.exists(\"foo\")\n\n\ndef test_delete(r: redis.Redis):\n    r[\"foo\"] = \"bar\"\n    assert r.delete(\"foo\") == 1\n    assert r.get(\"foo\") is None\n\n\n@pytest.mark.slow\ndef test_delete_expire(r: redis.Redis):\n    r.set(\"foo\", \"bar\", ex=1)\n    r.delete(\"foo\")\n    r.set(\"foo\", \"bar\")\n    sleep(2)\n    assert r.get(\"foo\") == b\"bar\"\n\n\ndef test_delete_multiple(r: redis.Redis):\n    r[\"one\"] = \"one\"\n    r[\"two\"] = \"two\"\n    r[\"three\"] = \"three\"\n    # Since redis>=2.7.6 returns number of deleted items.\n    assert r.delete(\"one\", \"two\") == 2\n    assert r.get(\"one\") is None\n    assert r.get(\"two\") is None\n    assert r.get(\"three\") == b\"three\"\n    assert 
r.delete(\"one\", \"two\") == 0\n    # If any keys are deleted, True is returned.\n    assert r.delete(\"two\", \"three\", \"three\") == 1\n    assert r.get(\"three\") is None\n\n\ndef test_delete_nonexistent_key(r: redis.Redis):\n    assert r.delete(\"foo\") == 0\n\n\ndef test_basic_sort(r: redis.Redis):\n    r.rpush(\"foo\", \"2\")\n    r.rpush(\"foo\", \"1\")\n    r.rpush(\"foo\", \"3\")\n\n    assert r.sort(\"foo\") == [b\"1\", b\"2\", b\"3\"]\n    assert raw_command(r, \"sort\", \"foo\", \"asc\") == [b\"1\", b\"2\", b\"3\"]\n\n\ndef test_key_patterns(r: redis.Redis):\n    r.mset({\"one\": 1, \"two\": 2, \"three\": 3, \"four\": 4})\n    assert sorted(r.keys(\"*o*\")) == [b\"four\", b\"one\", b\"two\"]\n    assert r.keys(\"t??\") == [b\"two\"]\n    assert sorted(r.keys(\"*\")) == [b\"four\", b\"one\", b\"three\", b\"two\"]\n    assert sorted(r.keys()) == [b\"four\", b\"one\", b\"three\", b\"two\"]\n\n\n# seems like a rather peculiar behavior of Redis, maybe a bug? Disabling for Dragonfly for now.\n@pytest.mark.min_server(\"7\")\n@pytest.mark.unsupported_server_types(\"dragonfly\")\ndef test_watch_when_setbit_does_not_change_value(r: redis.Redis):\n    r.set(\"foo\", b\"0\")\n\n    with r.pipeline() as p:\n        p.watch(\"foo\")\n        assert r.setbit(\"foo\", 0, 0) == 0\n        assert p.multi() is None\n        assert p.execute() == []\n\n\ndef test_from_hypothesis_redis7(r: redis.Redis):\n    r.set(\"foo\", b\"0\")\n    assert r.setbit(\"foo\", 0, 0) == 0\n    assert r.append(\"foo\", b\"\") == 1\n\n    r.set(b\"\", b\"\")\n    assert r.setbit(b\"\", 0, 0) == 0\n    assert r.get(b\"\") == b\"\\x00\"\n"
  },
  {
    "path": "tests/fakeredis/test/test_mixins/test_geo_commands.py",
    "content": "from typing import Dict, Any\n\nimport pytest\nimport redis\n\nfrom test import testtools\n\n\ndef test_geoadd_ch(r: redis.Redis):\n    values = (2.1909389952632, 41.433791470673, \"place1\")\n    assert r.geoadd(\"a\", values) == 1\n    values = (\n        2.1909389952632,\n        31.433791470673,\n        \"place1\",\n        2.1873744593677,\n        41.406342043777,\n        \"place2\",\n    )\n    assert r.geoadd(\"a\", values, ch=True) == 2\n    assert r.zrange(\"a\", 0, -1) == [b\"place1\", b\"place2\"]\n\n\ndef test_geoadd(r: redis.Redis):\n    values = (\n        2.1909389952632,\n        41.433791470673,\n        \"place1\",\n        2.1873744593677,\n        41.406342043777,\n        \"place2\",\n    )\n    assert r.geoadd(\"barcelona\", values) == 2\n    assert r.zcard(\"barcelona\") == 2\n\n    values = (2.1909389952632, 41.433791470673, \"place1\")\n    assert r.geoadd(\"a\", values) == 1\n\n    with pytest.raises(redis.DataError):\n        r.geoadd(\"barcelona\", (1, 2))\n    with pytest.raises(redis.DataError):\n        r.geoadd(\"t\", values, ch=True, nx=True, xx=True)\n    with pytest.raises(redis.ResponseError):\n        testtools.raw_command(r, \"geoadd\", \"barcelona\", \"1\", \"2\")\n    with pytest.raises(redis.ResponseError):\n        testtools.raw_command(\n            r,\n            \"geoadd\",\n            \"barcelona\",\n            \"nx\",\n            \"xx\",\n            *values,\n        )\n\n\ndef test_geoadd_xx(r: redis.Redis):\n    values = (\n        2.1909389952632,\n        41.433791470673,\n        \"place1\",\n        2.1873744593677,\n        41.406342043777,\n        \"place2\",\n    )\n    assert r.geoadd(\"a\", values) == 2\n    values = (\n        2.1909389952632,\n        41.433791470673,\n        b\"place1\",\n        2.1873744593677,\n        41.406342043777,\n        b\"place2\",\n        2.1804738294738,\n        41.405647879212,\n        b\"place3\",\n    )\n    assert r.geoadd(\"a\", values, 
nx=True) == 1\n    assert r.zrange(\"a\", 0, -1) == [b\"place3\", b\"place2\", b\"place1\"]\n\n\ndef test_geohash(r: redis.Redis):\n    values = (\n        2.1909389952632,\n        41.433791470673,\n        \"place1\",\n        2.1873744593677,\n        41.406342043777,\n        \"place2\",\n    )\n    r.geoadd(\"barcelona\", values)\n    assert r.geohash(\"barcelona\", \"place1\", \"place2\", \"place3\") == [\n        \"sp3e9yg3kd0\",\n        \"sp3e9cbc3t0\",\n        None,\n    ]\n\n\ndef test_geopos(r: redis.Redis):\n    values = (\n        2.1909389952632,\n        41.433791470673,\n        \"place1\",\n        2.1873744593677,\n        41.406342043777,\n        \"place2\",\n    )\n    r.geoadd(\"barcelona\", values)\n    # small errors may be introduced.\n    assert r.geopos(\"barcelona\", \"place1\", \"place4\", \"place2\") == [\n        pytest.approx((2.1909389952632, 41.433791470673), 0.00001),\n        None,\n        pytest.approx((2.1873744593677, 41.406342043777), 0.00001),\n    ]\n\n\ndef test_geodist(r: redis.Redis):\n    values = (\n        2.1909389952632,\n        41.433791470673,\n        \"place1\",\n        2.1873744593677,\n        41.406342043777,\n        \"place2\",\n    )\n    assert r.geoadd(\"barcelona\", values) == 2\n    assert r.geodist(\"barcelona\", \"place1\", \"place2\") == pytest.approx(\n        3067.4157, 0.0001\n    )\n\n\ndef test_geodist_units(r: redis.Redis):\n    values = (\n        2.1909389952632,\n        41.433791470673,\n        \"place1\",\n        2.1873744593677,\n        41.406342043777,\n        \"place2\",\n    )\n    r.geoadd(\"barcelona\", values)\n    assert r.geodist(\"barcelona\", \"place1\", \"place2\", \"km\") == pytest.approx(\n        3.0674, 0.0001\n    )\n    assert r.geodist(\"barcelona\", \"place1\", \"place2\", \"mi\") == pytest.approx(\n        1.906, 0.0001\n    )\n    assert r.geodist(\"barcelona\", \"place1\", \"place2\", \"ft\") == pytest.approx(\n        10063.6998, 0.0001\n    )\n    with 
pytest.raises(redis.RedisError):\n        assert r.geodist(\"x\", \"y\", \"z\", \"inches\")\n\n\ndef test_geodist_missing_one_member(r: redis.Redis):\n    values = (2.1909389952632, 41.433791470673, \"place1\")\n    r.geoadd(\"barcelona\", values)\n    assert r.geodist(\"barcelona\", \"place1\", \"missing_member\", \"km\") is None\n\n\n@pytest.mark.unsupported_server_types(\"dragonfly\")\n@pytest.mark.parametrize(\n    \"long,lat,radius,extra,expected\",\n    [\n        (2.191, 41.433, 1000, {}, [b\"place1\"]),\n        (2.187, 41.406, 1000, {}, [b\"place2\"]),\n        (1, 2, 1000, {}, []),\n        (2.191, 41.433, 1, {\"unit\": \"km\"}, [b\"place1\"]),\n        (2.191, 41.433, 3000, {\"count\": 1}, [b\"place1\"]),\n    ],\n)\ndef test_georadius(\n    r: redis.Redis,\n    long: float,\n    lat: float,\n    radius: float,\n    extra: Dict[str, Any],\n    expected,\n):\n    values = (\n        2.1909389952632,\n        41.433791470673,\n        \"place1\",\n        2.1873744593677,\n        41.406342043777,\n        \"place2\",\n    )\n    r.geoadd(\"barcelona\", values)\n    assert r.georadius(\"barcelona\", long, lat, radius, **extra) == expected\n\n\n@pytest.mark.unsupported_server_types(\"dragonfly\")\n@pytest.mark.parametrize(\n    \"member,radius,extra,expected\",\n    [\n        (\"place1\", 1000, {}, [b\"place1\"]),\n        (\"place2\", 1000, {}, [b\"place2\"]),\n        (\"place1\", 1, {\"unit\": \"km\"}, [b\"place1\"]),\n        (\"place1\", 3000, {\"count\": 1}, [b\"place1\"]),\n    ],\n)\ndef test_georadiusbymember(\n    r: redis.Redis, member: str, radius: float, extra: Dict[str, Any], expected\n):\n    values = (\n        2.1909389952632,\n        41.433791470673,\n        \"place1\",\n        2.1873744593677,\n        41.406342043777,\n        b\"place2\",\n    )\n    r.geoadd(\"barcelona\", values)\n    assert r.georadiusbymember(\"barcelona\", member, radius, **extra) == expected\n    assert r.georadiusbymember(\n        \"barcelona\", member, 
radius, **extra, store_dist=\"extract\"\n    ) == len(expected)\n    assert r.zcard(\"extract\") == len(expected)\n\n\n@pytest.mark.unsupported_server_types(\"dragonfly\")\ndef test_georadius_with(r: redis.Redis):\n    values = (\n        2.1909389952632,\n        41.433791470673,\n        \"place1\",\n        2.1873744593677,\n        41.406342043777,\n        \"place2\",\n    )\n\n    r.geoadd(\"barcelona\", values)\n    # test a bunch of combinations to test the parse response function.\n    res = r.georadius(\n        \"barcelona\",\n        2.191,\n        41.433,\n        1,\n        unit=\"km\",\n        withdist=True,\n        withcoord=True,\n    )\n    assert res == [\n        pytest.approx(\n            [b\"place1\", 0.0881, pytest.approx((2.1909, 41.4337), 0.0001)], 0.001\n        )\n    ]\n\n    res = r.georadius(\n        \"barcelona\", 2.191, 41.433, 1, unit=\"km\", withdist=True, withcoord=True\n    )\n    assert res == [\n        pytest.approx(\n            [b\"place1\", 0.0881, pytest.approx((2.1909, 41.4337), 0.0001)], 0.001\n        )\n    ]\n\n    res = r.georadius(\"barcelona\", 2.191, 41.433, 1, unit=\"km\", withcoord=True)\n    assert res == [[b\"place1\", pytest.approx((2.1909, 41.4337), 0.0001)]]\n\n    # test no values.\n    assert (\n        r.georadius(\n            \"barcelona\",\n            2,\n            1,\n            1,\n            unit=\"km\",\n            withdist=True,\n            withcoord=True,\n        )\n        == []\n    )\n\n\n@pytest.mark.unsupported_server_types(\"dragonfly\")\ndef test_georadius_count(r: redis.Redis):\n    values = (\n        2.1909389952632,\n        41.433791470673,\n        \"place1\",\n        2.1873744593677,\n        41.406342043777,\n        \"place2\",\n    )\n\n    r.geoadd(\"barcelona\", values)\n\n    assert (\n        r.georadius(\"barcelona\", 2.191, 41.433, 3000, count=1, store=\"barcelona\") == 1\n    )\n    assert r.georadius(\"barcelona\", 2.191, 41.433, 3000, 
store_dist=\"extract\") == 1\n    assert r.zcard(\"extract\") == 1\n    res = r.georadius(\"barcelona\", 2.191, 41.433, 3000, count=1, any=True)\n    assert (res == [b\"place2\"]) or res == [b\"place1\"]\n\n    values = (\n        13.361389,\n        38.115556,\n        \"Palermo\",\n        15.087269,\n        37.502669,\n        \"Catania\",\n    )\n\n    r.geoadd(\"Sicily\", values)\n    assert (\n        testtools.raw_command(\n            r,\n            \"GEORADIUS\",\n            \"Sicily\",\n            \"15\",\n            \"37\",\n            \"200\",\n            \"km\",\n            \"STOREDIST\",\n            \"neardist\",\n            \"STORE\",\n            \"near\",\n        )\n        == 2\n    )\n    assert r.zcard(\"near\") == 2\n    assert r.zcard(\"neardist\") == 0\n\n\ndef test_georadius_errors(r: redis.Redis):\n    values = (\n        13.361389,\n        38.115556,\n        \"Palermo\",\n        15.087269,\n        37.502669,\n        \"Catania\",\n    )\n\n    r.geoadd(\"Sicily\", values)\n\n    with pytest.raises(redis.DataError):  # Unsupported unit\n        r.georadius(\"barcelona\", 2.191, 41.433, 3000, unit=\"dsf\")\n    with pytest.raises(redis.ResponseError):  # Unsupported unit\n        testtools.raw_command(\n            r,\n            \"GEORADIUS\",\n            \"Sicily\",\n            \"15\",\n            \"37\",\n            \"200\",\n            \"ddds\",\n            \"STOREDIST\",\n            \"neardist\",\n            \"STORE\",\n            \"near\",\n        )\n\n    bad_values = (\n        13.361389,\n        38.115556,\n        \"Palermo\",\n        15.087269,\n        \"Catania\",\n    )\n    with pytest.raises(redis.DataError):\n        r.geoadd(\"newgroup\", bad_values)\n    with pytest.raises(redis.ResponseError):\n        testtools.raw_command(r, \"geoadd\", \"newgroup\", *bad_values)\n\n\n@pytest.mark.unsupported_server_types(\"dragonfly\")\ndef test_geosearch(r: redis.Redis):\n    values = (\n        
2.1909389952632,\n        41.433791470673,\n        b\"place1\",\n        2.1873744593677,\n        41.406342043777,\n        b\"place2\",\n        2.583333,\n        41.316667,\n        b\"place3\",\n    )\n    r.geoadd(\"barcelona\", values)\n    assert r.geosearch(\"barcelona\", longitude=2.191, latitude=41.433, radius=1000) == [\n        b\"place1\"\n    ]\n    assert r.geosearch(\"barcelona\", longitude=2.187, latitude=41.406, radius=1000) == [\n        b\"place2\"\n    ]\n    # assert r.geosearch(\"barcelona\", longitude=2.191, latitude=41.433, height=1000, width=1000) == [b\"place1\"]\n    assert set(r.geosearch(\"barcelona\", member=\"place3\", radius=100, unit=\"km\")) == {\n        b\"place2\",\n        b\"place1\",\n        b\"place3\",\n    }\n    # test count\n    assert r.geosearch(\n        \"barcelona\", member=\"place3\", radius=100, unit=\"km\", count=2\n    ) == [\n        b\"place3\",\n        b\"place2\",\n    ]\n    assert r.geosearch(\n        \"barcelona\", member=\"place3\", radius=100, unit=\"km\", count=1, any=True\n    )[0] in [\n        b\"place1\",\n        b\"place3\",\n        b\"place2\",\n    ]\n"
  },
  {
    "path": "tests/fakeredis/test/test_mixins/test_hash_commands.py",
    "content": "import pytest\nimport redis\nimport redis.client\n\nfrom test import testtools\n\n\ndef test_hstrlen_missing(r: redis.Redis):\n    assert r.hstrlen(\"foo\", \"doesnotexist\") == 0\n\n    r.hset(\"foo\", \"key\", \"value\")\n    assert r.hstrlen(\"foo\", \"doesnotexist\") == 0\n\n\ndef test_hstrlen(r: redis.Redis):\n    r.hset(\"foo\", \"key\", \"value\")\n    assert r.hstrlen(\"foo\", \"key\") == 5\n\n\ndef test_hset_then_hget(r: redis.Redis):\n    assert r.hset(\"foo\", \"key\", \"value\") == 1\n    assert r.hget(\"foo\", \"key\") == b\"value\"\n\n\ndef test_hset_update(r: redis.Redis):\n    assert r.hset(\"foo\", \"key\", \"value\") == 1\n    assert r.hset(\"foo\", \"key\", \"value\") == 0\n\n\ndef test_hset_wrong_type(r: redis.Redis):\n    r.zadd(\"foo\", {\"bar\": 1})\n    with pytest.raises(redis.ResponseError):\n        r.hset(\"foo\", \"key\", \"value\")\n\n\ndef test_hgetall(r: redis.Redis):\n    assert r.hset(\"foo\", \"k1\", \"v1\") == 1\n    assert r.hset(\"foo\", \"k2\", \"v2\") == 1\n    assert r.hset(\"foo\", \"k3\", \"v3\") == 1\n    assert r.hgetall(\"foo\") == {b\"k1\": b\"v1\", b\"k2\": b\"v2\", b\"k3\": b\"v3\"}\n\n\ndef test_hgetall_empty_key(r: redis.Redis):\n    assert r.hgetall(\"foo\") == {}\n\n\ndef test_hgetall_wrong_type(r: redis.Redis):\n    r.zadd(\"foo\", {\"bar\": 1})\n    with pytest.raises(redis.ResponseError):\n        r.hgetall(\"foo\")\n\n\ndef test_hexists(r: redis.Redis):\n    r.hset(\"foo\", \"bar\", \"v1\")\n    assert r.hexists(\"foo\", \"bar\") == 1\n    assert r.hexists(\"foo\", \"baz\") == 0\n    assert r.hexists(\"bar\", \"bar\") == 0\n\n\ndef test_hexists_wrong_type(r: redis.Redis):\n    r.zadd(\"foo\", {\"bar\": 1})\n    with pytest.raises(redis.ResponseError):\n        r.hexists(\"foo\", \"key\")\n\n\ndef test_hkeys(r: redis.Redis):\n    r.hset(\"foo\", \"k1\", \"v1\")\n    r.hset(\"foo\", \"k2\", \"v2\")\n    assert set(r.hkeys(\"foo\")) == {b\"k1\", b\"k2\"}\n    assert set(r.hkeys(\"bar\")) == 
set()\n\n\ndef test_hkeys_wrong_type(r: redis.Redis):\n    r.zadd(\"foo\", {\"bar\": 1})\n    with pytest.raises(redis.ResponseError):\n        r.hkeys(\"foo\")\n\n\ndef test_hlen(r: redis.Redis):\n    r.hset(\"foo\", \"k1\", \"v1\")\n    r.hset(\"foo\", \"k2\", \"v2\")\n    assert r.hlen(\"foo\") == 2\n\n\ndef test_hlen_wrong_type(r: redis.Redis):\n    r.zadd(\"foo\", {\"bar\": 1})\n    with pytest.raises(redis.ResponseError):\n        r.hlen(\"foo\")\n\n\ndef test_hvals(r: redis.Redis):\n    r.hset(\"foo\", \"k1\", \"v1\")\n    r.hset(\"foo\", \"k2\", \"v2\")\n    assert set(r.hvals(\"foo\")) == {b\"v1\", b\"v2\"}\n    assert set(r.hvals(\"bar\")) == set()\n\n\ndef test_hvals_wrong_type(r: redis.Redis):\n    r.zadd(\"foo\", {\"bar\": 1})\n    with pytest.raises(redis.ResponseError):\n        r.hvals(\"foo\")\n\n\ndef test_hmget(r: redis.Redis):\n    r.hset(\"foo\", \"k1\", \"v1\")\n    r.hset(\"foo\", \"k2\", \"v2\")\n    r.hset(\"foo\", \"k3\", \"v3\")\n    # Normal case.\n    assert r.hmget(\"foo\", [\"k1\", \"k3\"]) == [b\"v1\", b\"v3\"]\n    assert r.hmget(\"foo\", \"k1\", \"k3\") == [b\"v1\", b\"v3\"]\n    # Key does not exist.\n    assert r.hmget(\"bar\", [\"k1\", \"k3\"]) == [None, None]\n    assert r.hmget(\"bar\", \"k1\", \"k3\") == [None, None]\n    # Some keys in the hash do not exist.\n    assert r.hmget(\"foo\", [\"k1\", \"k500\"]) == [b\"v1\", None]\n    assert r.hmget(\"foo\", \"k1\", \"k500\") == [b\"v1\", None]\n\n\ndef test_hmget_wrong_type(r: redis.Redis):\n    r.zadd(\"foo\", {\"bar\": 1})\n    with pytest.raises(redis.ResponseError):\n        r.hmget(\"foo\", \"key1\", \"key2\")\n\n\ndef test_hdel(r: redis.Redis):\n    r.hset(\"foo\", \"k1\", \"v1\")\n    r.hset(\"foo\", \"k2\", \"v2\")\n    r.hset(\"foo\", \"k3\", \"v3\")\n    assert r.hget(\"foo\", \"k1\") == b\"v1\"\n    assert r.hdel(\"foo\", \"k1\") == 1\n    assert r.hget(\"foo\", \"k1\") is None\n    assert r.hdel(\"foo\", \"k1\") == 0\n    # Since redis>=2.7.6 returns number of 
deleted items.\n    assert r.hdel(\"foo\", \"k2\", \"k3\") == 2\n    assert r.hget(\"foo\", \"k2\") is None\n    assert r.hget(\"foo\", \"k3\") is None\n    assert r.hdel(\"foo\", \"k2\", \"k3\") == 0\n\n\ndef test_hdel_wrong_type(r: redis.Redis):\n    r.zadd(\"foo\", {\"bar\": 1})\n    with pytest.raises(redis.ResponseError):\n        r.hdel(\"foo\", \"key\")\n\n\ndef test_hincrby(r: redis.Redis):\n    r.hset(\"foo\", \"counter\", 0)\n    assert r.hincrby(\"foo\", \"counter\") == 1\n    assert r.hincrby(\"foo\", \"counter\") == 2\n    assert r.hincrby(\"foo\", \"counter\") == 3\n\n\ndef test_hincrby_with_no_starting_value(r: redis.Redis):\n    assert r.hincrby(\"foo\", \"counter\") == 1\n    assert r.hincrby(\"foo\", \"counter\") == 2\n    assert r.hincrby(\"foo\", \"counter\") == 3\n\n\ndef test_hincrby_with_range_param(r: redis.Redis):\n    assert r.hincrby(\"foo\", \"counter\", 2) == 2\n    assert r.hincrby(\"foo\", \"counter\", 2) == 4\n    assert r.hincrby(\"foo\", \"counter\", 2) == 6\n\n\ndef test_hincrby_wrong_type(r: redis.Redis):\n    r.zadd(\"foo\", {\"bar\": 1})\n    with pytest.raises(redis.ResponseError):\n        r.hincrby(\"foo\", \"key\", 2)\n\n\ndef test_hincrbyfloat(r: redis.Redis):\n    r.hset(\"foo\", \"counter\", 0.0)\n    assert r.hincrbyfloat(\"foo\", \"counter\") == 1.0\n    assert r.hincrbyfloat(\"foo\", \"counter\") == 2.0\n    assert r.hincrbyfloat(\"foo\", \"counter\") == 3.0\n\n\ndef test_hincrbyfloat_with_no_starting_value(r: redis.Redis):\n    assert r.hincrbyfloat(\"foo\", \"counter\") == 1.0\n    assert r.hincrbyfloat(\"foo\", \"counter\") == 2.0\n    assert r.hincrbyfloat(\"foo\", \"counter\") == 3.0\n\n\ndef test_hincrbyfloat_with_range_param(r: redis.Redis):\n    assert r.hincrbyfloat(\"foo\", \"counter\", 0.1) == pytest.approx(0.1)\n    assert r.hincrbyfloat(\"foo\", \"counter\", 0.1) == pytest.approx(0.2)\n    assert r.hincrbyfloat(\"foo\", \"counter\", 0.1) == pytest.approx(0.3)\n\n\ndef 
test_hincrbyfloat_on_non_float_value_raises_error(r: redis.Redis):\n    r.hset(\"foo\", \"counter\", \"cat\")\n    with pytest.raises(redis.ResponseError):\n        r.hincrbyfloat(\"foo\", \"counter\")\n\n\ndef test_hincrbyfloat_with_non_float_amount_raises_error(r: redis.Redis):\n    with pytest.raises(redis.ResponseError):\n        r.hincrbyfloat(\"foo\", \"counter\", \"cat\")\n\n\ndef test_hincrbyfloat_wrong_type(r: redis.Redis):\n    r.zadd(\"foo\", {\"bar\": 1})\n    with pytest.raises(redis.ResponseError):\n        r.hincrbyfloat(\"foo\", \"key\", 0.1)\n\n\ndef test_hincrbyfloat_precision(r: redis.Redis):\n    x = 1.23456789123456789\n    assert r.hincrbyfloat(\"foo\", \"bar\", x) == x\n    assert float(r.hget(\"foo\", \"bar\")) == x\n\n\ndef test_hsetnx(r: redis.Redis):\n    assert r.hsetnx(\"foo\", \"newkey\", \"v1\") == 1\n    assert r.hsetnx(\"foo\", \"newkey\", \"v1\") == 0\n    assert r.hget(\"foo\", \"newkey\") == b\"v1\"\n\n\ndef test_hmset_empty_raises_error(r: redis.Redis):\n    with pytest.raises(redis.DataError):\n        r.hmset(\"foo\", {})\n\n\n@testtools.run_test_if_redispy_ver(\"lte\", \"4.6\")\ndef test_hmset_redispy4(r: redis.Redis):\n    r.hset(\"foo\", \"k1\", \"v1\")\n    assert r.hmset(\"foo\", {\"k2\": \"v2\", \"k3\": \"v3\"}) is True\n\n\ndef test_hmset_wrong_type(r: redis.Redis):\n    r.zadd(\"foo\", {\"bar\": 1})\n    with pytest.raises(redis.ResponseError):\n        r.hmset(\"foo\", {\"key\": \"value\"})\n\n\ndef test_empty_hash(r: redis.Redis):\n    r.hset(\"foo\", \"bar\", \"baz\")\n    r.hdel(\"foo\", \"bar\")\n    assert not r.exists(\"foo\")\n\n\ndef test_hset_removing_last_field_delete_key(r: redis.Redis):\n    r.hset(b\"3L\", b\"f1\", b\"v1\")\n    r.hdel(b\"3L\", b\"f1\")\n    assert r.keys(\"*\") == []\n\n\ndef test_hscan(r: redis.Redis):\n    # Set up the data\n    name = \"hscan-test\"\n    for ix in range(20):\n        k = \"key:%s\" % ix\n        v = \"result:%s\" % ix\n        r.hset(name, k, v)\n    expected = 
r.hgetall(name)\n    assert len(expected) == 20  # Ensure we know what we're testing\n\n    # Test that we page through the results and get everything out\n    results = {}\n    cursor = \"0\"\n    while cursor != 0:\n        cursor, data = r.hscan(name, cursor, count=6)\n        results.update(data)\n    assert expected == results\n\n    # Test the iterator version\n    results = {}\n    for key, val in r.hscan_iter(name, count=6):\n        results[key] = val\n    assert expected == results\n\n    # Now test that the MATCH functionality works\n    results = {}\n    cursor = \"0\"\n    while cursor != 0:\n        cursor, data = r.hscan(name, cursor, match=\"*7\", count=100)\n        results.update(data)\n    assert b\"key:7\" in results\n    assert b\"key:17\" in results\n    assert len(results) == 2\n\n    # Test the match on iterator\n    results = {}\n    for key, val in r.hscan_iter(name, match=\"*7\"):\n        results[key] = val\n    assert b\"key:7\" in results\n    assert b\"key:17\" in results\n    assert len(results) == 2\n\n\ndef test_hrandfield(r: redis.Redis):\n    assert r.hrandfield(\"key\") is None\n    hash = {b\"a\": 1, b\"b\": 2, b\"c\": 3, b\"d\": 4, b\"e\": 5}\n    r.hset(\"key\", mapping=hash)\n    assert r.hrandfield(\"key\") is not None\n    assert len(r.hrandfield(\"key\", 0)) == 0\n    res = r.hrandfield(\"key\", 2)\n    assert len(res) == 2\n    assert res[0] in set(hash.keys())\n    assert res[1] in set(hash.keys())\n    # with values\n    res = r.hrandfield(\"key\", 2, True)\n    assert len(res) == 4\n    assert res[0] in set(hash.keys())\n    assert res[1] in {str(x).encode() for x in hash.values()}\n    assert res[2] in set(hash.keys())\n    assert res[3] in {str(x).encode() for x in hash.values()}\n    # without duplications\n    assert len(r.hrandfield(\"key\", 10)) == 5\n    # with duplications\n    assert len(r.hrandfield(\"key\", -10)) == 10\n\n    with pytest.raises(redis.ResponseError):\n        testtools.raw_command(r, 
\"HRANDFIELD\", \"key\", 3, \"WITHVALUES\", 3)\n"
  },
  {
    "path": "tests/fakeredis/test/test_mixins/test_list_commands.py",
    "content": "import threading\nfrom time import sleep\n\nimport pytest\nimport redis\nimport redis.client\n\nfrom .. import testtools\n\n\ndef _push_thread(r: redis.Redis) -> threading.Thread:\n    def run():\n        sleep(0.5)\n        r.rpush(\"foo\", \"value1\")\n        sleep(0.5)\n        # Will wake the condition variable\n        r.set(\"bar\", \"go back to sleep some more\")\n        r.rpush(\"foo\", \"value2\")\n\n    thread = threading.Thread(target=run)\n    thread.start()\n    return thread\n\n\ndef test_lpush_then_lrange_all(r: redis.Redis):\n    assert r.lpush(\"foo\", \"bar\") == 1\n    assert r.lpush(\"foo\", \"baz\") == 2\n    assert r.lpush(\"foo\", \"bam\", \"buzz\") == 4\n    assert r.lrange(\"foo\", 0, -1) == [b\"buzz\", b\"bam\", b\"baz\", b\"bar\"]\n\n\ndef test_lpush_then_lrange_portion(r: redis.Redis):\n    r.lpush(\"foo\", \"one\")\n    r.lpush(\"foo\", \"two\")\n    r.lpush(\"foo\", \"three\")\n    r.lpush(\"foo\", \"four\")\n    assert r.lrange(\"foo\", 0, 2) == [b\"four\", b\"three\", b\"two\"]\n    assert r.lrange(\"foo\", 0, 3) == [b\"four\", b\"three\", b\"two\", b\"one\"]\n\n\ndef test_lrange_negative_indices(r: redis.Redis):\n    r.rpush(\"foo\", \"a\", \"b\", \"c\")\n    assert r.lrange(\"foo\", -1, -2) == []\n    assert r.lrange(\"foo\", -2, -1) == [b\"b\", b\"c\"]\n\n\ndef test_lpush_key_does_not_exist(r: redis.Redis):\n    assert r.lrange(\"foo\", 0, -1) == []\n\n\ndef test_lpush_with_nonstr_key(r: redis.Redis):\n    r.lpush(1, \"one\")\n    r.lpush(1, \"two\")\n    r.lpush(1, \"three\")\n    assert r.lrange(1, 0, 2) == [b\"three\", b\"two\", b\"one\"]\n    assert r.lrange(\"1\", 0, 2) == [b\"three\", b\"two\", b\"one\"]\n\n\ndef test_lpush_wrong_type(r: redis.Redis):\n    r.set(\"foo\", \"bar\")\n    with pytest.raises(redis.ResponseError):\n        r.lpush(\"foo\", \"element\")\n\n\ndef test_llen(r: redis.Redis):\n    r.lpush(\"foo\", \"one\")\n    r.lpush(\"foo\", \"two\")\n    r.lpush(\"foo\", \"three\")\n    assert 
r.llen(\"foo\") == 3\n\n\ndef test_llen_no_exist(r: redis.Redis):\n    assert r.llen(\"foo\") == 0\n\n\ndef test_llen_wrong_type(r: redis.Redis):\n    r.set(\"foo\", \"bar\")\n    with pytest.raises(redis.ResponseError):\n        r.llen(\"foo\")\n\n\ndef test_lrem_positive_count(r: redis.Redis):\n    r.lpush(\"foo\", \"same\")\n    r.lpush(\"foo\", \"same\")\n    r.lpush(\"foo\", \"different\")\n    r.lrem(\"foo\", 2, \"same\")\n    assert r.lrange(\"foo\", 0, -1) == [b\"different\"]\n\n\ndef test_lrem_negative_count(r: redis.Redis):\n    r.lpush(\"foo\", \"removeme\")\n    r.lpush(\"foo\", \"three\")\n    r.lpush(\"foo\", \"two\")\n    r.lpush(\"foo\", \"one\")\n    r.lpush(\"foo\", \"removeme\")\n    r.lrem(\"foo\", -1, \"removeme\")\n    # Should remove it from the end of the list,\n    # leaving the 'removeme' from the front of the list alone.\n    assert r.lrange(\"foo\", 0, -1) == [b\"removeme\", b\"one\", b\"two\", b\"three\"]\n\n\ndef test_lrem_zero_count(r: redis.Redis):\n    r.lpush(\"foo\", \"one\")\n    r.lpush(\"foo\", \"one\")\n    r.lpush(\"foo\", \"one\")\n    r.lrem(\"foo\", 0, \"one\")\n    assert r.lrange(\"foo\", 0, -1) == []\n\n\ndef test_lrem_default_value(r: redis.Redis):\n    r.lpush(\"foo\", \"one\")\n    r.lpush(\"foo\", \"one\")\n    r.lpush(\"foo\", \"one\")\n    r.lrem(\"foo\", 0, \"one\")\n    assert r.lrange(\"foo\", 0, -1) == []\n\n\ndef test_lrem_does_not_exist(r: redis.Redis):\n    r.lpush(\"foo\", \"one\")\n    r.lrem(\"foo\", 0, \"one\")\n    # These should be noops.\n    r.lrem(\"foo\", -2, \"one\")\n    r.lrem(\"foo\", 2, \"one\")\n\n\ndef test_lrem_return_value(r: redis.Redis):\n    r.lpush(\"foo\", \"one\")\n    count = r.lrem(\"foo\", 0, \"one\")\n    assert count == 1\n    assert r.lrem(\"foo\", 0, \"one\") == 0\n\n\ndef test_lrem_wrong_type(r: redis.Redis):\n    r.set(\"foo\", \"bar\")\n    with pytest.raises(redis.ResponseError):\n        r.lrem(\"foo\", 0, \"element\")\n\n\ndef test_rpush(r: redis.Redis):\n    
r.rpush(\"foo\", \"one\")\n    r.rpush(\"foo\", \"two\")\n    r.rpush(\"foo\", \"three\")\n    r.rpush(\"foo\", \"four\", \"five\")\n    assert r.lrange(\"foo\", 0, -1) == [b\"one\", b\"two\", b\"three\", b\"four\", b\"five\"]\n\n\ndef test_rpush_wrong_type(r: redis.Redis):\n    r.set(\"foo\", \"bar\")\n    with pytest.raises(redis.ResponseError):\n        r.rpush(\"foo\", \"element\")\n\n\ndef test_lpop(r: redis.Redis):\n    assert r.rpush(\"foo\", \"one\") == 1\n    assert r.rpush(\"foo\", \"two\") == 2\n    assert r.rpush(\"foo\", \"three\") == 3\n    assert r.lpop(\"foo\") == b\"one\"\n    assert r.lpop(\"foo\") == b\"two\"\n    assert r.lpop(\"foo\") == b\"three\"\n\n\ndef test_lpop_empty_list(r: redis.Redis):\n    r.rpush(\"foo\", \"one\")\n    r.lpop(\"foo\")\n    assert r.lpop(\"foo\") is None\n    # Verify what happens if we try to pop from a key\n    # we've never seen before.\n    assert r.lpop(\"noexists\") is None\n\n\ndef test_lpop_zero_elem(r: redis.Redis):\n    r.rpush(b\"\\x00\", b\"\")\n    assert r.lpop(b\"\\x00\", 0) == []\n\n\ndef test_lpop_zero_non_existing_list(r: redis.Redis):\n    assert r.lpop(b\"\", 0) is None\n\n\ndef test_lpop_zero_wrong_type(r: redis.Redis):\n    r.set(b\"\", b\"\")\n    with pytest.raises(redis.ResponseError):\n        r.lpop(b\"\", 0)\n\n\ndef test_lpop_wrong_type(r: redis.Redis):\n    r.set(\"foo\", \"bar\")\n    with pytest.raises(redis.ResponseError):\n        r.lpop(\"foo\")\n\n\n@pytest.mark.min_server(\"6.2\")\ndef test_lpop_count(r: redis.Redis):\n    assert r.rpush(\"foo\", \"one\") == 1\n    assert r.rpush(\"foo\", \"two\") == 2\n    assert r.rpush(\"foo\", \"three\") == 3\n    assert testtools.raw_command(r, \"lpop\", \"foo\", 2) == [b\"one\", b\"two\"]\n    # See https://github.com/redis/redis/issues/9680\n    raw = testtools.raw_command(r, \"rpop\", \"foo\", 0)\n    assert raw is None or raw == []  # https://github.com/redis/redis/pull/10095\n\n\n@pytest.mark.min_server(\"6.2\")\ndef 
test_lpop_count_negative(r: redis.Redis):\n    with pytest.raises(redis.ResponseError):\n        testtools.raw_command(r, \"lpop\", \"foo\", -1)\n\n\ndef test_lset(r: redis.Redis):\n    r.rpush(\"foo\", \"one\")\n    r.rpush(\"foo\", \"two\")\n    r.rpush(\"foo\", \"three\")\n    r.lset(\"foo\", 0, \"four\")\n    r.lset(\"foo\", -2, \"five\")\n    assert r.lrange(\"foo\", 0, -1) == [b\"four\", b\"five\", b\"three\"]\n\n\ndef test_lset_index_out_of_range(r: redis.Redis):\n    r.rpush(\"foo\", \"one\")\n    with pytest.raises(redis.ResponseError):\n        r.lset(\"foo\", 3, \"three\")\n\n\ndef test_lset_wrong_type(r: redis.Redis):\n    r.set(\"foo\", \"bar\")\n    with pytest.raises(redis.ResponseError):\n        r.lset(\"foo\", 0, \"element\")\n\n\ndef test_rpushx(r: redis.Redis):\n    r.rpush(\"foo\", \"one\")\n    r.rpushx(\"foo\", \"two\")\n    r.rpushx(\"bar\", \"three\")\n    assert r.lrange(\"foo\", 0, -1) == [b\"one\", b\"two\"]\n    assert r.lrange(\"bar\", 0, -1) == []\n\n\ndef test_rpushx_wrong_type(r: redis.Redis):\n    r.set(\"foo\", \"bar\")\n    with pytest.raises(redis.ResponseError):\n        r.rpushx(\"foo\", \"element\")\n\n\ndef test_ltrim(r: redis.Redis):\n    r.rpush(\"foo\", \"one\")\n    r.rpush(\"foo\", \"two\")\n    r.rpush(\"foo\", \"three\")\n    r.rpush(\"foo\", \"four\")\n\n    assert r.ltrim(\"foo\", 1, 3)\n    assert r.lrange(\"foo\", 0, -1) == [b\"two\", b\"three\", b\"four\"]\n    assert r.ltrim(\"foo\", 1, -1)\n    assert r.lrange(\"foo\", 0, -1) == [b\"three\", b\"four\"]\n\n\ndef test_ltrim_with_non_existent_key(r: redis.Redis):\n    assert r.ltrim(\"foo\", 0, -1)\n\n\ndef test_ltrim_expiry(r: redis.Redis):\n    r.rpush(\"foo\", \"one\", \"two\", \"three\")\n    r.expire(\"foo\", 10)\n    r.ltrim(\"foo\", 1, 2)\n    assert r.ttl(\"foo\") > 0\n\n\ndef test_ltrim_wrong_type(r: redis.Redis):\n    r.set(\"foo\", \"bar\")\n    with pytest.raises(redis.ResponseError):\n        r.ltrim(\"foo\", 1, -1)\n\n\ndef test_lindex(r: 
redis.Redis):\n    r.rpush(\"foo\", \"one\")\n    r.rpush(\"foo\", \"two\")\n    assert r.lindex(\"foo\", 0) == b\"one\"\n    assert r.lindex(\"foo\", 4) is None\n    assert r.lindex(\"bar\", 4) is None\n\n\ndef test_lindex_wrong_type(r: redis.Redis):\n    r.set(\"foo\", \"bar\")\n    with pytest.raises(redis.ResponseError):\n        r.lindex(\"foo\", 0)\n\n\ndef test_lpushx(r: redis.Redis):\n    r.lpush(\"foo\", \"two\")\n    r.lpushx(\"foo\", \"one\")\n    r.lpushx(\"bar\", \"one\")\n    assert r.lrange(\"foo\", 0, -1) == [b\"one\", b\"two\"]\n    assert r.lrange(\"bar\", 0, -1) == []\n\n\ndef test_lpushx_wrong_type(r: redis.Redis):\n    r.set(\"foo\", \"bar\")\n    with pytest.raises(redis.ResponseError):\n        r.lpushx(\"foo\", \"element\")\n\n\ndef test_rpop(r: redis.Redis):\n    assert r.rpop(\"foo\") is None\n    r.rpush(\"foo\", \"one\")\n    r.rpush(\"foo\", \"two\")\n    assert r.rpop(\"foo\") == b\"two\"\n    assert r.rpop(\"foo\") == b\"one\"\n    assert r.rpop(\"foo\") is None\n\n\ndef test_rpop_wrong_type(r: redis.Redis):\n    r.set(\"foo\", \"bar\")\n    with pytest.raises(redis.ResponseError):\n        r.rpop(\"foo\")\n\n\n@pytest.mark.min_server(\"6.2\")\ndef test_rpop_count(r: redis.Redis):\n    assert r.rpush(\"foo\", \"one\") == 1\n    assert r.rpush(\"foo\", \"two\") == 2\n    assert r.rpush(\"foo\", \"three\") == 3\n    assert testtools.raw_command(r, \"rpop\", \"foo\", 2) == [b\"three\", b\"two\"]\n    # See https://github.com/redis/redis/issues/9680\n    raw = testtools.raw_command(r, \"rpop\", \"foo\", 0)\n    assert raw is None or raw == []  # https://github.com/redis/redis/pull/10095\n\n\n@pytest.mark.min_server(\"6.2\")\ndef test_rpop_count_negative(r: redis.Redis):\n    with pytest.raises(redis.ResponseError):\n        testtools.raw_command(r, \"rpop\", \"foo\", -1)\n\n\ndef test_linsert_before(r: redis.Redis):\n    r.rpush(\"foo\", \"hello\")\n    r.rpush(\"foo\", \"world\")\n    assert r.linsert(\"foo\", \"before\", \"world\", 
\"there\") == 3\n    assert r.lrange(\"foo\", 0, -1) == [b\"hello\", b\"there\", b\"world\"]\n    assert r.linsert(\"empty_list\", \"before\", \"world\", \"there\") == 0\n\n\ndef test_linsert_after(r: redis.Redis):\n    r.rpush(\"foo\", \"hello\")\n    r.rpush(\"foo\", \"world\")\n    assert r.linsert(\"foo\", \"after\", \"hello\", \"there\") == 3\n    assert r.lrange(\"foo\", 0, -1) == [b\"hello\", b\"there\", b\"world\"]\n\n\ndef test_linsert_bad_command(r: redis.Redis):\n    with pytest.raises(redis.ResponseError):\n        testtools.raw_command(r, \"LINSERT\", \"x\", \"NOT_BEFORE\", \"pivot\", \"val\")\n\n\ndef test_linsert_no_pivot(r: redis.Redis):\n    r.rpush(\"foo\", \"hello\")\n    r.rpush(\"foo\", \"world\")\n    assert r.linsert(\"foo\", \"after\", \"goodbye\", \"bar\") == -1\n    assert r.lrange(\"foo\", 0, -1) == [b\"hello\", b\"world\"]\n\n\ndef test_linsert_wrong_type(r: redis.Redis):\n    r.set(\"foo\", \"bar\")\n    with pytest.raises(redis.ResponseError):\n        r.linsert(\"foo\", \"after\", \"bar\", \"element\")\n\n\ndef test_rpoplpush(r: redis.Redis):\n    assert r.rpoplpush(\"foo\", \"bar\") is None\n    assert r.lpop(\"bar\") is None\n    r.rpush(\"foo\", \"one\")\n    r.rpush(\"foo\", \"two\")\n    r.rpush(\"bar\", \"one\")\n\n    assert r.rpoplpush(\"foo\", \"bar\") == b\"two\"\n    assert r.lrange(\"foo\", 0, -1) == [b\"one\"]\n    assert r.lrange(\"bar\", 0, -1) == [b\"two\", b\"one\"]\n\n    # Catch instances where we store bytes and strings inconsistently\n    # and thus bar = ['two', b'one']\n    assert r.lrem(\"bar\", -1, \"two\") == 1\n\n\ndef test_rpoplpush_to_nonexistent_destination(r: redis.Redis):\n    r.rpush(\"foo\", \"one\")\n    assert r.rpoplpush(\"foo\", \"bar\") == b\"one\"\n    assert r.rpop(\"bar\") == b\"one\"\n\n\ndef test_rpoplpush_expiry(r: redis.Redis):\n    r.rpush(\"foo\", \"one\")\n    r.rpush(\"bar\", \"two\")\n    r.expire(\"bar\", 10)\n    r.rpoplpush(\"foo\", \"bar\")\n    assert r.ttl(\"bar\") > 0\n\n\ndef 
test_rpoplpush_one_to_self(r: redis.Redis):\n    r.rpush(\"list\", \"element\")\n    assert r.brpoplpush(\"list\", \"list\") == b\"element\"\n    assert r.lrange(\"list\", 0, -1) == [b\"element\"]\n\n\ndef test_rpoplpush_wrong_type(r: redis.Redis):\n    r.set(\"foo\", \"bar\")\n    r.rpush(\"list\", \"element\")\n    with pytest.raises(redis.ResponseError):\n        r.rpoplpush(\"foo\", \"list\")\n    assert r.get(\"foo\") == b\"bar\"\n    assert r.lrange(\"list\", 0, -1) == [b\"element\"]\n    with pytest.raises(redis.ResponseError):\n        r.rpoplpush(\"list\", \"foo\")\n    assert r.get(\"foo\") == b\"bar\"\n    assert r.lrange(\"list\", 0, -1) == [b\"element\"]\n\n\ndef test_blpop_single_list(r: redis.Redis):\n    r.rpush(\"foo\", \"one\")\n    r.rpush(\"foo\", \"two\")\n    r.rpush(\"foo\", \"three\")\n    assert r.blpop([\"foo\"], timeout=1) == (b\"foo\", b\"one\")\n\n\ndef test_blpop_test_multiple_lists(r: redis.Redis):\n    r.rpush(\"baz\", \"zero\")\n    assert r.blpop([\"foo\", \"baz\"], timeout=1) == (b\"baz\", b\"zero\")\n    assert not r.exists(\"baz\")\n\n    r.rpush(\"foo\", \"one\")\n    r.rpush(\"foo\", \"two\")\n    # bar has nothing, so the returned value should come\n    # from foo.\n    assert r.blpop([\"bar\", \"foo\"], timeout=1) == (b\"foo\", b\"one\")\n    r.rpush(\"bar\", \"three\")\n    # bar now has something, so the returned value should come\n    # from bar.\n    assert r.blpop([\"bar\", \"foo\"], timeout=1) == (b\"bar\", b\"three\")\n    assert r.blpop([\"bar\", \"foo\"], timeout=1) == (b\"foo\", b\"two\")\n\n\ndef test_blpop_allow_single_key(r: redis.Redis):\n    # blpop converts single key arguments to a one element list.\n    r.rpush(\"foo\", \"one\")\n    assert r.blpop(\"foo\", timeout=1) == (b\"foo\", b\"one\")\n\n\n@pytest.mark.slow\ndef test_blpop_block(r: redis.Redis):\n    thread = _push_thread(r)\n    try:\n        assert r.blpop(\"foo\") == (b\"foo\", b\"value1\")\n        assert r.blpop(\"foo\", timeout=5) == (b\"foo\", 
b\"value2\")\n    finally:\n        thread.join()\n\n\n@pytest.mark.slow\ndef test_blpop_block_float(r: redis.Redis):\n    thread = _push_thread(r)\n    try:\n        assert testtools.raw_command(r, \"blpop\", \"foo\", 0) == [b\"foo\", b\"value1\"]\n        assert testtools.raw_command(r, \"blpop\", \"foo\", 1.1) == [b\"foo\", b\"value2\"]\n    finally:\n        thread.join()\n\n\n@pytest.mark.slow\ndef test_brpop_block(r: redis.Redis):\n    thread = _push_thread(r)\n    try:\n        assert r.brpop(\"foo\") == (b\"foo\", b\"value1\")\n        assert r.brpop(\"foo\", timeout=5) == (b\"foo\", b\"value2\")\n    finally:\n        thread.join()\n\n\ndef test_blpop_wrong_type(r: redis.Redis):\n    r.set(\"foo\", \"bar\")\n    with pytest.raises(redis.ResponseError):\n        r.blpop(\"foo\", timeout=1)\n\n\ndef test_blpop_transaction(r: redis.Redis):\n    p = r.pipeline()\n    p.multi()\n    p.blpop(\"missing\", timeout=1000)\n    result = p.execute()\n    # Blocking commands behave like non-blocking versions in transactions\n    assert result == [None]\n\n\ndef test_brpop_test_multiple_lists(r: redis.Redis):\n    r.rpush(\"baz\", \"zero\")\n    assert r.brpop([\"foo\", \"baz\"], timeout=1) == (b\"baz\", b\"zero\")\n    assert not r.exists(\"baz\")\n\n    r.rpush(\"foo\", \"one\")\n    r.rpush(\"foo\", \"two\")\n    assert r.brpop([\"bar\", \"foo\"], timeout=1) == (b\"foo\", b\"two\")\n\n\ndef test_brpop_single_key(r: redis.Redis):\n    r.rpush(\"foo\", \"one\")\n    r.rpush(\"foo\", \"two\")\n    assert r.brpop(\"foo\", timeout=1) == (b\"foo\", b\"two\")\n\n\ndef test_brpop_wrong_type(r: redis.Redis):\n    r.set(\"foo\", \"bar\")\n    with pytest.raises(redis.ResponseError):\n        r.brpop(\"foo\", timeout=1)\n\n\ndef test_brpoplpush_multi_keys(r: redis.Redis):\n    assert r.lpop(\"bar\") is None\n    r.rpush(\"foo\", \"one\")\n    r.rpush(\"foo\", \"two\")\n    assert r.brpoplpush(\"foo\", \"bar\", timeout=1) == b\"two\"\n    assert r.lrange(\"bar\", 0, -1) == 
[b\"two\"]\n\n    # Catch instances where we store bytes and strings inconsistently\n    # and thus bar = ['two']\n    assert r.lrem(\"bar\", -1, \"two\") == 1\n\n\n@pytest.mark.unsupported_server_types(\"dragonfly\")  # TODO Should this be supported?\ndef test_brpoplpush_wrong_type(r: redis.Redis):\n    r.set(\"foo\", \"bar\")\n    r.rpush(\"list\", \"element\")\n    with pytest.raises(redis.ResponseError):\n        r.brpoplpush(\"foo\", \"list\")\n    assert r.get(\"foo\") == b\"bar\"\n    assert r.lrange(\"list\", 0, -1) == [b\"element\"]\n    with pytest.raises(redis.ResponseError):\n        r.brpoplpush(\"list\", \"foo\")\n    assert r.get(\"foo\") == b\"bar\"\n    assert r.lrange(\"list\", 0, -1) == [b\"element\"]\n\n\n@pytest.mark.slow\ndef test_blocking_operations_when_empty(r: redis.Redis):\n    assert r.blpop([\"foo\"], timeout=1) is None\n    assert r.blpop([\"bar\", \"foo\"], timeout=1) is None\n    assert r.brpop(\"foo\", timeout=1) is None\n    assert r.brpoplpush(\"foo\", \"bar\", timeout=1) is None\n\n\ndef test_empty_list(r: redis.Redis):\n    r.rpush(\"foo\", \"bar\")\n    r.rpop(\"foo\")\n    assert not r.exists(\"foo\")\n\n\ndef test_lmove_to_nonexistent_destination(r: redis.Redis):\n    r.rpush(\"foo\", \"one\")\n    assert r.lmove(\"foo\", \"bar\", \"RIGHT\", \"LEFT\") == b\"one\"\n    assert r.rpop(\"bar\") == b\"one\"\n\n\ndef test_lmove_expiry(r: redis.Redis):\n    r.rpush(\"foo\", \"one\")\n    r.rpush(\"bar\", \"two\")\n    r.expire(\"bar\", 10)\n    r.lmove(\"foo\", \"bar\", \"RIGHT\", \"LEFT\")\n    assert r.ttl(\"bar\") > 0\n\n\ndef test_lmove_wrong_type(r: redis.Redis):\n    r.rpush(\"foo\", \"one\")\n    r.rpush(\"bar\", \"two\")\n    with pytest.raises(redis.ResponseError):\n        testtools.raw_command(r, \"LMOVE\", \"foo\", \"bar\", \"left\", \"NOT_LEFT_OR_RIGHT\")\n\n    r.set(\"foo\", \"bar\")\n    r.rpush(\"list\", \"element\")\n    with pytest.raises(redis.ResponseError):\n        r.lmove(\"foo\", \"list\", \"RIGHT\", 
\"LEFT\")\n    assert r.get(\"foo\") == b\"bar\"\n    assert r.lrange(\"list\", 0, -1) == [b\"element\"]\n    with pytest.raises(redis.ResponseError):\n        r.lmove(\"list\", \"foo\", \"RIGHT\", \"LEFT\")\n    assert r.get(\"foo\") == b\"bar\"\n    assert r.lrange(\"list\", 0, -1) == [b\"element\"]\n\n\ndef test_lmove(r: redis.Redis):\n    assert r.lmove(\"foo\", \"bar\", \"RIGHT\", \"LEFT\") is None\n    assert r.lpop(\"bar\") is None\n    r.rpush(\"foo\", \"one\")\n    r.rpush(\"foo\", \"two\")\n    r.rpush(\"bar\", \"one\")\n\n    # RPOPLPUSH\n    assert r.lmove(\"foo\", \"bar\", \"RIGHT\", \"LEFT\") == b\"two\"\n    assert r.lrange(\"foo\", 0, -1) == [b\"one\"]\n    assert r.lrange(\"bar\", 0, -1) == [b\"two\", b\"one\"]\n    # LPOPRPUSH\n    assert r.lmove(\"bar\", \"bar\", \"LEFT\", \"RIGHT\") == b\"two\"\n    assert r.lrange(\"bar\", 0, -1) == [b\"one\", b\"two\"]\n    # RPOPRPUSH\n    r.rpush(\"foo\", \"three\")\n    assert r.lmove(\"foo\", \"bar\", \"RIGHT\", \"RIGHT\") == b\"three\"\n    assert r.lrange(\"foo\", 0, -1) == [b\"one\"]\n    assert r.lrange(\"bar\", 0, -1) == [b\"one\", b\"two\", b\"three\"]\n    # LPOPLPUSH\n    assert r.lmove(\"bar\", \"foo\", \"LEFT\", \"LEFT\") == b\"one\"\n    assert r.lrange(\"foo\", 0, -1) == [b\"one\", b\"one\"]\n    assert r.lrange(\"bar\", 0, -1) == [b\"two\", b\"three\"]\n\n    # Catch instances where we store bytes and strings inconsistently\n    # and thus bar = ['two', b'one']\n    assert r.lrem(\"bar\", -1, \"two\") == 1\n\n\ndef test_blmove(r: redis.Redis):\n    r.rpush(\"a\", \"one\", \"two\", \"three\", \"four\")\n    assert r.blmove(\"a\", \"b\", 5)\n    assert r.blmove(\"a\", \"b\", 1, \"RIGHT\", \"LEFT\")\n\n\ndef test_lpos(r: redis.Redis):\n    assert r.rpush(\"a\", \"a\", \"b\", \"c\", \"1\", \"2\", \"3\", \"c\", \"c\") == 8\n    assert r.lpos(\"a\", \"a\") == 0\n    assert r.lpos(\"a\", \"c\") == 2\n\n    assert r.lpos(\"a\", \"c\", rank=1) == 2\n    assert r.lpos(\"a\", \"c\", rank=2) == 6\n    
assert r.lpos(\"a\", \"c\", rank=4) is None\n    assert r.lpos(\"a\", \"c\", rank=-1) == 7\n    assert r.lpos(\"a\", \"c\", rank=-2) == 6\n\n    assert r.lpos(\"a\", \"c\", count=0) == [2, 6, 7]\n    assert r.lpos(\"a\", \"c\", count=1) == [2]\n    assert r.lpos(\"a\", \"c\", count=2) == [2, 6]\n    assert r.lpos(\"a\", \"c\", count=100) == [2, 6, 7]\n\n    assert r.lpos(\"a\", \"c\", count=0, rank=2) == [6, 7]\n    assert r.lpos(\"a\", \"c\", count=2, rank=-1) == [7, 6]\n\n    assert r.lpos(\"axxx\", \"c\", count=0, rank=2) == []\n    assert r.lpos(\"axxx\", \"c\") is None\n\n    assert r.lpos(\"a\", \"x\", count=2) == []\n    assert r.lpos(\"a\", \"x\") is None\n\n    assert r.lpos(\"a\", \"a\", count=0, maxlen=1) == [0]\n    assert r.lpos(\"a\", \"c\", count=0, maxlen=1) == []\n    assert r.lpos(\"a\", \"c\", count=0, maxlen=3) == [2]\n    assert r.lpos(\"a\", \"c\", count=0, maxlen=3, rank=-1) == [7, 6]\n    assert r.lpos(\"a\", \"c\", count=0, maxlen=7, rank=2) == [6]\n\n\n@pytest.mark.unsupported_server_types(\"dragonfly\")\n@pytest.mark.min_server(\"7\")\ndef test_blmpop(r: redis.Redis):\n    r.rpush(\"a\", \"1\", \"2\", \"3\", \"4\", \"5\")\n    res = [b\"a\", [b\"1\", b\"2\"]]\n    assert r.blmpop(1, \"2\", \"b\", \"a\", direction=\"LEFT\", count=2) == res\n    with pytest.raises(TypeError):\n        r.blmpop(1, \"2\", \"b\", \"a\", count=2)\n    r.rpush(\"b\", \"6\", \"7\", \"8\", \"9\")\n    assert r.blmpop(0, \"2\", \"b\", \"a\", direction=\"LEFT\") == [b\"b\", [b\"6\"]]\n    assert r.blmpop(1, \"2\", \"foo\", \"bar\", direction=\"RIGHT\") is None\n\n\n@pytest.mark.unsupported_server_types(\"dragonfly\")\n@pytest.mark.min_server(\"7\")\ndef test_lmpop(r: redis.Redis):\n    r.rpush(\"foo\", \"1\", \"2\", \"3\", \"4\", \"5\")\n    result = [b\"foo\", [b\"1\", b\"2\"]]\n    assert r.lmpop(\"2\", \"bar\", \"foo\", direction=\"LEFT\", count=2) == result\n    with pytest.raises(redis.ResponseError):\n        r.lmpop(\"2\", \"bar\", \"foo\", direction=\"up\", 
count=2)\n    r.rpush(\"bar\", \"a\", \"b\", \"c\", \"d\")\n    assert r.lmpop(\"2\", \"bar\", \"foo\", direction=\"LEFT\") == [b\"bar\", [b\"a\"]]\n"
  },
  {
    "path": "tests/fakeredis/test/test_mixins/test_pubsub_commands.py",
    "content": "import threading\nimport time\nimport uuid\nfrom queue import Queue\nfrom time import sleep\nfrom typing import Optional, Dict, Any\n\nimport pytest\nimport redis\nfrom redis.client import PubSub\n\nfrom .. import testtools\n\n\ndef wait_for_message(\n    pubsub: PubSub, timeout=0.5, ignore_subscribe_messages=False\n) -> Optional[Dict[str, Any]]:\n    now = time.time()\n    timeout = now + timeout\n    while now < timeout:\n        message = pubsub.get_message(\n            ignore_subscribe_messages=ignore_subscribe_messages\n        )\n        if message is not None:\n            return message\n        time.sleep(0.01)\n        now = time.time()\n    return None\n\n\ndef make_message(_type, channel, data, pattern=None):\n    return {\n        \"type\": _type,\n        \"pattern\": pattern and pattern.encode(\"utf-8\") or None,\n        \"channel\": channel and channel.encode(\"utf-8\") or None,\n        \"data\": data.encode(\"utf-8\") if isinstance(data, str) else data,\n    }\n\n\ndef test_ping_pubsub(r: redis.Redis):\n    p = r.pubsub()\n    p.subscribe(\"channel\")\n    p.parse_response()  # Consume the subscribe command reply\n    p.ping()\n    assert p.parse_response() == [b\"pong\", b\"\"]\n    p.ping(\"test\")\n    assert p.parse_response() == [b\"pong\", b\"test\"]\n\n\n@pytest.mark.slow\ndef test_pubsub_subscribe(r: redis.Redis):\n    pubsub = r.pubsub()\n    pubsub.subscribe(\"channel\")\n    sleep(1)\n    expected_message = {\n        \"type\": \"subscribe\",\n        \"pattern\": None,\n        \"channel\": b\"channel\",\n        \"data\": 1,\n    }\n    message = pubsub.get_message()\n    keys = list(pubsub.channels.keys())\n\n    key = keys[0]\n    key = key if type(key) is bytes else bytes(key, encoding=\"utf-8\")\n\n    assert len(keys) == 1\n    assert key == b\"channel\"\n    assert message == expected_message\n\n\n@pytest.mark.slow\ndef test_pubsub_numpat(r: redis.Redis):\n    p = r.pubsub()\n    p.psubscribe(\"*oo\", \"*ar\", 
\"b*z\")\n    for i in range(3):\n        assert wait_for_message(p)[\"type\"] == \"psubscribe\"\n    assert r.pubsub_numpat() == 3\n\n\n@pytest.mark.slow\ndef test_pubsub_psubscribe(r: redis.Redis):\n    pubsub = r.pubsub()\n    pubsub.psubscribe(\"channel.*\")\n    sleep(1)\n    expected_message = {\n        \"type\": \"psubscribe\",\n        \"pattern\": None,\n        \"channel\": b\"channel.*\",\n        \"data\": 1,\n    }\n\n    message = pubsub.get_message()\n    keys = list(pubsub.patterns.keys())\n    assert len(keys) == 1\n    assert message == expected_message\n\n\n@pytest.mark.slow\ndef test_pubsub_unsubscribe(r: redis.Redis):\n    pubsub = r.pubsub()\n    pubsub.subscribe(\"channel-1\", \"channel-2\", \"channel-3\")\n    sleep(1)\n    expected_message = {\n        \"type\": \"unsubscribe\",\n        \"pattern\": None,\n        \"channel\": b\"channel-1\",\n        \"data\": 2,\n    }\n    pubsub.get_message()\n    pubsub.get_message()\n    pubsub.get_message()\n\n    # unsubscribe from one\n    pubsub.unsubscribe(\"channel-1\")\n    sleep(1)\n    message = pubsub.get_message()\n    keys = list(pubsub.channels.keys())\n    assert message == expected_message\n    assert len(keys) == 2\n\n    # unsubscribe from multiple\n    pubsub.unsubscribe()\n    sleep(1)\n    pubsub.get_message()\n    pubsub.get_message()\n    keys = list(pubsub.channels.keys())\n    assert message == expected_message\n    assert len(keys) == 0\n\n\n@pytest.mark.slow\ndef test_pubsub_punsubscribe(r: redis.Redis):\n    pubsub = r.pubsub()\n    pubsub.psubscribe(\"channel-1.*\", \"channel-2.*\", \"channel-3.*\")\n    sleep(1)\n    expected_message = {\n        \"type\": \"punsubscribe\",\n        \"pattern\": None,\n        \"channel\": b\"channel-1.*\",\n        \"data\": 2,\n    }\n    pubsub.get_message()\n    pubsub.get_message()\n    pubsub.get_message()\n\n    # unsubscribe from one\n    pubsub.punsubscribe(\"channel-1.*\")\n    sleep(1)\n    message = pubsub.get_message()\n    
keys = list(pubsub.patterns.keys())\n    assert message == expected_message\n    assert len(keys) == 2\n\n    # unsubscribe from multiple\n    pubsub.punsubscribe()\n    sleep(1)\n    pubsub.get_message()\n    pubsub.get_message()\n    keys = list(pubsub.patterns.keys())\n    assert len(keys) == 0\n\n\n@pytest.mark.slow\ndef test_pubsub_listen(r: redis.Redis):\n    def _listen(pubsub, q):\n        count = 0\n        for message in pubsub.listen():\n            q.put(message)\n            count += 1\n            if count == 4:\n                pubsub.close()\n\n    channel = \"ch1\"\n    patterns = [\"ch1*\", \"ch[1]\", \"ch?\"]\n    pubsub = r.pubsub()\n    pubsub.subscribe(channel)\n    pubsub.psubscribe(*patterns)\n    sleep(1)\n    msgs = [pubsub.get_message() for _ in range(4)]\n    assert msgs[0][\"type\"] == \"subscribe\"\n    for i in range(1, 4):\n        assert msgs[i][\"type\"] == \"psubscribe\"\n\n    q = Queue()\n    t = threading.Thread(target=_listen, args=(pubsub, q))\n    t.start()\n    msg = \"hello world\"\n    r.publish(channel, msg)\n    t.join()\n\n    msgs = [q.get() for _ in range(4)]\n\n    bpatterns = [pattern.encode() for pattern in patterns]\n    bpatterns.append(channel.encode())\n    msg = msg.encode()\n    for item in msgs:\n        assert item[\"data\"] == msg\n        assert item[\"channel\"] in bpatterns\n\n\n@pytest.mark.slow\ndef test_pubsub_listen_handler(r: redis.Redis):\n    def _handler(message):\n        calls.append(message)\n\n    channel = \"ch1\"\n    patterns = {\"ch?\": _handler}\n    calls = []\n\n    pubsub = r.pubsub()\n    pubsub.subscribe(ch1=_handler)\n    pubsub.psubscribe(**patterns)\n    sleep(1)\n    msg1 = pubsub.get_message()\n    msg2 = pubsub.get_message()\n    assert msg1[\"type\"] == \"subscribe\"\n    assert msg2[\"type\"] == \"psubscribe\"\n    msg = \"hello world\"\n    r.publish(channel, msg)\n    sleep(1)\n    for i in range(2):\n        msg = pubsub.get_message()\n        assert msg is None  # 
get_message returns None when handler is used\n    pubsub.close()\n    calls.sort(key=lambda call: call[\"type\"])\n    assert calls == [\n        {\"pattern\": None, \"channel\": b\"ch1\", \"data\": b\"hello world\", \"type\": \"message\"},\n        {\n            \"pattern\": b\"ch?\",\n            \"channel\": b\"ch1\",\n            \"data\": b\"hello world\",\n            \"type\": \"pmessage\",\n        },\n    ]\n\n\n@pytest.mark.slow\ndef test_pubsub_ignore_sub_messages_listen(r: redis.Redis):\n    def _listen(pubsub, q):\n        count = 0\n        for message in pubsub.listen():\n            q.put(message)\n            count += 1\n            if count == 4:\n                pubsub.close()\n\n    channel = \"ch1\"\n    patterns = [\"ch1*\", \"ch[1]\", \"ch?\"]\n    pubsub = r.pubsub(ignore_subscribe_messages=True)\n    pubsub.subscribe(channel)\n    pubsub.psubscribe(*patterns)\n    sleep(1)\n\n    q = Queue()\n    t = threading.Thread(target=_listen, args=(pubsub, q))\n    t.start()\n    msg = \"hello world\"\n    r.publish(channel, msg)\n    t.join()\n\n    msg1 = q.get()\n    msg2 = q.get()\n    msg3 = q.get()\n    msg4 = q.get()\n\n    bpatterns = [pattern.encode() for pattern in patterns]\n    bpatterns.append(channel.encode())\n    msg = msg.encode()\n    assert msg1[\"data\"] == msg\n    assert msg1[\"channel\"] in bpatterns\n    assert msg2[\"data\"] == msg\n    assert msg2[\"channel\"] in bpatterns\n    assert msg3[\"data\"] == msg\n    assert msg3[\"channel\"] in bpatterns\n    assert msg4[\"data\"] == msg\n    assert msg4[\"channel\"] in bpatterns\n\n\n@pytest.mark.slow\ndef test_pubsub_binary(r: redis.Redis):\n    def _listen(pubsub, q):\n        for message in pubsub.listen():\n            q.put(message)\n            pubsub.close()\n\n    pubsub = r.pubsub(ignore_subscribe_messages=True)\n    pubsub.subscribe(\"channel\\r\\n\\xff\")\n    sleep(1)\n\n    q = Queue()\n    t = threading.Thread(target=_listen, args=(pubsub, q))\n    t.start()\n    
msg = b\"\\x00hello world\\r\\n\\xff\"\n    r.publish(\"channel\\r\\n\\xff\", msg)\n    t.join()\n\n    received = q.get()\n    assert received[\"data\"] == msg\n\n\n@pytest.mark.slow\ndef test_pubsub_run_in_thread(r: redis.Redis):\n    q = Queue()\n\n    pubsub = r.pubsub()\n    pubsub.subscribe(channel=q.put)\n    pubsub_thread = pubsub.run_in_thread()\n\n    msg = b\"Hello World\"\n    r.publish(\"channel\", msg)\n\n    retrieved = q.get()\n    assert retrieved[\"data\"] == msg\n\n    pubsub_thread.stop()\n    # Newer versions of redis wait for an unsubscribe message, which sometimes comes early\n    # https://github.com/andymccurdy/redis-py/issues/1150\n    if pubsub.channels:\n        pubsub.channels = {}\n    pubsub_thread.join()\n    assert not pubsub_thread.is_alive()\n\n    pubsub.subscribe(channel=None)\n    with pytest.raises(redis.exceptions.PubSubError):\n        pubsub_thread = pubsub.run_in_thread()\n\n    pubsub.unsubscribe(\"channel\")\n\n    pubsub.psubscribe(channel=None)\n    with pytest.raises(redis.exceptions.PubSubError):\n        pubsub_thread = pubsub.run_in_thread()\n\n\n@pytest.mark.slow\n@pytest.mark.parametrize(\n    \"timeout_value\",\n    [1, pytest.param(None, marks=testtools.run_test_if_redispy_ver(\"gte\", \"3.2\"))],\n)\ndef test_pubsub_timeout(r, timeout_value):\n    def publish():\n        sleep(0.1)\n        r.publish(\"channel\", \"hello\")\n\n    p = r.pubsub()\n    p.subscribe(\"channel\")\n    p.parse_response()  # Drains the subscribe command message\n    publish_thread = threading.Thread(target=publish)\n    publish_thread.start()\n    message = p.get_message(timeout=timeout_value)\n    assert message == {\n        \"type\": \"message\",\n        \"pattern\": None,\n        \"channel\": b\"channel\",\n        \"data\": b\"hello\",\n    }\n    publish_thread.join()\n\n    if timeout_value is not None:\n        # For infinite timeout case don't wait for the message that will never appear.\n        message = 
p.get_message(timeout=timeout_value)\n        assert message is None\n\n\ndef test_pubsub_channels(r: redis.Redis):\n    p = r.pubsub()\n    p.subscribe(\"foo\", \"bar\", \"baz\", \"test\")\n    expected = {b\"foo\", b\"bar\", b\"baz\", b\"test\"}\n    assert set(r.pubsub_channels()) == expected\n\n\ndef test_pubsub_channels_pattern(r: redis.Redis):\n    p = r.pubsub()\n    p.subscribe(\"foo\", \"bar\", \"baz\", \"test\")\n    assert set(r.pubsub_channels(\"b*\")) == {\n        b\"bar\",\n        b\"baz\",\n    }\n\n\ndef test_pubsub_no_subcommands(r: redis.Redis):\n    with pytest.raises(redis.ResponseError):\n        testtools.raw_command(r, \"PUBSUB\")\n\n\n@pytest.mark.min_server(\"7\")\n@pytest.mark.max_server(\"7\")\ndef test_pubsub_help_redis7(r: redis.Redis):\n    assert testtools.raw_command(r, \"PUBSUB HELP\") == [\n        b\"PUBSUB <subcommand> [<arg> [value] [opt] ...]. Subcommands are:\",\n        b\"CHANNELS [<pattern>]\",\n        b\"    Return the currently active channels matching a <pattern> (default: '*')\"\n        b\".\",\n        b\"NUMPAT\",\n        b\"    Return number of subscriptions to patterns.\",\n        b\"NUMSUB [<channel> ...]\",\n        b\"    Return the number of subscribers for the specified channels, excluding\",\n        b\"    pattern subscriptions(default: no channels).\",\n        b\"SHARDCHANNELS [<pattern>]\",\n        b\"    Return the currently active shard level channels matching a <pattern> (d\"\n        b\"efault: '*').\",\n        b\"SHARDNUMSUB [<shardchannel> ...]\",\n        b\"    Return the number of subscribers for the specified shard level channel(s\"\n        b\")\",\n        b\"HELP\",\n        b\"    Prints this help.\",\n    ]\n\n\n@pytest.mark.min_server(\"7.1\")\ndef test_pubsub_help_redis71(r: redis.Redis):\n    assert testtools.raw_command(r, \"PUBSUB HELP\") == [\n        b\"PUBSUB <subcommand> [<arg> [value] [opt] ...]. 
Subcommands are:\",\n        b\"CHANNELS [<pattern>]\",\n        b\"    Return the currently active channels matching a <pattern> (default: '*')\"\n        b\".\",\n        b\"NUMPAT\",\n        b\"    Return number of subscriptions to patterns.\",\n        b\"NUMSUB [<channel> ...]\",\n        b\"    Return the number of subscribers for the specified channels, excluding\",\n        b\"    pattern subscriptions(default: no channels).\",\n        b\"SHARDCHANNELS [<pattern>]\",\n        b\"    Return the currently active shard level channels matching a <pattern> (d\"\n        b\"efault: '*').\",\n        b\"SHARDNUMSUB [<shardchannel> ...]\",\n        b\"    Return the number of subscribers for the specified shard level channel(s\"\n        b\")\",\n        b\"HELP\",\n        b\"    Print this help.\",\n    ]\n\n\ndef test_pubsub_numsub(r: redis.Redis):\n    a = uuid.uuid4().hex\n    b = uuid.uuid4().hex\n    c = uuid.uuid4().hex\n    p1 = r.pubsub()\n    p2 = r.pubsub()\n\n    p1.subscribe(a, b, c)\n    p2.subscribe(a, b)\n\n    assert r.pubsub_numsub(a, b, c) == [\n        (a.encode(), 2),\n        (b.encode(), 2),\n        (c.encode(), 1),\n    ]\n    assert r.pubsub_numsub() == []\n    assert r.pubsub_numsub(a, \"non-existing\") == [(a.encode(), 2), (b\"non-existing\", 0)]\n\n\n@pytest.mark.min_server(\"7\")\n@testtools.run_test_if_redispy_ver(\"gte\", \"5.0.0rc2\")\n@pytest.mark.unsupported_server_types(\"dragonfly\")\ndef test_published_message_to_shard_channel(r: redis.Redis):\n    p = r.pubsub()\n    p.ssubscribe(\"foo\")\n    assert wait_for_message(p) == make_message(\"ssubscribe\", \"foo\", 1)\n    assert r.spublish(\"foo\", \"test message\") == 1\n\n    message = wait_for_message(p)\n    assert isinstance(message, dict)\n    assert message == make_message(\"smessage\", \"foo\", \"test message\")\n\n\n@pytest.mark.min_server(\"7\")\n@testtools.run_test_if_redispy_ver(\"gte\", \"5.0.0\")\n@pytest.mark.unsupported_server_types(\"dragonfly\")\ndef 
test_subscribe_property_with_shard_channels_cluster(r: redis.Redis):\n    p = r.pubsub()\n    keys = [\"foo\", \"bar\", \"uni\" + chr(4456) + \"code\"]\n    assert p.subscribed is False\n    p.ssubscribe(keys[0])\n    # we're now subscribed even though we haven't processed the reply from the server just yet\n    assert p.subscribed is True\n    assert wait_for_message(p) == make_message(\"ssubscribe\", keys[0], 1)\n    # we're still subscribed\n    assert p.subscribed is True\n\n    # unsubscribe from all shard_channels\n    p.sunsubscribe()\n    # we're still technically subscribed until we process the response messages from the server\n    assert p.subscribed is True\n    assert wait_for_message(p) == make_message(\"sunsubscribe\", keys[0], 0)\n    # now we're no longer subscribed as no more messages can be delivered to any channels we were listening to\n    assert p.subscribed is False\n\n    # subscribing again flips the flag back\n    p.ssubscribe(keys[0])\n    assert p.subscribed is True\n    assert wait_for_message(p) == make_message(\"ssubscribe\", keys[0], 1)\n\n    # unsubscribe again\n    p.sunsubscribe()\n    assert p.subscribed is True\n    # subscribe to another shard_channel before reading the unsubscribe response\n    p.ssubscribe(keys[1])\n    assert p.subscribed is True\n    # read the unsubscribe for key1\n    assert wait_for_message(p) == make_message(\"sunsubscribe\", keys[0], 0)\n    # we're still subscribed to key2, so subscribed should still be True\n    assert p.subscribed is True\n    # read the key2 subscribe message\n    assert wait_for_message(p) == make_message(\"ssubscribe\", keys[1], 1)\n    p.sunsubscribe()\n    # haven't read the message yet, so we're still subscribed\n    assert p.subscribed is True\n    assert wait_for_message(p) == make_message(\"sunsubscribe\", keys[1], 0)\n    # now we're finally unsubscribed\n    assert p.subscribed is False\n\n\n@pytest.mark.min_server(\"7\")\n@testtools.run_test_if_redispy_ver(\"gte\", 
\"5.0.0\")\n@pytest.mark.unsupported_server_types(\"dragonfly\")\ndef test_pubsub_shardnumsub(r: redis.Redis):\n    channels = {b\"foo\", b\"bar\", b\"baz\"}\n    p1 = r.pubsub()\n    p1.ssubscribe(*channels)\n    for node in channels:\n        assert wait_for_message(p1)[\"type\"] == \"ssubscribe\"\n    p2 = r.pubsub()\n    p2.ssubscribe(\"bar\", \"baz\")\n    for i in range(2):\n        assert wait_for_message(p2)[\"type\"] == \"ssubscribe\"\n    p3 = r.pubsub()\n    p3.ssubscribe(\"baz\")\n    assert wait_for_message(p3)[\"type\"] == \"ssubscribe\"\n\n    channels = [(b\"foo\", 1), (b\"bar\", 2), (b\"baz\", 3)]\n    assert r.pubsub_shardnumsub(\"foo\", \"bar\", \"baz\", target_nodes=\"all\") == channels\n\n\n@pytest.mark.min_server(\"7\")\n@testtools.run_test_if_redispy_ver(\"gte\", \"5.0.0rc2\")\n@pytest.mark.unsupported_server_types(\"dragonfly\")\ndef test_pubsub_shardchannels(r: redis.Redis):\n    p = r.pubsub()\n    p.ssubscribe(\"foo\", \"bar\", \"baz\", \"quux\")\n    for i in range(4):\n        assert wait_for_message(p)[\"type\"] == \"ssubscribe\"\n    expected = [b\"bar\", b\"baz\", b\"foo\", b\"quux\"]\n    assert all([channel in r.pubsub_shardchannels() for channel in expected])\n"
  },
  {
    "path": "tests/fakeredis/test/test_mixins/test_scan.py",
    "content": "from time import sleep\n\nimport pytest\nimport redis\n\nfrom test.testtools import key_val_dict\n\n\ndef test_sscan_delete_key_while_scanning_should_not_returns_it_in_scan(r: redis.Redis):\n    size = 600\n    name = \"sscan-test\"\n    all_keys_set = {f\"{i}\".encode() for i in range(size)}\n    r.sadd(name, *[k for k in all_keys_set])\n    assert r.scard(name) == size\n\n    cursor, keys = r.sscan(name, 0)\n    assert len(keys) < len(all_keys_set)\n\n    key_to_remove = next(x for x in all_keys_set if x not in keys)\n    assert r.srem(name, key_to_remove) == 1\n    assert not r.sismember(name, key_to_remove)\n    while cursor != 0:\n        cursor, data = r.sscan(name, cursor=cursor)\n        keys.extend(data)\n    assert len(set(keys)) == len(keys)\n    assert len(keys) == size - 1\n    assert key_to_remove not in keys\n\n\ndef test_hscan_delete_key_while_scanning_should_not_returns_it_in_scan(r: redis.Redis):\n    size = 600\n    name = \"hscan-test\"\n    all_keys_dict = key_val_dict(size=size)\n    r.hset(name, mapping=all_keys_dict)\n    assert len(r.hgetall(name)) == size\n\n    cursor, keys = r.hscan(name, 0)\n    assert len(keys) < len(all_keys_dict)\n\n    key_to_remove = next(x for x in all_keys_dict if x not in keys)\n    assert r.hdel(name, key_to_remove) == 1\n    assert r.hget(name, key_to_remove) is None\n    while cursor != 0:\n        cursor, data = r.hscan(name, cursor=cursor)\n        keys.update(data)\n    assert len(set(keys)) == len(keys)\n    assert len(keys) == size - 1\n    assert key_to_remove not in keys\n\n\ndef test_scan_delete_unseen_key_while_scanning_should_not_returns_it_in_scan(\n    r: redis.Redis,\n):\n    size = 30\n    all_keys_dict = key_val_dict(size=size)\n    assert all(r.set(k, v) for k, v in all_keys_dict.items())\n    assert len(r.keys()) == size\n\n    cursor, keys = r.scan()\n\n    key_to_remove = next(x for x in all_keys_dict if x not in keys)\n    assert r.delete(key_to_remove) == 1\n    assert 
r.get(key_to_remove) is None\n    while cursor != 0:\n        cursor, data = r.scan(cursor=cursor)\n        keys.extend(data)\n    assert len(set(keys)) == len(keys)\n    assert len(keys) == size - 1\n    assert key_to_remove not in keys\n\n\n# @pytest.mark.xfail # todo\n# def test_scan_delete_seen_key_while_scanning_should_return_all_keys(r: redis.Redis):\n#     size = 30\n#     all_keys_dict = key_val_dict(size=size)\n#     assert all(r.set(k, v) for k, v in all_keys_dict.items())\n#     assert len(r.keys()) == size\n#\n#     cursor, keys = r.scan()\n#\n#     key_to_remove = keys[0]\n#     assert r.delete(keys[0]) == 1\n#     assert r.get(key_to_remove) is None\n#     while cursor != 0:\n#         cursor, data = r.scan(cursor=cursor)\n#         keys.extend(data)\n#\n#     assert len(set(keys)) == len(keys)\n#     keys = set(keys)\n#     assert len(keys) == size, f\"{set(all_keys_dict).difference(keys)} is not empty but should be\"\n#     assert key_to_remove in keys\n\n\ndef test_scan_add_key_while_scanning_should_return_all_keys(r: redis.Redis):\n    size = 30\n    all_keys_dict = key_val_dict(size=size)\n    assert all(r.set(k, v) for k, v in all_keys_dict.items())\n    assert len(r.keys()) == size\n\n    cursor, keys = r.scan()\n\n    r.set(\"new_key\", \"new val\")\n    while cursor != 0:\n        cursor, data = r.scan(cursor=cursor)\n        keys.extend(data)\n\n    keys = set(keys)\n    assert (\n        len(keys) >= size\n    ), f\"{set(all_keys_dict).difference(keys)} is not empty but should be\"\n\n\ndef test_scan(r: redis.Redis):\n    # Set up the data\n    for ix in range(20):\n        k = \"scan-test:%s\" % ix\n        v = \"result:%s\" % ix\n        r.set(k, v)\n    expected = r.keys()\n    assert len(expected) == 20  # Ensure we know what we're testing\n\n    # Test that we page through the results and get everything out\n    results = []\n    cursor = \"0\"\n    while cursor != 0:\n        cursor, data = r.scan(cursor, count=6)\n        
results.extend(data)\n    assert set(expected) == set(results)\n\n    # Now test that the MATCH functionality works\n    results = []\n    cursor = \"0\"\n    while cursor != 0:\n        cursor, data = r.scan(cursor, match=\"*7\", count=100)\n        results.extend(data)\n    assert b\"scan-test:7\" in results\n    assert b\"scan-test:17\" in results\n    assert len(set(results)) == 2\n\n    # Test the match on iterator\n    results = [r for r in r.scan_iter(match=\"*7\")]\n    assert b\"scan-test:7\" in results\n    assert b\"scan-test:17\" in results\n    assert len(set(results)) == 2\n\n\ndef test_scan_single(r: redis.Redis):\n    r.set(\"foo1\", \"bar1\")\n    assert r.scan(match=\"foo*\") == (0, [b\"foo1\"])\n\n\ndef test_scan_iter_single_page(r: redis.Redis):\n    r.set(\"foo1\", \"bar1\")\n    r.set(\"foo2\", \"bar2\")\n    assert set(r.scan_iter(match=\"foo*\")) == {b\"foo1\", b\"foo2\"}\n    assert set(r.scan_iter()) == {b\"foo1\", b\"foo2\"}\n    assert set(r.scan_iter(match=\"\")) == set()\n    assert set(r.scan_iter(match=\"foo1\", _type=\"string\")) == {\n        b\"foo1\",\n    }\n\n\ndef test_scan_iter_multiple_pages(r: redis.Redis):\n    all_keys = key_val_dict(size=100)\n    assert all(r.set(k, v) for k, v in all_keys.items())\n    assert set(r.scan_iter()) == set(all_keys)\n\n\ndef test_scan_iter_multiple_pages_with_match(r: redis.Redis):\n    all_keys = key_val_dict(size=100)\n    assert all(r.set(k, v) for k, v in all_keys.items())\n    # Now add a few keys that don't match the key:<number> pattern.\n    r.set(\"otherkey\", \"foo\")\n    r.set(\"andanother\", \"bar\")\n    actual = set(r.scan_iter(match=\"key:*\"))\n    assert actual == set(all_keys)\n\n\ndef test_scan_multiple_pages_with_count_arg(r: redis.Redis):\n    all_keys = key_val_dict(size=100)\n    assert all(r.set(k, v) for k, v in all_keys.items())\n    assert set(r.scan_iter(count=1000)) == set(all_keys)\n\n\ndef test_scan_all_in_single_call(r: redis.Redis):\n    all_keys = 
key_val_dict(size=100)\n    assert all(r.set(k, v) for k, v in all_keys.items())\n    # Specify way more than the 100 keys we've added.\n    actual = r.scan(count=1000)\n    assert set(actual[1]) == set(all_keys)\n    assert actual[0] == 0\n\n\n@pytest.mark.slow\ndef test_scan_expired_key(r: redis.Redis):\n    r.set(\"expiringkey\", \"value\")\n    r.pexpire(\"expiringkey\", 1)\n    sleep(1)\n    assert r.scan()[1] == []\n\n\ndef test_scan_stream(r: redis.Redis):\n    r.xadd(\"mystream\", {\"test\": \"value\"})\n    assert r.type(\"mystream\") == b\"stream\"  # noqa: E721\n    for s in r.scan_iter(_type=\"STRING\"):\n        print(s)\n"
  },
  {
    "path": "tests/fakeredis/test/test_mixins/test_scripting.py",
    "content": "from __future__ import annotations\n\nimport pytest\nimport redis\nimport redis.client\nfrom redis.exceptions import ResponseError\n\nfrom test.testtools import raw_command\n\njson_tests = pytest.importorskip(\"lupa\")\n\n\n@pytest.mark.min_server(\"7\")\ndef test_script_exists_redis7(r: redis.Redis):\n    # test response for no arguments by bypassing the py-redis command\n    # as it requires at least one argument\n    with pytest.raises(redis.ResponseError):\n        raw_command(r, \"SCRIPT EXISTS\")\n\n    # use single character characters for non-existing scripts, as those\n    # will never be equal to an actual sha1 hash digest\n    assert r.script_exists(\"a\") == [0]\n    assert r.script_exists(\"a\", \"b\", \"c\", \"d\", \"e\", \"f\") == [0, 0, 0, 0, 0, 0]\n\n    sha1_one = r.script_load(\"return 'a'\")\n    assert r.script_exists(sha1_one) == [1]\n    assert r.script_exists(sha1_one, \"a\") == [1, 0]\n    assert r.script_exists(\"a\", \"b\", \"c\", sha1_one, \"e\") == [0, 0, 0, 1, 0]\n\n    sha1_two = r.script_load(\"return 'b'\")\n    assert r.script_exists(sha1_one, sha1_two) == [1, 1]\n    assert r.script_exists(\"a\", sha1_one, \"c\", sha1_two, \"e\", \"f\") == [0, 1, 0, 1, 0, 0]\n\n\n@pytest.mark.parametrize(\"args\", [(\"a\",), tuple(\"abcdefghijklmn\")])\n@pytest.mark.unsupported_server_types(\"dragonfly\")\ndef test_script_flush_errors_with_args(r, args):\n    with pytest.raises(redis.ResponseError):\n        raw_command(r, \"SCRIPT FLUSH %s\" % \" \".join(args))\n\n\ndef test_script_flush(r: redis.Redis):\n    # generate/load six unique scripts and store their sha1 hash values\n    sha1_values = [r.script_load(\"return '%s'\" % char) for char in \"abcdef\"]\n\n    # assert the scripts all exist prior to flushing\n    assert r.script_exists(*sha1_values) == [1] * len(sha1_values)\n\n    # flush and assert OK response\n    assert r.script_flush() is True\n\n    # assert none of the scripts exists after flushing\n    assert 
r.script_exists(*sha1_values) == [0] * len(sha1_values)\n\n\ndef test_script_no_subcommands(r: redis.Redis):\n    with pytest.raises(redis.ResponseError):\n        raw_command(r, \"SCRIPT\")\n\n\n@pytest.mark.max_server(\"7\")\ndef test_script_help(r: redis.Redis):\n    assert raw_command(r, \"SCRIPT HELP\") == [\n        b\"SCRIPT <subcommand> [<arg> [value] [opt] ...]. Subcommands are:\",\n        b\"DEBUG (YES|SYNC|NO)\",\n        b\"    Set the debug mode for subsequent scripts executed.\",\n        b\"EXISTS <sha1> [<sha1> ...]\",\n        b\"    Return information about the existence of the scripts in the script cach\"\n        b\"e.\",\n        b\"FLUSH [ASYNC|SYNC]\",\n        b\"    Flush the Lua scripts cache. Very dangerous on replicas.\",\n        b\"    When called without the optional mode argument, the behavior is determin\"\n        b\"ed by the\",\n        b\"    lazyfree-lazy-user-flush configuration directive. Valid modes are:\",\n        b\"    * ASYNC: Asynchronously flush the scripts cache.\",\n        b\"    * SYNC: Synchronously flush the scripts cache.\",\n        b\"KILL\",\n        b\"    Kill the currently executing Lua script.\",\n        b\"LOAD <script>\",\n        b\"    Load a script into the scripts cache without executing it.\",\n        b\"HELP\",\n        b\"    Prints this help.\",\n    ]\n\n\n@pytest.mark.min_server(\"7.1\")\ndef test_script_help71(r: redis.Redis):\n    assert raw_command(r, \"SCRIPT HELP\") == [\n        b\"SCRIPT <subcommand> [<arg> [value] [opt] ...]. Subcommands are:\",\n        b\"DEBUG (YES|SYNC|NO)\",\n        b\"    Set the debug mode for subsequent scripts executed.\",\n        b\"EXISTS <sha1> [<sha1> ...]\",\n        b\"    Return information about the existence of the scripts in the script cach\"\n        b\"e.\",\n        b\"FLUSH [ASYNC|SYNC]\",\n        b\"    Flush the Lua scripts cache. 
Very dangerous on replicas.\",\n        b\"    When called without the optional mode argument, the behavior is determin\"\n        b\"ed by the\",\n        b\"    lazyfree-lazy-user-flush configuration directive. Valid modes are:\",\n        b\"    * ASYNC: Asynchronously flush the scripts cache.\",\n        b\"    * SYNC: Synchronously flush the scripts cache.\",\n        b\"KILL\",\n        b\"    Kill the currently executing Lua script.\",\n        b\"LOAD <script>\",\n        b\"    Load a script into the scripts cache without executing it.\",\n        b\"HELP\",\n        b\"    Print this help.\",\n    ]\n\n\n@pytest.mark.max_server(\"7.1\")\ndef test_eval_blpop(r: redis.Redis):\n    r.rpush(\"foo\", \"bar\")\n    with pytest.raises(\n        redis.ResponseError, match=\"This Redis command is not allowed from script\"\n    ):\n        r.eval('return redis.pcall(\"BLPOP\", KEYS[1], 1)', 1, \"foo\")\n\n\ndef test_eval_set_value_to_arg(r: redis.Redis):\n    r.eval('redis.call(\"SET\", KEYS[1], ARGV[1])', 1, \"foo\", \"bar\")\n    val = r.get(\"foo\")\n    assert val == b\"bar\"\n\n\ndef test_eval_conditional(r: redis.Redis):\n    lua = \"\"\"\n    local val = redis.call(\"GET\", KEYS[1])\n    if val == ARGV[1] then\n        redis.call(\"SET\", KEYS[1], ARGV[2])\n    else\n        redis.call(\"SET\", KEYS[1], ARGV[1])\n    end\n    \"\"\"\n    r.eval(lua, 1, \"foo\", \"bar\", \"baz\")\n    val = r.get(\"foo\")\n    assert val == b\"bar\"\n    r.eval(lua, 1, \"foo\", \"bar\", \"baz\")\n    val = r.get(\"foo\")\n    assert val == b\"baz\"\n\n\ndef test_eval_table(r: redis.Redis):\n    lua = \"\"\"\n    local a = {}\n    a[1] = \"foo\"\n    a[2] = \"bar\"\n    a[17] = \"baz\"\n    return a\n    \"\"\"\n    val = r.eval(lua, 0)\n    assert val == [b\"foo\", b\"bar\"]\n\n\ndef test_eval_table_with_nil(r: redis.Redis):\n    lua = \"\"\"\n    local a = {}\n    a[1] = \"foo\"\n    a[2] = nil\n    a[3] = \"bar\"\n    return a\n    \"\"\"\n    val = r.eval(lua, 0)\n    
assert val == [b\"foo\"]\n\n\ndef test_eval_table_with_numbers(r: redis.Redis):\n    lua = \"\"\"\n    local a = {}\n    a[1] = 42\n    return a\n    \"\"\"\n    val = r.eval(lua, 0)\n    assert val == [42]\n\n\ndef test_eval_nested_table(r: redis.Redis):\n    lua = \"\"\"\n    local a = {}\n    a[1] = {}\n    a[1][1] = \"foo\"\n    return a\n    \"\"\"\n    val = r.eval(lua, 0)\n    assert val == [[b\"foo\"]]\n\n\ndef test_eval_iterate_over_argv(r: redis.Redis):\n    lua = \"\"\"\n    for i, v in ipairs(ARGV) do\n    end\n    return ARGV\n    \"\"\"\n    val = r.eval(lua, 0, \"a\", \"b\", \"c\")\n    assert val == [b\"a\", b\"b\", b\"c\"]\n\n\ndef test_eval_iterate_over_keys(r: redis.Redis):\n    lua = \"\"\"\n    for i, v in ipairs(KEYS) do\n    end\n    return KEYS\n    \"\"\"\n    val = r.eval(lua, 2, \"a\", \"b\", \"c\")\n    assert val == [b\"a\", b\"b\"]\n\n\ndef test_eval_mget(r: redis.Redis):\n    r.set(\"foo1\", \"bar1\")\n    r.set(\"foo2\", \"bar2\")\n    val = r.eval('return redis.call(\"mget\", \"foo1\", \"foo2\")', 2, \"foo1\", \"foo2\")\n    assert val == [b\"bar1\", b\"bar2\"]\n\n\ndef test_eval_mget_not_set(r: redis.Redis):\n    val = r.eval('return redis.call(\"mget\", \"foo1\", \"foo2\")', 2, \"foo1\", \"foo2\")\n    assert val == [None, None]\n\n\ndef test_eval_hgetall(r: redis.Redis):\n    r.hset(\"foo\", \"k1\", \"bar\")\n    r.hset(\"foo\", \"k2\", \"baz\")\n    val = r.eval('return redis.call(\"hgetall\", \"foo\")', 1, \"foo\")\n    sorted_val = sorted([val[:2], val[2:]])\n    assert sorted_val == [[b\"k1\", b\"bar\"], [b\"k2\", b\"baz\"]]\n\n\ndef test_eval_hgetall_iterate(r: redis.Redis):\n    r.hset(\"foo\", \"k1\", \"bar\")\n    r.hset(\"foo\", \"k2\", \"baz\")\n    lua = \"\"\"\n    local result = redis.call(\"hgetall\", \"foo\")\n    for i, v in ipairs(result) do\n    end\n    return result\n    \"\"\"\n    val = r.eval(lua, 1, \"foo\")\n    sorted_val = sorted([val[:2], val[2:]])\n    assert sorted_val == [[b\"k1\", b\"bar\"], 
[b\"k2\", b\"baz\"]]\n\n\ndef test_eval_invalid_command(r: redis.Redis):\n    with pytest.raises(ResponseError):\n        r.eval('return redis.call(\"FOO\")', 0)\n\n\ndef test_eval_syntax_error(r: redis.Redis):\n    with pytest.raises(ResponseError):\n        r.eval('return \"', 0)\n\n\ndef test_eval_runtime_error(r: redis.Redis):\n    with pytest.raises(ResponseError):\n        r.eval('error(\"CRASH\")', 0)\n\n\ndef test_eval_more_keys_than_args(r: redis.Redis):\n    with pytest.raises(ResponseError):\n        r.eval(\"return 1\", 42)\n\n\ndef test_eval_numkeys_float_string(r: redis.Redis):\n    with pytest.raises(ResponseError):\n        r.eval(\"return KEYS[1]\", \"0.7\", \"foo\")\n\n\ndef test_eval_numkeys_integer_string(r: redis.Redis):\n    val = r.eval(\"return KEYS[1]\", \"1\", \"foo\")\n    assert val == b\"foo\"\n\n\ndef test_eval_numkeys_negative(r: redis.Redis):\n    with pytest.raises(ResponseError):\n        r.eval(\"return KEYS[1]\", -1, \"foo\")\n\n\ndef test_eval_numkeys_float(r: redis.Redis):\n    with pytest.raises(ResponseError):\n        r.eval(\"return KEYS[1]\", 0.7, \"foo\")\n\n\ndef test_eval_global_variable(r: redis.Redis):\n    # Redis doesn't allow script to define global variables\n    with pytest.raises(ResponseError):\n        r.eval(\"a=10\", 0)\n\n\ndef test_eval_global_and_return_ok(r: redis.Redis):\n    # Redis doesn't allow script to define global variables\n    with pytest.raises(ResponseError):\n        r.eval(\n            \"\"\"\n            a=10\n            return redis.status_reply(\"Everything is awesome\")\n            \"\"\",\n            0,\n        )\n\n\n# Dragonfly uses lua5.4, so it natively supports doubles.\n# To use legacy rounding of doubles to integers run dragonfly with --lua_resp2_legacy_float\ndef test_eval_convert_number(r: redis.Redis):\n    # Redis forces all Lua numbers to integer\n    val = r.eval(\"return 3.2\", 0)\n    assert val == 3\n    val = r.eval(\"return 3.8\", 0)\n    assert val == 3\n    val 
= r.eval(\"return -3.8\", 0)\n    assert val == -3\n\n\ndef test_eval_convert_bool(r: redis.Redis):\n    # Redis converts true to 1 and false to nil (which redis-py converts to None)\n    assert r.eval(\"return false\", 0) is None\n    val = r.eval(\"return true\", 0)\n    assert val == 1\n    assert not isinstance(val, bool)\n\n\n@pytest.mark.min_server(\"7\")\n@pytest.mark.unsupported_server_types(\"dragonfly\")  # dragonfly allows this\ndef test_eval_call_bool7(r: redis.Redis):\n    # Redis doesn't allow Lua bools to be passed to [p]call\n    with pytest.raises(\n        redis.ResponseError,\n        match=r\"Lua redis lib command arguments must be strings or integers\",\n    ):\n        r.eval('return redis.call(\"SET\", KEYS[1], true)', 1, \"testkey\")\n\n\ndef test_eval_return_error(r: redis.Redis):\n    with pytest.raises(redis.ResponseError, match=\"Testing\") as exc_info:\n        r.eval('return {err=\"Testing\"}', 0)\n    assert isinstance(exc_info.value.args[0], str)\n    with pytest.raises(redis.ResponseError, match=\"Testing\") as exc_info:\n        r.eval('return redis.error_reply(\"Testing\")', 0)\n    assert isinstance(exc_info.value.args[0], str)\n\n\ndef test_eval_return_redis_error(r: redis.Redis):\n    with pytest.raises(redis.ResponseError) as exc_info:\n        r.eval('return redis.pcall(\"BADCOMMAND\")', 0)\n    assert isinstance(exc_info.value.args[0], str)\n\n\ndef test_eval_return_ok(r: redis.Redis):\n    val = r.eval('return {ok=\"Testing\"}', 0)\n    assert val == b\"Testing\"\n    val = r.eval('return redis.status_reply(\"Testing\")', 0)\n    assert val == b\"Testing\"\n\n\ndef test_eval_return_ok_nested(r: redis.Redis):\n    val = r.eval(\n        \"\"\"\n        local a = {}\n        a[1] = {ok=\"Testing\"}\n        return a\n        \"\"\",\n        0,\n    )\n    assert val == [b\"Testing\"]\n\n\ndef test_eval_return_ok_wrong_type(r: redis.Redis):\n    with pytest.raises(redis.ResponseError):\n        r.eval(\"return 
redis.status_reply(123)\", 0)\n\n\ndef test_eval_pcall(r: redis.Redis):\n    val = r.eval(\n        \"\"\"\n        local a = {}\n        a[1] = redis.pcall(\"foo\")\n        return a\n        \"\"\",\n        0,\n    )\n    assert isinstance(val, list)\n    assert len(val) == 1\n    assert isinstance(val[0], ResponseError)\n\n\ndef test_eval_pcall_return_value(r: redis.Redis):\n    with pytest.raises(ResponseError):\n        r.eval('return redis.pcall(\"foo\")', 0)\n\n\ndef test_eval_delete(r: redis.Redis):\n    r.set(\"foo\", \"bar\")\n    val = r.get(\"foo\")\n    assert val == b\"bar\"\n    val = r.eval('redis.call(\"DEL\", KEYS[1])', 1, \"foo\")\n    assert val is None\n\n\ndef test_eval_exists(r: redis.Redis):\n    val = r.eval('return redis.call(\"exists\", KEYS[1]) == 0', 1, \"foo\")\n    assert val == 1\n\n\n@pytest.mark.unsupported_server_types(\"dragonfly\")\ndef test_eval_flushdb(r: redis.Redis):\n    r.set(\"foo\", \"bar\")\n    val = r.eval(\n        \"\"\"\n        local value = redis.call(\"FLUSHDB\");\n        return type(value) == \"table\" and value.ok == \"OK\";\n        \"\"\",\n        0,\n    )\n    assert val == 1\n\n\n@pytest.mark.unsupported_server_types(\"dragonfly\")\ndef test_eval_flushall(r, create_redis):\n    r1 = create_redis(db=2)\n    r2 = create_redis(db=3)\n\n    r1[\"r1\"] = \"r1\"\n    r2[\"r2\"] = \"r2\"\n\n    val = r.eval(\n        \"\"\"\n        local value = redis.call(\"FLUSHALL\");\n        return type(value) == \"table\" and value.ok == \"OK\";\n        \"\"\",\n        0,\n    )\n\n    assert val == 1\n    assert \"r1\" not in r1\n    assert \"r2\" not in r2\n\n\n# Dragonfly lua supports doubles\n@pytest.mark.unsupported_server_types(\"dragonfly\")\ndef test_eval_incrbyfloat(r: redis.Redis):\n    r.set(\"foo\", 0.5)\n    val = r.eval(\n        \"\"\"\n        local value = redis.call(\"INCRBYFLOAT\", KEYS[1], 2.0);\n        return type(value) == \"string\" and tonumber(value) == 2.5;\n        \"\"\",\n        1,\n    
    \"foo\",\n    )\n    assert val == 1\n\n\ndef test_eval_lrange(r: redis.Redis):\n    r.rpush(\"foo\", \"a\", \"b\")\n    val = r.eval(\n        \"\"\"\n        local value = redis.call(\"LRANGE\", KEYS[1], 0, -1);\n        return type(value) == \"table\" and value[1] == \"a\" and value[2] == \"b\";\n        \"\"\",\n        1,\n        \"foo\",\n    )\n    assert val == 1\n\n\ndef test_eval_ltrim(r: redis.Redis):\n    r.rpush(\"foo\", \"a\", \"b\", \"c\", \"d\")\n    val = r.eval(\n        \"\"\"\n        local value = redis.call(\"LTRIM\", KEYS[1], 1, 2);\n        return type(value) == \"table\" and value.ok == \"OK\";\n        \"\"\",\n        1,\n        \"foo\",\n    )\n    assert val == 1\n    assert r.lrange(\"foo\", 0, -1) == [b\"b\", b\"c\"]\n\n\ndef test_eval_lset(r: redis.Redis):\n    r.rpush(\"foo\", \"a\", \"b\")\n    val = r.eval(\n        \"\"\"\n        local value = redis.call(\"LSET\", KEYS[1], 0, \"z\");\n        return type(value) == \"table\" and value.ok == \"OK\";\n        \"\"\",\n        1,\n        \"foo\",\n    )\n    assert val == 1\n    assert r.lrange(\"foo\", 0, -1) == [b\"z\", b\"b\"]\n\n\ndef test_eval_sdiff(r: redis.Redis):\n    r.sadd(\"foo\", \"a\", \"b\", \"c\", \"f\", \"e\", \"d\")\n    r.sadd(\"bar\", \"b\")\n    val = r.eval(\n        \"\"\"\n        local value = redis.call(\"SDIFF\", KEYS[1], KEYS[2]);\n        if type(value) ~= \"table\" then\n            return redis.error_reply(type(value) .. 
\", should be table\");\n        else\n            return value;\n        end\n        \"\"\",\n        2,\n        \"foo\",\n        \"bar\",\n    )\n    # Note: while fakeredis sorts the result when using Lua, this isn't\n    # actually part of the redis contract (see\n    # https://github.com/antirez/redis/issues/5538), and for Redis 5 we\n    # need to sort val to pass the test.\n    assert sorted(val) == [b\"a\", b\"c\", b\"d\", b\"e\", b\"f\"]\n\n\ndef test_script(r: redis.Redis):\n    script = r.register_script(\"return ARGV[1]\")\n    result = script(args=[42])\n    assert result == b\"42\"\n\n\ndef test_lua_log_no_message(r: redis.Redis):\n    script = \"redis.log(redis.LOG_DEBUG)\"\n    script = r.register_script(script)\n    with pytest.raises(redis.ResponseError):\n        script()\n\n\n@pytest.mark.unsupported_server_types(\"dragonfly\")\ndef test_lua_log_wrong_level(r: redis.Redis):\n    script = \"redis.log(10, 'string')\"\n    script = r.register_script(script)\n    with pytest.raises(redis.ResponseError):\n        script()\n\n\ndef test_hscan_cursors_are_bytes(r: redis.Redis):\n    r.hset(\"hkey\", \"foo\", 1)\n\n    result = r.eval(\n        \"\"\"\n        local results = redis.call(\"HSCAN\", KEYS[1], \"0\")\n        return results[1]\n        \"\"\",\n        1,\n        \"hkey\",\n    )\n\n    assert result == b\"0\"\n    assert isinstance(result, bytes)\n\n\n@pytest.mark.xfail  # TODO\ndef test_deleting_while_scan(r: redis.Redis):\n    for i in range(100):\n        r.set(f\"key-{i}\", i)\n\n    assert len(r.keys()) == 100\n\n    script = \"\"\"\n        local cursor = 0\n        local seen = {}\n        repeat\n            local result = redis.call('SCAN', cursor)\n            for _,key in ipairs(result[2]) do\n                seen[#seen+1] = key\n                redis.call('DEL', key)\n            end\n            cursor = tonumber(result[1])\n        until cursor == 0\n        return seen\n    \"\"\"\n\n    assert 
len(r.register_script(script)()) == 100\n    assert len(r.keys()) == 0\n"
  },
  {
    "path": "tests/fakeredis/test/test_mixins/test_server_commands.py",
    "content": "from datetime import datetime\nfrom time import sleep\n\nimport pytest\nimport redis\nfrom redis.exceptions import ResponseError\n\n\n@pytest.mark.unsupported_server_types(\"dragonfly\")\ndef test_swapdb(r, create_redis):\n    r1 = create_redis(3)\n    r.set(\"foo\", \"abc\")\n    r.set(\"bar\", \"xyz\")\n    r1.set(\"foo\", \"foo\")\n    r1.set(\"baz\", \"baz\")\n    assert r.swapdb(2, 3)\n    assert r.get(\"foo\") == b\"foo\"\n    assert r.get(\"bar\") is None\n    assert r.get(\"baz\") == b\"baz\"\n    assert r1.get(\"foo\") == b\"abc\"\n    assert r1.get(\"bar\") == b\"xyz\"\n    assert r1.get(\"baz\") is None\n\n\n@pytest.mark.unsupported_server_types(\"dragonfly\")\ndef test_swapdb_same_db(r: redis.Redis):\n    assert r.swapdb(1, 1)\n\n\ndef test_save(r: redis.Redis):\n    assert r.save()\n\n\n@pytest.mark.unsupported_server_types(\"dragonfly\")\ndef test_bgsave(r: redis.Redis):\n    assert r.bgsave()\n    with pytest.raises(ResponseError):\n        r.execute_command(\"BGSAVE\", \"SCHEDULE\", \"FOO\")\n    with pytest.raises(ResponseError):\n        r.execute_command(\"BGSAVE\", \"FOO\")\n\n\ndef test_lastsave(r: redis.Redis):\n    assert isinstance(r.lastsave(), datetime)\n\n\n@pytest.mark.unsupported_server_types(\"dragonfly\")\n@pytest.mark.slow\ndef test_bgsave_timestamp_update(r: redis.Redis):\n    early_timestamp = r.lastsave()\n    sleep(1)\n    assert r.bgsave()\n    sleep(1)\n    late_timestamp = r.lastsave()\n    assert early_timestamp < late_timestamp\n\n\n@pytest.mark.slow\ndef test_save_timestamp_update(r: redis.Redis):\n    early_timestamp = r.lastsave()\n    sleep(1)\n    assert r.save()\n    late_timestamp = r.lastsave()\n    assert early_timestamp < late_timestamp\n\n\ndef test_dbsize(r: redis.Redis):\n    assert r.dbsize() == 0\n    r.set(\"foo\", \"bar\")\n    r.set(\"bar\", \"foo\")\n    assert r.dbsize() == 2\n\n\ndef test_flushdb_redispy4(r: redis.Redis):\n    r.set(\"foo\", \"bar\")\n    assert r.keys() == [b\"foo\"]\n   
 assert r.flushdb() is True\n    assert r.keys() == []\n"
  },
  {
    "path": "tests/fakeredis/test/test_mixins/test_set_commands.py",
    "content": "from __future__ import annotations\n\nimport os\nfrom datetime import timedelta\nfrom time import sleep\n\nimport pytest\nimport redis\nimport redis.client\nfrom redis.exceptions import ResponseError\n\n\ndef test_sadd(r: redis.Redis):\n    assert r.sadd(\"foo\", \"member1\") == 1\n    assert r.sadd(\"foo\", \"member1\") == 0\n    assert set(r.smembers(\"foo\")) == {b\"member1\"}\n    assert r.sadd(\"foo\", \"member2\", \"member3\") == 2\n    assert set(r.smembers(\"foo\")) == {b\"member1\", b\"member2\", b\"member3\"}\n    assert r.sadd(\"foo\", \"member3\", \"member4\") == 1\n    assert set(r.smembers(\"foo\")) == {b\"member1\", b\"member2\", b\"member3\", b\"member4\"}\n\n\ndef test_sadd_redispy_5(r: redis.Redis):\n    assert r.sadd(\"foo\", \"member1\") == 1\n    assert r.sadd(\"foo\", \"member1\") == 0\n    assert r.smembers(\"foo\") == {b\"member1\"}\n    assert r.sadd(\"foo\", \"member2\", \"member3\") == 2\n    assert r.smembers(\"foo\") == {b\"member1\", b\"member2\", b\"member3\"}\n    assert r.sadd(\"foo\", \"member3\", \"member4\") == 1\n    assert r.smembers(\"foo\") == {b\"member1\", b\"member2\", b\"member3\", b\"member4\"}\n\n\ndef test_sadd_as_str_type(r: redis.Redis):\n    assert r.sadd(\"foo\", *range(3)) == 3\n    assert set(r.smembers(\"foo\")) == {b\"0\", b\"1\", b\"2\"}\n\n\ndef test_sadd_wrong_type(r: redis.Redis):\n    r.zadd(\"foo\", {\"member\": 1})\n    with pytest.raises(redis.ResponseError):\n        r.sadd(\"foo\", \"member2\")\n\n\ndef test_scard(r: redis.Redis):\n    r.sadd(\"foo\", \"member1\")\n    r.sadd(\"foo\", \"member2\")\n    r.sadd(\"foo\", \"member2\")\n    assert r.scard(\"foo\") == 2\n\n\ndef test_scard_wrong_type(r: redis.Redis):\n    r.zadd(\"foo\", {\"member\": 1})\n    with pytest.raises(redis.ResponseError):\n        r.scard(\"foo\")\n\n\ndef test_sdiff(r: redis.Redis):\n    r.sadd(\"foo\", \"member1\")\n    r.sadd(\"foo\", \"member2\")\n    r.sadd(\"bar\", \"member2\")\n    r.sadd(\"bar\", 
\"member3\")\n    assert set(r.sdiff(\"foo\", \"bar\")) == {b\"member1\"}\n    # Original sets shouldn't be modified.\n    assert set(r.smembers(\"foo\")) == {b\"member1\", b\"member2\"}\n    assert set(r.smembers(\"bar\")) == {b\"member2\", b\"member3\"}\n\n\ndef test_sdiff_one_key(r: redis.Redis):\n    r.sadd(\"foo\", \"member1\")\n    r.sadd(\"foo\", \"member2\")\n    assert set(r.sdiff(\"foo\")) == {b\"member1\", b\"member2\"}\n\n\ndef test_sdiff_empty(r: redis.Redis):\n    assert set(r.sdiff(\"foo\")) == set()\n\n\ndef test_sdiff_wrong_type(r: redis.Redis):\n    r.zadd(\"foo\", {\"member\": 1})\n    r.sadd(\"bar\", \"member\")\n    with pytest.raises(redis.ResponseError):\n        r.sdiff(\"foo\", \"bar\")\n    with pytest.raises(redis.ResponseError):\n        r.sdiff(\"bar\", \"foo\")\n\n\ndef test_sdiffstore(r: redis.Redis):\n    r.sadd(\"foo\", \"member1\")\n    r.sadd(\"foo\", \"member2\")\n    r.sadd(\"bar\", \"member2\")\n    r.sadd(\"bar\", \"member3\")\n    assert r.sdiffstore(\"baz\", \"foo\", \"bar\") == 1\n\n    # Catch instances where we store bytes and strings inconsistently\n    # and thus baz = {'member1', b'member1'}\n    r.sadd(\"baz\", \"member1\")\n    assert r.scard(\"baz\") == 1\n\n\ndef test_sinter(r: redis.Redis):\n    r.sadd(\"foo\", \"member1\")\n    r.sadd(\"foo\", \"member2\")\n    r.sadd(\"bar\", \"member2\")\n    r.sadd(\"bar\", \"member3\")\n    assert set(r.sinter(\"foo\", \"bar\")) == {b\"member2\"}\n    assert set(r.sinter(\"foo\")) == {b\"member1\", b\"member2\"}\n\n\ndef test_sinter_bytes_keys(r: redis.Redis):\n    foo = os.urandom(10)\n    bar = os.urandom(10)\n    r.sadd(foo, \"member1\")\n    r.sadd(foo, \"member2\")\n    r.sadd(bar, \"member2\")\n    r.sadd(bar, \"member3\")\n    assert set(r.sinter(foo, bar)) == {b\"member2\"}\n    assert set(r.sinter(foo)) == {b\"member1\", b\"member2\"}\n\n\ndef test_sinter_wrong_type(r: redis.Redis):\n    r.zadd(\"foo\", {\"member\": 1})\n    r.sadd(\"bar\", \"member\")\n    with 
pytest.raises(redis.ResponseError):\n        r.sinter(\"foo\", \"bar\")\n    with pytest.raises(redis.ResponseError):\n        r.sinter(\"bar\", \"foo\")\n\n\ndef test_sinterstore(r: redis.Redis):\n    r.sadd(\"foo\", \"member1\")\n    r.sadd(\"foo\", \"member2\")\n    r.sadd(\"bar\", \"member2\")\n    r.sadd(\"bar\", \"member3\")\n    assert r.sinterstore(\"baz\", \"foo\", \"bar\") == 1\n\n    # Catch instances where we store bytes and strings inconsistently\n    # and thus baz = {'member2', b'member2'}\n    r.sadd(\"baz\", \"member2\")\n    assert r.scard(\"baz\") == 1\n\n\ndef test_sismember(r: redis.Redis):\n    assert not r.sismember(\"foo\", \"member1\")\n    r.sadd(\"foo\", \"member1\")\n    assert r.sismember(\"foo\", \"member1\")\n\n\ndef test_smismember(r: redis.Redis):\n    assert r.smismember(\"foo\", [\"member1\", \"member2\", \"member3\"]) == [0, 0, 0]\n    r.sadd(\"foo\", \"member1\", \"member2\", \"member3\")\n    assert r.smismember(\"foo\", [\"member1\", \"member2\", \"member3\"]) == [1, 1, 1]\n    assert r.smismember(\"foo\", [\"member1\", \"member2\", \"member3\", \"member4\"]) == [\n        1,\n        1,\n        1,\n        0,\n    ]\n    assert r.smismember(\"foo\", [\"member4\", \"member2\", \"member3\"]) == [0, 1, 1]\n    # should also work if provided values as arguments\n    assert r.smismember(\"foo\", \"member4\", \"member2\", \"member3\") == [0, 1, 1]\n\n\ndef test_smismember_wrong_type(r: redis.Redis):\n    # verify that command fails when the key itself is not a SET\n    r.zadd(\"foo\", {\"member\": 1})\n    with pytest.raises(redis.ResponseError):\n        r.smismember(\"foo\", \"member\")\n\n    # verify that command fails if the input parameter is of wrong type\n    r.sadd(\"foo2\", \"member1\", \"member2\", \"member3\")\n    with pytest.raises(redis.DataError, match=\"Invalid input of type\"):\n        r.smismember(\"foo2\", [[\"member1\", \"member2\"]])\n\n\ndef test_sismember_wrong_type(r: redis.Redis):\n    r.zadd(\"foo\", 
{\"member\": 1})\n    with pytest.raises(redis.ResponseError):\n        r.sismember(\"foo\", \"member\")\n\n\ndef test_smembers(r: redis.Redis):\n    assert set(r.smembers(\"foo\")) == set()\n\n\ndef test_smembers_copy(r: redis.Redis):\n    r.sadd(\"foo\", \"member1\")\n    ret = r.smembers(\"foo\")\n    r.sadd(\"foo\", \"member2\")\n    assert r.smembers(\"foo\") != ret\n\n\ndef test_smembers_wrong_type(r: redis.Redis):\n    r.zadd(\"foo\", {\"member\": 1})\n    with pytest.raises(redis.ResponseError):\n        r.smembers(\"foo\")\n\n\ndef test_smembers_runtime_error(r: redis.Redis):\n    r.sadd(\"foo\", \"member1\", \"member2\")\n    for member in r.smembers(\"foo\"):\n        r.srem(\"foo\", member)\n\n\ndef test_smove(r: redis.Redis):\n    r.sadd(\"foo\", \"member1\")\n    r.sadd(\"foo\", \"member2\")\n    assert r.smove(\"foo\", \"bar\", \"member1\")\n    assert set(r.smembers(\"bar\")) == {b\"member1\"}\n\n\ndef test_smove_non_existent_key(r: redis.Redis):\n    assert not r.smove(\"foo\", \"bar\", \"member1\")\n\n\ndef test_smove_wrong_type(r: redis.Redis):\n    r.zadd(\"foo\", {\"member\": 1})\n    r.sadd(\"bar\", \"member\")\n    with pytest.raises(redis.ResponseError):\n        r.smove(\"bar\", \"foo\", \"member\")\n    # Must raise the error before removing member from bar\n    assert set(r.smembers(\"bar\")) == {b\"member\"}\n    with pytest.raises(redis.ResponseError):\n        r.smove(\"foo\", \"bar\", \"member\")\n\n\ndef test_spop(r: redis.Redis):\n    # This is tricky because it pops a random element.\n    r.sadd(\"foo\", \"member1\")\n    assert r.spop(\"foo\") == b\"member1\"\n    assert r.spop(\"foo\") is None\n\n\ndef test_spop_wrong_type(r: redis.Redis):\n    r.zadd(\"foo\", {\"member\": 1})\n    with pytest.raises(redis.ResponseError):\n        r.spop(\"foo\")\n\n\ndef test_srandmember(r: redis.Redis):\n    r.sadd(\"foo\", \"member1\")\n    assert r.srandmember(\"foo\") == b\"member1\"\n    # Shouldn't be removed from the set.\n    assert 
r.srandmember(\"foo\") == b\"member1\"\n\n\ndef test_srandmember_number(r: redis.Redis):\n    \"\"\"srandmember works with the number argument.\"\"\"\n    assert r.srandmember(\"foo\", 2) == []\n    r.sadd(\"foo\", b\"member1\")\n    assert r.srandmember(\"foo\", 2) == [b\"member1\"]\n    r.sadd(\"foo\", b\"member2\")\n    assert set(r.srandmember(\"foo\", 2)) == {b\"member1\", b\"member2\"}\n    r.sadd(\"foo\", b\"member3\")\n    res = r.srandmember(\"foo\", 2)\n    assert len(res) == 2\n    for e in res:\n        assert e in {b\"member1\", b\"member2\", b\"member3\"}\n\n\ndef test_srandmember_wrong_type(r: redis.Redis):\n    r.zadd(\"foo\", {\"member\": 1})\n    with pytest.raises(redis.ResponseError):\n        r.srandmember(\"foo\")\n\n\ndef test_srem(r: redis.Redis):\n    r.sadd(\"foo\", \"member1\", \"member2\", \"member3\", \"member4\")\n    assert set(r.smembers(\"foo\")) == {b\"member1\", b\"member2\", b\"member3\", b\"member4\"}\n    assert r.srem(\"foo\", \"member1\") == 1\n    assert set(r.smembers(\"foo\")) == {b\"member2\", b\"member3\", b\"member4\"}\n    assert r.srem(\"foo\", \"member1\") == 0\n    # Since redis>=2.7.6 returns number of deleted items.\n    assert r.srem(\"foo\", \"member2\", \"member3\") == 2\n    assert set(r.smembers(\"foo\")) == {b\"member4\"}\n    assert r.srem(\"foo\", \"member3\", \"member4\") == 1\n    assert set(r.smembers(\"foo\")) == set()\n    assert r.srem(\"foo\", \"member3\", \"member4\") == 0\n\n\ndef test_srem_wrong_type(r: redis.Redis):\n    r.zadd(\"foo\", {\"member\": 1})\n    with pytest.raises(redis.ResponseError):\n        r.srem(\"foo\", \"member\")\n\n\ndef test_sunion(r: redis.Redis):\n    r.sadd(\"foo\", \"member1\")\n    r.sadd(\"foo\", \"member2\")\n    r.sadd(\"bar\", \"member2\")\n    r.sadd(\"bar\", \"member3\")\n    assert set(r.sunion(\"foo\", \"bar\")) == {b\"member1\", b\"member2\", b\"member3\"}\n\n\ndef test_sunion_wrong_type(r: redis.Redis):\n    r.zadd(\"foo\", {\"member\": 1})\n    
r.sadd(\"bar\", \"member\")\n    with pytest.raises(redis.ResponseError):\n        r.sunion(\"foo\", \"bar\")\n    with pytest.raises(redis.ResponseError):\n        r.sunion(\"bar\", \"foo\")\n\n\ndef test_sunionstore(r: redis.Redis):\n    r.sadd(\"foo\", \"member1\")\n    r.sadd(\"foo\", \"member2\")\n    r.sadd(\"bar\", \"member2\")\n    r.sadd(\"bar\", \"member3\")\n    assert r.sunionstore(\"baz\", \"foo\", \"bar\") == 3\n    assert set(r.smembers(\"baz\")) == {b\"member1\", b\"member2\", b\"member3\"}\n\n    # Catch instances where we store bytes and strings inconsistently\n    # and thus baz = {b'member1', b'member2', b'member3', 'member3'}\n    r.sadd(\"baz\", \"member3\")\n    assert r.scard(\"baz\") == 3\n\n\ndef test_empty_set(r: redis.Redis):\n    r.sadd(\"foo\", \"bar\")\n    r.srem(\"foo\", \"bar\")\n    assert not r.exists(\"foo\")\n\n\ndef test_sscan(r: redis.Redis):\n    # Set up the data\n    name = \"sscan-test\"\n    for ix in range(20):\n        k = \"sscan-test:%s\" % ix\n        r.sadd(name, k)\n    expected = r.smembers(name)\n    assert len(expected) == 20  # Ensure we know what we're testing\n\n    # Test that we page through the results and get everything out\n    results = []\n    cursor = \"0\"\n    while cursor != 0:\n        cursor, data = r.sscan(name, cursor, count=6)\n        results.extend(data)\n    assert set(expected) == set(results)\n\n    # Test the iterator version\n    results = [r for r in r.sscan_iter(name, count=6)]\n    assert set(expected) == set(results)\n\n    # Now test that the MATCH functionality works\n    results = []\n    cursor = \"0\"\n    while cursor != 0:\n        cursor, data = r.sscan(name, cursor, match=\"*7\", count=100)\n        results.extend(data)\n    assert b\"sscan-test:7\" in results\n    assert b\"sscan-test:17\" in results\n    assert len(results) == 2\n\n    # Test the match on iterator\n    results = [r for r in r.sscan_iter(name, match=\"*7\")]\n    assert b\"sscan-test:7\" in results\n    
assert b\"sscan-test:17\" in results\n    assert len(results) == 2\n\n\n@pytest.mark.min_server(\"7\")\ndef test_sintercard(r: redis.Redis):\n    r.sadd(\"foo\", \"member1\")\n    r.sadd(\"foo\", \"member2\")\n    r.sadd(\"bar\", \"member2\")\n    r.sadd(\"bar\", \"member3\")\n    assert r.sintercard(2, [\"foo\", \"bar\"]) == 1\n    assert r.sintercard(1, [\"foo\"]) == 2\n\n\n@pytest.mark.min_server(\"7\")\ndef test_sintercard_key_doesnt_exist(r: redis.Redis):\n    r.sadd(\"foo\", \"member1\")\n    r.sadd(\"foo\", \"member2\")\n    r.sadd(\"bar\", \"member2\")\n    r.sadd(\"bar\", \"member3\")\n    assert r.sintercard(2, [\"foo\", \"bar\"]) == 1\n    assert r.sintercard(1, [\"foo\"]) == 2\n    assert r.sintercard(1, [\"foo\"], limit=1) == 1\n    assert r.sintercard(3, [\"foo\", \"bar\", \"ddd\"]) == 0\n\n\n@pytest.mark.min_server(\"7\")\ndef test_sintercard_bytes_keys(r: redis.Redis):\n    foo = os.urandom(10)\n    bar = os.urandom(10)\n    r.sadd(foo, \"member1\")\n    r.sadd(foo, \"member2\")\n    r.sadd(bar, \"member2\")\n    r.sadd(bar, \"member3\")\n    assert r.sintercard(2, [foo, bar]) == 1\n    assert r.sintercard(1, [foo]) == 2\n    assert r.sintercard(1, [foo], limit=1) == 1\n\n\n@pytest.mark.min_server(\"7\")\ndef test_sintercard_wrong_type(r: redis.Redis):\n    r.zadd(\"foo\", {\"member\": 1})\n    r.sadd(\"bar\", \"member\")\n    with pytest.raises(redis.ResponseError):\n        r.sintercard(2, [\"foo\", \"bar\"])\n    with pytest.raises(redis.ResponseError):\n        r.sintercard(2, [\"bar\", \"foo\"])\n\n\n@pytest.mark.min_server(\"7\")\ndef test_sintercard_syntax_error(r: redis.Redis):\n    r.zadd(\"foo\", {\"member\": 1})\n    r.sadd(\"bar\", \"member\")\n    with pytest.raises(redis.ResponseError):\n        r.sintercard(3, [\"foo\", \"bar\"])\n    with pytest.raises(redis.ResponseError):\n        r.sintercard(1, [\"bar\", \"foo\"])\n    with pytest.raises(redis.ResponseError):\n        r.sintercard(1, [\"bar\", \"foo\"], limit=\"x\")\n\n\ndef 
test_pfadd(r: redis.Redis):\n    key = \"hll-pfadd\"\n    assert r.pfadd(key, \"a\", \"b\", \"c\", \"d\", \"e\", \"f\", \"g\") == 1\n    assert r.pfcount(key) == 7\n\n\ndef test_pfcount(r: redis.Redis):\n    key1 = \"hll-pfcount01\"\n    key2 = \"hll-pfcount02\"\n    key3 = \"hll-pfcount03\"\n    assert r.pfadd(key1, \"foo\", \"bar\", \"zap\") == 1\n    assert r.pfadd(key1, \"zap\", \"zap\", \"zap\") == 0\n    assert r.pfadd(key1, \"foo\", \"bar\") == 0\n    assert r.pfcount(key1) == 3\n    assert r.pfadd(key2, \"1\", \"2\", \"3\") == 1\n    assert r.pfcount(key2) == 3\n    assert r.pfcount(key1, key2) == 6\n    assert r.pfadd(key3, \"foo\", \"bar\", \"zip\") == 1\n    assert r.pfcount(key3) == 3\n    assert r.pfcount(key1, key3) == 4\n    assert r.pfcount(key1, key2, key3) == 7\n\n\ndef test_pfmerge(r: redis.Redis):\n    key1 = \"hll-pfmerge01\"\n    key2 = \"hll-pfmerge02\"\n    key3 = \"hll-pfmerge03\"\n    assert r.pfadd(key1, \"foo\", \"bar\", \"zap\", \"a\") == 1\n    assert r.pfadd(key2, \"a\", \"b\", \"c\", \"foo\") == 1\n    assert r.pfmerge(key3, key1, key2)\n    assert r.pfcount(key3) == 6\n\n\n@pytest.mark.slow\ndef test_set_ex_should_expire_value(r: redis.Redis):\n    r.set(\"foo\", \"bar\")\n    assert r.get(\"foo\") == b\"bar\"\n    r.set(\"foo\", \"bar\", ex=1)\n    sleep(2)\n    assert r.get(\"foo\") is None\n\n\n@pytest.mark.slow\ndef test_set_px_should_expire_value(r: redis.Redis):\n    r.set(\"foo\", \"bar\", px=500)\n    sleep(1.5)\n    assert r.get(\"foo\") is None\n\n\n@pytest.mark.slow\ndef test_psetex_expire_value(r: redis.Redis):\n    with pytest.raises(ResponseError):\n        r.psetex(\"foo\", 0, \"bar\")\n    r.psetex(\"foo\", 500, \"bar\")\n    sleep(1.5)\n    assert r.get(\"foo\") is None\n\n\n@pytest.mark.slow\ndef test_psetex_expire_value_using_timedelta(r: redis.Redis):\n    with pytest.raises(ResponseError):\n        r.psetex(\"foo\", timedelta(seconds=0), \"bar\")\n    r.psetex(\"foo\", timedelta(seconds=0.5), \"bar\")\n    
sleep(1.5)\n    assert r.get(\"foo\") is None\n"
  },
  {
    "path": "tests/fakeredis/test/test_mixins/test_sortedset_commands.py",
    "content": "from __future__ import annotations\n\nimport math\nfrom collections import OrderedDict\nfrom typing import Tuple, List, Optional\n\nimport pytest\nimport redis\nimport redis.client\n\nfrom test import testtools\n\n\ndef round_str(x):\n    assert isinstance(x, bytes)\n    return round(float(x))\n\n\ndef test_zpopmin(r: redis.Redis):\n    r.zadd(\"foo\", {\"one\": 1})\n    r.zadd(\"foo\", {\"two\": 2})\n    r.zadd(\"foo\", {\"three\": 3})\n    assert r.zpopmin(\"foo\", count=2) == [(b\"one\", 1.0), (b\"two\", 2.0)]\n    assert r.zpopmin(\"foo\", count=2) == [(b\"three\", 3.0)]\n\n\ndef test_zpopmin_too_many(r: redis.Redis):\n    r.zadd(\"foo\", {\"one\": 1})\n    r.zadd(\"foo\", {\"two\": 2})\n    r.zadd(\"foo\", {\"three\": 3})\n    assert r.zpopmin(\"foo\", count=5) == [(b\"one\", 1.0), (b\"two\", 2.0), (b\"three\", 3.0)]\n\n\ndef test_zpopmax(r: redis.Redis):\n    r.zadd(\"foo\", {\"one\": 1})\n    r.zadd(\"foo\", {\"two\": 2})\n    r.zadd(\"foo\", {\"three\": 3})\n    assert r.zpopmax(\"foo\", count=2) == [(b\"three\", 3.0), (b\"two\", 2.0)]\n    assert r.zpopmax(\"foo\", count=2) == [(b\"one\", 1.0)]\n\n\ndef test_zrange_same_score(r: redis.Redis):\n    r.zadd(\"foo\", {\"two_a\": 2})\n    r.zadd(\"foo\", {\"two_b\": 2})\n    r.zadd(\"foo\", {\"two_c\": 2})\n    r.zadd(\"foo\", {\"two_d\": 2})\n    r.zadd(\"foo\", {\"two_e\": 2})\n    assert r.zrange(\"foo\", 2, 3) == [b\"two_c\", b\"two_d\"]\n\n\ndef test_zrange_with_bylex_and_byscore(r: redis.Redis):\n    r.zadd(\"foo\", {\"one_a\": 0})\n    r.zadd(\"foo\", {\"two_a\": 0})\n    r.zadd(\"foo\", {\"two_b\": 0})\n    r.zadd(\"foo\", {\"three_a\": 0})\n    with pytest.raises(redis.ResponseError):\n        testtools.raw_command(r, \"zrange\", \"foo\", \"(t\", \"+\", \"bylex\", \"byscore\")\n\n\ndef test_zrange_with_rev_and_bylex(r: redis.Redis):\n    r.zadd(\"foo\", {\"one_a\": 0})\n    r.zadd(\"foo\", {\"two_a\": 0})\n    r.zadd(\"foo\", {\"two_b\": 0})\n    r.zadd(\"foo\", {\"three_a\": 0})\n    
assert r.zrange(\"foo\", b\"+\", b\"(t\", desc=True, bylex=True) == [\n        b\"two_b\",\n        b\"two_a\",\n        b\"three_a\",\n    ]\n    assert r.zrange(\"foo\", b\"[two_b\", b\"(t\", desc=True, bylex=True) == [\n        b\"two_b\",\n        b\"two_a\",\n        b\"three_a\",\n    ]\n    assert r.zrange(\"foo\", b\"(two_b\", b\"(t\", desc=True, bylex=True) == [\n        b\"two_a\",\n        b\"three_a\",\n    ]\n    assert r.zrange(\"foo\", b\"[two_b\", b\"[three_a\", desc=True, bylex=True) == [\n        b\"two_b\",\n        b\"two_a\",\n        b\"three_a\",\n    ]\n    assert r.zrange(\"foo\", b\"[two_b\", b\"(three_a\", desc=True, bylex=True) == [\n        b\"two_b\",\n        b\"two_a\",\n    ]\n    assert r.zrange(\"foo\", b\"(two_b\", b\"-\", desc=True, bylex=True) == [\n        b\"two_a\",\n        b\"three_a\",\n        b\"one_a\",\n    ]\n    assert r.zrange(\"foo\", b\"(two_b\", b\"[two_b\", bylex=True) == []\n    # reversed max + and min - boundaries\n    # these will be always empty, but allowed by redis\n    assert r.zrange(\"foo\", b\"-\", b\"+\", desc=True, bylex=True) == []\n    assert r.zrange(\"foo\", b\"[three_a\", b\"+\", desc=True, bylex=True) == []\n    assert r.zrange(\"foo\", b\"-\", b\"[o\", desc=True, bylex=True) == []\n\n\ndef test_zrange_with_bylex(r: redis.Redis):\n    r.zadd(\"foo\", {\"one_a\": 0})\n    r.zadd(\"foo\", {\"two_a\": 0})\n    r.zadd(\"foo\", {\"two_b\": 0})\n    r.zadd(\"foo\", {\"three_a\": 0})\n    assert r.zrange(\"foo\", b\"(t\", b\"+\", bylex=True) == [b\"three_a\", b\"two_a\", b\"two_b\"]\n    assert r.zrange(\"foo\", b\"(t\", b\"[two_b\", bylex=True) == [\n        b\"three_a\",\n        b\"two_a\",\n        b\"two_b\",\n    ]\n    assert r.zrange(\"foo\", b\"(t\", b\"(two_b\", bylex=True) == [b\"three_a\", b\"two_a\"]\n    assert r.zrange(\"foo\", b\"[three_a\", b\"[two_b\", bylex=True) == [\n        b\"three_a\",\n        b\"two_a\",\n        b\"two_b\",\n    ]\n    assert r.zrange(\"foo\", 
b\"(three_a\", b\"[two_b\", bylex=True) == [b\"two_a\", b\"two_b\"]\n    assert r.zrange(\"foo\", b\"-\", b\"(two_b\", bylex=True) == [\n        b\"one_a\",\n        b\"three_a\",\n        b\"two_a\",\n    ]\n    assert r.zrange(\"foo\", b\"[two_b\", b\"(two_b\", bylex=True) == []\n    # reversed max + and min - boundaries\n    # these will be always empty, but allowed by redis\n    assert r.zrange(\"foo\", b\"+\", b\"-\", bylex=True) == []\n    assert r.zrange(\"foo\", b\"+\", b\"[three_a\", bylex=True) == []\n    assert r.zrange(\"foo\", b\"[o\", b\"-\", bylex=True) == []\n\n\ndef test_zrange_with_byscore(r: redis.Redis):\n    r.zadd(\"foo\", {\"zero\": 0})\n    r.zadd(\"foo\", {\"two\": 2})\n    r.zadd(\"foo\", {\"two_a_also\": 2})\n    r.zadd(\"foo\", {\"two_b_also\": 2})\n    r.zadd(\"foo\", {\"four\": 4})\n    assert r.zrange(\"foo\", 1, 3, byscore=True) == [b\"two\", b\"two_a_also\", b\"two_b_also\"]\n    assert r.zrange(\"foo\", 2, 3, byscore=True) == [b\"two\", b\"two_a_also\", b\"two_b_also\"]\n    assert r.zrange(\"foo\", 0, 4, byscore=True) == [\n        b\"zero\",\n        b\"two\",\n        b\"two_a_also\",\n        b\"two_b_also\",\n        b\"four\",\n    ]\n    assert r.zrange(\"foo\", \"-inf\", 1, byscore=True) == [b\"zero\"]\n    assert r.zrange(\"foo\", 2, \"+inf\", byscore=True) == [\n        b\"two\",\n        b\"two_a_also\",\n        b\"two_b_also\",\n        b\"four\",\n    ]\n    assert r.zrange(\"foo\", \"-inf\", \"+inf\", byscore=True) == [\n        b\"zero\",\n        b\"two\",\n        b\"two_a_also\",\n        b\"two_b_also\",\n        b\"four\",\n    ]\n\n\ndef test_zcard(r: redis.Redis):\n    r.zadd(\"foo\", {\"one\": 1})\n    r.zadd(\"foo\", {\"two\": 2})\n    assert r.zcard(\"foo\") == 2\n\n\ndef test_zcard_non_existent_key(r: redis.Redis):\n    assert r.zcard(\"foo\") == 0\n\n\ndef test_zcard_wrong_type(r: redis.Redis):\n    r.sadd(\"foo\", \"bar\")\n    with pytest.raises(redis.ResponseError):\n        r.zcard(\"foo\")\n\n\ndef 
test_zcount(r: redis.Redis):\n    r.zadd(\"foo\", {\"one\": 1})\n    r.zadd(\"foo\", {\"three\": 2})\n    r.zadd(\"foo\", {\"five\": 5})\n    assert r.zcount(\"foo\", 2, 4) == 1\n    assert r.zcount(\"foo\", 1, 4) == 2\n    assert r.zcount(\"foo\", 0, 5) == 3\n    assert r.zcount(\"foo\", 4, \"+inf\") == 1\n    assert r.zcount(\"foo\", \"-inf\", 4) == 2\n    assert r.zcount(\"foo\", \"-inf\", \"+inf\") == 3\n\n\ndef test_zcount_exclusive(r: redis.Redis):\n    r.zadd(\"foo\", {\"one\": 1})\n    r.zadd(\"foo\", {\"three\": 2})\n    r.zadd(\"foo\", {\"five\": 5})\n    assert r.zcount(\"foo\", \"-inf\", \"(2\") == 1\n    assert r.zcount(\"foo\", \"-inf\", 2) == 2\n    assert r.zcount(\"foo\", \"(5\", \"+inf\") == 0\n    assert r.zcount(\"foo\", \"(1\", 5) == 2\n    assert r.zcount(\"foo\", \"(2\", \"(5\") == 0\n    assert r.zcount(\"foo\", \"(1\", \"(5\") == 1\n    assert r.zcount(\"foo\", 2, \"(5\") == 1\n\n\ndef test_zcount_wrong_type(r: redis.Redis):\n    r.sadd(\"foo\", \"bar\")\n    with pytest.raises(redis.ResponseError):\n        r.zcount(\"foo\", \"-inf\", \"+inf\")\n\n\ndef test_zincrby(r: redis.Redis):\n    r.zadd(\"foo\", {\"one\": 1})\n    assert r.zincrby(\"foo\", 10, \"one\") == 11\n    assert r.zrange(\"foo\", 0, -1, withscores=True) == [(b\"one\", 11)]\n\n\ndef test_zincrby_wrong_type(r: redis.Redis):\n    r.sadd(\"foo\", \"bar\")\n    with pytest.raises(redis.ResponseError):\n        r.zincrby(\"foo\", 10, \"one\")\n\n\ndef test_zrange_descending(r: redis.Redis):\n    r.zadd(\"foo\", {\"one\": 1})\n    r.zadd(\"foo\", {\"two\": 2})\n    r.zadd(\"foo\", {\"three\": 3})\n    assert r.zrange(\"foo\", 0, -1, desc=True) == [b\"three\", b\"two\", b\"one\"]\n\n\ndef test_zrange_descending_with_scores(r: redis.Redis):\n    r.zadd(\"foo\", {\"one\": 1})\n    r.zadd(\"foo\", {\"two\": 2})\n    r.zadd(\"foo\", {\"three\": 3})\n    assert r.zrange(\"foo\", 0, -1, desc=True, withscores=True) == [\n        (b\"three\", 3),\n        (b\"two\", 2),\n        (b\"one\", 
1),\n    ]\n\n\ndef test_zrange_with_positive_indices(r: redis.Redis):\n    r.zadd(\"foo\", {\"one\": 1})\n    r.zadd(\"foo\", {\"two\": 2})\n    r.zadd(\"foo\", {\"three\": 3})\n    assert r.zrange(\"foo\", 0, 1) == [b\"one\", b\"two\"]\n\n\ndef test_zrange_wrong_type(r: redis.Redis):\n    r.sadd(\"foo\", \"bar\")\n    with pytest.raises(redis.ResponseError):\n        r.zrange(\"foo\", 0, -1)\n\n\ndef test_zrange_score_cast(r: redis.Redis):\n    r.zadd(\"foo\", {\"one\": 1.2})\n    r.zadd(\"foo\", {\"two\": 2.2})\n\n    expected_without_cast_round = [(b\"one\", 1.2), (b\"two\", 2.2)]\n    expected_with_cast_round = [(b\"one\", 1.0), (b\"two\", 2.0)]\n    assert r.zrange(\"foo\", 0, 2, withscores=True) == expected_without_cast_round\n    assert (\n        r.zrange(\"foo\", 0, 2, withscores=True, score_cast_func=round_str)\n        == expected_with_cast_round\n    )\n\n\ndef test_zrank(r: redis.Redis):\n    r.zadd(\"foo\", {\"one\": 1})\n    r.zadd(\"foo\", {\"two\": 2})\n    r.zadd(\"foo\", {\"three\": 3})\n    assert r.zrank(\"foo\", \"one\") == 0\n    assert r.zrank(\"foo\", \"two\") == 1\n    assert r.zrank(\"foo\", \"three\") == 2\n\n\ndef test_zrank_non_existent_member(r: redis.Redis):\n    assert r.zrank(\"foo\", \"one\") is None\n\n\ndef test_zrank_wrong_type(r: redis.Redis):\n    r.sadd(\"foo\", \"bar\")\n    with pytest.raises(redis.ResponseError):\n        r.zrank(\"foo\", \"one\")\n\n\ndef test_zrem(r: redis.Redis):\n    r.zadd(\"foo\", {\"one\": 1})\n    r.zadd(\"foo\", {\"two\": 2})\n    r.zadd(\"foo\", {\"three\": 3})\n    r.zadd(\"foo\", {\"four\": 4})\n    assert r.zrem(\"foo\", \"one\") == 1\n    assert r.zrange(\"foo\", 0, -1) == [b\"two\", b\"three\", b\"four\"]\n    # Since redis>=2.7.6 returns number of deleted items.\n    assert r.zrem(\"foo\", \"two\", \"three\") == 2\n    assert r.zrange(\"foo\", 0, -1) == [b\"four\"]\n    assert r.zrem(\"foo\", \"three\", \"four\") == 1\n    assert r.zrange(\"foo\", 0, -1) == []\n    assert r.zrem(\"foo\", 
\"three\", \"four\") == 0\n\n\ndef test_zrem_non_existent_member(r: redis.Redis):\n    assert not r.zrem(\"foo\", \"one\")\n\n\ndef test_zrem_numeric_member(r: redis.Redis):\n    r.zadd(\"foo\", {\"128\": 13.0, \"129\": 12.0})\n    assert r.zrem(\"foo\", 128) == 1\n    assert r.zrange(\"foo\", 0, -1) == [b\"129\"]\n\n\ndef test_zrem_wrong_type(r: redis.Redis):\n    r.sadd(\"foo\", \"bar\")\n    with pytest.raises(redis.ResponseError):\n        r.zrem(\"foo\", \"bar\")\n\n\ndef test_zscore(r: redis.Redis):\n    r.zadd(\"foo\", {\"one\": 54})\n    assert r.zscore(\"foo\", \"one\") == 54\n\n\ndef test_zscore_non_existent_member(r: redis.Redis):\n    assert r.zscore(\"foo\", \"one\") is None\n\n\ndef test_zscore_wrong_type(r: redis.Redis):\n    r.sadd(\"foo\", \"bar\")\n    with pytest.raises(redis.ResponseError):\n        r.zscore(\"foo\", \"one\")\n\n\ndef test_zmscore(r: redis.Redis):\n    \"\"\"When all the requested sorted-set members are in the cache, a valid\n    float value should be returned for each requested member.\n\n    The order of the returned scores should always match the order in\n    which the set members were supplied.\n    \"\"\"\n    cache_key: str = \"scored-set-members\"\n    members: Tuple[str, ...] = (\"one\", \"two\", \"three\", \"four\", \"five\", \"six\")\n    scores: Tuple[float, ...] = (1.1, 2.2, 3.3, 4.4, 5.5, 6.6)\n\n    r.zadd(cache_key, dict(zip(members, scores)))\n    cached_scores: List[Optional[float]] = r.zmscore(\n        cache_key,\n        list(members),\n    )\n\n    assert all(cached_scores[idx] == score for idx, score in enumerate(scores))\n\n\ndef test_zmscore_missing_members(r: redis.Redis):\n    \"\"\"When none of the requested sorted-set members are in the cache, a value\n    of `None` should be returned once for each requested member.\"\"\"\n    cache_key: str = \"scored-set-members\"\n    members: Tuple[str, ...] 
= (\"one\", \"two\", \"three\", \"four\", \"five\", \"six\")\n\n    r.zadd(cache_key, {\"eight\": 8.8})\n    cached_scores: List[Optional[float]] = r.zmscore(\n        cache_key,\n        list(members),\n    )\n\n    assert all(score is None for score in cached_scores)\n\n\ndef test_zmscore_mixed_membership(r: redis.Redis):\n    \"\"\"When only some requested sorted-set members are in the cache, a\n    valid float value should be returned for each present member and `None` for\n    each missing member.\n\n    The order of the returned scores should always match the order in\n    which the set members were supplied.\n    \"\"\"\n    cache_key: str = \"scored-set-members\"\n    members: Tuple[str, ...] = (\"one\", \"two\", \"three\", \"four\", \"five\", \"six\")\n    scores: Tuple[float, ...] = (1.1, 2.2, 3.3, 4.4, 5.5, 6.6)\n\n    r.zadd(\n        cache_key,\n        dict((member, scores[idx]) for (idx, member) in enumerate(members) if idx % 2 != 0),\n    )\n\n    cached_scores: List[Optional[float]] = r.zmscore(cache_key, list(members))\n\n    assert all(cached_scores[idx] is None for (idx, score) in enumerate(scores) if idx % 2 == 0)\n    assert all(cached_scores[idx] == score for (idx, score) in enumerate(scores) if idx % 2 != 0)\n\n\ndef test_zrevrank(r: redis.Redis):\n    r.zadd(\"foo\", {\"one\": 1})\n    r.zadd(\"foo\", {\"two\": 2})\n    r.zadd(\"foo\", {\"three\": 3})\n    assert r.zrevrank(\"foo\", \"one\") == 2\n    assert r.zrevrank(\"foo\", \"two\") == 1\n    assert r.zrevrank(\"foo\", \"three\") == 0\n\n\ndef test_zrevrank_non_existent_member(r: redis.Redis):\n    assert r.zrevrank(\"foo\", \"one\") is None\n\n\ndef test_zrevrank_wrong_type(r: redis.Redis):\n    r.sadd(\"foo\", \"bar\")\n    with pytest.raises(redis.ResponseError):\n        r.zrevrank(\"foo\", \"one\")\n\n\ndef test_zrevrange(r: redis.Redis):\n    r.zadd(\"foo\", {\"one\": 1})\n    r.zadd(\"foo\", {\"two\": 2})\n    r.zadd(\"foo\", {\"three\": 3})\n    assert r.zrevrange(\"foo\", 0, 1) 
== [b\"three\", b\"two\"]\n    assert r.zrevrange(\"foo\", 0, -1) == [b\"three\", b\"two\", b\"one\"]\n\n\ndef test_zrevrange_sorted_keys(r: redis.Redis):\n    r.zadd(\"foo\", {\"one\": 1})\n    r.zadd(\"foo\", {\"two\": 2})\n    r.zadd(\"foo\", {\"two_b\": 2})\n    r.zadd(\"foo\", {\"three\": 3})\n    assert r.zrevrange(\"foo\", 0, 2) == [b\"three\", b\"two_b\", b\"two\"]\n    assert r.zrevrange(\"foo\", 0, -1) == [b\"three\", b\"two_b\", b\"two\", b\"one\"]\n\n\ndef test_zrevrange_wrong_type(r: redis.Redis):\n    r.sadd(\"foo\", \"bar\")\n    with pytest.raises(redis.ResponseError):\n        r.zrevrange(\"foo\", 0, 2)\n\n\ndef test_zrevrange_score_cast(r: redis.Redis):\n    r.zadd(\"foo\", {\"one\": 1.2})\n    r.zadd(\"foo\", {\"two\": 2.2})\n\n    expected_without_cast_round = [(b\"two\", 2.2), (b\"one\", 1.2)]\n    expected_with_cast_round = [(b\"two\", 2.0), (b\"one\", 1.0)]\n    assert r.zrevrange(\"foo\", 0, 2, withscores=True) == expected_without_cast_round\n    assert (\n        r.zrevrange(\"foo\", 0, 2, withscores=True, score_cast_func=round_str)\n        == expected_with_cast_round\n    )\n\n\ndef test_zrange_with_large_int(r: redis.Redis):\n    with pytest.raises(redis.ResponseError, match=\"value is not an integer or out of range\"):\n        r.zrange(\"\", 0, 9223372036854775808)\n    with pytest.raises(redis.ResponseError, match=\"value is not an integer or out of range\"):\n        r.zrange(\"\", 0, -9223372036854775809)\n\n\ndef test_zrangebyscore(r: redis.Redis):\n    r.zadd(\"foo\", {\"zero\": 0})\n    r.zadd(\"foo\", {\"two\": 2})\n    r.zadd(\"foo\", {\"two_a_also\": 2})\n    r.zadd(\"foo\", {\"two_b_also\": 2})\n    r.zadd(\"foo\", {\"four\": 4})\n    assert r.zrangebyscore(\"foo\", 1, 3) == [b\"two\", b\"two_a_also\", b\"two_b_also\"]\n    assert r.zrangebyscore(\"foo\", 2, 3) == [b\"two\", b\"two_a_also\", b\"two_b_also\"]\n    assert r.zrangebyscore(\"foo\", 0, 4) == [\n        b\"zero\",\n        b\"two\",\n        b\"two_a_also\",\n      
  b\"two_b_also\",\n        b\"four\",\n    ]\n    assert r.zrangebyscore(\"foo\", \"-inf\", 1) == [b\"zero\"]\n    assert r.zrangebyscore(\"foo\", 2, \"+inf\") == [\n        b\"two\",\n        b\"two_a_also\",\n        b\"two_b_also\",\n        b\"four\",\n    ]\n    assert r.zrangebyscore(\"foo\", \"-inf\", \"+inf\") == [\n        b\"zero\",\n        b\"two\",\n        b\"two_a_also\",\n        b\"two_b_also\",\n        b\"four\",\n    ]\n\n\ndef test_zrangebysore_exclusive(r: redis.Redis):\n    r.zadd(\"foo\", {\"zero\": 0})\n    r.zadd(\"foo\", {\"two\": 2})\n    r.zadd(\"foo\", {\"four\": 4})\n    r.zadd(\"foo\", {\"five\": 5})\n    assert r.zrangebyscore(\"foo\", \"(0\", 6) == [b\"two\", b\"four\", b\"five\"]\n    assert r.zrangebyscore(\"foo\", \"(2\", \"(5\") == [b\"four\"]\n    assert r.zrangebyscore(\"foo\", 0, \"(4\") == [b\"zero\", b\"two\"]\n\n\ndef test_zrangebyscore_raises_error(r: redis.Redis):\n    r.zadd(\"foo\", {\"one\": 1})\n    r.zadd(\"foo\", {\"two\": 2})\n    r.zadd(\"foo\", {\"three\": 3})\n    with pytest.raises(redis.ResponseError):\n        r.zrangebyscore(\"foo\", \"one\", 2)\n    with pytest.raises(redis.ResponseError):\n        r.zrangebyscore(\"foo\", 2, \"three\")\n    with pytest.raises(redis.ResponseError):\n        r.zrangebyscore(\"foo\", 2, \"3)\")\n    with pytest.raises(redis.RedisError):\n        r.zrangebyscore(\"foo\", 2, \"3)\", 0, None)\n\n\ndef test_zrangebyscore_wrong_type(r: redis.Redis):\n    r.sadd(\"foo\", \"bar\")\n    with pytest.raises(redis.ResponseError):\n        r.zrangebyscore(\"foo\", \"(1\", \"(2\")\n\n\ndef test_zrangebyscore_slice(r: redis.Redis):\n    r.zadd(\"foo\", {\"two_a\": 2})\n    r.zadd(\"foo\", {\"two_b\": 2})\n    r.zadd(\"foo\", {\"two_c\": 2})\n    r.zadd(\"foo\", {\"two_d\": 2})\n    assert r.zrangebyscore(\"foo\", 0, 4, 0, 2) == [b\"two_a\", b\"two_b\"]\n    assert r.zrangebyscore(\"foo\", 0, 4, 1, 3) == [b\"two_b\", b\"two_c\", b\"two_d\"]\n\n\ndef test_zrangebyscore_withscores(r: 
redis.Redis):\n    r.zadd(\"foo\", {\"one\": 1})\n    r.zadd(\"foo\", {\"two\": 2})\n    r.zadd(\"foo\", {\"three\": 3})\n    assert r.zrangebyscore(\"foo\", 1, 3, 0, 2, True) == [(b\"one\", 1), (b\"two\", 2)]\n\n\ndef test_zrangebyscore_cast_scores(r: redis.Redis):\n    r.zadd(\"foo\", {\"two\": 2})\n    r.zadd(\"foo\", {\"two_a_also\": 2.2})\n\n    expected_without_cast_round = [(b\"two\", 2.0), (b\"two_a_also\", 2.2)]\n    expected_with_cast_round = [(b\"two\", 2.0), (b\"two_a_also\", 2.0)]\n    assert sorted(r.zrangebyscore(\"foo\", 2, 3, withscores=True)) == sorted(\n        expected_without_cast_round\n    )\n    assert sorted(\n        r.zrangebyscore(\"foo\", 2, 3, withscores=True, score_cast_func=round_str)\n    ) == sorted(expected_with_cast_round)\n\n\ndef test_zrevrangebyscore(r: redis.Redis):\n    r.zadd(\"foo\", {\"one\": 1})\n    r.zadd(\"foo\", {\"two\": 2})\n    r.zadd(\"foo\", {\"three\": 3})\n    assert r.zrevrangebyscore(\"foo\", 3, 1) == [b\"three\", b\"two\", b\"one\"]\n    assert r.zrevrangebyscore(\"foo\", 3, 2) == [b\"three\", b\"two\"]\n    assert r.zrevrangebyscore(\"foo\", 3, 1, 0, 1) == [b\"three\"]\n    assert r.zrevrangebyscore(\"foo\", 3, 1, 1, 2) == [b\"two\", b\"one\"]\n\n\ndef test_zrevrangebyscore_exclusive(r: redis.Redis):\n    r.zadd(\"foo\", {\"one\": 1})\n    r.zadd(\"foo\", {\"two\": 2})\n    r.zadd(\"foo\", {\"three\": 3})\n    assert r.zrevrangebyscore(\"foo\", \"(3\", 1) == [b\"two\", b\"one\"]\n    assert r.zrevrangebyscore(\"foo\", 3, \"(2\") == [b\"three\"]\n    assert r.zrevrangebyscore(\"foo\", \"(3\", \"(1\") == [b\"two\"]\n    assert r.zrevrangebyscore(\"foo\", \"(2\", 1, 0, 1) == [b\"one\"]\n    assert r.zrevrangebyscore(\"foo\", \"(2\", \"(1\", 0, 1) == []\n    assert r.zrevrangebyscore(\"foo\", \"(3\", \"(0\", 1, 2) == [b\"one\"]\n\n\ndef test_zrevrangebyscore_raises_error(r: redis.Redis):\n    r.zadd(\"foo\", {\"one\": 1})\n    r.zadd(\"foo\", {\"two\": 2})\n    r.zadd(\"foo\", {\"three\": 3})\n    with 
pytest.raises(redis.ResponseError):\n        r.zrevrangebyscore(\"foo\", \"three\", 1)\n    with pytest.raises(redis.ResponseError):\n        r.zrevrangebyscore(\"foo\", 3, \"one\")\n    with pytest.raises(redis.ResponseError):\n        r.zrevrangebyscore(\"foo\", 3, \"1)\")\n    with pytest.raises(redis.ResponseError):\n        r.zrevrangebyscore(\"foo\", \"((3\", \"1)\")\n\n\ndef test_zrevrangebyscore_wrong_type(r: redis.Redis):\n    r.sadd(\"foo\", \"bar\")\n    with pytest.raises(redis.ResponseError):\n        r.zrevrangebyscore(\"foo\", \"(3\", \"(1\")\n\n\ndef test_zrevrangebyscore_cast_scores(r: redis.Redis):\n    r.zadd(\"foo\", {\"two\": 2})\n    r.zadd(\"foo\", {\"two_a_also\": 2.2})\n\n    assert r.zrevrangebyscore(\"foo\", 3, 2, withscores=True) == [\n        (b\"two_a_also\", 2.2),\n        (b\"two\", 2.0),\n    ]\n\n    assert r.zrevrangebyscore(\"foo\", 3, 2, withscores=True, score_cast_func=round_str) == [\n        (b\"two_a_also\", 2.0),\n        (b\"two\", 2.0),\n    ]\n\n\ndef test_zrangebylex(r: redis.Redis):\n    r.zadd(\"foo\", {\"one_a\": 0})\n    r.zadd(\"foo\", {\"two_a\": 0})\n    r.zadd(\"foo\", {\"two_b\": 0})\n    r.zadd(\"foo\", {\"three_a\": 0})\n    assert r.zrangebylex(\"foo\", b\"(t\", b\"+\") == [b\"three_a\", b\"two_a\", b\"two_b\"]\n    assert r.zrangebylex(\"foo\", b\"(t\", b\"[two_b\") == [b\"three_a\", b\"two_a\", b\"two_b\"]\n    assert r.zrangebylex(\"foo\", b\"(t\", b\"(two_b\") == [b\"three_a\", b\"two_a\"]\n    assert r.zrangebylex(\"foo\", b\"[three_a\", b\"[two_b\") == [\n        b\"three_a\",\n        b\"two_a\",\n        b\"two_b\",\n    ]\n    assert r.zrangebylex(\"foo\", b\"(three_a\", b\"[two_b\") == [b\"two_a\", b\"two_b\"]\n    assert r.zrangebylex(\"foo\", b\"-\", b\"(two_b\") == [b\"one_a\", b\"three_a\", b\"two_a\"]\n    assert r.zrangebylex(\"foo\", b\"[two_b\", b\"(two_b\") == []\n    # reversed max + and min - boundaries\n    # these will be always empty, but allowed by redis\n    assert 
r.zrangebylex(\"foo\", b\"+\", b\"-\") == []\n    assert r.zrangebylex(\"foo\", b\"+\", b\"[three_a\") == []\n    assert r.zrangebylex(\"foo\", b\"[o\", b\"-\") == []\n\n\ndef test_zrangebylex_wrong_type(r: redis.Redis):\n    r.sadd(\"foo\", \"bar\")\n    with pytest.raises(redis.ResponseError):\n        r.zrangebylex(\"foo\", b\"-\", b\"+\")\n\n\ndef test_zlexcount(r: redis.Redis):\n    r.zadd(\"foo\", {\"one_a\": 0})\n    r.zadd(\"foo\", {\"two_a\": 0})\n    r.zadd(\"foo\", {\"two_b\": 0})\n    r.zadd(\"foo\", {\"three_a\": 0})\n    assert r.zlexcount(\"foo\", b\"(t\", b\"+\") == 3\n    assert r.zlexcount(\"foo\", b\"(t\", b\"[two_b\") == 3\n    assert r.zlexcount(\"foo\", b\"(t\", b\"(two_b\") == 2\n    assert r.zlexcount(\"foo\", b\"[three_a\", b\"[two_b\") == 3\n    assert r.zlexcount(\"foo\", b\"(three_a\", b\"[two_b\") == 2\n    assert r.zlexcount(\"foo\", b\"-\", b\"(two_b\") == 3\n    assert r.zlexcount(\"foo\", b\"[two_b\", b\"(two_b\") == 0\n    # reversed max + and min - boundaries\n    # these will be always empty, but allowed by redis\n    assert r.zlexcount(\"foo\", b\"+\", b\"-\") == 0\n    assert r.zlexcount(\"foo\", b\"+\", b\"[three_a\") == 0\n    assert r.zlexcount(\"foo\", b\"[o\", b\"-\") == 0\n\n\ndef test_zlexcount_wrong_type(r: redis.Redis):\n    r.sadd(\"foo\", \"bar\")\n    with pytest.raises(redis.ResponseError):\n        r.zlexcount(\"foo\", b\"-\", b\"+\")\n\n\ndef test_zrangebylex_with_limit(r: redis.Redis):\n    r.zadd(\"foo\", {\"one_a\": 0})\n    r.zadd(\"foo\", {\"two_a\": 0})\n    r.zadd(\"foo\", {\"two_b\": 0})\n    r.zadd(\"foo\", {\"three_a\": 0})\n    assert r.zrangebylex(\"foo\", b\"-\", b\"+\", 1, 2) == [b\"three_a\", b\"two_a\"]\n\n    # negative offset no results\n    assert r.zrangebylex(\"foo\", b\"-\", b\"+\", -1, 3) == []\n\n    # negative limit ignored\n    assert r.zrangebylex(\"foo\", b\"-\", b\"+\", 0, -2) == [\n        b\"one_a\",\n        b\"three_a\",\n        b\"two_a\",\n        b\"two_b\",\n    ]\n    assert 
r.zrangebylex(\"foo\", b\"-\", b\"+\", 1, -2) == [b\"three_a\", b\"two_a\", b\"two_b\"]\n    assert r.zrangebylex(\"foo\", b\"+\", b\"-\", 1, 1) == []\n\n\ndef test_zrangebylex_raises_error(r: redis.Redis):\n    r.zadd(\"foo\", {\"one_a\": 0})\n    r.zadd(\"foo\", {\"two_a\": 0})\n    r.zadd(\"foo\", {\"two_b\": 0})\n    r.zadd(\"foo\", {\"three_a\": 0})\n\n    with pytest.raises(redis.ResponseError):\n        r.zrangebylex(\"foo\", b\"\", b\"[two_b\")\n\n    with pytest.raises(redis.ResponseError):\n        r.zrangebylex(\"foo\", b\"-\", b\"two_b\")\n\n    with pytest.raises(redis.ResponseError):\n        r.zrangebylex(\"foo\", b\"(t\", b\"two_b\")\n\n    with pytest.raises(redis.ResponseError):\n        r.zrangebylex(\"foo\", b\"t\", b\"+\")\n\n    with pytest.raises(redis.ResponseError):\n        r.zrangebylex(\"foo\", b\"[two_a\", b\"\")\n\n    with pytest.raises(redis.RedisError):\n        r.zrangebylex(\"foo\", b\"(two_a\", b\"[two_b\", 1)\n\n\ndef test_zrevrangebylex(r: redis.Redis):\n    r.zadd(\"foo\", {\"one_a\": 0})\n    r.zadd(\"foo\", {\"two_a\": 0})\n    r.zadd(\"foo\", {\"two_b\": 0})\n    r.zadd(\"foo\", {\"three_a\": 0})\n    assert r.zrevrangebylex(\"foo\", b\"+\", b\"(t\") == [b\"two_b\", b\"two_a\", b\"three_a\"]\n    assert r.zrevrangebylex(\"foo\", b\"[two_b\", b\"(t\") == [b\"two_b\", b\"two_a\", b\"three_a\"]\n    assert r.zrevrangebylex(\"foo\", b\"(two_b\", b\"(t\") == [b\"two_a\", b\"three_a\"]\n    assert r.zrevrangebylex(\"foo\", b\"[two_b\", b\"[three_a\") == [\n        b\"two_b\",\n        b\"two_a\",\n        b\"three_a\",\n    ]\n    assert r.zrevrangebylex(\"foo\", b\"[two_b\", b\"(three_a\") == [b\"two_b\", b\"two_a\"]\n    assert r.zrevrangebylex(\"foo\", b\"(two_b\", b\"-\") == [b\"two_a\", b\"three_a\", b\"one_a\"]\n    assert r.zrangebylex(\"foo\", b\"(two_b\", b\"[two_b\") == []\n    # reversed max + and min - boundaries\n    # these will be always empty, but allowed by redis\n    assert r.zrevrangebylex(\"foo\", b\"-\", 
b\"+\") == []\n    assert r.zrevrangebylex(\"foo\", b\"[three_a\", b\"+\") == []\n    assert r.zrevrangebylex(\"foo\", b\"-\", b\"[o\") == []\n\n\ndef test_zrevrangebylex_with_limit(r: redis.Redis):\n    r.zadd(\"foo\", {\"one_a\": 0})\n    r.zadd(\"foo\", {\"two_a\": 0})\n    r.zadd(\"foo\", {\"two_b\": 0})\n    r.zadd(\"foo\", {\"three_a\": 0})\n    assert r.zrevrangebylex(\"foo\", b\"+\", b\"-\", 1, 2) == [b\"two_a\", b\"three_a\"]\n\n\ndef test_zrevrangebylex_raises_error(r: redis.Redis):\n    r.zadd(\"foo\", {\"one_a\": 0})\n    r.zadd(\"foo\", {\"two_a\": 0})\n    r.zadd(\"foo\", {\"two_b\": 0})\n    r.zadd(\"foo\", {\"three_a\": 0})\n\n    with pytest.raises(redis.ResponseError):\n        r.zrevrangebylex(\"foo\", b\"[two_b\", b\"\")\n\n    with pytest.raises(redis.ResponseError):\n        r.zrevrangebylex(\"foo\", b\"two_b\", b\"-\")\n\n    with pytest.raises(redis.ResponseError):\n        r.zrevrangebylex(\"foo\", b\"two_b\", b\"(t\")\n\n    with pytest.raises(redis.ResponseError):\n        r.zrevrangebylex(\"foo\", b\"+\", b\"t\")\n\n    with pytest.raises(redis.ResponseError):\n        r.zrevrangebylex(\"foo\", b\"\", b\"[two_a\")\n\n    with pytest.raises(redis.RedisError):\n        r.zrevrangebylex(\"foo\", b\"[two_a\", b\"(two_b\", 1)\n\n\ndef test_zrevrangebylex_wrong_type(r: redis.Redis):\n    r.sadd(\"foo\", \"bar\")\n    with pytest.raises(redis.ResponseError):\n        r.zrevrangebylex(\"foo\", b\"+\", b\"-\")\n\n\ndef test_zremrangebyrank(r: redis.Redis):\n    r.zadd(\"foo\", {\"one\": 1})\n    r.zadd(\"foo\", {\"two\": 2})\n    r.zadd(\"foo\", {\"three\": 3})\n    assert r.zremrangebyrank(\"foo\", 0, 1) == 2\n    assert r.zrange(\"foo\", 0, -1) == [b\"three\"]\n\n\ndef test_zremrangebyrank_negative_indices(r: redis.Redis):\n    r.zadd(\"foo\", {\"one\": 1})\n    r.zadd(\"foo\", {\"two\": 2})\n    r.zadd(\"foo\", {\"three\": 3})\n    assert r.zremrangebyrank(\"foo\", -2, -1) == 2\n    assert r.zrange(\"foo\", 0, -1) == [b\"one\"]\n\n\ndef 
test_zremrangebyrank_out_of_bounds(r: redis.Redis):\n    r.zadd(\"foo\", {\"one\": 1})\n    assert r.zremrangebyrank(\"foo\", 1, 3) == 0\n\n\ndef test_zremrangebyrank_wrong_type(r: redis.Redis):\n    r.sadd(\"foo\", \"bar\")\n    with pytest.raises(redis.ResponseError):\n        r.zremrangebyrank(\"foo\", 1, 3)\n\n\ndef test_zremrangebyscore(r: redis.Redis):\n    r.zadd(\"foo\", {\"zero\": 0})\n    r.zadd(\"foo\", {\"two\": 2})\n    r.zadd(\"foo\", {\"four\": 4})\n    # Outside of range.\n    assert r.zremrangebyscore(\"foo\", 5, 10) == 0\n    assert r.zrange(\"foo\", 0, -1) == [b\"zero\", b\"two\", b\"four\"]\n    # Middle of range.\n    assert r.zremrangebyscore(\"foo\", 1, 3) == 1\n    assert r.zrange(\"foo\", 0, -1) == [b\"zero\", b\"four\"]\n    assert r.zremrangebyscore(\"foo\", 1, 3) == 0\n    # Entire range.\n    assert r.zremrangebyscore(\"foo\", 0, 4) == 2\n    assert r.zrange(\"foo\", 0, -1) == []\n\n\ndef test_zremrangebyscore_exclusive(r: redis.Redis):\n    r.zadd(\"foo\", {\"zero\": 0})\n    r.zadd(\"foo\", {\"two\": 2})\n    r.zadd(\"foo\", {\"four\": 4})\n    assert r.zremrangebyscore(\"foo\", \"(0\", 1) == 0\n    assert r.zrange(\"foo\", 0, -1) == [b\"zero\", b\"two\", b\"four\"]\n    assert r.zremrangebyscore(\"foo\", \"-inf\", \"(0\") == 0\n    assert r.zrange(\"foo\", 0, -1) == [b\"zero\", b\"two\", b\"four\"]\n    assert r.zremrangebyscore(\"foo\", \"(2\", 5) == 1\n    assert r.zrange(\"foo\", 0, -1) == [b\"zero\", b\"two\"]\n    assert r.zremrangebyscore(\"foo\", 0, \"(2\") == 1\n    assert r.zrange(\"foo\", 0, -1) == [b\"two\"]\n    assert r.zremrangebyscore(\"foo\", \"(1\", \"(3\") == 1\n    assert r.zrange(\"foo\", 0, -1) == []\n\n\ndef test_zremrangebyscore_raises_error(r: redis.Redis):\n    r.zadd(\"foo\", {\"zero\": 0})\n    r.zadd(\"foo\", {\"two\": 2})\n    r.zadd(\"foo\", {\"four\": 4})\n    with pytest.raises(redis.ResponseError):\n        r.zremrangebyscore(\"foo\", \"three\", 1)\n    with pytest.raises(redis.ResponseError):\n       
 r.zremrangebyscore(\"foo\", 3, \"one\")\n    with pytest.raises(redis.ResponseError):\n        r.zremrangebyscore(\"foo\", 3, \"1)\")\n    with pytest.raises(redis.ResponseError):\n        r.zremrangebyscore(\"foo\", \"((3\", \"1)\")\n\n\ndef test_zremrangebyscore_badkey(r: redis.Redis):\n    assert r.zremrangebyscore(\"foo\", 0, 2) == 0\n\n\ndef test_zremrangebyscore_wrong_type(r: redis.Redis):\n    r.sadd(\"foo\", \"bar\")\n    with pytest.raises(redis.ResponseError):\n        r.zremrangebyscore(\"foo\", 0, 2)\n\n\ndef test_zremrangebylex(r: redis.Redis):\n    r.zadd(\"foo\", {\"two_a\": 0})\n    r.zadd(\"foo\", {\"two_b\": 0})\n    r.zadd(\"foo\", {\"one_a\": 0})\n    r.zadd(\"foo\", {\"three_a\": 0})\n    assert r.zremrangebylex(\"foo\", b\"(three_a\", b\"[two_b\") == 2\n    assert r.zremrangebylex(\"foo\", b\"(three_a\", b\"[two_b\") == 0\n    assert r.zremrangebylex(\"foo\", b\"-\", b\"(o\") == 0\n    assert r.zremrangebylex(\"foo\", b\"-\", b\"[one_a\") == 1\n    assert r.zremrangebylex(\"foo\", b\"[tw\", b\"+\") == 0\n    assert r.zremrangebylex(\"foo\", b\"[t\", b\"+\") == 1\n    assert r.zremrangebylex(\"foo\", b\"[t\", b\"+\") == 0\n\n\ndef test_zremrangebylex_error(r: redis.Redis):\n    r.zadd(\"foo\", {\"two_a\": 0})\n    r.zadd(\"foo\", {\"two_b\": 0})\n    r.zadd(\"foo\", {\"one_a\": 0})\n    r.zadd(\"foo\", {\"three_a\": 0})\n    with pytest.raises(redis.ResponseError):\n        r.zremrangebylex(\"foo\", b\"(t\", b\"two_b\")\n\n    with pytest.raises(redis.ResponseError):\n        r.zremrangebylex(\"foo\", b\"t\", b\"+\")\n\n    with pytest.raises(redis.ResponseError):\n        r.zremrangebylex(\"foo\", b\"[two_a\", b\"\")\n\n\ndef test_zremrangebylex_badkey(r: redis.Redis):\n    assert r.zremrangebylex(\"foo\", b\"(three_a\", b\"[two_b\") == 0\n\n\ndef test_zremrangebylex_wrong_type(r: redis.Redis):\n    r.sadd(\"foo\", \"bar\")\n    with pytest.raises(redis.ResponseError):\n        r.zremrangebylex(\"foo\", b\"bar\", b\"baz\")\n\n\ndef 
test_zunionstore(r: redis.Redis):\n    r.zadd(\"foo\", {\"one\": 1})\n    r.zadd(\"foo\", {\"two\": 2})\n    r.zadd(\"bar\", {\"one\": 1})\n    r.zadd(\"bar\", {\"two\": 2})\n    r.zadd(\"bar\", {\"three\": 3})\n    r.zunionstore(\"baz\", [\"foo\", \"bar\"])\n    assert r.zrange(\"baz\", 0, -1, withscores=True) == [\n        (b\"one\", 2),\n        (b\"three\", 3),\n        (b\"two\", 4),\n    ]\n\n\ndef test_zunionstore_sum(r: redis.Redis):\n    r.zadd(\"foo\", {\"one\": 1})\n    r.zadd(\"foo\", {\"two\": 2})\n    r.zadd(\"bar\", {\"one\": 1})\n    r.zadd(\"bar\", {\"two\": 2})\n    r.zadd(\"bar\", {\"three\": 3})\n    r.zunionstore(\"baz\", [\"foo\", \"bar\"], aggregate=\"SUM\")\n    assert r.zrange(\"baz\", 0, -1, withscores=True) == [\n        (b\"one\", 2),\n        (b\"three\", 3),\n        (b\"two\", 4),\n    ]\n\n\ndef test_zunionstore_max(r: redis.Redis):\n    r.zadd(\"foo\", {\"one\": 0})\n    r.zadd(\"foo\", {\"two\": 0})\n    r.zadd(\"bar\", {\"one\": 1})\n    r.zadd(\"bar\", {\"two\": 2})\n    r.zadd(\"bar\", {\"three\": 3})\n    r.zunionstore(\"baz\", [\"foo\", \"bar\"], aggregate=\"MAX\")\n    assert r.zrange(\"baz\", 0, -1, withscores=True) == [\n        (b\"one\", 1),\n        (b\"two\", 2),\n        (b\"three\", 3),\n    ]\n\n\ndef test_zunionstore_min(r: redis.Redis):\n    r.zadd(\"foo\", {\"one\": 1})\n    r.zadd(\"foo\", {\"two\": 2})\n    r.zadd(\"bar\", {\"one\": 0})\n    r.zadd(\"bar\", {\"two\": 0})\n    r.zadd(\"bar\", {\"three\": 3})\n    r.zunionstore(\"baz\", [\"foo\", \"bar\"], aggregate=\"MIN\")\n    assert r.zrange(\"baz\", 0, -1, withscores=True) == [\n        (b\"one\", 0),\n        (b\"two\", 0),\n        (b\"three\", 3),\n    ]\n\n\ndef test_zunionstore_weights(r: redis.Redis):\n    r.zadd(\"foo\", {\"one\": 1})\n    r.zadd(\"foo\", {\"two\": 2})\n    r.zadd(\"bar\", {\"one\": 1})\n    r.zadd(\"bar\", {\"two\": 2})\n    r.zadd(\"bar\", {\"four\": 4})\n    r.zunionstore(\"baz\", {\"foo\": 1, \"bar\": 2}, aggregate=\"SUM\")\n    
assert r.zrange(\"baz\", 0, -1, withscores=True) == [\n        (b\"one\", 3),\n        (b\"two\", 6),\n        (b\"four\", 8),\n    ]\n\n\ndef test_zunionstore_nan_to_zero(r: redis.Redis):\n    r.zadd(\"foo\", {\"x\": math.inf})\n    r.zadd(\"foo2\", {\"x\": math.inf})\n    r.zunionstore(\"bar\", OrderedDict([(\"foo\", 1.0), (\"foo2\", 0.0)]))\n    # This is different to test_zinterstore_nan_to_zero because of a quirk\n    # in redis. See https://github.com/antirez/redis/issues/3954.\n    assert r.zscore(\"bar\", \"x\") == math.inf\n\n\ndef test_zunionstore_nan_to_zero2(r: redis.Redis):\n    r.zadd(\"foo\", {\"zero\": 0})\n    r.zadd(\"foo2\", {\"one\": 1})\n    r.zadd(\"foo3\", {\"one\": 1})\n    r.zunionstore(\"bar\", {\"foo\": math.inf}, aggregate=\"SUM\")\n    assert r.zrange(\"bar\", 0, -1, withscores=True) == [(b\"zero\", 0)]\n    r.zunionstore(\"bar\", OrderedDict([(\"foo2\", math.inf), (\"foo3\", -math.inf)]))\n    assert r.zrange(\"bar\", 0, -1, withscores=True) == [(b\"one\", 0)]\n\n\n@pytest.mark.unsupported_server_types(\"dragonfly\")\ndef test_zunionstore_nan_to_zero_ordering(r: redis.Redis):\n    r.zadd(\"foo\", {\"e1\": math.inf})\n    r.zadd(\"bar\", {\"e1\": -math.inf, \"e2\": 0.0})\n    r.zunionstore(\"baz\", [\"foo\", \"bar\", \"foo\"])\n    assert r.zscore(\"baz\", \"e1\") == 0.0\n\n\ndef test_zunionstore_mixed_set_types(r: redis.Redis):\n    # No score, redis will use 1.0.\n    r.sadd(\"foo\", \"one\")\n    r.sadd(\"foo\", \"two\")\n    r.zadd(\"bar\", {\"one\": 1})\n    r.zadd(\"bar\", {\"two\": 2})\n    r.zadd(\"bar\", {\"three\": 3})\n    r.zunionstore(\"baz\", [\"foo\", \"bar\"], aggregate=\"SUM\")\n    assert r.zrange(\"baz\", 0, -1, withscores=True) == [\n        (b\"one\", 2),\n        (b\"three\", 3),\n        (b\"two\", 3),\n    ]\n\n\ndef test_zunionstore_badkey(r: redis.Redis):\n    r.zadd(\"foo\", {\"one\": 1})\n    r.zadd(\"foo\", {\"two\": 2})\n    r.zunionstore(\"baz\", [\"foo\", \"bar\"], aggregate=\"SUM\")\n    assert 
r.zrange(\"baz\", 0, -1, withscores=True) == [(b\"one\", 1), (b\"two\", 2)]\n    r.zunionstore(\"baz\", {\"foo\": 1, \"bar\": 2}, aggregate=\"SUM\")\n    assert r.zrange(\"baz\", 0, -1, withscores=True) == [(b\"one\", 1), (b\"two\", 2)]\n\n\ndef test_zunionstore_wrong_type(r: redis.Redis):\n    r.set(\"foo\", \"bar\")\n    with pytest.raises(redis.ResponseError):\n        r.zunionstore(\"baz\", [\"foo\", \"bar\"])\n\n\ndef test_zinterstore(r: redis.Redis):\n    r.zadd(\"foo\", {\"one\": 1})\n    r.zadd(\"foo\", {\"two\": 2})\n    r.zadd(\"bar\", {\"one\": 1})\n    r.zadd(\"bar\", {\"two\": 2})\n    r.zadd(\"bar\", {\"three\": 3})\n    r.zinterstore(\"baz\", [\"foo\", \"bar\"])\n    assert r.zrange(\"baz\", 0, -1, withscores=True) == [(b\"one\", 2), (b\"two\", 4)]\n\n\n@pytest.mark.unsupported_server_types(\"dragonfly\")\ndef test_zinterstore_mixed_set_types(r: redis.Redis):\n    r.sadd(\"foo\", \"one\")\n    r.sadd(\"foo\", \"two\")\n    r.zadd(\"bar\", {\"one\": 1})\n    r.zadd(\"bar\", {\"two\": 2})\n    r.zadd(\"bar\", {\"three\": 3})\n    r.zinterstore(\"baz\", [\"foo\", \"bar\"], aggregate=\"SUM\")\n    assert r.zrange(\"baz\", 0, -1, withscores=True) == [(b\"one\", 2), (b\"two\", 3)]\n\n\ndef test_zinterstore_max(r: redis.Redis):\n    r.zadd(\"foo\", {\"one\": 0})\n    r.zadd(\"foo\", {\"two\": 0})\n    r.zadd(\"bar\", {\"one\": 1})\n    r.zadd(\"bar\", {\"two\": 2})\n    r.zadd(\"bar\", {\"three\": 3})\n    r.zinterstore(\"baz\", [\"foo\", \"bar\"], aggregate=\"MAX\")\n    assert r.zrange(\"baz\", 0, -1, withscores=True) == [(b\"one\", 1), (b\"two\", 2)]\n\n\ndef test_zinterstore_onekey(r: redis.Redis):\n    r.zadd(\"foo\", {\"one\": 1})\n    r.zinterstore(\"baz\", [\"foo\"], aggregate=\"MAX\")\n    assert r.zrange(\"baz\", 0, -1, withscores=True) == [(b\"one\", 1)]\n\n\ndef test_zinterstore_nokey(r: redis.Redis):\n    with pytest.raises(redis.ResponseError):\n        r.zinterstore(\"baz\", [], 
aggregate=\"MAX\")\n\n\n@pytest.mark.unsupported_server_types(\"dragonfly\")  # TODO causes a crash!\ndef test_zinterstore_nan_to_zero(r: redis.Redis):\n    r.zadd(\"foo\", {\"x\": math.inf})\n    r.zadd(\"foo2\", {\"x\": math.inf})\n    r.zinterstore(\"bar\", OrderedDict([(\"foo\", 1.0), (\"foo2\", 0.0)]))\n    assert r.zscore(\"bar\", \"x\") == 0.0\n\n\ndef test_zunionstore_nokey(r: redis.Redis):\n    with pytest.raises(redis.ResponseError):\n        r.zunionstore(\"baz\", [], aggregate=\"MAX\")\n\n\n@pytest.mark.unsupported_server_types(\"dragonfly\")  # TODO Hang server\ndef test_zinterstore_wrong_type(r: redis.Redis):\n    r.set(\"foo\", \"bar\")\n    with pytest.raises(redis.ResponseError):\n        r.zinterstore(\"baz\", [\"foo\", \"bar\"])\n\n\ndef test_empty_zset(r: redis.Redis):\n    r.zadd(\"foo\", {\"one\": 1})\n    r.zrem(\"foo\", \"one\")\n    assert not r.exists(\"foo\")\n\n\ndef test_zpopmax_too_many(r: redis.Redis):\n    r.zadd(\"foo\", {\"one\": 1})\n    r.zadd(\"foo\", {\"two\": 2})\n    r.zadd(\"foo\", {\"three\": 3})\n    assert r.zpopmax(\"foo\", count=5) == [\n        (b\"three\", 3.0),\n        (b\"two\", 2.0),\n        (b\"one\", 1.0),\n    ]\n\n\ndef test_bzpopmin(r: redis.Redis):\n    r.zadd(\"foo\", {\"one\": 1, \"two\": 2, \"three\": 3})\n    r.zadd(\"bar\", {\"a\": 1.5, \"b\": 2, \"c\": 3})\n    assert r.bzpopmin([\"foo\", \"bar\"], 0) == (b\"foo\", b\"one\", 1.0)\n    assert r.bzpopmin([\"foo\", \"bar\"], 0) == (b\"foo\", b\"two\", 2.0)\n    assert r.bzpopmin([\"foo\", \"bar\"], 0) == (b\"foo\", b\"three\", 3.0)\n    assert r.bzpopmin([\"foo\", \"bar\"], 0) == (b\"bar\", b\"a\", 1.5)\n\n\ndef test_bzpopmax(r: redis.Redis):\n    r.zadd(\"foo\", {\"one\": 1, \"two\": 2, \"three\": 3})\n    r.zadd(\"bar\", {\"a\": 1.5, \"b\": 2.5, \"c\": 3.5})\n    assert r.bzpopmax([\"foo\", \"bar\"], 0) == (b\"foo\", b\"three\", 3.0)\n    assert r.bzpopmax([\"foo\", \"bar\"], 0) == (b\"foo\", b\"two\", 2.0)\n    assert r.bzpopmax([\"foo\", \"bar\"], 0) 
== (b\"foo\", b\"one\", 1.0)\n    assert r.bzpopmax([\"foo\", \"bar\"], 0) == (b\"bar\", b\"c\", 3.5)\n\n\ndef test_zscan(r: redis.Redis):\n    # Set up the data\n    name = \"zscan-test\"\n    for ix in range(20):\n        r.zadd(name, {\"key:%s\" % ix: ix})\n    expected = dict(r.zrange(name, 0, -1, withscores=True))\n\n    # Test the basic version\n    results = {}\n    for key, val in r.zscan_iter(name, count=6):\n        results[key] = val\n    assert results == expected\n\n    # Now test that the MATCH functionality works\n    results = {}\n    cursor = \"0\"\n    while cursor != 0:\n        cursor, data = r.zscan(name, cursor, match=\"*7\", count=6)\n        results.update(data)\n    assert results == {b\"key:7\": 7.0, b\"key:17\": 17.0}\n\n\ndef test_zrandemember(r: redis.Redis):\n    r.zadd(\"a\", {\"a1\": 1, \"a2\": 2, \"a3\": 3, \"a4\": 4, \"a5\": 5})\n    assert r.zrandmember(\"a\") is not None\n    assert len(r.zrandmember(\"a\", 2)) == 2\n    # with scores\n    assert len(r.zrandmember(\"a\", 2, True)) == 4\n    # without duplications\n    assert len(r.zrandmember(\"a\", 10)) == 5\n    # with duplications\n    assert len(r.zrandmember(\"a\", -10)) == 10\n\n\n@pytest.mark.unsupported_server_types(\"dragonfly\")\ndef test_zdiffstore(r: redis.Redis):\n    r.zadd(\"a\", {\"a1\": 1, \"a2\": 2, \"a3\": 3})\n    r.zadd(\"b\", {\"a1\": 1, \"a2\": 2})\n    assert r.zdiffstore(\"out\", [\"a\", \"b\"])\n    assert r.zrange(\"out\", 0, -1) == [b\"a3\"]\n    assert r.zrange(\"out\", 0, -1, withscores=True) == [(b\"a3\", 3.0)]\n\n\ndef test_zdiff(r: redis.Redis):\n    r.zadd(\"a\", {\"a1\": 1, \"a2\": 2, \"a3\": 3})\n    r.zadd(\"b\", {\"a1\": 1, \"a2\": 2})\n    assert r.zdiff([\"a\", \"b\"]) == [b\"a3\"]\n    assert r.zdiff([\"a\", \"b\"], withscores=True) == [b\"a3\", b\"3\"]\n\n\ndef test_zunion(r: redis.Redis):\n    r.zadd(\"a\", {\"a1\": 1, \"a2\": 1, \"a3\": 1})\n    r.zadd(\"b\", {\"a1\": 2, \"a2\": 2, \"a3\": 2})\n    r.zadd(\"c\", {\"a1\": 6, \"a3\": 5, 
\"a4\": 4})\n    # sum\n    assert r.zunion([\"a\", \"b\", \"c\"]) == [b\"a2\", b\"a4\", b\"a3\", b\"a1\"]\n    assert r.zunion([\"a\", \"b\", \"c\"], withscores=True) == [\n        (b\"a2\", 3),\n        (b\"a4\", 4),\n        (b\"a3\", 8),\n        (b\"a1\", 9),\n    ]\n    # max\n    assert r.zunion([\"a\", \"b\", \"c\"], aggregate=\"MAX\", withscores=True) == [\n        (b\"a2\", 2),\n        (b\"a4\", 4),\n        (b\"a3\", 5),\n        (b\"a1\", 6),\n    ]\n    # min\n    assert r.zunion([\"a\", \"b\", \"c\"], aggregate=\"MIN\", withscores=True) == [\n        (b\"a1\", 1),\n        (b\"a2\", 1),\n        (b\"a3\", 1),\n        (b\"a4\", 4),\n    ]\n    # with weight\n    assert r.zunion({\"a\": 1, \"b\": 2, \"c\": 3}, withscores=True) == [\n        (b\"a2\", 5),\n        (b\"a4\", 12),\n        (b\"a3\", 20),\n        (b\"a1\", 23),\n    ]\n\n\ndef test_zinter(r: redis.Redis):\n    r.zadd(\"a\", {\"a1\": 1, \"a2\": 2, \"a3\": 1})\n    r.zadd(\"b\", {\"a1\": 2, \"a2\": 2, \"a3\": 2})\n    r.zadd(\"c\", {\"a1\": 6, \"a3\": 5, \"a4\": 4})\n    assert r.zinter([\"a\", \"b\", \"c\"]) == [b\"a3\", b\"a1\"]\n    # invalid aggregation\n    with pytest.raises(redis.DataError):\n        r.zinter([\"a\", \"b\", \"c\"], aggregate=\"foo\", withscores=True)\n    # aggregate with SUM\n    assert r.zinter([\"a\", \"b\", \"c\"], withscores=True) == [(b\"a3\", 8), (b\"a1\", 9)]\n    # aggregate with MAX\n    assert r.zinter([\"a\", \"b\", \"c\"], aggregate=\"MAX\", withscores=True) == [\n        (b\"a3\", 5),\n        (b\"a1\", 6),\n    ]\n    # aggregate with MIN\n    assert r.zinter([\"a\", \"b\", \"c\"], aggregate=\"MIN\", withscores=True) == [\n        (b\"a1\", 1),\n        (b\"a3\", 1),\n    ]\n    # with weights\n    assert r.zinter({\"a\": 1, \"b\": 2, \"c\": 3}, withscores=True) == [\n        (b\"a3\", 20),\n        (b\"a1\", 23),\n    ]\n\n\n@pytest.mark.min_server(\"7\")\ndef test_zintercard(r: redis.Redis):\n    r.zadd(\"a\", {\"a1\": 1, \"a2\": 2, \"a3\": 1})\n    
r.zadd(\"b\", {\"a1\": 2, \"a2\": 2, \"a3\": 2})\n    r.zadd(\"c\", {\"a1\": 6, \"a3\": 5, \"a4\": 4})\n    assert r.zintercard(3, [\"a\", \"b\", \"c\"]) == 2\n    assert r.zintercard(3, [\"a\", \"b\", \"c\"], limit=1) == 1\n\n\n@pytest.mark.unsupported_server_types(\"dragonfly\")\ndef test_zrangestore(r: redis.Redis):\n    r.zadd(\"a\", {\"a1\": 1, \"a2\": 2, \"a3\": 3})\n    assert r.zrangestore(\"b\", \"a\", 0, 1)\n    assert r.zrange(\"b\", 0, -1) == [b\"a1\", b\"a2\"]\n    assert r.zrangestore(\"b\", \"a\", 1, 2)\n    assert r.zrange(\"b\", 0, -1) == [b\"a2\", b\"a3\"]\n    assert r.zrange(\"b\", 0, -1, withscores=True) == [(b\"a2\", 2), (b\"a3\", 3)]\n    # reversed order\n    assert r.zrangestore(\"b\", \"a\", 1, 2, desc=True)\n    assert r.zrange(\"b\", 0, -1) == [b\"a1\", b\"a2\"]\n    # by score\n    assert r.zrangestore(\"b\", \"a\", 2, 1, byscore=True, offset=0, num=1, desc=True)\n    assert r.zrange(\"b\", 0, -1) == [b\"a2\"]\n    # by lex\n    assert r.zrange(\"a\", \"[a2\", \"(a3\", bylex=True, offset=0, num=1) == [b\"a2\"]\n    assert r.zrangestore(\"b\", \"a\", \"[a2\", \"(a3\", bylex=True, offset=0, num=1)\n    assert r.zrange(\"b\", 0, -1) == [b\"a2\"]\n\n\n@pytest.mark.min_server(\"7\")\n@pytest.mark.unsupported_server_types(\"dragonfly\")\ndef test_zmpop(r: redis.Redis):\n    r.zadd(\"a\", {\"a1\": 1, \"a2\": 2, \"a3\": 3})\n    res = [b\"a\", [[b\"a1\", b\"1\"], [b\"a2\", b\"2\"]]]\n    assert r.zmpop(\"2\", [\"b\", \"a\"], min=True, count=2) == res\n    with pytest.raises(redis.DataError):\n        r.zmpop(\"2\", [\"b\", \"a\"], count=2)\n    r.zadd(\"b\", {\"b1\": 10, \"ab\": 9, \"b3\": 8})\n    assert r.zmpop(\"2\", [\"b\", \"a\"], max=True) == [b\"b\", [[b\"b1\", b\"10\"]]]\n\n\n@pytest.mark.min_server(\"7\")\n@pytest.mark.unsupported_server_types(\"dragonfly\")\ndef test_bzmpop(r: redis.Redis):\n    r.zadd(\"a\", {\"a1\": 1, \"a2\": 2, \"a3\": 3})\n    res = [b\"a\", [[b\"a1\", b\"1\"], [b\"a2\", b\"2\"]]]\n    assert r.bzmpop(1, \"2\", 
[\"b\", \"a\"], min=True, count=2) == res\n    with pytest.raises(redis.DataError):\n        r.bzmpop(1, \"2\", [\"b\", \"a\"], count=2)\n    r.zadd(\"b\", {\"b1\": 10, \"ab\": 9, \"b3\": 8})\n    res = [b\"b\", [[b\"b1\", b\"10\"]]]\n    assert r.bzmpop(0, \"2\", [\"b\", \"a\"], max=True) == res\n    assert r.bzmpop(1, \"2\", [\"foo\", \"bar\"], max=True) is None\n"
  },
  {
    "path": "tests/fakeredis/test/test_mixins/test_streams_commands.py",
    "content": "import threading\nimport time\nfrom typing import List\n\nimport pytest\nimport redis\nfrom fakeredis import _msgs as msgs\n\nfrom test import testtools\n\n\ndef get_ids(results):\n    return [result[0] for result in results]\n\n\ndef add_items(r: redis.Redis, stream: str, n: int):\n    id_list = list()\n    for i in range(n):\n        id_list.append(r.xadd(stream, {\"k\": i}))\n    return id_list\n\n\ndef test_xadd_redis__green(r: redis.Redis):\n    stream = \"stream\"\n    before = int(1000 * time.time())\n    m1 = r.xadd(stream, {\"some\": \"other\"})\n    ts1, seq1 = m1.decode().split(\"-\")\n    after = int(1000 * time.time()) + 1\n    assert before <= int(ts1) <= after\n    seq1 = int(seq1)\n    m2 = r.xadd(stream, {\"add\": \"more\"}, id=f\"{ts1}-{seq1 + 1}\")\n    ts2, seq2 = m2.decode().split(\"-\")\n    assert ts1 == ts2\n    assert int(seq2) == int(seq1) + 1\n\n    stream = \"stream2\"\n    m1 = r.xadd(stream, {\"some\": \"other\"})\n    ts1, seq1 = m1.decode().split(\"-\")\n    ts1 = int(ts1) - 1\n    with pytest.raises(redis.ResponseError):\n        r.xadd(stream, {\"add\": \"more\"}, id=f\"{ts1}-*\")\n    with pytest.raises(redis.ResponseError):\n        r.xadd(stream, {\"add\": \"more\"}, id=f\"{ts1}-1\")\n\n\n@pytest.mark.min_server(\"7\")\ndef test_xadd_redis7(r: redis.Redis):  # Using ts-*\n    stream = \"stream\"\n    m1 = r.xadd(stream, {\"some\": \"other\"})\n    ts1, seq1 = m1.decode().split(\"-\")\n    m2 = r.xadd(stream, {\"add\": \"more\"}, id=f\"{ts1}-*\")\n    ts2, seq2 = m2.decode().split(\"-\")\n    ts1, seq1 = int(ts1), int(seq1)\n    ts2, seq2 = int(ts2), int(seq2)\n    assert ts2 == ts1\n    assert seq2 == seq1 + 1\n\n\ndef test_xadd_maxlen(r: redis.Redis):\n    stream = \"stream\"\n    id_list = add_items(r, stream, 10)\n    maxlen = 5\n    id_list.append(r.xadd(stream, {\"k\": \"new\"}, maxlen=maxlen, approximate=False))\n    assert r.xlen(stream) == maxlen\n    results = r.xrange(stream, id_list[0])\n    assert 
get_ids(results) == id_list[len(id_list) - maxlen :]\n    with pytest.raises(redis.ResponseError):\n        testtools.raw_command(\n            r, \"xadd\", stream, \"maxlen\", \"3\", \"minid\", \"sometestvalue\", \"field\", \"value\"\n        )\n    assert r.set(\"non-a-stream\", 1) == 1\n    with pytest.raises(redis.ResponseError):\n        r.xlen(\"non-a-stream\")\n\n\ndef test_xadd_minid(r: redis.Redis):\n    stream = \"stream\"\n    id_list = add_items(r, stream, 10)\n    minid = id_list[6]\n    id_list.append(r.xadd(stream, {\"k\": \"new\"}, minid=minid, approximate=False))\n    assert r.xlen(stream) == len(id_list) - 6\n    results = r.xrange(stream, id_list[0])\n    assert get_ids(results) == id_list[6:]\n\n\ndef test_xtrim(r: redis.Redis):\n    stream = \"stream\"\n\n    # trimming an empty key doesn't do anything\n    assert r.xtrim(stream, 1000) == 0\n    add_items(r, stream, 4)\n\n    # trimming an amount larger than the number of messages doesn't do anything\n    assert r.xtrim(stream, 5, approximate=False) == 0\n\n    # 1 message is trimmed\n    assert r.xtrim(stream, 3, approximate=False) == 1\n\n\n@pytest.mark.min_server(\"6.2.4\")\ndef test_xtrim_minlen_and_length_args(r: redis.Redis):\n    stream = \"stream\"\n    add_items(r, stream, 4)\n\n    # Future self: No limits without approximate, according to the api\n    # with pytest.raises(redis.ResponseError):\n    #     assert r.xtrim(stream, 3, approximate=False, limit=2)\n\n    with pytest.raises(redis.DataError):\n        assert r.xtrim(stream, maxlen=3, minid=\"sometestvalue\")\n\n    with pytest.raises(redis.ResponseError):\n        testtools.raw_command(\n            r, \"xtrim\", stream, \"maxlen\", \"3\", \"minid\", \"sometestvalue\"\n        )\n    # minid with a limit\n    stream = \"s2\"\n    m1 = add_items(r, stream, 4)[0]\n    assert r.xtrim(stream, minid=m1, limit=3) == 0\n\n    # pure minid\n    m4 = add_items(r, stream, 4)[-1]\n    assert r.xtrim(stream, approximate=False, minid=m4) 
== 7\n\n    # minid approximate\n    r.xadd(stream, {\"foo\": \"bar\"})\n    r.xadd(stream, {\"foo\": \"bar\"})\n    m3 = r.xadd(stream, {\"foo\": \"bar\"})\n    r.xadd(stream, {\"foo\": \"bar\"})\n    assert r.xtrim(stream, approximate=False, minid=m3) == 3\n\n\ndef test_xadd_nomkstream(r: redis.Redis):\n    r.xadd(\"stream2\", {\"some\": \"other\"}, nomkstream=True)\n    assert r.xlen(\"stream2\") == 0\n    # nomkstream option\n    stream = \"stream\"\n    r.xadd(stream, {\"foo\": \"bar\"})\n    r.xadd(stream, {\"some\": \"other\"}, nomkstream=False)\n    assert r.xlen(stream) == 2\n    r.xadd(stream, {\"some\": \"other\"}, nomkstream=True)\n    assert r.xlen(stream) == 3\n\n\ndef _add_to_stream(r: redis.Redis, stream_name: str, n: int):\n    res = []\n    for _ in range(n):\n        res.append(r.xadd(stream_name, {\"foo\": \"bar\"}))\n    return res\n\n\ndef test_xrevrange(r: redis.Redis):\n    stream = \"stream\"\n    m1, m2, m3, m4 = _add_to_stream(r, stream, 4)\n\n    results = r.xrevrange(stream, max=m4)\n    assert get_ids(results) == [m4, m3, m2, m1]\n\n    results = r.xrevrange(stream, max=m3, min=m2)\n    assert get_ids(results) == [m3, m2]\n\n    results = r.xrevrange(stream, min=m3)\n    assert get_ids(results) == [m4, m3]\n\n    results = r.xrevrange(stream, min=m2, count=1)\n    assert get_ids(results) == [m4]\n\n\ndef test_xrange(r: redis.Redis):\n    m = r.xadd(\"stream1\", {\"foo\": \"bar\"})\n    assert r.xrange(\"stream1\") == [\n        (m, {b\"foo\": b\"bar\"}),\n    ]\n\n    stream = \"stream2\"\n    m = testtools.raw_command(\n        r, \"xadd\", stream, \"*\", b\"field\", b\"value\", b\"foo\", b\"bar\"\n    )\n\n    assert r.xrevrange(stream) == [\n        (m, {b\"field\": b\"value\", b\"foo\": b\"bar\"}),\n    ]\n\n    stream = \"stream\"\n    m1, m2, m3, m4 = _add_to_stream(r, stream, 4)\n\n    results = r.xrange(stream, min=m1)\n    assert get_ids(results) == [m1, m2, m3, m4]\n\n    results = r.xrange(stream, min=m2, max=m3)\n    assert 
get_ids(results) == [m2, m3]\n\n    results = r.xrange(stream, max=m3)\n    assert get_ids(results) == [m1, m2, m3]\n\n    results = r.xrange(stream, max=m2, count=1)\n    assert get_ids(results) == [m1]\n\n\ndef get_stream_message(client, stream, message_id):\n    \"\"\"Fetch a stream message and format it as a (message_id, fields) pair\"\"\"\n    response = client.xrange(stream, min=message_id, max=message_id)\n    assert len(response) == 1\n    return response[0]\n\n\ndef test_xread_multiple_streams_blocking(r: redis.Redis):\n    stream1 = \"stream1\"\n    stream2 = \"stream2\"\n    m1 = r.xadd(stream1, {\"foo\": \"bar\"})\n    m2 = r.xadd(stream2, {\"bing\": \"baz\"})\n\n    res = r.xread(streams={stream1: 0, stream2: 0}, block=10)\n    assert len(res) == 2\n\n\ndef test_xread_blocking_no_count(r: redis.Redis):\n    k = \"key\"\n    r.xadd(k, {\"value\": 1234})\n    streams = {k: \"0\"}\n    m1 = r.xread(streams=streams, block=10)\n    assert m1[0][1][0][1] == {b\"value\": b\"1234\"}\n\n\ndef test_xread(r: redis.Redis):\n    stream = \"stream\"\n    m1 = r.xadd(stream, {\"foo\": \"bar\"})\n    m2 = r.xadd(stream, {\"bing\": \"baz\"})\n\n    expected = [\n        [\n            stream.encode(),\n            [get_stream_message(r, stream, m1), get_stream_message(r, stream, m2)],\n        ]\n    ]\n    # xread starting at 0 returns both messages\n    assert r.xread(streams={stream: 0}) == expected\n\n    expected = [[stream.encode(), [get_stream_message(r, stream, m1)]]]\n    # xread starting at 0 and count=1 returns only the first message\n    assert r.xread(streams={stream: 0}, count=1) == expected\n\n    expected = [[stream.encode(), [get_stream_message(r, stream, m2)]]]\n    # xread starting at m1 returns only the second message\n    assert r.xread(streams={stream: m1}) == expected\n\n    # xread starting at the last message returns an empty list\n    assert r.xread(streams={stream: m2}) == []\n\n\ndef test_xread_count(r: redis.Redis):\n    r.xadd(\"test\", 
{\"x\": 1})\n    result = r.xread(streams={\"test\": \"0\"}, count=100, block=10)\n    assert result[0][0] == b\"test\"\n    assert result[0][1][0][1] == {b\"x\": b\"1\"}\n\n\ndef test_xread_bad_commands(r: redis.Redis):\n    with pytest.raises(redis.ResponseError) as exc_info:\n        testtools.raw_command(r, \"xread\", \"foo\", \"11-1\")\n    print(exc_info)\n    with pytest.raises(redis.ResponseError) as ex2:\n        testtools.raw_command(\n            r,\n            \"xread\",\n            \"streams\",\n            \"foo\",\n        )\n    print(ex2)\n\n\ndef test_xdel(r: redis.Redis):\n    stream = \"stream\"\n\n    # deleting from an empty stream doesn't do anything\n    assert r.xdel(stream, 1) == 0\n\n    m1 = r.xadd(stream, {\"foo\": \"bar\"})\n    m2 = r.xadd(stream, {\"foo\": \"bar\"})\n    m3 = r.xadd(stream, {\"foo\": \"bar\"})\n\n    # xdel returns the number of deleted elements\n    assert r.xdel(stream, m1) == 1\n    assert r.xdel(stream, m2, m3) == 2\n\n    with pytest.raises(redis.ResponseError) as ex:\n        testtools.raw_command(r, \"XDEL\", stream)\n    assert ex.value.args[0] == msgs.WRONG_ARGS_MSG6.format(\"xdel\")[4:]\n    assert r.xdel(\"non-existing-key\", \"1-1\") == 0\n\n\ndef test_xgroup_destroy(r: redis.Redis):\n    stream = \"stream\"\n    group = \"group\"\n    r.xadd(stream, {\"foo\": \"bar\"})\n\n    assert r.xgroup_destroy(stream, group) == 0\n\n    r.xgroup_create(stream, group, 0)\n    assert r.xgroup_destroy(stream, group) == 1\n\n\n@pytest.mark.min_server(\"7\")\ndef test_xgroup_create_redis7(r: redis.Redis):\n    stream, group = \"stream\", \"group\"\n    message_id = r.xadd(stream, {\"foo\": \"bar\"})\n    r.xgroup_create(stream, group, message_id)\n    r.xadd(stream, {\"foo\": \"bar\"})\n    expected = [\n        {\n            \"name\": group.encode(),\n            \"consumers\": 0,\n            \"pending\": 0,\n            \"last-delivered-id\": message_id,\n            \"entries-read\": None,\n            \"lag\": 
1,\n        }\n    ]\n    assert r.xinfo_groups(stream) == expected\n\n\n@pytest.mark.min_server(\"7\")\n@pytest.mark.unsupported_server_types(\"dragonfly\")\ndef test_xgroup_setid_redis7(r: redis.Redis):\n    stream, group = \"stream\", \"group\"\n    message_id = r.xadd(stream, {\"foo\": \"bar\"})\n\n    r.xgroup_create(stream, group, 0)\n    # advance the last_delivered_id to the message_id\n    r.xgroup_setid(stream, group, message_id, entries_read=2)\n    expected = [\n        {\n            \"name\": group.encode(),\n            \"consumers\": 0,\n            \"pending\": 0,\n            \"last-delivered-id\": message_id,\n            \"entries-read\": 2,\n            \"lag\": -1,\n        }\n    ]\n    assert r.xinfo_groups(stream) == expected\n\n\ndef test_xgroup_delconsumer(r: redis.Redis):\n    stream, group, consumer = \"stream\", \"group\", \"consumer\"\n    r.xadd(stream, {\"foo\": \"bar\"})\n    r.xadd(stream, {\"foo\": \"bar\"})\n    r.xgroup_create(stream, group, 0)\n\n    # a consumer that hasn't yet read any messages doesn't do anything\n    assert r.xgroup_delconsumer(stream, group, consumer) == 0\n\n    # read all messages from the group\n    r.xreadgroup(group, consumer, streams={stream: \">\"})\n\n    # deleting the consumer should return 2 pending messages\n    assert r.xgroup_delconsumer(stream, group, consumer) == 2\n\n\ndef test_xgroup_createconsumer(r: redis.Redis):\n    stream, group, consumer = \"stream\", \"group\", \"consumer\"\n    r.xadd(stream, {\"foo\": \"bar\"})\n    r.xadd(stream, {\"foo\": \"bar\"})\n    r.xgroup_create(stream, group, 0)\n    assert r.xgroup_createconsumer(stream, group, consumer) == 1\n    # Adding consumer with existing consumer name does nothing\n    assert r.xgroup_createconsumer(stream, group, consumer) == 0\n\n    # read all messages from the group\n    r.xreadgroup(group, consumer, streams={stream: \">\"})\n\n    # deleting the consumer should return 2 pending messages\n    assert 
r.xgroup_delconsumer(stream, group, consumer) == 2\n\n\ndef test_xinfo_consumers(r: redis.Redis):\n    stream, group, consumer1, consumer2 = \"stream\", \"group\", \"consumer1\", \"consumer2\"\n    r.xadd(stream, {\"foo\": \"bar\"})\n    r.xadd(stream, {\"foo\": \"bar\"})\n    r.xadd(stream, {\"foo\": \"bar\"})\n\n    r.xgroup_create(stream, group, 0)\n    r.xreadgroup(group, consumer1, streams={stream: \">\"}, count=1)\n    r.xreadgroup(group, consumer2, streams={stream: \">\"})\n    info = r.xinfo_consumers(stream, group)\n    assert len(info) == 2\n    expected = [\n        {\"name\": consumer1.encode(), \"pending\": 1},\n        {\"name\": consumer2.encode(), \"pending\": 2},\n    ]\n\n    # we can't determine the idle/inactive time, so just make sure it's an int\n    assert isinstance(info[0].pop(\"idle\"), int)\n    assert isinstance(info[1].pop(\"idle\"), int)\n    assert isinstance(info[0].pop(\"inactive\", 0), int)\n    assert isinstance(info[1].pop(\"inactive\", 0), int)\n    assert info == expected\n\n\ndef test_xreadgroup(r: redis.Redis):\n    stream, group, consumer = \"stream\", \"group\", \"consumer1\"\n    with pytest.raises(redis.exceptions.ResponseError):\n        r.xreadgroup(group, consumer, streams={stream: \">\"})\n    c1 = {b\"foo\": b\"bar\"}\n    c2 = {b\"bing\": b\"baz\"}\n    m1 = r.xadd(stream, c1)\n    m2 = r.xadd(stream, c2)\n    with pytest.raises(\n        redis.exceptions.ResponseError,\n        match=msgs.XREADGROUP_KEY_OR_GROUP_NOT_FOUND_MSG.format(stream, group),\n    ):\n        r.xreadgroup(group, consumer, streams={stream: \">\"})\n    r.xgroup_create(stream, group, 0)\n\n    expected = [\n        [\n            stream.encode(),\n            [get_stream_message(r, stream, m1), get_stream_message(r, stream, m2)],\n        ]\n    ]\n    # xread starting at 0 returns both messages\n    assert r.xreadgroup(group, consumer, streams={stream: \">\"}) == expected\n\n    r.xgroup_destroy(stream, group)\n    r.xgroup_create(stream, 
group, 0)\n\n    expected = [[stream.encode(), [get_stream_message(r, stream, m1)]]]\n    # xread with count=1 returns only the first message\n    assert r.xreadgroup(group, consumer, streams={stream: \">\"}, count=1) == expected\n\n    r.xgroup_destroy(stream, group)\n\n    # create the group using $ as the last id meaning subsequent reads\n    # will only find messages added after this\n    r.xgroup_create(stream, group, \"$\")\n\n    expected = []\n    # xread starting after the last message returns an empty message list\n    assert r.xreadgroup(group, consumer, streams={stream: \">\"}) == expected\n\n    # xreadgroup with noack does not have any items in the PEL\n    r.xgroup_destroy(stream, group)\n    r.xgroup_create(stream, group, \"0\")\n    assert (\n        len(r.xreadgroup(group, consumer, streams={stream: \">\"}, noack=True)[0][1]) == 2\n    )\n    # now there should be nothing pending\n    res = r.xreadgroup(group, consumer, streams={stream: \"0\"})\n    assert len(res[0][1]) == 0\n\n    r.xgroup_destroy(stream, group)\n    r.xgroup_create(stream, group, \"0\")\n\n    assert r.xreadgroup(group, consumer, streams={stream: \">\"}) == [\n        [stream.encode(), [(m1, c1), (m2, c2)]]\n    ]\n    # delete all the messages in the stream\n    assert r.xtrim(stream, 0) == 2\n    # TODO groups keep ids of deleted messages\n    # expected = [[stream.encode(), [(m1, {}), (m2, {})]]]\n    # assert r.xreadgroup(group, consumer, streams={stream: \"0\"}) == expected\n    r.xreadgroup(group, consumer, streams={stream: \">\"}, count=10, block=500)\n\n\ndef test_xinfo_stream(r: redis.Redis):\n    stream = \"stream\"\n    m1 = r.xadd(stream, {\"foo\": \"bar\"})\n    m2 = r.xadd(stream, {\"foo\": \"bar\"})\n    info = r.xinfo_stream(stream)\n\n    assert info[\"length\"] == 2\n    assert info[\"first-entry\"] == get_stream_message(r, stream, m1)\n    assert info[\"last-entry\"] == get_stream_message(r, stream, m2)\n\n\ndef assert_consumer_info(\n    r: redis.Redis, 
stream: str, group: str, equal_keys: List\n) -> List:\n    res = r.xinfo_consumers(stream, group)\n    assert len(res) == len(equal_keys)\n    for i in range(len(equal_keys)):\n        for k in res[i]:\n            if k in equal_keys[i]:\n                assert (\n                    res[i][k] == equal_keys[i][k]\n                ), f\"res[{i}][{k}] mismatch, {res}!={equal_keys}\"\n            else:\n                print(f\"res[{i}][{k}]={res[i][k]}\")\n    return res\n\n\ndef test_xack(r: redis.Redis):\n    stream, group, consumer = \"stream\", \"group\", \"consumer\"\n    # xack on a stream that doesn't exist\n    assert r.xack(stream, group, \"0-0\") == 0\n\n    m1 = r.xadd(stream, {\"one\": \"one\"})\n    m2 = r.xadd(stream, {\"two\": \"two\"})\n    m3 = r.xadd(stream, {\"three\": \"three\"})\n\n    # xack on a group that doesn't exist\n    assert r.xack(stream, group, m1) == 0\n\n    r.xgroup_create(stream, group, 0)\n    r.xreadgroup(group, consumer, streams={stream: \">\"})\n    assert_consumer_info(r, stream, group, [{\"name\": b\"consumer\", \"pending\": 3}])\n    assert r.xack(stream, group, m1) == 1\n    time.sleep(0.01)\n    res = assert_consumer_info(r, stream, group, [{\"name\": b\"consumer\", \"pending\": 2}])\n    assert \"idle\" in res[0] and res[0][\"idle\"] > 0\n    assert r.xack(stream, group, m2, m3) == 2\n    assert_consumer_info(r, stream, group, [{\"name\": b\"consumer\", \"pending\": 0}])\n\n\n@pytest.mark.min_server(\"7\")\ndef test_xinfo_stream_redis7(r: redis.Redis):\n    stream = \"stream\"\n    m1 = r.xadd(stream, {\"foo\": \"bar\"})\n    m2 = r.xadd(stream, {\"foo\": \"bar\"})\n    info = r.xinfo_stream(stream)\n\n    assert info[\"length\"] == 2\n    assert info[\"first-entry\"] == get_stream_message(r, stream, m1)\n    assert info[\"last-entry\"] == get_stream_message(r, stream, m2)\n    assert info[\"max-deleted-entry-id\"] == b\"0-0\"\n    assert info[\"entries-added\"] == 2\n    assert info[\"recorded-first-entry-id\"] == m1\n\n 
   r.xtrim(stream, 0)\n    # Info about empty stream\n    info = r.xinfo_stream(stream)\n\n    assert info[\"length\"] == 0\n    assert info[\"first-entry\"] is None\n    assert info[\"last-entry\"] is None\n    assert info[\"max-deleted-entry-id\"] == b\"0-0\"\n    assert info[\"entries-added\"] == 2\n    assert info[\"recorded-first-entry-id\"] == b\"0-0\"\n\n    with pytest.raises(redis.exceptions.ResponseError):\n        r.xinfo_stream(\"non-existing-key\")\n\n\ndef test_xinfo_stream_full(r: redis.Redis):\n    stream, group = \"stream\", \"group\"\n    m1 = r.xadd(stream, {\"foo\": \"bar\"})\n    r.xgroup_create(stream, group, 0)\n    info = r.xinfo_stream(stream, full=True)\n\n    assert info[\"length\"] == 1\n    assert m1 in info[\"entries\"]\n    assert len(info[\"groups\"]) == 1\n\n\ndef test_xpending(r: redis.Redis):\n    stream, group, consumer1, consumer2 = \"stream\", \"group\", \"consumer1\", \"consumer2\"\n    m1 = r.xadd(stream, {\"foo\": \"bar\"})\n    m2 = r.xadd(stream, {\"foo\": \"bar\"})\n    r.xgroup_create(stream, group, 0)\n\n    # xpending on a group that has no consumers yet\n    expected = {\"pending\": 0, \"min\": None, \"max\": None, \"consumers\": []}\n    assert r.xpending(stream, group) == expected\n\n    # read 1 message from the group with each consumer\n    r.xreadgroup(group, consumer1, streams={stream: \">\"}, count=1)\n    r.xreadgroup(group, consumer2, streams={stream: \">\"}, count=1)\n\n    expected = {\n        \"pending\": 2,\n        \"min\": m1,\n        \"max\": m2,\n        \"consumers\": [\n            {\"name\": consumer1.encode(), \"pending\": 1},\n            {\"name\": consumer2.encode(), \"pending\": 1},\n        ],\n    }\n    assert r.xpending(stream, group) == expected\n\n\ndef test_xpending_range(r: redis.Redis):\n    stream, group, consumer1, consumer2 = \"stream\", \"group\", \"consumer1\", \"consumer2\"\n    m1 = r.xadd(stream, {\"foo\": \"bar\"})\n    m2 = r.xadd(stream, {\"foo\": \"bar\"})\n    
r.xgroup_create(stream, group, 0)\n\n    # xpending range on a group that has no consumers yet\n    assert r.xpending_range(stream, group, min=\"-\", max=\"+\", count=5) == []\n\n    # read 1 message from the group with each consumer\n    r.xreadgroup(group, consumer1, streams={stream: \">\"}, count=1)\n    r.xreadgroup(group, consumer2, streams={stream: \">\"}, count=1)\n\n    response = r.xpending_range(stream, group, min=\"-\", max=\"+\", count=5)\n    assert len(response) == 2\n    assert response[0][\"message_id\"] == m1\n    assert response[0][\"consumer\"] == consumer1.encode()\n    assert response[1][\"message_id\"] == m2\n    assert response[1][\"consumer\"] == consumer2.encode()\n\n    # test with consumer name\n    response = r.xpending_range(\n        stream, group, min=\"-\", max=\"+\", count=5, consumername=consumer1\n    )\n    assert response[0][\"message_id\"] == m1\n    assert response[0][\"consumer\"] == consumer1.encode()\n\n\ndef test_xpending_range_idle(r: redis.Redis):\n    stream, group, consumer1, consumer2 = \"stream\", \"group\", \"consumer1\", \"consumer2\"\n    r.xadd(stream, {\"foo\": \"bar\"})\n    r.xadd(stream, {\"foo\": \"bar\"})\n    r.xgroup_create(stream, group, 0)\n\n    # read 1 message from the group with each consumer\n    r.xreadgroup(group, consumer1, streams={stream: \">\"}, count=1)\n    r.xreadgroup(group, consumer2, streams={stream: \">\"}, count=1)\n\n    response = r.xpending_range(stream, group, min=\"-\", max=\"+\", count=5)\n    assert len(response) == 2\n    response = r.xpending_range(stream, group, min=\"-\", max=\"+\", count=5, idle=1000)\n    assert len(response) == 0\n\n\ndef test_xpending_range_negative(r: redis.Redis):\n    stream, group = \"stream\", \"group\"\n    with pytest.raises(redis.DataError):\n        r.xpending_range(stream, group, min=\"-\", max=\"+\", count=None)\n    with pytest.raises(ValueError):\n        r.xpending_range(stream, group, min=\"-\", max=\"+\", count=\"one\")\n    with 
pytest.raises(redis.DataError):\n        r.xpending_range(stream, group, min=\"-\", max=\"+\", count=-1)\n    with pytest.raises(ValueError):\n        r.xpending_range(stream, group, min=\"-\", max=\"+\", count=5, idle=\"one\")\n    with pytest.raises(redis.exceptions.ResponseError):\n        r.xpending_range(stream, group, min=\"-\", max=\"+\", count=5, idle=1.5)\n    with pytest.raises(redis.DataError):\n        r.xpending_range(stream, group, min=\"-\", max=\"+\", count=5, idle=-1)\n    with pytest.raises(redis.DataError):\n        r.xpending_range(stream, group, min=None, max=None, count=None, idle=0)\n    with pytest.raises(redis.DataError):\n        r.xpending_range(stream, group, min=None, max=None, count=None, consumername=0)\n\n\n@pytest.mark.min_server(\"7\")\n@testtools.run_test_if_redispy_ver(\"gte\", \"4.4\")\ndef test_xautoclaim_redis7(r: redis.Redis):\n    stream, group, consumer1, consumer2 = \"stream\", \"group\", \"consumer1\", \"consumer2\"\n\n    message_id1 = r.xadd(stream, {\"john\": \"wick\"})\n    message_id2 = r.xadd(stream, {\"johny\": \"deff\"})\n    message = get_stream_message(r, stream, message_id1)\n    r.xgroup_create(stream, group, 0)\n\n    # trying to claim a message that isn't already pending doesn't\n    # do anything\n    assert r.xautoclaim(stream, group, consumer2, min_idle_time=0) == [b\"0-0\", [], []]\n\n    # read the group as consumer1 to initially claim the messages\n    r.xreadgroup(group, consumer1, streams={stream: \">\"})\n\n    # claim one message as consumer2\n    response = r.xautoclaim(stream, group, consumer2, min_idle_time=0, count=1)\n    assert response[1] == [message]\n\n    # reclaim the messages as consumer1, but use the justid argument\n    # which only returns message ids\n    assert r.xautoclaim(\n        stream, group, consumer1, min_idle_time=0, start_id=0, justid=True\n    ) == [\n        message_id1,\n        message_id2,\n    ]\n    assert r.xautoclaim(\n        stream, group, consumer1, 
min_idle_time=0, start_id=message_id2, justid=True\n    ) == [message_id2]\n\n\n@pytest.mark.min_server(\"7\")\ndef test_xclaim_trimmed_redis7(r: redis.Redis):\n    # xclaim should not raise an exception if the item is not there\n    stream, group = \"stream\", \"group\"\n\n    r.xgroup_create(stream, group, id=\"$\", mkstream=True)\n\n    # add a couple of new items\n    sid1 = r.xadd(stream, {\"item\": 0})\n    sid2 = r.xadd(stream, {\"item\": 0})\n\n    # read them from consumer1\n    r.xreadgroup(group, \"consumer1\", {stream: \">\"})\n\n    # add a 3rd and trim the stream down to 2 items\n    r.xadd(stream, {\"item\": 3}, maxlen=2, approximate=False)\n\n    # xclaim them from consumer2\n    # the item that is still in the stream should be returned\n    item = r.xclaim(stream, group, \"consumer2\", 0, [sid1, sid2])\n    assert len(item) == 1\n    assert item[0][0] == sid2\n\n\ndef test_xclaim(r: redis.Redis):\n    stream, group, consumer1, consumer2 = \"stream\", \"group\", \"consumer1\", \"consumer2\"\n\n    message_id = r.xadd(stream, {\"john\": \"wick\"})\n    message = get_stream_message(r, stream, message_id)\n    r.xgroup_create(stream, group, 0)\n\n    # trying to claim a message that isn't already pending doesn't\n    # do anything\n    assert (\n        r.xclaim(stream, group, consumer2, min_idle_time=0, message_ids=(message_id,))\n        == []\n    )\n\n    # read the group as consumer1 to initially claim the messages\n    r.xreadgroup(group, consumer1, streams={stream: \">\"})\n\n    # claim the message as consumer2\n    assert r.xclaim(\n        stream, group, consumer2, min_idle_time=0, message_ids=(message_id,)\n    ) == [\n        message,\n    ]\n\n    # reclaim the message as consumer1, but use the justid argument\n    # which only returns message ids\n    assert r.xclaim(\n        stream,\n        group,\n        consumer1,\n        min_idle_time=0,\n        message_ids=(message_id,),\n        justid=True,\n    ) == [\n        message_id,\n   
 ]\n\n\ndef test_xread_blocking(create_redis):\n    # thread with xread block 0 should hang\n    # putting data in the stream should unblock it\n    event = threading.Event()\n    event.clear()\n\n    def thread_func():\n        while not event.is_set():\n            time.sleep(0.1)\n        r = create_redis(db=1)\n        r.xadd(\"stream\", {\"x\": \"1\"})\n        time.sleep(0.1)\n\n    t = threading.Thread(target=thread_func)\n    t.start()\n    r1 = create_redis(db=1)\n    event.set()\n    result = r1.xread({\"stream\": \"$\"}, block=0, count=1)\n    event.clear()\n    t.join()\n    assert result[0][0] == b\"stream\"\n    assert result[0][1][0][1] == {b\"x\": b\"1\"}\n\n\ndef test_stream_ttl(r: redis.Redis):\n    stream = \"stream\"\n\n    m1 = r.xadd(stream, {\"foo\": \"bar\"})\n    expected = [\n        [\n            stream.encode(),\n            [get_stream_message(r, stream, m1)],\n        ]\n    ]\n    assert r.xread(streams={stream: 0}) == expected\n    assert r.xtrim(stream, 0) == 1\n    assert r.ttl(stream) == -1\n"
  },
  {
    "path": "tests/fakeredis/test/test_mixins/test_string_commands.py",
    "content": "from __future__ import annotations\n\nimport time\nfrom datetime import timedelta\n\nimport pytest\nimport redis\nimport redis.client\nfrom redis.exceptions import ResponseError\n\nfrom ..testtools import raw_command\n\n\ndef test_append(r: redis.Redis):\n    assert r.set(\"foo\", \"bar\")\n    assert r.append(\"foo\", \"baz\") == 6\n    assert r.get(\"foo\") == b\"barbaz\"\n\n\ndef test_append_with_no_preexisting_key(r: redis.Redis):\n    assert r.append(\"foo\", \"bar\") == 3\n    assert r.get(\"foo\") == b\"bar\"\n\n\ndef test_append_wrong_type(r: redis.Redis):\n    r.rpush(\"foo\", b\"x\")\n    with pytest.raises(redis.ResponseError):\n        r.append(\"foo\", b\"x\")\n\n\ndef test_decr(r: redis.Redis):\n    r.set(\"foo\", 10)\n    assert r.decr(\"foo\") == 9\n    assert r.get(\"foo\") == b\"9\"\n\n\ndef test_decr_newkey(r: redis.Redis):\n    r.decr(\"foo\")\n    assert r.get(\"foo\") == b\"-1\"\n\n\ndef test_decr_expiry(r: redis.Redis):\n    r.set(\"foo\", 10, ex=10)\n    r.decr(\"foo\", 5)\n    assert r.ttl(\"foo\") > 0\n\n\ndef test_decr_badtype(r: redis.Redis):\n    r.set(\"foo\", \"bar\")\n    with pytest.raises(redis.ResponseError):\n        r.decr(\"foo\", 15)\n    r.rpush(\"foo2\", 1)\n    with pytest.raises(redis.ResponseError):\n        r.decr(\"foo2\", 15)\n\n\ndef test_get_does_not_exist(r: redis.Redis):\n    assert r.get(\"foo\") is None\n\n\ndef test_get_with_non_str_keys(r: redis.Redis):\n    assert r.set(\"2\", \"bar\") is True\n    assert r.get(2) == b\"bar\"\n\n\ndef test_get_invalid_type(r: redis.Redis):\n    assert r.hset(\"foo\", \"key\", \"value\") == 1\n    with pytest.raises(redis.ResponseError):\n        r.get(\"foo\")\n\n\ndef test_getset_exists(r: redis.Redis):\n    r.set(\"foo\", \"bar\")\n    val = r.getset(\"foo\", b\"baz\")\n    assert val == b\"bar\"\n    val = r.getset(\"foo\", b\"baz2\")\n    assert val == b\"baz\"\n\n\ndef test_getset_wrong_type(r: redis.Redis):\n    r.rpush(\"foo\", b\"x\")\n    with 
pytest.raises(redis.ResponseError):\n        r.getset(\"foo\", \"bar\")\n\n\ndef test_getdel(r: redis.Redis):\n    r[\"foo\"] = \"bar\"\n    assert r.getdel(\"foo\") == b\"bar\"\n    assert r.get(\"foo\") is None\n\n\ndef test_getdel_doesnt_exist(r: redis.Redis):\n    assert r.getdel(\"foo\") is None\n\n\ndef test_incr_with_no_preexisting_key(r: redis.Redis):\n    assert r.incr(\"foo\") == 1\n    assert r.incr(\"bar\", 2) == 2\n\n\ndef test_incr_by(r: redis.Redis):\n    assert r.incrby(\"foo\") == 1\n    assert r.incrby(\"bar\", 2) == 2\n\n\ndef test_incr_preexisting_key(r: redis.Redis):\n    r.set(\"foo\", 15)\n    assert r.incr(\"foo\", 5) == 20\n    assert r.get(\"foo\") == b\"20\"\n\n\ndef test_incr_expiry(r: redis.Redis):\n    r.set(\"foo\", 15, ex=10)\n    r.incr(\"foo\", 5)\n    assert r.ttl(\"foo\") > 0\n\n\ndef test_incr_bad_type(r: redis.Redis):\n    r.set(\"foo\", \"bar\")\n    with pytest.raises(redis.ResponseError):\n        r.incr(\"foo\", 15)\n    r.rpush(\"foo2\", 1)\n    with pytest.raises(redis.ResponseError):\n        r.incr(\"foo2\", 15)\n\n\ndef test_incr_with_float(r: redis.Redis):\n    with pytest.raises(redis.ResponseError):\n        r.incr(\"foo\", 2.0)\n\n\ndef test_incr_followed_by_mget(r: redis.Redis):\n    r.set(\"foo\", 15)\n    assert r.incr(\"foo\", 5) == 20\n    assert r.get(\"foo\") == b\"20\"\n\n\ndef test_incr_followed_by_mget_returns_strings(r: redis.Redis):\n    r.incr(\"foo\", 1)\n    assert r.mget([\"foo\"]) == [b\"1\"]\n\n\ndef test_incrbyfloat(r: redis.Redis):\n    r.set(\"foo\", 0)\n    assert r.incrbyfloat(\"foo\", 1.0) == 1.0\n    assert r.incrbyfloat(\"foo\", 1.0) == 2.0\n\n\ndef test_incrbyfloat_with_noexist(r: redis.Redis):\n    assert r.incrbyfloat(\"foo\", 1.0) == 1.0\n    assert r.incrbyfloat(\"foo\", 1.0) == 2.0\n\n\ndef test_incrbyfloat_expiry(r: redis.Redis):\n    r.set(\"foo\", 1.5, ex=10)\n    r.incrbyfloat(\"foo\", 2.5)\n    assert r.ttl(\"foo\") > 0\n\n\ndef test_incrbyfloat_bad_type(r: redis.Redis):\n    
r.set(\"foo\", \"bar\")\n    with pytest.raises(redis.ResponseError, match=\"not a valid float\"):\n        r.incrbyfloat(\"foo\", 1.0)\n    r.rpush(\"foo2\", 1)\n    with pytest.raises(redis.ResponseError):\n        r.incrbyfloat(\"foo2\", 1.0)\n\n\ndef test_incrbyfloat_precision(r: redis.Redis):\n    x = 1.23456789123456789\n    assert r.incrbyfloat(\"foo\", x) == x\n    assert float(r.get(\"foo\")) == x\n\n\ndef test_mget(r: redis.Redis):\n    r.set(\"foo\", \"one\")\n    r.set(\"bar\", \"two\")\n    assert r.mget([\"foo\", \"bar\"]) == [b\"one\", b\"two\"]\n    assert r.mget([\"foo\", \"bar\", \"baz\"]) == [b\"one\", b\"two\", None]\n    assert r.mget(\"foo\", \"bar\") == [b\"one\", b\"two\"]\n\n\ndef test_mget_with_no_keys(r: redis.Redis):\n    assert r.mget([]) == []\n\n\ndef test_mget_mixed_types(r: redis.Redis):\n    r.hset(\"hash\", \"bar\", \"baz\")\n    r.zadd(\"zset\", {\"bar\": 1})\n    r.sadd(\"set\", \"member\")\n    r.rpush(\"list\", \"item1\")\n    r.set(\"string\", \"value\")\n    assert r.mget([\"hash\", \"zset\", \"set\", \"string\", \"absent\"]) == [\n        None,\n        None,\n        None,\n        b\"value\",\n        None,\n    ]\n\n\ndef test_mset_with_no_keys(r: redis.Redis):\n    with pytest.raises(redis.ResponseError):\n        r.mset({})\n\n\ndef test_mset(r: redis.Redis):\n    assert r.mset({\"foo\": \"one\", \"bar\": \"two\"}) is True\n    assert r.mset({\"foo\": \"one\", \"bar\": \"two\"}) is True\n    assert r.mget(\"foo\", \"bar\") == [b\"one\", b\"two\"]\n\n\ndef test_msetnx(r: redis.Redis):\n    assert r.msetnx({\"foo\": \"one\", \"bar\": \"two\"})\n    assert not r.msetnx({\"bar\": \"two\", \"baz\": \"three\"})\n    assert r.mget(\"foo\", \"bar\", \"baz\") == [b\"one\", b\"two\", None]\n\n\ndef test_setex(r: redis.Redis):\n    assert r.setex(\"foo\", 100, \"bar\") is True\n    assert r.get(\"foo\") == b\"bar\"\n\n\ndef test_setex_using_timedelta(r: redis.Redis):\n    assert r.setex(\"foo\", timedelta(seconds=100), \"bar\") 
is True\n    assert r.get(\"foo\") == b\"bar\"\n\n\ndef test_setex_using_float(r: redis.Redis):\n    with pytest.raises(redis.ResponseError, match=\"integer\"):\n        r.setex(\"foo\", 1.2, \"bar\")\n\n\n@pytest.mark.min_server(\"6.2\")\ndef test_setex_overflow(r: redis.Redis):\n    with pytest.raises(ResponseError):\n        r.setex(\"foo\", 18446744073709561, \"bar\")  # Overflows longlong in ms\n\n\ndef test_set_ex(r: redis.Redis):\n    assert r.set(\"foo\", \"bar\", ex=100) is True\n    assert r.get(\"foo\") == b\"bar\"\n\n\n@pytest.mark.min_server(\"6.2\")\ndef test_set_exat(r: redis.Redis):\n    curr_time = int(time.time())\n    assert r.set(\"foo\", \"bar\", exat=curr_time + 100) is True\n    assert r.get(\"foo\") == b\"bar\"\n\n\n@pytest.mark.min_server(\"6.2\")\ndef test_set_pxat(r: redis.Redis):\n    curr_time = int(time.time() * 1000)\n    assert r.set(\"foo\", \"bar\", pxat=curr_time + 100) is True\n    assert r.get(\"foo\") == b\"bar\"\n    time.sleep(0.15)\n    assert r.get(\"foo\") is None\n\n\ndef test_set_ex_using_timedelta(r: redis.Redis):\n    assert r.set(\"foo\", \"bar\", ex=timedelta(seconds=100)) is True\n    assert r.get(\"foo\") == b\"bar\"\n\n\ndef test_set_ex_overflow(r: redis.Redis):\n    with pytest.raises(ResponseError):\n        r.set(\"foo\", \"bar\", ex=18446744073709561)  # Overflows longlong in ms\n\n\ndef test_set_px_overflow(r: redis.Redis):\n    with pytest.raises(ResponseError):\n        r.set(\"foo\", \"bar\", px=2**63 - 2)  # Overflows after adding current time\n\n\ndef test_set_px(r: redis.Redis):\n    assert r.set(\"foo\", \"bar\", px=100) is True\n    assert r.get(\"foo\") == b\"bar\"\n\n\ndef test_set_px_using_timedelta(r: redis.Redis):\n    assert r.set(\"foo\", \"bar\", px=timedelta(milliseconds=100)) is True\n    assert r.get(\"foo\") == b\"bar\"\n\n\ndef test_set_conflicting_expire_options(r: redis.Redis):\n    with pytest.raises(ResponseError):\n        r.set(\"foo\", \"bar\", ex=1, px=1)\n\n\ndef 
test_set_raises_wrong_ex(r: redis.Redis):\n    with pytest.raises(ResponseError):\n        r.set(\"foo\", \"bar\", ex=-100)\n    with pytest.raises(ResponseError):\n        r.set(\"foo\", \"bar\", ex=0)\n    assert not r.exists(\"foo\")\n\n\ndef test_set_using_timedelta_raises_wrong_ex(r: redis.Redis):\n    with pytest.raises(ResponseError):\n        r.set(\"foo\", \"bar\", ex=timedelta(seconds=-100))\n    with pytest.raises(ResponseError):\n        r.set(\"foo\", \"bar\", ex=timedelta(seconds=0))\n    assert not r.exists(\"foo\")\n\n\ndef test_set_raises_wrong_px(r: redis.Redis):\n    with pytest.raises(ResponseError):\n        r.set(\"foo\", \"bar\", px=-100)\n    with pytest.raises(ResponseError):\n        r.set(\"foo\", \"bar\", px=0)\n    assert not r.exists(\"foo\")\n\n\ndef test_set_using_timedelta_raises_wrong_px(r: redis.Redis):\n    with pytest.raises(ResponseError):\n        r.set(\"foo\", \"bar\", px=timedelta(milliseconds=-100))\n    with pytest.raises(ResponseError):\n        r.set(\"foo\", \"bar\", px=timedelta(milliseconds=0))\n    assert not r.exists(\"foo\")\n\n\ndef test_setex_raises_wrong_ex(r: redis.Redis):\n    with pytest.raises(ResponseError):\n        r.setex(\"foo\", -100, \"bar\")\n    with pytest.raises(ResponseError):\n        r.setex(\"foo\", 0, \"bar\")\n    assert not r.exists(\"foo\")\n\n\ndef test_setex_using_timedelta_raises_wrong_ex(r: redis.Redis):\n    with pytest.raises(ResponseError):\n        r.setex(\"foo\", timedelta(seconds=-100), \"bar\")\n    with pytest.raises(ResponseError):\n        r.setex(\"foo\", timedelta(seconds=-100), \"bar\")\n    assert not r.exists(\"foo\")\n\n\ndef test_setnx(r: redis.Redis):\n    assert r.setnx(\"foo\", \"bar\")\n    assert r.get(\"foo\") == b\"bar\"\n    assert not r.setnx(\"foo\", \"baz\")\n    assert r.get(\"foo\") == b\"bar\"\n\n\ndef test_set_nx(r: redis.Redis):\n    assert r.set(\"foo\", \"bar\", nx=True) is True\n    assert r.get(\"foo\") == b\"bar\"\n    assert r.set(\"foo\", 
\"bar\", nx=True) is None\n    assert r.get(\"foo\") == b\"bar\"\n\n\ndef test_set_xx(r: redis.Redis):\n    assert r.set(\"foo\", \"bar\", xx=True) is None\n    r.set(\"foo\", \"bar\")\n    assert r.set(\"foo\", \"bar\", xx=True) is True\n\n\n@pytest.mark.min_server(\"6.2\")\ndef test_set_get(r: redis.Redis):\n    assert raw_command(r, \"set\", \"foo\", \"bar\", \"GET\") is None\n    assert r.get(\"foo\") == b\"bar\"\n    assert raw_command(r, \"set\", \"foo\", \"baz\", \"GET\") == b\"bar\"\n    assert r.get(\"foo\") == b\"baz\"\n\n\n@pytest.mark.min_server(\"6.2\")\ndef test_set_get_xx(r: redis.Redis):\n    assert raw_command(r, \"set\", \"foo\", \"bar\", \"XX\", \"GET\") is None\n    assert r.get(\"foo\") is None\n    r.set(\"foo\", \"bar\")\n    assert raw_command(r, \"set\", \"foo\", \"baz\", \"XX\", \"GET\") == b\"bar\"\n    assert r.get(\"foo\") == b\"baz\"\n    assert raw_command(r, \"set\", \"foo\", \"baz\", \"GET\") == b\"baz\"\n\n\n@pytest.mark.min_server(\"7\")\ndef test_set_get_nx_redis7(r: redis.Redis):\n    # Note: this will most likely fail on a 7.0 server, based on the docs for SET\n    assert raw_command(r, \"set\", \"foo\", \"bar\", \"NX\", \"GET\") is None\n\n\n@pytest.mark.min_server(\"6.2\")\ndef set_get_wrongtype(r: redis.Redis):\n    r.lpush(\"foo\", \"bar\")\n    with pytest.raises(redis.ResponseError):\n        raw_command(r, \"set\", \"foo\", \"bar\", \"GET\")\n\n\ndef test_substr(r: redis.Redis):\n    r[\"foo\"] = \"one_two_three\"\n    assert r.substr(\"foo\", 0) == b\"one_two_three\"\n    assert r.substr(\"foo\", 0, 2) == b\"one\"\n    assert r.substr(\"foo\", 4, 6) == b\"two\"\n    assert r.substr(\"foo\", -5) == b\"three\"\n    assert r.substr(\"foo\", -4, -5) == b\"\"\n    assert r.substr(\"foo\", -5, -3) == b\"thr\"\n\n\ndef test_substr_noexist_key(r: redis.Redis):\n    assert r.substr(\"foo\", 0) == b\"\"\n    assert r.substr(\"foo\", 10) == b\"\"\n    assert r.substr(\"foo\", -5, -1) == b\"\"\n\n\ndef test_substr_wrong_type(r: 
redis.Redis):\n    r.rpush(\"foo\", b\"x\")\n    with pytest.raises(redis.ResponseError):\n        r.substr(\"foo\", 0)\n\n\ndef test_strlen(r: redis.Redis):\n    r[\"foo\"] = \"bar\"\n\n    assert r.strlen(\"foo\") == 3\n    assert r.strlen(\"noexists\") == 0\n\n\ndef test_strlen_wrong_type(r: redis.Redis):\n    r.rpush(\"foo\", b\"x\")\n    with pytest.raises(redis.ResponseError):\n        r.strlen(\"foo\")\n\n\ndef test_setrange(r: redis.Redis):\n    r.set(\"foo\", \"test\")\n    assert r.setrange(\"foo\", 1, \"aste\") == 5\n    assert r.get(\"foo\") == b\"taste\"\n\n    r.set(\"foo\", \"test\")\n    assert r.setrange(\"foo\", 1, \"a\") == 4\n    assert r.get(\"foo\") == b\"tast\"\n\n    assert r.setrange(\"bar\", 2, \"test\") == 6\n    assert r.get(\"bar\") == b\"\\x00\\x00test\"\n\n\ndef test_setrange_expiry(r: redis.Redis):\n    r.set(\"foo\", \"test\", ex=10)\n    r.setrange(\"foo\", 1, \"aste\")\n    assert r.ttl(\"foo\") > 0\n\n\ndef test_large_command(r: redis.Redis):\n    r.set(\"foo\", \"bar\" * 10000)\n    assert r.get(\"foo\") == b\"bar\" * 10000\n\n\ndef test_saving_non_ascii_chars_as_value(r: redis.Redis):\n    assert r.set(\"foo\", \"Ñandu\") is True\n    assert r.get(\"foo\") == \"Ñandu\".encode()\n\n\ndef test_saving_unicode_type_as_value(r: redis.Redis):\n    assert r.set(\"foo\", \"Ñandu\") is True\n    assert r.get(\"foo\") == \"Ñandu\".encode()\n\n\ndef test_saving_non_ascii_chars_as_key(r: redis.Redis):\n    assert r.set(\"Ñandu\", \"foo\") is True\n    assert r.get(\"Ñandu\") == b\"foo\"\n\n\ndef test_saving_unicode_type_as_key(r: redis.Redis):\n    assert r.set(\"Ñandu\", \"foo\") is True\n    assert r.get(\"Ñandu\") == b\"foo\"\n\n\ndef test_future_newbytes(r: redis.Redis):\n    # bytes = pytest.importorskip('builtins', reason='future.types not available').bytes\n    r.set(bytes(b\"\\xc3\\x91andu\"), \"foo\")\n    assert r.get(\"Ñandu\") == b\"foo\"\n\n\ndef test_future_newstr(r: redis.Redis):\n    # str = pytest.importorskip('builtins', 
reason='future.types not available').str\n    r.set(str(\"Ñandu\"), \"foo\")\n    assert r.get(\"Ñandu\") == b\"foo\"\n\n\ndef test_setitem_getitem(r: redis.Redis):\n    assert r.keys() == []\n    r[\"foo\"] = \"bar\"\n    assert r[\"foo\"] == b\"bar\"\n\n\ndef test_getitem_non_existent_key(r: redis.Redis):\n    assert r.keys() == []\n    assert \"noexists\" not in r.keys()\n\n\n@pytest.mark.slow\ndef test_getex(r: redis.Redis):\n    # Exceptions\n    with pytest.raises(redis.ResponseError):\n        raw_command(r, \"getex\", \"foo\", \"px\", 1000, \"ex\", 1)\n    with pytest.raises(redis.ResponseError):\n        raw_command(r, \"getex\", \"foo\", \"dsac\", 1000, \"ex\", 1)\n\n    r.set(\"foo\", \"val\")\n    assert r.getex(\"foo\", ex=1) == b\"val\"\n    time.sleep(1.5)\n    assert r.get(\"foo\") is None\n\n    r.set(\"foo2\", \"val\")\n    assert r.getex(\"foo2\", px=1000) == b\"val\"\n    time.sleep(1.5)\n    assert r.get(\"foo2\") is None\n\n    r.set(\"foo4\", \"val\")\n    r.getex(\"foo4\", exat=int(time.time() + 1))\n    time.sleep(1.5)\n    assert r.get(\"foo4\") is None\n\n    r.set(\"foo2\", \"val\")\n    r.getex(\"foo2\", pxat=int(time.time() + 1) * 1000)\n    time.sleep(1.5)\n    assert r.get(\"foo2\") is None\n\n    r.setex(\"foo5\", 1, \"val\")\n    r.getex(\"foo5\", persist=True)\n    assert r.ttl(\"foo5\") == -1\n    time.sleep(1.5)\n    assert r.get(\"foo5\") == b\"val\"\n\n\n@pytest.mark.min_server(\"7\")\n@pytest.mark.unsupported_server_types(\"dragonfly\")\ndef test_lcs(r: redis.Redis):\n    r.mset({\"key1\": \"ohmytext\", \"key2\": \"mynewtext\"})\n    assert r.lcs(\"key1\", \"key2\") == b\"mytext\"\n    assert r.lcs(\"key1\", \"key2\", len=True) == 6\n\n    assert r.lcs(\"key1\", \"key2\", idx=True, minmatchlen=3, withmatchlen=True) == [\n        b\"matches\",\n        [[[4, 7], [5, 8], 4]],\n        b\"len\",\n        6,\n    ]\n    assert r.lcs(\"key1\", \"key2\", idx=True, minmatchlen=3) == [\n        b\"matches\",\n        [[[4, 7], [5, 
8]]],\n        b\"len\",\n        6,\n    ]\n\n    with pytest.raises(redis.ResponseError):\n        assert r.lcs(\"key1\", \"key2\", len=True, idx=True)\n    with pytest.raises(redis.ResponseError):\n        raw_command(r, \"lcs\", \"key1\", \"key2\", \"not_supported_arg\")\n"
  },
  {
    "path": "tests/fakeredis/test/test_mixins/test_zadd.py",
    "content": "import pytest\nimport redis\nimport redis.client\nfrom packaging.version import Version\n\nfrom test.testtools import raw_command\n\nREDIS_VERSION = Version(redis.__version__)\n\n\ndef test_zadd(r: redis.Redis):\n    r.zadd(\"foo\", {\"four\": 4})\n    r.zadd(\"foo\", {\"three\": 3})\n    assert r.zadd(\"foo\", {\"two\": 2, \"one\": 1, \"zero\": 0}) == 3\n    assert r.zrange(\"foo\", 0, -1) == [b\"zero\", b\"one\", b\"two\", b\"three\", b\"four\"]\n    assert r.zadd(\"foo\", {\"zero\": 7, \"one\": 1, \"five\": 5}) == 1\n    assert r.zrange(\"foo\", 0, -1) == [\n        b\"one\",\n        b\"two\",\n        b\"three\",\n        b\"four\",\n        b\"five\",\n        b\"zero\",\n    ]\n\n\ndef test_zadd_empty(r: redis.Redis):\n    # Have to add at least one key/value pair\n    with pytest.raises(redis.RedisError):\n        r.zadd(\"foo\", {})\n\n\n@pytest.mark.min_server(\"7\")\ndef test_zadd_minus_zero_redis7(r: redis.Redis):\n    r.zadd(\"foo\", {\"a\": -0.0})\n    r.zadd(\"foo\", {\"a\": 0.0})\n    assert raw_command(r, \"zscore\", \"foo\", \"a\") == b\"0\"\n\n\ndef test_zadd_wrong_type(r: redis.Redis):\n    r.sadd(\"foo\", \"bar\")\n    with pytest.raises(redis.ResponseError):\n        r.zadd(\"foo\", {\"two\": 2})\n\n\ndef test_zadd_multiple(r: redis.Redis):\n    r.zadd(\"foo\", {\"one\": 1, \"two\": 2})\n    assert r.zrange(\"foo\", 0, 0) == [b\"one\"]\n    assert r.zrange(\"foo\", 1, 1) == [b\"two\"]\n\n\n@pytest.mark.parametrize(\n    \"param,return_value,state\",\n    [\n        ({\"four\": 2.0, \"three\": 1.0}, 0, [(b\"three\", 3.0), (b\"four\", 4.0)]),\n        (\n            {\"four\": 2.0, \"three\": 1.0, \"zero\": 0.0},\n            1,\n            [(b\"zero\", 0.0), (b\"three\", 3.0), (b\"four\", 4.0)],\n        ),\n        (\n            {\"two\": 2.0, \"one\": 1.0},\n            2,\n            [(b\"one\", 1.0), (b\"two\", 2.0), (b\"three\", 3.0), (b\"four\", 4.0)],\n        ),\n    ],\n)\n@pytest.mark.parametrize(\"ch\", [False, 
True])\ndef test_zadd_with_nx(r, param, return_value, state, ch):\n    r.zadd(\"foo\", {\"four\": 4.0, \"three\": 3.0})\n    assert r.zadd(\"foo\", param, nx=True, ch=ch) == return_value\n    assert r.zrange(\"foo\", 0, -1, withscores=True) == state\n\n\n@pytest.mark.parametrize(\n    \"param,return_value,state\",\n    [\n        ({\"four\": 2.0, \"three\": 1.0}, 0, [(b\"three\", 3.0), (b\"four\", 4.0)]),\n        (\n            {\"four\": 5.0, \"three\": 1.0, \"zero\": 0.0},\n            2,\n            [\n                (b\"zero\", 0.0),\n                (b\"three\", 3.0),\n                (b\"four\", 5.0),\n            ],\n        ),\n        (\n            {\"two\": 2.0, \"one\": 1.0},\n            2,\n            [(b\"one\", 1.0), (b\"two\", 2.0), (b\"three\", 3.0), (b\"four\", 4.0)],\n        ),\n    ],\n)\ndef test_zadd_with_gt_and_ch(r, param, return_value, state):\n    r.zadd(\"foo\", {\"four\": 4.0, \"three\": 3.0})\n    assert r.zadd(\"foo\", param, gt=True, ch=True) == return_value\n    assert r.zrange(\"foo\", 0, -1, withscores=True) == state\n\n\n@pytest.mark.parametrize(\n    \"param,return_value,state\",\n    [\n        ({\"four\": 2.0, \"three\": 1.0}, 0, [(b\"three\", 3.0), (b\"four\", 4.0)]),\n        (\n            {\"four\": 5.0, \"three\": 1.0, \"zero\": 0.0},\n            1,\n            [(b\"zero\", 0.0), (b\"three\", 3.0), (b\"four\", 5.0)],\n        ),\n        (\n            {\"two\": 2.0, \"one\": 1.0},\n            2,\n            [(b\"one\", 1.0), (b\"two\", 2.0), (b\"three\", 3.0), (b\"four\", 4.0)],\n        ),\n    ],\n)\ndef test_zadd_with_gt(r, param, return_value, state):\n    r.zadd(\"foo\", {\"four\": 4.0, \"three\": 3.0})\n    assert r.zadd(\"foo\", param, gt=True) == return_value\n    assert r.zrange(\"foo\", 0, -1, withscores=True) == state\n\n\n@pytest.mark.parametrize(\n    \"param,return_value,state\",\n    [\n        ({\"four\": 4.0, \"three\": 1.0}, 1, [(b\"three\", 1.0), (b\"four\", 4.0)]),\n        (\n            
{\"four\": 4.0, \"three\": 1.0, \"zero\": 0.0},\n            2,\n            [(b\"zero\", 0.0), (b\"three\", 1.0), (b\"four\", 4.0)],\n        ),\n        (\n            {\"two\": 2.0, \"one\": 1.0},\n            2,\n            [(b\"one\", 1.0), (b\"two\", 2.0), (b\"three\", 3.0), (b\"four\", 4.0)],\n        ),\n    ],\n)\ndef test_zadd_with_ch(r, param, return_value, state):\n    r.zadd(\"foo\", {\"four\": 4.0, \"three\": 3.0})\n    assert r.zadd(\"foo\", param, ch=True) == return_value\n    assert r.zrange(\"foo\", 0, -1, withscores=True) == state\n\n\n@pytest.mark.parametrize(\n    \"param,changed,state\",\n    [\n        ({\"four\": 2.0, \"three\": 1.0}, 2, [(b\"three\", 1.0), (b\"four\", 2.0)]),\n        (\n            {\"four\": 4.0, \"three\": 3.0, \"zero\": 0.0},\n            0,\n            [(b\"three\", 3.0), (b\"four\", 4.0)],\n        ),\n        ({\"two\": 2.0, \"one\": 1.0}, 0, [(b\"three\", 3.0), (b\"four\", 4.0)]),\n    ],\n)\n@pytest.mark.parametrize(\"ch\", [False, True])\ndef test_zadd_with_xx(r, param, changed, state, ch):\n    r.zadd(\"foo\", {\"four\": 4.0, \"three\": 3.0})\n    assert r.zadd(\"foo\", param, xx=True, ch=ch) == (changed if ch else 0)\n    assert r.zrange(\"foo\", 0, -1, withscores=True) == state\n\n\n@pytest.mark.parametrize(\"ch\", [False, True])\ndef test_zadd_with_nx_and_xx(r, ch):\n    r.zadd(\"foo\", {\"four\": 4.0, \"three\": 3.0})\n    with pytest.raises(redis.DataError):\n        r.zadd(\"foo\", {\"four\": -4.0, \"three\": -3.0}, nx=True, xx=True, ch=ch)\n\n\n@pytest.mark.parametrize(\"ch\", [False, True])\ndef test_zadd_incr(r, ch):\n    r.zadd(\"foo\", {\"four\": 4.0, \"three\": 3.0})\n    assert r.zadd(\"foo\", {\"four\": 1.0}, incr=True, ch=ch) == 5.0\n    assert r.zadd(\"foo\", {\"three\": 1.0}, incr=True, nx=True, ch=ch) is None\n    assert r.zscore(\"foo\", \"three\") == 3.0\n    assert r.zadd(\"foo\", {\"bar\": 1.0}, incr=True, xx=True, ch=ch) is None\n    assert r.zadd(\"foo\", {\"three\": 1.0}, incr=True, 
xx=True, ch=ch) == 4.0\n\n\ndef test_zadd_with_xx_and_gt_and_ch(r: redis.Redis):\n    r.zadd(\"test\", {\"one\": 1})\n    assert r.zscore(\"test\", \"one\") == 1.0\n    assert r.zadd(\"test\", {\"one\": 4}, xx=True, gt=True, ch=True) == 1\n    assert r.zscore(\"test\", \"one\") == 4.0\n    assert r.zadd(\"test\", {\"one\": 0}, xx=True, gt=True, ch=True) == 0\n    assert r.zscore(\"test\", \"one\") == 4.0\n\n\ndef test_zadd_and_zrangebyscore(r: redis.Redis):\n    raw_command(r, \"zadd\", \"\", 0.0, \"\")\n    assert raw_command(r, \"zrangebyscore\", \"\", 0.0, 0.0, \"limit\", 0, 0) == []\n    with pytest.raises(redis.RedisError):\n        raw_command(r, \"zrangebyscore\", \"\", 0.0, 0.0, \"limit\", 0)\n    with pytest.raises(redis.RedisError):\n        raw_command(r, \"zadd\", \"t\", 0.0, \"xx\", \"\")\n"
  },
  {
    "path": "tests/fakeredis/test/test_stack/__init__.py",
    "content": ""
  },
  {
    "path": "tests/fakeredis/test/test_stack/test_bloomfilter.py",
    "content": "import pytest\nimport redis\nfrom fakeredis import _msgs as msgs\nfrom redis.commands.bf import BFInfo\n\nbloom_tests = pytest.importorskip(\"probables\")\n\n\ndef intlist(obj):\n    return [int(v) for v in obj]\n\n\ndef test_create_bf(r: redis.Redis):\n    assert r.bf().create(\"bloom\", 0.01, 1000)\n    assert r.bf().create(\"bloom_e\", 0.01, 1000, expansion=1)\n    assert r.bf().create(\"bloom_ns\", 0.01, 1000, noScale=True)\n\n\n@pytest.mark.unsupported_server_types(\"dragonfly\")\ndef test_bf_reserve(r: redis.Redis):\n    assert r.bf().reserve(\"bloom\", 0.01, 1000)\n    assert r.bf().reserve(\"bloom_ns\", 0.01, 1000, noScale=True)\n    with pytest.raises(\n        redis.exceptions.ResponseError, match=msgs.NONSCALING_FILTERS_CANNOT_EXPAND_MSG\n    ):\n        assert r.bf().reserve(\"bloom_e\", 0.01, 1000, expansion=1, noScale=True)\n    with pytest.raises(redis.exceptions.ResponseError, match=msgs.ITEM_EXISTS_MSG):\n        assert r.bf().reserve(\"bloom\", 0.01, 1000)\n\n\ndef test_bf_add(r: redis.Redis):\n    assert r.bf().add(\"key\", \"value\") == 1\n    assert r.bf().add(\"key\", \"value\") == 0\n\n    r.set(\"key1\", \"value\")\n    with pytest.raises(redis.exceptions.ResponseError):\n        r.bf().add(\"key1\", \"v\")\n    assert r.bf().create(\"bloom\", 0.01, 1000)\n    assert 1 == r.bf().add(\"bloom\", \"foo\")\n    assert 0 == r.bf().add(\"bloom\", \"foo\")\n    assert [0] == intlist(r.bf().madd(\"bloom\", \"foo\"))\n    assert [0, 1] == r.bf().madd(\"bloom\", \"foo\", \"bar\")\n    assert [0, 0, 1] == r.bf().madd(\"bloom\", \"foo\", \"bar\", \"baz\")\n    assert 1 == r.bf().exists(\"bloom\", \"foo\")\n    assert 0 == r.bf().exists(\"bloom\", \"noexist\")\n    assert [1, 0] == intlist(r.bf().mexists(\"bloom\", \"foo\", \"noexist\"))\n\n\ndef test_bf_madd(r: redis.Redis):\n    assert r.bf().madd(\"key\", \"v1\", \"v2\", \"v2\") == [1, 1, 0]\n    assert r.bf().madd(\"key\", \"v1\", \"v2\", \"v4\") == [0, 0, 1]\n\n    r.set(\"key1\", 
\"value\")\n    with pytest.raises(redis.exceptions.ResponseError):\n        r.bf().add(\"key1\", \"v\")\n\n\n@pytest.mark.unsupported_server_types(\"dragonfly\")\ndef test_bf_card(r: redis.Redis):\n    assert r.bf().madd(\"key\", \"v1\", \"v2\", \"v3\") == [1, 1, 1]\n    assert r.bf().card(\"key\") == 3\n    assert r.bf().card(\"key-new\") == 0\n\n    r.set(\"key1\", \"value\")\n    with pytest.raises(redis.exceptions.ResponseError):\n        r.bf().card(\"key1\")\n    # return 0 if the key does not exist\n    assert r.bf().card(\"not_exist\") == 0\n\n    # Store a filter\n    assert r.bf().add(\"bf1\", \"item_foo\") == 1\n    assert r.bf().card(\"bf1\") == 1\n\n    # Error when key is of a type other than Bloom filter.\n    with pytest.raises(redis.ResponseError):\n        r.set(\"setKey\", \"value\")\n        r.bf().card(\"setKey\")\n\n\ndef test_bf_exists(r: redis.Redis):\n    assert r.bf().madd(\"key\", \"v1\", \"v2\", \"v3\") == [1, 1, 1]\n    assert r.bf().exists(\"key\", \"v1\") == 1\n    assert r.bf().exists(\"key\", \"v5\") == 0\n    assert r.bf().exists(\"key-new\", \"v5\") == 0\n\n    r.set(\"key1\", \"value\")\n    with pytest.raises(redis.exceptions.ResponseError):\n        r.bf().add(\"key1\", \"v\")\n\n\ndef test_bf_mexists(r: redis.Redis):\n    assert r.bf().madd(\"key\", \"v1\", \"v2\", \"v3\") == [1, 1, 1]\n    assert r.bf().mexists(\"key\", \"v1\") == [\n        1,\n    ]\n    assert r.bf().mexists(\"key\", \"v1\", \"v5\") == [1, 0]\n    assert r.bf().mexists(\"key-new\", \"v5\") == [\n        0,\n    ]\n\n    r.set(\"key1\", \"value\")\n    with pytest.raises(redis.exceptions.ResponseError):\n        r.bf().add(\"key1\", \"v\")\n\n\n@pytest.mark.unsupported_server_types(\"dragonfly\")\ndef test_bf_insert(r: redis.Redis):\n    assert r.bf().create(\"key\", 0.01, 1000)\n    assert r.bf().insert(\"key\", [\"foo\"]) == [1]\n    assert r.bf().insert(\"key\", [\"foo\", \"bar\"]) == [0, 1]\n    assert r.bf().insert(\"captest\", [\"foo\"], capacity=10) 
== [1]\n    assert r.bf().insert(\"errtest\", [\"foo\"], error=0.01) == [1]\n    assert r.bf().exists(\"key\", \"foo\") == 1\n    assert r.bf().exists(\"key\", \"noexist\") == 0\n    assert r.bf().mexists(\"key\", \"foo\", \"noexist\") == [1, 0]\n    with pytest.raises(redis.exceptions.ResponseError, match=msgs.NOT_FOUND_MSG):\n        r.bf().insert(\"nocreate\", [1, 2, 3], noCreate=True)\n    # with pytest.raises(redis.exceptions.ResponseError, match=msgs.NONSCALING_FILTERS_CANNOT_EXPAND_MSG):\n    #     r.bf().insert(\"nocreate\", [1, 2, 3], expansion=2, noScale=True)\n    assert r.bf().create(\"bloom\", 0.01, 1000)\n    assert [1] == intlist(r.bf().insert(\"bloom\", [\"foo\"]))\n    assert [0, 1] == intlist(r.bf().insert(\"bloom\", [\"foo\", \"bar\"]))\n    assert 1 == r.bf().exists(\"bloom\", \"foo\")\n    assert 0 == r.bf().exists(\"bloom\", \"noexist\")\n    assert [1, 0] == intlist(r.bf().mexists(\"bloom\", \"foo\", \"noexist\"))\n    info = r.bf().info(\"bloom\")\n    assert 2 == info.get(\"insertedNum\")\n    assert 1000 == info.get(\"capacity\")\n    assert 1 == info.get(\"filterNum\")\n\n\n@pytest.mark.unsupported_server_types(\"dragonfly\")\ndef test_bf_scandump_and_loadchunk(r: redis.Redis):\n    r.bf().create(\"myBloom\", \"0.0001\", \"1000\")\n\n    # Test is probabilistic and might fail. 
It is OK to change variables if\n    # certain to not break anything\n\n    res = 0\n    for x in range(1000):\n        r.bf().add(\"myBloom\", x)\n        assert r.bf().exists(\"myBloom\", x)\n        rv = r.bf().exists(\"myBloom\", f\"nonexist_{x}\")\n        res += rv == x\n    assert res < 5\n\n    cmds = list()\n    first = 0\n    while first is not None:\n        cur = r.bf().scandump(\"myBloom\", first)\n        if cur[0] == 0:\n            first = None\n        else:\n            first = cur[0]\n            cmds.append(cur)\n\n    # Remove the filter\n    r.bf().client.delete(\"myBloom\")\n\n    # Now, load all the commands:\n    for cmd in cmds:\n        r.bf().loadchunk(\"myBloom1\", *cmd)\n\n    for x in range(1000):\n        assert r.bf().exists(\"myBloom1\", x), f\"{x} not in filter\"\n\n\n@pytest.mark.unsupported_server_types(\"dragonfly\")\ndef test_bf_info(r: redis.Redis):\n    # Store a filter\n    r.bf().create(\"nonscaling\", \"0.0001\", \"1000\", noScale=True)\n    info: BFInfo = r.bf().info(\"nonscaling\")\n    assert info.expansionRate is None\n\n    expansion = 4\n    r.bf().create(\"expanding\", \"0.0001\", \"1000\", expansion=expansion)\n    info = r.bf().info(\"expanding\")\n    assert info.expansionRate == 4\n    assert info.capacity == 1000\n    assert info.insertedNum == 0\n"
  },
  {
    "path": "tests/fakeredis/test/test_stack/test_cms.py",
    "content": "import pytest\nimport redis\n\nfrom test import testtools\n\njson_tests = pytest.importorskip(\"probables\")\n\npytestmark = []\n\n\ndef test_cms_create(r: redis.Redis):\n    assert r.cms().initbydim(\"cmsDim\", 100, 5)\n    assert r.cms().initbyprob(\"cmsProb\", 0.01, 0.01)\n\n    with pytest.raises(redis.exceptions.ResponseError):\n        r.cms().initbydim(\"cmsDim\", 1, 5)\n\n    with pytest.raises(redis.exceptions.ResponseError):\n        r.cms().initbydim(\"cmsDim2\", 0, 5)\n\n    with pytest.raises(redis.exceptions.ResponseError):\n        r.cms().initbydim(\"cmsDim2\", 3, 0)\n\n    with pytest.raises(redis.exceptions.ResponseError):\n        r.cms().initbyprob(\"cmsProb\", 0.01, 0.1)\n\n    with pytest.raises(redis.exceptions.ResponseError):\n        r.cms().initbyprob(\"cmsProb2\", 2, 0.01)\n\n    with pytest.raises(redis.exceptions.ResponseError):\n        r.cms().initbyprob(\"cmsProb2\", 0.01, 0)\n\n\ndef test_cms_incrby(r: redis.Redis):\n    assert r.cms().initbydim(\"cmsDim\", 100, 5)\n    assert r.cms().initbyprob(\"cmsProb\", 0.01, 0.01)\n\n    assert r.cms().incrby(\"cmsDim\", [\"foo\"], [3]) == [3]\n    assert r.cms().incrby(\"cmsDim\", [\"foo\", \"bar\"], [4, 1]) == [7, 1]\n    assert r.cms().query(\"cmsDim\", \"foo\") == [7]\n    assert r.cms().query(\"cmsDim\", \"foo\", \"bar\") == [7, 1]\n    assert r.cms().query(\"cmsDim\", \"noexist\") == [0]\n\n    with pytest.raises(redis.exceptions.ResponseError):\n        r.cms().query(\"cmsDim\")\n\n    with pytest.raises(redis.exceptions.ResponseError):\n        r.cms().query(\"noexist\", \"foo\")\n\n    with pytest.raises(redis.exceptions.ResponseError):\n        testtools.raw_command(r, \"CMS.INCRBY\", \"cmsDim\", \"foo\", 1, \"bar\")\n\n    with pytest.raises(redis.exceptions.ResponseError, match=\"CMS: key does not exist\"):\n        r.cms().incrby(\"noexist\", [\"foo\", \"bar\"], [3, 4])\n\n    with pytest.raises(redis.exceptions.ResponseError, match=\"CMS: Cannot parse number\"):\n 
       r.cms().incrby(\"cmsDim\", [\"foo\", \"bar\"], [3, \"four\"])\n\n\ndef test_cms_merge(r: redis.Redis):\n    assert r.cms().initbydim(\"cmsDim\", 100, 5)\n    assert r.cms().initbydim(\"cms2\", 100, 5)\n\n    assert r.cms().incrby(\"cmsDim\", [\"foo\"], [3]) == [3]\n    assert r.cms().incrby(\"cms2\", [\"foo\", \"bar\"], [4, 1]) == [4, 1]\n    assert r.cms().merge(\"cmsDim\", 1, [\"cms2\"])\n    assert r.cms().query(\"cmsDim\", \"foo\", \"bar\") == [4, 1]\n\n    with pytest.raises(redis.exceptions.ResponseError, match=\"CMS: key does not exist\"):\n        r.cms().merge(\"noexist\", 1, [\"cms2\"])\n\n    # This shared test hard-coded one error string, but the FakeStrictRedis run can raise a different one.\n    with pytest.raises(\n        redis.exceptions.ResponseError,\n        match=r\"CMS: (wrong number of keys|Number of keys must be positive)\",\n    ):\n        r.cms().merge(\"cms2\", 0, [\"cmsDim\"])\n\n    with pytest.raises(\n        redis.exceptions.ResponseError,\n        match=\"wrong number of arguments for '.*' command\",\n    ):\n        r.cms().merge(\"cms2\", 1, [])\n\n    with pytest.raises(redis.exceptions.ResponseError, match=\"CMS: wrong number of keys/weights\"):\n        r.cms().merge(\"cmsDim\", 1, [\"cms2\", \"cms1\"], [4, 3])\n\n    with pytest.raises(redis.exceptions.ResponseError, match=\"CMS: key does not exist\"):\n        r.cms().merge(\"cmsDim\", 2, [\"cms2\", \"noexist\"], [4, 3])\n\n\ndef test_cms_info(r: redis.Redis):\n    assert r.cms().initbydim(\"A\", 1000, 5)\n    assert r.cms().initbydim(\"B\", 1000, 5)\n    assert r.cms().initbydim(\"C\", 1000, 5)\n\n    assert r.cms().incrby(\"A\", [\"foo\", \"bar\", \"baz\"], [5, 3, 9])\n    assert r.cms().incrby(\"B\", [\"foo\", \"bar\", \"baz\"], [2, 3, 1])\n    assert r.cms().query(\"A\", \"foo\", \"bar\", \"baz\") == [5, 3, 9]\n    assert r.cms().query(\"B\", \"foo\", \"bar\", \"baz\") == [2, 3, 1]\n    assert r.cms().merge(\"C\", 2, [\"A\", \"B\"])\n    assert 
r.cms().query(\"C\", \"foo\", \"bar\", \"baz\") == [7, 6, 10]\n\n    assert r.cms().merge(\"C\", 2, [\"A\", \"B\"], [\"1\", \"2\"])\n    assert r.cms().query(\"C\", \"foo\", \"bar\", \"baz\") == [9, 9, 11]\n\n    assert r.cms().merge(\"C\", 2, [\"A\", \"B\"], [\"2\", \"3\"])\n    assert r.cms().query(\"C\", \"foo\", \"bar\", \"baz\") == [16, 15, 21]\n    info = r.cms().info(\"A\")\n    assert info.width == 1000\n    assert info.depth == 5\n    assert info.count == 17\n\n    with pytest.raises(redis.exceptions.ResponseError, match=\"CMS: key does not exist\"):\n        r.cms().info(\"noexist\")\n\n\n@pytest.mark.xfail(reason=\"Bug in pyprobables\")\ndef test_cms_merge_fail(r: redis.Redis):\n    assert r.cms().initbydim(\"A\", 1000, 5)\n    assert r.cms().initbydim(\"B\", 1000, 5)\n    assert r.cms().initbydim(\"C\", 1000, 5)\n\n    assert r.cms().incrby(\"A\", [\"foo\", \"bar\", \"baz\"], [5, 3, 9])\n    assert r.cms().incrby(\"B\", [\"foo\", \"bar\", \"baz\"], [2, 3, 1])\n    assert r.cms().query(\"A\", \"foo\", \"bar\", \"baz\") == [5, 3, 9]\n    assert r.cms().query(\"B\", \"foo\", \"bar\", \"baz\") == [2, 3, 1]\n    assert r.cms().merge(\"C\", 2, [\"A\", \"B\"])\n    assert r.cms().query(\"C\", \"foo\", \"bar\", \"baz\") == [7, 6, 10]\n\n    assert r.cms().merge(\"C\", 2, [\"A\", \"B\"], [\"2\", \"3\"])\n    info = r.cms().info(\"C\")\n    assert info.width == 1000\n    assert info.depth == 5\n    assert info.count == 52\n"
  },
  {
    "path": "tests/fakeredis/test/test_stack/test_cuckoofilter.py",
    "content": "import pytest\nimport redis\n\ncuckoofilters_tests = pytest.importorskip(\"probables\")\n\ntopk_tests = pytest.importorskip(\"probables\")\n\npytestmark = []\npytestmark.extend(\n    [\n        pytest.mark.unsupported_server_types(\"dragonfly\"),\n    ]\n)\n\n\ndef test_cf_add_and_insert(r: redis.Redis):\n    assert r.cf().create(\"cuckoo\", 1000)\n    assert r.cf().add(\"cuckoo\", \"filter\")\n    assert not r.cf().addnx(\"cuckoo\", \"filter\")\n    assert 1 == r.cf().addnx(\"cuckoo\", \"newItem\")\n    assert [1] == r.cf().insert(\"captest\", [\"foo\"])\n    assert [1] == r.cf().insert(\"captest\", [\"foo\"], capacity=1000)\n    assert [1] == r.cf().insertnx(\"captest\", [\"bar\"])\n    assert [1] == r.cf().insertnx(\"captest\", [\"food\"], nocreate=\"1\")\n    assert [0, 0, 1] == r.cf().insertnx(\"captest\", [\"foo\", \"bar\", \"baz\"])\n    assert [0] == r.cf().insertnx(\"captest\", [\"bar\"], capacity=1000)\n    assert [1] == r.cf().insert(\"empty1\", [\"foo\"], capacity=1000)\n    assert [1] == r.cf().insertnx(\"empty2\", [\"bar\"], capacity=1000)\n    info = r.cf().info(\"captest\")\n    assert info.get(\"insertedNum\") == 5\n    assert info.get(\"deletedNum\") == 0\n    assert info.get(\"filterNum\") == 1\n\n\ndef test_create_cf(r: redis.Redis):\n    assert r.cf().create(\"cuckoo\", 1000)\n    assert r.cf().create(\"cuckoo_e\", 1000, expansion=1)\n    assert r.cf().create(\"cuckoo_bs\", 1000, bucket_size=4)\n    assert r.cf().create(\"cuckoo_mi\", 1000, max_iterations=10)\n    assert r.cms().initbydim(\"cmsDim\", 100, 5)\n    assert r.cms().initbyprob(\"cmsProb\", 0.01, 0.01)\n    assert r.topk().reserve(\"topk\", 5, 100, 5, 0.9)\n\n\ndef test_cf_exists_and_del(r: redis.Redis):\n    assert r.cf().create(\"cuckoo\", 1000)\n    assert r.cf().add(\"cuckoo\", \"filter\")\n    assert r.cf().exists(\"cuckoo\", \"filter\")\n    assert not r.cf().exists(\"cuckoo\", \"notexist\")\n    assert [1, 0] == r.cf().mexists(\"cuckoo\", \"filter\", 
\"notexist\")\n    assert 1 == r.cf().count(\"cuckoo\", \"filter\")\n    assert 0 == r.cf().count(\"cuckoo\", \"notexist\")\n    assert r.cf().delete(\"cuckoo\", \"filter\")\n    assert 0 == r.cf().count(\"cuckoo\", \"filter\")\n"
  },
  {
    "path": "tests/fakeredis/test/test_stack/test_tdigest.py",
    "content": "from math import inf\n\nimport pytest\nimport redis\n\ntopk_tests = pytest.importorskip(\"probables\")\npytestmark = []\npytestmark.extend(\n    [\n        pytest.mark.unsupported_server_types(\"dragonfly\"),\n    ]\n)\n\n\ndef test_tdigest_reset(r: redis.Redis):\n    assert r.tdigest().create(\"tDigest\", 10)\n    # reset on empty histogram\n    assert r.tdigest().reset(\"tDigest\")\n    # insert data-points into sketch\n    assert r.tdigest().add(\"tDigest\", list(range(10)))\n\n    assert r.tdigest().reset(\"tDigest\")\n    # assert we have 0 unmerged\n    info = r.tdigest().info(\"tDigest\")\n    assert 0 == info.get(\"unmerged_weight\")\n\n\ndef test_tdigest_merge(r: redis.Redis):\n    assert r.tdigest().create(\"to-tDigest\", 10)\n    assert r.tdigest().create(\"from-tDigest\", 10)\n    # insert data-points into sketch\n    assert r.tdigest().add(\"from-tDigest\", [1.0] * 10)\n    assert r.tdigest().add(\"to-tDigest\", [2.0] * 10)\n    # merge from-tdigest into to-tdigest\n    assert r.tdigest().merge(\"to-tDigest\", 1, \"from-tDigest\")\n    # we should now have 110 weight on to-histogram\n    info = r.tdigest().info(\"to-tDigest\")\n    assert 20 == float(info[\"merged_weight\"]) + float(info[\"unmerged_weight\"])\n    # test override\n    assert r.tdigest().create(\"from-override\", 10)\n    assert r.tdigest().create(\"from-override-2\", 10)\n    assert r.tdigest().add(\"from-override\", [3.0] * 10)\n    assert r.tdigest().add(\"from-override-2\", [4.0] * 10)\n    assert r.tdigest().merge(\n        \"to-tDigest\", 2, \"from-override\", \"from-override-2\", override=True\n    )\n    assert 3.0 == r.tdigest().min(\"to-tDigest\")\n    assert 4.0 == r.tdigest().max(\"to-tDigest\")\n\n\ndef test_tdigest_min_and_max(r: redis.Redis):\n    assert r.tdigest().create(\"tDigest\", 100)\n    # insert data-points into sketch\n    assert r.tdigest().add(\"tDigest\", [1, 2, 3])\n    # min/max\n    assert 3 == r.tdigest().max(\"tDigest\")\n    assert 1 == 
r.tdigest().min(\"tDigest\")\n\n\ndef test_tdigest_quantile(r: redis.Redis):\n    assert r.tdigest().create(\"tDigest\", 500)\n    # insert data-points into sketch\n    assert r.tdigest().add(\"tDigest\", list([x * 0.01 for x in range(1, 10000)]))\n    # assert min min/max have same result as quantile 0 and 1\n    res = r.tdigest().quantile(\"tDigest\", 1.0)\n    assert r.tdigest().max(\"tDigest\") == res[0]\n    res = r.tdigest().quantile(\"tDigest\", 0.0)\n    assert r.tdigest().min(\"tDigest\") == res[0]\n\n    assert 1.0 == round(r.tdigest().quantile(\"tDigest\", 0.01)[0], 2)\n    assert 99.0 == round(r.tdigest().quantile(\"tDigest\", 0.99)[0], 2)\n\n    # test multiple quantiles\n    assert r.tdigest().create(\"t-digest\", 100)\n    assert r.tdigest().add(\"t-digest\", [1, 2, 3, 4, 5])\n    assert [3.0, 5.0] == r.tdigest().quantile(\"t-digest\", 0.5, 0.8)\n\n\ndef test_tdigest_cdf(r: redis.Redis):\n    assert r.tdigest().create(\"tDigest\", 100)\n    # insert data-points into sketch\n    assert r.tdigest().add(\"tDigest\", list(range(1, 10)))\n    assert 0.1 == round(r.tdigest().cdf(\"tDigest\", 1.0)[0], 1)\n    assert 0.9 == round(r.tdigest().cdf(\"tDigest\", 9.0)[0], 1)\n    res = r.tdigest().cdf(\"tDigest\", 1.0, 9.0)\n    assert [0.1, 0.9] == [round(x, 1) for x in res]\n\n\ndef test_tdigest_trimmed_mean(r: redis.Redis):\n    assert r.tdigest().create(\"tDigest\", 100)\n    # insert data-points into sketch\n    assert r.tdigest().add(\"tDigest\", list(range(1, 10)))\n    assert 5 == r.tdigest().trimmed_mean(\"tDigest\", 0.1, 0.9)\n    assert 4.5 == r.tdigest().trimmed_mean(\"tDigest\", 0.4, 0.5)\n\n\ndef test_tdigest_rank(r: redis.Redis):\n    assert r.tdigest().create(\"t-digest\", 500)\n    assert r.tdigest().add(\"t-digest\", list(range(0, 20)))\n    assert -1 == r.tdigest().rank(\"t-digest\", -1)[0]\n    assert 0 == r.tdigest().rank(\"t-digest\", 0)[0]\n    assert 10 == r.tdigest().rank(\"t-digest\", 10)[0]\n    assert [-1, 20, 9] == 
r.tdigest().rank(\"t-digest\", -20, 20, 9)\n\n\ndef test_tdigest_revrank(r: redis.Redis):\n    assert r.tdigest().create(\"t-digest\", 500)\n    assert r.tdigest().add(\"t-digest\", list(range(0, 20)))\n    assert -1 == r.tdigest().revrank(\"t-digest\", 20)[0]\n    assert 19 == r.tdigest().revrank(\"t-digest\", 0)[0]\n    assert [-1, 19, 9] == r.tdigest().revrank(\"t-digest\", 21, 0, 10)\n\n\ndef test_tdigest_byrank(r: redis.Redis):\n    assert r.tdigest().create(\"t-digest\", 500)\n    assert r.tdigest().add(\"t-digest\", list(range(1, 11)))\n    assert 1 == r.tdigest().byrank(\"t-digest\", 0)[0]\n    assert 10 == r.tdigest().byrank(\"t-digest\", 9)[0]\n    assert r.tdigest().byrank(\"t-digest\", 100)[0] == inf\n    with pytest.raises(redis.ResponseError):\n        r.tdigest().byrank(\"t-digest\", -1)[0]\n\n\ndef test_tdigest_byrevrank(r: redis.Redis):\n    assert r.tdigest().create(\"t-digest\", 500)\n    assert r.tdigest().add(\"t-digest\", list(range(1, 11)))\n    assert 10 == r.tdigest().byrevrank(\"t-digest\", 0)[0]\n    assert 1 == r.tdigest().byrevrank(\"t-digest\", 9)[0]\n    assert r.tdigest().byrevrank(\"t-digest\", 100)[0] == -inf\n    with pytest.raises(redis.ResponseError):\n        r.tdigest().byrevrank(\"t-digest\", -1)[0]\n"
  },
  {
    "path": "tests/fakeredis/test/test_stack/test_topk.py",
    "content": "import pytest\nimport redis\n\ntopk_tests = pytest.importorskip(\"probables\")\n\npytestmark = []\npytestmark.extend(\n    [\n        pytest.mark.unsupported_server_types(\"dragonfly\"),\n    ]\n)\n\n\ndef test_topk_incrby(r: redis.Redis):\n    assert r.topk().reserve(\"topk\", 3, 10, 3, 1)\n    assert [None, None, None] == r.topk().incrby(\n        \"topk\", [\"bar\", \"baz\", \"42\"], [3, 6, 2]\n    )\n    assert [None, \"bar\"] == r.topk().incrby(\"topk\", [\"42\", \"xyzzy\"], [8, 4])\n    with pytest.deprecated_call():\n        assert [3, 6, 10, 4, 0] == r.topk().count(\n            \"topk\", \"bar\", \"baz\", \"42\", \"xyzzy\", 4\n        )\n\n\ndef test_topk(r: redis.Redis):\n    # test list with empty buckets\n    assert r.topk().reserve(\"topk\", 3, 50, 4, 0.9)\n    ret = r.topk().add(\n        \"topk\",\n        \"A\",\n        \"B\",\n        \"C\",\n        \"D\",\n        \"D\",\n        \"E\",\n        \"A\",\n        \"A\",\n        \"B\",\n        \"C\",\n        \"G\",\n        \"D\",\n        \"B\",\n        \"D\",\n        \"A\",\n        \"E\",\n        \"E\",\n        1,\n    )\n    assert len(ret) == 18\n\n    with pytest.deprecated_call():\n        assert r.topk().count(\"topk\", \"A\", \"B\", \"C\", \"D\", \"E\", \"F\", \"G\") == [\n            4,\n            3,\n            2,\n            4,\n            3,\n            0,\n            1,\n        ]\n    ret = r.topk().query(\"topk\", \"A\", \"B\", \"C\", \"D\", \"E\", \"F\", \"G\")\n    assert (ret == [1, 0, 0, 1, 1, 0, 0]) or (ret == [1, 1, 0, 1, 0, 0, 0])\n    # test full list\n    assert r.topk().reserve(\"topklist\", 3, 50, 3, 0.9)\n    assert r.topk().add(\n        \"topklist\",\n        \"A\",\n        \"B\",\n        \"D\",\n        \"E\",\n        \"A\",\n        \"A\",\n        \"B\",\n        \"C\",\n        \"G\",\n        \"D\",\n        \"B\",\n        \"A\",\n        \"B\",\n        \"E\",\n        \"E\",\n    )\n    with pytest.deprecated_call():\n        
assert r.topk().count(\"topklist\", \"A\", \"B\", \"C\", \"D\", \"E\", \"F\", \"G\") == [\n            4,\n            4,\n            1,\n            2,\n            3,\n            0,\n            1,\n        ]\n    assert r.topk().list(\"topklist\") == [\"A\", \"B\", \"E\"]\n    assert r.topk().list(\"topklist\", withcount=True) == [\"A\", 4, \"B\", 4, \"E\", 3]\n    info = r.topk().info(\"topklist\")\n    assert 3 == info[\"k\"]\n    assert 50 == info[\"width\"]\n    assert 3 == info[\"depth\"]\n    assert 0.9 == round(float(info[\"decay\"]), 1)\n"
  },
  {
    "path": "tests/fakeredis/test/test_transactions.py",
    "content": "from __future__ import annotations\n\nimport fakeredis\nimport pytest\nimport redis\nimport redis.client\n\nfrom . import testtools\n\n\ndef test_multiple_successful_watch_calls(r: redis.Redis):\n    p = r.pipeline()\n    p.watch(\"bam\")\n    p.multi()\n    p.set(\"foo\", \"bar\")\n    # Check that the watched keys buffer has been emptied.\n    p.execute()\n\n    # bam is no longer being watched, so it's ok to modify\n    # it now.\n    p.watch(\"foo\")\n    r.set(\"bam\", \"boo\")\n    p.multi()\n    p.set(\"foo\", \"bats\")\n    assert p.execute() == [True]\n\n\ndef test_watch_state_is_cleared_after_abort(r: redis.Redis):\n    # redis-py's pipeline handling and connection pooling interferes with this\n    # test, so raw commands are used instead.\n    testtools.raw_command(r, \"watch\", \"foo\")\n    testtools.raw_command(r, \"multi\")\n    with pytest.raises(redis.ResponseError):\n        testtools.raw_command(r, \"mget\")  # Wrong number of arguments\n    with pytest.raises(redis.exceptions.ExecAbortError):\n        testtools.raw_command(r, \"exec\")\n\n    testtools.raw_command(\n        r, \"set\", \"foo\", \"bar\"\n    )  # Should NOT trigger the watch from earlier\n    testtools.raw_command(r, \"multi\")\n    testtools.raw_command(r, \"set\", \"abc\", \"done\")\n    testtools.raw_command(r, \"exec\")\n\n    assert r.get(\"abc\") == b\"done\"\n\n\ndef test_pipeline_transaction_shortcut(r: redis.Redis):\n    # This example taken pretty much from the redis-py documentation.\n    r.set(\"OUR-SEQUENCE-KEY\", 13)\n    calls = []\n\n    def client_side_incr(pipe):\n        calls.append((pipe,))\n        current_value = pipe.get(\"OUR-SEQUENCE-KEY\")\n        next_value = int(current_value) + 1\n\n        if len(calls) < 3:\n            # Simulate a change from another thread.\n            r.set(\"OUR-SEQUENCE-KEY\", next_value)\n\n        pipe.multi()\n        pipe.set(\"OUR-SEQUENCE-KEY\", next_value)\n\n    res = r.transaction(client_side_incr, 
\"OUR-SEQUENCE-KEY\")\n\n    assert res == [True]\n    assert int(r.get(\"OUR-SEQUENCE-KEY\")) == 16\n    assert len(calls) == 3\n\n\ndef test_pipeline_transaction_value_from_callable(r: redis.Redis):\n    def callback(pipe):\n        # No need to do anything here since we only want the return value\n        return \"OUR-RETURN-VALUE\"\n\n    res = r.transaction(callback, \"OUR-SEQUENCE-KEY\", value_from_callable=True)\n    assert res == \"OUR-RETURN-VALUE\"\n\n\ndef test_pipeline_empty(r: redis.Redis):\n    p = r.pipeline()\n    assert len(p) == 0\n\n\ndef test_pipeline_length(r: redis.Redis):\n    p = r.pipeline()\n    p.set(\"baz\", \"quux\").get(\"baz\")\n    assert len(p) == 2\n\n\ndef test_pipeline_no_commands(r: redis.Redis):\n    # Prior to 3.4, redis-py's execute is a nop if there are no commands\n    # queued, so it succeeds even if watched keys have been changed.\n    r.set(\"foo\", \"1\")\n    p = r.pipeline()\n    p.watch(\"foo\")\n    r.set(\"foo\", \"2\")\n    with pytest.raises(redis.WatchError):\n        p.execute()\n\n\ndef test_pipeline_failed_transaction(r: redis.Redis):\n    p = r.pipeline()\n    p.multi()\n    p.set(\"foo\", \"bar\")\n    # Deliberately induce a syntax error\n    p.execute_command(\"set\")\n    # It should be an ExecAbortError, but redis-py tries to DISCARD after the\n    # failed EXEC, which raises a ResponseError.\n    with pytest.raises(redis.ResponseError):\n        p.execute()\n    assert not r.exists(\"foo\")\n\n\ndef test_pipeline_srem_no_change(r: redis.Redis):\n    # A regression test for a case picked up by hypothesis tests.\n    p = r.pipeline()\n    p.watch(\"foo\")\n    r.srem(\"foo\", \"bar\")\n    p.multi()\n    p.set(\"foo\", \"baz\")\n    p.execute()\n    assert r.get(\"foo\") == b\"baz\"\n\n\n# The behaviour changed in redis 6.0 (see https://github.com/redis/redis/issues/6594).\n@pytest.mark.min_server(\"6.0\")\ndef test_pipeline_move(r: redis.Redis):\n    # A regression test for a case picked up by 
hypothesis tests.\n    r.set(\"foo\", \"bar\")\n    p = r.pipeline()\n    p.watch(\"foo\")\n    r.move(\"foo\", 1)\n    # Ensure the transaction isn't empty, which had different behaviour in\n    # older versions of redis-py.\n    p.multi()\n    p.set(\"bar\", \"baz\")\n    with pytest.raises(redis.exceptions.WatchError):\n        p.execute()\n\n\n@pytest.mark.min_server(\"6.0.6\")\ndef test_exec_bad_arguments(r: redis.Redis):\n    # Redis 6.0.6 changed the behaviour of exec so that it always fails with\n    # EXECABORT, even when it's just bad syntax.\n    with pytest.raises(redis.exceptions.ExecAbortError):\n        r.execute_command(\"exec\", \"blahblah\")\n\n\n@pytest.mark.min_server(\"6.0.6\")\ndef test_exec_bad_arguments_abort(r: redis.Redis):\n    r.execute_command(\"multi\")\n    with pytest.raises(redis.exceptions.ExecAbortError):\n        r.execute_command(\"exec\", \"blahblah\")\n    # Should have aborted the transaction, so we can run another one\n    p = r.pipeline()\n    p.multi()\n    p.set(\"bar\", \"baz\")\n    p.execute()\n    assert r.get(\"bar\") == b\"baz\"\n\n\ndef test_pipeline(r: redis.Redis):\n    # The pipeline method returns an object for\n    # issuing multiple commands in a batch.\n    p = r.pipeline()\n    p.watch(\"bam\")\n    p.multi()\n    p.set(\"foo\", \"bar\").get(\"foo\")\n    p.lpush(\"baz\", \"quux\")\n    p.lpush(\"baz\", \"quux2\").lrange(\"baz\", 0, -1)\n    res = p.execute()\n\n    # Check return values returned as list.\n    assert res == [True, b\"bar\", 1, 2, [b\"quux2\", b\"quux\"]]\n\n    # Check side effects happened as expected.\n    assert r.lrange(\"baz\", 0, -1) == [b\"quux2\", b\"quux\"]\n\n    # Check that the command buffer has been emptied.\n    assert p.execute() == []\n\n\ndef test_pipeline_ignore_errors(r: redis.Redis):\n    \"\"\"Test the pipeline ignoring errors when asked.\"\"\"\n    with r.pipeline() as p:\n        p.set(\"foo\", \"bar\")\n        p.rename(\"baz\", \"bats\")\n        with 
pytest.raises(redis.exceptions.ResponseError):\n            p.execute()\n        assert [] == p.execute()\n    with r.pipeline() as p:\n        p.set(\"foo\", \"bar\")\n        p.rename(\"baz\", \"bats\")\n        res = p.execute(raise_on_error=False)\n\n        assert [] == p.execute()\n\n        assert len(res) == 2\n        assert isinstance(res[1], redis.exceptions.ResponseError)\n\n\ndef test_pipeline_non_transactional(r: redis.Redis):\n    # For our simple-minded model I don't think\n    # there is any observable difference.\n    p = r.pipeline(transaction=False)\n    res = p.set(\"baz\", \"quux\").get(\"baz\").execute()\n\n    assert res == [True, b\"quux\"]\n\n\ndef test_pipeline_raises_when_watched_key_changed(r: redis.Redis):\n    r.set(\"foo\", \"bar\")\n    r.rpush(\"greet\", \"hello\")\n    p = r.pipeline()\n    try:\n        p.watch(\"greet\", \"foo\")\n        nextf = bytes(p.get(\"foo\")) + b\"baz\"\n        # Simulate change happening on another thread.\n        r.rpush(\"greet\", \"world\")\n        # Begin pipelining.\n        p.multi()\n        p.set(\"foo\", nextf)\n\n        with pytest.raises(redis.WatchError):\n            p.execute()\n    finally:\n        p.reset()\n\n\ndef test_pipeline_succeeds_despite_unwatched_key_changed(r: redis.Redis):\n    # Same setup as before except for the params to the WATCH command.\n    r.set(\"foo\", \"bar\")\n    r.rpush(\"greet\", \"hello\")\n    p = r.pipeline()\n    try:\n        # Only watch one of the 2 keys.\n        p.watch(\"foo\")\n        nextf = bytes(p.get(\"foo\")) + b\"baz\"\n        # Simulate change happening on another thread.\n        r.rpush(\"greet\", \"world\")\n        p.multi()\n        p.set(\"foo\", nextf)\n        p.execute()\n\n        # Check the commands were executed.\n        assert r.get(\"foo\") == b\"barbaz\"\n    finally:\n        p.reset()\n\n\ndef test_pipeline_succeeds_when_watching_nonexistent_key(r: redis.Redis):\n    r.set(\"foo\", \"bar\")\n    r.rpush(\"greet\", 
\"hello\")\n    p = r.pipeline()\n    try:\n        # Also watch a nonexistent key.\n        p.watch(\"foo\", \"bam\")\n        nextf = bytes(p.get(\"foo\")) + b\"baz\"\n        # Simulate change happening on another thread.\n        r.rpush(\"greet\", \"world\")\n        p.multi()\n        p.set(\"foo\", nextf)\n        p.execute()\n\n        # Check the commands were executed.\n        assert r.get(\"foo\") == b\"barbaz\"\n    finally:\n        p.reset()\n\n\ndef test_watch_state_is_cleared_across_multiple_watches(r: redis.Redis):\n    r.set(\"foo\", \"one\")\n    r.set(\"bar\", \"baz\")\n    p = r.pipeline()\n\n    try:\n        p.watch(\"foo\")\n        # Simulate change happening on another thread.\n        r.set(\"foo\", \"three\")\n        p.multi()\n        p.set(\"foo\", \"three\")\n        with pytest.raises(redis.WatchError):\n            p.execute()\n\n        # Now watch another key.  It should be ok to change\n        # foo as we're no longer watching it.\n        p.watch(\"bar\")\n        r.set(\"foo\", \"four\")\n        p.multi()\n        p.set(\"bar\", \"five\")\n        assert p.execute() == [True]\n    finally:\n        p.reset()\n\n\n@pytest.mark.fake\ndef test_socket_cleanup_watch(fake_server):\n    r1 = fakeredis.FakeStrictRedis(server=fake_server)\n    r2 = fakeredis.FakeStrictRedis(server=fake_server)\n    pipeline = r1.pipeline(transaction=False)\n    # This needs some poking into redis-py internals to ensure that we reach\n    # FakeSocket._cleanup. 
We need to close the socket while there is still\n    # a watch in place, but not allow it to be garbage collected (hence we\n    # set 'sock' even though it is unused).\n    with pipeline:\n        pipeline.watch(\"test\")\n        sock = pipeline.connection._sock  # noqa: F841\n        pipeline.connection.disconnect()\n    r2.set(\"test\", \"foo\")\n\n\ndef test_get_within_pipeline(r: redis.Redis):\n    r.set(\"test\", \"foo\")\n    r.set(\"test2\", \"foo2\")\n    expected_keys = set(r.keys())\n    with r.pipeline() as p:\n        assert set(r.keys()) == expected_keys\n        p.watch(\"test\")\n        assert set(r.keys()) == expected_keys\n\n\n@pytest.mark.fake\ndef test_get_within_pipeline_w_host():\n    r = fakeredis.FakeRedis(\"localhost\")\n    r.set(\"test\", \"foo\")\n    r.set(\"test2\", \"foo2\")\n    expected_keys = set(r.keys())\n    with r.pipeline() as p:\n        assert set(r.keys()) == expected_keys\n        p.watch(\"test\")\n        assert set(r.keys()) == expected_keys\n\n\n@pytest.mark.fake\ndef test_get_within_pipeline_no_args():\n    r = fakeredis.FakeRedis()\n    r.set(\"test\", \"foo\")\n    r.set(\"test2\", \"foo2\")\n    expected_keys = set(r.keys())\n    with r.pipeline() as p:\n        assert set(r.keys()) == expected_keys\n        p.watch(\"test\")\n        assert set(r.keys()) == expected_keys\n"
  },
  {
    "path": "tests/fakeredis/test/testtools.py",
    "content": "import importlib.util\n\nimport pytest\nimport redis\nfrom packaging.version import Version\n\nREDIS_VERSION = Version(redis.__version__)\n\n\ndef key_val_dict(size=100):\n    return {f\"key:{i}\".encode(): f\"val:{i}\".encode() for i in range(size)}\n\n\ndef raw_command(r: redis.Redis, *args):\n    \"\"\"Like execute_command, but does not do command-specific response parsing\"\"\"\n    response_callbacks = r.response_callbacks\n    try:\n        r.response_callbacks = {}\n        return r.execute_command(*args)\n    finally:\n        r.response_callbacks = response_callbacks\n\n\nALLOWED_CONDITIONS = {\"eq\", \"gte\", \"lte\", \"lt\", \"gt\", \"ne\"}\n\n\ndef run_test_if_redispy_ver(condition: str, ver: str):\n    if condition not in ALLOWED_CONDITIONS:\n        raise ValueError(\n            f\"condition {condition} is not in allowed conditions ({ALLOWED_CONDITIONS})\"\n        )\n    cond = False\n    cond = cond or condition == \"eq\" and REDIS_VERSION == Version(ver)\n    cond = cond or condition == \"gte\" and REDIS_VERSION >= Version(ver)\n    cond = cond or condition == \"lte\" and REDIS_VERSION <= Version(ver)\n    cond = cond or condition == \"lt\" and REDIS_VERSION < Version(ver)\n    cond = cond or condition == \"gt\" and REDIS_VERSION > Version(ver)\n    cond = cond or condition == \"ne\" and REDIS_VERSION != Version(ver)\n    return pytest.mark.skipif(\n        not cond,\n        reason=f\"Test is not applicable to redis-py {REDIS_VERSION} ({condition}, {ver})\",\n    )\n\n\n_lua_module = importlib.util.find_spec(\"lupa\")\nrun_test_if_lupa = pytest.mark.skipif(\n    _lua_module is None, reason=\"Test is only applicable if lupa is installed\"\n)\n\nfake_only = pytest.mark.parametrize(\n    \"create_redis\",\n    [pytest.param(\"FakeStrictRedis\", marks=pytest.mark.fake)],\n    indirect=True,\n)\n"
  },
  {
    "path": "tests/integration/.dockerignore",
    "content": "Dockerfile\n*.Dockerfile\nstress_shutdown.sh\nget_sets.sh\nasync.py\ngenerate_sets.py\nvenv\n"
  },
  {
    "path": "tests/integration/.run_ioredis_valid_test.sh",
    "content": "#!/usr/bin/env bash\n\n# The following tests are not supported\n#\"should reconnect if reconnectOnError\n# supported in transaction blocks\n# rejects when monitor is disabled\n# should resend unfulfilled commands to the correct\n# should set the name before any subscribe\n# should name the connection if options\n# scanStream\n# should affect the old way\n# should support Map\n# should support object\n# should batch all commands before ready event\n# should support key prefixing for sort\n# should be sent on the connect event\n\n## Some issues that are still open need to be resolved such as\n# https://github.com/dragonflydb/dragonfly/issues/457\n# and https://github.com/dragonflydb/dragonfly/issues/458\n\n\n# The follwing tests would pass once we support script flush command:\n# does not fallback to EVAL in manual transaction\n# does not fallback to EVAL in regular\n# should reload scripts on redis restart (reconnect)\"\n\n\nTS_NODE_TRANSPILE_ONLY=true NODE_ENV=test mocha \\\n\"test/helpers/*.ts\" \"test/unit/**/*.ts\" \"test/functional/**/*.ts\" \\\n-g \"should reload scripts on redis restart|should reconnect if reconnectOnError|should be supported in transaction blocks|rejects when monitor is disabled|should resend unfulfilled commands to the correct|should set the name before any subscribe|should name the connection if options|scanStream|does not fallback to EVAL|should try to use EVALSHA and fallback to EVAL|should use evalsha when script|should affect the old way|should support Map|should support object|should batch all commands before ready event|should support key prefixing for sort|should be sent on the connect event|spub|ssub|should support parallel script execution|works for moved\" \\\n--invert\n"
  },
  {
    "path": "tests/integration/async.py",
    "content": "#!/usr/bin/env python3\n\n\"\"\"\nThis is the script that helped to reproduce https://github.com/dragonflydb/dragonfly/issues/150\nThe outcome - stalled code with all its connections deadlocked.\nReproduced only with dragonfly in release mode on multi-core machine.\n\"\"\"\n\nimport asyncio\nimport aioredis\n\nfrom loguru import logger as log\nimport sys\nimport random\n\nconnection_pool = aioredis.ConnectionPool(\n    host=\"localhost\", port=6379, db=1, decode_responses=True, max_connections=16\n)\n\n\nkey_index = 1\n\n\nasync def post_to_redis(sem, db_name, index):\n    global key_index\n    async with sem:\n        results = None\n        try:\n            redis_client = aioredis.Redis(connection_pool=connection_pool)\n            async with redis_client.pipeline(transaction=True) as pipe:\n                for i in range(1, 15):\n                    pipe.hsetnx(name=f\"key_{key_index}\", key=\"name\", value=\"bla\")\n                    key_index += 1\n                # log.info(f\"after first half {key_index}\")\n                for i in range(1, 15):\n                    pipe.hsetnx(name=f\"bla_{key_index}\", key=\"name2\", value=\"bla\")\n                    key_index += 1\n                assert len(pipe.command_stack) > 0\n                log.info(f\"before pipe.execute {key_index}\")\n                results = await pipe.execute()\n                log.info(f\"after pipe.execute {key_index}\")\n        finally:\n            # log.info(f\"before close {index}\")\n            await redis_client.aclose()\n            # log.info(f\"after close {index} {len(results)}\")\n\n\nasync def do_concurrent(db_name):\n    tasks = []\n    sem = asyncio.Semaphore(10)\n    for i in range(1, 3000):\n        tasks.append(post_to_redis(sem, db_name, i))\n    res = await asyncio.gather(*tasks)\n\n\nif __name__ == \"__main__\":\n    log.remove()\n    log.add(sys.stdout, enqueue=True, level=\"INFO\")\n    loop = asyncio.get_event_loop()\n    
loop.run_until_complete(do_concurrent(\"my_db\"))\n"
  },
  {
    "path": "tests/integration/gen_sets.sh",
    "content": "#!/bin/bash\n\nmemtier_benchmark -p 6379 --command \"sadd __key__ __data__\"   -n 20 --threads=4 \\\n    -c 10 --command-key-pattern=R --distinct-client-seed -c 30 --data-size=64 \\\n    --key-prefix=\"key:\"  --hide-histogram --random-data --key-maximum=10000\n\n"
  },
  {
    "path": "tests/integration/generate_sets.py",
    "content": "#!/usr/bin/env python3\n\nimport argparse\nimport random\nimport string\nimport redis as rclient\nimport uuid\nimport time\n\n\ndef fill_set(args, redis: rclient.Redis):\n    for j in range(args.num):\n        token = uuid.uuid1().hex\n        # print(token)\n        key = f\"USER_OTP:{token}\"\n        arr = []\n        for i in range(30):\n            otp = \"\".join(random.choices(string.ascii_uppercase + string.digits, k=12))\n            arr.append(otp)\n        redis.execute_command(\"sadd\", key, *arr)\n\n\ndef fill_hset(args, redis):\n    for j in range(args.num):\n        token = uuid.uuid1().hex\n        key = f\"USER_INFO:{token}\"\n        phone = f\"555-999-{j}\"\n        user_id = \"user\" * 5 + f\"-{j}\"\n        redis.hset(key, \"phone\", phone)\n        redis.hset(key, \"user_id\", user_id)\n        redis.hset(key, \"login_time\", time.time())\n\n\ndef main():\n    parser = argparse.ArgumentParser(description=\"fill hset entities\")\n    parser.add_argument(\"-p\", type=int, help=\"redis port\", dest=\"port\", default=6380)\n    parser.add_argument(\"-n\", type=int, help=\"number of keys\", dest=\"num\", default=10000)\n    parser.add_argument(\n        \"--type\", type=str, choices=[\"hset\", \"set\"], help=\"set type\", default=\"hset\"\n    )\n\n    args = parser.parse_args()\n    redis = rclient.Redis(host=\"localhost\", port=args.port, db=0)\n    if args.type == \"hset\":\n        fill_hset(args, redis)\n    elif args.type == \"set\":\n        fill_set(args, redis)\n\n\nif __name__ == \"__main__\":\n    main()\n"
  },
  {
    "path": "tests/integration/ioredis.Dockerfile",
    "content": "# syntax=docker/dockerfile:1\n\nFROM node:18.7.0\nENV NODE_ENV=development\nENV RUN_IN_DOCKER=1\n\nWORKDIR /app\n\n# Git\nRUN apt update -y && apt install -y git\n\n# The latest version from io-redis contain changes that we need to have\n# to successfully run the tests\nRUN git clone https://github.com/luin/ioredis\n\nWORKDIR /app/ioredis\n\nRUN npm install\n\n# Script to run the tests that curretly pass successfully.\n# Note that in DF we still don't have support for cluster and we\n# want to skip tests such as elasticache, also we have some issues that\n# need to be resolved such as\n# https://github.com/dragonflydb/dragonfly/issues/457\n# and https://github.com/dragonflydb/dragonfly/issues/458\nADD .run_ioredis_valid_test.sh run_tests.sh\n\nENTRYPOINT [ \"npm\", \"run\", \"env\", \"--\", \"TS_NODE_TRANSPILE_ONLY=true\", \"NODE_ENV=test\" ]\n"
  },
  {
    "path": "tests/integration/jedis.Dockerfile",
    "content": "# syntax=docker/dockerfile:1\n\nFROM maven:3.8.6-jdk-11\nENV NODE_ENV=development\n\nWORKDIR /app\n# Clone jedis dragonfly fork\nRUN git clone -b dragonfly https://github.com/dragonflydb/jedis.git\n\nWORKDIR /app/jedis\n\n# Build the client and tests\nRUN mvn test -DskipTests \n\n# Run selected tests\nCMD mvn surefire:test -Dtest=\"AllKindOfValuesCommandsTest,BitCommandsTest,ControlCommandsTest,ControlCommandsTest,HashesCommandsTest,ListCommandsTest,ScriptingCommandsTest,ScriptingCommandsTest,SetCommandsTest,SetCommandsTest,SetCommandsTest,TransactionCommandsTest,ClientCommandsTest,PublishSubscribeCommandsTest,SortedSetCommandsTest,SortingCommandsTest,StreamsCommandsTest\" \n\n"
  },
  {
    "path": "tests/integration/node-redis.Dockerfile",
    "content": "# syntax=docker/dockerfile:1\n\nFROM node:18.7.0\nENV NODE_ENV=development\n\nWORKDIR /app\n# Clone node-redis dragonfly fork\nRUN git clone -b dragonfly https://github.com/dragonflydb/node-redis.git\n\nWORKDIR /app/node-redis\n\nRUN npm install && npm run build:tests-tools\n\nCMD npm run test -w ./packages/client -- --redis-version=2.8\n"
  },
  {
    "path": "tests/integration/pascaldekloe.Dockerfile",
    "content": "FROM golang:1.20\n\nRUN git clone https://github.com/pascaldekloe/redis.git\nWORKDIR redis\n\nENV TEST_REDIS_ADDR=localhost\n\nCMD [\"go\", \"test\", \"-v\"]\n"
  },
  {
    "path": "tests/integration/relay.Dockerfile",
    "content": "# to build the test\n# docker build --pull -t relay-test -f ./relay.Dockerfile .\n# to run the test, start dragonfly locally with port 6379\n# then\n# docker run --network=host -t relay-test\n\nFROM linuxmintd/mint21.2-amd64\n\nARG DEBIAN_FRONTEND=noninteractive\n\nRUN apt-get update\n\nRUN add-apt-repository -y ppa:ondrej/php\n\nRUN apt-get install -y \\\n  curl \\\n  php-dev\n\n# Install Relay dependencies\nRUN apt-get install -y \\\n  php-msgpack \\\n  php-igbinary\n\nARG RELAY=v0.6.8\n\n# Download Relay\nRUN PHP=$(php -r 'echo substr(PHP_VERSION, 0, 3);') \\\n  && curl -L \"https://builds.r2.relay.so/$RELAY/relay-$RELAY-php$PHP-debian-x86-64+libssl3.tar.gz\" | tar xz --strip-components=1 -C /tmp\n\n# Copy relay.{so,ini}\nRUN cp \"/tmp/relay.ini\" $(php-config --ini-dir)/30-relay.ini \\\n  && cp \"/tmp/relay-pkg.so\" $(php-config --extension-dir)/relay.so\n\n# Inject UUID\nRUN sed -i \"s/00000000-0000-0000-0000-000000000000/$(cat /proc/sys/kernel/random/uuid)/\" $(php-config --extension-dir)/relay.so\n\n# needed by the Relay benchmark\nRUN apt-get install -y composer php-curl\n\n# checkout relay benchmark\nRUN git clone https://github.com/cachewerk/relay.git\n\nWORKDIR relay\nRUN composer install\n\nWORKDIR benchmarks\n\nCMD ./run --filter '^(Relay)'\n"
  },
  {
    "path": "tests/integration/run_ioredis_on_docker.sh",
    "content": "#!/usr/bin/env bash\n# Running this with --build would build the image as well\nif [ \"$1\" = \"--build\" ]; then\n    docker build -t ioredis-test -f ./ioredis.Dockerfile . || {\n        echo \"failed to build io redis image\"\n        exit 1\n    }\nfi\n\n# run the tests\necho \"running ioredis tests\"\ndocker run --rm -i --network=host ioredis-test ./run_tests.sh\nif [ $? -ne 0 ];then\n\techo \"some tests failed - please look at the output from this run\"\n\texit 1\nelse\n\techo \"finish runing tests successfully\"\n\texit 0\nfi\n"
  },
  {
    "path": "tests/integration/stress_shutdown.sh",
    "content": "#!/bin/bash\n\nwhile true; do\n./dragonfly  --vmodule=accept_server=1,listener_interface=1 --logbuflevel=-1 &\nDRAGON_PID=$!\necho \"dragonfly pid $DRAGON_PID\"\n\nsleep 0.5\n\nmemtier_benchmark -p 6379 --ratio 1:0  -n 100000 --threads=2 --expiry-range=15-25  --distinct-client-seed \\\n                  --hide-histogram 2> /dev/null > /dev/null &\nMEMT_ID=$!\n\necho \"memtier pid $MEMT_ID\"\necho \"Running..............\"\nsleep 5\necho \"killing dragonfly\"\n\nkill $DRAGON_PID\nwait $DRAGON_PID\n\ndone"
  },
  {
    "path": "tests/pytest.ini",
    "content": "[pytest]\nlog_format = [%(asctime)s.%(msecs)03d %(levelname)s] %(message)s\nlog_cli_format = [%(asctime)s.%(msecs)03d %(levelname)s] %(message)s\nlog_date_format = %Y-%m-%d %H:%M:%S\nlog_file_level=INFO\nlog_cli = true\nasyncio_mode=auto\naddopts = -ra --emoji -m \"not large\"\nmarkers =\n# Tests that should only run on release builds and take significant amount of time to run.\n# For example stress tests found in replication.\n# `opt_only` runs only on regression-test and release workflow (skipped for ci)\n  opt_only: marks tests that are only reasonable to run against an opt-built Dragonfly\n# Usually tests that are known to fail for iouring and we skip them on epoll workflows\n  exclude_epoll: marks tests that should not run on epoll socket\n# Tests that should only run in debug mode because release builds are fast enough\n# for their assertions to hold. They never run on release build.\n  debug_only: mark tests that should run only in debug mode\n# Heavy tests that require large runners and significant resources to complete.\n# Run only in heavy-tests workflow on CI-LARGE runners.\n  large: marks tests as large/heavy (deselect with '-m \"not large\"')\nfilterwarnings =\n    ignore::DeprecationWarning\n"
  },
  {
    "path": "tools/balls_bins.py",
    "content": "#!/usr/bin/env python3\n\n\"\"\"Simulate throwing balls into bins.\"\"\"\n\nimport numpy as np\nimport argparse\nimport matplotlib.pyplot as plt\n\n\ndef simulate_balls_into_bins(balls: int, bins: int, threshold: int, exact, trials=10000):\n    \"\"\"Simulate throwing M balls into N bins for a given number of trials.\"\"\"\n    counts = np.zeros(bins, dtype=int)\n    success = 0\n    exact_success = 0\n    deltas = []\n\n    for _ in range(trials):\n        # Reset counts for each trial\n        counts.fill(0)\n\n        # Throw M balls into the bins\n        bins_seq = np.random.randint(0, bins, balls)\n        unique, counts_bins = np.unique(bins_seq, return_counts=True)\n        counts[unique] += counts_bins\n        deltas.append(counts.max() - counts.min())\n        # Check if any bin has K or more balls\n        if np.any(counts >= threshold):\n            success += 1\n        if exact is not None:\n            if np.any(counts == exact):\n                exact_success += 1\n\n    probability = success / trials\n    return deltas, probability, exact_success / trials\n\n\ndef main():\n    parser = argparse.ArgumentParser(description=\"Simulate throwing balls into bins.\")\n    parser.add_argument(\"--balls\", type=int, default=30, help=\"Number of balls to throw.\")\n    parser.add_argument(\"--bins\", type=int, default=3, help=\"Number of bins.\")\n    parser.add_argument(\n        \"--high-threshold\",\n        type=int,\n        default=15,\n        help=\"Minimum number of balls for the success condition\",\n    )\n    parser.add_argument(\n        \"--exact-num\", type=int, help=\"Exact number of balls for the success condition.\"\n    )\n    parser.add_argument(\n        \"--trials\", type=int, default=10000, help=\"Number of trials. 
Default is 10,000.\"\n    )\n\n    args = parser.parse_args()\n\n    deltas, atleast_p, exact_p = simulate_balls_into_bins(\n        args.balls, args.bins, args.high_threshold, args.exact_num, args.trials\n    )\n\n    print(f\"Probability that at least one bin has {args.high_threshold} or more balls: {atleast_p}\")\n    if args.exact_num is not None:\n        print(f\"Probability that at least one bin has {args.exact_num} balls: {exact_p}\")\n\n    print(\n        f\"Histogram of the difference between the most and least populated bins for {args.trials} trials\"\n    )\n    plt.hist(deltas, bins=30, color=\"steelblue\", edgecolor=\"none\")\n    plt.show()\n\n\nif __name__ == \"__main__\":\n    main()\n"
  },
  {
    "path": "tools/benchmark/k8s-benchmark-job.yaml",
    "content": "---\napiVersion: batch/v1\nkind: Job\nmetadata:\n  name: memtier-benchmark\nspec:\n  backoffLimit: 0\n  template:\n    spec:\n      containers:\n        - name: memtier\n          image: redislabs/memtier_benchmark:latest\n          args:\n            - memtier_benchmark --pipeline=30 --key-maximum=100000 -c 10 -t 2 --test-time=600 --reconnect-interval=10000 --distinct-client-seed --hide-histogram -s dragonfly-sample\n          command:\n            - sh # This is important! without it memtier cannot DIG the dragonfly SVC domain\n            - -c\n          resources:\n            requests:\n              cpu: \"2\"\n              memory: \"500Mi\"\n            limits:\n              cpu: \"2\"\n              memory: \"500Mi\"\n      restartPolicy: Never\n"
  },
  {
    "path": "tools/benchmark/post_run_checks.py",
    "content": "#!/usr/bin/env python3\nimport redis\nimport time\n\n\ndef main():\n    max_unaccounted = 200 * 1024 * 1024  # 200mb\n\n    client = redis.Redis(decode_responses=True)\n    info = client.info(\"server\")\n    # Check version upgrade finsihed from last released version to last weekly docker build\n    assert info[\"dragonfly_version\"] == \"df-HEAD-HASH-NOTFOUND\"\n\n    info = client.info(\"memory\")\n    print(f'Used memory {info[\"used_memory\"]}, rss {info[\"used_memory_rss\"]}')\n    assert info[\"used_memory_rss\"] - info[\"used_memory\"] < max_unaccounted\n\n    info = client.info(\"replication\")\n    assert info[\"role\"] == \"master\"\n    replication_state = info[\"slave0\"]\n    assert replication_state[\"state\"] == \"online\"\n\n    def is_zero_lag(replication_state):\n        return replication_state[\"lag\"] == 0\n\n    # Wait for 10 seconds for lag to be zero\n    for _ in range(10):\n        if is_zero_lag(replication_state):\n            break\n        time.sleep(1)\n        replication_state = client.info(\"replication\")[\"slave0\"]\n\n    if replication_state[\"lag\"] != 0:\n        print(f\"Lag is bad, expected 0, got {replication_state['lag']}\")\n        info = client.info(\"all\")\n        print(f\"Info all output: {info}\")\n        assert False\n\n\nif __name__ == \"__main__\":\n    main()\n"
  },
  {
    "path": "tools/cache_logs_player.py",
    "content": "#!/usr/bin/env python3\nimport argparse\nfrom datetime import datetime\nimport aioredis\nimport asyncio\nfrom aiocsv import AsyncReader\nimport aiofiles\n\n'''\nTo install: pip install -r requirements.txt\n'''\n\n\nclass Command:\n    args = None\n    sync_id = 0 # Commands with the same sync_id will be executed synchrnously\n\nclass TwitterCacheTraceParser:\n    \"\"\"\n    https://github.com/twitter/cache-trace\n    \"\"\"\n    def parse(self, csv) -> Command:\n        operation = csv[5]\n        key = csv[1] + \"a\"\n        value_size = int(csv[3])\n        synthetic_value = \"\".zfill(value_size)\n\n        client_id = csv[4]\n        ttl = csv[6]\n\n        cmd = Command()\n        cmd.sync_id = client_id\n\n        if operation == \"get\":\n            cmd.args = [\"GET\", key]\n        elif operation == 'gets':\n            cmd.args = [\"GET\", key]\n        elif operation == 'set':\n            cmd.args = [\"SET\", key, synthetic_value]\n        elif operation == 'add':\n            cmd.args = [\"SET\", key, synthetic_value]\n        elif operation == 'replace':\n            cmd.args = [\"SET\", key, synthetic_value]\n        elif operation == 'cas':\n            cmd.args = [\"SET\", key, synthetic_value]\n        elif operation == 'append':\n            cmd.args = [\"APPEND\", key, synthetic_value]\n        elif operation == 'prepend':\n            cmd.args = [\"SET\", key, synthetic_value]\n        elif operation == 'delete':\n            cmd.args = [\"DEL\", key]\n        elif operation == 'incr':\n            cmd.args = [\"INCR\", key]\n        elif operation == 'decr':\n            cmd.args = [\"DECR\", key]\n\n        return cmd\n\nclass AsyncWorker:\n    QUEUE_SIZE = 100000\n\n    def __init__(self, redis_client) -> None:\n        self.queue = asyncio.Queue(self.QUEUE_SIZE)\n        self.redis_client = redis_client\n        self.working = False\n\n    async def put(self, batch: list) -> None:\n        await self.queue.put(batch)\n\n  
  async def work(self) -> None:\n        self.working = True\n        while self.working or not self.queue.empty() :\n            batch = await self.queue.get()\n            await self.execute(batch)\n\n    async def execute(self, batch) -> None:\n        async with self.redis_client.pipeline(transaction=False) as pipe:\n            for cmd in batch:\n                pipe.execute_command(*cmd.args)\n            await pipe.execute()\n\n    def start(self) -> asyncio.Task:\n        return asyncio.create_task(self.work())\n\n    def stop(self) -> None:\n        self.working = False\n\nclass AsyncWorkerPool:\n    \"\"\"\n    Manages worker pool to send commands in parallel\n    Maintains synchronous order for commands with the same sync_id\n    \"\"\"\n    def __init__(self, redis_client, num_workers) -> None:\n        self.redis_client = redis_client\n        self.num_workers = num_workers\n        self.workers = []\n        self.tasks = []\n        self.sync_id_to_worker = {}\n        self.next_worker_index = -1\n\n    def allocate(self, sync_id) -> AsyncWorker:\n        if not sync_id in self.sync_id_to_worker:\n            self.next_worker_index = (self.next_worker_index + 1) % self.num_workers\n\n            if len(self.workers) <= self.next_worker_index:\n                assert len(self.workers) == self.next_worker_index\n                self.workers.append(AsyncWorker(self.redis_client))\n                self.tasks.append(self.workers[self.next_worker_index].start())\n\n            self.sync_id_to_worker[sync_id] = self.workers[self.next_worker_index]\n\n        return self.sync_id_to_worker[sync_id]\n\n    async def put(self, batch: list, sync_id: int) -> None:\n        worker = self.allocate(sync_id)\n        await worker.put(batch)\n\n    async def stop(self):\n        for worker in self.workers:\n            worker.stop()\n        await asyncio.gather(*self.tasks)\n\n\nclass AsyncPlayer:\n    READ_BATCH_SIZE = 10 * 1000 * 1000\n\n    def __init__(self, 
redis_uri, num_workers) -> None:\n        self.redis_uri = redis_uri\n        self.redis_client = aioredis.from_url(f\"redis://{self.redis_uri}\", encoding=\"utf-8\", decode_responses=True)\n        self.worker_pool = AsyncWorkerPool(self.redis_client, 100)\n\n        self.batch_by_sync_id = {}\n\n    async def dispatch_batches(self):\n        for sync_id in self.batch_by_sync_id:\n            await self.worker_pool.put(self.batch_by_sync_id[sync_id], sync_id)\n        self.batch_by_sync_id.clear()\n\n    async def read_and_dispatch(self, csv_file, parser):\n        print(f\"dispatching from {csv_file}\")\n\n        line_count = 0\n\n        async with aiofiles.open(csv_file, mode=\"r\", encoding=\"utf-8\", newline=\"\") as afp:\n            async for row in AsyncReader(afp):\n                cmd = parser.parse(row)\n                if not self.batch_by_sync_id.get(cmd.sync_id):\n                    self.batch_by_sync_id[cmd.sync_id] = []\n                batch = self.batch_by_sync_id[cmd.sync_id]\n                batch.append(cmd)\n                line_count = line_count + 1\n                if (line_count >= self.READ_BATCH_SIZE):\n                    await self.dispatch_batches()\n                    line_count = 0\n            # handle the remaining lines\n            await self.dispatch_batches()\n\n    async def print_stats(self):\n        info = await self.redis_client.execute_command(\"info\", \"stats\")\n        print(f\"{datetime.now()}: {info}\")\n\n    async def report_stats(self):\n        while True:\n            self.print_stats()\n\n    async def report_stats(self):\n        while True:\n            await self.print_stats()\n            await asyncio.sleep(10)\n\n    async def play(self, csv_file, parser) -> None:\n        print(f\"pinging {self.redis_uri} successful?\")\n        print(await self.redis_client.ping())\n\n        read_dispatch_task = asyncio.create_task(self.read_and_dispatch(csv_file, parser))\n        stats_task = 
asyncio.create_task(self.report_stats())\n\n        await read_dispatch_task\n        print(f\"finished reading {csv_file}\")\n\n        await self.worker_pool.stop()\n        stats_task.cancel()\n        print(\"all done\")\n        await self.print_stats()\n\ndef main():\n    parser = argparse.ArgumentParser(description='Cache Logs Player')\n    parser.add_argument('-u', '--uri', type=str, default='localhost:6379', help='Redis server URI')\n    parser.add_argument('-f', '--csv_file', type=str, default='/home/ari/Downloads/cluster017.csv', help='Redis server URI')\n    parser.add_argument('--num_workers', type=int, default=100, help='Maximum number of workers sending commands in parallel')\n\n    args = parser.parse_args()\n\n    player = AsyncPlayer(redis_uri=args.uri, num_workers=args.num_workers)\n    asyncio.run(player.play(args.csv_file, TwitterCacheTraceParser()))\n\nif __name__ == \"__main__\":\n    main()\n"
  },
  {
    "path": "tools/cache_testing.py",
    "content": "#!/usr/bin/env python\n\nimport redis\nimport aioredis\nimport asyncio\nimport argparse\nimport numpy as np\n\n'''\nRun Cache Testing.\nThis tool performs cache testing for Dragonfly\nby calling the `incrby` function on a constrained set\nof items, as defined by the user. Additionally, it\ndistributes the frequency of `incrby` calls for each\nitem based on a Zipfian distribution (with alpha values\nbetween 0 and 1 being representative of real-life cache\nload scenarios)\n'''\n\n\ndef rand_zipf_generator(alpha: float, upper: int, batch: int):\n    \"\"\"\n    n: The upper bound of the values to generate a zipfian distribution over\n    (n = 30 would generate a distribution of given alpha from values 1 to 30)\n    alpha: The alpha parameter to be used while creating the Zipfian distribution\n    num_samples: The total number of samples to generate over the Zipfian distribution\n    This is a generator that yields up to count values using a generator.\n    \"\"\"\n\n    # Calculate Zeta values from 1 to n:\n    tmp = np.power(np.arange(1, upper+1), -alpha)\n    zeta = np.r_[0.0, np.cumsum(tmp)]\n\n    # Store the translation map:\n    distMap = [x / zeta[-1] for x in zeta]\n\n    while True:\n        # Generate an array of uniform 0-1 pseudo-random values:\n        u = np.random.random(batch)\n\n        # bisect them with distMap\n        v = np.searchsorted(distMap, u)\n\n        samples = [t-1 for t in v]\n        yield samples\n\n\ndef update_stats(hits, misses, value_index, total_count):\n    \"\"\"\n    A void function that uses terminal control sequences\n    to update hit/miss ratio stats for the user\n    while the testing tool runs.\n    \"\"\"\n    percent_complete = (value_index + 1) / total_count\n\n    # Use the terminal control sequence to move the cursor to the beginning of the line\n    print(\"\\r\", end=\"\")\n\n    # Print the loading bar and current hit rate\n    print(\"[{}{}] {:.0f}%, current hit rate: {:.6f}%\".format(\"#\" * 
int(percent_complete * 20), \" \" *\n          int(20 - percent_complete * 20), percent_complete * 100, (hits / (hits + misses)) * 100), end=\"\")\n\n\nasync def run_single_conn(redis_client, keys_gen, args) -> None:\n    misses = 0\n    hits = 0\n    val = 'x' * args.length\n    items_sent = 0\n    last_stat = 0\n    for keys in keys_gen:\n        if len(keys) == 1:\n            result = await redis_client.set(str(keys[0]), val, nx=True)\n            responses = [result]\n        else:\n            p = redis_client.pipeline(transaction=False)\n            for key in keys:\n                p.set(str(key), val, nx=True)\n            responses = await p.execute()\n\n        for resp in responses:\n            if resp:\n                misses += 1\n            else:\n                hits += 1\n        items_sent += len(keys)\n        if items_sent // 100 != last_stat:\n            last_stat = items_sent // 100\n            update_stats(hits, misses, items_sent, args.count)\n        if items_sent >= args.count:\n            break\n    print()\n\n\n\nif __name__ == '__main__':\n    parser = argparse.ArgumentParser(\n        description='Cache Benchmark', formatter_class=argparse.ArgumentDefaultsHelpFormatter)\n    parser.add_argument('-c', '--count', type=int, default=100000,\n                        help='total number of operations')\n    parser.add_argument('-u', '--uri', type=str,\n                        default='localhost:6379', help='Redis server URI')\n    parser.add_argument('-a', '--alpha', type=float, default=1.0,\n                        help='alpha value being used for the Zipf distribution')\n    parser.add_argument('--upper_bound', type=int, default=1000,\n                        help='the number of values to be used in the distribution')\n    parser.add_argument('-d', '--length', type=int, default=10,\n                        help='the length of the values to be used in the distribution')\n    parser.add_argument('-p', '--pipeline', type=int,\n            
            default=1, help='pipeline size')\n    parser.add_argument('-t', '--test', action='store_true')\n\n    args = parser.parse_args()\n    if args.test:\n        for idx, items in enumerate(rand_zipf_generator(args.alpha, args.upper_bound, 1)):\n            assert len(items) == 1\n            print(items[0])\n            if idx == args.count:\n                break\n        exit(0)\n\n    r = aioredis.from_url(\n        f\"redis://{args.uri}\", encoding=\"utf-8\", decode_responses=True)\n\n    distribution_keys_generator = rand_zipf_generator(\n        args.alpha, args.upper_bound, args.pipeline)\n\n    asyncio.run(run_single_conn(r, distribution_keys_generator, args))\n"
  },
  {
    "path": "tools/cluster_mgr.py",
    "content": "#!/usr/bin/env python3\n\nimport argparse\nfrom argparse import RawTextHelpFormatter\nimport json\nimport math\nfrom typing import Iterable, List\nimport redis\nimport subprocess\nimport time\n\n\"\"\"\nTo install: pip install -r requirements.txt\n\"\"\"\n\n\ndef die_with_err(err):\n    print(\"!!!\", err)\n    exit(-1)\n\n\nclass Node:\n    def __init__(self, host, port):\n        self.id = \"\"\n        self.host = host\n        self.port = port\n\n    def update_id(node):\n        node.id = send_command(node, [\"cluster\", \"myid\"])\n        print(f\"- ID {node.id}\")\n\n    def __repr__(self):\n        return f\"{self.host}:{self.port}/{self.id}\"\n\n    def to_dict(self):\n        return {\"id\": self.id, \"ip\": self.host, \"port\": self.port}\n\n\nclass Master(Node):\n    def __init__(self, host, port):\n        Node.__init__(self, host, port)\n        self.replicas = []\n\n\ndef start_node(node, dragonfly_bin, threads):\n    f = open(f\"/tmp/dfly.cluster.node.{node.port}.log\", \"w\")\n    print(f\"- Log file for node {node.port}: {f.name}\")\n    subprocess.Popen(\n        [\n            f\"{dragonfly_bin}\",\n            f\"--port={node.port}\",\n            \"--cluster_mode=yes\",\n            f\"--proactor_threads={threads}\",\n            \"--dbfilename=\",\n            f\"--logtostderr\",\n            \"--proactor_affinity_mode=off\",\n            \"--omit_basic_usage\",\n        ],\n        stderr=f,\n    )\n\n\ndef send_command(node, command, print_errors=True):\n    client = redis.Redis(decode_responses=True, host=node.host, port=node.port)\n\n    for i in range(0, 5):\n        try:\n            result = client.execute_command(*command)\n            return result\n        except Exception as e:\n            if print_errors:\n                print(e)\n            time.sleep(0.1 * i)\n        finally:\n            client.close()\n\n    if print_errors:\n        print(f\"Unable to run command {command} against {node.host}:{node.port} 
after 5 attempts!\")\n\n    return Exception()\n\n\nclass SlotRange:\n    def __init__(self, start, end):\n        assert start <= end\n        self.start = start\n        self.end = end\n\n    def to_dict(self):\n        return {\"start\": self.start, \"end\": self.end}\n\n    @classmethod\n    def from_dict(cls, d):\n        return cls(d[\"start\"], d[\"end\"])\n\n    def __repr__(self):\n        return f\"({self.start}-{self.end})\"\n\n    def merge(self, other: \"SlotRange\"):\n        if self.end + 1 == other.start:\n            self.end = other.end\n            return True\n        elif other.end + 1 == self.start:\n            self.start = other.start\n            return True\n        return False\n\n    def contains(self, slot_id):\n        return self.start <= slot_id <= self.end\n\n    def remove(self, slot_id):\n        assert self.contains(slot_id)\n\n        if self.start < self.end:\n            if slot_id == self.start:\n                return None, SlotRange(self.start + 1, self.end)\n            elif slot_id == self.end:\n                return SlotRange(self.start, self.end - 1), None\n            elif self.start < slot_id < self.end:\n                return SlotRange(self.start, slot_id - 1), SlotRange(slot_id + 1, self.end)\n        return None, None\n\n\n# Custom JSON encoder to handle SlotRange objects\nclass ClusterConfigEncoder(json.JSONEncoder):\n    def default(self, obj):\n        if isinstance(obj, SlotRange) or isinstance(obj, Node):\n            return obj.to_dict()\n        return super().default(obj)\n\n\ndef build_config_from_list(masters: List[Master]):\n    total_slots = 16384\n    slots_per_node = math.floor(total_slots / len(masters))\n\n    config = []\n    for i, master in enumerate(masters):\n        slot_range = SlotRange(i * slots_per_node, (i + 1) * slots_per_node - 1)\n        c = {\n            \"slot_ranges\": [slot_range],\n            \"master\": master,\n            \"replicas\": master.replicas,\n        }\n        
config.append(c)\n\n    # Adjust the last slot range to include any remaining slots\n    config[-1][\"slot_ranges\"][-1].end += total_slots % len(masters)\n    return config\n\n\ndef get_nodes_from_config(config):\n    nodes = []\n    for shard in config:\n        nodes.append(shard[\"master\"])\n        for replica in shard[\"replicas\"]:\n            nodes.append(replica)\n\n    for node in nodes:\n        node.update_id()\n    return nodes\n\n\ndef push_config(config):\n    def push_to_node(node, config):\n        # Use the custom encoder to convert SlotRange objects during serialization\n        config_str = json.dumps(config, indent=2, cls=ClusterConfigEncoder)\n        response = send_command(node, [\"dflycluster\", \"config\", config_str])\n        print(f\"- Push to {node.port}: {response}\")\n\n    for node in get_nodes_from_config(config):\n        push_to_node(node, config)\n\n\ndef create_locally(args):\n    print(f\"Setting up a Dragonfly cluster:\")\n    print(f\"- Master nodes: {args.num_masters}\")\n    print(f\"- Ports: {args.first_port}...{args.first_port + args.num_masters - 1}\")\n    print(f\"- Replicas for each master: {args.replicas_per_master}\")\n    print()\n\n    next_port = args.first_port\n    masters = []\n    for i in range(args.num_masters):\n        master = Master(\"127.0.0.1\", next_port)\n        next_port += 1\n        for j in range(args.replicas_per_master):\n            replica = Node(\"127.0.0.1\", next_port)\n            master.replicas.append(replica)\n            next_port += 1\n        masters.append(master)\n\n    nodes = []\n    for master in masters:\n        nodes.append(master)\n        for replica in master.replicas:\n            nodes.append(replica)\n\n    print(\"Starting nodes...\")\n    for node in nodes:\n        start_node(node, args.dragonfly_bin, args.threads)\n    print()\n    time.sleep(0.5)\n\n    if args.replicas_per_master > 0:\n        print(\"Configuring replication...\")\n        for master in 
masters:\n            for replica in master.replicas:\n                response = send_command(replica, [\"replicaof\", master.host, master.port])\n                print(f\"- {replica.port} replicating {master.port}: {response}\")\n        print()\n\n    print(f\"Getting IDs...\")\n    for n in nodes:\n        n.update_id()\n    print()\n\n    config = build_config_from_list(masters)\n    print(f\"Pushing config:\\n{config}\\n\")\n    push_config(config)\n    print()\n\n\ndef config_single_remote(args):\n    print(\n        f\"Configuring remote Dragonfly {args.target_host}:{args.target_port} to be a single-server cluster\"\n    )\n\n    master = Master(args.target_host, args.target_port)\n    master.update_id()\n\n    test = send_command(master, [\"get\", \"x\"], print_errors=False)\n    if type(test) is not Exception:\n        die_with_err(\"Node either not found or already configured\")\n\n    config = build_config_from_list([master])\n    print(f\"Pushing config:\\n{config}\\n\")\n    push_config(config)\n    print()\n\n\ndef build_config_from_existing(args):\n    def list_to_dict(l):\n        return {l[i]: l[i + 1] for i in range(0, len(l), 2)}\n\n    def build_node(node_list):\n        d = list_to_dict(node_list)\n        node = Node(d[\"endpoint\"], d[\"port\"])\n        node.id = d[\"id\"]\n        return node\n\n    def build_slots(slot_list):\n        slots = []\n        for i in range(0, len(slot_list), 2):\n            slots.append(SlotRange(slot_list[i], slot_list[i + 1]))\n        return slots\n\n    client = redis.Redis(decode_responses=True, host=args.target_host, port=args.target_port)\n    existing = client.execute_command(\"cluster\", \"shards\")\n    config = []\n    for shard_list in existing:\n        shard = list_to_dict(shard_list)\n        config.append(\n            {\n                \"slot_ranges\": build_slots(shard[\"slots\"]),\n                \"master\": build_node(shard[\"nodes\"][0]),\n                \"replicas\": 
[build_node(replica) for replica in shard[\"nodes\"][1::]],\n            }\n        )\n\n    client.close()\n    return config\n\n\ndef find_master(config, host, port, die_if_not_found=True):\n    new_owner = None\n    for shard in config:\n        if shard[\"master\"].host == host and shard[\"master\"].port == port:\n            new_owner = shard\n            break\n\n    if new_owner == None and die_if_not_found:\n        die_with_err(f\"Can't find master (hint: use flag --target_host / --target_port).\")\n\n    return new_owner\n\n\ndef find_replica(config, host, port):\n    for shard in config:\n        for replica in shard[\"replicas\"]:\n            if replica.host == host and replica.port == port:\n                return replica, shard\n    die_with_err(\"Can't find target node\")\n\n\ndef attach(args):\n    print(f\"Attaching remote Dragonfly {args.attach_host}:{args.attach_port} to cluster\")\n    if args.attach_as_replica:\n        newcomer = Node(args.attach_host, args.attach_port)\n        replica_resp = send_command(newcomer, [\"info\", \"replication\"])\n        if replica_resp[\"role\"] != \"slave\":\n            die_with_err(\"Node is not in replica mode\")\n        if (\n            replica_resp[\"master_host\"] != args.target_host\n            or replica_resp[\"master_port\"] != args.target_port\n        ):\n            die_with_err(\"Node is not a replica of target\")\n\n        newcomer.update_id()\n\n        config = build_config_from_existing(args)\n        master_node = find_master(config, args.target_host, args.target_port)\n\n        master_node[\"replicas\"].append(newcomer)\n        print(f\"Pushing config:\\n{config}\\n\")\n        push_config(config)\n    else:\n        newcomer = Master(args.attach_host, args.attach_port)\n        replica_resp = send_command(newcomer, [\"info\", \"replication\"])\n        if replica_resp[\"role\"] != \"master\":\n            die_with_err(\"Node is not in master mode\")\n        newcomer.update_id()\n\n 
       newcomer_config = build_config_from_list([newcomer])\n        newcomer_config[0][\"slot_ranges\"] = []\n        config = build_config_from_existing(args)\n        print(f\"Pushing config:\\n{config}\\n\")\n        push_config([*config, newcomer_config[0]])\n    print()\n\n\ndef detach(args):\n    print(f\"Detaching remote Dragonfly {args.target_host}:{args.target_port} from cluster\")\n    print(\n        \"Important: detached node will not receive a new config! This means that the detached node will still 'think' that it belongs to the cluster\"\n    )\n    config = build_config_from_existing(args)\n    node = find_master(config, args.target_host, args.target_port, die_if_not_found=False)\n    if node == None:\n        replica, master = find_replica(config, args.target_host, args.target_port)\n        master[\"replicas\"].remove(replica)\n    else:\n        if len(node[\"slot_ranges\"]) != 0:\n            die_with_err(\"Can't detach a master with assigned slots\")\n        if len(node[\"replicas\"]) != 0:\n            die_with_err(\"Can't detach a master with replicas\")\n        config = [m for m in config if m != node]\n    push_config(config)\n\n\ndef takeover(args):\n    print(f\"Promoting Dragonfly {args.target_host}:{args.target_port} from replica to master\")\n    print(\n        \"Important: do not forget to send command REPLICAOF NO ONE to new master, and update \"\n        \"           additional replicas if such exist\"\n    )\n    print(\"Important: previous master will be detached from the cluster\")\n\n    config = build_config_from_existing(args)\n    replica, master = find_replica(config, args.target_host, args.target_port)\n    master[\"replicas\"].remove(replica)\n    master[\"master\"] = replica\n\n    push_config(config)\n\n\ndef move(args):\n    config = build_config_from_existing(args)\n    new_owner = find_master(config, args.target_host, args.target_port)\n\n    def remove_slot(slot_id, from_range: SlotRange, slot_ranges: list):\n    
    slot_ranges.remove(from_range)\n        left, right = from_range.remove(slot_id)\n        if left:\n            slot_ranges.append(left)\n        if right:\n            slot_ranges.append(right)\n\n    def add_slot(slot, to_shard):\n        slot_range = SlotRange(slot, slot)\n        for existing_range in to_shard[\"slot_ranges\"]:\n            if existing_range.merge(slot_range):\n                return\n        to_shard[\"slot_ranges\"].append(slot_range)\n\n    def find_slot(slot, config):\n        for shard in config:\n            for slot_range in shard[\"slot_ranges\"]:\n                if slot_range.contains(slot):\n                    return shard, slot_range\n        return None, None\n\n    def pack(slot_ranges):\n        slot_objects = sorted(slot_ranges, key=lambda x: x.start)\n        packed = []\n        for slot_range in slot_objects:\n            if packed and packed[-1].merge(slot_range):\n                continue\n            packed.append(slot_range)\n        return packed\n\n    for slot in range(args.slot_start, args.slot_end + 1):\n        shard, slot_range = find_slot(slot, config)\n        if shard == None or shard == new_owner:\n            continue\n        remove_slot(slot, slot_range, shard[\"slot_ranges\"])\n        add_slot(slot, new_owner)\n\n    for shard in config:\n        shard[\"slot_ranges\"] = pack(shard[\"slot_ranges\"])\n\n    # Use the custom encoder for printing the JSON\n    print(f\"Pushing new config:\\n{json.dumps(config, indent=2, cls=ClusterConfigEncoder)}\\n\")\n    push_config(config)\n\n\ndef migrate(args):\n    config = build_config_from_existing(args)\n    target = find_master(config, args.target_host, args.target_port)\n    target_node = target[\"master\"]\n    target_node.update_id()\n\n    # Find source node\n    source = None\n    for node in config:\n        slots: Iterable[SlotRange] = node[\"slot_ranges\"]\n        for slot in slots:\n            if slot.start <= args.slot_start and slot.end >= 
args.slot_end:\n                source = node\n                break\n    if source == None:\n        die_with_err(\"Unsupported slot range migration (currently only 1-node migration supported)\")\n\n    source[\"migrations\"] = [\n        {\n            \"slot_ranges\": [{\"start\": args.slot_start, \"end\": args.slot_end}],\n            \"node_id\": target_node.id,\n            \"ip\": target_node.host,\n            \"port\": target_node.port,\n        }\n    ]\n    push_config(config)\n\n    # wait for migration finish\n    sync_status = []\n    while True:\n        sync_status = send_command(target_node, [\"DFLYCLUSTER\", \"SLOT-MIGRATION-STATUS\"])\n        if len(sync_status) == 0:\n            # Migration didn't start yet\n            continue\n        if len(sync_status) != 1:\n            die_with_err(f\"Unexpected number of migrations {len(sync_status)}: {sync_status}\")\n        if \"FATAL\" in sync_status[0]:\n            die_with_err(f\"Error in migration {len(sync_status)}: {sync_status}\")\n        if \"FINISHED\" in sync_status[0]:\n            print(f\"Migration finished: {sync_status[0]}\")\n            break\n\n    # Push new config to all nodes\n    print(\"Updating all nodes with new slots state\")\n    move(args)\n\n\ndef populate(args):\n    config = build_config_from_existing(args)\n    for shard in config:\n        master = shard[\"master\"]\n        slot_ranges = shard[\"slot_ranges\"]\n        for slot_range in slot_ranges:\n            cmd = [\n                \"debug\",\n                \"populate\",\n                str(args.size),\n                \"key\",\n                str(args.valsize),\n                \"SLOTS\",\n                str(slot_range.start),\n                str(slot_range.end),\n            ]\n            send_command(master, cmd)\n\n\ndef print_config(args):\n    config = build_config_from_existing(args)\n    print(json.dumps(config, indent=2, cls=ClusterConfigEncoder))\n\n\ndef shutdown(args):\n    config = 
build_config_from_existing(args)\n    for node in get_nodes_from_config(config):\n        send_command(node, [\"shutdown\"])\n\n\ndef main():\n    parser = argparse.ArgumentParser(\n        description=\"\"\"\nDragonfly Manual Cluster Manager\n\nThis tool helps managing a Dragonfly cluster manually.\nCluster can either be local or remote:\n- Starting Dragonfly instances must be done locally, binary path can be set with `--dragonfly_bin` (default: ../build-opt/dragonfly)\n- Remote Dragonflies must already be started, and initialized with `--cluster_mode=yes`\n\nExample usage:\n\nCreate a 3 node cluster locally:\n  ./cluster_mgr.py --action=create_locally --num_masters=3\nThis will create 3 Dragonfly processes with ports 7001-7003.\nPorts can be overridden with `--first_port`.\n\nCreate a 6 node cluster locally, 3 of them masters with 1 replica each:\n  ./cluster_mgr.py --action=create_locally --num_masters=3 --replicas_per_master=1\n\nConnect to existing cluster and print current config:\n  ./cluster_mgr.py --action=print_config\nThis will connect to 127.0.0.1:6379 by default. Override with `--target_host` and `--target_port`\n\nConfigure an existing Dragonfly server to be a standalone cluster (owning all slots):\n  ./cluster_mgr.py --action=config_single_remote\nThis connects to an *existing* Dragonfly server, and pushes a config telling it to own all slots.\nThis will connect to 127.0.0.1:6379 by default. Override with `--target_host` and `--target_port`\n\nAttach an existing Dragonfly server to an existing cluster (owning no slots):\n  ./cluster_mgr.py --action=attach --attach_host=HOST --attach_port=PORT\nThis will connect to existing cluster present at 127.0.0.1:6379 by default. Override with\n`--target_host` and `--target_port`.\nTo attach node as a replica - use --attach_as_replica=True. 
In such case, the node will be a\nreplica of --target_host/--target_port.\n\nTo set up a new cluster - start the servers and then use\n  ./cluster_mgr.py --action=config_single_remote ...\n  ./cluster_mgr.py --action=attach ...\nAnd repeat `--action=attach` for all servers.\nAfterwards, distribute the slots between the servers as desired with `--action=move` or\n`--action=migrate`.\n\nTo detach (remove) a node from the cluster:\n  ./cluster_mgr.py --action=detach --target_host=X --target_port=X\nNotes:\n- If the node is a master, it must not have any slots assigned to it.\n- The node will not be notified that it's no longer in a cluster. It's a good idea to shut it down\n  after detaching it from the cluster.\n\nTo take over (turn replica to master):\n  ./cluster_mgr.py --action=takeover --target_host=X --target_port=X\nNotes:\n- You'll need to run REPLICAOF NO ONE on the new master\n- If previous master had other replicas, you'll need to update them with REPLICAOF as well\n- Previous master will be detached from cluster. It's a good idea to shut it down.\n\nConnect to cluster and move slots 10-20 to target:\n  ./cluster_mgr.py --action=move --slot_start=10 --slot_end=20 --target_host=X --target_port=X\nWARNING: This will NOT migrate existing data, i.e. data in slots 10-20 will be erased.\n\nMigrate slots 10-20 to target:\n  ./cluster_mgr.py --action=migrate --slot_start=10 --slot_end=20 --target_host=X --target_port=X\nUnlike --action=move above, this will migrate the data to the new owner.\n\nConnect to cluster and shutdown all nodes:\n  ./cluster_mgr.py --action=shutdown --target_port=X\nWARNING: Be careful! This will close all Dragonfly servers connected to the cluster.\n\"\"\",\n        formatter_class=RawTextHelpFormatter,\n    )\n    parser.add_argument(\n        \"--action\",\n        default=\"\",\n        help=\"Which action to take? 
See `--help`\",\n    )\n    parser.add_argument(\n        \"--num_masters\", type=int, default=3, help=\"Number of master nodes in cluster\"\n    )\n    parser.add_argument(\n        \"--replicas_per_master\", type=int, default=0, help=\"How many replicas for each master\"\n    )\n    parser.add_argument(\"--first_port\", type=int, default=7001, help=\"First master's port\")\n    parser.add_argument(\"--threads\", type=int, default=2, help=\"Threads per node\")\n    parser.add_argument(\n        \"--slot_start\", type=int, default=0, help=\"First slot to move / migrate (inclusive)\"\n    )\n    parser.add_argument(\n        \"--slot_end\", type=int, default=100, help=\"Last slot to move / migrate (inclusive)\"\n    )\n    parser.add_argument(\"--target_host\", default=\"127.0.0.1\", help=\"Master host/ip\")\n    parser.add_argument(\"--target_port\", type=int, default=6379, help=\"Master port\")\n    parser.add_argument(\n        \"--attach_host\", default=\"127.0.0.1\", help=\"New cluster node master host/ip\"\n    )\n    parser.add_argument(\n        \"--attach_port\", type=int, default=6379, help=\"New cluster node master port\"\n    )\n    parser.add_argument(\n        \"--attach_as_replica\", type=bool, default=False, help=\"Is the attached node a replica?\"\n    )\n    parser.add_argument(\n        \"--dragonfly_bin\", default=\"../build-opt/dragonfly\", help=\"Dragonfly binary path\"\n    )\n    parser.add_argument(\n        \"--size\", type=int, default=1000000, help=\"Number of keys to populate in each slotrange\"\n    )\n    parser.add_argument(\n        \"--valsize\", type=int, default=16, help=\"Value size for each key during population\"\n    )\n\n    args = parser.parse_args()\n\n    actions = dict(\n        [\n            (f.__name__, f)\n            for f in [\n                create_locally,\n                shutdown,\n                config_single_remote,\n                attach,\n                detach,\n                takeover,\n                
move,\n                print_config,\n                migrate,\n                populate,\n            ]\n        ]\n    )\n    action = actions.get(args.action.lower())\n    if action:\n        action(args)\n    else:\n        die_with_err(f'Error - unknown action \"{args.action}\". See --help')\n\n\nif __name__ == \"__main__\":\n    main()\n"
  },
  {
    "path": "tools/defrag_db.py",
    "content": "import redis.asyncio as aioredis\nimport argparse\nimport asyncio\n\n\"\"\"\nThis script iterates over all keys and \"recycles\" them.\nRecycling is done by DUMPing the key first and then re-creating it with EXPIRE.\nThis will trigger re-allocation of internal data structures in order to reduce\nmemory fragmentation.\n\"\"\"\n\nSCRIPT = \"\"\"\nlocal recycled = 0\nfor _, key in ipairs(KEYS) do\n    local ttl = redis.call('PTTL', key)\n    local dumpedData = redis.call('DUMP', key)\n\n    if dumpedData then\n        redis.call('RESTORE', key, 0, dumpedData, 'REPLACE')\n        if ttl > 0 then\n            redis.call('PEXPIRE', key, ttl)\n        end\n        recycled = recycled + 1\n    end\nend\nreturn recycled\n\"\"\"\n\ntotal_recycled = 0\n\n\nasync def workerfn(client_supplier, sha, queue):\n    global total_recycled\n\n    r = client_supplier()\n    while True:\n        keys = await queue.get()\n\n        try:\n            recycled = await r.evalsha(sha, len(keys), *keys)\n        except Exception as e:\n            raise SystemExit(e)\n\n        if isinstance(recycled, int):\n            total_recycled += recycled\n        else:\n            print(\"Error recycling\", recycled)\n\n        queue.task_done()\n\n\nasync def infofn():\n    while True:\n        await asyncio.sleep(0.5)\n        print(\"Keys processed:\", total_recycled)\n\n\nasync def main(client_supplier, scan_type, num_workers, queue_size, batch_size):\n    r = client_supplier()\n    sha = await r.script_load(SCRIPT)\n    queue = asyncio.Queue(maxsize=queue_size)\n\n    workers = [\n        asyncio.create_task(workerfn(client_supplier, sha, queue)) for _ in range(num_workers)\n    ]\n    info_worker = asyncio.create_task(infofn())\n\n    keys = []\n    async for key in r.scan_iter(\"*\", count=batch_size * 2, _type=scan_type):\n        keys.append(key)\n        if len(keys) >= batch_size:\n            await queue.put(keys)\n            keys = []\n\n    await queue.put(keys)\n    
await queue.join()\n\n    info_worker.cancel()\n    for w in workers:\n        w.cancel()\n\n    await asyncio.gather(*workers, info_worker, return_exceptions=True)\n    print(\"Recycled in total:\", total_recycled)\n\n\narg_parser = argparse.ArgumentParser()\narg_parser.add_argument(\"--workers\", type=int, default=8)\narg_parser.add_argument(\"--batch\", type=int, default=20)\n\narg_parser.add_argument(\n    \"--type\", type=str, default=None, help=\"Process keys only of specified type\"\n)\n\narg_parser.add_argument(\"--db\", type=int)\narg_parser.add_argument(\"--port\", type=int, default=6379)\narg_parser.add_argument(\"--host\", type=str, default=\"localhost\")\nargs = arg_parser.parse_args()\n\n\ndef client_supplier():\n    return aioredis.StrictRedis(db=args.db, port=args.port, host=args.host)\n\n\nasyncio.run(main(client_supplier, args.type, args.workers, args.workers * 2, args.batch))\n"
  },
  {
    "path": "tools/defrag_mem_test.py",
    "content": "#!/usr/bin/env python3\nimport asyncio\nimport aioredis\nimport async_timeout\nimport sys\nimport argparse\n\n\"\"\"\nTo install: pip install -r requirements.txt\n\nRun\ndragonfly --mem_defrag_threshold=0.01 --mem_defrag_waste_threshold=0.01\ndefrag_mem_test.py -k 8000000 -v 645\n\nThis program would try to re-create the issue with memory defragmentation.\nSee issue number 448 for more details.\nTo run this:\n    You can just execute this from the command line without any arguemnts.\n    Or you can run with --help to see the options.\n    The defaults are:\n    number of keys: 800,000\n    value size: 64 bytes\n    key name pattern: key-for-testing\n    host: localhost\n    port: default redis port\n    Please note that this would create 4 * number of keys entries\n    You can see the memory usage/defrag state with the monitoring task that\n    prints the current state\n\nNOTE:\n    If this seems to get stuck please kill it with ctrl+c\n    This can happen in case we don't have \"defrag_realloc_total > 0\"\n\"\"\"\n\n\nclass TaskCancel:\n    def __init__(self):\n        self.run = True\n\n    def dont_stop(self):\n        return self.run\n\n    def stop(self):\n        self.run = False\n\n\nasync def run_cmd(connection, cmd, sub_val):\n    val = await connection.execute_command(cmd, sub_val)\n    return val\n\n\nasync def handle_defrag_stats(connection, prev):\n    info = await run_cmd(connection, \"info\", \"stats\")\n    if info is not None:\n        if info[\"defrag_task_invocation_total\"] != prev:\n            print(\"--------------------------------------------------------------\")\n            print(f\"defrag_task_invocation_total: {info['defrag_task_invocation_total']:,}\")\n            print(f\"defrag_realloc_total: {info['defrag_realloc_total']:,}\")\n            print(f\"defrag_attempt_total: {info['defrag_attempt_total']:,}\")\n            print(\"--------------------------------------------------------------\")\n            if 
info[\"defrag_realloc_total\"] > 0:\n                return True, None\n            return False, info[\"defrag_task_invocation_total\"]\n    return False, None\n\n\nasync def memory_stats(connection):\n    print(\"--------------------------------------------------------------\")\n    info = await run_cmd(connection, \"info\", \"memory\")\n    # print(f\"memory commited: {info['comitted_memory']:,}\")\n    print(f\"memory used: {info['used_memory']:,}\")\n    # print(f\"memory usage ratio: {info['comitted_memory']/info['used_memory']:.2f}\")\n    print(\"--------------------------------------------------------------\")\n\n\nasync def stats_check(connection, condition):\n    try:\n        defrag_task_invocation_total = 0\n        runs = 0\n        while condition.dont_stop():\n            await asyncio.sleep(0.3)\n            done, d = await handle_defrag_stats(connection, defrag_task_invocation_total)\n            if done:\n                print(\"defrag task successfully found memory locations to reallocate\")\n                condition.stop()\n            else:\n                if d is not None:\n                    defrag_task_invocation_total = d\n            runs += 1\n            if runs % 3 == 0:\n                await memory_stats(connection)\n        for i in range(5):\n            done, d = await handle_defrag_stats(connection, -1)\n            if done:\n                print(\"defrag task successfully found memory locations to reallocate\")\n                return True\n            else:\n                await asyncio.sleep(2)\n        return True\n    except Exception as e:\n        print(f\"failed to run monitor task: {e}\")\n    return False\n\n\nasync def delete_keys(connection, keys):\n    results = await connection.delete(*keys)\n    return results\n\n\ndef generate_keys(pattern: str, count: int, batch_size: int) -> list:\n    for i in range(1, count, batch_size):\n        batch = [f\"{pattern}{j}\" for j in range(i, batch_size + i, 3)]\n        
yield batch\n\n\nasync def mem_cleanup(connection, pattern, num, cond, keys_count):\n    counter = 0\n    for keys in generate_keys(pattern=pattern, count=keys_count, batch_size=950):\n        if cond.dont_stop() == False:\n            print(f\"task number {num} that deleted keys {pattern} finished\")\n            return counter\n        counter += await delete_keys(connection, keys)\n        await asyncio.sleep(0.2)\n    print(f\"task number {num} that deleted keys {pattern} finished\")\n    return counter\n\n\nasync def run_tasks(pool, key_name, value_size, keys_count):\n    keys = [f\"{key_name}-{i}\" for i in range(4)]\n    stop_cond = TaskCancel()\n    try:\n        connection = aioredis.Redis(connection_pool=pool)\n        for key in keys:\n            print(f\"creating key {key} with size {value_size} of count {keys_count}\")\n            await connection.execute_command(\"DEBUG\", \"POPULATE\", keys_count, key, value_size)\n            await asyncio.sleep(2)\n        tasks = []\n        count = 0\n        for key in keys:\n            pattern = f\"{key}:\"\n            print(f\"deleting keys from {pattern}\")\n            tasks.append(\n                mem_cleanup(\n                    connection=connection,\n                    pattern=pattern,\n                    num=count,\n                    cond=stop_cond,\n                    keys_count=int(keys_count),\n                )\n            )\n            count += 1\n        monitor_task = asyncio.create_task(stats_check(connection, stop_cond))\n        total = await asyncio.gather(*tasks, return_exceptions=True)\n        print(f\"successfully deleted {sum(total)} keys\")\n        stop_cond.stop()\n        await monitor_task\n        print(\"finish executing\")\n        return True\n    except Exception as e:\n        print(f\"got error {e} while running delete keys\")\n        return False\n\n\ndef connect_and_run(key_name, value_size, keys_count, host=\"localhost\", port=6379):\n    async_pool = 
aioredis.ConnectionPool(\n        host=host, port=port, db=0, decode_responses=True, max_connections=16\n    )\n\n    loop = asyncio.new_event_loop()\n    success = loop.run_until_complete(\n        run_tasks(pool=async_pool, key_name=key_name, value_size=value_size, keys_count=keys_count)\n    )\n    return success\n\n\nif __name__ == \"__main__\":\n    parser = argparse.ArgumentParser(\n        description=\"active memory testing\", formatter_class=argparse.ArgumentDefaultsHelpFormatter\n    )\n    parser.add_argument(\"-k\", \"--keys\", type=int, default=800000, help=\"total number of keys\")\n    parser.add_argument(\"-v\", \"--value_size\", type=int, default=645, help=\"size of the values\")\n    parser.add_argument(\n        \"-n\", \"--key_name\", type=str, default=\"key-for-testing\", help=\"the base key name\"\n    )\n    parser.add_argument(\"-s\", \"--server\", type=str, default=\"localhost\", help=\"server host name\")\n    parser.add_argument(\"-p\", \"--port\", type=int, default=6379, help=\"server port number\")\n    args = parser.parse_args()\n    keys_num = args.keys\n    key_name = args.key_name\n    value_size = args.value_size\n    host = args.server\n    port = args.port\n    print(\n        f\"running key deletion on {host}:{port} for keys {key_name} value size of {value_size} and number of keys {keys_num}\"\n    )\n    result = connect_and_run(\n        key_name=key_name, value_size=value_size, keys_count=keys_num, host=host, port=port\n    )\n    if result == True:\n        print(\"finished successfully\")\n    else:\n        print(\"failed\")\n"
  },
  {
    "path": "tools/docker/entrypoint.sh",
    "content": "#!/bin/sh\n\n# This is important in order to provide enough locked memory to dragonfly\n# when running on kernels < 5.12.\n# This line should reside before `set -e` so it could fail silently\n# in case the container runs in non-privileged mode.\nulimit -l 65000 2> /dev/null\n\nset -e\n\n# first arg is `-some-option`\nif [ \"${1#-}\" != \"$1\" ]; then\n    # override arguments by prepending \"dragonfly --logtostderr\" to them.\n    set -- dragonfly --logtostderr \"$@\"\nfi\n\n# allow the docker container to be started with `--user`\nif [ \"$1\" = 'dragonfly' -a \"$(id -u)\" = '0' ]; then\n    # find all the files in the WORKDIR, including the dir itself, that are not\n    # owned by user dfly and chown them to dfly.\n    find . \\! -user dfly -exec chown dfly '{}' +\n    # runs this script under user dfly\n    exec setpriv --reuid=dfly --regid=dfly --clear-groups -- \"$0\" \"$@\"\nfi\n\num=\"$(umask)\"\nif [ \"$um\" = '0022' ]; then\n    umask 0077  # restrict access permissions only to the owner\nfi\n\nexec \"$@\"\n"
  },
  {
    "path": "tools/docker/fetch_release.sh",
    "content": "#!/bin/sh\n\nset -e\n\nPLATFORM=$1\n\nPSHORT=${PLATFORM#\"linux/\"}\necho \"PSHORT ${PSHORT}\"\n\n\nif [ \"${PSHORT}\" = \"amd64\" ]; then\n  SUFFIX='x86_64'\nelse\n  SUFFIX='aarch64'\nfi\n\nmv /tmp/dragonfly-${SUFFIX} /build/dragonfly\nls -l /build/"
  },
  {
    "path": "tools/docker/healthcheck.sh",
    "content": "#!/bin/sh\n\n# Cleanup function to prevent zombie processes (issue #5844)\n# This is critical when dragonfly runs as PID 1 without an init system\ncleanup() {\n  # Wait for all background/child processes to finish\n  wait 2>/dev/null || true\n}\n\n# Set trap to ensure cleanup runs on exit, regardless of how the script exits\ntrap cleanup EXIT\n\nHOST=\"localhost\"\nPORT=$HEALTHCHECK_PORT\n\nif [ -z \"$HEALTHCHECK_PORT\" ]; then\n  # try the unprivileged version first. This should cover cases when the container is running\n  # without root, for example:\n  # docker run  --group-add 999  --cap-drop=ALL --user 999 docker.dragonflydb.io/dragonflydb/dragonfly\n  DF_NET=$(netstat -tlnp | grep \"/dragonfly\")\n  if [ -z \"$DF_NET\" ]; then\n    # if that failed, try the privileged version. This is triggered by the regular command:\n    # docker run docker.dragonflydb.io/dragonflydb/dragonfly\n    DF_NET=$(su dfly -c \"netstat -tlnp\" | grep \"/dragonfly\")\n  fi\n\n  # check all the TCP ports, and fetch the port.\n  # For cases when dragonfly opens multiple ports, we filter with tail to choose one of them.\n  PORT=$(echo $DF_NET | grep -oE ':[0-9]+' | cut -c2- | tail -n 1)\nfi\n\n_healthcheck=\"nc -q1 $HOST $PORT\"\n\necho PING | ${_healthcheck}\n\nexit $?\n"
  },
  {
    "path": "tools/eviction/fill_db.py",
    "content": "#!/usr/bin/env python3\n\n\"\"\"\nThis script implements facilities for assessing cache eviction.\nTwo major functions have been implemented that allow users to\n  1. Populate Dragonfly with a specified key and value length distributions.\n  2. Measuring the cache hit rate of Dragonfly with workloads that access keys using Zipfian distribution.\n\nUsage:\nTo perform database population, simply run:\n\n./fill_db.py -f\n\nThis will automatically populate the database to the point where about 2X of maxmemory (specified by Dragonfly)\nof KV pairs will be inserted. By default, we always stop at 2X maxmemory, and this can be changed using the -r\noption, for instance\n\n./fill_db.py -f -r 0.25  # population stops at 4x maxmemory\n\nTo accelerate the population, we can use multiple processes running this script in parallel. A convenient script\nhas been provided in this directory:\n./run_fill_db.sh 10  # use 10 processes to fill in parallel\n\nAfter database has been populated, we can start measuring cache hit rate using the -m option:\n./fill_db.py -m\nNote that the measurement must be done after the population as this mode relies on reading back the complete key\nspace inserted during the population phase. 
By default, we perform 100000 set operations for calculating cache hit rate.\nThis number can be changed using the -c option:\n./fill_db.py -m -c 2000\n\"\"\"\n\n\nimport redis\nimport string\nfrom random import choice\nfrom random import shuffle\nimport numpy as np\n\nimport asyncio\nfrom redis import asyncio as aioredis\nimport os\nimport argparse\nimport re\nimport glob\n\nall_val_lens = [400, 800, 1600, 25000]\nval_lens_probs = [0.003, 0.78, 0.2, 0.017]\n\nall_key_lens = [35, 60, 70]\nkey_lens_probs = [0.2, 0.06, 0.74]\n\ninserted_keys = []\n\n\ndef random_str(len):\n    return \"\".join(\n        choice(string.ascii_letters + string.digits + string.punctuation) for i in range(len)\n    )\n\n\ndef random_key():\n    global all_key_lens, key_lens_probs\n    return random_str(np.random.choice(all_key_lens, p=key_lens_probs))\n\n\ndef random_val():\n    global all_val_lens, val_lens_probs\n    return random_str(np.random.choice(all_val_lens, p=val_lens_probs))\n\n\ndef flush_keys_to_file(file_name):\n    global inserted_keys\n    with open(file_name, \"a\") as f:\n        for key in inserted_keys:\n            f.write(f\"{key}\\n\")\n\n\ndef read_keys_from_file(file_name):\n    global inserted_keys\n    with open(file_name) as file:\n        for line in file:\n            inserted_keys.append(line.rstrip())\n\n\ndef read_keys():\n    global inserted_keys\n    inserted_keys.clear()\n    key_files = glob.glob(\"./keys_*.txt\")\n    for key_file in key_files:\n        read_keys_from_file(key_file)\n\n\ndef sync_populate_db():\n    r = redis.Redis(decode_responses=True)\n    n = 0\n    while True:\n        r.set(random_key(), random_val())\n        n += 1\n        if n % 1000 == 0:\n            print(\"\\r>> Number of key-value pairs inserted: {}\".format(n), end=\"\")\n\n\ndef sync_query_db():\n    global inserted_keys\n    r = redis.Redis(decode_responses=True)\n    n = 0\n    read_keys()\n    misses = 0\n    hits = 0\n    for key in inserted_keys:\n        resp = 
r.set(key, random_val(), nx=True)\n        # print(resp)\n        if resp:\n            misses += 1\n        else:\n            hits += 1\n        n += 1\n        if n % 1000 == 0:\n            print(\n                \"\\r>> Number of key-value pairs inserted: {0}, hit: {1}, miss: {2}\".format(\n                    n, hits, misses\n                ),\n                end=\"\",\n            )\n\n\nasync def populate_db(ratio):\n    global inserted_keys\n    r = aioredis.Redis(decode_responses=True)\n    n = 0\n    misses = 0\n    hits = 0\n\n    total_key_count = 0\n    while True:\n        # await r.set(random_key(), random_val())\n        pipeline = r.pipeline(False)\n        for x in range(200):\n            k = random_key()\n            inserted_keys.append(k)\n            pipeline.set(k, random_val())\n            # pipeline.set(k, random_val(), nx=True)\n        await pipeline.execute()\n        # responses = await pipeline.execute()\n        # for resp in responses:\n        #    if resp:\n        #        misses += 1\n        #    else:\n        #        hits += 1\n\n        # key file names are in keys_xxxx.txt format\n        key_file_name = \"keys_\" + str(os.getpid()) + \".txt\"\n        flush_keys_to_file(key_file_name)\n        inserted_keys.clear()\n        n += 200\n\n        if total_key_count == 0:\n            db_info = await r.info()\n            used_mem = float(db_info[\"used_memory\"])\n            max_mem = float(db_info[\"maxmemory\"])\n            redline = 0.9\n            # we will know the total number of keys of the whole space\n            # only when we approach the maxmemory of the db\n            if used_mem >= max_mem * redline:\n                total_key_count = int(float(n) / ratio)\n                print(\n                    \"\\n>> Determined target key count: {0}, current key count: {1}, ratio: {2}\".format(\n                        total_key_count, n, ratio\n                    ),\n                    end=\"\",\n            
    )\n        else:\n            if n >= total_key_count:\n                print(\"\\n>> Target number of keys reached: {}, stopping...\".format(n), end=\"\")\n                break\n        if n % 1000 == 0:\n            print(\"\\r>> Number of key-value pairs inserted: {0}\".format(n), end=\"\")\n            # print(\"\\r>> Number of key-value pairs inserted: {0}, hit: {1}, miss: {2}\".format(n, hits, misses), end='')\n\n\ndef rand_zipf_generator(alpha: float, upper: int, batch: int):\n    \"\"\"\n    n: The upper bound of the values to generate a zipfian distribution over\n    (n = 30 would generate a distribution of given alpha from values 1 to 30)\n    alpha: The alpha parameter to be used while creating the Zipfian distribution\n    num_samples: The total number of samples to generate over the Zipfian distribution\n    This is a generator that yields up to count values using a generator.\n    \"\"\"\n\n    # Calculate Zeta values from 1 to n:\n    tmp = np.power(np.arange(1, upper + 1), -alpha)\n    zeta = np.r_[0.0, np.cumsum(tmp)]\n\n    # Store the translation map:\n    distMap = [x / zeta[-1] for x in zeta]\n\n    while True:\n        # Generate an array of uniform 0-1 pseudo-random values:\n        u = np.random.random(batch)\n\n        # bisect them with distMap\n        v = np.searchsorted(distMap, u)\n\n        samples = [t - 1 for t in v]\n        yield samples\n\n\ndef rearrange_keys():\n    \"\"\"\n    This function potentially provides the capability for testing different caching workloads.\n    for instance, if we rearrange all the keys via sorting based on the k-v memory usage,\n    we will generate a zipfian hotspot that prefers to access small kv pairs (or larger kv pairs)\n    current implementation just uses a random shuffle.\n    \"\"\"\n    global inserted_keys\n    shuffle(inserted_keys)\n\n\nasync def query_db_with_locality(count):\n    global inserted_keys\n    r = aioredis.Redis(decode_responses=True)\n    n = 0\n    read_keys()\n    
rearrange_keys()\n    misses = 0\n    hits = 0\n    pipeline_size = 200\n    key_index_gen = rand_zipf_generator(1.0, len(inserted_keys), pipeline_size)\n    for key_indices in key_index_gen:\n        pipeline = r.pipeline(False)\n        # print(key_indices)\n        for key_index in key_indices:\n            k = inserted_keys[key_index]\n            pipeline.set(k, random_val(), nx=True)\n\n        responses = await pipeline.execute()\n        n += pipeline_size\n        for resp in responses:\n            if resp:\n                misses += 1\n            else:\n                hits += 1\n        print(\n            \"\\r>> Number of ops: {0}, hit: {1}, miss: {2}, hit rate: {3:.4f}\".format(\n                n, hits, misses, float(hits) / float(hits + misses)\n            ),\n            end=\"\",\n        )\n        if n >= count:\n            break\n    hit_rate = float(hits) / float(hits + misses)\n    print(\"\\n>> Cache hit rate: {:.4f}\".format(hit_rate))\n\n\nclass Range(object):\n    def __init__(self, start, end):\n        self.start = start\n        self.end = end\n\n    def __eq__(self, other):\n        return self.start <= other <= self.end\n\n\nif __name__ == \"__main__\":\n    parser = argparse.ArgumentParser(\n        description=\"Cache Benchmark\", formatter_class=argparse.ArgumentDefaultsHelpFormatter\n    )\n\n    parser.add_argument(\n        \"-f\",\n        \"--fill\",\n        action=\"store_true\",\n        help=\"fill database with random key-value pairs with their lengths follow some distributions\",\n    )\n\n    parser.add_argument(\n        \"-r\",\n        \"--ratio\",\n        type=float,\n        default=0.5,\n        choices=[Range(0.0, 1.0)],\n        help=\"the ratio between in memory data size and total data size\",\n    )\n\n    parser.add_argument(\n        \"-m\",\n        \"--measure\",\n        action=\"store_true\",\n        help=\"measure cache hit rate by visiting the entire key space with a Zipfian distribution\",\n   
 )\n\n    parser.add_argument(\n        \"-c\",\n        \"--count\",\n        type=int,\n        default=100000,\n        help=\"total number of operations to be performed when measuring cache hit rate\",\n    )\n\n    args = parser.parse_args()\n\n    if args.fill:\n        asyncio.run(populate_db(args.ratio))\n        exit(0)\n\n    if args.measure:\n        asyncio.run(query_db_with_locality(args.count))\n"
  },
  {
    "path": "tools/eviction/run_fill_db.sh",
    "content": "#!/bin/sh\n\nrm ./keys_*.txt\nfor i in `seq 1 $1`\ndo\n    echo \"launching process $i to fill..\"\n    ./fill_db.py -f &\ndone\n\nwait\n"
  },
  {
    "path": "tools/eviction/stop_fill_db.sh",
    "content": "#!/bin/sh\nps -ef | grep fill_db.py | grep -v grep | awk '{print $2}' | xargs kill -9\n"
  },
  {
    "path": "tools/faulty_io.sh",
    "content": "#!/bin/bash\n\n# Utility script for creating block devices with fault injection to test tiering\n#\nif [[ $EUID -ne 0 ]]; then\n   echo \"This script must be run as root\"\n   exit 1\nfi\n\nfunction finalize_block_dev {\n    mke2fs /dev/mapper/$1\n    mkdir -p /mnt/$1\n    mount /dev/mapper/$1  /mnt/$1\n    chmod o+rw /mnt/$1\n}\n\nfunction remove_block_dev {\n     umount /dev/mapper/$1\n    rm -rf /mnt/$1\n}\n\nif [[ \"$1\" == \"create\" ]]\nthen\n    # Create backing file of 256MB\n    dd if=/dev/zero of=./tiering_backing bs=1024 count=262144\n    # Create loopback device\n    DEV=$(losetup --find --show ./tiering_backing)\n\n    # Create first block device with flaky sectors\n    dmsetup create tiering_flaky << EOF\n    0 20000 linear $DEV 0\n    20000 105424 flakey $DEV 0 1 1\nEOF\n    finalize_block_dev tiering_flaky\nelif [[ \"$1\" == \"remove\" ]]\nthen\n    remove_block_dev tiering_flaky\n    dmsetup remove_all\n    losetup -a | grep tiering | awk -F ':' '{print $1}'| xargs losetup --detach\n    rm ./tiering_backing\nelse\n    echo \"\"\"Devices created by this script:\n    1. /mnt/tiering_flaky_1 - flaky device with 1:1 second success/error intervals\n\nuse with either create/remove arguments\"\"\"\nfi\n"
  },
  {
    "path": "tools/generate-tls-files.sh",
    "content": "#!/bin/bash\n\n# This script generates locally-signed TLS files for development usage.\n# It's probably a good idea to run in an empty, temporary directory.\n#\n# Example usage:\n#\n# mkdir /tmp/dfly-tls\n# cd /tmp/dfly-tls\n# ~/dragonfly/tools/generate-tls-files.sh\n# ~/dragonfly/build-dbg/dragonfly \\\n#      --dbfilename= \\\n#      --logtostdout \\\n#      --tls=true \\\n#      --tls_key_file=/tmp/dfly-tls/df-key.pem \\\n#      --tls_cert_file=/tmp/dfly-tls/df-cert.pem \\\n#      --requirepass=XXX\n# redis-cli --tls --cacert /tmp/dfly-tls/ca-cert.pem -a XXX\n\nCA_KEY_PATH=ca-key.pem\nCA_CERTIFICATE_PATH=ca-cert.pem\nCERTIFICATE_REQUEST_PATH=df-req.pem\nPRIVATE_KEY_PATH=df-key.pem\nCERTIFICATE_PATH=df-cert.pem\n\necho \"Generating files in local directory (rm *.pem to cleanup)\"\n\nopenssl req -x509 -newkey rsa:4096 -days 1 -nodes \\\n  -keyout ${CA_KEY_PATH} \\\n  -out ${CA_CERTIFICATE_PATH} \\\n  -subj \"/C=GR/ST=SKG/L=Thessaloniki/O=KK/OU=AcmeStudios/CN=Gr/emailAddress=acme@gmail.com\"\n\nopenssl req -newkey rsa:4096 -nodes \\\n  -keyout ${PRIVATE_KEY_PATH} \\\n  -out ${CERTIFICATE_REQUEST_PATH} \\\n  -subj \"/C=GR/ST=SKG/L=Thessaloniki/O=KK/OU=Comp/CN=Gr/emailAddress=does_not_exist@gmail.com\"\n\nopenssl x509 -req \\\n  -in ${CERTIFICATE_REQUEST_PATH} \\\n  -days 1 \\\n  -CA ${CA_CERTIFICATE_PATH} \\\n  -CAkey ${CA_KEY_PATH} \\\n  -CAcreateserial -out ${CERTIFICATE_PATH}\n\necho \"You can now run:\"\necho \"dragonfly --tls=true --tls_key_file=${PRIVATE_KEY_PATH} --tls_cert_file=${CERTIFICATE_PATH} --requirepass=XXX\"\necho \"redis-cli --tls --cacert ${CA_CERTIFICATE_PATH} -a XXX\"\n"
  },
  {
    "path": "tools/json_benchmark.py",
    "content": "#!/usr/bin/env python\n\nimport multiprocessing\nimport time\nimport redis\nimport sys\nimport argparse\nfrom urllib.parse import urlparse\nimport os\nfrom collections import defaultdict\nimport math\n\n'''\nRun JSON benchmark for 3 commands:\n    JSON.SET\n    JSON.GET\n    JSON.TYPE\nWe want to the overall time it takes\nto save and access keys that contains\nJSON values with this benchmark.\nThis also verify that the basic functionalities\nfor using JSON types work correctly\n'''\n\ndef ping(r):\n    r.ping()\n\ndef jsonset(r, i):\n    key = \"json-{}\".format(i)\n    r.execute_command('JSON.SET', key, '.', '{\"a\":123456, \"b\": \"hello\", \"nested\": {\"abc\": \"ffffff\", \"bfb\": null}}')\n\n\ndef jsonget(r, i):\n    key = \"json-{}\".format(i)\n    r.execute_command('JSON.GET', key, '$.a', '$..abc')\n\ndef jsontype(r, i):\n    key = \"json-{}\".format(i)\n    r.execute_command('JSON.TYPE', key, '$.a')\n\ndef runWorker(ctx):\n    wpid = os.getpid()\n    print( '{} '.format(wpid))\n\n    rep = defaultdict(int)\n    r = redis.StrictRedis(host=ctx['host'], port=ctx['port'])\n    work = ctx['work']\n    if ctx['pipeline'] == 0:\n        total_count = int(ctx['count'])\n        for i in range(0, total_count):\n            s0 = time.time()\n            jsonset(r, i)\n            s1 = time.time() - s0\n            bin = int(math.floor(s1 * 1000)) + 1\n            rep[bin] += 1\n        for i in range(0, total_count):\n            s0 = time.time()\n            jsonget(r, i)\n            s1 = time.time() - s0\n            bin = int(math.floor(s1 * 1000)) + 1\n            rep[bin] += 1\n        for i in range(0, total_count):\n            s0 = time.time()\n            jsontype(r, i)\n            s1 = time.time() - s0\n            bin = int(math.floor(s1 * 1000)) + 1\n            rep[bin] += 1\n    else:\n        for i in range(0, ctx['count'], ctx['pipeline']):\n            p = r.pipeline()\n            s0 = time.time()\n            for j in range(0, 
ctx['pipeline']):\n                work(p)\n            p.execute()\n            s1 = time.time() - s0\n            bin = int(math.floor(s1 * 1000)) + 1\n            rep[bin] += ctx['pipeline']\n\n    return rep\n\nif __name__ == '__main__':\n    parser = argparse.ArgumentParser(description='ReJSON Benchmark', formatter_class=argparse.ArgumentDefaultsHelpFormatter)\n    parser.add_argument('-c', '--count', type=int, default=100000, help='total number of operations')\n    parser.add_argument('-p', '--pipeline', type=int, default=0, help='pipeline size')\n    parser.add_argument('-w', '--workers', type=int, default=8, help='number of worker processes')\n    parser.add_argument('-u', '--uri', type=str, default='redis://localhost:6379', help='Redis server URI')\n    args = parser.parse_args()\n    uri = urlparse(args.uri)\n\n    r = redis.Redis(host=uri.hostname, port=uri.port)\n\n    pool = multiprocessing.Pool(args.workers)\n    s0 = time.time()\n    ctx = {\n        'count': args.count / args.workers,\n        'pipeline': args.pipeline,\n        'host': uri.hostname,\n        'port': uri.port,\n        'work': jsonset,\n    }\n\n    print ('Starting workers: ')\n    p = multiprocessing.Pool(args.workers)\n    results = p.map(runWorker, (ctx, ) * args.workers)\n    print(\"\")\n    sys.stdout.flush()\n\n    s1 = time.time() - s0\n    agg = defaultdict(int)\n    for res in results:\n        for k, v in res.items():\n            agg[k] += v\n\n    print()\n    count = args.count * 3\n    print (f'Count: {args.count}, Workers: {args.workers}, Pipeline: {args.pipeline}')\n    print (f'Using hireds: {redis.utils.HIREDIS_AVAILABLE}')\n    print (f'Runtime: {round(s1, 2):,} seconds')\n    print (f'Throughput: {round(count/s1, 2):,} requests per second')\n    for k, v in sorted(agg.items()):\n        perc = 100.0 * v / count\n        print (f'{perc:.4f}% <= {k:,} milliseconds')\n"
  },
  {
    "path": "tools/local/gen-test-certs.sh",
    "content": "#!/bin/bash\nset -e\n\nSCRIPT_DIR=$(dirname \"$0\")\nROOT_DIR=$(readlink -f \"$SCRIPT_DIR/../..\")\nGEN_DIR=$ROOT_DIR/genfiles/tls\n\n\n#   genfiles/tls/ca.{crt,key}          Self signed CA certificate.\n#   genfiles/tls/dragonfly.{crt,key}   A certificate with no key usage/policy restrictions.\n#   genfiles/tls/client.{crt,key}      A certificate restricted for SSL client usage.\n#   genfiles/tls/server.{crt,key}      A certificate restricted for SSL server usage.\n\n: '\nTo run dragonfly use:\ndragonfly --tls --tls_key_file ../genfiles/tls/server.key  --tls_cert_file ../genfiles/tls/server.crt -requirepass pass\n\nOr with CA (does not require password):\ndragonfly --tls --tls_key_file ../genfiles/tls/server.key  --tls_cert_file ../genfiles/tls/server.crt \\\n--tls_ca_cert_file ../genfiles/tls/ca.crt\n\nTo connect with client (without ca):\nopenssl s_client   -state -crlf  -connect 127.0.0.1:6379\n\nWith CA:\nopenssl s_client   -state -crlf -CAfile ../genfiles/tls/ca.crt  -cert ../genfiles/tls/client.crt -key ../genfiles/tls/client.key  -connect 127.0.0.1:6379\n\nSimilarly, to connect with redis-cli (no CA):\nredis-cli --tls --insecure -a pass\n\nWith CA:\nredis-cli --tls  --cacert ../genfiles/tls/ca.crt  --cert ../genfiles/tls/client.crt --key ../genfiles/tls/client.key\n\nmemtier (without CA):\nmemtier_benchmark --tls --key ../genfiles/tls/client.key  --cert ../genfiles/tls/client.crt -a pass\n\nmemtier (with CA):\nmemtier_benchmark --tls --key ../genfiles/tls/client.key  --cert ../genfiles/tls/client.crt --cacert ../genfiles/tls/ca.crt\n'\n\ngenerate_cert() {\n    local name=$1\n    local cn=\"$2\"\n    local opts=\"$3\"\n\n    local keyfile=$GEN_DIR/${name}.key\n    local certfile=$GEN_DIR/${name}.crt\n\n    [ -f $keyfile ] || openssl genpkey -algorithm ED25519 -out $keyfile\n    openssl req -new -sha256 \\\n        -subj \"/O=Dragonfly Test/CN=$cn\" \\\n        -key $keyfile | \\\n        openssl x509 \\\n            -req -sha256 \\\n          
  -CA $GEN_DIR/ca.crt \\\n            -CAkey $GEN_DIR/ca.key \\\n            -CAserial $GEN_DIR/ca.txt \\\n            -CAcreateserial \\\n            -days 365 \\\n            $opts \\\n            -out $certfile\n}\n\nmkdir -p $GEN_DIR\n[ -f $GEN_DIR/ca.key ] || openssl genpkey -algorithm ED25519 -out $GEN_DIR/ca.key\n\n# -x509: self-signed certificate, -nodes: no password\nopenssl req \\\n    -x509 -new -nodes -sha256 \\\n    -key $GEN_DIR/ca.key \\\n    -days 3650 \\\n    -subj '/O=Dragonfly Test/CN=Certificate Authority' \\\n    -out $GEN_DIR/ca.crt\n\ncat > $GEN_DIR/openssl.cnf <<_END_\n[ server_cert ]\nkeyUsage = digitalSignature, keyEncipherment\nnsCertType = server\n\n[ client_cert ]\nkeyUsage = digitalSignature, keyEncipherment\nnsCertType = client\n_END_\n\ngenerate_cert server \"Server-only\" \"-extfile $GEN_DIR/openssl.cnf -extensions server_cert\"\ngenerate_cert client \"Client-only\" \"-extfile $GEN_DIR/openssl.cnf -extensions client_cert\"\ngenerate_cert dragonfly \"Generic-cert\"\n"
  },
  {
    "path": "tools/local/monitoring/docker-compose.yml",
    "content": "version: '3.8'\n\n# To run redis exporter, run: docker compose --profile redis up\n# To run memcached and its exporter: docker compose --profile memcached up\n#\n# Note you may still need to disable/change scraping job configs\n# in prometheus.yml\n#\nvolumes:\n  prometheus_data:\n  grafana_data:\n  memcached_data:\n\nservices:\n  change_vol_ownership:\n    image: alpine\n    user: root\n    volumes:\n      - memcached_data:/memcached\n    command: chown -R 11211:11211 /memcached\n\n  prometheus:\n    image: prom/prometheus:v3.0.0\n    restart: always\n    volumes:\n      - ./prometheus:/etc/prometheus/\n      - prometheus_data:/prometheus\n    command:\n      - '--config.file=/etc/prometheus/prometheus.yml'\n      - '--storage.tsdb.path=/prometheus'\n      - '--web.console.libraries=/usr/share/prometheus/console_libraries'\n      - '--web.console.templates=/usr/share/prometheus/consoles'\n    ports:\n      - 9090:9090\n    extra_hosts:\n      - 'host.docker.internal:host-gateway'\n    depends_on:\n      node-exporter:\n        condition: service_started\n      change_vol_ownership:\n        condition: service_completed_successfully\n\n  memcached:\n    image: memcached\n    restart: unless-stopped\n    ports:\n      - \"11211:11211\"\n    command: \"-t 8 -m 10000 -c 10000 --pidfile=/memcached/memcached.pid\"\n    pid: host\n    volumes:\n      - memcached_data:/memcached\n    profiles: [memcached]\n\n\n  node-exporter:\n    image: prom/node-exporter\n    volumes:\n      - /proc:/host/proc:ro\n      - /sys:/host/sys:ro\n      - /:/rootfs:ro\n    command:\n      - '--path.procfs=/host/proc'\n      - '--path.sysfs=/host/sys'\n      - --collector.filesystem.ignored-mount-points\n      - '^/(sys|proc|dev|host|etc|rootfs/var/lib/docker/containers|rootfs/var/lib/docker/overlay2|rootfs/run/docker/netns|rootfs/var/lib/docker/aufs)($$|/)'\n    ports:\n      - 9100:9100\n    restart: always\n    deploy:\n      mode: global\n\n  grafana:\n    image: 
grafana/grafana:10.1.10\n    user: '472'\n    restart: always\n    environment:\n      # do not really need it, just to leave it as an example on how to install plugins here\n      - GF_INSTALL_PLUGINS=grafana-clock-panel\n      - GF_RENDERING_SERVER_URL=http://renderer:8081/render\n      - GF_RENDERING_CALLBACK_URL=http://grafana:3000/\n    volumes:\n      - grafana_data:/var/lib/grafana\n      - ./grafana/provisioning/:/etc/grafana/provisioning/\n    env_file:\n      - ./grafana/config.monitoring\n    ports:\n      - 3000:3000\n    depends_on:\n      - prometheus\n\n  memcached-exporter:\n    image: prom/memcached-exporter\n    container_name: memcached-exporter\n    restart: unless-stopped\n    ports:\n      - \"9150:9150\"\n    pid: host\n    command:\n      - --memcached.address=memcached:11211\n      - --memcached.pid-file=/memcached/memcached.pid\n    volumes:\n      - memcached_data:/memcached\n    profiles: [memcached]\n    depends_on:\n      - memcached\n\n  redis-exporter:\n    image: quay.io/oliver006/redis_exporter\n    profiles: [redis]\n    ports:\n      - 9121:9121\n    extra_hosts:\n      - \"host.docker.internal:host-gateway\"\n    restart: always\n    environment:\n      - REDIS_ADDR=host.docker.internal:6379\n\n  renderer:\n    image: grafana/grafana-image-renderer:latest\n    ports:\n      - 8081\n"
  },
  {
    "path": "tools/local/monitoring/grafana/config.monitoring",
    "content": "GF_SECURITY_ADMIN_USER=admin\nGF_SECURITY_ADMIN_PASSWORD=foobar\nGF_USERS_ALLOW_SIGN_UP=false\n"
  },
  {
    "path": "tools/local/monitoring/grafana/provisioning/dashboards/dashboard.yml",
    "content": "apiVersion: 1\n\nproviders:\n- name: dashboards\n  orgId: 1\n  folder: ''\n  type: file\n  disableDeletion: false\n  editable: true\n  options:\n    path: /etc/grafana/provisioning/dashboards\n"
  },
  {
    "path": "tools/local/monitoring/grafana/provisioning/dashboards/dragonfly.json",
    "content": "{\n  \"annotations\": {\n    \"list\": [\n      {\n        \"builtIn\": 1,\n        \"datasource\": {\n          \"type\": \"datasource\",\n          \"uid\": \"grafana\"\n        },\n        \"enable\": true,\n        \"hide\": true,\n        \"iconColor\": \"rgba(0, 211, 255, 1)\",\n        \"name\": \"Annotations & Alerts\",\n        \"target\": {\n          \"limit\": 100,\n          \"matchAny\": false,\n          \"tags\": [],\n          \"type\": \"dashboard\"\n        },\n        \"type\": \"dashboard\"\n      }\n    ]\n  },\n  \"editable\": true,\n  \"fiscalYearStartMonth\": 0,\n  \"graphTooltip\": 0,\n  \"links\": [],\n  \"liveNow\": false,\n  \"panels\": [\n    {\n      \"collapsed\": false,\n      \"gridPos\": {\n        \"h\": 1,\n        \"w\": 24,\n        \"x\": 0,\n        \"y\": 0\n      },\n      \"id\": 17,\n      \"panels\": [],\n      \"title\": \"Basic metrics\",\n      \"type\": \"row\"\n    },\n    {\n      \"datasource\": {\n        \"type\": \"prometheus\",\n        \"uid\": \"${DS_PROMETHEUS}\"\n      },\n      \"fieldConfig\": {\n        \"defaults\": {\n          \"color\": {\n            \"fixedColor\": \"rgb(31, 120, 193)\",\n            \"mode\": \"fixed\"\n          },\n          \"decimals\": 0,\n          \"mappings\": [\n            {\n              \"options\": {\n                \"match\": \"null\",\n                \"result\": {\n                  \"text\": \"N/A\"\n                }\n              },\n              \"type\": \"special\"\n            }\n          ],\n          \"thresholds\": {\n            \"mode\": \"absolute\",\n            \"steps\": [\n              {\n                \"color\": \"green\",\n                \"value\": null\n              },\n              {\n                \"color\": \"red\",\n                \"value\": 80\n              }\n            ]\n          },\n          \"unit\": \"s\"\n        },\n        \"overrides\": []\n      },\n      \"gridPos\": {\n        \"h\": 7,\n     
   \"w\": 4,\n        \"x\": 0,\n        \"y\": 1\n      },\n      \"id\": 9,\n      \"maxDataPoints\": 100,\n      \"options\": {\n        \"colorMode\": \"none\",\n        \"graphMode\": \"area\",\n        \"justifyMode\": \"auto\",\n        \"orientation\": \"horizontal\",\n        \"percentChangeColorMode\": \"standard\",\n        \"reduceOptions\": {\n          \"calcs\": [\n            \"lastNotNull\"\n          ],\n          \"fields\": \"\",\n          \"values\": false\n        },\n        \"showPercentChange\": false,\n        \"textMode\": \"auto\",\n        \"wideLayout\": true\n      },\n      \"pluginVersion\": \"10.1.10\",\n      \"targets\": [\n        {\n          \"datasource\": {\n            \"type\": \"prometheus\",\n            \"uid\": \"${DS_PROMETHEUS}\"\n          },\n          \"editorMode\": \"code\",\n          \"exemplar\": true,\n          \"expr\":\n              \"max(max_over_time(dragonfly_uptime_in_seconds{namespace=\\\"$namespace\\\",pod=~\\\"$pod_name\\\"}[$__interval]))\",\n          \"format\": \"time_series\",\n          \"interval\": \"\",\n          \"intervalFactor\": 2,\n          \"legendFormat\": \"\",\n          \"metric\": \"\",\n          \"range\": true,\n          \"refId\": \"A\",\n          \"step\": 1800\n        }\n      ],\n      \"title\": \"Uptime\",\n      \"type\": \"stat\"\n    },\n    {\n      \"datasource\": {\n        \"type\": \"prometheus\",\n        \"uid\": \"${DS_PROMETHEUS}\"\n      },\n      \"fieldConfig\": {\n        \"defaults\": {\n          \"color\": {\n            \"fixedColor\": \"rgb(31, 120, 193)\",\n            \"mode\": \"fixed\"\n          },\n          \"decimals\": 0,\n          \"mappings\": [\n            {\n              \"options\": {\n                \"match\": \"null\",\n                \"result\": {\n                  \"text\": \"N/A\"\n                }\n              },\n              \"type\": \"special\"\n            }\n          ],\n          \"thresholds\": {\n       
     \"mode\": \"absolute\",\n            \"steps\": [\n              {\n                \"color\": \"green\",\n                \"value\": null\n              },\n              {\n                \"color\": \"red\",\n                \"value\": 80\n              }\n            ]\n          },\n          \"unit\": \"none\"\n        },\n        \"overrides\": []\n      },\n      \"gridPos\": {\n        \"h\": 7,\n        \"w\": 2,\n        \"x\": 4,\n        \"y\": 1\n      },\n      \"hideTimeOverride\": true,\n      \"id\": 12,\n      \"maxDataPoints\": 100,\n      \"options\": {\n        \"colorMode\": \"none\",\n        \"graphMode\": \"area\",\n        \"justifyMode\": \"auto\",\n        \"orientation\": \"horizontal\",\n        \"percentChangeColorMode\": \"standard\",\n        \"reduceOptions\": {\n          \"calcs\": [\n            \"lastNotNull\"\n          ],\n          \"fields\": \"\",\n          \"values\": false\n        },\n        \"showPercentChange\": false,\n        \"textMode\": \"auto\",\n        \"wideLayout\": true\n      },\n      \"pluginVersion\": \"10.1.10\",\n      \"targets\": [\n        {\n          \"datasource\": {\n            \"type\": \"prometheus\",\n            \"uid\": \"${DS_PROMETHEUS}\"\n          },\n          \"editorMode\": \"code\",\n          \"exemplar\": true,\n          \"expr\": \"dragonfly_connected_clients{namespace=\\\"$namespace\\\",pod=~\\\"$pod_name\\\"}\",\n          \"format\": \"time_series\",\n          \"interval\": \"\",\n          \"intervalFactor\": 2,\n          \"legendFormat\": \"\",\n          \"metric\": \"\",\n          \"range\": true,\n          \"refId\": \"A\",\n          \"step\": 2\n        }\n      ],\n      \"timeFrom\": \"1m\",\n      \"title\": \"Clients\",\n      \"type\": \"stat\"\n    },\n    {\n      \"datasource\": {\n        \"type\": \"prometheus\",\n        \"uid\": \"${DS_PROMETHEUS}\"\n      },\n      \"fieldConfig\": {\n        \"defaults\": {\n          \"color\": {\n          
  \"mode\": \"thresholds\"\n          },\n          \"decimals\": 0,\n          \"mappings\": [\n            {\n              \"options\": {\n                \"match\": \"null\",\n                \"result\": {\n                  \"text\": \"N/A\"\n                }\n              },\n              \"type\": \"special\"\n            }\n          ],\n          \"max\": 100,\n          \"min\": 0,\n          \"thresholds\": {\n            \"mode\": \"absolute\",\n            \"steps\": [\n              {\n                \"color\": \"rgba(50, 172, 45, 0.97)\",\n                \"value\": null\n              },\n              {\n                \"color\": \"rgba(237, 129, 40, 0.89)\",\n                \"value\": 80\n              },\n              {\n                \"color\": \"rgba(245, 54, 54, 0.9)\",\n                \"value\": 95\n              }\n            ]\n          },\n          \"unit\": \"percent\"\n        },\n        \"overrides\": []\n      },\n      \"gridPos\": {\n        \"h\": 7,\n        \"w\": 6,\n        \"x\": 6,\n        \"y\": 1\n      },\n      \"hideTimeOverride\": true,\n      \"id\": 11,\n      \"maxDataPoints\": 100,\n      \"options\": {\n        \"minVizHeight\": 75,\n        \"minVizWidth\": 75,\n        \"orientation\": \"horizontal\",\n        \"reduceOptions\": {\n          \"calcs\": [\n            \"lastNotNull\"\n          ],\n          \"fields\": \"\",\n          \"values\": false\n        },\n        \"showThresholdLabels\": false,\n        \"showThresholdMarkers\": true,\n        \"sizing\": \"auto\"\n      },\n      \"pluginVersion\": \"10.1.10\",\n      \"targets\": [\n        {\n          \"datasource\": {\n            \"type\": \"prometheus\",\n            \"uid\": \"${DS_PROMETHEUS}\"\n          },\n          \"editorMode\": \"code\",\n          \"exemplar\": true,\n          \"expr\":\n              \"100 * (dragonfly_memory_used_bytes{namespace=\\\"$namespace\\\",pod=~\\\"$pod_name\\\"}  / 
dragonfly_memory_max_bytes{namespace=\\\"$namespace\\\",pod=~\\\"$pod_name\\\"} )\",\n          \"format\": \"time_series\",\n          \"interval\": \"\",\n          \"intervalFactor\": 2,\n          \"legendFormat\": \"\",\n          \"metric\": \"\",\n          \"range\": true,\n          \"refId\": \"A\",\n          \"step\": 2\n        }\n      ],\n      \"timeFrom\": \"1m\",\n      \"title\": \"Memory Usage\",\n      \"type\": \"gauge\"\n    },\n    {\n      \"datasource\": {\n        \"type\": \"prometheus\",\n        \"uid\": \"${DS_PROMETHEUS}\"\n      },\n      \"fieldConfig\": {\n        \"defaults\": {\n          \"color\": {\n            \"mode\": \"palette-classic\"\n          },\n          \"custom\": {\n            \"axisCenteredZero\": false,\n            \"axisColorMode\": \"text\",\n            \"axisLabel\": \"\",\n            \"axisPlacement\": \"auto\",\n            \"barAlignment\": 0,\n            \"drawStyle\": \"line\",\n            \"fillOpacity\": 22,\n            \"gradientMode\": \"none\",\n            \"hideFrom\": {\n              \"legend\": false,\n              \"tooltip\": false,\n              \"viz\": false\n            },\n            \"insertNulls\": false,\n            \"lineInterpolation\": \"linear\",\n            \"lineWidth\": 1,\n            \"pointSize\": 5,\n            \"scaleDistribution\": {\n              \"type\": \"linear\"\n            },\n            \"showPoints\": \"never\",\n            \"spanNulls\": false,\n            \"stacking\": {\n              \"group\": \"A\",\n              \"mode\": \"normal\"\n            },\n            \"thresholdsStyle\": {\n              \"mode\": \"off\"\n            }\n          },\n          \"links\": [],\n          \"mappings\": [],\n          \"thresholds\": {\n            \"mode\": \"absolute\",\n            \"steps\": [\n              {\n                \"color\": \"green\",\n                \"value\": null\n              },\n              {\n                
\"color\": \"red\",\n                \"value\": 80\n              }\n            ]\n          },\n          \"unit\": \"short\"\n        },\n        \"overrides\": []\n      },\n      \"gridPos\": {\n        \"h\": 7,\n        \"w\": 12,\n        \"x\": 12,\n        \"y\": 1\n      },\n      \"id\": 2,\n      \"options\": {\n        \"alertThreshold\": true,\n        \"legend\": {\n          \"calcs\": [],\n          \"displayMode\": \"list\",\n          \"placement\": \"bottom\",\n          \"showLegend\": true\n        },\n        \"tooltip\": {\n          \"mode\": \"single\",\n          \"sort\": \"none\"\n        }\n      },\n      \"pluginVersion\": \"10.1.10\",\n      \"targets\": [\n        {\n          \"datasource\": {\n            \"type\": \"prometheus\",\n            \"uid\": \"${DS_PROMETHEUS}\"\n          },\n          \"editorMode\": \"code\",\n          \"exemplar\": true,\n          \"expr\":\n              \"sum(irate(dragonfly_commands_total{namespace=\\\"$namespace\\\",pod=~\\\"$pod_name\\\"}[$__rate_interval])) by (cmd)\",\n          \"format\": \"time_series\",\n          \"interval\": \"\",\n          \"intervalFactor\": 2,\n          \"legendFormat\": \"{{cmd}}\",\n          \"metric\": \"A\",\n          \"range\": true,\n          \"refId\": \"A\",\n          \"step\": 240,\n          \"target\": \"\"\n        }\n      ],\n      \"title\": \"Commands Executed / sec\",\n      \"type\": \"timeseries\"\n    },\n    {\n      \"datasource\": {\n        \"type\": \"prometheus\",\n        \"uid\": \"${DS_PROMETHEUS}\"\n      },\n      \"fieldConfig\": {\n        \"defaults\": {\n          \"color\": {\n            \"mode\": \"palette-classic\"\n          },\n          \"custom\": {\n            \"axisCenteredZero\": false,\n            \"axisColorMode\": \"text\",\n            \"axisLabel\": \"\",\n            \"axisPlacement\": \"auto\",\n            \"barAlignment\": 0,\n            \"drawStyle\": \"line\",\n            \"fillOpacity\": 0,\n    
        \"gradientMode\": \"none\",\n            \"hideFrom\": {\n              \"legend\": false,\n              \"tooltip\": false,\n              \"viz\": false\n            },\n            \"insertNulls\": false,\n            \"lineInterpolation\": \"linear\",\n            \"lineWidth\": 1,\n            \"pointSize\": 5,\n            \"scaleDistribution\": {\n              \"type\": \"linear\"\n            },\n            \"showPoints\": \"auto\",\n            \"spanNulls\": false,\n            \"stacking\": {\n              \"group\": \"A\",\n              \"mode\": \"none\"\n            },\n            \"thresholdsStyle\": {\n              \"mode\": \"off\"\n            }\n          },\n          \"links\": [],\n          \"mappings\": [],\n          \"thresholds\": {\n            \"mode\": \"absolute\",\n            \"steps\": [\n              {\n                \"color\": \"green\",\n                \"value\": null\n              },\n              {\n                \"color\": \"red\",\n                \"value\": 80\n              }\n            ]\n          },\n          \"unit\": \"bytes\"\n        },\n        \"overrides\": []\n      },\n      \"gridPos\": {\n        \"h\": 7,\n        \"w\": 12,\n        \"x\": 0,\n        \"y\": 8\n      },\n      \"id\": 7,\n      \"options\": {\n        \"alertThreshold\": true,\n        \"legend\": {\n          \"calcs\": [],\n          \"displayMode\": \"list\",\n          \"placement\": \"bottom\",\n          \"showLegend\": true\n        },\n        \"tooltip\": {\n          \"mode\": \"single\",\n          \"sort\": \"none\"\n        }\n      },\n      \"pluginVersion\": \"10.1.10\",\n      \"targets\": [\n        {\n          \"datasource\": {\n            \"type\": \"prometheus\",\n            \"uid\": \"${DS_PROMETHEUS}\"\n          },\n          \"editorMode\": \"code\",\n          \"exemplar\": true,\n          \"expr\": \"dragonfly_memory_used_bytes{namespace=\\\"$namespace\\\",pod=~\\\"$pod_name\\\"} 
\",\n          \"format\": \"time_series\",\n          \"interval\": \"\",\n          \"intervalFactor\": 2,\n          \"legendFormat\": \"used\",\n          \"metric\": \"\",\n          \"range\": true,\n          \"refId\": \"A\",\n          \"step\": 240,\n          \"target\": \"\"\n        },\n        {\n          \"datasource\": {\n            \"type\": \"prometheus\",\n            \"uid\": \"${DS_PROMETHEUS}\"\n          },\n          \"editorMode\": \"code\",\n          \"exemplar\": true,\n          \"expr\": \"dragonfly_memory_max_bytes{namespace=\\\"$namespace\\\",pod=~\\\"$pod_name\\\"} \",\n          \"format\": \"time_series\",\n          \"hide\": false,\n          \"interval\": \"\",\n          \"intervalFactor\": 2,\n          \"legendFormat\": \"max\",\n          \"range\": true,\n          \"refId\": \"B\",\n          \"step\": 240\n        },\n        {\n          \"datasource\": {\n            \"type\": \"prometheus\",\n            \"uid\": \"${DS_PROMETHEUS}\"\n          },\n          \"editorMode\": \"code\",\n          \"exemplar\": true,\n          \"expr\": \"dragonfly_used_memory_rss_bytes{namespace=\\\"$namespace\\\",pod=~\\\"$pod_name\\\"} \",\n          \"format\": \"time_series\",\n          \"hide\": false,\n          \"legendFormat\": \"RSS\",\n          \"range\": true,\n          \"refId\": \"C\"\n        }\n      ],\n      \"title\": \"Total Memory Usage\",\n      \"type\": \"timeseries\"\n    },\n    {\n      \"datasource\": {\n        \"type\": \"prometheus\",\n        \"uid\": \"${DS_PROMETHEUS}\"\n      },\n      \"fieldConfig\": {\n        \"defaults\": {\n          \"color\": {\n            \"mode\": \"palette-classic\"\n          },\n          \"custom\": {\n            \"axisCenteredZero\": false,\n            \"axisColorMode\": \"text\",\n            \"axisLabel\": \"\",\n            \"axisPlacement\": \"auto\",\n            \"barAlignment\": 0,\n            \"drawStyle\": \"line\",\n            \"fillOpacity\": 22,\n   
         \"gradientMode\": \"none\",\n            \"hideFrom\": {\n              \"legend\": false,\n              \"tooltip\": false,\n              \"viz\": false\n            },\n            \"insertNulls\": false,\n            \"lineInterpolation\": \"linear\",\n            \"lineWidth\": 1,\n            \"pointSize\": 5,\n            \"scaleDistribution\": {\n              \"type\": \"linear\"\n            },\n            \"showPoints\": \"never\",\n            \"spanNulls\": false,\n            \"stacking\": {\n              \"group\": \"A\",\n              \"mode\": \"normal\"\n            },\n            \"thresholdsStyle\": {\n              \"mode\": \"off\"\n            }\n          },\n          \"links\": [],\n          \"mappings\": [],\n          \"thresholds\": {\n            \"mode\": \"absolute\",\n            \"steps\": [\n              {\n                \"color\": \"green\",\n                \"value\": null\n              },\n              {\n                \"color\": \"red\",\n                \"value\": 80\n              }\n            ]\n          },\n          \"unit\": \"s\"\n        },\n        \"overrides\": []\n      },\n      \"gridPos\": {\n        \"h\": 7,\n        \"w\": 12,\n        \"x\": 12,\n        \"y\": 8\n      },\n      \"id\": 35,\n      \"options\": {\n        \"alertThreshold\": true,\n        \"legend\": {\n          \"calcs\": [],\n          \"displayMode\": \"list\",\n          \"placement\": \"bottom\",\n          \"showLegend\": true\n        },\n        \"tooltip\": {\n          \"mode\": \"single\",\n          \"sort\": \"none\"\n        }\n      },\n      \"pluginVersion\": \"10.1.10\",\n      \"targets\": [\n        {\n          \"datasource\": {\n            \"type\": \"prometheus\",\n            \"uid\": \"${DS_PROMETHEUS}\"\n          },\n          \"editorMode\": \"code\",\n          \"exemplar\": true,\n          \"expr\":\n              \"max by (cmd) 
(irate(dragonfly_commands_duration_seconds{namespace=\\\"$namespace\\\",pod=~\\\"$pod_name\\\"}[$__rate_interval]) / (irate(dragonfly_commands_total{namespace=\\\"$namespace\\\",pod=~\\\"$pod_name\\\"}[$__rate_interval])))\",\n          \"format\": \"time_series\",\n          \"interval\": \"\",\n          \"intervalFactor\": 2,\n          \"legendFormat\": \"__auto\",\n          \"metric\": \"A\",\n          \"range\": true,\n          \"refId\": \"A\",\n          \"step\": 240,\n          \"target\": \"\"\n        }\n      ],\n      \"title\": \"Latency per command \",\n      \"type\": \"timeseries\"\n    },\n    {\n      \"datasource\": {\n        \"type\": \"prometheus\",\n        \"uid\": \"${DS_PROMETHEUS}\"\n      },\n      \"fieldConfig\": {\n        \"defaults\": {\n          \"color\": {\n            \"mode\": \"palette-classic\"\n          },\n          \"custom\": {\n            \"axisCenteredZero\": false,\n            \"axisColorMode\": \"text\",\n            \"axisLabel\": \"\",\n            \"axisPlacement\": \"auto\",\n            \"barAlignment\": 0,\n            \"drawStyle\": \"line\",\n            \"fillOpacity\": 0,\n            \"gradientMode\": \"none\",\n            \"hideFrom\": {\n              \"legend\": false,\n              \"tooltip\": false,\n              \"viz\": false\n            },\n            \"insertNulls\": false,\n            \"lineInterpolation\": \"linear\",\n            \"lineWidth\": 1,\n            \"pointSize\": 5,\n            \"scaleDistribution\": {\n              \"type\": \"linear\"\n            },\n            \"showPoints\": \"auto\",\n            \"spanNulls\": false,\n            \"stacking\": {\n              \"group\": \"A\",\n              \"mode\": \"none\"\n            },\n            \"thresholdsStyle\": {\n              \"mode\": \"off\"\n            }\n          },\n          \"links\": [],\n          \"mappings\": [],\n          \"thresholds\": {\n            \"mode\": \"absolute\",\n            
\"steps\": [\n              {\n                \"color\": \"green\",\n                \"value\": null\n              },\n              {\n                \"color\": \"red\",\n                \"value\": 80\n              }\n            ]\n          }\n        },\n        \"overrides\": []\n      },\n      \"gridPos\": {\n        \"h\": 7,\n        \"w\": 12,\n        \"x\": 0,\n        \"y\": 15\n      },\n      \"id\": 5,\n      \"options\": {\n        \"alertThreshold\": true,\n        \"legend\": {\n          \"calcs\": [],\n          \"displayMode\": \"list\",\n          \"placement\": \"bottom\",\n          \"showLegend\": true\n        },\n        \"tooltip\": {\n          \"mode\": \"single\",\n          \"sort\": \"none\"\n        }\n      },\n      \"pluginVersion\": \"10.1.10\",\n      \"targets\": [\n        {\n          \"datasource\": {\n            \"type\": \"prometheus\",\n            \"uid\": \"${DS_PROMETHEUS}\"\n          },\n          \"editorMode\": \"code\",\n          \"exemplar\": true,\n          \"expr\": \"sum (dragonfly_db_keys{namespace=\\\"$namespace\\\",pod=~\\\"$pod_name\\\"}) by (db)\",\n          \"format\": \"time_series\",\n          \"interval\": \"\",\n          \"intervalFactor\": 2,\n          \"legendFormat\": \"{{ db }} \",\n          \"range\": true,\n          \"refId\": \"A\",\n          \"step\": 240,\n          \"target\": \"\"\n        }\n      ],\n      \"title\": \"Total Items per DB\",\n      \"type\": \"timeseries\"\n    },\n    {\n      \"datasource\": {\n        \"type\": \"prometheus\",\n        \"uid\": \"${DS_PROMETHEUS}\"\n      },\n      \"fieldConfig\": {\n        \"defaults\": {\n          \"color\": {\n            \"mode\": \"palette-classic\"\n          },\n          \"custom\": {\n            \"axisCenteredZero\": false,\n            \"axisColorMode\": \"text\",\n            \"axisLabel\": \"\",\n            \"axisPlacement\": \"auto\",\n            \"barAlignment\": 0,\n            \"drawStyle\": 
\"line\",\n            \"fillOpacity\": 22,\n            \"gradientMode\": \"none\",\n            \"hideFrom\": {\n              \"legend\": false,\n              \"tooltip\": false,\n              \"viz\": false\n            },\n            \"insertNulls\": false,\n            \"lineInterpolation\": \"linear\",\n            \"lineWidth\": 1,\n            \"pointSize\": 5,\n            \"scaleDistribution\": {\n              \"type\": \"linear\"\n            },\n            \"showPoints\": \"never\",\n            \"spanNulls\": false,\n            \"stacking\": {\n              \"group\": \"A\",\n              \"mode\": \"normal\"\n            },\n            \"thresholdsStyle\": {\n              \"mode\": \"off\"\n            }\n          },\n          \"links\": [],\n          \"mappings\": [],\n          \"thresholds\": {\n            \"mode\": \"absolute\",\n            \"steps\": [\n              {\n                \"color\": \"green\",\n                \"value\": null\n              },\n              {\n                \"color\": \"red\",\n                \"value\": 80\n              }\n            ]\n          },\n          \"unit\": \"short\"\n        },\n        \"overrides\": []\n      },\n      \"gridPos\": {\n        \"h\": 7,\n        \"w\": 12,\n        \"x\": 12,\n        \"y\": 15\n      },\n      \"id\": 24,\n      \"options\": {\n        \"alertThreshold\": true,\n        \"legend\": {\n          \"calcs\": [],\n          \"displayMode\": \"list\",\n          \"placement\": \"bottom\",\n          \"showLegend\": true\n        },\n        \"tooltip\": {\n          \"mode\": \"single\",\n          \"sort\": \"none\"\n        }\n      },\n      \"pluginVersion\": \"10.1.10\",\n      \"targets\": [\n        {\n          \"datasource\": {\n            \"type\": \"prometheus\",\n            \"uid\": \"${DS_PROMETHEUS}\"\n          },\n          \"editorMode\": \"code\",\n          \"exemplar\": true,\n          \"expr\":\n              
\"irate(dragonfly_keyspace_hits_total{namespace=\\\"$namespace\\\",pod=~\\\"$pod_name\\\"}[$__rate_interval])\",\n          \"format\": \"time_series\",\n          \"interval\": \"\",\n          \"intervalFactor\": 2,\n          \"legendFormat\": \"hits\",\n          \"metric\": \"A\",\n          \"range\": true,\n          \"refId\": \"A\",\n          \"step\": 240,\n          \"target\": \"\"\n        },\n        {\n          \"datasource\": {\n            \"type\": \"prometheus\",\n            \"uid\": \"${DS_PROMETHEUS}\"\n          },\n          \"editorMode\": \"code\",\n          \"exemplar\": true,\n          \"expr\":\n              \"irate(dragonfly_keyspace_misses_total{namespace=\\\"$namespace\\\",pod=~\\\"$pod_name\\\"}[$__rate_interval])\",\n          \"format\": \"time_series\",\n          \"hide\": false,\n          \"interval\": \"\",\n          \"intervalFactor\": 2,\n          \"legendFormat\": \"misses\",\n          \"metric\": \"A\",\n          \"range\": true,\n          \"refId\": \"B\",\n          \"step\": 240,\n          \"target\": \"\"\n        },\n        {\n          \"datasource\": {\n            \"type\": \"prometheus\",\n            \"uid\": \"${DS_PROMETHEUS}\"\n          },\n          \"editorMode\": \"code\",\n          \"exemplar\": true,\n          \"expr\":\n              \"irate(dragonfly_keyspace_mutations_total{namespace=\\\"$namespace\\\",pod=~\\\"$pod_name\\\"}[$__rate_interval])\",\n          \"format\": \"time_series\",\n          \"hide\": false,\n          \"interval\": \"\",\n          \"intervalFactor\": 2,\n          \"legendFormat\": \"mutations\",\n          \"metric\": \"A\",\n          \"range\": true,\n          \"refId\": \"C\",\n          \"step\": 240,\n          \"target\": \"\"\n        }\n      ],\n      \"title\": \"Keys Ops / sec\",\n      \"type\": \"timeseries\"\n    },\n    {\n      \"datasource\": {\n        \"type\": \"prometheus\",\n        \"uid\": \"${DS_PROMETHEUS}\"\n      },\n      
\"fieldConfig\": {\n        \"defaults\": {\n          \"color\": {\n            \"mode\": \"palette-classic\"\n          },\n          \"custom\": {\n            \"axisCenteredZero\": false,\n            \"axisColorMode\": \"text\",\n            \"axisLabel\": \"\",\n            \"axisPlacement\": \"auto\",\n            \"barAlignment\": 0,\n            \"drawStyle\": \"line\",\n            \"fillOpacity\": 0,\n            \"gradientMode\": \"none\",\n            \"hideFrom\": {\n              \"legend\": false,\n              \"tooltip\": false,\n              \"viz\": false\n            },\n            \"insertNulls\": false,\n            \"lineInterpolation\": \"linear\",\n            \"lineWidth\": 1,\n            \"pointSize\": 5,\n            \"scaleDistribution\": {\n              \"type\": \"linear\"\n            },\n            \"showPoints\": \"auto\",\n            \"spanNulls\": false,\n            \"stacking\": {\n              \"group\": \"A\",\n              \"mode\": \"none\"\n            },\n            \"thresholdsStyle\": {\n              \"mode\": \"off\"\n            }\n          },\n          \"links\": [],\n          \"mappings\": [],\n          \"thresholds\": {\n            \"mode\": \"absolute\",\n            \"steps\": [\n              {\n                \"color\": \"green\",\n                \"value\": null\n              },\n              {\n                \"color\": \"red\",\n                \"value\": 80\n              }\n            ]\n          }\n        },\n        \"overrides\": []\n      },\n      \"gridPos\": {\n        \"h\": 7,\n        \"w\": 12,\n        \"x\": 0,\n        \"y\": 22\n      },\n      \"id\": 8,\n      \"options\": {\n        \"alertThreshold\": true,\n        \"legend\": {\n          \"calcs\": [],\n          \"displayMode\": \"list\",\n          \"placement\": \"bottom\",\n          \"showLegend\": true\n        },\n        \"tooltip\": {\n          \"mode\": \"single\",\n          \"sort\": \"none\"\n     
   }\n      },\n      \"pluginVersion\": \"10.1.10\",\n      \"targets\": [\n        {\n          \"datasource\": {\n            \"type\": \"prometheus\",\n            \"uid\": \"${DS_PROMETHEUS}\"\n          },\n          \"editorMode\": \"code\",\n          \"exemplar\": true,\n          \"expr\":\n              \"sum(irate(dragonfly_expired_keys_total{namespace=\\\"$namespace\\\",pod=~\\\"$pod_name\\\"}[$__rate_interval])) by (pod)\",\n          \"format\": \"time_series\",\n          \"hide\": false,\n          \"interval\": \"\",\n          \"intervalFactor\": 2,\n          \"legendFormat\": \"expired\",\n          \"metric\": \"\",\n          \"range\": true,\n          \"refId\": \"A\",\n          \"step\": 240,\n          \"target\": \"\"\n        },\n        {\n          \"datasource\": {\n            \"type\": \"prometheus\",\n            \"uid\": \"${DS_PROMETHEUS}\"\n          },\n          \"editorMode\": \"code\",\n          \"exemplar\": true,\n          \"expr\":\n              \"sum(irate(dragonfly_evicted_keys_total{namespace=\\\"$namespace\\\",pod=~\\\"$pod_name\\\"}[$__rate_interval])) by (pod)\",\n          \"format\": \"time_series\",\n          \"interval\": \"\",\n          \"intervalFactor\": 2,\n          \"legendFormat\": \"evicted\",\n          \"range\": true,\n          \"refId\": \"B\",\n          \"step\": 240\n        }\n      ],\n      \"title\": \"Expired / Evicted\",\n      \"type\": \"timeseries\"\n    },\n    {\n      \"datasource\": {\n        \"type\": \"prometheus\",\n        \"uid\": \"${DS_PROMETHEUS}\"\n      },\n      \"fieldConfig\": {\n        \"defaults\": {\n          \"color\": {\n            \"mode\": \"palette-classic\"\n          },\n          \"custom\": {\n            \"axisCenteredZero\": false,\n            \"axisColorMode\": \"text\",\n            \"axisLabel\": \"\",\n            \"axisPlacement\": \"auto\",\n            \"barAlignment\": 0,\n            \"drawStyle\": \"line\",\n            
\"fillOpacity\": 0,\n            \"gradientMode\": \"none\",\n            \"hideFrom\": {\n              \"legend\": false,\n              \"tooltip\": false,\n              \"viz\": false\n            },\n            \"insertNulls\": false,\n            \"lineInterpolation\": \"linear\",\n            \"lineWidth\": 1,\n            \"pointSize\": 5,\n            \"scaleDistribution\": {\n              \"type\": \"linear\"\n            },\n            \"showPoints\": \"auto\",\n            \"spanNulls\": false,\n            \"stacking\": {\n              \"group\": \"A\",\n              \"mode\": \"none\"\n            },\n            \"thresholdsStyle\": {\n              \"mode\": \"off\"\n            }\n          },\n          \"links\": [],\n          \"mappings\": [],\n          \"thresholds\": {\n            \"mode\": \"absolute\",\n            \"steps\": [\n              {\n                \"color\": \"green\",\n                \"value\": null\n              },\n              {\n                \"color\": \"red\",\n                \"value\": 80\n              }\n            ]\n          }\n        },\n        \"overrides\": []\n      },\n      \"gridPos\": {\n        \"h\": 7,\n        \"w\": 12,\n        \"x\": 12,\n        \"y\": 22\n      },\n      \"id\": 25,\n      \"options\": {\n        \"legend\": {\n          \"calcs\": [],\n          \"displayMode\": \"list\",\n          \"placement\": \"bottom\",\n          \"showLegend\": true\n        },\n        \"tooltip\": {\n          \"mode\": \"single\",\n          \"sort\": \"none\"\n        }\n      },\n      \"pluginVersion\": \"10.1.10\",\n      \"targets\": [\n        {\n          \"datasource\": {\n            \"type\": \"prometheus\",\n            \"uid\": \"${DS_PROMETHEUS}\"\n          },\n          \"editorMode\": \"code\",\n          \"exemplar\": true,\n          \"expr\":\n              \"dragonfly_db_keys{namespace=\\\"$namespace\\\",pod=~\\\"$pod_name\\\"}/ on(namespace, pod, db) 
dragonfly_db_capacity{namespace=\\\"$namespace\\\",pod=~\\\"$pod_name\\\", type=\\\"prime\\\"}\",\n          \"format\": \"time_series\",\n          \"interval\": \"\",\n          \"intervalFactor\": 2,\n          \"legendFormat\": \"{{ db }} \",\n          \"range\": true,\n          \"refId\": \"A\",\n          \"step\": 240,\n          \"target\": \"\"\n        }\n      ],\n      \"title\": \"Table Load per DB\",\n      \"type\": \"timeseries\"\n    },\n    {\n      \"datasource\": {\n        \"type\": \"prometheus\",\n        \"uid\": \"${DS_PROMETHEUS}\"\n      },\n      \"fieldConfig\": {\n        \"defaults\": {\n          \"color\": {\n            \"mode\": \"palette-classic\"\n          },\n          \"custom\": {\n            \"axisCenteredZero\": false,\n            \"axisColorMode\": \"text\",\n            \"axisLabel\": \"\",\n            \"axisPlacement\": \"auto\",\n            \"barAlignment\": 0,\n            \"drawStyle\": \"line\",\n            \"fillOpacity\": 0,\n            \"gradientMode\": \"none\",\n            \"hideFrom\": {\n              \"legend\": false,\n              \"tooltip\": false,\n              \"viz\": false\n            },\n            \"insertNulls\": false,\n            \"lineInterpolation\": \"linear\",\n            \"lineWidth\": 1,\n            \"pointSize\": 5,\n            \"scaleDistribution\": {\n              \"type\": \"linear\"\n            },\n            \"showPoints\": \"auto\",\n            \"spanNulls\": false,\n            \"stacking\": {\n              \"group\": \"A\",\n              \"mode\": \"none\"\n            },\n            \"thresholdsStyle\": {\n              \"mode\": \"off\"\n            }\n          },\n          \"links\": [],\n          \"mappings\": [],\n          \"thresholds\": {\n            \"mode\": \"absolute\",\n            \"steps\": [\n              {\n                \"color\": \"green\",\n                \"value\": null\n              },\n              {\n                
\"color\": \"red\",\n                \"value\": 80\n              }\n            ]\n          },\n          \"unit\": \"s\"\n        },\n        \"overrides\": []\n      },\n      \"gridPos\": {\n        \"h\": 7,\n        \"w\": 12,\n        \"x\": 0,\n        \"y\": 29\n      },\n      \"id\": 26,\n      \"options\": {\n        \"alertThreshold\": true,\n        \"legend\": {\n          \"calcs\": [],\n          \"displayMode\": \"list\",\n          \"placement\": \"bottom\",\n          \"showLegend\": true\n        },\n        \"tooltip\": {\n          \"mode\": \"single\",\n          \"sort\": \"none\"\n        }\n      },\n      \"pluginVersion\": \"10.1.10\",\n      \"targets\": [\n        {\n          \"datasource\": {\n            \"type\": \"prometheus\",\n            \"uid\": \"${DS_PROMETHEUS}\"\n          },\n          \"editorMode\": \"code\",\n          \"exemplar\": true,\n          \"expr\":\n              \"irate(dragonfly_reply_duration_seconds{namespace=\\\"$namespace\\\",pod=~\\\"$pod_name\\\"}[$__rate_interval]) / irate(dragonfly_reply_total{namespace=\\\"$namespace\\\",pod=~\\\"$pod_name\\\"}[$__rate_interval])\",\n          \"format\": \"time_series\",\n          \"interval\": \"\",\n          \"intervalFactor\": 2,\n          \"legendFormat\": \"{{ pod }}\",\n          \"range\": true,\n          \"refId\": \"A\",\n          \"step\": 240\n        }\n      ],\n      \"title\": \"Reply Latency\",\n      \"type\": \"timeseries\"\n    },\n    {\n      \"datasource\": {\n        \"type\": \"prometheus\",\n        \"uid\": \"${DS_PROMETHEUS}\"\n      },\n      \"fieldConfig\": {\n        \"defaults\": {\n          \"color\": {\n            \"mode\": \"palette-classic\"\n          },\n          \"custom\": {\n            \"axisCenteredZero\": false,\n            \"axisColorMode\": \"text\",\n            \"axisLabel\": \"\",\n            \"axisPlacement\": \"auto\",\n            \"barAlignment\": 0,\n            \"drawStyle\": \"line\",\n            
\"fillOpacity\": 0,\n            \"gradientMode\": \"none\",\n            \"hideFrom\": {\n              \"legend\": false,\n              \"tooltip\": false,\n              \"viz\": false\n            },\n            \"insertNulls\": false,\n            \"lineInterpolation\": \"linear\",\n            \"lineWidth\": 1,\n            \"pointSize\": 5,\n            \"scaleDistribution\": {\n              \"type\": \"linear\"\n            },\n            \"showPoints\": \"auto\",\n            \"spanNulls\": false,\n            \"stacking\": {\n              \"group\": \"A\",\n              \"mode\": \"none\"\n            },\n            \"thresholdsStyle\": {\n              \"mode\": \"off\"\n            }\n          },\n          \"links\": [],\n          \"mappings\": [],\n          \"thresholds\": {\n            \"mode\": \"absolute\",\n            \"steps\": [\n              {\n                \"color\": \"green\",\n                \"value\": null\n              },\n              {\n                \"color\": \"red\",\n                \"value\": 80\n              }\n            ]\n          }\n        },\n        \"overrides\": []\n      },\n      \"gridPos\": {\n        \"h\": 7,\n        \"w\": 12,\n        \"x\": 12,\n        \"y\": 29\n      },\n      \"id\": 10,\n      \"options\": {\n        \"alertThreshold\": true,\n        \"legend\": {\n          \"calcs\": [],\n          \"displayMode\": \"list\",\n          \"placement\": \"bottom\",\n          \"showLegend\": true\n        },\n        \"tooltip\": {\n          \"mode\": \"single\",\n          \"sort\": \"none\"\n        }\n      },\n      \"pluginVersion\": \"10.1.10\",\n      \"targets\": [\n        {\n          \"datasource\": {\n            \"type\": \"prometheus\",\n            \"uid\": \"${DS_PROMETHEUS}\"\n          },\n          \"editorMode\": \"code\",\n          \"exemplar\": true,\n          \"expr\":\n              
\"irate(dragonfly_net_input_bytes_total{namespace=\\\"$namespace\\\",pod=~\\\"$pod_name\\\"}[$__rate_interval])\",\n          \"format\": \"time_series\",\n          \"interval\": \"\",\n          \"intervalFactor\": 2,\n          \"legendFormat\": \"{{ pod }} input\",\n          \"range\": true,\n          \"refId\": \"A\",\n          \"step\": 240\n        },\n        {\n          \"datasource\": {\n            \"type\": \"prometheus\",\n            \"uid\": \"${DS_PROMETHEUS}\"\n          },\n          \"editorMode\": \"code\",\n          \"exemplar\": true,\n          \"expr\":\n              \"irate(dragonfly_net_output_bytes_total{namespace=\\\"$namespace\\\",pod=~\\\"$pod_name\\\"}[$__rate_interval])\",\n          \"format\": \"time_series\",\n          \"interval\": \"\",\n          \"intervalFactor\": 2,\n          \"legendFormat\": \"{{ pod }} output\",\n          \"range\": true,\n          \"refId\": \"B\",\n          \"step\": 240\n        }\n      ],\n      \"title\": \"Network I/O\",\n      \"type\": \"timeseries\"\n    },\n    {\n      \"datasource\": {\n        \"type\": \"prometheus\",\n        \"uid\": \"${DS_PROMETHEUS}\"\n      },\n      \"fieldConfig\": {\n        \"defaults\": {\n          \"color\": {\n            \"mode\": \"palette-classic\"\n          },\n          \"custom\": {\n            \"axisCenteredZero\": false,\n            \"axisColorMode\": \"text\",\n            \"axisLabel\": \"\",\n            \"axisPlacement\": \"auto\",\n            \"barAlignment\": 0,\n            \"drawStyle\": \"line\",\n            \"fillOpacity\": 0,\n            \"gradientMode\": \"none\",\n            \"hideFrom\": {\n              \"legend\": false,\n              \"tooltip\": false,\n              \"viz\": false\n            },\n            \"insertNulls\": false,\n            \"lineInterpolation\": \"linear\",\n            \"lineWidth\": 1,\n            \"pointSize\": 5,\n            \"scaleDistribution\": {\n              \"type\": \"linear\"\n 
           },\n            \"showPoints\": \"auto\",\n            \"spanNulls\": false,\n            \"stacking\": {\n              \"group\": \"A\",\n              \"mode\": \"none\"\n            },\n            \"thresholdsStyle\": {\n              \"mode\": \"off\"\n            }\n          },\n          \"links\": [],\n          \"mappings\": [],\n          \"thresholds\": {\n            \"mode\": \"absolute\",\n            \"steps\": [\n              {\n                \"color\": \"green\",\n                \"value\": null\n              },\n              {\n                \"color\": \"red\",\n                \"value\": 80\n              }\n            ]\n          }\n        },\n        \"overrides\": []\n      },\n      \"gridPos\": {\n        \"h\": 7,\n        \"w\": 12,\n        \"x\": 0,\n        \"y\": 36\n      },\n      \"id\": 16,\n      \"options\": {\n        \"alertThreshold\": true,\n        \"legend\": {\n          \"calcs\": [],\n          \"displayMode\": \"list\",\n          \"placement\": \"bottom\",\n          \"showLegend\": true\n        },\n        \"tooltip\": {\n          \"mode\": \"single\",\n          \"sort\": \"none\"\n        }\n      },\n      \"pluginVersion\": \"10.1.10\",\n      \"targets\": [\n        {\n          \"datasource\": {\n            \"type\": \"prometheus\",\n            \"uid\": \"${DS_PROMETHEUS}\"\n          },\n          \"editorMode\": \"code\",\n          \"exemplar\": true,\n          \"expr\": \"dragonfly_connected_clients{namespace=\\\"$namespace\\\",pod=~\\\"$pod_name\\\"}\",\n          \"format\": \"time_series\",\n          \"interval\": \"\",\n          \"intervalFactor\": 1,\n          \"legendFormat\": \"{{pod}}\",\n          \"range\": true,\n          \"refId\": \"A\"\n        }\n      ],\n      \"title\": \"Dragonfly connected clients\",\n      \"type\": \"timeseries\"\n    },\n    {\n      \"datasource\": {\n        \"type\": \"prometheus\",\n        \"uid\": \"${DS_PROMETHEUS}\"\n      },\n    
  \"fieldConfig\": {\n        \"defaults\": {\n          \"color\": {\n            \"mode\": \"palette-classic\"\n          },\n          \"custom\": {\n            \"axisCenteredZero\": false,\n            \"axisColorMode\": \"text\",\n            \"axisLabel\": \"\",\n            \"axisPlacement\": \"auto\",\n            \"barAlignment\": 0,\n            \"drawStyle\": \"line\",\n            \"fillOpacity\": 0,\n            \"gradientMode\": \"none\",\n            \"hideFrom\": {\n              \"legend\": false,\n              \"tooltip\": false,\n              \"viz\": false\n            },\n            \"insertNulls\": false,\n            \"lineInterpolation\": \"linear\",\n            \"lineWidth\": 1,\n            \"pointSize\": 5,\n            \"scaleDistribution\": {\n              \"type\": \"linear\"\n            },\n            \"showPoints\": \"auto\",\n            \"spanNulls\": false,\n            \"stacking\": {\n              \"group\": \"A\",\n              \"mode\": \"none\"\n            },\n            \"thresholdsStyle\": {\n              \"mode\": \"off\"\n            }\n          },\n          \"links\": [],\n          \"mappings\": [],\n          \"thresholds\": {\n            \"mode\": \"absolute\",\n            \"steps\": [\n              {\n                \"color\": \"green\",\n                \"value\": null\n              },\n              {\n                \"color\": \"red\",\n                \"value\": 80\n              }\n            ]\n          },\n          \"unit\": \"s\"\n        },\n        \"overrides\": []\n      },\n      \"gridPos\": {\n        \"h\": 7,\n        \"w\": 12,\n        \"x\": 12,\n        \"y\": 36\n      },\n      \"id\": 27,\n      \"options\": {\n        \"alertThreshold\": true,\n        \"legend\": {\n          \"calcs\": [],\n          \"displayMode\": \"list\",\n          \"placement\": \"bottom\",\n          \"showLegend\": true\n        },\n        \"tooltip\": {\n          \"mode\": \"single\",\n   
       \"sort\": \"none\"\n        }\n      },\n      \"pluginVersion\": \"10.1.10\",\n      \"targets\": [\n        {\n          \"datasource\": {\n            \"type\": \"prometheus\",\n            \"uid\": \"${DS_PROMETHEUS}\"\n          },\n          \"editorMode\": \"code\",\n          \"exemplar\": true,\n          \"expr\":\n              \"rate(dragonfly_pipeline_commands_duration_seconds{namespace=\\\"$namespace\\\",pod=~\\\"$pod_name\\\"}[$__rate_interval])/rate(dragonfly_pipeline_commands_total{namespace=\\\"$namespace\\\",pod=~\\\"$pod_name\\\"}[$__rate_interval])\",\n          \"format\": \"time_series\",\n          \"interval\": \"\",\n          \"intervalFactor\": 2,\n          \"legendFormat\": \"pipeline\",\n          \"range\": true,\n          \"refId\": \"A\",\n          \"step\": 240\n        },\n        {\n          \"datasource\": {\n            \"type\": \"prometheus\",\n            \"uid\": \"${DS_PROMETHEUS}\"\n          },\n          \"editorMode\": \"code\",\n          \"exemplar\": true,\n          \"expr\":\n              \"rate(dragonfly_cmd_squash_hop_duration_seconds{namespace=\\\"$namespace\\\",pod=~\\\"$pod_name\\\"}[$__rate_interval])/rate(dragonfly_cmd_squash_hop_total{namespace=\\\"$namespace\\\",pod=~\\\"$pod_name\\\"}[$__rate_interval])\",\n          \"format\": \"time_series\",\n          \"hide\": false,\n          \"interval\": \"\",\n          \"intervalFactor\": 2,\n          \"legendFormat\": \"execute_hop\",\n          \"range\": true,\n          \"refId\": \"B\",\n          \"step\": 240\n        },\n        {\n          \"datasource\": {\n            \"type\": \"prometheus\",\n            \"uid\": \"${DS_PROMETHEUS}\"\n          },\n          \"editorMode\": \"code\",\n          \"exemplar\": true,\n          \"expr\":\n              
\"rate(dragonfly_pipeline_queue_wait_duration_seconds{namespace=\\\"$namespace\\\",pod=~\\\"$pod_name\\\"}[$__rate_interval])/rate(dragonfly_pipeline_commands_total{namespace=\\\"$namespace\\\",pod=~\\\"$pod_name\\\"}[$__rate_interval])\",\n          \"format\": \"time_series\",\n          \"hide\": false,\n          \"interval\": \"\",\n          \"intervalFactor\": 2,\n          \"legendFormat\": \"wait_queue\",\n          \"range\": true,\n          \"refId\": \"C\",\n          \"step\": 240\n        },\n        {\n          \"datasource\": {\n            \"type\": \"prometheus\",\n            \"uid\": \"${DS_PROMETHEUS}\"\n          },\n          \"editorMode\": \"code\",\n          \"exemplar\": false,\n          \"expr\":\n              \"rate(dragonfly_pipeline_dispatch_flush_duration_seconds{namespace=\\\"$namespace\\\",pod=~\\\"$pod_name\\\"}[$__rate_interval])/rate(dragonfly_pipeline_dispatch_calls_total{namespace=\\\"$namespace\\\",pod=~\\\"$pod_name\\\"}[$__rate_interval])\",\n          \"format\": \"time_series\",\n          \"instant\": false,\n          \"interval\": \"\",\n          \"intervalFactor\": 2,\n          \"legendFormat\": \"dispatch_flush\",\n          \"range\": true,\n          \"refId\": \"D\",\n          \"step\": 240\n        }\n      ],\n      \"title\": \"Pipeline Latency\",\n      \"type\": \"timeseries\"\n    },\n    {\n      \"datasource\": {\n        \"type\": \"prometheus\",\n        \"uid\": \"${DS_PROMETHEUS}\"\n      },\n      \"fieldConfig\": {\n        \"defaults\": {\n          \"color\": {\n            \"mode\": \"palette-classic\"\n          },\n          \"custom\": {\n            \"axisCenteredZero\": false,\n            \"axisColorMode\": \"text\",\n            \"axisLabel\": \"\",\n            \"axisPlacement\": \"auto\",\n            \"barAlignment\": 0,\n            \"drawStyle\": \"line\",\n            \"fillOpacity\": 0,\n            \"gradientMode\": \"none\",\n            \"hideFrom\": {\n              
\"legend\": false,\n              \"tooltip\": false,\n              \"viz\": false\n            },\n            \"insertNulls\": false,\n            \"lineInterpolation\": \"linear\",\n            \"lineWidth\": 1,\n            \"pointSize\": 5,\n            \"scaleDistribution\": {\n              \"type\": \"linear\"\n            },\n            \"showPoints\": \"auto\",\n            \"spanNulls\": false,\n            \"stacking\": {\n              \"group\": \"A\",\n              \"mode\": \"none\"\n            },\n            \"thresholdsStyle\": {\n              \"mode\": \"off\"\n            }\n          },\n          \"mappings\": [],\n          \"thresholds\": {\n            \"mode\": \"absolute\",\n            \"steps\": [\n              {\n                \"color\": \"green\",\n                \"value\": null\n              },\n              {\n                \"color\": \"red\",\n                \"value\": 80\n              }\n            ]\n          }\n        },\n        \"overrides\": []\n      },\n      \"gridPos\": {\n        \"h\": 8,\n        \"w\": 12,\n        \"x\": 0,\n        \"y\": 43\n      },\n      \"id\": 22,\n      \"options\": {\n        \"legend\": {\n          \"calcs\": [],\n          \"displayMode\": \"list\",\n          \"placement\": \"bottom\",\n          \"showLegend\": true\n        },\n        \"tooltip\": {\n          \"mode\": \"single\",\n          \"sort\": \"none\"\n        }\n      },\n      \"pluginVersion\": \"10.1.10\",\n      \"targets\": [\n        {\n          \"datasource\": {\n            \"type\": \"prometheus\",\n            \"uid\": \"${DS_PROMETHEUS}\"\n          },\n          \"editorMode\": \"code\",\n          \"expr\": \"dragonfly_pipeline_queue_length{namespace=\\\"$namespace\\\",pod=~\\\"$pod_name\\\"}\",\n          \"instant\": false,\n          \"legendFormat\": \"avr_pipeline_depth\",\n          \"range\": true,\n          \"refId\": \"A\"\n        }\n      ],\n      \"title\": \"Pipeline 
length\",\n      \"type\": \"timeseries\"\n    },\n    {\n      \"datasource\": {\n        \"type\": \"prometheus\",\n        \"uid\": \"${DS_PROMETHEUS}\"\n      },\n      \"fieldConfig\": {\n        \"defaults\": {\n          \"color\": {\n            \"mode\": \"palette-classic\"\n          },\n          \"custom\": {\n            \"axisCenteredZero\": false,\n            \"axisColorMode\": \"text\",\n            \"axisLabel\": \"\",\n            \"axisPlacement\": \"auto\",\n            \"barAlignment\": 0,\n            \"drawStyle\": \"line\",\n            \"fillOpacity\": 0,\n            \"gradientMode\": \"none\",\n            \"hideFrom\": {\n              \"legend\": false,\n              \"tooltip\": false,\n              \"viz\": false\n            },\n            \"insertNulls\": false,\n            \"lineInterpolation\": \"linear\",\n            \"lineWidth\": 1,\n            \"pointSize\": 5,\n            \"scaleDistribution\": {\n              \"type\": \"linear\"\n            },\n            \"showPoints\": \"auto\",\n            \"spanNulls\": false,\n            \"stacking\": {\n              \"group\": \"A\",\n              \"mode\": \"none\"\n            },\n            \"thresholdsStyle\": {\n              \"mode\": \"off\"\n            }\n          },\n          \"links\": [],\n          \"mappings\": [],\n          \"thresholds\": {\n            \"mode\": \"absolute\",\n            \"steps\": [\n              {\n                \"color\": \"green\",\n                \"value\": null\n              },\n              {\n                \"color\": \"red\",\n                \"value\": 80\n              }\n            ]\n          }\n        },\n        \"overrides\": []\n      },\n      \"gridPos\": {\n        \"h\": 7,\n        \"w\": 12,\n        \"x\": 12,\n        \"y\": 43\n      },\n      \"id\": 13,\n      \"options\": {\n        \"alertThreshold\": true,\n        \"legend\": {\n          \"calcs\": [],\n          \"displayMode\": 
\"list\",\n          \"placement\": \"bottom\",\n          \"showLegend\": true\n        },\n        \"tooltip\": {\n          \"mode\": \"single\",\n          \"sort\": \"none\"\n        }\n      },\n      \"pluginVersion\": \"10.1.10\",\n      \"targets\": [\n        {\n          \"datasource\": {\n            \"type\": \"prometheus\",\n            \"uid\": \"${DS_PROMETHEUS}\"\n          },\n          \"editorMode\": \"code\",\n          \"exemplar\": true,\n          \"expr\":\n              \"sum (dragonfly_db_keys{namespace=\\\"$namespace\\\",pod=~\\\"$pod_name\\\"}) - sum (dragonfly_db_keys_expiring{namespace=\\\"$namespace\\\",pod=~\\\"$pod_name\\\"}) \",\n          \"format\": \"time_series\",\n          \"interval\": \"\",\n          \"intervalFactor\": 2,\n          \"legendFormat\": \"not expiring\",\n          \"range\": true,\n          \"refId\": \"A\",\n          \"step\": 240,\n          \"target\": \"\"\n        },\n        {\n          \"datasource\": {\n            \"type\": \"prometheus\",\n            \"uid\": \"${DS_PROMETHEUS}\"\n          },\n          \"editorMode\": \"code\",\n          \"exemplar\": true,\n          \"expr\": \"sum (dragonfly_db_keys_expiring{namespace=\\\"$namespace\\\",pod=~\\\"$pod_name\\\"})\",\n          \"format\": \"time_series\",\n          \"interval\": \"\",\n          \"intervalFactor\": 2,\n          \"legendFormat\": \"expiring\",\n          \"metric\": \"\",\n          \"range\": true,\n          \"refId\": \"B\",\n          \"step\": 240\n        }\n      ],\n      \"title\": \"Expiring vs Not-Expiring Keys\",\n      \"type\": \"timeseries\"\n    },\n    {\n      \"datasource\": {\n        \"type\": \"prometheus\",\n        \"uid\": \"${DS_PROMETHEUS}\"\n      },\n      \"description\": \"\",\n      \"fieldConfig\": {\n        \"defaults\": {\n          \"color\": {\n            \"mode\": \"palette-classic\"\n          },\n          \"custom\": {\n            \"axisCenteredZero\": false,\n            
\"axisColorMode\": \"text\",\n            \"axisLabel\": \"\",\n            \"axisPlacement\": \"auto\",\n            \"barAlignment\": 0,\n            \"drawStyle\": \"line\",\n            \"fillOpacity\": 0,\n            \"gradientMode\": \"none\",\n            \"hideFrom\": {\n              \"legend\": false,\n              \"tooltip\": false,\n              \"viz\": false\n            },\n            \"insertNulls\": false,\n            \"lineInterpolation\": \"linear\",\n            \"lineWidth\": 1,\n            \"pointSize\": 5,\n            \"scaleDistribution\": {\n              \"type\": \"linear\"\n            },\n            \"showPoints\": \"auto\",\n            \"spanNulls\": false,\n            \"stacking\": {\n              \"group\": \"A\",\n              \"mode\": \"none\"\n            },\n            \"thresholdsStyle\": {\n              \"mode\": \"off\"\n            }\n          },\n          \"mappings\": [],\n          \"thresholds\": {\n            \"mode\": \"absolute\",\n            \"steps\": [\n              {\n                \"color\": \"green\",\n                \"value\": null\n              },\n              {\n                \"color\": \"red\",\n                \"value\": 80\n              }\n            ]\n          }\n        },\n        \"overrides\": []\n      },\n      \"gridPos\": {\n        \"h\": 8,\n        \"w\": 12,\n        \"x\": 12,\n        \"y\": 50\n      },\n      \"id\": 28,\n      \"options\": {\n        \"legend\": {\n          \"calcs\": [],\n          \"displayMode\": \"list\",\n          \"placement\": \"bottom\",\n          \"showLegend\": true\n        },\n        \"tooltip\": {\n          \"mode\": \"single\",\n          \"sort\": \"none\"\n        }\n      },\n      \"pluginVersion\": \"10.1.10\",\n      \"targets\": [\n        {\n          \"datasource\": {\n            \"type\": \"prometheus\",\n            \"uid\": \"${DS_PROMETHEUS}\"\n          },\n          \"editorMode\": \"code\",\n          
\"expr\":\n              \"irate(dragonfly_cmd_squash_commands_total\\n{namespace=\\\"$namespace\\\",pod=~\\\"$pod_name\\\"}[$__rate_interval])/irate(dragonfly_cmd_squash_hop_total{namespace=\\\"$namespace\\\",pod=~\\\"$pod_name\\\"}[$__rate_interval])\",\n          \"hide\": false,\n          \"instant\": false,\n          \"legendFormat\": \"squash_len\",\n          \"range\": true,\n          \"refId\": \"A\"\n        },\n        {\n          \"datasource\": {\n            \"type\": \"prometheus\",\n            \"uid\": \"${DS_PROMETHEUS}\"\n          },\n          \"editorMode\": \"code\",\n          \"expr\":\n              \"irate(dragonfly_pipeline_dispatch_commands_total\\n{namespace=\\\"$namespace\\\",pod=~\\\"$pod_name\\\"}[$__rate_interval])/irate(dragonfly_pipeline_dispatch_calls_total{namespace=\\\"$namespace\\\",pod=~\\\"$pod_name\\\"}[$__rate_interval])\",\n          \"hide\": false,\n          \"instant\": false,\n          \"legendFormat\": \"dispatch_len\",\n          \"range\": true,\n          \"refId\": \"B\"\n        }\n      ],\n      \"title\": \"Average Squashing Length\",\n      \"type\": \"timeseries\"\n    },\n    {\n      \"datasource\": {\n        \"type\": \"prometheus\",\n        \"uid\": \"${DS_PROMETHEUS}\"\n      },\n      \"fieldConfig\": {\n        \"defaults\": {\n          \"color\": {\n            \"mode\": \"palette-classic\"\n          },\n          \"custom\": {\n            \"axisCenteredZero\": false,\n            \"axisColorMode\": \"text\",\n            \"axisLabel\": \"\",\n            \"axisPlacement\": \"auto\",\n            \"barAlignment\": 0,\n            \"drawStyle\": \"line\",\n            \"fillOpacity\": 0,\n            \"gradientMode\": \"none\",\n            \"hideFrom\": {\n              \"legend\": false,\n              \"tooltip\": false,\n              \"viz\": false\n            },\n            \"insertNulls\": false,\n            \"lineInterpolation\": \"linear\",\n            \"lineWidth\": 1,\n     
       \"pointSize\": 5,\n            \"scaleDistribution\": {\n              \"type\": \"linear\"\n            },\n            \"showPoints\": \"auto\",\n            \"spanNulls\": false,\n            \"stacking\": {\n              \"group\": \"A\",\n              \"mode\": \"none\"\n            },\n            \"thresholdsStyle\": {\n              \"mode\": \"off\"\n            }\n          },\n          \"mappings\": [],\n          \"thresholds\": {\n            \"mode\": \"absolute\",\n            \"steps\": [\n              {\n                \"color\": \"green\",\n                \"value\": null\n              },\n              {\n                \"color\": \"red\",\n                \"value\": 80\n              }\n            ]\n          }\n        },\n        \"overrides\": []\n      },\n      \"gridPos\": {\n        \"h\": 8,\n        \"w\": 12,\n        \"x\": 0,\n        \"y\": 51\n      },\n      \"id\": 21,\n      \"options\": {\n        \"legend\": {\n          \"calcs\": [],\n          \"displayMode\": \"list\",\n          \"placement\": \"bottom\",\n          \"showLegend\": true\n        },\n        \"tooltip\": {\n          \"mode\": \"single\",\n          \"sort\": \"none\"\n        }\n      },\n      \"pluginVersion\": \"10.1.10\",\n      \"targets\": [\n        {\n          \"datasource\": {\n            \"type\": \"prometheus\",\n            \"uid\": \"${DS_PROMETHEUS}\"\n          },\n          \"editorMode\": \"code\",\n          \"expr\":\n              \"dragonfly_replication_full_sync_bytes{namespace=\\\"$namespace\\\",pod=~\\\"$pod_name\\\"}\",\n          \"instant\": false,\n          \"legendFormat\": \"fullsync\",\n          \"range\": true,\n          \"refId\": \"A\"\n        },\n        {\n          \"datasource\": {\n            \"type\": \"prometheus\",\n            \"uid\": \"${DS_PROMETHEUS}\"\n          },\n          \"editorMode\": \"code\",\n          \"expr\":\n              
\"dragonfly_replication_streaming_bytes{namespace=\\\"$namespace\\\",pod=~\\\"$pod_name\\\"}\",\n          \"hide\": false,\n          \"instant\": false,\n          \"legendFormat\": \"stable_sync\",\n          \"range\": true,\n          \"refId\": \"B\"\n        }\n      ],\n      \"title\": \"Master Replication memory\",\n      \"type\": \"timeseries\"\n    },\n    {\n      \"collapsed\": true,\n      \"gridPos\": {\n        \"h\": 1,\n        \"w\": 24,\n        \"x\": 0,\n        \"y\": 59\n      },\n      \"id\": 36,\n      \"panels\": [\n        {\n          \"datasource\": {\n            \"type\": \"prometheus\",\n            \"uid\": \"${DS_PROMETHEUS}\"\n          },\n          \"fieldConfig\": {\n            \"defaults\": {\n              \"color\": {\n                \"mode\": \"palette-classic\"\n              },\n              \"custom\": {\n                \"axisCenteredZero\": false,\n                \"axisColorMode\": \"text\",\n                \"axisLabel\": \"\",\n                \"axisPlacement\": \"auto\",\n                \"barAlignment\": 0,\n                \"drawStyle\": \"line\",\n                \"fillOpacity\": 0,\n                \"gradientMode\": \"none\",\n                \"hideFrom\": {\n                  \"legend\": false,\n                  \"tooltip\": false,\n                  \"viz\": false\n                },\n                \"insertNulls\": false,\n                \"lineInterpolation\": \"linear\",\n                \"lineWidth\": 1,\n                \"pointSize\": 5,\n                \"scaleDistribution\": {\n                  \"type\": \"linear\"\n                },\n                \"showPoints\": \"auto\",\n                \"spanNulls\": false,\n                \"stacking\": {\n                  \"group\": \"A\",\n                  \"mode\": \"none\"\n                },\n                \"thresholdsStyle\": {\n                  \"mode\": \"off\"\n                }\n              },\n              \"mappings\": [],\n       
       \"thresholds\": {\n                \"mode\": \"absolute\",\n                \"steps\": [\n                  {\n                    \"color\": \"green\"\n                  },\n                  {\n                    \"color\": \"red\",\n                    \"value\": 80\n                  }\n                ]\n              }\n            },\n            \"overrides\": []\n          },\n          \"gridPos\": {\n            \"h\": 8,\n            \"w\": 12,\n            \"x\": 0,\n            \"y\": 67\n          },\n          \"id\": 33,\n          \"options\": {\n            \"legend\": {\n              \"calcs\": [],\n              \"displayMode\": \"list\",\n              \"placement\": \"bottom\",\n              \"showLegend\": true\n            },\n            \"tooltip\": {\n              \"mode\": \"single\",\n              \"sort\": \"none\"\n            }\n          },\n          \"targets\": [\n            {\n              \"datasource\": {\n                \"type\": \"prometheus\",\n                \"uid\": \"${DS_PROMETHEUS}\"\n              },\n              \"disableTextWrap\": false,\n              \"editorMode\": \"builder\",\n              \"expr\": \"rate(dragonfly_tiered_hits[$__rate_interval])\",\n              \"fullMetaSearch\": false,\n              \"includeNullMetadata\": true,\n              \"instant\": false,\n              \"legendFormat\": \"{{type}}\",\n              \"range\": true,\n              \"refId\": \"A\",\n              \"useBackend\": false\n            }\n          ],\n          \"title\": \"Tiered hits\",\n          \"type\": \"timeseries\"\n        },\n        {\n          \"datasource\": {\n            \"type\": \"prometheus\",\n            \"uid\": \"${DS_PROMETHEUS}\"\n          },\n          \"fieldConfig\": {\n            \"defaults\": {\n              \"color\": {\n                \"mode\": \"palette-classic\"\n              },\n              \"custom\": {\n                \"axisCenteredZero\": false,\n    
            \"axisColorMode\": \"text\",\n                \"axisLabel\": \"\",\n                \"axisPlacement\": \"auto\",\n                \"barAlignment\": 0,\n                \"drawStyle\": \"line\",\n                \"fillOpacity\": 0,\n                \"gradientMode\": \"none\",\n                \"hideFrom\": {\n                  \"legend\": false,\n                  \"tooltip\": false,\n                  \"viz\": false\n                },\n                \"insertNulls\": false,\n                \"lineInterpolation\": \"linear\",\n                \"lineWidth\": 1,\n                \"pointSize\": 5,\n                \"scaleDistribution\": {\n                  \"type\": \"linear\"\n                },\n                \"showPoints\": \"auto\",\n                \"spanNulls\": false,\n                \"stacking\": {\n                  \"group\": \"A\",\n                  \"mode\": \"none\"\n                },\n                \"thresholdsStyle\": {\n                  \"mode\": \"off\"\n                }\n              },\n              \"mappings\": [],\n              \"thresholds\": {\n                \"mode\": \"absolute\",\n                \"steps\": [\n                  {\n                    \"color\": \"green\"\n                  },\n                  {\n                    \"color\": \"red\",\n                    \"value\": 80\n                  }\n                ]\n              }\n            },\n            \"overrides\": []\n          },\n          \"gridPos\": {\n            \"h\": 8,\n            \"w\": 12,\n            \"x\": 12,\n            \"y\": 67\n          },\n          \"id\": 34,\n          \"options\": {\n            \"legend\": {\n              \"calcs\": [],\n              \"displayMode\": \"list\",\n              \"placement\": \"bottom\",\n              \"showLegend\": true\n            },\n            \"tooltip\": {\n              \"mode\": \"single\",\n              \"sort\": \"none\"\n            }\n          },\n          
\"targets\": [\n            {\n              \"datasource\": {\n                \"type\": \"prometheus\",\n                \"uid\": \"${DS_PROMETHEUS}\"\n              },\n              \"disableTextWrap\": false,\n              \"editorMode\": \"builder\",\n              \"expr\": \"rate(dragonfly_tiered_overload[$__rate_interval])\",\n              \"fullMetaSearch\": false,\n              \"includeNullMetadata\": true,\n              \"instant\": false,\n              \"legendFormat\": \"{{type}}\",\n              \"range\": true,\n              \"refId\": \"A\",\n              \"useBackend\": false\n            }\n          ],\n          \"title\": \"Tiered overload\",\n          \"type\": \"timeseries\"\n        },\n        {\n          \"datasource\": {\n            \"type\": \"prometheus\",\n            \"uid\": \"${DS_PROMETHEUS}\"\n          },\n          \"description\": \"Tiered bytes\",\n          \"fieldConfig\": {\n            \"defaults\": {\n              \"color\": {\n                \"mode\": \"palette-classic\"\n              },\n              \"custom\": {\n                \"axisCenteredZero\": false,\n                \"axisColorMode\": \"text\",\n                \"axisLabel\": \"\",\n                \"axisPlacement\": \"auto\",\n                \"barAlignment\": 0,\n                \"drawStyle\": \"line\",\n                \"fillOpacity\": 0,\n                \"gradientMode\": \"none\",\n                \"hideFrom\": {\n                  \"legend\": false,\n                  \"tooltip\": false,\n                  \"viz\": false\n                },\n                \"insertNulls\": false,\n                \"lineInterpolation\": \"linear\",\n                \"lineWidth\": 1,\n                \"pointSize\": 5,\n                \"scaleDistribution\": {\n                  \"type\": \"linear\"\n                },\n                \"showPoints\": \"auto\",\n                \"spanNulls\": false,\n                \"stacking\": {\n                  
\"group\": \"A\",\n                  \"mode\": \"none\"\n                },\n                \"thresholdsStyle\": {\n                  \"mode\": \"off\"\n                }\n              },\n              \"mappings\": [],\n              \"thresholds\": {\n                \"mode\": \"absolute\",\n                \"steps\": [\n                  {\n                    \"color\": \"green\"\n                  },\n                  {\n                    \"color\": \"red\",\n                    \"value\": 80\n                  }\n                ]\n              },\n              \"unit\": \"bytes\"\n            },\n            \"overrides\": []\n          },\n          \"gridPos\": {\n            \"h\": 8,\n            \"w\": 12,\n            \"x\": 0,\n            \"y\": 75\n          },\n          \"id\": 31,\n          \"options\": {\n            \"legend\": {\n              \"calcs\": [],\n              \"displayMode\": \"list\",\n              \"placement\": \"bottom\",\n              \"showLegend\": true\n            },\n            \"tooltip\": {\n              \"mode\": \"single\",\n              \"sort\": \"none\"\n            }\n          },\n          \"targets\": [\n            {\n              \"datasource\": {\n                \"type\": \"prometheus\",\n                \"uid\": \"${DS_PROMETHEUS}\"\n              },\n              \"disableTextWrap\": false,\n              \"editorMode\": \"builder\",\n              \"expr\": \"dragonfly_tiered_bytes\",\n              \"format\": \"time_series\",\n              \"fullMetaSearch\": false,\n              \"includeNullMetadata\": true,\n              \"instant\": false,\n              \"interval\": \"\",\n              \"legendFormat\": \"{{type}}\",\n              \"range\": true,\n              \"refId\": \"A\",\n              \"useBackend\": false\n            }\n          ],\n          \"title\": \"Tiered bytes\",\n          \"type\": \"timeseries\"\n        },\n        {\n          \"datasource\": {\n   
         \"type\": \"prometheus\",\n            \"uid\": \"${DS_PROMETHEUS}\"\n          },\n          \"fieldConfig\": {\n            \"defaults\": {\n              \"color\": {\n                \"mode\": \"palette-classic\"\n              },\n              \"custom\": {\n                \"axisCenteredZero\": false,\n                \"axisColorMode\": \"text\",\n                \"axisLabel\": \"\",\n                \"axisPlacement\": \"auto\",\n                \"barAlignment\": 0,\n                \"drawStyle\": \"line\",\n                \"fillOpacity\": 0,\n                \"gradientMode\": \"none\",\n                \"hideFrom\": {\n                  \"legend\": false,\n                  \"tooltip\": false,\n                  \"viz\": false\n                },\n                \"insertNulls\": false,\n                \"lineInterpolation\": \"linear\",\n                \"lineWidth\": 1,\n                \"pointSize\": 5,\n                \"scaleDistribution\": {\n                  \"type\": \"linear\"\n                },\n                \"showPoints\": \"auto\",\n                \"spanNulls\": false,\n                \"stacking\": {\n                  \"group\": \"A\",\n                  \"mode\": \"none\"\n                },\n                \"thresholdsStyle\": {\n                  \"mode\": \"off\"\n                }\n              },\n              \"mappings\": [],\n              \"thresholds\": {\n                \"mode\": \"absolute\",\n                \"steps\": [\n                  {\n                    \"color\": \"green\"\n                  },\n                  {\n                    \"color\": \"red\",\n                    \"value\": 80\n                  }\n                ]\n              }\n            },\n            \"overrides\": []\n          },\n          \"gridPos\": {\n            \"h\": 8,\n            \"w\": 12,\n            \"x\": 12,\n            \"y\": 75\n          },\n          \"id\": 32,\n          \"options\": {\n            
\"legend\": {\n              \"calcs\": [],\n              \"displayMode\": \"list\",\n              \"placement\": \"bottom\",\n              \"showLegend\": true\n            },\n            \"tooltip\": {\n              \"mode\": \"single\",\n              \"sort\": \"none\"\n            }\n          },\n          \"targets\": [\n            {\n              \"datasource\": {\n                \"type\": \"prometheus\",\n                \"uid\": \"${DS_PROMETHEUS}\"\n              },\n              \"disableTextWrap\": false,\n              \"editorMode\": \"builder\",\n              \"expr\": \"rate(dragonfly_tiered_events[$__rate_interval])\",\n              \"fullMetaSearch\": false,\n              \"includeNullMetadata\": true,\n              \"instant\": false,\n              \"legendFormat\": \"{{type}}\",\n              \"range\": true,\n              \"refId\": \"A\",\n              \"useBackend\": false\n            }\n          ],\n          \"title\": \"Tiered events\",\n          \"type\": \"timeseries\"\n        }\n      ],\n      \"title\": \"Tiered\",\n      \"type\": \"row\"\n    },\n    {\n      \"collapsed\": false,\n      \"gridPos\": {\n        \"h\": 1,\n        \"w\": 24,\n        \"x\": 0,\n        \"y\": 60\n      },\n      \"id\": 19,\n      \"panels\": [],\n      \"title\": \"Advanced metrics\",\n      \"type\": \"row\"\n    },\n    {\n      \"datasource\": {\n        \"type\": \"prometheus\",\n        \"uid\": \"${DS_PROMETHEUS}\"\n      },\n      \"fieldConfig\": {\n        \"defaults\": {\n          \"color\": {\n            \"mode\": \"palette-classic\"\n          },\n          \"custom\": {\n            \"axisCenteredZero\": false,\n            \"axisColorMode\": \"text\",\n            \"axisLabel\": \"\",\n            \"axisPlacement\": \"auto\",\n            \"barAlignment\": 0,\n            \"drawStyle\": \"line\",\n            \"fillOpacity\": 0,\n            \"gradientMode\": \"none\",\n            \"hideFrom\": {\n              
\"legend\": false,\n              \"tooltip\": false,\n              \"viz\": false\n            },\n            \"insertNulls\": false,\n            \"lineInterpolation\": \"linear\",\n            \"lineWidth\": 1,\n            \"pointSize\": 5,\n            \"scaleDistribution\": {\n              \"type\": \"linear\"\n            },\n            \"showPoints\": \"auto\",\n            \"spanNulls\": false,\n            \"stacking\": {\n              \"group\": \"A\",\n              \"mode\": \"none\"\n            },\n            \"thresholdsStyle\": {\n              \"mode\": \"off\"\n            }\n          },\n          \"mappings\": [],\n          \"thresholds\": {\n            \"mode\": \"absolute\",\n            \"steps\": [\n              {\n                \"color\": \"green\",\n                \"value\": null\n              },\n              {\n                \"color\": \"red\",\n                \"value\": 80\n              }\n            ]\n          },\n          \"unit\": \"s\"\n        },\n        \"overrides\": []\n      },\n      \"gridPos\": {\n        \"h\": 8,\n        \"w\": 12,\n        \"x\": 0,\n        \"y\": 61\n      },\n      \"id\": 18,\n      \"options\": {\n        \"legend\": {\n          \"calcs\": [],\n          \"displayMode\": \"list\",\n          \"placement\": \"bottom\",\n          \"showLegend\": true\n        },\n        \"tooltip\": {\n          \"mode\": \"single\",\n          \"sort\": \"none\"\n        }\n      },\n      \"pluginVersion\": \"10.1.10\",\n      \"targets\": [\n        {\n          \"datasource\": {\n            \"type\": \"prometheus\",\n            \"uid\": \"${DS_PROMETHEUS}\"\n          },\n          \"disableTextWrap\": false,\n          \"editorMode\": \"code\",\n          \"expr\":\n              
\"irate(dragonfly_fiber_switch_delay_seconds_total{namespace=\\\"$namespace\\\",pod=~\\\"$pod_name\\\"}[$__rate_interval])/rate(dragonfly_fiber_switch_total{namespace=\\\"$namespace\\\",pod=~\\\"$pod_name\\\"}[$__rate_interval])\",\n          \"fullMetaSearch\": false,\n          \"includeNullMetadata\": false,\n          \"instant\": false,\n          \"legendFormat\": \"switch\",\n          \"range\": true,\n          \"refId\": \"A\",\n          \"useBackend\": false\n        },\n        {\n          \"datasource\": {\n            \"type\": \"prometheus\",\n            \"uid\": \"${DS_PROMETHEUS}\"\n          },\n          \"editorMode\": \"code\",\n          \"expr\":\n              \"irate(dragonfly_fiber_longrun_seconds{namespace=\\\"$namespace\\\",pod=~\\\"$pod_name\\\"}[$__rate_interval])/irate(dragonfly_fiber_longrun_total{namespace=\\\"$namespace\\\",pod=~\\\"$pod_name\\\"}[$__rate_interval])\",\n          \"hide\": false,\n          \"instant\": false,\n          \"legendFormat\": \"longrun\",\n          \"range\": true,\n          \"refId\": \"B\"\n        }\n      ],\n      \"title\": \"FiberSwitchDelay\",\n      \"type\": \"timeseries\"\n    },\n    {\n      \"datasource\": {\n        \"type\": \"prometheus\",\n        \"uid\": \"${DS_PROMETHEUS}\"\n      },\n      \"description\": \"\",\n      \"fieldConfig\": {\n        \"defaults\": {\n          \"color\": {\n            \"mode\": \"palette-classic\"\n          },\n          \"custom\": {\n            \"axisCenteredZero\": false,\n            \"axisColorMode\": \"text\",\n            \"axisLabel\": \"\",\n            \"axisPlacement\": \"auto\",\n            \"barAlignment\": 0,\n            \"drawStyle\": \"line\",\n            \"fillOpacity\": 0,\n            \"gradientMode\": \"none\",\n            \"hideFrom\": {\n              \"legend\": false,\n              \"tooltip\": false,\n              \"viz\": false\n            },\n            \"insertNulls\": false,\n            
\"lineInterpolation\": \"linear\",\n            \"lineWidth\": 1,\n            \"pointSize\": 5,\n            \"scaleDistribution\": {\n              \"type\": \"linear\"\n            },\n            \"showPoints\": \"auto\",\n            \"spanNulls\": false,\n            \"stacking\": {\n              \"group\": \"A\",\n              \"mode\": \"none\"\n            },\n            \"thresholdsStyle\": {\n              \"mode\": \"off\"\n            }\n          },\n          \"mappings\": [],\n          \"thresholds\": {\n            \"mode\": \"absolute\",\n            \"steps\": [\n              {\n                \"color\": \"green\",\n                \"value\": null\n              },\n              {\n                \"color\": \"red\",\n                \"value\": 80\n              }\n            ]\n          },\n          \"unit\": \"percentunit\"\n        },\n        \"overrides\": []\n      },\n      \"gridPos\": {\n        \"h\": 8,\n        \"w\": 12,\n        \"x\": 12,\n        \"y\": 61\n      },\n      \"id\": 29,\n      \"options\": {\n        \"legend\": {\n          \"calcs\": [],\n          \"displayMode\": \"list\",\n          \"placement\": \"bottom\",\n          \"showLegend\": true\n        },\n        \"tooltip\": {\n          \"mode\": \"single\",\n          \"sort\": \"none\"\n        }\n      },\n      \"pluginVersion\": \"10.1.10\",\n      \"targets\": [\n        {\n          \"datasource\": {\n            \"type\": \"prometheus\",\n            \"uid\": \"${DS_PROMETHEUS}\"\n          },\n          \"editorMode\": \"code\",\n          \"expr\":\n              \"1 - irate(dragonfly_pipeline_dispatch_calls_total\\n{namespace=\\\"$namespace\\\",pod=~\\\"$pod_name\\\"}[$__rate_interval])/(irate(dragonfly_pipeline_dispatch_calls_total{namespace=\\\"$namespace\\\",pod=~\\\"$pod_name\\\"}[$__rate_interval]) + irate(dragonfly_cmd_squash_stats_ignored_total{namespace=\\\"$namespace\\\",pod=~\\\"$pod_name\\\"}[$__rate_interval]))\",\n          
\"hide\": false,\n          \"instant\": false,\n          \"legendFormat\": \"__auto\",\n          \"range\": true,\n          \"refId\": \"B\"\n        }\n      ],\n      \"title\": \"Metrics Select Pipeline Ratio\",\n      \"type\": \"timeseries\"\n    },\n    {\n      \"datasource\": {\n        \"type\": \"prometheus\",\n        \"uid\": \"${DS_PROMETHEUS}\"\n      },\n      \"description\": \"\",\n      \"fieldConfig\": {\n        \"defaults\": {\n          \"color\": {\n            \"mode\": \"palette-classic\"\n          },\n          \"custom\": {\n            \"axisCenteredZero\": false,\n            \"axisColorMode\": \"text\",\n            \"axisLabel\": \"\",\n            \"axisPlacement\": \"auto\",\n            \"barAlignment\": 0,\n            \"drawStyle\": \"line\",\n            \"fillOpacity\": 0,\n            \"gradientMode\": \"none\",\n            \"hideFrom\": {\n              \"legend\": false,\n              \"tooltip\": false,\n              \"viz\": false\n            },\n            \"insertNulls\": false,\n            \"lineInterpolation\": \"linear\",\n            \"lineWidth\": 1,\n            \"pointSize\": 5,\n            \"scaleDistribution\": {\n              \"type\": \"linear\"\n            },\n            \"showPoints\": \"auto\",\n            \"spanNulls\": false,\n            \"stacking\": {\n              \"group\": \"A\",\n              \"mode\": \"none\"\n            },\n            \"thresholdsStyle\": {\n              \"mode\": \"off\"\n            }\n          },\n          \"mappings\": [],\n          \"thresholds\": {\n            \"mode\": \"absolute\",\n            \"steps\": [\n              {\n                \"color\": \"green\",\n                \"value\": null\n              },\n              {\n                \"color\": \"red\",\n                \"value\": 80\n              }\n            ]\n          },\n          \"unit\": \"none\"\n        },\n        \"overrides\": []\n      },\n      \"gridPos\": {\n      
  \"h\": 8,\n        \"w\": 12,\n        \"x\": 0,\n        \"y\": 69\n      },\n      \"id\": 30,\n      \"options\": {\n        \"legend\": {\n          \"calcs\": [],\n          \"displayMode\": \"list\",\n          \"placement\": \"bottom\",\n          \"showLegend\": true\n        },\n        \"tooltip\": {\n          \"mode\": \"single\",\n          \"sort\": \"none\"\n        }\n      },\n      \"pluginVersion\": \"10.1.10\",\n      \"targets\": [\n        {\n          \"datasource\": {\n            \"type\": \"prometheus\",\n            \"uid\": \"${DS_PROMETHEUS}\"\n          },\n          \"editorMode\": \"code\",\n          \"expr\":\n              \"irate(dragonfly_net_read_yields_total\\n{namespace=\\\"$namespace\\\",pod=~\\\"$pod_name\\\"}[$__rate_interval])\",\n          \"hide\": false,\n          \"instant\": false,\n          \"legendFormat\": \"__auto\",\n          \"range\": true,\n          \"refId\": \"B\"\n        }\n      ],\n      \"title\": \"Read Yields Per second\",\n      \"type\": \"timeseries\"\n    }\n  ],\n  \"refresh\": \"10s\",\n  \"schemaVersion\": 38,\n  \"style\": \"dark\",\n  \"tags\": [\n    \"prometheus\",\n    \"dragonfly\"\n  ],\n  \"templating\": {\n    \"list\": [\n      {\n        \"current\": {\n          \"selected\": false,\n          \"text\": \"Prometheus\",\n          \"value\": \"PBFA97CFB590B2093\"\n        },\n        \"hide\": 0,\n        \"includeAll\": false,\n        \"label\": \"Prometheus\",\n        \"multi\": false,\n        \"name\": \"DS_PROMETHEUS\",\n        \"options\": [],\n        \"query\": \"prometheus\",\n        \"refresh\": 1,\n        \"regex\": \"\",\n        \"skipUrlSync\": false,\n        \"type\": \"datasource\"\n      },\n      {\n        \"current\": {\n          \"isNone\": true,\n          \"selected\": false,\n          \"text\": \"None\",\n          \"value\": \"\"\n        },\n        \"datasource\": {\n          \"uid\": \"$DS_PROMETHEUS\"\n        },\n        \"definition\": 
\"label_values(dragonfly_version, namespace)\",\n        \"hide\": 0,\n        \"includeAll\": false,\n        \"label\": \"Namespace\",\n        \"multi\": false,\n        \"name\": \"namespace\",\n        \"options\": [],\n        \"query\": {\n          \"query\": \"label_values(dragonfly_version, namespace)\",\n          \"refId\": \"StandardVariableQuery\"\n        },\n        \"refresh\": 2,\n        \"regex\": \"\",\n        \"skipUrlSync\": false,\n        \"sort\": 0,\n        \"type\": \"query\"\n      },\n      {\n        \"current\": {\n          \"isNone\": true,\n          \"selected\": false,\n          \"text\": \"None\",\n          \"value\": \"\"\n        },\n        \"datasource\": {\n          \"uid\": \"$DS_PROMETHEUS\"\n        },\n        \"definition\": \"label_values(dragonfly_version{namespace=\\\"$namespace\\\"}, pod)\",\n        \"hide\": 0,\n        \"includeAll\": false,\n        \"label\": \"Pod Name\",\n        \"multi\": false,\n        \"name\": \"pod_name\",\n        \"options\": [],\n        \"query\": {\n          \"query\": \"label_values(dragonfly_version{namespace=\\\"$namespace\\\"}, pod)\",\n          \"refId\": \"StandardVariableQuery\"\n        },\n        \"refresh\": 2,\n        \"regex\": \"\",\n        \"skipUrlSync\": false,\n        \"sort\": 1,\n        \"type\": \"query\"\n      }\n    ]\n  },\n  \"time\": {\n    \"from\": \"now-5m\",\n    \"to\": \"now\"\n  },\n  \"timepicker\": {},\n  \"timezone\": \"browser\",\n  \"title\": \"Dragonfly Dashboard\",\n  \"uid\": \"xDLNRKUWz\",\n  \"version\": 1,\n  \"weekStart\": \"\"\n}\n"
  },
  {
    "path": "tools/local/monitoring/grafana/provisioning/dashboards/memcached.json",
    "content": "{\n  \"annotations\": {\n    \"list\": [\n      {\n        \"builtIn\": 1,\n        \"datasource\": {\n          \"type\": \"datasource\",\n          \"uid\": \"grafana\"\n        },\n        \"enable\": true,\n        \"hide\": true,\n        \"iconColor\": \"rgba(0, 211, 255, 1)\",\n        \"name\": \"Annotations & Alerts\",\n        \"type\": \"dashboard\"\n      }\n    ]\n  },\n  \"description\":\n      \"Memcached dashboard for grafana 6.5.1 or above.\\r\\nDataSource: Prometheus\\r\\nCollector: Memcached official exporter : https://github.com/prometheus/memcached_exporter/releases \\r\\n\",\n  \"editable\": true,\n  \"fiscalYearStartMonth\": 0,\n  \"gnetId\": 11527,\n  \"graphTooltip\": 0,\n  \"links\": [],\n  \"liveNow\": false,\n  \"panels\": [\n    {\n      \"datasource\": {\n        \"type\": \"prometheus\",\n        \"uid\": \"PBFA97CFB590B2093\"\n      },\n      \"fieldConfig\": {\n        \"defaults\": {\n          \"color\": {\n            \"mode\": \"thresholds\"\n          },\n          \"mappings\": [\n            {\n              \"options\": {\n                \"0\": {\n                  \"text\": \"DOWN\"\n                },\n                \"1\": {\n                  \"text\": \"UP\"\n                }\n              },\n              \"type\": \"value\"\n            }\n          ],\n          \"thresholds\": {\n            \"mode\": \"absolute\",\n            \"steps\": [\n              {\n                \"color\": \"#d44a3a\",\n                \"value\": null\n              },\n              {\n                \"color\": \"rgba(237, 129, 40, 0.89)\",\n                \"value\": 0.1\n              },\n              {\n                \"color\": \"#299c46\",\n                \"value\": 0.9\n              }\n            ]\n          },\n          \"unit\": \"short\"\n        },\n        \"overrides\": []\n      },\n      \"gridPos\": {\n        \"h\": 4,\n        \"w\": 2,\n        \"x\": 0,\n        \"y\": 0\n      },\n      
\"id\": 6,\n      \"links\": [],\n      \"maxDataPoints\": 100,\n      \"options\": {\n        \"colorMode\": \"background\",\n        \"graphMode\": \"none\",\n        \"justifyMode\": \"auto\",\n        \"orientation\": \"horizontal\",\n        \"reduceOptions\": {\n          \"calcs\": [\n            \"lastNotNull\"\n          ],\n          \"fields\": \"\",\n          \"values\": false\n        },\n        \"textMode\": \"auto\"\n      },\n      \"pluginVersion\": \"10.1.10\",\n      \"repeatDirection\": \"h\",\n      \"targets\": [\n        {\n          \"datasource\": {\n            \"type\": \"prometheus\",\n            \"uid\": \"PBFA97CFB590B2093\"\n          },\n          \"expr\": \"memcached_up{job=\\\"$job\\\"}\",\n          \"format\": \"time_series\",\n          \"intervalFactor\": 1,\n          \"legendFormat\": \"\",\n          \"refId\": \"A\"\n        }\n      ],\n      \"title\": \"Instance State\",\n      \"type\": \"stat\"\n    },\n    {\n      \"datasource\": {\n        \"type\": \"prometheus\",\n        \"uid\": \"PBFA97CFB590B2093\"\n      },\n      \"fieldConfig\": {\n        \"defaults\": {\n          \"color\": {\n            \"fixedColor\": \"rgb(31, 120, 193)\",\n            \"mode\": \"fixed\"\n          },\n          \"decimals\": 1,\n          \"mappings\": [\n            {\n              \"options\": {\n                \"match\": \"null\",\n                \"result\": {\n                  \"text\": \"N/A\"\n                }\n              },\n              \"type\": \"special\"\n            }\n          ],\n          \"thresholds\": {\n            \"mode\": \"absolute\",\n            \"steps\": [\n              {\n                \"color\": \"#37872D\",\n                \"value\": null\n              },\n              {\n                \"color\": \"rgba(237, 129, 40, 0.89)\",\n                \"value\": 100000\n              },\n              {\n                \"color\": \"#d44a3a\",\n                \"value\": 200000\n          
    }\n            ]\n          },\n          \"unit\": \"s\"\n        },\n        \"overrides\": []\n      },\n      \"gridPos\": {\n        \"h\": 4,\n        \"w\": 3,\n        \"x\": 2,\n        \"y\": 0\n      },\n      \"id\": 7,\n      \"links\": [],\n      \"maxDataPoints\": 100,\n      \"options\": {\n        \"colorMode\": \"none\",\n        \"graphMode\": \"area\",\n        \"justifyMode\": \"auto\",\n        \"orientation\": \"horizontal\",\n        \"reduceOptions\": {\n          \"calcs\": [\n            \"lastNotNull\"\n          ],\n          \"fields\": \"\",\n          \"values\": false\n        },\n        \"textMode\": \"auto\"\n      },\n      \"pluginVersion\": \"10.1.10\",\n      \"targets\": [\n        {\n          \"datasource\": {\n            \"type\": \"prometheus\",\n            \"uid\": \"PBFA97CFB590B2093\"\n          },\n          \"expr\": \"memcached_uptime_seconds{job=\\\"$job\\\"}\",\n          \"format\": \"time_series\",\n          \"intervalFactor\": 1,\n          \"refId\": \"A\"\n        }\n      ],\n      \"title\": \"Up Time\",\n      \"type\": \"stat\"\n    },\n    {\n      \"datasource\": {\n        \"type\": \"prometheus\",\n        \"uid\": \"PBFA97CFB590B2093\"\n      },\n      \"fieldConfig\": {\n        \"defaults\": {\n          \"color\": {\n            \"mode\": \"thresholds\"\n          },\n          \"mappings\": [\n            {\n              \"id\": 0,\n              \"op\": \"=\",\n              \"text\": \"N/A\",\n              \"type\": 1,\n              \"value\": \"null\"\n            }\n          ],\n          \"max\": 1,\n          \"min\": 0,\n          \"thresholds\": {\n            \"mode\": \"absolute\",\n            \"steps\": [\n              {\n                \"color\": \"rgba(50, 172, 45, 0.97)\",\n                \"value\": null\n              },\n              {\n                \"color\": \"rgba(237, 129, 40, 0.89)\",\n                \"value\": 0.75\n              },\n              {\n    
            \"color\": \"rgba(245, 54, 54, 0.9)\",\n                \"value\": 0.9\n              }\n            ]\n          },\n          \"unit\": \"percentunit\"\n        },\n        \"overrides\": []\n      },\n      \"gridPos\": {\n        \"h\": 4,\n        \"w\": 3,\n        \"x\": 5,\n        \"y\": 0\n      },\n      \"id\": 23,\n      \"links\": [],\n      \"options\": {\n        \"orientation\": \"horizontal\",\n        \"reduceOptions\": {\n          \"calcs\": [\n            \"mean\"\n          ],\n          \"fields\": \"\",\n          \"values\": false\n        },\n        \"showThresholdLabels\": false,\n        \"showThresholdMarkers\": true\n      },\n      \"pluginVersion\": \"10.1.10\",\n      \"targets\": [\n        {\n          \"aggregation\": \"Last\",\n          \"datasource\": {\n            \"type\": \"prometheus\",\n            \"uid\": \"PBFA97CFB590B2093\"\n          },\n          \"decimals\": 2,\n          \"displayAliasType\": \"Warning / Critical\",\n          \"displayType\": \"Regular\",\n          \"displayValueWithAlias\": \"Never\",\n          \"expr\": \"memcached_current_bytes{job=\\\"$job\\\"}/memcached_limit_bytes{job=\\\"$job\\\"}\",\n          \"format\": \"time_series\",\n          \"instant\": false,\n          \"intervalFactor\": 1,\n          \"legendFormat\": \"Memory used\",\n          \"refId\": \"A\",\n          \"step\": 20,\n          \"units\": \"none\",\n          \"valueHandler\": \"Number Threshold\"\n        }\n      ],\n      \"title\": \"Memory usage\",\n      \"type\": \"gauge\"\n    },\n    {\n      \"aliasColors\": {\n        \"ratio\": \"#6ED0E0\"\n      },\n      \"bars\": true,\n      \"dashLength\": 10,\n      \"dashes\": false,\n      \"datasource\": {\n        \"type\": \"prometheus\",\n        \"uid\": \"PBFA97CFB590B2093\"\n      },\n      \"editable\": true,\n      \"error\": false,\n      \"fieldConfig\": {\n        \"defaults\": {\n          \"links\": []\n        },\n        
\"overrides\": []\n      },\n      \"fill\": 0,\n      \"fillGradient\": 0,\n      \"grid\": {},\n      \"gridPos\": {\n        \"h\": 6,\n        \"w\": 8,\n        \"x\": 8,\n        \"y\": 0\n      },\n      \"hiddenSeries\": false,\n      \"id\": 25,\n      \"legend\": {\n        \"alignAsTable\": false,\n        \"avg\": false,\n        \"current\": false,\n        \"max\": false,\n        \"min\": false,\n        \"rightSide\": false,\n        \"show\": false,\n        \"total\": false,\n        \"values\": false\n      },\n      \"lines\": false,\n      \"linewidth\": 2,\n      \"links\": [],\n      \"nullPointMode\": \"connected\",\n      \"options\": {\n        \"alertThreshold\": true\n      },\n      \"percentage\": false,\n      \"pluginVersion\": \"10.1.10\",\n      \"pointradius\": 5,\n      \"points\": false,\n      \"renderer\": \"flot\",\n      \"seriesOverrides\": [],\n      \"spaceLength\": 10,\n      \"stack\": false,\n      \"steppedLine\": false,\n      \"targets\": [\n        {\n          \"datasource\": {\n            \"type\": \"prometheus\",\n            \"uid\": \"PBFA97CFB590B2093\"\n          },\n          \"expr\":\n              \"sum (memcached_commands_total{command=\\\"get\\\",job=\\\"$job\\\"}) / (sum (memcached_commands_total{command=\\\"get\\\",job=\\\"$job\\\"}) + sum (memcached_commands_total{command=\\\"set\\\",job=\\\"$job\\\"}))\",\n          \"format\": \"time_series\",\n          \"intervalFactor\": 1,\n          \"legendFormat\": \"Get\",\n          \"refId\": \"A\",\n          \"step\": 5,\n          \"target\": \"\"\n        },\n        {\n          \"datasource\": {\n            \"type\": \"prometheus\",\n            \"uid\": \"PBFA97CFB590B2093\"\n          },\n          \"expr\":\n              \"sum (memcached_commands_total{command=\\\"set\\\",job=\\\"$job\\\"}) / (sum (memcached_commands_total{command=\\\"get\\\",job=\\\"$job\\\"}) + sum (memcached_commands_total{command=\\\"set\\\",job=\\\"$job\\\"}))\",\n       
   \"format\": \"time_series\",\n          \"intervalFactor\": 2,\n          \"legendFormat\": \"Set\",\n          \"refId\": \"B\",\n          \"step\": 10\n        }\n      ],\n      \"thresholds\": [],\n      \"timeRegions\": [],\n      \"title\": \"Get & Set ratio\",\n      \"tooltip\": {\n        \"msResolution\": false,\n        \"shared\": false,\n        \"sort\": 0,\n        \"value_type\": \"cumulative\"\n      },\n      \"type\": \"graph\",\n      \"xaxis\": {\n        \"mode\": \"series\",\n        \"show\": true,\n        \"values\": [\n          \"current\"\n        ]\n      },\n      \"yaxes\": [\n        {\n          \"format\": \"percentunit\",\n          \"logBase\": 1,\n          \"max\": \"1\",\n          \"min\": \"0\",\n          \"show\": true\n        },\n        {\n          \"format\": \"short\",\n          \"logBase\": 1,\n          \"show\": false\n        }\n      ],\n      \"yaxis\": {\n        \"align\": false\n      }\n    },\n    {\n      \"aliasColors\": {},\n      \"bars\": true,\n      \"dashLength\": 10,\n      \"dashes\": false,\n      \"datasource\": {\n        \"type\": \"prometheus\",\n        \"uid\": \"PBFA97CFB590B2093\"\n      },\n      \"editable\": true,\n      \"error\": false,\n      \"fieldConfig\": {\n        \"defaults\": {\n          \"links\": []\n        },\n        \"overrides\": []\n      },\n      \"fill\": 1,\n      \"fillGradient\": 0,\n      \"grid\": {},\n      \"gridPos\": {\n        \"h\": 6,\n        \"w\": 8,\n        \"x\": 16,\n        \"y\": 0\n      },\n      \"height\": \"120px\",\n      \"hiddenSeries\": false,\n      \"id\": 29,\n      \"legend\": {\n        \"avg\": false,\n        \"current\": false,\n        \"max\": false,\n        \"min\": false,\n        \"show\": false,\n        \"total\": false,\n        \"values\": false\n      },\n      \"lines\": false,\n      \"linewidth\": 2,\n      \"links\": [],\n      \"nullPointMode\": \"connected\",\n      \"options\": {\n        
\"alertThreshold\": true\n      },\n      \"percentage\": false,\n      \"pluginVersion\": \"10.1.10\",\n      \"pointradius\": 5,\n      \"points\": false,\n      \"renderer\": \"flot\",\n      \"seriesOverrides\": [\n        {\n          \"alias\": \"miss\",\n          \"color\": \"#E24D42\"\n        }\n      ],\n      \"spaceLength\": 10,\n      \"stack\": false,\n      \"steppedLine\": false,\n      \"targets\": [\n        {\n          \"datasource\": {\n            \"type\": \"prometheus\",\n            \"uid\": \"PBFA97CFB590B2093\"\n          },\n          \"expr\":\n              \"sum (memcached_commands_total{status=\\\"hit\\\",command=\\\"get\\\",}) / sum (memcached_commands_total{command=\\\"get\\\",job=\\\"$job\\\"})\",\n          \"format\": \"time_series\",\n          \"hide\": false,\n          \"intervalFactor\": 1,\n          \"legendFormat\": \"hit\",\n          \"refId\": \"C\",\n          \"step\": 5\n        },\n        {\n          \"datasource\": {\n            \"type\": \"prometheus\",\n            \"uid\": \"PBFA97CFB590B2093\"\n          },\n          \"expr\":\n              \"sum (memcached_commands_total{status=\\\"miss\\\",command=\\\"get\\\",job=\\\"$job\\\"}) / sum (memcached_commands_total{command=\\\"get\\\",job=\\\"$job\\\"})\",\n          \"format\": \"time_series\",\n          \"intervalFactor\": 1,\n          \"legendFormat\": \"miss\",\n          \"refId\": \"A\",\n          \"step\": 5\n        }\n      ],\n      \"thresholds\": [],\n      \"timeRegions\": [],\n      \"title\": \"Hit & Miss ratio\",\n      \"tooltip\": {\n        \"msResolution\": true,\n        \"shared\": false,\n        \"sort\": 0,\n        \"value_type\": \"cumulative\"\n      },\n      \"type\": \"graph\",\n      \"xaxis\": {\n        \"mode\": \"series\",\n        \"show\": true,\n        \"values\": [\n          \"current\"\n        ]\n      },\n      \"yaxes\": [\n        {\n          \"format\": \"percentunit\",\n          \"logBase\": 1,\n         
 \"max\": \"1\",\n          \"min\": 0,\n          \"show\": true\n        },\n        {\n          \"format\": \"short\",\n          \"logBase\": 1,\n          \"show\": false\n        }\n      ],\n      \"yaxis\": {\n        \"align\": false\n      }\n    },\n    {\n      \"datasource\": {\n        \"type\": \"prometheus\",\n        \"uid\": \"PBFA97CFB590B2093\"\n      },\n      \"fieldConfig\": {\n        \"defaults\": {\n          \"color\": {\n            \"mode\": \"thresholds\"\n          },\n          \"mappings\": [\n            {\n              \"options\": {\n                \"match\": \"null\",\n                \"result\": {\n                  \"text\": \"N/A\"\n                }\n              },\n              \"type\": \"special\"\n            }\n          ],\n          \"thresholds\": {\n            \"mode\": \"absolute\",\n            \"steps\": [\n              {\n                \"color\": \"green\",\n                \"value\": null\n              },\n              {\n                \"color\": \"red\",\n                \"value\": 80\n              }\n            ]\n          },\n          \"unit\": \"short\"\n        },\n        \"overrides\": []\n      },\n      \"gridPos\": {\n        \"h\": 2,\n        \"w\": 4,\n        \"x\": 0,\n        \"y\": 4\n      },\n      \"id\": 35,\n      \"links\": [],\n      \"maxDataPoints\": 100,\n      \"options\": {\n        \"colorMode\": \"none\",\n        \"graphMode\": \"none\",\n        \"justifyMode\": \"auto\",\n        \"orientation\": \"horizontal\",\n        \"reduceOptions\": {\n          \"calcs\": [\n            \"lastNotNull\"\n          ],\n          \"fields\": \"\",\n          \"values\": false\n        },\n        \"textMode\": \"auto\"\n      },\n      \"pluginVersion\": \"10.1.10\",\n      \"targets\": [\n        {\n          \"datasource\": {\n            \"type\": \"prometheus\",\n            \"uid\": \"PBFA97CFB590B2093\"\n          },\n          \"expr\": \"sum 
(memcached_items_evicted_total{job=\\\"$job\\\"})\",\n          \"format\": \"time_series\",\n          \"intervalFactor\": 1,\n          \"refId\": \"A\",\n          \"step\": 20\n        }\n      ],\n      \"title\": \"Evicts (total)\",\n      \"type\": \"stat\"\n    },\n    {\n      \"datasource\": {\n        \"type\": \"prometheus\",\n        \"uid\": \"PBFA97CFB590B2093\"\n      },\n      \"fieldConfig\": {\n        \"defaults\": {\n          \"color\": {\n            \"mode\": \"thresholds\"\n          },\n          \"mappings\": [\n            {\n              \"options\": {\n                \"match\": \"null\",\n                \"result\": {\n                  \"text\": \"N/A\"\n                }\n              },\n              \"type\": \"special\"\n            }\n          ],\n          \"thresholds\": {\n            \"mode\": \"absolute\",\n            \"steps\": [\n              {\n                \"color\": \"green\",\n                \"value\": null\n              },\n              {\n                \"color\": \"red\",\n                \"value\": 80\n              }\n            ]\n          },\n          \"unit\": \"short\"\n        },\n        \"overrides\": []\n      },\n      \"gridPos\": {\n        \"h\": 2,\n        \"w\": 4,\n        \"x\": 4,\n        \"y\": 4\n      },\n      \"id\": 37,\n      \"links\": [],\n      \"maxDataPoints\": 100,\n      \"options\": {\n        \"colorMode\": \"none\",\n        \"graphMode\": \"none\",\n        \"justifyMode\": \"auto\",\n        \"orientation\": \"horizontal\",\n        \"reduceOptions\": {\n          \"calcs\": [\n            \"lastNotNull\"\n          ],\n          \"fields\": \"\",\n          \"values\": false\n        },\n        \"textMode\": \"auto\"\n      },\n      \"pluginVersion\": \"10.1.10\",\n      \"targets\": [\n        {\n          \"datasource\": {\n            \"type\": \"prometheus\",\n            \"uid\": \"PBFA97CFB590B2093\"\n          },\n          \"expr\": \"sum 
(memcached_items_reclaimed_total{job=\\\"$job\\\"})\",\n          \"format\": \"time_series\",\n          \"intervalFactor\": 1,\n          \"refId\": \"A\",\n          \"step\": 20\n        }\n      ],\n      \"title\": \"Reclaims (total)\",\n      \"type\": \"stat\"\n    },\n    {\n      \"aliasColors\": {\n        \"evicts\": \"#890F02\",\n        \"memcached_items_evicted_total{instance=\\\"172.17.0.1:9150\\\",job=\\\"prometheus\\\"}\": \"#890F02\",\n        \"reclaims\": \"#3F6833\"\n      },\n      \"bars\": false,\n      \"dashLength\": 10,\n      \"dashes\": false,\n      \"datasource\": {\n        \"type\": \"prometheus\",\n        \"uid\": \"PBFA97CFB590B2093\"\n      },\n      \"editable\": true,\n      \"error\": false,\n      \"fieldConfig\": {\n        \"defaults\": {\n          \"links\": []\n        },\n        \"overrides\": []\n      },\n      \"fill\": 1,\n      \"fillGradient\": 0,\n      \"grid\": {},\n      \"gridPos\": {\n        \"h\": 7,\n        \"w\": 8,\n        \"x\": 0,\n        \"y\": 6\n      },\n      \"height\": \"240px\",\n      \"hiddenSeries\": false,\n      \"id\": 27,\n      \"legend\": {\n        \"avg\": false,\n        \"current\": false,\n        \"max\": false,\n        \"min\": false,\n        \"show\": true,\n        \"total\": false,\n        \"values\": false\n      },\n      \"lines\": true,\n      \"linewidth\": 2,\n      \"links\": [],\n      \"nullPointMode\": \"connected\",\n      \"options\": {\n        \"alertThreshold\": true\n      },\n      \"percentage\": false,\n      \"pluginVersion\": \"10.1.10\",\n      \"pointradius\": 5,\n      \"points\": false,\n      \"renderer\": \"flot\",\n      \"seriesOverrides\": [\n        {\n          \"alias\": \"reclaims\",\n          \"yaxis\": 2\n        }\n      ],\n      \"spaceLength\": 10,\n      \"stack\": false,\n      \"steppedLine\": false,\n      \"targets\": [\n        {\n          \"datasource\": {\n            \"type\": \"prometheus\",\n            \"uid\": 
\"PBFA97CFB590B2093\"\n          },\n          \"expr\": \"sum (irate (memcached_items_evicted_total{job=\\\"$job\\\"}[5m]))\",\n          \"format\": \"time_series\",\n          \"intervalFactor\": 1,\n          \"legendFormat\": \"evicts\",\n          \"refId\": \"A\",\n          \"step\": 5,\n          \"target\": \"\"\n        },\n        {\n          \"datasource\": {\n            \"type\": \"prometheus\",\n            \"uid\": \"PBFA97CFB590B2093\"\n          },\n          \"expr\": \"sum (irate (memcached_items_reclaimed_total{job=\\\"$job\\\"}[5m]))\",\n          \"format\": \"time_series\",\n          \"intervalFactor\": 1,\n          \"legendFormat\": \"reclaims\",\n          \"refId\": \"B\",\n          \"step\": 5\n        }\n      ],\n      \"thresholds\": [],\n      \"timeRegions\": [],\n      \"title\": \"Evicts & Reclaims rate\",\n      \"tooltip\": {\n        \"msResolution\": false,\n        \"shared\": true,\n        \"sort\": 0,\n        \"value_type\": \"cumulative\"\n      },\n      \"type\": \"graph\",\n      \"xaxis\": {\n        \"mode\": \"time\",\n        \"show\": true,\n        \"values\": []\n      },\n      \"yaxes\": [\n        {\n          \"format\": \"short\",\n          \"logBase\": 1,\n          \"show\": true\n        },\n        {\n          \"format\": \"short\",\n          \"logBase\": 1,\n          \"show\": true\n        }\n      ],\n      \"yaxis\": {\n        \"align\": false\n      }\n    },\n    {\n      \"aliasColors\": {},\n      \"bars\": false,\n      \"dashLength\": 10,\n      \"dashes\": false,\n      \"datasource\": {\n        \"type\": \"prometheus\",\n        \"uid\": \"PBFA97CFB590B2093\"\n      },\n      \"fieldConfig\": {\n        \"defaults\": {\n          \"links\": []\n        },\n        \"overrides\": []\n      },\n      \"fill\": 1,\n      \"fillGradient\": 0,\n      \"gridPos\": {\n        \"h\": 7,\n        \"w\": 8,\n        \"x\": 8,\n        \"y\": 6\n      },\n      \"hiddenSeries\": false,\n    
  \"id\": 10,\n      \"legend\": {\n        \"alignAsTable\": true,\n        \"avg\": false,\n        \"current\": true,\n        \"max\": true,\n        \"min\": true,\n        \"rightSide\": false,\n        \"show\": true,\n        \"total\": false,\n        \"values\": true\n      },\n      \"lines\": true,\n      \"linewidth\": 2,\n      \"links\": [],\n      \"nullPointMode\": \"null\",\n      \"options\": {\n        \"alertThreshold\": true\n      },\n      \"percentage\": false,\n      \"pluginVersion\": \"10.1.10\",\n      \"pointradius\": 5,\n      \"points\": false,\n      \"renderer\": \"flot\",\n      \"seriesOverrides\": [],\n      \"spaceLength\": 10,\n      \"stack\": false,\n      \"steppedLine\": false,\n      \"targets\": [\n        {\n          \"datasource\": {\n            \"type\": \"prometheus\",\n            \"uid\": \"PBFA97CFB590B2093\"\n          },\n          \"expr\": \"memcached_current_connections{job=\\\"$job\\\"}\",\n          \"format\": \"time_series\",\n          \"interval\": \"1m\",\n          \"intervalFactor\": 1,\n          \"legendFormat\": \"Connections\",\n          \"refId\": \"A\"\n        }\n      ],\n      \"thresholds\": [],\n      \"timeRegions\": [],\n      \"title\": \"Connections\",\n      \"tooltip\": {\n        \"shared\": true,\n        \"sort\": 0,\n        \"value_type\": \"individual\"\n      },\n      \"type\": \"graph\",\n      \"xaxis\": {\n        \"mode\": \"time\",\n        \"show\": true,\n        \"values\": []\n      },\n      \"yaxes\": [\n        {\n          \"format\": \"none\",\n          \"logBase\": 1,\n          \"show\": true\n        },\n        {\n          \"format\": \"short\",\n          \"logBase\": 1,\n          \"show\": true\n        }\n      ],\n      \"yaxis\": {\n        \"align\": false\n      }\n    },\n    {\n      \"aliasColors\": {},\n      \"bars\": false,\n      \"dashLength\": 10,\n      \"dashes\": false,\n      \"datasource\": {\n        \"type\": \"prometheus\",\n    
    \"uid\": \"PBFA97CFB590B2093\"\n      },\n      \"editable\": true,\n      \"error\": false,\n      \"fieldConfig\": {\n        \"defaults\": {\n          \"links\": []\n        },\n        \"overrides\": []\n      },\n      \"fill\": 1,\n      \"fillGradient\": 0,\n      \"grid\": {},\n      \"gridPos\": {\n        \"h\": 7,\n        \"w\": 8,\n        \"x\": 16,\n        \"y\": 6\n      },\n      \"height\": \"240px\",\n      \"hiddenSeries\": false,\n      \"id\": 31,\n      \"legend\": {\n        \"avg\": false,\n        \"current\": false,\n        \"max\": false,\n        \"min\": false,\n        \"show\": false,\n        \"total\": false,\n        \"values\": false\n      },\n      \"lines\": true,\n      \"linewidth\": 2,\n      \"links\": [],\n      \"nullPointMode\": \"connected\",\n      \"options\": {\n        \"alertThreshold\": true\n      },\n      \"percentage\": false,\n      \"pluginVersion\": \"10.1.10\",\n      \"pointradius\": 5,\n      \"points\": false,\n      \"renderer\": \"flot\",\n      \"seriesOverrides\": [],\n      \"spaceLength\": 10,\n      \"stack\": false,\n      \"steppedLine\": false,\n      \"targets\": [\n        {\n          \"datasource\": {\n            \"type\": \"prometheus\",\n            \"uid\": \"PBFA97CFB590B2093\"\n          },\n          \"expr\":\n              \"sum (irate (memcached_commands_total{status=\\\"hit\\\",command=\\\"get\\\",job=\\\"$job\\\"}[5m])) / sum (irate (memcached_commands_total{command=\\\"get\\\",job=\\\"$job\\\"}[5m]\\n))\",\n          \"format\": \"time_series\",\n          \"intervalFactor\": 1,\n          \"legendFormat\": \"Hit\",\n          \"refId\": \"A\",\n          \"step\": 5\n        }\n      ],\n      \"thresholds\": [],\n      \"timeRegions\": [],\n      \"title\": \"Hit rate\",\n      \"tooltip\": {\n        \"msResolution\": true,\n        \"shared\": false,\n        \"sort\": 0,\n        \"value_type\": \"cumulative\"\n      },\n      \"type\": \"graph\",\n      
\"xaxis\": {\n        \"mode\": \"time\",\n        \"show\": true,\n        \"values\": [\n          \"total\"\n        ]\n      },\n      \"yaxes\": [\n        {\n          \"format\": \"percentunit\",\n          \"logBase\": 1,\n          \"min\": 0,\n          \"show\": true\n        },\n        {\n          \"format\": \"short\",\n          \"logBase\": 1,\n          \"show\": false\n        }\n      ],\n      \"yaxis\": {\n        \"align\": false\n      }\n    },\n    {\n      \"aliasColors\": {},\n      \"bars\": false,\n      \"dashLength\": 10,\n      \"dashes\": false,\n      \"datasource\": {\n        \"type\": \"prometheus\",\n        \"uid\": \"PBFA97CFB590B2093\"\n      },\n      \"fieldConfig\": {\n        \"defaults\": {\n          \"links\": []\n        },\n        \"overrides\": []\n      },\n      \"fill\": 1,\n      \"fillGradient\": 0,\n      \"gridPos\": {\n        \"h\": 7,\n        \"w\": 8,\n        \"x\": 0,\n        \"y\": 13\n      },\n      \"hiddenSeries\": false,\n      \"id\": 19,\n      \"legend\": {\n        \"alignAsTable\": true,\n        \"avg\": false,\n        \"current\": true,\n        \"max\": true,\n        \"min\": true,\n        \"rightSide\": false,\n        \"show\": true,\n        \"total\": false,\n        \"values\": true\n      },\n      \"lines\": true,\n      \"linewidth\": 2,\n      \"links\": [],\n      \"nullPointMode\": \"null\",\n      \"options\": {\n        \"alertThreshold\": true\n      },\n      \"percentage\": false,\n      \"pluginVersion\": \"10.1.10\",\n      \"pointradius\": 5,\n      \"points\": false,\n      \"renderer\": \"flot\",\n      \"seriesOverrides\": [],\n      \"spaceLength\": 10,\n      \"stack\": false,\n      \"steppedLine\": false,\n      \"targets\": [\n        {\n          \"datasource\": {\n            \"type\": \"prometheus\",\n            \"uid\": \"PBFA97CFB590B2093\"\n          },\n          \"expr\": \"delta(memcached_read_bytes_total{job=\\\"$job\\\"}[1m])\",\n          
\"format\": \"time_series\",\n          \"interval\": \"1m\",\n          \"intervalFactor\": 1,\n          \"legendFormat\": \"read\",\n          \"refId\": \"A\"\n        },\n        {\n          \"datasource\": {\n            \"type\": \"prometheus\",\n            \"uid\": \"PBFA97CFB590B2093\"\n          },\n          \"expr\": \"delta(memcached_written_bytes_total{job=\\\"$job\\\"}[1m])\",\n          \"format\": \"time_series\",\n          \"interval\": \"1m\",\n          \"intervalFactor\": 1,\n          \"legendFormat\": \"write\",\n          \"refId\": \"B\"\n        }\n      ],\n      \"thresholds\": [],\n      \"timeRegions\": [],\n      \"title\": \"Read/Write\",\n      \"tooltip\": {\n        \"shared\": true,\n        \"sort\": 0,\n        \"value_type\": \"individual\"\n      },\n      \"type\": \"graph\",\n      \"xaxis\": {\n        \"mode\": \"time\",\n        \"show\": true,\n        \"values\": []\n      },\n      \"yaxes\": [\n        {\n          \"format\": \"bytes\",\n          \"logBase\": 1,\n          \"show\": true\n        },\n        {\n          \"format\": \"short\",\n          \"logBase\": 1,\n          \"show\": true\n        }\n      ],\n      \"yaxis\": {\n        \"align\": false\n      }\n    },\n    {\n      \"aliasColors\": {},\n      \"bars\": false,\n      \"dashLength\": 10,\n      \"dashes\": false,\n      \"datasource\": {\n        \"type\": \"prometheus\",\n        \"uid\": \"PBFA97CFB590B2093\"\n      },\n      \"fieldConfig\": {\n        \"defaults\": {\n          \"links\": []\n        },\n        \"overrides\": []\n      },\n      \"fill\": 1,\n      \"fillGradient\": 0,\n      \"gridPos\": {\n        \"h\": 7,\n        \"w\": 8,\n        \"x\": 8,\n        \"y\": 13\n      },\n      \"hiddenSeries\": false,\n      \"id\": 5,\n      \"legend\": {\n        \"avg\": false,\n        \"current\": false,\n        \"max\": false,\n        \"min\": false,\n        \"show\": true,\n        \"total\": false,\n        
\"values\": false\n      },\n      \"lines\": true,\n      \"linewidth\": 1,\n      \"links\": [],\n      \"nullPointMode\": \"null\",\n      \"options\": {\n        \"alertThreshold\": true\n      },\n      \"percentage\": false,\n      \"pluginVersion\": \"10.1.10\",\n      \"pointradius\": 2,\n      \"points\": false,\n      \"renderer\": \"flot\",\n      \"seriesOverrides\": [],\n      \"spaceLength\": 10,\n      \"stack\": false,\n      \"steppedLine\": false,\n      \"targets\": [\n        {\n          \"datasource\": {\n            \"type\": \"prometheus\",\n            \"uid\": \"PBFA97CFB590B2093\"\n          },\n          \"expr\": \"memcached_current_bytes{job=\\\"$job\\\"}/memcached_limit_bytes{job=\\\"$job\\\"}\",\n          \"format\": \"time_series\",\n          \"intervalFactor\": 1,\n          \"legendFormat\": \"Memory\",\n          \"refId\": \"A\"\n        }\n      ],\n      \"thresholds\": [],\n      \"timeRegions\": [],\n      \"title\": \"Memory Used\",\n      \"tooltip\": {\n        \"shared\": true,\n        \"sort\": 0,\n        \"value_type\": \"individual\"\n      },\n      \"type\": \"graph\",\n      \"xaxis\": {\n        \"mode\": \"time\",\n        \"show\": true,\n        \"values\": []\n      },\n      \"yaxes\": [\n        {\n          \"format\": \"short\",\n          \"logBase\": 1,\n          \"show\": true\n        },\n        {\n          \"format\": \"short\",\n          \"logBase\": 1,\n          \"show\": true\n        }\n      ],\n      \"yaxis\": {\n        \"align\": false\n      }\n    },\n    {\n      \"aliasColors\": {},\n      \"bars\": false,\n      \"dashLength\": 10,\n      \"dashes\": false,\n      \"datasource\": {\n        \"type\": \"prometheus\",\n        \"uid\": \"PBFA97CFB590B2093\"\n      },\n      \"fieldConfig\": {\n        \"defaults\": {\n          \"links\": []\n        },\n        \"overrides\": []\n      },\n      \"fill\": 1,\n      \"fillGradient\": 0,\n      \"gridPos\": {\n        \"h\": 7,\n   
     \"w\": 8,\n        \"x\": 16,\n        \"y\": 13\n      },\n      \"hiddenSeries\": false,\n      \"id\": 4,\n      \"legend\": {\n        \"avg\": false,\n        \"current\": false,\n        \"max\": false,\n        \"min\": false,\n        \"show\": true,\n        \"total\": false,\n        \"values\": false\n      },\n      \"lines\": true,\n      \"linewidth\": 1,\n      \"links\": [],\n      \"nullPointMode\": \"null\",\n      \"options\": {\n        \"alertThreshold\": true\n      },\n      \"percentage\": false,\n      \"pluginVersion\": \"10.1.10\",\n      \"pointradius\": 2,\n      \"points\": false,\n      \"renderer\": \"flot\",\n      \"seriesOverrides\": [],\n      \"spaceLength\": 10,\n      \"stack\": false,\n      \"steppedLine\": false,\n      \"targets\": [\n        {\n          \"datasource\": {\n            \"type\": \"prometheus\",\n            \"uid\": \"PBFA97CFB590B2093\"\n          },\n          \"expr\": \"memcached_current_items{job=\\\"$job\\\"}\",\n          \"format\": \"time_series\",\n          \"intervalFactor\": 1,\n          \"legendFormat\": \"Items\",\n          \"refId\": \"A\"\n        }\n      ],\n      \"thresholds\": [],\n      \"timeRegions\": [],\n      \"title\": \"Items in cache\",\n      \"tooltip\": {\n        \"shared\": true,\n        \"sort\": 0,\n        \"value_type\": \"individual\"\n      },\n      \"type\": \"graph\",\n      \"xaxis\": {\n        \"mode\": \"time\",\n        \"show\": true,\n        \"values\": []\n      },\n      \"yaxes\": [\n        {\n          \"format\": \"short\",\n          \"logBase\": 1,\n          \"show\": true\n        },\n        {\n          \"format\": \"short\",\n          \"logBase\": 1,\n          \"show\": true\n        }\n      ],\n      \"yaxis\": {\n        \"align\": false\n      }\n    },\n    {\n      \"datasource\": {\n        \"type\": \"prometheus\",\n        \"uid\": \"PBFA97CFB590B2093\"\n      },\n      \"description\": \"\",\n      \"fieldConfig\": {\n    
    \"defaults\": {\n          \"color\": {\n            \"mode\": \"palette-classic\"\n          },\n          \"custom\": {\n            \"axisCenteredZero\": false,\n            \"axisColorMode\": \"text\",\n            \"axisLabel\": \"\",\n            \"axisPlacement\": \"auto\",\n            \"barAlignment\": 0,\n            \"drawStyle\": \"line\",\n            \"fillOpacity\": 0,\n            \"gradientMode\": \"none\",\n            \"hideFrom\": {\n              \"legend\": false,\n              \"tooltip\": false,\n              \"viz\": false\n            },\n            \"insertNulls\": false,\n            \"lineInterpolation\": \"linear\",\n            \"lineWidth\": 1,\n            \"pointSize\": 5,\n            \"scaleDistribution\": {\n              \"type\": \"linear\"\n            },\n            \"showPoints\": \"auto\",\n            \"spanNulls\": false,\n            \"stacking\": {\n              \"group\": \"A\",\n              \"mode\": \"none\"\n            },\n            \"thresholdsStyle\": {\n              \"mode\": \"off\"\n            }\n          },\n          \"mappings\": [],\n          \"thresholds\": {\n            \"mode\": \"absolute\",\n            \"steps\": [\n              {\n                \"color\": \"green\",\n                \"value\": null\n              },\n              {\n                \"color\": \"red\",\n                \"value\": 80\n              }\n            ]\n          },\n          \"unit\": \"bytes\"\n        },\n        \"overrides\": []\n      },\n      \"gridPos\": {\n        \"h\": 8,\n        \"w\": 12,\n        \"x\": 0,\n        \"y\": 20\n      },\n      \"id\": 38,\n      \"options\": {\n        \"legend\": {\n          \"calcs\": [],\n          \"displayMode\": \"list\",\n          \"placement\": \"bottom\",\n          \"showLegend\": true\n        },\n        \"tooltip\": {\n          \"mode\": \"single\",\n          \"sort\": \"none\"\n        }\n      },\n      \"targets\": [\n        {\n   
       \"datasource\": {\n            \"type\": \"prometheus\",\n            \"uid\": \"PBFA97CFB590B2093\"\n          },\n          \"editorMode\": \"code\",\n          \"expr\": \"memcached_process_resident_memory_bytes\",\n          \"instant\": true,\n          \"legendFormat\": \"__auto\",\n          \"range\": true,\n          \"refId\": \"A\"\n        }\n      ],\n      \"title\": \"RSS Memory\",\n      \"type\": \"timeseries\"\n    },\n    {\n      \"aliasColors\": {},\n      \"bars\": false,\n      \"dashLength\": 10,\n      \"dashes\": false,\n      \"datasource\": {\n        \"type\": \"prometheus\",\n        \"uid\": \"PBFA97CFB590B2093\"\n      },\n      \"fieldConfig\": {\n        \"defaults\": {\n          \"links\": []\n        },\n        \"overrides\": []\n      },\n      \"fill\": 1,\n      \"fillGradient\": 0,\n      \"gridPos\": {\n        \"h\": 8,\n        \"w\": 12,\n        \"x\": 12,\n        \"y\": 20\n      },\n      \"hiddenSeries\": false,\n      \"id\": 20,\n      \"legend\": {\n        \"alignAsTable\": true,\n        \"avg\": true,\n        \"current\": true,\n        \"max\": true,\n        \"min\": true,\n        \"rightSide\": false,\n        \"show\": true,\n        \"total\": false,\n        \"values\": true\n      },\n      \"lines\": true,\n      \"linewidth\": 2,\n      \"links\": [],\n      \"nullPointMode\": \"null\",\n      \"options\": {\n        \"alertThreshold\": true\n      },\n      \"percentage\": false,\n      \"pluginVersion\": \"10.1.10\",\n      \"pointradius\": 5,\n      \"points\": false,\n      \"renderer\": \"flot\",\n      \"seriesOverrides\": [],\n      \"spaceLength\": 10,\n      \"stack\": false,\n      \"steppedLine\": false,\n      \"targets\": [\n        {\n          \"datasource\": {\n            \"type\": \"prometheus\",\n            \"uid\": \"PBFA97CFB590B2093\"\n          },\n          \"expr\": \"sum(delta(memcached_commands_total{job=\\\"$job\\\"}[30s]))/30\",\n          \"format\": 
\"time_series\",\n          \"interval\": \"15s\",\n          \"intervalFactor\": 1,\n          \"legendFormat\": \"QPS\",\n          \"refId\": \"A\"\n        }\n      ],\n      \"thresholds\": [],\n      \"timeRegions\": [],\n      \"title\": \"QPS\",\n      \"tooltip\": {\n        \"shared\": true,\n        \"sort\": 0,\n        \"value_type\": \"individual\"\n      },\n      \"type\": \"graph\",\n      \"xaxis\": {\n        \"mode\": \"time\",\n        \"show\": true,\n        \"values\": []\n      },\n      \"yaxes\": [\n        {\n          \"format\": \"short\",\n          \"logBase\": 1,\n          \"show\": true\n        },\n        {\n          \"format\": \"short\",\n          \"logBase\": 1,\n          \"show\": true\n        }\n      ],\n      \"yaxis\": {\n        \"align\": false\n      }\n    },\n    {\n      \"aliasColors\": {},\n      \"bars\": false,\n      \"dashLength\": 10,\n      \"dashes\": false,\n      \"datasource\": {\n        \"type\": \"prometheus\",\n        \"uid\": \"PBFA97CFB590B2093\"\n      },\n      \"editable\": true,\n      \"error\": false,\n      \"fieldConfig\": {\n        \"defaults\": {\n          \"links\": []\n        },\n        \"overrides\": []\n      },\n      \"fill\": 1,\n      \"fillGradient\": 0,\n      \"grid\": {},\n      \"gridPos\": {\n        \"h\": 8,\n        \"w\": 12,\n        \"x\": 0,\n        \"y\": 28\n      },\n      \"hiddenSeries\": false,\n      \"id\": 33,\n      \"legend\": {\n        \"alignAsTable\": true,\n        \"avg\": false,\n        \"current\": true,\n        \"hideEmpty\": false,\n        \"hideZero\": false,\n        \"max\": false,\n        \"min\": false,\n        \"rightSide\": true,\n        \"show\": true,\n        \"sideWidth\": 120,\n        \"total\": false,\n        \"values\": true\n      },\n      \"lines\": true,\n      \"linewidth\": 2,\n      \"links\": [],\n      \"nullPointMode\": \"connected\",\n      \"options\": {\n        \"alertThreshold\": true\n      },\n     
 \"percentage\": false,\n      \"pluginVersion\": \"10.1.10\",\n      \"pointradius\": 5,\n      \"points\": false,\n      \"renderer\": \"flot\",\n      \"seriesOverrides\": [],\n      \"spaceLength\": 10,\n      \"stack\": false,\n      \"steppedLine\": false,\n      \"targets\": [\n        {\n          \"datasource\": {\n            \"type\": \"prometheus\",\n            \"uid\": \"PBFA97CFB590B2093\"\n          },\n          \"expr\": \"sum (irate (memcached_commands_total{job=\\\"$job\\\"}[5m])) by (command)\",\n          \"format\": \"time_series\",\n          \"intervalFactor\": 2,\n          \"legendFormat\": \"{{command}}\",\n          \"refId\": \"A\",\n          \"step\": 4,\n          \"target\": \"\"\n        }\n      ],\n      \"thresholds\": [],\n      \"timeRegions\": [],\n      \"title\": \"Commands\",\n      \"tooltip\": {\n        \"msResolution\": false,\n        \"shared\": true,\n        \"sort\": 0,\n        \"value_type\": \"cumulative\"\n      },\n      \"type\": \"graph\",\n      \"xaxis\": {\n        \"mode\": \"time\",\n        \"show\": true,\n        \"values\": []\n      },\n      \"yaxes\": [\n        {\n          \"format\": \"short\",\n          \"logBase\": 1,\n          \"show\": true\n        },\n        {\n          \"format\": \"short\",\n          \"logBase\": 1,\n          \"show\": false\n        }\n      ],\n      \"yaxis\": {\n        \"align\": false\n      }\n    },\n    {\n      \"aliasColors\": {},\n      \"bars\": false,\n      \"dashLength\": 10,\n      \"dashes\": false,\n      \"datasource\": {\n        \"type\": \"prometheus\",\n        \"uid\": \"PBFA97CFB590B2093\"\n      },\n      \"decimals\": 2,\n      \"fieldConfig\": {\n        \"defaults\": {\n          \"links\": []\n        },\n        \"overrides\": []\n      },\n      \"fill\": 1,\n      \"fillGradient\": 0,\n      \"gridPos\": {\n        \"h\": 9,\n        \"w\": 12,\n        \"x\": 12,\n        \"y\": 28\n      },\n      \"hiddenSeries\": false,\n   
   \"id\": 11,\n      \"legend\": {\n        \"alignAsTable\": true,\n        \"avg\": true,\n        \"current\": true,\n        \"max\": true,\n        \"min\": true,\n        \"rightSide\": false,\n        \"show\": true,\n        \"total\": false,\n        \"values\": true\n      },\n      \"lines\": true,\n      \"linewidth\": 2,\n      \"links\": [],\n      \"nullPointMode\": \"null\",\n      \"options\": {\n        \"alertThreshold\": true\n      },\n      \"percentage\": false,\n      \"pluginVersion\": \"10.1.10\",\n      \"pointradius\": 5,\n      \"points\": false,\n      \"renderer\": \"flot\",\n      \"seriesOverrides\": [],\n      \"spaceLength\": 10,\n      \"stack\": false,\n      \"steppedLine\": false,\n      \"targets\": [\n        {\n          \"datasource\": {\n            \"type\": \"prometheus\",\n            \"uid\": \"PBFA97CFB590B2093\"\n          },\n          \"expr\":\n              \"sum (delta(memcached_commands_total{job=\\\"$job\\\", status=\\\"hit\\\",command=\\\"get\\\"}[1m]))  / sum (delta(memcached_commands_total{job=\\\"$job\\\",command=\\\"get\\\"}[1m])) * 100\",\n          \"format\": \"time_series\",\n          \"interval\": \"1m\",\n          \"intervalFactor\": 1,\n          \"legendFormat\": \"get\",\n          \"refId\": \"A\"\n        },\n        {\n          \"datasource\": {\n            \"type\": \"prometheus\",\n            \"uid\": \"PBFA97CFB590B2093\"\n          },\n          \"expr\":\n              \"sum (delta(memcached_commands_total{job=\\\"$job\\\", status=\\\"hit\\\",command=\\\"delete\\\"}[1m]))  / sum (delta(memcached_commands_total{job=\\\"$job\\\",command=\\\"delete\\\"}[1m])) * 100\",\n          \"format\": \"time_series\",\n          \"interval\": \"1m\",\n          \"intervalFactor\": 1,\n          \"legendFormat\": \"delete\",\n          \"refId\": \"B\"\n        }\n      ],\n      \"thresholds\": [],\n      \"timeRegions\": [],\n      \"title\": \"Hit Ratio Per Command\",\n      \"tooltip\": {\n   
     \"shared\": true,\n        \"sort\": 0,\n        \"value_type\": \"individual\"\n      },\n      \"type\": \"graph\",\n      \"xaxis\": {\n        \"mode\": \"time\",\n        \"show\": true,\n        \"values\": []\n      },\n      \"yaxes\": [\n        {\n          \"decimals\": 2,\n          \"format\": \"percent\",\n          \"logBase\": 1,\n          \"show\": true\n        },\n        {\n          \"format\": \"short\",\n          \"logBase\": 1,\n          \"show\": true\n        }\n      ],\n      \"yaxis\": {\n        \"align\": false\n      }\n    },\n    {\n      \"aliasColors\": {},\n      \"bars\": false,\n      \"dashLength\": 10,\n      \"dashes\": false,\n      \"datasource\": {\n        \"type\": \"prometheus\",\n        \"uid\": \"PBFA97CFB590B2093\"\n      },\n      \"decimals\": 2,\n      \"fieldConfig\": {\n        \"defaults\": {\n          \"links\": []\n        },\n        \"overrides\": []\n      },\n      \"fill\": 1,\n      \"fillGradient\": 0,\n      \"gridPos\": {\n        \"h\": 9,\n        \"w\": 12,\n        \"x\": 0,\n        \"y\": 36\n      },\n      \"hiddenSeries\": false,\n      \"id\": 9,\n      \"legend\": {\n        \"alignAsTable\": true,\n        \"avg\": true,\n        \"current\": true,\n        \"max\": true,\n        \"min\": true,\n        \"rightSide\": false,\n        \"show\": true,\n        \"total\": false,\n        \"values\": true\n      },\n      \"lines\": true,\n      \"linewidth\": 2,\n      \"links\": [],\n      \"nullPointMode\": \"null\",\n      \"options\": {\n        \"alertThreshold\": true\n      },\n      \"percentage\": false,\n      \"pluginVersion\": \"10.1.10\",\n      \"pointradius\": 5,\n      \"points\": false,\n      \"renderer\": \"flot\",\n      \"seriesOverrides\": [],\n      \"spaceLength\": 10,\n      \"stack\": false,\n      \"steppedLine\": false,\n      \"targets\": [\n        {\n          \"datasource\": {\n            \"type\": \"prometheus\",\n            \"uid\": 
\"PBFA97CFB590B2093\"\n          },\n          \"expr\":\n              \"sum (delta(memcached_commands_total{job=\\\"$job\\\", status=\\\"hit\\\"}[1m]))  / sum (delta(memcached_commands_total{job=\\\"$job\\\"}[1m])) * 100\",\n          \"format\": \"time_series\",\n          \"interval\": \"1m\",\n          \"intervalFactor\": 1,\n          \"legendFormat\": \"Hit Ratio\",\n          \"refId\": \"A\"\n        }\n      ],\n      \"thresholds\": [],\n      \"timeRegions\": [],\n      \"title\": \"Hit Ratio\",\n      \"tooltip\": {\n        \"shared\": true,\n        \"sort\": 0,\n        \"value_type\": \"individual\"\n      },\n      \"type\": \"graph\",\n      \"xaxis\": {\n        \"mode\": \"time\",\n        \"show\": true,\n        \"values\": []\n      },\n      \"yaxes\": [\n        {\n          \"decimals\": 2,\n          \"format\": \"percent\",\n          \"logBase\": 1,\n          \"show\": true\n        },\n        {\n          \"format\": \"short\",\n          \"logBase\": 1,\n          \"show\": true\n        }\n      ],\n      \"yaxis\": {\n        \"align\": false\n      }\n    }\n  ],\n  \"refresh\": \"10s\",\n  \"schemaVersion\": 38,\n  \"style\": \"dark\",\n  \"tags\": [],\n  \"templating\": {\n    \"list\": [\n      {\n        \"current\": {\n          \"selected\": false,\n          \"text\": \"memcached-exporter\",\n          \"value\": \"memcached-exporter\"\n        },\n        \"datasource\": {\n          \"type\": \"prometheus\",\n          \"uid\": \"PBFA97CFB590B2093\"\n        },\n        \"definition\": \"\",\n        \"hide\": 0,\n        \"includeAll\": false,\n        \"multi\": false,\n        \"name\": \"job\",\n        \"options\": [],\n        \"query\": \"label_values(memcached_up, job)\",\n        \"refresh\": 1,\n        \"regex\": \"\",\n        \"skipUrlSync\": false,\n        \"sort\": 0,\n        \"tagValuesQuery\": \"\",\n        \"tagsQuery\": \"\",\n        \"type\": \"query\",\n        \"useTags\": false\n      }\n    
]\n  },\n  \"time\": {\n    \"from\": \"now-6h\",\n    \"to\": \"now\"\n  },\n  \"timepicker\": {\n    \"refresh_intervals\": [\n      \"5s\",\n      \"10s\",\n      \"30s\",\n      \"1m\",\n      \"5m\",\n      \"15m\",\n      \"30m\",\n      \"1h\",\n      \"2h\",\n      \"1d\"\n    ],\n    \"time_options\": [\n      \"5m\",\n      \"15m\",\n      \"1h\",\n      \"6h\",\n      \"12h\",\n      \"24h\",\n      \"2d\",\n      \"7d\",\n      \"30d\"\n    ]\n  },\n  \"timezone\": \"\",\n  \"title\": \"Memcached\",\n  \"uid\": \"AQxf3X-mk\",\n  \"version\": 1,\n  \"weekStart\": \"\"\n}\n"
  },
  {
    "path": "tools/local/monitoring/grafana/provisioning/dashboards/node-exporter.json",
    "content": "{\n  \"__inputs\": [\n    {\n      \"name\": \"DS_PROMETHEUS\",\n      \"label\": \"Prometheus\",\n      \"description\": \"\",\n      \"type\": \"datasource\",\n      \"pluginId\": \"prometheus\",\n      \"pluginName\": \"Prometheus\"\n    }\n  ],\n  \"__elements\": {},\n  \"__requires\": [\n    {\n      \"type\": \"panel\",\n      \"id\": \"bargauge\",\n      \"name\": \"Bar gauge\",\n      \"version\": \"\"\n    },\n    {\n      \"type\": \"panel\",\n      \"id\": \"gauge\",\n      \"name\": \"Gauge\",\n      \"version\": \"\"\n    },\n    {\n      \"type\": \"grafana\",\n      \"id\": \"grafana\",\n      \"name\": \"Grafana\",\n      \"version\": \"9.4.3\"\n    },\n    {\n      \"type\": \"datasource\",\n      \"id\": \"prometheus\",\n      \"name\": \"Prometheus\",\n      \"version\": \"1.0.0\"\n    },\n    {\n      \"type\": \"panel\",\n      \"id\": \"stat\",\n      \"name\": \"Stat\",\n      \"version\": \"\"\n    },\n    {\n      \"type\": \"panel\",\n      \"id\": \"timeseries\",\n      \"name\": \"Time series\",\n      \"version\": \"\"\n    }\n  ],\n  \"annotations\": {\n    \"list\": [\n      {\n        \"$$hashKey\": \"object:1058\",\n        \"builtIn\": 1,\n        \"datasource\": {\n          \"type\": \"datasource\",\n          \"uid\": \"grafana\"\n        },\n        \"enable\": true,\n        \"hide\": true,\n        \"iconColor\": \"rgba(0, 211, 255, 1)\",\n        \"name\": \"Annotations & Alerts\",\n        \"target\": {\n          \"limit\": 100,\n          \"matchAny\": false,\n          \"tags\": [],\n          \"type\": \"dashboard\"\n        },\n        \"type\": \"dashboard\"\n      }\n    ]\n  },\n  \"editable\": true,\n  \"fiscalYearStartMonth\": 0,\n  \"gnetId\": 1860,\n  \"graphTooltip\": 1,\n  \"id\": null,\n  \"links\": [\n    {\n      \"icon\": \"external link\",\n      \"tags\": [],\n      \"targetBlank\": true,\n      \"title\": \"GitHub\",\n      \"type\": \"link\",\n      \"url\": 
\"https://github.com/rfmoz/grafana-dashboards\"\n    },\n    {\n      \"icon\": \"external link\",\n      \"tags\": [],\n      \"targetBlank\": true,\n      \"title\": \"Grafana\",\n      \"type\": \"link\",\n      \"url\": \"https://grafana.com/grafana/dashboards/1860\"\n    }\n  ],\n  \"liveNow\": false,\n  \"panels\": [\n    {\n      \"collapsed\": false,\n      \"datasource\": {\n        \"type\": \"prometheus\",\n        \"uid\": \"000000001\"\n      },\n      \"gridPos\": {\n        \"h\": 1,\n        \"w\": 24,\n        \"x\": 0,\n        \"y\": 0\n      },\n      \"id\": 261,\n      \"panels\": [],\n      \"targets\": [\n        {\n          \"datasource\": {\n            \"type\": \"prometheus\",\n            \"uid\": \"000000001\"\n          },\n          \"refId\": \"A\"\n        }\n      ],\n      \"title\": \"Quick CPU / Mem / Disk\",\n      \"type\": \"row\"\n    },\n    {\n      \"datasource\": {\n        \"type\": \"prometheus\",\n        \"uid\": \"${datasource}\"\n      },\n      \"description\": \"Resource pressure via PSI\",\n      \"fieldConfig\": {\n        \"defaults\": {\n          \"color\": {\n            \"mode\": \"thresholds\"\n          },\n          \"decimals\": 1,\n          \"links\": [],\n          \"mappings\": [],\n          \"max\": 1,\n          \"min\": 0,\n          \"thresholds\": {\n            \"mode\": \"percentage\",\n            \"steps\": [\n              {\n                \"color\": \"green\",\n                \"value\": null\n              },\n              {\n                \"color\": \"dark-yellow\",\n                \"value\": 70\n              },\n              {\n                \"color\": \"dark-red\",\n                \"value\": 90\n              }\n            ]\n          },\n          \"unit\": \"percentunit\"\n        },\n        \"overrides\": []\n      },\n      \"gridPos\": {\n        \"h\": 4,\n        \"w\": 3,\n        \"x\": 0,\n        \"y\": 1\n      },\n      \"id\": 323,\n      \"links\": 
[],\n      \"options\": {\n        \"displayMode\": \"basic\",\n        \"minVizHeight\": 10,\n        \"minVizWidth\": 0,\n        \"orientation\": \"horizontal\",\n        \"reduceOptions\": {\n          \"calcs\": [\n            \"lastNotNull\"\n          ],\n          \"fields\": \"\",\n          \"values\": false\n        },\n        \"showUnfilled\": true,\n        \"text\": {}\n      },\n      \"pluginVersion\": \"9.4.3\",\n      \"targets\": [\n        {\n          \"datasource\": {\n            \"type\": \"prometheus\",\n            \"uid\": \"${datasource}\"\n          },\n          \"editorMode\": \"code\",\n          \"exemplar\": false,\n          \"expr\":\n              \"irate(node_pressure_cpu_waiting_seconds_total{instance=\\\"$node\\\",job=\\\"$job\\\"}[$__rate_interval])\",\n          \"format\": \"time_series\",\n          \"instant\": true,\n          \"intervalFactor\": 1,\n          \"legendFormat\": \"CPU\",\n          \"range\": false,\n          \"refId\": \"CPU some\",\n          \"step\": 240\n        },\n        {\n          \"datasource\": {\n            \"type\": \"prometheus\",\n            \"uid\": \"${datasource}\"\n          },\n          \"editorMode\": \"code\",\n          \"exemplar\": false,\n          \"expr\":\n              \"irate(node_pressure_memory_waiting_seconds_total{instance=\\\"$node\\\",job=\\\"$job\\\"}[$__rate_interval])\",\n          \"format\": \"time_series\",\n          \"hide\": false,\n          \"instant\": true,\n          \"intervalFactor\": 1,\n          \"legendFormat\": \"Mem\",\n          \"range\": false,\n          \"refId\": \"Memory some\",\n          \"step\": 240\n        },\n        {\n          \"datasource\": {\n            \"type\": \"prometheus\",\n            \"uid\": \"${datasource}\"\n          },\n          \"editorMode\": \"code\",\n          \"exemplar\": false,\n          \"expr\":\n              
\"irate(node_pressure_io_waiting_seconds_total{instance=\\\"$node\\\",job=\\\"$job\\\"}[$__rate_interval])\",\n          \"format\": \"time_series\",\n          \"hide\": false,\n          \"instant\": true,\n          \"intervalFactor\": 1,\n          \"legendFormat\": \"I/O\",\n          \"range\": false,\n          \"refId\": \"I/O some\",\n          \"step\": 240\n        }\n      ],\n      \"title\": \"Pressure\",\n      \"type\": \"bargauge\"\n    },\n    {\n      \"datasource\": {\n        \"type\": \"prometheus\",\n        \"uid\": \"${datasource}\"\n      },\n      \"description\": \"Busy state of all CPU cores together\",\n      \"fieldConfig\": {\n        \"defaults\": {\n          \"color\": {\n            \"mode\": \"thresholds\"\n          },\n          \"decimals\": 1,\n          \"mappings\": [\n            {\n              \"options\": {\n                \"match\": \"null\",\n                \"result\": {\n                  \"text\": \"N/A\"\n                }\n              },\n              \"type\": \"special\"\n            }\n          ],\n          \"max\": 100,\n          \"min\": 0,\n          \"thresholds\": {\n            \"mode\": \"absolute\",\n            \"steps\": [\n              {\n                \"color\": \"rgba(50, 172, 45, 0.97)\",\n                \"value\": null\n              },\n              {\n                \"color\": \"rgba(237, 129, 40, 0.89)\",\n                \"value\": 85\n              },\n              {\n                \"color\": \"rgba(245, 54, 54, 0.9)\",\n                \"value\": 95\n              }\n            ]\n          },\n          \"unit\": \"percent\"\n        },\n        \"overrides\": []\n      },\n      \"gridPos\": {\n        \"h\": 4,\n        \"w\": 3,\n        \"x\": 3,\n        \"y\": 1\n      },\n      \"id\": 20,\n      \"links\": [],\n      \"options\": {\n        \"orientation\": \"auto\",\n        \"reduceOptions\": {\n          \"calcs\": [\n            \"lastNotNull\"\n          
],\n          \"fields\": \"\",\n          \"values\": false\n        },\n        \"showThresholdLabels\": false,\n        \"showThresholdMarkers\": true\n      },\n      \"pluginVersion\": \"9.4.3\",\n      \"targets\": [\n        {\n          \"datasource\": {\n            \"type\": \"prometheus\",\n            \"uid\": \"${datasource}\"\n          },\n          \"editorMode\": \"code\",\n          \"exemplar\": false,\n          \"expr\":\n              \"100 * (1 - avg(rate(node_cpu_seconds_total{mode=\\\"idle\\\", instance=\\\"$node\\\"}[$__rate_interval])))\",\n          \"hide\": false,\n          \"instant\": true,\n          \"intervalFactor\": 1,\n          \"legendFormat\": \"\",\n          \"range\": false,\n          \"refId\": \"A\",\n          \"step\": 240\n        }\n      ],\n      \"title\": \"CPU Busy\",\n      \"type\": \"gauge\"\n    },\n    {\n      \"datasource\": {\n        \"type\": \"prometheus\",\n        \"uid\": \"${datasource}\"\n      },\n      \"description\": \"System load  over all CPU cores together\",\n      \"fieldConfig\": {\n        \"defaults\": {\n          \"color\": {\n            \"mode\": \"thresholds\"\n          },\n          \"decimals\": 1,\n          \"mappings\": [\n            {\n              \"options\": {\n                \"match\": \"null\",\n                \"result\": {\n                  \"text\": \"N/A\"\n                }\n              },\n              \"type\": \"special\"\n            }\n          ],\n          \"max\": 100,\n          \"min\": 0,\n          \"thresholds\": {\n            \"mode\": \"absolute\",\n            \"steps\": [\n              {\n                \"color\": \"rgba(50, 172, 45, 0.97)\",\n                \"value\": null\n              },\n              {\n                \"color\": \"rgba(237, 129, 40, 0.89)\",\n                \"value\": 85\n              },\n              {\n                \"color\": \"rgba(245, 54, 54, 0.9)\",\n                \"value\": 95\n              
}\n            ]\n          },\n          \"unit\": \"percent\"\n        },\n        \"overrides\": []\n      },\n      \"gridPos\": {\n        \"h\": 4,\n        \"w\": 3,\n        \"x\": 6,\n        \"y\": 1\n      },\n      \"id\": 155,\n      \"links\": [],\n      \"options\": {\n        \"orientation\": \"auto\",\n        \"reduceOptions\": {\n          \"calcs\": [\n            \"lastNotNull\"\n          ],\n          \"fields\": \"\",\n          \"values\": false\n        },\n        \"showThresholdLabels\": false,\n        \"showThresholdMarkers\": true\n      },\n      \"pluginVersion\": \"9.4.3\",\n      \"targets\": [\n        {\n          \"datasource\": {\n            \"type\": \"prometheus\",\n            \"uid\": \"${datasource}\"\n          },\n          \"editorMode\": \"code\",\n          \"exemplar\": false,\n          \"expr\":\n              \"scalar(node_load1{instance=\\\"$node\\\",job=\\\"$job\\\"}) * 100 / count(count(node_cpu_seconds_total{instance=\\\"$node\\\",job=\\\"$job\\\"}) by (cpu))\",\n          \"format\": \"time_series\",\n          \"hide\": false,\n          \"instant\": true,\n          \"intervalFactor\": 1,\n          \"range\": false,\n          \"refId\": \"A\",\n          \"step\": 240\n        }\n      ],\n      \"title\": \"Sys Load\",\n      \"type\": \"gauge\"\n    },\n    {\n      \"datasource\": {\n        \"type\": \"prometheus\",\n        \"uid\": \"${datasource}\"\n      },\n      \"description\": \"Non available RAM memory\",\n      \"fieldConfig\": {\n        \"defaults\": {\n          \"color\": {\n            \"mode\": \"thresholds\"\n          },\n          \"decimals\": 1,\n          \"mappings\": [],\n          \"max\": 100,\n          \"min\": 0,\n          \"thresholds\": {\n            \"mode\": \"absolute\",\n            \"steps\": [\n              {\n                \"color\": \"rgba(50, 172, 45, 0.97)\",\n                \"value\": null\n              },\n              {\n                \"color\": 
\"rgba(237, 129, 40, 0.89)\",\n                \"value\": 80\n              },\n              {\n                \"color\": \"rgba(245, 54, 54, 0.9)\",\n                \"value\": 90\n              }\n            ]\n          },\n          \"unit\": \"percent\"\n        },\n        \"overrides\": []\n      },\n      \"gridPos\": {\n        \"h\": 4,\n        \"w\": 3,\n        \"x\": 9,\n        \"y\": 1\n      },\n      \"hideTimeOverride\": false,\n      \"id\": 16,\n      \"links\": [],\n      \"options\": {\n        \"orientation\": \"auto\",\n        \"reduceOptions\": {\n          \"calcs\": [\n            \"lastNotNull\"\n          ],\n          \"fields\": \"\",\n          \"values\": false\n        },\n        \"showThresholdLabels\": false,\n        \"showThresholdMarkers\": true\n      },\n      \"pluginVersion\": \"9.4.3\",\n      \"targets\": [\n        {\n          \"datasource\": {\n            \"type\": \"prometheus\",\n            \"uid\": \"${datasource}\"\n          },\n          \"editorMode\": \"code\",\n          \"exemplar\": false,\n          \"expr\":\n              \"((node_memory_MemTotal_bytes{instance=\\\"$node\\\", job=\\\"$job\\\"} - node_memory_MemFree_bytes{instance=\\\"$node\\\", job=\\\"$job\\\"}) / node_memory_MemTotal_bytes{instance=\\\"$node\\\", job=\\\"$job\\\"}) * 100\",\n          \"format\": \"time_series\",\n          \"hide\": true,\n          \"instant\": true,\n          \"intervalFactor\": 1,\n          \"range\": false,\n          \"refId\": \"A\",\n          \"step\": 240\n        },\n        {\n          \"datasource\": {\n            \"type\": \"prometheus\",\n            \"uid\": \"${datasource}\"\n          },\n          \"editorMode\": \"code\",\n          \"exemplar\": false,\n          \"expr\":\n              \"(1 - (node_memory_MemAvailable_bytes{instance=\\\"$node\\\", job=\\\"$job\\\"} / node_memory_MemTotal_bytes{instance=\\\"$node\\\", job=\\\"$job\\\"})) * 100\",\n          \"format\": 
\"time_series\",\n          \"hide\": false,\n          \"instant\": true,\n          \"intervalFactor\": 1,\n          \"range\": false,\n          \"refId\": \"B\",\n          \"step\": 240\n        }\n      ],\n      \"title\": \"RAM Used\",\n      \"type\": \"gauge\"\n    },\n    {\n      \"datasource\": {\n        \"type\": \"prometheus\",\n        \"uid\": \"${datasource}\"\n      },\n      \"description\": \"Used Swap\",\n      \"fieldConfig\": {\n        \"defaults\": {\n          \"color\": {\n            \"mode\": \"thresholds\"\n          },\n          \"decimals\": 1,\n          \"mappings\": [\n            {\n              \"options\": {\n                \"match\": \"null\",\n                \"result\": {\n                  \"text\": \"N/A\"\n                }\n              },\n              \"type\": \"special\"\n            }\n          ],\n          \"max\": 100,\n          \"min\": 0,\n          \"thresholds\": {\n            \"mode\": \"absolute\",\n            \"steps\": [\n              {\n                \"color\": \"rgba(50, 172, 45, 0.97)\",\n                \"value\": null\n              },\n              {\n                \"color\": \"rgba(237, 129, 40, 0.89)\",\n                \"value\": 10\n              },\n              {\n                \"color\": \"rgba(245, 54, 54, 0.9)\",\n                \"value\": 25\n              }\n            ]\n          },\n          \"unit\": \"percent\"\n        },\n        \"overrides\": []\n      },\n      \"gridPos\": {\n        \"h\": 4,\n        \"w\": 3,\n        \"x\": 12,\n        \"y\": 1\n      },\n      \"id\": 21,\n      \"links\": [],\n      \"options\": {\n        \"orientation\": \"auto\",\n        \"reduceOptions\": {\n          \"calcs\": [\n            \"lastNotNull\"\n          ],\n          \"fields\": \"\",\n          \"values\": false\n        },\n        \"showThresholdLabels\": false,\n        \"showThresholdMarkers\": true\n      },\n      \"pluginVersion\": \"9.4.3\",\n      
\"targets\": [\n        {\n          \"datasource\": {\n            \"type\": \"prometheus\",\n            \"uid\": \"${datasource}\"\n          },\n          \"editorMode\": \"code\",\n          \"exemplar\": false,\n          \"expr\":\n              \"((node_memory_SwapTotal_bytes{instance=\\\"$node\\\",job=\\\"$job\\\"} - node_memory_SwapFree_bytes{instance=\\\"$node\\\",job=\\\"$job\\\"}) / (node_memory_SwapTotal_bytes{instance=\\\"$node\\\",job=\\\"$job\\\"})) * 100\",\n          \"instant\": true,\n          \"intervalFactor\": 1,\n          \"range\": false,\n          \"refId\": \"A\",\n          \"step\": 240\n        }\n      ],\n      \"title\": \"SWAP Used\",\n      \"type\": \"gauge\"\n    },\n    {\n      \"datasource\": {\n        \"type\": \"prometheus\",\n        \"uid\": \"${datasource}\"\n      },\n      \"description\": \"Used Root FS\",\n      \"fieldConfig\": {\n        \"defaults\": {\n          \"color\": {\n            \"mode\": \"thresholds\"\n          },\n          \"decimals\": 1,\n          \"mappings\": [\n            {\n              \"options\": {\n                \"match\": \"null\",\n                \"result\": {\n                  \"text\": \"N/A\"\n                }\n              },\n              \"type\": \"special\"\n            }\n          ],\n          \"max\": 100,\n          \"min\": 0,\n          \"thresholds\": {\n            \"mode\": \"absolute\",\n            \"steps\": [\n              {\n                \"color\": \"rgba(50, 172, 45, 0.97)\",\n                \"value\": null\n              },\n              {\n                \"color\": \"rgba(237, 129, 40, 0.89)\",\n                \"value\": 80\n              },\n              {\n                \"color\": \"rgba(245, 54, 54, 0.9)\",\n                \"value\": 90\n              }\n            ]\n          },\n          \"unit\": \"percent\"\n        },\n        \"overrides\": []\n      },\n      \"gridPos\": {\n        \"h\": 4,\n        \"w\": 3,\n        
\"x\": 15,\n        \"y\": 1\n      },\n      \"id\": 154,\n      \"links\": [],\n      \"options\": {\n        \"orientation\": \"auto\",\n        \"reduceOptions\": {\n          \"calcs\": [\n            \"lastNotNull\"\n          ],\n          \"fields\": \"\",\n          \"values\": false\n        },\n        \"showThresholdLabels\": false,\n        \"showThresholdMarkers\": true\n      },\n      \"pluginVersion\": \"9.4.3\",\n      \"targets\": [\n        {\n          \"datasource\": {\n            \"type\": \"prometheus\",\n            \"uid\": \"${datasource}\"\n          },\n          \"editorMode\": \"code\",\n          \"exemplar\": false,\n          \"expr\":\n              \"100 - ((node_filesystem_avail_bytes{instance=\\\"$node\\\",job=\\\"$job\\\",mountpoint=\\\"/\\\",fstype!=\\\"rootfs\\\"} * 100) / node_filesystem_size_bytes{instance=\\\"$node\\\",job=\\\"$job\\\",mountpoint=\\\"/\\\",fstype!=\\\"rootfs\\\"})\",\n          \"format\": \"time_series\",\n          \"instant\": true,\n          \"intervalFactor\": 1,\n          \"range\": false,\n          \"refId\": \"A\",\n          \"step\": 240\n        }\n      ],\n      \"title\": \"Root FS Used\",\n      \"type\": \"gauge\"\n    },\n    {\n      \"datasource\": {\n        \"type\": \"prometheus\",\n        \"uid\": \"${datasource}\"\n      },\n      \"description\": \"Total number of CPU cores\",\n      \"fieldConfig\": {\n        \"defaults\": {\n          \"color\": {\n            \"mode\": \"thresholds\"\n          },\n          \"mappings\": [\n            {\n              \"options\": {\n                \"match\": \"null\",\n                \"result\": {\n                  \"text\": \"N/A\"\n                }\n              },\n              \"type\": \"special\"\n            }\n          ],\n          \"thresholds\": {\n            \"mode\": \"absolute\",\n            \"steps\": [\n              {\n                \"color\": \"green\",\n                \"value\": null\n              },\n   
           {\n                \"color\": \"red\",\n                \"value\": 80\n              }\n            ]\n          },\n          \"unit\": \"short\"\n        },\n        \"overrides\": []\n      },\n      \"gridPos\": {\n        \"h\": 2,\n        \"w\": 2,\n        \"x\": 18,\n        \"y\": 1\n      },\n      \"id\": 14,\n      \"links\": [],\n      \"maxDataPoints\": 100,\n      \"options\": {\n        \"colorMode\": \"none\",\n        \"graphMode\": \"none\",\n        \"justifyMode\": \"auto\",\n        \"orientation\": \"horizontal\",\n        \"reduceOptions\": {\n          \"calcs\": [\n            \"lastNotNull\"\n          ],\n          \"fields\": \"\",\n          \"values\": false\n        },\n        \"textMode\": \"auto\"\n      },\n      \"pluginVersion\": \"9.4.3\",\n      \"targets\": [\n        {\n          \"datasource\": {\n            \"type\": \"prometheus\",\n            \"uid\": \"${datasource}\"\n          },\n          \"editorMode\": \"code\",\n          \"exemplar\": false,\n          \"expr\": \"count(count(node_cpu_seconds_total{instance=\\\"$node\\\",job=\\\"$job\\\"}) by (cpu))\",\n          \"instant\": true,\n          \"legendFormat\": \"__auto\",\n          \"range\": false,\n          \"refId\": \"A\"\n        }\n      ],\n      \"title\": \"CPU Cores\",\n      \"type\": \"stat\"\n    },\n    {\n      \"datasource\": {\n        \"type\": \"prometheus\",\n        \"uid\": \"${datasource}\"\n      },\n      \"description\": \"System uptime\",\n      \"fieldConfig\": {\n        \"defaults\": {\n          \"color\": {\n            \"mode\": \"thresholds\"\n          },\n          \"decimals\": 1,\n          \"mappings\": [\n            {\n              \"options\": {\n                \"match\": \"null\",\n                \"result\": {\n                  \"text\": \"N/A\"\n                }\n              },\n              \"type\": \"special\"\n            }\n          ],\n          \"thresholds\": {\n            \"mode\": 
\"absolute\",\n            \"steps\": [\n              {\n                \"color\": \"green\",\n                \"value\": null\n              },\n              {\n                \"color\": \"red\",\n                \"value\": 80\n              }\n            ]\n          },\n          \"unit\": \"s\"\n        },\n        \"overrides\": []\n      },\n      \"gridPos\": {\n        \"h\": 2,\n        \"w\": 4,\n        \"x\": 20,\n        \"y\": 1\n      },\n      \"hideTimeOverride\": true,\n      \"id\": 15,\n      \"links\": [],\n      \"maxDataPoints\": 100,\n      \"options\": {\n        \"colorMode\": \"none\",\n        \"graphMode\": \"none\",\n        \"justifyMode\": \"auto\",\n        \"orientation\": \"horizontal\",\n        \"reduceOptions\": {\n          \"calcs\": [\n            \"lastNotNull\"\n          ],\n          \"fields\": \"\",\n          \"values\": false\n        },\n        \"textMode\": \"auto\"\n      },\n      \"pluginVersion\": \"9.4.3\",\n      \"targets\": [\n        {\n          \"datasource\": {\n            \"type\": \"prometheus\",\n            \"uid\": \"${datasource}\"\n          },\n          \"editorMode\": \"code\",\n          \"exemplar\": false,\n          \"expr\":\n              \"node_time_seconds{instance=\\\"$node\\\",job=\\\"$job\\\"} - node_boot_time_seconds{instance=\\\"$node\\\",job=\\\"$job\\\"}\",\n          \"instant\": true,\n          \"intervalFactor\": 1,\n          \"range\": false,\n          \"refId\": \"A\",\n          \"step\": 240\n        }\n      ],\n      \"title\": \"Uptime\",\n      \"type\": \"stat\"\n    },\n    {\n      \"datasource\": {\n        \"type\": \"prometheus\",\n        \"uid\": \"${datasource}\"\n      },\n      \"description\": \"Total RootFS\",\n      \"fieldConfig\": {\n        \"defaults\": {\n          \"color\": {\n            \"mode\": \"thresholds\"\n          },\n          \"decimals\": 0,\n          \"mappings\": [\n            {\n              \"options\": {\n            
    \"match\": \"null\",\n                \"result\": {\n                  \"text\": \"N/A\"\n                }\n              },\n              \"type\": \"special\"\n            }\n          ],\n          \"thresholds\": {\n            \"mode\": \"absolute\",\n            \"steps\": [\n              {\n                \"color\": \"rgba(50, 172, 45, 0.97)\",\n                \"value\": null\n              },\n              {\n                \"color\": \"rgba(237, 129, 40, 0.89)\",\n                \"value\": 70\n              },\n              {\n                \"color\": \"rgba(245, 54, 54, 0.9)\",\n                \"value\": 90\n              }\n            ]\n          },\n          \"unit\": \"bytes\"\n        },\n        \"overrides\": []\n      },\n      \"gridPos\": {\n        \"h\": 2,\n        \"w\": 2,\n        \"x\": 18,\n        \"y\": 3\n      },\n      \"id\": 23,\n      \"links\": [],\n      \"maxDataPoints\": 100,\n      \"options\": {\n        \"colorMode\": \"none\",\n        \"graphMode\": \"none\",\n        \"justifyMode\": \"auto\",\n        \"orientation\": \"horizontal\",\n        \"reduceOptions\": {\n          \"calcs\": [\n            \"lastNotNull\"\n          ],\n          \"fields\": \"\",\n          \"values\": false\n        },\n        \"textMode\": \"auto\"\n      },\n      \"pluginVersion\": \"9.4.3\",\n      \"targets\": [\n        {\n          \"datasource\": {\n            \"type\": \"prometheus\",\n            \"uid\": \"${datasource}\"\n          },\n          \"editorMode\": \"code\",\n          \"exemplar\": false,\n          \"expr\":\n              \"node_filesystem_size_bytes{instance=\\\"$node\\\",job=\\\"$job\\\",mountpoint=\\\"/\\\",fstype!=\\\"rootfs\\\"}\",\n          \"format\": \"time_series\",\n          \"hide\": false,\n          \"instant\": true,\n          \"intervalFactor\": 1,\n          \"range\": false,\n          \"refId\": \"A\",\n          \"step\": 240\n        }\n      ],\n      \"title\": 
\"RootFS Total\",\n      \"type\": \"stat\"\n    },\n    {\n      \"datasource\": {\n        \"type\": \"prometheus\",\n        \"uid\": \"${datasource}\"\n      },\n      \"description\": \"Total RAM\",\n      \"fieldConfig\": {\n        \"defaults\": {\n          \"color\": {\n            \"mode\": \"thresholds\"\n          },\n          \"decimals\": 0,\n          \"mappings\": [\n            {\n              \"options\": {\n                \"match\": \"null\",\n                \"result\": {\n                  \"text\": \"N/A\"\n                }\n              },\n              \"type\": \"special\"\n            }\n          ],\n          \"thresholds\": {\n            \"mode\": \"absolute\",\n            \"steps\": [\n              {\n                \"color\": \"green\",\n                \"value\": null\n              },\n              {\n                \"color\": \"red\",\n                \"value\": 80\n              }\n            ]\n          },\n          \"unit\": \"bytes\"\n        },\n        \"overrides\": []\n      },\n      \"gridPos\": {\n        \"h\": 2,\n        \"w\": 2,\n        \"x\": 20,\n        \"y\": 3\n      },\n      \"id\": 75,\n      \"links\": [],\n      \"maxDataPoints\": 100,\n      \"options\": {\n        \"colorMode\": \"none\",\n        \"graphMode\": \"none\",\n        \"justifyMode\": \"auto\",\n        \"orientation\": \"horizontal\",\n        \"reduceOptions\": {\n          \"calcs\": [\n            \"lastNotNull\"\n          ],\n          \"fields\": \"\",\n          \"values\": false\n        },\n        \"textMode\": \"auto\"\n      },\n      \"pluginVersion\": \"9.4.3\",\n      \"targets\": [\n        {\n          \"datasource\": {\n            \"type\": \"prometheus\",\n            \"uid\": \"${datasource}\"\n          },\n          \"editorMode\": \"code\",\n          \"exemplar\": false,\n          \"expr\": \"node_memory_MemTotal_bytes{instance=\\\"$node\\\",job=\\\"$job\\\"}\",\n          \"instant\": true,\n       
   \"intervalFactor\": 1,\n          \"range\": false,\n          \"refId\": \"A\",\n          \"step\": 240\n        }\n      ],\n      \"title\": \"RAM Total\",\n      \"type\": \"stat\"\n    },\n    {\n      \"datasource\": {\n        \"type\": \"prometheus\",\n        \"uid\": \"${datasource}\"\n      },\n      \"description\": \"Total SWAP\",\n      \"fieldConfig\": {\n        \"defaults\": {\n          \"color\": {\n            \"mode\": \"thresholds\"\n          },\n          \"decimals\": 0,\n          \"mappings\": [\n            {\n              \"options\": {\n                \"match\": \"null\",\n                \"result\": {\n                  \"text\": \"N/A\"\n                }\n              },\n              \"type\": \"special\"\n            }\n          ],\n          \"thresholds\": {\n            \"mode\": \"absolute\",\n            \"steps\": [\n              {\n                \"color\": \"green\",\n                \"value\": null\n              },\n              {\n                \"color\": \"red\",\n                \"value\": 80\n              }\n            ]\n          },\n          \"unit\": \"bytes\"\n        },\n        \"overrides\": []\n      },\n      \"gridPos\": {\n        \"h\": 2,\n        \"w\": 2,\n        \"x\": 22,\n        \"y\": 3\n      },\n      \"id\": 18,\n      \"links\": [],\n      \"maxDataPoints\": 100,\n      \"options\": {\n        \"colorMode\": \"none\",\n        \"graphMode\": \"none\",\n        \"justifyMode\": \"auto\",\n        \"orientation\": \"horizontal\",\n        \"reduceOptions\": {\n          \"calcs\": [\n            \"lastNotNull\"\n          ],\n          \"fields\": \"\",\n          \"values\": false\n        },\n        \"textMode\": \"auto\"\n      },\n      \"pluginVersion\": \"9.4.3\",\n      \"targets\": [\n        {\n          \"datasource\": {\n            \"type\": \"prometheus\",\n            \"uid\": \"${datasource}\"\n          },\n          \"editorMode\": \"code\",\n          
\"exemplar\": false,\n          \"expr\": \"node_memory_SwapTotal_bytes{instance=\\\"$node\\\",job=\\\"$job\\\"}\",\n          \"instant\": true,\n          \"intervalFactor\": 1,\n          \"range\": false,\n          \"refId\": \"A\",\n          \"step\": 240\n        }\n      ],\n      \"title\": \"SWAP Total\",\n      \"type\": \"stat\"\n    },\n    {\n      \"collapsed\": false,\n      \"datasource\": {\n        \"type\": \"prometheus\",\n        \"uid\": \"000000001\"\n      },\n      \"gridPos\": {\n        \"h\": 1,\n        \"w\": 24,\n        \"x\": 0,\n        \"y\": 5\n      },\n      \"id\": 263,\n      \"panels\": [],\n      \"targets\": [\n        {\n          \"datasource\": {\n            \"type\": \"prometheus\",\n            \"uid\": \"000000001\"\n          },\n          \"refId\": \"A\"\n        }\n      ],\n      \"title\": \"Basic CPU / Mem / Net / Disk\",\n      \"type\": \"row\"\n    },\n    {\n      \"datasource\": {\n        \"type\": \"prometheus\",\n        \"uid\": \"${datasource}\"\n      },\n      \"description\": \"Basic CPU info\",\n      \"fieldConfig\": {\n        \"defaults\": {\n          \"color\": {\n            \"mode\": \"palette-classic\"\n          },\n          \"custom\": {\n            \"axisCenteredZero\": false,\n            \"axisColorMode\": \"text\",\n            \"axisLabel\": \"\",\n            \"axisPlacement\": \"auto\",\n            \"barAlignment\": 0,\n            \"drawStyle\": \"line\",\n            \"fillOpacity\": 40,\n            \"gradientMode\": \"none\",\n            \"hideFrom\": {\n              \"legend\": false,\n              \"tooltip\": false,\n              \"viz\": false\n            },\n            \"lineInterpolation\": \"smooth\",\n            \"lineWidth\": 1,\n            \"pointSize\": 5,\n            \"scaleDistribution\": {\n              \"type\": \"linear\"\n            },\n            \"showPoints\": \"never\",\n            \"spanNulls\": false,\n            \"stacking\": {\n    
          \"group\": \"A\",\n              \"mode\": \"percent\"\n            },\n            \"thresholdsStyle\": {\n              \"mode\": \"off\"\n            }\n          },\n          \"links\": [],\n          \"mappings\": [],\n          \"min\": 0,\n          \"thresholds\": {\n            \"mode\": \"absolute\",\n            \"steps\": [\n              {\n                \"color\": \"green\",\n                \"value\": null\n              },\n              {\n                \"color\": \"red\",\n                \"value\": 80\n              }\n            ]\n          },\n          \"unit\": \"percentunit\"\n        },\n        \"overrides\": [\n          {\n            \"matcher\": {\n              \"id\": \"byName\",\n              \"options\": \"Busy Iowait\"\n            },\n            \"properties\": [\n              {\n                \"id\": \"color\",\n                \"value\": {\n                  \"fixedColor\": \"#890F02\",\n                  \"mode\": \"fixed\"\n                }\n              }\n            ]\n          },\n          {\n            \"matcher\": {\n              \"id\": \"byName\",\n              \"options\": \"Idle\"\n            },\n            \"properties\": [\n              {\n                \"id\": \"color\",\n                \"value\": {\n                  \"fixedColor\": \"#052B51\",\n                  \"mode\": \"fixed\"\n                }\n              }\n            ]\n          },\n          {\n            \"matcher\": {\n              \"id\": \"byName\",\n              \"options\": \"Busy Iowait\"\n            },\n            \"properties\": [\n              {\n                \"id\": \"color\",\n                \"value\": {\n                  \"fixedColor\": \"#890F02\",\n                  \"mode\": \"fixed\"\n                }\n              }\n            ]\n          },\n          {\n            \"matcher\": {\n              \"id\": \"byName\",\n              \"options\": \"Idle\"\n            },\n         
   \"properties\": [\n              {\n                \"id\": \"color\",\n                \"value\": {\n                  \"fixedColor\": \"#7EB26D\",\n                  \"mode\": \"fixed\"\n                }\n              }\n            ]\n          },\n          {\n            \"matcher\": {\n              \"id\": \"byName\",\n              \"options\": \"Busy System\"\n            },\n            \"properties\": [\n              {\n                \"id\": \"color\",\n                \"value\": {\n                  \"fixedColor\": \"#EAB839\",\n                  \"mode\": \"fixed\"\n                }\n              }\n            ]\n          },\n          {\n            \"matcher\": {\n              \"id\": \"byName\",\n              \"options\": \"Busy User\"\n            },\n            \"properties\": [\n              {\n                \"id\": \"color\",\n                \"value\": {\n                  \"fixedColor\": \"#0A437C\",\n                  \"mode\": \"fixed\"\n                }\n              }\n            ]\n          },\n          {\n            \"matcher\": {\n              \"id\": \"byName\",\n              \"options\": \"Busy Other\"\n            },\n            \"properties\": [\n              {\n                \"id\": \"color\",\n                \"value\": {\n                  \"fixedColor\": \"#6D1F62\",\n                  \"mode\": \"fixed\"\n                }\n              }\n            ]\n          }\n        ]\n      },\n      \"gridPos\": {\n        \"h\": 7,\n        \"w\": 12,\n        \"x\": 0,\n        \"y\": 6\n      },\n      \"id\": 77,\n      \"links\": [],\n      \"options\": {\n        \"legend\": {\n          \"calcs\": [],\n          \"displayMode\": \"list\",\n          \"placement\": \"bottom\",\n          \"showLegend\": true,\n          \"width\": 250\n        },\n        \"tooltip\": {\n          \"mode\": \"multi\",\n          \"sort\": \"desc\"\n        }\n      },\n      \"pluginVersion\": \"9.2.0\",\n      
\"targets\": [\n        {\n          \"datasource\": {\n            \"type\": \"prometheus\",\n            \"uid\": \"${datasource}\"\n          },\n          \"editorMode\": \"code\",\n          \"exemplar\": false,\n          \"expr\":\n              \"sum(irate(node_cpu_seconds_total{instance=\\\"$node\\\",job=\\\"$job\\\", mode=\\\"system\\\"}[$__rate_interval])) / scalar(count(count(node_cpu_seconds_total{instance=\\\"$node\\\",job=\\\"$job\\\"}) by (cpu)))\",\n          \"format\": \"time_series\",\n          \"hide\": false,\n          \"instant\": false,\n          \"intervalFactor\": 1,\n          \"legendFormat\": \"Busy System\",\n          \"range\": true,\n          \"refId\": \"A\",\n          \"step\": 240\n        },\n        {\n          \"datasource\": {\n            \"type\": \"prometheus\",\n            \"uid\": \"${datasource}\"\n          },\n          \"editorMode\": \"code\",\n          \"expr\":\n              \"sum(irate(node_cpu_seconds_total{instance=\\\"$node\\\",job=\\\"$job\\\", mode=\\\"user\\\"}[$__rate_interval])) / scalar(count(count(node_cpu_seconds_total{instance=\\\"$node\\\",job=\\\"$job\\\"}) by (cpu)))\",\n          \"format\": \"time_series\",\n          \"hide\": false,\n          \"intervalFactor\": 1,\n          \"legendFormat\": \"Busy User\",\n          \"range\": true,\n          \"refId\": \"B\",\n          \"step\": 240\n        },\n        {\n          \"datasource\": {\n            \"type\": \"prometheus\",\n            \"uid\": \"${datasource}\"\n          },\n          \"editorMode\": \"code\",\n          \"expr\":\n              \"sum(irate(node_cpu_seconds_total{instance=\\\"$node\\\",job=\\\"$job\\\", mode=\\\"iowait\\\"}[$__rate_interval])) / scalar(count(count(node_cpu_seconds_total{instance=\\\"$node\\\",job=\\\"$job\\\"}) by (cpu)))\",\n          \"format\": \"time_series\",\n          \"intervalFactor\": 1,\n          \"legendFormat\": \"Busy Iowait\",\n          \"range\": true,\n          \"refId\": 
\"C\",\n          \"step\": 240\n        },\n        {\n          \"datasource\": {\n            \"type\": \"prometheus\",\n            \"uid\": \"${datasource}\"\n          },\n          \"editorMode\": \"code\",\n          \"expr\":\n              \"sum(irate(node_cpu_seconds_total{instance=\\\"$node\\\",job=\\\"$job\\\", mode=~\\\".*irq\\\"}[$__rate_interval])) / scalar(count(count(node_cpu_seconds_total{instance=\\\"$node\\\",job=\\\"$job\\\"}) by (cpu)))\",\n          \"format\": \"time_series\",\n          \"intervalFactor\": 1,\n          \"legendFormat\": \"Busy IRQs\",\n          \"range\": true,\n          \"refId\": \"D\",\n          \"step\": 240\n        },\n        {\n          \"datasource\": {\n            \"type\": \"prometheus\",\n            \"uid\": \"${datasource}\"\n          },\n          \"editorMode\": \"code\",\n          \"expr\":\n              \"sum(irate(node_cpu_seconds_total{instance=\\\"$node\\\",job=\\\"$job\\\",  mode!='idle',mode!='user',mode!='system',mode!='iowait',mode!='irq',mode!='softirq'}[$__rate_interval])) / scalar(count(count(node_cpu_seconds_total{instance=\\\"$node\\\",job=\\\"$job\\\"}) by (cpu)))\",\n          \"format\": \"time_series\",\n          \"intervalFactor\": 1,\n          \"legendFormat\": \"Busy Other\",\n          \"range\": true,\n          \"refId\": \"E\",\n          \"step\": 240\n        },\n        {\n          \"datasource\": {\n            \"type\": \"prometheus\",\n            \"uid\": \"${datasource}\"\n          },\n          \"editorMode\": \"code\",\n          \"expr\":\n              \"sum(irate(node_cpu_seconds_total{instance=\\\"$node\\\",job=\\\"$job\\\", mode=\\\"idle\\\"}[$__rate_interval])) / scalar(count(count(node_cpu_seconds_total{instance=\\\"$node\\\",job=\\\"$job\\\"}) by (cpu)))\",\n          \"format\": \"time_series\",\n          \"intervalFactor\": 1,\n          \"legendFormat\": \"Idle\",\n          \"range\": true,\n          \"refId\": \"F\",\n          \"step\": 240\n   
     }\n      ],\n      \"title\": \"CPU Basic\",\n      \"type\": \"timeseries\"\n    },\n    {\n      \"datasource\": {\n        \"type\": \"prometheus\",\n        \"uid\": \"${datasource}\"\n      },\n      \"description\": \"Basic memory usage\",\n      \"fieldConfig\": {\n        \"defaults\": {\n          \"color\": {\n            \"mode\": \"palette-classic\"\n          },\n          \"custom\": {\n            \"axisCenteredZero\": false,\n            \"axisColorMode\": \"text\",\n            \"axisLabel\": \"\",\n            \"axisPlacement\": \"auto\",\n            \"barAlignment\": 0,\n            \"drawStyle\": \"line\",\n            \"fillOpacity\": 40,\n            \"gradientMode\": \"none\",\n            \"hideFrom\": {\n              \"legend\": false,\n              \"tooltip\": false,\n              \"viz\": false\n            },\n            \"lineInterpolation\": \"linear\",\n            \"lineWidth\": 1,\n            \"pointSize\": 5,\n            \"scaleDistribution\": {\n              \"type\": \"linear\"\n            },\n            \"showPoints\": \"never\",\n            \"spanNulls\": false,\n            \"stacking\": {\n              \"group\": \"A\",\n              \"mode\": \"normal\"\n            },\n            \"thresholdsStyle\": {\n              \"mode\": \"off\"\n            }\n          },\n          \"links\": [],\n          \"mappings\": [],\n          \"min\": 0,\n          \"thresholds\": {\n            \"mode\": \"absolute\",\n            \"steps\": [\n              {\n                \"color\": \"green\",\n                \"value\": null\n              },\n              {\n                \"color\": \"red\",\n                \"value\": 80\n              }\n            ]\n          },\n          \"unit\": \"bytes\"\n        },\n        \"overrides\": [\n          {\n            \"matcher\": {\n              \"id\": \"byName\",\n              \"options\": \"Apps\"\n            },\n            \"properties\": [\n              
{\n                \"id\": \"color\",\n                \"value\": {\n                  \"fixedColor\": \"#629E51\",\n                  \"mode\": \"fixed\"\n                }\n              }\n            ]\n          },\n          {\n            \"matcher\": {\n              \"id\": \"byName\",\n              \"options\": \"Buffers\"\n            },\n            \"properties\": [\n              {\n                \"id\": \"color\",\n                \"value\": {\n                  \"fixedColor\": \"#614D93\",\n                  \"mode\": \"fixed\"\n                }\n              }\n            ]\n          },\n          {\n            \"matcher\": {\n              \"id\": \"byName\",\n              \"options\": \"Cache\"\n            },\n            \"properties\": [\n              {\n                \"id\": \"color\",\n                \"value\": {\n                  \"fixedColor\": \"#6D1F62\",\n                  \"mode\": \"fixed\"\n                }\n              }\n            ]\n          },\n          {\n            \"matcher\": {\n              \"id\": \"byName\",\n              \"options\": \"Cached\"\n            },\n            \"properties\": [\n              {\n                \"id\": \"color\",\n                \"value\": {\n                  \"fixedColor\": \"#511749\",\n                  \"mode\": \"fixed\"\n                }\n              }\n            ]\n          },\n          {\n            \"matcher\": {\n              \"id\": \"byName\",\n              \"options\": \"Committed\"\n            },\n            \"properties\": [\n              {\n                \"id\": \"color\",\n                \"value\": {\n                  \"fixedColor\": \"#508642\",\n                  \"mode\": \"fixed\"\n                }\n              }\n            ]\n          },\n          {\n            \"matcher\": {\n              \"id\": \"byName\",\n              \"options\": \"Free\"\n            },\n            \"properties\": [\n              {\n           
     \"id\": \"color\",\n                \"value\": {\n                  \"fixedColor\": \"#0A437C\",\n                  \"mode\": \"fixed\"\n                }\n              }\n            ]\n          },\n          {\n            \"matcher\": {\n              \"id\": \"byName\",\n              \"options\":\n                  \"Hardware Corrupted - Amount of RAM that the kernel identified as corrupted / not working\"\n            },\n            \"properties\": [\n              {\n                \"id\": \"color\",\n                \"value\": {\n                  \"fixedColor\": \"#CFFAFF\",\n                  \"mode\": \"fixed\"\n                }\n              }\n            ]\n          },\n          {\n            \"matcher\": {\n              \"id\": \"byName\",\n              \"options\": \"Inactive\"\n            },\n            \"properties\": [\n              {\n                \"id\": \"color\",\n                \"value\": {\n                  \"fixedColor\": \"#584477\",\n                  \"mode\": \"fixed\"\n                }\n              }\n            ]\n          },\n          {\n            \"matcher\": {\n              \"id\": \"byName\",\n              \"options\": \"PageTables\"\n            },\n            \"properties\": [\n              {\n                \"id\": \"color\",\n                \"value\": {\n                  \"fixedColor\": \"#0A50A1\",\n                  \"mode\": \"fixed\"\n                }\n              }\n            ]\n          },\n          {\n            \"matcher\": {\n              \"id\": \"byName\",\n              \"options\": \"Page_Tables\"\n            },\n            \"properties\": [\n              {\n                \"id\": \"color\",\n                \"value\": {\n                  \"fixedColor\": \"#0A50A1\",\n                  \"mode\": \"fixed\"\n                }\n              }\n            ]\n          },\n          {\n            \"matcher\": {\n              \"id\": \"byName\",\n              
\"options\": \"RAM_Free\"\n            },\n            \"properties\": [\n              {\n                \"id\": \"color\",\n                \"value\": {\n                  \"fixedColor\": \"#E0F9D7\",\n                  \"mode\": \"fixed\"\n                }\n              }\n            ]\n          },\n          {\n            \"matcher\": {\n              \"id\": \"byName\",\n              \"options\": \"SWAP Used\"\n            },\n            \"properties\": [\n              {\n                \"id\": \"color\",\n                \"value\": {\n                  \"fixedColor\": \"#BF1B00\",\n                  \"mode\": \"fixed\"\n                }\n              }\n            ]\n          },\n          {\n            \"matcher\": {\n              \"id\": \"byName\",\n              \"options\": \"Slab\"\n            },\n            \"properties\": [\n              {\n                \"id\": \"color\",\n                \"value\": {\n                  \"fixedColor\": \"#806EB7\",\n                  \"mode\": \"fixed\"\n                }\n              }\n            ]\n          },\n          {\n            \"matcher\": {\n              \"id\": \"byName\",\n              \"options\": \"Slab_Cache\"\n            },\n            \"properties\": [\n              {\n                \"id\": \"color\",\n                \"value\": {\n                  \"fixedColor\": \"#E0752D\",\n                  \"mode\": \"fixed\"\n                }\n              }\n            ]\n          },\n          {\n            \"matcher\": {\n              \"id\": \"byName\",\n              \"options\": \"Swap\"\n            },\n            \"properties\": [\n              {\n                \"id\": \"color\",\n                \"value\": {\n                  \"fixedColor\": \"#BF1B00\",\n                  \"mode\": \"fixed\"\n                }\n              }\n            ]\n          },\n          {\n            \"matcher\": {\n              \"id\": \"byName\",\n              
\"options\": \"Swap Used\"\n            },\n            \"properties\": [\n              {\n                \"id\": \"color\",\n                \"value\": {\n                  \"fixedColor\": \"#BF1B00\",\n                  \"mode\": \"fixed\"\n                }\n              }\n            ]\n          },\n          {\n            \"matcher\": {\n              \"id\": \"byName\",\n              \"options\": \"Swap_Cache\"\n            },\n            \"properties\": [\n              {\n                \"id\": \"color\",\n                \"value\": {\n                  \"fixedColor\": \"#C15C17\",\n                  \"mode\": \"fixed\"\n                }\n              }\n            ]\n          },\n          {\n            \"matcher\": {\n              \"id\": \"byName\",\n              \"options\": \"Swap_Free\"\n            },\n            \"properties\": [\n              {\n                \"id\": \"color\",\n                \"value\": {\n                  \"fixedColor\": \"#2F575E\",\n                  \"mode\": \"fixed\"\n                }\n              }\n            ]\n          },\n          {\n            \"matcher\": {\n              \"id\": \"byName\",\n              \"options\": \"Unused\"\n            },\n            \"properties\": [\n              {\n                \"id\": \"color\",\n                \"value\": {\n                  \"fixedColor\": \"#EAB839\",\n                  \"mode\": \"fixed\"\n                }\n              }\n            ]\n          },\n          {\n            \"matcher\": {\n              \"id\": \"byName\",\n              \"options\": \"RAM Total\"\n            },\n            \"properties\": [\n              {\n                \"id\": \"color\",\n                \"value\": {\n                  \"fixedColor\": \"#E0F9D7\",\n                  \"mode\": \"fixed\"\n                }\n              },\n              {\n                \"id\": \"custom.fillOpacity\",\n                \"value\": 0\n              },\n      
        {\n                \"id\": \"custom.stacking\",\n                \"value\": {\n                  \"group\": false,\n                  \"mode\": \"normal\"\n                }\n              }\n            ]\n          },\n          {\n            \"matcher\": {\n              \"id\": \"byName\",\n              \"options\": \"RAM Cache + Buffer\"\n            },\n            \"properties\": [\n              {\n                \"id\": \"color\",\n                \"value\": {\n                  \"fixedColor\": \"#052B51\",\n                  \"mode\": \"fixed\"\n                }\n              }\n            ]\n          },\n          {\n            \"matcher\": {\n              \"id\": \"byName\",\n              \"options\": \"RAM Free\"\n            },\n            \"properties\": [\n              {\n                \"id\": \"color\",\n                \"value\": {\n                  \"fixedColor\": \"#7EB26D\",\n                  \"mode\": \"fixed\"\n                }\n              }\n            ]\n          },\n          {\n            \"matcher\": {\n              \"id\": \"byName\",\n              \"options\": \"Available\"\n            },\n            \"properties\": [\n              {\n                \"id\": \"color\",\n                \"value\": {\n                  \"fixedColor\": \"#DEDAF7\",\n                  \"mode\": \"fixed\"\n                }\n              },\n              {\n                \"id\": \"custom.fillOpacity\",\n                \"value\": 0\n              },\n              {\n                \"id\": \"custom.stacking\",\n                \"value\": {\n                  \"group\": false,\n                  \"mode\": \"normal\"\n                }\n              }\n            ]\n          }\n        ]\n      },\n      \"gridPos\": {\n        \"h\": 7,\n        \"w\": 12,\n        \"x\": 12,\n        \"y\": 6\n      },\n      \"id\": 78,\n      \"links\": [],\n      \"options\": {\n        \"legend\": {\n          \"calcs\": [],\n 
         \"displayMode\": \"list\",\n          \"placement\": \"bottom\",\n          \"showLegend\": true,\n          \"width\": 350\n        },\n        \"tooltip\": {\n          \"mode\": \"multi\",\n          \"sort\": \"none\"\n        }\n      },\n      \"pluginVersion\": \"9.2.0\",\n      \"targets\": [\n        {\n          \"datasource\": {\n            \"type\": \"prometheus\",\n            \"uid\": \"${datasource}\"\n          },\n          \"expr\": \"node_memory_MemTotal_bytes{instance=\\\"$node\\\",job=\\\"$job\\\"}\",\n          \"format\": \"time_series\",\n          \"hide\": false,\n          \"intervalFactor\": 1,\n          \"legendFormat\": \"RAM Total\",\n          \"refId\": \"A\",\n          \"step\": 240\n        },\n        {\n          \"datasource\": {\n            \"type\": \"prometheus\",\n            \"uid\": \"${datasource}\"\n          },\n          \"expr\":\n              \"node_memory_MemTotal_bytes{instance=\\\"$node\\\",job=\\\"$job\\\"} - node_memory_MemFree_bytes{instance=\\\"$node\\\",job=\\\"$job\\\"} - (node_memory_Cached_bytes{instance=\\\"$node\\\",job=\\\"$job\\\"} + node_memory_Buffers_bytes{instance=\\\"$node\\\",job=\\\"$job\\\"} + node_memory_SReclaimable_bytes{instance=\\\"$node\\\",job=\\\"$job\\\"})\",\n          \"format\": \"time_series\",\n          \"hide\": false,\n          \"intervalFactor\": 1,\n          \"legendFormat\": \"RAM Used\",\n          \"refId\": \"B\",\n          \"step\": 240\n        },\n        {\n          \"datasource\": {\n            \"type\": \"prometheus\",\n            \"uid\": \"${datasource}\"\n          },\n          \"expr\":\n              \"node_memory_Cached_bytes{instance=\\\"$node\\\",job=\\\"$job\\\"} + node_memory_Buffers_bytes{instance=\\\"$node\\\",job=\\\"$job\\\"} + node_memory_SReclaimable_bytes{instance=\\\"$node\\\",job=\\\"$job\\\"}\",\n          \"format\": \"time_series\",\n          \"intervalFactor\": 1,\n          \"legendFormat\": \"RAM Cache + Buffer\",\n    
      \"refId\": \"C\",\n          \"step\": 240\n        },\n        {\n          \"datasource\": {\n            \"type\": \"prometheus\",\n            \"uid\": \"${datasource}\"\n          },\n          \"expr\": \"node_memory_MemFree_bytes{instance=\\\"$node\\\",job=\\\"$job\\\"}\",\n          \"format\": \"time_series\",\n          \"intervalFactor\": 1,\n          \"legendFormat\": \"RAM Free\",\n          \"refId\": \"D\",\n          \"step\": 240\n        },\n        {\n          \"datasource\": {\n            \"type\": \"prometheus\",\n            \"uid\": \"${datasource}\"\n          },\n          \"expr\":\n              \"(node_memory_SwapTotal_bytes{instance=\\\"$node\\\",job=\\\"$job\\\"} - node_memory_SwapFree_bytes{instance=\\\"$node\\\",job=\\\"$job\\\"})\",\n          \"format\": \"time_series\",\n          \"intervalFactor\": 1,\n          \"legendFormat\": \"SWAP Used\",\n          \"refId\": \"E\",\n          \"step\": 240\n        }\n      ],\n      \"title\": \"Memory Basic\",\n      \"type\": \"timeseries\"\n    },\n    {\n      \"datasource\": {\n        \"type\": \"prometheus\",\n        \"uid\": \"${datasource}\"\n      },\n      \"description\": \"Basic network info per interface\",\n      \"fieldConfig\": {\n        \"defaults\": {\n          \"color\": {\n            \"mode\": \"palette-classic\"\n          },\n          \"custom\": {\n            \"axisCenteredZero\": false,\n            \"axisColorMode\": \"text\",\n            \"axisLabel\": \"\",\n            \"axisPlacement\": \"auto\",\n            \"barAlignment\": 0,\n            \"drawStyle\": \"line\",\n            \"fillOpacity\": 40,\n            \"gradientMode\": \"none\",\n            \"hideFrom\": {\n              \"legend\": false,\n              \"tooltip\": false,\n              \"viz\": false\n            },\n            \"lineInterpolation\": \"linear\",\n            \"lineWidth\": 1,\n            \"pointSize\": 5,\n            \"scaleDistribution\": {\n              
\"type\": \"linear\"\n            },\n            \"showPoints\": \"never\",\n            \"spanNulls\": false,\n            \"stacking\": {\n              \"group\": \"A\",\n              \"mode\": \"none\"\n            },\n            \"thresholdsStyle\": {\n              \"mode\": \"off\"\n            }\n          },\n          \"links\": [],\n          \"mappings\": [],\n          \"thresholds\": {\n            \"mode\": \"absolute\",\n            \"steps\": [\n              {\n                \"color\": \"green\",\n                \"value\": null\n              },\n              {\n                \"color\": \"red\",\n                \"value\": 80\n              }\n            ]\n          },\n          \"unit\": \"bps\"\n        },\n        \"overrides\": [\n          {\n            \"matcher\": {\n              \"id\": \"byName\",\n              \"options\": \"Recv_bytes_eth2\"\n            },\n            \"properties\": [\n              {\n                \"id\": \"color\",\n                \"value\": {\n                  \"fixedColor\": \"#7EB26D\",\n                  \"mode\": \"fixed\"\n                }\n              }\n            ]\n          },\n          {\n            \"matcher\": {\n              \"id\": \"byName\",\n              \"options\": \"Recv_bytes_lo\"\n            },\n            \"properties\": [\n              {\n                \"id\": \"color\",\n                \"value\": {\n                  \"fixedColor\": \"#0A50A1\",\n                  \"mode\": \"fixed\"\n                }\n              }\n            ]\n          },\n          {\n            \"matcher\": {\n              \"id\": \"byName\",\n              \"options\": \"Recv_drop_eth2\"\n            },\n            \"properties\": [\n              {\n                \"id\": \"color\",\n                \"value\": {\n                  \"fixedColor\": \"#6ED0E0\",\n                  \"mode\": \"fixed\"\n                }\n              }\n            ]\n          },\n          
{\n            \"matcher\": {\n              \"id\": \"byName\",\n              \"options\": \"Recv_drop_lo\"\n            },\n            \"properties\": [\n              {\n                \"id\": \"color\",\n                \"value\": {\n                  \"fixedColor\": \"#E0F9D7\",\n                  \"mode\": \"fixed\"\n                }\n              }\n            ]\n          },\n          {\n            \"matcher\": {\n              \"id\": \"byName\",\n              \"options\": \"Recv_errs_eth2\"\n            },\n            \"properties\": [\n              {\n                \"id\": \"color\",\n                \"value\": {\n                  \"fixedColor\": \"#BF1B00\",\n                  \"mode\": \"fixed\"\n                }\n              }\n            ]\n          },\n          {\n            \"matcher\": {\n              \"id\": \"byName\",\n              \"options\": \"Recv_errs_lo\"\n            },\n            \"properties\": [\n              {\n                \"id\": \"color\",\n                \"value\": {\n                  \"fixedColor\": \"#CCA300\",\n                  \"mode\": \"fixed\"\n                }\n              }\n            ]\n          },\n          {\n            \"matcher\": {\n              \"id\": \"byName\",\n              \"options\": \"Trans_bytes_eth2\"\n            },\n            \"properties\": [\n              {\n                \"id\": \"color\",\n                \"value\": {\n                  \"fixedColor\": \"#7EB26D\",\n                  \"mode\": \"fixed\"\n                }\n              }\n            ]\n          },\n          {\n            \"matcher\": {\n              \"id\": \"byName\",\n              \"options\": \"Trans_bytes_lo\"\n            },\n            \"properties\": [\n              {\n                \"id\": \"color\",\n                \"value\": {\n                  \"fixedColor\": \"#0A50A1\",\n                  \"mode\": \"fixed\"\n                }\n              }\n            ]\n 
         },\n          {\n            \"matcher\": {\n              \"id\": \"byName\",\n              \"options\": \"Trans_drop_eth2\"\n            },\n            \"properties\": [\n              {\n                \"id\": \"color\",\n                \"value\": {\n                  \"fixedColor\": \"#6ED0E0\",\n                  \"mode\": \"fixed\"\n                }\n              }\n            ]\n          },\n          {\n            \"matcher\": {\n              \"id\": \"byName\",\n              \"options\": \"Trans_drop_lo\"\n            },\n            \"properties\": [\n              {\n                \"id\": \"color\",\n                \"value\": {\n                  \"fixedColor\": \"#E0F9D7\",\n                  \"mode\": \"fixed\"\n                }\n              }\n            ]\n          },\n          {\n            \"matcher\": {\n              \"id\": \"byName\",\n              \"options\": \"Trans_errs_eth2\"\n            },\n            \"properties\": [\n              {\n                \"id\": \"color\",\n                \"value\": {\n                  \"fixedColor\": \"#BF1B00\",\n                  \"mode\": \"fixed\"\n                }\n              }\n            ]\n          },\n          {\n            \"matcher\": {\n              \"id\": \"byName\",\n              \"options\": \"Trans_errs_lo\"\n            },\n            \"properties\": [\n              {\n                \"id\": \"color\",\n                \"value\": {\n                  \"fixedColor\": \"#CCA300\",\n                  \"mode\": \"fixed\"\n                }\n              }\n            ]\n          },\n          {\n            \"matcher\": {\n              \"id\": \"byName\",\n              \"options\": \"recv_bytes_lo\"\n            },\n            \"properties\": [\n              {\n                \"id\": \"color\",\n                \"value\": {\n                  \"fixedColor\": \"#0A50A1\",\n                  \"mode\": \"fixed\"\n                }\n         
     }\n            ]\n          },\n          {\n            \"matcher\": {\n              \"id\": \"byName\",\n              \"options\": \"recv_drop_eth0\"\n            },\n            \"properties\": [\n              {\n                \"id\": \"color\",\n                \"value\": {\n                  \"fixedColor\": \"#99440A\",\n                  \"mode\": \"fixed\"\n                }\n              }\n            ]\n          },\n          {\n            \"matcher\": {\n              \"id\": \"byName\",\n              \"options\": \"recv_drop_lo\"\n            },\n            \"properties\": [\n              {\n                \"id\": \"color\",\n                \"value\": {\n                  \"fixedColor\": \"#967302\",\n                  \"mode\": \"fixed\"\n                }\n              }\n            ]\n          },\n          {\n            \"matcher\": {\n              \"id\": \"byName\",\n              \"options\": \"recv_errs_eth0\"\n            },\n            \"properties\": [\n              {\n                \"id\": \"color\",\n                \"value\": {\n                  \"fixedColor\": \"#BF1B00\",\n                  \"mode\": \"fixed\"\n                }\n              }\n            ]\n          },\n          {\n            \"matcher\": {\n              \"id\": \"byName\",\n              \"options\": \"recv_errs_lo\"\n            },\n            \"properties\": [\n              {\n                \"id\": \"color\",\n                \"value\": {\n                  \"fixedColor\": \"#890F02\",\n                  \"mode\": \"fixed\"\n                }\n              }\n            ]\n          },\n          {\n            \"matcher\": {\n              \"id\": \"byName\",\n              \"options\": \"trans_bytes_eth0\"\n            },\n            \"properties\": [\n              {\n                \"id\": \"color\",\n                \"value\": {\n                  \"fixedColor\": \"#7EB26D\",\n                  \"mode\": \"fixed\"\n     
           }\n              }\n            ]\n          },\n          {\n            \"matcher\": {\n              \"id\": \"byName\",\n              \"options\": \"trans_bytes_lo\"\n            },\n            \"properties\": [\n              {\n                \"id\": \"color\",\n                \"value\": {\n                  \"fixedColor\": \"#0A50A1\",\n                  \"mode\": \"fixed\"\n                }\n              }\n            ]\n          },\n          {\n            \"matcher\": {\n              \"id\": \"byName\",\n              \"options\": \"trans_drop_eth0\"\n            },\n            \"properties\": [\n              {\n                \"id\": \"color\",\n                \"value\": {\n                  \"fixedColor\": \"#99440A\",\n                  \"mode\": \"fixed\"\n                }\n              }\n            ]\n          },\n          {\n            \"matcher\": {\n              \"id\": \"byName\",\n              \"options\": \"trans_drop_lo\"\n            },\n            \"properties\": [\n              {\n                \"id\": \"color\",\n                \"value\": {\n                  \"fixedColor\": \"#967302\",\n                  \"mode\": \"fixed\"\n                }\n              }\n            ]\n          },\n          {\n            \"matcher\": {\n              \"id\": \"byName\",\n              \"options\": \"trans_errs_eth0\"\n            },\n            \"properties\": [\n              {\n                \"id\": \"color\",\n                \"value\": {\n                  \"fixedColor\": \"#BF1B00\",\n                  \"mode\": \"fixed\"\n                }\n              }\n            ]\n          },\n          {\n            \"matcher\": {\n              \"id\": \"byName\",\n              \"options\": \"trans_errs_lo\"\n            },\n            \"properties\": [\n              {\n                \"id\": \"color\",\n                \"value\": {\n                  \"fixedColor\": \"#890F02\",\n                  
\"mode\": \"fixed\"\n                }\n              }\n            ]\n          },\n          {\n            \"matcher\": {\n              \"id\": \"byRegexp\",\n              \"options\": \"/.*trans.*/\"\n            },\n            \"properties\": [\n              {\n                \"id\": \"custom.transform\",\n                \"value\": \"negative-Y\"\n              }\n            ]\n          }\n        ]\n      },\n      \"gridPos\": {\n        \"h\": 7,\n        \"w\": 12,\n        \"x\": 0,\n        \"y\": 13\n      },\n      \"id\": 74,\n      \"links\": [],\n      \"options\": {\n        \"legend\": {\n          \"calcs\": [],\n          \"displayMode\": \"list\",\n          \"placement\": \"bottom\",\n          \"showLegend\": true\n        },\n        \"tooltip\": {\n          \"mode\": \"multi\",\n          \"sort\": \"none\"\n        }\n      },\n      \"pluginVersion\": \"9.2.0\",\n      \"targets\": [\n        {\n          \"datasource\": {\n            \"type\": \"prometheus\",\n            \"uid\": \"${datasource}\"\n          },\n          \"expr\":\n              \"irate(node_network_receive_bytes_total{instance=\\\"$node\\\",job=\\\"$job\\\"}[$__rate_interval])*8\",\n          \"format\": \"time_series\",\n          \"intervalFactor\": 1,\n          \"legendFormat\": \"recv {{device}}\",\n          \"refId\": \"A\",\n          \"step\": 240\n        },\n        {\n          \"datasource\": {\n            \"type\": \"prometheus\",\n            \"uid\": \"${datasource}\"\n          },\n          \"expr\":\n              \"irate(node_network_transmit_bytes_total{instance=\\\"$node\\\",job=\\\"$job\\\"}[$__rate_interval])*8\",\n          \"format\": \"time_series\",\n          \"intervalFactor\": 1,\n          \"legendFormat\": \"trans {{device}} \",\n          \"refId\": \"B\",\n          \"step\": 240\n        }\n      ],\n      \"title\": \"Network Traffic Basic\",\n      \"type\": \"timeseries\"\n    },\n    {\n      \"datasource\": {\n      
  \"type\": \"prometheus\",\n        \"uid\": \"${datasource}\"\n      },\n      \"description\": \"Disk space used of all filesystems mounted\",\n      \"fieldConfig\": {\n        \"defaults\": {\n          \"color\": {\n            \"mode\": \"palette-classic\"\n          },\n          \"custom\": {\n            \"axisCenteredZero\": false,\n            \"axisColorMode\": \"text\",\n            \"axisLabel\": \"\",\n            \"axisPlacement\": \"auto\",\n            \"barAlignment\": 0,\n            \"drawStyle\": \"line\",\n            \"fillOpacity\": 40,\n            \"gradientMode\": \"none\",\n            \"hideFrom\": {\n              \"legend\": false,\n              \"tooltip\": false,\n              \"viz\": false\n            },\n            \"lineInterpolation\": \"linear\",\n            \"lineWidth\": 1,\n            \"pointSize\": 5,\n            \"scaleDistribution\": {\n              \"type\": \"linear\"\n            },\n            \"showPoints\": \"never\",\n            \"spanNulls\": false,\n            \"stacking\": {\n              \"group\": \"A\",\n              \"mode\": \"none\"\n            },\n            \"thresholdsStyle\": {\n              \"mode\": \"off\"\n            }\n          },\n          \"links\": [],\n          \"mappings\": [],\n          \"max\": 100,\n          \"min\": 0,\n          \"thresholds\": {\n            \"mode\": \"absolute\",\n            \"steps\": [\n              {\n                \"color\": \"green\",\n                \"value\": null\n              },\n              {\n                \"color\": \"red\",\n                \"value\": 80\n              }\n            ]\n          },\n          \"unit\": \"percent\"\n        },\n        \"overrides\": []\n      },\n      \"gridPos\": {\n        \"h\": 7,\n        \"w\": 12,\n        \"x\": 12,\n        \"y\": 13\n      },\n      \"id\": 152,\n      \"links\": [],\n      \"options\": {\n        \"legend\": {\n          \"calcs\": [],\n          
\"displayMode\": \"list\",\n          \"placement\": \"bottom\",\n          \"showLegend\": true\n        },\n        \"tooltip\": {\n          \"mode\": \"multi\",\n          \"sort\": \"none\"\n        }\n      },\n      \"pluginVersion\": \"9.2.0\",\n      \"targets\": [\n        {\n          \"datasource\": {\n            \"type\": \"prometheus\",\n            \"uid\": \"${datasource}\"\n          },\n          \"expr\":\n              \"100 - ((node_filesystem_avail_bytes{instance=\\\"$node\\\",job=\\\"$job\\\",device!~'rootfs'} * 100) / node_filesystem_size_bytes{instance=\\\"$node\\\",job=\\\"$job\\\",device!~'rootfs'})\",\n          \"format\": \"time_series\",\n          \"intervalFactor\": 1,\n          \"legendFormat\": \"{{mountpoint}}\",\n          \"refId\": \"A\",\n          \"step\": 240\n        }\n      ],\n      \"title\": \"Disk Space Used Basic\",\n      \"type\": \"timeseries\"\n    },\n    {\n      \"collapsed\": true,\n      \"datasource\": {\n        \"type\": \"prometheus\",\n        \"uid\": \"000000001\"\n      },\n      \"gridPos\": {\n        \"h\": 1,\n        \"w\": 24,\n        \"x\": 0,\n        \"y\": 20\n      },\n      \"id\": 265,\n      \"panels\": [\n        {\n          \"datasource\": {\n            \"type\": \"prometheus\",\n            \"uid\": \"${datasource}\"\n          },\n          \"description\": \"\",\n          \"fieldConfig\": {\n            \"defaults\": {\n              \"color\": {\n                \"mode\": \"palette-classic\"\n              },\n              \"custom\": {\n                \"axisCenteredZero\": false,\n                \"axisColorMode\": \"text\",\n                \"axisLabel\": \"percentage\",\n                \"axisPlacement\": \"auto\",\n                \"barAlignment\": 0,\n                \"drawStyle\": \"line\",\n                \"fillOpacity\": 70,\n                \"gradientMode\": \"none\",\n                \"hideFrom\": {\n                  \"legend\": false,\n                  
\"tooltip\": false,\n                  \"viz\": false\n                },\n                \"lineInterpolation\": \"smooth\",\n                \"lineWidth\": 2,\n                \"pointSize\": 5,\n                \"scaleDistribution\": {\n                  \"type\": \"linear\"\n                },\n                \"showPoints\": \"never\",\n                \"spanNulls\": false,\n                \"stacking\": {\n                  \"group\": \"A\",\n                  \"mode\": \"percent\"\n                },\n                \"thresholdsStyle\": {\n                  \"mode\": \"off\"\n                }\n              },\n              \"links\": [],\n              \"mappings\": [],\n              \"min\": 0,\n              \"thresholds\": {\n                \"mode\": \"absolute\",\n                \"steps\": [\n                  {\n                    \"color\": \"green\"\n                  },\n                  {\n                    \"color\": \"red\",\n                    \"value\": 80\n                  }\n                ]\n              },\n              \"unit\": \"percentunit\"\n            },\n            \"overrides\": [\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Idle - Waiting for something to happen\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#052B51\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Iowait - Waiting for I/O to complete\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": 
\"#EAB839\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Irq - Servicing interrupts\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#BF1B00\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Nice - Niced processes executing in user mode\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#C15C17\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Softirq - Servicing softirqs\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#E24D42\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\":\n                      \"Steal - Time spent in other operating systems when running in a virtualized environment\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      
\"fixedColor\": \"#FCE2DE\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"System - Processes executing in kernel mode\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#508642\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"User - Normal processes executing in user mode\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#5195CE\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              }\n            ]\n          },\n          \"gridPos\": {\n            \"h\": 12,\n            \"w\": 12,\n            \"x\": 0,\n            \"y\": 21\n          },\n          \"id\": 3,\n          \"links\": [],\n          \"options\": {\n            \"legend\": {\n              \"calcs\": [\n                \"mean\",\n                \"lastNotNull\",\n                \"max\",\n                \"min\"\n              ],\n              \"displayMode\": \"table\",\n              \"placement\": \"bottom\",\n              \"showLegend\": true,\n              \"width\": 250\n            },\n            \"tooltip\": {\n              \"mode\": \"multi\",\n              \"sort\": \"desc\"\n            }\n          },\n          \"pluginVersion\": \"9.2.0\",\n          \"targets\":\n              [\n                {\n                  \"datasource\": {\n 
                   \"type\": \"prometheus\",\n                    \"uid\": \"${datasource}\"\n                  },\n                  \"editorMode\": \"code\",\n                  \"expr\": \"sum(irate(node_cpu_seconds_total{instance=\\\"$node\\\",job=\\\"$job\\\", mode=\\\"system\\\"}[$__rate_interval])) / scalar(count(count(node_cpu_seconds_total{instance=\\\"$node\\\",job=\\\"$job\\\"}) by (cpu)))\",\n                  \"format\": \"time_series\",\n                  \"interval\": \"\",\n                  \"intervalFactor\": 1,\n                  \"legendFormat\": \"System - Processes executing in kernel mode\",\n                  \"range\": true,\n                  \"refId\": \"A\",\n                  \"step\": 240\n                },\n                {\n                  \"datasource\":\n                      {\n                        \"type\": \"prometheus\",\n                        \"uid\": \"${datasource}\"\n                      },\n                  \"editorMode\": \"code\",\n                  \"expr\": \"sum(irate(node_cpu_seconds_total{instance=\\\"$node\\\",job=\\\"$job\\\", mode=\\\"user\\\"}[$__rate_interval])) / scalar(count(count(node_cpu_seconds_total{instance=\\\"$node\\\",job=\\\"$job\\\"}) by (cpu)))\",\n                  \"format\": \"time_series\",\n                  \"intervalFactor\": 1,\n                  \"legendFormat\": \"User - Normal processes executing in user mode\",\n                  \"range\": true,\n                  \"refId\": \"B\",\n                  \"step\": 240\n                },\n                {\n                  \"datasource\":\n                      {\n                        \"type\": \"prometheus\",\n                        \"uid\": \"${datasource}\"\n                      },\n                  \"editorMode\": \"code\",\n                  \"expr\": \"sum(irate(node_cpu_seconds_total{instance=\\\"$node\\\",job=\\\"$job\\\", mode=\\\"nice\\\"}[$__rate_interval])) / 
scalar(count(count(node_cpu_seconds_total{instance=\\\"$node\\\",job=\\\"$job\\\"}) by (cpu)))\",\n                  \"format\": \"time_series\",\n                  \"intervalFactor\": 1,\n                  \"legendFormat\": \"Nice - Niced processes executing in user mode\",\n                  \"range\": true,\n                  \"refId\": \"C\",\n                  \"step\": 240\n                },\n                {\n                  \"datasource\":\n                      {\n                        \"type\": \"prometheus\",\n                        \"uid\": \"${datasource}\"\n                      },\n                  \"editorMode\": \"code\",\n                  \"expr\":\n                      \"sum by(instance) (irate(node_cpu_seconds_total{instance=\\\"$node\\\",job=\\\"$job\\\", mode=\\\"iowait\\\"}[$__rate_interval])) / scalar(count(count(node_cpu_seconds_total{instance=\\\"$node\\\",job=\\\"$job\\\"}) by (cpu)))\",\n                  \"format\": \"time_series\",\n                  \"intervalFactor\": 1,\n                  \"legendFormat\": \"Iowait - Waiting for I/O to complete\",\n                  \"range\": true,\n                  \"refId\": \"E\",\n                  \"step\": 240\n                },\n                {\n                  \"datasource\": {\n                    \"type\": \"prometheus\",\n                    \"uid\": \"${datasource}\"\n                  },\n                  \"editorMode\": \"code\",\n                  \"expr\":\n                      \"sum(irate(node_cpu_seconds_total{instance=\\\"$node\\\",job=\\\"$job\\\", mode=\\\"irq\\\"}[$__rate_interval])) / scalar(count(count(node_cpu_seconds_total{instance=\\\"$node\\\",job=\\\"$job\\\"}) by (cpu)))\",\n                  \"format\": \"time_series\",\n                  \"intervalFactor\": 1,\n                  \"legendFormat\": \"Irq - Servicing interrupts\",\n                  \"range\": true,\n                  \"refId\": \"F\",\n                  \"step\": 240\n                
},\n                {\n                  \"datasource\": {\n                    \"type\": \"prometheus\",\n                    \"uid\": \"${datasource}\"\n                  },\n                  \"editorMode\": \"code\",\n                  \"expr\":\n                      \"sum(irate(node_cpu_seconds_total{instance=\\\"$node\\\",job=\\\"$job\\\", mode=\\\"softirq\\\"}[$__rate_interval])) / scalar(count(count(node_cpu_seconds_total{instance=\\\"$node\\\",job=\\\"$job\\\"}) by (cpu)))\",\n                  \"format\": \"time_series\",\n                  \"intervalFactor\": 1,\n                  \"legendFormat\": \"Softirq - Servicing softirqs\",\n                  \"range\": true,\n                  \"refId\": \"G\",\n                  \"step\": 240\n                },\n                {\n                  \"datasource\": {\n                    \"type\": \"prometheus\",\n                    \"uid\": \"${datasource}\"\n                  },\n                  \"editorMode\": \"code\",\n                  \"expr\":\n                      \"sum(irate(node_cpu_seconds_total{instance=\\\"$node\\\",job=\\\"$job\\\", mode=\\\"steal\\\"}[$__rate_interval])) / scalar(count(count(node_cpu_seconds_total{instance=\\\"$node\\\",job=\\\"$job\\\"}) by (cpu)))\",\n                  \"format\": \"time_series\",\n                  \"intervalFactor\": 1,\n                  \"legendFormat\":\n                      \"Steal - Time spent in other operating systems when running in a virtualized environment\",\n                  \"range\": true,\n                  \"refId\": \"H\",\n                  \"step\": 240\n                },\n                {\n                  \"datasource\": {\n                    \"type\": \"prometheus\",\n                    \"uid\": \"${datasource}\"\n                  },\n                  \"editorMode\": \"code\",\n                  \"expr\":\n                      \"sum(irate(node_cpu_seconds_total{instance=\\\"$node\\\",job=\\\"$job\\\", 
mode=\\\"idle\\\"}[$__rate_interval])) / scalar(count(count(node_cpu_seconds_total{instance=\\\"$node\\\",job=\\\"$job\\\"}) by (cpu)))\",\n                  \"format\": \"time_series\",\n                  \"hide\": false,\n                  \"intervalFactor\": 1,\n                  \"legendFormat\": \"Idle - Waiting for something to happen\",\n                  \"range\": true,\n                  \"refId\": \"J\",\n                  \"step\": 240\n                }\n              ],\n          \"title\": \"CPU\",\n          \"type\": \"timeseries\"\n        },\n        {\n          \"datasource\": {\n            \"type\": \"prometheus\",\n            \"uid\": \"${datasource}\"\n          },\n          \"description\": \"\",\n          \"fieldConfig\": {\n            \"defaults\": {\n              \"color\": {\n                \"mode\": \"palette-classic\"\n              },\n              \"custom\": {\n                \"axisCenteredZero\": false,\n                \"axisColorMode\": \"text\",\n                \"axisLabel\": \"bytes\",\n                \"axisPlacement\": \"auto\",\n                \"barAlignment\": 0,\n                \"drawStyle\": \"line\",\n                \"fillOpacity\": 40,\n                \"gradientMode\": \"none\",\n                \"hideFrom\": {\n                  \"legend\": false,\n                  \"tooltip\": false,\n                  \"viz\": false\n                },\n                \"lineInterpolation\": \"linear\",\n                \"lineWidth\": 1,\n                \"pointSize\": 5,\n                \"scaleDistribution\": {\n                  \"type\": \"linear\"\n                },\n                \"showPoints\": \"never\",\n                \"spanNulls\": false,\n                \"stacking\": {\n                  \"group\": \"A\",\n                  \"mode\": \"normal\"\n                },\n                \"thresholdsStyle\": {\n                  \"mode\": \"off\"\n                }\n              },\n              
\"links\": [],\n              \"mappings\": [],\n              \"min\": 0,\n              \"thresholds\": {\n                \"mode\": \"absolute\",\n                \"steps\": [\n                  {\n                    \"color\": \"green\"\n                  },\n                  {\n                    \"color\": \"red\",\n                    \"value\": 80\n                  }\n                ]\n              },\n              \"unit\": \"bytes\"\n            },\n            \"overrides\": [\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Apps\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#629E51\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Buffers\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#614D93\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Cache\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#6D1F62\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": 
\"Cached\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#511749\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Committed\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#508642\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Free\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#0A437C\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\":\n                      \"Hardware Corrupted - Amount of RAM that the kernel identified as corrupted / not working\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#CFFAFF\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Inactive\"\n                },\n                \"properties\": 
[\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#584477\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"PageTables\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#0A50A1\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Page_Tables\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#0A50A1\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"RAM_Free\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#E0F9D7\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Slab\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#806EB7\",\n             
         \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Slab_Cache\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#E0752D\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Swap\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#BF1B00\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Swap - Swap memory usage\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#BF1B00\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Swap_Cache\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#C15C17\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                
\"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Swap_Free\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#2F575E\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Unused\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#EAB839\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Unused - Free memory unassigned\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#052B51\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byRegexp\",\n                  \"options\": \"/.*Hardware Corrupted - *./\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"custom.stacking\",\n                    \"value\": {\n                      \"group\": false,\n                      \"mode\": \"normal\"\n                    }\n                  }\n                ]\n              }\n            ]\n          },\n          \"gridPos\": {\n            \"h\": 12,\n            \"w\": 12,\n            \"x\": 12,\n            \"y\": 21\n     
     },\n          \"id\": 24,\n          \"links\": [],\n          \"options\": {\n            \"legend\": {\n              \"calcs\": [\n                \"mean\",\n                \"lastNotNull\",\n                \"max\",\n                \"min\"\n              ],\n              \"displayMode\": \"table\",\n              \"placement\": \"bottom\",\n              \"showLegend\": true,\n              \"width\": 350\n            },\n            \"tooltip\": {\n              \"mode\": \"multi\",\n              \"sort\": \"none\"\n            }\n          },\n          \"pluginVersion\": \"9.2.0\",\n          \"targets\": [\n            {\n              \"datasource\": {\n                \"type\": \"prometheus\",\n                \"uid\": \"${datasource}\"\n              },\n              \"expr\":\n                  \"node_memory_MemTotal_bytes{instance=\\\"$node\\\",job=\\\"$job\\\"} - node_memory_MemFree_bytes{instance=\\\"$node\\\",job=\\\"$job\\\"} - node_memory_Buffers_bytes{instance=\\\"$node\\\",job=\\\"$job\\\"} - node_memory_Cached_bytes{instance=\\\"$node\\\",job=\\\"$job\\\"} - node_memory_Slab_bytes{instance=\\\"$node\\\",job=\\\"$job\\\"} - node_memory_PageTables_bytes{instance=\\\"$node\\\",job=\\\"$job\\\"} - node_memory_SwapCached_bytes{instance=\\\"$node\\\",job=\\\"$job\\\"}\",\n              \"format\": \"time_series\",\n              \"hide\": false,\n              \"intervalFactor\": 1,\n              \"legendFormat\": \"Apps - Memory used by user-space applications\",\n              \"refId\": \"A\",\n              \"step\": 240\n            },\n            {\n              \"datasource\": {\n                \"type\": \"prometheus\",\n                \"uid\": \"${datasource}\"\n              },\n              \"expr\": \"node_memory_PageTables_bytes{instance=\\\"$node\\\",job=\\\"$job\\\"}\",\n              \"format\": \"time_series\",\n              \"hide\": false,\n              \"intervalFactor\": 1,\n              \"legendFormat\":\n       
           \"PageTables - Memory used to map between virtual and physical memory addresses\",\n              \"refId\": \"B\",\n              \"step\": 240\n            },\n            {\n              \"datasource\": {\n                \"type\": \"prometheus\",\n                \"uid\": \"${datasource}\"\n              },\n              \"expr\": \"node_memory_SwapCached_bytes{instance=\\\"$node\\\",job=\\\"$job\\\"}\",\n              \"format\": \"time_series\",\n              \"intervalFactor\": 1,\n              \"legendFormat\":\n                  \"SwapCache - Memory that keeps track of pages that have been fetched from swap but not yet been modified\",\n              \"refId\": \"C\",\n              \"step\": 240\n            },\n            {\n              \"datasource\": {\n                \"type\": \"prometheus\",\n                \"uid\": \"${datasource}\"\n              },\n              \"expr\": \"node_memory_Slab_bytes{instance=\\\"$node\\\",job=\\\"$job\\\"}\",\n              \"format\": \"time_series\",\n              \"hide\": false,\n              \"intervalFactor\": 1,\n              \"legendFormat\":\n                  \"Slab - Memory used by the kernel to cache data structures for its own use (caches like inode, dentry, etc)\",\n              \"refId\": \"D\",\n              \"step\": 240\n            },\n            {\n              \"datasource\": {\n                \"type\": \"prometheus\",\n                \"uid\": \"${datasource}\"\n              },\n              \"expr\": \"node_memory_Cached_bytes{instance=\\\"$node\\\",job=\\\"$job\\\"}\",\n              \"format\": \"time_series\",\n              \"hide\": false,\n              \"intervalFactor\": 1,\n              \"legendFormat\": \"Cache - Parked file data (file content) cache\",\n              \"refId\": \"E\",\n              \"step\": 240\n            },\n            {\n              \"datasource\": {\n                \"type\": \"prometheus\",\n                \"uid\": 
\"${datasource}\"\n              },\n              \"expr\": \"node_memory_Buffers_bytes{instance=\\\"$node\\\",job=\\\"$job\\\"}\",\n              \"format\": \"time_series\",\n              \"hide\": false,\n              \"intervalFactor\": 1,\n              \"legendFormat\": \"Buffers - Block device (e.g. harddisk) cache\",\n              \"refId\": \"F\",\n              \"step\": 240\n            },\n            {\n              \"datasource\": {\n                \"type\": \"prometheus\",\n                \"uid\": \"${datasource}\"\n              },\n              \"expr\": \"node_memory_MemFree_bytes{instance=\\\"$node\\\",job=\\\"$job\\\"}\",\n              \"format\": \"time_series\",\n              \"hide\": false,\n              \"intervalFactor\": 1,\n              \"legendFormat\": \"Unused - Free memory unassigned\",\n              \"refId\": \"G\",\n              \"step\": 240\n            },\n            {\n              \"datasource\": {\n                \"type\": \"prometheus\",\n                \"uid\": \"${datasource}\"\n              },\n              \"expr\":\n                  \"(node_memory_SwapTotal_bytes{instance=\\\"$node\\\",job=\\\"$job\\\"} - node_memory_SwapFree_bytes{instance=\\\"$node\\\",job=\\\"$job\\\"})\",\n              \"format\": \"time_series\",\n              \"hide\": false,\n              \"intervalFactor\": 1,\n              \"legendFormat\": \"Swap - Swap space used\",\n              \"refId\": \"H\",\n              \"step\": 240\n            },\n            {\n              \"datasource\": {\n                \"type\": \"prometheus\",\n                \"uid\": \"${datasource}\"\n              },\n              \"expr\": \"node_memory_HardwareCorrupted_bytes{instance=\\\"$node\\\",job=\\\"$job\\\"}\",\n              \"format\": \"time_series\",\n              \"hide\": false,\n              \"intervalFactor\": 1,\n              \"legendFormat\":\n                  \"Hardware Corrupted - Amount of RAM that the kernel 
identified as corrupted / not working\",\n              \"refId\": \"I\",\n              \"step\": 240\n            }\n          ],\n          \"title\": \"Memory Stack\",\n          \"type\": \"timeseries\"\n        },\n        {\n          \"datasource\": {\n            \"type\": \"prometheus\",\n            \"uid\": \"${datasource}\"\n          },\n          \"fieldConfig\": {\n            \"defaults\": {\n              \"color\": {\n                \"mode\": \"palette-classic\"\n              },\n              \"custom\": {\n                \"axisCenteredZero\": false,\n                \"axisColorMode\": \"text\",\n                \"axisLabel\": \"bits out (-) / in (+)\",\n                \"axisPlacement\": \"auto\",\n                \"barAlignment\": 0,\n                \"drawStyle\": \"line\",\n                \"fillOpacity\": 40,\n                \"gradientMode\": \"none\",\n                \"hideFrom\": {\n                  \"legend\": false,\n                  \"tooltip\": false,\n                  \"viz\": false\n                },\n                \"lineInterpolation\": \"linear\",\n                \"lineWidth\": 1,\n                \"pointSize\": 5,\n                \"scaleDistribution\": {\n                  \"type\": \"linear\"\n                },\n                \"showPoints\": \"never\",\n                \"spanNulls\": false,\n                \"stacking\": {\n                  \"group\": \"A\",\n                  \"mode\": \"none\"\n                },\n                \"thresholdsStyle\": {\n                  \"mode\": \"off\"\n                }\n              },\n              \"links\": [],\n              \"mappings\": [],\n              \"thresholds\": {\n                \"mode\": \"absolute\",\n                \"steps\": [\n                  {\n                    \"color\": \"green\"\n                  },\n                  {\n                    \"color\": \"red\",\n                    \"value\": 80\n                  }\n                ]\n   
           },\n              \"unit\": \"bps\"\n            },\n            \"overrides\": [\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"receive_packets_eth0\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#7EB26D\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"receive_packets_lo\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#E24D42\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"transmit_packets_eth0\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#7EB26D\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"transmit_packets_lo\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#E24D42\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n       
         \"matcher\": {\n                  \"id\": \"byRegexp\",\n                  \"options\": \"/.*Trans.*/\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"custom.transform\",\n                    \"value\": \"negative-Y\"\n                  }\n                ]\n              }\n            ]\n          },\n          \"gridPos\": {\n            \"h\": 12,\n            \"w\": 12,\n            \"x\": 0,\n            \"y\": 33\n          },\n          \"id\": 84,\n          \"links\": [],\n          \"options\": {\n            \"legend\": {\n              \"calcs\": [\n                \"mean\",\n                \"lastNotNull\",\n                \"max\",\n                \"min\"\n              ],\n              \"displayMode\": \"table\",\n              \"placement\": \"bottom\",\n              \"showLegend\": true\n            },\n            \"tooltip\": {\n              \"mode\": \"multi\",\n              \"sort\": \"none\"\n            }\n          },\n          \"pluginVersion\": \"9.2.0\",\n          \"targets\": [\n            {\n              \"datasource\": {\n                \"type\": \"prometheus\",\n                \"uid\": \"${datasource}\"\n              },\n              \"expr\":\n                  \"irate(node_network_receive_bytes_total{instance=\\\"$node\\\",job=\\\"$job\\\"}[$__rate_interval])*8\",\n              \"format\": \"time_series\",\n              \"intervalFactor\": 1,\n              \"legendFormat\": \"{{device}} - Receive\",\n              \"refId\": \"A\",\n              \"step\": 240\n            },\n            {\n              \"datasource\": {\n                \"type\": \"prometheus\",\n                \"uid\": \"${datasource}\"\n              },\n              \"expr\":\n                  \"irate(node_network_transmit_bytes_total{instance=\\\"$node\\\",job=\\\"$job\\\"}[$__rate_interval])*8\",\n              \"format\": \"time_series\",\n              
\"intervalFactor\": 1,\n              \"legendFormat\": \"{{device}} - Transmit\",\n              \"refId\": \"B\",\n              \"step\": 240\n            }\n          ],\n          \"title\": \"Network Traffic\",\n          \"type\": \"timeseries\"\n        },\n        {\n          \"datasource\": {\n            \"type\": \"prometheus\",\n            \"uid\": \"${datasource}\"\n          },\n          \"description\": \"\",\n          \"fieldConfig\": {\n            \"defaults\": {\n              \"color\": {\n                \"mode\": \"palette-classic\"\n              },\n              \"custom\": {\n                \"axisCenteredZero\": false,\n                \"axisColorMode\": \"text\",\n                \"axisLabel\": \"bytes\",\n                \"axisPlacement\": \"auto\",\n                \"barAlignment\": 0,\n                \"drawStyle\": \"line\",\n                \"fillOpacity\": 40,\n                \"gradientMode\": \"none\",\n                \"hideFrom\": {\n                  \"legend\": false,\n                  \"tooltip\": false,\n                  \"viz\": false\n                },\n                \"lineInterpolation\": \"linear\",\n                \"lineWidth\": 1,\n                \"pointSize\": 5,\n                \"scaleDistribution\": {\n                  \"type\": \"linear\"\n                },\n                \"showPoints\": \"never\",\n                \"spanNulls\": false,\n                \"stacking\": {\n                  \"group\": \"A\",\n                  \"mode\": \"none\"\n                },\n                \"thresholdsStyle\": {\n                  \"mode\": \"off\"\n                }\n              },\n              \"links\": [],\n              \"mappings\": [],\n              \"min\": 0,\n              \"thresholds\": {\n                \"mode\": \"absolute\",\n                \"steps\": [\n                  {\n                    \"color\": \"green\"\n                  },\n                  {\n                    
\"color\": \"red\",\n                    \"value\": 80\n                  }\n                ]\n              },\n              \"unit\": \"bytes\"\n            },\n            \"overrides\": []\n          },\n          \"gridPos\": {\n            \"h\": 12,\n            \"w\": 12,\n            \"x\": 12,\n            \"y\": 33\n          },\n          \"id\": 156,\n          \"links\": [],\n          \"options\": {\n            \"legend\": {\n              \"calcs\": [\n                \"mean\",\n                \"lastNotNull\",\n                \"max\",\n                \"min\"\n              ],\n              \"displayMode\": \"table\",\n              \"placement\": \"bottom\",\n              \"showLegend\": true\n            },\n            \"tooltip\": {\n              \"mode\": \"multi\",\n              \"sort\": \"none\"\n            }\n          },\n          \"pluginVersion\": \"9.2.0\",\n          \"targets\": [\n            {\n              \"datasource\": {\n                \"type\": \"prometheus\",\n                \"uid\": \"${datasource}\"\n              },\n              \"expr\":\n                  \"node_filesystem_size_bytes{instance=\\\"$node\\\",job=\\\"$job\\\",device!~'rootfs'} - node_filesystem_avail_bytes{instance=\\\"$node\\\",job=\\\"$job\\\",device!~'rootfs'}\",\n              \"format\": \"time_series\",\n              \"intervalFactor\": 1,\n              \"legendFormat\": \"{{mountpoint}}\",\n              \"refId\": \"A\",\n              \"step\": 240\n            }\n          ],\n          \"title\": \"Disk Space Used\",\n          \"type\": \"timeseries\"\n        },\n        {\n          \"datasource\": {\n            \"type\": \"prometheus\",\n            \"uid\": \"${datasource}\"\n          },\n          \"description\": \"\",\n          \"fieldConfig\": {\n            \"defaults\": {\n              \"color\": {\n                \"mode\": \"palette-classic\"\n              },\n              \"custom\": {\n                
\"axisCenteredZero\": false,\n                \"axisColorMode\": \"text\",\n                \"axisLabel\": \"IO read (-) / write (+)\",\n                \"axisPlacement\": \"auto\",\n                \"barAlignment\": 0,\n                \"drawStyle\": \"line\",\n                \"fillOpacity\": 20,\n                \"gradientMode\": \"none\",\n                \"hideFrom\": {\n                  \"legend\": false,\n                  \"tooltip\": false,\n                  \"viz\": false\n                },\n                \"lineInterpolation\": \"linear\",\n                \"lineWidth\": 1,\n                \"pointSize\": 5,\n                \"scaleDistribution\": {\n                  \"type\": \"linear\"\n                },\n                \"showPoints\": \"never\",\n                \"spanNulls\": false,\n                \"stacking\": {\n                  \"group\": \"A\",\n                  \"mode\": \"none\"\n                },\n                \"thresholdsStyle\": {\n                  \"mode\": \"off\"\n                }\n              },\n              \"links\": [],\n              \"mappings\": [],\n              \"thresholds\": {\n                \"mode\": \"absolute\",\n                \"steps\": [\n                  {\n                    \"color\": \"green\"\n                  },\n                  {\n                    \"color\": \"red\",\n                    \"value\": 80\n                  }\n                ]\n              },\n              \"unit\": \"iops\"\n            },\n            \"overrides\": [\n              {\n                \"matcher\": {\n                  \"id\": \"byRegexp\",\n                  \"options\": \"/.*Read.*/\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"custom.transform\",\n                    \"value\": \"negative-Y\"\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": 
\"byRegexp\",\n                  \"options\": \"/.*sda_.*/\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#7EB26D\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byRegexp\",\n                  \"options\": \"/.*sdb_.*/\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#EAB839\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byRegexp\",\n                  \"options\": \"/.*sdc_.*/\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#6ED0E0\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byRegexp\",\n                  \"options\": \"/.*sdd_.*/\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#EF843C\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byRegexp\",\n                  \"options\": \"/.*sde_.*/\"\n                },\n                \"properties\": [\n                  {\n          
          \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#E24D42\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byRegexp\",\n                  \"options\": \"/.*sda1.*/\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#584477\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byRegexp\",\n                  \"options\": \"/.*sda2_.*/\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#BA43A9\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byRegexp\",\n                  \"options\": \"/.*sda3_.*/\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#F4D598\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byRegexp\",\n                  \"options\": \"/.*sdb1.*/\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#0A50A1\",\n                      
\"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byRegexp\",\n                  \"options\": \"/.*sdb2.*/\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#BF1B00\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byRegexp\",\n                  \"options\": \"/.*sdb2.*/\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#BF1B00\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byRegexp\",\n                  \"options\": \"/.*sdb3.*/\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#E0752D\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byRegexp\",\n                  \"options\": \"/.*sdc1.*/\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#962D82\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": 
{\n                  \"id\": \"byRegexp\",\n                  \"options\": \"/.*sdc2.*/\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#614D93\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byRegexp\",\n                  \"options\": \"/.*sdc3.*/\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#9AC48A\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byRegexp\",\n                  \"options\": \"/.*sdd1.*/\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#65C5DB\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byRegexp\",\n                  \"options\": \"/.*sdd2.*/\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#F9934E\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byRegexp\",\n                  \"options\": \"/.*sdd3.*/\"\n                },\n                \"properties\": [\n  
                {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#EA6460\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byRegexp\",\n                  \"options\": \"/.*sde1.*/\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#E0F9D7\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byRegexp\",\n                  \"options\": \"/.*sde2.*/\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#FCEACA\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byRegexp\",\n                  \"options\": \"/.*sde3.*/\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#F9E2D2\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              }\n            ]\n          },\n          \"gridPos\": {\n            \"h\": 12,\n            \"w\": 12,\n            \"x\": 0,\n            \"y\": 45\n          },\n          \"id\": 229,\n          \"links\": [],\n          \"options\": {\n            \"legend\": {\n              \"calcs\": [\n                \"mean\",\n                
\"lastNotNull\",\n                \"max\",\n                \"min\"\n              ],\n              \"displayMode\": \"table\",\n              \"placement\": \"bottom\",\n              \"showLegend\": true\n            },\n            \"tooltip\": {\n              \"mode\": \"single\",\n              \"sort\": \"none\"\n            }\n          },\n          \"pluginVersion\": \"9.2.0\",\n          \"targets\": [\n            {\n              \"datasource\": {\n                \"type\": \"prometheus\",\n                \"uid\": \"${datasource}\"\n              },\n              \"expr\":\n                  \"irate(node_disk_reads_completed_total{instance=\\\"$node\\\",job=\\\"$job\\\",device=~\\\"$diskdevices\\\"}[$__rate_interval])\",\n              \"intervalFactor\": 4,\n              \"legendFormat\": \"{{device}} - Reads completed\",\n              \"refId\": \"A\",\n              \"step\": 240\n            },\n            {\n              \"datasource\": {\n                \"type\": \"prometheus\",\n                \"uid\": \"${datasource}\"\n              },\n              \"expr\":\n                  \"irate(node_disk_writes_completed_total{instance=\\\"$node\\\",job=\\\"$job\\\",device=~\\\"$diskdevices\\\"}[$__rate_interval])\",\n              \"intervalFactor\": 1,\n              \"legendFormat\": \"{{device}} - Writes completed\",\n              \"refId\": \"B\",\n              \"step\": 240\n            }\n          ],\n          \"title\": \"Disk IOps\",\n          \"type\": \"timeseries\"\n        },\n        {\n          \"datasource\": {\n            \"type\": \"prometheus\",\n            \"uid\": \"${datasource}\"\n          },\n          \"description\": \"\",\n          \"fieldConfig\": {\n            \"defaults\": {\n              \"color\": {\n                \"mode\": \"palette-classic\"\n              },\n              \"custom\": {\n                \"axisCenteredZero\": false,\n                \"axisColorMode\": \"text\",\n                
\"axisLabel\": \"bytes read (-) / write (+)\",\n                \"axisPlacement\": \"auto\",\n                \"barAlignment\": 0,\n                \"drawStyle\": \"line\",\n                \"fillOpacity\": 40,\n                \"gradientMode\": \"none\",\n                \"hideFrom\": {\n                  \"legend\": false,\n                  \"tooltip\": false,\n                  \"viz\": false\n                },\n                \"lineInterpolation\": \"linear\",\n                \"lineWidth\": 1,\n                \"pointSize\": 5,\n                \"scaleDistribution\": {\n                  \"type\": \"linear\"\n                },\n                \"showPoints\": \"never\",\n                \"spanNulls\": false,\n                \"stacking\": {\n                  \"group\": \"A\",\n                  \"mode\": \"none\"\n                },\n                \"thresholdsStyle\": {\n                  \"mode\": \"off\"\n                }\n              },\n              \"links\": [],\n              \"mappings\": [],\n              \"thresholds\": {\n                \"mode\": \"absolute\",\n                \"steps\": [\n                  {\n                    \"color\": \"green\"\n                  },\n                  {\n                    \"color\": \"red\",\n                    \"value\": 80\n                  }\n                ]\n              },\n              \"unit\": \"Bps\"\n            },\n            \"overrides\": [\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"io time\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#890F02\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": 
\"byRegexp\",\n                  \"options\": \"/.*read.*/\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"custom.transform\",\n                    \"value\": \"negative-Y\"\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byRegexp\",\n                  \"options\": \"/.*sda.*/\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#7EB26D\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byRegexp\",\n                  \"options\": \"/.*sdb.*/\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#EAB839\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byRegexp\",\n                  \"options\": \"/.*sdc.*/\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#6ED0E0\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byRegexp\",\n                  \"options\": \"/.*sdd.*/\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      
\"fixedColor\": \"#EF843C\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byRegexp\",\n                  \"options\": \"/.*sde.*/\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#E24D42\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byType\",\n                  \"options\": \"time\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"custom.axisPlacement\",\n                    \"value\": \"hidden\"\n                  }\n                ]\n              }\n            ]\n          },\n          \"gridPos\": {\n            \"h\": 12,\n            \"w\": 12,\n            \"x\": 12,\n            \"y\": 45\n          },\n          \"id\": 42,\n          \"links\": [],\n          \"options\": {\n            \"legend\": {\n              \"calcs\": [\n                \"mean\",\n                \"lastNotNull\",\n                \"max\",\n                \"min\"\n              ],\n              \"displayMode\": \"table\",\n              \"placement\": \"bottom\",\n              \"showLegend\": true\n            },\n            \"tooltip\": {\n              \"mode\": \"multi\",\n              \"sort\": \"none\"\n            }\n          },\n          \"pluginVersion\": \"9.2.0\",\n          \"targets\": [\n            {\n              \"datasource\": {\n                \"type\": \"prometheus\",\n                \"uid\": \"${datasource}\"\n              },\n              \"expr\":\n                  
\"irate(node_disk_read_bytes_total{instance=\\\"$node\\\",job=\\\"$job\\\",device=~\\\"$diskdevices\\\"}[$__rate_interval])\",\n              \"format\": \"time_series\",\n              \"hide\": false,\n              \"intervalFactor\": 1,\n              \"legendFormat\": \"{{device}} - Successfully read bytes\",\n              \"refId\": \"A\",\n              \"step\": 240\n            },\n            {\n              \"datasource\": {\n                \"type\": \"prometheus\",\n                \"uid\": \"${datasource}\"\n              },\n              \"expr\":\n                  \"irate(node_disk_written_bytes_total{instance=\\\"$node\\\",job=\\\"$job\\\",device=~\\\"$diskdevices\\\"}[$__rate_interval])\",\n              \"format\": \"time_series\",\n              \"hide\": false,\n              \"intervalFactor\": 1,\n              \"legendFormat\": \"{{device}} - Successfully written bytes\",\n              \"refId\": \"B\",\n              \"step\": 240\n            }\n          ],\n          \"title\": \"I/O Usage Read / Write\",\n          \"type\": \"timeseries\"\n        },\n        {\n          \"datasource\": {\n            \"type\": \"prometheus\",\n            \"uid\": \"${datasource}\"\n          },\n          \"description\": \"\",\n          \"fieldConfig\": {\n            \"defaults\": {\n              \"color\": {\n                \"mode\": \"palette-classic\"\n              },\n              \"custom\": {\n                \"axisCenteredZero\": false,\n                \"axisColorMode\": \"text\",\n                \"axisLabel\": \"%util\",\n                \"axisPlacement\": \"auto\",\n                \"barAlignment\": 0,\n                \"drawStyle\": \"line\",\n                \"fillOpacity\": 40,\n                \"gradientMode\": \"none\",\n                \"hideFrom\": {\n                  \"legend\": false,\n                  \"tooltip\": false,\n                  \"viz\": false\n                },\n                \"lineInterpolation\": 
\"linear\",\n                \"lineWidth\": 1,\n                \"pointSize\": 5,\n                \"scaleDistribution\": {\n                  \"type\": \"linear\"\n                },\n                \"showPoints\": \"never\",\n                \"spanNulls\": false,\n                \"stacking\": {\n                  \"group\": \"A\",\n                  \"mode\": \"none\"\n                },\n                \"thresholdsStyle\": {\n                  \"mode\": \"off\"\n                }\n              },\n              \"links\": [],\n              \"mappings\": [],\n              \"min\": 0,\n              \"thresholds\": {\n                \"mode\": \"absolute\",\n                \"steps\": [\n                  {\n                    \"color\": \"green\"\n                  },\n                  {\n                    \"color\": \"red\",\n                    \"value\": 80\n                  }\n                ]\n              },\n              \"unit\": \"percentunit\"\n            },\n            \"overrides\": [\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"io time\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#890F02\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byType\",\n                  \"options\": \"time\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"custom.axisPlacement\",\n                    \"value\": \"hidden\"\n                  }\n                ]\n              }\n            ]\n          },\n          \"gridPos\": {\n            \"h\": 12,\n            \"w\": 12,\n            \"x\": 0,\n            \"y\": 57\n  
        },\n          \"id\": 127,\n          \"links\": [],\n          \"options\": {\n            \"legend\": {\n              \"calcs\": [\n                \"mean\",\n                \"lastNotNull\",\n                \"max\",\n                \"min\"\n              ],\n              \"displayMode\": \"table\",\n              \"placement\": \"bottom\",\n              \"showLegend\": true\n            },\n            \"tooltip\": {\n              \"mode\": \"multi\",\n              \"sort\": \"none\"\n            }\n          },\n          \"pluginVersion\": \"9.2.0\",\n          \"targets\": [\n            {\n              \"datasource\": {\n                \"type\": \"prometheus\",\n                \"uid\": \"${datasource}\"\n              },\n              \"expr\":\n                  \"irate(node_disk_io_time_seconds_total{instance=\\\"$node\\\",job=\\\"$job\\\",device=~\\\"$diskdevices\\\"} [$__rate_interval])\",\n              \"format\": \"time_series\",\n              \"hide\": false,\n              \"interval\": \"\",\n              \"intervalFactor\": 1,\n              \"legendFormat\": \"{{device}}\",\n              \"refId\": \"A\",\n              \"step\": 240\n            }\n          ],\n          \"title\": \"I/O Utilization\",\n          \"type\": \"timeseries\"\n        },\n        {\n          \"datasource\": {\n            \"type\": \"prometheus\",\n            \"uid\": \"${datasource}\"\n          },\n          \"fieldConfig\": {\n            \"defaults\": {\n              \"color\": {\n                \"mode\": \"palette-classic\"\n              },\n              \"custom\": {\n                \"axisCenteredZero\": false,\n                \"axisColorMode\": \"text\",\n                \"axisLabel\": \"percentage\",\n                \"axisPlacement\": \"auto\",\n                \"barAlignment\": 0,\n                \"drawStyle\": \"bars\",\n                \"fillOpacity\": 70,\n                \"gradientMode\": \"none\",\n                
\"hideFrom\": {\n                  \"legend\": false,\n                  \"tooltip\": false,\n                  \"viz\": false\n                },\n                \"lineInterpolation\": \"smooth\",\n                \"lineWidth\": 2,\n                \"pointSize\": 3,\n                \"scaleDistribution\": {\n                  \"type\": \"linear\"\n                },\n                \"showPoints\": \"never\",\n                \"spanNulls\": false,\n                \"stacking\": {\n                  \"group\": \"A\",\n                  \"mode\": \"none\"\n                },\n                \"thresholdsStyle\": {\n                  \"mode\": \"off\"\n                }\n              },\n              \"mappings\": [],\n              \"max\": 1,\n              \"thresholds\": {\n                \"mode\": \"absolute\",\n                \"steps\": [\n                  {\n                    \"color\": \"green\"\n                  },\n                  {\n                    \"color\": \"red\",\n                    \"value\": 80\n                  }\n                ]\n              },\n              \"unit\": \"percentunit\"\n            },\n            \"overrides\": [\n              {\n                \"matcher\": {\n                  \"id\": \"byRegexp\",\n                  \"options\": \"/^Guest - /\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#5195ce\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byRegexp\",\n                  \"options\": \"/^GuestNice - /\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#c15c17\",\n    
                  \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              }\n            ]\n          },\n          \"gridPos\": {\n            \"h\": 12,\n            \"w\": 12,\n            \"x\": 12,\n            \"y\": 57\n          },\n          \"id\": 319,\n          \"options\": {\n            \"legend\": {\n              \"calcs\": [\n                \"mean\",\n                \"lastNotNull\",\n                \"max\",\n                \"min\"\n              ],\n              \"displayMode\": \"table\",\n              \"placement\": \"bottom\",\n              \"showLegend\": true\n            },\n            \"tooltip\": {\n              \"mode\": \"multi\",\n              \"sort\": \"desc\"\n            }\n          },\n          \"targets\": [\n            {\n              \"datasource\": {\n                \"type\": \"prometheus\",\n                \"uid\": \"${datasource}\"\n              },\n              \"editorMode\": \"code\",\n              \"expr\":\n                  \"sum by(instance) (irate(node_cpu_guest_seconds_total{instance=\\\"$node\\\",job=\\\"$job\\\", mode=\\\"user\\\"}[1m])) / on(instance) group_left sum by (instance)((irate(node_cpu_seconds_total{instance=\\\"$node\\\",job=\\\"$job\\\"}[1m])))\",\n              \"hide\": false,\n              \"legendFormat\":\n                  \"Guest - Time spent running a virtual CPU for a guest operating system\",\n              \"range\": true,\n              \"refId\": \"A\"\n            },\n            {\n              \"datasource\": {\n                \"type\": \"prometheus\",\n                \"uid\": \"${datasource}\"\n              },\n              \"editorMode\": \"code\",\n              \"expr\":\n                  \"sum by(instance) (irate(node_cpu_guest_seconds_total{instance=\\\"$node\\\",job=\\\"$job\\\", mode=\\\"nice\\\"}[1m])) / on(instance) group_left sum by 
(instance)((irate(node_cpu_seconds_total{instance=\\\"$node\\\",job=\\\"$job\\\"}[1m])))\",\n              \"hide\": false,\n              \"legendFormat\":\n                  \"GuestNice - Time spent running a niced guest  (virtual CPU for guest operating system)\",\n              \"range\": true,\n              \"refId\": \"B\"\n            }\n          ],\n          \"title\": \"CPU spent seconds in guests (VMs)\",\n          \"type\": \"timeseries\"\n        }\n      ],\n      \"targets\": [\n        {\n          \"datasource\": {\n            \"type\": \"prometheus\",\n            \"uid\": \"000000001\"\n          },\n          \"refId\": \"A\"\n        }\n      ],\n      \"title\": \"CPU / Memory / Net / Disk\",\n      \"type\": \"row\"\n    },\n    {\n      \"collapsed\": true,\n      \"datasource\": {\n        \"type\": \"prometheus\",\n        \"uid\": \"000000001\"\n      },\n      \"gridPos\": {\n        \"h\": 1,\n        \"w\": 24,\n        \"x\": 0,\n        \"y\": 21\n      },\n      \"id\": 266,\n      \"panels\": [\n        {\n          \"datasource\": {\n            \"type\": \"prometheus\",\n            \"uid\": \"${datasource}\"\n          },\n          \"fieldConfig\": {\n            \"defaults\": {\n              \"color\": {\n                \"mode\": \"palette-classic\"\n              },\n              \"custom\": {\n                \"axisCenteredZero\": false,\n                \"axisColorMode\": \"text\",\n                \"axisLabel\": \"bytes\",\n                \"axisPlacement\": \"auto\",\n                \"barAlignment\": 0,\n                \"drawStyle\": \"line\",\n                \"fillOpacity\": 20,\n                \"gradientMode\": \"none\",\n                \"hideFrom\": {\n                  \"legend\": false,\n                  \"tooltip\": false,\n                  \"viz\": false\n                },\n                \"lineInterpolation\": \"linear\",\n                \"lineWidth\": 1,\n                \"pointSize\": 5,\n       
         \"scaleDistribution\": {\n                  \"type\": \"linear\"\n                },\n                \"showPoints\": \"never\",\n                \"spanNulls\": false,\n                \"stacking\": {\n                  \"group\": \"A\",\n                  \"mode\": \"normal\"\n                },\n                \"thresholdsStyle\": {\n                  \"mode\": \"off\"\n                }\n              },\n              \"links\": [],\n              \"mappings\": [],\n              \"min\": 0,\n              \"thresholds\": {\n                \"mode\": \"absolute\",\n                \"steps\": [\n                  {\n                    \"color\": \"green\"\n                  },\n                  {\n                    \"color\": \"red\",\n                    \"value\": 80\n                  }\n                ]\n              },\n              \"unit\": \"bytes\"\n            },\n            \"overrides\": [\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Apps\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#629E51\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Buffers\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#614D93\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Cache\"\n                
},\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#6D1F62\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Cached\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#511749\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Committed\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#508642\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Free\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#0A437C\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\":\n                      \"Hardware Corrupted - Amount of RAM that the kernel identified as corrupted / not working\"\n                },\n                \"properties\": [\n                  {\n      
              \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#CFFAFF\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Inactive\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#584477\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"PageTables\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#0A50A1\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Page_Tables\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#0A50A1\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"RAM_Free\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#E0F9D7\",\n                      \"mode\": 
\"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Slab\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#806EB7\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Slab_Cache\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#E0752D\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Swap\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#BF1B00\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Swap_Cache\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#C15C17\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": 
\"byName\",\n                  \"options\": \"Swap_Free\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#2F575E\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Unused\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#EAB839\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              }\n            ]\n          },\n          \"gridPos\": {\n            \"h\": 10,\n            \"w\": 12,\n            \"x\": 0,\n            \"y\": 54\n          },\n          \"id\": 136,\n          \"links\": [],\n          \"options\": {\n            \"legend\": {\n              \"calcs\": [\n                \"mean\",\n                \"lastNotNull\",\n                \"max\",\n                \"min\"\n              ],\n              \"displayMode\": \"table\",\n              \"placement\": \"bottom\",\n              \"showLegend\": true,\n              \"width\": 350\n            },\n            \"tooltip\": {\n              \"mode\": \"multi\",\n              \"sort\": \"none\"\n            }\n          },\n          \"pluginVersion\": \"9.2.0\",\n          \"targets\": [\n            {\n              \"datasource\": {\n                \"type\": \"prometheus\",\n                \"uid\": \"${datasource}\"\n              },\n              \"expr\": \"node_memory_Inactive_bytes{instance=\\\"$node\\\",job=\\\"$job\\\"}\",\n              \"format\": \"time_series\",\n              \"intervalFactor\": 1,\n              \"legendFormat\":\n       
           \"Inactive - Memory which has been less recently used.  It is more eligible to be reclaimed for other purposes\",\n              \"refId\": \"A\",\n              \"step\": 240\n            },\n            {\n              \"datasource\": {\n                \"type\": \"prometheus\",\n                \"uid\": \"${datasource}\"\n              },\n              \"expr\": \"node_memory_Active_bytes{instance=\\\"$node\\\",job=\\\"$job\\\"}\",\n              \"format\": \"time_series\",\n              \"intervalFactor\": 1,\n              \"legendFormat\":\n                  \"Active - Memory that has been used more recently and usually not reclaimed unless absolutely necessary\",\n              \"refId\": \"B\",\n              \"step\": 240\n            }\n          ],\n          \"title\": \"Memory Active / Inactive\",\n          \"type\": \"timeseries\"\n        },\n        {\n          \"datasource\": {\n            \"type\": \"prometheus\",\n            \"uid\": \"${datasource}\"\n          },\n          \"fieldConfig\": {\n            \"defaults\": {\n              \"color\": {\n                \"mode\": \"palette-classic\"\n              },\n              \"custom\": {\n                \"axisCenteredZero\": false,\n                \"axisColorMode\": \"text\",\n                \"axisLabel\": \"bytes\",\n                \"axisPlacement\": \"auto\",\n                \"barAlignment\": 0,\n                \"drawStyle\": \"line\",\n                \"fillOpacity\": 20,\n                \"gradientMode\": \"none\",\n                \"hideFrom\": {\n                  \"legend\": false,\n                  \"tooltip\": false,\n                  \"viz\": false\n                },\n                \"lineInterpolation\": \"linear\",\n                \"lineWidth\": 1,\n                \"pointSize\": 5,\n                \"scaleDistribution\": {\n                  \"type\": \"linear\"\n                },\n                \"showPoints\": \"never\",\n                
\"spanNulls\": false,\n                \"stacking\": {\n                  \"group\": \"A\",\n                  \"mode\": \"none\"\n                },\n                \"thresholdsStyle\": {\n                  \"mode\": \"off\"\n                }\n              },\n              \"links\": [],\n              \"mappings\": [],\n              \"min\": 0,\n              \"thresholds\": {\n                \"mode\": \"absolute\",\n                \"steps\": [\n                  {\n                    \"color\": \"green\"\n                  },\n                  {\n                    \"color\": \"red\",\n                    \"value\": 80\n                  }\n                ]\n              },\n              \"unit\": \"bytes\"\n            },\n            \"overrides\": [\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Apps\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#629E51\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Buffers\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#614D93\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Cache\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      
\"fixedColor\": \"#6D1F62\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Cached\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#511749\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Committed\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#508642\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Free\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#0A437C\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\":\n                      \"Hardware Corrupted - Amount of RAM that the kernel identified as corrupted / not working\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#CFFAFF\",\n                      \"mode\": 
\"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Inactive\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#584477\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"PageTables\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#0A50A1\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Page_Tables\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#0A50A1\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"RAM_Free\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#E0F9D7\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  
\"id\": \"byName\",\n                  \"options\": \"Slab\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#806EB7\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Slab_Cache\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#E0752D\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Swap\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#BF1B00\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Swap_Cache\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#C15C17\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Swap_Free\"\n                },\n                \"properties\": [\n                  {\n                    
\"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#2F575E\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Unused\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#EAB839\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byRegexp\",\n                  \"options\": \"/.*CommitLimit - *./\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#BF1B00\",\n                      \"mode\": \"fixed\"\n                    }\n                  },\n                  {\n                    \"id\": \"custom.fillOpacity\",\n                    \"value\": 0\n                  }\n                ]\n              }\n            ]\n          },\n          \"gridPos\": {\n            \"h\": 10,\n            \"w\": 12,\n            \"x\": 12,\n            \"y\": 54\n          },\n          \"id\": 135,\n          \"links\": [],\n          \"options\": {\n            \"legend\": {\n              \"calcs\": [\n                \"mean\",\n                \"lastNotNull\",\n                \"max\",\n                \"min\"\n              ],\n              \"displayMode\": \"table\",\n              \"placement\": \"bottom\",\n              \"showLegend\": true,\n              \"width\": 350\n            },\n            \"tooltip\": {\n              \"mode\": \"multi\",\n              \"sort\": \"none\"\n            }\n     
     },\n          \"pluginVersion\": \"9.2.0\",\n          \"targets\": [\n            {\n              \"datasource\": {\n                \"type\": \"prometheus\",\n                \"uid\": \"${datasource}\"\n              },\n              \"expr\": \"node_memory_Committed_AS_bytes{instance=\\\"$node\\\",job=\\\"$job\\\"}\",\n              \"format\": \"time_series\",\n              \"intervalFactor\": 1,\n              \"legendFormat\": \"Committed_AS - Amount of memory presently allocated on the system\",\n              \"refId\": \"A\",\n              \"step\": 240\n            },\n            {\n              \"datasource\": {\n                \"type\": \"prometheus\",\n                \"uid\": \"${datasource}\"\n              },\n              \"expr\": \"node_memory_CommitLimit_bytes{instance=\\\"$node\\\",job=\\\"$job\\\"}\",\n              \"format\": \"time_series\",\n              \"intervalFactor\": 1,\n              \"legendFormat\":\n                  \"CommitLimit - Amount of  memory currently available to be allocated on the system\",\n              \"refId\": \"B\",\n              \"step\": 240\n            }\n          ],\n          \"title\": \"Memory Committed\",\n          \"type\": \"timeseries\"\n        },\n        {\n          \"datasource\": {\n            \"type\": \"prometheus\",\n            \"uid\": \"${datasource}\"\n          },\n          \"fieldConfig\": {\n            \"defaults\": {\n              \"color\": {\n                \"mode\": \"palette-classic\"\n              },\n              \"custom\": {\n                \"axisCenteredZero\": false,\n                \"axisColorMode\": \"text\",\n                \"axisLabel\": \"bytes\",\n                \"axisPlacement\": \"auto\",\n                \"barAlignment\": 0,\n                \"drawStyle\": \"line\",\n                \"fillOpacity\": 20,\n                \"gradientMode\": \"none\",\n                \"hideFrom\": {\n                  \"legend\": false,\n                  
\"tooltip\": false,\n                  \"viz\": false\n                },\n                \"lineInterpolation\": \"linear\",\n                \"lineWidth\": 1,\n                \"pointSize\": 5,\n                \"scaleDistribution\": {\n                  \"type\": \"linear\"\n                },\n                \"showPoints\": \"never\",\n                \"spanNulls\": false,\n                \"stacking\": {\n                  \"group\": \"A\",\n                  \"mode\": \"normal\"\n                },\n                \"thresholdsStyle\": {\n                  \"mode\": \"off\"\n                }\n              },\n              \"links\": [],\n              \"mappings\": [],\n              \"min\": 0,\n              \"thresholds\": {\n                \"mode\": \"absolute\",\n                \"steps\": [\n                  {\n                    \"color\": \"green\"\n                  },\n                  {\n                    \"color\": \"red\",\n                    \"value\": 80\n                  }\n                ]\n              },\n              \"unit\": \"bytes\"\n            },\n            \"overrides\": [\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Apps\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#629E51\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Buffers\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#614D93\",\n                      \"mode\": \"fixed\"\n                    }\n    
              }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Cache\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#6D1F62\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Cached\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#511749\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Committed\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#508642\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Free\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#0A437C\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  
\"options\":\n                      \"Hardware Corrupted - Amount of RAM that the kernel identified as corrupted / not working\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#CFFAFF\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Inactive\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#584477\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"PageTables\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#0A50A1\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Page_Tables\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#0A50A1\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"RAM_Free\"\n                },\n         
       \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#E0F9D7\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Slab\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#806EB7\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Slab_Cache\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#E0752D\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Swap\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#BF1B00\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Swap_Cache\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": 
\"#C15C17\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Swap_Free\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#2F575E\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Unused\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#EAB839\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              }\n            ]\n          },\n          \"gridPos\": {\n            \"h\": 10,\n            \"w\": 12,\n            \"x\": 0,\n            \"y\": 64\n          },\n          \"id\": 191,\n          \"links\": [],\n          \"options\": {\n            \"legend\": {\n              \"calcs\": [\n                \"mean\",\n                \"lastNotNull\",\n                \"max\",\n                \"min\"\n              ],\n              \"displayMode\": \"table\",\n              \"placement\": \"bottom\",\n              \"showLegend\": true,\n              \"width\": 350\n            },\n            \"tooltip\": {\n              \"mode\": \"multi\",\n              \"sort\": \"none\"\n            }\n          },\n          \"pluginVersion\": \"9.2.0\",\n          \"targets\": [\n            {\n              \"datasource\": {\n                \"type\": \"prometheus\",\n                \"uid\": \"${datasource}\"\n              },\n       
       \"expr\": \"node_memory_Inactive_file_bytes{instance=\\\"$node\\\",job=\\\"$job\\\"}\",\n              \"format\": \"time_series\",\n              \"hide\": false,\n              \"intervalFactor\": 1,\n              \"legendFormat\": \"Inactive_file - File-backed memory on inactive LRU list\",\n              \"refId\": \"A\",\n              \"step\": 240\n            },\n            {\n              \"datasource\": {\n                \"type\": \"prometheus\",\n                \"uid\": \"${datasource}\"\n              },\n              \"expr\": \"node_memory_Inactive_anon_bytes{instance=\\\"$node\\\",job=\\\"$job\\\"}\",\n              \"format\": \"time_series\",\n              \"hide\": false,\n              \"intervalFactor\": 1,\n              \"legendFormat\":\n                  \"Inactive_anon - Anonymous and swap cache on inactive LRU list, including tmpfs (shmem)\",\n              \"refId\": \"B\",\n              \"step\": 240\n            },\n            {\n              \"datasource\": {\n                \"type\": \"prometheus\",\n                \"uid\": \"${datasource}\"\n              },\n              \"expr\": \"node_memory_Active_file_bytes{instance=\\\"$node\\\",job=\\\"$job\\\"}\",\n              \"format\": \"time_series\",\n              \"hide\": false,\n              \"intervalFactor\": 1,\n              \"legendFormat\": \"Active_file - File-backed memory on active LRU list\",\n              \"refId\": \"C\",\n              \"step\": 240\n            },\n            {\n              \"datasource\": {\n                \"type\": \"prometheus\",\n                \"uid\": \"${datasource}\"\n              },\n              \"expr\": \"node_memory_Active_anon_bytes{instance=\\\"$node\\\",job=\\\"$job\\\"}\",\n              \"format\": \"time_series\",\n              \"hide\": false,\n              \"intervalFactor\": 1,\n              \"legendFormat\":\n                  \"Active_anon - Anonymous and swap cache on active least-recently-used 
(LRU) list, including tmpfs\",\n              \"refId\": \"D\",\n              \"step\": 240\n            }\n          ],\n          \"title\": \"Memory Active / Inactive Detail\",\n          \"type\": \"timeseries\"\n        },\n        {\n          \"datasource\": {\n            \"type\": \"prometheus\",\n            \"uid\": \"${datasource}\"\n          },\n          \"fieldConfig\": {\n            \"defaults\": {\n              \"color\": {\n                \"mode\": \"palette-classic\"\n              },\n              \"custom\": {\n                \"axisCenteredZero\": false,\n                \"axisColorMode\": \"text\",\n                \"axisLabel\": \"bytes\",\n                \"axisPlacement\": \"auto\",\n                \"barAlignment\": 0,\n                \"drawStyle\": \"line\",\n                \"fillOpacity\": 20,\n                \"gradientMode\": \"none\",\n                \"hideFrom\": {\n                  \"legend\": false,\n                  \"tooltip\": false,\n                  \"viz\": false\n                },\n                \"lineInterpolation\": \"linear\",\n                \"lineWidth\": 1,\n                \"pointSize\": 5,\n                \"scaleDistribution\": {\n                  \"type\": \"linear\"\n                },\n                \"showPoints\": \"never\",\n                \"spanNulls\": false,\n                \"stacking\": {\n                  \"group\": \"A\",\n                  \"mode\": \"none\"\n                },\n                \"thresholdsStyle\": {\n                  \"mode\": \"off\"\n                }\n              },\n              \"links\": [],\n              \"mappings\": [],\n              \"min\": 0,\n              \"thresholds\": {\n                \"mode\": \"absolute\",\n                \"steps\": [\n                  {\n                    \"color\": \"green\"\n                  },\n                  {\n                    \"color\": \"red\",\n                    \"value\": 80\n                  }\n  
              ]\n              },\n              \"unit\": \"bytes\"\n            },\n            \"overrides\": [\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Active\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#99440A\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Buffers\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#58140C\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Cache\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#6D1F62\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Cached\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#511749\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n       
           \"id\": \"byName\",\n                  \"options\": \"Committed\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#508642\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Dirty\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#6ED0E0\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Free\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#B7DBAB\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Inactive\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#EA6460\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Mapped\"\n                },\n                \"properties\": [\n                  {\n                   
 \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#052B51\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"PageTables\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#0A50A1\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Page_Tables\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#0A50A1\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Slab_Cache\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#EAB839\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Swap\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#BF1B00\",\n                      \"mode\": \"fixed\"\n           
         }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Swap_Cache\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#C15C17\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Total\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#511749\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Total RAM\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#052B51\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Total RAM + Swap\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#052B51\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": 
\"byName\",\n                  \"options\": \"Total Swap\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#614D93\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"VmallocUsed\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#EA6460\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              }\n            ]\n          },\n          \"gridPos\": {\n            \"h\": 10,\n            \"w\": 12,\n            \"x\": 12,\n            \"y\": 64\n          },\n          \"id\": 130,\n          \"links\": [],\n          \"options\": {\n            \"legend\": {\n              \"calcs\": [\n                \"mean\",\n                \"lastNotNull\",\n                \"max\",\n                \"min\"\n              ],\n              \"displayMode\": \"table\",\n              \"placement\": \"bottom\",\n              \"showLegend\": true\n            },\n            \"tooltip\": {\n              \"mode\": \"multi\",\n              \"sort\": \"none\"\n            }\n          },\n          \"pluginVersion\": \"9.2.0\",\n          \"targets\": [\n            {\n              \"datasource\": {\n                \"type\": \"prometheus\",\n                \"uid\": \"${datasource}\"\n              },\n              \"expr\": \"node_memory_Writeback_bytes{instance=\\\"$node\\\",job=\\\"$job\\\"}\",\n              \"format\": \"time_series\",\n              \"intervalFactor\": 1,\n              \"legendFormat\": \"Writeback - Memory which is 
actively being written back to disk\",\n              \"refId\": \"A\",\n              \"step\": 240\n            },\n            {\n              \"datasource\": {\n                \"type\": \"prometheus\",\n                \"uid\": \"${datasource}\"\n              },\n              \"expr\": \"node_memory_WritebackTmp_bytes{instance=\\\"$node\\\",job=\\\"$job\\\"}\",\n              \"format\": \"time_series\",\n              \"intervalFactor\": 1,\n              \"legendFormat\": \"WritebackTmp - Memory used by FUSE for temporary writeback buffers\",\n              \"refId\": \"B\",\n              \"step\": 240\n            },\n            {\n              \"datasource\": {\n                \"type\": \"prometheus\",\n                \"uid\": \"${datasource}\"\n              },\n              \"expr\": \"node_memory_Dirty_bytes{instance=\\\"$node\\\",job=\\\"$job\\\"}\",\n              \"format\": \"time_series\",\n              \"intervalFactor\": 1,\n              \"legendFormat\": \"Dirty - Memory which is waiting to get written back to the disk\",\n              \"refId\": \"C\",\n              \"step\": 240\n            }\n          ],\n          \"title\": \"Memory Writeback and Dirty\",\n          \"type\": \"timeseries\"\n        },\n        {\n          \"datasource\": {\n            \"type\": \"prometheus\",\n            \"uid\": \"${datasource}\"\n          },\n          \"fieldConfig\": {\n            \"defaults\": {\n              \"color\": {\n                \"mode\": \"palette-classic\"\n              },\n              \"custom\": {\n                \"axisCenteredZero\": false,\n                \"axisColorMode\": \"text\",\n                \"axisLabel\": \"bytes\",\n                \"axisPlacement\": \"auto\",\n                \"barAlignment\": 0,\n                \"drawStyle\": \"line\",\n                \"fillOpacity\": 20,\n                \"gradientMode\": \"none\",\n                \"hideFrom\": {\n                  \"legend\": false,\n        
          \"tooltip\": false,\n                  \"viz\": false\n                },\n                \"lineInterpolation\": \"linear\",\n                \"lineWidth\": 1,\n                \"pointSize\": 5,\n                \"scaleDistribution\": {\n                  \"type\": \"linear\"\n                },\n                \"showPoints\": \"never\",\n                \"spanNulls\": false,\n                \"stacking\": {\n                  \"group\": \"A\",\n                  \"mode\": \"none\"\n                },\n                \"thresholdsStyle\": {\n                  \"mode\": \"off\"\n                }\n              },\n              \"links\": [],\n              \"mappings\": [],\n              \"min\": 0,\n              \"thresholds\": {\n                \"mode\": \"absolute\",\n                \"steps\": [\n                  {\n                    \"color\": \"green\"\n                  },\n                  {\n                    \"color\": \"red\",\n                    \"value\": 80\n                  }\n                ]\n              },\n              \"unit\": \"bytes\"\n            },\n            \"overrides\": [\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Apps\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#629E51\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Buffers\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#614D93\",\n                      \"mode\": \"fixed\"\n                   
 }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Cache\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#6D1F62\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Cached\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#511749\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Committed\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#508642\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Free\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#0A437C\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  
\"options\":\n                      \"Hardware Corrupted - Amount of RAM that the kernel identified as corrupted / not working\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#CFFAFF\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Inactive\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#584477\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"PageTables\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#0A50A1\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Page_Tables\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#0A50A1\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"RAM_Free\"\n                },\n         
       \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#E0F9D7\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Slab\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#806EB7\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Slab_Cache\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#E0752D\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Swap\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#BF1B00\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Swap_Cache\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": 
\"#C15C17\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Swap_Free\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#2F575E\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Unused\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#EAB839\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\":\n                      \"ShmemHugePages - Memory used by shared memory (shmem) and tmpfs allocated  with huge pages\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"custom.fillOpacity\",\n                    \"value\": 0\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\":\n                      \"ShmemHugePages - Memory used by shared memory (shmem) and tmpfs allocated  with huge pages\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"custom.fillOpacity\",\n                    \"value\": 0\n                  }\n                ]\n              }\n            ]\n          
},\n          \"gridPos\": {\n            \"h\": 10,\n            \"w\": 12,\n            \"x\": 0,\n            \"y\": 74\n          },\n          \"id\": 138,\n          \"links\": [],\n          \"options\": {\n            \"legend\": {\n              \"calcs\": [\n                \"mean\",\n                \"lastNotNull\",\n                \"max\",\n                \"min\"\n              ],\n              \"displayMode\": \"table\",\n              \"placement\": \"bottom\",\n              \"showLegend\": true,\n              \"width\": 350\n            },\n            \"tooltip\": {\n              \"mode\": \"multi\",\n              \"sort\": \"none\"\n            }\n          },\n          \"pluginVersion\": \"9.2.0\",\n          \"targets\": [\n            {\n              \"datasource\": {\n                \"type\": \"prometheus\",\n                \"uid\": \"${datasource}\"\n              },\n              \"expr\": \"node_memory_Mapped_bytes{instance=\\\"$node\\\",job=\\\"$job\\\"}\",\n              \"format\": \"time_series\",\n              \"intervalFactor\": 1,\n              \"legendFormat\":\n                  \"Mapped - Used memory in mapped pages files which have been mapped, such as libraries\",\n              \"refId\": \"A\",\n              \"step\": 240\n            },\n            {\n              \"datasource\": {\n                \"type\": \"prometheus\",\n                \"uid\": \"${datasource}\"\n              },\n              \"expr\": \"node_memory_Shmem_bytes{instance=\\\"$node\\\",job=\\\"$job\\\"}\",\n              \"format\": \"time_series\",\n              \"intervalFactor\": 1,\n              \"legendFormat\":\n                  \"Shmem - Used shared memory (shared between several processes, thus including RAM disks)\",\n              \"refId\": \"B\",\n              \"step\": 240\n            },\n            {\n              \"datasource\": {\n                \"type\": \"prometheus\",\n                \"uid\": 
\"${datasource}\"\n              },\n              \"expr\": \"node_memory_ShmemHugePages_bytes{instance=\\\"$node\\\",job=\\\"$job\\\"}\",\n              \"format\": \"time_series\",\n              \"interval\": \"\",\n              \"intervalFactor\": 1,\n              \"legendFormat\":\n                  \"ShmemHugePages - Memory used by shared memory (shmem) and tmpfs allocated  with huge pages\",\n              \"refId\": \"C\",\n              \"step\": 240\n            },\n            {\n              \"datasource\": {\n                \"type\": \"prometheus\",\n                \"uid\": \"${datasource}\"\n              },\n              \"expr\": \"node_memory_ShmemPmdMapped_bytes{instance=\\\"$node\\\",job=\\\"$job\\\"}\",\n              \"format\": \"time_series\",\n              \"interval\": \"\",\n              \"intervalFactor\": 1,\n              \"legendFormat\":\n                  \"ShmemPmdMapped - Amount of shared (shmem/tmpfs) memory backed by huge pages\",\n              \"refId\": \"D\",\n              \"step\": 240\n            }\n          ],\n          \"title\": \"Memory Shared and Mapped\",\n          \"type\": \"timeseries\"\n        },\n        {\n          \"datasource\": {\n            \"type\": \"prometheus\",\n            \"uid\": \"${datasource}\"\n          },\n          \"fieldConfig\": {\n            \"defaults\": {\n              \"color\": {\n                \"mode\": \"palette-classic\"\n              },\n              \"custom\": {\n                \"axisCenteredZero\": false,\n                \"axisColorMode\": \"text\",\n                \"axisLabel\": \"bytes\",\n                \"axisPlacement\": \"auto\",\n                \"barAlignment\": 0,\n                \"drawStyle\": \"line\",\n                \"fillOpacity\": 20,\n                \"gradientMode\": \"none\",\n                \"hideFrom\": {\n                  \"legend\": false,\n                  \"tooltip\": false,\n                  \"viz\": false\n                
},\n                \"lineInterpolation\": \"linear\",\n                \"lineWidth\": 1,\n                \"pointSize\": 5,\n                \"scaleDistribution\": {\n                  \"type\": \"linear\"\n                },\n                \"showPoints\": \"never\",\n                \"spanNulls\": false,\n                \"stacking\": {\n                  \"group\": \"A\",\n                  \"mode\": \"normal\"\n                },\n                \"thresholdsStyle\": {\n                  \"mode\": \"off\"\n                }\n              },\n              \"links\": [],\n              \"mappings\": [],\n              \"min\": 0,\n              \"thresholds\": {\n                \"mode\": \"absolute\",\n                \"steps\": [\n                  {\n                    \"color\": \"green\"\n                  },\n                  {\n                    \"color\": \"red\",\n                    \"value\": 80\n                  }\n                ]\n              },\n              \"unit\": \"bytes\"\n            },\n            \"overrides\": [\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Active\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#99440A\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Buffers\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#58140C\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              
{\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Cache\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#6D1F62\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Cached\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#511749\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Committed\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#508642\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Dirty\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#6ED0E0\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Free\"\n                },\n                \"properties\": [\n 
                 {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#B7DBAB\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Inactive\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#EA6460\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Mapped\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#052B51\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"PageTables\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#0A50A1\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Page_Tables\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#0A50A1\",\n               
       \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Slab_Cache\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#EAB839\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Swap\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#BF1B00\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Swap_Cache\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#C15C17\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Total\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#511749\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n         
         \"id\": \"byName\",\n                  \"options\": \"Total RAM\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#052B51\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Total RAM + Swap\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#052B51\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Total Swap\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#614D93\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"VmallocUsed\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#EA6460\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              }\n            ]\n          },\n          \"gridPos\": {\n            \"h\": 10,\n            \"w\": 12,\n            \"x\": 12,\n            \"y\": 74\n          },\n          \"id\": 131,\n          \"links\": 
[],\n          \"options\": {\n            \"legend\": {\n              \"calcs\": [\n                \"mean\",\n                \"lastNotNull\",\n                \"max\",\n                \"min\"\n              ],\n              \"displayMode\": \"table\",\n              \"placement\": \"bottom\",\n              \"showLegend\": true\n            },\n            \"tooltip\": {\n              \"mode\": \"multi\",\n              \"sort\": \"none\"\n            }\n          },\n          \"pluginVersion\": \"9.2.0\",\n          \"targets\": [\n            {\n              \"datasource\": {\n                \"type\": \"prometheus\",\n                \"uid\": \"${datasource}\"\n              },\n              \"expr\": \"node_memory_SUnreclaim_bytes{instance=\\\"$node\\\",job=\\\"$job\\\"}\",\n              \"format\": \"time_series\",\n              \"intervalFactor\": 1,\n              \"legendFormat\":\n                  \"SUnreclaim - Part of Slab, that cannot be reclaimed on memory pressure\",\n              \"refId\": \"A\",\n              \"step\": 240\n            },\n            {\n              \"datasource\": {\n                \"type\": \"prometheus\",\n                \"uid\": \"${datasource}\"\n              },\n              \"expr\": \"node_memory_SReclaimable_bytes{instance=\\\"$node\\\",job=\\\"$job\\\"}\",\n              \"format\": \"time_series\",\n              \"intervalFactor\": 1,\n              \"legendFormat\":\n                  \"SReclaimable - Part of Slab, that might be reclaimed, such as caches\",\n              \"refId\": \"B\",\n              \"step\": 240\n            }\n          ],\n          \"title\": \"Memory Slab\",\n          \"type\": \"timeseries\"\n        },\n        {\n          \"datasource\": {\n            \"type\": \"prometheus\",\n            \"uid\": \"${datasource}\"\n          },\n          \"fieldConfig\": {\n            \"defaults\": {\n              \"color\": {\n                \"mode\": \"palette-classic\"\n    
          },\n              \"custom\": {\n                \"axisCenteredZero\": false,\n                \"axisColorMode\": \"text\",\n                \"axisLabel\": \"bytes\",\n                \"axisPlacement\": \"auto\",\n                \"barAlignment\": 0,\n                \"drawStyle\": \"line\",\n                \"fillOpacity\": 20,\n                \"gradientMode\": \"none\",\n                \"hideFrom\": {\n                  \"legend\": false,\n                  \"tooltip\": false,\n                  \"viz\": false\n                },\n                \"lineInterpolation\": \"linear\",\n                \"lineWidth\": 1,\n                \"pointSize\": 5,\n                \"scaleDistribution\": {\n                  \"type\": \"linear\"\n                },\n                \"showPoints\": \"never\",\n                \"spanNulls\": false,\n                \"stacking\": {\n                  \"group\": \"A\",\n                  \"mode\": \"none\"\n                },\n                \"thresholdsStyle\": {\n                  \"mode\": \"off\"\n                }\n              },\n              \"links\": [],\n              \"mappings\": [],\n              \"min\": 0,\n              \"thresholds\": {\n                \"mode\": \"absolute\",\n                \"steps\": [\n                  {\n                    \"color\": \"green\"\n                  },\n                  {\n                    \"color\": \"red\",\n                    \"value\": 80\n                  }\n                ]\n              },\n              \"unit\": \"bytes\"\n            },\n            \"overrides\": [\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Active\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#99440A\",\n                      \"mode\": \"fixed\"\n         
           }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Buffers\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#58140C\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Cache\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#6D1F62\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Cached\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#511749\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Committed\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#508642\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n         
         \"options\": \"Dirty\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#6ED0E0\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Free\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#B7DBAB\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Inactive\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#EA6460\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Mapped\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#052B51\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"PageTables\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                   
 \"value\": {\n                      \"fixedColor\": \"#0A50A1\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Page_Tables\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#0A50A1\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Slab_Cache\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#EAB839\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Swap\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#BF1B00\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Swap_Cache\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#C15C17\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n       
         ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Total\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#511749\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Total RAM\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#052B51\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Total RAM + Swap\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#052B51\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"VmallocUsed\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#EA6460\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              }\n            ]\n          },\n          \"gridPos\": {\n            \"h\": 10,\n            \"w\": 12,\n            \"x\": 
0,\n            \"y\": 84\n          },\n          \"id\": 70,\n          \"links\": [],\n          \"options\": {\n            \"legend\": {\n              \"calcs\": [\n                \"mean\",\n                \"lastNotNull\",\n                \"max\",\n                \"min\"\n              ],\n              \"displayMode\": \"table\",\n              \"placement\": \"bottom\",\n              \"showLegend\": true\n            },\n            \"tooltip\": {\n              \"mode\": \"multi\",\n              \"sort\": \"none\"\n            }\n          },\n          \"pluginVersion\": \"9.2.0\",\n          \"targets\": [\n            {\n              \"datasource\": {\n                \"type\": \"prometheus\",\n                \"uid\": \"${datasource}\"\n              },\n              \"expr\": \"node_memory_VmallocChunk_bytes{instance=\\\"$node\\\",job=\\\"$job\\\"}\",\n              \"format\": \"time_series\",\n              \"hide\": false,\n              \"intervalFactor\": 1,\n              \"legendFormat\":\n                  \"VmallocChunk - Largest contiguous block of vmalloc area which is free\",\n              \"refId\": \"A\",\n              \"step\": 240\n            },\n            {\n              \"datasource\": {\n                \"type\": \"prometheus\",\n                \"uid\": \"${datasource}\"\n              },\n              \"expr\": \"node_memory_VmallocTotal_bytes{instance=\\\"$node\\\",job=\\\"$job\\\"}\",\n              \"format\": \"time_series\",\n              \"hide\": false,\n              \"intervalFactor\": 1,\n              \"legendFormat\": \"VmallocTotal - Total size of vmalloc memory area\",\n              \"refId\": \"B\",\n              \"step\": 240\n            },\n            {\n              \"datasource\": {\n                \"type\": \"prometheus\",\n                \"uid\": \"${datasource}\"\n              },\n              \"expr\": \"node_memory_VmallocUsed_bytes{instance=\\\"$node\\\",job=\\\"$job\\\"}\",\n      
        \"format\": \"time_series\",\n              \"hide\": false,\n              \"intervalFactor\": 1,\n              \"legendFormat\": \"VmallocUsed - Amount of vmalloc area which is used\",\n              \"refId\": \"C\",\n              \"step\": 240\n            }\n          ],\n          \"title\": \"Memory Vmalloc\",\n          \"type\": \"timeseries\"\n        },\n        {\n          \"datasource\": {\n            \"type\": \"prometheus\",\n            \"uid\": \"${datasource}\"\n          },\n          \"fieldConfig\": {\n            \"defaults\": {\n              \"color\": {\n                \"mode\": \"palette-classic\"\n              },\n              \"custom\": {\n                \"axisCenteredZero\": false,\n                \"axisColorMode\": \"text\",\n                \"axisLabel\": \"bytes\",\n                \"axisPlacement\": \"auto\",\n                \"barAlignment\": 0,\n                \"drawStyle\": \"line\",\n                \"fillOpacity\": 20,\n                \"gradientMode\": \"none\",\n                \"hideFrom\": {\n                  \"legend\": false,\n                  \"tooltip\": false,\n                  \"viz\": false\n                },\n                \"lineInterpolation\": \"linear\",\n                \"lineWidth\": 1,\n                \"pointSize\": 5,\n                \"scaleDistribution\": {\n                  \"type\": \"linear\"\n                },\n                \"showPoints\": \"never\",\n                \"spanNulls\": false,\n                \"stacking\": {\n                  \"group\": \"A\",\n                  \"mode\": \"none\"\n                },\n                \"thresholdsStyle\": {\n                  \"mode\": \"off\"\n                }\n              },\n              \"links\": [],\n              \"mappings\": [],\n              \"min\": 0,\n              \"thresholds\": {\n                \"mode\": \"absolute\",\n                \"steps\": [\n                  {\n                    \"color\": 
\"green\"\n                  },\n                  {\n                    \"color\": \"red\",\n                    \"value\": 80\n                  }\n                ]\n              },\n              \"unit\": \"bytes\"\n            },\n            \"overrides\": [\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Apps\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#629E51\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Buffers\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#614D93\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Cache\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#6D1F62\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Cached\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#511749\",\n                      
\"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Committed\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#508642\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Free\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#0A437C\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\":\n                      \"Hardware Corrupted - Amount of RAM that the kernel identified as corrupted / not working\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#CFFAFF\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Inactive\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#584477\",\n                      \"mode\": \"fixed\"\n                    }\n                  
}\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"PageTables\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#0A50A1\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Page_Tables\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#0A50A1\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"RAM_Free\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#E0F9D7\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Slab\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#806EB7\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": 
\"Slab_Cache\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#E0752D\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Swap\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#BF1B00\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Swap_Cache\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#C15C17\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Swap_Free\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#2F575E\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Unused\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n 
                     \"fixedColor\": \"#EAB839\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              }\n            ]\n          },\n          \"gridPos\": {\n            \"h\": 10,\n            \"w\": 12,\n            \"x\": 12,\n            \"y\": 84\n          },\n          \"id\": 159,\n          \"links\": [],\n          \"options\": {\n            \"legend\": {\n              \"calcs\": [\n                \"mean\",\n                \"lastNotNull\",\n                \"max\",\n                \"min\"\n              ],\n              \"displayMode\": \"table\",\n              \"placement\": \"bottom\",\n              \"showLegend\": true,\n              \"width\": 350\n            },\n            \"tooltip\": {\n              \"mode\": \"multi\",\n              \"sort\": \"none\"\n            }\n          },\n          \"pluginVersion\": \"9.2.0\",\n          \"targets\": [\n            {\n              \"datasource\": {\n                \"type\": \"prometheus\",\n                \"uid\": \"${datasource}\"\n              },\n              \"expr\": \"node_memory_Bounce_bytes{instance=\\\"$node\\\",job=\\\"$job\\\"}\",\n              \"format\": \"time_series\",\n              \"intervalFactor\": 1,\n              \"legendFormat\": \"Bounce - Memory used for block device bounce buffers\",\n              \"refId\": \"A\",\n              \"step\": 240\n            }\n          ],\n          \"title\": \"Memory Bounce\",\n          \"type\": \"timeseries\"\n        },\n        {\n          \"datasource\": {\n            \"type\": \"prometheus\",\n            \"uid\": \"${datasource}\"\n          },\n          \"fieldConfig\": {\n            \"defaults\": {\n              \"color\": {\n                \"mode\": \"palette-classic\"\n              },\n              \"custom\": {\n                \"axisCenteredZero\": false,\n                \"axisColorMode\": \"text\",\n                \"axisLabel\": 
\"bytes\",\n                \"axisPlacement\": \"auto\",\n                \"barAlignment\": 0,\n                \"drawStyle\": \"line\",\n                \"fillOpacity\": 20,\n                \"gradientMode\": \"none\",\n                \"hideFrom\": {\n                  \"legend\": false,\n                  \"tooltip\": false,\n                  \"viz\": false\n                },\n                \"lineInterpolation\": \"linear\",\n                \"lineWidth\": 1,\n                \"pointSize\": 5,\n                \"scaleDistribution\": {\n                  \"type\": \"linear\"\n                },\n                \"showPoints\": \"never\",\n                \"spanNulls\": false,\n                \"stacking\": {\n                  \"group\": \"A\",\n                  \"mode\": \"none\"\n                },\n                \"thresholdsStyle\": {\n                  \"mode\": \"off\"\n                }\n              },\n              \"links\": [],\n              \"mappings\": [],\n              \"min\": 0,\n              \"thresholds\": {\n                \"mode\": \"absolute\",\n                \"steps\": [\n                  {\n                    \"color\": \"green\"\n                  },\n                  {\n                    \"color\": \"red\",\n                    \"value\": 80\n                  }\n                ]\n              },\n              \"unit\": \"bytes\"\n            },\n            \"overrides\": [\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Active\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#99440A\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n      
            \"options\": \"Buffers\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#58140C\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Cache\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#6D1F62\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Cached\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#511749\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Committed\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#508642\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Dirty\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                 
   \"value\": {\n                      \"fixedColor\": \"#6ED0E0\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Free\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#B7DBAB\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Inactive\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#EA6460\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Mapped\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#052B51\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"PageTables\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#0A50A1\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n            
    ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Page_Tables\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#0A50A1\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Slab_Cache\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#EAB839\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Swap\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#BF1B00\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Swap_Cache\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#C15C17\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Total\"\n      
          },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#511749\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Total RAM\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#052B51\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Total RAM + Swap\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#052B51\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"VmallocUsed\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#EA6460\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byRegexp\",\n                  \"options\": \"/.*Inactive *./\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"custom.transform\",\n                    
\"value\": \"negative-Y\"\n                  }\n                ]\n              }\n            ]\n          },\n          \"gridPos\": {\n            \"h\": 10,\n            \"w\": 12,\n            \"x\": 0,\n            \"y\": 94\n          },\n          \"id\": 129,\n          \"links\": [],\n          \"options\": {\n            \"legend\": {\n              \"calcs\": [\n                \"mean\",\n                \"lastNotNull\",\n                \"max\",\n                \"min\"\n              ],\n              \"displayMode\": \"table\",\n              \"placement\": \"bottom\",\n              \"showLegend\": true\n            },\n            \"tooltip\": {\n              \"mode\": \"multi\",\n              \"sort\": \"none\"\n            }\n          },\n          \"pluginVersion\": \"9.2.0\",\n          \"targets\": [\n            {\n              \"datasource\": {\n                \"type\": \"prometheus\",\n                \"uid\": \"${datasource}\"\n              },\n              \"expr\": \"node_memory_AnonHugePages_bytes{instance=\\\"$node\\\",job=\\\"$job\\\"}\",\n              \"format\": \"time_series\",\n              \"intervalFactor\": 1,\n              \"legendFormat\": \"AnonHugePages - Memory in anonymous huge pages\",\n              \"refId\": \"A\",\n              \"step\": 240\n            },\n            {\n              \"datasource\": {\n                \"type\": \"prometheus\",\n                \"uid\": \"${datasource}\"\n              },\n              \"expr\": \"node_memory_AnonPages_bytes{instance=\\\"$node\\\",job=\\\"$job\\\"}\",\n              \"format\": \"time_series\",\n              \"intervalFactor\": 1,\n              \"legendFormat\": \"AnonPages - Memory in user pages not backed by files\",\n              \"refId\": \"B\",\n              \"step\": 240\n            }\n          ],\n          \"title\": \"Memory Anonymous\",\n          \"type\": \"timeseries\"\n        },\n        {\n          \"datasource\": {\n            
\"type\": \"prometheus\",\n            \"uid\": \"${datasource}\"\n          },\n          \"fieldConfig\": {\n            \"defaults\": {\n              \"color\": {\n                \"mode\": \"palette-classic\"\n              },\n              \"custom\": {\n                \"axisCenteredZero\": false,\n                \"axisColorMode\": \"text\",\n                \"axisLabel\": \"bytes\",\n                \"axisPlacement\": \"auto\",\n                \"barAlignment\": 0,\n                \"drawStyle\": \"line\",\n                \"fillOpacity\": 20,\n                \"gradientMode\": \"none\",\n                \"hideFrom\": {\n                  \"legend\": false,\n                  \"tooltip\": false,\n                  \"viz\": false\n                },\n                \"lineInterpolation\": \"linear\",\n                \"lineWidth\": 1,\n                \"pointSize\": 5,\n                \"scaleDistribution\": {\n                  \"type\": \"linear\"\n                },\n                \"showPoints\": \"never\",\n                \"spanNulls\": false,\n                \"stacking\": {\n                  \"group\": \"A\",\n                  \"mode\": \"none\"\n                },\n                \"thresholdsStyle\": {\n                  \"mode\": \"off\"\n                }\n              },\n              \"links\": [],\n              \"mappings\": [],\n              \"min\": 0,\n              \"thresholds\": {\n                \"mode\": \"absolute\",\n                \"steps\": [\n                  {\n                    \"color\": \"green\"\n                  },\n                  {\n                    \"color\": \"red\",\n                    \"value\": 80\n                  }\n                ]\n              },\n              \"unit\": \"bytes\"\n            },\n            \"overrides\": [\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Apps\"\n                },\n                
\"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#629E51\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Buffers\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#614D93\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Cache\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#6D1F62\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Cached\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#511749\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Committed\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#508642\",\n   
                   \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Free\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#0A437C\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\":\n                      \"Hardware Corrupted - Amount of RAM that the kernel identified as corrupted / not working\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#CFFAFF\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Inactive\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#584477\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"PageTables\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#0A50A1\",\n                      \"mode\": \"fixed\"\n                    
}\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Page_Tables\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#0A50A1\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"RAM_Free\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#E0F9D7\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Slab\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#806EB7\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Slab_Cache\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#E0752D\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n              
    \"options\": \"Swap\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#BF1B00\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Swap_Cache\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#C15C17\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Swap_Free\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#2F575E\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Unused\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#EAB839\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              }\n            ]\n          },\n          \"gridPos\": {\n            \"h\": 10,\n            \"w\": 12,\n            \"x\": 12,\n            \"y\": 94\n          },\n          \"id\": 160,\n          \"links\": [],\n          \"options\": {\n            \"legend\": {\n   
           \"calcs\": [\n                \"mean\",\n                \"lastNotNull\",\n                \"max\",\n                \"min\"\n              ],\n              \"displayMode\": \"table\",\n              \"placement\": \"bottom\",\n              \"showLegend\": true,\n              \"width\": 350\n            },\n            \"tooltip\": {\n              \"mode\": \"multi\",\n              \"sort\": \"none\"\n            }\n          },\n          \"pluginVersion\": \"9.2.0\",\n          \"targets\": [\n            {\n              \"datasource\": {\n                \"type\": \"prometheus\",\n                \"uid\": \"${datasource}\"\n              },\n              \"expr\": \"node_memory_KernelStack_bytes{instance=\\\"$node\\\",job=\\\"$job\\\"}\",\n              \"format\": \"time_series\",\n              \"intervalFactor\": 1,\n              \"legendFormat\": \"KernelStack - Kernel memory stack. This is not reclaimable\",\n              \"refId\": \"A\",\n              \"step\": 240\n            },\n            {\n              \"datasource\": {\n                \"type\": \"prometheus\",\n                \"uid\": \"${datasource}\"\n              },\n              \"expr\": \"node_memory_Percpu_bytes{instance=\\\"$node\\\",job=\\\"$job\\\"}\",\n              \"format\": \"time_series\",\n              \"interval\": \"\",\n              \"intervalFactor\": 1,\n              \"legendFormat\": \"PerCPU - Per CPU memory allocated dynamically by loadable modules\",\n              \"refId\": \"B\",\n              \"step\": 240\n            }\n          ],\n          \"title\": \"Memory Kernel / CPU\",\n          \"type\": \"timeseries\"\n        },\n        {\n          \"datasource\": {\n            \"type\": \"prometheus\",\n            \"uid\": \"${datasource}\"\n          },\n          \"fieldConfig\": {\n            \"defaults\": {\n              \"color\": {\n                \"mode\": \"palette-classic\"\n              },\n              \"custom\": {\n  
              \"axisCenteredZero\": false,\n                \"axisColorMode\": \"text\",\n                \"axisLabel\": \"pages\",\n                \"axisPlacement\": \"auto\",\n                \"barAlignment\": 0,\n                \"drawStyle\": \"line\",\n                \"fillOpacity\": 20,\n                \"gradientMode\": \"none\",\n                \"hideFrom\": {\n                  \"legend\": false,\n                  \"tooltip\": false,\n                  \"viz\": false\n                },\n                \"lineInterpolation\": \"linear\",\n                \"lineWidth\": 1,\n                \"pointSize\": 5,\n                \"scaleDistribution\": {\n                  \"type\": \"linear\"\n                },\n                \"showPoints\": \"never\",\n                \"spanNulls\": false,\n                \"stacking\": {\n                  \"group\": \"A\",\n                  \"mode\": \"none\"\n                },\n                \"thresholdsStyle\": {\n                  \"mode\": \"off\"\n                }\n              },\n              \"links\": [],\n              \"mappings\": [],\n              \"min\": 0,\n              \"thresholds\": {\n                \"mode\": \"absolute\",\n                \"steps\": [\n                  {\n                    \"color\": \"green\"\n                  },\n                  {\n                    \"color\": \"red\",\n                    \"value\": 80\n                  }\n                ]\n              },\n              \"unit\": \"short\"\n            },\n            \"overrides\": [\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Active\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#99440A\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n          
      ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Buffers\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#58140C\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Cache\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#6D1F62\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Cached\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#511749\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Committed\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#508642\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Dirty\"\n            
    },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#6ED0E0\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Free\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#B7DBAB\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Inactive\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#EA6460\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Mapped\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#052B51\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"PageTables\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      
\"fixedColor\": \"#0A50A1\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Page_Tables\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#0A50A1\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Slab_Cache\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#EAB839\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Swap\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#BF1B00\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Swap_Cache\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#C15C17\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n       
       {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Total\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#511749\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Total RAM\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#806EB7\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Total RAM + Swap\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#806EB7\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"VmallocUsed\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#EA6460\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              }\n            ]\n          },\n          \"gridPos\": {\n            \"h\": 10,\n            \"w\": 12,\n            \"x\": 0,\n            \"y\": 104\n          
},\n          \"id\": 140,\n          \"links\": [],\n          \"options\": {\n            \"legend\": {\n              \"calcs\": [\n                \"lastNotNull\",\n                \"max\",\n                \"min\"\n              ],\n              \"displayMode\": \"table\",\n              \"placement\": \"bottom\",\n              \"showLegend\": true\n            },\n            \"tooltip\": {\n              \"mode\": \"multi\",\n              \"sort\": \"none\"\n            }\n          },\n          \"pluginVersion\": \"9.2.0\",\n          \"targets\": [\n            {\n              \"datasource\": {\n                \"type\": \"prometheus\",\n                \"uid\": \"${datasource}\"\n              },\n              \"expr\": \"node_memory_HugePages_Free{instance=\\\"$node\\\",job=\\\"$job\\\"}\",\n              \"format\": \"time_series\",\n              \"intervalFactor\": 1,\n              \"legendFormat\": \"HugePages_Free - Huge pages in the pool that are not yet allocated\",\n              \"refId\": \"A\",\n              \"step\": 240\n            },\n            {\n              \"datasource\": {\n                \"type\": \"prometheus\",\n                \"uid\": \"${datasource}\"\n              },\n              \"expr\": \"node_memory_HugePages_Rsvd{instance=\\\"$node\\\",job=\\\"$job\\\"}\",\n              \"format\": \"time_series\",\n              \"intervalFactor\": 1,\n              \"legendFormat\":\n                  \"HugePages_Rsvd - Huge pages for which a commitment to allocate from the pool has been made, but no allocation has yet been made\",\n              \"refId\": \"B\",\n              \"step\": 240\n            },\n            {\n              \"datasource\": {\n                \"type\": \"prometheus\",\n                \"uid\": \"${datasource}\"\n              },\n              \"expr\": \"node_memory_HugePages_Surp{instance=\\\"$node\\\",job=\\\"$job\\\"}\",\n              \"format\": \"time_series\",\n              
\"intervalFactor\": 1,\n              \"legendFormat\":\n                  \"HugePages_Surp - Huge pages in the pool above the value in /proc/sys/vm/nr_hugepages\",\n              \"refId\": \"C\",\n              \"step\": 240\n            }\n          ],\n          \"title\": \"Memory HugePages Counter\",\n          \"type\": \"timeseries\"\n        },\n        {\n          \"datasource\": {\n            \"type\": \"prometheus\",\n            \"uid\": \"${datasource}\"\n          },\n          \"fieldConfig\": {\n            \"defaults\": {\n              \"color\": {\n                \"mode\": \"palette-classic\"\n              },\n              \"custom\": {\n                \"axisCenteredZero\": false,\n                \"axisColorMode\": \"text\",\n                \"axisLabel\": \"bytes\",\n                \"axisPlacement\": \"auto\",\n                \"barAlignment\": 0,\n                \"drawStyle\": \"line\",\n                \"fillOpacity\": 20,\n                \"gradientMode\": \"none\",\n                \"hideFrom\": {\n                  \"legend\": false,\n                  \"tooltip\": false,\n                  \"viz\": false\n                },\n                \"lineInterpolation\": \"linear\",\n                \"lineWidth\": 1,\n                \"pointSize\": 5,\n                \"scaleDistribution\": {\n                  \"type\": \"linear\"\n                },\n                \"showPoints\": \"never\",\n                \"spanNulls\": false,\n                \"stacking\": {\n                  \"group\": \"A\",\n                  \"mode\": \"none\"\n                },\n                \"thresholdsStyle\": {\n                  \"mode\": \"off\"\n                }\n              },\n              \"links\": [],\n              \"mappings\": [],\n              \"min\": 0,\n              \"thresholds\": {\n                \"mode\": \"absolute\",\n                \"steps\": [\n                  {\n                    \"color\": \"green\"\n               
   },\n                  {\n                    \"color\": \"red\",\n                    \"value\": 80\n                  }\n                ]\n              },\n              \"unit\": \"bytes\"\n            },\n            \"overrides\": [\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Active\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#99440A\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Buffers\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#58140C\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Cache\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#6D1F62\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Cached\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#511749\",\n                      \"mode\": \"fixed\"\n          
          }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Committed\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#508642\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Dirty\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#6ED0E0\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Free\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#B7DBAB\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Inactive\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#EA6460\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n           
       \"options\": \"Mapped\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#052B51\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"PageTables\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#0A50A1\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Page_Tables\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#0A50A1\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Slab_Cache\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#EAB839\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Swap\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n             
       \"value\": {\n                      \"fixedColor\": \"#BF1B00\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Swap_Cache\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#C15C17\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Total\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#511749\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Total RAM\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#806EB7\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Total RAM + Swap\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#806EB7\",\n                      \"mode\": \"fixed\"\n                    }\n                 
 }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"VmallocUsed\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#EA6460\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              }\n            ]\n          },\n          \"gridPos\": {\n            \"h\": 10,\n            \"w\": 12,\n            \"x\": 12,\n            \"y\": 104\n          },\n          \"id\": 71,\n          \"links\": [],\n          \"options\": {\n            \"legend\": {\n              \"calcs\": [\n                \"lastNotNull\",\n                \"max\",\n                \"min\"\n              ],\n              \"displayMode\": \"table\",\n              \"placement\": \"bottom\",\n              \"showLegend\": true\n            },\n            \"tooltip\": {\n              \"mode\": \"multi\",\n              \"sort\": \"none\"\n            }\n          },\n          \"pluginVersion\": \"9.2.0\",\n          \"targets\": [\n            {\n              \"datasource\": {\n                \"type\": \"prometheus\",\n                \"uid\": \"${datasource}\"\n              },\n              \"expr\": \"node_memory_HugePages_Total{instance=\\\"$node\\\",job=\\\"$job\\\"}\",\n              \"format\": \"time_series\",\n              \"intervalFactor\": 1,\n              \"legendFormat\": \"HugePages - Total size of the pool of huge pages\",\n              \"refId\": \"A\",\n              \"step\": 240\n            },\n            {\n              \"datasource\": {\n                \"type\": \"prometheus\",\n                \"uid\": \"${datasource}\"\n              },\n              \"expr\": \"node_memory_Hugepagesize_bytes{instance=\\\"$node\\\",job=\\\"$job\\\"}\",\n              
\"format\": \"time_series\",\n              \"intervalFactor\": 1,\n              \"legendFormat\": \"Hugepagesize - Huge Page size\",\n              \"refId\": \"B\",\n              \"step\": 240\n            }\n          ],\n          \"title\": \"Memory HugePages Size\",\n          \"type\": \"timeseries\"\n        },\n        {\n          \"datasource\": {\n            \"type\": \"prometheus\",\n            \"uid\": \"${datasource}\"\n          },\n          \"fieldConfig\": {\n            \"defaults\": {\n              \"color\": {\n                \"mode\": \"palette-classic\"\n              },\n              \"custom\": {\n                \"axisCenteredZero\": false,\n                \"axisColorMode\": \"text\",\n                \"axisLabel\": \"bytes\",\n                \"axisPlacement\": \"auto\",\n                \"barAlignment\": 0,\n                \"drawStyle\": \"line\",\n                \"fillOpacity\": 20,\n                \"gradientMode\": \"none\",\n                \"hideFrom\": {\n                  \"legend\": false,\n                  \"tooltip\": false,\n                  \"viz\": false\n                },\n                \"lineInterpolation\": \"linear\",\n                \"lineWidth\": 1,\n                \"pointSize\": 5,\n                \"scaleDistribution\": {\n                  \"type\": \"linear\"\n                },\n                \"showPoints\": \"never\",\n                \"spanNulls\": false,\n                \"stacking\": {\n                  \"group\": \"A\",\n                  \"mode\": \"none\"\n                },\n                \"thresholdsStyle\": {\n                  \"mode\": \"off\"\n                }\n              },\n              \"links\": [],\n              \"mappings\": [],\n              \"min\": 0,\n              \"thresholds\": {\n                \"mode\": \"absolute\",\n                \"steps\": [\n                  {\n                    \"color\": \"green\"\n                  },\n                  {\n     
               \"color\": \"red\",\n                    \"value\": 80\n                  }\n                ]\n              },\n              \"unit\": \"bytes\"\n            },\n            \"overrides\": [\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Active\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#99440A\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Buffers\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#58140C\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Cache\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#6D1F62\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Cached\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#511749\",\n                      \"mode\": \"fixed\"\n                    }\n                  
}\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Committed\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#508642\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Dirty\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#6ED0E0\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Free\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#B7DBAB\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Inactive\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#EA6460\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": 
\"Mapped\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#052B51\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"PageTables\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#0A50A1\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Page_Tables\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#0A50A1\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Slab_Cache\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#EAB839\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Swap\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": 
{\n                      \"fixedColor\": \"#BF1B00\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Swap_Cache\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#C15C17\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Total\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#511749\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Total RAM\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#052B51\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Total RAM + Swap\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#052B51\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n              
  ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"VmallocUsed\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#EA6460\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              }\n            ]\n          },\n          \"gridPos\": {\n            \"h\": 10,\n            \"w\": 12,\n            \"x\": 0,\n            \"y\": 114\n          },\n          \"id\": 128,\n          \"links\": [],\n          \"options\": {\n            \"legend\": {\n              \"calcs\": [\n                \"mean\",\n                \"max\",\n                \"min\"\n              ],\n              \"displayMode\": \"table\",\n              \"placement\": \"bottom\",\n              \"showLegend\": true\n            },\n            \"tooltip\": {\n              \"mode\": \"multi\",\n              \"sort\": \"none\"\n            }\n          },\n          \"pluginVersion\": \"9.2.0\",\n          \"targets\": [\n            {\n              \"datasource\": {\n                \"type\": \"prometheus\",\n                \"uid\": \"${datasource}\"\n              },\n              \"expr\": \"node_memory_DirectMap1G_bytes{instance=\\\"$node\\\",job=\\\"$job\\\"}\",\n              \"format\": \"time_series\",\n              \"intervalFactor\": 1,\n              \"legendFormat\": \"DirectMap1G - Amount of pages mapped as this size\",\n              \"refId\": \"A\",\n              \"step\": 240\n            },\n            {\n              \"datasource\": {\n                \"type\": \"prometheus\",\n                \"uid\": \"${datasource}\"\n              },\n              \"expr\": \"node_memory_DirectMap2M_bytes{instance=\\\"$node\\\",job=\\\"$job\\\"}\",\n              \"format\": 
\"time_series\",\n              \"interval\": \"\",\n              \"intervalFactor\": 1,\n              \"legendFormat\": \"DirectMap2M - Amount of pages mapped as this size\",\n              \"refId\": \"B\",\n              \"step\": 240\n            },\n            {\n              \"datasource\": {\n                \"type\": \"prometheus\",\n                \"uid\": \"${datasource}\"\n              },\n              \"expr\": \"node_memory_DirectMap4k_bytes{instance=\\\"$node\\\",job=\\\"$job\\\"}\",\n              \"format\": \"time_series\",\n              \"interval\": \"\",\n              \"intervalFactor\": 1,\n              \"legendFormat\": \"DirectMap4K - Amount of pages mapped as this size\",\n              \"refId\": \"C\",\n              \"step\": 240\n            }\n          ],\n          \"title\": \"Memory DirectMap\",\n          \"type\": \"timeseries\"\n        },\n        {\n          \"datasource\": {\n            \"type\": \"prometheus\",\n            \"uid\": \"${datasource}\"\n          },\n          \"fieldConfig\": {\n            \"defaults\": {\n              \"color\": {\n                \"mode\": \"palette-classic\"\n              },\n              \"custom\": {\n                \"axisCenteredZero\": false,\n                \"axisColorMode\": \"text\",\n                \"axisLabel\": \"bytes\",\n                \"axisPlacement\": \"auto\",\n                \"barAlignment\": 0,\n                \"drawStyle\": \"line\",\n                \"fillOpacity\": 20,\n                \"gradientMode\": \"none\",\n                \"hideFrom\": {\n                  \"legend\": false,\n                  \"tooltip\": false,\n                  \"viz\": false\n                },\n                \"lineInterpolation\": \"linear\",\n                \"lineWidth\": 1,\n                \"pointSize\": 5,\n                \"scaleDistribution\": {\n                  \"type\": \"linear\"\n                },\n                \"showPoints\": \"never\",\n           
     \"spanNulls\": false,\n                \"stacking\": {\n                  \"group\": \"A\",\n                  \"mode\": \"none\"\n                },\n                \"thresholdsStyle\": {\n                  \"mode\": \"off\"\n                }\n              },\n              \"links\": [],\n              \"mappings\": [],\n              \"min\": 0,\n              \"thresholds\": {\n                \"mode\": \"absolute\",\n                \"steps\": [\n                  {\n                    \"color\": \"green\"\n                  },\n                  {\n                    \"color\": \"red\",\n                    \"value\": 80\n                  }\n                ]\n              },\n              \"unit\": \"bytes\"\n            },\n            \"overrides\": [\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Apps\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#629E51\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Buffers\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#614D93\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Cache\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      
\"fixedColor\": \"#6D1F62\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Cached\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#511749\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Committed\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#508642\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Free\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#0A437C\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\":\n                      \"Hardware Corrupted - Amount of RAM that the kernel identified as corrupted / not working\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#CFFAFF\",\n                      \"mode\": 
\"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Inactive\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#584477\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"PageTables\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#0A50A1\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Page_Tables\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#0A50A1\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"RAM_Free\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#E0F9D7\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  
\"id\": \"byName\",\n                  \"options\": \"Slab\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#806EB7\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Slab_Cache\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#E0752D\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Swap\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#BF1B00\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Swap_Cache\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#C15C17\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Swap_Free\"\n                },\n                \"properties\": [\n                  {\n                    
\"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#2F575E\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Unused\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#EAB839\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              }\n            ]\n          },\n          \"gridPos\": {\n            \"h\": 10,\n            \"w\": 12,\n            \"x\": 12,\n            \"y\": 114\n          },\n          \"id\": 137,\n          \"links\": [],\n          \"options\": {\n            \"legend\": {\n              \"calcs\": [\n                \"mean\",\n                \"lastNotNull\",\n                \"max\",\n                \"min\"\n              ],\n              \"displayMode\": \"table\",\n              \"placement\": \"bottom\",\n              \"showLegend\": true,\n              \"width\": 350\n            },\n            \"tooltip\": {\n              \"mode\": \"multi\",\n              \"sort\": \"none\"\n            }\n          },\n          \"pluginVersion\": \"9.2.0\",\n          \"targets\": [\n            {\n              \"datasource\": {\n                \"type\": \"prometheus\",\n                \"uid\": \"${datasource}\"\n              },\n              \"expr\": \"node_memory_Unevictable_bytes{instance=\\\"$node\\\",job=\\\"$job\\\"}\",\n              \"format\": \"time_series\",\n              \"intervalFactor\": 1,\n              \"legendFormat\":\n                  \"Unevictable - Amount of unevictable memory that can't be swapped out for a variety of reasons\",\n              \"refId\": \"A\",\n      
        \"step\": 240\n            },\n            {\n              \"datasource\": {\n                \"type\": \"prometheus\",\n                \"uid\": \"${datasource}\"\n              },\n              \"expr\": \"node_memory_Mlocked_bytes{instance=\\\"$node\\\",job=\\\"$job\\\"}\",\n              \"format\": \"time_series\",\n              \"intervalFactor\": 1,\n              \"legendFormat\":\n                  \"MLocked - Size of pages locked to memory using the mlock() system call\",\n              \"refId\": \"B\",\n              \"step\": 240\n            }\n          ],\n          \"title\": \"Memory Unevictable and MLocked\",\n          \"type\": \"timeseries\"\n        },\n        {\n          \"datasource\": {\n            \"type\": \"prometheus\",\n            \"uid\": \"${datasource}\"\n          },\n          \"fieldConfig\": {\n            \"defaults\": {\n              \"color\": {\n                \"mode\": \"palette-classic\"\n              },\n              \"custom\": {\n                \"axisCenteredZero\": false,\n                \"axisColorMode\": \"text\",\n                \"axisLabel\": \"bytes\",\n                \"axisPlacement\": \"auto\",\n                \"barAlignment\": 0,\n                \"drawStyle\": \"line\",\n                \"fillOpacity\": 20,\n                \"gradientMode\": \"none\",\n                \"hideFrom\": {\n                  \"legend\": false,\n                  \"tooltip\": false,\n                  \"viz\": false\n                },\n                \"lineInterpolation\": \"linear\",\n                \"lineWidth\": 1,\n                \"pointSize\": 5,\n                \"scaleDistribution\": {\n                  \"type\": \"linear\"\n                },\n                \"showPoints\": \"never\",\n                \"spanNulls\": false,\n                \"stacking\": {\n                  \"group\": \"A\",\n                  \"mode\": \"none\"\n                },\n                \"thresholdsStyle\": {\n       
           \"mode\": \"off\"\n                }\n              },\n              \"links\": [],\n              \"mappings\": [],\n              \"min\": 0,\n              \"thresholds\": {\n                \"mode\": \"absolute\",\n                \"steps\": [\n                  {\n                    \"color\": \"green\"\n                  },\n                  {\n                    \"color\": \"red\",\n                    \"value\": 80\n                  }\n                ]\n              },\n              \"unit\": \"bytes\"\n            },\n            \"overrides\": [\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Active\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#99440A\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Buffers\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#58140C\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Cache\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#6D1F62\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": 
{\n                  \"id\": \"byName\",\n                  \"options\": \"Cached\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#511749\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Committed\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#508642\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Dirty\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#6ED0E0\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Free\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#B7DBAB\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Inactive\"\n                },\n                \"properties\": [\n                  {\n         
           \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#EA6460\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Mapped\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#052B51\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"PageTables\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#0A50A1\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Page_Tables\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#0A50A1\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Slab_Cache\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#EAB839\",\n                      \"mode\": 
\"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Swap\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#BF1B00\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Swap_Cache\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#C15C17\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Total\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#511749\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Total RAM\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#052B51\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": 
\"byName\",\n                  \"options\": \"Total RAM + Swap\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#052B51\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Total Swap\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#614D93\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"VmallocUsed\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#EA6460\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              }\n            ]\n          },\n          \"gridPos\": {\n            \"h\": 10,\n            \"w\": 12,\n            \"x\": 0,\n            \"y\": 124\n          },\n          \"id\": 132,\n          \"links\": [],\n          \"options\": {\n            \"legend\": {\n              \"calcs\": [\n                \"mean\",\n                \"lastNotNull\",\n                \"max\",\n                \"min\"\n              ],\n              \"displayMode\": \"table\",\n              \"placement\": \"bottom\",\n              \"showLegend\": true\n            },\n            \"tooltip\": {\n              \"mode\": \"multi\",\n              \"sort\": \"none\"\n            }\n          
},\n          \"pluginVersion\": \"9.2.0\",\n          \"targets\": [\n            {\n              \"datasource\": {\n                \"type\": \"prometheus\",\n                \"uid\": \"${datasource}\"\n              },\n              \"expr\": \"node_memory_NFS_Unstable_bytes{instance=\\\"$node\\\",job=\\\"$job\\\"}\",\n              \"format\": \"time_series\",\n              \"intervalFactor\": 1,\n              \"legendFormat\":\n                  \"NFS Unstable - Memory in NFS pages sent to the server, but not yet committed to the storage\",\n              \"refId\": \"A\",\n              \"step\": 240\n            }\n          ],\n          \"title\": \"Memory NFS\",\n          \"type\": \"timeseries\"\n        }\n      ],\n      \"targets\": [\n        {\n          \"datasource\": {\n            \"type\": \"prometheus\",\n            \"uid\": \"000000001\"\n          },\n          \"refId\": \"A\"\n        }\n      ],\n      \"title\": \"Memory Meminfo\",\n      \"type\": \"row\"\n    },\n    {\n      \"collapsed\": true,\n      \"datasource\": {\n        \"type\": \"prometheus\",\n        \"uid\": \"000000001\"\n      },\n      \"gridPos\": {\n        \"h\": 1,\n        \"w\": 24,\n        \"x\": 0,\n        \"y\": 22\n      },\n      \"id\": 267,\n      \"panels\": [\n        {\n          \"datasource\": {\n            \"type\": \"prometheus\",\n            \"uid\": \"${datasource}\"\n          },\n          \"fieldConfig\": {\n            \"defaults\": {\n              \"color\": {\n                \"mode\": \"palette-classic\"\n              },\n              \"custom\": {\n                \"axisCenteredZero\": false,\n                \"axisColorMode\": \"text\",\n                \"axisLabel\": \"pages out (-) / in (+)\",\n                \"axisPlacement\": \"auto\",\n                \"barAlignment\": 0,\n                \"drawStyle\": \"line\",\n                \"fillOpacity\": 20,\n                \"gradientMode\": \"none\",\n                
\"hideFrom\": {\n                  \"legend\": false,\n                  \"tooltip\": false,\n                  \"viz\": false\n                },\n                \"lineInterpolation\": \"linear\",\n                \"lineWidth\": 1,\n                \"pointSize\": 5,\n                \"scaleDistribution\": {\n                  \"type\": \"linear\"\n                },\n                \"showPoints\": \"never\",\n                \"spanNulls\": false,\n                \"stacking\": {\n                  \"group\": \"A\",\n                  \"mode\": \"none\"\n                },\n                \"thresholdsStyle\": {\n                  \"mode\": \"off\"\n                }\n              },\n              \"links\": [],\n              \"mappings\": [],\n              \"thresholds\": {\n                \"mode\": \"absolute\",\n                \"steps\": [\n                  {\n                    \"color\": \"green\"\n                  },\n                  {\n                    \"color\": \"red\",\n                    \"value\": 80\n                  }\n                ]\n              },\n              \"unit\": \"short\"\n            },\n            \"overrides\": [\n              {\n                \"matcher\": {\n                  \"id\": \"byRegexp\",\n                  \"options\": \"/.*out/\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"custom.transform\",\n                    \"value\": \"negative-Y\"\n                  }\n                ]\n              }\n            ]\n          },\n          \"gridPos\": {\n            \"h\": 10,\n            \"w\": 12,\n            \"x\": 0,\n            \"y\": 41\n          },\n          \"id\": 176,\n          \"links\": [],\n          \"options\": {\n            \"legend\": {\n              \"calcs\": [\n                \"mean\",\n                \"lastNotNull\",\n                \"max\",\n                \"min\"\n              ],\n              
\"displayMode\": \"table\",\n              \"placement\": \"bottom\",\n              \"showLegend\": true\n            },\n            \"tooltip\": {\n              \"mode\": \"multi\",\n              \"sort\": \"none\"\n            }\n          },\n          \"pluginVersion\": \"9.2.0\",\n          \"targets\": [\n            {\n              \"datasource\": {\n                \"type\": \"prometheus\",\n                \"uid\": \"${datasource}\"\n              },\n              \"expr\":\n                  \"irate(node_vmstat_pgpgin{instance=\\\"$node\\\",job=\\\"$job\\\"}[$__rate_interval])\",\n              \"format\": \"time_series\",\n              \"intervalFactor\": 1,\n              \"legendFormat\": \"Pagesin - Page in operations\",\n              \"refId\": \"A\",\n              \"step\": 240\n            },\n            {\n              \"datasource\": {\n                \"type\": \"prometheus\",\n                \"uid\": \"${datasource}\"\n              },\n              \"expr\":\n                  \"irate(node_vmstat_pgpgout{instance=\\\"$node\\\",job=\\\"$job\\\"}[$__rate_interval])\",\n              \"format\": \"time_series\",\n              \"intervalFactor\": 1,\n              \"legendFormat\": \"Pagesout - Page out operations\",\n              \"refId\": \"B\",\n              \"step\": 240\n            }\n          ],\n          \"title\": \"Memory Pages In / Out\",\n          \"type\": \"timeseries\"\n        },\n        {\n          \"datasource\": {\n            \"type\": \"prometheus\",\n            \"uid\": \"${datasource}\"\n          },\n          \"fieldConfig\": {\n            \"defaults\": {\n              \"color\": {\n                \"mode\": \"palette-classic\"\n              },\n              \"custom\": {\n                \"axisCenteredZero\": false,\n                \"axisColorMode\": \"text\",\n                \"axisLabel\": \"pages out (-) / in (+)\",\n                \"axisPlacement\": \"auto\",\n                
\"barAlignment\": 0,\n                \"drawStyle\": \"line\",\n                \"fillOpacity\": 20,\n                \"gradientMode\": \"none\",\n                \"hideFrom\": {\n                  \"legend\": false,\n                  \"tooltip\": false,\n                  \"viz\": false\n                },\n                \"lineInterpolation\": \"linear\",\n                \"lineWidth\": 1,\n                \"pointSize\": 5,\n                \"scaleDistribution\": {\n                  \"type\": \"linear\"\n                },\n                \"showPoints\": \"never\",\n                \"spanNulls\": false,\n                \"stacking\": {\n                  \"group\": \"A\",\n                  \"mode\": \"none\"\n                },\n                \"thresholdsStyle\": {\n                  \"mode\": \"off\"\n                }\n              },\n              \"links\": [],\n              \"mappings\": [],\n              \"thresholds\": {\n                \"mode\": \"absolute\",\n                \"steps\": [\n                  {\n                    \"color\": \"green\"\n                  },\n                  {\n                    \"color\": \"red\",\n                    \"value\": 80\n                  }\n                ]\n              },\n              \"unit\": \"short\"\n            },\n            \"overrides\": [\n              {\n                \"matcher\": {\n                  \"id\": \"byRegexp\",\n                  \"options\": \"/.*out/\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"custom.transform\",\n                    \"value\": \"negative-Y\"\n                  }\n                ]\n              }\n            ]\n          },\n          \"gridPos\": {\n            \"h\": 10,\n            \"w\": 12,\n            \"x\": 12,\n            \"y\": 41\n          },\n          \"id\": 22,\n          \"links\": [],\n          \"options\": {\n            \"legend\": {\n              
\"calcs\": [\n                \"mean\",\n                \"lastNotNull\",\n                \"max\",\n                \"min\"\n              ],\n              \"displayMode\": \"table\",\n              \"placement\": \"bottom\",\n              \"showLegend\": true\n            },\n            \"tooltip\": {\n              \"mode\": \"multi\",\n              \"sort\": \"none\"\n            }\n          },\n          \"pluginVersion\": \"9.2.0\",\n          \"targets\": [\n            {\n              \"datasource\": {\n                \"type\": \"prometheus\",\n                \"uid\": \"${datasource}\"\n              },\n              \"expr\":\n                  \"irate(node_vmstat_pswpin{instance=\\\"$node\\\",job=\\\"$job\\\"}[$__rate_interval])\",\n              \"format\": \"time_series\",\n              \"intervalFactor\": 1,\n              \"legendFormat\": \"Pswpin - Pages swapped in\",\n              \"refId\": \"A\",\n              \"step\": 240\n            },\n            {\n              \"datasource\": {\n                \"type\": \"prometheus\",\n                \"uid\": \"${datasource}\"\n              },\n              \"expr\":\n                  \"irate(node_vmstat_pswpout{instance=\\\"$node\\\",job=\\\"$job\\\"}[$__rate_interval])\",\n              \"format\": \"time_series\",\n              \"intervalFactor\": 1,\n              \"legendFormat\": \"Pswpout - Pages swapped out\",\n              \"refId\": \"B\",\n              \"step\": 240\n            }\n          ],\n          \"title\": \"Memory Pages Swap In / Out\",\n          \"type\": \"timeseries\"\n        },\n        {\n          \"datasource\": {\n            \"type\": \"prometheus\",\n            \"uid\": \"${datasource}\"\n          },\n          \"fieldConfig\": {\n            \"defaults\": {\n              \"color\": {\n                \"mode\": \"palette-classic\"\n              },\n              \"custom\": {\n                \"axisCenteredZero\": false,\n                
\"axisColorMode\": \"text\",\n                \"axisLabel\": \"faults\",\n                \"axisPlacement\": \"auto\",\n                \"barAlignment\": 0,\n                \"drawStyle\": \"line\",\n                \"fillOpacity\": 20,\n                \"gradientMode\": \"none\",\n                \"hideFrom\": {\n                  \"legend\": false,\n                  \"tooltip\": false,\n                  \"viz\": false\n                },\n                \"lineInterpolation\": \"linear\",\n                \"lineWidth\": 1,\n                \"pointSize\": 5,\n                \"scaleDistribution\": {\n                  \"type\": \"linear\"\n                },\n                \"showPoints\": \"never\",\n                \"spanNulls\": false,\n                \"stacking\": {\n                  \"group\": \"A\",\n                  \"mode\": \"normal\"\n                },\n                \"thresholdsStyle\": {\n                  \"mode\": \"off\"\n                }\n              },\n              \"links\": [],\n              \"mappings\": [],\n              \"min\": 0,\n              \"thresholds\": {\n                \"mode\": \"absolute\",\n                \"steps\": [\n                  {\n                    \"color\": \"green\"\n                  },\n                  {\n                    \"color\": \"red\",\n                    \"value\": 80\n                  }\n                ]\n              },\n              \"unit\": \"short\"\n            },\n            \"overrides\": [\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Apps\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#629E51\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n               
 \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Buffers\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#614D93\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Cache\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#6D1F62\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Cached\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#511749\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Committed\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#508642\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Free\"\n                },\n                \"properties\": [\n                 
 {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#0A437C\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\":\n                      \"Hardware Corrupted - Amount of RAM that the kernel identified as corrupted / not working\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#CFFAFF\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Inactive\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#584477\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"PageTables\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#0A50A1\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Page_Tables\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n            
        \"value\": {\n                      \"fixedColor\": \"#0A50A1\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"RAM_Free\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#E0F9D7\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Slab\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#806EB7\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Slab_Cache\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#E0752D\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Swap\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#BF1B00\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n         
       ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Swap_Cache\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#C15C17\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Swap_Free\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#2F575E\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Unused\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#EAB839\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Pgfault - Page major and minor fault operations\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"custom.fillOpacity\",\n                    \"value\": 0\n                  },\n                  {\n                    \"id\": \"custom.stacking\",\n                    \"value\": {\n                      \"group\": false,\n                      \"mode\": \"normal\"\n                    }\n                  }\n   
             ]\n              }\n            ]\n          },\n          \"gridPos\": {\n            \"h\": 10,\n            \"w\": 12,\n            \"x\": 0,\n            \"y\": 51\n          },\n          \"id\": 175,\n          \"links\": [],\n          \"options\": {\n            \"legend\": {\n              \"calcs\": [\n                \"mean\",\n                \"lastNotNull\",\n                \"max\",\n                \"min\"\n              ],\n              \"displayMode\": \"table\",\n              \"placement\": \"bottom\",\n              \"showLegend\": true,\n              \"width\": 350\n            },\n            \"tooltip\": {\n              \"mode\": \"multi\",\n              \"sort\": \"none\"\n            }\n          },\n          \"pluginVersion\": \"9.2.0\",\n          \"targets\": [\n            {\n              \"datasource\": {\n                \"type\": \"prometheus\",\n                \"uid\": \"${datasource}\"\n              },\n              \"expr\":\n                  \"irate(node_vmstat_pgfault{instance=\\\"$node\\\",job=\\\"$job\\\"}[$__rate_interval])\",\n              \"format\": \"time_series\",\n              \"intervalFactor\": 1,\n              \"legendFormat\": \"Pgfault - Page major and minor fault operations\",\n              \"refId\": \"A\",\n              \"step\": 240\n            },\n            {\n              \"datasource\": {\n                \"type\": \"prometheus\",\n                \"uid\": \"${datasource}\"\n              },\n              \"expr\":\n                  \"irate(node_vmstat_pgmajfault{instance=\\\"$node\\\",job=\\\"$job\\\"}[$__rate_interval])\",\n              \"format\": \"time_series\",\n              \"intervalFactor\": 1,\n              \"legendFormat\": \"Pgmajfault - Major page fault operations\",\n              \"refId\": \"B\",\n              \"step\": 240\n            },\n            {\n              \"datasource\": {\n                \"type\": \"prometheus\",\n                \"uid\": 
\"${datasource}\"\n              },\n              \"expr\":\n                  \"irate(node_vmstat_pgfault{instance=\\\"$node\\\",job=\\\"$job\\\"}[$__rate_interval])  - irate(node_vmstat_pgmajfault{instance=\\\"$node\\\",job=\\\"$job\\\"}[$__rate_interval])\",\n              \"format\": \"time_series\",\n              \"intervalFactor\": 1,\n              \"legendFormat\": \"Pgminfault - Minor page fault operations\",\n              \"refId\": \"C\",\n              \"step\": 240\n            }\n          ],\n          \"title\": \"Memory Page Faults\",\n          \"type\": \"timeseries\"\n        },\n        {\n          \"datasource\": {\n            \"type\": \"prometheus\",\n            \"uid\": \"${datasource}\"\n          },\n          \"fieldConfig\": {\n            \"defaults\": {\n              \"color\": {\n                \"mode\": \"palette-classic\"\n              },\n              \"custom\": {\n                \"axisCenteredZero\": false,\n                \"axisColorMode\": \"text\",\n                \"axisLabel\": \"counter\",\n                \"axisPlacement\": \"auto\",\n                \"barAlignment\": 0,\n                \"drawStyle\": \"line\",\n                \"fillOpacity\": 20,\n                \"gradientMode\": \"none\",\n                \"hideFrom\": {\n                  \"legend\": false,\n                  \"tooltip\": false,\n                  \"viz\": false\n                },\n                \"lineInterpolation\": \"linear\",\n                \"lineWidth\": 1,\n                \"pointSize\": 5,\n                \"scaleDistribution\": {\n                  \"type\": \"linear\"\n                },\n                \"showPoints\": \"never\",\n                \"spanNulls\": false,\n                \"stacking\": {\n                  \"group\": \"A\",\n                  \"mode\": \"none\"\n                },\n                \"thresholdsStyle\": {\n                  \"mode\": \"off\"\n                }\n              },\n              
\"links\": [],\n              \"mappings\": [],\n              \"min\": 0,\n              \"thresholds\": {\n                \"mode\": \"absolute\",\n                \"steps\": [\n                  {\n                    \"color\": \"green\"\n                  },\n                  {\n                    \"color\": \"red\",\n                    \"value\": 80\n                  }\n                ]\n              },\n              \"unit\": \"short\"\n            },\n            \"overrides\": [\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Active\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#99440A\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Buffers\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#58140C\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Cache\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#6D1F62\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": 
\"Cached\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#511749\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Committed\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#508642\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Dirty\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#6ED0E0\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Free\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#B7DBAB\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Inactive\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n        
              \"fixedColor\": \"#EA6460\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Mapped\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#052B51\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"PageTables\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#0A50A1\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Page_Tables\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#0A50A1\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Slab_Cache\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#EAB839\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n         
     },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Swap\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#BF1B00\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Swap_Cache\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#C15C17\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Total\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#511749\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Total RAM\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#052B51\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Total RAM + Swap\"\n                
},\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#052B51\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Total Swap\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#614D93\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"VmallocUsed\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#EA6460\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              }\n            ]\n          },\n          \"gridPos\": {\n            \"h\": 10,\n            \"w\": 12,\n            \"x\": 12,\n            \"y\": 51\n          },\n          \"id\": 307,\n          \"links\": [],\n          \"options\": {\n            \"legend\": {\n              \"calcs\": [\n                \"mean\",\n                \"lastNotNull\",\n                \"max\",\n                \"min\"\n              ],\n              \"displayMode\": \"table\",\n              \"placement\": \"bottom\",\n              \"showLegend\": true\n            },\n            \"tooltip\": {\n              \"mode\": \"multi\",\n              \"sort\": \"none\"\n            }\n          },\n          \"pluginVersion\": \"9.2.0\",\n          \"targets\": [\n            
{\n              \"datasource\": {\n                \"type\": \"prometheus\",\n                \"uid\": \"${datasource}\"\n              },\n              \"expr\":\n                  \"irate(node_vmstat_oom_kill{instance=\\\"$node\\\",job=\\\"$job\\\"}[$__rate_interval])\",\n              \"format\": \"time_series\",\n              \"interval\": \"\",\n              \"intervalFactor\": 1,\n              \"legendFormat\": \"oom killer invocations \",\n              \"refId\": \"A\",\n              \"step\": 240\n            }\n          ],\n          \"title\": \"OOM Killer\",\n          \"type\": \"timeseries\"\n        }\n      ],\n      \"targets\": [\n        {\n          \"datasource\": {\n            \"type\": \"prometheus\",\n            \"uid\": \"000000001\"\n          },\n          \"refId\": \"A\"\n        }\n      ],\n      \"title\": \"Memory Vmstat\",\n      \"type\": \"row\"\n    },\n    {\n      \"collapsed\": true,\n      \"datasource\": {\n        \"type\": \"prometheus\",\n        \"uid\": \"000000001\"\n      },\n      \"gridPos\": {\n        \"h\": 1,\n        \"w\": 24,\n        \"x\": 0,\n        \"y\": 23\n      },\n      \"id\": 293,\n      \"panels\": [\n        {\n          \"datasource\": {\n            \"type\": \"prometheus\",\n            \"uid\": \"${datasource}\"\n          },\n          \"description\": \"\",\n          \"fieldConfig\": {\n            \"defaults\": {\n              \"color\": {\n                \"mode\": \"palette-classic\"\n              },\n              \"custom\": {\n                \"axisCenteredZero\": false,\n                \"axisColorMode\": \"text\",\n                \"axisLabel\": \"seconds\",\n                \"axisPlacement\": \"auto\",\n                \"barAlignment\": 0,\n                \"drawStyle\": \"line\",\n                \"fillOpacity\": 20,\n                \"gradientMode\": \"none\",\n                \"hideFrom\": {\n                  \"legend\": false,\n                  \"tooltip\": 
false,\n                  \"viz\": false\n                },\n                \"lineInterpolation\": \"linear\",\n                \"lineWidth\": 1,\n                \"pointSize\": 5,\n                \"scaleDistribution\": {\n                  \"type\": \"linear\"\n                },\n                \"showPoints\": \"never\",\n                \"spanNulls\": false,\n                \"stacking\": {\n                  \"group\": \"A\",\n                  \"mode\": \"none\"\n                },\n                \"thresholdsStyle\": {\n                  \"mode\": \"off\"\n                }\n              },\n              \"links\": [],\n              \"mappings\": [],\n              \"thresholds\": {\n                \"mode\": \"absolute\",\n                \"steps\": [\n                  {\n                    \"color\": \"green\"\n                  },\n                  {\n                    \"color\": \"red\",\n                    \"value\": 80\n                  }\n                ]\n              },\n              \"unit\": \"s\"\n            },\n            \"overrides\": [\n              {\n                \"matcher\": {\n                  \"id\": \"byRegexp\",\n                  \"options\": \"/.*Variation*./\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#890F02\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              }\n            ]\n          },\n          \"gridPos\": {\n            \"h\": 10,\n            \"w\": 12,\n            \"x\": 0,\n            \"y\": 24\n          },\n          \"id\": 260,\n          \"links\": [],\n          \"options\": {\n            \"legend\": {\n              \"calcs\": [\n                \"mean\",\n                \"lastNotNull\",\n                \"max\",\n                \"min\"\n              ],\n              
\"displayMode\": \"table\",\n              \"placement\": \"bottom\",\n              \"showLegend\": true\n            },\n            \"tooltip\": {\n              \"mode\": \"multi\",\n              \"sort\": \"none\"\n            }\n          },\n          \"pluginVersion\": \"9.2.0\",\n          \"targets\": [\n            {\n              \"datasource\": {\n                \"type\": \"prometheus\",\n                \"uid\": \"${datasource}\"\n              },\n              \"expr\": \"node_timex_estimated_error_seconds{instance=\\\"$node\\\",job=\\\"$job\\\"}\",\n              \"format\": \"time_series\",\n              \"hide\": false,\n              \"interval\": \"\",\n              \"intervalFactor\": 1,\n              \"legendFormat\": \"Estimated error in seconds\",\n              \"refId\": \"A\",\n              \"step\": 240\n            },\n            {\n              \"datasource\": {\n                \"type\": \"prometheus\",\n                \"uid\": \"${datasource}\"\n              },\n              \"expr\": \"node_timex_offset_seconds{instance=\\\"$node\\\",job=\\\"$job\\\"}\",\n              \"format\": \"time_series\",\n              \"hide\": false,\n              \"interval\": \"\",\n              \"intervalFactor\": 1,\n              \"legendFormat\": \"Time offset in between local system and reference clock\",\n              \"refId\": \"B\",\n              \"step\": 240\n            },\n            {\n              \"datasource\": {\n                \"type\": \"prometheus\",\n                \"uid\": \"${datasource}\"\n              },\n              \"expr\": \"node_timex_maxerror_seconds{instance=\\\"$node\\\",job=\\\"$job\\\"}\",\n              \"format\": \"time_series\",\n              \"hide\": false,\n              \"interval\": \"\",\n              \"intervalFactor\": 1,\n              \"legendFormat\": \"Maximum error in seconds\",\n              \"refId\": \"C\",\n              \"step\": 240\n            }\n          ],\n      
    \"title\": \"Time Synchronized Drift\",\n          \"type\": \"timeseries\"\n        },\n        {\n          \"datasource\": {\n            \"type\": \"prometheus\",\n            \"uid\": \"${datasource}\"\n          },\n          \"description\": \"\",\n          \"fieldConfig\": {\n            \"defaults\": {\n              \"color\": {\n                \"mode\": \"palette-classic\"\n              },\n              \"custom\": {\n                \"axisCenteredZero\": false,\n                \"axisColorMode\": \"text\",\n                \"axisLabel\": \"counter\",\n                \"axisPlacement\": \"auto\",\n                \"barAlignment\": 0,\n                \"drawStyle\": \"line\",\n                \"fillOpacity\": 20,\n                \"gradientMode\": \"none\",\n                \"hideFrom\": {\n                  \"legend\": false,\n                  \"tooltip\": false,\n                  \"viz\": false\n                },\n                \"lineInterpolation\": \"linear\",\n                \"lineWidth\": 1,\n                \"pointSize\": 5,\n                \"scaleDistribution\": {\n                  \"type\": \"linear\"\n                },\n                \"showPoints\": \"never\",\n                \"spanNulls\": false,\n                \"stacking\": {\n                  \"group\": \"A\",\n                  \"mode\": \"none\"\n                },\n                \"thresholdsStyle\": {\n                  \"mode\": \"off\"\n                }\n              },\n              \"links\": [],\n              \"mappings\": [],\n              \"thresholds\": {\n                \"mode\": \"absolute\",\n                \"steps\": [\n                  {\n                    \"color\": \"green\"\n                  },\n                  {\n                    \"color\": \"red\",\n                    \"value\": 80\n                  }\n                ]\n              },\n              \"unit\": \"short\"\n            },\n            \"overrides\": []\n          
},\n          \"gridPos\": {\n            \"h\": 10,\n            \"w\": 12,\n            \"x\": 12,\n            \"y\": 24\n          },\n          \"id\": 291,\n          \"links\": [],\n          \"options\": {\n            \"legend\": {\n              \"calcs\": [\n                \"mean\",\n                \"lastNotNull\",\n                \"max\",\n                \"min\"\n              ],\n              \"displayMode\": \"table\",\n              \"placement\": \"bottom\",\n              \"showLegend\": true\n            },\n            \"tooltip\": {\n              \"mode\": \"multi\",\n              \"sort\": \"none\"\n            }\n          },\n          \"pluginVersion\": \"9.2.0\",\n          \"targets\": [\n            {\n              \"datasource\": {\n                \"type\": \"prometheus\",\n                \"uid\": \"${datasource}\"\n              },\n              \"expr\": \"node_timex_loop_time_constant{instance=\\\"$node\\\",job=\\\"$job\\\"}\",\n              \"format\": \"time_series\",\n              \"interval\": \"\",\n              \"intervalFactor\": 1,\n              \"legendFormat\": \"Phase-locked loop time adjust\",\n              \"refId\": \"A\",\n              \"step\": 240\n            }\n          ],\n          \"title\": \"Time PLL Adjust\",\n          \"type\": \"timeseries\"\n        },\n        {\n          \"datasource\": {\n            \"type\": \"prometheus\",\n            \"uid\": \"${datasource}\"\n          },\n          \"description\": \"\",\n          \"fieldConfig\": {\n            \"defaults\": {\n              \"color\": {\n                \"mode\": \"palette-classic\"\n              },\n              \"custom\": {\n                \"axisCenteredZero\": false,\n                \"axisColorMode\": \"text\",\n                \"axisLabel\": \"counter\",\n                \"axisPlacement\": \"auto\",\n                \"barAlignment\": 0,\n                \"drawStyle\": \"line\",\n                \"fillOpacity\": 
20,\n                \"gradientMode\": \"none\",\n                \"hideFrom\": {\n                  \"legend\": false,\n                  \"tooltip\": false,\n                  \"viz\": false\n                },\n                \"lineInterpolation\": \"linear\",\n                \"lineWidth\": 1,\n                \"pointSize\": 5,\n                \"scaleDistribution\": {\n                  \"type\": \"linear\"\n                },\n                \"showPoints\": \"never\",\n                \"spanNulls\": false,\n                \"stacking\": {\n                  \"group\": \"A\",\n                  \"mode\": \"none\"\n                },\n                \"thresholdsStyle\": {\n                  \"mode\": \"off\"\n                }\n              },\n              \"links\": [],\n              \"mappings\": [],\n              \"thresholds\": {\n                \"mode\": \"absolute\",\n                \"steps\": [\n                  {\n                    \"color\": \"green\"\n                  },\n                  {\n                    \"color\": \"red\",\n                    \"value\": 80\n                  }\n                ]\n              },\n              \"unit\": \"short\"\n            },\n            \"overrides\": [\n              {\n                \"matcher\": {\n                  \"id\": \"byRegexp\",\n                  \"options\": \"/.*Variation*./\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#890F02\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              }\n            ]\n          },\n          \"gridPos\": {\n            \"h\": 10,\n            \"w\": 12,\n            \"x\": 0,\n            \"y\": 34\n          },\n          \"id\": 168,\n          \"links\": [],\n          \"options\": {\n            \"legend\": {\n              
\"calcs\": [\n                \"mean\",\n                \"lastNotNull\",\n                \"max\",\n                \"min\"\n              ],\n              \"displayMode\": \"table\",\n              \"placement\": \"bottom\",\n              \"showLegend\": true\n            },\n            \"tooltip\": {\n              \"mode\": \"multi\",\n              \"sort\": \"none\"\n            }\n          },\n          \"pluginVersion\": \"9.2.0\",\n          \"targets\": [\n            {\n              \"datasource\": {\n                \"type\": \"prometheus\",\n                \"uid\": \"${datasource}\"\n              },\n              \"expr\": \"node_timex_sync_status{instance=\\\"$node\\\",job=\\\"$job\\\"}\",\n              \"format\": \"time_series\",\n              \"interval\": \"\",\n              \"intervalFactor\": 1,\n              \"legendFormat\": \"Is clock synchronized to a reliable server (1 = yes, 0 = no)\",\n              \"refId\": \"A\",\n              \"step\": 240\n            },\n            {\n              \"datasource\": {\n                \"type\": \"prometheus\",\n                \"uid\": \"${datasource}\"\n              },\n              \"expr\": \"node_timex_frequency_adjustment_ratio{instance=\\\"$node\\\",job=\\\"$job\\\"}\",\n              \"format\": \"time_series\",\n              \"interval\": \"\",\n              \"intervalFactor\": 1,\n              \"legendFormat\": \"Local clock frequency adjustment\",\n              \"refId\": \"B\",\n              \"step\": 240\n            }\n          ],\n          \"title\": \"Time Synchronized Status\",\n          \"type\": \"timeseries\"\n        },\n        {\n          \"datasource\": {\n            \"type\": \"prometheus\",\n            \"uid\": \"${datasource}\"\n          },\n          \"description\": \"\",\n          \"fieldConfig\": {\n            \"defaults\": {\n              \"color\": {\n                \"mode\": \"palette-classic\"\n              },\n              
\"custom\": {\n                \"axisCenteredZero\": false,\n                \"axisColorMode\": \"text\",\n                \"axisLabel\": \"seconds\",\n                \"axisPlacement\": \"auto\",\n                \"barAlignment\": 0,\n                \"drawStyle\": \"line\",\n                \"fillOpacity\": 20,\n                \"gradientMode\": \"none\",\n                \"hideFrom\": {\n                  \"legend\": false,\n                  \"tooltip\": false,\n                  \"viz\": false\n                },\n                \"lineInterpolation\": \"linear\",\n                \"lineWidth\": 1,\n                \"pointSize\": 5,\n                \"scaleDistribution\": {\n                  \"type\": \"linear\"\n                },\n                \"showPoints\": \"never\",\n                \"spanNulls\": false,\n                \"stacking\": {\n                  \"group\": \"A\",\n                  \"mode\": \"none\"\n                },\n                \"thresholdsStyle\": {\n                  \"mode\": \"off\"\n                }\n              },\n              \"links\": [],\n              \"mappings\": [],\n              \"thresholds\": {\n                \"mode\": \"absolute\",\n                \"steps\": [\n                  {\n                    \"color\": \"green\"\n                  },\n                  {\n                    \"color\": \"red\",\n                    \"value\": 80\n                  }\n                ]\n              },\n              \"unit\": \"s\"\n            },\n            \"overrides\": []\n          },\n          \"gridPos\": {\n            \"h\": 10,\n            \"w\": 12,\n            \"x\": 12,\n            \"y\": 34\n          },\n          \"id\": 294,\n          \"links\": [],\n          \"options\": {\n            \"legend\": {\n              \"calcs\": [\n                \"mean\",\n                \"lastNotNull\",\n                \"max\",\n                \"min\"\n              ],\n              \"displayMode\": 
\"table\",\n              \"placement\": \"bottom\",\n              \"showLegend\": true\n            },\n            \"tooltip\": {\n              \"mode\": \"multi\",\n              \"sort\": \"none\"\n            }\n          },\n          \"pluginVersion\": \"9.2.0\",\n          \"targets\": [\n            {\n              \"datasource\": {\n                \"type\": \"prometheus\",\n                \"uid\": \"${datasource}\"\n              },\n              \"expr\": \"node_timex_tick_seconds{instance=\\\"$node\\\",job=\\\"$job\\\"}\",\n              \"format\": \"time_series\",\n              \"interval\": \"\",\n              \"intervalFactor\": 1,\n              \"legendFormat\": \"Seconds between clock ticks\",\n              \"refId\": \"A\",\n              \"step\": 240\n            },\n            {\n              \"datasource\": {\n                \"type\": \"prometheus\",\n                \"uid\": \"${datasource}\"\n              },\n              \"expr\": \"node_timex_tai_offset_seconds{instance=\\\"$node\\\",job=\\\"$job\\\"}\",\n              \"format\": \"time_series\",\n              \"interval\": \"\",\n              \"intervalFactor\": 1,\n              \"legendFormat\": \"International Atomic Time (TAI) offset\",\n              \"refId\": \"B\",\n              \"step\": 240\n            }\n          ],\n          \"title\": \"Time Misc\",\n          \"type\": \"timeseries\"\n        }\n      ],\n      \"targets\": [\n        {\n          \"datasource\": {\n            \"type\": \"prometheus\",\n            \"uid\": \"000000001\"\n          },\n          \"refId\": \"A\"\n        }\n      ],\n      \"title\": \"System Timesync\",\n      \"type\": \"row\"\n    },\n    {\n      \"collapsed\": true,\n      \"datasource\": {\n        \"type\": \"prometheus\",\n        \"uid\": \"000000001\"\n      },\n      \"gridPos\": {\n        \"h\": 1,\n        \"w\": 24,\n        \"x\": 0,\n        \"y\": 24\n      },\n      \"id\": 312,\n      \"panels\": 
[\n        {\n          \"datasource\": {\n            \"type\": \"prometheus\",\n            \"uid\": \"${datasource}\"\n          },\n          \"fieldConfig\": {\n            \"defaults\": {\n              \"color\": {\n                \"mode\": \"palette-classic\"\n              },\n              \"custom\": {\n                \"axisCenteredZero\": false,\n                \"axisColorMode\": \"text\",\n                \"axisLabel\": \"counter\",\n                \"axisPlacement\": \"auto\",\n                \"barAlignment\": 0,\n                \"drawStyle\": \"line\",\n                \"fillOpacity\": 20,\n                \"gradientMode\": \"none\",\n                \"hideFrom\": {\n                  \"legend\": false,\n                  \"tooltip\": false,\n                  \"viz\": false\n                },\n                \"lineInterpolation\": \"linear\",\n                \"lineWidth\": 1,\n                \"pointSize\": 5,\n                \"scaleDistribution\": {\n                  \"type\": \"linear\"\n                },\n                \"showPoints\": \"never\",\n                \"spanNulls\": false,\n                \"stacking\": {\n                  \"group\": \"A\",\n                  \"mode\": \"none\"\n                },\n                \"thresholdsStyle\": {\n                  \"mode\": \"off\"\n                }\n              },\n              \"links\": [],\n              \"mappings\": [],\n              \"min\": 0,\n              \"thresholds\": {\n                \"mode\": \"absolute\",\n                \"steps\": [\n                  {\n                    \"color\": \"green\"\n                  },\n                  {\n                    \"color\": \"red\",\n                    \"value\": 80\n                  }\n                ]\n              },\n              \"unit\": \"short\"\n            },\n            \"overrides\": []\n          },\n          \"gridPos\": {\n            \"h\": 10,\n            \"w\": 12,\n            \"x\": 
0,\n            \"y\": 73\n          },\n          \"id\": 62,\n          \"links\": [],\n          \"options\": {\n            \"legend\": {\n              \"calcs\": [\n                \"mean\",\n                \"lastNotNull\",\n                \"max\",\n                \"min\"\n              ],\n              \"displayMode\": \"table\",\n              \"placement\": \"bottom\",\n              \"showLegend\": true\n            },\n            \"tooltip\": {\n              \"mode\": \"multi\",\n              \"sort\": \"none\"\n            }\n          },\n          \"pluginVersion\": \"9.2.0\",\n          \"targets\": [\n            {\n              \"datasource\": {\n                \"type\": \"prometheus\",\n                \"uid\": \"${datasource}\"\n              },\n              \"expr\": \"node_procs_blocked{instance=\\\"$node\\\",job=\\\"$job\\\"}\",\n              \"format\": \"time_series\",\n              \"intervalFactor\": 1,\n              \"legendFormat\": \"Processes blocked waiting for I/O to complete\",\n              \"refId\": \"A\",\n              \"step\": 240\n            },\n            {\n              \"datasource\": {\n                \"type\": \"prometheus\",\n                \"uid\": \"${datasource}\"\n              },\n              \"expr\": \"node_procs_running{instance=\\\"$node\\\",job=\\\"$job\\\"}\",\n              \"format\": \"time_series\",\n              \"intervalFactor\": 1,\n              \"legendFormat\": \"Processes in runnable state\",\n              \"refId\": \"B\",\n              \"step\": 240\n            }\n          ],\n          \"title\": \"Processes Status\",\n          \"type\": \"timeseries\"\n        },\n        {\n          \"datasource\": {\n            \"type\": \"prometheus\",\n            \"uid\": \"${datasource}\"\n          },\n          \"description\": \"Enable with --collector.processes argument on node-exporter\",\n          \"fieldConfig\": {\n            \"defaults\": {\n              
\"color\": {\n                \"mode\": \"palette-classic\"\n              },\n              \"custom\": {\n                \"axisCenteredZero\": false,\n                \"axisColorMode\": \"text\",\n                \"axisLabel\": \"counter\",\n                \"axisPlacement\": \"auto\",\n                \"barAlignment\": 0,\n                \"drawStyle\": \"line\",\n                \"fillOpacity\": 20,\n                \"gradientMode\": \"none\",\n                \"hideFrom\": {\n                  \"legend\": false,\n                  \"tooltip\": false,\n                  \"viz\": false\n                },\n                \"lineInterpolation\": \"linear\",\n                \"lineWidth\": 1,\n                \"pointSize\": 5,\n                \"scaleDistribution\": {\n                  \"type\": \"linear\"\n                },\n                \"showPoints\": \"never\",\n                \"spanNulls\": false,\n                \"stacking\": {\n                  \"group\": \"A\",\n                  \"mode\": \"normal\"\n                },\n                \"thresholdsStyle\": {\n                  \"mode\": \"off\"\n                }\n              },\n              \"links\": [],\n              \"mappings\": [],\n              \"min\": 0,\n              \"thresholds\": {\n                \"mode\": \"absolute\",\n                \"steps\": [\n                  {\n                    \"color\": \"green\"\n                  },\n                  {\n                    \"color\": \"red\",\n                    \"value\": 80\n                  }\n                ]\n              },\n              \"unit\": \"short\"\n            },\n            \"overrides\": []\n          },\n          \"gridPos\": {\n            \"h\": 10,\n            \"w\": 12,\n            \"x\": 12,\n            \"y\": 73\n          },\n          \"id\": 315,\n          \"links\": [],\n          \"options\": {\n            \"legend\": {\n              \"calcs\": [\n                \"mean\",\n        
        \"lastNotNull\",\n                \"max\",\n                \"min\"\n              ],\n              \"displayMode\": \"table\",\n              \"placement\": \"bottom\",\n              \"showLegend\": true\n            },\n            \"tooltip\": {\n              \"mode\": \"multi\",\n              \"sort\": \"none\"\n            }\n          },\n          \"pluginVersion\": \"9.2.0\",\n          \"targets\": [\n            {\n              \"datasource\": {\n                \"type\": \"prometheus\",\n                \"uid\": \"${datasource}\"\n              },\n              \"expr\": \"node_processes_state{instance=\\\"$node\\\",job=\\\"$job\\\"}\",\n              \"format\": \"time_series\",\n              \"interval\": \"\",\n              \"intervalFactor\": 1,\n              \"legendFormat\": \"{{ state }}\",\n              \"refId\": \"A\",\n              \"step\": 240\n            }\n          ],\n          \"title\": \"Processes State\",\n          \"type\": \"timeseries\"\n        },\n        {\n          \"datasource\": {\n            \"type\": \"prometheus\",\n            \"uid\": \"${datasource}\"\n          },\n          \"fieldConfig\": {\n            \"defaults\": {\n              \"color\": {\n                \"mode\": \"palette-classic\"\n              },\n              \"custom\": {\n                \"axisCenteredZero\": false,\n                \"axisColorMode\": \"text\",\n                \"axisLabel\": \"forks / sec\",\n                \"axisPlacement\": \"auto\",\n                \"barAlignment\": 0,\n                \"drawStyle\": \"line\",\n                \"fillOpacity\": 20,\n                \"gradientMode\": \"none\",\n                \"hideFrom\": {\n                  \"legend\": false,\n                  \"tooltip\": false,\n                  \"viz\": false\n                },\n                \"lineInterpolation\": \"linear\",\n                \"lineWidth\": 1,\n                \"pointSize\": 5,\n                
\"scaleDistribution\": {\n                  \"type\": \"linear\"\n                },\n                \"showPoints\": \"never\",\n                \"spanNulls\": false,\n                \"stacking\": {\n                  \"group\": \"A\",\n                  \"mode\": \"none\"\n                },\n                \"thresholdsStyle\": {\n                  \"mode\": \"off\"\n                }\n              },\n              \"links\": [],\n              \"mappings\": [],\n              \"min\": 0,\n              \"thresholds\": {\n                \"mode\": \"absolute\",\n                \"steps\": [\n                  {\n                    \"color\": \"green\"\n                  },\n                  {\n                    \"color\": \"red\",\n                    \"value\": 80\n                  }\n                ]\n              },\n              \"unit\": \"short\"\n            },\n            \"overrides\": []\n          },\n          \"gridPos\": {\n            \"h\": 10,\n            \"w\": 12,\n            \"x\": 0,\n            \"y\": 83\n          },\n          \"id\": 148,\n          \"links\": [],\n          \"options\": {\n            \"legend\": {\n              \"calcs\": [\n                \"mean\",\n                \"lastNotNull\",\n                \"max\",\n                \"min\"\n              ],\n              \"displayMode\": \"table\",\n              \"placement\": \"bottom\",\n              \"showLegend\": true\n            },\n            \"tooltip\": {\n              \"mode\": \"multi\",\n              \"sort\": \"none\"\n            }\n          },\n          \"pluginVersion\": \"9.2.0\",\n          \"targets\": [\n            {\n              \"datasource\": {\n                \"type\": \"prometheus\",\n                \"uid\": \"${datasource}\"\n              },\n              \"expr\": \"irate(node_forks_total{instance=\\\"$node\\\",job=\\\"$job\\\"}[$__rate_interval])\",\n              \"format\": \"time_series\",\n              
\"hide\": false,\n              \"intervalFactor\": 1,\n              \"legendFormat\": \"Processes forks second\",\n              \"refId\": \"A\",\n              \"step\": 240\n            }\n          ],\n          \"title\": \"Processes  Forks\",\n          \"type\": \"timeseries\"\n        },\n        {\n          \"datasource\": {\n            \"type\": \"prometheus\",\n            \"uid\": \"${datasource}\"\n          },\n          \"fieldConfig\": {\n            \"defaults\": {\n              \"color\": {\n                \"mode\": \"palette-classic\"\n              },\n              \"custom\": {\n                \"axisCenteredZero\": false,\n                \"axisColorMode\": \"text\",\n                \"axisLabel\": \"bytes\",\n                \"axisPlacement\": \"auto\",\n                \"barAlignment\": 0,\n                \"drawStyle\": \"line\",\n                \"fillOpacity\": 20,\n                \"gradientMode\": \"none\",\n                \"hideFrom\": {\n                  \"legend\": false,\n                  \"tooltip\": false,\n                  \"viz\": false\n                },\n                \"lineInterpolation\": \"linear\",\n                \"lineWidth\": 1,\n                \"pointSize\": 5,\n                \"scaleDistribution\": {\n                  \"type\": \"linear\"\n                },\n                \"showPoints\": \"never\",\n                \"spanNulls\": false,\n                \"stacking\": {\n                  \"group\": \"A\",\n                  \"mode\": \"none\"\n                },\n                \"thresholdsStyle\": {\n                  \"mode\": \"off\"\n                }\n              },\n              \"links\": [],\n              \"mappings\": [],\n              \"min\": 0,\n              \"thresholds\": {\n                \"mode\": \"absolute\",\n                \"steps\": [\n                  {\n                    \"color\": \"green\"\n                  },\n                  {\n                    
\"color\": \"red\",\n                    \"value\": 80\n                  }\n                ]\n              },\n              \"unit\": \"decbytes\"\n            },\n            \"overrides\": [\n              {\n                \"matcher\": {\n                  \"id\": \"byRegexp\",\n                  \"options\": \"/.*Max.*/\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"custom.fillOpacity\",\n                    \"value\": 0\n                  }\n                ]\n              }\n            ]\n          },\n          \"gridPos\": {\n            \"h\": 10,\n            \"w\": 12,\n            \"x\": 12,\n            \"y\": 83\n          },\n          \"id\": 149,\n          \"links\": [],\n          \"options\": {\n            \"legend\": {\n              \"calcs\": [\n                \"mean\",\n                \"lastNotNull\",\n                \"max\",\n                \"min\"\n              ],\n              \"displayMode\": \"table\",\n              \"placement\": \"bottom\",\n              \"showLegend\": true\n            },\n            \"tooltip\": {\n              \"mode\": \"multi\",\n              \"sort\": \"none\"\n            }\n          },\n          \"pluginVersion\": \"9.2.0\",\n          \"targets\": [\n            {\n              \"datasource\": {\n                \"type\": \"prometheus\",\n                \"uid\": \"${datasource}\"\n              },\n              \"expr\":\n                  \"irate(process_virtual_memory_bytes{instance=\\\"$node\\\",job=\\\"$job\\\"}[$__rate_interval])\",\n              \"hide\": false,\n              \"interval\": \"\",\n              \"intervalFactor\": 1,\n              \"legendFormat\": \"Processes virtual memory size in bytes\",\n              \"refId\": \"A\",\n              \"step\": 240\n            },\n            {\n              \"datasource\": {\n                \"type\": \"prometheus\",\n                \"uid\": 
\"${datasource}\"\n              },\n              \"expr\": \"process_resident_memory_max_bytes{instance=\\\"$node\\\",job=\\\"$job\\\"}\",\n              \"hide\": false,\n              \"interval\": \"\",\n              \"intervalFactor\": 1,\n              \"legendFormat\": \"Maximum amount of virtual memory available in bytes\",\n              \"refId\": \"B\",\n              \"step\": 240\n            },\n            {\n              \"datasource\": {\n                \"type\": \"prometheus\",\n                \"uid\": \"${datasource}\"\n              },\n              \"expr\":\n                  \"irate(process_virtual_memory_bytes{instance=\\\"$node\\\",job=\\\"$job\\\"}[$__rate_interval])\",\n              \"hide\": false,\n              \"interval\": \"\",\n              \"intervalFactor\": 1,\n              \"legendFormat\": \"Processes virtual memory size in bytes\",\n              \"refId\": \"C\",\n              \"step\": 240\n            },\n            {\n              \"datasource\": {\n                \"type\": \"prometheus\",\n                \"uid\": \"${datasource}\"\n              },\n              \"expr\":\n                  \"irate(process_virtual_memory_max_bytes{instance=\\\"$node\\\",job=\\\"$job\\\"}[$__rate_interval])\",\n              \"hide\": false,\n              \"interval\": \"\",\n              \"intervalFactor\": 1,\n              \"legendFormat\": \"Maximum amount of virtual memory available in bytes\",\n              \"refId\": \"D\",\n              \"step\": 240\n            }\n          ],\n          \"title\": \"Processes Memory\",\n          \"type\": \"timeseries\"\n        },\n        {\n          \"datasource\": {\n            \"type\": \"prometheus\",\n            \"uid\": \"${datasource}\"\n          },\n          \"description\": \"Enable with --collector.processes argument on node-exporter\",\n          \"fieldConfig\": {\n            \"defaults\": {\n              \"color\": {\n                \"mode\": 
\"palette-classic\"\n              },\n              \"custom\": {\n                \"axisCenteredZero\": false,\n                \"axisColorMode\": \"text\",\n                \"axisLabel\": \"counter\",\n                \"axisPlacement\": \"auto\",\n                \"barAlignment\": 0,\n                \"drawStyle\": \"line\",\n                \"fillOpacity\": 20,\n                \"gradientMode\": \"none\",\n                \"hideFrom\": {\n                  \"legend\": false,\n                  \"tooltip\": false,\n                  \"viz\": false\n                },\n                \"lineInterpolation\": \"linear\",\n                \"lineWidth\": 1,\n                \"pointSize\": 5,\n                \"scaleDistribution\": {\n                  \"type\": \"linear\"\n                },\n                \"showPoints\": \"never\",\n                \"spanNulls\": false,\n                \"stacking\": {\n                  \"group\": \"A\",\n                  \"mode\": \"none\"\n                },\n                \"thresholdsStyle\": {\n                  \"mode\": \"off\"\n                }\n              },\n              \"links\": [],\n              \"mappings\": [],\n              \"min\": 0,\n              \"thresholds\": {\n                \"mode\": \"absolute\",\n                \"steps\": [\n                  {\n                    \"color\": \"green\"\n                  },\n                  {\n                    \"color\": \"red\",\n                    \"value\": 80\n                  }\n                ]\n              },\n              \"unit\": \"short\"\n            },\n            \"overrides\": [\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"PIDs limit\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#F2495C\",\n                     
 \"mode\": \"fixed\"\n                    }\n                  },\n                  {\n                    \"id\": \"custom.fillOpacity\",\n                    \"value\": 0\n                  }\n                ]\n              }\n            ]\n          },\n          \"gridPos\": {\n            \"h\": 10,\n            \"w\": 12,\n            \"x\": 0,\n            \"y\": 93\n          },\n          \"id\": 313,\n          \"links\": [],\n          \"options\": {\n            \"legend\": {\n              \"calcs\": [\n                \"mean\",\n                \"lastNotNull\",\n                \"max\",\n                \"min\"\n              ],\n              \"displayMode\": \"table\",\n              \"placement\": \"bottom\",\n              \"showLegend\": true\n            },\n            \"tooltip\": {\n              \"mode\": \"multi\",\n              \"sort\": \"none\"\n            }\n          },\n          \"pluginVersion\": \"9.2.0\",\n          \"targets\": [\n            {\n              \"datasource\": {\n                \"type\": \"prometheus\",\n                \"uid\": \"${datasource}\"\n              },\n              \"expr\": \"node_processes_pids{instance=\\\"$node\\\",job=\\\"$job\\\"}\",\n              \"format\": \"time_series\",\n              \"interval\": \"\",\n              \"intervalFactor\": 1,\n              \"legendFormat\": \"Number of PIDs\",\n              \"refId\": \"A\",\n              \"step\": 240\n            },\n            {\n              \"datasource\": {\n                \"type\": \"prometheus\",\n                \"uid\": \"${datasource}\"\n              },\n              \"expr\": \"node_processes_max_processes{instance=\\\"$node\\\",job=\\\"$job\\\"}\",\n              \"format\": \"time_series\",\n              \"interval\": \"\",\n              \"intervalFactor\": 1,\n              \"legendFormat\": \"PIDs limit\",\n              \"refId\": \"B\",\n              \"step\": 240\n            }\n          ],\n          
\"title\": \"PIDs Number and Limit\",\n          \"type\": \"timeseries\"\n        },\n        {\n          \"datasource\": {\n            \"type\": \"prometheus\",\n            \"uid\": \"${datasource}\"\n          },\n          \"fieldConfig\": {\n            \"defaults\": {\n              \"color\": {\n                \"mode\": \"palette-classic\"\n              },\n              \"custom\": {\n                \"axisCenteredZero\": false,\n                \"axisColorMode\": \"text\",\n                \"axisLabel\": \"seconds\",\n                \"axisPlacement\": \"auto\",\n                \"barAlignment\": 0,\n                \"drawStyle\": \"line\",\n                \"fillOpacity\": 20,\n                \"gradientMode\": \"none\",\n                \"hideFrom\": {\n                  \"legend\": false,\n                  \"tooltip\": false,\n                  \"viz\": false\n                },\n                \"lineInterpolation\": \"linear\",\n                \"lineWidth\": 1,\n                \"pointSize\": 5,\n                \"scaleDistribution\": {\n                  \"type\": \"linear\"\n                },\n                \"showPoints\": \"never\",\n                \"spanNulls\": false,\n                \"stacking\": {\n                  \"group\": \"A\",\n                  \"mode\": \"none\"\n                },\n                \"thresholdsStyle\": {\n                  \"mode\": \"off\"\n                }\n              },\n              \"links\": [],\n              \"mappings\": [],\n              \"thresholds\": {\n                \"mode\": \"absolute\",\n                \"steps\": [\n                  {\n                    \"color\": \"green\"\n                  },\n                  {\n                    \"color\": \"red\",\n                    \"value\": 80\n                  }\n                ]\n              },\n              \"unit\": \"s\"\n            },\n            \"overrides\": [\n              {\n                \"matcher\": {\n       
           \"id\": \"byRegexp\",\n                  \"options\": \"/.*waiting.*/\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"custom.transform\",\n                    \"value\": \"negative-Y\"\n                  }\n                ]\n              }\n            ]\n          },\n          \"gridPos\": {\n            \"h\": 10,\n            \"w\": 12,\n            \"x\": 12,\n            \"y\": 93\n          },\n          \"id\": 305,\n          \"links\": [],\n          \"options\": {\n            \"legend\": {\n              \"calcs\": [\n                \"mean\",\n                \"lastNotNull\",\n                \"max\",\n                \"min\"\n              ],\n              \"displayMode\": \"table\",\n              \"placement\": \"bottom\",\n              \"showLegend\": true\n            },\n            \"tooltip\": {\n              \"mode\": \"multi\",\n              \"sort\": \"none\"\n            }\n          },\n          \"pluginVersion\": \"9.2.0\",\n          \"targets\": [\n            {\n              \"datasource\": {\n                \"type\": \"prometheus\",\n                \"uid\": \"${datasource}\"\n              },\n              \"expr\":\n                  \"irate(node_schedstat_running_seconds_total{instance=\\\"$node\\\",job=\\\"$job\\\"}[$__rate_interval])\",\n              \"format\": \"time_series\",\n              \"interval\": \"\",\n              \"intervalFactor\": 1,\n              \"legendFormat\": \"CPU {{ cpu }} - seconds spent running a process\",\n              \"refId\": \"A\",\n              \"step\": 240\n            },\n            {\n              \"datasource\": {\n                \"type\": \"prometheus\",\n                \"uid\": \"${datasource}\"\n              },\n              \"expr\":\n                  \"irate(node_schedstat_waiting_seconds_total{instance=\\\"$node\\\",job=\\\"$job\\\"}[$__rate_interval])\",\n              \"format\": 
\"time_series\",\n              \"interval\": \"\",\n              \"intervalFactor\": 1,\n              \"legendFormat\": \"CPU {{ cpu }} - seconds spent by processing waiting for this CPU\",\n              \"refId\": \"B\",\n              \"step\": 240\n            }\n          ],\n          \"title\": \"Process schedule stats Running / Waiting\",\n          \"type\": \"timeseries\"\n        },\n        {\n          \"datasource\": {\n            \"type\": \"prometheus\",\n            \"uid\": \"${datasource}\"\n          },\n          \"description\": \"Enable with --collector.processes argument on node-exporter\",\n          \"fieldConfig\": {\n            \"defaults\": {\n              \"color\": {\n                \"mode\": \"palette-classic\"\n              },\n              \"custom\": {\n                \"axisCenteredZero\": false,\n                \"axisColorMode\": \"text\",\n                \"axisLabel\": \"counter\",\n                \"axisPlacement\": \"auto\",\n                \"barAlignment\": 0,\n                \"drawStyle\": \"line\",\n                \"fillOpacity\": 20,\n                \"gradientMode\": \"none\",\n                \"hideFrom\": {\n                  \"legend\": false,\n                  \"tooltip\": false,\n                  \"viz\": false\n                },\n                \"lineInterpolation\": \"linear\",\n                \"lineWidth\": 1,\n                \"pointSize\": 5,\n                \"scaleDistribution\": {\n                  \"type\": \"linear\"\n                },\n                \"showPoints\": \"never\",\n                \"spanNulls\": false,\n                \"stacking\": {\n                  \"group\": \"A\",\n                  \"mode\": \"none\"\n                },\n                \"thresholdsStyle\": {\n                  \"mode\": \"off\"\n                }\n              },\n              \"links\": [],\n              \"mappings\": [],\n              \"min\": 0,\n              \"thresholds\": {\n          
      \"mode\": \"absolute\",\n                \"steps\": [\n                  {\n                    \"color\": \"green\"\n                  },\n                  {\n                    \"color\": \"red\",\n                    \"value\": 80\n                  }\n                ]\n              },\n              \"unit\": \"short\"\n            },\n            \"overrides\": [\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Threads limit\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#F2495C\",\n                      \"mode\": \"fixed\"\n                    }\n                  },\n                  {\n                    \"id\": \"custom.fillOpacity\",\n                    \"value\": 0\n                  }\n                ]\n              }\n            ]\n          },\n          \"gridPos\": {\n            \"h\": 10,\n            \"w\": 12,\n            \"x\": 12,\n            \"y\": 103\n          },\n          \"id\": 314,\n          \"links\": [],\n          \"options\": {\n            \"legend\": {\n              \"calcs\": [\n                \"mean\",\n                \"lastNotNull\",\n                \"max\",\n                \"min\"\n              ],\n              \"displayMode\": \"table\",\n              \"placement\": \"bottom\",\n              \"showLegend\": true\n            },\n            \"tooltip\": {\n              \"mode\": \"multi\",\n              \"sort\": \"none\"\n            }\n          },\n          \"pluginVersion\": \"9.2.0\",\n          \"targets\": [\n            {\n              \"datasource\": {\n                \"type\": \"prometheus\",\n                \"uid\": \"${datasource}\"\n              },\n              \"expr\": \"node_processes_threads{instance=\\\"$node\\\",job=\\\"$job\\\"}\",\n              
\"format\": \"time_series\",\n              \"interval\": \"\",\n              \"intervalFactor\": 1,\n              \"legendFormat\": \"Allocated threads\",\n              \"refId\": \"A\",\n              \"step\": 240\n            },\n            {\n              \"datasource\": {\n                \"type\": \"prometheus\",\n                \"uid\": \"${datasource}\"\n              },\n              \"expr\": \"node_processes_max_threads{instance=\\\"$node\\\",job=\\\"$job\\\"}\",\n              \"format\": \"time_series\",\n              \"interval\": \"\",\n              \"intervalFactor\": 1,\n              \"legendFormat\": \"Threads limit\",\n              \"refId\": \"B\",\n              \"step\": 240\n            }\n          ],\n          \"title\": \"Threads Number and Limit\",\n          \"type\": \"timeseries\"\n        }\n      ],\n      \"targets\": [\n        {\n          \"datasource\": {\n            \"type\": \"prometheus\",\n            \"uid\": \"000000001\"\n          },\n          \"refId\": \"A\"\n        }\n      ],\n      \"title\": \"System Processes\",\n      \"type\": \"row\"\n    },\n    {\n      \"collapsed\": true,\n      \"datasource\": {\n        \"type\": \"prometheus\",\n        \"uid\": \"000000001\"\n      },\n      \"gridPos\": {\n        \"h\": 1,\n        \"w\": 24,\n        \"x\": 0,\n        \"y\": 25\n      },\n      \"id\": 269,\n      \"panels\": [\n        {\n          \"datasource\": {\n            \"type\": \"prometheus\",\n            \"uid\": \"${datasource}\"\n          },\n          \"fieldConfig\": {\n            \"defaults\": {\n              \"color\": {\n                \"mode\": \"palette-classic\"\n              },\n              \"custom\": {\n                \"axisCenteredZero\": false,\n                \"axisColorMode\": \"text\",\n                \"axisLabel\": \"counter\",\n                \"axisPlacement\": \"auto\",\n                \"barAlignment\": 0,\n                \"drawStyle\": \"line\",\n      
          \"fillOpacity\": 20,\n                \"gradientMode\": \"none\",\n                \"hideFrom\": {\n                  \"legend\": false,\n                  \"tooltip\": false,\n                  \"viz\": false\n                },\n                \"lineInterpolation\": \"linear\",\n                \"lineWidth\": 1,\n                \"pointSize\": 5,\n                \"scaleDistribution\": {\n                  \"type\": \"linear\"\n                },\n                \"showPoints\": \"never\",\n                \"spanNulls\": false,\n                \"stacking\": {\n                  \"group\": \"A\",\n                  \"mode\": \"none\"\n                },\n                \"thresholdsStyle\": {\n                  \"mode\": \"off\"\n                }\n              },\n              \"links\": [],\n              \"mappings\": [],\n              \"min\": 0,\n              \"thresholds\": {\n                \"mode\": \"absolute\",\n                \"steps\": [\n                  {\n                    \"color\": \"green\"\n                  },\n                  {\n                    \"color\": \"red\",\n                    \"value\": 80\n                  }\n                ]\n              },\n              \"unit\": \"short\"\n            },\n            \"overrides\": []\n          },\n          \"gridPos\": {\n            \"h\": 10,\n            \"w\": 12,\n            \"x\": 0,\n            \"y\": 26\n          },\n          \"id\": 8,\n          \"links\": [],\n          \"options\": {\n            \"legend\": {\n              \"calcs\": [\n                \"mean\",\n                \"lastNotNull\",\n                \"max\",\n                \"min\"\n              ],\n              \"displayMode\": \"table\",\n              \"placement\": \"bottom\",\n              \"showLegend\": true\n            },\n            \"tooltip\": {\n              \"mode\": \"multi\",\n              \"sort\": \"none\"\n            }\n          },\n          
\"pluginVersion\": \"9.2.0\",\n          \"targets\": [\n            {\n              \"datasource\": {\n                \"type\": \"prometheus\",\n                \"uid\": \"${datasource}\"\n              },\n              \"expr\":\n                  \"irate(node_context_switches_total{instance=\\\"$node\\\",job=\\\"$job\\\"}[$__rate_interval])\",\n              \"format\": \"time_series\",\n              \"intervalFactor\": 1,\n              \"legendFormat\": \"Context switches\",\n              \"refId\": \"A\",\n              \"step\": 240\n            },\n            {\n              \"datasource\": {\n                \"type\": \"prometheus\",\n                \"uid\": \"${datasource}\"\n              },\n              \"expr\": \"irate(node_intr_total{instance=\\\"$node\\\",job=\\\"$job\\\"}[$__rate_interval])\",\n              \"format\": \"time_series\",\n              \"hide\": false,\n              \"intervalFactor\": 1,\n              \"legendFormat\": \"Interrupts\",\n              \"refId\": \"B\",\n              \"step\": 240\n            }\n          ],\n          \"title\": \"Context Switches / Interrupts\",\n          \"type\": \"timeseries\"\n        },\n        {\n          \"datasource\": {\n            \"type\": \"prometheus\",\n            \"uid\": \"${datasource}\"\n          },\n          \"fieldConfig\": {\n            \"defaults\": {\n              \"color\": {\n                \"mode\": \"palette-classic\"\n              },\n              \"custom\": {\n                \"axisCenteredZero\": false,\n                \"axisColorMode\": \"text\",\n                \"axisLabel\": \"counter\",\n                \"axisPlacement\": \"auto\",\n                \"barAlignment\": 0,\n                \"drawStyle\": \"line\",\n                \"fillOpacity\": 20,\n                \"gradientMode\": \"none\",\n                \"hideFrom\": {\n                  \"legend\": false,\n                  \"tooltip\": false,\n                  \"viz\": false\n    
            },\n                \"lineInterpolation\": \"linear\",\n                \"lineWidth\": 1,\n                \"pointSize\": 5,\n                \"scaleDistribution\": {\n                  \"type\": \"linear\"\n                },\n                \"showPoints\": \"never\",\n                \"spanNulls\": false,\n                \"stacking\": {\n                  \"group\": \"A\",\n                  \"mode\": \"none\"\n                },\n                \"thresholdsStyle\": {\n                  \"mode\": \"off\"\n                }\n              },\n              \"links\": [],\n              \"mappings\": [],\n              \"min\": 0,\n              \"thresholds\": {\n                \"mode\": \"absolute\",\n                \"steps\": [\n                  {\n                    \"color\": \"green\"\n                  },\n                  {\n                    \"color\": \"red\",\n                    \"value\": 80\n                  }\n                ]\n              },\n              \"unit\": \"short\"\n            },\n            \"overrides\": []\n          },\n          \"gridPos\": {\n            \"h\": 10,\n            \"w\": 12,\n            \"x\": 12,\n            \"y\": 26\n          },\n          \"id\": 7,\n          \"links\": [],\n          \"options\": {\n            \"legend\": {\n              \"calcs\": [\n                \"mean\",\n                \"lastNotNull\",\n                \"max\",\n                \"min\"\n              ],\n              \"displayMode\": \"table\",\n              \"placement\": \"bottom\",\n              \"showLegend\": true\n            },\n            \"tooltip\": {\n              \"mode\": \"multi\",\n              \"sort\": \"none\"\n            }\n          },\n          \"pluginVersion\": \"9.2.0\",\n          \"targets\": [\n            {\n              \"datasource\": {\n                \"type\": \"prometheus\",\n                \"uid\": \"${datasource}\"\n              },\n              \"expr\": 
\"node_load1{instance=\\\"$node\\\",job=\\\"$job\\\"}\",\n              \"format\": \"time_series\",\n              \"intervalFactor\": 4,\n              \"legendFormat\": \"Load 1m\",\n              \"refId\": \"A\",\n              \"step\": 240\n            },\n            {\n              \"datasource\": {\n                \"type\": \"prometheus\",\n                \"uid\": \"${datasource}\"\n              },\n              \"expr\": \"node_load5{instance=\\\"$node\\\",job=\\\"$job\\\"}\",\n              \"format\": \"time_series\",\n              \"intervalFactor\": 4,\n              \"legendFormat\": \"Load 5m\",\n              \"refId\": \"B\",\n              \"step\": 240\n            },\n            {\n              \"datasource\": {\n                \"type\": \"prometheus\",\n                \"uid\": \"${datasource}\"\n              },\n              \"expr\": \"node_load15{instance=\\\"$node\\\",job=\\\"$job\\\"}\",\n              \"format\": \"time_series\",\n              \"intervalFactor\": 4,\n              \"legendFormat\": \"Load 15m\",\n              \"refId\": \"C\",\n              \"step\": 240\n            }\n          ],\n          \"title\": \"System Load\",\n          \"type\": \"timeseries\"\n        },\n        {\n          \"datasource\": {\n            \"type\": \"prometheus\",\n            \"uid\": \"${datasource}\"\n          },\n          \"fieldConfig\": {\n            \"defaults\": {\n              \"color\": {\n                \"mode\": \"palette-classic\"\n              },\n              \"custom\": {\n                \"axisCenteredZero\": false,\n                \"axisColorMode\": \"text\",\n                \"axisLabel\": \"\",\n                \"axisPlacement\": \"auto\",\n                \"barAlignment\": 0,\n                \"drawStyle\": \"line\",\n                \"fillOpacity\": 0,\n                \"gradientMode\": \"none\",\n                \"hideFrom\": {\n                  \"legend\": false,\n                  
\"tooltip\": false,\n                  \"viz\": false\n                },\n                \"lineInterpolation\": \"linear\",\n                \"lineWidth\": 1,\n                \"pointSize\": 5,\n                \"scaleDistribution\": {\n                  \"type\": \"linear\"\n                },\n                \"showPoints\": \"never\",\n                \"spanNulls\": false,\n                \"stacking\": {\n                  \"group\": \"A\",\n                  \"mode\": \"none\"\n                },\n                \"thresholdsStyle\": {\n                  \"mode\": \"off\"\n                }\n              },\n              \"links\": [],\n              \"mappings\": [],\n              \"thresholds\": {\n                \"mode\": \"absolute\",\n                \"steps\": [\n                  {\n                    \"color\": \"green\"\n                  },\n                  {\n                    \"color\": \"red\",\n                    \"value\": 80\n                  }\n                ]\n              },\n              \"unit\": \"hertz\"\n            },\n            \"overrides\": [\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Max\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"custom.lineStyle\",\n                    \"value\": {\n                      \"dash\": [\n                        10,\n                        10\n                      ],\n                      \"fill\": \"dash\"\n                    }\n                  },\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"blue\",\n                      \"mode\": \"fixed\"\n                    }\n                  },\n                  {\n                    \"id\": \"custom.fillOpacity\",\n                    \"value\": 10\n                  },\n                  {\n          
          \"id\": \"custom.hideFrom\",\n                    \"value\": {\n                      \"legend\": true,\n                      \"tooltip\": false,\n                      \"viz\": false\n                    }\n                  },\n                  {\n                    \"id\": \"custom.fillBelowTo\",\n                    \"value\": \"Min\"\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Min\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"custom.lineStyle\",\n                    \"value\": {\n                      \"dash\": [\n                        10,\n                        10\n                      ],\n                      \"fill\": \"dash\"\n                    }\n                  },\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"blue\",\n                      \"mode\": \"fixed\"\n                    }\n                  },\n                  {\n                    \"id\": \"custom.hideFrom\",\n                    \"value\": {\n                      \"legend\": true,\n                      \"tooltip\": false,\n                      \"viz\": false\n                    }\n                  }\n                ]\n              }\n            ]\n          },\n          \"gridPos\": {\n            \"h\": 10,\n            \"w\": 12,\n            \"x\": 0,\n            \"y\": 36\n          },\n          \"id\": 321,\n          \"links\": [],\n          \"options\": {\n            \"legend\": {\n              \"calcs\": [\n                \"mean\",\n                \"lastNotNull\",\n                \"max\",\n                \"min\"\n              ],\n              \"displayMode\": \"table\",\n              \"placement\": \"bottom\",\n              \"showLegend\": true\n         
   },\n            \"tooltip\": {\n              \"mode\": \"multi\",\n              \"sort\": \"desc\"\n            }\n          },\n          \"pluginVersion\": \"9.2.0\",\n          \"targets\": [\n            {\n              \"datasource\": {\n                \"type\": \"prometheus\",\n                \"uid\": \"${datasource}\"\n              },\n              \"editorMode\": \"code\",\n              \"expr\": \"node_cpu_scaling_frequency_hertz{instance=\\\"$node\\\",job=\\\"$job\\\"}\",\n              \"format\": \"time_series\",\n              \"hide\": false,\n              \"interval\": \"\",\n              \"intervalFactor\": 1,\n              \"legendFormat\": \"CPU {{ cpu }}\",\n              \"range\": true,\n              \"refId\": \"B\",\n              \"step\": 240\n            },\n            {\n              \"datasource\": {\n                \"type\": \"prometheus\",\n                \"uid\": \"${datasource}\"\n              },\n              \"editorMode\": \"code\",\n              \"expr\": \"avg(node_cpu_scaling_frequency_max_hertz{instance=\\\"$node\\\",job=\\\"$job\\\"})\",\n              \"format\": \"time_series\",\n              \"hide\": false,\n              \"interval\": \"\",\n              \"intervalFactor\": 1,\n              \"legendFormat\": \"Max\",\n              \"range\": true,\n              \"refId\": \"A\",\n              \"step\": 240\n            },\n            {\n              \"datasource\": {\n                \"type\": \"prometheus\",\n                \"uid\": \"${datasource}\"\n              },\n              \"editorMode\": \"code\",\n              \"expr\": \"avg(node_cpu_scaling_frequency_min_hertz{instance=\\\"$node\\\",job=\\\"$job\\\"})\",\n              \"format\": \"time_series\",\n              \"hide\": false,\n              \"interval\": \"\",\n              \"intervalFactor\": 1,\n              \"legendFormat\": \"Min\",\n              \"range\": true,\n              \"refId\": \"C\",\n              
\"step\": 240\n            }\n          ],\n          \"title\": \"CPU Frequency Scaling\",\n          \"type\": \"timeseries\"\n        },\n        {\n          \"datasource\": {\n            \"type\": \"prometheus\",\n            \"uid\": \"${datasource}\"\n          },\n          \"description\": \"https://docs.kernel.org/accounting/psi.html\",\n          \"fieldConfig\": {\n            \"defaults\": {\n              \"color\": {\n                \"mode\": \"palette-classic\"\n              },\n              \"custom\": {\n                \"axisCenteredZero\": false,\n                \"axisColorMode\": \"text\",\n                \"axisLabel\": \"\",\n                \"axisPlacement\": \"auto\",\n                \"barAlignment\": 0,\n                \"drawStyle\": \"line\",\n                \"fillOpacity\": 10,\n                \"gradientMode\": \"none\",\n                \"hideFrom\": {\n                  \"legend\": false,\n                  \"tooltip\": false,\n                  \"viz\": false\n                },\n                \"lineInterpolation\": \"linear\",\n                \"lineWidth\": 1,\n                \"pointSize\": 5,\n                \"scaleDistribution\": {\n                  \"type\": \"linear\"\n                },\n                \"showPoints\": \"never\",\n                \"spanNulls\": false,\n                \"stacking\": {\n                  \"group\": \"A\",\n                  \"mode\": \"none\"\n                },\n                \"thresholdsStyle\": {\n                  \"mode\": \"off\"\n                }\n              },\n              \"links\": [],\n              \"mappings\": [],\n              \"min\": 0,\n              \"thresholds\": {\n                \"mode\": \"absolute\",\n                \"steps\": [\n                  {\n                    \"color\": \"green\"\n                  },\n                  {\n                    \"color\": \"red\",\n                    \"value\": 80\n                  }\n                
]\n              },\n              \"unit\": \"percentunit\"\n            },\n            \"overrides\": [\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Memory some\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"dark-red\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Memory full\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"light-red\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"I/O some\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"dark-blue\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"I/O full\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"light-blue\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              }\n            ]\n          },\n          
\"gridPos\": {\n            \"h\": 10,\n            \"w\": 12,\n            \"x\": 12,\n            \"y\": 36\n          },\n          \"id\": 322,\n          \"links\": [],\n          \"options\": {\n            \"legend\": {\n              \"calcs\": [\n                \"mean\",\n                \"lastNotNull\",\n                \"max\",\n                \"min\"\n              ],\n              \"displayMode\": \"table\",\n              \"placement\": \"bottom\",\n              \"showLegend\": true\n            },\n            \"tooltip\": {\n              \"mode\": \"multi\",\n              \"sort\": \"none\"\n            }\n          },\n          \"pluginVersion\": \"9.2.0\",\n          \"targets\": [\n            {\n              \"datasource\": {\n                \"type\": \"prometheus\",\n                \"uid\": \"${datasource}\"\n              },\n              \"editorMode\": \"code\",\n              \"expr\":\n                  \"rate(node_pressure_cpu_waiting_seconds_total{instance=\\\"$node\\\",job=\\\"$job\\\"}[$__rate_interval])\",\n              \"format\": \"time_series\",\n              \"intervalFactor\": 1,\n              \"legendFormat\": \"CPU some\",\n              \"range\": true,\n              \"refId\": \"CPU some\",\n              \"step\": 240\n            },\n            {\n              \"datasource\": {\n                \"type\": \"prometheus\",\n                \"uid\": \"${datasource}\"\n              },\n              \"editorMode\": \"code\",\n              \"expr\":\n                  \"rate(node_pressure_memory_waiting_seconds_total{instance=\\\"$node\\\",job=\\\"$job\\\"}[$__rate_interval])\",\n              \"format\": \"time_series\",\n              \"hide\": false,\n              \"intervalFactor\": 1,\n              \"legendFormat\": \"Memory some\",\n              \"range\": true,\n              \"refId\": \"Memory some\",\n              \"step\": 240\n            },\n            {\n              \"datasource\": {\n      
          \"type\": \"prometheus\",\n                \"uid\": \"${datasource}\"\n              },\n              \"editorMode\": \"code\",\n              \"expr\":\n                  \"rate(node_pressure_memory_stalled_seconds_total{instance=\\\"$node\\\",job=\\\"$job\\\"}[$__rate_interval])\",\n              \"format\": \"time_series\",\n              \"hide\": false,\n              \"intervalFactor\": 1,\n              \"legendFormat\": \"Memory full\",\n              \"range\": true,\n              \"refId\": \"Memory full\",\n              \"step\": 240\n            },\n            {\n              \"datasource\": {\n                \"type\": \"prometheus\",\n                \"uid\": \"${datasource}\"\n              },\n              \"editorMode\": \"code\",\n              \"expr\":\n                  \"rate(node_pressure_io_waiting_seconds_total{instance=\\\"$node\\\",job=\\\"$job\\\"}[$__rate_interval])\",\n              \"format\": \"time_series\",\n              \"hide\": false,\n              \"intervalFactor\": 1,\n              \"legendFormat\": \"I/O some\",\n              \"range\": true,\n              \"refId\": \"I/O some\",\n              \"step\": 240\n            },\n            {\n              \"datasource\": {\n                \"type\": \"prometheus\",\n                \"uid\": \"${datasource}\"\n              },\n              \"editorMode\": \"code\",\n              \"expr\":\n                  \"rate(node_pressure_io_stalled_seconds_total{instance=\\\"$node\\\",job=\\\"$job\\\"}[$__rate_interval])\",\n              \"format\": \"time_series\",\n              \"hide\": false,\n              \"intervalFactor\": 1,\n              \"legendFormat\": \"I/O full\",\n              \"range\": true,\n              \"refId\": \"I/O full\",\n              \"step\": 240\n            }\n          ],\n          \"title\": \"Pressure Stall Information\",\n          \"type\": \"timeseries\"\n        },\n        {\n          \"datasource\": {\n            
\"type\": \"prometheus\",\n            \"uid\": \"${datasource}\"\n          },\n          \"description\": \"Enable with --collector.interrupts argument on node-exporter\",\n          \"fieldConfig\": {\n            \"defaults\": {\n              \"color\": {\n                \"mode\": \"palette-classic\"\n              },\n              \"custom\": {\n                \"axisCenteredZero\": false,\n                \"axisColorMode\": \"text\",\n                \"axisLabel\": \"counter\",\n                \"axisPlacement\": \"auto\",\n                \"barAlignment\": 0,\n                \"drawStyle\": \"line\",\n                \"fillOpacity\": 20,\n                \"gradientMode\": \"none\",\n                \"hideFrom\": {\n                  \"legend\": false,\n                  \"tooltip\": false,\n                  \"viz\": false\n                },\n                \"lineInterpolation\": \"linear\",\n                \"lineWidth\": 1,\n                \"pointSize\": 5,\n                \"scaleDistribution\": {\n                  \"type\": \"linear\"\n                },\n                \"showPoints\": \"never\",\n                \"spanNulls\": false,\n                \"stacking\": {\n                  \"group\": \"A\",\n                  \"mode\": \"none\"\n                },\n                \"thresholdsStyle\": {\n                  \"mode\": \"off\"\n                }\n              },\n              \"links\": [],\n              \"mappings\": [],\n              \"min\": 0,\n              \"thresholds\": {\n                \"mode\": \"absolute\",\n                \"steps\": [\n                  {\n                    \"color\": \"green\"\n                  },\n                  {\n                    \"color\": \"red\",\n                    \"value\": 80\n                  }\n                ]\n              },\n              \"unit\": \"short\"\n            },\n            \"overrides\": [\n              {\n                \"matcher\": {\n                  
\"id\": \"byRegexp\",\n                  \"options\": \"/.*Critical*./\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#E24D42\",\n                      \"mode\": \"fixed\"\n                    }\n                  },\n                  {\n                    \"id\": \"custom.fillOpacity\",\n                    \"value\": 0\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byRegexp\",\n                  \"options\": \"/.*Max*./\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#EF843C\",\n                      \"mode\": \"fixed\"\n                    }\n                  },\n                  {\n                    \"id\": \"custom.fillOpacity\",\n                    \"value\": 0\n                  }\n                ]\n              }\n            ]\n          },\n          \"gridPos\": {\n            \"h\": 10,\n            \"w\": 12,\n            \"x\": 0,\n            \"y\": 46\n          },\n          \"id\": 259,\n          \"links\": [],\n          \"options\": {\n            \"legend\": {\n              \"calcs\": [\n                \"mean\",\n                \"lastNotNull\",\n                \"max\",\n                \"min\"\n              ],\n              \"displayMode\": \"table\",\n              \"placement\": \"bottom\",\n              \"showLegend\": true\n            },\n            \"tooltip\": {\n              \"mode\": \"multi\",\n              \"sort\": \"none\"\n            }\n          },\n          \"pluginVersion\": \"9.2.0\",\n          \"targets\": [\n            {\n              \"datasource\": {\n                \"type\": \"prometheus\",\n                \"uid\": 
\"${datasource}\"\n              },\n              \"expr\":\n                  \"irate(node_interrupts_total{instance=\\\"$node\\\",job=\\\"$job\\\"}[$__rate_interval])\",\n              \"format\": \"time_series\",\n              \"interval\": \"\",\n              \"intervalFactor\": 1,\n              \"legendFormat\": \"{{ type }} - {{ info }}\",\n              \"refId\": \"A\",\n              \"step\": 240\n            }\n          ],\n          \"title\": \"Interrupts Detail\",\n          \"type\": \"timeseries\"\n        },\n        {\n          \"datasource\": {\n            \"type\": \"prometheus\",\n            \"uid\": \"${datasource}\"\n          },\n          \"fieldConfig\": {\n            \"defaults\": {\n              \"color\": {\n                \"mode\": \"palette-classic\"\n              },\n              \"custom\": {\n                \"axisCenteredZero\": false,\n                \"axisColorMode\": \"text\",\n                \"axisLabel\": \"counter\",\n                \"axisPlacement\": \"auto\",\n                \"barAlignment\": 0,\n                \"drawStyle\": \"line\",\n                \"fillOpacity\": 20,\n                \"gradientMode\": \"none\",\n                \"hideFrom\": {\n                  \"legend\": false,\n                  \"tooltip\": false,\n                  \"viz\": false\n                },\n                \"lineInterpolation\": \"linear\",\n                \"lineWidth\": 1,\n                \"pointSize\": 5,\n                \"scaleDistribution\": {\n                  \"type\": \"linear\"\n                },\n                \"showPoints\": \"never\",\n                \"spanNulls\": false,\n                \"stacking\": {\n                  \"group\": \"A\",\n                  \"mode\": \"none\"\n                },\n                \"thresholdsStyle\": {\n                  \"mode\": \"off\"\n                }\n              },\n              \"links\": [],\n              \"mappings\": [],\n              
\"thresholds\": {\n                \"mode\": \"absolute\",\n                \"steps\": [\n                  {\n                    \"color\": \"green\"\n                  },\n                  {\n                    \"color\": \"red\",\n                    \"value\": 80\n                  }\n                ]\n              },\n              \"unit\": \"short\"\n            },\n            \"overrides\": []\n          },\n          \"gridPos\": {\n            \"h\": 10,\n            \"w\": 12,\n            \"x\": 12,\n            \"y\": 46\n          },\n          \"id\": 306,\n          \"links\": [],\n          \"options\": {\n            \"legend\": {\n              \"calcs\": [\n                \"mean\",\n                \"lastNotNull\",\n                \"max\",\n                \"min\"\n              ],\n              \"displayMode\": \"table\",\n              \"placement\": \"bottom\",\n              \"showLegend\": true\n            },\n            \"tooltip\": {\n              \"mode\": \"multi\",\n              \"sort\": \"none\"\n            }\n          },\n          \"pluginVersion\": \"9.2.0\",\n          \"targets\": [\n            {\n              \"datasource\": {\n                \"type\": \"prometheus\",\n                \"uid\": \"${datasource}\"\n              },\n              \"expr\":\n                  \"irate(node_schedstat_timeslices_total{instance=\\\"$node\\\",job=\\\"$job\\\"}[$__rate_interval])\",\n              \"format\": \"time_series\",\n              \"interval\": \"\",\n              \"intervalFactor\": 1,\n              \"legendFormat\": \"CPU {{ cpu }}\",\n              \"refId\": \"A\",\n              \"step\": 240\n            }\n          ],\n          \"title\": \"Schedule timeslices executed by each cpu\",\n          \"type\": \"timeseries\"\n        },\n        {\n          \"datasource\": {\n            \"type\": \"prometheus\",\n            \"uid\": \"${datasource}\"\n          },\n          \"fieldConfig\": {\n        
    \"defaults\": {\n              \"color\": {\n                \"mode\": \"palette-classic\"\n              },\n              \"custom\": {\n                \"axisCenteredZero\": false,\n                \"axisColorMode\": \"text\",\n                \"axisLabel\": \"counter\",\n                \"axisPlacement\": \"auto\",\n                \"barAlignment\": 0,\n                \"drawStyle\": \"line\",\n                \"fillOpacity\": 20,\n                \"gradientMode\": \"none\",\n                \"hideFrom\": {\n                  \"legend\": false,\n                  \"tooltip\": false,\n                  \"viz\": false\n                },\n                \"lineInterpolation\": \"linear\",\n                \"lineWidth\": 1,\n                \"pointSize\": 5,\n                \"scaleDistribution\": {\n                  \"type\": \"linear\"\n                },\n                \"showPoints\": \"never\",\n                \"spanNulls\": false,\n                \"stacking\": {\n                  \"group\": \"A\",\n                  \"mode\": \"none\"\n                },\n                \"thresholdsStyle\": {\n                  \"mode\": \"off\"\n                }\n              },\n              \"links\": [],\n              \"mappings\": [],\n              \"min\": 0,\n              \"thresholds\": {\n                \"mode\": \"absolute\",\n                \"steps\": [\n                  {\n                    \"color\": \"green\"\n                  },\n                  {\n                    \"color\": \"red\",\n                    \"value\": 80\n                  }\n                ]\n              },\n              \"unit\": \"short\"\n            },\n            \"overrides\": []\n          },\n          \"gridPos\": {\n            \"h\": 10,\n            \"w\": 12,\n            \"x\": 0,\n            \"y\": 56\n          },\n          \"id\": 151,\n          \"links\": [],\n          \"options\": {\n            \"legend\": {\n              \"calcs\": [\n   
             \"mean\",\n                \"lastNotNull\",\n                \"max\",\n                \"min\"\n              ],\n              \"displayMode\": \"table\",\n              \"placement\": \"bottom\",\n              \"showLegend\": true\n            },\n            \"tooltip\": {\n              \"mode\": \"multi\",\n              \"sort\": \"none\"\n            }\n          },\n          \"pluginVersion\": \"9.2.0\",\n          \"targets\": [\n            {\n              \"datasource\": {\n                \"type\": \"prometheus\",\n                \"uid\": \"${datasource}\"\n              },\n              \"expr\": \"node_entropy_available_bits{instance=\\\"$node\\\",job=\\\"$job\\\"}\",\n              \"format\": \"time_series\",\n              \"intervalFactor\": 1,\n              \"legendFormat\": \"Entropy available to random number generators\",\n              \"refId\": \"A\",\n              \"step\": 240\n            }\n          ],\n          \"title\": \"Entropy\",\n          \"type\": \"timeseries\"\n        },\n        {\n          \"datasource\": {\n            \"type\": \"prometheus\",\n            \"uid\": \"${datasource}\"\n          },\n          \"fieldConfig\": {\n            \"defaults\": {\n              \"color\": {\n                \"mode\": \"palette-classic\"\n              },\n              \"custom\": {\n                \"axisCenteredZero\": false,\n                \"axisColorMode\": \"text\",\n                \"axisLabel\": \"seconds\",\n                \"axisPlacement\": \"auto\",\n                \"barAlignment\": 0,\n                \"drawStyle\": \"line\",\n                \"fillOpacity\": 20,\n                \"gradientMode\": \"none\",\n                \"hideFrom\": {\n                  \"legend\": false,\n                  \"tooltip\": false,\n                  \"viz\": false\n                },\n                \"lineInterpolation\": \"linear\",\n                \"lineWidth\": 1,\n                \"pointSize\": 5,\n    
            \"scaleDistribution\": {\n                  \"type\": \"linear\"\n                },\n                \"showPoints\": \"never\",\n                \"spanNulls\": false,\n                \"stacking\": {\n                  \"group\": \"A\",\n                  \"mode\": \"none\"\n                },\n                \"thresholdsStyle\": {\n                  \"mode\": \"off\"\n                }\n              },\n              \"links\": [],\n              \"mappings\": [],\n              \"thresholds\": {\n                \"mode\": \"absolute\",\n                \"steps\": [\n                  {\n                    \"color\": \"green\"\n                  },\n                  {\n                    \"color\": \"red\",\n                    \"value\": 80\n                  }\n                ]\n              },\n              \"unit\": \"s\"\n            },\n            \"overrides\": []\n          },\n          \"gridPos\": {\n            \"h\": 10,\n            \"w\": 12,\n            \"x\": 12,\n            \"y\": 56\n          },\n          \"id\": 308,\n          \"links\": [],\n          \"options\": {\n            \"legend\": {\n              \"calcs\": [\n                \"mean\",\n                \"lastNotNull\",\n                \"max\",\n                \"min\"\n              ],\n              \"displayMode\": \"table\",\n              \"placement\": \"bottom\",\n              \"showLegend\": true\n            },\n            \"tooltip\": {\n              \"mode\": \"multi\",\n              \"sort\": \"none\"\n            }\n          },\n          \"pluginVersion\": \"9.2.0\",\n          \"targets\": [\n            {\n              \"datasource\": {\n                \"type\": \"prometheus\",\n                \"uid\": \"${datasource}\"\n              },\n              \"expr\":\n                  \"irate(process_cpu_seconds_total{instance=\\\"$node\\\",job=\\\"$job\\\"}[$__rate_interval])\",\n              \"format\": \"time_series\",\n             
 \"interval\": \"\",\n              \"intervalFactor\": 1,\n              \"legendFormat\": \"Time spent\",\n              \"refId\": \"A\",\n              \"step\": 240\n            }\n          ],\n          \"title\": \"CPU time spent in user and system contexts\",\n          \"type\": \"timeseries\"\n        },\n        {\n          \"datasource\": {\n            \"type\": \"prometheus\",\n            \"uid\": \"${datasource}\"\n          },\n          \"fieldConfig\": {\n            \"defaults\": {\n              \"color\": {\n                \"mode\": \"palette-classic\"\n              },\n              \"custom\": {\n                \"axisCenteredZero\": false,\n                \"axisColorMode\": \"text\",\n                \"axisLabel\": \"counter\",\n                \"axisPlacement\": \"auto\",\n                \"barAlignment\": 0,\n                \"drawStyle\": \"line\",\n                \"fillOpacity\": 20,\n                \"gradientMode\": \"none\",\n                \"hideFrom\": {\n                  \"legend\": false,\n                  \"tooltip\": false,\n                  \"viz\": false\n                },\n                \"lineInterpolation\": \"linear\",\n                \"lineWidth\": 1,\n                \"pointSize\": 5,\n                \"scaleDistribution\": {\n                  \"type\": \"linear\"\n                },\n                \"showPoints\": \"never\",\n                \"spanNulls\": false,\n                \"stacking\": {\n                  \"group\": \"A\",\n                  \"mode\": \"none\"\n                },\n                \"thresholdsStyle\": {\n                  \"mode\": \"off\"\n                }\n              },\n              \"links\": [],\n              \"mappings\": [],\n              \"min\": 0,\n              \"thresholds\": {\n                \"mode\": \"absolute\",\n                \"steps\": [\n                  {\n                    \"color\": \"green\"\n                  },\n                  {\n         
           \"color\": \"red\",\n                    \"value\": 80\n                  }\n                ]\n              },\n              \"unit\": \"short\"\n            },\n            \"overrides\": [\n              {\n                \"matcher\": {\n                  \"id\": \"byRegexp\",\n                  \"options\": \"/.*Max*./\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#890F02\",\n                      \"mode\": \"fixed\"\n                    }\n                  },\n                  {\n                    \"id\": \"custom.fillOpacity\",\n                    \"value\": 0\n                  }\n                ]\n              }\n            ]\n          },\n          \"gridPos\": {\n            \"h\": 10,\n            \"w\": 12,\n            \"x\": 0,\n            \"y\": 66\n          },\n          \"id\": 64,\n          \"links\": [],\n          \"options\": {\n            \"legend\": {\n              \"calcs\": [\n                \"mean\",\n                \"lastNotNull\",\n                \"max\",\n                \"min\"\n              ],\n              \"displayMode\": \"table\",\n              \"placement\": \"bottom\",\n              \"showLegend\": true\n            },\n            \"tooltip\": {\n              \"mode\": \"multi\",\n              \"sort\": \"none\"\n            }\n          },\n          \"pluginVersion\": \"9.2.0\",\n          \"targets\": [\n            {\n              \"datasource\": {\n                \"type\": \"prometheus\",\n                \"uid\": \"${datasource}\"\n              },\n              \"expr\": \"process_max_fds{instance=\\\"$node\\\",job=\\\"$job\\\"}\",\n              \"interval\": \"\",\n              \"intervalFactor\": 1,\n              \"legendFormat\": \"Maximum open file descriptors\",\n              \"refId\": \"A\",\n              \"step\": 240\n      
      },\n            {\n              \"datasource\": {\n                \"type\": \"prometheus\",\n                \"uid\": \"${datasource}\"\n              },\n              \"expr\": \"process_open_fds{instance=\\\"$node\\\",job=\\\"$job\\\"}\",\n              \"interval\": \"\",\n              \"intervalFactor\": 1,\n              \"legendFormat\": \"Open file descriptors\",\n              \"refId\": \"B\",\n              \"step\": 240\n            }\n          ],\n          \"title\": \"File Descriptors\",\n          \"type\": \"timeseries\"\n        }\n      ],\n      \"targets\": [\n        {\n          \"datasource\": {\n            \"type\": \"prometheus\",\n            \"uid\": \"000000001\"\n          },\n          \"refId\": \"A\"\n        }\n      ],\n      \"title\": \"System Misc\",\n      \"type\": \"row\"\n    },\n    {\n      \"collapsed\": true,\n      \"datasource\": {\n        \"type\": \"prometheus\",\n        \"uid\": \"000000001\"\n      },\n      \"gridPos\": {\n        \"h\": 1,\n        \"w\": 24,\n        \"x\": 0,\n        \"y\": 26\n      },\n      \"id\": 304,\n      \"panels\": [\n        {\n          \"datasource\": {\n            \"type\": \"prometheus\",\n            \"uid\": \"${datasource}\"\n          },\n          \"fieldConfig\": {\n            \"defaults\": {\n              \"color\": {\n                \"mode\": \"palette-classic\"\n              },\n              \"custom\": {\n                \"axisCenteredZero\": false,\n                \"axisColorMode\": \"text\",\n                \"axisLabel\": \"temperature\",\n                \"axisPlacement\": \"auto\",\n                \"barAlignment\": 0,\n                \"drawStyle\": \"line\",\n                \"fillOpacity\": 20,\n                \"gradientMode\": \"none\",\n                \"hideFrom\": {\n                  \"legend\": false,\n                  \"tooltip\": false,\n                  \"viz\": false\n                },\n                \"lineInterpolation\": 
\"linear\",\n                \"lineWidth\": 1,\n                \"pointSize\": 5,\n                \"scaleDistribution\": {\n                  \"type\": \"linear\"\n                },\n                \"showPoints\": \"never\",\n                \"spanNulls\": false,\n                \"stacking\": {\n                  \"group\": \"A\",\n                  \"mode\": \"none\"\n                },\n                \"thresholdsStyle\": {\n                  \"mode\": \"off\"\n                }\n              },\n              \"links\": [],\n              \"mappings\": [],\n              \"min\": 0,\n              \"thresholds\": {\n                \"mode\": \"absolute\",\n                \"steps\": [\n                  {\n                    \"color\": \"green\"\n                  },\n                  {\n                    \"color\": \"red\",\n                    \"value\": 80\n                  }\n                ]\n              },\n              \"unit\": \"celsius\"\n            },\n            \"overrides\": [\n              {\n                \"matcher\": {\n                  \"id\": \"byRegexp\",\n                  \"options\": \"/.*Critical*./\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#E24D42\",\n                      \"mode\": \"fixed\"\n                    }\n                  },\n                  {\n                    \"id\": \"custom.fillOpacity\",\n                    \"value\": 0\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byRegexp\",\n                  \"options\": \"/.*Max*./\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#EF843C\",\n                      \"mode\": 
\"fixed\"\n                    }\n                  },\n                  {\n                    \"id\": \"custom.fillOpacity\",\n                    \"value\": 0\n                  }\n                ]\n              }\n            ]\n          },\n          \"gridPos\": {\n            \"h\": 10,\n            \"w\": 12,\n            \"x\": 0,\n            \"y\": 59\n          },\n          \"id\": 158,\n          \"links\": [],\n          \"options\": {\n            \"legend\": {\n              \"calcs\": [\n                \"mean\",\n                \"lastNotNull\",\n                \"max\",\n                \"min\"\n              ],\n              \"displayMode\": \"table\",\n              \"placement\": \"bottom\",\n              \"showLegend\": true\n            },\n            \"tooltip\": {\n              \"mode\": \"multi\",\n              \"sort\": \"none\"\n            }\n          },\n          \"pluginVersion\": \"9.2.0\",\n          \"targets\": [\n            {\n              \"datasource\": {\n                \"type\": \"prometheus\",\n                \"uid\": \"${datasource}\"\n              },\n              \"expr\":\n                  \"node_hwmon_temp_celsius{instance=\\\"$node\\\",job=\\\"$job\\\"} * on(chip) group_left(chip_name) node_hwmon_chip_names{instance=\\\"$node\\\",job=\\\"$job\\\"}\",\n              \"format\": \"time_series\",\n              \"interval\": \"\",\n              \"intervalFactor\": 1,\n              \"legendFormat\": \"{{ chip_name }} {{ sensor }} temp\",\n              \"refId\": \"A\",\n              \"step\": 240\n            },\n            {\n              \"datasource\": {\n                \"type\": \"prometheus\",\n                \"uid\": \"${datasource}\"\n              },\n              \"expr\":\n                  \"node_hwmon_temp_crit_alarm_celsius{instance=\\\"$node\\\",job=\\\"$job\\\"} * on(chip) group_left(chip_name) node_hwmon_chip_names{instance=\\\"$node\\\",job=\\\"$job\\\"}\",\n              
\"format\": \"time_series\",\n              \"hide\": true,\n              \"interval\": \"\",\n              \"intervalFactor\": 1,\n              \"legendFormat\": \"{{ chip_name }} {{ sensor }} Critical Alarm\",\n              \"refId\": \"B\",\n              \"step\": 240\n            },\n            {\n              \"datasource\": {\n                \"type\": \"prometheus\",\n                \"uid\": \"${datasource}\"\n              },\n              \"expr\":\n                  \"node_hwmon_temp_crit_celsius{instance=\\\"$node\\\",job=\\\"$job\\\"} * on(chip) group_left(chip_name) node_hwmon_chip_names{instance=\\\"$node\\\",job=\\\"$job\\\"}\",\n              \"format\": \"time_series\",\n              \"interval\": \"\",\n              \"intervalFactor\": 1,\n              \"legendFormat\": \"{{ chip_name }} {{ sensor }} Critical\",\n              \"refId\": \"C\",\n              \"step\": 240\n            },\n            {\n              \"datasource\": {\n                \"type\": \"prometheus\",\n                \"uid\": \"${datasource}\"\n              },\n              \"expr\":\n                  \"node_hwmon_temp_crit_hyst_celsius{instance=\\\"$node\\\",job=\\\"$job\\\"} * on(chip) group_left(chip_name) node_hwmon_chip_names{instance=\\\"$node\\\",job=\\\"$job\\\"}\",\n              \"format\": \"time_series\",\n              \"hide\": true,\n              \"interval\": \"\",\n              \"intervalFactor\": 1,\n              \"legendFormat\": \"{{ chip_name }} {{ sensor }} Critical Historical\",\n              \"refId\": \"D\",\n              \"step\": 240\n            },\n            {\n              \"datasource\": {\n                \"type\": \"prometheus\",\n                \"uid\": \"${datasource}\"\n              },\n              \"expr\":\n                  \"node_hwmon_temp_max_celsius{instance=\\\"$node\\\",job=\\\"$job\\\"} * on(chip) group_left(chip_name) node_hwmon_chip_names{instance=\\\"$node\\\",job=\\\"$job\\\"}\",\n              
\"format\": \"time_series\",\n              \"hide\": true,\n              \"interval\": \"\",\n              \"intervalFactor\": 1,\n              \"legendFormat\": \"{{ chip_name }} {{ sensor }} Max\",\n              \"refId\": \"E\",\n              \"step\": 240\n            }\n          ],\n          \"title\": \"Hardware temperature monitor\",\n          \"type\": \"timeseries\"\n        },\n        {\n          \"datasource\": {\n            \"type\": \"prometheus\",\n            \"uid\": \"${datasource}\"\n          },\n          \"fieldConfig\": {\n            \"defaults\": {\n              \"color\": {\n                \"mode\": \"palette-classic\"\n              },\n              \"custom\": {\n                \"axisCenteredZero\": false,\n                \"axisColorMode\": \"text\",\n                \"axisLabel\": \"counter\",\n                \"axisPlacement\": \"auto\",\n                \"barAlignment\": 0,\n                \"drawStyle\": \"line\",\n                \"fillOpacity\": 20,\n                \"gradientMode\": \"none\",\n                \"hideFrom\": {\n                  \"legend\": false,\n                  \"tooltip\": false,\n                  \"viz\": false\n                },\n                \"lineInterpolation\": \"linear\",\n                \"lineWidth\": 1,\n                \"pointSize\": 5,\n                \"scaleDistribution\": {\n                  \"type\": \"linear\"\n                },\n                \"showPoints\": \"never\",\n                \"spanNulls\": false,\n                \"stacking\": {\n                  \"group\": \"A\",\n                  \"mode\": \"none\"\n                },\n                \"thresholdsStyle\": {\n                  \"mode\": \"off\"\n                }\n              },\n              \"links\": [],\n              \"mappings\": [],\n              \"thresholds\": {\n                \"mode\": \"absolute\",\n                \"steps\": [\n                  {\n                    \"color\": 
\"green\"\n                  },\n                  {\n                    \"color\": \"red\",\n                    \"value\": 80\n                  }\n                ]\n              },\n              \"unit\": \"short\"\n            },\n            \"overrides\": [\n              {\n                \"matcher\": {\n                  \"id\": \"byRegexp\",\n                  \"options\": \"/.*Max*./\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#EF843C\",\n                      \"mode\": \"fixed\"\n                    }\n                  },\n                  {\n                    \"id\": \"custom.fillOpacity\",\n                    \"value\": 0\n                  }\n                ]\n              }\n            ]\n          },\n          \"gridPos\": {\n            \"h\": 10,\n            \"w\": 12,\n            \"x\": 12,\n            \"y\": 59\n          },\n          \"id\": 300,\n          \"links\": [],\n          \"options\": {\n            \"legend\": {\n              \"calcs\": [\n                \"mean\",\n                \"lastNotNull\",\n                \"max\",\n                \"min\"\n              ],\n              \"displayMode\": \"table\",\n              \"placement\": \"bottom\",\n              \"showLegend\": true\n            },\n            \"tooltip\": {\n              \"mode\": \"multi\",\n              \"sort\": \"none\"\n            }\n          },\n          \"pluginVersion\": \"9.2.0\",\n          \"targets\": [\n            {\n              \"datasource\": {\n                \"type\": \"prometheus\",\n                \"uid\": \"${datasource}\"\n              },\n              \"expr\": \"node_cooling_device_cur_state{instance=\\\"$node\\\",job=\\\"$job\\\"}\",\n              \"format\": \"time_series\",\n              \"hide\": false,\n              \"interval\": \"\",\n              
\"intervalFactor\": 1,\n              \"legendFormat\": \"Current {{ name }} in {{ type }}\",\n              \"refId\": \"A\",\n              \"step\": 240\n            },\n            {\n              \"datasource\": {\n                \"type\": \"prometheus\",\n                \"uid\": \"${datasource}\"\n              },\n              \"expr\": \"node_cooling_device_max_state{instance=\\\"$node\\\",job=\\\"$job\\\"}\",\n              \"format\": \"time_series\",\n              \"interval\": \"\",\n              \"intervalFactor\": 1,\n              \"legendFormat\": \"Max {{ name }} in {{ type }}\",\n              \"refId\": \"B\",\n              \"step\": 240\n            }\n          ],\n          \"title\": \"Throttle cooling device\",\n          \"type\": \"timeseries\"\n        },\n        {\n          \"datasource\": {\n            \"type\": \"prometheus\",\n            \"uid\": \"${datasource}\"\n          },\n          \"fieldConfig\": {\n            \"defaults\": {\n              \"color\": {\n                \"mode\": \"palette-classic\"\n              },\n              \"custom\": {\n                \"axisCenteredZero\": false,\n                \"axisColorMode\": \"text\",\n                \"axisLabel\": \"counter\",\n                \"axisPlacement\": \"auto\",\n                \"barAlignment\": 0,\n                \"drawStyle\": \"line\",\n                \"fillOpacity\": 20,\n                \"gradientMode\": \"none\",\n                \"hideFrom\": {\n                  \"legend\": false,\n                  \"tooltip\": false,\n                  \"viz\": false\n                },\n                \"lineInterpolation\": \"linear\",\n                \"lineWidth\": 1,\n                \"pointSize\": 5,\n                \"scaleDistribution\": {\n                  \"type\": \"linear\"\n                },\n                \"showPoints\": \"never\",\n                \"spanNulls\": false,\n                \"stacking\": {\n                  \"group\": 
\"A\",\n                  \"mode\": \"none\"\n                },\n                \"thresholdsStyle\": {\n                  \"mode\": \"off\"\n                }\n              },\n              \"links\": [],\n              \"mappings\": [],\n              \"thresholds\": {\n                \"mode\": \"absolute\",\n                \"steps\": [\n                  {\n                    \"color\": \"green\"\n                  },\n                  {\n                    \"color\": \"red\",\n                    \"value\": 80\n                  }\n                ]\n              },\n              \"unit\": \"short\"\n            },\n            \"overrides\": []\n          },\n          \"gridPos\": {\n            \"h\": 10,\n            \"w\": 12,\n            \"x\": 0,\n            \"y\": 69\n          },\n          \"id\": 302,\n          \"links\": [],\n          \"options\": {\n            \"legend\": {\n              \"calcs\": [\n                \"mean\",\n                \"lastNotNull\",\n                \"max\",\n                \"min\"\n              ],\n              \"displayMode\": \"table\",\n              \"placement\": \"bottom\",\n              \"showLegend\": true\n            },\n            \"tooltip\": {\n              \"mode\": \"multi\",\n              \"sort\": \"none\"\n            }\n          },\n          \"pluginVersion\": \"9.2.0\",\n          \"targets\": [\n            {\n              \"datasource\": {\n                \"type\": \"prometheus\",\n                \"uid\": \"${datasource}\"\n              },\n              \"expr\": \"node_power_supply_online{instance=\\\"$node\\\",job=\\\"$job\\\"}\",\n              \"format\": \"time_series\",\n              \"hide\": false,\n              \"interval\": \"\",\n              \"intervalFactor\": 1,\n              \"legendFormat\": \"{{ power_supply }} online\",\n              \"refId\": \"A\",\n              \"step\": 240\n            }\n          ],\n          \"title\": \"Power 
supply\",\n          \"type\": \"timeseries\"\n        }\n      ],\n      \"targets\": [\n        {\n          \"datasource\": {\n            \"type\": \"prometheus\",\n            \"uid\": \"000000001\"\n          },\n          \"refId\": \"A\"\n        }\n      ],\n      \"title\": \"Hardware Misc\",\n      \"type\": \"row\"\n    },\n    {\n      \"collapsed\": true,\n      \"datasource\": {\n        \"type\": \"prometheus\",\n        \"uid\": \"000000001\"\n      },\n      \"gridPos\": {\n        \"h\": 1,\n        \"w\": 24,\n        \"x\": 0,\n        \"y\": 27\n      },\n      \"id\": 296,\n      \"panels\": [\n        {\n          \"datasource\": {\n            \"type\": \"prometheus\",\n            \"uid\": \"${datasource}\"\n          },\n          \"fieldConfig\": {\n            \"defaults\": {\n              \"color\": {\n                \"mode\": \"palette-classic\"\n              },\n              \"custom\": {\n                \"axisCenteredZero\": false,\n                \"axisColorMode\": \"text\",\n                \"axisLabel\": \"counter\",\n                \"axisPlacement\": \"auto\",\n                \"barAlignment\": 0,\n                \"drawStyle\": \"line\",\n                \"fillOpacity\": 20,\n                \"gradientMode\": \"none\",\n                \"hideFrom\": {\n                  \"legend\": false,\n                  \"tooltip\": false,\n                  \"viz\": false\n                },\n                \"lineInterpolation\": \"linear\",\n                \"lineWidth\": 1,\n                \"pointSize\": 5,\n                \"scaleDistribution\": {\n                  \"type\": \"linear\"\n                },\n                \"showPoints\": \"never\",\n                \"spanNulls\": false,\n                \"stacking\": {\n                  \"group\": \"A\",\n                  \"mode\": \"none\"\n                },\n                \"thresholdsStyle\": {\n                  \"mode\": \"off\"\n                }\n              },\n  
            \"links\": [],\n              \"mappings\": [],\n              \"min\": 0,\n              \"thresholds\": {\n                \"mode\": \"absolute\",\n                \"steps\": [\n                  {\n                    \"color\": \"green\"\n                  },\n                  {\n                    \"color\": \"red\",\n                    \"value\": 80\n                  }\n                ]\n              },\n              \"unit\": \"short\"\n            },\n            \"overrides\": []\n          },\n          \"gridPos\": {\n            \"h\": 10,\n            \"w\": 12,\n            \"x\": 0,\n            \"y\": 46\n          },\n          \"id\": 297,\n          \"links\": [],\n          \"options\": {\n            \"legend\": {\n              \"calcs\": [\n                \"mean\",\n                \"lastNotNull\",\n                \"max\",\n                \"min\"\n              ],\n              \"displayMode\": \"table\",\n              \"placement\": \"bottom\",\n              \"showLegend\": true\n            },\n            \"tooltip\": {\n              \"mode\": \"multi\",\n              \"sort\": \"none\"\n            }\n          },\n          \"pluginVersion\": \"9.2.0\",\n          \"targets\": [\n            {\n              \"datasource\": {\n                \"type\": \"prometheus\",\n                \"uid\": \"${datasource}\"\n              },\n              \"expr\":\n                  \"irate(node_systemd_socket_accepted_connections_total{instance=\\\"$node\\\",job=\\\"$job\\\"}[$__rate_interval])\",\n              \"format\": \"time_series\",\n              \"interval\": \"\",\n              \"intervalFactor\": 1,\n              \"legendFormat\": \"{{ name }} Connections\",\n              \"refId\": \"A\",\n              \"step\": 240\n            }\n          ],\n          \"title\": \"Systemd Sockets\",\n          \"type\": \"timeseries\"\n        },\n        {\n          \"datasource\": {\n            \"type\": 
\"prometheus\",\n            \"uid\": \"${datasource}\"\n          },\n          \"fieldConfig\": {\n            \"defaults\": {\n              \"color\": {\n                \"mode\": \"palette-classic\"\n              },\n              \"custom\": {\n                \"axisCenteredZero\": false,\n                \"axisColorMode\": \"text\",\n                \"axisLabel\": \"counter\",\n                \"axisPlacement\": \"auto\",\n                \"barAlignment\": 0,\n                \"drawStyle\": \"line\",\n                \"fillOpacity\": 20,\n                \"gradientMode\": \"none\",\n                \"hideFrom\": {\n                  \"legend\": false,\n                  \"tooltip\": false,\n                  \"viz\": false\n                },\n                \"lineInterpolation\": \"linear\",\n                \"lineWidth\": 1,\n                \"pointSize\": 5,\n                \"scaleDistribution\": {\n                  \"type\": \"linear\"\n                },\n                \"showPoints\": \"never\",\n                \"spanNulls\": false,\n                \"stacking\": {\n                  \"group\": \"A\",\n                  \"mode\": \"normal\"\n                },\n                \"thresholdsStyle\": {\n                  \"mode\": \"off\"\n                }\n              },\n              \"links\": [],\n              \"mappings\": [],\n              \"thresholds\": {\n                \"mode\": \"absolute\",\n                \"steps\": [\n                  {\n                    \"color\": \"green\"\n                  },\n                  {\n                    \"color\": \"red\",\n                    \"value\": 80\n                  }\n                ]\n              },\n              \"unit\": \"short\"\n            },\n            \"overrides\": [\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Failed\"\n                },\n                \"properties\": [\n             
     {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#F2495C\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Inactive\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#FF9830\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Active\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#73BF69\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Deactivating\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#FFCB7D\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"Activating\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#C8F2C2\",\n                      
\"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              }\n            ]\n          },\n          \"gridPos\": {\n            \"h\": 10,\n            \"w\": 12,\n            \"x\": 12,\n            \"y\": 46\n          },\n          \"id\": 298,\n          \"links\": [],\n          \"options\": {\n            \"legend\": {\n              \"calcs\": [\n                \"mean\",\n                \"lastNotNull\",\n                \"max\",\n                \"min\"\n              ],\n              \"displayMode\": \"table\",\n              \"placement\": \"bottom\",\n              \"showLegend\": true\n            },\n            \"tooltip\": {\n              \"mode\": \"multi\",\n              \"sort\": \"none\"\n            }\n          },\n          \"pluginVersion\": \"9.2.0\",\n          \"targets\": [\n            {\n              \"datasource\": {\n                \"type\": \"prometheus\",\n                \"uid\": \"${datasource}\"\n              },\n              \"expr\": \"node_systemd_units{instance=\\\"$node\\\",job=\\\"$job\\\",state=\\\"activating\\\"}\",\n              \"format\": \"time_series\",\n              \"interval\": \"\",\n              \"intervalFactor\": 1,\n              \"legendFormat\": \"Activating\",\n              \"refId\": \"A\",\n              \"step\": 240\n            },\n            {\n              \"datasource\": {\n                \"type\": \"prometheus\",\n                \"uid\": \"${datasource}\"\n              },\n              \"expr\": \"node_systemd_units{instance=\\\"$node\\\",job=\\\"$job\\\",state=\\\"active\\\"}\",\n              \"format\": \"time_series\",\n              \"interval\": \"\",\n              \"intervalFactor\": 1,\n              \"legendFormat\": \"Active\",\n              \"refId\": \"B\",\n              \"step\": 240\n            },\n            {\n              \"datasource\": {\n                \"type\": \"prometheus\",\n                \"uid\": 
\"${datasource}\"\n              },\n              \"expr\": \"node_systemd_units{instance=\\\"$node\\\",job=\\\"$job\\\",state=\\\"deactivating\\\"}\",\n              \"format\": \"time_series\",\n              \"interval\": \"\",\n              \"intervalFactor\": 1,\n              \"legendFormat\": \"Deactivating\",\n              \"refId\": \"C\",\n              \"step\": 240\n            },\n            {\n              \"datasource\": {\n                \"type\": \"prometheus\",\n                \"uid\": \"${datasource}\"\n              },\n              \"expr\": \"node_systemd_units{instance=\\\"$node\\\",job=\\\"$job\\\",state=\\\"failed\\\"}\",\n              \"format\": \"time_series\",\n              \"interval\": \"\",\n              \"intervalFactor\": 1,\n              \"legendFormat\": \"Failed\",\n              \"refId\": \"D\",\n              \"step\": 240\n            },\n            {\n              \"datasource\": {\n                \"type\": \"prometheus\",\n                \"uid\": \"${datasource}\"\n              },\n              \"expr\": \"node_systemd_units{instance=\\\"$node\\\",job=\\\"$job\\\",state=\\\"inactive\\\"}\",\n              \"format\": \"time_series\",\n              \"interval\": \"\",\n              \"intervalFactor\": 1,\n              \"legendFormat\": \"Inactive\",\n              \"refId\": \"E\",\n              \"step\": 240\n            }\n          ],\n          \"title\": \"Systemd Units State\",\n          \"type\": \"timeseries\"\n        }\n      ],\n      \"targets\": [\n        {\n          \"datasource\": {\n            \"type\": \"prometheus\",\n            \"uid\": \"000000001\"\n          },\n          \"refId\": \"A\"\n        }\n      ],\n      \"title\": \"Systemd\",\n      \"type\": \"row\"\n    },\n    {\n      \"collapsed\": true,\n      \"datasource\": {\n        \"type\": \"prometheus\",\n        \"uid\": \"000000001\"\n      },\n      \"gridPos\": {\n        \"h\": 1,\n        \"w\": 24,\n        
\"x\": 0,\n        \"y\": 28\n      },\n      \"id\": 270,\n      \"panels\": [\n        {\n          \"datasource\": {\n            \"type\": \"prometheus\",\n            \"uid\": \"${datasource}\"\n          },\n          \"description\":\n              \"The number (after merges) of I/O requests completed per second for the device\",\n          \"fieldConfig\": {\n            \"defaults\": {\n              \"color\": {\n                \"mode\": \"palette-classic\"\n              },\n              \"custom\": {\n                \"axisCenteredZero\": false,\n                \"axisColorMode\": \"text\",\n                \"axisLabel\": \"IO read (-) / write (+)\",\n                \"axisPlacement\": \"auto\",\n                \"barAlignment\": 0,\n                \"drawStyle\": \"line\",\n                \"fillOpacity\": 20,\n                \"gradientMode\": \"none\",\n                \"hideFrom\": {\n                  \"legend\": false,\n                  \"tooltip\": false,\n                  \"viz\": false\n                },\n                \"lineInterpolation\": \"linear\",\n                \"lineWidth\": 1,\n                \"pointSize\": 5,\n                \"scaleDistribution\": {\n                  \"type\": \"linear\"\n                },\n                \"showPoints\": \"never\",\n                \"spanNulls\": false,\n                \"stacking\": {\n                  \"group\": \"A\",\n                  \"mode\": \"none\"\n                },\n                \"thresholdsStyle\": {\n                  \"mode\": \"off\"\n                }\n              },\n              \"links\": [],\n              \"mappings\": [],\n              \"thresholds\": {\n                \"mode\": \"absolute\",\n                \"steps\": [\n                  {\n                    \"color\": \"green\"\n                  },\n                  {\n                    \"color\": \"red\",\n                    \"value\": 80\n                  }\n                ]\n              
},\n              \"unit\": \"iops\"\n            },\n            \"overrides\": [\n              {\n                \"matcher\": {\n                  \"id\": \"byRegexp\",\n                  \"options\": \"/.*Read.*/\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"custom.transform\",\n                    \"value\": \"negative-Y\"\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byRegexp\",\n                  \"options\": \"/.*sda_.*/\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#7EB26D\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byRegexp\",\n                  \"options\": \"/.*sdb_.*/\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#EAB839\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byRegexp\",\n                  \"options\": \"/.*sdc_.*/\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#6ED0E0\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byRegexp\",\n                  \"options\": \"/.*sdd_.*/\"\n                },\n 
               \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#EF843C\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byRegexp\",\n                  \"options\": \"/.*sde_.*/\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#E24D42\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byRegexp\",\n                  \"options\": \"/.*sda1.*/\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#584477\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byRegexp\",\n                  \"options\": \"/.*sda2_.*/\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#BA43A9\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byRegexp\",\n                  \"options\": \"/.*sda3_.*/\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                 
     \"fixedColor\": \"#F4D598\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byRegexp\",\n                  \"options\": \"/.*sdb1.*/\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#0A50A1\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byRegexp\",\n                  \"options\": \"/.*sdb2.*/\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#BF1B00\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byRegexp\",\n                  \"options\": \"/.*sdb3.*/\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#E0752D\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byRegexp\",\n                  \"options\": \"/.*sdc1.*/\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#962D82\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n       
       },\n              {\n                \"matcher\": {\n                  \"id\": \"byRegexp\",\n                  \"options\": \"/.*sdc2.*/\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#614D93\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byRegexp\",\n                  \"options\": \"/.*sdc3.*/\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#9AC48A\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byRegexp\",\n                  \"options\": \"/.*sdd1.*/\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#65C5DB\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byRegexp\",\n                  \"options\": \"/.*sdd2.*/\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#F9934E\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byRegexp\",\n                  \"options\": 
\"/.*sdd3.*/\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#EA6460\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byRegexp\",\n                  \"options\": \"/.*sde1.*/\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#E0F9D7\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byRegexp\",\n                  \"options\": \"/.*sdd2.*/\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#FCEACA\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byRegexp\",\n                  \"options\": \"/.*sde3.*/\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#F9E2D2\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              }\n            ]\n          },\n          \"gridPos\": {\n            \"h\": 10,\n            \"w\": 12,\n            \"x\": 0,\n            \"y\": 47\n          },\n          \"id\": 9,\n          \"links\": [],\n          \"options\": {\n            \"legend\": {\n      
        \"calcs\": [\n                \"mean\",\n                \"lastNotNull\",\n                \"max\",\n                \"min\"\n              ],\n              \"displayMode\": \"table\",\n              \"placement\": \"bottom\",\n              \"showLegend\": true\n            },\n            \"tooltip\": {\n              \"mode\": \"single\",\n              \"sort\": \"none\"\n            }\n          },\n          \"pluginVersion\": \"9.2.0\",\n          \"targets\": [\n            {\n              \"datasource\": {\n                \"type\": \"prometheus\",\n                \"uid\": \"${datasource}\"\n              },\n              \"expr\":\n                  \"irate(node_disk_reads_completed_total{instance=\\\"$node\\\",job=\\\"$job\\\"}[$__rate_interval])\",\n              \"intervalFactor\": 4,\n              \"legendFormat\": \"{{device}} - Reads completed\",\n              \"refId\": \"A\",\n              \"step\": 240\n            },\n            {\n              \"datasource\": {\n                \"type\": \"prometheus\",\n                \"uid\": \"${datasource}\"\n              },\n              \"expr\":\n                  \"irate(node_disk_writes_completed_total{instance=\\\"$node\\\",job=\\\"$job\\\"}[$__rate_interval])\",\n              \"intervalFactor\": 1,\n              \"legendFormat\": \"{{device}} - Writes completed\",\n              \"refId\": \"B\",\n              \"step\": 240\n            }\n          ],\n          \"title\": \"Disk IOps Completed\",\n          \"type\": \"timeseries\"\n        },\n        {\n          \"datasource\": {\n            \"type\": \"prometheus\",\n            \"uid\": \"${datasource}\"\n          },\n          \"description\": \"The number of bytes read from or written to the device per second\",\n          \"fieldConfig\": {\n            \"defaults\": {\n              \"color\": {\n                \"mode\": \"palette-classic\"\n              },\n              \"custom\": {\n                
\"axisCenteredZero\": false,\n                \"axisColorMode\": \"text\",\n                \"axisLabel\": \"bytes read (-) / write (+)\",\n                \"axisPlacement\": \"auto\",\n                \"barAlignment\": 0,\n                \"drawStyle\": \"line\",\n                \"fillOpacity\": 20,\n                \"gradientMode\": \"none\",\n                \"hideFrom\": {\n                  \"legend\": false,\n                  \"tooltip\": false,\n                  \"viz\": false\n                },\n                \"lineInterpolation\": \"linear\",\n                \"lineWidth\": 1,\n                \"pointSize\": 5,\n                \"scaleDistribution\": {\n                  \"type\": \"linear\"\n                },\n                \"showPoints\": \"never\",\n                \"spanNulls\": false,\n                \"stacking\": {\n                  \"group\": \"A\",\n                  \"mode\": \"none\"\n                },\n                \"thresholdsStyle\": {\n                  \"mode\": \"off\"\n                }\n              },\n              \"links\": [],\n              \"mappings\": [],\n              \"thresholds\": {\n                \"mode\": \"absolute\",\n                \"steps\": [\n                  {\n                    \"color\": \"green\"\n                  },\n                  {\n                    \"color\": \"red\",\n                    \"value\": 80\n                  }\n                ]\n              },\n              \"unit\": \"Bps\"\n            },\n            \"overrides\": [\n              {\n                \"matcher\": {\n                  \"id\": \"byRegexp\",\n                  \"options\": \"/.*Read.*/\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"custom.transform\",\n                    \"value\": \"negative-Y\"\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": 
\"byRegexp\",\n                  \"options\": \"/.*sda_.*/\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#7EB26D\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byRegexp\",\n                  \"options\": \"/.*sdb_.*/\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#EAB839\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byRegexp\",\n                  \"options\": \"/.*sdc_.*/\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#6ED0E0\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byRegexp\",\n                  \"options\": \"/.*sdd_.*/\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#EF843C\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byRegexp\",\n                  \"options\": \"/.*sde_.*/\"\n                },\n                \"properties\": [\n                  {\n          
          \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#E24D42\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byRegexp\",\n                  \"options\": \"/.*sda1.*/\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#584477\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byRegexp\",\n                  \"options\": \"/.*sda2_.*/\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#BA43A9\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byRegexp\",\n                  \"options\": \"/.*sda3_.*/\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#F4D598\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byRegexp\",\n                  \"options\": \"/.*sdb1.*/\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#0A50A1\",\n                      
\"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byRegexp\",\n                  \"options\": \"/.*sdb2.*/\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#BF1B00\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byRegexp\",\n                  \"options\": \"/.*sdb3.*/\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#E0752D\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byRegexp\",\n                  \"options\": \"/.*sdc1.*/\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#962D82\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byRegexp\",\n                  \"options\": \"/.*sdc2.*/\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#614D93\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": 
{\n                  \"id\": \"byRegexp\",\n                  \"options\": \"/.*sdc3.*/\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#9AC48A\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byRegexp\",\n                  \"options\": \"/.*sdd1.*/\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#65C5DB\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byRegexp\",\n                  \"options\": \"/.*sdd2.*/\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#F9934E\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byRegexp\",\n                  \"options\": \"/.*sdd3.*/\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#EA6460\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byRegexp\",\n                  \"options\": \"/.*sde1.*/\"\n                },\n                \"properties\": [\n  
                {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#E0F9D7\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byRegexp\",\n                  \"options\": \"/.*sdd2.*/\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#FCEACA\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byRegexp\",\n                  \"options\": \"/.*sde3.*/\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#F9E2D2\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              }\n            ]\n          },\n          \"gridPos\": {\n            \"h\": 10,\n            \"w\": 12,\n            \"x\": 12,\n            \"y\": 47\n          },\n          \"id\": 33,\n          \"links\": [],\n          \"options\": {\n            \"legend\": {\n              \"calcs\": [\n                \"mean\",\n                \"lastNotNull\",\n                \"max\",\n                \"min\"\n              ],\n              \"displayMode\": \"table\",\n              \"placement\": \"bottom\",\n              \"showLegend\": true\n            },\n            \"tooltip\": {\n              \"mode\": \"single\",\n              \"sort\": \"none\"\n            }\n          },\n          \"pluginVersion\": \"9.2.0\",\n          \"targets\": [\n            {\n              \"datasource\": {\n  
              \"type\": \"prometheus\",\n                \"uid\": \"${datasource}\"\n              },\n              \"expr\":\n                  \"irate(node_disk_read_bytes_total{instance=\\\"$node\\\",job=\\\"$job\\\"}[$__rate_interval])\",\n              \"format\": \"time_series\",\n              \"intervalFactor\": 4,\n              \"legendFormat\": \"{{device}} - Read bytes\",\n              \"refId\": \"A\",\n              \"step\": 240\n            },\n            {\n              \"datasource\": {\n                \"type\": \"prometheus\",\n                \"uid\": \"${datasource}\"\n              },\n              \"expr\":\n                  \"irate(node_disk_written_bytes_total{instance=\\\"$node\\\",job=\\\"$job\\\"}[$__rate_interval])\",\n              \"format\": \"time_series\",\n              \"intervalFactor\": 1,\n              \"legendFormat\": \"{{device}} - Written bytes\",\n              \"refId\": \"B\",\n              \"step\": 240\n            }\n          ],\n          \"title\": \"Disk R/W Data\",\n          \"type\": \"timeseries\"\n        },\n        {\n          \"datasource\": {\n            \"type\": \"prometheus\",\n            \"uid\": \"${datasource}\"\n          },\n          \"description\":\n              \"The average time for requests issued to the device to be served. This includes the time spent by the requests in queue and the time spent servicing them.\",\n          \"fieldConfig\": {\n            \"defaults\": {\n              \"color\": {\n                \"mode\": \"palette-classic\"\n              },\n              \"custom\": {\n                \"axisCenteredZero\": false,\n                \"axisColorMode\": \"text\",\n                \"axisLabel\": \"time. 
read (-) / write (+)\",\n                \"axisPlacement\": \"auto\",\n                \"barAlignment\": 0,\n                \"drawStyle\": \"line\",\n                \"fillOpacity\": 30,\n                \"gradientMode\": \"none\",\n                \"hideFrom\": {\n                  \"legend\": false,\n                  \"tooltip\": false,\n                  \"viz\": false\n                },\n                \"lineInterpolation\": \"linear\",\n                \"lineWidth\": 1,\n                \"pointSize\": 5,\n                \"scaleDistribution\": {\n                  \"type\": \"linear\"\n                },\n                \"showPoints\": \"never\",\n                \"spanNulls\": false,\n                \"stacking\": {\n                  \"group\": \"A\",\n                  \"mode\": \"none\"\n                },\n                \"thresholdsStyle\": {\n                  \"mode\": \"off\"\n                }\n              },\n              \"links\": [],\n              \"mappings\": [],\n              \"thresholds\": {\n                \"mode\": \"absolute\",\n                \"steps\": [\n                  {\n                    \"color\": \"green\"\n                  },\n                  {\n                    \"color\": \"red\",\n                    \"value\": 80\n                  }\n                ]\n              },\n              \"unit\": \"s\"\n            },\n            \"overrides\": [\n              {\n                \"matcher\": {\n                  \"id\": \"byRegexp\",\n                  \"options\": \"/.*Read.*/\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"custom.transform\",\n                    \"value\": \"negative-Y\"\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byRegexp\",\n                  \"options\": \"/.*sda_.*/\"\n                },\n                \"properties\": [\n        
          {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#7EB26D\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byRegexp\",\n                  \"options\": \"/.*sdb_.*/\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#EAB839\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byRegexp\",\n                  \"options\": \"/.*sdc_.*/\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#6ED0E0\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byRegexp\",\n                  \"options\": \"/.*sdd_.*/\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#EF843C\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byRegexp\",\n                  \"options\": \"/.*sde_.*/\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#E24D42\",\n         
             \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byRegexp\",\n                  \"options\": \"/.*sda1.*/\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#584477\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byRegexp\",\n                  \"options\": \"/.*sda2_.*/\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#BA43A9\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byRegexp\",\n                  \"options\": \"/.*sda3_.*/\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#F4D598\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byRegexp\",\n                  \"options\": \"/.*sdb1.*/\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#0A50A1\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n              
  \"matcher\": {\n                  \"id\": \"byRegexp\",\n                  \"options\": \"/.*sdb2.*/\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#BF1B00\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byRegexp\",\n                  \"options\": \"/.*sdb3.*/\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#E0752D\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byRegexp\",\n                  \"options\": \"/.*sdc1.*/\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#962D82\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byRegexp\",\n                  \"options\": \"/.*sdc2.*/\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#614D93\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byRegexp\",\n                  \"options\": \"/.*sdc3.*/\"\n                },\n                
\"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#9AC48A\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byRegexp\",\n                  \"options\": \"/.*sdd1.*/\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#65C5DB\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byRegexp\",\n                  \"options\": \"/.*sdd2.*/\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#F9934E\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byRegexp\",\n                  \"options\": \"/.*sdd3.*/\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#EA6460\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byRegexp\",\n                  \"options\": \"/.*sde1.*/\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      
\"fixedColor\": \"#E0F9D7\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byRegexp\",\n                  \"options\": \"/.*sde2.*/\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#FCEACA\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byRegexp\",\n                  \"options\": \"/.*sde3.*/\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#F9E2D2\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              }\n            ]\n          },\n          \"gridPos\": {\n            \"h\": 10,\n            \"w\": 12,\n            \"x\": 0,\n            \"y\": 57\n          },\n          \"id\": 37,\n          \"links\": [],\n          \"options\": {\n            \"legend\": {\n              \"calcs\": [\n                \"mean\",\n                \"lastNotNull\",\n                \"max\",\n                \"min\"\n              ],\n              \"displayMode\": \"table\",\n              \"placement\": \"bottom\",\n              \"showLegend\": true\n            },\n            \"tooltip\": {\n              \"mode\": \"single\",\n              \"sort\": \"none\"\n            }\n          },\n          \"pluginVersion\": \"9.2.0\",\n          \"targets\": [\n            {\n              \"datasource\": {\n                \"type\": \"prometheus\",\n                \"uid\": \"${datasource}\"\n              },\n             
 \"expr\":\n                  \"irate(node_disk_read_time_seconds_total{instance=\\\"$node\\\",job=\\\"$job\\\"}[$__rate_interval]) / irate(node_disk_reads_completed_total{instance=\\\"$node\\\",job=\\\"$job\\\"}[$__rate_interval])\",\n              \"hide\": false,\n              \"interval\": \"\",\n              \"intervalFactor\": 4,\n              \"legendFormat\": \"{{device}} - Read wait time avg\",\n              \"refId\": \"A\",\n              \"step\": 240\n            },\n            {\n              \"datasource\": {\n                \"type\": \"prometheus\",\n                \"uid\": \"${datasource}\"\n              },\n              \"expr\":\n                  \"irate(node_disk_write_time_seconds_total{instance=\\\"$node\\\",job=\\\"$job\\\"}[$__rate_interval]) / irate(node_disk_writes_completed_total{instance=\\\"$node\\\",job=\\\"$job\\\"}[$__rate_interval])\",\n              \"hide\": false,\n              \"interval\": \"\",\n              \"intervalFactor\": 1,\n              \"legendFormat\": \"{{device}} - Write wait time avg\",\n              \"refId\": \"B\",\n              \"step\": 240\n            }\n          ],\n          \"title\": \"Disk Average Wait Time\",\n          \"type\": \"timeseries\"\n        },\n        {\n          \"datasource\": {\n            \"type\": \"prometheus\",\n            \"uid\": \"${datasource}\"\n          },\n          \"description\": \"The average queue length of the requests that were issued to the device\",\n          \"fieldConfig\": {\n            \"defaults\": {\n              \"color\": {\n                \"mode\": \"palette-classic\"\n              },\n              \"custom\": {\n                \"axisCenteredZero\": false,\n                \"axisColorMode\": \"text\",\n                \"axisLabel\": \"aqu-sz\",\n                \"axisPlacement\": \"auto\",\n                \"barAlignment\": 0,\n                \"drawStyle\": \"line\",\n                \"fillOpacity\": 20,\n                
\"gradientMode\": \"none\",\n                \"hideFrom\": {\n                  \"legend\": false,\n                  \"tooltip\": false,\n                  \"viz\": false\n                },\n                \"lineInterpolation\": \"linear\",\n                \"lineWidth\": 1,\n                \"pointSize\": 5,\n                \"scaleDistribution\": {\n                  \"type\": \"linear\"\n                },\n                \"showPoints\": \"never\",\n                \"spanNulls\": false,\n                \"stacking\": {\n                  \"group\": \"A\",\n                  \"mode\": \"none\"\n                },\n                \"thresholdsStyle\": {\n                  \"mode\": \"off\"\n                }\n              },\n              \"links\": [],\n              \"mappings\": [],\n              \"min\": 0,\n              \"thresholds\": {\n                \"mode\": \"absolute\",\n                \"steps\": [\n                  {\n                    \"color\": \"green\"\n                  },\n                  {\n                    \"color\": \"red\",\n                    \"value\": 80\n                  }\n                ]\n              },\n              \"unit\": \"none\"\n            },\n            \"overrides\": [\n              {\n                \"matcher\": {\n                  \"id\": \"byRegexp\",\n                  \"options\": \"/.*sda_.*/\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#7EB26D\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byRegexp\",\n                  \"options\": \"/.*sdb_.*/\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    
\"value\": {\n                      \"fixedColor\": \"#EAB839\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byRegexp\",\n                  \"options\": \"/.*sdc_.*/\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#6ED0E0\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byRegexp\",\n                  \"options\": \"/.*sdd_.*/\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#EF843C\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byRegexp\",\n                  \"options\": \"/.*sde_.*/\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#E24D42\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byRegexp\",\n                  \"options\": \"/.*sda1.*/\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#584477\",\n                      \"mode\": \"fixed\"\n                    }\n                
  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byRegexp\",\n                  \"options\": \"/.*sda2_.*/\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#BA43A9\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byRegexp\",\n                  \"options\": \"/.*sda3_.*/\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#F4D598\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byRegexp\",\n                  \"options\": \"/.*sdb1.*/\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#0A50A1\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byRegexp\",\n                  \"options\": \"/.*sdb2.*/\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#BF1B00\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byRegexp\",\n              
    \"options\": \"/.*sdb3.*/\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#E0752D\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byRegexp\",\n                  \"options\": \"/.*sdc1.*/\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#962D82\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byRegexp\",\n                  \"options\": \"/.*sdc2.*/\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#614D93\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byRegexp\",\n                  \"options\": \"/.*sdc3.*/\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#9AC48A\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byRegexp\",\n                  \"options\": \"/.*sdd1.*/\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": 
\"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#65C5DB\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byRegexp\",\n                  \"options\": \"/.*sdd2.*/\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#F9934E\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byRegexp\",\n                  \"options\": \"/.*sdd3.*/\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#EA6460\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byRegexp\",\n                  \"options\": \"/.*sde1.*/\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#E0F9D7\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byRegexp\",\n                  \"options\": \"/.*sde2.*/\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#FCEACA\",\n                      \"mode\": \"fixed\"\n       
             }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byRegexp\",\n                  \"options\": \"/.*sde3.*/\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#F9E2D2\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              }\n            ]\n          },\n          \"gridPos\": {\n            \"h\": 10,\n            \"w\": 12,\n            \"x\": 12,\n            \"y\": 57\n          },\n          \"id\": 35,\n          \"links\": [],\n          \"options\": {\n            \"legend\": {\n              \"calcs\": [\n                \"mean\",\n                \"lastNotNull\",\n                \"max\",\n                \"min\"\n              ],\n              \"displayMode\": \"table\",\n              \"placement\": \"bottom\",\n              \"showLegend\": true\n            },\n            \"tooltip\": {\n              \"mode\": \"single\",\n              \"sort\": \"none\"\n            }\n          },\n          \"pluginVersion\": \"9.2.0\",\n          \"targets\": [\n            {\n              \"datasource\": {\n                \"type\": \"prometheus\",\n                \"uid\": \"${datasource}\"\n              },\n              \"expr\":\n                  \"irate(node_disk_io_time_weighted_seconds_total{instance=\\\"$node\\\",job=\\\"$job\\\"}[$__rate_interval])\",\n              \"interval\": \"\",\n              \"intervalFactor\": 4,\n              \"legendFormat\": \"{{device}}\",\n              \"refId\": \"A\",\n              \"step\": 240\n            }\n          ],\n          \"title\": \"Average Queue Size\",\n          \"type\": \"timeseries\"\n        },\n        {\n          \"datasource\": {\n            \"type\": \"prometheus\",\n            
\"uid\": \"${datasource}\"\n          },\n          \"description\":\n              \"The number of read and write requests merged per second that were queued to the device\",\n          \"fieldConfig\": {\n            \"defaults\": {\n              \"color\": {\n                \"mode\": \"palette-classic\"\n              },\n              \"custom\": {\n                \"axisCenteredZero\": false,\n                \"axisColorMode\": \"text\",\n                \"axisLabel\": \"I/Os\",\n                \"axisPlacement\": \"auto\",\n                \"barAlignment\": 0,\n                \"drawStyle\": \"line\",\n                \"fillOpacity\": 20,\n                \"gradientMode\": \"none\",\n                \"hideFrom\": {\n                  \"legend\": false,\n                  \"tooltip\": false,\n                  \"viz\": false\n                },\n                \"lineInterpolation\": \"linear\",\n                \"lineWidth\": 1,\n                \"pointSize\": 5,\n                \"scaleDistribution\": {\n                  \"type\": \"linear\"\n                },\n                \"showPoints\": \"never\",\n                \"spanNulls\": false,\n                \"stacking\": {\n                  \"group\": \"A\",\n                  \"mode\": \"none\"\n                },\n                \"thresholdsStyle\": {\n                  \"mode\": \"off\"\n                }\n              },\n              \"links\": [],\n              \"mappings\": [],\n              \"thresholds\": {\n                \"mode\": \"absolute\",\n                \"steps\": [\n                  {\n                    \"color\": \"green\"\n                  },\n                  {\n                    \"color\": \"red\",\n                    \"value\": 80\n                  }\n                ]\n              },\n              \"unit\": \"iops\"\n            },\n            \"overrides\": [\n              {\n                \"matcher\": {\n                  \"id\": \"byRegexp\",\n         
         \"options\": \"/.*Read.*/\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"custom.transform\",\n                    \"value\": \"negative-Y\"\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byRegexp\",\n                  \"options\": \"/.*sda_.*/\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#7EB26D\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byRegexp\",\n                  \"options\": \"/.*sdb_.*/\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#EAB839\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byRegexp\",\n                  \"options\": \"/.*sdc_.*/\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#6ED0E0\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byRegexp\",\n                  \"options\": \"/.*sdd_.*/\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#EF843C\",\n  
                    \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byRegexp\",\n                  \"options\": \"/.*sde_.*/\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#E24D42\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byRegexp\",\n                  \"options\": \"/.*sda1.*/\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#584477\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byRegexp\",\n                  \"options\": \"/.*sda2_.*/\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#BA43A9\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byRegexp\",\n                  \"options\": \"/.*sda3_.*/\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#F4D598\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n       
         \"matcher\": {\n                  \"id\": \"byRegexp\",\n                  \"options\": \"/.*sdb1.*/\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#0A50A1\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byRegexp\",\n                  \"options\": \"/.*sdb2.*/\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#BF1B00\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byRegexp\",\n                  \"options\": \"/.*sdb3.*/\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#E0752D\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byRegexp\",\n                  \"options\": \"/.*sdc1.*/\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#962D82\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byRegexp\",\n                  \"options\": \"/.*sdc2.*/\"\n                },\n               
 \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#614D93\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byRegexp\",\n                  \"options\": \"/.*sdc3.*/\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#9AC48A\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byRegexp\",\n                  \"options\": \"/.*sdd1.*/\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#65C5DB\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byRegexp\",\n                  \"options\": \"/.*sdd2.*/\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#F9934E\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byRegexp\",\n                  \"options\": \"/.*sdd3.*/\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      
\"fixedColor\": \"#EA6460\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byRegexp\",\n                  \"options\": \"/.*sde1.*/\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#E0F9D7\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byRegexp\",\n                  \"options\": \"/.*sde2.*/\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#FCEACA\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byRegexp\",\n                  \"options\": \"/.*sde3.*/\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#F9E2D2\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              }\n            ]\n          },\n          \"gridPos\": {\n            \"h\": 10,\n            \"w\": 12,\n            \"x\": 0,\n            \"y\": 67\n          },\n          \"id\": 133,\n          \"links\": [],\n          \"options\": {\n            \"legend\": {\n              \"calcs\": [\n                \"mean\",\n                \"lastNotNull\",\n                \"max\",\n                \"min\"\n              ],\n              \"displayMode\": 
\"table\",\n              \"placement\": \"bottom\",\n              \"showLegend\": true\n            },\n            \"tooltip\": {\n              \"mode\": \"single\",\n              \"sort\": \"none\"\n            }\n          },\n          \"pluginVersion\": \"9.2.0\",\n          \"targets\": [\n            {\n              \"datasource\": {\n                \"type\": \"prometheus\",\n                \"uid\": \"${datasource}\"\n              },\n              \"expr\":\n                  \"irate(node_disk_reads_merged_total{instance=\\\"$node\\\",job=\\\"$job\\\"}[$__rate_interval])\",\n              \"intervalFactor\": 1,\n              \"legendFormat\": \"{{device}} - Read merged\",\n              \"refId\": \"A\",\n              \"step\": 240\n            },\n            {\n              \"datasource\": {\n                \"type\": \"prometheus\",\n                \"uid\": \"${datasource}\"\n              },\n              \"expr\":\n                  \"irate(node_disk_writes_merged_total{instance=\\\"$node\\\",job=\\\"$job\\\"}[$__rate_interval])\",\n              \"intervalFactor\": 1,\n              \"legendFormat\": \"{{device}} - Write merged\",\n              \"refId\": \"B\",\n              \"step\": 240\n            }\n          ],\n          \"title\": \"Disk R/W Merged\",\n          \"type\": \"timeseries\"\n        },\n        {\n          \"datasource\": {\n            \"type\": \"prometheus\",\n            \"uid\": \"${datasource}\"\n          },\n          \"description\":\n              \"Percentage of elapsed time during which I/O requests were issued to the device (bandwidth utilization for the device). Device saturation occurs when this value is close to 100% for devices serving requests serially.  
But for devices  serving requests in parallel, such as RAID arrays and modern SSDs, this number does not reflect their performance limits.\",\n          \"fieldConfig\": {\n            \"defaults\": {\n              \"color\": {\n                \"mode\": \"palette-classic\"\n              },\n              \"custom\": {\n                \"axisCenteredZero\": false,\n                \"axisColorMode\": \"text\",\n                \"axisLabel\": \"%util\",\n                \"axisPlacement\": \"auto\",\n                \"barAlignment\": 0,\n                \"drawStyle\": \"line\",\n                \"fillOpacity\": 30,\n                \"gradientMode\": \"none\",\n                \"hideFrom\": {\n                  \"legend\": false,\n                  \"tooltip\": false,\n                  \"viz\": false\n                },\n                \"lineInterpolation\": \"linear\",\n                \"lineWidth\": 1,\n                \"pointSize\": 5,\n                \"scaleDistribution\": {\n                  \"type\": \"linear\"\n                },\n                \"showPoints\": \"never\",\n                \"spanNulls\": false,\n                \"stacking\": {\n                  \"group\": \"A\",\n                  \"mode\": \"none\"\n                },\n                \"thresholdsStyle\": {\n                  \"mode\": \"off\"\n                }\n              },\n              \"links\": [],\n              \"mappings\": [],\n              \"min\": 0,\n              \"thresholds\": {\n                \"mode\": \"absolute\",\n                \"steps\": [\n                  {\n                    \"color\": \"green\"\n                  },\n                  {\n                    \"color\": \"red\",\n                    \"value\": 80\n                  }\n                ]\n              },\n              \"unit\": \"percentunit\"\n            },\n            \"overrides\": [\n              {\n                \"matcher\": {\n                  \"id\": \"byRegexp\",\n        
          \"options\": \"/.*sda_.*/\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#7EB26D\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byRegexp\",\n                  \"options\": \"/.*sdb_.*/\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#EAB839\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byRegexp\",\n                  \"options\": \"/.*sdc_.*/\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#6ED0E0\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byRegexp\",\n                  \"options\": \"/.*sdd_.*/\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#EF843C\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byRegexp\",\n                  \"options\": \"/.*sde_.*/\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": 
\"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#E24D42\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byRegexp\",\n                  \"options\": \"/.*sda1.*/\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#584477\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byRegexp\",\n                  \"options\": \"/.*sda2_.*/\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#BA43A9\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byRegexp\",\n                  \"options\": \"/.*sda3_.*/\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#F4D598\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byRegexp\",\n                  \"options\": \"/.*sdb1.*/\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#0A50A1\",\n                      \"mode\": \"fixed\"\n     
               }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byRegexp\",\n                  \"options\": \"/.*sdb2.*/\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#BF1B00\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byRegexp\",\n                  \"options\": \"/.*sdb3.*/\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#E0752D\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byRegexp\",\n                  \"options\": \"/.*sdc1.*/\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#962D82\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byRegexp\",\n                  \"options\": \"/.*sdc2.*/\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#614D93\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  
\"id\": \"byRegexp\",\n                  \"options\": \"/.*sdc3.*/\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#9AC48A\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byRegexp\",\n                  \"options\": \"/.*sdd1.*/\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#65C5DB\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byRegexp\",\n                  \"options\": \"/.*sdd2.*/\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#F9934E\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byRegexp\",\n                  \"options\": \"/.*sdd3.*/\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#EA6460\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byRegexp\",\n                  \"options\": \"/.*sde1.*/\"\n                },\n                \"properties\": [\n                  {\n  
                  \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#E0F9D7\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byRegexp\",\n                  \"options\": \"/.*sdd2.*/\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#FCEACA\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byRegexp\",\n                  \"options\": \"/.*sde3.*/\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#F9E2D2\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              }\n            ]\n          },\n          \"gridPos\": {\n            \"h\": 10,\n            \"w\": 12,\n            \"x\": 12,\n            \"y\": 67\n          },\n          \"id\": 36,\n          \"links\": [],\n          \"options\": {\n            \"legend\": {\n              \"calcs\": [\n                \"mean\",\n                \"lastNotNull\",\n                \"max\",\n                \"min\"\n              ],\n              \"displayMode\": \"table\",\n              \"placement\": \"bottom\",\n              \"showLegend\": true\n            },\n            \"tooltip\": {\n              \"mode\": \"single\",\n              \"sort\": \"none\"\n            }\n          },\n          \"pluginVersion\": \"9.2.0\",\n          \"targets\": [\n            {\n              \"datasource\": {\n                
\"type\": \"prometheus\",\n                \"uid\": \"${datasource}\"\n              },\n              \"expr\":\n                  \"irate(node_disk_io_time_seconds_total{instance=\\\"$node\\\",job=\\\"$job\\\"}[$__rate_interval])\",\n              \"interval\": \"\",\n              \"intervalFactor\": 4,\n              \"legendFormat\": \"{{device}} - IO\",\n              \"refId\": \"A\",\n              \"step\": 240\n            },\n            {\n              \"datasource\": {\n                \"type\": \"prometheus\",\n                \"uid\": \"${datasource}\"\n              },\n              \"expr\":\n                  \"irate(node_disk_discard_time_seconds_total{instance=\\\"$node\\\",job=\\\"$job\\\"}[$__rate_interval])\",\n              \"interval\": \"\",\n              \"intervalFactor\": 4,\n              \"legendFormat\": \"{{device}} - discard\",\n              \"refId\": \"B\",\n              \"step\": 240\n            }\n          ],\n          \"title\": \"Time Spent Doing I/Os\",\n          \"type\": \"timeseries\"\n        },\n        {\n          \"datasource\": {\n            \"type\": \"prometheus\",\n            \"uid\": \"${datasource}\"\n          },\n          \"description\":\n              \"The number of outstanding requests at the instant the sample was taken. 
Incremented as requests are given to appropriate struct request_queue and decremented as they finish.\",\n          \"fieldConfig\": {\n            \"defaults\": {\n              \"color\": {\n                \"mode\": \"palette-classic\"\n              },\n              \"custom\": {\n                \"axisCenteredZero\": false,\n                \"axisColorMode\": \"text\",\n                \"axisLabel\": \"Outstanding req.\",\n                \"axisPlacement\": \"auto\",\n                \"barAlignment\": 0,\n                \"drawStyle\": \"line\",\n                \"fillOpacity\": 20,\n                \"gradientMode\": \"none\",\n                \"hideFrom\": {\n                  \"legend\": false,\n                  \"tooltip\": false,\n                  \"viz\": false\n                },\n                \"lineInterpolation\": \"linear\",\n                \"lineWidth\": 1,\n                \"pointSize\": 5,\n                \"scaleDistribution\": {\n                  \"type\": \"linear\"\n                },\n                \"showPoints\": \"never\",\n                \"spanNulls\": false,\n                \"stacking\": {\n                  \"group\": \"A\",\n                  \"mode\": \"none\"\n                },\n                \"thresholdsStyle\": {\n                  \"mode\": \"off\"\n                }\n              },\n              \"links\": [],\n              \"mappings\": [],\n              \"min\": 0,\n              \"thresholds\": {\n                \"mode\": \"absolute\",\n                \"steps\": [\n                  {\n                    \"color\": \"green\"\n                  },\n                  {\n                    \"color\": \"red\",\n                    \"value\": 80\n                  }\n                ]\n              },\n              \"unit\": \"none\"\n            },\n            \"overrides\": [\n              {\n                \"matcher\": {\n                  \"id\": \"byRegexp\",\n                  \"options\": 
\"/.*sda_.*/\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#7EB26D\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byRegexp\",\n                  \"options\": \"/.*sdb_.*/\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#EAB839\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byRegexp\",\n                  \"options\": \"/.*sdc_.*/\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#6ED0E0\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byRegexp\",\n                  \"options\": \"/.*sdd_.*/\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#EF843C\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byRegexp\",\n                  \"options\": \"/.*sde_.*/\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                
    \"value\": {\n                      \"fixedColor\": \"#E24D42\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byRegexp\",\n                  \"options\": \"/.*sda1.*/\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#584477\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byRegexp\",\n                  \"options\": \"/.*sda2_.*/\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#BA43A9\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byRegexp\",\n                  \"options\": \"/.*sda3_.*/\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#F4D598\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byRegexp\",\n                  \"options\": \"/.*sdb1.*/\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#0A50A1\",\n                      \"mode\": \"fixed\"\n                    }\n          
        }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byRegexp\",\n                  \"options\": \"/.*sdb2.*/\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#BF1B00\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byRegexp\",\n                  \"options\": \"/.*sdb3.*/\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#E0752D\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byRegexp\",\n                  \"options\": \"/.*sdc1.*/\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#962D82\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byRegexp\",\n                  \"options\": \"/.*sdc2.*/\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#614D93\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byRegexp\",\n          
        \"options\": \"/.*sdc3.*/\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#9AC48A\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byRegexp\",\n                  \"options\": \"/.*sdd1.*/\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#65C5DB\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byRegexp\",\n                  \"options\": \"/.*sdd2.*/\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#F9934E\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byRegexp\",\n                  \"options\": \"/.*sdd3.*/\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#EA6460\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byRegexp\",\n                  \"options\": \"/.*sde1.*/\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": 
\"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#E0F9D7\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byRegexp\",\n                  \"options\": \"/.*sdd2.*/\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#FCEACA\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byRegexp\",\n                  \"options\": \"/.*sde3.*/\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#F9E2D2\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              }\n            ]\n          },\n          \"gridPos\": {\n            \"h\": 10,\n            \"w\": 12,\n            \"x\": 0,\n            \"y\": 77\n          },\n          \"id\": 34,\n          \"links\": [],\n          \"options\": {\n            \"legend\": {\n              \"calcs\": [\n                \"mean\",\n                \"lastNotNull\",\n                \"max\",\n                \"min\"\n              ],\n              \"displayMode\": \"table\",\n              \"placement\": \"bottom\",\n              \"showLegend\": true\n            },\n            \"tooltip\": {\n              \"mode\": \"single\",\n              \"sort\": \"none\"\n            }\n          },\n          \"pluginVersion\": \"9.2.0\",\n          \"targets\": [\n            {\n              \"datasource\": {\n                \"type\": \"prometheus\",\n       
         \"uid\": \"${datasource}\"\n              },\n              \"expr\": \"node_disk_io_now{instance=\\\"$node\\\",job=\\\"$job\\\"}\",\n              \"interval\": \"\",\n              \"intervalFactor\": 4,\n              \"legendFormat\": \"{{device}} - IO now\",\n              \"refId\": \"A\",\n              \"step\": 240\n            }\n          ],\n          \"title\": \"Instantaneous Queue Size\",\n          \"type\": \"timeseries\"\n        },\n        {\n          \"datasource\": {\n            \"type\": \"prometheus\",\n            \"uid\": \"${datasource}\"\n          },\n          \"description\": \"\",\n          \"fieldConfig\": {\n            \"defaults\": {\n              \"color\": {\n                \"mode\": \"palette-classic\"\n              },\n              \"custom\": {\n                \"axisCenteredZero\": false,\n                \"axisColorMode\": \"text\",\n                \"axisLabel\": \"IOs\",\n                \"axisPlacement\": \"auto\",\n                \"barAlignment\": 0,\n                \"drawStyle\": \"line\",\n                \"fillOpacity\": 20,\n                \"gradientMode\": \"none\",\n                \"hideFrom\": {\n                  \"legend\": false,\n                  \"tooltip\": false,\n                  \"viz\": false\n                },\n                \"lineInterpolation\": \"linear\",\n                \"lineWidth\": 1,\n                \"pointSize\": 5,\n                \"scaleDistribution\": {\n                  \"type\": \"linear\"\n                },\n                \"showPoints\": \"never\",\n                \"spanNulls\": false,\n                \"stacking\": {\n                  \"group\": \"A\",\n                  \"mode\": \"none\"\n                },\n                \"thresholdsStyle\": {\n                  \"mode\": \"off\"\n                }\n              },\n              \"links\": [],\n              \"mappings\": [],\n              \"thresholds\": {\n                \"mode\": 
\"absolute\",\n                \"steps\": [\n                  {\n                    \"color\": \"green\"\n                  },\n                  {\n                    \"color\": \"red\",\n                    \"value\": 80\n                  }\n                ]\n              },\n              \"unit\": \"iops\"\n            },\n            \"overrides\": [\n              {\n                \"matcher\": {\n                  \"id\": \"byRegexp\",\n                  \"options\": \"/.*sda_.*/\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#7EB26D\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byRegexp\",\n                  \"options\": \"/.*sdb_.*/\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#EAB839\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byRegexp\",\n                  \"options\": \"/.*sdc_.*/\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#6ED0E0\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byRegexp\",\n                  \"options\": \"/.*sdd_.*/\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": 
\"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#EF843C\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byRegexp\",\n                  \"options\": \"/.*sde_.*/\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#E24D42\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byRegexp\",\n                  \"options\": \"/.*sda1.*/\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#584477\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byRegexp\",\n                  \"options\": \"/.*sda2_.*/\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#BA43A9\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byRegexp\",\n                  \"options\": \"/.*sda3_.*/\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#F4D598\",\n                      \"mode\": \"fixed\"\n     
               }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byRegexp\",\n                  \"options\": \"/.*sdb1.*/\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#0A50A1\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byRegexp\",\n                  \"options\": \"/.*sdb2.*/\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#BF1B00\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byRegexp\",\n                  \"options\": \"/.*sdb3.*/\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#E0752D\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byRegexp\",\n                  \"options\": \"/.*sdc1.*/\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#962D82\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  
\"id\": \"byRegexp\",\n                  \"options\": \"/.*sdc2.*/\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#614D93\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byRegexp\",\n                  \"options\": \"/.*sdc3.*/\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#9AC48A\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byRegexp\",\n                  \"options\": \"/.*sdd1.*/\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#65C5DB\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byRegexp\",\n                  \"options\": \"/.*sdd2.*/\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#F9934E\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byRegexp\",\n                  \"options\": \"/.*sdd3.*/\"\n                },\n                \"properties\": [\n                  {\n  
                  \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#EA6460\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byRegexp\",\n                  \"options\": \"/.*sde1.*/\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#E0F9D7\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byRegexp\",\n                  \"options\": \"/.*sdd2.*/\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#FCEACA\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byRegexp\",\n                  \"options\": \"/.*sde3.*/\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#F9E2D2\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              }\n            ]\n          },\n          \"gridPos\": {\n            \"h\": 10,\n            \"w\": 12,\n            \"x\": 12,\n            \"y\": 77\n          },\n          \"id\": 301,\n          \"links\": [],\n          \"options\": {\n            \"legend\": {\n              \"calcs\": [\n                \"mean\",\n                \"lastNotNull\",\n        
        \"max\",\n                \"min\"\n              ],\n              \"displayMode\": \"table\",\n              \"placement\": \"bottom\",\n              \"showLegend\": true\n            },\n            \"tooltip\": {\n              \"mode\": \"single\",\n              \"sort\": \"none\"\n            }\n          },\n          \"pluginVersion\": \"9.2.0\",\n          \"targets\": [\n            {\n              \"datasource\": {\n                \"type\": \"prometheus\",\n                \"uid\": \"${datasource}\"\n              },\n              \"expr\":\n                  \"irate(node_disk_discards_completed_total{instance=\\\"$node\\\",job=\\\"$job\\\"}[$__rate_interval])\",\n              \"interval\": \"\",\n              \"intervalFactor\": 4,\n              \"legendFormat\": \"{{device}} - Discards completed\",\n              \"refId\": \"A\",\n              \"step\": 240\n            },\n            {\n              \"datasource\": {\n                \"type\": \"prometheus\",\n                \"uid\": \"${datasource}\"\n              },\n              \"expr\":\n                  \"irate(node_disk_discards_merged_total{instance=\\\"$node\\\",job=\\\"$job\\\"}[$__rate_interval])\",\n              \"interval\": \"\",\n              \"intervalFactor\": 1,\n              \"legendFormat\": \"{{device}} - Discards merged\",\n              \"refId\": \"B\",\n              \"step\": 240\n            }\n          ],\n          \"title\": \"Disk IOps Discards completed / merged\",\n          \"type\": \"timeseries\"\n        }\n      ],\n      \"targets\": [\n        {\n          \"datasource\": {\n            \"type\": \"prometheus\",\n            \"uid\": \"000000001\"\n          },\n          \"refId\": \"A\"\n        }\n      ],\n      \"title\": \"Storage Disk\",\n      \"type\": \"row\"\n    },\n    {\n      \"collapsed\": true,\n      \"datasource\": {\n        \"type\": \"prometheus\",\n        \"uid\": \"000000001\"\n      },\n      \"gridPos\": {\n  
      \"h\": 1,\n        \"w\": 24,\n        \"x\": 0,\n        \"y\": 29\n      },\n      \"id\": 271,\n      \"panels\": [\n        {\n          \"datasource\": {\n            \"type\": \"prometheus\",\n            \"uid\": \"${datasource}\"\n          },\n          \"description\": \"\",\n          \"fieldConfig\": {\n            \"defaults\": {\n              \"color\": {\n                \"mode\": \"palette-classic\"\n              },\n              \"custom\": {\n                \"axisCenteredZero\": false,\n                \"axisColorMode\": \"text\",\n                \"axisLabel\": \"bytes\",\n                \"axisPlacement\": \"auto\",\n                \"barAlignment\": 0,\n                \"drawStyle\": \"line\",\n                \"fillOpacity\": 20,\n                \"gradientMode\": \"none\",\n                \"hideFrom\": {\n                  \"legend\": false,\n                  \"tooltip\": false,\n                  \"viz\": false\n                },\n                \"lineInterpolation\": \"linear\",\n                \"lineWidth\": 1,\n                \"pointSize\": 5,\n                \"scaleDistribution\": {\n                  \"type\": \"linear\"\n                },\n                \"showPoints\": \"never\",\n                \"spanNulls\": false,\n                \"stacking\": {\n                  \"group\": \"A\",\n                  \"mode\": \"none\"\n                },\n                \"thresholdsStyle\": {\n                  \"mode\": \"off\"\n                }\n              },\n              \"links\": [],\n              \"mappings\": [],\n              \"min\": 0,\n              \"thresholds\": {\n                \"mode\": \"absolute\",\n                \"steps\": [\n                  {\n                    \"color\": \"green\"\n                  },\n                  {\n                    \"color\": \"red\",\n                    \"value\": 80\n                  }\n                ]\n              },\n              \"unit\": 
\"bytes\"\n            },\n            \"overrides\": []\n          },\n          \"gridPos\": {\n            \"h\": 10,\n            \"w\": 12,\n            \"x\": 0,\n            \"y\": 62\n          },\n          \"id\": 43,\n          \"links\": [],\n          \"options\": {\n            \"legend\": {\n              \"calcs\": [\n                \"mean\",\n                \"lastNotNull\",\n                \"max\",\n                \"min\"\n              ],\n              \"displayMode\": \"table\",\n              \"placement\": \"bottom\",\n              \"showLegend\": true\n            },\n            \"tooltip\": {\n              \"mode\": \"multi\",\n              \"sort\": \"none\"\n            }\n          },\n          \"pluginVersion\": \"9.2.0\",\n          \"targets\": [\n            {\n              \"datasource\": {\n                \"type\": \"prometheus\",\n                \"uid\": \"${datasource}\"\n              },\n              \"expr\":\n                  \"node_filesystem_avail_bytes{instance=\\\"$node\\\",job=\\\"$job\\\",device!~'rootfs'}\",\n              \"format\": \"time_series\",\n              \"hide\": false,\n              \"intervalFactor\": 1,\n              \"legendFormat\": \"{{mountpoint}} - Available\",\n              \"metric\": \"\",\n              \"refId\": \"A\",\n              \"step\": 240\n            },\n            {\n              \"datasource\": {\n                \"type\": \"prometheus\",\n                \"uid\": \"${datasource}\"\n              },\n              \"expr\":\n                  \"node_filesystem_free_bytes{instance=\\\"$node\\\",job=\\\"$job\\\",device!~'rootfs'}\",\n              \"format\": \"time_series\",\n              \"hide\": true,\n              \"intervalFactor\": 1,\n              \"legendFormat\": \"{{mountpoint}} - Free\",\n              \"refId\": \"B\",\n              \"step\": 240\n            },\n            {\n              \"datasource\": {\n                \"type\": 
\"prometheus\",\n                \"uid\": \"${datasource}\"\n              },\n              \"expr\":\n                  \"node_filesystem_size_bytes{instance=\\\"$node\\\",job=\\\"$job\\\",device!~'rootfs'}\",\n              \"format\": \"time_series\",\n              \"hide\": true,\n              \"intervalFactor\": 1,\n              \"legendFormat\": \"{{mountpoint}} - Size\",\n              \"refId\": \"C\",\n              \"step\": 240\n            }\n          ],\n          \"title\": \"Filesystem space available\",\n          \"type\": \"timeseries\"\n        },\n        {\n          \"datasource\": {\n            \"type\": \"prometheus\",\n            \"uid\": \"${datasource}\"\n          },\n          \"description\": \"\",\n          \"fieldConfig\": {\n            \"defaults\": {\n              \"color\": {\n                \"mode\": \"palette-classic\"\n              },\n              \"custom\": {\n                \"axisCenteredZero\": false,\n                \"axisColorMode\": \"text\",\n                \"axisLabel\": \"file nodes\",\n                \"axisPlacement\": \"auto\",\n                \"barAlignment\": 0,\n                \"drawStyle\": \"line\",\n                \"fillOpacity\": 20,\n                \"gradientMode\": \"none\",\n                \"hideFrom\": {\n                  \"legend\": false,\n                  \"tooltip\": false,\n                  \"viz\": false\n                },\n                \"lineInterpolation\": \"linear\",\n                \"lineWidth\": 1,\n                \"pointSize\": 5,\n                \"scaleDistribution\": {\n                  \"type\": \"linear\"\n                },\n                \"showPoints\": \"never\",\n                \"spanNulls\": false,\n                \"stacking\": {\n                  \"group\": \"A\",\n                  \"mode\": \"none\"\n                },\n                \"thresholdsStyle\": {\n                  \"mode\": \"off\"\n                }\n              },\n           
   \"links\": [],\n              \"mappings\": [],\n              \"min\": 0,\n              \"thresholds\": {\n                \"mode\": \"absolute\",\n                \"steps\": [\n                  {\n                    \"color\": \"green\"\n                  },\n                  {\n                    \"color\": \"red\",\n                    \"value\": 80\n                  }\n                ]\n              },\n              \"unit\": \"short\"\n            },\n            \"overrides\": []\n          },\n          \"gridPos\": {\n            \"h\": 10,\n            \"w\": 12,\n            \"x\": 12,\n            \"y\": 62\n          },\n          \"id\": 41,\n          \"links\": [],\n          \"options\": {\n            \"legend\": {\n              \"calcs\": [\n                \"mean\",\n                \"lastNotNull\",\n                \"max\",\n                \"min\"\n              ],\n              \"displayMode\": \"table\",\n              \"placement\": \"bottom\",\n              \"showLegend\": true\n            },\n            \"tooltip\": {\n              \"mode\": \"multi\",\n              \"sort\": \"none\"\n            }\n          },\n          \"pluginVersion\": \"9.2.0\",\n          \"targets\": [\n            {\n              \"datasource\": {\n                \"type\": \"prometheus\",\n                \"uid\": \"${datasource}\"\n              },\n              \"expr\":\n                  \"node_filesystem_files_free{instance=\\\"$node\\\",job=\\\"$job\\\",device!~'rootfs'}\",\n              \"format\": \"time_series\",\n              \"hide\": false,\n              \"intervalFactor\": 1,\n              \"legendFormat\": \"{{mountpoint}} - Free file nodes\",\n              \"refId\": \"A\",\n              \"step\": 240\n            }\n          ],\n          \"title\": \"File Nodes Free\",\n          \"type\": \"timeseries\"\n        },\n        {\n          \"datasource\": {\n            \"type\": \"prometheus\",\n            \"uid\": 
\"${datasource}\"\n          },\n          \"description\": \"\",\n          \"fieldConfig\": {\n            \"defaults\": {\n              \"color\": {\n                \"mode\": \"palette-classic\"\n              },\n              \"custom\": {\n                \"axisCenteredZero\": false,\n                \"axisColorMode\": \"text\",\n                \"axisLabel\": \"files\",\n                \"axisPlacement\": \"auto\",\n                \"barAlignment\": 0,\n                \"drawStyle\": \"line\",\n                \"fillOpacity\": 20,\n                \"gradientMode\": \"none\",\n                \"hideFrom\": {\n                  \"legend\": false,\n                  \"tooltip\": false,\n                  \"viz\": false\n                },\n                \"lineInterpolation\": \"linear\",\n                \"lineWidth\": 1,\n                \"pointSize\": 5,\n                \"scaleDistribution\": {\n                  \"type\": \"linear\"\n                },\n                \"showPoints\": \"never\",\n                \"spanNulls\": false,\n                \"stacking\": {\n                  \"group\": \"A\",\n                  \"mode\": \"none\"\n                },\n                \"thresholdsStyle\": {\n                  \"mode\": \"off\"\n                }\n              },\n              \"links\": [],\n              \"mappings\": [],\n              \"min\": 0,\n              \"thresholds\": {\n                \"mode\": \"absolute\",\n                \"steps\": [\n                  {\n                    \"color\": \"green\"\n                  },\n                  {\n                    \"color\": \"red\",\n                    \"value\": 80\n                  }\n                ]\n              },\n              \"unit\": \"short\"\n            },\n            \"overrides\": []\n          },\n          \"gridPos\": {\n            \"h\": 10,\n            \"w\": 12,\n            \"x\": 0,\n            \"y\": 72\n          },\n          \"id\": 28,\n        
  \"links\": [],\n          \"options\": {\n            \"legend\": {\n              \"calcs\": [\n                \"mean\",\n                \"lastNotNull\",\n                \"max\",\n                \"min\"\n              ],\n              \"displayMode\": \"table\",\n              \"placement\": \"bottom\",\n              \"showLegend\": true\n            },\n            \"tooltip\": {\n              \"mode\": \"single\",\n              \"sort\": \"none\"\n            }\n          },\n          \"pluginVersion\": \"9.2.0\",\n          \"targets\": [\n            {\n              \"datasource\": {\n                \"type\": \"prometheus\",\n                \"uid\": \"${datasource}\"\n              },\n              \"expr\": \"node_filefd_maximum{instance=\\\"$node\\\",job=\\\"$job\\\"}\",\n              \"format\": \"time_series\",\n              \"intervalFactor\": 4,\n              \"legendFormat\": \"Max open files\",\n              \"refId\": \"A\",\n              \"step\": 240\n            },\n            {\n              \"datasource\": {\n                \"type\": \"prometheus\",\n                \"uid\": \"${datasource}\"\n              },\n              \"expr\": \"node_filefd_allocated{instance=\\\"$node\\\",job=\\\"$job\\\"}\",\n              \"format\": \"time_series\",\n              \"intervalFactor\": 1,\n              \"legendFormat\": \"Open files\",\n              \"refId\": \"B\",\n              \"step\": 240\n            }\n          ],\n          \"title\": \"File Descriptor\",\n          \"type\": \"timeseries\"\n        },\n        {\n          \"datasource\": {\n            \"type\": \"prometheus\",\n            \"uid\": \"${datasource}\"\n          },\n          \"description\": \"\",\n          \"fieldConfig\": {\n            \"defaults\": {\n              \"color\": {\n                \"mode\": \"palette-classic\"\n              },\n              \"custom\": {\n                \"axisCenteredZero\": false,\n                
\"axisColorMode\": \"text\",\n                \"axisLabel\": \"file Nodes\",\n                \"axisPlacement\": \"auto\",\n                \"barAlignment\": 0,\n                \"drawStyle\": \"line\",\n                \"fillOpacity\": 20,\n                \"gradientMode\": \"none\",\n                \"hideFrom\": {\n                  \"legend\": false,\n                  \"tooltip\": false,\n                  \"viz\": false\n                },\n                \"lineInterpolation\": \"linear\",\n                \"lineWidth\": 1,\n                \"pointSize\": 5,\n                \"scaleDistribution\": {\n                  \"type\": \"linear\"\n                },\n                \"showPoints\": \"never\",\n                \"spanNulls\": false,\n                \"stacking\": {\n                  \"group\": \"A\",\n                  \"mode\": \"none\"\n                },\n                \"thresholdsStyle\": {\n                  \"mode\": \"off\"\n                }\n              },\n              \"links\": [],\n              \"mappings\": [],\n              \"min\": 0,\n              \"thresholds\": {\n                \"mode\": \"absolute\",\n                \"steps\": [\n                  {\n                    \"color\": \"green\"\n                  },\n                  {\n                    \"color\": \"red\",\n                    \"value\": 80\n                  }\n                ]\n              },\n              \"unit\": \"short\"\n            },\n            \"overrides\": []\n          },\n          \"gridPos\": {\n            \"h\": 10,\n            \"w\": 12,\n            \"x\": 12,\n            \"y\": 72\n          },\n          \"id\": 219,\n          \"links\": [],\n          \"options\": {\n            \"legend\": {\n              \"calcs\": [\n                \"mean\",\n                \"lastNotNull\",\n                \"max\",\n                \"min\"\n              ],\n              \"displayMode\": \"table\",\n              \"placement\": 
\"bottom\",\n              \"showLegend\": true\n            },\n            \"tooltip\": {\n              \"mode\": \"multi\",\n              \"sort\": \"none\"\n            }\n          },\n          \"pluginVersion\": \"9.2.0\",\n          \"targets\": [\n            {\n              \"datasource\": {\n                \"type\": \"prometheus\",\n                \"uid\": \"${datasource}\"\n              },\n              \"expr\": \"node_filesystem_files{instance=\\\"$node\\\",job=\\\"$job\\\",device!~'rootfs'}\",\n              \"format\": \"time_series\",\n              \"hide\": false,\n              \"intervalFactor\": 1,\n              \"legendFormat\": \"{{mountpoint}} - File nodes total\",\n              \"refId\": \"A\",\n              \"step\": 240\n            }\n          ],\n          \"title\": \"File Nodes Size\",\n          \"type\": \"timeseries\"\n        },\n        {\n          \"datasource\": {\n            \"type\": \"prometheus\",\n            \"uid\": \"${datasource}\"\n          },\n          \"description\": \"\",\n          \"fieldConfig\": {\n            \"defaults\": {\n              \"color\": {\n                \"mode\": \"palette-classic\"\n              },\n              \"custom\": {\n                \"axisCenteredZero\": false,\n                \"axisColorMode\": \"text\",\n                \"axisLabel\": \"counter\",\n                \"axisPlacement\": \"auto\",\n                \"barAlignment\": 0,\n                \"drawStyle\": \"line\",\n                \"fillOpacity\": 20,\n                \"gradientMode\": \"none\",\n                \"hideFrom\": {\n                  \"legend\": false,\n                  \"tooltip\": false,\n                  \"viz\": false\n                },\n                \"lineInterpolation\": \"linear\",\n                \"lineWidth\": 1,\n                \"pointSize\": 5,\n                \"scaleDistribution\": {\n                  \"type\": \"linear\"\n                },\n                
\"showPoints\": \"never\",\n                \"spanNulls\": false,\n                \"stacking\": {\n                  \"group\": \"A\",\n                  \"mode\": \"normal\"\n                },\n                \"thresholdsStyle\": {\n                  \"mode\": \"off\"\n                }\n              },\n              \"links\": [],\n              \"mappings\": [],\n              \"max\": 1,\n              \"min\": 0,\n              \"thresholds\": {\n                \"mode\": \"absolute\",\n                \"steps\": [\n                  {\n                    \"color\": \"green\"\n                  },\n                  {\n                    \"color\": \"red\",\n                    \"value\": 80\n                  }\n                ]\n              },\n              \"unit\": \"short\"\n            },\n            \"overrides\": [\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"/ ReadOnly\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#890F02\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              }\n            ]\n          },\n          \"gridPos\": {\n            \"h\": 10,\n            \"w\": 12,\n            \"x\": 0,\n            \"y\": 82\n          },\n          \"id\": 44,\n          \"links\": [],\n          \"options\": {\n            \"legend\": {\n              \"calcs\": [\n                \"mean\",\n                \"lastNotNull\",\n                \"max\",\n                \"min\"\n              ],\n              \"displayMode\": \"table\",\n              \"placement\": \"bottom\",\n              \"showLegend\": true\n            },\n            \"tooltip\": {\n              \"mode\": \"multi\",\n              \"sort\": \"none\"\n            }\n          
},\n          \"pluginVersion\": \"9.2.0\",\n          \"targets\": [\n            {\n              \"datasource\": {\n                \"type\": \"prometheus\",\n                \"uid\": \"${datasource}\"\n              },\n              \"expr\": \"node_filesystem_readonly{instance=\\\"$node\\\",job=\\\"$job\\\",device!~'rootfs'}\",\n              \"format\": \"time_series\",\n              \"intervalFactor\": 1,\n              \"legendFormat\": \"{{mountpoint}} - ReadOnly\",\n              \"refId\": \"A\",\n              \"step\": 240\n            },\n            {\n              \"datasource\": {\n                \"type\": \"prometheus\",\n                \"uid\": \"${datasource}\"\n              },\n              \"expr\":\n                  \"node_filesystem_device_error{instance=\\\"$node\\\",job=\\\"$job\\\",device!~'rootfs',fstype!~'tmpfs'}\",\n              \"format\": \"time_series\",\n              \"interval\": \"\",\n              \"intervalFactor\": 1,\n              \"legendFormat\": \"{{mountpoint}} - Device error\",\n              \"refId\": \"B\",\n              \"step\": 240\n            }\n          ],\n          \"title\": \"Filesystem in ReadOnly / Error\",\n          \"type\": \"timeseries\"\n        }\n      ],\n      \"targets\": [\n        {\n          \"datasource\": {\n            \"type\": \"prometheus\",\n            \"uid\": \"000000001\"\n          },\n          \"refId\": \"A\"\n        }\n      ],\n      \"title\": \"Storage Filesystem\",\n      \"type\": \"row\"\n    },\n    {\n      \"collapsed\": true,\n      \"datasource\": {\n        \"type\": \"prometheus\",\n        \"uid\": \"000000001\"\n      },\n      \"gridPos\": {\n        \"h\": 1,\n        \"w\": 24,\n        \"x\": 0,\n        \"y\": 30\n      },\n      \"id\": 272,\n      \"panels\": [\n        {\n          \"datasource\": {\n            \"type\": \"prometheus\",\n            \"uid\": \"${datasource}\"\n          },\n          \"fieldConfig\": {\n            
\"defaults\": {\n              \"color\": {\n                \"mode\": \"palette-classic\"\n              },\n              \"custom\": {\n                \"axisCenteredZero\": false,\n                \"axisColorMode\": \"text\",\n                \"axisLabel\": \"packets out (-) / in (+)\",\n                \"axisPlacement\": \"auto\",\n                \"barAlignment\": 0,\n                \"drawStyle\": \"line\",\n                \"fillOpacity\": 20,\n                \"gradientMode\": \"none\",\n                \"hideFrom\": {\n                  \"legend\": false,\n                  \"tooltip\": false,\n                  \"viz\": false\n                },\n                \"lineInterpolation\": \"linear\",\n                \"lineWidth\": 1,\n                \"pointSize\": 5,\n                \"scaleDistribution\": {\n                  \"type\": \"linear\"\n                },\n                \"showPoints\": \"never\",\n                \"spanNulls\": false,\n                \"stacking\": {\n                  \"group\": \"A\",\n                  \"mode\": \"none\"\n                },\n                \"thresholdsStyle\": {\n                  \"mode\": \"off\"\n                }\n              },\n              \"links\": [],\n              \"mappings\": [],\n              \"thresholds\": {\n                \"mode\": \"absolute\",\n                \"steps\": [\n                  {\n                    \"color\": \"green\"\n                  },\n                  {\n                    \"color\": \"red\",\n                    \"value\": 80\n                  }\n                ]\n              },\n              \"unit\": \"pps\"\n            },\n            \"overrides\": [\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"receive_packets_eth0\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n    
                  \"fixedColor\": \"#7EB26D\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"receive_packets_lo\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#E24D42\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"transmit_packets_eth0\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#7EB26D\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"transmit_packets_lo\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#E24D42\",\n                      \"mode\": \"fixed\"\n                    }\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byRegexp\",\n                  \"options\": \"/.*Trans.*/\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"custom.transform\",\n                    \"value\": \"negative-Y\"\n                  }\n                ]\n              }\n            ]\n          },\n          
\"gridPos\": {\n            \"h\": 10,\n            \"w\": 12,\n            \"x\": 0,\n            \"y\": 47\n          },\n          \"id\": 60,\n          \"links\": [],\n          \"options\": {\n            \"legend\": {\n              \"calcs\": [\n                \"mean\",\n                \"lastNotNull\",\n                \"max\",\n                \"min\"\n              ],\n              \"displayMode\": \"table\",\n              \"placement\": \"bottom\",\n              \"showLegend\": true,\n              \"width\": 300\n            },\n            \"tooltip\": {\n              \"mode\": \"multi\",\n              \"sort\": \"none\"\n            }\n          },\n          \"pluginVersion\": \"9.2.0\",\n          \"targets\": [\n            {\n              \"datasource\": {\n                \"type\": \"prometheus\",\n                \"uid\": \"${datasource}\"\n              },\n              \"expr\":\n                  \"irate(node_network_receive_packets_total{instance=\\\"$node\\\",job=\\\"$job\\\"}[$__rate_interval])\",\n              \"format\": \"time_series\",\n              \"interval\": \"\",\n              \"intervalFactor\": 1,\n              \"legendFormat\": \"{{device}} - Receive\",\n              \"refId\": \"A\",\n              \"step\": 240\n            },\n            {\n              \"datasource\": {\n                \"type\": \"prometheus\",\n                \"uid\": \"${datasource}\"\n              },\n              \"expr\":\n                  \"irate(node_network_transmit_packets_total{instance=\\\"$node\\\",job=\\\"$job\\\"}[$__rate_interval])\",\n              \"format\": \"time_series\",\n              \"interval\": \"\",\n              \"intervalFactor\": 1,\n              \"legendFormat\": \"{{device}} - Transmit\",\n              \"refId\": \"B\",\n              \"step\": 240\n            }\n          ],\n          \"title\": \"Network Traffic by Packets\",\n          \"type\": \"timeseries\"\n        },\n        {\n          
\"datasource\": {\n            \"type\": \"prometheus\",\n            \"uid\": \"${datasource}\"\n          },\n          \"fieldConfig\": {\n            \"defaults\": {\n              \"color\": {\n                \"mode\": \"palette-classic\"\n              },\n              \"custom\": {\n                \"axisCenteredZero\": false,\n                \"axisColorMode\": \"text\",\n                \"axisLabel\": \"packets out (-) / in (+)\",\n                \"axisPlacement\": \"auto\",\n                \"barAlignment\": 0,\n                \"drawStyle\": \"line\",\n                \"fillOpacity\": 20,\n                \"gradientMode\": \"none\",\n                \"hideFrom\": {\n                  \"legend\": false,\n                  \"tooltip\": false,\n                  \"viz\": false\n                },\n                \"lineInterpolation\": \"linear\",\n                \"lineWidth\": 1,\n                \"pointSize\": 5,\n                \"scaleDistribution\": {\n                  \"type\": \"linear\"\n                },\n                \"showPoints\": \"never\",\n                \"spanNulls\": false,\n                \"stacking\": {\n                  \"group\": \"A\",\n                  \"mode\": \"none\"\n                },\n                \"thresholdsStyle\": {\n                  \"mode\": \"off\"\n                }\n              },\n              \"links\": [],\n              \"mappings\": [],\n              \"thresholds\": {\n                \"mode\": \"absolute\",\n                \"steps\": [\n                  {\n                    \"color\": \"green\"\n                  },\n                  {\n                    \"color\": \"red\",\n                    \"value\": 80\n                  }\n                ]\n              },\n              \"unit\": \"pps\"\n            },\n            \"overrides\": [\n              {\n                \"matcher\": {\n                  \"id\": \"byRegexp\",\n                  \"options\": \"/.*Trans.*/\"\n       
         },\n                \"properties\": [\n                  {\n                    \"id\": \"custom.transform\",\n                    \"value\": \"negative-Y\"\n                  }\n                ]\n              }\n            ]\n          },\n          \"gridPos\": {\n            \"h\": 10,\n            \"w\": 12,\n            \"x\": 12,\n            \"y\": 47\n          },\n          \"id\": 142,\n          \"links\": [],\n          \"options\": {\n            \"legend\": {\n              \"calcs\": [\n                \"mean\",\n                \"lastNotNull\",\n                \"max\",\n                \"min\"\n              ],\n              \"displayMode\": \"table\",\n              \"placement\": \"bottom\",\n              \"showLegend\": true,\n              \"width\": 300\n            },\n            \"tooltip\": {\n              \"mode\": \"multi\",\n              \"sort\": \"none\"\n            }\n          },\n          \"pluginVersion\": \"9.2.0\",\n          \"targets\": [\n            {\n              \"datasource\": {\n                \"type\": \"prometheus\",\n                \"uid\": \"${datasource}\"\n              },\n              \"expr\":\n                  \"irate(node_network_receive_errs_total{instance=\\\"$node\\\",job=\\\"$job\\\"}[$__rate_interval])\",\n              \"format\": \"time_series\",\n              \"intervalFactor\": 1,\n              \"legendFormat\": \"{{device}} - Receive errors\",\n              \"refId\": \"A\",\n              \"step\": 240\n            },\n            {\n              \"datasource\": {\n                \"type\": \"prometheus\",\n                \"uid\": \"${datasource}\"\n              },\n              \"expr\":\n                  \"irate(node_network_transmit_errs_total{instance=\\\"$node\\\",job=\\\"$job\\\"}[$__rate_interval])\",\n              \"format\": \"time_series\",\n              \"intervalFactor\": 1,\n              \"legendFormat\": \"{{device}} - Transmit errors\",\n             
 \"refId\": \"B\",\n              \"step\": 240\n            }\n          ],\n          \"title\": \"Network Traffic Errors\",\n          \"type\": \"timeseries\"\n        },\n        {\n          \"datasource\": {\n            \"type\": \"prometheus\",\n            \"uid\": \"${datasource}\"\n          },\n          \"fieldConfig\": {\n            \"defaults\": {\n              \"color\": {\n                \"mode\": \"palette-classic\"\n              },\n              \"custom\": {\n                \"axisCenteredZero\": false,\n                \"axisColorMode\": \"text\",\n                \"axisLabel\": \"packets out (-) / in (+)\",\n                \"axisPlacement\": \"auto\",\n                \"barAlignment\": 0,\n                \"drawStyle\": \"line\",\n                \"fillOpacity\": 20,\n                \"gradientMode\": \"none\",\n                \"hideFrom\": {\n                  \"legend\": false,\n                  \"tooltip\": false,\n                  \"viz\": false\n                },\n                \"lineInterpolation\": \"linear\",\n                \"lineWidth\": 1,\n                \"pointSize\": 5,\n                \"scaleDistribution\": {\n                  \"type\": \"linear\"\n                },\n                \"showPoints\": \"never\",\n                \"spanNulls\": false,\n                \"stacking\": {\n                  \"group\": \"A\",\n                  \"mode\": \"none\"\n                },\n                \"thresholdsStyle\": {\n                  \"mode\": \"off\"\n                }\n              },\n              \"links\": [],\n              \"mappings\": [],\n              \"thresholds\": {\n                \"mode\": \"absolute\",\n                \"steps\": [\n                  {\n                    \"color\": \"green\"\n                  },\n                  {\n                    \"color\": \"red\",\n                    \"value\": 80\n                  }\n                ]\n              },\n              \"unit\": 
\"pps\"\n            },\n            \"overrides\": [\n              {\n                \"matcher\": {\n                  \"id\": \"byRegexp\",\n                  \"options\": \"/.*Trans.*/\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"custom.transform\",\n                    \"value\": \"negative-Y\"\n                  }\n                ]\n              }\n            ]\n          },\n          \"gridPos\": {\n            \"h\": 10,\n            \"w\": 12,\n            \"x\": 0,\n            \"y\": 57\n          },\n          \"id\": 143,\n          \"links\": [],\n          \"options\": {\n            \"legend\": {\n              \"calcs\": [\n                \"mean\",\n                \"lastNotNull\",\n                \"max\",\n                \"min\"\n              ],\n              \"displayMode\": \"table\",\n              \"placement\": \"bottom\",\n              \"showLegend\": true,\n              \"width\": 300\n            },\n            \"tooltip\": {\n              \"mode\": \"multi\",\n              \"sort\": \"none\"\n            }\n          },\n          \"pluginVersion\": \"9.2.0\",\n          \"targets\": [\n            {\n              \"datasource\": {\n                \"type\": \"prometheus\",\n                \"uid\": \"${datasource}\"\n              },\n              \"expr\":\n                  \"irate(node_network_receive_drop_total{instance=\\\"$node\\\",job=\\\"$job\\\"}[$__rate_interval])\",\n              \"format\": \"time_series\",\n              \"intervalFactor\": 1,\n              \"legendFormat\": \"{{device}} - Receive drop\",\n              \"refId\": \"A\",\n              \"step\": 240\n            },\n            {\n              \"datasource\": {\n                \"type\": \"prometheus\",\n                \"uid\": \"${datasource}\"\n              },\n              \"expr\":\n                  
\"irate(node_network_transmit_drop_total{instance=\\\"$node\\\",job=\\\"$job\\\"}[$__rate_interval])\",\n              \"format\": \"time_series\",\n              \"intervalFactor\": 1,\n              \"legendFormat\": \"{{device}} - Transmit drop\",\n              \"refId\": \"B\",\n              \"step\": 240\n            }\n          ],\n          \"title\": \"Network Traffic Drop\",\n          \"type\": \"timeseries\"\n        },\n        {\n          \"datasource\": {\n            \"type\": \"prometheus\",\n            \"uid\": \"${datasource}\"\n          },\n          \"fieldConfig\": {\n            \"defaults\": {\n              \"color\": {\n                \"mode\": \"palette-classic\"\n              },\n              \"custom\": {\n                \"axisCenteredZero\": false,\n                \"axisColorMode\": \"text\",\n                \"axisLabel\": \"packets out (-) / in (+)\",\n                \"axisPlacement\": \"auto\",\n                \"barAlignment\": 0,\n                \"drawStyle\": \"line\",\n                \"fillOpacity\": 20,\n                \"gradientMode\": \"none\",\n                \"hideFrom\": {\n                  \"legend\": false,\n                  \"tooltip\": false,\n                  \"viz\": false\n                },\n                \"lineInterpolation\": \"linear\",\n                \"lineWidth\": 1,\n                \"pointSize\": 5,\n                \"scaleDistribution\": {\n                  \"type\": \"linear\"\n                },\n                \"showPoints\": \"never\",\n                \"spanNulls\": false,\n                \"stacking\": {\n                  \"group\": \"A\",\n                  \"mode\": \"none\"\n                },\n                \"thresholdsStyle\": {\n                  \"mode\": \"off\"\n                }\n              },\n              \"links\": [],\n              \"mappings\": [],\n              \"thresholds\": {\n                \"mode\": \"absolute\",\n                \"steps\": [\n    
              {\n                    \"color\": \"green\"\n                  },\n                  {\n                    \"color\": \"red\",\n                    \"value\": 80\n                  }\n                ]\n              },\n              \"unit\": \"pps\"\n            },\n            \"overrides\": [\n              {\n                \"matcher\": {\n                  \"id\": \"byRegexp\",\n                  \"options\": \"/.*Trans.*/\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"custom.transform\",\n                    \"value\": \"negative-Y\"\n                  }\n                ]\n              }\n            ]\n          },\n          \"gridPos\": {\n            \"h\": 10,\n            \"w\": 12,\n            \"x\": 12,\n            \"y\": 57\n          },\n          \"id\": 141,\n          \"links\": [],\n          \"options\": {\n            \"legend\": {\n              \"calcs\": [\n                \"mean\",\n                \"lastNotNull\",\n                \"max\",\n                \"min\"\n              ],\n              \"displayMode\": \"table\",\n              \"placement\": \"bottom\",\n              \"showLegend\": true,\n              \"width\": 300\n            },\n            \"tooltip\": {\n              \"mode\": \"multi\",\n              \"sort\": \"none\"\n            }\n          },\n          \"pluginVersion\": \"9.2.0\",\n          \"targets\": [\n            {\n              \"datasource\": {\n                \"type\": \"prometheus\",\n                \"uid\": \"${datasource}\"\n              },\n              \"expr\":\n                  \"irate(node_network_receive_compressed_total{instance=\\\"$node\\\",job=\\\"$job\\\"}[$__rate_interval])\",\n              \"format\": \"time_series\",\n              \"intervalFactor\": 1,\n              \"legendFormat\": \"{{device}} - Receive compressed\",\n              \"refId\": \"A\",\n              \"step\": 240\n         
   },\n            {\n              \"datasource\": {\n                \"type\": \"prometheus\",\n                \"uid\": \"${datasource}\"\n              },\n              \"expr\":\n                  \"irate(node_network_transmit_compressed_total{instance=\\\"$node\\\",job=\\\"$job\\\"}[$__rate_interval])\",\n              \"format\": \"time_series\",\n              \"intervalFactor\": 1,\n              \"legendFormat\": \"{{device}} - Transmit compressed\",\n              \"refId\": \"B\",\n              \"step\": 240\n            }\n          ],\n          \"title\": \"Network Traffic Compressed\",\n          \"type\": \"timeseries\"\n        },\n        {\n          \"datasource\": {\n            \"type\": \"prometheus\",\n            \"uid\": \"${datasource}\"\n          },\n          \"fieldConfig\": {\n            \"defaults\": {\n              \"color\": {\n                \"mode\": \"palette-classic\"\n              },\n              \"custom\": {\n                \"axisCenteredZero\": false,\n                \"axisColorMode\": \"text\",\n                \"axisLabel\": \"packets out (-) / in (+)\",\n                \"axisPlacement\": \"auto\",\n                \"barAlignment\": 0,\n                \"drawStyle\": \"line\",\n                \"fillOpacity\": 20,\n                \"gradientMode\": \"none\",\n                \"hideFrom\": {\n                  \"legend\": false,\n                  \"tooltip\": false,\n                  \"viz\": false\n                },\n                \"lineInterpolation\": \"linear\",\n                \"lineWidth\": 1,\n                \"pointSize\": 5,\n                \"scaleDistribution\": {\n                  \"type\": \"linear\"\n                },\n                \"showPoints\": \"never\",\n                \"spanNulls\": false,\n                \"stacking\": {\n                  \"group\": \"A\",\n                  \"mode\": \"none\"\n                },\n                \"thresholdsStyle\": {\n                  
\"mode\": \"off\"\n                }\n              },\n              \"links\": [],\n              \"mappings\": [],\n              \"thresholds\": {\n                \"mode\": \"absolute\",\n                \"steps\": [\n                  {\n                    \"color\": \"green\"\n                  },\n                  {\n                    \"color\": \"red\",\n                    \"value\": 80\n                  }\n                ]\n              },\n              \"unit\": \"pps\"\n            },\n            \"overrides\": [\n              {\n                \"matcher\": {\n                  \"id\": \"byRegexp\",\n                  \"options\": \"/.*Trans.*/\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"custom.transform\",\n                    \"value\": \"negative-Y\"\n                  }\n                ]\n              }\n            ]\n          },\n          \"gridPos\": {\n            \"h\": 10,\n            \"w\": 12,\n            \"x\": 0,\n            \"y\": 67\n          },\n          \"id\": 146,\n          \"links\": [],\n          \"options\": {\n            \"legend\": {\n              \"calcs\": [\n                \"mean\",\n                \"lastNotNull\",\n                \"max\",\n                \"min\"\n              ],\n              \"displayMode\": \"table\",\n              \"placement\": \"bottom\",\n              \"showLegend\": true,\n              \"width\": 300\n            },\n            \"tooltip\": {\n              \"mode\": \"multi\",\n              \"sort\": \"none\"\n            }\n          },\n          \"pluginVersion\": \"9.2.0\",\n          \"targets\": [\n            {\n              \"datasource\": {\n                \"type\": \"prometheus\",\n                \"uid\": \"${datasource}\"\n              },\n              \"expr\":\n                  
\"irate(node_network_receive_multicast_total{instance=\\\"$node\\\",job=\\\"$job\\\"}[$__rate_interval])\",\n              \"format\": \"time_series\",\n              \"intervalFactor\": 1,\n              \"legendFormat\": \"{{device}} - Receive multicast\",\n              \"refId\": \"A\",\n              \"step\": 240\n            }\n          ],\n          \"title\": \"Network Traffic Multicast\",\n          \"type\": \"timeseries\"\n        },\n        {\n          \"datasource\": {\n            \"type\": \"prometheus\",\n            \"uid\": \"${datasource}\"\n          },\n          \"fieldConfig\": {\n            \"defaults\": {\n              \"color\": {\n                \"mode\": \"palette-classic\"\n              },\n              \"custom\": {\n                \"axisCenteredZero\": false,\n                \"axisColorMode\": \"text\",\n                \"axisLabel\": \"packets out (-) / in (+)\",\n                \"axisPlacement\": \"auto\",\n                \"barAlignment\": 0,\n                \"drawStyle\": \"line\",\n                \"fillOpacity\": 20,\n                \"gradientMode\": \"none\",\n                \"hideFrom\": {\n                  \"legend\": false,\n                  \"tooltip\": false,\n                  \"viz\": false\n                },\n                \"lineInterpolation\": \"linear\",\n                \"lineWidth\": 1,\n                \"pointSize\": 5,\n                \"scaleDistribution\": {\n                  \"type\": \"linear\"\n                },\n                \"showPoints\": \"never\",\n                \"spanNulls\": false,\n                \"stacking\": {\n                  \"group\": \"A\",\n                  \"mode\": \"none\"\n                },\n                \"thresholdsStyle\": {\n                  \"mode\": \"off\"\n                }\n              },\n              \"links\": [],\n              \"mappings\": [],\n              \"thresholds\": {\n                \"mode\": \"absolute\",\n                
\"steps\": [\n                  {\n                    \"color\": \"green\"\n                  },\n                  {\n                    \"color\": \"red\",\n                    \"value\": 80\n                  }\n                ]\n              },\n              \"unit\": \"pps\"\n            },\n            \"overrides\": [\n              {\n                \"matcher\": {\n                  \"id\": \"byRegexp\",\n                  \"options\": \"/.*Trans.*/\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"custom.transform\",\n                    \"value\": \"negative-Y\"\n                  }\n                ]\n              }\n            ]\n          },\n          \"gridPos\": {\n            \"h\": 10,\n            \"w\": 12,\n            \"x\": 12,\n            \"y\": 67\n          },\n          \"id\": 144,\n          \"links\": [],\n          \"options\": {\n            \"legend\": {\n              \"calcs\": [\n                \"mean\",\n                \"lastNotNull\",\n                \"max\",\n                \"min\"\n              ],\n              \"displayMode\": \"table\",\n              \"placement\": \"bottom\",\n              \"showLegend\": true,\n              \"width\": 300\n            },\n            \"tooltip\": {\n              \"mode\": \"multi\",\n              \"sort\": \"none\"\n            }\n          },\n          \"pluginVersion\": \"9.2.0\",\n          \"targets\": [\n            {\n              \"datasource\": {\n                \"type\": \"prometheus\",\n                \"uid\": \"${datasource}\"\n              },\n              \"expr\":\n                  \"irate(node_network_receive_fifo_total{instance=\\\"$node\\\",job=\\\"$job\\\"}[$__rate_interval])\",\n              \"format\": \"time_series\",\n              \"intervalFactor\": 1,\n              \"legendFormat\": \"{{device}} - Receive fifo\",\n              \"refId\": \"A\",\n              \"step\": 240\n   
         },\n            {\n              \"datasource\": {\n                \"type\": \"prometheus\",\n                \"uid\": \"${datasource}\"\n              },\n              \"expr\":\n                  \"irate(node_network_transmit_fifo_total{instance=\\\"$node\\\",job=\\\"$job\\\"}[$__rate_interval])\",\n              \"format\": \"time_series\",\n              \"intervalFactor\": 1,\n              \"legendFormat\": \"{{device}} - Transmit fifo\",\n              \"refId\": \"B\",\n              \"step\": 240\n            }\n          ],\n          \"title\": \"Network Traffic Fifo\",\n          \"type\": \"timeseries\"\n        },\n        {\n          \"datasource\": {\n            \"type\": \"prometheus\",\n            \"uid\": \"${datasource}\"\n          },\n          \"fieldConfig\": {\n            \"defaults\": {\n              \"color\": {\n                \"mode\": \"palette-classic\"\n              },\n              \"custom\": {\n                \"axisCenteredZero\": false,\n                \"axisColorMode\": \"text\",\n                \"axisLabel\": \"packets out (-) / in (+)\",\n                \"axisPlacement\": \"auto\",\n                \"barAlignment\": 0,\n                \"drawStyle\": \"line\",\n                \"fillOpacity\": 20,\n                \"gradientMode\": \"none\",\n                \"hideFrom\": {\n                  \"legend\": false,\n                  \"tooltip\": false,\n                  \"viz\": false\n                },\n                \"lineInterpolation\": \"linear\",\n                \"lineWidth\": 1,\n                \"pointSize\": 5,\n                \"scaleDistribution\": {\n                  \"type\": \"linear\"\n                },\n                \"showPoints\": \"never\",\n                \"spanNulls\": false,\n                \"stacking\": {\n                  \"group\": \"A\",\n                  \"mode\": \"none\"\n                },\n                \"thresholdsStyle\": {\n                  \"mode\": 
\"off\"\n                }\n              },\n              \"links\": [],\n              \"mappings\": [],\n              \"thresholds\": {\n                \"mode\": \"absolute\",\n                \"steps\": [\n                  {\n                    \"color\": \"green\"\n                  },\n                  {\n                    \"color\": \"red\",\n                    \"value\": 80\n                  }\n                ]\n              },\n              \"unit\": \"pps\"\n            },\n            \"overrides\": [\n              {\n                \"matcher\": {\n                  \"id\": \"byRegexp\",\n                  \"options\": \"/.*Trans.*/\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"custom.transform\",\n                    \"value\": \"negative-Y\"\n                  }\n                ]\n              }\n            ]\n          },\n          \"gridPos\": {\n            \"h\": 10,\n            \"w\": 12,\n            \"x\": 0,\n            \"y\": 77\n          },\n          \"id\": 145,\n          \"links\": [],\n          \"options\": {\n            \"legend\": {\n              \"calcs\": [\n                \"mean\",\n                \"lastNotNull\",\n                \"max\",\n                \"min\"\n              ],\n              \"displayMode\": \"table\",\n              \"placement\": \"bottom\",\n              \"showLegend\": true,\n              \"width\": 300\n            },\n            \"tooltip\": {\n              \"mode\": \"multi\",\n              \"sort\": \"none\"\n            }\n          },\n          \"pluginVersion\": \"9.2.0\",\n          \"targets\": [\n            {\n              \"datasource\": {\n                \"type\": \"prometheus\",\n                \"uid\": \"${datasource}\"\n              },\n              \"expr\":\n                  \"irate(node_network_receive_frame_total{instance=\\\"$node\\\",job=\\\"$job\\\"}[$__rate_interval])\",\n            
  \"format\": \"time_series\",\n              \"hide\": false,\n              \"intervalFactor\": 1,\n              \"legendFormat\": \"{{device}} - Receive frame\",\n              \"refId\": \"A\",\n              \"step\": 240\n            }\n          ],\n          \"title\": \"Network Traffic Frame\",\n          \"type\": \"timeseries\"\n        },\n        {\n          \"datasource\": {\n            \"type\": \"prometheus\",\n            \"uid\": \"${datasource}\"\n          },\n          \"fieldConfig\": {\n            \"defaults\": {\n              \"color\": {\n                \"mode\": \"palette-classic\"\n              },\n              \"custom\": {\n                \"axisCenteredZero\": false,\n                \"axisColorMode\": \"text\",\n                \"axisLabel\": \"counter\",\n                \"axisPlacement\": \"auto\",\n                \"barAlignment\": 0,\n                \"drawStyle\": \"line\",\n                \"fillOpacity\": 20,\n                \"gradientMode\": \"none\",\n                \"hideFrom\": {\n                  \"legend\": false,\n                  \"tooltip\": false,\n                  \"viz\": false\n                },\n                \"lineInterpolation\": \"linear\",\n                \"lineWidth\": 1,\n                \"pointSize\": 5,\n                \"scaleDistribution\": {\n                  \"type\": \"linear\"\n                },\n                \"showPoints\": \"never\",\n                \"spanNulls\": false,\n                \"stacking\": {\n                  \"group\": \"A\",\n                  \"mode\": \"none\"\n                },\n                \"thresholdsStyle\": {\n                  \"mode\": \"off\"\n                }\n              },\n              \"links\": [],\n              \"mappings\": [],\n              \"thresholds\": {\n                \"mode\": \"absolute\",\n                \"steps\": [\n                  {\n                    \"color\": \"green\"\n                  },\n                  
{\n                    \"color\": \"red\",\n                    \"value\": 80\n                  }\n                ]\n              },\n              \"unit\": \"short\"\n            },\n            \"overrides\": []\n          },\n          \"gridPos\": {\n            \"h\": 10,\n            \"w\": 12,\n            \"x\": 12,\n            \"y\": 77\n          },\n          \"id\": 231,\n          \"links\": [],\n          \"options\": {\n            \"legend\": {\n              \"calcs\": [\n                \"mean\",\n                \"lastNotNull\",\n                \"max\",\n                \"min\"\n              ],\n              \"displayMode\": \"table\",\n              \"placement\": \"bottom\",\n              \"showLegend\": true,\n              \"width\": 300\n            },\n            \"tooltip\": {\n              \"mode\": \"multi\",\n              \"sort\": \"none\"\n            }\n          },\n          \"pluginVersion\": \"9.2.0\",\n          \"targets\": [\n            {\n              \"datasource\": {\n                \"type\": \"prometheus\",\n                \"uid\": \"${datasource}\"\n              },\n              \"expr\":\n                  \"irate(node_network_transmit_carrier_total{instance=\\\"$node\\\",job=\\\"$job\\\"}[$__rate_interval])\",\n              \"format\": \"time_series\",\n              \"intervalFactor\": 1,\n              \"legendFormat\": \"{{device}} - Statistic transmit_carrier\",\n              \"refId\": \"A\",\n              \"step\": 240\n            }\n          ],\n          \"title\": \"Network Traffic Carrier\",\n          \"type\": \"timeseries\"\n        },\n        {\n          \"datasource\": {\n            \"type\": \"prometheus\",\n            \"uid\": \"${datasource}\"\n          },\n          \"fieldConfig\": {\n            \"defaults\": {\n              \"color\": {\n                \"mode\": \"palette-classic\"\n              },\n              \"custom\": {\n                \"axisCenteredZero\": 
false,\n                \"axisColorMode\": \"text\",\n                \"axisLabel\": \"counter\",\n                \"axisPlacement\": \"auto\",\n                \"barAlignment\": 0,\n                \"drawStyle\": \"line\",\n                \"fillOpacity\": 20,\n                \"gradientMode\": \"none\",\n                \"hideFrom\": {\n                  \"legend\": false,\n                  \"tooltip\": false,\n                  \"viz\": false\n                },\n                \"lineInterpolation\": \"linear\",\n                \"lineWidth\": 1,\n                \"pointSize\": 5,\n                \"scaleDistribution\": {\n                  \"type\": \"linear\"\n                },\n                \"showPoints\": \"never\",\n                \"spanNulls\": false,\n                \"stacking\": {\n                  \"group\": \"A\",\n                  \"mode\": \"none\"\n                },\n                \"thresholdsStyle\": {\n                  \"mode\": \"off\"\n                }\n              },\n              \"links\": [],\n              \"mappings\": [],\n              \"thresholds\": {\n                \"mode\": \"absolute\",\n                \"steps\": [\n                  {\n                    \"color\": \"green\"\n                  },\n                  {\n                    \"color\": \"red\",\n                    \"value\": 80\n                  }\n                ]\n              },\n              \"unit\": \"short\"\n            },\n            \"overrides\": [\n              {\n                \"matcher\": {\n                  \"id\": \"byRegexp\",\n                  \"options\": \"/.*Trans.*/\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"custom.transform\",\n                    \"value\": \"negative-Y\"\n                  }\n                ]\n              }\n            ]\n          },\n          \"gridPos\": {\n            \"h\": 10,\n            \"w\": 12,\n            \"x\": 
0,\n            \"y\": 87\n          },\n          \"id\": 232,\n          \"links\": [],\n          \"options\": {\n            \"legend\": {\n              \"calcs\": [\n                \"mean\",\n                \"lastNotNull\",\n                \"max\",\n                \"min\"\n              ],\n              \"displayMode\": \"table\",\n              \"placement\": \"bottom\",\n              \"showLegend\": true,\n              \"width\": 300\n            },\n            \"tooltip\": {\n              \"mode\": \"multi\",\n              \"sort\": \"none\"\n            }\n          },\n          \"pluginVersion\": \"9.2.0\",\n          \"targets\": [\n            {\n              \"datasource\": {\n                \"type\": \"prometheus\",\n                \"uid\": \"${datasource}\"\n              },\n              \"expr\":\n                  \"irate(node_network_transmit_colls_total{instance=\\\"$node\\\",job=\\\"$job\\\"}[$__rate_interval])\",\n              \"format\": \"time_series\",\n              \"intervalFactor\": 1,\n              \"legendFormat\": \"{{device}} - Transmit colls\",\n              \"refId\": \"A\",\n              \"step\": 240\n            }\n          ],\n          \"title\": \"Network Traffic Colls\",\n          \"type\": \"timeseries\"\n        },\n        {\n          \"datasource\": {\n            \"type\": \"prometheus\",\n            \"uid\": \"${datasource}\"\n          },\n          \"fieldConfig\": {\n            \"defaults\": {\n              \"color\": {\n                \"mode\": \"palette-classic\"\n              },\n              \"custom\": {\n                \"axisCenteredZero\": false,\n                \"axisColorMode\": \"text\",\n                \"axisLabel\": \"entries\",\n                \"axisPlacement\": \"auto\",\n                \"barAlignment\": 0,\n                \"drawStyle\": \"line\",\n                \"fillOpacity\": 20,\n                \"gradientMode\": \"none\",\n                \"hideFrom\": {\n     
             \"legend\": false,\n                  \"tooltip\": false,\n                  \"viz\": false\n                },\n                \"lineInterpolation\": \"linear\",\n                \"lineWidth\": 1,\n                \"pointSize\": 5,\n                \"scaleDistribution\": {\n                  \"type\": \"linear\"\n                },\n                \"showPoints\": \"never\",\n                \"spanNulls\": false,\n                \"stacking\": {\n                  \"group\": \"A\",\n                  \"mode\": \"none\"\n                },\n                \"thresholdsStyle\": {\n                  \"mode\": \"off\"\n                }\n              },\n              \"links\": [],\n              \"mappings\": [],\n              \"min\": 0,\n              \"thresholds\": {\n                \"mode\": \"absolute\",\n                \"steps\": [\n                  {\n                    \"color\": \"green\"\n                  },\n                  {\n                    \"color\": \"red\",\n                    \"value\": 80\n                  }\n                ]\n              },\n              \"unit\": \"short\"\n            },\n            \"overrides\": [\n              {\n                \"matcher\": {\n                  \"id\": \"byName\",\n                  \"options\": \"NF conntrack limit\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#890F02\",\n                      \"mode\": \"fixed\"\n                    }\n                  },\n                  {\n                    \"id\": \"custom.fillOpacity\",\n                    \"value\": 0\n                  }\n                ]\n              }\n            ]\n          },\n          \"gridPos\": {\n            \"h\": 10,\n            \"w\": 12,\n            \"x\": 12,\n            \"y\": 87\n          },\n          \"id\": 61,\n          \"links\": 
[],\n          \"options\": {\n            \"legend\": {\n              \"calcs\": [\n                \"mean\",\n                \"lastNotNull\",\n                \"max\",\n                \"min\"\n              ],\n              \"displayMode\": \"table\",\n              \"placement\": \"bottom\",\n              \"showLegend\": true\n            },\n            \"tooltip\": {\n              \"mode\": \"multi\",\n              \"sort\": \"none\"\n            }\n          },\n          \"pluginVersion\": \"9.2.0\",\n          \"targets\": [\n            {\n              \"datasource\": {\n                \"type\": \"prometheus\",\n                \"uid\": \"${datasource}\"\n              },\n              \"expr\": \"node_nf_conntrack_entries{instance=\\\"$node\\\",job=\\\"$job\\\"}\",\n              \"format\": \"time_series\",\n              \"intervalFactor\": 1,\n              \"legendFormat\": \"NF conntrack entries\",\n              \"refId\": \"A\",\n              \"step\": 240\n            },\n            {\n              \"datasource\": {\n                \"type\": \"prometheus\",\n                \"uid\": \"${datasource}\"\n              },\n              \"expr\": \"node_nf_conntrack_entries_limit{instance=\\\"$node\\\",job=\\\"$job\\\"}\",\n              \"format\": \"time_series\",\n              \"intervalFactor\": 1,\n              \"legendFormat\": \"NF conntrack limit\",\n              \"refId\": \"B\",\n              \"step\": 240\n            }\n          ],\n          \"title\": \"NF Conntrack\",\n          \"type\": \"timeseries\"\n        },\n        {\n          \"datasource\": {\n            \"type\": \"prometheus\",\n            \"uid\": \"${datasource}\"\n          },\n          \"fieldConfig\": {\n            \"defaults\": {\n              \"color\": {\n                \"mode\": \"palette-classic\"\n              },\n              \"custom\": {\n                \"axisCenteredZero\": false,\n                \"axisColorMode\": \"text\",\n    
            \"axisLabel\": \"Entries\",\n                \"axisPlacement\": \"auto\",\n                \"barAlignment\": 0,\n                \"drawStyle\": \"line\",\n                \"fillOpacity\": 20,\n                \"gradientMode\": \"none\",\n                \"hideFrom\": {\n                  \"legend\": false,\n                  \"tooltip\": false,\n                  \"viz\": false\n                },\n                \"lineInterpolation\": \"linear\",\n                \"lineWidth\": 1,\n                \"pointSize\": 5,\n                \"scaleDistribution\": {\n                  \"type\": \"linear\"\n                },\n                \"showPoints\": \"never\",\n                \"spanNulls\": false,\n                \"stacking\": {\n                  \"group\": \"A\",\n                  \"mode\": \"none\"\n                },\n                \"thresholdsStyle\": {\n                  \"mode\": \"off\"\n                }\n              },\n              \"links\": [],\n              \"mappings\": [],\n              \"min\": 0,\n              \"thresholds\": {\n                \"mode\": \"absolute\",\n                \"steps\": [\n                  {\n                    \"color\": \"green\"\n                  },\n                  {\n                    \"color\": \"red\",\n                    \"value\": 80\n                  }\n                ]\n              },\n              \"unit\": \"short\"\n            },\n            \"overrides\": []\n          },\n          \"gridPos\": {\n            \"h\": 10,\n            \"w\": 12,\n            \"x\": 0,\n            \"y\": 97\n          },\n          \"id\": 230,\n          \"links\": [],\n          \"options\": {\n            \"legend\": {\n              \"calcs\": [\n                \"mean\",\n                \"lastNotNull\",\n                \"max\",\n                \"min\"\n              ],\n              \"displayMode\": \"table\",\n              \"placement\": \"bottom\",\n              
\"showLegend\": true\n            },\n            \"tooltip\": {\n              \"mode\": \"multi\",\n              \"sort\": \"none\"\n            }\n          },\n          \"pluginVersion\": \"9.2.0\",\n          \"targets\": [\n            {\n              \"datasource\": {\n                \"type\": \"prometheus\",\n                \"uid\": \"${datasource}\"\n              },\n              \"expr\": \"node_arp_entries{instance=\\\"$node\\\",job=\\\"$job\\\"}\",\n              \"format\": \"time_series\",\n              \"intervalFactor\": 1,\n              \"legendFormat\": \"{{ device }} - ARP entries\",\n              \"refId\": \"A\",\n              \"step\": 240\n            }\n          ],\n          \"title\": \"ARP Entries\",\n          \"type\": \"timeseries\"\n        },\n        {\n          \"datasource\": {\n            \"type\": \"prometheus\",\n            \"uid\": \"${datasource}\"\n          },\n          \"fieldConfig\": {\n            \"defaults\": {\n              \"color\": {\n                \"mode\": \"palette-classic\"\n              },\n              \"custom\": {\n                \"axisCenteredZero\": false,\n                \"axisColorMode\": \"text\",\n                \"axisLabel\": \"bytes\",\n                \"axisPlacement\": \"auto\",\n                \"barAlignment\": 0,\n                \"drawStyle\": \"line\",\n                \"fillOpacity\": 20,\n                \"gradientMode\": \"none\",\n                \"hideFrom\": {\n                  \"legend\": false,\n                  \"tooltip\": false,\n                  \"viz\": false\n                },\n                \"lineInterpolation\": \"linear\",\n                \"lineWidth\": 1,\n                \"pointSize\": 5,\n                \"scaleDistribution\": {\n                  \"type\": \"linear\"\n                },\n                \"showPoints\": \"never\",\n                \"spanNulls\": false,\n                \"stacking\": {\n                  \"group\": \"A\",\n   
               \"mode\": \"none\"\n                },\n                \"thresholdsStyle\": {\n                  \"mode\": \"off\"\n                }\n              },\n              \"decimals\": 0,\n              \"links\": [],\n              \"mappings\": [],\n              \"min\": 0,\n              \"thresholds\": {\n                \"mode\": \"absolute\",\n                \"steps\": [\n                  {\n                    \"color\": \"green\"\n                  },\n                  {\n                    \"color\": \"red\",\n                    \"value\": 80\n                  }\n                ]\n              },\n              \"unit\": \"bytes\"\n            },\n            \"overrides\": []\n          },\n          \"gridPos\": {\n            \"h\": 10,\n            \"w\": 12,\n            \"x\": 12,\n            \"y\": 97\n          },\n          \"id\": 288,\n          \"links\": [],\n          \"options\": {\n            \"legend\": {\n              \"calcs\": [\n                \"mean\",\n                \"lastNotNull\",\n                \"max\",\n                \"min\"\n              ],\n              \"displayMode\": \"table\",\n              \"placement\": \"bottom\",\n              \"showLegend\": true\n            },\n            \"tooltip\": {\n              \"mode\": \"multi\",\n              \"sort\": \"none\"\n            }\n          },\n          \"pluginVersion\": \"9.2.0\",\n          \"targets\": [\n            {\n              \"datasource\": {\n                \"type\": \"prometheus\",\n                \"uid\": \"${datasource}\"\n              },\n              \"expr\": \"node_network_mtu_bytes{instance=\\\"$node\\\",job=\\\"$job\\\"}\",\n              \"format\": \"time_series\",\n              \"intervalFactor\": 1,\n              \"legendFormat\": \"{{ device }} - Bytes\",\n              \"refId\": \"A\",\n              \"step\": 240\n            }\n          ],\n          \"title\": \"MTU\",\n          \"type\": 
\"timeseries\"\n        },\n        {\n          \"datasource\": {\n            \"type\": \"prometheus\",\n            \"uid\": \"${datasource}\"\n          },\n          \"fieldConfig\": {\n            \"defaults\": {\n              \"color\": {\n                \"mode\": \"palette-classic\"\n              },\n              \"custom\": {\n                \"axisCenteredZero\": false,\n                \"axisColorMode\": \"text\",\n                \"axisLabel\": \"bytes\",\n                \"axisPlacement\": \"auto\",\n                \"barAlignment\": 0,\n                \"drawStyle\": \"line\",\n                \"fillOpacity\": 20,\n                \"gradientMode\": \"none\",\n                \"hideFrom\": {\n                  \"legend\": false,\n                  \"tooltip\": false,\n                  \"viz\": false\n                },\n                \"lineInterpolation\": \"linear\",\n                \"lineWidth\": 1,\n                \"pointSize\": 5,\n                \"scaleDistribution\": {\n                  \"type\": \"linear\"\n                },\n                \"showPoints\": \"never\",\n                \"spanNulls\": false,\n                \"stacking\": {\n                  \"group\": \"A\",\n                  \"mode\": \"none\"\n                },\n                \"thresholdsStyle\": {\n                  \"mode\": \"off\"\n                }\n              },\n              \"decimals\": 0,\n              \"links\": [],\n              \"mappings\": [],\n              \"min\": 0,\n              \"thresholds\": {\n                \"mode\": \"absolute\",\n                \"steps\": [\n                  {\n                    \"color\": \"green\"\n                  },\n                  {\n                    \"color\": \"red\",\n                    \"value\": 80\n                  }\n                ]\n              },\n              \"unit\": \"bytes\"\n            },\n            \"overrides\": []\n          },\n          \"gridPos\": {\n            
\"h\": 10,\n            \"w\": 12,\n            \"x\": 0,\n            \"y\": 107\n          },\n          \"id\": 280,\n          \"links\": [],\n          \"options\": {\n            \"legend\": {\n              \"calcs\": [\n                \"mean\",\n                \"lastNotNull\",\n                \"max\",\n                \"min\"\n              ],\n              \"displayMode\": \"table\",\n              \"placement\": \"bottom\",\n              \"showLegend\": true\n            },\n            \"tooltip\": {\n              \"mode\": \"multi\",\n              \"sort\": \"none\"\n            }\n          },\n          \"pluginVersion\": \"9.2.0\",\n          \"targets\": [\n            {\n              \"datasource\": {\n                \"type\": \"prometheus\",\n                \"uid\": \"${datasource}\"\n              },\n              \"expr\": \"node_network_speed_bytes{instance=\\\"$node\\\",job=\\\"$job\\\"}\",\n              \"format\": \"time_series\",\n              \"intervalFactor\": 1,\n              \"legendFormat\": \"{{ device }} - Speed\",\n              \"refId\": \"A\",\n              \"step\": 240\n            }\n          ],\n          \"title\": \"Speed\",\n          \"type\": \"timeseries\"\n        },\n        {\n          \"datasource\": {\n            \"type\": \"prometheus\",\n            \"uid\": \"${datasource}\"\n          },\n          \"fieldConfig\": {\n            \"defaults\": {\n              \"color\": {\n                \"mode\": \"palette-classic\"\n              },\n              \"custom\": {\n                \"axisCenteredZero\": false,\n                \"axisColorMode\": \"text\",\n                \"axisLabel\": \"packets\",\n                \"axisPlacement\": \"auto\",\n                \"barAlignment\": 0,\n                \"drawStyle\": \"line\",\n                \"fillOpacity\": 20,\n                \"gradientMode\": \"none\",\n                \"hideFrom\": {\n                  \"legend\": false,\n                  
\"tooltip\": false,\n                  \"viz\": false\n                },\n                \"lineInterpolation\": \"linear\",\n                \"lineWidth\": 1,\n                \"pointSize\": 5,\n                \"scaleDistribution\": {\n                  \"type\": \"linear\"\n                },\n                \"showPoints\": \"never\",\n                \"spanNulls\": false,\n                \"stacking\": {\n                  \"group\": \"A\",\n                  \"mode\": \"none\"\n                },\n                \"thresholdsStyle\": {\n                  \"mode\": \"off\"\n                }\n              },\n              \"decimals\": 0,\n              \"links\": [],\n              \"mappings\": [],\n              \"min\": 0,\n              \"thresholds\": {\n                \"mode\": \"absolute\",\n                \"steps\": [\n                  {\n                    \"color\": \"green\"\n                  },\n                  {\n                    \"color\": \"red\",\n                    \"value\": 80\n                  }\n                ]\n              },\n              \"unit\": \"none\"\n            },\n            \"overrides\": []\n          },\n          \"gridPos\": {\n            \"h\": 10,\n            \"w\": 12,\n            \"x\": 12,\n            \"y\": 107\n          },\n          \"id\": 289,\n          \"links\": [],\n          \"options\": {\n            \"legend\": {\n              \"calcs\": [\n                \"mean\",\n                \"lastNotNull\",\n                \"max\",\n                \"min\"\n              ],\n              \"displayMode\": \"table\",\n              \"placement\": \"bottom\",\n              \"showLegend\": true\n            },\n            \"tooltip\": {\n              \"mode\": \"multi\",\n              \"sort\": \"none\"\n            }\n          },\n          \"pluginVersion\": \"9.2.0\",\n          \"targets\": [\n            {\n              \"datasource\": {\n                \"type\": 
\"prometheus\",\n                \"uid\": \"${datasource}\"\n              },\n              \"expr\": \"node_network_transmit_queue_length{instance=\\\"$node\\\",job=\\\"$job\\\"}\",\n              \"format\": \"time_series\",\n              \"intervalFactor\": 1,\n              \"legendFormat\": \"{{ device }} -   Interface transmit queue length\",\n              \"refId\": \"A\",\n              \"step\": 240\n            }\n          ],\n          \"title\": \"Queue Length\",\n          \"type\": \"timeseries\"\n        },\n        {\n          \"datasource\": {\n            \"type\": \"prometheus\",\n            \"uid\": \"${datasource}\"\n          },\n          \"fieldConfig\": {\n            \"defaults\": {\n              \"color\": {\n                \"mode\": \"palette-classic\"\n              },\n              \"custom\": {\n                \"axisCenteredZero\": false,\n                \"axisColorMode\": \"text\",\n                \"axisLabel\": \"packetes drop (-) / process (+)\",\n                \"axisPlacement\": \"auto\",\n                \"barAlignment\": 0,\n                \"drawStyle\": \"line\",\n                \"fillOpacity\": 20,\n                \"gradientMode\": \"none\",\n                \"hideFrom\": {\n                  \"legend\": false,\n                  \"tooltip\": false,\n                  \"viz\": false\n                },\n                \"lineInterpolation\": \"linear\",\n                \"lineWidth\": 1,\n                \"pointSize\": 5,\n                \"scaleDistribution\": {\n                  \"type\": \"linear\"\n                },\n                \"showPoints\": \"never\",\n                \"spanNulls\": false,\n                \"stacking\": {\n                  \"group\": \"A\",\n                  \"mode\": \"none\"\n                },\n                \"thresholdsStyle\": {\n                  \"mode\": \"off\"\n                }\n              },\n              \"links\": [],\n              \"mappings\": [],\n       
       \"thresholds\": {\n                \"mode\": \"absolute\",\n                \"steps\": [\n                  {\n                    \"color\": \"green\"\n                  },\n                  {\n                    \"color\": \"red\",\n                    \"value\": 80\n                  }\n                ]\n              },\n              \"unit\": \"short\"\n            },\n            \"overrides\": [\n              {\n                \"matcher\": {\n                  \"id\": \"byRegexp\",\n                  \"options\": \"/.*Dropped.*/\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"custom.transform\",\n                    \"value\": \"negative-Y\"\n                  }\n                ]\n              }\n            ]\n          },\n          \"gridPos\": {\n            \"h\": 10,\n            \"w\": 12,\n            \"x\": 0,\n            \"y\": 117\n          },\n          \"id\": 290,\n          \"links\": [],\n          \"options\": {\n            \"legend\": {\n              \"calcs\": [\n                \"mean\",\n                \"lastNotNull\",\n                \"max\",\n                \"min\"\n              ],\n              \"displayMode\": \"table\",\n              \"placement\": \"bottom\",\n              \"showLegend\": true,\n              \"width\": 300\n            },\n            \"tooltip\": {\n              \"mode\": \"multi\",\n              \"sort\": \"none\"\n            }\n          },\n          \"pluginVersion\": \"9.2.0\",\n          \"targets\": [\n            {\n              \"datasource\": {\n                \"type\": \"prometheus\",\n                \"uid\": \"${datasource}\"\n              },\n              \"expr\":\n                  \"irate(node_softnet_processed_total{instance=\\\"$node\\\",job=\\\"$job\\\"}[$__rate_interval])\",\n              \"format\": \"time_series\",\n              \"interval\": \"\",\n              \"intervalFactor\": 1,\n          
    \"legendFormat\": \"CPU {{cpu}} - Processed\",\n              \"refId\": \"A\",\n              \"step\": 240\n            },\n            {\n              \"datasource\": {\n                \"type\": \"prometheus\",\n                \"uid\": \"${datasource}\"\n              },\n              \"expr\":\n                  \"irate(node_softnet_dropped_total{instance=\\\"$node\\\",job=\\\"$job\\\"}[$__rate_interval])\",\n              \"format\": \"time_series\",\n              \"interval\": \"\",\n              \"intervalFactor\": 1,\n              \"legendFormat\": \"CPU {{cpu}} - Dropped\",\n              \"refId\": \"B\",\n              \"step\": 240\n            }\n          ],\n          \"title\": \"Softnet Packets\",\n          \"type\": \"timeseries\"\n        },\n        {\n          \"datasource\": {\n            \"type\": \"prometheus\",\n            \"uid\": \"${datasource}\"\n          },\n          \"fieldConfig\": {\n            \"defaults\": {\n              \"color\": {\n                \"mode\": \"palette-classic\"\n              },\n              \"custom\": {\n                \"axisCenteredZero\": false,\n                \"axisColorMode\": \"text\",\n                \"axisLabel\": \"counter\",\n                \"axisPlacement\": \"auto\",\n                \"barAlignment\": 0,\n                \"drawStyle\": \"line\",\n                \"fillOpacity\": 20,\n                \"gradientMode\": \"none\",\n                \"hideFrom\": {\n                  \"legend\": false,\n                  \"tooltip\": false,\n                  \"viz\": false\n                },\n                \"lineInterpolation\": \"linear\",\n                \"lineWidth\": 1,\n                \"pointSize\": 5,\n                \"scaleDistribution\": {\n                  \"type\": \"linear\"\n                },\n                \"showPoints\": \"never\",\n                \"spanNulls\": false,\n                \"stacking\": {\n                  \"group\": \"A\",\n               
   \"mode\": \"none\"\n                },\n                \"thresholdsStyle\": {\n                  \"mode\": \"off\"\n                }\n              },\n              \"links\": [],\n              \"mappings\": [],\n              \"thresholds\": {\n                \"mode\": \"absolute\",\n                \"steps\": [\n                  {\n                    \"color\": \"green\"\n                  },\n                  {\n                    \"color\": \"red\",\n                    \"value\": 80\n                  }\n                ]\n              },\n              \"unit\": \"short\"\n            },\n            \"overrides\": []\n          },\n          \"gridPos\": {\n            \"h\": 10,\n            \"w\": 12,\n            \"x\": 12,\n            \"y\": 117\n          },\n          \"id\": 310,\n          \"links\": [],\n          \"options\": {\n            \"legend\": {\n              \"calcs\": [\n                \"mean\",\n                \"lastNotNull\",\n                \"max\",\n                \"min\"\n              ],\n              \"displayMode\": \"table\",\n              \"placement\": \"bottom\",\n              \"showLegend\": true,\n              \"width\": 300\n            },\n            \"tooltip\": {\n              \"mode\": \"multi\",\n              \"sort\": \"none\"\n            }\n          },\n          \"pluginVersion\": \"9.2.0\",\n          \"targets\": [\n            {\n              \"datasource\": {\n                \"type\": \"prometheus\",\n                \"uid\": \"${datasource}\"\n              },\n              \"expr\":\n                  \"irate(node_softnet_times_squeezed_total{instance=\\\"$node\\\",job=\\\"$job\\\"}[$__rate_interval])\",\n              \"format\": \"time_series\",\n              \"interval\": \"\",\n              \"intervalFactor\": 1,\n              \"legendFormat\": \"CPU {{cpu}} - Squeezed\",\n              \"refId\": \"A\",\n              \"step\": 240\n            }\n          ],\n          
\"title\": \"Softnet Out of Quota\",\n          \"type\": \"timeseries\"\n        },\n        {\n          \"datasource\": {\n            \"type\": \"prometheus\",\n            \"uid\": \"${datasource}\"\n          },\n          \"fieldConfig\": {\n            \"defaults\": {\n              \"color\": {\n                \"mode\": \"palette-classic\"\n              },\n              \"custom\": {\n                \"axisCenteredZero\": false,\n                \"axisColorMode\": \"text\",\n                \"axisLabel\": \"counter\",\n                \"axisPlacement\": \"auto\",\n                \"barAlignment\": 0,\n                \"drawStyle\": \"line\",\n                \"fillOpacity\": 20,\n                \"gradientMode\": \"none\",\n                \"hideFrom\": {\n                  \"legend\": false,\n                  \"tooltip\": false,\n                  \"viz\": false\n                },\n                \"lineInterpolation\": \"linear\",\n                \"lineWidth\": 1,\n                \"pointSize\": 5,\n                \"scaleDistribution\": {\n                  \"type\": \"linear\"\n                },\n                \"showPoints\": \"never\",\n                \"spanNulls\": false,\n                \"stacking\": {\n                  \"group\": \"A\",\n                  \"mode\": \"none\"\n                },\n                \"thresholdsStyle\": {\n                  \"mode\": \"off\"\n                }\n              },\n              \"links\": [],\n              \"mappings\": [],\n              \"thresholds\": {\n                \"mode\": \"absolute\",\n                \"steps\": [\n                  {\n                    \"color\": \"green\"\n                  },\n                  {\n                    \"color\": \"red\",\n                    \"value\": 80\n                  }\n                ]\n              },\n              \"unit\": \"short\"\n            },\n            \"overrides\": []\n          },\n          \"gridPos\": {\n            
\"h\": 10,\n            \"w\": 12,\n            \"x\": 0,\n            \"y\": 127\n          },\n          \"id\": 309,\n          \"links\": [],\n          \"options\": {\n            \"legend\": {\n              \"calcs\": [\n                \"mean\",\n                \"lastNotNull\",\n                \"max\",\n                \"min\"\n              ],\n              \"displayMode\": \"table\",\n              \"placement\": \"bottom\",\n              \"showLegend\": true,\n              \"width\": 300\n            },\n            \"tooltip\": {\n              \"mode\": \"multi\",\n              \"sort\": \"none\"\n            }\n          },\n          \"pluginVersion\": \"9.2.0\",\n          \"targets\": [\n            {\n              \"datasource\": {\n                \"type\": \"prometheus\",\n                \"uid\": \"${datasource}\"\n              },\n              \"expr\": \"node_network_up{operstate=\\\"up\\\",instance=\\\"$node\\\",job=\\\"$job\\\"}\",\n              \"format\": \"time_series\",\n              \"intervalFactor\": 1,\n              \"legendFormat\": \"{{interface}} - Operational state UP\",\n              \"refId\": \"A\",\n              \"step\": 240\n            },\n            {\n              \"datasource\": {\n                \"type\": \"prometheus\",\n                \"uid\": \"${datasource}\"\n              },\n              \"expr\": \"node_network_carrier{instance=\\\"$node\\\",job=\\\"$job\\\"}\",\n              \"format\": \"time_series\",\n              \"instant\": false,\n              \"legendFormat\": \"{{device}} - Physical link state\",\n              \"refId\": \"B\"\n            }\n          ],\n          \"title\": \"Network Operational Status\",\n          \"type\": \"timeseries\"\n        }\n      ],\n      \"targets\": [\n        {\n          \"datasource\": {\n            \"type\": \"prometheus\",\n            \"uid\": \"000000001\"\n          },\n          \"refId\": \"A\"\n        }\n      ],\n      \"title\": 
\"Network Traffic\",\n      \"type\": \"row\"\n    },\n    {\n      \"collapsed\": true,\n      \"datasource\": {\n        \"type\": \"prometheus\",\n        \"uid\": \"000000001\"\n      },\n      \"gridPos\": {\n        \"h\": 1,\n        \"w\": 24,\n        \"x\": 0,\n        \"y\": 31\n      },\n      \"id\": 273,\n      \"panels\": [\n        {\n          \"datasource\": {\n            \"type\": \"prometheus\",\n            \"uid\": \"${datasource}\"\n          },\n          \"fieldConfig\": {\n            \"defaults\": {\n              \"color\": {\n                \"mode\": \"palette-classic\"\n              },\n              \"custom\": {\n                \"axisCenteredZero\": false,\n                \"axisColorMode\": \"text\",\n                \"axisLabel\": \"counter\",\n                \"axisPlacement\": \"auto\",\n                \"barAlignment\": 0,\n                \"drawStyle\": \"line\",\n                \"fillOpacity\": 20,\n                \"gradientMode\": \"none\",\n                \"hideFrom\": {\n                  \"legend\": false,\n                  \"tooltip\": false,\n                  \"viz\": false\n                },\n                \"lineInterpolation\": \"linear\",\n                \"lineWidth\": 1,\n                \"pointSize\": 5,\n                \"scaleDistribution\": {\n                  \"type\": \"linear\"\n                },\n                \"showPoints\": \"never\",\n                \"spanNulls\": false,\n                \"stacking\": {\n                  \"group\": \"A\",\n                  \"mode\": \"none\"\n                },\n                \"thresholdsStyle\": {\n                  \"mode\": \"off\"\n                }\n              },\n              \"links\": [],\n              \"mappings\": [],\n              \"min\": 0,\n              \"thresholds\": {\n                \"mode\": \"absolute\",\n                \"steps\": [\n                  {\n                    \"color\": \"green\"\n                  },\n      
            {\n                    \"color\": \"red\",\n                    \"value\": 80\n                  }\n                ]\n              },\n              \"unit\": \"short\"\n            },\n            \"overrides\": []\n          },\n          \"gridPos\": {\n            \"h\": 10,\n            \"w\": 12,\n            \"x\": 0,\n            \"y\": 48\n          },\n          \"id\": 63,\n          \"links\": [],\n          \"options\": {\n            \"legend\": {\n              \"calcs\": [\n                \"mean\",\n                \"lastNotNull\",\n                \"max\",\n                \"min\"\n              ],\n              \"displayMode\": \"table\",\n              \"placement\": \"bottom\",\n              \"showLegend\": true,\n              \"width\": 300\n            },\n            \"tooltip\": {\n              \"mode\": \"multi\",\n              \"sort\": \"none\"\n            }\n          },\n          \"pluginVersion\": \"9.2.0\",\n          \"targets\": [\n            {\n              \"datasource\": {\n                \"type\": \"prometheus\",\n                \"uid\": \"${datasource}\"\n              },\n              \"expr\": \"node_sockstat_TCP_alloc{instance=\\\"$node\\\",job=\\\"$job\\\"}\",\n              \"format\": \"time_series\",\n              \"interval\": \"\",\n              \"intervalFactor\": 1,\n              \"legendFormat\": \"TCP_alloc - Allocated sockets\",\n              \"refId\": \"A\",\n              \"step\": 240\n            },\n            {\n              \"datasource\": {\n                \"type\": \"prometheus\",\n                \"uid\": \"${datasource}\"\n              },\n              \"expr\": \"node_sockstat_TCP_inuse{instance=\\\"$node\\\",job=\\\"$job\\\"}\",\n              \"format\": \"time_series\",\n              \"interval\": \"\",\n              \"intervalFactor\": 1,\n              \"legendFormat\": \"TCP_inuse - Tcp sockets currently in use\",\n              \"refId\": \"B\",\n           
   \"step\": 240\n            },\n            {\n              \"datasource\": {\n                \"type\": \"prometheus\",\n                \"uid\": \"${datasource}\"\n              },\n              \"expr\": \"node_sockstat_TCP_mem{instance=\\\"$node\\\",job=\\\"$job\\\"}\",\n              \"format\": \"time_series\",\n              \"hide\": true,\n              \"interval\": \"\",\n              \"intervalFactor\": 1,\n              \"legendFormat\": \"TCP_mem - Used memory for tcp\",\n              \"refId\": \"C\",\n              \"step\": 240\n            },\n            {\n              \"datasource\": {\n                \"type\": \"prometheus\",\n                \"uid\": \"${datasource}\"\n              },\n              \"expr\": \"node_sockstat_TCP_orphan{instance=\\\"$node\\\",job=\\\"$job\\\"}\",\n              \"format\": \"time_series\",\n              \"interval\": \"\",\n              \"intervalFactor\": 1,\n              \"legendFormat\": \"TCP_orphan - Orphan sockets\",\n              \"refId\": \"D\",\n              \"step\": 240\n            },\n            {\n              \"datasource\": {\n                \"type\": \"prometheus\",\n                \"uid\": \"${datasource}\"\n              },\n              \"expr\": \"node_sockstat_TCP_tw{instance=\\\"$node\\\",job=\\\"$job\\\"}\",\n              \"format\": \"time_series\",\n              \"interval\": \"\",\n              \"intervalFactor\": 1,\n              \"legendFormat\": \"TCP_tw - Sockets waiting close\",\n              \"refId\": \"E\",\n              \"step\": 240\n            }\n          ],\n          \"title\": \"Sockstat TCP\",\n          \"type\": \"timeseries\"\n        },\n        {\n          \"datasource\": {\n            \"type\": \"prometheus\",\n            \"uid\": \"${datasource}\"\n          },\n          \"fieldConfig\": {\n            \"defaults\": {\n              \"color\": {\n                \"mode\": \"palette-classic\"\n              },\n              
\"custom\": {\n                \"axisCenteredZero\": false,\n                \"axisColorMode\": \"text\",\n                \"axisLabel\": \"counter\",\n                \"axisPlacement\": \"auto\",\n                \"barAlignment\": 0,\n                \"drawStyle\": \"line\",\n                \"fillOpacity\": 20,\n                \"gradientMode\": \"none\",\n                \"hideFrom\": {\n                  \"legend\": false,\n                  \"tooltip\": false,\n                  \"viz\": false\n                },\n                \"lineInterpolation\": \"linear\",\n                \"lineWidth\": 1,\n                \"pointSize\": 5,\n                \"scaleDistribution\": {\n                  \"type\": \"linear\"\n                },\n                \"showPoints\": \"never\",\n                \"spanNulls\": false,\n                \"stacking\": {\n                  \"group\": \"A\",\n                  \"mode\": \"none\"\n                },\n                \"thresholdsStyle\": {\n                  \"mode\": \"off\"\n                }\n              },\n              \"links\": [],\n              \"mappings\": [],\n              \"min\": 0,\n              \"thresholds\": {\n                \"mode\": \"absolute\",\n                \"steps\": [\n                  {\n                    \"color\": \"green\"\n                  },\n                  {\n                    \"color\": \"red\",\n                    \"value\": 80\n                  }\n                ]\n              },\n              \"unit\": \"short\"\n            },\n            \"overrides\": []\n          },\n          \"gridPos\": {\n            \"h\": 10,\n            \"w\": 12,\n            \"x\": 12,\n            \"y\": 48\n          },\n          \"id\": 124,\n          \"links\": [],\n          \"options\": {\n            \"legend\": {\n              \"calcs\": [\n                \"mean\",\n                \"lastNotNull\",\n                \"max\",\n                \"min\"\n              
],\n              \"displayMode\": \"table\",\n              \"placement\": \"bottom\",\n              \"showLegend\": true,\n              \"width\": 300\n            },\n            \"tooltip\": {\n              \"mode\": \"multi\",\n              \"sort\": \"none\"\n            }\n          },\n          \"pluginVersion\": \"9.2.0\",\n          \"targets\": [\n            {\n              \"datasource\": {\n                \"type\": \"prometheus\",\n                \"uid\": \"${datasource}\"\n              },\n              \"expr\": \"node_sockstat_UDPLITE_inuse{instance=\\\"$node\\\",job=\\\"$job\\\"}\",\n              \"format\": \"time_series\",\n              \"interval\": \"\",\n              \"intervalFactor\": 1,\n              \"legendFormat\": \"UDPLITE_inuse - Udplite sockets currently in use\",\n              \"refId\": \"A\",\n              \"step\": 240\n            },\n            {\n              \"datasource\": {\n                \"type\": \"prometheus\",\n                \"uid\": \"${datasource}\"\n              },\n              \"expr\": \"node_sockstat_UDP_inuse{instance=\\\"$node\\\",job=\\\"$job\\\"}\",\n              \"format\": \"time_series\",\n              \"interval\": \"\",\n              \"intervalFactor\": 1,\n              \"legendFormat\": \"UDP_inuse - Udp sockets currently in use\",\n              \"refId\": \"B\",\n              \"step\": 240\n            },\n            {\n              \"datasource\": {\n                \"type\": \"prometheus\",\n                \"uid\": \"${datasource}\"\n              },\n              \"expr\": \"node_sockstat_UDP_mem{instance=\\\"$node\\\",job=\\\"$job\\\"}\",\n              \"format\": \"time_series\",\n              \"interval\": \"\",\n              \"intervalFactor\": 1,\n              \"legendFormat\": \"UDP_mem - Used memory for udp\",\n              \"refId\": \"C\",\n              \"step\": 240\n            }\n          ],\n          \"title\": \"Sockstat UDP\",\n          
\"type\": \"timeseries\"\n        },\n        {\n          \"datasource\": {\n            \"type\": \"prometheus\",\n            \"uid\": \"${datasource}\"\n          },\n          \"fieldConfig\": {\n            \"defaults\": {\n              \"color\": {\n                \"mode\": \"palette-classic\"\n              },\n              \"custom\": {\n                \"axisCenteredZero\": false,\n                \"axisColorMode\": \"text\",\n                \"axisLabel\": \"counter\",\n                \"axisPlacement\": \"auto\",\n                \"barAlignment\": 0,\n                \"drawStyle\": \"line\",\n                \"fillOpacity\": 20,\n                \"gradientMode\": \"none\",\n                \"hideFrom\": {\n                  \"legend\": false,\n                  \"tooltip\": false,\n                  \"viz\": false\n                },\n                \"lineInterpolation\": \"linear\",\n                \"lineWidth\": 1,\n                \"pointSize\": 5,\n                \"scaleDistribution\": {\n                  \"type\": \"linear\"\n                },\n                \"showPoints\": \"never\",\n                \"spanNulls\": false,\n                \"stacking\": {\n                  \"group\": \"A\",\n                  \"mode\": \"none\"\n                },\n                \"thresholdsStyle\": {\n                  \"mode\": \"off\"\n                }\n              },\n              \"links\": [],\n              \"mappings\": [],\n              \"min\": 0,\n              \"thresholds\": {\n                \"mode\": \"absolute\",\n                \"steps\": [\n                  {\n                    \"color\": \"green\"\n                  },\n                  {\n                    \"color\": \"red\",\n                    \"value\": 80\n                  }\n                ]\n              },\n              \"unit\": \"short\"\n            },\n            \"overrides\": []\n          },\n          \"gridPos\": {\n            \"h\": 10,\n         
   \"w\": 12,\n            \"x\": 0,\n            \"y\": 58\n          },\n          \"id\": 125,\n          \"links\": [],\n          \"options\": {\n            \"legend\": {\n              \"calcs\": [\n                \"mean\",\n                \"lastNotNull\",\n                \"max\",\n                \"min\"\n              ],\n              \"displayMode\": \"table\",\n              \"placement\": \"bottom\",\n              \"showLegend\": true,\n              \"width\": 300\n            },\n            \"tooltip\": {\n              \"mode\": \"multi\",\n              \"sort\": \"none\"\n            }\n          },\n          \"pluginVersion\": \"9.2.0\",\n          \"targets\": [\n            {\n              \"datasource\": {\n                \"type\": \"prometheus\",\n                \"uid\": \"${datasource}\"\n              },\n              \"expr\": \"node_sockstat_FRAG_inuse{instance=\\\"$node\\\",job=\\\"$job\\\"}\",\n              \"format\": \"time_series\",\n              \"interval\": \"\",\n              \"intervalFactor\": 1,\n              \"legendFormat\": \"FRAG_inuse - Frag sockets currently in use\",\n              \"refId\": \"A\",\n              \"step\": 240\n            },\n            {\n              \"datasource\": {\n                \"type\": \"prometheus\",\n                \"uid\": \"${datasource}\"\n              },\n              \"expr\": \"node_sockstat_RAW_inuse{instance=\\\"$node\\\",job=\\\"$job\\\"}\",\n              \"format\": \"time_series\",\n              \"interval\": \"\",\n              \"intervalFactor\": 1,\n              \"legendFormat\": \"RAW_inuse - Raw sockets currently in use\",\n              \"refId\": \"C\",\n              \"step\": 240\n            }\n          ],\n          \"title\": \"Sockstat FRAG / RAW\",\n          \"type\": \"timeseries\"\n        },\n        {\n          \"datasource\": {\n            \"type\": \"prometheus\",\n            \"uid\": \"${datasource}\"\n          },\n          
\"fieldConfig\": {\n            \"defaults\": {\n              \"color\": {\n                \"mode\": \"palette-classic\"\n              },\n              \"custom\": {\n                \"axisCenteredZero\": false,\n                \"axisColorMode\": \"text\",\n                \"axisLabel\": \"bytes\",\n                \"axisPlacement\": \"auto\",\n                \"barAlignment\": 0,\n                \"drawStyle\": \"line\",\n                \"fillOpacity\": 20,\n                \"gradientMode\": \"none\",\n                \"hideFrom\": {\n                  \"legend\": false,\n                  \"tooltip\": false,\n                  \"viz\": false\n                },\n                \"lineInterpolation\": \"linear\",\n                \"lineWidth\": 1,\n                \"pointSize\": 5,\n                \"scaleDistribution\": {\n                  \"type\": \"linear\"\n                },\n                \"showPoints\": \"never\",\n                \"spanNulls\": false,\n                \"stacking\": {\n                  \"group\": \"A\",\n                  \"mode\": \"none\"\n                },\n                \"thresholdsStyle\": {\n                  \"mode\": \"off\"\n                }\n              },\n              \"links\": [],\n              \"mappings\": [],\n              \"min\": 0,\n              \"thresholds\": {\n                \"mode\": \"absolute\",\n                \"steps\": [\n                  {\n                    \"color\": \"green\"\n                  },\n                  {\n                    \"color\": \"red\",\n                    \"value\": 80\n                  }\n                ]\n              },\n              \"unit\": \"bytes\"\n            },\n            \"overrides\": []\n          },\n          \"gridPos\": {\n            \"h\": 10,\n            \"w\": 12,\n            \"x\": 12,\n            \"y\": 58\n          },\n          \"id\": 220,\n          \"links\": [],\n          \"options\": {\n            \"legend\": {\n    
          \"calcs\": [\n                \"mean\",\n                \"lastNotNull\",\n                \"max\",\n                \"min\"\n              ],\n              \"displayMode\": \"table\",\n              \"placement\": \"bottom\",\n              \"showLegend\": true,\n              \"width\": 300\n            },\n            \"tooltip\": {\n              \"mode\": \"multi\",\n              \"sort\": \"none\"\n            }\n          },\n          \"pluginVersion\": \"9.2.0\",\n          \"targets\": [\n            {\n              \"datasource\": {\n                \"type\": \"prometheus\",\n                \"uid\": \"${datasource}\"\n              },\n              \"expr\": \"node_sockstat_TCP_mem_bytes{instance=\\\"$node\\\",job=\\\"$job\\\"}\",\n              \"format\": \"time_series\",\n              \"interval\": \"\",\n              \"intervalFactor\": 1,\n              \"legendFormat\": \"mem_bytes - TCP sockets in that state\",\n              \"refId\": \"A\",\n              \"step\": 240\n            },\n            {\n              \"datasource\": {\n                \"type\": \"prometheus\",\n                \"uid\": \"${datasource}\"\n              },\n              \"expr\": \"node_sockstat_UDP_mem_bytes{instance=\\\"$node\\\",job=\\\"$job\\\"}\",\n              \"format\": \"time_series\",\n              \"interval\": \"\",\n              \"intervalFactor\": 1,\n              \"legendFormat\": \"mem_bytes - UDP sockets in that state\",\n              \"refId\": \"B\",\n              \"step\": 240\n            },\n            {\n              \"datasource\": {\n                \"type\": \"prometheus\",\n                \"uid\": \"${datasource}\"\n              },\n              \"expr\": \"node_sockstat_FRAG_memory{instance=\\\"$node\\\",job=\\\"$job\\\"}\",\n              \"interval\": \"\",\n              \"intervalFactor\": 1,\n              \"legendFormat\": \"FRAG_memory - Used memory for frag\",\n              \"refId\": \"C\"\n          
  }\n          ],\n          \"title\": \"Sockstat Memory Size\",\n          \"type\": \"timeseries\"\n        },\n        {\n          \"datasource\": {\n            \"type\": \"prometheus\",\n            \"uid\": \"${datasource}\"\n          },\n          \"fieldConfig\": {\n            \"defaults\": {\n              \"color\": {\n                \"mode\": \"palette-classic\"\n              },\n              \"custom\": {\n                \"axisCenteredZero\": false,\n                \"axisColorMode\": \"text\",\n                \"axisLabel\": \"sockets\",\n                \"axisPlacement\": \"auto\",\n                \"barAlignment\": 0,\n                \"drawStyle\": \"line\",\n                \"fillOpacity\": 20,\n                \"gradientMode\": \"none\",\n                \"hideFrom\": {\n                  \"legend\": false,\n                  \"tooltip\": false,\n                  \"viz\": false\n                },\n                \"lineInterpolation\": \"linear\",\n                \"lineWidth\": 1,\n                \"pointSize\": 5,\n                \"scaleDistribution\": {\n                  \"type\": \"linear\"\n                },\n                \"showPoints\": \"never\",\n                \"spanNulls\": false,\n                \"stacking\": {\n                  \"group\": \"A\",\n                  \"mode\": \"none\"\n                },\n                \"thresholdsStyle\": {\n                  \"mode\": \"off\"\n                }\n              },\n              \"links\": [],\n              \"mappings\": [],\n              \"min\": 0,\n              \"thresholds\": {\n                \"mode\": \"absolute\",\n                \"steps\": [\n                  {\n                    \"color\": \"green\"\n                  },\n                  {\n                    \"color\": \"red\",\n                    \"value\": 80\n                  }\n                ]\n              },\n              \"unit\": \"short\"\n            },\n            \"overrides\": 
[]\n          },\n          \"gridPos\": {\n            \"h\": 10,\n            \"w\": 12,\n            \"x\": 0,\n            \"y\": 68\n          },\n          \"id\": 126,\n          \"links\": [],\n          \"options\": {\n            \"legend\": {\n              \"calcs\": [\n                \"mean\",\n                \"lastNotNull\",\n                \"max\",\n                \"min\"\n              ],\n              \"displayMode\": \"table\",\n              \"placement\": \"bottom\",\n              \"showLegend\": true,\n              \"width\": 300\n            },\n            \"tooltip\": {\n              \"mode\": \"multi\",\n              \"sort\": \"none\"\n            }\n          },\n          \"pluginVersion\": \"9.2.0\",\n          \"targets\": [\n            {\n              \"datasource\": {\n                \"type\": \"prometheus\",\n                \"uid\": \"${datasource}\"\n              },\n              \"expr\": \"node_sockstat_sockets_used{instance=\\\"$node\\\",job=\\\"$job\\\"}\",\n              \"format\": \"time_series\",\n              \"interval\": \"\",\n              \"intervalFactor\": 1,\n              \"legendFormat\": \"Sockets_used - Sockets currently in use\",\n              \"refId\": \"A\",\n              \"step\": 240\n            }\n          ],\n          \"title\": \"Sockstat Used\",\n          \"type\": \"timeseries\"\n        }\n      ],\n      \"targets\": [\n        {\n          \"datasource\": {\n            \"type\": \"prometheus\",\n            \"uid\": \"000000001\"\n          },\n          \"refId\": \"A\"\n        }\n      ],\n      \"title\": \"Network Sockstat\",\n      \"type\": \"row\"\n    },\n    {\n      \"collapsed\": true,\n      \"datasource\": {\n        \"type\": \"prometheus\",\n        \"uid\": \"000000001\"\n      },\n      \"gridPos\": {\n        \"h\": 1,\n        \"w\": 24,\n        \"x\": 0,\n        \"y\": 32\n      },\n      \"id\": 274,\n      \"panels\": [\n        {\n          
\"datasource\": {\n            \"type\": \"prometheus\",\n            \"uid\": \"${datasource}\"\n          },\n          \"fieldConfig\": {\n            \"defaults\": {\n              \"color\": {\n                \"mode\": \"palette-classic\"\n              },\n              \"custom\": {\n                \"axisCenteredZero\": false,\n                \"axisColorMode\": \"text\",\n                \"axisLabel\": \"octets out (-) / in (+)\",\n                \"axisPlacement\": \"auto\",\n                \"barAlignment\": 0,\n                \"drawStyle\": \"line\",\n                \"fillOpacity\": 20,\n                \"gradientMode\": \"none\",\n                \"hideFrom\": {\n                  \"legend\": false,\n                  \"tooltip\": false,\n                  \"viz\": false\n                },\n                \"lineInterpolation\": \"linear\",\n                \"lineWidth\": 1,\n                \"pointSize\": 5,\n                \"scaleDistribution\": {\n                  \"type\": \"linear\"\n                },\n                \"showPoints\": \"never\",\n                \"spanNulls\": false,\n                \"stacking\": {\n                  \"group\": \"A\",\n                  \"mode\": \"none\"\n                },\n                \"thresholdsStyle\": {\n                  \"mode\": \"off\"\n                }\n              },\n              \"links\": [],\n              \"mappings\": [],\n              \"thresholds\": {\n                \"mode\": \"absolute\",\n                \"steps\": [\n                  {\n                    \"color\": \"green\",\n                    \"value\": null\n                  },\n                  {\n                    \"color\": \"red\",\n                    \"value\": 80\n                  }\n                ]\n              },\n              \"unit\": \"short\"\n            },\n            \"overrides\": [\n              {\n                \"matcher\": {\n                  \"id\": \"byRegexp\",\n                
  \"options\": \"/.*Out.*/\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"custom.transform\",\n                    \"value\": \"negative-Y\"\n                  }\n                ]\n              }\n            ]\n          },\n          \"gridPos\": {\n            \"h\": 10,\n            \"w\": 12,\n            \"x\": 0,\n            \"y\": 33\n          },\n          \"id\": 221,\n          \"links\": [],\n          \"options\": {\n            \"legend\": {\n              \"calcs\": [\n                \"mean\",\n                \"lastNotNull\",\n                \"max\",\n                \"min\"\n              ],\n              \"displayMode\": \"table\",\n              \"placement\": \"bottom\",\n              \"showLegend\": true,\n              \"width\": 300\n            },\n            \"tooltip\": {\n              \"mode\": \"multi\",\n              \"sort\": \"none\"\n            }\n          },\n          \"pluginVersion\": \"9.2.0\",\n          \"targets\": [\n            {\n              \"datasource\": {\n                \"type\": \"prometheus\",\n                \"uid\": \"${datasource}\"\n              },\n              \"expr\":\n                  \"irate(node_netstat_IpExt_InOctets{instance=\\\"$node\\\",job=\\\"$job\\\"}[$__rate_interval])\",\n              \"format\": \"time_series\",\n              \"interval\": \"\",\n              \"intervalFactor\": 1,\n              \"legendFormat\": \"InOctets - Received octets\",\n              \"refId\": \"A\",\n              \"step\": 240\n            },\n            {\n              \"datasource\": {\n                \"type\": \"prometheus\",\n                \"uid\": \"${datasource}\"\n              },\n              \"expr\":\n                  \"irate(node_netstat_IpExt_OutOctets{instance=\\\"$node\\\",job=\\\"$job\\\"}[$__rate_interval])\",\n              \"format\": \"time_series\",\n              \"intervalFactor\": 1,\n              
\"legendFormat\": \"OutOctets - Sent octets\",\n              \"refId\": \"B\",\n              \"step\": 240\n            }\n          ],\n          \"title\": \"Netstat IP In / Out Octets\",\n          \"type\": \"timeseries\"\n        },\n        {\n          \"datasource\": {\n            \"type\": \"prometheus\",\n            \"uid\": \"${datasource}\"\n          },\n          \"fieldConfig\": {\n            \"defaults\": {\n              \"color\": {\n                \"mode\": \"palette-classic\"\n              },\n              \"custom\": {\n                \"axisCenteredZero\": false,\n                \"axisColorMode\": \"text\",\n                \"axisLabel\": \"datagrams\",\n                \"axisPlacement\": \"auto\",\n                \"barAlignment\": 0,\n                \"drawStyle\": \"line\",\n                \"fillOpacity\": 20,\n                \"gradientMode\": \"none\",\n                \"hideFrom\": {\n                  \"legend\": false,\n                  \"tooltip\": false,\n                  \"viz\": false\n                },\n                \"lineInterpolation\": \"linear\",\n                \"lineWidth\": 1,\n                \"pointSize\": 5,\n                \"scaleDistribution\": {\n                  \"type\": \"linear\"\n                },\n                \"showPoints\": \"never\",\n                \"spanNulls\": false,\n                \"stacking\": {\n                  \"group\": \"A\",\n                  \"mode\": \"none\"\n                },\n                \"thresholdsStyle\": {\n                  \"mode\": \"off\"\n                }\n              },\n              \"links\": [],\n              \"mappings\": [],\n              \"min\": 0,\n              \"thresholds\": {\n                \"mode\": \"absolute\",\n                \"steps\": [\n                  {\n                    \"color\": \"green\",\n                    \"value\": null\n                  },\n                  {\n                    \"color\": \"red\",\n     
               \"value\": 80\n                  }\n                ]\n              },\n              \"unit\": \"short\"\n            },\n            \"overrides\": []\n          },\n          \"gridPos\": {\n            \"h\": 10,\n            \"w\": 12,\n            \"x\": 12,\n            \"y\": 33\n          },\n          \"id\": 81,\n          \"links\": [],\n          \"options\": {\n            \"legend\": {\n              \"calcs\": [\n                \"mean\",\n                \"lastNotNull\",\n                \"max\",\n                \"min\"\n              ],\n              \"displayMode\": \"table\",\n              \"placement\": \"bottom\",\n              \"showLegend\": true,\n              \"width\": 300\n            },\n            \"tooltip\": {\n              \"mode\": \"multi\",\n              \"sort\": \"none\"\n            }\n          },\n          \"pluginVersion\": \"9.2.0\",\n          \"targets\": [\n            {\n              \"datasource\": {\n                \"type\": \"prometheus\",\n                \"uid\": \"${datasource}\"\n              },\n              \"expr\":\n                  \"irate(node_netstat_Ip_Forwarding{instance=\\\"$node\\\",job=\\\"$job\\\"}[$__rate_interval])\",\n              \"format\": \"time_series\",\n              \"interval\": \"\",\n              \"intervalFactor\": 1,\n              \"legendFormat\": \"Forwarding - IP forwarding\",\n              \"refId\": \"A\",\n              \"step\": 240\n            }\n          ],\n          \"title\": \"Netstat IP Forwarding\",\n          \"type\": \"timeseries\"\n        },\n        {\n          \"datasource\": {\n            \"type\": \"prometheus\",\n            \"uid\": \"${datasource}\"\n          },\n          \"fieldConfig\": {\n            \"defaults\": {\n              \"color\": {\n                \"mode\": \"palette-classic\"\n              },\n              \"custom\": {\n                \"axisCenteredZero\": false,\n                
\"axisColorMode\": \"text\",\n                \"axisLabel\": \"messages out (-) / in (+)\",\n                \"axisPlacement\": \"auto\",\n                \"barAlignment\": 0,\n                \"drawStyle\": \"line\",\n                \"fillOpacity\": 20,\n                \"gradientMode\": \"none\",\n                \"hideFrom\": {\n                  \"legend\": false,\n                  \"tooltip\": false,\n                  \"viz\": false\n                },\n                \"lineInterpolation\": \"linear\",\n                \"lineWidth\": 1,\n                \"pointSize\": 5,\n                \"scaleDistribution\": {\n                  \"type\": \"linear\"\n                },\n                \"showPoints\": \"never\",\n                \"spanNulls\": false,\n                \"stacking\": {\n                  \"group\": \"A\",\n                  \"mode\": \"none\"\n                },\n                \"thresholdsStyle\": {\n                  \"mode\": \"off\"\n                }\n              },\n              \"links\": [],\n              \"mappings\": [],\n              \"thresholds\": {\n                \"mode\": \"absolute\",\n                \"steps\": [\n                  {\n                    \"color\": \"green\",\n                    \"value\": null\n                  },\n                  {\n                    \"color\": \"red\",\n                    \"value\": 80\n                  }\n                ]\n              },\n              \"unit\": \"short\"\n            },\n            \"overrides\": [\n              {\n                \"matcher\": {\n                  \"id\": \"byRegexp\",\n                  \"options\": \"/.*Out.*/\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"custom.transform\",\n                    \"value\": \"negative-Y\"\n                  }\n                ]\n              }\n            ]\n          },\n          \"gridPos\": {\n            \"h\": 10,\n            
\"w\": 12,\n            \"x\": 0,\n            \"y\": 43\n          },\n          \"id\": 115,\n          \"links\": [],\n          \"options\": {\n            \"legend\": {\n              \"calcs\": [\n                \"mean\",\n                \"lastNotNull\",\n                \"max\",\n                \"min\"\n              ],\n              \"displayMode\": \"table\",\n              \"placement\": \"bottom\",\n              \"showLegend\": true\n            },\n            \"tooltip\": {\n              \"mode\": \"multi\",\n              \"sort\": \"none\"\n            }\n          },\n          \"pluginVersion\": \"9.2.0\",\n          \"targets\": [\n            {\n              \"datasource\": {\n                \"type\": \"prometheus\",\n                \"uid\": \"${datasource}\"\n              },\n              \"expr\":\n                  \"irate(node_netstat_Icmp_InMsgs{instance=\\\"$node\\\",job=\\\"$job\\\"}[$__rate_interval])\",\n              \"format\": \"time_series\",\n              \"interval\": \"\",\n              \"intervalFactor\": 1,\n              \"legendFormat\":\n                  \"InMsgs -  Messages which the entity received. Note that this counter includes all those counted by icmpInErrors\",\n              \"refId\": \"A\",\n              \"step\": 240\n            },\n            {\n              \"datasource\": {\n                \"type\": \"prometheus\",\n                \"uid\": \"${datasource}\"\n              },\n              \"expr\":\n                  \"irate(node_netstat_Icmp_OutMsgs{instance=\\\"$node\\\",job=\\\"$job\\\"}[$__rate_interval])\",\n              \"format\": \"time_series\",\n              \"interval\": \"\",\n              \"intervalFactor\": 1,\n              \"legendFormat\":\n                  \"OutMsgs - Messages which this entity attempted to send. 
Note that this counter includes all those counted by icmpOutErrors\",\n              \"refId\": \"B\",\n              \"step\": 240\n            }\n          ],\n          \"title\": \"ICMP In / Out\",\n          \"type\": \"timeseries\"\n        },\n        {\n          \"datasource\": {\n            \"type\": \"prometheus\",\n            \"uid\": \"${datasource}\"\n          },\n          \"fieldConfig\": {\n            \"defaults\": {\n              \"color\": {\n                \"mode\": \"palette-classic\"\n              },\n              \"custom\": {\n                \"axisCenteredZero\": false,\n                \"axisColorMode\": \"text\",\n                \"axisLabel\": \"messages out (-) / in (+)\",\n                \"axisPlacement\": \"auto\",\n                \"barAlignment\": 0,\n                \"drawStyle\": \"line\",\n                \"fillOpacity\": 20,\n                \"gradientMode\": \"none\",\n                \"hideFrom\": {\n                  \"legend\": false,\n                  \"tooltip\": false,\n                  \"viz\": false\n                },\n                \"lineInterpolation\": \"linear\",\n                \"lineWidth\": 1,\n                \"pointSize\": 5,\n                \"scaleDistribution\": {\n                  \"type\": \"linear\"\n                },\n                \"showPoints\": \"never\",\n                \"spanNulls\": false,\n                \"stacking\": {\n                  \"group\": \"A\",\n                  \"mode\": \"none\"\n                },\n                \"thresholdsStyle\": {\n                  \"mode\": \"off\"\n                }\n              },\n              \"links\": [],\n              \"mappings\": [],\n              \"thresholds\": {\n                \"mode\": \"absolute\",\n                \"steps\": [\n                  {\n                    \"color\": \"green\",\n                    \"value\": null\n                  },\n                  {\n                    \"color\": \"red\",\n      
              \"value\": 80\n                  }\n                ]\n              },\n              \"unit\": \"short\"\n            },\n            \"overrides\": [\n              {\n                \"matcher\": {\n                  \"id\": \"byRegexp\",\n                  \"options\": \"/.*Out.*/\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"custom.transform\",\n                    \"value\": \"negative-Y\"\n                  }\n                ]\n              }\n            ]\n          },\n          \"gridPos\": {\n            \"h\": 10,\n            \"w\": 12,\n            \"x\": 12,\n            \"y\": 43\n          },\n          \"id\": 50,\n          \"links\": [],\n          \"options\": {\n            \"legend\": {\n              \"calcs\": [\n                \"mean\",\n                \"lastNotNull\",\n                \"max\",\n                \"min\"\n              ],\n              \"displayMode\": \"table\",\n              \"placement\": \"bottom\",\n              \"showLegend\": true\n            },\n            \"tooltip\": {\n              \"mode\": \"multi\",\n              \"sort\": \"none\"\n            }\n          },\n          \"pluginVersion\": \"9.2.0\",\n          \"targets\": [\n            {\n              \"datasource\": {\n                \"type\": \"prometheus\",\n                \"uid\": \"${datasource}\"\n              },\n              \"expr\":\n                  \"irate(node_netstat_Icmp_InErrors{instance=\\\"$node\\\",job=\\\"$job\\\"}[$__rate_interval])\",\n              \"format\": \"time_series\",\n              \"interval\": \"\",\n              \"intervalFactor\": 1,\n              \"legendFormat\":\n                  \"InErrors - Messages which the entity received but determined as having ICMP-specific errors (bad ICMP checksums, bad length, etc.)\",\n              \"refId\": \"A\",\n              \"step\": 240\n            }\n          ],\n          
\"title\": \"ICMP Errors\",\n          \"type\": \"timeseries\"\n        },\n        {\n          \"datasource\": {\n            \"type\": \"prometheus\",\n            \"uid\": \"${datasource}\"\n          },\n          \"fieldConfig\": {\n            \"defaults\": {\n              \"color\": {\n                \"mode\": \"palette-classic\"\n              },\n              \"custom\": {\n                \"axisCenteredZero\": false,\n                \"axisColorMode\": \"text\",\n                \"axisLabel\": \"datagrams out (-) / in (+)\",\n                \"axisPlacement\": \"auto\",\n                \"barAlignment\": 0,\n                \"drawStyle\": \"line\",\n                \"fillOpacity\": 20,\n                \"gradientMode\": \"none\",\n                \"hideFrom\": {\n                  \"legend\": false,\n                  \"tooltip\": false,\n                  \"viz\": false\n                },\n                \"lineInterpolation\": \"linear\",\n                \"lineWidth\": 1,\n                \"pointSize\": 5,\n                \"scaleDistribution\": {\n                  \"type\": \"linear\"\n                },\n                \"showPoints\": \"never\",\n                \"spanNulls\": false,\n                \"stacking\": {\n                  \"group\": \"A\",\n                  \"mode\": \"none\"\n                },\n                \"thresholdsStyle\": {\n                  \"mode\": \"off\"\n                }\n              },\n              \"links\": [],\n              \"mappings\": [],\n              \"thresholds\": {\n                \"mode\": \"absolute\",\n                \"steps\": [\n                  {\n                    \"color\": \"green\",\n                    \"value\": null\n                  },\n                  {\n                    \"color\": \"red\",\n                    \"value\": 80\n                  }\n                ]\n              },\n              \"unit\": \"short\"\n            },\n            \"overrides\": [\n     
         {\n                \"matcher\": {\n                  \"id\": \"byRegexp\",\n                  \"options\": \"/.*Out.*/\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"custom.transform\",\n                    \"value\": \"negative-Y\"\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byRegexp\",\n                  \"options\": \"/.*Snd.*/\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"custom.transform\",\n                    \"value\": \"negative-Y\"\n                  }\n                ]\n              }\n            ]\n          },\n          \"gridPos\": {\n            \"h\": 10,\n            \"w\": 12,\n            \"x\": 0,\n            \"y\": 53\n          },\n          \"id\": 55,\n          \"links\": [],\n          \"options\": {\n            \"legend\": {\n              \"calcs\": [\n                \"mean\",\n                \"lastNotNull\",\n                \"max\",\n                \"min\"\n              ],\n              \"displayMode\": \"table\",\n              \"placement\": \"bottom\",\n              \"showLegend\": true\n            },\n            \"tooltip\": {\n              \"mode\": \"multi\",\n              \"sort\": \"none\"\n            }\n          },\n          \"pluginVersion\": \"9.2.0\",\n          \"targets\": [\n            {\n              \"datasource\": {\n                \"type\": \"prometheus\",\n                \"uid\": \"${datasource}\"\n              },\n              \"expr\":\n                  \"irate(node_netstat_Udp_InDatagrams{instance=\\\"$node\\\",job=\\\"$job\\\"}[$__rate_interval])\",\n              \"format\": \"time_series\",\n              \"interval\": \"\",\n              \"intervalFactor\": 1,\n              \"legendFormat\": \"InDatagrams - Datagrams received\",\n              \"refId\": 
\"A\",\n              \"step\": 240\n            },\n            {\n              \"datasource\": {\n                \"type\": \"prometheus\",\n                \"uid\": \"${datasource}\"\n              },\n              \"expr\":\n                  \"irate(node_netstat_Udp_OutDatagrams{instance=\\\"$node\\\",job=\\\"$job\\\"}[$__rate_interval])\",\n              \"format\": \"time_series\",\n              \"interval\": \"\",\n              \"intervalFactor\": 1,\n              \"legendFormat\": \"OutDatagrams - Datagrams sent\",\n              \"refId\": \"B\",\n              \"step\": 240\n            }\n          ],\n          \"title\": \"UDP In / Out\",\n          \"type\": \"timeseries\"\n        },\n        {\n          \"datasource\": {\n            \"type\": \"prometheus\",\n            \"uid\": \"${datasource}\"\n          },\n          \"fieldConfig\": {\n            \"defaults\": {\n              \"color\": {\n                \"mode\": \"palette-classic\"\n              },\n              \"custom\": {\n                \"axisCenteredZero\": false,\n                \"axisColorMode\": \"text\",\n                \"axisLabel\": \"datagrams\",\n                \"axisPlacement\": \"auto\",\n                \"barAlignment\": 0,\n                \"drawStyle\": \"line\",\n                \"fillOpacity\": 20,\n                \"gradientMode\": \"none\",\n                \"hideFrom\": {\n                  \"legend\": false,\n                  \"tooltip\": false,\n                  \"viz\": false\n                },\n                \"lineInterpolation\": \"linear\",\n                \"lineWidth\": 1,\n                \"pointSize\": 5,\n                \"scaleDistribution\": {\n                  \"type\": \"linear\"\n                },\n                \"showPoints\": \"never\",\n                \"spanNulls\": false,\n                \"stacking\": {\n                  \"group\": \"A\",\n                  \"mode\": \"none\"\n                },\n                
\"thresholdsStyle\": {\n                  \"mode\": \"off\"\n                }\n              },\n              \"links\": [],\n              \"mappings\": [],\n              \"thresholds\": {\n                \"mode\": \"absolute\",\n                \"steps\": [\n                  {\n                    \"color\": \"green\",\n                    \"value\": null\n                  },\n                  {\n                    \"color\": \"red\",\n                    \"value\": 80\n                  }\n                ]\n              },\n              \"unit\": \"short\"\n            },\n            \"overrides\": []\n          },\n          \"gridPos\": {\n            \"h\": 10,\n            \"w\": 12,\n            \"x\": 12,\n            \"y\": 53\n          },\n          \"id\": 109,\n          \"links\": [],\n          \"options\": {\n            \"legend\": {\n              \"calcs\": [\n                \"mean\",\n                \"lastNotNull\",\n                \"max\",\n                \"min\"\n              ],\n              \"displayMode\": \"table\",\n              \"placement\": \"bottom\",\n              \"showLegend\": true\n            },\n            \"tooltip\": {\n              \"mode\": \"multi\",\n              \"sort\": \"none\"\n            }\n          },\n          \"pluginVersion\": \"9.2.0\",\n          \"targets\": [\n            {\n              \"datasource\": {\n                \"type\": \"prometheus\",\n                \"uid\": \"${datasource}\"\n              },\n              \"expr\":\n                  \"irate(node_netstat_Udp_InErrors{instance=\\\"$node\\\",job=\\\"$job\\\"}[$__rate_interval])\",\n              \"format\": \"time_series\",\n              \"interval\": \"\",\n              \"intervalFactor\": 1,\n              \"legendFormat\":\n                  \"InErrors - UDP Datagrams that could not be delivered to an application\",\n              \"refId\": \"A\",\n              \"step\": 240\n            },\n            {\n  
            \"datasource\": {\n                \"type\": \"prometheus\",\n                \"uid\": \"${datasource}\"\n              },\n              \"expr\":\n                  \"irate(node_netstat_Udp_NoPorts{instance=\\\"$node\\\",job=\\\"$job\\\"}[$__rate_interval])\",\n              \"format\": \"time_series\",\n              \"interval\": \"\",\n              \"intervalFactor\": 1,\n              \"legendFormat\": \"NoPorts - UDP Datagrams received on a port with no listener\",\n              \"refId\": \"B\",\n              \"step\": 240\n            },\n            {\n              \"datasource\": {\n                \"type\": \"prometheus\",\n                \"uid\": \"${datasource}\"\n              },\n              \"expr\":\n                  \"irate(node_netstat_UdpLite_InErrors{instance=\\\"$node\\\",job=\\\"$job\\\"}[$__rate_interval])\",\n              \"interval\": \"\",\n              \"legendFormat\":\n                  \"InErrors Lite - UDPLite Datagrams that could not be delivered to an application\",\n              \"refId\": \"C\"\n            },\n            {\n              \"datasource\": {\n                \"type\": \"prometheus\",\n                \"uid\": \"${datasource}\"\n              },\n              \"expr\":\n                  \"irate(node_netstat_Udp_RcvbufErrors{instance=\\\"$node\\\",job=\\\"$job\\\"}[$__rate_interval])\",\n              \"format\": \"time_series\",\n              \"interval\": \"\",\n              \"intervalFactor\": 1,\n              \"legendFormat\": \"RcvbufErrors - UDP buffer errors received\",\n              \"refId\": \"D\",\n              \"step\": 240\n            },\n            {\n              \"datasource\": {\n                \"type\": \"prometheus\",\n                \"uid\": \"${datasource}\"\n              },\n              \"expr\":\n                  \"irate(node_netstat_Udp_SndbufErrors{instance=\\\"$node\\\",job=\\\"$job\\\"}[$__rate_interval])\",\n              \"format\": 
\"time_series\",\n              \"interval\": \"\",\n              \"intervalFactor\": 1,\n              \"legendFormat\": \"SndbufErrors - UDP buffer errors send\",\n              \"refId\": \"E\",\n              \"step\": 240\n            }\n          ],\n          \"title\": \"UDP Errors\",\n          \"type\": \"timeseries\"\n        },\n        {\n          \"datasource\": {\n            \"type\": \"prometheus\",\n            \"uid\": \"${datasource}\"\n          },\n          \"fieldConfig\": {\n            \"defaults\": {\n              \"color\": {\n                \"mode\": \"palette-classic\"\n              },\n              \"custom\": {\n                \"axisCenteredZero\": false,\n                \"axisColorMode\": \"text\",\n                \"axisLabel\": \"datagrams out (-) / in (+)\",\n                \"axisPlacement\": \"auto\",\n                \"barAlignment\": 0,\n                \"drawStyle\": \"line\",\n                \"fillOpacity\": 20,\n                \"gradientMode\": \"none\",\n                \"hideFrom\": {\n                  \"legend\": false,\n                  \"tooltip\": false,\n                  \"viz\": false\n                },\n                \"lineInterpolation\": \"linear\",\n                \"lineWidth\": 1,\n                \"pointSize\": 5,\n                \"scaleDistribution\": {\n                  \"type\": \"linear\"\n                },\n                \"showPoints\": \"never\",\n                \"spanNulls\": false,\n                \"stacking\": {\n                  \"group\": \"A\",\n                  \"mode\": \"none\"\n                },\n                \"thresholdsStyle\": {\n                  \"mode\": \"off\"\n                }\n              },\n              \"links\": [],\n              \"mappings\": [],\n              \"thresholds\": {\n                \"mode\": \"absolute\",\n                \"steps\": [\n                  {\n                    \"color\": \"green\",\n                    \"value\": 
null\n                  },\n                  {\n                    \"color\": \"red\",\n                    \"value\": 80\n                  }\n                ]\n              },\n              \"unit\": \"short\"\n            },\n            \"overrides\": [\n              {\n                \"matcher\": {\n                  \"id\": \"byRegexp\",\n                  \"options\": \"/.*Out.*/\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"custom.transform\",\n                    \"value\": \"negative-Y\"\n                  }\n                ]\n              },\n              {\n                \"matcher\": {\n                  \"id\": \"byRegexp\",\n                  \"options\": \"/.*Snd.*/\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"custom.transform\",\n                    \"value\": \"negative-Y\"\n                  }\n                ]\n              }\n            ]\n          },\n          \"gridPos\": {\n            \"h\": 10,\n            \"w\": 12,\n            \"x\": 0,\n            \"y\": 63\n          },\n          \"id\": 299,\n          \"links\": [],\n          \"options\": {\n            \"legend\": {\n              \"calcs\": [\n                \"mean\",\n                \"lastNotNull\",\n                \"max\",\n                \"min\"\n              ],\n              \"displayMode\": \"table\",\n              \"placement\": \"bottom\",\n              \"showLegend\": true\n            },\n            \"tooltip\": {\n              \"mode\": \"multi\",\n              \"sort\": \"none\"\n            }\n          },\n          \"pluginVersion\": \"9.2.0\",\n          \"targets\": [\n            {\n              \"datasource\": {\n                \"type\": \"prometheus\",\n                \"uid\": \"${datasource}\"\n              },\n              \"expr\":\n                  
\"irate(node_netstat_Tcp_InSegs{instance=\\\"$node\\\",job=\\\"$job\\\"}[$__rate_interval])\",\n              \"format\": \"time_series\",\n              \"instant\": false,\n              \"interval\": \"\",\n              \"intervalFactor\": 1,\n              \"legendFormat\":\n                  \"InSegs - Segments received, including those received in error. This count includes segments received on currently established connections\",\n              \"refId\": \"A\",\n              \"step\": 240\n            },\n            {\n              \"datasource\": {\n                \"type\": \"prometheus\",\n                \"uid\": \"${datasource}\"\n              },\n              \"expr\":\n                  \"irate(node_netstat_Tcp_OutSegs{instance=\\\"$node\\\",job=\\\"$job\\\"}[$__rate_interval])\",\n              \"format\": \"time_series\",\n              \"interval\": \"\",\n              \"intervalFactor\": 1,\n              \"legendFormat\":\n                  \"OutSegs - Segments sent, including those on current connections but excluding those containing only retransmitted octets\",\n              \"refId\": \"B\",\n              \"step\": 240\n            }\n          ],\n          \"title\": \"TCP In / Out\",\n          \"type\": \"timeseries\"\n        },\n        {\n          \"datasource\": {\n            \"type\": \"prometheus\",\n            \"uid\": \"${datasource}\"\n          },\n          \"description\": \"\",\n          \"fieldConfig\": {\n            \"defaults\": {\n              \"color\": {\n                \"mode\": \"palette-classic\"\n              },\n              \"custom\": {\n                \"axisCenteredZero\": false,\n                \"axisColorMode\": \"text\",\n                \"axisLabel\": \"counter\",\n                \"axisPlacement\": \"auto\",\n                \"barAlignment\": 0,\n                \"drawStyle\": \"line\",\n                \"fillOpacity\": 20,\n                \"gradientMode\": \"none\",\n                
\"hideFrom\": {\n                  \"legend\": false,\n                  \"tooltip\": false,\n                  \"viz\": false\n                },\n                \"lineInterpolation\": \"linear\",\n                \"lineWidth\": 1,\n                \"pointSize\": 5,\n                \"scaleDistribution\": {\n                  \"type\": \"linear\"\n                },\n                \"showPoints\": \"never\",\n                \"spanNulls\": false,\n                \"stacking\": {\n                  \"group\": \"A\",\n                  \"mode\": \"none\"\n                },\n                \"thresholdsStyle\": {\n                  \"mode\": \"off\"\n                }\n              },\n              \"links\": [],\n              \"mappings\": [],\n              \"min\": 0,\n              \"thresholds\": {\n                \"mode\": \"absolute\",\n                \"steps\": [\n                  {\n                    \"color\": \"green\",\n                    \"value\": null\n                  },\n                  {\n                    \"color\": \"red\",\n                    \"value\": 80\n                  }\n                ]\n              },\n              \"unit\": \"short\"\n            },\n            \"overrides\": []\n          },\n          \"gridPos\": {\n            \"h\": 10,\n            \"w\": 12,\n            \"x\": 12,\n            \"y\": 63\n          },\n          \"id\": 104,\n          \"links\": [],\n          \"options\": {\n            \"legend\": {\n              \"calcs\": [\n                \"mean\",\n                \"lastNotNull\",\n                \"max\",\n                \"min\"\n              ],\n              \"displayMode\": \"table\",\n              \"placement\": \"bottom\",\n              \"showLegend\": true\n            },\n            \"tooltip\": {\n              \"mode\": \"multi\",\n              \"sort\": \"none\"\n            }\n          },\n          \"pluginVersion\": \"9.2.0\",\n          \"targets\": [\n        
    {\n              \"datasource\": {\n                \"type\": \"prometheus\",\n                \"uid\": \"${datasource}\"\n              },\n              \"expr\":\n                  \"irate(node_netstat_TcpExt_ListenOverflows{instance=\\\"$node\\\",job=\\\"$job\\\"}[$__rate_interval])\",\n              \"format\": \"time_series\",\n              \"hide\": false,\n              \"interval\": \"\",\n              \"intervalFactor\": 1,\n              \"legendFormat\": \"ListenOverflows - Times the listen queue of a socket overflowed\",\n              \"refId\": \"A\",\n              \"step\": 240\n            },\n            {\n              \"datasource\": {\n                \"type\": \"prometheus\",\n                \"uid\": \"${datasource}\"\n              },\n              \"expr\":\n                  \"irate(node_netstat_TcpExt_ListenDrops{instance=\\\"$node\\\",job=\\\"$job\\\"}[$__rate_interval])\",\n              \"format\": \"time_series\",\n              \"hide\": false,\n              \"interval\": \"\",\n              \"intervalFactor\": 1,\n              \"legendFormat\": \"ListenDrops - SYNs to LISTEN sockets ignored\",\n              \"refId\": \"B\",\n              \"step\": 240\n            },\n            {\n              \"datasource\": {\n                \"type\": \"prometheus\",\n                \"uid\": \"${datasource}\"\n              },\n              \"expr\":\n                  \"irate(node_netstat_TcpExt_TCPSynRetrans{instance=\\\"$node\\\",job=\\\"$job\\\"}[$__rate_interval])\",\n              \"format\": \"time_series\",\n              \"interval\": \"\",\n              \"intervalFactor\": 1,\n              \"legendFormat\":\n                  \"TCPSynRetrans - SYN-SYN/ACK retransmits to break down retransmissions in SYN, fast/timeout retransmits\",\n              \"refId\": \"C\",\n              \"step\": 240\n            },\n            {\n              \"datasource\": {\n                \"type\": \"prometheus\",\n                
\"uid\": \"${datasource}\"\n              },\n              \"expr\":\n                  \"irate(node_netstat_Tcp_RetransSegs{instance=\\\"$node\\\",job=\\\"$job\\\"}[$__rate_interval])\",\n              \"interval\": \"\",\n              \"legendFormat\":\n                  \"RetransSegs - Segments retransmitted - that is, the number of TCP segments transmitted containing one or more previously transmitted octets\",\n              \"refId\": \"D\"\n            },\n            {\n              \"datasource\": {\n                \"type\": \"prometheus\",\n                \"uid\": \"${datasource}\"\n              },\n              \"expr\":\n                  \"irate(node_netstat_Tcp_InErrs{instance=\\\"$node\\\",job=\\\"$job\\\"}[$__rate_interval])\",\n              \"interval\": \"\",\n              \"legendFormat\": \"InErrs - Segments received in error (e.g., bad TCP checksums)\",\n              \"refId\": \"E\"\n            },\n            {\n              \"datasource\": {\n                \"type\": \"prometheus\",\n                \"uid\": \"${datasource}\"\n              },\n              \"expr\":\n                  \"irate(node_netstat_Tcp_OutRsts{instance=\\\"$node\\\",job=\\\"$job\\\"}[$__rate_interval])\",\n              \"interval\": \"\",\n              \"legendFormat\": \"OutRsts - Segments sent with RST flag\",\n              \"refId\": \"F\"\n            },\n            {\n              \"datasource\": {\n                \"type\": \"prometheus\",\n                \"uid\": \"${datasource}\"\n              },\n              \"editorMode\": \"code\",\n              \"expr\":\n                  \"irate(node_netstat_TcpExt_TCPRcvQDrop{instance=\\\"$node\\\",job=\\\"$job\\\"}[$__rate_interval])\",\n              \"hide\": false,\n              \"interval\": \"\",\n              \"legendFormat\":\n                  \"TCPRcvQDrop - Packets meant to be queued in rcv queue but dropped because socket rcvbuf limit hit\",\n              \"range\": true,\n        
      \"refId\": \"G\"\n            },\n            {\n              \"datasource\": {\n                \"type\": \"prometheus\",\n                \"uid\": \"${datasource}\"\n              },\n              \"editorMode\": \"code\",\n              \"expr\":\n                  \"irate(node_netstat_TcpExt_TCPOFOQueue{instance=\\\"$node\\\",job=\\\"$job\\\"}[$__rate_interval])\",\n              \"hide\": false,\n              \"interval\": \"\",\n              \"legendFormat\":\n                  \"TCPOFOQueue - TCP layer receives an out of order packet and has enough memory to queue it\",\n              \"range\": true,\n              \"refId\": \"H\"\n            }\n          ],\n          \"title\": \"TCP Errors\",\n          \"type\": \"timeseries\"\n        },\n        {\n          \"datasource\": {\n            \"type\": \"prometheus\",\n            \"uid\": \"${datasource}\"\n          },\n          \"fieldConfig\": {\n            \"defaults\": {\n              \"color\": {\n                \"mode\": \"palette-classic\"\n              },\n              \"custom\": {\n                \"axisCenteredZero\": false,\n                \"axisColorMode\": \"text\",\n                \"axisLabel\": \"connections\",\n                \"axisPlacement\": \"auto\",\n                \"barAlignment\": 0,\n                \"drawStyle\": \"line\",\n                \"fillOpacity\": 20,\n                \"gradientMode\": \"none\",\n                \"hideFrom\": {\n                  \"legend\": false,\n                  \"tooltip\": false,\n                  \"viz\": false\n                },\n                \"lineInterpolation\": \"linear\",\n                \"lineWidth\": 1,\n                \"pointSize\": 5,\n                \"scaleDistribution\": {\n                  \"type\": \"linear\"\n                },\n                \"showPoints\": \"never\",\n                \"spanNulls\": false,\n                \"stacking\": {\n                  \"group\": \"A\",\n                  
\"mode\": \"none\"\n                },\n                \"thresholdsStyle\": {\n                  \"mode\": \"off\"\n                }\n              },\n              \"links\": [],\n              \"mappings\": [],\n              \"min\": 0,\n              \"thresholds\": {\n                \"mode\": \"absolute\",\n                \"steps\": [\n                  {\n                    \"color\": \"green\",\n                    \"value\": null\n                  },\n                  {\n                    \"color\": \"red\",\n                    \"value\": 80\n                  }\n                ]\n              },\n              \"unit\": \"short\"\n            },\n            \"overrides\": [\n              {\n                \"matcher\": {\n                  \"id\": \"byRegexp\",\n                  \"options\": \"/.*MaxConn *./\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#890F02\",\n                      \"mode\": \"fixed\"\n                    }\n                  },\n                  {\n                    \"id\": \"custom.fillOpacity\",\n                    \"value\": 0\n                  }\n                ]\n              }\n            ]\n          },\n          \"gridPos\": {\n            \"h\": 10,\n            \"w\": 12,\n            \"x\": 0,\n            \"y\": 73\n          },\n          \"id\": 85,\n          \"links\": [],\n          \"options\": {\n            \"legend\": {\n              \"calcs\": [\n                \"mean\",\n                \"lastNotNull\",\n                \"max\",\n                \"min\"\n              ],\n              \"displayMode\": \"table\",\n              \"placement\": \"bottom\",\n              \"showLegend\": true\n            },\n            \"tooltip\": {\n              \"mode\": \"multi\",\n              \"sort\": \"none\"\n            }\n          },\n          
\"pluginVersion\": \"9.2.0\",\n          \"targets\": [\n            {\n              \"datasource\": {\n                \"type\": \"prometheus\",\n                \"uid\": \"${datasource}\"\n              },\n              \"expr\": \"node_netstat_Tcp_CurrEstab{instance=\\\"$node\\\",job=\\\"$job\\\"}\",\n              \"format\": \"time_series\",\n              \"hide\": false,\n              \"interval\": \"\",\n              \"intervalFactor\": 1,\n              \"legendFormat\":\n                  \"CurrEstab - TCP connections for which the current state is either ESTABLISHED or CLOSE- WAIT\",\n              \"refId\": \"A\",\n              \"step\": 240\n            },\n            {\n              \"datasource\": {\n                \"type\": \"prometheus\",\n                \"uid\": \"${datasource}\"\n              },\n              \"expr\": \"node_netstat_Tcp_MaxConn{instance=\\\"$node\\\",job=\\\"$job\\\"}\",\n              \"format\": \"time_series\",\n              \"hide\": false,\n              \"interval\": \"\",\n              \"intervalFactor\": 1,\n              \"legendFormat\":\n                  \"MaxConn - Limit on the total number of TCP connections the entity can support (Dynamic is \\\"-1\\\")\",\n              \"refId\": \"B\",\n              \"step\": 240\n            }\n          ],\n          \"title\": \"TCP Connections\",\n          \"type\": \"timeseries\"\n        },\n        {\n          \"datasource\": {\n            \"type\": \"prometheus\",\n            \"uid\": \"${datasource}\"\n          },\n          \"description\": \"\",\n          \"fieldConfig\": {\n            \"defaults\": {\n              \"color\": {\n                \"mode\": \"palette-classic\"\n              },\n              \"custom\": {\n                \"axisCenteredZero\": false,\n                \"axisColorMode\": \"text\",\n                \"axisLabel\": \"counter out (-) / in (+)\",\n                \"axisPlacement\": \"auto\",\n                
\"barAlignment\": 0,\n                \"drawStyle\": \"line\",\n                \"fillOpacity\": 20,\n                \"gradientMode\": \"none\",\n                \"hideFrom\": {\n                  \"legend\": false,\n                  \"tooltip\": false,\n                  \"viz\": false\n                },\n                \"lineInterpolation\": \"linear\",\n                \"lineWidth\": 1,\n                \"pointSize\": 5,\n                \"scaleDistribution\": {\n                  \"type\": \"linear\"\n                },\n                \"showPoints\": \"never\",\n                \"spanNulls\": false,\n                \"stacking\": {\n                  \"group\": \"A\",\n                  \"mode\": \"none\"\n                },\n                \"thresholdsStyle\": {\n                  \"mode\": \"off\"\n                }\n              },\n              \"links\": [],\n              \"mappings\": [],\n              \"thresholds\": {\n                \"mode\": \"absolute\",\n                \"steps\": [\n                  {\n                    \"color\": \"green\",\n                    \"value\": null\n                  },\n                  {\n                    \"color\": \"red\",\n                    \"value\": 80\n                  }\n                ]\n              },\n              \"unit\": \"short\"\n            },\n            \"overrides\": [\n              {\n                \"matcher\": {\n                  \"id\": \"byRegexp\",\n                  \"options\": \"/.*Sent.*/\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"custom.transform\",\n                    \"value\": \"negative-Y\"\n                  }\n                ]\n              }\n            ]\n          },\n          \"gridPos\": {\n            \"h\": 10,\n            \"w\": 12,\n            \"x\": 12,\n            \"y\": 73\n          },\n          \"id\": 91,\n          \"links\": [],\n          \"options\": {\n       
     \"legend\": {\n              \"calcs\": [\n                \"mean\",\n                \"lastNotNull\",\n                \"max\",\n                \"min\"\n              ],\n              \"displayMode\": \"table\",\n              \"placement\": \"bottom\",\n              \"showLegend\": true\n            },\n            \"tooltip\": {\n              \"mode\": \"multi\",\n              \"sort\": \"none\"\n            }\n          },\n          \"pluginVersion\": \"9.2.0\",\n          \"targets\": [\n            {\n              \"datasource\": {\n                \"type\": \"prometheus\",\n                \"uid\": \"${datasource}\"\n              },\n              \"expr\":\n                  \"irate(node_netstat_TcpExt_SyncookiesFailed{instance=\\\"$node\\\",job=\\\"$job\\\"}[$__rate_interval])\",\n              \"format\": \"time_series\",\n              \"hide\": false,\n              \"interval\": \"\",\n              \"intervalFactor\": 1,\n              \"legendFormat\": \"SyncookiesFailed - Invalid SYN cookies received\",\n              \"refId\": \"A\",\n              \"step\": 240\n            },\n            {\n              \"datasource\": {\n                \"type\": \"prometheus\",\n                \"uid\": \"${datasource}\"\n              },\n              \"expr\":\n                  \"irate(node_netstat_TcpExt_SyncookiesRecv{instance=\\\"$node\\\",job=\\\"$job\\\"}[$__rate_interval])\",\n              \"format\": \"time_series\",\n              \"hide\": false,\n              \"interval\": \"\",\n              \"intervalFactor\": 1,\n              \"legendFormat\": \"SyncookiesRecv - SYN cookies received\",\n              \"refId\": \"B\",\n              \"step\": 240\n            },\n            {\n              \"datasource\": {\n                \"type\": \"prometheus\",\n                \"uid\": \"${datasource}\"\n              },\n              \"expr\":\n                  
\"irate(node_netstat_TcpExt_SyncookiesSent{instance=\\\"$node\\\",job=\\\"$job\\\"}[$__rate_interval])\",\n              \"format\": \"time_series\",\n              \"hide\": false,\n              \"interval\": \"\",\n              \"intervalFactor\": 1,\n              \"legendFormat\": \"SyncookiesSent - SYN cookies sent\",\n              \"refId\": \"C\",\n              \"step\": 240\n            }\n          ],\n          \"title\": \"TCP SynCookie\",\n          \"type\": \"timeseries\"\n        },\n        {\n          \"datasource\": {\n            \"type\": \"prometheus\",\n            \"uid\": \"${datasource}\"\n          },\n          \"fieldConfig\": {\n            \"defaults\": {\n              \"color\": {\n                \"mode\": \"palette-classic\"\n              },\n              \"custom\": {\n                \"axisCenteredZero\": false,\n                \"axisColorMode\": \"text\",\n                \"axisLabel\": \"connections\",\n                \"axisPlacement\": \"auto\",\n                \"barAlignment\": 0,\n                \"drawStyle\": \"line\",\n                \"fillOpacity\": 20,\n                \"gradientMode\": \"none\",\n                \"hideFrom\": {\n                  \"legend\": false,\n                  \"tooltip\": false,\n                  \"viz\": false\n                },\n                \"lineInterpolation\": \"linear\",\n                \"lineWidth\": 1,\n                \"pointSize\": 5,\n                \"scaleDistribution\": {\n                  \"type\": \"linear\"\n                },\n                \"showPoints\": \"never\",\n                \"spanNulls\": false,\n                \"stacking\": {\n                  \"group\": \"A\",\n                  \"mode\": \"none\"\n                },\n                \"thresholdsStyle\": {\n                  \"mode\": \"off\"\n                }\n              },\n              \"links\": [],\n              \"mappings\": [],\n              \"min\": 0,\n              
\"thresholds\": {\n                \"mode\": \"absolute\",\n                \"steps\": [\n                  {\n                    \"color\": \"green\",\n                    \"value\": null\n                  },\n                  {\n                    \"color\": \"red\",\n                    \"value\": 80\n                  }\n                ]\n              },\n              \"unit\": \"short\"\n            },\n            \"overrides\": []\n          },\n          \"gridPos\": {\n            \"h\": 10,\n            \"w\": 12,\n            \"x\": 0,\n            \"y\": 83\n          },\n          \"id\": 82,\n          \"links\": [],\n          \"options\": {\n            \"legend\": {\n              \"calcs\": [\n                \"mean\",\n                \"lastNotNull\",\n                \"max\",\n                \"min\"\n              ],\n              \"displayMode\": \"table\",\n              \"placement\": \"bottom\",\n              \"showLegend\": true\n            },\n            \"tooltip\": {\n              \"mode\": \"multi\",\n              \"sort\": \"none\"\n            }\n          },\n          \"pluginVersion\": \"9.2.0\",\n          \"targets\": [\n            {\n              \"datasource\": {\n                \"type\": \"prometheus\",\n                \"uid\": \"${datasource}\"\n              },\n              \"expr\":\n                  \"irate(node_netstat_Tcp_ActiveOpens{instance=\\\"$node\\\",job=\\\"$job\\\"}[$__rate_interval])\",\n              \"format\": \"time_series\",\n              \"interval\": \"\",\n              \"intervalFactor\": 1,\n              \"legendFormat\":\n                  \"ActiveOpens - TCP connections that have made a direct transition to the SYN-SENT state from the CLOSED state\",\n              \"refId\": \"A\",\n              \"step\": 240\n            },\n            {\n              \"datasource\": {\n                \"type\": \"prometheus\",\n                \"uid\": \"${datasource}\"\n              
},\n              \"expr\":\n                  \"irate(node_netstat_Tcp_PassiveOpens{instance=\\\"$node\\\",job=\\\"$job\\\"}[$__rate_interval])\",\n              \"format\": \"time_series\",\n              \"interval\": \"\",\n              \"intervalFactor\": 1,\n              \"legendFormat\":\n                  \"PassiveOpens - TCP connections that have made a direct transition to the SYN-RCVD state from the LISTEN state\",\n              \"refId\": \"B\",\n              \"step\": 240\n            }\n          ],\n          \"title\": \"TCP Direct Transition\",\n          \"type\": \"timeseries\"\n        },\n        {\n          \"datasource\": {\n            \"type\": \"prometheus\",\n            \"uid\": \"${datasource}\"\n          },\n          \"description\": \"Enable with --collector.tcpstat argument on node-exporter\",\n          \"fieldConfig\": {\n            \"defaults\": {\n              \"color\": {\n                \"mode\": \"palette-classic\"\n              },\n              \"custom\": {\n                \"axisCenteredZero\": false,\n                \"axisColorMode\": \"text\",\n                \"axisLabel\": \"connections\",\n                \"axisPlacement\": \"auto\",\n                \"barAlignment\": 0,\n                \"drawStyle\": \"line\",\n                \"fillOpacity\": 20,\n                \"gradientMode\": \"none\",\n                \"hideFrom\": {\n                  \"legend\": false,\n                  \"tooltip\": false,\n                  \"viz\": false\n                },\n                \"lineInterpolation\": \"linear\",\n                \"lineWidth\": 1,\n                \"pointSize\": 5,\n                \"scaleDistribution\": {\n                  \"type\": \"linear\"\n                },\n                \"showPoints\": \"never\",\n                \"spanNulls\": false,\n                \"stacking\": {\n                  \"group\": \"A\",\n                  \"mode\": \"none\"\n                },\n                
\"thresholdsStyle\": {\n                  \"mode\": \"off\"\n                }\n              },\n              \"links\": [],\n              \"mappings\": [],\n              \"min\": 0,\n              \"thresholds\": {\n                \"mode\": \"absolute\",\n                \"steps\": [\n                  {\n                    \"color\": \"green\",\n                    \"value\": null\n                  }\n                ]\n              },\n              \"unit\": \"short\"\n            },\n            \"overrides\": []\n          },\n          \"gridPos\": {\n            \"h\": 10,\n            \"w\": 12,\n            \"x\": 12,\n            \"y\": 83\n          },\n          \"id\": 320,\n          \"links\": [],\n          \"options\": {\n            \"legend\": {\n              \"calcs\": [\n                \"mean\",\n                \"lastNotNull\",\n                \"max\",\n                \"min\"\n              ],\n              \"displayMode\": \"table\",\n              \"placement\": \"bottom\",\n              \"showLegend\": true\n            },\n            \"tooltip\": {\n              \"mode\": \"multi\",\n              \"sort\": \"none\"\n            }\n          },\n          \"pluginVersion\": \"9.2.0\",\n          \"targets\": [\n            {\n              \"datasource\": {\n                \"type\": \"prometheus\",\n                \"uid\": \"${datasource}\"\n              },\n              \"editorMode\": \"code\",\n              \"expr\":\n                  \"node_tcp_connection_states{state=\\\"established\\\",instance=\\\"$node\\\",job=\\\"$job\\\"}\",\n              \"format\": \"time_series\",\n              \"interval\": \"\",\n              \"intervalFactor\": 1,\n              \"legendFormat\": \"established - TCP sockets in established state\",\n              \"range\": true,\n              \"refId\": \"A\",\n              \"step\": 240\n            },\n            {\n              \"datasource\": {\n                \"type\": 
\"prometheus\",\n                \"uid\": \"${datasource}\"\n              },\n              \"editorMode\": \"code\",\n              \"expr\":\n                  \"node_tcp_connection_states{state=\\\"fin_wait2\\\",instance=\\\"$node\\\",job=\\\"$job\\\"}\",\n              \"format\": \"time_series\",\n              \"hide\": false,\n              \"interval\": \"\",\n              \"intervalFactor\": 1,\n              \"legendFormat\": \"fin_wait2 - TCP sockets in fin_wait2 state\",\n              \"range\": true,\n              \"refId\": \"B\",\n              \"step\": 240\n            },\n            {\n              \"datasource\": {\n                \"type\": \"prometheus\",\n                \"uid\": \"${datasource}\"\n              },\n              \"editorMode\": \"code\",\n              \"expr\":\n                  \"node_tcp_connection_states{state=\\\"listen\\\",instance=\\\"$node\\\",job=\\\"$job\\\"}\",\n              \"format\": \"time_series\",\n              \"hide\": false,\n              \"interval\": \"\",\n              \"intervalFactor\": 1,\n              \"legendFormat\": \"listen - TCP sockets in listen state\",\n              \"range\": true,\n              \"refId\": \"C\",\n              \"step\": 240\n            },\n            {\n              \"datasource\": {\n                \"type\": \"prometheus\",\n                \"uid\": \"${datasource}\"\n              },\n              \"editorMode\": \"code\",\n              \"expr\":\n                  \"node_tcp_connection_states{state=\\\"time_wait\\\",instance=\\\"$node\\\",job=\\\"$job\\\"}\",\n              \"format\": \"time_series\",\n              \"hide\": false,\n              \"interval\": \"\",\n              \"intervalFactor\": 1,\n              \"legendFormat\": \"time_wait - TCP sockets in time_wait state\",\n              \"range\": true,\n              \"refId\": \"D\",\n              \"step\": 240\n            }\n          ],\n          \"title\": \"TCP Stat\",\n         
 \"type\": \"timeseries\"\n        }\n      ],\n      \"targets\": [\n        {\n          \"datasource\": {\n            \"type\": \"prometheus\",\n            \"uid\": \"000000001\"\n          },\n          \"refId\": \"A\"\n        }\n      ],\n      \"title\": \"Network Netstat\",\n      \"type\": \"row\"\n    },\n    {\n      \"collapsed\": true,\n      \"datasource\": {\n        \"type\": \"prometheus\",\n        \"uid\": \"000000001\"\n      },\n      \"gridPos\": {\n        \"h\": 1,\n        \"w\": 24,\n        \"x\": 0,\n        \"y\": 33\n      },\n      \"id\": 279,\n      \"panels\": [\n        {\n          \"datasource\": {\n            \"type\": \"prometheus\",\n            \"uid\": \"${datasource}\"\n          },\n          \"description\": \"\",\n          \"fieldConfig\": {\n            \"defaults\": {\n              \"color\": {\n                \"mode\": \"palette-classic\"\n              },\n              \"custom\": {\n                \"axisCenteredZero\": false,\n                \"axisColorMode\": \"text\",\n                \"axisLabel\": \"seconds\",\n                \"axisPlacement\": \"auto\",\n                \"barAlignment\": 0,\n                \"drawStyle\": \"line\",\n                \"fillOpacity\": 20,\n                \"gradientMode\": \"none\",\n                \"hideFrom\": {\n                  \"legend\": false,\n                  \"tooltip\": false,\n                  \"viz\": false\n                },\n                \"lineInterpolation\": \"linear\",\n                \"lineWidth\": 1,\n                \"pointSize\": 5,\n                \"scaleDistribution\": {\n                  \"type\": \"linear\"\n                },\n                \"showPoints\": \"never\",\n                \"spanNulls\": false,\n                \"stacking\": {\n                  \"group\": \"A\",\n                  \"mode\": \"normal\"\n                },\n                \"thresholdsStyle\": {\n                  \"mode\": \"off\"\n                }\n  
            },\n              \"links\": [],\n              \"mappings\": [],\n              \"thresholds\": {\n                \"mode\": \"absolute\",\n                \"steps\": [\n                  {\n                    \"color\": \"green\"\n                  },\n                  {\n                    \"color\": \"red\",\n                    \"value\": 80\n                  }\n                ]\n              },\n              \"unit\": \"s\"\n            },\n            \"overrides\": []\n          },\n          \"gridPos\": {\n            \"h\": 10,\n            \"w\": 12,\n            \"x\": 0,\n            \"y\": 66\n          },\n          \"id\": 40,\n          \"links\": [],\n          \"options\": {\n            \"legend\": {\n              \"calcs\": [\n                \"mean\",\n                \"lastNotNull\",\n                \"max\",\n                \"min\"\n              ],\n              \"displayMode\": \"table\",\n              \"placement\": \"bottom\",\n              \"showLegend\": true\n            },\n            \"tooltip\": {\n              \"mode\": \"multi\",\n              \"sort\": \"none\"\n            }\n          },\n          \"pluginVersion\": \"9.2.0\",\n          \"targets\": [\n            {\n              \"datasource\": {\n                \"type\": \"prometheus\",\n                \"uid\": \"${datasource}\"\n              },\n              \"expr\": \"node_scrape_collector_duration_seconds{instance=\\\"$node\\\",job=\\\"$job\\\"}\",\n              \"format\": \"time_series\",\n              \"hide\": false,\n              \"interval\": \"\",\n              \"intervalFactor\": 1,\n              \"legendFormat\": \"{{collector}} - Scrape duration\",\n              \"refId\": \"A\",\n              \"step\": 240\n            }\n          ],\n          \"title\": \"Node Exporter Scrape Time\",\n          \"type\": \"timeseries\"\n        },\n        {\n          \"datasource\": {\n            \"type\": \"prometheus\",\n       
     \"uid\": \"${datasource}\"\n          },\n          \"description\": \"\",\n          \"fieldConfig\": {\n            \"defaults\": {\n              \"color\": {\n                \"mode\": \"palette-classic\"\n              },\n              \"custom\": {\n                \"axisCenteredZero\": false,\n                \"axisColorMode\": \"text\",\n                \"axisLabel\": \"counter\",\n                \"axisPlacement\": \"auto\",\n                \"barAlignment\": 0,\n                \"drawStyle\": \"line\",\n                \"fillOpacity\": 20,\n                \"gradientMode\": \"none\",\n                \"hideFrom\": {\n                  \"legend\": false,\n                  \"tooltip\": false,\n                  \"viz\": false\n                },\n                \"lineInterpolation\": \"linear\",\n                \"lineStyle\": {\n                  \"fill\": \"solid\"\n                },\n                \"lineWidth\": 1,\n                \"pointSize\": 5,\n                \"scaleDistribution\": {\n                  \"type\": \"linear\"\n                },\n                \"showPoints\": \"never\",\n                \"spanNulls\": false,\n                \"stacking\": {\n                  \"group\": \"A\",\n                  \"mode\": \"none\"\n                },\n                \"thresholdsStyle\": {\n                  \"mode\": \"off\"\n                }\n              },\n              \"links\": [],\n              \"mappings\": [],\n              \"thresholds\": {\n                \"mode\": \"absolute\",\n                \"steps\": [\n                  {\n                    \"color\": \"green\"\n                  },\n                  {\n                    \"color\": \"red\",\n                    \"value\": 80\n                  }\n                ]\n              },\n              \"unit\": \"short\"\n            },\n            \"overrides\": [\n              {\n                \"matcher\": {\n                  \"id\": \"byRegexp\",\n        
          \"options\": \"/.*error.*/\"\n                },\n                \"properties\": [\n                  {\n                    \"id\": \"color\",\n                    \"value\": {\n                      \"fixedColor\": \"#F2495C\",\n                      \"mode\": \"fixed\"\n                    }\n                  },\n                  {\n                    \"id\": \"custom.transform\",\n                    \"value\": \"negative-Y\"\n                  }\n                ]\n              }\n            ]\n          },\n          \"gridPos\": {\n            \"h\": 10,\n            \"w\": 12,\n            \"x\": 12,\n            \"y\": 66\n          },\n          \"id\": 157,\n          \"links\": [],\n          \"options\": {\n            \"legend\": {\n              \"calcs\": [\n                \"mean\",\n                \"lastNotNull\",\n                \"max\",\n                \"min\"\n              ],\n              \"displayMode\": \"table\",\n              \"placement\": \"bottom\",\n              \"showLegend\": true\n            },\n            \"tooltip\": {\n              \"mode\": \"multi\",\n              \"sort\": \"none\"\n            }\n          },\n          \"pluginVersion\": \"9.2.0\",\n          \"targets\": [\n            {\n              \"datasource\": {\n                \"type\": \"prometheus\",\n                \"uid\": \"${datasource}\"\n              },\n              \"expr\": \"node_scrape_collector_success{instance=\\\"$node\\\",job=\\\"$job\\\"}\",\n              \"format\": \"time_series\",\n              \"hide\": false,\n              \"interval\": \"\",\n              \"intervalFactor\": 1,\n              \"legendFormat\": \"{{collector}} - Scrape success\",\n              \"refId\": \"A\",\n              \"step\": 240\n            },\n            {\n              \"datasource\": {\n                \"type\": \"prometheus\",\n                \"uid\": \"${datasource}\"\n              },\n              \"expr\": 
\"node_textfile_scrape_error{instance=\\\"$node\\\",job=\\\"$job\\\"}\",\n              \"format\": \"time_series\",\n              \"hide\": false,\n              \"interval\": \"\",\n              \"intervalFactor\": 1,\n              \"legendFormat\": \"{{collector}} - Scrape textfile error (1 = true)\",\n              \"refId\": \"B\",\n              \"step\": 240\n            }\n          ],\n          \"title\": \"Node Exporter Scrape\",\n          \"type\": \"timeseries\"\n        }\n      ],\n      \"targets\": [\n        {\n          \"datasource\": {\n            \"type\": \"prometheus\",\n            \"uid\": \"000000001\"\n          },\n          \"refId\": \"A\"\n        }\n      ],\n      \"title\": \"Node Exporter\",\n      \"type\": \"row\"\n    }\n  ],\n  \"refresh\": \"1m\",\n  \"revision\": 1,\n  \"schemaVersion\": 38,\n  \"style\": \"dark\",\n  \"tags\": [\n    \"linux\"\n  ],\n  \"templating\": {\n    \"list\": [\n      {\n        \"current\": {\n          \"selected\": false,\n          \"text\": \"default\",\n          \"value\": \"default\"\n        },\n        \"hide\": 0,\n        \"includeAll\": false,\n        \"label\": \"Datasource\",\n        \"multi\": false,\n        \"name\": \"datasource\",\n        \"options\": [],\n        \"query\": \"prometheus\",\n        \"queryValue\": \"\",\n        \"refresh\": 1,\n        \"regex\": \"\",\n        \"skipUrlSync\": false,\n        \"type\": \"datasource\"\n      },\n      {\n        \"current\": {},\n        \"datasource\": {\n          \"type\": \"prometheus\",\n          \"uid\": \"${datasource}\"\n        },\n        \"definition\": \"\",\n        \"hide\": 0,\n        \"includeAll\": false,\n        \"label\": \"Job\",\n        \"multi\": false,\n        \"name\": \"job\",\n        \"options\": [],\n        \"query\": {\n          \"query\": \"label_values(node_uname_info, job)\",\n          \"refId\": \"Prometheus-job-Variable-Query\"\n        },\n        \"refresh\": 1,\n        
\"regex\": \"\",\n        \"skipUrlSync\": false,\n        \"sort\": 1,\n        \"tagValuesQuery\": \"\",\n        \"tagsQuery\": \"\",\n        \"type\": \"query\",\n        \"useTags\": false\n      },\n      {\n        \"current\": {},\n        \"datasource\": {\n          \"type\": \"prometheus\",\n          \"uid\": \"${datasource}\"\n        },\n        \"definition\": \"label_values(node_uname_info{job=\\\"$job\\\"}, instance)\",\n        \"hide\": 0,\n        \"includeAll\": false,\n        \"label\": \"Host\",\n        \"multi\": false,\n        \"name\": \"node\",\n        \"options\": [],\n        \"query\": {\n          \"query\": \"label_values(node_uname_info{job=\\\"$job\\\"}, instance)\",\n          \"refId\": \"Prometheus-node-Variable-Query\"\n        },\n        \"refresh\": 1,\n        \"regex\": \"\",\n        \"skipUrlSync\": false,\n        \"sort\": 1,\n        \"tagValuesQuery\": \"\",\n        \"tagsQuery\": \"\",\n        \"type\": \"query\",\n        \"useTags\": false\n      },\n      {\n        \"current\": {\n          \"selected\": false,\n          \"text\": \"[a-z]+|nvme[0-9]+n[0-9]+|mmcblk[0-9]+\",\n          \"value\": \"[a-z]+|nvme[0-9]+n[0-9]+|mmcblk[0-9]+\"\n        },\n        \"hide\": 2,\n        \"includeAll\": false,\n        \"multi\": false,\n        \"name\": \"diskdevices\",\n        \"options\": [\n          {\n            \"selected\": true,\n            \"text\": \"[a-z]+|nvme[0-9]+n[0-9]+|mmcblk[0-9]+\",\n            \"value\": \"[a-z]+|nvme[0-9]+n[0-9]+|mmcblk[0-9]+\"\n          }\n        ],\n        \"query\": \"[a-z]+|nvme[0-9]+n[0-9]+|mmcblk[0-9]+\",\n        \"skipUrlSync\": false,\n        \"type\": \"custom\"\n      }\n    ]\n  },\n  \"time\": {\n    \"from\": \"now-24h\",\n    \"to\": \"now\"\n  },\n  \"timepicker\": {\n    \"refresh_intervals\": [\n      \"5s\",\n      \"10s\",\n      \"30s\",\n      \"1m\",\n      \"5m\",\n      \"15m\",\n      \"30m\",\n      \"1h\",\n      \"2h\",\n      \"1d\"\n    
],\n    \"time_options\": [\n      \"5m\",\n      \"15m\",\n      \"1h\",\n      \"6h\",\n      \"12h\",\n      \"24h\",\n      \"2d\",\n      \"7d\",\n      \"30d\"\n    ]\n  },\n  \"timezone\": \"browser\",\n  \"title\": \"Node Exporter Full\",\n  \"uid\": \"rYdddlPWk\",\n  \"version\": 92,\n  \"weekStart\": \"\"\n}\n"
  },
  {
    "path": "tools/local/monitoring/grafana/provisioning/dashboards/redis.json",
    "content": "{\n  \"annotations\": {\n    \"list\": [\n      {\n        \"builtIn\": 1,\n        \"datasource\": {\n          \"type\": \"datasource\",\n          \"uid\": \"grafana\"\n        },\n        \"enable\": true,\n        \"hide\": true,\n        \"iconColor\": \"rgba(0, 211, 255, 1)\",\n        \"name\": \"Annotations & Alerts\",\n        \"type\": \"dashboard\"\n      }\n    ]\n  },\n  \"description\": \"Redis Dashboard for Prometheus Redis Exporter 1.x\",\n  \"editable\": true,\n  \"fiscalYearStartMonth\": 0,\n  \"gnetId\": 763,\n  \"graphTooltip\": 1,\n  \"id\": 4,\n  \"links\": [],\n  \"liveNow\": false,\n  \"panels\": [\n    {\n      \"datasource\": {\n        \"type\": \"prometheus\",\n        \"uid\": \"PBFA97CFB590B2093\"\n      },\n      \"fieldConfig\": {\n        \"defaults\": {\n          \"color\": {\n            \"fixedColor\": \"rgb(31, 120, 193)\",\n            \"mode\": \"fixed\"\n          },\n          \"decimals\": 0,\n          \"mappings\": [\n            {\n              \"options\": {\n                \"match\": \"null\",\n                \"result\": {\n                  \"text\": \"N/A\"\n                }\n              },\n              \"type\": \"special\"\n            }\n          ],\n          \"thresholds\": {\n            \"mode\": \"absolute\",\n            \"steps\": [\n              {\n                \"color\": \"green\",\n                \"value\": null\n              },\n              {\n                \"color\": \"red\",\n                \"value\": 80\n              }\n            ]\n          },\n          \"unit\": \"s\"\n        },\n        \"overrides\": []\n      },\n      \"gridPos\": {\n        \"h\": 7,\n        \"w\": 3,\n        \"x\": 0,\n        \"y\": 0\n      },\n      \"id\": 9,\n      \"links\": [],\n      \"maxDataPoints\": 100,\n      \"options\": {\n        \"colorMode\": \"none\",\n        \"graphMode\": \"area\",\n        \"justifyMode\": \"auto\",\n        \"orientation\": 
\"horizontal\",\n        \"reduceOptions\": {\n          \"calcs\": [\n            \"lastNotNull\"\n          ],\n          \"fields\": \"\",\n          \"values\": false\n        },\n        \"showPercentChange\": false,\n        \"textMode\": \"auto\",\n        \"wideLayout\": true\n      },\n      \"pluginVersion\": \"10.1.10\",\n      \"targets\": [\n        {\n          \"datasource\": {\n            \"type\": \"prometheus\",\n            \"uid\": \"PBFA97CFB590B2093\"\n          },\n          \"expr\":\n              \"max(max_over_time(redis_uptime_in_seconds{instance=~\\\"$instance\\\"}[$__interval]))\",\n          \"format\": \"time_series\",\n          \"interval\": \"\",\n          \"intervalFactor\": 2,\n          \"legendFormat\": \"\",\n          \"metric\": \"\",\n          \"refId\": \"A\",\n          \"step\": 1800\n        }\n      ],\n      \"title\": \"Max Uptime\",\n      \"type\": \"stat\"\n    },\n    {\n      \"datasource\": {\n        \"type\": \"prometheus\",\n        \"uid\": \"PBFA97CFB590B2093\"\n      },\n      \"fieldConfig\": {\n        \"defaults\": {\n          \"color\": {\n            \"fixedColor\": \"rgb(31, 120, 193)\",\n            \"mode\": \"fixed\"\n          },\n          \"decimals\": 0,\n          \"mappings\": [\n            {\n              \"options\": {\n                \"match\": \"null\",\n                \"result\": {\n                  \"text\": \"N/A\"\n                }\n              },\n              \"type\": \"special\"\n            }\n          ],\n          \"thresholds\": {\n            \"mode\": \"absolute\",\n            \"steps\": [\n              {\n                \"color\": \"green\",\n                \"value\": null\n              },\n              {\n                \"color\": \"red\",\n                \"value\": 80\n              }\n            ]\n          },\n          \"unit\": \"none\"\n        },\n        \"overrides\": []\n      },\n      \"gridPos\": {\n        \"h\": 7,\n        \"w\": 
2,\n        \"x\": 3,\n        \"y\": 0\n      },\n      \"hideTimeOverride\": true,\n      \"id\": 12,\n      \"links\": [],\n      \"maxDataPoints\": 100,\n      \"options\": {\n        \"colorMode\": \"none\",\n        \"graphMode\": \"area\",\n        \"justifyMode\": \"auto\",\n        \"orientation\": \"horizontal\",\n        \"reduceOptions\": {\n          \"calcs\": [\n            \"lastNotNull\"\n          ],\n          \"fields\": \"\",\n          \"values\": false\n        },\n        \"showPercentChange\": false,\n        \"textMode\": \"auto\",\n        \"wideLayout\": true\n      },\n      \"pluginVersion\": \"10.1.10\",\n      \"targets\": [\n        {\n          \"datasource\": {\n            \"type\": \"prometheus\",\n            \"uid\": \"PBFA97CFB590B2093\"\n          },\n          \"expr\": \"sum(redis_connected_clients{instance=~\\\"$instance\\\"})\",\n          \"format\": \"time_series\",\n          \"intervalFactor\": 2,\n          \"legendFormat\": \"\",\n          \"metric\": \"\",\n          \"refId\": \"A\",\n          \"step\": 2\n        }\n      ],\n      \"timeFrom\": \"1m\",\n      \"title\": \"Clients\",\n      \"type\": \"stat\"\n    },\n    {\n      \"datasource\": {\n        \"type\": \"prometheus\",\n        \"uid\": \"PBFA97CFB590B2093\"\n      },\n      \"fieldConfig\": {\n        \"defaults\": {\n          \"color\": {\n            \"mode\": \"thresholds\"\n          },\n          \"decimals\": 0,\n          \"mappings\": [\n            {\n              \"options\": {\n                \"match\": \"null\",\n                \"result\": {\n                  \"text\": \"N/A\"\n                }\n              },\n              \"type\": \"special\"\n            }\n          ],\n          \"max\": 100,\n          \"min\": 0,\n          \"thresholds\": {\n            \"mode\": \"absolute\",\n            \"steps\": [\n              {\n                \"color\": \"rgba(50, 172, 45, 0.97)\",\n                \"value\": null\n        
      },\n              {\n                \"color\": \"rgba(237, 129, 40, 0.89)\",\n                \"value\": 80\n              },\n              {\n                \"color\": \"rgba(245, 54, 54, 0.9)\",\n                \"value\": 95\n              }\n            ]\n          },\n          \"unit\": \"percent\"\n        },\n        \"overrides\": []\n      },\n      \"gridPos\": {\n        \"h\": 7,\n        \"w\": 3,\n        \"x\": 5,\n        \"y\": 0\n      },\n      \"hideTimeOverride\": true,\n      \"id\": 11,\n      \"links\": [],\n      \"maxDataPoints\": 100,\n      \"options\": {\n        \"minVizHeight\": 75,\n        \"minVizWidth\": 75,\n        \"orientation\": \"horizontal\",\n        \"reduceOptions\": {\n          \"calcs\": [\n            \"lastNotNull\"\n          ],\n          \"fields\": \"\",\n          \"values\": false\n        },\n        \"showThresholdLabels\": false,\n        \"showThresholdMarkers\": true,\n        \"sizing\": \"auto\"\n      },\n      \"pluginVersion\": \"10.1.10\",\n      \"targets\": [\n        {\n          \"datasource\": {\n            \"type\": \"prometheus\",\n            \"uid\": \"PBFA97CFB590B2093\"\n          },\n          \"expr\":\n              \"sum(100 * (redis_memory_used_bytes{instance=~\\\"$instance\\\"}  / redis_memory_max_bytes{instance=~\\\"$instance\\\"}))\",\n          \"format\": \"time_series\",\n          \"intervalFactor\": 2,\n          \"legendFormat\": \"\",\n          \"metric\": \"\",\n          \"refId\": \"A\",\n          \"step\": 2\n        }\n      ],\n      \"timeFrom\": \"1m\",\n      \"title\": \"Memory Usage\",\n      \"type\": \"gauge\"\n    },\n    {\n      \"datasource\": {\n        \"type\": \"prometheus\",\n        \"uid\": \"PBFA97CFB590B2093\"\n      },\n      \"fieldConfig\": {\n        \"defaults\": {\n          \"color\": {\n            \"mode\": \"palette-classic\"\n          },\n          \"custom\": {\n            \"axisCenteredZero\": false,\n            
\"axisColorMode\": \"text\",\n            \"axisLabel\": \"\",\n            \"axisPlacement\": \"auto\",\n            \"barAlignment\": 0,\n            \"drawStyle\": \"line\",\n            \"fillOpacity\": 80,\n            \"gradientMode\": \"none\",\n            \"hideFrom\": {\n              \"legend\": false,\n              \"tooltip\": false,\n              \"viz\": false\n            },\n            \"insertNulls\": false,\n            \"lineInterpolation\": \"linear\",\n            \"lineWidth\": 1,\n            \"pointSize\": 5,\n            \"scaleDistribution\": {\n              \"type\": \"linear\"\n            },\n            \"showPoints\": \"never\",\n            \"spanNulls\": true,\n            \"stacking\": {\n              \"group\": \"A\",\n              \"mode\": \"normal\"\n            },\n            \"thresholdsStyle\": {\n              \"mode\": \"off\"\n            }\n          },\n          \"links\": [],\n          \"mappings\": [],\n          \"thresholds\": {\n            \"mode\": \"absolute\",\n            \"steps\": [\n              {\n                \"color\": \"green\",\n                \"value\": null\n              },\n              {\n                \"color\": \"red\",\n                \"value\": 80\n              }\n            ]\n          },\n          \"unit\": \"short\"\n        },\n        \"overrides\": []\n      },\n      \"gridPos\": {\n        \"h\": 7,\n        \"w\": 8,\n        \"x\": 8,\n        \"y\": 0\n      },\n      \"id\": 18,\n      \"links\": [],\n      \"options\": {\n        \"legend\": {\n          \"calcs\": [],\n          \"displayMode\": \"list\",\n          \"placement\": \"bottom\",\n          \"showLegend\": false\n        },\n        \"tooltip\": {\n          \"mode\": \"multi\",\n          \"sort\": \"desc\"\n        }\n      },\n      \"pluginVersion\": \"10.3.3\",\n      \"targets\": [\n        {\n          \"datasource\": {\n            \"type\": \"prometheus\",\n            \"uid\": 
\"PBFA97CFB590B2093\"\n          },\n          \"expr\":\n              \"sum(rate(redis_commands_total{instance=~\\\"$instance\\\"} [$__rate_interval])) by (cmd)\",\n          \"format\": \"time_series\",\n          \"interval\": \"\",\n          \"intervalFactor\": 2,\n          \"legendFormat\": \"{{ cmd }}\",\n          \"metric\": \"redis_command_calls_total\",\n          \"refId\": \"A\",\n          \"step\": 240\n        }\n      ],\n      \"title\": \"Total Commands / sec\",\n      \"type\": \"timeseries\"\n    },\n    {\n      \"datasource\": {\n        \"type\": \"prometheus\",\n        \"uid\": \"PBFA97CFB590B2093\"\n      },\n      \"fieldConfig\": {\n        \"defaults\": {\n          \"color\": {\n            \"mode\": \"palette-classic\"\n          },\n          \"custom\": {\n            \"axisCenteredZero\": false,\n            \"axisColorMode\": \"text\",\n            \"axisLabel\": \"\",\n            \"axisPlacement\": \"auto\",\n            \"barAlignment\": 0,\n            \"drawStyle\": \"line\",\n            \"fillOpacity\": 10,\n            \"gradientMode\": \"none\",\n            \"hideFrom\": {\n              \"legend\": false,\n              \"tooltip\": false,\n              \"viz\": false\n            },\n            \"insertNulls\": false,\n            \"lineInterpolation\": \"linear\",\n            \"lineWidth\": 2,\n            \"pointSize\": 5,\n            \"scaleDistribution\": {\n              \"type\": \"linear\"\n            },\n            \"showPoints\": \"never\",\n            \"spanNulls\": true,\n            \"stacking\": {\n              \"group\": \"A\",\n              \"mode\": \"none\"\n            },\n            \"thresholdsStyle\": {\n              \"mode\": \"off\"\n            }\n          },\n          \"links\": [],\n          \"mappings\": [],\n          \"min\": 0,\n          \"thresholds\": {\n            \"mode\": \"absolute\",\n            \"steps\": [\n              {\n                \"color\": 
\"green\",\n                \"value\": null\n              },\n              {\n                \"color\": \"red\",\n                \"value\": 80\n              }\n            ]\n          },\n          \"unit\": \"short\"\n        },\n        \"overrides\": []\n      },\n      \"gridPos\": {\n        \"h\": 7,\n        \"w\": 8,\n        \"x\": 16,\n        \"y\": 0\n      },\n      \"id\": 1,\n      \"links\": [],\n      \"options\": {\n        \"legend\": {\n          \"calcs\": [],\n          \"displayMode\": \"list\",\n          \"placement\": \"bottom\",\n          \"showLegend\": false\n        },\n        \"tooltip\": {\n          \"mode\": \"multi\",\n          \"sort\": \"none\"\n        }\n      },\n      \"pluginVersion\": \"10.3.3\",\n      \"targets\": [\n        {\n          \"datasource\": {\n            \"type\": \"prometheus\",\n            \"uid\": \"PBFA97CFB590B2093\"\n          },\n          \"expr\": \"irate(redis_keyspace_hits_total{instance=~\\\"$instance\\\"}[$__rate_interval])\",\n          \"format\": \"time_series\",\n          \"hide\": false,\n          \"interval\": \"\",\n          \"intervalFactor\": 2,\n          \"legendFormat\": \"hits, {{ instance }}\",\n          \"metric\": \"\",\n          \"refId\": \"A\",\n          \"step\": 240,\n          \"target\": \"\"\n        },\n        {\n          \"datasource\": {\n            \"type\": \"prometheus\",\n            \"uid\": \"PBFA97CFB590B2093\"\n          },\n          \"expr\": \"irate(redis_keyspace_misses_total{instance=~\\\"$instance\\\"}[$__rate_interval])\",\n          \"format\": \"time_series\",\n          \"hide\": false,\n          \"interval\": \"\",\n          \"intervalFactor\": 2,\n          \"legendFormat\": \"misses, {{ instance }}\",\n          \"metric\": \"\",\n          \"refId\": \"B\",\n          \"step\": 240,\n          \"target\": \"\"\n        }\n      ],\n      \"title\": \"Hits / Misses per Sec\",\n      \"type\": \"timeseries\"\n    },\n    {\n    
  \"datasource\": {\n        \"type\": \"prometheus\",\n        \"uid\": \"PBFA97CFB590B2093\"\n      },\n      \"fieldConfig\": {\n        \"defaults\": {\n          \"color\": {\n            \"mode\": \"palette-classic\"\n          },\n          \"custom\": {\n            \"axisCenteredZero\": false,\n            \"axisColorMode\": \"text\",\n            \"axisLabel\": \"\",\n            \"axisPlacement\": \"auto\",\n            \"barAlignment\": 0,\n            \"drawStyle\": \"line\",\n            \"fillOpacity\": 10,\n            \"gradientMode\": \"none\",\n            \"hideFrom\": {\n              \"legend\": false,\n              \"tooltip\": false,\n              \"viz\": false\n            },\n            \"insertNulls\": false,\n            \"lineInterpolation\": \"linear\",\n            \"lineWidth\": 2,\n            \"pointSize\": 5,\n            \"scaleDistribution\": {\n              \"type\": \"linear\"\n            },\n            \"showPoints\": \"never\",\n            \"spanNulls\": false,\n            \"stacking\": {\n              \"group\": \"A\",\n              \"mode\": \"none\"\n            },\n            \"thresholdsStyle\": {\n              \"mode\": \"off\"\n            }\n          },\n          \"links\": [],\n          \"mappings\": [],\n          \"min\": 0,\n          \"thresholds\": {\n            \"mode\": \"absolute\",\n            \"steps\": [\n              {\n                \"color\": \"green\",\n                \"value\": null\n              },\n              {\n                \"color\": \"red\",\n                \"value\": 80\n              }\n            ]\n          },\n          \"unit\": \"bytes\"\n        },\n        \"overrides\": [\n          {\n            \"matcher\": {\n              \"id\": \"byName\",\n              \"options\": \"max\"\n            },\n            \"properties\": [\n              {\n                \"id\": \"color\",\n                \"value\": {\n                  \"fixedColor\": 
\"#BF1B00\",\n                  \"mode\": \"fixed\"\n                }\n              }\n            ]\n          }\n        ]\n      },\n      \"gridPos\": {\n        \"h\": 7,\n        \"w\": 12,\n        \"x\": 0,\n        \"y\": 7\n      },\n      \"id\": 7,\n      \"links\": [],\n      \"options\": {\n        \"legend\": {\n          \"calcs\": [],\n          \"displayMode\": \"list\",\n          \"placement\": \"bottom\",\n          \"showLegend\": true\n        },\n        \"tooltip\": {\n          \"mode\": \"multi\",\n          \"sort\": \"none\"\n        }\n      },\n      \"pluginVersion\": \"10.3.3\",\n      \"targets\": [\n        {\n          \"datasource\": {\n            \"type\": \"prometheus\",\n            \"uid\": \"PBFA97CFB590B2093\"\n          },\n          \"expr\": \"redis_memory_used_bytes{instance=~\\\"$instance\\\"}\",\n          \"format\": \"time_series\",\n          \"intervalFactor\": 2,\n          \"legendFormat\": \"used, {{ instance }}\",\n          \"metric\": \"\",\n          \"refId\": \"A\",\n          \"step\": 240,\n          \"target\": \"\"\n        },\n        {\n          \"datasource\": {\n            \"type\": \"prometheus\",\n            \"uid\": \"PBFA97CFB590B2093\"\n          },\n          \"expr\": \"redis_memory_max_bytes{instance=~\\\"$instance\\\"}\",\n          \"format\": \"time_series\",\n          \"hide\": false,\n          \"intervalFactor\": 2,\n          \"legendFormat\": \"max, {{ instance }}\",\n          \"refId\": \"B\",\n          \"step\": 240\n        }\n      ],\n      \"title\": \"Total Memory Usage\",\n      \"type\": \"timeseries\"\n    },\n    {\n      \"datasource\": {\n        \"type\": \"prometheus\",\n        \"uid\": \"PBFA97CFB590B2093\"\n      },\n      \"fieldConfig\": {\n        \"defaults\": {\n          \"color\": {\n            \"mode\": \"palette-classic\"\n          },\n          \"custom\": {\n            \"axisCenteredZero\": false,\n            \"axisColorMode\": \"text\",\n 
           \"axisLabel\": \"\",\n            \"axisPlacement\": \"auto\",\n            \"barAlignment\": 0,\n            \"drawStyle\": \"line\",\n            \"fillOpacity\": 10,\n            \"gradientMode\": \"none\",\n            \"hideFrom\": {\n              \"legend\": false,\n              \"tooltip\": false,\n              \"viz\": false\n            },\n            \"insertNulls\": false,\n            \"lineInterpolation\": \"linear\",\n            \"lineWidth\": 2,\n            \"pointSize\": 5,\n            \"scaleDistribution\": {\n              \"type\": \"linear\"\n            },\n            \"showPoints\": \"never\",\n            \"spanNulls\": true,\n            \"stacking\": {\n              \"group\": \"A\",\n              \"mode\": \"none\"\n            },\n            \"thresholdsStyle\": {\n              \"mode\": \"off\"\n            }\n          },\n          \"links\": [],\n          \"mappings\": [],\n          \"thresholds\": {\n            \"mode\": \"absolute\",\n            \"steps\": [\n              {\n                \"color\": \"green\",\n                \"value\": null\n              },\n              {\n                \"color\": \"red\",\n                \"value\": 80\n              }\n            ]\n          },\n          \"unit\": \"bytes\"\n        },\n        \"overrides\": []\n      },\n      \"gridPos\": {\n        \"h\": 7,\n        \"w\": 12,\n        \"x\": 12,\n        \"y\": 7\n      },\n      \"id\": 10,\n      \"links\": [],\n      \"options\": {\n        \"legend\": {\n          \"calcs\": [],\n          \"displayMode\": \"list\",\n          \"placement\": \"bottom\",\n          \"showLegend\": true\n        },\n        \"tooltip\": {\n          \"mode\": \"multi\",\n          \"sort\": \"none\"\n        }\n      },\n      \"pluginVersion\": \"10.3.3\",\n      \"targets\": [\n        {\n          \"datasource\": {\n            \"type\": \"prometheus\",\n            \"uid\": \"PBFA97CFB590B2093\"\n          },\n   
       \"expr\":\n              \"sum(rate(redis_net_input_bytes_total{instance=~\\\"$instance\\\"}[$__rate_interval]))\",\n          \"format\": \"time_series\",\n          \"intervalFactor\": 2,\n          \"legendFormat\": \"{{ input }}\",\n          \"refId\": \"A\",\n          \"step\": 240\n        },\n        {\n          \"datasource\": {\n            \"type\": \"prometheus\",\n            \"uid\": \"PBFA97CFB590B2093\"\n          },\n          \"expr\":\n              \"sum(rate(redis_net_output_bytes_total{instance=~\\\"$instance\\\"}[$__rate_interval]))\",\n          \"format\": \"time_series\",\n          \"interval\": \"\",\n          \"intervalFactor\": 2,\n          \"legendFormat\": \"{{ output }}\",\n          \"refId\": \"B\",\n          \"step\": 240\n        }\n      ],\n      \"title\": \"Network I/O\",\n      \"type\": \"timeseries\"\n    },\n    {\n      \"datasource\": {\n        \"type\": \"prometheus\",\n        \"uid\": \"PBFA97CFB590B2093\"\n      },\n      \"fieldConfig\": {\n        \"defaults\": {\n          \"color\": {\n            \"mode\": \"palette-classic\"\n          },\n          \"custom\": {\n            \"axisCenteredZero\": false,\n            \"axisColorMode\": \"text\",\n            \"axisLabel\": \"\",\n            \"axisPlacement\": \"auto\",\n            \"barAlignment\": 0,\n            \"drawStyle\": \"line\",\n            \"fillOpacity\": 70,\n            \"gradientMode\": \"none\",\n            \"hideFrom\": {\n              \"legend\": false,\n              \"tooltip\": false,\n              \"viz\": false\n            },\n            \"insertNulls\": false,\n            \"lineInterpolation\": \"linear\",\n            \"lineWidth\": 2,\n            \"pointSize\": 5,\n            \"scaleDistribution\": {\n              \"type\": \"linear\"\n            },\n            \"showPoints\": \"never\",\n            \"spanNulls\": true,\n            \"stacking\": {\n              \"group\": \"A\",\n              \"mode\": 
\"normal\"\n            },\n            \"thresholdsStyle\": {\n              \"mode\": \"off\"\n            }\n          },\n          \"links\": [],\n          \"mappings\": [],\n          \"thresholds\": {\n            \"mode\": \"absolute\",\n            \"steps\": [\n              {\n                \"color\": \"green\",\n                \"value\": null\n              },\n              {\n                \"color\": \"red\",\n                \"value\": 80\n              }\n            ]\n          },\n          \"unit\": \"none\"\n        },\n        \"overrides\": [\n          {\n            \"matcher\": {\n              \"id\": \"byValue\",\n              \"options\": {\n                \"op\": \"gte\",\n                \"reducer\": \"allIsZero\",\n                \"value\": 0\n              }\n            },\n            \"properties\": [\n              {\n                \"id\": \"custom.hideFrom\",\n                \"value\": {\n                  \"legend\": true,\n                  \"tooltip\": true,\n                  \"viz\": false\n                }\n              }\n            ]\n          }\n        ]\n      },\n      \"gridPos\": {\n        \"h\": 7,\n        \"w\": 12,\n        \"x\": 0,\n        \"y\": 14\n      },\n      \"id\": 5,\n      \"links\": [],\n      \"options\": {\n        \"legend\": {\n          \"calcs\": [\n            \"lastNotNull\"\n          ],\n          \"displayMode\": \"list\",\n          \"placement\": \"bottom\",\n          \"showLegend\": true\n        },\n        \"tooltip\": {\n          \"mode\": \"multi\",\n          \"sort\": \"none\"\n        }\n      },\n      \"pluginVersion\": \"10.3.3\",\n      \"targets\": [\n        {\n          \"datasource\": {\n            \"type\": \"prometheus\",\n            \"uid\": \"PBFA97CFB590B2093\"\n          },\n          \"expr\": \"sum (redis_db_keys{instance=~\\\"$instance\\\"}) by (db, instance)\",\n          \"format\": \"time_series\",\n          \"interval\": \"\",\n     
     \"intervalFactor\": 2,\n          \"legendFormat\": \"{{ db }}, {{ instance }}\",\n          \"refId\": \"A\",\n          \"step\": 240,\n          \"target\": \"\"\n        }\n      ],\n      \"title\": \"Total Items per DB\",\n      \"type\": \"timeseries\"\n    },\n    {\n      \"datasource\": {\n        \"type\": \"prometheus\",\n        \"uid\": \"PBFA97CFB590B2093\"\n      },\n      \"fieldConfig\": {\n        \"defaults\": {\n          \"color\": {\n            \"mode\": \"palette-classic\"\n          },\n          \"custom\": {\n            \"axisCenteredZero\": false,\n            \"axisColorMode\": \"text\",\n            \"axisLabel\": \"\",\n            \"axisPlacement\": \"auto\",\n            \"barAlignment\": 0,\n            \"drawStyle\": \"line\",\n            \"fillOpacity\": 70,\n            \"gradientMode\": \"none\",\n            \"hideFrom\": {\n              \"legend\": false,\n              \"tooltip\": false,\n              \"viz\": false\n            },\n            \"insertNulls\": false,\n            \"lineInterpolation\": \"linear\",\n            \"lineWidth\": 2,\n            \"pointSize\": 5,\n            \"scaleDistribution\": {\n              \"type\": \"linear\"\n            },\n            \"showPoints\": \"never\",\n            \"spanNulls\": true,\n            \"stacking\": {\n              \"group\": \"A\",\n              \"mode\": \"normal\"\n            },\n            \"thresholdsStyle\": {\n              \"mode\": \"off\"\n            }\n          },\n          \"links\": [],\n          \"mappings\": [],\n          \"thresholds\": {\n            \"mode\": \"absolute\",\n            \"steps\": [\n              {\n                \"color\": \"green\",\n                \"value\": null\n              },\n              {\n                \"color\": \"red\",\n                \"value\": 80\n              }\n            ]\n          },\n          \"unit\": \"short\"\n        },\n        \"overrides\": []\n      },\n      
\"gridPos\": {\n        \"h\": 7,\n        \"w\": 12,\n        \"x\": 12,\n        \"y\": 14\n      },\n      \"id\": 13,\n      \"links\": [],\n      \"options\": {\n        \"legend\": {\n          \"calcs\": [],\n          \"displayMode\": \"list\",\n          \"placement\": \"bottom\",\n          \"showLegend\": true\n        },\n        \"tooltip\": {\n          \"mode\": \"multi\",\n          \"sort\": \"none\"\n        }\n      },\n      \"pluginVersion\": \"10.3.3\",\n      \"targets\": [\n        {\n          \"datasource\": {\n            \"type\": \"prometheus\",\n            \"uid\": \"PBFA97CFB590B2093\"\n          },\n          \"expr\":\n              \"sum (redis_db_keys{instance=~\\\"$instance\\\"}) by (instance) - sum (redis_db_keys_expiring{instance=~\\\"$instance\\\"}) by (instance)\",\n          \"format\": \"time_series\",\n          \"interval\": \"\",\n          \"intervalFactor\": 2,\n          \"legendFormat\": \"not expiring, {{ instance }}\",\n          \"refId\": \"A\",\n          \"step\": 240,\n          \"target\": \"\"\n        },\n        {\n          \"datasource\": {\n            \"type\": \"prometheus\",\n            \"uid\": \"PBFA97CFB590B2093\"\n          },\n          \"expr\": \"sum (redis_db_keys_expiring{instance=~\\\"$instance\\\"}) by (instance)\",\n          \"format\": \"time_series\",\n          \"interval\": \"\",\n          \"intervalFactor\": 2,\n          \"legendFormat\": \"expiring, {{ instance }}\",\n          \"metric\": \"\",\n          \"refId\": \"B\",\n          \"step\": 240\n        }\n      ],\n      \"title\": \"Expiring vs Not-Expiring Keys\",\n      \"type\": \"timeseries\"\n    },\n    {\n      \"datasource\": {\n        \"type\": \"prometheus\",\n        \"uid\": \"PBFA97CFB590B2093\"\n      },\n      \"fieldConfig\": {\n        \"defaults\": {\n          \"color\": {\n            \"mode\": \"palette-classic\"\n          },\n          \"custom\": {\n            \"axisCenteredZero\": false,\n       
     \"axisColorMode\": \"text\",\n            \"axisLabel\": \"\",\n            \"axisPlacement\": \"auto\",\n            \"barAlignment\": 0,\n            \"drawStyle\": \"line\",\n            \"fillOpacity\": 10,\n            \"gradientMode\": \"none\",\n            \"hideFrom\": {\n              \"legend\": false,\n              \"tooltip\": false,\n              \"viz\": false\n            },\n            \"insertNulls\": false,\n            \"lineInterpolation\": \"linear\",\n            \"lineWidth\": 2,\n            \"pointSize\": 5,\n            \"scaleDistribution\": {\n              \"type\": \"linear\"\n            },\n            \"showPoints\": \"never\",\n            \"spanNulls\": true,\n            \"stacking\": {\n              \"group\": \"A\",\n              \"mode\": \"none\"\n            },\n            \"thresholdsStyle\": {\n              \"mode\": \"off\"\n            }\n          },\n          \"links\": [],\n          \"mappings\": [],\n          \"thresholds\": {\n            \"mode\": \"absolute\",\n            \"steps\": [\n              {\n                \"color\": \"green\",\n                \"value\": null\n              },\n              {\n                \"color\": \"red\",\n                \"value\": 80\n              }\n            ]\n          },\n          \"unit\": \"short\"\n        },\n        \"overrides\": [\n          {\n            \"matcher\": {\n              \"id\": \"byName\",\n              \"options\": \"evicts\"\n            },\n            \"properties\": [\n              {\n                \"id\": \"color\",\n                \"value\": {\n                  \"fixedColor\": \"#890F02\",\n                  \"mode\": \"fixed\"\n                }\n              }\n            ]\n          },\n          {\n            \"matcher\": {\n              \"id\": \"byName\",\n              \"options\":\n                  \"memcached_items_evicted_total{instance=\\\"172.17.0.1:9150\\\",job=\\\"prometheus\\\"}\"\n            
},\n            \"properties\": [\n              {\n                \"id\": \"color\",\n                \"value\": {\n                  \"fixedColor\": \"#890F02\",\n                  \"mode\": \"fixed\"\n                }\n              }\n            ]\n          },\n          {\n            \"matcher\": {\n              \"id\": \"byName\",\n              \"options\": \"reclaims\"\n            },\n            \"properties\": [\n              {\n                \"id\": \"color\",\n                \"value\": {\n                  \"fixedColor\": \"#3F6833\",\n                  \"mode\": \"fixed\"\n                }\n              }\n            ]\n          }\n        ]\n      },\n      \"gridPos\": {\n        \"h\": 7,\n        \"w\": 12,\n        \"x\": 0,\n        \"y\": 21\n      },\n      \"id\": 8,\n      \"links\": [],\n      \"options\": {\n        \"legend\": {\n          \"calcs\": [],\n          \"displayMode\": \"list\",\n          \"placement\": \"bottom\",\n          \"showLegend\": true\n        },\n        \"tooltip\": {\n          \"mode\": \"multi\",\n          \"sort\": \"none\"\n        }\n      },\n      \"pluginVersion\": \"10.3.3\",\n      \"targets\": [\n        {\n          \"datasource\": {\n            \"type\": \"prometheus\",\n            \"uid\": \"PBFA97CFB590B2093\"\n          },\n          \"expr\":\n              \"sum(rate(redis_expired_keys_total{instance=~\\\"$instance\\\"}[$__rate_interval])) by (instance)\",\n          \"format\": \"time_series\",\n          \"hide\": false,\n          \"interval\": \"\",\n          \"intervalFactor\": 2,\n          \"legendFormat\": \"expired, {{ instance }}\",\n          \"metric\": \"\",\n          \"refId\": \"A\",\n          \"step\": 240,\n          \"target\": \"\"\n        },\n        {\n          \"datasource\": {\n            \"type\": \"prometheus\",\n            \"uid\": \"PBFA97CFB590B2093\"\n          },\n          \"expr\":\n              
\"sum(rate(redis_evicted_keys_total{instance=~\\\"$instance\\\"}[$__rate_interval])) by (instance)\",\n          \"format\": \"time_series\",\n          \"interval\": \"\",\n          \"intervalFactor\": 2,\n          \"legendFormat\": \"evicted, {{ instance }}\",\n          \"refId\": \"B\",\n          \"step\": 240\n        }\n      ],\n      \"title\": \"Expired/Evicted Keys\",\n      \"type\": \"timeseries\"\n    },\n    {\n      \"datasource\": {\n        \"type\": \"prometheus\",\n        \"uid\": \"PBFA97CFB590B2093\"\n      },\n      \"fieldConfig\": {\n        \"defaults\": {\n          \"color\": {\n            \"mode\": \"palette-classic\"\n          },\n          \"custom\": {\n            \"axisCenteredZero\": false,\n            \"axisColorMode\": \"text\",\n            \"axisLabel\": \"\",\n            \"axisPlacement\": \"auto\",\n            \"barAlignment\": 0,\n            \"drawStyle\": \"line\",\n            \"fillOpacity\": 10,\n            \"gradientMode\": \"none\",\n            \"hideFrom\": {\n              \"legend\": false,\n              \"tooltip\": false,\n              \"viz\": false\n            },\n            \"insertNulls\": false,\n            \"lineInterpolation\": \"linear\",\n            \"lineWidth\": 1,\n            \"pointSize\": 5,\n            \"scaleDistribution\": {\n              \"type\": \"linear\"\n            },\n            \"showPoints\": \"never\",\n            \"spanNulls\": false,\n            \"stacking\": {\n              \"group\": \"A\",\n              \"mode\": \"none\"\n            },\n            \"thresholdsStyle\": {\n              \"mode\": \"off\"\n            }\n          },\n          \"links\": [],\n          \"mappings\": [],\n          \"thresholds\": {\n            \"mode\": \"absolute\",\n            \"steps\": [\n              {\n                \"color\": \"green\",\n                \"value\": null\n              },\n              {\n                \"color\": \"red\",\n                
\"value\": 80\n              }\n            ]\n          },\n          \"unit\": \"short\"\n        },\n        \"overrides\": []\n      },\n      \"gridPos\": {\n        \"h\": 7,\n        \"w\": 12,\n        \"x\": 12,\n        \"y\": 21\n      },\n      \"id\": 16,\n      \"links\": [],\n      \"options\": {\n        \"legend\": {\n          \"calcs\": [],\n          \"displayMode\": \"list\",\n          \"placement\": \"bottom\",\n          \"showLegend\": true\n        },\n        \"tooltip\": {\n          \"mode\": \"multi\",\n          \"sort\": \"none\"\n        }\n      },\n      \"pluginVersion\": \"10.3.3\",\n      \"targets\": [\n        {\n          \"datasource\": {\n            \"type\": \"prometheus\",\n            \"uid\": \"PBFA97CFB590B2093\"\n          },\n          \"expr\": \"sum(redis_connected_clients{instance=~\\\"$instance\\\"})\",\n          \"format\": \"time_series\",\n          \"intervalFactor\": 1,\n          \"legendFormat\": \"connected\",\n          \"refId\": \"A\"\n        },\n        {\n          \"datasource\": {\n            \"type\": \"prometheus\",\n            \"uid\": \"PBFA97CFB590B2093\"\n          },\n          \"expr\": \"sum(redis_blocked_clients{instance=~\\\"$instance\\\"})\",\n          \"format\": \"time_series\",\n          \"intervalFactor\": 1,\n          \"legendFormat\": \"blocked\",\n          \"refId\": \"B\"\n        }\n      ],\n      \"title\": \"Connected/Blocked Clients\",\n      \"type\": \"timeseries\"\n    },\n    {\n      \"datasource\": {\n        \"type\": \"prometheus\",\n        \"uid\": \"PBFA97CFB590B2093\"\n      },\n      \"fieldConfig\": {\n        \"defaults\": {\n          \"color\": {\n            \"mode\": \"palette-classic\"\n          },\n          \"custom\": {\n            \"axisBorderShow\": false,\n            \"axisCenteredZero\": false,\n            \"axisColorMode\": \"text\",\n            \"axisLabel\": \"\",\n            \"axisPlacement\": \"auto\",\n            
\"barAlignment\": 0,\n            \"drawStyle\": \"line\",\n            \"fillOpacity\": 0,\n            \"gradientMode\": \"none\",\n            \"hideFrom\": {\n              \"legend\": false,\n              \"tooltip\": false,\n              \"viz\": false\n            },\n            \"insertNulls\": false,\n            \"lineInterpolation\": \"linear\",\n            \"lineWidth\": 1,\n            \"pointSize\": 5,\n            \"scaleDistribution\": {\n              \"type\": \"linear\"\n            },\n            \"showPoints\": \"never\",\n            \"spanNulls\": true,\n            \"stacking\": {\n              \"group\": \"A\",\n              \"mode\": \"none\"\n            },\n            \"thresholdsStyle\": {\n              \"mode\": \"off\"\n            }\n          },\n          \"links\": [],\n          \"mappings\": [],\n          \"thresholds\": {\n            \"mode\": \"absolute\",\n            \"steps\": [\n              {\n                \"color\": \"green\"\n              },\n              {\n                \"color\": \"red\",\n                \"value\": 80\n              }\n            ]\n          },\n          \"unit\": \"s\",\n          \"unitScale\": true\n        },\n        \"overrides\": [\n          {\n            \"matcher\": {\n              \"id\": \"byValue\",\n              \"options\": {\n                \"op\": \"gte\",\n                \"reducer\": \"allIsZero\",\n                \"value\": 0\n              }\n            },\n            \"properties\": [\n              {\n                \"id\": \"custom.hideFrom\",\n                \"value\": {\n                  \"legend\": true,\n                  \"tooltip\": true,\n                  \"viz\": false\n                }\n              }\n            ]\n          }\n        ]\n      },\n      \"gridPos\": {\n        \"h\": 7,\n        \"w\": 12,\n        \"x\": 0,\n        \"y\": 28\n      },\n      \"id\": 20,\n      \"links\": [],\n      \"options\": {\n        
\"legend\": {\n          \"calcs\": [],\n          \"displayMode\": \"list\",\n          \"placement\": \"bottom\",\n          \"showLegend\": true\n        },\n        \"tooltip\": {\n          \"mode\": \"multi\",\n          \"sort\": \"desc\"\n        }\n      },\n      \"pluginVersion\": \"10.3.3\",\n      \"targets\": [\n        {\n          \"datasource\": {\n            \"type\": \"prometheus\",\n            \"uid\": \"PBFA97CFB590B2093\"\n          },\n          \"expr\":\n              \"sum(irate(redis_commands_duration_seconds_total{instance =~ \\\"$instance\\\"}[$__rate_interval])) by (cmd)\\n  /\\nsum(irate(redis_commands_total{instance =~ \\\"$instance\\\"}[$__rate_interval])) by (cmd)\\n\",\n          \"format\": \"time_series\",\n          \"interval\": \"\",\n          \"intervalFactor\": 2,\n          \"legendFormat\": \"{{ cmd }}\",\n          \"metric\": \"redis_command_calls_total\",\n          \"refId\": \"A\",\n          \"step\": 240\n        }\n      ],\n      \"title\": \"Average Time Spent by Command / sec\",\n      \"type\": \"timeseries\"\n    },\n    {\n      \"datasource\": {\n        \"type\": \"prometheus\",\n        \"uid\": \"PBFA97CFB590B2093\"\n      },\n      \"fieldConfig\": {\n        \"defaults\": {\n          \"color\": {\n            \"mode\": \"palette-classic\"\n          },\n          \"custom\": {\n            \"axisBorderShow\": false,\n            \"axisCenteredZero\": false,\n            \"axisColorMode\": \"text\",\n            \"axisLabel\": \"\",\n            \"axisPlacement\": \"auto\",\n            \"barAlignment\": 0,\n            \"drawStyle\": \"line\",\n            \"fillOpacity\": 80,\n            \"gradientMode\": \"none\",\n            \"hideFrom\": {\n              \"legend\": false,\n              \"tooltip\": false,\n              \"viz\": false\n            },\n            \"insertNulls\": false,\n            \"lineInterpolation\": \"linear\",\n            \"lineWidth\": 1,\n            
\"pointSize\": 5,\n            \"scaleDistribution\": {\n              \"type\": \"linear\"\n            },\n            \"showPoints\": \"never\",\n            \"spanNulls\": true,\n            \"stacking\": {\n              \"group\": \"A\",\n              \"mode\": \"normal\"\n            },\n            \"thresholdsStyle\": {\n              \"mode\": \"off\"\n            }\n          },\n          \"links\": [],\n          \"mappings\": [],\n          \"thresholds\": {\n            \"mode\": \"absolute\",\n            \"steps\": [\n              {\n                \"color\": \"green\"\n              },\n              {\n                \"color\": \"red\",\n                \"value\": 80\n              }\n            ]\n          },\n          \"unit\": \"s\",\n          \"unitScale\": true\n        },\n        \"overrides\": []\n      },\n      \"gridPos\": {\n        \"h\": 7,\n        \"w\": 12,\n        \"x\": 12,\n        \"y\": 28\n      },\n      \"id\": 14,\n      \"links\": [],\n      \"options\": {\n        \"legend\": {\n          \"calcs\": [],\n          \"displayMode\": \"list\",\n          \"placement\": \"bottom\",\n          \"showLegend\": true\n        },\n        \"tooltip\": {\n          \"mode\": \"multi\",\n          \"sort\": \"desc\"\n        }\n      },\n      \"pluginVersion\": \"10.3.3\",\n      \"targets\": [\n        {\n          \"datasource\": {\n            \"type\": \"prometheus\",\n            \"uid\": \"PBFA97CFB590B2093\"\n          },\n          \"expr\":\n              \"sum(irate(redis_commands_duration_seconds_total{instance=~\\\"$instance\\\"}[$__rate_interval])) by (cmd) != 0\",\n          \"format\": \"time_series\",\n          \"interval\": \"\",\n          \"intervalFactor\": 2,\n          \"legendFormat\": \"{{ cmd }}\",\n          \"metric\": \"redis_command_calls_total\",\n          \"refId\": \"A\",\n          \"step\": 240\n        }\n      ],\n      \"title\": \"Total Time Spent by Command / sec\",\n      
\"type\": \"timeseries\"\n    }\n  ],\n  \"refresh\": \"\",\n  \"schemaVersion\": 38,\n  \"style\": \"dark\",\n  \"tags\": [\n    \"prometheus\",\n    \"redis\"\n  ],\n  \"templating\": {\n    \"list\": [\n      {\n        \"current\": {\n          \"isNone\": true,\n          \"selected\": false,\n          \"text\": \"None\",\n          \"value\": \"\"\n        },\n        \"datasource\": {\n          \"type\": \"prometheus\",\n          \"uid\": \"PBFA97CFB590B2093\"\n        },\n        \"definition\": \"label_values(redis_up, namespace)\",\n        \"hide\": 0,\n        \"includeAll\": false,\n        \"multi\": false,\n        \"name\": \"namespace\",\n        \"options\": [],\n        \"query\": \"label_values(redis_up, namespace)\",\n        \"refresh\": 2,\n        \"regex\": \"\",\n        \"skipUrlSync\": false,\n        \"sort\": 1,\n        \"tagValuesQuery\": \"\",\n        \"tagsQuery\": \"\",\n        \"type\": \"query\",\n        \"useTags\": false\n      },\n      {\n        \"current\": {\n          \"selected\": true,\n          \"text\": [\n            \"redis-exporter:9121\"\n          ],\n          \"value\": [\n            \"redis-exporter:9121\"\n          ]\n        },\n        \"datasource\": {\n          \"type\": \"prometheus\",\n          \"uid\": \"PBFA97CFB590B2093\"\n        },\n        \"definition\": \"label_values(redis_up{namespace=~\\\"$namespace\\\"}, instance)\",\n        \"hide\": 0,\n        \"includeAll\": false,\n        \"multi\": true,\n        \"name\": \"instance\",\n        \"options\": [],\n        \"query\": \"label_values(redis_up{namespace=~\\\"$namespace\\\"}, instance)\",\n        \"refresh\": 2,\n        \"regex\": \"\",\n        \"skipUrlSync\": false,\n        \"sort\": 1,\n        \"tagValuesQuery\": \"\",\n        \"tagsQuery\": \"\",\n        \"type\": \"query\",\n        \"useTags\": false\n      }\n    ]\n  },\n  \"time\": {\n    \"from\": \"now-15m\",\n    \"to\": \"now\"\n  },\n  \"timepicker\": {\n   
 \"refresh_intervals\": [\n      \"5s\",\n      \"10s\",\n      \"30s\",\n      \"1m\",\n      \"5m\",\n      \"15m\",\n      \"30m\",\n      \"1h\",\n      \"2h\",\n      \"1d\"\n    ],\n    \"time_options\": [\n      \"5m\",\n      \"15m\",\n      \"1h\",\n      \"6h\",\n      \"12h\",\n      \"24h\",\n      \"2d\",\n      \"7d\",\n      \"30d\"\n    ]\n  },\n  \"timezone\": \"browser\",\n  \"title\": \"Redis Dashboard for Prometheus\",\n  \"uid\": \"e008bc3f-81a2-40f9-baf2-a33fd8dec7ec\",\n  \"version\": 3,\n  \"weekStart\": \"\"\n}\n"
  },
  {
    "path": "tools/local/monitoring/grafana/provisioning/datasources/datasource.yml",
    "content": "# config file version\napiVersion: 1\n\n# list of datasources that should be deleted from the database\ndeleteDatasources:\n  - name: Prometheus\n    orgId: 1\n\n# list of datasources to insert/update depending\n# whats available in the database\ndatasources:\n  # <string, required> name of the datasource. Required\n- name: Prometheus\n  # <string, required> datasource type. Required\n  type: prometheus\n  # <string, required> access mode. direct or proxy. Required\n  access: proxy\n  # <int> org id. will default to orgId 1 if not specified\n  orgId: 1\n  # <string> url\n  url: http://prometheus:9090\n  # <string> database password, if used\n  password:\n  # <string> database user, if used\n  user:\n  # <string> database name, if used\n  database:\n  # <bool> enable/disable basic auth\n  basicAuth: false\n  # <string> basic auth username, if used\n  basicAuthUser:\n  # <string> basic auth password, if used\n  basicAuthPassword:\n  # <bool> enable/disable with credentials headers\n  withCredentials:\n  # <bool> mark as default datasource. Max one per org\n  isDefault: true\n  # <map> fields that will be converted to json and stored in json_data\n  jsonData:\n     graphiteVersion: \"1.1\"\n     tlsAuth: false\n     tlsAuthWithCACert: false\n     timeInterval: 1s  # Based on https://stackoverflow.com/a/66830690\n  # <string> json object of data that will be encrypted.\n  secureJsonData:\n    tlsCACert: \"...\"\n    tlsClientCert: \"...\"\n    tlsClientKey: \"...\"\n  version: 1\n  # <bool> allow users to edit datasources from the UI.\n  editable: true\n"
  },
  {
    "path": "tools/local/monitoring/prometheus/prometheus.yml",
    "content": "# my global config\nglobal:\n  scrape_interval:     5s\n  evaluation_interval: 5s\n\n  # Attach these labels to any time series or alerts when communicating with\n  # external systems (federation, remote storage, Alertmanager).\n  external_labels:\n      monitor: 'my-project'\n\n# Load and evaluate rules in this file every 'evaluation_interval' seconds.\n# rule_files:\n  # - 'alert.rules'\n  # - \"first.rules\"\n  # - \"second.rules\"\n\n# alert\n# alerting:\n#   alertmanagers:\n#   - scheme: http\n#     static_configs:\n#     - targets:\n#       - \"alertmanager:9093\"\n\n# A scrape configuration containing exactly one endpoint to scrape:\n# Here it's Prometheus itself.\nscrape_configs:\n  # The job name is added as a label `job=<job_name>` to any timeseries scraped from this config.\n\n  - job_name: dragonfly\n    scrape_interval: 1s\n    static_configs:\n      - targets: ['host.docker.internal:6379']\n\n  - job_name: 'prometheus'\n    scrape_interval: 1s\n    static_configs:\n         - targets: ['localhost:9090']\n\n\n  - job_name: 'node-exporter'\n    scrape_interval: 1s\n    static_configs:\n      - targets: ['node-exporter:9100']\n        labels:\n          instance: node\n\n  - job_name: 'memcached-exporter'\n    scrape_interval: 1s\n    static_configs:\n      - targets: ['memcached-exporter:9150']\n\n  - job_name: 'redis-exporter'\n    scrape_interval: 1s\n    static_configs:\n      - targets: ['redis-exporter:9121']\n"
  },
  {
    "path": "tools/packaging/Dockerfile.alpine-dev",
    "content": "# syntax=docker/dockerfile:1\nFROM gcr.io/cadvisor/cadvisor:v0.46.0 AS libpfm_donor\n\nFROM alpine:3 AS builder\n\n# \"openssl-libs-static\" fixes \"Could NOT find OpenSSL, try to set the path to OpenSSL root folder in the\"\nRUN apk add autoconf-archive automake bash bison boost-dev cmake coreutils net-tools \\\n        curl ccache git gcc gdb g++ libunwind-dev libtool make ninja \\\n        openssl-dev openssl-libs-static patch zip zstd-static\n\n# This is required to make static linking work\nRUN ls -1 /usr/lib/libboost_*.so | while read -r _file; do ln -sfv ${_file} ${_file//.so/.a}; done\n\n# Borrow libpfm from cadvisor, so we don't have to build it ourselves\n# https://github.com/google/cadvisor/blob/master/deploy/Dockerfile\nCOPY --from=libpfm_donor /usr/local/lib/libpfm.so* /usr/local/lib/\n\nWORKDIR /build\n\nCOPY ./Makefile ./CMakeLists.txt ./\nCOPY src ./src\n\nCOPY .git ./.git\nCOPY patches ./patches\nCOPY helio ./helio\n\nRUN make release\n\nRUN build-release/dragonfly --version\n\nFROM alpine:3\n\nCOPY tools/docker/entrypoint.sh /usr/local/bin/entrypoint.sh\nCOPY tools/docker/healthcheck.sh /usr/local/bin/healthcheck.sh\nCOPY --from=builder /build/build-release/dragonfly /usr/local/bin/\n\nRUN apk --no-cache add libgcc libstdc++  \\\n     setpriv netcat-openbsd boost-context tini && ldd /usr/local/bin/dragonfly\n\nRUN addgroup -S -g 1000 dfly && adduser -S -G dfly -u 999 dfly\nRUN mkdir /data && chown dfly:dfly /data\n\nVOLUME /data\nWORKDIR /data\n\nHEALTHCHECK CMD /usr/local/bin/healthcheck.sh\n\n# Use tini as PID 1 to properly reap zombie processes (issue #5844)\nENTRYPOINT [\"/sbin/tini\", \"--\", \"entrypoint.sh\"]\n\nEXPOSE 6379\n\nCMD [\"dragonfly\", \"--logtostderr\"]\n"
  },
  {
    "path": "tools/packaging/Dockerfile.ubuntu-dev",
    "content": "# syntax=docker/dockerfile:1\nFROM ghcr.io/romange/ubuntu-dev:20-gcc14 AS builder\n\nWORKDIR /build\n\nCOPY ./Makefile ./CMakeLists.txt ./\nCOPY src ./src\n\nCOPY .git ./.git\nCOPY patches ./patches\nCOPY helio ./helio\n\nRUN make release\n\nRUN build-release/dragonfly --version\n\nFROM ubuntu:22.04\n\nRUN --mount=type=tmpfs,target=/var/cache/apt \\\n    --mount=type=tmpfs,target=/var/lib/apt/lists \\\n    apt update && \\\n    apt install -q -y --no-install-recommends netcat-openbsd ca-certificates redis-tools net-tools tini\n\nRUN groupadd -r -g 999 dfly && useradd -r -g dfly -u 999 dfly\nRUN mkdir /data && chown dfly:dfly /data\n\nVOLUME /data\nWORKDIR /data\n\nCOPY tools/docker/entrypoint.sh /usr/local/bin/entrypoint.sh\nCOPY tools/docker/healthcheck.sh /usr/local/bin/healthcheck.sh\nCOPY --from=builder /build/build-release/dragonfly /usr/local/bin/\n\nHEALTHCHECK CMD /usr/local/bin/healthcheck.sh\n\n# Use tini as PID 1 to properly reap zombie processes (issue #5844)\nENTRYPOINT [\"/usr/bin/tini\", \"--\", \"entrypoint.sh\"]\n\n# For inter-container communication.\nEXPOSE 6379\n\nCMD [\"dragonfly\", \"--logtostderr\"]\n"
  },
  {
    "path": "tools/packaging/Dockerfile.ubuntu-prod",
    "content": "# syntax=docker/dockerfile:1\nFROM ghcr.io/romange/ubuntu-dev:20-gcc14 AS builder\n\nARG TARGETPLATFORM\n\nWORKDIR /build\nCOPY tools/docker/fetch_release.sh /tmp/\nCOPY releases/dragonfly-* /tmp/\n\nRUN /tmp/fetch_release.sh ${TARGETPLATFORM}\n\n# Now prod image\nFROM ubuntu:22.04\n\n# ARG in fact change the env vars during the build process\n# ENV persist the env vars for the built image as well.\nARG QEMU_CPU\nARG DEBIAN_FRONTEND=noninteractive\n\nRUN --mount=type=tmpfs,target=/var/cache/apt \\\n    --mount=type=tmpfs,target=/var/lib/apt/lists \\\n    apt -q update && \\\n    apt install -q -y --no-install-recommends netcat-openbsd ca-certificates redis-tools net-tools tini\n\nRUN groupadd -r -g 999 dfly && useradd -r -g dfly -u 999 dfly\nRUN mkdir /data && chown dfly:dfly /data\n\nVOLUME /data\nWORKDIR /data\n\nCOPY tools/docker/entrypoint.sh /usr/local/bin/entrypoint.sh\nCOPY tools/docker/healthcheck.sh /usr/local/bin/healthcheck.sh\nCOPY --from=builder /build/dragonfly /usr/local/bin/\n\nHEALTHCHECK CMD /usr/local/bin/healthcheck.sh\n\n# Use tini as PID 1 to properly reap zombie processes (issue #5844)\nENTRYPOINT [\"/usr/bin/tini\", \"--\", \"entrypoint.sh\"]\n\n# For inter-container communication.\nEXPOSE 6379\n\nCMD [\"dragonfly\", \"--logtostderr\"]\n"
  },
  {
    "path": "tools/packaging/README.md",
    "content": "# Installation Packages\n\n## Overview\nThis directory includes a set of files and scripts to build installation packages for various Linux distributions.\n\n## Debian\nThe files needed to build the Debian package are all located under the \"debian\" directory.\nThe resulting package will install the binary of Dragonfly as well as generate a new service entry for dragonfly,\nthat can be controlled with the \"systemctl\" command to start, stop and check the status of the service.\n### Building\nTo build the package, you have a script called \"generate_debian_package.sh\". This script accepts the following parameters:\n* Optional binary path - the location from which to take the binary for the installation. The default for this is \"repo path/build-opt\".\nThe resulting package is written to the directory from which the script is executed.\nThis script depends on the following packages:\n* git\n* moreutils\n* debhelper\n* dpkg-dev\n\nTo build:\n```\n/path/to/dragonfly/tools/packaging/generate_debian_package.sh [/path/to/dragonfly-binary-file]\n```\n\nThis can only be run on Debian-based hosts.\nYou can use the following Dockerfile to generate this package:\n```\nFROM ubuntu:20.04\nARG DEBIAN_FRONTEND=noninteractive\nRUN apt update -y && apt-get install -y gcc dpkg-dev gpg vim wget git moreutils debhelper\n```\nBuild the above Docker image and then run it with your dragonfly source code path mounted as a volume for the build:\n```\ndocker build -t ubuntu-package .\ndocker run --rm -ti -v /path/to/dragonfly-repo:/mydocker-path ubuntu-package bash\n```\nAgain note that you need to be on the \"main\" branch to successfully build this package.\nNote: If at the end of the installation you see a message \"/usr/bin/deb-systemd-helper: error: systemctl preset failed on dragonfly.service: No such file or directory\",\nyou can ignore it, this seems to be related to [the following issue](https://groups.google.com/g/linux.debian.bugs.dist/c/m6xGZ82TdvM).\n"
  },
  {
    "path": "tools/packaging/debian/compat",
    "content": "11\n"
  },
  {
    "path": "tools/packaging/debian/control",
    "content": "Source: dragonfly\nMaintainer: DragonflyDB authors  <dragonfly@dragonflydb.io>\nStandards-Version:  4.2.1\nPriority: optional\nSection: database\nVcs-Git: https://github.com/dragonflydb/dragonfly\n\nPackage: dragonfly\nArchitecture: amd64 arm64\nDepends: libc6, openssl, adduser, zstd\nHomepage: https://dragonflydb.io\nDescription: A fast in-memory store that is fully compatible with Redis™* and Memcached.\n"
  },
  {
    "path": "tools/packaging/debian/dragonfly.conf",
    "content": "--pidfile=/var/run/dragonfly/dragonfly.pid\n--log_dir=/var/log/dragonfly\n--dir=/var/lib/dragonfly\n--max_log_size=20\n--version_check=true\n"
  },
  {
    "path": "tools/packaging/debian/dragonfly.install",
    "content": "debian/dragonfly.service /lib/systemd/system\ndebian/dragonfly.conf\t/etc/dragonfly\ndebian/bin/dragonfly /usr/bin\n"
  },
  {
    "path": "tools/packaging/debian/dragonfly.logrotate",
    "content": "# installed by debhelper by convention into /etc/logrotate.d/\n\n/var/log/dragonfly/dragonfly*.log {\n        daily\n        missingok\n\n        compress\n        compresscmd zstd\n        uncompresscmd unzstd\n        compressext .zst\n        notifempty\n\n# do not create an empty file after the rotation.\n        nocreate\n        prerotate\n                if lsof -t $1 > /dev/null; then\n                # file is open. Skipping rotation.\"\n                exit 0\n                fi\n        endscript\n\n# Possible hook to upload rotated logs to cloud storage.\n        postrotate\n                echo \"TBD: POSTROTATE\"\n        endscript\n}\n"
  },
  {
    "path": "tools/packaging/debian/dragonfly.postinst",
    "content": "#!/bin/sh\n\n# Script to run at the end of the installation\nset -eu\n\nUSER=\"dfly\"\nDIR_NAME=\"dragonfly\"\nGROUP=\"$USER\"\nCONFFILE=\"/etc/${DIR_NAME}/${DIR_NAME}.conf\"\n\nif [ \"$1\" = \"configure\" ]\nthen\n\tif ! dpkg-statoverride --list ${CONFFILE} >/dev/null 2>&1\n\tthen\n\t\tdpkg-statoverride --update --add ${USER} ${GROUP} 640 ${CONFFILE}\n\tfi\nfi\n\n#DEBHELPER#\n\nif [ \"$1\" = \"configure\" ]\nthen\n\tfind /etc/${DIR_NAME} -maxdepth 1 -type d -name '${DIR_NAME}.*.d' -empty -delete\nfi\n\nexit 0\n"
  },
  {
    "path": "tools/packaging/debian/dragonfly.postrm",
    "content": "#!/bin/sh\n# Script to run at the end of remove\nset -eu\nDIR_NAME=\"dragonfly\"\nUSER_NAME=\"dfly\"\nCONFFILE=\"/etc/${DIR_NAME}/${DIR_NAME}.conf\"\n\n# When purging the package, remove all traces\nif [ \"${1}\" = \"purge\" ]\nthen\n\tuserdel ${USER_NAME} || true\n\trm -rf /var/lib/${DIR_NAME} /var/log/${DIR_NAME} /etc/${DIR_NAME} /var/run/${DIR_NAME}\n\tdpkg-statoverride --remove ${CONFFILE} || test $? -eq 2\nfi\n\n#DEBHELPER#\n\nexit 0\n"
  },
  {
    "path": "tools/packaging/debian/dragonfly.preinst",
    "content": "#!/bin/sh\n\nset -eu\n# Script to run before the installation starts.\n# We are creating a user \"dfly\", and the directories that\n# will be used by the application\n\nUSER=\"dfly\"\nDIR_NAME=\"dragonfly\"\n\nsetup_dir () {\n\tDIR=\"${1}\"\n\tMODE=\"${2}\"\n\tGROUP=\"${3}\"\n\n\tmkdir -p ${DIR} || {\n\t\techo \"failed to create dir ${DIR}\"\n\t\treturn 1\n\t}\n\n\tif ! dpkg-statoverride --list ${DIR} >/dev/null 2>&1\n\tthen\n\t\techo \"changing owner for ${DIR} to user ${USER}\"\n\t\tchown ${USER}:${GROUP} ${DIR}\n\t\tchmod ${MODE} ${DIR}\n\tfi\n}\n\nif [ \"$1\" = \"install\" ]; then\n\tif ! id ${USER} >/dev/null 2>&1 ; then\n\t\techo \"trying to create user ${USER}\"\n\t\tadduser \\\n\t\t\t--system \\\n\t\t\t--home /var/lib/${DIR_NAME} \\\n\t\t\t--quiet \\\n\t\t\t--group \\\n\t\t\t${USER} || {\n\t\t\techo \"failed to add user ${USER}\"\n\t\t\texit 1\n\t\t}\n\n\t\tsetup_dir /var/log/${DIR_NAME} 2755 adm\n\t\tsetup_dir /var/lib/${DIR_NAME} 755 ${USER}\n\t\tsetup_dir /var/run/${DIR_NAME} 755 ${USER}\n\t\tsetup_dir /etc/${DIR_NAME} 2775 ${USER}\n\tfi\nfi\n#DEBHELPER#\n\nexit 0\n"
  },
  {
    "path": "tools/packaging/debian/dragonfly.service",
    "content": "[Unit]\nDescription=Modern and fast key-value store\nAfter=network.target\nDocumentation=\n\n[Service]\nType=simple\nEnvironmentFile=-/etc/dragonfly/environment\nExecStart=/usr/bin/dragonfly --flagfile=/etc/dragonfly/dragonfly.conf\nPIDFile=/var/run/dragonfly/dragonfly.pid\nTimeoutStopSec=infinity\nRestart=always\nUser=dfly\nGroup=dfly\nRuntimeDirectory=dragonfly\nRuntimeDirectoryMode=2755\n\nUMask=007\nPrivateTmp=yes\nLimitNOFILE=262144\nPrivateDevices=yes\nProtectHome=yes\nProtectSystem=full\n\nReadWritePaths=-/var/lib/dragonfly\nReadWritePaths=-/var/log/dragonfly\nReadWritePaths=-/var/run/dragonfly\n\nNoNewPrivileges=true\nCapabilityBoundingSet=CAP_SETGID CAP_SETUID CAP_SYS_RESOURCE\nMemoryDenyWriteExecute=true\nProtectKernelModules=true\nProtectKernelTunables=true\nProtectControlGroups=true\nRestrictRealtime=true\nRestrictNamespaces=true\nRestrictAddressFamilies=AF_INET AF_INET6 AF_UNIX\n\n\n\n[Install]\nWantedBy=multi-user.target\nAlias=dragonfly.service\n"
  },
  {
    "path": "tools/packaging/debian/rules",
    "content": "#!/usr/bin/make -f\n\nexport DEB_BUILD_OPTIONS=\"noopt nostrip nocheck\"\n\n%:\n\tdh $@\n\n\noverride_dh_auto_build:\n\t@echo \"no build is done here\"\n\noverride_dh_installchangelogs:\n\t@echo \"no change long installation\"\n\noverride_dh_auto_test:\n\t@echo \"no testing\"\n\noverride_dh_auto_clean:\n\tdh_auto_clean\n"
  },
  {
    "path": "tools/packaging/generate_changelog.sh",
    "content": "#!/usr/bin/env bash\n\n# This would generate a change log required for build Debian installation package.\n# Don't run this script on your local machine, run this inside docker\n# you would need to install git client as well as moreutils\n# apt install -y git moreutils\n# note: This script should run on branch \"main\".\n\nset -eu\n\nif [ $# -ne 1 ]; then\n\techo \"usage: <git repo path> <target path>\"\n\texit 1\nfi\nSCRIPT_ABS_PATH=$(realpath $0)\n\nTHIS_DIR=$(dirname ${SCRIPT_ABS_PATH})\nGIT_DIR=$1\nPACKGE_DIR=${THIS_DIR}/debian\nCHANGE_LOG=${PACKGE_DIR}/changelog\ncd ${GIT_DIR}\ngit config --global --add safe.directory ${GIT_DIR}\nhas_tags=$(git tag -l v* | wc -l 2>/dev/null)\nif [ \"$has_tags\" = \"\" -o \"$has_tags\" = \"0\" ]; then\n\tgit fetch --all --tags || {\n\t\techo \"failed to fetch tags, cannot build changelog file\"\n\t\texit 1\n\t}\nfi\n\n>${CHANGE_LOG}\nprevtag=v0.2.0\npkgname=`cat ${PACKGE_DIR}/control | grep '^Package: ' | sed 's/^Package: //'`\ngit tag -l v* | sort -V | while read tag; do\n    (echo \"$pkgname (${tag#v}) unstable; urgency=low\"; git log --pretty=format:'  * %s' $prevtag..$tag; git log --pretty='format:%n%n -- %aN <%aE>  %aD%n%n' $tag^..$tag) | cat - ${CHANGE_LOG} | sponge ${CHANGE_LOG}\n        prevtag=$tag\ndone\nif [ -f ${CHANGE_LOG} ]; then\n\thaslnes=$(wc -l ${CHANGE_LOG} 2>/dev/null | awk '{print $1}')\n\tif [ \"$haslnes\" = \"\" ]; then\n\t\techo \"empty file ${CHANGE_LOG}, failed to generate changelog\"\n\t\texit 1\n\tfi\nelse\n\techo \"failed to generate ${CHANGE_LOG}\"\n\texit 1\nfi\n"
  },
  {
    "path": "tools/packaging/generate_debian_package.sh",
    "content": "#!/usr/bin/env bash\n\n# Generate a debian package from a pre-build dragonfly bianry and set of files as well as generating change log from git history.\n# The result is debian install package file (.deb file).\n# This script accept 2 parameters:\n#\t1. Optioanl path to the location at which the binary file is located.\n# this depends on\n# * git\n# * moreutils\n# * debhelper\n# e.g. apt update -y && apt install -y git moreutils debhelper\n# Please note that is must run from main branch.\n# Best running this from inside a container.\n# The result are writing to the location from which you would execute the script (not where the script is located).\n# Version number is the tag number.\n# Params:\n#\t* optional location to the binary to place at the package\n\n\nset -eu\n\n\nif [ $# -ge 1 ]; then\n    VERSION_FILE=$1\n    if ! [ -f ${VERSION_FILE} ]; then\n        echo \"binary file ${VERSION_FILE} does not exist\"\n        exit 1\n    fi\n\nelse\n    echo \"no binary file provided\"\n    exit 1\nfi\n\nSCRIPT_ABS_PATH=$(realpath $0)\nSCRIPT_PATH=$(dirname ${SCRIPT_ABS_PATH})\nPACKAGES_PATH=${SCRIPT_PATH}/debian\nCHANGELOG_SCRIPT=generate_changelog.sh\nROOT_ABS_PATH=$(realpath $SCRIPT_PATH/../..)\nTEMP_WORK_DIR=$(mktemp -d)\nBASE_DIR=${TEMP_WORK_DIR}/packages\nBASE_PATH=${BASE_DIR}/dragonfly\nBINARY_TARGET_DIR=${BASE_PATH}/debian/bin\n\nfunction cleanup {\n    echo $@\n    rm -rf ${TEMP_WORK_DIR}\n    exit 1\n}\n\nmkdir -p ${BASE_PATH} || cleanup \"failed to create working directory for building the package\"\n\ncp -r ${PACKAGES_PATH} ${BASE_PATH} || cleanup \"failed to copy required files for the package build from ${PACKAGES_PATH}\"\n\ncp ${SCRIPT_PATH}/${CHANGELOG_SCRIPT} ${BASE_PATH} || cleanup \"failed to copy changelog script to ${BASE_PATH}\"\n\nmkdir -p ${BINARY_TARGET_DIR} || cleanup \"failed to create install directory for building the package\"\n\ncp ${VERSION_FILE} ${BINARY_TARGET_DIR}/dragonfly || cleanup \"failed to copy binary to target 
dir\"\n\n${BASE_PATH}/${CHANGELOG_SCRIPT} ${ROOT_ABS_PATH} || cleanup \"failed to generate changelog for package\"\n\nMY_DIR=${PWD}\ncd ${BASE_PATH}\ndpkg-buildpackage --build=binary || cleanup \"failed to generate the package\"\n\nTEMP_RESULT_FILE=$(ls ../*.deb)\nif [ \"$TEMP_RESULT_FILE\" = \"\" ]; then\n    cleanup \"failed to find debian file\"\nfi\n\nfor fl in ${TEMP_RESULT_FILE}; do\n    destfile=$(basename ${fl} | sed 's/_\\([0-9.]*_\\)/_/')\n    mv ${fl} ${MY_DIR}/${destfile}\ndone\n\ncd ${MY_DIR}\nRESULT_FILE=$(ls *.deb 2>/dev/null)\necho \"successfully built the install package at ${MY_DIR}/${RESULT_FILE}\"\nrm -rf ${TEMP_WORK_DIR}\n"
  },
  {
    "path": "tools/packaging/osrepos/README.md",
    "content": "# Package repositories for rpm and debian packages\n\nThis directory contains scripts and definitions for setting up YUM and apt repositories for Linux users to install\ndragonfly packages.\n\nThe repositories are served as static websites. The generate-site workflow is used to set up and deploy the sites using\nscripts and definitions included here.\n\nThe workflow does the following tasks:\n\n* Download the latest 5 releases from dragonfly releases page, specifically deb and rpm assets\n    * for deb files, only the latest package is downloaded and present (see note below)\n* Set up a directory structure separating deb and rpm files into version specific paths\n* Sign the packages (see note on GPG)\n* Deploy the assets prepared, along with the public GPG key and repo definitions for apt and rpm tooling\n\n## Using the YUM repository\n\nAdd the repository using:\n\n```shell\nsudo dnf config-manager addrepo --from-repofile=https://packages.dragonflydb.io/dragonfly.repo\n```\n\nThen install dragonfly as usual, or a specific version:\n\n```shell\nsudo dnf -y install dragonfly-0:v1.33.1-1.fc30.x86_64\n```\n\n## Using the APT repository\n\nFirst download the public GPG key to an appropriate location:\n\n```shell\nsudo curl -Lo /usr/share/keyrings/dragonfly-keyring.public https://packages.dragonflydb.io/pgp-key.public\n```\n\nThen add the sources file:\n\n```shell\nsudo curl -Lo /etc/apt/sources.list.d/dragonfly.sources https://packages.dragonflydb.io/dragonfly.sources\n```\n\nFinally install dragonfly using apt\n\n```shell\nsudo apt update && sudo apt install dragonfly\n```\n\n#### Versions in APT repository\n\nUnlike the yum repo, the apt repo only has the latest version. 
The reason for this is the tool, `reprepro` supplied by\ndebian to build repositories only supports multiple\nversions in version 5.4 onwards, and the github runner using ubuntu-latest does not have this version.\n\nAnother option would be to use the components feature of apt repositories in the sources file we ask users to install,\nbut then the versions would need\nto be hardcoded in the sources file and the user would have\nto update the file with each new release which makes for a bad user experience. As of now users wanting older packages\nshould download them directly.\n\n### Signing packages\n\nThe packages are signed using the GPG key imported from the secret GPG_PRIVATE_KEY in this repository.\n\nThe corresponding public key is served with site assets, so the apt/yum/dnf based tooling can consume the public key to\nverify package integrity.\n\n### TODO\n\n- [X] debian packages signing (not required? release file is signed)\n- [X] debian repo metadata setup\n- [ ] tests asserting that packages are installable?\n"
  },
  {
    "path": "tools/packaging/osrepos/dragonfly.repo",
    "content": "[dragonfly]\nname=Dragonfly Packages\nbaseurl=https://packages.dragonflydb.io/rpm/\nenabled=1\ngpgcheck=1\ngpgkey=https://packages.dragonflydb.io/pgp-key.public\n"
  },
  {
    "path": "tools/packaging/osrepos/dragonfly.sources",
    "content": "Types: deb\nURIs: https://packages.dragonflydb.io/deb\nSuites: noble\nComponents: main\nSigned-By: /usr/share/keyrings/dragonfly-keyring.public\n"
  },
  {
    "path": "tools/packaging/osrepos/pgp-key.public",
    "content": "-----BEGIN PGP PUBLIC KEY BLOCK-----\n\nmQINBGjkpygBEADuvzXdOXChr/e4Uh2UBne60NPjmuhpjmArfMfqySeRezJ1Nuvd\nAvKNuYRyCw+zsh0Zc/sSANpIdAeKPqrfZJgfEIJI0f8WVjfqsCKi+yWB7Bx0GjQ9\ny/xoFLKkT7p0P/F4yRlb8kQq2KVP9UvcZBETJY96TpQIJM4N3XoG+8DsELW5HYF2\n6sbhgmaNUsxm9oH5UqHcBc7TTgUp10GmZFR4dTeB1IffD/eLMVDMQ8ygzmVxkJPQ\nzEKfpFFzseTVyreQlZ5U4GDR8FiB0mY4gZxbCywNqZRycyMM7v4EHuUO0fOgRHdl\n5dseF+H1aEG/00JRo6zjiIbgMga0x9wYmVWvTU4wLnGoomukEMCkEQxlil1QjUlK\nXI0EltU03DuGki5uhYc9dSS1h74ku2xWePaMsvmxrTphRo1WQBDutzVXSIZ6NBc3\nBN+VBHcumVvif9aRrsfsj2CXhnOB61AW+VWk3fk0evW9cceXZDA0NgGdyeTfS7EI\npioaWtmE3Uv3AfHTlNbMytxG7d7k7oAT2xV6z2IygyQZ5LI1tvSJJ+I5kZHKeruj\nk2bFp6H9FGi+g4kA+z9QWgkt+0UXYbjKZAs5Es1uGrRk6o1rAyVTKBKz62F0YQbK\nj8Q49Z6iSobaKeQG8naCVkALSM49i4Zpw3x1jUpd7k8/KhpJObq3rewqIQARAQAB\ntCREcmFnb25mbHkgPHBhY2thZ2luZ0BkcmFnb25mbHlkYi5pbz6JAlIEEwEKADwW\nIQRgvYPC7oTdikxvMGcSMEAYvD0qugUCaOSnKAMbLwQFCwkIBwICIgIGFQoJCAsC\nBBYCAwECHgcCF4AACgkQEjBAGLw9KrpGbw//VH2zUjaoSh7SnKGdDOA7A95o2EET\nZvChxImyb6xNKfUoMajPnKcJFg514aPFKLuJl4qJmikxdqBF/bYkznCQSJcLQhsT\npvkqanUh/XwBqbJye1QjBq1o0qXLgeY/Ciz2nqupwLQdzvGHO6+2Yk04T89pnZEo\nCDSoZKkacu8TpalStqzqDlumryXZzdZ35hAu9OT0fVc2wtcMiY3pznLG1iawNk8I\nbzme0ezGA/fk7xEptEbGlb1OtUV5+iG/SFEVvic8GTNf1yLQNCVK3QzD1ciL3MzR\nOTH8a04ov2bMxjl8bIefKE/dFBeCSKbvkfTSMAEgqUAuRp7gvoO7uHO05A5AHU2i\ny4agskGkgQR9u1yqUXyYIM9kkpuUqqAkwRqg1pw55LG686Xe35QYH4zbpgvr45/Q\nJRPFjCbLzR1ZcNyrecHgrq2M9WNlk6dtdWBSJuc7L0M8KJqfrPxQmMpMm/KR43Ey\num0FCgb2J+ceO2W4GrE/DHHoNTt2iio2gMcmRXM7XTmVupsigbYk7AqGncLIQ60B\n94jtv16ggXIeA5sPqmyssARXtweTM+EzLLs4K79be4K5j/yyg3CxxvZcq5CZNwoi\nfbQgGVNb4SS+nv2r1mVe9XNSonmVVrAqSIFpptH5ahqgaRDUnmy0Lzk7qiHv02OW\nPjbSiwQGHDHwq98=\n=SOT5\n-----END PGP PUBLIC KEY BLOCK-----\n"
  },
  {
    "path": "tools/packaging/osrepos/reprepro-config/distributions",
    "content": "Codename: noble\nSuite: stable\nArchitectures: amd64 arm64\nComponents: main\nOrigin: Dragonfly\nLabel: Dragonfly\nDescription: Dragonfly APT repository\nSignWith: 60BD83C2EE84DD8A4C6F306712304018BC3D2ABA\n"
  },
  {
    "path": "tools/packaging/osrepos/reprepro-config/options",
    "content": "verbose\n"
  },
  {
    "path": "tools/packaging/osrepos/requirements.txt",
    "content": "certifi>=2025.10.5\ncharset-normalizer>=3.4.3\nidna>=3.10\nrequests>=2.32.5\nurllib3>=2.5.0\n"
  },
  {
    "path": "tools/packaging/osrepos/scripts/fetch-releases.py",
    "content": "import dataclasses\nimport enum\nimport os.path\nimport time\n\nimport requests\n\n\"\"\"\nFetches the latest five releases for RPM and the single latest release for DEB.\nRPM files are placed in the destination folder where the DNF repo will expect them.\nDEB files are placed in a temporary location from where they will be copied by the\nreprepro tool.\n\"\"\"\n\nRELEASE_URL = \"https://api.github.com/repos/dragonflydb/dragonfly/releases\"\n\n\nclass AssetKind(enum.Enum):\n    RPM = 1\n    DEB = 2\n\n\n@dataclasses.dataclass\nclass Package:\n    kind: AssetKind\n    download_url: str\n    version: str\n    filename: str\n    arch: str\n\n    @staticmethod\n    def from_url(url: str) -> \"Package\":\n        tokens = url.split(\"/\")\n        filename = tokens[-1]\n        kind = AssetKind.RPM if filename.endswith(\".rpm\") else AssetKind.DEB\n        if kind == AssetKind.DEB:\n            arch = filename.split(\".\")[0].split(\"_\")[1]\n        else:\n            arch = filename.split(\".\")[1]\n        return Package(\n            kind=kind, download_url=url, version=tokens[-2], filename=filename, arch=arch\n        )\n\n    def storage_path(self, root: str) -> str:\n        match self.kind:\n            case AssetKind.RPM:\n                return os.path.join(root, \"rpm\", self.version)\n            case AssetKind.DEB:\n                # Debian packages are stored in a temporary path.\n                # The reprepro tool will copy them later to the final path.\n                return os.path.join(\"deb_tmp\", self.arch, self.version)\n\n\ndef collect_download_urls() -> list[Package]:\n    packages = []\n    # TODO retry logic\n    response = requests.get(RELEASE_URL)\n    releases = response.json()\n    for release in releases[:5]:\n        for asset in release[\"assets\"]:\n            if asset[\"name\"].endswith(\".rpm\") or asset[\"name\"].endswith(\".deb\"):\n                
packages.append(Package.from_url(asset[\"browser_download_url\"]))\n    return packages\n\n\ndef download_packages(root: str, packages: list[Package]):\n    # The debian repository building tool, reprepo, only supports a single package per version by default.\n    # The ability to support multiple versions has been added but is not present in ubuntu-latest on\n    # github action runners yet. So we only download one package per architecture, the latest, for ubuntu.\n    # The rest of the scripts work on a set of packages, so that when the Limit parameter is supported,\n    # we can remove this flag and start hosting more than the latest versions.\n    # Another alternative would be to use the components feature of reprepo, but it would involve updating\n    # the repository definition itself for each release, which is a bad experience for end users.\n    deb_done = 0\n    for package in packages:\n        # Download the latest arm and amd64 package for .deb format\n        if package.kind == AssetKind.DEB and deb_done == 2:\n            continue\n\n        print(f\"Downloading {package.download_url}\")\n        path = package.storage_path(root)\n        if not os.path.exists(path):\n            os.makedirs(path)\n\n        target = os.path.join(path, package.filename)\n        # TODO retry logic\n        response = requests.get(package.download_url)\n        with open(target, \"wb\") as f:\n            f.write(response.content)\n        print(f\"Downloaded {package.download_url}\")\n        time.sleep(0.5)\n        if package.kind == AssetKind.DEB:\n            deb_done += 1\n\n\ndef main(root: str):\n    packages = collect_download_urls()\n    download_packages(root, packages)\n\n\nif __name__ == \"__main__\":\n    import sys\n\n    if len(sys.argv) == 1:\n        print(f\"Usage: {sys.argv[0]} <site folder>\")\n        sys.exit(1)\n    main(sys.argv[1])\n"
  },
  {
    "path": "tools/packaging/osrepos/scripts/generate-apt-repo.sh",
    "content": "set -e\n\nMETADATA_ROOT=_site/deb\nmkdir -pv ${METADATA_ROOT}/conf\n\ncp -av reprepro-config/* ${METADATA_ROOT}/conf\n\nreprepro -b ${METADATA_ROOT} createsymlinks\nreprepro -b ${METADATA_ROOT} export\n\nfor file in $(find deb_tmp -type f -name \"*.deb\"); do\n  reprepro -b ${METADATA_ROOT} includedeb noble \"${file}\"\ndone\n\n# reprepro copied files to the destination, the temporary files can be removed now\nrm -rf deb_tmp\n"
  },
  {
    "path": "tools/packaging/osrepos/scripts/generate-index.py",
    "content": "import os.path\n\n\"\"\"\nGenerates index.html files recursively in all directories. Note that this is strictly an optional step.\nBoth YUM and APT repositories work fine without index listing, but this is useful for debugging a broken\nsite.\n\"\"\"\n\nHEADER = \"\"\"<!DOCTYPE html>\n<html>\n<body>\n\"\"\"\n\nFOOTER = \"\"\"</body>\n</html>\n\"\"\"\n\n\ndef build_index(dirpath):\n    print(f\"building index.html for {dirpath}\")\n    target = os.path.join(dirpath, \"index.html\")\n    with open(target, \"w\") as f:\n        f.write(HEADER.format(dir=dirpath))\n        for item in sorted(os.listdir(dirpath)):\n            if item == \"index.html\":\n                continue\n            name = item + \"/\" if os.path.isdir(os.path.join(dirpath, item)) else item\n            f.write(f\"\"\"<a href=\"{item}\">{name}</a><br>\\n\"\"\")\n        f.write(FOOTER)\n\n\ndef recurse_dir(root):\n    for root, dirs, _ in os.walk(root):\n        build_index(root)\n\n\nif __name__ == \"__main__\":\n    import sys\n\n    if len(sys.argv) == 1:\n        print(f\"Usage: {sys.argv[0]} <site folder>\")\n        sys.exit(1)\n\n    recurse_dir(sys.argv[1])\n"
  },
  {
    "path": "tools/packaging/osrepos/scripts/sign-rpms.sh",
    "content": "set -e\n\n# GPG key must have been imported\n\necho \"Signing RPMs with key id ${1}\"\n\n# The script fails in CI without an empty GPG_TTY\nGPG_TTY=\"\"\nexport GPG_TTY\n\nfor file in $(find _site/rpm -type f -name \"*.rpm\"); do\n  echo \"Signing ${file}\"\n  rpm --define \"__gpg /usr/bin/gpg\" --define \"%_signature gpg\" --define \"%_gpg_name ${1}\" --addsign \"${file}\"\ndone\n"
  },
  {
    "path": "tools/packaging/rpm/build_rpm.sh",
    "content": "#!/bin/bash\n\nset -e\n\n# Get the full path of the binary\nARCHIVE=$(realpath \"$1\")\nVERSION=\"$2\"\necho \"Preparing $ARCHIVE\"\n\nSCRIPT_DIR=\"$(dirname \"$(readlink -f \"$0\")\")\"\n\n# Setup RPM build environment in a unique subdirectory under /tmp\nRPM_ROOT=$(mktemp -d /tmp/rpmbuild_XXXXXX)\necho \"Working dir is $RPM_ROOT\"\nmkdir -p $RPM_ROOT/{BUILD,RPMS,SOURCES,SPECS}\n\n# Put the archive and configuration files to the SOURCES directory\nln -s \"$ARCHIVE\" -t \"$RPM_ROOT/SOURCES/\"\ncp $SCRIPT_DIR/dragonfly.service $RPM_ROOT/SOURCES/\ncp $SCRIPT_DIR/dragonfly.conf $RPM_ROOT/SOURCES/\n\ncp $SCRIPT_DIR/dragonfly.spec $RPM_ROOT/SPECS/\n\nrpmbuild --define \"_topdir $RPM_ROOT\" --define \"version $VERSION\" -bb \"$RPM_ROOT/SPECS/dragonfly.spec\"\nmv $RPM_ROOT/RPMS/*.rpm ./\n"
  },
  {
    "path": "tools/packaging/rpm/dragonfly.service",
    "content": "[Unit]\nDescription=Modern and fast key-value store\nAfter=network.target\nDocumentation=\n\n[Service]\nType=simple\nEnvironmentFile=-/etc/dragonfly/environment\nExecStart=/usr/local/bin/dragonfly --flagfile=/etc/dragonfly/dragonfly.conf\nPIDFile=/var/run/dragonfly/dragonfly.pid\nTimeoutStopSec=infinity\nRestart=always\nUser=dfly\nGroup=dfly\nRuntimeDirectory=dragonfly\nRuntimeDirectoryMode=2755\n\nUMask=007\nPrivateTmp=yes\nLimitNOFILE=262144\nPrivateDevices=yes\nProtectHome=yes\nProtectSystem=full\n\nReadWritePaths=-/var/lib/dragonfly\nReadWritePaths=-/var/log/dragonfly\nReadWritePaths=-/var/run/dragonfly\n\nNoNewPrivileges=true\nCapabilityBoundingSet=CAP_SETGID CAP_SETUID CAP_SYS_RESOURCE\nMemoryDenyWriteExecute=true\nProtectKernelModules=true\nProtectKernelTunables=true\nProtectControlGroups=true\nRestrictRealtime=true\nRestrictNamespaces=true\nRestrictAddressFamilies=AF_INET AF_INET6 AF_UNIX\n\n\n\n[Install]\nWantedBy=multi-user.target\nAlias=dragonfly.service\n"
  },
  {
    "path": "tools/packaging/rpm/dragonfly.spec",
    "content": "%define     pkg_name dragonfly\n%define     archive dragonfly-%{_arch}.tar.gz\n\n# How the package name looks like\n%define     _build_name_fmt  %%{NAME}.%%{ARCH}.rpm\n\nName:       %{pkg_name}\nVersion:    %{version}\nRelease:    1%{?dist}\nSummary:    DragonflyDB memory store\nLicense:    BUSL-1.1\nURL:        https://www.dragonflydb.io\nSource0:    %{archive}\nSource1:    dragonfly.service\nSource2:    dragonfly.conf\nGroup:      Applications/System\nProvides:   user(dfly)\nProvides:   group(dfly)\n\n%description\nDragonflyDB is a vertically scalable and memory efficient in-memory store\nthat is compatible with Redis OSS and Memcached.\n\n%pre\n\ngetent group dfly >/dev/null || groupadd -r dfly\ngetent passwd dfly >/dev/null || useradd -r -g dfly -M -s /sbin/nologin -c \"User for DragonflyDB service\" dfly\n\n%prep\n\n%build\ntar xvfz %{SOURCE0}\nmv ./dragonfly-%{_arch} ./dragonfly\n\n%install\nmkdir -p %{buildroot}/usr/local/bin\nmkdir -p %{buildroot}/etc/dragonfly\nmkdir -p %{buildroot}/var/log/dragonfly\nmkdir -p %{buildroot}/var/lib/dragonfly\n\ninstall -m 755 ./dragonfly %{buildroot}/usr/local/bin/\nmkdir -p %{buildroot}/usr/lib/systemd/system\ncp %{SOURCE1} %{buildroot}/usr/lib/systemd/system/\ncp %{SOURCE2} %{buildroot}/etc/dragonfly/\n\n%clean\nrm -rf %{buildroot}\nrm -rf %{_builddir}/*\n\n%files\n%attr(-,dfly,dfly) /usr/local/bin/dragonfly\n%attr(-,dfly,dfly) /usr/lib/systemd/system/dragonfly.service\n%attr(-,dfly,dfly) /etc/dragonfly/dragonfly.conf\n\n%changelog\n"
  },
  {
    "path": "tools/parse_allocator_tracking_logs.py",
    "content": "#!/usr/bin/env python3\n\n\"\"\"\nUsage:\n1. First run Dragonfly with tracking allocator enabled. Must be a single allocator range with 100% sampling rate to catch both allocations and deallocations.\n2. Finish tracking.\n3. cat /tmp/dragonfly.INFO |  ./parse_allocator_tracking_logs.py\n\"\"\"\nimport re\nimport sys\n\n\ndef parse_log(log_lines):\n    memory_map = {}\n\n    allocation_pattern = re.compile(r\"Allocating (\\d+) bytes \\((0x[0-9a-f]+)\\)\")\n    deallocation_pattern = re.compile(r\"Deallocating (\\d+) bytes \\((0x[0-9a-f]+)\\)\")\n\n    for line in log_lines:\n        allocation_match = allocation_pattern.search(line)\n        deallocation_match = deallocation_pattern.search(line)\n\n        if allocation_match:\n            size = int(allocation_match.group(1))\n            address = allocation_match.group(2)\n            assert address not in memory_map\n            memory_map[address] = (size, line)\n        elif deallocation_match:\n            size = int(deallocation_match.group(1))\n            address = deallocation_match.group(2)\n            if address in memory_map:\n                assert size == memory_map[address][0]\n                del memory_map[address]\n            else:\n                print(f\"Deallocating non existing address: {address} {size}\")\n\n    return memory_map\n\n\nif __name__ == \"__main__\":\n    log_lines = sys.stdin.readlines()\n    memory_map = parse_log(log_lines)\n\n    for address, item in memory_map.items():\n        print(f\"Address: {address}, Size: {item[0]} bytes, original line: `{item[1]}`\")\n"
  },
  {
    "path": "tools/plot_memtier_latency.py",
    "content": "#!/usr/bin/env python3\n\"\"\"\nScript to read memtier_benchmark JSON output and generate interactive latency charts.\n\nThe script generates interactive HTML charts (using Plotly) where you can:\n- Click on legend items to show/hide time series\n- Zoom in/out and pan\n- Hover over data points for detailed information\n\nTo generate the JSON file, run memtier_benchmark with the --json-out-file option:\n\n    memtier_benchmark --server <host> --port <port> \\\\\n        --json-out-file memtier_out.json \\\\\n        [other options...]\n\nExample:\n    memtier_benchmark --json-out-file memtier_out.json \\\\\n        --clients 25 --threads 4 --test-time 120 \\\\\n        --ratio 1:10\n\nThen run this script to visualize the results:\n    ./plot_memtier_latency.py memtier_out.json\n\nRequirements:\n    pip install plotly matplotlib numpy\n\nNote: If plotly is not available, falls back to static SVG charts.\n\"\"\"\n\nimport json\nimport matplotlib.pyplot as plt\nimport numpy as np\nfrom pathlib import Path\nimport webbrowser\nimport tempfile\nimport os\n\n# Try to import plotly for interactive charts\ntry:\n    import plotly.graph_objects as go\n    from plotly.subplots import make_subplots\n\n    PLOTLY_AVAILABLE = True\nexcept ImportError:\n    PLOTLY_AVAILABLE = False\n    print(\"Warning: plotly not available. 
Install with: pip install plotly\")\n\n\ndef load_json_data(filepath):\n    \"\"\"Load JSON data from file.\"\"\"\n    with open(filepath, \"r\") as f:\n        return json.load(f)\n\n\ndef extract_latency_timeseries(data, operation, ignore_last_seconds=3):\n    \"\"\"\n    Extract latency time series data from memtier output.\n\n    Args:\n        data: Parsed JSON data\n        operation: Operation type (e.g., 'Mgets', 'Sets', 'Gets', etc.)\n        ignore_last_seconds: Number of seconds to ignore from the end\n\n    Returns:\n        Dictionary with time series data\n    \"\"\"\n    time_serie = data[\"ALL STATS\"][operation][\"Time-Serie\"]\n\n    times = []\n    avg_latencies = []\n    p50_latencies = []\n    p99_latencies = []\n    p99_9_latencies = []\n    min_latencies = []\n    max_latencies = []\n    ops_per_sec = []\n\n    # Sort time points and determine cutoff\n    sorted_times = sorted(time_serie.keys(), key=lambda x: int(x))\n    if ignore_last_seconds > 0 and len(sorted_times) > ignore_last_seconds:\n        # Remove last N seconds\n        sorted_times = sorted_times[:-ignore_last_seconds]\n\n    for time_point in sorted_times:\n        interval_data = time_serie[time_point]\n        times.append(int(time_point))\n        avg_latencies.append(interval_data[\"Average Latency\"])\n        p50_latencies.append(interval_data.get(\"p50.00\", 0))\n        p99_latencies.append(interval_data.get(\"p99.00\", 0))\n        p99_9_latencies.append(interval_data.get(\"p99.90\", 0))\n        min_latencies.append(interval_data[\"Min Latency\"])\n        max_latencies.append(interval_data[\"Max Latency\"])\n\n        # Calculate ops/sec for this interval (count per second)\n        ops_per_sec.append(interval_data[\"Count\"])\n\n    return {\n        \"times\": times,\n        \"avg\": avg_latencies,\n        \"p50\": p50_latencies,\n        \"p99\": p99_latencies,\n        \"p99.9\": p99_9_latencies,\n        \"min\": min_latencies,\n        \"max\": 
max_latencies,\n        \"ops_per_sec\": ops_per_sec,\n    }\n\n\ndef plot_latency_chart_interactive(data, output_file=\"latency_chart.html\", open_browser=True):\n    \"\"\"\n    Generate interactive latency chart using Plotly.\n\n    Args:\n        data: Parsed JSON data\n        output_file: Output filename for the chart\n        open_browser: If True, open the chart in the browser\n    \"\"\"\n    if not PLOTLY_AVAILABLE:\n        print(\"Plotly not available. Falling back to matplotlib...\")\n        # Change extension to svg for matplotlib fallback\n        svg_file = output_file.replace(\".html\", \".svg\")\n        return plot_latency_chart(data, svg_file, open_browser)\n\n    # Get all available operations from ALL STATS (excluding 'Runtime')\n    all_stats = data[\"ALL STATS\"]\n    operations = [\n        key\n        for key in all_stats.keys()\n        if key != \"Runtime\" and isinstance(all_stats[key], dict) and \"Time-Serie\" in all_stats[key]\n    ]\n\n    if not operations:\n        print(\"Error: No operation data found in JSON\")\n        return\n\n    # Extract data for all operations\n    ops_data = {}\n    for op in operations:\n        ops_data[op] = extract_latency_timeseries(data, op, ignore_last_seconds=3)\n\n    # Determine subplot layout\n    num_ops = len(operations)\n    if num_ops == 1:\n        rows, cols = 2, 1\n        specs = [[{\"secondary_y\": False}], [{\"secondary_y\": False}]]\n        subplot_titles = [f\"{operations[0]} Latency\", \"Throughput\"]\n    elif num_ops == 2:\n        rows, cols = 2, 2\n        specs = [\n            [{\"secondary_y\": False}, {\"secondary_y\": False}],\n            [{\"secondary_y\": False}, {\"secondary_y\": False}],\n        ]\n        subplot_titles = [\n            f\"{operations[0]} Latency\",\n            f\"{operations[1]} Latency\",\n            \"Latency Comparison\",\n            \"Throughput\",\n        ]\n    else:\n        rows = num_ops + 1\n        cols = 1\n        specs = 
[[{\"secondary_y\": False}] for _ in range(rows)]\n        subplot_titles = [f\"{op} Latency\" for op in operations] + [\"Throughput\"]\n\n    # Create subplots\n    fig = make_subplots(\n        rows=rows,\n        cols=cols,\n        subplot_titles=subplot_titles,\n        specs=specs,\n        vertical_spacing=0.12,\n        horizontal_spacing=0.1,\n    )\n\n    # Plot individual operation latencies\n    for idx, op in enumerate(operations):\n        if num_ops == 2:\n            row = (idx // cols) + 1\n            col = (idx % cols) + 1\n        else:\n            row = idx + 1\n            col = 1\n\n        op_data = ops_data[op]\n\n        # Add traces with independent visibility toggle (no legendgroup)\n        fig.add_trace(\n            go.Scatter(\n                x=op_data[\"times\"],\n                y=op_data[\"avg\"],\n                name=f\"{op} Avg\",\n                mode=\"lines\",\n                line=dict(width=2),\n            ),\n            row=row,\n            col=col,\n        )\n        fig.add_trace(\n            go.Scatter(\n                x=op_data[\"times\"],\n                y=op_data[\"p50\"],\n                name=f\"{op} p50\",\n                mode=\"lines\",\n                line=dict(width=2),\n            ),\n            row=row,\n            col=col,\n        )\n        fig.add_trace(\n            go.Scatter(\n                x=op_data[\"times\"],\n                y=op_data[\"p99\"],\n                name=f\"{op} p99\",\n                mode=\"lines\",\n                line=dict(width=2),\n            ),\n            row=row,\n            col=col,\n        )\n        fig.add_trace(\n            go.Scatter(\n                x=op_data[\"times\"],\n                y=op_data[\"p99.9\"],\n                name=f\"{op} p99.9\",\n                mode=\"lines\",\n                line=dict(width=2),\n            ),\n            row=row,\n            col=col,\n        )\n\n        fig.update_xaxes(title_text=\"Time (seconds)\", 
row=row, col=col)\n        fig.update_yaxes(title_text=\"Latency (ms)\", row=row, col=col)\n\n    # Add comparison plot if multiple operations and layout allows\n    if num_ops == 2:\n        comp_row, comp_col = 2, 1\n        for op in operations:\n            op_data = ops_data[op]\n            fig.add_trace(\n                go.Scatter(\n                    x=op_data[\"times\"],\n                    y=op_data[\"p99\"],\n                    name=f\"{op} p99 (comp)\",\n                    mode=\"lines\",\n                    line=dict(width=2, dash=\"solid\"),\n                ),\n                row=comp_row,\n                col=comp_col,\n            )\n            fig.add_trace(\n                go.Scatter(\n                    x=op_data[\"times\"],\n                    y=op_data[\"avg\"],\n                    name=f\"{op} Avg (comp)\",\n                    mode=\"lines\",\n                    line=dict(width=2, dash=\"dash\"),\n                ),\n                row=comp_row,\n                col=comp_col,\n            )\n        fig.update_xaxes(title_text=\"Time (seconds)\", row=comp_row, col=comp_col)\n        fig.update_yaxes(title_text=\"Latency (ms)\", row=comp_row, col=comp_col)\n\n    # Add throughput plot\n    if num_ops == 2:\n        tp_row, tp_col = 2, 2\n    else:\n        tp_row = rows\n        tp_col = 1\n\n    for op in operations:\n        op_data = ops_data[op]\n        fig.add_trace(\n            go.Scatter(\n                x=op_data[\"times\"],\n                y=op_data[\"ops_per_sec\"],\n                name=f\"{op} ops/sec\",\n                mode=\"lines\",\n                line=dict(width=2),\n            ),\n            row=tp_row,\n            col=tp_col,\n        )\n\n    fig.update_xaxes(title_text=\"Time (seconds)\", row=tp_row, col=tp_col)\n    fig.update_yaxes(title_text=\"Operations per Second\", row=tp_row, col=tp_col)\n\n    # Update layout\n    fig.update_layout(\n        title_text=\"Memtier Benchmark - Latency Analysis 
(Interactive - Click legend to toggle)\",\n        height=300 * rows,\n        hovermode=\"x unified\",\n        showlegend=True,\n        legend=dict(orientation=\"v\", yanchor=\"top\", y=1, xanchor=\"left\", x=1.02),\n    )\n\n    # Add annotation with statistics\n    stats_lines = [\"<b>Overall Statistics (last 3 seconds excluded):</b><br>\"]\n    for op in operations:\n        op_stats = all_stats[op]\n        stats_lines.append(\n            f\"{op}: Avg={op_stats['Average Latency']:.3f}ms, \"\n            f\"p99={op_stats['Percentile Latencies']['p99.00']:.3f}ms, \"\n            f\"Ops/sec={op_stats['Ops/sec']:.2f}<br>\"\n        )\n    stats_lines.append(f\"Duration: {all_stats['Runtime']['Total duration'] / 1000:.1f}s\")\n\n    fig.add_annotation(\n        text=\"\".join(stats_lines),\n        xref=\"paper\",\n        yref=\"paper\",\n        x=0.5,\n        y=-0.05,\n        showarrow=False,\n        font=dict(size=10),\n        bgcolor=\"wheat\",\n        bordercolor=\"black\",\n        borderwidth=1,\n        xanchor=\"center\",\n        yanchor=\"top\",\n    )\n\n    # Save to HTML\n    fig.write_html(output_file)\n    print(f\"Interactive chart saved to: {output_file}\")\n\n    # Open in browser\n    if open_browser:\n        abs_path = os.path.abspath(output_file)\n        file_url = f\"file://{abs_path}\"\n        print(f\"Opening chart in browser: {file_url}\")\n        webbrowser.open(file_url)\n\n\ndef plot_latency_chart(data, output_file=\"latency_chart.svg\", open_browser=True):\n    \"\"\"\n    Generate latency chart from memtier data.\n\n    Args:\n        data: Parsed JSON data\n        output_file: Output filename for the chart\n        open_browser: If True, open the chart in the browser\n    \"\"\"\n    # Get all available operations from ALL STATS (excluding 'Runtime')\n    all_stats = data[\"ALL STATS\"]\n    operations = [\n        key\n        for key in all_stats.keys()\n        if key != \"Runtime\" and isinstance(all_stats[key], 
dict) and \"Time-Serie\" in all_stats[key]\n    ]\n\n    if not operations:\n        print(\"Error: No operation data found in JSON\")\n        return\n\n    # Extract data for all operations\n    ops_data = {}\n    for op in operations:\n        ops_data[op] = extract_latency_timeseries(data, op, ignore_last_seconds=3)\n\n    # Determine number of subplots needed\n    num_ops = len(operations)\n    if num_ops == 1:\n        # Single operation: 2x2 grid with detailed views\n        fig, axes = plt.subplots(2, 2, figsize=(16, 12))\n        axes = axes.flatten()\n    elif num_ops == 2:\n        # Two operations: 2x2 grid\n        fig, axes = plt.subplots(2, 2, figsize=(16, 12))\n        axes = axes.flatten()\n    else:\n        # Multiple operations: dynamic grid\n        rows = (num_ops + 1) // 2 + 1\n        fig, axes = plt.subplots(rows, 2, figsize=(16, 4 * rows))\n        axes = axes.flatten()\n\n    fig.suptitle(\"Memtier Benchmark - Latency Analysis\", fontsize=16, fontweight=\"bold\")\n\n    # Plot each operation's latency percentiles\n    for idx, op in enumerate(operations):\n        if idx >= len(axes) - 1:  # Save last plot for throughput\n            break\n\n        ax = axes[idx]\n        op_data = ops_data[op]\n\n        ax.plot(op_data[\"times\"], op_data[\"avg\"], label=\"Average\", linewidth=2)\n        ax.plot(op_data[\"times\"], op_data[\"p50\"], label=\"p50\", linewidth=2)\n        ax.plot(op_data[\"times\"], op_data[\"p99\"], label=\"p99\", linewidth=2)\n        ax.plot(op_data[\"times\"], op_data[\"p99.9\"], label=\"p99.9\", linewidth=2)\n        ax.set_xlabel(\"Time (seconds)\", fontsize=12)\n        ax.set_ylabel(\"Latency (ms)\", fontsize=12)\n        ax.set_title(f\"{op} Operations - Latency Percentiles\", fontsize=14, fontweight=\"bold\")\n        ax.legend(loc=\"best\")\n        ax.grid(True, alpha=0.3)\n\n    # Comparison plot (if multiple operations)\n    if num_ops > 1:\n        comparison_idx = min(num_ops, len(axes) - 2)\n        
ax_comp = axes[comparison_idx]\n\n        for op in operations:\n            op_data = ops_data[op]\n            ax_comp.plot(op_data[\"times\"], op_data[\"p99\"], label=f\"{op} p99\", linewidth=2)\n            ax_comp.plot(\n                op_data[\"times\"],\n                op_data[\"avg\"],\n                label=f\"{op} Avg\",\n                linewidth=2,\n                linestyle=\"--\",\n                alpha=0.7,\n            )\n\n        ax_comp.set_xlabel(\"Time (seconds)\", fontsize=12)\n        ax_comp.set_ylabel(\"Latency (ms)\", fontsize=12)\n        ax_comp.set_title(\"Operations Comparison - Latency\", fontsize=14, fontweight=\"bold\")\n        ax_comp.legend(loc=\"best\")\n        ax_comp.grid(True, alpha=0.3)\n\n    # Throughput plot\n    throughput_idx = min(num_ops + 1, len(axes) - 1) if num_ops > 1 else len(axes) - 1\n    ax_throughput = axes[throughput_idx]\n\n    for op in operations:\n        op_data = ops_data[op]\n        ax_throughput.plot(\n            op_data[\"times\"], op_data[\"ops_per_sec\"], label=f\"{op} ops/sec\", linewidth=2\n        )\n\n    ax_throughput.set_xlabel(\"Time (seconds)\", fontsize=12)\n    ax_throughput.set_ylabel(\"Operations per Second\", fontsize=12)\n    ax_throughput.set_title(\"Throughput Over Time\", fontsize=14, fontweight=\"bold\")\n    ax_throughput.legend(loc=\"best\")\n    ax_throughput.grid(True, alpha=0.3)\n\n    # Hide any unused subplots\n    for idx in range(throughput_idx + 1, len(axes)):\n        axes[idx].set_visible(False)\n\n    # Add overall statistics as text\n    stats_lines = [\"Overall Statistics (last 3 seconds excluded):\"]\n    for op in operations:\n        op_stats = all_stats[op]\n        stats_lines.append(\n            f\"{op}: Avg={op_stats['Average Latency']:.3f}ms, \"\n            f\"p99={op_stats['Percentile Latencies']['p99.00']:.3f}ms, \"\n            f\"Ops/sec={op_stats['Ops/sec']:.2f}\"\n        )\n    stats_lines.append(f\"Duration: {all_stats['Runtime']['Total 
duration'] / 1000:.1f}s\")\n    stats_text = \"\\n\".join(stats_lines)\n\n    fig.text(\n        0.5,\n        0.02,\n        stats_text,\n        ha=\"center\",\n        fontsize=10,\n        bbox=dict(boxstyle=\"round\", facecolor=\"wheat\", alpha=0.5),\n    )\n\n    plt.tight_layout(rect=[0, 0.05, 1, 0.97])\n    plt.savefig(output_file, dpi=300, bbox_inches=\"tight\", format=\"svg\")\n    print(f\"Chart saved to: {output_file}\")\n\n    # Open in browser\n    if open_browser:\n        abs_path = os.path.abspath(output_file)\n        file_url = f\"file://{abs_path}\"\n        print(f\"Opening chart in browser: {file_url}\")\n        webbrowser.open(file_url)\n\n    plt.close()\n\n\ndef print_summary(data):\n    \"\"\"Print summary statistics.\"\"\"\n    print(\"\\n\" + \"=\" * 60)\n    print(\"MEMTIER BENCHMARK SUMMARY\")\n    print(\"=\" * 60)\n\n    config = data[\"configuration\"]\n    runtime = data[\"ALL STATS\"][\"Runtime\"]\n\n    print(f\"\\nConfiguration:\")\n    print(f\"  Server: {config['server']}:{config['port']}\")\n    print(f\"  Clients: {config['clients']}\")\n    print(f\"  Threads: {config['threads']}\")\n    print(f\"  Duration: {runtime['Total duration'] / 1000:.1f}s\")\n    print(f\"  Pipeline: {config['pipeline']}\")\n    print(f\"  Ratio (SET:GET): {config['ratio']}\")\n\n    # Get all operations dynamically\n    all_stats = data[\"ALL STATS\"]\n    operations = [\n        key\n        for key in all_stats.keys()\n        if key != \"Runtime\" and isinstance(all_stats[key], dict) and \"Count\" in all_stats[key]\n    ]\n\n    for op in operations:\n        op_stats = all_stats[op]\n        print(f\"\\n{op} Operations:\")\n        print(f\"  Total: {op_stats['Count']:,}\")\n        print(f\"  Ops/sec: {op_stats['Ops/sec']:.2f}\")\n        print(f\"  Avg Latency: {op_stats['Average Latency']:.3f} ms\")\n        print(f\"  Min Latency: {op_stats['Min Latency']:.3f} ms\")\n        print(f\"  Max Latency: {op_stats['Max Latency']:.3f} ms\")\n    
    print(f\"  p50: {op_stats['Percentile Latencies']['p50.00']:.3f} ms\")\n        print(f\"  p99: {op_stats['Percentile Latencies']['p99.00']:.3f} ms\")\n        print(f\"  p99.9: {op_stats['Percentile Latencies']['p99.90']:.3f} ms\")\n\n    print(\"\\n\" + \"=\" * 60 + \"\\n\")\n\n\ndef main():\n    \"\"\"Main function.\"\"\"\n    import sys\n\n    # Get input file from command line or use default\n    if len(sys.argv) > 1:\n        input_file = sys.argv[1]\n    else:\n        input_file = \"memtier_out.json\"\n\n    # Get output file from command line or use default\n    if len(sys.argv) > 2:\n        output_file = sys.argv[2]\n    else:\n        # Use .html for interactive charts by default\n        output_file = \"latency_chart.html\" if PLOTLY_AVAILABLE else \"latency_chart.svg\"\n\n    # Check if input file exists\n    if not Path(input_file).exists():\n        print(f\"Error: Input file '{input_file}' not found!\")\n        print(f\"\\nUsage: {sys.argv[0]} [input_file.json] [output_file.html|.svg]\")\n        print(f\"\\nTo generate the JSON file, run memtier_benchmark with --json-out-file:\")\n        print(f\"  memtier_benchmark --server <host> --port <port> \\\\\")\n        print(f\"      --json-out-file memtier_out.json \\\\\")\n        print(f\"      [other options...]\")\n        sys.exit(1)\n\n    # Load and process data\n    print(f\"Loading data from: {input_file}\")\n    data = load_json_data(input_file)\n\n    # Print summary\n    print_summary(data)\n\n    # Generate chart\n    print(f\"Generating latency chart...\")\n\n    # Use interactive chart if output is .html, otherwise use matplotlib\n    if output_file.endswith(\".html\"):\n        plot_latency_chart_interactive(data, output_file)\n    else:\n        plot_latency_chart(data, output_file)\n\n    print(f\"\\nDone!\")\n    if PLOTLY_AVAILABLE:\n        print(f\"Tip: Use .html extension for interactive charts with toggleable series\")\n\n\nif __name__ == \"__main__\":\n    main()\n"
  },
  {
    "path": "tools/release.sh",
    "content": "#!/usr/bin/env sh\n\nRELEASE_DIR=build-release\nAPP_PATH=$RELEASE_DIR/dragonfly\n\nset -e\n\nif ! [ -f \"helio/blaze.sh\" ]; then\n   echo \"ERROR\"\n   echo \"Could not find helio. Please only run this script from repo root.\"\n   echo \"If you are already on the repo root, you might've cloned without submodules.\"\n   echo \"Try running 'git submodule update --init --recursive'\"\n   exit 1\nfi\n\npwd\n\nmake release\n\nif ! [ -f ${APP_PATH} ]; then\n   echo \"ERROR\"\n   echo \"Failed to generate new dragonfly binary.\"\n   exit 1\nfi\n\necho \"Running ${APP_PATH} --version\"\n${APP_PATH} --version\n\nif readelf -a ${APP_PATH} | grep GLIBC_PRIVATE >/dev/null 2>&1 ; then\n   echo \"ERROR\"\n   echo \"The generated binary contain invalid GLIBC version entries.\"\n   exit 1\nfi\n\nmake package\necho \"Release package created: \"\nls -lh $RELEASE_DIR/\n"
  },
  {
    "path": "tools/replay/go.mod",
    "content": "module dragonfydb.io/traffic-replay\n\ngo 1.18\n\nrequire (\n\tgithub.com/influxdata/tdigest v0.0.1\n\tgithub.com/pterm/pterm v0.12.25\n\tgithub.com/redis/go-redis/v9 v9.7.3\n)\n\nrequire (\n\tgithub.com/atomicgo/cursor v0.0.1 // indirect\n\tgithub.com/cespare/xxhash/v2 v2.2.0 // indirect\n\tgithub.com/davecgh/go-spew v1.1.1 // indirect\n\tgithub.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f // indirect\n\tgithub.com/gookit/color v1.4.2 // indirect\n\tgithub.com/mattn/go-runewidth v0.0.13 // indirect\n\tgithub.com/pmezard/go-difflib v1.0.0 // indirect\n\tgithub.com/rivo/uniseg v0.2.0 // indirect\n\tgithub.com/stretchr/testify v1.7.0 // indirect\n\tgithub.com/xo/terminfo v0.0.0-20210125001918-ca9a967f8778 // indirect\n\tgolang.org/x/sys v0.1.0 // indirect\n\tgolang.org/x/term v0.0.0-20210220032956-6a3ed077a48d // indirect\n\tgopkg.in/yaml.v3 v3.0.1 // indirect\n)\n"
  },
  {
    "path": "tools/replay/go.sum",
    "content": "github.com/atomicgo/cursor v0.0.1 h1:xdogsqa6YYlLfM+GyClC/Lchf7aiMerFiZQn7soTOoU=\ngithub.com/atomicgo/cursor v0.0.1/go.mod h1:cBON2QmmrysudxNBFthvMtN32r3jxVRIvzkUiF/RuIk=\ngithub.com/bsm/ginkgo/v2 v2.12.0 h1:Ny8MWAHyOepLGlLKYmXG4IEkioBysk6GpaRTLC8zwWs=\ngithub.com/bsm/gomega v1.27.10 h1:yeMWxP2pV2fG3FgAODIY8EiRE3dy0aeFYt4l7wh6yKA=\ngithub.com/cespare/xxhash/v2 v2.2.0 h1:DC2CZ1Ep5Y4k3ZQ899DldepgrayRUGE6BBZ/cd9Cj44=\ngithub.com/cespare/xxhash/v2 v2.2.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=\ngithub.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=\ngithub.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=\ngithub.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=\ngithub.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f h1:lO4WD4F/rVNCu3HqELle0jiPLLBs70cWOduZpkS1E78=\ngithub.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f/go.mod h1:cuUVRXasLTGF7a8hSLbxyZXjz+1KgoB3wDUb6vlszIc=\ngithub.com/google/go-cmp v0.2.0 h1:+dTQ8DZQJz0Mb/HjFlkptS1FeQ4cWSnN941F8aEG4SQ=\ngithub.com/google/go-cmp v0.2.0/go.mod h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5aqRK0M=\ngithub.com/gookit/color v1.4.2 h1:tXy44JFSFkKnELV6WaMo/lLfu/meqITX3iAV52do7lk=\ngithub.com/gookit/color v1.4.2/go.mod h1:fqRyamkC1W8uxl+lxCQxOT09l/vYfZ+QeiX3rKQHCoQ=\ngithub.com/influxdata/tdigest v0.0.1 h1:XpFptwYmnEKUqmkcDjrzffswZ3nvNeevbUSLPP/ZzIY=\ngithub.com/influxdata/tdigest v0.0.1/go.mod h1:Z0kXnxzbTC2qrx4NaIzYkE1k66+6oEDQTvL95hQFh5Y=\ngithub.com/mattn/go-runewidth v0.0.13 h1:lTGmDsbAYt5DmK6OnoV7EuIF1wEIFAcxld6ypU4OSgU=\ngithub.com/mattn/go-runewidth v0.0.13/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh153qUoGf23w=\ngithub.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=\ngithub.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=\ngithub.com/pterm/pterm v0.12.25 
h1:l9a8FU4XmJHs6rug8/YV1L2g/LBdMgaTvBBgwxD8avI=\ngithub.com/pterm/pterm v0.12.25/go.mod h1:PhQ89w4i95rhgE+xedAoqous6K9X+r6aSOI2eFF7DZI=\ngithub.com/redis/go-redis/v9 v9.7.3 h1:YpPyAayJV+XErNsatSElgRZZVCwXX9QzkKYNvO7x0wM=\ngithub.com/redis/go-redis/v9 v9.7.3/go.mod h1:bGUrSggJ9X9GUmZpZNEOQKaANxSGgOEBRltRTZHSvrA=\ngithub.com/rivo/uniseg v0.2.0 h1:S1pD9weZBuJdFmowNwbpi7BJ8TNftyUImj/0WQi72jY=\ngithub.com/rivo/uniseg v0.2.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc=\ngithub.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=\ngithub.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=\ngithub.com/stretchr/testify v1.7.0 h1:nwc3DEeHmmLAfoZucVR881uASk0Mfjw8xYJ99tb5CcY=\ngithub.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=\ngithub.com/xo/terminfo v0.0.0-20210125001918-ca9a967f8778 h1:QldyIu/L63oPpyvQmHgvgickp1Yw510KJOqX7H24mg8=\ngithub.com/xo/terminfo v0.0.0-20210125001918-ca9a967f8778/go.mod h1:2MuV+tbUrU1zIOPMxZ5EncGwgmMJsa+9ucAQZXxsObs=\ngolang.org/x/exp v0.0.0-20180321215751-8460e604b9de h1:xSjD6HQTqT0H/k60N5yYBtnN1OEkVy7WIo/DYyxKRO0=\ngolang.org/x/exp v0.0.0-20180321215751-8460e604b9de/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA=\ngolang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=\ngolang.org/x/sys v0.0.0-20210330210617-4fbd30eecc44/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=\ngolang.org/x/sys v0.1.0 h1:kunALQeHf1/185U1i0GOB/fy1IPRDDpuoOOqRReG57U=\ngolang.org/x/sys v0.1.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=\ngolang.org/x/term v0.0.0-20210220032956-6a3ed077a48d h1:SZxvLBoTP5yHO3Frd4z4vrF+DBX9vMVanchswa69toE=\ngolang.org/x/term v0.0.0-20210220032956-6a3ed077a48d/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=\ngolang.org/x/tools v0.0.0-20180525024113-a5b4c53f6e8b/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=\ngonum.org/v1/gonum 
v0.0.0-20181121035319-3f7ecaa7e8ca h1:PupagGYwj8+I4ubCxcmcBRk3VlUWtTg5huQpZR9flmE=\ngonum.org/v1/gonum v0.0.0-20181121035319-3f7ecaa7e8ca/go.mod h1:Y+Yx5eoAFn32cQvJDxZx5Dpnq+c3wtXuadVZAcxbbBo=\ngonum.org/v1/netlib v0.0.0-20181029234149-ec6d1f5cefe6/go.mod h1:wa6Ws7BG/ESfp6dHfk7C6KdzKA7wR7u/rKwOGE66zvw=\ngopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=\ngopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=\ngopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=\ngopkg.in/yaml.v3 v3.0.0-20210107192922-496545a6307b/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=\ngopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=\ngopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=\n"
  },
  {
    "path": "tools/replay/main.go",
    "content": "package main\n\nimport (\n\t\"flag\"\n\t\"fmt\"\n\t\"os\"\n\t\"sort\"\n\t\"strings\"\n\t\"sync\"\n\t\"sync/atomic\"\n\t\"time\"\n\n\t\"github.com/pterm/pterm\"\n)\n\nvar fHost = flag.String(\"host\", \"127.0.0.1:6379\", \"Redis host\")\nvar fCompareHost = flag.String(\"compare-host\", \"\", \"Redis host to compare with\")\nvar fClientBuffer = flag.Int(\"buffer\", 100, \"How many records to buffer per client\")\nvar fPace = flag.Bool(\"pace\", true, \"whether to pace the traffic according to the original timings.false - to pace as fast as possible\")\nvar fSkip = flag.Uint(\"skip\", 0, \"skip N records\")\nvar fSkipTimeSec = flag.Int(\"skip-time-sec\", 0, \"skip records in the first N seconds of the recording\")\nvar fIgnoreParseErrors = flag.Bool(\"ignore-parse-errors\", false, \"ignore parsing errors\")\nvar fTimeLimit = flag.Int(\"time-limit\", 0, \"time limit in seconds (0 = no limit)\")\n\nfunc RenderTable(area *pterm.AreaPrinter, files []string, workers []FileWorker) {\n\ttableData := pterm.TableData{{\"file\", \"parsed\", \"processed\", \"delayed\", \"clients\", \"avg(us)\", \"p75(us)\", \"p90(us)\", \"p99(us)\", \"p99.9(us)\"}}\n\tfor i := range workers {\n\t\tworkers[i].latencyMu.Lock()\n\t\tavg := 0.0\n\t\tif workers[i].latencyCount > 0 {\n\t\t\tavg = workers[i].latencySum / float64(workers[i].latencyCount)\n\t\t}\n\t\tp75 := workers[i].latencyDigest.Quantile(0.75)\n\t\tp90 := workers[i].latencyDigest.Quantile(0.9)\n\t\tp99 := workers[i].latencyDigest.Quantile(0.99)\n\t\tp999 := workers[i].latencyDigest.Quantile(0.999)\n\t\tworkers[i].latencyMu.Unlock()\n\t\ttableData = append(tableData, []string{\n\t\t\tfiles[i],\n\t\t\tfmt.Sprint(atomic.LoadUint64(&workers[i].parsed)),\n\t\t\tfmt.Sprint(atomic.LoadUint64(&workers[i].processed)),\n\t\t\tfmt.Sprint(atomic.LoadUint64(&workers[i].delayed)),\n\t\t\tfmt.Sprint(atomic.LoadUint64(&workers[i].clients)),\n\t\t\tfmt.Sprintf(\"%.0f\", avg),\n\t\t\tfmt.Sprintf(\"%.0f\", 
p75),\n\t\t\tfmt.Sprintf(\"%.0f\", p90),\n\t\t\tfmt.Sprintf(\"%.0f\", p99),\n\t\t\tfmt.Sprintf(\"%.0f\", p999),\n\t\t})\n\t}\n\tcontent, _ := pterm.DefaultTable.WithHasHeader().WithBoxed().WithData(tableData).Srender()\n\tarea.Update(content)\n}\n\n// RenderPipelineRangesTable renders the latency digests for each pipeline range\nfunc RenderPipelineRangesTable(area *pterm.AreaPrinter, files []string, workers []FileWorker) {\n\ttableData := pterm.TableData{{\"file\", \"Pipeline Range\", \"p75(us)\", \"p90(us)\", \"p99(us)\", \"p99.9(us)\"}}\n\tfor i := range workers {\n\t\tworkers[i].latencyMu.Lock()\n\t\tfor _, rng := range pipelineRanges {\n\t\t\tif digest, ok := workers[i].perRange[rng.label]; ok {\n\t\t\t\tp75 := digest.Quantile(0.75)\n\t\t\t\tp90 := digest.Quantile(0.9)\n\t\t\t\tp99 := digest.Quantile(0.99)\n\t\t\t\tp999 := digest.Quantile(0.999)\n\t\t\t\ttableData = append(tableData, []string{\n\t\t\t\t\tfiles[i],\n\t\t\t\t\trng.label,\n\t\t\t\t\tfmt.Sprintf(\"%.0f\", p75),\n\t\t\t\t\tfmt.Sprintf(\"%.0f\", p90),\n\t\t\t\t\tfmt.Sprintf(\"%.0f\", p99),\n\t\t\t\t\tfmt.Sprintf(\"%.0f\", p999),\n\t\t\t\t})\n\t\t\t}\n\t\t}\n\t\tworkers[i].latencyMu.Unlock()\n\t}\n\tcontent, _ := pterm.DefaultTable.WithHasHeader().WithBoxed().WithData(tableData).Srender()\n\tarea.Update(content)\n}\n\nfunc Run(files []string) {\n\tbaseTime := DetermineBaseTime(files)\n\n\tvar skipUntil uint64\n\teffectiveBaseTime := baseTime\n\tif *fSkipTimeSec > 0 {\n\t\tskipDuration := time.Duration(*fSkipTimeSec) * time.Second\n\t\tskipUntil = uint64(baseTime.Add(skipDuration).UnixNano())\n\t\teffectiveBaseTime = baseTime.Add(skipDuration)\n\t}\n\ttimeOffset := time.Now().Add(500 * time.Millisecond).Sub(effectiveBaseTime)\n\tfmt.Println(\"Offset -> \", timeOffset)\n\n\t// Calculate stop time based on recording timestamps if time limit is specified\n\tvar stopUntil uint64\n\tif *fTimeLimit > 0 {\n\t\tlimitDuration := time.Duration(*fTimeLimit) * time.Second\n\t\tstopUntil = 
uint64(effectiveBaseTime.Add(limitDuration).UnixNano())\n\t\tfmt.Printf(\"Time limit set to %d seconds\\n\", *fTimeLimit)\n\t}\n\n\t// Start a worker for every file. They take care of spawning client workers.\n\tvar wg sync.WaitGroup\n\tworkers := make([]FileWorker, len(files))\n\tfor i := range workers {\n\t\tworkers[i] = FileWorker{timeOffset: timeOffset, skipUntil: skipUntil, stopUntil: stopUntil}\n\t\twg.Add(1)\n\t\tgo workers[i].Run(files[i], &wg)\n\t}\n\n\twgDone := make(chan bool)\n\tgo func() {\n\t\twg.Wait()\n\t\twgDone <- true\n\t}()\n\n\t// Render table while running\n\tarea, _ := pterm.DefaultArea.WithCenter().Start()\n\tfor running := true; running; {\n\t\tselect {\n\t\tcase <-wgDone:\n\t\t\trunning = false\n\t\tcase <-time.After(100 * time.Millisecond):\n\t\t\tRenderTable(area, files, workers)\n\t\t}\n\t}\n\n\tRenderTable(area, files, workers) // to show last stats\n\tareaPipelineRanges, _ := pterm.DefaultArea.WithCenter().Start()\n\tRenderPipelineRangesTable(areaPipelineRanges, files, workers) // to render per pipeline-range latency digests\n}\n\nfunc Print(files []string) {\n\ttype StreamTop struct {\n\t\trecord Record\n\t\tch     chan Record\n\t}\n\n\t// Start file reader goroutines\n\tvar wg sync.WaitGroup\n\twg.Add(len(files))\n\n\ttops := make([]StreamTop, len(files))\n\tfor i, file := range files {\n\t\ttops[i].ch = make(chan Record, 100)\n\t\tgo func(ch chan Record, file string) {\n\t\t\tparseRecords(file, func(r Record) bool {\n\t\t\t\tch <- r\n\t\t\t\treturn true\n\t\t\t}, *fIgnoreParseErrors)\n\t\t\tclose(ch)\n\t\t\twg.Done()\n\t\t}(tops[i].ch, file)\n\t}\n\n\t// Pick record with minimum time from each channel\n\tfor {\n\t\tminTime := ^uint64(0)\n\t\tminIndex := -1\n\t\tfor i := range tops {\n\t\t\tif tops[i].record.Time == 0 {\n\t\t\t\tif r, ok := <-tops[i].ch; ok {\n\t\t\t\t\ttops[i].record = r\n\t\t\t\t}\n\t\t\t}\n\n\t\t\tif rt := tops[i].record.Time; rt > 0 && rt < minTime {\n\t\t\t\tminTime = rt\n\t\t\t\tminIndex = 
i\n\t\t\t}\n\t\t}\n\n\t\tif minIndex == -1 {\n\t\t\tbreak\n\t\t}\n\n\t\tfmt.Println(tops[minIndex].record.values...)\n\t\ttops[minIndex].record = Record{}\n\t}\n\n\twg.Wait()\n}\n\nfunc Analyze(files []string) {\n\ttotal := 0\n\tchained := 0\n\tclients := 0\n\tcmdCounts := make(map[string]uint)\n\n\t// count stats\n\tfor _, file := range files {\n\t\tfileClients := make(map[uint32]bool)\n\n\t\tparseRecords(file, func(r Record) bool {\n\t\t\ttotal += 1\n\t\t\tif r.HasMore > 0 {\n\t\t\t\tchained += 1\n\t\t\t}\n\n\t\t\tfileClients[r.Client] = true\n\t\t\tcmdCounts[r.values[0].(string)] += 1\n\n\t\t\treturn true\n\t\t}, *fIgnoreParseErrors)\n\n\t\tclients += len(fileClients)\n\t}\n\n\t// sort commands by frequencies\n\ttype Freq struct {\n\t\tcmd   string\n\t\tcount uint\n\t}\n\tvar sortedCmds []Freq\n\tfor cmd, count := range cmdCounts {\n\t\tsortedCmds = append(sortedCmds, Freq{cmd, count})\n\t}\n\tsort.Slice(sortedCmds, func(i, j int) bool {\n\t\treturn sortedCmds[i].count > sortedCmds[j].count\n\t})\n\n\t// Print all the info\n\tfmt.Println(\"Total commands\", total)\n\tfmt.Println(\"Has more%\", 100*float32(chained)/float32(total))\n\tfmt.Println(\"Total clients\", clients)\n\n\tfor _, freq := range sortedCmds {\n\t\tfmt.Printf(\"%8d | %v \\n\", freq.count, freq.cmd)\n\t}\n}\n\nfunc main() {\n\tflag.Usage = func() {\n\t\tbinaryName := os.Args[0]\n\n\t\tfmt.Fprintf(os.Stderr, \"Usage: %s [options] <command> <files...>\\n\", binaryName)\n\t\tfmt.Fprintln(os.Stderr, \"\\nOptions:\")\n\t\tflag.PrintDefaults()\n\t\tfmt.Fprintln(os.Stderr, \"\\nCommands:\")\n\t\tfmt.Fprintln(os.Stderr, \"  run  - replays the traffic\")\n\t\tfmt.Fprintln(os.Stderr, \"  print - prints the command\")\n\t\tfmt.Fprintln(os.Stderr, \"  analyze - analyzes the traffic\")\n\n\t\tfmt.Fprintln(os.Stderr, \"\\nExamples:\")\n\t\tfmt.Fprintf(os.Stderr, \"   %s -host 192.168.1.10:6379 -buffer 50 run *.bin\\n\", binaryName)\n\t\tfmt.Fprintf(os.Stderr, \"   %s -skip-time-sec 30 run *.bin\\n\", 
binaryName)\n\t\tfmt.Fprintf(os.Stderr, \"   %s -time-limit 60 run *.bin\\n\", binaryName)\n\t\tfmt.Fprintf(os.Stderr, \"   %s print *.bin\\n\", binaryName)\n\t}\n\n\tflag.Parse()\n\tif flag.NArg() < 2 {\n\t\tflag.Usage()\n\t\tos.Exit(1)\n\t}\n\n\tcmd := flag.Arg(0)\n\tfiles := flag.Args()[1:]\n\n\tswitch strings.ToLower(cmd) {\n\tcase \"run\":\n\t\tRun(files)\n\tcase \"print\":\n\t\tPrint(files)\n\tcase \"analyze\":\n\t\tAnalyze(files)\n\t}\n}\n"
  },
  {
    "path": "tools/replay/parsing.go",
    "content": "package main\n\nimport (\n\t\"bufio\"\n\t\"encoding/binary\"\n\t\"io\"\n\t\"log\"\n\t\"os\"\n)\n\nvar kBigEmptyBytes = make([]byte, 100_000)\n\nfunc parseStrings(file io.Reader) (out []interface{}, err error) {\n\tvar num, strLen uint32\n\terr = binary.Read(file, binary.LittleEndian, &num)\n\tif err != nil {\n\t\treturn nil, err\n\t}\n\n\tout = make([]interface{}, num)\n\tfor i := range out {\n\t\terr = binary.Read(file, binary.LittleEndian, &strLen)\n\t\tif err != nil {\n\t\t\treturn nil, err\n\t\t}\n\t\tout[i] = strLen\n\t}\n\n\tfor i := range out {\n\t\tstrLen = out[i].(uint32)\n\t\tbuf := make([]byte, strLen)\n\n\t\t_, err := io.ReadFull(file, buf)\n\t\tif err != nil {\n\t\t\treturn nil, err\n\t\t}\n\n\t\tout[i] = string(buf)\n\t}\n\treturn\n}\n\nfunc parseRecords(filename string, cb func(Record) bool, ignoreErrors bool) error {\n\tfile, err := os.Open(filename)\n\tif err != nil {\n\t\treturn err\n\t}\n\tdefer file.Close()\n\n\treader := bufio.NewReader(file)\n\n\tvar version uint8\n\tbinary.Read(reader, binary.LittleEndian, &version)\n\tif version != 2 {\n\t\tpanic(\"Requires version two replayer, roll back in commits!\")\n\t}\n\n\trecordNum := 0\n\tfor {\n\t\tvar rec Record\n\t\terr := binary.Read(reader, binary.LittleEndian, &rec.RecordHeader)\n\t\tif err != nil {\n\t\t\tif err == io.EOF {\n\t\t\t\tbreak\n\t\t\t}\n\t\t\treturn err\n\t\t}\n\n\t\trec.values, err = parseStrings(reader)\n\t\tif err != nil {\n\t\t\tlog.Printf(\"Could not parse %vth record\", recordNum)\n\t\t\tif !ignoreErrors {\n\t\t\t\treturn err\n\t\t\t}\n\t\t\tlog.Printf(\"Ignoring parse error and continuing\")\n\t\t\trecordNum++\n\t\t\tcontinue\n\t\t}\n\n\t\tif !cb(rec) {\n\t\t\treturn nil\n\t\t}\n\t\trecordNum++\n\t}\n\n\treturn nil\n}\n"
  },
  {
    "path": "tools/replay/workers.go",
    "content": "package main\n\nimport (\n\t\"context\"\n\t\"fmt\"\n\t\"log\"\n\t\"math\"\n\t\"strings\"\n\t\"sync\"\n\t\"sync/atomic\"\n\t\"time\"\n\n\t\"github.com/influxdata/tdigest\"\n\t\"github.com/redis/go-redis/v9\"\n)\n\ntype RecordHeader struct {\n\tClient  uint32\n\tTime    uint64\n\tDbIndex uint32\n\tHasMore uint32\n}\n\ntype Record struct {\n\tRecordHeader\n\tvalues []interface{} // instead of []string to unwrap into variadic\n}\n\n// Determine earliest time\nfunc DetermineBaseTime(files []string) time.Time {\n\tvar minTime uint64 = math.MaxUint64\n\tfor _, file := range files {\n\t\tparseRecords(file, func(r Record) bool {\n\t\t\tif r.Time < minTime {\n\t\t\t\tminTime = r.Time\n\t\t\t}\n\t\t\treturn false\n\t\t}, *fIgnoreParseErrors)\n\t}\n\treturn time.Unix(0, int64(minTime))\n}\n\n// Handles a single connection/client\ntype ClientWorker struct {\n\tredis     *redis.Client\n\tcompare     *redis.Client\n\tincoming  chan Record\n\tprocessed uint\n\tpipe      redis.Pipeliner\n\tcomparePipe redis.Pipeliner\n}\n\n// Pipeline length ranges for summary\nvar pipelineRanges = []struct {\n\tlabel string\n\tmin   int\n\tmax   int // inclusive, except last\n}{\n\t{\"0-29\", 0, 29},\n\t{\"30-79\", 30, 79},\n\t{\"80-199\", 80, 199},\n\t{\"200+\", 200, 1 << 30},\n}\n\nvar compareIgnoreCmds = []string{\n    \"HELLO\",\n    \"AUTH\",\n    \"SELECT\",\n    \"INFO\",\n    \"TIME\",\n    \"CLIENT\",\n    \"CONFIG\",\n}\n\n// Handles a single file and distributes messages to clients\ntype FileWorker struct {\n\tclientGroup sync.WaitGroup\n\ttimeOffset  time.Duration\n\tskipUntil   uint64\n\tstopUntil   uint64 // timestamp when to stop processing traffic (0 = no limit)\n\t// stats for output, updated by clients, read by rendering goroutine\n\tprocessed uint64\n\tdelayed   uint64\n\tparsed    uint64\n\tclients   uint64\n\n\tlatencyDigest *tdigest.TDigest\n\tlatencyMu     sync.Mutex\n\n\tlatencySum   float64 // sum of all batch latencies (microseconds)\n\tlatencyCount uint64 
 // number of batches\n\n\t// per-pipeline-range latency digests\n\tperRange map[string]*tdigest.TDigest\n}\n\n// Helper function to track latency and update digests\nfunc trackLatency(worker *FileWorker, batchLatency float64, size int) {\n\tworker.latencyMu.Lock()\n\tdefer worker.latencyMu.Unlock()\n\tworker.latencyDigest.Add(batchLatency, 1)\n\tworker.latencySum += batchLatency\n\tworker.latencyCount++\n\t// Add to per-range digest\n\tif worker.perRange != nil {\n\t\tfor _, rng := range pipelineRanges {\n\t\t\tif size >= rng.min && size <= rng.max {\n\t\t\t\tworker.perRange[rng.label].Add(batchLatency, 1)\n\t\t\t\tbreak\n\t\t\t}\n\t\t}\n\t}\n}\n\nfunc ignoreCompareCmd(c redis.Cmder) bool {\n    args := c.Args()\n    if len(args) == 0 {\n        return true\n    }\n    name := strings.ToUpper(fmt.Sprint(args[0]))\n    for _, ign := range compareIgnoreCmds {\n        if name == ign {\n            return true\n        }\n    }\n    return false\n}\n\nfunc cmdAsString(c redis.Cmder) string {\n    args := c.Args()\n    if len(args) == 0 {\n        return \"<no-args>\"\n    }\n\n    name := strings.ToUpper(fmt.Sprint(args[0]))\n    if len(args) == 1 {\n        return name\n    }\n\n    parts := make([]string, 0, len(args) - 1)\n    for _, a := range args[1:] {\n        s := fmt.Sprint(a)\n        parts = append(parts, s)\n    }\n    return name + \" \" + strings.Join(parts, \" \")\n}\n\nfunc cmdResultString(cm redis.Cmder) string {\n    if err := cm.Err(); err != nil {\n        if err == redis.Nil {\n            return \"(nil)\"\n        }\n        return \"ERR: \" + err.Error()\n    }\n\n    if cmd, ok := cm.(*redis.Cmd); ok {\n        v := cmd.Val()\n        s := fmt.Sprintf(\"%v\", v)\n        return s\n    }\n\n    return fmt.Sprintf(\"<unknown Cmder %T>\", cm)\n}\n\nfunc compareCmdResults(a, b []redis.Cmder, lastMsg Record) {\n    if len(a) != len(b) {\n        log.Fatalf(\"[COMPARE] mismatch count: primary=%d compare=%d (last client=%d time=%d)\", len(a), len(b), 
lastMsg.Client, lastMsg.Time)\n        return\n    }\n\n    for i := range a {\n\t\tif (ignoreCompareCmd(a[i])) {\n\t\t\tcontinue\n\t\t}\n\t\tpa := cmdResultString(a[i])\n        pb := cmdResultString(b[i])\n        if pa != pb {\n\t\t\tlog.Fatalf(\"[COMPARE] mismatch at idx %d cmd=%s\\n  primary=%s\\n  compare=%s\\n  (client=%d time=%d)\", i, cmdAsString(a[i]), pa, pb, lastMsg.Client, lastMsg.Time)\n\t\t}\n    }\n}\n\nfunc (c *ClientWorker) Run(pace bool, worker *FileWorker) {\n\tfor msg := range c.incoming {\n\t\tif c.processed == 0 && msg.DbIndex != 0 {\n\t\t\t// There is no easy way to switch, we rely on connection pool consisting only of one connection\n\t\t\tc.redis.Do(context.Background(), []interface{}{\"SELECT\", fmt.Sprint(msg.DbIndex)})\n\t\t\tif c.compare != nil {\n        \t\tc.compare.Do(context.Background(), []interface{}{\"SELECT\", fmt.Sprint(msg.DbIndex)})\n    \t\t}\n\t\t}\n\n\t\tlag := time.Until(worker.HappensAt(time.Unix(0, int64(msg.Time))))\n\t\tif lag < 0 {\n\t\t\tatomic.AddUint64(&worker.delayed, 1)\n\t\t}\n\n\t\tif pace {\n\t\t\ttime.Sleep(lag)\n\t\t}\n\n\t\tc.pipe.Do(context.Background(), msg.values...).Result()\n\t\tif c.comparePipe != nil {\n    \t\tc.comparePipe.Do(context.Background(), msg.values...).Result()\n\t\t}\n\n\t\tatomic.AddUint64(&worker.processed, 1)\n\n\t\tif msg.HasMore == 0 {\n\t\t\tsize := c.pipe.Len()\n\t\t\tstart := time.Now()\n\t\t\tcmds, _ := c.pipe.Exec(context.Background())\n\t\t\tbatchLatency := float64(time.Since(start).Microseconds())\n\t\t\ttrackLatency(worker, batchLatency, size)\n\t\t\tc.processed += uint(size)\n\n    \t\tif c.comparePipe != nil {\n        \t\tccmds, _ := c.comparePipe.Exec(context.Background())\n        \t\tcompareCmdResults(cmds, ccmds, msg)\n    \t\t}\n\t\t}\n\t}\n\n\tif size := c.pipe.Len(); size >= 0 {\n\t\tstart := time.Now()\n\t\tc.pipe.Exec(context.Background())\n\t\tbatchLatency := float64(time.Since(start).Microseconds())\n\t\ttrackLatency(worker, batchLatency, 
size)\n\t\tc.processed += uint(size)\n\t}\n\n\tworker.clientGroup.Done()\n}\n\nfunc NewClient(w *FileWorker, pace bool) *ClientWorker {\n\tclient := &ClientWorker{\n\t\tredis:    redis.NewClient(&redis.Options{Addr: *fHost, PoolSize: 1, DisableIndentity: true}),\n\t\tincoming: make(chan Record, *fClientBuffer),\n\t}\n\tclient.pipe = client.redis.Pipeline()\n\n\tif *fCompareHost != \"\" {\n        client.compare = redis.NewClient(&redis.Options{Addr: *fCompareHost, PoolSize: 1, DisableIndentity: true})\n        client.comparePipe = client.compare.Pipeline()\n    }\n\n\tatomic.AddUint64(&w.clients, 1)\n\tw.clientGroup.Add(1)\n\tgo client.Run(pace, w)\n\treturn client\n}\n\nfunc (w *FileWorker) Run(file string, wg *sync.WaitGroup) {\n\tw.latencyDigest = tdigest.NewWithCompression(1000)\n\tw.perRange = make(map[string]*tdigest.TDigest)\n\tfor _, rng := range pipelineRanges {\n\t\tw.perRange[rng.label] = tdigest.NewWithCompression(500)\n\t}\n\tclients := make(map[uint32]*ClientWorker, 0)\n\trecordId := uint64(0)\n\terr := parseRecords(file, func(r Record) bool {\n\t\tclient, ok := clients[r.Client]\n\t\tif !ok {\n\t\t\tclient = NewClient(w, *fPace)\n\t\t\tclients[r.Client] = client\n\t\t}\n\t\tcmdName := strings.ToLower(r.values[0].(string))\n\t\trecordId += 1\n\t\tif cmdName != \"eval\" && recordId < uint64(*fSkip) {\n\t\t\treturn true\n\t\t}\n\n\t\tif w.skipUntil > 0 && r.Time < w.skipUntil {\n\t\t\treturn true\n\t\t}\n\n\t\tatomic.AddUint64(&w.parsed, 1)\n\n\t\tif w.stopUntil > 0 && r.Time > w.stopUntil {\n\t\t\treturn true\n\t\t}\n\n\t\tclient.incoming <- r\n\t\treturn true\n\t}, *fIgnoreParseErrors)\n\n\tif err != nil {\n\t\tlog.Fatalf(\"Could not parse records for file %s: %v\", file, err)\n\t}\n\n\tfor _, client := range clients {\n\t\tclose(client.incoming)\n\t}\n\tw.clientGroup.Wait()\n\twg.Done()\n}\n\nfunc (w *FileWorker) HappensAt(recordTime time.Time) time.Time {\n\treturn recordTime.Add(w.timeOffset)\n}\n"
  },
  {
    "path": "tools/requirements.txt",
    "content": "aioredis==2.0.1\nasync_timeout==4.0.2\npytoml==0.1.21\nPyYAML==6.0\nrailroad==0.5.0\nredis==4.4.4\nrequests>=2.32.0\naiocsv==1.2.3\naiofiles==22.1.0\nnumpy==2.1.3\n"
  },
  {
    "path": "tools/run_master_replica.sh",
    "content": "#!/usr/bin/env bash\nset -euo pipefail\n\nROOT_DIR=\"$(cd \"$(dirname \"${BASH_SOURCE[0]}\")/..\" && pwd)\"\nDFLY_BIN=\"${DFLY_BIN:-${ROOT_DIR}/build-dbg/dragonfly}\"\nMASTER_PORT=\"${MASTER_PORT:-6379}\"\nREPLICA_PORT=\"${REPLICA_PORT:-6380}\"\n\nif [[ ! -x \"${DFLY_BIN}\" ]]; then\n    echo \"Dragonfly binary not found. Build it first (e.g., cd build-dbg && ninja dragonfly).\" >&2\n    exit 1\nfi\n\nMASTER_DIR=\"$(mktemp -d -t dfly-master-XXXXXX)\"\nREPLICA_DIR=\"$(mktemp -d -t dfly-replica-XXXXXX)\"\nMASTER_LOG_DIR=\"$(mktemp -d -t dfly-master-logs-XXXXXX)\"\nREPLICA_LOG_DIR=\"$(mktemp -d -t dfly-replica-logs-XXXXXX)\"\n\ncleanup() {\n  if [[ -n \"${REPLICA_PID:-}\" ]]; then\n    kill \"${REPLICA_PID}\" 2>/dev/null || true\n  fi\n  if [[ -n \"${MASTER_PID:-}\" ]]; then\n    kill \"${MASTER_PID}\" 2>/dev/null || true\n  fi\n  rm -rf \"${MASTER_DIR}\" \"${REPLICA_DIR}\" \"${MASTER_LOG_DIR}\" \"${REPLICA_LOG_DIR}\"\n}\ntrap cleanup EXIT\n\nset -x\necho \"Starting master on port ${MASTER_PORT} (threads=4, shards=3)...\"\n\"${DFLY_BIN}\" \\\n  --port \"${MASTER_PORT}\" --dir \"${MASTER_DIR}\" --omit_basic_usage \\\n  --dbfilename=\"\" \\\n  --proactor_threads 4 --num_shards 3 \\\n  --log_dir \"${MASTER_LOG_DIR}\" &\nMASTER_PID=$!\n\necho \"Starting replica on port ${REPLICA_PORT} (threads=3, shards=2)...\"\n\"${DFLY_BIN}\" \\\n  --port \"${REPLICA_PORT}\" --dir \"${REPLICA_DIR}\" --omit_basic_usage \\\n  --dbfilename=\"\" \\\n  --proactor_threads 3 --num_shards 2 \\\n  --replicaof \"127.0.0.1:${MASTER_PORT}\" \\\n  --log_dir \"${REPLICA_LOG_DIR}\" \\\n  --replicaof_no_one_start_journal &\nREPLICA_PID=$!\n\nset +x\nsleep 0.5\necho -e \"\\n\\n\\nReplication running. Master PID: ${MASTER_PID}, Replica PID: ${REPLICA_PID}\"\necho \"Master port: ${MASTER_PORT}, Replica port: ${REPLICA_PORT}\"\necho \"Press Ctrl+C to stop.\"\n\nwait \"${MASTER_PID}\" \"${REPLICA_PID}\"\n"
  },
  {
    "path": "tools/vector-benches/README.md",
    "content": "Tool for benchmarking vector search with randomized vectors.\n\n## Logic\nThe tool connects to the Redis/Dragonfly instance.\n1. Checks if the database has enough data (at least 50% of requested `-n`). If not, it **flushes the DB** and generates random vectors.\n2. Checks if the index `idx` exists. If not, it creates it.\n3. Runs concurrent search queries and reports latency/QPS.\n\n## Arguments\n\n| Flag | Default | Description |\n|------|---------|-------------|\n| `-n` | 50000 | **Number of vectors** to populate if the DB is empty. |\n| `-q` | 1000 | **Total number of queries** to run during the benchmark. |\n| `-t` | 8 | **Query threads**. Number of concurrent workers sending queries. |\n| `-d` | 100 | **Vector dimension**. Size of the float32 vectors. |\n| `-k` | 10 | **Top K**. Number of nearest neighbors to retrieve per query. |\n| `-p` | 6379 | **Port** of the server. |\n| `-h` | localhost | **Host** of the server. |\n\nRun with `-h` (help) to see these defaults in the tool itself.\n"
  },
  {
    "path": "tools/vector-benches/go.mod",
    "content": "module dragonfly.vsbench\n\ngo 1.24.5\n\nrequire (\n\tatomicgo.dev/cursor v0.2.0 // indirect\n\tatomicgo.dev/keyboard v0.2.9 // indirect\n\tatomicgo.dev/schedule v0.1.0 // indirect\n\tgithub.com/cespare/xxhash/v2 v2.3.0 // indirect\n\tgithub.com/containerd/console v1.0.5 // indirect\n\tgithub.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f // indirect\n\tgithub.com/gomodule/redigo v1.8.3 // indirect\n\tgithub.com/gookit/color v1.5.4 // indirect\n\tgithub.com/lithammer/fuzzysearch v1.1.8 // indirect\n\tgithub.com/mattn/go-runewidth v0.0.16 // indirect\n\tgithub.com/pterm/pterm v0.12.82 // indirect\n\tgithub.com/redis/go-redis/v9 v9.14.1 // indirect\n\tgithub.com/rivo/uniseg v0.4.7 // indirect\n\tgithub.com/xo/terminfo v0.0.0-20220910002029-abceb7e1c41e // indirect\n\tgolang.org/x/sys v0.33.0 // indirect\n\tgolang.org/x/term v0.32.0 // indirect\n\tgolang.org/x/text v0.26.0 // indirect\n)\n"
  },
  {
    "path": "tools/vector-benches/go.sum",
    "content": "atomicgo.dev/cursor v0.2.0 h1:H6XN5alUJ52FZZUkI7AlJbUc1aW38GWZalpYRPpoPOw=\natomicgo.dev/cursor v0.2.0/go.mod h1:Lr4ZJB3U7DfPPOkbH7/6TOtJ4vFGHlgj1nc+n900IpU=\natomicgo.dev/keyboard v0.2.9 h1:tOsIid3nlPLZ3lwgG8KZMp/SFmr7P0ssEN5JUsm78K8=\natomicgo.dev/keyboard v0.2.9/go.mod h1:BC4w9g00XkxH/f1HXhW2sXmJFOCWbKn9xrOunSFtExQ=\natomicgo.dev/schedule v0.1.0 h1:nTthAbhZS5YZmgYbb2+DH8uQIZcTlIrd4eYr3UQxEjs=\natomicgo.dev/schedule v0.1.0/go.mod h1:xeUa3oAkiuHYh8bKiQBRojqAMq3PXXbJujjb0hw8pEU=\ngithub.com/MarvinJWendt/testza v0.1.0/go.mod h1:7AxNvlfeHP7Z/hDQ5JtE3OKYT3XFUeLCDE2DQninSqs=\ngithub.com/MarvinJWendt/testza v0.2.1/go.mod h1:God7bhG8n6uQxwdScay+gjm9/LnO4D3kkcZX4hv9Rp8=\ngithub.com/MarvinJWendt/testza v0.2.8/go.mod h1:nwIcjmr0Zz+Rcwfh3/4UhBp7ePKVhuBExvZqnKYWlII=\ngithub.com/MarvinJWendt/testza v0.2.10/go.mod h1:pd+VWsoGUiFtq+hRKSU1Bktnn+DMCSrDrXDpX2bG66k=\ngithub.com/MarvinJWendt/testza v0.2.12/go.mod h1:JOIegYyV7rX+7VZ9r77L/eH6CfJHHzXjB69adAhzZkI=\ngithub.com/MarvinJWendt/testza v0.3.0/go.mod h1:eFcL4I0idjtIx8P9C6KkAuLgATNKpX4/2oUqKc6bF2c=\ngithub.com/MarvinJWendt/testza v0.4.2/go.mod h1:mSdhXiKH8sg/gQehJ63bINcCKp7RtYewEjXsvsVUPbE=\ngithub.com/RediSearch/redisearch-go v1.1.1 h1:YElqguUO9lSqCYszrQcoTUoB9zBRyb2gkO4+yh3STMo=\ngithub.com/RediSearch/redisearch-go v1.1.1/go.mod h1:vcSdla+ZmI3B9doZbLoUrwNJfuvJzRt+/FoE38JcMS8=\ngithub.com/atomicgo/cursor v0.0.1/go.mod h1:cBON2QmmrysudxNBFthvMtN32r3jxVRIvzkUiF/RuIk=\ngithub.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs=\ngithub.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=\ngithub.com/containerd/console v1.0.3/go.mod h1:7LqA/THxQ86k76b8c/EMSiaJ3h1eZkMkXar0TQ1gf3U=\ngithub.com/containerd/console v1.0.5 h1:R0ymNeydRqH2DmakFNdmjR2k0t7UPuiOV/N/27/qqsc=\ngithub.com/containerd/console v1.0.5/go.mod h1:YynlIjWYF8myEu6sdkwKIvGQq+cOckRm6So2avqoYAk=\ngithub.com/davecgh/go-spew v1.1.0/go.mod 
h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=\ngithub.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=\ngithub.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f h1:lO4WD4F/rVNCu3HqELle0jiPLLBs70cWOduZpkS1E78=\ngithub.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f/go.mod h1:cuUVRXasLTGF7a8hSLbxyZXjz+1KgoB3wDUb6vlszIc=\ngithub.com/go-redis/redis v6.15.9+incompatible h1:K0pv1D7EQUjfyoMql+r/jZqCLizCGKFlFgcHWWmHQjg=\ngithub.com/go-redis/redis v6.15.9+incompatible/go.mod h1:NAIEuMOZ/fxfXJIrKDQDz8wamY7mA7PouImQ2Jvg6kA=\ngithub.com/gomodule/redigo v1.8.3 h1:HR0kYDX2RJZvAup8CsiJwxB4dTCSC0AaUq6S4SiLwUc=\ngithub.com/gomodule/redigo v1.8.3/go.mod h1:P9dn9mFrCBvWhGE1wpxx6fgq7BAeLBk+UUUzlpkBYO0=\ngithub.com/gookit/color v1.4.2/go.mod h1:fqRyamkC1W8uxl+lxCQxOT09l/vYfZ+QeiX3rKQHCoQ=\ngithub.com/gookit/color v1.5.0/go.mod h1:43aQb+Zerm/BWh2GnrgOQm7ffz7tvQXEKV6BFMl7wAo=\ngithub.com/gookit/color v1.5.4 h1:FZmqs7XOyGgCAxmWyPslpiok1k05wmY3SJTytgvYFs0=\ngithub.com/gookit/color v1.5.4/go.mod h1:pZJOeOS8DM43rXbp4AZo1n9zCU2qjpcRko0b6/QJi9w=\ngithub.com/klauspost/cpuid/v2 v2.0.9/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg=\ngithub.com/klauspost/cpuid/v2 v2.0.10/go.mod h1:g2LTdtYhdyuGPqyWyv7qRAmj1WBqxuObKfj5c0PQa7c=\ngithub.com/klauspost/cpuid/v2 v2.0.12/go.mod h1:g2LTdtYhdyuGPqyWyv7qRAmj1WBqxuObKfj5c0PQa7c=\ngithub.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo=\ngithub.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=\ngithub.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI=\ngithub.com/lithammer/fuzzysearch v1.1.8 h1:/HIuJnjHuXS8bKaiTMeeDlW2/AyIWk2brx1V8LFgLN4=\ngithub.com/lithammer/fuzzysearch v1.1.8/go.mod h1:IdqeyBClc3FFqSzYq/MXESsS4S0FsZ5ajtkr5xPLts4=\ngithub.com/mattn/go-runewidth v0.0.13/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh153qUoGf23w=\ngithub.com/mattn/go-runewidth v0.0.16 
h1:E5ScNMtiwvlvB5paMFdw9p4kSQzbXFikJ5SQO6TULQc=\ngithub.com/mattn/go-runewidth v0.0.16/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh153qUoGf23w=\ngithub.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=\ngithub.com/pterm/pterm v0.12.27/go.mod h1:PhQ89w4i95rhgE+xedAoqous6K9X+r6aSOI2eFF7DZI=\ngithub.com/pterm/pterm v0.12.29/go.mod h1:WI3qxgvoQFFGKGjGnJR849gU0TsEOvKn5Q8LlY1U7lg=\ngithub.com/pterm/pterm v0.12.30/go.mod h1:MOqLIyMOgmTDz9yorcYbcw+HsgoZo3BQfg2wtl3HEFE=\ngithub.com/pterm/pterm v0.12.31/go.mod h1:32ZAWZVXD7ZfG0s8qqHXePte42kdz8ECtRyEejaWgXU=\ngithub.com/pterm/pterm v0.12.33/go.mod h1:x+h2uL+n7CP/rel9+bImHD5lF3nM9vJj80k9ybiiTTE=\ngithub.com/pterm/pterm v0.12.36/go.mod h1:NjiL09hFhT/vWjQHSj1athJpx6H8cjpHXNAK5bUw8T8=\ngithub.com/pterm/pterm v0.12.40/go.mod h1:ffwPLwlbXxP+rxT0GsgDTzS3y3rmpAO1NMjUkGTYf8s=\ngithub.com/pterm/pterm v0.12.82 h1:+D9wYhCaeaK0FIQoZtqbNQuNpe2lB2tajKKsTd5paVQ=\ngithub.com/pterm/pterm v0.12.82/go.mod h1:TyuyrPjnxfwP+ccJdBTeWHtd/e0ybQHkOS/TakajZCw=\ngithub.com/redis/go-redis/v9 v9.14.1 h1:nDCrEiJmfOWhD76xlaw+HXT0c9hfNWeXgl0vIRYSDvQ=\ngithub.com/redis/go-redis/v9 v9.14.1/go.mod h1:huWgSWd8mW6+m0VPhJjSSQ+d6Nh1VICQ6Q5lHuCH/Iw=\ngithub.com/rivo/uniseg v0.2.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc=\ngithub.com/rivo/uniseg v0.4.7 h1:WUdvkW8uEhrYfLC4ZzdpI2ztxP1I582+49Oc5Mq64VQ=\ngithub.com/rivo/uniseg v0.4.7/go.mod h1:FN3SvrM+Zdj16jyLfmOkMNblXMcoc8DfTHruCPUcx88=\ngithub.com/sergi/go-diff v1.2.0/go.mod h1:STckp+ISIX8hZLjrqAeVduY0gWCT9IjLuqbuNXdaHfM=\ngithub.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=\ngithub.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4=\ngithub.com/stretchr/testify v1.5.1/go.mod h1:5W2xD1RspED5o8YsWQXVCued0rvSQ+mT+I5cxcmMvtA=\ngithub.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=\ngithub.com/stretchr/testify v1.7.0/go.mod 
h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=\ngithub.com/xo/terminfo v0.0.0-20210125001918-ca9a967f8778/go.mod h1:2MuV+tbUrU1zIOPMxZ5EncGwgmMJsa+9ucAQZXxsObs=\ngithub.com/xo/terminfo v0.0.0-20220910002029-abceb7e1c41e h1:JVG44RsyaB9T2KIHavMF/ppJZNG9ZpyihvCd0w101no=\ngithub.com/xo/terminfo v0.0.0-20220910002029-abceb7e1c41e/go.mod h1:RbqR21r5mrJuqunuUZ/Dhy/avygyECGrLceyNeo4LiM=\ngithub.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=\ngithub.com/yuin/goldmark v1.3.5/go.mod h1:mwnBkeHKe2W/ZEtQ+71ViKU8L12m81fl3OWwC1Zlc8k=\ngithub.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY=\ngolang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=\ngolang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=\ngolang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=\ngolang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc=\ngolang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=\ngolang.org/x/mod v0.4.2/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=\ngolang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4=\ngolang.org/x/mod v0.8.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs=\ngolang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=\ngolang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=\ngolang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU=\ngolang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg=\ngolang.org/x/net v0.0.0-20210421230115-4e50805a0758/go.mod 
h1:72T/g9IO56b78aLF+1Kcs5dz7/ng1VjMUvfKvpfy+jM=\ngolang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c=\ngolang.org/x/net v0.6.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs=\ngolang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=\ngolang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=\ngolang.org/x/sync v0.0.0-20210220032951-036812b2e83c/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=\ngolang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=\ngolang.org/x/sync v0.1.0/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=\ngolang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=\ngolang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=\ngolang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=\ngolang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=\ngolang.org/x/sys v0.0.0-20210119212857-b64e53b001e4/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=\ngolang.org/x/sys v0.0.0-20210124154548-22da62e12c0c/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=\ngolang.org/x/sys v0.0.0-20210330210617-4fbd30eecc44/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=\ngolang.org/x/sys v0.0.0-20210420072515-93ed5bcd2bfe/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=\ngolang.org/x/sys v0.0.0-20210421221651-33663a62ff08/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=\ngolang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=\ngolang.org/x/sys v0.0.0-20211013075003-97ac67df715c/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=\ngolang.org/x/sys 
v0.0.0-20220319134239-a9b59b0215f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=\ngolang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=\ngolang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=\ngolang.org/x/sys v0.1.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=\ngolang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=\ngolang.org/x/sys v0.33.0 h1:q3i8TbbEz+JRD9ywIRlyRAQbM0qF7hu24q3teo2hbuw=\ngolang.org/x/sys v0.33.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k=\ngolang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=\ngolang.org/x/term v0.0.0-20210220032956-6a3ed077a48d/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=\ngolang.org/x/term v0.0.0-20210615171337-6886f2dfbf5b/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8=\ngolang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8=\ngolang.org/x/term v0.5.0/go.mod h1:jMB1sMXY+tzblOD4FWmEbocvup2/aLOaQEp7JmGp78k=\ngolang.org/x/term v0.32.0 h1:DR4lr0TjUs3epypdhTOkMmuF5CDFJ/8pOnbzMZPQ7bg=\ngolang.org/x/term v0.32.0/go.mod h1:uZG1FhGx848Sqfsq4/DlJr3xGGsYMu/L5GW4abiaEPQ=\ngolang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=\ngolang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=\ngolang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=\ngolang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ=\ngolang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8=\ngolang.org/x/text v0.9.0/go.mod h1:e1OnstbJyHTd6l/uOt8jFFHp6TRDWZR/bV3emEE/zU8=\ngolang.org/x/text v0.26.0 h1:P42AVeLghgTYr4+xUnTRKDMqpar+PtX7KWuNQL21L8M=\ngolang.org/x/text v0.26.0/go.mod h1:QK15LZJUUQVJxhz7wXgxSy/CJaTFjd0G+YLonydOVQA=\ngolang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod 
h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=\ngolang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=\ngolang.org/x/tools v0.1.0/go.mod h1:xkSsbof2nBLbhDlRMhhhyNLN/zl3eTqcnHD5viDpcZ0=\ngolang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc=\ngolang.org/x/tools v0.6.0/go.mod h1:Xwgl3UAJ/d3gWutnCtw505GrjyAbvKui8lOU390QaIU=\ngolang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=\ngolang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=\ngolang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=\ngopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=\ngopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=\ngopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=\ngopkg.in/yaml.v2 v2.2.4/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=\ngopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=\ngopkg.in/yaml.v3 v3.0.0-20210107192922-496545a6307b/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=\n"
  },
  {
    "path": "tools/vector-benches/main.go",
    "content": "package main\n\nimport (\n\t\"context\"\n\t\"flag\"\n\t\"fmt\"\n\t\"math/rand/v2\"\n\t\"os\"\n\t\"reflect\"\n\t\"slices\"\n\t\"sort\"\n\t\"strings\"\n\t\"sync\"\n\t\"time\"\n\t\"unsafe\"\n\n\t\"github.com/pterm/pterm\"\n\t\"github.com/redis/go-redis/v9\"\n)\n\nvar nEntries = flag.Int(\"n\", 50000, \"Number of vectors\")\nvar nQueries = flag.Int(\"q\", 1000, \"Number of total queries\")\nvar nQueryJobs = flag.Int(\"t\", 8, \"Query threads (jobs)\")\nvar nDim = flag.Int(\"d\", 100, \"Vector dimension\")\nvar nTop = flag.Int(\"k\", 10, \"Top K vectors selected\")\n\nvar fPort = flag.Int(\"p\", 6379, \"Port\")\nvar fHost = flag.String(\"h\", \"localhost\", \"Host\")\n\nfunc formatLargeNumber(n int) string {\n\tif n < 1000000 {\n\t\treturn fmt.Sprintf(\"%.1fK\", float64(n)/1000.0)\n\t} else {\n\t\treturn fmt.Sprintf(\"%.1fM\", float64(n)/1000000.0)\n\t}\n}\n\n// Convert float slice to byte slice without copies\nfunc VecToSlice(vec []float32) []byte {\n\trawPtr := unsafe.Pointer(&vec[0])\n\tbyteLen := len(vec) * int(unsafe.Sizeof(vec[0]))\n\tvar out []byte\n\tsheader := (*reflect.SliceHeader)(unsafe.Pointer(&out))\n\tsheader.Data = uintptr(rawPtr)\n\tsheader.Len = byteLen\n\tsheader.Cap = byteLen\n\treturn out\n}\n\n// Generate random vector\nfunc RandVec(dim uint) []float32 {\n\tres := make([]float32, dim)\n\tfor i := range res {\n\t\tres[i] = rand.Float32()\n\t}\n\treturn res\n}\n\n// Create index\nfunc CreateIndex(ctx context.Context, rdb *redis.Client, dim uint) error {\n\thnswOptions := &redis.FTHNSWOptions{Type: \"FLOAT32\", Dim: int(dim), DistanceMetric: \"L2\"}\n\tschema := redis.FieldSchema{FieldName: \"v\", FieldType: redis.SearchFieldTypeVector, VectorArgs: &redis.FTVectorArgs{HNSWOptions: hnswOptions}}\n\t_, err := rdb.FTCreate(ctx, \"idx\", &redis.FTCreateOptions{}, &schema).Result()\n\treturn err\n}\n\nfunc WaitForIndex(ctx context.Context, rdb *redis.Client) {\n\tinfo, _ := rdb.Info(ctx).Result()\n\tif strings.Contains(info, \"dragonfly\") 
{\n\t\treturn\n\t}\n\n\tfor {\n\t\tidxInfo, err := rdb.FTInfo(ctx, \"idx\").Result()\n\t\tif err != nil {\n\t\t\tpanic(err)\n\t\t}\n\t\tif idxInfo.PercentIndexed >= 1.0 {\n\t\t\treturn\n\t\t}\n\t\ttime.Sleep(time.Millisecond * 100)\n\t}\n}\n\n// Fill with random vectors\nfunc Fill(ctx context.Context, rdb *redis.Client, prefix string, entries uint, dim uint) {\n\tconst kBatchSize = uint(100)\n\tfor i := uint(0); i < entries/kBatchSize; i += 1 {\n\t\tp := rdb.Pipeline()\n\t\tfor j := 0; j < int(kBatchSize); j += 1 {\n\t\t\tkey := fmt.Sprint(prefix, i, \":\", j)\n\t\t\tvec := RandVec(dim)\n\t\t\tp.HSet(ctx, key, \"v\", VecToSlice(vec))\n\t\t}\n\t\t_, err := p.Exec(ctx)\n\t\tif err != nil {\n\t\t\tpanic(err)\n\t\t}\n\t}\n}\n\n// Distribute Fill() over workers\nfunc FillParallel(ctx context.Context, rdb *redis.Client, entries uint, dim uint) {\n\tconst kJobs = uint(8)\n\twg := sync.WaitGroup{}\n\twg.Add(int(kJobs))\n\n\tfor i := uint(0); i < kJobs; i += 1 {\n\t\tli := i\n\t\tgo func() {\n\t\t\tFill(ctx, rdb, fmt.Sprint(\"k\", li, \":\"), entries/kJobs, dim)\n\t\t\twg.Done()\n\t\t}()\n\t}\n\twg.Wait()\n}\n\n// Perform queries and measure latencies\nfunc Query(ctx context.Context, rdb *redis.Client, queries uint, limit uint, dim uint) []time.Duration {\n\tlatencies := make([]time.Duration, queries)\n\tquery := fmt.Sprintf(\"*=>[KNN %v @v $vec]\", limit)\n\n\tfor i := range latencies {\n\t\tsearchOptions := &redis.FTSearchOptions{\n\t\t\tDialectVersion: 2,\n\t\t\tNoContent:      true,\n\t\t\tParams:         map[string]interface{}{\"vec\": VecToSlice(RandVec(dim))},\n\t\t}\n\n\t\tstart := time.Now()\n\t\tres, err := rdb.FTSearchWithArgs(ctx, \"idx\", query, searchOptions).Result()\n\t\tif err != nil {\n\t\t\tpanic(err)\n\t\t}\n\t\tif res.Total != int(limit) {\n\t\t\tpanic(\"Didn't hit limit\")\n\t\t}\n\t\tlatencies[i] = time.Since(start)\n\t}\n\treturn latencies\n}\n\n// Call Query() from multiple workers and combine latencies sorted\nfunc RunQueries(ctx context.Context, 
rdb *redis.Client) (time.Duration, []time.Duration) {\n\tjobs := uint(*nQueryJobs)\n\tjobQueries := uint(*nQueries) / jobs\n\tlatencies := make([][]time.Duration, jobs)\n\n\tstart := time.Now()\n\twg := sync.WaitGroup{}\n\n\twg.Add(len(latencies))\n\tfor i := range latencies {\n\t\tli := i\n\t\tgo func() {\n\t\t\tlatencies[li] = Query(ctx, rdb, jobQueries, uint(*nTop), uint(*nDim))\n\t\t\twg.Done()\n\t\t}()\n\t}\n\n\twg.Wait()\n\ttook := time.Since(start)\n\n\t// Unify all latencies and sort them\n\tallLatencies := make([]time.Duration, 0, 1000)\n\tfor _, sub := range latencies {\n\t\tallLatencies = append(allLatencies, sub...)\n\t}\n\tsort.Slice(allLatencies, func(i, j int) bool {\n\t\treturn allLatencies[i] < allLatencies[j]\n\t})\n\treturn took, allLatencies\n}\n\nfunc Print(took time.Duration, latencies []time.Duration) {\n\tqps := float64(*nQueries) / took.Seconds()\n\tstyle := pterm.NewStyle(pterm.Bold)\n\n\tpterm.Print(\"Entries(n): \", formatLargeNumber(*nEntries), \" Queries(q): \", formatLargeNumber(*nQueries), \" \")\n\tpterm.Print(\"Dimension(d): \", *nDim, \" Threads(t): \", *nQueryJobs, \" Top(k): \", *nTop)\n\tpterm.Println()\n\tpterm.Println()\n\n\tpterm.Println(\"Took:\", took)\n\tpterm.Print(\"QPS: \")\n\tstyle.Println(int(qps))\n\n\tp50 := float64(latencies[len(latencies)/2]) / float64(time.Millisecond)\n\tpterm.Printf(\"P50: %.2f ms\", p50)\n\n\tpterm.Print(\" P95: \")\n\tp95 := float64(latencies[int(float32(len(latencies))*0.95)]) / float64(time.Millisecond)\n\tstyle.Printf(\"%.2f ms\", p95)\n\n\tp99 := float64(latencies[int(float32(len(latencies))*0.99)]) / float64(time.Millisecond)\n\tpterm.Printf(\" P99: %.2f ms \\n\", p99)\n}\n\nfunc CheckData(ctx context.Context, rdb *redis.Client) {\n\tsize, _ := rdb.DBSize(ctx).Result()\n\tif size*2 < int64(*nEntries) {\n\t\trdb.FlushAll(ctx).Result()\n\t\tpterm.Println(\"Filling database\")\n\t\tFillParallel(ctx, rdb, uint(*nEntries), uint(*nDim))\n\t}\n}\n\nfunc CheckIndex(ctx context.Context, rdb 
*redis.Client) {\n\tindices, _ := rdb.FT_List(ctx).Result()\n\tif slices.Contains(indices, \"idx\") {\n\t\tpterm.Println(\"Index exists\")\n\t} else {\n\t\tpterm.Println(\"Creating index with\", formatLargeNumber(*nEntries), \"entries\")\n\t\tstart := time.Now()\n\t\terr := CreateIndex(ctx, rdb, uint(*nDim))\n\t\tif err != nil {\n\t\t\tpanic(err)\n\t\t}\n\t\tWaitForIndex(ctx, rdb)\n\t\tpterm.Println(\"Created index in\", time.Since(start))\n\t}\n}\n\nfunc main() {\n\tflag.Usage = func() {\n\t\tfmt.Fprintln(os.Stderr, \"Randomized vector search benchmark\")\n\t\tflag.PrintDefaults()\n\t}\n\tflag.Parse()\n\n\tctx := context.Background()\n\trdb := redis.NewClient(&redis.Options{\n\t\tProtocol:    2,\n\t\tReadTimeout: -1, // due to possibly long index construction\n\t\tAddr:        fmt.Sprint(*fHost, \":\", *fPort),\n\t})\n\n\tCheckData(ctx, rdb)\n\tCheckIndex(ctx, rdb)\n\n\tpterm.Println(\"Running queries\")\n\ttook, latencies := RunQueries(ctx, rdb)\n\tPrint(took, latencies)\n}\n"
  }
]